From b3b47143a8e268dc77eab88d280fcfde5a2b6087 Mon Sep 17 00:00:00 2001 From: chenhaozhe Date: Mon, 9 Jan 2023 12:37:55 +0300 Subject: [PATCH] add hypercomplex operators and corresponding tests fix comments length and function order fix lints add copyrights fix code styles fix pylints add method docs,fix lints fix code checks move hypercomplex upper directory support the complex data type --- .jenkins/check/config/filter_pylint.txt | 1 + .../python/mindspore/hypercomplex/__init__.py | 24 + .../hypercomplex/complex/__init__.py | 23 + .../hypercomplex/complex/_complex_bn_impl.py | 139 ++ .../complex/_complex_conv_impl.py | 201 +++ .../complex/_complex_dense_impl.py | 125 ++ .../hypercomplex/complex/complex_operators.py | 1118 ++++++++++++++ .../hypercomplex/complex/complex_relu.py | 62 + .../mindspore/hypercomplex/double/__init__.py | 23 + .../hypercomplex/double/_double_bn_impl.py | 256 ++++ .../hypercomplex/double/_double_conv_impl.py | 126 ++ .../hypercomplex/double/_double_dense_impl.py | 123 ++ .../hypercomplex/double/double_operators.py | 1331 +++++++++++++++++ .../hypercomplex/double/double_relu.py | 77 + .../mindspore/hypercomplex/dual/__init__.py | 24 + .../hypercomplex/dual/_dual_bn_impl.py | 140 ++ .../hypercomplex/dual/_dual_conv_impl.py | 138 ++ .../hypercomplex/dual/_dual_dense_impl.py | 69 + .../hypercomplex/dual/dual_operators.py | 958 ++++++++++++ .../hypercomplex/hypercomplex/_hc_bn_impl.py | 339 +++++ .../hypercomplex/_hc_conv_impl.py | 123 ++ .../hypercomplex/_hc_dense_impl.py | 114 ++ .../hypercomplex/hypercomplex/hc_bn.py | 627 ++++++++ .../hypercomplex/hypercomplex/hc_conv.py | 1055 +++++++++++++ .../hypercomplex/hypercomplex/hc_dense.py | 200 +++ .../hypercomplex/hypercomplex/hc_pool.py | 1018 +++++++++++++ .../hypercomplex/uniform_operator.py | 49 + .../python/mindspore/hypercomplex/utils.py | 51 + tests/st/hypercomplex/deepconvnet.py | 83 + tests/st/hypercomplex/hcmodel.py | 32 + tests/st/hypercomplex/resnet.py | 593 ++++++++ 
tests/st/hypercomplex/test_deepconvnet.py | 13 + tests/st/hypercomplex/test_mnist.py | 112 ++ tests/st/hypercomplex/test_resnet.py | 115 ++ 34 files changed, 9482 insertions(+) create mode 100644 mindspore/python/mindspore/hypercomplex/__init__.py create mode 100644 mindspore/python/mindspore/hypercomplex/complex/__init__.py create mode 100644 mindspore/python/mindspore/hypercomplex/complex/_complex_bn_impl.py create mode 100644 mindspore/python/mindspore/hypercomplex/complex/_complex_conv_impl.py create mode 100644 mindspore/python/mindspore/hypercomplex/complex/_complex_dense_impl.py create mode 100644 mindspore/python/mindspore/hypercomplex/complex/complex_operators.py create mode 100644 mindspore/python/mindspore/hypercomplex/complex/complex_relu.py create mode 100644 mindspore/python/mindspore/hypercomplex/double/__init__.py create mode 100644 mindspore/python/mindspore/hypercomplex/double/_double_bn_impl.py create mode 100644 mindspore/python/mindspore/hypercomplex/double/_double_conv_impl.py create mode 100644 mindspore/python/mindspore/hypercomplex/double/_double_dense_impl.py create mode 100644 mindspore/python/mindspore/hypercomplex/double/double_operators.py create mode 100644 mindspore/python/mindspore/hypercomplex/double/double_relu.py create mode 100644 mindspore/python/mindspore/hypercomplex/dual/__init__.py create mode 100644 mindspore/python/mindspore/hypercomplex/dual/_dual_bn_impl.py create mode 100644 mindspore/python/mindspore/hypercomplex/dual/_dual_conv_impl.py create mode 100644 mindspore/python/mindspore/hypercomplex/dual/_dual_dense_impl.py create mode 100644 mindspore/python/mindspore/hypercomplex/dual/dual_operators.py create mode 100644 mindspore/python/mindspore/hypercomplex/hypercomplex/_hc_bn_impl.py create mode 100644 mindspore/python/mindspore/hypercomplex/hypercomplex/_hc_conv_impl.py create mode 100644 mindspore/python/mindspore/hypercomplex/hypercomplex/_hc_dense_impl.py create mode 100644 
mindspore/python/mindspore/hypercomplex/hypercomplex/hc_bn.py create mode 100644 mindspore/python/mindspore/hypercomplex/hypercomplex/hc_conv.py create mode 100644 mindspore/python/mindspore/hypercomplex/hypercomplex/hc_dense.py create mode 100644 mindspore/python/mindspore/hypercomplex/hypercomplex/hc_pool.py create mode 100644 mindspore/python/mindspore/hypercomplex/hypercomplex/uniform_operator.py create mode 100644 mindspore/python/mindspore/hypercomplex/utils.py create mode 100644 tests/st/hypercomplex/deepconvnet.py create mode 100644 tests/st/hypercomplex/hcmodel.py create mode 100644 tests/st/hypercomplex/resnet.py create mode 100644 tests/st/hypercomplex/test_deepconvnet.py create mode 100644 tests/st/hypercomplex/test_mnist.py create mode 100644 tests/st/hypercomplex/test_resnet.py diff --git a/.jenkins/check/config/filter_pylint.txt b/.jenkins/check/config/filter_pylint.txt index f57108b15ce..e85bffcfc05 100644 --- a/.jenkins/check/config/filter_pylint.txt +++ b/.jenkins/check/config/filter_pylint.txt @@ -74,6 +74,7 @@ "mindspore/mindspore/python/mindspore/ops/operations/array_ops.py" "redefined-builtin" "mindspore/mindspore/python/mindspore/ops/_grad_experimental/grad_sparse_ops.py" "unused-variable" "mindspore/mindspore/python/mindspore/ops/operations/_inner_ops.py" "not-callable" +"mindspore/mindspore/python/mindspore/hypercomplex" "useless-return" # MindData "mindspore/mindspore/python/mindspore/dataset/__init__.py" "redefined-builtin" diff --git a/mindspore/python/mindspore/hypercomplex/__init__.py b/mindspore/python/mindspore/hypercomplex/__init__.py new file mode 100644 index 00000000000..c76e4e50663 --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/__init__.py @@ -0,0 +1,24 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +HyperComplex operators. + +Note: + This feature is a beta feature, and we are still improving its functionality. + The interface may be changed or removed in the future. +""" +import mindspore.hypercomplex.complex +import mindspore.hypercomplex.dual +import mindspore.hypercomplex.double diff --git a/mindspore/python/mindspore/hypercomplex/complex/__init__.py b/mindspore/python/mindspore/hypercomplex/complex/__init__.py new file mode 100644 index 00000000000..21b673d19fd --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/complex/__init__.py @@ -0,0 +1,23 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Complex Operators""" +from mindspore.hypercomplex.complex.complex_relu import ReLU +from mindspore.hypercomplex.complex.complex_operators import Conv1d, Conv2d, Conv3d +from mindspore.hypercomplex.complex.complex_operators import BatchNorm1d, BatchNorm2d, BatchNorm3d +from mindspore.hypercomplex.complex.complex_operators import Dense + +from mindspore.hypercomplex.hypercomplex.hc_pool import MaxPool1d, MaxPool2d, \ + AvgPool1d, AvgPool2d, AdaptiveAvgPool1d, AdaptiveAvgPool2d, \ + AdaptiveAvgPool3d, AdaptiveMaxPool1d, AdaptiveMaxPool2d diff --git a/mindspore/python/mindspore/hypercomplex/complex/_complex_bn_impl.py b/mindspore/python/mindspore/hypercomplex/complex/_complex_bn_impl.py new file mode 100644 index 00000000000..5d8f6a0ff9e --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/complex/_complex_bn_impl.py @@ -0,0 +1,139 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""complex BatchNorm implementation""" +from typing import Tuple + +import mindspore.ops as ops +from mindspore.common.tensor import Tensor +from mindspore.hypercomplex.hypercomplex._hc_bn_impl import _BaseBatchNormImpl as HCBatchNormImpl +from mindspore.hypercomplex.utils import get_x_and_y as get_real_and_imag + + +class _BatchNormImpl(HCBatchNormImpl): + r""" + The implementor class of the Batch Normalization layer for complex numbers. + + Implements the functionality specific to complex numbers and needed by the 'BatchNorm' class. This includes: + getting the norm of a complex number, applying scaling and shift to a complex tensor, and updating the running + mean and variance, which are used during inference. + + Args: + affine (bool): A bool value. When set to True, gamma and beta can be learned. + use_batch_statistics (bool): If true, use the mean value and variance value of current batch data. If false, + use the mean value and variance value of specified value. If None, the training process will use the mean + and variance of current batch data and track the running mean and variance, the evaluation process will use + the running mean and variance. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. + num_features (int): The number of features in the input space. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def scale_and_shift(self, + u_x: Tensor, + u_y: Tensor, + scale_x: Tensor, + scale_y: Tensor, + shift_x: Tensor, + shift_y: Tensor) -> Tuple[Tensor, Tensor]: + r""" + Applies complex scaling and shift to an input tensor.
+ + This function implements the operation as: + + .. math:: + \begin{align} + \text{Re(out)} = \text{Re(inp)} * \text{Re(scale)} - \text{Im(inp)} * \text{Im(scale)} + \text{Re(shift)}\\ + \text{Im(out)} = \text{Re(inp)} * \text{Im(scale)} + \text{Im(inp)} * \text{Re(scale)} + \text{Im(shift)}, + \end{align} + + where :math:`inp` is the complex input tensors, :math:`scale` and :math:`shift` are complex parameters + representing the scaling and shift coefficients respectively. :math:`\text{Re(...)}` and :math:`\text{Im(...)}` + are respectively real and imaginary parts of the complex-valued expression inside the parentheses. + + Args: + u_x (Tensor): A tensor of shape (C,), which represents the real part of the normalized inputs. + u_y (Tensor): A tensor of shape (C,), which represents the imaginary part of the normalized inputs. + scale_x (Tensor): A tensor of shape (C,), which represents the real part of the scaling vector. + scale_y (Tensor): A tensor of shape (C,), which represents the imaginary part of the scaling vector. + shift_x (Tensor): A tensor of shape (C,), which represents the real part of the bias vector. + shift_y (Tensor): A tensor of shape (C,), which represents the imaginary part of the bias vector. + + Returns: + Tuple of two tensors of shape (C,), which contains the real and the imaginary parts of rescaled and + recentered inputs. + """ + out_x = u_x * scale_x - u_y * scale_y + shift_x + out_y = u_x * scale_y + u_y * scale_x + shift_y + return out_x, out_y + + def get_norm(self, + u: Tensor) -> Tensor: + r""" + Calculates norm of complex elements of an input tensor. + + Norm is a non-negative real number that is a characteristic of 'magnitude' of that number, i.e. how far away it + is from zero. The function implements the operation as: + + .. 
math:: + \text{out} = \sqrt{\text{Re(inp)}^2 + \text{Im(inp)}^2 + \delta}, + + where :math:`inp` is the complex input tensors and :math:`\delta` is a small positive constant, which is needed + to avoid division by zero in case statistical variance is close to zero. :math:`\text{Re(...)}` and + :math:`\text{Im(...)}` are respectively real and imaginary parts of the complex-valued expression inside + the parentheses. + + Args: + u (Tensor): Tensor of shape (2, *, ..., *). '2' denotes that the input tensor belongs to the complex domain + and has a real and an imaginary parts. + + Returns: + Tensor of shape (*, ..., *). The count and size of dimensions of the output tensor are the same ones as in + the input tensor, but without the very first dimension because the output tensor is real-valued. + """ + norm2 = self.get_square_norm(u) + eps = 1e-7 + out = ops.sqrt(norm2 + eps) + return out + + def get_square_norm(self, + u: Tensor) -> Tensor: + r""" + Calculates element-wise squared norm of complex elements of an input tensor. + + Norm is a non-negative real number that is a characteristic of 'magnitude' of that number, i.e. how far away it + is from zero. The function implements the operation as: + + .. math:: + \text{out} = \text{Re(inp)}^2 + \text{Im(inp)}^2, + + where :math:`inp` is the complex input tensors, :math:`\text{Re(...)}` and :math:`\text{Im(...)}` + are respectively real and imaginary parts of the complex-valued expression inside the parentheses. + + Args: + u (Tensor): Tensor of shape (2, *, ..., *). '2' denotes that the input tensor belongs to the complex domain + and has a real and an imaginary parts. + + Returns: + Tensor of shape (*, ..., *). The count and size of dimensions of the output tensor are the same ones as in + the input tensor, but without the very first dimension because the output tensor is real-valued. 
+ """ + u_r, u_i = get_real_and_imag(u) + out = u_r ** 2 + u_i ** 2 + return out diff --git a/mindspore/python/mindspore/hypercomplex/complex/_complex_conv_impl.py b/mindspore/python/mindspore/hypercomplex/complex/_complex_conv_impl.py new file mode 100644 index 00000000000..c0329f57d55 --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/complex/_complex_conv_impl.py @@ -0,0 +1,201 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""complex convolution implementation""" +import numbers +from typing import Callable, Tuple, Union + +from mindspore.common.tensor import Tensor +from mindspore.common.initializer import Initializer +from mindspore.hypercomplex.hypercomplex._hc_conv_impl import _BaseConvImpl as BaseConvImpl +from mindspore import ops as P + + +class _ConvImpl(BaseConvImpl): + r""" + The implementor class of the convolution layer for complex numbers. + + Applies complex-valued convolution transformation. This layer implements the operation as: + + .. 
math:: + \begin{align} + \text{Re(ccor)} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)}) + - \text{ccor}(\text{Im(kernel)}, \text{Im(inp)})\\ + \text{Im(ccor)} = \text{ccor}(\text{Im(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Re(kernel)}, \text{Im(inp)}) + \end{align} + + where :math:`ccor` is the real-valued `cross-correlation `_, + :math:`inp` is the complex-valued input tensors, :math:`\text{kernel}` is a complex weight matrix with the same + data type as the :math:`inp` created by the layer, :math:`\text{Re(...)}` and :math:`\text{Im(...)}` + are respectively real and imaginary parts of the complex-valued expression inside the parentheses. + + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of real and imaginary parts of the kernel. + + Inputs: + - **conv_op** (Callable) - the function of the real-valued convolution to be used for decomposition + of the complex convolution transformation. For example, mindspore.ops.operations.Conv2D(...) may be passed + - **real** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`, + which defines the real part of the input. + - **imag** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`, + which defines the imaginary part of the input. + + Outputs: + Tuple of two tensors, each of shape :math:`(N, C_{out}, *, ..., *)` or :math:`(N, *, ..., *, C_{out})`, + which represents the real and the imaginary parts of the output. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def construct(self, + conv_op: Callable, + real: Tensor, + imag: Tensor) -> Tuple[Tensor, Tensor]: + out_rr = conv_op(real, self.weight_x) + out_ii = conv_op(imag, self.weight_y) + out_ri = conv_op(real, self.weight_y) + out_ir = conv_op(imag, self.weight_x) + + out_r = out_rr - out_ii + out_i = out_ri + out_ir + + return out_r, out_i + + +class _KaratsubaConvImpl(BaseConvImpl): + r""" + The implementor class of the convolution layer for complex numbers. + + Applies complex-valued convolution transformation. This layer implements the operation as: + + .. math:: + \begin{align} + \text{C1} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)})\\ + \text{C2} = \text{ccor}(\text{Im(kernel)}, \text{Im(inp)})\\ + \text{C3} = \text{ccor}(\text{Re(kernel)} + \text{Im(kernel)}, \text{Re(inp)} + \text{Im(inp)})\\ + \text{Re(out)} = C1 - C2\\ + \text{Im(out)} = C3 - C1 - C2, + \end{align} + + where :math:`ccor` is the real-valued `cross-correlation `_, + :math:`inp` is the complex-valued input tensors, :math:`\text{kernel}` is a complex weight matrix with the same + data type as the :math:`inp` created by the layer, :math:`\text{Re(...)}` and :math:`\text{Im(...)}` + are respectively real and imaginary parts of the complex-valued expression inside the parentheses. + + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of real and imaginary parts of the kernel. + + Inputs: + - **conv_op** (Callable) - the function of the real-valued convolution to be used for decomposition + of the complex convolution transformation. For example, mindspore.ops.operations.Conv2D(...) 
may be passed + - **real** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`, + which defines the real part of the input. + - **imag** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`, + which defines the imaginary part of the input. + + Outputs: + Tuple of two tensors, each of shape :math:`(N, C_{out}, *, ..., *)` or :math:`(N, *, ..., *, C_{out})`, + which represents the real and the imaginary parts of the output. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def construct(self, + conv_op: Callable, + real: Tensor, + imag: Tensor) -> Tuple[Tensor, Tensor]: + c1 = conv_op(real, self.weight_x) + c2 = conv_op(imag, self.weight_y) + c3 = conv_op(real + imag, self.weight_x + self.weight_y) + + out_r = c1 - c2 + out_i = c3 - c1 - c2 + + return out_r, out_i + + +class _ReImConvImpl(BaseConvImpl): + r""" + The implementor class of the convolution layer for complex numbers. + + Applies complex-valued convolution transformation. This layer implements the operation as: + + .. math:: + \begin{align} + \text{inp_cat} = \text{cat}(\text{Re(inp)}, \text{Im(inp)}) \\ + \text{K1} = \text{cat}(\text{Re(kernel)}, \text{-Im(kernel)}) \\ + \text{K2} = \text{cat}(\text{Im(kernel)}, \text{Re(kernel)}) \\ + \text{Re(ccor)} = \text{ccor}(\text{K1}, \text{inp_cat}) \\ + \text{Im(ccor)} = \text{ccor}(\text{K2}, \text{inp_cat}) + \end{align} + + where :math:`ccor` is the real-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, + :math:`inp` is the complex-valued input tensor, :math:`\text{kernel}` is a complex weight matrix with the same + data type as the :math:`inp` created by the layer, :math:`\text{cat}` is concatenation along the channel axis, + :math:`\text{Re(...)}` and :math:`\text{Im(...)}` are respectively real and imaginary parts of the complex-valued + expression inside the parentheses.
+ + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of real and imaginary parts of the kernel. + factory_kwargs (dict): Additional parameters, which must include data_format. + + Inputs: + - **conv_op** (Callable) - the function of the real-valued convolution to be used for decomposition + of the complex convolution transformation. For example, mindspore.ops.operations.Conv2D(...) may be passed + - **real** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`, + which defines the real part of the input. + - **imag** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`, + which defines the imaginary part of the input. + + Outputs: + Tuple of two tensors, each of shape :math:`(N, C_{out}, *, ..., *)` or :math:`(N, *, ..., *, C_{out})`, + which represents the real and the imaginary parts of the output. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + weight_init: Union[Tensor, str, Initializer, numbers.Number], + weight_shape: tuple, + **factory_kwargs) -> None: + super(_ReImConvImpl, self).__init__(weight_init, weight_shape, **factory_kwargs) + data_format = factory_kwargs.get('data_format', 'nchw') + c_idx = data_format.lower().find('c') + if c_idx < 0: + raise ValueError(f"Data format {data_format} is unsupported") + self.concat = P.Concat(c_idx) + self.neg = P.Neg() + + def construct(self, + conv_op: Callable, + real: Tensor, + imag: Tensor) -> Tuple[Tensor, Tensor]: + + inp = self.concat([real, imag]) + weight_y_neg = self.neg(self.weight_y) + w1 = self.concat([self.weight_x, weight_y_neg]) + w2 = self.concat([self.weight_y, self.weight_x]) + out_r = conv_op(inp, w1) + out_i = conv_op(inp, w2) + return out_r, out_i diff --git a/mindspore/python/mindspore/hypercomplex/complex/_complex_dense_impl.py b/mindspore/python/mindspore/hypercomplex/complex/_complex_dense_impl.py new file mode 100644 index 00000000000..18bfad327a2 --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/complex/_complex_dense_impl.py @@ -0,0 +1,125 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""complex dense implementation""" +from typing import Callable, Tuple + +from mindspore.common.tensor import Tensor +from mindspore.hypercomplex.hypercomplex._hc_dense_impl import _BaseDenseImpl as BaseDenseImpl + + +class _DenseImpl(BaseDenseImpl): + r""" + The implementor class of the dense connected layer for complex numbers. + + Applies complex-valued matrix multiplication for dense connected layer. This layer implements the operation as: + + .. math:: + \begin{align} + \text{Re(out)} = \text{Re(inp)} * \text{Re(kernel)} - \text{Im(inp)} * \text{Im(kernel)}\\ + \text{Im(out)} = \text{Re(inp)} * \text{Im(kernel)} + \text{Im(inp)} * \text{Re(kernel)}, + \end{align} + + where :math:`inp` is the complex input tensors, :math:`\text{kernel}` is a complex weight matrix with the same + data type as the :math:`inp` created by the layer, :math:`\text{Re(...)}` and :math:`\text{Im(...)}` + are respectively real and imaginary parts of the complex-valued expression inside the parentheses. + + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of real and imaginary parts of the kernel. + **factory_kwargs (dict): Extra parameters which may be needed by specific subclasses. + + Inputs: + - **matmul_op** (Callable) - the function of the real-valued matrix multiplication to be used for decomposition + of the complex linear transformation. Usually, mindspore.ops.operations.MatMul(...) is passed + - **real** (Tensor) - Tensor of shape :math:`(*, in\_channels)`, which defines the real part of the input. + - **imag** (Tensor) - Tensor of shape :math:`(*, in\_channels)`, which defines the imaginary part of the input. 
+ + Outputs: + Tuple of two tensors, each of shape :math:`(*, out\_channels)`, which represents the real and the imaginary + parts of the output. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def construct(self, + matmul_op: Callable, + real: Tensor, + imag: Tensor) -> Tuple[Tensor, Tensor]: + out_rr = matmul_op(real, self.weight_x) + out_ii = matmul_op(imag, self.weight_y) + out_ri = matmul_op(real, self.weight_y) + out_ir = matmul_op(imag, self.weight_x) + + out_r = out_rr - out_ii + out_i = out_ri + out_ir + + return out_r, out_i + + +class _KaratsubaDenseImpl(BaseDenseImpl): + r""" + The implementor class of the dense connected layer for complex numbers. + + Applies complex-valued matrix multiplication for dense connected layer. This layer implements the operation as: + + .. math:: + \begin{align} + \text{L1} = \text{Re(inp)} * \text{Re(kernel)}\\ + \text{L2} = \text{Im(inp)} * \text{Im(kernel)}\\ + \text{L3} = (\text{Re(inp)} + \text{Im(inp)}) * (\text{Re(kernel)} + \text{Im(kernel)})\\ + \text{Re(out)} = L1 - L2\\ + \text{Im(out)} = L3 - L1 - L2, + \end{align} + + where :math:`inp` is the complex input tensors, :math:`\text{kernel}` is a complex weight matrix with the same + data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is a complex bias vector with the same + data type as the :math:`inp` created by the layer (only if has_bias is True). :math:`\text{Re(...)}` and + :math:`\text{Im(...)}` are respectively real and imaginary parts of the complex-valued expression inside + the parentheses. + + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of real and imaginary parts of the kernel. + **factory_kwargs (dict): Extra parameters which may be needed by specific subclasses. 
+ + Inputs: + - **matmul_op** (Callable) - the function of the real-valued matrix multiplication to be used for decomposition + of the complex linear transformation. Usually, mindspore.ops.operations.MatMul(...) is passed + - **real** (Tensor) - Tensor of shape :math:`(*, in\_channels)`, which defines the real part of the input. + - **imag** (Tensor) - Tensor of shape :math:`(*, in\_channels)`, which defines the imaginary part of the input. + + Outputs: + Tuple of two tensors, each of shape :math:`(*, out\_channels)`, which represents the real and the imaginary + parts of the output. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def construct(self, + matmul_op: Callable, + real: Tensor, + imag: Tensor) -> Tuple[Tensor, Tensor]: + l1 = matmul_op(real, self.weight_x) + l2 = matmul_op(imag, self.weight_y) + l3 = matmul_op(real + imag, self.weight_x + self.weight_y) + + out_r = l1 - l2 + out_i = l3 - l1 - l2 + + return out_r, out_i diff --git a/mindspore/python/mindspore/hypercomplex/complex/complex_operators.py b/mindspore/python/mindspore/hypercomplex/complex/complex_operators.py new file mode 100644 index 00000000000..27e8701f675 --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/complex/complex_operators.py @@ -0,0 +1,1118 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Complex operators""" +import numbers +from typing import Union + +from mindspore.common.initializer import Initializer +from mindspore.common.tensor import Tensor +# Batch Normalization +from mindspore.hypercomplex.hypercomplex.hc_bn import BatchNorm1d as HBatchNorm1d, \ + BatchNorm2d as HBatchNorm2d, BatchNorm3d as HBatchNorm3d +from mindspore.hypercomplex.complex._complex_bn_impl import _BatchNormImpl as BatchNormImpl +# Convolution +from mindspore.hypercomplex.hypercomplex.hc_conv import Conv1d as HConv1d, Conv2d as HConv2d, Conv3d as HConv3d +from mindspore.hypercomplex.complex._complex_conv_impl import _ReImConvImpl as ConvImpl, \ + _KaratsubaConvImpl as KaratsubaConvImpl +# Dense +from mindspore.hypercomplex.hypercomplex.hc_dense import Dense as HDense +from mindspore.hypercomplex.complex._complex_dense_impl import _DenseImpl as DenseImpl, \ + _KaratsubaDenseImpl as KaratsubaDenseImpl + +from mindspore.hypercomplex.hypercomplex.uniform_operator import _UniformOperator + +from mindspore.hypercomplex.utils import _size_1_t, _size_2_t, _size_3_t + + +class Conv2d(_UniformOperator): + r""" + 2D convolution layer on the complex-valued input. + + Calculates the 2D convolution on the input tensor which is typically of shape + :math:`(2, N, C_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, :math:`C_{in}` is a number of channels, + :math:`H_{in}, W_{in}` are the height and width of the feature layer respectively. The formula is defined as: + + .. math:: + + \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + + \sum_{k = 0}^{C_{in} - 1} \text{hccor}({\text{weight}(C_{\text{out}_j}, k), \text{inp}(N_i, k)}) + + where :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of + the output and :math:`j` is in the range of :math:`[0, C_{out}-1]`. 
:math:`\text{weight}(C_{\text{out}_j}, k)` + is a convolution kernel slice with shape :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`, + where :math:`\text{kernel_size[0]}` and :math:`\text{kernel_size[1]}` are the height and width of the convolution + kernel respectively. :math:`\text{bias}` is the bias parameter and :math:`\text{inp}` is the input tensor. + In this case, `data_format` of the input tensor is 'NCHW' and the shape of full convolution kernel is + :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`, + where `group` is the number of groups to split the input `inp` in the channel dimension. If `data_format` of the + input tensor is 'NHWC', the shape of full convolution kernel will be + :math:`(C_{out}, \text{kernel_size[0]}, \text{kernel_size[1]}, C_{in} / \text{group})`. + :math:`hccor` is the complex-valued `cross-correlation `_. + If use_karatsuba is False, this implies the operation as: + + .. math:: + \text{Re(ccor)} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)}) + - \text{ccor}(\text{Im(kernel)}, \text{Im(inp)}) + \text{Re(bias)}\\ + \text{Im(ccor)} = \text{ccor}(\text{Im(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Re(kernel)}, \text{Im(inp)}) + \text{Im(bias)} + + And, if use_karatsuba is True then: + + .. 
math:: + \begin{align} + \text{C1} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)})\\ + \text{C2} = \text{ccor}(\text{Im(kernel)}, \text{Im(inp)})\\ + \text{C3} = \text{ccor}(\text{Re(kernel)} + \text{Im(kernel)}, \text{Re(inp)} + \text{Im(inp)})\\ + \text{Re(out)} = C1 - C2 + \text{Re(bias)}\\ + \text{Im(out)} = C3 - C1 - C2 + \text{Im(bias)}, + \end{align} + + where :math:`ccor` is the real-valued `cross-correlation `_, + :math:`inp` is the complex input tensor, :math:`\text{kernel}` is a complex weight matrix with the same + data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is a complex bias vector with the same + data type as the :math:`inp` created by the layer (only if has_bias is True). :math:`\text{Re(...)}` and + :math:`\text{Im(...)}` are respectively real and imaginary parts of the complex-valued expression + inside the parentheses. + + Both the operations with and without using Karatsuba's algorithm are mathematically identical, but Karatsuba's + method requires three real-valued convolutions instead of four, at the cost of increased number of additions + and subtractions. Hence, it can be effective in terms of time consumption, but only if input tensors and kernels + are of big size. + + For more details, please refer to the paper `Gradient Based Learning Applied to Document + Recognition `_. + + Note: + On Ascend platform, only group convolution in depthwise convolution scenarios is supported. + That is, when `group > 1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied. + + 'NHWC' format is supported only with GPU target device as of now. + + Args: + in_channels (int): The channel number of the input tensor of the Conv2d layer. + out_channels (int): The channel number of the output tensor of the Conv2d layer. + kernel_size (Union[int, tuple[int]]): Specifies the height and width of the 2D convolution kernel. + The data type is an integer or a tuple of two integers. 
An integer represents the height + and width of the convolution kernel. A tuple of two integers represents the height + and width of the convolution kernel respectively. + stride (Union[int, tuple[int]]): The movement stride of the 2D convolution kernel. + The data type is an integer or a tuple of two integers. An integer represents the movement step size + in both height and width directions. A tuple of two integers represents the movement step size in the + height and width directions respectively. Default: 1. + pad_mode (str): Specifies padding mode. The optional values are + "same", "valid", "pad". Default: "same". + + - same: The width of the output is the same as the value of the input divided by `stride`. + If this mode is set, the value of `padding` must be 0. + + - valid: Returns a valid calculated output without padding. Excess pixels that do not satisfy the + calculation will be discarded. If this mode is set, the value of `padding` must be 0. + + - pad: Pads the input. Padding `padding` size of zero on both sides of the input. + If this mode is set, the value of `padding` must be greater than or equal to 0. + + padding (Union[int, tuple[int]]): The number of padding on the height and width directions of the input. + The data type is an integer or a tuple of four integers. If `padding` is an integer, + then the top, bottom, left, and right padding are all equal to `padding`. + If `padding` is a tuple of 4 integers, then the top, bottom, left, and right padding + is equal to `padding[0]`, `padding[1]`, `padding[2]`, and `padding[3]` respectively. + The value should be greater than or equal to 0. Default: 0. + dilation (Union[int, tuple[int]]): Dilation size of 2D convolution kernel. + The data type is an integer or a tuple of two integers. If :math:`k > 1`, the kernel is sampled + every `k` elements. The value of `k` on the height and width directions is in range of [1, H] + and [1, W] respectively. Default: 1. 
+ group (int): Splits filter into groups, `in_channels` and `out_channels` must be + divisible by `group`. If the group is equal to `in_channels` and `out_channels`, + this 2D convolution layer also can be called 2D depthwise convolution layer. Default: 1. + has_bias (bool): Whether the Conv2d layer has a bias parameter. Default: False. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter. + It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, + values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well + as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones' + and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of + Initializer for more details. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter. + Available initialization methods are the same as 'weight_init'. Refer to the values of + Initializer for more details. Default: 'zeros'. + data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'. + Default: 'NCHW'. + use_karatsuba (bool): Specifies whether the layer uses Karatsuba's algorithm for complex-valued multiplication. + Default: False + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C_{in}, H_{in}, W_{in})` + or :math:`(2, N, H_{in}, W_{in}, C_{in})`, with float16 or float32 data type, or + :math:`(N, C_{in}, H_{in}, W_{in})` or :math:`(N, H_{in}, W_{in}, C_{in})`, with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C_{out}, H_{out}, W_{out})` or + :math:`(2, N, H_{out}, W_{out}, C_{out})`, with float16 or float32 data type, or + :math:`(N, C_{out}, H_{out}, W_{out})` or :math:`(N, H_{out}, W_{out}, C_{out})`, with complex64 data type. + + pad_mode is 'same': + + .. 
math:: + \begin{array}{ll} \\ + H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[0]}}} \right \rceil \\ + W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[1]}}} \right \rceil \\ + \end{array} + + pad_mode is 'valid': + + .. math:: + \begin{array}{ll} \\ + H_{out} = \left \lceil{\frac{H_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) } + {\text{stride[0]}}} \right \rceil \\ + W_{out} = \left \lceil{\frac{W_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) } + {\text{stride[1]}}} \right \rceil \\ + \end{array} + + pad_mode is 'pad': + + .. math:: + \begin{array}{ll} \\ + H_{out} = \left \lfloor{\frac{H_{in} + padding[0] + padding[1] - (\text{kernel_size[0]} - 1) \times + \text{dilation[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\ + W_{out} = \left \lfloor{\frac{W_{in} + padding[2] + padding[3] - (\text{kernel_size[1]} - 1) \times + \text{dilation[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\ + \end{array} + + Raises: + TypeError: If `in_channels`, `out_channels` or `group` is not an int. + TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple. + TypeError: If any two of `inp`, `weight_init` and `bias_init` are Tensors of different data type. + ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. + ValueError: If `padding` is less than 0. + ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. + ValueError: If `padding` is a tuple whose length is not equal to 4. + ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0). + ValueError: If `data_format` is neither 'NCHW' nor 'NHWC'. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.complex import Conv2d + >>> from mindspore import Tensor + >>> w = Tensor(np.random.random((2, 128, 3, 7, 7)).astype(np.float32)) + >>> b = Tensor(np.random.random((2, 128)).astype(np.float32)) + >>> net = Conv2d( + >>> in_channels=3, out_channels=128, kernel_size=7, stride=2, padding=3, + >>> pad_mode='pad', weight_init=w, bias_init=b, has_bias=True + >>> ) + >>> z = Tensor(np.random.random((2, 16, 3, 224, 224)).astype(np.float32)) + >>> out = net(z) + >>> print(out.shape) + (2, 16, 128, 112, 112) + """ + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: _size_2_t, + stride: _size_2_t = 1, + pad_mode: str = 'same', + padding: _size_2_t = 0, + dilation: _size_2_t = 1, + group: int = 1, + has_bias: bool = False, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + data_format: str = 'NCHW', + use_karatsuba: bool = False) -> None: + if use_karatsuba: + super(Conv2d, self).__init__(HConv2d, + KaratsubaConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init, + data_format=data_format) + else: + super(Conv2d, self).__init__(HConv2d, + ConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init, + data_format=data_format) + + +class Conv1d(_UniformOperator): + r""" + 1D convolution layer on the complex-valued input. 
+ + Calculates the 1D convolution on the input tensor which is typically of shape :math:`(2, N, C_{in}, L_{in})`, + where :math:`N` is batch size, :math:`C_{in}` is a number of channels and :math:`L_{in}` is a length of sequence. + The formula is defined as: + + .. math:: + + \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + + \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{inp}(N_i, k)}), + + where :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of + the output and :math:`j` is in the range of :math:`[0, C_{out}-1]`. :math:`\text{weight}(C_{\text{out}_j}, k)` + is a convolution kernel slice with shape :math:`\text{kernel_size}`, where :math:`\text{kernel_size}` + is the width of the convolution kernel. :math:`\text{bias}` is the bias parameter, + and :math:`\text{inp}` is the input tensor. The shape of full convolution kernel is + :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size})`, + where `group` is the number of groups to split the input `inp` in the channel dimension. + :math:`ccor` is the complex-valued `cross-correlation `_. + If use_karatsuba is False, this implies the operation as: + + .. math:: + \text{Re(ccor)} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)}) + - \text{ccor}(\text{Im(kernel)}, \text{Im(inp)}) + \text{Re(bias)}\\ + \text{Im(ccor)} = \text{ccor}(\text{Im(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Re(kernel)}, \text{Im(inp)}) + \text{Im(bias)} + + And, if use_karatsuba is True then: + + .. 
math:: + \begin{align} + \text{C1} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)})\\ + \text{C2} = \text{ccor}(\text{Im(kernel)}, \text{Im(inp)})\\ + \text{C3} = \text{ccor}(\text{Re(kernel)} + \text{Im(kernel)}, \text{Re(inp)} + \text{Im(inp)})\\ + \text{Re(out)} = C1 - C2 + \text{Re(bias)}\\ + \text{Im(out)} = C3 - C1 - C2 + \text{Im(bias)}, + \end{align} + + where :math:`ccor` is the real-valued `cross-correlation `_, + :math:`inp` is the complex input tensor, :math:`\text{kernel}` is a complex weight matrix with the same + data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is a complex bias vector with the same + data type as the :math:`inp` created by the layer (only if has_bias is True). :math:`\text{Re(...)}` and + :math:`\text{Im(...)}` are respectively real and imaginary parts of the complex-valued expression inside + the parentheses. + + Both the operations with and without using Karatsuba's algorithm are mathematically identical, but Karatsuba's + method requires three real-valued convolutions instead of four, at the cost of increased number of additions + and subtractions. Hence, it can be effective in terms of time consumption, but only if input tensors and kernels + are of big size. + + For more details, please refer to the paper `Gradient Based Learning Applied to Document + Recognition `_. + + Note: + On Ascend platform, only group convolution in depthwise convolution scenarios is supported. + That is, when `group > 1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied. + + Args: + in_channels (int): The channel number of the input tensor of the Conv1d layer. + out_channels (int): The channel number of the output tensor of the Conv1d layer. + kernel_size (int): Specifies the width of the 1D convolution kernel. + stride (int): The movement stride of the 1D convolution kernel. Default: 1. + pad_mode (str): Specifies padding mode. The optional values are + "same", "valid", "pad". Default: "same". 
+ + - same: The width of the output is the same as the value of the input divided by `stride`. + If this mode is set, the value of `padding` must be 0. + + - valid: Returns a valid calculated output without padding. Excess pixels that do not satisfy the + calculation will be discarded. If this mode is set, the value of `padding` must be 0. + + - pad: Pads the input. Padding `padding` size of zero on both sides of the input. + If this mode is set, the value of `padding` must be greater than or equal to 0. + + padding (int): The number of padding on both sides of input. + The value should be greater than or equal to 0. Default: 0. + dilation (int): Dilation size of 1D convolution kernel. If :math:`k > 1`, the kernel is sampled + every `k` elements. The value of `k` is in range of [1, L]. Default: 1. + group (int): Splits filter into groups, `in_channels` and `out_channels` must be + divisible by `group`. Default: 1. + has_bias (bool): Whether the Conv1d layer has a bias parameter. Default: False. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter. + It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, + values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well + as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones' + and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of + Initializer for more details. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter. + Available initialization methods are the same as 'weight_init'. Refer to the values of + Initializer for more details. Default: 'zeros'. + use_karatsuba (bool): Specifies whether the layer uses Karatsuba's algorithm for complex-valued multiplication. 
+ Default: False + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C_{in}, L_{in})`, with float16 or float32 data type, or + :math:`(N, C_{in}, L_{in})` with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C_{out}, L_{out})`, with float16 or float32 + data type, or :math:`(N, C_{out}, L_{out})`, with complex64 data type. + + pad_mode is 'same': + + .. math:: + L_{out} = \left \lceil{\frac{L_{in}}{\text{stride}}} \right \rceil + + pad_mode is 'valid': + + .. math:: + L_{out} = \left \lceil{\frac{L_{in} - \text{dilation} \times (\text{kernel_size} - 1) } + {\text{stride}}} \right \rceil + + pad_mode is 'pad': + + .. math:: + L_{out} = \left \lfloor{\frac{L_{in} + 2 \times padding - (\text{kernel_size} - 1) \times + \text{dilation} - 1 }{\text{stride}} + 1} \right \rfloor + + Raises: + TypeError: If `in_channels`, `out_channels`, `kernel_size`, `stride`, `padding` or `dilation` is not an int. + TypeError: If any two of `inp`, `weight_init` and `bias_init` are Tensors of different data type. + ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. + ValueError: If `padding` is less than 0. + ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.complex import Conv1d + >>> from mindspore import Tensor + >>> w = Tensor(np.random.random((2, 16, 1, 6)).astype(np.float32)) + >>> b = Tensor(np.random.random((2, 16)).astype(np.float32)) + >>> net = Conv1d( + >>> in_channels=1, out_channels=16, kernel_size=6, stride=2, padding=2, + >>> pad_mode='pad', weight_init=w, bias_init=b, has_bias=True + >>> ) + >>> z = Tensor(np.random.random((2, 8, 1, 4096)).astype(np.float32)) + >>> out = net(z) + >>> print(out.shape) + (2, 8, 16, 2048) + """ + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: _size_1_t, + stride: _size_1_t = 1, + pad_mode: str = 'same', + padding: _size_1_t = 0, + dilation: _size_1_t = 1, + group: int = 1, + has_bias: bool = False, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + use_karatsuba: bool = False) -> None: + if use_karatsuba: + super(Conv1d, self).__init__(HConv1d, + KaratsubaConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init) + else: + super(Conv1d, self).__init__(HConv1d, + ConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init) + + +class Conv3d(_UniformOperator): + r""" + 3D convolution layer on the complex-valued input. 
+ + Calculates the 3D convolution on the input tensor which is typically of shape + :math:`(2, N, C_{in}, D_{in}, H_{in}, W_{in})`, + where :math:`N` is batch size, :math:`C_{in}` is a number of channels, + :math:`D_{in}, H_{in}, W_{in}` are the depth, height and width of the feature layer respectively. + The formula is defined as: + + .. math:: + + \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + + \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{inp}(N_i, k)}) + + where :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of + the output and :math:`j` is in the range of :math:`[0,C_{out}-1]`. :math:`\text{weight}(C_{\text{out}_j}, k)` + is a convolution kernel slice with shape + :math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`, + where :math:`\text{kernel_size[0]}`, :math:`\text{kernel_size[1]}` and :math:`\text{kernel_size[2]}` are + the depth, height and width of the convolution kernel respectively. :math:`\text{bias}` is the bias parameter + and :math:`\text{inp}` is the input tensor. + The shape of full convolution kernel is + :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`, + where `group` is the number of groups to split the input `x` in the channel dimension. + :math:`hccor` is the complex-valued `cross-correlation `_. + If use_karatsuba is False, this implies the operation as: + + .. math:: + \text{Re(ccor)} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)}) + - \text{ccor}(\text{Im(kernel)}, \text{Im(inp)}) + \text{Re(bias)}\\ + \text{Im(ccor)} = \text{ccor}(\text{Im(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Re(kernel)}, \text{Im(inp)}) + \text{Im(bias)} + + And, if use_karatsuba is True then: + + .. 
math:: + \begin{align} + \text{C1} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)})\\ + \text{C2} = \text{ccor}(\text{Im(kernel)}, \text{Im(inp)})\\ + \text{C3} = \text{ccor}(\text{Re(kernel)} + \text{Im(kernel)}, \text{Re(inp)} + \text{Im(inp)})\\ + \text{Re(out)} = C1 - C2 + \text{Re(bias)}\\ + \text{Im(out)} = C3 - C1 - C2 + \text{Im(bias)}, + \end{align} + + where :math:`ccor` is the real-valued `cross-correlation `_, + :math:`inp` is the complex input tensor, :math:`\text{kernel}` is a complex weight matrix with the same + data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is a complex bias vector with the same + data type as the :math:`inp` created by the layer (only if has_bias is True). :math:`\text{Re(...)}` + and :math:`\text{Im(...)}` are respectively real and imaginary parts of the complex-valued expression inside + the parentheses. + + Both the operations with and without using Karatsuba's algorithm are mathematically identical, but Karatsuba's + method requires three real-valued convolutions instead of four, at the cost of increased number of additions + and subtractions. Hence, it can be effective in terms of time consumption, but only if input tensors and kernels + are of big size. + + For more details, please refer to the paper `Gradient Based Learning Applied to Document + Recognition `_. + + Note: + On Ascend platform, only group convolution in depthwise convolution scenarios is supported. + That is, when `group>1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied. + + Args: + in_channels (int): The channel number of the input tensor of the Conv3d layer. + out_channels (int): The channel number of the output tensor of the Conv3d layer. + kernel_size (Union[int, tuple[int]]): Specifies the depth, height and width of the 3D convolution kernel. + The data type is an integer or a tuple of three integers. An integer represents the depth, height + and width of the convolution kernel. 
A tuple of three integers represents the depth, height + and width of the convolution kernel respectively. + stride (Union[int, tuple[int]]): The movement stride of the 3D convolution kernel. + The data type is an integer or a tuple of three integers. An integer represents the movement step size + in depth, height and width directions. A tuple of three integers represents the movement step size + in the depth, height and width directions respectively. Default: 1. + pad_mode (str): Specifies padding mode. The optional values are + "same", "valid", "pad". Default: "same". + + - same: The width of the output is the same as the value of the input divided by `stride`. + If this mode is set, the value of `padding` must be 0. + + - valid: Returns a valid calculated output without padding. Excess pixels that do not satisfy the + calculation will be discarded. If this mode is set, the value of `padding` must be 0. + + - pad: Pads the input. Padding `padding` size of zero on both sides of the input. + If this mode is set, the value of `padding` must be greater than or equal to 0. + + padding (Union(int, tuple[int])): The number of padding on the depth, height and width directions of the input. + The data type is an integer or a tuple of six integers. If `padding` is an integer, + then the head, tail, top, bottom, left, and right padding are all equal to `padding`. + If `padding` is a tuple of six integers, then the head, tail, top, bottom, left, and right padding + is equal to `padding[0]`, `padding[1]`, `padding[2]`, `padding[3]`, `padding[4]` and `padding[5]` + respectively. The value should be greater than or equal to 0. Default: 0. + dilation (Union[int, tuple[int]]): Dilation size of 3D convolution kernel. + The data type is an integer or a tuple of three integers. If :math:`k > 1`, the kernel is sampled + every `k` elements. The value of `k` on the depth, height and width directions is in range of + [1, D], [1, H] and [1, W] respectively. Default: 1. 
+ group (int): Splits filter into groups, `in_channels` and `out_channels` must be + divisible by `group`. Default: 1. Only 1 is currently supported. + has_bias (bool): Whether the Conv3d layer has a bias parameter. Default: False. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter. + It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, + values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well + as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones' + and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of + Initializer for more details. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter. + Available initialization methods are the same as 'weight_init'. Refer to the values of + Initializer for more details. Default: 'zeros'. + data_format (str): The optional value for data format. Currently only supports "NCDHW". + use_karatsuba (bool): Specifies whether the layer uses Karatsuba's algorithm for complex-valued multiplication. + Default: False + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C_{in}, D_{in}, H_{in}, W_{in})`, with float16 or float32 + data type, or :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`, with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C_{out}, D_{out}, H_{out}, W_{out})`, with + float16 or float32 data type, or :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`, with complex64 data type. + + pad_mode is 'same': + + .. 
math:: + \begin{array}{ll} \\ + D_{out} = \left \lceil{\frac{D_{in}}{\text{stride[0]}}} \right \rceil \\ + H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[1]}}} \right \rceil \\ + W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[2]}}} \right \rceil \\ + \end{array} + + + pad_mode is 'valid': + + .. math:: + \begin{array}{ll} \\ + D_{out} = \left \lceil{\frac{D_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) } + {\text{stride[0]}}} \right \rceil \\ + H_{out} = \left \lceil{\frac{H_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) } + {\text{stride[1]}}} \right \rceil \\ + W_{out} = \left \lceil{\frac{W_{in} - \text{dilation[2]} \times (\text{kernel_size[2]} - 1) } + {\text{stride[2]}}} \right \rceil \\ + \end{array} + + pad_mode is 'pad': + + .. math:: + \begin{array}{ll} \\ + D_{out} = \left \lfloor{\frac{D_{in} + padding[0] + padding[1] - (\text{kernel_size[0]} - 1) \times + \text{dilation[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\ + H_{out} = \left \lfloor{\frac{H_{in} + padding[2] + padding[3] - (\text{kernel_size[1]} - 1) \times + \text{dilation[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\ + W_{out} = \left \lfloor{\frac{W_{in} + padding[4] + padding[5] - (\text{kernel_size[2]} - 1) \times + \text{dilation[2]} - 1 }{\text{stride[2]}} + 1} \right \rfloor \\ + \end{array} + + Raises: + TypeError: If `in_channels`, `out_channels` or `group` is not an int. + TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple. + ValueError: If `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. + ValueError: If `padding` is less than 0. + ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. + ValueError: If `padding` is a tuple whose length is not equal to 6. + ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0, 0, 0). + ValueError: If `data_format` is not 'NCDHW'. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.complex import Conv3d + >>> from mindspore import Tensor + >>> w = Tensor(np.random.random((2, 128, 3, 3, 3, 3)).astype(np.float32)) + >>> b = Tensor(np.random.random((2, 128)).astype(np.float32)) + >>> net = Conv3d( + >>> in_channels=3, out_channels=128, kernel_size=3, stride=1, padding=1, + >>> pad_mode='pad', weight_init=w, bias_init=b, has_bias=True + >>> ) + >>> z = Tensor(np.random.random((2, 64, 3, 32, 32, 32)).astype(np.float32)) + >>> out = net(z) + >>> print(out.shape) + (2, 64, 128, 32, 32, 32) + """ + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: _size_3_t, + stride: _size_3_t = 1, + pad_mode: str = 'same', + padding: _size_3_t = 0, + dilation: _size_3_t = 1, + group: int = 1, + has_bias: bool = False, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + data_format: str = 'NCDHW', + use_karatsuba: bool = False) -> None: + if use_karatsuba: + super(Conv3d, self).__init__(HConv3d, + KaratsubaConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init, + data_format=data_format) + else: + super(Conv3d, self).__init__(HConv3d, + ConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init, + data_format=data_format) + + +class BatchNorm1d(_UniformOperator): + r""" + The complex-valued Batch Normalization layer over a second-order complex input of four dimensions including + one spatial dimension, or three dimensions. 
+ + This layer applies Batch Normalization over a complex input to reduce internal covariate shift. + Batch Normalization is widely used in convolutional networks. It rescales and recenters the feature using + a mini-batch of data and the learned parameters which can be described by the following formula: + + .. math:: + \begin{align} + \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\ + \hat{y} = \frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}\\ + \text{Re(\hat{out})} = \text{Re(out)} * \text{Re(\gamma)} + - \text{Im(out)} * \text{Im(\gamma)} + \text{Re(\beta)}\\ + \text{Im(\hat{out})} = \text{Re(out)} * \text{Im(\gamma)} + + \text{Im(out)} * \text{Re(\gamma)} + \text{Im(\beta)}, + \end{align} + + where :math:`inp` is the complex input tensors, :math:`\mathrm{E}[inp]` is the arithmetic mean of the input tensor + over the batch dimension, :math:`\mathrm{Var}[inp]` is the statistical variance of the input tensor over spatial + dimensions, based on the complex norm :math:`\|x+iy\|=\sqrt{x^2+y^2}`, :math:`\gamma` and :math:`\beta` are complex + learnable parameters representing the scale and shift coefficients respectively, and :math:`\delta` is a small + positive constant, which is needed to avoid division by zero in case statistical variance is close to zero. + :math:`\text{Re(...)}` and :math:`\text{Im(...)}` are respectively real and imaginary parts of the complex-valued + expression inside the parentheses. + + Args: + num_features (int): The number of features in the input space. + eps (float): A small positive threshold, which is needed to avoid division by zero. Default: :math:`10^{-5}` + momentum (float): A floating hyperparameter of the momentum for the running_mean and running_var computation. + Default: 0.9. + affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. 
+ The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + use_batch_statistics (bool): + + - If True, use the mean value and variance value of current batch data and track running mean + and running variance. + - If False, use the mean value and variance value of specified value, and not track statistical value. + - If None, the use_batch_statistics is automatically set to True or False according to the training + and evaluation mode. During training, the parameter is set to True, and during evaluation, the + parameter is set to False. Default: True. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, W)` or :math:`(2, N, C)`, with float16 or float32 data + type, or :math:`(N, C, W)` or :math:`(N, C)`, with complex64 data type. In the former case '2' denotes that + the input tensor belongs to the complex domain and has a real and an imaginary part. The `num_features` + in `Args` has to be equal to :math:`C` in `inp`. + + Outputs: + Tensor, the normalized, scaled, offset tensor of the same data type and shape as :math:`inp`: + :math:`(2, N, C, W)` or :math:`(2, N, C)`, with float16 or float32 data type, or :math:`(N, C, W)` + or :math:`(N, C)`, with complex64 data type. + + Raises: + TypeError: If `num_features` is not an int. + TypeError: If `eps` is not a float. 
+ TypeError: If dtype of `inp` is not float16, float32 or complex64. + ValueError: If `num_features` is less than 1. + ValueError: If `momentum` is not in range [0, 1]. + ValueError: if 'inp' is not a Tensor of 3 or 4 dimensions. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.complex import BatchNorm1d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32)).astype(np.float32)) + >>> bn = BatchNorm1d(64) + >>> y = bn(u) + >>> print(y.shape) + (2, 8, 64, 32) + """ + + def __init__(self, + num_features: int, + eps: float = 1e-5, + momentum: float = 0.9, + affine: bool = True, + gamma_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + beta_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_mean_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_var_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + use_batch_statistics: bool = True) -> None: + super(BatchNorm1d, self).__init__(HBatchNorm1d, + BatchNormImpl, + num_features=num_features, + eps=eps, + momentum=momentum, + affine=affine, + gamma_init=gamma_init, + beta_init=beta_init, + moving_mean_init=moving_mean_init, + moving_var_init=moving_var_init, + use_batch_statistics=use_batch_statistics) + + +class BatchNorm2d(_UniformOperator): + r""" + The complex-valued Batch Normalization layer over a second-order complex input of five dimensions, including + two spatial dimensions. + + This layer applies Batch Normalization over a complex input to reduce internal covariate shift. + Batch Normalization is widely used in convolutional networks. It rescales and recenters the feature using + a mini-batch of data and the learned parameters which can be described by the following formula: + + .. 
math:: + \begin{align} + \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\ + out = \frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}\\ + \text{Re(\hat{out})} = \text{Re(out)} * \text{Re(\gamma)}\ + - \text{Im(out)} * \text{Im(\gamma)} + \text{Re(\beta)}\\ + \text{Im(\hat{out})} = \text{Re(out)} * \text{Im(\gamma)} + + \text{Im(out)} * \text{Re(\gamma)} + \text{Im(\beta)}, + \end{align} + + where :math:`inp` is the complex input tensors, :math:`\mathrm{E}[inp]` is the arithmetic mean of the input tensor + over the batch dimension, :math:`\mathrm{Var}[inp]` is the statistical variance of the input tensor over spatial + dimensions, based on the complex norm :math:`\|x+iy\|=\sqrt{x^2+y^2}`, :math:`\gamma` and :math:`\beta` are complex + learnable parameters representing the scale and shift coefficients respectively, and :math:`\delta` is a small + positive constant, which is needed to avoid division by zero in case statistical variance is close to zero. + :math:`\text{Re(...)}` and :math:`\text{Im(...)}` are respectively real and imaginary parts of the complex-valued + expression inside the parentheses. + + Args: + num_features (int): The number of features in the input space. + eps (float): A small positive threshold, which is needed to avoid division by zero. Default: :math:`10^{-5}` + momentum (float): A floating hyperparameter of the momentum for the running_mean and running_var computation. + Default: 0.9. + affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. 
+ moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + use_batch_statistics (bool): + + - If True, use the mean value and variance value of current batch data and track running mean + and running variance. + - If False, use the mean value and variance value of specified value, and not track statistical value. + - If None, the use_batch_statistics is automatically set to True or False according to the training + and evaluation mode. During training, the parameter is set to True, and during evaluation, the + parameter is set to False. Default: True. + data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'. Default: 'NCHW'. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, H, W)` if data_format is 'NCHW', or + :math:`(2, N, H, W, C)` if data_format is 'NHWC', with float16 or float32 data type. '2' denotes that + the input tensor belongs to the complex domain and has a real and an imaginary part. Or, + :math:`(N, C, H, W)` if data_format is 'NCHW', or :math:`(N, H, W, C)` if data_format is 'NHWC', + with complex64 data type. The `num_features` in `Args` has to be equal to :math:`C` in `inp`. + + Outputs: + Tensor, the normalized, scaled, offset tensor of the same data type and shape as :math:`inp`: + :math:`(2, N, C, H, W)` if data_format is 'NCHW', or :math:`(2, N, H, W, C)` if data_format is 'NHWC', + with float16 or float32 data type. Or, :math:`(N, C, H, W)` if data_format is 'NCHW', + or :math:`(N, H, W, C)` if data_format is 'NHWC', with complex64 data type. + + Raises: + TypeError: If `num_features` is not an int. 
+ TypeError: If `eps` is not a float. + TypeError: If dtype of `inp` is not float16, float32 or complex64. + ValueError: If `num_features` is less than 1. + ValueError: If `momentum` is not in range [0, 1]. + ValueError: If `data_format` is neither 'NHWC' nor 'NCHW'. + ValueError: if 'inp' is not a Tensor of 5 dimensions. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.complex import BatchNorm2d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32, 32)).astype(np.float32)) + >>> bn = BatchNorm2d(64) + >>> y = bn(u) + >>> print(y.shape) + (2, 8, 64, 32, 32) + """ + + def __init__(self, + num_features: int, + eps: float = 1e-5, + momentum: float = 0.9, + affine: bool = True, + gamma_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + beta_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_mean_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_var_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + use_batch_statistics: bool = True, + data_format='NCHW') -> None: + super(BatchNorm2d, self).__init__(HBatchNorm2d, + BatchNormImpl, + num_features=num_features, + eps=eps, + momentum=momentum, + affine=affine, + gamma_init=gamma_init, + beta_init=beta_init, + moving_mean_init=moving_mean_init, + moving_var_init=moving_var_init, + use_batch_statistics=use_batch_statistics, + data_format=data_format) + + +class BatchNorm3d(_UniformOperator): + r""" + The complex-valued Batch Normalization layer over a second-order complex input of six dimensions, including + three spatial dimensions. + + This layer applies Batch Normalization over a complex input to reduce internal covariate shift. + Batch Normalization is widely used in convolutional networks. It rescales and recenters the feature + using a mini-batch of data and the learned parameters which can be described by the following formula: + + .. 
math:: + \begin{align} + \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\ + out = \frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}\\ + \text{Re(\hat{out})} = \text{Re(out)} * \text{Re(\gamma)} + - \text{Im(out)} * \text{Im(\gamma)} + \text{Re(\beta)}\\ + \text{Im(\hat{out})} = \text{Re(out)} * \text{Im(\gamma)} + + \text{Im(out)} * \text{Re(\gamma)} + \text{Im(\beta)}, + \end{align} + + where :math:`inp` is the complex input tensors, :math:`\mathrm{E}[inp]` is the arithmetic mean of the input tensor + over the batch dimension, :math:`\mathrm{Var}[inp]` is the statistical variance of the input tensor over spatial + dimensions, based on the complex norm :math:`\|x+iy\|=\sqrt{x^2+y^2}`, :math:`\gamma` and :math:`\beta` are complex + learnable parameters representing the scale and shift coefficients respectively, and :math:`\delta` is a small + positive constant, which is needed to avoid division by zero in case statistical variance is close to zero. + :math:`\text{Re(...)}` and :math:`\text{Im(...)}` are respectively real and imaginary parts of the complex-valued + expression inside the parentheses. + + Args: + num_features (int): The number of features in the input space. + eps (float): A small positive threshold, which is needed to avoid division by zero. Default: :math:`10^{-5}` + momentum (float): A floating hyperparameter of the momentum for the running_mean and running_var computation. + Default: 0.9. + affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. 
+ moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + use_batch_statistics (bool): + + - If True, use the mean value and variance value of current batch data and track running mean + and running variance. + - If False, use the mean value and variance value of specified value, and not track statistical value. + - If None, the use_batch_statistics is automatically set to True or False according to the training + and evaluation mode. During training, the parameter is set to True, and during evaluation, the + parameter is set to False. Default: True. + data_format (str): The optional value for data format. Only 'NCDHW' format is supported as of now. + Default: 'NCDHW'. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, D, H, W)`. '2' denotes that the input tensor belongs + to the complex domain and has a real and an imaginary part. The `num_features` in `Args` has to be equal + to :math:`C` in `Inputs`. + + Outputs: + Tensor, the normalized, scaled, offset tensor of the same shape :math:`(2, N, C, D, H, W)` as :math:`inp`. + + Raises: + TypeError: If `num_features` is not an int. + TypeError: If `eps` is not a float. + ValueError: If `num_features` is less than 1. + ValueError: If `momentum` is not in range [0, 1]. + ValueError: If `data_format` is not 'NCDHW'. + ValueError: if 'inp' is not a Tensor of 6 dimensions. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.complex import BatchNorm3d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32, 32, 32)).astype(np.float32)) + >>> bn = BatchNorm3d(64) + >>> y = bn(u) + >>> print(y.shape) + (2, 8, 64, 32, 32, 32) + """ + + def __init__(self, + num_features: int, + eps: float = 1e-5, + momentum: float = 0.9, + affine: bool = True, + gamma_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + beta_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_mean_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_var_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + use_batch_statistics: bool = True, + data_format='NCDHW') -> None: + super(BatchNorm3d, self).__init__(HBatchNorm3d, + BatchNormImpl, + num_features=num_features, + eps=eps, + momentum=momentum, + affine=affine, + gamma_init=gamma_init, + beta_init=beta_init, + moving_mean_init=moving_mean_init, + moving_var_init=moving_var_init, + use_batch_statistics=use_batch_statistics, + data_format=data_format) + + +class Dense(_UniformOperator): + r""" + The complex-valued dense connected layer. + + Applies dense connected layer for the complex-valued input. If use_karatsuba is False, this layer implements + the operation as: + + .. math:: + \text{Re(out)} = \text{Re(inp)} * \text{Re(kernel)} - \text{Im(inp)} * \text{Im(kernel)} + \text{Re(bias)} + \text{Im(out)} = \text{Re(inp)} * \text{Im(kernel)} + \text{Im(inp)} * \text{Re(kernel)} + \text{Im(bias)}, + + And, if use_karatsuba is True then: + + .. 
math:: + \begin{align} + \text{L1} = \text{Re(inp)} * \text{Re(kernel)}\\ + \text{L2} = \text{Im(inp)} * \text{Im(kernel)}\\ + \text{L3} = (\text{Re(inp)} + \text{Im(inp)}) * (\text{Re(kernel)} + \text{Im(kernel)})\\ + \text{Re(out)} = L1 - L2 + \text{Re(bias)}\\ + \text{Im(out)} = L3 - L1 - L2 + \text{Im(bias)}, + \end{align} + + where :math:`inp` is the complex input tensors, :math:`\text{kernel}` is a complex weight matrix with the same + data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is a complex bias vector with the same + data type as the :math:`inp` created by the layer (only if has_bias is True). :math:`\text{Re(...)}` and + :math:`\text{Im(...)}` are respectively real and imaginary parts of the complex-valued expression inside + the parentheses. + + Both the operations with and without using Karatsuba's algorithm are mathematically identical, but Karatsuba's + method requires three matrix multiplications instead of four, at the cost of increased number of additions and + subtractions. Hence, it can be effective in terms of time consumption, but only if input tensors and kernels are + of big size. + + Args: + in_channels (int): The number of channels in the input space. + out_channels (int): The number of channels in the output space. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is + same as `inp`. The values of str refer to the function `initializer`. Default: 'zeros'. + has_bias (bool): Specifies whether the layer uses a bias vector. Default: True. + use_karatsuba (bool): Specifies whether the layer uses Karatsuba's algorithm for complex-valued multiplication. 
+ Default: False + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, *, ..., *, in\_channels)`, with float16 or float32 data type, + or :math:`(*, ..., *, in\_channels)`, with complex64 data type. In the former case '2' denotes that the input + tensor belongs to the complex domain and has got a real and an imaginary parts. The `in_channels` in `Args` + has to be equal to :math:`in\_channels` in `Inputs`. The count of mediator dimensions denoted by '*' + is arbitrary but must be at least one. + + Outputs: + Tensor of the same data type as 'inp' and of shape :math:`(2, *, ..., *, out\_channels)`, with float16 or + float32 data type, or :math:`(*, ..., *, out\_channels)`, with complex64 data type. The count of mediator + dimensions is the same as one in 'Inputs'. + + Raises: + TypeError: If `in_channels` or `out_channels` is not an int. + TypeError: If `has_bias` is not a bool. + TypeError: If any two of `inp`, `weight_init` and `bias_init` are Tensors of different data type. + ValueError: If length of shape of `weight_init` is not equal to 3, + or shape[0] of 'weight_init' is not equal to 2, + or shape[1] of `weight_init` is not equal to `out_channels`, + or shape[2] of `weight_init` is not equal to `in_channels`. + ValueError: If length of shape of `bias_init` is not equal to 2, + or shape[0] of 'bias_init' is not equal to 2, + or shape[1] of `bias_init` is not equal to `out_channels`. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.complex import Dense + >>> from mindspore import Tensor + >>> w = Tensor(np.random.random((2, 7, 5)).astype(np.float32)) + >>> b = Tensor(np.random.random((2, 7)).astype(np.float32)) + >>> net = Dense(in_channels=5, out_channels=7, weight_init=w, bias_init=b, has_bias=True) + >>> z = Tensor(np.random.random((2, 34, 1, 5)).astype(np.float32)) + >>> out = net(z) + >>> print(out.shape) + (2, 34, 1, 7) + """ + + def __init__(self, + in_channels: int, + out_channels: int, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + has_bias: bool = True, + use_karatsuba: bool = False) -> None: + if use_karatsuba: + super(Dense, self).__init__(HDense, + KaratsubaDenseImpl, + in_channels=in_channels, + out_channels=out_channels, + weight_init=weight_init, + bias_init=bias_init, + has_bias=has_bias) + else: + super(Dense, self).__init__(HDense, + DenseImpl, + in_channels=in_channels, + out_channels=out_channels, + weight_init=weight_init, + bias_init=bias_init, + has_bias=has_bias) diff --git a/mindspore/python/mindspore/hypercomplex/complex/complex_relu.py b/mindspore/python/mindspore/hypercomplex/complex/complex_relu.py new file mode 100644 index 00000000000..3a7cf95a46f --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/complex/complex_relu.py @@ -0,0 +1,62 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""complex ReLU implementation""" +import mindspore +from mindspore import nn, Tensor +from mindspore.ops import operations as P +from mindspore.hypercomplex.utils import get_x_and_y as get_real_and_imag, to_2channel as to_complex + + +class ReLU(nn.Cell): + r""" + Rectified Linear Unit activation function for complex-valued input. + + Applies ReLU activation layer for the complex-valued input. This layer applies the element-wise + :math:`\max(0, x)` for both real and imaginary parts of the input tensor independently: + + .. math:: + \begin{align} + \text{Re(out)} = (Re(inp))^+ = \max(0, Re(inp))\\ + \text{Im(out)} = (Im(inp))^+ = \max(0, Im(inp)), + \end{align} + + Inputs: + - **inp** (Tensor) - The input of ReLU is a Tensor of shape (2, *, ..., *), with float16 or float32 data type, + or (*, ..., *), with complex64 data type. + + Outputs: + Tensor, with the same data type and shape as the `inp`. + + Raises: + TypeError: If dtype of `inp` is not float16, float32, or complex64. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self): + """Initialize ReLU.""" + super(ReLU, self).__init__() + self.relu = P.ReLU() + + def construct(self, u: Tensor) -> Tensor: + if u.dtype == mindspore.complex64: + real, imag = get_real_and_imag(u) + real = self.relu(real) + imag = self.relu(imag) + out = to_complex(real, imag, u.dtype) + else: + out = self.relu(u) + return out diff --git a/mindspore/python/mindspore/hypercomplex/double/__init__.py b/mindspore/python/mindspore/hypercomplex/double/__init__.py new file mode 100644 index 00000000000..ee534519089 --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/double/__init__.py @@ -0,0 +1,23 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Double Operators""" +from mindspore.hypercomplex.double.double_operators import Conv1d, Conv2d, Conv3d +from mindspore.hypercomplex.double.double_operators import BatchNorm1d, BatchNorm2d, BatchNorm3d +from mindspore.hypercomplex.double.double_operators import Dense +from mindspore.hypercomplex.double.double_operators import ReLU + +from mindspore.hypercomplex.hypercomplex.hc_pool import MaxPool1d, MaxPool2d, \ + AvgPool1d, AvgPool2d, AdaptiveAvgPool1d, AdaptiveAvgPool2d, \ + AdaptiveAvgPool3d, AdaptiveMaxPool1d, AdaptiveMaxPool2d diff --git a/mindspore/python/mindspore/hypercomplex/double/_double_bn_impl.py b/mindspore/python/mindspore/hypercomplex/double/_double_bn_impl.py new file mode 100644 index 00000000000..55e2e4bece8 --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/double/_double_bn_impl.py @@ -0,0 +1,256 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Double BatchNorm implementation""" +from typing import Tuple + +import mindspore.ops as ops +from mindspore.common.tensor import Tensor +from mindspore.hypercomplex.hypercomplex._hc_bn_impl import _BaseBatchNormImpl as HCBatchNormImpl +from mindspore.hypercomplex.utils import get_x_and_y + +get_real_and_double = get_x_and_y +get_u1_and_u2 = get_x_and_y + + +class _BatchNormImpl(HCBatchNormImpl): + r""" + The implementor class of the Batch Normalization layer for double numbers in regular representation. + + Implements the functionality specific to double numbers and needed by the 'BatchNorm' class. This includes: + getting the norm of double number, applying scaling and shift to a double-valued tensor, and updating the running + mean and variance, which are used during inference. + + Args: + affine (bool) - A bool value. When set to True, gamma and beta can be learned. + use_batch_statistics (bool): If true, use the mean value and variance value of current batch data. If false, + use the mean value and variance value of specified value. If None, the training process will use the mean + and variance of current batch data and track the running mean and variance, the evaluation process will use + the running mean and variance. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. + num_features (int): The number of features in the input space. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def scale_and_shift(self, + u_x: Tensor, + u_y: Tensor, + scale_x: Tensor, + scale_y: Tensor, + shift_x: Tensor, + shift_y: Tensor) -> Tuple[Tensor, Tensor]: + r""" + Applies double scaling and shift to an input tensor in regular representation. + + This function implements the operation as: + + .. math:: + \begin{align} + \text{Re(out)} = \text{Re(inp)} * \text{Re(scale)} + \text{Db(inp)} * \text{Db(scale)} + \text{Re(shift)}\\ + \text{Db(out)} = \text{Re(inp)} * \text{Db(scale)} + \text{Db(inp)} * \text{Re(scale)} + \text{Db(shift)}, + \end{align} + + where :math:`inp` is the double input tensors, :math:`scale` and :math:`shift` are double parameters + representing the scaling and shift coefficients respectively. :math:`\text{Re(...)}` and :math:`\text{Db(...)}` + are respectively real and double parts of the double-valued expression inside the parentheses. + + Args: + u_x (Tensor): A tensor of shape (C,), which represents the real part of the normalized inputs. + u_y (Tensor): A tensor of shape (C,), which represents the double part of the normalized inputs. + scale_x (Tensor): A tensor of shape (C,), which represents the real part of the scaling vector. + scale_y (Tensor): A tensor of shape (C,), which represents the double part of the scaling vector. + shift_x (Tensor): A tensor of shape (C,), which represents the real part of the bias vector. + shift_y (Tensor): A tensor of shape (C,), which represents the double part of the bias vector. + + Returns: + Tuple of two tensors of shape (C,), which contains the real and the double parts of rescaled and + recentered inputs. + """ + out_x = u_x * scale_x + u_y * scale_y + shift_x + out_y = u_x * scale_y + u_y * scale_x + shift_y + return out_x, out_y + + def get_norm(self, u: Tensor) -> Tensor: + r""" + Calculates norm of double elements of an input tensor in regular representation. 
+ + Norm is a non-negative real number that is a characteristic of 'magnitude' of that number, i.e. how far away it + is from zero. The function implements the operation as: + + .. math:: + \text{out} = |Re(inp)| + |Db(inp)|, + + where :math:`inp` is the double input tensors, :math:`\text{Re(...)}` and :math:`\text{Db(...)}` + are respectively real and double parts of the double-valued expression inside the parentheses. + + Args: + u (Tensor): Tensor of shape (2, *, ..., *). '2' denotes that the input tensor belongs to the double domain + and has two components. + + Returns: + Tensor of shape (*, ..., *). The count and size of dimensions of the output tensor are the same ones as in + the input tensor, but without the very first dimension because the output tensor is real-valued. + """ + u_r, u_d = get_real_and_double(u) + abs_op = ops.Abs() + out = abs_op(u_r) + abs_op(u_d) + return out + + def get_square_norm(self, u: Tensor) -> Tensor: + r""" + Calculates element-wise squared norm of double elements of an input tensor in regular representation. + + Norm is a non-negative real number that is a characteristic of 'magnitude' of that number, i.e. how far away it + is from zero. The function implements the operation as: + + .. math:: + \text{out} = \left(|Re(inp)| + |Db(inp)|\right)^2, + + where :math:`inp` is the double input tensors, :math:`\text{Re(...)}` and :math:`\text{Db(...)}` + are respectively real and double parts of the double-valued expression inside the parentheses. + + Args: + u (Tensor): Tensor of shape (2, *, ..., *). '2' denotes that the input tensor belongs to the double domain + and has a real and a double part. + + Returns: + Tensor of shape (*, ..., *). The count and size of dimensions of the output tensor are the same ones as in + the input tensor, but without the very first dimension because the output tensor is real-valued. 
+ """ + norm = self.get_norm(u) + out = norm ** 2 + return out + + +class _J1J2BatchNormImpl(HCBatchNormImpl): + r""" + The implementor class of the Batch Normalization layer for double numbers in diagonal representation. + + Implements the functionality specific to double numbers and needed by the 'BatchNorm' class. This includes: + getting the norm of double number, applying scaling and shift to a double-valued tensor, and updating the running + mean and variance, which are used during inference. + + Args: + affine (bool) - A bool value. When set to True, gamma and beta can be learned. + use_batch_statistics (bool): If true, use the mean value and variance value of current batch data. If false, + use the mean value and variance value of specified value. If None, the training process will use the mean + and variance of current batch data and track the running mean and variance, the evaluation process will use + the running mean and variance. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. + num_features (int): The number of features in the input space. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def scale_and_shift(self, + u_x: Tensor, + u_y: Tensor, + scale_x: Tensor, + scale_y: Tensor, + shift_x: Tensor, + shift_y: Tensor) -> Tuple[Tensor, Tensor]: + r""" + Applies double scaling and shift to an input tensor in diagonal representation. + + This function implements the operation as: + + .. 
math:: + \begin{align} + \text{X(out)} = \text{X(inp)} * \text{X(scale)} + \text{X(shift)}\\ + \text{Y(out)} = \text{Y(inp)} * \text{Y(scale)} + \text{Y(shift)}, + \end{align} + + where :math:`inp` is the double input tensors in diagonal form, :math:`scale` and :math:`shift` are + double parameters representing the scaling and shift coefficients respectively. :math:`\text{X(...)}` + and :math:`\text{Y(...)}` are respectively the first and the second components of the double-valued + expression inside the parentheses. + + Args: + u_x (Tensor): A tensor of shape (C,), which represents the first part of the normalized inputs. + u_y (Tensor): A tensor of shape (C,), which represents the second part of the normalized inputs. + scale_x (Tensor): A tensor of shape (C,), which represents the first part of the scaling vector. + scale_y (Tensor): A tensor of shape (C,), which represents the second part of the scaling vector. + shift_x (Tensor): A tensor of shape (C,), which represents the first part of the bias vector. + shift_y (Tensor): A tensor of shape (C,), which represents the second part of the bias vector. + + Returns: + Tuple of two tensors of shape (C,), which contains the first and the second parts of rescaled and + recentered inputs in the diagonal representation. + """ + out_x = u_x * scale_x + shift_x + out_y = u_y * scale_y + shift_y + return out_x, out_y + + def get_norm(self, u: Tensor) -> Tensor: + r""" + Calculates norm of double elements of an input tensor in diagonal representation. + + Norm is a non-negative real number that is a characteristic of 'magnitude' of that number, i.e. how far away it + is from zero. The function implements the operation as: + + .. math:: + \text{out} = \text{max}(|X(inp)|, |Y(inp)|), + + where :math:`inp` is the double input tensors in diagonal form, :math:`\text{max}` is the maximum value of its + arguments. 
:math:`\text{X(...)}` and :math:`\text{Y(...)}` are respectively the first and the second components + of the double-valued expression inside the parentheses. + + Args: + u (Tensor): Tensor of shape (2, *, ..., *). '2' denotes that the input tensor belongs to the double domain + and has two components. + + Returns: + Tensor of shape (*, ..., *). The count and size of dimensions of the output tensor are the same ones as in + the input tensor, but without the very first dimension because the output tensor is real-valued. + """ + u_1, u_2 = get_u1_and_u2(u) + abs_op = ops.Abs() + max_op = ops.Maximum() + out = max_op(abs_op(u_1), abs_op(u_2)) + return out + + def get_square_norm(self, u: Tensor) -> Tensor: + r""" + Calculates element-wise squared norm of double elements of an input tensor in diagonal representation. + + Norm is a non-negative real number that is a characteristic of 'magnitude' of that number, i.e. how far away it + is from zero. The function implements the operation as: + + .. math:: + \text{out} = \left(\text{max}(|X(inp)|, |Y(inp)|)\right)^2, + + where :math:`inp` is the double input tensors in diagonal form, :math:`\text{max}` is the maximum value of its + arguments. :math:`\text{X(...)}` and :math:`\text{Y(...)}` are respectively the first and the second components + of the double-valued expression inside the parentheses. + + Args: + u (Tensor): Tensor of shape (2, *, ..., *). '2' denotes that the input tensor belongs to the double domain + and has two components. + + Returns: + Tensor of shape (*, ..., *). The count and size of dimensions of the output tensor are the same ones as in + the input tensor, but without the very first dimension because the output tensor is real-valued. 
+ """ + norm = self.get_norm(u) + out = norm ** 2 + return out diff --git a/mindspore/python/mindspore/hypercomplex/double/_double_conv_impl.py b/mindspore/python/mindspore/hypercomplex/double/_double_conv_impl.py new file mode 100644 index 00000000000..691c9ecdaec --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/double/_double_conv_impl.py @@ -0,0 +1,126 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Double convolution implementation""" +from typing import Callable, Tuple + +from mindspore.common.tensor import Tensor +from mindspore.hypercomplex.hypercomplex._hc_conv_impl import _BaseConvImpl as BaseConvImpl + + +class _ConvImpl(BaseConvImpl): + r""" + The implementor class of the convolution layer for double numbers in regular representation. + + Applies double-valued convolution transformation. This layer implements the operation as: + + .. 
math:: + \begin{align} + \text{Re(ccor)} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Db(kernel)}, \text{Db(inp)})\\ + \text{Db(ccor)} = \text{ccor}(\text{Db(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Re(kernel)}, \text{Db(inp)}), + \end{align} + + where :math:`ccor` is the real-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, + :math:`inp` is the double input tensor, :math:`\text{kernel}` is a double weight matrix with the same + data type as the :math:`inp` created by the layer. :math:`\text{Re(...)}` and :math:`\text{Db(...)}` + are respectively the first and the second parts of the double-valued expression inside the parentheses in the + regular form. + + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of real and double parts of the kernel. + + Inputs: + - **conv_op** (Callable) - the function of the real-valued convolution to be used for decomposition + of the double convolution transformation. For example, mindspore.ops.operations.Conv2D(...) may be passed + - **real** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`, + which defines the first part of the input. + - **double** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`, + which defines the second part of the input. + + Outputs: + Tuple of two tensors, each of shape :math:`(N, C_{out}, *, ..., *)` or :math:`(N, *, ..., *, C_{out})`, + which represents both the first and the second parts of the output.
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def construct(self, + conv_op: Callable, + real: Tensor, + double: Tensor) -> Tuple[Tensor, Tensor]: + + u1 = real + double + u2 = real - double + + out1 = conv_op(u1, self.weight_x) + out2 = conv_op(u2, self.weight_y) + + out_r = out1 + out2 + out_d = out1 - out2 + return out_r, out_d + + +class _J1J2ConvImpl(BaseConvImpl): + r""" + The implementor class of the convolution layer for double numbers in diagonal representation. + + Applies double-valued convolution transformation. This layer implements the operation as: + + .. math:: + \begin{align} + \text{X(ccor)} = \text{ccor}(\text{X(kernel)}, \text{X(inp)})\\ + \text{Y(ccor)} = \text{ccor}(\text{Y(kernel)}, \text{Y(inp)}), + \end{align} + + where :math:`ccor` is the real-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, + :math:`inp` is the double input tensor, :math:`\text{kernel}` is a double weight matrix with the same + data type as the :math:`inp` created by the layer. :math:`\text{X(...)}` and :math:`\text{Y(...)}` + are respectively the first and the second parts of the double-valued expression inside the parentheses in the + diagonal form. + + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of real and double parts of the kernel. + + Inputs: + - **conv_op** (Callable) - the function of the real-valued convolution to be used for decomposition + of the double convolution transformation. For example, mindspore.ops.operations.Conv2D(...) may be passed + - **u1** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`, + which defines the first part of the input.
+ - **u2** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`, + which defines the second part of the input. + + Outputs: + Tuple of two tensors, each of shape :math:`(N, C_{out}, *, ..., *)` or :math:`(N, *, ..., *, C_{out})`, + which represents both the first and the second parts of the output. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def construct(self, + conv_op: Callable, + u1: Tensor, + u2: Tensor) -> Tuple[Tensor, Tensor]: + + out1 = conv_op(u1, self.weight_x) + out2 = conv_op(u2, self.weight_y) + + return out1, out2 diff --git a/mindspore/python/mindspore/hypercomplex/double/_double_dense_impl.py b/mindspore/python/mindspore/hypercomplex/double/_double_dense_impl.py new file mode 100644 index 00000000000..fb15e05c594 --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/double/_double_dense_impl.py @@ -0,0 +1,123 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Double dense implementation""" +from typing import Callable, Tuple + +from mindspore.common.tensor import Tensor +from mindspore.hypercomplex.hypercomplex._hc_dense_impl import _BaseDenseImpl as BaseDenseImpl + + +class _DenseImpl(BaseDenseImpl): + r""" + The implementor class of the dense connected layer for double numbers in normal representation. 
+ + Applies double-valued matrix multiplication for dense connected layer. This layer implements the operation as: + + .. math:: + \begin{align} + \text{X(out)} = \text{X(inp)} * \text{X(kernel)} + \text{Y(inp)} * \text{Y(kernel)}\\ + \text{Y(out)} = \text{X(inp)} * \text{Y(kernel)} + \text{Y(inp)} * \text{X(kernel)}, + \end{align} + + where :math:`inp` is the double input tensor, :math:`\text{kernel}` is a double weight matrix with the same + data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is a double bias vector with the same + data type as the :math:`inp` created by the layer (only if has_bias is True). :math:`\text{X(...)}` and + :math:`\text{Y(...)}` are respectively the first and the second parts of the double-valued expression + inside the parentheses. + + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of real and double parts of the kernel. + **factory_kwargs (dict): Extra parameters which may be needed by specific subclasses. + + Inputs: + - **matmul_op** (Callable) - the function of the real-valued matrix multiplication to be used for decomposition + of the double linear transformation. Usually, mindspore.ops.operations.MatMul(...) is passed + - **real** (Tensor) - Tensor of shape :math:`(*, in\_channels)`, which defines the real part of the input. + - **double** (Tensor) - Tensor of shape :math:`(*, in\_channels)`, which defines the double part of the + input. + + Outputs: + Tuple of two tensors, each of shape :math:`(*, out\_channels)`, which represents the real and the double + parts of the output.
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def construct(self, + matmul_op: Callable, + real: Tensor, + double: Tensor) -> Tuple[Tensor, Tensor]: + u1 = real + double + u2 = real - double + + out1 = matmul_op(u1, self.weight_x) + out2 = matmul_op(u2, self.weight_y) + + out_r = out1 + out2 + out_d = out1 - out2 + + return out_r, out_d + + +class _J1J2DenseImpl(BaseDenseImpl): + r""" + The implementor class of the dense connected layer for double numbers in the diagonal representation. + + Applies double-valued matrix multiplication for dense connected layer. This layer implements the operation as: + + .. math:: + \begin{align} + \text{X(out)} = \text{X(inp)} * \text{X(kernel)}\\ + \text{Y(out)} = \text{Y(inp)} * \text{Y(kernel)}, + \end{align} + + where :math:`inp` is the double input tensor in the diagonal form, :math:`\text{kernel}` is a double weight matrix + in the diagonal form with the same data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is + a double bias vector in the diagonal form with the same data type as the :math:`inp` created by the layer + (only if has_bias is True). :math:`\text{X(...)}` and :math:`\text{Y(...)}` are respectively the first and the + second parts of the double-valued expression inside the parentheses. + + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of first and second parts of the kernel. + **factory_kwargs (dict): Extra parameters which may be needed by specific subclasses. + + Inputs: + - **matmul_op** (Callable) - the function of the real-valued matrix multiplication to be used for decomposition + of the double linear transformation. Usually, mindspore.ops.operations.MatMul(...)
is passed + - **u1** (Tensor) - Tensor of shape :math:`(*, in\_channels)`, which defines the first part of the input. + - **u2** (Tensor) - Tensor of shape :math:`(*, in\_channels)`, which defines the second part of the input. + + Outputs: + Tuple of two tensors, each of shape :math:`(*, out\_channels)`, which represents the first and the second + parts of the output. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def construct(self, + matmul_op: Callable, + u1: Tensor, + u2: Tensor) -> Tuple[Tensor, Tensor]: + + out1 = matmul_op(u1, self.weight_x) + out2 = matmul_op(u2, self.weight_y) + + return out1, out2 diff --git a/mindspore/python/mindspore/hypercomplex/double/double_operators.py b/mindspore/python/mindspore/hypercomplex/double/double_operators.py new file mode 100644 index 00000000000..b29ac809ca1 --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/double/double_operators.py @@ -0,0 +1,1331 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# ============================================================================ +"""Double operators""" +import numbers +from typing import Union + +from mindspore import nn +from mindspore.common.initializer import Initializer +from mindspore.common.tensor import Tensor +# Batch Normalization +from mindspore.hypercomplex.hypercomplex.hc_bn import BatchNorm1d as HBatchNorm1d, \ + BatchNorm2d as HBatchNorm2d, BatchNorm3d as HBatchNorm3d +from mindspore.hypercomplex.double._double_bn_impl import _BatchNormImpl as BatchNormImpl, \ + _J1J2BatchNormImpl as J1J2BatchNormImpl +# Convolution +from mindspore.hypercomplex.hypercomplex.hc_conv import Conv1d as HConv1d, \ + Conv2d as HConv2d, Conv3d as HConv3d +from mindspore.hypercomplex.double._double_conv_impl import _ConvImpl as ConvImpl, \ + _J1J2ConvImpl as J1J2ConvImpl +# Dense +from mindspore.hypercomplex.hypercomplex.hc_dense import Dense as HDense +from mindspore.hypercomplex.double._double_dense_impl import _DenseImpl as DenseImpl, \ + _J1J2DenseImpl as J1J2DenseImpl +from mindspore.hypercomplex.hypercomplex.uniform_operator import _UniformOperator +# ReLU +from mindspore.hypercomplex.double.double_relu import J1J2ReLU + +from mindspore.hypercomplex.utils import _size_1_t, _size_2_t, _size_3_t + + +class Conv2d(_UniformOperator): + r""" + 2D convolution layer on the double-valued input. + + Calculates the 2D convolution on the input tensor which is typically of shape + :math:`(2, N, C_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, :math:`C_{in}` is a number of channels, + :math:`H_{in}, W_{in}` are the height and width of the feature layer respectively. The formula is defined as: + + .. 
math:: + + \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + + \sum_{k = 0}^{C_{in} - 1} \text{hccor}({\text{weight}(C_{\text{out}_j}, k), \text{inp}(N_i, k)}) + + where :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of + the output and :math:`j` is in the range of :math:`[0, C_{out}-1]`. :math:`\text{weight}(C_{\text{out}_j}, k)` + is a convolution kernel slice with shape :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`, + where :math:`\text{kernel_size[0]}` and :math:`\text{kernel_size[1]}` are the height and width of the convolution + kernel respectively. :math:`\text{bias}` is the bias parameter and :math:`\text{inp}` is the input tensor. + In this case, `data_format` of the input tensor is 'NCHW' and the shape of full convolution kernel is + :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`, + where `group` is the number of groups to split the input `inp` in the channel dimension. If `data_format` of the + input tensor is 'NHWC', the shape of full convolution kernel will be + :math:`(C_{out}, \text{kernel_size[0]}, \text{kernel_size[1]}, C_{in} / \text{group})`. + :math:`hccor` is the double-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_. + If has_diagonal_form is False, this implies the operation as: + + .. math:: + \begin{align} + \text{Re(ccor)} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Db(kernel)}, \text{Db(inp)}) + \text{Re(bias)}\\ + \text{Db(ccor)} = \text{ccor}(\text{Db(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Re(kernel)}, \text{Db(inp)}) + \text{Db(bias)}, + \end{align} + + And, if has_diagonal_form is True then: + + ..
math:: + \begin{align} + \text{X(ccor)} = \text{ccor}(\text{X(kernel)}, \text{X(inp)}) + \text{X(bias)}\\ + \text{Y(ccor)} = \text{ccor}(\text{Y(kernel)}, \text{Y(inp)}) + \text{Y(bias)}, + \end{align} + + where :math:`ccor` is the real-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, + :math:`inp` is the double input tensor, :math:`\text{kernel}` is a double weight matrix with the same + data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is a double bias vector with the same + data type as the :math:`inp` created by the layer (only if has_bias is True). :math:`\text{Re(...)}` and + :math:`\text{Db(...)}` are respectively the first and the second parts of the double-valued expression inside + the parentheses in the regular form, and :math:`\text{X(...)}` and :math:`\text{Y(...)}` stand for the same + functors, but with the parameters in the diagonal form. + + The convolution layer is notably more efficient when double numbers are given in the diagonal form, because it then + requires only two convolutions instead of four. For some other kinds of layers (e.g. activation functions) it can + be otherwise. It is always possible to transfer to and from the diagonal representation: + + .. math:: + \begin{align} + \text{X(inp)} = \text{Re(inp)} + \text{Db(inp)}\\ + \text{Y(inp)} = \text{Re(inp)} - \text{Db(inp)}\\ + \text{Re(inp)} = 0.5 * (\text{X(inp)} + \text{Y(inp)})\\ + \text{Db(inp)} = 0.5 * (\text{X(inp)} - \text{Y(inp)}) + \end{align} + + However, every conversion consumes time resources, so it is a tradeoff which representation to choose. + For more details, please refer to the paper `Gradient Based Learning Applied to Document + Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. + + Note: + On Ascend platform, only group convolution in depthwise convolution scenarios is supported. + That is, when `group > 1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied. + + 'NHWC' format is supported only with GPU target device as of now.
+ + Args: + in_channels (int): The channel number of the input tensor of the Conv2d layer. + out_channels (int): The channel number of the output tensor of the Conv2d layer. + kernel_size (Union[int, tuple[int]]): Specifies the height and width of the 2D convolution kernel. + The data type is an integer or a tuple of two integers. An integer represents the height + and width of the convolution kernel. A tuple of two integers represents the height + and width of the convolution kernel respectively. + stride (Union[int, tuple[int]]): The movement stride of the 2D convolution kernel. + The data type is an integer or a tuple of two integers. An integer represents the movement step size + in both height and width directions. A tuple of two integers represents the movement step size in the + height and width directions respectively. Default: 1. + pad_mode (str): Specifies padding mode. The optional values are + "same", "valid", "pad". Default: "same". + + - same: The width of the output is the same as the value of the input divided by `stride`. + If this mode is set, the value of `padding` must be 0. + + - valid: Returns a valid calculated output without padding. Excess pixels that do not satisfy the + calculation will be discarded. If this mode is set, the value of `padding` must be 0. + + - pad: Pads the input. Padding `padding` size of zero on both sides of the input. + If this mode is set, the value of `padding` must be greater than or equal to 0. + + padding (Union[int, tuple[int]]): The number of padding on the height and width directions of the input. + The data type is an integer or a tuple of four integers. If `padding` is an integer, + then the top, bottom, left, and right padding are all equal to `padding`. + If `padding` is a tuple of 4 integers, then the top, bottom, left, and right padding + is equal to `padding[0]`, `padding[1]`, `padding[2]`, and `padding[3]` respectively. + The value should be greater than or equal to 0. Default: 0. 
+ dilation (Union[int, tuple[int]]): Dilation size of 2D convolution kernel. + The data type is an integer or a tuple of two integers. If :math:`k > 1`, the kernel is sampled + every `k` elements. The value of `k` on the height and width directions is in range of [1, H] + and [1, W] respectively. Default: 1. + group (int): Splits filter into groups, `in_channels` and `out_channels` must be + divisible by `group`. If the group is equal to `in_channels` and `out_channels`, + this 2D convolution layer also can be called 2D depthwise convolution layer. Default: 1. + has_bias (bool): Whether the Conv2d layer has a bias parameter. Default: False. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter. + It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, + values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well + as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones' + and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of + Initializer for more details. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter. + Available initialization methods are the same as 'weight_init'. Refer to the values of + Initializer for more details. Default: 'zeros'. + data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'. + Default: 'NCHW'. + has_diagonal_form (bool): Specifies whether the input tensor is provided in the diagonal form. Default: False. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C_{in}, H_{in}, W_{in})` \ + or :math:`(2, N, H_{in}, W_{in}, C_{in})`. + + Outputs: + Tensor of shape :math:`(2, N, C_{out}, H_{out}, W_{out})` or :math:`(2, N, H_{out}, W_{out}, C_{out})`. + + pad_mode is 'same': + + .. 
math:: + \begin{array}{ll} \\ + H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[0]}}} \right \rceil \\ + W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[1]}}} \right \rceil \\ + \end{array} + + pad_mode is 'valid': + + .. math:: + \begin{array}{ll} \\ + H_{out} = \left \lceil{\frac{H_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) } + {\text{stride[0]}}} \right \rceil \\ + W_{out} = \left \lceil{\frac{W_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) } + {\text{stride[1]}}} \right \rceil \\ + \end{array} + + pad_mode is 'pad': + + .. math:: + \begin{array}{ll} \\ + H_{out} = \left \lfloor{\frac{H_{in} + padding[0] + padding[1] - (\text{kernel_size[0]} - 1) \times + \text{dilation[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\ + W_{out} = \left \lfloor{\frac{W_{in} + padding[2] + padding[3] - (\text{kernel_size[1]} - 1) \times + \text{dilation[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\ + \end{array} + + Raises: + TypeError: If `in_channels`, `out_channels` or `group` is not an int. + TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple. + ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. + ValueError: If `padding` is less than 0. + ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. + ValueError: If `padding` is a tuple whose length is not equal to 4. + ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0). + ValueError: If `data_format` is neither 'NCHW' nor 'NHWC'.
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.double import Conv2d + >>> from mindspore import Tensor + >>> w = Tensor(np.random.random((2, 128, 3, 7, 7)).astype(np.float32)) + >>> b = Tensor(np.random.random((2, 128)).astype(np.float32)) + >>> net = Conv2d( + >>> in_channels=3, out_channels=128, kernel_size=7, stride=2, padding=3, + >>> pad_mode='pad', weight_init=w, bias_init=b, has_bias=True + >>> ) + >>> inp = Tensor(np.random.random((2, 16, 3, 224, 224)).astype(np.float32)) + >>> out = net(inp) + >>> print(out.shape) + (2, 16, 128, 112, 112) + """ + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: _size_2_t, + stride: _size_2_t = 1, + pad_mode: str = 'same', + padding: _size_2_t = 0, + dilation: _size_2_t = 1, + group: int = 1, + has_bias: bool = False, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + data_format: str = 'NCHW', + has_diagonal_form: bool = False) -> None: + if has_diagonal_form: + super(Conv2d, self).__init__(HConv2d, + J1J2ConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init, + data_format=data_format) + else: + super(Conv2d, self).__init__(HConv2d, + ConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init, + data_format=data_format) + + +class Conv1d(_UniformOperator): + r""" + 1D convolution layer on the double-valued input. 
+ + Calculates the 1D convolution on the input tensor which is typically of shape :math:`(2, N, C_{in}, L_{in})`, + where :math:`N` is batch size, :math:`C_{in}` is a number of channels and :math:`L_{in}` is a length of sequence. + The formula is defined as: + + .. math:: + + \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + + \sum_{k = 0}^{C_{in} - 1} \text{hccor}({\text{weight}(C_{\text{out}_j}, k), \text{inp}(N_i, k)}) + + where :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of + the output and :math:`j` is in the range of :math:`[0, C_{out}-1]`. :math:`\text{weight}(C_{\text{out}_j}, k)` + is a convolution kernel slice with shape :math:`\text{kernel_size}`, where :math:`\text{kernel_size}` + is the width of the convolution kernel. :math:`\text{bias}` is the bias parameter, + and :math:`\text{inp}` is the input tensor. The shape of full convolution kernel is + :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size})`, + where `group` is the number of groups to split the input `inp` in the channel dimension. + :math:`hccor` is the double-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_. + If has_diagonal_form is False, this implies the operation as: + + .. math:: + \begin{align} + \text{Re(ccor)} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Db(kernel)}, \text{Db(inp)}) + \text{Re(bias)}\\ + \text{Db(ccor)} = \text{ccor}(\text{Db(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Re(kernel)}, \text{Db(inp)}) + \text{Db(bias)}, + \end{align} + + And, if has_diagonal_form is True then: + + ..
math:: + \begin{align} + \text{X(ccor)} = \text{ccor}(\text{X(kernel)}, \text{X(inp)}) + \text{X(bias)}\\ + \text{Y(ccor)} = \text{ccor}(\text{Y(kernel)}, \text{Y(inp)}) + \text{Y(bias)}, + \end{align} + + where :math:`ccor` is the real-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, + :math:`inp` is the double input tensor, :math:`\text{kernel}` is a double weight matrix with the same + data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is a double bias vector with the same + data type as the :math:`inp` created by the layer (only if has_bias is True). :math:`\text{Re(...)}` and + :math:`\text{Db(...)}` are respectively the first and the second parts of the double-valued expression inside the + parentheses in the regular form, and :math:`\text{X(...)}` and :math:`\text{Y(...)}` stand for the same functors, + but with the parameters in the diagonal form. + + The convolution layer is notably more efficient when double numbers are given in the diagonal form, because it then + requires only two convolutions instead of four. For some other kinds of layers (e.g. activation functions) it can + be otherwise. It is always possible to transfer to and from the diagonal representation: + + .. math:: + \begin{align} + \text{X(inp)} = \text{Re(inp)} + \text{Db(inp)}\\ + \text{Y(inp)} = \text{Re(inp)} - \text{Db(inp)}\\ + \text{Re(inp)} = 0.5 * (\text{X(inp)} + \text{Y(inp)})\\ + \text{Db(inp)} = 0.5 * (\text{X(inp)} - \text{Y(inp)}) + \end{align} + + However, every conversion consumes time resources, so it is a tradeoff which representation to choose. + For more details, please refer to the paper `Gradient Based Learning Applied to Document + Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. + + Note: + On Ascend platform, only group convolution in depthwise convolution scenarios is supported. + That is, when `group > 1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied. + + Args: + in_channels (int): The channel number of the input tensor of the Conv1d layer.
+ out_channels (int): The channel number of the output tensor of the Conv1d layer. + kernel_size (int): Specifies the width of the 1D convolution kernel. + stride (int): The movement stride of the 1D convolution kernel. Default: 1. + pad_mode (str): Specifies padding mode. The optional values are + "same", "valid", "pad". Default: "same". + + - same: The width of the output is the same as the value of the input divided by `stride`. + If this mode is set, the value of `padding` must be 0. + + - valid: Returns a valid calculated output without padding. Excess pixels that do not satisfy the + calculation will be discarded. If this mode is set, the value of `padding` must be 0. + + - pad: Pads the input. Padding `padding` size of zero on both sides of the input. + If this mode is set, the value of `padding` must be greater than or equal to 0. + + padding (int): The number of padding on both sides of input. + The value should be greater than or equal to 0. Default: 0. + dilation (int): Dilation size of 1D convolution kernel. If :math:`k > 1`, the kernel is sampled + every `k` elements. The value of `k` is in range of [1, L]. Default: 1. + group (int): Splits filter into groups, `in_channels` and `out_channels` must be + divisible by `group`. Default: 1. + has_bias (bool): Whether the Conv1d layer has a bias parameter. Default: False. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter. + It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, + values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well + as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones' + and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of + Initializer for more details. Default: 'normal'. 
+ bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter. + Available initialization methods are the same as 'weight_init'. Refer to the values of + Initializer for more details. Default: 'zeros'. + has_diagonal_form (bool): Specifies whether the input tensor is provided in the diagonal form. Default: False. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C_{in}, L_{in})`. + + Outputs: + Tensor of shape :math:`(2, N, C_{out}, L_{out})`. + + pad_mode is 'same': + + .. math:: + L_{out} = \left \lceil{\frac{L_{in}}{\text{stride}}} \right \rceil + + pad_mode is 'valid': + + .. math:: + L_{out} = \left \lceil{\frac{L_{in} - \text{dilation} \times (\text{kernel_size} - 1) } + {\text{stride}}} \right \rceil + + pad_mode is 'pad': + + .. math:: + L_{out} = \left \lfloor{\frac{L_{in} + 2 \times padding - (\text{kernel_size} - 1) \times + \text{dilation} - 1 }{\text{stride}} + 1} \right \rfloor + + Raises: + TypeError: If `in_channels`, `out_channels`, `kernel_size`, `stride`, `padding` or `dilation` is not an int. + ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. + ValueError: If `padding` is less than 0. + ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.double import Conv1d + >>> from mindspore import Tensor + >>> w = Tensor(np.random.random((2, 16, 1, 6)).astype(np.float32)) + >>> b = Tensor(np.random.random((2, 16)).astype(np.float32)) + >>> net = Conv1d( + >>> in_channels=1, out_channels=16, kernel_size=6, stride=2, padding=2, + >>> pad_mode='pad', weight_init=w, bias_init=b, has_bias=True + >>> ) + >>> u = Tensor(np.random.random((2, 8, 1, 4096)).astype(np.float32)) + >>> out = net(u) + >>> print(out.shape) + (2, 8, 16, 2048) + """ + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: _size_1_t, + stride: _size_1_t = 1, + pad_mode: str = 'same', + padding: _size_1_t = 0, + dilation: _size_1_t = 1, + group: int = 1, + has_bias: bool = False, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + has_diagonal_form: bool = False) -> None: + if has_diagonal_form: + super(Conv1d, self).__init__(HConv1d, + J1J2ConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init) + else: + super(Conv1d, self).__init__(HConv1d, + ConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init) + + +class Conv3d(_UniformOperator): + r""" + 3D convolution layer on the dual-valued input. 
+ + Calculates the 3D convolution on the input tensor which is typically of shape + :math:`(2, N, C_{in}, D_{in}, H_{in}, W_{in})`, + where :math:`N` is batch size, :math:`C_{in}` is a number of channels, + :math:`D_{in}, H_{in}, W_{in}` are the depth, height and width of the feature layer respectively. + The formula is defined as: + + .. math:: + + \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + + \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{inp}(N_i, k)}) + + where :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of + the output and :math:`j` is in the range of :math:`[0,C_{out}-1]`. :math:`\text{weight}(C_{\text{out}_j}, k)` + is a convolution kernel slice with shape + :math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`, + where :math:`\text{kernel_size[0]}`, :math:`\text{kernel_size[1]}` and :math:`\text{kernel_size[2]}` are + the depth, height and width of the convolution kernel respectively. :math:`\text{bias}` is the bias parameter + and :math:`\text{inp}` is the input tensor. + The shape of full convolution kernel is + :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`, + where `group` is the number of groups to split the input `inp` in the channel dimension. + :math:`hccor` is the double-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_. + If has_diagonal_form is False, this implies the operation as: + + .. math:: + \begin{align} + \text{Re(ccor)} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Db(kernel)}, \text{Db(inp)}) + \text{Re(bias)}\\ + \text{Db(ccor)} = \text{ccor}(\text{Db(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Re(kernel)}, \text{Db(inp)}) + \text{Db(bias)}, + \end{align} + + And, if has_diagonal_form is True then: + + ..
math:: + \begin{align} + \text{X(ccor)} = \text{ccor}(\text{X(kernel)}, \text{X(inp)}) + \text{X(bias)}\\ + \text{Y(ccor)} = \text{ccor}(\text{Y(kernel)}, \text{Y(inp)}) + \text{Y(bias)}, + \end{align} + + where :math:`ccor` is the real-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, + :math:`inp` is the double input tensor, :math:`\text{kernel}` is a double weight matrix with the same + data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is a double bias vector with the same + data type as the :math:`inp` created by the layer (only if has_bias is True). :math:`\text{Re(...)}` and + :math:`\text{Db(...)}` are respectively the first and the second parts of the double-valued expression inside the + parentheses in the regular form, and :math:`\text{X(...)}` and :math:`\text{Y(...)}` stand for the same functors, + but with the parameters in the diagonal form. + + The convolution layer is notably more effective when double numbers are given in the diagonal form, because it then + requires only two convolutions instead of four. For some other kinds of layers (e.g. activation functions) it can + be otherwise. It is always possible to transfer to and from the diagonal representation: + + .. math:: + \begin{align} + \text{X(inp)} = \text{Re(inp)} + \text{Db(inp)}\\ + \text{Y(inp)} = \text{Re(inp)} - \text{Db(inp)}\\ + \text{Re(inp)} = 0.5 * (\text{X(inp)} + \text{Y(inp)})\\ + \text{Db(inp)} = 0.5 * (\text{X(inp)} - \text{Y(inp)}) + \end{align} + + However, every conversion consumes time resources, so it is a tradeoff which representation to choose. + For more details, please refer to the paper `Gradient Based Learning Applied to Document + Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. + + Note: + On Ascend platform, only group convolution in depthwise convolution scenarios is supported. + That is, when `group > 1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied. + + Args: + in_channels (int): The channel number of the input tensor of the Conv3d layer.
+ out_channels (int): The channel number of the output tensor of the Conv3d layer. + kernel_size (Union[int, tuple[int]]): Specifies the depth, height and width of the 3D convolution kernel. + The data type is an integer or a tuple of three integers. An integer represents the depth, height + and width of the convolution kernel. A tuple of three integers represents the depth, height + and width of the convolution kernel respectively. + stride (Union[int, tuple[int]]): The movement stride of the 3D convolution kernel. + The data type is an integer or a tuple of three integers. An integer represents the movement step size + in depth, height and width directions. A tuple of three integers represents the movement step size + in the depth, height and width directions respectively. Default: 1. + pad_mode (str): Specifies padding mode. The optional values are + "same", "valid", "pad". Default: "same". + + - same: The width of the output is the same as the value of the input divided by `stride`. + If this mode is set, the value of `padding` must be 0. + + - valid: Returns a valid calculated output without padding. Excess pixels that do not satisfy the + calculation will be discarded. If this mode is set, the value of `padding` must be 0. + + - pad: Pads the input. Padding `padding` size of zero on both sides of the input. + If this mode is set, the value of `padding` must be greater than or equal to 0. + + padding (Union(int, tuple[int])): The number of padding on the depth, height and width directions of the input. + The data type is an integer or a tuple of six integers. If `padding` is an integer, + then the head, tail, top, bottom, left, and right padding are all equal to `padding`. + If `padding` is a tuple of six integers, then the head, tail, top, bottom, left, and right padding + is equal to `padding[0]`, `padding[1]`, `padding[2]`, `padding[3]`, `padding[4]` and `padding[5]` + respectively. The value should be greater than or equal to 0. Default: 0. 
+ dilation (Union[int, tuple[int]]): Dilation size of 3D convolution kernel. + The data type is an integer or a tuple of three integers. If :math:`k > 1`, the kernel is sampled + every `k` elements. The value of `k` on the depth, height and width directions is in range of + [1, D], [1, H] and [1, W] respectively. Default: 1. + group (int): Splits filter into groups, `in_channels` and `out_channels` must be + divisible by `group`. Default: 1. Only 1 is currently supported. + has_bias (bool): Whether the Conv3d layer has a bias parameter. Default: False. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter. + It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, + values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well + as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones' + and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of + Initializer for more details. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter. + Available initialization methods are the same as 'weight_init'. Refer to the values of + Initializer for more details. Default: 'zeros'. + data_format (str): The optional value for data format. Currently only support "NCDHW". + has_diagonal_form (bool): Specifies whether the input tensor is provided in the diagonal form. Default: False. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C_{in}, D_{in}, H_{in}, W_{in})`. + Currently input data type only support float16 and float32. + + Outputs: + Tensor of shape is :math:`(2, N, C_{out}, D_{out}, H_{out}, W_{out})`. + + pad_mode is 'same': + + .. 
math:: + \begin{array}{ll} \\ + D_{out} = \left \lceil{\frac{D_{in}}{\text{stride[0]}}} \right \rceil \\ + H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[1]}}} \right \rceil \\ + W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[2]}}} \right \rceil \\ + \end{array} + + + pad_mode is 'valid': + + .. math:: + \begin{array}{ll} \\ + D_{out} = \left \lfloor{\frac{D_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) } + {\text{stride[0]}} + 1} \right \rfloor \\ + H_{out} = \left \lfloor{\frac{H_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) } + {\text{stride[1]}} + 1} \right \rfloor \\ + W_{out} = \left \lfloor{\frac{W_{in} - \text{dilation[2]} \times (\text{kernel_size[2]} - 1) } + {\text{stride[2]}} + 1} \right \rfloor \\ + \end{array} + + pad_mode is 'pad': + + .. math:: + \begin{array}{ll} \\ + D_{out} = \left \lfloor{\frac{D_{in} + padding[0] + padding[1] - (\text{dilation[0]} - 1) \times + \text{kernel_size[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\ + H_{out} = \left \lfloor{\frac{H_{in} + padding[2] + padding[3] - (\text{dilation[1]} - 1) \times + \text{kernel_size[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\ + W_{out} = \left \lfloor{\frac{W_{in} + padding[4] + padding[5] - (\text{dilation[2]} - 1) \times + \text{kernel_size[2]} - 1 }{\text{stride[2]}} + 1} \right \rfloor \\ + \end{array} + + Raises: + TypeError: If `in_channels`, `out_channels` or `group` is not an int. + TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple. + ValueError: If `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. + ValueError: If `padding` is less than 0. + ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. + ValueError: If `padding` is a tuple whose length is not equal to 6. + ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0, 0, 0). + ValueError: If `data_format` is not 'NCDHW'. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.dual import Conv3d + >>> from mindspore import Tensor + >>> w = Tensor(np.random.random((2, 128, 3, 3, 3, 3)).astype(np.float32)) + >>> b = Tensor(np.random.random((2, 128)).astype(np.float32)) + >>> net = Conv3d( + >>> in_channels=3, out_channels=128, kernel_size=3, stride=1, padding=1, + >>> pad_mode='pad', weight_init=w, bias_init=b, has_bias=True + >>> ) + >>> u = Tensor(np.random.random((2, 64, 3, 32, 32, 32)).astype(np.float32)) + >>> out = net(u) + >>> print(out.shape) + (2, 64, 128, 32, 32, 32) + """ + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: _size_3_t, + stride: _size_3_t = 1, + pad_mode: str = 'same', + padding: _size_3_t = 0, + dilation: _size_3_t = 1, + group: int = 1, + has_bias: bool = False, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + data_format: str = 'NCDHW', + has_diagonal_form: bool = False) -> None: + if has_diagonal_form: + super(Conv3d, self).__init__(HConv3d, + J1J2ConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init, + data_format=data_format) + else: + super(Conv3d, self).__init__(HConv3d, + ConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init, + data_format=data_format) + + +class BatchNorm1d(_UniformOperator): + r""" + The double-valued Batch Normalization layer over a second-order double input of four dimensions, including + one spatial dimension, or three dimensions. 
+ + This layer applies Batch Normalization over a double input to reduce internal covariate shift. + Batch Normalization is widely used in convolutional networks. It rescales and recenters the feature using a + mini-batch of data and the learned parameters. If has_diagonal_form is False, this can be described by the + following formula: + + .. math:: + \begin{align} + \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\ + out = \frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}\\ + \text{Re}(\hat{out}) = \text{Re}(out) * \text{Re}(\gamma) + + \text{Db}(out) * \text{Db}(\gamma) + \text{Re}(\beta)\\ + \text{Db}(\hat{out}) = \text{Re}(out) * \text{Db}(\gamma) + + \text{Db}(out) * \text{Re}(\gamma) + \text{Db}(\beta). + \end{align} + + And, if has_diagonal_form is True then: + + .. math:: + \begin{align} + \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\ + out = \frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}\\ + \text{X}(\hat{out}) = \text{X}(out) * \text{X}(\gamma) + \text{X}(\beta)\\ + \text{Y}(\hat{out}) = \text{Y}(out) * \text{Y}(\gamma) + \text{Y}(\beta). + \end{align} + + where :math:`inp` is the double input tensor, :math:`\mathrm{E}[inp]` is the arithmetic mean of the input tensor + over the batch dimension, :math:`\mathrm{Var}[inp]` is the statistical variance of the input tensor over spatial + dimensions, based on the double norm :math:`\|x+jy\|=|x|+|y|` for the regular representation and + :math:`\|x+jy\|=\text{max}(|x|,|y|)` in the diagonal representation. :math:`\gamma` and :math:`\beta` are double + learnable parameters representing the scale and shift coefficients respectively, and :math:`\delta` is a small + positive constant, which is needed to avoid division by zero in case statistical variance is close to zero.
+ :math:`\text{Re(...)}` and :math:`\text{Db(...)}` are respectively the first and the second parts of the + double-valued expression inside the parentheses in the regular form, and :math:`\text{X(...)}` and + :math:`\text{Y(...)}` stand for the same functors, but with the parameters in the diagonal form. + + The Batch Normalization layer is more effective when double numbers are given in the diagonal form, because of + effective multiplications. For some other kinds of layers (e.g. activation functions) it can be otherwise. It is + always possible to transfer to and from the diagonal representation: + + .. math:: + \begin{align} + \text{X(inp)} = \text{Re(inp)} + \text{Db(inp)}\\ + \text{Y(inp)} = \text{Re(inp)} - \text{Db(inp)}\\ + \text{Re(inp)} = 0.5 * (\text{X(inp)} + \text{Y(inp)})\\ + \text{Db(inp)} = 0.5 * (\text{X(inp)} - \text{Y(inp)}) + \end{align} + + However, every conversion consumes time resources, so it is a tradeoff which representation to choose. + + Args: + num_features (int): The number of features in the input space. + eps (float): A small positive threshold, which is needed to avoid division by zero. Default: :math:`10^{-5}` + momentum (float): A floating hyperparameter of the momentum for the running_mean and running_var computation. + Default: 0.9. + affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. 
Default: 'zeros'. + moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + use_batch_statistics (bool): + + - If True, use the mean value and variance value of current batch data and track running mean + and running variance. + - If False, use the mean value and variance value of specified value, and not track statistical value. + - If None, the use_batch_statistics is automatically set to True or False according to the training + and evaluation mode. During training, the parameter is set to True, and during evaluation, the + parameter is set to False. Default: True. + has_diagonal_form (bool): Specifies whether the input tensor is provided in the diagonal form. Default: False. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, W)` or :math:`(2, N, C)`. + '2' denotes that the input tensor belongs to the double domain and has a real part and + a double part. The `num_features` in `Args` has to be equal to :math:`C` in `Inputs`. + + Outputs: + Tensor, the normalized, scaled, offset tensor of the same shape as :math:`inp`: + :math:`(2, N, C, W)` or :math:`(2, N, C)`. + + Raises: + TypeError: If `num_features` is not an int. + TypeError: If `eps` is not a float. + ValueError: If `num_features` is less than 1. + ValueError: If `momentum` is not in range [0, 1]. + ValueError: If `inp` is not a Tensor of 3 or 4 dimensions.
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.double import BatchNorm1d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32)).astype(np.float32)) + >>> bn = BatchNorm1d(64) + >>> y = bn(u) + >>> print(y.shape) + (2, 8, 64, 32) + """ + + def __init__(self, + num_features: int, + eps: float = 1e-5, + momentum: float = 0.9, + affine: bool = True, + gamma_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + beta_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_mean_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_var_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + use_batch_statistics: bool = True, + has_diagonal_form: bool = False) -> None: + if has_diagonal_form: + super(BatchNorm1d, self).__init__(HBatchNorm1d, + J1J2BatchNormImpl, + num_features=num_features, + eps=eps, + momentum=momentum, + affine=affine, + gamma_init=gamma_init, + beta_init=beta_init, + moving_mean_init=moving_mean_init, + moving_var_init=moving_var_init, + use_batch_statistics=use_batch_statistics) + else: + super(BatchNorm1d, self).__init__(HBatchNorm1d, + BatchNormImpl, + num_features=num_features, + eps=eps, + momentum=momentum, + affine=affine, + gamma_init=gamma_init, + beta_init=beta_init, + moving_mean_init=moving_mean_init, + moving_var_init=moving_var_init, + use_batch_statistics=use_batch_statistics) + + +class BatchNorm2d(_UniformOperator): + r""" + The double-valued Batch Normalization layer over a second-order double input of five dimensions, including + two spatial dimensions. + + This layer applies Batch Normalization over a double input to reduce internal covariate shift. + Batch Normalization is widely used in convolutional networks. It rescales and recenters the feature using a + mini-batch of data and the learned parameters. 
If has_diagonal_form is False, this can be described by the + following formula: + + .. math:: + \begin{align} + \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\ + out = \frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}\\ + \text{Re}(\hat{out}) = \text{Re}(out) * \text{Re}(\gamma) + + \text{Db}(out) * \text{Db}(\gamma) + \text{Re}(\beta)\\ + \text{Db}(\hat{out}) = \text{Re}(out) * \text{Db}(\gamma) + + \text{Db}(out) * \text{Re}(\gamma) + \text{Db}(\beta). + \end{align} + + And, if has_diagonal_form is True then: + + .. math:: + \begin{align} + \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\ + out = \frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}\\ + \text{X}(\hat{out}) = \text{X}(out) * \text{X}(\gamma) + \text{X}(\beta)\\ + \text{Y}(\hat{out}) = \text{Y}(out) * \text{Y}(\gamma) + \text{Y}(\beta). + \end{align} + + where :math:`inp` is the double input tensor, :math:`\mathrm{E}[inp]` is the arithmetic mean of the input tensor + over the batch dimension, :math:`\mathrm{Var}[inp]` is the statistical variance of the input tensor over spatial + dimensions, based on the double norm :math:`\|x+jy\|=|x|+|y|` for the regular representation and + :math:`\|x+jy\|=\text{max}(|x|,|y|)` in the diagonal representation. :math:`\gamma` and :math:`\beta` are double + learnable parameters representing the scale and shift coefficients respectively, and :math:`\delta` is a small + positive constant, which is needed to avoid division by zero in case statistical variance is close to zero. + :math:`\text{Re(...)}` and :math:`\text{Db(...)}` are respectively the first and the second parts of the + double-valued expression inside the parentheses in the regular form, and :math:`\text{X(...)}` and + :math:`\text{Y(...)}` stand for the same functors, but with the parameters in the diagonal form.
+ + The Batch Normalization layer is more effective when double numbers are given in the diagonal form, because of + effective multiplications. For some other kinds of layers (e.g. activation functions) it can be otherwise. It is + always possible to transfer to and from the diagonal representation: + + .. math:: + \begin{align} + \text{X(inp)} = \text{Re(inp)} + \text{Db(inp)}\\ + \text{Y(inp)} = \text{Re(inp)} - \text{Db(inp)}\\ + \text{Re(inp)} = 0.5 * (\text{X(inp)} + \text{Y(inp)})\\ + \text{Db(inp)} = 0.5 * (\text{X(inp)} - \text{Y(inp)}) + \end{align} + + However, every conversion consumes time resources, so it is a tradeoff which representation to choose. + + Args: + num_features (int): The number of features in the input space. + eps (float): A small positive threshold, which is needed to avoid division by zero. Default: :math:`10^{-5}` + momentum (float): A floating hyperparameter of the momentum for the running_mean and running_var computation. + Default: 0.9. + affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. 
+ use_batch_statistics (bool): + + - If True, use the mean value and variance value of current batch data and track running mean + and running variance. + - If False, use the mean value and variance value of specified value, and not track statistical value. + - If None, the use_batch_statistics is automatically set to True or False according to the training + and evaluation mode. During training, the parameter is set to True, and during evaluation, the + parameter is set to False. Default: True. + has_diagonal_form (bool): Specifies whether the input tensor is provided in the diagonal form. Default: False. + data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'. Default: 'NCHW'. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, H, W)` if data_format is 'NCHW', or + :math:`(2, N, H, W, C)` if data_format is 'NHWC'. '2' denotes that the input tensor belongs to the double + domain and has a real part and a double part. The `num_features` in `Args` has to be equal to :math:`C` in + `Inputs`. + + Outputs: + Tensor, the normalized, scaled, offset tensor of the same shape as :math:`inp`: + :math:`(2, N, C, H, W)` if data_format is 'NCHW', or :math:`(2, N, H, W, C)` if data_format is 'NHWC'. + + Raises: + TypeError: If `num_features` is not an int. + TypeError: If `eps` is not a float. + ValueError: If `num_features` is less than 1. + ValueError: If `momentum` is not in range [0, 1]. + ValueError: If `data_format` is neither 'NHWC' nor 'NCHW'. + ValueError: If `inp` is not a Tensor of 5 dimensions.
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.double import BatchNorm2d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32, 32)).astype(np.float32)) + >>> bn = BatchNorm2d(64) + >>> y = bn(u) + >>> print(y.shape) + (2, 8, 64, 32, 32) + """ + + def __init__(self, + num_features: int, + eps: float = 1e-5, + momentum: float = 0.9, + affine: bool = True, + gamma_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + beta_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_mean_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_var_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + use_batch_statistics: bool = True, + has_diagonal_form: bool = False, + data_format='NCHW') -> None: + if has_diagonal_form: + super(BatchNorm2d, self).__init__(HBatchNorm2d, + J1J2BatchNormImpl, + num_features=num_features, + eps=eps, + momentum=momentum, + affine=affine, + gamma_init=gamma_init, + beta_init=beta_init, + moving_mean_init=moving_mean_init, + moving_var_init=moving_var_init, + use_batch_statistics=use_batch_statistics, + data_format=data_format) + else: + super(BatchNorm2d, self).__init__(HBatchNorm2d, + BatchNormImpl, + num_features=num_features, + eps=eps, + momentum=momentum, + affine=affine, + gamma_init=gamma_init, + beta_init=beta_init, + moving_mean_init=moving_mean_init, + moving_var_init=moving_var_init, + use_batch_statistics=use_batch_statistics, + data_format=data_format) + + +class BatchNorm3d(_UniformOperator): + r""" + The double-valued Batch Normalization layer over a second-order double input of six dimensions, including + three spatial dimensions. + + This layer applies Batch Normalization over a double input to reduce internal covariate shift. + Batch Normalization is widely used in convolutional networks. 
It rescales and recenters the feature using a + mini-batch of data and the learned parameters. If has_diagonal_form is False, this can be described by the + following formula: + + .. math:: + \begin{align} + \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\ + out = \frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}\\ + \text{Re}(\hat{out}) = \text{Re}(out) * \text{Re}(\gamma) + + \text{Db}(out) * \text{Db}(\gamma) + \text{Re}(\beta)\\ + \text{Db}(\hat{out}) = \text{Re}(out) * \text{Db}(\gamma) + + \text{Db}(out) * \text{Re}(\gamma) + \text{Db}(\beta). + \end{align} + + And, if has_diagonal_form is True then: + + .. math:: + \begin{align} + \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\ + out = \frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}\\ + \text{X}(\hat{out}) = \text{X}(out) * \text{X}(\gamma) + \text{X}(\beta)\\ + \text{Y}(\hat{out}) = \text{Y}(out) * \text{Y}(\gamma) + \text{Y}(\beta). + \end{align} + + where :math:`inp` is the double input tensor, :math:`\mathrm{E}[inp]` is the arithmetic mean of the input tensor + over the batch dimension, :math:`\mathrm{Var}[inp]` is the statistical variance of the input tensor over spatial + dimensions, based on the double norm :math:`\|x+jy\|=|x|+|y|` for the regular representation and + :math:`\|x+jy\|=\text{max}(|x|,|y|)` in the diagonal representation. :math:`\gamma` and :math:`\beta` are double + learnable parameters representing the scale and shift coefficients respectively, and :math:`\delta` is a small + positive constant, which is needed to avoid division by zero in case statistical variance is close to zero. + :math:`\text{Re(...)}` and :math:`\text{Db(...)}` are respectively the first and the second parts of the + double-valued expression inside the parentheses in the regular form, and :math:`\text{X(...)}` and + :math:`\text{Y(...)}` stand for the same functors, but with the parameters in the diagonal form.
+ + The Batch Normalization layer is more effective when double numbers are given in the diagonal form, because of + effective multiplications. For some other kinds of layers (e.g. activation functions) it can be otherwise. It is + always possible to transfer to and from the diagonal representation: + + .. math:: + \begin{align} + \text{X(inp)} = \text{Re(inp)} + \text{Db(inp)}\\ + \text{Y(inp)} = \text{Re(inp)} - \text{Db(inp)}\\ + \text{Re(inp)} = 0.5 * (\text{X(inp)} + \text{Y(inp)})\\ + \text{Db(inp)} = 0.5 * (\text{X(inp)} - \text{Y(inp)}) + \end{align} + + However, every conversion consumes time resources, so it is a tradeoff which representation to choose. + + Args: + num_features (int): The number of features in the input space. + eps (float): A small positive threshold, which is needed to avoid division by zero. Default: :math:`10^{-5}` + momentum (float): A floating hyperparameter of the momentum for the running_mean and running_var computation. + Default: 0.9. + affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. 
+ use_batch_statistics (bool): + + - If True, use the mean value and variance value of current batch data and track running mean + and running variance. + - If False, use the mean value and variance value of specified value, and not track statistical value. + - If None, the use_batch_statistics is automatically set to True or False according to the training + and evaluation mode. During training, the parameter is set to True, and during evaluation, the + parameter is set to False. Default: None. + has_diagonal_form (bool): Specifies whether the input tensor is provided in the diagonal form. Default: False. + data_format (str): The optional value for data format. Only 'NCDHW' format is supported as of now. + Default: 'NCDHW'. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, D, H, W)`. '2' denotes that the input tensor belongs + to the double domain and has got a real and a double parts. The `num_features` in `Args` has to be equal + to :math:`C` in `Inputs`. + + Outputs: + Tensor, the normalized, scaled, offset tensor of the same shape :math:`(2, N, C, D, H, W)` as :math:`u`. + + Raises: + TypeError: If `num_features` is not an int. + TypeError: If `eps` is not a float. + ValueError: If `num_features` is less than 1. + ValueError: If `momentum` is not in range [0, 1]. + ValueError: If `data_format` is not 'NCDHW'. + ValueError: if 'inp' is not a Tensor of 6 dimensions. 
class Dense(_UniformOperator):
    r"""
    The double-valued dense connected layer.

    Applies dense connected layer for the double-valued input. If has_diagonal_form is False, this layer implements
    the operation as:

    .. math::
        \begin{align}
        \text{Re(out)} = \text{Re(inp)} * \text{Re(kernel)} + \text{Db(inp)} * \text{Db(kernel)} + \text{Re(bias)}\\
        \text{Db(out)} = \text{Re(inp)} * \text{Db(kernel)} + \text{Db(inp)} * \text{Re(kernel)} + \text{Db(bias)},
        \end{align}

    And, if has_diagonal_form is True then:

    .. math::
        \begin{align}
        \text{X(out)} = \text{X(inp)} * \text{X(kernel)} + \text{X(bias)}\\
        \text{Y(out)} = \text{Y(inp)} * \text{Y(kernel)} + \text{Y(bias)},
        \end{align}

    where :math:`inp` is the double input tensors, :math:`\text{kernel}` is a double weight matrix with the same
    data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is a double bias vector with the same
    data type as the :math:`inp` created by the layer (only if has_bias is True). :math:`\text{Re(...)}` and
    :math:`\text{Db(...)}` are respectively the first and the second parts of the double-valued expression inside the
    parentheses in the regular form, and :math:`\text{X(...)}` and :math:`\text{Y(...)}` stand for the same functors,
    but with the parameters in the diagonal form.

    The dense layer is notably more effective when double numbers are given in the diagonal form, because it then
    requires only two matrix multiplications instead of four. For some other kinds of layers (e.g. activation
    functions) it can be otherwise. It is always possible to transfer to and from the diagonal representation:

    .. math::
        \begin{align}
        \text{X(inp)} = \text{Re(inp)} + \text{Db(inp)}\\
        \text{Y(inp)} = \text{Re(inp)} - \text{Db(inp)}\\
        \text{Re(inp)} = 0.5 * (\text{X(inp)} + \text{Y(inp)})\\
        \text{Db(inp)} = 0.5 * (\text{X(inp)} - \text{Y(inp)})
        \end{align}

    However, every conversion consumes time resources, so it is a tradeoff which representation to choose.

    Args:
        in_channels (int): The number of channels in the input space.
        out_channels (int): The number of channels in the output space.
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype
            is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'.
        bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is
            same as `inp`. The values of str refer to the function `initializer`. Default: 'zeros'.
        has_bias (bool): Specifies whether the layer uses a bias vector. Default: True.
        has_diagonal_form (bool): Specifies whether the input tensor is provided in the diagonal form. Default: False.

    Inputs:
        - **inp** (Tensor) - Tensor of shape :math:`(2, *, ..., *, in\_channels)`. '2' denotes that the input tensor
          belongs to the domain of double numbers and has got two components. The `in_channels` in `Args`
          has to be equal to :math:`in\_channels` in `Inputs`. The count of mediator dimensions denoted by '*' is
          arbitrary but must be at least one.

    Outputs:
        Tensor of shape :math:`(2, *, ..., *, out\_channels)`. The count of mediator dimensions is the same as one
        in 'Inputs'.

    Raises:
        TypeError: If `in_channels` or `out_channels` is not an int.
        TypeError: If `has_bias` is not a bool.
        ValueError: If length of shape of `weight_init` is not equal to 3,
                    or shape[0] of 'weight_init' is not equal to 2,
                    or shape[1] of `weight_init` is not equal to `out_channels`,
                    or shape[2] of `weight_init` is not equal to `in_channels`.
        ValueError: If length of shape of `bias_init` is not equal to 2,
                    or shape[0] of 'bias_init' is not equal to 2,
                    or shape[1] of `bias_init` is not equal to `out_channels`.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import numpy as np
        >>> from mindspore.hypercomplex.double import Dense
        >>> from mindspore import Tensor
        >>> w = Tensor(np.random.random((2, 7, 5)).astype(np.float32))
        >>> b = Tensor(np.random.random((2, 7)).astype(np.float32))
        >>> net = Dense(in_channels=5, out_channels=7, weight_init=w, bias_init=b, has_bias=True)
        >>> u = Tensor(np.random.random((2, 34, 1, 5)).astype(np.float32))
        >>> out = net(u)
        >>> print(out.shape)
        (2, 34, 1, 7)
    """

    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal',
                 bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros',
                 has_bias: bool = True,
                 has_diagonal_form: bool = False) -> None:
        # Both representations are built on the same generic hypercomplex Dense operator;
        # only the implementor class (diagonal vs. regular arithmetic) differs.
        impl_cls = J1J2DenseImpl if has_diagonal_form else DenseImpl
        super(Dense, self).__init__(HDense,
                                    impl_cls,
                                    in_channels=in_channels,
                                    out_channels=out_channels,
                                    weight_init=weight_init,
                                    bias_init=bias_init,
                                    has_bias=has_bias)


class ReLU(nn.Cell):
    r"""
    Rectified Linear Unit activation function for double numbers.

    Applies ReLU activation layer for the double-valued input. If has_diagonal_form is False, this layer implements
    the operation as:

    .. math::
        \begin{align}
        \text{Re(out)} = (Re(inp))^+ = \max(0, Re(inp))\\
        \text{Db(out)} = (Db(inp))^+ = \max(0, Db(inp)),
        \end{align}

    It returns element-wise :math:`\max(0, x)` for both real and double parts of the input tensor independently.
    Specially, the neurons with the negative output components will be suppressed and the active neurons will stay
    the same. If has_diagonal_form is True then this layer first converts the input to the regular form:

    .. math::
        \begin{align}
        \text{Re(inp)} = 0.5 * (\text{X(inp)} + \text{Y(inp)})\\
        \text{Db(inp)} = 0.5 * (\text{X(inp)} - \text{Y(inp)}),
        \end{align}

    then applies element-wise ReLU as shown above, and transfers the result back to the diagonal representation:

    .. math::
        \begin{align}
        \text{X(out)} = \text{Re(out)} + \text{Db(out)}\\
        \text{Y(out)} = \text{Re(out)} - \text{Db(out)}
        \end{align}

    Args:
        has_diagonal_form (bool): Specifies whether the input tensor is provided in the diagonal form. Default: False.

    Inputs:
        - **inp** (Tensor) - The input of ReLU is a Tensor of shape (2, *, ..., *). The data type is a number.

    Outputs:
        Tensor, with the same type and shape as the `inp`.

    Raises:
        TypeError: If dtype of `inp` is not a number.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import numpy as np
        >>> from mindspore.hypercomplex.double import ReLU
        >>> from mindspore import Tensor
        >>> net = ReLU()
        >>> u = Tensor(np.array([[-1, 2, -3], [0, -2, 1]]).astype(np.float32))
        >>> out = net(u)
        >>> print(out)
        [[0. 2. 0.]
         [0. 0. 1.]]
    """

    def __init__(self, has_diagonal_form: bool = False) -> None:
        super(ReLU, self).__init__()
        # In the regular form ReLU acts on each component independently, so the stock real-valued
        # cell is sufficient; the diagonal form needs the converting wrapper.
        self.relu = J1J2ReLU() if has_diagonal_form else nn.ReLU()

    def construct(self, u: Tensor) -> Tensor:
        return self.relu(u)
class J1J2ReLU(nn.Cell):
    r"""
    ReLU activation for double-valued tensors stored in the diagonal representation.

    The layer works in three steps. First the diagonal input is brought to the regular form:

    .. math::
        \begin{align}
        \text{Re(inp)} = 0.5 * (\text{X(inp)} + \text{Y(inp)})\\
        \text{Db(inp)} = 0.5 * (\text{X(inp)} - \text{Y(inp)}),
        \end{align}

    then the element-wise :math:`\max(0, x)` is applied to the real and the double parts independently:

    .. math::
        \begin{align}
        \text{Re(out)} = (Re(inp))^+ = \max(0, Re(inp))\\
        \text{Db(out)} = (Db(inp))^+ = \max(0, Db(inp)),
        \end{align}

    and finally the result is converted back to the diagonal representation:

    .. math::
        \begin{align}
        \text{X(out)} = \text{Re(out)} + \text{Db(out)}\\
        \text{Y(out)} = \text{Re(out)} - \text{Db(out)}
        \end{align}

    Inputs:
        - **inp** (Tensor) - Tensor of shape (2, *, ..., *) holding the X and Y components along the first axis.
          The data type is a number.

    Outputs:
        Tensor, with the same type and shape as the `inp`.

    Raises:
        TypeError: If dtype of `inp` is not a number.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``
    """

    def __init__(self):
        """Initialize J1J2ReLU."""
        super(J1J2ReLU, self).__init__()
        self.relu = P.ReLU()

    def construct(self, u: Tensor) -> Tensor:
        # Halving up front folds the 0.5 factor of the diagonal -> regular conversion
        # into the components themselves.
        half_x, half_y = get_u1_and_u2(u / 2)
        real_pos = self.relu(half_x + half_y)
        double_pos = self.relu(half_x - half_y)
        # Regular -> diagonal: X = Re + Db, Y = Re - Db.
        return to_double(real_pos + double_pos, real_pos - double_pos)
class _BatchNormImpl(HCBatchNormImpl):
    r"""
    The implementor class of the Batch Normalization layer for dual numbers.

    Supplies the pieces of batch normalization that are specific to the dual algebra and that the 'BatchNorm'
    class relies on: the norm of a dual number, its square, and the dual scale-and-shift transformation.

    Args:
        affine (bool): A bool value. When set to True, gamma and beta can be learned.
        use_batch_statistics (bool): If true, use the mean value and variance value of current batch data. If false,
            use the mean value and variance value of specified value. If None, the training process will use the mean
            and variance of current batch data and track the running mean and variance, the evaluation process will
            use the running mean and variance.
        gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight.
            The values of str refer to the function `initializer` including 'zeros', 'ones', etc.
        beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight.
            The values of str refer to the function `initializer` including 'zeros', 'ones', etc.
        num_features (int): The number of features in the input space.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``
    """

    def scale_and_shift(self,
                        u_x: Tensor,
                        u_y: Tensor,
                        scale_x: Tensor,
                        scale_y: Tensor,
                        shift_x: Tensor,
                        shift_y: Tensor) -> Tuple[Tensor, Tensor]:
        r"""
        Applies dual scaling and shift to an input tensor.

        The dual product of the input and the scale is taken, then the shift is added:

        .. math::
            \begin{align}
            \text{Re(out)} = \text{Re(inp)} * \text{Re(scale)} + \text{Re(shift)}\\
            \text{Du(out)} = \text{Re(inp)} * \text{Du(scale)} + \text{Du(inp)} * \text{Re(scale)} + \text{Du(shift)},
            \end{align}

        where :math:`\text{Re(...)}` and :math:`\text{Du(...)}` are respectively the real and the dual parts of
        the dual-valued expression inside the parentheses.

        Args:
            u_x (Tensor): A tensor of shape (C,), which represents the real part of the normalized inputs.
            u_y (Tensor): A tensor of shape (C,), which represents the dual part of the normalized inputs.
            scale_x (Tensor): A tensor of shape (C,), which represents the real part of the scaling vector.
            scale_y (Tensor): A tensor of shape (C,), which represents the dual part of the scaling vector.
            shift_x (Tensor): A tensor of shape (C,), which represents the real part of the bias vector.
            shift_y (Tensor): A tensor of shape (C,), which represents the dual part of the bias vector.

        Returns:
            Tuple of two tensors of shape (C,), which contains the real and the dual parts of rescaled and
            recentered inputs.
        """
        real_part = u_x * scale_x + shift_x
        # There is no u_y * scale_y term: the dual unit squares to zero.
        dual_part = u_x * scale_y + u_y * scale_x + shift_y
        return real_part, dual_part

    def get_norm(self, u: Tensor) -> Tensor:
        r"""
        Calculates norm of dual elements of an input tensor.

        The norm is a non-negative real number characterising the 'magnitude' of a dual number. It is computed as:

        .. math::
            \text{out} = \left|\frac{Du(inp)}{2}\right|+\sqrt{Re(inp)^2+\frac{Du(inp)^2}{4}+\delta},

        where :math:`inp` is the dual input tensor and :math:`\delta` is a small positive constant (1e-7) added
        under the square root. :math:`\text{Re(...)}` and :math:`\text{Du(...)}` are respectively the real and
        the dual parts of the dual-valued expression inside the parentheses.

        Args:
            u (Tensor): Tensor of shape (2, *, ..., *). '2' denotes that the input tensor belongs to the dual domain
                and has a real and a dual parts.

        Returns:
            Tensor of shape (*, ..., *). The count and size of dimensions of the output tensor are the same ones as in
            the input tensor, but without the very first dimension because the output tensor is real-valued.
        """
        real, dual = get_real_and_dual(u)
        half_dual = dual.abs() / 2
        radicand = real ** 2 + half_dual ** 2 + 1e-7
        return half_dual + ops.sqrt(radicand)

    def get_square_norm(self, u: Tensor) -> Tensor:
        r"""
        Calculates element-wise squared norm of dual elements of an input tensor.

        This is the square of the value returned by `get_norm`:

        .. math::
            \text{out} = \left(\left|\frac{Du(inp)}{2}\right|+\sqrt{Re(inp)^2+\frac{Du(inp)^2}{4}+\delta}\right)^2,

        where :math:`inp` is the dual input tensor, :math:`\text{Re(...)}` and :math:`\text{Du(...)}`
        are respectively the real and the dual parts of the dual-valued expression inside the parentheses.

        Args:
            u (Tensor): Tensor of shape (2, *, ..., *). '2' denotes that the input tensor belongs to the dual domain
                and has a real and a dual parts.

        Returns:
            Tensor of shape (*, ..., *). The count and size of dimensions of the output tensor are the same ones as in
            the input tensor, but without the very first dimension because the output tensor is real-valued.
        """
        return self.get_norm(u) ** 2
class _ConvImpl(BaseConvImpl):
    r"""
    The implementor class of the convolution layer for dual numbers.

    Decomposes the dual-valued convolution into three real-valued ones:

    .. math::
        \begin{align}
        \text{Re(ccor)} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)})\\
        \text{Du(ccor)} = \text{ccor}(\text{Du(kernel)}, \text{Re(inp)})
        + \text{ccor}(\text{Re(kernel)}, \text{Du(inp)}),
        \end{align}

    where :math:`ccor` is the real-valued cross-correlation, :math:`inp` is the dual input tensor and
    :math:`\text{kernel}` is a dual weight matrix with the same data type as the :math:`inp` created by the layer.
    :math:`\text{Re(...)}` and :math:`\text{Du(...)}` are respectively the real and the dual parts of the
    dual-valued expression inside the parentheses.

    Args:
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype
            is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'.
        weight_shape (tuple): The set of int numbers that defines the shape of real and dual parts of the kernel.

    Inputs:
        - **conv_op** (Callable) - the function of the real-valued convolution to be used for decomposition
          of the dual convolution transformation. For example, mindspore.ops.operations.Conv2D(...) may be passed
        - **real** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`,
          which defines the real part of the input.
        - **dual** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`,
          which defines the dual part of the input.

    Outputs:
        Tuple of two tensors, each of shape :math:`(N, C_{out}, *, ..., *)` or :math:`(N, *, ..., *, C_{out})`,
        which represents the real and the dual parts of the output.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``
    """

    def construct(self,
                  conv_op: Callable,
                  real: Tensor,
                  dual: Tensor) -> Tuple[Tensor, Tensor]:
        real_out = conv_op(real, self.weight_x)
        # Dual part: cross terms only, since the dual unit squares to zero.
        dual_out = conv_op(real, self.weight_y) + conv_op(dual, self.weight_x)
        return real_out, dual_out


class _ReDuConvImpl(BaseConvImpl):
    r"""
    The implementor class of the convolution layer for dual numbers, with a fused dual-part convolution.

    Computes the same dual-valued convolution as the three-convolution decomposition, but obtains the dual part
    from a single real-valued convolution over channel-wise concatenated tensors:

    .. math::
        \begin{align}
        \text{inp_cat} = \text{cat}(\text{Re(inp)}, \text{Du(inp)}) \\
        \text{K} = \text{cat}(\text{Du(kernel)}, \text{Re(kernel)}) \\
        \text{Re(ccor)} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)})\\
        \text{Du(ccor)} = \text{ccor}(\text{K}, \text{inp_cat})
        \end{align}

    where :math:`ccor` is the real-valued cross-correlation, :math:`inp` is the dual input tensor,
    :math:`\text{kernel}` is a dual weight matrix with the same data type as the :math:`inp` created by the layer,
    and :math:`\text{cat}` is concatenation along the channel axis. :math:`\text{Re(...)}` and
    :math:`\text{Du(...)}` are respectively the real and the dual parts of the dual-valued expression inside
    the parentheses.

    Args:
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype
            is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'.
        weight_shape (tuple): The set of int numbers that defines the shape of real and dual parts of the kernel.
        **factory_kwargs (dict): Extra parameters forwarded to the base class. The optional 'data_format' entry
            (default 'nchw') determines the concatenation axis: it is the position of the letter 'c' in the
            lower-cased format string.

    Inputs:
        - **conv_op** (Callable) - the function of the real-valued convolution to be used for decomposition
          of the dual convolution transformation. For example, mindspore.ops.operations.Conv2D(...) may be passed
        - **real** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`,
          which defines the real part of the input.
        - **dual** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`,
          which defines the dual part of the input (named `imag` in the `construct` signature).

    Outputs:
        Tuple of two tensors, each of shape :math:`(N, C_{out}, *, ..., *)` or :math:`(N, *, ..., *, C_{out})`,
        which represents the real and the dual parts of the output.

    Raises:
        ValueError: If the data format string contains no 'c' (channel) letter.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``
    """

    def __init__(self,
                 weight_init: Union[Tensor, str, Initializer, numbers.Number],
                 weight_shape: tuple,
                 **factory_kwargs) -> None:
        super(_ReDuConvImpl, self).__init__(weight_init, weight_shape, **factory_kwargs)
        layout = factory_kwargs.get('data_format', 'nchw')
        # The channel axis is wherever 'c' sits in the layout string, e.g. 1 for 'NCHW'.
        channel_axis = layout.lower().find('c')
        if channel_axis < 0:
            raise ValueError(f"Data format {layout} is unsupported")
        self.concat = P.Concat(channel_axis)

    def construct(self,
                  conv_op: Callable,
                  real: Tensor,
                  imag: Tensor) -> Tuple[Tensor, Tensor]:
        real_out = conv_op(real, self.weight_x)
        # Pairing (real, dual) inputs with (dual, real) kernels along the channel axis makes one
        # convolution sum both cross terms of the dual product.
        stacked_inp = self.concat([real, imag])
        stacked_kernel = self.concat([self.weight_y, self.weight_x])
        dual_out = conv_op(stacked_inp, stacked_kernel)
        return real_out, dual_out
class _DenseImpl(BaseDenseImpl):
    r"""
    The implementor class of the dense connected layer for dual numbers.

    Decomposes the dual-valued matrix multiplication of the dense layer into three real-valued ones:

    .. math::
        \begin{align}
        \text{Re(out)} = \text{Re(inp)} * \text{Re(kernel)}\\
        \text{Du(out)} = \text{Re(inp)} * \text{Du(kernel)} + \text{Du(inp)} * \text{Re(kernel)},
        \end{align}

    where :math:`inp` is the dual input tensor, :math:`\text{kernel}` is a dual weight matrix with the same data
    type as the :math:`inp` created by the layer, :math:`\text{Re(...)}` and :math:`\text{Du(...)}` are
    respectively the real and the dual parts of the dual-valued expression inside the parentheses.

    Args:
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype
            is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'.
        weight_shape (tuple): The set of int numbers that defines the shape of real and dual parts of the kernel.
        **factory_kwargs (dict): Extra parameters which may be needed by specific subclasses.

    Inputs:
        - **matmul_op** (Callable) - the function of the real-valued matrix multiplication to be used for
          decomposition of the dual linear transformation. Usually, mindspore.ops.operations.MatMul(...) is passed
        - **real** (Tensor) - Tensor of shape :math:`(*, in\_channels)`, which defines the real part of the input.
        - **dual** (Tensor) - Tensor of shape :math:`(*, in\_channels)`, which defines the dual part of the input.

    Outputs:
        Tuple of two tensors, each of shape :math:`(*, out\_channels)`, which represents the real and the dual
        parts of the output.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``
    """

    def construct(self,
                  matmul_op: Callable,
                  real: Tensor,
                  dual: Tensor) -> Tuple[Tensor, Tensor]:
        real_out = matmul_op(real, self.weight_x)
        # Dual part: cross terms only, since the dual unit squares to zero.
        dual_out = matmul_op(real, self.weight_y) + matmul_op(dual, self.weight_x)
        return real_out, dual_out
+# ============================================================================ +"""Dual Operators""" +import numbers +from typing import Union +from mindspore.common.initializer import Initializer +from mindspore.common.tensor import Tensor +# Batch Normalization +from mindspore.hypercomplex.hypercomplex.hc_bn import BatchNorm1d as HBatchNorm1d, \ + BatchNorm2d as HBatchNorm2d, BatchNorm3d as HBatchNorm3d +from mindspore.hypercomplex.dual._dual_bn_impl import _BatchNormImpl as BatchNormImpl +# Convolution +from mindspore.hypercomplex.hypercomplex.hc_conv import Conv1d as HConv1d, \ + Conv2d as HConv2d, Conv3d as HConv3d +from mindspore.hypercomplex.dual._dual_conv_impl import _ReDuConvImpl as ConvImpl +# Dense +from mindspore.hypercomplex.hypercomplex.hc_dense import Dense as HDense +from mindspore.hypercomplex.dual._dual_dense_impl import _DenseImpl as DenseImpl +from mindspore.hypercomplex.hypercomplex.uniform_operator import _UniformOperator + +from mindspore.hypercomplex.utils import _size_1_t, _size_2_t, _size_3_t + + +class Conv2d(_UniformOperator): + r""" + 2D convolution layer on the dual-valued input. + + Calculates the 2D convolution on the input tensor which is typically of shape + :math:`(2, N, C_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, :math:`C_{in}` is a number of channels, + :math:`H_{in}, W_{in}` are the height and width of the feature layer respectively. The formula is defined as: + + .. math:: + + \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + + \sum_{k = 0}^{C_{in} - 1} \text{hccor}({\text{weight}(C_{\text{out}_j}, k), \text{inp}(N_i, k)}) + + where :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of + the output and :math:`j` is in the range of :math:`[0, C_{out}-1]`. 
:math:`\text{weight}(C_{\text{out}_j}, k)` + is a convolution kernel slice with shape :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`, + where :math:`\text{kernel_size[0]}` and :math:`\text{kernel_size[1]}` are the height and width of the convolution + kernel respectively. :math:`\text{bias}` is the bias parameter and :math:`\text{inp}` is the input tensor. + In this case, `data_format` of the input tensor is 'NCHW' and the shape of full convolution kernel is + :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`, + where `group` is the number of groups to split the input `inp` in the channel dimension. If `data_format` of the + input tensor is 'NHWC', the shape of full convolution kernel will be + :math:`(C_{out}, \text{kernel_size[0]}, \text{kernel_size[1]}, C_{in} / \text{group})`. + :math:`hccor` is the dual-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_. + This implies the operation as: + + .. math:: + \text{Re(hccor)} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)}) + \text{Re(bias)}\\ + \text{Du(hccor)} = \text{ccor}(\text{Du(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Re(kernel)}, \text{Du(inp)}) + \text{Du(bias)} + + where :math:`ccor` is the real-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, + :math:`inp` is the dual input tensor, :math:`\text{kernel}` is a dual weight matrix with the same + data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is a dual bias vector with the same + data type as the :math:`inp` created by the layer (only if has_bias is True). :math:`\text{Re(...)}` + and :math:`\text{Du(...)}` are respectively real and dual parts of the dual-valued expression inside + the parentheses. + + For more details, please refer to the paper `Gradient Based Learning Applied to Document + Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. + + Note: + On Ascend platform, only group convolution in depthwise convolution scenarios is supported. + That is, when `group > 1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied. 
+ + 'NCHW' format is supported only with GPU target device as of now. + + Args: + in_channels (int): The channel number of the input tensor of the Conv2d layer. + out_channels (int): The channel number of the output tensor of the Conv2d layer. + kernel_size (Union[int, tuple[int]]): Specifies the height and width of the 2D convolution kernel. + The data type is an integer or a tuple of two integers. An integer represents the height + and width of the convolution kernel. A tuple of two integers represents the height + and width of the convolution kernel respectively. + stride (Union[int, tuple[int]]): The movement stride of the 2D convolution kernel. + The data type is an integer or a tuple of two integers. An integer represents the movement step size + in both height and width directions. A tuple of two integers represents the movement step size in + the height and width directions respectively. Default: 1. + pad_mode (str): Specifies padding mode. The optional values are + "same", "valid", "pad". Default: "same". + + - same: The width of the output is the same as the value of the input divided by `stride`. + If this mode is set, the value of `padding` must be 0. + + - valid: Returns a valid calculated output without padding. Excess pixels that do not satisfy the + calculation will be discarded. If this mode is set, the value of `padding` must be 0. + + - pad: Pads the input. Padding `padding` size of zero on both sides of the input. + If this mode is set, the value of `padding` must be greater than or equal to 0. + + padding (Union[int, tuple[int]]): The number of padding on the height and width directions of the input. + The data type is an integer or a tuple of four integers. If `padding` is an integer, + then the top, bottom, left, and right padding are all equal to `padding`. + If `padding` is a tuple of 4 integers, then the top, bottom, left, and right padding + is equal to `padding[0]`, `padding[1]`, `padding[2]`, and `padding[3]` respectively. 
+ The value should be greater than or equal to 0. Default: 0. + dilation (Union[int, tuple[int]]): Dilation size of 2D convolution kernel. + The data type is an integer or a tuple of two integers. If :math:`k > 1`, the kernel is sampled + every `k` elements. The value of `k` on the height and width directions is in range of [1, H] + and [1, W] respectively. Default: 1. + group (int): Splits filter into groups, `in_channels` and `out_channels` must be + divisible by `group`. If the group is equal to `in_channels` and `out_channels`, + this 2D convolution layer also can be called 2D depthwise convolution layer. Default: 1. + has_bias (bool): Whether the Conv2d layer has a bias parameter. Default: False. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter. + It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, + values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well + as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones' + and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of + Initializer for more details. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter. + Available initialization methods are the same as 'weight_init'. Refer to the values of + Initializer for more details. Default: 'zeros'. + data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'. + Default: 'NCHW'. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C_{in}, H_{in}, W_{in})` \ + or :math:`(2, N, H_{in}, W_{in}, C_{in})`. + + Outputs: + Tensor of shape :math:`(2, N, C_{out}, H_{out}, W_{out})` or :math:`(2, N, H_{out}, W_{out}, C_{out})`. + + pad_mode is 'same': + + .. 
math:: + \begin{array}{ll} \\ + H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[0]}}} \right \rceil \\ + W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[1]}}} \right \rceil \\ + \end{array} + + pad_mode is 'valid': + + .. math:: + \begin{array}{ll} \\ + H_{out} = \left \lceil{\frac{H_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) } + {\text{stride[0]}}} \right \rceil \\ + W_{out} = \left \lceil{\frac{W_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) } + {\text{stride[1]}}} \right \rceil \\ + \end{array} + + pad_mode is 'pad': + + .. math:: + \begin{array}{ll} \\ + H_{out} = \left \lfloor{\frac{H_{in} + padding[0] + padding[1] - (\text{kernel_size[0]} - 1) \times + \text{dilation[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\ + W_{out} = \left \lfloor{\frac{W_{in} + padding[2] + padding[3] - (\text{kernel_size[1]} - 1) \times + \text{dilation[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\ + \end{array} + + Raises: + TypeError: If `in_channels`, `out_channels` or `group` is not an int. + TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple. + ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. + ValueError: If `padding` is less than 0. + ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. + ValueError: If `padding` is a tuple whose length is not equal to 4. + ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0). + ValueError: If `data_format` is neither 'NCHW' nor 'NHWC'. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.dual import Conv2d + >>> from mindspore import Tensor + >>> w = Tensor(np.random.random((2, 128, 3, 7, 7)).astype(np.float32)) + >>> b = Tensor(np.random.random((2, 128)).astype(np.float32)) + >>> net = Conv2d( + >>> in_channels=3, out_channels=128, kernel_size=7, stride=2, padding=3, + >>> pad_mode='pad', weight_init=w, bias_init=b, has_bias=True + >>> ) + >>> inp = Tensor(np.random.random((2, 16, 3, 224, 224)).astype(np.float32)) + >>> out = net(inp) + >>> print(out.shape) + (2, 16, 128, 112, 112) + """ + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: _size_2_t, + stride: _size_2_t = 1, + pad_mode: str = 'same', + padding: _size_2_t = 0, + dilation: _size_2_t = 1, + group: int = 1, + has_bias: bool = False, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + data_format: str = 'NCHW') -> None: + super(Conv2d, self).__init__(HConv2d, + ConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init, + data_format=data_format) + + +class Conv1d(_UniformOperator): + r""" + 1D convolution layer on the dual-valued input. + + Calculates the 1D convolution on the input tensor which is typically of shape :math:`(2, N, C_{in}, L_{in})`, + where :math:`N` is batch size, :math:`C_{in}` is a number of channels and :math:`L_{in}` is a length of sequence. + The formula is defined as: + + .. 
math:: + + \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + + \sum_{k = 0}^{C_{in} - 1} \text{hccor}({\text{weight}(C_{\text{out}_j}, k), \text{inp}(N_i, k)}) + + where :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of + the output and :math:`j` is in the range of :math:`[0, C_{out}-1]`. :math:`\text{weight}(C_{\text{out}_j}, k)` + is a convolution kernel slice with shape :math:`\text{kernel_size}`, where :math:`\text{kernel_size}` + is the width of the convolution kernel. :math:`\text{bias}` is the bias parameter, + and :math:`\text{inp}` is the input tensor. The shape of full convolution kernel is + :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size})`, + where `group` is the number of groups to split the input `inp` in the channel dimension. + :math:`hccor` is the dual-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_. + This implies the operation as: + + .. math:: + \text{Re(hccor)} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)}) + \text{Re(bias)}\\ + \text{Du(hccor)} = \text{ccor}(\text{Du(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Re(kernel)}, \text{Du(inp)}) + \text{Du(bias)} + + where :math:`ccor` is the real-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, + :math:`inp` is the dual input tensor, :math:`\text{kernel}` is a dual weight matrix with the same + data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is a dual bias vector with the same + data type as the :math:`inp` created by the layer (only if has_bias is True). :math:`\text{Re(...)}` and + :math:`\text{Du(...)}` are respectively real and dual parts of the dual-valued expression inside the parentheses. + + For more details, please refer to the paper `Gradient Based Learning Applied to Document + Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. + + Note: + On Ascend platform, only group convolution in depthwise convolution scenarios is supported. + That is, when `group > 1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied. 
+ + Args: + in_channels (int): The channel number of the input tensor of the Conv1d layer. + out_channels (int): The channel number of the output tensor of the Conv1d layer. + kernel_size (int): Specifies the width of the 1D convolution kernel. + stride (int): The movement stride of the 1D convolution kernel. Default: 1. + pad_mode (str): Specifies padding mode. The optional values are + "same", "valid", "pad". Default: "same". + + - same: The width of the output is the same as the value of the input divided by `stride`. + If this mode is set, the value of `padding` must be 0. + + - valid: Returns a valid calculated output without padding. Excess pixels that do not satisfy the + calculation will be discarded. If this mode is set, the value of `padding` must be 0. + + - pad: Pads the input. Padding `padding` size of zero on both sides of the input. + If this mode is set, the value of `padding` must be greater than or equal to 0. + + padding (int): The number of padding on both sides of input. + The value should be greater than or equal to 0. Default: 0. + dilation (int): Dilation size of 1D convolution kernel. If :math:`k > 1`, the kernel is sampled + every `k` elements. The value of `k` is in range of [1, L]. Default: 1. + group (int): Splits filter into groups, `in_channels` and `out_channels` must be + divisible by `group`. Default: 1. + has_bias (bool): Whether the Conv1d layer has a bias parameter. Default: False. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter. + It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, + values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well + as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones' + and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of + Initializer for more details. 
Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter. + Available initialization methods are the same as 'weight_init'. Refer to the values of + Initializer for more details. Default: 'zeros'. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C_{in}, L_{in})`. + + Outputs: + Tensor of shape :math:`(2, N, C_{out}, L_{out})`. + + pad_mode is 'same': + + .. math:: + L_{out} = \left \lceil{\frac{L_{in}}{\text{stride}}} \right \rceil + + pad_mode is 'valid': + + .. math:: + L_{out} = \left \lceil{\frac{L_{in} - \text{dilation} \times (\text{kernel_size} - 1) } + {\text{stride}}} \right \rceil + + pad_mode is 'pad': + + .. math:: + L_{out} = \left \lfloor{\frac{L_{in} + 2 \times padding - (\text{kernel_size} - 1) \times + \text{dilation} - 1 }{\text{stride}} + 1} \right \rfloor + + Raises: + TypeError: If `in_channels`, `out_channels`, `kernel_size`, `stride`, `padding` or `dilation` is not an int. + ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. + ValueError: If `padding` is less than 0. + ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.dual import Conv1d + >>> from mindspore import Tensor + >>> w = Tensor(np.random.random((2, 16, 1, 6)).astype(np.float32)) + >>> b = Tensor(np.random.random((2, 16)).astype(np.float32)) + >>> net = Conv1d( + >>> in_channels=1, out_channels=16, kernel_size=6, stride=2, padding=2, + >>> pad_mode='pad', weight_init=w, bias_init=b, has_bias=True + >>> ) + >>> u = Tensor(np.random.random((2, 8, 1, 4096)).astype(np.float32)) + >>> out = net(u) + >>> print(out.shape) + (2, 8, 16, 2048) + """ + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: _size_1_t, + stride: _size_1_t = 1, + pad_mode: str = 'same', + padding: _size_1_t = 0, + dilation: _size_1_t = 1, + group: int = 1, + has_bias: bool = False, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros') -> None: + super(Conv1d, self).__init__(HConv1d, + ConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init) + + +class Conv3d(_UniformOperator): + r""" + 3D convolution layer on the dual-valued input. + + Calculates the 3D convolution on the input tensor which is typically of shape + :math:`(2, N, C_{in}, D_{in}, H_{in}, W_{in})`, + where :math:`N` is batch size, :math:`C_{in}` is a number of channels, + :math:`D_{in}, H_{in}, W_{in}` are the depth, height and width of the feature layer respectively. + The formula is defined as: + + .. 
math:: + + \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + + \sum_{k = 0}^{C_{in} - 1} \text{hccor}({\text{weight}(C_{\text{out}_j}, k), \text{inp}(N_i, k)}) + + where :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of + the output and :math:`j` is in the range of :math:`[0,C_{out}-1]`. :math:`\text{weight}(C_{\text{out}_j}, k)` + is a convolution kernel slice with shape + :math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`, + where :math:`\text{kernel_size[0]}`, :math:`\text{kernel_size[1]}` and :math:`\text{kernel_size[2]}` are + the depth, height and width of the convolution kernel respectively. :math:`\text{bias}` is the bias parameter + and :math:`\text{inp}` is the input tensor. + The shape of full convolution kernel is + :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`, + where `group` is the number of groups to split the input `inp` in the channel dimension. + :math:`hccor` is the dual-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_. + This implies the operation as: + + .. math:: + \text{Re(hccor)} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)}) + \text{Re(bias)}\\ + \text{Du(hccor)} = \text{ccor}(\text{Du(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Re(kernel)}, \text{Du(inp)}) + \text{Du(bias)} + + where :math:`ccor` is the real-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, + :math:`inp` is the dual input tensor, :math:`\text{kernel}` is a dual weight matrix with the same + data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is a dual bias vector with the same + data type as the :math:`inp` created by the layer (only if has_bias is True). :math:`\text{Re(...)}` + and :math:`\text{Du(...)}` are respectively real and dual parts of the dual-valued expression inside + the parentheses. + + For more details, please refer to the paper `Gradient Based Learning Applied to Document + Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. 
+ + Note: + On Ascend platform, only group convolution in depthwise convolution scenarios is supported. + That is, when `group>1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied. + + Args: + in_channels (int): The channel number of the input tensor of the Conv3d layer. + out_channels (int): The channel number of the output tensor of the Conv3d layer. + kernel_size (Union[int, tuple[int]]): Specifies the depth, height and width of the 3D convolution kernel. + The data type is an integer or a tuple of three integers. An integer represents the depth, height + and width of the convolution kernel. A tuple of three integers represents the depth, height + and width of the convolution kernel respectively. + stride (Union[int, tuple[int]]): The movement stride of the 3D convolution kernel. + The data type is an integer or a tuple of three integers. An integer represents the movement step size + in depth, height and width directions. A tuple of three integers represents the movement step size + in the depth, height and width directions respectively. Default: 1. + pad_mode (str): Specifies padding mode. The optional values are + "same", "valid", "pad". Default: "same". + + - same: The width of the output is the same as the value of the input divided by `stride`. + If this mode is set, the value of `padding` must be 0. + + - valid: Returns a valid calculated output without padding. Excess pixels that do not satisfy the + calculation will be discarded. If this mode is set, the value of `padding` must be 0. + + - pad: Pads the input. Padding `padding` size of zero on both sides of the input. + If this mode is set, the value of `padding` must be greater than or equal to 0. + + padding (Union[int, tuple[int]]): The number of padding on the depth, height and width directions of the input. + The data type is an integer or a tuple of six integers. 
If `padding` is an integer, + then the head, tail, top, bottom, left, and right padding are all equal to `padding`. + If `padding` is a tuple of six integers, then the head, tail, top, bottom, left, and right padding + is equal to `padding[0]`, `padding[1]`, `padding[2]`, `padding[3]`, `padding[4]` and `padding[5]` + respectively. The value should be greater than or equal to 0. Default: 0. + dilation (Union[int, tuple[int]]): Dilation size of 3D convolution kernel. + The data type is an integer or a tuple of three integers. If :math:`k > 1`, the kernel is sampled + every `k` elements. The value of `k` on the depth, height and width directions is in range of + [1, D], [1, H] and [1, W] respectively. Default: 1. + group (int): Splits filter into groups, `in_channels` and `out_channels` must be + divisible by `group`. Default: 1. Only 1 is currently supported. + has_bias (bool): Whether the Conv3d layer has a bias parameter. Default: False. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter. + It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, + values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well + as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones' + and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of + Initializer for more details. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter. + Available initialization methods are the same as 'weight_init'. Refer to the values of + Initializer for more details. Default: 'zeros'. + data_format (str): The optional value for data format. Currently only support "NCDHW". + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C_{in}, D_{in}, H_{in}, W_{in})`. 
+ Currently the input data type only supports float16 and float32. + + Outputs: + Tensor of shape :math:`(2, N, C_{out}, D_{out}, H_{out}, W_{out})`. + + pad_mode is 'same': + + .. math:: + \begin{array}{ll} \\ + D_{out} = \left \lceil{\frac{D_{in}}{\text{stride[0]}}} \right \rceil \\ + H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[1]}}} \right \rceil \\ + W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[2]}}} \right \rceil \\ + \end{array} + + + pad_mode is 'valid': + + .. math:: + \begin{array}{ll} \\ + D_{out} = \left \lceil{\frac{D_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) } + {\text{stride[0]}}} \right \rceil \\ + H_{out} = \left \lceil{\frac{H_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) } + {\text{stride[1]}}} \right \rceil \\ + W_{out} = \left \lceil{\frac{W_{in} - \text{dilation[2]} \times (\text{kernel_size[2]} - 1) } + {\text{stride[2]}}} \right \rceil \\ + \end{array} + + pad_mode is 'pad': + + .. math:: + \begin{array}{ll} \\ + D_{out} = \left \lfloor{\frac{D_{in} + padding[0] + padding[1] - (\text{kernel_size[0]} - 1) \times + \text{dilation[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\ + H_{out} = \left \lfloor{\frac{H_{in} + padding[2] + padding[3] - (\text{kernel_size[1]} - 1) \times + \text{dilation[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\ + W_{out} = \left \lfloor{\frac{W_{in} + padding[4] + padding[5] - (\text{kernel_size[2]} - 1) \times + \text{dilation[2]} - 1 }{\text{stride[2]}} + 1} \right \rfloor \\ + \end{array} + + Raises: + TypeError: If `in_channels`, `out_channels` or `group` is not an int. + TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple. + ValueError: If `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. + ValueError: If `padding` is less than 0. + ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. + ValueError: If `padding` is a tuple whose length is not equal to 6. 
+ ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0, 0, 0). + ValueError: If `data_format` is not 'NCDHW'. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.dual import Conv3d + >>> from mindspore import Tensor + >>> w = Tensor(np.random.random((2, 128, 3, 3, 3, 3)).astype(np.float32)) + >>> b = Tensor(np.random.random((2, 128)).astype(np.float32)) + >>> net = Conv3d( + >>> in_channels=3, out_channels=128, kernel_size=3, stride=1, padding=1, + >>> pad_mode='pad', weight_init=w, bias_init=b, has_bias=True + >>> ) + >>> u = Tensor(np.random.random((2, 64, 3, 32, 32, 32)).astype(np.float32)) + >>> out = net(u) + >>> print(out.shape) + (2, 64, 128, 32, 32, 32) + """ + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: _size_3_t, + stride: _size_3_t = 1, + pad_mode: str = 'same', + padding: _size_3_t = 0, + dilation: _size_3_t = 1, + group: int = 1, + has_bias: bool = False, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + data_format: str = 'NCDHW') -> None: + super(Conv3d, self).__init__(HConv3d, + ConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init, + data_format=data_format) + + +class BatchNorm1d(_UniformOperator): + r""" + The dual-valued Batch Normalization layer over a second-order dual input of four dimensions, including + one spatial dimension, or three dimensions. + + This layer applies Batch Normalization over a dual input to reduce internal covariate shift. + Batch Normalization is widely used in convolutional networks. 
It rescales and recenters the feature using + a mini-batch of data and the learned parameters which can be described by the following formula: + + .. math:: + \begin{align} + \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\ + out = \frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}\\ + \text{Re(\hat{out})} = \text{Re(out)} * \text{Re(\gamma)} + \text{Re(\beta)}\\ + \text{Du(\hat{out})} = \text{Re(out)} * \text{Du(\gamma)} + + \text{Du(out)} * \text{Re(\gamma)} + \text{Du(\beta)}, + \end{align} + + where :math:`inp` is the dual input tensors, :math:`\mathrm{E}[inp]` is the arithmetic mean of the input tensor + over the batch dimension, :math:`\mathrm{Var}[inp]` is the statistical variance of the input tensor over spatial + dimensions, based on the dual norm :math:`\|x+\epsilon y\|=\left|\frac{y}{2}\right|+\sqrt{x^2+\frac{y^2}{4}}`, + :math:`\gamma` and :math:`\beta` are dual learnable parameters representing the scale and shift coefficients + respectively, and :math:`\delta` is a small positive constant, which is needed to avoid division by zero in case + statistical variance is close to zero. :math:`\text{Re(...)}` and :math:`\text{Du(...)}` are respectively real + and dual parts of the dual-valued expression inside the parentheses. + + Args: + num_features (int): The number of features in the input space. + eps (float): A small positive threshold, which is needed to avoid division by zero. Default: :math:`10^{-5}` + momentum (float): A floating hyperparameter of the momentum for the running_mean and running_var computation. + Default: 0.9. + affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. 
+ The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + use_batch_statistics (bool): + + - If True, use the mean value and variance value of current batch data and track running mean + and running variance. + - If False, use the mean value and variance value of specified value, and not track statistical value. + - If None, the use_batch_statistics is automatically set to True or False according to the training + and evaluation mode. During training, the parameter is set to True, and during evaluation, the + parameter is set to False. Default: True. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, W)` or :math:`(2, N, C)`. + '2' denotes that the input tensor belongs to the dual domain and has a real and + a dual part. The `num_features` in `Args` has to be equal to :math:`C` in `Inputs`. + + Outputs: + Tensor, the normalized, scaled, offset tensor of the same shape as :math:`inp`: + :math:`(2, N, C, W)` or :math:`(2, N, C)` + + Raises: + TypeError: If `num_features` is not an int. + TypeError: If `eps` is not a float. + ValueError: If `num_features` is less than 1. + ValueError: If `momentum` is not in range [0, 1]. + ValueError: If `inp` is not a Tensor of 3 or 4 dimensions. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.dual import BatchNorm1d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32)).astype(np.float32)) + >>> bn = BatchNorm1d(64) + >>> y = bn(u) + >>> print(y.shape) + (2, 8, 64, 32) + """ + + def __init__(self, + num_features: int, + eps: float = 1e-5, + momentum: float = 0.9, + affine: bool = True, + gamma_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + beta_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_mean_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_var_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + use_batch_statistics: bool = True) -> None: + super(BatchNorm1d, self).__init__(HBatchNorm1d, + BatchNormImpl, + num_features=num_features, + eps=eps, + momentum=momentum, + affine=affine, + gamma_init=gamma_init, + beta_init=beta_init, + moving_mean_init=moving_mean_init, + moving_var_init=moving_var_init, + use_batch_statistics=use_batch_statistics) + + +class BatchNorm2d(_UniformOperator): + r""" + The dual-valued Batch Normalization layer over a second-order dual input of five dimensions, including + two spatial dimensions. + + This layer applies Batch Normalization over a dual input to reduce internal covariate shift. + Batch Normalization is widely used in convolutional networks. It rescales and recenters the feature using + a mini-batch of data and the learned parameters which can be described by the following formula: + + .. 
math:: + \begin{align} + \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\ + out = \frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}\\ + \text{Re(\hat{out})} = \text{Re(out)} * \text{Re(\gamma)} + \text{Re(\beta)}\\ + \text{Du(\hat{out})} = \text{Re(out)} * \text{Du(\gamma)}\ + + \text{Du(out)} * \text{Re(\gamma)} + \text{Du(\beta)}, + \end{align} + + where :math:`inp` is the dual input tensors, :math:`\mathrm{E}[inp]` is the arithmetic mean of the input tensor + over the batch dimension, :math:`\mathrm{Var}[inp]` is the statistical variance of the input tensor over spatial + dimensions, based on the dual norm :math:`\|x+\epsilon y\|=\left|\frac{y}{2}\right|+\sqrt{x^2+\frac{y^2}{4}}`, + :math:`\gamma` and :math:`\beta` are dual learnable parameters representing the scale and shift coefficients + respectively, and :math:`\delta` is a small positive constant, which is needed to avoid division by zero in case + statistical variance is close to zero. :math:`\text{Re(...)}` and :math:`\text{Du(...)}` are respectively real + and dual parts of the dual-valued expression inside the parentheses. + + Args: + num_features (int): The number of features in the input space. + eps (float): A small positive threshold, which is needed to avoid division by zero. Default: :math:`10^{-5}` + momentum (float): A floating hyperparameter of the momentum for the running_mean and running_var computation. + Default: 0.9. + affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. 
+ moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + use_batch_statistics (bool): + + - If True, use the mean value and variance value of current batch data and track running mean + and running variance. + - If False, use the specified mean and variance values, and do not track statistics. + - If None, the use_batch_statistics is automatically set to True or False according to the training + and evaluation mode. During training, the parameter is set to True, and during evaluation, the + parameter is set to False. Default: True. + data_format (str): The data format, either 'NHWC' or 'NCHW'. Default: 'NCHW'. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, H, W)` if data_format is 'NCHW', or + :math:`(2, N, H, W, C)` if data_format is 'NHWC'. '2' denotes that the input tensor belongs to the dual + domain and has a real and a dual part. The `num_features` in `Args` has to be equal to + :math:`C` in `Inputs`. + + Outputs: + Tensor, the normalized, scaled, offset tensor of the same shape as :math:`inp`: + :math:`(2, N, C, H, W)` if data_format is 'NCHW', or :math:`(2, N, H, W, C)` if data_format is 'NHWC'. + + Raises: + TypeError: If `num_features` is not an int. + TypeError: If `eps` is not a float. + ValueError: If `num_features` is less than 1. + ValueError: If `momentum` is not in range [0, 1]. + ValueError: If `data_format` is neither 'NHWC' nor 'NCHW'. + ValueError: If `inp` is not a Tensor of 5 dimensions.
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.dual import BatchNorm2d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32, 32)).astype(np.float32)) + >>> bn = BatchNorm2d(64) + >>> y = bn(u) + >>> print(y.shape) + (2, 8, 64, 32, 32) + """ + + def __init__(self, + num_features: int, + eps: float = 1e-5, + momentum: float = 0.9, + affine: bool = True, + gamma_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + beta_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_mean_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_var_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + use_batch_statistics: bool = True, + data_format='NCHW') -> None: + super(BatchNorm2d, self).__init__(HBatchNorm2d, + BatchNormImpl, + num_features=num_features, + eps=eps, + momentum=momentum, + affine=affine, + gamma_init=gamma_init, + beta_init=beta_init, + moving_mean_init=moving_mean_init, + moving_var_init=moving_var_init, + use_batch_statistics=use_batch_statistics, + data_format=data_format) + + +class BatchNorm3d(_UniformOperator): + r""" + The dual-valued Batch Normalization layer over a second-order dual input of six dimensions, including + three spatial dimensions. + + This layer applies Batch Normalization over a dual input to reduce internal covariate shift. + Batch Normalization is widely used in convolutional networks. It rescales and recenters the feature using + a mini-batch of data and the learned parameters which can be described by the following formula: + + .. 
math:: + \begin{align} + \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\ + out = \frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}\\ + \text{Re(\hat{out})} = \text{Re(out)} * \text{Re(\gamma)} + \text{Re(\beta)}\\ + \text{Du(\hat{out})} = \text{Re(out)} * \text{Du(\gamma)} + + \text{Du(out)} * \text{Re(\gamma)} + \text{Du(\beta)}, + \end{align} + + where :math:`inp` is the dual input tensor, :math:`\mathrm{E}[inp]` is the arithmetic mean of the input tensor + over the batch dimension, :math:`\mathrm{Var}[inp]` is the statistical variance of the input tensor over spatial + dimensions, based on the dual norm :math:`\|x+\epsilon y\|=\left|\frac{y}{2}\right|+\sqrt{x^2+\frac{y^2}{4}}`, + :math:`\gamma` and :math:`\beta` are dual learnable parameters representing the scale and shift coefficients + respectively, and :math:`\delta` is a small positive constant, which is needed to avoid division by zero in case + statistical variance is close to zero. :math:`\text{Re(...)}` and :math:`\text{Du(...)}` are respectively real + and dual parts of the dual-valued expression inside the parentheses. + + Args: + num_features (int): The number of features in the input space. + eps (float): A small positive threshold, which is needed to avoid division by zero. Default: :math:`10^{-5}`. + momentum (float): A floating hyperparameter of the momentum for the running_mean and running_var computation. + Default: 0.9. + affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'.
+ moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + use_batch_statistics (bool): + + - If True, use the mean value and variance value of current batch data and track running mean + and running variance. + - If False, use the specified mean and variance values, and do not track statistics. + - If None, the use_batch_statistics is automatically set to True or False according to the training + and evaluation mode. During training, the parameter is set to True, and during evaluation, the + parameter is set to False. Default: True. + data_format (str): The optional value for data format. Only 'NCDHW' format is supported as of now. + Default: 'NCDHW'. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, D, H, W)`. '2' denotes that the input tensor belongs + to the dual domain and has a real and a dual part. The `num_features` in `Args` has to be equal + to :math:`C` in `Inputs`. + + Outputs: + Tensor, the normalized, scaled, offset tensor of the same shape :math:`(2, N, C, D, H, W)` as :math:`inp`. + + Raises: + TypeError: If `num_features` is not an int. + TypeError: If `eps` is not a float. + ValueError: If `num_features` is less than 1. + ValueError: If `momentum` is not in range [0, 1]. + ValueError: If `data_format` is not 'NCDHW'. + ValueError: If `inp` is not a Tensor of 6 dimensions.
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.dual import BatchNorm3d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32, 32, 32)).astype(np.float32)) + >>> bn = BatchNorm3d(64) + >>> y = bn(u) + >>> print(y.shape) + (2, 8, 64, 32, 32, 32) + """ + + def __init__(self, + num_features: int, + eps: float = 1e-5, + momentum: float = 0.9, + affine: bool = True, + gamma_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + beta_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_mean_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_var_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + use_batch_statistics: bool = True, + data_format='NCDHW') -> None: + super(BatchNorm3d, self).__init__(HBatchNorm3d, + BatchNormImpl, + num_features=num_features, + eps=eps, + momentum=momentum, + affine=affine, + gamma_init=gamma_init, + beta_init=beta_init, + moving_mean_init=moving_mean_init, + moving_var_init=moving_var_init, + use_batch_statistics=use_batch_statistics, + data_format=data_format) + + +class Dense(_UniformOperator): + r""" + The dual-valued dense connected layer. + + Applies dense connected layer for the dual-valued input. This layer implements the operation as: + + .. math:: + \begin{align} + \text{Re(out)} = \text{Re(inp)} * \text{Re(kernel)} + \text{Re(bias)}\\ + \text{Du(out)} = \text{Re(inp)} * \text{Du(kernel)} + \text{Du(inp)} * \text{Re(kernel)} + \text{Du(bias)}, + \end{align} + + where :math:`inp` is the dual input tensors, :math:`\text{kernel}` is a dual weight matrix with the same + data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is a dual bias vector with the same + data type as the :math:`inp` created by the layer (only if has_bias is True). 
:math:`\text{Re(...)}` and + :math:`\text{Du(...)}` are respectively real and dual parts of the dual-valued expression inside the parentheses. + + Args: + in_channels (int): The number of channels in the input space. + out_channels (int): The number of channels in the output space. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is the same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is + the same as `inp`. The values of str refer to the function `initializer`. Default: 'zeros'. + has_bias (bool): Specifies whether the layer uses a bias vector. Default: True. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, *, ..., *, in\_channels)`. '2' denotes that the input tensor + belongs to the dual domain and has a real and a dual part. The `in_channels` in `Args` + has to be equal to :math:`in\_channels` in `Inputs`. The count of intermediate dimensions denoted by '*' is + arbitrary but must be at least one. + + Outputs: + Tensor of shape :math:`(2, *, ..., *, out\_channels)`. The count of intermediate dimensions is the same as + in `Inputs`. + + Raises: + TypeError: If `in_channels` or `out_channels` is not an int. + TypeError: If `has_bias` is not a bool. + ValueError: If length of shape of `weight_init` is not equal to 3, + or shape[0] of `weight_init` is not equal to 2, + or shape[1] of `weight_init` is not equal to `out_channels`, + or shape[2] of `weight_init` is not equal to `in_channels`. + ValueError: If length of shape of `bias_init` is not equal to 2, + or shape[0] of `bias_init` is not equal to 2, + or shape[1] of `bias_init` is not equal to `out_channels`.
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.dual import Dense + >>> from mindspore import Tensor + >>> w = Tensor(np.random.random((2, 7, 5)).astype(np.float32)) + >>> b = Tensor(np.random.random((2, 7)).astype(np.float32)) + >>> net = Dense(in_channels=5, out_channels=7, weight_init=w, bias_init=b, has_bias=True) + >>> u = Tensor(np.random.random((2, 34, 1, 5)).astype(np.float32)) + >>> out = net(u) + >>> print(out.shape) + (2, 34, 1, 7) + """ + + def __init__(self, + in_channels: int, + out_channels: int, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + has_bias: bool = True) -> None: + super(Dense, self).__init__(HDense, + DenseImpl, + in_channels=in_channels, + out_channels=out_channels, + weight_init=weight_init, + bias_init=bias_init, + has_bias=has_bias) diff --git a/mindspore/python/mindspore/hypercomplex/hypercomplex/_hc_bn_impl.py b/mindspore/python/mindspore/hypercomplex/hypercomplex/_hc_bn_impl.py new file mode 100644 index 00000000000..0d20373633e --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/hypercomplex/_hc_bn_impl.py @@ -0,0 +1,339 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Hypercomplex BatchNorm Implementation""" +import numbers +from typing import Union, Tuple +from abc import abstractmethod +import mindspore.nn as nn +from mindspore.common.initializer import initializer, Initializer +from mindspore.common.parameter import Parameter +from mindspore.common.tensor import Tensor + + +class _BatchNormImpl(nn.Cell): + r""" + The interface of the implementor part of batch normalization layer on the second-order hypercomplex numbers. + + Defines the API for getting the norm of hypercomplex number, applying scaling and shift to a hypercomplex tensor, + and updating the running mean and variance, which are used during inference. The API is used by the 'BatchNorm' + class, and it must be implemented separately for every hypercomplex algebra: + + Args: + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. + num_features (int): The number of features in the input space. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + gamma_init: Union[Tensor, str, Initializer, numbers.Number], + beta_init: Union[Tensor, str, Initializer, numbers.Number], + num_features: int) -> None: + super(_BatchNormImpl, self).__init__() + + @abstractmethod + def get_norm(self, u: Tensor) -> Tensor: + r""" + Calculates norm of a hypercomplex elements of an input tensor. + + Norm is a non-negative real number that is a characteristic of 'magnitude' of that number, i.e. how far away it + is from zero. + + Args: + u (Tensor): Tensor of shape (2, *, ..., *). 
'2' denotes that the input tensor belongs to the hypercomplex + domain and has a real and a hypercomplex parts. + + Returns: + Tensor of shape (*, ..., *). The count and size of dimensions of the output tensor are the same ones as in + the input tensor, but without the very first dimension because the output tensor is real-valued. + """ + + @abstractmethod + def get_square_norm(self, u: Tensor) -> Tensor: + r""" + Calculates element-wise squared norm of hypercomplex elements of an input tensor. + + Norm is a non-negative real number that is a characteristic of 'magnitude' of that number, i.e. how far away it + is from zero. + + Args: + u (Tensor): Tensor of shape (2, *, ..., *). '2' denotes that the input tensor belongs to the hypercomplex + domain and has a real and a hypercomplex parts. + + Returns: + Tensor of shape (*, ..., *). The count and size of dimensions of the output tensor are the same ones as in + the input tensor, but without the very first dimension because the output tensor is real-valued. + """ + + @abstractmethod + def scale_and_shift(self, + u_x: Tensor, + u_y: Tensor, + scale_x: Tensor, + scale_y: Tensor, + shift_x: Tensor, + shift_y: Tensor) -> Tuple[Tensor, Tensor]: + r""" + Applies hypercomplex scaling and shift to an input tensor. + + This function implements the operation as: + + .. math:: + \text{out} = \text{mul}(\text{inp}, \text{scale}) + \text{shift}, + + where :math:`inp` is the hypercomplex input tensors, :math:`\text{mul}` is the channel-wise scaling operation, + which depends on the type of the number system and provided by subclassess, :math:`\text{scale}` is + a hypercomplex scaling vector with the same data type as the :math:`inp` created by the layer, and + :math:`\text{shift}` is a hypercomplex bias vector with the same data type as the :math:`inp` created by + the layer. + + Args: + u_x (Tensor): A tensor of shape (C,), which represents the real part of the normalized inputs. 
+ u_y (Tensor): A tensor of shape (C,), which represents the hypercomplex part of the normalized inputs. + scale_x (Tensor): A tensor of shape (C,), which represents the real part of the scaling vector. + scale_y (Tensor): A tensor of shape (C,), which represents the hypercomplex part of the scaling vector. + shift_x (Tensor): A tensor of shape (C,), which represents the real part of the bias vector. + shift_y (Tensor): A tensor of shape (C,), which represents the hypercomplex part of the bias vector. + + Returns: + Tuple of two tensors of shape (C,), which contains the real and the hypercomplex parts of rescaled and + recentered inputs. + """ + + @abstractmethod + def calculate_bn(self, + u_centered_x: Tensor, + u_centered_y: Tensor, + sigma: Tensor) -> Tuple[Tensor, Tensor]: + r""" + Given a hypercomplex centered input tensor and the standard deviation of its elements, computes the + corresponding rescaled and recentered tensor with normalized variance. + + This function implements the operation as: + + .. math:: + \text{out} = \text{mul}(\text{inp}, \frac{\text{scale}}{\sigma}) + \text{shift}, + + where :math:`inp` is the hypercomplex input tensors centered over spatial and mini-batch dimensions, + :math:`\sigma` is standard deviation of the input tensors over the same dimensions, :math:`\text{mul}` is a + channel-wise scaling operation, which depends on the type of the number system and provided by subclassess, + :math:`\text{scale}` is a hypercomplex scaling vector with the same data type as the :math:`inp` created + by the layer, and :math:`\text{shift}` is a hypercomplex bias vector with the same data type as the + :math:`inp` created by the layer. + + Args: + u_centered_x (Tensor): A tensor of shape (C,), which represents the real part of the centered inputs. + u_centered_y (Tensor): A tensor of shape (C,), which represents the hypercomplex part of the + centered inputs. 
+ sigma (Tensor): A tensor of shape (C,), which represents the statistical standard deviation of the inputs. + + Returns: + Tuple of two tensors of shape (C,), which contains the real and the hypercomplex parts of rescaled and + recentered normalized inputs. + """ + + @abstractmethod + def calculate_infer_bn(self, + moving_mean_x: Tensor, + moving_mean_y: Tensor, + moving_sigma: Tensor, + u_x: Tensor, + u_y: Tensor) -> Tuple[Tensor, Tensor]: + r""" + Given a hypercomplex input tensor, computes the corresponding rescaled and recentered normalized tensor. + + This function is supposed to be used during inference. The mean and standard deviation are accumulated during + the training phase. The function implements the operation as: + + .. math:: + \text{out} = \text{mul}(\text{inp}, \frac{\text{scale}}{\sigma}) + + \left(\text{mul}(-\mathrm{E}[inp], \frac{\text{scale}}{\sigma})+\text{shift}\right), + + where :math:`inp` is the hypercomplex input tensors, :math:`\sigma` is the accumulated standard deviation of + the input tensors over spatial and mini-batch dimensions, :math:`\mathrm{E}[inp]` is the accumulated arithmetic + mean of the input tensor over the same dimensions,:math:`\text{mul}` is a channel-wise scaling operation, which + depends on the type of the number system and provided by subclassess, :math:`\text{scale}` is a hypercomplex + scaling vector with the same data type as the :math:`inp` created by the layer, and :math:`\text{shift}` is a + hypercomplex bias vector with the same data type as the :math:`inp` created by the layer. + + Args: + moving_mean_x (Tensor): A tensor of shape (C,), which represents the real part of the accumulated + arithmetic mean of inputs. + moving_mean_y (Tensor): A tensor of shape (C,), which represents the hypercomplex part of the accumulated + arithmetic mean of inputs. + moving_sigma (Tensor): A tensor of shape (C,), which represents the accumulated statistical standard + deviation of inputs. 
+ u_x (Tensor): A tensor of shape (C,), which represents the real part of the input tensor. + u_y (Tensor): A tensor of shape (C,), which represents the hypercomplex part of the input tensor. + + Returns: + Tuple of two tensors of shape (C,), which contains the real and the hypercomplex parts of normalized, + rescaled and recentered inputs. + """ + + +class _BaseBatchNormImpl(_BatchNormImpl): + r""" + The base implementor part of the batch normalization layer for all the hypercomplex numbers of the second order. + + Contains initialization and processing logic, which are shared by all specific implementations of the + 'BatchNormImpl' interface for dual, double, and complex numbers. + + Args: + affine (bool) - A bool value. When set to True, gamma and beta can be learned. + use_batch_statistics (bool): If true, use the mean value and variance value of current batch data. If false, + use the mean value and variance value of specified value. If None, the training process will use the mean + and variance of current batch data and track the running mean and variance, the evaluation process will use + the running mean and variance. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. + num_features (int): The number of features in the input space. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + affine: bool, + use_batch_statistics: bool, + gamma_init: Union[Tensor, str, Initializer, numbers.Number], + beta_init: Union[Tensor, str, Initializer, numbers.Number], + num_features: int) -> None: + super(_BaseBatchNormImpl, self).__init__(gamma_init, + beta_init, + num_features) + self.scale_x = Parameter(initializer(gamma_init, num_features), name="scale_x", requires_grad=affine) + self.scale_y = Parameter(initializer(gamma_init, num_features), name="scale_y", requires_grad=affine) + self.shift_x = Parameter(initializer(beta_init, num_features), name="shift_x", requires_grad=affine) + self.shift_y = Parameter(initializer(beta_init, num_features), name="shift_y", requires_grad=affine) + + def calculate_infer_bn(self, + moving_mean_x: Tensor, + moving_mean_y: Tensor, + moving_sigma: Tensor, + u_x: Tensor, + u_y: Tensor) -> Tuple[Tensor, Tensor]: + r""" + Given a hypercomplex input tensor, computes the corresponding rescaled and recentered normalized tensor. + + This function is supposed to be used during inference. The mean and standard deviation are accumulated during + the training phase. The function implements the operation as: + + .. 
math:: + \text{out} = \text{mul}(\text{inp}, \frac{\text{scale}}{\sigma}) + + \left(\text{mul}(-\mathrm{E}[inp], \frac{\text{scale}}{\sigma})+\text{shift}\right), + + where :math:`inp` is the hypercomplex input tensor, :math:`\sigma` is the accumulated standard deviation of + the input tensors over spatial and mini-batch dimensions, :math:`\mathrm{E}[inp]` is the accumulated arithmetic + mean of the input tensor over the same dimensions, :math:`\text{mul}` is a channel-wise scaling operation, which + depends on the type of the number system and is provided by subclasses, :math:`\text{scale}` is a hypercomplex + scaling vector with the same data type as the :math:`inp` created by the layer, and :math:`\text{shift}` is a + hypercomplex bias vector with the same data type as the :math:`inp` created by the layer. + + Args: + moving_mean_x (Tensor): A tensor of shape (C,), which represents the real part of the accumulated + arithmetic mean of inputs. + moving_mean_y (Tensor): A tensor of shape (C,), which represents the hypercomplex part of the accumulated + arithmetic mean of inputs. + moving_sigma (Tensor): A tensor of shape (C,), which represents the accumulated statistical standard + deviation of inputs. + u_x (Tensor): A tensor of shape (C,), which represents the real part of the input tensor. + u_y (Tensor): A tensor of shape (C,), which represents the hypercomplex part of the input tensor. + + Returns: + Tuple of two tensors of shape (C,), which contains the real and the hypercomplex parts of normalized, + rescaled and recentered inputs.
+ """ + fused_scale_x = self.scale_x / moving_sigma + fused_scale_y = self.scale_y / moving_sigma + neg_mean_x = (-1) * moving_mean_x + neg_mean_y = (-1) * moving_mean_y + fused_shift_x, fused_shift_y = self.scale_and_shift(neg_mean_x, + neg_mean_y, + fused_scale_x, + fused_scale_y, + self.shift_x, + self.shift_y) + out_x, out_y = self.scale_and_shift(u_x, + u_y, + fused_scale_x, + fused_scale_y, + fused_shift_x, + fused_shift_y) + return out_x, out_y + + def calculate_bn(self, + u_centered_x: Tensor, + u_centered_y: Tensor, + sigma: Tensor) -> Tuple[Tensor, Tensor]: + r""" + Given a hypercomplex centered input tensor and the standard deviation of its elements, computes the + corresponding rescaled and recentered tensor with normalized variance. + + This function implements the operation as: + + .. math:: + \text{out} = \text{mul}(\text{inp}, \frac{\text{scale}}{\sigma}) + \text{shift}, + + where :math:`inp` is the hypercomplex input tensors centered over spatial and mini-batch dimensions, + :math:`\sigma` is standard deviation of the input tensors over the same dimensions, :math:`\text{mul}` is a + channel-wise scaling operation, which depends on the type of the number system and provided by subclassess, + :math:`\text{scale}` is a hypercomplex scaling vector with the same data type as the :math:`inp` created + by the layer, and :math:`\text{shift}` is a hypercomplex bias vector with the same data type as the + :math:`inp` created by the layer. + + Args: + u_centered_x (Tensor): A tensor of shape (C,), which represents the real part of the centered inputs. + u_centered_y (Tensor): A tensor of shape (C,), which represents the hypercomplex part of the + centered inputs. + sigma (Tensor): A tensor of shape (C,), which represents the statistical standard deviation of the inputs. + + Returns: + Tuple of two tensors of shape (C,), which contains the real and the hypercomplex parts of rescaled and + recentered normalized inputs. 
+ """ + scale_x = self.scale_x / sigma + scale_y = self.scale_y / sigma + out_x, out_y = self.scale_and_shift(u_centered_x, + u_centered_y, + scale_x, + scale_y, + self.shift_x, + self.shift_y) + return out_x, out_y + + @abstractmethod + def get_norm(self, u: Tensor) -> Tensor: + pass + + @abstractmethod + def get_square_norm(self, u: Tensor) -> Tensor: + pass + + @abstractmethod + def scale_and_shift(self, + u_x: Tensor, + u_y: Tensor, + scale_x: Tensor, + scale_y: Tensor, + shift_x: Tensor, + shift_y: Tensor) -> Tuple[Tensor, Tensor]: + pass diff --git a/mindspore/python/mindspore/hypercomplex/hypercomplex/_hc_conv_impl.py b/mindspore/python/mindspore/hypercomplex/hypercomplex/_hc_conv_impl.py new file mode 100644 index 00000000000..7e60c51af5b --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/hypercomplex/_hc_conv_impl.py @@ -0,0 +1,123 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Hypercomplex Convolution Implementation""" +import numbers +from typing import Callable, Union, Tuple + +from mindspore.common.initializer import initializer, Initializer +from mindspore.common.parameter import Parameter +from mindspore.common.tensor import Tensor +from mindspore import nn +from mindspore.hypercomplex.utils import get_x_and_y + + +class _ConvImpl(nn.Cell): + r""" + The interface of the implementor part of convolution layer on second-order hypercomplex numbers. + + Defines the API for unbiased convolution transformation, which is used by the '_ConvNd' class. The API must + be implemented separately for every hypercomplex algebra: + + .. math:: + \text{out} = \text{conv}(\text{inp}, \text{kernel}) + + where :math:`inp` is the hypercomplex input tensors, :math:`\text{conv}` is the convolution transformation + operation, which is provided by subclasses, :math:`\text{kernel}` is a hypercomplex weight matrix with the same + data type as the :math:`inp` created by the layer. + + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of real and hypercomplex parts of + the kernel. + **factory_kwargs (dict): Extra parameters which may be needed by specific subclasses. + + Inputs: + - **conv_op** (Callable) - the function of the real-valued convolution to be used with decomposition + of the hypercomplex convolution transformation. For example, mindspore.ops.operations.Conv2D(...) + may be passed for a 2D convolution. + - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`, + which defines the real part of the input. The exact shape depends on data format and the number of spatial + dimensions. 
+ - **y** (Tensor) - Tensor of the same shape as `x`, which defines the hypercomplex part of the input. + + Outputs: + Tuple of two tensors, each of shape :math:`(N, C_{out}, *, ..., *)` or :math:`(N, *, ..., *, C_{out})`, which + represent the real and the hypercomplex parts of the output respectively. Data format and the count of spatial + dimensions are the same as in `x` and `y`. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + weight_init: Union[Tensor, str, Initializer, numbers.Number], + weight_shape: tuple, + **factory_kwargs) -> None: + super(_ConvImpl, self).__init__() + + def construct(self, + conv_op: Callable, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + pass + + +class _BaseConvImpl(_ConvImpl): + r""" + The base implementor part of the convolution layer for all the hypercomplex numbers of the second order. + + Contains initialization of the kernel tensors, which is shared by all specific implementations of the 'ConvImpl' + interface for dual, double, and complex numbers. + + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of real and hypercomplex parts of + the kernel. + **factory_kwargs (dict): Extra parameters which may be needed by specific subclasses. + + Inputs: + - **conv_op** (Callable) - the function of the real-valued convolution to be used with decomposition + of the hypercomplex convolution transformation. For example, mindspore.ops.operations.Conv2D(...) may be + passed for a 2D convolution. + - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`, + which defines the real part of the input. The exact shape depends on data format and the number of spatial + dimensions.
+ - **y** (Tensor) - Tensor of the same shape as `x`, which defines the hypercomplex part of the input. + + Outputs: + Tuple of two tensors, each of shape :math:`(N, C_{out}, *, ..., *)` or :math:`(N, *, ..., *, C_{out})`, which + represent the real and the hypercomplex parts of the output respectively. Data format and the count of spatial + dimensions are the same as in `x` and `y`. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + weight_init: Union[Tensor, str, Initializer, numbers.Number], + weight_shape: tuple, + **factory_kwargs) -> None: + super(_BaseConvImpl, self).__init__(weight_init, + weight_shape, + **factory_kwargs) + + if isinstance(weight_init, Tensor): + weight_init_x, weight_init_y = get_x_and_y(weight_init) + else: + weight_init_x = weight_init_y = weight_init + self.weight_x = Parameter(initializer(weight_init_x, shape=weight_shape), name='weight_x') + self.weight_y = Parameter(initializer(weight_init_y, shape=weight_shape), name='weight_y') diff --git a/mindspore/python/mindspore/hypercomplex/hypercomplex/_hc_dense_impl.py b/mindspore/python/mindspore/hypercomplex/hypercomplex/_hc_dense_impl.py new file mode 100644 index 00000000000..35605862e5a --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/hypercomplex/_hc_dense_impl.py @@ -0,0 +1,114 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""hypercomplex dense implementation""" +import numbers +from typing import Callable, Union, Tuple + +from mindspore.common.initializer import initializer, Initializer +from mindspore.common.parameter import Parameter +from mindspore.common.tensor import Tensor +from mindspore import nn +from mindspore.hypercomplex.utils import get_x_and_y + + +class _DenseImpl(nn.Cell): + r""" + The interface of the implementor part of dense connected layer on second-order hypercomplex numbers. + + Defines the API for linear transformation, which is used by the 'Dense' class. The API must be implemented + separately for every hypercomplex algebra: + + .. math:: + \text{out} = \text{linear}(\text{inp}, \text{kernel}) + + where :math:`inp` is the hypercomplex input tensors, :math:`\text{linear}` is the linear transformation operation, + which is provided by subclasses, :math:`\text{kernel}` is a hypercomplex weight matrix with the same data type as + the :math:`inp` created by the layer. + + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of real and hypercomplex parts of + the kernel. + **factory_kwargs (dict): Extra parameters which may be needed by specific subclasses. + + Inputs: + - **matmul_op** (Callable) - the function of the real-valued matrix multiplication to be used for decomposition + of the hypercomplex linear transformation. Usually, mindspore.ops.operations.MatMul(...) is passed. + - **x** (Tensor) - Tensor of shape :math:`(*, in\_channels)`, which defines the real part of the input. + - **y** (Tensor) - Tensor of shape :math:`(*, in\_channels)`, which defines the hypercomplex part of the input. 
+ + Outputs: + Tuple of two tensors, each of shape :math:`(*, out\_channels)`, which represents the real and the hypercomplex + part of the output. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + weight_init: Union[Tensor, str, Initializer, numbers.Number], + weight_shape: tuple, + **factory_kwargs) -> None: + super(_DenseImpl, self).__init__() + + def construct(self, + matmul_op: Callable, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + pass + + +class _BaseDenseImpl(_DenseImpl): + r""" + The base implementor part of the dense connected layer for all the hypercomplex numbers of the second order. + + Contains initialization of the kernel tensors, which is shared by all specific implementations of the 'DenseImpl' + interface for dual, double, and complex numbers. + + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of real and hypercomplex parts of + the kernel. + **factory_kwargs (dict): Extra parameters which may be needed by specific subclasses. + + Inputs: + - **matmul_op** (Callable) - the function of the real-valued matrix multiplication to be used for decomposition + of the hypercomplex linear transformation. Usually, mindspore.ops.operations.MatMul(...) is passed + - **x** (Tensor) - Tensor of shape :math:`(*, in\_channels)`, which defines the real part of the input. + - **y** (Tensor) - Tensor of shape :math:`(*, in\_channels)`, which defines the hypercomplex part of the input. + + Outputs: + Tuple of two tensors, each of shape :math:`(*, out\_channels)`, which represents the real and the hypercomplex + part of the output. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + weight_init: Union[Tensor, str, Initializer, numbers.Number], + weight_shape: tuple, + **factory_kwargs) -> None: + super(_BaseDenseImpl, self).__init__(weight_init, + weight_shape, + **factory_kwargs) + if isinstance(weight_init, Tensor): + weight_init_x, weight_init_y = get_x_and_y(weight_init) + else: + weight_init_x = weight_init_y = weight_init + self.weight_x = Parameter(initializer(weight_init_x, shape=weight_shape), name='weight_x') + self.weight_y = Parameter(initializer(weight_init_y, shape=weight_shape), name='weight_y') diff --git a/mindspore/python/mindspore/hypercomplex/hypercomplex/hc_bn.py b/mindspore/python/mindspore/hypercomplex/hypercomplex/hc_bn.py new file mode 100644 index 00000000000..30ca3923b6f --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/hypercomplex/hc_bn.py @@ -0,0 +1,627 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Hypercomplex batchnorm""" +import numbers +from typing import TypeVar, Type, Union, Any +from abc import abstractmethod + +import numpy as np +import mindspore +import mindspore.context as context +import mindspore.nn as nn +import mindspore.ops as P +from mindspore._checkparam import Validator as validator +from mindspore.common.parameter import Parameter +from mindspore.common.initializer import initializer, Initializer +from mindspore.common.tensor import Tensor +from mindspore.ops import functional as F +from mindspore.hypercomplex.hypercomplex._hc_bn_impl import _BatchNormImpl as BatchNormImpl +from mindspore.hypercomplex.utils import get_x_and_y, to_2channel + +TBatchNormImpl = TypeVar('TBatchNormImpl', bound=BatchNormImpl) + + +class _BatchNorm(nn.Cell): + r""" + The base class of the abstract part of Batch Normalization layer over a second-order hypercomplex input + of some number of dimensions. + + This layer applies Batch Normalization over a hypercomplex input to reduce internal covariate shift. + Batch Normalization is widely used in convolutional networks. It rescales and recenters the feature using + a mini-batch of data and the learned parameters which can be described by the following formula: + + .. 
math:: + \begin{align} + \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\ + out = \text{linear}(\frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}, \gamma) + \beta, + \end{align} + + where :math:`inp` is the hypercomplex input tensors, :math:`\text{linear}` is the linear transformation operation, + which depends on the type of the number system and provided by the implementor part of the batch normalization + layer, :math:`\mathrm{E}[inp]` is the arithmetic mean of the input tensor over the spatial and mini-batch + dimensions, :math:`\mathrm{Var}[inp]` is the statistical variance of the input tensor over the same dimensions, + :math:`\gamma` and :math:`\beta` are hypercomplex learnable parameters representing the scale and shift coefficients + respectively, and :math:`\delta` is a small positive constant, which is needed to avoid division by zero in case + statistical variance is close to zero. + + This is not a self-sufficient class. In order to construct a batch normalization layer, one should instantiate a child + class and an implementor class, which acts like a bridge pattern and determines the exact set of hypercomplex + numbers. That implies the rules of multiplication and therefore affects how a linear transformation works. + + Args: + bn_impl(BatchNormImpl): The implementor object of the batch normalization layer. Essentially, the concrete + class name of this argument defines the algebra that the batch normalization layer will operate on. + num_features (int): The number of features in the input space. + eps (float): A small positive threshold, which is needed to avoid division by zero. Default: :math:`10^{-5}` + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + momentum (float): A floating hyperparameter of the momentum for the running_mean and running_var computation. 
+ Default: 0.9. + affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + use_batch_statistics (bool): + - If True, use the mean value and variance value of current batch data and track running mean + and running variance. + - If False, use the mean value and variance value of specified value, and not track statistical value. + - If None, the use_batch_statistics is automatically set to True or False according to the training + and evaluation mode. During training, the parameter is set to True, and during evaluation, the + parameter is set to False. Default: None. + data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'. Default: 'NCHW'. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, *, ..., *)` if data_format is 'NCHW', or + :math:`(2, N, *, ..., *, C)` if data_format is 'NHWC', with float16 or float32 data type. '2' denotes that + the input tensor belongs to the hypercomplex domain and has got a real and a hypercomplex parts. Or, + :math:`(N, C, *, ..., *)` if data_format is 'NCHW', or :math:`(N, *, ..., *, C)` if data_format is 'NHWC', + with complex64 data type. 
The `num_features` in `Args` has to be equal to :math:`C` in `Inputs`. + The count of dimensions denoted by '*' must be equal to the number of spatial dimensions. + + Outputs: + Tensor, the normalized, scaled, offset tensor of the same data type and shape as :math:`inp`: + :math:`(2, N, C, *, ..., *)` if data_format is 'NCHW', or :math:`(2, N, *, ..., *, C)` if data_format is 'NHWC', + with float16 or float32 data type. Or, :math:`(N, C, *, ..., *)` if data_format is 'NCHW', or + :math:`(N, *, ..., *, C)` if data_format is 'NHWC', with complex64 data type. + + Raises: + TypeError: If `num_features` is not an int. + TypeError: If `eps` is not a float. + TypeError: If dtype of `inp` is not float16, float32 or complex64. + ValueError: If `num_features` is less than 1. + ValueError: If `momentum` is not in range [0, 1]. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + bn_impl: Type[TBatchNormImpl], + num_features: int, + eps: float = 1e-5, + momentum: float = 0.9, + affine: bool = True, + gamma_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + beta_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_mean_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_var_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + use_batch_statistics: bool = None, + data_format: str = 'NCHW') -> None: + """Initialize _BatchNorm.""" + super(_BatchNorm, self).__init__() + validator.check_value_type('num_features', num_features, [int], self.cls_name) + if num_features < 1: + raise ValueError(f"For '{self.cls_name}', the 'num_features' must be at least 1, but got {num_features}.") + + if momentum < 0 or momentum > 1: + raise ValueError(f"For '{self.cls_name}', the 'momentum' must be a number in range [0, 1], " + f"but got {momentum}.") + self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.cls_name) + if context.get_context("device_target") != "GPU" and 
self.format == "NHWC": + raise ValueError(f"For '{self.cls_name}', the 'NHWC' format only support in GPU target, but got device " + f"target {context.get_context('device_target')}.") + self.use_batch_statistics = use_batch_statistics + if self.use_batch_statistics is not None and not isinstance(self.use_batch_statistics, bool): + raise ValueError(f"For '{self.cls_name}', the 'use_batch_statistics' must be a boolean value or None," + f" but got {use_batch_statistics}.") + self.num_features = num_features + self.eps = eps + self.beta_init = beta_init + self.gamma_init = gamma_init + self.moving_mean_init = moving_mean_init + self.moving_var_init = moving_var_init + self.affine = affine + + self.bn_impl = bn_impl(affine, use_batch_statistics, gamma_init, beta_init, num_features) + + self.moving_mean_x = Parameter( + initializer(moving_mean_init, (num_features)), name="mean_x", requires_grad=False + ) + self.moving_mean_y = Parameter( + initializer(moving_mean_init, (num_features)), name="mean_y", requires_grad=False + ) + self.moving_sigma2 = Parameter( + initializer(moving_var_init, num_features), name="sigma2", requires_grad=False + ) + + self.parallel_mode = context.get_auto_parallel_context("parallel_mode") + + self._target = context.get_context("device_target") + self.is_graph_mode = context.get_context("mode") == context.GRAPH_MODE + self.momentum = 1.0 - momentum + + self.reduce_mean_op1 = P.ReduceMean(keep_dims=True) + self.reduce_mean_op2 = P.ReduceMean(keep_dims=False) + + self.features_dim = data_format.lower().find('c') + self.get_dtype = P.DType() + self.get_shape = P.Shape() + + def construct(self, u: Tensor) -> Tensor: + """construct""" + u_dtype = self.get_dtype(u) + u_shape = self.get_shape(u) + self._check_input_dim(u_shape, u_dtype) + if u_dtype == mindspore.complex64: + hc_axis = None + feature_axis = self.features_dim + else: + hc_axis = 0 + feature_axis = self.features_dim + 1 + + if self.training or not self.use_batch_statistics: + ndim = u.ndim 
+ hc_axis = hc_axis + feature_axis = feature_axis + sh = np.arange(ndim) + sh = sh[sh != hc_axis] + sh = sh[sh != feature_axis] + if hc_axis is None: + u_x, u_y = get_x_and_y(u) + mu_x = self.reduce_mean_op1(u_x, sh.tolist()) + mu_y = self.reduce_mean_op1(u_y, sh.tolist()) + mu = to_2channel(mu_x, mu_y, mindspore.complex64) + else: + mu = self.reduce_mean_op1(u, sh.tolist()) + + u_centered = u - mu + norma2 = self.bn_impl.get_square_norm(u_centered) + norma_feature_axis = feature_axis if hc_axis is None or feature_axis < hc_axis else feature_axis - 1 + ndim = norma2.ndim + mean_dims = np.arange(ndim) + mean_dims = mean_dims[mean_dims != norma_feature_axis] + sigma2 = self.reduce_mean_op2(norma2, mean_dims.tolist()) + self.eps + result = self._calculate_bn(u_centered, sigma2, feature_axis) + + if self.use_batch_statistics: + momentum = self.momentum + mu = mu.squeeze() + mu_x, mu_y = get_x_and_y(mu) + momentum_suppl = 1 - momentum + self.moving_mean_x *= momentum_suppl + self.moving_mean_x += mu_x * momentum + self.moving_mean_y *= momentum_suppl + self.moving_mean_y += mu_y * momentum + self.moving_sigma2 *= momentum_suppl + self.moving_sigma2 += sigma2 * momentum + elif self.affine: + result = self._calculate_infer_bn(u, axis=feature_axis) + else: + broadcast_mu_shape = [1] * u.ndim + broadcast_mu_shape[feature_axis] = u_shape[feature_axis] + if hc_axis is not None: + broadcast_mu_shape[hc_axis] = 2 + moving_mean = to_2channel(self.moving_mean_x, self.moving_mean_y, u.dtype) + moving_mean = moving_mean.reshape(tuple(broadcast_mu_shape)) + inference_centered = u - moving_mean + result = self._calculate_bn(inference_centered, self.moving_sigma2, feature_axis) + return result + + def _calculate_bn(self, + u_centered: Tensor, + sigma2: Tensor, + axis: int) -> Tensor: + """_calculate_bn, implement the abstract function""" + sigma = P.sqrt(sigma2) + ndim = u_centered.ndim + u_shape = list(np.arange(ndim)) + u_shape[ndim - 1] = axis + u_shape[axis] = ndim - 1 + u_shape = 
tuple(int(i) for i in u_shape) + out = P.transpose(u_centered, u_shape) + if self.affine: + out_x, out_y = get_x_and_y(out) + out_x, out_y = self.bn_impl.calculate_bn(out_x, out_y, sigma) + out = to_2channel(out_x, out_y, self.get_dtype(u_centered)) + else: + out = out / sigma + out = P.transpose(out, u_shape) + return out + + def _calculate_infer_bn(self, + u: Tensor, + axis: int) -> Tensor: + """_calculate_infer_bn, implement the abstract function""" + ndim = u.ndim + shape = list(np.arange(ndim)) + shape[ndim-1] = axis + shape[axis] = ndim - 1 + shape = tuple(int(i) for i in shape) + + out = P.transpose(u, shape) + out_x, out_y = get_x_and_y(out) + out_x, out_y = self.bn_impl.calculate_infer_bn(self.moving_mean_x, + self.moving_mean_y, + P.sqrt(self.moving_sigma2), + out_x, + out_y) + out = to_2channel(out_x, out_y, dtype=u.dtype) + out = P.transpose(out, shape) + return out + + @abstractmethod + def _check_input_dim(self, shape: tuple, dtype: Any): + raise NotImplementedError + + +class BatchNorm1d(_BatchNorm): + r""" + The class of the abstract part of Batch Normalization layer over a second-order hypercomplex input + of four dimensions including one spatial dimension, or three dimensions. + + This layer applies Batch Normalization over a hypercomplex input of 'NCW' data format in order to reduce + internal covariate shift. Batch Normalization is widely used in convolutional networks. It rescales and recenters + the feature using a mini-batch of data and the learned parameters which can be described by the following formula: + + .. 
math:: + \begin{align} + \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\ + out = \text{linear}(\frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}, \gamma) + \beta, + \end{align} + + where :math:`inp` is the hypercomplex input tensors, :math:`\text{linear}` is the linear transformation operation, + which depends on the type of the number system and provided by the implementor part of the batch normalization + layer, :math:`\mathrm{E}[inp]` is the arithmetic mean of the input tensor over the spatial and mini-batch + dimensions, :math:`\mathrm{Var}[inp]` is the statistical variance of the input tensor over the same dimensions, + :math:`\gamma` and :math:`\beta` are hypercomplex learnable parameters representing the scale and shift coefficients + respectively, and :math:`\delta` is a small positive constant, which is needed to avoid division by zero in case + statistical variance is close to zero. + + This is not a self-sufficient class. In order to construct a batch normalization layer, one should instantiate this + class and an implementor class, which acts like a bridge pattern and determines the exact set of hypercomplex + numbers. That implies the rules of multiplication and therefore affects how a linear transformation works. + + Args: + bn_impl(BatchNormImpl): The implementor object of the batch normalization layer. Essentially, the concrete + class name of this argument defines the algebra that the batch normalization layer will operate on. + num_features (int): The number of features in the input space. + eps (float): A small positive threshold, which is needed to avoid division by zero. Default: :math:`10^{-5}` + momentum (float): A floating hyperparameter of the momentum for the running_mean and running_var computation. + Default: 0.9. + affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. 
+ The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + use_batch_statistics (bool): + + - If True, use the mean value and variance value of current batch data and track running mean + and running variance. + - If False, use the mean value and variance value of specified value, and not track statistical value. + - If None, the use_batch_statistics is automatically set to True or False according to the training + and evaluation mode. During training, the parameter is set to True, and during evaluation, the + parameter is set to False. Default: None. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, W)` or :math:`(2, N, C)`, with float16 or float32 data + type, or :math:`(N, C, W)` or :math:`(N, C)`, with complex64 data type. In the former case '2' denotes that + the input tensor belongs to the hypercomplex domain and has got a real and a hypercomplex parts. + The `num_features` in `Args` has to be equal to :math:`C` in `inp`. + + Outputs: + Tensor, the normalized, scaled, offset tensor of the same data type and shape as :math:`inp`: + :math:`(2, N, C, W)` or :math:`(2, N, C)`, with float16 or float32 data type, or :math:`(N, C, W)` or + :math:`(N, C)`, with complex64 data type. + + Raises: + TypeError: If `num_features` is not an int. + TypeError: If `eps` is not a float. 
+ TypeError: If dtype of `inp` is not float16, float32 or complex64. + ValueError: If `num_features` is less than 1. + ValueError: If `momentum` is not in range [0, 1]. + ValueError: if 'inp' is not a Tensor of 3 or 4 dimensions with float16 or float32 data type, and not a Tensor + of 2 or 3 dimensions with complex64 data type. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + bn_impl: Type[TBatchNormImpl], + num_features: int, + eps: float = 1e-5, + momentum: float = 0.9, + affine: bool = True, + gamma_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + beta_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_mean_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_var_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + use_batch_statistics: bool = None) -> None: + """Initialize _BatchNorm.""" + + super(BatchNorm1d, self).__init__(bn_impl, + num_features, + eps, + momentum, + affine, + gamma_init, + beta_init, + moving_mean_init, + moving_var_init, + use_batch_statistics) + + def _check_input_dim(self, shape: tuple, dtype: Any): + dim = len(shape) + if dtype in [mindspore.float16, mindspore.float32]: + if dim not in (4, 3): + raise ValueError(f"For '{self.cls_name}', the in_shape must have 3-4 dims, but got {dim}.") + elif dtype == mindspore.complex64: + if dim not in (3, 2): + raise ValueError(f"For '{self.cls_name}', the in_shape must have 2-3 dims, but got {dim}.") + else: + raise TypeError(f"Only float16, float32 and complex64 data types are supported, but got {dtype}.") + return None + + +class BatchNorm2d(_BatchNorm): + r""" + The class of the abstract part of Batch Normalization layer over a second-order hypercomplex input + of five dimensions, including two spatial dimensions. + + This layer applies Batch Normalization over a hypercomplex input to reduce internal covariate shift. + Batch Normalization is widely used in convolutional networks. 
It rescales and recenters the feature + using a mini-batch of data and the learned parameters which can be described by the following formula: + + .. math:: + \begin{align} + \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\ + out = \text{linear}(\frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}, \gamma) + \beta, + \end{align} + + where :math:`inp` is the hypercomplex input tensors, :math:`\text{linear}` is the linear transformation operation, + which depends on the type of the number system and provided by the implementor part of the batch normalization + layer, :math:`\mathrm{E}[inp]` is the arithmetic mean of the input tensor over the spatial and mini-batch + dimensions, :math:`\mathrm{Var}[inp]` is the statistical variance of the input tensor over the same dimensions, + :math:`\gamma` and :math:`\beta` are hypercomplex learnable parameters representing the scale and shift coefficients + respectively, and :math:`\delta` is a small positive constant, which is needed to avoid division by zero in case + statistical variance is close to zero. + + This is not a self-sufficient class. In order to construct a batch normalization layer, one should instantiate this + class and an implementor class, which acts like a bridge pattern and determines the exact set of hypercomplex + numbers. That implies the rules of multiplication and therefore affects how a linear transformation works. + + Args: + bn_impl(BatchNormImpl): The implementor object of the batch normalization layer. Essentially, the concrete + class name of this argument defines the algebra that the batch normalization layer will operate on. + num_features (int): The number of features in the input space. + eps (float): A small positive threshold, which is needed to avoid division by zero. Default: :math:`10^{-5}` + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. 
The values of str refer to the function `initializer`. Default: 'normal'. + momentum (float): A floating hyperparameter of the momentum for the running_mean and running_var computation. + Default: 0.9. + affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + use_batch_statistics (bool): + + - If True, use the mean value and variance value of current batch data and track running mean + and running variance. + - If False, use the mean value and variance value of specified value, and not track statistical value. + - If None, the use_batch_statistics is automatically set to True or False according to the training + and evaluation mode. During training, the parameter is set to True, and during evaluation, the + parameter is set to False. Default: None. + data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'. Default: 'NCHW'. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, H, W)` if data_format is 'NCHW', or + :math:`(2, N, H, W, C)` if data_format is 'NHWC', with float16 or float32 data type. 
'2' denotes that the + input tensor belongs to the hypercomplex domain and has got a real and a hypercomplex parts. Or, + :math:`(N, C, H, W)` if data_format is 'NCHW', or :math:`(N, H, W, C)` if data_format is 'NHWC', with + complex64 data type. The `num_features` in `Args` has to be equal to :math:`C` in `Inputs`. + + Outputs: + Tensor, the normalized, scaled, offset tensor of the same data type and shape as :math:`inp`: + :math:`(2, N, C, H, W)` if data_format is 'NCHW', or :math:`(2, N, H, W, C)` if data_format is 'NHWC', with + float16 or float32 data type. Or, :math:`(N, C, H, W)` if data_format is 'NCHW', or :math:`(N, H, W, C)` if + data_format is 'NHWC', with complex64 data type. + + Raises: + TypeError: If `num_features` is not an int. + TypeError: If `eps` is not a float. + TypeError: If dtype of `inp` is not float16, float32 or complex64. + ValueError: If `num_features` is less than 1. + ValueError: If `momentum` is not in range [0, 1]. + ValueError: If `data_format` is neither 'NHWC' nor 'NCHW'. + ValueError: if 'inp' is not a Tensor of 5 dimensions with float16 or float32 data type, and not a Tensor of 4 + dimensions with complex64 data type. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def _check_input_dim(self, shape: tuple, dtype: Any): + dim = len(shape) + if dtype in [mindspore.float16, mindspore.float32]: + if dim != 5: + raise ValueError(f"For '{self.cls_name}', the in_shape must have 5 dims, but got {dim}.") + elif dtype == mindspore.complex64: + if dim != 4: + raise ValueError(f"For '{self.cls_name}', the in_shape must have 4 dims, but got {dim}.") + else: + raise TypeError(f"Only float16, float32 and complex64 data types are supported, but got {dtype}.") + return None + + +class BatchNorm3d(nn.Cell): + r""" + The class of the abstract part of Batch Normalization layer over a second-order hypercomplex input + of six dimensions, including three spatial dimensions. 
+ + This layer applies Batch Normalization over a hypercomplex input to reduce internal covariate shift. + Batch Normalization is widely used in convolutional networks. It rescales and recenters the feature + using a mini-batch of data and the learned parameters which can be described by the following formula: + + .. math:: + \begin{align} + \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\ + y = \text{linear}(\frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}, \gamma) + \beta, + \end{align} + + where :math:`inp` is the hypercomplex input tensors, :math:`\text{linear}` is the linear transformation operation, + which depends on the type of the number system and provided by the implementor part of the batch normalization + layer, :math:`\mathrm{E}[inp]` is the arithmetic mean of the input tensor over the spatial and mini-batch + dimensions, :math:`\mathrm{Var}[inp]` is the statistical variance of the input tensor over the same dimensions, + :math:`\gamma` and :math:`\beta` are hypercomplex learnable parameters representing the scale and shift coefficients + respectively, and :math:`\delta` is a small positive constant, which is needed to avoid division by zero in case + statistical variance is close to zero. + + This is not a self-sufficient class. In order to construct a fully connected layer, one should instantiate this + class and an implementor class, which acts like a bridge pattern and determines the exact set of hypercomplex + numbers. That implies the rules of multiplication and therefore affects how a linear transformation works. + + Args: + bn_impl(BatchNormImpl): The implementor object of the batch normalization layer. Essentially, the concrete + class name of this argument defines the algebra that the batch normalization layer will operate on. + num_features (int): The number of features in the input space. + eps (float): A small positive threshold, which is needed to avoid division by zero. 
Default: :math:`10^{-5}` + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + momentum (float): A floating hyperparameter of the momentum for the running_mean and running_var computation. + Default: 0.9. + affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + use_batch_statistics (bool): + + - If True, use the mean value and variance value of current batch data and track running mean + and running variance. + - If False, use the mean value and variance value of specified value, and not track statistical value. + - If None, the use_batch_statistics is automatically set to True or False according to the training + and evaluation mode. During training, the parameter is set to True, and during evaluation, the + parameter is set to False. Default: None. + data_format (str): The optional value for data format. Only 'NCDHW' format is supported as of now. + Default: 'NCDHW'. 
+ + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, D, H, W)`, with float16 or float32 data type, or + :math:`(N, C, D, H, W)`, with complex64 data type. In the former case '2' denotes that the input tensor + belongs to the hypercomplex domain and has got a real and a hypercomplex parts. The `num_features` in `Args` + has to be equal to :math:`C` in `Inputs`. + + Outputs: + Tensor, the normalized, scaled, offset tensor of the same data type and shape as :math:`inp`: + :math:`(2, N, C, D, H, W)`, with float16 and float32 data type, or :math:`(N, C, D, H, W)`, with + complex64 data type. + + Raises: + TypeError: If `num_features` is not an int. + TypeError: If `eps` is not a float. + TypeError: If dtype of `inp` is not float16, float32 or complex64. + ValueError: If `num_features` is less than 1. + ValueError: If `momentum` is not in range [0, 1]. + ValueError: If `data_format` is not 'NCDHW'. + ValueError: if 'inp' is not a Tensor of 6 dimensions. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + bn_impl: Type[TBatchNormImpl], + num_features: int, + eps: float = 1e-5, + momentum: float = 0.9, + affine: bool = True, + gamma_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + beta_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_mean_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_var_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + use_batch_statistics: bool = None, + data_format: str = 'NCDHW') -> None: + """Initialize _BatchNorm.""" + super(BatchNorm3d, self).__init__() + self.format = validator.check_string(data_format, ['NCDHW'], 'format', self.cls_name) + self.reshape = P.Reshape() + self.bn2d = BatchNorm2d(bn_impl=bn_impl, + num_features=num_features, + eps=eps, + momentum=momentum, + affine=affine, + gamma_init=gamma_init, + beta_init=beta_init, + moving_mean_init=moving_mean_init, + moving_var_init=moving_var_init, + 
use_batch_statistics=use_batch_statistics, + data_format="NCHW") + + def construct(self, u: Tensor) -> Tensor: + '''Merge the D and H axes, apply the wrapped 2D batch normalization and restore the input shape.''' + u_shape = F.shape(u) + self._check_3d_shape(u_shape, F.dtype(u)) + reshape = list(u_shape) + reshape[-3] *= reshape[-2] + reshape = tuple(int(i) for i in reshape[:-2] + reshape[-1:]) + u = self.reshape(u, tuple(reshape)) + out = self.bn2d(u) + out = self.reshape(out, u_shape) + return out + + def _check_3d_shape(self, input_shape, dtype: Any) -> None: + '''Require rank 6 for float16/float32 and rank 5 for complex64; raise TypeError for any other dtype.''' + dim = len(input_shape) + if dtype in [mindspore.float16, mindspore.float32]: + if dim != 6: + msg_prefix = f"For '{self.cls_name}', the" if self.cls_name else "The" + raise ValueError(f"{msg_prefix} input_shape must be 6-dimensional, but got the length of input_shape: " + f"{dim}.") + elif dtype == mindspore.complex64: + if dim != 5: + msg_prefix = f"For '{self.cls_name}', the" if self.cls_name else "The" + raise ValueError(f"{msg_prefix} input_shape must be 5-dimensional, but got the length of input_shape: " + f"{dim}.") + else: + raise TypeError(f"Only float16, float32 and complex64 data types are supported, but got {dtype}.") + return None diff --git a/mindspore/python/mindspore/hypercomplex/hypercomplex/hc_conv.py b/mindspore/python/mindspore/hypercomplex/hypercomplex/hc_conv.py new file mode 100644 index 00000000000..2d726255288 --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/hypercomplex/hc_conv.py @@ -0,0 +1,1055 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Hypercomplex Convolution""" +import numbers +from typing import Type, TypeVar, Tuple, Union +from abc import abstractmethod + +import numpy as np +from mindspore._checkparam import Validator, Rel, twice, _check_3d_int_or_tuple +from mindspore import context +from mindspore import log as logger +from mindspore.common.initializer import initializer, Initializer +from mindspore.common.parameter import Parameter +from mindspore.common.tensor import Tensor +from mindspore.nn.cell import Cell +from mindspore.ops import operations as P +from mindspore.hypercomplex.hypercomplex._hc_conv_impl import _ConvImpl as ConvImpl +from mindspore.hypercomplex.utils import get_x_and_y, to_2channel, \ + _size_1_t, _size_2_t, _size_3_t + + +TConvImpl = TypeVar('TConvImpl', bound=ConvImpl) + + +class _ConvNd(Cell): + r""" + The base class of the abstraction part of Convolution layer of the second-order hypercomplex input. + + Calculates the convolution on the input tensor which is typically of shape :math:`(2, N, C_{in}, *, ..., *)`, + where :math:`N` is batch size, :math:`C_{in}` is a number of input channels, and the count of spatial + dimensions denoted by '*' is defined by the specific subclass. The formula is defined as: + + .. math:: + + \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + + \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{inp}(N_i, k)}) + + where :math:`ccor` is the `cross-correlation `_, the exact + implementation of which is defined and provided by the implementor part of the convolution layer, + :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of + the output and :math:`j` is in the range of :math:`[0, C_{out}-1]`. 
:math:`\text{weight}(C_{\text{out}_j}, k)` + is a convolution kernel slice with shape :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`, + where :math:`\text{kernel_size[0]}` and :math:`\text{kernel_size[1]}` are the height and width of the convolution + kernel respectively. :math:`\text{bias}` is the bias parameter and :math:`\text{inp}` is the input tensor. + In this case, `data_format` of the input tensor is 'NCHW' and the shape of full convolution kernel is + :math:`(2, C_{out}, C_{in} / \text{group}, *, ..., *)`, where `group` is the number of groups to split + the input `inp` in the channel dimension, and the '*' symbols denote the corresponding kernel dimensions. + If `data_format` of the input tensor is 'NHWC', the shape of full convolution kernel will be + :math:`(2, C_{out}, *, ..., *, C_{in} / \text{group})`. + + For more details, please refer to the paper `Gradient Based Learning Applied to Document + Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. + + Note: + On Ascend platform, only group convolution in depthwise convolution scenarios is supported. + That is, when `group > 1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied. + + This is not a self-sufficient class. In order to construct a convolution layer, one should instantiate this + class and an implementor class, which acts like a bridge pattern and determines the exact set of hypercomplex + numbers. That implies the rules of multiplication and therefore affects how a convolution works. + + 'NHWC' format is supported only with GPU target device as of now. + + Args: + conv_impl (TConvImpl): The implementor object of the convolution layer. Essentially, the concrete class name + of this argument defines the algebra that the convolution layer will operate on. + in_channels (int): The channel number of the input tensor of the convolution layer. + out_channels (int): The channel number of the output tensor of the convolution layer.
+ kernel_size (Union[int, tuple[int]]): Specifies the spatial dimensions of the convolution kernel. + The data type is an integer or a tuple of integers. An integer represents the size of all the + spatial dimensions of the convolution kernel at once. A tuple of integers represents the spatial + dimensions of the convolution kernel individually. + stride (Union[int, tuple[int]]): The movement stride of the convolution kernel. + The data type is an integer or a tuple of integers. An integer represents the movement step size + in all directions at once. A tuple of integers represents the movement step size in every direction + individually. + pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". + + - same: The width of the output is the same as the value of the input divided by `stride`. + If this mode is set, the value of `padding` must be 0. + + - valid: Returns a valid calculated output without padding. Excess pixels that do not satisfy the + calculation will be discarded. If this mode is set, the value of `padding` must be 0. + + - pad: Pads the input. Padding `padding` size of zero on both sides of the input. + If this mode is set, the value of `padding` must be greater than or equal to 0. + + padding (Union[int, tuple[int]]): The number of padding on the spatial dimensions of the input. + The data type is an integer or a tuple of integers, which then must be twice as long as the number + of spatial dimensions. If `padding` is an integer, then all the leading and trailing paddings in + all dimensions are equal to `padding`. The value should be greater than or equal to 0. + If `padding` is a tuple of integers, then the paddings are enumerated pair-wise from the first to + the last spatial dimension, the first element of the pair being equal to the leading padding, + and the second element of the pair being equal to the trailing padding of the corresponding + spatial dimension. 
+ dilation (Union[int, tuple[int]]): Dilation size of convolution kernel. + The data type is an integer or a tuple of integers. If :math:`k > 1`, the kernel is sampled + every `k` elements. + group (int): Splits filter into groups, `in_channels` and `out_channels` must be + divisible by `group`. If the group is equal to `in_channels` and `out_channels`, + this convolution layer also can be called depthwise convolution layer. + has_bias (bool): Whether the convolution layer has a bias parameter. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter. + It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, + values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well + as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones' + and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of + Initializer for more details. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter. + Available initialization methods are the same as 'weight_init'. Refer to the values of + Initializer for more details. + data_format (str): The optional value for data format, is 'NHWC' or 'NCHW' or 'NCDHW'. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C_{in}, *, ..., *)` or :math:`(2, N, *, ..., *, C_{in})`, + with float16 or float32 data type, or :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})` + with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C_{out}, *, ..., *)` or + :math:`(2, N, *, ..., *, C_{out})`, with float16 or float32 data type, or :math:`(N, C_{out}, *, ..., *)` or + :math:`(N, *, ..., *, C_{out})`, with complex64 data type. + + Raises: + TypeError: If `in_channels`, `out_channels` or `group` is not an int. 
+ TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int not a tuple. + TypeError: If any two of `inp`, `weight_init` and `bias_init` are Tensors of different data type. + ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. + ValueError: If `padding` is less than 0. + ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. + ValueError: If `padding` is a tuple whose length is not twice as big as the count of spatial dimensions. + ValueError: If `pad_mode` is not equal to 'pad' and `padding` is a tuple which contains non-zero elements. + ValueError: If `data_format` is neither 'NCHW', 'NHWC', nor 'NCDHW', or it is 'NCHW' and the target + device is not GPU. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + conv_impl: Type[TConvImpl], + in_channels: int, + out_channels: int, + kernel_size: Tuple[int, ...], + stride: Tuple[int, ...], + pad_mode: str, + padding: Tuple[int, ...], + dilation: Tuple[int, ...], + group: int, + has_bias: bool, + weight_init: Union[Tensor, str, Initializer, numbers.Number], + bias_init: Union[Tensor, str, Initializer, numbers.Number], + data_format: str = 'NCHW', + transposed: bool = False) -> None: + """Initialize _ConvNd.""" + super(_ConvNd, self).__init__() + + self.in_channels = Validator.check_positive_int(in_channels, 'in_channels', self.cls_name) + self.out_channels = Validator.check_positive_int(out_channels, 'out_channels', self.cls_name) + self.kernel_size = kernel_size + self.stride = stride + self.pad_mode = pad_mode + self.weight_init = weight_init + self.bias_init = bias_init + self.data_format = Validator.check_string(data_format, + ['NCHW', 'NHWC', 'NCDHW'], + 'format', + self.cls_name) + if context.get_context("device_target") != "GPU" and self.data_format == "NHWC": + raise ValueError("NHWC format only support in GPU target.") + if isinstance(padding, int): + Validator.check_non_negative_int(padding, 
'padding', self.cls_name) + self.padding = padding + elif isinstance(padding, tuple): + for pad in padding: + Validator.check_non_negative_int(pad, 'padding item', self.cls_name) + self.padding = padding + else: + raise TypeError("padding type must be int/tuple(int) cannot be {}!".format(type(padding))) + + self.dilation = dilation + self.group = Validator.check_positive_int(group, 'group', self.cls_name) + self.has_bias = has_bias + for kernel_size_elem in kernel_size: + Validator.check_positive_int(kernel_size_elem, 'kernel_size item', self.cls_name) + for stride_elem in stride: + Validator.check_positive_int(stride_elem, 'stride item', self.cls_name) + for dilation_elem in dilation: + Validator.check_positive_int(dilation_elem, 'dilation item', self.cls_name) + if in_channels % group != 0: + raise ValueError(f"Attr 'in_channels' of {self.cls_name} Op must be divisible by " + f"attr 'group' of {self.cls_name} Op.") + if out_channels % group != 0: + raise ValueError(f"Attr 'out_channels' of {self.cls_name} Op must be divisible by " + f"attr 'group' of {self.cls_name} Op.") + if transposed: + shape = [in_channels, out_channels // group, *kernel_size] + else: + shape = [out_channels, *kernel_size, in_channels // group] if self.data_format == "NHWC" else \ + [out_channels, in_channels // group, *kernel_size] + self.dtype = self.weight_init.dtype if isinstance(self.weight_init, Tensor) else None + + # Weight initialization + self.conv_impl = conv_impl(self.weight_init, shape, data_format=data_format) + + # Bias initialization + if Validator.check_bool(has_bias, "has_bias", self.cls_name): + if isinstance(bias_init, Tensor): + if self.dtype is None: + self.dtype = bias_init.dtype + elif self.dtype != bias_init.dtype: + raise TypeError("Data type of the weight_init tensor and the bias init tensor must be equal, " + f"but got weight_init.dtype={self.dtype} and bias_init.dtype={bias_init.dtype}") + bias_init_x, bias_init_y = get_x_and_y(bias_init) + else: + bias_init_x = bias_init_y = bias_init +
self.bias_x = Parameter(initializer(bias_init_x, [out_channels]), name='bias_x') + self.bias_y = Parameter(initializer(bias_init_y, [out_channels]), name='bias_y') + self.bias_add = P.BiasAdd() + else: + if self.bias_init != 'zeros': + logger.warning("Value of 'has_bias' is False, value of 'bias_init' will be ignored.") + self.bias_x = None + self.bias_y = None + self.bias_add = None + + def construct(self, u: Tensor) -> Tensor: + if self.dtype is not None and self.dtype != u.dtype: + raise TypeError("dtype must be equal to the data type of the inputs tensor, but got: " + f"dtype={self.dtype} and inputs.dtype={u.dtype}") + x, y = get_x_and_y(u) + out_x, out_y = self._construct(x, y) + out = to_2channel(out_x, out_y, u.dtype) + return out + + def extend_repr(self): + """extend representation""" + s = 'input_channels={}, output_channels={}, kernel_size={}, ' \ + 'stride={}, pad_mode={}, padding={}, dilation={}, ' \ + 'group={}, has_bias={}, ' \ + 'weight_init={}, bias_init={}, format={}'.format( + self.in_channels, + self.out_channels, + self.kernel_size, + self.stride, + self.pad_mode, + self.padding, + self.dilation, + self.group, + self.has_bias, + self.weight_init, + self.bias_init, + self.data_format) + return s + + @abstractmethod + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + pass + + def _check_input_5dims(self, input_shape: tuple) -> None: + if len(input_shape) != 5: + raise ValueError(f"For {self.cls_name}, input should be 5 dims, but got shape {input_shape}.") + + +class Conv2d(_ConvNd): + r""" + 2D convolution layer on the second-order hypercomplex input. + + Calculates the 2D convolution on the input tensor which is typically of shape + :math:`(2, N, C_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, :math:`C_{in}` is a number of channels, + :math:`H_{in}, W_{in}` are the height and width of the feature layer respectively. The formula is defined as: + + .. 
math:: + + \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + + \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{inp}(N_i, k)}) + + where :math:`ccor` is the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, the exact + implementation of which is defined and provided by the implementor part of the convolution layer, + :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of + the output and :math:`j` is in the range of :math:`[0, C_{out}-1]`. :math:`\text{weight}(C_{\text{out}_j}, k)` + is a convolution kernel slice with shape :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`, + where :math:`\text{kernel_size[0]}` and :math:`\text{kernel_size[1]}` are the height and width of the convolution + kernel respectively. :math:`\text{bias}` is the bias parameter and :math:`\text{inp}` is the input tensor. + In this case, `data_format` of the input tensor is 'NCHW' and the shape of full convolution kernel is + :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`, + where `group` is the number of groups to split the input `inp` in the channel dimension. If `data_format` of the + input tensor is 'NHWC', the shape of full convolution kernel will be + :math:`(C_{out}, \text{kernel_size[0]}, \text{kernel_size[1]}, C_{in} / \text{group})`. + + For more details, please refer to the paper `Gradient Based Learning Applied to Document + Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. + + Note: + On Ascend platform, only group convolution in depthwise convolution scenarios is supported. + That is, when `group > 1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied. + + 'NHWC' format is supported only with GPU target device as of now. + + Args: + conv_impl (TConvImpl): The implementor object of the convolution layer. Essentially, the concrete class name + of this argument defines the algebra that the convolution layer will operate on.
+ in_channels (int): The channel number of the input tensor of the Conv2d layer. + out_channels (int): The channel number of the output tensor of the Conv2d layer. + kernel_size (Union[int, tuple[int]]): Specifies the height and width of the 2D convolution kernel. + The data type is an integer or a tuple of two integers. An integer represents the height + and width of the convolution kernel. A tuple of two integers represents the height + and width of the convolution kernel respectively. + stride (Union[int, tuple[int]]): The movement stride of the 2D convolution kernel. + The data type is an integer or a tuple of two integers. An integer represents the movement step size + in both height and width directions. A tuple of two integers represents the movement step size in the + height and width directions respectively. Default: 1. + pad_mode (str): Specifies padding mode. The optional values are + "same", "valid", "pad". Default: "same". + + - same: The width of the output is the same as the value of the input divided by `stride`. + If this mode is set, the value of `padding` must be 0. + + - valid: Returns a valid calculated output without padding. Excess pixels that do not satisfy the + calculation will be discarded. If this mode is set, the value of `padding` must be 0. + + - pad: Pads the input. Padding `padding` size of zero on both sides of the input. + If this mode is set, the value of `padding` must be greater than or equal to 0. + + padding (Union[int, tuple[int]]): The number of padding on the height and width directions of the input. + The data type is an integer or a tuple of four integers. If `padding` is an integer, + then the top, bottom, left, and right padding are all equal to `padding`. + If `padding` is a tuple of 4 integers, then the top, bottom, left, and right padding + is equal to `padding[0]`, `padding[1]`, `padding[2]`, and `padding[3]` respectively. + The value should be greater than or equal to 0. Default: 0. 
+ dilation (Union[int, tuple[int]]): Dilation size of 2D convolution kernel. + The data type is an integer or a tuple of two integers. If :math:`k > 1`, the kernel is sampled + every `k` elements. The value of `k` on the height and width directions is in range of [1, H] + and [1, W] respectively. Default: 1. + group (int): Splits filter into groups, `in_channels` and `out_channels` must be + divisible by `group`. If the group is equal to `in_channels` and `out_channels`, + this 2D convolution layer also can be called 2D depthwise convolution layer. Default: 1. + has_bias (bool): Whether the Conv2d layer has a bias parameter. Default: False. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter. + It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, + values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well + as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones' + and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of + Initializer for more details. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter. + Available initialization methods are the same as 'weight_init'. Refer to the values of + Initializer for more details. Default: 'zeros'. + data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'. + Default: 'NCHW'. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C_{in}, H_{in}, W_{in})` or + :math:`(2, N, H_{in}, W_{in}, C_{in})`, with float16 or float32 data type, or + :math:`(N, C_{in}, H_{in}, W_{in})` or :math:`(N, H_{in}, W_{in}, C_{in})`, with complex64 data type. 
+ + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C_{out}, H_{out}, W_{out})` or + :math:`(2, N, H_{out}, W_{out}, C_{out})`, with float16 or float32 data type, or + :math:`(N, C_{out}, H_{out}, W_{out})` or :math:`(N, H_{out}, W_{out}, C_{out})`, with complex64 data type. + + pad_mode is 'same': + + .. math:: + \begin{array}{ll} \\ + H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[0]}}} \right \rceil \\ + W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[1]}}} \right \rceil \\ + \end{array} + + pad_mode is 'valid': + + .. math:: + \begin{array}{ll} \\ + H_{out} = \left \lceil{\frac{H_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) } + {\text{stride[0]}}} \right \rceil \\ + W_{out} = \left \lceil{\frac{W_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) } + {\text{stride[1]}}} \right \rceil \\ + \end{array} + + pad_mode is 'pad': + + .. math:: + \begin{array}{ll} \\ + H_{out} = \left \lfloor{\frac{H_{in} + padding[0] + padding[1] - (\text{kernel_size[0]} - 1) \times + \text{dilation[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\ + W_{out} = \left \lfloor{\frac{W_{in} + padding[2] + padding[3] - (\text{kernel_size[1]} - 1) \times + \text{dilation[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\ + \end{array} + + Raises: + TypeError: If `in_channels`, `out_channels` or `group` is not an int. + TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int not a tuple. + TypeError: If any two of `inp`, `weight_init` and `bias_init` are Tensors of different data type. + ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. + ValueError: If `padding` is less than 0. + ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. + ValueError: If `padding` is a tuple whose length is not equal to 4. + ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0). 
+ ValueError: If `data_format` is neither 'NCHW' not 'NHWC'. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + conv_impl: Type[TConvImpl], + in_channels: int, + out_channels: int, + kernel_size: _size_2_t, + stride: _size_2_t = 1, + pad_mode: str = 'same', + padding: _size_2_t = 0, + dilation: _size_2_t = 1, + group: int = 1, + has_bias: bool = False, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + data_format: str = 'NCHW') -> None: + """Initialize Conv2d.""" + kernel_size = twice(kernel_size) + stride = twice(stride) + self._dilation = dilation + dilation = twice(dilation) + super(Conv2d, self).__init__(conv_impl, + in_channels, + out_channels, + kernel_size, + stride, + pad_mode, + padding, + dilation, + group, + has_bias, + weight_init, + bias_init, + data_format) + self.conv2d = P.Conv2D(out_channel=self.out_channels, + kernel_size=self.kernel_size, + mode=1, + pad_mode=self.pad_mode, + pad=self.padding, + stride=self.stride, + dilation=self.dilation, + group=self.group, + data_format=self.data_format) + self.bias_add = P.BiasAdd(data_format=self.data_format) + + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + out_x, out_y = self.conv_impl(self.conv2d, x, y) + if self.has_bias: + out_x = self.bias_add(out_x, self.bias_x) + out_y = self.bias_add(out_y, self.bias_y) + return out_x, out_y + + +class Conv1d(_ConvNd): + r""" + 1D convolution layer on the second-order hypercomplex input. + + Calculates the 1D convolution on the input tensor which is typically of shape :math:`(2, N, C_{in}, L_{in})`, + where :math:`N` is batch size, :math:`C_{in}` is a number of channels and :math:`L_{in}` is a length of sequence. + The formula is defined as: + + .. 
math:: + + \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + + \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{inp}(N_i, k)}) + + where :math:`ccor` is the `cross-correlation `_, the exact + implementation of which is defined and provided by the implementor part of the convolution layer, + :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of + the output and :math:`j` is in the range of :math:`[0, C_{out}-1]`. :math:`\text{weight}(C_{\text{out}_j}, k)` + is a convolution kernel slice with shape :math:`\text{kernel_size}`, where :math:`\text{kernel_size}` + is the width of the convolution kernel. :math:`\text{bias}` is the bias parameter, + and :math:`\text{inp}` is the input tensor. The shape of full convolution kernel is + :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size})`, + where `group` is the number of groups to split the input `inp` in the channel dimension. + + For more details, please refers to the paper `Gradient Based Learning Applied to Document + Recognition `_. + + Note: + On Ascend platform, only group convolution in depthwise convolution scenarios is supported. + That is, when `group > 1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied. + + Args: + conv_impl (TConvImpl): The implementor object of the convolution layer. Essentially, the concrete class name + of this argument defines the algebra that the convolution layer will operate on. + in_channels (int): The channel number of the input tensor of the Conv1d layer. + out_channels (int): The channel number of the output tensor of the Conv1d layer. + kernel_size (int): Specifies the width of the 1D convolution kernel. + stride (int): The movement stride of the 1D convolution kernel. Default: 1. + pad_mode (str): Specifies padding mode. The optional values are + "same", "valid", "pad". Default: "same". 
+ + - same: The width of the output is the same as the value of the input divided by `stride`. + If this mode is set, the value of `padding` must be 0. + + - valid: Returns a valid calculated output without padding. Excess pixels that do not satisfy the + calculation will be discarded. If this mode is set, the value of `padding` must be 0. + + - pad: Pads the input. Padding `padding` size of zero on both sides of the input. + If this mode is set, the value of `padding` must be greater than or equal to 0. + + padding (int): The number of padding on both sides of input. + The value should be greater than or equal to 0. Default: 0. + dilation (int): Dilation size of 1D convolution kernel. If :math:`k > 1`, the kernel is sampled + every `k` elements. The value of `k` is in range of [1, L]. Default: 1. + group (int): Splits filter into groups, `in_channels` and `out_channels` must be + divisible by `group`. Default: 1. + has_bias (bool): Whether the Conv1d layer has a bias parameter. Default: False. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter. + It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, + values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well + as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones' + and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of + Initializer for more details. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter. + Available initialization methods are the same as 'weight_init'. Refer to the values of + Initializer for more details. Default: 'zeros'. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C_{in}, L_{in})`, with float16 or float32 data type, + or :math:`(N, C_{in}, L_{in})`, with complex64 data type. 
+ + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C_{out}, L_{out})`, with float16 or float32 + data type, or :math:`(N, C_{out}, L_{out})`, with complex64 data type. + + pad_mode is 'same': + + .. math:: + L_{out} = \left \lceil{\frac{L_{in}}{\text{stride}}} \right \rceil + + pad_mode is 'valid': + + .. math:: + L_{out} = \left \lceil{\frac{L_{in} - \text{dilation} \times (\text{kernel_size} - 1) } + {\text{stride}}} \right \rceil + + pad_mode is 'pad': + + .. math:: + L_{out} = \left \lfloor{\frac{L_{in} + 2 \times padding - (\text{kernel_size} - 1) \times + \text{dilation} - 1 }{\text{stride}} + 1} \right \rfloor + + Raises: + TypeError: If `in_channels`, `out_channels`, `kernel_size`, `stride`, `padding` or `dilation` is not an int. + TypeError: If any two of `inp`, `weight_init` and `bias_init` are Tensors of different data type. + ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. + ValueError: If `padding` is less than 0. + ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + conv_impl: Type[TConvImpl], + in_channels: int, + out_channels: int, + kernel_size: _size_1_t, + stride: _size_1_t = 1, + pad_mode: str = 'same', + padding: _size_1_t = 0, + dilation: _size_1_t = 1, + group: int = 1, + has_bias: bool = False, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros') -> None: + """Initialize Conv1d.""" + Validator.check_value_type("kernel_size", kernel_size, [int], self.cls_name) + Validator.check_value_type("stride", stride, [int], self.cls_name) + Validator.check_value_type("padding", padding, [int], self.cls_name) + Validator.check_value_type("dilation", dilation, [int], self.cls_name) + Validator.check_int(kernel_size, 1, Rel.GE, 'kernel_size', self.cls_name) + Validator.check_int(stride, 1, Rel.GE, 'stride', self.cls_name) + Validator.check_non_negative_int(padding, 'padding', self.cls_name) + Validator.check_int(dilation, 1, Rel.GE, 'dilation', self.cls_name) + kernel_size = (1, kernel_size) + stride = (1, stride) + dilation = (1, dilation) + get_shape = P.Shape() + get_dtype = P.DType() + if isinstance(weight_init, Tensor): + weight_init_shape = get_shape(weight_init) + Validator.check_equal_int(len(weight_init_shape), 3, 'weight_init_shape', self.cls_name) + weight_init_dtype = get_dtype(weight_init) + weight_init_value = weight_init.asnumpy() + weight_init_value = np.expand_dims(weight_init_value, 2) + weight_init = Tensor(weight_init_value, weight_init_dtype) + + super(Conv1d, self).__init__(conv_impl, + in_channels, + out_channels, + kernel_size, + stride, + pad_mode, + padding, + dilation, + group, + has_bias, + weight_init, + bias_init) + self.padding = (0, 0, padding, padding) + Validator.check_string(pad_mode, ['valid', 'same', 'pad'], 'pad_mode', self.cls_name) + self.conv2d = P.Conv2D(out_channel=self.out_channels, + 
kernel_size=self.kernel_size, + mode=1, + pad_mode=self.pad_mode, + pad=self.padding, + stride=self.stride, + dilation=self.dilation, + group=self.group) + self.bias_add = P.BiasAdd() + if pad_mode not in ('valid', 'same', 'pad'): + raise ValueError('Attr \'pad_mode\' of \'Conv1d\' Op passed ' + + str(pad_mode) + ', should be one of values in \'valid\', \'same\', \'pad\'.') + self.expand_dims = P.ExpandDims() + self.squeeze = P.Squeeze(2) + self.shape = P.Shape() + + def _check_input_3d(self, input_shape: tuple): + if len(input_shape) != 3: + raise ValueError(f"For '{self.cls_name}', the dimension of input must be 3d, but got {len(input_shape)}.") + return None + + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + x_shape = self.shape(x) + self._check_input_3d(x_shape) + x = self.expand_dims(x, 2) + y = self.expand_dims(y, 2) + out_x, out_y = self.conv_impl(self.conv2d, x, y) + if self.has_bias: + out_x = self.bias_add(out_x, self.bias_x) + out_y = self.bias_add(out_y, self.bias_y) + out_x = self.squeeze(out_x) + out_y = self.squeeze(out_y) + return out_x, out_y + + +class Conv3d(_ConvNd): + r""" + 3D convolution layer on the second-order hypercomplex input. + + Calculates the 3D convolution on the input tensor which is typically of shape + :math:`(2, N, C_{in}, D_{in}, H_{in}, W_{in})`, + where :math:`N` is batch size, :math:`C_{in}` is a number of channels, + :math:`D_{in}, H_{in}, W_{in}` are the depth, height and width of the feature layer respectively. + The formula is defined as: + + .. 
math:: + + \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + + \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{inp}(N_i, k)}) + + where :math:`ccor` is the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, the exact + implementation of which is defined and provided by the implementor part of the convolution layer, + :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of + the output and :math:`j` is in the range of :math:`[0,C_{out}-1]`. :math:`\text{weight}(C_{\text{out}_j}, k)` + is a convolution kernel slice with shape + :math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`, + where :math:`\text{kernel_size[0]}`, :math:`\text{kernel_size[1]}` and :math:`\text{kernel_size[2]}` are + the depth, height and width of the convolution kernel respectively. :math:`\text{bias}` is the bias parameter + and :math:`\text{inp}` is the input tensor. + The shape of the full convolution kernel is + :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`, + where `group` is the number of groups to split the input `inp` in the channel dimension. + + For more details, please refer to the paper `Gradient Based Learning Applied to Document + Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. + + Note: + On Ascend platform, only group convolution in depthwise convolution scenarios is supported. + That is, when `group > 1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied. + + Args: + conv_impl (TConvImpl): The implementor object of the convolution layer. Essentially, the concrete class name + of this argument defines the algebra that the convolution layer will operate on. + in_channels (int): The channel number of the input tensor of the Conv3d layer. + out_channels (int): The channel number of the output tensor of the Conv3d layer. + kernel_size (Union[int, tuple[int]]): Specifies the depth, height and width of the 3D convolution kernel. 
+ The data type is an integer or a tuple of three integers. An integer represents the depth, height + and width of the convolution kernel. A tuple of three integers represents the depth, height + and width of the convolution kernel respectively. + stride (Union[int, tuple[int]]): The movement stride of the 3D convolution kernel. + The data type is an integer or a tuple of three integers. An integer represents the movement step size + in depth, height and width directions. A tuple of three integers represents the movement step size + in the depth, height and width directions respectively. Default: 1. + pad_mode (str): Specifies padding mode. The optional values are + "same", "valid", "pad". Default: "same". + + - same: The width of the output is the same as the value of the input divided by `stride`. + If this mode is set, the value of `padding` must be 0. + + - valid: Returns a valid calculated output without padding. Excess pixels that do not satisfy the + calculation will be discarded. If this mode is set, the value of `padding` must be 0. + + - pad: Pads the input. Padding `padding` size of zero on both sides of the input. + If this mode is set, the value of `padding` must be greater than or equal to 0. + + padding (Union(int, tuple[int])): The number of padding on the depth, height and width directions of the input. + The data type is an integer or a tuple of six integers. If `padding` is an integer, + then the head, tail, top, bottom, left, and right padding are all equal to `padding`. + If `padding` is a tuple of six integers, then the head, tail, top, bottom, left, and right padding + is equal to `padding[0]`, `padding[1]`, `padding[2]`, `padding[3]`, `padding[4]` and `padding[5]` + respectively. The value should be greater than or equal to 0. Default: 0. + dilation (Union[int, tuple[int]]): Dilation size of 3D convolution kernel. + The data type is an integer or a tuple of three integers. If :math:`k > 1`, the kernel is sampled + every `k` elements. 
The value of `k` on the depth, height and width directions is in range of + [1, D], [1, H] and [1, W] respectively. Default: 1. + group (int): Splits filter into groups, `in_channels` and `out_channels` must be + divisible by `group`. Default: 1. Only 1 is currently supported. + has_bias (bool): Whether the Conv3d layer has a bias parameter. Default: False. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter. + It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, + values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well + as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones' + and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of + Initializer for more details. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter. + Available initialization methods are the same as 'weight_init'. Refer to the values of + Initializer for more details. Default: 'zeros'. + data_format (str): The optional value for data format. Currently only support "NCDHW". + + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C_{in}, D_{in}, H_{in}, W_{in})`, with float16 or float32 + data type, or :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`, with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C_{out}, D_{out}, H_{out}, W_{out})`, with + float16 or float32 data type, or :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`, with complex64 data type. + + pad_mode is 'same': + + .. 
math:: + \begin{array}{ll} \\ + D_{out} = \left \lceil{\frac{D_{in}}{\text{stride[0]}}} \right \rceil \\ + H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[1]}}} \right \rceil \\ + W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[2]}}} \right \rceil \\ + \end{array} + + + pad_mode is 'valid': + + .. math:: + \begin{array}{ll} \\ + D_{out} = \left \lfloor{\frac{D_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) - 1 } + {\text{stride[0]}} + 1} \right \rfloor \\ + H_{out} = \left \lfloor{\frac{H_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) - 1 } + {\text{stride[1]}} + 1} \right \rfloor \\ + W_{out} = \left \lfloor{\frac{W_{in} - \text{dilation[2]} \times (\text{kernel_size[2]} - 1) - 1 } + {\text{stride[2]}} + 1} \right \rfloor \\ + \end{array} + + pad_mode is 'pad': + + .. math:: + \begin{array}{ll} \\ + D_{out} = \left \lfloor{\frac{D_{in} + padding[0] + padding[1] - (\text{kernel_size[0]} - 1) \times + \text{dilation[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\ + H_{out} = \left \lfloor{\frac{H_{in} + padding[2] + padding[3] - (\text{kernel_size[1]} - 1) \times + \text{dilation[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\ + W_{out} = \left \lfloor{\frac{W_{in} + padding[4] + padding[5] - (\text{kernel_size[2]} - 1) \times + \text{dilation[2]} - 1 }{\text{stride[2]}} + 1} \right \rfloor \\ + \end{array} + + Raises: + TypeError: If `in_channels`, `out_channels` or `group` is not an int. + TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple. + TypeError: If any two of `inp`, `weight_init` and `bias_init` are Tensors of different data type. + ValueError: If `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. + ValueError: If `padding` is less than 0. + ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. + ValueError: If `padding` is a tuple whose length is not equal to 6. + ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0, 0, 0). 
+ ValueError: If `data_format` is not 'NCDHW'. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + conv_impl: Type[TConvImpl], + in_channels: int, + out_channels: int, + kernel_size: _size_3_t, + stride: _size_3_t = (1, 1, 1), + pad_mode: str = 'same', + padding: _size_3_t = 0, + dilation: _size_3_t = (1, 1, 1), + group: int = 1, + has_bias: bool = False, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + data_format: str = 'NCDHW') -> None: + """Initialize Conv3d.""" + self.conv_impl = conv_impl + kernel_size = _check_3d_int_or_tuple("kernel_size", kernel_size, self.cls_name) + stride = _check_3d_int_or_tuple("stride", stride, self.cls_name) + dilation = _check_3d_int_or_tuple("dilation", dilation, self.cls_name) + Validator.check_value_type('padding', padding, (int, tuple), self.cls_name) + if isinstance(padding, tuple): + Validator.check_equal_int(len(padding), 6, 'padding size', self.cls_name) + super(Conv3d, self).__init__(conv_impl, + in_channels, + out_channels, + kernel_size, + stride, + pad_mode, + padding, + dilation, + group, + has_bias, + weight_init, + bias_init, + data_format) + self.conv3d = P.Conv3D(out_channel=self.out_channels, + kernel_size=self.kernel_size, + mode=1, + pad_mode=self.pad_mode, + pad=self.padding, + stride=self.stride, + dilation=self.dilation, + group=self.group, + data_format=self.data_format) + self.bias_add = P.BiasAdd(data_format=self.data_format) + self.shape = P.Shape() + + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + x_shape = self.shape(x) + self._check_input_5dims(x_shape) + out_x, out_y = self.conv_impl(self.conv3d, x, y) + if self.has_bias: + out_x = self.bias_add(out_x, self.bias_x) + out_y = self.bias_add(out_y, self.bias_y) + return out_x, out_y + + +class Conv3dTranspose(_ConvNd): + r""" + 3D transposed convolution layer on the second-order hypercomplex 
input. + + Calculates a 3D transposed convolution, which can be regarded as Conv3d for the gradient of the input. + It is also called deconvolution (although it is not an actual deconvolution). + + The input is typically of shape :math:`(2, N, C, D_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, + :math:`C` is the number of channels, :math:`D_{in}, H_{in}, W_{in}` are the depth, height and width of + the feature layer respectively. + + When Conv3d and Conv3dTranspose are initialized with the same parameters, and `pad_mode` is set to 'pad', + :math:`dilation * (kernel\_size - 1) - padding` amount of zero will be padded to the depth, height and width + directions of the input; in this case they are inverses of each other with regard to input and output shapes. + However, when `stride` > 1, Conv3d maps multiple input shapes to the same output shape. For background, refer to + `Deconvolutional Networks <https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf>`_. + + Args: + conv_impl (TConvImpl): The implementor object of the convolution layer. Essentially, the concrete class name + of this argument defines the algebra that the convolution layer will operate on. + in_channels (int): The channel number of the input tensor of the Conv3dTranspose layer. + out_channels (int): The channel number of the output tensor of the Conv3dTranspose layer. + kernel_size (Union[int, tuple[int]]): Specifies the depth, height and width of the 3D convolution kernel. + The data type is an integer or a tuple of three integers. An integer represents the depth, height + and width of the convolution kernel. A tuple of three integers represents the depth, height + and width of the convolution kernel respectively. + stride (Union[int, tuple[int]]): The movement stride of the 3D convolution kernel. + The data type is an integer or a tuple of three integers. An integer represents the movement step size + in depth, height and width directions. 
A tuple of three integers represents the movement step size + in the depth, height and width directions respectively. Default: 1. + pad_mode (str): Specifies padding mode. The optional values are + "same", "valid", "pad". Default: "same". + + - same: The width of the output is the same as the value of the input divided by `stride`. + If this mode is set, the value of `padding` must be 0. + + - valid: Returns a valid calculated output without padding. Excess pixels that do not satisfy the + calculation will be discarded. If this mode is set, the value of `padding` must be 0. + + - pad: Pads the input. Padding `padding` size of zero on both sides of the input. + If this mode is set, the value of `padding` must be greater than or equal to 0. + + padding (Union(int, tuple[int])): The number of padding on the depth, height and width directions of the input. + The data type is an integer or a tuple of six integers. If `padding` is an integer, + then the head, tail, top, bottom, left, and right padding are all equal to `padding`. + If `padding` is a tuple of six integers, then the head, tail, top, bottom, left, and right padding + is equal to `padding[0]`, `padding[1]`, `padding[2]`, `padding[3]`, `padding[4]` and `padding[5]` + respectively. The value should be greater than or equal to 0. Default: 0. + dilation (Union[int, tuple[int]]): Dilation size of 3D convolution kernel. + The data type is an integer or a tuple of three integers. If :math:`k > 1`, the kernel is sampled + every `k` elements. The value of `k` on the depth, height and width directions is in range of + [1, D], [1, H] and [1, W] respectively. Default: 1. + group (int): Splits filter into groups, `in_channels` and `out_channels` must be + divisible by `group`. Default: 1. Only 1 is currently supported. + output_padding (Union(int, tuple[int])): The number of padding on the depth, height and width directions of + the output. The data type is an integer or a tuple of six integers. 
If `output_padding` is an integer, + then the head, tail, top, bottom, left, and right padding are all equal to `output_padding`. + If `output_padding` is a tuple of six integers, then the head, tail, top, bottom, left, and right padding + is equal to `output_padding[0]`, `output_padding[1]`, `output_padding[2]`, `output_padding[3]`, + `output_padding[4]` and `output_padding[5]` respectively. The value should be greater than or equal to 0. + Default: 0. + has_bias (bool): Whether the Conv3dTranspose layer has a bias parameter. Default: False. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter. + It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, + values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well + as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones' + and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of + Initializer for more details. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter. + Available initialization methods are the same as 'weight_init'. Refer to the values of + Initializer for more details. Default: 'zeros'. + data_format (str): The optional value for data format. Currently only support 'NCDHW'. + + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C_{in}, D_{in}, H_{in}, W_{in})`, with float16 and float32 + data type, or :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`, with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C_{out}, D_{out}, H_{out}, W_{out})`, with + float16 or float32 data type, or :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`, with complex64 data type. + + pad_mode is 'same': + + .. 
math:: + \begin{array}{ll} \\ + D_{out} = \left \lfloor{\frac{D_{in}}{\text{stride[0]}} + 1} \right \rfloor \\ + H_{out} = \left \lfloor{\frac{H_{in}}{\text{stride[1]}} + 1} \right \rfloor \\ + W_{out} = \left \lfloor{\frac{W_{in}}{\text{stride[2]}} + 1} \right \rfloor \\ + \end{array} + + + pad_mode is 'valid': + + .. math:: + \begin{array}{ll} \\ + D_{out} = \left \lfloor{\frac{D_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) } + {\text{stride[0]}} + 1} \right \rfloor \\ + H_{out} = \left \lfloor{\frac{H_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) } + {\text{stride[1]}} + 1} \right \rfloor \\ + W_{out} = \left \lfloor{\frac{W_{in} - \text{dilation[2]} \times (\text{kernel_size[2]} - 1) } + {\text{stride[2]}} + 1} \right \rfloor \\ + \end{array} + + pad_mode is 'pad': + + .. math:: + \begin{array}{ll} \\ + D_{out} = \left \lfloor{\frac{D_{in} + padding[0] + padding[1] - (\text{dilation[0]} - 1) \times + \text{kernel_size[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\ + H_{out} = \left \lfloor{\frac{H_{in} + padding[2] + padding[3] - (\text{dilation[1]} - 1) \times + \text{kernel_size[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\ + W_{out} = \left \lfloor{\frac{W_{in} + padding[4] + padding[5] - (\text{dilation[2]} - 1) \times + \text{kernel_size[2]} - 1 }{\text{stride[2]}} + 1} \right \rfloor \\ + \end{array} + + Raises: + TypeError: If `in_channels`, `out_channels` or `group` is not an int. + TypeError: If `kernel_size`, `stride`, `padding` , `dilation` or `output_padding` + is neither an int not a tuple of three. + TypeError: If any two of `inp`, `weight_init` and `bias_init` are Tensors of different data type. + TypeError: If input data type is not float16 or float32. + ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. + ValueError: If `padding` is less than 0. + ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. 
+ ValueError: If `padding` is a tuple whose length is not equal to 6. + ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0, 0, 0). + ValueError: If `data_format` is not 'NCDHW'. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + conv_impl: Type[TConvImpl], + in_channels: int, + out_channels: int, + kernel_size: _size_3_t, + stride: _size_3_t = (1, 1, 1), + pad_mode: str = 'same', + padding: _size_3_t = 0, + dilation: _size_3_t = (1, 1, 1), + group: int = 1, + output_padding: int = 0, + has_bias: bool = False, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + data_format: str = 'NCDHW') -> None: + """Initialize Conv3dTranspose.""" + self.conv_impl = conv_impl + kernel_size = _check_3d_int_or_tuple("kernel_size", kernel_size, self.cls_name) + stride = _check_3d_int_or_tuple("stride", stride, self.cls_name) + dilation = _check_3d_int_or_tuple("dilation", dilation, self.cls_name) + Validator.check_value_type('padding', padding, (int, tuple), self.cls_name) + if isinstance(padding, tuple): + Validator.check_equal_int(len(padding), 6, 'padding size', self.cls_name) + self.output_padding = _check_3d_int_or_tuple("output_padding", output_padding, self.cls_name, + greater_zero=False) + super(Conv3dTranspose, self).__init__(conv_impl, + in_channels, + out_channels, + kernel_size, + stride, + pad_mode, + padding, + dilation, + group, + has_bias, + weight_init, + bias_init, + data_format, + transposed=True) + self.conv3d_transpose = P.Conv3DTranspose(in_channel=self.in_channels, + out_channel=self.out_channels, + kernel_size=self.kernel_size, + mode=1, + pad_mode=self.pad_mode, + pad=self.padding, + stride=self.stride, + dilation=self.dilation, + group=self.group, + output_padding=self.output_padding, + data_format=self.data_format) + self.bias_add = P.BiasAdd(data_format=self.data_format) + self.shape = 
P.Shape() + + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + x_shape = self.shape(x) + self._check_input_5dims(x_shape) + out_x, out_y = self.conv_impl(self.conv3d_transpose, x, y) + if self.has_bias: + out_x = self.bias_add(out_x, self.bias_x) + out_y = self.bias_add(out_y, self.bias_y) + return out_x, out_y diff --git a/mindspore/python/mindspore/hypercomplex/hypercomplex/hc_dense.py b/mindspore/python/mindspore/hypercomplex/hypercomplex/hc_dense.py new file mode 100644 index 00000000000..09b6f015be7 --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/hypercomplex/hc_dense.py @@ -0,0 +1,200 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Hypercomplex Dense""" +import numbers +from typing import TypeVar, Type, Union + +import mindspore +import mindspore.nn as nn +from mindspore._checkparam import Validator +from mindspore.common.initializer import initializer, Initializer +from mindspore.common.parameter import Parameter +from mindspore.common.tensor import Tensor +from mindspore.ops import operations as P +from mindspore.hypercomplex.hypercomplex._hc_dense_impl import _DenseImpl as DenseImpl +from mindspore.hypercomplex.utils import get_x_and_y, to_2channel + + +TDenseImpl = TypeVar('TDenseImpl', bound=DenseImpl) + + +class Dense(nn.Cell): + r""" + The abstract part of dense connected layer. 
+ + Applies dense connected layer for the second-order hypercomplex input. This layer implements the operation as: + + .. math:: + \text{out} = \text{linear}(\text{inp}, \text{kernel}) + \text{bias}, + + where :math:`inp` is the hypercomplex input tensors, :math:`\text{linear}` is the linear transformation operation, + which is defined and provided by the implementor part of the dense connected layer, :math:`\text{kernel}` is + a hypercomplex weight matrix with the same data type as the :math:`inp` created by the layer, and + :math:`\text{bias}` is a hypercomplex bias vector with the same data type as the :math:`inp` created by the layer + (only if has_bias is True). + + This is not a self-sufficient class. In order to construct a fully connected layer, one should instantiate this + class and an implementor class, which acts like a strategy pattern and determine the exact set of hypercomplex + numbers. That implies the rules of multiplication and therefore affects how a linear transformation works. + + Args: + dense_impl(DenseImpl): The implementor object of the dense connected layer. Essentially, the concrete class + name of this argument defines the algebra that the dense layer will operate on. + in_channels (int): The number of channels in the input space. + out_channels (int): The number of channels in the output space. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is + same as `inp`. The values of str refer to the function `initializer`. Default: 'zeros'. + has_bias (bool): Specifies whether the layer uses a bias vector. Default: True. 
+ + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, *, ..., *, in\_channels)`, with float16 or float32 data type, + or :math:`(*, ..., *, in\_channels)`, with complex64 data type. In the former case '2' denotes that the input + tensor belongs to the hypercomplex domain and has got a real and an imaginary parts. The `in_channels` in + `Args` has to be equal to :math:`in\_channels` in `Inputs`. The count of mediator dimensions denoted by '*' + is arbitrary but must be at least one. + + Outputs: + Tensor of the same data type as 'inp' and of shape :math:`(2, *, ..., *, out\_channels)`, with float16 or + float32 data type, or :math:`(*, ..., *, out\_channels)`, with complex64 data type. The count of mediator + dimensions is the same as one in 'inp'. + + Raises: + TypeError: If `in_channels` or `out_channels` is not an int. + TypeError: If `has_bias` is not a bool. + TypeError: If any two of `inp`, `weight_init` and `bias_init` are Tensors of different data type. + ValueError: If length of shape of `weight_init` is not equal to 3, + or shape[0] of 'weight_init' is not equal to 2, + or shape[1] of `weight_init` is not equal to `out_channels`, + or shape[2] of `weight_init` is not equal to `in_channels`. + ValueError: If length of shape of `bias_init` is not equal to 2, + or shape[0] of 'bias_init' is not equal to 2, + or shape[1] of `bias_init` is not equal to `out_channels`. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + dense_impl: Type[TDenseImpl], + in_channels: int, + out_channels: int, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + has_bias: bool = True) -> None: + """Initialize Dense.""" + super(Dense, self).__init__() + self.in_channels = Validator.check_positive_int(in_channels, "in_channels", self.cls_name) + self.out_channels = Validator.check_positive_int(out_channels, "out_channels", self.cls_name) + self.has_bias = Validator.check_bool(has_bias, "has_bias", self.cls_name) + self.dtype = None + self.reshape = P.Reshape() + self.shape_op = P.Shape() + + self.weight_x = None + self.weight_y = None + if isinstance(weight_init, Tensor): + self.dtype = weight_init.dtype + if self.dtype in [mindspore.float16, mindspore.float32] and ( \ + weight_init.ndim != 3 + or weight_init.shape[0] != 2 \ + or weight_init.shape[1] != out_channels \ + or weight_init.shape[2] != in_channels): + raise ValueError(f"For '{self.cls_name}', weight init shape error. The ndim of 'weight_init' must " + f"be equal to 3, and the first dim must be equal to 2, and the second dim must be " + f"equal to 'out_channels', and the third dim must be equal to 'in_channels'. But got " + f"'weight_init': {weight_init}, 'out_channels': {out_channels}, 'in_channels': " + f"{in_channels}.") + if self.dtype == mindspore.complex64 and ( \ + weight_init.ndim != 2 \ + or weight_init.shape[0] != out_channels \ + or weight_init.shape[1] != in_channels): + raise ValueError(f"For '{self.cls_name}', weight init shape error. The ndim of 'weight_init' must " + f"be equal to 2, and the first dim must be equal to 'out_channels', " + f"and the second dim must be equal to 'in_channels'. 
But got " + f"'weight_init': {weight_init}, 'out_channels': {out_channels}, 'in_channels': " + f"{in_channels}.") + + self.dense_impl = dense_impl(weight_init, [out_channels, in_channels]) + + self.bias_x = None + self.bias_y = None + if self.has_bias: + if isinstance(bias_init, Tensor): + if self.dtype is None: + self.dtype = bias_init.dtype + elif self.dtype != bias_init.dtype: + raise TypeError("Data type of weight init tensor and the bias init tensor must be equal, " + f"but got weight_init.dtype={self.dtype} and bias_init.dtype={bias_init.dtype}") + if self.dtype in [mindspore.float16, mindspore.float32] and ( \ + bias_init.ndim != 2 \ + or bias_init.shape[0] != 2 \ + or bias_init.shape[1] != out_channels): + raise ValueError(f"For '{self.cls_name}', bias init shape error. The ndim of 'bias_init' must " + f"be equal to 2, and the second dim must be equal to 'out_channels'. But got " + f"'bias_init': {bias_init}, 'out_channels': {out_channels}.") + if self.dtype == mindspore.complex64 and ( \ + bias_init.ndim != 1 \ + or bias_init.shape[0] != out_channels): + raise ValueError(f"For '{self.cls_name}', bias init shape error. The ndim of 'bias_init' must " + f"be equal to 1, and the only dim must be equal to 'out_channels'. 
But got " + f"'bias_init': {bias_init}, 'out_channels': {out_channels}.") + bias_init_x, bias_init_y = get_x_and_y(bias_init) + else: + bias_init_x = bias_init_y = bias_init + self.bias_x = Parameter(initializer(bias_init_x, [out_channels]), name="bias_x") + self.bias_y = Parameter(initializer(bias_init_y, [out_channels]), name="bias_y") + self.bias_add = P.BiasAdd() + + self.matmul = P.MatMul(transpose_b=True) + + def check_dense_input_shape(self, x: Tensor, x_dtype): + msg_prefix = f"For '{self.cls_name}', the" if self.cls_name else "The" + if x_dtype in [mindspore.float32, mindspore.float64] and (len(x) < 3 or x[0] != 2): + raise ValueError(f"{msg_prefix} dimension of 'x' should not be less than 3, and the first dimension " + f"should be 2, but got {x}.") + if x_dtype == mindspore.complex64 and len(x) < 2: + raise ValueError(f"{msg_prefix} dimension of 'x' should not be less than 2, but got {x}.") + return None + + def construct(self, u: Tensor) -> Tensor: + """Construct""" + if self.dtype is not None and self.dtype != u.dtype: + raise TypeError("dtype must be equal to the data type of the inputs tensor, but got: " + f"dtype={self.dtype} and inputs.dtype={u.dtype}") + u_shape = self.shape_op(u) + self.check_dense_input_shape(u_shape, u.dtype) + u_reshape = [-1, u_shape[-1]] + if u.dtype in [mindspore.float32, mindspore.float64]: + u_reshape = [2] + u_reshape + if len(u_reshape) < len(u_shape): + u = self.reshape(u, tuple(u_reshape)) + x, y = get_x_and_y(u) + out_x, out_y = self.dense_impl(self.matmul, x, y) + if self.has_bias: + out_x = self.bias_add(out_x, self.bias_x) + out_y = self.bias_add(out_y, self.bias_y) + out = to_2channel(out_x, out_y, u.dtype) + if len(u_reshape) < len(u_shape): + out_shape = u_shape[:-1] + (-1,) + out = self.reshape(out, out_shape) + return out + + def extend_repr(self): + s = 'input_channels={}, output_channels={}'.format(self.in_channels, self.out_channels) + if self.has_bias: + s += ', has_bias={}'.format(self.has_bias) + return 
s diff --git a/mindspore/python/mindspore/hypercomplex/hypercomplex/hc_pool.py b/mindspore/python/mindspore/hypercomplex/hypercomplex/hc_pool.py new file mode 100644 index 00000000000..f98b791db6c --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/hypercomplex/hc_pool.py @@ -0,0 +1,1018 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Hypercomplex Pooling""" +from typing import Tuple +from abc import abstractmethod +import mindspore.context as context +import mindspore.nn as nn +from mindspore._checkparam import Rel, Validator as validator +from mindspore.common import dtype as mstype +from mindspore.common.tensor import Tensor +from mindspore.ops import functional as F +from mindspore.ops import operations as P +from mindspore.ops.operations.nn_ops import AdaptiveAvgPool3D, AdaptiveMaxPool2D +from mindspore.hypercomplex.utils import get_x_and_y, to_2channel, \ + _size_1_t, _size_2_t, _size_3_t + + +class _PoolNd(nn.Cell): + r""" + Base class for pooling layers for the second-order hypercomplex numbers. + + Includes data validation and initialization of hyperparameters, which are shared by all specific + implementations of pooling. + + + Note: + pad_mode for training only supports "same" and "valid". + + Args: + kernel_size (Union[int, Tuple[int]]): The size of kernel window used to take the average value. 
+ The data type of kernel_size must be int and the value represents all the spatial dimensions + at once, or a tuple of the corresponding amount of int numbers that represent the spatial + dimensions separately. + stride (Union[int, Tuple[int]]): The distance of kernel moving, an int number that represents + the step size of movement for all the spatial dimensions at once, or a tuple of the + corresponding amount of int numbers that represent the step size of movement for spatial + dimensions separately. + pad_mode (str): The value for pad mode, is "same" or "valid", not case sensitive. + + - same: Adopts the way of completion. The height and width of the output will be the same as + the input. The total number of padding will be calculated in horizontal and vertical + directions and evenly distributed to top and bottom, left and right if possible. + Otherwise, the last extra padding will be done from the bottom and the right side. + + - valid: Adopts the way of discarding. The possible largest height and width of output + will be returned without padding. Extra pixels will be discarded. + data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'. Note that 'NHWC' + format is supported only with GPU target device as of now. Default: 'NCHW'. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, *, ..., *)`, with float16 or float32 data type, or + :math:`(N, C, *, ..., *)`, with complex64 data type. The count of spatial dimensions denoted by '*' + depends on a specific subclass. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C, *, ..., *)`, with float16 or float32 data + type, or :math:`(N, C, *, ..., *)`, with complex64 data type. The count of spatial dimensions denoted by '*' + is equal to that of the input tensor `inp`, but the sizes of those dimensions can be different. + + Raises: + TypeError: If `kernel_size` or `stride` is neither an int nor a tuple. 
+ TypeError: If dtype of `inp` is not float16, float32 or complex64. + ValueError: If `pad_mode` is neither 'same' nor 'valid' (case insensitive). + ValueError: If `kernel_size` or `stride` is less than 1. + ValueError: If `data_format` is neither 'NCHW' nor 'NHWC', or it is 'NHWC' and the target + device is not GPU. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + kernel_size: Tuple[int, ...], + stride: Tuple[int, ...], + pad_mode: str, + data_format: str = "NCHW") -> None: + """Initialize _PoolNd.""" + super(_PoolNd, self).__init__() + validator.check_value_type('pad_mode', pad_mode, [str], self.cls_name) + self.pad_mode = validator.check_string(pad_mode.upper(), + ['VALID', 'SAME'], + 'pad_mode', + self.cls_name) + self.format = validator.check_string(data_format, + ['NCHW', 'NHWC'], + 'format', + self.cls_name) + if context.get_context("device_target") != "GPU" and self.format == "NHWC": + raise ValueError(f"For '{self.cls_name}, the 'NHWC' format only support in GPU target, but got device " + f"target {context.get_context('device_target')}.") + + def _check_int_or_tuple(arg_name, arg_value): + validator.check_value_type(arg_name, arg_value, [int, tuple], self.cls_name) + error_msg = f'For \'{self.cls_name}\' the {arg_name} should be an positive int number or ' \ + f'a tuple of two positive int numbers, but got {arg_value}' + if isinstance(arg_value, int): + if arg_value <= 0: + raise ValueError(error_msg) + elif len(arg_value) == 2: + for item in arg_value: + if isinstance(item, int) and item > 0: + continue + raise ValueError(error_msg) + else: + raise ValueError(error_msg) + return arg_value + + self.kernel_size = _check_int_or_tuple('kernel_size', kernel_size) + self.stride = _check_int_or_tuple('stride', stride) + + def construct(self, u: Tensor) -> Tensor: + x, y = get_x_and_y(u) + x, y = self._construct(x, y) + out = to_2channel(x, y, u.dtype) + return out + + def extend_repr(self): + return 'kernel_size={kernel_size}, 
stride={stride}, pad_mode={pad_mode}'.format(**self.__dict__) + + @abstractmethod + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + pass + + +class MaxPool2d(_PoolNd): + r""" + 2D max pooling operation for temporal hypercomplex data of the second order.. + + Applies a 2D max pooling over an input Tensor which can be regarded as a composition of 2D planes. + + Typically the input is of shape :math:`(2, N, C, H_{in}, W_{in})`, MaxPool2d outputs + regional maximum in the :math:`(H_{in}, W_{in})`- dimension. Given kernel size + :math:`ks = (h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1)`, the operation is as follows: + + .. math:: + \text{out}(k, N_i, C_j, h, w) = \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1} + \text{inp}(k, N_i, C_j, s_0 \times h + m, s_1 \times w + n), + + where :math:`\text{inp}` is a hypercomplex input tensor. + + Note: + pad_mode for training only supports "same" and "valid". + + Args: + kernel_size (Union[int, tuple[int]]): The size of kernel used to take the max value, + is an int number that represents height and width are both kernel_size, + or a tuple of two int numbers that represent height and width respectively. + Default: 1. + stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents + the height and width of movement are both stride, or a tuple of two int numbers that + represent height and width of movement respectively. Default: 1. + pad_mode (str): The optional value for pad mode, is "same" or "valid", not case sensitive. + Default: "valid". + + - same: Adopts the way of completion. The height and width of the output will be the same as + the input. The total number of padding will be calculated in horizontal and vertical + directions and evenly distributed to top and bottom, left and right if possible. + Otherwise, the last extra padding will be done from the bottom and the right side. + + - valid: Adopts the way of discarding. 
The possible largest height and width of output + will be returned without padding. Extra pixels will be discarded. + data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'. Note that 'NHWC' + format is supported only with GPU target device as of now. Default: 'NCHW'. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, H_{in}, W_{in})`, with float16 or float32 data type, or + :math:`(N, C, H_{in}, W_{in})`, with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C, H_{out}, W_{out})`, + with float16 or float32 data type, or :math:`(N, C, H_{out}, W_{out})`, with complex64 data type. + + Raises: + TypeError: If `kernel_size` or `stride` is neither an int nor a tuple. + TypeError: If dtype of `inp` is not float16, float32 or complex64. + ValueError: If `pad_mode` is neither 'same' nor 'valid' (case insensitive). + ValueError: If `kernel_size` or `stride` is less than 1. + ValueError: If `data_format` is neither 'NCHW' nor 'NHWC', or it is 'NHWC' and the target + device is not GPU. 
+ ValueError: If length of shape of `inp` is not equal to 5 + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.hypercomplex.hc_pool import MaxPool2d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32, 32)).astype(np.float32)) + >>> maxp = MaxPool2d(kernel_size=4, stride=4) + >>> y = maxp(u) + >>> print(y.shape) + (2, 8, 64, 8, 8) + """ + + def __init__(self, + kernel_size: _size_2_t = 1, + stride: _size_2_t = 1, + pad_mode: str = "valid", + data_format: str = "NCHW") -> None: + super(MaxPool2d, self).__init__(kernel_size, stride, pad_mode, data_format) + self.max_pool = P.MaxPool(kernel_size=self.kernel_size, + strides=self.stride, + pad_mode=self.pad_mode, + data_format=self.format) + + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + out_x = self.max_pool(x) + out_y = self.max_pool(y) + return out_x, out_y + + +class MaxPool1d(_PoolNd): + r""" + 1D max pooling operation for temporal hypercomplex data of the second order. + + Applies a 1D max pooling over an input Tensor which can be regarded as a composition of 1D planes. + + Typically the input is of shape :math:`(2, N, C, L_{in})`, MaxPool1d outputs + regional maximum in the :math:`(L_{in})`-dimension. Given kernel size + :math:`ks = (l_{ker})` and stride :math:`s = (s_0)`, the operation is as follows: + + .. math:: + \text{out}(k, N_i, C_j, l) = \max_{n=0, \ldots, l_{ker}-1} + \text{inp}(k, N_i, C_j, s_0 \times l + n), + + where :math:`\text{inp}` is a hypercomplex input tensor. + + Note: + pad_mode for training only supports "same" and "valid". + + Args: + kernel_size (int): The size of kernel used to take the max value, Default: 1. + stride (int): The distance of kernel moving, an int number that represents + the width of movement is stride, Default: 1. + pad_mode (str): The optional value for pad mode, is "same" or "valid", not case sensitive. + Default: "valid". 
+ + - same: Adopts the way of completion. The total number of padding will be calculated in horizontal + and vertical directions and evenly distributed to top and bottom, left and right if possible. + Otherwise, the last extra padding will be done from the bottom and the right side. + + - valid: Adopts the way of discarding. The possible largest height and width of output + will be returned without padding. Extra pixels will be discarded. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, L_{in})`, with float16 or float32 data type, or + :math:`(N, C, L_{in})`, with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C, L_{out})`, with float16 or float32 + data type, or :math:`(N, C, L_{out})`, with complex64 data type. + + Raises: + TypeError: If `kernel_size` or `stride` is not an int. + TypeError: If dtype of `inp` is not float16, float32 or complex64. + ValueError: If `pad_mode` is neither 'same' nor 'valid' (case insensitive). + ValueError: If `kernel_size` or `stride` is less than 1. + ValueError: If length of shape of `inp` is not equal to 4. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.hypercomplex.hc_pool import MaxPool1d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32)).astype(np.float32)) + >>> maxp = MaxPool1d(kernel_size=4, stride=4) + >>> y = maxp(u) + >>> print(y.shape) + (2, 8, 64, 8) + """ + + def __init__(self, + kernel_size: _size_1_t = 1, + stride: _size_1_t = 1, + pad_mode: str = "valid") -> None: + """Initialize MaxPool1d.""" + super(MaxPool1d, self).__init__(kernel_size, stride, pad_mode) + validator.check_value_type('kernel_size', kernel_size, [int], self.cls_name) + validator.check_value_type('stride', stride, [int], self.cls_name) + validator.check_value_type('pad_mode', pad_mode, [str], self.cls_name) + self.pad_mode = validator.check_string(pad_mode.upper(), + ['VALID', 'SAME'], + 'pad_mode', + self.cls_name) + validator.check_int(kernel_size, 1, Rel.GE, "kernel_size", self.cls_name) + validator.check_int(stride, 1, Rel.GE, "stride", self.cls_name) + self.kernel_size = (1, kernel_size) + self.stride = (1, stride) + self.max_pool = P.MaxPool(kernel_size=self.kernel_size, + strides=self.stride, + pad_mode=self.pad_mode) + self.shape = F.shape + self.reduce_mean = P.ReduceMean(keep_dims=True) + self.expand = P.ExpandDims() + self.squeeze = P.Squeeze(2) + + def _shape_check(self, in_shape: tuple): + msg_prefix = f"For '{self.cls_name}', the" if self.cls_name else "The" + if len(in_shape) != 3: + raise ValueError(f"{msg_prefix} input must has 3 dim, but got {len(in_shape)}") + return None + + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + self._shape_check(self.shape(x)) + self._shape_check(self.shape(y)) + x = self.expand(x, 2) + y = self.expand(y, 2) + out_x = self.max_pool(x) + out_y = self.max_pool(y) + out_x = self.squeeze(out_x) + out_y = self.squeeze(out_y) + return out_x, out_y + + +class AvgPool2d(_PoolNd): + r""" + 2D average pooling 
for temporal hypercomplex data of the second order. + + Applies a 2D average pooling over an input Tensor which can be regarded as a composition of 2D input planes. + + Typically the input is of shape :math:`(2, N, C, H_{in}, W_{in})`, AvgPool2d outputs + regional average in the :math:`(H_{in}, W_{in})`-dimension. Given kernel size + :math:`ks = (h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1)`, the operation is as follows: + + .. math:: + \text{out}(k, N_i, C_j, h, w) = \frac{1}{h_{ker} * w_{ker}} \sum_{m=0}^{h_{ker}-1} \sum_{n=0}^{w_{ker}-1} + \text{inp}(k, N_i, C_j, s_0 \times h + m, s_1 \times w + n), + + where :math:`\text{inp}` is a hypercomplex input tensor. + + Note: + pad_mode for training only supports "same" and "valid". + + Args: + kernel_size (Union[int, tuple[int]]): The size of kernel used to take the average value. + The data type of kernel_size must be int and the value represents the height and width, + or a tuple of two int numbers that represent height and width respectively. + Default: 1. + stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents + the height and width of movement are both strides, or a tuple of two int numbers that + represent height and width of movement respectively. Default: 1. + pad_mode (str): The optional value for pad mode, is "same" or "valid", not case sensitive. + Default: "valid". + + - same: Adopts the way of completion. The height and width of the output will be the same as + the input. The total number of padding will be calculated in horizontal and vertical + directions and evenly distributed to top and bottom, left and right if possible. + Otherwise, the last extra padding will be done from the bottom and the right side. + + - valid: Adopts the way of discarding. The possible largest height and width of output + will be returned without padding. Extra pixels will be discarded. + data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'. 
Note that 'NHWC' + format is supported only with GPU target device as of now. Default: 'NCHW'. + + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, H_{in}, W_{in})`, with float16 or float32 data type, or + :math:`(N, C, H_{in}, W_{in})`, with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C, H_{out}, W_{out})`, with float16 + or float32 data type, or :math:`(N, C, H_{out}, W_{out})`, with complex64 data type. + + Raises: + TypeError: If `kernel_size` or `stride` is neither an int nor a tuple. + TypeError: If dtype of `inp` is not float16, float32 or complex64. + ValueError: If `pad_mode` is neither 'same' nor 'valid' (case insensitive). + ValueError: If `kernel_size` or `stride` is less than 1. + ValueError: If `data_format` is neither 'NCHW' nor 'NHWC', or it is 'NHWC' and the target + device is not GPU. + ValueError: If length of shape of `inp` is not equal to 5. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.hypercomplex.hc_pool import AvgPool2d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32, 32)).astype(np.float32)) + >>> avg = AvgPool2d(kernel_size=4, stride=4) + >>> y = avg(u) + >>> print(y.shape) + (2, 8, 64, 8, 8) + """ + + def __init__(self, + kernel_size: _size_2_t = 1, + stride: _size_2_t = 1, + pad_mode: str = "valid", + data_format: str = "NCHW") -> None: + super(AvgPool2d, self).__init__(kernel_size, + stride, + pad_mode, + data_format) + self.avg_pool = P.AvgPool(kernel_size=self.kernel_size, + strides=self.stride, + pad_mode=self.pad_mode, + data_format=self.format) + + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + out_x = self.avg_pool(x) + out_y = self.avg_pool(y) + return out_x, out_y + + +class AvgPool1d(_PoolNd): + r""" + 1D average pooling for temporal hypercomplex data of the second order. 
+ + Applies a 1D average pooling over an input Tensor which can be regarded as a composition of 1D input planes. + + Typically the input is of shape :math:`(2, N, C, L_{in})`, AvgPool1d outputs + regional average in the :math:`(L_{in})`-dimension. Given kernel size + :math:`ks = l_{ker}` and stride :math:`s = s_0`, the operation is as follows: + + .. math:: + \text{out}(k, N_i, C_j, l) = \frac{1}{l_{ker}} \sum_{n=0}^{l_{ker}-1} + \text{inp}(k, N_i, C_j, s_0 \times l + n) + + where :math:`\text{inp}` is a hypercomplex input tensor. + + Note: + pad_mode for training only supports "same" and "valid". + + Args: + kernel_size (int): The size of kernel window used to take the average value, Default: 1. + stride (int): The distance of kernel moving, an int number that represents + the width of movement is strides, Default: 1. + pad_mode (str): The optional value for pad mode, is "same" or "valid", not case sensitive. + Default: "valid". + + - same: Adopts the way of completion. The height and width of the output will be the same as + the input. The total number of padding will be calculated in horizontal and vertical + directions and evenly distributed to top and bottom, left and right if possible. + Otherwise, the last extra padding will be done from the bottom and the right side. + + - valid: Adopts the way of discarding. The possible largest height and width of output + will be returned without padding. Extra pixels will be discarded. + + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, L_{in})`, with float16 or float32 data type, or + :math:`(N, C, L_{in})`, with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C, L_{out})`, with float16 or float32 + data type, or :math:`(N, C, L_{out})`, with complex64 data type. + + Raises: + TypeError: If `kernel_size` or `stride` is not an int. + TypeError: If dtype of `inp` is not float16, float32 or complex64. 
+ ValueError: If `pad_mode` is neither 'same' nor 'valid' with not case sensitive. + ValueError: If `kernel_size` or `strides` is less than 1. + ValueError: If length of shape of `inp` is not equal to 4. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.hypercomplex.hc_pool import AvgPool1d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32)).astype(np.float32)) + >>> avg = AvgPool1d(kernel_size=4, stride=4) + >>> y = avg(u) + >>> print(y.shape) + (2, 8, 64, 8) + """ + + def __init__(self, + kernel_size: _size_1_t = 1, + stride: _size_1_t = 1, + pad_mode: str = "valid") -> None: + """Initialize AvgPool1d.""" + validator.check_value_type('kernel_size', kernel_size, [int], self.cls_name) + validator.check_value_type('stride', stride, [int], self.cls_name) + validator.check_value_type('pad_mode', pad_mode, [str], self.cls_name) + self.pad_mode = validator.check_string(pad_mode.upper(), + ['VALID', 'SAME'], + 'pad_mode', + self.cls_name) + validator.check_int(kernel_size, 1, Rel.GE, "kernel_size", self.cls_name) + validator.check_int(stride, 1, Rel.GE, "stride", self.cls_name) + super(AvgPool1d, self).__init__(kernel_size, stride, pad_mode) + self.kernel_size = (1, kernel_size) + self.stride = (1, stride) + self.avg_pool = P.AvgPool(kernel_size=self.kernel_size, + strides=self.stride, + pad_mode=self.pad_mode) + self.shape = F.shape + self.reduce_mean = P.ReduceMean(keep_dims=True) + self.slice = P.Slice() + self.expand = P.ExpandDims() + self.squeeze = P.Squeeze(2) + + def _shape_check(self, in_shape: tuple): + msg_prefix = f"For '{self.cls_name}', the" if self.cls_name else "The" + if len(in_shape) != 3: + raise ValueError(f"{msg_prefix} input must has 3 dim, but got {len(in_shape)}") + return None + + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + x = F.depend(x, self._shape_check(self.shape(x))) + y = F.depend(y, 
self._shape_check(self.shape(y))) + batch, channel, width = self.shape(x) + if width == self.kernel_size[1]: + x = self.reduce_mean(x, 2) + y = self.reduce_mean(y, 2) + elif width - self.kernel_size[1] < self.stride[1]: + x = self.slice(x, (0, 0, 0), (batch, channel, self.kernel_size[1])) + y = self.slice(y, (0, 0, 0), (batch, channel, self.kernel_size[1])) + x = self.reduce_mean(x, 2) + y = self.reduce_mean(y, 2) + else: + x = self.expand(x, 2) + y = self.expand(y, 2) + x = self.avg_pool(x) + y = self.avg_pool(y) + x = self.squeeze(x) + y = self.squeeze(y) + return x, y + + +class _AdaptivePoolNd(nn.Cell): + r""" + Base class for adaptive pooling layers for the second-order temporal hypercomplex data. + + Includes data validation and initialization of hyperparameters, which are shared by all specific + implementations of adaptive pooling. + + Note: + The size of every spatial dimension of `inp` must be divisible by the corresponding value of `output_size`. + + Args: + output_size (Union[int, tuple]): The target output size. `ouput_size` can be a tuple of length being equal to + the count of spatial dimensions of the input tensor, or a single integer which then represents the desired + output size for all of the spatial dimensions at once, or None. + If it is None, it means the output size is the same as the input size. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, *, ..., *)`, with float16 or float32 data type, or + :math:`(N, C, *, ..., *)`, with complex64 data type. The count of spatial dimensions denoted by '*' + depends on a specific subclass. + + Outputs: + Tensor of the same data type as `inp`, and of shape :math:`(2, N, C, *, ..., *)`, with float16 or float32 data + type, or :math:`(N, C, *, ..., *)`, with complex64 data type. The number of spatial dimensions denoted by '*' + is the same as in `inp`. + + Raises: + TypeError: If dtype of `inp` is not float16, float32 or complex64. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, output_size: Tuple[int, ...]) -> None: + super(_AdaptivePoolNd, self).__init__() + self.output_size = output_size + + def construct(self, u: Tensor) -> Tensor: + x, y = get_x_and_y(u) + out_x, out_y = self._construct(x, y) + out = to_2channel(out_x, out_y, u.dtype) + + return out + + @abstractmethod + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + pass + + def _adaptive_shape_check(self, in_shape): + """Check shape.""" + msg_prefix = "For {}, the".format(self.cls_name) + if len(in_shape) != 3: + raise ValueError("{} input must has 3 dim, but got {}.".format(msg_prefix, len(in_shape))) + if in_shape[2] < self.output_size: + raise ValueError("{} input's last dimension must be greater or equal to " + "output size {}, but got {}.".format(msg_prefix, self.output_size, in_shape[2])) + if in_shape[2] % self.output_size != 0: + raise ValueError("{} input's last dimension must be divisible by " + "output size {}, but got {}.".format(msg_prefix, self.output_size, in_shape[2])) + return None + + def _adaptive_dtype_check(self, x_dtype): + """Check dtype.""" + if x_dtype not in [mstype.float16, mstype.float32]: + raise TypeError("For {}, the x_dtype must be float16 or float32, " + "but got {}.".format(self.cls_name, x_dtype)) + return None + + +class AdaptiveAvgPool1d(_AdaptivePoolNd): + r""" + 1D adaptive average pooling for temporal hypercomplex data of the second order. + + Applies a 1D adaptive average pooling over an input Tensor which can be regarded as + a composition of 1D input planes. + + Typically, the input is of shape :math:`(2, N, C, L_{in})`, + AdaptiveAvgPool1d outputs regional average in the :math:`L_{in}`-dimension. + The output is of shape :math:`(2, N, C, L_{out})`, + where :math:`L_{out}` is defined by `output_size`. + + Note: + :math:`L_{in}` must be divisible by `output_size`. 
+ + Args: + output_size (int): the target output size :math:`L_{out}`. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, L_{in})`, with float16 or float32 data type, or + :math:`(N, C, L_{in})`, with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C, L_{out})`, with float16 or float32 + data type, or :math:`(N, C, L_{out})`, with complex64 data type. + + Raises: + TypeError: If `output_size` is not an int. + TypeError: If dtype of `inp` is not float16, float32 or complex64. + ValueError: If `output_size` is less than 1. + ValueError: If length of shape of `inp` is not equal to 4. + ValueError: If the last dimension of `inp` is smaller than `output_size`. + ValueError: If the last dimension of `inp` is not divisible by `output_size`. + + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.hypercomplex.hc_pool import AdaptiveAvgPool1d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32)).astype(np.float32)) + >>> avg = AdaptiveAvgPool1d(output_size=16) + >>> y = avg(u) + >>> print(y.shape) + (2, 8, 64, 16) + """ + + def __init__(self, output_size: int) -> None: + """Initialize AdaptiveAvgPool1d.""" + super(AdaptiveAvgPool1d, self).__init__(output_size) + validator.check_value_type('output_size', output_size, [int], self.cls_name) + validator.check_int(output_size, 1, Rel.GE, "output_size", self.cls_name) + self.shape = F.shape + self.expand = P.ExpandDims() + self.squeeze = P.Squeeze(2) + self.dtype = P.DType() + + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + self._adaptive_shape_check(self.shape(x)) + self._adaptive_shape_check(self.shape(y)) + self._adaptive_dtype_check(self.dtype(x)) + self._adaptive_dtype_check(self.dtype(y)) + + _, _, width = self.shape(x) + stride = width // self.output_size + kernel_size = width - (self.output_size - 1) * stride + + stride = 
(1, width // self.output_size) + kernel_size = (1, kernel_size) + + x = self.expand(x, 2) + y = self.expand(y, 2) + + avg_pool = P.AvgPool(kernel_size=kernel_size, strides=stride) + + out_x = avg_pool(x) + out_y = avg_pool(y) + out_x = self.squeeze(out_x) + out_y = self.squeeze(out_y) + + return out_x, out_y + + +class AdaptiveAvgPool2d(_AdaptivePoolNd): + r""" + 2D adaptive average pooling for temporal hypercomplex data of the second order. + + This operator applies a 2D adaptive average pooling to an input signal composed of multiple input planes. + That is, for any input size, the size of the specified output is H x W. + The number of output features is equal to the number of input features. + + The input and output data format can be "NCHW" and "CHW". N is the batch size, C is the number of channels, + H is the feature height, and W is the feature width. + + .. math:: + \begin{align} + h_{start} &= floor(i * H_{in} / H_{out})\\ + h_{end} &= ceil((i + 1) * H_{in} / H_{out})\\ + w_{start} &= floor(j * W_{in} / W_{out})\\ + w_{end} &= ceil((j + 1) * W_{in} / W_{out})\\ + out(i,j) &= \frac{\sum inp[h_{start}:h_{end}, w_{start}:w_{end}]}{(h_{end}- h_{start}) + * (w_{end}- w_{start})} + \end{align} + + Args: + output_size (Union[int, tuple]): The target output size is H x W. + `ouput_size` can be a tuple consisted of int type H and W, or a single H for H x H, or None. + If it is None, it means the output size is the same as the input size. + + Inputs: + - **inp** (Tensor) - The input of AdaptiveAvgPool2d, which is a 4D or 5D tensor of shape + :math:`(2, N, C, H_{in}, W_{in})` or :math:`(2, C, H_{in}, W_{in})`, with float16 or float32 data type, + or :math:`(N, C, H_{in}, W_{in})` or :math:`(C, H_{in}, W_{in})`, with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C, H_{out}, W_{out})`, with float16 + or float32 data type, or :math:`(N, C, H_{out}, W_{out})`, with complex64 data type. 
+ + Raises: + ValueError: If `output_size` is a tuple and the length of `output_size` is not 2. + TypeError: If `inp` is not a Tensor. + TypeError: If dtype of `inp` is not float16, float32 or complex64. + ValueError: If the dimension of `inp` is less than or equal to the dimension of `output_size`. + + Supported Platforms: + ``GPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.hypercomplex.hc_pool import AdaptiveAvgPool2d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32, 32)).astype(np.float32)) + >>> avg = AdaptiveAvgPool2d(output_size=16) + >>> y = avg(u) + >>> print(y.shape) + (2, 8, 64, 16, 16) + """ + + def __init__(self, output_size: _size_2_t) -> None: + """Initialize AdaptiveAvgPool2d.""" + super(AdaptiveAvgPool2d, self).__init__(output_size) + self.adaptive_avgpool2d = P.AdaptiveAvgPool2D(output_size) + + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + out_x = self.adaptive_avgpool2d(x) + out_y = self.adaptive_avgpool2d(y) + + return out_x, out_y + + +class AdaptiveAvgPool3d(_AdaptivePoolNd): + r""" + 3D adaptive average pooling for temporal hypercomplex data of the second order. + + This operator applies a 3D adaptive average pooling to an input signal composed of multiple input planes. + That is, for any input size, the size of the specified output is :math:`(2, N, C, D, H, W)` or + :math:`(2, C, D, H, W)`. + The number of output features is equal to the number of input planes. + + Suppose the last 3 dimension size of x is :math:`(inD, inH, inW)`, then the last 3 dimension size of output is + :math:`(outD, outH, outW)`. + + .. 
math:: + \begin{array}{ll} \\ + \forall \quad od \in [0,outD-1], oh \in [0,outH-1], ow \in [0,outW-1]\\ + output[od,oh,ow] = \\ + \qquad mean(x[istartD:iendD+1,istartH:iendH+1,istartW:iendW+1])\\ + where,\\ + \qquad istartD= \left\lceil \frac{od * inD}{outD} \right\rceil \\ + \qquad iendD=\left\lfloor \frac{(od+1)* inD}{outD} \right\rfloor \\ + \qquad istartH=\left\lceil \frac{oh * inH}{outH} \right\rceil \\ + \qquad iendH=\left\lfloor \frac{(oh+1) * inH}{outH} \right\rfloor \\ + \qquad istartW=\left\lceil \frac{ow * inW}{outW} \right\rceil \\ + \qquad iendW=\left\lfloor \frac{(ow+1) * inW}{outW} \right\rfloor + \end{array} + + Args: + output_size (Union[int, tuple]): The target output size. `output_size` can be a tuple :math:`(D, H, W)`, + or an int D for :math:`(D, D, D)`. :math:`(D)`, :math:`(H)` and :math:`(W)` can be int or None + which means the output size is the same as that of the input. + + Inputs: + - **inp** (Tensor) - The input of AdaptiveAvgPool3d, which is a 6D Tensor + :math:`(2, N, C, D_{in}, H_{in}, W_{in})` or a 5D Tensor :math:`(2, C, D_{in}, H_{in}, W_{in})`, + with float16 or float32 data type, or a 5D Tensor :math:`(N, C, D_{in}, H_{in}, W_{in})` or a 4D Tensor + :math:`(C, D_{in}, H_{in}, W_{in})`, with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C, D_{out}, H_{out}, W_{out})`, with float16 + or float32 data type, or :math:`(N, C, D_{out}, H_{out}, W_{out})`, with complex64 data type. + + Raises: + TypeError: If `inp` is not a Tensor. + TypeError: If dtype of `inp` is not float16, float32 or complex64. + ValueError: If the dimension of `inp` is not 5D or 6D. + ValueError: If `output_size` value is not positive. 
+ + Supported Platforms: + ``GPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.hypercomplex.hc_pool import AdaptiveAvgPool3d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32, 48, 96)).astype(np.float32)) + >>> avg = AdaptiveAvgPool3d(output_size=(16, 24, 32)) + >>> y = avg(u) + >>> print(y.shape) + (2, 8, 64, 16, 24, 32) + """ + + def __init__(self, output_size: _size_3_t): + """Initialize AdaptiveAvgPool3d.""" + super(AdaptiveAvgPool3d, self).__init__(output_size) + self.adaptive_avg_pool3d = AdaptiveAvgPool3D(output_size) + + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + out_x = self.adaptive_avg_pool3d(x) + out_y = self.adaptive_avg_pool3d(y) + + return out_x, out_y + + +class AdaptiveMaxPool1d(_AdaptivePoolNd): + r""" + 1D adaptive maximum pooling for temporal hypercomplex data of the second order. + + Applies a 1D adaptive maximum pooling over an input Tensor which can be regarded as + a composition of 1D input planes. + + Typically, the input is of shape :math:`(2, N, C, L_{in})`, + AdaptiveMaxPool1d outputs regional maximum in the :math:`L_{in}`-dimension. The output is of + shape :math:`(2, N, C, L_{out})`, where :math:`L_{out}` is defined by `output_size`. + + Note: + :math:`L_{in}` must be divisible by `output_size`. + + Args: + output_size (int): the target output size :math:`L_{out}`. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, L_{in})`, with float16 or float32 data type, or + :math:`(N, C, L_{in})`, with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C, L_{out})`, with float16 or float32 + data type, or :math:`(N, C, L_{out})`, with complex64 data type. + + Raises: + TypeError: If dtype of `inp` is not float16, float32 or complex64. + TypeError: If `output_size` is not an int. + ValueError: If `output_size` is less than 1. 
+ ValueError: If the last dimension of `inp` is smaller than `output_size`. + ValueError: If the last dimension of `inp` is not divisible by `output_size`. + ValueError: If length of shape of `inp` is not equal to 4. + + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.hypercomplex.hc_pool import AdaptiveMaxPool1d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32)).astype(np.float32)) + >>> maxp = AdaptiveMaxPool1d(output_size=16) + >>> y = maxp(u) + >>> print(y.shape) + (2, 8, 64, 16) + """ + + def __init__(self, output_size: int) -> None: + """Initialize AdaptiveMaxPool1d.""" + super(AdaptiveMaxPool1d, self).__init__(output_size) + validator.check_value_type('output_size', output_size, [int], self.cls_name) + validator.check_int(output_size, 1, Rel.GE, "output_size", self.cls_name) + self.shape = F.shape + self.expand = P.ExpandDims() + self.squeeze = P.Squeeze(2) + self.dtype = P.DType() + + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + self._adaptive_shape_check(self.shape(x)) + self._adaptive_shape_check(self.shape(y)) + self._adaptive_dtype_check(self.dtype(x)) + self._adaptive_dtype_check(self.dtype(y)) + + _, _, width = self.shape(x) + stride = width // self.output_size + kernel_size = width - (self.output_size - 1) * stride + + stride = (1, width // self.output_size) + kernel_size = (1, kernel_size) + + x = self.expand(x, 2) + y = self.expand(y, 2) + + max_pool = P.MaxPool(kernel_size=kernel_size, strides=stride) + + out_x = max_pool(x) + out_y = max_pool(y) + out_x = self.squeeze(out_x) + out_y = self.squeeze(out_y) + + return out_x, out_y + + +class AdaptiveMaxPool2d(_AdaptivePoolNd): + r""" + AdaptiveMaxPool2d operation for temporal hypercomplex data of the second order. + + This operator applies a 2D adaptive max pooling to an input signal composed of multiple input planes. 
+ That is, for any input size, the size of the specified output is H x W. + The number of output features is equal to the number of input planes. + + The input and output data format can be "NCHW" and "CHW". N is the batch size, C is the number of channels, + H is the feature height, and W is the feature width. + + For max adaptive pool2d: + + .. math:: + + \begin{align} + h_{start} &= floor(i * H_{in} / H_{out})\\ + h_{end} &= ceil((i + 1) * H_{in} / H_{out})\\ + w_{start} &= floor(j * W_{in} / W_{out})\\ + w_{end} &= ceil((j + 1) * W_{in} / W_{out})\\ + out(i,j) &= {\max inp[h_{start}:h_{end}, w_{start}:w_{end}]} + \end{align} + + Note: + Ascend platform only supports float16 type for inp. + + Args: + output_size (Union[int, tuple]): The target output size is H x W. + `output_size` can be a tuple, or a single H for H x H, and H and W can be int or None + which means the output size is the same as the input. + + return_indices (bool): If `return_indices` is True, the indices of max value would be output. + Default: False. + + Inputs: + - **inp** (Tensor) - The input of AdaptiveMaxPool2d, which is a 5D tensor of shape + (2, N, C, H_{in}, W_{in}) or a 4D tensor of shape (2, C, H_{in}, W_{in}), with float16 or float32 data type, + or a 4D tensor of shape (N, C, H_{in}, W_{in}) or a 3D tensor of shape (C, H_{in}, W_{in}), + with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C, H_{out}, W_{out})` or + :math:`(2, C, H_{out}, W_{out})`, with float16 or float32 data type, or :math:`(N, C, H_{out}, W_{out})` or + :math:`(C, H_{out}, W_{out})`, with complex64 data type. + + Shape of the output is `inp_shape[:len(inp_shape) - len(out_shape)] + out_shape`. + + Raises: + TypeError: If `output_size` is not int or tuple. + TypeError: If `inp` is not a tensor. + TypeError: If `return_indices` is not a bool. + TypeError: If dtype of `inp` is not float16, float32 or complex64. 
+ ValueError: If `output_size` is a tuple and the length of `output_size` is not 2. + ValueError: If the dimension of `inp` is not 4D or 5D. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.hypercomplex.hc_pool import AdaptiveMaxPool2d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32, 32)).astype(np.float32)) + >>> maxp = AdaptiveMaxPool2d(output_size=16) + >>> y = maxp(u) + >>> print(y.shape) + (2, 8, 64, 16, 16) + """ + + def __init__(self, output_size: _size_2_t) -> None: + """Initialize AdaptiveMaxPool2d.""" + super(AdaptiveMaxPool2d, self).__init__(output_size) + self.adaptive_maxpool2d = AdaptiveMaxPool2D(output_size) + + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + out_x = self.adaptive_maxpool2d(x) + out_y = self.adaptive_maxpool2d(y) + + return out_x, out_y diff --git a/mindspore/python/mindspore/hypercomplex/hypercomplex/uniform_operator.py b/mindspore/python/mindspore/hypercomplex/hypercomplex/uniform_operator.py new file mode 100644 index 00000000000..27b254d60a1 --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/hypercomplex/uniform_operator.py @@ -0,0 +1,49 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Uniform operators""" +from mindspore.nn import Cell + + +class _UniformOperator(Cell): + r""" + Base class for layers that operate with the second-order hypercomplex numbers, and are designed + using the bridge pattern. + + Constructs the object of the 'hc_op' type, passing 'hc_impl' as a parameter. + + Args: + hc_op (Type): The abstraction part of the bridge pattern. + hc_impl (Type): The implementor part of the bridge pattern. + **kwargs (dict): Additional arguments that may be required to construct the specific layer + + Inputs: + - **inp** (Tensor) - input tensor. The shape is specific to the subclass. + + Outputs: + Tensor of shape, which is specific to the subclass. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + hc_op, + hc_impl, + **kwargs) -> None: + super(_UniformOperator, self).__init__() + self.op = hc_op(hc_impl, **kwargs) + + def construct(self, x): + return self.op(x) diff --git a/mindspore/python/mindspore/hypercomplex/utils.py b/mindspore/python/mindspore/hypercomplex/utils.py new file mode 100644 index 00000000000..af65a0cd8a4 --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/utils.py @@ -0,0 +1,51 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Utils""" +from typing import Tuple, Union + +import mindspore +from mindspore import ops as P + + +to_complex = P.Complex() +get_real = P.Real() +get_imag = P.Imag() +unstack = P.Unstack(0) +cat = P.Concat(0) + + +def get_x_and_y(tensor): + if tensor.dtype == mindspore.complex64: + return get_real(tensor), get_imag(tensor) + return unstack(tensor) + + +def to_2channel(real, imag, dtype=None): + '''Convert to 2 channel format''' + if dtype is not None and dtype == mindspore.complex64: + return to_complex(real, imag) + if dtype is not None and (dtype != real.dtype or dtype != imag.dtype): + raise ValueError("dtype must match with data type of the input tensors, but got: " + f"dtype={dtype}, real.dtype={real.dtype}, imag.dtype={imag.dtype}") + expand_dims = P.ExpandDims() + real = expand_dims(real, 0) + imag = expand_dims(imag, 0) + return cat((real, imag)) + + +_size_1_t = Union[int, Tuple[int]] +_size_2_t = Union[int, Tuple[int, int]] +_size_3_t = Union[int, Tuple[int, int, int]] +_size_any_t = Union[int, Tuple[int, ...]] diff --git a/tests/st/hypercomplex/deepconvnet.py b/tests/st/hypercomplex/deepconvnet.py new file mode 100644 index 00000000000..54027fd23e9 --- /dev/null +++ b/tests/st/hypercomplex/deepconvnet.py @@ -0,0 +1,83 @@ +from mindspore import nn +from mindspore.common.tensor import Tensor +from mindspore.ops import operations as P +import mindspore.hypercomplex.dual as ops + + +class DeepConvNet(nn.Cell): + def __init__(self): + super(DeepConvNet, self).__init__() + + self.conv1 = ops.Conv1d(1, 16, kernel_size=6, stride=2, padding=2, pad_mode='pad') + self.bn1 = ops.BatchNorm1d(16) + self.avg_pool1 = ops.AvgPool1d(kernel_size=2, stride=2) + self.pad1 = nn.Pad(paddings=((0, 0), (0, 0), (0, 0), (0, 2)), mode='CONSTANT') + + self.conv2 = ops.Conv1d(16, 32, kernel_size=3, stride=2, padding=0) + self.bn2 = ops.BatchNorm1d(32) + self.avg_pool2 = ops.AvgPool1d(kernel_size=2, 
stride=2) + + self.conv3 = ops.Conv1d(32, 64, kernel_size=3, stride=1, padding=1, pad_mode='pad') + self.bn3 = ops.BatchNorm1d(64) + self.avg_pool3 = ops.AvgPool1d(kernel_size=2, stride=2) + + self.conv4 = ops.Conv1d(64, 64, kernel_size=3, stride=1, padding=1, pad_mode='pad') + self.bn4 = ops.BatchNorm1d(64) + self.avg_pool4 = ops.AvgPool1d(kernel_size=2, stride=2) + + self.conv5 = ops.Conv1d(64, 128, kernel_size=3, stride=1, padding=1, pad_mode='pad') + self.conv6 = ops.Conv1d(128, 128, kernel_size=3, stride=1, padding=1, pad_mode='pad') + self.bn6 = ops.BatchNorm1d(128) + self.avg_pool6 = ops.AvgPool1d(kernel_size=2, stride=2) + + self.shape_op = P.Shape() + self.reshape = P.Reshape() + self.permute = P.Transpose() + self.flatten = P.Flatten() + + self.fc1 = ops.Dense(4096, 1024) + self.fc2 = nn.Dense(2048, 84) + + self.relu = ops.ReLU() + self.sigmoid = nn.Sigmoid() + + def construct(self, u: Tensor) -> Tensor: + u = self.conv1(u) + u = self.bn1(u) + u = self.relu(u) + u = self.avg_pool1(u) + u = self.pad1(u) + + u = self.conv2(u) + u = self.bn2(u) + u = self.relu(u) + u = self.avg_pool2(u) + + u = self.conv3(u) + u = self.bn3(u) + u = self.relu(u) + u = self.avg_pool3(u) + + u = self.conv4(u) + u = self.bn4(u) + u = self.relu(u) + u = self.avg_pool4(u) + + u = self.conv5(u) + u = self.relu(u) + + u = self.conv6(u) + u = self.bn6(u) + u = self.relu(u) + u = self.avg_pool6(u) + + u_shape = self.shape_op(u) + u = self.reshape(u, (u_shape[0], u_shape[1], -1)) + u = self.fc1(u) + u = self.relu(u) + + u = self.permute(u, (1, 0, 2)) + x = self.flatten(u) + x = self.fc2(x) + x = self.sigmoid(x) + return x diff --git a/tests/st/hypercomplex/hcmodel.py b/tests/st/hypercomplex/hcmodel.py new file mode 100644 index 00000000000..822473f884d --- /dev/null +++ b/tests/st/hypercomplex/hcmodel.py @@ -0,0 +1,32 @@ +from mindspore import nn +from mindspore.common.tensor import Tensor +from mindspore.ops import operations as P +from mindspore.hypercomplex.utils import get_x_and_y, 
to_2channel +import mindspore.hypercomplex.dual as ops + + +class HCModel(nn.Cell): + + def __init__(self): + super(HCModel, self).__init__() + self.conv1 = ops.Conv2d(1, 10, kernel_size=3) + self.bn1 = ops.BatchNorm2d(10) + self.max_pool = ops.MaxPool2d(2) + self.relu = ops.ReLU() + self.fc1 = ops.Dense(7290, 256) + self.fc2 = nn.Dense(512, 10) + self.concat = P.Concat(1) + + def construct(self, u: Tensor) -> Tensor: + u = to_2channel(u[:, :1], u[:, 1:]) + u = self.conv1(u) + u = self.bn1(u) + u = self.relu(u) + u = self.max_pool(u) + u = u.view(2, u.shape[1], -1) + u = self.fc1(u) + u = self.relu(u) + out_x, out_y = get_x_and_y(u) + out = self.concat([out_x, out_y]) + out = self.fc2(out) + return out diff --git a/tests/st/hypercomplex/resnet.py b/tests/st/hypercomplex/resnet.py new file mode 100644 index 00000000000..0ce578573c6 --- /dev/null +++ b/tests/st/hypercomplex/resnet.py @@ -0,0 +1,593 @@ +# Copyright 2020-2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""ResNet.""" +import math +import numpy as np +from scipy.stats import truncnorm +import mindspore.nn as nn +import mindspore.common.dtype as mstype +from mindspore.ops import operations as P +from mindspore.ops import functional as F +from mindspore.common.tensor import Tensor +import mindspore.hypercomplex.dual as ops +from mindspore.hypercomplex.utils import get_x_and_y, to_2channel + + +def conv_variance_scaling_initializer(in_channel, out_channel, kernel_size): + fan_in = in_channel * kernel_size * kernel_size + scale = 1.0 + scale /= max(1., fan_in) + stddev = (scale ** 0.5) / .87962566103423978 + mu, sigma = 0, stddev + weight = truncnorm(-2, 2, loc=mu, scale=sigma).rvs(out_channel * in_channel * kernel_size * kernel_size) + weight = np.reshape(weight, (out_channel, in_channel, kernel_size, kernel_size)) + return Tensor(weight, dtype=mstype.float32) + + +def calculate_gain(nonlinearity, param=None): + """calculate_gain""" + linear_fns = ['linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d', 'conv_transpose2d', 'conv_transpose3d'] + res = 0 + if nonlinearity in linear_fns or nonlinearity == 'sigmoid': + res = 1 + elif nonlinearity == 'tanh': + res = 5.0 / 3 + elif nonlinearity == 'relu': + res = math.sqrt(2.0) + elif nonlinearity == 'leaky_relu': + if param is None: + negative_slope = 0.01 + elif not isinstance(param, bool) and isinstance(param, int) or isinstance(param, float): + # True/False are instances of int, hence check above + negative_slope = param + else: + raise ValueError("negative_slope {} not a valid number".format(param)) + res = math.sqrt(2.0 / (1 + negative_slope ** 2)) + else: + raise ValueError("Unsupported nonlinearity {}".format(nonlinearity)) + return res + + +def _calculate_fan_in_and_fan_out(tensor): + """_calculate_fan_in_and_fan_out""" + dimensions = len(tensor) + if dimensions < 2: + raise ValueError("Fan in and fan out can not be computed for 
tensor with fewer than 2 dimensions") + if dimensions == 2: # Linear + fan_in = tensor[1] + fan_out = tensor[0] + else: + num_input_fmaps = tensor[1] + num_output_fmaps = tensor[0] + receptive_field_size = 1 + if dimensions > 2: + receptive_field_size = tensor[2] * tensor[3] + fan_in = num_input_fmaps * receptive_field_size + fan_out = num_output_fmaps * receptive_field_size + return fan_in, fan_out + + +def _calculate_correct_fan(tensor, mode): + mode = mode.lower() + valid_modes = ['fan_in', 'fan_out'] + if mode not in valid_modes: + raise ValueError("Mode {} not supported, please use one of {}".format(mode, valid_modes)) + fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor) + return fan_in if mode == 'fan_in' else fan_out + + +def kaiming_normal(inputs_shape, a=0, mode='fan_in', nonlinearity='leaky_relu'): + fan = _calculate_correct_fan(inputs_shape, mode) + gain = calculate_gain(nonlinearity, a) + std = gain / math.sqrt(fan) + return np.random.normal(0, std, size=inputs_shape).astype(np.float32) + + +def kaiming_uniform(inputs_shape, a=0., mode='fan_in', nonlinearity='leaky_relu'): + fan = _calculate_correct_fan(inputs_shape, mode) + gain = calculate_gain(nonlinearity, a) + std = gain / math.sqrt(fan) + bound = math.sqrt(3.0) * std # Calculate uniform bounds from standard deviation + return np.random.uniform(-bound, bound, size=inputs_shape).astype(np.float32) + + +def _conv3x3(in_channel, out_channel, stride=1, use_se=False, res_base=False): + if use_se: + weight = conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=3) + else: + weight_shape = (out_channel, in_channel, 3, 3) + weight = Tensor(kaiming_normal((2, *weight_shape), mode="fan_out", nonlinearity='relu')) + if res_base: + return ops.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride, + padding=1, pad_mode='pad', weight_init=weight) + return ops.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride, + padding=0, pad_mode='same', weight_init=weight) + + +def 
_conv1x1(in_channel, out_channel, stride=1, use_se=False, res_base=False): + if use_se: + weight = conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=1) + else: + weight_shape = (out_channel, in_channel, 1, 1) + weight = Tensor(kaiming_normal((2, *weight_shape), mode="fan_out", nonlinearity='relu')) + if res_base: + return ops.Conv2d(in_channel, out_channel, kernel_size=1, stride=stride, + padding=0, pad_mode='pad', weight_init=weight) + return ops.Conv2d(in_channel, out_channel, kernel_size=1, stride=stride, + padding=0, pad_mode='same', weight_init=weight) + + +def _conv7x7(in_channel, out_channel, stride=1, use_se=False, res_base=False): + if use_se: + weight = conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=7) + else: + weight_shape = (out_channel, in_channel, 7, 7) + weight = Tensor(kaiming_normal((2, *weight_shape), mode="fan_out", nonlinearity='relu')) + if res_base: + return ops.Conv2d(in_channel, out_channel, + kernel_size=7, stride=stride, padding=3, pad_mode='pad', weight_init=weight) + return ops.Conv2d(in_channel, out_channel, + kernel_size=7, stride=stride, padding=0, pad_mode='same', weight_init=weight) + + +def _bn(channel, res_base=False): + if res_base: + return ops.BatchNorm2d(channel, eps=1e-5, momentum=0.1, + gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1) + return ops.BatchNorm2d(channel, eps=1e-4, momentum=0.9, + gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1) + + +def _fc(in_channel, out_channel, use_se=False): + if use_se: + weight = np.random.normal(loc=0, scale=0.01, size=out_channel * in_channel) + weight = Tensor(np.reshape(weight, (out_channel, in_channel)), dtype=mstype.float32) + else: + weight_shape = (out_channel, in_channel) + weight = Tensor(kaiming_uniform(weight_shape, a=math.sqrt(5))) + return nn.Dense(in_channel, out_channel, has_bias=True, weight_init=weight, bias_init=0) + + +class ResidualBlock(nn.Cell): + """ + ResNet V1 residual block 
definition. + + Args: + in_channel (int): Input channel. + out_channel (int): Output channel. + stride (int): Stride size for the first convolutional layer. Default: 1. + use_se (bool): Enable SE-ResNet50 net. Default: False. + se_block(bool): Use se block in SE-ResNet50 net. Default: False. + + Returns: + Tensor, output tensor. + + Examples: + >>> ResidualBlock(3, 256, stride=2) + """ + expansion = 4 + + def __init__(self, + in_channel, + out_channel, + stride=1, + use_se=False, se_block=False): + super(ResidualBlock, self).__init__() + self.stride = stride + self.use_se = use_se + self.se_block = se_block + channel = out_channel // self.expansion + self.conv1 = _conv1x1(in_channel, channel, stride=1, use_se=self.use_se) + self.bn1 = _bn(channel) + if self.use_se and self.stride != 1: + self.e2 = nn.SequentialCell([_conv3x3(channel, channel, stride=1, use_se=True), _bn(channel), + ops.ReLU(), ops.MaxPool2d(kernel_size=2, stride=2, pad_mode='same')]) + else: + self.conv2 = _conv3x3(channel, channel, stride=stride, use_se=self.use_se) + self.bn2 = _bn(channel) + + self.conv3 = _conv1x1(channel, out_channel, stride=1, use_se=self.use_se) + self.bn3 = _bn(out_channel) + if self.se_block: + self.se_global_pool = P.ReduceMean(keep_dims=False) + self.se_dense_0 = _fc(out_channel, int(out_channel / 4), use_se=self.use_se) + self.se_dense_1 = _fc(int(out_channel / 4), out_channel, use_se=self.use_se) + self.se_sigmoid = nn.Sigmoid() + self.se_mul = P.Mul() + self.relu = ops.ReLU() + + self.down_sample = False + + if stride != 1 or in_channel != out_channel: + self.down_sample = True + self.down_sample_layer = None + + if self.down_sample: + if self.use_se: + if stride == 1: + self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, + stride, use_se=self.use_se), _bn(out_channel)]) + else: + self.down_sample_layer = nn.SequentialCell([ops.MaxPool2d(kernel_size=2, stride=2, pad_mode='same'), + _conv1x1(in_channel, out_channel, 1, + use_se=self.use_se), 
_bn(out_channel)]) + else: + self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, stride, + use_se=self.use_se), _bn(out_channel)]) + + def construct(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + if self.use_se and self.stride != 1: + out = self.e2(out) + else: + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + out = self.conv3(out) + out = self.bn3(out) + if self.se_block: + out_se = out + out = self.se_global_pool(out, (2, 3)) + out = self.se_dense_0(out) + out = self.relu(out) + out = self.se_dense_1(out) + out = self.se_sigmoid(out) + out = F.reshape(out, F.shape(out) + (1, 1)) + out = self.se_mul(out, out_se) + + if self.down_sample: + identity = self.down_sample_layer(identity) + + out = out + identity + out = self.relu(out) + + return out + + +class ResidualBlockBase(nn.Cell): + """ + ResNet V1 residual block definition. + + Args: + in_channel (int): Input channel. + out_channel (int): Output channel. + stride (int): Stride size for the first convolutional layer. Default: 1. + use_se (bool): Enable SE-ResNet50 net. Default: False. + se_block(bool): Use se block in SE-ResNet50 net. Default: False. + res_base (bool): Enable parameter setting of resnet18. Default: True. + + Returns: + Tensor, output tensor. 
+ + Examples: + >>> ResidualBlockBase(3, 256, stride=2) + """ + + def __init__(self, + in_channel, + out_channel, + stride=1, + use_se=False, + se_block=False, + res_base=True): + super(ResidualBlockBase, self).__init__() + self.res_base = res_base + self.conv1 = _conv3x3(in_channel, out_channel, stride=stride, res_base=self.res_base) + self.bn1d = _bn(out_channel) + self.conv2 = _conv3x3(out_channel, out_channel, stride=1, res_base=self.res_base) + self.bn2d = _bn(out_channel) + self.relu = ops.ReLU() + + self.down_sample = False + if stride != 1 or in_channel != out_channel: + self.down_sample = True + + self.down_sample_layer = None + if self.down_sample: + self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, stride, + use_se=use_se, res_base=self.res_base), + _bn(out_channel, res_base)]) + + def construct(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1d(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2d(out) + + if self.down_sample: + identity = self.down_sample_layer(identity) + + out = out + identity + out = self.relu(out) + + return out + + +class ResNet(nn.Cell): + """ + ResNet architecture. + + Args: + block (Cell): Block for network. + layer_nums (list): Numbers of block in different layers. + in_channels (list): Input channel in each layer. + out_channels (list): Output channel in each layer. + strides (list): Stride size in each layer. + num_classes (int): The number of classes that the training images are belonging to. + use_se (bool): Enable SE-ResNet50 net. Default: False. + se_block(bool): Use se block in SE-ResNet50 net in layer 3 and layer 4. Default: False. + res_base (bool): Enable parameter setting of resnet18. Default: False. + + Returns: + Tensor, output tensor. 
+ + Examples: + >>> ResNet(ResidualBlock, + >>> [3, 4, 6, 3], + >>> [64, 256, 512, 1024], + >>> [256, 512, 1024, 2048], + >>> [1, 2, 2, 2], + >>> 10) + """ + + def __init__(self, + block, + layer_nums, + in_channels, + out_channels, + strides, + num_classes, + use_se=False, + res_base=False): + super(ResNet, self).__init__() + + if not len(layer_nums) == len(in_channels) == len(out_channels) == 4: + raise ValueError("the length of layer_num, in_channels, out_channels list must be 4!") + self.use_se = use_se + self.res_base = res_base + self.se_block = False + if self.use_se: + self.se_block = True + + if self.use_se: + self.conv1_0 = _conv3x3(3, 32, stride=2, use_se=self.use_se) + self.bn1_0 = _bn(32) + self.conv1_1 = _conv3x3(32, 32, stride=1, use_se=self.use_se) + self.bn1_1 = _bn(32) + self.conv1_2 = _conv3x3(32, 64, stride=1, use_se=self.use_se) + else: + self.conv1 = _conv7x7(3, 64, stride=2, res_base=self.res_base) + self.bn1 = _bn(64, self.res_base) + self.relu = ops.ReLU() + + if self.res_base: + self.pad = nn.Pad(paddings=((0, 0), (0, 0), (1, 1), (1, 1))) + self.maxpool = ops.MaxPool2d(kernel_size=3, stride=2, pad_mode="valid") + else: + self.maxpool = ops.MaxPool2d(kernel_size=3, stride=2, pad_mode="same") + + self.layer1 = self._make_layer(block, + layer_nums[0], + in_channel=in_channels[0], + out_channel=out_channels[0], + stride=strides[0], + use_se=self.use_se) + self.layer2 = self._make_layer(block, + layer_nums[1], + in_channel=in_channels[1], + out_channel=out_channels[1], + stride=strides[1], + use_se=self.use_se) + self.layer3 = self._make_layer(block, + layer_nums[2], + in_channel=in_channels[2], + out_channel=out_channels[2], + stride=strides[2], + use_se=self.use_se, + se_block=self.se_block) + self.layer4 = self._make_layer(block, + layer_nums[3], + in_channel=in_channels[3], + out_channel=out_channels[3], + stride=strides[3], + use_se=self.use_se, + se_block=self.se_block) + + self.avgpool = ops.AvgPool2d(4) + self.concat = P.Concat(1) + 
self.flatten = nn.Flatten() + self.end_point = _fc(16384, num_classes, use_se=self.use_se) + + def construct(self, x): + x = to_2channel(x[:, :3], x[:, 3:]) + if self.use_se: + x = self.conv1_0(x) + x = self.bn1_0(x) + x = self.relu(x) + x = self.conv1_1(x) + x = self.bn1_1(x) + x = self.relu(x) + x = self.conv1_2(x) + else: + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + if self.res_base: + x_1, x_2 = get_x_and_y(x) + x_1 = self.pad(x_1) + x_2 = self.pad(x_2) + x = to_2channel(x_1, x_2) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + out = self.avgpool(x) + out_x, out_y = get_x_and_y(out) + out = self.concat([out_x, out_y]) + out = self.flatten(out) + out = self.end_point(out) + return out + + def _make_layer(self, block, layer_num, in_channel, out_channel, stride, use_se=False, se_block=False): + """ + Make stage network of ResNet. + + Args: + block (Cell): Resnet block. + layer_num (int): Layer number. + in_channel (int): Input channel. + out_channel (int): Output channel. + stride (int): Stride size for the first convolutional layer. + se_block(bool): Use se block in SE-ResNet50 net. Default: False. + Returns: + SequentialCell, the output layer. + + Examples: + >>> _make_layer(ResidualBlock, 3, 128, 256, 2) + """ + layers = [] + + resnet_block = block(in_channel, out_channel, stride=stride, use_se=use_se) + layers.append(resnet_block) + if se_block: + for _ in range(1, layer_num - 1): + resnet_block = block(out_channel, out_channel, stride=1, use_se=use_se) + layers.append(resnet_block) + resnet_block = block(out_channel, out_channel, stride=1, use_se=use_se, se_block=se_block) + layers.append(resnet_block) + else: + for _ in range(1, layer_num): + resnet_block = block(out_channel, out_channel, stride=1, use_se=use_se) + layers.append(resnet_block) + return nn.SequentialCell(layers) + + +def resnet18(class_num=10): + """ + Get ResNet18 neural network. 
+ + Args: + class_num (int): Class number. + + Returns: + Cell, cell instance of ResNet18 neural network. + + Examples: + >>> net = resnet18(10) + """ + return ResNet(ResidualBlockBase, + [2, 2, 2, 2], + [64, 64, 128, 256], + [64, 128, 256, 512], + [1, 2, 2, 2], + class_num, + res_base=True) + + +def resnet34(class_num=10): + """ + Get ResNet34 neural network. + + Args: + class_num (int): Class number. + + Returns: + Cell, cell instance of ResNet34 neural network. + + Examples: + >>> net = resnet34(10) + """ + return ResNet(ResidualBlockBase, + [3, 4, 6, 3], + [64, 64, 128, 256], + [64, 128, 256, 512], + [1, 2, 2, 2], + class_num, + res_base=True) + + +def resnet50(class_num=10): + """ + Get ResNet50 neural network. + + Args: + class_num (int): Class number. + + Returns: + Cell, cell instance of ResNet50 neural network. + + Examples: + >>> net = resnet50(10) + """ + return ResNet(ResidualBlock, + [3, 4, 6, 3], + [64, 256, 512, 1024], + [256, 512, 1024, 2048], + [1, 2, 2, 2], + class_num) + + +def se_resnet50(class_num=1001): + """ + Get SE-ResNet50 neural network. + + Args: + class_num (int): Class number. + + Returns: + Cell, cell instance of SE-ResNet50 neural network. + + Examples: + >>> net = se_resnet50(1001) + """ + return ResNet(ResidualBlock, + [3, 4, 6, 3], + [64, 256, 512, 1024], + [256, 512, 1024, 2048], + [1, 2, 2, 2], + class_num, + use_se=True) + + +def resnet101(class_num=1001): + """ + Get ResNet101 neural network. + + Args: + class_num (int): Class number. + + Returns: + Cell, cell instance of ResNet101 neural network. 
+ + Examples: + >>> net = resnet101(1001) + """ + return ResNet(ResidualBlock, + [3, 4, 23, 3], + [64, 256, 512, 1024], + [256, 512, 1024, 2048], + [1, 2, 2, 2], + class_num) diff --git a/tests/st/hypercomplex/test_deepconvnet.py b/tests/st/hypercomplex/test_deepconvnet.py new file mode 100644 index 00000000000..73652097c56 --- /dev/null +++ b/tests/st/hypercomplex/test_deepconvnet.py @@ -0,0 +1,13 @@ +import numpy as np +from mindspore import context, Tensor +from mindspore.ops import operations as P +from deepconvnet import DeepConvNet + + +if __name__ == '__main__': + context.set_context(mode=context.GRAPH_MODE, device_target="GPU") + model = DeepConvNet() + model.set_train(False) + u = Tensor(np.random.random((2, 32, 1, 4096)).astype(np.float32)) + y = model(u) + print(P.Shape()(y), y) diff --git a/tests/st/hypercomplex/test_mnist.py b/tests/st/hypercomplex/test_mnist.py new file mode 100644 index 00000000000..e5edd9dd4ee --- /dev/null +++ b/tests/st/hypercomplex/test_mnist.py @@ -0,0 +1,112 @@ +import argparse + +import numpy as np +import mindspore +from mindspore import nn, context, ops +from mindspore.common import dtype as mstype +from mindspore.dataset import MnistDataset +from hcmodel import HCModel + +context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + + +class ImageToDualImage: + @staticmethod + def __call__(img): + return np.concatenate((img, img), axis=0) + + +def create_dataset(dataset_dir, batch_size, usage=None): + dataset = MnistDataset(dataset_dir=dataset_dir, usage=usage) + type_cast_op = mindspore.dataset.transforms.TypeCast(mstype.int32) + + # Image transforms: scale by 1/255, normalize, convert HWC to CHW, then duplicate along axis 0 + trans = [mindspore.dataset.vision.Rescale(1.0 / 255.0, 0), + mindspore.dataset.vision.Normalize(mean=(0.1307,), std=(0.3081,)), + mindspore.dataset.vision.HWC2CHW(), + ImageToDualImage()] + + dataset = dataset.map(operations=type_cast_op, input_columns="label") + dataset = dataset.map(operations=trans, input_columns="image") + dataset = dataset.batch(batch_size) +
return dataset + + +def train(model, dataset, loss_fn, optimizer): + # Forward pass: return the loss together with the logits + def forward_fn(data, label): + logits = model(data) + loss = loss_fn(logits, label) + return loss, logits + + # Build a function that returns the forward outputs and the gradients for optimizer.parameters + grad_fn = ops.value_and_grad(forward_fn, None, optimizer.parameters, has_aux=True) + + # One training step: compute the gradients, then apply the optimizer update + def train_step(data, label): + (loss, _), grads = grad_fn(data, label) + loss = ops.depend(loss, optimizer(grads)) + return loss + + size = dataset.get_dataset_size() + model.set_train() + for batch, (data, label) in enumerate(dataset.create_tuple_iterator()): + loss = train_step(data, label) + + if batch % 100 == 0: + loss, current = loss.asnumpy(), batch + print(f"loss: {loss:>7f} [{current:>3d}/{size:>3d}]") + + +def test(model, dataset, loss_fn): + num_batches = dataset.get_dataset_size() + model.set_train(False) + total, test_loss, correct = 0, 0, 0 + for data, label in dataset.create_tuple_iterator(): + pred = model(data) + total += len(data) + test_loss += loss_fn(pred, label).asnumpy() + correct += (pred.argmax(1) == label).asnumpy().sum() + test_loss /= num_batches + correct /= total + print(f"Test: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n") + + +def main(): + parser = argparse.ArgumentParser(description='MindSpore MNIST Testing') + parser.add_argument( + '--dataset', default=None, type=str, metavar='DS', required=True, + help='Path to the dataset folder' + ) + parser.add_argument( + '--bs', default=64, type=int, metavar='N', required=False, + help='Mini-batch size' + ) + args = parser.parse_args() + + # Build the MNIST "train" and "test" dataset pipelines.
+ train_dataset = create_dataset(args.dataset, args.bs, "train") + test_dataset = create_dataset(args.dataset, args.bs, "test") + + for img, lbl in test_dataset.create_tuple_iterator(): + print(f"Shape of image [N, C, H, W]: {img.shape} {img.dtype}") + print(f"Shape of label: {lbl.shape} {lbl.dtype}") + break + + # Initialize hypercomplex model + net = HCModel() + + # Initialize loss function and optimizer + criterion = nn.CrossEntropyLoss() + optim = nn.SGD(net.trainable_params(), 1e-2) + + epochs = 10 + for t in range(epochs): + print(f"Epoch {t+1}\n-------------------------------") + train(net, train_dataset, criterion, optim) + test(net, test_dataset, criterion) + print("Done!") + + +if __name__ == "__main__": + main() diff --git a/tests/st/hypercomplex/test_resnet.py b/tests/st/hypercomplex/test_resnet.py new file mode 100644 index 00000000000..04528b32901 --- /dev/null +++ b/tests/st/hypercomplex/test_resnet.py @@ -0,0 +1,115 @@ +import argparse +import numpy as np +from mindspore import nn, context, ops +from mindspore.common import dtype as mstype +from mindspore.dataset import Cifar10Dataset +from mindspore.dataset import vision, transforms +from resnet import resnet18 + +context.set_context(mode=context.GRAPH_MODE, device_target="GPU") + + +class ImageToDualImage: + @staticmethod + def __call__(img): + return np.concatenate((img, img), axis=0) + + +def create_dataset(dataset_dir, batch_size, usage=None): + dataset = Cifar10Dataset(dataset_dir=dataset_dir, usage=usage) + type_cast_op = transforms.TypeCast(mstype.int32) + + # Image transforms: augment as PIL (padded crop, random flip), resize, convert to a CHW tensor, normalize, duplicate along axis 0 + trans = [vision.ToPIL(), + vision.RandomCrop((32, 32), (4, 4, 4, 4)), + vision.RandomHorizontalFlip(prob=0.5), + vision.Resize((224, 224)), + vision.ToTensor(), + # No Rescale here: ToTensor has already scaled the pixel values to [0.0, 1.0]. + vision.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010], is_hwc=False), + ImageToDualImage()] + + dataset = dataset.map(operations=type_cast_op, input_columns="label") + dataset =
dataset.map(operations=trans, input_columns="image") + dataset = dataset.batch(batch_size) + return dataset + + +def train(model, dataset, loss_fn, optimizer): + # Forward pass: return the loss together with the logits + def forward_fn(data, label): + logits = model(data) + loss = loss_fn(logits, label) + return loss, logits + + # Build a function that returns the forward outputs and the gradients for optimizer.parameters + grad_fn = ops.value_and_grad(forward_fn, None, optimizer.parameters, has_aux=True) + + # One training step: compute the gradients, then apply the optimizer update + def train_step(data, label): + (loss, _), grads = grad_fn(data, label) + loss = ops.depend(loss, optimizer(grads)) + return loss + + size = dataset.get_dataset_size() + model.set_train() + for batch, (data, label) in enumerate(dataset.create_tuple_iterator()): + loss = train_step(data, label) + + if batch % 100 == 0: + loss, current = loss.asnumpy(), batch + print(f"loss: {loss:>7f} [{current:>3d}/{size:>3d}]") + + +def test(model, dataset, loss_fn): + num_batches = dataset.get_dataset_size() + model.set_train(False) + total, test_loss, correct = 0, 0, 0 + for data, label in dataset.create_tuple_iterator(): + pred = model(data) + total += len(data) + test_loss += loss_fn(pred, label).asnumpy() + correct += (pred.argmax(1) == label).asnumpy().sum() + test_loss /= num_batches + correct /= total + print(f"Test: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n") + + +def main(): + parser = argparse.ArgumentParser(description='MindSpore ResNet Testing') + parser.add_argument( + '--dataset', default=None, type=str, metavar='DS', required=True, + help='Path to the dataset folder' + ) + parser.add_argument( + '--bs', default=64, type=int, metavar='N', required=False, + help='Mini-batch size' + ) + args = parser.parse_args() + + # Build the CIFAR-10 "train" and "test" dataset pipelines.
+ train_dataset = create_dataset(args.dataset, args.bs, "train") + test_dataset = create_dataset(args.dataset, args.bs, "test") + + for img, lbl in test_dataset.create_tuple_iterator(): + print(f"Shape of image [N, C, H, W]: {img.shape} {img.dtype}") + print(f"Shape of label: {lbl.shape} {lbl.dtype}") + break + + # Build the hypercomplex ResNet18 with its default arguments + net = resnet18() + + # Cross-entropy loss and an SGD optimizer over the trainable parameters + criterion = nn.CrossEntropyLoss() + optim = nn.SGD(net.trainable_params(), 1e-2) + + epochs = 10 + for t in range(epochs): + print(f"Epoch {t+1}\n-------------------------------") + train(net, train_dataset, criterion, optim) + test(net, test_dataset, criterion) + print("Done!") + + +if __name__ == "__main__": + main()