diff --git a/.jenkins/check/config/filter_pylint.txt b/.jenkins/check/config/filter_pylint.txt index f57108b15ce..e85bffcfc05 100644 --- a/.jenkins/check/config/filter_pylint.txt +++ b/.jenkins/check/config/filter_pylint.txt @@ -74,6 +74,7 @@ "mindspore/mindspore/python/mindspore/ops/operations/array_ops.py" "redefined-builtin" "mindspore/mindspore/python/mindspore/ops/_grad_experimental/grad_sparse_ops.py" "unused-variable" "mindspore/mindspore/python/mindspore/ops/operations/_inner_ops.py" "not-callable" +"mindspore/mindspore/python/mindspore/hypercomplex" "useless-return" # MindData "mindspore/mindspore/python/mindspore/dataset/__init__.py" "redefined-builtin" diff --git a/mindspore/python/mindspore/hypercomplex/__init__.py b/mindspore/python/mindspore/hypercomplex/__init__.py new file mode 100644 index 00000000000..c76e4e50663 --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/__init__.py @@ -0,0 +1,24 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +HyperComplex operators. + +Note: + This feature is a beta feature, and we are still improving its functionality. + The interface may be changed or removed in the future. 
+""" +import mindspore.hypercomplex.complex +import mindspore.hypercomplex.dual +import mindspore.hypercomplex.double diff --git a/mindspore/python/mindspore/hypercomplex/complex/__init__.py b/mindspore/python/mindspore/hypercomplex/complex/__init__.py new file mode 100644 index 00000000000..21b673d19fd --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/complex/__init__.py @@ -0,0 +1,23 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Complex Operators""" +from mindspore.hypercomplex.complex.complex_relu import ReLU +from mindspore.hypercomplex.complex.complex_operators import Conv1d, Conv2d, Conv3d +from mindspore.hypercomplex.complex.complex_operators import BatchNorm1d, BatchNorm2d, BatchNorm3d +from mindspore.hypercomplex.complex.complex_operators import Dense + +from mindspore.hypercomplex.hypercomplex.hc_pool import MaxPool1d, MaxPool2d, \ + AvgPool1d, AvgPool2d, AdaptiveAvgPool1d, AdaptiveAvgPool2d, \ + AdaptiveAvgPool3d, AdaptiveMaxPool1d, AdaptiveMaxPool2d diff --git a/mindspore/python/mindspore/hypercomplex/complex/_complex_bn_impl.py b/mindspore/python/mindspore/hypercomplex/complex/_complex_bn_impl.py new file mode 100644 index 00000000000..5d8f6a0ff9e --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/complex/_complex_bn_impl.py @@ -0,0 +1,139 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""complex BatchNorm implementation""" +from typing import Tuple + +import mindspore.ops as ops +from mindspore.common.tensor import Tensor +from mindspore.hypercomplex.hypercomplex._hc_bn_impl import _BaseBatchNormImpl as HCBatchNormImpl +from mindspore.hypercomplex.utils import get_x_and_y as get_real_and_imag + + +class _BatchNormImpl(HCBatchNormImpl): + r""" + The implementor class of the Batch Normalization layer for complex numbers. + + Implements the functionality specific to complex numbers and needed by the 'BatchNorm' class. This includes: + getting the norm of complex number, applying scaling and shift to a complex tensor, and updating the running + mean and variance, which are used during inference. + + Args: + affine (bool) - A bool value. When set to True, gamma and beta can be learned. + use_batch_statistics (bool): If true, use the mean value and variance value of current batch data. If false, + use the mean value and variance value of specified value. If None, the training process will use the mean + and variance of current batch data and track the running mean and variance, the evaluation process will use + the running mean and variance. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. + num_features (int): The number of features in the input space. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def scale_and_shift(self, + u_x: Tensor, + u_y: Tensor, + scale_x: Tensor, + scale_y: Tensor, + shift_x: Tensor, + shift_y: Tensor) -> Tuple[Tensor, Tensor]: + r""" + Applies complex scaling and shift to an input tensor. 
+ + This function implements the operation as: + + .. math:: + \begin{align} + \text{Re(out)} = \text{Re(inp)} * \text{Re(scale)} - \text{Im(inp)} * \text{Im(scale)} + \text{Re(shift)}\\ + \text{Im(out)} = \text{Re(inp)} * \text{Im(scale)} + \text{Im(inp)} * \text{Re(scale)} + \text{Im(shift)}, + \end{align} + + where :math:`inp` is the complex input tensors, :math:`scale` and :math:`shift` are complex parameters + representing the scaling and shift coefficients respectively. :math:`\text{Re(...)}` and :math:`\text{Im(...)}` + are respectively real and imaginary parts of the complex-valued expression inside the parentheses. + + Args: + u_x (Tensor): A tensor of shape (C,), which represents the real part of the normalized inputs. + u_y (Tensor): A tensor of shape (C,), which represents the imaginary part of the normalized inputs. + scale_x (Tensor): A tensor of shape (C,), which represents the real part of the scaling vector. + scale_y (Tensor): A tensor of shape (C,), which represents the imaginary part of the scaling vector. + shift_x (Tensor): A tensor of shape (C,), which represents the real part of the bias vector. + shift_y (Tensor): A tensor of shape (C,), which represents the imaginary part of the bias vector. + + Returns: + Tuple of two tensors of shape (C,), which contains the real and the imaginary parts of rescaled and + recentered inputs. + """ + out_x = u_x * scale_x - u_y * scale_y + shift_x + out_y = u_x * scale_y + u_y * scale_x + shift_y + return out_x, out_y + + def get_norm(self, + u: Tensor) -> Tensor: + r""" + Calculates norm of complex elements of an input tensor. + + Norm is a non-negative real number that is a characteristic of 'magnitude' of that number, i.e. how far away it + is from zero. The function implements the operation as: + + .. 
math:: + \text{out} = \sqrt{\text{Re(inp)}^2 + \text{Im(inp)}^2 + \delta}, + + where :math:`inp` is the complex input tensors and :math:`\delta` is a small positive constant, which is needed + to avoid division by zero in case statistical variance is close to zero. :math:`\text{Re(...)}` and + :math:`\text{Im(...)}` are respectively real and imaginary parts of the complex-valued expression inside + the parentheses. + + Args: + u (Tensor): Tensor of shape (2, *, ..., *). '2' denotes that the input tensor belongs to the complex domain + and has a real and an imaginary parts. + + Returns: + Tensor of shape (*, ..., *). The count and size of dimensions of the output tensor are the same ones as in + the input tensor, but without the very first dimension because the output tensor is real-valued. + """ + norm2 = self.get_square_norm(u) + eps = 1e-7 + out = ops.sqrt(norm2 + eps) + return out + + def get_square_norm(self, + u: Tensor) -> Tensor: + r""" + Calculates element-wise squared norm of complex elements of an input tensor. + + Norm is a non-negative real number that is a characteristic of 'magnitude' of that number, i.e. how far away it + is from zero. The function implements the operation as: + + .. math:: + \text{out} = \text{Re(inp)}^2 + \text{Im(inp)}^2, + + where :math:`inp` is the complex input tensors, :math:`\text{Re(...)}` and :math:`\text{Im(...)}` + are respectively real and imaginary parts of the complex-valued expression inside the parentheses. + + Args: + u (Tensor): Tensor of shape (2, *, ..., *). '2' denotes that the input tensor belongs to the complex domain + and has a real and an imaginary parts. + + Returns: + Tensor of shape (*, ..., *). The count and size of dimensions of the output tensor are the same ones as in + the input tensor, but without the very first dimension because the output tensor is real-valued. 
+ """ + u_r, u_i = get_real_and_imag(u) + out = u_r ** 2 + u_i ** 2 + return out diff --git a/mindspore/python/mindspore/hypercomplex/complex/_complex_conv_impl.py b/mindspore/python/mindspore/hypercomplex/complex/_complex_conv_impl.py new file mode 100644 index 00000000000..c0329f57d55 --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/complex/_complex_conv_impl.py @@ -0,0 +1,201 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""complex convolution implementation""" +import numbers +from typing import Callable, Tuple, Union + +from mindspore.common.tensor import Tensor +from mindspore.common.initializer import Initializer +from mindspore.hypercomplex.hypercomplex._hc_conv_impl import _BaseConvImpl as BaseConvImpl +from mindspore import ops as P + + +class _ConvImpl(BaseConvImpl): + r""" + The implementor class of the convolution layer for complex numbers. + + Applies complex-valued convolution transformation. This layer implements the operation as: + + .. 
math:: + \begin{align} + \text{Re(ccor)} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)}) + - \text{ccor}(\text{Im(kernel)}, \text{Im(inp)})\\ + \text{Im(ccor)} = \text{ccor}(\text{Im(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Re(kernel)}, \text{Im(inp)}) + \end{align} + + where :math:`ccor` is the real-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, + :math:`inp` is the complex-valued input tensor, :math:`\text{kernel}` is a complex weight matrix with the same + data type as the :math:`inp` created by the layer, :math:`\text{Re(...)}` and :math:`\text{Im(...)}` + are respectively real and imaginary parts of the complex-valued expression inside the parentheses. + + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of real and imaginary parts of the kernel. + + Inputs: + - **conv_op** (Callable) - the function of the real-valued convolution to be used for decomposition + of the complex convolution transformation. For example, mindspore.ops.operations.Conv2D(...) may be passed + - **real** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`, + which defines the real part of the input. + - **imag** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`, + which defines the imaginary part of the input. + + Outputs: + Tuple of two tensors, each of shape :math:`(N, C_{out}, *, ..., *)` or :math:`(N, *, ..., *, C_{out})`, + which represents the real and the imaginary parts of the output. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def construct(self, + conv_op: Callable, + real: Tensor, + imag: Tensor) -> Tuple[Tensor, Tensor]: + out_rr = conv_op(real, self.weight_x) + out_ii = conv_op(imag, self.weight_y) + out_ri = conv_op(real, self.weight_y) + out_ir = conv_op(imag, self.weight_x) + + out_r = out_rr - out_ii + out_i = out_ri + out_ir + + return out_r, out_i + + +class _KaratsubaConvImpl(BaseConvImpl): + r""" + The implementor class of the convolution layer for complex numbers. + + Applies complex-valued convolution transformation. This layer implements the operation as: + + .. math:: + \begin{align} + \text{C1} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)})\\ + \text{C2} = \text{ccor}(\text{Im(kernel)}, \text{Im(inp)})\\ + \text{C3} = \text{ccor}(\text{Re(kernel)} + \text{Im(kernel)}, \text{Re(inp)} + \text{Im(inp)})\\ + \text{Re(out)} = C1 - C2\\ + \text{Im(out)} = C3 - C1 - C2, + \end{align} + + where :math:`ccor` is the real-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, + :math:`inp` is the complex-valued input tensor, :math:`\text{kernel}` is a complex weight matrix with the same + data type as the :math:`inp` created by the layer, :math:`\text{Re(...)}` and :math:`\text{Im(...)}` + are respectively real and imaginary parts of the complex-valued expression inside the parentheses. + + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of real and imaginary parts of the kernel. + + Inputs: + - **conv_op** (Callable) - the function of the real-valued convolution to be used for decomposition + of the complex convolution transformation. For example, mindspore.ops.operations.Conv2D(...) 
may be passed + - **real** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`, + which defines the real part of the input. + - **imag** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`, + which defines the imaginary part of the input. + + Outputs: + Tuple of two tensors, each of shape :math:`(N, C_{out}, *, ..., *)` or :math:`(N, *, ..., *, C_{out})`, + which represents the real and the imaginary parts of the output. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def construct(self, + conv_op: Callable, + real: Tensor, + imag: Tensor) -> Tuple[Tensor, Tensor]: + c1 = conv_op(real, self.weight_x) + c2 = conv_op(imag, self.weight_y) + c3 = conv_op(real + imag, self.weight_x + self.weight_y) + + out_r = c1 - c2 + out_i = c3 - c1 - c2 + + return out_r, out_i + + +class _ReImConvImpl(BaseConvImpl): + r""" + The implementor class of the convolution layer for complex numbers. + + Applies complex-valued convolution transformation. This layer implements the operation as: + + .. math:: + \begin{align} + \text{inp_cat} = \text{cat}(\text{Re(inp)}, \text{Im(inp)}) \\ + \text{K1} = \text{cat}(\text{Re(kernel)}, \text{-Im(kernel)}) \\ + \text{K2} = \text{cat}(\text{Im(kernel)}, \text{Re(kernel)}) \\ + \text{Re(ccor)} = \text{ccor}(\text{K1}, \text{inp_cat}) \\ + \text{Im(ccor)} = \text{ccor}(\text{K2}, \text{inp_cat}) + \end{align} + + where :math:`ccor` is the real-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, + :math:`inp` is the complex-valued input tensor, :math:`\text{kernel}` is a complex weight matrix with the same + data type as the :math:`inp` created by the layer, :math:`\text{cat}` is concatenation along the channel axis, + :math:`\text{Re(...)}` and :math:`\text{Im(...)}` are respectively real and imaginary parts of the complex-valued + expression inside the parentheses. 
+ + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of real and imaginary parts of the kernel. + factory_kwargs (dict): Additional parameters, which must include data_format. + + Inputs: + - **conv_op** (Callable) - the function of the real-valued convolution to be used for decomposition + of the complex convolution transformation. For example, mindspore.ops.operations.Conv2D(...) may be passed + - **real** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`, + which defines the real part of the input. + - **imag** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`, + which defines the imaginary part of the input. + + Outputs: + Tuple of two tensors, each of shape :math:`(N, C_{out}, *, ..., *)` or :math:`(N, *, ..., *, C_{out})`, + which represents the real and the imaginary parts of the output. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + weight_init: Union[Tensor, str, Initializer, numbers.Number], + weight_shape: tuple, + **factory_kwargs) -> None: + super(_ReImConvImpl, self).__init__(weight_init, weight_shape, **factory_kwargs) + data_format = factory_kwargs.get('data_format', 'nchw') + c_idx = data_format.lower().find('c') + if c_idx < 0: + raise ValueError(f"Data format {data_format} is unsupported") + self.concat = P.Concat(c_idx) + self.neg = P.Neg() + + def construct(self, + conv_op: Callable, + real: Tensor, + imag: Tensor) -> Tuple[Tensor, Tensor]: + + inp = self.concat([real, imag]) + weight_y_neg = self.neg(self.weight_y) + w1 = self.concat([self.weight_x, weight_y_neg]) + w2 = self.concat([self.weight_y, self.weight_x]) + out_r = conv_op(inp, w1) + out_i = conv_op(inp, w2) + return out_r, out_i diff --git a/mindspore/python/mindspore/hypercomplex/complex/_complex_dense_impl.py b/mindspore/python/mindspore/hypercomplex/complex/_complex_dense_impl.py new file mode 100644 index 00000000000..18bfad327a2 --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/complex/_complex_dense_impl.py @@ -0,0 +1,125 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""complex dense implementation""" +from typing import Callable, Tuple + +from mindspore.common.tensor import Tensor +from mindspore.hypercomplex.hypercomplex._hc_dense_impl import _BaseDenseImpl as BaseDenseImpl + + +class _DenseImpl(BaseDenseImpl): + r""" + The implementor class of the dense connected layer for complex numbers. + + Applies complex-valued matrix multiplication for dense connected layer. This layer implements the operation as: + + .. math:: + \begin{align} + \text{Re(out)} = \text{Re(inp)} * \text{Re(kernel)} - \text{Im(inp)} * \text{Im(kernel)}\\ + \text{Im(out)} = \text{Re(inp)} * \text{Im(kernel)} + \text{Im(inp)} * \text{Re(kernel)}, + \end{align} + + where :math:`inp` is the complex input tensors, :math:`\text{kernel}` is a complex weight matrix with the same + data type as the :math:`inp` created by the layer, :math:`\text{Re(...)}` and :math:`\text{Im(...)}` + are respectively real and imaginary parts of the complex-valued expression inside the parentheses. + + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of real and imaginary parts of the kernel. + **factory_kwargs (dict): Extra parameters which may be needed by specific subclasses. + + Inputs: + - **matmul_op** (Callable) - the function of the real-valued matrix multiplication to be used for decomposition + of the complex linear transformation. Usually, mindspore.ops.operations.MatMul(...) is passed + - **real** (Tensor) - Tensor of shape :math:`(*, in\_channels)`, which defines the real part of the input. + - **imag** (Tensor) - Tensor of shape :math:`(*, in\_channels)`, which defines the imaginary part of the input. 
+ + Outputs: + Tuple of two tensors, each of shape :math:`(*, out\_channels)`, which represents the real and the imaginary + parts of the output. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def construct(self, + matmul_op: Callable, + real: Tensor, + imag: Tensor) -> Tuple[Tensor, Tensor]: + out_rr = matmul_op(real, self.weight_x) + out_ii = matmul_op(imag, self.weight_y) + out_ri = matmul_op(real, self.weight_y) + out_ir = matmul_op(imag, self.weight_x) + + out_r = out_rr - out_ii + out_i = out_ri + out_ir + + return out_r, out_i + + +class _KaratsubaDenseImpl(BaseDenseImpl): + r""" + The implementor class of the dense connected layer for complex numbers. + + Applies complex-valued matrix multiplication for dense connected layer. This layer implements the operation as: + + .. math:: + \begin{align} + \text{L1} = \text{Re(inp)} * \text{Re(kernel)}\\ + \text{L2} = \text{Im(inp)} * \text{Im(kernel)}\\ + \text{L3} = (\text{Re(inp)} + \text{Im(inp)}) * (\text{Re(kernel)} + \text{Im(kernel)})\\ + \text{Re(out)} = L1 - L2\\ + \text{Im(out)} = L3 - L1 - L2, + \end{align} + + where :math:`inp` is the complex input tensors, :math:`\text{kernel}` is a complex weight matrix with the same + data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is a complex bias vector with the same + data type as the :math:`inp` created by the layer (only if has_bias is True). :math:`\text{Re(...)}` and + :math:`\text{Im(...)}` are respectively real and imaginary parts of the complex-valued expression inside + the parentheses. + + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of real and imaginary parts of the kernel. + **factory_kwargs (dict): Extra parameters which may be needed by specific subclasses. 
+ + Inputs: + - **matmul_op** (Callable) - the function of the real-valued matrix multiplication to be used for decomposition + of the complex linear transformation. Usually, mindspore.ops.operations.MatMul(...) is passed + - **real** (Tensor) - Tensor of shape :math:`(*, in\_channels)`, which defines the real part of the input. + - **imag** (Tensor) - Tensor of shape :math:`(*, in\_channels)`, which defines the imaginary part of the input. + + Outputs: + Tuple of two tensors, each of shape :math:`(*, out\_channels)`, which represents the real and the imaginary + parts of the output. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def construct(self, + matmul_op: Callable, + real: Tensor, + imag: Tensor) -> Tuple[Tensor, Tensor]: + l1 = matmul_op(real, self.weight_x) + l2 = matmul_op(imag, self.weight_y) + l3 = matmul_op(real + imag, self.weight_x + self.weight_y) + + out_r = l1 - l2 + out_i = l3 - l1 - l2 + + return out_r, out_i diff --git a/mindspore/python/mindspore/hypercomplex/complex/complex_operators.py b/mindspore/python/mindspore/hypercomplex/complex/complex_operators.py new file mode 100644 index 00000000000..27e8701f675 --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/complex/complex_operators.py @@ -0,0 +1,1118 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Complex operators""" +import numbers +from typing import Union + +from mindspore.common.initializer import Initializer +from mindspore.common.tensor import Tensor +# Batch Normalization +from mindspore.hypercomplex.hypercomplex.hc_bn import BatchNorm1d as HBatchNorm1d, \ + BatchNorm2d as HBatchNorm2d, BatchNorm3d as HBatchNorm3d +from mindspore.hypercomplex.complex._complex_bn_impl import _BatchNormImpl as BatchNormImpl +# Convolution +from mindspore.hypercomplex.hypercomplex.hc_conv import Conv1d as HConv1d, Conv2d as HConv2d, Conv3d as HConv3d +from mindspore.hypercomplex.complex._complex_conv_impl import _ReImConvImpl as ConvImpl, \ + _KaratsubaConvImpl as KaratsubaConvImpl +# Dense +from mindspore.hypercomplex.hypercomplex.hc_dense import Dense as HDense +from mindspore.hypercomplex.complex._complex_dense_impl import _DenseImpl as DenseImpl, \ + _KaratsubaDenseImpl as KaratsubaDenseImpl + +from mindspore.hypercomplex.hypercomplex.uniform_operator import _UniformOperator + +from mindspore.hypercomplex.utils import _size_1_t, _size_2_t, _size_3_t + + +class Conv2d(_UniformOperator): + r""" + 2D convolution layer on the complex-valued input. + + Calculates the 2D convolution on the input tensor which is typically of shape + :math:`(2, N, C_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, :math:`C_{in}` is a number of channels, + :math:`H_{in}, W_{in}` are the height and width of the feature layer respectively. The formula is defined as: + + .. math:: + + \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + + \sum_{k = 0}^{C_{in} - 1} \text{hccor}({\text{weight}(C_{\text{out}_j}, k), \text{inp}(N_i, k)}) + + where :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of + the output and :math:`j` is in the range of :math:`[0, C_{out}-1]`. 
:math:`\text{weight}(C_{\text{out}_j}, k)` + is a convolution kernel slice with shape :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`, + where :math:`\text{kernel_size[0]}` and :math:`\text{kernel_size[1]}` are the height and width of the convolution + kernel respectively. :math:`\text{bias}` is the bias parameter and :math:`\text{inp}` is the input tensor. + In this case, `data_format` of the input tensor is 'NCHW' and the shape of full convolution kernel is + :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`, + where `group` is the number of groups to split the input `inp` in the channel dimension. If `data_format` of the + input tensor is 'NHWC', the shape of full convolution kernel will be + :math:`(C_{out}, \text{kernel_size[0]}, \text{kernel_size[1]}, C_{in} / \text{group})`. + :math:`hccor` is the complex-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_. + If use_karatsuba is False, this implements the operation as: + + .. math:: + \text{Re(ccor)} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)}) + - \text{ccor}(\text{Im(kernel)}, \text{Im(inp)}) + \text{Re(bias)}\\ + \text{Im(ccor)} = \text{ccor}(\text{Im(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Re(kernel)}, \text{Im(inp)}) + \text{Im(bias)} + + And, if use_karatsuba is True then: + + .. 
math:: + \begin{align} + \text{C1} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)})\\ + \text{C2} = \text{ccor}(\text{Im(kernel)}, \text{Im(inp)})\\ + \text{C3} = \text{ccor}(\text{Re(kernel)} + \text{Im(kernel)}, \text{Re(inp)} + \text{Im(inp)})\\ + \text{Re(out)} = C1 - C2 + \text{Re(bias)}\\ + \text{Im(out)} = C3 - C1 - C2 + \text{Im(bias)}, + \end{align} + + where :math:`ccor` is the real-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, + :math:`inp` is the complex input tensor, :math:`\text{kernel}` is a complex weight matrix with the same + data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is a complex bias vector with the same + data type as the :math:`inp` created by the layer (only if has_bias is True). :math:`\text{Re(...)}` and + :math:`\text{Im(...)}` are respectively real and imaginary parts of the complex-valued expression + inside the parentheses. + + Both the operations with and without using Karatsuba's algorithm are mathematically identical, but Karatsuba's + method requires three real-valued convolutions instead of four, at the cost of increased number of additions + and subtractions. Hence, it can be effective in terms of time consumption, but only if input tensors and kernels + are of big size. + + For more details, please refer to the paper `Gradient Based Learning Applied to Document + Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. + + Note: + On Ascend platform, only group convolution in depthwise convolution scenarios is supported. + That is, when `group > 1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied. + + 'NCHW' format is supported only with GPU target device as of now. + + Args: + in_channels (int): The channel number of the input tensor of the Conv2d layer. + out_channels (int): The channel number of the output tensor of the Conv2d layer. + kernel_size (Union[int, tuple[int]]): Specifies the height and width of the 2D convolution kernel. + The data type is an integer or a tuple of two integers. 
An integer represents the height + and width of the convolution kernel. A tuple of two integers represents the height + and width of the convolution kernel respectively. + stride (Union[int, tuple[int]]): The movement stride of the 2D convolution kernel. + The data type is an integer or a tuple of two integers. An integer represents the movement step size + in both height and width directions. A tuple of two integers represents the movement step size in the + height and width directions respectively. Default: 1. + pad_mode (str): Specifies padding mode. The optional values are + "same", "valid", "pad". Default: "same". + + - same: The width of the output is the same as the value of the input divided by `stride`. + If this mode is set, the value of `padding` must be 0. + + - valid: Returns a valid calculated output without padding. Excess pixels that do not satisfy the + calculation will be discarded. If this mode is set, the value of `padding` must be 0. + + - pad: Pads the input. Padding `padding` size of zero on both sides of the input. + If this mode is set, the value of `padding` must be greater than or equal to 0. + + padding (Union[int, tuple[int]]): The number of padding on the height and width directions of the input. + The data type is an integer or a tuple of four integers. If `padding` is an integer, + then the top, bottom, left, and right padding are all equal to `padding`. + If `padding` is a tuple of 4 integers, then the top, bottom, left, and right padding + is equal to `padding[0]`, `padding[1]`, `padding[2]`, and `padding[3]` respectively. + The value should be greater than or equal to 0. Default: 0. + dilation (Union[int, tuple[int]]): Dilation size of 2D convolution kernel. + The data type is an integer or a tuple of two integers. If :math:`k > 1`, the kernel is sampled + every `k` elements. The value of `k` on the height and width directions is in range of [1, H] + and [1, W] respectively. Default: 1. 
+ group (int): Splits filter into groups, `in_channels` and `out_channels` must be + divisible by `group`. If the group is equal to `in_channels` and `out_channels`, + this 2D convolution layer also can be called 2D depthwise convolution layer. Default: 1. + has_bias (bool): Whether the Conv2d layer has a bias parameter. Default: False. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter. + It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, + values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well + as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones' + and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of + Initializer for more details. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter. + Available initialization methods are the same as 'weight_init'. Refer to the values of + Initializer for more details. Default: 'zeros'. + data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'. + Default: 'NCHW'. + use_karatsuba (bool): Specifies whether the layer uses Karatsuba's algorithm for complex-valued multiplication. + Default: False + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C_{in}, H_{in}, W_{in})` + or :math:`(2, N, H_{in}, W_{in}, C_{in})`, with float16 or float32 data type, or + :math:`(N, C_{in}, H_{in}, W_{in})` or :math:`(N, H_{in}, W_{in}, C_{in})`, with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C_{out}, H_{out}, W_{out})` or + :math:`(2, N, H_{out}, W_{out}, C_{out})`, with float16 or float32 data type, or + :math:`(N, C_{out}, H_{out}, W_{out})` or :math:`(N, H_{out}, W_{out}, C_{out})`, with complex64 data type. + + pad_mode is 'same': + + .. 
math:: + \begin{array}{ll} \\ + H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[0]}}} \right \rceil \\ + W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[1]}}} \right \rceil \\ + \end{array} + + pad_mode is 'valid': + + .. math:: + \begin{array}{ll} \\ + H_{out} = \left \lceil{\frac{H_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) } + {\text{stride[0]}}} \right \rceil \\ + W_{out} = \left \lceil{\frac{W_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) } + {\text{stride[1]}}} \right \rceil \\ + \end{array} + + pad_mode is 'pad': + + .. math:: + \begin{array}{ll} \\ + H_{out} = \left \lfloor{\frac{H_{in} + padding[0] + padding[1] - (\text{kernel_size[0]} - 1) \times + \text{dilation[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\ + W_{out} = \left \lfloor{\frac{W_{in} + padding[2] + padding[3] - (\text{kernel_size[1]} - 1) \times + \text{dilation[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\ + \end{array} + + Raises: + TypeError: If `in_channels`, `out_channels` or `group` is not an int. + TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple. + TypeError: If any two of `inp`, `weight_init` and `bias_init` are Tensors of different data type. + ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. + ValueError: If `padding` is less than 0. + ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. + ValueError: If `padding` is a tuple whose length is not equal to 4. + ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0). + ValueError: If `data_format` is neither 'NCHW' nor 'NHWC'. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.complex import Conv2d + >>> from mindspore import Tensor + >>> w = Tensor(np.random.random((2, 128, 3, 7, 7)).astype(np.float32)) + >>> b = Tensor(np.random.random((2, 128)).astype(np.float32)) + >>> net = Conv2d( + >>> in_channels=3, out_channels=128, kernel_size=7, stride=2, padding=3, + >>> pad_mode='pad', weight_init=w, bias_init=b, has_bias=True + >>> ) + >>> z = Tensor(np.random.random((2, 16, 3, 224, 224)).astype(np.float32)) + >>> out = net(z) + >>> print(out.shape) + (2, 16, 128, 112, 112) + """ + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: _size_2_t, + stride: _size_2_t = 1, + pad_mode: str = 'same', + padding: _size_2_t = 0, + dilation: _size_2_t = 1, + group: int = 1, + has_bias: bool = False, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + data_format: str = 'NCHW', + use_karatsuba: bool = False) -> None: + if use_karatsuba: + super(Conv2d, self).__init__(HConv2d, + KaratsubaConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init, + data_format=data_format) + else: + super(Conv2d, self).__init__(HConv2d, + ConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init, + data_format=data_format) + + +class Conv1d(_UniformOperator): + r""" + 1D convolution layer on the complex-valued input. 
+ + Calculates the 1D convolution on the input tensor which is typically of shape :math:`(2, N, C_{in}, L_{in})`, + where :math:`N` is batch size, :math:`C_{in}` is a number of channels and :math:`L_{in}` is a length of sequence. + The formula is defined as: + + .. math:: + + \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + + \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{inp}(N_i, k)}), + + where :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of + the output and :math:`j` is in the range of :math:`[0, C_{out}-1]`. :math:`\text{weight}(C_{\text{out}_j}, k)` + is a convolution kernel slice with shape :math:`\text{kernel_size}`, where :math:`\text{kernel_size}` + is the width of the convolution kernel. :math:`\text{bias}` is the bias parameter, + and :math:`\text{inp}` is the input tensor. The shape of full convolution kernel is + :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size})`, + where `group` is the number of groups to split the input `inp` in the channel dimension. + :math:`ccor` is the complex-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_. + If use_karatsuba is False, this implies the operation as: + + .. math:: + \text{Re(ccor)} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)}) + - \text{ccor}(\text{Im(kernel)}, \text{Im(inp)}) + \text{Re(bias)}\\ + \text{Im(ccor)} = \text{ccor}(\text{Im(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Re(kernel)}, \text{Im(inp)}) + \text{Im(bias)} + + And, if use_karatsuba is True then: + + .. 
math:: + \begin{align} + \text{C1} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)})\\ + \text{C2} = \text{ccor}(\text{Im(kernel)}, \text{Im(inp)})\\ + \text{C3} = \text{ccor}(\text{Re(kernel)} + \text{Im(kernel)}, \text{Re(inp)} + \text{Im(inp)})\\ + \text{Re(out)} = C1 - C2 + \text{Re(bias)}\\ + \text{Im(out)} = C3 - C1 - C2 + \text{Im(bias)}, + \end{align} + + where :math:`ccor` is the real-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, + :math:`inp` is the complex input tensor, :math:`\text{kernel}` is a complex weight matrix with the same + data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is a complex bias vector with the same + data type as the :math:`inp` created by the layer (only if has_bias is True). :math:`\text{Re(...)}` and + :math:`\text{Im(...)}` are respectively real and imaginary parts of the complex-valued expression inside + the parentheses. + + Both the operations with and without using Karatsuba's algorithm are mathematically identical, but Karatsuba's + method requires three real-valued convolutions instead of four, at the cost of increased number of additions + and subtractions. Hence, it can be effective in terms of time consumption, but only if input tensors and kernels + are of big size. + + For more details, please refer to the paper `Gradient Based Learning Applied to Document + Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. + + Note: + On Ascend platform, only group convolution in depthwise convolution scenarios is supported. + That is, when `group > 1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied. + + Args: + in_channels (int): The channel number of the input tensor of the Conv1d layer. + out_channels (int): The channel number of the output tensor of the Conv1d layer. + kernel_size (int): Specifies the width of the 1D convolution kernel. + stride (int): The movement stride of the 1D convolution kernel. Default: 1. + pad_mode (str): Specifies padding mode. The optional values are + "same", "valid", "pad". Default: "same". 
+ + - same: The width of the output is the same as the value of the input divided by `stride`. + If this mode is set, the value of `padding` must be 0. + + - valid: Returns a valid calculated output without padding. Excess pixels that do not satisfy the + calculation will be discarded. If this mode is set, the value of `padding` must be 0. + + - pad: Pads the input. Padding `padding` size of zero on both sides of the input. + If this mode is set, the value of `padding` must be greater than or equal to 0. + + padding (int): The number of padding on both sides of input. + The value should be greater than or equal to 0. Default: 0. + dilation (int): Dilation size of 1D convolution kernel. If :math:`k > 1`, the kernel is sampled + every `k` elements. The value of `k` is in range of [1, L]. Default: 1. + group (int): Splits filter into groups, `in_channels` and `out_channels` must be + divisible by `group`. Default: 1. + has_bias (bool): Whether the Conv1d layer has a bias parameter. Default: False. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter. + It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, + values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well + as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones' + and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of + Initializer for more details. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter. + Available initialization methods are the same as 'weight_init'. Refer to the values of + Initializer for more details. Default: 'zeros'. + use_karatsuba (bool): Specifies whether the layer uses Karatsuba's algorithm for complex-valued multiplication. 
+ Default: False + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C_{in}, L_{in})`, with float16 or float32 data type, or + :math:`(N, C_{in}, L_{in})`, with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C_{out}, L_{out})`, with float16 or float32 + data type, or :math:`(N, C_{out}, L_{out})`, with complex64 data type. + + pad_mode is 'same': + + .. math:: + L_{out} = \left \lceil{\frac{L_{in}}{\text{stride}}} \right \rceil + + pad_mode is 'valid': + + .. math:: + L_{out} = \left \lceil{\frac{L_{in} - \text{dilation} \times (\text{kernel_size} - 1) } + {\text{stride}}} \right \rceil + + pad_mode is 'pad': + + .. math:: + L_{out} = \left \lfloor{\frac{L_{in} + 2 \times padding - (\text{kernel_size} - 1) \times + \text{dilation} - 1 }{\text{stride}} + 1} \right \rfloor + + Raises: + TypeError: If `in_channels`, `out_channels`, `kernel_size`, `stride`, `padding` or `dilation` is not an int. + TypeError: If any two of `inp`, `weight_init` and `bias_init` are Tensors of different data type. + ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. + ValueError: If `padding` is less than 0. + ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.complex import Conv1d + >>> from mindspore import Tensor + >>> w = Tensor(np.random.random((2, 16, 1, 6)).astype(np.float32)) + >>> b = Tensor(np.random.random((2, 16)).astype(np.float32)) + >>> net = Conv1d( + >>> in_channels=1, out_channels=16, kernel_size=6, stride=2, padding=2, + >>> pad_mode='pad', weight_init=w, bias_init=b, has_bias=True + >>> ) + >>> z = Tensor(np.random.random((2, 8, 1, 4096)).astype(np.float32)) + >>> out = net(z) + >>> print(out.shape) + (2, 8, 16, 2048) + """ + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: _size_1_t, + stride: _size_1_t = 1, + pad_mode: str = 'same', + padding: _size_1_t = 0, + dilation: _size_1_t = 1, + group: int = 1, + has_bias: bool = False, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + use_karatsuba: bool = False) -> None: + if use_karatsuba: + super(Conv1d, self).__init__(HConv1d, + KaratsubaConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init) + else: + super(Conv1d, self).__init__(HConv1d, + ConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init) + + +class Conv3d(_UniformOperator): + r""" + 3D convolution layer on the complex-valued input. 
+ + Calculates the 3D convolution on the input tensor which is typically of shape + :math:`(2, N, C_{in}, D_{in}, H_{in}, W_{in})`, + where :math:`N` is batch size, :math:`C_{in}` is a number of channels, + :math:`D_{in}, H_{in}, W_{in}` are the depth, height and width of the feature layer respectively. + The formula is defined as: + + .. math:: + + \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + + \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{inp}(N_i, k)}) + + where :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of + the output and :math:`j` is in the range of :math:`[0,C_{out}-1]`. :math:`\text{weight}(C_{\text{out}_j}, k)` + is a convolution kernel slice with shape + :math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`, + where :math:`\text{kernel_size[0]}`, :math:`\text{kernel_size[1]}` and :math:`\text{kernel_size[2]}` are + the depth, height and width of the convolution kernel respectively. :math:`\text{bias}` is the bias parameter + and :math:`\text{inp}` is the input tensor. + The shape of full convolution kernel is + :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`, + where `group` is the number of groups to split the input `inp` in the channel dimension. + :math:`ccor` is the complex-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_. + If use_karatsuba is False, this implies the operation as: + + .. math:: + \text{Re(ccor)} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)}) + - \text{ccor}(\text{Im(kernel)}, \text{Im(inp)}) + \text{Re(bias)}\\ + \text{Im(ccor)} = \text{ccor}(\text{Im(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Re(kernel)}, \text{Im(inp)}) + \text{Im(bias)} + + And, if use_karatsuba is True then: + + .. 
math:: + \begin{align} + \text{C1} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)})\\ + \text{C2} = \text{ccor}(\text{Im(kernel)}, \text{Im(inp)})\\ + \text{C3} = \text{ccor}(\text{Re(kernel)} + \text{Im(kernel)}, \text{Re(inp)} + \text{Im(inp)})\\ + \text{Re(out)} = C1 - C2 + \text{Re(bias)}\\ + \text{Im(out)} = C3 - C1 - C2 + \text{Im(bias)}, + \end{align} + + where :math:`ccor` is the real-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, + :math:`inp` is the complex input tensor, :math:`\text{kernel}` is a complex weight matrix with the same + data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is a complex bias vector with the same + data type as the :math:`inp` created by the layer (only if has_bias is True). :math:`\text{Re(...)}` + and :math:`\text{Im(...)}` are respectively real and imaginary parts of the complex-valued expression inside + the parentheses. + + Both the operations with and without using Karatsuba's algorithm are mathematically identical, but Karatsuba's + method requires three real-valued convolutions instead of four, at the cost of increased number of additions + and subtractions. Hence, it can be effective in terms of time consumption, but only if input tensors and kernels + are of big size. + + For more details, please refer to the paper `Gradient Based Learning Applied to Document + Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. + + Note: + On Ascend platform, only group convolution in depthwise convolution scenarios is supported. + That is, when `group > 1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied. + + Args: + in_channels (int): The channel number of the input tensor of the Conv3d layer. + out_channels (int): The channel number of the output tensor of the Conv3d layer. + kernel_size (Union[int, tuple[int]]): Specifies the depth, height and width of the 3D convolution kernel. + The data type is an integer or a tuple of three integers. An integer represents the depth, height + and width of the convolution kernel. 
A tuple of three integers represents the depth, height + and width of the convolution kernel respectively. + stride (Union[int, tuple[int]]): The movement stride of the 3D convolution kernel. + The data type is an integer or a tuple of three integers. An integer represents the movement step size + in depth, height and width directions. A tuple of three integers represents the movement step size + in the depth, height and width directions respectively. Default: 1. + pad_mode (str): Specifies padding mode. The optional values are + "same", "valid", "pad". Default: "same". + + - same: The width of the output is the same as the value of the input divided by `stride`. + If this mode is set, the value of `padding` must be 0. + + - valid: Returns a valid calculated output without padding. Excess pixels that do not satisfy the + calculation will be discarded. If this mode is set, the value of `padding` must be 0. + + - pad: Pads the input. Padding `padding` size of zero on both sides of the input. + If this mode is set, the value of `padding` must be greater than or equal to 0. + + padding (Union(int, tuple[int])): The number of padding on the depth, height and width directions of the input. + The data type is an integer or a tuple of six integers. If `padding` is an integer, + then the head, tail, top, bottom, left, and right padding are all equal to `padding`. + If `padding` is a tuple of six integers, then the head, tail, top, bottom, left, and right padding + is equal to `padding[0]`, `padding[1]`, `padding[2]`, `padding[3]`, `padding[4]` and `padding[5]` + respectively. The value should be greater than or equal to 0. Default: 0. + dilation (Union[int, tuple[int]]): Dilation size of 3D convolution kernel. + The data type is an integer or a tuple of three integers. If :math:`k > 1`, the kernel is sampled + every `k` elements. The value of `k` on the depth, height and width directions is in range of + [1, D], [1, H] and [1, W] respectively. Default: 1. 
+ group (int): Splits filter into groups, `in_channels` and `out_channels` must be + divisible by `group`. Default: 1. Only 1 is currently supported. + has_bias (bool): Whether the Conv3d layer has a bias parameter. Default: False. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter. + It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, + values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well + as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones' + and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of + Initializer for more details. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter. + Available initialization methods are the same as 'weight_init'. Refer to the values of + Initializer for more details. Default: 'zeros'. + data_format (str): The optional value for data format. Currently only "NCDHW" is supported. Default: "NCDHW". + use_karatsuba (bool): Specifies whether the layer uses Karatsuba's algorithm for complex-valued multiplication. + Default: False + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C_{in}, D_{in}, H_{in}, W_{in})`, with float16 or float32 + data type, or :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`, with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C_{out}, D_{out}, H_{out}, W_{out})`, with + float16 or float32 data type, or :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`, with complex64 data type. + + pad_mode is 'same': + + .. 
math:: + \begin{array}{ll} \\ + D_{out} = \left \lceil{\frac{D_{in}}{\text{stride[0]}}} \right \rceil \\ + H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[1]}}} \right \rceil \\ + W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[2]}}} \right \rceil \\ + \end{array} + + + pad_mode is 'valid': + + .. math:: + \begin{array}{ll} \\ + D_{out} = \left \lceil{\frac{D_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) } + {\text{stride[0]}}} \right \rceil \\ + H_{out} = \left \lceil{\frac{H_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) } + {\text{stride[1]}}} \right \rceil \\ + W_{out} = \left \lceil{\frac{W_{in} - \text{dilation[2]} \times (\text{kernel_size[2]} - 1) } + {\text{stride[2]}}} \right \rceil \\ + \end{array} + + pad_mode is 'pad': + + .. math:: + \begin{array}{ll} \\ + D_{out} = \left \lfloor{\frac{D_{in} + padding[0] + padding[1] - (\text{kernel_size[0]} - 1) \times + \text{dilation[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\ + H_{out} = \left \lfloor{\frac{H_{in} + padding[2] + padding[3] - (\text{kernel_size[1]} - 1) \times + \text{dilation[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\ + W_{out} = \left \lfloor{\frac{W_{in} + padding[4] + padding[5] - (\text{kernel_size[2]} - 1) \times + \text{dilation[2]} - 1 }{\text{stride[2]}} + 1} \right \rfloor \\ + \end{array} + + Raises: + TypeError: If `in_channels`, `out_channels` or `group` is not an int. + TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple. + ValueError: If `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. + ValueError: If `padding` is less than 0. + ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. + ValueError: If `padding` is a tuple whose length is not equal to 6. + ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0, 0, 0). + ValueError: If `data_format` is not 'NCDHW'. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.complex import Conv3d + >>> from mindspore import Tensor + >>> w = Tensor(np.random.random((2, 128, 3, 3, 3, 3)).astype(np.float32)) + >>> b = Tensor(np.random.random((2, 128)).astype(np.float32)) + >>> net = Conv3d( + >>> in_channels=3, out_channels=128, kernel_size=3, stride=1, padding=1, + >>> pad_mode='pad', weight_init=w, bias_init=b, has_bias=True + >>> ) + >>> z = Tensor(np.random.random((2, 64, 3, 32, 32, 32)).astype(np.float32)) + >>> out = net(z) + >>> print(out.shape) + (2, 64, 128, 32, 32, 32) + """ + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: _size_3_t, + stride: _size_3_t = 1, + pad_mode: str = 'same', + padding: _size_3_t = 0, + dilation: _size_3_t = 1, + group: int = 1, + has_bias: bool = False, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + data_format: str = 'NCDHW', + use_karatsuba: bool = False) -> None: + if use_karatsuba: + super(Conv3d, self).__init__(HConv3d, + KaratsubaConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init, + data_format=data_format) + else: + super(Conv3d, self).__init__(HConv3d, + ConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init, + data_format=data_format) + + +class BatchNorm1d(_UniformOperator): + r""" + The complex-valued Batch Normalization layer over a second-order complex input of four dimensions including + one spatial dimension, or three dimensions. 
+ + This layer applies Batch Normalization over a complex input to reduce internal covariate shift. + Batch Normalization is widely used in convolutional networks. It rescales and recenters the feature using + a mini-batch of data and the learned parameters which can be described by the following formula: + + .. math:: + \begin{align} + \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\ + out = \frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}\\ + \text{Re(\hat{out})} = \text{Re(out)} * \text{Re(\gamma)} + - \text{Im(out)} * \text{Im(\gamma)} + \text{Re(\beta)}\\ + \text{Im(\hat{out})} = \text{Re(out)} * \text{Im(\gamma)} + + \text{Im(out)} * \text{Re(\gamma)} + \text{Im(\beta)}, + \end{align} + + where :math:`inp` is the complex input tensor, :math:`\mathrm{E}[inp]` is the arithmetic mean of the input tensor + over the batch dimension, :math:`\mathrm{Var}[inp]` is the statistical variance of the input tensor over spatial + dimensions, based on the complex norm :math:`\|x+iy\|=\sqrt{x^2+y^2}`, :math:`\gamma` and :math:`\beta` are complex + learnable parameters representing the scale and shift coefficients respectively, and :math:`\delta` is a small + positive constant, which is needed to avoid division by zero in case statistical variance is close to zero. + :math:`\text{Re(...)}` and :math:`\text{Im(...)}` are respectively real and imaginary parts of the complex-valued + expression inside the parentheses. + + Args: + num_features (int): The number of features in the input space. + eps (float): A small positive threshold, which is needed to avoid division by zero. Default: :math:`10^{-5}`. + momentum (float): A floating hyperparameter of the momentum for the running_mean and running_var computation. + Default: 0.9. + affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. 
+ The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + use_batch_statistics (bool): + + - If True, use the mean value and variance value of current batch data and track running mean + and running variance. + - If False, use the mean value and variance value of specified value, and not track statistical value. + - If None, the use_batch_statistics is automatically set to True or False according to the training + and evaluation mode. During training, the parameter is set to True, and during evaluation, the + parameter is set to False. Default: True. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, W)` or :math:`(2, N, C)`, with float16 or float32 data + type, or :math:`(N, C, W)` or :math:`(N, C)`, with complex64 data type. In the former case '2' denotes that + the input tensor belongs to the complex domain and has a real and an imaginary part. The `num_features` + in `Args` has to be equal to :math:`C` in `inp`. + + Outputs: + Tensor, the normalized, scaled, offset tensor of the same data type and shape as `inp`: + :math:`(2, N, C, W)` or :math:`(2, N, C)`, with float16 or float32 data type, or :math:`(N, C, W)` + or :math:`(N, C)`, with complex64 data type. + + Raises: + TypeError: If `num_features` is not an int. + TypeError: If `eps` is not a float. 
+ TypeError: If dtype of `inp` is not float16, float32 or complex64. + ValueError: If `num_features` is less than 1. + ValueError: If `momentum` is not in range [0, 1]. + ValueError: if 'inp' is not a Tensor of 3 or 4 dimensions. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.complex import BatchNorm1d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32)).astype(np.float32)) + >>> bn = BatchNorm1d(64) + >>> y = bn(u) + >>> print(y.shape) + (2, 8, 64, 32) + """ + + def __init__(self, + num_features: int, + eps: float = 1e-5, + momentum: float = 0.9, + affine: bool = True, + gamma_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + beta_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_mean_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_var_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + use_batch_statistics: bool = True) -> None: + super(BatchNorm1d, self).__init__(HBatchNorm1d, + BatchNormImpl, + num_features=num_features, + eps=eps, + momentum=momentum, + affine=affine, + gamma_init=gamma_init, + beta_init=beta_init, + moving_mean_init=moving_mean_init, + moving_var_init=moving_var_init, + use_batch_statistics=use_batch_statistics) + + +class BatchNorm2d(_UniformOperator): + r""" + The complex-valued Batch Normalization layer over a second-order complex input of five dimensions, including + two spatial dimensions. + + This layer applies Batch Normalization over a complex input to reduce internal covariate shift. + Batch Normalization is widely used in convolutional networks. It rescales and recenters the feature using + a mini-batch of data and the learned parameters which can be described by the following formula: + + .. 
math:: + \begin{align} + \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\ + out = \frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}\\ + \text{Re(\hat{out})} = \text{Re(out)} * \text{Re(\gamma)}\ + - \text{Im(out)} * \text{Im(\gamma)} + \text{Re(\beta)}\\ + \text{Im(\hat{out})} = \text{Re(out)} * \text{Im(\gamma)} + + \text{Im(out)} * \text{Re(\gamma)} + \text{Im(\beta)}, + \end{align} + + where :math:`inp` is the complex input tensors, :math:`\mathrm{E}[inp]` is the arithmetic mean of the input tensor + over the batch dimension, :math:`\mathrm{Var}[inp]` is the statistical variance of the input tensor over spatial + dimensions, based on the complex norm :math:`\|x+iy\|=\sqrt{x^2+y^2}`, :math:`\gamma` and :math:`\beta` are complex + learnable parameters representing the scale and shift coefficients respectively, and :math:`\delta` is a small + positive constant, which is needed to avoid division by zero in case statistical variance is close to zero. + :math:`\text{Re(...)}` and :math:`\text{Im(...)}` are respectively real and imaginary parts of the complex-valued + expression inside the parentheses. + + Args: + num_features (int): The number of features in the input space. + eps (float): A small positive threshold, which is needed to avoid division by zero. Default: :math:`10^{-5}` + momentum (float): A floating hyperparameter of the momentum for the running_mean and running_var computation. + Default: 0.9. + affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. 
+ moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + use_batch_statistics (bool): + + - If True, use the mean value and variance value of current batch data and track running mean + and running variance. + - If False, use the mean value and variance value of specified value, and not track statistical value. + - If None, the use_batch_statistics is automatically set to True or False according to the training + and evaluation mode. During training, the parameter is set to True, and during evaluation, the + parameter is set to False. Default: True. + data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'. Default: 'NCHW'. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, H, W)` if data_format is 'NCHW', or + :math:`(2, N, H, W, C)` if data_format is 'NHWC', with float16 or float32 data type. '2' denotes that + the input tensor belongs to the complex domain and has real and imaginary parts. Or, + :math:`(N, C, H, W)` if data_format is 'NCHW', or :math:`(N, H, W, C)` if data_format is 'NHWC', + with complex64 data type. The `num_features` in `Args` has to be equal to :math:`C` in `inp`. + + Outputs: + Tensor, the normalized, scaled, offset tensor of the same data type and shape as `inp`: + :math:`(2, N, C, H, W)` if data_format is 'NCHW', or :math:`(2, N, H, W, C)` if data_format is 'NHWC', + with float16 or float32 data type. Or, :math:`(N, C, H, W)` if data_format is 'NCHW', + or :math:`(N, H, W, C)` if data_format is 'NHWC', with complex64 data type. + + Raises: + TypeError: If `num_features` is not an int. 
+ TypeError: If `eps` is not a float. + TypeError: If dtype of `inp` is not float16, float32 or complex64. + ValueError: If `num_features` is less than 1. + ValueError: If `momentum` is not in range [0, 1]. + ValueError: If `data_format` is neither 'NHWC' nor 'NCHW'. + ValueError: If `inp` is not a Tensor of 5 dimensions. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.complex import BatchNorm2d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32, 32)).astype(np.float32)) + >>> bn = BatchNorm2d(64) + >>> y = bn(u) + >>> print(y.shape) + (2, 8, 64, 32, 32) + """ + + def __init__(self, + num_features: int, + eps: float = 1e-5, + momentum: float = 0.9, + affine: bool = True, + gamma_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + beta_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_mean_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_var_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + use_batch_statistics: bool = True, + data_format='NCHW') -> None: + super(BatchNorm2d, self).__init__(HBatchNorm2d, + BatchNormImpl, + num_features=num_features, + eps=eps, + momentum=momentum, + affine=affine, + gamma_init=gamma_init, + beta_init=beta_init, + moving_mean_init=moving_mean_init, + moving_var_init=moving_var_init, + use_batch_statistics=use_batch_statistics, + data_format=data_format) + + +class BatchNorm3d(_UniformOperator): + r""" + The complex-valued Batch Normalization layer over a second-order complex input of six dimensions, including + three spatial dimensions. + + This layer applies Batch Normalization over a complex input to reduce internal covariate shift. + Batch Normalization is widely used in convolutional networks. It rescales and recenters the feature + using a mini-batch of data and the learned parameters which can be described by the following formula: + + .. 
math:: + \begin{align} + \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\ + out = \frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}\\ + \text{Re(\hat{out})} = \text{Re(out)} * \text{Re(\gamma)} + - \text{Im(out)} * \text{Im(\gamma)} + \text{Re(\beta)}\\ + \text{Im(\hat{out})} = \text{Re(out)} * \text{Im(\gamma)} + + \text{Im(out)} * \text{Re(\gamma)} + \text{Im(\beta)}, + \end{align} + + where :math:`inp` is the complex input tensors, :math:`\mathrm{E}[inp]` is the arithmetic mean of the input tensor + over the batch dimension, :math:`\mathrm{Var}[inp]` is the statistical variance of the input tensor over spatial + dimensions, based on the complex norm :math:`\|x+iy\|=\sqrt{x^2+y^2}`, :math:`\gamma` and :math:`\beta` are complex + learnable parameters representing the scale and shift coefficients respectively, and :math:`\delta` is a small + positive constant, which is needed to avoid division by zero in case statistical variance is close to zero. + :math:`\text{Re(...)}` and :math:`\text{Im(...)}` are respectively real and imaginary parts of the complex-valued + expression inside the parentheses. + + Args: + num_features (int): The number of features in the input space. + eps (float): A small positive threshold, which is needed to avoid division by zero. Default: :math:`10^{-5}` + momentum (float): A floating hyperparameter of the momentum for the running_mean and running_var computation. + Default: 0.9. + affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. 
+ moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + use_batch_statistics (bool): + + - If True, use the mean value and variance value of current batch data and track running mean + and running variance. + - If False, use the mean value and variance value of specified value, and not track statistical value. + - If None, the use_batch_statistics is automatically set to True or False according to the training + and evaluation mode. During training, the parameter is set to True, and during evaluation, the + parameter is set to False. Default: True. + data_format (str): The optional value for data format. Only 'NCDHW' format is supported as of now. + Default: 'NCDHW'. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, D, H, W)`. '2' denotes that the input tensor belongs + to the complex domain and has real and imaginary parts. The `num_features` in `Args` has to be equal + to :math:`C` in `Inputs`. + + Outputs: + Tensor, the normalized, scaled, offset tensor of the same shape :math:`(2, N, C, D, H, W)` as `inp`. + + Raises: + TypeError: If `num_features` is not an int. + TypeError: If `eps` is not a float. + ValueError: If `num_features` is less than 1. + ValueError: If `momentum` is not in range [0, 1]. + ValueError: If `data_format` is not 'NCDHW'. + ValueError: If `inp` is not a Tensor of 6 dimensions. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.complex import BatchNorm3d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32, 32, 32)).astype(np.float32)) + >>> bn = BatchNorm3d(64) + >>> y = bn(u) + >>> print(y.shape) + (2, 8, 64, 32, 32, 32) + """ + + def __init__(self, + num_features: int, + eps: float = 1e-5, + momentum: float = 0.9, + affine: bool = True, + gamma_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + beta_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_mean_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_var_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + use_batch_statistics: bool = True, + data_format='NCDHW') -> None: + super(BatchNorm3d, self).__init__(HBatchNorm3d, + BatchNormImpl, + num_features=num_features, + eps=eps, + momentum=momentum, + affine=affine, + gamma_init=gamma_init, + beta_init=beta_init, + moving_mean_init=moving_mean_init, + moving_var_init=moving_var_init, + use_batch_statistics=use_batch_statistics, + data_format=data_format) + + +class Dense(_UniformOperator): + r""" + The complex-valued dense connected layer. + + Applies dense connected layer for the complex-valued input. If use_karatsuba is False, this layer implements + the operation as: + + .. math:: + \text{Re(out)} = \text{Re(inp)} * \text{Re(kernel)} - \text{Im(inp)} * \text{Im(kernel)} + \text{Re(bias)} + \text{Im(out)} = \text{Re(inp)} * \text{Im(kernel)} + \text{Im(inp)} * \text{Re(kernel)} + \text{Im(bias)}, + + And, if use_karatsuba is True then: + + .. 
math:: + \begin{align} + \text{L1} = \text{Re(inp)} * \text{Re(kernel)}\\ + \text{L2} = \text{Im(inp)} * \text{Im(kernel)}\\ + \text{L3} = (\text{Re(inp)} + \text{Im(inp)}) * (\text{Re(kernel)} + \text{Im(kernel)})\\ + \text{Re(out)} = L1 - L2 + \text{Re(bias)}\\ + \text{Im(out)} = L3 - L1 - L2 + \text{Im(bias)}, + \end{align} + + where :math:`inp` is the complex input tensors, :math:`\text{kernel}` is a complex weight matrix with the same + data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is a complex bias vector with the same + data type as the :math:`inp` created by the layer (only if has_bias is True). :math:`\text{Re(...)}` and + :math:`\text{Im(...)}` are respectively real and imaginary parts of the complex-valued expression inside + the parentheses. + + Both the operations with and without using Karatsuba's algorithm are mathematically identical, but Karatsuba's + method requires three matrix multiplications instead of four, at the cost of increased number of additions and + subtractions. Hence, it can be effective in terms of time consumption, but only if input tensors and kernels are + of big size. + + Args: + in_channels (int): The number of channels in the input space. + out_channels (int): The number of channels in the output space. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is + same as `inp`. The values of str refer to the function `initializer`. Default: 'zeros'. + has_bias (bool): Specifies whether the layer uses a bias vector. Default: True. + use_karatsuba (bool): Specifies whether the layer uses Karatsuba's algorithm for complex-valued multiplication. 
+ Default: False + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, *, ..., *, in\_channels)`, with float16 or float32 data type, + or :math:`(*, ..., *, in\_channels)`, with complex64 data type. In the former case '2' denotes that the input + tensor belongs to the complex domain and has got a real and an imaginary parts. The `in_channels` in `Args` + has to be equal to :math:`in\_channels` in `Inputs`. The count of mediator dimensions denoted by '*' + is arbitrary but must be at least one. + + Outputs: + Tensor of the same data type as 'inp' and of shape :math:`(2, *, ..., *, out\_channels)`, with float16 or + float32 data type, or :math:`(*, ..., *, out\_channels)`, with complex64 data type. The count of mediator + dimensions is the same as one in 'Inputs'. + + Raises: + TypeError: If `in_channels` or `out_channels` is not an int. + TypeError: If `has_bias` is not a bool. + TypeError: If any two of `inp`, `weight_init` and `bias_init` are Tensors of different data type. + ValueError: If length of shape of `weight_init` is not equal to 3, + or shape[0] of 'weight_init' is not equal to 2, + or shape[1] of `weight_init` is not equal to `out_channels`, + or shape[2] of `weight_init` is not equal to `in_channels`. + ValueError: If length of shape of `bias_init` is not equal to 2, + or shape[0] of 'bias_init' is not equal to 2, + or shape[1] of `bias_init` is not equal to `out_channels`. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.complex import Dense + >>> from mindspore import Tensor + >>> w = Tensor(np.random.random((2, 7, 5)).astype(np.float32)) + >>> b = Tensor(np.random.random((2, 7)).astype(np.float32)) + >>> net = Dense(in_channels=5, out_channels=7, weight_init=w, bias_init=b, has_bias=True) + >>> z = Tensor(np.random.random((2, 34, 1, 5)).astype(np.float32)) + >>> out = net(z) + >>> print(out.shape) + (2, 34, 1, 7) + """ + + def __init__(self, + in_channels: int, + out_channels: int, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + has_bias: bool = True, + use_karatsuba: bool = False) -> None: + if use_karatsuba: + super(Dense, self).__init__(HDense, + KaratsubaDenseImpl, + in_channels=in_channels, + out_channels=out_channels, + weight_init=weight_init, + bias_init=bias_init, + has_bias=has_bias) + else: + super(Dense, self).__init__(HDense, + DenseImpl, + in_channels=in_channels, + out_channels=out_channels, + weight_init=weight_init, + bias_init=bias_init, + has_bias=has_bias) diff --git a/mindspore/python/mindspore/hypercomplex/complex/complex_relu.py b/mindspore/python/mindspore/hypercomplex/complex/complex_relu.py new file mode 100644 index 00000000000..3a7cf95a46f --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/complex/complex_relu.py @@ -0,0 +1,62 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""complex ReLU implementation""" +import mindspore +from mindspore import nn, Tensor +from mindspore.ops import operations as P +from mindspore.hypercomplex.utils import get_x_and_y as get_real_and_imag, to_2channel as to_complex + + +class ReLU(nn.Cell): + r""" + Rectified Linear Unit activation function for complex-valued input. + + Applies ReLU activation layer for the complex-valued input. This layer applies the element-wise + :math:`\max(0, x)` for both real and imaginary parts of the input tensor independently: + + .. math:: + \begin{align} + \text{Re(out)} = (Re(inp))^+ = \max(0, Re(inp))\\ + \text{Im(out)} = (Im(inp))^+ = \max(0, Im(inp)), + \end{align} + + Inputs: + - **inp** (Tensor) - The input of ReLU is a Tensor of shape (2, *, ..., *), with float16 or float32 data type, + or (*, ..., *), with complex64 data type. + + Outputs: + Tensor, with the same data type and shape as the `inp`. + + Raises: + TypeError: If dtype of `inp` is not float16, float32, or complex64. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self): + """Initialize ReLU.""" + super(ReLU, self).__init__() + self.relu = P.ReLU() + + def construct(self, u: Tensor) -> Tensor: + if u.dtype == mindspore.complex64: + real, imag = get_real_and_imag(u) + real = self.relu(real) + imag = self.relu(imag) + out = to_complex(real, imag, u.dtype) + else: + out = self.relu(u) + return out diff --git a/mindspore/python/mindspore/hypercomplex/double/__init__.py b/mindspore/python/mindspore/hypercomplex/double/__init__.py new file mode 100644 index 00000000000..ee534519089 --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/double/__init__.py @@ -0,0 +1,23 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Double Operators""" +from mindspore.hypercomplex.double.double_operators import Conv1d, Conv2d, Conv3d +from mindspore.hypercomplex.double.double_operators import BatchNorm1d, BatchNorm2d, BatchNorm3d +from mindspore.hypercomplex.double.double_operators import Dense +from mindspore.hypercomplex.double.double_operators import ReLU + +from mindspore.hypercomplex.hypercomplex.hc_pool import MaxPool1d, MaxPool2d, \ + AvgPool1d, AvgPool2d, AdaptiveAvgPool1d, AdaptiveAvgPool2d, \ + AdaptiveAvgPool3d, AdaptiveMaxPool1d, AdaptiveMaxPool2d diff --git a/mindspore/python/mindspore/hypercomplex/double/_double_bn_impl.py b/mindspore/python/mindspore/hypercomplex/double/_double_bn_impl.py new file mode 100644 index 00000000000..55e2e4bece8 --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/double/_double_bn_impl.py @@ -0,0 +1,256 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Double BatchNorm implementation""" +from typing import Tuple + +import mindspore.ops as ops +from mindspore.common.tensor import Tensor +from mindspore.hypercomplex.hypercomplex._hc_bn_impl import _BaseBatchNormImpl as HCBatchNormImpl +from mindspore.hypercomplex.utils import get_x_and_y + +get_real_and_double = get_x_and_y +get_u1_and_u2 = get_x_and_y + + +class _BatchNormImpl(HCBatchNormImpl): + r""" + The implementor class of the Batch Normalization layer for double numbers in regular representation. + + Implements the functionality specific to double numbers and needed by the 'BatchNorm' class. This includes: + getting the norm of double number, applying scaling and shift to a double-valued tensor, and updating the running + mean and variance, which are used during inference. + + Args: + affine (bool) - A bool value. When set to True, gamma and beta can be learned. + use_batch_statistics (bool): If true, use the mean value and variance value of current batch data. If false, + use the mean value and variance value of specified value. If None, the training process will use the mean + and variance of current batch data and track the running mean and variance, the evaluation process will use + the running mean and variance. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. + num_features (int): The number of features in the input space. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def scale_and_shift(self, + u_x: Tensor, + u_y: Tensor, + scale_x: Tensor, + scale_y: Tensor, + shift_x: Tensor, + shift_y: Tensor) -> Tuple[Tensor, Tensor]: + r""" + Applies double scaling and shift to an input tensor in regular representation. + + This function implements the operation as: + + .. math:: + \begin{align} + \text{Re(out)} = \text{Re(inp)} * \text{Re(scale)} + \text{Db(inp)} * \text{Db(scale)} + \text{Re(shift)}\\ + \text{Db(out)} = \text{Re(inp)} * \text{Db(scale)} + \text{Db(inp)} * \text{Re(scale)} + \text{Db(shift)}, + \end{align} + + where :math:`inp` is the double input tensors, :math:`scale` and :math:`shift` are double parameters + representing the scaling and shift coefficients respectively. :math:`\text{Re(...)}` and :math:`\text{Db(...)}` + are respectively real and double parts of the double-valued expression inside the parentheses. + + Args: + u_x (Tensor): A tensor of shape (C,), which represents the real part of the normalized inputs. + u_y (Tensor): A tensor of shape (C,), which represents the double part of the normalized inputs. + scale_x (Tensor): A tensor of shape (C,), which represents the real part of the scaling vector. + scale_y (Tensor): A tensor of shape (C,), which represents the double part of the scaling vector. + shift_x (Tensor): A tensor of shape (C,), which represents the real part of the bias vector. + shift_y (Tensor): A tensor of shape (C,), which represents the double part of the bias vector. + + Returns: + Tuple of two tensors of shape (C,), which contains the real and the double parts of rescaled and + recentered inputs. + """ + out_x = u_x * scale_x + u_y * scale_y + shift_x + out_y = u_x * scale_y + u_y * scale_x + shift_y + return out_x, out_y + + def get_norm(self, u: Tensor) -> Tensor: + r""" + Calculates norm of double elements of an input tensor in regular representation. 
+ + Norm is a non-negative real number that is a characteristic of 'magnitude' of that number, i.e. how far away it + is from zero. The function implements the operation as: + + .. math:: + \text{out} = |Re(inp)| + |Db(inp)|, + + where :math:`inp` is the double input tensors, :math:`\text{Re(...)}` and :math:`\text{Db(...)}` + are respectively real and double parts of the double-valued expression inside the parentheses. + + Args: + u (Tensor): Tensor of shape (2, *, ..., *). '2' denotes that the input tensor belongs to the double domain + and has two components. + + Returns: + Tensor of shape (*, ..., *). The count and size of dimensions of the output tensor are the same ones as in + the input tensor, but without the very first dimension because the output tensor is real-valued. + """ + u_r, u_d = get_real_and_double(u) + abs_op = ops.Abs() + out = abs_op(u_r) + abs_op(u_d) + return out + + def get_square_norm(self, u: Tensor) -> Tensor: + r""" + Calculates element-wise squared norm of double elements of an input tensor in regular representation. + + Norm is a non-negative real number that is a characteristic of 'magnitude' of that number, i.e. how far away it + is from zero. The function implements the operation as: + + .. math:: + \text{out} = \left(|Re(inp)| + |Db(inp)|\right)^2, + + where :math:`inp` is the double input tensors, :math:`\text{Re(...)}` and :math:`\text{Db(...)}` + are respectively real and double parts of the double-valued expression inside the parentheses. + + Args: + u (Tensor): Tensor of shape (2, *, ..., *). '2' denotes that the input tensor belongs to the double domain + and has real and double parts. + + Returns: + Tensor of shape (*, ..., *). The count and size of dimensions of the output tensor are the same ones as in + the input tensor, but without the very first dimension because the output tensor is real-valued. 
+ """ + norm = self.get_norm(u) + out = norm ** 2 + return out + + +class _J1J2BatchNormImpl(HCBatchNormImpl): + r""" + The implementor class of the Batch Normalization layer for double numbers in diagonal representation. + + Implements the functionality specific to double numbers and needed by the 'BatchNorm' class. This includes: + getting the norm of double number, applying scaling and shift to a double-valued tensor, and updating the running + mean and variance, which are used during inference. + + Args: + affine (bool) - A bool value. When set to True, gamma and beta can be learned. + use_batch_statistics (bool): If true, use the mean value and variance value of current batch data. If false, + use the mean value and variance value of specified value. If None, the training process will use the mean + and variance of current batch data and track the running mean and variance, the evaluation process will use + the running mean and variance. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. + num_features (int): The number of features in the input space. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def scale_and_shift(self, + u_x: Tensor, + u_y: Tensor, + scale_x: Tensor, + scale_y: Tensor, + shift_x: Tensor, + shift_y: Tensor) -> Tuple[Tensor, Tensor]: + r""" + Applies double scaling and shift to an input tensor in diagonal representation. + + This function implements the operation as: + + .. 
math:: + \begin{align} + \text{X(out)} = \text{X(inp)} * \text{X(scale)} + \text{X(shift)}\\ + \text{Y(out)} = \text{Y(inp)} * \text{Y(scale)} + \text{Y(shift)}, + \end{align} + + where :math:`inp` is the double input tensors in diagonal form, :math:`scale` and :math:`shift` are + double parameters representing the scaling and shift coefficients respectively. :math:`\text{X(...)}` + and :math:`\text{Y(...)}` are respectively the first and the second components of the double-valued + expression inside the parentheses. + + Args: + u_x (Tensor): A tensor of shape (C,), which represents the first part of the normalized inputs. + u_y (Tensor): A tensor of shape (C,), which represents the second part of the normalized inputs. + scale_x (Tensor): A tensor of shape (C,), which represents the first part of the scaling vector. + scale_y (Tensor): A tensor of shape (C,), which represents the second part of the scaling vector. + shift_x (Tensor): A tensor of shape (C,), which represents the first part of the bias vector. + shift_y (Tensor): A tensor of shape (C,), which represents the second part of the bias vector. + + Returns: + Tuple of two tensors of shape (C,), which contains the first and the second parts of rescaled and + recentered inputs in the diagonal representation. + """ + out_x = u_x * scale_x + shift_x + out_y = u_y * scale_y + shift_y + return out_x, out_y + + def get_norm(self, u: Tensor) -> Tensor: + r""" + Calculates norm of double elements of an input tensor in diagonal representation. + + Norm is a non-negative real number that is a characteristic of 'magnitude' of that number, i.e. how far away it + is from zero. The function implements the operation as: + + .. math:: + \text{out} = \text{max}(|X(inp)|, |Y(inp)|), + + where :math:`inp` is the double input tensors in diagonal form, :math:`\text{max}` is the maximum value of its + arguments. 
:math:`\text{X(...)}` and :math:`\text{Y(...)}` are respectively the first and the second components + of the double-valued expression inside the parentheses. + + Args: + u (Tensor): Tensor of shape (2, *, ..., *). '2' denotes that the input tensor belongs to the double domain + and has two components. + + Returns: + Tensor of shape (*, ..., *). The count and size of dimensions of the output tensor are the same ones as in + the input tensor, but without the very first dimension because the output tensor is real-valued. + """ + u_1, u_2 = get_u1_and_u2(u) + abs_op = ops.Abs() + max_op = ops.Maximum() + out = max_op(abs_op(u_1), abs_op(u_2)) + return out + + def get_square_norm(self, u: Tensor) -> Tensor: + r""" + Calculates element-wise squared norm of double elements of an input tensor in diagonal representation. + + Norm is a non-negative real number that is a characteristic of 'magnitude' of that number, i.e. how far away it + is from zero. The function implements the operation as: + + .. math:: + \text{out} = \left(\text{max}(|X(inp)|, |Y(inp)|)\right)^2, + + where :math:`inp` is the double input tensors in diagonal form, :math:`\text{max}` is the maximum value of its + arguments. :math:`\text{X(...)}` and :math:`\text{Y(...)}` are respectively the first and the second components + of the double-valued expression inside the parentheses. + + Args: + u (Tensor): Tensor of shape (2, *, ..., *). '2' denotes that the input tensor belongs to the double domain + and has two components. + + Returns: + Tensor of shape (*, ..., *). The count and size of dimensions of the output tensor are the same ones as in + the input tensor, but without the very first dimension because the output tensor is real-valued. 
+ """ + norm = self.get_norm(u) + out = norm ** 2 + return out diff --git a/mindspore/python/mindspore/hypercomplex/double/_double_conv_impl.py b/mindspore/python/mindspore/hypercomplex/double/_double_conv_impl.py new file mode 100644 index 00000000000..691c9ecdaec --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/double/_double_conv_impl.py @@ -0,0 +1,126 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Double convolution implementation""" +from typing import Callable, Tuple + +from mindspore.common.tensor import Tensor +from mindspore.hypercomplex.hypercomplex._hc_conv_impl import _BaseConvImpl as BaseConvImpl + + +class _ConvImpl(BaseConvImpl): + r""" + The implementor class of the convolution layer for double numbers in regular representation. + + Applies double-valued convolution transformation. This layer implements the operation as: + + .. 
math:: + \begin{align} + \text{Re(ccor)} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Db(kernel)}, \text{Db(inp)})\\ + \text{Db(ccor)} = \text{ccor}(\text{Db(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Re(kernel)}, \text{Db(inp)}), + \end{align} + + where :math:`ccor` is the real-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, + :math:`inp` is the double input tensors, :math:`\text{kernel}` is a double weight matrix with the same + data type as the :math:`inp` created by the layer. :math:`\text{Re(...)}` and :math:`\text{Db(...)}` + are respectively the first and the second parts of the double-valued expression inside the parentheses in the + regular form. + + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of real and double parts of the kernel. + + Inputs: + - **conv_op** (Callable) - the function of the real-valued convolution to be used for decomposition + of the double convolution transformation. For example, mindspore.ops.operations.Conv2D(...) may be passed. + - **real** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`, + which defines the first part of the input. + - **double** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`, + which defines the second part of the input. + + Outputs: + Tuple of two tensors, each of shape :math:`(N, C_{out}, *, ..., *)` or :math:`(N, *, ..., *, C_{out})`, + which represents both the first and the second parts of the output. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def construct(self, + conv_op: Callable, + real: Tensor, + double: Tensor) -> Tuple[Tensor, Tensor]: + + u1 = real + double + u2 = real - double + + out1 = conv_op(u1, self.weight_x) + out2 = conv_op(u2, self.weight_y) + + out_r = out1 + out2 + out_d = out1 - out2 + return out_r, out_d + + +class _J1J2ConvImpl(BaseConvImpl): + r""" + The implementor class of the convolution layer for double numbers in diagonal representation. + + Applies double-valued convolution transformation. This layer implements the operation as: + + .. math:: + \begin{align} + \text{X(ccor)} = \text{ccor}(\text{X(kernel)}, \text{X(inp)}) + \text{X(bias)}\\ + \text{Y(ccor)} = \text{ccor}(\text{Y(kernel)}, \text{Y(inp)}) + \text{Y(bias)}, + \end{align} + + where :math:`ccor` is the real-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, + :math:`inp` is the double input tensor, :math:`\text{kernel}` is a double weight matrix with the same + data type as the :math:`inp` created by the layer. :math:`\text{X(...)}` and :math:`\text{Y(...)}` + are respectively the first and the second parts of the double-valued expression inside the parentheses in the + diagonal form. + + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of real and double parts of the kernel. + + Inputs: + - **conv_op** (Callable) - the function of the real-valued convolution to be used for decomposition + of the double convolution transformation. For example, mindspore.ops.operations.Conv2D(...) may be passed. + - **u1** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`, + which defines the first part of the input.
+ - **u2** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`, + which defines the second part of the input. + + Outputs: + Tuple of two tensors, each of shape :math:`(N, C_{out}, *, ..., *)` or :math:`(N, *, ..., *, C_{out})`, + which represents both the first and the second parts of the output. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def construct(self, + conv_op: Callable, + u1: Tensor, + u2: Tensor) -> Tuple[Tensor, Tensor]: + + out1 = conv_op(u1, self.weight_x) + out2 = conv_op(u2, self.weight_y) + + return out1, out2 diff --git a/mindspore/python/mindspore/hypercomplex/double/_double_dense_impl.py b/mindspore/python/mindspore/hypercomplex/double/_double_dense_impl.py new file mode 100644 index 00000000000..fb15e05c594 --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/double/_double_dense_impl.py @@ -0,0 +1,123 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Double dense implementation""" +from typing import Callable, Tuple + +from mindspore.common.tensor import Tensor +from mindspore.hypercomplex.hypercomplex._hc_dense_impl import _BaseDenseImpl as BaseDenseImpl + + +class _DenseImpl(BaseDenseImpl): + r""" + The implementor class of the dense connected layer for double numbers in normal representation. 
+ + Applies double-valued matrix multiplication for dense connected layer. This layer implements the operation as: + + .. math:: + \begin{align} + \text{X(out)} = \text{X(inp)} * \text{X(kernel)} + \text{Y(inp)} * \text{Y(kernel)}\\ + \text{Y(out)} = \text{X(inp)} * \text{Y(kernel)} + \text{Y(inp)} * \text{X(kernel)}, + \end{align} + + where :math:`inp` is the double input tensor, :math:`\text{kernel}` is a double weight matrix with the same + data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is a double bias vector with the same + data type as the :math:`inp` created by the layer (only if has_bias is True). :math:`\text{X(...)}` and + :math:`\text{Y(...)}` are respectively the first and the second parts of the double-valued expression + inside the parentheses. + + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of real and double parts of the kernel. + **factory_kwargs (dict): Extra parameters which may be needed by specific subclasses. + + Inputs: + - **matmul_op** (Callable) - the function of the real-valued matrix multiplication to be used for decomposition + of the double linear transformation. Usually, mindspore.ops.operations.MatMul(...) is passed. + - **real** (Tensor) - Tensor of shape :math:`(*, in\_channels)`, which defines the first part of the input. + - **double** (Tensor) - Tensor of shape :math:`(*, in\_channels)`, which defines the second part of the + input. + + Outputs: + Tuple of two tensors, each of shape :math:`(*, out\_channels)`, which represents the first and the second + parts of the output.
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def construct(self, + matmul_op: Callable, + real: Tensor, + double: Tensor) -> Tuple[Tensor, Tensor]: + u1 = real + double + u2 = real - double + + out1 = matmul_op(u1, self.weight_x) + out2 = matmul_op(u2, self.weight_y) + + out_r = out1 + out2 + out_d = out1 - out2 + + return out_r, out_d + + +class _J1J2DenseImpl(BaseDenseImpl): + r""" + The implementor class of the dense connected layer for double numbers in the diagonal representation. + + Applies double-valued matrix multiplication for dense connected layer. This layer implements the operation as: + + .. math:: + \begin{align} + \text{X(out)} = \text{X(inp)} * \text{X(kernel)}\\ + \text{Y(out)} = \text{Y(inp)} * \text{Y(kernel)}, + \end{align} + + where :math:`inp` is the double input tensor in the diagonal form, :math:`\text{kernel}` is a double weight matrix + in the diagonal form with the same data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is + a double bias vector in the diagonal form with the same data type as the :math:`inp` created by the layer + (only if has_bias is True). :math:`\text{X(...)}` and :math:`\text{Y(...)}` are respectively the first and the + second parts of the double-valued expression inside the parentheses. + + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of real and double parts of the kernel. + **factory_kwargs (dict): Extra parameters which may be needed by specific subclasses. + + Inputs: + - **matmul_op** (Callable) - the function of the real-valued matrix multiplication to be used for decomposition + of the double linear transformation. Usually, mindspore.ops.operations.MatMul(...)
is passed. + - **u1** (Tensor) - Tensor of shape :math:`(*, in\_channels)`, which defines the first part of the input. + - **u2** (Tensor) - Tensor of shape :math:`(*, in\_channels)`, which defines the second part of the input. + + Outputs: + Tuple of two tensors, each of shape :math:`(*, out\_channels)`, which represents the first and the second + parts of the output. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def construct(self, + matmul_op: Callable, + u1: Tensor, + u2: Tensor) -> Tuple[Tensor, Tensor]: + + out1 = matmul_op(u1, self.weight_x) + out2 = matmul_op(u2, self.weight_y) + + return out1, out2 diff --git a/mindspore/python/mindspore/hypercomplex/double/double_operators.py b/mindspore/python/mindspore/hypercomplex/double/double_operators.py new file mode 100644 index 00000000000..b29ac809ca1 --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/double/double_operators.py @@ -0,0 +1,1331 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# ============================================================================ +"""Double operators""" +import numbers +from typing import Union + +from mindspore import nn +from mindspore.common.initializer import Initializer +from mindspore.common.tensor import Tensor +# Batch Normalization +from mindspore.hypercomplex.hypercomplex.hc_bn import BatchNorm1d as HBatchNorm1d, \ + BatchNorm2d as HBatchNorm2d, BatchNorm3d as HBatchNorm3d +from mindspore.hypercomplex.double._double_bn_impl import _BatchNormImpl as BatchNormImpl, \ + _J1J2BatchNormImpl as J1J2BatchNormImpl +# Convolution +from mindspore.hypercomplex.hypercomplex.hc_conv import Conv1d as HConv1d, \ + Conv2d as HConv2d, Conv3d as HConv3d +from mindspore.hypercomplex.double._double_conv_impl import _ConvImpl as ConvImpl, \ + _J1J2ConvImpl as J1J2ConvImpl +# Dense +from mindspore.hypercomplex.hypercomplex.hc_dense import Dense as HDense +from mindspore.hypercomplex.double._double_dense_impl import _DenseImpl as DenseImpl, \ + _J1J2DenseImpl as J1J2DenseImpl +from mindspore.hypercomplex.hypercomplex.uniform_operator import _UniformOperator +# ReLU +from mindspore.hypercomplex.double.double_relu import J1J2ReLU + +from mindspore.hypercomplex.utils import _size_1_t, _size_2_t, _size_3_t + + +class Conv2d(_UniformOperator): + r""" + 2D convolution layer on the double-valued input. + + Calculates the 2D convolution on the input tensor which is typically of shape + :math:`(2, N, C_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, :math:`C_{in}` is a number of channels, + :math:`H_{in}, W_{in}` are the height and width of the feature layer respectively. The formula is defined as: + + .. 
math:: + + \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + + \sum_{k = 0}^{C_{in} - 1} \text{hccor}({\text{weight}(C_{\text{out}_j}, k), \text{inp}(N_i, k)}) + + where :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of + the output and :math:`j` is in the range of :math:`[0, C_{out}-1]`. :math:`\text{weight}(C_{\text{out}_j}, k)` + is a convolution kernel slice with shape :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`, + where :math:`\text{kernel_size[0]}` and :math:`\text{kernel_size[1]}` are the height and width of the convolution + kernel respectively. :math:`\text{bias}` is the bias parameter and :math:`\text{inp}` is the input tensor. + In this case, `data_format` of the input tensor is 'NCHW' and the shape of full convolution kernel is + :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`, + where `group` is the number of groups to split the input `inp` in the channel dimension. If `data_format` of the + input tensor is 'NHWC', the shape of full convolution kernel will be + :math:`(C_{out}, \text{kernel_size[0]}, \text{kernel_size[1]}, C_{in} / \text{group})`. + :math:`hccor` is the double-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_. + If has_diagonal_form is False, this implies the operation as: + + .. math:: + \begin{align} + \text{Re(ccor)} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Db(kernel)}, \text{Db(inp)}) + \text{Re(bias)}\\ + \text{Db(ccor)} = \text{ccor}(\text{Db(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Re(kernel)}, \text{Db(inp)}) + \text{Db(bias)}, + \end{align} + + And, if has_diagonal_form is True then: + + ..
math:: + \begin{align} + \text{X(ccor)} = \text{ccor}(\text{X(kernel)}, \text{X(inp)}) + \text{X(bias)}\\ + \text{Y(ccor)} = \text{ccor}(\text{Y(kernel)}, \text{Y(inp)}) + \text{Y(bias)}, + \end{align} + + where :math:`ccor` is the real-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, + :math:`inp` is the double input tensor, :math:`\text{kernel}` is a double weight matrix with the same + data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is a double bias vector with the same + data type as the :math:`inp` created by the layer (only if has_bias is True). :math:`\text{Re(...)}` and + :math:`\text{Db(...)}` are respectively the first and the second parts of the double-valued expression inside + the parentheses in the regular form, and :math:`\text{X(...)}` and :math:`\text{Y(...)}` stand for the same + functors, but with the parameters in the diagonal form. + + The convolution layer is notably more effective when double numbers are given in the diagonal form, because it then + requires only two convolutions instead of four. For some other kinds of layers (e.g. activation functions) it can + be otherwise. It is always possible to transfer to and from the diagonal representation: + + .. math:: + \begin{align} + \text{X(inp)} = \text{Re(inp)} + \text{Db(inp)}\\ + \text{Y(inp)} = \text{Re(inp)} - \text{Db(inp)}\\ + \text{Re(inp)} = 0.5 * (\text{X(inp)} + \text{Y(inp)})\\ + \text{Db(inp)} = 0.5 * (\text{X(inp)} - \text{Y(inp)}) + \end{align} + + However, every conversion consumes time resources, so it is a tradeoff which representation to choose. + For more details, please refer to the paper `Gradient Based Learning Applied to Document + Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. + + Note: + On Ascend platform, only group convolution in depthwise convolution scenarios is supported. + That is, when `group > 1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied. + + 'NHWC' format is supported only with GPU target device as of now.
+ + Args: + in_channels (int): The channel number of the input tensor of the Conv2d layer. + out_channels (int): The channel number of the output tensor of the Conv2d layer. + kernel_size (Union[int, tuple[int]]): Specifies the height and width of the 2D convolution kernel. + The data type is an integer or a tuple of two integers. An integer represents the height + and width of the convolution kernel. A tuple of two integers represents the height + and width of the convolution kernel respectively. + stride (Union[int, tuple[int]]): The movement stride of the 2D convolution kernel. + The data type is an integer or a tuple of two integers. An integer represents the movement step size + in both height and width directions. A tuple of two integers represents the movement step size in the + height and width directions respectively. Default: 1. + pad_mode (str): Specifies padding mode. The optional values are + "same", "valid", "pad". Default: "same". + + - same: The width of the output is the same as the value of the input divided by `stride`. + If this mode is set, the value of `padding` must be 0. + + - valid: Returns a valid calculated output without padding. Excess pixels that do not satisfy the + calculation will be discarded. If this mode is set, the value of `padding` must be 0. + + - pad: Pads the input. Padding `padding` size of zero on both sides of the input. + If this mode is set, the value of `padding` must be greater than or equal to 0. + + padding (Union[int, tuple[int]]): The number of padding on the height and width directions of the input. + The data type is an integer or a tuple of four integers. If `padding` is an integer, + then the top, bottom, left, and right padding are all equal to `padding`. + If `padding` is a tuple of 4 integers, then the top, bottom, left, and right padding + is equal to `padding[0]`, `padding[1]`, `padding[2]`, and `padding[3]` respectively. + The value should be greater than or equal to 0. Default: 0. 
+ dilation (Union[int, tuple[int]]): Dilation size of 2D convolution kernel. + The data type is an integer or a tuple of two integers. If :math:`k > 1`, the kernel is sampled + every `k` elements. The value of `k` on the height and width directions is in range of [1, H] + and [1, W] respectively. Default: 1. + group (int): Splits filter into groups, `in_channels` and `out_channels` must be + divisible by `group`. If the group is equal to `in_channels` and `out_channels`, + this 2D convolution layer also can be called 2D depthwise convolution layer. Default: 1. + has_bias (bool): Whether the Conv2d layer has a bias parameter. Default: False. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter. + It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, + values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well + as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones' + and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of + Initializer for more details. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter. + Available initialization methods are the same as 'weight_init'. Refer to the values of + Initializer for more details. Default: 'zeros'. + data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'. + Default: 'NCHW'. + has_diagonal_form (bool): Specifies whether the input tensor is provided in the diagonal form. Default: False. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C_{in}, H_{in}, W_{in})` \ + or :math:`(2, N, H_{in}, W_{in}, C_{in})`. + + Outputs: + Tensor of shape :math:`(2, N, C_{out}, H_{out}, W_{out})` or :math:`(2, N, H_{out}, W_{out}, C_{out})`. + + pad_mode is 'same': + + .. 
math:: + \begin{array}{ll} \\ + H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[0]}}} \right \rceil \\ + W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[1]}}} \right \rceil \\ + \end{array} + + pad_mode is 'valid': + + .. math:: + \begin{array}{ll} \\ + H_{out} = \left \lceil{\frac{H_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) } + {\text{stride[0]}}} \right \rceil \\ + W_{out} = \left \lceil{\frac{W_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) } + {\text{stride[1]}}} \right \rceil \\ + \end{array} + + pad_mode is 'pad': + + .. math:: + \begin{array}{ll} \\ + H_{out} = \left \lfloor{\frac{H_{in} + padding[0] + padding[1] - (\text{kernel_size[0]} - 1) \times + \text{dilation[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\ + W_{out} = \left \lfloor{\frac{W_{in} + padding[2] + padding[3] - (\text{kernel_size[1]} - 1) \times + \text{dilation[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\ + \end{array} + + Raises: + TypeError: If `in_channels`, `out_channels` or `group` is not an int. + TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple. + ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. + ValueError: If `padding` is less than 0. + ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. + ValueError: If `padding` is a tuple whose length is not equal to 4. + ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0). + ValueError: If `data_format` is neither 'NCHW' nor 'NHWC'.
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.double import Conv2d + >>> from mindspore import Tensor + >>> w = Tensor(np.random.random((2, 128, 3, 7, 7)).astype(np.float32)) + >>> b = Tensor(np.random.random((2, 128)).astype(np.float32)) + >>> net = Conv2d( + >>> in_channels=3, out_channels=128, kernel_size=7, stride=2, padding=3, + >>> pad_mode='pad', weight_init=w, bias_init=b, has_bias=True + >>> ) + >>> inp = Tensor(np.random.random((2, 16, 3, 224, 224)).astype(np.float32)) + >>> out = net(inp) + >>> print(out.shape) + (2, 16, 128, 112, 112) + """ + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: _size_2_t, + stride: _size_2_t = 1, + pad_mode: str = 'same', + padding: _size_2_t = 0, + dilation: _size_2_t = 1, + group: int = 1, + has_bias: bool = False, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + data_format: str = 'NCHW', + has_diagonal_form: bool = False) -> None: + if has_diagonal_form: + super(Conv2d, self).__init__(HConv2d, + J1J2ConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init, + data_format=data_format) + else: + super(Conv2d, self).__init__(HConv2d, + ConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init, + data_format=data_format) + + +class Conv1d(_UniformOperator): + r""" + 1D convolution layer on the double-valued input. 
+ + Calculates the 1D convolution on the input tensor which is typically of shape :math:`(2, N, C_{in}, L_{in})`, + where :math:`N` is batch size, :math:`C_{in}` is a number of channels and :math:`L_{in}` is a length of sequence. + The formula is defined as: + + .. math:: + + \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + + \sum_{k = 0}^{C_{in} - 1} \text{hccor}({\text{weight}(C_{\text{out}_j}, k), \text{inp}(N_i, k)}) + + where :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of + the output and :math:`j` is in the range of :math:`[0, C_{out}-1]`. :math:`\text{weight}(C_{\text{out}_j}, k)` + is a convolution kernel slice with shape :math:`\text{kernel_size}`, where :math:`\text{kernel_size}` + is the width of the convolution kernel. :math:`\text{bias}` is the bias parameter, + and :math:`\text{inp}` is the input tensor. The shape of full convolution kernel is + :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size})`, + where `group` is the number of groups to split the input `inp` in the channel dimension. + :math:`hccor` is the double-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_. + If has_diagonal_form is False, this implies the operation as: + + .. math:: + \begin{align} + \text{Re(ccor)} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Db(kernel)}, \text{Db(inp)}) + \text{Re(bias)}\\ + \text{Db(ccor)} = \text{ccor}(\text{Db(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Re(kernel)}, \text{Db(inp)}) + \text{Db(bias)}, + \end{align} + + And, if has_diagonal_form is True then: + + ..
math:: + \begin{align} + \text{X(ccor)} = \text{ccor}(\text{X(kernel)}, \text{X(inp)}) + \text{X(bias)}\\ + \text{Y(ccor)} = \text{ccor}(\text{Y(kernel)}, \text{Y(inp)}) + \text{Y(bias)}, + \end{align} + + where :math:`ccor` is the real-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, + :math:`inp` is the double input tensor, :math:`\text{kernel}` is a double weight matrix with the same + data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is a double bias vector with the same + data type as the :math:`inp` created by the layer (only if has_bias is True). :math:`\text{Re(...)}` and + :math:`\text{Db(...)}` are respectively the first and the second parts of the double-valued expression inside the + parentheses in the regular form, and :math:`\text{X(...)}` and :math:`\text{Y(...)}` stand for the same functors, + but with the parameters in the diagonal form. + + The convolution layer is notably more effective when double numbers are given in the diagonal form, because it then + requires only two convolutions instead of four. For some other kinds of layers (e.g. activation functions) it can + be otherwise. It is always possible to transfer to and from the diagonal representation: + + .. math:: + \begin{align} + \text{X(inp)} = \text{Re(inp)} + \text{Db(inp)}\\ + \text{Y(inp)} = \text{Re(inp)} - \text{Db(inp)}\\ + \text{Re(inp)} = 0.5 * (\text{X(inp)} + \text{Y(inp)})\\ + \text{Db(inp)} = 0.5 * (\text{X(inp)} - \text{Y(inp)}) + \end{align} + + However, every conversion consumes time resources, so it is a tradeoff which representation to choose. + For more details, please refer to the paper `Gradient Based Learning Applied to Document + Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. + + Note: + On Ascend platform, only group convolution in depthwise convolution scenarios is supported. + That is, when `group > 1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied. + + Args: + in_channels (int): The channel number of the input tensor of the Conv1d layer.
+ out_channels (int): The channel number of the output tensor of the Conv1d layer. + kernel_size (int): Specifies the width of the 1D convolution kernel. + stride (int): The movement stride of the 1D convolution kernel. Default: 1. + pad_mode (str): Specifies padding mode. The optional values are + "same", "valid", "pad". Default: "same". + + - same: The width of the output is the same as the value of the input divided by `stride`. + If this mode is set, the value of `padding` must be 0. + + - valid: Returns a valid calculated output without padding. Excess pixels that do not satisfy the + calculation will be discarded. If this mode is set, the value of `padding` must be 0. + + - pad: Pads the input. Padding `padding` size of zero on both sides of the input. + If this mode is set, the value of `padding` must be greater than or equal to 0. + + padding (int): The number of padding on both sides of input. + The value should be greater than or equal to 0. Default: 0. + dilation (int): Dilation size of 1D convolution kernel. If :math:`k > 1`, the kernel is sampled + every `k` elements. The value of `k` is in range of [1, L]. Default: 1. + group (int): Splits filter into groups, `in_channels` and `out_channels` must be + divisible by `group`. Default: 1. + has_bias (bool): Whether the Conv1d layer has a bias parameter. Default: False. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter. + It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, + values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well + as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones' + and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of + Initializer for more details. Default: 'normal'. 
+ bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter. + Available initialization methods are the same as 'weight_init'. Refer to the values of + Initializer for more details. Default: 'zeros'. + has_diagonal_form (bool): Specifies whether the input tensor is provided in the diagonal form. Default: False. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C_{in}, L_{in})`. + + Outputs: + Tensor of shape :math:`(2, N, C_{out}, L_{out})`. + + pad_mode is 'same': + + .. math:: + L_{out} = \left \lceil{\frac{L_{in}}{\text{stride}}} \right \rceil + + pad_mode is 'valid': + + .. math:: + L_{out} = \left \lceil{\frac{L_{in} - \text{dilation} \times (\text{kernel_size} - 1) } + {\text{stride}}} \right \rceil + + pad_mode is 'pad': + + .. math:: + L_{out} = \left \lfloor{\frac{L_{in} + 2 \times padding - (\text{kernel_size} - 1) \times + \text{dilation} - 1 }{\text{stride}} + 1} \right \rfloor + + Raises: + TypeError: If `in_channels`, `out_channels`, `kernel_size`, `stride`, `padding` or `dilation` is not an int. + ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. + ValueError: If `padding` is less than 0. + ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.double import Conv1d + >>> from mindspore import Tensor + >>> w = Tensor(np.random.random((2, 16, 1, 6)).astype(np.float32)) + >>> b = Tensor(np.random.random((2, 16)).astype(np.float32)) + >>> net = Conv1d( + >>> in_channels=1, out_channels=16, kernel_size=6, stride=2, padding=2, + >>> pad_mode='pad', weight_init=w, bias_init=b, has_bias=True + >>> ) + >>> u = Tensor(np.random.random((2, 8, 1, 4096)).astype(np.float32)) + >>> out = net(u) + >>> print(out.shape) + (2, 8, 16, 2048) + """ + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: _size_1_t, + stride: _size_1_t = 1, + pad_mode: str = 'same', + padding: _size_1_t = 0, + dilation: _size_1_t = 1, + group: int = 1, + has_bias: bool = False, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + has_diagonal_form: bool = False) -> None: + if has_diagonal_form: + super(Conv1d, self).__init__(HConv1d, + J1J2ConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init) + else: + super(Conv1d, self).__init__(HConv1d, + ConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init) + + +class Conv3d(_UniformOperator): + r""" + 3D convolution layer on the double-valued input.
+ + Calculates the 3D convolution on the input tensor which is typically of shape + :math:`(2, N, C_{in}, D_{in}, H_{in}, W_{in})`, + where :math:`N` is batch size, :math:`C_{in}` is a number of channels, + :math:`D_{in}, H_{in}, W_{in}` are the depth, height and width of the feature layer respectively. + The formula is defined as: + + .. math:: + + \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + + \sum_{k = 0}^{C_{in} - 1} \text{hccor}({\text{weight}(C_{\text{out}_j}, k), \text{inp}(N_i, k)}) + + where :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of + the output and :math:`j` is in the range of :math:`[0,C_{out}-1]`. :math:`\text{weight}(C_{\text{out}_j}, k)` + is a convolution kernel slice with shape + :math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`, + where :math:`\text{kernel_size[0]}`, :math:`\text{kernel_size[1]}` and :math:`\text{kernel_size[2]}` are + the depth, height and width of the convolution kernel respectively. :math:`\text{bias}` is the bias parameter + and :math:`\text{inp}` is the input tensor. + The shape of full convolution kernel is + :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`, + where `group` is the number of groups to split the input `inp` in the channel dimension. + :math:`hccor` is the double-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_. + If has_diagonal_form is False, this implies the operation as: + + .. math:: + \begin{align} + \text{Re(ccor)} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Db(kernel)}, \text{Db(inp)}) + \text{Re(bias)}\\ + \text{Db(ccor)} = \text{ccor}(\text{Db(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Re(kernel)}, \text{Db(inp)}) + \text{Db(bias)}, + \end{align} + + And, if has_diagonal_form is True then: + + ..
math:: + \begin{align} + \text{Re(ccor)} = \text{ccor}(\text{X(kernel)}, \text{X(inp)}) + \text{X(bias)}\\ + \text{Db(ccor)} = \text{ccor}(\text{Y(kernel)}, \text{Y(inp)}) + \text{Y(bias)}, + \end{align} + + where :math:`ccor` is the real-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, + :math:`inp` is the double input tensor, :math:`\text{kernel}` is a double weight matrix with the same + data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is a double bias vector with the same + data type as the :math:`inp` created by the layer (only if has_bias is True). :math:`\text{Re(...)}` and + :math:`\text{Db(...)}` are respectively the first and the second parts of the double-valued expression inside the + parentheses in the regular form, and :math:`\text{X(...)}` and :math:`\text{Y(...)}` stand for the same functors, + but with the parameters in the diagonal form. + + The convolution layer is notably more effective when double numbers are given in the diagonal form, because it then + requires only two convolutions instead of four. For some other kinds of layers (e.g. activation functions) it can + be otherwise. It is always possible to transfer to and from the diagonal representation: + + .. math:: + \begin{align} + \text{X(inp)} = \text{Re(inp)} + \text{Db(inp)}\\ + \text{Y(inp)} = \text{Re(inp)} - \text{Db(inp)}\\ + \text{Re(inp)} = 0.5 * (\text{X(inp)} + \text{Y(inp)})\\ + \text{Db(inp)} = 0.5 * (\text{X(inp)} - \text{Y(inp)}) + \end{align} + + However, every conversion consumes time resources, so it is a tradeoff which representation to choose. + For more details, please refer to the paper `Gradient Based Learning Applied to Document + Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. + + Note: + On Ascend platform, only group convolution in depthwise convolution scenarios is supported. + That is, when `group > 1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied. + + Args: + in_channels (int): The channel number of the input tensor of the Conv3d layer.
+ out_channels (int): The channel number of the output tensor of the Conv3d layer. + kernel_size (Union[int, tuple[int]]): Specifies the depth, height and width of the 3D convolution kernel. + The data type is an integer or a tuple of three integers. An integer represents the depth, height + and width of the convolution kernel. A tuple of three integers represents the depth, height + and width of the convolution kernel respectively. + stride (Union[int, tuple[int]]): The movement stride of the 3D convolution kernel. + The data type is an integer or a tuple of three integers. An integer represents the movement step size + in depth, height and width directions. A tuple of three integers represents the movement step size + in the depth, height and width directions respectively. Default: 1. + pad_mode (str): Specifies padding mode. The optional values are + "same", "valid", "pad". Default: "same". + + - same: The width of the output is the same as the value of the input divided by `stride`. + If this mode is set, the value of `padding` must be 0. + + - valid: Returns a valid calculated output without padding. Excess pixels that do not satisfy the + calculation will be discarded. If this mode is set, the value of `padding` must be 0. + + - pad: Pads the input. Padding `padding` size of zero on both sides of the input. + If this mode is set, the value of `padding` must be greater than or equal to 0. + + padding (Union(int, tuple[int])): The number of padding on the depth, height and width directions of the input. + The data type is an integer or a tuple of six integers. If `padding` is an integer, + then the head, tail, top, bottom, left, and right padding are all equal to `padding`. + If `padding` is a tuple of six integers, then the head, tail, top, bottom, left, and right padding + is equal to `padding[0]`, `padding[1]`, `padding[2]`, `padding[3]`, `padding[4]` and `padding[5]` + respectively. The value should be greater than or equal to 0. Default: 0. 
+ dilation (Union[int, tuple[int]]): Dilation size of the 3D convolution kernel. + The data type is an integer or a tuple of three integers. If :math:`k > 1`, the kernel is sampled + every `k` elements. The value of `k` on the depth, height and width directions is in range of + [1, D], [1, H] and [1, W] respectively. Default: 1. + group (int): Splits filter into groups, `in_channels` and `out_channels` must be + divisible by `group`. Default: 1. Only 1 is currently supported. + has_bias (bool): Whether the Conv3d layer has a bias parameter. Default: False. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter. + It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, + values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well + as constant 'One' and 'Zero' distributions are possible. Aliases 'xavier_uniform', 'he_uniform', 'ones' + and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of + Initializer for more details. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter. + Available initialization methods are the same as 'weight_init'. Refer to the values of + Initializer for more details. Default: 'zeros'. + data_format (str): The data format. Currently only "NCDHW" is supported. Default: "NCDHW". + has_diagonal_form (bool): Specifies whether the input tensor is provided in the diagonal form. Default: False. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C_{in}, D_{in}, H_{in}, W_{in})`. + Currently the input data type only supports float16 and float32. + + Outputs: + Tensor of shape :math:`(2, N, C_{out}, D_{out}, H_{out}, W_{out})`. + + pad_mode is 'same': + + ..
math:: + \begin{array}{ll} \\ + D_{out} = \left \lceil{\frac{D_{in}}{\text{stride[0]}}} \right \rceil \\ + H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[1]}}} \right \rceil \\ + W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[2]}}} \right \rceil \\ + \end{array} + + + pad_mode is 'valid': + + .. math:: + \begin{array}{ll} \\ + D_{out} = \left \lfloor{\frac{D_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) - 1} + {\text{stride[0]}} + 1} \right \rfloor \\ + H_{out} = \left \lfloor{\frac{H_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) - 1} + {\text{stride[1]}} + 1} \right \rfloor \\ + W_{out} = \left \lfloor{\frac{W_{in} - \text{dilation[2]} \times (\text{kernel_size[2]} - 1) - 1} + {\text{stride[2]}} + 1} \right \rfloor \\ + \end{array} + + pad_mode is 'pad': + + .. math:: + \begin{array}{ll} \\ + D_{out} = \left \lfloor{\frac{D_{in} + padding[0] + padding[1] - \text{dilation[0]} \times + (\text{kernel_size[0]} - 1) - 1 }{\text{stride[0]}} + 1} \right \rfloor \\ + H_{out} = \left \lfloor{\frac{H_{in} + padding[2] + padding[3] - \text{dilation[1]} \times + (\text{kernel_size[1]} - 1) - 1 }{\text{stride[1]}} + 1} \right \rfloor \\ + W_{out} = \left \lfloor{\frac{W_{in} + padding[4] + padding[5] - \text{dilation[2]} \times + (\text{kernel_size[2]} - 1) - 1 }{\text{stride[2]}} + 1} \right \rfloor \\ + \end{array} + + Raises: + TypeError: If `in_channels`, `out_channels` or `group` is not an int. + TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple. + ValueError: If `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. + ValueError: If `padding` is less than 0. + ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. + ValueError: If `padding` is a tuple whose length is not equal to 6. + ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0, 0, 0). + ValueError: If `data_format` is not 'NCDHW'.
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.dual import Conv3d + >>> from mindspore import Tensor + >>> w = Tensor(np.random.random((2, 128, 3, 3, 3, 3)).astype(np.float32)) + >>> b = Tensor(np.random.random((2, 128)).astype(np.float32)) + >>> net = Conv3d( + >>> in_channels=3, out_channels=128, kernel_size=3, stride=1, padding=1, + >>> pad_mode='pad', weight_init=w, bias_init=b, has_bias=True + >>> ) + >>> u = Tensor(np.random.random((2, 64, 3, 32, 32, 32)).astype(np.float32)) + >>> out = net(u) + >>> print(out.shape) + (2, 64, 128, 32, 32, 32) + """ + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: _size_3_t, + stride: _size_3_t = 1, + pad_mode: str = 'same', + padding: _size_3_t = 0, + dilation: _size_3_t = 1, + group: int = 1, + has_bias: bool = False, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + data_format: str = 'NCDHW', + has_diagonal_form: bool = False) -> None: + if has_diagonal_form: + super(Conv3d, self).__init__(HConv3d, + J1J2ConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init, + data_format=data_format) + else: + super(Conv3d, self).__init__(HConv3d, + ConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init, + data_format=data_format) + + +class BatchNorm1d(_UniformOperator): + r""" + The double-valued Batch Normalization layer over a second-order double input of four dimensions, including + one spatial dimension, or three dimensions. 
+ + This layer applies Batch Normalization over a double input to reduce internal covariate shift. + Batch Normalization is widely used in convolutional networks. It rescales and recenters the feature using a + mini-batch of data and the learned parameters. If has_diagonal_form is False, this can be described by the + following formula: + + .. math:: + \begin{align} + \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\ + out = \frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}\\ + \text{Re(\hat{out})} = \text{Re(out)} * \text{Re(\gamma)} + + \text{Db(out)} * \text{Db(\gamma)} + \text{Re(\beta)}\\ + \text{Db(\hat{out})} = \text{Re(out)} * \text{Db(\gamma)} + + \text{Db(out)} * \text{Re(\gamma)} + \text{Db(\beta)}. + \end{align} + + And, if has_diagonal_form is True then: + + .. math:: + \begin{align} + \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\ + out = \frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}\\ + \text{Re(\hat{out})} = \text{X(out)} * \text{X(\gamma)} + \text{X(\beta)}\\ + \text{Db(\hat{out})} = \text{Y(out)} * \text{Y(\gamma)} + \text{Y(\beta)}. + \end{align} + + where :math:`inp` is the double input tensors, :math:`\mathrm{E}[inp]` is the arithmetic mean of the input tensor + over the batch dimension, :math:`\mathrm{Var}[inp]` is the statistical variance of the input tensor over spatial + dimensions, based on the double norm :math:`\|x+jy\|=|x|+|y|` for the regular representation and + :math:`\|x+jy\|=\text{max}(|x|,|y|)` in the diagonal representation. :math:`\gamma` and :math:`\beta` are double + learnable parameters representing the scale and shift coefficients respectively, and :math:`\delta` is a small + positive constant, which is needed to avoid division by zero in case statistical variance is close to zero. 
+ :math:`\text{Re(...)}` and :math:`\text{Db(...)}` are respectively the first and the second parts of the + double-valued expression inside the parentheses in the regular form, and :math:`\text{X(...)}` and + :math:`\text{Y(...)}` stand for the same functors, but with the parameters in the diagonal form. + + The Batch Normalization layer is more effective when double numbers are given in the diagonal form, because of + effective multiplications. For some other kinds of layers (e.g. activation functions) it can be otherwise. It is + always possible to transfer to and from the diagonal representation: + + .. math:: + \begin{align} + \text{X(inp)} = \text{Re(inp)} + \text{Db(inp)}\\ + \text{Y(inp)} = \text{Re(inp)} - \text{Db(inp)}\\ + \text{Re(inp)} = 0.5 * (\text{X(inp)} + \text{Y(inp)})\\ + \text{Db(inp)} = 0.5 * (\text{X(inp)} - \text{Y(inp)}) + \end{align} + + However, every conversion consumes time resources, so it is a tradeoff which representation to choose. + + Args: + num_features (int): The number of features in the input space. + eps (float): A small positive threshold, which is needed to avoid division by zero. Default: :math:`10^{-5}` + momentum (float): A floating hyperparameter of the momentum for the running_mean and running_var computation. + Default: 0.9. + affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. 
Default: 'zeros'. + moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + use_batch_statistics (bool): + + - If True, use the mean value and variance value of current batch data and track running mean + and running variance. + - If False, use the specified mean and variance values and do not track running statistics. + - If None, the use_batch_statistics is automatically set to True or False according to the training + and evaluation mode. During training, the parameter is set to True, and during evaluation, the + parameter is set to False. Default: True. + has_diagonal_form (bool): Specifies whether the input tensor is provided in the diagonal form. Default: False. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, W)` or :math:`(2, N, C)`. + '2' denotes that the input tensor belongs to the double domain and has a real part and + a double part. The `num_features` in `Args` has to be equal to :math:`C` in `Inputs`. + + Outputs: + Tensor, the normalized, scaled, offset tensor of the same shape as `inp`: + :math:`(2, N, C, W)` or :math:`(2, N, C)`. + + Raises: + TypeError: If `num_features` is not an int. + TypeError: If `eps` is not a float. + ValueError: If `num_features` is less than 1. + ValueError: If `momentum` is not in range [0, 1]. + ValueError: If `inp` is not a Tensor of 3 or 4 dimensions.
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.double import BatchNorm1d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32)).astype(np.float32)) + >>> bn = BatchNorm1d(64) + >>> y = bn(u) + >>> print(y.shape) + (2, 8, 64, 32) + """ + + def __init__(self, + num_features: int, + eps: float = 1e-5, + momentum: float = 0.9, + affine: bool = True, + gamma_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + beta_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_mean_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_var_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + use_batch_statistics: bool = True, + has_diagonal_form: bool = False) -> None: + if has_diagonal_form: + super(BatchNorm1d, self).__init__(HBatchNorm1d, + J1J2BatchNormImpl, + num_features=num_features, + eps=eps, + momentum=momentum, + affine=affine, + gamma_init=gamma_init, + beta_init=beta_init, + moving_mean_init=moving_mean_init, + moving_var_init=moving_var_init, + use_batch_statistics=use_batch_statistics) + else: + super(BatchNorm1d, self).__init__(HBatchNorm1d, + BatchNormImpl, + num_features=num_features, + eps=eps, + momentum=momentum, + affine=affine, + gamma_init=gamma_init, + beta_init=beta_init, + moving_mean_init=moving_mean_init, + moving_var_init=moving_var_init, + use_batch_statistics=use_batch_statistics) + + +class BatchNorm2d(_UniformOperator): + r""" + The double-valued Batch Normalization layer over a second-order double input of five dimensions, including + two spatial dimensions. + + This layer applies Batch Normalization over a double input to reduce internal covariate shift. + Batch Normalization is widely used in convolutional networks. It rescales and recenters the feature using a + mini-batch of data and the learned parameters. 
If has_diagonal_form is False, this can be described by the + following formula: + + .. math:: + \begin{align} + \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\ + out = \frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}\\ + \text{Re(\hat{out})} = \text{Re(out)} * \text{Re(\gamma)} + + \text{Db(out)} * \text{Db(\gamma)} + \text{Re(\beta)}\\ + \text{Db(\hat{out})} = \text{Re(out)} * \text{Db(\gamma)} + + \text{Db(out)} * \text{Re(\gamma)} + \text{Db(\beta)}. + \end{align} + + And, if has_diagonal_form is True then: + + .. math:: + \begin{align} + \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\ + out = \frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}\\ + \text{Re(\hat{out})} = \text{X(out)} * \text{X(\gamma)} + \text{X(\beta)}\\ + \text{Db(\hat{out})} = \text{Y(out)} * \text{Y(\gamma)} + \text{Y(\beta)}. + \end{align} + + where :math:`inp` is the double input tensors, :math:`\mathrm{E}[inp]` is the arithmetic mean of the input tensor + over the batch dimension, :math:`\mathrm{Var}[inp]` is the statistical variance of the input tensor over spatial + dimensions, based on the double norm :math:`\|x+jy\|=|x|+|y|` for the regular representation and + :math:`\|x+jy\|=\text{max}(|x|,|y|)` in the diagonal representation. :math:`\gamma` and :math:`\beta` are double + learnable parameters representing the scale and shift coefficients respectively, and :math:`\delta` is a small + positive constant, which is needed to avoid division by zero in case statistical variance is close to zero. + :math:`\text{Re(...)}` and :math:`\text{Db(...)}` are respectively the first and the second parts of the + double-valued expression inside the parentheses in the regular form, and :math:`\text{X(...)}` and + :math:`\text{Y(...)}` stand for the same functors, but with the parameters in the diagonal form. 
+ + The Batch Normalization layer is more effective when double numbers are given in the diagonal form, because of + effective multiplications. For some other kinds of layers (e.g. activation functions) it can be otherwise. It is + always possible to transfer to and from the diagonal representation: + + .. math:: + \begin{align} + \text{X(inp)} = \text{Re(inp)} + \text{Db(inp)}\\ + \text{Y(inp)} = \text{Re(inp)} - \text{Db(inp)}\\ + \text{Re(inp)} = 0.5 * (\text{X(inp)} + \text{Y(inp)})\\ + \text{Db(inp)} = 0.5 * (\text{X(inp)} - \text{Y(inp)}) + \end{align} + + However, every conversion consumes time resources, so it is a tradeoff which representation to choose. + + Args: + num_features (int): The number of features in the input space. + eps (float): A small positive threshold, which is needed to avoid division by zero. Default: :math:`10^{-5}` + momentum (float): A floating hyperparameter of the momentum for the running_mean and running_var computation. + Default: 0.9. + affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. 
+ use_batch_statistics (bool): + + - If True, use the mean value and variance value of current batch data and track running mean + and running variance. + - If False, use the specified mean and variance values and do not track running statistics. + - If None, the use_batch_statistics is automatically set to True or False according to the training + and evaluation mode. During training, the parameter is set to True, and during evaluation, the + parameter is set to False. Default: True. + has_diagonal_form (bool): Specifies whether the input tensor is provided in the diagonal form. Default: False. + data_format (str): The data format, either 'NHWC' or 'NCHW'. Default: 'NCHW'. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, H, W)` if data_format is 'NCHW', or + :math:`(2, N, H, W, C)` if data_format is 'NHWC'. '2' denotes that the input tensor belongs to the double + domain and has a real part and a double part. The `num_features` in `Args` has to be equal to :math:`C` in + `Inputs`. + + Outputs: + Tensor, the normalized, scaled, offset tensor of the same shape as `inp`: + :math:`(2, N, C, H, W)` if data_format is 'NCHW', or :math:`(2, N, H, W, C)` if data_format is 'NHWC'. + + Raises: + TypeError: If `num_features` is not an int. + TypeError: If `eps` is not a float. + ValueError: If `num_features` is less than 1. + ValueError: If `momentum` is not in range [0, 1]. + ValueError: If `data_format` is neither 'NHWC' nor 'NCHW'. + ValueError: If `inp` is not a Tensor of 5 dimensions.
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.double import BatchNorm2d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32, 32)).astype(np.float32)) + >>> bn = BatchNorm2d(64) + >>> y = bn(u) + >>> print(y.shape) + (2, 8, 64, 32, 32) + """ + + def __init__(self, + num_features: int, + eps: float = 1e-5, + momentum: float = 0.9, + affine: bool = True, + gamma_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + beta_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_mean_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_var_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + use_batch_statistics: bool = True, + has_diagonal_form: bool = False, + data_format='NCHW') -> None: + if has_diagonal_form: + super(BatchNorm2d, self).__init__(HBatchNorm2d, + J1J2BatchNormImpl, + num_features=num_features, + eps=eps, + momentum=momentum, + affine=affine, + gamma_init=gamma_init, + beta_init=beta_init, + moving_mean_init=moving_mean_init, + moving_var_init=moving_var_init, + use_batch_statistics=use_batch_statistics, + data_format=data_format) + else: + super(BatchNorm2d, self).__init__(HBatchNorm2d, + BatchNormImpl, + num_features=num_features, + eps=eps, + momentum=momentum, + affine=affine, + gamma_init=gamma_init, + beta_init=beta_init, + moving_mean_init=moving_mean_init, + moving_var_init=moving_var_init, + use_batch_statistics=use_batch_statistics, + data_format=data_format) + + +class BatchNorm3d(_UniformOperator): + r""" + The double-valued Batch Normalization layer over a second-order double input of six dimensions, including + three spatial dimensions. + + This layer applies Batch Normalization over a double input to reduce internal covariate shift. + Batch Normalization is widely used in convolutional networks. 
It rescales and recenters the feature using a + mini-batch of data and the learned parameters. If has_diagonal_form is False, this can be described by the + following formula: + + .. math:: + \begin{align} + \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\ + out = \frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}\\ + \text{Re(\hat{out})} = \text{Re(out)} * \text{Re(\gamma)} + + \text{Db(out)} * \text{Db(\gamma)} + \text{Re(\beta)}\\ + \text{Db(\hat{out})} = \text{Re(out)} * \text{Db(\gamma)} + + \text{Db(out)} * \text{Re(\gamma)} + \text{Db(\beta)}. + \end{align} + + And, if has_diagonal_form is True then: + + .. math:: + \begin{align} + \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\ + out = \frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}\\ + \text{Re(\hat{out})} = \text{X(out)} * \text{X(\gamma)} + \text{X(\beta)}\\ + \text{Db(\hat{out})} = \text{Y(out)} * \text{Y(\gamma)} + \text{Y(\beta)}. + \end{align} + + where :math:`inp` is the double input tensors, :math:`\mathrm{E}[inp]` is the arithmetic mean of the input tensor + over the batch dimension, :math:`\mathrm{Var}[inp]` is the statistical variance of the input tensor over spatial + dimensions, based on the double norm :math:`\|x+jy\|=|x|+|y|` for the regular representation and + :math:`\|x+jy\|=\text{max}(|x|,|y|)` in the diagonal representation. :math:`\gamma` and :math:`\beta` are double + learnable parameters representing the scale and shift coefficients respectively, and :math:`\delta` is a small + positive constant, which is needed to avoid division by zero in case statistical variance is close to zero. + :math:`\text{Re(...)}` and :math:`\text{Db(...)}` are respectively the first and the second parts of the + double-valued expression inside the parentheses in the regular form, and :math:`\text{X(...)}` and + :math:`\text{Y(...)}` stand for the same functors, but with the parameters in the diagonal form. 
+ + The Batch Normalization layer is more effective when double numbers are given in the diagonal form, because of + effective multiplications. For some other kinds of layers (e.g. activation functions) it can be otherwise. It is + always possible to transfer to and from the diagonal representation: + + .. math:: + \begin{align} + \text{X(inp)} = \text{Re(inp)} + \text{Db(inp)}\\ + \text{Y(inp)} = \text{Re(inp)} - \text{Db(inp)}\\ + \text{Re(inp)} = 0.5 * (\text{X(inp)} + \text{Y(inp)})\\ + \text{Db(inp)} = 0.5 * (\text{X(inp)} - \text{Y(inp)}) + \end{align} + + However, every conversion consumes time resources, so it is a tradeoff which representation to choose. + + Args: + num_features (int): The number of features in the input space. + eps (float): A small positive threshold, which is needed to avoid division by zero. Default: :math:`10^{-5}` + momentum (float): A floating hyperparameter of the momentum for the running_mean and running_var computation. + Default: 0.9. + affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. 
+ use_batch_statistics (bool): + + - If True, use the mean value and variance value of current batch data and track running mean + and running variance. + - If False, use the specified mean and variance values and do not track running statistics. + - If None, the use_batch_statistics is automatically set to True or False according to the training + and evaluation mode. During training, the parameter is set to True, and during evaluation, the + parameter is set to False. Default: True. + has_diagonal_form (bool): Specifies whether the input tensor is provided in the diagonal form. Default: False. + data_format (str): The data format. Only the 'NCDHW' format is supported as of now. + Default: 'NCDHW'. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, D, H, W)`. '2' denotes that the input tensor belongs + to the double domain and has a real part and a double part. The `num_features` in `Args` has to be equal + to :math:`C` in `Inputs`. + + Outputs: + Tensor, the normalized, scaled, offset tensor of the same shape :math:`(2, N, C, D, H, W)` as `inp`. + + Raises: + TypeError: If `num_features` is not an int. + TypeError: If `eps` is not a float. + ValueError: If `num_features` is less than 1. + ValueError: If `momentum` is not in range [0, 1]. + ValueError: If `data_format` is not 'NCDHW'. + ValueError: If `inp` is not a Tensor of 6 dimensions.
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.double import BatchNorm3d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32, 32, 32)).astype(np.float32)) + >>> bn = BatchNorm3d(64) + >>> y = bn(u) + >>> print(y.shape) + (2, 8, 64, 32, 32, 32) + """ + + def __init__(self, + num_features: int, + eps: float = 1e-5, + momentum: float = 0.9, + affine: bool = True, + gamma_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + beta_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_mean_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_var_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + use_batch_statistics: bool = True, + has_diagonal_form: bool = False, + data_format='NCDHW') -> None: + if has_diagonal_form: + super(BatchNorm3d, self).__init__(HBatchNorm3d, + J1J2BatchNormImpl, + num_features=num_features, + eps=eps, + momentum=momentum, + affine=affine, + gamma_init=gamma_init, + beta_init=beta_init, + moving_mean_init=moving_mean_init, + moving_var_init=moving_var_init, + use_batch_statistics=use_batch_statistics, + data_format=data_format) + else: + super(BatchNorm3d, self).__init__(HBatchNorm3d, + BatchNormImpl, + num_features=num_features, + eps=eps, + momentum=momentum, + affine=affine, + gamma_init=gamma_init, + beta_init=beta_init, + moving_mean_init=moving_mean_init, + moving_var_init=moving_var_init, + use_batch_statistics=use_batch_statistics, + data_format=data_format) + + +class Dense(_UniformOperator): + r""" + The double-valued dense connected layer. + + Applies dense connected layer for the double-valued input. If has_diagonal_form is False, this layer implements + the operation as: + + .. 
math:: + \begin{align} + \text{Re(out)} = \text{Re(inp)} * \text{Re(kernel)} + \text{Db(inp)} * \text{Db(kernel)} + \text{Re(bias)}\\ + \text{Db(out)} = \text{Re(inp)} * \text{Db(kernel)} + \text{Db(inp)} * \text{Re(kernel)} + \text{Db(bias)}, + \end{align} + + And, if has_diagonal_form is True then: + + .. math:: + \begin{align} + \text{X(out)} = \text{X(inp)} * \text{X(kernel)} + \text{X(bias)}\\ + \text{Y(out)} = \text{Y(inp)} * \text{Y(kernel)} + \text{Y(bias)}, + \end{align} + + where :math:`inp` is the double input tensor, :math:`\text{kernel}` is a double weight matrix with the same + data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is a double bias vector with the same + data type as the :math:`inp` created by the layer (only if has_bias is True). :math:`\text{Re(...)}` and + :math:`\text{Db(...)}` are respectively the first and the second parts of the double-valued expression inside the + parentheses in the regular form, and :math:`\text{X(...)}` and :math:`\text{Y(...)}` stand for the same functors, + but with the parameters in the diagonal form. + + The dense layer is notably more effective when double numbers are given in the diagonal form, because it then + requires only two matrix multiplications instead of four. For some other kinds of layers (e.g. activation + functions) it can be otherwise. It is always possible to transfer to and from the diagonal representation: + + .. math:: + \begin{align} + \text{X(inp)} = \text{Re(inp)} + \text{Db(inp)}\\ + \text{Y(inp)} = \text{Re(inp)} - \text{Db(inp)}\\ + \text{Re(inp)} = 0.5 * (\text{X(inp)} + \text{Y(inp)})\\ + \text{Db(inp)} = 0.5 * (\text{X(inp)} - \text{Y(inp)}) + \end{align} + + However, every conversion consumes time resources, so it is a tradeoff which representation to choose. + + Args: + in_channels (int): The number of channels in the input space. + out_channels (int): The number of channels in the output space.
+ weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is + same as `inp`. The values of str refer to the function `initializer`. Default: 'zeros'. + has_bias (bool): Specifies whether the layer uses a bias vector. Default: True. + has_diagonal_form (bool): Specifies whether the input tensor is provided in the diagonal form. Default: False. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, *, ..., *, in\_channels)`. '2' denotes that the input tensor + belongs to the domain of double numbers and has two components. The `in_channels` in `Args` + has to be equal to :math:`in\_channels` in `Inputs`. The count of intermediate dimensions denoted by '*' is + arbitrary but must be at least one. + + Outputs: + Tensor of shape :math:`(2, *, ..., *, out\_channels)`. The count of intermediate dimensions is the same as + in `Inputs`. + + Raises: + TypeError: If `in_channels` or `out_channels` is not an int. + TypeError: If `has_bias` is not a bool. + ValueError: If length of shape of `weight_init` is not equal to 3, + or shape[0] of `weight_init` is not equal to 2, + or shape[1] of `weight_init` is not equal to `out_channels`, + or shape[2] of `weight_init` is not equal to `in_channels`. + ValueError: If length of shape of `bias_init` is not equal to 2, + or shape[0] of `bias_init` is not equal to 2, + or shape[1] of `bias_init` is not equal to `out_channels`.
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.double import Dense + >>> from mindspore import Tensor + >>> w = Tensor(np.random.random((2, 7, 5)).astype(np.float32)) + >>> b = Tensor(np.random.random((2, 7)).astype(np.float32)) + >>> net = Dense(in_channels=5, out_channels=7, weight_init=w, bias_init=b, has_bias=True) + >>> u = Tensor(np.random.random((2, 34, 1, 5)).astype(np.float32)) + >>> out = net(u) + >>> print(out.shape) + (2, 34, 1, 7) + """ + + def __init__(self, + in_channels: int, + out_channels: int, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + has_bias: bool = True, + has_diagonal_form: bool = False) -> None: + if has_diagonal_form: + super(Dense, self).__init__(HDense, + J1J2DenseImpl, + in_channels=in_channels, + out_channels=out_channels, + weight_init=weight_init, + bias_init=bias_init, + has_bias=has_bias) + else: + super(Dense, self).__init__(HDense, + DenseImpl, + in_channels=in_channels, + out_channels=out_channels, + weight_init=weight_init, + bias_init=bias_init, + has_bias=has_bias) + + +class ReLU(nn.Cell): + r""" + Rectified Linear Unit activation function for double numbers. + + Applies ReLU activation layer for the double-valued input. If has_diagonal_form is False, this layer implements + the operation as: + + .. math:: + \begin{align} + \text{Re(out)} = (Re(inp))^+ = \max(0, Re(inp))\\ + \text{Db(out)} = (Db(inp))^+ = \max(0, Db(inp)), + \end{align} + + It returns element-wise :math:`\max(0, x)` for both real and double parts of the input tensor independently. + In particular, the neurons with the negative output components will be suppressed and the active neurons will stay + the same. If has_diagonal_form is True then this layer first converts the input to the regular form: + + ..
math:: + \begin{align} + \text{Re(inp)} = 0.5 * (\text{X(inp)} + \text{Y(inp)})\\ + \text{Db(inp)} = 0.5 * (\text{X(inp)} - \text{Y(inp)}), + \end{align} + + then applies element-wise ReLU as shown above, and transfers the result back to the diagonal representation: + + .. math:: + \begin{align} + \text{X(out)} = \text{Re(out)} + \text{Db(out)}\\ + \text{Y(out)} = \text{Re(out)} - \text{Db(out)} + \end{align} + + Args: + has_diagonal_form (bool): Specifies whether the input tensor is provided in the diagonal form. Default: False. + + Inputs: + - **inp** (Tensor) - The input of ReLU is a Tensor of shape (2, *, ..., *). The data type is + `number <https://www.mindspore.cn/docs/en/master/api_python/mindspore.html#mindspore.dtype>`_ . + + Outputs: + Tensor, with the same type and shape as the `inp`. + + Raises: + TypeError: If dtype of `inp` is not a number. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.double import ReLU + >>> from mindspore import Tensor + >>> net = ReLU() + >>> u = Tensor(np.array([[-1, 2, -3], [0, -2, 1]]).astype(np.float32)) + >>> out = net(u) + >>> print(out) + [[0. 2. 0.] + [0. 0. 1.]] + """ + + def __init__(self, has_diagonal_form: bool = False) -> None: + super(ReLU, self).__init__() + if has_diagonal_form: + self.relu = J1J2ReLU() + else: + self.relu = nn.ReLU() + + def construct(self, u: Tensor) -> Tensor: + return self.relu(u) diff --git a/mindspore/python/mindspore/hypercomplex/double/double_relu.py b/mindspore/python/mindspore/hypercomplex/double/double_relu.py new file mode 100644 index 00000000000..a93ca8a847b --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/double/double_relu.py @@ -0,0 +1,77 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Double relu operators""" +from mindspore import nn, Tensor +from mindspore.ops import operations as P +from mindspore.hypercomplex.utils import get_x_and_y as get_u1_and_u2, \ + to_2channel as to_double + + +class J1J2ReLU(nn.Cell): + r""" + Rectified Linear Unit activation function for double-valued input in the diagonal representation. + + Applies ReLU activation layer for the double-valued input. This layer first converts the input to the regular form: + + .. math:: + \begin{align} + \text{Re(inp)} = 0.5 * (\text{X(inp)} + \text{Y(inp)})\\ + \text{Db(inp)} = 0.5 * (\text{X(inp)} - \text{Y(inp)}), + \end{align} + + then applies the element-wise :math:`\max(0, x)` for both real and double parts of the input tensor independently: + + .. math:: + \begin{align} + \text{Re(out)} = (Re(inp))^+ = \max(0, Re(inp))\\ + \text{Db(out)} = (Db(inp))^+ = \max(0, Db(inp)), + \end{align} + + and finally transfers the result back to the diagonal representation: + + .. math:: + \begin{align} + \text{X(out)} = \text{Re(out)} + \text{Db(out)}\\ + \text{Y(out)} = \text{Re(out)} - \text{Db(out)} + \end{align} + + Inputs: + - **inp** (Tensor) - The input of ReLU is a Tensor of shape (2, *, ..., *). The data type is + `number <https://www.mindspore.cn/docs/en/master/api_python/mindspore.html#mindspore.dtype>`_ . + + Outputs: + Tensor, with the same type and shape as the `inp`. + + Raises: + TypeError: If dtype of `inp` is not a number.
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self) -> None: + """Initialize J1J2ReLU.""" + super(J1J2ReLU, self).__init__() + self.relu = P.ReLU() + + def construct(self, u: Tensor) -> Tensor: + u = u / 2 + u1, u2 = get_u1_and_u2(u) + x = self.relu(u1 + u2) + y = self.relu(u1 - u2) + out1 = x + y + out2 = x - y + out = to_double(out1, out2) + return out diff --git a/mindspore/python/mindspore/hypercomplex/dual/__init__.py b/mindspore/python/mindspore/hypercomplex/dual/__init__.py new file mode 100644 index 00000000000..3bfb1c3e4f8 --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/dual/__init__.py @@ -0,0 +1,24 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# ============================================================================ +"""Dual operators""" +from mindspore.nn import ReLU + +from mindspore.hypercomplex.dual.dual_operators import Conv1d, Conv2d, Conv3d +from mindspore.hypercomplex.dual.dual_operators import BatchNorm1d, BatchNorm2d, BatchNorm3d +from mindspore.hypercomplex.dual.dual_operators import Dense + +from mindspore.hypercomplex.hypercomplex.hc_pool import MaxPool1d, MaxPool2d, \ + AvgPool1d, AvgPool2d, AdaptiveAvgPool1d, AdaptiveAvgPool2d, \ + AdaptiveAvgPool3d, AdaptiveMaxPool1d, AdaptiveMaxPool2d diff --git a/mindspore/python/mindspore/hypercomplex/dual/_dual_bn_impl.py b/mindspore/python/mindspore/hypercomplex/dual/_dual_bn_impl.py new file mode 100644 index 00000000000..e4c4560b91d --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/dual/_dual_bn_impl.py @@ -0,0 +1,140 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Dual BatchNorm Implementation""" +from typing import Tuple + +import mindspore.ops as ops +from mindspore.common.tensor import Tensor +from mindspore.hypercomplex.hypercomplex._hc_bn_impl import _BaseBatchNormImpl as HCBatchNormImpl +from mindspore.hypercomplex.utils import get_x_and_y as get_real_and_dual + + +class _BatchNormImpl(HCBatchNormImpl): + r""" + The implementor class of the Batch Normalization layer for dual numbers. 
+ + Implements the functionality specific to dual numbers and needed by the 'BatchNorm' class. This includes: + getting the norm of a dual number, applying scaling and shift to a dual tensor, and updating the running + mean and variance, which are used during inference. + + Args: + affine (bool): A bool value. When set to True, gamma and beta can be learned. + use_batch_statistics (bool): If true, use the mean value and variance value of current batch data. If false, + use the mean value and variance value of specified value. If None, the training process will use the mean + and variance of current batch data and track the running mean and variance, the evaluation process will use + the running mean and variance. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. + num_features (int): The number of features in the input space. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def scale_and_shift(self, + u_x: Tensor, + u_y: Tensor, + scale_x: Tensor, + scale_y: Tensor, + shift_x: Tensor, + shift_y: Tensor) -> Tuple[Tensor, Tensor]: + r""" + Applies dual scaling and shift to an input tensor. + + This function implements the operation as: + + .. math:: + \begin{align} + \text{Re(out)} = \text{Re(inp)} * \text{Re(scale)} + \text{Re(shift)}\\ + \text{Du(out)} = \text{Re(inp)} * \text{Du(scale)} + \text{Du(inp)} * \text{Re(scale)} + \text{Du(shift)}, + \end{align} + + where :math:`inp` is the dual input tensor, :math:`scale` and :math:`shift` are dual parameters representing + the scaling and shift coefficients respectively.
:math:`\text{Re(...)}` and :math:`\text{Du(...)}` are + respectively real and dual parts of the dual-valued expression inside the parentheses. + + Args: + u_x (Tensor): A tensor of shape (C,), which represents the real part of the normalized inputs. + u_y (Tensor): A tensor of shape (C,), which represents the dual part of the normalized inputs. + scale_x (Tensor): A tensor of shape (C,), which represents the real part of the scaling vector. + scale_y (Tensor): A tensor of shape (C,), which represents the dual part of the scaling vector. + shift_x (Tensor): A tensor of shape (C,), which represents the real part of the bias vector. + shift_y (Tensor): A tensor of shape (C,), which represents the dual part of the bias vector. + + Returns: + Tuple of two tensors of shape (C,), which contains the real and the dual parts of rescaled and + recentered inputs. + """ + out_x = u_x * scale_x + shift_x + out_y = u_x * scale_y + u_y * scale_x + shift_y + return out_x, out_y + + def get_norm(self, u: Tensor) -> Tensor: + r""" + Calculates norm of dual elements of an input tensor. + + Norm is a non-negative real number that is a characteristic of 'magnitude' of that number, i.e. how far away it + is from zero. The function implements the operation as: + + .. math:: + \text{out} = \left|\frac{Du(inp)}{2}\right|+\sqrt{Re(inp)^2+\frac{Du(inp)^2}{4}+\delta}, + + where :math:`inp` is the dual input tensor and :math:`\delta` is a small positive constant, which is needed + to avoid division by zero in case statistical variance is close to zero. :math:`\text{Re(...)}` and + :math:`\text{Du(...)}` are respectively real and dual parts of the dual-valued expression inside + the parentheses. + + Args: + u (Tensor): Tensor of shape (2, *, ..., *). '2' denotes that the input tensor belongs to the dual domain + and has a real and a dual part. + + Returns: + Tensor of shape (*, ..., *).
The count and size of dimensions of the output tensor are the same as in + the input tensor, but without the very first dimension because the output tensor is real-valued. + """ + u_r, u_d = get_real_and_dual(u) + dual_half = u_d.abs() / 2 + eps = 1e-7 + sqrt = u_r ** 2 + dual_half ** 2 + eps + sqrt = ops.sqrt(sqrt) + out = dual_half + sqrt + return out + + def get_square_norm(self, u: Tensor) -> Tensor: + r""" + Calculates element-wise squared norm of dual elements of an input tensor. + + Norm is a non-negative real number that is a characteristic of 'magnitude' of that number, i.e. how far away it + is from zero. The function implements the operation as: + + .. math:: + \text{out} = \left(\left|\frac{Du(inp)}{2}\right|+\sqrt{Re(inp)^2+\frac{Du(inp)^2}{4}+\delta}\right)^2, + + where :math:`inp` is the dual input tensor, :math:`\text{Re(...)}` and :math:`\text{Du(...)}` + are respectively real and dual parts of the dual-valued expression inside the parentheses. + + Args: + u (Tensor): Tensor of shape (2, *, ..., *). '2' denotes that the input tensor belongs to the dual domain + and has a real and a dual part. + + Returns: + Tensor of shape (*, ..., *). The count and size of dimensions of the output tensor are the same as in + the input tensor, but without the very first dimension because the output tensor is real-valued. + """ + norm = self.get_norm(u) + out = norm ** 2 + return out diff --git a/mindspore/python/mindspore/hypercomplex/dual/_dual_conv_impl.py b/mindspore/python/mindspore/hypercomplex/dual/_dual_conv_impl.py new file mode 100644 index 00000000000..f3653f60942 --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/dual/_dual_conv_impl.py @@ -0,0 +1,138 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Dual Convolution Implementation""" +import numbers +from typing import Callable, Tuple, Union + +from mindspore.common.tensor import Tensor +from mindspore.common.initializer import Initializer +from mindspore.hypercomplex.hypercomplex._hc_conv_impl import _BaseConvImpl as BaseConvImpl +from mindspore import ops as P + + +class _ConvImpl(BaseConvImpl): + r""" + The implementor class of the convolution layer for dual numbers. + + Applies dual-valued convolution transformation. This layer implements the operation as: + + .. math:: + \begin{align} + \text{Re(ccor)} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)})\\ + \text{Du(ccor)} = \text{ccor}(\text{Du(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Re(kernel)}, \text{Du(inp)}), + \end{align} + + where :math:`ccor` is the real-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, + :math:`inp` is the dual input tensor, :math:`\text{kernel}` is a dual weight matrix with the same + data type as the :math:`inp` created by the layer. :math:`\text{Re(...)}` and :math:`\text{Du(...)}` + are respectively real and dual parts of the dual-valued expression inside the parentheses. + + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of real and dual parts of the kernel.
+ + Inputs: + - **conv_op** (Callable) - the function of the real-valued convolution to be used for decomposition + of the dual convolution transformation. For example, mindspore.ops.operations.Conv2D(...) may be passed. + - **real** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`, + which defines the real part of the input. + - **dual** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`, + which defines the dual part of the input. + + Outputs: + Tuple of two tensors, each of shape :math:`(N, C_{out}, *, ..., *)` or :math:`(N, *, ..., *, C_{out})`, + which represents the real and the dual parts of the output. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def construct(self, + conv_op: Callable, + real: Tensor, + dual: Tensor) -> Tuple[Tensor, Tensor]: + + out_r = conv_op(real, self.weight_x) + out_rd = conv_op(real, self.weight_y) + out_dr = conv_op(dual, self.weight_x) + + out_d = out_rd + out_dr + return out_r, out_d + + +class _ReDuConvImpl(BaseConvImpl): + r""" + The implementor class of the convolution layer for dual numbers. + + Applies dual-valued convolution transformation. This layer implements the operation as: + + .. math:: + \begin{align} + \text{inp_cat} = \text{cat}(\text{Re(inp)}, \text{Du(inp)}) \\ + \text{K} = \text{cat}(\text{Du(kernel)}, \text{Re(kernel)}) \\ + \text{Re(ccor)} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)})\\ + \text{Du(ccor)} = \text{ccor}(\text{K}, \text{inp_cat}) + \end{align} + + where :math:`ccor` is the real-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, + :math:`inp` is the dual input tensor, :math:`\text{kernel}` is a dual weight matrix with the same + data type as the :math:`inp` created by the layer, :math:`\text{cat}` is concatenation along the channel axis. + :math:`\text{Re(...)}` and :math:`\text{Du(...)}` are respectively real and dual parts of the dual-valued expression + inside the parentheses.
+ + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of real and dual parts of the kernel. + + Inputs: + - **conv_op** (Callable) - the function of the real-valued convolution to be used for decomposition + of the dual convolution transformation. For example, mindspore.ops.operations.Conv2D(...) may be passed. + - **real** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`, + which defines the real part of the input. + - **imag** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`, + which defines the dual part of the input. + + Outputs: + Tuple of two tensors, each of shape :math:`(N, C_{out}, *, ..., *)` or :math:`(N, *, ..., *, C_{out})`, + which represents the real and the dual parts of the output.
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + weight_init: Union[Tensor, str, Initializer, numbers.Number], + weight_shape: tuple, + **factory_kwargs) -> None: + super(_ReDuConvImpl, self).__init__(weight_init, weight_shape, **factory_kwargs) + data_format = factory_kwargs.get('data_format', 'nchw') + c_idx = data_format.lower().find('c') + if c_idx < 0: + raise ValueError(f"Data format {data_format} is unsupported") + self.concat = P.Concat(c_idx) + + def construct(self, + conv_op: Callable, + real: Tensor, + imag: Tensor) -> Tuple[Tensor, Tensor]: + + out_r = conv_op(real, self.weight_x) + inp = self.concat([real, imag]) + w = self.concat([self.weight_y, self.weight_x]) + out_d = conv_op(inp, w) + return out_r, out_d diff --git a/mindspore/python/mindspore/hypercomplex/dual/_dual_dense_impl.py b/mindspore/python/mindspore/hypercomplex/dual/_dual_dense_impl.py new file mode 100644 index 00000000000..4de9cff50da --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/dual/_dual_dense_impl.py @@ -0,0 +1,69 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# ============================================================================ +"""Dual Dense Implementation""" +from typing import Callable, Tuple + +from mindspore.common.tensor import Tensor +from mindspore.hypercomplex.hypercomplex._hc_dense_impl import _BaseDenseImpl as BaseDenseImpl + + +class _DenseImpl(BaseDenseImpl): + r""" + The implementor class of the dense connected layer for dual numbers. + + Applies dual-valued matrix multiplication for dense connected layer. This layer implements the operation as: + + .. math:: + \begin{align} + \text{Re(out)} = \text{Re(inp)} * \text{Re(kernel)}\\ + \text{Du(out)} = \text{Re(inp)} * \text{Du(kernel)} + \text{Du(inp)} * \text{Re(kernel)}, + \end{align} + + where :math:`inp` is the dual input tensor, :math:`\text{kernel}` is + a dual weight matrix with the same data type as the :math:`inp` created by the layer, + :math:`\text{Re(...)}` and :math:`\text{Du(...)}` are respectively real and dual parts of the dual-valued + expression inside the parentheses. + + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of real and dual parts of the kernel. + **factory_kwargs (dict): Extra parameters which may be needed by specific subclasses. + + Inputs: + - **matmul_op** (Callable) - the function of the real-valued matrix multiplication to be used for decomposition + of the dual linear transformation. Usually, mindspore.ops.operations.MatMul(...) is passed. + - **real** (Tensor) - Tensor of shape :math:`(*, in\_channels)`, which defines the real part of the input. + - **dual** (Tensor) - Tensor of shape :math:`(*, in\_channels)`, which defines the dual part of the input.
+ + Outputs: + Tuple of two tensors, each of shape :math:`(*, out\_channels)`, which represents the real and the dual + parts of the output. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def construct(self, + matmul_op: Callable, + real: Tensor, + dual: Tensor) -> Tuple[Tensor, Tensor]: + + out_r = matmul_op(real, self.weight_x) + out_rd = matmul_op(real, self.weight_y) + out_dr = matmul_op(dual, self.weight_x) + + out_d = out_rd + out_dr + return out_r, out_d diff --git a/mindspore/python/mindspore/hypercomplex/dual/dual_operators.py b/mindspore/python/mindspore/hypercomplex/dual/dual_operators.py new file mode 100644 index 00000000000..bf5faeb9c7c --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/dual/dual_operators.py @@ -0,0 +1,958 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# ============================================================================ +"""Dual Operators""" +import numbers +from typing import Union +from mindspore.common.initializer import Initializer +from mindspore.common.tensor import Tensor +# Batch Normalization +from mindspore.hypercomplex.hypercomplex.hc_bn import BatchNorm1d as HBatchNorm1d, \ + BatchNorm2d as HBatchNorm2d, BatchNorm3d as HBatchNorm3d +from mindspore.hypercomplex.dual._dual_bn_impl import _BatchNormImpl as BatchNormImpl +# Convolution +from mindspore.hypercomplex.hypercomplex.hc_conv import Conv1d as HConv1d, \ + Conv2d as HConv2d, Conv3d as HConv3d +from mindspore.hypercomplex.dual._dual_conv_impl import _ReDuConvImpl as ConvImpl +# Dense +from mindspore.hypercomplex.hypercomplex.hc_dense import Dense as HDense +from mindspore.hypercomplex.dual._dual_dense_impl import _DenseImpl as DenseImpl +from mindspore.hypercomplex.hypercomplex.uniform_operator import _UniformOperator + +from mindspore.hypercomplex.utils import _size_1_t, _size_2_t, _size_3_t + + +class Conv2d(_UniformOperator): + r""" + 2D convolution layer on the dual-valued input. + + Calculates the 2D convolution on the input tensor which is typically of shape + :math:`(2, N, C_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, :math:`C_{in}` is a number of channels, + :math:`H_{in}, W_{in}` are the height and width of the feature layer respectively. The formula is defined as: + + .. math:: + + \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + + \sum_{k = 0}^{C_{in} - 1} \text{hccor}({\text{weight}(C_{\text{out}_j}, k), \text{inp}(N_i, k)}) + + where :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of + the output and :math:`j` is in the range of :math:`[0, C_{out}-1]`. 
:math:`\text{weight}(C_{\text{out}_j}, k)` + is a convolution kernel slice with shape :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`, + where :math:`\text{kernel_size[0]}` and :math:`\text{kernel_size[1]}` are the height and width of the convolution + kernel respectively. :math:`\text{bias}` is the bias parameter and :math:`\text{inp}` is the input tensor. + In this case, `data_format` of the input tensor is 'NCHW' and the shape of full convolution kernel is + :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`, + where `group` is the number of groups to split the input `inp` in the channel dimension. If `data_format` of the + input tensor is 'NHWC', the shape of full convolution kernel will be + :math:`(C_{out}, \text{kernel_size[0]}, \text{kernel_size[1]}, C_{in} / \text{group})`. + :math:`hccor` is the dual-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_. + This implies the operation as: + + .. math:: + \text{Re(ccor)} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)}) + \text{Re(bias)}\\ + \text{Du(ccor)} = \text{ccor}(\text{Du(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Re(kernel)}, \text{Du(inp)}) + \text{Du(bias)} + + where :math:`ccor` is the real-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, + :math:`inp` is the dual input tensor, :math:`\text{kernel}` is a dual weight matrix with the same + data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is a dual bias vector with the same + data type as the :math:`inp` created by the layer (only if has_bias is True). :math:`\text{Re(...)}` + and :math:`\text{Du(...)}` are respectively real and dual parts of the dual-valued expression inside + the parentheses. + + For more details, please refer to the paper `Gradient Based Learning Applied to Document + Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. + + Note: + On Ascend platform, only group convolution in depthwise convolution scenarios is supported. + That is, when `group > 1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied.
+ + 'NCHW' format is supported only with GPU target device as of now. + + Args: + in_channels (int): The channel number of the input tensor of the Conv2d layer. + out_channels (int): The channel number of the output tensor of the Conv2d layer. + kernel_size (Union[int, tuple[int]]): Specifies the height and width of the 2D convolution kernel. + The data type is an integer or a tuple of two integers. An integer represents the height + and width of the convolution kernel. A tuple of two integers represents the height + and width of the convolution kernel respectively. + stride (Union[int, tuple[int]]): The movement stride of the 2D convolution kernel. + The data type is an integer or a tuple of two integers. An integer represents the movement step size + in both height and width directions. A tuple of two integers represents the movement step size in + the height and width directions respectively. Default: 1. + pad_mode (str): Specifies padding mode. The optional values are + "same", "valid", "pad". Default: "same". + + - same: The width of the output is the same as the value of the input divided by `stride`. + If this mode is set, the value of `padding` must be 0. + + - valid: Returns a valid calculated output without padding. Excess pixels that do not satisfy the + calculation will be discarded. If this mode is set, the value of `padding` must be 0. + + - pad: Pads the input. Padding `padding` size of zero on both sides of the input. + If this mode is set, the value of `padding` must be greater than or equal to 0. + + padding (Union[int, tuple[int]]): The number of padding on the height and width directions of the input. + The data type is an integer or a tuple of four integers. If `padding` is an integer, + then the top, bottom, left, and right padding are all equal to `padding`. + If `padding` is a tuple of 4 integers, then the top, bottom, left, and right padding + is equal to `padding[0]`, `padding[1]`, `padding[2]`, and `padding[3]` respectively. 
+ The value should be greater than or equal to 0. Default: 0. + dilation (Union[int, tuple[int]]): Dilation size of 2D convolution kernel. + The data type is an integer or a tuple of two integers. If :math:`k > 1`, the kernel is sampled + every `k` elements. The value of `k` on the height and width directions is in range of [1, H] + and [1, W] respectively. Default: 1. + group (int): Splits filter into groups, `in_channels` and `out_channels` must be + divisible by `group`. If the group is equal to `in_channels` and `out_channels`, + this 2D convolution layer also can be called 2D depthwise convolution layer. Default: 1. + has_bias (bool): Whether the Conv2d layer has a bias parameter. Default: False. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter. + It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, + values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well + as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones' + and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of + Initializer for more details. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter. + Available initialization methods are the same as 'weight_init'. Refer to the values of + Initializer for more details. Default: 'zeros'. + data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'. + Default: 'NCHW'. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C_{in}, H_{in}, W_{in})` \ + or :math:`(2, N, H_{in}, W_{in}, C_{in})`. + + Outputs: + Tensor of shape :math:`(2, N, C_{out}, H_{out}, W_{out})` or :math:`(2, N, H_{out}, W_{out}, C_{out})`. + + pad_mode is 'same': + + .. 
math:: + \begin{array}{ll} \\ + H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[0]}}} \right \rceil \\ + W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[1]}}} \right \rceil \\ + \end{array} + + pad_mode is 'valid': + + .. math:: + \begin{array}{ll} \\ + H_{out} = \left \lceil{\frac{H_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) } + {\text{stride[0]}}} \right \rceil \\ + W_{out} = \left \lceil{\frac{W_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) } + {\text{stride[1]}}} \right \rceil \\ + \end{array} + + pad_mode is 'pad': + + .. math:: + \begin{array}{ll} \\ + H_{out} = \left \lfloor{\frac{H_{in} + padding[0] + padding[1] - (\text{kernel_size[0]} - 1) \times + \text{dilation[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\ + W_{out} = \left \lfloor{\frac{W_{in} + padding[2] + padding[3] - (\text{kernel_size[1]} - 1) \times + \text{dilation[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\ + \end{array} + + Raises: + TypeError: If `in_channels`, `out_channels` or `group` is not an int. + TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple. + ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. + ValueError: If `padding` is less than 0. + ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. + ValueError: If `padding` is a tuple whose length is not equal to 4. + ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0). + ValueError: If `data_format` is neither 'NCHW' nor 'NHWC'. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.dual import Conv2d + >>> from mindspore import Tensor + >>> w = Tensor(np.random.random((2, 128, 3, 7, 7)).astype(np.float32)) + >>> b = Tensor(np.random.random((2, 128)).astype(np.float32)) + >>> net = Conv2d( + >>> in_channels=3, out_channels=128, kernel_size=7, stride=2, padding=3, + >>> pad_mode='pad', weight_init=w, bias_init=b, has_bias=True + >>> ) + >>> inp = Tensor(np.random.random((2, 16, 3, 224, 224)).astype(np.float32)) + >>> out = net(inp) + >>> print(out.shape) + (2, 16, 128, 112, 112) + """ + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: _size_2_t, + stride: _size_2_t = 1, + pad_mode: str = 'same', + padding: _size_2_t = 0, + dilation: _size_2_t = 1, + group: int = 1, + has_bias: bool = False, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + data_format: str = 'NCHW') -> None: + super(Conv2d, self).__init__(HConv2d, + ConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init, + data_format=data_format) + + +class Conv1d(_UniformOperator): + r""" + 1D convolution layer on the dual-valued input. + + Calculates the 1D convolution on the input tensor which is typically of shape :math:`(2, N, C_{in}, L_{in})`, + where :math:`N` is batch size, :math:`C_{in}` is a number of channels and :math:`L_{in}` is a length of sequence. + The formula is defined as: + + .. 
math:: + + \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + + \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{inp}(N_i, k)}) + + where :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of + the output and :math:`j` is in the range of :math:`[0, C_{out}-1]`. :math:`\text{weight}(C_{\text{out}_j}, k)` + is a convolution kernel slice with shape :math:`\text{kernel_size}`, where :math:`\text{kernel_size}` + is the width of the convolution kernel. :math:`\text{bias}` is the bias parameter, + and :math:`\text{inp}` is the input tensor. The shape of full convolution kernel is + :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size})`, + where `group` is the number of groups to split the input `inp` in the channel dimension. + :math:`ccor` is the dual-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_. + This implies the operation as: + + .. math:: + \text{Re(ccor)} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)}) + \text{Re(bias)}\\ + \text{Du(ccor)} = \text{ccor}(\text{Du(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Re(kernel)}, \text{Du(inp)}) + \text{Du(bias)} + + where :math:`ccor` is the real-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, + :math:`inp` is the dual input tensor, :math:`\text{kernel}` is a dual weight matrix with the same + data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is a dual bias vector with the same + data type as the :math:`inp` created by the layer (only if has_bias is True). :math:`\text{Re(...)}` and + :math:`\text{Du(...)}` are respectively real and dual parts of the dual-valued expression inside the parentheses. + + For more details, please refer to the paper `Gradient Based Learning Applied to Document + Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. + + Note: + On Ascend platform, only group convolution in depthwise convolution scenarios is supported. + That is, when `group > 1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied. 
+ + Args: + in_channels (int): The channel number of the input tensor of the Conv1d layer. + out_channels (int): The channel number of the output tensor of the Conv1d layer. + kernel_size (int): Specifies the width of the 1D convolution kernel. + stride (int): The movement stride of the 1D convolution kernel. Default: 1. + pad_mode (str): Specifies padding mode. The optional values are + "same", "valid", "pad". Default: "same". + + - same: The width of the output is the same as the value of the input divided by `stride`. + If this mode is set, the value of `padding` must be 0. + + - valid: Returns a valid calculated output without padding. Excess pixels that do not satisfy the + calculation will be discarded. If this mode is set, the value of `padding` must be 0. + + - pad: Pads the input. Padding `padding` size of zero on both sides of the input. + If this mode is set, the value of `padding` must be greater than or equal to 0. + + padding (int): The number of padding on both sides of input. + The value should be greater than or equal to 0. Default: 0. + dilation (int): Dilation size of 1D convolution kernel. If :math:`k > 1`, the kernel is sampled + every `k` elements. The value of `k` is in range of [1, L]. Default: 1. + group (int): Splits filter into groups, `in_channels` and `out_channels` must be + divisible by `group`. Default: 1. + has_bias (bool): Whether the Conv1d layer has a bias parameter. Default: False. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter. + It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, + values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well + as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones' + and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of + Initializer for more details. 
Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter. + Available initialization methods are the same as 'weight_init'. Refer to the values of + Initializer for more details. Default: 'zeros'. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C_{in}, L_{in})`. + + Outputs: + Tensor of shape :math:`(2, N, C_{out}, L_{out})`. + + pad_mode is 'same': + + .. math:: + L_{out} = \left \lceil{\frac{L_{in}}{\text{stride}}} \right \rceil + + pad_mode is 'valid': + + .. math:: + L_{out} = \left \lceil{\frac{L_{in} - \text{dilation} \times (\text{kernel_size} - 1) } + {\text{stride}}} \right \rceil + + pad_mode is 'pad': + + .. math:: + L_{out} = \left \lfloor{\frac{L_{in} + 2 \times padding - (\text{kernel_size} - 1) \times + \text{dilation} - 1 }{\text{stride}} + 1} \right \rfloor + + Raises: + TypeError: If `in_channels`, `out_channels`, `kernel_size`, `stride`, `padding` or `dilation` is not an int. + ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. + ValueError: If `padding` is less than 0. + ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.dual import Conv1d + >>> from mindspore import Tensor + >>> w = Tensor(np.random.random((2, 16, 1, 6)).astype(np.float32)) + >>> b = Tensor(np.random.random((2, 16)).astype(np.float32)) + >>> net = Conv1d( + >>> in_channels=1, out_channels=16, kernel_size=6, stride=2, padding=2, + >>> pad_mode='pad', weight_init=w, bias_init=b, has_bias=True + >>> ) + >>> u = Tensor(np.random.random((2, 8, 1, 4096)).astype(np.float32)) + >>> out = net(u) + >>> print(out.shape) + (2, 8, 16, 2048) + """ + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: _size_1_t, + stride: _size_1_t = 1, + pad_mode: str = 'same', + padding: _size_1_t = 0, + dilation: _size_1_t = 1, + group: int = 1, + has_bias: bool = False, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros') -> None: + super(Conv1d, self).__init__(HConv1d, + ConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init) + + +class Conv3d(_UniformOperator): + r""" + 3D convolution layer on the dual-valued input. + + Calculates the 3D convolution on the input tensor which is typically of shape + :math:`(2, N, C_{in}, D_{in}, H_{in}, W_{in})`, + where :math:`N` is batch size, :math:`C_{in}` is a number of channels, + :math:`D_{in}, H_{in}, W_{in}` are the depth, height and width of the feature layer respectively. + The formula is defined as: + + .. 
math:: + + \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + + \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{inp}(N_i, k)}) + + where :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of + the output and :math:`j` is in the range of :math:`[0,C_{out}-1]`. :math:`\text{weight}(C_{\text{out}_j}, k)` + is a convolution kernel slice with shape + :math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`, + where :math:`\text{kernel_size[0]}`, :math:`\text{kernel_size[1]}` and :math:`\text{kernel_size[2]}` are + the depth, height and width of the convolution kernel respectively. :math:`\text{bias}` is the bias parameter + and :math:`\text{inp}` is the input tensor. + The shape of full convolution kernel is + :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`, + where `group` is the number of groups to split the input `inp` in the channel dimension. + :math:`ccor` is the dual-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_. + This implies the operation as: + + .. math:: + \text{Re(ccor)} = \text{ccor}(\text{Re(kernel)}, \text{Re(inp)}) + \text{Re(bias)}\\ + \text{Du(ccor)} = \text{ccor}(\text{Du(kernel)}, \text{Re(inp)}) + + \text{ccor}(\text{Re(kernel)}, \text{Du(inp)}) + \text{Du(bias)} + + where :math:`ccor` is the real-valued `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, + :math:`inp` is the dual input tensor, :math:`\text{kernel}` is a dual weight matrix with the same + data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is a dual bias vector with the same + data type as the :math:`inp` created by the layer (only if has_bias is True). :math:`\text{Re(...)}` + and :math:`\text{Du(...)}` are respectively real and dual parts of the dual-valued expression inside + the parentheses. + + For more details, please refer to the paper `Gradient Based Learning Applied to Document + Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. 
+ + Note: + On Ascend platform, only group convolution in depthwise convolution scenarios is supported. + That is, when `group > 1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied. + + Args: + in_channels (int): The channel number of the input tensor of the Conv3d layer. + out_channels (int): The channel number of the output tensor of the Conv3d layer. + kernel_size (Union[int, tuple[int]]): Specifies the depth, height and width of the 3D convolution kernel. + The data type is an integer or a tuple of three integers. An integer represents the depth, height + and width of the convolution kernel. A tuple of three integers represents the depth, height + and width of the convolution kernel respectively. + stride (Union[int, tuple[int]]): The movement stride of the 3D convolution kernel. + The data type is an integer or a tuple of three integers. An integer represents the movement step size + in depth, height and width directions. A tuple of three integers represents the movement step size + in the depth, height and width directions respectively. Default: 1. + pad_mode (str): Specifies padding mode. The optional values are + "same", "valid", "pad". Default: "same". + + - same: The width of the output is the same as the value of the input divided by `stride`. + If this mode is set, the value of `padding` must be 0. + + - valid: Returns a valid calculated output without padding. Excess pixels that do not satisfy the + calculation will be discarded. If this mode is set, the value of `padding` must be 0. + + - pad: Pads the input. Padding `padding` size of zero on both sides of the input. + If this mode is set, the value of `padding` must be greater than or equal to 0. + + padding (Union[int, tuple[int]]): The number of padding on the depth, height and width directions of the input. + The data type is an integer or a tuple of six integers. 
If `padding` is an integer, + then the head, tail, top, bottom, left, and right padding are all equal to `padding`. + If `padding` is a tuple of six integers, then the head, tail, top, bottom, left, and right padding + is equal to `padding[0]`, `padding[1]`, `padding[2]`, `padding[3]`, `padding[4]` and `padding[5]` + respectively. The value should be greater than or equal to 0. Default: 0. + dilation (Union[int, tuple[int]]): Dilation size of 3D convolution kernel. + The data type is an integer or a tuple of three integers. If :math:`k > 1`, the kernel is sampled + every `k` elements. The value of `k` on the depth, height and width directions is in range of + [1, D], [1, H] and [1, W] respectively. Default: 1. + group (int): Splits filter into groups, `in_channels` and `out_channels` must be + divisible by `group`. Default: 1. Only 1 is currently supported. + has_bias (bool): Whether the Conv3d layer has a bias parameter. Default: False. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter. + It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, + values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well + as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones' + and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of + Initializer for more details. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter. + Available initialization methods are the same as 'weight_init'. Refer to the values of + Initializer for more details. Default: 'zeros'. + data_format (str): The optional value for data format. Currently only support "NCDHW". + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C_{in}, D_{in}, H_{in}, W_{in})`. 
+ Currently, only float16 and float32 input data types are supported. + + Outputs: + Tensor of shape :math:`(2, N, C_{out}, D_{out}, H_{out}, W_{out})`. + + pad_mode is 'same': + + .. math:: + \begin{array}{ll} \\ + D_{out} = \left \lceil{\frac{D_{in}}{\text{stride[0]}}} \right \rceil \\ + H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[1]}}} \right \rceil \\ + W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[2]}}} \right \rceil \\ + \end{array} + + + pad_mode is 'valid': + + .. math:: + \begin{array}{ll} \\ + D_{out} = \left \lceil{\frac{D_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) } + {\text{stride[0]}}} \right \rceil \\ + H_{out} = \left \lceil{\frac{H_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) } + {\text{stride[1]}}} \right \rceil \\ + W_{out} = \left \lceil{\frac{W_{in} - \text{dilation[2]} \times (\text{kernel_size[2]} - 1) } + {\text{stride[2]}}} \right \rceil \\ + \end{array} + + pad_mode is 'pad': + + .. math:: + \begin{array}{ll} \\ + D_{out} = \left \lfloor{\frac{D_{in} + padding[0] + padding[1] - (\text{kernel_size[0]} - 1) \times + \text{dilation[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\ + H_{out} = \left \lfloor{\frac{H_{in} + padding[2] + padding[3] - (\text{kernel_size[1]} - 1) \times + \text{dilation[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\ + W_{out} = \left \lfloor{\frac{W_{in} + padding[4] + padding[5] - (\text{kernel_size[2]} - 1) \times + \text{dilation[2]} - 1 }{\text{stride[2]}} + 1} \right \rfloor \\ + \end{array} + + Raises: + TypeError: If `in_channels`, `out_channels` or `group` is not an int. + TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple. + ValueError: If `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. + ValueError: If `padding` is less than 0. + ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. + ValueError: If `padding` is a tuple whose length is not equal to 6. 
+ ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0, 0, 0). + ValueError: If `data_format` is not 'NCDHW'. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.dual import Conv3d + >>> from mindspore import Tensor + >>> w = Tensor(np.random.random((2, 128, 3, 3, 3, 3)).astype(np.float32)) + >>> b = Tensor(np.random.random((2, 128)).astype(np.float32)) + >>> net = Conv3d( + >>> in_channels=3, out_channels=128, kernel_size=3, stride=1, padding=1, + >>> pad_mode='pad', weight_init=w, bias_init=b, has_bias=True + >>> ) + >>> u = Tensor(np.random.random((2, 64, 3, 32, 32, 32)).astype(np.float32)) + >>> out = net(u) + >>> print(out.shape) + (2, 64, 128, 32, 32, 32) + """ + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: _size_3_t, + stride: _size_3_t = 1, + pad_mode: str = 'same', + padding: _size_3_t = 0, + dilation: _size_3_t = 1, + group: int = 1, + has_bias: bool = False, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + data_format: str = 'NCDHW') -> None: + super(Conv3d, self).__init__(HConv3d, + ConvImpl, + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + pad_mode=pad_mode, + padding=padding, + dilation=dilation, + group=group, + has_bias=has_bias, + weight_init=weight_init, + bias_init=bias_init, + data_format=data_format) + + +class BatchNorm1d(_UniformOperator): + r""" + The dual-valued Batch Normalization layer over a second-order dual input of four dimensions, including + one spatial dimension, or three dimensions. + + This layer applies Batch Normalization over a dual input to reduce internal covariate shift. + Batch Normalization is widely used in convolutional networks. 
It rescales and recenters the feature using + a mini-batch of data and the learned parameters which can be described by the following formula: + + .. math:: + \begin{align} + \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\ + out = \frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}\\ + \text{Re(\hat{out})} = \text{Re(out)} * \text{Re(\gamma)} + \text{Re(\beta)}\\ + \text{Du(\hat{out})} = \text{Re(out)} * \text{Du(\gamma)} + + \text{Du(out)} * \text{Re(\gamma)} + \text{Du(\beta)}, + \end{align} + + where :math:`inp` is the dual input tensors, :math:`\mathrm{E}[inp]` is the arithmetic mean of the input tensor + over the batch dimension, :math:`\mathrm{Var}[inp]` is the statistical variance of the input tensor over spatial + dimensions, based on the dual norm :math:`\|x+\epsilon y\|=\left|\frac{y}{2}\right|+\sqrt{x^2+\frac{y^2}{4}}`, + :math:`\gamma` and :math:`\beta` are dual learnable parameters representing the scale and shift coefficients + respectively, and :math:`\delta` is a small positive constant, which is needed to avoid division by zero in case + statistical variance is close to zero. :math:`\text{Re(...)}` and :math:`\text{Du(...)}` are respectively real + and dual parts of the dual-valued expression inside the parentheses. + + Args: + num_features (int): The number of features in the input space. + eps (float): A small positive threshold, which is needed to avoid division by zero. Default: :math:`10^{-5}` + momentum (float): A floating hyperparameter of the momentum for the running_mean and running_var computation. + Default: 0.9. + affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. 
+ The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + use_batch_statistics (bool): + + - If True, use the mean value and variance value of current batch data and track running mean + and running variance. + - If False, use the mean value and variance value of specified value, and not track statistical value. + - If None, the use_batch_statistics is automatically set to True or False according to the training + and evaluation mode. During training, the parameter is set to True, and during evaluation, the + parameter is set to False. Default: True. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, W)` or :math:`(2, N, C)`. + '2' denotes that the input tensor belongs to the dual domain and has a real and + a dual part. The `num_features` in `Args` has to be equal to :math:`C` in `Inputs`. + + Outputs: + Tensor, the normalized, scaled, offset tensor of the same shape as :math:`inp`: + :math:`(2, N, C, W)` or :math:`(2, N, C)`. + + Raises: + TypeError: If `num_features` is not an int. + TypeError: If `eps` is not a float. + ValueError: If `num_features` is less than 1. + ValueError: If `momentum` is not in range [0, 1]. + ValueError: If `inp` is not a Tensor of 3 or 4 dimensions. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.dual import BatchNorm1d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32)).astype(np.float32)) + >>> bn = BatchNorm1d(64) + >>> y = bn(u) + >>> print(y.shape) + (2, 8, 64, 32) + """ + + def __init__(self, + num_features: int, + eps: float = 1e-5, + momentum: float = 0.9, + affine: bool = True, + gamma_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + beta_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_mean_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_var_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + use_batch_statistics: bool = True) -> None: + super(BatchNorm1d, self).__init__(HBatchNorm1d, + BatchNormImpl, + num_features=num_features, + eps=eps, + momentum=momentum, + affine=affine, + gamma_init=gamma_init, + beta_init=beta_init, + moving_mean_init=moving_mean_init, + moving_var_init=moving_var_init, + use_batch_statistics=use_batch_statistics) + + +class BatchNorm2d(_UniformOperator): + r""" + The dual-valued Batch Normalization layer over a second-order dual input of five dimensions, including + two spatial dimensions. + + This layer applies Batch Normalization over a dual input to reduce internal covariate shift. + Batch Normalization is widely used in convolutional networks. It rescales and recenters the feature using + a mini-batch of data and the learned parameters which can be described by the following formula: + + .. 
math:: + \begin{align} + \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\ + out = \frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}\\ + \text{Re(\hat{out})} = \text{Re(out)} * \text{Re(\gamma)} + \text{Re(\beta)}\\ + \text{Du(\hat{out})} = \text{Re(out)} * \text{Du(\gamma)}\ + + \text{Du(out)} * \text{Re(\gamma)} + \text{Du(\beta)}, + \end{align} + + where :math:`inp` is the dual input tensors, :math:`\mathrm{E}[inp]` is the arithmetic mean of the input tensor + over the batch dimension, :math:`\mathrm{Var}[inp]` is the statistical variance of the input tensor over spatial + dimensions, based on the dual norm :math:`\|x+\epsilon y\|=\left|\frac{y}{2}\right|+\sqrt{x^2+\frac{y^2}{4}}`, + :math:`\gamma` and :math:`\beta` are dual learnable parameters representing the scale and shift coefficients + respectively, and :math:`\delta` is a small positive constant, which is needed to avoid division by zero in case + statistical variance is close to zero. :math:`\text{Re(...)}` and :math:`\text{Du(...)}` are respectively real + and dual parts of the dual-valued expression inside the parentheses. + + Args: + num_features (int): The number of features in the input space. + eps (float): A small positive threshold, which is needed to avoid division by zero. Default: :math:`10^{-5}` + momentum (float): A floating hyperparameter of the momentum for the running_mean and running_var computation. + Default: 0.9. + affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. 
+ moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + use_batch_statistics (bool): + + - If True, use the mean value and variance value of current batch data and track running mean + and running variance. + - If False, use the mean value and variance value of specified value, and not track statistical value. + - If None, the use_batch_statistics is automatically set to True or False according to the training + and evaluation mode. During training, the parameter is set to True, and during evaluation, the + parameter is set to False. Default: True. + data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'. Default: 'NCHW'. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, H, W)` if data_format is 'NCHW', or + :math:`(2, N, H, W, C)` if data_format is 'NHWC'. '2' denotes that the input tensor belongs to the dual + domain and has a real and a dual part. The `num_features` in `Args` has to be equal to + :math:`C` in `Inputs`. + + Outputs: + Tensor, the normalized, scaled, offset tensor of the same shape as :math:`inp`: + :math:`(2, N, C, H, W)` if data_format is 'NCHW', or :math:`(2, N, H, W, C)` if data_format is 'NHWC'. + + Raises: + TypeError: If `num_features` is not an int. + TypeError: If `eps` is not a float. + ValueError: If `num_features` is less than 1. + ValueError: If `momentum` is not in range [0, 1]. + ValueError: If `data_format` is neither 'NHWC' nor 'NCHW'. + ValueError: If `inp` is not a Tensor of 5 dimensions. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.dual import BatchNorm2d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32, 32)).astype(np.float32)) + >>> bn = BatchNorm2d(64) + >>> y = bn(u) + >>> print(y.shape) + (2, 8, 64, 32, 32) + """ + + def __init__(self, + num_features: int, + eps: float = 1e-5, + momentum: float = 0.9, + affine: bool = True, + gamma_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + beta_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_mean_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_var_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + use_batch_statistics: bool = True, + data_format='NCHW') -> None: + super(BatchNorm2d, self).__init__(HBatchNorm2d, + BatchNormImpl, + num_features=num_features, + eps=eps, + momentum=momentum, + affine=affine, + gamma_init=gamma_init, + beta_init=beta_init, + moving_mean_init=moving_mean_init, + moving_var_init=moving_var_init, + use_batch_statistics=use_batch_statistics, + data_format=data_format) + + +class BatchNorm3d(_UniformOperator): + r""" + The dual-valued Batch Normalization layer over a second-order dual input of six dimensions, including + three spatial dimensions. + + This layer applies Batch Normalization over a dual input to reduce internal covariate shift. + Batch Normalization is widely used in convolutional networks. It rescales and recenters the feature using + a mini-batch of data and the learned parameters which can be described by the following formula: + + .. 
math:: + \begin{align} + \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\ + out = \frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}\\ + \text{Re(\hat{out})} = \text{Re(out)} * \text{Re(\gamma)} + \text{Re(\beta)}\\ + \text{Du(\hat{out})} = \text{Re(out)} * \text{Du(\gamma)} + + \text{Du(out)} * \text{Re(\gamma)} + \text{Du(\beta)}, + \end{align} + + where :math:`inp` is the dual input tensors, :math:`\mathrm{E}[inp]` is the arithmetic mean of the input tensor + over the batch dimension, :math:`\mathrm{Var}[inp]` is the statistical variance of the input tensor over spatial + dimensions, based on the dual norm :math:`\|x+\epsilon y\|=\left|\frac{y}{2}\right|+\sqrt{x^2+\frac{y^2}{4}}`, + :math:`\gamma` and :math:`\beta` are dual learnable parameters representing the scale and shift coefficients + respectively, and :math:`\delta` is a small positive constant, which is needed to avoid division by zero in case + statistical variance is close to zero. :math:`\text{Re(...)}` and :math:`\text{Du(...)}` are respectively real + and dual parts of the dual-valued expression inside the parentheses. + + Args: + num_features (int): The number of features in the input space. + eps (float): A small positive threshold, which is needed to avoid division by zero. Default: :math:`10^{-5}` + momentum (float): A floating hyperparameter of the momentum for the running_mean and running_var computation. + Default: 0.9. + affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. 
+ moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'. + moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'. + use_batch_statistics (bool): + + - If True, use the mean value and variance value of current batch data and track running mean + and running variance. + - If False, use the mean value and variance value of specified value, and not track statistical value. + - If None, the use_batch_statistics is automatically set to True or False according to the training + and evaluation mode. During training, the parameter is set to True, and during evaluation, the + parameter is set to False. Default: True. + data_format (str): The optional value for data format. Only 'NCDHW' format is supported as of now. + Default: 'NCDHW'. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, D, H, W)`. '2' denotes that the input tensor belongs + to the dual domain and has a real and a dual part. The `num_features` in `Args` has to be equal + to :math:`C` in `Inputs`. + + Outputs: + Tensor, the normalized, scaled, offset tensor of the same shape :math:`(2, N, C, D, H, W)` as `inp`. + + Raises: + TypeError: If `num_features` is not an int. + TypeError: If `eps` is not a float. + ValueError: If `num_features` is less than 1. + ValueError: If `momentum` is not in range [0, 1]. + ValueError: If `data_format` is not 'NCDHW'. + ValueError: If `inp` is not a Tensor of 6 dimensions. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.dual import BatchNorm3d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32, 32, 32)).astype(np.float32)) + >>> bn = BatchNorm3d(64) + >>> y = bn(u) + >>> print(y.shape) + (2, 8, 64, 32, 32, 32) + """ + + def __init__(self, + num_features: int, + eps: float = 1e-5, + momentum: float = 0.9, + affine: bool = True, + gamma_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + beta_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_mean_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + moving_var_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones', + use_batch_statistics: bool = True, + data_format='NCDHW') -> None: + super(BatchNorm3d, self).__init__(HBatchNorm3d, + BatchNormImpl, + num_features=num_features, + eps=eps, + momentum=momentum, + affine=affine, + gamma_init=gamma_init, + beta_init=beta_init, + moving_mean_init=moving_mean_init, + moving_var_init=moving_var_init, + use_batch_statistics=use_batch_statistics, + data_format=data_format) + + +class Dense(_UniformOperator): + r""" + The dual-valued dense connected layer. + + Applies dense connected layer for the dual-valued input. This layer implements the operation as: + + .. math:: + \begin{align} + \text{Re(out)} = \text{Re(inp)} * \text{Re(kernel)} + \text{Re(bias)}\\ + \text{Du(out)} = \text{Re(inp)} * \text{Du(kernel)} + \text{Du(inp)} * \text{Re(kernel)} + \text{Du(bias)}, + \end{align} + + where :math:`inp` is the dual input tensors, :math:`\text{kernel}` is a dual weight matrix with the same + data type as the :math:`inp` created by the layer, and :math:`\text{bias}` is a dual bias vector with the same + data type as the :math:`inp` created by the layer (only if has_bias is True). 
:math:`\text{Re(...)}` and + :math:`\text{Du(...)}` are respectively real and dual parts of the dual-valued expression inside the parentheses. + + Args: + in_channels (int): The number of channels in the input space. + out_channels (int): The number of channels in the output space. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is + same as `inp`. The values of str refer to the function `initializer`. Default: 'zeros'. + has_bias (bool): Specifies whether the layer uses a bias vector. Default: True. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, *, ..., *, in\_channels)`. '2' denotes that the input tensor + belongs to the dual domain and has got a real and a dual parts. The `in_channels` in `Args` + has to be equal to :math:`in\_channels` in `Inputs`. The count of mediator dimensions denoted by '*' is + arbitrary but must be at least one. + + Outputs: + Tensor of shape :math:`(2, *, ..., *, out\_channels)`. The count of mediator dimensions is the same as one + in 'Inputs'. + + Raises: + TypeError: If `in_channels` or `out_channels` is not an int. + TypeError: If `has_bias` is not a bool. + ValueError: If length of shape of `weight_init` is not equal to 3, + or shape[0] of 'weight_init' is not equal to 2, + or shape[1] of `weight_init` is not equal to `out_channels`, + or shape[2] of `weight_init` is not equal to `in_channels`. + ValueError: If length of shape of `bias_init` is not equal to 2, + or shape[0] of 'bias_init' is not equal to 2, + or shape[1] of `bias_init` is not equal to `out_channels`. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.dual import Dense + >>> from mindspore import Tensor + >>> w = Tensor(np.random.random((2, 7, 5)).astype(np.float32)) + >>> b = Tensor(np.random.random((2, 7)).astype(np.float32)) + >>> net = Dense(in_channels=5, out_channels=7, weight_init=w, bias_init=b, has_bias=True) + >>> u = Tensor(np.random.random((2, 34, 1, 5)).astype(np.float32)) + >>> out = net(u) + >>> print(out.shape) + (2, 34, 1, 7) + """ + + def __init__(self, + in_channels: int, + out_channels: int, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + has_bias: bool = True) -> None: + super(Dense, self).__init__(HDense, + DenseImpl, + in_channels=in_channels, + out_channels=out_channels, + weight_init=weight_init, + bias_init=bias_init, + has_bias=has_bias) diff --git a/mindspore/python/mindspore/hypercomplex/hypercomplex/_hc_bn_impl.py b/mindspore/python/mindspore/hypercomplex/hypercomplex/_hc_bn_impl.py new file mode 100644 index 00000000000..0d20373633e --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/hypercomplex/_hc_bn_impl.py @@ -0,0 +1,339 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Hypercomplex BatchNorm Implementation""" +import numbers +from typing import Union, Tuple +from abc import abstractmethod +import mindspore.nn as nn +from mindspore.common.initializer import initializer, Initializer +from mindspore.common.parameter import Parameter +from mindspore.common.tensor import Tensor + + +class _BatchNormImpl(nn.Cell): + r""" + The interface of the implementor part of batch normalization layer on the second-order hypercomplex numbers. + + Defines the API for getting the norm of hypercomplex number, applying scaling and shift to a hypercomplex tensor, + and updating the running mean and variance, which are used during inference. The API is used by the 'BatchNorm' + class, and it must be implemented separately for every hypercomplex algebra: + + Args: + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. + num_features (int): The number of features in the input space. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + gamma_init: Union[Tensor, str, Initializer, numbers.Number], + beta_init: Union[Tensor, str, Initializer, numbers.Number], + num_features: int) -> None: + super(_BatchNormImpl, self).__init__() + + @abstractmethod + def get_norm(self, u: Tensor) -> Tensor: + r""" + Calculates norm of a hypercomplex elements of an input tensor. + + Norm is a non-negative real number that is a characteristic of 'magnitude' of that number, i.e. how far away it + is from zero. + + Args: + u (Tensor): Tensor of shape (2, *, ..., *). 
'2' denotes that the input tensor belongs to the hypercomplex + domain and has a real and a hypercomplex part. + + Returns: + Tensor of shape (*, ..., *). The count and size of dimensions of the output tensor are the same ones as in + the input tensor, but without the very first dimension because the output tensor is real-valued. + """ + + @abstractmethod + def get_square_norm(self, u: Tensor) -> Tensor: + r""" + Calculates element-wise squared norm of hypercomplex elements of an input tensor. + + Norm is a non-negative real number that is a characteristic of 'magnitude' of that number, i.e. how far away it + is from zero. + + Args: + u (Tensor): Tensor of shape (2, *, ..., *). '2' denotes that the input tensor belongs to the hypercomplex + domain and has a real and a hypercomplex part. + + Returns: + Tensor of shape (*, ..., *). The count and size of dimensions of the output tensor are the same ones as in + the input tensor, but without the very first dimension because the output tensor is real-valued. + """ + + @abstractmethod + def scale_and_shift(self, + u_x: Tensor, + u_y: Tensor, + scale_x: Tensor, + scale_y: Tensor, + shift_x: Tensor, + shift_y: Tensor) -> Tuple[Tensor, Tensor]: + r""" + Applies hypercomplex scaling and shift to an input tensor. + + This function implements the operation as: + + .. math:: + \text{out} = \text{mul}(\text{inp}, \text{scale}) + \text{shift}, + + where :math:`inp` is the hypercomplex input tensors, :math:`\text{mul}` is the channel-wise scaling operation, + which depends on the type of the number system and provided by subclasses, :math:`\text{scale}` is + a hypercomplex scaling vector with the same data type as the :math:`inp` created by the layer, and + :math:`\text{shift}` is a hypercomplex bias vector with the same data type as the :math:`inp` created by + the layer. + + Args: + u_x (Tensor): A tensor of shape (C,), which represents the real part of the normalized inputs. 
+ u_y (Tensor): A tensor of shape (C,), which represents the hypercomplex part of the normalized inputs. + scale_x (Tensor): A tensor of shape (C,), which represents the real part of the scaling vector. + scale_y (Tensor): A tensor of shape (C,), which represents the hypercomplex part of the scaling vector. + shift_x (Tensor): A tensor of shape (C,), which represents the real part of the bias vector. + shift_y (Tensor): A tensor of shape (C,), which represents the hypercomplex part of the bias vector. + + Returns: + Tuple of two tensors of shape (C,), which contains the real and the hypercomplex parts of rescaled and + recentered inputs. + """ + + @abstractmethod + def calculate_bn(self, + u_centered_x: Tensor, + u_centered_y: Tensor, + sigma: Tensor) -> Tuple[Tensor, Tensor]: + r""" + Given a hypercomplex centered input tensor and the standard deviation of its elements, computes the + corresponding rescaled and recentered tensor with normalized variance. + + This function implements the operation as: + + .. math:: + \text{out} = \text{mul}(\text{inp}, \frac{\text{scale}}{\sigma}) + \text{shift}, + + where :math:`inp` is the hypercomplex input tensors centered over spatial and mini-batch dimensions, + :math:`\sigma` is standard deviation of the input tensors over the same dimensions, :math:`\text{mul}` is a + channel-wise scaling operation, which depends on the type of the number system and provided by subclasses, + :math:`\text{scale}` is a hypercomplex scaling vector with the same data type as the :math:`inp` created + by the layer, and :math:`\text{shift}` is a hypercomplex bias vector with the same data type as the + :math:`inp` created by the layer. + + Args: + u_centered_x (Tensor): A tensor of shape (C,), which represents the real part of the centered inputs. + u_centered_y (Tensor): A tensor of shape (C,), which represents the hypercomplex part of the + centered inputs. 
+ sigma (Tensor): A tensor of shape (C,), which represents the statistical standard deviation of the inputs. + + Returns: + Tuple of two tensors of shape (C,), which contains the real and the hypercomplex parts of rescaled and + recentered normalized inputs. + """ + + @abstractmethod + def calculate_infer_bn(self, + moving_mean_x: Tensor, + moving_mean_y: Tensor, + moving_sigma: Tensor, + u_x: Tensor, + u_y: Tensor) -> Tuple[Tensor, Tensor]: + r""" + Given a hypercomplex input tensor, computes the corresponding rescaled and recentered normalized tensor. + + This function is supposed to be used during inference. The mean and standard deviation are accumulated during + the training phase. The function implements the operation as: + + .. math:: + \text{out} = \text{mul}(\text{inp}, \frac{\text{scale}}{\sigma}) + + \left(\text{mul}(-\mathrm{E}[inp], \frac{\text{scale}}{\sigma})+\text{shift}\right), + + where :math:`inp` is the hypercomplex input tensors, :math:`\sigma` is the accumulated standard deviation of + the input tensors over spatial and mini-batch dimensions, :math:`\mathrm{E}[inp]` is the accumulated arithmetic + mean of the input tensor over the same dimensions,:math:`\text{mul}` is a channel-wise scaling operation, which + depends on the type of the number system and provided by subclasses, :math:`\text{scale}` is a hypercomplex + scaling vector with the same data type as the :math:`inp` created by the layer, and :math:`\text{shift}` is a + hypercomplex bias vector with the same data type as the :math:`inp` created by the layer. + + Args: + moving_mean_x (Tensor): A tensor of shape (C,), which represents the real part of the accumulated + arithmetic mean of inputs. + moving_mean_y (Tensor): A tensor of shape (C,), which represents the hypercomplex part of the accumulated + arithmetic mean of inputs. + moving_sigma (Tensor): A tensor of shape (C,), which represents the accumulated statistical standard + deviation of inputs. 
+ u_x (Tensor): A tensor of shape (C,), which represents the real part of the input tensor. + u_y (Tensor): A tensor of shape (C,), which represents the hypercomplex part of the input tensor. + + Returns: + Tuple of two tensors of shape (C,), which contains the real and the hypercomplex parts of normalized, + rescaled and recentered inputs. + """ + + +class _BaseBatchNormImpl(_BatchNormImpl): + r""" + The base implementor part of the batch normalization layer for all the hypercomplex numbers of the second order. + + Contains initialization and processing logic, which are shared by all specific implementations of the + 'BatchNormImpl' interface for dual, double, and complex numbers. + + Args: + affine (bool): A bool value. When set to True, gamma and beta can be learned. + use_batch_statistics (bool): If true, use the mean value and variance value of current batch data. If false, + use the mean value and variance value of specified value. If None, the training process will use the mean + and variance of current batch data and track the running mean and variance, the evaluation process will use + the running mean and variance. + gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. + beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight. + The values of str refer to the function `initializer` including 'zeros', 'ones', etc. + num_features (int): The number of features in the input space. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + affine: bool, + use_batch_statistics: bool, + gamma_init: Union[Tensor, str, Initializer, numbers.Number], + beta_init: Union[Tensor, str, Initializer, numbers.Number], + num_features: int) -> None: + super(_BaseBatchNormImpl, self).__init__(gamma_init, + beta_init, + num_features) + self.scale_x = Parameter(initializer(gamma_init, num_features), name="scale_x", requires_grad=affine) + self.scale_y = Parameter(initializer(gamma_init, num_features), name="scale_y", requires_grad=affine) + self.shift_x = Parameter(initializer(beta_init, num_features), name="shift_x", requires_grad=affine) + self.shift_y = Parameter(initializer(beta_init, num_features), name="shift_y", requires_grad=affine) + + def calculate_infer_bn(self, + moving_mean_x: Tensor, + moving_mean_y: Tensor, + moving_sigma: Tensor, + u_x: Tensor, + u_y: Tensor) -> Tuple[Tensor, Tensor]: + r""" + Given a hypercomplex input tensor, computes the corresponding rescaled and recentered normalized tensor. + + This function is supposed to be used during inference. The mean and standard deviation are accumulated during + the training phase. The function implements the operation as: + + .. 
math:: + \text{out} = \text{mul}(\text{inp}, \frac{\text{scale}}{\sigma}) + + \left(\text{mul}(-\mathrm{E}[inp], \frac{\text{scale}}{\sigma})+\text{shift}\right), + + where :math:`inp` is the hypercomplex input tensors, :math:`\sigma` is the accumulated standard deviation of + the input tensors over spatial and mini-batch dimensions, :math:`\mathrm{E}[inp]` is the accumulated arithmetic + mean of the input tensor over the same dimensions,:math:`\text{mul}` is a channel-wise scaling operation, which + depends on the type of the number system and provided by subclasses, :math:`\text{scale}` is a hypercomplex + scaling vector with the same data type as the :math:`inp` created by the layer, and :math:`\text{shift}` is a + hypercomplex bias vector with the same data type as the :math:`inp` created by the layer. + + Args: + moving_mean_x (Tensor): A tensor of shape (C,), which represents the real part of the accumulated + arithmetic mean of inputs. + moving_mean_y (Tensor): A tensor of shape (C,), which represents the hypercomplex part of the accumulated + arithmetic mean of inputs. + moving_sigma (Tensor): A tensor of shape (C,), which represents the accumulated statistical standard + deviation of inputs. + u_x (Tensor): A tensor of shape (C,), which represents the real part of the input tensor. + u_y (Tensor): A tensor of shape (C,), which represents the hypercomplex part of the input tensor. + + Returns: + Tuple of two tensors of shape (C,), which contains the real and the hypercomplex parts of normalized, + rescaled and recentered inputs. 
+ """ + fused_scale_x = self.scale_x / moving_sigma + fused_scale_y = self.scale_y / moving_sigma + neg_mean_x = (-1) * moving_mean_x + neg_mean_y = (-1) * moving_mean_y + fused_shift_x, fused_shift_y = self.scale_and_shift(neg_mean_x, + neg_mean_y, + fused_scale_x, + fused_scale_y, + self.shift_x, + self.shift_y) + out_x, out_y = self.scale_and_shift(u_x, + u_y, + fused_scale_x, + fused_scale_y, + fused_shift_x, + fused_shift_y) + return out_x, out_y + + def calculate_bn(self, + u_centered_x: Tensor, + u_centered_y: Tensor, + sigma: Tensor) -> Tuple[Tensor, Tensor]: + r""" + Given a hypercomplex centered input tensor and the standard deviation of its elements, computes the + corresponding rescaled and recentered tensor with normalized variance. + + This function implements the operation as: + + .. math:: + \text{out} = \text{mul}(\text{inp}, \frac{\text{scale}}{\sigma}) + \text{shift}, + + where :math:`inp` is the hypercomplex input tensors centered over spatial and mini-batch dimensions, + :math:`\sigma` is standard deviation of the input tensors over the same dimensions, :math:`\text{mul}` is a + channel-wise scaling operation, which depends on the type of the number system and provided by subclassess, + :math:`\text{scale}` is a hypercomplex scaling vector with the same data type as the :math:`inp` created + by the layer, and :math:`\text{shift}` is a hypercomplex bias vector with the same data type as the + :math:`inp` created by the layer. + + Args: + u_centered_x (Tensor): A tensor of shape (C,), which represents the real part of the centered inputs. + u_centered_y (Tensor): A tensor of shape (C,), which represents the hypercomplex part of the + centered inputs. + sigma (Tensor): A tensor of shape (C,), which represents the statistical standard deviation of the inputs. + + Returns: + Tuple of two tensors of shape (C,), which contains the real and the hypercomplex parts of rescaled and + recentered normalized inputs. 
+ """ + scale_x = self.scale_x / sigma + scale_y = self.scale_y / sigma + out_x, out_y = self.scale_and_shift(u_centered_x, + u_centered_y, + scale_x, + scale_y, + self.shift_x, + self.shift_y) + return out_x, out_y + + @abstractmethod + def get_norm(self, u: Tensor) -> Tensor: + pass + + @abstractmethod + def get_square_norm(self, u: Tensor) -> Tensor: + pass + + @abstractmethod + def scale_and_shift(self, + u_x: Tensor, + u_y: Tensor, + scale_x: Tensor, + scale_y: Tensor, + shift_x: Tensor, + shift_y: Tensor) -> Tuple[Tensor, Tensor]: + pass diff --git a/mindspore/python/mindspore/hypercomplex/hypercomplex/_hc_conv_impl.py b/mindspore/python/mindspore/hypercomplex/hypercomplex/_hc_conv_impl.py new file mode 100644 index 00000000000..7e60c51af5b --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/hypercomplex/_hc_conv_impl.py @@ -0,0 +1,123 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Hypercomplex Convolution Implementation""" +import numbers +from typing import Callable, Union, Tuple + +from mindspore.common.initializer import initializer, Initializer +from mindspore.common.parameter import Parameter +from mindspore.common.tensor import Tensor +from mindspore import nn +from mindspore.hypercomplex.utils import get_x_and_y + + +class _ConvImpl(nn.Cell): + r""" + The interface of the implementor part of convolution layer on second-order hypercomplex numbers. + + Defines the API for unbiased convolution transformation, which is used by the '_ConvNd' class. The API must + be implemented separately for every hypercomplex algebra: + + .. math:: + \text{out} = \text{conv}(\text{inp}, \text{kernel}) + + where :math:`inp` is the hypercomplex input tensors, :math:`\text{conv}` is the convolution transformation + operation, which is provided by subclasses, :math:`\text{kernel}` is a hypercomplex weight matrix with the same + data type as the :math:`inp` created by the layer. + + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of real and hypercomplex parts of + the kernel. + **factory_kwargs (dict): Extra parameters which may be needed by specific subclasses. + + Inputs: + - **conv_op** (Callable) - the function of the real-valued convolution to be used with decomposition + of the hypercomplex convolution transformation. For example, mindspore.ops.operations.Conv2D(...) + may be passed for a 2D convolution. + - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`, + which defines the real part of the input. The exact shape depends on data format and the number of spatial + dimensions. 
+ - **y** (Tensor) - Tensor of the same shape as `x`, which defines the hypercomplex part of the input. + + Outputs: + Tuple of two tensors, each of shape :math:`(N, C_{out}, *, ..., *)` or :math:`(N, *, ..., *, C_{out})`, which + represent the real and the hypercomplex parts of the output respectively. Data format and the count of spatial + dimensions are the same as in `x` and `y`. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + weight_init: Union[Tensor, str, Initializer, numbers.Number], + weight_shape: tuple, + **factory_kwargs) -> None: + super(_ConvImpl, self).__init__() + + def construct(self, + conv_op: Callable, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + pass + + +class _BaseConvImpl(_ConvImpl): + r""" + The base implementor part of the convolution layer for all the hypercomplex numbers of the second order. + + Contains initialization of the kernel tensors, which is shared by all specific implementations of the 'ConvImpl' + interface for dual, double, and complex numbers. + + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of real and hypercomplex parts of + the kernel. + **factory_kwargs (dict): Extra parameters which may be needed by specific subclasses. + + Inputs: + - **conv_op** (Callable) - the function of the real-valued convolution to be used with decomposition + of the hypercomplex convolution transformation. For example, mindspore.ops.operations.Conv2D(...) may be + passed for a 2D convolution. + - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`, + which defines the real part of the input. The exact shape depends on data format and the number of spatial + dimensions. 
+ - **y** (Tensor) - Tensor of the same shape as `x`, which defines the hypercomplex part of the input. + + Outputs: + Tuple of two tensors, each of shape :math:`(N, C_{out}, *, ..., *)` or :math:`(N, *, ..., *, C_{out})`, which + represent the real and the hypercomplex parts of the output respectively. Data format and the count of spatial + dimensions are the same as in `x` and `y`. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + weight_init: Union[Tensor, str, Initializer, numbers.Number], + weight_shape: tuple, + **factory_kwargs) -> None: + super(_BaseConvImpl, self).__init__(weight_init, + weight_shape, + **factory_kwargs) + + if isinstance(weight_init, Tensor): + weight_init_x, weight_init_y = get_x_and_y(weight_init) + else: + weight_init_x = weight_init_y = weight_init + self.weight_x = Parameter(initializer(weight_init_x, shape=weight_shape), name='weight_x') + self.weight_y = Parameter(initializer(weight_init_y, shape=weight_shape), name='weight_y') diff --git a/mindspore/python/mindspore/hypercomplex/hypercomplex/_hc_dense_impl.py b/mindspore/python/mindspore/hypercomplex/hypercomplex/_hc_dense_impl.py new file mode 100644 index 00000000000..35605862e5a --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/hypercomplex/_hc_dense_impl.py @@ -0,0 +1,114 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Hypercomplex Dense Implementation""" +import numbers +from typing import Callable, Union, Tuple + +from mindspore.common.initializer import initializer, Initializer +from mindspore.common.parameter import Parameter +from mindspore.common.tensor import Tensor +from mindspore import nn +from mindspore.hypercomplex.utils import get_x_and_y + + +class _DenseImpl(nn.Cell): + r""" + The interface of the implementor part of dense connected layer on second-order hypercomplex numbers. + + Defines the API for linear transformation, which is used by the 'Dense' class. The API must be implemented + separately for every hypercomplex algebra: + + .. math:: + \text{out} = \text{linear}(\text{inp}, \text{kernel}) + + where :math:`inp` is the hypercomplex input tensors, :math:`\text{linear}` is the linear transformation operation, + which is provided by subclasses, :math:`\text{kernel}` is a hypercomplex weight matrix with the same data type as + the :math:`inp` created by the layer. + + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of real and hypercomplex parts of + the kernel. + **factory_kwargs (dict): Extra parameters which may be needed by specific subclasses. + + Inputs: + - **matmul_op** (Callable) - the function of the real-valued matrix multiplication to be used for decomposition + of the hypercomplex linear transformation. Usually, mindspore.ops.operations.MatMul(...) is passed. + - **x** (Tensor) - Tensor of shape :math:`(*, in\_channels)`, which defines the real part of the input. + - **y** (Tensor) - Tensor of shape :math:`(*, in\_channels)`, which defines the hypercomplex part of the input. 
+ + Outputs: + Tuple of two tensors, each of shape :math:`(*, out\_channels)`, which represents the real and the hypercomplex + part of the output. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + weight_init: Union[Tensor, str, Initializer, numbers.Number], + weight_shape: tuple, + **factory_kwargs) -> None: + super(_DenseImpl, self).__init__() + + def construct(self, + matmul_op: Callable, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + pass + + +class _BaseDenseImpl(_DenseImpl): + r""" + The base implementor part of the dense connected layer for all the hypercomplex numbers of the second order. + + Contains initialization of the kernel tensors, which is shared by all specific implementations of the 'DenseImpl' + interface for dual, double, and complex numbers. + + Args: + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + weight_shape (tuple): The set of int numbers that defines the shape of real and hypercomplex parts of + the kernel. + **factory_kwargs (dict): Extra parameters which may be needed by specific subclasses. + + Inputs: + - **matmul_op** (Callable) - the function of the real-valued matrix multiplication to be used for decomposition + of the hypercomplex linear transformation. Usually, mindspore.ops.operations.MatMul(...) is passed + - **x** (Tensor) - Tensor of shape :math:`(*, in\_channels)`, which defines the real part of the input. + - **y** (Tensor) - Tensor of shape :math:`(*, in\_channels)`, which defines the hypercomplex part of the input. + + Outputs: + Tuple of two tensors, each of shape :math:`(*, out\_channels)`, which represents the real and the hypercomplex + part of the output. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + weight_init: Union[Tensor, str, Initializer, numbers.Number], + weight_shape: tuple, + **factory_kwargs) -> None: + super(_BaseDenseImpl, self).__init__(weight_init, + weight_shape, + **factory_kwargs) + if isinstance(weight_init, Tensor): + weight_init_x, weight_init_y = get_x_and_y(weight_init) + else: + weight_init_x = weight_init_y = weight_init + self.weight_x = Parameter(initializer(weight_init_x, shape=weight_shape), name='weight_x') + self.weight_y = Parameter(initializer(weight_init_y, shape=weight_shape), name='weight_y') diff --git a/mindspore/python/mindspore/hypercomplex/hypercomplex/hc_bn.py b/mindspore/python/mindspore/hypercomplex/hypercomplex/hc_bn.py new file mode 100644 index 00000000000..30ca3923b6f --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/hypercomplex/hc_bn.py @@ -0,0 +1,627 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# ============================================================================
"""Hypercomplex batchnorm"""
import numbers
from typing import TypeVar, Type, Union, Any
from abc import abstractmethod

import numpy as np
import mindspore
import mindspore.context as context
import mindspore.nn as nn
import mindspore.ops as P
from mindspore._checkparam import Validator as validator
from mindspore.common.parameter import Parameter
from mindspore.common.initializer import initializer, Initializer
from mindspore.common.tensor import Tensor
from mindspore.ops import functional as F
from mindspore.hypercomplex.hypercomplex._hc_bn_impl import _BatchNormImpl as BatchNormImpl
from mindspore.hypercomplex.utils import get_x_and_y, to_2channel

TBatchNormImpl = TypeVar('TBatchNormImpl', bound=BatchNormImpl)


class _BatchNorm(nn.Cell):
    r"""
    The base class of the abstract part of Batch Normalization layer over a second-order hypercomplex input
    of some number of dimensions.

    This layer applies Batch Normalization over a hypercomplex input to reduce internal covariate shift.
    Batch Normalization is widely used in convolutional networks. It rescales and recenters the feature using
    a mini-batch of data and the learned parameters which can be described by the following formula:

    .. math::
        \begin{align}
        \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\
        out = \text{linear}(\frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}, \gamma) + \beta,
        \end{align}

    where :math:`inp` is the hypercomplex input tensors, :math:`\text{linear}` is the linear transformation operation,
    which depends on the type of the number system and provided by the implementor part of the batch normalization
    layer, :math:`\mathrm{E}[inp]` is the arithmetic mean of the input tensor over the spatial and mini-batch
    dimensions, :math:`\mathrm{Var}[inp]` is the statistical variance of the input tensor over the same dimensions,
    :math:`\gamma` and :math:`\beta` are hypercomplex learnable parameters representing the scale and shift coefficients
    respectively, and :math:`\delta` is a small positive constant, which is needed to avoid division by zero in case
    statistical variance is close to zero.

    This is not a self-sufficient class. In order to construct a batch normalization layer, one should instantiate
    a child class and an implementor class, which acts like a bridge pattern and determines the exact set of
    hypercomplex numbers. That implies the rules of multiplication and therefore affects how a linear transformation
    works.

    Args:
        bn_impl(BatchNormImpl): The implementor class of the batch normalization layer. It is instantiated by this
            layer as `bn_impl(affine, use_batch_statistics, gamma_init, beta_init, num_features)`. Essentially,
            the concrete class passed here defines the algebra that the batch normalization layer will operate on.
        num_features (int): The number of features in the input space.
        eps (float): A small positive threshold added to the variance, which is needed to avoid division by zero.
            Default: :math:`10^{-5}`
        momentum (float): A floating hyperparameter of the momentum for the running mean and running variance
            computation. The running value is updated as `running * momentum + batch_value * (1 - momentum)`.
            Default: 0.9.
        affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True.
        gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight.
            The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'.
        beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight.
            The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'.
        moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean.
            The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'.
        moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance.
            The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'.
        use_batch_statistics (bool): Selects the statistics used by `construct`. As implemented:

            - If True, the mean and variance of the current batch are used in training mode and the running mean
              and running variance are updated; in evaluation mode the stored running values are used.
            - If False or None, the mean and variance of the current batch are always used and the running values
              are never updated.

            Default: None.
        data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'. 'NHWC' is accepted on the GPU
            device target only. Default: 'NCHW'.

    Inputs:
        - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, *, ..., *)` if data_format is 'NCHW', or
          :math:`(2, N, *, ..., *, C)` if data_format is 'NHWC', with float16 or float32 data type. '2' denotes that
          the input tensor belongs to the hypercomplex domain and has got a real and a hypercomplex parts. Or,
          :math:`(N, C, *, ..., *)` if data_format is 'NCHW', or :math:`(N, *, ..., *, C)` if data_format is 'NHWC',
          with complex64 data type. The `num_features` in `Args` has to be equal to :math:`C` in `Inputs`.
          The count of dimensions denoted by '*' must be equal to the number of spatial dimensions.

    Outputs:
        Tensor, the normalized, scaled, offset tensor of the same data type and shape as :math:`inp`:
        :math:`(2, N, C, *, ..., *)` if data_format is 'NCHW', or :math:`(2, N, *, ..., *, C)` if data_format is 'NHWC',
        with float16 or float32 data type. Or, :math:`(N, C, *, ..., *)` if data_format is 'NCHW', or
        :math:`(N, *, ..., *, C)` if data_format is 'NHWC', with complex64 data type.

    Raises:
        TypeError: If `num_features` is not an int.
        TypeError: If dtype of `inp` is not float16, float32 or complex64.
        ValueError: If `num_features` is less than 1.
        ValueError: If `momentum` is not in range [0, 1].
        ValueError: If `data_format` is 'NHWC' and the device target is not GPU.
        ValueError: If `use_batch_statistics` is neither a bool nor None.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``
    """

    def __init__(self,
                 bn_impl: Type[TBatchNormImpl],
                 num_features: int,
                 eps: float = 1e-5,
                 momentum: float = 0.9,
                 affine: bool = True,
                 gamma_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones',
                 beta_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros',
                 moving_mean_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros',
                 moving_var_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones',
                 use_batch_statistics: bool = None,
                 data_format: str = 'NCHW') -> None:
        """Initialize _BatchNorm."""
        super(_BatchNorm, self).__init__()
        validator.check_value_type('num_features', num_features, [int], self.cls_name)
        if num_features < 1:
            raise ValueError(f"For '{self.cls_name}', the 'num_features' must be at least 1, but got {num_features}.")

        if momentum < 0 or momentum > 1:
            raise ValueError(f"For '{self.cls_name}', the 'momentum' must be a number in range [0, 1], "
                             f"but got {momentum}.")
        self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.cls_name)
        if context.get_context("device_target") != "GPU" and self.format == "NHWC":
            raise ValueError(f"For '{self.cls_name}', the 'NHWC' format only support in GPU target, but got device "
                             f"target {context.get_context('device_target')}.")
        self.use_batch_statistics = use_batch_statistics
        if self.use_batch_statistics is not None and not isinstance(self.use_batch_statistics, bool):
            raise ValueError(f"For '{self.cls_name}', the 'use_batch_statistics' must be a boolean value or None,"
                             f" but got {use_batch_statistics}.")
        self.num_features = num_features
        self.eps = eps
        self.beta_init = beta_init
        self.gamma_init = gamma_init
        self.moving_mean_init = moving_mean_init
        self.moving_var_init = moving_var_init
        self.affine = affine

        # The implementor is passed as a class and instantiated here.
        self.bn_impl = bn_impl(affine, use_batch_statistics, gamma_init, beta_init, num_features)

        # Running statistics: one mean per hypercomplex component and a single variance; none of them is trained.
        self.moving_mean_x = Parameter(
            initializer(moving_mean_init, (num_features)), name="mean_x", requires_grad=False
        )
        self.moving_mean_y = Parameter(
            initializer(moving_mean_init, (num_features)), name="mean_y", requires_grad=False
        )
        self.moving_sigma2 = Parameter(
            initializer(moving_var_init, num_features), name="sigma2", requires_grad=False
        )

        self.parallel_mode = context.get_auto_parallel_context("parallel_mode")

        self._target = context.get_context("device_target")
        self.is_graph_mode = context.get_context("mode") == context.GRAPH_MODE
        # Stored as the weight of the NEW batch value, i.e. the complement of the `momentum` argument.
        self.momentum = 1.0 - momentum

        self.reduce_mean_op1 = P.ReduceMean(keep_dims=True)
        self.reduce_mean_op2 = P.ReduceMean(keep_dims=False)

        # Position of the channel axis inside the data format string (1 for 'NCHW', 3 for 'NHWC').
        self.features_dim = data_format.lower().find('c')
        self.get_dtype = P.DType()
        self.get_shape = P.Shape()

    def construct(self, u: Tensor) -> Tensor:
        """Normalize the hypercomplex tensor `u` with either batch statistics or the stored running statistics."""
        u_dtype = self.get_dtype(u)
        u_shape = self.get_shape(u)
        self._check_input_dim(u_shape, u_dtype)
        if u_dtype == mindspore.complex64:
            # complex64 input carries both components in every element, so there is no leading axis of size 2.
            hc_axis = None
            feature_axis = self.features_dim
        else:
            # Real-valued input stores the two components along axis 0, which shifts the channel axis by one.
            hc_axis = 0
            feature_axis = self.features_dim + 1

        # NOTE(review): `not self.use_batch_statistics` is also true for False and None, so this branch (batch
        # statistics) is taken for them even in evaluation mode, and the running statistics are then neither
        # used nor updated. Confirm that this is the intended behaviour.
        if self.training or not self.use_batch_statistics:
            ndim = u.ndim
            # Axes to average over: everything except the hypercomplex axis and the channel axis.
            # When hc_axis is None the first filter is expected to keep every axis - TODO confirm.
            sh = np.arange(ndim)
            sh = sh[sh != hc_axis]
            sh = sh[sh != feature_axis]
            if hc_axis is None:
                u_x, u_y = get_x_and_y(u)
                mu_x = self.reduce_mean_op1(u_x, sh.tolist())
                mu_y = self.reduce_mean_op1(u_y, sh.tolist())
                mu = to_2channel(mu_x, mu_y, mindspore.complex64)
            else:
                mu = self.reduce_mean_op1(u, sh.tolist())

            u_centered = u - mu
            norma2 = self.bn_impl.get_square_norm(u_centered)
            # Presumably get_square_norm drops the hypercomplex axis, hence the channel index is re-based - verify
            # against the implementor.
            norma_feature_axis = feature_axis if hc_axis is None or feature_axis < hc_axis else feature_axis - 1
            ndim = norma2.ndim
            mean_dims = np.arange(ndim)
            mean_dims = mean_dims[mean_dims != norma_feature_axis]
            # eps is folded into the variance here, so the running variance updated below contains it as well.
            sigma2 = self.reduce_mean_op2(norma2, mean_dims.tolist()) + self.eps
            result = self._calculate_bn(u_centered, sigma2, feature_axis)

            if self.use_batch_statistics:
                # Exponential moving average; self.momentum is the weight of the new batch value.
                momentum = self.momentum
                mu = mu.squeeze()
                mu_x, mu_y = get_x_and_y(mu)
                momentum_suppl = 1 - momentum
                self.moving_mean_x *= momentum_suppl
                self.moving_mean_x += mu_x * momentum
                self.moving_mean_y *= momentum_suppl
                self.moving_mean_y += mu_y * momentum
                self.moving_sigma2 *= momentum_suppl
                self.moving_sigma2 += sigma2 * momentum
        elif self.affine:
            result = self._calculate_infer_bn(u, axis=feature_axis)
        else:
            # Reshape the running mean so that it broadcasts against `u` along the channel axis.
            broadcast_mu_shape = [1] * u.ndim
            broadcast_mu_shape[feature_axis] = u_shape[feature_axis]
            if hc_axis is not None:
                broadcast_mu_shape[hc_axis] = 2
            moving_mean = to_2channel(self.moving_mean_x, self.moving_mean_y, u.dtype)
            moving_mean = moving_mean.reshape(tuple(broadcast_mu_shape))
            inference_centered = u - moving_mean
            result = self._calculate_bn(inference_centered, self.moving_sigma2, feature_axis)
        return result

    def _calculate_bn(self,
                      u_centered: Tensor,
                      sigma2: Tensor,
                      axis: int) -> Tensor:
        """Scale the centered input by the square root of `sigma2`, through the implementor when `affine` is set.

        Args:
            u_centered (Tensor): The input with the mean already subtracted.
            sigma2 (Tensor): The variance to normalize with.
            axis (int): The index of the channel axis in `u_centered`.

        Returns:
            Tensor, with the axis order of `u_centered`.
        """
        sigma = P.sqrt(sigma2)
        ndim = u_centered.ndim
        # Permutation that swaps the channel axis with the last axis; applying it twice restores the layout.
        u_shape = list(np.arange(ndim))
        u_shape[ndim - 1] = axis
        u_shape[axis] = ndim - 1
        u_shape = tuple(int(i) for i in u_shape)
        out = P.transpose(u_centered, u_shape)
        if self.affine:
            out_x, out_y = get_x_and_y(out)
            out_x, out_y = self.bn_impl.calculate_bn(out_x, out_y, sigma)
            out = to_2channel(out_x, out_y, self.get_dtype(u_centered))
        else:
            out = out / sigma
        out = P.transpose(out, u_shape)
        return out

    def _calculate_infer_bn(self,
                            u: Tensor,
                            axis: int) -> Tensor:
        """Normalize `u` with the stored running mean and variance through the implementor.

        Args:
            u (Tensor): The input tensor, not centered.
            axis (int): The index of the channel axis in `u`.

        Returns:
            Tensor, with the axis order and the data type of `u`.
        """
        ndim = u.ndim
        # Permutation that swaps the channel axis with the last axis; applying it twice restores the layout.
        shape = list(np.arange(ndim))
        shape[ndim-1] = axis
        shape[axis] = ndim - 1
        shape = tuple(int(i) for i in shape)

        out = P.transpose(u, shape)
        out_x, out_y = get_x_and_y(out)
        out_x, out_y = self.bn_impl.calculate_infer_bn(self.moving_mean_x,
                                                       self.moving_mean_y,
                                                       P.sqrt(self.moving_sigma2),
                                                       out_x,
                                                       out_y)
        out = to_2channel(out_x, out_y, dtype=u.dtype)
        out = P.transpose(out, shape)
        return out

    @abstractmethod
    def _check_input_dim(self, shape: tuple, dtype: Any):
        """Validate the rank and the data type of the input; to be provided by every subclass."""
        raise NotImplementedError


class BatchNorm1d(_BatchNorm):
    r"""
    The class of the abstract part of Batch Normalization layer over a second-order hypercomplex input
    of four dimensions including one spatial dimension, or three dimensions.

    This layer applies Batch Normalization over a hypercomplex input of 'NCW' data format in order to reduce
    internal covariate shift. Batch Normalization is widely used in convolutional networks. It rescales and recenters
    the feature using a mini-batch of data and the learned parameters which can be described by the following formula:

    .. math::
        \begin{align}
        \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\
        out = \text{linear}(\frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}, \gamma) + \beta,
        \end{align}

    where :math:`inp` is the hypercomplex input tensors, :math:`\text{linear}` is the linear transformation operation,
    which depends on the type of the number system and provided by the implementor part of the batch normalization
    layer, :math:`\mathrm{E}[inp]` is the arithmetic mean of the input tensor over the spatial and mini-batch
    dimensions, :math:`\mathrm{Var}[inp]` is the statistical variance of the input tensor over the same dimensions,
    :math:`\gamma` and :math:`\beta` are hypercomplex learnable parameters representing the scale and shift coefficients
    respectively, and :math:`\delta` is a small positive constant, which is needed to avoid division by zero in case
    statistical variance is close to zero.

    This is not a self-sufficient class. In order to construct a batch normalization layer, one should instantiate
    this class and an implementor class, which acts like a bridge pattern and determines the exact set of
    hypercomplex numbers. That implies the rules of multiplication and therefore affects how a linear transformation
    works.

    Args:
        bn_impl(BatchNormImpl): The implementor class of the batch normalization layer. Essentially, the concrete
            class passed here defines the algebra that the batch normalization layer will operate on.
        num_features (int): The number of features in the input space.
        eps (float): A small positive threshold, which is needed to avoid division by zero. Default: :math:`10^{-5}`
        momentum (float): A floating hyperparameter of the momentum for the running_mean and running_var computation.
            Default: 0.9.
        affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True.
        gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight.
            The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'.
        beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight.
            The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'.
        moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean.
            The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'.
        moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance.
            The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'.
        use_batch_statistics (bool): Selects the statistics used for normalization. As implemented:

            - If True, the mean and variance of the current batch are used in training mode and the running mean
              and running variance are updated; in evaluation mode the stored running values are used.
            - If False or None, the mean and variance of the current batch are always used and the running values
              are never updated.

            Default: None.

    Inputs:
        - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, W)` or :math:`(2, N, C)`, with float16 or float32 data
          type, or :math:`(N, C, W)` or :math:`(N, C)`, with complex64 data type. In the former case '2' denotes that
          the input tensor belongs to the hypercomplex domain and has got a real and a hypercomplex parts.
          The `num_features` in `Args` has to be equal to :math:`C` in `inp`.

    Outputs:
        Tensor, the normalized, scaled, offset tensor of the same data type and shape as :math:`inp`:
        :math:`(2, N, C, W)` or :math:`(2, N, C)`, with float16 or float32 data type, or :math:`(N, C, W)` or
        :math:`(N, C)`, with complex64 data type.

    Raises:
        TypeError: If `num_features` is not an int.
        TypeError: If dtype of `inp` is not float16, float32 or complex64.
        ValueError: If `num_features` is less than 1.
        ValueError: If `momentum` is not in range [0, 1].
        ValueError: if 'inp' is not a Tensor of 3 or 4 dimensions with float16 or float32 data type, and not a Tensor
            of 2 or 3 dimensions with complex64 data type.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``
    """

    def __init__(self,
                 bn_impl: Type[TBatchNormImpl],
                 num_features: int,
                 eps: float = 1e-5,
                 momentum: float = 0.9,
                 affine: bool = True,
                 gamma_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones',
                 beta_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros',
                 moving_mean_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros',
                 moving_var_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones',
                 use_batch_statistics: bool = None) -> None:
        """Initialize BatchNorm1d; the data format is left at the base-class default."""

        super(BatchNorm1d, self).__init__(bn_impl,
                                          num_features,
                                          eps,
                                          momentum,
                                          affine,
                                          gamma_init,
                                          beta_init,
                                          moving_mean_init,
                                          moving_var_init,
                                          use_batch_statistics)

    def _check_input_dim(self, shape: tuple, dtype: Any):
        """Check that the input rank is 3-4 for float16/float32 or 2-3 for complex64."""
        dim = len(shape)
        if dtype in [mindspore.float16, mindspore.float32]:
            if dim not in (4, 3):
                raise ValueError(f"For '{self.cls_name}', the in_shape must have 3-4 dims, but got {dim}.")
        elif dtype == mindspore.complex64:
            if dim not in (3, 2):
                raise ValueError(f"For '{self.cls_name}', the in_shape must have 2-3 dims, but got {dim}.")
        else:
            raise TypeError(f"Only float16, float32 and complex64 data types are supported, but got {dtype}.")
        return None


class BatchNorm2d(_BatchNorm):
    r"""
    The class of the abstract part of Batch Normalization layer over a second-order hypercomplex input
    of five dimensions, including two spatial dimensions.

    This layer applies Batch Normalization over a hypercomplex input to reduce internal covariate shift.
    Batch Normalization is widely used in convolutional networks. It rescales and recenters the feature
    using a mini-batch of data and the learned parameters which can be described by the following formula:

    .. math::
        \begin{align}
        \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\
        y = \text{linear}(\frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}, \gamma) + \beta,
        \end{align}

    where :math:`inp` is the hypercomplex input tensors, :math:`\text{linear}` is the linear transformation operation,
    which depends on the type of the number system and provided by the implementor part of the batch normalization
    layer, :math:`\mathrm{E}[inp]` is the arithmetic mean of the input tensor over the spatial and mini-batch
    dimensions, :math:`\mathrm{Var}[inp]` is the statistical variance of the input tensor over the same dimensions,
    :math:`\gamma` and :math:`\beta` are hypercomplex learnable parameters representing the scale and shift coefficients
    respectively, and :math:`\delta` is a small positive constant, which is needed to avoid division by zero in case
    statistical variance is close to zero.

    This is not a self-sufficient class. In order to construct a batch normalization layer, one should instantiate
    this class and an implementor class, which acts like a bridge pattern and determines the exact set of
    hypercomplex numbers. That implies the rules of multiplication and therefore affects how a linear transformation
    works.

    Args:
        bn_impl(BatchNormImpl): The implementor class of the batch normalization layer. Essentially, the concrete
            class passed here defines the algebra that the batch normalization layer will operate on.
        num_features (int): The number of features in the input space.
        eps (float): A small positive threshold, which is needed to avoid division by zero. Default: :math:`10^{-5}`
        momentum (float): A floating hyperparameter of the momentum for the running_mean and running_var computation.
            Default: 0.9.
        affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True.
        gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight.
            The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'.
        beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight.
            The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'.
        moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean.
            The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'.
        moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance.
            The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'.
        use_batch_statistics (bool): Selects the statistics used for normalization. As implemented:

            - If True, the mean and variance of the current batch are used in training mode and the running mean
              and running variance are updated; in evaluation mode the stored running values are used.
            - If False or None, the mean and variance of the current batch are always used and the running values
              are never updated.

            Default: None.
        data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'. Default: 'NCHW'.

    Inputs:
        - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, H, W)` if data_format is 'NCHW', or
          :math:`(2, N, H, W, C)` if data_format is 'NHWC', with float16 or float32 data type. '2' denotes that the
          input tensor belongs to the hypercomplex domain and has got a real and a hypercomplex parts. Or,
          :math:`(N, C, H, W)` if data_format is 'NCHW', or :math:`(N, H, W, C)` if data_format is 'NHWC', with
          complex64 data type. The `num_features` in `Args` has to be equal to :math:`C` in `Inputs`.

    Outputs:
        Tensor, the normalized, scaled, offset tensor of the same data type and shape as :math:`inp`:
        :math:`(2, N, C, H, W)` if data_format is 'NCHW', or :math:`(2, N, H, W, C)` if data_format is 'NHWC', with
        float16 or float32 data type. Or, :math:`(N, C, H, W)` if data_format is 'NCHW', or :math:`(N, H, W, C)` if
        data_format is 'NHWC', with complex64 data type.

    Raises:
        TypeError: If `num_features` is not an int.
        TypeError: If dtype of `inp` is not float16, float32 or complex64.
        ValueError: If `num_features` is less than 1.
        ValueError: If `momentum` is not in range [0, 1].
        ValueError: If `data_format` is neither 'NHWC' nor 'NCHW'.
        ValueError: if 'inp' is not a Tensor of 5 dimensions with float16 or float32 data type, and not a Tensor of 4
            dimensions with complex64 data type.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``
    """

    def _check_input_dim(self, shape: tuple, dtype: Any):
        """Check that the input rank is 5 for float16/float32 or 4 for complex64."""
        dim = len(shape)
        if dtype in [mindspore.float16, mindspore.float32]:
            if dim != 5:
                raise ValueError(f"For '{self.cls_name}', the in_shape must have 5 dims, but got {dim}.")
        elif dtype == mindspore.complex64:
            if dim != 4:
                raise ValueError(f"For '{self.cls_name}', the in_shape must have 4 dims, but got {dim}.")
        else:
            raise TypeError(f"Only float16, float32 and complex64 data types are supported, but got {dtype}.")
        return None


class BatchNorm3d(nn.Cell):
    r"""
    The class of the abstract part of Batch Normalization layer over a second-order hypercomplex input
    of six dimensions, including three spatial dimensions.

    This layer applies Batch Normalization over a hypercomplex input to reduce internal covariate shift.
    Batch Normalization is widely used in convolutional networks. It rescales and recenters the feature
    using a mini-batch of data and the learned parameters which can be described by the following formula:

    .. math::
        \begin{align}
        \mathrm{Var}[inp] = \mathrm{E}[\| inp_i - \mathrm{E}[inp] \|^2]\\
        y = \text{linear}(\frac{inp - \mathrm{E}[inp]}{\sqrt{\mathrm{Var}[inp] + \delta}}, \gamma) + \beta,
        \end{align}

    where :math:`inp` is the hypercomplex input tensors, :math:`\text{linear}` is the linear transformation operation,
    which depends on the type of the number system and provided by the implementor part of the batch normalization
    layer, :math:`\mathrm{E}[inp]` is the arithmetic mean of the input tensor over the spatial and mini-batch
    dimensions, :math:`\mathrm{Var}[inp]` is the statistical variance of the input tensor over the same dimensions,
    :math:`\gamma` and :math:`\beta` are hypercomplex learnable parameters representing the scale and shift coefficients
    respectively, and :math:`\delta` is a small positive constant, which is needed to avoid division by zero in case
    statistical variance is close to zero.

    The layer is implemented on top of `BatchNorm2d`: the depth and height axes of the input are merged into one
    axis, the 2d layer is applied, and the result is reshaped back to the original shape.

    This is not a self-sufficient class. In order to construct a batch normalization layer, one should instantiate
    this class and an implementor class, which acts like a bridge pattern and determines the exact set of
    hypercomplex numbers. That implies the rules of multiplication and therefore affects how a linear transformation
    works.

    Args:
        bn_impl(BatchNormImpl): The implementor class of the batch normalization layer. Essentially, the concrete
            class passed here defines the algebra that the batch normalization layer will operate on.
        num_features (int): The number of features in the input space.
        eps (float): A small positive threshold, which is needed to avoid division by zero. Default: :math:`10^{-5}`
        momentum (float): A floating hyperparameter of the momentum for the running_mean and running_var computation.
            Default: 0.9.
        affine (bool): A bool value. When set to True, gamma and beta can be learned. Default: True.
        gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight.
            The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'.
        beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight.
            The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'.
        moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean.
            The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'zeros'.
        moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance.
            The values of str refer to the function `initializer` including 'zeros', 'ones', etc. Default: 'ones'.
        use_batch_statistics (bool): Forwarded to the wrapped `BatchNorm2d`. As implemented there:

            - If True, the mean and variance of the current batch are used in training mode and the running mean
              and running variance are updated; in evaluation mode the stored running values are used.
            - If False or None, the mean and variance of the current batch are always used and the running values
              are never updated.

            Default: None.
        data_format (str): The optional value for data format. Only 'NCDHW' format is supported as of now.
            Default: 'NCDHW'.

    Inputs:
        - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, D, H, W)`, with float16 or float32 data type, or
          :math:`(N, C, D, H, W)`, with complex64 data type. In the former case '2' denotes that the input tensor
          belongs to the hypercomplex domain and has got a real and a hypercomplex parts. The `num_features` in `Args`
          has to be equal to :math:`C` in `Inputs`.

    Outputs:
        Tensor, the normalized, scaled, offset tensor of the same data type and shape as :math:`inp`:
        :math:`(2, N, C, D, H, W)`, with float16 and float32 data type, or :math:`(N, C, D, H, W)`, with
        complex64 data type.

    Raises:
        TypeError: If `num_features` is not an int.
        TypeError: If dtype of `inp` is not float16, float32 or complex64.
        ValueError: If `num_features` is less than 1.
        ValueError: If `momentum` is not in range [0, 1].
        ValueError: If `data_format` is not 'NCDHW'.
        ValueError: if 'inp' is not a Tensor of 6 dimensions with float16 or float32 data type, and not a Tensor
            of 5 dimensions with complex64 data type.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``
    """

    def __init__(self,
                 bn_impl: Type[TBatchNormImpl],
                 num_features: int,
                 eps: float = 1e-5,
                 momentum: float = 0.9,
                 affine: bool = True,
                 gamma_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones',
                 beta_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros',
                 moving_mean_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros',
                 moving_var_init: Union[Tensor, str, Initializer, numbers.Number] = 'ones',
                 use_batch_statistics: bool = None,
                 data_format: str = 'NCDHW') -> None:
        """Initialize BatchNorm3d as a wrapper around a BatchNorm2d layer."""
        super(BatchNorm3d, self).__init__()
        self.format = validator.check_string(data_format, ['NCDHW'], 'format', self.cls_name)
        self.reshape = P.Reshape()
        # All the normalization work is delegated to a 2d layer operating on the reshaped input.
        self.bn2d = BatchNorm2d(bn_impl=bn_impl,
                                num_features=num_features,
                                eps=eps,
                                momentum=momentum,
                                affine=affine,
                                gamma_init=gamma_init,
                                beta_init=beta_init,
                                moving_mean_init=moving_mean_init,
                                moving_var_init=moving_var_init,
                                use_batch_statistics=use_batch_statistics,
                                data_format="NCHW")

    def construct(self, u: Tensor) -> Tensor:
        '''Merge the D and H axes, apply the wrapped BatchNorm2d and restore the original shape.'''
        u_shape = F.shape(u)
        self._check_3d_shape(u_shape, F.dtype(u))
        reshape = list(u_shape)
        # (..., D, H, W) -> (..., D * H, W): the third-last axis absorbs the second-last one.
        reshape[-3] *= reshape[-2]
        reshape = tuple(int(i) for i in reshape[:-2] + reshape[-1:])
        u = self.reshape(u, tuple(reshape))
        out = self.bn2d(u)
        out = self.reshape(out, u_shape)
        return out

    def _check_3d_shape(self, input_shape, dtype: Any) -> None:
        '''Check that the input rank is 6 for float16/float32 or 5 for complex64.

        Raises:
            ValueError: If the rank of the input does not match its data type.
            TypeError: If `dtype` is not float16, float32 or complex64.
        '''
        dim = len(input_shape)
        if dtype in [mindspore.float16, mindspore.float32]:
            if dim != 6:
                msg_prefix = f"For '{self.cls_name}', the" if self.cls_name else "The"
                # `dim` already is the length; calling len() on it would raise TypeError instead of this error.
                raise ValueError(f"{msg_prefix} input_shape must be 6-dimensional, but got the length of input_shape: "
                                 f"{dim}.")
        elif dtype == mindspore.complex64:
            if dim != 5:
                msg_prefix = f"For '{self.cls_name}', the" if self.cls_name else "The"
                raise ValueError(f"{msg_prefix} input_shape must be 5-dimensional, but got the length of input_shape: "
                                 f"{dim}.")
        else:
            raise TypeError(f"Only float16, float32 and complex64 data types are supported, but got {dtype}.")
        return None


# ----------------------------------------------------------------------------
# Patch framing that shared the last source line with the class above; it opens the next file of the diff.
# diff --git a/mindspore/python/mindspore/hypercomplex/hypercomplex/hc_conv.py b/mindspore/python/mindspore/hypercomplex/hypercomplex/hc_conv.py
# new file mode 100644
# index 00000000000..2d726255288
# --- /dev/null
# +++ b/mindspore/python/mindspore/hypercomplex/hypercomplex/hc_conv.py
# @@ -0,0 +1,1055 @@
# Copyright 2023 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Hypercomplex Convolution"""
import numbers
from typing import Type, TypeVar, Tuple, Union
from abc import abstractmethod

import numpy as np
from mindspore._checkparam import Validator, Rel, twice, _check_3d_int_or_tuple
from mindspore import context
from mindspore import log as logger
from mindspore.common.initializer import initializer, Initializer
from mindspore.common.parameter import Parameter
from mindspore.common.tensor import Tensor
from mindspore.nn.cell import Cell
from mindspore.ops import operations as P
from mindspore.hypercomplex.hypercomplex._hc_conv_impl import _ConvImpl as ConvImpl
from mindspore.hypercomplex.utils import get_x_and_y, to_2channel, \
    _size_1_t, _size_2_t, _size_3_t


TConvImpl = TypeVar('TConvImpl', bound=ConvImpl)


class _ConvNd(Cell):
    r"""
    The base class of the abstraction part of Convolution layer of the second-order hypercomplex input.

    Calculates the convolution on the input tensor which is typically of shape :math:`(2, N, C_{in}, *, ..., *)`,
    where :math:`N` is batch size, :math:`C_{in}` is a number of input channels, and the count of spatial
    dimensions denoted by '*' is defined by the specific subclass. The formula is defined as:

    .. math::

        \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
        \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{inp}(N_i, k)})

    where :math:`ccor` is the cross-correlation, the exact
    implementation of which is defined and provided by the implementor part of the convolution layer,
    :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of
    the output and :math:`j` is in the range of :math:`[0, C_{out}-1]`. :math:`\text{weight}(C_{\text{out}_j}, k)`
    is a convolution kernel slice with shape :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`,
    where :math:`\text{kernel_size[0]}` and :math:`\text{kernel_size[1]}` are the height and width of the convolution
    kernel respectively. :math:`\text{bias}` is the bias parameter and :math:`\text{inp}` is the input tensor.
    In this case, `data_format` of the input tensor is 'NCHW' and the shape of full convolution kernel is
    :math:`(2, C_{out}, C_{in} / \text{group}, *, ..., *)`, where `group` is the number of groups to split
    the input `inp` in the channel dimension, and the '*' symbols denote the corresponding kernel dimensions.
    If `data_format` of the input tensor is 'NHWC', the shape of full convolution kernel will be
    :math:`(2, C_{out}, *, ..., *, C_{in} / \text{group})`.

    For more details, please refer to the paper "Gradient Based Learning Applied to Document
    Recognition".

    Note:
        On Ascend platform, only group convolution in depthwise convolution scenarios is supported.
        That is, when `group > 1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied.

        This is not a self-sufficient class. In order to construct a convolution layer, one should instantiate this
        class and an implementor class, which acts like a bridge pattern and determines the exact set of hypercomplex
        numbers. That implies the rules of multiplication and therefore affects how a convolution works.

        'NHWC' format is supported only with GPU target device as of now.

    Args:
        conv_impl (TConvImpl): The implementor object of the convolution layer. Essentially, the concrete class name
            of this argument defines the algebra that the convolution layer will operate on.
        in_channels (int): The channel number of the input tensor of the convolution layer.
        out_channels (int): The channel number of the output tensor of the convolution layer.
        kernel_size (Union[int, tuple[int]]): Specifies the spatial dimensions of the convolution kernel.
            The data type is an integer or a tuple of integers. An integer represents the size of all the
            spatial dimensions of the convolution kernel at once. A tuple of integers represents the spatial
            dimensions of the convolution kernel individually.
        stride (Union[int, tuple[int]]): The movement stride of the convolution kernel.
            The data type is an integer or a tuple of integers. An integer represents the movement step size
            in all directions at once. A tuple of integers represents the movement step size in every direction
            individually.
        pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad".

            - same: The width of the output is the same as the value of the input divided by `stride`.
              If this mode is set, the value of `padding` must be 0.

            - valid: Returns a valid calculated output without padding. Excess pixels that do not satisfy the
              calculation will be discarded. If this mode is set, the value of `padding` must be 0.

            - pad: Pads the input. Padding `padding` size of zero on both sides of the input.
              If this mode is set, the value of `padding` must be greater than or equal to 0.

        padding (Union[int, tuple[int]]): The number of padding on the spatial dimensions of the input.
            The data type is an integer or a tuple of integers, which then must be twice as long as the number
            of spatial dimensions. If `padding` is an integer, then all the leading and trailing paddings in
            all dimensions are equal to `padding`. The value should be greater than or equal to 0.
            If `padding` is a tuple of integers, then the paddings are enumerated pair-wise from the first to
            the last spatial dimension, the first element of the pair being equal to the leading padding,
            and the second element of the pair being equal to the trailing padding of the corresponding
            spatial dimension.
        dilation (Union[int, tuple[int]]): Dilation size of convolution kernel.
            The data type is an integer or a tuple of integers. If :math:`k > 1`, the kernel is sampled
            every `k` elements.
        group (int): Splits filter into groups, `in_channels` and `out_channels` must be
            divisible by `group`. If the group is equal to `in_channels` and `out_channels`,
            this convolution layer also can be called depthwise convolution layer.
        has_bias (bool): Whether the convolution layer has a bias parameter.
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter.
            It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified,
            values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well
            as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones'
            and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of
            Initializer for more details.
        bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter.
            Available initialization methods are the same as 'weight_init'. Refer to the values of
            Initializer for more details.
        data_format (str): The optional value for data format, is 'NHWC' or 'NCHW' or 'NCDHW'.
            Default: 'NCHW'.
        transposed (bool): If True, the weight shape passed to the implementor is
            `[in_channels, out_channels // group, *kernel_size]` instead of the regular layout.
            Default: False.

    Inputs:
        - **inp** (Tensor) - Tensor of shape :math:`(2, N, C_{in}, *, ..., *)` or :math:`(2, N, *, ..., *, C_{in})`,
          with float16 or float32 data type, or :math:`(N, C_{in}, *, ..., *)` or :math:`(N, *, ..., *, C_{in})`
          with complex64 data type.

    Outputs:
        Tensor of the same data type as `inp` and of shape :math:`(2, N, C_{out}, *, ..., *)` or
        :math:`(2, N, *, ..., *, C_{out})`, with float16 or float32 data type, or :math:`(N, C_{out}, *, ..., *)` or
        :math:`(N, *, ..., *, C_{out})`, with complex64 data type.

    Raises:
        TypeError: If `in_channels`, `out_channels` or `group` is not an int.
        TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple.
        TypeError: If any two of `inp`, `weight_init` and `bias_init` are Tensors of different data type.
        ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
        ValueError: If `padding` is less than 0.
        ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
        ValueError: If `padding` is a tuple whose length is not twice as big as the count of spatial dimensions.
        ValueError: If `pad_mode` is not equal to 'pad' and `padding` is a tuple which contains non-zero elements.
        ValueError: If `data_format` is neither 'NCHW', 'NHWC', nor 'NCDHW', or it is 'NHWC' and the target
            device is not GPU.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``
    """

    def __init__(self,
                 conv_impl: Type[TConvImpl],
                 in_channels: int,
                 out_channels: int,
                 kernel_size: Tuple[int, ...],
                 stride: Tuple[int, ...],
                 pad_mode: str,
                 padding: Union[int, Tuple[int, ...]],
                 dilation: Tuple[int, ...],
                 group: int,
                 has_bias: bool,
                 weight_init: Union[Tensor, str, Initializer, numbers.Number],
                 bias_init: Union[Tensor, str, Initializer, numbers.Number],
                 data_format: str = 'NCHW',
                 transposed: bool = False) -> None:
        """Initialize _ConvNd."""
        super(_ConvNd, self).__init__()

        self.in_channels = Validator.check_positive_int(in_channels, 'in_channels', self.cls_name)
        self.out_channels = Validator.check_positive_int(out_channels, 'out_channels', self.cls_name)
        self.kernel_size = kernel_size
        self.stride = stride
        self.pad_mode = pad_mode
        self.weight_init = weight_init
        self.bias_init = bias_init
        self.data_format = Validator.check_string(data_format,
                                                  ['NCHW', 'NHWC', 'NCDHW'],
                                                  'format',
                                                  self.cls_name)
        # NHWC is rejected on every device target other than GPU.
        if context.get_context("device_target") != "GPU" and self.data_format == "NHWC":
            raise ValueError("NHWC format only support in GPU target.")
        if isinstance(padding, int):
            Validator.check_non_negative_int(padding, 'padding', self.cls_name)
            self.padding = padding
        elif isinstance(padding, tuple):
            for pad in padding:
                Validator.check_non_negative_int(pad, 'padding item', self.cls_name)
            self.padding = padding
        else:
            raise TypeError("padding type must be int/tuple(int) cannot be {}!".format(type(padding)))

        self.dilation = dilation
        # Pass the argument and class names so a bad `group` is reported like the other arguments.
        self.group = Validator.check_positive_int(group, 'group', self.cls_name)
        self.has_bias = has_bias
        for kernel_size_elem in kernel_size:
            Validator.check_positive_int(kernel_size_elem, 'kernel_size item', self.cls_name)
        for stride_elem in stride:
            Validator.check_positive_int(stride_elem, 'stride item', self.cls_name)
        for dilation_elem in dilation:
            Validator.check_positive_int(dilation_elem, 'dilation item', self.cls_name)
        if in_channels % group != 0:
            raise ValueError(f"Attr 'in_channels' of {self.cls_name} Op must be divisible by "
                             f"attr 'group' of {self.cls_name} Op.")
        if out_channels % group != 0:
            raise ValueError(f"Attr 'out_channels' {self.cls_name} Op must be divisible by "
                             f"attr 'group' of {self.cls_name} Op.")
        # Weight layout depends on `transposed` first and on the data format second.
        if transposed:
            shape = [in_channels, out_channels // group, *kernel_size]
        else:
            shape = [out_channels, *kernel_size, in_channels // group] if self.data_format == "NHWC" else \
                [out_channels, in_channels // group, *kernel_size]
        # Remember the dtype of a Tensor initializer so that `construct` can compare it with the input.
        self.dtype = self.weight_init.dtype if isinstance(self.weight_init, Tensor) else None

        # Weight initialization
        self.conv_impl = conv_impl(self.weight_init, shape, data_format=data_format)

        # Bias initialization
        if Validator.check_bool(has_bias, "has_bias", self.cls_name):
            if isinstance(bias_init, Tensor):
                if self.dtype is None:
                    self.dtype = bias_init.dtype
                elif self.dtype != bias_init.dtype:
                    raise TypeError("Data type of the weight_init tensor and the bias init tensor must be equal, "
                                    f"but got weight_init.dtype={self.dtype} and bias_init.dtype={bias_init.dtype}")
                bias_init_x, bias_init_y = get_x_and_y(bias_init)
            else:
                # A non-Tensor initializer is used for both bias components.
                bias_init_x = bias_init_y = bias_init
            self.bias_x = Parameter(initializer(bias_init_x, [out_channels]), name='bias_x')
            self.bias_y = Parameter(initializer(bias_init_y, [out_channels]), name='bias_y')
            self.bias_add = P.BiasAdd()
        else:
            if self.bias_init != 'zeros':
                logger.warning("Value of 'has_bias' is False, value of 'bias_init' will be ignored.")
            self.bias_x = None
            self.bias_y = None
            self.bias_add = None

    def construct(self, u: Tensor) -> Tensor:
        """Split `u` into two components, run the subclass convolution, and recombine the results.

        Raises:
            TypeError: If a Tensor initializer fixed the layer dtype and it differs from `u.dtype`.
        """
        if self.dtype is not None and self.dtype != u.dtype:
            raise TypeError("dtype must be equal to the data type of the inputs tensor, but got: "
                            f"dtype={self.dtype} and inputs.dtype={u.dtype}")
        x, y = get_x_and_y(u)
        out_x, out_y = self._construct(x, y)
        out = to_2channel(out_x, out_y, u.dtype)
        return out

    def extend_repr(self):
        """Return the constructor arguments of the layer formatted as a single string."""
        return (f'input_channels={self.in_channels}, output_channels={self.out_channels}, '
                f'kernel_size={self.kernel_size}, stride={self.stride}, pad_mode={self.pad_mode}, '
                f'padding={self.padding}, dilation={self.dilation}, group={self.group}, '
                f'has_bias={self.has_bias}, weight_init={self.weight_init}, bias_init={self.bias_init}, '
                f'format={self.data_format}')

    @abstractmethod
    def _construct(self,
                   x: Tensor,
                   y: Tensor) -> Tuple[Tensor, Tensor]:
        """Convolve the two components of the input; implemented by the concrete subclasses."""
        pass

    def _check_input_5dims(self, input_shape: tuple) -> None:
        """Raise ValueError unless `input_shape` has exactly five entries."""
        if len(input_shape) != 5:
            raise ValueError(f"For {self.cls_name}, input should be 5 dims, but got shape {input_shape}.")
class Conv2d(_ConvNd):
    r"""
    2D convolution layer on the second-order hypercomplex input.

    Calculates the 2D convolution on the input tensor which is typically of shape
    :math:`(2, N, C_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, :math:`C_{in}` is a number of channels,
    :math:`H_{in}, W_{in}` are the height and width of the feature layer respectively. The formula is defined as:

    .. math::

        \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
        \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{inp}(N_i, k)})

    where :math:`ccor` is the cross-correlation, the exact
    implementation of which is defined and provided by the implementor part of the convolution layer,
    :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of
    the output and :math:`j` is in the range of :math:`[0, C_{out}-1]`. :math:`\text{weight}(C_{\text{out}_j}, k)`
    is a convolution kernel slice with shape :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`,
    where :math:`\text{kernel_size[0]}` and :math:`\text{kernel_size[1]}` are the height and width of the convolution
    kernel respectively. :math:`\text{bias}` is the bias parameter and :math:`\text{inp}` is the input tensor.
    In this case, `data_format` of the input tensor is 'NCHW' and the shape of full convolution kernel is
    :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
    where `group` is the number of groups to split the input `inp` in the channel dimension. If `data_format` of the
    input tensor is 'NHWC', the shape of full convolution kernel will be
    :math:`(C_{out}, \text{kernel_size[0]}, \text{kernel_size[1]}, C_{in} / \text{group})`.

    For more details, please refer to the paper "Gradient Based Learning Applied to Document
    Recognition".

    Note:
        On Ascend platform, only group convolution in depthwise convolution scenarios is supported.
        That is, when `group > 1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied.

        'NHWC' format is supported only with GPU target device as of now.

    Args:
        conv_impl (TConvImpl): The implementor object of the convolution layer. Essentially, the concrete class name
            of this argument defines the algebra that the convolution layer will operate on.
        in_channels (int): The channel number of the input tensor of the Conv2d layer.
        out_channels (int): The channel number of the output tensor of the Conv2d layer.
        kernel_size (Union[int, tuple[int]]): Specifies the height and width of the 2D convolution kernel.
            The data type is an integer or a tuple of two integers. An integer represents the height
            and width of the convolution kernel. A tuple of two integers represents the height
            and width of the convolution kernel respectively.
        stride (Union[int, tuple[int]]): The movement stride of the 2D convolution kernel.
            The data type is an integer or a tuple of two integers. An integer represents the movement step size
            in both height and width directions. A tuple of two integers represents the movement step size in the
            height and width directions respectively. Default: 1.
        pad_mode (str): Specifies padding mode. The optional values are
            "same", "valid", "pad". Default: "same".

            - same: The width of the output is the same as the value of the input divided by `stride`.
              If this mode is set, the value of `padding` must be 0.

            - valid: Returns a valid calculated output without padding. Excess pixels that do not satisfy the
              calculation will be discarded. If this mode is set, the value of `padding` must be 0.

            - pad: Pads the input. Padding `padding` size of zero on both sides of the input.
              If this mode is set, the value of `padding` must be greater than or equal to 0.

        padding (Union[int, tuple[int]]): The number of padding on the height and width directions of the input.
            The data type is an integer or a tuple of four integers. If `padding` is an integer,
            then the top, bottom, left, and right padding are all equal to `padding`.
            If `padding` is a tuple of 4 integers, then the top, bottom, left, and right padding
            is equal to `padding[0]`, `padding[1]`, `padding[2]`, and `padding[3]` respectively.
            The value should be greater than or equal to 0. Default: 0.
        dilation (Union[int, tuple[int]]): Dilation size of 2D convolution kernel.
            The data type is an integer or a tuple of two integers. If :math:`k > 1`, the kernel is sampled
            every `k` elements. The value of `k` on the height and width directions is in range of [1, H]
            and [1, W] respectively. Default: 1.
        group (int): Splits filter into groups, `in_channels` and `out_channels` must be
            divisible by `group`. If the group is equal to `in_channels` and `out_channels`,
            this 2D convolution layer also can be called 2D depthwise convolution layer. Default: 1.
        has_bias (bool): Whether the Conv2d layer has a bias parameter. Default: False.
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter.
            It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified,
            values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well
            as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones'
            and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of
            Initializer for more details. Default: 'normal'.
        bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter.
            Available initialization methods are the same as 'weight_init'. Refer to the values of
            Initializer for more details. Default: 'zeros'.
        data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'.
            Default: 'NCHW'.

    Inputs:
        - **inp** (Tensor) - Tensor of shape :math:`(2, N, C_{in}, H_{in}, W_{in})` or
          :math:`(2, N, H_{in}, W_{in}, C_{in})`, with float16 or float32 data type, or
          :math:`(N, C_{in}, H_{in}, W_{in})` or :math:`(N, H_{in}, W_{in}, C_{in})`, with complex64 data type.

    Outputs:
        Tensor of the same data type as `inp` and of shape :math:`(2, N, C_{out}, H_{out}, W_{out})` or
        :math:`(2, N, H_{out}, W_{out}, C_{out})`, with float16 or float32 data type, or
        :math:`(N, C_{out}, H_{out}, W_{out})` or :math:`(N, H_{out}, W_{out}, C_{out})`, with complex64 data type.

        pad_mode is 'same':

        .. math::
            \begin{array}{ll} \\
                H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[0]}}} \right \rceil \\
                W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[1]}}} \right \rceil \\
            \end{array}

        pad_mode is 'valid':

        .. math::
            \begin{array}{ll} \\
                H_{out} = \left \lceil{\frac{H_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) }
                {\text{stride[0]}}} \right \rceil \\
                W_{out} = \left \lceil{\frac{W_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) }
                {\text{stride[1]}}} \right \rceil \\
            \end{array}

        pad_mode is 'pad':

        .. math::
            \begin{array}{ll} \\
                H_{out} = \left \lfloor{\frac{H_{in} + padding[0] + padding[1] - (\text{kernel_size[0]} - 1) \times
                \text{dilation[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\
                W_{out} = \left \lfloor{\frac{W_{in} + padding[2] + padding[3] - (\text{kernel_size[1]} - 1) \times
                \text{dilation[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\
            \end{array}

    Raises:
        TypeError: If `in_channels`, `out_channels` or `group` is not an int.
        TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple.
        TypeError: If any two of `inp`, `weight_init` and `bias_init` are Tensors of different data type.
        ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
        ValueError: If `padding` is less than 0.
        ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
        ValueError: If `padding` is a tuple whose length is not equal to 4.
        ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0).
        ValueError: If `data_format` is neither 'NCHW' nor 'NHWC'.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``
    """

    def __init__(self,
                 conv_impl: Type[TConvImpl],
                 in_channels: int,
                 out_channels: int,
                 kernel_size: _size_2_t,
                 stride: _size_2_t = 1,
                 pad_mode: str = 'same',
                 padding: Union[int, Tuple[int, int, int, int]] = 0,
                 dilation: _size_2_t = 1,
                 group: int = 1,
                 has_bias: bool = False,
                 weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal',
                 bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros',
                 data_format: str = 'NCHW') -> None:
        """Initialize Conv2d."""
        # Scalars are expanded to (height, width) pairs before the base class validates them.
        kernel_size = twice(kernel_size)
        stride = twice(stride)
        # The value as supplied by the caller is kept alongside the expanded pair.
        self._dilation = dilation
        dilation = twice(dilation)
        super(Conv2d, self).__init__(conv_impl,
                                     in_channels,
                                     out_channels,
                                     kernel_size,
                                     stride,
                                     pad_mode,
                                     padding,
                                     dilation,
                                     group,
                                     has_bias,
                                     weight_init,
                                     bias_init,
                                     data_format)
        self.conv2d = P.Conv2D(out_channel=self.out_channels,
                               kernel_size=self.kernel_size,
                               mode=1,
                               pad_mode=self.pad_mode,
                               pad=self.padding,
                               stride=self.stride,
                               dilation=self.dilation,
                               group=self.group,
                               data_format=self.data_format)
        # Replaces the base-class bias op with one that uses this layer's data format.
        self.bias_add = P.BiasAdd(data_format=self.data_format)

    def _construct(self,
                   x: Tensor,
                   y: Tensor) -> Tuple[Tensor, Tensor]:
        """Run the implementor with the 2D convolution primitive and add the biases when enabled."""
        out_x, out_y = self.conv_impl(self.conv2d, x, y)
        if self.has_bias:
            out_x = self.bias_add(out_x, self.bias_x)
            out_y = self.bias_add(out_y, self.bias_y)
        return out_x, out_y
class Conv1d(_ConvNd):
    r"""
    1D convolution layer on the second-order hypercomplex input.

    Calculates the 1D convolution on the input tensor which is typically of shape :math:`(2, N, C_{in}, L_{in})`,
    where :math:`N` is batch size, :math:`C_{in}` is a number of channels and :math:`L_{in}` is a length of sequence.
    The formula is defined as:

    .. math::

        \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
        \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{inp}(N_i, k)})

    where :math:`ccor` is the cross-correlation, the exact
    implementation of which is defined and provided by the implementor part of the convolution layer,
    :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of
    the output and :math:`j` is in the range of :math:`[0, C_{out}-1]`. :math:`\text{weight}(C_{\text{out}_j}, k)`
    is a convolution kernel slice with shape :math:`\text{kernel_size}`, where :math:`\text{kernel_size}`
    is the width of the convolution kernel. :math:`\text{bias}` is the bias parameter,
    and :math:`\text{inp}` is the input tensor. The shape of full convolution kernel is
    :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size})`,
    where `group` is the number of groups to split the input `inp` in the channel dimension.

    For more details, please refer to the paper "Gradient Based Learning Applied to Document
    Recognition".

    Note:
        On Ascend platform, only group convolution in depthwise convolution scenarios is supported.
        That is, when `group > 1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied.

    Args:
        conv_impl (TConvImpl): The implementor object of the convolution layer. Essentially, the concrete class name
            of this argument defines the algebra that the convolution layer will operate on.
        in_channels (int): The channel number of the input tensor of the Conv1d layer.
        out_channels (int): The channel number of the output tensor of the Conv1d layer.
        kernel_size (int): Specifies the width of the 1D convolution kernel.
        stride (int): The movement stride of the 1D convolution kernel. Default: 1.
        pad_mode (str): Specifies padding mode. The optional values are
            "same", "valid", "pad". Default: "same".

            - same: The width of the output is the same as the value of the input divided by `stride`.
              If this mode is set, the value of `padding` must be 0.

            - valid: Returns a valid calculated output without padding. Excess pixels that do not satisfy the
              calculation will be discarded. If this mode is set, the value of `padding` must be 0.

            - pad: Pads the input. Padding `padding` size of zero on both sides of the input.
              If this mode is set, the value of `padding` must be greater than or equal to 0.

        padding (int): The number of padding on both sides of input.
            The value should be greater than or equal to 0. Default: 0.
        dilation (int): Dilation size of 1D convolution kernel. If :math:`k > 1`, the kernel is sampled
            every `k` elements. The value of `k` is in range of [1, L]. Default: 1.
        group (int): Splits filter into groups, `in_channels` and `out_channels` must be
            divisible by `group`. Default: 1.
        has_bias (bool): Whether the Conv1d layer has a bias parameter. Default: False.
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter.
            It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified,
            values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well
            as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones'
            and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of
            Initializer for more details. Default: 'normal'.
        bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter.
            Available initialization methods are the same as 'weight_init'. Refer to the values of
            Initializer for more details. Default: 'zeros'.

    Inputs:
        - **inp** (Tensor) - Tensor of shape :math:`(2, N, C_{in}, L_{in})`, with float16 or float32 data type,
          or :math:`(N, C_{in}, L_{in})`, with complex64 data type.

    Outputs:
        Tensor of the same data type as `inp` and of shape :math:`(2, N, C_{out}, L_{out})`, with float16 or float32
        data type, or :math:`(N, C_{out}, L_{out})`, with complex64 data type.

        pad_mode is 'same':

        .. math::
            L_{out} = \left \lceil{\frac{L_{in}}{\text{stride}}} \right \rceil

        pad_mode is 'valid':

        .. math::
            L_{out} = \left \lceil{\frac{L_{in} - \text{dilation} \times (\text{kernel_size} - 1) }
            {\text{stride}}} \right \rceil

        pad_mode is 'pad':

        .. math::
            L_{out} = \left \lfloor{\frac{L_{in} + 2 \times padding - (\text{kernel_size} - 1) \times
            \text{dilation} - 1 }{\text{stride}} + 1} \right \rfloor

    Raises:
        TypeError: If `in_channels`, `out_channels`, `kernel_size`, `stride`, `padding` or `dilation` is not an int.
        TypeError: If any two of `inp`, `weight_init` and `bias_init` are Tensors of different data type.
        ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
        ValueError: If `padding` is less than 0.
        ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``
    """

    def __init__(self,
                 conv_impl: Type[TConvImpl],
                 in_channels: int,
                 out_channels: int,
                 kernel_size: int,
                 stride: int = 1,
                 pad_mode: str = 'same',
                 padding: int = 0,
                 dilation: int = 1,
                 group: int = 1,
                 has_bias: bool = False,
                 weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal',
                 bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros') -> None:
        """Initialize Conv1d."""
        Validator.check_value_type("kernel_size", kernel_size, [int], self.cls_name)
        Validator.check_value_type("stride", stride, [int], self.cls_name)
        Validator.check_value_type("padding", padding, [int], self.cls_name)
        Validator.check_value_type("dilation", dilation, [int], self.cls_name)
        Validator.check_int(kernel_size, 1, Rel.GE, 'kernel_size', self.cls_name)
        Validator.check_int(stride, 1, Rel.GE, 'stride', self.cls_name)
        Validator.check_non_negative_int(padding, 'padding', self.cls_name)
        Validator.check_int(dilation, 1, Rel.GE, 'dilation', self.cls_name)
        # The layer is computed with a 2D primitive, so every 1D setting gets a leading unit entry.
        kernel_size = (1, kernel_size)
        stride = (1, stride)
        dilation = (1, dilation)
        get_shape = P.Shape()
        get_dtype = P.DType()
        if isinstance(weight_init, Tensor):
            weight_init_shape = get_shape(weight_init)
            Validator.check_equal_int(len(weight_init_shape), 3, 'weight_init_shape', self.cls_name)
            weight_init_dtype = get_dtype(weight_init)
            # Insert a unit axis at position 2 so the 3-dimensional weight matches the (1, k) kernel.
            weight_init_value = weight_init.asnumpy()
            weight_init_value = np.expand_dims(weight_init_value, 2)
            weight_init = Tensor(weight_init_value, weight_init_dtype)

        super(Conv1d, self).__init__(conv_impl,
                                     in_channels,
                                     out_channels,
                                     kernel_size,
                                     stride,
                                     pad_mode,
                                     padding,
                                     dilation,
                                     group,
                                     has_bias,
                                     weight_init,
                                     bias_init)
        # No padding on the inserted unit axis; `padding` on both sides of the sequence axis.
        self.padding = (0, 0, padding, padding)
        # Single validation point for `pad_mode`. The hand-written membership test that used to
        # follow the primitive construction could never fire after this check and was dropped.
        Validator.check_string(pad_mode, ['valid', 'same', 'pad'], 'pad_mode', self.cls_name)
        self.conv2d = P.Conv2D(out_channel=self.out_channels,
                               kernel_size=self.kernel_size,
                               mode=1,
                               pad_mode=self.pad_mode,
                               pad=self.padding,
                               stride=self.stride,
                               dilation=self.dilation,
                               group=self.group)
        self.bias_add = P.BiasAdd()
        self.expand_dims = P.ExpandDims()
        self.squeeze = P.Squeeze(2)
        self.shape = P.Shape()

    def _check_input_3d(self, input_shape: tuple):
        """Raise ValueError unless `input_shape` has exactly three entries."""
        if len(input_shape) != 3:
            raise ValueError(f"For '{self.cls_name}', the dimension of input must be 3d, but got {len(input_shape)}.")
        return None

    def _construct(self,
                   x: Tensor,
                   y: Tensor) -> Tuple[Tensor, Tensor]:
        """Convolve both components through the 2D primitive and drop the helper axis again."""
        x_shape = self.shape(x)
        self._check_input_3d(x_shape)
        # Insert the unit axis at position 2 that the (1, k) kernel expects.
        x = self.expand_dims(x, 2)
        y = self.expand_dims(y, 2)
        out_x, out_y = self.conv_impl(self.conv2d, x, y)
        if self.has_bias:
            out_x = self.bias_add(out_x, self.bias_x)
            out_y = self.bias_add(out_y, self.bias_y)
        out_x = self.squeeze(out_x)
        out_y = self.squeeze(out_y)
        return out_x, out_y
math:: + + \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + + \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{inp}(N_i, k)}) + + where :math:`ccor` is the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_, the exact + implementation of which is defined and provided by the implementor part of the convolution layer, + :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of + the output and :math:`j` is in the range of :math:`[0,C_{out}-1]`. :math:`\text{weight}(C_{\text{out}_j}, k)` + is a convolution kernel slice with shape + :math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`, + where :math:`\text{kernel_size[0]}`, :math:`\text{kernel_size[1]}` and :math:`\text{kernel_size[2]}` are + the depth, height and width of the convolution kernel respectively. :math:`\text{bias}` is the bias parameter + and :math:`\text{inp}` is the input tensor. + The shape of full convolution kernel is + :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`, + where `group` is the number of groups to split the input `inp` in the channel dimension. + + For more details, please refer to the paper `Gradient Based Learning Applied to Document + Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. + + Note: + On Ascend platform, only group convolution in depthwise convolution scenarios is supported. + That is, when `group>1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied. + + Args: + conv_impl (TConvImpl): The implementor object of the convolution layer. Essentially, the concrete class name + of this argument defines the algebra that the convolution layer will operate on. + in_channels (int): The channel number of the input tensor of the Conv3d layer. + out_channels (int): The channel number of the output tensor of the Conv3d layer. + kernel_size (Union[int, tuple[int]]): Specifies the depth, height and width of the 3D convolution kernel. 
+ The data type is an integer or a tuple of three integers. An integer represents the depth, height + and width of the convolution kernel. A tuple of three integers represents the depth, height + and width of the convolution kernel respectively. + stride (Union[int, tuple[int]]): The movement stride of the 3D convolution kernel. + The data type is an integer or a tuple of three integers. An integer represents the movement step size + in depth, height and width directions. A tuple of three integers represents the movement step size + in the depth, height and width directions respectively. Default: 1. + pad_mode (str): Specifies padding mode. The optional values are + "same", "valid", "pad". Default: "same". + + - same: The width of the output is the same as the value of the input divided by `stride`. + If this mode is set, the value of `padding` must be 0. + + - valid: Returns a valid calculated output without padding. Excess pixels that do not satisfy the + calculation will be discarded. If this mode is set, the value of `padding` must be 0. + + - pad: Pads the input. Padding `padding` size of zero on both sides of the input. + If this mode is set, the value of `padding` must be greater than or equal to 0. + + padding (Union(int, tuple[int])): The number of padding on the depth, height and width directions of the input. + The data type is an integer or a tuple of six integers. If `padding` is an integer, + then the head, tail, top, bottom, left, and right padding are all equal to `padding`. + If `padding` is a tuple of six integers, then the head, tail, top, bottom, left, and right padding + is equal to `padding[0]`, `padding[1]`, `padding[2]`, `padding[3]`, `padding[4]` and `padding[5]` + respectively. The value should be greater than or equal to 0. Default: 0. + dilation (Union[int, tuple[int]]): Dilation size of 3D convolution kernel. + The data type is an integer or a tuple of three integers. If :math:`k > 1`, the kernel is sampled + every `k` elements. 
The value of `k` on the depth, height and width directions is in range of + [1, D], [1, H] and [1, W] respectively. Default: 1. + group (int): Splits filter into groups, `in_channels` and `out_channels` must be + divisible by `group`. Default: 1. Only 1 is currently supported. + has_bias (bool): Whether the Conv3d layer has a bias parameter. Default: False. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter. + It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, + values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well + as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones' + and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of + Initializer for more details. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter. + Available initialization methods are the same as 'weight_init'. Refer to the values of + Initializer for more details. Default: 'zeros'. + data_format (str): The optional value for data format. Currently only support "NCDHW". + + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C_{in}, D_{in}, H_{in}, W_{in})`, with float16 or float32 + data type, or :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`, with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C_{out}, D_{out}, H_{out}, W_{out})`, with + float16 or float32 data type, or :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`, with complex64 data type. + + pad_mode is 'same': + + .. 
math:: + \begin{array}{ll} \\ + D_{out} = \left \lceil{\frac{D_{in}}{\text{stride[0]}}} \right \rceil \\ + H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[1]}}} \right \rceil \\ + W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[2]}}} \right \rceil \\ + \end{array} + + + pad_mode is 'valid': + + .. math:: + \begin{array}{ll} \\ + D_{out} = \left \lfloor{\frac{D_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) } + {\text{stride[0]}} + 1} \right \rfloor \\ + H_{out} = \left \lfloor{\frac{H_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) } + {\text{stride[1]}} + 1} \right \rfloor \\ + W_{out} = \left \lfloor{\frac{W_{in} - \text{dilation[2]} \times (\text{kernel_size[2]} - 1) } + {\text{stride[2]}} + 1} \right \rfloor \\ + \end{array} + + pad_mode is 'pad': + + .. math:: + \begin{array}{ll} \\ + D_{out} = \left \lfloor{\frac{D_{in} + padding[0] + padding[1] - (\text{dilation[0]} - 1) \times + \text{kernel_size[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\ + H_{out} = \left \lfloor{\frac{H_{in} + padding[2] + padding[3] - (\text{dilation[1]} - 1) \times + \text{kernel_size[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\ + W_{out} = \left \lfloor{\frac{W_{in} + padding[4] + padding[5] - (\text{dilation[2]} - 1) \times + \text{kernel_size[2]} - 1 }{\text{stride[2]}} + 1} \right \rfloor \\ + \end{array} + + Raises: + TypeError: If `in_channels`, `out_channels` or `group` is not an int. + TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple. + TypeError: If any two of `inp`, `weight_init` and `bias_init` are Tensors of different data type. + ValueError: If `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. + ValueError: If `padding` is less than 0. + ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. + ValueError: If `padding` is a tuple whose length is not equal to 6. + ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0, 0, 0). 
+ ValueError: If `data_format` is not 'NCDHW'. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + conv_impl: Type[TConvImpl], + in_channels: int, + out_channels: int, + kernel_size: _size_3_t, + stride: _size_3_t = (1, 1, 1), + pad_mode: str = 'same', + padding: _size_3_t = 0, + dilation: _size_3_t = (1, 1, 1), + group: int = 1, + has_bias: bool = False, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + data_format: str = 'NCDHW') -> None: + """Initialize Conv3d.""" + self.conv_impl = conv_impl + kernel_size = _check_3d_int_or_tuple("kernel_size", kernel_size, self.cls_name) + stride = _check_3d_int_or_tuple("stride", stride, self.cls_name) + dilation = _check_3d_int_or_tuple("dilation", dilation, self.cls_name) + Validator.check_value_type('padding', padding, (int, tuple), self.cls_name) + if isinstance(padding, tuple): + Validator.check_equal_int(len(padding), 6, 'padding size', self.cls_name) + super(Conv3d, self).__init__(conv_impl, + in_channels, + out_channels, + kernel_size, + stride, + pad_mode, + padding, + dilation, + group, + has_bias, + weight_init, + bias_init, + data_format) + self.conv3d = P.Conv3D(out_channel=self.out_channels, + kernel_size=self.kernel_size, + mode=1, + pad_mode=self.pad_mode, + pad=self.padding, + stride=self.stride, + dilation=self.dilation, + group=self.group, + data_format=self.data_format) + self.bias_add = P.BiasAdd(data_format=self.data_format) + self.shape = P.Shape() + + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + x_shape = self.shape(x) + self._check_input_5dims(x_shape) + out_x, out_y = self.conv_impl(self.conv3d, x, y) + if self.has_bias: + out_x = self.bias_add(out_x, self.bias_x) + out_y = self.bias_add(out_y, self.bias_y) + return out_x, out_y + + +class Conv3dTranspose(_ConvNd): + r""" + 3D transposed convolution layer on the second-order hypercomplex 
input. + + Calculates a 3D transposed convolution, which can be regarded as Conv3d for the gradient of the input. + It is also called deconvolution (although it is not an actual deconvolution). + + The input is typically of shape :math:`(2, N, C, D_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, + :math:`C` is a number of channels, :math:`D_{in}, H_{in}, W_{in}` are the depth, height and width of + the feature layer respectively. + + When Conv3d and Conv3dTranspose are initialized with the same parameters, and `pad_mode` is set to 'pad', + :math:`dilation * (kernel\_size - 1) - padding` amount of zeros will be padded to the depth, height and width + directions of the input; they are inverses of each other in regard to the input and output shapes in this case. + However, when `stride` > 1, Conv3d maps multiple input shapes to the same output shape. For details, refer to + `Deconvolutional Networks <https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf>`_. + + Args: + conv_impl (TConvImpl): The implementor object of the convolution layer. Essentially, the concrete class name + of this argument defines the algebra that the convolution layer will operate on. + in_channels (int): The channel number of the input tensor of the Conv3dTranspose layer. + out_channels (int): The channel number of the output tensor of the Conv3dTranspose layer. + kernel_size (Union[int, tuple[int]]): Specifies the depth, height and width of the 3D convolution kernel. + The data type is an integer or a tuple of three integers. An integer represents the depth, height + and width of the convolution kernel. A tuple of three integers represents the depth, height + and width of the convolution kernel respectively. + stride (Union[int, tuple[int]]): The movement stride of the 3D convolution kernel. + The data type is an integer or a tuple of three integers. An integer represents the movement step size + in depth, height and width directions. 
A tuple of three integers represents the movement step size + in the depth, height and width directions respectively. Default: 1. + pad_mode (str): Specifies padding mode. The optional values are + "same", "valid", "pad". Default: "same". + + - same: The width of the output is the same as the value of the input divided by `stride`. + If this mode is set, the value of `padding` must be 0. + + - valid: Returns a valid calculated output without padding. Excess pixels that do not satisfy the + calculation will be discarded. If this mode is set, the value of `padding` must be 0. + + - pad: Pads the input. Padding `padding` size of zero on both sides of the input. + If this mode is set, the value of `padding` must be greater than or equal to 0. + + padding (Union(int, tuple[int])): The number of padding on the depth, height and width directions of the input. + The data type is an integer or a tuple of six integers. If `padding` is an integer, + then the head, tail, top, bottom, left, and right padding are all equal to `padding`. + If `padding` is a tuple of six integers, then the head, tail, top, bottom, left, and right padding + is equal to `padding[0]`, `padding[1]`, `padding[2]`, `padding[3]`, `padding[4]` and `padding[5]` + respectively. The value should be greater than or equal to 0. Default: 0. + dilation (Union[int, tuple[int]]): Dilation size of 3D convolution kernel. + The data type is an integer or a tuple of three integers. If :math:`k > 1`, the kernel is sampled + every `k` elements. The value of `k` on the depth, height and width directions is in range of + [1, D], [1, H] and [1, W] respectively. Default: 1. + group (int): Splits filter into groups, `in_channels` and `out_channels` must be + divisible by `group`. Default: 1. Only 1 is currently supported. + output_padding (Union(int, tuple[int])): The number of padding on the depth, height and width directions of + the output. The data type is an integer or a tuple of six integers. 
If `output_padding` is an integer, + then the head, tail, top, bottom, left, and right padding are all equal to `output_padding`. + If `output_padding` is a tuple of six integers, then the head, tail, top, bottom, left, and right padding + is equal to `output_padding[0]`, `output_padding[1]`, `output_padding[2]`, `output_padding[3]`, + `output_padding[4]` and `output_padding[5]` respectively. The value should be greater than or equal to 0. + Default: 0. + has_bias (bool): Whether the Conv3dTranspose layer has a bias parameter. Default: False. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of weight parameter. + It can be a Tensor, a string, an Initializer or a numbers.Number. When a string is specified, + values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well + as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones' + and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of + Initializer for more details. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initialization method of bias parameter. + Available initialization methods are the same as 'weight_init'. Refer to the values of + Initializer for more details. Default: 'zeros'. + data_format (str): The optional value for data format. Currently only supports 'NCDHW'. + + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C_{in}, D_{in}, H_{in}, W_{in})`, with float16 or float32 + data type, or :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`, with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C_{out}, D_{out}, H_{out}, W_{out})`, with + float16 or float32 data type, or :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`, with complex64 data type. + + pad_mode is 'same': + + .. 
math:: + \begin{array}{ll} \\ + D_{out} = \left \lfloor{\frac{D_{in}}{\text{stride[0]}} + 1} \right \rfloor \\ + H_{out} = \left \lfloor{\frac{H_{in}}{\text{stride[1]}} + 1} \right \rfloor \\ + W_{out} = \left \lfloor{\frac{W_{in}}{\text{stride[2]}} + 1} \right \rfloor \\ + \end{array} + + + pad_mode is 'valid': + + .. math:: + \begin{array}{ll} \\ + D_{out} = \left \lfloor{\frac{D_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) } + {\text{stride[0]}} + 1} \right \rfloor \\ + H_{out} = \left \lfloor{\frac{H_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) } + {\text{stride[1]}} + 1} \right \rfloor \\ + W_{out} = \left \lfloor{\frac{W_{in} - \text{dilation[2]} \times (\text{kernel_size[2]} - 1) } + {\text{stride[2]}} + 1} \right \rfloor \\ + \end{array} + + pad_mode is 'pad': + + .. math:: + \begin{array}{ll} \\ + D_{out} = \left \lfloor{\frac{D_{in} + padding[0] + padding[1] - (\text{dilation[0]} - 1) \times + \text{kernel_size[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\ + H_{out} = \left \lfloor{\frac{H_{in} + padding[2] + padding[3] - (\text{dilation[1]} - 1) \times + \text{kernel_size[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\ + W_{out} = \left \lfloor{\frac{W_{in} + padding[4] + padding[5] - (\text{dilation[2]} - 1) \times + \text{kernel_size[2]} - 1 }{\text{stride[2]}} + 1} \right \rfloor \\ + \end{array} + + Raises: + TypeError: If `in_channels`, `out_channels` or `group` is not an int. + TypeError: If `kernel_size`, `stride`, `padding`, `dilation` or `output_padding` + is neither an int nor a tuple. + TypeError: If any two of `inp`, `weight_init` and `bias_init` are Tensors of different data type. + TypeError: If input data type is not float16 or float32. + ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. + ValueError: If `padding` is less than 0. + ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. 
+ ValueError: If `padding` is a tuple whose length is not equal to 6. + ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0, 0, 0). + ValueError: If `data_format` is not 'NCDHW'. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + conv_impl: Type[TConvImpl], + in_channels: int, + out_channels: int, + kernel_size: _size_3_t, + stride: _size_3_t = (1, 1, 1), + pad_mode: str = 'same', + padding: _size_3_t = 0, + dilation: _size_3_t = (1, 1, 1), + group: int = 1, + output_padding: int = 0, + has_bias: bool = False, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + data_format: str = 'NCDHW') -> None: + """Initialize Conv3dTranspose.""" + self.conv_impl = conv_impl + kernel_size = _check_3d_int_or_tuple("kernel_size", kernel_size, self.cls_name) + stride = _check_3d_int_or_tuple("stride", stride, self.cls_name) + dilation = _check_3d_int_or_tuple("dilation", dilation, self.cls_name) + Validator.check_value_type('padding', padding, (int, tuple), self.cls_name) + if isinstance(padding, tuple): + Validator.check_equal_int(len(padding), 6, 'padding size', self.cls_name) + self.output_padding = _check_3d_int_or_tuple("output_padding", output_padding, self.cls_name, + greater_zero=False) + super(Conv3dTranspose, self).__init__(conv_impl, + in_channels, + out_channels, + kernel_size, + stride, + pad_mode, + padding, + dilation, + group, + has_bias, + weight_init, + bias_init, + data_format, + transposed=True) + self.conv3d_transpose = P.Conv3DTranspose(in_channel=self.in_channels, + out_channel=self.out_channels, + kernel_size=self.kernel_size, + mode=1, + pad_mode=self.pad_mode, + pad=self.padding, + stride=self.stride, + dilation=self.dilation, + group=self.group, + output_padding=self.output_padding, + data_format=self.data_format) + self.bias_add = P.BiasAdd(data_format=self.data_format) + self.shape = 
P.Shape() + + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + x_shape = self.shape(x) + self._check_input_5dims(x_shape) + out_x, out_y = self.conv_impl(self.conv3d_transpose, x, y) + if self.has_bias: + out_x = self.bias_add(out_x, self.bias_x) + out_y = self.bias_add(out_y, self.bias_y) + return out_x, out_y diff --git a/mindspore/python/mindspore/hypercomplex/hypercomplex/hc_dense.py b/mindspore/python/mindspore/hypercomplex/hypercomplex/hc_dense.py new file mode 100644 index 00000000000..09b6f015be7 --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/hypercomplex/hc_dense.py @@ -0,0 +1,200 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Hypercomplex Dense""" +import numbers +from typing import TypeVar, Type, Union + +import mindspore +import mindspore.nn as nn +from mindspore._checkparam import Validator +from mindspore.common.initializer import initializer, Initializer +from mindspore.common.parameter import Parameter +from mindspore.common.tensor import Tensor +from mindspore.ops import operations as P +from mindspore.hypercomplex.hypercomplex._hc_dense_impl import _DenseImpl as DenseImpl +from mindspore.hypercomplex.utils import get_x_and_y, to_2channel + + +TDenseImpl = TypeVar('TDenseImpl', bound=DenseImpl) + + +class Dense(nn.Cell): + r""" + The abstract part of dense connected layer. 
+ + Applies dense connected layer for the second-order hypercomplex input. This layer implements the operation as: + + .. math:: + \text{out} = \text{linear}(\text{inp}, \text{kernel}) + \text{bias}, + + where :math:`inp` is the hypercomplex input tensors, :math:`\text{linear}` is the linear transformation operation, + which is defined and provided by the implementor part of the dense connected layer, :math:`\text{kernel}` is + a hypercomplex weight matrix with the same data type as the :math:`inp` created by the layer, and + :math:`\text{bias}` is a hypercomplex bias vector with the same data type as the :math:`inp` created by the layer + (only if has_bias is True). + + This is not a self-sufficient class. In order to construct a fully connected layer, one should instantiate this + class and an implementor class, which acts like a strategy pattern and determine the exact set of hypercomplex + numbers. That implies the rules of multiplication and therefore affects how a linear transformation works. + + Args: + dense_impl(DenseImpl): The implementor object of the dense connected layer. Essentially, the concrete class + name of this argument defines the algebra that the dense layer will operate on. + in_channels (int): The number of channels in the input space. + out_channels (int): The number of channels in the output space. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `inp`. The values of str refer to the function `initializer`. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is + same as `inp`. The values of str refer to the function `initializer`. Default: 'zeros'. + has_bias (bool): Specifies whether the layer uses a bias vector. Default: True. 
+ + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, *, ..., *, in\_channels)`, with float16 or float32 data type, + or :math:`(*, ..., *, in\_channels)`, with complex64 data type. In the former case '2' denotes that the input + tensor belongs to the hypercomplex domain and has a real and an imaginary part. The `in_channels` in + `Args` has to be equal to :math:`in\_channels` in `Inputs`. The count of mediator dimensions denoted by '*' + is arbitrary but must be at least one. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, *, ..., *, out\_channels)`, with float16 or + float32 data type, or :math:`(*, ..., *, out\_channels)`, with complex64 data type. The count of mediator + dimensions is the same as in `inp`. + + Raises: + TypeError: If `in_channels` or `out_channels` is not an int. + TypeError: If `has_bias` is not a bool. + TypeError: If any two of `inp`, `weight_init` and `bias_init` are Tensors of different data type. + ValueError: If length of shape of `weight_init` is not equal to 3, + or shape[0] of `weight_init` is not equal to 2, + or shape[1] of `weight_init` is not equal to `out_channels`, + or shape[2] of `weight_init` is not equal to `in_channels`. + ValueError: If length of shape of `bias_init` is not equal to 2, + or shape[0] of `bias_init` is not equal to 2, + or shape[1] of `bias_init` is not equal to `out_channels`. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + dense_impl: Type[TDenseImpl], + in_channels: int, + out_channels: int, + weight_init: Union[Tensor, str, Initializer, numbers.Number] = 'normal', + bias_init: Union[Tensor, str, Initializer, numbers.Number] = 'zeros', + has_bias: bool = True) -> None: + """Initialize Dense.""" + super(Dense, self).__init__() + self.in_channels = Validator.check_positive_int(in_channels, "in_channels", self.cls_name) + self.out_channels = Validator.check_positive_int(out_channels, "out_channels", self.cls_name) + self.has_bias = Validator.check_bool(has_bias, "has_bias", self.cls_name) + self.dtype = None + self.reshape = P.Reshape() + self.shape_op = P.Shape() + + self.weight_x = None + self.weight_y = None + if isinstance(weight_init, Tensor): + self.dtype = weight_init.dtype + if self.dtype in [mindspore.float16, mindspore.float32] and ( \ + weight_init.ndim != 3 + or weight_init.shape[0] != 2 \ + or weight_init.shape[1] != out_channels \ + or weight_init.shape[2] != in_channels): + raise ValueError(f"For '{self.cls_name}', weight init shape error. The ndim of 'weight_init' must " + f"be equal to 3, and the first dim must be equal to 2, and the second dim must be " + f"equal to 'out_channels', and the third dim must be equal to 'in_channels'. But got " + f"'weight_init': {weight_init}, 'out_channels': {out_channels}, 'in_channels': " + f"{in_channels}.") + if self.dtype == mindspore.complex64 and ( \ + weight_init.ndim != 2 \ + or weight_init.shape[0] != out_channels \ + or weight_init.shape[1] != in_channels): + raise ValueError(f"For '{self.cls_name}', weight init shape error. The ndim of 'weight_init' must " + f"be equal to 2, and the first dim must be equal to 'out_channels', " + f"and the second dim must be equal to 'in_channels'. 
But got " + f"'weight_init': {weight_init}, 'out_channels': {out_channels}, 'in_channels': " + f"{in_channels}.") + + self.dense_impl = dense_impl(weight_init, [out_channels, in_channels]) + + self.bias_x = None + self.bias_y = None + if self.has_bias: + if isinstance(bias_init, Tensor): + if self.dtype is None: + self.dtype = bias_init.dtype + elif self.dtype != bias_init.dtype: + raise TypeError("Data type of weight init tensor and the bias init tensor must be equal, " + f"but got weight_init.dtype={self.dtype} and bias_init.dtype={bias_init.dtype}") + if self.dtype in [mindspore.float16, mindspore.float32] and ( \ + bias_init.ndim != 2 \ + or bias_init.shape[0] != 2 \ + or bias_init.shape[1] != out_channels): + raise ValueError(f"For '{self.cls_name}', bias init shape error. The ndim of 'bias_init' must " + f"be equal to 2, and the second dim must be equal to 'out_channels'. But got " + f"'bias_init': {bias_init}, 'out_channels': {out_channels}.") + if self.dtype == mindspore.complex64 and ( \ + bias_init.ndim != 1 \ + or bias_init.shape[0] != out_channels): + raise ValueError(f"For '{self.cls_name}', bias init shape error. The ndim of 'bias_init' must " + f"be equal to 1, and the only dim must be equal to 'out_channels'. 
But got " + f"'bias_init': {bias_init}, 'out_channels': {out_channels}.") + bias_init_x, bias_init_y = get_x_and_y(bias_init) + else: + bias_init_x = bias_init_y = bias_init + self.bias_x = Parameter(initializer(bias_init_x, [out_channels]), name="bias_x") + self.bias_y = Parameter(initializer(bias_init_y, [out_channels]), name="bias_y") + self.bias_add = P.BiasAdd() + + self.matmul = P.MatMul(transpose_b=True) + + def check_dense_input_shape(self, x: Tensor, x_dtype): + msg_prefix = f"For '{self.cls_name}', the" if self.cls_name else "The" + if x_dtype in [mindspore.float32, mindspore.float64] and (len(x) < 3 or x[0] != 2): + raise ValueError(f"{msg_prefix} dimension of 'x' should not be less than 3, and the first dimension " + f"should be 2, but got {x}.") + if x_dtype == mindspore.complex64 and len(x) < 2: + raise ValueError(f"{msg_prefix} dimension of 'x' should not be less than 2, but got {x}.") + return None + + def construct(self, u: Tensor) -> Tensor: + """Construct""" + if self.dtype is not None and self.dtype != u.dtype: + raise TypeError("dtype must be equal to the data type of the inputs tensor, but got: " + f"dtype={self.dtype} and inputs.dtype={u.dtype}") + u_shape = self.shape_op(u) + self.check_dense_input_shape(u_shape, u.dtype) + u_reshape = [-1, u_shape[-1]] + if u.dtype in [mindspore.float32, mindspore.float64]: + u_reshape = [2] + u_reshape + if len(u_reshape) < len(u_shape): + u = self.reshape(u, tuple(u_reshape)) + x, y = get_x_and_y(u) + out_x, out_y = self.dense_impl(self.matmul, x, y) + if self.has_bias: + out_x = self.bias_add(out_x, self.bias_x) + out_y = self.bias_add(out_y, self.bias_y) + out = to_2channel(out_x, out_y, u.dtype) + if len(u_reshape) < len(u_shape): + out_shape = u_shape[:-1] + (-1,) + out = self.reshape(out, out_shape) + return out + + def extend_repr(self): + s = 'input_channels={}, output_channels={}'.format(self.in_channels, self.out_channels) + if self.has_bias: + s += ', has_bias={}'.format(self.has_bias) + return 
s diff --git a/mindspore/python/mindspore/hypercomplex/hypercomplex/hc_pool.py b/mindspore/python/mindspore/hypercomplex/hypercomplex/hc_pool.py new file mode 100644 index 00000000000..f98b791db6c --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/hypercomplex/hc_pool.py @@ -0,0 +1,1018 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Hypercomplex Pooling""" +from typing import Tuple +from abc import abstractmethod +import mindspore.context as context +import mindspore.nn as nn +from mindspore._checkparam import Rel, Validator as validator +from mindspore.common import dtype as mstype +from mindspore.common.tensor import Tensor +from mindspore.ops import functional as F +from mindspore.ops import operations as P +from mindspore.ops.operations.nn_ops import AdaptiveAvgPool3D, AdaptiveMaxPool2D +from mindspore.hypercomplex.utils import get_x_and_y, to_2channel, \ + _size_1_t, _size_2_t, _size_3_t + + +class _PoolNd(nn.Cell): + r""" + Base class for pooling layers for the second-order hypercomplex numbers. + + Includes data validation and initialization of hyperparameters, which are shared by all specific + implementations of pooling. + + + Note: + pad_mode for training only supports "same" and "valid". + + Args: + kernel_size (Union[int, Tuple[int]]): The size of kernel window used to take the average value. 
+ The data type of kernel_size must be int and the value represents all the spatial dimensions + at once, or a tuple of the corresponding amount of int numbers that represent the spatial + dimensions separately. + stride (Union[int, Tuple[int]]): The distance of kernel moving, an int number that represents + the step size of movement for all the spatial dimensions at once, or a tuple of the + corresponding amount of int numbers that represent the step size of movement for spatial + dimensions separately. + pad_mode (str): The value for pad mode, is "same" or "valid", not case sensitive. + + - same: Adopts the way of completion. The height and width of the output will be the same as + the input. The total number of padding will be calculated in horizontal and vertical + directions and evenly distributed to top and bottom, left and right if possible. + Otherwise, the last extra padding will be done from the bottom and the right side. + + - valid: Adopts the way of discarding. The possible largest height and width of output + will be returned without padding. Extra pixels will be discarded. + data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'. Note that 'NHWC' + format is supported only with GPU target device as of now. Default: 'NCHW'. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, *, ..., *)`, with float16 or float32 data type, or + :math:`(N, C, *, ..., *)` with complex64 data type. The count of spatial dimensions denoted by '*' + depends on a specific subclass. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C, *, ..., *)`, with float16 or float32 data + type, or :math:`(N, C, *, ..., *)`, with complex64 data type. The count of spatial dimensions denoted by '*' + is equal to that of the input tensor `inp`, but the sizes of those dimensions can be different. + + Raises: + TypeError: If `kernel_size` or `stride` is neither an int nor a tuple. 
+ TypeError: If dtype of `inp` is not float16, float32 or complex64. + ValueError: If `pad_mode` is neither 'same' nor 'valid' (case insensitive). + ValueError: If `kernel_size` or `stride` is less than 1. + ValueError: If `data_format` is neither 'NCHW' nor 'NHWC', or it is 'NHWC' and the target + device is not GPU. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, + kernel_size: Tuple[int, ...], + stride: Tuple[int, ...], + pad_mode: str, + data_format: str = "NCHW") -> None: + """Initialize _PoolNd.""" + super(_PoolNd, self).__init__() + validator.check_value_type('pad_mode', pad_mode, [str], self.cls_name) + self.pad_mode = validator.check_string(pad_mode.upper(), + ['VALID', 'SAME'], + 'pad_mode', + self.cls_name) + self.format = validator.check_string(data_format, + ['NCHW', 'NHWC'], + 'format', + self.cls_name) + if context.get_context("device_target") != "GPU" and self.format == "NHWC": + raise ValueError(f"For '{self.cls_name}, the 'NHWC' format only support in GPU target, but got device " + f"target {context.get_context('device_target')}.") + + def _check_int_or_tuple(arg_name, arg_value): + validator.check_value_type(arg_name, arg_value, [int, tuple], self.cls_name) + error_msg = f'For \'{self.cls_name}\' the {arg_name} should be an positive int number or ' \ + f'a tuple of two positive int numbers, but got {arg_value}' + if isinstance(arg_value, int): + if arg_value <= 0: + raise ValueError(error_msg) + elif len(arg_value) == 2: + for item in arg_value: + if isinstance(item, int) and item > 0: + continue + raise ValueError(error_msg) + else: + raise ValueError(error_msg) + return arg_value + + self.kernel_size = _check_int_or_tuple('kernel_size', kernel_size) + self.stride = _check_int_or_tuple('stride', stride) + + def construct(self, u: Tensor) -> Tensor: + x, y = get_x_and_y(u) + x, y = self._construct(x, y) + out = to_2channel(x, y, u.dtype) + return out + + def extend_repr(self): + return 'kernel_size={kernel_size}, 
stride={stride}, pad_mode={pad_mode}'.format(**self.__dict__) + + @abstractmethod + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + pass + + +class MaxPool2d(_PoolNd): + r""" + 2D max pooling operation for temporal hypercomplex data of the second order. + + Applies a 2D max pooling over an input Tensor which can be regarded as a composition of 2D planes. + + Typically the input is of shape :math:`(2, N, C, H_{in}, W_{in})`, MaxPool2d outputs + regional maximum in the :math:`(H_{in}, W_{in})`- dimension. Given kernel size + :math:`ks = (h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1)`, the operation is as follows: + + .. math:: + \text{out}(k, N_i, C_j, h, w) = \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1} + \text{inp}(k, N_i, C_j, s_0 \times h + m, s_1 \times w + n), + + where :math:`\text{inp}` is a hypercomplex input tensor. + + Note: + pad_mode for training only supports "same" and "valid". + + Args: + kernel_size (Union[int, tuple[int]]): The size of kernel used to take the max value, + is an int number that represents height and width are both kernel_size, + or a tuple of two int numbers that represent height and width respectively. + Default: 1. + stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents + the height and width of movement are both stride, or a tuple of two int numbers that + represent height and width of movement respectively. Default: 1. + pad_mode (str): The optional value for pad mode, is "same" or "valid", not case sensitive. + Default: "valid". + + - same: Adopts the way of completion. The height and width of the output will be the same as + the input. The total number of padding will be calculated in horizontal and vertical + directions and evenly distributed to top and bottom, left and right if possible. + Otherwise, the last extra padding will be done from the bottom and the right side. + + - valid: Adopts the way of discarding. 
The possible largest height and width of output + will be returned without padding. Extra pixels will be discarded. + data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'. Note that 'NHWC' + format is supported only with GPU target device as of now. Default: 'NCHW'. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, H_{in}, W_{in})`, with float16 or float32 data type, or + :math:`(N, C, H_{in}, W_{in})`, with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C, H_{out}, W_{out})`, + with float16 or float32 data type, or :math:`(N, C, H_{out}, W_{out})`, with complex64 data type. + + Raises: + TypeError: If `kernel_size` or `stride` is neither an int nor a tuple. + TypeError: If dtype of `inp` is not float16, float32 or complex64. + ValueError: If `pad_mode` is neither 'same' nor 'valid' (case insensitive). + ValueError: If `kernel_size` or `stride` is less than 1. + ValueError: If `data_format` is neither 'NCHW' nor 'NHWC', or it is 'NHWC' and the target + device is not GPU. 
+ ValueError: If length of shape of `inp` is not equal to 5 + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.hypercomplex.hc_pool import MaxPool2d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32, 32)).astype(np.float32)) + >>> maxp = MaxPool2d(kernel_size=4, stride=4) + >>> y = maxp(u) + >>> print(y.shape) + (2, 8, 64, 8, 8) + """ + + def __init__(self, + kernel_size: _size_2_t = 1, + stride: _size_2_t = 1, + pad_mode: str = "valid", + data_format: str = "NCHW") -> None: + super(MaxPool2d, self).__init__(kernel_size, stride, pad_mode, data_format) + self.max_pool = P.MaxPool(kernel_size=self.kernel_size, + strides=self.stride, + pad_mode=self.pad_mode, + data_format=self.format) + + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + out_x = self.max_pool(x) + out_y = self.max_pool(y) + return out_x, out_y + + +class MaxPool1d(_PoolNd): + r""" + 1D max pooling operation for temporal hypercomplex data of the second order. + + Applies a 1D max pooling over an input Tensor which can be regarded as a composition of 1D planes. + + Typically the input is of shape :math:`(2, N, C, L_{in})`, MaxPool1d outputs + regional maximum in the :math:`(L_{in})`-dimension. Given kernel size + :math:`ks = (l_{ker})` and stride :math:`s = (s_0)`, the operation is as follows: + + .. math:: + \text{out}(k, N_i, C_j, l) = \max_{n=0, \ldots, l_{ker}-1} + \text{inp}(k, N_i, C_j, s_0 \times l + n), + + where :math:`\text{inp}` is a hypercomplex input tensor. + + Note: + pad_mode for training only supports "same" and "valid". + + Args: + kernel_size (int): The size of kernel used to take the max value, Default: 1. + stride (int): The distance of kernel moving, an int number that represents + the width of movement is stride, Default: 1. + pad_mode (str): The optional value for pad mode, is "same" or "valid", not case sensitive. + Default: "valid". 
+ + - same: Adopts the way of completion. The total number of padding will be calculated in horizontal + and vertical directions and evenly distributed to top and bottom, left and right if possible. + Otherwise, the last extra padding will be done from the bottom and the right side. + + - valid: Adopts the way of discarding. The possible largest height and width of output + will be returned without padding. Extra pixels will be discarded. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, L_{in})`, with float16 or float32 data type, or + :math:`(N, C, L_{in})`, with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C, L_{out})`, with float16 or float32 + data type, or :math:`(N, C, L_{out})`, with complex64 data type. + + Raises: + TypeError: If `kernel_size` or `stride` is not an int. + TypeError: If dtype of `inp` is not float16, float32 or complex64. + ValueError: If `pad_mode` is neither 'same' nor 'valid' with not case sensitive. + ValueError: If `kernel_size` or `strides` is less than 1. + ValueError: If length of shape of `inp` is not equal to 4. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.hypercomplex.hc_pool import MaxPool1d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32)).astype(np.float32)) + >>> maxp = MaxPool1d(kernel_size=4, stride=4) + >>> y = maxp(u) + >>> print(y.shape) + (2, 8, 64, 8) + """ + + def __init__(self, + kernel_size: _size_1_t = 1, + stride: _size_1_t = 1, + pad_mode: str = "valid") -> None: + """Initialize MaxPool1d.""" + super(MaxPool1d, self).__init__(kernel_size, stride, pad_mode) + validator.check_value_type('kernel_size', kernel_size, [int], self.cls_name) + validator.check_value_type('stride', stride, [int], self.cls_name) + validator.check_value_type('pad_mode', pad_mode, [str], self.cls_name) + self.pad_mode = validator.check_string(pad_mode.upper(), + ['VALID', 'SAME'], + 'pad_mode', + self.cls_name) + validator.check_int(kernel_size, 1, Rel.GE, "kernel_size", self.cls_name) + validator.check_int(stride, 1, Rel.GE, "stride", self.cls_name) + self.kernel_size = (1, kernel_size) + self.stride = (1, stride) + self.max_pool = P.MaxPool(kernel_size=self.kernel_size, + strides=self.stride, + pad_mode=self.pad_mode) + self.shape = F.shape + self.reduce_mean = P.ReduceMean(keep_dims=True) + self.expand = P.ExpandDims() + self.squeeze = P.Squeeze(2) + + def _shape_check(self, in_shape: tuple): + msg_prefix = f"For '{self.cls_name}', the" if self.cls_name else "The" + if len(in_shape) != 3: + raise ValueError(f"{msg_prefix} input must has 3 dim, but got {len(in_shape)}") + return None + + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + self._shape_check(self.shape(x)) + self._shape_check(self.shape(y)) + x = self.expand(x, 2) + y = self.expand(y, 2) + out_x = self.max_pool(x) + out_y = self.max_pool(y) + out_x = self.squeeze(out_x) + out_y = self.squeeze(out_y) + return out_x, out_y + + +class AvgPool2d(_PoolNd): + r""" + 2D average pooling 
for temporal hypercomplex data of the second order. + + Applies a 2D average pooling over an input Tensor which can be regarded as a composition of 2D input planes. + + Typically the input is of shape :math:`(2, N, C, H_{in}, W_{in})`, AvgPool2d outputs + regional average in the :math:`(H_{in}, W_{in})`-dimension. Given kernel size + :math:`ks = (h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1)`, the operation is as follows: + + .. math:: + \text{out}(k, N_i, C_j, h, w) = \frac{1}{h_{ker} * w_{ker}} \sum_{m=0}^{h_{ker}-1} \sum_{n=0}^{w_{ker}-1} + \text{inp}(k, N_i, C_j, s_0 \times h + m, s_1 \times w + n), + + where :math:`\text{inp}` is a hypercomplex input tensor. + + Note: + pad_mode for training only supports "same" and "valid". + + Args: + kernel_size (Union[int, tuple[int]]): The size of kernel used to take the average value. + The data type of kernel_size must be int and the value represents the height and width, + or a tuple of two int numbers that represent height and width respectively. + Default: 1. + stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents + the height and width of movement are both strides, or a tuple of two int numbers that + represent height and width of movement respectively. Default: 1. + pad_mode (str): The optional value for pad mode, is "same" or "valid", not case sensitive. + Default: "valid". + + - same: Adopts the way of completion. The height and width of the output will be the same as + the input. The total number of padding will be calculated in horizontal and vertical + directions and evenly distributed to top and bottom, left and right if possible. + Otherwise, the last extra padding will be done from the bottom and the right side. + + - valid: Adopts the way of discarding. The possible largest height and width of output + will be returned without padding. Extra pixels will be discarded. + data_format (str): The optional value for data format, is 'NHWC' or 'NCHW'. 
Note that 'NHWC' + format is supported only with GPU target device as of now. Default: 'NCHW'. + + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, H_{in}, W_{in})`, with float16 or float32 data type, or + :math:`(N, C, H_{in}, W_{in})`, with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C, H_{out}, W_{out})`, with float16 + or float32 data type, or :math:`(N, C, H_{out}, W_{out})`, with complex64 data type. + + Raises: + TypeError: If `kernel_size` or `stride` is neither an int nor a tuple. + TypeError: If dtype of `inp` is not float16, float32 or complex64. + ValueError: If `pad_mode` is neither 'same' nor 'valid' (case insensitive). + ValueError: If `kernel_size` or `stride` is less than 1. + ValueError: If `data_format` is neither 'NCHW' nor 'NHWC', or it is 'NHWC' and the target + device is not GPU. + ValueError: If length of shape of `inp` is not equal to 5. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.hypercomplex.hc_pool import AvgPool2d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32, 32)).astype(np.float32)) + >>> avg = AvgPool2d(kernel_size=4, stride=4) + >>> y = avg(u) + >>> print(y.shape) + (2, 8, 64, 8, 8) + """ + + def __init__(self, + kernel_size: _size_2_t = 1, + stride: _size_2_t = 1, + pad_mode: str = "valid", + data_format: str = "NCHW") -> None: + super(AvgPool2d, self).__init__(kernel_size, + stride, + pad_mode, + data_format) + self.avg_pool = P.AvgPool(kernel_size=self.kernel_size, + strides=self.stride, + pad_mode=self.pad_mode, + data_format=self.format) + + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + out_x = self.avg_pool(x) + out_y = self.avg_pool(y) + return out_x, out_y + + +class AvgPool1d(_PoolNd): + r""" + 1D average pooling for temporal hypercomplex data of the second order. 
+ + Applies a 1D average pooling over an input Tensor which can be regarded as a composition of 1D input planes. + + Typically the input is of shape :math:`(2, N, C, L_{in})`, AvgPool1d outputs + regional average in the :math:`(L_{in})`-dimension. Given kernel size + :math:`ks = l_{ker}` and stride :math:`s = s_0`, the operation is as follows: + + .. math:: + \text{out}(k, N_i, C_j, l) = \frac{1}{l_{ker}} \sum_{n=0}^{l_{ker}-1} + \text{inp}(k, N_i, C_j, s_0 \times l + n) + + where :math:`\text{inp}` is a hypercomplex input tensor. + + Note: + pad_mode for training only supports "same" and "valid". + + Args: + kernel_size (int): The size of kernel window used to take the average value, Default: 1. + stride (int): The distance of kernel moving, an int number that represents + the width of movement is strides, Default: 1. + pad_mode (str): The optional value for pad mode, is "same" or "valid", not case sensitive. + Default: "valid". + + - same: Adopts the way of completion. The height and width of the output will be the same as + the input. The total number of padding will be calculated in horizontal and vertical + directions and evenly distributed to top and bottom, left and right if possible. + Otherwise, the last extra padding will be done from the bottom and the right side. + + - valid: Adopts the way of discarding. The possible largest height and width of output + will be returned without padding. Extra pixels will be discarded. + + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, L_{in})`, with float16 or float32 data type, or + :math:`(N, C, L_{in})`, with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C, L_{out})`, with float16 or float32 + data type, or :math:`(N, C, L_{out})`, with complex64 data type. + + Raises: + TypeError: If `kernel_size` or `stride` is not an int. + TypeError: If dtype of `inp` is not float16, float32 or complex64. 
+ ValueError: If `pad_mode` is neither 'same' nor 'valid' with not case sensitive. + ValueError: If `kernel_size` or `strides` is less than 1. + ValueError: If length of shape of `inp` is not equal to 4. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.hypercomplex.hc_pool import AvgPool1d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32)).astype(np.float32)) + >>> avg = AvgPool1d(kernel_size=4, stride=4) + >>> y = avg(u) + >>> print(y.shape) + (2, 8, 64, 8) + """ + + def __init__(self, + kernel_size: _size_1_t = 1, + stride: _size_1_t = 1, + pad_mode: str = "valid") -> None: + """Initialize AvgPool1d.""" + validator.check_value_type('kernel_size', kernel_size, [int], self.cls_name) + validator.check_value_type('stride', stride, [int], self.cls_name) + validator.check_value_type('pad_mode', pad_mode, [str], self.cls_name) + self.pad_mode = validator.check_string(pad_mode.upper(), + ['VALID', 'SAME'], + 'pad_mode', + self.cls_name) + validator.check_int(kernel_size, 1, Rel.GE, "kernel_size", self.cls_name) + validator.check_int(stride, 1, Rel.GE, "stride", self.cls_name) + super(AvgPool1d, self).__init__(kernel_size, stride, pad_mode) + self.kernel_size = (1, kernel_size) + self.stride = (1, stride) + self.avg_pool = P.AvgPool(kernel_size=self.kernel_size, + strides=self.stride, + pad_mode=self.pad_mode) + self.shape = F.shape + self.reduce_mean = P.ReduceMean(keep_dims=True) + self.slice = P.Slice() + self.expand = P.ExpandDims() + self.squeeze = P.Squeeze(2) + + def _shape_check(self, in_shape: tuple): + msg_prefix = f"For '{self.cls_name}', the" if self.cls_name else "The" + if len(in_shape) != 3: + raise ValueError(f"{msg_prefix} input must has 3 dim, but got {len(in_shape)}") + return None + + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + x = F.depend(x, self._shape_check(self.shape(x))) + y = F.depend(y, 
self._shape_check(self.shape(y))) + batch, channel, width = self.shape(x) + if width == self.kernel_size[1]: + x = self.reduce_mean(x, 2) + y = self.reduce_mean(y, 2) + elif width - self.kernel_size[1] < self.stride[1]: + x = self.slice(x, (0, 0, 0), (batch, channel, self.kernel_size[1])) + y = self.slice(y, (0, 0, 0), (batch, channel, self.kernel_size[1])) + x = self.reduce_mean(x, 2) + y = self.reduce_mean(y, 2) + else: + x = self.expand(x, 2) + y = self.expand(y, 2) + x = self.avg_pool(x) + y = self.avg_pool(y) + x = self.squeeze(x) + y = self.squeeze(y) + return x, y + + +class _AdaptivePoolNd(nn.Cell): + r""" + Base class for adaptive pooling layers for the second-order temporal hypercomplex data. + + Includes data validation and initialization of hyperparameters, which are shared by all specific + implementations of adaptive pooling. + + Note: + The size of every spatial dimension of `inp` must be divisible by the corresponding value of `output_size`. + + Args: + output_size (Union[int, tuple]): The target output size. `output_size` can be a tuple of length being equal to + the count of spatial dimensions of the input tensor, or a single integer which then represents the desired + output size for all of the spatial dimensions at once, or None. + If it is None, it means the output size is the same as the input size. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, *, ..., *)`, with float16 or float32 data type, or + :math:`(N, C, *, ..., *)`, with complex64 data type. The count of spatial dimensions denoted by '*' + depends on a specific subclass. + + Outputs: + Tensor of the same data type as `inp`, and of shape :math:`(2, N, C, *, ..., *)`, with float16 or float32 data + type, or :math:`(N, C, *, ..., *)`, with complex64 data type. The number of spatial dimensions denoted by '*' + is the same as in `inp`. + + Raises: + TypeError: If dtype of `inp` is not float16, float32 or complex64. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + + def __init__(self, output_size: Tuple[int, ...]) -> None: + super(_AdaptivePoolNd, self).__init__() + self.output_size = output_size + + def construct(self, u: Tensor) -> Tensor: + x, y = get_x_and_y(u) + out_x, out_y = self._construct(x, y) + out = to_2channel(out_x, out_y, u.dtype) + + return out + + @abstractmethod + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + pass + + def _adaptive_shape_check(self, in_shape): + """Check shape.""" + msg_prefix = "For {}, the".format(self.cls_name) + if len(in_shape) != 3: + raise ValueError("{} input must has 3 dim, but got {}.".format(msg_prefix, len(in_shape))) + if in_shape[2] < self.output_size: + raise ValueError("{} input's last dimension must be greater or equal to " + "output size {}, but got {}.".format(msg_prefix, self.output_size, in_shape[2])) + if in_shape[2] % self.output_size != 0: + raise ValueError("{} input's last dimension must be divisible by " + "output size {}, but got {}.".format(msg_prefix, self.output_size, in_shape[2])) + return None + + def _adaptive_dtype_check(self, x_dtype): + """Check dtype.""" + if x_dtype not in [mstype.float16, mstype.float32]: + raise TypeError("For {}, the x_dtype must be float16 or float32, " + "but got {}.".format(self.cls_name, x_dtype)) + return None + + +class AdaptiveAvgPool1d(_AdaptivePoolNd): + r""" + 1D adaptive average pooling for temporal hypercomplex data of the second order. + + Applies a 1D adaptive average pooling over an input Tensor which can be regarded as + a composition of 1D input planes. + + Typically, the input is of shape :math:`(2, N, C, L_{in})`, + AdaptiveAvgPool1d outputs regional average in the :math:`L_{in}`-dimension. + The output is of shape :math:`(2, N, C, L_{out})`, + where :math:`L_{out}` is defined by `output_size`. + + Note: + :math:`L_{in}` must be divisible by `output_size`. 
+ + Args: + output_size (int): the target output size :math:`L_{out}`. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, L_{in})`, with float16 or float32 data type, or + :math:`(N, C, L_{in})`, with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C, L_{out})`, with float16 or float32 + data type, or :math:`(N, C, L_{out})`, with complex64 data type. + + Raises: + TypeError: If `output_size` is not an int. + TypeError: If dtype of `inp` is not float16, float32 or complex64. + ValueError: If `output_size` is less than 1. + ValueError: If length of shape of `inp` is not equal to 4. + ValueError: If the last dimension of `inp` is smaller than `output_size`. + ValueError: If the last dimension of `inp` is not divisible by `output_size`. + + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.hypercomplex.hc_pool import AdaptiveAvgPool1d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32)).astype(np.float32)) + >>> avg = AdaptiveAvgPool1d(output_size=16) + >>> y = avg(u) + >>> print(y.shape) + (2, 8, 64, 16) + """ + + def __init__(self, output_size: int) -> None: + """Initialize AdaptiveAvgPool1d.""" + super(AdaptiveAvgPool1d, self).__init__(output_size) + validator.check_value_type('output_size', output_size, [int], self.cls_name) + validator.check_int(output_size, 1, Rel.GE, "output_size", self.cls_name) + self.shape = F.shape + self.expand = P.ExpandDims() + self.squeeze = P.Squeeze(2) + self.dtype = P.DType() + + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + self._adaptive_shape_check(self.shape(x)) + self._adaptive_shape_check(self.shape(y)) + self._adaptive_dtype_check(self.dtype(x)) + self._adaptive_dtype_check(self.dtype(y)) + + _, _, width = self.shape(x) + stride = width // self.output_size + kernel_size = width - (self.output_size - 1) * stride + + stride = 
(1, width // self.output_size) + kernel_size = (1, kernel_size) + + x = self.expand(x, 2) + y = self.expand(y, 2) + + avg_pool = P.AvgPool(kernel_size=kernel_size, strides=stride) + + out_x = avg_pool(x) + out_y = avg_pool(y) + out_x = self.squeeze(out_x) + out_y = self.squeeze(out_y) + + return out_x, out_y + + +class AdaptiveAvgPool2d(_AdaptivePoolNd): + r""" + 2D adaptive average pooling for temporal hypercomplex data of the second order. + + This operator applies a 2D adaptive average pooling to an input signal composed of multiple input planes. + That is, for any input size, the size of the specified output is H x W. + The number of output features is equal to the number of input features. + + The input and output data format can be "NCHW" and "CHW". N is the batch size, C is the number of channels, + H is the feature height, and W is the feature width. + + .. math:: + \begin{align} + h_{start} &= floor(i * H_{in} / H_{out})\\ + h_{end} &= ceil((i + 1) * H_{in} / H_{out})\\ + w_{start} &= floor(j * W_{in} / W_{out})\\ + w_{end} &= ceil((j + 1) * W_{in} / W_{out})\\ + out(i,j) &= \frac{\sum inp[h_{start}:h_{end}, w_{start}:w_{end}]}{(h_{end}- h_{start}) + * (w_{end}- w_{start})} + \end{align} + + Args: + output_size (Union[int, tuple]): The target output size is H x W. + `output_size` can be a tuple consisted of int type H and W, or a single H for H x H, or None. + If it is None, it means the output size is the same as the input size. + + Inputs: + - **inp** (Tensor) - The input of AdaptiveAvgPool2d, which is a 4D or 5D tensor of shape + :math:`(2, N, C, H_{in}, W_{in})` or :math:`(2, C, H_{in}, W_{in})`, with float16 or float32 data type, + or :math:`(N, C, H_{in}, W_{in})` or :math:`(C, H_{in}, W_{in})`, with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C, H_{out}, W_{out})`, with float16 + or float32 data type, or :math:`(N, C, H_{out}, W_{out})`, with complex64 data type. 
+ + Raises: + ValueError: If `output_size` is a tuple and the length of `output_size` is not 2. + TypeError: If `inp` is not a Tensor. + TypeError: If dtype of `inp` is not float16, float32 or complex64. + ValueError: If the dimension of `inp` is less than or equal to the dimension of `output_size`. + + Supported Platforms: + ``GPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.hypercomplex.hc_pool import AdaptiveAvgPool2d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32, 32)).astype(np.float32)) + >>> avg = AdaptiveAvgPool2d(output_size=16) + >>> y = avg(u) + >>> print(y.shape) + (2, 8, 64, 16, 16) + """ + + def __init__(self, output_size: _size_2_t) -> None: + """Initialize AdaptiveAvgPool2d.""" + super(AdaptiveAvgPool2d, self).__init__(output_size) + self.adaptive_avgpool2d = P.AdaptiveAvgPool2D(output_size) + + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + out_x = self.adaptive_avgpool2d(x) + out_y = self.adaptive_avgpool2d(y) + + return out_x, out_y + + +class AdaptiveAvgPool3d(_AdaptivePoolNd): + r""" + 3D adaptive average pooling for temporal hypercomplex data of the second order. + + This operator applies a 3D adaptive average pooling to an input signal composed of multiple input planes. + That is, for any input size, the size of the specified output is :math:`(2, N, C, D, H, W)` or + :math:`(2, C, D, H, W)`. + The number of output features is equal to the number of input planes. + + Suppose the last 3 dimension size of x is :math:`(inD, inH, inW)`, then the last 3 dimension size of output is + :math:`(outD, outH, outW)`. + + .. 
math:: + \begin{array}{ll} \\ + \forall \quad od \in [0,outD-1], oh \in [0,outH-1], ow \in [0,outW-1]\\ + output[od,oh,ow] = \\ + \qquad mean(x[istartD:iendD+1,istartH:iendH+1,istartW:iendW+1])\\ + where,\\ + \qquad istartD= \left\lceil \frac{od * inD}{outD} \right\rceil \\ + \qquad iendD=\left\lfloor \frac{(od+1)* inD}{outD} \right\rfloor \\ + \qquad istartH=\left\lceil \frac{oh * inH}{outH} \right\rceil \\ + \qquad iendH=\left\lfloor \frac{(oh+1) * inH}{outH} \right\rfloor \\ + \qquad istartW=\left\lceil \frac{ow * inW}{outW} \right\rceil \\ + \qquad iendW=\left\lfloor \frac{(ow+1) * inW}{outW} \right\rfloor + \end{array} + + Args: + output_size (Union[int, tuple]): The target output size. `output_size` can be a tuple :math:`(D, H, W)`, + or an int D for :math:`(D, D, D)`. :math:`(D)`, :math:`(H)` and :math:`(W)` can be int or None + which means the output size is the same as that of the input. + + Inputs: + - **inp** (Tensor) - The input of AdaptiveAvgPool3d, which is a 6D Tensor + :math:`(2, N, C, D_{in}, H_{in}, W_{in})` or a 5D Tensor :math:`(2, C, D_{in}, H_{in}, W_{in})`, + with float16 or float32 data type, or 5D Tensor :math:`(N, C, D_{in}, H_{in}, W_{in})` or a 4D Tensor + :math:`(C, D_{in}, H_{in}, W_{in})`, with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C, D_{out}, H_{out}, W_{out})`, with float16 + or float32 data type, or :math:`(N, C, D_{out}, H_{out}, W_{out})``, with complex64 data type. + + Raises: + TypeError: If `inp` is not a Tensor. + TypeError: If dtype of `inp` is not float16, float32 or complex64. + ValueError: If the dimension of `inp` is not 5D or 6D. + ValueError: If `output_size` value is not positive. 
+ + Supported Platforms: + ``GPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.hypercomplex.hc_pool import AdaptiveAvgPool3d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32, 48, 96)).astype(np.float32)) + >>> avg = AdaptiveAvgPool3d(output_size=(16, 24, 32)) + >>> y = avg(u) + >>> print(y.shape) + (2, 8, 64, 16, 24, 32) + """ + + def __init__(self, output_size: _size_3_t): + """Initialize AdaptiveAvgPool3d.""" + super(AdaptiveAvgPool3d, self).__init__(output_size) + self.adaptive_avg_pool3d = AdaptiveAvgPool3D(output_size) + + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + out_x = self.adaptive_avg_pool3d(x) + out_y = self.adaptive_avg_pool3d(y) + + return out_x, out_y + + +class AdaptiveMaxPool1d(_AdaptivePoolNd): + r""" + 1D adaptive maximum pooling for temporal hypercomplex data of the second order. + + Applies a 1D adaptive maximum pooling over an input Tensor which can be regarded as + a composition of 1D input planes. + + Typically, the input is of shape :math:`(2, N, C, L_{in})`, + AdaptiveMaxPool1d outputs regional maximum in the :math:`L_{in}`-dimension. The output is of + shape :math:`(2, N, C, L_{out})`, where :math:`L_{out}` is defined by `output_size`. + + Note: + :math:`L_{in}` must be divisible by `output_size`. + + Args: + output_size (int): the target output size :math:`L_{out}`. + + Inputs: + - **inp** (Tensor) - Tensor of shape :math:`(2, N, C, L_{in})`, with float16 or float32 data type, or + :math:`(N, C, L_{in})`, with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C, L_{out})`, with float16 or float32 + data type, or :math:`(N, C, L_{out})`, with complex64 data type. + + Raises: + TypeError: If dtype of `inp` is not float16, float32 or complex64. + TypeError: If `output_size` is not an int. + ValueError: If `output_size` is less than 1. 
+ ValueError: If the last dimension of `inp` is smaller than `output_size`. + ValueError: If the last dimension of `inp` is not divisible by `output_size`. + ValueError: If length of shape of `inp` is not equal to 4. + + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.hypercomplex.hc_pool import AdaptiveMaxPool1d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32)).astype(np.float32)) + >>> maxp = AdaptiveMaxPool1d(output_size=16) + >>> y = maxp(u) + >>> print(y.shape) + (2, 8, 64, 16) + """ + + def __init__(self, output_size: int) -> None: + """Initialize AdaptiveMaxPool1d.""" + super(AdaptiveMaxPool1d, self).__init__(output_size) + validator.check_value_type('output_size', output_size, [int], self.cls_name) + validator.check_int(output_size, 1, Rel.GE, "output_size", self.cls_name) + self.shape = F.shape + self.expand = P.ExpandDims() + self.squeeze = P.Squeeze(2) + self.dtype = P.DType() + + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + self._adaptive_shape_check(self.shape(x)) + self._adaptive_shape_check(self.shape(y)) + self._adaptive_dtype_check(self.dtype(x)) + self._adaptive_dtype_check(self.dtype(y)) + + _, _, width = self.shape(x) + stride = width // self.output_size + kernel_size = width - (self.output_size - 1) * stride + + stride = (1, width // self.output_size) + kernel_size = (1, kernel_size) + + x = self.expand(x, 2) + y = self.expand(y, 2) + + max_pool = P.MaxPool(kernel_size=kernel_size, strides=stride) + + out_x = max_pool(x) + out_y = max_pool(y) + out_x = self.squeeze(out_x) + out_y = self.squeeze(out_y) + + return out_x, out_y + + +class AdaptiveMaxPool2d(_AdaptivePoolNd): + r""" + AdaptiveMaxPool2d operation for temporal hypercomplex data of the second order. + + This operator applies a 2D adaptive max pooling to an input signal composed of multiple input planes. 
+ That is, for any input size, the size of the specified output is H x W. + The number of output features is equal to the number of input planes. + + The input and output data format can be "NCHW" and "CHW". N is the batch size, C is the number of channels, + H is the feature height, and W is the feature width. + + For max adaptive pool2d: + + .. math:: + + \begin{align} + h_{start} &= floor(i * H_{in} / H_{out})\\ + h_{end} &= ceil((i + 1) * H_{in} / H_{out})\\ + w_{start} &= floor(j * W_{in} / W_{out})\\ + w_{end} &= ceil((j + 1) * W_{in} / W_{out})\\ + out(i,j) &= {\max inp[h_{start}:h_{end}, w_{start}:w_{end}]} + \end{align} + + Note: + Ascend platform only supports float16 type for inp. + + Args: + output_size (Union[int, tuple]): The target output size is H x W. + ouput_size can be a tuple, or a single H for H x H, and H and W can be int or None + which means the output size is the same as the input. + + return_indices (bool): If `return_indices` is True, the indices of max value would be output. + Default: False. + + Inputs: + - **inp** (Tensor) - The input of AdaptiveMaxPool2d, which is a 5D tensor of shape + (2, N, C, H_{in}, W_{in}) or a 4D tensor of shape (2, C, H_{in}, W_{in}), with float16 or float32 data type, + or a 4D tensor of shape (N, C, H_{in}, W_{in}) or a 3D tensor of shape (C, H_{in}, W_{in}), + with complex64 data type. + + Outputs: + Tensor of the same data type as `inp` and of shape :math:`(2, N, C, H_{out}, W_{out})` or + :math:`(2, C, H_{out}, W_{out})`, with float16 or float32 data type, or :math:`(N, C, H_{out}, W_{out})` or + :math:`(C, H_{out}, W_{out})`, with complex64 data type. + + Shape of the output is `inp_shape[:len(inp_shape) - len(out_shape)] + out_shape`. + + Raises: + TypeError: If `output_size` is not int or tuple. + TypeError: If `inp` is not a tensor. + TypeError: If `return_indices` is not a bool. + TypeError: If dtype of `inp` is not float16, float32 or complex64. 
+ ValueError: If `output_size` is a tuple and the length of `output_size` is not 2. + ValueError: If the dimension of `inp` is not 4D or 5D + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import numpy as np + >>> from mindspore.hypercomplex.hypercomplex.hc_pool import AdaptiveMaxPool2d + >>> from mindspore import Tensor + >>> u = Tensor(np.random.random((2, 8, 64, 32, 32)).astype(np.float32)) + >>> maxp = AdaptiveMaxPool2d(output_size=16) + >>> y = maxp(u) + >>> print(y.shape) + (2, 8, 64, 16, 16) + """ + + def __init__(self, output_size: _size_2_t) -> None: + """Initialize AdaptiveAvgPool2d.""" + super(AdaptiveMaxPool2d, self).__init__(output_size) + self.adaptive_maxpool2d = AdaptiveMaxPool2D(output_size) + + def _construct(self, + x: Tensor, + y: Tensor) -> Tuple[Tensor, Tensor]: + out_x = self.adaptive_maxpool2d(x) + out_y = self.adaptive_maxpool2d(y) + + return out_x, out_y diff --git a/mindspore/python/mindspore/hypercomplex/hypercomplex/uniform_operator.py b/mindspore/python/mindspore/hypercomplex/hypercomplex/uniform_operator.py new file mode 100644 index 00000000000..27b254d60a1 --- /dev/null +++ b/mindspore/python/mindspore/hypercomplex/hypercomplex/uniform_operator.py @@ -0,0 +1,49 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
class _UniformOperator(Cell):
    r"""
    Common wrapper for layers working on second-order hypercomplex numbers that follow
    the bridge pattern.

    On construction it instantiates `hc_op`, handing it `hc_impl` as the first argument
    together with any extra keyword arguments, and then forwards every call to that instance.

    Args:
        hc_op (Type): The abstraction part of the bridge pattern.
        hc_impl (Type): The implementor part of the bridge pattern.
        **kwargs (dict): Additional arguments that may be required to construct the specific layer

    Inputs:
        - **inp** (Tensor) - input tensor. The shape is specific to the subclass.

    Outputs:
        Tensor of shape, which is specific to the subclass.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``
    """

    def __init__(self, hc_op, hc_impl, **kwargs) -> None:
        super().__init__()
        # The abstraction is bound to its implementor once, here.
        self.op = hc_op(hc_impl, **kwargs)

    def construct(self, x):
        """Delegate the forward pass to the wrapped operator."""
        result = self.op(x)
        return result
def to_2channel(real, imag, dtype=None):
    """
    Pack two component tensors into a single hypercomplex tensor.

    Args:
        real (Tensor): First component.
        imag (Tensor): Second component.
        dtype: Requested output data type. With `mindspore.complex64` the components are
            merged by the module-level `to_complex` primitive; any other non-None value
            must equal the data type of both components. Default: None.

    Returns:
        Tensor, either the complex tensor built from `real` and `imag`, or both components
        with a new leading axis, concatenated along that axis.

    Raises:
        ValueError: If `dtype` is given, is not complex64 and differs from the data type
            of `real` or `imag`.
    """
    if dtype is not None:
        if dtype == mindspore.complex64:
            return to_complex(real, imag)
        if dtype != real.dtype or dtype != imag.dtype:
            raise ValueError("dtype must match with data type of the input tensors, but got: "
                             f"dtype={dtype}, real.dtype={real.dtype}, imag.dtype={imag.dtype}")
    # Give each component a leading axis of size 1, then join them along it.
    add_leading_axis = P.ExpandDims()
    components = (add_leading_axis(real, 0), add_leading_axis(imag, 0))
    return cat(components)
stride=2) + + self.conv3 = ops.Conv1d(32, 64, kernel_size=3, stride=1, padding=1, pad_mode='pad') + self.bn3 = ops.BatchNorm1d(64) + self.avg_pool3 = ops.AvgPool1d(kernel_size=2, stride=2) + + self.conv4 = ops.Conv1d(64, 64, kernel_size=3, stride=1, padding=1, pad_mode='pad') + self.bn4 = ops.BatchNorm1d(64) + self.avg_pool4 = ops.AvgPool1d(kernel_size=2, stride=2) + + self.conv5 = ops.Conv1d(64, 128, kernel_size=3, stride=1, padding=1, pad_mode='pad') + self.conv6 = ops.Conv1d(128, 128, kernel_size=3, stride=1, padding=1, pad_mode='pad') + self.bn6 = ops.BatchNorm1d(128) + self.avg_pool6 = ops.AvgPool1d(kernel_size=2, stride=2) + + self.shape_op = P.Shape() + self.reshape = P.Reshape() + self.permute = P.Transpose() + self.flatten = P.Flatten() + + self.fc1 = ops.Dense(4096, 1024) + self.fc2 = nn.Dense(2048, 84) + + self.relu = ops.ReLU() + self.sigmoid = nn.Sigmoid() + + def construct(self, u: Tensor) -> Tensor: + u = self.conv1(u) + u = self.bn1(u) + u = self.relu(u) + u = self.avg_pool1(u) + u = self.pad1(u) + + u = self.conv2(u) + u = self.bn2(u) + u = self.relu(u) + u = self.avg_pool2(u) + + u = self.conv3(u) + u = self.bn3(u) + u = self.relu(u) + u = self.avg_pool3(u) + + u = self.conv4(u) + u = self.bn4(u) + u = self.relu(u) + u = self.avg_pool4(u) + + u = self.conv5(u) + u = self.relu(u) + + u = self.conv6(u) + u = self.bn6(u) + u = self.relu(u) + u = self.avg_pool6(u) + + u_shape = self.shape_op(u) + u = self.reshape(u, (u_shape[0], u_shape[1], -1)) + u = self.fc1(u) + u = self.relu(u) + + u = self.permute(u, (1, 0, 2)) + x = self.flatten(u) + x = self.fc2(x) + x = self.sigmoid(x) + return x diff --git a/tests/st/hypercomplex/hcmodel.py b/tests/st/hypercomplex/hcmodel.py new file mode 100644 index 00000000000..822473f884d --- /dev/null +++ b/tests/st/hypercomplex/hcmodel.py @@ -0,0 +1,32 @@ +from mindspore import nn +from mindspore.common.tensor import Tensor +from mindspore.ops import operations as P +from mindspore.hypercomplex.utils import get_x_and_y, 
class HCModel(nn.Cell):
    """
    Small classifier mixing dual-number layers (`mindspore.hypercomplex.dual`) with a
    regular dense head.

    The input is split along axis 1 into its first channel and the remaining channels,
    which become the two components of a hypercomplex tensor. After the hypercomplex
    stack the two components are concatenated along axis 1 and fed to `nn.Dense`.
    """

    def __init__(self):
        super().__init__()
        # Hypercomplex feature extractor.
        self.conv1 = ops.Conv2d(1, 10, kernel_size=3)
        self.bn1 = ops.BatchNorm2d(10)
        self.max_pool = ops.MaxPool2d(2)
        self.relu = ops.ReLU()
        self.fc1 = ops.Dense(7290, 256)
        # Real-valued head: consumes both 256-wide components side by side (2 * 256 = 512).
        self.fc2 = nn.Dense(512, 10)
        self.concat = P.Concat(1)

    def construct(self, u: Tensor) -> Tensor:
        """Run the forward pass and return the output of the final dense layer."""
        dual = to_2channel(u[:, :1], u[:, 1:])
        features = self.max_pool(self.relu(self.bn1(self.conv1(dual))))
        # Keep the leading component axis and the next axis, flatten the rest.
        flat = features.view(2, features.shape[1], -1)
        hidden = self.relu(self.fc1(flat))
        first, second = get_x_and_y(hidden)
        return self.fc2(self.concat([first, second]))
def _calculate_fan_in_and_fan_out(tensor):
    """
    Compute fan-in and fan-out from a weight shape.

    Args:
        tensor (Sequence[int]): The weight *shape* (only `len()` and indexing are used).
            Element 0 is treated as the number of output feature maps and element 1 as
            the number of input feature maps.

    Returns:
        Tuple[int, int], `(fan_in, fan_out)`.

    Raises:
        ValueError: If the shape has fewer than 2 dimensions.
    """
    dimensions = len(tensor)
    if dimensions < 2:
        raise ValueError("Fan in and fan out can not be computed for tensor with fewer than 2 dimensions")

    num_output_fmaps, num_input_fmaps = tensor[0], tensor[1]
    if dimensions == 2:  # Linear
        receptive_field_size = 1
    elif dimensions == 3:
        # A single trailing axis (e.g. a 1D kernel): indexing tensor[3] would be out of range.
        receptive_field_size = tensor[2]
    else:
        # Unchanged from the previous behaviour: only axes 2 and 3 contribute, any
        # further axes are ignored.
        receptive_field_size = tensor[2] * tensor[3]

    fan_in = num_input_fmaps * receptive_field_size
    fan_out = num_output_fmaps * receptive_field_size
    return fan_in, fan_out
def _fc(in_channel, out_channel, use_se=False):
    """
    Build the real-valued dense layer with an explicitly initialised weight.

    Args:
        in_channel (int): Number of input features.
        out_channel (int): Number of output features.
        use_se (bool): If True, draw the weight from a normal distribution with mean 0 and
            standard deviation 0.01; otherwise use `kaiming_uniform` with `a=sqrt(5)`.
            Default: False.

    Returns:
        nn.Dense, a dense layer with bias, whose bias is initialised to 0.
    """
    dense_shape = (out_channel, in_channel)
    if not use_se:
        init_weight = Tensor(kaiming_uniform(dense_shape, a=math.sqrt(5)))
    else:
        # Sample flat, then fold into (out_channel, in_channel).
        flat = np.random.normal(loc=0, scale=0.01, size=out_channel * in_channel)
        init_weight = Tensor(np.reshape(flat, dense_shape), dtype=mstype.float32)
    return nn.Dense(in_channel, out_channel, has_bias=True, weight_init=init_weight, bias_init=0)
definition. + + Args: + in_channel (int): Input channel. + out_channel (int): Output channel. + stride (int): Stride size for the first convolutional layer. Default: 1. + use_se (bool): Enable SE-ResNet50 net. Default: False. + se_block(bool): Use se block in SE-ResNet50 net. Default: False. + + Returns: + Tensor, output tensor. + + Examples: + >>> ResidualBlock(3, 256, stride=2) + """ + expansion = 4 + + def __init__(self, + in_channel, + out_channel, + stride=1, + use_se=False, se_block=False): + super(ResidualBlock, self).__init__() + self.stride = stride + self.use_se = use_se + self.se_block = se_block + channel = out_channel // self.expansion + self.conv1 = _conv1x1(in_channel, channel, stride=1, use_se=self.use_se) + self.bn1 = _bn(channel) + if self.use_se and self.stride != 1: + self.e2 = nn.SequentialCell([_conv3x3(channel, channel, stride=1, use_se=True), _bn(channel), + ops.ReLU(), ops.MaxPool2d(kernel_size=2, stride=2, pad_mode='same')]) + else: + self.conv2 = _conv3x3(channel, channel, stride=stride, use_se=self.use_se) + self.bn2 = _bn(channel) + + self.conv3 = _conv1x1(channel, out_channel, stride=1, use_se=self.use_se) + self.bn3 = _bn(out_channel) + if self.se_block: + self.se_global_pool = P.ReduceMean(keep_dims=False) + self.se_dense_0 = _fc(out_channel, int(out_channel / 4), use_se=self.use_se) + self.se_dense_1 = _fc(int(out_channel / 4), out_channel, use_se=self.use_se) + self.se_sigmoid = nn.Sigmoid() + self.se_mul = P.Mul() + self.relu = ops.ReLU() + + self.down_sample = False + + if stride != 1 or in_channel != out_channel: + self.down_sample = True + self.down_sample_layer = None + + if self.down_sample: + if self.use_se: + if stride == 1: + self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, + stride, use_se=self.use_se), _bn(out_channel)]) + else: + self.down_sample_layer = nn.SequentialCell([ops.MaxPool2d(kernel_size=2, stride=2, pad_mode='same'), + _conv1x1(in_channel, out_channel, 1, + use_se=self.use_se), 
class ResidualBlockBase(nn.Cell):
    """
    Basic two-convolution residual block built from hypercomplex (dual) layers.

    The main branch is conv3x3 -> BN -> ReLU -> conv3x3 -> BN. When `stride` is not 1 or
    the channel count changes, the shortcut is a conv1x1 -> BN projection instead of the
    identity. The two branches are added and passed through ReLU.

    Args:
        in_channel (int): Input channel.
        out_channel (int): Output channel.
        stride (int): Stride size for the first convolutional layer (also used by the
            shortcut projection). Default: 1.
        use_se (bool): Forwarded to the 1x1 convolution of the shortcut projection only.
            Default: False.
        se_block (bool): Accepted but not referenced by this block. Default: False.
        res_base (bool): Enable parameter setting of resnet18. Default: True.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> ResidualBlockBase(3, 256, stride=2)
    """

    def __init__(self,
                 in_channel,
                 out_channel,
                 stride=1,
                 use_se=False,
                 se_block=False,
                 res_base=True):
        super().__init__()
        self.res_base = res_base

        # Main branch.
        self.conv1 = _conv3x3(in_channel, out_channel, stride=stride, res_base=self.res_base)
        self.bn1d = _bn(out_channel)
        self.conv2 = _conv3x3(out_channel, out_channel, stride=1, res_base=self.res_base)
        self.bn2d = _bn(out_channel)
        self.relu = ops.ReLU()

        # A projection is needed as soon as the shortcut and the main branch would
        # otherwise disagree in resolution or channel count.
        needs_projection = stride != 1 or in_channel != out_channel
        self.down_sample = needs_projection

        self.down_sample_layer = None
        if needs_projection:
            projection = _conv1x1(in_channel, out_channel, stride,
                                  use_se=use_se, res_base=self.res_base)
            self.down_sample_layer = nn.SequentialCell([projection, _bn(out_channel, res_base)])

    def construct(self, x):
        """Apply the residual block to `x`."""
        branch = self.relu(self.bn1d(self.conv1(x)))
        branch = self.bn2d(self.conv2(branch))

        shortcut = x
        if self.down_sample:
            shortcut = self.down_sample_layer(shortcut)

        return self.relu(branch + shortcut)
+ + Examples: + >>> ResNet(ResidualBlock, + >>> [3, 4, 6, 3], + >>> [64, 256, 512, 1024], + >>> [256, 512, 1024, 2048], + >>> [1, 2, 2, 2], + >>> 10) + """ + + def __init__(self, + block, + layer_nums, + in_channels, + out_channels, + strides, + num_classes, + use_se=False, + res_base=False): + super(ResNet, self).__init__() + + if not len(layer_nums) == len(in_channels) == len(out_channels) == 4: + raise ValueError("the length of layer_num, in_channels, out_channels list must be 4!") + self.use_se = use_se + self.res_base = res_base + self.se_block = False + if self.use_se: + self.se_block = True + + if self.use_se: + self.conv1_0 = _conv3x3(3, 32, stride=2, use_se=self.use_se) + self.bn1_0 = _bn(32) + self.conv1_1 = _conv3x3(32, 32, stride=1, use_se=self.use_se) + self.bn1_1 = _bn(32) + self.conv1_2 = _conv3x3(32, 64, stride=1, use_se=self.use_se) + else: + self.conv1 = _conv7x7(3, 64, stride=2, res_base=self.res_base) + self.bn1 = _bn(64, self.res_base) + self.relu = ops.ReLU() + + if self.res_base: + self.pad = nn.Pad(paddings=((0, 0), (0, 0), (1, 1), (1, 1))) + self.maxpool = ops.MaxPool2d(kernel_size=3, stride=2, pad_mode="valid") + else: + self.maxpool = ops.MaxPool2d(kernel_size=3, stride=2, pad_mode="same") + + self.layer1 = self._make_layer(block, + layer_nums[0], + in_channel=in_channels[0], + out_channel=out_channels[0], + stride=strides[0], + use_se=self.use_se) + self.layer2 = self._make_layer(block, + layer_nums[1], + in_channel=in_channels[1], + out_channel=out_channels[1], + stride=strides[1], + use_se=self.use_se) + self.layer3 = self._make_layer(block, + layer_nums[2], + in_channel=in_channels[2], + out_channel=out_channels[2], + stride=strides[2], + use_se=self.use_se, + se_block=self.se_block) + self.layer4 = self._make_layer(block, + layer_nums[3], + in_channel=in_channels[3], + out_channel=out_channels[3], + stride=strides[3], + use_se=self.use_se, + se_block=self.se_block) + + self.avgpool = ops.AvgPool2d(4) + self.concat = P.Concat(1) + 
self.flatten = nn.Flatten() + self.end_point = _fc(16384, num_classes, use_se=self.use_se) + + def construct(self, x): + x = to_2channel(x[:, :3], x[:, 3:]) + if self.use_se: + x = self.conv1_0(x) + x = self.bn1_0(x) + x = self.relu(x) + x = self.conv1_1(x) + x = self.bn1_1(x) + x = self.relu(x) + x = self.conv1_2(x) + else: + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + if self.res_base: + x_1, x_2 = get_x_and_y(x) + x_1 = self.pad(x_1) + x_2 = self.pad(x_2) + x = to_2channel(x_1, x_2) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + out = self.avgpool(x) + out_x, out_y = get_x_and_y(out) + out = self.concat([out_x, out_y]) + out = self.flatten(out) + out = self.end_point(out) + return out + + def _make_layer(self, block, layer_num, in_channel, out_channel, stride, use_se=False, se_block=False): + """ + Make stage network of ResNet. + + Args: + block (Cell): Resnet block. + layer_num (int): Layer number. + in_channel (int): Input channel. + out_channel (int): Output channel. + stride (int): Stride size for the first convolutional layer. + se_block(bool): Use se block in SE-ResNet50 net. Default: False. + Returns: + SequentialCell, the output layer. + + Examples: + >>> _make_layer(ResidualBlock, 3, 128, 256, 2) + """ + layers = [] + + resnet_block = block(in_channel, out_channel, stride=stride, use_se=use_se) + layers.append(resnet_block) + if se_block: + for _ in range(1, layer_num - 1): + resnet_block = block(out_channel, out_channel, stride=1, use_se=use_se) + layers.append(resnet_block) + resnet_block = block(out_channel, out_channel, stride=1, use_se=use_se, se_block=se_block) + layers.append(resnet_block) + else: + for _ in range(1, layer_num): + resnet_block = block(out_channel, out_channel, stride=1, use_se=use_se) + layers.append(resnet_block) + return nn.SequentialCell(layers) + + +def resnet18(class_num=10): + """ + Get ResNet18 neural network. 
def resnet34(class_num=10):
    """
    Get ResNet34 neural network.

    Builds a `ResNet` from `ResidualBlockBase` blocks with stage depths [3, 4, 6, 3]
    and `res_base=True`.

    Args:
        class_num (int): Class number. Default: 10.

    Returns:
        Cell, cell instance of ResNet34 neural network.

    Examples:
        >>> net = resnet34(10)
    """
    return ResNet(ResidualBlockBase,
                  [3, 4, 6, 3],
                  [64, 64, 128, 256],
                  [64, 128, 256, 512],
                  [1, 2, 2, 2],
                  class_num,
                  res_base=True)
class ImageToDualImage:
    """Dataset transform that duplicates an image along its first axis."""

    @staticmethod
    def __call__(img):
        """Return `img` concatenated with itself along axis 0."""
        both_parts = [img, img]
        return np.concatenate(both_parts, axis=0)
def train(model, dataset, loss_fn, optimizer):
    """
    Run one pass over `dataset`, updating `model` through `optimizer`.

    The loss and the batch index are printed for every 100th batch (starting with batch 0).

    Args:
        model: Network called as `model(data)` to produce logits.
        dataset: Object providing `get_dataset_size()` and `create_tuple_iterator()`
            that yields `(data, label)` pairs.
        loss_fn: Callable `loss_fn(logits, label)` returning the loss.
        optimizer: Optimizer exposing `parameters` and callable on the gradients.
    """

    def forward_fn(data, label):
        # The logits are returned next to the loss as an auxiliary output.
        logits = model(data)
        return loss_fn(logits, label), logits

    # has_aux=True: only the first output (the loss) is differentiated, with respect to
    # the optimizer's parameters.
    grad_fn = ops.value_and_grad(forward_fn, None, optimizer.parameters, has_aux=True)

    def train_step(data, label):
        (loss, _), grads = grad_fn(data, label)
        # Tie the optimizer update to the returned loss.
        return ops.depend(loss, optimizer(grads))

    size = dataset.get_dataset_size()
    model.set_train()
    for batch, (data, label) in enumerate(dataset.create_tuple_iterator()):
        loss = train_step(data, label)

        if batch % 100 != 0:
            continue
        loss, current = loss.asnumpy(), batch
        print(f"loss: {loss:>7f} [{current:>3d}/{size:>3d}]")
def create_dataset(dataset_dir, batch_size, usage=None):
    """Build the batched CIFAR-10 pipeline used by the ResNet test script.

    Labels are cast to int32. Images are padded and randomly cropped,
    randomly flipped, resized to 224x224, converted to a tensor, normalized
    per channel and finally duplicated along axis 0 by ``ImageToDualImage``.

    Args:
        dataset_dir: Path to the CIFAR-10 dataset folder.
        batch_size: Number of samples per batch.
        usage: Dataset split forwarded to ``Cifar10Dataset`` (the script
            passes "train" or "test").

    Returns:
        The mapped and batched dataset.
    """
    dataset = Cifar10Dataset(dataset_dir=dataset_dir, usage=usage)
    type_cast_op = transforms.TypeCast(mstype.int32)

    # define map operations
    # NOTE(review): the random crop/flip also run when usage="test"; confirm
    # whether evaluation is meant to be augmented.
    trans = [vision.ToPIL(),
             vision.RandomCrop((32, 32), (4, 4, 4, 4)),
             vision.RandomHorizontalFlip(prob=0.5),
             vision.Resize((224, 224)),
             # ToTensor already rescales pixels from [0, 255] to [0.0, 1.0]
             # and converts HWC to CHW, so no additional Rescale(1 / 255) is
             # applied: a second division by 255 would squash the input to
             # ~[0, 0.004] before Normalize, whose statistics are on a 0-1 scale.
             vision.ToTensor(),
             vision.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010], is_hwc=False),
             ImageToDualImage()]

    dataset = dataset.map(operations=type_cast_op, input_columns="label")
    dataset = dataset.map(operations=trans, input_columns="image")
    dataset = dataset.batch(batch_size)
    return dataset
+ train_dataset = create_dataset(args.dataset, args.bs, "train") + test_dataset = create_dataset(args.dataset, args.bs, "test") + + for img, lbl in test_dataset.create_tuple_iterator(): + print(f"Shape of image [N, C, H, W]: {img.shape} {img.dtype}") + print(f"Shape of label: {lbl.shape} {lbl.dtype}") + break + + # Initialize hypercomplex model + net = resnet18() + + # Initialize loss function and optimizer + criterion = nn.CrossEntropyLoss() + optim = nn.SGD(net.trainable_params(), 1e-2) + + epochs = 10 + for t in range(epochs): + print(f"Epoch {t+1}\n-------------------------------") + train(net, train_dataset, criterion, optim) + test(net, test_dataset, criterion) + print("Done!") + + +if __name__ == "__main__": + main()