switch position of the code for second_order

This commit is contained in:
z00478463 2020-05-22 09:22:11 +08:00
parent 27c13794d2
commit da62a44d85
31 changed files with 546 additions and 549 deletions

View File

@ -16,6 +16,7 @@
network config setting, will be used in train.py and eval.py
"""
from easydict import EasyDict as ed
config = ed({
"class_num": 1000,
"batch_size": 32,

View File

@ -13,24 +13,26 @@
# limitations under the License.
# ============================================================================
from mindspore.nn.loss.loss import _Loss
from mindspore.ops import operations as P
from mindspore.ops import functional as F
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.common import dtype as mstype
import mindspore.nn as nn
from mindspore.nn.loss.loss import _Loss
from mindspore.ops import functional as F
from mindspore.ops import operations as P
class CrossEntropy(_Loss):
def __init__(self, smooth_factor=0., num_classes=1000):
super(CrossEntropy, self).__init__()
self.onehot = P.OneHot()
self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
self.off_value = Tensor(1.0 * smooth_factor / (num_classes -1), mstype.float32)
#self.cast = P.Cast()
self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32)
# self.cast = P.Cast()
self.ce = nn.SoftmaxCrossEntropyWithLogits()
self.mean = P.ReduceMean(False)
def construct(self, logit, label):
#one_hot_label = self.onehot(self.cast(label, mstype.int32),
# one_hot_label = self.onehot(self.cast(label, mstype.int32),
# F.shape(logit)[1], self.on_value, self.off_value)
one_hot_label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value)
loss = self.ce(logit, one_hot_label)
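(Reference note, not part of the diff: the on/off values above implement standard label smoothing. A minimal NumPy check, with num_classes=10 chosen only for illustration:)

import numpy as np

def smoothed_one_hot(labels, num_classes, smooth_factor=0.1):
    # true class gets 1 - smooth_factor, every other class gets
    # smooth_factor / (num_classes - 1), matching on_value/off_value above
    on_value = 1.0 - smooth_factor
    off_value = smooth_factor / (num_classes - 1)
    one_hot = np.full((labels.shape[0], num_classes), off_value, dtype=np.float32)
    one_hot[np.arange(labels.shape[0]), labels] = on_value
    return one_hot

labels = np.array([3, 7])
print(smoothed_one_hot(labels, num_classes=10).sum(axis=1))  # each row sums to 1.0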

View File

@ -12,10 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
from te import tik
from topi.cce import util
from mindspore.ops.op_info_register import op_info_register
@op_info_register("""{
"op_name": "CusBatchMatMul",
"imply_type": "TBE",
@ -71,11 +70,5 @@ from mindspore.ops.op_info_register import op_info_register
}
]
}""")
def CusBatchMatMul(input_x1, input_x2, output, transpose_a=False, transpose_b=True, kernel_name="batchmatmul"):
return

View File

@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
from te import tik
from topi.cce import util
from mindspore.ops.op_info_register import op_info_register
@op_info_register("""{
"op_name": "CusCholeskyTrsm",
"imply_type": "TBE",
@ -58,7 +58,5 @@ from mindspore.ops.op_info_register import op_info_register
}
]
}""")
def CusCholeskyTrsm(input_x,output, kernel_name):
def CusCholeskyTrsm(input_x, output, kernel_name):
return

View File

@ -12,42 +12,27 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
from mindspore.ops import prim_attr_register, PrimitiveWithInfer
from mindspore import Tensor
import mindspore as ms
from mindspore.ops.composite import multitype_ops as C
class CusBatchMatMul(PrimitiveWithInfer):
"""CusMatMulCube definition"""
@prim_attr_register
def __init__(self):
"""init CusMatMulCube"""
self.init_prim_io_names(inputs=['x1', 'x2'], outputs=['y'])
# self.transpose_a = transpose_a
# self.transpose_b = transpose_b
from .batch_matmul_impl import CusBatchMatMul
def get_bprop(self):
def bprop(x1, x2, out, dout):
return (C.zeros_like(x1),C.zeros_like(x2))
return (C.zeros_like(x1), C.zeros_like(x2))
return bprop
def infer_shape(self, data1_shape, data2_shape):
#shape = [1, data1_shape[1], data2_shape[2], 16, 16]
#return shape
# if self.transpose_a == True:
# k1, m = data1_shape
# else:
# m, k1 = data1_shape
# if self.transpose_b == True:
# n, k2 = data2_shape
# else:
# k2, n = data2_shape
# assert k1==k2
# shape = [m, n]
return data1_shape
def infer_dtype(self, data1_dtype, data2_dtype):
return data1_dtype
# return ms.common.dtype.tensor_type(getattr(ms, "float32"))
return data1_dtype

View File

@ -12,24 +12,23 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
from mindspore.ops import prim_attr_register, PrimitiveWithInfer
from mindspore import Tensor
class CusCholeskyTrsm(PrimitiveWithInfer):
"""CusCholeskyTrsm definition"""
@prim_attr_register
def __init__(self):
"""init CusCholeskyTrsm"""
self.init_prim_io_names(inputs=['x1'], outputs=['y'])
from .cholesky_trsm import CusCholeskyTrsm
def infer_shape(self, data1_shape):
m,n = data1_shape
m, n = data1_shape
if m >= 128:
return [m//128,128,128]
return [m // 128, 128, 128]
else:
return [1,64,64]
return [1, 64, 64]
def infer_dtype(self, data1_dtype):
return data1_dtype
return data1_dtype
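(Reference note: the shape rule in CusCholeskyTrsm.infer_shape above, restated as plain Python. The blocking interpretation is an assumption; the kernel body lives in the TBE implementation.)

def cholesky_trsm_out_shape(m):
    # inputs with m >= 128 are handled as m // 128 blocks of 128x128,
    # smaller inputs fall back to a single padded 64x64 block
    return [m // 128, 128, 128] if m >= 128 else [1, 64, 64]

print(cholesky_trsm_out_shape(256))  # [2, 128, 128]
print(cholesky_trsm_out_shape(64))   # [1, 64, 64]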

View File

@ -12,31 +12,30 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
from mindspore.ops import prim_attr_register, PrimitiveWithInfer
from mindspore import Tensor
from mindspore.ops.composite import multitype_ops as C
class CusFusedAbsMax1(PrimitiveWithInfer):
"""CusCholeskyTrsm definition"""
@prim_attr_register
def __init__(self, origin_shape = [-1,-1]):
def __init__(self, origin_shape=[-1, -1]):
"""init CusCholeskyTrsm"""
self.init_prim_io_names(inputs=['x1'], outputs=['y'])
from .fused_abs_max1 import CusFusedAbsMax1
self.origin_shape = origin_shape
def get_bprop(self):
def bprop(x, out, dout):
return (C.zeros_like(x),)
return bprop
def infer_shape(self, data1_shape):
if len(data1_shape) == 2:
return [1,]
return [1, ]
else:
return [32, 64]
# return [128,128]
def infer_dtype(self, data1_dtype):
return data1_dtype

View File

@ -13,26 +13,26 @@
# limitations under the License.
# ============================================================================
import numpy as np
from mindspore.ops import prim_attr_register, PrimitiveWithInfer
from mindspore import Tensor
from mindspore.ops.composite import multitype_ops as C
class CusImg2Col(PrimitiveWithInfer):
"""CusImg2Col definition"""
@prim_attr_register
def __init__(self, ksizes, strides, dilates = (1, 1, 1, 1), mode="NC1HWC0"):
def __init__(self, ksizes, strides, dilates=(1, 1, 1, 1), mode="NC1HWC0"):
"""init CusImg2Col"""
self.init_prim_io_names(inputs=['x1'], outputs=['y'])
self.ksizes = ksizes
self.strides = strides
self.dilates = dilates
self.mode = mode
from .img2col_impl import CusImg2Col
def get_bprop(self):
def bprop(x, out, dout):
return (C.zeros_like(x),)
return bprop
def infer_shape(self, data1_shape):

View File

@ -12,30 +12,31 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
from mindspore.ops import prim_attr_register, PrimitiveWithInfer
from mindspore import Tensor
import mindspore as ms
from mindspore.ops import prim_attr_register, PrimitiveWithInfer
from mindspore.ops.composite import multitype_ops as C
class CusMatMulCube(PrimitiveWithInfer):
"""CusMatMulCube definition"""
@prim_attr_register
def __init__(self, transpose_a=False, transpose_b=False):
"""init CusMatMulCube"""
self.init_prim_io_names(inputs=['x1', 'x2'], outputs=['y'])
self.transpose_a = transpose_a
self.transpose_b = transpose_b
from .matmul_cube_impl import CusMatMulCube
def get_bprop(self):
def bprop(x1, x2, out, dout):
return (C.zeros_like(x1),C.zeros_like(x2))
return (C.zeros_like(x1), C.zeros_like(x2))
return bprop
def infer_shape(self, data1_shape, data2_shape):
#shape = [1, data1_shape[1], data2_shape[2], 16, 16]
#return shape
# shape = [1, data1_shape[1], data2_shape[2], 16, 16]
# return shape
if self.transpose_a == True:
k1, m = data1_shape
else:
@ -44,9 +45,9 @@ class CusMatMulCube(PrimitiveWithInfer):
n, k2 = data2_shape
else:
k2, n = data2_shape
assert k1==k2
assert k1 == k2
shape = [m, n]
return shape
def infer_dtype(self, data1_dtype, data2_dtype):
return ms.common.dtype.tensor_type(getattr(ms, "float32"))
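(Reference note: the transpose handling in CusMatMulCube.infer_shape above, restated as plain Python; it mirrors the k1 == k2 assertion in the diff.)

def matmul_out_shape(shape_a, shape_b, transpose_a=False, transpose_b=False):
    # a transposed operand is stored as (k, m) / (n, k) instead of (m, k) / (k, n)
    k1, m = shape_a if transpose_a else shape_a[::-1]
    n, k2 = shape_b if transpose_b else shape_b[::-1]
    assert k1 == k2
    return [m, n]

print(matmul_out_shape([8, 4], [4, 5]))              # [8, 5]
print(matmul_out_shape([4, 8], [5, 4], True, True))  # [8, 5]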

View File

@ -12,27 +12,28 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
from mindspore.ops import prim_attr_register, PrimitiveWithInfer
from mindspore import Tensor
import mindspore as ms
from mindspore.ops import prim_attr_register, PrimitiveWithInfer
from mindspore.ops.composite import multitype_ops as C
class CusMatMulCubeDenseLeft(PrimitiveWithInfer):
"""CusMatMulCube definition"""
@prim_attr_register
def __init__(self):
"""init CusMatMulCube"""
self.init_prim_io_names(inputs=['x1', 'x2'], outputs=['y'])
from .matmul_cube_dense_left import CusMatMulCubeDenseLeft
def get_bprop(self):
def bprop(x1, x2, out, dout):
return (C.zeros_like(x1),C.zeros_like(x2))
return (C.zeros_like(x1), C.zeros_like(x2))
return bprop
def infer_shape(self, data1_shape, data2_shape):
return data2_shape
def infer_dtype(self, data1_dtype, data2_dtype):
return ms.common.dtype.tensor_type(getattr(ms, "float16"))

View File

@ -12,27 +12,27 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
from mindspore.ops import prim_attr_register, PrimitiveWithInfer
from mindspore import Tensor
import mindspore as ms
from mindspore.ops import prim_attr_register, PrimitiveWithInfer
from mindspore.ops.composite import multitype_ops as C
class CusMatMulCubeFraczRightMul(PrimitiveWithInfer):
"""CusMatMulCubeFraczRightMul definition"""
@prim_attr_register
def __init__(self):
"""init CusMatMulCubeFraczRightMul"""
self.init_prim_io_names(inputs=['x1', 'x2', 'x3'], outputs=['y'])
from .matmul_cube_fracz_right_mul_impl import CusMatMulCubeFraczRightMul
def get_bprop(self):
def bprop(x1, x2, x3, out, dout):
return (C.zeros_like(x1),C.zeros_like(x2),C.zeros_like(x3))
return (C.zeros_like(x1), C.zeros_like(x2), C.zeros_like(x3))
return bprop
def infer_shape(self, data1_shape, data2_shape, data3_shape):
return data1_shape
def infer_dtype(self, data1_dtype, data2_dtype, data3_dtype):
return ms.common.dtype.tensor_type(getattr(ms, "float32"))

View File

@ -12,29 +12,29 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
from mindspore.ops import prim_attr_register, PrimitiveWithInfer
from mindspore import Tensor
import mindspore as ms
from mindspore.ops.composite import multitype_ops as C
class CusMatrixCombine(PrimitiveWithInfer):
"""CusMatMulCube definition"""
@prim_attr_register
def __init__(self):
"""init CusMatMulCube"""
self.init_prim_io_names(inputs=['x'], outputs=['y'])
from .matrix_combine_impl import CusMatrixCombine
def get_bprop(self):
def bprop(x, out, dout):
return (C.zeros_like(x),)
return bprop
def infer_shape(self, data_shape):
a, b, c = data_shape
shape = [a*b, a*c]
shape = [a * b, a * c]
return shape
def infer_dtype(self, data_dtype):
return data_dtype
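(Reference note: the [a, b, c] -> [a*b, a*c] shape rule above is consistent with assembling `a` blocks of shape (b, c) into one block-diagonal matrix; that reading is an assumption, since the kernel body is in the TBE file. A NumPy sketch:)

import numpy as np

a, b, c = 4, 128, 128
blocks = np.random.randn(a, b, c)
combined = np.zeros((a * b, a * c), dtype=blocks.dtype)
for i in range(a):
    # place block i on the diagonal
    combined[i * b:(i + 1) * b, i * c:(i + 1) * c] = blocks[i]
print(combined.shape)  # (512, 512) == [a*b, a*c]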

View File

@ -12,35 +12,33 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
from mindspore.ops import prim_attr_register, PrimitiveWithInfer
from mindspore import Tensor
from mindspore.ops.composite import multitype_ops as C
class CusTranspose02314(PrimitiveWithInfer):
"""CusTranspose02314 definition"""
@prim_attr_register
def __init__(self):
"""init CusTranspose02314"""
self.init_prim_io_names(inputs=['x1'], outputs=['y'])
from .transpose02314_impl import CusTranspose02314
def get_bprop(self):
def bprop(x, out, dout):
return (C.zeros_like(x),)
return bprop
def infer_shape(self, data1_shape):
assert len(data1_shape) == 4
n, c, h, w = data1_shape
c0 = 16
c1 = c // 16
shape = (n * h * w, c1 * c0)
# axis_0, axis_1, axis_2, axis_3, axis_4 = data1_shape
# shape = (axis_0, axis_2, axis_3, axis_1, axis_4)
return shape
def infer_dtype(self, data1_dtype):
return data1_dtype
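(Reference note: a NumPy sketch of the layout change inferred by CusTranspose02314 above. An NCHW tensor is regrouped as (N*H*W, C) with C split into c1 groups of c0 = 16 channels; C here is already a multiple of 16.)

import numpy as np

n, c, h, w = 2, 32, 4, 4
x = np.arange(n * c * h * w).reshape(n, c, h, w)
# move channels last, then flatten the spatial/batch axes together
y = x.transpose(0, 2, 3, 1).reshape(n * h * w, c)
print(y.shape)  # (32, 32) == (n*h*w, (c//16)*16)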

View File

@ -13,9 +13,9 @@
# limitations under the License.
# ============================================================================
from te import tik
from topi.cce import util
from mindspore.ops.op_info_register import op_info_register
@op_info_register("""{
"op_name": "CusFusedAbsMax1",
"imply_type": "TBE",
@ -64,5 +64,5 @@ from mindspore.ops.op_info_register import op_info_register
}
]
}""")
def CusFusedAbsMax1(input_x, output, origin_shape = None, kernel_name="fused_abs_max1"):
def CusFusedAbsMax1(input_x, output, origin_shape=None, kernel_name="fused_abs_max1"):
return

View File

@ -13,9 +13,9 @@
# limitations under the License.
# ============================================================================
from te import tik
from topi.cce import util
from mindspore.ops.op_info_register import op_info_register
@op_info_register("""{
"op_name": "CusImg2ColNC1HWC0",
"imply_type": "TBE",
@ -82,6 +82,5 @@ from mindspore.ops.op_info_register import op_info_register
}
]
}""")
def CusImg2ColNC1HWC0(input_x, output, ksizes, strides, dilates, padding, kernel_name="img2col"):
return

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
copyright 2019 Huawei Technologies Co., Ltd
copyright 2020 Huawei Technologies Co., Ltd
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -18,22 +18,15 @@ limitations under the License.
matmul
"""
from __future__ import absolute_import
import te.lang.cce
import te.platform.cce_params as cce
from te.platform.fusion_manager import fusion_manager
from te import tvm
from topi import generic
from topi.cce import util
from impl.matmul_vector import matmul_vector_cce
from te import tik
from mindspore.ops.op_info_register import op_info_register
from topi.cce import util
# General limitation of the size for input shape: 2**31
SHAPE_SIZE_LIMIT = 2147483648
NoneType = type(None)
@op_info_register("""{
"op_name": "CusMatMulCubeDenseLeft",
"imply_type": "TBE",
@ -102,8 +95,7 @@ NoneType = type(None)
}
]
}""")
@util.check_input_type(dict, dict, (dict, NoneType), dict, bool, bool, str)
def CusMatMulCubeDenseLeft(input_x1, input_x2, bias=None, output_y={}, trans_a=False, trans_b=False, kernel_name="matmulcube"):
def CusMatMulCubeDenseLeft(input_x1, input_x2, bias=None, output_y={}, trans_a=False, trans_b=False,
kernel_name="matmulcube"):
return

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
copyright 2019 Huawei Technologies Co., Ltd
copyright 2020 Huawei Technologies Co., Ltd
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -18,19 +18,15 @@ limitations under the License.
matmul
"""
from __future__ import absolute_import
import te.platform.cce_params as cce
from te import tvm
from topi.cce import util
from te import tik
from mindspore.ops.op_info_register import op_info_register
from topi.cce import util
# General limitation of the size for input shape: 2**31
SHAPE_SIZE_LIMIT = 2147483648
NoneType = type(None)
@op_info_register("""{
"op_name": "CusMatMulCubeFraczLeftCast",
"imply_type": "TBE",
@ -99,7 +95,6 @@ NoneType = type(None)
}
]
}""")
# pylint: disable=locally-disabled,too-many-arguments, too-many-locals, too-many-statements
@util.check_input_type(dict, dict, (dict, NoneType), dict, bool, bool, str)
def CusMatMulCubeFraczLeftCast(input_x1, input_x2, bias=None, output_y={}, trans_a=False, trans_b=False,

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
copyright 2019 Huawei Technologies Co., Ltd
copyright 2020 Huawei Technologies Co., Ltd
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -18,21 +18,14 @@ limitations under the License.
matmul
"""
from __future__ import absolute_import
import te.lang.cce
import te.platform.cce_params as cce
from te.platform.fusion_manager import fusion_manager
from te import tvm
from topi import generic
from topi.cce import util
from te import tik
from impl.matmul_vector import matmul_vector_cce
from mindspore.ops.op_info_register import op_info_register
# General limitation of the size for input shape: 2**31
SHAPE_SIZE_LIMIT = 2147483648
NoneType = type(None)
@op_info_register("""{
"op_name": "CusMatMulCubeFraczRightMul",
"imply_type": "TBE",
@ -114,8 +107,6 @@ NoneType = type(None)
}
]
}""")
def CusMatMulCubeFraczRightMul(input_x1, input_x2, input_x3, bias=None, output_y={}, trans_a=False, trans_b=False, kernel_name="matmulcube"):
def CusMatMulCubeFraczRightMul(input_x1, input_x2, input_x3, bias=None, output_y={}, trans_a=False, trans_b=False,
kernel_name="matmulcube"):
return

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
copyright 2019 Huawei Technologies Co., Ltd
copyright 2020 Huawei Technologies Co., Ltd
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -18,20 +18,15 @@ limitations under the License.
matmul
"""
from __future__ import absolute_import
import te.lang.cce
import te.platform.cce_params as cce
from te import tvm
from topi import generic
from topi.cce import util
from impl.matmul_vector import matmul_vector_cce
from mindspore.ops.op_info_register import op_info_register
from topi.cce import util
# General limitation of the size for input shape: 2**31
SHAPE_SIZE_LIMIT = 2147483648
NoneType = type(None)
@op_info_register("""{
"op_name": "CusMatMulCube",
"imply_type": "TBE",
@ -112,8 +107,7 @@ NoneType = type(None)
}
]
}""")
# pylint: disable=locally-disabled,too-many-arguments, too-many-locals, too-many-statements
@util.check_input_type(dict, dict, (dict, NoneType), dict, bool, bool, str)
def CusMatMulCube(input_x1, input_x2, bias=None, output_y={}, trans_a=False, trans_b=False, kernel_name="matmulcube"):
return
return

View File

@ -13,9 +13,9 @@
# limitations under the License.
# ============================================================================
from te import tik
from topi.cce import util
from mindspore.ops.op_info_register import op_info_register
@op_info_register("""{
"op_name": "CusMatrixCombine",
"imply_type": "TBE",
@ -58,7 +58,5 @@ from mindspore.ops.op_info_register import op_info_register
}
]
}""")
def CusMatrixCombine(input_x, output,kernel_name="matrix_combine"):
def CusMatrixCombine(input_x, output, kernel_name="matrix_combine"):
return

View File

@ -13,9 +13,9 @@
# limitations under the License.
# ============================================================================
from te import tik
from topi.cce import util
from mindspore.ops.op_info_register import op_info_register
@op_info_register("""{
"op_name": "CusTranspose02314",
"imply_type": "TBE",
@ -58,6 +58,5 @@ from mindspore.ops.op_info_register import op_info_register
}
]
}""")
def CusTranspose02314(input_x, output, kernel_name="transpose02314"):
return

View File

@ -16,11 +16,12 @@
create train or eval dataset.
"""
import os
import mindspore.common.dtype as mstype
import mindspore.dataset.engine as de
import mindspore.dataset.transforms.vision.c_transforms as V_C
import mindspore.dataset.transforms.c_transforms as C2
from config_imagenet import config
import mindspore.dataset.transforms.vision.c_transforms as V_C
def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
"""
@ -41,7 +42,7 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False)
else:
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
num_shards=device_num, shard_id=rank_id)
num_shards=device_num, shard_id=rank_id)
image_size = 224
mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
@ -61,9 +62,9 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
V_C.Normalize(mean=mean, std=std),
V_C.HWC2CHW()
]
#type_cast_op = C2.TypeCast(mstype.float16)
# type_cast_op = C2.TypeCast(mstype.float16)
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", operations=transform_img, num_parallel_workers=8)
ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)

View File

@ -13,14 +13,17 @@
# limitations under the License.
# ============================================================================
"""learning rate generator"""
import numpy as np
import math
import numpy as np
def linear_warmup_lr(current_step, warmup_steps, base_lr, init_lr):
lr_inc = (float(base_lr) - float(init_lr)) / float(warmup_steps)
lr = float(init_lr) + lr_inc * current_step
return lr
def cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch, T_max, eta_min=0, num_periods=0.5):
base_lr = lr
warmup_init_lr = 0
@ -39,6 +42,7 @@ def cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch, T_max, et
lr_each_step.append(lr)
return np.array(lr_each_step).astype(np.float32)
def warmup_cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch, T_max, eta_min=0, num_periods=0.5):
base_lr = lr
warmup_init_lr = 0
@ -57,6 +61,7 @@ def warmup_cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch, T_
lr_each_step.append(lr)
return np.array(lr_each_step).astype(np.float32)
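(Reference note: a quick check of linear_warmup_lr defined above; the learning rate ramps linearly from init_lr to base_lr over warmup_steps. Values are illustrative.)

def linear_warmup_lr(current_step, warmup_steps, base_lr, init_lr):
    lr_inc = (float(base_lr) - float(init_lr)) / float(warmup_steps)
    return float(init_lr) + lr_inc * current_step

print([round(linear_warmup_lr(s, 5, 0.5, 0.0), 2) for s in range(6)])
# [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]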
def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch, lr_decay_mode):
"""
generate learning rate array

View File

@ -13,15 +13,15 @@
# limitations under the License.
# ============================================================================
"""Dataset help for minddata dataset"""
from mindspore._checkparam import check_bool
from mindspore import context
from mindspore.train.parallel_utils import ParallelMode
from mindspore.train._utils import _exec_datagraph, _get_types_and_shapes, _to_tensor, \
_construct_tensor_list, _to_full_shapes, _to_full_tensor
from mindspore._checkparam import check_bool
from mindspore.nn.wrap import GetNextSingleOp
from mindspore.parallel._utils import _get_device_num, _get_global_rank, _get_parallel_mode
from mindspore.train._utils import _exec_datagraph, _get_types_and_shapes, _to_tensor, \
_construct_tensor_list, _to_full_shapes, _to_full_tensor
from mindspore.train.parallel_utils import ParallelMode
class DatasetHelper:
"""
Helper class for using the MindData dataset.
@ -41,9 +41,10 @@ class DatasetHelper:
>>> for inputs in dataset_helper:
>>> outputs = network(*inputs)
"""
def __init__(self, dataset, first_order_iter=0, dataset_sink_mode=True):
check_bool(dataset_sink_mode)
iterclass = _DatasetIterGE
if not dataset_sink_mode:
iterclass = _DatasetIterFeed
@ -52,24 +53,25 @@ class DatasetHelper:
iterclass = _DatasetIterMSLoopSink
else:
iterclass = _DatasetIterMS
self.iter = iterclass(dataset, first_order_iter)
def __iter__(self):
return self.iter.__iter__()
# A temp solution for loop sink. Delete later
def types_shapes(self):
"""Get the types and shapes from dataset on current config."""
return self.iter.types_shapes()
def loop_size(self):
"""Get loop_size for every iteration."""
return self.iter.loop_size
class _DatasetIter:
"""Base iter for dataset help"""
def __init__(self, dataset):
self.loop_size = 1
if not hasattr(dataset, '__ME_INITED__'):
@ -78,7 +80,7 @@ class _DatasetIter:
else:
self.loop_size = dataset.__loop_size__
dataset.__ME_INITED__ = _exec_datagraph(dataset, self.loop_size).queue_name
self.ind = 0
self.dataset = dataset
dataset_types, dataset_shapes = _get_types_and_shapes(dataset)
@ -89,53 +91,57 @@ class _DatasetIter:
if _get_parallel_mode() in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL):
device_num = _get_device_num()
self.dataset_shapes = _to_full_shapes(dataset_shapes, device_num)
def __iter__(self):
self.ind = 0
return self
def __next__(self):
if self.ind >= self.loop_count:
raise StopIteration()
self.ind += 1
return self.op()
def types_shapes(self):
return self.dataset_types, self.dataset_shapes
def get_loop_count(self, dataset):
loop_count = 1
if hasattr(dataset, '__loop_size__'):
loop_size = dataset.__loop_size__
loop_count = int(dataset.get_dataset_size()/loop_size)
loop_count = int(dataset.get_dataset_size() / loop_size)
return loop_count
class _DatasetIterMSLoopSink(_DatasetIter):
"""Iter for context (enable_loop_sink=True)"""
def __init__(self, dataset, first_order_iter):
super(_DatasetIterMSLoopSink, self).__init__(dataset)
# self.loop_count = self.get_loop_count(dataset)
loop_size = dataset.__loop_size__ + first_order_iter
self.loop_count = int(dataset.get_dataset_size()/loop_size) * 2
self.loop_count = int(dataset.get_dataset_size() / loop_size) * 2
def op():
return tuple()
self.op = op
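(Reference note: the loop count set in _DatasetIterMSLoopSink above, with made-up numbers. The * 2 appears to reflect that each sinked loop is entered twice, once for the second-order phase and once for the first-order phase; that reading is an interpretation of this commit, not a quote.)

dataset_size = 5004       # dataset.get_dataset_size(), illustrative
sink_size = 834           # dataset.__loop_size__, illustrative
first_order_iter = 0
loop_size = sink_size + first_order_iter
print(int(dataset_size / loop_size) * 2)  # 12 iterations per epoch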
class _DatasetIterMS(_DatasetIter):
"""Iter for context (enable_loop_sink=False)"""
def __init__(self, dataset, first_order_order):
super(_DatasetIterMS, self).__init__(dataset)
self.loop_count = dataset.get_dataset_size()
self.loop_size = 1
queue_name = dataset.__ME_INITED__
self.op = GetNextSingleOp(self.dataset_types, self.dataset_shapes, queue_name)
class _DatasetIterGE(_DatasetIter):
"""Iter for ge"""
def __init__(self, dataset):
super(_DatasetIterGE, self).__init__(dataset)
self.loop_count = self.get_loop_count(dataset)
@ -145,14 +151,16 @@ class _DatasetIterGE(_DatasetIter):
if self.need_to_full:
batch_expand_num = _get_device_num()
tensor_list_run = _construct_tensor_list(self.dataset_types, self.dataset_shapes, batch_expand_num)
def op():
return tensor_list_run
self.op = op
class _DatasetIterFeed:
"""Iter for feed data"""
def __init__(self, dataset, first_order_order):
self.dataset = dataset
self.device_num = _get_device_num()
@ -161,18 +169,18 @@ class _DatasetIterFeed:
self.repeat_ind = 0
self.loop_count = dataset.get_dataset_size()
self.ind = 0
parallel_mode = context.get_auto_parallel_context("parallel_mode")
self.need_to_full = parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL)
def __iter__(self):
if self.repeat_ind % self.repeat_count == 0:
self.iter = self.dataset.__iter__()
self.repeat_ind += 1
self.ind = 0
return self
def __next__(self):
if self.ind >= self.loop_count:
raise StopIteration()

View File

@ -12,28 +12,30 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
from mindspore.nn.cell import Cell
import mindspore.common.dtype as mstype
from mindspore.communication.management import GlobalComm, get_group_size
from mindspore.nn.cell import Cell
from mindspore.ops import functional as F, composite as C, operations as P
from mindspore.ops.operations.comm_ops import AllReduce, ReduceOp
import mindspore.common.dtype as mstype
from mindspore.communication import create_group
reduce_opt = C.MultitypeFuncGraph("reduce_opt")
_all_reduce_A = AllReduce()
def _init_optimizer_allreduce(group):
global _all_reduce_A
_all_reduce_A = AllReduce(ReduceOp.SUM, GlobalComm.WORLD_COMM_GROUP)
_all_reduce_A.add_prim_attr('fusion', group)
@reduce_opt.register("Function", "Number", "Tensor")
def _tensors_allreduce_mean(mul, degree, grad):
degree = F.scalar_cast(degree, F.dtype(grad))
grad = _all_reduce_A(grad)
cast_op = P.Cast()
return mul(grad, cast_op(F.scalar_to_array(1.0/degree), F.dtype(grad)))
return mul(grad, cast_op(F.scalar_to_array(1.0 / degree), F.dtype(grad)))
@reduce_opt.register("Bool", "Tensor")
def _tensors_allreduce(allreduce_filter, grad):
@ -41,8 +43,10 @@ def _tensors_allreduce(allreduce_filter, grad):
return _all_reduce_A(grad)
return grad
_get_datatype = C.MultitypeFuncGraph("_get_datatype")
@_get_datatype.register("Tensor")
def _tensors_get_datatype(grad):
"""

View File

@ -13,29 +13,26 @@
# limitations under the License.
# ============================================================================
"""Model."""
import numpy as np
import mindspore.nn as nn
from mindspore import log as logger
from mindspore.common.tensor import Tensor
from mindspore.nn.metrics import get_metrics
from mindspore._checkparam import check_input_data, check_output_data, check_int_positive, check_bool
from mindspore.train.callback import _InternalCallbackParam, RunContext, _build_callbacks
import numpy as np
from mindspore import context
from mindspore import log as logger
from mindspore._c_expression import init_exec_dataset
from mindspore._checkparam import check_input_data, check_output_data, check_int_positive, check_bool
from mindspore.common import dtype as mstype
from mindspore.common.dtype import pytype_to_dtype
from mindspore.common.tensor import Tensor
from mindspore.nn.metrics import Loss
from mindspore.nn.metrics import get_metrics
from mindspore.nn.wrap.cell_wrapper import _VirtualDatasetCell
from mindspore.parallel._utils import _get_parallel_mode, _get_device_num, _get_global_rank, \
_get_parameter_broadcast, _device_number_check, _parameter_broadcast_check
from mindspore.nn.metrics import Loss
from mindspore.nn.wrap import WithLossCell, WithEvalCell, \
DataWrapper
from mindspore.nn.wrap.cell_wrapper import _VirtualDatasetCell
from mindspore.train.parallel_utils import ParallelMode
from mindspore.common import dtype as mstype
from second_order.dataset_helper import DatasetHelper
from mindspore.train import amp
from mindspore.common.dtype import pytype_to_dtype
from mindspore._c_expression import init_exec_dataset
from mindspore.common.parameter import Parameter
from mindspore.train.callback import _InternalCallbackParam, RunContext, _build_callbacks
from mindspore.train.parallel_utils import ParallelMode
from second_order.dataset_helper import DatasetHelper
def _convert_type(types):
"""
Convert from numpy type to tensor type.
@ -51,18 +48,20 @@ def _convert_type(types):
ms_type = pytype_to_dtype(np_type)
ms_types.append(ms_type)
return ms_types
def _get_types_and_shapes(dataset):
"""Get dataset types and shapes."""
dataset_types = _convert_type(dataset.output_types())
dataset_shapes = dataset.output_shapes()
return dataset_types, dataset_shapes
def _exec_datagraph(exec_dataset, dataset_size, phase='dataset'):
"""Initialize and execute the dataset graph."""
batch_size = exec_dataset.get_batch_size()
input_indexs = exec_dataset.input_indexs
# transform data format
dataset_types, dataset_shapes = _get_types_and_shapes(exec_dataset)
init_exec_dataset(exec_dataset.__ME_INITED__,
@ -72,8 +71,8 @@ def _exec_datagraph(exec_dataset, dataset_size, phase='dataset'):
dataset_shapes,
input_indexs,
phase=phase)
class Model:
"""
High-Level API for Training or Testing.
@ -131,7 +130,7 @@ class Model:
>>> dataset = get_dataset()
>>> model.train(2, dataset)
"""
def __init__(self, network, loss_fn=None, optimizer=None, metrics=None, eval_network=None,
eval_indexes=None, amp_level="O0", frequency=278, **kwargs):
self._network = network
@ -152,49 +151,49 @@ class Model:
self._device_number = _get_device_num()
self._global_rank = _get_global_rank()
self._parameter_broadcast = _get_parameter_broadcast()
self._train_network = self._build_train_network()
self._build_eval_network(metrics, eval_network, eval_indexes)
self._build_predict_network()
def _check_kwargs(self, kwargs):
for arg in kwargs:
if arg not in ['loss_scale_manager', 'keep_batchnorm_fp32']:
raise ValueError(f"Unsupport arg '{arg}'")
raise ValueError(f"Unsupport arg '{arg}'")
def _build_train_network(self):
"""Build train network"""
network = self._network
if self._optimizer:
if self._loss_scale_manager_set:
network = amp.build_train_network(network,
self._optimizer,
self._loss_fn,
level=self._amp_level,
loss_scale_manager=self._loss_scale_manager,
keep_batchnorm_fp32=self._keep_bn_fp32)
self._optimizer,
self._loss_fn,
level=self._amp_level,
loss_scale_manager=self._loss_scale_manager,
keep_batchnorm_fp32=self._keep_bn_fp32)
else:
network = amp.build_train_network(network,
self._optimizer,
self._loss_fn,
level=self._amp_level,
keep_batchnorm_fp32=self._keep_bn_fp32)
self._optimizer,
self._loss_fn,
level=self._amp_level,
keep_batchnorm_fp32=self._keep_bn_fp32)
elif self._loss_fn:
network = nn.WithLossCell(network, self._loss_fn)
# may need to handle the case where loss_fn is not None but optimizer is None
return network
def _build_eval_network(self, metrics, eval_network, eval_indexes):
"""Build the network for evaluation."""
self._metric_fns = get_metrics(metrics)
if not self._metric_fns:
return
if eval_network is not None:
if eval_indexes is not None and not (isinstance(eval_indexes, list) and len(eval_indexes) == 3):
raise ValueError("Eval_indexes must be a list or None. If eval_indexes is a list, length of it \
must be three. But got {}".format(eval_indexes))
self._eval_network = eval_network
self._eval_indexes = eval_indexes
else:
@ -202,27 +201,27 @@ class Model:
raise ValueError("loss_fn can not be None.")
self._eval_network = nn.WithEvalCell(self._network, self._loss_fn)
self._eval_indexes = [0, 1, 2]
def _build_predict_network(self):
"""Build the network for prediction."""
self._predict_network = self._network
if self._parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL):
self._predict_network = _VirtualDatasetCell(self._network)
def _clear_metrics(self):
"""Clear metrics local values."""
for metric in self._metric_fns.values():
metric.clear()
def _update_metrics(self, outputs):
"""Update metrics local values."""
if not isinstance(outputs, tuple):
raise ValueError("The `outputs` is not tuple.")
if self._eval_indexes is not None and len(outputs) < 3:
raise ValueError("The length of `outputs` must be greater than or equal to 3, \
but got {}".format(len(outputs)))
for metric in self._metric_fns.values():
if self._eval_indexes is None:
metric.update(*outputs)
@ -231,14 +230,14 @@ class Model:
metric.update(outputs[self._eval_indexes[0]])
else:
metric.update(outputs[self._eval_indexes[1]], outputs[self._eval_indexes[2]])
def _get_metrics(self):
"""Get metrics local values."""
metrics = dict()
for key, value in self._metric_fns.items():
metrics[key] = value.eval()
return metrics
def _get_scaling_sens(self):
"""get the scaling sens"""
scaling_sens = 1
@ -247,7 +246,7 @@ class Model:
if self._parallel_mode == ParallelMode.DATA_PARALLEL:
scaling_sens /= self._device_number
return scaling_sens
def _train(self, epoch, train_dataset, callbacks=None, dataset_sink_mode=True):
"""
Training.
@ -266,10 +265,10 @@ class Model:
"""
epoch = check_int_positive(epoch)
self._train_network.set_train()
if self._parameter_broadcast:
self._train_network.set_broadcast_flag()
# build callback list
list_callback = _build_callbacks(callbacks)
cb_params = _InternalCallbackParam()
@ -283,7 +282,7 @@ class Model:
cb_params.device_number = self._device_number
cb_params.train_dataset = train_dataset
cb_params.list_callback = list_callback
if dataset_sink_mode:
if context.get_context("mode") == context.PYNATIVE_MODE:
logger.warning("The pynative mode cannot support dataset sink mode currently."
@ -293,7 +292,6 @@ class Model:
self._train_dataset_sink_process(epoch, train_dataset, list_callback, cb_params)
else:
self._train_process(epoch, train_dataset, list_callback, cb_params)
def _train_dataset_sink_process(self, epoch, train_dataset, list_callback=None, cb_params=None):
"""
@ -317,7 +315,7 @@ class Model:
if not hasattr(train_dataset, '__ME_INITED__') and context.get_context("enable_loop_sink") \
and not context.get_context("enable_ge"):
need_wrap = True
dataset_helper = DatasetHelper(train_dataset, iter_first_order)
# remove later to deal with loop sink
if need_wrap:
@ -330,7 +328,7 @@ class Model:
loop_size = dataset_helper.loop_size()
run_context = RunContext(cb_params)
list_callback.begin(run_context)
# used to stop training early, e.g. by stopAtTime or stopAtStep
should_stop = False
has_do_train1_dataset = False
@ -338,7 +336,7 @@ class Model:
for i in range(epoch):
cb_params.cur_epoch_num = i + 1
list_callback.epoch_begin(run_context)
# in data sink mode dataset_helper only iterates once; otherwise it iterates epoch_size times
for inputs in dataset_helper:
list_callback.step_begin(run_context)
@ -357,14 +355,14 @@ class Model:
outputs = self._train_network(*inputs)
cb_params.net_outputs = outputs
list_callback.step_end(run_context)
list_callback.epoch_end(run_context)
should_stop = should_stop or run_context.get_stop_requested()
if should_stop:
break
list_callback.end(run_context)
def _train_process(self, epoch, train_dataset, list_callback=None, cb_params=None):
"""
Training process. The data would be passed to network directly.
@ -385,12 +383,12 @@ class Model:
_callback_wrapper(list_callback, run_context, "begin")
# used to stop training early, e.g. by stopAtTime or stopAtStep
should_stop = False
for i in range(epoch):
cb_params.cur_epoch_num = i + 1
_callback_wrapper(list_callback, run_context, "epoch_begin")
for next_element in dataset_helper:
len_element = len(next_element)
if self._loss_fn and len_element != 2:
@ -398,33 +396,33 @@ class Model:
"return two elements, but got {}".format(len_element))
cb_params.cur_step_num += 1
_callback_wrapper(list_callback, run_context, "step_begin")
overflow = False
if self._loss_scale_manager and self._loss_scale_manager.get_drop_overflow_update():
scaling_sens = self._get_scaling_sens()
next_element = tuple(next_element) + (Tensor(scaling_sens, mstype.float32),)
outputs = self._train_network(*next_element)
cb_params.net_outputs = outputs
if self._loss_scale_manager and self._loss_scale_manager.get_drop_overflow_update():
_, overflow, _ = outputs
overflow = np.all(overflow.asnumpy())
self._loss_scale_manager.update_loss_scale(overflow)
_callback_wrapper(list_callback, run_context, "step_end")
should_stop = should_stop or run_context.get_stop_requested()
if should_stop:
break
train_dataset.reset()
_callback_wrapper(list_callback, run_context, "epoch_end")
should_stop = should_stop or run_context.get_stop_requested()
if should_stop:
break
_callback_wrapper(list_callback, run_context, "end")
def train(self, epoch, train_dataset, callbacks=None, dataset_sink_mode=True):
"""
Training API where the iteration is controlled by python front-end.
@ -470,12 +468,12 @@ class Model:
if context.get_context("device_target") in ["CPU", "GPU"] and context.get_context("enable_loop_sink"):
raise ValueError("CPU and GPU can't support loop sink, please set enable_loop_sink=False.")
self._train(epoch,
train_dataset,
callbacks=callbacks,
dataset_sink_mode=dataset_sink_mode)
def _eval_dataset_sink_process(self, valid_dataset, list_callback=None, cb_params=None):
"""
Evaluation. The data would be passed to network through dataset channel.
@ -489,42 +487,42 @@ class Model:
Dict, returns the loss value & metrics values for the model in test mode.
"""
_device_number_check(self._parallel_mode, self._device_number)
run_context = RunContext(cb_params)
# remove later to deal with loop sink
need_wrap = False
if not hasattr(valid_dataset, '__ME_INITED__') and context.get_context("enable_loop_sink") \
and not context.get_context("enable_ge"):
and not context.get_context("enable_ge"):
need_wrap = True
valid_dataset.__loop_size__ = 1
dataset_helper = DatasetHelper(valid_dataset)
# remove later to deal with loop sink
if need_wrap:
self._eval_network = nn.DataWrapper(self._eval_network, *(dataset_helper.types_shapes()),
valid_dataset.__ME_INITED__)
valid_dataset.__ME_INITED__)
self._eval_network.set_train(mode=False)
self._eval_network.phase = 'eval'
list_callback.begin(run_context)
for inputs in dataset_helper:
cb_params.cur_step_num += 1
list_callback.step_begin(run_context)
outputs = self._eval_network(*inputs)
cb_params.net_outputs = outputs
list_callback.step_end(run_context)
self._update_metrics(outputs)
metrics = self._get_metrics()
cb_params.metrics = metrics
list_callback.end(run_context)
return metrics
def _eval_process(self, valid_dataset, list_callback=None, cb_params=None):
"""
Evaluation. The data would be passed to network directly.
@ -539,7 +537,7 @@ class Model:
"""
run_context = RunContext(cb_params)
list_callback.begin(run_context)
dataset_helper = DatasetHelper(valid_dataset, dataset_sink_mode=False)
for next_element in dataset_helper:
cb_params.cur_step_num += 1
@ -548,12 +546,12 @@ class Model:
cb_params.net_outputs = outputs
list_callback.step_end(run_context)
self._update_metrics(outputs)
metrics = self._get_metrics()
cb_params.metrics = metrics
list_callback.end(run_context)
return metrics
def eval(self, valid_dataset, callbacks=None, dataset_sink_mode=True):
"""
Evaluation API where the iteration is controlled by python front-end.
@ -584,7 +582,7 @@ class Model:
check_bool(dataset_sink_mode)
if not self._metric_fns:
raise ValueError("metric fn can not be None or empty.")
list_callback = _build_callbacks(callbacks)
cb_params = _InternalCallbackParam()
cb_params.eval_network = self._eval_network
@ -592,16 +590,16 @@ class Model:
cb_params.batch_num = valid_dataset.get_dataset_size()
cb_params.mode = "eval"
cb_params.cur_step_num = 0
self._eval_network.set_train(mode=False)
self._eval_network.phase = 'eval'
self._clear_metrics()
if dataset_sink_mode:
return self._eval_dataset_sink_process(valid_dataset, list_callback, cb_params)
return self._eval_process(valid_dataset, list_callback, cb_params)
def predict(self, *predict_data):
"""
Generates output predictions for the input samples.
@ -625,9 +623,9 @@ class Model:
self._predict_network.set_train(False)
check_input_data(*predict_data, data_class=Tensor)
result = self._predict_network(*predict_data)
check_output_data(result)
return result
__all__ = ["Model"]

View File

@ -13,13 +13,15 @@
# limitations under the License.
# ============================================================================
"""ResNet."""
import numpy as np
import mindspore.nn as nn
from mindspore.ops import operations as P
from mindspore.common.tensor import Tensor
from second_order.thor_layer import Conv2d_Thor, Dense_Thor
import math
import mindspore.nn as nn
import numpy as np
from mindspore.common.tensor import Tensor
from mindspore.ops import operations as P
from second_order.thor_layer import Conv2d_Thor, Dense_Thor
def calculate_gain(nonlinearity, param=None):
linear_fns = ['linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d', 'conv_transpose2d', 'conv_transpose3d']
if nonlinearity in linear_fns or nonlinearity == 'sigmoid':
@ -39,12 +41,13 @@ def calculate_gain(nonlinearity, param=None):
return math.sqrt(2.0 / (1 + negative_slope ** 2))
else:
raise ValueError("Unsupported nonlinearity {}".format(nonlinearity))
def _calculate_fan_in_and_fan_out(tensor):
dimensions = len(tensor)
if dimensions < 2:
raise ValueError("Fan in and fan out can not be computed for tensor with fewer than 2 dimensions")
if dimensions == 2: # Linear
fan_in = tensor[1]
fan_out = tensor[0]
@ -57,22 +60,25 @@ def _calculate_fan_in_and_fan_out(tensor):
fan_in = num_input_fmaps * receptive_field_size
fan_out = num_output_fmaps * receptive_field_size
return fan_in, fan_out
def _calculate_correct_fan(tensor, mode):
mode = mode.lower()
valid_modes = ['fan_in', 'fan_out']
if mode not in valid_modes:
raise ValueError("Mode {} not supported, please use one of {}".format(mode, valid_modes))
fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
return fan_in if mode == 'fan_in' else fan_out
def kaiming_normal(inputs_shape, a=0, mode='fan_in', nonlinearity='leaky_relu'):
fan = _calculate_correct_fan(inputs_shape, mode)
gain = calculate_gain(nonlinearity, a)
std = gain / math.sqrt(fan)
return np.random.normal(0, std, size=inputs_shape).astype(np.float32)
def kaiming_uniform(inputs_shape, a=0, mode='fan_in', nonlinearity='leaky_relu'):
fan = _calculate_correct_fan(inputs_shape, mode)
gain = calculate_gain(nonlinearity, a)
@ -80,6 +86,7 @@ def kaiming_uniform(inputs_shape, a=0, mode='fan_in', nonlinearity='leaky_relu')
bound = math.sqrt(3.0) * std # Calculate uniform bounds from standard deviation
return np.random.uniform(-bound, bound, size=inputs_shape).astype(np.float32)
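(Reference note: a sanity check of _calculate_fan_in_and_fan_out above for a conv weight of shape (out_c, in_c, kh, kw): fan_in = in_c*kh*kw and fan_out = out_c*kh*kw.)

def fan_in_out(shape):
    receptive = 1
    for s in shape[2:]:          # receptive field size = kh * kw
        receptive *= s
    return shape[1] * receptive, shape[0] * receptive

print(fan_in_out((64, 3, 7, 7)))  # (147, 3136): fan_in, fan_out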
def _conv3x3(in_channel, out_channel, stride=1, damping=0.03, loss_scale=1, frequency=278):
weight_shape = (out_channel, in_channel, 3, 3)
weight = Tensor(kaiming_normal(weight_shape, mode="fan_out", nonlinearity='relu'))
@ -88,35 +95,41 @@ def _conv3x3(in_channel, out_channel, stride=1, damping=0.03, loss_scale=1, freq
damping=damping, loss_scale=loss_scale, frequency=frequency)
# return nn.Conv2d(in_channel, out_channel,
# kernel_size=3, stride=stride, padding=0, pad_mode='same', weight_init=weight)
def _conv1x1(in_channel, out_channel, stride=1, damping=0.03, loss_scale=1, frequency=278):
weight_shape = (out_channel, in_channel, 1, 1)
weight = Tensor(kaiming_normal(weight_shape, mode="fan_out", nonlinearity='relu'))
return Conv2d_Thor(in_channel, out_channel,
kernel_size=1, stride=stride, padding=0, pad_mode='same', weight_init=weight,
damping=damping, loss_scale=loss_scale, frequency=frequency)
kernel_size=1, stride=stride, padding=0, pad_mode='same', weight_init=weight,
damping=damping, loss_scale=loss_scale, frequency=frequency)
def _conv7x7(in_channel, out_channel, stride=1, damping=0.03, loss_scale=1, frequency=278):
weight_shape = (out_channel, in_channel, 7, 7)
weight = Tensor(kaiming_normal(weight_shape, mode="fan_out", nonlinearity='relu'))
return Conv2d_Thor(in_channel, out_channel,
kernel_size=7, stride=stride, padding=0, pad_mode='same', weight_init=weight,
damping=damping, loss_scale=loss_scale, frequency=frequency)
kernel_size=7, stride=stride, padding=0, pad_mode='same', weight_init=weight,
damping=damping, loss_scale=loss_scale, frequency=frequency)
def _bn(channel):
return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9,
gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1)
def _bn_last(channel):
return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9,
gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1)
def _fc(in_channel, out_channel, damping, loss_scale, frequency):
weight_shape = (out_channel, in_channel)
weight = Tensor(kaiming_uniform(weight_shape, a = math.sqrt(5)))
weight = Tensor(kaiming_uniform(weight_shape, a=math.sqrt(5)))
return Dense_Thor(in_channel, out_channel, has_bias=False, weight_init=weight, bias_init=0,
damping=damping, loss_scale=loss_scale, frequency=frequency)
class ResidualBlock(nn.Cell):
"""
ResNet V1 residual block definition.
@ -133,7 +146,7 @@ class ResidualBlock(nn.Cell):
>>> ResidualBlock(3, 256, stride=2)
"""
expansion = 4
def __init__(self,
in_channel,
out_channel,
@ -142,54 +155,58 @@ class ResidualBlock(nn.Cell):
loss_scale=1,
frequency=278):
super(ResidualBlock, self).__init__()
channel = out_channel // self.expansion
self.conv1 = _conv1x1(in_channel, channel, stride=1, damping=damping, loss_scale=loss_scale, frequency=frequency)
self.conv1 = _conv1x1(in_channel, channel, stride=1, damping=damping, loss_scale=loss_scale,
frequency=frequency)
self.bn1 = _bn(channel)
self.conv2 = _conv3x3(channel, channel, stride=stride, damping=damping, loss_scale=loss_scale, frequency=frequency)
self.conv2 = _conv3x3(channel, channel, stride=stride, damping=damping, loss_scale=loss_scale,
frequency=frequency)
self.bn2 = _bn(channel)
self.conv3 = _conv1x1(channel, out_channel, stride=1, damping=damping, loss_scale=loss_scale, frequency=frequency)
self.conv3 = _conv1x1(channel, out_channel, stride=1, damping=damping, loss_scale=loss_scale,
frequency=frequency)
self.bn3 = _bn_last(out_channel)
self.relu = nn.ReLU()
self.down_sample = False
if stride != 1 or in_channel != out_channel:
self.down_sample = True
self.down_sample_layer = None
if self.down_sample:
self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, stride,
damping=damping, loss_scale=loss_scale, frequency=frequency),
damping=damping, loss_scale=loss_scale,
frequency=frequency),
_bn(out_channel)])
self.add = P.TensorAdd()
def construct(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.down_sample:
identity = self.down_sample_layer(identity)
out = self.add(out, identity)
out = self.relu(out)
return out
class ResNet(nn.Cell):
"""
ResNet architecture.
@ -212,7 +229,7 @@ class ResNet(nn.Cell):
>>> [1, 2, 2, 2],
>>> 10)
"""
def __init__(self,
block,
layer_nums,
@ -224,15 +241,15 @@ class ResNet(nn.Cell):
loss_scale,
frequency):
super(ResNet, self).__init__()
if not len(layer_nums) == len(in_channels) == len(out_channels) == 4:
raise ValueError("the length of layer_num, in_channels, out_channels list must be 4!")
self.conv1 = _conv7x7(3, 64, stride=2, damping=damping, loss_scale=loss_scale, frequency=frequency)
self.bn1 = _bn(64)
self.relu = P.ReLU()
self.maxpool = P.MaxPoolWithArgmax(padding="same", ksize=3, strides=2)
self.layer1 = self._make_layer(block,
layer_nums[0],
in_channel=in_channels[0],
@ -253,7 +270,7 @@ class ResNet(nn.Cell):
layer_nums[2],
in_channel=in_channels[2],
out_channel=out_channels[2],
stride=strides[2],damping=damping,
stride=strides[2], damping=damping,
loss_scale=loss_scale,
frequency=frequency)
self.layer4 = self._make_layer(block,
@ -264,11 +281,11 @@ class ResNet(nn.Cell):
damping=damping,
loss_scale=loss_scale,
frequency=frequency)
self.mean = P.ReduceMean(keep_dims=True)
self.flatten = nn.Flatten()
self.end_point = _fc(out_channels[3], num_classes, damping=damping, loss_scale=loss_scale, frequency=frequency)
def _make_layer(self, block, layer_num, in_channel, out_channel, stride,
damping, loss_scale, frequency):
"""
@ -288,36 +305,36 @@ class ResNet(nn.Cell):
>>> _make_layer(ResidualBlock, 3, 128, 256, 2)
"""
layers = []
resnet_block = block(in_channel, out_channel, stride=stride,
damping=damping, loss_scale=loss_scale, frequency=frequency)
layers.append(resnet_block)
for _ in range(1, layer_num):
resnet_block = block(out_channel, out_channel, stride=1,
damping=damping, loss_scale=loss_scale, frequency=frequency)
layers.append(resnet_block)
return nn.SequentialCell(layers)
def construct(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
c1, argmax = self.maxpool(x)
c2 = self.layer1(c1)
c3 = self.layer2(c2)
c4 = self.layer3(c3)
c5 = self.layer4(c4)
out = self.mean(c5, (2, 3))
out = self.flatten(out)
out = self.end_point(out)
return out
def resnet50(class_num=10, damping=0.03, loss_scale=1, frequency=278):
"""
Get ResNet50 neural network.

View File

@ -13,42 +13,47 @@
# limitations under the License.
# ============================================================================
"""momentum"""
import numpy as np
from mindspore.ops import functional as F, composite as C, operations as P
from mindspore.common.parameter import Parameter
from mindspore.common.tensor import Tensor
import mindspore.common.dtype as mstype
from mindspore.nn.optim.optimizer import Optimizer
from mindspore.common.parameter import ParameterTuple
from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_mirror_mean
from mindspore.common.initializer import initializer
from model.grad_reducer_thor import DistributedGradReducerThor
from cus_ops.cus_matmul_cube_fracz_right_mul import CusMatMulCubeFraczRightMul
from cus_ops.cus_fused_abs_max1 import CusFusedAbsMax1
from cus_ops.cus_matmul_cube_fracz_left_cast import CusMatMulCubeFraczLeftCast
from cus_ops.cus_matmul_cube_dense_left import CusMatMulCubeDenseLeft
from cus_ops.cus_matmul_cube_dense_right import CusMatMulCubeDenseRight
from cus_ops.cus_matmul_cube_fracz_left_cast import CusMatMulCubeFraczLeftCast
from mindspore.common.initializer import initializer
from mindspore.common.parameter import Parameter
from mindspore.common.parameter import ParameterTuple
from mindspore.common.tensor import Tensor
from mindspore.nn.optim.optimizer import Optimizer
from mindspore.ops import functional as F, composite as C, operations as P
from mindspore.parallel._utils import _get_device_num, _get_mirror_mean
from cus_ops.cus_matmul_cube_dense_left import CusMatMulCubeDenseLeft
from cus_ops.cus_matmul_cube_fracz_right_mul import CusMatMulCubeFraczRightMul
from model.grad_reducer_thor import DistributedGradReducerThor
momentum_opt = C.MultitypeFuncGraph("momentum_opt")
@momentum_opt.register("Function", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor")
def _tensor_run_opt_ext(opt, learning_rate, momentum, gradient, weight, moment):
"""Apply momentum optimizer to the weight parameter using Tensor."""
success = True
success = F.depend(success, opt(weight, moment, learning_rate, gradient, momentum))
return success
op_add = P.AddN()
apply_decay = C.MultitypeFuncGraph("apply_decay")
@apply_decay.register("Number", "Bool", "Tensor", "Tensor")
def _tensor_apply_decay(weight_decay, if_apply, weight, gradient):
"""Get grad with weight_decay."""
if if_apply:
return op_add((weight * weight_decay, gradient))
return gradient
class THOR(Optimizer):
def __init__(self, params, learning_rate, momentum, matrix_A, matrix_G, A_inv_max, G_inv_max, weight_decay=0.0, loss_scale=1.0,
def __init__(self, params, learning_rate, momentum, matrix_A, matrix_G, A_inv_max, G_inv_max, weight_decay=0.0,
loss_scale=1.0,
decay_filter=lambda x: x.name not in []):
super(THOR, self).__init__(learning_rate, params, weight_decay, loss_scale)
if isinstance(momentum, float) and momentum < 0.0:
@ -93,9 +98,10 @@ class THOR(Optimizer):
self.matrix_A_inv = ()
self.matrix_G_inv = ()
self.matrix_max_inv = ()
for i in range(54):
self.matrix_max_inv = self.matrix_max_inv + (Parameter(initializer(1, [1], mstype.float32), name="matrix_max"+str(i), requires_grad=False), )
self.matrix_max_inv = self.matrix_max_inv + (
Parameter(initializer(1, [1], mstype.float32), name="matrix_max" + str(i), requires_grad=False),)
self.log = P.Log()
self.exp = P.Exp()
self.sqrt = P.Sqrt()
@ -105,7 +111,7 @@ class THOR(Optimizer):
self.thor = True
self.weight_decay = weight_decay * loss_scale
self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
def construct(self, gradients):
params = self.params
moments = self.moments
@ -124,9 +130,9 @@ class THOR(Optimizer):
matrix_G = F.depend(matrix_G, g)
A_max = F.depend(A_max, g)
G_max = F.depend(G_max, g)
matrix_A_allreduce = matrix_A_allreduce + (matrix_A, )
matrix_G_allreduce = matrix_G_allreduce + (matrix_G, )
matrix_A_max_allreduce = matrix_A_max_allreduce + (A_max, )
matrix_A_allreduce = matrix_A_allreduce + (matrix_A,)
matrix_G_allreduce = matrix_G_allreduce + (matrix_G,)
matrix_A_max_allreduce = matrix_A_max_allreduce + (A_max,)
matrix_G_max_allreduce = matrix_G_max_allreduce + (G_max,)
matrix_A_allreduce = self.grad_reducer_A(matrix_A_allreduce)
matrix_G_allreduce = self.grad_reducer_G(matrix_G_allreduce)
@ -182,13 +188,13 @@ class THOR(Optimizer):
new_grads = new_grads + (g,)
else:
g = self.cube_matmul_left(matrix_G, g)
g =self.cube_matmul_right_mul(g, matrix_A, matrix_max)
g = self.cube_matmul_right_mul(g, matrix_A, matrix_max)
new_grads = new_grads + (g, gradients[i * 3 + 1], gradients[i * 3 + 2])
gradients = new_grads
if self.weight_decay > 0:
gradients = self.hyper_map(F.partial(apply_decay, self.weight_decay), self.decay_flags,
params, gradients)
params, gradients)
gradients = self.scale_grad(gradients)
lr = self.get_lr()
success = self.hyper_map(F.partial(momentum_opt, self.opt, lr, self.momentum), gradients, params, moments)
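(Reference note: a rough NumPy picture of the per-layer THOR update above. The gradient is preconditioned on both sides by the inverse Kronecker factors and scaled by matrix_max before the momentum step; all shapes and values here are purely illustrative.)

import numpy as np

g = np.random.randn(4, 3)    # gradient of a 4x3 weight
matrix_G = 0.5 * np.eye(4)   # G^-1 factor (output side), illustrative
matrix_A = 2.0 * np.eye(3)   # A^-1 factor (input side), illustrative
matrix_max = 1.0             # scaling derived from A_inv_max / G_inv_max
g = matrix_G @ g @ matrix_A * matrix_max  # cube_matmul_left + cube_matmul_right_mul
print(g.shape)               # (4, 3), same shape as the weight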

View File

@ -13,27 +13,29 @@
# limitations under the License.
# ============================================================================
import numpy as np
import mindspore as ms
import mindspore.common.dtype as mstype
from mindspore.ops import operations as P
from mindspore.common.parameter import Parameter
from mindspore.common.initializer import initializer
import numpy as np
from mindspore._checkparam import check_bool, twice, check_int_positive
from mindspore.nn.cell import Cell
from mindspore.ops import functional as F
from mindspore.common.tensor import Tensor
from mindspore.nn.layer.activation import get_activation
from mindspore._extends import cell_attr_register
from mindspore.common.initializer import initializer
from mindspore.common.parameter import Parameter
from mindspore.common.tensor import Tensor
from mindspore.nn.cell import Cell
from mindspore.nn.layer.activation import get_activation
from mindspore.ops import operations as P
from cus_ops.cus_batch_matmul import CusBatchMatMul
from cus_ops.cus_cholesky_trsm import CusCholeskyTrsm
from cus_ops.cus_fused_abs_max1 import CusFusedAbsMax1
from cus_ops.cus_img2col import CusImg2Col
from cus_ops.cus_matmul_cube import CusMatMulCube
from cus_ops.cus_matrix_combine import CusMatrixCombine
from cus_ops.cus_cholesky_trsm import CusCholeskyTrsm
from cus_ops.cus_img2col import CusImg2Col
from cus_ops.cus_fused_abs_max1 import CusFusedAbsMax1
from cus_ops.cus_batch_matmul import CusBatchMatMul
from cus_ops.cus_transpose02314 import CusTranspose02314
C0 = 16
def caculate_device_shape(matrix_dim, channel, is_A):
if is_A:
if channel // C0 == 0:
@@ -41,11 +43,13 @@ def caculate_device_shape(matrix_dim, channel, is_A):
return (int(matrix_dim // C0), int(matrix_dim // C0), C0, C0), int(matrix_dim)
else:
return (int(matrix_dim // C0), int(matrix_dim // C0), C0, C0), int(matrix_dim)
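caculate_device_shape maps a factor's logical dimension onto the blocked (N1, N1, 16, 16) layout that the Ascend cube unit consumes; the padA and device_shape_pad paths further down exist because that layout only holds dimensions that are multiples of C0 = 16. A hedged sketch of the alignment rule this assumes (the helper itself is only partially shown here):

C0 = 16  # cube-unit block size used throughout this file

def aligned_dim(matrix_dim):
    # smallest multiple of C0 that holds matrix_dim; this is what the
    # padA / device_shape_pad logic below effectively rounds up to
    return -(-matrix_dim // C0) * C0

assert aligned_dim(147) == 160  # e.g. 3 * 7 * 7 im2col rows of a 7x7 stem conv
assert aligned_dim(64) == 64    # already aligned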
class _Conv(Cell):
r"""Applies a N-D convolution over an input signal composed of several input
planes.
"""
def __init__(self,
in_channels,
out_channels,
@@ -73,23 +77,23 @@ class _Conv(Cell):
self.has_bias = has_bias
if not (isinstance(in_channels, int) and in_channels > 0):
raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op passed '
+str(in_channels)+ ', should be a int and greater than 0.')
+ str(in_channels) + ', should be a int and greater than 0.')
if (not isinstance(kernel_size, tuple)) or len(kernel_size) != 2 or \
(not isinstance(kernel_size[0], int)) or (not isinstance(kernel_size[1], int)) or \
(not isinstance(kernel_size[0], int)) or (not isinstance(kernel_size[1], int)) or \
kernel_size[0] < 1 or kernel_size[1] < 1:
raise ValueError('Attr \'kernel_size\' of \'Conv2D\' Op passed '
+str(self.kernel_size)+', should be a int or tuple and equal to or greater than 1.')
+ str(self.kernel_size) + ', should be a int or tuple and equal to or greater than 1.')
if in_channels % group != 0:
raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op must be divisible by '
'attr \'group\' of \'Conv2D\' Op.')
if out_channels % group != 0:
raise ValueError('Attr \'out_channels\' of \'Conv2D\' Op must be divisible by '
'attr \'group\' of \'Conv2D\' Op.')
self.weight = Parameter(initializer(
weight_init, [out_channels, in_channels // group, *kernel_size]),
name='weight')
name='weight')
if check_bool(has_bias):
self.bias = Parameter(initializer(
bias_init, [out_channels]), name='bias')
@@ -97,10 +101,11 @@ class _Conv(Cell):
if bias_init != 'zeros':
logger.warning("Value of 'has_bias' is False, value of 'bias_init' will be ignored.")
self.bias = None
def construct(self, *inputs):
raise NotImplementedError
class Conv2d_Thor(_Conv):
def __init__(self,
in_channels,
@@ -120,7 +125,7 @@ class Conv2d_Thor(_Conv):
bias_init='zeros'):
self.thor = True
ksizes = (1, kernel_size, kernel_size, 1)
self.hw = kernel_size*kernel_size
self.hw = kernel_size * kernel_size
strides = (1, stride, stride, 1)
kernel_size = twice(kernel_size)
super(Conv2d_Thor, self).__init__(
@@ -146,26 +151,37 @@ class Conv2d_Thor(_Conv):
dilation=self.dilation,
group=self.group
)
self.img2col = CusImg2Col(ksizes = ksizes, strides = strides)
self.img2col = CusImg2Col(ksizes=ksizes, strides=strides)
self.cube_matmul = CusMatMulCube(transpose_a=True)
self.matrix_combine = CusMatrixCombine()
self.cholesky = CusCholeskyTrsm()
self.transpose02314 = CusTranspose02314()
self.matrix_A_dim = self.in_channels*self.kernel_size[0]*self.kernel_size[1]
self.matrix_A_dim = self.in_channels * self.kernel_size[0] * self.kernel_size[1]
self.matrix_G_dim = self.out_channels
self.matrix_A_device_shape, self.matrix_A_device_dim = caculate_device_shape(self.matrix_A_dim, self.in_channels, True)
self.matrix_G_device_shape, self.matrix_G_device_dim = caculate_device_shape(self.matrix_G_dim, self.in_channels, False)
self.matrix_A_device_temp_shape = (self.matrix_A_device_shape[0], self.matrix_A_device_shape[2], self.matrix_A_device_shape[1], self.matrix_A_device_shape[3])
self.matrix_G_device_temp_shape = (self.matrix_G_device_shape[0], self.matrix_G_device_shape[2], self.matrix_G_device_shape[1], self.matrix_G_device_shape[3])
self.matrix_A_inv = Parameter(Tensor(np.reshape(np.identity(self.matrix_A_device_dim).astype(np.float16), self.matrix_A_device_shape)), name='matrix_A_inv', requires_grad=False)
self.matrix_A_device_shape, self.matrix_A_device_dim = caculate_device_shape(self.matrix_A_dim,
self.in_channels, True)
self.matrix_G_device_shape, self.matrix_G_device_dim = caculate_device_shape(self.matrix_G_dim,
self.in_channels, False)
self.matrix_A_device_temp_shape = (
self.matrix_A_device_shape[0], self.matrix_A_device_shape[2], self.matrix_A_device_shape[1],
self.matrix_A_device_shape[3])
self.matrix_G_device_temp_shape = (
self.matrix_G_device_shape[0], self.matrix_G_device_shape[2], self.matrix_G_device_shape[1],
self.matrix_G_device_shape[3])
self.matrix_A_inv = Parameter(
Tensor(np.reshape(np.identity(self.matrix_A_device_dim).astype(np.float16), self.matrix_A_device_shape)),
name='matrix_A_inv', requires_grad=False)
self.A_inv_max = Parameter(initializer(0, [1], mstype.float32), name="A_inv_max", requires_grad=False)
self.matrix_G_inv = Parameter(Tensor(np.reshape(np.identity(self.matrix_G_device_dim).astype(np.float16), self.matrix_G_device_shape)), name="matrix_G_inv", requires_grad=False)
self.matrix_G_inv = Parameter(
Tensor(np.reshape(np.identity(self.matrix_G_device_dim).astype(np.float16), self.matrix_G_device_shape)),
name="matrix_G_inv", requires_grad=False)
self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), name="G_inv_max", requires_grad=False)
self.fake_G = Tensor(np.reshape(np.identity(self.matrix_G_device_dim).astype(np.float16), self.matrix_G_device_shape))
self.fake_G_inv_max = Tensor(np.zeros([1,]).astype(np.float32))
self.fake_G = Tensor(
np.reshape(np.identity(self.matrix_G_device_dim).astype(np.float16), self.matrix_G_device_shape))
self.fake_G_inv_max = Tensor(np.zeros([1, ]).astype(np.float32))
self.shape = P.Shape()
self.reshape = P.Reshape()
self.transpose = P.Transpose()
@@ -178,9 +194,10 @@ class Conv2d_Thor(_Conv):
self.channels_slice_flag = False
if self.in_channels % C0 != 0:
self.channels_slice_flag = True
self.padA_flag = False
if (self.matrix_A_dim // self.diag_block_dim) * self.diag_block_dim != self.matrix_A_dim and self.matrix_A_dim > self.diag_block_dim:
if (
self.matrix_A_dim // self.diag_block_dim) * self.diag_block_dim != self.matrix_A_dim and self.matrix_A_dim > self.diag_block_dim:
self.padA_flag = True
pad_dim = self.diag_block_dim - self.matrix_A_dim % self.diag_block_dim
self.padA = P.Pad(((0, pad_dim), (0, pad_dim)))
@@ -191,16 +208,16 @@ class Conv2d_Thor(_Conv):
self.slice = P.Slice()
self.gather = P.GatherV2()
self.freq = Tensor(frequency, mstype.int32)
self.loss_scale = Tensor(1/loss_scale, mstype.float16)
self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
self.axis = 0
dampingA_dim = self.matrix_A_dim
if (self.matrix_A_dim % self.diag_block_dim) != 0 and self.matrix_A_dim > self.diag_block_dim:
dampingA_dim = (self.matrix_A_dim // self.diag_block_dim + 1) * self.diag_block_dim
dampingG_dim = self.matrix_G_dim
if (self.matrix_G_dim % self.diag_block_dim) != 0 and self.matrix_G_dim > self.diag_block_dim:
dampingG_dim = (self.matrix_G_dim // self.diag_block_dim + 1) * self.diag_block_dim
self.dampingA = Tensor(np.identity(dampingA_dim), mstype.float32)
self.dampingG = Tensor(np.identity(dampingG_dim), mstype.float32)
self.fused_abs_max1 = CusFusedAbsMax1([self.matrix_A_dim, self.matrix_A_dim])
@@ -211,50 +228,50 @@ class Conv2d_Thor(_Conv):
self.getG = P.InsertGradientOf(self.save_gradient)
def save_gradient(self, dout):
out = dout
dout = self.mul(dout, self.loss_scale)
dout = self.mul(dout, 32.0)
dout = self.transpose02314(dout)
dout_shape = self.shape(dout)
normalizer = dout_shape[0]
matrix_G = self.cube_matmul(dout, dout)
normalizer = self.cast(normalizer, ms.float32)
matrix_G = self.mul(matrix_G, 1.0/normalizer)
damping_step = self.gather(self.damping, self.cov_step, 0)
self.cov_step = self.cov_step + self.freq
damping_step = self.cast(damping_step, mstype.float32)
damping = self.mul(damping_step, 32.0/normalizer)
damping = self.sqrt(damping)
dampingG = self.cast(self.dampingG, mstype.float32)
matrix_G = matrix_G + damping * dampingG
matrix_G_inv = self.cholesky(matrix_G)
matrix_G_inv = self.vector_matmul(matrix_G_inv, matrix_G_inv)
matrix_G_inv_max = self.fused_abs_max2(matrix_G_inv)
matrix_G_inv_max = self.fused_abs_max2(matrix_G_inv_max)
self.G_inv_max = matrix_G_inv_max
matrix_G_inv = self.matrix_combine(matrix_G_inv)
matrix_G_inv_shape = self.shape(matrix_G_inv)
matrix_G_inv = self.reshape(matrix_G_inv, self.matrix_G_device_temp_shape)
matrix_G_inv = self.transpose(matrix_G_inv, (2, 0, 1, 3))
matrix_G = self.cast(matrix_G_inv, mstype.float16)
self.matrix_G_inv = matrix_G
return out
out = dout
dout = self.mul(dout, self.loss_scale)
dout = self.mul(dout, 32.0)
dout = self.transpose02314(dout)
dout_shape = self.shape(dout)
normalizer = dout_shape[0]
matrix_G = self.cube_matmul(dout, dout)
normalizer = self.cast(normalizer, ms.float32)
matrix_G = self.mul(matrix_G, 1.0 / normalizer)
damping_step = self.gather(self.damping, self.cov_step, 0)
self.cov_step = self.cov_step + self.freq
damping_step = self.cast(damping_step, mstype.float32)
damping = self.mul(damping_step, 32.0 / normalizer)
damping = self.sqrt(damping)
dampingG = self.cast(self.dampingG, mstype.float32)
matrix_G = matrix_G + damping * dampingG
matrix_G_inv = self.cholesky(matrix_G)
matrix_G_inv = self.vector_matmul(matrix_G_inv, matrix_G_inv)
matrix_G_inv_max = self.fused_abs_max2(matrix_G_inv)
matrix_G_inv_max = self.fused_abs_max2(matrix_G_inv_max)
self.G_inv_max = matrix_G_inv_max
matrix_G_inv = self.matrix_combine(matrix_G_inv)
matrix_G_inv_shape = self.shape(matrix_G_inv)
matrix_G_inv = self.reshape(matrix_G_inv, self.matrix_G_device_temp_shape)
matrix_G_inv = self.transpose(matrix_G_inv, (2, 0, 1, 3))
matrix_G = self.cast(matrix_G_inv, mstype.float16)
self.matrix_G_inv = matrix_G
return out
def construct(self, x):
if self.thor:
matrix_A = self.img2col(x)
matrix_A_shape = self.shape(matrix_A)
normalizer = matrix_A_shape[0]
matrix_A = self.cube_matmul(matrix_A, matrix_A)
if self.channels_slice_flag:
matrix_A = self.reshape(matrix_A, (self.hw, C0, self.hw, C0))
matrix_A = self.slice(matrix_A, (0, 0, 0, 0), (self.hw, self.in_channels, self.hw, self.in_channels))
matrix_A = self.reshape(matrix_A, (self.matrix_A_dim, self.matrix_A_dim))
normalizer = self.cast(normalizer, ms.float32)
matrix_A = self.mul(matrix_A, 1.0/normalizer)
matrix_A = self.mul(matrix_A, 1.0 / normalizer)
if self.padA_flag:
matrix_A = self.padA(matrix_A)
damping_step = self.gather(self.damping, self.cov_step, self.axis)
@@ -273,7 +290,7 @@ class Conv2d_Thor(_Conv):
in_channels = self.in_channels
if self.padA_flag:
matrix_A_inv = self.slice(matrix_A_inv, (0, 0), (self.matrix_A_dim, self.matrix_A_dim))
if self.device_shape_pad_flag:
matrix_A_inv = self.reshape(matrix_A_inv, (self.hw, self.in_channels, self.hw, self.in_channels))
matrix_A_inv = self.device_shape_pad(matrix_A_inv)
@@ -286,31 +303,32 @@ class Conv2d_Thor(_Conv):
out = self.getG(out)
else:
out = self.conv2d(x, self.weight)
return out
def extra_repr(self):
s = 'input_channels={}, output_channels={}, kernel_size={},' \
'stride={}, pad_mode={}, padding={}, dilation={}, ' \
'stride={}, pad_mode={}, padding={}, dilation={}, ' \
'group={}, data_format={}, has_bias={},' \
'weight_init={}, bias_init={}'.format(
self.in_channels,
self.out_channels,
self.kernel_size,
self.stride,
self.pad_mode,
self.padding,
self.dilation,
self.group,
self.data_format,
self.has_bias,
self.weight,
self.bias)
'weight_init={}, bias_init={}'.format(
self.in_channels,
self.out_channels,
self.kernel_size,
self.stride,
self.pad_mode,
self.padding,
self.dilation,
self.group,
self.data_format,
self.has_bias,
self.weight,
self.bias)
if self.has_bias:
s += ', bias={}'.format(self.bias)
return s
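save_gradient above is where the second-order statistics enter: the damped output covariance matrix_G = dout^T dout / N + sqrt(damping * 32 / N) * I is inverted through its Cholesky factor. A dense NumPy equivalent (not part of this diff), with the fused Ascend ops (CusCholeskyTrsm, CusMatrixCombine and the fractal reshapes) collapsed into ordinary linear algebra:

import numpy as np

def g_factor_inverse(dout, damping_step, loss_scale=1.0):
    # dout: (N, out_channels) gradient of the loss w.r.t. the layer output
    dout = dout * (1.0 / loss_scale) * 32.0      # undo loss scaling, batch factor
    n = dout.shape[0]
    matrix_G = dout.T @ dout / n                 # cube_matmul(dout, dout) / normalizer
    damping = np.sqrt(damping_step * 32.0 / n)   # square-rooted Tikhonov term
    matrix_G = matrix_G + damping * np.eye(matrix_G.shape[0])
    L_inv = np.linalg.inv(np.linalg.cholesky(matrix_G))  # Cholesky + triangular inverse
    return L_inv.T @ L_inv                       # G^-1 = L^-T @ L^-1

G_inv = g_factor_inverse(np.random.randn(64, 8), damping_step=0.03)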
class Dense_Thor(Cell):
@cell_attr_register(attrs=['has_bias', 'activation'])
def __init__(self,
@@ -330,30 +348,30 @@ class Dense_Thor(Cell):
self.thor = True
if isinstance(weight_init, Tensor):
if weight_init.dim() != 2 or weight_init.shape()[0] != out_channels or \
weight_init.shape()[1] != in_channels:
weight_init.shape()[1] != in_channels:
raise ValueError("weight_init shape error")
self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")
if self.has_bias:
if isinstance(bias_init, Tensor):
if bias_init.dim() != 1 or bias_init.shape()[0] != out_channels:
raise ValueError("bias_init shape error")
self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")
self.matmul = P.MatMul(transpose_b=True)
self.bias_add = P.BiasAdd()
self.activation = get_activation(activation)
self.activation_flag = self.activation is not None
self.matrix_A_inv = Parameter(Tensor(np.zeros([128, 128, 16, 16]).astype(np.float16)), name='matrix_A_inv',
requires_grad=False)
self.matrix_G_inv = Parameter(Tensor(np.zeros([63, 63, 16, 16]).astype(np.float16)), name="matrix_G_inv",
requires_grad=False)
self.fake_G = Tensor(np.zeros([63, 63, 16, 16]).astype(np.float16))
self.matmul = P.MatMul(transpose_b=True)
self.cube_matmul = CusMatMulCube(transpose_a=True)
self.matrix_combine = CusMatrixCombine()
@@ -365,7 +383,7 @@ class Dense_Thor(Cell):
self.mul = P.Mul()
self.cast = P.Cast()
self.damping = Tensor(damping)
self.loss_scale = Tensor(1/loss_scale, mstype.float16)
self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
self.vector_matmul = CusBatchMatMul()
self.pad = P.Pad(((0, 24), (0, 24)))
self.pad1 = P.Pad(((0, 8), (0, 8)))
@@ -415,14 +433,14 @@ class Dense_Thor(Cell):
matrix_G_inv = self.cast(matrix_G_inv, mstype.float16)
self.matrix_G_inv = matrix_G_inv
return out
def construct(self, x):
if self.thor:
inputs = self.cube_matmul(x, x)
normalizer = 32
normalizer = self.cast(normalizer, ms.float32)
matrix_A = self.mul(inputs, 1.0 / normalizer)
damping_step = self.gather(self.damping, self.cov_step, self.axis)
damping_step = self.cast(damping_step, mstype.float32)
damping = self.sqrt(damping_step)
@@ -430,11 +448,11 @@ class Dense_Thor(Cell):
matrix_A = matrix_A + damping * dampingA
matrix_A_inv = self.cholesky(matrix_A)
matrix_A_inv = self.vector_matmul(matrix_A_inv, matrix_A_inv)
matrix_A_inv_max = self.fused_abs_max2(matrix_A_inv)
matrix_A_inv_max = self.fused_abs_max2(matrix_A_inv_max)
self.A_inv_max = matrix_A_inv_max
matrix_A_inv = self.matrix_combine(matrix_A_inv)
matrix_A_inv_shape = self.shape(matrix_A_inv)
matrix_A_inv = self.reshape(matrix_A_inv, (matrix_A_inv_shape[0] / 16, 16, matrix_A_inv_shape[0] / 16, 16))
@@ -446,20 +464,20 @@ class Dense_Thor(Cell):
output = self.getG(output)
else:
output = self.matmul(x, self.weight)
if self.has_bias:
output = self.bias_add(output, self.bias)
if self.activation_flag:
return self.activation(output)
return output
def extend_repr(self):
str_info = 'in_channels={}, out_channels={}, weight={}, has_bias={}' \
.format(self.in_channels, self.out_channels, self.weight, self.has_bias)
.format(self.in_channels, self.out_channels, self.weight, self.has_bias)
if self.has_bias:
str_info = str_info + ', bias={}'.format(self.bias)
if self.activation_flag:
str_info = str_info + ', activation={}'.format(self.activation)
return str_info
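Why applying the two small inverses on either side of the gradient is enough: for a weight gradient g of shape (out, in) and symmetric factors A and G, left-multiplying by G^-1 and right-multiplying by A^-1 equals multiplying vec(g) by the Kronecker product A^-1 ⊗ G^-1, the Kronecker-factored approximation of the curvature inverse. A quick NumPy check of the identity this relies on (illustrative, not part of this diff):

import numpy as np

rng = np.random.default_rng(0)
G = rng.standard_normal((4, 4)); G = G @ G.T   # symmetric stand-ins for the factors
A = rng.standard_normal((3, 3)); A = A @ A.T
g = rng.standard_normal((4, 3))                # (out_dim, in_dim) gradient

# with column-major vec and symmetric A: (A kron G) @ vec(g) == vec(G @ g @ A)
lhs = (np.kron(A, G) @ g.reshape(-1, order='F')).reshape(4, 3, order='F')
assert np.allclose(lhs, G @ g @ A)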

View File

@@ -13,62 +13,54 @@
# limitations under the License.
# ============================================================================
"""train_imagenet."""
import os
import argparse
import os
import random
import mindspore.dataset.engine as de
import numpy as np
from dataset_imagenet import create_dataset
from lr_generator import get_lr, warmup_cosine_annealing_lr
from config_imagenet import config
from mindspore import context
from mindspore import Tensor
from mindspore import context
from mindspore.communication.management import init
from mindspore.parallel._auto_parallel_context import auto_parallel_context
from mindspore.nn.optim.momentum import Momentum
from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
from mindspore.train.model import ParallelMode
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
from mindspore.train.loss_scale_manager import FixedLossScaleManager
import mindspore.dataset.engine as de
from mindspore.communication.management import init
import math
import mindspore.nn as nn
from crossentropy import CrossEntropy
from var_init import default_recurisive_init, KaimingNormal
from mindspore.common import initializer as weight_init
from second_order.thor import THOR
from mindspore.train.model import ParallelMode
from second_order.model_second_order import Model
from second_order.resnet import resnet50
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from second_order.thor import THOR
from config_imagenet import config
from crossentropy import CrossEntropy
from dataset_imagenet import create_dataset
from lr_generator import get_lr, warmup_cosine_annealing_lr
random.seed(1)
np.random.seed(1)
de.config.set_seed(1)
parser = argparse.ArgumentParser(description='Image classification')
parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute')
parser.add_argument('--device_num', type=int, default=1, help='Device num.')
parser.add_argument('--do_train', type=bool, default=True, help='Do train or not.')
parser.add_argument('--do_eval', type=bool, default=False, help='Do eval or not.')
parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
args_opt = parser.parse_args()
device_id = int(os.getenv('DEVICE_ID'))
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True, device_id=device_id)
context.set_context(enable_task_sink=True)
context.set_context(enable_loop_sink=True)
context.set_context(enable_mem_reuse=True)
def get_second_order_lr(global_step, lr_init, decay, total_epochs, steps_per_epoch):
lr_each_step = []
total_steps = steps_per_epoch * total_epochs
for i in range(total_steps):
epoch = (i+1)/steps_per_epoch
base = (1.0 - float(epoch)/total_epochs)**decay
epoch = (i + 1) / steps_per_epoch
base = (1.0 - float(epoch) / total_epochs) ** decay
lr = lr_init * base
lr_each_step.append(lr)
current_step = global_step
@@ -77,11 +69,12 @@ def get_second_order_lr(global_step, lr_init, decay, total_epochs, steps_per_epoch):
learning_rate = lr_each_step[current_step:]
return learning_rate
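get_second_order_lr is a per-step polynomial decay, lr = lr_init * (1 - epoch / total_epochs) ** decay, computed for every step and then sliced from global_step onward. A worked example (lr_init and decay are assumed values for illustration; only steps_per_epoch = 5004 and the epoch counts appear in this file):

steps_per_epoch, total_epochs = 5004, 50
lr_init, decay = 0.1, 6.0  # assumptions, not values from this commit

def lr_at(step):
    epoch = (step + 1) / steps_per_epoch
    return lr_init * (1.0 - epoch / total_epochs) ** decay

print(lr_at(0))                         # ~0.1 at the first step
print(lr_at(25 * steps_per_epoch - 1))  # halfway: 0.1 * 0.5**6 = 0.0015625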
def get_second_order_damping(global_step, damping_init, decay_rate, total_epochs, steps_per_epoch):
damping_each_step = []
total_steps = steps_per_epoch * total_epochs
for step in range(total_steps):
epoch = (step+1) / steps_per_epoch
epoch = (step + 1) / steps_per_epoch
damping = damping_init * (decay_rate ** (epoch / 10))
damping_each_step.append(damping)
@@ -91,6 +84,7 @@ def get_second_order_damping(global_step, damping_init, decay_rate, total_epochs, steps_per_epoch):
print("damping_is=========", damping)
return damping
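get_second_order_damping decays the damping geometrically with epoch, damping = damping_init * decay_rate ** (epoch / 10). With the values used in __main__ below (0.03, 0.87, 5004 steps per epoch) it drops by one factor of 0.87 every ten epochs:

damping_init, decay_rate, steps_per_epoch = 0.03, 0.87, 5004  # from the call below

def damping_at(step):
    epoch = (step + 1) / steps_per_epoch
    return damping_init * decay_rate ** (epoch / 10)

print(damping_at(0))                         # ~0.03 on the first step
print(damping_at(10 * steps_per_epoch - 1))  # 0.03 * 0.87 = 0.0261 after ten epochs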
if __name__ == '__main__':
if args_opt.do_eval:
print("eval")
@@ -104,7 +98,7 @@ if __name__ == '__main__':
init()
else:
print(" ")
epoch_size = config.epoch_size
damping = get_second_order_damping(0, 0.03, 0.87, 50, 5004)
net = resnet50(class_num=config.class_num, damping=damping, loss_scale=config.loss_scale,
@@ -128,8 +122,8 @@ if __name__ == '__main__':
config.eta_min))
else:
lr = Tensor(get_lr(global_step=0, lr_init=config.lr_init, lr_end=config.lr_end, lr_max=config.lr_max,
warmup_epochs=config.warmup_epochs, total_epochs=epoch_size, steps_per_epoch=step_size,
lr_decay_mode='poly'))
warmup_epochs=config.warmup_epochs, total_epochs=epoch_size, steps_per_epoch=step_size,
lr_decay_mode='poly'))
opt = THOR(filter(lambda x: x.requires_grad, net.get_parameters()), lr,
config.momentum, damping, config.frequency,
filter(lambda x: 'matrix_A' in x.name, net.get_parameters()),
@@ -137,8 +131,9 @@ if __name__ == '__main__':
filter(lambda x: 'spatial_norm' in x.name, net.get_parameters()),
config.weight_decay, config.loss_scale)
model = Model(net, loss_fn=loss, optimizer=opt, amp_level='O2', loss_scale_manager=loss_scale, keep_batchnorm_fp32=False, metrics={'acc'}, frequency=config.frequency)
model = Model(net, loss_fn=loss, optimizer=opt, amp_level='O2', loss_scale_manager=loss_scale,
keep_batchnorm_fp32=False, metrics={'acc'}, frequency=config.frequency)
time_cb = TimeMonitor(data_size=step_size)
loss_cb = LossMonitor()
cb = [time_cb, loss_cb]