switch position of the code for second_order

This commit is contained in:
z00478463 2020-05-22 09:22:11 +08:00
parent 27c13794d2
commit da62a44d85
31 changed files with 546 additions and 549 deletions

View File

@ -16,6 +16,7 @@
network config setting, will be used in train.py and eval.py
"""
from easydict import EasyDict as ed
config = ed({
"class_num": 1000,
"batch_size": 32,

View File

@ -13,24 +13,26 @@
# limitations under the License.
# ============================================================================
from mindspore.nn.loss.loss import _Loss
from mindspore.ops import operations as P
from mindspore.ops import functional as F
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.common import dtype as mstype
import mindspore.nn as nn
from mindspore.nn.loss.loss import _Loss
from mindspore.ops import functional as F
from mindspore.ops import operations as P
class CrossEntropy(_Loss):
def __init__(self, smooth_factor=0., num_classes=1000):
super(CrossEntropy, self).__init__()
self.onehot = P.OneHot()
self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
self.off_value = Tensor(1.0 * smooth_factor / (num_classes -1), mstype.float32)
#self.cast = P.Cast()
self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32)
# self.cast = P.Cast()
self.ce = nn.SoftmaxCrossEntropyWithLogits()
self.mean = P.ReduceMean(False)
def construct(self, logit, label):
#one_hot_label = self.onehot(self.cast(label, mstype.int32),
# one_hot_label = self.onehot(self.cast(label, mstype.int32),
# F.shape(logit)[1], self.on_value, self.off_value)
one_hot_label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value)
loss = self.ce(logit, one_hot_label)
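(Reference note, not part of the diff: the on/off values above implement standard label smoothing. A minimal NumPy check, with num_classes=10 chosen only for illustration:)

import numpy as np

def smoothed_one_hot(labels, num_classes, smooth_factor=0.1):
    # true class gets 1 - smooth_factor, every other class gets
    # smooth_factor / (num_classes - 1), matching on_value/off_value above
    on_value = 1.0 - smooth_factor
    off_value = smooth_factor / (num_classes - 1)
    one_hot = np.full((labels.shape[0], num_classes), off_value, dtype=np.float32)
    one_hot[np.arange(labels.shape[0]), labels] = on_value
    return one_hot

labels = np.array([3, 7])
print(smoothed_one_hot(labels, num_classes=10).sum(axis=1))  # each row sums to 1.0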

View File

@ -12,10 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
from te import tik
from topi.cce import util
from mindspore.ops.op_info_register import op_info_register
@op_info_register("""{
"op_name": "CusBatchMatMul",
"imply_type": "TBE",
@ -71,11 +70,5 @@ from mindspore.ops.op_info_register import op_info_register
}
]
}""")
def CusBatchMatMul(input_x1, input_x2, output, transpose_a=False, transpose_b=True, kernel_name="batchmatmul"):
return

View File

@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
from te import tik
from topi.cce import util
from mindspore.ops.op_info_register import op_info_register
@op_info_register("""{
"op_name": "CusCholeskyTrsm",
"imply_type": "TBE",
@ -58,7 +58,5 @@ from mindspore.ops.op_info_register import op_info_register
}
]
}""")
def CusCholeskyTrsm(input_x,output, kernel_name):
def CusCholeskyTrsm(input_x, output, kernel_name):
return

View File

@ -12,42 +12,27 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
from mindspore.ops import prim_attr_register, PrimitiveWithInfer
from mindspore import Tensor
import mindspore as ms
from mindspore.ops.composite import multitype_ops as C
class CusBatchMatMul(PrimitiveWithInfer):
"""CusMatMulCube definition"""
@prim_attr_register
def __init__(self):
"""init CusMatMulCube"""
self.init_prim_io_names(inputs=['x1', 'x2'], outputs=['y'])
# self.transpose_a = transpose_a
# self.transpose_b = transpose_b
from .batch_matmul_impl import CusBatchMatMul
def get_bprop(self):
def bprop(x1, x2, out, dout):
return (C.zeros_like(x1),C.zeros_like(x2))
return (C.zeros_like(x1), C.zeros_like(x2))
return bprop
def infer_shape(self, data1_shape, data2_shape):
#shape = [1, data1_shape[1], data2_shape[2], 16, 16]
#return shape
# if self.transpose_a == True:
# k1, m = data1_shape
# else:
# m, k1 = data1_shape
# if self.transpose_b == True:
# n, k2 = data2_shape
# else:
# k2, n = data2_shape
# assert k1==k2
# shape = [m, n]
return data1_shape
def infer_dtype(self, data1_dtype, data2_dtype):
return data1_dtype
# return ms.common.dtype.tensor_type(getattr(ms, "float32"))
return data1_dtype

View File

@ -12,24 +12,23 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
from mindspore.ops import prim_attr_register, PrimitiveWithInfer
from mindspore import Tensor
class CusCholeskyTrsm(PrimitiveWithInfer):
"""CusCholeskyTrsm definition"""
@prim_attr_register
def __init__(self):
"""init CusCholeskyTrsm"""
self.init_prim_io_names(inputs=['x1'], outputs=['y'])
from .cholesky_trsm import CusCholeskyTrsm
def infer_shape(self, data1_shape):
m,n = data1_shape
m, n = data1_shape
if m >= 128:
return [m//128,128,128]
return [m // 128, 128, 128]
else:
return [1,64,64]
return [1, 64, 64]
def infer_dtype(self, data1_dtype):
return data1_dtype
return data1_dtype
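(Reference note: the shape rule in CusCholeskyTrsm.infer_shape above, restated as plain Python. The blocking interpretation is an assumption; the kernel body lives in the TBE implementation.)

def cholesky_trsm_out_shape(m):
    # inputs with m >= 128 are handled as m // 128 blocks of 128x128,
    # smaller inputs fall back to a single padded 64x64 block
    return [m // 128, 128, 128] if m >= 128 else [1, 64, 64]

print(cholesky_trsm_out_shape(256))  # [2, 128, 128]
print(cholesky_trsm_out_shape(64))   # [1, 64, 64]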

View File

@ -12,31 +12,30 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
from mindspore.ops import prim_attr_register, PrimitiveWithInfer
from mindspore import Tensor
from mindspore.ops.composite import multitype_ops as C
class CusFusedAbsMax1(PrimitiveWithInfer):
"""CusCholeskyTrsm definition"""
@prim_attr_register
def __init__(self, origin_shape = [-1,-1]):
def __init__(self, origin_shape=[-1, -1]):
"""init CusCholeskyTrsm"""
self.init_prim_io_names(inputs=['x1'], outputs=['y'])
from .fused_abs_max1 import CusFusedAbsMax1
self.origin_shape = origin_shape
def get_bprop(self):
def bprop(x, out, dout):
return (C.zeros_like(x),)
return bprop
def infer_shape(self, data1_shape):
if len(data1_shape) == 2:
return [1,]
return [1, ]
else:
return [32, 64]
# return [128,128]
def infer_dtype(self, data1_dtype):
return data1_dtype

View File

@ -13,26 +13,26 @@
# limitations under the License.
# ============================================================================
import numpy as np
from mindspore.ops import prim_attr_register, PrimitiveWithInfer
from mindspore import Tensor
from mindspore.ops.composite import multitype_ops as C
class CusImg2Col(PrimitiveWithInfer):
"""CusImg2Col definition"""
@prim_attr_register
def __init__(self, ksizes, strides, dilates = (1, 1, 1, 1), mode="NC1HWC0"):
def __init__(self, ksizes, strides, dilates=(1, 1, 1, 1), mode="NC1HWC0"):
"""init CusImg2Col"""
self.init_prim_io_names(inputs=['x1'], outputs=['y'])
self.ksizes = ksizes
self.strides = strides
self.dilates = dilates
self.mode = mode
from .img2col_impl import CusImg2Col
def get_bprop(self):
def bprop(x, out, dout):
return (C.zeros_like(x),)
return bprop
def infer_shape(self, data1_shape):

View File

@ -12,30 +12,31 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
from mindspore.ops import prim_attr_register, PrimitiveWithInfer
from mindspore import Tensor
import mindspore as ms
from mindspore.ops import prim_attr_register, PrimitiveWithInfer
from mindspore.ops.composite import multitype_ops as C
class CusMatMulCube(PrimitiveWithInfer):
"""CusMatMulCube definition"""
@prim_attr_register
def __init__(self, transpose_a=False, transpose_b=False):
"""init CusMatMulCube"""
self.init_prim_io_names(inputs=['x1', 'x2'], outputs=['y'])
self.transpose_a = transpose_a
self.transpose_b = transpose_b
from .matmul_cube_impl import CusMatMulCube
def get_bprop(self):
def bprop(x1, x2, out, dout):
return (C.zeros_like(x1),C.zeros_like(x2))
return (C.zeros_like(x1), C.zeros_like(x2))
return bprop
def infer_shape(self, data1_shape, data2_shape):
#shape = [1, data1_shape[1], data2_shape[2], 16, 16]
#return shape
# shape = [1, data1_shape[1], data2_shape[2], 16, 16]
# return shape
if self.transpose_a == True:
k1, m = data1_shape
else:
@ -44,9 +45,9 @@ class CusMatMulCube(PrimitiveWithInfer):
n, k2 = data2_shape
else:
k2, n = data2_shape
assert k1==k2
assert k1 == k2
shape = [m, n]
return shape
def infer_dtype(self, data1_dtype, data2_dtype):
return ms.common.dtype.tensor_type(getattr(ms, "float32"))
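(Reference note: the transpose handling in CusMatMulCube.infer_shape above, restated as plain Python; it mirrors the k1 == k2 assertion in the diff.)

def matmul_out_shape(shape_a, shape_b, transpose_a=False, transpose_b=False):
    # a transposed operand is stored as (k, m) / (n, k) instead of (m, k) / (k, n)
    k1, m = shape_a if transpose_a else shape_a[::-1]
    n, k2 = shape_b if transpose_b else shape_b[::-1]
    assert k1 == k2
    return [m, n]

print(matmul_out_shape([8, 4], [4, 5]))              # [8, 5]
print(matmul_out_shape([4, 8], [5, 4], True, True))  # [8, 5]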

View File

@ -12,27 +12,28 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
from mindspore.ops import prim_attr_register, PrimitiveWithInfer
from mindspore import Tensor
import mindspore as ms
from mindspore.ops import prim_attr_register, PrimitiveWithInfer
from mindspore.ops.composite import multitype_ops as C
class CusMatMulCubeDenseLeft(PrimitiveWithInfer):
"""CusMatMulCube definition"""
@prim_attr_register
def __init__(self):
"""init CusMatMulCube"""
self.init_prim_io_names(inputs=['x1', 'x2'], outputs=['y'])
from .matmul_cube_dense_left import CusMatMulCubeDenseLeft
def get_bprop(self):
def bprop(x1, x2, out, dout):
return (C.zeros_like(x1),C.zeros_like(x2))
return (C.zeros_like(x1), C.zeros_like(x2))
return bprop
def infer_shape(self, data1_shape, data2_shape):
return data2_shape
def infer_dtype(self, data1_dtype, data2_dtype):
return ms.common.dtype.tensor_type(getattr(ms, "float16"))

View File

@ -12,27 +12,27 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
from mindspore.ops import prim_attr_register, PrimitiveWithInfer
from mindspore import Tensor
import mindspore as ms
from mindspore.ops import prim_attr_register, PrimitiveWithInfer
from mindspore.ops.composite import multitype_ops as C
class CusMatMulCubeFraczRightMul(PrimitiveWithInfer):
"""CusMatMulCubeFraczRightMul definition"""
@prim_attr_register
def __init__(self):
"""init CusMatMulCubeFraczRightMul"""
self.init_prim_io_names(inputs=['x1', 'x2', 'x3'], outputs=['y'])
from .matmul_cube_fracz_right_mul_impl import CusMatMulCubeFraczRightMul
def get_bprop(self):
def bprop(x1, x2, x3, out, dout):
return (C.zeros_like(x1),C.zeros_like(x2),C.zeros_like(x3))
return (C.zeros_like(x1), C.zeros_like(x2), C.zeros_like(x3))
return bprop
def infer_shape(self, data1_shape, data2_shape, data3_shape):
return data1_shape
def infer_dtype(self, data1_dtype, data2_dtype, data3_dtype):
return ms.common.dtype.tensor_type(getattr(ms, "float32"))

View File

@ -12,29 +12,29 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
from mindspore.ops import prim_attr_register, PrimitiveWithInfer
from mindspore import Tensor
import mindspore as ms
from mindspore.ops.composite import multitype_ops as C
class CusMatrixCombine(PrimitiveWithInfer):
"""CusMatMulCube definition"""
@prim_attr_register
def __init__(self):
"""init CusMatMulCube"""
self.init_prim_io_names(inputs=['x'], outputs=['y'])
from .matrix_combine_impl import CusMatrixCombine
def get_bprop(self):
def bprop(x, out, dout):
return (C.zeros_like(x),)
return bprop
def infer_shape(self, data_shape):
a, b, c = data_shape
shape = [a*b, a*c]
shape = [a * b, a * c]
return shape
def infer_dtype(self, data_dtype):
return data_dtype
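(Reference note: the [a, b, c] -> [a*b, a*c] shape rule above is consistent with assembling `a` blocks of shape (b, c) into one block-diagonal matrix; that reading is an assumption, since the kernel body is in the TBE file. A NumPy sketch:)

import numpy as np

a, b, c = 4, 128, 128
blocks = np.random.randn(a, b, c)
combined = np.zeros((a * b, a * c), dtype=blocks.dtype)
for i in range(a):
    # place block i on the diagonal
    combined[i * b:(i + 1) * b, i * c:(i + 1) * c] = blocks[i]
print(combined.shape)  # (512, 512) == [a*b, a*c]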

View File

@ -12,35 +12,33 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
from mindspore.ops import prim_attr_register, PrimitiveWithInfer
from mindspore import Tensor
from mindspore.ops.composite import multitype_ops as C
class CusTranspose02314(PrimitiveWithInfer):
"""CusTranspose02314 definition"""
@prim_attr_register
def __init__(self):
"""init CusTranspose02314"""
self.init_prim_io_names(inputs=['x1'], outputs=['y'])
from .transpose02314_impl import CusTranspose02314
def get_bprop(self):
def bprop(x, out, dout):
return (C.zeros_like(x),)
return bprop
def infer_shape(self, data1_shape):
assert len(data1_shape) == 4
n, c, h, w = data1_shape
c0 = 16
c1 = c // 16
shape = (n * h * w, c1 * c0)
# axis_0, axis_1, axis_2, axis_3, axis_4 = data1_shape
# shape = (axis_0, axis_2, axis_3, axis_1, axis_4)
return shape
def infer_dtype(self, data1_dtype):
return data1_dtype
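(Reference note: a NumPy sketch of the layout change inferred by CusTranspose02314 above. An NCHW tensor is regrouped as (N*H*W, C) with C split into c1 groups of c0 = 16 channels; C here is already a multiple of 16.)

import numpy as np

n, c, h, w = 2, 32, 4, 4
x = np.arange(n * c * h * w).reshape(n, c, h, w)
# move channels last, then flatten the spatial/batch axes together
y = x.transpose(0, 2, 3, 1).reshape(n * h * w, c)
print(y.shape)  # (32, 32) == (n*h*w, (c//16)*16)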

View File

@ -13,9 +13,9 @@
# limitations under the License.
# ============================================================================
from te import tik
from topi.cce import util
from mindspore.ops.op_info_register import op_info_register
@op_info_register("""{
"op_name": "CusFusedAbsMax1",
"imply_type": "TBE",
@ -64,5 +64,5 @@ from mindspore.ops.op_info_register import op_info_register
}
]
}""")
def CusFusedAbsMax1(input_x, output, origin_shape = None, kernel_name="fused_abs_max1"):
def CusFusedAbsMax1(input_x, output, origin_shape=None, kernel_name="fused_abs_max1"):
return

View File

@ -13,9 +13,9 @@
# limitations under the License.
# ============================================================================
from te import tik
from topi.cce import util
from mindspore.ops.op_info_register import op_info_register
@op_info_register("""{
"op_name": "CusImg2ColNC1HWC0",
"imply_type": "TBE",
@ -82,6 +82,5 @@ from mindspore.ops.op_info_register import op_info_register
}
]
}""")
def CusImg2ColNC1HWC0(input_x, output, ksizes, strides, dilates, padding, kernel_name="img2col"):
return

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
copyright 2019 Huawei Technologies Co., Ltd
copyright 2020 Huawei Technologies Co., Ltd
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -18,22 +18,15 @@ limitations under the License.
matmul
"""
from __future__ import absolute_import
import te.lang.cce
import te.platform.cce_params as cce
from te.platform.fusion_manager import fusion_manager
from te import tvm
from topi import generic
from topi.cce import util
from impl.matmul_vector import matmul_vector_cce
from te import tik
from mindspore.ops.op_info_register import op_info_register
from topi.cce import util
# General limitation of the size for input shape: 2**31
SHAPE_SIZE_LIMIT = 2147483648
NoneType = type(None)
@op_info_register("""{
"op_name": "CusMatMulCubeDenseLeft",
"imply_type": "TBE",
@ -102,8 +95,7 @@ NoneType = type(None)
}
]
}""")
@util.check_input_type(dict, dict, (dict, NoneType), dict, bool, bool, str)
def CusMatMulCubeDenseLeft(input_x1, input_x2, bias=None, output_y={}, trans_a=False, trans_b=False, kernel_name="matmulcube"):
def CusMatMulCubeDenseLeft(input_x1, input_x2, bias=None, output_y={}, trans_a=False, trans_b=False,
kernel_name="matmulcube"):
return

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
copyright 2019 Huawei Technologies Co., Ltd
copyright 2020 Huawei Technologies Co., Ltd
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -18,19 +18,15 @@ limitations under the License.
matmul
"""
from __future__ import absolute_import
import te.platform.cce_params as cce
from te import tvm
from topi.cce import util
from te import tik
from mindspore.ops.op_info_register import op_info_register
from topi.cce import util
# General limitation of the size for input shape: 2**31
SHAPE_SIZE_LIMIT = 2147483648
NoneType = type(None)
@op_info_register("""{
"op_name": "CusMatMulCubeFraczLeftCast",
"imply_type": "TBE",
@ -99,7 +95,6 @@ NoneType = type(None)
}
]
}""")
# pylint: disable=locally-disabled,too-many-arguments, too-many-locals, too-many-statements
@util.check_input_type(dict, dict, (dict, NoneType), dict, bool, bool, str)
def CusMatMulCubeFraczLeftCast(input_x1, input_x2, bias=None, output_y={}, trans_a=False, trans_b=False,

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
copyright 2019 Huawei Technologies Co., Ltd
copyright 2020 Huawei Technologies Co., Ltd
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -18,21 +18,14 @@ limitations under the License.
matmul
"""
from __future__ import absolute_import
import te.lang.cce
import te.platform.cce_params as cce
from te.platform.fusion_manager import fusion_manager
from te import tvm
from topi import generic
from topi.cce import util
from te import tik
from impl.matmul_vector import matmul_vector_cce
from mindspore.ops.op_info_register import op_info_register
# General limitation of the size for input shape: 2**31
SHAPE_SIZE_LIMIT = 2147483648
NoneType = type(None)
@op_info_register("""{
"op_name": "CusMatMulCubeFraczRightMul",
"imply_type": "TBE",
@ -114,8 +107,6 @@ NoneType = type(None)
}
]
}""")
def CusMatMulCubeFraczRightMul(input_x1, input_x2, input_x3, bias=None, output_y={}, trans_a=False, trans_b=False, kernel_name="matmulcube"):
def CusMatMulCubeFraczRightMul(input_x1, input_x2, input_x3, bias=None, output_y={}, trans_a=False, trans_b=False,
kernel_name="matmulcube"):
return

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
copyright 2019 Huawei Technologies Co., Ltd
copyright 2020 Huawei Technologies Co., Ltd
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -18,20 +18,15 @@ limitations under the License.
matmul
"""
from __future__ import absolute_import
import te.lang.cce
import te.platform.cce_params as cce
from te import tvm
from topi import generic
from topi.cce import util
from impl.matmul_vector import matmul_vector_cce
from mindspore.ops.op_info_register import op_info_register
from topi.cce import util
# General limitation of the size for input shape: 2**31
SHAPE_SIZE_LIMIT = 2147483648
NoneType = type(None)
@op_info_register("""{
"op_name": "CusMatMulCube",
"imply_type": "TBE",
@ -112,8 +107,7 @@ NoneType = type(None)
}
]
}""")
# pylint: disable=locally-disabled,too-many-arguments, too-many-locals, too-many-statements
@util.check_input_type(dict, dict, (dict, NoneType), dict, bool, bool, str)
def CusMatMulCube(input_x1, input_x2, bias=None, output_y={}, trans_a=False, trans_b=False, kernel_name="matmulcube"):
return
return

View File

@ -13,9 +13,9 @@
# limitations under the License.
# ============================================================================
from te import tik
from topi.cce import util
from mindspore.ops.op_info_register import op_info_register
@op_info_register("""{
"op_name": "CusMatrixCombine",
"imply_type": "TBE",
@ -58,7 +58,5 @@ from mindspore.ops.op_info_register import op_info_register
}
]
}""")
def CusMatrixCombine(input_x, output,kernel_name="matrix_combine"):
def CusMatrixCombine(input_x, output, kernel_name="matrix_combine"):
return

View File

@ -13,9 +13,9 @@
# limitations under the License.
# ============================================================================
from te import tik
from topi.cce import util
from mindspore.ops.op_info_register import op_info_register
@op_info_register("""{
"op_name": "CusTranspose02314",
"imply_type": "TBE",
@ -58,6 +58,5 @@ from mindspore.ops.op_info_register import op_info_register
}
]
}""")
def CusTranspose02314(input_x, output, kernel_name="transpose02314"):
return

View File

@ -16,11 +16,12 @@
create train or eval dataset.
"""
import os
import mindspore.common.dtype as mstype
import mindspore.dataset.engine as de
import mindspore.dataset.transforms.vision.c_transforms as V_C
import mindspore.dataset.transforms.c_transforms as C2
from config_imagenet import config
import mindspore.dataset.transforms.vision.c_transforms as V_C
def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
"""
@ -41,7 +42,7 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False)
else:
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
num_shards=device_num, shard_id=rank_id)
num_shards=device_num, shard_id=rank_id)
image_size = 224
mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
@ -61,9 +62,9 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
V_C.Normalize(mean=mean, std=std),
V_C.HWC2CHW()
]
#type_cast_op = C2.TypeCast(mstype.float16)
# type_cast_op = C2.TypeCast(mstype.float16)
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", operations=transform_img, num_parallel_workers=8)
ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)

View File

@ -13,14 +13,17 @@
# limitations under the License.
# ============================================================================
"""learning rate generator"""
import numpy as np
import math
import numpy as np
def linear_warmup_lr(current_step, warmup_steps, base_lr, init_lr):
lr_inc = (float(base_lr) - float(init_lr)) / float(warmup_steps)
lr = float(init_lr) + lr_inc * current_step
return lr
def cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch, T_max, eta_min=0, num_periods=0.5):
base_lr = lr
warmup_init_lr = 0
@ -39,6 +42,7 @@ def cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch, T_max, et
lr_each_step.append(lr)
return np.array(lr_each_step).astype(np.float32)
def warmup_cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch, T_max, eta_min=0, num_periods=0.5):
base_lr = lr
warmup_init_lr = 0
@ -57,6 +61,7 @@ def warmup_cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch, T_
lr_each_step.append(lr)
return np.array(lr_each_step).astype(np.float32)
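(Reference note: a quick check of linear_warmup_lr defined above; the learning rate ramps linearly from init_lr to base_lr over warmup_steps. Values are illustrative.)

def linear_warmup_lr(current_step, warmup_steps, base_lr, init_lr):
    lr_inc = (float(base_lr) - float(init_lr)) / float(warmup_steps)
    return float(init_lr) + lr_inc * current_step

print([round(linear_warmup_lr(s, 5, 0.5, 0.0), 2) for s in range(6)])
# [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]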
def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch, lr_decay_mode):
"""
generate learning rate array

View File

@ -13,15 +13,15 @@
# limitations under the License.
# ============================================================================
"""Dataset help for minddata dataset"""
from mindspore._checkparam import check_bool
from mindspore import context
from mindspore.train.parallel_utils import ParallelMode
from mindspore.train._utils import _exec_datagraph, _get_types_and_shapes, _to_tensor, \
_construct_tensor_list, _to_full_shapes, _to_full_tensor
from mindspore._checkparam import check_bool
from mindspore.nn.wrap import GetNextSingleOp
from mindspore.parallel._utils import _get_device_num, _get_global_rank, _get_parallel_mode
from mindspore.train._utils import _exec_datagraph, _get_types_and_shapes, _to_tensor, \
_construct_tensor_list, _to_full_shapes, _to_full_tensor
from mindspore.train.parallel_utils import ParallelMode
class DatasetHelper:
"""
Helper class for using the MindData dataset.
@ -41,9 +41,10 @@ class DatasetHelper:
>>> for inputs in dataset_helper:
>>> outputs = network(*inputs)
"""
def __init__(self, dataset, first_order_iter=0, dataset_sink_mode=True):
check_bool(dataset_sink_mode)
iterclass = _DatasetIterGE
if not dataset_sink_mode:
iterclass = _DatasetIterFeed
@ -52,24 +53,25 @@ class DatasetHelper:
iterclass = _DatasetIterMSLoopSink
else:
iterclass = _DatasetIterMS
self.iter = iterclass(dataset, first_order_iter)
def __iter__(self):
return self.iter.__iter__()
# A temp solution for loop sink. Delete later
def types_shapes(self):
"""Get the types and shapes from dataset on current config."""
return self.iter.types_shapes()
def loop_size(self):
"""Get loop_size for every iteration."""
return self.iter.loop_size
class _DatasetIter:
"""Base iter for dataset help"""
def __init__(self, dataset):
self.loop_size = 1
if not hasattr(dataset, '__ME_INITED__'):
@ -78,7 +80,7 @@ class _DatasetIter:
else:
self.loop_size = dataset.__loop_size__
dataset.__ME_INITED__ = _exec_datagraph(dataset, self.loop_size).queue_name
self.ind = 0
self.dataset = dataset
dataset_types, dataset_shapes = _get_types_and_shapes(dataset)
@ -89,53 +91,57 @@ class _DatasetIter:
if _get_parallel_mode() in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL):
device_num = _get_device_num()
self.dataset_shapes = _to_full_shapes(dataset_shapes, device_num)
def __iter__(self):
self.ind = 0
return self
def __next__(self):
if self.ind >= self.loop_count:
raise StopIteration()
self.ind += 1
return self.op()
def types_shapes(self):
return self.dataset_types, self.dataset_shapes
def get_loop_count(self, dataset):
loop_count = 1
if hasattr(dataset, '__loop_size__'):
loop_size = dataset.__loop_size__
loop_count = int(dataset.get_dataset_size()/loop_size)
loop_count = int(dataset.get_dataset_size() / loop_size)
return loop_count
class _DatasetIterMSLoopSink(_DatasetIter):
"""Iter for context (enable_loop_sink=True)"""
def __init__(self, dataset, first_order_iter):
super(_DatasetIterMSLoopSink, self).__init__(dataset)
# self.loop_count = self.get_loop_count(dataset)
loop_size = dataset.__loop_size__ + first_order_iter
self.loop_count = int(dataset.get_dataset_size()/loop_size) * 2
self.loop_count = int(dataset.get_dataset_size() / loop_size) * 2
def op():
return tuple()
self.op = op
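(Reference note: the loop count set in _DatasetIterMSLoopSink above, with made-up numbers. The * 2 appears to reflect that each sinked loop is entered twice, once for the second-order phase and once for the first-order phase; that reading is an interpretation of this commit, not a quote.)

dataset_size = 5004       # dataset.get_dataset_size(), illustrative
sink_size = 834           # dataset.__loop_size__, illustrative
first_order_iter = 0
loop_size = sink_size + first_order_iter
print(int(dataset_size / loop_size) * 2)  # 12 iterations per epoch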
class _DatasetIterMS(_DatasetIter):
"""Iter for context (enable_loop_sink=False)"""
def __init__(self, dataset, first_order_order):
super(_DatasetIterMS, self).__init__(dataset)
self.loop_count = dataset.get_dataset_size()
self.loop_size = 1
queue_name = dataset.__ME_INITED__
self.op = GetNextSingleOp(self.dataset_types, self.dataset_shapes, queue_name)
class _DatasetIterGE(_DatasetIter):
"""Iter for ge"""
def __init__(self, dataset):
super(_DatasetIterGE, self).__init__(dataset)
self.loop_count = self.get_loop_count(dataset)
@ -145,14 +151,16 @@ class _DatasetIterGE(_DatasetIter):
if self.need_to_full:
batch_expand_num = _get_device_num()
tensor_list_run = _construct_tensor_list(self.dataset_types, self.dataset_shapes, batch_expand_num)
def op():
return tensor_list_run
self.op = op
class _DatasetIterFeed:
"""Iter for feed data"""
def __init__(self, dataset, first_order_order):
self.dataset = dataset
self.device_num = _get_device_num()
@ -161,18 +169,18 @@ class _DatasetIterFeed:
self.repeat_ind = 0
self.loop_count = dataset.get_dataset_size()
self.ind = 0
parallel_mode = context.get_auto_parallel_context("parallel_mode")
self.need_to_full = parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL)
def __iter__(self):
if self.repeat_ind % self.repeat_count == 0:
self.iter = self.dataset.__iter__()
self.repeat_ind += 1
self.ind = 0
return self
def __next__(self):
if self.ind >= self.loop_count:
raise StopIteration()

View File

@ -12,28 +12,30 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
from mindspore.nn.cell import Cell
import mindspore.common.dtype as mstype
from mindspore.communication.management import GlobalComm, get_group_size
from mindspore.nn.cell import Cell
from mindspore.ops import functional as F, composite as C, operations as P
from mindspore.ops.operations.comm_ops import AllReduce, ReduceOp
import mindspore.common.dtype as mstype
from mindspore.communication import create_group
reduce_opt = C.MultitypeFuncGraph("reduce_opt")
_all_reduce_A = AllReduce()
def _init_optimizer_allreduce(group):
global _all_reduce_A
_all_reduce_A = AllReduce(ReduceOp.SUM, GlobalComm.WORLD_COMM_GROUP)
_all_reduce_A.add_prim_attr('fusion', group)
@reduce_opt.register("Function", "Number", "Tensor")
def _tensors_allreduce_mean(mul, degree, grad):
degree = F.scalar_cast(degree, F.dtype(grad))
grad = _all_reduce_A(grad)
cast_op = P.Cast()
return mul(grad, cast_op(F.scalar_to_array(1.0/degree), F.dtype(grad)))
return mul(grad, cast_op(F.scalar_to_array(1.0 / degree), F.dtype(grad)))
@reduce_opt.register("Bool", "Tensor")
def _tensors_allreduce(allreduce_filter, grad):
@ -41,8 +43,10 @@ def _tensors_allreduce(allreduce_filter, grad):
return _all_reduce_A(grad)
return grad
_get_datatype = C.MultitypeFuncGraph("_get_datatype")
@_get_datatype.register("Tensor")
def _tensors_get_datatype(grad):
"""

View File

@ -13,29 +13,26 @@
# limitations under the License.
# ============================================================================
"""Model."""
import numpy as np
import mindspore.nn as nn
from mindspore import log as logger
from mindspore.common.tensor import Tensor
from mindspore.nn.metrics import get_metrics
from mindspore._checkparam import check_input_data, check_output_data, check_int_positive, check_bool
from mindspore.train.callback import _InternalCallbackParam, RunContext, _build_callbacks
import numpy as np
from mindspore import context
from mindspore import log as logger
from mindspore._c_expression import init_exec_dataset
from mindspore._checkparam import check_input_data, check_output_data, check_int_positive, check_bool
from mindspore.common import dtype as mstype
from mindspore.common.dtype import pytype_to_dtype
from mindspore.common.tensor import Tensor
from mindspore.nn.metrics import Loss
from mindspore.nn.metrics import get_metrics
from mindspore.nn.wrap.cell_wrapper import _VirtualDatasetCell
from mindspore.parallel._utils import _get_parallel_mode, _get_device_num, _get_global_rank, \
_get_parameter_broadcast, _device_number_check, _parameter_broadcast_check
from mindspore.nn.metrics import Loss
from mindspore.nn.wrap import WithLossCell, WithEvalCell, \
DataWrapper
from mindspore.nn.wrap.cell_wrapper import _VirtualDatasetCell
from mindspore.train.parallel_utils import ParallelMode
from mindspore.common import dtype as mstype
from second_order.dataset_helper import DatasetHelper
from mindspore.train import amp
from mindspore.common.dtype import pytype_to_dtype
from mindspore._c_expression import init_exec_dataset
from mindspore.common.parameter import Parameter
from mindspore.train.callback import _InternalCallbackParam, RunContext, _build_callbacks
from mindspore.train.parallel_utils import ParallelMode
from second_order.dataset_helper import DatasetHelper
def _convert_type(types):
"""
Convert from numpy type to tensor type.
@ -51,18 +48,20 @@ def _convert_type(types):
ms_type = pytype_to_dtype(np_type)
ms_types.append(ms_type)
return ms_types
def _get_types_and_shapes(dataset):
"""Get dataset types and shapes."""
dataset_types = _convert_type(dataset.output_types())
dataset_shapes = dataset.output_shapes()
return dataset_types, dataset_shapes
def _exec_datagraph(exec_dataset, dataset_size, phase='dataset'):
"""Initialize and execute the dataset graph."""
batch_size = exec_dataset.get_batch_size()
input_indexs = exec_dataset.input_indexs
# transform data format
dataset_types, dataset_shapes = _get_types_and_shapes(exec_dataset)
init_exec_dataset(exec_dataset.__ME_INITED__,
@ -72,8 +71,8 @@ def _exec_datagraph(exec_dataset, dataset_size, phase='dataset'):
dataset_shapes,
input_indexs,
phase=phase)
class Model:
"""
High-Level API for Training or Testing.
@ -131,7 +130,7 @@ class Model:
>>> dataset = get_dataset()
>>> model.train(2, dataset)
"""
def __init__(self, network, loss_fn=None, optimizer=None, metrics=None, eval_network=None,
eval_indexes=None, amp_level="O0", frequency=278, **kwargs):
self._network = network
@ -152,49 +151,49 @@ class Model:
self._device_number = _get_device_num()
self._global_rank = _get_global_rank()
self._parameter_broadcast = _get_parameter_broadcast()
self._train_network = self._build_train_network()
self._build_eval_network(metrics, eval_network, eval_indexes)
self._build_predict_network()
def _check_kwargs(self, kwargs):
for arg in kwargs:
if arg not in ['loss_scale_manager', 'keep_batchnorm_fp32']:
raise ValueError(f"Unsupport arg '{arg}'")
raise ValueError(f"Unsupport arg '{arg}'")
def _build_train_network(self):
"""Build train network"""
network = self._network
if self._optimizer:
if self._loss_scale_manager_set:
network = amp.build_train_network(network,
self._optimizer,
self._loss_fn,
level=self._amp_level,
loss_scale_manager=self._loss_scale_manager,
keep_batchnorm_fp32=self._keep_bn_fp32)
self._optimizer,
self._loss_fn,
level=self._amp_level,
loss_scale_manager=self._loss_scale_manager,
keep_batchnorm_fp32=self._keep_bn_fp32)
else:
network = amp.build_train_network(network,
self._optimizer,
self._loss_fn,
level=self._amp_level,
keep_batchnorm_fp32=self._keep_bn_fp32)
self._optimizer,
self._loss_fn,
level=self._amp_level,
keep_batchnorm_fp32=self._keep_bn_fp32)
elif self._loss_fn:
network = nn.WithLossCell(network, self._loss_fn)
# may need to handle the case where loss_fn is not None but optimizer is None
return network
def _build_eval_network(self, metrics, eval_network, eval_indexes):
"""Build the network for evaluation."""
self._metric_fns = get_metrics(metrics)
if not self._metric_fns:
return
if eval_network is not None:
if eval_indexes is not None and not (isinstance(eval_indexes, list) and len(eval_indexes) == 3):
raise ValueError("Eval_indexes must be a list or None. If eval_indexes is a list, length of it \
must be three. But got {}".format(eval_indexes))
self._eval_network = eval_network
self._eval_indexes = eval_indexes
else:
@ -202,27 +201,27 @@ class Model:
raise ValueError("loss_fn can not be None.")
self._eval_network = nn.WithEvalCell(self._network, self._loss_fn)
self._eval_indexes = [0, 1, 2]
def _build_predict_network(self):
"""Build the network for prediction."""
self._predict_network = self._network
if self._parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL):
self._predict_network = _VirtualDatasetCell(self._network)
def _clear_metrics(self):
"""Clear metrics local values."""
for metric in self._metric_fns.values():
metric.clear()
def _update_metrics(self, outputs):
"""Update metrics local values."""
if not isinstance(outputs, tuple):
raise ValueError("The `outputs` is not tuple.")
if self._eval_indexes is not None and len(outputs) < 3:
raise ValueError("The length of `outputs` must be greater than or equal to 3, \
but got {}".format(len(outputs)))
for metric in self._metric_fns.values():
if self._eval_indexes is None:
metric.update(*outputs)
@ -231,14 +230,14 @@ class Model:
metric.update(outputs[self._eval_indexes[0]])
else:
metric.update(outputs[self._eval_indexes[1]], outputs[self._eval_indexes[2]])
def _get_metrics(self):
"""Get metrics local values."""
metrics = dict()
for key, value in self._metric_fns.items():
metrics[key] = value.eval()
return metrics
def _get_scaling_sens(self):
"""get the scaling sens"""
scaling_sens = 1
@ -247,7 +246,7 @@ class Model:
if self._parallel_mode == ParallelMode.DATA_PARALLEL:
scaling_sens /= self._device_number
return scaling_sens
def _train(self, epoch, train_dataset, callbacks=None, dataset_sink_mode=True):
"""
Training.
@ -266,10 +265,10 @@ class Model:
"""
epoch = check_int_positive(epoch)
self._train_network.set_train()
if self._parameter_broadcast:
self._train_network.set_broadcast_flag()
# build callback list
list_callback = _build_callbacks(callbacks)
cb_params = _InternalCallbackParam()
@ -283,7 +282,7 @@ class Model:
cb_params.device_number = self._device_number
cb_params.train_dataset = train_dataset
cb_params.list_callback = list_callback
if dataset_sink_mode:
if context.get_context("mode") == context.PYNATIVE_MODE:
logger.warning("The pynative mode cannot support dataset sink mode currently."
@ -293,7 +292,6 @@ class Model:
self._train_dataset_sink_process(epoch, train_dataset, list_callback, cb_params)
else:
self._train_process(epoch, train_dataset, list_callback, cb_params)
def _train_dataset_sink_process(self, epoch, train_dataset, list_callback=None, cb_params=None):
"""
@ -317,7 +315,7 @@ class Model:
if not hasattr(train_dataset, '__ME_INITED__') and context.get_context("enable_loop_sink") \
and not context.get_context("enable_ge"):
need_wrap = True
dataset_helper = DatasetHelper(train_dataset, iter_first_order)
# remove later to deal with loop sink
if need_wrap:
@ -330,7 +328,7 @@ class Model:
loop_size = dataset_helper.loop_size()
run_context = RunContext(cb_params)
list_callback.begin(run_context)
# used to stop training early, e.g. by stopAtTime or stopAtStep
should_stop = False
has_do_train1_dataset = False
@ -338,7 +336,7 @@ class Model:
for i in range(epoch):
cb_params.cur_epoch_num = i + 1
list_callback.epoch_begin(run_context)
# in data sink mode dataset_helper only iterates once; otherwise it iterates epoch_size times
for inputs in dataset_helper:
list_callback.step_begin(run_context)
@ -357,14 +355,14 @@ class Model:
outputs = self._train_network(*inputs)
cb_params.net_outputs = outputs
list_callback.step_end(run_context)
list_callback.epoch_end(run_context)
should_stop = should_stop or run_context.get_stop_requested()
if should_stop:
break
list_callback.end(run_context)
def _train_process(self, epoch, train_dataset, list_callback=None, cb_params=None):
"""
Training process. The data would be passed to network directly.
@ -385,12 +383,12 @@ class Model:
_callback_wrapper(list_callback, run_context, "begin")
# used to stop training early, e.g. by stopAtTime or stopAtStep
should_stop = False
for i in range(epoch):
cb_params.cur_epoch_num = i + 1
_callback_wrapper(list_callback, run_context, "epoch_begin")
for next_element in dataset_helper:
len_element = len(next_element)
if self._loss_fn and len_element != 2:
@ -398,33 +396,33 @@ class Model:
"return two elements, but got {}".format(len_element))
cb_params.cur_step_num += 1
_callback_wrapper(list_callback, run_context, "step_begin")
overflow = False
if self._loss_scale_manager and self._loss_scale_manager.get_drop_overflow_update():
scaling_sens = self._get_scaling_sens()
next_element = tuple(next_element) + (Tensor(scaling_sens, mstype.float32),)
outputs = self._train_network(*next_element)
cb_params.net_outputs = outputs
if self._loss_scale_manager and self._loss_scale_manager.get_drop_overflow_update():
_, overflow, _ = outputs
overflow = np.all(overflow.asnumpy())
self._loss_scale_manager.update_loss_scale(overflow)
_callback_wrapper(list_callback, run_context, "step_end")
should_stop = should_stop or run_context.get_stop_requested()
if should_stop:
break
train_dataset.reset()
_callback_wrapper(list_callback, run_context, "epoch_end")
should_stop = should_stop or run_context.get_stop_requested()
if should_stop:
break
_callback_wrapper(list_callback, run_context, "end")
def train(self, epoch, train_dataset, callbacks=None, dataset_sink_mode=True):
"""
Training API where the iteration is controlled by python front-end.
@ -470,12 +468,12 @@ class Model:
if context.get_context("device_target") in ["CPU", "GPU"] and context.get_context("enable_loop_sink"):
raise ValueError("CPU and GPU can't support loop sink, please set enable_loop_sink=False.")
self._train(epoch,
train_dataset,
callbacks=callbacks,
dataset_sink_mode=dataset_sink_mode)
def _eval_dataset_sink_process(self, valid_dataset, list_callback=None, cb_params=None):
"""
Evaluation. The data would be passed to network through dataset channel.
@ -489,42 +487,42 @@ class Model:
Dict, returns the loss value & metrics values for the model in test mode.
"""
_device_number_check(self._parallel_mode, self._device_number)
run_context = RunContext(cb_params)
# remove later to deal with loop sink
need_wrap = False
if not hasattr(valid_dataset, '__ME_INITED__') and context.get_context("enable_loop_sink") \
and not context.get_context("enable_ge"):
and not context.get_context("enable_ge"):
need_wrap = True
valid_dataset.__loop_size__ = 1
dataset_helper = DatasetHelper(valid_dataset)
# remove later to deal with loop sink
if need_wrap:
self._eval_network = nn.DataWrapper(self._eval_network, *(dataset_helper.types_shapes()),
valid_dataset.__ME_INITED__)
valid_dataset.__ME_INITED__)
self._eval_network.set_train(mode=False)
self._eval_network.phase = 'eval'
list_callback.begin(run_context)
for inputs in dataset_helper:
cb_params.cur_step_num += 1
list_callback.step_begin(run_context)
outputs = self._eval_network(*inputs)
cb_params.net_outputs = outputs
list_callback.step_end(run_context)
self._update_metrics(outputs)
metrics = self._get_metrics()
cb_params.metrics = metrics
list_callback.end(run_context)
return metrics
def _eval_process(self, valid_dataset, list_callback=None, cb_params=None):
"""
Evaluation. The data would be passed to network directly.
@ -539,7 +537,7 @@ class Model:
"""
run_context = RunContext(cb_params)
list_callback.begin(run_context)
dataset_helper = DatasetHelper(valid_dataset, dataset_sink_mode=False)
for next_element in dataset_helper:
cb_params.cur_step_num += 1
@ -548,12 +546,12 @@ class Model:
cb_params.net_outputs = outputs
list_callback.step_end(run_context)
self._update_metrics(outputs)
metrics = self._get_metrics()
cb_params.metrics = metrics
list_callback.end(run_context)
return metrics
def eval(self, valid_dataset, callbacks=None, dataset_sink_mode=True):
"""
Evaluation API where the iteration is controlled by python front-end.
@ -584,7 +582,7 @@ class Model:
check_bool(dataset_sink_mode)
if not self._metric_fns:
raise ValueError("metric fn can not be None or empty.")
list_callback = _build_callbacks(callbacks)
cb_params = _InternalCallbackParam()
cb_params.eval_network = self._eval_network
@ -592,16 +590,16 @@ class Model:
cb_params.batch_num = valid_dataset.get_dataset_size()
cb_params.mode = "eval"
cb_params.cur_step_num = 0
self._eval_network.set_train(mode=False)
self._eval_network.phase = 'eval'
self._clear_metrics()
if dataset_sink_mode:
return self._eval_dataset_sink_process(valid_dataset, list_callback, cb_params)
return self._eval_process(valid_dataset, list_callback, cb_params)
def predict(self, *predict_data):
"""
Generates output predictions for the input samples.
@ -625,9 +623,9 @@ class Model:
self._predict_network.set_train(False)
check_input_data(*predict_data, data_class=Tensor)
result = self._predict_network(*predict_data)
check_output_data(result)
return result
__all__ = ["Model"]

View File

@ -13,13 +13,15 @@
# limitations under the License.
# ============================================================================
"""ResNet."""
import numpy as np
import mindspore.nn as nn
from mindspore.ops import operations as P
from mindspore.common.tensor import Tensor
from second_order.thor_layer import Conv2d_Thor, Dense_Thor
import math
import mindspore.nn as nn
import numpy as np
from mindspore.common.tensor import Tensor
from mindspore.ops import operations as P
from second_order.thor_layer import Conv2d_Thor, Dense_Thor
def calculate_gain(nonlinearity, param=None):
linear_fns = ['linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d', 'conv_transpose2d', 'conv_transpose3d']
if nonlinearity in linear_fns or nonlinearity == 'sigmoid':
@ -39,12 +41,13 @@ def calculate_gain(nonlinearity, param=None):
return math.sqrt(2.0 / (1 + negative_slope ** 2))
else:
raise ValueError("Unsupported nonlinearity {}".format(nonlinearity))
def _calculate_fan_in_and_fan_out(tensor):
dimensions = len(tensor)
if dimensions < 2:
raise ValueError("Fan in and fan out can not be computed for tensor with fewer than 2 dimensions")
if dimensions == 2: # Linear
fan_in = tensor[1]
fan_out = tensor[0]
@ -57,22 +60,25 @@ def _calculate_fan_in_and_fan_out(tensor):
fan_in = num_input_fmaps * receptive_field_size
fan_out = num_output_fmaps * receptive_field_size
return fan_in, fan_out
def _calculate_correct_fan(tensor, mode):
mode = mode.lower()
valid_modes = ['fan_in', 'fan_out']
if mode not in valid_modes:
raise ValueError("Mode {} not supported, please use one of {}".format(mode, valid_modes))
fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
return fan_in if mode == 'fan_in' else fan_out
def kaiming_normal(inputs_shape, a=0, mode='fan_in', nonlinearity='leaky_relu'):
fan = _calculate_correct_fan(inputs_shape, mode)
gain = calculate_gain(nonlinearity, a)
std = gain / math.sqrt(fan)
return np.random.normal(0, std, size=inputs_shape).astype(np.float32)
def kaiming_uniform(inputs_shape, a=0, mode='fan_in', nonlinearity='leaky_relu'):
fan = _calculate_correct_fan(inputs_shape, mode)
gain = calculate_gain(nonlinearity, a)
@ -80,6 +86,7 @@ def kaiming_uniform(inputs_shape, a=0, mode='fan_in', nonlinearity='leaky_relu')
bound = math.sqrt(3.0) * std # Calculate uniform bounds from standard deviation
return np.random.uniform(-bound, bound, size=inputs_shape).astype(np.float32)
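(Reference note: a sanity check of _calculate_fan_in_and_fan_out above for a conv weight of shape (out_c, in_c, kh, kw): fan_in = in_c*kh*kw and fan_out = out_c*kh*kw.)

def fan_in_out(shape):
    receptive = 1
    for s in shape[2:]:          # receptive field size = kh * kw
        receptive *= s
    return shape[1] * receptive, shape[0] * receptive

print(fan_in_out((64, 3, 7, 7)))  # (147, 3136): fan_in, fan_out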
def _conv3x3(in_channel, out_channel, stride=1, damping=0.03, loss_scale=1, frequency=278):
weight_shape = (out_channel, in_channel, 3, 3)
weight = Tensor(kaiming_normal(weight_shape, mode="fan_out", nonlinearity='relu'))
@ -88,35 +95,41 @@ def _conv3x3(in_channel, out_channel, stride=1, damping=0.03, loss_scale=1, freq
damping=damping, loss_scale=loss_scale, frequency=frequency)
# return nn.Conv2d(in_channel, out_channel,
# kernel_size=3, stride=stride, padding=0, pad_mode='same', weight_init=weight)
def _conv1x1(in_channel, out_channel, stride=1, damping=0.03, loss_scale=1, frequency=278):
weight_shape = (out_channel, in_channel, 1, 1)
weight = Tensor(kaiming_normal(weight_shape, mode="fan_out", nonlinearity='relu'))
return Conv2d_Thor(in_channel, out_channel,
kernel_size=1, stride=stride, padding=0, pad_mode='same', weight_init=weight,
damping=damping, loss_scale=loss_scale, frequency=frequency)
kernel_size=1, stride=stride, padding=0, pad_mode='same', weight_init=weight,
damping=damping, loss_scale=loss_scale, frequency=frequency)
def _conv7x7(in_channel, out_channel, stride=1, damping=0.03, loss_scale=1, frequency=278):
weight_shape = (out_channel, in_channel, 7, 7)
weight = Tensor(kaiming_normal(weight_shape, mode="fan_out", nonlinearity='relu'))
return Conv2d_Thor(in_channel, out_channel,
kernel_size=7, stride=stride, padding=0, pad_mode='same', weight_init=weight,
damping=damping, loss_scale=loss_scale, frequency=frequency)
kernel_size=7, stride=stride, padding=0, pad_mode='same', weight_init=weight,
damping=damping, loss_scale=loss_scale, frequency=frequency)
def _bn(channel):
return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9,
gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1)
def _bn_last(channel):
return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9,
gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1)
def _fc(in_channel, out_channel, damping, loss_scale, frequency):
weight_shape = (out_channel, in_channel)
weight = Tensor(kaiming_uniform(weight_shape, a = math.sqrt(5)))
weight = Tensor(kaiming_uniform(weight_shape, a=math.sqrt(5)))
return Dense_Thor(in_channel, out_channel, has_bias=False, weight_init=weight, bias_init=0,
damping=damping, loss_scale=loss_scale, frequency=frequency)
class ResidualBlock(nn.Cell):
"""
ResNet V1 residual block definition.
@ -133,7 +146,7 @@ class ResidualBlock(nn.Cell):
>>> ResidualBlock(3, 256, stride=2)
"""
expansion = 4
def __init__(self,
in_channel,
out_channel,
@ -142,54 +155,58 @@ class ResidualBlock(nn.Cell):
loss_scale=1,
frequency=278):
super(ResidualBlock, self).__init__()
channel = out_channel // self.expansion
self.conv1 = _conv1x1(in_channel, channel, stride=1, damping=damping, loss_scale=loss_scale, frequency=frequency)
self.conv1 = _conv1x1(in_channel, channel, stride=1, damping=damping, loss_scale=loss_scale,
frequency=frequency)
self.bn1 = _bn(channel)
self.conv2 = _conv3x3(channel, channel, stride=stride, damping=damping, loss_scale=loss_scale, frequency=frequency)
self.conv2 = _conv3x3(channel, channel, stride=stride, damping=damping, loss_scale=loss_scale,
frequency=frequency)
self.bn2 = _bn(channel)
self.conv3 = _conv1x1(channel, out_channel, stride=1, damping=damping, loss_scale=loss_scale, frequency=frequency)
self.conv3 = _conv1x1(channel, out_channel, stride=1, damping=damping, loss_scale=loss_scale,
frequency=frequency)
self.bn3 = _bn_last(out_channel)
self.relu = nn.ReLU()
self.down_sample = False
if stride != 1 or in_channel != out_channel:
self.down_sample = True
self.down_sample_layer = None
if self.down_sample:
self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, stride,
damping=damping, loss_scale=loss_scale, frequency=frequency),
damping=damping, loss_scale=loss_scale,
frequency=frequency),
_bn(out_channel)])
self.add = P.TensorAdd()
def construct(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.down_sample:
identity = self.down_sample_layer(identity)
out = self.add(out, identity)
out = self.relu(out)
return out
class ResNet(nn.Cell):
"""
ResNet architecture.
@ -212,7 +229,7 @@ class ResNet(nn.Cell):
>>> [1, 2, 2, 2],
>>> 10)
"""
def __init__(self,
block,
layer_nums,
@ -224,15 +241,15 @@ class ResNet(nn.Cell):
loss_scale,
frequency):
super(ResNet, self).__init__()
if not len(layer_nums) == len(in_channels) == len(out_channels) == 4:
raise ValueError("the length of layer_num, in_channels, out_channels list must be 4!")
self.conv1 = _conv7x7(3, 64, stride=2, damping=damping, loss_scale=loss_scale, frequency=frequency)
self.bn1 = _bn(64)
self.relu = P.ReLU()
self.maxpool = P.MaxPoolWithArgmax(padding="same", ksize=3, strides=2)
self.layer1 = self._make_layer(block,
layer_nums[0],
in_channel=in_channels[0],
@ -253,7 +270,7 @@ class ResNet(nn.Cell):
layer_nums[2],
in_channel=in_channels[2],
out_channel=out_channels[2],
stride=strides[2],damping=damping,
stride=strides[2], damping=damping,
loss_scale=loss_scale,
frequency=frequency)
self.layer4 = self._make_layer(block,
@ -264,11 +281,11 @@ class ResNet(nn.Cell):
damping=damping,
loss_scale=loss_scale,
frequency=frequency)
self.mean = P.ReduceMean(keep_dims=True)
self.flatten = nn.Flatten()
self.end_point = _fc(out_channels[3], num_classes, damping=damping, loss_scale=loss_scale, frequency=frequency)
def _make_layer(self, block, layer_num, in_channel, out_channel, stride,
damping, loss_scale, frequency):
"""
@ -288,36 +305,36 @@ class ResNet(nn.Cell):
>>> _make_layer(ResidualBlock, 3, 128, 256, 2)
"""
layers = []
resnet_block = block(in_channel, out_channel, stride=stride,
damping=damping, loss_scale=loss_scale, frequency=frequency)
layers.append(resnet_block)
for _ in range(1, layer_num):
resnet_block = block(out_channel, out_channel, stride=1,
damping=damping, loss_scale=loss_scale, frequency=frequency)
layers.append(resnet_block)
return nn.SequentialCell(layers)
def construct(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
c1, argmax = self.maxpool(x)
c2 = self.layer1(c1)
c3 = self.layer2(c2)
c4 = self.layer3(c3)
c5 = self.layer4(c4)
out = self.mean(c5, (2, 3))
out = self.flatten(out)
out = self.end_point(out)
return out
def resnet50(class_num=10, damping=0.03, loss_scale=1, frequency=278):
"""
Get ResNet50 neural network.

View File

@ -13,42 +13,47 @@
# limitations under the License.
# ============================================================================
"""momentum"""
import numpy as np
from mindspore.ops import functional as F, composite as C, operations as P
from mindspore.common.parameter import Parameter
from mindspore.common.tensor import Tensor
import mindspore.common.dtype as mstype
from mindspore.nn.optim.optimizer import Optimizer
from mindspore.common.parameter import ParameterTuple
from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_mirror_mean
from mindspore.common.initializer import initializer
from model.grad_reducer_thor import DistributedGradReducerThor
from cus_ops.cus_matmul_cube_fracz_right_mul import CusMatMulCubeFraczRightMul
from cus_ops.cus_fused_abs_max1 import CusFusedAbsMax1
from cus_ops.cus_matmul_cube_fracz_left_cast import CusMatMulCubeFraczLeftCast
from cus_ops.cus_matmul_cube_dense_left import CusMatMulCubeDenseLeft
from cus_ops.cus_matmul_cube_dense_right import CusMatMulCubeDenseRight
from cus_ops.cus_matmul_cube_fracz_left_cast import CusMatMulCubeFraczLeftCast
from mindspore.common.initializer import initializer
from mindspore.common.parameter import Parameter
from mindspore.common.parameter import ParameterTuple
from mindspore.common.tensor import Tensor
from mindspore.nn.optim.optimizer import Optimizer
from mindspore.ops import functional as F, composite as C, operations as P
from mindspore.parallel._utils import _get_device_num, _get_mirror_mean
from cus_ops.cus_matmul_cube_dense_left import CusMatMulCubeDenseLeft
from cus_ops.cus_matmul_cube_fracz_right_mul import CusMatMulCubeFraczRightMul
from model.grad_reducer_thor import DistributedGradReducerThor
momentum_opt = C.MultitypeFuncGraph("momentum_opt")
@momentum_opt.register("Function", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor")
def _tensor_run_opt_ext(opt, learning_rate, momentum, gradient, weight, moment):
"""Apply momentum optimizer to the weight parameter using Tensor."""
success = True
success = F.depend(success, opt(weight, moment, learning_rate, gradient, momentum))
return success
op_add = P.AddN()
apply_decay = C.MultitypeFuncGraph("apply_decay")
@apply_decay.register("Number", "Bool", "Tensor", "Tensor")
def _tensor_apply_decay(weight_decay, if_apply, weight, gradient):
"""Get grad with weight_decay."""
if if_apply:
return op_add((weight * weight_decay, gradient))
return gradient
class THOR(Optimizer):
def __init__(self, params, learning_rate, momentum, matrix_A, matrix_G, A_inv_max, G_inv_max, weight_decay=0.0, loss_scale=1.0,
def __init__(self, params, learning_rate, momentum, matrix_A, matrix_G, A_inv_max, G_inv_max, weight_decay=0.0,
loss_scale=1.0,
decay_filter=lambda x: x.name not in []):
super(THOR, self).__init__(learning_rate, params, weight_decay, loss_scale)
if isinstance(momentum, float) and momentum < 0.0:
@ -93,9 +98,10 @@ class THOR(Optimizer):
self.matrix_A_inv = ()
self.matrix_G_inv = ()
self.matrix_max_inv = ()
for i in range(54):
self.matrix_max_inv = self.matrix_max_inv + (Parameter(initializer(1, [1], mstype.float32), name="matrix_max"+str(i), requires_grad=False), )
self.matrix_max_inv = self.matrix_max_inv + (
Parameter(initializer(1, [1], mstype.float32), name="matrix_max" + str(i), requires_grad=False),)
self.log = P.Log()
self.exp = P.Exp()
self.sqrt = P.Sqrt()
@ -105,7 +111,7 @@ class THOR(Optimizer):
self.thor = True
self.weight_decay = weight_decay * loss_scale
self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
def construct(self, gradients):
params = self.params
moments = self.moments
@ -124,9 +130,9 @@ class THOR(Optimizer):
matrix_G = F.depend(matrix_G, g)
A_max = F.depend(A_max, g)
G_max = F.depend(G_max, g)
matrix_A_allreduce = matrix_A_allreduce + (matrix_A, )
matrix_G_allreduce = matrix_G_allreduce + (matrix_G, )
matrix_A_max_allreduce = matrix_A_max_allreduce + (A_max, )
matrix_A_allreduce = matrix_A_allreduce + (matrix_A,)
matrix_G_allreduce = matrix_G_allreduce + (matrix_G,)
matrix_A_max_allreduce = matrix_A_max_allreduce + (A_max,)
matrix_G_max_allreduce = matrix_G_max_allreduce + (G_max,)
matrix_A_allreduce = self.grad_reducer_A(matrix_A_allreduce)
matrix_G_allreduce = self.grad_reducer_G(matrix_G_allreduce)
@ -182,13 +188,13 @@ class THOR(Optimizer):
new_grads = new_grads + (g,)
else:
g = self.cube_matmul_left(matrix_G, g)
g =self.cube_matmul_right_mul(g, matrix_A, matrix_max)
g = self.cube_matmul_right_mul(g, matrix_A, matrix_max)
new_grads = new_grads + (g, gradients[i * 3 + 1], gradients[i * 3 + 2])
gradients = new_grads
if self.weight_decay > 0:
gradients = self.hyper_map(F.partial(apply_decay, self.weight_decay), self.decay_flags,
params, gradients)
params, gradients)
gradients = self.scale_grad(gradients)
lr = self.get_lr()
success = self.hyper_map(F.partial(momentum_opt, self.opt, lr, self.momentum), gradients, params, moments)
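(Reference note: a rough NumPy picture of the per-layer THOR update above. The gradient is preconditioned on both sides by the inverse Kronecker factors and scaled by matrix_max before the momentum step; all shapes and values here are purely illustrative.)

import numpy as np

g = np.random.randn(4, 3)    # gradient of a 4x3 weight
matrix_G = 0.5 * np.eye(4)   # G^-1 factor (output side), illustrative
matrix_A = 2.0 * np.eye(3)   # A^-1 factor (input side), illustrative
matrix_max = 1.0             # scaling derived from A_inv_max / G_inv_max
g = matrix_G @ g @ matrix_A * matrix_max  # cube_matmul_left + cube_matmul_right_mul
print(g.shape)               # (4, 3), same shape as the weight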

View File

@ -13,27 +13,29 @@
# limitations under the License.
# ============================================================================
import numpy as np
import mindspore as ms
import mindspore.common.dtype as mstype
from mindspore.ops import operations as P
from mindspore.common.parameter import Parameter
from mindspore.common.initializer import initializer
import numpy as np
from mindspore._checkparam import check_bool, twice, check_int_positive
from mindspore.nn.cell import Cell
from mindspore.ops import functional as F
from mindspore.common.tensor import Tensor
from mindspore.nn.layer.activation import get_activation
from mindspore._extends import cell_attr_register
from mindspore.common.initializer import initializer
from mindspore.common.parameter import Parameter
from mindspore.common.tensor import Tensor
from mindspore.nn.cell import Cell
from mindspore.nn.layer.activation import get_activation
from mindspore.ops import operations as P
from cus_ops.cus_batch_matmul import CusBatchMatMul
from cus_ops.cus_cholesky_trsm import CusCholeskyTrsm
from cus_ops.cus_fused_abs_max1 import CusFusedAbsMax1
from cus_ops.cus_img2col import CusImg2Col
from cus_ops.cus_matmul_cube import CusMatMulCube
from cus_ops.cus_matrix_combine import CusMatrixCombine
from cus_ops.cus_cholesky_trsm import CusCholeskyTrsm
from cus_ops.cus_img2col import CusImg2Col
from cus_ops.cus_fused_abs_max1 import CusFusedAbsMax1
from cus_ops.cus_batch_matmul import CusBatchMatMul
from cus_ops.cus_transpose02314 import CusTranspose02314
C0 = 16
def caculate_device_shape(matrix_dim, channel, is_A):
if is_A:
if channel // C0 == 0:
@@ -41,11 +43,13 @@ def caculate_device_shape(matrix_dim, channel, is_A):
return (int(matrix_dim // C0), int(matrix_dim // C0), C0, C0), int(matrix_dim)
else:
return (int(matrix_dim // C0), int(matrix_dim // C0), C0, C0), int(matrix_dim)
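caculate_device_shape maps a factor's logical dimension onto the blocked (N1, N1, 16, 16) layout that the Ascend cube unit consumes; the padA and device_shape_pad paths further down exist because that layout only holds dimensions that are multiples of C0 = 16. A hedged sketch of the alignment rule this assumes (the helper itself is only partially shown here):

C0 = 16  # cube-unit block size used throughout this file

def aligned_dim(matrix_dim):
    # smallest multiple of C0 that holds matrix_dim; this is what the
    # padA / device_shape_pad logic below effectively rounds up to
    return -(-matrix_dim // C0) * C0

assert aligned_dim(147) == 160  # e.g. 3 * 7 * 7 im2col rows of a 7x7 stem conv
assert aligned_dim(64) == 64    # already aligned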
class _Conv(Cell):
r"""Applies a N-D convolution over an input signal composed of several input
planes.
"""
def __init__(self,
in_channels,
out_channels,
@@ -73,23 +77,23 @@ class _Conv(Cell):
self.has_bias = has_bias
if not (isinstance(in_channels, int) and in_channels > 0):
raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op passed '
+str(in_channels)+ ', should be a int and greater than 0.')
+ str(in_channels) + ', should be a int and greater than 0.')
if (not isinstance(kernel_size, tuple)) or len(kernel_size) != 2 or \
(not isinstance(kernel_size[0], int)) or (not isinstance(kernel_size[1], int)) or \
(not isinstance(kernel_size[0], int)) or (not isinstance(kernel_size[1], int)) or \
kernel_size[0] < 1 or kernel_size[1] < 1:
raise ValueError('Attr \'kernel_size\' of \'Conv2D\' Op passed '
+str(self.kernel_size)+', should be a int or tuple and equal to or greater than 1.')
+ str(self.kernel_size) + ', should be a int or tuple and equal to or greater than 1.')
if in_channels % group != 0:
raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op must be divisible by '
'attr \'group\' of \'Conv2D\' Op.')
if out_channels % group != 0:
raise ValueError('Attr \'out_channels\' of \'Conv2D\' Op must be divisible by '
'attr \'group\' of \'Conv2D\' Op.')
self.weight = Parameter(initializer(
weight_init, [out_channels, in_channels // group, *kernel_size]),
name='weight')
name='weight')
if check_bool(has_bias):
self.bias = Parameter(initializer(
bias_init, [out_channels]), name='bias')
@@ -97,10 +101,11 @@ class _Conv(Cell):
if bias_init != 'zeros':
logger.warning("Value of 'has_bias' is False, value of 'bias_init' will be ignored.")
self.bias = None
def construct(self, *inputs):
raise NotImplementedError
class Conv2d_Thor(_Conv):
def __init__(self,
in_channels,
@@ -120,7 +125,7 @@ class Conv2d_Thor(_Conv):
bias_init='zeros'):
self.thor = True
ksizes = (1, kernel_size, kernel_size, 1)
self.hw = kernel_size*kernel_size
self.hw = kernel_size * kernel_size
strides = (1, stride, stride, 1)
kernel_size = twice(kernel_size)
super(Conv2d_Thor, self).__init__(
@@ -146,26 +151,37 @@ class Conv2d_Thor(_Conv):
dilation=self.dilation,
group=self.group
)
self.img2col = CusImg2Col(ksizes = ksizes, strides = strides)
self.img2col = CusImg2Col(ksizes=ksizes, strides=strides)
self.cube_matmul = CusMatMulCube(transpose_a=True)
self.matrix_combine = CusMatrixCombine()
self.cholesky = CusCholeskyTrsm()
self.transpose02314 = CusTranspose02314()
self.matrix_A_dim = self.in_channels*self.kernel_size[0]*self.kernel_size[1]
self.matrix_A_dim = self.in_channels * self.kernel_size[0] * self.kernel_size[1]
self.matrix_G_dim = self.out_channels
self.matrix_A_device_shape, self.matrix_A_device_dim = caculate_device_shape(self.matrix_A_dim, self.in_channels, True)
self.matrix_G_device_shape, self.matrix_G_device_dim = caculate_device_shape(self.matrix_G_dim, self.in_channels, False)
self.matrix_A_device_temp_shape = (self.matrix_A_device_shape[0], self.matrix_A_device_shape[2], self.matrix_A_device_shape[1], self.matrix_A_device_shape[3])
self.matrix_G_device_temp_shape = (self.matrix_G_device_shape[0], self.matrix_G_device_shape[2], self.matrix_G_device_shape[1], self.matrix_G_device_shape[3])
self.matrix_A_inv = Parameter(Tensor(np.reshape(np.identity(self.matrix_A_device_dim).astype(np.float16), self.matrix_A_device_shape)), name='matrix_A_inv', requires_grad=False)
self.matrix_A_device_shape, self.matrix_A_device_dim = caculate_device_shape(self.matrix_A_dim,
self.in_channels, True)
self.matrix_G_device_shape, self.matrix_G_device_dim = caculate_device_shape(self.matrix_G_dim,
self.in_channels, False)
self.matrix_A_device_temp_shape = (
self.matrix_A_device_shape[0], self.matrix_A_device_shape[2], self.matrix_A_device_shape[1],
self.matrix_A_device_shape[3])
self.matrix_G_device_temp_shape = (
self.matrix_G_device_shape[0], self.matrix_G_device_shape[2], self.matrix_G_device_shape[1],
self.matrix_G_device_shape[3])
self.matrix_A_inv = Parameter(
Tensor(np.reshape(np.identity(self.matrix_A_device_dim).astype(np.float16), self.matrix_A_device_shape)),
name='matrix_A_inv', requires_grad=False)
self.A_inv_max = Parameter(initializer(0, [1], mstype.float32), name="A_inv_max", requires_grad=False)
self.matrix_G_inv = Parameter(Tensor(np.reshape(np.identity(self.matrix_G_device_dim).astype(np.float16), self.matrix_G_device_shape)), name="matrix_G_inv", requires_grad=False)
self.matrix_G_inv = Parameter(
Tensor(np.reshape(np.identity(self.matrix_G_device_dim).astype(np.float16), self.matrix_G_device_shape)),
name="matrix_G_inv", requires_grad=False)
self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), name="G_inv_max", requires_grad=False)
self.fake_G = Tensor(np.reshape(np.identity(self.matrix_G_device_dim).astype(np.float16), self.matrix_G_device_shape))
self.fake_G_inv_max = Tensor(np.zeros([1,]).astype(np.float32))
self.fake_G = Tensor(
np.reshape(np.identity(self.matrix_G_device_dim).astype(np.float16), self.matrix_G_device_shape))
self.fake_G_inv_max = Tensor(np.zeros([1, ]).astype(np.float32))
self.shape = P.Shape()
self.reshape = P.Reshape()
self.transpose = P.Transpose()
@@ -178,9 +194,10 @@ class Conv2d_Thor(_Conv):
self.channels_slice_flag = False
if self.in_channels % C0 != 0:
self.channels_slice_flag = True
self.padA_flag = False
if (self.matrix_A_dim // self.diag_block_dim) * self.diag_block_dim != self.matrix_A_dim and self.matrix_A_dim > self.diag_block_dim:
if (
self.matrix_A_dim // self.diag_block_dim) * self.diag_block_dim != self.matrix_A_dim and self.matrix_A_dim > self.diag_block_dim:
self.padA_flag = True
pad_dim = self.diag_block_dim - self.matrix_A_dim % self.diag_block_dim
self.padA = P.Pad(((0, pad_dim), (0, pad_dim)))
@@ -191,16 +208,16 @@ class Conv2d_Thor(_Conv):
self.slice = P.Slice()
self.gather = P.GatherV2()
self.freq = Tensor(frequency, mstype.int32)
self.loss_scale = Tensor(1/loss_scale, mstype.float16)
self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
self.axis = 0
dampingA_dim = self.matrix_A_dim
if (self.matrix_A_dim % self.diag_block_dim) != 0 and self.matrix_A_dim > self.diag_block_dim:
dampingA_dim = (self.matrix_A_dim // self.diag_block_dim + 1) * self.diag_block_dim
dampingG_dim = self.matrix_G_dim
if (self.matrix_G_dim % self.diag_block_dim) != 0 and self.matrix_G_dim > self.diag_block_dim:
dampingG_dim = (self.matrix_G_dim // self.diag_block_dim + 1) * self.diag_block_dim
self.dampingA = Tensor(np.identity(dampingA_dim), mstype.float32)
self.dampingG = Tensor(np.identity(dampingG_dim), mstype.float32)
self.fused_abs_max1 = CusFusedAbsMax1([self.matrix_A_dim, self.matrix_A_dim])
@@ -211,50 +228,50 @@ class Conv2d_Thor(_Conv):
self.getG = P.InsertGradientOf(self.save_gradient)
def save_gradient(self, dout):
out = dout
dout = self.mul(dout, self.loss_scale)
dout = self.mul(dout, 32.0)
dout = self.transpose02314(dout)
dout_shape = self.shape(dout)
normalizer = dout_shape[0]
matrix_G = self.cube_matmul(dout, dout)
normalizer = self.cast(normalizer, ms.float32)
matrix_G = self.mul(matrix_G, 1.0/normalizer)
damping_step = self.gather(self.damping, self.cov_step, 0)
self.cov_step = self.cov_step + self.freq
damping_step = self.cast(damping_step, mstype.float32)
damping = self.mul(damping_step, 32.0/normalizer)
damping = self.sqrt(damping)
dampingG = self.cast(self.dampingG, mstype.float32)
matrix_G = matrix_G + damping * dampingG
matrix_G_inv = self.cholesky(matrix_G)
matrix_G_inv = self.vector_matmul(matrix_G_inv, matrix_G_inv)
matrix_G_inv_max = self.fused_abs_max2(matrix_G_inv)
matrix_G_inv_max = self.fused_abs_max2(matrix_G_inv_max)
self.G_inv_max = matrix_G_inv_max
matrix_G_inv = self.matrix_combine(matrix_G_inv)
matrix_G_inv_shape = self.shape(matrix_G_inv)
matrix_G_inv = self.reshape(matrix_G_inv, self.matrix_G_device_temp_shape)
matrix_G_inv = self.transpose(matrix_G_inv, (2, 0, 1, 3))
matrix_G = self.cast(matrix_G_inv, mstype.float16)
self.matrix_G_inv = matrix_G
return out
out = dout
dout = self.mul(dout, self.loss_scale)
dout = self.mul(dout, 32.0)
dout = self.transpose02314(dout)
dout_shape = self.shape(dout)
normalizer = dout_shape[0]
matrix_G = self.cube_matmul(dout, dout)
normalizer = self.cast(normalizer, ms.float32)
matrix_G = self.mul(matrix_G, 1.0 / normalizer)
damping_step = self.gather(self.damping, self.cov_step, 0)
self.cov_step = self.cov_step + self.freq
damping_step = self.cast(damping_step, mstype.float32)
damping = self.mul(damping_step, 32.0 / normalizer)
damping = self.sqrt(damping)
dampingG = self.cast(self.dampingG, mstype.float32)
matrix_G = matrix_G + damping * dampingG
matrix_G_inv = self.cholesky(matrix_G)
matrix_G_inv = self.vector_matmul(matrix_G_inv, matrix_G_inv)
matrix_G_inv_max = self.fused_abs_max2(matrix_G_inv)
matrix_G_inv_max = self.fused_abs_max2(matrix_G_inv_max)
self.G_inv_max = matrix_G_inv_max
matrix_G_inv = self.matrix_combine(matrix_G_inv)
matrix_G_inv_shape = self.shape(matrix_G_inv)
matrix_G_inv = self.reshape(matrix_G_inv, self.matrix_G_device_temp_shape)
matrix_G_inv = self.transpose(matrix_G_inv, (2, 0, 1, 3))
matrix_G = self.cast(matrix_G_inv, mstype.float16)
self.matrix_G_inv = matrix_G
return out
def construct(self, x):
if self.thor:
matrix_A = self.img2col(x)
matrix_A_shape = self.shape(matrix_A)
normalizer = matrix_A_shape[0]
matrix_A = self.cube_matmul(matrix_A, matrix_A)
if self.channels_slice_flag:
matrix_A = self.reshape(matrix_A, (self.hw, C0, self.hw, C0))
matrix_A = self.slice(matrix_A, (0, 0, 0, 0), (self.hw, self.in_channels, self.hw, self.in_channels))
matrix_A = self.reshape(matrix_A, (self.matrix_A_dim, self.matrix_A_dim))
normalizer = self.cast(normalizer, ms.float32)
matrix_A = self.mul(matrix_A, 1.0/normalizer)
matrix_A = self.mul(matrix_A, 1.0 / normalizer)
if self.padA_flag:
matrix_A = self.padA(matrix_A)
damping_step = self.gather(self.damping, self.cov_step, self.axis)
@@ -273,7 +290,7 @@ class Conv2d_Thor(_Conv):
in_channels = self.in_channels
if self.padA_flag:
matrix_A_inv = self.slice(matrix_A_inv, (0, 0), (self.matrix_A_dim, self.matrix_A_dim))
if self.device_shape_pad_flag:
matrix_A_inv = self.reshape(matrix_A_inv, (self.hw, self.in_channels, self.hw, self.in_channels))
matrix_A_inv = self.device_shape_pad(matrix_A_inv)
@@ -286,31 +303,32 @@ class Conv2d_Thor(_Conv):
out = self.getG(out)
else:
out = self.conv2d(x, self.weight)
return out
def extra_repr(self):
s = 'input_channels={}, output_channels={}, kernel_size={},' \
'stride={}, pad_mode={}, padding={}, dilation={}, ' \
'stride={}, pad_mode={}, padding={}, dilation={}, ' \
'group={}, data_format={}, has_bias={},' \
'weight_init={}, bias_init={}'.format(
self.in_channels,
self.out_channels,
self.kernel_size,
self.stride,
self.pad_mode,
self.padding,
self.dilation,
self.group,
self.data_format,
self.has_bias,
self.weight,
self.bias)
'weight_init={}, bias_init={}'.format(
self.in_channels,
self.out_channels,
self.kernel_size,
self.stride,
self.pad_mode,
self.padding,
self.dilation,
self.group,
self.data_format,
self.has_bias,
self.weight,
self.bias)
if self.has_bias:
s += ', bias={}'.format(self.bias)
return s
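save_gradient above is where the second-order statistics enter: the damped output covariance matrix_G = dout^T dout / N + sqrt(damping * 32 / N) * I is inverted through its Cholesky factor. A dense NumPy equivalent (not part of this diff), with the fused Ascend ops (CusCholeskyTrsm, CusMatrixCombine and the fractal reshapes) collapsed into ordinary linear algebra:

import numpy as np

def g_factor_inverse(dout, damping_step, loss_scale=1.0):
    # dout: (N, out_channels) gradient of the loss w.r.t. the layer output
    dout = dout * (1.0 / loss_scale) * 32.0      # undo loss scaling, batch factor
    n = dout.shape[0]
    matrix_G = dout.T @ dout / n                 # cube_matmul(dout, dout) / normalizer
    damping = np.sqrt(damping_step * 32.0 / n)   # square-rooted Tikhonov term
    matrix_G = matrix_G + damping * np.eye(matrix_G.shape[0])
    L_inv = np.linalg.inv(np.linalg.cholesky(matrix_G))  # Cholesky + triangular inverse
    return L_inv.T @ L_inv                       # G^-1 = L^-T @ L^-1

G_inv = g_factor_inverse(np.random.randn(64, 8), damping_step=0.03)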
class Dense_Thor(Cell):
@cell_attr_register(attrs=['has_bias', 'activation'])
def __init__(self,
@@ -330,30 +348,30 @@ class Dense_Thor(Cell):
self.thor = True
if isinstance(weight_init, Tensor):
if weight_init.dim() != 2 or weight_init.shape()[0] != out_channels or \
weight_init.shape()[1] != in_channels:
weight_init.shape()[1] != in_channels:
raise ValueError("weight_init shape error")
self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")
if self.has_bias:
if isinstance(bias_init, Tensor):
if bias_init.dim() != 1 or bias_init.shape()[0] != out_channels:
raise ValueError("bias_init shape error")
self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")
self.matmul = P.MatMul(transpose_b=True)
self.bias_add = P.BiasAdd()
self.activation = get_activation(activation)
self.activation_flag = self.activation is not None
self.matrix_A_inv = Parameter(Tensor(np.zeros([128, 128, 16, 16]).astype(np.float16)), name='matrix_A_inv',
requires_grad=False)
self.matrix_G_inv = Parameter(Tensor(np.zeros([63, 63, 16, 16]).astype(np.float16)), name="matrix_G_inv",
requires_grad=False)
self.fake_G = Tensor(np.zeros([63, 63, 16, 16]).astype(np.float16))
self.matmul = P.MatMul(transpose_b=True)
self.cube_matmul = CusMatMulCube(transpose_a=True)
self.matrix_combine = CusMatrixCombine()
@@ -365,7 +383,7 @@ class Dense_Thor(Cell):
self.mul = P.Mul()
self.cast = P.Cast()
self.damping = Tensor(damping)
self.loss_scale = Tensor(1/loss_scale, mstype.float16)
self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
self.vector_matmul = CusBatchMatMul()
self.pad = P.Pad(((0, 24), (0, 24)))
self.pad1 = P.Pad(((0, 8), (0, 8)))
@@ -415,14 +433,14 @@ class Dense_Thor(Cell):
matrix_G_inv = self.cast(matrix_G_inv, mstype.float16)
self.matrix_G_inv = matrix_G_inv
return out
def construct(self, x):
if self.thor:
inputs = self.cube_matmul(x, x)
normalizer = 32
normalizer = self.cast(normalizer, ms.float32)
matrix_A = self.mul(inputs, 1.0 / normalizer)
damping_step = self.gather(self.damping, self.cov_step, self.axis)
damping_step = self.cast(damping_step, mstype.float32)
damping = self.sqrt(damping_step)
@@ -430,11 +448,11 @@ class Dense_Thor(Cell):
matrix_A = matrix_A + damping * dampingA
matrix_A_inv = self.cholesky(matrix_A)
matrix_A_inv = self.vector_matmul(matrix_A_inv, matrix_A_inv)
matrix_A_inv_max = self.fused_abs_max2(matrix_A_inv)
matrix_A_inv_max = self.fused_abs_max2(matrix_A_inv_max)
self.A_inv_max = matrix_A_inv_max
matrix_A_inv = self.matrix_combine(matrix_A_inv)
matrix_A_inv_shape = self.shape(matrix_A_inv)
matrix_A_inv = self.reshape(matrix_A_inv, (matrix_A_inv_shape[0] / 16, 16, matrix_A_inv_shape[0] / 16, 16))
@@ -446,20 +464,20 @@ class Dense_Thor(Cell):
output = self.getG(output)
else:
output = self.matmul(x, self.weight)
if self.has_bias:
output = self.bias_add(output, self.bias)
if self.activation_flag:
return self.activation(output)
return output
def extend_repr(self):
str_info = 'in_channels={}, out_channels={}, weight={}, has_bias={}' \
.format(self.in_channels, self.out_channels, self.weight, self.has_bias)
.format(self.in_channels, self.out_channels, self.weight, self.has_bias)
if self.has_bias:
str_info = str_info + ', bias={}'.format(self.bias)
if self.activation_flag:
str_info = str_info + ', activation={}'.format(self.activation)
return str_info
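Why applying the two small inverses on either side of the gradient is enough: for a weight gradient g of shape (out, in) and symmetric factors A and G, left-multiplying by G^-1 and right-multiplying by A^-1 equals multiplying vec(g) by the Kronecker product A^-1 ⊗ G^-1, the Kronecker-factored approximation of the curvature inverse. A quick NumPy check of the identity this relies on (illustrative, not part of this diff):

import numpy as np

rng = np.random.default_rng(0)
G = rng.standard_normal((4, 4)); G = G @ G.T   # symmetric stand-ins for the factors
A = rng.standard_normal((3, 3)); A = A @ A.T
g = rng.standard_normal((4, 3))                # (out_dim, in_dim) gradient

# with column-major vec and symmetric A: (A kron G) @ vec(g) == vec(G @ g @ A)
lhs = (np.kron(A, G) @ g.reshape(-1, order='F')).reshape(4, 3, order='F')
assert np.allclose(lhs, G @ g @ A)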

View File

@@ -13,62 +13,54 @@
# limitations under the License.
# ============================================================================
"""train_imagenet."""
import os
import argparse
import os
import random
import mindspore.dataset.engine as de
import numpy as np
from dataset_imagenet import create_dataset
from lr_generator import get_lr, warmup_cosine_annealing_lr
from config_imagenet import config
from mindspore import context
from mindspore import Tensor
from mindspore import context
from mindspore.communication.management import init
from mindspore.parallel._auto_parallel_context import auto_parallel_context
from mindspore.nn.optim.momentum import Momentum
from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
from mindspore.train.model import ParallelMode
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
from mindspore.train.loss_scale_manager import FixedLossScaleManager
import mindspore.dataset.engine as de
from mindspore.communication.management import init
import math
import mindspore.nn as nn
from crossentropy import CrossEntropy
from var_init import default_recurisive_init, KaimingNormal
from mindspore.common import initializer as weight_init
from second_order.thor import THOR
from mindspore.train.model import ParallelMode
from second_order.model_second_order import Model
from second_order.resnet import resnet50
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from second_order.thor import THOR
from config_imagenet import config
from crossentropy import CrossEntropy
from dataset_imagenet import create_dataset
from lr_generator import get_lr, warmup_cosine_annealing_lr
random.seed(1)
np.random.seed(1)
de.config.set_seed(1)
parser = argparse.ArgumentParser(description='Image classification')
parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute')
parser.add_argument('--device_num', type=int, default=1, help='Device num.')
parser.add_argument('--do_train', type=bool, default=True, help='Do train or not.')
parser.add_argument('--do_eval', type=bool, default=False, help='Do eval or not.')
parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
args_opt = parser.parse_args()
device_id = int(os.getenv('DEVICE_ID'))
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True, device_id=device_id)
context.set_context(enable_task_sink=True)
context.set_context(enable_loop_sink=True)
context.set_context(enable_mem_reuse=True)
def get_second_order_lr(global_step, lr_init, decay, total_epochs, steps_per_epoch):
lr_each_step = []
total_steps = steps_per_epoch * total_epochs
for i in range(total_steps):
epoch = (i+1)/steps_per_epoch
base = (1.0 - float(epoch)/total_epochs)**decay
epoch = (i + 1) / steps_per_epoch
base = (1.0 - float(epoch) / total_epochs) ** decay
lr = lr_init * base
lr_each_step.append(lr)
current_step = global_step
@@ -77,11 +69,12 @@ def get_second_order_lr(global_step, lr_init, decay, total_epochs, steps_per_epoch):
learning_rate = lr_each_step[current_step:]
return learning_rate
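get_second_order_lr is a per-step polynomial decay, lr = lr_init * (1 - epoch / total_epochs) ** decay, computed for every step and then sliced from global_step onward. A worked example (lr_init and decay are assumed values for illustration; only steps_per_epoch = 5004 and the epoch counts appear in this file):

steps_per_epoch, total_epochs = 5004, 50
lr_init, decay = 0.1, 6.0  # assumptions, not values from this commit

def lr_at(step):
    epoch = (step + 1) / steps_per_epoch
    return lr_init * (1.0 - epoch / total_epochs) ** decay

print(lr_at(0))                         # ~0.1 at the first step
print(lr_at(25 * steps_per_epoch - 1))  # halfway: 0.1 * 0.5**6 = 0.0015625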
def get_second_order_damping(global_step, damping_init, decay_rate, total_epochs, steps_per_epoch):
damping_each_step = []
total_steps = steps_per_epoch * total_epochs
for step in range(total_steps):
epoch = (step+1) / steps_per_epoch
epoch = (step + 1) / steps_per_epoch
damping = damping_init * (decay_rate ** (epoch / 10))
damping_each_step.append(damping)
@@ -91,6 +84,7 @@ def get_second_order_damping(global_step, damping_init, decay_rate, total_epochs, steps_per_epoch):
print("damping_is=========", damping)
return damping
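get_second_order_damping decays the damping geometrically with epoch, damping = damping_init * decay_rate ** (epoch / 10). With the values used in __main__ below (0.03, 0.87, 5004 steps per epoch) it drops by one factor of 0.87 every ten epochs:

damping_init, decay_rate, steps_per_epoch = 0.03, 0.87, 5004  # from the call below

def damping_at(step):
    epoch = (step + 1) / steps_per_epoch
    return damping_init * decay_rate ** (epoch / 10)

print(damping_at(0))                         # ~0.03 on the first step
print(damping_at(10 * steps_per_epoch - 1))  # 0.03 * 0.87 = 0.0261 after ten epochs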
if __name__ == '__main__':
if args_opt.do_eval:
print("eval")
@@ -104,7 +98,7 @@ if __name__ == '__main__':
init()
else:
print(" ")
epoch_size = config.epoch_size
damping = get_second_order_damping(0, 0.03, 0.87, 50, 5004)
net = resnet50(class_num=config.class_num, damping=damping, loss_scale=config.loss_scale,
@@ -128,8 +122,8 @@ if __name__ == '__main__':
config.eta_min))
else:
lr = Tensor(get_lr(global_step=0, lr_init=config.lr_init, lr_end=config.lr_end, lr_max=config.lr_max,
warmup_epochs=config.warmup_epochs, total_epochs=epoch_size, steps_per_epoch=step_size,
lr_decay_mode='poly'))
warmup_epochs=config.warmup_epochs, total_epochs=epoch_size, steps_per_epoch=step_size,
lr_decay_mode='poly'))
opt = THOR(filter(lambda x: x.requires_grad, net.get_parameters()), lr,
config.momentum, damping, config.frequency,
filter(lambda x: 'matrix_A' in x.name, net.get_parameters()),
@@ -137,8 +131,9 @@ if __name__ == '__main__':
filter(lambda x: 'spatial_norm' in x.name, net.get_parameters()),
config.weight_decay, config.loss_scale)
model = Model(net, loss_fn=loss, optimizer=opt, amp_level='O2', loss_scale_manager=loss_scale, keep_batchnorm_fp32=False, metrics={'acc'}, frequency=config.frequency)
model = Model(net, loss_fn=loss, optimizer=opt, amp_level='O2', loss_scale_manager=loss_scale,
keep_batchnorm_fp32=False, metrics={'acc'}, frequency=config.frequency)
time_cb = TimeMonitor(data_size=step_size)
loss_cb = LossMonitor()
cb = [time_cb, loss_cb]