forked from mindspore-Ecosystem/mindspore
change API set_strategy() to shard()
commit a836d25c64 (parent 20991a0718)
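
Both methods take the same strategy tuple and return the primitive, so call
sites change in name only. A minimal before/after sketch (the device counts in
the strategy tuples are illustrative, not taken from this commit):

    # before
    matmul = P.MatMul().set_strategy(((8, 1), (1, 1)))
    # after
    matmul = P.MatMul().shard(((8, 1), (1, 1)))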

@@ -567,8 +567,8 @@ class Conv2dTranspose(_Conv):
         else:
             self.padding_top, self.padding_bottom, self.padding_left, self.padding_right = self.padding
 
-    def set_strategy(self, strategy):
-        self.conv2d_transpose.set_strategy(strategy)
+    def shard(self, strategy):
+        self.conv2d_transpose.shard(strategy)
         return self
 
     def _deconv_output_length(self, input_length, filter_size, stride_size, dilation_size, padding):
@@ -744,8 +744,8 @@ class Conv1dTranspose(_Conv):
         self.expand_dims = P.ExpandDims()
         self.squeeze = P.Squeeze(2)
 
-    def set_strategy(self, strategy):
-        self.conv2d_transpose.set_strategy(strategy)
+    def shard(self, strategy):
+        self.conv2d_transpose.shard(strategy)
         return self
 
     def _deconv_output_length(self, input_length, filter_size, stride_size, dilation_size, padding):
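
The same rename is applied to layer cells that wrap a single primitive
(Conv2dTranspose and Conv1dTranspose above, DepthwiseConv2dNative further
down). The wrapper pattern, sketched for a cell whose sharded primitive is
self.conv2d_transpose:

    def shard(self, strategy):
        # forward the strategy to the wrapped primitive, keep chaining support
        self.conv2d_transpose.shard(strategy)
        return self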

@@ -174,17 +174,17 @@ class EmbeddingLookup(Cell):
                 Validator.check_integer('manul shape dim', dim, 0, Rel.GT, self.cls_name)
             self.gatherv2.add_prim_attr("manual_split", manual_shapes)
             self.embeddinglookup.add_prim_attr("manual_split", manual_shapes)
-            self.gatherv2.set_strategy(((get_group_size(), 1), (1, get_group_size())))
-            self.embeddinglookup.set_strategy(((get_group_size(), 1), (1, get_group_size())))
+            self.gatherv2.shard(((get_group_size(), 1), (1, get_group_size())))
+            self.embeddinglookup.shard(((get_group_size(), 1), (1, get_group_size())))
         elif slice_mode == "table_row_slice" and is_auto_parallel:
-            self.gatherv2.set_strategy(((get_group_size(), 1), (1, 1)))
-            self.embeddinglookup.set_strategy(((get_group_size(), 1), (1, 1)))
+            self.gatherv2.shard(((get_group_size(), 1), (1, 1)))
+            self.embeddinglookup.shard(((get_group_size(), 1), (1, 1)))
         elif slice_mode == "table_column_slice" and is_auto_parallel:
-            self.gatherv2.set_strategy(((1, get_group_size()), (1, 1)))
-            self.embeddinglookup.set_strategy(((1, get_group_size()), (1, 1)))
+            self.gatherv2.shard(((1, get_group_size()), (1, 1)))
+            self.embeddinglookup.shard(((1, get_group_size()), (1, 1)))
         elif slice_mode == "batch_slice" and is_auto_parallel:
-            self.gatherv2.set_strategy(((1, 1), (get_group_size(), 1)))
-            self.embeddinglookup.set_strategy(((1, 1), (get_group_size(), 1)))
+            self.gatherv2.shard(((1, 1), (get_group_size(), 1)))
+            self.embeddinglookup.shard(((1, 1), (get_group_size(), 1)))
         else:
             if is_auto_parallel:
                 raise ValueError("slice_mode should support mode in nn.EmbeddingLookup, but get "
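
In these strategy tuples, each inner tuple describes how one operator input is
partitioned, one integer per tensor dimension. A hypothetical 8-device reading
of the EmbeddingLookup cases above (table is [vocab_size, emb_dim], indices
are 2-D):

    gatherv2.shard(((8, 1), (1, 1)))  # table_row_slice: split table rows
    gatherv2.shard(((1, 8), (1, 1)))  # table_column_slice: split table columns
    gatherv2.shard(((1, 1), (8, 1)))  # batch_slice: split the indices batch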

@@ -112,12 +112,12 @@ class _BatchNorm(Cell):
 
         data_parallel_strategy = ((1,), (1,))
         data_parallel_strategy_one = ((1,), ())
-        self.sub_mean = P.Sub().set_strategy(data_parallel_strategy)
-        self.sub_var = P.Sub().set_strategy(data_parallel_strategy)
-        self.mul_mean = P.Mul().set_strategy(data_parallel_strategy_one)
-        self.mul_var = P.Mul().set_strategy(data_parallel_strategy_one)
-        self.assign_sub_mean = P.AssignSub().set_strategy(data_parallel_strategy)
-        self.assign_sub_var = P.AssignSub().set_strategy(data_parallel_strategy)
+        self.sub_mean = P.Sub().shard(data_parallel_strategy)
+        self.sub_var = P.Sub().shard(data_parallel_strategy)
+        self.mul_mean = P.Mul().shard(data_parallel_strategy_one)
+        self.mul_var = P.Mul().shard(data_parallel_strategy_one)
+        self.assign_sub_mean = P.AssignSub().shard(data_parallel_strategy)
+        self.assign_sub_var = P.AssignSub().shard(data_parallel_strategy)
 
     def _check_data_dim(self, x):
         raise NotImplementedError

@@ -102,7 +102,7 @@ class Primitive(Primitive_):
         self.add_attr(name, value)
         return self
 
-    def set_strategy(self, strategy):
+    def shard(self, strategy):
         """
         Add strategies to primitive attribute.
 
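
The second hunk above is the rename itself: shard() is defined on Primitive,
so every call site in this commit picks it up. A simplified sketch of such a
chainable method (the real one also validates the tuple before storing it):

    def shard(self, strategy):
        # record the strategy as a primitive attribute and return self,
        # which is what allows P.MatMul().shard(...) at construction time
        self.add_prim_attr("strategy", strategy)
        return self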

@@ -198,14 +198,14 @@ class WideDeepModel(nn.Cell):
         self.concat = P.Concat(axis=1)
         self.cast = P.Cast()
         if is_auto_parallel and host_device_mix and not is_field_slice:
-            self.dense_layer_1.dropout.dropout_do_mask.set_strategy(((1, get_group_size()),))
-            self.dense_layer_1.dropout.dropout.set_strategy(((1, get_group_size()),))
-            self.dense_layer_1.matmul.set_strategy(((1, get_group_size()), (get_group_size(), 1)))
+            self.dense_layer_1.dropout.dropout_do_mask.shard(((1, get_group_size()),))
+            self.dense_layer_1.dropout.dropout.shard(((1, get_group_size()),))
+            self.dense_layer_1.matmul.shard(((1, get_group_size()), (get_group_size(), 1)))
             self.deep_embeddinglookup = nn.EmbeddingLookup(self.vocab_size, self.emb_dim,
                                                            slice_mode=nn.EmbeddingLookup.TABLE_COLUMN_SLICE)
             self.wide_embeddinglookup = nn.EmbeddingLookup(self.vocab_size, 1,
                                                            slice_mode=nn.EmbeddingLookup.TABLE_ROW_SLICE)
-            self.deep_mul.set_strategy(((1, 1, get_group_size()), (1, 1, 1)))
+            self.deep_mul.shard(((1, 1, get_group_size()), (1, 1, 1)))
             self.deep_reshape.add_prim_attr("skip_redistribution", True)
             self.reduce_sum.add_prim_attr("cross_batch", True)
             self.embedding_table = self.deep_embeddinglookup.embedding_table

@@ -217,12 +217,12 @@ class WideDeepModel(nn.Cell):
             self.wide_embeddinglookup = nn.EmbeddingLookup(self.vocab_size, 1,
                                                            slice_mode=nn.EmbeddingLookup.FIELD_SLICE,
                                                            manual_shapes=manual_shapes)
-            self.deep_mul.set_strategy(((1, get_group_size(), 1), (1, get_group_size(), 1)))
-            self.wide_mul.set_strategy(((1, get_group_size(), 1), (1, get_group_size(), 1)))
-            self.reduce_sum.set_strategy(((1, get_group_size(), 1),))
-            self.dense_layer_1.dropout.dropout_do_mask.set_strategy(((1, get_group_size()),))
-            self.dense_layer_1.dropout.dropout.set_strategy(((1, get_group_size()),))
-            self.dense_layer_1.matmul.set_strategy(((1, get_group_size()), (get_group_size(), 1)))
+            self.deep_mul.shard(((1, get_group_size(), 1), (1, get_group_size(), 1)))
+            self.wide_mul.shard(((1, get_group_size(), 1), (1, get_group_size(), 1)))
+            self.reduce_sum.shard(((1, get_group_size(), 1),))
+            self.dense_layer_1.dropout.dropout_do_mask.shard(((1, get_group_size()),))
+            self.dense_layer_1.dropout.dropout.shard(((1, get_group_size()),))
+            self.dense_layer_1.matmul.shard(((1, get_group_size()), (get_group_size(), 1)))
             self.embedding_table = self.deep_embeddinglookup.embedding_table
         elif parameter_server:
             self.deep_embeddinglookup = nn.EmbeddingLookup(self.vocab_size, self.emb_dim)

@@ -51,12 +51,12 @@ class Onehot(Cell):
         trans_stra = None
         if strategy:
             trans_stra = (strategy[0],)
-        self.onehot = P.OneHot().set_strategy(strategy=strategy)
+        self.onehot = P.OneHot().shard(strategy=strategy)
         self.depth = depth
         self.on_value = Tensor(on_value, ms.float32)
         self.off_value = Tensor(off_value, ms.float32)
-        self.transpose = P.Transpose().set_strategy(strategy=trans_stra)
-        self.sub = P.Sub().set_strategy(strategy=((1, 1), (1, 1)))
+        self.transpose = P.Transpose().shard(strategy=trans_stra)
+        self.sub = P.Sub().shard(strategy=((1, 1), (1, 1)))
         self.axis = axis
 
     def construct(self, input_, indices):

@@ -140,20 +140,20 @@ class SoftmaxCrossEntropyExpand(Cell):
         if len(stra_list) < 11:
             stra_list = [None] * 11
         self.exp = P.Exp()
-        self.reduce_sum = P.ReduceSum(keep_dims=True).set_strategy(strategy=stra_list[1])
-        self.onehot = P.OneHot().set_strategy(strategy=stra_list[2])
+        self.reduce_sum = P.ReduceSum(keep_dims=True).shard(strategy=stra_list[1])
+        self.onehot = P.OneHot().shard(strategy=stra_list[2])
         self.on_value = Tensor(1.0, mstype.float32)
         self.off_value = Tensor(0.0, mstype.float32)
-        self.div = P.Div().set_strategy(strategy=stra_list[3])
-        self.log = P.Log().set_strategy(strategy=stra_list[4])
-        self.sum_cross_entropy = P.ReduceSum(keep_dims=False).set_strategy(strategy=stra_list[5])
-        self.mul = P.Mul().set_strategy(strategy=stra_list[6])
-        self.mul2 = P.Mul().set_strategy(strategy=stra_list[7])
+        self.div = P.Div().shard(strategy=stra_list[3])
+        self.log = P.Log().shard(strategy=stra_list[4])
+        self.sum_cross_entropy = P.ReduceSum(keep_dims=False).shard(strategy=stra_list[5])
+        self.mul = P.Mul().shard(strategy=stra_list[6])
+        self.mul2 = P.Mul().shard(strategy=stra_list[7])
         self.cast = P.Cast()
-        self.reduce_mean = P.ReduceMean(keep_dims=False).set_strategy(strategy=stra_list[8])
+        self.reduce_mean = P.ReduceMean(keep_dims=False).shard(strategy=stra_list[8])
         self.sparse = sparse
-        self.reduce_max = P.ReduceMax(keep_dims=True).set_strategy(strategy=stra_list[9])
-        self.sub = P.Sub().set_strategy(strategy=stra_list[10])
+        self.reduce_max = P.ReduceMax(keep_dims=True).shard(strategy=stra_list[9])
+        self.sub = P.Sub().shard(strategy=stra_list[10])
 
     def construct(self, logit, label):
         logit_max = self.reduce_max(logit, -1)

@@ -174,7 +174,7 @@ class MatmulNet(Cell):
         super(MatmulNet, self).__init__()
         if loss_stra_list is None:
            loss_stra_list = []
-        self.matmul = P.MatMul(transpose_b=True).set_strategy(strategy=matmul_stra)
+        self.matmul = P.MatMul(transpose_b=True).shard(strategy=matmul_stra)
         self.loss = SoftmaxCrossEntropyExpand(sparse=True, stra_list=loss_stra_list)
         self.weight = Parameter(Tensor(np.ones(MatmulParamShape), dtype=ms.float32), name="weight")
 

@@ -181,7 +181,7 @@ class WideDeepModel(nn.Cell):
                                            self.weight_bias_init,
                                            self.deep_layer_act, convert_dtype=True)
 
-        self.gather_v2 = P.GatherV2().set_strategy(((1, 8), (1, 1)))
+        self.gather_v2 = P.GatherV2().shard(((1, 8), (1, 1)))
         self.gather_v2_1 = P.GatherV2()
         self.mul = P.Mul()
         self.reduce_sum = P.ReduceSum(keep_dims=False)

@@ -230,7 +230,7 @@ class NetWithLossClass(nn.Cell):
         self.network = network
         self.l2_coef = config.l2_coef
         self.loss = P.SigmoidCrossEntropyWithLogits()
-        self.square = P.Square().set_strategy(((1, get_group_size()),))
+        self.square = P.Square().shard(((1, get_group_size()),))
         self.reduceMean_false = P.ReduceMean(keep_dims=False)
         self.reduceSum_false = P.ReduceSum(keep_dims=False)
 

@@ -273,8 +273,8 @@ class DepthwiseConv2dNative(_DepthwiseConv2dNative):
                                                          dilation=self.dilation,
                                                          group=self.group)
 
-    def set_strategy(self, strategy):
-        self.depthwise_conv2d_native.set_strategy(strategy)
+    def shard(self, strategy):
+        self.depthwise_conv2d_native.shard(strategy)
         return self
 
     def construct(self, x):

@@ -29,8 +29,8 @@ grad_all = C.GradOperation(get_all=True)
 class AddRelu(nn.Cell):
     def __init__(self, strategy0=None, strategy1=None):
         super(AddRelu, self).__init__()
-        self.add = P.TensorAdd().set_strategy(strategy=strategy0)
-        self.relu = P.ReLU().set_strategy(strategy=strategy1)
+        self.add = P.TensorAdd().shard(strategy=strategy0)
+        self.relu = P.ReLU().shard(strategy=strategy1)
 
     def construct(self, x, z):
         out = self.add(x, z)

@@ -53,9 +53,9 @@ class Dataset(MindData):
 class AllToAllNet(nn.Cell):
     def __init__(self, strategy1):
         super(AllToAllNet, self).__init__()
-        self.matmul = P.MatMul().set_strategy(((1, 1), (1, 8)))
+        self.matmul = P.MatMul().shard(((1, 1), (1, 8)))
         self.matmul_weight = Parameter(Tensor(np.ones([128, 256]), dtype=ms.float32), name="weight")
-        self.transpose1 = P.Transpose().set_strategy(strategy1)
+        self.transpose1 = P.Transpose().shard(strategy1)
 
     def construct(self, x):
         x = self.matmul(x, self.matmul_weight)

@@ -80,8 +80,8 @@ def all_to_all_common(strategy1):
     net = all_to_all_net(strategy1)
 
     loss = SoftmaxCrossEntropyWithLogits(sparse=True)
-    loss.softmax_cross_entropy.set_strategy(((8, 1), (8, 1)))
-    loss.one_hot.set_strategy(((8, 1), (), ()))
+    loss.softmax_cross_entropy.shard(((8, 1), (8, 1)))
+    loss.one_hot.shard(((8, 1), (), ()))
     opt = Momentum(net.trainable_params(), learning_rate, momentum)
     model = Model(net, loss, opt)
 
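
For context, these drivers configure the parallel mode before compiling the
model; a condensed sketch of the setup around the lines above (device_num=8
and the import paths are assumed from the surrounding test files):

    from mindspore import context
    context.set_auto_parallel_context(device_num=8, parallel_mode="semi_auto_parallel")
    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
    loss.softmax_cross_entropy.shard(((8, 1), (8, 1)))  # split the batch dimension 8 ways
    opt = Momentum(net.trainable_params(), learning_rate, momentum)
    model = Model(net, loss, opt)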

@@ -55,8 +55,8 @@ def test_matmul_sub():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.sub = P.Sub().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.sub = P.Sub().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -79,8 +79,8 @@ def test_matmul_add():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.add = P.TensorAdd().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.add = P.TensorAdd().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -103,8 +103,8 @@ def test_matmul_mul():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.mul = P.Mul().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.mul = P.Mul().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -126,8 +126,8 @@ def test_matmul_mod():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.mod = P.Mod().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.mod = P.Mod().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -149,8 +149,8 @@ def test_matmul_floormod():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.floormod = P.FloorMod().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.floormod = P.FloorMod().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -173,8 +173,8 @@ def test_matmul_atan2():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.atan2 = P.Atan2().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.atan2 = P.Atan2().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -197,8 +197,8 @@ def test_matmul_divNoNan():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.divNoNan = P.DivNoNan().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.divNoNan = P.DivNoNan().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -221,10 +221,10 @@ def test_matmul_logicaland():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.equal = P.Equal().set_strategy(strategy2)
-            self.notequal = P.NotEqual().set_strategy(strategy2)
-            self.logical = P.LogicalAnd().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.equal = P.Equal().shard(strategy2)
+            self.notequal = P.NotEqual().shard(strategy2)
+            self.logical = P.LogicalAnd().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -250,10 +250,10 @@ def test_matmul_logicalor():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.equal = P.Equal().set_strategy(strategy2)
-            self.notequal = P.NotEqual().set_strategy(strategy2)
-            self.logical = P.LogicalOr().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.equal = P.Equal().shard(strategy2)
+            self.notequal = P.NotEqual().shard(strategy2)
+            self.logical = P.LogicalOr().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -279,8 +279,8 @@ def test_matmul_div():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.div = P.Div().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.div = P.Div().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -303,8 +303,8 @@ def test_matmul_add_broadcast():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.add = P.TensorAdd().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.add = P.TensorAdd().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -327,8 +327,8 @@ def test_matmul_add_broadcast2():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.add = P.TensorAdd().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.add = P.TensorAdd().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -351,8 +351,8 @@ def test_matmul_sub_broadcast():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.sub = P.Sub().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.sub = P.Sub().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -375,8 +375,8 @@ def test_matmul_sub_broadcast2():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.sub = P.Sub().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.sub = P.Sub().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -399,8 +399,8 @@ def test_matmul_mul_broadcast():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.mul = P.Mul().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.mul = P.Mul().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -423,8 +423,8 @@ def test_matmul_mul_broadcast2():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.mul = P.Mul().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.mul = P.Mul().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -447,8 +447,8 @@ def test_matmul_div_broadcast():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.div = P.Div().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.div = P.Div().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -471,8 +471,8 @@ def test_matmul_div_broadcast2():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.div = P.Div().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.div = P.Div().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -495,8 +495,8 @@ def test_matmul_greater_broadcast():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.greater = P.Greater().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.greater = P.Greater().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -519,8 +519,8 @@ def test_matmul_greater_broadcast2():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.greater = P.Greater().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.greater = P.Greater().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -543,8 +543,8 @@ def test_matmul_floordiv():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.floordiv = P.FloorDiv().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.floordiv = P.FloorDiv().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -567,8 +567,8 @@ def test_matmul_floordiv_broadcast():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.floordiv = P.FloorDiv().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.floordiv = P.FloorDiv().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -591,8 +591,8 @@ def test_matmul_floordiv_broadcast2():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.floordiv = P.FloorDiv().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.floordiv = P.FloorDiv().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)
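
Every test above follows one recipe: build Net with explicit strategies, wrap
it, and compile so the strategies are validated. A condensed sketch (GradWrap,
NetWithLoss and _executor mirror helpers these test files already use; shapes
and strategies are illustrative):

    context.set_auto_parallel_context(device_num=8, global_rank=0,
                                      parallel_mode="semi_auto_parallel")
    net = GradWrap(NetWithLoss(Net(((4, 2), (2, 1)), ((4, 2), (4, 2)))))
    net.set_auto_parallel()
    x = Tensor(np.ones([128, 32]), dtype=ms.float32)
    y = Tensor(np.ones([32, 64]), dtype=ms.float32)
    b = Tensor(np.ones([128, 64]), dtype=ms.float32)
    net.set_train()
    _executor.compile(net, x, y, b)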

@@ -60,18 +60,18 @@ class Net(nn.Cell):
         super().__init__()
         self.query_w = Parameter(initializer(
             "normal", [8, 16], ms.float32), name='query')
-        self.query = P.MatMul().set_strategy(strategy1)
+        self.query = P.MatMul().shard(strategy1)
 
         self.key_w = Parameter(initializer(
             "normal", [8, 16], ms.float32), name='key')
-        self.key = P.MatMul().set_strategy(strategy2)
+        self.key = P.MatMul().shard(strategy2)
 
         self.value_w = Parameter(initializer(
             "normal", [8, 16], ms.float32), name='value')
-        self.value = P.MatMul().set_strategy(strategy3)
+        self.value = P.MatMul().shard(strategy3)
 
-        self.score = P.MatMul().set_strategy(strategy4)
-        self.context = P.MatMul().set_strategy(strategy5)
+        self.score = P.MatMul().shard(strategy4)
+        self.context = P.MatMul().shard(strategy5)
         self.transpose1 = P.Transpose()
         self.transpose2 = P.Transpose()
         self.relu = P.ReLU()

@@ -24,8 +24,8 @@ from mindspore.ops import operations as P
 class Net(Cell):
     def __init__(self, mul_weight, strategy1=None, strategy2=None):
         super().__init__()
-        self.mul = P.Mul().set_strategy(strategy1)
-        self.sigmoid = P.Sigmoid().set_strategy(strategy2)
+        self.mul = P.Mul().shard(strategy1)
+        self.sigmoid = P.Sigmoid().shard(strategy2)
         self.mul_weight = Parameter(mul_weight, "w1")
 
     def construct(self, x, b):

@@ -107,7 +107,7 @@ def test_auto_parallel_arithmetic_model():
         def __init__(self):
             super().__init__()
             self.matmul = P.MatMul()
-            self.one_hot = P.OneHot().set_strategy(((1, 8), (), ()))
+            self.one_hot = P.OneHot().shard(((1, 8), (), ()))
             self.on_value = Tensor(1.0, ms.float32)
             self.off_value = Tensor(0.0, ms.float32)
             self.matmul2 = P.MatMul()

@@ -53,7 +53,7 @@ def test_four_matmul_linear():
     class Net(nn.Cell):
         def __init__(self, strategy1):
             super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
+            self.matmul1 = P.MatMul().shard(strategy1)
             self.matmul2 = P.MatMul()
             self.matmul3 = P.MatMul()
             self.matmul4 = P.MatMul()

@@ -298,7 +298,7 @@ def test_reshape_auto_7():
         def __init__(self):
             super().__init__()
             self.reshape = P.Reshape()
-            self.mul = P.Mul().set_strategy(((1, 2, 4), (2, 4)))
+            self.mul = P.Mul().shard(((1, 2, 4), (2, 4)))
             self.mul_weight = Parameter(Tensor(np.ones([128, 96]), dtype=ms.float32), name="weight")
 
         def construct(self, x):

@@ -53,7 +53,7 @@ def test_four_matmul_linear():
     class Net(nn.Cell):
         def __init__(self, strategy1):
             super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
+            self.matmul1 = P.MatMul().shard(strategy1)
             self.weight = Parameter(Tensor(np.ones([512, 256]).astype(np.float32) * 0.01), "w", requires_grad=True)
             self.matmul2 = P.MatMul()
 

@@ -24,8 +24,8 @@ from mindspore.ops import operations as P
 class Net(Cell):
     def __init__(self, mul_weight, batch_matmul_weight, transpose_b=False, strategy1=None, strategy2=None):
         super().__init__()
-        self.mul = P.Mul().set_strategy(strategy1)
-        self.batch_matmul = P.BatchMatMul(transpose_b=transpose_b).set_strategy(strategy2)
+        self.mul = P.Mul().shard(strategy1)
+        self.batch_matmul = P.BatchMatMul(transpose_b=transpose_b).shard(strategy2)
         self.mul_weight = Parameter(mul_weight, "w1")
         self.batch_matmul_weight = Parameter(batch_matmul_weight, "w2")
 

@@ -73,7 +73,7 @@ class NetConv(nn.Cell):
                               has_bias,
                               weight_init,
                               bias_init)
-        self.conv.conv2d.set_strategy(strategy)
+        self.conv.conv2d.shard(strategy)
 
     def construct(self, input_x):
         return self.conv(input_x)

@@ -84,9 +84,9 @@ def test_batch():
         def __init__(self, strategy1, strategy2, strategy3):
             super().__init__()
             self.conv1 = NetConv(16, 8, (3, 3), bias_init='zeros', strategy=strategy1)
-            self.mul1 = P.Mul().set_strategy(strategy2)
+            self.mul1 = P.Mul().shard(strategy2)
             self.conv2 = NetConv(8, 64, (9, 9), bias_init='zeros', strategy=strategy1)
-            self.mul2 = P.Mul().set_strategy(strategy3)
+            self.mul2 = P.Mul().shard(strategy3)
 
         def construct(self, x, w1, w2):
             out1 = self.conv1(x)

@@ -64,7 +64,7 @@ def conv7x7(in_channels, out_channels, stride=1, padding=0):
     conv = Conv2d(in_channels, out_channels,
                   kernel_size=7, stride=stride, padding=padding, weight_init=weight, has_bias=False,
                   pad_mode="same")
-    conv.conv2d.set_strategy(strategy_weight)
+    conv.conv2d.shard(strategy_weight)
     return conv
 
 

@@ -86,7 +86,7 @@ def bn_with_initialize(out_channels):
     gamma = weight_variable_1(shape)
     bn = BatchNorm2d(out_channels, momentum=0.1, eps=0.0001, gamma_init=gamma,
                      beta_init=beta, moving_mean_init=mean, moving_var_init=var)
-    bn.bn_train.set_strategy(strategy_bn)
+    bn.bn_train.shard(strategy_bn)
     return bn
 
 

@@ -98,10 +98,10 @@ class ResNet(Cell):
         self.conv1 = conv7x7(3, 64, stride=2, padding=0)
         self.bn1 = bn_with_initialize(64)
         self.relu = ReLU()
-        self.relu.relu.set_strategy(strategy_no_weight)
+        self.relu.relu.shard(strategy_no_weight)
         self.maxpool = MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
         self.reshape = P.Reshape()
-        self.matmul = P.MatMul().set_strategy(((8, 1), (1, 1)))
+        self.matmul = P.MatMul().shard(((8, 1), (1, 1)))
         self.matmul_weight = Parameter(Tensor(np.ones([200704, num_classes]), dtype=ms.float32), name="weight")
 
     def construct(self, x):

@@ -135,7 +135,7 @@ def test_batchnorm_batch_parallel():
     net = batchnorm_net(num_classes)
 
     loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
-    loss.softmax_cross_entropy.set_strategy(((dev_num, 1), (dev_num, 1)))
+    loss.softmax_cross_entropy.shard(((dev_num, 1), (dev_num, 1)))
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
 
     model = Model(net, loss, opt)

@@ -51,13 +51,13 @@ def test_two_matmul_batchnorm_ex():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
+            self.matmul1 = P.MatMul().shard(strategy1)
             self.norm = P.FusedBatchNormEx()
             self.gamma = Parameter(Tensor(np.ones([64]), dtype=ms.float32), name="gamma")
             self.beta = Parameter(Tensor(np.ones([64]), dtype=ms.float32), name="beta")
             self.mean = Parameter(Tensor(np.ones([64]), dtype=ms.float32), name="mean")
             self.var = Parameter(Tensor(np.ones([64]), dtype=ms.float32), name="var")
-            self.matmul2 = P.MatMul().set_strategy(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul1(x, y)

@@ -70,7 +70,7 @@ class Net(nn.Cell):
         super().__init__()
         self.conv = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=1, stride=1, pad_mode='valid',
                               has_bias=True, weight_init='ones', bias_init='ones')
-        self.reduce_mean = P.ReduceMean(keep_dims=False).set_strategy(((1, 1, 1, 8),))
+        self.reduce_mean = P.ReduceMean(keep_dims=False).shard(((1, 1, 1, 8),))
         self.flat = nn.Flatten()
 
     def construct(self, inputs):

@@ -87,18 +87,18 @@ class FusedBatchNorm(nn.Cell):
                                    epsilon=self.eps)
         self.bn_infer = P.BatchNorm(is_training=False,
                                     epsilon=self.eps)
-        self.sub_mean = P.Sub().set_strategy(((1), (1)))
-        self.sub_var = P.Sub().set_strategy(((1), (1)))
-        self.mul_mean = P.Mul().set_strategy(((1,), ()))
-        self.mul_var = P.Mul().set_strategy(((1,), ()))
-        self.assign_sub_mean = P.AssignSub().set_strategy(((1,), (1,)))
-        self.assign_sub_var = P.AssignSub().set_strategy(((1), (1)))
-        self.sub_mean2 = P.Sub().set_strategy(((1), (1)))
-        self.sub_var2 = P.Sub().set_strategy(((1), (1)))
+        self.sub_mean = P.Sub().shard(((1), (1)))
+        self.sub_var = P.Sub().shard(((1), (1)))
+        self.mul_mean = P.Mul().shard(((1,), ()))
+        self.mul_var = P.Mul().shard(((1,), ()))
+        self.assign_sub_mean = P.AssignSub().shard(((1,), (1,)))
+        self.assign_sub_var = P.AssignSub().shard(((1), (1)))
+        self.sub_mean2 = P.Sub().shard(((1), (1)))
+        self.sub_var2 = P.Sub().shard(((1), (1)))
 
-    def set_strategy(self, strategy):
-        self.bn_train.set_strategy(strategy)
-        self.bn_infer.set_strategy(strategy)
+    def shard(self, strategy):
+        self.bn_train.shard(strategy)
+        self.bn_infer.shard(strategy)
 
     def _check_data_dim(self, x):
         raise NotImplementedError

@@ -173,7 +173,7 @@ class PReLU(nn.Cell):
             w = Tensor(w)
         self.w = Parameter(initializer(w, [channel,]), name='a')
         self.prelu = P.PReLU()
-        self.relu = P.ReLU().set_strategy(((1)))
+        self.relu = P.ReLU().shard(((1)))
 
     def construct(self, x):
         self.w = self.relu(self.w)

@@ -210,7 +210,7 @@ def bn_common(parallel_mode, train_flag, strategy_loss=None):
     net = bn_net()
 
     loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
-    loss.softmax_cross_entropy.set_strategy(strategy_loss)
+    loss.softmax_cross_entropy.shard(strategy_loss)
     opt = Momentum(net.trainable_params(), learning_rate, momentum, 0.0001, 1024 * rank_size)
 
     if not train_flag:

@@ -52,8 +52,8 @@ class CommonNet(nn.Cell):
     def __init__(self):
         super(CommonNet, self).__init__()
        self.weight = Parameter(Tensor(np.ones([256, 64]), dtype=ms.float32), name="mul_weight")
-        self.logicalnot = P.LogicalNot().set_strategy(((4, 2),))
-        self.equal = P.Equal().set_strategy(((4, 2), (4, 2)))
+        self.logicalnot = P.LogicalNot().shard(((4, 2),))
+        self.equal = P.Equal().shard(((4, 2), (4, 2)))
 
     def construct(self, x, label):
         x = self.equal(x, self.weight)

@@ -56,8 +56,8 @@ def test_matmul_equal():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.equal = P.Equal().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.equal = P.Equal().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -79,8 +79,8 @@ def test_matmul_not_equal():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.notequal = P.NotEqual().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.notequal = P.NotEqual().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -102,8 +102,8 @@ def test_matmul_approximateEqual():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.approximateEqual = P.ApproximateEqual(tolerance=0.5).set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.approximateEqual = P.ApproximateEqual(tolerance=0.5).shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -126,8 +126,8 @@ def test_matmul_greater():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.greater = P.Greater().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.greater = P.Greater().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -150,8 +150,8 @@ def test_matmul_greaterEqual():
     class Net(nn.Cell):
        def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.greaterEqual = P.GreaterEqual().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.greaterEqual = P.GreaterEqual().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -174,8 +174,8 @@ def test_matmul_less():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.less = P.Less().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.less = P.Less().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -198,8 +198,8 @@ def test_matmul_lessEqual():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.lessEqual = P.LessEqual().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.lessEqual = P.LessEqual().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -222,8 +222,8 @@ def test_matmul_not_equal_repeated_calculation():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.notequal = P.NotEqual().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.notequal = P.NotEqual().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -245,8 +245,8 @@ def test_matmul_maximum():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.maximum = P.Maximum().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.maximum = P.Maximum().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -268,8 +268,8 @@ def test_matmul_maximum_broadcast():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.maximum = P.Maximum().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.maximum = P.Maximum().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -291,8 +291,8 @@ def test_matmul_maximum_broadcast2():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.maximum = P.Maximum().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.maximum = P.Maximum().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -314,8 +314,8 @@ def test_matmul_minimum():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.minimum = P.Minimum().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.minimum = P.Minimum().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -337,8 +337,8 @@ def test_matmul_minimum_broadcast():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.minimum = P.Maximum().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.minimum = P.Maximum().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -360,8 +360,8 @@ def test_matmul_minimum_broadcast2():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.minimum = P.Minimum().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.minimum = P.Minimum().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -23,12 +23,12 @@ from mindspore.ops import operations as P
 class Net(Cell):
     def __init__(self, weight, weight2, strategy1=None, strategy2=None, is_parameter=True):
         super().__init__()
-        self.concat = P.Concat(axis=0).set_strategy(strategy1)
+        self.concat = P.Concat(axis=0).shard(strategy1)
         if is_parameter:
             self.weight = Parameter(weight, "w1")
         else:
             self.weight = weight
-        self.mul = P.Mul().set_strategy(strategy2)
+        self.mul = P.Mul().shard(strategy2)
         self.weight2 = Parameter(weight2, "w2")
 
     def construct(self, x, b):

@@ -40,8 +40,8 @@ class Net(Cell):
 class Net2(Cell):
     def __init__(self, weight, strategy1=None, strategy2=None, axis=0):
         super().__init__()
-        self.mul = P.Mul().set_strategy(strategy1)
-        self.concat = P.Concat(axis=axis).set_strategy(strategy2)
+        self.mul = P.Mul().shard(strategy1)
+        self.concat = P.Concat(axis=axis).shard(strategy2)
         self.weight = Parameter(weight, "w")
 
     def construct(self, x, b):

@@ -53,12 +53,12 @@ class Net2(Cell):
 class Net3(Cell):
     def __init__(self, weight, weight2, weight3, strategy1=None, strategy2=None, is_parameter=True):
         super().__init__()
-        self.concat = P.Concat(axis=0).set_strategy(strategy1)
+        self.concat = P.Concat(axis=0).shard(strategy1)
         if is_parameter:
             self.weight = Parameter(weight, "w1")
         else:
             self.weight = weight
-        self.mul = P.Mul().set_strategy(strategy2)
+        self.mul = P.Mul().shard(strategy2)
         self.weight2 = Parameter(weight2, "w2")
         self.weight3 = Parameter(weight3, "w3")
 

@@ -54,9 +54,9 @@ class Dataset(MindData):
 class AllToAllNet(nn.Cell):
     def __init__(self, strategy1):
         super(AllToAllNet, self).__init__()
-        self.matmul = P.MatMul().set_strategy(((1, 1), (1, 8)))
+        self.matmul = P.MatMul().shard(((1, 1), (1, 8)))
         self.matmul_weight = Parameter(Tensor(np.ones([128, 256]), dtype=ms.float32), name="weight")
-        self.transpose1 = P.Transpose().set_strategy(strategy1)
+        self.transpose1 = P.Transpose().shard(strategy1)
 
     def construct(self, x):
         x = self.matmul(x, self.matmul_weight)

@@ -81,7 +81,7 @@ def loss_scale_manager_common(strategy1):
     net = all_to_all_net(strategy1)
 
     loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
-    loss.softmax_cross_entropy.set_strategy(((8, 1), (8, 1)))
+    loss.softmax_cross_entropy.shard(((8, 1), (8, 1)))
     opt = Momentum(net.trainable_params(), learning_rate, momentum)
     scale_manager = DynamicLossScaleManager(32, 2, 2000)
     model = Model(net, loss, opt, loss_scale_manager=scale_manager)

@@ -154,9 +154,9 @@ def test_input_not_in_parameter_layotu_dict():
     class Net(nn.Cell):
         def __init__(self, strategy1):
             super(Net, self).__init__()
-            self.matmul = P.MatMul().set_strategy(((1, 1), (1, 8)))
+            self.matmul = P.MatMul().shard(((1, 1), (1, 8)))
             self.matmul_weight = Parameter(Tensor(np.ones([128, 256]), dtype=ms.float32), name="weight")
-            self.transpose1 = P.Transpose().set_strategy(strategy1)
+            self.transpose1 = P.Transpose().shard(strategy1)
 
         def construct(self, x):
             x = self.matmul(x, self.matmul_weight)

@@ -44,8 +44,8 @@ def test_sum_as_loss_float16():
     class Net(nn.Cell):
         def __init__(self, strategy0, strategy1):
             super().__init__()
-            self.fc_nobias = P.MatMul(transpose_b=True).set_strategy(strategy0)
-            self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy1)
+            self.fc_nobias = P.MatMul(transpose_b=True).shard(strategy0)
+            self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy1)
 
         def construct(self, x, y):
             out = self.fc_nobias(x, y)

@@ -67,8 +67,8 @@ def test_sum_as_loss_float32():
     class Net(nn.Cell):
         def __init__(self, strategy0, strategy1):
             super().__init__()
-            self.fc_nobias = P.MatMul(transpose_b=True).set_strategy(strategy0)
-            self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy1)
+            self.fc_nobias = P.MatMul(transpose_b=True).shard(strategy0)
+            self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy1)
 
         def construct(self, x, y):
             out = self.fc_nobias(x, y)

@@ -90,8 +90,8 @@ def test_sum_as_loss_int32():
     class Net(nn.Cell):
         def __init__(self, strategy0, strategy1):
             super().__init__()
-            self.fc_nobias = P.MatMul(transpose_b=True).set_strategy(strategy0)
-            self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy1)
+            self.fc_nobias = P.MatMul(transpose_b=True).shard(strategy0)
+            self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy1)
 
         def construct(self, x, y):
             out = self.fc_nobias(x, y)

@@ -24,9 +24,9 @@ from mindspore.ops import operations as P
 class Net(Cell):
     def __init__(self, mul_weight, strategy1=None, strategy2=None):
         super().__init__()
-        self.mul = P.Mul().set_strategy(strategy1)
-        self.mul2 = P.Mul().set_strategy(strategy1)
-        self.dropout_do_mask = P.DropoutDoMask().set_strategy(strategy2)
+        self.mul = P.Mul().shard(strategy1)
+        self.mul2 = P.Mul().shard(strategy1)
+        self.dropout_do_mask = P.DropoutDoMask().shard(strategy2)
         self.dropout_gen_mask = P.DropoutGenMask()
         self.get_shape = P.Shape()
         self.cast = P.Cast()

@@ -56,9 +56,9 @@ def test_matmul_pow():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.pow = P.Pow().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.pow = P.Pow().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -82,9 +82,9 @@ def test_matmul_exp():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.exp = P.Exp().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.exp = P.Exp().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -108,9 +108,9 @@ def test_matmul_log():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.log = P.Log().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.log = P.Log().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -133,9 +133,9 @@ def test_matmul_abs():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.abs = P.Abs().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.abs = P.Abs().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -158,9 +158,9 @@ def test_matmul_sign():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.sign = P.Sign().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.sign = P.Sign().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -184,9 +184,9 @@ def test_matmul_floor():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.floor = P.Floor().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.floor = P.Floor().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -209,9 +209,9 @@ def test_matmul_round():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.round = P.Round().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.round = P.Round().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
            out = self.matmul(x, y)

@@ -235,9 +235,9 @@ def test_matmul_reciprocal():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.reciprocal = P.Reciprocal().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.reciprocal = P.Reciprocal().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -261,9 +261,9 @@ def test_matmul_inv():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.inv = P.Inv().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.inv = P.Inv().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -287,9 +287,9 @@ def test_matmul_rsqrt():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.rsqrt = P.Rsqrt().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.rsqrt = P.Rsqrt().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -313,9 +313,9 @@ def test_matmul_tan():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.tan = P.Tan().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.tan = P.Tan().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -339,9 +339,9 @@ def test_matmul_sin():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.sin = P.Sin().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.sin = P.Sin().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -365,9 +365,9 @@ def test_matmul_sinh():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.sinh = P.Sinh().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.sinh = P.Sinh().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -391,9 +391,9 @@ def test_matmul_log1p():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.log1p = P.Log1p().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.log1p = P.Log1p().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -417,9 +417,9 @@ def test_matmul_expm1():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.expm1 = P.Expm1().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.expm1 = P.Expm1().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -443,9 +443,9 @@ def test_matmul_cosh():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.cosh = P.Cosh().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.cosh = P.Cosh().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -468,9 +468,9 @@ def test_matmul_erf():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.erf = P.Erf().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.erf = P.Erf().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -494,9 +494,9 @@ def test_matmul_erfc():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.erfc = P.Erfc().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.erfc = P.Erfc().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -520,9 +520,9 @@ def test_matmul_zeroslike():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.zeroslike = P.ZerosLike().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.zeroslike = P.ZerosLike().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -546,9 +546,9 @@ def test_matmul_oneslike():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.oneslike = P.OnesLike().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.oneslike = P.OnesLike().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -572,9 +572,9 @@ def test_matmul_BesselI0e():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.BesselI0e = P.BesselI0e().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.BesselI0e = P.BesselI0e().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -598,9 +598,9 @@ def test_matmul_BesselI1e():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.BesselI1e = P.BesselI1e().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.BesselI1e = P.BesselI1e().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -624,9 +624,9 @@ def test_matmul_ceil():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.Ceil = P.Ceil().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.Ceil = P.Ceil().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -650,9 +650,9 @@ def test_matmul_atan():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.atan = P.Atan().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.atan = P.Atan().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -676,9 +676,9 @@ def test_matmul_Atanh():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.atanh = P.Atanh().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.atanh = P.Atanh().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -702,9 +702,9 @@ def test_matmul_asin():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.asin = P.Asin().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.asin = P.Asin().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -728,9 +728,9 @@ def test_matmul_asinh():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.asinh = P.Asinh().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.asinh = P.Asinh().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -754,9 +754,9 @@ def test_matmul_acosh():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
|
||||
self.acosh = P.Acosh().set_strategy(strategy2)
|
||||
self.matmul2 = P.MatMul().set_strategy(strategy1)
|
||||
self.matmul = P.MatMul().shard(strategy1)
|
||||
self.acosh = P.Acosh().shard(strategy2)
|
||||
self.matmul2 = P.MatMul().shard(strategy1)
|
||||
|
||||
def construct(self, x, y, b):
|
||||
out = self.matmul(x, y)
|
||||
|
@ -780,9 +780,9 @@ def test_matmul_logical_not():
|
|||
class Net(nn.Cell):
|
||||
def __init__(self, strategy1, strategy2, strategy3):
|
||||
super().__init__()
|
||||
self.matmul = P.MatMul().set_strategy(strategy1)
|
||||
self.logicalnot = P.LogicalNot().set_strategy(strategy2)
|
||||
self.equal = P.Equal().set_strategy(strategy3)
|
||||
self.matmul = P.MatMul().shard(strategy1)
|
||||
self.logicalnot = P.LogicalNot().shard(strategy2)
|
||||
self.equal = P.Equal().shard(strategy3)
|
||||
|
||||
def construct(self, x, y, b):
|
||||
out = self.matmul(x, y)
|
||||
|
@ -807,9 +807,9 @@ def test_matmul_cast():
|
|||
class Net(nn.Cell):
|
||||
def __init__(self, strategy1, strategy2, strategy3):
|
||||
super().__init__()
|
||||
self.matmul = P.MatMul().set_strategy(strategy1)
|
||||
self.cast = P.Cast().set_strategy(strategy2)
|
||||
self.matmul2 = P.MatMul().set_strategy(strategy3)
|
||||
self.matmul = P.MatMul().shard(strategy1)
|
||||
self.cast = P.Cast().shard(strategy2)
|
||||
self.matmul2 = P.MatMul().shard(strategy3)
|
||||
|
||||
def construct(self, x, y, b):
|
||||
out = self.matmul(x, y)
|
||||
|
@ -834,7 +834,7 @@ def test_gradient_fp32_sync():
|
|||
class Net(nn.Cell):
|
||||
def __init__(self, strategy1):
|
||||
super().__init__()
|
||||
self.matmul = P.MatMul().set_strategy(strategy1)
|
||||
self.matmul = P.MatMul().shard(strategy1)
|
||||
self.cast = P.Cast()
|
||||
|
||||
def construct(self, x, y, b):
|
||||
|
@ -858,7 +858,7 @@ def test_gradient_fp32_sync1():
|
|||
class Net(nn.Cell):
|
||||
def __init__(self, strategy1):
|
||||
super().__init__()
|
||||
self.matmul = P.MatMul().set_strategy(strategy1)
|
||||
self.matmul = P.MatMul().shard(strategy1)
|
||||
self.cast = P.Cast()
|
||||
|
||||
def construct(self, x, y, b):
|
||||
|
@ -882,7 +882,7 @@ def test_gradient_fp32_sync2():
|
|||
class Net(nn.Cell):
|
||||
def __init__(self, strategy1):
|
||||
super().__init__()
|
||||
self.matmul = P.MatMul().set_strategy(strategy1)
|
||||
self.matmul = P.MatMul().shard(strategy1)
|
||||
self.cast = P.Cast()
|
||||
|
||||
def construct(self, x, y, b):
|
||||
|
@ -906,7 +906,7 @@ def test_gradient_fp32_sync3():
|
|||
class Net(nn.Cell):
|
||||
def __init__(self, strategy1):
|
||||
super().__init__()
|
||||
self.matmul = P.MatMul().set_strategy(strategy1)
|
||||
self.matmul = P.MatMul().shard(strategy1)
|
||||
self.cast = P.Cast()
|
||||
|
||||
def construct(self, x, y, b):
|
||||
|
@ -930,10 +930,10 @@ def test_mul_two_cast():
|
|||
class Net(nn.Cell):
|
||||
def __init__(self, strategy1, strategy2, strategy3):
|
||||
super().__init__()
|
||||
self.mul = P.Mul().set_strategy(strategy1)
|
||||
self.mul2 = P.Mul().set_strategy(strategy2)
|
||||
self.cast = P.Cast().set_strategy(strategy3)
|
||||
self.cast2 = P.Cast().set_strategy(strategy3)
|
||||
self.mul = P.Mul().shard(strategy1)
|
||||
self.mul2 = P.Mul().shard(strategy2)
|
||||
self.cast = P.Cast().shard(strategy3)
|
||||
self.cast2 = P.Cast().shard(strategy3)
|
||||
|
||||
def construct(self, x, y, b):
|
||||
out = self.mul(x, y)
|
||||
|
|
|
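Note: every hunk above applies the same mechanical rename: the parallel-strategy setter set_strategy() becomes shard(), with the strategy argument unchanged. A minimal sketch of the migration (the (2, 1)/(1, 2) layout below is a hypothetical two-device split, not a value taken from this commit):

# before this commit (removed API):
# matmul = P.MatMul().set_strategy(((2, 1), (1, 2)))
# after this commit (renamed API):
matmul = P.MatMul().shard(((2, 1), (1, 2)))  # one sub-tuple per input tensor of MatMul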
@@ -49,8 +49,8 @@ class Net(nn.Cell):
super().__init__()
self.index = Tensor(np.ones(shape), dtype=ms.int32)
self.offset = offset
self.elu = P.EmbeddingLookup().set_strategy(strategy1).add_prim_attr("primitive_target", target)
self.mm = P.BatchMatMul().set_strategy(strategy2)
self.elu = P.EmbeddingLookup().shard(strategy1).add_prim_attr("primitive_target", target)
self.mm = P.BatchMatMul().shard(strategy2)

def construct(self, x, y):
out = self.elu(x, self.index, self.offset)
@@ -24,9 +24,9 @@ from mindspore.ops import operations as P
class Net(Cell):
def __init__(self, mul_weight, strategy1=None, strategy2=None, strategy3=None):
super().__init__()
self.mul = P.Mul().set_strategy(strategy1)
self.expand_dims = P.ExpandDims().set_strategy(strategy2)
self.mul2 = P.Mul().set_strategy(strategy3)
self.mul = P.Mul().shard(strategy1)
self.expand_dims = P.ExpandDims().shard(strategy2)
self.mul2 = P.Mul().shard(strategy3)
self.mul_weight = Parameter(mul_weight, "w1")

def construct(self, x, b):
@@ -39,8 +39,8 @@ class Net(Cell):
class Net2(Cell):
def __init__(self, mul_weight, strategy1=None, strategy2=None):
super().__init__()
self.expand_dims = P.ExpandDims().set_strategy(strategy1)
self.mul = P.Mul().set_strategy(strategy2)
self.expand_dims = P.ExpandDims().shard(strategy1)
self.mul = P.Mul().shard(strategy2)
self.mul_weight = Parameter(mul_weight, "w1")

def construct(self, x, b):
@@ -24,8 +24,8 @@ from mindspore.ops import operations as P
class Net(Cell):
def __init__(self, mul_weight, strategy1=None, strategy2=None):
super().__init__()
self.mul = P.Mul().set_strategy(strategy1)
self.neg = P.Neg().set_strategy(strategy2)
self.mul = P.Mul().shard(strategy1)
self.neg = P.Neg().shard(strategy2)
self.mul_weight = Parameter(mul_weight, "w1")

def construct(self, x, b):
@@ -51,9 +51,9 @@ class Dataset(MindData):
class AllToAllNet(nn.Cell):
def __init__(self, strategy1):
super(AllToAllNet, self).__init__()
self.matmul = P.MatMul().set_strategy(((1, 1), (1, 8)))
self.matmul = P.MatMul().shard(((1, 1), (1, 8)))
self.matmul_weight = Parameter(Tensor(np.ones([128, 256]), dtype=ms.float32), name="weight")
self.transpose1 = P.Transpose().set_strategy(strategy1)
self.transpose1 = P.Transpose().shard(strategy1)

def construct(self, x):
x = self.matmul(x, self.matmul_weight)
@@ -77,8 +77,8 @@ def all_to_all_common(strategy1):
net = all_to_all_net(strategy1)

loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
loss.softmax_cross_entropy.set_strategy(((8, 1), (8, 1)))
loss.one_hot.set_strategy(((8, 1), (), ()))
loss.softmax_cross_entropy.shard(((8, 1), (8, 1)))
loss.one_hot.shard(((8, 1), (), ()))
opt = Momentum(net.trainable_params(), learning_rate, momentum)
model = Model(net, loss, opt)
@@ -51,8 +51,8 @@ class Net(nn.Cell):
super().__init__()
if shape is None:
shape = [64, 64]
self.gatherv2 = P.GatherV2().set_strategy(strategy1).add_prim_attr("primitive_target", target)
self.mul = P.Mul().set_strategy(strategy2)
self.gatherv2 = P.GatherV2().shard(strategy1).add_prim_attr("primitive_target", target)
self.mul = P.Mul().shard(strategy2)
self.index = Tensor(np.ones(shape), dtype=ms.int32)
self.axis = axis
@@ -79,7 +79,7 @@ class GatherV2(_Loss):
emb2_list = np.reshape(emb_list[1::2], (int(index_size / 2), 16))
self.emb1_param = Tensor(emb1_list, dtype=mstype.int32)
self.emb2_param = Tensor(emb2_list, dtype=mstype.int32)
self.gatherv2 = P.GatherV2().set_strategy(strategy).add_prim_attr("data_parallel", True)
self.gatherv2 = P.GatherV2().shard(strategy).add_prim_attr("data_parallel", True)

def construct(self, nembeddings):
emb1 = self.gatherv2(nembeddings, self.emb1_param, 0)
@@ -208,7 +208,7 @@ class GatherV2Axis1(_Loss):
emb2_list = np.reshape(emb_list[1::2], (int(index_size / 2), index_size))
self.emb1_param = Tensor(emb1_list, dtype=mstype.int32)
self.emb2_param = Tensor(emb2_list, dtype=mstype.int32)
self.gatherv2 = P.GatherV2().set_strategy(strategy)
self.gatherv2 = P.GatherV2().shard(strategy)

def construct(self, nembeddings):
emb1 = self.gatherv2(nembeddings, self.emb1_param, 1)
@@ -32,10 +32,10 @@ class NetWithLoss(nn.Cell):
def __init__(self, network, types, shapes, output_num, strategy3=None, strategy4=None, axis=-1):
super(NetWithLoss, self).__init__()
self.get_next = P.GetNext(types, shapes, output_num, "")
self.one_hot = P.OneHot(axis=axis).set_strategy(strategy3)
self.one_hot = P.OneHot(axis=axis).shard(strategy3)
self.on_value = Tensor(1.0, ms.float32)
self.off_value = Tensor(0.0, ms.float32)
self.loss = P.SoftmaxCrossEntropyWithLogits().set_strategy(strategy4)
self.loss = P.SoftmaxCrossEntropyWithLogits().shard(strategy4)
self.network = network

def construct(self):
@@ -81,8 +81,8 @@ def test_get_next_semi_auto_parallel():
class Net(nn.Cell):
def __init__(self, channel=1, w=0.25, strategy1=None, strategy2=None):
super().__init__()
self.norm = P.L2Normalize().set_strategy(strategy1)
self.prelu = P.PReLU().set_strategy(strategy2)
self.norm = P.L2Normalize().shard(strategy1)
self.prelu = P.PReLU().shard(strategy2)
self.w = Parameter(initializer(w, [channel,]), name='w')

def construct(self, data):
@@ -105,8 +105,8 @@ def test_get_next_semi_auto_parallel1():
class Net(nn.Cell):
def __init__(self, channel=1, w=0.25, strategy1=None, strategy2=None):
super().__init__()
self.norm = P.L2Normalize().set_strategy(strategy1)
self.prelu = P.PReLU().set_strategy(strategy2)
self.norm = P.L2Normalize().shard(strategy1)
self.prelu = P.PReLU().shard(strategy2)
self.w = Parameter(initializer(w, [channel,]), name='w')

def construct(self, data):
@@ -129,8 +129,8 @@ def test_get_next_auto_parallel():
class Net(nn.Cell):
def __init__(self, channel=1, w=0.25, strategy1=None, strategy2=None):
super().__init__()
self.norm = P.L2Normalize().set_strategy(strategy1)
self.prelu = P.PReLU().set_strategy(strategy2)
self.norm = P.L2Normalize().shard(strategy1)
self.prelu = P.PReLU().shard(strategy2)
self.w = Parameter(initializer(w, [channel,]), name='w')

def construct(self, data):
@@ -27,8 +27,8 @@ def test_get_parameter_layout():
def __init__(self, strategy1, strategy2, weight):
super().__init__()
self.weight = Parameter(weight, "w1")
self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1)
self.relu = P.ReLU().set_strategy(strategy2)
self.matmul = P.MatMul(transpose_a=False, transpose_b=True).shard(strategy1)
self.relu = P.ReLU().shard(strategy2)

def construct(self, x):
out = self.matmul(x, self.weight)
@@ -49,8 +49,8 @@ class GradWrap(nn.Cell):
class Net(nn.Cell):
def __init__(self, strategy1=None, strategy2=None):
super().__init__()
self.dropout = P.Dropout(keep_prob=0.6).set_strategy(strategy1)
self.matmul = P.MatMul().set_strategy(strategy2)
self.dropout = P.Dropout(keep_prob=0.6).shard(strategy1)
self.matmul = P.MatMul().shard(strategy2)

def construct(self, x, y):
out = self.matmul(x, y)
@@ -56,9 +56,9 @@ def test_matmul_tanh():
class Net(nn.Cell):
def __init__(self, strategy1, strategy2, strategy3):
super().__init__()
self.matmul1 = P.MatMul().set_strategy(strategy1)
self.matmul2 = P.MatMul().set_strategy(strategy2)
self.tanh = P.Tanh().set_strategy(strategy3)
self.matmul1 = P.MatMul().shard(strategy1)
self.matmul2 = P.MatMul().shard(strategy2)
self.tanh = P.Tanh().shard(strategy3)

def construct(self, x, y, b):
out = self.tanh(self.matmul1(x, y))
@@ -82,9 +82,9 @@ def test_matmul_activation():
class Net(nn.Cell):
def __init__(self, strategy1, strategy2, strategy3):
super().__init__()
self.matmul1 = P.MatMul().set_strategy(strategy1)
self.matmul2 = P.MatMul().set_strategy(strategy2)
self.activation = P.ReLU().set_strategy(strategy3)
self.matmul1 = P.MatMul().shard(strategy1)
self.matmul2 = P.MatMul().shard(strategy2)
self.activation = P.ReLU().shard(strategy3)

def construct(self, x, y, b):
out = self.activation(self.matmul1(x, y))
@@ -108,9 +108,9 @@ def test_matmul_softmax():
class Net(nn.Cell):
def __init__(self, strategy1, strategy2, strategy3):
super().__init__()
self.matmul1 = P.MatMul().set_strategy(strategy1)
self.matmul2 = P.MatMul().set_strategy(strategy2)
self.softmax = P.Softmax().set_strategy(strategy3)
self.matmul1 = P.MatMul().shard(strategy1)
self.matmul2 = P.MatMul().shard(strategy2)
self.softmax = P.Softmax().shard(strategy3)

def construct(self, x, y, b):
out = self.softmax(self.matmul1(x, y))
@@ -134,9 +134,9 @@ def test_matmul_logsoftmax():
class Net(nn.Cell):
def __init__(self, strategy1, strategy2, strategy3):
super().__init__()
self.matmul1 = P.MatMul().set_strategy(strategy1)
self.matmul2 = P.MatMul().set_strategy(strategy2)
self.logsoftmax = P.LogSoftmax().set_strategy(strategy3)
self.matmul1 = P.MatMul().shard(strategy1)
self.matmul2 = P.MatMul().shard(strategy2)
self.logsoftmax = P.LogSoftmax().shard(strategy3)

def construct(self, x, y, b):
out = self.logsoftmax(self.matmul1(x, y))
@@ -160,12 +160,12 @@ def test_activations():
class Net(nn.Cell):
def __init__(self, strategy1, strategy2, strategy3):
super().__init__()
self.matmul1 = P.MatMul().set_strategy(strategy1)
self.matmul2 = P.MatMul().set_strategy(strategy2)
self.gelu = P.Gelu().set_strategy(strategy3)
self.tanh = P.Tanh().set_strategy(strategy3)
self.softmax = P.Softmax().set_strategy(strategy3)
self.logsoftmax = P.LogSoftmax().set_strategy(strategy3)
self.matmul1 = P.MatMul().shard(strategy1)
self.matmul2 = P.MatMul().shard(strategy2)
self.gelu = P.Gelu().shard(strategy3)
self.tanh = P.Tanh().shard(strategy3)
self.softmax = P.Softmax().shard(strategy3)
self.logsoftmax = P.LogSoftmax().shard(strategy3)

def construct(self, x, y, b):
out = self.gelu(self.tanh(self.matmul1(x, y)))
@@ -189,12 +189,12 @@ def test_activations_repeated_calculation():
class Net(nn.Cell):
def __init__(self, strategy1, strategy2, strategy3, strategy4, strategy5, strategy6):
super().__init__()
self.matmul1 = P.MatMul().set_strategy(strategy1)
self.matmul2 = P.MatMul().set_strategy(strategy2)
self.gelu = P.Gelu().set_strategy(strategy3)
self.tanh = P.Tanh().set_strategy(strategy4)
self.softmax = P.Softmax().set_strategy(strategy5)
self.logsoftmax = P.LogSoftmax().set_strategy(strategy6)
self.matmul1 = P.MatMul().shard(strategy1)
self.matmul2 = P.MatMul().shard(strategy2)
self.gelu = P.Gelu().shard(strategy3)
self.tanh = P.Tanh().shard(strategy4)
self.softmax = P.Softmax().shard(strategy5)
self.logsoftmax = P.LogSoftmax().shard(strategy6)

def construct(self, x, y, b):
out = self.gelu(self.tanh(self.matmul1(x, y)))
@@ -221,12 +221,12 @@ def test_activations_axis_tuple():
class Net(nn.Cell):
def __init__(self, strategy1, strategy2, strategy3, strategy4, strategy5, strategy6):
super().__init__()
self.matmul1 = P.MatMul().set_strategy(strategy1)
self.matmul2 = P.MatMul().set_strategy(strategy2)
self.gelu = P.Gelu().set_strategy(strategy3)
self.tanh = P.Tanh().set_strategy(strategy4)
self.softmax = P.Softmax(axis=(0, 1)).set_strategy(strategy5)
self.logsoftmax = P.LogSoftmax().set_strategy(strategy6)
self.matmul1 = P.MatMul().shard(strategy1)
self.matmul2 = P.MatMul().shard(strategy2)
self.gelu = P.Gelu().shard(strategy3)
self.tanh = P.Tanh().shard(strategy4)
self.softmax = P.Softmax(axis=(0, 1)).shard(strategy5)
self.logsoftmax = P.LogSoftmax().shard(strategy6)

def construct(self, x, y, b):
out = self.gelu(self.tanh(self.matmul1(x, y)))
@@ -28,8 +28,8 @@ class Net(nn.Cell):
def __init__(self, strategy1, strategy2, weight):
super().__init__()
self.weight = Parameter(weight, "w1")
self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1)
self.relu = P.ReLU().set_strategy(strategy2)
self.matmul = P.MatMul(transpose_a=False, transpose_b=True).shard(strategy1)
self.relu = P.ReLU().shard(strategy2)

def construct(self, x):
out = self.matmul(x, self.weight)
@@ -52,10 +52,10 @@ def test_l2normalize_matmul():
class Net(nn.Cell):
def __init__(self, strategy1, strategy2, strategy3):
super().__init__()
self.norm1 = P.L2Normalize(axis=0).set_strategy(strategy1)
self.norm2 = P.L2Normalize(axis=0).set_strategy(strategy1)
self.mul1 = P.Mul().set_strategy(strategy2)
self.mul2 = P.Mul().set_strategy(strategy3)
self.norm1 = P.L2Normalize(axis=0).shard(strategy1)
self.norm2 = P.L2Normalize(axis=0).shard(strategy1)
self.mul1 = P.Mul().shard(strategy2)
self.mul2 = P.Mul().shard(strategy3)

def construct(self, x, y, b):
y = self.norm1(y)
@@ -28,9 +28,9 @@ class Net(Cell):
super().__init__()
self.begin_norm_axis = 2
self.begin_params_axis = 1
self.mul = P.Mul().set_strategy(strategy1)
self.layer_norm = P.LayerNorm(self.begin_norm_axis, self.begin_params_axis).set_strategy(strategy2)
self.mul2 = P.Mul().set_strategy(strategy3)
self.mul = P.Mul().shard(strategy1)
self.layer_norm = P.LayerNorm(self.begin_norm_axis, self.begin_params_axis).shard(strategy2)
self.mul2 = P.Mul().shard(strategy3)
self.mul_weight = Parameter(mul_weight, "w1")
self.normalized_shape = [64, 32, 16]
self.gamma = Parameter(initializer('ones', self.normalized_shape), name="gamma")
@@ -29,7 +29,7 @@ grad_all = C.GradOperation(get_all=True)
class NetWithLoss(nn.Cell):
def __init__(self, network, strategy3):
super(NetWithLoss, self).__init__()
self.loss = P.SoftmaxCrossEntropyWithLogits().set_strategy(strategy3)
self.loss = P.SoftmaxCrossEntropyWithLogits().shard(strategy3)
self.network = network

def construct(self, x, y, bias, label):
@@ -50,9 +50,9 @@ def test_linear():
class Net(nn.Cell):
def __init__(self, strategy0, strategy1, strategy2):
super().__init__()
self.fc_nobias = P.MatMul(transpose_b=True).set_strategy(strategy0)
self.add = P.TensorAdd().set_strategy(strategy1)
self.gelu = P.Gelu().set_strategy(strategy2)
self.fc_nobias = P.MatMul(transpose_b=True).shard(strategy0)
self.add = P.TensorAdd().shard(strategy1)
self.gelu = P.Gelu().shard(strategy2)

def construct(self, x, y, bias):
out = self.fc_nobias(x, y)
@@ -71,8 +71,8 @@ def test_two_matmul():
class Net(nn.Cell):
def __init__(self, strategy1, strategy2):
super().__init__()
self.matmul1 = P.MatMul().set_strategy(strategy1)
self.matmul2 = P.MatMul().set_strategy(strategy2)
self.matmul1 = P.MatMul().shard(strategy1)
self.matmul2 = P.MatMul().shard(strategy2)

def construct(self, x, y, b):
out = self.matmul1(x, y)
@@ -27,7 +27,7 @@ from mindspore.ops import operations as P
class NetWithLoss(nn.Cell):
def __init__(self, network, strategy3):
super(NetWithLoss, self).__init__()
self.loss = P.SoftmaxCrossEntropyWithLogits().set_strategy(strategy3)
self.loss = P.SoftmaxCrossEntropyWithLogits().shard(strategy3)
self.network = network

def construct(self, x, b):
@@ -45,8 +45,8 @@ def test_momentum():
def __init__(self, strategy1, strategy2, weight):
super().__init__()
self.weight = Parameter(weight, "w1")
self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1)
self.relu = P.ReLU().set_strategy(strategy2)
self.matmul = P.MatMul(transpose_a=False, transpose_b=True).shard(strategy1)
self.relu = P.ReLU().shard(strategy2)

def construct(self, x):
out = self.matmul(x, self.weight)
@@ -79,8 +79,8 @@ def test_momentum_with_loss_scale():
def __init__(self, strategy1, strategy2, weight):
super().__init__()
self.weight = Parameter(weight, "w1")
self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1)
self.relu = P.ReLU().set_strategy(strategy2)
self.matmul = P.MatMul(transpose_a=False, transpose_b=True).shard(strategy1)
self.relu = P.ReLU().shard(strategy2)

def construct(self, x):
out = self.matmul(x, self.weight)
@@ -113,8 +113,8 @@ def test_momentum_with_dynamic_lr():
def __init__(self, strategy1, strategy2, weight):
super().__init__()
self.weight = Parameter(weight, "w1")
self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1)
self.relu = P.ReLU().set_strategy(strategy2)
self.matmul = P.MatMul(transpose_a=False, transpose_b=True).shard(strategy1)
self.relu = P.ReLU().shard(strategy2)

def construct(self, x):
out = self.matmul(x, self.weight)
@@ -148,8 +148,8 @@ def test_momentum_with_loss_scale_and_dynamic_lr():
def __init__(self, strategy1, strategy2, weight):
super().__init__()
self.weight = Parameter(weight, "w1")
self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1)
self.relu = P.ReLU().set_strategy(strategy2)
self.matmul = P.MatMul(transpose_a=False, transpose_b=True).shard(strategy1)
self.relu = P.ReLU().shard(strategy2)

def construct(self, x):
out = self.matmul(x, self.weight)
@@ -184,8 +184,8 @@ def test_lars():
def __init__(self, strategy1, strategy2, weight):
super().__init__()
self.weight = Parameter(weight, "w1")
self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1)
self.relu = P.ReLU().set_strategy(strategy2)
self.matmul = P.MatMul(transpose_a=False, transpose_b=True).shard(strategy1)
self.relu = P.ReLU().shard(strategy2)

def construct(self, x):
out = self.matmul(x, self.weight)
@@ -36,12 +36,12 @@ class Net(Cell):
split_string="manual_split",
param_shape=(8, 8)):
super().__init__()
self.gatherv2 = P.EmbeddingLookup().set_strategy(strategy1)
self.gatherv2 = P.EmbeddingLookup().shard(strategy1)
self.gatherv2.add_prim_attr(split_string, split_tuple)
self.gatherv2.add_prim_attr("primitive_target", "CPU")
self.mul = P.Mul().set_strategy(strategy2)
self.mul = P.Mul().shard(strategy2)
self.reshape = P.Reshape()
self.matmul = P.MatMul().set_strategy(strategy3)
self.matmul = P.MatMul().shard(strategy3)
self.matmul.add_prim_attr("forward_reduce_scatter", True)
if init_flag:
self.param = Parameter(initializer("ones", param_shape, ms.float32), name="gatherv2_param")
@@ -33,11 +33,11 @@ class Net(Cell):
split_string="manual_split",
param_shape=(8, 8)):
super().__init__()
self.gatherv2 = P.GatherV2().set_strategy(strategy1)
self.gatherv2 = P.GatherV2().shard(strategy1)
self.gatherv2.add_prim_attr(split_string, split_tuple)
self.mul = P.Mul().set_strategy(strategy2)
self.mul = P.Mul().shard(strategy2)
self.reshape = P.Reshape()
self.matmul = P.MatMul().set_strategy(strategy3)
self.matmul = P.MatMul().shard(strategy3)
self.matmul.add_prim_attr("forward_reduce_scatter", True)
if init_flag:
self.param = Parameter(initializer("ones", param_shape, ms.float32), name="gatherv2_param")
@@ -52,11 +52,11 @@ def test_two_matmul_dropout():
class Net(nn.Cell):
def __init__(self, strategy1, strategy2, strategy3):
super().__init__()
self.matmul1 = P.MatMul().set_strategy(strategy1)
self.matmul1 = P.MatMul().shard(strategy1)
self.dropout = nn.Dropout()
self.dropout.dropout_do_mask.set_strategy(strategy2)
self.dropout.dropout_gen_mask.set_strategy(strategy2)
self.matmul2 = P.MatMul().set_strategy(strategy3)
self.dropout.dropout_do_mask.shard(strategy2)
self.dropout.dropout_gen_mask.shard(strategy2)
self.matmul2 = P.MatMul().shard(strategy3)

def construct(self, x, y, b):
out = self.matmul1(x, y)
@@ -59,9 +59,9 @@ def test_two_matmul():
class Net(nn.Cell):
def __init__(self, strategy1, strategy2, strategy3):
super().__init__()
self.matmul1 = P.MatMul().set_strategy(strategy1)
self.matmul2 = P.MatMul().set_strategy(strategy2)
self.matmul3 = P.MatMul().set_strategy(strategy3)
self.matmul1 = P.MatMul().shard(strategy1)
self.matmul2 = P.MatMul().shard(strategy2)
self.matmul3 = P.MatMul().shard(strategy3)
self.diag = P.Diag()
self.fill = P.Fill()
@@ -89,8 +89,8 @@ def test_matmul_mul_broadcast2():
class Net(nn.Cell):
def __init__(self, strategy1, strategy2):
super().__init__()
self.matmul = P.MatMul().set_strategy(strategy1)
self.mul = P.Mul().set_strategy(strategy2)
self.matmul = P.MatMul().shard(strategy1)
self.mul = P.Mul().shard(strategy2)
self.t = Tensor(0.9, ms.float32)

def construct(self, x, y):
@@ -113,9 +113,9 @@ def test_two_matmul1():
class Net(nn.Cell):
def __init__(self, strategy1, strategy2, strategy3):
super().__init__()
self.matmul1 = P.MatMul().set_strategy(strategy1)
self.matmul2 = P.MatMul().set_strategy(strategy2)
self.matmul3 = P.MatMul().set_strategy(strategy3)
self.matmul1 = P.MatMul().shard(strategy1)
self.matmul2 = P.MatMul().shard(strategy2)
self.matmul3 = P.MatMul().shard(strategy3)
self.diag = P.Diag()
self.fill = P.Fill()
@@ -143,8 +143,8 @@ def test_matmul_add_tensor():
class Net(nn.Cell):
def __init__(self, strategy1, strategy2):
super().__init__()
self.matmul = P.MatMul().set_strategy(strategy1)
self.add = P.TensorAdd().set_strategy(strategy2)
self.matmul = P.MatMul().shard(strategy1)
self.add = P.TensorAdd().shard(strategy2)
self.b = Tensor(0.9, ms.float32)

def construct(self, x, y):
@@ -50,9 +50,9 @@ class GradWrap(nn.Cell):
class Net1(nn.Cell):
def __init__(self, strategy1, strategy2, strategy3):
super().__init__()
self.matmul1 = P.MatMul().set_strategy(strategy1)
self.matmul2 = P.MatMul().set_strategy(strategy2)
self.matmul3 = P.MatMul().set_strategy(strategy3)
self.matmul1 = P.MatMul().shard(strategy1)
self.matmul2 = P.MatMul().shard(strategy2)
self.matmul3 = P.MatMul().shard(strategy3)

def construct(self, x, y, b):
out1 = self.matmul1(x, b)
@@ -66,7 +66,7 @@ def test_two_matmul():
def __init__(self, strategy1, strategy2, strategy3, strategy4):
super().__init__()
self.net1_out = Net1(strategy1, strategy2, strategy3)
self.matmul = P.MatMul().set_strategy(strategy4)
self.matmul = P.MatMul().shard(strategy4)

def construct(self, x, y, b, z):
out = self.net1_out(x, y, b)
@@ -24,8 +24,8 @@ from mindspore.ops import operations as P
class Net(Cell):
def __init__(self, mul_weight, strategy1=None, strategy2=None):
super().__init__()
self.mul = P.Mul().set_strategy(strategy1)
self.neg = P.Neg().set_strategy(strategy2)
self.mul = P.Mul().shard(strategy1)
self.neg = P.Neg().shard(strategy2)
self.mul_weight = Parameter(mul_weight, "w1")

def construct(self, x, b):
@@ -85,15 +85,15 @@ class SemiAutoOneHotNet(Cell):
self.d = args.d
self.e = args.e
self.cast = P.Cast()
self.cast.set_strategy(strategy=strategy.twod_strategy)
self.cast.shard(strategy=strategy.twod_strategy)
self.cast1 = P.Cast()
self.cast1.set_strategy(strategy=strategy.twod_strategy)
self.cast1.shard(strategy=strategy.twod_strategy)
self.cast2 = P.Cast()
self.cast2.set_strategy(strategy=strategy.twod_strategy)
self.cast2.shard(strategy=strategy.twod_strategy)
self.cast3 = P.Cast()
self.cast3.set_strategy(strategy=strategy.scalar_strategy)
self.cast3.shard(strategy=strategy.scalar_strategy)
self.cast4 = P.Cast()
self.cast4.set_strategy(strategy=strategy.scalar_strategy)
self.cast4.shard(strategy=strategy.scalar_strategy)
self.a_const = Tensor(self.a, dtype=mstype.float32)
self.b_const = Tensor(self.b, dtype=mstype.float32)
self.c_const = Tensor(self.c, dtype=mstype.float32)
@@ -102,64 +102,64 @@ class SemiAutoOneHotNet(Cell):
self.m_const_zero = Tensor(0, dtype=mstype.float32)
self.a_const_one = Tensor(1, dtype=mstype.float32)
self.onehot = P.OneHot()
self.onehot.set_strategy(strategy=strategy.onehot_strategy)
self.onehot.shard(strategy=strategy.onehot_strategy)
self.exp = P.Exp()
self.exp.set_strategy(strategy=strategy.twod_strategy)
self.exp.shard(strategy=strategy.twod_strategy)
self.exp2 = P.Exp()
self.exp2.set_strategy(strategy=strategy.twod_strategy)
self.exp2.shard(strategy=strategy.twod_strategy)
self.exp3 = P.Exp()
self.exp3.set_strategy(strategy=strategy.twod_strategy)
self.exp3.shard(strategy=strategy.twod_strategy)
self.mul_const = P.Mul()
self.mul_const.set_strategy(strategy=strategy.scalar_twod_strategy)
self.mul_const.shard(strategy=strategy.scalar_twod_strategy)
self.mul_const2 = P.TensorAdd()
self.mul_const2.set_strategy(strategy=strategy.scalar_twod_strategy)
self.mul_const2.shard(strategy=strategy.scalar_twod_strategy)
self.mul_const3 = P.Sub()
self.mul_const3.set_strategy(strategy=strategy.twod_scalar_strategy)
self.mul_const3.shard(strategy=strategy.twod_scalar_strategy)
self.mul_const4 = P.Sub()
self.mul_const4.set_strategy(strategy=strategy.scalar_twod_strategy)
self.mul_const4.shard(strategy=strategy.scalar_twod_strategy)
self.mul_const5 = P.Mul()
self.mul_const5.set_strategy(strategy=strategy.twod_scalar_strategy)
self.mul_const5.shard(strategy=strategy.twod_scalar_strategy)
self.mul = P.Mul()
self.mul.set_strategy(strategy=strategy.twod_twod_strategy)
self.mul.shard(strategy=strategy.twod_twod_strategy)
self.mul2 = P.Mul()
self.mul2.set_strategy(strategy=strategy.twod_twod_strategy)
self.mul2.shard(strategy=strategy.twod_twod_strategy)
self.mul3 = P.TensorAdd()
self.mul3.set_strategy(strategy=strategy.twod_twod_strategy)
self.mul3.shard(strategy=strategy.twod_twod_strategy)
self.mul4 = P.Sub()
self.mul4.set_strategy(strategy=strategy.twod_twodbc_strategy)
self.mul4.shard(strategy=strategy.twod_twodbc_strategy)
self.mul5 = P.RealDiv()
self.mul5.set_strategy(strategy=strategy.twod_twodbc_strategy)
self.mul5.shard(strategy=strategy.twod_twodbc_strategy)
self.mul6 = P.Mul()
self.mul6.set_strategy(strategy=strategy.twod_twod_strategy)
self.mul6.shard(strategy=strategy.twod_twod_strategy)
self.mul7 = P.Mul()
self.mul7.set_strategy(strategy=strategy.twod_scalar_strategy)
self.mul7.shard(strategy=strategy.twod_scalar_strategy)
self.mul8 = P.RealDiv()
self.mul8.set_strategy(strategy=strategy.scalar_scalar_strategy)
self.mul8.shard(strategy=strategy.scalar_scalar_strategy)
self.mul9 = P.TensorAdd()
self.mul9.set_strategy(strategy=strategy.twod_scalar_strategy)
self.mul9.shard(strategy=strategy.twod_scalar_strategy)

self.reduce_max = P.ReduceMax(keep_dims=True)
self.reduce_max.set_strategy(strategy=strategy.twod_strategy)
self.reduce_max.shard(strategy=strategy.twod_strategy)

self.reduce_sum = P.ReduceSum(keep_dims=False)
self.reduce_sum.set_strategy(strategy=strategy.twod_strategy)
self.reduce_sum.shard(strategy=strategy.twod_strategy)
self.reduce_sum_2 = P.ReduceSum(keep_dims=False)
self.reduce_sum_2.set_strategy(strategy=strategy.twod_strategy)
self.reduce_sum_2.shard(strategy=strategy.twod_strategy)
self.reduce_sum_3 = P.ReduceSum(keep_dims=False)
self.reduce_sum_3.set_strategy(strategy=strategy.oned_strategy)
self.reduce_sum_3.shard(strategy=strategy.oned_strategy)

self.reshape = P.Reshape()
self.log = P.Log()
self.log.set_strategy(strategy=strategy.twod_strategy)
self.log.shard(strategy=strategy.twod_strategy)

self.on_value = Tensor(1.0, mstype.float32)
self.off_value = Tensor(0.0, mstype.float32)
self.normalize = P.L2Normalize(axis=1)
self.normalize.set_strategy(strategy=strategy.twod_strategy_m)
self.normalize.shard(strategy=strategy.twod_strategy_m)
self.normalize2 = P.L2Normalize(axis=1)
self.normalize2.set_strategy(strategy=strategy.twod_strategy_m)
self.normalize2.shard(strategy=strategy.twod_strategy_m)
self.fc = P.MatMul(transpose_b=True)
self.fc.set_strategy(strategy=strategy.twodbc_twod_strategy)
self.fc.shard(strategy=strategy.twodbc_twod_strategy)
weight_shape = [args.num_classes, args.emb_size]
weight_np = np.zeros(weight_shape, np.float32)
self.weight = Parameter(Tensor(weight_np), name='model_parallel_weight')
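Note: as the SemiAutoOneHotNet hunks above show, the rename also covers statement-style calls where the strategy is passed as a keyword argument; shard() configures the primitive in place, so the return value can be ignored. A minimal sketch (the strategy value below is hypothetical, not from this commit):

cast = P.Cast()
cast.shard(strategy=((8, 1),))  # one sub-tuple for Cast's single tensor input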
@@ -29,7 +29,7 @@ grad_by_list = C.GradOperation(get_by_list=True)
class NetWithLoss(nn.Cell):
def __init__(self, network, strategy3):
super(NetWithLoss, self).__init__()
self.loss = P.SoftmaxCrossEntropyWithLogits().set_strategy(strategy3)
self.loss = P.SoftmaxCrossEntropyWithLogits().shard(strategy3)
self.network = network

def construct(self, x, b):
@@ -54,7 +54,7 @@ def test_one_weight_parameter():
def __init__(self, strategy1, weight):
super().__init__()
self.weight = Parameter(weight, "w1", requires_grad=True)
self.matmul = P.MatMul().set_strategy(strategy1)
self.matmul = P.MatMul().shard(strategy1)

def construct(self, x):
out = self.matmul(x, self.weight)
@@ -33,10 +33,10 @@ class NetWithLoss(nn.Cell):
def __init__(self, network, strategy3, strategy4, axis):
super(NetWithLoss, self).__init__()
self.virtual_dataset = _VirtualDataset()
self.one_hot = P.OneHot(axis=axis).set_strategy(strategy3)
self.one_hot = P.OneHot(axis=axis).shard(strategy3)
self.on_value = Tensor(2.0, ms.float32)
self.off_value = Tensor(1.0, ms.float32)
self.loss = P.SoftmaxCrossEntropyWithLogits().set_strategy(strategy4)
self.loss = P.SoftmaxCrossEntropyWithLogits().shard(strategy4)
self.network = network

def construct(self, x, y, b):
@@ -58,8 +58,8 @@ class GradWrap(nn.Cell):
class Net(nn.Cell):
def __init__(self, strategy1, strategy2):
super().__init__()
self.matmul = P.MatMul().set_strategy(strategy1)
self.gelu = P.Gelu().set_strategy(strategy2)
self.matmul = P.MatMul().shard(strategy1)
self.gelu = P.Gelu().shard(strategy2)

def construct(self, x, y):
out = self.matmul(x, y)
@@ -51,7 +51,7 @@ class DenseWrap(Cell):
bias_init='zeros',
has_bias=True,
matmul_strategy=None,
set_strategy=None):
shard=None):

super(DenseWrap, self).__init__()

@@ -69,8 +69,8 @@ class DenseWrap(Cell):
self.bias = Parameter(initializer(
'zeros', [output_channels]), name="bias")

self.matmul = P.MatMul(transpose_b=True).set_strategy(matmul_strategy)
self.bias_add = P.TensorAdd().set_strategy(set_strategy)
self.matmul = P.MatMul(transpose_b=True).shard(matmul_strategy)
self.bias_add = P.TensorAdd().shard(shard)

def construct(self, x):
if self.has_bias:
@@ -108,7 +108,7 @@ def conv3x3(in_channels, out_channels, stride=1):
conv = Conv2d(in_channels, out_channels,
kernel_size=3, stride=stride, padding=0, weight_init=weight, has_bias=False,
pad_mode="same")
conv.conv2d.set_strategy(strategy_weight)
conv.conv2d.shard(strategy_weight)
return conv

@@ -119,7 +119,7 @@ def conv1x1(in_channels, out_channels, stride=1):
conv = Conv2d(in_channels, out_channels,
kernel_size=1, stride=stride, padding=0, weight_init=weight, has_bias=False,
pad_mode="same")
conv.conv2d.set_strategy(strategy_weight)
conv.conv2d.shard(strategy_weight)
return conv

@@ -130,7 +130,7 @@ def conv7x7(in_channels, out_channels, stride=1):
conv = Conv2d(in_channels, out_channels,
kernel_size=7, stride=stride, padding=0, weight_init=weight, has_bias=False,
pad_mode="same")
conv.conv2d.set_strategy(strategy_weight)
conv.conv2d.shard(strategy_weight)
return conv

@@ -152,7 +152,7 @@ def bn_with_initialize(out_channels):
gamma = weight_variable_1(shape)
bn = BatchNorm2d(out_channels, momentum=0.1, eps=0.0001, gamma_init=gamma,
beta_init=beta, moving_mean_init=mean, moving_var_init=var)
bn.bn_train.set_strategy(strategy_bn)
bn.bn_train.shard(strategy_bn)
return bn

@@ -164,7 +164,7 @@ def bn_with_initialize_last(out_channels):
gamma = weight_variable_0(shape)
bn = BatchNorm2d(out_channels, momentum=0.1, eps=0.0001, gamma_init=gamma,
beta_init=beta, moving_mean_init=mean, moving_var_init=var)
bn.bn_train.set_strategy(strategy_bn)
bn.bn_train.shard(strategy_bn)
return bn

@@ -175,7 +175,7 @@ def fc_with_initialize(input_channels, out_channels):
bias = weight_variable_0(bias_shape)

return DenseWrap(input_channels, out_channels, weight, bias, has_bias=True,
matmul_strategy=strategy_fc_weight_nobias, set_strategy=strategy_tensor_add)
matmul_strategy=strategy_fc_weight_nobias, shard=strategy_tensor_add)


class ResidualBlock(Cell):
@@ -197,10 +197,10 @@ class ResidualBlock(Cell):
self.conv3 = conv1x1(out_chls, out_channels, stride=1)
self.bn3 = bn_with_initialize_last(out_channels)

self.relu1 = P.ReLU().set_strategy(strategy_no_weight)
self.relu2 = P.ReLU().set_strategy(strategy_no_weight)
self.relu3 = P.ReLU().set_strategy(strategy_no_weight)
self.add = TensorAdd().set_strategy(strategy_add)
self.relu1 = P.ReLU().shard(strategy_no_weight)
self.relu2 = P.ReLU().shard(strategy_no_weight)
self.relu3 = P.ReLU().shard(strategy_no_weight)
self.add = TensorAdd().shard(strategy_add)

def construct(self, x):
identity = x
@@ -242,14 +242,14 @@ class ResidualBlockWithDown(Cell):
self.conv3 = conv1x1(out_chls, out_channels, stride=1)
self.bn3 = bn_with_initialize_last(out_channels)

self.relu1 = P.ReLU().set_strategy(strategy_no_weight)
self.relu2 = P.ReLU().set_strategy(strategy_no_weight)
self.relu3 = P.ReLU().set_strategy(strategy_no_weight)
self.relu1 = P.ReLU().shard(strategy_no_weight)
self.relu2 = P.ReLU().shard(strategy_no_weight)
self.relu3 = P.ReLU().shard(strategy_no_weight)
self.down_sample = down_sample

self.conv_down_sample = conv1x1(in_channels, out_channels, stride=stride)
self.bn_down_sample = bn_with_initialize(out_channels)
self.add = TensorAdd().set_strategy(strategy_add)
self.add = TensorAdd().shard(strategy_add)

def construct(self, x):
identity = x
@@ -296,11 +296,11 @@ class ResNet(Cell):
super(ResNet, self).__init__()
self.conv1 = conv7x7(3, 64, stride=2)
self.bn1 = bn_with_initialize(64)
self.relu = P.ReLU().set_strategy(strategy_no_weight)
self.relu = P.ReLU().shard(strategy_no_weight)
self.maxpool = MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
self.layer1 = MakeLayer0(
block, in_channels=64, out_channels=256, stride=1)
self.pool = M.ReduceMean(keep_dims=True).set_strategy(strategy_no_weight)
self.pool = M.ReduceMean(keep_dims=True).shard(strategy_no_weight)
self.fc = fc_with_initialize(64 * block.expansion, num_classes)
self.flatten = Flatten()

@@ -319,11 +319,11 @@ class ResNet(Cell):
class ResNetModelParallel(Cell):
def __init__(self, block, num_classes=100):
super(ResNetModelParallel, self).__init__()
self.relu = P.ReLU().set_strategy(((1, dev_num, 1, 1),))
self.relu = P.ReLU().shard(((1, dev_num, 1, 1),))
self.maxpool = MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
self.layer1 = MakeLayer0(
block, in_channels=64, out_channels=256, stride=1)
self.pool = M.ReduceMean(keep_dims=True).set_strategy(strategy_no_weight)
self.pool = M.ReduceMean(keep_dims=True).shard(strategy_no_weight)
self.fc = fc_with_initialize(64 * block.expansion, num_classes)
self.flatten = Flatten()

@@ -363,7 +363,7 @@ def test_resnet_operator_batch_parallel():
net = resnet_operator_net(num_classes)

loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
loss.softmax_cross_entropy.set_strategy(((dev_num, 1), (dev_num, 1)))
loss.softmax_cross_entropy.shard(((dev_num, 1), (dev_num, 1)))
opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)

model = Model(net, loss, opt)
@@ -388,7 +388,7 @@ def test_resnet_model_parallel():
net = resnet_model_parallel_net(num_classes)

loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
loss.softmax_cross_entropy.set_strategy(((dev_num, 1), (dev_num, 1)))
loss.softmax_cross_entropy.shard(((dev_num, 1), (dev_num, 1)))
opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)

model = Model(net, loss, opt)
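Note: for wrapped cells the strategy is attached to the primitive held inside the cell rather than to the cell itself, as in conv.conv2d.shard(strategy_weight) and bn.bn_train.shard(strategy_bn) above. A minimal sketch of the same pattern on a loss cell, reusing the 8-way layout shown earlier in this diff:

loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
loss.softmax_cross_entropy.shard(((8, 1), (8, 1)))  # shard the underlying primitive, not the Cell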
@@ -27,7 +27,7 @@ from mindspore.ops import operations as P
class NetWithLoss(nn.Cell):
def __init__(self, network, strategy3):
super(NetWithLoss, self).__init__()
self.loss = P.SoftmaxCrossEntropyWithLogits().set_strategy(strategy3)
self.loss = P.SoftmaxCrossEntropyWithLogits().shard(strategy3)
self.network = network

def construct(self, x, b):
@@ -45,8 +45,8 @@ def test_optimizer_clone_weight():
def __init__(self, strategy1, strategy2, weight):
super().__init__()
self.weight = Parameter(weight, "w1")
self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1)
self.relu = P.ReLU().set_strategy(strategy2)
self.matmul = P.MatMul(transpose_a=False, transpose_b=True).shard(strategy1)
self.relu = P.ReLU().shard(strategy2)

def construct(self, x):
out = self.matmul(x, self.weight)
@@ -80,8 +80,8 @@ def test_optimizer_clone_weight2():
def __init__(self, strategy1, strategy2, weight):
super().__init__()
self.weight = Parameter(weight, "w1")
self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1)
self.relu = P.ReLU().set_strategy(strategy2)
self.matmul = P.MatMul(transpose_a=False, transpose_b=True).shard(strategy1)
self.relu = P.ReLU().shard(strategy2)

def construct(self, x):
out = self.matmul(x, self.weight)
@@ -37,7 +37,7 @@ def test_parameter_init():
def __init__(self, strategy1, weight):
super().__init__()
self.weight = Parameter(weight, "w1")
self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1)
self.matmul = P.MatMul(transpose_a=False, transpose_b=True).shard(strategy1)

def construct(self, x):
out = self.matmul(x, self.weight)
@@ -24,8 +24,8 @@ from mindspore.ops import operations as P
class Net(Cell):
def __init__(self, mul_weight, strategy1=None, strategy2=None):
super().__init__()
self.mul = P.Mul().set_strategy(strategy1)
self.mul2 = P.Mul().set_strategy(strategy2)
self.mul = P.Mul().shard(strategy1)
self.mul2 = P.Mul().shard(strategy2)
self.mul_weight = Parameter(mul_weight, "w1")

def construct(self, x, b):
@@ -37,8 +37,8 @@ class Net(Cell):
class Net2(Cell):
def __init__(self, mul_weight, strategy1=None, strategy2=None):
super().__init__()
self.mul = P.Mul().set_strategy(strategy1)
self.mul2 = P.Mul().set_strategy(strategy2)
self.mul = P.Mul().shard(strategy1)
self.mul2 = P.Mul().shard(strategy2)
self.mul_weight = Parameter(mul_weight, "w1")

def construct(self, x, b):
@@ -90,7 +90,7 @@ def test_prelu_parallel_success1():
class Net(nn.Cell):
def __init__(self, strategy):
super().__init__()
self.prelu = P.PReLU().set_strategy(strategy)
self.prelu = P.PReLU().shard(strategy)

def construct(self, x, y):
out = self.prelu(x, y)
@@ -110,7 +110,7 @@ def test_prelu_parallel_success2():
class Net(nn.Cell):
def __init__(self, strategy):
super().__init__()
self.prelu = P.PReLU().set_strategy(strategy)
self.prelu = P.PReLU().shard(strategy)

def construct(self, x, y):
out = self.prelu(x, y)
@@ -148,8 +148,8 @@ def test_prelu_parallel_success3():
class Net(nn.Cell):
def __init__(self, strategy1, strategy2):
super().__init__()
self.matmul = P.MatMul().set_strategy(strategy1)
self.prelu = P.PReLU().set_strategy(strategy2)
self.matmul = P.MatMul().shard(strategy1)
self.prelu = P.PReLU().shard(strategy2)

def construct(self, x, y, w):
out = self.matmul(x, y)
@@ -173,7 +173,7 @@ def test_prelu_parallel_success4():
class Net(nn.Cell):
def __init__(self, strategy):
super().__init__()
self.prelu = P.PReLU().set_strategy(strategy)
self.prelu = P.PReLU().shard(strategy)

def construct(self, x, y):
out = self.prelu(x, y)
@@ -193,7 +193,7 @@ def test_prelu_parallel_success5():
class Net(nn.Cell):
def __init__(self, strategy):
super().__init__()
self.prelu = P.PReLU().set_strategy(strategy)
self.prelu = P.PReLU().shard(strategy)

def construct(self, x, y):
out = self.prelu(x, y)
@@ -70,9 +70,9 @@ class PReLU(nn.Cell):

self.w = Parameter(initializer(w, [channel,]), name='a')
self.prelu = P.PReLU()
self.relu = P.ReLU().set_strategy(((1,),))
self.sub = P.Sub().set_strategy(((1,), (1,)))
self.assign_sub = P.AssignSub().set_strategy(((1,), (1,)))
self.relu = P.ReLU().shard(((1,),))
self.sub = P.Sub().shard(((1,), (1,)))
self.assign_sub = P.AssignSub().shard(((1,), (1,)))

def construct(self, x):
u = self.relu(self.w)
@ -82,9 +82,9 @@ def test_sum_mul():
|
|||
class Net(nn.Cell):
|
||||
def __init__(self, strategy1, strategy2, strategy3):
|
||||
super().__init__()
|
||||
self.mul1 = P.Mul().set_strategy(strategy1)
|
||||
self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy2)
|
||||
self.mul2 = P.Mul().set_strategy(strategy3)
|
||||
self.mul1 = P.Mul().shard(strategy1)
|
||||
self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy2)
|
||||
self.mul2 = P.Mul().shard(strategy3)
|
||||
|
||||
def construct(self, x, y, b):
|
||||
out = self.mul1(x, y)
|
||||
|
@ -109,9 +109,9 @@ def test_sum_mul2():
|
|||
class Net(nn.Cell):
|
||||
def __init__(self, strategy1, strategy2, strategy3):
|
||||
super().__init__()
|
||||
self.mul1 = P.Mul().set_strategy(strategy1)
|
||||
self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy2)
|
||||
self.mul2 = P.Mul().set_strategy(strategy3)
|
||||
self.mul1 = P.Mul().shard(strategy1)
|
||||
self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy2)
|
||||
self.mul2 = P.Mul().shard(strategy3)
|
||||
|
||||
def construct(self, x, y, b):
|
||||
out = self.mul1(x, y)
|
||||
|
@ -136,9 +136,9 @@ def test_sum_mul3():
|
|||
class Net(nn.Cell):
|
||||
def __init__(self, strategy1, strategy2, strategy3):
|
||||
super().__init__()
|
||||
self.mul1 = P.Mul().set_strategy(strategy1)
|
||||
self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy2)
|
||||
self.mul2 = P.Mul().set_strategy(strategy3)
|
||||
self.mul1 = P.Mul().shard(strategy1)
|
||||
self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy2)
|
||||
self.mul2 = P.Mul().shard(strategy3)
|
||||
|
||||
def construct(self, x, y, b):
|
||||
out = self.mul1(x, y)
|
||||
@@ -163,9 +163,9 @@ def test_sum_mul4():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2, strategy3):
             super().__init__()
-            self.mul1 = P.Mul().set_strategy(strategy1)
-            self.reduce_sum = P.ReduceSum(keep_dims=True).set_strategy(strategy2)
-            self.mul2 = P.Mul().set_strategy(strategy3)
+            self.mul1 = P.Mul().shard(strategy1)
+            self.reduce_sum = P.ReduceSum(keep_dims=True).shard(strategy2)
+            self.mul2 = P.Mul().shard(strategy3)

         def construct(self, x, y, b):
             out = self.mul1(x, y)

@@ -190,8 +190,8 @@ def test_sum_mul5():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.mul1 = P.Mul().set_strategy(strategy1)
-            self.reduce_sum = P.ReduceSum(keep_dims=True).set_strategy(strategy2)
+            self.mul1 = P.Mul().shard(strategy1)
+            self.reduce_sum = P.ReduceSum(keep_dims=True).shard(strategy2)

         def construct(self, x, y):
             out = self.mul1(x, y)

@@ -213,8 +213,8 @@ def test_sum_mul6():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.mul1 = P.Mul().set_strategy(strategy1)
-            self.reduce_sum = P.ReduceSum(keep_dims=True).set_strategy(strategy2)
+            self.mul1 = P.Mul().shard(strategy1)
+            self.reduce_sum = P.ReduceSum(keep_dims=True).shard(strategy2)

         def construct(self, x, y):
             out = self.mul1(x, y)

@@ -236,8 +236,8 @@ def test_sum_mul7():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.mul1 = P.Mul().set_strategy(strategy1)
-            self.reduce_sum = P.ReduceSum(keep_dims=True).set_strategy(strategy2)
+            self.mul1 = P.Mul().shard(strategy1)
+            self.reduce_sum = P.ReduceSum(keep_dims=True).shard(strategy2)

         def construct(self, x, y):
             out = self.mul1(x, y)

@@ -259,9 +259,9 @@ def test_max_mul():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2, strategy3):
             super().__init__()
-            self.mul1 = P.Mul().set_strategy(strategy1)
-            self.reduce_max = P.ReduceMax(keep_dims=False).set_strategy(strategy2)
-            self.mul2 = P.Mul().set_strategy(strategy3)
+            self.mul1 = P.Mul().shard(strategy1)
+            self.reduce_max = P.ReduceMax(keep_dims=False).shard(strategy2)
+            self.mul2 = P.Mul().shard(strategy3)

         def construct(self, x, y, b):
             out = self.mul1(x, y)

@@ -286,9 +286,9 @@ def test_min_mul():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2, strategy3):
             super().__init__()
-            self.mul1 = P.Mul().set_strategy(strategy1)
-            self.reduce_min = P.ReduceMin(keep_dims=False).set_strategy(strategy2)
-            self.mul2 = P.Mul().set_strategy(strategy3)
+            self.mul1 = P.Mul().shard(strategy1)
+            self.reduce_min = P.ReduceMin(keep_dims=False).shard(strategy2)
+            self.mul2 = P.Mul().shard(strategy3)

         def construct(self, x, y, b):
             out = self.mul1(x, y)

@@ -313,9 +313,9 @@ def test_reduce_mean_mul_float32():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2, strategy3):
             super().__init__()
-            self.mul1 = P.Mul().set_strategy(strategy1)
-            self.reduce_mean = P.ReduceMean(keep_dims=False).set_strategy(strategy2)
-            self.mul2 = P.Mul().set_strategy(strategy3)
+            self.mul1 = P.Mul().shard(strategy1)
+            self.reduce_mean = P.ReduceMean(keep_dims=False).shard(strategy2)
+            self.mul2 = P.Mul().shard(strategy3)

         def construct(self, x, y, b):
             out = self.mul1(x, y)

@@ -340,9 +340,9 @@ def test_reduce_mean_mul_float32():
 class ArgMaxWithValueNet(nn.Cell):
     def __init__(self, strategy1, strategy2, strategy3):
         super().__init__()
-        self.mul1 = P.Mul().set_strategy(strategy1)
-        self.arg_max_with_value = P.ArgMaxWithValue(keep_dims=False, axis=-1).set_strategy(strategy2)
-        self.mul2 = P.Mul().set_strategy(strategy3)
+        self.mul1 = P.Mul().shard(strategy1)
+        self.arg_max_with_value = P.ArgMaxWithValue(keep_dims=False, axis=-1).shard(strategy2)
+        self.mul2 = P.Mul().shard(strategy3)

     def construct(self, x, y, b):
         out = self.mul1(x, y)

@@ -354,9 +354,9 @@ class ArgMaxWithValueNet(nn.Cell):
 class ArgMinWithValueNet(nn.Cell):
     def __init__(self, strategy1, strategy2, strategy3):
         super().__init__()
-        self.mul1 = P.Mul().set_strategy(strategy1)
-        self.arg_min_with_value = P.ArgMinWithValue(keep_dims=False, axis=-1).set_strategy(strategy2)
-        self.mul2 = P.Mul().set_strategy(strategy3)
+        self.mul1 = P.Mul().shard(strategy1)
+        self.arg_min_with_value = P.ArgMinWithValue(keep_dims=False, axis=-1).shard(strategy2)
+        self.mul2 = P.Mul().shard(strategy3)

     def construct(self, x, y, b):
         out = self.mul1(x, y)

@@ -441,9 +441,9 @@ def test_arg_min_with_value_mul_auto():
 class ArgMinWithValueNet2(nn.Cell):
     def __init__(self, strategy1, strategy2, strategy3):
         super().__init__()
-        self.mul1 = P.Mul().set_strategy(strategy1)
-        self.arg_min_with_value = P.ArgMinWithValue(keep_dims=True, axis=-1).set_strategy(strategy2)
-        self.relu = P.ReLU().set_strategy(strategy3)
+        self.mul1 = P.Mul().shard(strategy1)
+        self.arg_min_with_value = P.ArgMinWithValue(keep_dims=True, axis=-1).shard(strategy2)
+        self.relu = P.ReLU().shard(strategy3)

     def construct(self, x, y):
         out = self.mul1(x, y)
@@ -486,9 +486,9 @@ def test_cross_batch():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2, strategy3):
             super().__init__()
-            self.mul1 = P.Mul().set_strategy(strategy1)
-            self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy2)
-            self.reduce_mean = P.ReduceMean(keep_dims=False).set_strategy(strategy3).add_prim_attr("cross_batch", True)
+            self.mul1 = P.Mul().shard(strategy1)
+            self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy2)
+            self.reduce_mean = P.ReduceMean(keep_dims=False).shard(strategy3).add_prim_attr("cross_batch", True)

         def construct(self, x, y):
             out = self.mul1(x, y)

@@ -512,9 +512,9 @@ def test_cross_batch2():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2, strategy3):
             super().__init__()
-            self.mul1 = P.Mul().set_strategy(strategy1)
-            self.reduce_mean = P.ReduceMean(keep_dims=False).set_strategy(strategy2)
-            self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy3).add_prim_attr("cross_batch", True)
+            self.mul1 = P.Mul().shard(strategy1)
+            self.reduce_mean = P.ReduceMean(keep_dims=False).shard(strategy2)
+            self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy3).add_prim_attr("cross_batch", True)

         def construct(self, x, y):
             out = self.mul1(x, y)
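One detail the two cross-batch hunks preserve: because shard() returns the primitive, it chains with add_prim_attr() exactly as set_strategy() did. A sketch of the pattern (the one-input strategy is illustrative):

    # "cross_batch" is the same attribute the tests above set; ((8, 1),)
    # has a single inner tuple because ReduceMean has one tensor input.
    reduce_mean = (P.ReduceMean(keep_dims=False)
                   .shard(((8, 1),))
                   .add_prim_attr("cross_batch", True))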
@@ -561,9 +561,9 @@ def test_max_empty_tuple():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2, strategy3):
             super().__init__()
-            self.mul = P.Mul().set_strategy(strategy1)
-            self.reduce_max = P.ReduceMax(keep_dims=False).set_strategy(strategy2)
-            self.add = P.TensorAdd().set_strategy(strategy3)
+            self.mul = P.Mul().shard(strategy1)
+            self.reduce_max = P.ReduceMax(keep_dims=False).shard(strategy2)
+            self.add = P.TensorAdd().shard(strategy3)

         def construct(self, x, y, b):
             out = self.mul(x, y)

@@ -67,9 +67,9 @@ class Dataset(MindData):
 class ReshapeNet(nn.Cell):
     def __init__(self, strategy0, strategy1, strategy2):
         super(ReshapeNet, self).__init__()
-        self.relu = P.ReLU().set_strategy(strategy0)
-        self.reshape = P.Reshape().set_strategy(strategy1)
-        self.matmul = P.MatMul().set_strategy(strategy2)
+        self.relu = P.ReLU().shard(strategy0)
+        self.reshape = P.Reshape().shard(strategy1)
+        self.matmul = P.MatMul().shard(strategy2)
         self.matmul_weight = Parameter(Tensor(np.ones([25088, 256]), dtype=ms.float32), name="weight")

     def construct(self, x):

@@ -96,8 +96,8 @@ def reshape_common(parallel_mode, strategy0, strategy1, strategy2, strategy_loss
     net = reshape_net(strategy0, strategy1, strategy2)

     loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
-    loss.softmax_cross_entropy.set_strategy(strategy_loss)
-    loss.one_hot.set_strategy(((8, 1), (), ()))
+    loss.softmax_cross_entropy.shard(strategy_loss)
+    loss.one_hot.shard(((8, 1), (), ()))
     opt = Momentum(net.trainable_params(), learning_rate, momentum)
     model = Model(net, loss, opt)
     model.train(epoch_size, dataset, dataset_sink_mode=False)
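The reshape_common hunk shards primitives owned by a prebuilt loss cell from the outside, which works because the renamed method lives on the primitive, not on the cell. Note the one_hot strategy: OneHot takes indices plus two scalar inputs (on_value, off_value), so those two positions get empty tuples. A sketch restating the pattern (the softmax strategy is illustrative):

    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    loss.softmax_cross_entropy.shard(((8, 1), (8, 1)))  # logits, labels
    loss.one_hot.shard(((8, 1), (), ()))                # scalar inputs get ()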
@@ -206,7 +206,7 @@ class ReshapeNet1(nn.Cell):
         super(ReshapeNet1, self).__init__()
         self.virtual_dataset = _VirtualDataset()
         self.reshape = P.Reshape()
-        self.matmul = P.MatMul().set_strategy(strategy0)
+        self.matmul = P.MatMul().shard(strategy0)
         self.matmul_weight = Parameter(Tensor(np.ones([25088, 256]), dtype=ms.float32), name="weight")
         self.reshape2 = P.Reshape()

@@ -223,7 +223,7 @@ class ReshapeNet2(nn.Cell):
         super(ReshapeNet2, self).__init__()
         self.virtual_dataset = _VirtualDataset()
         self.reshape = P.Reshape()
-        self.matmul = P.MatMul().set_strategy(strategy0)
+        self.matmul = P.MatMul().shard(strategy0)
         self.matmul_weight = Parameter(Tensor(np.ones([25088, 256]), dtype=ms.float32), name="weight")
         self.reshape2 = P.Reshape()
         self.reduce_sum = P.ReduceSum(keep_dims=True)

@@ -244,7 +244,7 @@ class ReshapeNet3(nn.Cell):
         super(ReshapeNet3, self).__init__()
         self.virtual_dataset = _VirtualDataset()
         self.reshape = P.Reshape()
-        self.matmul = P.MatMul().set_strategy(strategy0)
+        self.matmul = P.MatMul().shard(strategy0)
         self.matmul_weight = Parameter(Tensor(np.ones([25088, 256]), dtype=ms.float32), name="weight")
         self.reshape2 = P.Reshape()
         self.reduce_sum = P.ReduceSum(keep_dims=False)

@@ -266,7 +266,7 @@ class ReshapeNet4(nn.Cell):
         self.virtual_dataset = _VirtualDataset()
         self.reshape = P.Reshape()
         self.reshape2 = P.Reshape()
-        self.matmul = P.MatMul().set_strategy(strategy0)
+        self.matmul = P.MatMul().shard(strategy0)
         self.matmul_weight = Parameter(Tensor(np.ones([25088, 256]), dtype=ms.float32), name="weight")

     def construct(self, x):

@@ -282,9 +282,9 @@ class ReshapeNet5(nn.Cell):
         super(ReshapeNet5, self).__init__()
         self.virtual_dataset = _VirtualDataset()
         self.reshape = P.Reshape()
-        self.matmul1 = P.MatMul().set_strategy(strategy0)
+        self.matmul1 = P.MatMul().shard(strategy0)
         self.matmul1_weight = Parameter(Tensor(np.ones([25088, 256]), dtype=ms.float32), name="weight")
-        self.matmul2 = P.MatMul().set_strategy(strategy0)
+        self.matmul2 = P.MatMul().shard(strategy0)

     def construct(self, x):
         x = self.virtual_dataset(x)

@@ -299,10 +299,10 @@ class ReshapeNet6(nn.Cell):
         super(ReshapeNet6, self).__init__()
         self.virtual_dataset = _VirtualDataset()
         self.reshape = P.Reshape()
-        self.matmul1_1 = P.MatMul().set_strategy(strategy0)
-        self.matmul1_2 = P.MatMul().set_strategy(strategy0)
+        self.matmul1_1 = P.MatMul().shard(strategy0)
+        self.matmul1_2 = P.MatMul().shard(strategy0)
         self.matmul1_weight = Parameter(Tensor(np.ones([25088, 256]), dtype=ms.float32), name="weight")
-        self.matmul2 = P.MatMul().set_strategy(strategy0)
+        self.matmul2 = P.MatMul().shard(strategy0)
         self.add = P.TensorAdd()

     def construct(self, x):
@@ -552,7 +552,7 @@ class ParallelReduceMeanNet(nn.Cell):
         self.flat = nn.Flatten()
         self.reducemean_axis = reducemean_axis
         if strategy is not None:
-            self.reduce_mean.set_strategy(strategy)
+            self.reduce_mean.shard(strategy)

     def construct(self, inputs):
         x = self.conv(inputs)
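The hunk above keeps the strategy optional and only calls the renamed method when one is supplied; with no strategy set, the configured parallel mode is left to decide the operator's layout. A minimal sketch of that pattern (the class name is an assumption, not from the tests):

    import mindspore.nn as nn
    from mindspore.ops import operations as P

    class MeanNet(nn.Cell):
        # Reduce over `axis`; shard only if the caller provides a strategy.
        def __init__(self, axis, strategy=None):
            super(MeanNet, self).__init__()
            self.axis = axis
            self.reduce_mean = P.ReduceMean(keep_dims=False)
            if strategy is not None:
                self.reduce_mean.shard(strategy)

        def construct(self, x):
            return self.reduce_mean(x, self.axis)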
@@ -626,7 +626,7 @@ class ParallelReshapeNet(nn.Cell):
                               has_bias=True)
         self.reshape = P.Reshape()
         self.shape = shape
-        self.reshape.set_strategy(strategy)
+        self.reshape.shard(strategy)

     def construct(self, inputs):
         x = self.flat(inputs)

@@ -51,7 +51,7 @@ class Net(nn.Cell):
     def __init__(self, strategy):
         super().__init__()
         self.reshape = P.Reshape()
-        self.mul = P.Mul().set_strategy(strategy)
+        self.mul = P.Mul().shard(strategy)
         self.relu = P.ReLU()

     def construct(self, x, y):

@@ -24,7 +24,7 @@ from mindspore.ops import operations as P
 class Net(Cell):
     def __init__(self, matmul_weight, strategy1=None):
         super().__init__()
-        self.gatherv2 = P.GatherV2().set_strategy(strategy1)
+        self.gatherv2 = P.GatherV2().shard(strategy1)
         self.reshape = P.Reshape().add_prim_attr("skip_redistribution", True)
         self.matmul = P.MatMul(transpose_b=False)
         self.index = Tensor(np.ones([64, 64]), dtype=ms.int32)

@@ -40,9 +40,9 @@ def test_sum_as_loss():
     class Net(nn.Cell):
         def __init__(self, strategy0, strategy1):
             super().__init__()
-            self.fc_nobias = P.MatMul(transpose_b=True).set_strategy(strategy0)
-            self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy1)
-            self.mul = P.Mul().set_strategy(strategy=((), ()))
+            self.fc_nobias = P.MatMul(transpose_b=True).shard(strategy0)
+            self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy1)
+            self.mul = P.Mul().shard(strategy=((), ()))

         def construct(self, x, y):
             out = self.fc_nobias(x, y)
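The ((), ()) strategy on Mul above is the degenerate case: an empty inner tuple declares a zero-dimensional (scalar) input, so there is nothing to slice. Side by side, with an illustrative 2-D counterpart:

    scalar_mul = P.Mul().shard(strategy=((), ()))   # scalar * scalar, no slicing
    tensor_mul = P.Mul().shard(((8, 1), (8, 1)))    # 2-D inputs, batch split 8-ways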
@@ -42,8 +42,8 @@ class Net(nn.Cell):
 class NetWithLoss(nn.Cell):
     def __init__(self, network):
         super(NetWithLoss, self).__init__()
-        self.sum = P.ReduceSum(keep_dims=False).set_strategy(strategy=((4, 1, 1, 1),))
-        self.mean = P.ReduceMean(keep_dims=False).set_strategy(strategy=((8, 1, 1, 1),))
+        self.sum = P.ReduceSum(keep_dims=False).shard(strategy=((4, 1, 1, 1),))
+        self.mean = P.ReduceMean(keep_dims=False).shard(strategy=((8, 1, 1, 1),))
         self.net = network

     def construct(self, x):

@@ -24,8 +24,8 @@ from mindspore.ops import operations as P
 class Net(Cell):
     def __init__(self, mul_weight, strategy1=None, strategy2=None):
         super().__init__()
-        self.mul = P.Mul().set_strategy(strategy1)
-        self.loss = P.SigmoidCrossEntropyWithLogits().set_strategy(strategy2)
+        self.mul = P.Mul().shard(strategy1)
+        self.loss = P.SigmoidCrossEntropyWithLogits().shard(strategy2)
         self.mul_weight = Parameter(mul_weight, "w1")

     def construct(self, x, b):

@@ -29,7 +29,7 @@ grad_all = C.GradOperation(get_all=True)
 class NetWithLoss(nn.Cell):
     def __init__(self, network, strategy3=None):
         super(NetWithLoss, self).__init__()
-        self.loss = P.SoftmaxCrossEntropyWithLogits().set_strategy(strategy3)
+        self.loss = P.SoftmaxCrossEntropyWithLogits().shard(strategy3)
         self.network = network

     def construct(self, x, y, b):

@@ -55,8 +55,8 @@ def test_softmax_cross_entropy_loss():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul(transpose_b=True).set_strategy(strategy1)
-            self.gelu = P.Gelu().set_strategy(strategy2)
+            self.matmul = P.MatMul(transpose_b=True).shard(strategy1)
+            self.gelu = P.Gelu().shard(strategy2)

         def construct(self, x, y):
             out = self.matmul(x, y)

@@ -80,8 +80,8 @@ def test_softmax_cross_entropy_loss_repeated_calculation():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul(transpose_b=True).set_strategy(strategy1)
-            self.gelu = P.Gelu().set_strategy(strategy2)
+            self.matmul = P.MatMul(transpose_b=True).shard(strategy1)
+            self.gelu = P.Gelu().shard(strategy2)

         def construct(self, x, y):
             out = self.matmul(x, y)

|
@ -74,7 +74,7 @@ def test_bprop_with_sparse_feature_mirror():
|
|||
shape = [8, 8]
|
||||
self.index = Tensor(np.ones(shape), dtype=ms.int32)
|
||||
self.embeddinglookup = nn.EmbeddingLookup(64, 64, param_init='ones')
|
||||
self.embeddinglookup.embeddinglookup.set_strategy(((1, 1), (8, 1)))
|
||||
self.embeddinglookup.embeddinglookup.shard(((1, 1), (8, 1)))
|
||||
|
||||
def construct(self, x, b):
|
||||
out = self.embeddinglookup(self.index)
|
||||
|
|
|
@@ -52,8 +52,8 @@ class Net(nn.Cell):
         super().__init__()
         if shape is None:
             shape = [64, 64]
-        self.gatherv2 = P.SparseGatherV2().set_strategy(strategy1).add_prim_attr("primitive_target", target)
-        self.mul = P.Mul().set_strategy(strategy2)
+        self.gatherv2 = P.SparseGatherV2().shard(strategy1).add_prim_attr("primitive_target", target)
+        self.mul = P.Mul().shard(strategy2)
         self.index = Tensor(np.ones(shape), dtype=ms.int32)
         self.axis = axis
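The sparse-gather hunk combines a slicing strategy with a placement attribute: primitive_target pins where the gather executes, independent of how shard() slices it. The tests pass the target in as a parameter; "CPU" below is an assumption, as is the strategy:

    gather = (P.SparseGatherV2()
              .shard(((8, 1), (1, 1)))                    # row-slice the table
              .add_prim_attr("primitive_target", "CPU"))  # assumed target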
@@ -76,8 +76,8 @@ def test_no_grad():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
-            self.matmul2 = P.MatMul().set_strategy(strategy2)
+            self.matmul1 = P.MatMul().shard(strategy1)
+            self.matmul2 = P.MatMul().shard(strategy2)

         def construct(self, x, y, b):
             out = self.matmul1(x, y)

@@ -101,8 +101,8 @@ def test_grad_sens_parameter_type():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
-            self.matmul2 = P.MatMul().set_strategy(strategy2)
+            self.matmul1 = P.MatMul().shard(strategy1)
+            self.matmul2 = P.MatMul().shard(strategy2)

         def construct(self, x, y, b):
             out = self.matmul1(x, y)

@@ -133,8 +133,8 @@ def test_grad_sens_tensor_type():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
-            self.matmul2 = P.MatMul().set_strategy(strategy2)
+            self.matmul1 = P.MatMul().shard(strategy1)
+            self.matmul2 = P.MatMul().shard(strategy2)

         def construct(self, x, y, b):
             out = self.matmul1(x, y)

@@ -158,8 +158,8 @@ def test_grad_sens_scalar_broadcast():
     class Net(nn.Cell):
         def __init__(self, strategy0, strategy1):
             super().__init__()
-            self.fc_nobias = P.MatMul(transpose_b=True).set_strategy(strategy0)
-            self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy1)
+            self.fc_nobias = P.MatMul(transpose_b=True).shard(strategy0)
+            self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy1)

         def construct(self, x, y):
             out = self.fc_nobias(x, y)

@@ -24,9 +24,9 @@ from mindspore.ops import operations as P
 class Net(Cell):
     def __init__(self, mul_weight, strategy1=None, strategy2=None):
         super(Net, self).__init__()
-        self.mul = P.Mul().set_strategy(strategy1)
-        self.square = P.Square().set_strategy(strategy2)
-        self.mul2 = P.Mul().set_strategy(strategy1)
+        self.mul = P.Mul().shard(strategy1)
+        self.square = P.Square().shard(strategy2)
+        self.mul2 = P.Mul().shard(strategy1)
         self.mul_weight = Parameter(mul_weight, "w1")

     def construct(self, x, b):

@@ -24,8 +24,8 @@ from mindspore.ops import operations as P
 class Net(Cell):
     def __init__(self, strategy1=None, strategy2=None, axis=()):
         super().__init__()
-        self.squeeze = P.Squeeze(axis=axis).set_strategy(strategy1)
-        self.mul = P.Mul().set_strategy(strategy2)
+        self.squeeze = P.Squeeze(axis=axis).shard(strategy1)
+        self.mul = P.Mul().shard(strategy2)

     def construct(self, x, b):
         out = self.squeeze(x)

@@ -51,10 +51,10 @@ def test_two_matmul():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2, strategy3, strategy4):
             super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
-            self.matmul2 = P.MatMul().set_strategy(strategy2)
-            self.matmul3 = P.MatMul().set_strategy(strategy3)
-            self.matmul4 = P.MatMul().set_strategy(strategy4)
+            self.matmul1 = P.MatMul().shard(strategy1)
+            self.matmul2 = P.MatMul().shard(strategy2)
+            self.matmul3 = P.MatMul().shard(strategy3)
+            self.matmul4 = P.MatMul().shard(strategy4)

         def construct(self, x, y, b, a):
             out = self.matmul1(x, y)

@@ -51,12 +51,12 @@ def test_six_matmul_save():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2, strategy3, strategy4, strategy5, strategy6):
             super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
-            self.matmul2 = P.MatMul().set_strategy(strategy2)
-            self.matmul3 = P.MatMul().set_strategy(strategy3)
-            self.matmul4 = P.MatMul().set_strategy(strategy4)
-            self.matmul5 = P.MatMul().set_strategy(strategy5)
-            self.matmul6 = P.MatMul().set_strategy(strategy6)
+            self.matmul1 = P.MatMul().shard(strategy1)
+            self.matmul2 = P.MatMul().shard(strategy2)
+            self.matmul3 = P.MatMul().shard(strategy3)
+            self.matmul4 = P.MatMul().shard(strategy4)
+            self.matmul5 = P.MatMul().shard(strategy5)
+            self.matmul6 = P.MatMul().shard(strategy6)
             self.weight1 = Parameter(Tensor(np.ones([32, 64]), dtype=ms.float32), name="weight1")
             self.weight2 = Parameter(Tensor(np.ones([64, 64]), dtype=ms.float32), name="weight2")
             self.weight3 = Parameter(Tensor(np.ones([64, 128]), dtype=ms.float32), name="weight3")

@@ -113,12 +113,12 @@ def test_six_matmul_load():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy3, strategy4, strategy5, strategy6, strategy7):
             super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
-            self.matmul3 = P.MatMul().set_strategy(strategy3)
-            self.matmul4 = P.MatMul().set_strategy(strategy4)
-            self.matmul5 = P.MatMul().set_strategy(strategy5)
-            self.matmul6 = P.MatMul().set_strategy(strategy6)
-            self.matmul7 = P.MatMul().set_strategy(strategy7)
+            self.matmul1 = P.MatMul().shard(strategy1)
+            self.matmul3 = P.MatMul().shard(strategy3)
+            self.matmul4 = P.MatMul().shard(strategy4)
+            self.matmul5 = P.MatMul().shard(strategy5)
+            self.matmul6 = P.MatMul().shard(strategy6)
+            self.matmul7 = P.MatMul().shard(strategy7)
             self.weight1 = Parameter(Tensor(np.ones([32, 64]), dtype=ms.float32), name="weight1")
             self.weight3 = Parameter(Tensor(np.ones([64, 128]), dtype=ms.float32), name="weight3")
             self.weight4 = Parameter(Tensor(np.ones([128, 64]), dtype=ms.float32), name="weight4")

@@ -231,10 +231,10 @@ def test_six_matmul_load_auto():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy3, strategy4, strategy5):
             super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
-            self.matmul3 = P.MatMul().set_strategy(strategy3)
-            self.matmul4 = P.MatMul().set_strategy(strategy4)
-            self.matmul5 = P.MatMul().set_strategy(strategy5)
+            self.matmul1 = P.MatMul().shard(strategy1)
+            self.matmul3 = P.MatMul().shard(strategy3)
+            self.matmul4 = P.MatMul().shard(strategy4)
+            self.matmul5 = P.MatMul().shard(strategy5)
            self.matmul6 = P.MatMul()
            self.matmul7 = P.MatMul()
            self.weight1 = Parameter(Tensor(np.ones([32, 64]), dtype=ms.float32), name="weight1")

@@ -25,8 +25,8 @@ from mindspore.ops import operations as P
 class Net(Cell):
     def __init__(self, weight, w2, begin, end, strides, strategy1=None, strategy2=None, is_parameter=True, mask=0):
         super().__init__()
-        self.mul = P.Mul().set_strategy(strategy1)
-        self.strided_slice = P.StridedSlice(begin_mask=mask).set_strategy(strategy2)
+        self.mul = P.Mul().shard(strategy1)
+        self.strided_slice = P.StridedSlice(begin_mask=mask).shard(strategy2)
         if is_parameter:
             self.weight = Parameter(weight, "w1")
         else:

@@ -47,8 +47,8 @@ class Net(Cell):
 class Net2(Cell):
     def __init__(self, weight2, begin, end, strides, strategy1=None, strategy2=None):
         super().__init__()
-        self.mul = P.Mul().set_strategy(strategy1)
-        self.strided_slice = P.StridedSlice().set_strategy(strategy2)
+        self.mul = P.Mul().shard(strategy1)
+        self.strided_slice = P.StridedSlice().shard(strategy2)
         self.weight2 = Parameter(weight2, "w2")
         self.begin = begin
         self.end = end

@@ -44,8 +44,8 @@ def test_sum_as_loss():
     class Net(nn.Cell):
         def __init__(self, strategy0, strategy1):
             super().__init__()
-            self.fc_nobias = P.MatMul(transpose_b=True).set_strategy(strategy0)
-            self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy1)
+            self.fc_nobias = P.MatMul(transpose_b=True).shard(strategy0)
+            self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy1)

         def construct(self, x, y):
             out = self.fc_nobias(x, y)

@@ -67,8 +67,8 @@ def test_sum_as_loss2():
     class Net(nn.Cell):
         def __init__(self, strategy0, strategy1):
             super().__init__()
-            self.fc_nobias = P.MatMul(transpose_b=True).set_strategy(strategy0)
-            self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy1)
+            self.fc_nobias = P.MatMul(transpose_b=True).shard(strategy0)
+            self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy1)

         def construct(self, x, y):
             out = self.fc_nobias(x, y)

@@ -24,8 +24,8 @@ from mindspore.ops import operations as P
 class Net(Cell):
     def __init__(self, weight, weight2, strategy1=None, strategy2=None, is_parameter=True):
         super().__init__()
-        self.mul = P.Mul().set_strategy(strategy1)
-        self.tile = P.Tile().set_strategy(strategy2)
+        self.mul = P.Mul().shard(strategy1)
+        self.tile = P.Tile().shard(strategy2)
         if is_parameter:
             self.weight = Parameter(weight, "w1")
         else:

@@ -43,8 +43,8 @@ class Net(Cell):
 class Net2(Cell):
     def __init__(self, weight2, strategy1=None, strategy2=None):
         super().__init__()
-        self.mul = P.Mul().set_strategy(strategy1)
-        self.tile = P.Tile().set_strategy(strategy2)
+        self.mul = P.Mul().shard(strategy1)
+        self.tile = P.Tile().shard(strategy2)
         self.weight2 = Parameter(weight2, "w2")

     def construct(self, x, b):

@@ -24,8 +24,8 @@ from mindspore.ops import operations as P
 class Net(Cell):
     def __init__(self, mul_weight, strategy1=None, strategy2=None):
         super().__init__()
-        self.mul = P.Mul().set_strategy(strategy1)
-        self.neg = P.Neg().set_strategy(strategy2)
+        self.mul = P.Mul().shard(strategy1)
+        self.neg = P.Neg().shard(strategy2)
         self.mul_weight = Parameter(mul_weight, "w1")

     def construct(self, x, b):

@@ -38,7 +38,7 @@ class EvalNet(Cell):
     def __init__(self, network, strategy2=None):
         super().__init__()
         self.network = network
-        self.relu = P.ReLU().set_strategy(strategy2)
+        self.relu = P.ReLU().shard(strategy2)

     def construct(self, x, b):
         out = self.network(x, b)

@@ -50,10 +50,10 @@ class Dataset(MindData):
 class TransposeNet(nn.Cell):
     def __init__(self, strategy1, strategy2):
         super(TransposeNet, self).__init__()
-        self.matmul = P.MatMul().set_strategy(((8, 1), (1, 1)))
+        self.matmul = P.MatMul().shard(((8, 1), (1, 1)))
         self.matmul_weight = Parameter(Tensor(np.ones([128, 256]), dtype=ms.float32), name="weight")
-        self.transpose1 = P.Transpose().set_strategy(strategy1)
-        self.transpose2 = P.Transpose().set_strategy(strategy2)
+        self.transpose1 = P.Transpose().shard(strategy1)
+        self.transpose2 = P.Transpose().shard(strategy2)

     def construct(self, x):
         x = self.matmul(x, self.matmul_weight)

@@ -81,7 +81,7 @@ def transpose_common(strategy1, strategy2):
     net = transpose_net(strategy1, strategy2)

     loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
-    loss.softmax_cross_entropy.set_strategy(((8, 1), (8, 1)))
+    loss.softmax_cross_entropy.shard(((8, 1), (8, 1)))
     opt = Momentum(net.trainable_params(), learning_rate, momentum)
     context.set_context(mode=context.GRAPH_MODE)
     model = Model(net, loss, opt)

@@ -57,8 +57,8 @@ def test_two_matmul():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
-            self.matmul2 = P.MatMul().set_strategy(strategy2)
+            self.matmul1 = P.MatMul().shard(strategy1)
+            self.matmul2 = P.MatMul().shard(strategy2)

         def construct(self, x, y, b):
             out = self.matmul1(x, y)

@@ -82,8 +82,8 @@ def test_two_matmul_repeated_calculation1():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
-            self.matmul2 = P.MatMul().set_strategy(strategy2)
+            self.matmul1 = P.MatMul().shard(strategy1)
+            self.matmul2 = P.MatMul().shard(strategy2)

         def construct(self, x, y, b):
             out = self.matmul1(x, y)

@@ -106,8 +106,8 @@ def test_two_matmul_repeated_calculation2():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
-            self.matmul2 = P.MatMul().set_strategy(strategy2)
+            self.matmul1 = P.MatMul().shard(strategy1)
+            self.matmul2 = P.MatMul().shard(strategy2)

         def construct(self, x, y, b):
             out = self.matmul1(x, y)

@@ -130,9 +130,9 @@ def test_matmul_forward_reduce_scatter():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
             self.matmul.add_prim_attr("forward_reduce_scatter", True)
-            self.mul = P.Mul().set_strategy(strategy2)
+            self.mul = P.Mul().shard(strategy2)

         def construct(self, x, y, b):
             out = self.matmul(x, y)

@@ -155,9 +155,9 @@ def test_matmul_forward_reduce_scatter_transpose():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul(transpose_b=True).set_strategy(strategy1)
+            self.matmul = P.MatMul(transpose_b=True).shard(strategy1)
             self.matmul.add_prim_attr("forward_reduce_scatter", True)
-            self.mul = P.Mul().set_strategy(strategy2)
+            self.mul = P.Mul().shard(strategy2)

         def construct(self, x, y, b):
             out = self.matmul(x, y)
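On forward_reduce_scatter: when a MatMul strategy splits the contracted dimension, the forward pass ordinarily ends in an AllReduce of the partial products; as these two tests exercise it, the attribute requests a ReduceScatter instead, so the output stays sliced across the group. Roughly, with an illustrative strategy:

    matmul = P.MatMul().shard(((2, 4), (4, 1)))          # k split 4-ways
    matmul.add_prim_attr("forward_reduce_scatter", True)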
@@ -29,7 +29,7 @@ grad_by_list = C.GradOperation(get_by_list=True)
 class NetWithLoss(nn.Cell):
     def __init__(self, network, strategy3):
         super(NetWithLoss, self).__init__()
-        self.loss = P.SoftmaxCrossEntropyWithLogits().set_strategy(strategy3)
+        self.loss = P.SoftmaxCrossEntropyWithLogits().shard(strategy3)
         self.network = network

     def construct(self, x, b):

@@ -55,8 +55,8 @@ def test_two_weights_parameter():
             super().__init__()
             self.weight = Parameter(weight, "w1", requires_grad=True)
             self.weight2 = Parameter(weight2, "w2", requires_grad=True)
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.matmul2 = P.MatMul().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.matmul2 = P.MatMul().shard(strategy2)

         def construct(self, x):
             out = self.matmul(x, self.weight)

@@ -54,10 +54,10 @@ def test_virtual_dataset_3_input():
     class Net(nn.Cell):
         def __init__(self, strategy0, strategy1, strategy2, strategy3):
             super().__init__()
-            self.virtual_dataset = _VirtualDataset().set_strategy(strategy0)
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
-            self.matmul2 = P.MatMul().set_strategy(strategy2)
-            self.gelu = P.Gelu().set_strategy(strategy3)
+            self.virtual_dataset = _VirtualDataset().shard(strategy0)
+            self.matmul1 = P.MatMul().shard(strategy1)
+            self.matmul2 = P.MatMul().shard(strategy2)
+            self.gelu = P.Gelu().shard(strategy3)

         def construct(self, x, y, b):
             x, y, b = self.virtual_dataset(x, y, b)

@@ -83,9 +83,9 @@ def test_virtualdataset_cell_3_inputs():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2, strategy3):
             super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
-            self.matmul2 = P.MatMul().set_strategy(strategy2)
-            self.gelu = P.Gelu().set_strategy(strategy3)
+            self.matmul1 = P.MatMul().shard(strategy1)
+            self.matmul2 = P.MatMul().shard(strategy2)
+            self.gelu = P.Gelu().shard(strategy3)

         def construct(self, x, y, b):
             out = self.gelu(self.matmul1(x, y))
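Finally, the virtual-dataset hunks: _VirtualDataset is the internal op that stands in for dataset inputs in the parallel graph, and sharding it slices each incoming tensor. A sketch with three batch-split inputs (the import path and strategy are assumptions; this is internal API):

    from mindspore.ops.operations.comm_ops import _VirtualDataset  # internal op

    # One inner tuple per network input; each input is split 2-ways on
    # its batch dimension (illustrative).
    virtual_dataset = _VirtualDataset().shard(((2, 1), (2, 1), (2, 1)))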