diff --git a/mindspore/nn/layer/conv.py b/mindspore/nn/layer/conv.py index 790bbe8e723..9cc8bce3c25 100644 --- a/mindspore/nn/layer/conv.py +++ b/mindspore/nn/layer/conv.py @@ -567,8 +567,8 @@ class Conv2dTranspose(_Conv): else: self.padding_top, self.padding_bottom, self.padding_left, self.padding_right = self.padding - def set_strategy(self, strategy): - self.conv2d_transpose.set_strategy(strategy) + def shard(self, strategy): + self.conv2d_transpose.shard(strategy) return self def _deconv_output_length(self, input_length, filter_size, stride_size, dilation_size, padding): @@ -744,8 +744,8 @@ class Conv1dTranspose(_Conv): self.expand_dims = P.ExpandDims() self.squeeze = P.Squeeze(2) - def set_strategy(self, strategy): - self.conv2d_transpose.set_strategy(strategy) + def shard(self, strategy): + self.conv2d_transpose.shard(strategy) return self def _deconv_output_length(self, input_length, filter_size, stride_size, dilation_size, padding): diff --git a/mindspore/nn/layer/embedding.py b/mindspore/nn/layer/embedding.py index 3d3f622f148..d148c685670 100755 --- a/mindspore/nn/layer/embedding.py +++ b/mindspore/nn/layer/embedding.py @@ -174,17 +174,17 @@ class EmbeddingLookup(Cell): Validator.check_integer('manul shape dim', dim, 0, Rel.GT, self.cls_name) self.gatherv2.add_prim_attr("manual_split", manual_shapes) self.embeddinglookup.add_prim_attr("manual_split", manual_shapes) - self.gatherv2.set_strategy(((get_group_size(), 1), (1, get_group_size()))) - self.embeddinglookup.set_strategy(((get_group_size(), 1), (1, get_group_size()))) + self.gatherv2.shard(((get_group_size(), 1), (1, get_group_size()))) + self.embeddinglookup.shard(((get_group_size(), 1), (1, get_group_size()))) elif slice_mode == "table_row_slice" and is_auto_parallel: - self.gatherv2.set_strategy(((get_group_size(), 1), (1, 1))) - self.embeddinglookup.set_strategy(((get_group_size(), 1), (1, 1))) + self.gatherv2.shard(((get_group_size(), 1), (1, 1))) + self.embeddinglookup.shard(((get_group_size(), 1), (1, 1))) elif slice_mode == "table_column_slice" and is_auto_parallel: - self.gatherv2.set_strategy(((1, get_group_size()), (1, 1))) - self.embeddinglookup.set_strategy(((1, get_group_size()), (1, 1))) + self.gatherv2.shard(((1, get_group_size()), (1, 1))) + self.embeddinglookup.shard(((1, get_group_size()), (1, 1))) elif slice_mode == "batch_slice" and is_auto_parallel: - self.gatherv2.set_strategy(((1, 1), (get_group_size(), 1))) - self.embeddinglookup.set_strategy(((1, 1), (get_group_size(), 1))) + self.gatherv2.shard(((1, 1), (get_group_size(), 1))) + self.embeddinglookup.shard(((1, 1), (get_group_size(), 1))) else: if is_auto_parallel: raise ValueError("slice_mode should support mode in nn.EmbeddingLookup, but get " diff --git a/mindspore/nn/layer/normalization.py b/mindspore/nn/layer/normalization.py index e777d3a4b25..6ca0c0ccdd0 100644 --- a/mindspore/nn/layer/normalization.py +++ b/mindspore/nn/layer/normalization.py @@ -112,12 +112,12 @@ class _BatchNorm(Cell): data_parallel_strategy = ((1,), (1,)) data_parallel_strategy_one = ((1,), ()) - self.sub_mean = P.Sub().set_strategy(data_parallel_strategy) - self.sub_var = P.Sub().set_strategy(data_parallel_strategy) - self.mul_mean = P.Mul().set_strategy(data_parallel_strategy_one) - self.mul_var = P.Mul().set_strategy(data_parallel_strategy_one) - self.assign_sub_mean = P.AssignSub().set_strategy(data_parallel_strategy) - self.assign_sub_var = P.AssignSub().set_strategy(data_parallel_strategy) + self.sub_mean = P.Sub().shard(data_parallel_strategy) + self.sub_var = 
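A minimal sketch of the renamed call for reviewers, assuming an 8-device semi-auto-parallel context; the strategy values here are illustrative, not taken from this patch.

from mindspore import context
from mindspore.ops import operations as P

# Illustrative setup: 8 devices, semi-auto parallel (assumed, not from this patch).
context.set_auto_parallel_context(device_num=8, global_rank=0,
                                  parallel_mode="semi_auto_parallel")

# shard() keeps the set_strategy() contract: one shard tuple per input,
# and it returns the primitive itself, so chained construction still works.
matmul = P.MatMul().shard(((2, 4), (4, 1)))  # previously: .set_strategy(((2, 4), (4, 1)))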
P.Sub().shard(data_parallel_strategy) + self.mul_mean = P.Mul().shard(data_parallel_strategy_one) + self.mul_var = P.Mul().shard(data_parallel_strategy_one) + self.assign_sub_mean = P.AssignSub().shard(data_parallel_strategy) + self.assign_sub_var = P.AssignSub().shard(data_parallel_strategy) def _check_data_dim(self, x): raise NotImplementedError diff --git a/mindspore/ops/primitive.py b/mindspore/ops/primitive.py index 2c6af7d0b51..624848cf219 100644 --- a/mindspore/ops/primitive.py +++ b/mindspore/ops/primitive.py @@ -102,7 +102,7 @@ class Primitive(Primitive_): self.add_attr(name, value) return self - def set_strategy(self, strategy): + def shard(self, strategy): """ Add strategies to primitive attribute. diff --git a/model_zoo/official/recommend/wide_and_deep/src/wide_and_deep.py b/model_zoo/official/recommend/wide_and_deep/src/wide_and_deep.py index 864721ca1cf..d231e456ce6 100644 --- a/model_zoo/official/recommend/wide_and_deep/src/wide_and_deep.py +++ b/model_zoo/official/recommend/wide_and_deep/src/wide_and_deep.py @@ -198,14 +198,14 @@ class WideDeepModel(nn.Cell): self.concat = P.Concat(axis=1) self.cast = P.Cast() if is_auto_parallel and host_device_mix and not is_field_slice: - self.dense_layer_1.dropout.dropout_do_mask.set_strategy(((1, get_group_size()),)) - self.dense_layer_1.dropout.dropout.set_strategy(((1, get_group_size()),)) - self.dense_layer_1.matmul.set_strategy(((1, get_group_size()), (get_group_size(), 1))) + self.dense_layer_1.dropout.dropout_do_mask.shard(((1, get_group_size()),)) + self.dense_layer_1.dropout.dropout.shard(((1, get_group_size()),)) + self.dense_layer_1.matmul.shard(((1, get_group_size()), (get_group_size(), 1))) self.deep_embeddinglookup = nn.EmbeddingLookup(self.vocab_size, self.emb_dim, slice_mode=nn.EmbeddingLookup.TABLE_COLUMN_SLICE) self.wide_embeddinglookup = nn.EmbeddingLookup(self.vocab_size, 1, slice_mode=nn.EmbeddingLookup.TABLE_ROW_SLICE) - self.deep_mul.set_strategy(((1, 1, get_group_size()), (1, 1, 1))) + self.deep_mul.shard(((1, 1, get_group_size()), (1, 1, 1))) self.deep_reshape.add_prim_attr("skip_redistribution", True) self.reduce_sum.add_prim_attr("cross_batch", True) self.embedding_table = self.deep_embeddinglookup.embedding_table @@ -217,12 +217,12 @@ class WideDeepModel(nn.Cell): self.wide_embeddinglookup = nn.EmbeddingLookup(self.vocab_size, 1, slice_mode=nn.EmbeddingLookup.FIELD_SLICE, manual_shapes=manual_shapes) - self.deep_mul.set_strategy(((1, get_group_size(), 1), (1, get_group_size(), 1))) - self.wide_mul.set_strategy(((1, get_group_size(), 1), (1, get_group_size(), 1))) - self.reduce_sum.set_strategy(((1, get_group_size(), 1),)) - self.dense_layer_1.dropout.dropout_do_mask.set_strategy(((1, get_group_size()),)) - self.dense_layer_1.dropout.dropout.set_strategy(((1, get_group_size()),)) - self.dense_layer_1.matmul.set_strategy(((1, get_group_size()), (get_group_size(), 1))) + self.deep_mul.shard(((1, get_group_size(), 1), (1, get_group_size(), 1))) + self.wide_mul.shard(((1, get_group_size(), 1), (1, get_group_size(), 1))) + self.reduce_sum.shard(((1, get_group_size(), 1),)) + self.dense_layer_1.dropout.dropout_do_mask.shard(((1, get_group_size()),)) + self.dense_layer_1.dropout.dropout.shard(((1, get_group_size()),)) + self.dense_layer_1.matmul.shard(((1, get_group_size()), (get_group_size(), 1))) self.embedding_table = self.deep_embeddinglookup.embedding_table elif parameter_server: self.deep_embeddinglookup = nn.EmbeddingLookup(self.vocab_size, self.emb_dim) diff --git 
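The model-zoo changes above compute strategies from the communication group at runtime; a condensed sketch of that pattern, assuming the group has already been initialized with init() (the dense_matmul name is illustrative):

from mindspore.communication.management import get_group_size, init
from mindspore.ops import operations as P

init()                # assumed: a distributed job has already been launched
n = get_group_size()  # e.g. 8 when run on 8 devices

# Split the contracting dimension of a dense layer across the whole group,
# mirroring the dense_layer_1.matmul strategy in wide_and_deep above.
dense_matmul = P.MatMul().shard(((1, n), (n, 1)))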
a/tests/st/auto_parallel/onehot_model_parallel.py b/tests/st/auto_parallel/onehot_model_parallel.py index 8351f3e1f49..977bab5876b 100644 --- a/tests/st/auto_parallel/onehot_model_parallel.py +++ b/tests/st/auto_parallel/onehot_model_parallel.py @@ -51,12 +51,12 @@ class Onehot(Cell): trans_stra = None if strategy: trans_stra = (strategy[0],) - self.onehot = P.OneHot().set_strategy(strategy=strategy) + self.onehot = P.OneHot().shard(strategy=strategy) self.depth = depth self.on_value = Tensor(on_value, ms.float32) self.off_value = Tensor(off_value, ms.float32) - self.transpose = P.Transpose().set_strategy(strategy=trans_stra) - self.sub = P.Sub().set_strategy(strategy=((1, 1), (1, 1))) + self.transpose = P.Transpose().shard(strategy=trans_stra) + self.sub = P.Sub().shard(strategy=((1, 1), (1, 1))) self.axis = axis def construct(self, input_, indices): diff --git a/tests/st/auto_parallel/soft_entropy_loss_expand_parallel.py b/tests/st/auto_parallel/soft_entropy_loss_expand_parallel.py index 90637fda158..f57ccd94c87 100644 --- a/tests/st/auto_parallel/soft_entropy_loss_expand_parallel.py +++ b/tests/st/auto_parallel/soft_entropy_loss_expand_parallel.py @@ -140,20 +140,20 @@ class SoftmaxCrossEntropyExpand(Cell): if len(stra_list) < 11: stra_list = [None] * 11 self.exp = P.Exp() - self.reduce_sum = P.ReduceSum(keep_dims=True).set_strategy(strategy=stra_list[1]) - self.onehot = P.OneHot().set_strategy(strategy=stra_list[2]) + self.reduce_sum = P.ReduceSum(keep_dims=True).shard(strategy=stra_list[1]) + self.onehot = P.OneHot().shard(strategy=stra_list[2]) self.on_value = Tensor(1.0, mstype.float32) self.off_value = Tensor(0.0, mstype.float32) - self.div = P.Div().set_strategy(strategy=stra_list[3]) - self.log = P.Log().set_strategy(strategy=stra_list[4]) - self.sum_cross_entropy = P.ReduceSum(keep_dims=False).set_strategy(strategy=stra_list[5]) - self.mul = P.Mul().set_strategy(strategy=stra_list[6]) - self.mul2 = P.Mul().set_strategy(strategy=stra_list[7]) + self.div = P.Div().shard(strategy=stra_list[3]) + self.log = P.Log().shard(strategy=stra_list[4]) + self.sum_cross_entropy = P.ReduceSum(keep_dims=False).shard(strategy=stra_list[5]) + self.mul = P.Mul().shard(strategy=stra_list[6]) + self.mul2 = P.Mul().shard(strategy=stra_list[7]) self.cast = P.Cast() - self.reduce_mean = P.ReduceMean(keep_dims=False).set_strategy(strategy=stra_list[8]) + self.reduce_mean = P.ReduceMean(keep_dims=False).shard(strategy=stra_list[8]) self.sparse = sparse - self.reduce_max = P.ReduceMax(keep_dims=True).set_strategy(strategy=stra_list[9]) - self.sub = P.Sub().set_strategy(strategy=stra_list[10]) + self.reduce_max = P.ReduceMax(keep_dims=True).shard(strategy=stra_list[9]) + self.sub = P.Sub().shard(strategy=stra_list[10]) def construct(self, logit, label): logit_max = self.reduce_max(logit, -1) @@ -174,7 +174,7 @@ class MatmulNet(Cell): super(MatmulNet, self).__init__() if loss_stra_list is None: loss_stra_list = [] - self.matmul = P.MatMul(transpose_b=True).set_strategy(strategy=matmul_stra) + self.matmul = P.MatMul(transpose_b=True).shard(strategy=matmul_stra) self.loss = SoftmaxCrossEntropyExpand(sparse=True, stra_list=loss_stra_list) self.weight = Parameter(Tensor(np.ones(MatmulParamShape), dtype=ms.float32), name="weight") diff --git a/tests/st/model_zoo_tests/wide_and_deep/python_file_for_ci/wide_and_deep.py b/tests/st/model_zoo_tests/wide_and_deep/python_file_for_ci/wide_and_deep.py index 3043055f2a5..09bb2a1e117 100644 --- a/tests/st/model_zoo_tests/wide_and_deep/python_file_for_ci/wide_and_deep.py 
+++ b/tests/st/model_zoo_tests/wide_and_deep/python_file_for_ci/wide_and_deep.py @@ -181,7 +181,7 @@ class WideDeepModel(nn.Cell): self.weight_bias_init, self.deep_layer_act, convert_dtype=True) - self.gather_v2 = P.GatherV2().set_strategy(((1, 8), (1, 1))) + self.gather_v2 = P.GatherV2().shard(((1, 8), (1, 1))) self.gather_v2_1 = P.GatherV2() self.mul = P.Mul() self.reduce_sum = P.ReduceSum(keep_dims=False) @@ -230,7 +230,7 @@ class NetWithLossClass(nn.Cell): self.network = network self.l2_coef = config.l2_coef self.loss = P.SigmoidCrossEntropyWithLogits() - self.square = P.Square().set_strategy(((1, get_group_size()),)) + self.square = P.Square().shard(((1, get_group_size()),)) self.reduceMean_false = P.ReduceMean(keep_dims=False) self.reduceSum_false = P.ReduceSum(keep_dims=False) diff --git a/tests/st/networks/models/deeplabv3/src/backbone/resnet_deeplab.py b/tests/st/networks/models/deeplabv3/src/backbone/resnet_deeplab.py index 1dda6fe746d..d9348de76d7 100644 --- a/tests/st/networks/models/deeplabv3/src/backbone/resnet_deeplab.py +++ b/tests/st/networks/models/deeplabv3/src/backbone/resnet_deeplab.py @@ -273,8 +273,8 @@ class DepthwiseConv2dNative(_DepthwiseConv2dNative): dilation=self.dilation, group=self.group) - def set_strategy(self, strategy): - self.depthwise_conv2d_native.set_strategy(strategy) + def shard(self, strategy): + self.depthwise_conv2d_native.shard(strategy) return self def construct(self, x): diff --git a/tests/ut/python/parallel/test_add_relu_redistribution.py b/tests/ut/python/parallel/test_add_relu_redistribution.py index 894c29a3404..ac88592399f 100644 --- a/tests/ut/python/parallel/test_add_relu_redistribution.py +++ b/tests/ut/python/parallel/test_add_relu_redistribution.py @@ -29,8 +29,8 @@ grad_all = C.GradOperation(get_all=True) class AddRelu(nn.Cell): def __init__(self, strategy0=None, strategy1=None): super(AddRelu, self).__init__() - self.add = P.TensorAdd().set_strategy(strategy=strategy0) - self.relu = P.ReLU().set_strategy(strategy=strategy1) + self.add = P.TensorAdd().shard(strategy=strategy0) + self.relu = P.ReLU().shard(strategy=strategy1) def construct(self, x, z): out = self.add(x, z) diff --git a/tests/ut/python/parallel/test_alltoall.py b/tests/ut/python/parallel/test_alltoall.py index bdf408142af..203cb036fbc 100644 --- a/tests/ut/python/parallel/test_alltoall.py +++ b/tests/ut/python/parallel/test_alltoall.py @@ -53,9 +53,9 @@ class Dataset(MindData): class AllToAllNet(nn.Cell): def __init__(self, strategy1): super(AllToAllNet, self).__init__() - self.matmul = P.MatMul().set_strategy(((1, 1), (1, 8))) + self.matmul = P.MatMul().shard(((1, 1), (1, 8))) self.matmul_weight = Parameter(Tensor(np.ones([128, 256]), dtype=ms.float32), name="weight") - self.transpose1 = P.Transpose().set_strategy(strategy1) + self.transpose1 = P.Transpose().shard(strategy1) def construct(self, x): x = self.matmul(x, self.matmul_weight) @@ -80,8 +80,8 @@ def all_to_all_common(strategy1): net = all_to_all_net(strategy1) loss = SoftmaxCrossEntropyWithLogits(sparse=True) - loss.softmax_cross_entropy.set_strategy(((8, 1), (8, 1))) - loss.one_hot.set_strategy(((8, 1), (), ())) + loss.softmax_cross_entropy.shard(((8, 1), (8, 1))) + loss.one_hot.shard(((8, 1), (), ())) opt = Momentum(net.trainable_params(), learning_rate, momentum) model = Model(net, loss, opt) diff --git a/tests/ut/python/parallel/test_arithmetic.py b/tests/ut/python/parallel/test_arithmetic.py index bef6e042655..36e52a993ed 100644 --- a/tests/ut/python/parallel/test_arithmetic.py +++ 
b/tests/ut/python/parallel/test_arithmetic.py @@ -55,8 +55,8 @@ def test_matmul_sub(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.sub = P.Sub().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.sub = P.Sub().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -79,8 +79,8 @@ def test_matmul_add(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.add = P.TensorAdd().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.add = P.TensorAdd().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -103,8 +103,8 @@ def test_matmul_mul(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.mul = P.Mul().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.mul = P.Mul().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -126,8 +126,8 @@ def test_matmul_mod(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.mod = P.Mod().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.mod = P.Mod().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -149,8 +149,8 @@ def test_matmul_floormod(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.floormod = P.FloorMod().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.floormod = P.FloorMod().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -173,8 +173,8 @@ def test_matmul_atan2(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.atan2 = P.Atan2().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.atan2 = P.Atan2().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -197,8 +197,8 @@ def test_matmul_divNoNan(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.divNoNan = P.DivNoNan().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.divNoNan = P.DivNoNan().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -221,10 +221,10 @@ def test_matmul_logicaland(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.equal = P.Equal().set_strategy(strategy2) - self.notequal = P.NotEqual().set_strategy(strategy2) - self.logical = P.LogicalAnd().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.equal = P.Equal().shard(strategy2) + self.notequal = P.NotEqual().shard(strategy2) + self.logical = P.LogicalAnd().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -250,10 +250,10 @@ def test_matmul_logicalor(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.equal = P.Equal().set_strategy(strategy2) - self.notequal = P.NotEqual().set_strategy(strategy2) - self.logical = P.LogicalOr().set_strategy(strategy2) + self.matmul = 
P.MatMul().shard(strategy1) + self.equal = P.Equal().shard(strategy2) + self.notequal = P.NotEqual().shard(strategy2) + self.logical = P.LogicalOr().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -279,8 +279,8 @@ def test_matmul_div(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.div = P.Div().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.div = P.Div().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -303,8 +303,8 @@ def test_matmul_add_broadcast(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.add = P.TensorAdd().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.add = P.TensorAdd().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -327,8 +327,8 @@ def test_matmul_add_broadcast2(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.add = P.TensorAdd().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.add = P.TensorAdd().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -351,8 +351,8 @@ def test_matmul_sub_broadcast(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.sub = P.Sub().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.sub = P.Sub().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -375,8 +375,8 @@ def test_matmul_sub_broadcast2(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.sub = P.Sub().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.sub = P.Sub().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -399,8 +399,8 @@ def test_matmul_mul_broadcast(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.mul = P.Mul().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.mul = P.Mul().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -423,8 +423,8 @@ def test_matmul_mul_broadcast2(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.mul = P.Mul().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.mul = P.Mul().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -447,8 +447,8 @@ def test_matmul_div_broadcast(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.div = P.Div().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.div = P.Div().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -471,8 +471,8 @@ def test_matmul_div_broadcast2(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.div = P.Div().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.div = P.Div().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -495,8 +495,8 @@ 
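The test_arithmetic.py cases all instantiate the same two-op pattern; a condensed sketch under the usual 8-device unit-test configuration (the strategy values are examples only, not from these tests):

import mindspore.nn as nn
from mindspore import context
from mindspore.ops import operations as P

context.set_auto_parallel_context(device_num=8, global_rank=0,
                                  parallel_mode="semi_auto_parallel")

class MatMulSub(nn.Cell):
    """MatMul followed by an element-wise Sub, each with its own shard strategy."""
    def __init__(self, strategy1, strategy2):
        super().__init__()
        self.matmul = P.MatMul().shard(strategy1)
        self.sub = P.Sub().shard(strategy2)

    def construct(self, x, y, b):
        return self.sub(self.matmul(x, y), b)

net = MatMulSub(((4, 2), (2, 1)), ((4, 1), (4, 1)))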
def test_matmul_greater_broadcast(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.greater = P.Greater().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.greater = P.Greater().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -519,8 +519,8 @@ def test_matmul_greater_broadcast2(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.greater = P.Greater().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.greater = P.Greater().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -543,8 +543,8 @@ def test_matmul_floordiv(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.floordiv = P.FloorDiv().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.floordiv = P.FloorDiv().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -567,8 +567,8 @@ def test_matmul_floordiv_broadcast(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.floordiv = P.FloorDiv().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.floordiv = P.FloorDiv().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -591,8 +591,8 @@ def test_matmul_floordiv_broadcast2(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.floordiv = P.FloorDiv().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.floordiv = P.FloorDiv().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) diff --git a/tests/ut/python/parallel/test_attention.py b/tests/ut/python/parallel/test_attention.py index 7af99af2bd6..a484e1fd63e 100644 --- a/tests/ut/python/parallel/test_attention.py +++ b/tests/ut/python/parallel/test_attention.py @@ -60,18 +60,18 @@ class Net(nn.Cell): super().__init__() self.query_w = Parameter(initializer( "normal", [8, 16], ms.float32), name='query') - self.query = P.MatMul().set_strategy(strategy1) + self.query = P.MatMul().shard(strategy1) self.key_w = Parameter(initializer( "normal", [8, 16], ms.float32), name='key') - self.key = P.MatMul().set_strategy(strategy2) + self.key = P.MatMul().shard(strategy2) self.value_w = Parameter(initializer( "normal", [8, 16], ms.float32), name='value') - self.value = P.MatMul().set_strategy(strategy3) + self.value = P.MatMul().shard(strategy3) - self.score = P.MatMul().set_strategy(strategy4) - self.context = P.MatMul().set_strategy(strategy5) + self.score = P.MatMul().shard(strategy4) + self.context = P.MatMul().shard(strategy5) self.transpose1 = P.Transpose() self.transpose2 = P.Transpose() self.relu = P.ReLU() diff --git a/tests/ut/python/parallel/test_auto_parallel_activation.py b/tests/ut/python/parallel/test_auto_parallel_activation.py index 815411dc164..beaa047f2fa 100644 --- a/tests/ut/python/parallel/test_auto_parallel_activation.py +++ b/tests/ut/python/parallel/test_auto_parallel_activation.py @@ -24,8 +24,8 @@ from mindspore.ops import operations as P class Net(Cell): def __init__(self, mul_weight, strategy1=None, strategy2=None): super().__init__() - self.mul = P.Mul().set_strategy(strategy1) - self.sigmoid = 
P.Sigmoid().set_strategy(strategy2) + self.mul = P.Mul().shard(strategy1) + self.sigmoid = P.Sigmoid().shard(strategy2) self.mul_weight = Parameter(mul_weight, "w1") def construct(self, x, b): diff --git a/tests/ut/python/parallel/test_auto_parallel_onehot.py b/tests/ut/python/parallel/test_auto_parallel_onehot.py index 32ee1c99ed7..03fb233ae4e 100644 --- a/tests/ut/python/parallel/test_auto_parallel_onehot.py +++ b/tests/ut/python/parallel/test_auto_parallel_onehot.py @@ -107,7 +107,7 @@ def test_auto_parallel_arithmetic_model(): def __init__(self): super().__init__() self.matmul = P.MatMul() - self.one_hot = P.OneHot().set_strategy(((1, 8), (), ())) + self.one_hot = P.OneHot().shard(((1, 8), (), ())) self.on_value = Tensor(1.0, ms.float32) self.off_value = Tensor(0.0, ms.float32) self.matmul2 = P.MatMul() diff --git a/tests/ut/python/parallel/test_auto_parallel_partial_strategy.py b/tests/ut/python/parallel/test_auto_parallel_partial_strategy.py index 4aa2fe6b8d4..2606a7d302d 100644 --- a/tests/ut/python/parallel/test_auto_parallel_partial_strategy.py +++ b/tests/ut/python/parallel/test_auto_parallel_partial_strategy.py @@ -53,7 +53,7 @@ def test_four_matmul_linear(): class Net(nn.Cell): def __init__(self, strategy1): super().__init__() - self.matmul1 = P.MatMul().set_strategy(strategy1) + self.matmul1 = P.MatMul().shard(strategy1) self.matmul2 = P.MatMul() self.matmul3 = P.MatMul() self.matmul4 = P.MatMul() diff --git a/tests/ut/python/parallel/test_auto_parallel_reshape.py b/tests/ut/python/parallel/test_auto_parallel_reshape.py index a54660bf6cb..5a384d874a9 100644 --- a/tests/ut/python/parallel/test_auto_parallel_reshape.py +++ b/tests/ut/python/parallel/test_auto_parallel_reshape.py @@ -298,7 +298,7 @@ def test_reshape_auto_7(): def __init__(self): super().__init__() self.reshape = P.Reshape() - self.mul = P.Mul().set_strategy(((1, 2, 4), (2, 4))) + self.mul = P.Mul().shard(((1, 2, 4), (2, 4))) self.mul_weight = Parameter(Tensor(np.ones([128, 96]), dtype=ms.float32), name="weight") def construct(self, x): diff --git a/tests/ut/python/parallel/test_auto_parallel_two_partial_matmul.py b/tests/ut/python/parallel/test_auto_parallel_two_partial_matmul.py index a05730aa5e6..aa0bfd126a3 100644 --- a/tests/ut/python/parallel/test_auto_parallel_two_partial_matmul.py +++ b/tests/ut/python/parallel/test_auto_parallel_two_partial_matmul.py @@ -53,7 +53,7 @@ def test_four_matmul_linear(): class Net(nn.Cell): def __init__(self, strategy1): super().__init__() - self.matmul1 = P.MatMul().set_strategy(strategy1) + self.matmul1 = P.MatMul().shard(strategy1) self.weight = Parameter(Tensor(np.ones([512, 256]).astype(np.float32) * 0.01), "w", requires_grad=True) self.matmul2 = P.MatMul() diff --git a/tests/ut/python/parallel/test_batch_matmul.py b/tests/ut/python/parallel/test_batch_matmul.py index f49a9c322e2..87b5116348c 100644 --- a/tests/ut/python/parallel/test_batch_matmul.py +++ b/tests/ut/python/parallel/test_batch_matmul.py @@ -24,8 +24,8 @@ from mindspore.ops import operations as P class Net(Cell): def __init__(self, mul_weight, batch_matmul_weight, transpose_b=False, strategy1=None, strategy2=None): super().__init__() - self.mul = P.Mul().set_strategy(strategy1) - self.batch_matmul = P.BatchMatMul(transpose_b=transpose_b).set_strategy(strategy2) + self.mul = P.Mul().shard(strategy1) + self.batch_matmul = P.BatchMatMul(transpose_b=transpose_b).shard(strategy2) self.mul_weight = Parameter(mul_weight, "w1") self.batch_matmul_weight = Parameter(batch_matmul_weight, "w2") diff --git 
a/tests/ut/python/parallel/test_batch_parallel.py b/tests/ut/python/parallel/test_batch_parallel.py index db0c93dbf9f..962e0ca0f64 100644 --- a/tests/ut/python/parallel/test_batch_parallel.py +++ b/tests/ut/python/parallel/test_batch_parallel.py @@ -73,7 +73,7 @@ class NetConv(nn.Cell): has_bias, weight_init, bias_init) - self.conv.conv2d.set_strategy(strategy) + self.conv.conv2d.shard(strategy) def construct(self, input_x): return self.conv(input_x) @@ -84,9 +84,9 @@ def test_batch(): def __init__(self, strategy1, strategy2, strategy3): super().__init__() self.conv1 = NetConv(16, 8, (3, 3), bias_init='zeros', strategy=strategy1) - self.mul1 = P.Mul().set_strategy(strategy2) + self.mul1 = P.Mul().shard(strategy2) self.conv2 = NetConv(8, 64, (9, 9), bias_init='zeros', strategy=strategy1) - self.mul2 = P.Mul().set_strategy(strategy3) + self.mul2 = P.Mul().shard(strategy3) def construct(self, x, w1, w2): out1 = self.conv1(x) diff --git a/tests/ut/python/parallel/test_batchnorm_batch_parallel.py b/tests/ut/python/parallel/test_batchnorm_batch_parallel.py index 6be8967b558..4927c26bbd4 100644 --- a/tests/ut/python/parallel/test_batchnorm_batch_parallel.py +++ b/tests/ut/python/parallel/test_batchnorm_batch_parallel.py @@ -64,7 +64,7 @@ def conv7x7(in_channels, out_channels, stride=1, padding=0): conv = Conv2d(in_channels, out_channels, kernel_size=7, stride=stride, padding=padding, weight_init=weight, has_bias=False, pad_mode="same") - conv.conv2d.set_strategy(strategy_weight) + conv.conv2d.shard(strategy_weight) return conv @@ -86,7 +86,7 @@ def bn_with_initialize(out_channels): gamma = weight_variable_1(shape) bn = BatchNorm2d(out_channels, momentum=0.1, eps=0.0001, gamma_init=gamma, beta_init=beta, moving_mean_init=mean, moving_var_init=var) - bn.bn_train.set_strategy(strategy_bn) + bn.bn_train.shard(strategy_bn) return bn @@ -98,10 +98,10 @@ class ResNet(Cell): self.conv1 = conv7x7(3, 64, stride=2, padding=0) self.bn1 = bn_with_initialize(64) self.relu = ReLU() - self.relu.relu.set_strategy(strategy_no_weight) + self.relu.relu.shard(strategy_no_weight) self.maxpool = MaxPool2d(kernel_size=3, stride=2, pad_mode="same") self.reshape = P.Reshape() - self.matmul = P.MatMul().set_strategy(((8, 1), (1, 1))) + self.matmul = P.MatMul().shard(((8, 1), (1, 1))) self.matmul_weight = Parameter(Tensor(np.ones([200704, num_classes]), dtype=ms.float32), name="weight") def construct(self, x): @@ -135,7 +135,7 @@ def test_batchnorm_batch_parallel(): net = batchnorm_net(num_classes) loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') - loss.softmax_cross_entropy.set_strategy(((dev_num, 1), (dev_num, 1))) + loss.softmax_cross_entropy.shard(((dev_num, 1), (dev_num, 1))) opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum) model = Model(net, loss, opt) diff --git a/tests/ut/python/parallel/test_batchnorm_ex_batch_parallel.py b/tests/ut/python/parallel/test_batchnorm_ex_batch_parallel.py index 4655ec89aca..249fe60350a 100644 --- a/tests/ut/python/parallel/test_batchnorm_ex_batch_parallel.py +++ b/tests/ut/python/parallel/test_batchnorm_ex_batch_parallel.py @@ -51,13 +51,13 @@ def test_two_matmul_batchnorm_ex(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul1 = P.MatMul().set_strategy(strategy1) + self.matmul1 = P.MatMul().shard(strategy1) self.norm = P.FusedBatchNormEx() self.gamma = Parameter(Tensor(np.ones([64]), dtype=ms.float32), name="gamma") self.beta = Parameter(Tensor(np.ones([64]), 
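As the batch-parallel tests above rely on, nn layers expose their inner primitives, so a strategy can be attached after the layer is built; a sketch with an illustrative batch-parallel strategy:

import mindspore.nn as nn

conv = nn.Conv2d(3, 64, kernel_size=7, stride=2, pad_mode="same")
# Shard only the batch (N) axis of the NCHW input across 8 devices and
# replicate the weight, in the spirit of conv7x7()/bn_with_initialize() above.
conv.conv2d.shard(((8, 1, 1, 1), (1, 1, 1, 1)))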
dtype=ms.float32), name="beta") self.mean = Parameter(Tensor(np.ones([64]), dtype=ms.float32), name="mean") self.var = Parameter(Tensor(np.ones([64]), dtype=ms.float32), name="var") - self.matmul2 = P.MatMul().set_strategy(strategy2) + self.matmul2 = P.MatMul().shard(strategy2) def construct(self, x, y, b): out = self.matmul1(x, y) diff --git a/tests/ut/python/parallel/test_bias_add.py b/tests/ut/python/parallel/test_bias_add.py index 573efde125a..ce8d9ec3b8f 100644 --- a/tests/ut/python/parallel/test_bias_add.py +++ b/tests/ut/python/parallel/test_bias_add.py @@ -70,7 +70,7 @@ class Net(nn.Cell): super().__init__() self.conv = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=1, stride=1, pad_mode='valid', has_bias=True, weight_init='ones', bias_init='ones') - self.reduce_mean = P.ReduceMean(keep_dims=False).set_strategy(((1, 1, 1, 8),)) + self.reduce_mean = P.ReduceMean(keep_dims=False).shard(((1, 1, 1, 8),)) self.flat = nn.Flatten() def construct(self, inputs): diff --git a/tests/ut/python/parallel/test_bn_prelu_cell.py b/tests/ut/python/parallel/test_bn_prelu_cell.py index fbfe5a2b0b9..d94a79deb64 100644 --- a/tests/ut/python/parallel/test_bn_prelu_cell.py +++ b/tests/ut/python/parallel/test_bn_prelu_cell.py @@ -87,18 +87,18 @@ class FusedBatchNorm(nn.Cell): epsilon=self.eps) self.bn_infer = P.BatchNorm(is_training=False, epsilon=self.eps) - self.sub_mean = P.Sub().set_strategy(((1), (1))) - self.sub_var = P.Sub().set_strategy(((1), (1))) - self.mul_mean = P.Mul().set_strategy(((1,), ())) - self.mul_var = P.Mul().set_strategy(((1,), ())) - self.assign_sub_mean = P.AssignSub().set_strategy(((1,), (1,))) - self.assign_sub_var = P.AssignSub().set_strategy(((1), (1))) - self.sub_mean2 = P.Sub().set_strategy(((1), (1))) - self.sub_var2 = P.Sub().set_strategy(((1), (1))) + self.sub_mean = P.Sub().shard(((1), (1))) + self.sub_var = P.Sub().shard(((1), (1))) + self.mul_mean = P.Mul().shard(((1,), ())) + self.mul_var = P.Mul().shard(((1,), ())) + self.assign_sub_mean = P.AssignSub().shard(((1,), (1,))) + self.assign_sub_var = P.AssignSub().shard(((1), (1))) + self.sub_mean2 = P.Sub().shard(((1), (1))) + self.sub_var2 = P.Sub().shard(((1), (1))) - def set_strategy(self, strategy): - self.bn_train.set_strategy(strategy) - self.bn_infer.set_strategy(strategy) + def shard(self, strategy): + self.bn_train.shard(strategy) + self.bn_infer.shard(strategy) def _check_data_dim(self, x): raise NotImplementedError @@ -173,7 +173,7 @@ class PReLU(nn.Cell): w = Tensor(w) self.w = Parameter(initializer(w, [channel,]), name='a') self.prelu = P.PReLU() - self.relu = P.ReLU().set_strategy(((1))) + self.relu = P.ReLU().shard(((1))) def construct(self, x): self.w = self.relu(self.w) @@ -210,7 +210,7 @@ def bn_common(parallel_mode, train_flag, strategy_loss=None): net = bn_net() loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') - loss.softmax_cross_entropy.set_strategy(strategy_loss) + loss.softmax_cross_entropy.shard(strategy_loss) opt = Momentum(net.trainable_params(), learning_rate, momentum, 0.0001, 1024 * rank_size) if not train_flag: diff --git a/tests/ut/python/parallel/test_bool_grad.py b/tests/ut/python/parallel/test_bool_grad.py index 735f66bb6a1..5f31494f322 100644 --- a/tests/ut/python/parallel/test_bool_grad.py +++ b/tests/ut/python/parallel/test_bool_grad.py @@ -52,8 +52,8 @@ class CommonNet(nn.Cell): def __init__(self): super(CommonNet, self).__init__() self.weight = Parameter(Tensor(np.ones([256, 64]), dtype=ms.float32), name="mul_weight") - self.logicalnot = 
P.LogicalNot().set_strategy(((4, 2),)) - self.equal = P.Equal().set_strategy(((4, 2), (4, 2))) + self.logicalnot = P.LogicalNot().shard(((4, 2),)) + self.equal = P.Equal().shard(((4, 2), (4, 2))) def construct(self, x, label): x = self.equal(x, self.weight) diff --git a/tests/ut/python/parallel/test_comparison_function_info.py b/tests/ut/python/parallel/test_comparison_function_info.py index 014e71709eb..fc74d8ae46e 100644 --- a/tests/ut/python/parallel/test_comparison_function_info.py +++ b/tests/ut/python/parallel/test_comparison_function_info.py @@ -56,8 +56,8 @@ def test_matmul_equal(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.equal = P.Equal().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.equal = P.Equal().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -79,8 +79,8 @@ def test_matmul_not_equal(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.notequal = P.NotEqual().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.notequal = P.NotEqual().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -102,8 +102,8 @@ def test_matmul_approximateEqual(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.approximateEqual = P.ApproximateEqual(tolerance=0.5).set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.approximateEqual = P.ApproximateEqual(tolerance=0.5).shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -126,8 +126,8 @@ def test_matmul_greater(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.greater = P.Greater().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.greater = P.Greater().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -150,8 +150,8 @@ def test_matmul_greaterEqual(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.greaterEqual = P.GreaterEqual().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.greaterEqual = P.GreaterEqual().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -174,8 +174,8 @@ def test_matmul_less(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.less = P.Less().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.less = P.Less().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -198,8 +198,8 @@ def test_matmul_lessEqual(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.lessEqual = P.LessEqual().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.lessEqual = P.LessEqual().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -222,8 +222,8 @@ def test_matmul_not_equal_repeated_calculation(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.notequal = P.NotEqual().set_strategy(strategy2) + self.matmul = 
P.MatMul().shard(strategy1) + self.notequal = P.NotEqual().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -245,8 +245,8 @@ def test_matmul_maximum(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.maximum = P.Maximum().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.maximum = P.Maximum().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -268,8 +268,8 @@ def test_matmul_maximum_broadcast(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.maximum = P.Maximum().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.maximum = P.Maximum().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -291,8 +291,8 @@ def test_matmul_maximum_broadcast2(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.maximum = P.Maximum().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.maximum = P.Maximum().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -314,8 +314,8 @@ def test_matmul_minimum(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.minimum = P.Minimum().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.minimum = P.Minimum().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -337,8 +337,8 @@ def test_matmul_minimum_broadcast(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.minimum = P.Maximum().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.minimum = P.Maximum().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) @@ -360,8 +360,8 @@ def test_matmul_minimum_broadcast2(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.minimum = P.Minimum().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.minimum = P.Minimum().shard(strategy2) def construct(self, x, y, b): out = self.matmul(x, y) diff --git a/tests/ut/python/parallel/test_concat.py b/tests/ut/python/parallel/test_concat.py index 74521a0fe95..cb7875751e0 100644 --- a/tests/ut/python/parallel/test_concat.py +++ b/tests/ut/python/parallel/test_concat.py @@ -23,12 +23,12 @@ from mindspore.ops import operations as P class Net(Cell): def __init__(self, weight, weight2, strategy1=None, strategy2=None, is_parameter=True): super().__init__() - self.concat = P.Concat(axis=0).set_strategy(strategy1) + self.concat = P.Concat(axis=0).shard(strategy1) if is_parameter: self.weight = Parameter(weight, "w1") else: self.weight = weight - self.mul = P.Mul().set_strategy(strategy2) + self.mul = P.Mul().shard(strategy2) self.weight2 = Parameter(weight2, "w2") def construct(self, x, b): @@ -40,8 +40,8 @@ class Net(Cell): class Net2(Cell): def __init__(self, weight, strategy1=None, strategy2=None, axis=0): super().__init__() - self.mul = P.Mul().set_strategy(strategy1) - self.concat = P.Concat(axis=axis).set_strategy(strategy2) + self.mul = P.Mul().shard(strategy1) + self.concat = P.Concat(axis=axis).shard(strategy2) self.weight = Parameter(weight, "w") def 
construct(self, x, b): @@ -53,12 +53,12 @@ class Net2(Cell): class Net3(Cell): def __init__(self, weight, weight2, weight3, strategy1=None, strategy2=None, is_parameter=True): super().__init__() - self.concat = P.Concat(axis=0).set_strategy(strategy1) + self.concat = P.Concat(axis=0).shard(strategy1) if is_parameter: self.weight = Parameter(weight, "w1") else: self.weight = weight - self.mul = P.Mul().set_strategy(strategy2) + self.mul = P.Mul().shard(strategy2) self.weight2 = Parameter(weight2, "w2") self.weight3 = Parameter(weight3, "w3") diff --git a/tests/ut/python/parallel/test_dataset_interface.py b/tests/ut/python/parallel/test_dataset_interface.py index 6ae5c091635..fbe8a7b0480 100644 --- a/tests/ut/python/parallel/test_dataset_interface.py +++ b/tests/ut/python/parallel/test_dataset_interface.py @@ -54,9 +54,9 @@ class Dataset(MindData): class AllToAllNet(nn.Cell): def __init__(self, strategy1): super(AllToAllNet, self).__init__() - self.matmul = P.MatMul().set_strategy(((1, 1), (1, 8))) + self.matmul = P.MatMul().shard(((1, 1), (1, 8))) self.matmul_weight = Parameter(Tensor(np.ones([128, 256]), dtype=ms.float32), name="weight") - self.transpose1 = P.Transpose().set_strategy(strategy1) + self.transpose1 = P.Transpose().shard(strategy1) def construct(self, x): x = self.matmul(x, self.matmul_weight) @@ -81,7 +81,7 @@ def loss_scale_manager_common(strategy1): net = all_to_all_net(strategy1) loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') - loss.softmax_cross_entropy.set_strategy(((8, 1), (8, 1))) + loss.softmax_cross_entropy.shard(((8, 1), (8, 1))) opt = Momentum(net.trainable_params(), learning_rate, momentum) scale_manager = DynamicLossScaleManager(32, 2, 2000) model = Model(net, loss, opt, loss_scale_manager=scale_manager) @@ -154,9 +154,9 @@ def test_input_not_in_parameter_layotu_dict(): class Net(nn.Cell): def __init__(self, strategy1): super(Net, self).__init__() - self.matmul = P.MatMul().set_strategy(((1, 1), (1, 8))) + self.matmul = P.MatMul().shard(((1, 1), (1, 8))) self.matmul_weight = Parameter(Tensor(np.ones([128, 256]), dtype=ms.float32), name="weight") - self.transpose1 = P.Transpose().set_strategy(strategy1) + self.transpose1 = P.Transpose().shard(strategy1) def construct(self, x): x = self.matmul(x, self.matmul_weight) diff --git a/tests/ut/python/parallel/test_different_type_for_div_op.py b/tests/ut/python/parallel/test_different_type_for_div_op.py index 92480d06b03..0a07f08d80a 100644 --- a/tests/ut/python/parallel/test_different_type_for_div_op.py +++ b/tests/ut/python/parallel/test_different_type_for_div_op.py @@ -44,8 +44,8 @@ def test_sum_as_loss_float16(): class Net(nn.Cell): def __init__(self, strategy0, strategy1): super().__init__() - self.fc_nobias = P.MatMul(transpose_b=True).set_strategy(strategy0) - self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy1) + self.fc_nobias = P.MatMul(transpose_b=True).shard(strategy0) + self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy1) def construct(self, x, y): out = self.fc_nobias(x, y) @@ -67,8 +67,8 @@ def test_sum_as_loss_float32(): class Net(nn.Cell): def __init__(self, strategy0, strategy1): super().__init__() - self.fc_nobias = P.MatMul(transpose_b=True).set_strategy(strategy0) - self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy1) + self.fc_nobias = P.MatMul(transpose_b=True).shard(strategy0) + self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy1) def construct(self, x, y): out = self.fc_nobias(x, y) @@ -90,8 +90,8 @@ def 
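Loss cells follow the same convention, as the dataset-interface test above shows; a sketch of sharding a loss along the batch axis on 8 devices:

from mindspore.nn import SoftmaxCrossEntropyWithLogits

loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
# The loss Cell exposes its primitives too; shard logits and labels
# along the batch dimension, as the tests above do.
loss.softmax_cross_entropy.shard(((8, 1), (8, 1)))
loss.one_hot.shard(((8, 1), (), ()))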
test_sum_as_loss_int32(): class Net(nn.Cell): def __init__(self, strategy0, strategy1): super().__init__() - self.fc_nobias = P.MatMul(transpose_b=True).set_strategy(strategy0) - self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy1) + self.fc_nobias = P.MatMul(transpose_b=True).shard(strategy0) + self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy1) def construct(self, x, y): out = self.fc_nobias(x, y) diff --git a/tests/ut/python/parallel/test_dropout_do_mask.py b/tests/ut/python/parallel/test_dropout_do_mask.py index f3d8f6ef8e4..c966685b2a7 100644 --- a/tests/ut/python/parallel/test_dropout_do_mask.py +++ b/tests/ut/python/parallel/test_dropout_do_mask.py @@ -24,9 +24,9 @@ from mindspore.ops import operations as P class Net(Cell): def __init__(self, mul_weight, strategy1=None, strategy2=None): super().__init__() - self.mul = P.Mul().set_strategy(strategy1) - self.mul2 = P.Mul().set_strategy(strategy1) - self.dropout_do_mask = P.DropoutDoMask().set_strategy(strategy2) + self.mul = P.Mul().shard(strategy1) + self.mul2 = P.Mul().shard(strategy1) + self.dropout_do_mask = P.DropoutDoMask().shard(strategy2) self.dropout_gen_mask = P.DropoutGenMask() self.get_shape = P.Shape() self.cast = P.Cast() diff --git a/tests/ut/python/parallel/test_element_wise_function.py b/tests/ut/python/parallel/test_element_wise_function.py index 120cd8c3daa..7d6924fb8e2 100644 --- a/tests/ut/python/parallel/test_element_wise_function.py +++ b/tests/ut/python/parallel/test_element_wise_function.py @@ -56,9 +56,9 @@ def test_matmul_pow(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.pow = P.Pow().set_strategy(strategy2) - self.matmul2 = P.MatMul().set_strategy(strategy1) + self.matmul = P.MatMul().shard(strategy1) + self.pow = P.Pow().shard(strategy2) + self.matmul2 = P.MatMul().shard(strategy1) def construct(self, x, y, b): out = self.matmul(x, y) @@ -82,9 +82,9 @@ def test_matmul_exp(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.exp = P.Exp().set_strategy(strategy2) - self.matmul2 = P.MatMul().set_strategy(strategy1) + self.matmul = P.MatMul().shard(strategy1) + self.exp = P.Exp().shard(strategy2) + self.matmul2 = P.MatMul().shard(strategy1) def construct(self, x, y, b): out = self.matmul(x, y) @@ -108,9 +108,9 @@ def test_matmul_log(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.log = P.Log().set_strategy(strategy2) - self.matmul2 = P.MatMul().set_strategy(strategy1) + self.matmul = P.MatMul().shard(strategy1) + self.log = P.Log().shard(strategy2) + self.matmul2 = P.MatMul().shard(strategy1) def construct(self, x, y, b): out = self.matmul(x, y) @@ -133,9 +133,9 @@ def test_matmul_abs(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.abs = P.Abs().set_strategy(strategy2) - self.matmul2 = P.MatMul().set_strategy(strategy1) + self.matmul = P.MatMul().shard(strategy1) + self.abs = P.Abs().shard(strategy2) + self.matmul2 = P.MatMul().shard(strategy1) def construct(self, x, y, b): out = self.matmul(x, y) @@ -158,9 +158,9 @@ def test_matmul_sign(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.sign = 
P.Sign().set_strategy(strategy2) - self.matmul2 = P.MatMul().set_strategy(strategy1) + self.matmul = P.MatMul().shard(strategy1) + self.sign = P.Sign().shard(strategy2) + self.matmul2 = P.MatMul().shard(strategy1) def construct(self, x, y, b): out = self.matmul(x, y) @@ -184,9 +184,9 @@ def test_matmul_floor(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.floor = P.Floor().set_strategy(strategy2) - self.matmul2 = P.MatMul().set_strategy(strategy1) + self.matmul = P.MatMul().shard(strategy1) + self.floor = P.Floor().shard(strategy2) + self.matmul2 = P.MatMul().shard(strategy1) def construct(self, x, y, b): out = self.matmul(x, y) @@ -209,9 +209,9 @@ def test_matmul_round(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.round = P.Round().set_strategy(strategy2) - self.matmul2 = P.MatMul().set_strategy(strategy1) + self.matmul = P.MatMul().shard(strategy1) + self.round = P.Round().shard(strategy2) + self.matmul2 = P.MatMul().shard(strategy1) def construct(self, x, y, b): out = self.matmul(x, y) @@ -235,9 +235,9 @@ def test_matmul_reciprocal(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.reciprocal = P.Reciprocal().set_strategy(strategy2) - self.matmul2 = P.MatMul().set_strategy(strategy1) + self.matmul = P.MatMul().shard(strategy1) + self.reciprocal = P.Reciprocal().shard(strategy2) + self.matmul2 = P.MatMul().shard(strategy1) def construct(self, x, y, b): out = self.matmul(x, y) @@ -261,9 +261,9 @@ def test_matmul_inv(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.inv = P.Inv().set_strategy(strategy2) - self.matmul2 = P.MatMul().set_strategy(strategy1) + self.matmul = P.MatMul().shard(strategy1) + self.inv = P.Inv().shard(strategy2) + self.matmul2 = P.MatMul().shard(strategy1) def construct(self, x, y, b): out = self.matmul(x, y) @@ -287,9 +287,9 @@ def test_matmul_rsqrt(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.rsqrt = P.Rsqrt().set_strategy(strategy2) - self.matmul2 = P.MatMul().set_strategy(strategy1) + self.matmul = P.MatMul().shard(strategy1) + self.rsqrt = P.Rsqrt().shard(strategy2) + self.matmul2 = P.MatMul().shard(strategy1) def construct(self, x, y, b): out = self.matmul(x, y) @@ -313,9 +313,9 @@ def test_matmul_tan(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.tan = P.Tan().set_strategy(strategy2) - self.matmul2 = P.MatMul().set_strategy(strategy1) + self.matmul = P.MatMul().shard(strategy1) + self.tan = P.Tan().shard(strategy2) + self.matmul2 = P.MatMul().shard(strategy1) def construct(self, x, y, b): out = self.matmul(x, y) @@ -339,9 +339,9 @@ def test_matmul_sin(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.sin = P.Sin().set_strategy(strategy2) - self.matmul2 = P.MatMul().set_strategy(strategy1) + self.matmul = P.MatMul().shard(strategy1) + self.sin = P.Sin().shard(strategy2) + self.matmul2 = P.MatMul().shard(strategy1) def construct(self, x, y, b): out = self.matmul(x, y) @@ -365,9 +365,9 @@ def test_matmul_sinh(): class 
Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.sinh = P.Sinh().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.sinh = P.Sinh().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)
@@ -391,9 +391,9 @@ def test_matmul_log1p():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.log1p = P.Log1p().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.log1p = P.Log1p().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)
@@ -417,9 +417,9 @@ def test_matmul_expm1():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.expm1 = P.Expm1().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.expm1 = P.Expm1().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)
@@ -443,9 +443,9 @@ def test_matmul_cosh():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.cosh = P.Cosh().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.cosh = P.Cosh().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)
@@ -468,9 +468,9 @@ def test_matmul_erf():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.erf = P.Erf().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.erf = P.Erf().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)
@@ -494,9 +494,9 @@ def test_matmul_erfc():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.erfc = P.Erfc().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.erfc = P.Erfc().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)
@@ -520,9 +520,9 @@ def test_matmul_zeroslike():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.zeroslike = P.ZerosLike().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.zeroslike = P.ZerosLike().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)
@@ -546,9 +546,9 @@ def test_matmul_oneslike():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.oneslike = P.OnesLike().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.oneslike = P.OnesLike().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)
@@ -572,9 +572,9 @@ def test_matmul_BesselI0e():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.BesselI0e = P.BesselI0e().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.BesselI0e = P.BesselI0e().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)
@@ -598,9 +598,9 @@ def test_matmul_BesselI1e():
     class Net(nn.Cell):
        def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.BesselI1e = P.BesselI1e().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.BesselI1e = P.BesselI1e().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)
@@ -624,9 +624,9 @@ def test_matmul_ceil():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.Ceil = P.Ceil().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.Ceil = P.Ceil().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)
@@ -650,9 +650,9 @@ def test_matmul_atan():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.atan = P.Atan().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.atan = P.Atan().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)
@@ -676,9 +676,9 @@ def test_matmul_Atanh():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.atanh = P.Atanh().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.atanh = P.Atanh().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)
@@ -702,9 +702,9 @@ def test_matmul_asin():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.asin = P.Asin().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.asin = P.Asin().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)
@@ -728,9 +728,9 @@ def test_matmul_asinh():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.asinh = P.Asinh().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.asinh = P.Asinh().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)
@@ -754,9 +754,9 @@ def test_matmul_acosh():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.acosh = P.Acosh().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.acosh = P.Acosh().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy1)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)
@@ -780,9 +780,9 @@ def test_matmul_logical_not():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2, strategy3):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.logicalnot = P.LogicalNot().set_strategy(strategy2)
-            self.equal = P.Equal().set_strategy(strategy3)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.logicalnot = P.LogicalNot().shard(strategy2)
+            self.equal = P.Equal().shard(strategy3)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)
@@ -807,9 +807,9 @@ def test_matmul_cast():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2, strategy3):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.cast = P.Cast().set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy3)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.cast = P.Cast().shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy3)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)
@@ -834,7 +834,7 @@ def test_gradient_fp32_sync():
     class Net(nn.Cell):
         def __init__(self, strategy1):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
             self.cast = P.Cast()
 
         def construct(self, x, y, b):
@@ -858,7 +858,7 @@ def test_gradient_fp32_sync1():
    class Net(nn.Cell):
         def __init__(self, strategy1):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
             self.cast = P.Cast()
 
         def construct(self, x, y, b):
@@ -882,7 +882,7 @@ def test_gradient_fp32_sync2():
     class Net(nn.Cell):
         def __init__(self, strategy1):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
             self.cast = P.Cast()
 
         def construct(self, x, y, b):
@@ -906,7 +906,7 @@ def test_gradient_fp32_sync3():
     class Net(nn.Cell):
         def __init__(self, strategy1):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
             self.cast = P.Cast()
 
         def construct(self, x, y, b):
@@ -930,10 +930,10 @@ def test_mul_two_cast():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2, strategy3):
             super().__init__()
-            self.mul = P.Mul().set_strategy(strategy1)
-            self.mul2 = P.Mul().set_strategy(strategy2)
-            self.cast = P.Cast().set_strategy(strategy3)
-            self.cast2 = P.Cast().set_strategy(strategy3)
+            self.mul = P.Mul().shard(strategy1)
+            self.mul2 = P.Mul().shard(strategy2)
+            self.cast = P.Cast().shard(strategy3)
+            self.cast2 = P.Cast().shard(strategy3)
 
         def construct(self, x, y, b):
             out = self.mul(x, y)
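Editor's note (illustrative, not part of the patch): every hunk above follows one mechanical pattern — a sharding strategy that used to be attached with Primitive.set_strategy() is now attached with Primitive.shard(), with unchanged semantics. A minimal sketch of the new call pattern under semi-auto parallel; the Net class and the strategy tuples below are hypothetical examples for a 2-device layout, not code taken from this patch:

    import mindspore.nn as nn
    from mindspore.ops import operations as P

    class Net(nn.Cell):
        def __init__(self):
            super().__init__()
            # A strategy is a tuple with one entry per operator input; each
            # entry gives the partition count per dimension. ((2, 1), (1, 1))
            # splits the first MatMul input by row across 2 devices and
            # replicates the second input. (Hypothetical layout.)
            self.matmul = P.MatMul().shard(((2, 1), (1, 1)))  # was: .set_strategy(...)
            # Element-wise op: a single entry matching its single input.
            self.relu = P.ReLU().shard(((2, 1),))

        def construct(self, x, y):
            return self.relu(self.matmul(x, y))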
diff --git a/tests/ut/python/parallel/test_embeddinglookup.py b/tests/ut/python/parallel/test_embeddinglookup.py
index 576a6b3bc96..01159c0dc1a 100644
--- a/tests/ut/python/parallel/test_embeddinglookup.py
+++ b/tests/ut/python/parallel/test_embeddinglookup.py
@@ -49,8 +49,8 @@ class Net(nn.Cell):
         super().__init__()
         self.index = Tensor(np.ones(shape), dtype=ms.int32)
         self.offset = offset
-        self.elu = P.EmbeddingLookup().set_strategy(strategy1).add_prim_attr("primitive_target", target)
-        self.mm = P.BatchMatMul().set_strategy(strategy2)
+        self.elu = P.EmbeddingLookup().shard(strategy1).add_prim_attr("primitive_target", target)
+        self.mm = P.BatchMatMul().shard(strategy2)
 
     def construct(self, x, y):
         out = self.elu(x, self.index, self.offset)
diff --git a/tests/ut/python/parallel/test_expand_dims.py b/tests/ut/python/parallel/test_expand_dims.py
index 9d144ed50d9..d71a78346bd 100644
--- a/tests/ut/python/parallel/test_expand_dims.py
+++ b/tests/ut/python/parallel/test_expand_dims.py
@@ -24,9 +24,9 @@ from mindspore.ops import operations as P
 class Net(Cell):
     def __init__(self, mul_weight, strategy1=None, strategy2=None, strategy3=None):
         super().__init__()
-        self.mul = P.Mul().set_strategy(strategy1)
-        self.expand_dims = P.ExpandDims().set_strategy(strategy2)
-        self.mul2 = P.Mul().set_strategy(strategy3)
+        self.mul = P.Mul().shard(strategy1)
+        self.expand_dims = P.ExpandDims().shard(strategy2)
+        self.mul2 = P.Mul().shard(strategy3)
         self.mul_weight = Parameter(mul_weight, "w1")
 
     def construct(self, x, b):
@@ -39,8 +39,8 @@ class Net(Cell):
 class Net2(Cell):
     def __init__(self, mul_weight, strategy1=None, strategy2=None):
         super().__init__()
-        self.expand_dims = P.ExpandDims().set_strategy(strategy1)
-        self.mul = P.Mul().set_strategy(strategy2)
+        self.expand_dims = P.ExpandDims().shard(strategy1)
+        self.mul = P.Mul().shard(strategy2)
         self.mul_weight = Parameter(mul_weight, "w1")
 
     def construct(self, x, b):
diff --git a/tests/ut/python/parallel/test_forward_graph.py b/tests/ut/python/parallel/test_forward_graph.py
index c8561210c51..8ad7451e6d5 100644
--- a/tests/ut/python/parallel/test_forward_graph.py
+++ b/tests/ut/python/parallel/test_forward_graph.py
@@ -24,8 +24,8 @@ from mindspore.ops import operations as P
 class Net(Cell):
     def __init__(self, mul_weight, strategy1=None, strategy2=None):
         super().__init__()
-        self.mul = P.Mul().set_strategy(strategy1)
-        self.neg = P.Neg().set_strategy(strategy2)
+        self.mul = P.Mul().shard(strategy1)
+        self.neg = P.Neg().shard(strategy2)
         self.mul_weight = Parameter(mul_weight, "w1")
 
     def construct(self, x, b):
diff --git a/tests/ut/python/parallel/test_full_batch.py b/tests/ut/python/parallel/test_full_batch.py
index d00e15162ab..9d504f2af26 100644
--- a/tests/ut/python/parallel/test_full_batch.py
+++ b/tests/ut/python/parallel/test_full_batch.py
@@ -51,9 +51,9 @@ class Dataset(MindData):
 class AllToAllNet(nn.Cell):
     def __init__(self, strategy1):
         super(AllToAllNet, self).__init__()
-        self.matmul = P.MatMul().set_strategy(((1, 1), (1, 8)))
+        self.matmul = P.MatMul().shard(((1, 1), (1, 8)))
         self.matmul_weight = Parameter(Tensor(np.ones([128, 256]), dtype=ms.float32), name="weight")
-        self.transpose1 = P.Transpose().set_strategy(strategy1)
+        self.transpose1 = P.Transpose().shard(strategy1)
 
     def construct(self, x):
         x = self.matmul(x, self.matmul_weight)
@@ -77,8 +77,8 @@ def all_to_all_common(strategy1):
     net = all_to_all_net(strategy1)
 
     loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
-    loss.softmax_cross_entropy.set_strategy(((8, 1), (8, 1)))
-    loss.one_hot.set_strategy(((8, 1), (), ()))
+    loss.softmax_cross_entropy.shard(((8, 1), (8, 1)))
+    loss.one_hot.shard(((8, 1), (), ()))
     opt = Momentum(net.trainable_params(), learning_rate, momentum)
     model = Model(net, loss, opt)
diff --git a/tests/ut/python/parallel/test_gather_v2.py b/tests/ut/python/parallel/test_gather_v2.py
index 2d657a61016..9e845f5a589 100644
--- a/tests/ut/python/parallel/test_gather_v2.py
+++ b/tests/ut/python/parallel/test_gather_v2.py
@@ -51,8 +51,8 @@ class Net(nn.Cell):
         super().__init__()
         if shape is None:
             shape = [64, 64]
-        self.gatherv2 = P.GatherV2().set_strategy(strategy1).add_prim_attr("primitive_target",
target) - self.mul = P.Mul().set_strategy(strategy2) + self.gatherv2 = P.GatherV2().shard(strategy1).add_prim_attr("primitive_target", target) + self.mul = P.Mul().shard(strategy2) self.index = Tensor(np.ones(shape), dtype=ms.int32) self.axis = axis diff --git a/tests/ut/python/parallel/test_gather_v2_primitive.py b/tests/ut/python/parallel/test_gather_v2_primitive.py index 8ad626e1846..21751502641 100644 --- a/tests/ut/python/parallel/test_gather_v2_primitive.py +++ b/tests/ut/python/parallel/test_gather_v2_primitive.py @@ -79,7 +79,7 @@ class GatherV2(_Loss): emb2_list = np.reshape(emb_list[1::2], (int(index_size / 2), 16)) self.emb1_param = Tensor(emb1_list, dtype=mstype.int32) self.emb2_param = Tensor(emb2_list, dtype=mstype.int32) - self.gatherv2 = P.GatherV2().set_strategy(strategy).add_prim_attr("data_parallel", True) + self.gatherv2 = P.GatherV2().shard(strategy).add_prim_attr("data_parallel", True) def construct(self, nembeddings): emb1 = self.gatherv2(nembeddings, self.emb1_param, 0) @@ -208,7 +208,7 @@ class GatherV2Axis1(_Loss): emb2_list = np.reshape(emb_list[1::2], (int(index_size / 2), index_size)) self.emb1_param = Tensor(emb1_list, dtype=mstype.int32) self.emb2_param = Tensor(emb2_list, dtype=mstype.int32) - self.gatherv2 = P.GatherV2().set_strategy(strategy) + self.gatherv2 = P.GatherV2().shard(strategy) def construct(self, nembeddings): emb1 = self.gatherv2(nembeddings, self.emb1_param, 1) diff --git a/tests/ut/python/parallel/test_get_next.py b/tests/ut/python/parallel/test_get_next.py index c1db710ad5a..fa22cb63765 100644 --- a/tests/ut/python/parallel/test_get_next.py +++ b/tests/ut/python/parallel/test_get_next.py @@ -32,10 +32,10 @@ class NetWithLoss(nn.Cell): def __init__(self, network, types, shapes, output_num, strategy3=None, strategy4=None, axis=-1): super(NetWithLoss, self).__init__() self.get_next = P.GetNext(types, shapes, output_num, "") - self.one_hot = P.OneHot(axis=axis).set_strategy(strategy3) + self.one_hot = P.OneHot(axis=axis).shard(strategy3) self.on_value = Tensor(1.0, ms.float32) self.off_value = Tensor(0.0, ms.float32) - self.loss = P.SoftmaxCrossEntropyWithLogits().set_strategy(strategy4) + self.loss = P.SoftmaxCrossEntropyWithLogits().shard(strategy4) self.network = network def construct(self): @@ -81,8 +81,8 @@ def test_get_next_semi_auto_parallel(): class Net(nn.Cell): def __init__(self, channel=1, w=0.25, strategy1=None, strategy2=None): super().__init__() - self.norm = P.L2Normalize().set_strategy(strategy1) - self.prelu = P.PReLU().set_strategy(strategy2) + self.norm = P.L2Normalize().shard(strategy1) + self.prelu = P.PReLU().shard(strategy2) self.w = Parameter(initializer(w, [channel,]), name='w') def construct(self, data): @@ -105,8 +105,8 @@ def test_get_next_semi_auto_parallel1(): class Net(nn.Cell): def __init__(self, channel=1, w=0.25, strategy1=None, strategy2=None): super().__init__() - self.norm = P.L2Normalize().set_strategy(strategy1) - self.prelu = P.PReLU().set_strategy(strategy2) + self.norm = P.L2Normalize().shard(strategy1) + self.prelu = P.PReLU().shard(strategy2) self.w = Parameter(initializer(w, [channel,]), name='w') def construct(self, data): @@ -129,8 +129,8 @@ def test_get_next_auto_parallel(): class Net(nn.Cell): def __init__(self, channel=1, w=0.25, strategy1=None, strategy2=None): super().__init__() - self.norm = P.L2Normalize().set_strategy(strategy1) - self.prelu = P.PReLU().set_strategy(strategy2) + self.norm = P.L2Normalize().shard(strategy1) + self.prelu = P.PReLU().shard(strategy2) self.w = 
Parameter(initializer(w, [channel,]), name='w') def construct(self, data): diff --git a/tests/ut/python/parallel/test_get_parameter_layout.py b/tests/ut/python/parallel/test_get_parameter_layout.py index 48b40e53841..0d4aa1722d1 100644 --- a/tests/ut/python/parallel/test_get_parameter_layout.py +++ b/tests/ut/python/parallel/test_get_parameter_layout.py @@ -27,8 +27,8 @@ def test_get_parameter_layout(): def __init__(self, strategy1, strategy2, weight): super().__init__() self.weight = Parameter(weight, "w1") - self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1) - self.relu = P.ReLU().set_strategy(strategy2) + self.matmul = P.MatMul(transpose_a=False, transpose_b=True).shard(strategy1) + self.relu = P.ReLU().shard(strategy2) def construct(self, x): out = self.matmul(x, self.weight) diff --git a/tests/ut/python/parallel/test_gpu_dropout.py b/tests/ut/python/parallel/test_gpu_dropout.py index 0eade2b9628..148f0184f2c 100644 --- a/tests/ut/python/parallel/test_gpu_dropout.py +++ b/tests/ut/python/parallel/test_gpu_dropout.py @@ -49,8 +49,8 @@ class GradWrap(nn.Cell): class Net(nn.Cell): def __init__(self, strategy1=None, strategy2=None): super().__init__() - self.dropout = P.Dropout(keep_prob=0.6).set_strategy(strategy1) - self.matmul = P.MatMul().set_strategy(strategy2) + self.dropout = P.Dropout(keep_prob=0.6).shard(strategy1) + self.matmul = P.MatMul().shard(strategy2) def construct(self, x, y): out = self.matmul(x, y) diff --git a/tests/ut/python/parallel/test_hybird_parallel_activation.py b/tests/ut/python/parallel/test_hybird_parallel_activation.py index cf2dd849cfa..8ff335e0594 100644 --- a/tests/ut/python/parallel/test_hybird_parallel_activation.py +++ b/tests/ut/python/parallel/test_hybird_parallel_activation.py @@ -56,9 +56,9 @@ def test_matmul_tanh(): class Net(nn.Cell): def __init__(self, strategy1, strategy2, strategy3): super().__init__() - self.matmul1 = P.MatMul().set_strategy(strategy1) - self.matmul2 = P.MatMul().set_strategy(strategy2) - self.tanh = P.Tanh().set_strategy(strategy3) + self.matmul1 = P.MatMul().shard(strategy1) + self.matmul2 = P.MatMul().shard(strategy2) + self.tanh = P.Tanh().shard(strategy3) def construct(self, x, y, b): out = self.tanh(self.matmul1(x, y)) @@ -82,9 +82,9 @@ def test_matmul_activation(): class Net(nn.Cell): def __init__(self, strategy1, strategy2, strategy3): super().__init__() - self.matmul1 = P.MatMul().set_strategy(strategy1) - self.matmul2 = P.MatMul().set_strategy(strategy2) - self.activation = P.ReLU().set_strategy(strategy3) + self.matmul1 = P.MatMul().shard(strategy1) + self.matmul2 = P.MatMul().shard(strategy2) + self.activation = P.ReLU().shard(strategy3) def construct(self, x, y, b): out = self.activation(self.matmul1(x, y)) @@ -108,9 +108,9 @@ def test_matmul_softmax(): class Net(nn.Cell): def __init__(self, strategy1, strategy2, strategy3): super().__init__() - self.matmul1 = P.MatMul().set_strategy(strategy1) - self.matmul2 = P.MatMul().set_strategy(strategy2) - self.softmax = P.Softmax().set_strategy(strategy3) + self.matmul1 = P.MatMul().shard(strategy1) + self.matmul2 = P.MatMul().shard(strategy2) + self.softmax = P.Softmax().shard(strategy3) def construct(self, x, y, b): out = self.softmax(self.matmul1(x, y)) @@ -134,9 +134,9 @@ def test_matmul_logsoftmax(): class Net(nn.Cell): def __init__(self, strategy1, strategy2, strategy3): super().__init__() - self.matmul1 = P.MatMul().set_strategy(strategy1) - self.matmul2 = P.MatMul().set_strategy(strategy2) - self.logsoftmax = 
P.LogSoftmax().set_strategy(strategy3) + self.matmul1 = P.MatMul().shard(strategy1) + self.matmul2 = P.MatMul().shard(strategy2) + self.logsoftmax = P.LogSoftmax().shard(strategy3) def construct(self, x, y, b): out = self.logsoftmax(self.matmul1(x, y)) @@ -160,12 +160,12 @@ def test_activations(): class Net(nn.Cell): def __init__(self, strategy1, strategy2, strategy3): super().__init__() - self.matmul1 = P.MatMul().set_strategy(strategy1) - self.matmul2 = P.MatMul().set_strategy(strategy2) - self.gelu = P.Gelu().set_strategy(strategy3) - self.tanh = P.Tanh().set_strategy(strategy3) - self.softmax = P.Softmax().set_strategy(strategy3) - self.logsoftmax = P.LogSoftmax().set_strategy(strategy3) + self.matmul1 = P.MatMul().shard(strategy1) + self.matmul2 = P.MatMul().shard(strategy2) + self.gelu = P.Gelu().shard(strategy3) + self.tanh = P.Tanh().shard(strategy3) + self.softmax = P.Softmax().shard(strategy3) + self.logsoftmax = P.LogSoftmax().shard(strategy3) def construct(self, x, y, b): out = self.gelu(self.tanh(self.matmul1(x, y))) @@ -189,12 +189,12 @@ def test_activations_repeated_calculation(): class Net(nn.Cell): def __init__(self, strategy1, strategy2, strategy3, strategy4, strategy5, strategy6): super().__init__() - self.matmul1 = P.MatMul().set_strategy(strategy1) - self.matmul2 = P.MatMul().set_strategy(strategy2) - self.gelu = P.Gelu().set_strategy(strategy3) - self.tanh = P.Tanh().set_strategy(strategy4) - self.softmax = P.Softmax().set_strategy(strategy5) - self.logsoftmax = P.LogSoftmax().set_strategy(strategy6) + self.matmul1 = P.MatMul().shard(strategy1) + self.matmul2 = P.MatMul().shard(strategy2) + self.gelu = P.Gelu().shard(strategy3) + self.tanh = P.Tanh().shard(strategy4) + self.softmax = P.Softmax().shard(strategy5) + self.logsoftmax = P.LogSoftmax().shard(strategy6) def construct(self, x, y, b): out = self.gelu(self.tanh(self.matmul1(x, y))) @@ -221,12 +221,12 @@ def test_activations_axis_tuple(): class Net(nn.Cell): def __init__(self, strategy1, strategy2, strategy3, strategy4, strategy5, strategy6): super().__init__() - self.matmul1 = P.MatMul().set_strategy(strategy1) - self.matmul2 = P.MatMul().set_strategy(strategy2) - self.gelu = P.Gelu().set_strategy(strategy3) - self.tanh = P.Tanh().set_strategy(strategy4) - self.softmax = P.Softmax(axis=(0, 1)).set_strategy(strategy5) - self.logsoftmax = P.LogSoftmax().set_strategy(strategy6) + self.matmul1 = P.MatMul().shard(strategy1) + self.matmul2 = P.MatMul().shard(strategy2) + self.gelu = P.Gelu().shard(strategy3) + self.tanh = P.Tanh().shard(strategy4) + self.softmax = P.Softmax(axis=(0, 1)).shard(strategy5) + self.logsoftmax = P.LogSoftmax().shard(strategy6) def construct(self, x, y, b): out = self.gelu(self.tanh(self.matmul1(x, y))) diff --git a/tests/ut/python/parallel/test_initializer_weight_slice.py b/tests/ut/python/parallel/test_initializer_weight_slice.py index 049bb65fdaa..85faf9fc216 100644 --- a/tests/ut/python/parallel/test_initializer_weight_slice.py +++ b/tests/ut/python/parallel/test_initializer_weight_slice.py @@ -28,8 +28,8 @@ class Net(nn.Cell): def __init__(self, strategy1, strategy2, weight): super().__init__() self.weight = Parameter(weight, "w1") - self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1) - self.relu = P.ReLU().set_strategy(strategy2) + self.matmul = P.MatMul(transpose_a=False, transpose_b=True).shard(strategy1) + self.relu = P.ReLU().shard(strategy2) def construct(self, x): out = self.matmul(x, self.weight) diff --git 
a/tests/ut/python/parallel/test_l2normalize.py b/tests/ut/python/parallel/test_l2normalize.py index 8a26bf39432..850e71eb938 100644 --- a/tests/ut/python/parallel/test_l2normalize.py +++ b/tests/ut/python/parallel/test_l2normalize.py @@ -52,10 +52,10 @@ def test_l2normalize_matmul(): class Net(nn.Cell): def __init__(self, strategy1, strategy2, strategy3): super().__init__() - self.norm1 = P.L2Normalize(axis=0).set_strategy(strategy1) - self.norm2 = P.L2Normalize(axis=0).set_strategy(strategy1) - self.mul1 = P.Mul().set_strategy(strategy2) - self.mul2 = P.Mul().set_strategy(strategy3) + self.norm1 = P.L2Normalize(axis=0).shard(strategy1) + self.norm2 = P.L2Normalize(axis=0).shard(strategy1) + self.mul1 = P.Mul().shard(strategy2) + self.mul2 = P.Mul().shard(strategy3) def construct(self, x, y, b): y = self.norm1(y) diff --git a/tests/ut/python/parallel/test_layer_norm.py b/tests/ut/python/parallel/test_layer_norm.py index 08fe687a73d..78a019a80c9 100644 --- a/tests/ut/python/parallel/test_layer_norm.py +++ b/tests/ut/python/parallel/test_layer_norm.py @@ -28,9 +28,9 @@ class Net(Cell): super().__init__() self.begin_norm_axis = 2 self.begin_params_axis = 1 - self.mul = P.Mul().set_strategy(strategy1) - self.layer_norm = P.LayerNorm(self.begin_norm_axis, self.begin_params_axis).set_strategy(strategy2) - self.mul2 = P.Mul().set_strategy(strategy3) + self.mul = P.Mul().shard(strategy1) + self.layer_norm = P.LayerNorm(self.begin_norm_axis, self.begin_params_axis).shard(strategy2) + self.mul2 = P.Mul().shard(strategy3) self.mul_weight = Parameter(mul_weight, "w1") self.normalized_shape = [64, 32, 16] self.gamma = Parameter(initializer('ones', self.normalized_shape), name="gamma") diff --git a/tests/ut/python/parallel/test_linear.py b/tests/ut/python/parallel/test_linear.py index b0fd4105669..1b3cecad679 100644 --- a/tests/ut/python/parallel/test_linear.py +++ b/tests/ut/python/parallel/test_linear.py @@ -29,7 +29,7 @@ grad_all = C.GradOperation(get_all=True) class NetWithLoss(nn.Cell): def __init__(self, network, strategy3): super(NetWithLoss, self).__init__() - self.loss = P.SoftmaxCrossEntropyWithLogits().set_strategy(strategy3) + self.loss = P.SoftmaxCrossEntropyWithLogits().shard(strategy3) self.network = network def construct(self, x, y, bias, label): @@ -50,9 +50,9 @@ def test_linear(): class Net(nn.Cell): def __init__(self, strategy0, strategy1, strategy2): super().__init__() - self.fc_nobias = P.MatMul(transpose_b=True).set_strategy(strategy0) - self.add = P.TensorAdd().set_strategy(strategy1) - self.gelu = P.Gelu().set_strategy(strategy2) + self.fc_nobias = P.MatMul(transpose_b=True).shard(strategy0) + self.add = P.TensorAdd().shard(strategy1) + self.gelu = P.Gelu().shard(strategy2) def construct(self, x, y, bias): out = self.fc_nobias(x, y) diff --git a/tests/ut/python/parallel/test_loop_two_matmul.py b/tests/ut/python/parallel/test_loop_two_matmul.py index 5b066d53a84..9c36bff0d18 100644 --- a/tests/ut/python/parallel/test_loop_two_matmul.py +++ b/tests/ut/python/parallel/test_loop_two_matmul.py @@ -71,8 +71,8 @@ def test_two_matmul(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul1 = P.MatMul().set_strategy(strategy1) - self.matmul2 = P.MatMul().set_strategy(strategy2) + self.matmul1 = P.MatMul().shard(strategy1) + self.matmul2 = P.MatMul().shard(strategy2) def construct(self, x, y, b): out = self.matmul1(x, y) diff --git a/tests/ut/python/parallel/test_loss_and_optimizer.py b/tests/ut/python/parallel/test_loss_and_optimizer.py index 
b04f447b2af..215c6dd8d29 100644
--- a/tests/ut/python/parallel/test_loss_and_optimizer.py
+++ b/tests/ut/python/parallel/test_loss_and_optimizer.py
@@ -27,7 +27,7 @@ from mindspore.ops import operations as P
 class NetWithLoss(nn.Cell):
     def __init__(self, network, strategy3):
         super(NetWithLoss, self).__init__()
-        self.loss = P.SoftmaxCrossEntropyWithLogits().set_strategy(strategy3)
+        self.loss = P.SoftmaxCrossEntropyWithLogits().shard(strategy3)
         self.network = network
 
     def construct(self, x, b):
@@ -45,8 +45,8 @@ def test_momentum():
         def __init__(self, strategy1, strategy2, weight):
             super().__init__()
             self.weight = Parameter(weight, "w1")
-            self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1)
-            self.relu = P.ReLU().set_strategy(strategy2)
+            self.matmul = P.MatMul(transpose_a=False, transpose_b=True).shard(strategy1)
+            self.relu = P.ReLU().shard(strategy2)
 
         def construct(self, x):
             out = self.matmul(x, self.weight)
@@ -79,8 +79,8 @@ def test_momentum_with_loss_scale():
         def __init__(self, strategy1, strategy2, weight):
             super().__init__()
             self.weight = Parameter(weight, "w1")
-            self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1)
-            self.relu = P.ReLU().set_strategy(strategy2)
+            self.matmul = P.MatMul(transpose_a=False, transpose_b=True).shard(strategy1)
+            self.relu = P.ReLU().shard(strategy2)
 
         def construct(self, x):
             out = self.matmul(x, self.weight)
@@ -113,8 +113,8 @@ def test_momentum_with_dynamic_lr():
         def __init__(self, strategy1, strategy2, weight):
             super().__init__()
             self.weight = Parameter(weight, "w1")
-            self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1)
-            self.relu = P.ReLU().set_strategy(strategy2)
+            self.matmul = P.MatMul(transpose_a=False, transpose_b=True).shard(strategy1)
+            self.relu = P.ReLU().shard(strategy2)
 
         def construct(self, x):
             out = self.matmul(x, self.weight)
@@ -148,8 +148,8 @@ def test_momentum_with_loss_scale_and_dynamic_lr():
         def __init__(self, strategy1, strategy2, weight):
             super().__init__()
             self.weight = Parameter(weight, "w1")
-            self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1)
-            self.relu = P.ReLU().set_strategy(strategy2)
+            self.matmul = P.MatMul(transpose_a=False, transpose_b=True).shard(strategy1)
+            self.relu = P.ReLU().shard(strategy2)
 
         def construct(self, x):
             out = self.matmul(x, self.weight)
@@ -184,8 +184,8 @@ def test_lars():
         def __init__(self, strategy1, strategy2, weight):
             super().__init__()
             self.weight = Parameter(weight, "w1")
-            self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1)
-            self.relu = P.ReLU().set_strategy(strategy2)
+            self.matmul = P.MatMul(transpose_a=False, transpose_b=True).shard(strategy1)
+            self.relu = P.ReLU().shard(strategy2)
 
         def construct(self, x):
             out = self.matmul(x, self.weight)
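Editor's note (illustrative, not part of the patch): like set_strategy() before it, shard() returns the primitive itself, so it composes with add_prim_attr() in a single expression — the pattern the test_manual_embedding_lookup hunks below rely on. A hypothetical sketch assuming an 8-device row-sliced table; the strategy and attribute values are examples only:

    from mindspore.ops import operations as P

    # shard() returns the primitive, so further attributes can be chained;
    # both calls configure and return the same `lookup` primitive.
    lookup = P.EmbeddingLookup().shard(((8, 1), (1, 1)))
    lookup.add_prim_attr("primitive_target", "CPU")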
diff --git a/tests/ut/python/parallel/test_manual_embedding_lookup.py b/tests/ut/python/parallel/test_manual_embedding_lookup.py
index 945296dcec0..22741f86958 100644
--- a/tests/ut/python/parallel/test_manual_embedding_lookup.py
+++ b/tests/ut/python/parallel/test_manual_embedding_lookup.py
@@ -36,12 +36,12 @@ class Net(Cell):
                  split_string="manual_split", param_shape=(8, 8)):
         super().__init__()
-        self.gatherv2 = P.EmbeddingLookup().set_strategy(strategy1)
+        self.gatherv2 = P.EmbeddingLookup().shard(strategy1)
         self.gatherv2.add_prim_attr(split_string, split_tuple)
         self.gatherv2.add_prim_attr("primitive_target", "CPU")
-        self.mul = P.Mul().set_strategy(strategy2)
+        self.mul = P.Mul().shard(strategy2)
         self.reshape = P.Reshape()
-        self.matmul = P.MatMul().set_strategy(strategy3)
+        self.matmul = P.MatMul().shard(strategy3)
         self.matmul.add_prim_attr("forward_reduce_scatter", True)
         if init_flag:
             self.param = Parameter(initializer("ones", param_shape, ms.float32), name="gatherv2_param")
diff --git a/tests/ut/python/parallel/test_manual_gatherv2.py b/tests/ut/python/parallel/test_manual_gatherv2.py
index 5e41109c8c3..1d7ffddc7d7 100644
--- a/tests/ut/python/parallel/test_manual_gatherv2.py
+++ b/tests/ut/python/parallel/test_manual_gatherv2.py
@@ -33,11 +33,11 @@ class Net(Cell):
                  split_string="manual_split", param_shape=(8, 8)):
         super().__init__()
-        self.gatherv2 = P.GatherV2().set_strategy(strategy1)
+        self.gatherv2 = P.GatherV2().shard(strategy1)
         self.gatherv2.add_prim_attr(split_string, split_tuple)
-        self.mul = P.Mul().set_strategy(strategy2)
+        self.mul = P.Mul().shard(strategy2)
         self.reshape = P.Reshape()
-        self.matmul = P.MatMul().set_strategy(strategy3)
+        self.matmul = P.MatMul().shard(strategy3)
         self.matmul.add_prim_attr("forward_reduce_scatter", True)
         if init_flag:
             self.param = Parameter(initializer("ones", param_shape, ms.float32), name="gatherv2_param")
diff --git a/tests/ut/python/parallel/test_matmul_dropout.py b/tests/ut/python/parallel/test_matmul_dropout.py
index 5dfa4cabb3b..70718c7f5c3 100644
--- a/tests/ut/python/parallel/test_matmul_dropout.py
+++ b/tests/ut/python/parallel/test_matmul_dropout.py
@@ -52,11 +52,11 @@ def test_two_matmul_dropout():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2, strategy3):
             super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
+            self.matmul1 = P.MatMul().shard(strategy1)
             self.dropout = nn.Dropout()
-            self.dropout.dropout_do_mask.set_strategy(strategy2)
-            self.dropout.dropout_gen_mask.set_strategy(strategy2)
-            self.matmul2 = P.MatMul().set_strategy(strategy3)
+            self.dropout.dropout_do_mask.shard(strategy2)
+            self.dropout.dropout_gen_mask.shard(strategy2)
+            self.matmul2 = P.MatMul().shard(strategy3)
 
         def construct(self, x, y, b):
             out = self.matmul1(x, y)
diff --git a/tests/ut/python/parallel/test_matmul_tensor.py b/tests/ut/python/parallel/test_matmul_tensor.py
index aff6cfca737..64359d7caa2 100644
--- a/tests/ut/python/parallel/test_matmul_tensor.py
+++ b/tests/ut/python/parallel/test_matmul_tensor.py
@@ -59,9 +59,9 @@ def test_two_matmul():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2, strategy3):
             super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
-            self.matmul2 = P.MatMul().set_strategy(strategy2)
-            self.matmul3 = P.MatMul().set_strategy(strategy3)
+            self.matmul1 = P.MatMul().shard(strategy1)
+            self.matmul2 = P.MatMul().shard(strategy2)
+            self.matmul3 =
P.MatMul().shard(strategy3) self.diag = P.Diag() self.fill = P.Fill() @@ -143,8 +143,8 @@ def test_matmul_add_tensor(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.add = P.TensorAdd().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.add = P.TensorAdd().shard(strategy2) self.b = Tensor(0.9, ms.float32) def construct(self, x, y): diff --git a/tests/ut/python/parallel/test_mix_precision_hybrid_parallel.py b/tests/ut/python/parallel/test_mix_precision_hybrid_parallel.py index 81bb9cae7b8..867246e97ab 100644 --- a/tests/ut/python/parallel/test_mix_precision_hybrid_parallel.py +++ b/tests/ut/python/parallel/test_mix_precision_hybrid_parallel.py @@ -50,9 +50,9 @@ class GradWrap(nn.Cell): class Net1(nn.Cell): def __init__(self, strategy1, strategy2, strategy3): super().__init__() - self.matmul1 = P.MatMul().set_strategy(strategy1) - self.matmul2 = P.MatMul().set_strategy(strategy2) - self.matmul3 = P.MatMul().set_strategy(strategy3) + self.matmul1 = P.MatMul().shard(strategy1) + self.matmul2 = P.MatMul().shard(strategy2) + self.matmul3 = P.MatMul().shard(strategy3) def construct(self, x, y, b): out1 = self.matmul1(x, b) @@ -66,7 +66,7 @@ def test_two_matmul(): def __init__(self, strategy1, strategy2, strategy3, strategy4): super().__init__() self.net1_out = Net1(strategy1, strategy2, strategy3) - self.matmul = P.MatMul().set_strategy(strategy4) + self.matmul = P.MatMul().shard(strategy4) def construct(self, x, y, b, z): out = self.net1_out(x, y, b) diff --git a/tests/ut/python/parallel/test_neg.py b/tests/ut/python/parallel/test_neg.py index 34819373d6f..28dac24ab5d 100644 --- a/tests/ut/python/parallel/test_neg.py +++ b/tests/ut/python/parallel/test_neg.py @@ -24,8 +24,8 @@ from mindspore.ops import operations as P class Net(Cell): def __init__(self, mul_weight, strategy1=None, strategy2=None): super().__init__() - self.mul = P.Mul().set_strategy(strategy1) - self.neg = P.Neg().set_strategy(strategy2) + self.mul = P.Mul().shard(strategy1) + self.neg = P.Neg().shard(strategy2) self.mul_weight = Parameter(mul_weight, "w1") def construct(self, x, b): diff --git a/tests/ut/python/parallel/test_one_hot_net.py b/tests/ut/python/parallel/test_one_hot_net.py index 32f0d2a85c5..9f8eebf9151 100644 --- a/tests/ut/python/parallel/test_one_hot_net.py +++ b/tests/ut/python/parallel/test_one_hot_net.py @@ -85,15 +85,15 @@ class SemiAutoOneHotNet(Cell): self.d = args.d self.e = args.e self.cast = P.Cast() - self.cast.set_strategy(strategy=strategy.twod_strategy) + self.cast.shard(strategy=strategy.twod_strategy) self.cast1 = P.Cast() - self.cast1.set_strategy(strategy=strategy.twod_strategy) + self.cast1.shard(strategy=strategy.twod_strategy) self.cast2 = P.Cast() - self.cast2.set_strategy(strategy=strategy.twod_strategy) + self.cast2.shard(strategy=strategy.twod_strategy) self.cast3 = P.Cast() - self.cast3.set_strategy(strategy=strategy.scalar_strategy) + self.cast3.shard(strategy=strategy.scalar_strategy) self.cast4 = P.Cast() - self.cast4.set_strategy(strategy=strategy.scalar_strategy) + self.cast4.shard(strategy=strategy.scalar_strategy) self.a_const = Tensor(self.a, dtype=mstype.float32) self.b_const = Tensor(self.b, dtype=mstype.float32) self.c_const = Tensor(self.c, dtype=mstype.float32) @@ -102,64 +102,64 @@ class SemiAutoOneHotNet(Cell): self.m_const_zero = Tensor(0, dtype=mstype.float32) self.a_const_one = Tensor(1, dtype=mstype.float32) self.onehot = P.OneHot() - 
self.onehot.set_strategy(strategy=strategy.onehot_strategy) + self.onehot.shard(strategy=strategy.onehot_strategy) self.exp = P.Exp() - self.exp.set_strategy(strategy=strategy.twod_strategy) + self.exp.shard(strategy=strategy.twod_strategy) self.exp2 = P.Exp() - self.exp2.set_strategy(strategy=strategy.twod_strategy) + self.exp2.shard(strategy=strategy.twod_strategy) self.exp3 = P.Exp() - self.exp3.set_strategy(strategy=strategy.twod_strategy) + self.exp3.shard(strategy=strategy.twod_strategy) self.mul_const = P.Mul() - self.mul_const.set_strategy(strategy=strategy.scalar_twod_strategy) + self.mul_const.shard(strategy=strategy.scalar_twod_strategy) self.mul_const2 = P.TensorAdd() - self.mul_const2.set_strategy(strategy=strategy.scalar_twod_strategy) + self.mul_const2.shard(strategy=strategy.scalar_twod_strategy) self.mul_const3 = P.Sub() - self.mul_const3.set_strategy(strategy=strategy.twod_scalar_strategy) + self.mul_const3.shard(strategy=strategy.twod_scalar_strategy) self.mul_const4 = P.Sub() - self.mul_const4.set_strategy(strategy=strategy.scalar_twod_strategy) + self.mul_const4.shard(strategy=strategy.scalar_twod_strategy) self.mul_const5 = P.Mul() - self.mul_const5.set_strategy(strategy=strategy.twod_scalar_strategy) + self.mul_const5.shard(strategy=strategy.twod_scalar_strategy) self.mul = P.Mul() - self.mul.set_strategy(strategy=strategy.twod_twod_strategy) + self.mul.shard(strategy=strategy.twod_twod_strategy) self.mul2 = P.Mul() - self.mul2.set_strategy(strategy=strategy.twod_twod_strategy) + self.mul2.shard(strategy=strategy.twod_twod_strategy) self.mul3 = P.TensorAdd() - self.mul3.set_strategy(strategy=strategy.twod_twod_strategy) + self.mul3.shard(strategy=strategy.twod_twod_strategy) self.mul4 = P.Sub() - self.mul4.set_strategy(strategy=strategy.twod_twodbc_strategy) + self.mul4.shard(strategy=strategy.twod_twodbc_strategy) self.mul5 = P.RealDiv() - self.mul5.set_strategy(strategy=strategy.twod_twodbc_strategy) + self.mul5.shard(strategy=strategy.twod_twodbc_strategy) self.mul6 = P.Mul() - self.mul6.set_strategy(strategy=strategy.twod_twod_strategy) + self.mul6.shard(strategy=strategy.twod_twod_strategy) self.mul7 = P.Mul() - self.mul7.set_strategy(strategy=strategy.twod_scalar_strategy) + self.mul7.shard(strategy=strategy.twod_scalar_strategy) self.mul8 = P.RealDiv() - self.mul8.set_strategy(strategy=strategy.scalar_scalar_strategy) + self.mul8.shard(strategy=strategy.scalar_scalar_strategy) self.mul9 = P.TensorAdd() - self.mul9.set_strategy(strategy=strategy.twod_scalar_strategy) + self.mul9.shard(strategy=strategy.twod_scalar_strategy) self.reduce_max = P.ReduceMax(keep_dims=True) - self.reduce_max.set_strategy(strategy=strategy.twod_strategy) + self.reduce_max.shard(strategy=strategy.twod_strategy) self.reduce_sum = P.ReduceSum(keep_dims=False) - self.reduce_sum.set_strategy(strategy=strategy.twod_strategy) + self.reduce_sum.shard(strategy=strategy.twod_strategy) self.reduce_sum_2 = P.ReduceSum(keep_dims=False) - self.reduce_sum_2.set_strategy(strategy=strategy.twod_strategy) + self.reduce_sum_2.shard(strategy=strategy.twod_strategy) self.reduce_sum_3 = P.ReduceSum(keep_dims=False) - self.reduce_sum_3.set_strategy(strategy=strategy.oned_strategy) + self.reduce_sum_3.shard(strategy=strategy.oned_strategy) self.reshape = P.Reshape() self.log = P.Log() - self.log.set_strategy(strategy=strategy.twod_strategy) + self.log.shard(strategy=strategy.twod_strategy) self.on_value = Tensor(1.0, mstype.float32) self.off_value = Tensor(0.0, mstype.float32) self.normalize = 
P.L2Normalize(axis=1) - self.normalize.set_strategy(strategy=strategy.twod_strategy_m) + self.normalize.shard(strategy=strategy.twod_strategy_m) self.normalize2 = P.L2Normalize(axis=1) - self.normalize2.set_strategy(strategy=strategy.twod_strategy_m) + self.normalize2.shard(strategy=strategy.twod_strategy_m) self.fc = P.MatMul(transpose_b=True) - self.fc.set_strategy(strategy=strategy.twodbc_twod_strategy) + self.fc.shard(strategy=strategy.twodbc_twod_strategy) weight_shape = [args.num_classes, args.emb_size] weight_np = np.zeros(weight_shape, np.float32) self.weight = Parameter(Tensor(weight_np), name='model_parallel_weight') diff --git a/tests/ut/python/parallel/test_one_weight_parameter.py b/tests/ut/python/parallel/test_one_weight_parameter.py index 8cf6b6aa8ee..8bd83148b4b 100644 --- a/tests/ut/python/parallel/test_one_weight_parameter.py +++ b/tests/ut/python/parallel/test_one_weight_parameter.py @@ -29,7 +29,7 @@ grad_by_list = C.GradOperation(get_by_list=True) class NetWithLoss(nn.Cell): def __init__(self, network, strategy3): super(NetWithLoss, self).__init__() - self.loss = P.SoftmaxCrossEntropyWithLogits().set_strategy(strategy3) + self.loss = P.SoftmaxCrossEntropyWithLogits().shard(strategy3) self.network = network def construct(self, x, b): @@ -54,7 +54,7 @@ def test_one_weight_parameter(): def __init__(self, strategy1, weight): super().__init__() self.weight = Parameter(weight, "w1", requires_grad=True) - self.matmul = P.MatMul().set_strategy(strategy1) + self.matmul = P.MatMul().shard(strategy1) def construct(self, x): out = self.matmul(x, self.weight) diff --git a/tests/ut/python/parallel/test_onehot.py b/tests/ut/python/parallel/test_onehot.py index 725e9e33ec5..26a77f40ea0 100644 --- a/tests/ut/python/parallel/test_onehot.py +++ b/tests/ut/python/parallel/test_onehot.py @@ -33,10 +33,10 @@ class NetWithLoss(nn.Cell): def __init__(self, network, strategy3, strategy4, axis): super(NetWithLoss, self).__init__() self.virtual_dataset = _VirtualDataset() - self.one_hot = P.OneHot(axis=axis).set_strategy(strategy3) + self.one_hot = P.OneHot(axis=axis).shard(strategy3) self.on_value = Tensor(2.0, ms.float32) self.off_value = Tensor(1.0, ms.float32) - self.loss = P.SoftmaxCrossEntropyWithLogits().set_strategy(strategy4) + self.loss = P.SoftmaxCrossEntropyWithLogits().shard(strategy4) self.network = network def construct(self, x, y, b): @@ -58,8 +58,8 @@ class GradWrap(nn.Cell): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.gelu = P.Gelu().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.gelu = P.Gelu().shard(strategy2) def construct(self, x, y): out = self.matmul(x, y) diff --git a/tests/ut/python/parallel/test_operator_model_parallel.py b/tests/ut/python/parallel/test_operator_model_parallel.py index 67040934613..340247cff44 100644 --- a/tests/ut/python/parallel/test_operator_model_parallel.py +++ b/tests/ut/python/parallel/test_operator_model_parallel.py @@ -51,7 +51,7 @@ class DenseWrap(Cell): bias_init='zeros', has_bias=True, matmul_strategy=None, - set_strategy=None): + shard=None): super(DenseWrap, self).__init__() @@ -69,8 +69,8 @@ class DenseWrap(Cell): self.bias = Parameter(initializer( 'zeros', [output_channels]), name="bias") - self.matmul = P.MatMul(transpose_b=True).set_strategy(matmul_strategy) - self.bias_add = P.TensorAdd().set_strategy(set_strategy) + self.matmul = P.MatMul(transpose_b=True).shard(matmul_strategy) + self.bias_add = 
P.TensorAdd().shard(shard) def construct(self, x): if self.has_bias: @@ -108,7 +108,7 @@ def conv3x3(in_channels, out_channels, stride=1): conv = Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=0, weight_init=weight, has_bias=False, pad_mode="same") - conv.conv2d.set_strategy(strategy_weight) + conv.conv2d.shard(strategy_weight) return conv @@ -119,7 +119,7 @@ def conv1x1(in_channels, out_channels, stride=1): conv = Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=0, weight_init=weight, has_bias=False, pad_mode="same") - conv.conv2d.set_strategy(strategy_weight) + conv.conv2d.shard(strategy_weight) return conv @@ -130,7 +130,7 @@ def conv7x7(in_channels, out_channels, stride=1): conv = Conv2d(in_channels, out_channels, kernel_size=7, stride=stride, padding=0, weight_init=weight, has_bias=False, pad_mode="same") - conv.conv2d.set_strategy(strategy_weight) + conv.conv2d.shard(strategy_weight) return conv @@ -152,7 +152,7 @@ def bn_with_initialize(out_channels): gamma = weight_variable_1(shape) bn = BatchNorm2d(out_channels, momentum=0.1, eps=0.0001, gamma_init=gamma, beta_init=beta, moving_mean_init=mean, moving_var_init=var) - bn.bn_train.set_strategy(strategy_bn) + bn.bn_train.shard(strategy_bn) return bn @@ -164,7 +164,7 @@ def bn_with_initialize_last(out_channels): gamma = weight_variable_0(shape) bn = BatchNorm2d(out_channels, momentum=0.1, eps=0.0001, gamma_init=gamma, beta_init=beta, moving_mean_init=mean, moving_var_init=var) - bn.bn_train.set_strategy(strategy_bn) + bn.bn_train.shard(strategy_bn) return bn @@ -175,7 +175,7 @@ def fc_with_initialize(input_channels, out_channels): bias = weight_variable_0(bias_shape) return DenseWrap(input_channels, out_channels, weight, bias, has_bias=True, - matmul_strategy=strategy_fc_weight_nobias, set_strategy=strategy_tensor_add) + matmul_strategy=strategy_fc_weight_nobias, shard=strategy_tensor_add) class ResidualBlock(Cell): @@ -197,10 +197,10 @@ class ResidualBlock(Cell): self.conv3 = conv1x1(out_chls, out_channels, stride=1) self.bn3 = bn_with_initialize_last(out_channels) - self.relu1 = P.ReLU().set_strategy(strategy_no_weight) - self.relu2 = P.ReLU().set_strategy(strategy_no_weight) - self.relu3 = P.ReLU().set_strategy(strategy_no_weight) - self.add = TensorAdd().set_strategy(strategy_add) + self.relu1 = P.ReLU().shard(strategy_no_weight) + self.relu2 = P.ReLU().shard(strategy_no_weight) + self.relu3 = P.ReLU().shard(strategy_no_weight) + self.add = TensorAdd().shard(strategy_add) def construct(self, x): identity = x @@ -242,14 +242,14 @@ class ResidualBlockWithDown(Cell): self.conv3 = conv1x1(out_chls, out_channels, stride=1) self.bn3 = bn_with_initialize_last(out_channels) - self.relu1 = P.ReLU().set_strategy(strategy_no_weight) - self.relu2 = P.ReLU().set_strategy(strategy_no_weight) - self.relu3 = P.ReLU().set_strategy(strategy_no_weight) + self.relu1 = P.ReLU().shard(strategy_no_weight) + self.relu2 = P.ReLU().shard(strategy_no_weight) + self.relu3 = P.ReLU().shard(strategy_no_weight) self.down_sample = down_sample self.conv_down_sample = conv1x1(in_channels, out_channels, stride=stride) self.bn_down_sample = bn_with_initialize(out_channels) - self.add = TensorAdd().set_strategy(strategy_add) + self.add = TensorAdd().shard(strategy_add) def construct(self, x): identity = x @@ -296,11 +296,11 @@ class ResNet(Cell): super(ResNet, self).__init__() self.conv1 = conv7x7(3, 64, stride=2) self.bn1 = bn_with_initialize(64) - self.relu = P.ReLU().set_strategy(strategy_no_weight) + self.relu = 
P.ReLU().shard(strategy_no_weight) self.maxpool = MaxPool2d(kernel_size=3, stride=2, pad_mode="same") self.layer1 = MakeLayer0( block, in_channels=64, out_channels=256, stride=1) - self.pool = M.ReduceMean(keep_dims=True).set_strategy(strategy_no_weight) + self.pool = M.ReduceMean(keep_dims=True).shard(strategy_no_weight) self.fc = fc_with_initialize(64 * block.expansion, num_classes) self.flatten = Flatten() @@ -319,11 +319,11 @@ class ResNet(Cell): class ResNetModelParallel(Cell): def __init__(self, block, num_classes=100): super(ResNetModelParallel, self).__init__() - self.relu = P.ReLU().set_strategy(((1, dev_num, 1, 1),)) + self.relu = P.ReLU().shard(((1, dev_num, 1, 1),)) self.maxpool = MaxPool2d(kernel_size=3, stride=2, pad_mode="same") self.layer1 = MakeLayer0( block, in_channels=64, out_channels=256, stride=1) - self.pool = M.ReduceMean(keep_dims=True).set_strategy(strategy_no_weight) + self.pool = M.ReduceMean(keep_dims=True).shard(strategy_no_weight) self.fc = fc_with_initialize(64 * block.expansion, num_classes) self.flatten = Flatten() @@ -363,7 +363,7 @@ def test_resnet_operator_batch_parallel(): net = resnet_operator_net(num_classes) loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') - loss.softmax_cross_entropy.set_strategy(((dev_num, 1), (dev_num, 1))) + loss.softmax_cross_entropy.shard(((dev_num, 1), (dev_num, 1))) opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum) model = Model(net, loss, opt) @@ -388,7 +388,7 @@ def test_resnet_model_parallel(): net = resnet_model_parallel_net(num_classes) loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') - loss.softmax_cross_entropy.set_strategy(((dev_num, 1), (dev_num, 1))) + loss.softmax_cross_entropy.shard(((dev_num, 1), (dev_num, 1))) opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum) model = Model(net, loss, opt) diff --git a/tests/ut/python/parallel/test_optimizer_clone_weight.py b/tests/ut/python/parallel/test_optimizer_clone_weight.py index bdafcba10a9..fa6e3c32b4f 100644 --- a/tests/ut/python/parallel/test_optimizer_clone_weight.py +++ b/tests/ut/python/parallel/test_optimizer_clone_weight.py @@ -27,7 +27,7 @@ from mindspore.ops import operations as P class NetWithLoss(nn.Cell): def __init__(self, network, strategy3): super(NetWithLoss, self).__init__() - self.loss = P.SoftmaxCrossEntropyWithLogits().set_strategy(strategy3) + self.loss = P.SoftmaxCrossEntropyWithLogits().shard(strategy3) self.network = network def construct(self, x, b): @@ -45,8 +45,8 @@ def test_optimizer_clone_weight(): def __init__(self, strategy1, strategy2, weight): super().__init__() self.weight = Parameter(weight, "w1") - self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1) - self.relu = P.ReLU().set_strategy(strategy2) + self.matmul = P.MatMul(transpose_a=False, transpose_b=True).shard(strategy1) + self.relu = P.ReLU().shard(strategy2) def construct(self, x): out = self.matmul(x, self.weight) @@ -80,8 +80,8 @@ def test_optimizer_clone_weight2(): def __init__(self, strategy1, strategy2, weight): super().__init__() self.weight = Parameter(weight, "w1") - self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1) - self.relu = P.ReLU().set_strategy(strategy2) + self.matmul = P.MatMul(transpose_a=False, transpose_b=True).shard(strategy1) + self.relu = P.ReLU().shard(strategy2) def construct(self, x): out = self.matmul(x, self.weight) diff --git 
a/tests/ut/python/parallel/test_parameter_init.py b/tests/ut/python/parallel/test_parameter_init.py index bd36876f0c8..5617b68c1f7 100644 --- a/tests/ut/python/parallel/test_parameter_init.py +++ b/tests/ut/python/parallel/test_parameter_init.py @@ -37,7 +37,7 @@ def test_parameter_init(): def __init__(self, strategy1, weight): super().__init__() self.weight = Parameter(weight, "w1") - self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1) + self.matmul = P.MatMul(transpose_a=False, transpose_b=True).shard(strategy1) def construct(self, x): out = self.matmul(x, self.weight) diff --git a/tests/ut/python/parallel/test_parameter_multi_users.py b/tests/ut/python/parallel/test_parameter_multi_users.py index e977966eb17..051af762c30 100644 --- a/tests/ut/python/parallel/test_parameter_multi_users.py +++ b/tests/ut/python/parallel/test_parameter_multi_users.py @@ -24,8 +24,8 @@ from mindspore.ops import operations as P class Net(Cell): def __init__(self, mul_weight, strategy1=None, strategy2=None): super().__init__() - self.mul = P.Mul().set_strategy(strategy1) - self.mul2 = P.Mul().set_strategy(strategy2) + self.mul = P.Mul().shard(strategy1) + self.mul2 = P.Mul().shard(strategy2) self.mul_weight = Parameter(mul_weight, "w1") def construct(self, x, b): @@ -37,8 +37,8 @@ class Net(Cell): class Net2(Cell): def __init__(self, mul_weight, strategy1=None, strategy2=None): super().__init__() - self.mul = P.Mul().set_strategy(strategy1) - self.mul2 = P.Mul().set_strategy(strategy2) + self.mul = P.Mul().shard(strategy1) + self.mul2 = P.Mul().shard(strategy2) self.mul_weight = Parameter(mul_weight, "w1") def construct(self, x, b): diff --git a/tests/ut/python/parallel/test_prelu.py b/tests/ut/python/parallel/test_prelu.py index e60aafeba0c..7ac0c3cf7b6 100644 --- a/tests/ut/python/parallel/test_prelu.py +++ b/tests/ut/python/parallel/test_prelu.py @@ -90,7 +90,7 @@ def test_prelu_parallel_success1(): class Net(nn.Cell): def __init__(self, strategy): super().__init__() - self.prelu = P.PReLU().set_strategy(strategy) + self.prelu = P.PReLU().shard(strategy) def construct(self, x, y): out = self.prelu(x, y) @@ -110,7 +110,7 @@ def test_prelu_parallel_success2(): class Net(nn.Cell): def __init__(self, strategy): super().__init__() - self.prelu = P.PReLU().set_strategy(strategy) + self.prelu = P.PReLU().shard(strategy) def construct(self, x, y): out = self.prelu(x, y) @@ -148,8 +148,8 @@ def test_prelu_parallel_success3(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.matmul = P.MatMul().set_strategy(strategy1) - self.prelu = P.PReLU().set_strategy(strategy2) + self.matmul = P.MatMul().shard(strategy1) + self.prelu = P.PReLU().shard(strategy2) def construct(self, x, y, w): out = self.matmul(x, y) @@ -173,7 +173,7 @@ def test_prelu_parallel_success4(): class Net(nn.Cell): def __init__(self, strategy): super().__init__() - self.prelu = P.PReLU().set_strategy(strategy) + self.prelu = P.PReLU().shard(strategy) def construct(self, x, y): out = self.prelu(x, y) @@ -193,7 +193,7 @@ def test_prelu_parallel_success5(): class Net(nn.Cell): def __init__(self, strategy): super().__init__() - self.prelu = P.PReLU().set_strategy(strategy) + self.prelu = P.PReLU().shard(strategy) def construct(self, x, y): out = self.prelu(x, y) diff --git a/tests/ut/python/parallel/test_prelu_cell.py b/tests/ut/python/parallel/test_prelu_cell.py index 43a794ea705..554679e97c7 100644 --- a/tests/ut/python/parallel/test_prelu_cell.py +++ 
b/tests/ut/python/parallel/test_prelu_cell.py @@ -70,9 +70,9 @@ class PReLU(nn.Cell): self.w = Parameter(initializer(w, [channel,]), name='a') self.prelu = P.PReLU() - self.relu = P.ReLU().set_strategy(((1,),)) - self.sub = P.Sub().set_strategy(((1,), (1,))) - self.assign_sub = P.AssignSub().set_strategy(((1,), (1,))) + self.relu = P.ReLU().shard(((1,),)) + self.sub = P.Sub().shard(((1,), (1,))) + self.assign_sub = P.AssignSub().shard(((1,), (1,))) def construct(self, x): u = self.relu(self.w) diff --git a/tests/ut/python/parallel/test_reduce_method_info.py b/tests/ut/python/parallel/test_reduce_method_info.py index 07712a2d9df..7d2c100f658 100644 --- a/tests/ut/python/parallel/test_reduce_method_info.py +++ b/tests/ut/python/parallel/test_reduce_method_info.py @@ -82,9 +82,9 @@ def test_sum_mul(): class Net(nn.Cell): def __init__(self, strategy1, strategy2, strategy3): super().__init__() - self.mul1 = P.Mul().set_strategy(strategy1) - self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy2) - self.mul2 = P.Mul().set_strategy(strategy3) + self.mul1 = P.Mul().shard(strategy1) + self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy2) + self.mul2 = P.Mul().shard(strategy3) def construct(self, x, y, b): out = self.mul1(x, y) @@ -109,9 +109,9 @@ def test_sum_mul2(): class Net(nn.Cell): def __init__(self, strategy1, strategy2, strategy3): super().__init__() - self.mul1 = P.Mul().set_strategy(strategy1) - self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy2) - self.mul2 = P.Mul().set_strategy(strategy3) + self.mul1 = P.Mul().shard(strategy1) + self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy2) + self.mul2 = P.Mul().shard(strategy3) def construct(self, x, y, b): out = self.mul1(x, y) @@ -136,9 +136,9 @@ def test_sum_mul3(): class Net(nn.Cell): def __init__(self, strategy1, strategy2, strategy3): super().__init__() - self.mul1 = P.Mul().set_strategy(strategy1) - self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy2) - self.mul2 = P.Mul().set_strategy(strategy3) + self.mul1 = P.Mul().shard(strategy1) + self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy2) + self.mul2 = P.Mul().shard(strategy3) def construct(self, x, y, b): out = self.mul1(x, y) @@ -163,9 +163,9 @@ def test_sum_mul4(): class Net(nn.Cell): def __init__(self, strategy1, strategy2, strategy3): super().__init__() - self.mul1 = P.Mul().set_strategy(strategy1) - self.reduce_sum = P.ReduceSum(keep_dims=True).set_strategy(strategy2) - self.mul2 = P.Mul().set_strategy(strategy3) + self.mul1 = P.Mul().shard(strategy1) + self.reduce_sum = P.ReduceSum(keep_dims=True).shard(strategy2) + self.mul2 = P.Mul().shard(strategy3) def construct(self, x, y, b): out = self.mul1(x, y) @@ -190,8 +190,8 @@ def test_sum_mul5(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.mul1 = P.Mul().set_strategy(strategy1) - self.reduce_sum = P.ReduceSum(keep_dims=True).set_strategy(strategy2) + self.mul1 = P.Mul().shard(strategy1) + self.reduce_sum = P.ReduceSum(keep_dims=True).shard(strategy2) def construct(self, x, y): out = self.mul1(x, y) @@ -213,8 +213,8 @@ def test_sum_mul6(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.mul1 = P.Mul().set_strategy(strategy1) - self.reduce_sum = P.ReduceSum(keep_dims=True).set_strategy(strategy2) + self.mul1 = P.Mul().shard(strategy1) + self.reduce_sum = P.ReduceSum(keep_dims=True).shard(strategy2) def construct(self, x, y): out = self.mul1(x, y) @@ -236,8 +236,8 @@ def 
test_sum_mul7(): class Net(nn.Cell): def __init__(self, strategy1, strategy2): super().__init__() - self.mul1 = P.Mul().set_strategy(strategy1) - self.reduce_sum = P.ReduceSum(keep_dims=True).set_strategy(strategy2) + self.mul1 = P.Mul().shard(strategy1) + self.reduce_sum = P.ReduceSum(keep_dims=True).shard(strategy2) def construct(self, x, y): out = self.mul1(x, y) @@ -259,9 +259,9 @@ def test_max_mul(): class Net(nn.Cell): def __init__(self, strategy1, strategy2, strategy3): super().__init__() - self.mul1 = P.Mul().set_strategy(strategy1) - self.reduce_max = P.ReduceMax(keep_dims=False).set_strategy(strategy2) - self.mul2 = P.Mul().set_strategy(strategy3) + self.mul1 = P.Mul().shard(strategy1) + self.reduce_max = P.ReduceMax(keep_dims=False).shard(strategy2) + self.mul2 = P.Mul().shard(strategy3) def construct(self, x, y, b): out = self.mul1(x, y) @@ -286,9 +286,9 @@ def test_min_mul(): class Net(nn.Cell): def __init__(self, strategy1, strategy2, strategy3): super().__init__() - self.mul1 = P.Mul().set_strategy(strategy1) - self.reduce_min = P.ReduceMin(keep_dims=False).set_strategy(strategy2) - self.mul2 = P.Mul().set_strategy(strategy3) + self.mul1 = P.Mul().shard(strategy1) + self.reduce_min = P.ReduceMin(keep_dims=False).shard(strategy2) + self.mul2 = P.Mul().shard(strategy3) def construct(self, x, y, b): out = self.mul1(x, y) @@ -313,9 +313,9 @@ def test_reduce_mean_mul_float32(): class Net(nn.Cell): def __init__(self, strategy1, strategy2, strategy3): super().__init__() - self.mul1 = P.Mul().set_strategy(strategy1) - self.reduce_mean = P.ReduceMean(keep_dims=False).set_strategy(strategy2) - self.mul2 = P.Mul().set_strategy(strategy3) + self.mul1 = P.Mul().shard(strategy1) + self.reduce_mean = P.ReduceMean(keep_dims=False).shard(strategy2) + self.mul2 = P.Mul().shard(strategy3) def construct(self, x, y, b): out = self.mul1(x, y) @@ -340,9 +340,9 @@ def test_reduce_mean_mul_float32(): class ArgMaxWithValueNet(nn.Cell): def __init__(self, strategy1, strategy2, strategy3): super().__init__() - self.mul1 = P.Mul().set_strategy(strategy1) - self.arg_max_with_value = P.ArgMaxWithValue(keep_dims=False, axis=-1).set_strategy(strategy2) - self.mul2 = P.Mul().set_strategy(strategy3) + self.mul1 = P.Mul().shard(strategy1) + self.arg_max_with_value = P.ArgMaxWithValue(keep_dims=False, axis=-1).shard(strategy2) + self.mul2 = P.Mul().shard(strategy3) def construct(self, x, y, b): out = self.mul1(x, y) @@ -354,9 +354,9 @@ class ArgMaxWithValueNet(nn.Cell): class ArgMinWithValueNet(nn.Cell): def __init__(self, strategy1, strategy2, strategy3): super().__init__() - self.mul1 = P.Mul().set_strategy(strategy1) - self.arg_min_with_value = P.ArgMinWithValue(keep_dims=False, axis=-1).set_strategy(strategy2) - self.mul2 = P.Mul().set_strategy(strategy3) + self.mul1 = P.Mul().shard(strategy1) + self.arg_min_with_value = P.ArgMinWithValue(keep_dims=False, axis=-1).shard(strategy2) + self.mul2 = P.Mul().shard(strategy3) def construct(self, x, y, b): out = self.mul1(x, y) @@ -441,9 +441,9 @@ def test_arg_min_with_value_mul_auto(): class ArgMinWithValueNet2(nn.Cell): def __init__(self, strategy1, strategy2, strategy3): super().__init__() - self.mul1 = P.Mul().set_strategy(strategy1) - self.arg_min_with_value = P.ArgMinWithValue(keep_dims=True, axis=-1).set_strategy(strategy2) - self.relu = P.ReLU().set_strategy(strategy3) + self.mul1 = P.Mul().shard(strategy1) + self.arg_min_with_value = P.ArgMinWithValue(keep_dims=True, axis=-1).shard(strategy2) + self.relu = P.ReLU().shard(strategy3) def construct(self, x, 
y): out = self.mul1(x, y) @@ -486,9 +486,9 @@ def test_cross_batch(): class Net(nn.Cell): def __init__(self, strategy1, strategy2, strategy3): super().__init__() - self.mul1 = P.Mul().set_strategy(strategy1) - self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy2) - self.reduce_mean = P.ReduceMean(keep_dims=False).set_strategy(strategy3).add_prim_attr("cross_batch", True) + self.mul1 = P.Mul().shard(strategy1) + self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy2) + self.reduce_mean = P.ReduceMean(keep_dims=False).shard(strategy3).add_prim_attr("cross_batch", True) def construct(self, x, y): out = self.mul1(x, y) @@ -512,9 +512,9 @@ def test_cross_batch2(): class Net(nn.Cell): def __init__(self, strategy1, strategy2, strategy3): super().__init__() - self.mul1 = P.Mul().set_strategy(strategy1) - self.reduce_mean = P.ReduceMean(keep_dims=False).set_strategy(strategy2) - self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy3).add_prim_attr("cross_batch", True) + self.mul1 = P.Mul().shard(strategy1) + self.reduce_mean = P.ReduceMean(keep_dims=False).shard(strategy2) + self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy3).add_prim_attr("cross_batch", True) def construct(self, x, y): out = self.mul1(x, y) @@ -561,9 +561,9 @@ def test_max_empty_tuple(): class Net(nn.Cell): def __init__(self, strategy1, strategy2, strategy3): super().__init__() - self.mul = P.Mul().set_strategy(strategy1) - self.reduce_max = P.ReduceMax(keep_dims=False).set_strategy(strategy2) - self.add = P.TensorAdd().set_strategy(strategy3) + self.mul = P.Mul().shard(strategy1) + self.reduce_max = P.ReduceMax(keep_dims=False).shard(strategy2) + self.add = P.TensorAdd().shard(strategy3) def construct(self, x, y, b): out = self.mul(x, y) diff --git a/tests/ut/python/parallel/test_reshape.py b/tests/ut/python/parallel/test_reshape.py index 6b07fc150a4..cb2aac7109c 100644 --- a/tests/ut/python/parallel/test_reshape.py +++ b/tests/ut/python/parallel/test_reshape.py @@ -67,9 +67,9 @@ class Dataset(MindData): class ReshapeNet(nn.Cell): def __init__(self, strategy0, strategy1, strategy2): super(ReshapeNet, self).__init__() - self.relu = P.ReLU().set_strategy(strategy0) - self.reshape = P.Reshape().set_strategy(strategy1) - self.matmul = P.MatMul().set_strategy(strategy2) + self.relu = P.ReLU().shard(strategy0) + self.reshape = P.Reshape().shard(strategy1) + self.matmul = P.MatMul().shard(strategy2) self.matmul_weight = Parameter(Tensor(np.ones([25088, 256]), dtype=ms.float32), name="weight") def construct(self, x): @@ -96,8 +96,8 @@ def reshape_common(parallel_mode, strategy0, strategy1, strategy2, strategy_loss net = reshape_net(strategy0, strategy1, strategy2) loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') - loss.softmax_cross_entropy.set_strategy(strategy_loss) - loss.one_hot.set_strategy(((8, 1), (), ())) + loss.softmax_cross_entropy.shard(strategy_loss) + loss.one_hot.shard(((8, 1), (), ())) opt = Momentum(net.trainable_params(), learning_rate, momentum) model = Model(net, loss, opt) model.train(epoch_size, dataset, dataset_sink_mode=False) @@ -206,7 +206,7 @@ class ReshapeNet1(nn.Cell): super(ReshapeNet1, self).__init__() self.virtual_dataset = _VirtualDataset() self.reshape = P.Reshape() - self.matmul = P.MatMul().set_strategy(strategy0) + self.matmul = P.MatMul().shard(strategy0) self.matmul_weight = Parameter(Tensor(np.ones([25088, 256]), dtype=ms.float32), name="weight") self.reshape2 = P.Reshape() @@ -223,7 +223,7 @@ class ReshapeNet2(nn.Cell): 
         super(ReshapeNet2, self).__init__()
         self.virtual_dataset = _VirtualDataset()
         self.reshape = P.Reshape()
-        self.matmul = P.MatMul().set_strategy(strategy0)
+        self.matmul = P.MatMul().shard(strategy0)
         self.matmul_weight = Parameter(Tensor(np.ones([25088, 256]), dtype=ms.float32), name="weight")
         self.reshape2 = P.Reshape()
         self.reduce_sum = P.ReduceSum(keep_dims=True)
@@ -244,7 +244,7 @@ class ReshapeNet3(nn.Cell):
         super(ReshapeNet3, self).__init__()
         self.virtual_dataset = _VirtualDataset()
         self.reshape = P.Reshape()
-        self.matmul = P.MatMul().set_strategy(strategy0)
+        self.matmul = P.MatMul().shard(strategy0)
         self.matmul_weight = Parameter(Tensor(np.ones([25088, 256]), dtype=ms.float32), name="weight")
         self.reshape2 = P.Reshape()
         self.reduce_sum = P.ReduceSum(keep_dims=False)
@@ -266,7 +266,7 @@ class ReshapeNet4(nn.Cell):
         self.virtual_dataset = _VirtualDataset()
         self.reshape = P.Reshape()
         self.reshape2 = P.Reshape()
-        self.matmul = P.MatMul().set_strategy(strategy0)
+        self.matmul = P.MatMul().shard(strategy0)
         self.matmul_weight = Parameter(Tensor(np.ones([25088, 256]), dtype=ms.float32), name="weight")
 
     def construct(self, x):
@@ -282,9 +282,9 @@ class ReshapeNet5(nn.Cell):
         super(ReshapeNet5, self).__init__()
         self.virtual_dataset = _VirtualDataset()
         self.reshape = P.Reshape()
-        self.matmul1 = P.MatMul().set_strategy(strategy0)
+        self.matmul1 = P.MatMul().shard(strategy0)
         self.matmul1_weight = Parameter(Tensor(np.ones([25088, 256]), dtype=ms.float32), name="weight")
-        self.matmul2 = P.MatMul().set_strategy(strategy0)
+        self.matmul2 = P.MatMul().shard(strategy0)
 
     def construct(self, x):
         x = self.virtual_dataset(x)
@@ -299,10 +299,10 @@ class ReshapeNet6(nn.Cell):
         super(ReshapeNet6, self).__init__()
         self.virtual_dataset = _VirtualDataset()
         self.reshape = P.Reshape()
-        self.matmul1_1 = P.MatMul().set_strategy(strategy0)
-        self.matmul1_2 = P.MatMul().set_strategy(strategy0)
+        self.matmul1_1 = P.MatMul().shard(strategy0)
+        self.matmul1_2 = P.MatMul().shard(strategy0)
         self.matmul1_weight = Parameter(Tensor(np.ones([25088, 256]), dtype=ms.float32), name="weight")
-        self.matmul2 = P.MatMul().set_strategy(strategy0)
+        self.matmul2 = P.MatMul().shard(strategy0)
         self.add = P.TensorAdd()
 
     def construct(self, x):
@@ -552,7 +552,7 @@ class ParallelReduceMeanNet(nn.Cell):
         self.flat = nn.Flatten()
         self.reducemean_axis = reducemean_axis
         if strategy is not None:
-            self.reduce_mean.set_strategy(strategy)
+            self.reduce_mean.shard(strategy)
 
     def construct(self, inputs):
         x = self.conv(inputs)
@@ -626,7 +626,7 @@ class ParallelReshapeNet(nn.Cell):
                               has_bias=True)
         self.reshape = P.Reshape()
         self.shape = shape
-        self.reshape.set_strategy(strategy)
+        self.reshape.shard(strategy)
 
     def construct(self, inputs):
         x = self.flat(inputs)
diff --git a/tests/ut/python/parallel/test_reshape_parameter.py b/tests/ut/python/parallel/test_reshape_parameter.py
index f074f566f1b..3b23c4d13a1 100644
--- a/tests/ut/python/parallel/test_reshape_parameter.py
+++ b/tests/ut/python/parallel/test_reshape_parameter.py
@@ -51,7 +51,7 @@ class Net(nn.Cell):
     def __init__(self, strategy):
         super().__init__()
         self.reshape = P.Reshape()
-        self.mul = P.Mul().set_strategy(strategy)
+        self.mul = P.Mul().shard(strategy)
         self.relu = P.ReLU()
 
     def construct(self, x, y):
diff --git a/tests/ut/python/parallel/test_reshape_skip_redistribution.py b/tests/ut/python/parallel/test_reshape_skip_redistribution.py
index cbaf20d1132..29c0144301c 100644
--- a/tests/ut/python/parallel/test_reshape_skip_redistribution.py
+++ b/tests/ut/python/parallel/test_reshape_skip_redistribution.py
@@ -24,7 +24,7 @@ from mindspore.ops import operations as P
 class Net(Cell):
     def __init__(self, matmul_weight, strategy1=None):
         super().__init__()
-        self.gatherv2 = P.GatherV2().set_strategy(strategy1)
+        self.gatherv2 = P.GatherV2().shard(strategy1)
         self.reshape = P.Reshape().add_prim_attr("skip_redistribution", True)
         self.matmul = P.MatMul(transpose_b=False)
         self.index = Tensor(np.ones([64, 64]), dtype=ms.int32)
diff --git a/tests/ut/python/parallel/test_scalar_loss.py b/tests/ut/python/parallel/test_scalar_loss.py
index 0f8dcc03f87..1043b2997c3 100644
--- a/tests/ut/python/parallel/test_scalar_loss.py
+++ b/tests/ut/python/parallel/test_scalar_loss.py
@@ -40,9 +40,9 @@ def test_sum_as_loss():
     class Net(nn.Cell):
         def __init__(self, strategy0, strategy1):
             super().__init__()
-            self.fc_nobias = P.MatMul(transpose_b=True).set_strategy(strategy0)
-            self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy1)
-            self.mul = P.Mul().set_strategy(strategy=((), ()))
+            self.fc_nobias = P.MatMul(transpose_b=True).shard(strategy0)
+            self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy1)
+            self.mul = P.Mul().shard(strategy=((), ()))
 
         def construct(self, x, y):
             out = self.fc_nobias(x, y)
diff --git a/tests/ut/python/parallel/test_semi_auto_two_subgraphs.py b/tests/ut/python/parallel/test_semi_auto_two_subgraphs.py
index 95e642cf3d2..85fc37c4972 100644
--- a/tests/ut/python/parallel/test_semi_auto_two_subgraphs.py
+++ b/tests/ut/python/parallel/test_semi_auto_two_subgraphs.py
@@ -42,8 +42,8 @@ class Net(nn.Cell):
 class NetWithLoss(nn.Cell):
     def __init__(self, network):
         super(NetWithLoss, self).__init__()
-        self.sum = P.ReduceSum(keep_dims=False).set_strategy(strategy=((4, 1, 1, 1),))
-        self.mean = P.ReduceMean(keep_dims=False).set_strategy(strategy=((8, 1, 1, 1),))
+        self.sum = P.ReduceSum(keep_dims=False).shard(strategy=((4, 1, 1, 1),))
+        self.mean = P.ReduceMean(keep_dims=False).shard(strategy=((8, 1, 1, 1),))
         self.net = network
 
     def construct(self, x):
diff --git a/tests/ut/python/parallel/test_sigmoid_cross_entropy_with_logits.py b/tests/ut/python/parallel/test_sigmoid_cross_entropy_with_logits.py
index b4b956a9e7f..0311f824621 100644
--- a/tests/ut/python/parallel/test_sigmoid_cross_entropy_with_logits.py
+++ b/tests/ut/python/parallel/test_sigmoid_cross_entropy_with_logits.py
@@ -24,8 +24,8 @@ from mindspore.ops import operations as P
 class Net(Cell):
     def __init__(self, mul_weight, strategy1=None, strategy2=None):
         super().__init__()
-        self.mul = P.Mul().set_strategy(strategy1)
-        self.loss = P.SigmoidCrossEntropyWithLogits().set_strategy(strategy2)
+        self.mul = P.Mul().shard(strategy1)
+        self.loss = P.SigmoidCrossEntropyWithLogits().shard(strategy2)
         self.mul_weight = Parameter(mul_weight, "w1")
 
     def construct(self, x, b):
diff --git a/tests/ut/python/parallel/test_softmax_cross_entropy_loss.py b/tests/ut/python/parallel/test_softmax_cross_entropy_loss.py
index 24b45600ba0..69d1f7c47c2 100644
--- a/tests/ut/python/parallel/test_softmax_cross_entropy_loss.py
+++ b/tests/ut/python/parallel/test_softmax_cross_entropy_loss.py
@@ -29,7 +29,7 @@ grad_all = C.GradOperation(get_all=True)
 class NetWithLoss(nn.Cell):
     def __init__(self, network, strategy3=None):
         super(NetWithLoss, self).__init__()
-        self.loss = P.SoftmaxCrossEntropyWithLogits().set_strategy(strategy3)
+        self.loss = P.SoftmaxCrossEntropyWithLogits().shard(strategy3)
         self.network = network
 
     def construct(self, x, y, b):
@@ -55,8 +55,8 @@ def test_softmax_cross_entropy_loss():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul(transpose_b=True).set_strategy(strategy1)
-            self.gelu = P.Gelu().set_strategy(strategy2)
+            self.matmul = P.MatMul(transpose_b=True).shard(strategy1)
+            self.gelu = P.Gelu().shard(strategy2)
 
         def construct(self, x, y):
             out = self.matmul(x, y)
@@ -80,8 +80,8 @@ def test_softmax_cross_entropy_loss_repeated_calculation():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul(transpose_b=True).set_strategy(strategy1)
-            self.gelu = P.Gelu().set_strategy(strategy2)
+            self.matmul = P.MatMul(transpose_b=True).shard(strategy1)
+            self.gelu = P.Gelu().shard(strategy2)
 
         def construct(self, x, y):
             out = self.matmul(x, y)
diff --git a/tests/ut/python/parallel/test_sparse_feature_bprop.py b/tests/ut/python/parallel/test_sparse_feature_bprop.py
index f7de90d9cd2..78dcd6dacb9 100644
--- a/tests/ut/python/parallel/test_sparse_feature_bprop.py
+++ b/tests/ut/python/parallel/test_sparse_feature_bprop.py
@@ -74,7 +74,7 @@ def test_bprop_with_sparse_feature_mirror():
             shape = [8, 8]
             self.index = Tensor(np.ones(shape), dtype=ms.int32)
             self.embeddinglookup = nn.EmbeddingLookup(64, 64, param_init='ones')
-            self.embeddinglookup.embeddinglookup.set_strategy(((1, 1), (8, 1)))
+            self.embeddinglookup.embeddinglookup.shard(((1, 1), (8, 1)))
 
         def construct(self, x, b):
             out = self.embeddinglookup(self.index)
diff --git a/tests/ut/python/parallel/test_sparse_gather_v2.py b/tests/ut/python/parallel/test_sparse_gather_v2.py
index da1c5891792..2250e493207 100644
--- a/tests/ut/python/parallel/test_sparse_gather_v2.py
+++ b/tests/ut/python/parallel/test_sparse_gather_v2.py
@@ -52,8 +52,8 @@ class Net(nn.Cell):
         super().__init__()
         if shape is None:
             shape = [64, 64]
-        self.gatherv2 = P.SparseGatherV2().set_strategy(strategy1).add_prim_attr("primitive_target", target)
-        self.mul = P.Mul().set_strategy(strategy2)
+        self.gatherv2 = P.SparseGatherV2().shard(strategy1).add_prim_attr("primitive_target", target)
+        self.mul = P.Mul().shard(strategy2)
         self.index = Tensor(np.ones(shape), dtype=ms.int32)
         self.axis = axis
diff --git a/tests/ut/python/parallel/test_split_grad_sens.py b/tests/ut/python/parallel/test_split_grad_sens.py
index c77260faa78..e0e01adcb7a 100644
--- a/tests/ut/python/parallel/test_split_grad_sens.py
+++ b/tests/ut/python/parallel/test_split_grad_sens.py
@@ -76,8 +76,8 @@ def test_no_grad():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
-            self.matmul2 = P.MatMul().set_strategy(strategy2)
+            self.matmul1 = P.MatMul().shard(strategy1)
+            self.matmul2 = P.MatMul().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul1(x, y)
@@ -101,8 +101,8 @@ def test_grad_sens_parameter_type():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
-            self.matmul2 = P.MatMul().set_strategy(strategy2)
+            self.matmul1 = P.MatMul().shard(strategy1)
+            self.matmul2 = P.MatMul().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul1(x, y)
@@ -133,8 +133,8 @@ def test_grad_sens_tensor_type():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
-            self.matmul2 = P.MatMul().set_strategy(strategy2)
+            self.matmul1 = P.MatMul().shard(strategy1)
+            self.matmul2 = P.MatMul().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul1(x, y)
@@ -158,8 +158,8 @@ def test_grad_sens_scalar_broadcast():
     class Net(nn.Cell):
         def __init__(self, strategy0, strategy1):
             super().__init__()
-            self.fc_nobias = P.MatMul(transpose_b=True).set_strategy(strategy0)
-            self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy1)
+            self.fc_nobias = P.MatMul(transpose_b=True).shard(strategy0)
+            self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy1)
 
         def construct(self, x, y):
             out = self.fc_nobias(x, y)
diff --git a/tests/ut/python/parallel/test_square.py b/tests/ut/python/parallel/test_square.py
index fff9f1770d6..823a21ad1f0 100644
--- a/tests/ut/python/parallel/test_square.py
+++ b/tests/ut/python/parallel/test_square.py
@@ -24,9 +24,9 @@ from mindspore.ops import operations as P
 class Net(Cell):
     def __init__(self, mul_weight, strategy1=None, strategy2=None):
         super(Net, self).__init__()
-        self.mul = P.Mul().set_strategy(strategy1)
-        self.square = P.Square().set_strategy(strategy2)
-        self.mul2 = P.Mul().set_strategy(strategy1)
+        self.mul = P.Mul().shard(strategy1)
+        self.square = P.Square().shard(strategy2)
+        self.mul2 = P.Mul().shard(strategy1)
         self.mul_weight = Parameter(mul_weight, "w1")
 
     def construct(self, x, b):
diff --git a/tests/ut/python/parallel/test_squeeze_info.py b/tests/ut/python/parallel/test_squeeze_info.py
index 0b3144346ea..1edee945521 100644
--- a/tests/ut/python/parallel/test_squeeze_info.py
+++ b/tests/ut/python/parallel/test_squeeze_info.py
@@ -24,8 +24,8 @@ from mindspore.ops import operations as P
 class Net(Cell):
     def __init__(self, strategy1=None, strategy2=None, axis=()):
         super().__init__()
-        self.squeeze = P.Squeeze(axis=axis).set_strategy(strategy1)
-        self.mul = P.Mul().set_strategy(strategy2)
+        self.squeeze = P.Squeeze(axis=axis).shard(strategy1)
+        self.mul = P.Mul().shard(strategy2)
 
     def construct(self, x, b):
         out = self.squeeze(x)
diff --git a/tests/ut/python/parallel/test_step_parallel.py b/tests/ut/python/parallel/test_step_parallel.py
index a03a151e139..db3bab1ab0b 100644
--- a/tests/ut/python/parallel/test_step_parallel.py
+++ b/tests/ut/python/parallel/test_step_parallel.py
@@ -51,10 +51,10 @@ def test_two_matmul():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2, strategy3, strategy4):
             super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
-            self.matmul2 = P.MatMul().set_strategy(strategy2)
-            self.matmul3 = P.MatMul().set_strategy(strategy3)
-            self.matmul4 = P.MatMul().set_strategy(strategy4)
+            self.matmul1 = P.MatMul().shard(strategy1)
+            self.matmul2 = P.MatMul().shard(strategy2)
+            self.matmul3 = P.MatMul().shard(strategy3)
+            self.matmul4 = P.MatMul().shard(strategy4)
 
         def construct(self, x, y, b, a):
             out = self.matmul1(x, y)
diff --git a/tests/ut/python/parallel/test_strategy_checkpoint.py b/tests/ut/python/parallel/test_strategy_checkpoint.py
index 2d39b7aae65..31e0c20034f 100644
--- a/tests/ut/python/parallel/test_strategy_checkpoint.py
+++ b/tests/ut/python/parallel/test_strategy_checkpoint.py
@@ -51,12 +51,12 @@ def test_six_matmul_save():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2, strategy3, strategy4, strategy5, strategy6):
             super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
-            self.matmul2 = P.MatMul().set_strategy(strategy2)
-            self.matmul3 = P.MatMul().set_strategy(strategy3)
-            self.matmul4 = P.MatMul().set_strategy(strategy4)
-            self.matmul5 = P.MatMul().set_strategy(strategy5)
-            self.matmul6 = P.MatMul().set_strategy(strategy6)
+            self.matmul1 = P.MatMul().shard(strategy1)
+            self.matmul2 = P.MatMul().shard(strategy2)
+            self.matmul3 = P.MatMul().shard(strategy3)
+            self.matmul4 = P.MatMul().shard(strategy4)
+            self.matmul5 = P.MatMul().shard(strategy5)
+            self.matmul6 = P.MatMul().shard(strategy6)
             self.weight1 = Parameter(Tensor(np.ones([32, 64]), dtype=ms.float32), name="weight1")
             self.weight2 = Parameter(Tensor(np.ones([64, 64]), dtype=ms.float32), name="weight2")
             self.weight3 = Parameter(Tensor(np.ones([64, 128]), dtype=ms.float32), name="weight3")
@@ -113,12 +113,12 @@ def test_six_matmul_load():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy3, strategy4, strategy5, strategy6, strategy7):
             super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
-            self.matmul3 = P.MatMul().set_strategy(strategy3)
-            self.matmul4 = P.MatMul().set_strategy(strategy4)
-            self.matmul5 = P.MatMul().set_strategy(strategy5)
-            self.matmul6 = P.MatMul().set_strategy(strategy6)
-            self.matmul7 = P.MatMul().set_strategy(strategy7)
+            self.matmul1 = P.MatMul().shard(strategy1)
+            self.matmul3 = P.MatMul().shard(strategy3)
+            self.matmul4 = P.MatMul().shard(strategy4)
+            self.matmul5 = P.MatMul().shard(strategy5)
+            self.matmul6 = P.MatMul().shard(strategy6)
+            self.matmul7 = P.MatMul().shard(strategy7)
             self.weight1 = Parameter(Tensor(np.ones([32, 64]), dtype=ms.float32), name="weight1")
             self.weight3 = Parameter(Tensor(np.ones([64, 128]), dtype=ms.float32), name="weight3")
             self.weight4 = Parameter(Tensor(np.ones([128, 64]), dtype=ms.float32), name="weight4")
@@ -231,10 +231,10 @@ def test_six_matmul_load_auto():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy3, strategy4, strategy5):
             super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
-            self.matmul3 = P.MatMul().set_strategy(strategy3)
-            self.matmul4 = P.MatMul().set_strategy(strategy4)
-            self.matmul5 = P.MatMul().set_strategy(strategy5)
+            self.matmul1 = P.MatMul().shard(strategy1)
+            self.matmul3 = P.MatMul().shard(strategy3)
+            self.matmul4 = P.MatMul().shard(strategy4)
+            self.matmul5 = P.MatMul().shard(strategy5)
             self.matmul6 = P.MatMul()
             self.matmul7 = P.MatMul()
             self.weight1 = Parameter(Tensor(np.ones([32, 64]), dtype=ms.float32), name="weight1")
diff --git a/tests/ut/python/parallel/test_stridedslice.py b/tests/ut/python/parallel/test_stridedslice.py
index 9ee190b14a0..828b7f80ed8 100644
--- a/tests/ut/python/parallel/test_stridedslice.py
+++ b/tests/ut/python/parallel/test_stridedslice.py
@@ -25,8 +25,8 @@ from mindspore.ops import operations as P
 class Net(Cell):
     def __init__(self, weight, w2, begin, end, strides, strategy1=None, strategy2=None, is_parameter=True, mask=0):
         super().__init__()
-        self.mul = P.Mul().set_strategy(strategy1)
-        self.strided_slice = P.StridedSlice(begin_mask=mask).set_strategy(strategy2)
+        self.mul = P.Mul().shard(strategy1)
+        self.strided_slice = P.StridedSlice(begin_mask=mask).shard(strategy2)
         if is_parameter:
             self.weight = Parameter(weight, "w1")
         else:
@@ -47,8 +47,8 @@ class Net(Cell):
 class Net2(Cell):
     def __init__(self, weight2, begin, end, strides, strategy1=None, strategy2=None):
         super().__init__()
-        self.mul = P.Mul().set_strategy(strategy1)
-        self.strided_slice = P.StridedSlice().set_strategy(strategy2)
+        self.mul = P.Mul().shard(strategy1)
+        self.strided_slice = P.StridedSlice().shard(strategy2)
         self.weight2 = Parameter(weight2, "w2")
         self.begin = begin
         self.end = end
diff --git a/tests/ut/python/parallel/test_sum_as_loss.py b/tests/ut/python/parallel/test_sum_as_loss.py
index bca26d0b2cf..60162cb6e6c 100644
--- a/tests/ut/python/parallel/test_sum_as_loss.py
+++ b/tests/ut/python/parallel/test_sum_as_loss.py
@@ -44,8 +44,8 @@ def test_sum_as_loss():
     class Net(nn.Cell):
         def __init__(self, strategy0, strategy1):
             super().__init__()
-            self.fc_nobias = P.MatMul(transpose_b=True).set_strategy(strategy0)
-            self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy1)
+            self.fc_nobias = P.MatMul(transpose_b=True).shard(strategy0)
+            self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy1)
 
         def construct(self, x, y):
             out = self.fc_nobias(x, y)
@@ -67,8 +67,8 @@ def test_sum_as_loss2():
     class Net(nn.Cell):
         def __init__(self, strategy0, strategy1):
             super().__init__()
-            self.fc_nobias = P.MatMul(transpose_b=True).set_strategy(strategy0)
-            self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy1)
+            self.fc_nobias = P.MatMul(transpose_b=True).shard(strategy0)
+            self.reduce_sum = P.ReduceSum(keep_dims=False).shard(strategy1)
 
         def construct(self, x, y):
             out = self.fc_nobias(x, y)
diff --git a/tests/ut/python/parallel/test_tile.py b/tests/ut/python/parallel/test_tile.py
index 22832460ba0..14cfdfb59ca 100644
--- a/tests/ut/python/parallel/test_tile.py
+++ b/tests/ut/python/parallel/test_tile.py
@@ -24,8 +24,8 @@ from mindspore.ops import operations as P
 class Net(Cell):
     def __init__(self, weight, weight2, strategy1=None, strategy2=None, is_parameter=True):
         super().__init__()
-        self.mul = P.Mul().set_strategy(strategy1)
-        self.tile = P.Tile().set_strategy(strategy2)
+        self.mul = P.Mul().shard(strategy1)
+        self.tile = P.Tile().shard(strategy2)
         if is_parameter:
             self.weight = Parameter(weight, "w1")
         else:
@@ -43,8 +43,8 @@ class Net(Cell):
 class Net2(Cell):
     def __init__(self, weight2, strategy1=None, strategy2=None):
         super().__init__()
-        self.mul = P.Mul().set_strategy(strategy1)
-        self.tile = P.Tile().set_strategy(strategy2)
+        self.mul = P.Mul().shard(strategy1)
+        self.tile = P.Tile().shard(strategy2)
         self.weight2 = Parameter(weight2, "w2")
 
     def construct(self, x, b):
diff --git a/tests/ut/python/parallel/test_train_and_eval.py b/tests/ut/python/parallel/test_train_and_eval.py
index 32abb0d1733..a851e9c3183 100644
--- a/tests/ut/python/parallel/test_train_and_eval.py
+++ b/tests/ut/python/parallel/test_train_and_eval.py
@@ -24,8 +24,8 @@ from mindspore.ops import operations as P
 class Net(Cell):
     def __init__(self, mul_weight, strategy1=None, strategy2=None):
         super().__init__()
-        self.mul = P.Mul().set_strategy(strategy1)
-        self.neg = P.Neg().set_strategy(strategy2)
+        self.mul = P.Mul().shard(strategy1)
+        self.neg = P.Neg().shard(strategy2)
         self.mul_weight = Parameter(mul_weight, "w1")
 
     def construct(self, x, b):
@@ -38,7 +38,7 @@ class EvalNet(Cell):
     def __init__(self, network, strategy2=None):
         super().__init__()
         self.network = network
-        self.relu = P.ReLU().set_strategy(strategy2)
+        self.relu = P.ReLU().shard(strategy2)
 
     def construct(self, x, b):
         out = self.network(x, b)
diff --git a/tests/ut/python/parallel/test_transpose.py b/tests/ut/python/parallel/test_transpose.py
index 791566f6471..478fe942e04 100644
--- a/tests/ut/python/parallel/test_transpose.py
+++ b/tests/ut/python/parallel/test_transpose.py
@@ -50,10 +50,10 @@ class Dataset(MindData):
 class TransposeNet(nn.Cell):
     def __init__(self, strategy1, strategy2):
         super(TransposeNet, self).__init__()
-        self.matmul = P.MatMul().set_strategy(((8, 1), (1, 1)))
+        self.matmul = P.MatMul().shard(((8, 1), (1, 1)))
         self.matmul_weight = Parameter(Tensor(np.ones([128, 256]), dtype=ms.float32), name="weight")
-        self.transpose1 = P.Transpose().set_strategy(strategy1)
-        self.transpose2 = P.Transpose().set_strategy(strategy2)
+        self.transpose1 = P.Transpose().shard(strategy1)
+        self.transpose2 = P.Transpose().shard(strategy2)
 
     def construct(self, x):
         x = self.matmul(x, self.matmul_weight)
@@ -81,7 +81,7 @@ def transpose_common(strategy1, strategy2):
     net = transpose_net(strategy1, strategy2)
 
     loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
-    loss.softmax_cross_entropy.set_strategy(((8, 1), (8, 1)))
+    loss.softmax_cross_entropy.shard(((8, 1), (8, 1)))
     opt = Momentum(net.trainable_params(), learning_rate, momentum)
     context.set_context(mode=context.GRAPH_MODE)
     model = Model(net, loss, opt)
diff --git a/tests/ut/python/parallel/test_two_matmul.py b/tests/ut/python/parallel/test_two_matmul.py
index cf91af463d5..dbef5e7e71a 100644
--- a/tests/ut/python/parallel/test_two_matmul.py
+++ b/tests/ut/python/parallel/test_two_matmul.py
@@ -57,8 +57,8 @@ def test_two_matmul():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
-            self.matmul2 = P.MatMul().set_strategy(strategy2)
+            self.matmul1 = P.MatMul().shard(strategy1)
+            self.matmul2 = P.MatMul().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul1(x, y)
@@ -82,8 +82,8 @@ def test_two_matmul_repeated_calculation1():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
-            self.matmul2 = P.MatMul().set_strategy(strategy2)
+            self.matmul1 = P.MatMul().shard(strategy1)
+            self.matmul2 = P.MatMul().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul1(x, y)
@@ -106,8 +106,8 @@ def test_two_matmul_repeated_calculation2():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
-            self.matmul2 = P.MatMul().set_strategy(strategy2)
+            self.matmul1 = P.MatMul().shard(strategy1)
+            self.matmul2 = P.MatMul().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul1(x, y)
@@ -130,9 +130,9 @@ def test_matmul_forward_reduce_scatter():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul().set_strategy(strategy1)
+            self.matmul = P.MatMul().shard(strategy1)
             self.matmul.add_prim_attr("forward_reduce_scatter", True)
-            self.mul = P.Mul().set_strategy(strategy2)
+            self.mul = P.Mul().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)
@@ -155,9 +155,9 @@ def test_matmul_forward_reduce_scatter_transpose():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2):
             super().__init__()
-            self.matmul = P.MatMul(transpose_b=True).set_strategy(strategy1)
+            self.matmul = P.MatMul(transpose_b=True).shard(strategy1)
             self.matmul.add_prim_attr("forward_reduce_scatter", True)
-            self.mul = P.Mul().set_strategy(strategy2)
+            self.mul = P.Mul().shard(strategy2)
 
         def construct(self, x, y, b):
             out = self.matmul(x, y)
diff --git a/tests/ut/python/parallel/test_two_weights_parameter.py b/tests/ut/python/parallel/test_two_weights_parameter.py
index c05fa63c2d8..50d4fb17b06 100644
--- a/tests/ut/python/parallel/test_two_weights_parameter.py
+++ b/tests/ut/python/parallel/test_two_weights_parameter.py
@@ -29,7 +29,7 @@ grad_by_list = C.GradOperation(get_by_list=True)
 class NetWithLoss(nn.Cell):
     def __init__(self, network, strategy3):
         super(NetWithLoss, self).__init__()
-        self.loss = P.SoftmaxCrossEntropyWithLogits().set_strategy(strategy3)
+        self.loss = P.SoftmaxCrossEntropyWithLogits().shard(strategy3)
         self.network = network
 
     def construct(self, x, b):
@@ -55,8 +55,8 @@ def test_two_weights_parameter():
             super().__init__()
             self.weight = Parameter(weight, "w1", requires_grad=True)
             self.weight2 = Parameter(weight2, "w2", requires_grad=True)
-            self.matmul = P.MatMul().set_strategy(strategy1)
-            self.matmul2 = P.MatMul().set_strategy(strategy2)
+            self.matmul = P.MatMul().shard(strategy1)
+            self.matmul2 = P.MatMul().shard(strategy2)
 
         def construct(self, x):
             out = self.matmul(x, self.weight)
diff --git a/tests/ut/python/parallel/test_virtual_dataset_3_input.py b/tests/ut/python/parallel/test_virtual_dataset_3_input.py
index a3b2f8d96bb..e9015713cab 100644
--- a/tests/ut/python/parallel/test_virtual_dataset_3_input.py
+++ b/tests/ut/python/parallel/test_virtual_dataset_3_input.py
@@ -54,10 +54,10 @@ def test_virtual_dataset_3_input():
     class Net(nn.Cell):
         def __init__(self, strategy0, strategy1, strategy2, strategy3):
             super().__init__()
-            self.virtual_dataset = _VirtualDataset().set_strategy(strategy0)
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
-            self.matmul2 = P.MatMul().set_strategy(strategy2)
-            self.gelu = P.Gelu().set_strategy(strategy3)
+            self.virtual_dataset = _VirtualDataset().shard(strategy0)
+            self.matmul1 = P.MatMul().shard(strategy1)
+            self.matmul2 = P.MatMul().shard(strategy2)
+            self.gelu = P.Gelu().shard(strategy3)
 
         def construct(self, x, y, b):
             x, y, b = self.virtual_dataset(x, y, b)
@@ -83,9 +83,9 @@ def test_virtualdataset_cell_3_inputs():
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2, strategy3):
             super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
-            self.matmul2 = P.MatMul().set_strategy(strategy2)
-            self.gelu = P.Gelu().set_strategy(strategy3)
+            self.matmul1 = P.MatMul().shard(strategy1)
+            self.matmul2 = P.MatMul().shard(strategy2)
+            self.gelu = P.Gelu().shard(strategy3)
 
         def construct(self, x, y, b):
             out = self.gelu(self.matmul1(x, y))