forked from mindspore-Ecosystem/mindspore
!44684 rm set_auto_parallel
Merge pull request !44684 from yangzhenzhang/rm-set-auto-parallel
This commit is contained in: commit 384efc057f
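This commit removes the deprecated per-cell `set_auto_parallel` flow: the parallel mode is now read from the global auto parallel context rather than a flag set on each Cell. A minimal before/after sketch of the calling pattern, distilled from the test changes further down (`net` and `x` stand in for any concrete network and input):

    from mindspore import context
    from mindspore.common.api import _cell_graph_executor

    # Before this commit: the cell had to be flagged explicitly.
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net.set_auto_parallel()   # deprecated; after this commit it only logs a warning
    net.set_train()
    _cell_graph_executor.compile(net, x)

    # After this commit: the context setting alone is enough.
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net.set_train()
    _cell_graph_executor.compile(net, x)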
@@ -37,13 +37,6 @@
    Returns:
        Tuple, the inputs after type conversion.

    .. py:method:: auto_parallel_compile_and_run()

        Whether to execute the compile and run flow in 'AUTO_PARALLEL' or 'SEMI_AUTO_PARALLEL' mode.

        Returns:
            bool, the value of `_auto_parallel_compile_and_run`.

    .. py:method:: bprop_debug
        :property:

@@ -245,15 +238,6 @@
        - **KeyError** - If the parameter name is empty or contains ".".
        - **TypeError** - If the type of the parameter is not Parameter.

    .. py:method:: load_parameter_slice(params)

        Obtain the tensor slices by the parallel strategy and replace the original parameters.

        Please refer to the usage in the source code of `mindspore.common._Executor.compile`.

        Args:
            - **params** (dict) - The parameter dictionary used for initializing the data graph.

    .. py:method:: name_cells()

        Recursively get an iterator over all child cells of the Cell.

@@ -414,12 +398,6 @@
    Returns:
        Output of the Cell.

    .. py:method:: set_auto_parallel()

        Set the cell to auto parallel mode.

        .. note:: If a cell needs to use the auto parallel or semi auto parallel mode for training, evaluation or prediction, this interface needs to be called on that cell.

    .. py:method:: set_boost(boost_type)

        To improve network performance, the algorithms inside boost can be configured so that the framework automatically enables them to accelerate network training.

@@ -497,13 +475,6 @@
        - **task_sink** (bool) - Whether to pass data through the dataset channel. Default: True.

    .. py:method:: set_parallel_input_with_inputs(*inputs)

        Slice the input tensors by the parallel strategy.

        Args:
            - **inputs** (tuple) - Inputs of the construct method.

    .. py:method:: set_param_fl(push_to_server=False, pull_from_server=False, requires_aggr=True)

        Set the way in which the parameters interact with the server.
@@ -24,7 +24,6 @@ import time
import ast
import inspect
import importlib
from collections import OrderedDict
from functools import wraps
import numpy as np
import mindspore as ms

@@ -40,7 +39,7 @@ from mindspore._c_expression import GraphExecutor_, Tensor, MetaTensor, CSRTenso
    _ms_memory_recycle
from mindspore.parallel._ps_context import _is_role_pserver, _is_role_sched, _enable_distributed_mindrt
from mindspore.parallel._utils import _check_full_batch, _get_parameter_broadcast, _is_pynative_parallel, \
    _get_pipeline_stages
    _get_pipeline_stages, _is_in_auto_parallel_mode
from mindspore._checkparam import Validator
from mindspore.common._utils import is_shape_unknown
from mindspore.common.mutable import mutable
@@ -893,16 +892,6 @@ def _function_forbid_reuse(func):
    return func


def _get_auto_split_param_names(parameter_layout_dict):
    auto_split_param_names = []
    for key, value in parameter_layout_dict.items():
        for dim in value[1]:
            if dim != -1:
                auto_split_param_names.append(key)
                break
    return auto_split_param_names


def _build_broadcast_graph(broadcast_params_dict, broadcast_phase):
    """Build broadcast graph."""
    from mindspore.nn.wrap.cell_wrapper import _BroadCastCell
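The helper deleted above is self-contained, so its behavior is easy to illustrate standalone. In the layout dict, `value[1]` is the tensor map, and any entry other than -1 marks that dimension (and therefore the parameter) as split across devices. A runnable sketch; the example layouts below are fabricated for illustration:

    def _get_auto_split_param_names(parameter_layout_dict):
        # verbatim body of the helper removed by this commit
        auto_split_param_names = []
        for key, value in parameter_layout_dict.items():
            for dim in value[1]:
                if dim != -1:
                    auto_split_param_names.append(key)
                    break
        return auto_split_param_names

    layouts = {"w1": ([2, 4], [0, -1]),    # first dim mapped to a device axis -> split
               "bias": ([2, 4], [-1, -1])}  # all -1 -> replicated, not split
    print(_get_auto_split_param_names(layouts))  # ['w1']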
@@ -918,18 +907,9 @@ def _build_broadcast_graph(broadcast_params_dict, broadcast_phase):
        broadcast_params_dict[param_name].set_data(param)


def _parameter_broadcast(obj, auto_parallel_mode):
def _parameter_broadcast(obj):
    """Parameter broadcast."""
    auto_split_param_names = []
    if auto_parallel_mode:
        auto_split_param_names = _get_auto_split_param_names(obj.parameter_layout_dict)

    broadcast_params_dict = obj.parameters_broadcast_dict()
    if auto_split_param_names and broadcast_params_dict:
        broadcast_params_dict = OrderedDict()
        for param_name, param in obj.parameters_broadcast_dict().items():
            if param_name not in auto_split_param_names:
                broadcast_params_dict[param_name] = param
    broadcast_phase = "_broadcast_subgraph"
    _build_broadcast_graph(broadcast_params_dict, broadcast_phase)
@@ -974,20 +954,19 @@ class _PyNativeExecutor:
        return self._executor(sens_param, obj, args)

    @staticmethod
    def parameter_broadcast(obj, phase, auto_parallel_mode):
    def parameter_broadcast(obj, phase):
        """
        Run broadcast for parameter.

        Args:
            obj (Cell): The cell instance.
            phase (str): The phase of cell instance.
            auto_parallel_mode (bool): The flag of running auto parallel.

        Return:
            None.
        """
        if BROADCAST_PHASE not in phase and _get_parameter_broadcast():
            _parameter_broadcast(obj, auto_parallel_mode)
            _parameter_broadcast(obj)

    def real_run_op(self, *args):
        """
@@ -1342,7 +1321,7 @@ class _CellGraphExecutor:
        if "train" in phase and (enable_compile_cache is True or enable_compile_cache == "1"):
            self._graph_executor.set_compile_cache_dep_files(_get_compile_cache_dep_files())

    def compile(self, obj, *args, phase='predict', do_convert=True, auto_parallel_mode=False, jit_config_dict=None):
    def compile(self, obj, *args, phase='predict', do_convert=True, jit_config_dict=None):
        """
        Compiles graph.

@@ -1351,7 +1330,6 @@
            args (tuple): Function or cell input arguments.
            phase (str): The name of compile phase. Default: 'predict'.
            do_convert (bool): When set to True, convert ME graph to GE graph after compiling graph.
            auto_parallel_mode: When set to True, use auto parallel mode to compile graph.
            jit_config_dict (dict): Jit config for compile. Default: None.

        Return:
@@ -1395,10 +1373,11 @@
        if graph is None:
            raise RuntimeError("Compile graph failed for phase {}.".format(phase))

        auto_parallel_mode = _is_in_auto_parallel_mode()
        if not auto_parallel_mode:
            replace = obj.init_parameters_data(auto_parallel_mode=auto_parallel_mode)
            self._update_param_node_default_input(phase, replace)
        else:
        elif 'skip_auto_parallel_compile' not in obj.get_flags().keys():
            obj.parameter_layout_dict = self._graph_executor.get_parameter_layout(phase)
            obj.parallel_parameter_name_list = self._graph_executor.get_parallel_parameter_name_list(phase)
        if _get_pipeline_stages() > 1 and (not hasattr(obj, "is_first_iteration") or not obj.is_first_iteration):

@@ -1413,7 +1392,7 @@
        elif "export" in phase:
            self._build_data_graph(obj, phase)
        elif BROADCAST_PHASE not in phase and _get_parameter_broadcast():
            _parameter_broadcast(obj, auto_parallel_mode)
            _parameter_broadcast(obj)

        return phase, True
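The compile path above now derives the mode itself through `_is_in_auto_parallel_mode()` instead of taking an `auto_parallel_mode` argument. Judging by how the diff uses it, the helper reports whether the global context is in one of the two auto parallel modes. A hedged sketch of the expected semantics (the real implementation lives in `mindspore.parallel._utils` and is not shown in this diff):

    from mindspore import context

    def _is_in_auto_parallel_mode_sketch():
        # assumption: mirrors the helper imported from mindspore.parallel._utils
        return context.get_auto_parallel_context("parallel_mode") in (
            "semi_auto_parallel", "auto_parallel")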
@@ -40,7 +40,6 @@ from mindspore.ops.operations import Cast
from mindspore.ops.primitive import Primitive
from mindspore.ops.operations import _inner_ops as inner
from mindspore.parallel.shard import Shard
from mindspore.parallel._tensor import _load_tensor_by_layout


class Cell(Cell_):

@@ -84,12 +83,11 @@ class Cell(Cell_):
    [Parameter (name=weight, shape=(240, 120, 4, 4), dtype=Float32, requires_grad=True)]
    """

    IGNORE_LIST = ['_scope', '_cell_init_args', '_auto_prefix', '_cells', '_params', '_construct_inputs_names',
                   '_construct_inputs_num', '_create_time', '_func_graph_flags', '_parallel_inputs_run',
                   '_parameter_layout_dict', '_params_list', '_tensor_list', '_phase', '_auto_parallel_mode',
    IGNORE_LIST = ['_scope', '_cell_init_args', '_auto_prefix', '_cells', '_params', '_create_time',
                   '_func_graph_flags', '_parameter_layout_dict', '_params_list', '_tensor_list', '_phase',
                   '_forward_pre_hook', '_forward_hook', '_enable_forward_pre_hook', '_enable_forward_hook',
                   '_bprop_debug', '_enable_backward_hook', '_cell_backward_hook', '_is_run', '_param_prefix',
                   '_attr_synced', 'pynative', 'requires_grad', '_auto_parallel_compile_and_run', 'cell_type']
                   '_attr_synced', 'pynative', 'requires_grad', 'cell_type']

    def __init__(self, auto_prefix=True, flags=None):
        Cell_.__init__(self, self._cell_tag)
@@ -123,10 +121,6 @@
        if os.getenv('GC_COLLECT_IN_CELL') == '1':
            gc.collect()

        self._construct_inputs_num = 0
        self._construct_inputs_names = []
        self._auto_parallel_mode = False
        self._parallel_inputs_run = None
        if flags:
            self.add_flags(**flags)
        self._bprop_debug = False

@@ -138,7 +132,6 @@
        self._cell_backward_hook = None
        self._is_recursion_hook = False
        self.cell_type = None
        self._auto_parallel_compile_and_run = False
        self.cast = Cast()
        self._has_config_recompute = False
        self._user_parameters = []

@@ -385,7 +378,7 @@
    def _do_parameter_broadcast(self):
        if context.get_auto_parallel_context("parallel_mode") == ParallelMode.DATA_PARALLEL:
            if not self.parameter_broadcast_done:
                _pynative_executor.parameter_broadcast(self, self.phase, self._auto_parallel_mode)
                _pynative_executor.parameter_broadcast(self, self.phase)
                self.parameter_broadcast_done = True

    def run_construct(self, cast_inputs, kwargs):
@@ -832,84 +825,20 @@ class Cell(Cell_):
        """
        Replace parameters with sliced tensors by parallel strategies.

        Please refer to the usage in source code of `mindspore.common._CellGraphExecutor.compile`.

        Args:
            params (dict): The parameters dictionary used for initializing the data graph.
        Note:
            This interface is deprecated.
        """
        if params is None:
            params = self.parameters_dict()
        if isinstance(params, OrderedDict):
            for key in params:
                tensor = params[key].data
                if key not in self.parameter_layout_dict:
                    logger.info("The layout dict does not contain the key %s.", key)
                    continue
                if params[key].sliced:
                    logger.debug("The param %s is already sliced.", key)
                    continue
                layout = self.parameter_layout_dict[key]
                new_tensor = _load_tensor_by_layout(tensor, layout)
                params[key].set_data(new_tensor, True)
        else:
            raise TypeError("For 'load_parameter_slice', the argument 'params' must be OrderedDict type, "
                            "but got {}.".format(type(params)))
        logger.warning("'load_parameter_slice' function is deprecated.")

    def _load_inputs(self, *inputs):
        """
        Slice inputs tensors by parallel strategies.

        Args:
            inputs (Function or Cell): inputs of construct method.
        """
        parallel_inputs_run = []
        # judge if *args exists in input
        if self.argspec[1] is not None:
            prefix = self.argspec[1]
            for i in range(len(inputs)):
                key = prefix + str(i)
                self._construct_inputs_names = self._construct_inputs_names + (key,)
                self._construct_inputs_num = self._construct_inputs_num + 1
        for i, tensor in enumerate(inputs):
            key = self._construct_inputs_names[i]
            # if input is not used, self.parameter_layout_dict may not contain the key
            if key not in self.parameter_layout_dict:
                logger.warning("Layout dict does not contain the key %s.", key)
                parallel_inputs_run.append(tensor)
            else:
                layout = self.parameter_layout_dict[key]
                new_tensor = _load_tensor_by_layout(tensor, layout)
                parallel_inputs_run.append(new_tensor)
        return tuple(parallel_inputs_run)

    def set_parallel_input_with_inputs(self, *inputs):
        """
        Slice inputs tensors by parallel strategies.

        Args:
            inputs (tuple): inputs of construct method.
        Note:
            This interface is deprecated.
        """
        self._parallel_inputs_run = self._load_inputs(*inputs)

    def _get_construct_inputs_number_and_name(self):
        """Compute self._construct_inputs_names and self._construct_inputs_num"""
        from mindspore._extends.parse.parser import get_parse_method_of_class

        fn = get_parse_method_of_class(self)
        self.argspec = inspect.getfullargspec(fn)
        self._construct_inputs_num = fn.__code__.co_argcount
        self._construct_inputs_names = fn.__code__.co_varnames

        if self._construct_inputs_num <= 0:
            raise ValueError(f"For 'set_auto_parallel', the number of inputs must be greater than 0,"
                             f"but got {self._construct_inputs_num}.")
        if self._construct_inputs_names[0] != 'self':
            raise ValueError(f"First member of fn function must be self, but got {self._construct_inputs_names[0]}")
        if self._construct_inputs_num - 1 > len(self._construct_inputs_names):
            raise ValueError(f"Num of inputs must be greater than num of fn function members, num of inputs is \
                {self._construct_inputs_names - 1}, num of fn function members is {len(self._construct_inputs_names)}")
        self._construct_inputs_names = self._construct_inputs_names[1:self._construct_inputs_num]
        self._construct_inputs_num = self._construct_inputs_num - 1
        logger.warning("'set_parallel_input_with_inputs' function is deprecated.")

    def set_inputs(self, *inputs):
        """
@@ -975,7 +904,7 @@
            inputs (tuple): Inputs of the Cell object.
        """
        if self._dynamic_shape_inputs is None or self._dynamic_shape_inputs[0] is None:
            _cell_graph_executor.compile(self, *inputs, phase=self.phase, auto_parallel_mode=self._auto_parallel_mode,
            _cell_graph_executor.compile(self, *inputs, phase=self.phase,
                                         jit_config_dict=self._jit_config_dict)
        else:
            self._check_compile_dynamic_shape(*inputs)

@@ -986,7 +915,6 @@

            self.saved_dynamic_shape = self._dynamic_shape_inputs
            _cell_graph_executor.compile(self, *self._dynamic_shape_inputs, phase=self.phase,
                                         auto_parallel_mode=self._auto_parallel_mode,
                                         jit_config_dict=self._jit_config_dict)
            logger.debug("Compiled Graph with dynamic shape")

@@ -1003,7 +931,6 @@
        Returns:
            Object, the result of executing.
        """
        self._auto_parallel_compile_and_run = True
        self.compile(*inputs)

        new_inputs = _get_args_for_run(self, inputs)

@@ -1013,10 +940,10 @@
        """
        Whether or not to execute compile and run in 'AUTO_PARALLEL' or 'SEMI_AUTO_PARALLEL' mode.

        Returns:
            bool, `_auto_parallel_compile_and_run` value.
        Note:
            This interface is deprecated.
        """
        return self._auto_parallel_compile_and_run
        logger.warning("'auto_parallel_compile_and_run' function is deprecated.")

    def exec_checkpoint_graph(self):
        """Executes saving checkpoint graph operation."""
@@ -1652,11 +1579,9 @@
        Set the cell to auto parallel mode.

        Note:
            If a cell needs to use the auto parallel or semi auto parallel mode for training, evaluation or prediction,
            this interface needs to be called by the cell.
            This interface is deprecated.
        """
        self._auto_parallel_mode = True
        self._get_construct_inputs_number_and_name()
        logger.warning("'set_auto_parallel' function is deprecated.")

    def set_jit_config(self, jit_config):
        """
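As the hunks above show, `set_auto_parallel` and `auto_parallel_compile_and_run` are kept only as deprecation stubs: their bodies now reduce to a `logger.warning` call, so existing call sites keep working but have no effect. Illustrative only (`MyNet` is a placeholder Cell subclass):

    net = MyNet()
    net.set_auto_parallel()   # logs: 'set_auto_parallel' function is deprecated.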
@@ -429,13 +429,14 @@ class _AutoParallelContext:
        Set the value of sharding strategy propagation in AUTO_PARALLEL mode. If True, the strategy-configured operators
        will propagate the strategies to other operators with minimum redistribution cost; otherwise, the algorithm
        will search the desired strategies. Default: False.
        This attribute is replaced by context.set_auto_parallel(search_mode="sharding_propagation").
        This attribute is replaced by context.set_auto_parallel_context(search_mode="sharding_propagation").

        Args:
            sharding_propagation (bool): Enable/disable strategy propagation.
        """
        logger.warning("This attribute is replaced by context.set_auto_parallel(search_mode='sharding_propagation'), "
                       "and this attribute will be deleted in a future MindSpore version.")
        logger.warning("This attribute is replaced by "
                       "context.set_auto_parallel_context(search_mode='sharding_propagation'), and this attribute will"
                       " be deleted in a future MindSpore version.")
        self.check_context_handle()
        if not isinstance(sharding_propagation, bool):
            raise TypeError("For 'set_auto_parallel_context().set_sharding_propagation', "
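Both the docstring and the warning above are corrected to point at the real replacement API, `set_auto_parallel_context` (the old message named a non-existent `context.set_auto_parallel`). The recommended call is the one the diff itself names:

    from mindspore import context
    context.set_auto_parallel_context(search_mode="sharding_propagation")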
@@ -32,8 +32,8 @@ from mindspore._checkparam import check_input_data, check_output_data, Validator
from mindspore.train.callback import _InternalCallbackParam, RunContext, _CallbackManager, Callback, TimeMonitor
from mindspore.train.callback import __all__ as internal_cb_names
from mindspore import context
from mindspore.parallel._utils import _get_parallel_mode, _get_device_num, _get_global_rank, \
    _get_parameter_broadcast, _device_number_check, _parameter_broadcast_check, _parallel_predict_check, \
from mindspore.parallel._utils import _get_parallel_mode, _get_device_num, _get_parameter_broadcast, \
    _device_number_check, _parameter_broadcast_check, _parallel_predict_check, \
    _reset_op_id_with_offset
from mindspore.parallel._ps_context import _is_role_worker, _is_role_pserver, _is_role_sched, _is_ps_mode, \
    _cache_enable, _enable_distributed_mindrt

@@ -212,7 +212,6 @@ class Model:
        self._process_amp_args(kwargs)
        self._parallel_mode = _get_parallel_mode()
        self._device_number = _get_device_num()
        self._global_rank = _get_global_rank()
        self._parameter_broadcast = _get_parameter_broadcast()
        self._metrics = metrics

@@ -323,7 +322,6 @@
        # If need to check if loss_fn is not None, but optimizer is None

        if self._parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL):
            network.set_auto_parallel()
        if self._optimizer is None:
            # In this case, multiple optimizer(s) is supposed to be included in 'self._network'
            _set_multi_subgraphs()

@@ -371,14 +369,11 @@
        if self._optimizer is None:
            # In this case, multiple optimizer(s) is supposed to be included in 'self._network'
            _set_multi_subgraphs()
            self._eval_network.set_auto_parallel()

    def _build_predict_network(self):
        """Build the network for prediction."""
        self._predict_network = self._network
        if self._parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL):
            # Unlike the cases in build_train_network() and build_eval_network(), 'multi_subgraphs' is not set
            self._predict_network.set_auto_parallel()
            # Unlike the cases in build_train_network() and build_eval_network(), 'multi_subgraphs' is not set

    def _clear_metrics(self):
        """Clear metrics local values."""

@@ -451,9 +446,6 @@
        network.phase = phase
        self._backbone_is_train = is_train

        if self._parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL):
            network.set_auto_parallel()

        return dataset_helper, network

    def _check_network_mode(self, network, is_train):

@@ -1638,7 +1630,6 @@

        predict_net = self._predict_network
        # Unlike the cases in build_train_network() and build_eval_network(), 'multi_subgraphs' is not set
        predict_net.set_auto_parallel()
        predict_net = self._check_network_mode(predict_net, False)
        predict_net.compile(*predict_data)
        return predict_net.parameter_layout_dict
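Since `Model` now drops its internal `network.set_auto_parallel()` calls, user-facing training code is unchanged. A sketch of the unaffected pattern under stated assumptions (`net`, `loss`, `opt` and `dataset` are placeholders):

    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8)
    model = Model(net, loss_fn=loss, optimizer=opt)
    model.train(1, dataset)   # nothing extra to call; the context carries the mode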
@@ -54,7 +54,7 @@ from mindspore.compression.export import quant_export
from mindspore.parallel._cell_wrapper import get_allgather_cell
from mindspore.parallel._tensor import _load_tensor, _get_tensor_strategy, _get_tensor_slice_index
from mindspore.parallel._tensor import _reshape_param_data, _reshape_param_data_with_weight
from mindspore.parallel._utils import _infer_rank_list, _remove_repeated_slices
from mindspore.parallel._utils import _infer_rank_list, _remove_repeated_slices, _is_in_auto_parallel_mode
from mindspore.parallel._parallel_serialization import _convert_to_list, _convert_to_layout, _build_searched_strategy, \
    _restore_group_info_list
from mindspore.train._utils import read_proto

@@ -1344,9 +1344,8 @@ def _msfunc_info(net, *inputs):

def _cell_info(net, *inputs):
    """Get mindir stream and net dict of cell"""
    phase_name = "predict" if net._auto_parallel_mode else "export.mindir"
    graph_id, _ = _executor.compile(net, *inputs, phase=phase_name,
                                    do_convert=False, auto_parallel_mode=net._auto_parallel_mode)
    phase_name = "predict" if _is_in_auto_parallel_mode() else "export.mindir"
    graph_id, _ = _executor.compile(net, *inputs, phase=phase_name, do_convert=False)
    # pylint: disable=protected-access
    mindir_stream = _executor._get_func_graph_proto(net, graph_id, 'mind_ir')
    # clean obfuscation config to prevent the next call
@@ -26,7 +26,6 @@ import math
from mindspore.train.callback import RunContext
from mindspore import context
from mindspore import nn
from mindspore.context import ParallelMode
from mindspore.train.model import Model
from mindspore.train.dataset_helper import connect_network_with_dataset
from mindspore.parallel._utils import _need_to_full, _to_full_tensor

@@ -140,9 +139,6 @@ class ModelThor(Model):
        network.set_train(is_train)
        network.phase = phase

        if self._parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL):
            network.set_auto_parallel()

        return dataset_helper, network

    def _train_gpu_sink_step(self, cb_params, inputs, list_callback, iter_first_order, run_context):
@@ -236,8 +236,6 @@ def train_net():
    loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
    train_network = build_train_network(net, opt, loss, level="O2", boost_level=config.boost_mode,
                                        loss_scale_manager=loss_scale, keep_batchnorm_fp32=False)
    if config.run_distribute:
        train_network.set_auto_parallel()
    for _ in range(500):
        image = Tensor(np.random.rand(32, 3, 224, 224), dtype=mindspore.float32)
        label = Tensor(np.random.randint(0, 10, [32]), dtype=mindspore.int32)
@@ -61,7 +61,6 @@ def compile_net(net):
                          scale_parameter=scale_parameter, relative_step=relative_step,
                          warmup_init=warmup_init, compression=compression)
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    _cell_graph_executor.compile(train_net, _x, _b)
    context.reset_auto_parallel_context()
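The same one-line deletion repeats across every parallel test below; the migrated helper shape is simply the following sketch, with the concrete inputs varying per file:

    def compile_net(net, *inputs):
        net.set_train()
        _cell_graph_executor.compile(net, *inputs)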
@@ -62,7 +62,6 @@ class Grad(nn.Cell):


def compile_net(net, x, y):
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x, y)


@@ -51,7 +51,6 @@ class GradWrap(nn.Cell):


def compile_net(net, x, y, b):
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x, y, b)


@@ -799,7 +798,6 @@ def test_assign_sub():
            return grad_all(self.network)(x)

    def compile_sub_net(net, x):
        net.set_auto_parallel()
        net.set_train()
        _cell_graph_executor.compile(net, x)

@@ -853,7 +851,6 @@ def test_assign_add():
            return grad_all(self.network)(x)

    def compile_sub_net(net, x):
        net.set_auto_parallel()
        net.set_train()
        _cell_graph_executor.compile(net, x)

@@ -907,7 +904,6 @@ def test_assign():
            return grad_all(self.network)(x)

    def compile_sub_net(net, x):
        net.set_auto_parallel()
        net.set_train()
        _cell_graph_executor.compile(net, x)
@@ -76,6 +76,5 @@ def test_auto_parallel_bn_with_prelu():
    x = Tensor(np.random.rand(16, 16, 32, 64), dtype=ms.float32)

    net = GradWrap(NetWithLoss(Net()))
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x)

@@ -46,7 +46,6 @@ _b = Tensor(np.ones([64, 32]), dtype=ms.float32)
def compile_net(net):
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    _cell_graph_executor.compile(train_net, _x, _b)
    context.reset_auto_parallel_context()

@@ -53,7 +53,6 @@ def compile_net(net, by_grad=True):
    else:
        optimizer = AdaSumByDeltaWeightWrapCell(Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9))
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    _cell_graph_executor.compile(train_net, _x, _b)
    context.reset_auto_parallel_context()

@@ -56,7 +56,6 @@ class GradWrap(nn.Cell):


def compile_net(net, x, y, b, phase):
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x, y, b, phase=phase)
@@ -63,7 +63,6 @@ def test_auto_parallel_assign_sub_with_ref_key():

    net = NetWithLoss(nn.PReLU(4))
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net.set_auto_parallel()
    reset_op_id()

    net.set_train()

@@ -82,7 +82,6 @@ def test_double_star_graph():

    net = NetWithLoss(Net())
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net.set_auto_parallel()
    reset_op_id()

    net.set_train()

@@ -75,6 +75,5 @@ def test_common_parameter():

    net = GradWrap(NetWithLoss(Net()))
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x, y, z)

@@ -53,7 +53,6 @@ class NetRecursive(nn.Cell):
        return self.mul_net(out1, out2)


def compile_net(net, x, y):
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x, y)

@@ -82,7 +82,6 @@ def test_double_source_graph():

    net = GradWrap(NetWithLoss(Net()))
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x, y, z, w, a)

@@ -118,6 +117,5 @@ def test_double_source_complex_graph():

    net = GradWrap(NetWithLoss(Net()))
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x, y, z, w, a)

@@ -86,6 +86,5 @@ def test_double_star_graph():

    net = GradWrap(NetWithLoss(Net()))
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x, y, z, w, a, b, c)
@@ -113,7 +113,6 @@ def test_double_subgraphs():
    context.set_auto_parallel_context(device_num=8, global_rank=0)
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net = TrainStepWarp(NetWithLoss(Net()))
    net.set_auto_parallel()

    x = Tensor(np.ones([8, 8, 8, 8]), dtype=ms.float32)
    reset_op_id()

@@ -73,6 +73,5 @@ def test_two_matmul():

    net = GradWrap(NetWithLoss(Net()))
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x, y, b)

@@ -130,7 +130,6 @@ def test_auto_parallel():
    context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0)
    _set_algo_single_loop(True)
    net = Full(_w1, 3)
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, _x, phase='train')
    num_ops = _cell_graph_executor._get_num_parallel_ops(net)

@@ -131,7 +131,6 @@ def test_double_subgraphs():
    _set_algo_single_loop(True)
    net = TrainStepWarp(NetWithLoss(Net()))
    _set_multi_subgraphs()
    net.set_auto_parallel()

    x = Tensor(np.ones([8, 8, 8, 8]), dtype=ms.float32)
    reset_op_id()

@@ -134,7 +134,6 @@ def test_auto_parallel():
    context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0)
    _set_algo_single_loop(True)
    net = Full(_w1, 3)
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, _x, phase='train')
    num_ops = _cell_graph_executor._get_num_parallel_ops(net)
@@ -52,7 +52,6 @@ class GradWrap(nn.Cell):


def compile_net(net, x, y, z, w, b):
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x, y, z, w, b)


@@ -52,6 +52,5 @@ def test_inference_phase():
    net_with_loss = WithLossCell(net, loss)
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    train_network.set_train()
    train_network.set_auto_parallel()

    _ = train_network(predict, label)

@@ -71,7 +71,6 @@ def test_auto_parallel_l2normalize():
    context.set_auto_parallel_context(device_num=8, global_rank=0)
    net = NetWithLoss(Net())
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net.set_auto_parallel()
    reset_op_id()

    x = Tensor(np.ones([128, 64, 64]), dtype=ms.float32)

@@ -69,7 +69,6 @@ def test_two_matmul_dropout():
    context.set_auto_parallel_context(device_num=8, global_rank=0)
    net = GradWrap(NetWithLoss(Net()))
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net.set_auto_parallel()

    x = Tensor(np.ones([128, 32]), dtype=ms.float32)
    y = Tensor(np.ones([32, 64]), dtype=ms.float32)

@@ -76,7 +76,6 @@ def test_matmul_prelu():

    net = NetWithLoss(Net())
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net.set_auto_parallel()
    reset_op_id()

    net.set_train()
@@ -61,7 +61,6 @@ label_ = Tensor(np.random.randn(128, 96).astype(np.float32), dtype=ms.float32)
def compile_net(net):
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    _cell_graph_executor.compile(train_net, inputs_, label_)
    context.reset_auto_parallel_context()

@@ -98,7 +98,6 @@ def test_auto_parallel_arithmetic():
    context.set_auto_parallel_context(device_num=8, global_rank=0)
    net = GradWrap(NetWithLoss(Net()))
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net.set_auto_parallel()

    x = Tensor(np.ones([64, 32]), dtype=ms.float32)
    y = Tensor(np.ones([32, 64]), dtype=ms.float32)

@@ -125,7 +124,8 @@ def test_auto_parallel_arithmetic_model():
        return out2

    context.reset_auto_parallel_context()
    context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode=ParallelMode.AUTO_PARALLEL, dataset_strategy="data_parallel")
    context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode=ParallelMode.AUTO_PARALLEL,
                                      dataset_strategy="data_parallel")
    net = NetOneHot()

    x = Tensor(np.ones([8, 32]), dtype=ms.float32)

@@ -74,7 +74,6 @@ def test_common_parameter():

    net = NetWithLoss(Net())
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net.set_auto_parallel()
    reset_op_id()

    net.set_train()

@@ -80,6 +80,5 @@ def test_four_matmul_linear():

    net = GradWrap(NetWithLoss(Net(strategy1)))
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x, y, z, w, b)
@@ -52,7 +52,6 @@ class GradWrap(nn.Cell):


def compile_net(net, x, y, b):
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x, y, b)


@@ -82,13 +82,11 @@ class GradWrapTwoInput(nn.Cell):

def compile_graph(net, parallel_mode, device_num, x):
    context.set_auto_parallel_context(device_num=device_num, global_rank=0, parallel_mode=parallel_mode)
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x)

def compile_graph_two_input(net, parallel_mode, device_num, x, y):
    context.set_auto_parallel_context(device_num=device_num, global_rank=0, parallel_mode=parallel_mode)
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x, y)

@@ -52,7 +52,6 @@ class GradWrap(nn.Cell):


def compile_net(net, x, y, b):
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x, y, b)


@@ -70,6 +70,5 @@ def test_auto_parallel_unsortedsegmentmin():
    indices = Tensor(np.random.randint(16, size=(16,)), ms.int32)

    net = GradWrap(NetWithLoss(Net(16)))
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x, indices)

@@ -70,6 +70,5 @@ def test_auto_parallel_unsortedsegmentsum():
    indices = Tensor(np.random.randint(16, size=(16, 16)))

    net = GradWrap(NetWithLoss(Net(16)))
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x, indices)
@@ -47,7 +47,6 @@ _b = Tensor(np.ones([64, 32]), dtype=ms.float32)
def compile_net(net):
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    _cell_graph_executor.compile(train_net, _x, _b)
    context.reset_auto_parallel_context()

@@ -48,7 +48,6 @@ _b = Tensor(np.ones([64, 32]), dtype=ms.float32)
def compile_net(net):
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    _cell_graph_executor.compile(train_net, _x, _b)
    context.reset_auto_parallel_context()

@@ -49,7 +49,6 @@ _b = Tensor(np.ones([64, 32000]), dtype=ms.float32)
def compile_net(net):
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    _cell_graph_executor.compile(train_net, _x, _b)
    context.reset_auto_parallel_context()

@@ -65,7 +65,6 @@ def test_softmax_cross_entropy_loss_auto_parallel():
    context.set_auto_parallel_context(device_num=8, global_rank=0)
    net = GradWrap(NetWithLoss(Net()))
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net.set_auto_parallel()

    x = Tensor(np.ones([64, 32]), dtype=ms.float32)
    y = Tensor(np.ones([64, 32]), dtype=ms.float32)
@@ -90,7 +90,6 @@ def test_star_strategy_consistency1():
                     "relu2": ((2, 2),), "add": ((1, 8), (1, 8))}
    net = NetWithLoss(Net(strategy_dict))
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net.set_auto_parallel()
    reset_op_id()
    net.set_train()
    _cell_graph_executor.compile(net, x, phase='train')

@@ -105,7 +104,6 @@ def test_star_strategy_consistency2():
                     "relu2": ((2, 2),), "add": ((8, 1), (8, 1))}
    net = NetWithLoss(Net(strategy_dict))
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net.set_auto_parallel()
    reset_op_id()
    net.set_train()
    _cell_graph_executor.compile(net, x, phase='train')

@@ -120,7 +118,6 @@ def test_star_strategy_consistency3():
                     "relu2": ((4, 1),), "add": ((2, 2), (2, 2))}
    net = NetWithLoss(Net(strategy_dict))
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net.set_auto_parallel()
    reset_op_id()
    net.set_train()
    _cell_graph_executor.compile(net, x, phase='train')

@@ -135,7 +132,6 @@ def test_star_strategy_consistency4():
                     "relu2": None, "add": None}
    net = NetWithLoss(Net(strategy_dict))
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net.set_auto_parallel()
    reset_op_id()
    with pytest.raises(RuntimeError):
        net.set_train()

@@ -115,6 +115,5 @@ def test_dmnet_train_step():
    input_ = Tensor(np.ones([4096, 4096]).astype(np.float32) * 0.01)
    net = GradWrap(NetWithLoss(MultiTransformer()))
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, input_)
@@ -77,7 +77,6 @@ def test_two_matmul_transpose():

    net = NetWithLoss(Net())
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net.set_auto_parallel()
    reset_op_id()

    net.set_train()

@@ -71,7 +71,6 @@ def test_triangle_strategy_consistency():
    x = Tensor(np.ones([128, 1000]), dtype=ms.float32)
    net = NetWithLoss(Net())
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net.set_auto_parallel()
    reset_op_id()

    net.set_train()

@@ -78,7 +78,6 @@ def test_virtual_dataset_3_input():
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    context.set_auto_parallel_context(device_num=8, global_rank=0)
    net = GradWrap(NetWithLoss(Net()))
    net.set_auto_parallel()
    x = Tensor(np.ones([128, 32]), dtype=ms.float32)
    y = Tensor(np.ones([32, 64]), dtype=ms.float32)
    b = Tensor(np.ones([64, 2048]), dtype=ms.float32)

@@ -76,7 +76,6 @@ def test_two_bn():
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net = NetWithLoss(Net())
    x = Tensor(np.ones([64, 64]), dtype=ms.float32)
    net.set_auto_parallel()
    net.set_train()
    set_algo_parameters(elementwise_op_strategy_follow=True)
    reset_op_id()
@@ -155,7 +155,6 @@ def test_two_matmul():

    net = NetWithLoss(Net())
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net.set_auto_parallel()
    reset_op_id()

    net.set_train()

@@ -74,6 +74,5 @@ def test_four_matmul_linear():

    net = GradWrap(NetWithLoss(Net(strategy1)))
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x, y)

@@ -80,6 +80,5 @@ def test_zig_zag_graph():

    net = GradWrap(NetWithLoss(Net()))
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x, y, z, w, a)

@@ -92,6 +92,5 @@ def test_marin_loss():

    net = GradWrap(NetWithLoss(MarginCE()))
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x, y)

@@ -48,7 +48,6 @@ _b = Tensor(np.ones([128, 64, 16]), dtype=ms.float32)
def compile_net(net):
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    _cell_graph_executor.compile(train_net, _x, _b)
    context.reset_auto_parallel_context()
@@ -112,7 +112,6 @@ def test_batch():
    strategy3 = ((4, 1, 1, 2), (4, 1, 1, 2))

    net = GradWrap(NetWithLoss(Net(strategy1, strategy2, strategy3)))
    net.set_auto_parallel()

    x = Tensor(np.ones([128, 16, 34, 34]), dtype=ms.float32)
    w1 = Tensor(np.ones([128, 8, 32, 32]), dtype=ms.float32)

@@ -134,7 +133,6 @@ def test_batch_shape_less_than_devices():
    strategy3 = None

    net = GradWrap(NetWithLoss(Net(strategy1, strategy2, strategy3)))
    net.set_auto_parallel()

    x = Tensor(np.ones([128, 16, 34, 34]), dtype=ms.float32)
    w1 = Tensor(np.ones([128, 8, 32, 32]), dtype=ms.float32)

@@ -69,7 +69,6 @@ def test_batch_parallel_dropout():
    context.set_auto_parallel_context(device_num=8, global_rank=0)
    net = GradWrap(NetWithLoss(Net()))
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
    net.set_auto_parallel()

    x = Tensor(np.ones([128, 32]), dtype=ms.float32)
    y = Tensor(np.ones([32, 64]), dtype=ms.float32)

@@ -67,7 +67,6 @@ def test_matmul_add():

    net = GradWrap(NetWithLoss(Net()))
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
    net.set_auto_parallel()

    x = Tensor(np.ones([64, 32]), dtype=ms.float32)
    y = Tensor(np.ones([32, 64]), dtype=ms.float32)

@@ -61,7 +61,6 @@ def compile_net(net):
    context.set_context(mode=context.GRAPH_MODE)
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    _cell_graph_executor.compile(train_net, _x)
    context.reset_auto_parallel_context()
@@ -50,7 +50,6 @@ _b = Tensor(np.ones([32, 16, 8, 8]), dtype=ms.float32)
def compile_net(net):
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    _cell_graph_executor.compile(train_net, _x, _b)
    context.reset_auto_parallel_context()

@@ -117,7 +116,6 @@ _b1 = Tensor(np.ones([32, 8]), dtype=ms.float32)
def compile_net2(net):
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    _cell_graph_executor.compile(train_net, _x1, _b1)
    context.reset_auto_parallel_context()

@@ -49,7 +49,6 @@ _b = Tensor(np.ones([128, 64, 32]), dtype=ms.float32)
def compile_net(net):
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    _cell_graph_executor.compile(train_net, _x, _b)
    context.reset_auto_parallel_context()

@@ -44,7 +44,6 @@ class Net(Cell):


def compile_net(net: Cell, *inputs):
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, *inputs)
    context.reset_auto_parallel_context()

@@ -71,7 +71,6 @@ def compile_net(net):
    context.set_context(mode=context.GRAPH_MODE)
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    _cell_graph_executor.compile(train_net, _x1)
    context.reset_auto_parallel_context()

@@ -81,7 +80,6 @@ def compile_net2(net):
    context.set_context(mode=context.GRAPH_MODE)
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    _cell_graph_executor.compile(train_net, _x1, _x2)
    context.reset_auto_parallel_context()
@@ -81,7 +81,6 @@ def compile_net(mp_comm_recompute, recompute_slice_activation):
    label = Tensor(np.zeros([32, 768]).astype(np.float32))
    net = train_step_with_loss_warp(DenseMutMulNet(mp_comm_recompute=mp_comm_recompute,
                                                   recompute_slice_activation=recompute_slice_activation))
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, input_, label)
    _Context().set_backend_policy("ge")

@@ -52,7 +52,6 @@ class GradWrap(nn.Cell):


def compile_net(net, x, y, b):
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x, y, b)


@@ -86,7 +86,6 @@ w3 = Tensor(np.ones([64, 64, 32]), dtype=ms.float32)
def compile_net(net):
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    _cell_graph_executor.compile(train_net, _x, _b)
    context.reset_auto_parallel_context()

@@ -56,7 +56,6 @@ _b = Tensor(np.ones([32, 16, 8, 8]), dtype=ms.float32)
def compile_net(net, input_x=_x):
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    _cell_graph_executor.compile(train_net, input_x, _b)
    context.reset_auto_parallel_context()

@@ -75,7 +75,6 @@ _b = Tensor(np.ones([32, 16, 8, 8]), dtype=ms.float32)
def compile_net(net):
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    _cell_graph_executor.compile(train_net, _x, _b)
    context.reset_auto_parallel_context()
@@ -47,7 +47,6 @@ class Net(Cell):


def compile_net(net: Cell, *inputs):
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, *inputs)
    context.reset_auto_parallel_context()

@@ -110,7 +110,6 @@ def compile_graph(batch_size, num_heads, dp, mp, auto=False, shard=True):
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
    x = Tensor(np.ones((batch_size * 1024, num_heads * 128)), ms.float32)
    net = GradWrap(NetWithLoss(Net(batch_size, num_heads, dp, mp, shard=shard)))
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x)


@@ -75,7 +75,6 @@ _b1 = Tensor(np.ones([32 * 3]), dtype=ms.float32)
def compile_net(net, change_input=False):
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    if change_input:
        _cell_graph_executor.compile(train_net, _x1, _b1)

@@ -40,7 +40,6 @@ class GradWrap(nn.Cell):


def compile_net(net, x, y):
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x, y)


@@ -52,7 +52,6 @@ _b = Tensor(np.ones([128, 64]), dtype=ms.float32)
def compile_net(net):
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    _cell_graph_executor.compile(train_net, _x, _b)
    context.reset_auto_parallel_context()
@@ -54,7 +54,6 @@ _b = Tensor(np.ones([128, 64]), dtype=ms.float32)
def compile_net(net):
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    _cell_graph_executor.compile(train_net, _x, _b)
    context.reset_auto_parallel_context()

@@ -126,7 +126,6 @@ def compile_graph(batch_size, num_heads, dp, mp, auto=False, shard=True):
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
    x = Tensor(np.ones((batch_size * 1024, num_heads * 128)), ms.float32)
    net = GradWrap(NetWithLoss(Net(batch_size, num_heads, dp, mp, shard=shard)))
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x)


@@ -83,7 +83,6 @@ def test_unique_column_split():
    net = Net()
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    _cell_graph_executor.compile(train_net, x)

@@ -117,6 +116,5 @@ def test_unique_row_split():
    net = Net()
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    _cell_graph_executor.compile(train_net, x)

@@ -52,7 +52,6 @@ class GradWrap(nn.Cell):


def compile_net(net, x, y, b):
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x, y, b)
@@ -61,7 +61,6 @@ def test_embeddinglookup_reducescatter_false():
    shape = [8, 8]
    offset = 8
    net = NetWithLoss(Net(shape, offset))
    net.set_auto_parallel()

    x = Tensor(np.ones([64, 32]), dtype=ms.float32)
    y = Tensor(np.ones([8, 32, 8]), dtype=ms.float32)

@@ -73,7 +72,6 @@ def test_embeddinglookup_reducescatter_true():
    shape = [8, 8]
    offset = 8
    net = NetWithLoss(Net(shape, offset))
    net.set_auto_parallel()

    x = Tensor(np.ones([64, 32]), dtype=ms.float32)
    y = Tensor(np.ones([8, 32, 8]), dtype=ms.float32)

@@ -85,7 +83,6 @@ def test_embeddinglookup_reducescatter_false_grad():
    shape = [8, 8]
    offset = 8
    net = GradWrap(NetWithLoss(Net(shape, offset)))
    net.set_auto_parallel()

    x = Tensor(np.ones([64, 32]), dtype=ms.float32)
    y = Tensor(np.ones([8, 32, 8]), dtype=ms.float32)

@@ -97,7 +94,6 @@ def test_embeddinglookup_reducescatter_true_grad():
    shape = [8, 8]
    offset = 8
    net = GradWrap(NetWithLoss(Net(shape, offset)))
    net.set_auto_parallel()

    x = Tensor(np.ones([64, 32]), dtype=ms.float32)
    y = Tensor(np.ones([8, 32, 8]), dtype=ms.float32)

@@ -114,7 +110,6 @@ def test_embeddinglookup_semi_auto1():
    strategy2 = ((4, 1, 2), (4, 2, 1))
    net = GradWrap(NetWithLoss(Net(shape, offset, strategy1, strategy2, "CPU")))

    net.set_auto_parallel()
    x = Tensor(np.ones([64 // 8, 64]), dtype=ms.float32)
    y = Tensor(np.ones([64 // 8, 64, 64]), dtype=ms.float32)
    net.set_train()

@@ -55,10 +55,9 @@ _w1 = Tensor(np.ones([64, 64]), dtype=ms.float32)
_b = Tensor(np.ones([64, 64]), dtype=ms.float32)


def compile_net(net, input_data, label, is_train=True):
    net.set_auto_parallel()
    net.set_train(mode=is_train)
    phase = "train" if is_train else "eval"
    _cell_graph_executor.compile(net, input_data, label, phase=phase, auto_parallel_mode=True)
    _cell_graph_executor.compile(net, input_data, label, phase=phase)


def test_train_and_eval():
    """
@@ -61,7 +61,6 @@ _b = Tensor(np.ones([128, 64, 32, 1]), dtype=ms.float32)
def compile_net(net):
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    _cell_graph_executor.compile(train_net, _x, _b)
    context.reset_auto_parallel_context()

@@ -44,7 +44,6 @@ _b = Tensor(np.ones([128, 64, 32]), dtype=ms.float32)


def compile_net(net):
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, _x, _b)
    context.reset_auto_parallel_context()

@@ -38,10 +38,9 @@ class Net(Cell):


def compile_net(net, x):
    net.set_auto_parallel()
    net.set_train()
    b = Tensor(np.ones([64, 8]), dtype=ms.float32)
    phase, _ = _cell_graph_executor.compile(net, x, b, auto_parallel_mode=True)
    phase, _ = _cell_graph_executor.compile(net, x, b)
    context.reset_auto_parallel_context()
    return phase


@@ -66,7 +66,6 @@ class Net(nn.Cell):

def compile_graph(net, device_num, parallel_mode, x, y):
    context.set_auto_parallel_context(device_num=device_num, global_rank=0, parallel_mode=parallel_mode)
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x, y)
@@ -47,7 +47,6 @@ _b = Tensor(np.ones([16, 32, 64]), dtype=ms.float32)
def compile_net(net):
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    _cell_graph_executor.compile(train_net, _x, _b)
    context.reset_auto_parallel_context()

@@ -64,7 +64,6 @@ class GradWrap(nn.Cell):


def compile_net(net):
    net.set_auto_parallel()
    _cell_graph_executor.compile(net)


def test_get_next_single():

@@ -50,10 +50,9 @@ def test_get_parameter_layout():
    weight = Tensor(np.ones([64, 32]), dtype=ms.float32)

    net = Net(strategy1, strategy2, weight)
    net.set_auto_parallel()
    net.set_train()
    exe = me._cell_graph_executor
    exe.compile(net, x, phase='train', auto_parallel_mode=True)
    exe.compile(net, x, phase='train')
    x_layout = ([8], [0, -1], [32, 32], 0, True, '')  # device_arrangement = [2, 4], tensor_map = [1, -1]
    weight_layout = ([2, 4], [0, -1], [16, 32], 0, True, '')  # device_arrangement = [2, 4], tensor_map = [0, -1]
    expect_dict = {'x': x_layout, 'w1': weight_layout}

@@ -63,7 +63,6 @@ class Net(nn.Cell):

def compile_graph(net, device_num, parallel_mode, x, y):
    context.set_auto_parallel_context(device_num=device_num, global_rank=0, parallel_mode=parallel_mode)
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x, y)

@@ -52,7 +52,6 @@ class GradWrap(nn.Cell):


def compile_net(net, x, y, b):
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x, y, b)
@@ -64,9 +64,8 @@ def check_initializer_weight_slice(init_name="Uniform", using_seed=False):
    weight1 = initializer(init_name, [32, 32], ms.float32)
    weight2 = initializer(init_name, [32, 32], ms.float32)
    net = Net(strategy1, strategy2, weight1, weight2)
    net.set_auto_parallel()
    net.set_train()
    exe.compile(net, x, auto_parallel_mode=True, phase='train')
    exe.compile(net, x, phase='train')
    hccl.rank_id = rank_save
    return net.parameters_dict()['w1'].data.asnumpy(), net.parameters_dict()['w2'].data.asnumpy()

@@ -123,9 +122,8 @@ def test_wrong_order_set_parallel_mode_with_initializer():
    exe = me._cell_graph_executor
    x = Tensor(np.ones([32, 32]), dtype=ms.float32)
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0)
    net.set_auto_parallel()
    with pytest.raises(RuntimeError):
        exe.compile(net, x, auto_parallel_mode=True, phase='train')
        exe.compile(net, x, phase='train')


def test_wrong_order_set_same_parallel_mode_with_initializer():

@@ -143,8 +141,7 @@ def test_wrong_order_set_same_parallel_mode_with_initializer():
    exe = me._cell_graph_executor
    x = Tensor(np.ones([32, 32]), dtype=ms.float32)
    context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=8, global_rank=0)
    net.set_auto_parallel()
    exe.compile(net, x, auto_parallel_mode=True, phase='train')
    exe.compile(net, x, phase='train')


def test_wrong_order_set_parallel_mode_without_initializer():

@@ -161,5 +158,4 @@ def test_wrong_order_set_parallel_mode_without_initializer():
    exe = me._cell_graph_executor
    x = Tensor(np.ones([32, 32]), dtype=ms.float32)
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0)
    net.set_auto_parallel()
    exe.compile(net, x, auto_parallel_mode=True, phase='train')
    exe.compile(net, x, phase='train')
@@ -42,7 +42,6 @@ class Net(Cell):

def compile_net(net: Cell):
    net.set_train()
    net.set_auto_parallel()
    _cell_graph_executor.compile(net, _anchor_boxes, _gt_boxes)
    context.reset_auto_parallel_context()


@@ -74,7 +74,6 @@ def test_l2normalize_matmul():
    strategy3 = ((1, 1, 8), (1, 1, 8))
    net = GradWrap(NetWithLoss(Net(strategy1, strategy2, strategy3)))
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
    net.set_auto_parallel()

    x = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)
    y = Tensor(np.ones([128, 32, 64]), dtype=ms.float32)

@@ -55,7 +55,6 @@ _b = Tensor(np.ones([16, 64, 32, 16]), dtype=ms.float32)
def compile_net(net):
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    _cell_graph_executor.compile(train_net, _x, _b)
    context.reset_auto_parallel_context()

@@ -153,7 +153,6 @@ def compile_net(net):
    optimizer = Momentum(net.trainable_params(),
                         learning_rate=0.1, momentum=0.9)
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    _cell_graph_executor.compile(train_net, _x, _b)
    context.reset_auto_parallel_context()

@@ -71,7 +71,6 @@ def test_linear():
    strategy2 = ((2, 8),)
    strategy3 = ((16, 1), (16, 1))
    net = GradWrap(NetWithLoss(Net(strategy0, strategy1, strategy2), strategy3))
    net.set_auto_parallel()

    x = Tensor(np.ones([64, 32]), dtype=ms.float32)
    y = Tensor(np.ones([64, 32]), dtype=ms.float32)
@@ -98,7 +98,6 @@ def test_two_matmul():
        print(strategy1, strategy2)
        net = GradWrap(NetWithLoss(Net(strategy1, strategy2)))
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        net.set_auto_parallel()
        net.set_train()
        _cell_graph_executor.compile(net, x, y, b)
        count = count + 1

@@ -40,7 +40,6 @@ class NetWithLoss(nn.Cell):


def compile_net(net, x, b):
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x, b)


@@ -70,9 +70,8 @@ def compile_net(net):
    optimizer = LazyAdam(net.trainable_params(), learning_rate=0.1)
    optimizer.sparse_opt.set_device("CPU")
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    _cell_graph_executor.compile(train_net, _x, _b, auto_parallel_mode=True)
    _cell_graph_executor.compile(train_net, _x, _b)
    context.reset_auto_parallel_context()


@@ -82,16 +82,14 @@ _b = Tensor(np.ones([64, 8]), dtype=ms.float32)
def compile_net(net):
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    train_net = TrainOneStepCell(net, optimizer)
    train_net.set_auto_parallel()
    train_net.set_train()
    _cell_graph_executor.compile(train_net, _x, _b, auto_parallel_mode=True)
    _cell_graph_executor.compile(train_net, _x, _b)
    context.reset_auto_parallel_context()


def compile_net_and_return_strategy(net: Cell, *inputs):
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    train_net = TrainOneStepCell(net, optimizer)
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(train_net, *inputs, phase='train')
    strategies = _cell_graph_executor._get_shard_strategy(train_net)

@@ -126,7 +126,6 @@ def test_two_matmul_dropout():
    strategy3 = ((1, 8), (8, 1))
    net = GradWrap(NetWithLoss(Net(strategy1, strategy2, strategy3)))
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
    net.set_auto_parallel()

    x = Tensor(np.ones([128, 32]), dtype=ms.float32)
    y = Tensor(np.ones([32, 64]), dtype=ms.float32)

@@ -54,7 +54,6 @@ class GradWrap(nn.Cell):


def compile_net(net, x, y):
    net.set_auto_parallel()
    net.set_train()
    _cell_graph_executor.compile(net, x, y)
Some files were not shown because too many files have changed in this diff.