Fix partition bug

ZPaC 2022-10-19 14:48:28 +08:00
parent 51dfb054d0
commit e282ddfc01
8 changed files with 15 additions and 11 deletions

View File

@@ -199,12 +199,6 @@ void ClusterContext::InitNodeRole() {
MS_LOG(EXCEPTION) << "Role name '" << node_role_ << "' is invalid. " << kDetailedFailureReason;
}
-// If node role is valid, judge the execution mode.
-// MindSpore cluster does not support PyNative mode.
-if (MsContext::GetInstance()->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode) {
-MS_LOG(EXCEPTION) << "PyNative mode is not supported in MindSpore cluster.";
-}
if (common::GetEnv(kEnvWorkerNum).empty()) {
if (node_role_ == kEnvRoleOfWorker) {
MS_LOG(EXCEPTION) << "Please set env 'WORKER_NUM' to a number greater than 0.";

View File

@@ -154,12 +154,13 @@ MessageBase *const MetaServerNode::HandleMessage(MessageBase *const message) {
return rpc::NULL_MSG;
}
const auto &result = (*message_handlers_[name])(message->Body());
-delete message;
if (result.length() > 0) {
auto rt_msg = CreateMessage(meta_server_addr_.GetUrl(), name, result);
MS_EXCEPTION_IF_NULL(rt_msg);
+delete message;
return rt_msg.release();
} else {
+delete message;
return rpc::NULL_MSG;
}
}

View File

@@ -426,6 +426,10 @@ class Primitive(Primitive_):
Validator.check_non_negative_int(rank_id, "rank_id", "Primitive.place")
Validator.check_string(role, "MS_WORKER", "role", "Primitive.place")
+if context.get_context("mode") == context.PYNATIVE_MODE:
+raise RuntimeError("You are calling Primitive.place in pynative mode."
+"It's only supported in graph mode. Please switch to graph mode.")
# Get the execution context and check whether calling of this 'place' method is valid.
# This is because placing operators to arbitrary processes while other distributed training mode
# is enabled is very unpredictable and may cause fatal error.

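A minimal sketch of the call path guarded above, assuming the place(role, rank_id) argument order implied by the Validator checks; MatMul is used here only as an illustrative primitive:

from mindspore import context, ops

# Primitive.place is now rejected under PyNative mode, so graph mode must be selected first.
context.set_context(mode=context.GRAPH_MODE)

matmul = ops.MatMul()
# Pin this operator instance to the worker process with rank 0.
matmul.place("MS_WORKER", 0)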
View File

@@ -48,6 +48,9 @@ def set_ps_enable(enable):
"""
Set ps enable flag.
"""
+if context.get_context("mode") == context.PYNATIVE_MODE:
+raise RuntimeError("Parameter server is not supported in pynative mode currently.")
ps_context().set_ps_enable(enable)
# If this is Server or Scheduler and device target is Ascend, reset the target to CPU
if _need_reset_device_target_for_ps(context.get_context("device_target")):

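A minimal caller-side sketch of the new requirement, mirroring the test updates later in this commit; the MS_PSERVER role value is taken from those tests and is illustrative:

import os
from mindspore import context

os.environ['MS_ROLE'] = 'MS_PSERVER'
# set_ps_enable now raises RuntimeError under PyNative mode,
# so switch to graph mode before enabling the parameter server.
context.set_context(mode=context.GRAPH_MODE)
context.set_ps_context(enable_ps=True)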
View File

@@ -16,7 +16,7 @@ import os
import pytest
-@pytest.mark.level2
+@pytest.mark.level1
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_single
def test_nccl_lenet():

View File

@@ -16,7 +16,7 @@ import os
import pytest
-@pytest.mark.level1
+@pytest.mark.level0
@pytest.mark.platform_x86_ascend_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.env_single
@@ -39,7 +39,7 @@ def test_full_ps_lenet_ascend():
assert return_code == 0
-@pytest.mark.level1
+@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_full_ps_lenet_gpu():

View File

@@ -16,7 +16,7 @@ import os
import pytest
-@pytest.mark.level0
+@pytest.mark.level1
@pytest.mark.platform_x86_ascend_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.env_single

View File

@@ -28,6 +28,7 @@ def test_noop_pserver():
Expectation: Runs successfully
"""
os.environ['MS_ROLE'] = 'MS_PSERVER'
+context.set_context(mode=context.GRAPH_MODE)
context.set_ps_context(enable_ps=True)
data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False, decode=True)
num = 0
@@ -45,6 +45,7 @@ def test_noop_sched():
Expectation: Runs successfully
"""
os.environ['MS_ROLE'] = 'MS_SCHED'
+context.set_context(mode=context.GRAPH_MODE)
context.set_ps_context(enable_ps=True)
data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False, decode=True)
num = 0