forked from mindspore-Ecosystem/mindspore
Fix partition bug
This commit is contained in:
parent
51dfb054d0
commit
e282ddfc01
|
@ -199,12 +199,6 @@ void ClusterContext::InitNodeRole() {
|
||||||
MS_LOG(EXCEPTION) << "Role name '" << node_role_ << "' is invalid. " << kDetailedFailureReason;
|
MS_LOG(EXCEPTION) << "Role name '" << node_role_ << "' is invalid. " << kDetailedFailureReason;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If node role is valid, judge the execution mode.
|
|
||||||
// MindSpore cluster does not support PyNative mode.
|
|
||||||
if (MsContext::GetInstance()->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode) {
|
|
||||||
MS_LOG(EXCEPTION) << "PyNative mode is not supported in MindSpore cluster.";
|
|
||||||
}
|
|
||||||
|
|
||||||
if (common::GetEnv(kEnvWorkerNum).empty()) {
|
if (common::GetEnv(kEnvWorkerNum).empty()) {
|
||||||
if (node_role_ == kEnvRoleOfWorker) {
|
if (node_role_ == kEnvRoleOfWorker) {
|
||||||
MS_LOG(EXCEPTION) << "Please set env 'WORKER_NUM' to a number greater than 0.";
|
MS_LOG(EXCEPTION) << "Please set env 'WORKER_NUM' to a number greater than 0.";
|
||||||
|
|
|
@ -154,12 +154,13 @@ MessageBase *const MetaServerNode::HandleMessage(MessageBase *const message) {
|
||||||
return rpc::NULL_MSG;
|
return rpc::NULL_MSG;
|
||||||
}
|
}
|
||||||
const auto &result = (*message_handlers_[name])(message->Body());
|
const auto &result = (*message_handlers_[name])(message->Body());
|
||||||
delete message;
|
|
||||||
if (result.length() > 0) {
|
if (result.length() > 0) {
|
||||||
auto rt_msg = CreateMessage(meta_server_addr_.GetUrl(), name, result);
|
auto rt_msg = CreateMessage(meta_server_addr_.GetUrl(), name, result);
|
||||||
MS_EXCEPTION_IF_NULL(rt_msg);
|
MS_EXCEPTION_IF_NULL(rt_msg);
|
||||||
|
delete message;
|
||||||
return rt_msg.release();
|
return rt_msg.release();
|
||||||
} else {
|
} else {
|
||||||
|
delete message;
|
||||||
return rpc::NULL_MSG;
|
return rpc::NULL_MSG;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -426,6 +426,10 @@ class Primitive(Primitive_):
|
||||||
Validator.check_non_negative_int(rank_id, "rank_id", "Primitive.place")
|
Validator.check_non_negative_int(rank_id, "rank_id", "Primitive.place")
|
||||||
Validator.check_string(role, "MS_WORKER", "role", "Primitive.place")
|
Validator.check_string(role, "MS_WORKER", "role", "Primitive.place")
|
||||||
|
|
||||||
|
if context.get_context("mode") == context.PYNATIVE_MODE:
|
||||||
|
raise RuntimeError("You are calling Primitive.place in pynative mode."
|
||||||
|
"It's only supported in graph mode. Please switch to graph mode.")
|
||||||
|
|
||||||
# Get the execution context and check whether calling of this 'place' method is valid.
|
# Get the execution context and check whether calling of this 'place' method is valid.
|
||||||
# This is because placing operators to arbitrary processes while other distributed training mode
|
# This is because placing operators to arbitrary processes while other distributed training mode
|
||||||
# is enabled is very unpredictable and may cause fatal error.
|
# is enabled is very unpredictable and may cause fatal error.
|
||||||
|
|
|
@ -48,6 +48,9 @@ def set_ps_enable(enable):
|
||||||
"""
|
"""
|
||||||
Set ps enable flag.
|
Set ps enable flag.
|
||||||
"""
|
"""
|
||||||
|
if context.get_context("mode") == context.PYNATIVE_MODE:
|
||||||
|
raise RuntimeError("Parameter server is not supported in pynative mode currently.")
|
||||||
|
|
||||||
ps_context().set_ps_enable(enable)
|
ps_context().set_ps_enable(enable)
|
||||||
# If this is Server or Scheduler and device target is Ascend, reset the target to CPU
|
# If this is Server or Scheduler and device target is Ascend, reset the target to CPU
|
||||||
if _need_reset_device_target_for_ps(context.get_context("device_target")):
|
if _need_reset_device_target_for_ps(context.get_context("device_target")):
|
||||||
|
|
|
@ -16,7 +16,7 @@ import os
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.level2
|
@pytest.mark.level1
|
||||||
@pytest.mark.platform_x86_gpu_training
|
@pytest.mark.platform_x86_gpu_training
|
||||||
@pytest.mark.env_single
|
@pytest.mark.env_single
|
||||||
def test_nccl_lenet():
|
def test_nccl_lenet():
|
||||||
|
|
|
@ -16,7 +16,7 @@ import os
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.level1
|
@pytest.mark.level0
|
||||||
@pytest.mark.platform_x86_ascend_training
|
@pytest.mark.platform_x86_ascend_training
|
||||||
@pytest.mark.platform_arm_ascend_training
|
@pytest.mark.platform_arm_ascend_training
|
||||||
@pytest.mark.env_single
|
@pytest.mark.env_single
|
||||||
|
@ -39,7 +39,7 @@ def test_full_ps_lenet_ascend():
|
||||||
assert return_code == 0
|
assert return_code == 0
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.level1
|
@pytest.mark.level0
|
||||||
@pytest.mark.platform_x86_gpu_training
|
@pytest.mark.platform_x86_gpu_training
|
||||||
@pytest.mark.env_onecard
|
@pytest.mark.env_onecard
|
||||||
def test_full_ps_lenet_gpu():
|
def test_full_ps_lenet_gpu():
|
||||||
|
|
|
@ -16,7 +16,7 @@ import os
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.level0
|
@pytest.mark.level1
|
||||||
@pytest.mark.platform_x86_ascend_training
|
@pytest.mark.platform_x86_ascend_training
|
||||||
@pytest.mark.platform_arm_ascend_training
|
@pytest.mark.platform_arm_ascend_training
|
||||||
@pytest.mark.env_single
|
@pytest.mark.env_single
|
||||||
|
|
|
@ -28,6 +28,7 @@ def test_noop_pserver():
|
||||||
Expectation: Runs successfully
|
Expectation: Runs successfully
|
||||||
"""
|
"""
|
||||||
os.environ['MS_ROLE'] = 'MS_PSERVER'
|
os.environ['MS_ROLE'] = 'MS_PSERVER'
|
||||||
|
context.set_context(mode=context.GRAPH_MODE)
|
||||||
context.set_ps_context(enable_ps=True)
|
context.set_ps_context(enable_ps=True)
|
||||||
data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False, decode=True)
|
data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False, decode=True)
|
||||||
num = 0
|
num = 0
|
||||||
|
@ -45,6 +46,7 @@ def test_noop_sched():
|
||||||
Expectation: Runs successfully
|
Expectation: Runs successfully
|
||||||
"""
|
"""
|
||||||
os.environ['MS_ROLE'] = 'MS_SCHED'
|
os.environ['MS_ROLE'] = 'MS_SCHED'
|
||||||
|
context.set_context(mode=context.GRAPH_MODE)
|
||||||
context.set_ps_context(enable_ps=True)
|
context.set_ps_context(enable_ps=True)
|
||||||
data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False, decode=True)
|
data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False, decode=True)
|
||||||
num = 0
|
num = 0
|
||||||
|
|
Loading…
Reference in New Issue