forked from mindspore-Ecosystem/mindspore

commit ee5ff9d273

!20005 disable mindRT in control flow

Merge pull request !20005 from limingqi107/r1.3
@@ -94,6 +94,7 @@ namespace gpu {
 using AnfAlgo = mindspore::session::AnfRuntimeAlgorithm;
 using CollectiveInitializer = device::gpu::CollectiveInitializer;
 using GetLocalRankId = device::gpu::GetLocalRankId;
+using InitNCCLComm = device::gpu::InitNCCLComm;
 
 void GPUSession::Init(uint32_t device_id) {
   const void *collective_handle_ = CollectiveInitializer::instance().collective_handle();
@@ -113,7 +114,14 @@ void GPUSession::Init(uint32_t device_id) {
   ms_context->set_param<uint32_t>(MS_CTX_DEVICE_ID, device_id);
   if (collective_inited) {
     rank_id_ = GetRankId();
+    if (collective_handle_ != nullptr) {
+      auto init_nccl_comm_funcptr =
+        reinterpret_cast<InitNCCLComm>(dlsym(const_cast<void *>(collective_handle_), "InitNCCLComm"));
+      MS_EXCEPTION_IF_NULL(init_nccl_comm_funcptr);
+      (*init_nccl_comm_funcptr)();
+    }
   }
+
   auto &json_parser = DumpJsonParser::GetInstance();
   // Dump json config file if dump is enabled
   json_parser.CopyJsonToDir(rank_id_);
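
Note: the hunk above resolves InitNCCLComm at runtime instead of linking it, so the session only touches NCCL when the collective library was actually opened. Below is a minimal, self-contained sketch of the same dlsym pattern; the library name is a hypothetical stand-in, and the initializer is assumed to take no arguments. In the real code the handle comes from CollectiveInitializer::instance().collective_handle().

    // compile with: g++ sketch.cc -ldl
    #include <dlfcn.h>
    #include <cstdio>

    using InitNCCLComm = void (*)();  // assumed signature of the exported initializer

    int main() {
      // Open the collective library; RTLD_NOW resolves all symbols immediately.
      void *handle = dlopen("libgpu_collective.so", RTLD_NOW);  // hypothetical .so name
      if (handle == nullptr) {
        std::fprintf(stderr, "dlopen failed: %s\n", dlerror());
        return 1;
      }
      // Resolve the initializer by its exported name, as GPUSession::Init does.
      auto init_nccl_comm = reinterpret_cast<InitNCCLComm>(dlsym(handle, "InitNCCLComm"));
      if (init_nccl_comm == nullptr) {
        std::fprintf(stderr, "dlsym failed: %s\n", dlerror());
        dlclose(handle);
        return 1;
      }
      (*init_nccl_comm)();  // initialize NCCL communicators once per process
      dlclose(handle);
      return 0;
    }
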
@@ -73,7 +73,9 @@ GroupManager::GroupManager() { groups_.clear(); }
 #if !defined(NO_DLIB) || defined(ENABLE_GPU)
 bool GroupManager::CreateGroupByExecutor(const std::string &device_name, const std::string &group_name,
                                          const std::vector<uint32_t> ranks, int device_id) {
-  if (MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
+  // The group operation thread must be same with nccl init thread in the GPU device.
+  if (MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT) ||
+      (MsContext::GetInstance()->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice)) {
     return CommManager::GetInstance().CreateGroupSync(group_name, ranks);
   } else {
     auto executor = session::ExecutorManager::Instance().GetExecutor(device_name, device_id);
@@ -84,7 +86,9 @@ bool GroupManager::CreateGroupByExecutor(const std::string &device_name, const s
 
 bool GroupManager::DestroyGroupByExecutor(const std::string &device_name, const std::string &group_name,
                                           int device_id) {
-  if (MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
+  // The group operation thread must be same with nccl init thread in the GPU device.
+  if (MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT) ||
+      (MsContext::GetInstance()->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice)) {
     return CommManager::GetInstance().DestroyGroup(group_name);
   } else {
     auto executor = session::ExecutorManager::Instance().GetExecutor(device_name, device_id);
@@ -103,7 +107,9 @@ Status CreateGroups(const std::vector<std::pair<std::string, std::vector<uint32_
   MS_EXCEPTION_IF_NULL(executor);
   for (auto &group : group_info) {
     bool ret = true;
-    if (context_ptr->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
+    // The group operation thread must be same with nccl init thread in the GPU device.
+    if (context_ptr->get_param<bool>(MS_CTX_ENABLE_MINDRT) ||
+        (context_ptr->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice)) {
       ret = CommManager::GetInstance().CreateGroupSync(group.first, group.second);
     } else {
       ret = executor->CreateCommGroup(group.first, group.second);
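
Note: the same two-part condition now appears at all three call sites above. The first operand keeps the existing mindRT behavior; the second is the actual fix: NCCL communicators are bound to the thread that created them, so on the GPU target the group operation must run synchronously on the caller's thread (the NCCL init thread) instead of being posted to the session executor. A hypothetical helper that hoists the shared predicate, using only APIs visible in this diff (a sketch, not part of the patch):

    // Should the group operation run on the current thread instead of the executor?
    inline bool RunGroupOpSynchronously() {
      auto context_ptr = MsContext::GetInstance();
      MS_EXCEPTION_IF_NULL(context_ptr);
      // mindRT has no session executor to post to, and on GPU the group
      // operation must stay on the thread that initialized NCCL.
      return context_ptr->get_param<bool>(MS_CTX_ENABLE_MINDRT) ||
             context_ptr->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice;
    }
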
@@ -55,6 +55,33 @@
 namespace mindspore {
 namespace pipeline {
 namespace {
+// Disable mindRT in the control flow scenario.
+void ResetMindRTEnable(const ResourcePtr &res) {
+  MS_EXCEPTION_IF_NULL(res);
+  auto context_ptr = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(context_ptr);
+  if (context_ptr->get_param<bool>(MS_CTX_ENABLE_MINDRT) == false) {
+    return;
+  }
+
+  auto func_graph = res->func_graph();
+  MS_EXCEPTION_IF_NULL(func_graph);
+  if (func_graph != nullptr && func_graph->manager() != nullptr) {
+    auto manager = func_graph->manager();
+    size_t graph_nums = manager->func_graphs().size();
+    if (graph_nums == 1) {
+      return;
+    }
+
+    MS_LOG(INFO) << "Disable mindRT in the multi graphs scenario.";
+    context_ptr->set_param<bool>(MS_CTX_ENABLE_MINDRT, false);
+    // Update the backend.
+    auto new_backend = compile::CreateBackend();
+    new_backend->SetDebugger();
+    res->results()[kBackend] = new_backend;
+  }
+}
+
 void TaskEmitActionForMindRT(const ResourcePtr &res) {
   MS_EXCEPTION_IF_NULL(res);
   // Get the mindRT backend.
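
Note: ResetMindRTEnable keys off the size of the root graph's manager. The frontend compiles each control-flow branch or loop body into its own FuncGraph, so a manager holding more than one graph signals control flow, which the r1.3 actor runtime cannot yet execute; the function then flips MS_CTX_ENABLE_MINDRT off and rebuilds the kBackend result so compilation falls back to the legacy backend. (The func_graph != nullptr test is already guaranteed by the preceding MS_EXCEPTION_IF_NULL, so only the manager check does real work.) A toy model of the heuristic, with stand-in types replacing FuncGraph/FuncGraphManager:

    #include <iostream>
    #include <memory>
    #include <vector>

    struct FuncGraph {};  // stand-in for mindspore::FuncGraph
    struct Manager {      // stand-in for FuncGraphManager
      std::vector<std::shared_ptr<FuncGraph>> func_graphs;
    };

    // More than one managed graph implies control-flow sub-graphs.
    bool HasControlFlow(const Manager &m) { return m.func_graphs.size() > 1; }

    int main() {
      Manager straight{{std::make_shared<FuncGraph>()}};
      Manager with_if{{std::make_shared<FuncGraph>(), std::make_shared<FuncGraph>()}};
      std::cout << HasControlFlow(straight) << HasControlFlow(with_if) << "\n";  // prints 01
    }
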
@@ -544,6 +571,8 @@ bool TaskEmitAction(const ResourcePtr &res) {
   if (res->func_graph() == nullptr) {
     MS_LOG(EXCEPTION) << "TaskEmit args error";
   }
+  // Disable mindRT in the control flow scenario.
+  ResetMindRTEnable(res);
   FuncGraphPtr func_graph = res->func_graph();
   MS_EXCEPTION_IF_NULL(func_graph);
   auto bc_ptr = res->results()[kBackend].cast<compile::BackendPtr>();
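
Note: the call site matters here: ResetMindRTEnable runs before the kBackend lookup two lines below, so when it swaps in a new backend, bc_ptr picks up the replacement rather than the stale mindRT backend.
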
@@ -25,6 +25,9 @@ namespace device {
 namespace gpu {
 void GPUDeviceManager::InitDevice() {
   CHECK_OP_RET_WITH_EXCEPT(CudaDriver::SetDevice(SizeToInt(cur_dev_id_)), "Failed to set current device id");
+  if (dev_alive_) {
+    return;
+  }
   CHECK_OP_RET_WITH_EXCEPT(CreateStream(&default_stream_), "Failed to create CUDA stream.");
   CHECK_CUDNN_RET_WITH_EXCEPT_NOTRACE(cudnnCreate(&cudnn_handle_), "Failed to create cuDNN handle");
   CHECK_CUDNN_RET_WITH_EXCEPT_NOTRACE(cudnnSetStream(cudnn_handle_, reinterpret_cast<cudaStream_t>(default_stream())),
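
Note: the dev_alive_ guard makes InitDevice idempotent. SetDevice is still re-applied on every call (it is cheap and per-thread), but the stream and library handles are created only once; presumably this matters now that the pipeline can rebuild the backend after disabling mindRT, which may re-enter device initialization. A generic sketch of the pattern (names hypothetical, and the alive flag is assumed to be set once the resources exist):

    class Device {
     public:
      void Init() {
        Bind();             // cheap and safe to repeat on every call
        if (alive_) {
          return;           // heavyweight resources already created
        }
        CreateResources();  // streams/handles: must run exactly once
        alive_ = true;
      }

     private:
      void Bind() {}
      void CreateResources() {}
      bool alive_ = false;
    };
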
@@ -506,11 +506,6 @@ void GraphScheduler::BuildAndScheduleGlobalActor() {
 }
 
 ActorSet *GraphScheduler::Transform(const GraphCompilerInfo &graph_compiler_info) {
-  // Local maps and vectors clear.
-  graph_output_to_actor_.clear();
-  front_node_to_actor_.clear();
-  copy_actors_.clear();
-
   MS_LOG(INFO) << "Graph(" << graph_compiler_info.name_ << ") transforms actor begin.";
   if (graph_compiler_info.graphs_.size() == 0) {
     MS_LOG(EXCEPTION) << "The number of graphs is zero.";
@@ -534,6 +529,12 @@ ActorSet *GraphScheduler::Transform(const GraphCompilerInfo &graph_compiler_info
     MS_LOG(EXCEPTION) << "The actor set of " << graph_compiler_info.name_ << " is invalid.";
   }
   MS_LOG(INFO) << "Graph(" << graph_compiler_info.name_ << ") transforms actor end.";
+
+  // Local maps and vectors clear.
+  graph_output_to_actor_.clear();
+  front_node_to_actor_.clear();
+  copy_actors_.clear();
+
   return actor_set.get();
 }
 
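
Note: the two hunks above move the clearing of graph_output_to_actor_, front_node_to_actor_, and copy_actors_ from the entry of Transform to just before its return. The maps are scratch state for a single transform, and clearing them on exit means code running between two transforms (now possible when the mindRT fallback rebuilds the backend) never observes entries from a previous graph. One caveat: the end-of-function clears are skipped when Transform exits through MS_LOG(EXCEPTION); an RAII guard would also cover those paths. A sketch, assuming it were declared inside GraphScheduler so the members are accessible:

    // Clears the scratch maps on every exit path, including thrown exceptions.
    class ScratchGuard {
     public:
      explicit ScratchGuard(GraphScheduler *s) : self_(s) {}
      ~ScratchGuard() {
        self_->graph_output_to_actor_.clear();
        self_->front_node_to_actor_.clear();
        self_->copy_actors_.clear();
      }

     private:
      GraphScheduler *self_;
    };
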
@@ -90,6 +90,9 @@ def run_e2e_dump():
     if context.get_context("device_target") == "Ascend":
         assert len(os.listdir(dump_file_path)) == 5
         output_name = "Add.Add-op1.0.0.*.output.0.DefaultFormat.npy"
+    elif context.get_context("device_target") == "CPU":
+        assert len(os.listdir(dump_file_path)) == 5
+        output_name = "Add.Add-op3.0.0.*.output.0.DefaultFormat.npy"
     else:
         assert len(os.listdir(dump_file_path)) == 3
         output_name = "Add.Add-op3.0.0.*.output.0.DefaultFormat.npy"