forked from mindspore-Ecosystem/mindspore
!32134 [Bugfix] Getting ckpt fails when NCCL init is called twice
Merge pull request !32134 from zyli2020/master
This commit is contained in:
commit
6488b0612a
|
@ -74,6 +74,9 @@ class BACKEND_EXPORT CollectiveManager {
|
|||
// Return whether collective communication needs to be reinitialized.
|
||||
bool need_reinit() const { return need_reinit_.load(); }
|
||||
|
||||
// Return whether the collective manager is initialized.
|
||||
bool initialized() const { return inited_.load(); }
|
||||
|
||||
private:
|
||||
CollectiveManager();
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ bool Initialize() {
|
|||
}
|
||||
|
||||
#if ((defined ENABLE_CPU) && (!defined _WIN32))
|
||||
if (cluster::ClusterContext::instance()->initialized()) {
|
||||
if (cluster::ClusterContext::instance()->initialized() && !collective::CollectiveManager::instance()->initialized()) {
|
||||
// Server and Scheduler don't use the collective communication library.
|
||||
auto node = cluster::ClusterContext::instance()->node();
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
|
|
Loading…
Reference in New Issue