forked from mindspore-Ecosystem/mindspore
!32398 bugfix: recovery fails when a worker is killed and pulled up twice
Merge pull request !32398 from zyli2020/worker_failover_bp
This commit is contained in:
commit
d39a2e45f0
|
@@ -277,6 +277,7 @@ bool CollectiveManager::Finalize() {
|
|||
MS_LOG(WARNING) << "Failed to finalize device communication library.";
|
||||
}
|
||||
|
||||
inited_ = false;
|
||||
finalized_ = true;
|
||||
return true;
|
||||
};
|
||||
|
|
|
@@ -29,6 +29,7 @@ bool NvidiaCollectiveCommLib::Initialize(uint32_t global_rank, uint32_t global_r
|
|||
global_rank_id_ = global_rank;
|
||||
global_rank_size_ = global_rank_size;
|
||||
initialized_ = true;
|
||||
finalized_ = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue