!32398 bugfix: recovery fails when a worker is killed and pulled twice

Merge pull request !32398 from zyli2020/worker_failover_bp
This commit is contained in:
i-robot 2022-04-01 00:43:03 +00:00 committed by Gitee
commit d39a2e45f0
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
2 changed files with 2 additions and 0 deletions

View File

@@ -277,6 +277,7 @@ bool CollectiveManager::Finalize() {
MS_LOG(WARNING) << "Failed to finalize device communication library.";
}
inited_ = false;
finalized_ = true;
return true;
};

View File

@@ -29,6 +29,7 @@ bool NvidiaCollectiveCommLib::Initialize(uint32_t global_rank, uint32_t global_r
global_rank_id_ = global_rank;
global_rank_size_ = global_rank_size;
initialized_ = true;
finalized_ = false;
return true;
}