Merge pull request #5743 from sfc-gh-satherton/dd-crash-fix
Bug fix: rare crash in DataDistribution
This commit is contained in:
commit
6abeb89f95
|
@ -3096,7 +3096,8 @@ ACTOR Future<Void> printSnapshotTeamsInfo(Reference<DDTeamCollection> self) {
|
|||
|
||||
auto const& keys = self->server_status.getKeys();
|
||||
for (auto const& key : keys) {
|
||||
server_status.emplace(key, self->server_status.get(key));
|
||||
// Add to or update the local server_status map
|
||||
server_status[key] = self->server_status.get(key);
|
||||
}
|
||||
|
||||
TraceEvent("DDPrintSnapshotTeasmInfo", self->distributorId)
|
||||
|
@ -3131,13 +3132,22 @@ ACTOR Future<Void> printSnapshotTeamsInfo(Reference<DDTeamCollection> self) {
|
|||
server = server_info.begin();
|
||||
for (i = 0; i < server_info.size(); i++) {
|
||||
const UID& uid = server->first;
|
||||
TraceEvent("ServerStatus", self->distributorId)
|
||||
.detail("ServerUID", uid)
|
||||
.detail("Healthy", !get(server_status, uid).isUnhealthy())
|
||||
|
||||
TraceEvent e("ServerStatus", self->distributorId);
|
||||
e.detail("ServerUID", uid)
|
||||
.detail("MachineIsValid", server_info[uid]->machine.isValid())
|
||||
.detail("MachineTeamSize",
|
||||
server_info[uid]->machine.isValid() ? server_info[uid]->machine->machineTeams.size() : -1)
|
||||
.detail("Primary", self->primary);
|
||||
|
||||
// ServerStatus might not be known if server was very recently added and storageServerFailureTracker()
|
||||
// has not yet updated self->server_status
|
||||
// If the UID is not found, do not assume the server is healthy or unhealthy
|
||||
auto it = server_status.find(uid);
|
||||
if (it != server_status.end()) {
|
||||
e.detail("Healthy", !it->second.isUnhealthy());
|
||||
}
|
||||
|
||||
server++;
|
||||
if (++traceEventsPrinted % SERVER_KNOBS->DD_TEAMS_INFO_PRINT_YIELD_COUNT == 0) {
|
||||
wait(yield());
|
||||
|
@ -3174,7 +3184,11 @@ ACTOR Future<Void> printSnapshotTeamsInfo(Reference<DDTeamCollection> self) {
|
|||
|
||||
// Healthy machine has at least one healthy server
|
||||
for (auto& server : _machine->serversOnMachine) {
|
||||
if (!get(server_status, server->id).isUnhealthy()) {
|
||||
// ServerStatus might not be known if server was very recently added and
|
||||
// storageServerFailureTracker() has not yet updated self->server_status If the UID is not found, do
|
||||
// not assume the server is healthy
|
||||
auto it = server_status.find(server->id);
|
||||
if (it != server_status.end() && !it->second.isUnhealthy()) {
|
||||
isMachineHealthy = true;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue