Merge pull request #5743 from sfc-gh-satherton/dd-crash-fix

Bug fix: rare crash in DataDistribution
This commit is contained in:
Steve Atherton 2021-10-11 10:30:36 -07:00 committed by GitHub
commit 6abeb89f95
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 19 additions and 5 deletions

View File

@ -3096,7 +3096,8 @@ ACTOR Future<Void> printSnapshotTeamsInfo(Reference<DDTeamCollection> self) {
auto const& keys = self->server_status.getKeys();
for (auto const& key : keys) {
server_status.emplace(key, self->server_status.get(key));
// Add to or update the local server_status map
server_status[key] = self->server_status.get(key);
}
TraceEvent("DDPrintSnapshotTeasmInfo", self->distributorId)
@ -3131,13 +3132,22 @@ ACTOR Future<Void> printSnapshotTeamsInfo(Reference<DDTeamCollection> self) {
server = server_info.begin();
for (i = 0; i < server_info.size(); i++) {
const UID& uid = server->first;
TraceEvent("ServerStatus", self->distributorId)
.detail("ServerUID", uid)
.detail("Healthy", !get(server_status, uid).isUnhealthy())
TraceEvent e("ServerStatus", self->distributorId);
e.detail("ServerUID", uid)
.detail("MachineIsValid", server_info[uid]->machine.isValid())
.detail("MachineTeamSize",
server_info[uid]->machine.isValid() ? server_info[uid]->machine->machineTeams.size() : -1)
.detail("Primary", self->primary);
// ServerStatus might not be known if server was very recently added and storageServerFailureTracker()
// has not yet updated self->server_status
// If the UID is not found, do not assume the server is healthy or unhealthy
auto it = server_status.find(uid);
if (it != server_status.end()) {
e.detail("Healthy", !it->second.isUnhealthy());
}
server++;
if (++traceEventsPrinted % SERVER_KNOBS->DD_TEAMS_INFO_PRINT_YIELD_COUNT == 0) {
wait(yield());
@ -3174,7 +3184,11 @@ ACTOR Future<Void> printSnapshotTeamsInfo(Reference<DDTeamCollection> self) {
// Healthy machine has at least one healthy server
for (auto& server : _machine->serversOnMachine) {
if (!get(server_status, server->id).isUnhealthy()) {
// ServerStatus might not be known if server was very recently added and
// storageServerFailureTracker() has not yet updated self->server_status.
// If the UID is not found, do not assume the server is healthy.
auto it = server_status.find(server->id);
if (it != server_status.end() && !it->second.isUnhealthy()) {
isMachineHealthy = true;
}
}