From 0ea513b503cf992caa8ab4944308ae6d4554d0d4 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Tue, 23 Mar 2021 13:21:48 -0700 Subject: [PATCH] It is not safe to call expectedLogSets() with a potentially newer configuration than the one from the recovery --- fdbserver/masterserver.actor.cpp | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/fdbserver/masterserver.actor.cpp b/fdbserver/masterserver.actor.cpp index 4edf4faee6..5fbf5bc2de 100644 --- a/fdbserver/masterserver.actor.cpp +++ b/fdbserver/masterserver.actor.cpp @@ -1406,30 +1406,26 @@ ACTOR Future rejoinRequestHandler(Reference self) { } } +// Keeps the coordinated state (cstate) updated as the set of recruited tlogs changes through recovery. ACTOR Future trackTlogRecovery(Reference self, Reference>> oldLogSystems, Future minRecoveryDuration) { state Future rejoinRequests = Never(); state DBRecoveryCount recoverCount = self->cstate.myDBState.recoveryCount + 1; + state DatabaseConfiguration configuration = + self->configuration; // self->configuration can be changed by configurationMonitor so we need a copy loop { state DBCoreState newState; self->logSystem->toCoreState(newState); newState.recoveryCount = recoverCount; state Future changed = self->logSystem->onCoreStateChanged(); - if (newState.tLogs[0].tLogWriteAntiQuorum != self->configuration.tLogWriteAntiQuorum || - newState.tLogs[0].tLogReplicationFactor != self->configuration.tLogReplicationFactor) { - TraceEvent("MasterConfigChanged", self->dbgid) - .setMaxEventLength(11000) - .setMaxFieldLength(10000) - .detail("Config", self->configuration.toString()) - .detail("TLogWriteAntiQuorum", newState.tLogs[0].tLogWriteAntiQuorum) - .detail("TLogReplicationFactor", newState.tLogs[0].tLogReplicationFactor); - throw master_recovery_failed(); - } + + ASSERT(newState.tLogs[0].tLogWriteAntiQuorum == configuration.tLogWriteAntiQuorum && + newState.tLogs[0].tLogReplicationFactor == configuration.tLogReplicationFactor); state
bool allLogs = newState.tLogs.size() == - self->configuration.expectedLogSets(self->primaryDcId.size() ? self->primaryDcId[0] : Optional()); + configuration.expectedLogSets(self->primaryDcId.size() ? self->primaryDcId[0] : Optional()); state bool finalUpdate = !newState.oldTLogData.size() && allLogs; wait(self->cstate.write(newState, finalUpdate)); wait(minRecoveryDuration); @@ -1463,7 +1459,7 @@ ACTOR Future trackTlogRecovery(Reference self, .trackLatest("MasterRecoveryState"); } - if (newState.oldTLogData.size() && self->configuration.repopulateRegionAntiQuorum > 0 && + if (newState.oldTLogData.size() && configuration.repopulateRegionAntiQuorum > 0 && self->logSystem->remoteStorageRecovered()) { TraceEvent(SevWarnAlways, "RecruitmentStalled_RemoteStorageRecovered", self->dbgid); self->recruitmentStalled->set(true);