It is not safe to call expectedLogSets() with a potentially newer configuration than the one from the recovery

This commit is contained in:
Evan Tschannen 2021-03-23 13:21:48 -07:00
parent 43c81e550c
commit 0ea513b503
1 changed files with 8 additions and 12 deletions

View File

@ -1406,30 +1406,26 @@ ACTOR Future<Void> rejoinRequestHandler(Reference<MasterData> self) {
}
}
// Keeps the coordinated state (cstate) updated as the set of recruited tlogs change through recovery.
ACTOR Future<Void> trackTlogRecovery(Reference<MasterData> self,
Reference<AsyncVar<Reference<ILogSystem>>> oldLogSystems,
Future<Void> minRecoveryDuration) {
state Future<Void> rejoinRequests = Never();
state DBRecoveryCount recoverCount = self->cstate.myDBState.recoveryCount + 1;
state DatabaseConfiguration configuration =
self->configuration; // self-configuration can be changed by configurationMonitor so we need a copy
loop {
state DBCoreState newState;
self->logSystem->toCoreState(newState);
newState.recoveryCount = recoverCount;
state Future<Void> changed = self->logSystem->onCoreStateChanged();
if (newState.tLogs[0].tLogWriteAntiQuorum != self->configuration.tLogWriteAntiQuorum ||
newState.tLogs[0].tLogReplicationFactor != self->configuration.tLogReplicationFactor) {
TraceEvent("MasterConfigChanged", self->dbgid)
.setMaxEventLength(11000)
.setMaxFieldLength(10000)
.detail("Config", self->configuration.toString())
.detail("TLogWriteAntiQuorum", newState.tLogs[0].tLogWriteAntiQuorum)
.detail("TLogReplicationFactor", newState.tLogs[0].tLogReplicationFactor);
throw master_recovery_failed();
}
ASSERT(newState.tLogs[0].tLogWriteAntiQuorum == configuration.tLogWriteAntiQuorum &&
newState.tLogs[0].tLogReplicationFactor == configuration.tLogReplicationFactor);
state bool allLogs =
newState.tLogs.size() ==
self->configuration.expectedLogSets(self->primaryDcId.size() ? self->primaryDcId[0] : Optional<Key>());
configuration.expectedLogSets(self->primaryDcId.size() ? self->primaryDcId[0] : Optional<Key>());
state bool finalUpdate = !newState.oldTLogData.size() && allLogs;
wait(self->cstate.write(newState, finalUpdate));
wait(minRecoveryDuration);
@ -1463,7 +1459,7 @@ ACTOR Future<Void> trackTlogRecovery(Reference<MasterData> self,
.trackLatest("MasterRecoveryState");
}
if (newState.oldTLogData.size() && self->configuration.repopulateRegionAntiQuorum > 0 &&
if (newState.oldTLogData.size() && configuration.repopulateRegionAntiQuorum > 0 &&
self->logSystem->remoteStorageRecovered()) {
TraceEvent(SevWarnAlways, "RecruitmentStalled_RemoteStorageRecovered", self->dbgid);
self->recruitmentStalled->set(true);