Merge pull request #4548 from sfc-gh-etschannen/fix-master-config-change

Do not call expectedLogSets() with the wrong configuration
This commit is contained in:
Evan Tschannen 2021-03-23 18:31:08 -07:00 committed by GitHub
commit e8ed192647
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 8 additions and 12 deletions

View File

@ -1406,30 +1406,26 @@ ACTOR Future<Void> rejoinRequestHandler(Reference<MasterData> self) {
}
}
// Keeps the coordinated state (cstate) updated as the set of recruited tlogs change through recovery.
ACTOR Future<Void> trackTlogRecovery(Reference<MasterData> self,
Reference<AsyncVar<Reference<ILogSystem>>> oldLogSystems,
Future<Void> minRecoveryDuration) {
state Future<Void> rejoinRequests = Never();
state DBRecoveryCount recoverCount = self->cstate.myDBState.recoveryCount + 1;
state DatabaseConfiguration configuration =
self->configuration; // self-configuration can be changed by configurationMonitor so we need a copy
loop {
state DBCoreState newState;
self->logSystem->toCoreState(newState);
newState.recoveryCount = recoverCount;
state Future<Void> changed = self->logSystem->onCoreStateChanged();
if (newState.tLogs[0].tLogWriteAntiQuorum != self->configuration.tLogWriteAntiQuorum ||
newState.tLogs[0].tLogReplicationFactor != self->configuration.tLogReplicationFactor) {
TraceEvent("MasterConfigChanged", self->dbgid)
.setMaxEventLength(11000)
.setMaxFieldLength(10000)
.detail("Config", self->configuration.toString())
.detail("TLogWriteAntiQuorum", newState.tLogs[0].tLogWriteAntiQuorum)
.detail("TLogReplicationFactor", newState.tLogs[0].tLogReplicationFactor);
throw master_recovery_failed();
}
ASSERT(newState.tLogs[0].tLogWriteAntiQuorum == configuration.tLogWriteAntiQuorum &&
newState.tLogs[0].tLogReplicationFactor == configuration.tLogReplicationFactor);
state bool allLogs =
newState.tLogs.size() ==
self->configuration.expectedLogSets(self->primaryDcId.size() ? self->primaryDcId[0] : Optional<Key>());
configuration.expectedLogSets(self->primaryDcId.size() ? self->primaryDcId[0] : Optional<Key>());
state bool finalUpdate = !newState.oldTLogData.size() && allLogs;
wait(self->cstate.write(newState, finalUpdate));
wait(minRecoveryDuration);
@ -1463,7 +1459,7 @@ ACTOR Future<Void> trackTlogRecovery(Reference<MasterData> self,
.trackLatest("MasterRecoveryState");
}
if (newState.oldTLogData.size() && self->configuration.repopulateRegionAntiQuorum > 0 &&
if (newState.oldTLogData.size() && configuration.repopulateRegionAntiQuorum > 0 &&
self->logSystem->remoteStorageRecovered()) {
TraceEvent(SevWarnAlways, "RecruitmentStalled_RemoteStorageRecovered", self->dbgid);
self->recruitmentStalled->set(true);