fix: it is not safe to drop logs supporting the current primary datacenter, because configuring usable_regions down will drop the storage servers in the remote region, leaving you with no remaining logs
parent 0f59dc4086
commit 30b2f85020
@@ -780,7 +780,7 @@ public:
 	}

 	void checkRecoveryStalled() {
-		if(db.serverInfo->get().recoveryState < RecoveryState::RECOVERY_TRANSACTION && db.recoveryStalled ) {
+		if( (db.serverInfo->get().recoveryState == RecoveryState::RECRUITING || db.serverInfo->get().recoveryState == RecoveryState::ACCEPTING_COMMITS || db.serverInfo->get().recoveryState == RecoveryState::ALL_LOGS_RECRUITED) && db.recoveryStalled ) {
 			if(db.config.regions.size() > 1 && clusterControllerDcId.present()) {
 				auto regions = db.config.regions;
 				if(clusterControllerDcId.get() == regions[0].dcId) {
@@ -590,6 +590,8 @@ struct ILogSystem {
 	virtual void toCoreState( DBCoreState& ) = 0;

+	virtual bool remoteStorageRecovered() = 0;
+
 	virtual Future<Void> onCoreStateChanged() = 0;
 		// Returns if and when the output of toCoreState() would change (for example, when older logs can be discarded from the state)
@@ -283,14 +283,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogSystem>
 		}

 		newState.oldTLogData.clear();
-		int recoveredCount = 0;
-		if(recoveryComplete.isValid() && recoveryComplete.isReady()) {
-			recoveredCount++;
-		}
-		if(remoteRecoveryComplete.isValid() && remoteRecoveryComplete.isReady()) {
-			recoveredCount++;
-		}
-		if(recoveredCount < 2 - repopulateRegionAntiQuorum) {
+		if(!recoveryComplete.isValid() || !recoveryComplete.isReady() || (repopulateRegionAntiQuorum == 0 && (!remoteRecoveryComplete.isValid() || !remoteRecoveryComplete.isReady()))) {
 			newState.oldTLogData.resize(oldLogData.size());
 			for(int i = 0; i < oldLogData.size(); i++) {
 				for(auto &t : oldLogData[i].tLogs) {
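A note on what this hunk changes semantically: with repopulateRegionAntiQuorum == 0 the removed counting logic and the new boolean condition agree, but with a positive anti-quorum the old code could drop the old tlog data once the remote region alone had recovered, even though the primary datacenter's recovery was still incomplete, which is exactly the unsafe case the commit message describes. The sketch below contrasts the two predicates; the helper names are mine, and the Future validity/readiness checks are collapsed into plain booleans:

    #include <cassert>

    // Illustrative only: "primary"/"remote" stand in for recoveryComplete /
    // remoteRecoveryComplete being both valid and ready.
    static bool keepOldLogsBefore(bool primary, bool remote, int antiQuorum) {
        int recoveredCount = (primary ? 1 : 0) + (remote ? 1 : 0);
        return recoveredCount < 2 - antiQuorum;
    }

    static bool keepOldLogsAfter(bool primary, bool remote, int antiQuorum) {
        return !primary || (antiQuorum == 0 && !remote);
    }

    int main() {
        // Anti-quorum of 1, remote recovered, primary not: the old predicate would
        // already discard the old tlog data, the new one keeps it.
        assert(!keepOldLogsBefore(false, true, 1));
        assert(keepOldLogsAfter(false, true, 1));
        // Without an anti-quorum the two predicates agree in every case.
        for (int p = 0; p < 2; ++p)
            for (int r = 0; r < 2; ++r)
                assert(keepOldLogsBefore(p, r, 0) == keepOldLogsAfter(p, r, 0));
        return 0;
    }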
@@ -318,6 +311,10 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogSystem>
 		newState.logSystemType = logSystemType;
 	}

+	virtual bool remoteStorageRecovered() {
+		return remoteRecoveryComplete.isValid() && remoteRecoveryComplete.isReady();
+	}
+
 	virtual Future<Void> onCoreStateChanged() {
 		std::vector<Future<Void>> changes;
 		changes.push_back(Never());
@@ -1082,12 +1082,16 @@ ACTOR Future<Void> trackTlogRecovery( Reference<MasterData> self, Reference<Asyn
 				.trackLatest(format("%s/MasterRecoveryState", printable(self->dbName).c_str() ).c_str());
 		}

+		if(newState.oldTLogData.size() && self->configuration.repopulateRegionAntiQuorum > 0 && self->logSystem->remoteStorageRecovered()) {
+			TraceEvent(SevWarnAlways, "RecruitmentStalled_RemoteStorageRecovered", self->dbgid);
+			self->recruitmentStalled->set(true);
+		}
 		self->registrationTrigger.trigger();

 		if(allLogs && remoteRecovered.canBeSet()) {
 			remoteRecovered.send(Void());
 		}

 		if( finalUpdate ) {
 			oldLogSystems->get()->stopRejoins();
 			rejoinRequests = rejoinRequestHandler(self);
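Taken together with the first hunk, the intent appears to be: the new remoteStorageRecovered() accessor (declared in ILogSystem and implemented above) lets trackTlogRecovery notice that the remote region's storage has finished recovering while old tlog data still cannot be discarded, a combination that under a positive repopulateRegionAntiQuorum would otherwise persist; it then sets recruitmentStalled and re-triggers registration, presumably so the cluster controller's db.recoveryStalled check in the first hunk can see the condition. A minimal sketch of the added predicate, assuming plain C++ in place of flow/actor code (the function name is mine; the three parameters mirror the fields the diff reads):

    // Old tlog data is still pinned in the core state while the remote storage
    // servers have already recovered; with a positive anti-quorum this can persist
    // indefinitely, so it is surfaced as a stalled recruitment.
    static bool recruitmentLooksStalled(bool hasOldTLogData, int repopulateRegionAntiQuorum, bool remoteStorageRecovered) {
        return hasOldTLogData && repopulateRegionAntiQuorum > 0 && remoteStorageRecovered;
    }

    int main() {
        return recruitmentLooksStalled(true, 1, true) ? 0 : 1;
    }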