Add delay for master to recruit backup workers
This delay ensures that the old epoch's backup workers can save their progress in the database. Otherwise, the new master could attempt to recruit backup workers for the old epoch on version ranges that have already been popped. As a result, the logs would lose data.
This commit is contained in:
parent
fe6b4a4398
commit
80d3fa1222
|
@ -375,6 +375,8 @@ ACTOR Future<Void> saveProgress(BackupData* self, Version backupVersion) {
|
|||
|
||||
loop {
|
||||
try {
|
||||
// It's critical to save progress immediately so that after a master
|
||||
// recovery, the new master can know the progress so far.
|
||||
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
|
@ -469,7 +471,6 @@ ACTOR Future<Void> saveMutationsToFile(BackupData* self, Version popVersion, int
|
|||
it->second.lastSavedVersion, popVersion + 1, blockSize, self->tag.id, self->totalTags));
|
||||
it++;
|
||||
}
|
||||
ASSERT(!activeUids.empty());
|
||||
|
||||
keyRangeMap.coalesce(allKeys);
|
||||
wait(waitForAll(logFileFutures));
|
||||
|
|
|
@ -355,6 +355,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
|
|||
init( PROVISIONAL_START_DELAY, 1.0 );
|
||||
init( PROVISIONAL_MAX_DELAY, 60.0 );
|
||||
init( PROVISIONAL_DELAY_GROWTH, 1.5 );
|
||||
init( SECONDS_BEFORE_RECRUIT_BACKUP_WORKER, 4.0 );
|
||||
|
||||
// Resolver
|
||||
init( SAMPLE_OFFSET_PER_KEY, 100 );
|
||||
|
|
|
@ -292,6 +292,7 @@ public:
|
|||
double PROVISIONAL_START_DELAY;
|
||||
double PROVISIONAL_DELAY_GROWTH;
|
||||
double PROVISIONAL_MAX_DELAY;
|
||||
double SECONDS_BEFORE_RECRUIT_BACKUP_WORKER;
|
||||
|
||||
// Resolver
|
||||
int64_t KEY_BYTES_PER_SAMPLE;
|
||||
|
|
|
@ -1241,6 +1241,9 @@ ACTOR Future<Void> configurationMonitor(Reference<MasterData> self, Database cx)
|
|||
ACTOR static Future<Void> recruitBackupWorkers(Reference<MasterData> self, Database cx) {
|
||||
ASSERT(self->backupWorkers.size() > 0);
|
||||
|
||||
// Avoid race between a backup worker's save progress and the reads below.
|
||||
wait(delay(SERVER_KNOBS->SECONDS_BEFORE_RECRUIT_BACKUP_WORKER));
|
||||
|
||||
state LogEpoch epoch = self->cstate.myDBState.recoveryCount;
|
||||
state Reference<BackupProgress> backupProgress(
|
||||
new BackupProgress(self->dbgid, self->logSystem->getOldEpochTagsVersionsInfo()));
|
||||
|
|
Loading…
Reference in New Issue