The BACKUP_LOCK_BYTES knob could be buggified to a value that was too low, resulting in backup getting stuck.

This commit is contained in:
A.J. Beamon 2021-09-23 17:03:32 -07:00
parent e28fef6264
commit c2885ab70d
2 changed files with 5 additions and 1 deletion

View File

@@ -443,7 +443,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( BACKUP_TIMEOUT, 0.4 );
init( BACKUP_NOOP_POP_DELAY, 5.0 );
init( BACKUP_FILE_BLOCK_BYTES, 1024 * 1024 );
init( BACKUP_LOCK_BYTES, 3e9 ); if(randomize && BUGGIFY) BACKUP_LOCK_BYTES = deterministicRandom()->randomInt(1024, 4096) * 1024;
init( BACKUP_LOCK_BYTES, 3e9 ); if(randomize && BUGGIFY) BACKUP_LOCK_BYTES = deterministicRandom()->randomInt(1024, 4096) * 15 * 1024;
init( BACKUP_UPLOAD_DELAY, 10.0 ); if(randomize && BUGGIFY) BACKUP_UPLOAD_DELAY = deterministicRandom()->random01() * 60;
//Cluster Controller

View File

@@ -839,6 +839,10 @@ ACTOR Future<Void> uploadData(BackupData* self) {
// make sure file is saved on version boundary
popVersion = lastVersion;
numMsg = lastVersionIndex;
// If we aren't able to process any messages and the lock is blocking us from
// queuing more, then we are stuck. This could suggest the lock capacity is too small.
ASSERT(numMsg > 0 || self->lock->waiters() == 0);
}
if (((numMsg > 0 || popVersion > lastPopVersion) && self->pulling) || self->pullFinished()) {
TraceEvent("BackupWorkerSave", self->myId)