fix: simulation could buggify a delay such that data distribution incorrectly thinks the queue is not processing unhealthy relocations

This commit is contained in:
Evan Tschannen 2019-02-18 14:57:07 -08:00
parent 8f2af8bed1
commit d492395f84
3 changed files with 3 additions and 1 deletions

View File

@ -2883,7 +2883,7 @@ ACTOR Future<Void> updateReplicasKey(DDTeamCollection* self, Optional<Key> dcId)
TraceEvent("DDUpdatingStalled", self->masterId).detail("DcId", printable(dcId)).detail("ZeroHealthy", self->zeroHealthyTeams->get()).detail("ProcessingUnhealthy", self->processingUnhealthy->get());
wait(self->zeroHealthyTeams->onChange() || self->processingUnhealthy->onChange());
}
wait(delay(FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY, TaskLowPriority)); //After the team trackers wait on the initial failure reaction delay, they yield. We want to make sure every tracker has had the opportunity to send their relocations to the queue.
wait(delay(SERVER_KNOBS->DD_STALL_CHECK_DELAY, TaskLowPriority)); //After the team trackers wait on the initial failure reaction delay, they yield. We want to make sure every tracker has had the opportunity to send their relocations to the queue.
if(!self->zeroHealthyTeams->get() && !self->processingUnhealthy->get()) {
break;
}

View File

@ -150,6 +150,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( STORAGE_RECRUITMENT_DELAY, 10.0 );
init( DATA_DISTRIBUTION_LOGGING_INTERVAL, 5.0 );
init( DD_ENABLED_CHECK_DELAY, 1.0 );
init( DD_STALL_CHECK_DELAY, 0.2 ); //Must be larger than the maximum possible value of MAX_BUGGIFIED_DELAY
init( DD_MERGE_COALESCE_DELAY, 120.0 ); if( randomize && BUGGIFY ) DD_MERGE_COALESCE_DELAY = 0.001;
init( STORAGE_METRICS_POLLING_DELAY, 2.0 ); if( randomize && BUGGIFY ) STORAGE_METRICS_POLLING_DELAY = 15.0;
init( STORAGE_METRICS_RANDOM_DELAY, 0.2 );

View File

@ -113,6 +113,7 @@ public:
double STORAGE_RECRUITMENT_DELAY;
double DATA_DISTRIBUTION_LOGGING_INTERVAL;
double DD_ENABLED_CHECK_DELAY;
double DD_STALL_CHECK_DELAY;
double DD_MERGE_COALESCE_DELAY;
double STORAGE_METRICS_POLLING_DELAY;
double STORAGE_METRICS_RANDOM_DELAY;