fix: give time to detect failed servers before building teams
This commit is contained in:
parent
0bc7274d0e
commit
4903df5ce9
|
@ -533,6 +533,7 @@ struct DDTeamCollection {
|
|||
bool primary;
|
||||
Reference<AsyncVar<bool>> processingUnhealthy;
|
||||
Future<Void> readyToStart;
|
||||
// Future that checkBuildTeams waits on before doing anything else. It is
// created in the constructor's initializer list as
// delay( SERVER_KNOBS->CHECK_TEAM_DELAY, TaskDataDistribution ), so the pause is
// measured from construction of this DDTeamCollection. Per the commit that
// introduced it, the intent is to give time to detect failed servers before
// teams are built.
Future<Void> checkTeamDelay;
|
||||
|
||||
DDTeamCollection(
|
||||
Database const& cx,
|
||||
|
@ -547,8 +548,8 @@ struct DDTeamCollection {
|
|||
Future<Void> readyToStart, Reference<AsyncVar<bool>> zeroHealthyTeams, bool primary,
|
||||
Reference<AsyncVar<bool>> processingUnhealthy)
|
||||
:cx(cx), masterId(masterId), lock(lock), output(output), shardsAffectedByTeamFailure(shardsAffectedByTeamFailure), doBuildTeams( true ), teamBuilder( Void() ),
|
||||
configuration(configuration), serverChanges(serverChanges), readyToStart(readyToStart),
|
||||
initialFailureReactionDelay( delayed( readyToStart, BUGGIFY ? 0 : SERVER_KNOBS->INITIAL_FAILURE_REACTION_DELAY, TaskDataDistribution ) ), healthyTeamCount( 0 ),
|
||||
configuration(configuration), serverChanges(serverChanges), readyToStart(readyToStart), checkTeamDelay( delay( SERVER_KNOBS->CHECK_TEAM_DELAY, TaskDataDistribution) ),
|
||||
initialFailureReactionDelay( delayed( readyToStart, SERVER_KNOBS->INITIAL_FAILURE_REACTION_DELAY, TaskDataDistribution ) ), healthyTeamCount( 0 ),
|
||||
initializationDoneActor(logOnCompletion(readyToStart && initialFailureReactionDelay, this)), optimalTeamCount( 0 ), recruitingStream(0), restartRecruiting( SERVER_KNOBS->DEBOUNCE_RECRUITING_DELAY ),
|
||||
unhealthyServers(0), includedDCs(includedDCs), otherTrackedDCs(otherTrackedDCs), zeroHealthyTeams(zeroHealthyTeams), zeroOptimalTeams(true), primary(primary), processingUnhealthy(processingUnhealthy)
|
||||
{
|
||||
|
@ -586,6 +587,7 @@ struct DDTeamCollection {
|
|||
ACTOR Future<Void> checkBuildTeams( DDTeamCollection* self ) {
|
||||
state Promise<Void> restart;
|
||||
|
||||
Void _ = wait( self->checkTeamDelay );
|
||||
while( !self->teamBuilder.isReady() )
|
||||
Void _ = wait( self->teamBuilder );
|
||||
|
||||
|
|
|
@ -131,6 +131,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
|
|||
init( METRIC_DELAY, 0.1 ); if( randomize && BUGGIFY ) METRIC_DELAY = 1.0;
|
||||
init( ALL_DATA_REMOVED_DELAY, 1.0 );
|
||||
init( INITIAL_FAILURE_REACTION_DELAY, 30.0 ); if( randomize && BUGGIFY ) INITIAL_FAILURE_REACTION_DELAY = 0.0;
|
||||
init( CHECK_TEAM_DELAY, 30.0 );
|
||||
init( LOG_ON_COMPLETION_DELAY, DD_QUEUE_LOGGING_INTERVAL );
|
||||
init( BEST_TEAM_MAX_TEAM_TRIES, 10 );
|
||||
init( BEST_TEAM_OPTION_COUNT, 4 );
|
||||
|
|
|
@ -96,6 +96,7 @@ public:
|
|||
double METRIC_DELAY;
|
||||
double ALL_DATA_REMOVED_DELAY;
|
||||
double INITIAL_FAILURE_REACTION_DELAY;
|
||||
// Length of the pause DDTeamCollection inserts before checkBuildTeams proceeds
// (passed to delay() when DDTeamCollection::checkTeamDelay is constructed).
// Initialized to 30.0 in the ServerKnobs constructor; unlike
// INITIAL_FAILURE_REACTION_DELAY it has no BUGGIFY override there.
// NOTE(review): unit is presumably seconds, as for other delay() arguments - confirm.
double CHECK_TEAM_DELAY;
|
||||
double LOG_ON_COMPLETION_DELAY;
|
||||
int BEST_TEAM_MAX_TEAM_TRIES;
|
||||
int BEST_TEAM_OPTION_COUNT;
|
||||
|
|
Loading…
Reference in New Issue