From d622cb1f6ed74eb3d223079cdeb47b08dacd86f1 Mon Sep 17 00:00:00 2001 From: Balachandar Namasivayam Date: Wed, 12 Sep 2018 18:29:49 -0700 Subject: [PATCH] When the cluster is configured from a fearless setup to usable_regions=1, the master goes into a loop changing team priority. Fix this issue. --- fdbserver/DataDistribution.actor.cpp | 35 +++++++++++++++------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index b477472a52..5469ecb972 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -1373,18 +1373,8 @@ ACTOR Future teamTracker( DDTeamCollection *self, Reference te state bool lastHealthy = team->isHealthy(); state bool lastOptimal = team->isOptimal() && lastHealthy; state bool lastWrongConfiguration = team->isWrongConfiguration(); - - if(lastHealthy) { - self->healthyTeamCount++; - self->zeroHealthyTeams->set(false); - } - - if(lastOptimal) { - self->optimalTeamCount++; - self->zeroOptimalTeams.set(false); - } - state bool lastZeroHealthy = self->zeroHealthyTeams->get(); + state bool firstCheck = true; Void _ = wait( yield() ); TraceEvent("TeamTrackerStarting", self->masterId).detail("Reason", "Initial wait complete (sc)").detail("Team", team->getDesc()); @@ -1420,10 +1410,23 @@ ACTOR Future teamTracker( DDTeamCollection *self, Reference te change.push_back( self->zeroHealthyTeams->onChange() ); bool healthy = self->satisfiesPolicy(team->servers) && !anyUndesired && team->getServerIDs().size() == self->configuration.storageTeamSize && serversLeft == self->configuration.storageTeamSize; + bool optimal = team->isOptimal() && healthy; bool recheck = !healthy && (lastReady != self->initialFailureReactionDelay.isReady() || (lastZeroHealthy && !self->zeroHealthyTeams->get())); lastReady = self->initialFailureReactionDelay.isReady(); lastZeroHealthy = self->zeroHealthyTeams->get(); + if (firstCheck) { + if (healthy) { + 
self->healthyTeamCount++; + self->zeroHealthyTeams->set(false); + } + + if (optimal) { + self->optimalTeamCount++; + self->zeroOptimalTeams.set(false); + } + } + if( serversLeft != lastServersLeft || anyUndesired != lastAnyUndesired || anyWrongConfiguration != lastWrongConfiguration || wrongSize || recheck ) { TraceEvent("TeamHealthChanged", self->masterId) .detail("Team", team->getDesc()).detail("ServersLeft", serversLeft) @@ -1433,17 +1436,15 @@ ACTOR Future teamTracker( DDTeamCollection *self, Reference te team->setHealthy( healthy ); // Unhealthy teams won't be chosen by bestTeam team->setWrongConfiguration( anyWrongConfiguration ); - bool optimal = team->isOptimal() && healthy; - if( optimal != lastOptimal ) { - lastOptimal = optimal; + if( !firstCheck && optimal != lastOptimal ) { self->optimalTeamCount += optimal ? 1 : -1; ASSERT( self->optimalTeamCount >= 0 ); self->zeroOptimalTeams.set(self->optimalTeamCount == 0); } + lastOptimal = optimal; - if( lastHealthy != healthy ) { - lastHealthy = healthy; + if( !firstCheck && lastHealthy != healthy ) { self->healthyTeamCount += healthy ? 1 : -1; ASSERT( self->healthyTeamCount >= 0 ); @@ -1459,6 +1460,7 @@ ACTOR Future teamTracker( DDTeamCollection *self, Reference te .detail("Optimal", optimal) .detail("OptimalTeamCount", self->optimalTeamCount); } + lastHealthy = healthy; lastServersLeft = serversLeft; lastAnyUndesired = anyUndesired; @@ -1543,6 +1545,7 @@ ACTOR Future teamTracker( DDTeamCollection *self, Reference te } } + firstCheck = false; // Wait for any of the machines to change status Void _ = wait( quorum( change, 1 ) ); Void _ = wait( yield() );