From 599fcb2e6d1588f0a6cb9c31945ea4f84b0dbf8c Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Tue, 2 Jul 2019 15:58:31 -0700 Subject: [PATCH] Add serverTeamRemover to remove redundant server teams --- fdbserver/DataDistribution.actor.cpp | 101 ++++++++++++++++++++++++++- fdbserver/Knobs.cpp | 4 +- fdbserver/Knobs.h | 5 +- fdbserver/QuietDatabase.actor.cpp | 6 +- 4 files changed, 110 insertions(+), 6 deletions(-) diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index 06bf837028..795840a5f3 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -592,6 +592,7 @@ struct DDTeamCollection : ReferenceCounted { Promise addSubsetComplete; Future badTeamRemover; Future redundantMachineTeamRemover; + Future redundantServerTeamRemover; Reference storageServerSet; std::vector forcedEntries, resultEntries; @@ -633,7 +634,7 @@ struct DDTeamCollection : ReferenceCounted { Reference> processingUnhealthy) : cx(cx), distributorId(distributorId), lock(lock), output(output), shardsAffectedByTeamFailure(shardsAffectedByTeamFailure), doBuildTeams(true), teamBuilder(Void()), - badTeamRemover(Void()), redundantMachineTeamRemover(Void()), configuration(configuration), + badTeamRemover(Void()), redundantMachineTeamRemover(Void()), redundantServerTeamRemover(Void()), configuration(configuration), readyToStart(readyToStart), clearHealthyZoneFuture(Void()), checkTeamDelay(delay(SERVER_KNOBS->CHECK_TEAM_DELAY, TaskDataDistribution)), initialFailureReactionDelay( @@ -1626,6 +1627,25 @@ struct DDTeamCollection : ReferenceCounted { return std::pair, int>(retMT, minNumProcessTeams); } + // Find the server team whose members are on the most number of server teams: for each team, sum server->teams.size() over its servers and keep the team with the largest sum (the first such team wins on ties). Returns that team and its sum; if no team has a positive sum, returns a default-constructed Reference and 0. + std::pair, int> getServerTeamWithMostProcessTeams() { + Reference retST; + int maxNumProcessTeams = 0; + + for (auto& t : teams) { + int numProcessTeams = 0; + for (auto& server : t->getServers()) { + numProcessTeams += server->teams.size(); + } + if 
(numProcessTeams > maxNumProcessTeams) { + maxNumProcessTeams = numProcessTeams; + retST = t; + } + } + + return std::pair, int>(retST, maxNumProcessTeams); + } + int getHealthyMachineTeamCount() { int healthyTeamCount = 0; for (auto mt = machineTeams.begin(); mt != machineTeams.end(); ++mt) { @@ -2264,7 +2284,7 @@ ACTOR Future machineTeamRemover(DDTeamCollection* self) { state int numMachineTeamRemoved = 0; loop { // In case the machineTeamRemover cause problems in production, we can disable it - if (SERVER_KNOBS->TR_FLAG_DISABLE_TEAM_REMOVER) { + if (SERVER_KNOBS->TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER) { return Void(); // Directly return Void() } @@ -2362,6 +2382,79 @@ ACTOR Future machineTeamRemover(DDTeamCollection* self) { } } +// Remove the server team whose members have the most number of process teams +// until the total number of server teams is no larger than the desired number (SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * healthy server count); at most one team is removed per loop iteration +ACTOR Future serverTeamRemover(DDTeamCollection* self) { + state int numServerTeamRemoved = 0; + loop { + // In case the serverTeamRemover causes problems in production, we can disable it + if (SERVER_KNOBS->TR_FLAG_DISABLE_SERVER_TEAM_REMOVER) { + return Void(); // Directly return Void() + } + + wait(waitUntilHealthy(self)); + + // To avoid removing server teams too fast, which is unlikely to happen though + wait( delay(SERVER_KNOBS->TR_REMOVE_SERVER_TEAM_DELAY) ); + + // Wait for the badTeamRemover() to avoid the potential race between adding the bad team (add the team tracker) + // and remove bad team (cancel the team tracker). + wait(self->badTeamRemover); + + state int healthyServerCount = self->calculateHealthyServerCount(); + // Check if all servers are healthy; if not, loop back, which re-runs the waits at the top of the loop (waitUntilHealthy, then the TR_REMOVE_SERVER_TEAM_DELAY delay). + // Eventually, all servers will become healthy. 
+ if (healthyServerCount != self->server_info.size()) { + continue; + } + + // From this point, all server teams should be healthy, because we wait above + // until processingUnhealthy is done, and all servers are healthy + + // In most cases, all server teams should be healthy teams at this point. + int desiredServerTeams = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * healthyServerCount; + int totalSTCount = self->teams.size(); + + if (totalSTCount > desiredServerTeams) { + // Pick the server team whose members are on the most number of server teams, and mark it undesired + state std::pair, int> foundSTInfo = self->getServerTeamWithMostProcessTeams(); + state Reference st = foundSTInfo.first; + state int maxNumProcessTeams = foundSTInfo.second; + ASSERT(st.isValid()); + // The team will be marked as a bad team + bool foundTeam = self->removeTeam(st); + ASSERT(foundTeam == true); + self->addTeam(st->getServers(), true, true); + TEST(true); + + self->doBuildTeams = true; + + if (self->badTeamRemover.isReady()) { + self->badTeamRemover = removeBadTeams(self); + self->addActor.send(self->badTeamRemover); + } + + TraceEvent("ServerTeamRemover") + .detail("ServerTeamToRemove", st->getServerIDsStr()) + .detail("NumProcessTeamsOnTheServerTeam", maxNumProcessTeams) + .detail("CurrentServerTeamNumber", self->teams.size()) + .detail("DesiredTeam", desiredServerTeams); + + numServerTeamRemoved++; + } else { + if (numServerTeamRemoved > 0) { + // Only trace the information when we have removed a server team. NOTE(review): numServerTeamRemoved is never reset in this actor, so once it is positive this event is traced again on every later pass through this branch. + TraceEvent("ServerTeamRemoverDone") + .detail("HealthyServerNumber", healthyServerCount) + .detail("CurrentServerTeamNumber", self->teams.size()) + .detail("DesiredServerTeam", desiredServerTeams) + .detail("NumServerTeamRemoved", numServerTeamRemoved); + self->traceTeamCollectionInfo(); + } + } + } +} + // Track a team and issue RelocateShards when the level of degradation changes // A badTeam can be unhealthy or just a redundantTeam removed by machineTeamRemover() or 
serverTeamRemover() ACTOR Future teamTracker(DDTeamCollection* self, Reference team, bool badTeam, bool redundantTeam) { @@ -3336,6 +3429,10 @@ ACTOR Future dataDistributionTeamCollection( self->redundantMachineTeamRemover = machineTeamRemover(self); self->addActor.send(self->redundantMachineTeamRemover); } + if (self->redundantServerTeamRemover.isReady()) { + self->redundantServerTeamRemover = serverTeamRemover(self); + self->addActor.send(self->redundantServerTeamRemover); + } self->traceTeamCollectionInfo(); if(self->includedDCs.size()) { diff --git a/fdbserver/Knobs.cpp b/fdbserver/Knobs.cpp index 06798a485a..74f095d18a 100644 --- a/fdbserver/Knobs.cpp +++ b/fdbserver/Knobs.cpp @@ -179,8 +179,10 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) { init( DD_ZERO_HEALTHY_TEAM_DELAY, 1.0 ); // TeamRemover - TR_FLAG_DISABLE_TEAM_REMOVER = false; if( randomize && BUGGIFY ) TR_FLAG_DISABLE_TEAM_REMOVER = deterministicRandom()->random01() < 0.1 ? true : false; // false by default. disable the consistency check when it's true + TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER = false; if( randomize && BUGGIFY ) TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER = deterministicRandom()->random01() < 0.1 ? true : false; // false by default. disable the consistency check when it's true init( TR_REMOVE_MACHINE_TEAM_DELAY, 60.0 ); if( randomize && BUGGIFY ) TR_REMOVE_MACHINE_TEAM_DELAY = deterministicRandom()->random01() * 60.0; + TR_FLAG_DISABLE_SERVER_TEAM_REMOVER = false; if( randomize && BUGGIFY ) TR_FLAG_DISABLE_SERVER_TEAM_REMOVER = deterministicRandom()->random01() < 0.1 ? true : false; // false by default. 
disable the consistency check when it's true + init( TR_REMOVE_SERVER_TEAM_DELAY, 60.0 ); if( randomize && BUGGIFY ) TR_REMOVE_SERVER_TEAM_DELAY = deterministicRandom()->random01() * 60.0; // Redwood Storage Engine init( PREFIX_TREE_IMMEDIATE_KEY_SIZE_LIMIT, 30 ); diff --git a/fdbserver/Knobs.h b/fdbserver/Knobs.h index dab19d5108..e61a3a1c55 100644 --- a/fdbserver/Knobs.h +++ b/fdbserver/Knobs.h @@ -141,9 +141,12 @@ public: double DEBOUNCE_RECRUITING_DELAY; // TeamRemover to remove redundant teams - bool TR_FLAG_DISABLE_TEAM_REMOVER; // disable the teamRemover actor + bool TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER; // disable the machineTeamRemover actor double TR_REMOVE_MACHINE_TEAM_DELAY; // wait for the specified time before try to remove next machine team + bool TR_FLAG_DISABLE_SERVER_TEAM_REMOVER; // disable the serverTeamRemover actor + double TR_REMOVE_SERVER_TEAM_DELAY; // wait for the specified time before trying to remove the next server team + double DD_FAILURE_TIME; double DD_ZERO_HEALTHY_TEAM_DELAY; diff --git a/fdbserver/QuietDatabase.actor.cpp b/fdbserver/QuietDatabase.actor.cpp index ed3cd43f97..3e983c5db8 100644 --- a/fdbserver/QuietDatabase.actor.cpp +++ b/fdbserver/QuietDatabase.actor.cpp @@ -291,10 +291,12 @@ ACTOR Future getTeamCollectionValid(Database cx, WorkerInterface dataDistr int64_t desiredMachineTeamNumber = boost::lexical_cast(teamCollectionInfoMessage.getValue("DesiredMachineTeams")); int64_t maxMachineTeamNumber = boost::lexical_cast(teamCollectionInfoMessage.getValue("MaxMachineTeams")); + // TODO: Get finer granularity check // Team number is always valid when we disable teamRemover. 
This avoids false positive in simulation test - if (SERVER_KNOBS->TR_FLAG_DISABLE_TEAM_REMOVER) { + if (SERVER_KNOBS->TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER || SERVER_KNOBS->TR_FLAG_DISABLE_SERVER_TEAM_REMOVER) { TraceEvent("GetTeamCollectionValid") - .detail("KnobsTeamRemoverDisabled", SERVER_KNOBS->TR_FLAG_DISABLE_TEAM_REMOVER); + .detail("KnobsMachineTeamRemoverDisabled", SERVER_KNOBS->TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER) + .detail("KnobsServerTeamRemoverDisabled", SERVER_KNOBS->TR_FLAG_DISABLE_SERVER_TEAM_REMOVER); return true; }