From 08f28e99f96e9339339364fc9524c8326e06e664 Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Wed, 26 Jun 2019 13:47:45 -0700 Subject: [PATCH] TeamCollection:Test no server or machine has incorrect team number Add test for simulation test which make sure the server team number per server will be no less than the desired_teams_per_server defined in knobs and no larger than the max_teams_per_server. Add similar test for machine teams number per machine as well. --- fdbserver/DataDistribution.actor.cpp | 49 ++++++++++++++++++++++++++++ fdbserver/QuietDatabase.actor.cpp | 19 +++++++++-- 2 files changed, 66 insertions(+), 2 deletions(-) diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index 7b29ce0bdf..2f6a9bbcd2 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -1591,6 +1591,34 @@ struct DDTeamCollection : ReferenceCounted { return totalHealthyMachineCount; }
+	// Returns {min, max} of teams.size() over every entry of server_info. With no entries the
+	// initial sentinels (INT_MAX, INT_MIN) are returned unchanged.
+	std::pair<int, int> calculateMinMaxServerTeamNumOnServer() {
+		int minTeamNumber = std::numeric_limits<int>::max();
+		int maxTeamNumber = std::numeric_limits<int>::min();
+		for (auto& server : server_info) {
+			// Compare as int: an unsigned size() vs. the negative sentinel would promote the sentinel to a huge value.
+			const int teamCount = static_cast<int>(server.second->teams.size());
+			if (teamCount < minTeamNumber) minTeamNumber = teamCount;
+			if (teamCount > maxTeamNumber) maxTeamNumber = teamCount;
+		}
+		return std::make_pair(minTeamNumber, maxTeamNumber);
+	}
+
+	// Returns {min, max} of machineTeams.size() over every entry of machine_info. With no entries
+	// the initial sentinels (INT_MAX, INT_MIN) are returned unchanged.
+	std::pair<int, int> calculateMinMaxMachineTeamNumOnMachine() {
+		int minTeamNumber = std::numeric_limits<int>::max();
+		int maxTeamNumber = std::numeric_limits<int>::min();
+		for (auto& machine : machine_info) {
+			// Compare as int: an unsigned size() vs. the negative sentinel would promote the sentinel to a huge value.
+			const int teamCount = static_cast<int>(machine.second->machineTeams.size());
+			if (teamCount < minTeamNumber) minTeamNumber = teamCount;
+			if (teamCount > maxTeamNumber) maxTeamNumber = teamCount;
+		}
+		return std::make_pair(minTeamNumber, maxTeamNumber);
+	}
+
 // Sanity check bool isServerTeamNumberCorrect(Reference& mt) { int num = 0;
@@ -1762,6 +1790,9 @@ struct DDTeamCollection : ReferenceCounted { healthyMachineTeamCount = getHealthyMachineTeamCount(); + std::pair minMaxTeamNumberOnServer = calculateMinMaxServerTeamNumOnServer(); + std::pair minMaxMachineTeamNumberOnMachine = calculateMinMaxMachineTeamNumOnMachine(); + TraceEvent("TeamCollectionInfo", distributorId) .detail("Primary", primary) .detail("AddedTeamNumber", addedTeams) @@ -1775,6 +1806,10 @@ struct DDTeamCollection : ReferenceCounted { .detail("DesiredMachineTeams", desiredMachineTeams) .detail("MaxMachineTeams", maxMachineTeams) .detail("TotalHealthyMachine", totalHealthyMachineCount) + .detail("MinTeamNumberOnServer", minMaxTeamNumberOnServer.first) + .detail("MaxTeamNumberOnServer", minMaxTeamNumberOnServer.second) + .detail("MinMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.first) + .detail("MaxMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.second) .trackLatest("TeamCollectionInfo"); return addedTeams; @@ -1791,6 +1826,9 @@ struct DDTeamCollection : ReferenceCounted { int maxMachineTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * totalHealthyMachineCount; int healthyMachineTeamCount = getHealthyMachineTeamCount(); + std::pair minMaxTeamNumberOnServer = calculateMinMaxServerTeamNumOnServer(); + std::pair minMaxMachineTeamNumberOnMachine = calculateMinMaxMachineTeamNumOnMachine(); + TraceEvent("TeamCollectionInfo", distributorId) .detail("Primary", primary) .detail("AddedTeamNumber", 0) @@ -1804,6 +1842,10 @@ struct DDTeamCollection : ReferenceCounted { .detail("DesiredMachineTeams", desiredMachineTeams) .detail("MaxMachineTeams", maxMachineTeams) .detail("TotalHealthyMachine", totalHealthyMachineCount) + .detail("MinTeamNumberOnServer", minMaxTeamNumberOnServer.first) + .detail("MaxTeamNumberOnServer", minMaxTeamNumberOnServer.second) + .detail("MinMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.first) + .detail("MaxMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.second) 
.trackLatest("TeamCollectionInfo"); // Debug purpose @@ -1901,6 +1943,9 @@ struct DDTeamCollection : ReferenceCounted { int maxMachineTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * totalHealthyMachineCount; int healthyMachineTeamCount = self->getHealthyMachineTeamCount(); + std::pair minMaxTeamNumberOnServer = self->calculateMinMaxServerTeamNumOnServer(); + std::pair minMaxMachineTeamNumberOnMachine = self->calculateMinMaxMachineTeamNumOnMachine(); + TraceEvent("TeamCollectionInfo", self->distributorId) .detail("Primary", self->primary) .detail("AddedTeamNumber", 0) @@ -1914,6 +1959,10 @@ struct DDTeamCollection : ReferenceCounted { .detail("DesiredMachineTeams", desiredMachineTeams) .detail("MaxMachineTeams", maxMachineTeams) .detail("TotalHealthyMachine", totalHealthyMachineCount) + .detail("MinTeamNumberOnServer", minMaxTeamNumberOnServer.first) + .detail("MaxTeamNumberOnServer", minMaxTeamNumberOnServer.second) + .detail("MinMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.first) + .detail("MaxMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.second) .trackLatest("TeamCollectionInfo"); } } diff --git a/fdbserver/QuietDatabase.actor.cpp b/fdbserver/QuietDatabase.actor.cpp index b5be5335dc..126779c4bf 100644 --- a/fdbserver/QuietDatabase.actor.cpp +++ b/fdbserver/QuietDatabase.actor.cpp @@ -289,6 +289,11 @@ ACTOR Future getTeamCollectionValid(Database cx, WorkerInterface dataDistr int64_t healthyMachineTeamCount = boost::lexical_cast(teamCollectionInfoMessage.getValue("CurrentHealthyMachineTeamNumber")); int64_t desiredMachineTeamNumber = boost::lexical_cast(teamCollectionInfoMessage.getValue("DesiredMachineTeams")); int64_t maxMachineTeamNumber = boost::lexical_cast(teamCollectionInfoMessage.getValue("MaxMachineTeams")); + + int64_t minServerTeamOnServer = boost::lexical_cast(teamCollectionInfoMessage.getValue("MinTeamNumberOnServer")); + int64_t maxServerTeamOnServer = 
boost::lexical_cast(teamCollectionInfoMessage.getValue("MaxTeamNumberOnServer")); + int64_t minMachineTeamOnMachine = boost::lexical_cast(teamCollectionInfoMessage.getValue("MinMachineTeamNumberOnMachine")); + int64_t maxMachineTeamOnMachine = boost::lexical_cast(teamCollectionInfoMessage.getValue("MaxMachineTeamNumberOnMachine")); // Team number is always valid when we disable teamRemover. This avoids false positive in simulation test if (SERVER_KNOBS->TR_FLAG_DISABLE_TEAM_REMOVER) { @@ -299,7 +304,11 @@ ACTOR Future getTeamCollectionValid(Database cx, WorkerInterface dataDistr // The if condition should be consistent with the condition in teamRemover() that decides // if redundant teams exist. - if (healthyMachineTeamCount > desiredMachineTeamNumber) { + if (healthyMachineTeamCount > desiredMachineTeamNumber || + minServerTeamOnServer < SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER || + minMachineTeamOnMachine < SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER || + maxServerTeamOnServer > SERVER_KNOBS->MAX_TEAMS_PER_SERVER || + maxMachineTeamOnMachine > SERVER_KNOBS->MAX_TEAMS_PER_SERVER) { TraceEvent("GetTeamCollectionValid") .detail("CurrentTeamNumber", currentTeamNumber) .detail("DesiredTeamNumber", desiredTeamNumber) @@ -307,7 +316,13 @@ ACTOR Future getTeamCollectionValid(Database cx, WorkerInterface dataDistr .detail("CurrentHealthyMachineTeamNumber", healthyMachineTeamCount) .detail("DesiredMachineTeams", desiredMachineTeamNumber) .detail("CurrentMachineTeamNumber", currentMachineTeamNumber) - .detail("MaxMachineTeams", maxMachineTeamNumber); + .detail("MaxMachineTeams", maxMachineTeamNumber) + .detail("MinTeamNumberOnServer", minServerTeamOnServer) + .detail("MaxTeamNumberOnServer", maxServerTeamOnServer) + .detail("MinMachineTeamNumberOnMachine", minMachineTeamOnMachine) + .detail("MaxMachineTeamNumberOnMachine", maxMachineTeamOnMachine) + .detail("DesiredTeamsPerServer", SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER) + .detail("MaxTeamsPerServer", 
SERVER_KNOBS->MAX_TEAMS_PER_SERVER); return false; } else { return true;