TeamCollection: Test no server or machine has incorrect team number

Add a check to the simulation test which makes sure that the number of
server teams per server is no less than the desired_teams_per_server defined
in knobs and no larger than the max_teams_per_server.

Add a similar check for the number of machine teams per machine as well.
This commit is contained in:
Meng Xu 2019-06-26 13:47:45 -07:00
parent 9ff1b06484
commit 08f28e99f9
2 changed files with 66 additions and 2 deletions

View File

@ -1591,6 +1591,34 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
return totalHealthyMachineCount;
}
// Scans every entry of server_info and returns {min, max} of the per-server
// team count (server.second->teams.size()).
// If server_info is empty, the sentinels {INT_MAX, INT_MIN} are returned unchanged.
std::pair<int, int> calculateMinMaxServerTeamNumOnServer() {
	int minTeamNumber = std::numeric_limits<int>::max();
	int maxTeamNumber = std::numeric_limits<int>::min();
	for (auto& server : server_info) {
		// Convert the size to int once so both comparisons are signed-vs-signed.
		// Comparing an unsigned size() (presumably size_t) directly against the
		// negative INT_MIN sentinel converts the sentinel to a huge unsigned value,
		// so the maximum would never be updated.
		const int teamCount = static_cast<int>(server.second->teams.size());
		if (teamCount < minTeamNumber) {
			minTeamNumber = teamCount;
		}
		if (teamCount > maxTeamNumber) {
			maxTeamNumber = teamCount;
		}
	}
	return std::make_pair(minTeamNumber, maxTeamNumber);
}
// Scans every entry of machine_info and returns {min, max} of the per-machine
// machine-team count (machine.second->machineTeams.size()).
// If machine_info is empty, the sentinels {INT_MAX, INT_MIN} are returned unchanged.
std::pair<int, int> calculateMinMaxMachineTeamNumOnMachine() {
	int minTeamNumber = std::numeric_limits<int>::max();
	int maxTeamNumber = std::numeric_limits<int>::min();
	for (auto& machine : machine_info) {
		// Convert the size to int once so both comparisons are signed-vs-signed.
		// Comparing an unsigned size() (presumably size_t) directly against the
		// negative INT_MIN sentinel converts the sentinel to a huge unsigned value,
		// so the maximum would never be updated.
		const int teamCount = static_cast<int>(machine.second->machineTeams.size());
		if (teamCount < minTeamNumber) {
			minTeamNumber = teamCount;
		}
		if (teamCount > maxTeamNumber) {
			maxTeamNumber = teamCount;
		}
	}
	return std::make_pair(minTeamNumber, maxTeamNumber);
}
// Sanity check
bool isServerTeamNumberCorrect(Reference<TCMachineTeamInfo>& mt) {
int num = 0;
@ -1762,6 +1790,9 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
healthyMachineTeamCount = getHealthyMachineTeamCount();
std::pair<int, int> minMaxTeamNumberOnServer = calculateMinMaxServerTeamNumOnServer();
std::pair<int, int> minMaxMachineTeamNumberOnMachine = calculateMinMaxMachineTeamNumOnMachine();
TraceEvent("TeamCollectionInfo", distributorId)
.detail("Primary", primary)
.detail("AddedTeamNumber", addedTeams)
@ -1775,6 +1806,10 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
.detail("DesiredMachineTeams", desiredMachineTeams)
.detail("MaxMachineTeams", maxMachineTeams)
.detail("TotalHealthyMachine", totalHealthyMachineCount)
.detail("MinTeamNumberOnServer", minMaxTeamNumberOnServer.first)
.detail("MaxTeamNumberOnServer", minMaxTeamNumberOnServer.second)
.detail("MinMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.first)
.detail("MaxMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.second)
.trackLatest("TeamCollectionInfo");
return addedTeams;
@ -1791,6 +1826,9 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
int maxMachineTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * totalHealthyMachineCount;
int healthyMachineTeamCount = getHealthyMachineTeamCount();
std::pair<int, int> minMaxTeamNumberOnServer = calculateMinMaxServerTeamNumOnServer();
std::pair<int, int> minMaxMachineTeamNumberOnMachine = calculateMinMaxMachineTeamNumOnMachine();
TraceEvent("TeamCollectionInfo", distributorId)
.detail("Primary", primary)
.detail("AddedTeamNumber", 0)
@ -1804,6 +1842,10 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
.detail("DesiredMachineTeams", desiredMachineTeams)
.detail("MaxMachineTeams", maxMachineTeams)
.detail("TotalHealthyMachine", totalHealthyMachineCount)
.detail("MinTeamNumberOnServer", minMaxTeamNumberOnServer.first)
.detail("MaxTeamNumberOnServer", minMaxTeamNumberOnServer.second)
.detail("MinMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.first)
.detail("MaxMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.second)
.trackLatest("TeamCollectionInfo");
// Debug purpose
@ -1901,6 +1943,9 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
int maxMachineTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * totalHealthyMachineCount;
int healthyMachineTeamCount = self->getHealthyMachineTeamCount();
std::pair<int, int> minMaxTeamNumberOnServer = self->calculateMinMaxServerTeamNumOnServer();
std::pair<int, int> minMaxMachineTeamNumberOnMachine = self->calculateMinMaxMachineTeamNumOnMachine();
TraceEvent("TeamCollectionInfo", self->distributorId)
.detail("Primary", self->primary)
.detail("AddedTeamNumber", 0)
@ -1914,6 +1959,10 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
.detail("DesiredMachineTeams", desiredMachineTeams)
.detail("MaxMachineTeams", maxMachineTeams)
.detail("TotalHealthyMachine", totalHealthyMachineCount)
.detail("MinTeamNumberOnServer", minMaxTeamNumberOnServer.first)
.detail("MaxTeamNumberOnServer", minMaxTeamNumberOnServer.second)
.detail("MinMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.first)
.detail("MaxMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.second)
.trackLatest("TeamCollectionInfo");
}
}

View File

@ -289,6 +289,11 @@ ACTOR Future<bool> getTeamCollectionValid(Database cx, WorkerInterface dataDistr
int64_t healthyMachineTeamCount = boost::lexical_cast<int64_t>(teamCollectionInfoMessage.getValue("CurrentHealthyMachineTeamNumber"));
int64_t desiredMachineTeamNumber = boost::lexical_cast<int64_t>(teamCollectionInfoMessage.getValue("DesiredMachineTeams"));
int64_t maxMachineTeamNumber = boost::lexical_cast<int64_t>(teamCollectionInfoMessage.getValue("MaxMachineTeams"));
int64_t minServerTeamOnServer = boost::lexical_cast<int64_t>(teamCollectionInfoMessage.getValue("MinTeamNumberOnServer"));
int64_t maxServerTeamOnServer = boost::lexical_cast<int64_t>(teamCollectionInfoMessage.getValue("MaxTeamNumberOnServer"));
int64_t minMachineTeamOnMachine = boost::lexical_cast<int64_t>(teamCollectionInfoMessage.getValue("MinMachineTeamNumberOnMachine"));
int64_t maxMachineTeamOnMachine = boost::lexical_cast<int64_t>(teamCollectionInfoMessage.getValue("MaxMachineTeamNumberOnMachine"));
// Team number is always valid when we disable teamRemover. This avoids false positive in simulation test
if (SERVER_KNOBS->TR_FLAG_DISABLE_TEAM_REMOVER) {
@ -299,7 +304,11 @@ ACTOR Future<bool> getTeamCollectionValid(Database cx, WorkerInterface dataDistr
// The if condition should be consistent with the condition in teamRemover() that decides
// if redundant teams exist.
if (healthyMachineTeamCount > desiredMachineTeamNumber) {
if (healthyMachineTeamCount > desiredMachineTeamNumber ||
minServerTeamOnServer < SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER ||
minMachineTeamOnMachine < SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER ||
maxServerTeamOnServer > SERVER_KNOBS->MAX_TEAMS_PER_SERVER ||
maxMachineTeamOnMachine > SERVER_KNOBS->MAX_TEAMS_PER_SERVER) {
TraceEvent("GetTeamCollectionValid")
.detail("CurrentTeamNumber", currentTeamNumber)
.detail("DesiredTeamNumber", desiredTeamNumber)
@ -307,7 +316,13 @@ ACTOR Future<bool> getTeamCollectionValid(Database cx, WorkerInterface dataDistr
.detail("CurrentHealthyMachineTeamNumber", healthyMachineTeamCount)
.detail("DesiredMachineTeams", desiredMachineTeamNumber)
.detail("CurrentMachineTeamNumber", currentMachineTeamNumber)
.detail("MaxMachineTeams", maxMachineTeamNumber);
.detail("MaxMachineTeams", maxMachineTeamNumber)
.detail("MinTeamNumberOnServer", minServerTeamOnServer)
.detail("MaxTeamNumberOnServer", maxServerTeamOnServer)
.detail("MinMachineTeamNumberOnMachine", minMachineTeamOnMachine)
.detail("MaxMachineTeamNumberOnMachine", maxMachineTeamOnMachine)
.detail("DesiredTeamsPerServer", SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER)
.detail("MaxTeamsPerServer", SERVER_KNOBS->MAX_TEAMS_PER_SERVER);
return false;
} else {
return true;