Add new criteria to DD's GetTeamRequest that allow you to require shards be present on the team and that the team have a minimum free ratio. This avoids scenarios where the team chosen when processing the request is later rejected by the requestor, causing rebalancing movements to get stuck.
This commit is contained in:
parent
8cfc032929
commit
c164acb88d
|
@ -773,7 +773,10 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
break;
|
||||
}
|
||||
}
|
||||
if(found && teamList[j]->isHealthy()) {
|
||||
if(found && teamList[j]->isHealthy() &&
|
||||
(!req.requiresAssignedData || self->shardsAffectedByTeamFailure->getShardsFor(ShardsAffectedByTeamFailure::Team(teamList[j]->getServerIDs(), self->primary)).size() > 0) &&
|
||||
teamList[j]->getMinFreeSpaceRatio() >= req.minFreeSpaceRatio)
|
||||
{
|
||||
req.reply.send( teamList[j] );
|
||||
return Void();
|
||||
}
|
||||
|
@ -784,7 +787,11 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
if( req.wantsTrueBest ) {
|
||||
ASSERT( !bestOption.present() );
|
||||
for( int i = 0; i < self->teams.size(); i++ ) {
|
||||
if( self->teams[i]->isHealthy() && (!req.preferLowerUtilization || self->teams[i]->hasHealthyFreeSpace()) ) {
|
||||
if (self->teams[i]->isHealthy() &&
|
||||
(!req.preferLowerUtilization || self->teams[i]->hasHealthyFreeSpace()) &&
|
||||
(!req.requiresAssignedData || self->shardsAffectedByTeamFailure->getShardsFor(ShardsAffectedByTeamFailure::Team(self->teams[i]->getServerIDs(), self->primary)) .size() > 0) &&
|
||||
self->teams[i]->getMinFreeSpaceRatio() >= req.minFreeSpaceRatio)
|
||||
{
|
||||
int64_t loadBytes = self->teams[i]->getLoadBytes(true, req.inflightPenalty);
|
||||
if( !bestOption.present() || ( req.preferLowerUtilization && loadBytes < bestLoadBytes ) || ( !req.preferLowerUtilization && loadBytes > bestLoadBytes ) ) {
|
||||
bestLoadBytes = loadBytes;
|
||||
|
@ -798,7 +805,11 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
while( randomTeams.size() < SERVER_KNOBS->BEST_TEAM_OPTION_COUNT && nTries < SERVER_KNOBS->BEST_TEAM_MAX_TEAM_TRIES ) {
|
||||
Reference<IDataDistributionTeam> dest = deterministicRandom()->randomChoice(self->teams);
|
||||
|
||||
bool ok = dest->isHealthy() && (!req.preferLowerUtilization || dest->hasHealthyFreeSpace());
|
||||
bool ok = dest->isHealthy() &&
|
||||
(!req.preferLowerUtilization || dest->hasHealthyFreeSpace()) &&
|
||||
(!req.requiresAssignedData || self->shardsAffectedByTeamFailure->getShardsFor(ShardsAffectedByTeamFailure::Team(dest->getServerIDs(), self->primary)).size() > 0) &&
|
||||
dest->getMinFreeSpaceRatio() >= req.minFreeSpaceRatio;
|
||||
|
||||
for(int i=0; ok && i<randomTeams.size(); i++) {
|
||||
if (randomTeams[i]->getServerIDs() == dest->getServerIDs()) {
|
||||
ok = false;
|
||||
|
@ -833,6 +844,11 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
for( int j = 0; j < teamList.size(); j++ ) {
|
||||
bool found = true;
|
||||
auto serverIDs = teamList[j]->getServerIDs();
|
||||
if((req.requiresAssignedData && self->shardsAffectedByTeamFailure->getShardsFor(ShardsAffectedByTeamFailure::Team(serverIDs, self->primary)).size() == 0) ||
|
||||
teamList[j]->getMinFreeSpaceRatio() < req.minFreeSpaceRatio)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
for( int k = 0; k < teamList[j]->size(); k++ ) {
|
||||
if( !completeSources.count( serverIDs[k] ) ) {
|
||||
found = false;
|
||||
|
|
|
@ -75,12 +75,16 @@ struct GetTeamRequest {
|
|||
bool wantsNewServers;
|
||||
bool wantsTrueBest;
|
||||
bool preferLowerUtilization;
|
||||
bool requiresAssignedData;
|
||||
double minFreeSpaceRatio;
|
||||
double inflightPenalty;
|
||||
std::vector<UID> completeSources;
|
||||
Promise< Optional< Reference<IDataDistributionTeam> > > reply;
|
||||
|
||||
GetTeamRequest() {}
|
||||
GetTeamRequest( bool wantsNewServers, bool wantsTrueBest, bool preferLowerUtilization, double inflightPenalty = 1.0 ) : wantsNewServers( wantsNewServers ), wantsTrueBest( wantsTrueBest ), preferLowerUtilization( preferLowerUtilization ), inflightPenalty( inflightPenalty ) {}
|
||||
GetTeamRequest( bool wantsNewServers, bool wantsTrueBest, bool preferLowerUtilization, bool requiresAssignedData, double minFreeSpaceRatio = 0.0, double inflightPenalty = 1.0 )
|
||||
: wantsNewServers( wantsNewServers ), wantsTrueBest( wantsTrueBest ), preferLowerUtilization( preferLowerUtilization ), requiresAssignedData(requiresAssignedData),
|
||||
minFreeSpaceRatio( minFreeSpaceRatio ), inflightPenalty( inflightPenalty ) {}
|
||||
};
|
||||
|
||||
struct GetMetricsRequest {
|
||||
|
|
|
@ -929,7 +929,7 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
|
|||
if(rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY || rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_2_LEFT) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_UNHEALTHY;
|
||||
if(rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_1_LEFT || rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_0_LEFT) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_ONE_LEFT;
|
||||
|
||||
auto req = GetTeamRequest(rd.wantsNewServers, rd.priority == SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM, true, inflightPenalty);
|
||||
auto req = GetTeamRequest(rd.wantsNewServers, rd.priority == SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM, true, false, 0.0, inflightPenalty);
|
||||
req.completeSources = rd.completeSources;
|
||||
Optional<Reference<IDataDistributionTeam>> bestTeam = wait(brokenPromiseToNever(self->teamCollections[tciIndex].getTeam.getReply(req)));
|
||||
// If a DC has no healthy team, we stop checking the other DCs until
|
||||
|
@ -1213,21 +1213,19 @@ ACTOR Future<Void> BgDDMountainChopper( DDQueueData* self, int teamCollectionInd
|
|||
if (self->priority_relocations[SERVER_KNOBS->PRIORITY_REBALANCE_OVERUTILIZED_TEAM] <
|
||||
SERVER_KNOBS->DD_REBALANCE_PARALLELISM) {
|
||||
state Optional<Reference<IDataDistributionTeam>> randomTeam = wait(brokenPromiseToNever(
|
||||
self->teamCollections[teamCollectionIndex].getTeam.getReply(GetTeamRequest(true, false, true))));
|
||||
self->teamCollections[teamCollectionIndex].getTeam.getReply(GetTeamRequest(true, false, true, false, SERVER_KNOBS->FREE_SPACE_RATIO_DD_CUTOFF))));
|
||||
if (randomTeam.present()) {
|
||||
if (randomTeam.get()->getMinFreeSpaceRatio() > SERVER_KNOBS->FREE_SPACE_RATIO_DD_CUTOFF) {
|
||||
state Optional<Reference<IDataDistributionTeam>> loadedTeam =
|
||||
wait(brokenPromiseToNever(self->teamCollections[teamCollectionIndex].getTeam.getReply(
|
||||
GetTeamRequest(true, true, false))));
|
||||
if (loadedTeam.present()) {
|
||||
bool moved =
|
||||
wait(rebalanceTeams(self, SERVER_KNOBS->PRIORITY_REBALANCE_OVERUTILIZED_TEAM, loadedTeam.get(),
|
||||
randomTeam.get(), teamCollectionIndex == 0));
|
||||
if (moved) {
|
||||
resetCount = 0;
|
||||
} else {
|
||||
resetCount++;
|
||||
}
|
||||
state Optional<Reference<IDataDistributionTeam>> loadedTeam =
|
||||
wait(brokenPromiseToNever(self->teamCollections[teamCollectionIndex].getTeam.getReply(
|
||||
GetTeamRequest(true, true, false, true))));
|
||||
if (loadedTeam.present()) {
|
||||
bool moved =
|
||||
wait(rebalanceTeams(self, SERVER_KNOBS->PRIORITY_REBALANCE_OVERUTILIZED_TEAM, loadedTeam.get(),
|
||||
randomTeam.get(), teamCollectionIndex == 0));
|
||||
if (moved) {
|
||||
resetCount = 0;
|
||||
} else {
|
||||
resetCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1282,20 +1280,18 @@ ACTOR Future<Void> BgDDValleyFiller( DDQueueData* self, int teamCollectionIndex)
|
|||
if (self->priority_relocations[SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM] <
|
||||
SERVER_KNOBS->DD_REBALANCE_PARALLELISM) {
|
||||
state Optional<Reference<IDataDistributionTeam>> randomTeam = wait(brokenPromiseToNever(
|
||||
self->teamCollections[teamCollectionIndex].getTeam.getReply(GetTeamRequest(true, false, false))));
|
||||
self->teamCollections[teamCollectionIndex].getTeam.getReply(GetTeamRequest(true, false, false, true))));
|
||||
if (randomTeam.present()) {
|
||||
state Optional<Reference<IDataDistributionTeam>> unloadedTeam = wait(brokenPromiseToNever(
|
||||
self->teamCollections[teamCollectionIndex].getTeam.getReply(GetTeamRequest(true, true, true))));
|
||||
self->teamCollections[teamCollectionIndex].getTeam.getReply(GetTeamRequest(true, true, true, false, SERVER_KNOBS->FREE_SPACE_RATIO_DD_CUTOFF))));
|
||||
if (unloadedTeam.present()) {
|
||||
if (unloadedTeam.get()->getMinFreeSpaceRatio() > SERVER_KNOBS->FREE_SPACE_RATIO_DD_CUTOFF) {
|
||||
bool moved =
|
||||
wait(rebalanceTeams(self, SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM, randomTeam.get(),
|
||||
unloadedTeam.get(), teamCollectionIndex == 0));
|
||||
if (moved) {
|
||||
resetCount = 0;
|
||||
} else {
|
||||
resetCount++;
|
||||
}
|
||||
bool moved =
|
||||
wait(rebalanceTeams(self, SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM, randomTeam.get(),
|
||||
unloadedTeam.get(), teamCollectionIndex == 0));
|
||||
if (moved) {
|
||||
resetCount = 0;
|
||||
} else {
|
||||
resetCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue