TeamRemover: Add status to show redundant team removal

Distinguish the removal of redundant teams from the removal of unhealthy teams.
Change the status report to include redundant team removal.
This commit is contained in:
Meng Xu 2019-02-21 14:16:44 -08:00
parent 0ac7014142
commit 7cca439e00
6 changed files with 29 additions and 11 deletions

View File

@ -535,7 +535,7 @@ Future<Void> storageServerTracker(
Promise<Void> const& errorOut, Promise<Void> const& errorOut,
Version const& addedVersion); Version const& addedVersion);
Future<Void> teamTracker( struct DDTeamCollection* const& self, Reference<TCTeamInfo> const& team, bool const& badTeam ); Future<Void> teamTracker(struct DDTeamCollection* const& self, Reference<TCTeamInfo> const& team, bool const& badTeam, bool const& redundantTeam);
struct DDTeamCollection : ReferenceCounted<DDTeamCollection> { struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
enum { REQUESTING_WORKER = 0, GETTING_WORKER = 1, GETTING_STORAGE = 2 }; enum { REQUESTING_WORKER = 0, GETTING_WORKER = 1, GETTING_STORAGE = 2 };
@ -1091,7 +1091,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
bool badTeam = redundantTeam || teamInfo->size() != configuration.storageTeamSize bool badTeam = redundantTeam || teamInfo->size() != configuration.storageTeamSize
|| !satisfiesPolicy(teamInfo->getServers()); || !satisfiesPolicy(teamInfo->getServers());
teamInfo->tracker = teamTracker(this, teamInfo, badTeam); teamInfo->tracker = teamTracker(this, teamInfo, badTeam, redundantTeam);
// ASSERT( teamInfo->serverIDs.size() > 0 ); //team can be empty at DB initialization // ASSERT( teamInfo->serverIDs.size() > 0 ); //team can be empty at DB initialization
if (badTeam) { if (badTeam) {
badTeams.push_back(teamInfo); badTeams.push_back(teamInfo);
@ -2366,7 +2366,8 @@ ACTOR Future<Void> teamRemover(DDTeamCollection* self) {
} }
// Track a team and issue RelocateShards when the level of degradation changes // Track a team and issue RelocateShards when the level of degradation changes
ACTOR Future<Void> teamTracker( DDTeamCollection* self, Reference<TCTeamInfo> team, bool badTeam ) { // A badTeam can be unhealthy or just a redundantTeam removed by teamRemover()
ACTOR Future<Void> teamTracker(DDTeamCollection* self, Reference<TCTeamInfo> team, bool badTeam, bool redundantTeam) {
state int lastServersLeft = team->size(); state int lastServersLeft = team->size();
state bool lastAnyUndesired = false; state bool lastAnyUndesired = false;
state bool logTeamEvents = g_network->isSimulated() || !badTeam; state bool logTeamEvents = g_network->isSimulated() || !badTeam;
@ -2488,11 +2489,19 @@ ACTOR Future<Void> teamTracker( DDTeamCollection* self, Reference<TCTeamInfo> te
team->setPriority( PRIORITY_TEAM_1_LEFT ); team->setPriority( PRIORITY_TEAM_1_LEFT );
else if( serversLeft == 2 ) else if( serversLeft == 2 )
team->setPriority( PRIORITY_TEAM_2_LEFT ); team->setPriority( PRIORITY_TEAM_2_LEFT );
else else if ( redundantTeam ) {
team->setPriority( PRIORITY_TEAM_UNHEALTHY ); team->setPriority( PRIORITY_TEAM_REDUNDANT );
} else {
team->setPriority( PRIORITY_TEAM_UNHEALTHY );
}
}
else if ( badTeam || anyWrongConfiguration ) {
if ( redundantTeam ) {
team->setPriority( PRIORITY_TEAM_REDUNDANT );
} else {
team->setPriority( PRIORITY_TEAM_UNHEALTHY );
}
} }
else if ( badTeam || anyWrongConfiguration )
team->setPriority( PRIORITY_TEAM_UNHEALTHY );
else if( anyUndesired ) else if( anyUndesired )
team->setPriority( PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER ); team->setPriority( PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER );
else else

View File

@ -46,6 +46,7 @@ enum {
PRIORITY_MERGE_SHARD = 240, PRIORITY_MERGE_SHARD = 240,
PRIORITY_SPLIT_SHARD = 250, PRIORITY_SPLIT_SHARD = 250,
PRIORITY_TEAM_REDUNDANT = 700,
PRIORITY_TEAM_UNHEALTHY = 800, PRIORITY_TEAM_UNHEALTHY = 800,
PRIORITY_TEAM_2_LEFT = 809, PRIORITY_TEAM_2_LEFT = 809,

View File

@ -376,14 +376,14 @@ struct DDQueueData {
std::map<int, int> priority_relocations; std::map<int, int> priority_relocations;
int unhealthyRelocations; int unhealthyRelocations;
void startRelocation(int priority) { void startRelocation(int priority) {
if(priority >= PRIORITY_TEAM_UNHEALTHY) { if(priority >= PRIORITY_TEAM_REDUNDANT) {
unhealthyRelocations++; unhealthyRelocations++;
rawProcessingUnhealthy->set(true); rawProcessingUnhealthy->set(true);
} }
priority_relocations[priority]++; priority_relocations[priority]++;
} }
void finishRelocation(int priority) { void finishRelocation(int priority) {
if(priority >= PRIORITY_TEAM_UNHEALTHY) { if(priority >= PRIORITY_TEAM_REDUNDANT) {
unhealthyRelocations--; unhealthyRelocations--;
ASSERT(unhealthyRelocations >= 0); ASSERT(unhealthyRelocations >= 0);
if(unhealthyRelocations == 0) { if(unhealthyRelocations == 0) {
@ -594,7 +594,7 @@ struct DDQueueData {
if( foundActiveFetching || foundActiveRelocation ) { if( foundActiveFetching || foundActiveRelocation ) {
rd.wantsNewServers |= rrs.wantsNewServers; rd.wantsNewServers |= rrs.wantsNewServers;
rd.startTime = std::min( rd.startTime, rrs.startTime ); rd.startTime = std::min( rd.startTime, rrs.startTime );
if( rrs.priority >= PRIORITY_TEAM_UNHEALTHY && rd.changesBoundaries() ) if( rrs.priority >= PRIORITY_TEAM_REDUNDANT && rd.changesBoundaries() )
rd.priority = std::max( rd.priority, rrs.priority ); rd.priority = std::max( rd.priority, rrs.priority );
} }
@ -757,7 +757,7 @@ struct DDQueueData {
inFlightActors.liveActorAt( it->range().begin ) && inFlightActors.liveActorAt( it->range().begin ) &&
!rd.keys.contains( it->range() ) && !rd.keys.contains( it->range() ) &&
it->value().priority >= rd.priority && it->value().priority >= rd.priority &&
rd.priority < PRIORITY_TEAM_UNHEALTHY ) { rd.priority < PRIORITY_TEAM_REDUNDANT ) {
/*TraceEvent("OverlappingInFlight", distributorId) /*TraceEvent("OverlappingInFlight", distributorId)
.detail("KeyBegin", printable(it->value().keys.begin)) .detail("KeyBegin", printable(it->value().keys.begin))
.detail("KeyEnd", printable(it->value().keys.end)) .detail("KeyEnd", printable(it->value().keys.end))
@ -890,6 +890,7 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
bestTeams.clear(); bestTeams.clear();
while( tciIndex < self->teamCollections.size() ) { while( tciIndex < self->teamCollections.size() ) {
double inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_HEALTHY; double inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_HEALTHY;
if(rd.priority >= PRIORITY_TEAM_REDUNDANT) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_REDUNDANT;
if(rd.priority >= PRIORITY_TEAM_UNHEALTHY) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_UNHEALTHY; if(rd.priority >= PRIORITY_TEAM_UNHEALTHY) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_UNHEALTHY;
if(rd.priority >= PRIORITY_TEAM_1_LEFT) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_ONE_LEFT; if(rd.priority >= PRIORITY_TEAM_1_LEFT) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_ONE_LEFT;

View File

@ -86,6 +86,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( BG_DD_DECREASE_RATE, 1.02 ); init( BG_DD_DECREASE_RATE, 1.02 );
init( BG_DD_SATURATION_DELAY, 1.0 ); init( BG_DD_SATURATION_DELAY, 1.0 );
init( INFLIGHT_PENALTY_HEALTHY, 1.0 ); init( INFLIGHT_PENALTY_HEALTHY, 1.0 );
init( INFLIGHT_PENALTY_REDUNDANT, 9.0 );
init( INFLIGHT_PENALTY_UNHEALTHY, 10.0 ); init( INFLIGHT_PENALTY_UNHEALTHY, 10.0 );
init( INFLIGHT_PENALTY_ONE_LEFT, 1000.0 ); init( INFLIGHT_PENALTY_ONE_LEFT, 1000.0 );

View File

@ -89,6 +89,7 @@ public:
double BG_DD_DECREASE_RATE; double BG_DD_DECREASE_RATE;
double BG_DD_SATURATION_DELAY; double BG_DD_SATURATION_DELAY;
double INFLIGHT_PENALTY_HEALTHY; double INFLIGHT_PENALTY_HEALTHY;
double INFLIGHT_PENALTY_REDUNDANT;
double INFLIGHT_PENALTY_UNHEALTHY; double INFLIGHT_PENALTY_UNHEALTHY;
double INFLIGHT_PENALTY_ONE_LEFT; double INFLIGHT_PENALTY_ONE_LEFT;

View File

@ -1233,6 +1233,11 @@ ACTOR static Future<JsonBuilderObject> dataStatusFetcher(std::pair<WorkerInterfa
stateSectionObj["name"] = "healing"; stateSectionObj["name"] = "healing";
stateSectionObj["description"] = "Restoring replication factor"; stateSectionObj["description"] = "Restoring replication factor";
} }
else if (highestPriority >= PRIORITY_TEAM_REDUNDANT) {
stateSectionObj["healthy"] = true;
stateSectionObj["name"] = "healthy_removing_redundant_teams";
stateSectionObj["description"] = "Removing redundant machine teams";
}
else if (highestPriority >= PRIORITY_MERGE_SHARD) { else if (highestPriority >= PRIORITY_MERGE_SHARD) {
stateSectionObj["healthy"] = true; stateSectionObj["healthy"] = true;
stateSectionObj["name"] = "healthy_repartitioning"; stateSectionObj["name"] = "healthy_repartitioning";