TeamRemover: Add status to show redundant team removal

Distinguish the removal of unhealthy teams from the removal of redundant teams.
Change the status report to include the removal of redundant teams.
This commit is contained in:
Meng Xu 2019-02-21 14:16:44 -08:00
parent 0ac7014142
commit 7cca439e00
6 changed files with 29 additions and 11 deletions

View File

@ -535,7 +535,7 @@ Future<Void> storageServerTracker(
Promise<Void> const& errorOut,
Version const& addedVersion);
Future<Void> teamTracker( struct DDTeamCollection* const& self, Reference<TCTeamInfo> const& team, bool const& badTeam );
Future<Void> teamTracker(struct DDTeamCollection* const& self, Reference<TCTeamInfo> const& team, bool const& badTeam, bool const& redundantTeam);
struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
enum { REQUESTING_WORKER = 0, GETTING_WORKER = 1, GETTING_STORAGE = 2 };
@ -1091,7 +1091,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
bool badTeam = redundantTeam || teamInfo->size() != configuration.storageTeamSize
|| !satisfiesPolicy(teamInfo->getServers());
teamInfo->tracker = teamTracker(this, teamInfo, badTeam);
teamInfo->tracker = teamTracker(this, teamInfo, badTeam, redundantTeam);
// ASSERT( teamInfo->serverIDs.size() > 0 ); //team can be empty at DB initialization
if (badTeam) {
badTeams.push_back(teamInfo);
@ -2366,7 +2366,8 @@ ACTOR Future<Void> teamRemover(DDTeamCollection* self) {
}
// Track a team and issue RelocateShards when the level of degradation changes
ACTOR Future<Void> teamTracker( DDTeamCollection* self, Reference<TCTeamInfo> team, bool badTeam ) {
// A badTeam can be unhealthy or just a redundantTeam removed by teamRemover()
ACTOR Future<Void> teamTracker(DDTeamCollection* self, Reference<TCTeamInfo> team, bool badTeam, bool redundantTeam) {
state int lastServersLeft = team->size();
state bool lastAnyUndesired = false;
state bool logTeamEvents = g_network->isSimulated() || !badTeam;
@ -2488,11 +2489,19 @@ ACTOR Future<Void> teamTracker( DDTeamCollection* self, Reference<TCTeamInfo> te
team->setPriority( PRIORITY_TEAM_1_LEFT );
else if( serversLeft == 2 )
team->setPriority( PRIORITY_TEAM_2_LEFT );
else
team->setPriority( PRIORITY_TEAM_UNHEALTHY );
else if ( redundantTeam ) {
team->setPriority( PRIORITY_TEAM_REDUNDANT );
} else {
team->setPriority( PRIORITY_TEAM_UNHEALTHY );
}
}
else if ( badTeam || anyWrongConfiguration ) {
if ( redundantTeam ) {
team->setPriority( PRIORITY_TEAM_REDUNDANT );
} else {
team->setPriority( PRIORITY_TEAM_UNHEALTHY );
}
}
else if ( badTeam || anyWrongConfiguration )
team->setPriority( PRIORITY_TEAM_UNHEALTHY );
else if( anyUndesired )
team->setPriority( PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER );
else

View File

@ -46,6 +46,7 @@ enum {
PRIORITY_MERGE_SHARD = 240,
PRIORITY_SPLIT_SHARD = 250,
PRIORITY_TEAM_REDUNDANT = 700,
PRIORITY_TEAM_UNHEALTHY = 800,
PRIORITY_TEAM_2_LEFT = 809,

View File

@ -376,14 +376,14 @@ struct DDQueueData {
std::map<int, int> priority_relocations;
int unhealthyRelocations;
void startRelocation(int priority) {
if(priority >= PRIORITY_TEAM_UNHEALTHY) {
if(priority >= PRIORITY_TEAM_REDUNDANT) {
unhealthyRelocations++;
rawProcessingUnhealthy->set(true);
}
priority_relocations[priority]++;
}
void finishRelocation(int priority) {
if(priority >= PRIORITY_TEAM_UNHEALTHY) {
if(priority >= PRIORITY_TEAM_REDUNDANT) {
unhealthyRelocations--;
ASSERT(unhealthyRelocations >= 0);
if(unhealthyRelocations == 0) {
@ -594,7 +594,7 @@ struct DDQueueData {
if( foundActiveFetching || foundActiveRelocation ) {
rd.wantsNewServers |= rrs.wantsNewServers;
rd.startTime = std::min( rd.startTime, rrs.startTime );
if( rrs.priority >= PRIORITY_TEAM_UNHEALTHY && rd.changesBoundaries() )
if( rrs.priority >= PRIORITY_TEAM_REDUNDANT && rd.changesBoundaries() )
rd.priority = std::max( rd.priority, rrs.priority );
}
@ -757,7 +757,7 @@ struct DDQueueData {
inFlightActors.liveActorAt( it->range().begin ) &&
!rd.keys.contains( it->range() ) &&
it->value().priority >= rd.priority &&
rd.priority < PRIORITY_TEAM_UNHEALTHY ) {
rd.priority < PRIORITY_TEAM_REDUNDANT ) {
/*TraceEvent("OverlappingInFlight", distributorId)
.detail("KeyBegin", printable(it->value().keys.begin))
.detail("KeyEnd", printable(it->value().keys.end))
@ -890,6 +890,7 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
bestTeams.clear();
while( tciIndex < self->teamCollections.size() ) {
double inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_HEALTHY;
if(rd.priority >= PRIORITY_TEAM_REDUNDANT) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_REDUNDANT;
if(rd.priority >= PRIORITY_TEAM_UNHEALTHY) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_UNHEALTHY;
if(rd.priority >= PRIORITY_TEAM_1_LEFT) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_ONE_LEFT;

View File

@ -86,6 +86,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( BG_DD_DECREASE_RATE, 1.02 );
init( BG_DD_SATURATION_DELAY, 1.0 );
init( INFLIGHT_PENALTY_HEALTHY, 1.0 );
init( INFLIGHT_PENALTY_REDUNDANT, 9.0 );
init( INFLIGHT_PENALTY_UNHEALTHY, 10.0 );
init( INFLIGHT_PENALTY_ONE_LEFT, 1000.0 );

View File

@ -89,6 +89,7 @@ public:
double BG_DD_DECREASE_RATE;
double BG_DD_SATURATION_DELAY;
double INFLIGHT_PENALTY_HEALTHY;
double INFLIGHT_PENALTY_REDUNDANT;
double INFLIGHT_PENALTY_UNHEALTHY;
double INFLIGHT_PENALTY_ONE_LEFT;

View File

@ -1233,6 +1233,11 @@ ACTOR static Future<JsonBuilderObject> dataStatusFetcher(std::pair<WorkerInterfa
stateSectionObj["name"] = "healing";
stateSectionObj["description"] = "Restoring replication factor";
}
else if (highestPriority >= PRIORITY_TEAM_REDUNDANT) {
stateSectionObj["healthy"] = true;
stateSectionObj["name"] = "healthy_removing_redundant_teams";
stateSectionObj["description"] = "Removing redundant machine teams";
}
else if (highestPriority >= PRIORITY_MERGE_SHARD) {
stateSectionObj["healthy"] = true;
stateSectionObj["name"] = "healthy_repartitioning";