TeamRemover: Add status to show redundant team removal
Distinguish the removal of redundant teams from the removal of unhealthy teams. Extend the status report to show when redundant teams are being removed.
This commit is contained in:
parent
0ac7014142
commit
7cca439e00
|
@ -535,7 +535,7 @@ Future<Void> storageServerTracker(
|
|||
Promise<Void> const& errorOut,
|
||||
Version const& addedVersion);
|
||||
|
||||
Future<Void> teamTracker( struct DDTeamCollection* const& self, Reference<TCTeamInfo> const& team, bool const& badTeam );
|
||||
Future<Void> teamTracker(struct DDTeamCollection* const& self, Reference<TCTeamInfo> const& team, bool const& badTeam, bool const& redundantTeam);
|
||||
|
||||
struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
||||
enum { REQUESTING_WORKER = 0, GETTING_WORKER = 1, GETTING_STORAGE = 2 };
|
||||
|
@ -1091,7 +1091,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
bool badTeam = redundantTeam || teamInfo->size() != configuration.storageTeamSize
|
||||
|| !satisfiesPolicy(teamInfo->getServers());
|
||||
|
||||
teamInfo->tracker = teamTracker(this, teamInfo, badTeam);
|
||||
teamInfo->tracker = teamTracker(this, teamInfo, badTeam, redundantTeam);
|
||||
// ASSERT( teamInfo->serverIDs.size() > 0 ); //team can be empty at DB initialization
|
||||
if (badTeam) {
|
||||
badTeams.push_back(teamInfo);
|
||||
|
@ -2366,7 +2366,8 @@ ACTOR Future<Void> teamRemover(DDTeamCollection* self) {
|
|||
}
|
||||
|
||||
// Track a team and issue RelocateShards when the level of degradation changes
|
||||
ACTOR Future<Void> teamTracker( DDTeamCollection* self, Reference<TCTeamInfo> team, bool badTeam ) {
|
||||
// A badTeam can be unhealthy or just a redundantTeam removed by teamRemover()
|
||||
ACTOR Future<Void> teamTracker(DDTeamCollection* self, Reference<TCTeamInfo> team, bool badTeam, bool redundantTeam) {
|
||||
state int lastServersLeft = team->size();
|
||||
state bool lastAnyUndesired = false;
|
||||
state bool logTeamEvents = g_network->isSimulated() || !badTeam;
|
||||
|
@ -2488,11 +2489,19 @@ ACTOR Future<Void> teamTracker( DDTeamCollection* self, Reference<TCTeamInfo> te
|
|||
team->setPriority( PRIORITY_TEAM_1_LEFT );
|
||||
else if( serversLeft == 2 )
|
||||
team->setPriority( PRIORITY_TEAM_2_LEFT );
|
||||
else
|
||||
team->setPriority( PRIORITY_TEAM_UNHEALTHY );
|
||||
else if ( redundantTeam ) {
|
||||
team->setPriority( PRIORITY_TEAM_REDUNDANT );
|
||||
} else {
|
||||
team->setPriority( PRIORITY_TEAM_UNHEALTHY );
|
||||
}
|
||||
}
|
||||
else if ( badTeam || anyWrongConfiguration ) {
|
||||
if ( redundantTeam ) {
|
||||
team->setPriority( PRIORITY_TEAM_REDUNDANT );
|
||||
} else {
|
||||
team->setPriority( PRIORITY_TEAM_UNHEALTHY );
|
||||
}
|
||||
}
|
||||
else if ( badTeam || anyWrongConfiguration )
|
||||
team->setPriority( PRIORITY_TEAM_UNHEALTHY );
|
||||
else if( anyUndesired )
|
||||
team->setPriority( PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER );
|
||||
else
|
||||
|
|
|
@ -46,6 +46,7 @@ enum {
|
|||
PRIORITY_MERGE_SHARD = 240,
|
||||
PRIORITY_SPLIT_SHARD = 250,
|
||||
|
||||
PRIORITY_TEAM_REDUNDANT = 700,
|
||||
PRIORITY_TEAM_UNHEALTHY = 800,
|
||||
PRIORITY_TEAM_2_LEFT = 809,
|
||||
|
||||
|
|
|
@ -376,14 +376,14 @@ struct DDQueueData {
|
|||
std::map<int, int> priority_relocations;
|
||||
int unhealthyRelocations;
|
||||
void startRelocation(int priority) {
|
||||
if(priority >= PRIORITY_TEAM_UNHEALTHY) {
|
||||
if(priority >= PRIORITY_TEAM_REDUNDANT) {
|
||||
unhealthyRelocations++;
|
||||
rawProcessingUnhealthy->set(true);
|
||||
}
|
||||
priority_relocations[priority]++;
|
||||
}
|
||||
void finishRelocation(int priority) {
|
||||
if(priority >= PRIORITY_TEAM_UNHEALTHY) {
|
||||
if(priority >= PRIORITY_TEAM_REDUNDANT) {
|
||||
unhealthyRelocations--;
|
||||
ASSERT(unhealthyRelocations >= 0);
|
||||
if(unhealthyRelocations == 0) {
|
||||
|
@ -594,7 +594,7 @@ struct DDQueueData {
|
|||
if( foundActiveFetching || foundActiveRelocation ) {
|
||||
rd.wantsNewServers |= rrs.wantsNewServers;
|
||||
rd.startTime = std::min( rd.startTime, rrs.startTime );
|
||||
if( rrs.priority >= PRIORITY_TEAM_UNHEALTHY && rd.changesBoundaries() )
|
||||
if( rrs.priority >= PRIORITY_TEAM_REDUNDANT && rd.changesBoundaries() )
|
||||
rd.priority = std::max( rd.priority, rrs.priority );
|
||||
}
|
||||
|
||||
|
@ -757,7 +757,7 @@ struct DDQueueData {
|
|||
inFlightActors.liveActorAt( it->range().begin ) &&
|
||||
!rd.keys.contains( it->range() ) &&
|
||||
it->value().priority >= rd.priority &&
|
||||
rd.priority < PRIORITY_TEAM_UNHEALTHY ) {
|
||||
rd.priority < PRIORITY_TEAM_REDUNDANT ) {
|
||||
/*TraceEvent("OverlappingInFlight", distributorId)
|
||||
.detail("KeyBegin", printable(it->value().keys.begin))
|
||||
.detail("KeyEnd", printable(it->value().keys.end))
|
||||
|
@ -890,6 +890,7 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
|
|||
bestTeams.clear();
|
||||
while( tciIndex < self->teamCollections.size() ) {
|
||||
double inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_HEALTHY;
|
||||
if(rd.priority >= PRIORITY_TEAM_REDUNDANT) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_REDUNDANT;
|
||||
if(rd.priority >= PRIORITY_TEAM_UNHEALTHY) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_UNHEALTHY;
|
||||
if(rd.priority >= PRIORITY_TEAM_1_LEFT) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_ONE_LEFT;
|
||||
|
||||
|
|
|
@ -86,6 +86,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
|
|||
init( BG_DD_DECREASE_RATE, 1.02 );
|
||||
init( BG_DD_SATURATION_DELAY, 1.0 );
|
||||
init( INFLIGHT_PENALTY_HEALTHY, 1.0 );
|
||||
init( INFLIGHT_PENALTY_REDUNDANT, 9.0 );
|
||||
init( INFLIGHT_PENALTY_UNHEALTHY, 10.0 );
|
||||
init( INFLIGHT_PENALTY_ONE_LEFT, 1000.0 );
|
||||
|
||||
|
|
|
@ -89,6 +89,7 @@ public:
|
|||
double BG_DD_DECREASE_RATE;
|
||||
double BG_DD_SATURATION_DELAY;
|
||||
double INFLIGHT_PENALTY_HEALTHY;
|
||||
double INFLIGHT_PENALTY_REDUNDANT;
|
||||
double INFLIGHT_PENALTY_UNHEALTHY;
|
||||
double INFLIGHT_PENALTY_ONE_LEFT;
|
||||
|
||||
|
|
|
@ -1233,6 +1233,11 @@ ACTOR static Future<JsonBuilderObject> dataStatusFetcher(std::pair<WorkerInterfa
|
|||
stateSectionObj["name"] = "healing";
|
||||
stateSectionObj["description"] = "Restoring replication factor";
|
||||
}
|
||||
else if (highestPriority >= PRIORITY_TEAM_REDUNDANT) {
|
||||
stateSectionObj["healthy"] = true;
|
||||
stateSectionObj["name"] = "healthy_removing_redundant_teams";
|
||||
stateSectionObj["description"] = "Removing redundant machine teams";
|
||||
}
|
||||
else if (highestPriority >= PRIORITY_MERGE_SHARD) {
|
||||
stateSectionObj["healthy"] = true;
|
||||
stateSectionObj["name"] = "healthy_repartitioning";
|
||||
|
|
Loading…
Reference in New Issue