TeamRemover: Add status to show redundant team removal
Distinguish between the removal of unhealthy teams and the removal of redundant teams. Extend the status report to show when redundant teams are being removed.
This commit is contained in:
parent
0ac7014142
commit
7cca439e00
|
@ -535,7 +535,7 @@ Future<Void> storageServerTracker(
|
||||||
Promise<Void> const& errorOut,
|
Promise<Void> const& errorOut,
|
||||||
Version const& addedVersion);
|
Version const& addedVersion);
|
||||||
|
|
||||||
Future<Void> teamTracker( struct DDTeamCollection* const& self, Reference<TCTeamInfo> const& team, bool const& badTeam );
|
Future<Void> teamTracker(struct DDTeamCollection* const& self, Reference<TCTeamInfo> const& team, bool const& badTeam, bool const& redundantTeam);
|
||||||
|
|
||||||
struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
||||||
enum { REQUESTING_WORKER = 0, GETTING_WORKER = 1, GETTING_STORAGE = 2 };
|
enum { REQUESTING_WORKER = 0, GETTING_WORKER = 1, GETTING_STORAGE = 2 };
|
||||||
|
@ -1091,7 +1091,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
||||||
bool badTeam = redundantTeam || teamInfo->size() != configuration.storageTeamSize
|
bool badTeam = redundantTeam || teamInfo->size() != configuration.storageTeamSize
|
||||||
|| !satisfiesPolicy(teamInfo->getServers());
|
|| !satisfiesPolicy(teamInfo->getServers());
|
||||||
|
|
||||||
teamInfo->tracker = teamTracker(this, teamInfo, badTeam);
|
teamInfo->tracker = teamTracker(this, teamInfo, badTeam, redundantTeam);
|
||||||
// ASSERT( teamInfo->serverIDs.size() > 0 ); //team can be empty at DB initialization
|
// ASSERT( teamInfo->serverIDs.size() > 0 ); //team can be empty at DB initialization
|
||||||
if (badTeam) {
|
if (badTeam) {
|
||||||
badTeams.push_back(teamInfo);
|
badTeams.push_back(teamInfo);
|
||||||
|
@ -2366,7 +2366,8 @@ ACTOR Future<Void> teamRemover(DDTeamCollection* self) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Track a team and issue RelocateShards when the level of degradation changes
|
// Track a team and issue RelocateShards when the level of degradation changes
|
||||||
ACTOR Future<Void> teamTracker( DDTeamCollection* self, Reference<TCTeamInfo> team, bool badTeam ) {
|
// A badTeam can be unhealthy or just a redundantTeam removed by teamRemover()
|
||||||
|
ACTOR Future<Void> teamTracker(DDTeamCollection* self, Reference<TCTeamInfo> team, bool badTeam, bool redundantTeam) {
|
||||||
state int lastServersLeft = team->size();
|
state int lastServersLeft = team->size();
|
||||||
state bool lastAnyUndesired = false;
|
state bool lastAnyUndesired = false;
|
||||||
state bool logTeamEvents = g_network->isSimulated() || !badTeam;
|
state bool logTeamEvents = g_network->isSimulated() || !badTeam;
|
||||||
|
@ -2488,11 +2489,19 @@ ACTOR Future<Void> teamTracker( DDTeamCollection* self, Reference<TCTeamInfo> te
|
||||||
team->setPriority( PRIORITY_TEAM_1_LEFT );
|
team->setPriority( PRIORITY_TEAM_1_LEFT );
|
||||||
else if( serversLeft == 2 )
|
else if( serversLeft == 2 )
|
||||||
team->setPriority( PRIORITY_TEAM_2_LEFT );
|
team->setPriority( PRIORITY_TEAM_2_LEFT );
|
||||||
else
|
else if ( redundantTeam ) {
|
||||||
team->setPriority( PRIORITY_TEAM_UNHEALTHY );
|
team->setPriority( PRIORITY_TEAM_REDUNDANT );
|
||||||
|
} else {
|
||||||
|
team->setPriority( PRIORITY_TEAM_UNHEALTHY );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if ( badTeam || anyWrongConfiguration ) {
|
||||||
|
if ( redundantTeam ) {
|
||||||
|
team->setPriority( PRIORITY_TEAM_REDUNDANT );
|
||||||
|
} else {
|
||||||
|
team->setPriority( PRIORITY_TEAM_UNHEALTHY );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if ( badTeam || anyWrongConfiguration )
|
|
||||||
team->setPriority( PRIORITY_TEAM_UNHEALTHY );
|
|
||||||
else if( anyUndesired )
|
else if( anyUndesired )
|
||||||
team->setPriority( PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER );
|
team->setPriority( PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER );
|
||||||
else
|
else
|
||||||
|
|
|
@ -46,6 +46,7 @@ enum {
|
||||||
PRIORITY_MERGE_SHARD = 240,
|
PRIORITY_MERGE_SHARD = 240,
|
||||||
PRIORITY_SPLIT_SHARD = 250,
|
PRIORITY_SPLIT_SHARD = 250,
|
||||||
|
|
||||||
|
PRIORITY_TEAM_REDUNDANT = 700,
|
||||||
PRIORITY_TEAM_UNHEALTHY = 800,
|
PRIORITY_TEAM_UNHEALTHY = 800,
|
||||||
PRIORITY_TEAM_2_LEFT = 809,
|
PRIORITY_TEAM_2_LEFT = 809,
|
||||||
|
|
||||||
|
|
|
@ -376,14 +376,14 @@ struct DDQueueData {
|
||||||
std::map<int, int> priority_relocations;
|
std::map<int, int> priority_relocations;
|
||||||
int unhealthyRelocations;
|
int unhealthyRelocations;
|
||||||
void startRelocation(int priority) {
|
void startRelocation(int priority) {
|
||||||
if(priority >= PRIORITY_TEAM_UNHEALTHY) {
|
if(priority >= PRIORITY_TEAM_REDUNDANT) {
|
||||||
unhealthyRelocations++;
|
unhealthyRelocations++;
|
||||||
rawProcessingUnhealthy->set(true);
|
rawProcessingUnhealthy->set(true);
|
||||||
}
|
}
|
||||||
priority_relocations[priority]++;
|
priority_relocations[priority]++;
|
||||||
}
|
}
|
||||||
void finishRelocation(int priority) {
|
void finishRelocation(int priority) {
|
||||||
if(priority >= PRIORITY_TEAM_UNHEALTHY) {
|
if(priority >= PRIORITY_TEAM_REDUNDANT) {
|
||||||
unhealthyRelocations--;
|
unhealthyRelocations--;
|
||||||
ASSERT(unhealthyRelocations >= 0);
|
ASSERT(unhealthyRelocations >= 0);
|
||||||
if(unhealthyRelocations == 0) {
|
if(unhealthyRelocations == 0) {
|
||||||
|
@ -594,7 +594,7 @@ struct DDQueueData {
|
||||||
if( foundActiveFetching || foundActiveRelocation ) {
|
if( foundActiveFetching || foundActiveRelocation ) {
|
||||||
rd.wantsNewServers |= rrs.wantsNewServers;
|
rd.wantsNewServers |= rrs.wantsNewServers;
|
||||||
rd.startTime = std::min( rd.startTime, rrs.startTime );
|
rd.startTime = std::min( rd.startTime, rrs.startTime );
|
||||||
if( rrs.priority >= PRIORITY_TEAM_UNHEALTHY && rd.changesBoundaries() )
|
if( rrs.priority >= PRIORITY_TEAM_REDUNDANT && rd.changesBoundaries() )
|
||||||
rd.priority = std::max( rd.priority, rrs.priority );
|
rd.priority = std::max( rd.priority, rrs.priority );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -757,7 +757,7 @@ struct DDQueueData {
|
||||||
inFlightActors.liveActorAt( it->range().begin ) &&
|
inFlightActors.liveActorAt( it->range().begin ) &&
|
||||||
!rd.keys.contains( it->range() ) &&
|
!rd.keys.contains( it->range() ) &&
|
||||||
it->value().priority >= rd.priority &&
|
it->value().priority >= rd.priority &&
|
||||||
rd.priority < PRIORITY_TEAM_UNHEALTHY ) {
|
rd.priority < PRIORITY_TEAM_REDUNDANT ) {
|
||||||
/*TraceEvent("OverlappingInFlight", distributorId)
|
/*TraceEvent("OverlappingInFlight", distributorId)
|
||||||
.detail("KeyBegin", printable(it->value().keys.begin))
|
.detail("KeyBegin", printable(it->value().keys.begin))
|
||||||
.detail("KeyEnd", printable(it->value().keys.end))
|
.detail("KeyEnd", printable(it->value().keys.end))
|
||||||
|
@ -890,6 +890,7 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
|
||||||
bestTeams.clear();
|
bestTeams.clear();
|
||||||
while( tciIndex < self->teamCollections.size() ) {
|
while( tciIndex < self->teamCollections.size() ) {
|
||||||
double inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_HEALTHY;
|
double inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_HEALTHY;
|
||||||
|
if(rd.priority >= PRIORITY_TEAM_REDUNDANT) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_REDUNDANT;
|
||||||
if(rd.priority >= PRIORITY_TEAM_UNHEALTHY) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_UNHEALTHY;
|
if(rd.priority >= PRIORITY_TEAM_UNHEALTHY) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_UNHEALTHY;
|
||||||
if(rd.priority >= PRIORITY_TEAM_1_LEFT) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_ONE_LEFT;
|
if(rd.priority >= PRIORITY_TEAM_1_LEFT) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_ONE_LEFT;
|
||||||
|
|
||||||
|
|
|
@ -86,6 +86,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
|
||||||
init( BG_DD_DECREASE_RATE, 1.02 );
|
init( BG_DD_DECREASE_RATE, 1.02 );
|
||||||
init( BG_DD_SATURATION_DELAY, 1.0 );
|
init( BG_DD_SATURATION_DELAY, 1.0 );
|
||||||
init( INFLIGHT_PENALTY_HEALTHY, 1.0 );
|
init( INFLIGHT_PENALTY_HEALTHY, 1.0 );
|
||||||
|
init( INFLIGHT_PENALTY_REDUNDANT, 9.0 );
|
||||||
init( INFLIGHT_PENALTY_UNHEALTHY, 10.0 );
|
init( INFLIGHT_PENALTY_UNHEALTHY, 10.0 );
|
||||||
init( INFLIGHT_PENALTY_ONE_LEFT, 1000.0 );
|
init( INFLIGHT_PENALTY_ONE_LEFT, 1000.0 );
|
||||||
|
|
||||||
|
|
|
@ -89,6 +89,7 @@ public:
|
||||||
double BG_DD_DECREASE_RATE;
|
double BG_DD_DECREASE_RATE;
|
||||||
double BG_DD_SATURATION_DELAY;
|
double BG_DD_SATURATION_DELAY;
|
||||||
double INFLIGHT_PENALTY_HEALTHY;
|
double INFLIGHT_PENALTY_HEALTHY;
|
||||||
|
double INFLIGHT_PENALTY_REDUNDANT;
|
||||||
double INFLIGHT_PENALTY_UNHEALTHY;
|
double INFLIGHT_PENALTY_UNHEALTHY;
|
||||||
double INFLIGHT_PENALTY_ONE_LEFT;
|
double INFLIGHT_PENALTY_ONE_LEFT;
|
||||||
|
|
||||||
|
|
|
@ -1233,6 +1233,11 @@ ACTOR static Future<JsonBuilderObject> dataStatusFetcher(std::pair<WorkerInterfa
|
||||||
stateSectionObj["name"] = "healing";
|
stateSectionObj["name"] = "healing";
|
||||||
stateSectionObj["description"] = "Restoring replication factor";
|
stateSectionObj["description"] = "Restoring replication factor";
|
||||||
}
|
}
|
||||||
|
else if (highestPriority >= PRIORITY_TEAM_REDUNDANT) {
|
||||||
|
stateSectionObj["healthy"] = true;
|
||||||
|
stateSectionObj["name"] = "healthy_removing_redundant_teams";
|
||||||
|
stateSectionObj["description"] = "Removing redundant machine teams";
|
||||||
|
}
|
||||||
else if (highestPriority >= PRIORITY_MERGE_SHARD) {
|
else if (highestPriority >= PRIORITY_MERGE_SHARD) {
|
||||||
stateSectionObj["healthy"] = true;
|
stateSectionObj["healthy"] = true;
|
||||||
stateSectionObj["name"] = "healthy_repartitioning";
|
stateSectionObj["name"] = "healthy_repartitioning";
|
||||||
|
|
Loading…
Reference in New Issue