More aggressively attempt to find teams that do not have low disk space
This commit is contained in:
parent
44f21ca332
commit
819c55556c
|
@ -241,9 +241,14 @@ public:
|
|||
double minAvailableSpaceRatio = getMinAvailableSpaceRatio(includeInFlight);
|
||||
int64_t inFlightBytes = includeInFlight ? getDataInFlightToTeam() / servers.size() : 0;
|
||||
double availableSpaceMultiplier = SERVER_KNOBS->FREE_SPACE_RATIO_CUTOFF / ( std::max( std::min( SERVER_KNOBS->FREE_SPACE_RATIO_CUTOFF, minAvailableSpaceRatio ), 0.000001 ) );
|
||||
if(servers.size()>2) {
|
||||
// Make sure that, in triple replication, the penalty is high enough that you will always avoid a team with a member at 20% free space
|
||||
availableSpaceMultiplier = availableSpaceMultiplier * availableSpaceMultiplier;
|
||||
}
|
||||
|
||||
if(availableSpaceMultiplier > 1 && deterministicRandom()->random01() < 0.001)
|
||||
TraceEvent(SevWarn, "DiskNearCapacity").detail("AvailableSpaceRatio", minAvailableSpaceRatio);
|
||||
if(minAvailableSpaceRatio < SERVER_KNOBS->START_MIN_FREE_SPACE_RATIO) {
|
||||
TraceEvent(SevWarn, "DiskNearCapacity").suppressFor(1.0).detail("AvailableSpaceRatio", minAvailableSpaceRatio);
|
||||
}
|
||||
|
||||
return (physicalBytes + (inflightPenalty*inFlightBytes)) * availableSpaceMultiplier;
|
||||
}
|
||||
|
|
|
@ -929,7 +929,8 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
|
|||
if(rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY || rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_2_LEFT) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_UNHEALTHY;
|
||||
if(rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_1_LEFT || rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_0_LEFT) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_ONE_LEFT;
|
||||
|
||||
auto req = GetTeamRequest(rd.wantsNewServers, rd.priority == SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM, true, false, SERVER_KNOBS->MIN_FREE_SPACE_RATIO, inflightPenalty);
|
||||
double targetFreeSpaceRatio = std::max(SERVER_KNOBS->START_MIN_FREE_SPACE_RATIO - stuckCount*SERVER_KNOBS->MIN_FREE_SPACE_RATIO_INCREMENT, SERVER_KNOBS->END_MIN_FREE_SPACE_RATIO);
|
||||
auto req = GetTeamRequest(rd.wantsNewServers, rd.priority == SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM, true, false, targetFreeSpaceRatio, inflightPenalty);
|
||||
req.completeSources = rd.completeSources;
|
||||
Optional<Reference<IDataDistributionTeam>> bestTeam = wait(brokenPromiseToNever(self->teamCollections[tciIndex].getTeam.getReply(req)));
|
||||
// If a DC has no healthy team, we stop checking the other DCs until
|
||||
|
|
|
@ -102,7 +102,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
|
|||
init( BG_DD_DECREASE_RATE, 1.02 );
|
||||
init( BG_DD_SATURATION_DELAY, 1.0 );
|
||||
init( INFLIGHT_PENALTY_HEALTHY, 1.0 );
|
||||
init( INFLIGHT_PENALTY_UNHEALTHY, 10.0 );
|
||||
init( INFLIGHT_PENALTY_UNHEALTHY, 500.0 );
|
||||
init( INFLIGHT_PENALTY_ONE_LEFT, 1000.0 );
|
||||
|
||||
init( PRIORITY_RECOVER_MOVE, 110 );
|
||||
|
@ -184,7 +184,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
|
|||
init( DD_MERGE_COALESCE_DELAY, isSimulated ? 30.0 : 300.0 ); if( randomize && BUGGIFY ) DD_MERGE_COALESCE_DELAY = 0.001;
|
||||
init( STORAGE_METRICS_POLLING_DELAY, 2.0 ); if( randomize && BUGGIFY ) STORAGE_METRICS_POLLING_DELAY = 15.0;
|
||||
init( STORAGE_METRICS_RANDOM_DELAY, 0.2 );
|
||||
init( FREE_SPACE_RATIO_CUTOFF, 0.1 );
|
||||
init( FREE_SPACE_RATIO_CUTOFF, 0.35 );
|
||||
init( FREE_SPACE_RATIO_DD_CUTOFF, 0.2 );
|
||||
init( DESIRED_TEAMS_PER_SERVER, 5 ); if( randomize && BUGGIFY ) DESIRED_TEAMS_PER_SERVER = 1;
|
||||
init( MAX_TEAMS_PER_SERVER, 5*DESIRED_TEAMS_PER_SERVER );
|
||||
|
@ -428,7 +428,9 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
|
|||
init( MAX_TRANSACTIONS_PER_BYTE, 1000 );
|
||||
|
||||
init( MIN_FREE_SPACE, 1e8 );
|
||||
init( MIN_FREE_SPACE_RATIO, 0.05 );
|
||||
init( START_MIN_FREE_SPACE_RATIO, 0.26 );
|
||||
init( END_MIN_FREE_SPACE_RATIO, 0.05 );
|
||||
init( MIN_FREE_SPACE_RATIO_INCREMENT, 0.03 );
|
||||
|
||||
init( MAX_TL_SS_VERSION_DIFFERENCE, 1e99 ); // if( randomize && BUGGIFY ) MAX_TL_SS_VERSION_DIFFERENCE = std::max(1.0, 0.25 * VERSIONS_PER_SECOND); // spring starts at half this value //FIXME: this knob causes ratekeeper to clamp on idle cluster in simulation that have a large number of logs
|
||||
init( MAX_TL_SS_VERSION_DIFFERENCE_BATCH, 1e99 );
|
||||
|
|
|
@ -151,6 +151,7 @@ public:
|
|||
double STORAGE_METRICS_RANDOM_DELAY;
|
||||
double FREE_SPACE_RATIO_CUTOFF;
|
||||
double FREE_SPACE_RATIO_DD_CUTOFF;
|
||||
double FREE_SPACE_CUTOFF_PENALTY;
|
||||
int DESIRED_TEAMS_PER_SERVER;
|
||||
int MAX_TEAMS_PER_SERVER;
|
||||
int64_t DD_SHARD_SIZE_GRANULARITY;
|
||||
|
@ -367,7 +368,9 @@ public:
|
|||
double MAX_TRANSACTIONS_PER_BYTE;
|
||||
|
||||
int64_t MIN_FREE_SPACE;
|
||||
double MIN_FREE_SPACE_RATIO;
|
||||
double START_MIN_FREE_SPACE_RATIO;
|
||||
double END_MIN_FREE_SPACE_RATIO;
|
||||
double MIN_FREE_SPACE_RATIO_INCREMENT;
|
||||
|
||||
double MAX_TL_SS_VERSION_DIFFERENCE; // spring starts at half this value
|
||||
double MAX_TL_SS_VERSION_DIFFERENCE_BATCH;
|
||||
|
|
|
@ -390,7 +390,7 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {
|
|||
|
||||
limitReason_t ssLimitReason = limitReason_t::unlimited;
|
||||
|
||||
int64_t minFreeSpace = std::max(SERVER_KNOBS->MIN_FREE_SPACE, (int64_t)(SERVER_KNOBS->MIN_FREE_SPACE_RATIO * ss.smoothTotalSpace.smoothTotal()));
|
||||
int64_t minFreeSpace = std::max(SERVER_KNOBS->MIN_FREE_SPACE, (int64_t)(SERVER_KNOBS->END_MIN_FREE_SPACE_RATIO * ss.smoothTotalSpace.smoothTotal()));
|
||||
|
||||
worstFreeSpaceStorageServer = std::min(worstFreeSpaceStorageServer, (int64_t)ss.smoothFreeSpace.smoothTotal() - minFreeSpace);
|
||||
|
||||
|
@ -574,7 +574,7 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {
|
|||
|
||||
limitReason_t tlogLimitReason = limitReason_t::log_server_write_queue;
|
||||
|
||||
int64_t minFreeSpace = std::max( SERVER_KNOBS->MIN_FREE_SPACE, (int64_t)(SERVER_KNOBS->MIN_FREE_SPACE_RATIO * tl.smoothTotalSpace.smoothTotal()));
|
||||
int64_t minFreeSpace = std::max( SERVER_KNOBS->MIN_FREE_SPACE, (int64_t)(SERVER_KNOBS->END_MIN_FREE_SPACE_RATIO * tl.smoothTotalSpace.smoothTotal()));
|
||||
|
||||
worstFreeSpaceTLog = std::min(worstFreeSpaceTLog, (int64_t)tl.smoothFreeSpace.smoothTotal() - minFreeSpace);
|
||||
|
||||
|
|
Loading…
Reference in New Issue