Allow one request every second to skip the all_alternatives_failed delay, because if a client has a timeout longer than the delay we will never invalidate the key servers cache

This commit is contained in:
Evan Tschannen 2019-08-09 13:03:40 -07:00
parent 36b7b98d3c
commit da8163fd5a
5 changed files with 13 additions and 4 deletions

View File

@ -39,6 +39,7 @@ Fixes
* Under certain conditions, cross region replication could stall for 10 minute periods. `(PR #1818) <https://github.com/apple/foundationdb/pull/1818>`_.
* In very rare scenarios, master recovery would restart because system metadata was loaded incorrectly. `(PR #1919) <https://github.com/apple/foundationdb/pull/1919>`_.
* Ratekeeper will aggressively throttle when unable to fetch the list of storage servers for a considerable period of time. `(PR #1858) <https://github.com/apple/foundationdb/pull/1858>`_.
* Proxies could become overloaded when all storage servers on a team fail. [6.2.1] `(PR #1976) <https://github.com/apple/foundationdb/pull/1976>`_.
Status
------

View File

@ -298,10 +298,15 @@ Future< REPLY_TYPE(Request) > loadBalance(
if(now() - g_network->networkMetrics.newestAlternativesFailure > FLOW_KNOBS->ALTERNATIVES_FAILURE_RESET_TIME) {
g_network->networkMetrics.oldestAlternativesFailure = now();
}
double elapsed = now()-g_network->networkMetrics.oldestAlternativesFailure;
double delay = std::min(elapsed*FLOW_KNOBS->ALTERNATIVES_FAILURE_DELAY_RATIO, FLOW_KNOBS->ALTERNATIVES_FAILURE_MAX_DELAY);
delay = std::max(delay, std::min(elapsed*FLOW_KNOBS->ALTERNATIVES_FAILURE_SLOW_DELAY_RATIO, FLOW_KNOBS->ALTERNATIVES_FAILURE_SLOW_MAX_DELAY));
delay = std::max(delay, FLOW_KNOBS->ALTERNATIVES_FAILURE_MIN_DELAY);
double delay = FLOW_KNOBS->ALTERNATIVES_FAILURE_MIN_DELAY;
if(now() - g_network->networkMetrics.lastAlternativesFailureSkipDelay > FLOW_KNOBS->ALTERNATIVES_FAILURE_SKIP_DELAY) {
g_network->networkMetrics.lastAlternativesFailureSkipDelay = now();
} else {
double elapsed = now()-g_network->networkMetrics.oldestAlternativesFailure;
delay = std::max(delay, std::min(elapsed*FLOW_KNOBS->ALTERNATIVES_FAILURE_DELAY_RATIO, FLOW_KNOBS->ALTERNATIVES_FAILURE_MAX_DELAY));
delay = std::max(delay, std::min(elapsed*FLOW_KNOBS->ALTERNATIVES_FAILURE_SLOW_DELAY_RATIO, FLOW_KNOBS->ALTERNATIVES_FAILURE_SLOW_MAX_DELAY));
}
// Making this SevWarn means a lot of clutter
if(now() - g_network->networkMetrics.newestAlternativesFailure > 1 || deterministicRandom()->random01() < 0.01) {

View File

@ -168,6 +168,7 @@ FlowKnobs::FlowKnobs(bool randomize, bool isSimulated) {
init( ALTERNATIVES_FAILURE_MAX_DELAY, 1.0 );
init( ALTERNATIVES_FAILURE_SLOW_DELAY_RATIO, 0.04 );
init( ALTERNATIVES_FAILURE_SLOW_MAX_DELAY, 30.0 );
init( ALTERNATIVES_FAILURE_SKIP_DELAY, 1.0 );
init( FUTURE_VERSION_INITIAL_BACKOFF, 1.0 );
init( FUTURE_VERSION_MAX_BACKOFF, 8.0 );
init( FUTURE_VERSION_BACKOFF_GROWTH, 2.0 );

View File

@ -190,6 +190,7 @@ public:
double ALTERNATIVES_FAILURE_MAX_DELAY;
double ALTERNATIVES_FAILURE_SLOW_DELAY_RATIO;
double ALTERNATIVES_FAILURE_SLOW_MAX_DELAY;
double ALTERNATIVES_FAILURE_SKIP_DELAY;
double FUTURE_VERSION_INITIAL_BACKOFF;
double FUTURE_VERSION_MAX_BACKOFF;
double FUTURE_VERSION_BACKOFF_GROWTH;

View File

@ -305,6 +305,7 @@ struct NetworkMetrics {
double oldestAlternativesFailure;
double newestAlternativesFailure;
double lastAlternativesFailureSkipDelay;
double lastSync;
double secSquaredSubmit;