409 lines
14 KiB
C++
409 lines
14 KiB
C++
/*
|
|
* LoadBalance.actor.h
|
|
*
|
|
* This source file is part of the FoundationDB open source project
|
|
*
|
|
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
// When actually compiled (NO_INTELLISENSE), include the generated version of this file. In intellisense use the source version.
|
|
#if defined(NO_INTELLISENSE) && !defined(FLOW_LOADBALANCE_ACTOR_G_H)
|
|
#define FLOW_LOADBALANCE_ACTOR_G_H
|
|
#include "LoadBalance.actor.g.h"
|
|
#elif !defined(FLOW_LOADBALANCE_ACTOR_H)
|
|
#define FLOW_LOADBALANCE_ACTOR_H
|
|
|
|
#include "flow/flow.h"
|
|
#include "flow/Knobs.h"
|
|
|
|
#include "FailureMonitor.h"
|
|
#include "fdbrpc.h"
|
|
#include "Locality.h"
|
|
#include "QueueModel.h"
|
|
#include "MultiInterface.h"
|
|
|
|
using std::vector;
|
|
|
|
// Tracks one outstanding request against a QueueModel.
//
// On construction (when a model is supplied) the request is registered with the model via
// addRequest(); the value it returns is kept in `delta` and handed back to endRequest() when
// the holder is released. Release happens at most once: either explicitly through release(),
// or implicitly from the destructor.
struct ModelHolder : NonCopyable, public ReferenceCounted<ModelHolder> {
	QueueModel* model;   // may be NULL, in which case the holder does nothing
	bool released;       // true once endRequest() has been reported to the model
	double startTime;    // now() at construction; used to compute the request latency
	double delta;        // value returned by model->addRequest(); 0.0 when there is no model
	uint64_t token;      // token passed to addRequest()/endRequest() to identify the request's target

	// Initializers are listed in member declaration order (the order in which they actually run),
	// and `delta` is always given a defined value even when no model is supplied.
	ModelHolder( QueueModel* model, uint64_t token ) : model(model), released(false), startTime(now()), delta(0.0), token(token) {
		if(model) {
			delta = model->addRequest(token);
		}
	}

	// Reports the end of the request to the model. Has no effect if there is no model or the
	// holder was already released.
	//   clean          - forwarded to endRequest(); also forces the latency to be measured
	//   penalty        - forwarded to endRequest()
	//   measureLatency - when false (and !clean), a latency of 0.0 is reported instead of now() - startTime
	void release(bool clean, double penalty, bool measureLatency = true) {
		if(model && !released) {
			released = true;
			double latency = (clean || measureLatency) ? now() - startTime : 0.0;
			model->endRequest(token, latency, penalty, delta, clean);
		}
	}

	// If release() was never called, report an unclean end with penalty -1.0 and no latency measurement.
	~ModelHolder() {
		release(false, -1.0, false);
	}
};
|
|
|
|
// Reply payload carrying a load-balancing penalty value. checkAndProcessResult() forwards
// this penalty to ModelHolder::release() when a reply exposes one.
struct LoadBalancedReply {
	double penalty;

	// A freshly constructed reply carries a penalty of 1.0.
	LoadBalancedReply() : penalty{1.0} {}

	// Serializes or deserializes the penalty through the archive's operator&.
	template <class Archive>
	void serialize(Archive& archive) {
		archive & this->penalty;
	}
};
|
|
|
|
// Overload selected when the argument is a pointer to a LoadBalancedReply (or, presumably, to a
// type derived from it). Declared here only; the definition lives elsewhere.
// NOTE(review): presumably returns a copy of *reply - confirm against the definition.
Optional<LoadBalancedReply> getLoadBalancedReply(LoadBalancedReply *reply);
|
|
// Fallback overload for any other pointer type.
// NOTE(review): presumably returns an empty Optional, so callers fall back to a default penalty - confirm against the definition.
Optional<LoadBalancedReply> getLoadBalancedReply(void*);
|
|
|
|
// Returns true if we got a value for our request
|
|
// Throws an error if the request returned an error that should bubble out
|
|
// Returns false if we got an error that should result in reissuing the request
|
|
template <class T>
|
|
bool checkAndProcessResult(ErrorOr<T> result, Reference<ModelHolder> holder, bool atMostOnce) {
|
|
int errCode = result.isError() ? result.getError().code() : error_code_success;
|
|
bool maybeDelivered = errCode == error_code_broken_promise || errCode == error_code_request_maybe_delivered;
|
|
bool receivedResponse = result.present() || (!maybeDelivered && errCode != error_code_process_behind);
|
|
|
|
Optional<LoadBalancedReply> loadBalancedReply;
|
|
if(!result.isError()) {
|
|
loadBalancedReply = getLoadBalancedReply(&result.get());
|
|
}
|
|
|
|
holder->release(receivedResponse, loadBalancedReply.present() ? loadBalancedReply.get().penalty : -1.0);
|
|
|
|
if(result.present()) {
|
|
return true;
|
|
}
|
|
|
|
if(receivedResponse) {
|
|
throw result.getError();
|
|
}
|
|
|
|
if(atMostOnce && maybeDelivered) {
|
|
throw request_maybe_delivered();
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
// Sends `request` on `stream` after an optional backoff and reports the outcome.
//   - If backoff > 0, first waits for either the backoff delay or requestUnneeded.
//   - If requestUnneeded is ready at that point, returns an empty Optional without sending anything.
//   - Otherwise registers the request with `model` (through a ModelHolder), sends it with
//     tryGetReply(), and lets checkAndProcessResult() decide: a reply is returned as-is, a
//     retryable failure yields an empty Optional, and other errors are thrown.
// isFirstRequest is not used by this actor.
ACTOR template <class Request>
Future<Optional<REPLY_TYPE(Request)>> makeRequest(RequestStream<Request> const* stream, Request request, double backoff, Future<Void> requestUnneeded, QueueModel *model, bool isFirstRequest, bool atMostOnce) {
	if(backoff > 0.0) {
		Void _ = wait(delay(backoff) || requestUnneeded);
	}

	if(requestUnneeded.isReady()) {
		return Optional<REPLY_TYPE(Request)>();
	}

	state Reference<ModelHolder> modelHolder(new ModelHolder(model, stream->getEndpoint().token.first()));

	ErrorOr<REPLY_TYPE(Request)> reply = wait(stream->tryGetReply(request));
	if(!checkAndProcessResult(reply, modelHolder, atMostOnce)) {
		// Retryable failure: let the caller try another alternative.
		return Optional<REPLY_TYPE(Request)>();
	}

	return reply.get();
}
|
|
|
|
template <class Reply>
|
|
void addLaggingRequest(Future<Optional<Reply>> reply, Promise<Void> requestFinished, QueueModel *model) {
|
|
requestFinished.send(Void());
|
|
if(!reply.isReady()) {
|
|
if(model) {
|
|
if(model->laggingRequestCount > FLOW_KNOBS->MAX_LAGGING_REQUESTS_OUTSTANDING || model->laggingRequests.isReady()) {
|
|
model->laggingRequests.cancel();
|
|
model->laggingRequestCount = 0;
|
|
model->addActor = PromiseStream<Future<Void>>();
|
|
model->laggingRequests = actorCollection( model->addActor.getFuture(), &model->laggingRequestCount );
|
|
}
|
|
|
|
model->addActor.send(success(errorOr(reply)));
|
|
}
|
|
}
|
|
}
|
|
|
|
// Keep trying to get a reply from any of the servers until success or cancellation; tries to take into account
|
|
// failMon's information for load balancing and avoiding failed servers.
|
|
// If ALL the servers are failed and the list of servers is not fresh, throws an exception to let the caller refresh the list of servers.
|
|
//
// Parameters:
//   alternatives - the candidate interfaces; indices [0, countBest()) are the ones the initial "best" choice is drawn from.
//                  A null reference makes this actor return Never().
//   channel      - pointer-to-member selecting which RequestStream of each Interface to send on.
//   request      - the request to send; its reply priority is set from taskID up front and it is passed to
//                  resetReply() after every trip around the main loop.
//   taskID       - passed to setReplyPriority() / resetReply().
//   atMostOnce   - forwarded to makeRequest(); see the inline comment on the parameter.
//   model        - optional QueueModel; when present it drives the choice of best/next alternative and the
//                  timing and budget of the speculative second request.
// Returns the reply value of whichever request succeeds first; errors from a request are rethrown.
ACTOR template <class Interface, class Request>
|
|
Future< REPLY_TYPE(Request) > loadBalance(
|
|
Reference<MultiInterface<Interface>> alternatives,
|
|
RequestStream<Request> Interface::* channel,
|
|
Request request = Request(),
|
|
int taskID = TaskDefaultPromiseEndpoint,
|
|
bool atMostOnce = false, // if true, throws request_maybe_delivered() instead of retrying automatically
|
|
QueueModel* model = NULL)
|
|
{
|
|
// firstRequest is the primary outstanding request (invalid when none is outstanding);
// firstRequestEndpoint remembers its endpoint token so the same endpoint is not picked for the second request.
state Future<Optional<REPLY_TYPE(Request)>> firstRequest;
|
|
state Optional<uint64_t> firstRequestEndpoint;
|
|
state Future<Optional<REPLY_TYPE(Request)>> secondRequest;
|
|
// Fires when it is time to issue a second request; stays Never() unless a model supplies timing below.
state Future<Void> secondDelay = Never();
|
|
|
|
// Its future is handed to every makeRequest() call as requestUnneeded; addLaggingRequest() sends on it.
state Promise<Void> requestFinished;
|
|
|
|
setReplyPriority(request, taskID);
|
|
if (!alternatives)
|
|
return Never();
|
|
|
|
ASSERT( alternatives->size() );
|
|
|
|
// Default (model-less) choice: a random best alternative among the first countBest() entries,
// and a random next alternative that is bumped by one when it is >= bestAlt so the two differ.
state int bestAlt = g_random->randomInt(0, alternatives->countBest());
|
|
state int nextAlt = g_random->randomInt(0, std::max(alternatives->size() - 1,1));
|
|
if( nextAlt >= bestAlt )
|
|
nextAlt++;
|
|
|
|
if(model) {
|
|
// 1e9 acts as the "nothing selected yet" sentinel for both metrics and latencies.
double bestMetric = 1e9;
|
|
double nextMetric = 1e9;
|
|
double bestTime = 1e9;
|
|
double nextTime = 1e9;
|
|
// Among the non-failed best alternatives, pick the one with the smallest smoothed outstanding
// metric as bestAlt and the runner-up as nextAlt.
for(int i=0; i<alternatives->countBest(); i++) {
|
|
RequestStream<Request> const* thisStream = &alternatives->get( i, channel );
|
|
if (!IFailureMonitor::failureMonitor().getState( thisStream->getEndpoint() ).failed) {
|
|
auto& qd = model->getMeasurement(thisStream->getEndpoint().token.first());
|
|
double thisMetric = qd.smoothOutstanding.smoothTotal();
|
|
double thisTime = qd.latency;
|
|
|
|
if(thisMetric < bestMetric) {
|
|
// The previous best is demoted to next, unless this index already was the (randomly chosen) best.
if(i != bestAlt) {
|
|
nextAlt = bestAlt;
|
|
nextMetric = bestMetric;
|
|
nextTime = bestTime;
|
|
}
|
|
bestAlt = i;
|
|
bestMetric = thisMetric;
|
|
bestTime = thisTime;
|
|
} else if( thisMetric < nextMetric ) {
|
|
nextAlt = i;
|
|
nextMetric = thisMetric;
|
|
nextTime = thisTime;
|
|
}
|
|
}
|
|
}
|
|
// No measured runner-up among the best alternatives: look for one among the remaining alternatives.
if( nextMetric > 1e8 ) {
|
|
for(int i=alternatives->countBest(); i<alternatives->size(); i++) {
|
|
RequestStream<Request> const* thisStream = &alternatives->get( i, channel );
|
|
if (!IFailureMonitor::failureMonitor().getState( thisStream->getEndpoint() ).failed) {
|
|
auto& qd = model->getMeasurement(thisStream->getEndpoint().token.first());
|
|
double thisMetric = qd.smoothOutstanding.smoothTotal();
|
|
double thisTime = qd.latency;
|
|
|
|
if( thisMetric < nextMetric ) {
|
|
nextAlt = i;
|
|
nextMetric = thisMetric;
|
|
nextTime = thisTime;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Schedule the second request from the next alternative's latency: immediately when the best
// alternative's latency exceeds the scaled threshold, otherwise after
// secondMultiplier*nextTime + BASE_SECOND_REQUEST_TIME. With nextTime still at its initial 1e9, never.
if(nextTime < 1e9) {
|
|
if(bestTime > FLOW_KNOBS->INSTANT_SECOND_REQUEST_MULTIPLIER*(model->secondMultiplier*(nextTime) + FLOW_KNOBS->BASE_SECOND_REQUEST_TIME)) {
|
|
secondDelay = Void();
|
|
} else {
|
|
secondDelay = delay( model->secondMultiplier*nextTime + FLOW_KNOBS->BASE_SECOND_REQUEST_TIME );
|
|
}
|
|
}
|
|
else {
|
|
secondDelay = Never();
|
|
}
|
|
}
|
|
|
|
// startAlt is where the rotating cursor (nextAlt) begins; startDistance is the forward distance
// (mod size) from startAlt to bestAlt. Both are used below to remap cursor positions to indices.
state int startAlt = nextAlt;
|
|
state int startDistance = (bestAlt+alternatives->size()-startAlt) % alternatives->size();
|
|
|
|
state int numAttempts = 0;
|
|
state double backoff = 0;
|
|
loop {
|
|
// Find an alternative, if any, that is not failed, starting with nextAlt
|
|
state RequestStream<Request> const* stream = NULL;
|
|
for(int alternativeNum=0; alternativeNum<alternatives->size(); alternativeNum++) {
|
|
// Remap the cursor: the starting position maps to bestAlt, and positions within startDistance
// of the start map to the previous index.
// NOTE(review): presumably this visits bestAlt first and then every other index once per rotation - confirm.
int useAlt = nextAlt;
|
|
if( nextAlt == startAlt )
|
|
useAlt = bestAlt;
|
|
else if( (nextAlt+alternatives->size()-startAlt) % alternatives->size() <= startDistance )
|
|
useAlt = (nextAlt+alternatives->size()-1) % alternatives->size();
|
|
|
|
stream = &alternatives->get( useAlt, channel );
|
|
// Accept this stream if its endpoint is not failed and it is not the endpoint of the outstanding first request.
if (!IFailureMonitor::failureMonitor().getState( stream->getEndpoint() ).failed && (!firstRequestEndpoint.present() || stream->getEndpoint().token.first() != firstRequestEndpoint.get()))
|
|
break;
|
|
nextAlt = (nextAlt+1) % alternatives->size();
|
|
stream=NULL;
|
|
}
|
|
|
|
if(!stream && !firstRequest.isValid() ) {
|
|
// Everything is down! Wait for someone to be up.
|
|
// Start a new failure episode if the last all-failed observation is older than the reset time.
if(now() - g_network->networkMetrics.newestAlternativesFailure > FLOW_KNOBS->ALTERNATIVES_FAILURE_RESET_TIME) {
|
|
g_network->networkMetrics.oldestAlternativesFailure = now();
|
|
}
|
|
|
|
// The wait grows with the length of the current failure episode (scaled by ALTERNATIVES_FAILURE_DELAY_RATIO,
// capped at ALTERNATIVES_FAILURE_MAX_DELAY) but is never less than minDelay, which is derived from how
// long ago the server list was retrieved.
double serversValidTime = alternatives->getRetrievedAt();
|
|
double minDelay = std::min(FLOW_KNOBS->CACHE_REFRESH_INTERVAL_WHEN_ALL_ALTERNATIVES_FAILED - (now() - serversValidTime), FLOW_KNOBS->ALTERNATIVES_FAILURE_MIN_DELAY);
|
|
double delay = std::max(std::min((now()-g_network->networkMetrics.oldestAlternativesFailure)*FLOW_KNOBS->ALTERNATIVES_FAILURE_DELAY_RATIO, FLOW_KNOBS->ALTERNATIVES_FAILURE_MAX_DELAY), minDelay);
|
|
|
|
// A server list marked ALWAYS_FRESH uses ALWAYS_FRESH itself as the delay.
if(serversValidTime == ALWAYS_FRESH)
|
|
delay = ALWAYS_FRESH;
|
|
|
|
// Making this SevWarn means a lot of clutter
|
|
// Trace when the previous all-failed observation is more than a second old, otherwise only for a 1% sample.
if(now() - g_network->networkMetrics.newestAlternativesFailure > 1 || g_random->random01() < 0.01) {
|
|
TraceEvent("AllAlternativesFailed")
|
|
.detail("Interval", FLOW_KNOBS->CACHE_REFRESH_INTERVAL_WHEN_ALL_ALTERNATIVES_FAILED)
|
|
.detail("ServersValidTime", serversValidTime)
|
|
.detail("Alternatives", alternatives->description())
|
|
.detail("Delay", delay);
|
|
}
|
|
|
|
g_network->networkMetrics.newestAlternativesFailure = now();
|
|
|
|
// A negative delay means there is no time left to wait: fail immediately.
if (delay < 0) {
|
|
throw all_alternatives_failed();
|
|
}
|
|
// Wait until any one endpoint is reported as not failed, or give up when the (jittered) delay expires.
// The local variable `delay` shadows the delay() function here, hence the qualified ::delayJittered.
vector<Future<Void>> ok( alternatives->size() );
|
|
for(int i=0; i<ok.size(); i++)
|
|
ok[i] = IFailureMonitor::failureMonitor().onStateEqual( alternatives->get(i, channel).getEndpoint(), FailureStatus(false) );
|
|
choose {
|
|
when ( Void _ = wait( quorum( ok, 1 ) ) ) {}
|
|
when ( Void _ = wait( ::delayJittered( delay ) ) ) {
|
|
throw all_alternatives_failed();
|
|
}
|
|
}
|
|
|
|
numAttempts = 0; // now that we've got a server back, reset the backoff
|
|
} else if(!stream) {
|
|
//Only the first location is available.
|
|
Optional<REPLY_TYPE(Request)> result = wait( firstRequest );
|
|
if(result.present()) {
|
|
return result.get();
|
|
}
|
|
|
|
// The first request finished without a reply: clear it so a new one is issued on a later iteration.
firstRequest = Future<Optional<REPLY_TYPE(Request)>>();
|
|
firstRequestEndpoint = Optional<uint64_t>();
|
|
} else if( firstRequest.isValid() ) {
|
|
//Issue a second request, the first one is taking a long time.
|
|
secondRequest = makeRequest(stream, request, backoff, requestFinished.getFuture(), model, false, atMostOnce);
|
|
state bool firstFinished = false;
|
|
|
|
// Race the two requests. Whichever produces a reply or an error ends the actor; the other one,
// if still outstanding, is handed to addLaggingRequest().
loop {
|
|
choose {
|
|
// Once the first request has been cleared below, this branch waits on Never() and can no longer fire.
when(ErrorOr<Optional<REPLY_TYPE(Request)>> result = wait( firstRequest.isValid() ? errorOr(firstRequest) : Never() )) {
|
|
if(result.isError() || result.get().present()) {
|
|
addLaggingRequest(secondRequest, requestFinished, model);
|
|
if(result.isError()) {
|
|
throw result.getError();
|
|
}
|
|
else {
|
|
return result.get().get();
|
|
}
|
|
}
|
|
|
|
// The first request ended without a reply; keep waiting on the second request only.
firstRequest = Future<Optional<REPLY_TYPE(Request)>>();
|
|
firstRequestEndpoint = Optional<uint64_t>();
|
|
firstFinished = true;
|
|
}
|
|
when(ErrorOr<Optional<REPLY_TYPE(Request)>> result = wait( errorOr(secondRequest) )) {
|
|
if(result.isError() || result.get().present()) {
|
|
if(!firstFinished) {
|
|
addLaggingRequest(firstRequest, requestFinished, model);
|
|
}
|
|
if(result.isError()) {
|
|
throw result.getError();
|
|
}
|
|
else {
|
|
return result.get().get();
|
|
}
|
|
}
|
|
|
|
// The second request ended without a reply: go back around the outer loop to pick another alternative.
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Once there have been at least as many attempts as alternatives, start or grow the backoff,
// kept between LOAD_BALANCE_START_BACKOFF and LOAD_BALANCE_MAX_BACKOFF.
if(++numAttempts >= alternatives->size()) {
|
|
backoff = std::min(FLOW_KNOBS->LOAD_BALANCE_MAX_BACKOFF, std::max(FLOW_KNOBS->LOAD_BALANCE_START_BACKOFF, backoff * FLOW_KNOBS->LOAD_BALANCE_BACKOFF_RATE));
|
|
}
|
|
} else {
|
|
//Issue a request, if it takes too long to get a reply, go around the loop
|
|
firstRequest = makeRequest(stream, request, backoff, requestFinished.getFuture(), model, true, atMostOnce);
|
|
firstRequestEndpoint = stream->getEndpoint().token.first();
|
|
|
|
loop {
|
|
choose {
|
|
when(ErrorOr<Optional<REPLY_TYPE(Request)>> result = wait( errorOr(firstRequest) )) {
|
|
// The first request finished before a second one was issued: decay the second-request
// multiplier (not below 1.0) and grow the second-request budget (up to its maximum).
if(model) {
|
|
model->secondMultiplier = std::max(model->secondMultiplier-FLOW_KNOBS->SECOND_REQUEST_MULTIPLIER_DECAY, 1.0);
|
|
model->secondBudget = std::min(model->secondBudget+FLOW_KNOBS->SECOND_REQUEST_BUDGET_GROWTH, FLOW_KNOBS->SECOND_REQUEST_MAX_BUDGET);
|
|
}
|
|
|
|
if(result.isError()) {
|
|
throw result.getError();
|
|
}
|
|
|
|
if(result.get().present()) {
|
|
return result.get().get();
|
|
}
|
|
|
|
// No reply and no error: clear the first request and try the next alternative.
firstRequest = Future<Optional<REPLY_TYPE(Request)>>();
|
|
firstRequestEndpoint = Optional<uint64_t>();
|
|
break;
|
|
}
|
|
when(Void _ = wait(secondDelay)) {
|
|
// The second-request timer fires at most once per first request. If the budget allows, pay for a
// second request and leave this loop with firstRequest still outstanding; otherwise keep waiting
// on the first request alone.
secondDelay = Never();
|
|
if(model && model->secondBudget >= 1.0) {
|
|
model->secondMultiplier += FLOW_KNOBS->SECOND_REQUEST_MULTIPLIER_GROWTH;
|
|
model->secondBudget -= 1.0;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Same backoff rule as in the second-request branch above.
if(++numAttempts >= alternatives->size()) {
|
|
backoff = std::min(FLOW_KNOBS->LOAD_BALANCE_MAX_BACKOFF, std::max(FLOW_KNOBS->LOAD_BALANCE_START_BACKOFF, backoff * FLOW_KNOBS->LOAD_BALANCE_BACKOFF_RATE));
|
|
}
|
|
}
|
|
|
|
// Advance the cursor, reset the request's reply state for the next send, and disable the second-request timer.
nextAlt = (nextAlt+1) % alternatives->size();
|
|
resetReply(request, taskID);
|
|
secondDelay = Never();
|
|
}
|
|
}
|
|
|
|
// This wrapper is just to help the compiler accept the coercesion to Reference<Multinterface>
|
|
template <class Interface, class Request, class Multi>
|
|
inline Future< REPLY_TYPE(Request) > loadBalance(
|
|
Reference<Multi> alternatives,
|
|
RequestStream<Request> Interface::* channel,
|
|
Request request = Request(),
|
|
int taskID = TaskDefaultPromiseEndpoint,
|
|
bool atMostOnce = false,
|
|
QueueModel* model = NULL)
|
|
{
|
|
return loadBalance( Reference<MultiInterface<Interface>>(alternatives), channel, request, taskID, atMostOnce, model );
|
|
}
|
|
|
|
#endif
|