Merge pull request #1817 from etschannen/feature-proxy-forward

Proxies will forward clients to the next generation
This commit is contained in:
Evan Tschannen 2019-07-10 13:53:12 -07:00 committed by GitHub
commit 7e919e361c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 165 additions and 149 deletions

View File

@ -8,7 +8,6 @@ set(FDBCLIENT_SRCS
BackupContainer.actor.cpp
BackupContainer.h
BlobStore.actor.cpp
ClientDBInfo.h
ClientLogEvents.h
ClientWorkerInterface.h
ClusterInterface.h

View File

@ -1,49 +0,0 @@
/*
* ClientDBInfo.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FDBCLIENT_CLIENTDBINFO_H
#define FDBCLIENT_CLIENTDBINFO_H
#pragma once
#include "fdbclient/MasterProxyInterface.h"
// ClientDBInfo is all the information needed by a database client to access the database
// It is returned (and kept up to date) by the OpenDatabaseRequest interface of ClusterInterface
struct ClientDBInfo {
constexpr static FileIdentifier file_identifier = 5355080;
UID id; // Changes each time anything else changes
vector< MasterProxyInterface > proxies;
double clientTxnInfoSampleRate;
int64_t clientTxnInfoSizeLimit;
ClientDBInfo() : clientTxnInfoSampleRate(std::numeric_limits<double>::infinity()), clientTxnInfoSizeLimit(-1) {}
bool operator == (ClientDBInfo const& r) const { return id == r.id; }
bool operator != (ClientDBInfo const& r) const { return id != r.id; }
template <class Archive>
void serialize(Archive& ar) {
if constexpr (!is_fb_function<Archive>) {
ASSERT(ar.protocolVersion().isValid());
}
serializer(ar, proxies, id, clientTxnInfoSampleRate, clientTxnInfoSizeLimit);
}
};
#endif

View File

@ -25,7 +25,7 @@
#include "fdbclient/FDBTypes.h"
#include "fdbrpc/FailureMonitor.h"
#include "fdbclient/Status.h"
#include "fdbclient/ClientDBInfo.h"
#include "fdbclient/MasterProxyInterface.h"
// Streams from WorkerInterface that are safe and useful to call from a client.
// A ClientWorkerInterface is embedded as the first element of a WorkerInterface.

View File

@ -25,7 +25,7 @@
#include "fdbclient/FDBTypes.h"
#include "fdbrpc/FailureMonitor.h"
#include "fdbclient/Status.h"
#include "fdbclient/ClientDBInfo.h"
#include "fdbclient/MasterProxyInterface.h"
#include "fdbclient/ClientWorkerInterface.h"
struct ClusterInterface {

View File

@ -25,7 +25,6 @@
#include "fdbclient/NativeAPI.actor.h"
#include "fdbclient/KeyRangeMap.h"
#include "fdbclient/MasterProxyInterface.h"
#include "fdbclient/ClientDBInfo.h"
#include "fdbrpc/QueueModel.h"
#include "fdbrpc/MultiInterface.h"
#include "flow/TDMetric.actor.h"

View File

@ -75,7 +75,39 @@ struct MasterProxyInterface {
}
};
struct CommitID {
// ClientDBInfo is all the information needed by a database client to access the database
// It is returned (and kept up to date) by the OpenDatabaseRequest interface of ClusterInterface
struct ClientDBInfo {
constexpr static FileIdentifier file_identifier = 5355080;
UID id; // Changes each time anything else changes
vector< MasterProxyInterface > proxies;
double clientTxnInfoSampleRate;
int64_t clientTxnInfoSizeLimit;
ClientDBInfo() : clientTxnInfoSampleRate(std::numeric_limits<double>::infinity()), clientTxnInfoSizeLimit(-1) {}
bool operator == (ClientDBInfo const& r) const { return id == r.id; }
bool operator != (ClientDBInfo const& r) const { return id != r.id; }
template <class Archive>
void serialize(Archive& ar) {
if constexpr (!is_fb_function<Archive>) {
ASSERT(ar.protocolVersion().isValid());
}
serializer(ar, proxies, id, clientTxnInfoSampleRate, clientTxnInfoSizeLimit);
}
};
struct ProxyForwardReply {
Optional<ClientDBInfo> newClientInfo;
ProxyForwardReply() {}
template <class Ar>
void serialize(Ar &ar) {
serializer(ar, newClientInfo);
}
};
struct CommitID : public ProxyForwardReply {
constexpr static FileIdentifier file_identifier = 14254927;
Version version; // returns invalidVersion if transaction conflicts
uint16_t txnBatchId;
@ -83,7 +115,7 @@ struct CommitID {
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, version, txnBatchId, metadataVersion);
serializer(ar, *(ProxyForwardReply*)this, version, txnBatchId, metadataVersion);
}
CommitID() : version(invalidVersion), txnBatchId(0) {}
@ -127,7 +159,7 @@ static inline int getBytes( CommitTransactionRequest const& r ) {
return total;
}
struct GetReadVersionReply {
struct GetReadVersionReply : public ProxyForwardReply {
constexpr static FileIdentifier file_identifier = 15709388;
Version version;
bool locked;
@ -135,7 +167,7 @@ struct GetReadVersionReply {
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, version, locked, metadataVersion);
serializer(ar, *(ProxyForwardReply*)this, version, locked, metadataVersion);
}
};
@ -169,14 +201,14 @@ struct GetReadVersionRequest : TimedRequest {
}
};
struct GetKeyServerLocationsReply {
struct GetKeyServerLocationsReply : public ProxyForwardReply {
constexpr static FileIdentifier file_identifier = 10636023;
Arena arena;
std::vector<std::pair<KeyRangeRef, vector<StorageServerInterface>>> results;
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, results, arena);
serializer(ar, *(ProxyForwardReply*)this, results, arena);
}
};

View File

@ -578,19 +578,20 @@ ACTOR static Future<Void> monitorClientInfo( Reference<AsyncVar<Optional<Cluster
choose {
when( ClientDBInfo ni = wait( clusterInterface->get().present() ? brokenPromiseToNever( clusterInterface->get().get().openDatabase.getReply( req ) ) : Never() ) ) {
TraceEvent("ClientInfoChange").detail("ChangeID", ni.id);
outInfo->set(ni);
if (ni.proxies.empty()) {
TraceEvent("ClientInfo_NoProxiesReturned").detail("ChangeID", ni.id);
continue;
loop {
TraceEvent("ClientInfoChange").detail("ChangeID", outInfo->get().id);
if (outInfo->get().proxies.empty()) {
TraceEvent("ClientInfo_NoProxiesReturned").detail("ChangeID", outInfo->get().id);
break;
} else if (!FlowTransport::transport().isClient()) {
continue;
break;
}
vector<Future<Void>> onProxyFailureVec;
state vector<Future<Void>> onProxyFailureVec;
bool skipWaitForProxyFail = false;
for (const auto& proxy : ni.proxies) {
for (const auto& proxy : outInfo->get().proxies) {
if (proxy.provisional) {
skipWaitForProxyFail = true;
break;
@ -605,11 +606,16 @@ ACTOR static Future<Void> monitorClientInfo( Reference<AsyncVar<Optional<Cluster
IFailureMonitor::failureMonitor().onStateEqual(
proxy.getStorageServerRejoinInfo.getEndpoint(), FailureStatus()));
}
if (skipWaitForProxyFail) continue;
if (skipWaitForProxyFail) break;
leaderMon = Void();
wait(waitForAny(onProxyFailureVec));
state Future<Void> anyFailures = waitForAny(onProxyFailureVec);
wait(anyFailures || outInfo->onChange());
if(anyFailures.isReady()) {
leaderMon = ccf ? monitorLeader(ccf, clusterInterface) : Void();
break;
}
}
}
when( wait( clusterInterface->onChange() ) ) {
if(clusterInterface->get().present())
@ -1244,6 +1250,10 @@ ACTOR Future< pair<KeyRange,Reference<LocationInfo>> > getKeyLocation_internal(
choose {
when ( wait( cx->onMasterProxiesChanged() ) ) {}
when ( GetKeyServerLocationsReply rep = wait( loadBalance( cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::getKeyServersLocations, GetKeyServerLocationsRequest(key, Optional<KeyRef>(), 100, isBackward, key.arena()), TaskPriority::DefaultPromiseEndpoint ) ) ) {
if(rep.newClientInfo.present()) {
cx->clientInfo->set(rep.newClientInfo.get());
continue;
}
if( info.debugID.present() )
g_traceBatch.addEvent("TransactionDebug", info.debugID.get().first(), "NativeAPI.getKeyLocation.After");
ASSERT( rep.results.size() == 1 );
@ -1281,6 +1291,11 @@ ACTOR Future< vector< pair<KeyRange,Reference<LocationInfo>> > > getKeyRangeLoca
choose {
when ( wait( cx->onMasterProxiesChanged() ) ) {}
when ( GetKeyServerLocationsReply _rep = wait( loadBalance( cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::getKeyServersLocations, GetKeyServerLocationsRequest(keys.begin, keys.end, limit, reverse, keys.arena()), TaskPriority::DefaultPromiseEndpoint ) ) ) {
if(_rep.newClientInfo.present()) {
cx->clientInfo->set(_rep.newClientInfo.get());
continue;
}
state GetKeyServerLocationsReply rep = _rep;
if( info.debugID.present() )
g_traceBatch.addEvent("TransactionDebug", info.debugID.get().first(), "NativeAPI.getKeyLocations.After");
@ -1493,6 +1508,11 @@ ACTOR Future<Version> waitForCommittedVersion( Database cx, Version version ) {
choose {
when ( wait( cx->onMasterProxiesChanged() ) ) {}
when ( GetReadVersionReply v = wait( loadBalance( cx->getMasterProxies(false), &MasterProxyInterface::getConsistentReadVersion, GetReadVersionRequest( 0, GetReadVersionRequest::PRIORITY_SYSTEM_IMMEDIATE ), cx->taskID ) ) ) {
if(v.newClientInfo.present()) {
cx->clientInfo->set(v.newClientInfo.get());
continue;
}
if (v.version >= version)
return v.version;
// SOMEDAY: Do the wait on the server side, possibly use less expensive source of committed version (causal consistency is not needed for this purpose)
@ -2701,6 +2721,11 @@ ACTOR static Future<Void> tryCommit( Database cx, Reference<TransactionLogInfo>
throw request_maybe_delivered();
}
when (CommitID ci = wait( reply )) {
if(ci.newClientInfo.present()) {
cx->clientInfo->set(ci.newClientInfo.get());
throw not_committed();
}
Version v = ci.version;
if (v != invalidVersion) {
if (info.debugID.present())
@ -3032,6 +3057,10 @@ ACTOR Future<GetReadVersionReply> getConsistentReadVersion( DatabaseContext *cx,
choose {
when ( wait( cx->onMasterProxiesChanged() ) ) {}
when ( GetReadVersionReply v = wait( loadBalance( cx->getMasterProxies(flags & GetReadVersionRequest::FLAG_USE_PROVISIONAL_PROXIES), &MasterProxyInterface::getConsistentReadVersion, req, cx->taskID ) ) ) {
if(v.newClientInfo.present()) {
cx->clientInfo->set(v.newClientInfo.get());
continue;
}
if( debugID.present() )
g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), "NativeAPI.getConsistentReadVersion.After");
ASSERT( v.version > 0 );

View File

@ -31,7 +31,6 @@
<EnableCompile Condition="'$(Configuration)|$(Platform)'=='Release|X64'">false</EnableCompile>
</ActorCompiler>
<ClInclude Include="BlobStore.h" />
<ClInclude Include="ClientDBInfo.h" />
<ClInclude Include="ClientLogEvents.h" />
<ClInclude Include="ClientWorkerInterface.h" />
<ClInclude Include="ClusterInterface.h" />

View File

@ -283,6 +283,8 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( PROXY_SPIN_DELAY, 0.01 );
init( UPDATE_REMOTE_LOG_VERSION_INTERVAL, 2.0 );
init( MAX_TXS_POP_VERSION_HISTORY, 1e5 );
init( PROXY_FORWARD_DELAY, 10.0 );
init( MAX_FORWARD_MESSAGES, 1e6 ); if( randomize && BUGGIFY ) MAX_FORWARD_MESSAGES = 10;
// Master Server
// masterCommitter() in the master server will allow lower priority tasks (e.g. DataDistibution)

View File

@ -228,6 +228,8 @@ public:
double PROXY_SPIN_DELAY;
double UPDATE_REMOTE_LOG_VERSION_INTERVAL;
int MAX_TXS_POP_VERSION_HISTORY;
double PROXY_FORWARD_DELAY;
int MAX_FORWARD_MESSAGES;
// Master Server
double COMMIT_SLEEP_TIME;

View File

@ -251,7 +251,7 @@ Future<Void> ILogSystem::ServerPeekCursor::getMore(TaskPriority taskID) {
ACTOR Future<Void> serverPeekOnFailed( ILogSystem::ServerPeekCursor* self ) {
loop {
choose {
when( wait( self->interf->get().present() ? IFailureMonitor::failureMonitor().onDisconnectOrFailure( self->interf->get().interf().peekMessages.getEndpoint() ) : Never() ) ) { return Void(); }
when( wait( self->interf->get().present() ? IFailureMonitor::failureMonitor().onStateEqual( self->interf->get().interf().peekMessages.getEndpoint(), FailureStatus() ) : Never() ) ) { return Void(); }
when( wait( self->interf->onChange() ) ) {}
}
}

View File

@ -965,7 +965,7 @@ ACTOR Future<Void> commitBatch(
break;
}
when(GetReadVersionReply v = wait(self->getConsistentReadVersion.getReply(GetReadVersionRequest(0, GetReadVersionRequest::PRIORITY_SYSTEM_IMMEDIATE | GetReadVersionRequest::FLAG_CAUSAL_READ_RISKY)))) {
if(v.version > self->committedVersion.get()) {
if(!v.newClientInfo.present() && v.version > self->committedVersion.get()) {
self->locked = v.locked;
self->metadataVersion = v.metadataVersion;
self->committedVersion.set(v.version);
@ -1782,33 +1782,85 @@ ACTOR Future<Void> masterProxyServerCore(
ACTOR Future<Void> checkRemoved(Reference<AsyncVar<ServerDBInfo>> db, uint64_t recoveryCount, MasterProxyInterface myInterface) {
loop{
if (db->get().recoveryCount >= recoveryCount && !std::count(db->get().client.proxies.begin(), db->get().client.proxies.end(), myInterface))
if (db->get().recoveryCount >= recoveryCount && !std::count(db->get().client.proxies.begin(), db->get().client.proxies.end(), myInterface)) {
throw worker_removed();
}
wait(db->onChange());
}
}
ACTOR template <class X> Future<Void> stripRequests( RequestStream<X> in, PromiseStream<ReplyPromise<REPLY_TYPE(X)>> out, int* count) {
loop {
X req = waitNext(in.getFuture());
out.send(req.reply);
if((*count) >= 0 && ++(*count) >= SERVER_KNOBS->MAX_FORWARD_MESSAGES) {
TraceEvent(SevWarnAlways, "TooManyProxyForwardRequests");
return Void();
}
}
}
ACTOR Future<Void> forwardProxy(ClientDBInfo info, PromiseStream<ReplyPromise<CommitID>> commitReplies, PromiseStream<ReplyPromise<GetReadVersionReply>> grvReplies, PromiseStream<ReplyPromise<GetKeyServerLocationsReply>> locationReplies) {
loop {
choose {
when(ReplyPromise<CommitID> req = waitNext(commitReplies.getFuture())) {
CommitID rep;
rep.newClientInfo = info;
req.send(rep);
}
when(ReplyPromise<GetReadVersionReply> req = waitNext(grvReplies.getFuture())) {
GetReadVersionReply rep;
rep.newClientInfo = info;
req.send(rep);
}
when(ReplyPromise<GetKeyServerLocationsReply> req = waitNext(locationReplies.getFuture())) {
GetKeyServerLocationsReply rep;
rep.newClientInfo = info;
req.send(rep);
}
}
wait(yield());
}
}
ACTOR Future<Void> masterProxyServer(
MasterProxyInterface proxy,
InitializeMasterProxyRequest req,
Reference<AsyncVar<ServerDBInfo>> db,
std::string whitelistBinPaths)
{
state Future<Void> core;
try {
state Future<Void> core = masterProxyServerCore(proxy, req.master, db, req.recoveryCount, req.recoveryTransactionVersion, req.firstProxy, whitelistBinPaths);
loop choose{
when(wait(core)) { return Void(); }
when(wait(checkRemoved(db, req.recoveryCount, proxy))) {}
}
core = masterProxyServerCore(proxy, req.master, db, req.recoveryCount, req.recoveryTransactionVersion, req.firstProxy, whitelistBinPaths);
wait(core || checkRemoved(db, req.recoveryCount, proxy));
}
catch (Error& e) {
if (e.code() == error_code_actor_cancelled || e.code() == error_code_worker_removed || e.code() == error_code_tlog_stopped ||
e.code() == error_code_master_tlog_failed || e.code() == error_code_coordinators_changed || e.code() == error_code_coordinated_state_conflict ||
e.code() == error_code_new_coordinators_timed_out)
{
TraceEvent("MasterProxyTerminated", proxy.id()).error(e, true);
return Void();
}
if (e.code() != error_code_worker_removed && e.code() != error_code_tlog_stopped &&
e.code() != error_code_master_tlog_failed && e.code() != error_code_coordinators_changed &&
e.code() != error_code_coordinated_state_conflict && e.code() != error_code_new_coordinators_timed_out) {
throw;
}
}
core.cancel();
state PromiseStream<ReplyPromise<CommitID>> commitReplies;
state PromiseStream<ReplyPromise<GetReadVersionReply>> grvReplies;
state PromiseStream<ReplyPromise<GetKeyServerLocationsReply>> locationReplies;
state int replyCount = 0;
state Future<Void> finishForward = delay(SERVER_KNOBS->PROXY_FORWARD_DELAY) || stripRequests(proxy.commit, commitReplies, &replyCount) || stripRequests(proxy.getConsistentReadVersion, grvReplies, &replyCount) || stripRequests(proxy.getKeyServersLocations, locationReplies, &replyCount);
proxy = MasterProxyInterface();
loop {
if(finishForward.isReady()) {
return Void();
}
if(db->get().client.proxies.size() > 0 && !db->get().client.proxies[0].provisional && db->get().recoveryCount >= req.recoveryCount
&& !std::count(db->get().client.proxies.begin(), db->get().client.proxies.end(), proxy)) {
replyCount = -1;
core = forwardProxy(db->get().client, commitReplies, grvReplies, locationReplies);
wait(finishForward);
return Void();
}
wait(db->onChange() || finishForward);
}
}

View File

@ -1357,7 +1357,7 @@ ACTOR Future<Void> masterCore( Reference<MasterData> self ) {
// SOMEDAY: For faster recovery, do this and setDBState asynchronously and don't wait for them
// unless we want to change TLogs
wait((success(recoveryCommit) && sendInitialCommitToResolvers(self)) );
if(recoveryCommit.isReady() && recoveryCommit.get().isError()) {
if(recoveryCommit.isReady() && ( recoveryCommit.get().isError() || recoveryCommit.get().get().newClientInfo.present() )) {
TEST(true); // Master recovery failed because of the initial commit failed
throw master_recovery_failed();
}

View File

@ -129,6 +129,9 @@ ACTOR Future<Void> handleIOErrors( Future<Void> actor, IClosable* store, UID id,
} else {
wait(onClosed);
}
if(e.isError() && e.getError().code() == error_code_broken_promise && !storeError.isReady()) {
wait(delay(0.00001 + FLOW_KNOBS->MAX_BUGGIFIED_DELAY));
}
if(storeError.isReady()) throw storeError.get().getError();
if (e.isError()) throw e.getError(); else return e.get();
}

View File

@ -354,7 +354,7 @@ struct ConsistencyCheckWorkload : TestWorkload
ErrorOr<GetKeyServerLocationsReply> shards = keyServerLocationFutures[i].get();
//If performing quiescent check, then all master proxies should be reachable. Otherwise, only one needs to be reachable
if (self->performQuiescentChecks && !shards.present())
if (self->performQuiescentChecks && (!shards.present() || shards.get().newClientInfo.present()))
{
TraceEvent("ConsistencyCheck_MasterProxyUnavailable").detail("MasterProxyID", proxyInfo->getId(i));
self->testFailure("Master proxy unavailable");
@ -363,7 +363,7 @@ struct ConsistencyCheckWorkload : TestWorkload
//Get the list of shards if one was returned. If not doing a quiescent check, we can break if it is.
//If we are doing a quiescent check, then we only need to do this for the first shard.
if (shards.present() && !keyServersInsertedForThisIteration)
if (shards.present() && !shards.get().newClientInfo.present() && !keyServersInsertedForThisIteration)
{
keyServers.insert(keyServers.end(), shards.get().results.begin(), shards.get().results.end());
keyServersInsertedForThisIteration = true;

View File

@ -1,45 +0,0 @@
testTitle=SnapTestPre
;write 1000 Keys ending with even numbers
testName=SnapTest
numSnaps=1
maxSnapDelay=3.0
testID=0
clearAfterTest=false
testTitle=SnapTestTakeSnap
;Take snap and do read/write
testName=ReadWrite
testDuration=10.0
transactionsPerSecond=10000
writesPerTransactionA=0
readsPerTransactionA=10
writesPerTransactionB=10
readsPerTransactionB=1
alpha=0.5
nodeCount=100000
valueBytes=16
discardEdgeMeasurements=false
testName=SnapTest
numSnaps=1
maxSnapDelay=10.0
testID=1
clearAfterTest=false
testName=Attrition
testDuration=10.0
testTitle=SnapTestPost
;write 1000 Keys ending with odd numbers
testName=SnapTest
numSnaps=1
maxSnapDelay=25.0
testID=2
clearAfterTest=false
; save and shutdown
testTitle=SnapSimpleShutdown
testName=SaveAndKill
restartInfoLocation=simfdb/restartInfo.ini
testDuration=10.0
isRestoring=1

View File

@ -1,7 +0,0 @@
; verify all keys are even numbered
testTitle=SnapTestVerify
testName=SnapTest
numSnaps=1
maxSnapDelay=3.0
testID=3
restartInfoLocation=simfdb/restartInfo.ini