Revert "CDs reject requests from external cluster or requests need to be forwarded based on an old forward request."

This commit is contained in:
A.J. Beamon 2021-02-05 13:05:33 -08:00 committed by GitHub
parent 8091d8179d
commit 5e80e16802
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 18 additions and 121 deletions

View File

@ -428,18 +428,12 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
// (they are always derived from a ClusterConnectionString key).
// Forwarding values are stored in this range:
const KeyRangeRef fwdKeys( LiteralStringRef( "\xff" "fwd" ), LiteralStringRef( "\xff" "fwe" ) );
// Forwarding times are stored in this range:
const KeyRangeRef fwdTimeKeys(LiteralStringRef("\xff"
"fwdTime"),
LiteralStringRef("\xff"
"fwdTimf"));
struct LeaderRegisterCollection {
// SOMEDAY: Factor this into a generic tool? Extend ActorCollection to support removal actions? What?
ActorCollection actors;
Map<Key, LeaderElectionRegInterface> registerInterfaces;
Map<Key, LeaderInfo> forward;
Map<Key, double> forwardStartTime;
OnDemandStore *pStore;
LeaderRegisterCollection( OnDemandStore *pStore ) : actors( false ), pStore( pStore ) {}
@ -448,21 +442,13 @@ struct LeaderRegisterCollection {
if( !self->pStore->exists() )
return Void();
OnDemandStore &store = *self->pStore;
state Future<Standalone<RangeResultRef>> forwardingInfoF = store->readRange(fwdKeys);
state Future<Standalone<RangeResultRef>> forwardingTimeF = store->readRange(fwdTimeKeys);
wait(success(forwardingInfoF) && success(forwardingTimeF));
Standalone<RangeResultRef> forwardingInfo = forwardingInfoF.get();
Standalone<RangeResultRef> forwardingTime = forwardingTimeF.get();
Standalone<RangeResultRef> forwardingInfo = wait( store->readRange( fwdKeys ) );
for( int i = 0; i < forwardingInfo.size(); i++ ) {
LeaderInfo forwardInfo;
forwardInfo.forward = true;
forwardInfo.serializedInfo = forwardingInfo[i].value;
self->forward[ forwardingInfo[i].key.removePrefix( fwdKeys.begin ) ] = forwardInfo;
}
for (int i = 0; i < forwardingTime.size(); i++) {
double time = BinaryReader::fromStringRef<double>(forwardingInfo[i].value, IncludeVersion());
self->forwardStartTime[forwardingInfo[i].key.removePrefix(fwdTimeKeys.begin)] = time;
}
return Void();
}
@ -470,31 +456,18 @@ struct LeaderRegisterCollection {
Optional<LeaderInfo> getForward(KeyRef key) {
auto i = forward.find( key );
auto t = forwardStartTime.find(key);
if (i == forward.end())
return Optional<LeaderInfo>();
if (t != forwardStartTime.end()) {
double forwardTime = t->value;
if (now() - forwardTime > SERVER_KNOBS->FORWARD_REQUEST_TOO_OLD) {
TraceEvent(SevWarnAlways, "AccessOldForward")
.detail("ForwardSetSecondsAgo", now() - forwardTime)
.detail("ForwardClusterKey", key);
}
}
return i->value;
}
ACTOR static Future<Void> setForward(LeaderRegisterCollection *self, KeyRef key, ClusterConnectionString conn) {
double forwardTime = now();
LeaderInfo forwardInfo;
forwardInfo.forward = true;
forwardInfo.serializedInfo = conn.toString();
self->forward[ key ] = forwardInfo;
self->forwardStartTime[key] = forwardTime;
OnDemandStore &store = *self->pStore;
store->set( KeyValueRef( key.withPrefix( fwdKeys.begin ), conn.toString() ) );
store->set(
KeyValueRef(key.withPrefix(fwdTimeKeys.begin), BinaryWriter::toValue(forwardTime, IncludeVersion())));
wait(store->commit());
return Void();
}
@ -535,8 +508,7 @@ struct LeaderRegisterCollection {
// leaderServer multiplexes multiple leaderRegisters onto a single LeaderElectionRegInterface,
// creating and destroying them on demand.
ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf, OnDemandStore* pStore, UID id,
Reference<ClusterConnectionFile> ccf) {
ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf, OnDemandStore *pStore, UID id) {
state LeaderRegisterCollection regs( pStore );
state ActorCollection forwarders(false);
@ -551,18 +523,7 @@ ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf, OnDemandStore
info.forward = forward.get().serializedInfo;
req.reply.send( CachedSerialization<ClientDBInfo>(info) );
} else {
if (!SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT &&
(ccf->getConnectionString().clusterKey() != req.clusterKey ||
ccf->getConnectionString().coordinators() != req.coordinators)) {
TraceEvent(SevWarnAlways, "CCFMismatch")
.detail("RequestType", "OpenDatabaseCoordRequest")
.detail("LocalCS", ccf->getConnectionString().toString())
.detail("IncomingClusterKey", req.clusterKey)
.detail("IncomingCoordinators", describeList(req.coordinators, req.coordinators.size()));
req.reply.sendError(wrong_connection_file());
} else {
regs.getInterface(req.clusterKey, id).openDatabase.send(req);
}
regs.getInterface(req.clusterKey, id).openDatabase.send( req );
}
}
when ( ElectionResultRequest req = waitNext( interf.electionResult.getFuture() ) ) {
@ -570,91 +531,44 @@ ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf, OnDemandStore
if( forward.present() ) {
req.reply.send( forward.get() );
} else {
if (!SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT &&
(ccf->getConnectionString().clusterKey() != req.key ||
ccf->getConnectionString().coordinators() != req.coordinators)) {
TraceEvent(SevWarnAlways, "CCFMismatch")
.detail("RequestType", "ElectionResultRequest")
.detail("LocalCS", ccf->getConnectionString().toString())
.detail("IncomingClusterKey", req.key)
.detail("IncomingCoordinators", describeList(req.coordinators, req.coordinators.size()));
req.reply.sendError(wrong_connection_file());
} else {
regs.getInterface(req.key, id).electionResult.send(req);
}
regs.getInterface(req.key, id).electionResult.send( req );
}
}
when ( GetLeaderRequest req = waitNext( interf.getLeader.getFuture() ) ) {
Optional<LeaderInfo> forward = regs.getForward(req.key);
if( forward.present() )
req.reply.send( forward.get() );
else {
if (!SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT && ccf->getConnectionString().clusterKey() != req.key) {
TraceEvent(SevWarnAlways, "CCFMismatch")
.detail("RequestType", "GetLeaderRequest")
.detail("LocalCS", ccf->getConnectionString().toString())
.detail("IncomingClusterKey", req.key);
req.reply.sendError(wrong_connection_file());
} else {
regs.getInterface(req.key, id).getLeader.send(req);
}
}
else
regs.getInterface(req.key, id).getLeader.send( req );
}
when ( CandidacyRequest req = waitNext( interf.candidacy.getFuture() ) ) {
Optional<LeaderInfo> forward = regs.getForward(req.key);
if( forward.present() )
req.reply.send( forward.get() );
else {
if (!SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT && ccf->getConnectionString().clusterKey() != req.key) {
TraceEvent(SevWarnAlways, "CCFMismatch")
.detail("RequestType", "CandidacyRequest")
.detail("LocalCS", ccf->getConnectionString().toString())
.detail("IncomingClusterKey", req.key);
req.reply.sendError(wrong_connection_file());
} else {
regs.getInterface(req.key, id).candidacy.send(req);
}
}
else
regs.getInterface(req.key, id).candidacy.send(req);
}
when ( LeaderHeartbeatRequest req = waitNext( interf.leaderHeartbeat.getFuture() ) ) {
Optional<LeaderInfo> forward = regs.getForward(req.key);
if( forward.present() )
req.reply.send(LeaderHeartbeatReply{ false });
else {
if (!SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT && ccf->getConnectionString().clusterKey() != req.key) {
TraceEvent(SevWarnAlways, "CCFMismatch")
.detail("RequestType", "LeaderHeartbeatRequest")
.detail("LocalCS", ccf->getConnectionString().toString())
.detail("IncomingClusterKey", req.key);
req.reply.sendError(wrong_connection_file());
} else {
regs.getInterface(req.key, id).leaderHeartbeat.send(req);
}
}
else
regs.getInterface(req.key, id).leaderHeartbeat.send(req);
}
when ( ForwardRequest req = waitNext( interf.forward.getFuture() ) ) {
Optional<LeaderInfo> forward = regs.getForward(req.key);
if( forward.present() )
req.reply.send( Void() );
else {
if (!SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT && ccf->getConnectionString().clusterKey() != req.key) {
TraceEvent(SevWarnAlways, "CCFMismatch")
.detail("RequestType", "ForwardRequest")
.detail("LocalCS", ccf->getConnectionString().toString())
.detail("IncomingClusterKey", req.key);
req.reply.sendError(wrong_connection_file());
} else {
forwarders.add(LeaderRegisterCollection::setForward(&regs, req.key,
ClusterConnectionString(req.conn.toString())));
regs.getInterface(req.key, id).forward.send(req);
}
forwarders.add( LeaderRegisterCollection::setForward( &regs, req.key, ClusterConnectionString(req.conn.toString()) ) );
regs.getInterface(req.key, id).forward.send(req);
}
}
when( wait( forwarders.getResult() ) ) { ASSERT(false); throw internal_error(); }
}
}
ACTOR Future<Void> coordinationServer(std::string dataFolder, Reference<ClusterConnectionFile> ccf) {
ACTOR Future<Void> coordinationServer(std::string dataFolder) {
state UID myID = deterministicRandom()->randomUniqueID();
state LeaderElectionRegInterface myLeaderInterface( g_network );
state GenerationRegInterface myInterface( g_network );
@ -663,8 +577,7 @@ ACTOR Future<Void> coordinationServer(std::string dataFolder, Reference<ClusterC
TraceEvent("CoordinationServer", myID).detail("MyInterfaceAddr", myInterface.read.getEndpoint().getPrimaryAddress()).detail("Folder", dataFolder);
try {
wait(localGenerationReg(myInterface, &store) || leaderServer(myLeaderInterface, &store, myID, ccf) ||
store.getError());
wait( localGenerationReg(myInterface, &store) || leaderServer(myLeaderInterface, &store, myID) || store.getError() );
throw internal_error();
} catch (Error& e) {
TraceEvent("CoordinationServerError", myID).error(e, true);

View File

@ -209,6 +209,6 @@ public:
vector<GenerationRegInterface> stateServers;
};
Future<Void> coordinationServer(std::string const& dataFolder, Reference<ClusterConnectionFile> const& ccf);
Future<Void> coordinationServer( std::string const& dataFolder );
#endif

View File

@ -609,8 +609,6 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
// Coordination
init( COORDINATED_STATE_ONCONFLICT_POLL_INTERVAL, 1.0 ); if( randomize && BUGGIFY ) COORDINATED_STATE_ONCONFLICT_POLL_INTERVAL = 10.0;
init( FORWARD_REQUEST_TOO_OLD, 600.0 ); if( randomize && BUGGIFY ) FORWARD_REQUEST_TOO_OLD = 60.0;
init( ENABLE_CROSS_CLUSTER_SUPPORT, true ); if( randomize && BUGGIFY ) ENABLE_CROSS_CLUSTER_SUPPORT = false;
// Buggification
init( BUGGIFIED_EVENTUAL_CONSISTENCY, 1.0 );

View File

@ -538,9 +538,6 @@ public:
// Coordination
double COORDINATED_STATE_ONCONFLICT_POLL_INTERVAL;
double FORWARD_REQUEST_TOO_OLD;
bool ENABLE_CROSS_CLUSTER_SUPPORT; // Allow a coordinator to serve requests whose connection string does not match
// the local copy
// Buggification
double BUGGIFIED_EVENTUAL_CONSISTENCY;

View File

@ -1133,7 +1133,6 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
TEST( !useIPv6 );
vector<NetworkAddress> coordinatorAddresses;
vector<NetworkAddress> extraCoordinatorAddresses; // Used by extra DB if the DR db is a new one
if(minimumRegions > 1) {
//do not put coordinators in the primary region so that we can kill that region safely
int nonPrimaryDcs = dataCenters/2;
@ -1142,9 +1141,6 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
for(int m = 0; m < dcCoordinators; m++) {
auto ip = makeIPAddressForSim(useIPv6, { 2, dc, 1, m });
coordinatorAddresses.push_back(NetworkAddress(ip, sslEnabled && !sslOnly ? 2 : 1, true, sslEnabled && sslOnly));
auto extraIp = makeIPAddressForSim(useIPv6, { 4, dc, 1, m });
extraCoordinatorAddresses.push_back(
NetworkAddress(extraIp, sslEnabled && !sslOnly ? 2 : 1, true, sslEnabled && sslOnly));
TraceEvent("SelectedCoordinator").detail("Address", coordinatorAddresses.back());
}
}
@ -1170,9 +1166,6 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
} else {
auto ip = makeIPAddressForSim(useIPv6, { 2, dc, 1, m });
coordinatorAddresses.push_back(NetworkAddress(ip, sslEnabled && !sslOnly ? 2 : 1, true, sslEnabled && sslOnly));
auto extraIp = makeIPAddressForSim(useIPv6, { 4, dc, 1, m });
extraCoordinatorAddresses.push_back(
NetworkAddress(extraIp, sslEnabled && !sslOnly ? 2 : 1, true, sslEnabled && sslOnly));
TraceEvent("SelectedCoordinator").detail("Address", coordinatorAddresses.back()).detail("M", m).detail("Machines", machines).detail("Assigned", assignedMachines).detail("DcCoord", dcCoordinators).detail("P1", (m+1==dcCoordinators)).detail("P2", (assignedMachines<4)).detail("P3", (assignedMachines+machines-dcCoordinators>=4)).detail("CoordinatorCount", coordinatorCount);
}
assignedMachines++;
@ -1200,13 +1193,10 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
// If extraDB==0, leave g_simulator.extraDB as null because the test does not use DR.
if(extraDB==1) {
// The DR database can be either a new database or itself
g_simulator.extraDB =
BUGGIFY ? new ClusterConnectionString(coordinatorAddresses, LiteralStringRef("TestCluster:0"))
: new ClusterConnectionString(extraCoordinatorAddresses, LiteralStringRef("ExtraCluster:0"));
g_simulator.extraDB = new ClusterConnectionString(coordinatorAddresses, BUGGIFY ? LiteralStringRef("TestCluster:0") : LiteralStringRef("ExtraCluster:0"));
} else if(extraDB==2) {
// The DR database is a new database
g_simulator.extraDB =
new ClusterConnectionString(extraCoordinatorAddresses, LiteralStringRef("ExtraCluster:0"));
g_simulator.extraDB = new ClusterConnectionString(coordinatorAddresses, LiteralStringRef("ExtraCluster:0"));
} else if(extraDB==3) {
// The DR database is the same database
g_simulator.extraDB = new ClusterConnectionString(coordinatorAddresses, LiteralStringRef("TestCluster:0"));

View File

@ -1756,7 +1756,7 @@ ACTOR Future<Void> fdbd(
if (coordFolder.size()) {
// SOMEDAY: remove the fileNotFound wrapper and make DiskQueue construction safe from errors setting up
// their files
actors.push_back(fileNotFoundToNever(coordinationServer(coordFolder, coordinators.ccf)));
actors.push_back(fileNotFoundToNever(coordinationServer(coordFolder)));
}
state UID processIDUid = wait(createAndLockProcessIdFile(dataFolder));

View File

@ -72,7 +72,6 @@ ERROR( connection_idle, 1049, "Connection closed after idle timeout" )
ERROR( disk_adapter_reset, 1050, "The disk queue adpater reset" )
ERROR( batch_transaction_throttled, 1051, "Batch GRV request rate limit exceeded")
ERROR( dd_cancelled, 1052, "Data distribution components cancelled")
ERROR( wrong_connection_file, 1053, "Connection file mismatch")
ERROR( broken_promise, 1100, "Broken promise" )
ERROR( operation_cancelled, 1101, "Asynchronous operation cancelled" )