FastRestore: Add debug trace events to see why the restore role is not connected

Reason: the restore worker is an fdbserver process that does not register with the cluster controller (CC).
The new failure monitor changes how connections work for clients and servers.
A client does not connect to the CC to get connected.
A server has to connect to the CC to get connected.
The restore worker thus becomes a special role that behaves like a client but is a server.
This commit is contained in:
Meng Xu 2020-02-03 16:47:00 -08:00
parent 9c2046b11b
commit ca3b6135d0
6 changed files with 13 additions and 6 deletions

View File

@ -728,7 +728,9 @@ ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration( Reference<ClusterCo
incorrectTime = Optional<double>();
}
TraceEvent("MXTEST_MonitorProxiesOneGenerationStart");
state ErrorOr<CachedSerialization<ClientDBInfo>> rep = wait( clientLeaderServer.openDatabase.tryGetReply( req, TaskPriority::CoordinationReply ) );
TraceEvent("MXTEST_MonitorProxiesOneGenerationEnd").detail("Rep", rep.present());
if (rep.present()) {
if( rep.get().read().forward.present() ) {
TraceEvent("MonitorProxiesForwarding").detail("NewConnStr", rep.get().read().forward.get().toString()).detail("OldConnStr", info.intermediateConnFile->getConnectionString().toString());

View File

@ -777,6 +777,7 @@ Future<Void> DatabaseContext::connectionFileChanged() {
extern IPAddress determinePublicIPAutomatically(ClusterConnectionString const& ccs);
Database Database::createDatabase( Reference<ClusterConnectionFile> connFile, int apiVersion, bool internal, LocalityData const& clientLocality, DatabaseContext *preallocatedDb ) {
TraceEvent("MX1");
if(!g_network)
throw network_not_setup();
@ -811,7 +812,9 @@ Database Database::createDatabase( Reference<ClusterConnectionFile> connFile, in
Reference<AsyncVar<ClientDBInfo>> clientInfo(new AsyncVar<ClientDBInfo>());
Reference<AsyncVar<Reference<ClusterConnectionFile>>> connectionFile(new AsyncVar<Reference<ClusterConnectionFile>>());
connectionFile->set(connFile);
TraceEvent("MX2");
Future<Void> clientInfoMonitor = monitorProxies(connectionFile, clientInfo, networkOptions.supportedVersions, StringRef(networkOptions.traceLogGroup));
TraceEvent("MX3");
DatabaseContext *db;
if(preallocatedDb) {
@ -821,6 +824,7 @@ Database Database::createDatabase( Reference<ClusterConnectionFile> connFile, in
db = new DatabaseContext(connectionFile, clientInfo, clientInfoMonitor, TaskPriority::DefaultEndpoint, clientLocality, true, false, internal, apiVersion, /*switchable*/ true);
}
TraceEvent("MX4");
return Database(db);
}

View File

@ -1092,13 +1092,16 @@ Endpoint FlowTransport::loadedEndpoint( const UID& token ) {
}
void FlowTransport::addPeerReference(const Endpoint& endpoint, bool isStream) {
TraceEvent("AddPeerRef").detail("Endpoint", endpoint.getPrimaryAddress()).detail("IsStream", isStream).detail("EndpointValid", endpoint.getPrimaryAddress().isValid());
if (!isStream || !endpoint.getPrimaryAddress().isValid())
return;
Reference<Peer> peer = self->getOrOpenPeer(endpoint.getPrimaryAddress());
TraceEvent("AddPeerRef").detail("Endpoint", endpoint.getPrimaryAddress()).detail("IsStream", isStream).detail("PeerRef", peer->peerReferences);
if(peer->peerReferences == -1) {
if (FlowTransport::transport().isClient()) {
//
IFailureMonitor::failureMonitor().setStatus(endpoint.getPrimaryAddress(), FailureStatus(false));
}
peer->peerReferences = 1;

View File

@ -328,9 +328,9 @@ ACTOR Future<Void> restoreWorker(Reference<ClusterConnectionFile> connFile, Loca
try {
state vector<Future<Void>> actors;
// Connect to coordinators in order to connect to fdb cluster
ServerCoordinators coordinators(connFile);
if (coordFolder.size())
actors.push_back(fileNotFoundToNever(coordinationServer(coordFolder), "ClusterCoordinatorFailed"));
// ServerCoordinators coordinators(connFile);
// if (coordFolder.size())
// actors.push_back(fileNotFoundToNever(coordinationServer(coordFolder), "ClusterCoordinatorFailed"));
Database cx = Database::createDatabase(connFile, Database::API_VERSION_LATEST, true, locality);
wait(reportErrors(_restoreWorker(cx, locality), "RestoreWorker"));

View File

@ -67,7 +67,5 @@ struct RestoreWorkerData : NonCopyable, public ReferenceCounted<RestoreWorkerDa
}
};
ACTOR Future<Void> fileNotFoundToNever(Future<Void> f, std::string msg)
#include "flow/unactorcompiler.h"
#endif // FDBSERVER_RESTOREWORKER_H

View File

@ -1395,7 +1395,7 @@ ACTOR Future<Void> fileNotFoundToNever(Future<Void> f, std::string msg) {
return Void();
} catch (Error& e) {
if (e.code() == error_code_file_not_found) {
TraceEvent(SevWarn, msg).error(e);
//TraceEvent(SevWarn, "msg").error(e);
return Never();
}
throw;