diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index f67f5d4c5a..8cd7a90a70 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -2022,7 +2022,8 @@ ACTOR Future clusterController( ServerCoordinators coordinators, Reference } } -ACTOR Future clusterController( Reference connFile, Reference>> currentCC, Reference> asyncPriorityInfo ) { +ACTOR Future clusterController( Reference connFile, Reference>> currentCC, Reference> asyncPriorityInfo, Future recoveredDiskFiles ) { + Void _ = wait(recoveredDiskFiles); state bool hasConnected = false; loop { try { diff --git a/fdbserver/WorkerInterface.h b/fdbserver/WorkerInterface.h index 7fd9c243f8..a62eb23d80 100644 --- a/fdbserver/WorkerInterface.h +++ b/fdbserver/WorkerInterface.h @@ -280,8 +280,8 @@ class Database openDBOnServer( Reference> const& db, int Future extractClusterInterface( Reference>> const& a, Reference>> const& b ); Future fdbd( Reference const&, LocalityData const& localities, ProcessClass const& processClass, std::string const& dataFolder, std::string const& coordFolder, int64_t const& memoryLimit, std::string const& metricsConnFile, std::string const& metricsPrefix ); -Future workerServer( Reference const&, Reference>> const& ccInterface, LocalityData const& localities, Reference> const& asyncPriorityInfo, ProcessClass const& initialClass, std::string const& filename, int64_t const& memoryLimit, Future const& forceFailure, std::string const& metricsConnFile, std::string const& metricsPrefix ); -Future clusterController( Reference const&, Reference>> const& currentCC, Reference> const& asyncPriorityInfo ); +Future workerServer( Reference const&, Reference>> const& ccInterface, LocalityData const& localities, Reference> const& asyncPriorityInfo, ProcessClass const& initialClass, std::string const& filename, int64_t const& memoryLimit, Future const& forceFailure, std::string const& metricsConnFile, std::string const& metricsPrefix, Promise const& recoveredDiskFiles ); +Future clusterController( Reference const&, Reference>> const& currentCC, Reference> const& asyncPriorityInfo, Future const& recoveredDiskFiles ); // These servers are started by workerServer Future storageServer( diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index adfa1adbcc..7e5a3e2e94 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -479,7 +479,7 @@ ACTOR Future monitorServerDBInfo( Reference workerServer( Reference connFile, Reference>> ccInterface, LocalityData localities, - Reference> asyncPriorityInfo, ProcessClass initialClass, std::string folder, int64_t memoryLimit, std::string metricsConnFile, std::string metricsPrefix ) { + Reference> asyncPriorityInfo, ProcessClass initialClass, std::string folder, int64_t memoryLimit, std::string metricsConnFile, std::string metricsPrefix, Promise recoveredDiskFiles ) { state PromiseStream< ErrorInfo > errors; state Future handleErrors = workerHandleErrors( errors.getFuture() ); // Needs to be stopped last state ActorCollection errorForwarders(false); @@ -643,6 +643,8 @@ ACTOR Future workerServer( Reference connFile, Refe startRole( interf.id(), interf.id(), "Worker", details ); Void _ = wait(waitForAll(recoveries)); + recoveredDiskFiles.send(Void()); + errorForwarders.add( registrationClient( ccInterface, interf, asyncPriorityInfo, initialClass ) ); TraceEvent("RecoveriesComplete", interf.id()); @@ -979,12 +981,13 @@ ACTOR Future fdbd( Reference> asyncPriorityInfo(new AsyncVar( ClusterControllerPriorityInfo(ProcessClass(processClass.classType(), ProcessClass::CommandLineSource).machineClassFitness(ProcessClass::ClusterController), false, ClusterControllerPriorityInfo::FitnessUnknown))); vector> v; + state Promise recoveredDiskFiles; if ( coordFolder.size() ) v.push_back( fileNotFoundToNever( coordinationServer( coordFolder ) ) ); //SOMEDAY: remove the fileNotFound wrapper and make DiskQueue construction safe from errors setting up their files - v.push_back( reportErrors( processClass == ProcessClass::TesterClass ? monitorLeader( connFile, cc ) : clusterController( connFile, cc , asyncPriorityInfo ), "clusterController") ); + v.push_back( reportErrors( processClass == ProcessClass::TesterClass ? monitorLeader( connFile, cc ) : clusterController( connFile, cc , asyncPriorityInfo, recoveredDiskFiles.getFuture()), "clusterController") ); v.push_back( reportErrors(extractClusterInterface( cc, ci ), "extractClusterInterface") ); v.push_back( reportErrors(failureMonitorClient( ci, true ), "failureMonitorClient") ); - v.push_back( reportErrorsExcept(workerServer(connFile, cc, localities, asyncPriorityInfo, processClass, dataFolder, memoryLimit, metricsConnFile, metricsPrefix), "workerServer", UID(), &normalWorkerErrors()) ); + v.push_back( reportErrorsExcept(workerServer(connFile, cc, localities, asyncPriorityInfo, processClass, dataFolder, memoryLimit, metricsConnFile, metricsPrefix, recoveredDiskFiles), "workerServer", UID(), &normalWorkerErrors()) ); state Future firstConnect = reportErrors( printOnFirstConnected(ci), "ClusterFirstConnectedError" ); Void _ = wait( quorum(v,1) );