fix: do not attempt to become the cluster controller before recovering files from disk
This commit is contained in:
parent
e27531d39e
commit
c3f2e2bb38
|
@ -2022,7 +2022,8 @@ ACTOR Future<Void> clusterController( ServerCoordinators coordinators, Reference
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> clusterController( Reference<ClusterConnectionFile> connFile, Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> currentCC, Reference<AsyncVar<ClusterControllerPriorityInfo>> asyncPriorityInfo ) {
|
||||
ACTOR Future<Void> clusterController( Reference<ClusterConnectionFile> connFile, Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> currentCC, Reference<AsyncVar<ClusterControllerPriorityInfo>> asyncPriorityInfo, Future<Void> recoveredDiskFiles ) {
|
||||
Void _ = wait(recoveredDiskFiles);
|
||||
state bool hasConnected = false;
|
||||
loop {
|
||||
try {
|
||||
|
|
|
@ -280,8 +280,8 @@ class Database openDBOnServer( Reference<AsyncVar<ServerDBInfo>> const& db, int
|
|||
Future<Void> extractClusterInterface( Reference<AsyncVar<Optional<struct ClusterControllerFullInterface>>> const& a, Reference<AsyncVar<Optional<struct ClusterInterface>>> const& b );
|
||||
|
||||
Future<Void> fdbd( Reference<ClusterConnectionFile> const&, LocalityData const& localities, ProcessClass const& processClass, std::string const& dataFolder, std::string const& coordFolder, int64_t const& memoryLimit, std::string const& metricsConnFile, std::string const& metricsPrefix );
|
||||
Future<Void> workerServer( Reference<ClusterConnectionFile> const&, Reference<AsyncVar<Optional<struct ClusterControllerFullInterface>>> const& ccInterface, LocalityData const& localities, Reference<AsyncVar<ClusterControllerPriorityInfo>> const& asyncPriorityInfo, ProcessClass const& initialClass, std::string const& filename, int64_t const& memoryLimit, Future<Void> const& forceFailure, std::string const& metricsConnFile, std::string const& metricsPrefix );
|
||||
Future<Void> clusterController( Reference<ClusterConnectionFile> const&, Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> const& currentCC, Reference<AsyncVar<ClusterControllerPriorityInfo>> const& asyncPriorityInfo );
|
||||
Future<Void> workerServer( Reference<ClusterConnectionFile> const&, Reference<AsyncVar<Optional<struct ClusterControllerFullInterface>>> const& ccInterface, LocalityData const& localities, Reference<AsyncVar<ClusterControllerPriorityInfo>> const& asyncPriorityInfo, ProcessClass const& initialClass, std::string const& filename, int64_t const& memoryLimit, Future<Void> const& forceFailure, std::string const& metricsConnFile, std::string const& metricsPrefix, Promise<Void> const& recoveredDiskFiles );
|
||||
Future<Void> clusterController( Reference<ClusterConnectionFile> const&, Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> const& currentCC, Reference<AsyncVar<ClusterControllerPriorityInfo>> const& asyncPriorityInfo, Future<Void> const& recoveredDiskFiles );
|
||||
|
||||
// These servers are started by workerServer
|
||||
Future<Void> storageServer(
|
||||
|
|
|
@ -479,7 +479,7 @@ ACTOR Future<Void> monitorServerDBInfo( Reference<AsyncVar<Optional<ClusterContr
|
|||
}
|
||||
|
||||
ACTOR Future<Void> workerServer( Reference<ClusterConnectionFile> connFile, Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> ccInterface, LocalityData localities,
|
||||
Reference<AsyncVar<ClusterControllerPriorityInfo>> asyncPriorityInfo, ProcessClass initialClass, std::string folder, int64_t memoryLimit, std::string metricsConnFile, std::string metricsPrefix ) {
|
||||
Reference<AsyncVar<ClusterControllerPriorityInfo>> asyncPriorityInfo, ProcessClass initialClass, std::string folder, int64_t memoryLimit, std::string metricsConnFile, std::string metricsPrefix, Promise<Void> recoveredDiskFiles ) {
|
||||
state PromiseStream< ErrorInfo > errors;
|
||||
state Future<Void> handleErrors = workerHandleErrors( errors.getFuture() ); // Needs to be stopped last
|
||||
state ActorCollection errorForwarders(false);
|
||||
|
@ -643,6 +643,8 @@ ACTOR Future<Void> workerServer( Reference<ClusterConnectionFile> connFile, Refe
|
|||
startRole( interf.id(), interf.id(), "Worker", details );
|
||||
|
||||
Void _ = wait(waitForAll(recoveries));
|
||||
recoveredDiskFiles.send(Void());
|
||||
|
||||
errorForwarders.add( registrationClient( ccInterface, interf, asyncPriorityInfo, initialClass ) );
|
||||
|
||||
TraceEvent("RecoveriesComplete", interf.id());
|
||||
|
@ -979,12 +981,13 @@ ACTOR Future<Void> fdbd(
|
|||
Reference<AsyncVar<ClusterControllerPriorityInfo>> asyncPriorityInfo(new AsyncVar<ClusterControllerPriorityInfo>(
|
||||
ClusterControllerPriorityInfo(ProcessClass(processClass.classType(), ProcessClass::CommandLineSource).machineClassFitness(ProcessClass::ClusterController), false, ClusterControllerPriorityInfo::FitnessUnknown)));
|
||||
vector<Future<Void>> v;
|
||||
state Promise<Void> recoveredDiskFiles;
|
||||
if ( coordFolder.size() )
|
||||
v.push_back( fileNotFoundToNever( coordinationServer( coordFolder ) ) ); //SOMEDAY: remove the fileNotFound wrapper and make DiskQueue construction safe from errors setting up their files
|
||||
v.push_back( reportErrors( processClass == ProcessClass::TesterClass ? monitorLeader( connFile, cc ) : clusterController( connFile, cc , asyncPriorityInfo ), "clusterController") );
|
||||
v.push_back( reportErrors( processClass == ProcessClass::TesterClass ? monitorLeader( connFile, cc ) : clusterController( connFile, cc , asyncPriorityInfo, recoveredDiskFiles.getFuture()), "clusterController") );
|
||||
v.push_back( reportErrors(extractClusterInterface( cc, ci ), "extractClusterInterface") );
|
||||
v.push_back( reportErrors(failureMonitorClient( ci, true ), "failureMonitorClient") );
|
||||
v.push_back( reportErrorsExcept(workerServer(connFile, cc, localities, asyncPriorityInfo, processClass, dataFolder, memoryLimit, metricsConnFile, metricsPrefix), "workerServer", UID(), &normalWorkerErrors()) );
|
||||
v.push_back( reportErrorsExcept(workerServer(connFile, cc, localities, asyncPriorityInfo, processClass, dataFolder, memoryLimit, metricsConnFile, metricsPrefix, recoveredDiskFiles), "workerServer", UID(), &normalWorkerErrors()) );
|
||||
state Future<Void> firstConnect = reportErrors( printOnFirstConnected(ci), "ClusterFirstConnectedError" );
|
||||
|
||||
Void _ = wait( quorum(v,1) );
|
||||
|
|
Loading…
Reference in New Issue