2017-05-26 04:48:44 +08:00
|
|
|
/*
|
|
|
|
* ClusterController.actor.cpp
|
|
|
|
*
|
|
|
|
* This source file is part of the FoundationDB open source project
|
|
|
|
*
|
2019-02-16 09:29:52 +08:00
|
|
|
* Copyright 2013-2019 Apple Inc. and the FoundationDB project authors
|
2018-02-22 02:25:11 +08:00
|
|
|
*
|
2017-05-26 04:48:44 +08:00
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
2018-02-22 02:25:11 +08:00
|
|
|
*
|
2017-05-26 04:48:44 +08:00
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
2018-02-22 02:25:11 +08:00
|
|
|
*
|
2017-05-26 04:48:44 +08:00
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
2019-05-16 07:13:04 +08:00
|
|
|
#include <algorithm>
|
|
|
|
#include <iterator>
|
|
|
|
#include <map>
|
|
|
|
#include <set>
|
|
|
|
#include <vector>
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
#include "fdbrpc/FailureMonitor.h"
|
|
|
|
#include "flow/ActorCollection.h"
|
2019-02-18 07:41:16 +08:00
|
|
|
#include "fdbclient/NativeAPI.actor.h"
|
2019-04-25 06:12:37 +08:00
|
|
|
#include "fdbserver/BackupInterface.h"
|
2017-05-26 04:48:44 +08:00
|
|
|
#include "fdbserver/CoordinationInterface.h"
|
2018-12-14 05:31:37 +08:00
|
|
|
#include "fdbserver/DataDistributorInterface.h"
|
2018-10-20 01:30:13 +08:00
|
|
|
#include "fdbserver/Knobs.h"
|
2019-02-18 10:55:52 +08:00
|
|
|
#include "fdbserver/MoveKeys.actor.h"
|
2019-02-18 11:13:26 +08:00
|
|
|
#include "fdbserver/WorkerInterface.actor.h"
|
2018-10-20 01:30:13 +08:00
|
|
|
#include "fdbserver/LeaderElection.h"
|
2019-01-29 01:25:15 +08:00
|
|
|
#include "fdbserver/LogSystemConfig.h"
|
2018-10-20 01:30:13 +08:00
|
|
|
#include "fdbserver/WaitFailure.h"
|
2019-02-15 08:24:46 +08:00
|
|
|
#include "fdbserver/RatekeeperInterface.h"
|
2018-10-20 01:30:13 +08:00
|
|
|
#include "fdbserver/ServerDBInfo.h"
|
|
|
|
#include "fdbserver/Status.h"
|
2019-01-19 08:18:34 +08:00
|
|
|
#include "fdbserver/LatencyBandConfig.h"
|
2017-05-26 04:48:44 +08:00
|
|
|
#include "fdbclient/DatabaseContext.h"
|
2018-10-20 01:30:13 +08:00
|
|
|
#include "fdbserver/RecoveryState.h"
|
2017-05-26 04:48:44 +08:00
|
|
|
#include "fdbclient/ReadYourWrites.h"
|
|
|
|
#include "fdbrpc/Replication.h"
|
|
|
|
#include "fdbrpc/ReplicationUtils.h"
|
2017-09-26 03:40:24 +08:00
|
|
|
#include "fdbclient/KeyBackedTypes.h"
|
2018-08-02 09:09:54 +08:00
|
|
|
#include "flow/Util.h"
|
2021-03-11 02:06:03 +08:00
|
|
|
#include "flow/actorcompiler.h" // This must be the last #include.
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
void failAfter(Future<Void> trigger, Endpoint e);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
struct WorkerInfo : NonCopyable {
|
|
|
|
Future<Void> watcher;
|
2017-11-15 05:57:37 +08:00
|
|
|
ReplyPromise<RegisterWorkerReply> reply;
|
2017-05-26 04:48:44 +08:00
|
|
|
Generation gen;
|
|
|
|
int reboots;
|
|
|
|
ProcessClass initialClass;
|
2018-02-10 08:48:55 +08:00
|
|
|
ClusterControllerPriorityInfo priorityInfo;
|
2019-03-09 00:25:07 +08:00
|
|
|
WorkerDetails details;
|
2019-03-23 09:00:16 +08:00
|
|
|
Future<Void> haltRatekeeper;
|
|
|
|
Future<Void> haltDistributor;
|
2020-04-06 14:09:36 +08:00
|
|
|
Standalone<VectorRef<StringRef>> issues;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
WorkerInfo()
|
|
|
|
: gen(-1), reboots(0),
|
|
|
|
priorityInfo(ProcessClass::UnsetFit, false, ClusterControllerPriorityInfo::FitnessUnknown) {}
|
|
|
|
WorkerInfo(Future<Void> watcher,
|
|
|
|
ReplyPromise<RegisterWorkerReply> reply,
|
|
|
|
Generation gen,
|
|
|
|
WorkerInterface interf,
|
|
|
|
ProcessClass initialClass,
|
|
|
|
ProcessClass processClass,
|
|
|
|
ClusterControllerPriorityInfo priorityInfo,
|
|
|
|
bool degraded,
|
|
|
|
Standalone<VectorRef<StringRef>> issues)
|
|
|
|
: watcher(watcher), reply(reply), gen(gen), reboots(0), initialClass(initialClass), priorityInfo(priorityInfo),
|
|
|
|
details(interf, processClass, degraded), issues(issues) {}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2020-06-10 08:33:41 +08:00
|
|
|
WorkerInfo(WorkerInfo&& r) noexcept
|
|
|
|
: watcher(std::move(r.watcher)), reply(std::move(r.reply)), gen(r.gen), reboots(r.reboots),
|
|
|
|
initialClass(r.initialClass), priorityInfo(r.priorityInfo), details(std::move(r.details)),
|
|
|
|
haltRatekeeper(r.haltRatekeeper), haltDistributor(r.haltDistributor), issues(r.issues) {}
|
|
|
|
void operator=(WorkerInfo&& r) noexcept {
|
2017-05-26 04:48:44 +08:00
|
|
|
watcher = std::move(r.watcher);
|
|
|
|
reply = std::move(r.reply);
|
|
|
|
gen = r.gen;
|
|
|
|
reboots = r.reboots;
|
|
|
|
initialClass = r.initialClass;
|
2018-02-10 08:48:55 +08:00
|
|
|
priorityInfo = r.priorityInfo;
|
2019-03-09 00:25:07 +08:00
|
|
|
details = std::move(r.details);
|
2019-11-13 05:01:29 +08:00
|
|
|
haltRatekeeper = r.haltRatekeeper;
|
|
|
|
haltDistributor = r.haltDistributor;
|
2020-04-06 14:09:36 +08:00
|
|
|
issues = r.issues;
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2018-02-10 08:48:55 +08:00
|
|
|
struct WorkerFitnessInfo {
|
2019-03-09 00:25:07 +08:00
|
|
|
WorkerDetails worker;
|
2018-02-10 08:48:55 +08:00
|
|
|
ProcessClass::Fitness fitness;
|
|
|
|
int used;
|
|
|
|
|
|
|
|
WorkerFitnessInfo() : fitness(ProcessClass::NeverAssign), used(0) {}
|
2021-03-11 02:06:03 +08:00
|
|
|
WorkerFitnessInfo(WorkerDetails worker, ProcessClass::Fitness fitness, int used)
|
|
|
|
: worker(worker), fitness(fitness), used(used) {}
|
2018-02-10 08:48:55 +08:00
|
|
|
};
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
class ClusterControllerData {
|
|
|
|
public:
|
|
|
|
struct DBInfo {
|
|
|
|
Reference<AsyncVar<ClientDBInfo>> clientInfo;
|
2020-04-12 10:30:05 +08:00
|
|
|
Reference<AsyncVar<ServerDBInfo>> serverInfo;
|
2017-05-26 04:48:44 +08:00
|
|
|
std::map<NetworkAddress, double> incompatibleConnections;
|
2019-02-19 06:54:28 +08:00
|
|
|
AsyncTrigger forceMasterFailure;
|
2017-05-26 04:48:44 +08:00
|
|
|
int64_t masterRegistrationCount;
|
2020-04-06 14:09:36 +08:00
|
|
|
int64_t dbInfoCount;
|
2018-06-14 09:14:14 +08:00
|
|
|
bool recoveryStalled;
|
2018-07-01 21:39:04 +08:00
|
|
|
bool forceRecovery;
|
2021-03-11 02:06:03 +08:00
|
|
|
DatabaseConfiguration config; // Asynchronously updated via master registration
|
2017-11-16 09:15:24 +08:00
|
|
|
DatabaseConfiguration fullyRecoveredConfig;
|
2017-05-26 04:48:44 +08:00
|
|
|
Database db;
|
2018-09-01 01:51:55 +08:00
|
|
|
int unfinishedRecoveries;
|
|
|
|
int logGenerations;
|
2019-11-13 05:01:29 +08:00
|
|
|
bool cachePopulated;
|
2019-07-26 08:15:31 +08:00
|
|
|
std::map<NetworkAddress, std::pair<double, OpenDatabaseRequest>> clientStatus;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2020-11-07 15:50:55 +08:00
|
|
|
DBInfo()
|
|
|
|
: masterRegistrationCount(0), recoveryStalled(false), forceRecovery(false), unfinishedRecoveries(0),
|
|
|
|
logGenerations(0), cachePopulated(false), clientInfo(new AsyncVar<ClientDBInfo>()), dbInfoCount(0),
|
2021-03-11 02:06:03 +08:00
|
|
|
serverInfo(new AsyncVar<ServerDBInfo>()), db(DatabaseContext::create(clientInfo,
|
|
|
|
Future<Void>(),
|
|
|
|
LocalityData(),
|
|
|
|
true,
|
|
|
|
TaskPriority::DefaultEndpoint,
|
|
|
|
true)) // SOMEDAY: Locality!
|
2020-11-07 15:50:55 +08:00
|
|
|
{}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2019-02-15 08:24:46 +08:00
|
|
|
void setDistributor(const DataDistributorInterface& interf) {
|
2020-04-12 10:30:05 +08:00
|
|
|
auto newInfo = serverInfo->get();
|
2019-05-11 05:01:52 +08:00
|
|
|
newInfo.id = deterministicRandom()->randomUniqueID();
|
2020-04-06 14:09:36 +08:00
|
|
|
newInfo.infoGeneration = ++dbInfoCount;
|
2019-02-15 08:24:46 +08:00
|
|
|
newInfo.distributor = interf;
|
2021-03-11 02:06:03 +08:00
|
|
|
serverInfo->set(newInfo);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
|
2019-02-15 08:24:46 +08:00
|
|
|
void setRatekeeper(const RatekeeperInterface& interf) {
|
2020-04-12 10:30:05 +08:00
|
|
|
auto newInfo = serverInfo->get();
|
2019-05-11 05:01:52 +08:00
|
|
|
newInfo.id = deterministicRandom()->randomUniqueID();
|
2020-04-06 14:09:36 +08:00
|
|
|
newInfo.infoGeneration = ++dbInfoCount;
|
2019-02-15 08:24:46 +08:00
|
|
|
newInfo.ratekeeper = interf;
|
2021-03-11 02:06:03 +08:00
|
|
|
serverInfo->set(newInfo);
|
2019-02-15 08:24:46 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void clearInterf(ProcessClass::ClassType t) {
|
2020-04-12 10:30:05 +08:00
|
|
|
auto newInfo = serverInfo->get();
|
2019-05-11 05:01:52 +08:00
|
|
|
newInfo.id = deterministicRandom()->randomUniqueID();
|
2020-04-06 14:09:36 +08:00
|
|
|
newInfo.infoGeneration = ++dbInfoCount;
|
2019-02-15 08:24:46 +08:00
|
|
|
if (t == ProcessClass::DataDistributorClass) {
|
|
|
|
newInfo.distributor = Optional<DataDistributorInterface>();
|
2019-03-27 23:24:25 +08:00
|
|
|
} else if (t == ProcessClass::RatekeeperClass) {
|
2019-02-15 08:24:46 +08:00
|
|
|
newInfo.ratekeeper = Optional<RatekeeperInterface>();
|
|
|
|
}
|
2021-03-11 02:06:03 +08:00
|
|
|
serverInfo->set(newInfo);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
struct UpdateWorkerList {
|
2021-03-11 02:06:03 +08:00
|
|
|
Future<Void> init(Database const& db) { return update(this, db); }
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
void set(Optional<Standalone<StringRef>> processID, Optional<ProcessData> data) {
|
2017-05-26 04:48:44 +08:00
|
|
|
delta[processID] = data;
|
|
|
|
anyDelta.set(true);
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
std::map<Optional<Standalone<StringRef>>, Optional<ProcessData>> delta;
|
|
|
|
AsyncVar<bool> anyDelta;
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
ACTOR static Future<Void> update(UpdateWorkerList* self, Database db) {
|
2017-05-26 04:48:44 +08:00
|
|
|
// The Database we are using is based on worker registrations to this cluster controller, which come only
|
2021-03-11 02:06:03 +08:00
|
|
|
// from master servers that we started, so it shouldn't be possible for multiple cluster controllers to
|
|
|
|
// fight.
|
2017-05-26 04:48:44 +08:00
|
|
|
state Transaction tr(db);
|
|
|
|
loop {
|
|
|
|
try {
|
2021-03-11 02:06:03 +08:00
|
|
|
tr.clear(workerListKeys);
|
|
|
|
wait(tr.commit());
|
2017-05-26 04:48:44 +08:00
|
|
|
break;
|
|
|
|
} catch (Error& e) {
|
2021-03-11 02:06:03 +08:00
|
|
|
wait(tr.onError(e));
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
loop {
|
|
|
|
// Wait for some changes
|
|
|
|
while (!self->anyDelta.get())
|
2021-03-11 02:06:03 +08:00
|
|
|
wait(self->anyDelta.onChange());
|
2017-05-26 04:48:44 +08:00
|
|
|
self->anyDelta.set(false);
|
|
|
|
|
|
|
|
state std::map<Optional<Standalone<StringRef>>, Optional<ProcessData>> delta;
|
2021-03-11 02:06:03 +08:00
|
|
|
delta.swap(self->delta);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
TraceEvent("UpdateWorkerList").detail("DeltaCount", delta.size());
|
|
|
|
|
|
|
|
// Do a transaction to write the changes
|
|
|
|
loop {
|
|
|
|
try {
|
2021-03-11 02:06:03 +08:00
|
|
|
for (auto w = delta.begin(); w != delta.end(); ++w) {
|
2017-05-26 04:48:44 +08:00
|
|
|
if (w->second.present()) {
|
2021-03-11 02:06:03 +08:00
|
|
|
tr.set(workerListKeyFor(w->first.get()), workerListValue(w->second.get()));
|
2017-05-26 04:48:44 +08:00
|
|
|
} else
|
2021-03-11 02:06:03 +08:00
|
|
|
tr.clear(workerListKeyFor(w->first.get()));
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2021-03-11 02:06:03 +08:00
|
|
|
wait(tr.commit());
|
2017-05-26 04:48:44 +08:00
|
|
|
break;
|
|
|
|
} catch (Error& e) {
|
2021-03-11 02:06:03 +08:00
|
|
|
wait(tr.onError(e));
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Decides whether `worker` may be considered for recruitment.
// Within two SERVER_REQUEST_INTERVALs of startTime every worker counts as available.
// After that the failure monitor must report the worker's storage endpoint as
// available and, when `checkStable` is set, the worker must have fewer than 2 reboots.
bool workerAvailable(WorkerInfo const& worker, bool checkStable) {
    if (now() - startTime < 2 * FLOW_KNOBS->SERVER_REQUEST_INTERVAL) {
        return true;
    }
    if (!IFailureMonitor::failureMonitor().getState(worker.details.interf.storage.getEndpoint()).isAvailable()) {
        return false;
    }
    return !checkStable || worker.reboots < 2;
}
|
|
|
|
|
|
|
|
// True when the process identified by `processId` is the one named by the currently
// published data distributor or ratekeeper interface.
bool isLongLivedStateless(Optional<Key> const& processId) {
    const auto& info = db.serverInfo->get();
    if (info.distributor.present() && info.distributor.get().locality.processId() == processId) {
        return true;
    }
    return info.ratekeeper.present() && info.ratekeeper.get().locality.processId() == processId;
}
|
|
|
|
|
|
|
|
// Picks a worker to host a new storage server for `req`.
//
// Pass 1 returns the first worker that passes the request's filters and whose
// Storage fitness is no worse than UnsetFit.
// Pass 2 runs only when req.criticalRecruitment is set: among all workers passing the
// same filters it returns the one with the best fitness strictly better than NeverAssign.
//
// Throws no_more_servers() when neither pass finds a worker.
//
// Both passes share one filter. The critical pass used to skip the secondary-address
// exclusion check, so a worker whose secondary address was excluded could still be
// recruited there.
WorkerDetails getStorageWorker(RecruitStorageRequest const& req) {
    std::set<Optional<Standalone<StringRef>>> excludedMachines(req.excludeMachines.begin(),
                                                               req.excludeMachines.end());
    std::set<Optional<Standalone<StringRef>>> includeDCs(req.includeDCs.begin(), req.includeDCs.end());
    std::set<AddressExclusion> excludedAddresses(req.excludeAddresses.begin(), req.excludeAddresses.end());

    // Filters that do not depend on fitness: availability, zone exclusion, the DC
    // allow-list (an empty list allows every DC), and address exclusion on both the
    // primary and, when present, the secondary address.
    auto passesRequestFilters = [&](WorkerInfo const& worker) -> bool {
        auto& interf = worker.details.interf;
        return workerAvailable(worker, false) && !excludedMachines.count(interf.locality.zoneId()) &&
               (includeDCs.size() == 0 || includeDCs.count(interf.locality.dcId())) &&
               !addressExcluded(excludedAddresses, interf.address()) &&
               (!interf.secondaryAddress().present() ||
                !addressExcluded(excludedAddresses, interf.secondaryAddress().get()));
    };

    for (auto& it : id_worker) {
        if (passesRequestFilters(it.second) &&
            it.second.details.processClass.machineClassFitness(ProcessClass::Storage) <= ProcessClass::UnsetFit) {
            return it.second.details;
        }
    }

    if (req.criticalRecruitment) {
        ProcessClass::Fitness bestFit = ProcessClass::NeverAssign;
        Optional<WorkerDetails> bestInfo;
        for (auto& it : id_worker) {
            ProcessClass::Fitness fit = it.second.details.processClass.machineClassFitness(ProcessClass::Storage);
            if (passesRequestFilters(it.second) && fit < bestFit) {
                bestFit = fit;
                bestInfo = it.second.details;
            }
        }

        if (bestInfo.present()) {
            return bestInfo.get();
        }
    }

    throw no_more_servers();
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Chooses a set of workers that satisfies `policy`.
//
// Candidates are workers that are available, not excluded by `conf`, have a Storage
// fitness other than NeverAssign and, when `dcId` is given, live in that DC. They are
// added to the locality set one fitness group at a time (in map order of the fitness
// value); after each group the policy is tried and the first successful selection wins.
//
// Throws no_more_servers() if the policy cannot be met even with every candidate.
std::vector<WorkerDetails> getWorkersForSeedServers(
    DatabaseConfiguration const& conf,
    Reference<IReplicationPolicy> const& policy,
    Optional<Optional<Standalone<StringRef>>> const& dcId = Optional<Optional<Standalone<StringRef>>>()) {
    std::map<ProcessClass::Fitness, std::vector<WorkerDetails>> candidatesByFitness;
    std::vector<WorkerDetails> results;
    // Keep a typed pointer to the map and hand ownership to the reference-counted base handle.
    LocalityMap<WorkerDetails>* logServerMap = new LocalityMap<WorkerDetails>();
    Reference<LocalitySet> logServerSet(logServerMap);
    bool policySatisfied = false;

    for (auto& it : id_worker) {
        auto& details = it.second.details;
        auto fitness = details.processClass.machineClassFitness(ProcessClass::Storage);
        const bool eligible = workerAvailable(it.second, false) && !conf.isExcludedServer(details.interf.addresses()) &&
                              fitness != ProcessClass::NeverAssign &&
                              (!dcId.present() || details.interf.locality.dcId() == dcId.get());
        if (eligible) {
            candidatesByFitness[fitness].push_back(details);
        }
    }

    for (auto& group : candidatesByFitness) {
        // The locality map stores pointers into the group vectors, which are not
        // modified again after this point.
        for (auto& candidate : group.second) {
            logServerMap->add(candidate.interf.locality, &candidate);
        }

        std::vector<LocalityEntry> bestSet;
        if (!logServerSet->selectReplicas(policy, bestSet)) {
            continue;
        }

        results.reserve(bestSet.size());
        for (auto& entry : bestSet) {
            auto chosen = logServerMap->getObject(entry);
            results.push_back(*chosen);
        }
        policySatisfied = true;
        break;
    }

    logServerSet->clear();
    logServerSet.clear();

    if (!policySatisfied) {
        throw no_more_servers();
    }

    return results;
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
std::vector<WorkerDetails> getWorkersForTlogs(DatabaseConfiguration const& conf,
|
|
|
|
int32_t required,
|
|
|
|
int32_t desired,
|
2020-08-07 16:02:07 +08:00
|
|
|
Reference<IReplicationPolicy> const& policy,
|
|
|
|
std::map<Optional<Standalone<StringRef>>, int>& id_used,
|
|
|
|
bool checkStable = false,
|
|
|
|
std::set<Optional<Key>> dcIds = std::set<Optional<Key>>(),
|
|
|
|
std::vector<UID> exclusionWorkerIds = {}) {
|
2021-03-11 02:06:03 +08:00
|
|
|
std::map<std::pair<ProcessClass::Fitness, bool>, vector<WorkerDetails>> fitness_workers;
|
2019-03-09 00:25:07 +08:00
|
|
|
std::vector<WorkerDetails> results;
|
2017-09-12 08:40:46 +08:00
|
|
|
std::vector<LocalityData> unavailableLocals;
|
2019-03-14 04:14:39 +08:00
|
|
|
Reference<LocalitySet> logServerSet;
|
2019-03-09 00:25:07 +08:00
|
|
|
LocalityMap<WorkerDetails>* logServerMap;
|
2017-09-12 08:40:46 +08:00
|
|
|
bool bCompleted = false;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2019-03-09 00:25:07 +08:00
|
|
|
logServerSet = Reference<LocalitySet>(new LocalityMap<WorkerDetails>());
|
2021-03-11 02:06:03 +08:00
|
|
|
logServerMap = (LocalityMap<WorkerDetails>*)logServerSet.getPtr();
|
|
|
|
for (auto& it : id_worker) {
|
|
|
|
if (std::find(exclusionWorkerIds.begin(), exclusionWorkerIds.end(), it.second.details.interf.id()) ==
|
|
|
|
exclusionWorkerIds.end()) {
|
2019-03-19 03:17:59 +08:00
|
|
|
auto fitness = it.second.details.processClass.machineClassFitness(ProcessClass::TLog);
|
2021-03-11 02:06:03 +08:00
|
|
|
if (workerAvailable(it.second, checkStable) &&
|
|
|
|
!conf.isExcludedServer(it.second.details.interf.addresses()) &&
|
|
|
|
fitness != ProcessClass::NeverAssign &&
|
|
|
|
(!dcIds.size() || dcIds.count(it.second.details.interf.locality.dcId()))) {
|
2019-03-19 03:17:59 +08:00
|
|
|
fitness_workers[std::make_pair(fitness, it.second.details.degraded)].push_back(it.second.details);
|
2021-03-11 02:06:03 +08:00
|
|
|
} else {
|
2019-03-19 03:17:59 +08:00
|
|
|
unavailableLocals.push_back(it.second.details.interf.locality);
|
|
|
|
}
|
2017-05-27 05:20:11 +08:00
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
results.reserve(results.size() + id_worker.size());
|
2021-03-11 02:06:03 +08:00
|
|
|
for (int fitness = ProcessClass::BestFit; fitness != ProcessClass::NeverAssign && !bCompleted; fitness++) {
|
|
|
|
auto fitnessEnum = (ProcessClass::Fitness)fitness;
|
|
|
|
for (int addingDegraded = 0; addingDegraded < 2; addingDegraded++) {
|
|
|
|
auto workerItr = fitness_workers.find(std::make_pair(fitnessEnum, (bool)addingDegraded));
|
2019-03-13 05:38:54 +08:00
|
|
|
if (workerItr != fitness_workers.end()) {
|
2021-03-11 02:06:03 +08:00
|
|
|
for (auto& worker : workerItr->second) {
|
2019-03-13 05:38:54 +08:00
|
|
|
logServerMap->add(worker.interf.locality, &worker);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
}
|
2020-01-08 11:53:09 +08:00
|
|
|
|
2019-03-13 05:38:54 +08:00
|
|
|
if (logServerSet->size() < (addingDegraded == 0 ? desired : required)) {
|
2021-03-11 02:06:03 +08:00
|
|
|
} else if (logServerSet->size() == required || logServerSet->size() <= desired) {
|
2019-03-09 03:40:00 +08:00
|
|
|
if (logServerSet->validate(policy)) {
|
|
|
|
for (auto& object : logServerMap->getObjects()) {
|
|
|
|
results.push_back(*object);
|
|
|
|
}
|
|
|
|
bCompleted = true;
|
|
|
|
break;
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2020-07-29 12:07:23 +08:00
|
|
|
TraceEvent(SevWarn, "GWFTADNotAcceptable", id)
|
|
|
|
.detail("Fitness", fitness)
|
|
|
|
.detail("Processes", logServerSet->size())
|
|
|
|
.detail("Required", required)
|
|
|
|
.detail("TLogPolicy", policy->info())
|
|
|
|
.detail("DesiredLogs", desired)
|
|
|
|
.detail("AddingDegraded", addingDegraded);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2019-03-09 03:40:00 +08:00
|
|
|
// Try to select the desired size, if larger
|
|
|
|
else {
|
|
|
|
std::vector<LocalityEntry> bestSet;
|
|
|
|
std::vector<LocalityData> tLocalities;
|
|
|
|
|
|
|
|
// Try to find the best team of servers to fulfill the policy
|
2021-03-11 02:06:03 +08:00
|
|
|
if (findBestPolicySet(bestSet,
|
|
|
|
logServerSet,
|
|
|
|
policy,
|
|
|
|
desired,
|
|
|
|
SERVER_KNOBS->POLICY_RATING_TESTS,
|
2020-02-21 01:34:01 +08:00
|
|
|
SERVER_KNOBS->POLICY_GENERATIONS)) {
|
2019-03-09 03:40:00 +08:00
|
|
|
results.reserve(results.size() + bestSet.size());
|
|
|
|
for (auto& entry : bestSet) {
|
|
|
|
auto object = logServerMap->getObject(entry);
|
|
|
|
ASSERT(object);
|
|
|
|
results.push_back(*object);
|
|
|
|
tLocalities.push_back(object->interf.locality);
|
|
|
|
}
|
2021-03-11 02:06:03 +08:00
|
|
|
TraceEvent("GWFTADBestResults", id)
|
|
|
|
.detail("Fitness", fitness)
|
|
|
|
.detail("Processes", logServerSet->size())
|
|
|
|
.detail("BestCount", bestSet.size())
|
|
|
|
.detail("BestZones", ::describeZones(tLocalities))
|
|
|
|
.detail("BestDataHalls", ::describeDataHalls(tLocalities))
|
|
|
|
.detail("TLogPolicy", policy->info())
|
|
|
|
.detail("TotalResults", results.size())
|
|
|
|
.detail("DesiredLogs", desired)
|
|
|
|
.detail("AddingDegraded", addingDegraded);
|
2019-03-09 03:40:00 +08:00
|
|
|
bCompleted = true;
|
|
|
|
break;
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2021-03-11 02:06:03 +08:00
|
|
|
TraceEvent(SevWarn, "GWFTADNoBest", id)
|
|
|
|
.detail("Fitness", fitness)
|
|
|
|
.detail("Processes", logServerSet->size())
|
|
|
|
.detail("Required", required)
|
|
|
|
.detail("TLogPolicy", policy->info())
|
|
|
|
.detail("DesiredLogs", desired)
|
|
|
|
.detail("AddingDegraded", addingDegraded);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// If policy cannot be satisfied
|
2018-04-09 12:24:05 +08:00
|
|
|
if (!bCompleted) {
|
|
|
|
std::vector<LocalityData> tLocalities;
|
|
|
|
for (auto& object : logServerMap->getObjects()) {
|
2019-03-09 00:25:07 +08:00
|
|
|
tLocalities.push_back(object->interf.locality);
|
2018-04-09 12:24:05 +08:00
|
|
|
}
|
|
|
|
|
2020-07-29 12:07:23 +08:00
|
|
|
TraceEvent(SevWarn, "GetTLogTeamFailed")
|
|
|
|
.detail("Policy", policy->info())
|
|
|
|
.detail("Processes", logServerSet->size())
|
|
|
|
.detail("Workers", id_worker.size())
|
|
|
|
.detail("FitnessGroups", fitness_workers.size())
|
|
|
|
.detail("TLogZones", ::describeZones(tLocalities))
|
|
|
|
.detail("TLogDataHalls", ::describeDataHalls(tLocalities))
|
|
|
|
.detail("MissingZones", ::describeZones(unavailableLocals))
|
|
|
|
.detail("MissingDataHalls", ::describeDataHalls(unavailableLocals))
|
|
|
|
.detail("Required", required)
|
|
|
|
.detail("DesiredLogs", desired)
|
|
|
|
.detail("RatingTests", SERVER_KNOBS->POLICY_RATING_TESTS)
|
|
|
|
.detail("CheckStable", checkStable)
|
|
|
|
.detail("NumExclusionWorkers", exclusionWorkerIds.size())
|
|
|
|
.detail("PolicyGenerations", SERVER_KNOBS->POLICY_GENERATIONS)
|
|
|
|
.backtrace();
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
logServerSet->clear();
|
|
|
|
logServerSet.clear();
|
|
|
|
throw no_more_servers();
|
|
|
|
}
|
|
|
|
|
|
|
|
for (auto& result : results) {
|
2019-03-09 00:25:07 +08:00
|
|
|
id_used[result.interf.locality.processId()]++;
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
TraceEvent("GetTLogTeamDone")
|
|
|
|
.detail("Completed", bCompleted)
|
|
|
|
.detail("Policy", policy->info())
|
|
|
|
.detail("Results", results.size())
|
|
|
|
.detail("Processes", logServerSet->size())
|
|
|
|
.detail("Workers", id_worker.size())
|
|
|
|
.detail("Required", required)
|
|
|
|
.detail("Desired", desired)
|
|
|
|
.detail("RatingTests", SERVER_KNOBS->POLICY_RATING_TESTS)
|
|
|
|
.detail("PolicyGenerations", SERVER_KNOBS->POLICY_GENERATIONS);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
return results;
|
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// FIXME: This logic will fallback unnecessarily when usable dcs > 1 because it does not check all combinations of
|
|
|
|
// potential satellite locations
|
|
|
|
std::vector<WorkerDetails> getWorkersForSatelliteLogs(const DatabaseConfiguration& conf,
|
|
|
|
const RegionInfo& region,
|
|
|
|
const RegionInfo& remoteRegion,
|
|
|
|
std::map<Optional<Standalone<StringRef>>, int>& id_used,
|
|
|
|
bool& satelliteFallback,
|
|
|
|
bool checkStable = false) {
|
2018-06-29 14:15:32 +08:00
|
|
|
int startDC = 0;
|
|
|
|
loop {
|
2021-03-11 02:06:03 +08:00
|
|
|
if (startDC > 0 && startDC >= region.satellites.size() + 1 -
|
|
|
|
(satelliteFallback ? region.satelliteTLogUsableDcsFallback
|
|
|
|
: region.satelliteTLogUsableDcs)) {
|
|
|
|
if (satelliteFallback || region.satelliteTLogUsableDcsFallback == 0) {
|
2018-06-29 14:15:32 +08:00
|
|
|
throw no_more_servers();
|
|
|
|
} else {
|
2021-03-11 02:06:03 +08:00
|
|
|
if (!goodRecruitmentTime.isReady()) {
|
2018-06-29 14:15:32 +08:00
|
|
|
throw operation_failed();
|
|
|
|
}
|
|
|
|
satelliteFallback = true;
|
|
|
|
startDC = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
try {
|
2019-03-19 03:17:59 +08:00
|
|
|
bool remoteDCUsedAsSatellite = false;
|
2018-06-29 14:15:32 +08:00
|
|
|
std::set<Optional<Key>> satelliteDCs;
|
2019-10-15 09:30:15 +08:00
|
|
|
int32_t desiredSatelliteTLogs = 0;
|
2021-03-11 02:06:03 +08:00
|
|
|
for (int s = startDC;
|
|
|
|
s < std::min<int>(startDC + (satelliteFallback ? region.satelliteTLogUsableDcsFallback
|
|
|
|
: region.satelliteTLogUsableDcs),
|
|
|
|
region.satellites.size());
|
|
|
|
s++) {
|
2018-06-29 14:15:32 +08:00
|
|
|
satelliteDCs.insert(region.satellites[s].dcId);
|
2021-03-11 02:06:03 +08:00
|
|
|
if (region.satellites[s].satelliteDesiredTLogCount == -1 || desiredSatelliteTLogs == -1) {
|
2019-10-15 09:30:15 +08:00
|
|
|
desiredSatelliteTLogs = -1;
|
|
|
|
} else {
|
|
|
|
desiredSatelliteTLogs += region.satellites[s].satelliteDesiredTLogCount;
|
|
|
|
}
|
2019-03-19 03:17:59 +08:00
|
|
|
if (region.satellites[s].dcId == remoteRegion.dcId) {
|
|
|
|
remoteDCUsedAsSatellite = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
std::vector<UID> exclusionWorkerIds;
|
2021-03-11 02:06:03 +08:00
|
|
|
// FIXME: If remote DC is used as satellite then this logic only ensures that required number of remote
|
|
|
|
// TLogs can be recruited. It does not balance the number of desired TLogs across the satellite and
|
|
|
|
// remote sides.
|
2019-03-19 03:17:59 +08:00
|
|
|
if (remoteDCUsedAsSatellite) {
|
2021-03-11 02:06:03 +08:00
|
|
|
std::map<Optional<Standalone<StringRef>>, int> tmpIdUsed;
|
|
|
|
auto remoteLogs = getWorkersForTlogs(conf,
|
|
|
|
conf.getRemoteTLogReplicationFactor(),
|
|
|
|
conf.getRemoteTLogReplicationFactor(),
|
|
|
|
conf.getRemoteTLogPolicy(),
|
|
|
|
tmpIdUsed,
|
|
|
|
false,
|
|
|
|
{ remoteRegion.dcId },
|
|
|
|
{});
|
|
|
|
std::transform(remoteLogs.begin(),
|
|
|
|
remoteLogs.end(),
|
|
|
|
std::back_inserter(exclusionWorkerIds),
|
|
|
|
[](const WorkerDetails& in) { return in.interf.id(); });
|
|
|
|
}
|
|
|
|
if (satelliteFallback) {
|
|
|
|
return getWorkersForTlogs(conf,
|
|
|
|
region.satelliteTLogReplicationFactorFallback,
|
|
|
|
desiredSatelliteTLogs > 0 ? desiredSatelliteTLogs
|
|
|
|
: conf.getDesiredSatelliteLogs(region.dcId) *
|
|
|
|
region.satelliteTLogUsableDcsFallback /
|
|
|
|
region.satelliteTLogUsableDcs,
|
|
|
|
region.satelliteTLogPolicyFallback,
|
|
|
|
id_used,
|
|
|
|
checkStable,
|
|
|
|
satelliteDCs,
|
|
|
|
exclusionWorkerIds);
|
2018-06-29 14:15:32 +08:00
|
|
|
} else {
|
2021-03-11 02:06:03 +08:00
|
|
|
return getWorkersForTlogs(conf,
|
|
|
|
region.satelliteTLogReplicationFactor,
|
|
|
|
desiredSatelliteTLogs > 0 ? desiredSatelliteTLogs
|
|
|
|
: conf.getDesiredSatelliteLogs(region.dcId),
|
|
|
|
region.satelliteTLogPolicy,
|
|
|
|
id_used,
|
|
|
|
checkStable,
|
|
|
|
satelliteDCs,
|
|
|
|
exclusionWorkerIds);
|
|
|
|
}
|
|
|
|
} catch (Error& e) {
|
|
|
|
if (e.code() != error_code_no_more_servers) {
|
2018-06-29 14:15:32 +08:00
|
|
|
throw;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
startDC++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-03-23 09:00:16 +08:00
|
|
|
// Returns the best (lowest) fitness for `role` among workers that are not marked
// excluded in their priority info and whose DC equals clusterControllerDcId.
// Returns NeverAssign when no worker qualifies.
ProcessClass::Fitness getBestFitnessForRoleInDatacenter(ProcessClass::ClusterRole role) {
    ProcessClass::Fitness best = ProcessClass::NeverAssign;
    for (const auto& entry : id_worker) {
        const auto& info = entry.second;
        if (info.priorityInfo.isExcluded || info.details.interf.locality.dcId() != clusterControllerDcId) {
            continue;
        }
        const ProcessClass::Fitness candidate = info.details.processClass.machineClassFitness(role);
        if (candidate < best) {
            best = candidate;
        }
    }
    return best;
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Picks a single worker in `dcId` for `role`.
//
// Candidates must be available (see workerAvailable / `checkStable`), live in `dcId`,
// and have a fitness strictly better than `unacceptableFitness`; a worker excluded by
// `conf` has its fitness raised to at least ExcludeFit before that comparison.
// Candidates are bucketed by (fitness, current id_used count). Within a bucket,
// processes that do not host the data distributor or ratekeeper are tried before those
// that do, and each group is shuffled so the pick among equals is random.
//
// The chosen worker's process id is incremented in `id_used`. The reported fitness is
// never better than GoodFit. Throws no_more_servers() if there is no candidate.
WorkerFitnessInfo getWorkerForRoleInDatacenter(Optional<Standalone<StringRef>> const& dcId,
                                               ProcessClass::ClusterRole role,
                                               ProcessClass::Fitness unacceptableFitness,
                                               DatabaseConfiguration const& conf,
                                               std::map<Optional<Standalone<StringRef>>, int>& id_used,
                                               bool checkStable = false) {
    // Value: (ordinary workers, workers hosting a long-lived stateless role).
    std::map<std::pair<ProcessClass::Fitness, int>,
             std::pair<std::vector<WorkerDetails>, std::vector<WorkerDetails>>>
        fitness_workers;

    for (auto& it : id_worker) {
        auto& details = it.second.details;
        auto fitness = details.processClass.machineClassFitness(role);
        if (conf.isExcludedServer(details.interf.addresses())) {
            fitness = std::max(fitness, ProcessClass::ExcludeFit);
        }

        const bool acceptable = workerAvailable(it.second, checkStable) && fitness < unacceptableFitness &&
                                details.interf.locality.dcId() == dcId;
        if (!acceptable) {
            continue;
        }

        const bool longLived = isLongLivedStateless(it.first);
        auto& bucket = fitness_workers[std::make_pair(fitness, id_used[it.first])];
        if (longLived) {
            bucket.second.push_back(details);
        } else {
            bucket.first.push_back(details);
        }
    }

    for (auto& bucket : fitness_workers) {
        for (auto* group : { &bucket.second.first, &bucket.second.second }) {
            // Shuffle even when the group is empty, so the call pattern on the
            // deterministic random source stays exactly as before.
            deterministicRandom()->randomShuffle(*group);
            if (group->size() == 0) {
                continue;
            }
            auto& chosen = (*group)[0];
            id_used[chosen.interf.locality.processId()]++;
            return WorkerFitnessInfo(chosen, std::max(ProcessClass::GoodFit, bucket.first.first), bucket.first.second);
        }
    }

    throw no_more_servers();
}
|
|
|
|
|
2020-08-06 15:01:57 +08:00
|
|
|
// Picks up to `amount` workers located in datacenter `dcId` to serve `role`.
//
// If `minWorker` is present it is always the first element of the result, and every other candidate must
// be a different worker that is either strictly fitter than it, or equally fit and used no more than
// `minWorker.used` times according to `id_used`.
//
// Candidates are taken in ascending (fitness, current use count) order. Within one such bucket, workers for
// which isLongLivedStateless() is false are taken before those for which it is true, and each of the two
// groups is shuffled before being consumed.
//
// `id_used` is incremented for the process of every worker added here (but not for `minWorker` itself), and
// looking a process up in it default-inserts a zero entry. The result may hold fewer than `amount` workers
// when not enough candidates qualify.
vector<WorkerDetails> getWorkersForRoleInDatacenter(
    Optional<Standalone<StringRef>> const& dcId,
    ProcessClass::ClusterRole role,
    int amount,
    DatabaseConfiguration const& conf,
    std::map<Optional<Standalone<StringRef>>, int>& id_used,
    Optional<WorkerFitnessInfo> minWorker = Optional<WorkerFitnessInfo>(),
    bool checkStable = false) {
	using FitnessKey = std::pair<ProcessClass::Fitness, int>;
	// .first holds workers that are not long-lived stateless, .second holds those that are
	using WorkerGroups = std::pair<vector<WorkerDetails>, vector<WorkerDetails>>;

	vector<WorkerDetails> results;
	if (minWorker.present()) {
		results.push_back(minWorker.get().worker);
	}
	if (amount <= results.size()) {
		return results;
	}

	std::map<FitnessKey, WorkerGroups> candidates;
	for (auto& entry : id_worker) {
		auto& details = entry.second.details;
		auto fitness = details.processClass.machineClassFitness(role);

		if (!workerAvailable(entry.second, checkStable)) {
			continue;
		}
		if (conf.isExcludedServer(details.interf.addresses())) {
			continue;
		}
		if (!(details.interf.locality.dcId() == dcId)) {
			continue;
		}
		if (minWorker.present()) {
			const WorkerFitnessInfo& baseline = minWorker.get();
			if (!(details.interf.id() != baseline.worker.interf.id())) {
				continue;
			}
			const bool strictlyFitter = fitness < baseline.fitness;
			if (!strictlyFitter && !(fitness == baseline.fitness && id_used[entry.first] <= baseline.used)) {
				continue;
			}
		}

		const bool longLived = isLongLivedStateless(entry.first);
		WorkerGroups& bucket = candidates[std::make_pair(fitness, id_used[entry.first])];
		if (longLived) {
			bucket.second.push_back(details);
		} else {
			bucket.first.push_back(details);
		}
	}

	for (auto& bucket : candidates) {
		vector<WorkerDetails>* groups[] = { &bucket.second.first, &bucket.second.second };
		for (vector<WorkerDetails>* group : groups) {
			deterministicRandom()->randomShuffle(*group);
			for (auto& picked : *group) {
				results.push_back(picked);
				id_used[picked.interf.locality.processId()]++;
				if (results.size() == amount) {
					return results;
				}
			}
		}
	}

	return results;
}
|
|
|
|
|
2018-02-10 08:48:55 +08:00
|
|
|
struct RoleFitness {
|
|
|
|
ProcessClass::Fitness bestFit;
|
|
|
|
ProcessClass::Fitness worstFit;
|
2019-01-11 02:28:32 +08:00
|
|
|
ProcessClass::ClusterRole role;
|
2018-02-10 08:48:55 +08:00
|
|
|
int count;
|
2019-03-09 03:40:00 +08:00
|
|
|
bool worstIsDegraded;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
RoleFitness(int bestFit, int worstFit, int count, ProcessClass::ClusterRole role)
|
|
|
|
: bestFit((ProcessClass::Fitness)bestFit), worstFit((ProcessClass::Fitness)worstFit), count(count),
|
|
|
|
role(role), worstIsDegraded(false) {}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
RoleFitness(int fitness, int count, ProcessClass::ClusterRole role)
|
|
|
|
: bestFit((ProcessClass::Fitness)fitness), worstFit((ProcessClass::Fitness)fitness), count(count), role(role),
|
|
|
|
worstIsDegraded(false) {}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
RoleFitness()
|
|
|
|
: bestFit(ProcessClass::NeverAssign), worstFit(ProcessClass::NeverAssign), role(ProcessClass::NoRole),
|
|
|
|
count(0), worstIsDegraded(false) {}
|
2017-10-25 03:58:54 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
RoleFitness(vector<WorkerDetails> workers, ProcessClass::ClusterRole role) : role(role) {
|
2019-10-15 09:32:17 +08:00
|
|
|
worstFit = ProcessClass::GoodFit;
|
2019-03-09 03:40:00 +08:00
|
|
|
worstIsDegraded = false;
|
2018-01-05 03:33:02 +08:00
|
|
|
bestFit = ProcessClass::NeverAssign;
|
2021-03-11 02:06:03 +08:00
|
|
|
for (auto& it : workers) {
|
|
|
|
auto thisFit = it.processClass.machineClassFitness(role);
|
|
|
|
if (thisFit > worstFit) {
|
2019-03-09 03:40:00 +08:00
|
|
|
worstFit = thisFit;
|
|
|
|
worstIsDegraded = it.degraded;
|
2021-03-11 02:06:03 +08:00
|
|
|
} else if (thisFit == worstFit) {
|
2019-03-09 03:40:00 +08:00
|
|
|
worstIsDegraded = worstIsDegraded || it.degraded;
|
|
|
|
}
|
2018-01-05 03:33:02 +08:00
|
|
|
bestFit = std::min(bestFit, thisFit);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2018-02-10 08:48:55 +08:00
|
|
|
count = workers.size();
|
2021-03-11 02:06:03 +08:00
|
|
|
// degraded is only used for recruitment of tlogs
|
|
|
|
if (role != ProcessClass::TLog) {
|
2019-03-22 02:23:49 +08:00
|
|
|
worstIsDegraded = false;
|
|
|
|
}
|
2018-02-10 08:48:55 +08:00
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
bool operator<(RoleFitness const& r) const {
|
|
|
|
if (worstFit != r.worstFit)
|
|
|
|
return worstFit < r.worstFit;
|
|
|
|
if (worstIsDegraded != r.worstIsDegraded)
|
|
|
|
return r.worstIsDegraded;
|
2019-02-01 10:20:14 +08:00
|
|
|
// FIXME: TLog recruitment process does not guarantee the best fit is not worsened.
|
2021-03-11 02:06:03 +08:00
|
|
|
if (role != ProcessClass::TLog && role != ProcessClass::LogRouter && bestFit != r.bestFit)
|
|
|
|
return bestFit < r.bestFit;
|
2018-02-10 08:48:55 +08:00
|
|
|
return count > r.count;
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2020-07-11 05:37:47 +08:00
|
|
|
bool operator>(RoleFitness const& r) const { return r < *this; }
|
|
|
|
bool operator<=(RoleFitness const& r) const { return !(*this > r); }
|
|
|
|
bool operator>=(RoleFitness const& r) const { return !(*this < r); }
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
bool betterFitness(RoleFitness const& r) const {
|
|
|
|
if (worstFit != r.worstFit)
|
|
|
|
return worstFit < r.worstFit;
|
|
|
|
if (worstIsDegraded != r.worstIsDegraded)
|
|
|
|
return r.worstFit;
|
|
|
|
if (bestFit != r.bestFit)
|
|
|
|
return bestFit < r.bestFit;
|
2018-02-10 08:48:55 +08:00
|
|
|
return false;
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
bool betterCount(RoleFitness const& r) const {
|
|
|
|
if (count > r.count)
|
|
|
|
return true;
|
|
|
|
if (worstFit != r.worstFit)
|
|
|
|
return worstFit < r.worstFit;
|
|
|
|
if (worstIsDegraded != r.worstIsDegraded)
|
|
|
|
return r.worstFit;
|
2019-03-09 03:40:00 +08:00
|
|
|
return false;
|
2018-06-23 01:15:24 +08:00
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
bool operator==(RoleFitness const& r) const {
|
|
|
|
return worstFit == r.worstFit && bestFit == r.bestFit && count == r.count &&
|
|
|
|
worstIsDegraded == r.worstIsDegraded;
|
|
|
|
}
|
2018-06-29 14:15:32 +08:00
|
|
|
|
2019-03-09 03:40:00 +08:00
|
|
|
std::string toString() const { return format("%d %d %d %d", bestFit, worstFit, count, worstIsDegraded); }
|
2017-05-26 04:48:44 +08:00
|
|
|
};
|
|
|
|
|
2019-10-18 04:18:31 +08:00
|
|
|
struct RoleFitnessPair {
|
|
|
|
RoleFitness proxy;
|
2020-08-06 15:01:57 +08:00
|
|
|
RoleFitness grvProxy;
|
2019-10-18 04:18:31 +08:00
|
|
|
RoleFitness resolver;
|
|
|
|
|
|
|
|
RoleFitnessPair() {}
|
2020-08-06 15:01:57 +08:00
|
|
|
RoleFitnessPair(RoleFitness const& proxy, RoleFitness const& grvProxy, RoleFitness const& resolver)
|
|
|
|
: proxy(proxy), grvProxy(grvProxy), resolver(resolver) {}
|
2019-10-18 04:18:31 +08:00
|
|
|
|
2020-08-06 15:01:57 +08:00
|
|
|
bool operator<(RoleFitnessPair const& r) const {
|
|
|
|
if (proxy.betterFitness(r.proxy)) {
|
2019-10-18 04:18:31 +08:00
|
|
|
return true;
|
|
|
|
}
|
2021-03-11 02:06:03 +08:00
|
|
|
if (r.proxy.betterFitness(proxy)) {
|
2019-10-18 04:18:31 +08:00
|
|
|
return false;
|
|
|
|
}
|
2020-08-06 15:01:57 +08:00
|
|
|
if (grvProxy.betterFitness(r.grvProxy)) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
if (r.grvProxy.betterFitness(grvProxy)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
if (resolver.betterFitness(r.resolver)) {
|
2019-10-18 04:18:31 +08:00
|
|
|
return true;
|
|
|
|
}
|
2021-03-11 02:06:03 +08:00
|
|
|
if (r.resolver.betterFitness(resolver)) {
|
2019-10-18 04:18:31 +08:00
|
|
|
return false;
|
|
|
|
}
|
2021-03-11 02:06:03 +08:00
|
|
|
if (proxy.count != r.proxy.count) {
|
2019-10-18 04:18:31 +08:00
|
|
|
return proxy.count > r.proxy.count;
|
|
|
|
}
|
2020-08-06 15:01:57 +08:00
|
|
|
if (grvProxy.count != r.grvProxy.count) {
|
|
|
|
return grvProxy.count > r.grvProxy.count;
|
|
|
|
}
|
2019-10-18 04:18:31 +08:00
|
|
|
return resolver.count > r.resolver.count;
|
|
|
|
}
|
2020-07-11 05:37:47 +08:00
|
|
|
bool operator>(RoleFitnessPair const& r) const { return r < *this; }
|
|
|
|
bool operator<=(RoleFitnessPair const& r) const { return !(*this > r); }
|
|
|
|
bool operator>=(RoleFitnessPair const& r) const { return !(*this < r); }
|
2019-10-18 04:18:31 +08:00
|
|
|
|
2020-08-06 15:01:57 +08:00
|
|
|
bool operator==(RoleFitnessPair const& r) const {
|
|
|
|
return proxy == r.proxy && grvProxy == r.grvProxy && resolver == r.resolver;
|
|
|
|
}
|
2019-10-18 04:18:31 +08:00
|
|
|
};
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Collects the datacenter ids of all registered workers that are available (per workerAvailable() with
// `checkStable`) and not excluded by `conf`.
std::set<Optional<Standalone<StringRef>>> getDatacenters(DatabaseConfiguration const& conf,
                                                         bool checkStable = false) {
	std::set<Optional<Standalone<StringRef>>> result;
	for (auto& entry : id_worker) {
		auto& worker = entry.second;
		if (!workerAvailable(worker, checkStable)) {
			continue;
		}
		if (conf.isExcludedServer(worker.details.interf.addresses())) {
			continue;
		}
		result.insert(worker.details.interf.locality.dcId());
	}
	return result;
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Counts one use each for the master's process and the cluster controller's process in `*id_used`.
void updateKnownIds(std::map<Optional<Standalone<StringRef>>, int>* id_used) {
	auto& usage = *id_used;
	usage[masterProcessId] += 1;
	usage[clusterControllerProcessId] += 1;
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Recruits the remote-region transaction logs and log routers requested by `req` in datacenter req.dcId.
//
// While goodRemoteRecruitmentTime is not yet ready, throws operation_failed() if either the recruited
// tlogs or the recruited log routers compare worse (per RoleFitness::betterCount) than the expectation
// built from the corresponding EXPECTED_*_FITNESS knob and the desired count.
RecruitRemoteFromConfigurationReply findRemoteWorkersForConfiguration(
    RecruitRemoteFromConfigurationRequest const& req) {
	RecruitRemoteFromConfigurationReply result;

	// Use counts start from the processes already hosting the master and the cluster controller
	std::map<Optional<Standalone<StringRef>>, int> id_used;
	updateKnownIds(&id_used);

	std::set<Optional<Key>> remoteDC;
	remoteDC.insert(req.dcId);

	auto remoteLogs = getWorkersForTlogs(req.configuration,
	                                     req.configuration.getRemoteTLogReplicationFactor(),
	                                     req.configuration.getDesiredRemoteLogs(),
	                                     req.configuration.getRemoteTLogPolicy(),
	                                     id_used,
	                                     false,
	                                     remoteDC,
	                                     req.exclusionWorkerIds);
	for (auto& log : remoteLogs) {
		result.remoteTLogs.push_back(log.interf);
	}

	auto logRouters = getWorkersForRoleInDatacenter(
	    req.dcId, ProcessClass::LogRouter, req.logRouterCount, req.configuration, id_used);
	for (auto& router : logRouters) {
		result.logRouters.push_back(router.interf);
	}

	if (!goodRemoteRecruitmentTime.isReady()) {
		const RoleFitness expectedLogs(
		    SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredRemoteLogs(), ProcessClass::TLog);
		bool belowExpectation = expectedLogs.betterCount(RoleFitness(remoteLogs, ProcessClass::TLog));
		if (!belowExpectation) {
			const RoleFitness expectedRouters(
			    SERVER_KNOBS->EXPECTED_LOG_ROUTER_FITNESS, req.logRouterCount, ProcessClass::LogRouter);
			belowExpectation = expectedRouters.betterCount(RoleFitness(logRouters, ProcessClass::LogRouter));
		}
		if (belowExpectation) {
			throw operation_failed();
		}
	}

	return result;
}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
ErrorOr<RecruitFromConfigurationReply> findWorkersForConfiguration(RecruitFromConfigurationRequest const& req,
|
|
|
|
Optional<Key> dcId) {
|
2017-09-12 08:40:46 +08:00
|
|
|
RecruitFromConfigurationReply result;
|
2021-03-11 02:06:03 +08:00
|
|
|
std::map<Optional<Standalone<StringRef>>, int> id_used;
|
2019-02-13 07:50:44 +08:00
|
|
|
updateKnownIds(&id_used);
|
2017-10-06 08:09:44 +08:00
|
|
|
|
2018-03-06 11:27:46 +08:00
|
|
|
ASSERT(dcId.present());
|
2018-06-29 14:15:32 +08:00
|
|
|
|
2018-03-06 11:27:46 +08:00
|
|
|
std::set<Optional<Key>> primaryDC;
|
|
|
|
primaryDC.insert(dcId);
|
|
|
|
result.dcId = dcId;
|
2018-06-29 14:15:32 +08:00
|
|
|
|
2018-03-06 11:27:46 +08:00
|
|
|
RegionInfo region;
|
2019-03-19 03:17:59 +08:00
|
|
|
RegionInfo remoteRegion;
|
2021-03-11 02:06:03 +08:00
|
|
|
for (auto& r : req.configuration.regions) {
|
|
|
|
if (r.dcId == dcId.get()) {
|
2018-03-06 11:27:46 +08:00
|
|
|
region = r;
|
2021-03-11 02:06:03 +08:00
|
|
|
} else {
|
2019-03-19 03:17:59 +08:00
|
|
|
remoteRegion = r;
|
2018-03-06 11:27:46 +08:00
|
|
|
}
|
|
|
|
}
|
2018-06-29 14:15:32 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (req.recruitSeedServers) {
|
|
|
|
auto primaryStorageServers =
|
|
|
|
getWorkersForSeedServers(req.configuration, req.configuration.storagePolicy, dcId);
|
|
|
|
for (int i = 0; i < primaryStorageServers.size(); i++) {
|
2019-03-09 00:25:07 +08:00
|
|
|
result.storageServers.push_back(primaryStorageServers[i].interf);
|
2017-10-06 08:09:44 +08:00
|
|
|
}
|
|
|
|
}
|
2018-06-29 14:15:32 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
auto tlogs = getWorkersForTlogs(req.configuration,
|
|
|
|
req.configuration.tLogReplicationFactor,
|
|
|
|
req.configuration.getDesiredLogs(),
|
|
|
|
req.configuration.tLogPolicy,
|
|
|
|
id_used,
|
|
|
|
false,
|
|
|
|
primaryDC);
|
|
|
|
for (int i = 0; i < tlogs.size(); i++) {
|
2019-03-09 00:25:07 +08:00
|
|
|
result.tLogs.push_back(tlogs[i].interf);
|
2017-09-12 08:40:46 +08:00
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2019-03-09 00:25:07 +08:00
|
|
|
std::vector<WorkerDetails> satelliteLogs;
|
2020-08-06 15:01:57 +08:00
|
|
|
if (region.satelliteTLogReplicationFactor > 0 && req.configuration.usableRegions > 1) {
|
|
|
|
satelliteLogs =
|
|
|
|
getWorkersForSatelliteLogs(req.configuration, region, remoteRegion, id_used, result.satelliteFallback);
|
2021-03-11 02:06:03 +08:00
|
|
|
for (int i = 0; i < satelliteLogs.size(); i++) {
|
2019-03-09 00:25:07 +08:00
|
|
|
result.satelliteTLogs.push_back(satelliteLogs[i].interf);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
auto first_commit_proxy = getWorkerForRoleInDatacenter(
|
|
|
|
dcId, ProcessClass::CommitProxy, ProcessClass::ExcludeFit, req.configuration, id_used);
|
|
|
|
auto first_grv_proxy = getWorkerForRoleInDatacenter(
|
|
|
|
dcId, ProcessClass::GrvProxy, ProcessClass::ExcludeFit, req.configuration, id_used);
|
|
|
|
auto first_resolver = getWorkerForRoleInDatacenter(
|
|
|
|
dcId, ProcessClass::Resolver, ProcessClass::ExcludeFit, req.configuration, id_used);
|
|
|
|
|
|
|
|
auto commit_proxies = getWorkersForRoleInDatacenter(dcId,
|
|
|
|
ProcessClass::CommitProxy,
|
|
|
|
req.configuration.getDesiredCommitProxies(),
|
|
|
|
req.configuration,
|
|
|
|
id_used,
|
|
|
|
first_commit_proxy);
|
|
|
|
auto grv_proxies = getWorkersForRoleInDatacenter(dcId,
|
|
|
|
ProcessClass::GrvProxy,
|
|
|
|
req.configuration.getDesiredGrvProxies(),
|
|
|
|
req.configuration,
|
|
|
|
id_used,
|
|
|
|
first_grv_proxy);
|
|
|
|
auto resolvers = getWorkersForRoleInDatacenter(dcId,
|
|
|
|
ProcessClass::Resolver,
|
|
|
|
req.configuration.getDesiredResolvers(),
|
|
|
|
req.configuration,
|
|
|
|
id_used,
|
|
|
|
first_resolver);
|
|
|
|
for (int i = 0; i < commit_proxies.size(); i++)
|
|
|
|
result.commitProxies.push_back(commit_proxies[i].interf);
|
|
|
|
for (int i = 0; i < grv_proxies.size(); i++)
|
|
|
|
result.grvProxies.push_back(grv_proxies[i].interf);
|
|
|
|
for (int i = 0; i < resolvers.size(); i++)
|
2020-07-29 22:26:55 +08:00
|
|
|
result.resolvers.push_back(resolvers[i].interf);
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (req.maxOldLogRouters > 0) {
|
|
|
|
if (tlogs.size() == 1) {
|
2020-01-03 07:05:44 +08:00
|
|
|
result.oldLogRouters.push_back(tlogs[0].interf);
|
2019-12-10 05:12:13 +08:00
|
|
|
} else {
|
2021-03-11 02:06:03 +08:00
|
|
|
for (int i = 0; i < tlogs.size(); i++) {
|
|
|
|
if (tlogs[i].interf.locality.processId() != clusterControllerProcessId) {
|
2020-01-03 07:05:44 +08:00
|
|
|
result.oldLogRouters.push_back(tlogs[i].interf);
|
2019-12-10 05:12:13 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2018-03-30 06:12:38 +08:00
|
|
|
}
|
2018-03-07 08:31:21 +08:00
|
|
|
|
2020-02-05 02:09:16 +08:00
|
|
|
if (req.configuration.backupWorkerEnabled) {
|
2020-01-28 05:14:52 +08:00
|
|
|
const int nBackup = std::max<int>(
|
|
|
|
(req.configuration.desiredLogRouterCount > 0 ? req.configuration.desiredLogRouterCount : tlogs.size()),
|
|
|
|
req.maxOldLogRouters);
|
|
|
|
auto backupWorkers =
|
|
|
|
getWorkersForRoleInDatacenter(dcId, ProcessClass::Backup, nBackup, req.configuration, id_used);
|
2021-03-11 02:06:03 +08:00
|
|
|
std::transform(backupWorkers.begin(),
|
|
|
|
backupWorkers.end(),
|
|
|
|
std::back_inserter(result.backupWorkers),
|
2020-01-28 05:14:52 +08:00
|
|
|
[](const WorkerDetails& w) { return w.interf; });
|
|
|
|
}
|
2019-05-16 07:13:04 +08:00
|
|
|
|
2020-08-06 15:01:57 +08:00
|
|
|
if (!goodRecruitmentTime.isReady() &&
|
|
|
|
(RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs(), ProcessClass::TLog)
|
|
|
|
.betterCount(RoleFitness(tlogs, ProcessClass::TLog)) ||
|
|
|
|
(region.satelliteTLogReplicationFactor > 0 && req.configuration.usableRegions > 1 &&
|
2021-03-11 02:06:03 +08:00
|
|
|
RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS,
|
|
|
|
req.configuration.getDesiredSatelliteLogs(dcId),
|
2020-08-06 15:01:57 +08:00
|
|
|
ProcessClass::TLog)
|
|
|
|
.betterCount(RoleFitness(satelliteLogs, ProcessClass::TLog))) ||
|
2021-03-11 02:06:03 +08:00
|
|
|
RoleFitness(SERVER_KNOBS->EXPECTED_COMMIT_PROXY_FITNESS,
|
|
|
|
req.configuration.getDesiredCommitProxies(),
|
2020-09-11 08:44:15 +08:00
|
|
|
ProcessClass::CommitProxy)
|
|
|
|
.betterCount(RoleFitness(commit_proxies, ProcessClass::CommitProxy)) ||
|
2021-03-11 02:06:03 +08:00
|
|
|
RoleFitness(SERVER_KNOBS->EXPECTED_GRV_PROXY_FITNESS,
|
|
|
|
req.configuration.getDesiredGrvProxies(),
|
2020-08-06 15:01:57 +08:00
|
|
|
ProcessClass::GrvProxy)
|
|
|
|
.betterCount(RoleFitness(grv_proxies, ProcessClass::GrvProxy)) ||
|
2021-03-11 02:06:03 +08:00
|
|
|
RoleFitness(SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS,
|
|
|
|
req.configuration.getDesiredResolvers(),
|
2020-08-06 15:01:57 +08:00
|
|
|
ProcessClass::Resolver)
|
|
|
|
.betterCount(RoleFitness(resolvers, ProcessClass::Resolver)))) {
|
2018-02-10 08:48:55 +08:00
|
|
|
return operation_failed();
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
RecruitFromConfigurationReply findWorkersForConfiguration(RecruitFromConfigurationRequest const& req) {
|
|
|
|
if (req.configuration.regions.size() > 1) {
|
2018-06-14 09:14:14 +08:00
|
|
|
std::vector<RegionInfo> regions = req.configuration.regions;
|
2021-03-11 02:06:03 +08:00
|
|
|
if (regions[0].priority == regions[1].priority && regions[1].dcId == clusterControllerDcId.get()) {
|
2018-06-14 09:14:14 +08:00
|
|
|
std::swap(regions[0], regions[1]);
|
|
|
|
}
|
2018-07-07 05:44:11 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (regions[1].dcId == clusterControllerDcId.get() && regions[1].priority >= 0 &&
|
|
|
|
(!versionDifferenceUpdated || datacenterVersionDifference >= SERVER_KNOBS->MAX_VERSION_DIFFERENCE)) {
|
2018-07-07 05:44:11 +08:00
|
|
|
std::swap(regions[0], regions[1]);
|
|
|
|
}
|
|
|
|
|
2018-02-10 08:48:55 +08:00
|
|
|
bool setPrimaryDesired = false;
|
2017-09-12 08:40:46 +08:00
|
|
|
try {
|
2018-06-14 09:14:14 +08:00
|
|
|
auto reply = findWorkersForConfiguration(req, regions[0].dcId);
|
2018-02-10 08:48:55 +08:00
|
|
|
setPrimaryDesired = true;
|
|
|
|
vector<Optional<Key>> dcPriority;
|
2018-06-14 09:14:14 +08:00
|
|
|
dcPriority.push_back(regions[0].dcId);
|
|
|
|
dcPriority.push_back(regions[1].dcId);
|
2018-02-10 08:48:55 +08:00
|
|
|
desiredDcIds.set(dcPriority);
|
2021-03-11 02:06:03 +08:00
|
|
|
if (reply.isError()) {
|
2018-02-10 08:48:55 +08:00
|
|
|
throw reply.getError();
|
2021-03-11 02:06:03 +08:00
|
|
|
} else if (regions[0].dcId == clusterControllerDcId.get()) {
|
2018-02-10 08:48:55 +08:00
|
|
|
return reply.get();
|
|
|
|
}
|
|
|
|
throw no_more_servers();
|
2021-03-11 02:06:03 +08:00
|
|
|
} catch (Error& e) {
|
2020-02-20 08:48:30 +08:00
|
|
|
if (!goodRemoteRecruitmentTime.isReady() && regions[1].dcId != clusterControllerDcId.get()) {
|
2018-06-26 09:20:16 +08:00
|
|
|
throw operation_failed();
|
|
|
|
}
|
|
|
|
|
|
|
|
if (e.code() != error_code_no_more_servers || regions[1].priority < 0) {
|
2017-09-12 08:40:46 +08:00
|
|
|
throw;
|
|
|
|
}
|
2018-02-10 08:48:55 +08:00
|
|
|
TraceEvent(SevWarn, "AttemptingRecruitmentInRemoteDC", id).error(e);
|
2018-06-14 09:14:14 +08:00
|
|
|
auto reply = findWorkersForConfiguration(req, regions[1].dcId);
|
2021-03-11 02:06:03 +08:00
|
|
|
if (!setPrimaryDesired) {
|
2018-02-10 08:48:55 +08:00
|
|
|
vector<Optional<Key>> dcPriority;
|
2018-06-14 09:14:14 +08:00
|
|
|
dcPriority.push_back(regions[1].dcId);
|
|
|
|
dcPriority.push_back(regions[0].dcId);
|
2018-02-10 08:48:55 +08:00
|
|
|
desiredDcIds.set(dcPriority);
|
|
|
|
}
|
2021-03-11 02:06:03 +08:00
|
|
|
if (reply.isError()) {
|
2018-02-10 08:48:55 +08:00
|
|
|
throw reply.getError();
|
2019-03-23 08:08:58 +08:00
|
|
|
} else if (regions[1].dcId == clusterControllerDcId.get()) {
|
2018-02-10 08:48:55 +08:00
|
|
|
return reply.get();
|
|
|
|
}
|
|
|
|
throw;
|
2017-09-12 08:40:46 +08:00
|
|
|
}
|
2021-03-11 02:06:03 +08:00
|
|
|
} else if (req.configuration.regions.size() == 1) {
|
2018-03-16 01:59:30 +08:00
|
|
|
vector<Optional<Key>> dcPriority;
|
|
|
|
dcPriority.push_back(req.configuration.regions[0].dcId);
|
|
|
|
desiredDcIds.set(dcPriority);
|
|
|
|
auto reply = findWorkersForConfiguration(req, req.configuration.regions[0].dcId);
|
2021-03-11 02:06:03 +08:00
|
|
|
if (reply.isError()) {
|
2018-03-16 01:59:30 +08:00
|
|
|
throw reply.getError();
|
2019-03-23 08:08:58 +08:00
|
|
|
} else if (req.configuration.regions[0].dcId == clusterControllerDcId.get()) {
|
2018-03-16 01:59:30 +08:00
|
|
|
return reply.get();
|
|
|
|
}
|
|
|
|
throw no_more_servers();
|
2017-09-12 08:40:46 +08:00
|
|
|
} else {
|
|
|
|
RecruitFromConfigurationReply result;
|
2021-03-11 02:06:03 +08:00
|
|
|
std::map<Optional<Standalone<StringRef>>, int> id_used;
|
2019-02-13 07:50:44 +08:00
|
|
|
updateKnownIds(&id_used);
|
2021-03-11 02:06:03 +08:00
|
|
|
auto tlogs = getWorkersForTlogs(req.configuration,
|
|
|
|
req.configuration.tLogReplicationFactor,
|
|
|
|
req.configuration.getDesiredLogs(),
|
|
|
|
req.configuration.tLogPolicy,
|
|
|
|
id_used);
|
|
|
|
for (int i = 0; i < tlogs.size(); i++) {
|
2019-03-09 00:25:07 +08:00
|
|
|
result.tLogs.push_back(tlogs[i].interf);
|
2017-09-12 08:40:46 +08:00
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (req.maxOldLogRouters > 0) {
|
|
|
|
if (tlogs.size() == 1) {
|
2020-01-03 07:05:44 +08:00
|
|
|
result.oldLogRouters.push_back(tlogs[0].interf);
|
2019-12-10 05:12:13 +08:00
|
|
|
} else {
|
2020-08-06 15:01:57 +08:00
|
|
|
for (int i = 0; i < tlogs.size(); i++) {
|
|
|
|
if (tlogs[i].interf.locality.processId() != clusterControllerProcessId) {
|
2020-01-03 07:05:44 +08:00
|
|
|
result.oldLogRouters.push_back(tlogs[i].interf);
|
2019-12-10 05:12:13 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-08-06 15:01:57 +08:00
|
|
|
if (req.recruitSeedServers) {
|
2021-03-11 02:06:03 +08:00
|
|
|
auto primaryStorageServers =
|
|
|
|
getWorkersForSeedServers(req.configuration, req.configuration.storagePolicy);
|
|
|
|
for (int i = 0; i < primaryStorageServers.size(); i++)
|
2019-03-09 00:25:07 +08:00
|
|
|
result.storageServers.push_back(primaryStorageServers[i].interf);
|
2017-10-06 08:09:44 +08:00
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
auto datacenters = getDatacenters(req.configuration);
|
2018-02-10 08:48:55 +08:00
|
|
|
|
2019-10-18 04:18:31 +08:00
|
|
|
RoleFitnessPair bestFitness;
|
2018-02-10 08:48:55 +08:00
|
|
|
int numEquivalent = 1;
|
|
|
|
Optional<Key> bestDC;
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
for (auto dcId : datacenters) {
|
2018-02-10 08:48:55 +08:00
|
|
|
try {
|
2021-03-11 02:06:03 +08:00
|
|
|
// SOMEDAY: recruitment in other DCs besides the clusterControllerDcID will not account for the
|
|
|
|
// processes used by the master and cluster controller properly.
|
2018-02-10 08:48:55 +08:00
|
|
|
auto used = id_used;
|
2020-09-11 08:44:15 +08:00
|
|
|
auto first_commit_proxy = getWorkerForRoleInDatacenter(
|
|
|
|
dcId, ProcessClass::CommitProxy, ProcessClass::ExcludeFit, req.configuration, used);
|
2020-08-06 15:01:57 +08:00
|
|
|
auto first_grv_proxy = getWorkerForRoleInDatacenter(
|
|
|
|
dcId, ProcessClass::GrvProxy, ProcessClass::ExcludeFit, req.configuration, used);
|
|
|
|
auto first_resolver = getWorkerForRoleInDatacenter(
|
|
|
|
dcId, ProcessClass::Resolver, ProcessClass::ExcludeFit, req.configuration, used);
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
auto commit_proxies = getWorkersForRoleInDatacenter(dcId,
|
|
|
|
ProcessClass::CommitProxy,
|
2020-09-11 08:44:15 +08:00
|
|
|
req.configuration.getDesiredCommitProxies(),
|
2021-03-11 02:06:03 +08:00
|
|
|
req.configuration,
|
|
|
|
used,
|
|
|
|
first_commit_proxy);
|
|
|
|
auto grv_proxies = getWorkersForRoleInDatacenter(dcId,
|
|
|
|
ProcessClass::GrvProxy,
|
2020-08-06 15:01:57 +08:00
|
|
|
req.configuration.getDesiredGrvProxies(),
|
2021-03-11 02:06:03 +08:00
|
|
|
req.configuration,
|
|
|
|
used,
|
|
|
|
first_grv_proxy);
|
|
|
|
auto resolvers = getWorkersForRoleInDatacenter(dcId,
|
|
|
|
ProcessClass::Resolver,
|
|
|
|
req.configuration.getDesiredResolvers(),
|
|
|
|
req.configuration,
|
|
|
|
used,
|
|
|
|
first_resolver);
|
2018-02-10 08:48:55 +08:00
|
|
|
|
2020-09-11 08:44:15 +08:00
|
|
|
RoleFitnessPair fitness(RoleFitness(commit_proxies, ProcessClass::CommitProxy),
|
2020-08-06 15:01:57 +08:00
|
|
|
RoleFitness(grv_proxies, ProcessClass::GrvProxy),
|
|
|
|
RoleFitness(resolvers, ProcessClass::Resolver));
|
2018-02-10 08:48:55 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (dcId == clusterControllerDcId) {
|
2018-02-10 08:48:55 +08:00
|
|
|
bestFitness = fitness;
|
|
|
|
bestDC = dcId;
|
2020-08-12 09:54:54 +08:00
|
|
|
for (int i = 0; i < resolvers.size(); i++) {
|
2019-03-09 00:25:07 +08:00
|
|
|
result.resolvers.push_back(resolvers[i].interf);
|
2020-08-12 09:54:54 +08:00
|
|
|
}
|
2020-09-11 08:44:15 +08:00
|
|
|
for (int i = 0; i < commit_proxies.size(); i++) {
|
|
|
|
result.commitProxies.push_back(commit_proxies[i].interf);
|
2020-08-12 09:54:54 +08:00
|
|
|
}
|
|
|
|
for (int i = 0; i < grv_proxies.size(); i++) {
|
|
|
|
result.grvProxies.push_back(grv_proxies[i].interf);
|
|
|
|
}
|
2018-03-30 06:12:38 +08:00
|
|
|
|
2020-02-05 02:09:16 +08:00
|
|
|
if (req.configuration.backupWorkerEnabled) {
|
2020-01-28 05:14:52 +08:00
|
|
|
const int nBackup = std::max<int>(tlogs.size(), req.maxOldLogRouters);
|
2021-03-11 02:06:03 +08:00
|
|
|
auto backupWorkers = getWorkersForRoleInDatacenter(
|
|
|
|
dcId, ProcessClass::Backup, nBackup, req.configuration, id_used);
|
|
|
|
std::transform(backupWorkers.begin(),
|
|
|
|
backupWorkers.end(),
|
2020-01-28 05:14:52 +08:00
|
|
|
std::back_inserter(result.backupWorkers),
|
|
|
|
[](const WorkerDetails& w) { return w.interf; });
|
|
|
|
}
|
2019-12-11 05:28:49 +08:00
|
|
|
|
2018-02-10 08:48:55 +08:00
|
|
|
break;
|
|
|
|
} else {
|
2021-03-11 02:06:03 +08:00
|
|
|
if (fitness < bestFitness) {
|
2018-02-10 08:48:55 +08:00
|
|
|
bestFitness = fitness;
|
|
|
|
numEquivalent = 1;
|
|
|
|
bestDC = dcId;
|
2020-08-06 15:01:57 +08:00
|
|
|
} else if (fitness == bestFitness &&
|
|
|
|
deterministicRandom()->random01() < 1.0 / ++numEquivalent) {
|
2018-02-10 08:48:55 +08:00
|
|
|
bestDC = dcId;
|
|
|
|
}
|
|
|
|
}
|
2021-03-11 02:06:03 +08:00
|
|
|
} catch (Error& e) {
|
|
|
|
if (e.code() != error_code_no_more_servers) {
|
2018-02-10 08:48:55 +08:00
|
|
|
throw;
|
|
|
|
}
|
2017-09-12 08:40:46 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (bestDC != clusterControllerDcId) {
|
2018-02-10 08:48:55 +08:00
|
|
|
vector<Optional<Key>> dcPriority;
|
|
|
|
dcPriority.push_back(bestDC);
|
|
|
|
desiredDcIds.set(dcPriority);
|
|
|
|
throw no_more_servers();
|
|
|
|
}
|
2021-03-11 02:06:03 +08:00
|
|
|
// If this cluster controller dies, do not prioritize recruiting the next one in the same DC
|
2018-02-10 08:48:55 +08:00
|
|
|
desiredDcIds.set(vector<Optional<Key>>());
|
2020-08-06 15:01:57 +08:00
|
|
|
TraceEvent("FindWorkersForConfig")
|
|
|
|
.detail("Replication", req.configuration.tLogReplicationFactor)
|
|
|
|
.detail("DesiredLogs", req.configuration.getDesiredLogs())
|
|
|
|
.detail("ActualLogs", result.tLogs.size())
|
2020-09-11 08:44:15 +08:00
|
|
|
.detail("DesiredCommitProxies", req.configuration.getDesiredCommitProxies())
|
|
|
|
.detail("ActualCommitProxies", result.commitProxies.size())
|
2020-08-06 15:01:57 +08:00
|
|
|
.detail("DesiredGrvProxies", req.configuration.getDesiredGrvProxies())
|
|
|
|
.detail("ActualGrvProxies", result.grvProxies.size())
|
|
|
|
.detail("DesiredResolvers", req.configuration.getDesiredResolvers())
|
2020-07-29 22:26:55 +08:00
|
|
|
.detail("ActualResolvers", result.resolvers.size());
|
2017-09-12 08:40:46 +08:00
|
|
|
|
2020-08-06 15:01:57 +08:00
|
|
|
if (!goodRecruitmentTime.isReady() &&
|
2021-03-11 02:06:03 +08:00
|
|
|
(RoleFitness(
|
|
|
|
SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs(), ProcessClass::TLog)
|
2020-08-06 15:01:57 +08:00
|
|
|
.betterCount(RoleFitness(tlogs, ProcessClass::TLog)) ||
|
2021-03-11 02:06:03 +08:00
|
|
|
RoleFitness(SERVER_KNOBS->EXPECTED_COMMIT_PROXY_FITNESS,
|
|
|
|
req.configuration.getDesiredCommitProxies(),
|
2020-09-11 08:44:15 +08:00
|
|
|
ProcessClass::CommitProxy)
|
2020-08-06 15:01:57 +08:00
|
|
|
.betterCount(bestFitness.proxy) ||
|
2021-03-11 02:06:03 +08:00
|
|
|
RoleFitness(SERVER_KNOBS->EXPECTED_GRV_PROXY_FITNESS,
|
|
|
|
req.configuration.getDesiredGrvProxies(),
|
2020-08-06 15:01:57 +08:00
|
|
|
ProcessClass::GrvProxy)
|
|
|
|
.betterCount(bestFitness.grvProxy) ||
|
2021-03-11 02:06:03 +08:00
|
|
|
RoleFitness(SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS,
|
|
|
|
req.configuration.getDesiredResolvers(),
|
2020-08-06 15:01:57 +08:00
|
|
|
ProcessClass::Resolver)
|
|
|
|
.betterCount(bestFitness.resolver))) {
|
2017-09-12 08:40:46 +08:00
|
|
|
throw operation_failed();
|
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-02-11 05:45:06 +08:00
|
|
|
// Check if txn system is recruited successfully in each region
|
2018-06-14 09:14:14 +08:00
|
|
|
void checkRegions(const std::vector<RegionInfo>& regions) {
|
2021-03-11 02:06:03 +08:00
|
|
|
if (desiredDcIds.get().present() && desiredDcIds.get().get().size() == 2 &&
|
|
|
|
desiredDcIds.get().get()[0].get() == regions[0].dcId &&
|
|
|
|
desiredDcIds.get().get()[1].get() == regions[1].dcId) {
|
2018-06-14 09:14:14 +08:00
|
|
|
return;
|
|
|
|
}
|
2018-06-29 14:15:32 +08:00
|
|
|
|
2018-06-14 09:14:14 +08:00
|
|
|
try {
|
2021-03-11 02:06:03 +08:00
|
|
|
std::map<Optional<Standalone<StringRef>>, int> id_used;
|
|
|
|
getWorkerForRoleInDatacenter(
|
|
|
|
regions[0].dcId, ProcessClass::ClusterController, ProcessClass::ExcludeFit, db.config, id_used, true);
|
|
|
|
getWorkerForRoleInDatacenter(
|
|
|
|
regions[0].dcId, ProcessClass::Master, ProcessClass::ExcludeFit, db.config, id_used, true);
|
2018-06-29 14:15:32 +08:00
|
|
|
|
2018-06-14 09:14:14 +08:00
|
|
|
std::set<Optional<Key>> primaryDC;
|
|
|
|
primaryDC.insert(regions[0].dcId);
|
2021-03-11 02:06:03 +08:00
|
|
|
getWorkersForTlogs(db.config,
|
|
|
|
db.config.tLogReplicationFactor,
|
|
|
|
db.config.getDesiredLogs(),
|
|
|
|
db.config.tLogPolicy,
|
|
|
|
id_used,
|
|
|
|
true,
|
|
|
|
primaryDC);
|
|
|
|
if (regions[0].satelliteTLogReplicationFactor > 0 && db.config.usableRegions > 1) {
|
2018-06-29 14:15:32 +08:00
|
|
|
bool satelliteFallback = false;
|
2019-03-19 03:17:59 +08:00
|
|
|
getWorkersForSatelliteLogs(db.config, regions[0], regions[1], id_used, satelliteFallback, true);
|
2018-06-14 09:14:14 +08:00
|
|
|
}
|
2018-02-10 08:48:55 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
getWorkerForRoleInDatacenter(
|
|
|
|
regions[0].dcId, ProcessClass::Resolver, ProcessClass::ExcludeFit, db.config, id_used, true);
|
|
|
|
getWorkerForRoleInDatacenter(
|
|
|
|
regions[0].dcId, ProcessClass::CommitProxy, ProcessClass::ExcludeFit, db.config, id_used, true);
|
|
|
|
getWorkerForRoleInDatacenter(
|
|
|
|
regions[0].dcId, ProcessClass::GrvProxy, ProcessClass::ExcludeFit, db.config, id_used, true);
|
2018-02-10 08:48:55 +08:00
|
|
|
|
2018-06-14 09:14:14 +08:00
|
|
|
vector<Optional<Key>> dcPriority;
|
|
|
|
dcPriority.push_back(regions[0].dcId);
|
|
|
|
dcPriority.push_back(regions[1].dcId);
|
|
|
|
desiredDcIds.set(dcPriority);
|
2021-03-11 02:06:03 +08:00
|
|
|
} catch (Error& e) {
|
|
|
|
if (e.code() != error_code_no_more_servers) {
|
2018-06-14 09:14:14 +08:00
|
|
|
throw;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void checkRecoveryStalled() {
|
2021-03-11 02:06:03 +08:00
|
|
|
if ((db.serverInfo->get().recoveryState == RecoveryState::RECRUITING ||
|
|
|
|
db.serverInfo->get().recoveryState == RecoveryState::ACCEPTING_COMMITS ||
|
|
|
|
db.serverInfo->get().recoveryState == RecoveryState::ALL_LOGS_RECRUITED) &&
|
|
|
|
db.recoveryStalled) {
|
2019-03-23 08:08:58 +08:00
|
|
|
if (db.config.regions.size() > 1) {
|
2018-06-14 09:14:14 +08:00
|
|
|
auto regions = db.config.regions;
|
2021-03-11 02:06:03 +08:00
|
|
|
if (clusterControllerDcId.get() == regions[0].dcId) {
|
2018-06-14 09:14:14 +08:00
|
|
|
std::swap(regions[0], regions[1]);
|
2018-02-10 08:48:55 +08:00
|
|
|
}
|
2018-06-14 09:14:14 +08:00
|
|
|
ASSERT(clusterControllerDcId.get() == regions[1].dcId);
|
|
|
|
checkRegions(regions);
|
2018-02-10 08:48:55 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// FIXME: determine when to fail the cluster controller when a primaryDC has not been set
|
2017-05-26 04:48:44 +08:00
|
|
|
bool betterMasterExists() {
|
2020-04-12 10:30:05 +08:00
|
|
|
const ServerDBInfo dbi = db.serverInfo->get();
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (dbi.recoveryState < RecoveryState::ACCEPTING_COMMITS) {
|
2017-05-26 04:48:44 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Do not trigger better master exists if the cluster controller is excluded, since the master will change
|
|
|
|
// anyways once the cluster controller is moved
|
|
|
|
if (id_worker[clusterControllerProcessId].priorityInfo.isExcluded) {
|
2018-09-22 07:14:39 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2019-03-23 08:08:58 +08:00
|
|
|
if (db.config.regions.size() > 1 && db.config.regions[0].priority > db.config.regions[1].priority &&
|
2021-03-11 02:06:03 +08:00
|
|
|
db.config.regions[0].dcId != clusterControllerDcId.get() && versionDifferenceUpdated &&
|
|
|
|
datacenterVersionDifference < SERVER_KNOBS->MAX_VERSION_DIFFERENCE) {
|
2018-06-14 09:14:14 +08:00
|
|
|
checkRegions(db.config.regions);
|
|
|
|
}
|
2018-02-10 08:48:55 +08:00
|
|
|
|
2017-11-17 08:58:55 +08:00
|
|
|
// Get master process
|
2017-05-26 04:48:44 +08:00
|
|
|
auto masterWorker = id_worker.find(dbi.master.locality.processId());
|
2021-03-11 02:06:03 +08:00
|
|
|
if (masterWorker == id_worker.end()) {
|
2017-05-26 04:48:44 +08:00
|
|
|
return false;
|
2017-10-25 03:58:54 +08:00
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2017-11-17 08:58:55 +08:00
|
|
|
// Get tlog processes
|
2019-03-09 00:25:07 +08:00
|
|
|
std::vector<WorkerDetails> tlogs;
|
|
|
|
std::vector<WorkerDetails> remote_tlogs;
|
|
|
|
std::vector<WorkerDetails> satellite_tlogs;
|
|
|
|
std::vector<WorkerDetails> log_routers;
|
2018-04-27 13:18:07 +08:00
|
|
|
std::set<NetworkAddress> logRouterAddresses;
|
2020-01-18 04:49:34 +08:00
|
|
|
std::vector<WorkerDetails> backup_workers;
|
|
|
|
std::set<NetworkAddress> backup_addresses;
|
2018-02-10 08:48:55 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
for (auto& logSet : dbi.logSystemConfig.tLogs) {
|
|
|
|
for (auto& it : logSet.tLogs) {
|
2020-04-19 12:29:38 +08:00
|
|
|
auto tlogWorker = id_worker.find(it.interf().filteredLocality.processId());
|
2021-03-11 02:06:03 +08:00
|
|
|
if (tlogWorker == id_worker.end())
|
2018-02-10 08:48:55 +08:00
|
|
|
return false;
|
2021-03-11 02:06:03 +08:00
|
|
|
if (tlogWorker->second.priorityInfo.isExcluded)
|
2018-02-10 08:48:55 +08:00
|
|
|
return true;
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (logSet.isLocal && logSet.locality == tagLocalitySatellite) {
|
2019-03-09 00:25:07 +08:00
|
|
|
satellite_tlogs.push_back(tlogWorker->second.details);
|
2020-08-06 15:01:57 +08:00
|
|
|
} else if (logSet.isLocal) {
|
2019-03-09 00:25:07 +08:00
|
|
|
tlogs.push_back(tlogWorker->second.details);
|
2018-02-10 08:48:55 +08:00
|
|
|
} else {
|
2019-03-09 00:25:07 +08:00
|
|
|
remote_tlogs.push_back(tlogWorker->second.details);
|
2018-02-10 08:48:55 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-08-06 15:01:57 +08:00
|
|
|
for (auto& it : logSet.logRouters) {
|
2020-04-19 12:29:38 +08:00
|
|
|
auto tlogWorker = id_worker.find(it.interf().filteredLocality.processId());
|
2021-03-11 02:06:03 +08:00
|
|
|
if (tlogWorker == id_worker.end())
|
2018-02-10 08:48:55 +08:00
|
|
|
return false;
|
2021-03-11 02:06:03 +08:00
|
|
|
if (tlogWorker->second.priorityInfo.isExcluded)
|
2018-02-10 08:48:55 +08:00
|
|
|
return true;
|
2021-03-11 02:06:03 +08:00
|
|
|
if (!logRouterAddresses.count(tlogWorker->second.details.interf.address())) {
|
|
|
|
logRouterAddresses.insert(tlogWorker->second.details.interf.address());
|
2019-03-09 00:25:07 +08:00
|
|
|
log_routers.push_back(tlogWorker->second.details);
|
2018-04-27 13:18:07 +08:00
|
|
|
}
|
2018-02-10 08:48:55 +08:00
|
|
|
}
|
2020-01-18 04:49:34 +08:00
|
|
|
|
|
|
|
for (const auto& worker : logSet.backupWorkers) {
|
|
|
|
auto workerIt = id_worker.find(worker.interf().locality.processId());
|
2021-03-11 02:06:03 +08:00
|
|
|
if (workerIt == id_worker.end())
|
|
|
|
return false;
|
|
|
|
if (workerIt->second.priorityInfo.isExcluded)
|
|
|
|
return true;
|
2020-01-18 04:49:34 +08:00
|
|
|
if (backup_addresses.count(workerIt->second.details.interf.address()) == 0) {
|
|
|
|
backup_addresses.insert(workerIt->second.details.interf.address());
|
|
|
|
backup_workers.push_back(workerIt->second.details);
|
|
|
|
}
|
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
|
2020-09-11 08:44:15 +08:00
|
|
|
// Get commit proxy classes
|
|
|
|
std::vector<WorkerDetails> commitProxyClasses;
|
|
|
|
for (auto& it : dbi.client.commitProxies) {
|
|
|
|
auto commitProxyWorker = id_worker.find(it.processId);
|
2021-03-11 02:06:03 +08:00
|
|
|
if (commitProxyWorker == id_worker.end())
|
|
|
|
return false;
|
|
|
|
if (commitProxyWorker->second.priorityInfo.isExcluded)
|
|
|
|
return true;
|
2020-09-11 08:44:15 +08:00
|
|
|
commitProxyClasses.push_back(commitProxyWorker->second.details);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
|
2020-08-06 15:01:57 +08:00
|
|
|
// Get grv proxy classes
|
|
|
|
std::vector<WorkerDetails> grvProxyClasses;
|
2021-03-11 02:06:03 +08:00
|
|
|
for (auto& it : dbi.client.grvProxies) {
|
2020-07-15 15:37:41 +08:00
|
|
|
auto grvProxyWorker = id_worker.find(it.processId);
|
2021-03-11 02:06:03 +08:00
|
|
|
if (grvProxyWorker == id_worker.end())
|
2020-07-15 15:37:41 +08:00
|
|
|
return false;
|
2021-03-11 02:06:03 +08:00
|
|
|
if (grvProxyWorker->second.priorityInfo.isExcluded)
|
2020-07-15 15:37:41 +08:00
|
|
|
return true;
|
2020-08-06 15:01:57 +08:00
|
|
|
grvProxyClasses.push_back(grvProxyWorker->second.details);
|
2020-07-15 15:37:41 +08:00
|
|
|
}
|
|
|
|
|
2017-11-17 08:58:55 +08:00
|
|
|
// Get resolver classes
|
2019-03-09 00:25:07 +08:00
|
|
|
std::vector<WorkerDetails> resolverClasses;
|
2021-03-11 02:06:03 +08:00
|
|
|
for (auto& it : dbi.resolvers) {
|
2017-05-26 04:48:44 +08:00
|
|
|
auto resolverWorker = id_worker.find(it.locality.processId());
|
2021-03-11 02:06:03 +08:00
|
|
|
if (resolverWorker == id_worker.end())
|
2017-05-26 04:48:44 +08:00
|
|
|
return false;
|
2021-03-11 02:06:03 +08:00
|
|
|
if (resolverWorker->second.priorityInfo.isExcluded)
|
2017-11-17 08:58:55 +08:00
|
|
|
return true;
|
2019-03-09 00:25:07 +08:00
|
|
|
resolverClasses.push_back(resolverWorker->second.details);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Check master fitness. Don't return false if master is excluded in case all the processes are excluded, we
|
|
|
|
// still need master for recovery.
|
|
|
|
ProcessClass::Fitness oldMasterFit =
|
|
|
|
masterWorker->second.details.processClass.machineClassFitness(ProcessClass::Master);
|
|
|
|
if (db.config.isExcludedServer(dbi.master.addresses())) {
|
2017-11-17 08:58:55 +08:00
|
|
|
oldMasterFit = std::max(oldMasterFit, ProcessClass::ExcludeFit);
|
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
std::map<Optional<Standalone<StringRef>>, int> id_used;
|
2018-02-10 08:48:55 +08:00
|
|
|
id_used[clusterControllerProcessId]++;
|
2021-03-11 02:06:03 +08:00
|
|
|
WorkerFitnessInfo mworker = getWorkerForRoleInDatacenter(
|
|
|
|
clusterControllerDcId, ProcessClass::Master, ProcessClass::NeverAssign, db.config, id_used, true);
|
|
|
|
auto newMasterFit = mworker.worker.processClass.machineClassFitness(ProcessClass::Master);
|
|
|
|
if (db.config.isExcludedServer(mworker.worker.interf.addresses())) {
|
2019-10-15 09:32:17 +08:00
|
|
|
newMasterFit = std::max(newMasterFit, ProcessClass::ExcludeFit);
|
|
|
|
}
|
2017-11-17 08:58:55 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (oldMasterFit < newMasterFit)
|
2017-11-17 08:58:55 +08:00
|
|
|
return false;
|
2021-03-11 02:06:03 +08:00
|
|
|
if (oldMasterFit > newMasterFit || (dbi.master.locality.processId() == clusterControllerProcessId &&
|
|
|
|
mworker.worker.interf.locality.processId() != clusterControllerProcessId))
|
2017-11-17 08:58:55 +08:00
|
|
|
return true;
|
|
|
|
|
2018-02-10 08:48:55 +08:00
|
|
|
std::set<Optional<Key>> primaryDC;
|
|
|
|
std::set<Optional<Key>> remoteDC;
|
2018-03-06 11:27:46 +08:00
|
|
|
|
|
|
|
RegionInfo region;
|
2019-03-19 03:17:59 +08:00
|
|
|
RegionInfo remoteRegion;
|
2019-03-23 08:08:58 +08:00
|
|
|
if (db.config.regions.size()) {
|
2018-03-06 11:27:46 +08:00
|
|
|
primaryDC.insert(clusterControllerDcId);
|
2021-03-11 02:06:03 +08:00
|
|
|
for (auto& r : db.config.regions) {
|
|
|
|
if (r.dcId != clusterControllerDcId.get()) {
|
2018-03-06 11:27:46 +08:00
|
|
|
ASSERT(remoteDC.empty());
|
|
|
|
remoteDC.insert(r.dcId);
|
2019-03-19 03:17:59 +08:00
|
|
|
remoteRegion = r;
|
2018-02-10 08:48:55 +08:00
|
|
|
} else {
|
2018-03-06 11:27:46 +08:00
|
|
|
ASSERT(region.dcId == StringRef());
|
|
|
|
region = r;
|
|
|
|
}
|
|
|
|
}
|
2018-02-10 08:48:55 +08:00
|
|
|
}
|
2017-11-17 08:58:55 +08:00
|
|
|
|
2018-02-10 08:48:55 +08:00
|
|
|
// Check tLog fitness
|
|
|
|
RoleFitness oldTLogFit(tlogs, ProcessClass::TLog);
|
2021-03-11 02:06:03 +08:00
|
|
|
auto newTLogs = getWorkersForTlogs(db.config,
|
|
|
|
db.config.tLogReplicationFactor,
|
|
|
|
db.config.getDesiredLogs(),
|
|
|
|
db.config.tLogPolicy,
|
|
|
|
id_used,
|
|
|
|
true,
|
|
|
|
primaryDC);
|
2019-03-19 03:17:59 +08:00
|
|
|
RoleFitness newTLogFit(newTLogs, ProcessClass::TLog);
|
2017-11-17 08:58:55 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (oldTLogFit < newTLogFit)
|
|
|
|
return false;
|
2017-11-17 08:58:55 +08:00
|
|
|
|
2018-06-29 14:15:32 +08:00
|
|
|
bool oldSatelliteFallback = false;
|
2019-10-15 09:32:17 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
for (auto& logSet : dbi.logSystemConfig.tLogs) {
|
|
|
|
if (region.satelliteTLogPolicy.isValid() && logSet.isLocal && logSet.locality == tagLocalitySatellite) {
|
2018-06-29 14:15:32 +08:00
|
|
|
oldSatelliteFallback = logSet.tLogPolicy->info() != region.satelliteTLogPolicy->info();
|
2020-11-04 01:28:04 +08:00
|
|
|
ASSERT(!oldSatelliteFallback ||
|
|
|
|
(region.satelliteTLogPolicyFallback.isValid() &&
|
|
|
|
logSet.tLogPolicy->info() == region.satelliteTLogPolicyFallback->info()));
|
2018-06-29 14:15:32 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-02-10 08:48:55 +08:00
|
|
|
RoleFitness oldSatelliteTLogFit(satellite_tlogs, ProcessClass::TLog);
|
2018-06-29 14:15:32 +08:00
|
|
|
bool newSatelliteFallback = false;
|
2021-03-11 02:06:03 +08:00
|
|
|
auto newSatelliteTLogs =
|
|
|
|
(region.satelliteTLogReplicationFactor > 0 && db.config.usableRegions > 1)
|
|
|
|
? getWorkersForSatelliteLogs(db.config, region, remoteRegion, id_used, newSatelliteFallback, true)
|
|
|
|
: satellite_tlogs;
|
2019-03-19 03:17:59 +08:00
|
|
|
RoleFitness newSatelliteTLogFit(newSatelliteTLogs, ProcessClass::TLog);
|
2018-02-10 08:48:55 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
std::map<Optional<Key>, int32_t> satellite_priority;
|
|
|
|
for (auto& r : region.satellites) {
|
2019-10-15 09:31:23 +08:00
|
|
|
satellite_priority[r.dcId] = r.priority;
|
|
|
|
}
|
|
|
|
|
|
|
|
int32_t oldSatelliteRegionFit = std::numeric_limits<int32_t>::max();
|
2021-03-11 02:06:03 +08:00
|
|
|
for (auto& it : satellite_tlogs) {
|
2020-08-06 15:01:57 +08:00
|
|
|
if (satellite_priority.count(it.interf.locality.dcId())) {
|
2019-10-15 09:31:23 +08:00
|
|
|
oldSatelliteRegionFit = std::min(oldSatelliteRegionFit, satellite_priority[it.interf.locality.dcId()]);
|
|
|
|
} else {
|
|
|
|
oldSatelliteRegionFit = -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int32_t newSatelliteRegionFit = std::numeric_limits<int32_t>::max();
|
2021-03-11 02:06:03 +08:00
|
|
|
for (auto& it : newSatelliteTLogs) {
|
|
|
|
if (satellite_priority.count(it.interf.locality.dcId())) {
|
2019-10-15 09:31:23 +08:00
|
|
|
newSatelliteRegionFit = std::min(newSatelliteRegionFit, satellite_priority[it.interf.locality.dcId()]);
|
|
|
|
} else {
|
|
|
|
newSatelliteRegionFit = -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (oldSatelliteFallback && !newSatelliteFallback)
|
|
|
|
return true;
|
|
|
|
if (!oldSatelliteFallback && newSatelliteFallback)
|
2018-06-29 14:15:32 +08:00
|
|
|
return false;
|
2018-02-10 08:48:55 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (oldSatelliteRegionFit < newSatelliteRegionFit)
|
|
|
|
return true;
|
|
|
|
if (oldSatelliteRegionFit > newSatelliteRegionFit)
|
2019-10-15 09:31:23 +08:00
|
|
|
return false;
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (oldSatelliteTLogFit < newSatelliteTLogFit)
|
2019-10-15 09:31:23 +08:00
|
|
|
return false;
|
|
|
|
|
2018-02-10 08:48:55 +08:00
|
|
|
RoleFitness oldRemoteTLogFit(remote_tlogs, ProcessClass::TLog);
|
2019-03-19 03:17:59 +08:00
|
|
|
std::vector<UID> exclusionWorkerIds;
|
2021-03-11 02:06:03 +08:00
|
|
|
auto fn = [](const WorkerDetails& in) { return in.interf.id(); };
|
2019-03-19 03:17:59 +08:00
|
|
|
std::transform(newTLogs.begin(), newTLogs.end(), std::back_inserter(exclusionWorkerIds), fn);
|
|
|
|
std::transform(newSatelliteTLogs.begin(), newSatelliteTLogs.end(), std::back_inserter(exclusionWorkerIds), fn);
|
2019-01-11 02:28:32 +08:00
|
|
|
RoleFitness newRemoteTLogFit(
|
2021-03-11 02:06:03 +08:00
|
|
|
(db.config.usableRegions > 1 && (dbi.recoveryState == RecoveryState::ALL_LOGS_RECRUITED ||
|
|
|
|
dbi.recoveryState == RecoveryState::FULLY_RECOVERED))
|
|
|
|
? getWorkersForTlogs(db.config,
|
|
|
|
db.config.getRemoteTLogReplicationFactor(),
|
|
|
|
db.config.getDesiredRemoteLogs(),
|
|
|
|
db.config.getRemoteTLogPolicy(),
|
|
|
|
id_used,
|
|
|
|
true,
|
|
|
|
remoteDC,
|
|
|
|
exclusionWorkerIds)
|
|
|
|
: remote_tlogs,
|
|
|
|
ProcessClass::TLog);
|
|
|
|
if (oldRemoteTLogFit < newRemoteTLogFit)
|
|
|
|
return false;
|
|
|
|
int oldRouterCount =
|
|
|
|
oldTLogFit.count * std::max<int>(1, db.config.desiredLogRouterCount / std::max(1, oldTLogFit.count));
|
|
|
|
int newRouterCount =
|
|
|
|
newTLogFit.count * std::max<int>(1, db.config.desiredLogRouterCount / std::max(1, newTLogFit.count));
|
2018-02-10 08:48:55 +08:00
|
|
|
RoleFitness oldLogRoutersFit(log_routers, ProcessClass::LogRouter);
|
2020-08-06 15:01:57 +08:00
|
|
|
RoleFitness newLogRoutersFit(
|
|
|
|
(db.config.usableRegions > 1 && dbi.recoveryState == RecoveryState::FULLY_RECOVERED)
|
2021-03-11 02:06:03 +08:00
|
|
|
? getWorkersForRoleInDatacenter(*remoteDC.begin(),
|
|
|
|
ProcessClass::LogRouter,
|
|
|
|
newRouterCount,
|
|
|
|
db.config,
|
|
|
|
id_used,
|
|
|
|
Optional<WorkerFitnessInfo>(),
|
|
|
|
true)
|
2020-08-06 15:01:57 +08:00
|
|
|
: log_routers,
|
|
|
|
ProcessClass::LogRouter);
|
2018-02-10 08:48:55 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (oldLogRoutersFit.count < oldRouterCount) {
|
2018-04-27 13:18:07 +08:00
|
|
|
oldLogRoutersFit.worstFit = ProcessClass::NeverAssign;
|
|
|
|
}
|
2021-03-11 02:06:03 +08:00
|
|
|
if (newLogRoutersFit.count < newRouterCount) {
|
2018-04-27 13:18:07 +08:00
|
|
|
newLogRoutersFit.worstFit = ProcessClass::NeverAssign;
|
|
|
|
}
|
2021-03-11 02:06:03 +08:00
|
|
|
if (oldLogRoutersFit < newLogRoutersFit)
|
|
|
|
return false;
|
2020-07-15 15:37:41 +08:00
|
|
|
|
2020-08-06 15:01:57 +08:00
|
|
|
// Check proxy/grvProxy/resolver fitness
|
2020-09-11 08:44:15 +08:00
|
|
|
RoleFitnessPair oldInFit(RoleFitness(commitProxyClasses, ProcessClass::CommitProxy),
|
2020-08-06 15:01:57 +08:00
|
|
|
RoleFitness(grvProxyClasses, ProcessClass::GrvProxy),
|
|
|
|
RoleFitness(resolverClasses, ProcessClass::Resolver));
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
auto first_commit_proxy = getWorkerForRoleInDatacenter(
|
|
|
|
clusterControllerDcId, ProcessClass::CommitProxy, ProcessClass::ExcludeFit, db.config, id_used, true);
|
|
|
|
auto first_grv_proxy = getWorkerForRoleInDatacenter(
|
|
|
|
clusterControllerDcId, ProcessClass::GrvProxy, ProcessClass::ExcludeFit, db.config, id_used, true);
|
|
|
|
auto first_resolver = getWorkerForRoleInDatacenter(
|
|
|
|
clusterControllerDcId, ProcessClass::Resolver, ProcessClass::ExcludeFit, db.config, id_used, true);
|
|
|
|
auto commit_proxies = getWorkersForRoleInDatacenter(clusterControllerDcId,
|
|
|
|
ProcessClass::CommitProxy,
|
|
|
|
db.config.getDesiredCommitProxies(),
|
|
|
|
db.config,
|
|
|
|
id_used,
|
|
|
|
first_commit_proxy,
|
|
|
|
true);
|
|
|
|
auto grv_proxies = getWorkersForRoleInDatacenter(clusterControllerDcId,
|
|
|
|
ProcessClass::GrvProxy,
|
|
|
|
db.config.getDesiredGrvProxies(),
|
|
|
|
db.config,
|
|
|
|
id_used,
|
|
|
|
first_grv_proxy,
|
|
|
|
true);
|
|
|
|
auto resolvers = getWorkersForRoleInDatacenter(clusterControllerDcId,
|
|
|
|
ProcessClass::Resolver,
|
|
|
|
db.config.getDesiredResolvers(),
|
|
|
|
db.config,
|
|
|
|
id_used,
|
|
|
|
first_resolver,
|
|
|
|
true);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2020-09-11 08:44:15 +08:00
|
|
|
RoleFitnessPair newInFit(RoleFitness(commit_proxies, ProcessClass::CommitProxy),
|
2020-08-06 15:01:57 +08:00
|
|
|
RoleFitness(grv_proxies, ProcessClass::GrvProxy),
|
|
|
|
RoleFitness(resolvers, ProcessClass::Resolver));
|
|
|
|
if (oldInFit.proxy.betterFitness(newInFit.proxy) || oldInFit.grvProxy.betterFitness(newInFit.grvProxy) ||
|
|
|
|
oldInFit.resolver.betterFitness(newInFit.resolver)) {
|
2019-10-18 04:18:31 +08:00
|
|
|
return false;
|
|
|
|
}
|
2020-01-18 04:49:34 +08:00
|
|
|
|
2021-03-17 03:14:19 +08:00
|
|
|
if (oldTLogFit.count + oldInFit.proxy.count + oldInFit.grvProxy.count + oldInFit.resolver.count >
|
|
|
|
newTLogFit.count + newInFit.proxy.count + newInFit.grvProxy.count + newInFit.resolver.count) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2020-01-18 04:49:34 +08:00
|
|
|
// Check backup worker fitness
|
|
|
|
RoleFitness oldBackupWorkersFit(backup_workers, ProcessClass::Backup);
|
|
|
|
const int nBackup = backup_addresses.size();
|
|
|
|
RoleFitness newBackupWorkersFit(
|
2021-03-17 03:14:19 +08:00
|
|
|
getWorkersForRoleInDatacenter(
|
|
|
|
clusterControllerDcId, ProcessClass::Backup, nBackup, db.config, id_used, true),
|
2020-01-18 04:49:34 +08:00
|
|
|
ProcessClass::Backup);
|
|
|
|
|
2020-07-23 03:20:22 +08:00
|
|
|
if (oldTLogFit > newTLogFit || oldInFit > newInFit || oldSatelliteTLogFit > newSatelliteTLogFit ||
|
2020-01-18 04:49:34 +08:00
|
|
|
oldRemoteTLogFit > newRemoteTLogFit || oldLogRoutersFit > newLogRoutersFit ||
|
|
|
|
oldBackupWorkersFit > newBackupWorkersFit) {
|
|
|
|
TraceEvent("BetterMasterExists", id)
|
|
|
|
.detail("OldMasterFit", oldMasterFit)
|
|
|
|
.detail("NewMasterFit", newMasterFit)
|
|
|
|
.detail("OldTLogFit", oldTLogFit.toString())
|
|
|
|
.detail("NewTLogFit", newTLogFit.toString())
|
|
|
|
.detail("OldProxyFit", oldInFit.proxy.toString())
|
|
|
|
.detail("NewProxyFit", newInFit.proxy.toString())
|
2020-08-06 15:01:57 +08:00
|
|
|
.detail("OldGrvProxyFit", oldInFit.grvProxy.toString())
|
|
|
|
.detail("NewGrvProxyFit", newInFit.grvProxy.toString())
|
2020-01-18 04:49:34 +08:00
|
|
|
.detail("OldResolverFit", oldInFit.resolver.toString())
|
|
|
|
.detail("NewResolverFit", newInFit.resolver.toString())
|
|
|
|
.detail("OldSatelliteFit", oldSatelliteTLogFit.toString())
|
|
|
|
.detail("NewSatelliteFit", newSatelliteTLogFit.toString())
|
|
|
|
.detail("OldRemoteFit", oldRemoteTLogFit.toString())
|
|
|
|
.detail("NewRemoteFit", newRemoteTLogFit.toString())
|
|
|
|
.detail("OldRouterFit", oldLogRoutersFit.toString())
|
|
|
|
.detail("NewRouterFit", newLogRoutersFit.toString())
|
|
|
|
.detail("OldBackupWorkerFit", oldBackupWorkersFit.toString())
|
|
|
|
.detail("NewBackupWorkerFit", newBackupWorkersFit.toString())
|
|
|
|
.detail("OldSatelliteFallback", oldSatelliteFallback)
|
|
|
|
.detail("NewSatelliteFallback", newSatelliteFallback);
|
2017-05-26 04:48:44 +08:00
|
|
|
return true;
|
|
|
|
}
|
2017-11-17 08:58:55 +08:00
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2019-11-14 04:58:55 +08:00
|
|
|
// Reports whether `processId` currently hosts a present tlog, a commit proxy, a grv proxy,
// a resolver, or the cluster controller, according to db.serverInfo. The master's own
// process always yields false. Requires masterProcessId to be set.
bool isUsedNotMaster(Optional<Key> processId) {
	ASSERT(masterProcessId.present());
	if (processId == masterProcessId)
		return false;

	const auto& info = db.serverInfo->get();

	// Any present tlog in any log set running on this process?
	const auto logSetUsesProcess = [&processId](const auto& logSet) {
		return std::any_of(logSet.tLogs.begin(), logSet.tLogs.end(), [&processId](const auto& log) {
			return log.present() && log.interf().filteredLocality.processId() == processId;
		});
	};
	if (std::any_of(info.logSystemConfig.tLogs.begin(), info.logSystemConfig.tLogs.end(), logSetUsesProcess))
		return true;

	const bool hostsCommitProxy =
	    std::any_of(info.client.commitProxies.begin(),
	                info.client.commitProxies.end(),
	                [&processId](const CommitProxyInterface& proxy) { return proxy.processId == processId; });
	if (hostsCommitProxy)
		return true;

	const bool hostsGrvProxy =
	    std::any_of(info.client.grvProxies.begin(),
	                info.client.grvProxies.end(),
	                [&processId](const GrvProxyInterface& proxy) { return proxy.processId == processId; });
	if (hostsGrvProxy)
		return true;

	const bool hostsResolver = std::any_of(
	    info.resolvers.begin(), info.resolvers.end(), [&processId](const ResolverInterface& resolver) {
		    return resolver.locality.processId() == processId;
	    });
	if (hostsResolver)
		return true;

	// Finally, the cluster controller's own process counts as used.
	return processId == clusterControllerProcessId;
}
|
|
|
|
|
|
|
|
bool onMasterIsBetter(const WorkerDetails& worker, ProcessClass::ClusterRole role) {
|
|
|
|
ASSERT(masterProcessId.present());
|
2019-03-21 01:00:31 +08:00
|
|
|
const auto& pid = worker.interf.locality.processId();
|
2021-03-11 02:06:03 +08:00
|
|
|
if ((role != ProcessClass::DataDistributor && role != ProcessClass::Ratekeeper) ||
|
|
|
|
pid == masterProcessId.get()) {
|
2019-03-20 06:58:25 +08:00
|
|
|
return false;
|
|
|
|
}
|
2019-11-14 04:58:55 +08:00
|
|
|
return isUsedNotMaster(pid);
|
2019-03-20 02:29:19 +08:00
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Builds a per-process usage count: starts from whatever updateKnownIds() records, then adds
// one for every present tlog, commit proxy, grv proxy and resolver listed in db.serverInfo.
// Proxies and resolvers are asserted to carry a process id.
std::map<Optional<Standalone<StringRef>>, int> getUsedIds() {
	std::map<Optional<Standalone<StringRef>>, int> usage;
	updateKnownIds(&usage);

	const auto& info = db.serverInfo->get();

	for (const auto& logSet : info.logSystemConfig.tLogs) {
		for (const auto& log : logSet.tLogs) {
			if (!log.present()) {
				continue; // entries without an interface are not counted
			}
			++usage[log.interf().filteredLocality.processId()];
		}
	}

	for (const CommitProxyInterface& commitProxy : info.client.commitProxies) {
		ASSERT(commitProxy.processId.present());
		++usage[commitProxy.processId];
	}

	for (const GrvProxyInterface& grvProxy : info.client.grvProxies) {
		ASSERT(grvProxy.processId.present());
		++usage[grvProxy.processId];
	}

	for (const ResolverInterface& resolver : info.resolvers) {
		ASSERT(resolver.locality.processId().present());
		++usage[resolver.locality.processId()];
	}

	return usage;
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Data members of ClusterControllerData. Declaration order is significant: it is the order in
// which the constructor's mem-initializers run (e.g. `id` must precede clusterControllerMetrics).
// Worker records keyed by process id (looked up via locality.processId() throughout this struct).
std::map<Optional<Standalone<StringRef>>, WorkerInfo> id_worker;
|
|
|
|
std::map<Optional<Standalone<StringRef>>, ProcessClass>
|
|
|
|
id_class; // contains the mapping from process id to process class from the database
|
2017-05-26 04:48:44 +08:00
|
|
|
Standalone<RangeResultRef> lastProcessClasses;
|
|
|
|
bool gotProcessClasses;
|
2017-11-16 09:15:24 +08:00
|
|
|
bool gotFullyRecoveredConfig;
|
2017-05-26 04:48:44 +08:00
|
|
|
// Process id of the most recently recruited master; assigned by clusterWatchDatabase and
// asserted present by isUsedNotMaster()/onMasterIsBetter().
Optional<Standalone<StringRef>> masterProcessId;
|
2017-10-26 02:35:29 +08:00
|
|
|
// Process id of this cluster controller, taken from the locality passed to the constructor.
Optional<Standalone<StringRef>> clusterControllerProcessId;
|
2018-02-10 08:48:55 +08:00
|
|
|
// Datacenter id of this cluster controller, taken from the locality passed to the constructor.
Optional<Standalone<StringRef>> clusterControllerDcId;
|
2021-03-11 02:06:03 +08:00
|
|
|
AsyncVar<Optional<vector<Optional<Key>>>> desiredDcIds; // desired DC priorities
|
|
|
|
AsyncVar<std::pair<bool, Optional<vector<Optional<Key>>>>>
|
|
|
|
changingDcIds; // current DC priorities to change first, and whether that is the cluster controller
|
|
|
|
AsyncVar<std::pair<bool, Optional<vector<Optional<Key>>>>>
|
|
|
|
changedDcIds; // current DC priorities to change second, and whether the cluster controller has been changed
|
2017-05-26 04:48:44 +08:00
|
|
|
// Id of this cluster controller (ccInterface.id()); used to tag trace events and metrics.
UID id;
|
|
|
|
|
std::vector<RecruitFromConfigurationRequest> outstandingRecruitmentRequests;
|
2017-09-12 08:40:46 +08:00
|
|
|
std::vector<RecruitRemoteFromConfigurationRequest> outstandingRemoteRecruitmentRequests;
|
2017-05-26 04:48:44 +08:00
|
|
|
std::vector<std::pair<RecruitStorageRequest, double>> outstandingStorageRequests;
|
|
|
|
ActorCollection ac;
|
|
|
|
UpdateWorkerList updateWorkerList;
|
2018-06-28 14:02:08 +08:00
|
|
|
// Both checker futures start out already completed (initialized with Void() in the constructor).
Future<Void> outstandingRequestChecker;
|
2020-02-20 08:48:30 +08:00
|
|
|
Future<Void> outstandingRemoteRequestChecker;
|
2020-04-06 14:09:36 +08:00
|
|
|
AsyncTrigger updateDBInfo;
|
2020-04-18 06:05:01 +08:00
|
|
|
std::set<Endpoint> updateDBInfoEndpoints;
|
2020-04-06 14:09:36 +08:00
|
|
|
std::set<Endpoint> removedDBInfoEndpoints;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
|
DBInfo db;
|
|
|
|
Database cx;
|
|
|
|
// Construction time, captured with now().
double startTime;
|
2020-02-20 08:48:30 +08:00
|
|
|
// While this future is not ready, recruitment code in this file rejects or delays results
// that fall short of the expected fitness. Initialized to Never().
Future<Void> goodRecruitmentTime;
|
|
|
|
Future<Void> goodRemoteRecruitmentTime;
|
2018-06-14 09:14:14 +08:00
|
|
|
// Compared against MAX_VERSION_DIFFERENCE in betterMasterExists() before calling checkRegions().
Version datacenterVersionDifference;
|
2019-01-19 03:30:18 +08:00
|
|
|
PromiseStream<Future<Void>> addActor;
|
2019-04-25 06:12:37 +08:00
|
|
|
// Must be true before betterMasterExists() consults datacenterVersionDifference.
bool versionDifferenceUpdated;
|
2019-03-18 11:55:59 +08:00
|
|
|
bool recruitingDistributor;
|
2019-03-20 02:29:19 +08:00
|
|
|
Optional<UID> recruitingRatekeeperID;
|
2019-03-23 09:22:45 +08:00
|
|
|
AsyncVar<bool> recruitRatekeeper;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
2019-10-04 06:29:11 +08:00
|
|
|
// Metrics collection named "ClusterController"; the counters below are registered with it
// in the constructor.
CounterCollection clusterControllerMetrics;
|
|
|
|
|
|
|
|
Counter openDatabaseRequests;
|
|
|
|
Counter registerWorkerRequests;
|
|
|
|
Counter getWorkersRequests;
|
|
|
|
Counter getClientWorkersRequests;
|
|
|
|
Counter registerMasterRequests;
|
|
|
|
Counter statusRequests;
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Initializes the controller's state from its interface and locality, publishes an initial
// ServerDBInfo (fresh random id, next info generation, this controller's id/interface/locality)
// into db.serverInfo, and opens the server-side database handle on it.
// The mem-initializers are listed in member declaration order, which is the order they run in.
ClusterControllerData(ClusterControllerFullInterface const& ccInterface, LocalityData const& locality)
  : gotProcessClasses(false), gotFullyRecoveredConfig(false), clusterControllerProcessId(locality.processId()),
    clusterControllerDcId(locality.dcId()), id(ccInterface.id()), ac(false), outstandingRequestChecker(Void()),
    outstandingRemoteRequestChecker(Void()), startTime(now()), goodRecruitmentTime(Never()),
    goodRemoteRecruitmentTime(Never()), datacenterVersionDifference(0), versionDifferenceUpdated(false),
    recruitingDistributor(false), recruitRatekeeper(false),
    clusterControllerMetrics("ClusterController", id.toString()),
    openDatabaseRequests("OpenDatabaseRequests", clusterControllerMetrics),
    registerWorkerRequests("RegisterWorkerRequests", clusterControllerMetrics),
    getWorkersRequests("GetWorkersRequests", clusterControllerMetrics),
    getClientWorkersRequests("GetClientWorkersRequests", clusterControllerMetrics),
    registerMasterRequests("RegisterMasterRequests", clusterControllerMetrics),
    statusRequests("StatusRequests", clusterControllerMetrics) {
	ServerDBInfo initialInfo = ServerDBInfo();
	initialInfo.id = deterministicRandom()->randomUniqueID();
	initialInfo.infoGeneration = ++db.dbInfoCount;
	initialInfo.masterLifetime.ccID = id;
	initialInfo.clusterInterface = ccInterface;
	initialInfo.myLocality = locality;
	db.serverInfo->set(initialInfo);

	// The database handle is opened only after the initial info has been published.
	cx = openDBOnServer(db.serverInfo, TaskPriority::DefaultEndpoint, true, true);
}
|
|
|
|
|
|
|
|
// Tears down explicitly rather than relying on member destruction order: the actor
// collection is cleared first, then every worker record is dropped.
~ClusterControllerData() {
|
|
|
|
// NOTE(review): the meaning of the `false` argument is defined by ActorCollection::clear, not shown here.
ac.clear(false);
|
|
|
|
id_worker.clear();
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Recruits a master for the database and then watches it, looping forever:
//   1. pick a worker for the master role in the cluster controller's datacenter;
//   2. send it a RecruitMasterRequest and publish the resulting MasterInterface in db->serverInfo;
//   3. wait until the master is detected as failed or db->forceMasterFailure is triggered;
//   4. start over.
//
// cluster: the owning cluster controller state (worker selection, masterProcessId bookkeeping).
// db:      the database record whose serverInfo / registration counters are updated.
//
// Errors: no_more_servers is logged as a warning and retried after ATTEMPT_RECRUITMENT_DELAY;
// actor_cancelled and every other error are rethrown to the caller.
ACTOR Future<Void> clusterWatchDatabase(ClusterControllerData* cluster, ClusterControllerData::DBInfo* db) {
	state MasterInterface iMaster;

	// SOMEDAY: If there is already a non-failed master referenced by zkMasterInfo, use that one until it fails
	// When this someday is implemented, make sure forced failures still cause the master to be recruited again

	loop {
		TraceEvent("CCWDB", cluster->id);
		try {
			state double recoveryStart = now();
			TraceEvent("CCWDB", cluster->id).detail("Recruiting", "Master");

			// We must recruit the master in the same data center as the cluster controller.
			// This should always be possible, because we can recruit the master on the same process as the cluster
			// controller.
			std::map<Optional<Standalone<StringRef>>, int> id_used;
			id_used[cluster->clusterControllerProcessId]++;
			state WorkerFitnessInfo masterWorker = cluster->getWorkerForRoleInDatacenter(
			    cluster->clusterControllerDcId, ProcessClass::Master, ProcessClass::NeverAssign, db->config, id_used);

			// While goodRecruitmentTime has not fired yet, refuse a candidate whose fitness value exceeds
			// EXPECTED_MASTER_FITNESS or that shares the cluster controller's process; retry after a delay.
			if ((masterWorker.worker.processClass.machineClassFitness(ProcessClass::Master) >
			         SERVER_KNOBS->EXPECTED_MASTER_FITNESS ||
			     masterWorker.worker.interf.locality.processId() == cluster->clusterControllerProcessId) &&
			    !cluster->goodRecruitmentTime.isReady()) {
				TraceEvent("CCWDB", cluster->id)
				    .detail("Fitness", masterWorker.worker.processClass.machineClassFitness(ProcessClass::Master));
				wait(delay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY));
				continue;
			}

			RecruitMasterRequest rmq;
			rmq.lifetime = db->serverInfo->get().masterLifetime;
			rmq.forceRecovery = db->forceRecovery;

			cluster->masterProcessId = masterWorker.worker.interf.locality.processId();
			cluster->db.unfinishedRecoveries++;
			state Future<ErrorOr<MasterInterface>> fNewMaster = masterWorker.worker.interf.master.tryGetReply(rmq);
			// A forced master failure also ends this wait, in which case fNewMaster may still be pending.
			wait(ready(fNewMaster) || db->forceMasterFailure.onTrigger());
			if (fNewMaster.isReady() && fNewMaster.get().present()) {
				TraceEvent("CCWDB", cluster->id).detail("Recruited", fNewMaster.get().get().id());

				// for status tool
				TraceEvent("RecruitedMasterWorker", cluster->id)
				    .detail("Address", fNewMaster.get().get().address())
				    .trackLatest("RecruitedMasterWorker");

				iMaster = fNewMaster.get().get();

				db->masterRegistrationCount = 0;
				db->recoveryStalled = false;

				// Build a fresh ServerDBInfo for the new master; only the fields copied below are carried
				// over from the previously published value.
				auto dbInfo = ServerDBInfo();
				dbInfo.master = iMaster;
				dbInfo.id = deterministicRandom()->randomUniqueID();
				dbInfo.infoGeneration = ++db->dbInfoCount;
				dbInfo.masterLifetime = db->serverInfo->get().masterLifetime;
				++dbInfo.masterLifetime;
				dbInfo.clusterInterface = db->serverInfo->get().clusterInterface;
				dbInfo.distributor = db->serverInfo->get().distributor;
				dbInfo.ratekeeper = db->serverInfo->get().ratekeeper;
				dbInfo.latencyBandConfig = db->serverInfo->get().latencyBandConfig;

				TraceEvent("CCWDB", cluster->id)
				    .detail("Lifetime", dbInfo.masterLifetime.toString())
				    .detail("ChangeID", dbInfo.id);
				db->serverInfo->set(dbInfo);

				state Future<Void> spinDelay = delay(
				    SERVER_KNOBS
				        ->MASTER_SPIN_DELAY); // Don't retry master recovery more than once per second, but don't delay
				                              // the "first" recovery after more than a second of normal operation

				TraceEvent("CCWDB", cluster->id).detail("Watching", iMaster.id());

				// Master failure detection is pretty sensitive, but if we are in the middle of a very long recovery we
				// really don't want to have to start over
				//
				// The failure-client arguments depend on db->masterRegistrationCount, so the client is rebuilt
				// (second `when`) every time serverInfo changes.
				loop choose {
					when(wait(waitFailureClient(
					              iMaster.waitFailure,
					              db->masterRegistrationCount
					                  ? SERVER_KNOBS->MASTER_FAILURE_REACTION_TIME
					                  : (now() - recoveryStart) * SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY,
					              db->masterRegistrationCount ? -SERVER_KNOBS->MASTER_FAILURE_REACTION_TIME /
					                                                SERVER_KNOBS->SECONDS_BEFORE_NO_FAILURE_DELAY
					                                          : SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY) ||
					          db->forceMasterFailure.onTrigger())) {
						break;
					}
					when(wait(db->serverInfo->onChange())) {}
				}
				wait(spinDelay);

				TEST(true); // clusterWatchDatabase() master failed
				TraceEvent(SevWarn, "DetectedFailedMaster", cluster->id).detail("OldMaster", iMaster.id());
			} else {
				TEST(true); // clusterWatchDatabase() !newMaster.present()
				wait(delay(SERVER_KNOBS->MASTER_SPIN_DELAY));
			}
		} catch (Error& e) {
			// error(e, true): also record the event when the error is actor_cancelled.
			TraceEvent("CCWDB", cluster->id).error(e, true).detail("Master", iMaster.id());
			if (e.code() == error_code_actor_cancelled)
				throw;

			bool ok = e.code() == error_code_no_more_servers;
			TraceEvent(ok ? SevWarn : SevError, "ClusterWatchDatabaseRetrying", cluster->id).error(e);
			if (!ok)
				throw e;
			wait(delay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY));
		}
	}
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Long-poll for ServerDBInfo: replies with the current db->serverInfo as soon as its id differs from
// knownServerInfoID, or after a jittered 300-second delay, whichever happens first.
//
// db:                database record holding the published serverInfo.
// knownServerInfoID: id of the ServerDBInfo the requester already has.
// reply:             promise fulfilled with the (possibly unchanged) current ServerDBInfo.
ACTOR Future<Void> clusterGetServerInfo(ClusterControllerData::DBInfo* db,
                                        UID knownServerInfoID,
                                        ReplyPromise<ServerDBInfo> reply) {
	loop {
		if (!(db->serverInfo->get().id == knownServerInfoID)) {
			break; // the requester's copy is out of date
		}
		choose {
			when(wait(yieldedFuture(db->serverInfo->onChange()))) {}
			when(wait(delayJittered(300))) {
				// The server might be long gone!
				break;
			}
		}
	}

	reply.send(db->serverInfo->get());
	return Void();
}
|
|
|
|
|
2019-07-26 08:15:31 +08:00
|
|
|
// Handles an OpenDatabaseRequest as a long-poll on db->clientInfo.
//
// The request is first recorded in db->clientStatus (keyed by the primary address of the reply
// endpoint, together with the current time); a warning is traced when that map holds more than
// 10000 entries. The reply is sent once clientInfo's id differs from req.knownClientInfoID, or after
// a jittered COORDINATOR_REGISTER_INTERVAL delay, whichever happens first.
ACTOR Future<Void> clusterOpenDatabase(ClusterControllerData::DBInfo* db, OpenDatabaseRequest req) {
	db->clientStatus[req.reply.getEndpoint().getPrimaryAddress()] = std::make_pair(now(), req);
	if (db->clientStatus.size() > 10000) {
		TraceEvent(SevWarnAlways, "TooManyClientStatusEntries").suppressFor(1.0);
	}

	loop {
		if (!(db->clientInfo->get().id == req.knownClientInfoID)) {
			break; // the client's copy is out of date
		}
		choose {
			when(wait(db->clientInfo->onChange())) {}
			when(wait(delayJittered(SERVER_KNOBS->COORDINATOR_REGISTER_INTERVAL))) {
				// The client might be long gone!
				break;
			}
		}
	}

	req.reply.send(db->clientInfo->get());
	return Void();
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Retries every queued RecruitFromConfigurationRequest. A request that can now be satisfied is
// answered and removed from self->outstandingRecruitmentRequests; one that still fails with
// no_more_servers or operation_failed stays queued (a warning is traced). Any other error is traced
// at SevError and rethrown.
void checkOutstandingRecruitmentRequests(ClusterControllerData* self) {
	auto& pending = self->outstandingRecruitmentRequests;
	int idx = 0;
	while (idx < pending.size()) {
		RecruitFromConfigurationRequest& req = pending[idx];
		try {
			RecruitFromConfigurationReply rep = self->findWorkersForConfiguration(req);
			req.reply.send(rep);
			swapAndPop(&pending, idx);
			// Slot idx no longer holds the request we just answered, so examine it again
			// instead of advancing.
			continue;
		} catch (Error& e) {
			const bool stillUnavailable =
			    e.code() == error_code_no_more_servers || e.code() == error_code_operation_failed;
			if (!stillUnavailable) {
				TraceEvent(SevError, "RecruitTLogsRequestError", self->id).error(e);
				throw;
			}
			TraceEvent(SevWarn, "RecruitTLogMatchingSetNotAvailable", self->id).error(e);
		}
		++idx;
	}
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Retries every queued RecruitRemoteFromConfigurationRequest. A request that can now be satisfied
// is answered and removed from self->outstandingRemoteRecruitmentRequests; one that still fails with
// no_more_servers or operation_failed stays queued (a warning is traced). Any other error is traced
// at SevError and rethrown.
void checkOutstandingRemoteRecruitmentRequests(ClusterControllerData* self) {
	auto& pending = self->outstandingRemoteRecruitmentRequests;
	int idx = 0;
	while (idx < pending.size()) {
		RecruitRemoteFromConfigurationRequest& req = pending[idx];
		try {
			RecruitRemoteFromConfigurationReply rep = self->findRemoteWorkersForConfiguration(req);
			req.reply.send(rep);
			swapAndPop(&pending, idx);
			// Slot idx no longer holds the request we just answered, so examine it again
			// instead of advancing.
			continue;
		} catch (Error& e) {
			const bool stillUnavailable =
			    e.code() == error_code_no_more_servers || e.code() == error_code_operation_failed;
			if (!stillUnavailable) {
				TraceEvent(SevError, "RecruitRemoteTLogsRequestError", self->id).error(e);
				throw;
			}
			TraceEvent(SevWarn, "RecruitRemoteTLogMatchingSetNotAvailable", self->id).error(e);
		}
		++idx;
	}
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Retries every queued storage recruitment. Each entry of self->outstandingStorageRequests is a
// (request, deadline) pair:
//   - past its deadline: the request is answered with timed_out() and removed;
//   - a storage worker is found: the request is answered with that worker and removed;
//   - no_more_servers: the request stays queued and a (rate-limited) warning is traced;
//   - any other error: traced at SevError and rethrown.
// Until process classes are known (self->gotProcessClasses), only critical recruitments are tried.
void checkOutstandingStorageRequests(ClusterControllerData* self) {
	auto& pending = self->outstandingStorageRequests;
	int idx = 0;
	while (idx < pending.size()) {
		auto& req = pending[idx];
		try {
			if (req.second < now()) {
				req.first.reply.sendError(timed_out());
				swapAndPop(&pending, idx);
				continue; // re-examine slot idx, which no longer holds this request
			}

			if (!self->gotProcessClasses && !req.first.criticalRecruitment)
				throw no_more_servers();

			auto worker = self->getStorageWorker(req.first);
			RecruitStorageReply rep;
			rep.worker = worker.interf;
			rep.processClass = worker.processClass;
			req.first.reply.send(rep);
			swapAndPop(&pending, idx);
			continue; // re-examine slot idx, which no longer holds this request
		} catch (Error& e) {
			if (e.code() != error_code_no_more_servers) {
				TraceEvent(SevError, "RecruitStorageError", self->id).error(e);
				throw;
			}
			TraceEvent(SevWarn, "RecruitStorageNotAvailable", self->id)
			    .suppressFor(1.0)
			    .detail("OutstandingReq", idx)
			    .detail("IsCriticalRecruitment", req.first.criticalRecruitment)
			    .error(e);
		}
		++idx;
	}
}
|
|
|
|
|
2019-03-20 06:58:25 +08:00
|
|
|
// Checks whether the current ratekeeper (RK) and data distributor (DD) should be moved to better
// workers, and if so starts that move: the RK by setting self->recruitRatekeeper, the DD by sending
// it a HaltDataDistributorRequest.
//
// Does nothing unless a master process is known and the published recovery state is at least
// ACCEPTING_COMMITS.
void checkBetterDDOrRK(ClusterControllerData* self) {
	if (!self->masterProcessId.present() ||
	    self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
		return;
	}

	// Best candidate for the ratekeeper; the master's own worker replaces it when onMasterIsBetter says so.
	std::map<Optional<Standalone<StringRef>>, int> id_used = self->getUsedIds();
	WorkerDetails newRKWorker = self->getWorkerForRoleInDatacenter(self->clusterControllerDcId,
	                                                               ProcessClass::Ratekeeper,
	                                                               ProcessClass::NeverAssign,
	                                                               self->db.config,
	                                                               id_used,
	                                                               true)
	                                .worker;
	if (self->onMasterIsBetter(newRKWorker, ProcessClass::Ratekeeper)) {
		newRKWorker = self->id_worker[self->masterProcessId.get()].details;
	}

	// Best candidate for the data distributor. The usage counts are re-read and doubled, then the
	// chosen RK process is bumped by one before asking for a DD worker.
	// NOTE(review): presumably this makes sharing a process with the new RK cost less than sharing
	// with any other role -- confirm against getWorkerForRoleInDatacenter.
	id_used = self->getUsedIds();
	for (auto& usage : id_used) {
		usage.second *= 2;
	}
	id_used[newRKWorker.interf.locality.processId()]++;
	WorkerDetails newDDWorker = self->getWorkerForRoleInDatacenter(self->clusterControllerDcId,
	                                                               ProcessClass::DataDistributor,
	                                                               ProcessClass::NeverAssign,
	                                                               self->db.config,
	                                                               id_used,
	                                                               true)
	                                .worker;
	if (self->onMasterIsBetter(newDDWorker, ProcessClass::DataDistributor)) {
		newDDWorker = self->id_worker[self->masterProcessId.get()].details;
	}

	// Fitness of the candidates; an excluded candidate is treated as at least ExcludeFit.
	auto bestFitnessForRK = newRKWorker.processClass.machineClassFitness(ProcessClass::Ratekeeper);
	if (self->db.config.isExcludedServer(newRKWorker.interf.addresses())) {
		bestFitnessForRK = std::max(bestFitnessForRK, ProcessClass::ExcludeFit);
	}
	auto bestFitnessForDD = newDDWorker.processClass.machineClassFitness(ProcessClass::DataDistributor);
	if (self->db.config.isExcludedServer(newDDWorker.interf.addresses())) {
		bestFitnessForDD = std::max(bestFitnessForDD, ProcessClass::ExcludeFit);
	}
	//TraceEvent("CheckBetterDDorRKNewRecruits", self->id).detail("MasterProcessId", self->masterProcessId)
	//.detail("NewRecruitRKProcessId", newRKWorker.interf.locality.processId()).detail("NewRecruiteDDProcessId",
	// newDDWorker.interf.locality.processId());

	Optional<Standalone<StringRef>> currentRKProcessId;
	Optional<Standalone<StringRef>> currentDDProcessId;

	auto& db = self->db.serverInfo->get();
	bool ratekeeperHealthy = false;

	// --- Ratekeeper ---
	// Only considered when the published RK lives on a known worker and is not being replaced by a
	// different, in-flight recruitment.
	if (db.ratekeeper.present() && self->id_worker.count(db.ratekeeper.get().locality.processId()) &&
	    (!self->recruitingRatekeeperID.present() || (self->recruitingRatekeeperID.get() == db.ratekeeper.get().id()))) {
		auto& rkWorker = self->id_worker[db.ratekeeper.get().locality.processId()];
		currentRKProcessId = rkWorker.details.interf.locality.processId();
		auto rkFitness = rkWorker.details.processClass.machineClassFitness(ProcessClass::Ratekeeper);
		if (rkWorker.priorityInfo.isExcluded) {
			rkFitness = ProcessClass::ExcludeFit;
		}

		const bool rkSharesUsedProcess = self->isUsedNotMaster(rkWorker.details.interf.locality.processId());
		const bool rkCandidateIsFitter = bestFitnessForRK < rkFitness;
		// Same fitness, but the RK currently sits on the master's process and the candidate does not.
		const bool rkCanLeaveMaster =
		    rkFitness == bestFitnessForRK && rkWorker.details.interf.locality.processId() == self->masterProcessId &&
		    newRKWorker.interf.locality.processId() != self->masterProcessId;

		if (rkSharesUsedProcess || rkCandidateIsFitter || rkCanLeaveMaster) {
			TraceEvent("CCHaltRK", self->id)
			    .detail("RKID", db.ratekeeper.get().id())
			    .detail("Excluded", rkWorker.priorityInfo.isExcluded)
			    .detail("Fitness", rkFitness)
			    .detail("BestFitness", bestFitnessForRK);
			self->recruitRatekeeper.set(true);
		} else {
			ratekeeperHealthy = true;
		}
	}

	// --- Data distributor ---
	if (!self->recruitingDistributor && db.distributor.present() &&
	    self->id_worker.count(db.distributor.get().locality.processId())) {
		auto& ddWorker = self->id_worker[db.distributor.get().locality.processId()];
		auto ddFitness = ddWorker.details.processClass.machineClassFitness(ProcessClass::DataDistributor);
		currentDDProcessId = ddWorker.details.interf.locality.processId();
		if (ddWorker.priorityInfo.isExcluded) {
			ddFitness = ProcessClass::ExcludeFit;
		}

		const bool ddSharesUsedProcess = self->isUsedNotMaster(ddWorker.details.interf.locality.processId());
		const bool ddCandidateIsFitter = bestFitnessForDD < ddFitness;
		// Same fitness, but the DD currently sits on the master's process and the candidate does not.
		const bool ddCanLeaveMaster =
		    ddFitness == bestFitnessForDD && ddWorker.details.interf.locality.processId() == self->masterProcessId &&
		    newDDWorker.interf.locality.processId() != self->masterProcessId;
		// Same fitness, a healthy RK shares the DD's process, and the candidates would put the two
		// roles on different processes, neither of which is the master's.
		const bool ddCanSeparateFromRK =
		    ddFitness == bestFitnessForDD &&
		    newRKWorker.interf.locality.processId() != newDDWorker.interf.locality.processId() && ratekeeperHealthy &&
		    currentRKProcessId.present() && currentDDProcessId == currentRKProcessId &&
		    (newRKWorker.interf.locality.processId() != self->masterProcessId &&
		     newDDWorker.interf.locality.processId() != self->masterProcessId);

		if (ddSharesUsedProcess || ddCandidateIsFitter || ddCanLeaveMaster || ddCanSeparateFromRK) {
			TraceEvent("CCHaltDD", self->id)
			    .detail("DDID", db.distributor.get().id())
			    .detail("Excluded", ddWorker.priorityInfo.isExcluded)
			    .detail("Fitness", ddFitness)
			    .detail("BestFitness", bestFitnessForDD)
			    .detail("CurrentRateKeeperProcessId",
			            currentRKProcessId.present() ? currentRKProcessId.get() : LiteralStringRef("None"))
			    .detail("CurrentDDProcessId", currentDDProcessId)
			    .detail("MasterProcessID", self->masterProcessId)
			    .detail("NewRKWorkers", newRKWorker.interf.locality.processId())
			    .detail("NewDDWorker", newDDWorker.interf.locality.processId());
			ddWorker.haltDistributor = brokenPromiseToNever(
			    db.distributor.get().haltDataDistributor.getReply(HaltDataDistributorRequest(self->id)));
		}
	}
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// One deferred pass over the cluster controller's outstanding work. After CHECK_OUTSTANDING_INTERVAL,
// and once self->goodRecruitmentTime has fired, it:
//   - retries queued recruitment and storage requests,
//   - re-evaluates data distributor / ratekeeper placement,
//   - checks for a stalled recovery,
//   - forces a master failure when a better master exists.
// Errors are swallowed; anything other than no_more_servers is traced at SevError.
ACTOR Future<Void> doCheckOutstandingRequests(ClusterControllerData* self) {
	try {
		wait(delay(SERVER_KNOBS->CHECK_OUTSTANDING_INTERVAL));

		// Re-test after every wake-up: the loop only ends once the member is observed ready.
		loop {
			if (self->goodRecruitmentTime.isReady()) {
				break;
			}
			wait(self->goodRecruitmentTime);
		}

		checkOutstandingRecruitmentRequests(self);
		checkOutstandingStorageRequests(self);
		checkBetterDDOrRK(self);

		self->checkRecoveryStalled();
		if (self->betterMasterExists()) {
			self->db.forceMasterFailure.trigger();
			TraceEvent("MasterRegistrationKill", self->id).detail("MasterId", self->db.serverInfo->get().master.id());
		}
	} catch (Error& e) {
		const bool expected = e.code() == error_code_no_more_servers;
		if (!expected) {
			TraceEvent(SevError, "CheckOutstandingError").error(e);
		}
	}
	return Void();
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// One deferred pass over the queued remote recruitment requests. After CHECK_OUTSTANDING_INTERVAL,
// and once self->goodRemoteRecruitmentTime has fired, the queued requests are retried.
// Errors are swallowed; anything other than no_more_servers is traced at SevError.
ACTOR Future<Void> doCheckOutstandingRemoteRequests(ClusterControllerData* self) {
	try {
		wait(delay(SERVER_KNOBS->CHECK_OUTSTANDING_INTERVAL));

		// Re-test after every wake-up: the loop only ends once the member is observed ready.
		loop {
			if (self->goodRemoteRecruitmentTime.isReady()) {
				break;
			}
			wait(self->goodRemoteRecruitmentTime);
		}

		checkOutstandingRemoteRecruitmentRequests(self);
	} catch (Error& e) {
		const bool expected = e.code() == error_code_no_more_servers;
		if (!expected) {
			TraceEvent(SevError, "CheckOutstandingError").error(e);
		}
	}
	return Void();
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Schedules the deferred "check outstanding requests" passes. Each checker is (re)started only when
// its previous run has completed, so at most one pass of each kind is in flight at a time.
void checkOutstandingRequests(ClusterControllerData* self) {
	auto& remoteChecker = self->outstandingRemoteRequestChecker;
	if (remoteChecker.isReady()) {
		remoteChecker = doCheckOutstandingRemoteRequests(self);
	}

	auto& localChecker = self->outstandingRequestChecker;
	if (localChecker.isReady()) {
		localChecker = doCheckOutstandingRequests(self);
	}
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Counts a reboot against the worker identified by processID, waits for the shutdown timeout
// (SIM_SHUTDOWN_TIMEOUT in simulation, SHUTDOWN_TIMEOUT otherwise), then removes the count again.
// If the worker is still registered and its remaining reboot count is below 2, the outstanding
// requests are re-checked.
//
// The worker must be present in cluster->id_worker when the actor starts (asserted).
ACTOR Future<Void> rebootAndCheck(ClusterControllerData* cluster, Optional<Standalone<StringRef>> processID) {
	{
		auto entry = cluster->id_worker.find(processID);
		ASSERT(entry != cluster->id_worker.end());

		entry->second.reboots++;
		wait(delay(g_network->isSimulated() ? SERVER_KNOBS->SIM_SHUTDOWN_TIMEOUT : SERVER_KNOBS->SHUTDOWN_TIMEOUT));
	}

	{
		// Look the worker up again after the wait; it may no longer be registered.
		auto entry = cluster->id_worker.find(processID);
		if (entry != cluster->id_worker.end()) {
			entry->second.reboots--;
			if (entry->second.reboots < 2)
				checkOutstandingRequests(cluster);
		}
	}

	return Void();
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Tracks one registered worker for as long as it is alive.
//
// On start it publishes the worker in cluster->updateWorkerList, registers its updateServerDBInfo
// endpoint and triggers a DB-info update. It then loops:
//   - whenever the failure monitor's state for the worker's storage endpoint changes and the
//     endpoint is available, a rebootAndCheck actor is started and outstanding requests are re-checked;
//   - when the worker is detected as failed, any pending registration reply is answered, the worker
//     is removed from the controller's bookkeeping, and the actor returns.
//
// Failure is never reported for a worker at this process's own address or for a worker whose class
// type is TesterClass.
ACTOR Future<Void> workerAvailabilityWatch(WorkerInterface worker,
                                           ProcessClass startingClass,
                                           ClusterControllerData* cluster) {
	state Future<Void> failed = Never();
	if (!(worker.address() == g_network->getLocalAddress() ||
	      startingClass.classType() == ProcessClass::TesterClass)) {
		failed = waitFailureClient(worker.waitFailure, SERVER_KNOBS->WORKER_FAILURE_TIME);
	}

	cluster->updateWorkerList.set(worker.locality.processId(),
	                              ProcessData(worker.locality, startingClass, worker.stableAddress()));
	cluster->updateDBInfoEndpoints.insert(worker.updateServerDBInfo.getEndpoint());
	cluster->updateDBInfo.trigger();
	// This switching avoids a race where the worker can be added to id_worker map after the workerAvailabilityWatch
	// fails for the worker.
	wait(delay(0));

	loop {
		choose {
			// NOTE(review): the target status is built from the endpoint's *current* availability, which
			// presumably makes this fire when the availability flips -- confirm against FailureStatus's
			// constructor.
			when(wait(IFailureMonitor::failureMonitor().onStateEqual(
			    worker.storage.getEndpoint(),
			    FailureStatus(
			        IFailureMonitor::failureMonitor().getState(worker.storage.getEndpoint()).isAvailable())))) {
				if (IFailureMonitor::failureMonitor().getState(worker.storage.getEndpoint()).isAvailable()) {
					cluster->ac.add(rebootAndCheck(cluster, worker.locality.processId()));
					checkOutstandingRequests(cluster);
				}
			}
			when(wait(failed)) { // remove workers that have failed
				WorkerInfo& failedWorkerInfo = cluster->id_worker[worker.locality.processId()];

				// Answer a still-pending registration before the entry is erased.
				if (!failedWorkerInfo.reply.isSet()) {
					failedWorkerInfo.reply.send(
					    RegisterWorkerReply(failedWorkerInfo.details.processClass, failedWorkerInfo.priorityInfo));
				}
				if (worker.locality.processId() == cluster->masterProcessId) {
					cluster->masterProcessId = Optional<Key>();
				}
				cluster->removedDBInfoEndpoints.insert(worker.updateServerDBInfo.getEndpoint());
				cluster->id_worker.erase(worker.locality.processId());
				cluster->updateWorkerList.set(worker.locality.processId(), Optional<ProcessData>());
				return Void();
			}
		}
	}
}
|
|
|
|
|
|
|
|
struct FailureStatusInfo {
|
|
|
|
FailureStatus status;
|
|
|
|
double lastRequestTime;
|
|
|
|
double penultimateRequestTime;
|
|
|
|
|
|
|
|
FailureStatusInfo() : lastRequestTime(0), penultimateRequestTime(0) {}
|
|
|
|
|
|
|
|
void insertRequest(double now) {
|
|
|
|
penultimateRequestTime = lastRequestTime;
|
|
|
|
lastRequestTime = now;
|
|
|
|
}
|
|
|
|
|
|
|
|
double latency(double now) const {
|
2021-03-11 02:06:03 +08:00
|
|
|
return std::max(now - lastRequestTime, lastRequestTime - penultimateRequestTime);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Waits for every future in `in`, in order, and returns the concatenation of their TLogInterface
// lists. Throws recruitment_failed() as soon as one of the results is not present.
ACTOR Future<vector<TLogInterface>> requireAll(vector<Future<Optional<vector<TLogInterface>>>> in) {
	state vector<TLogInterface> combined;
	state int idx;
	for (idx = 0; idx < in.size(); ++idx) {
		Optional<vector<TLogInterface>> part = wait(in[idx]);
		if (!part.present())
			throw recruitment_failed();
		combined.insert(combined.end(), part.get().begin(), part.get().end());
	}
	return combined;
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Handles a RecruitStorageRequest: replies immediately with a storage worker when one is available.
// On no_more_servers the request is queued in self->outstandingStorageRequests with a deadline of
// now() + RECRUITMENT_TIMEOUT and a warning is traced. Until process classes are known
// (self->gotProcessClasses), only critical recruitments are attempted; others are queued directly.
void clusterRecruitStorage(ClusterControllerData* self, RecruitStorageRequest req) {
	try {
		if (!self->gotProcessClasses && !req.criticalRecruitment)
			throw no_more_servers();

		auto worker = self->getStorageWorker(req);
		RecruitStorageReply rep;
		rep.worker = worker.interf;
		rep.processClass = worker.processClass;
		req.reply.send(rep);
	} catch (Error& e) {
		if (e.code() != error_code_no_more_servers) {
			TraceEvent(SevError, "RecruitStorageError", self->id).error(e);
			throw; // Any other error will bring down the cluster controller
		}

		self->outstandingStorageRequests.push_back(std::make_pair(req, now() + SERVER_KNOBS->RECRUITMENT_TIMEOUT));
		TraceEvent(SevWarn, "RecruitStorageNotAvailable", self->id)
		    .detail("IsCriticalRecruitment", req.criticalRecruitment)
		    .error(e);
	}
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Handles a RecruitFromConfigurationRequest.
//
// Each attempt either:
//   - succeeds: the reply is sent and the actor returns;
//   - fails with no_more_servers after self->goodRecruitmentTime has fired: the request is queued in
//     self->outstandingRecruitmentRequests and the actor returns;
//   - fails with operation_failed, or with no_more_servers before that time: the attempt is repeated
//     after a low-priority ATTEMPT_RECRUITMENT_DELAY;
//   - fails with anything else: the error is traced at SevError and rethrown.
ACTOR Future<Void> clusterRecruitFromConfiguration(ClusterControllerData* self, RecruitFromConfigurationRequest req) {
	TEST(true); // ClusterController RecruitTLogsRequest
	loop {
		try {
			auto rep = self->findWorkersForConfiguration(req);
			req.reply.send(rep);
			return Void();
		} catch (Error& e) {
			const bool outOfServers = e.code() == error_code_no_more_servers;
			const bool retryable = outOfServers || e.code() == error_code_operation_failed;
			if (!retryable) {
				TraceEvent(SevError, "RecruitFromConfigurationError", self->id).error(e);
				throw; // goodbye, cluster controller
			}
			if (outOfServers && self->goodRecruitmentTime.isReady()) {
				self->outstandingRecruitmentRequests.push_back(req);
				TraceEvent(SevWarn, "RecruitFromConfigurationNotAvailable", self->id).error(e);
				return Void();
			}
			// recruitment not good enough, try again
		}
		wait(lowPriorityDelay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY));
	}
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Handles a RecruitRemoteFromConfigurationRequest.
//
// Each attempt either:
//   - succeeds: the reply is sent and the actor returns;
//   - fails with no_more_servers after self->goodRemoteRecruitmentTime has fired: the request is
//     queued in self->outstandingRemoteRecruitmentRequests and the actor returns;
//   - fails with operation_failed, or with no_more_servers before that time: the attempt is repeated
//     after a low-priority ATTEMPT_RECRUITMENT_DELAY;
//   - fails with anything else: the error is traced at SevError and rethrown.
ACTOR Future<Void> clusterRecruitRemoteFromConfiguration(ClusterControllerData* self,
                                                         RecruitRemoteFromConfigurationRequest req) {
	TEST(true); // ClusterController RecruitTLogsRequest Remote
	loop {
		try {
			RecruitRemoteFromConfigurationReply rep = self->findRemoteWorkersForConfiguration(req);
			req.reply.send(rep);
			return Void();
		} catch (Error& e) {
			const bool outOfServers = e.code() == error_code_no_more_servers;
			const bool retryable = outOfServers || e.code() == error_code_operation_failed;
			if (!retryable) {
				TraceEvent(SevError, "RecruitRemoteFromConfigurationError", self->id).error(e);
				throw; // goodbye, cluster controller
			}
			if (outOfServers && self->goodRemoteRecruitmentTime.isReady()) {
				self->outstandingRemoteRecruitmentRequests.push_back(req);
				TraceEvent(SevWarn, "RecruitRemoteFromConfigurationNotAvailable", self->id).error(e);
				return Void();
			}
			// recruitment not good enough, try again
		}
		wait(lowPriorityDelay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY));
	}
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
void clusterRegisterMaster(ClusterControllerData* self, RegisterMasterRequest const& req) {
|
|
|
|
req.reply.send(Void());
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2020-03-18 05:45:07 +08:00
|
|
|
TraceEvent("MasterRegistrationReceived", self->id)
|
|
|
|
.detail("MasterId", req.id)
|
|
|
|
.detail("Master", req.mi.toString())
|
|
|
|
.detail("Tlogs", describe(req.logSystemConfig.tLogs))
|
|
|
|
.detail("Resolvers", req.resolvers.size())
|
|
|
|
.detail("RecoveryState", (int)req.recoveryState)
|
|
|
|
.detail("RegistrationCount", req.registrationCount)
|
2020-09-11 08:44:15 +08:00
|
|
|
.detail("CommitProxies", req.commitProxies.size())
|
2020-08-12 09:54:54 +08:00
|
|
|
.detail("GrvProxies", req.grvProxies.size())
|
2020-03-18 05:45:07 +08:00
|
|
|
.detail("RecoveryCount", req.recoveryCount)
|
|
|
|
.detail("Stalled", req.recoveryStalled)
|
|
|
|
.detail("OldestBackupEpoch", req.logSystemConfig.oldestBackupEpoch);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// make sure the request comes from an active database
|
2017-05-26 04:48:44 +08:00
|
|
|
auto db = &self->db;
|
2021-03-11 02:06:03 +08:00
|
|
|
if (db->serverInfo->get().master.id() != req.id || req.registrationCount <= db->masterRegistrationCount) {
|
|
|
|
TraceEvent("MasterRegistrationNotFound", self->id)
|
|
|
|
.detail("MasterId", req.id)
|
|
|
|
.detail("ExistingId", db->serverInfo->get().master.id())
|
|
|
|
.detail("RegCount", req.registrationCount)
|
|
|
|
.detail("ExistingRegCount", db->masterRegistrationCount);
|
2017-05-26 04:48:44 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (req.recoveryState == RecoveryState::FULLY_RECOVERED) {
|
2018-09-01 01:51:55 +08:00
|
|
|
self->db.unfinishedRecoveries = 0;
|
|
|
|
self->db.logGenerations = 0;
|
2021-03-11 02:06:03 +08:00
|
|
|
ASSERT(!req.logSystemConfig.oldTLogs.size());
|
2018-09-01 01:51:55 +08:00
|
|
|
} else {
|
|
|
|
self->db.logGenerations = std::max<int>(self->db.logGenerations, req.logSystemConfig.oldTLogs.size());
|
|
|
|
}
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
db->masterRegistrationCount = req.registrationCount;
|
2018-06-14 09:14:14 +08:00
|
|
|
db->recoveryStalled = req.recoveryStalled;
|
2021-03-11 02:06:03 +08:00
|
|
|
if (req.configuration.present()) {
|
2017-11-15 05:57:37 +08:00
|
|
|
db->config = req.configuration.get();
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (req.recoveryState >= RecoveryState::ACCEPTING_COMMITS) {
|
2017-11-16 09:15:24 +08:00
|
|
|
self->gotFullyRecoveredConfig = true;
|
|
|
|
db->fullyRecoveredConfig = req.configuration.get();
|
2021-03-11 02:06:03 +08:00
|
|
|
for (auto& it : self->id_worker) {
|
|
|
|
bool isExcludedFromConfig =
|
|
|
|
db->fullyRecoveredConfig.isExcludedServer(it.second.details.interf.addresses());
|
|
|
|
if (it.second.priorityInfo.isExcluded != isExcludedFromConfig) {
|
2018-02-10 08:48:55 +08:00
|
|
|
it.second.priorityInfo.isExcluded = isExcludedFromConfig;
|
2021-03-11 02:06:03 +08:00
|
|
|
if (!it.second.reply.isSet()) {
|
|
|
|
it.second.reply.send(
|
|
|
|
RegisterWorkerReply(it.second.details.processClass, it.second.priorityInfo));
|
2018-02-10 08:48:55 +08:00
|
|
|
}
|
2017-11-16 09:15:24 +08:00
|
|
|
}
|
2017-11-15 05:57:37 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
bool isChanged = false;
|
2020-04-12 10:30:05 +08:00
|
|
|
auto dbInfo = self->db.serverInfo->get();
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
if (dbInfo.recoveryState != req.recoveryState) {
|
|
|
|
dbInfo.recoveryState = req.recoveryState;
|
|
|
|
isChanged = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (dbInfo.priorCommittedLogServers != req.priorCommittedLogServers) {
|
|
|
|
dbInfo.priorCommittedLogServers = req.priorCommittedLogServers;
|
|
|
|
isChanged = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Construct the client information
|
2020-09-11 08:44:15 +08:00
|
|
|
if (db->clientInfo->get().commitProxies != req.commitProxies ||
|
|
|
|
db->clientInfo->get().grvProxies != req.grvProxies) {
|
2017-05-26 04:48:44 +08:00
|
|
|
isChanged = true;
|
|
|
|
ClientDBInfo clientInfo;
|
2019-05-11 05:01:52 +08:00
|
|
|
clientInfo.id = deterministicRandom()->randomUniqueID();
|
2020-09-11 08:44:15 +08:00
|
|
|
clientInfo.commitProxies = req.commitProxies;
|
2020-07-15 15:37:41 +08:00
|
|
|
clientInfo.grvProxies = req.grvProxies;
|
2017-05-26 04:48:44 +08:00
|
|
|
clientInfo.clientTxnInfoSampleRate = db->clientInfo->get().clientTxnInfoSampleRate;
|
|
|
|
clientInfo.clientTxnInfoSizeLimit = db->clientInfo->get().clientTxnInfoSizeLimit;
|
2021-03-11 02:06:03 +08:00
|
|
|
db->clientInfo->set(clientInfo);
|
2017-05-26 04:48:44 +08:00
|
|
|
dbInfo.client = db->clientInfo->get();
|
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (!dbInfo.logSystemConfig.isEqual(req.logSystemConfig)) {
|
2017-05-26 04:48:44 +08:00
|
|
|
isChanged = true;
|
|
|
|
dbInfo.logSystemConfig = req.logSystemConfig;
|
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (dbInfo.resolvers != req.resolvers) {
|
2017-05-26 04:48:44 +08:00
|
|
|
isChanged = true;
|
|
|
|
dbInfo.resolvers = req.resolvers;
|
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (dbInfo.recoveryCount != req.recoveryCount) {
|
2017-05-26 04:48:44 +08:00
|
|
|
isChanged = true;
|
|
|
|
dbInfo.recoveryCount = req.recoveryCount;
|
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (isChanged) {
|
2019-05-11 05:01:52 +08:00
|
|
|
dbInfo.id = deterministicRandom()->randomUniqueID();
|
2020-04-06 14:09:36 +08:00
|
|
|
dbInfo.infoGeneration = ++self->db.dbInfoCount;
|
2021-03-11 02:06:03 +08:00
|
|
|
self->db.serverInfo->set(dbInfo);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
|
2018-06-28 14:02:08 +08:00
|
|
|
checkOutstandingRequests(self);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
void registerWorker(RegisterWorkerRequest req, ClusterControllerData* self) {
|
2019-03-24 11:55:03 +08:00
|
|
|
const WorkerInterface& w = req.wi;
|
2017-10-13 08:11:58 +08:00
|
|
|
ProcessClass newProcessClass = req.processClass;
|
2021-03-11 02:06:03 +08:00
|
|
|
auto info = self->id_worker.find(w.locality.processId());
|
2018-02-10 08:48:55 +08:00
|
|
|
ClusterControllerPriorityInfo newPriorityInfo = req.priorityInfo;
|
2019-03-22 08:56:04 +08:00
|
|
|
newPriorityInfo.processClassFitness = newProcessClass.machineClassFitness(ProcessClass::ClusterController);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
for (auto it : req.incompatiblePeers) {
|
2020-04-07 11:58:43 +08:00
|
|
|
self->db.incompatibleConnections[it] = now() + SERVER_KNOBS->INCOMPATIBLE_PEERS_LOGGING_INTERVAL;
|
2020-04-06 14:09:36 +08:00
|
|
|
}
|
2020-04-11 04:45:16 +08:00
|
|
|
self->removedDBInfoEndpoints.erase(w.updateServerDBInfo.getEndpoint());
|
2020-04-06 14:09:36 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (info == self->id_worker.end()) {
|
|
|
|
TraceEvent("ClusterControllerActualWorkers", self->id)
|
|
|
|
.detail("WorkerId", w.id())
|
|
|
|
.detail("ProcessId", w.locality.processId())
|
|
|
|
.detail("ZoneId", w.locality.zoneId())
|
|
|
|
.detail("DataHall", w.locality.dataHallId())
|
|
|
|
.detail("PClass", req.processClass.toString())
|
|
|
|
.detail("Workers", self->id_worker.size());
|
2020-02-20 08:48:30 +08:00
|
|
|
self->goodRecruitmentTime = lowPriorityDelay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY);
|
2020-02-21 05:46:22 +08:00
|
|
|
self->goodRemoteRecruitmentTime = lowPriorityDelay(SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY);
|
2018-11-05 15:07:56 +08:00
|
|
|
} else {
|
2021-03-11 02:06:03 +08:00
|
|
|
TraceEvent("ClusterControllerWorkerAlreadyRegistered", self->id)
|
|
|
|
.suppressFor(1.0)
|
|
|
|
.detail("WorkerId", w.id())
|
|
|
|
.detail("ProcessId", w.locality.processId())
|
|
|
|
.detail("ZoneId", w.locality.zoneId())
|
|
|
|
.detail("DataHall", w.locality.dataHallId())
|
|
|
|
.detail("PClass", req.processClass.toString())
|
|
|
|
.detail("Workers", self->id_worker.size());
|
|
|
|
}
|
|
|
|
if (w.address() == g_network->getLocalAddress()) {
|
|
|
|
if (self->changingDcIds.get().first) {
|
|
|
|
if (self->changingDcIds.get().second.present()) {
|
|
|
|
newPriorityInfo.dcFitness = ClusterControllerPriorityInfo::calculateDCFitness(
|
|
|
|
w.locality.dcId(), self->changingDcIds.get().second.get());
|
|
|
|
}
|
|
|
|
} else if (self->changedDcIds.get().second.present()) {
|
|
|
|
newPriorityInfo.dcFitness = ClusterControllerPriorityInfo::calculateDCFitness(
|
|
|
|
w.locality.dcId(), self->changedDcIds.get().second.get());
|
2018-06-29 15:10:29 +08:00
|
|
|
}
|
|
|
|
} else {
|
2021-03-11 02:06:03 +08:00
|
|
|
if (!self->changingDcIds.get().first) {
|
|
|
|
if (self->changingDcIds.get().second.present()) {
|
|
|
|
newPriorityInfo.dcFitness = ClusterControllerPriorityInfo::calculateDCFitness(
|
|
|
|
w.locality.dcId(), self->changingDcIds.get().second.get());
|
2018-06-29 15:10:29 +08:00
|
|
|
}
|
2021-03-11 02:06:03 +08:00
|
|
|
} else if (self->changedDcIds.get().second.present()) {
|
|
|
|
newPriorityInfo.dcFitness = ClusterControllerPriorityInfo::calculateDCFitness(
|
|
|
|
w.locality.dcId(), self->changedDcIds.get().second.get());
|
2018-02-10 08:48:55 +08:00
|
|
|
}
|
2017-10-26 02:35:29 +08:00
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2017-11-15 05:57:37 +08:00
|
|
|
// Check process class and exclusive property
|
2021-03-11 02:06:03 +08:00
|
|
|
if (info == self->id_worker.end() || info->second.details.interf.id() != w.id() ||
|
|
|
|
req.generation >= info->second.gen) {
|
|
|
|
if (self->gotProcessClasses) {
|
2017-11-15 05:57:37 +08:00
|
|
|
auto classIter = self->id_class.find(w.locality.processId());
|
2019-02-01 10:20:14 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (classIter != self->id_class.end() && (classIter->second.classSource() == ProcessClass::DBSource ||
|
|
|
|
req.initialClass.classType() == ProcessClass::UnsetClass)) {
|
2017-11-15 05:57:37 +08:00
|
|
|
newProcessClass = classIter->second;
|
|
|
|
} else {
|
|
|
|
newProcessClass = req.initialClass;
|
|
|
|
}
|
2018-02-10 08:48:55 +08:00
|
|
|
newPriorityInfo.processClassFitness = newProcessClass.machineClassFitness(ProcessClass::ClusterController);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (self->gotFullyRecoveredConfig) {
|
2020-04-11 04:45:16 +08:00
|
|
|
newPriorityInfo.isExcluded = self->db.fullyRecoveredConfig.isExcludedServer(w.addresses());
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2017-10-13 08:11:58 +08:00
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (info == self->id_worker.end()) {
|
|
|
|
self->id_worker[w.locality.processId()] = WorkerInfo(workerAvailabilityWatch(w, newProcessClass, self),
|
|
|
|
req.reply,
|
|
|
|
req.generation,
|
|
|
|
w,
|
|
|
|
req.initialClass,
|
|
|
|
newProcessClass,
|
|
|
|
newPriorityInfo,
|
|
|
|
req.degraded,
|
|
|
|
req.issues);
|
|
|
|
if (!self->masterProcessId.present() &&
|
|
|
|
w.locality.processId() == self->db.serverInfo->get().master.locality.processId()) {
|
2019-03-24 11:55:03 +08:00
|
|
|
self->masterProcessId = w.locality.processId();
|
|
|
|
}
|
2021-03-11 02:06:03 +08:00
|
|
|
checkOutstandingRequests(self);
|
|
|
|
} else if (info->second.details.interf.id() != w.id() || req.generation >= info->second.gen) {
|
2017-09-26 01:36:03 +08:00
|
|
|
if (!info->second.reply.isSet()) {
|
2021-03-11 02:06:03 +08:00
|
|
|
info->second.reply.send(Never());
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
info->second.reply = req.reply;
|
2019-03-09 00:25:07 +08:00
|
|
|
info->second.details.processClass = newProcessClass;
|
2018-02-10 08:48:55 +08:00
|
|
|
info->second.priorityInfo = newPriorityInfo;
|
2017-11-15 05:57:37 +08:00
|
|
|
info->second.initialClass = req.initialClass;
|
2019-03-09 03:40:00 +08:00
|
|
|
info->second.details.degraded = req.degraded;
|
2017-05-26 04:48:44 +08:00
|
|
|
info->second.gen = req.generation;
|
2020-04-06 14:09:36 +08:00
|
|
|
info->second.issues = req.issues;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (info->second.details.interf.id() != w.id()) {
|
2020-04-06 14:09:36 +08:00
|
|
|
self->removedDBInfoEndpoints.insert(info->second.details.interf.updateServerDBInfo.getEndpoint());
|
2019-03-09 00:25:07 +08:00
|
|
|
info->second.details.interf = w;
|
2021-03-11 02:06:03 +08:00
|
|
|
info->second.watcher = workerAvailabilityWatch(w, newProcessClass, self);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2021-03-11 02:06:03 +08:00
|
|
|
checkOutstandingRequests(self);
|
2019-03-23 08:08:54 +08:00
|
|
|
} else {
|
|
|
|
TEST(true); // Received an old worker registration request.
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
|
2020-04-12 10:30:05 +08:00
|
|
|
if (req.distributorInterf.present() && !self->db.serverInfo->get().distributor.present() &&
|
2021-03-11 02:06:03 +08:00
|
|
|
self->clusterControllerDcId == req.distributorInterf.get().locality.dcId() && !self->recruitingDistributor) {
|
2019-01-29 03:29:39 +08:00
|
|
|
const DataDistributorInterface& di = req.distributorInterf.get();
|
2019-07-05 23:12:25 +08:00
|
|
|
TraceEvent("CCRegisterDataDistributor", self->id).detail("DDID", di.id());
|
2019-02-15 08:24:46 +08:00
|
|
|
self->db.setDistributor(di);
|
2019-01-29 03:29:39 +08:00
|
|
|
}
|
2019-03-20 02:29:19 +08:00
|
|
|
if (req.ratekeeperInterf.present()) {
|
2021-03-11 02:06:03 +08:00
|
|
|
if ((self->recruitingRatekeeperID.present() &&
|
|
|
|
self->recruitingRatekeeperID.get() != req.ratekeeperInterf.get().id()) ||
|
|
|
|
self->clusterControllerDcId != w.locality.dcId()) {
|
2019-07-30 01:37:42 +08:00
|
|
|
TraceEvent("CCHaltRegisteringRatekeeper", self->id)
|
|
|
|
.detail("RKID", req.ratekeeperInterf.get().id())
|
|
|
|
.detail("DcID", printable(self->clusterControllerDcId))
|
|
|
|
.detail("ReqDcID", printable(w.locality.dcId()))
|
|
|
|
.detail("RecruitingRKID",
|
|
|
|
self->recruitingRatekeeperID.present() ? self->recruitingRatekeeperID.get() : UID());
|
2019-04-25 06:12:37 +08:00
|
|
|
self->id_worker[w.locality.processId()].haltRatekeeper = brokenPromiseToNever(
|
|
|
|
req.ratekeeperInterf.get().haltRatekeeper.getReply(HaltRatekeeperRequest(self->id)));
|
2019-07-30 01:37:42 +08:00
|
|
|
} else if (!self->recruitingRatekeeperID.present()) {
|
2019-03-16 08:06:15 +08:00
|
|
|
const RatekeeperInterface& rki = req.ratekeeperInterf.get();
|
2020-04-12 10:30:05 +08:00
|
|
|
const auto& ratekeeper = self->db.serverInfo->get().ratekeeper;
|
2019-07-05 23:12:25 +08:00
|
|
|
TraceEvent("CCRegisterRatekeeper", self->id).detail("RKID", rki.id());
|
2021-03-11 02:06:03 +08:00
|
|
|
if (ratekeeper.present() && ratekeeper.get().id() != rki.id() &&
|
|
|
|
self->id_worker.count(ratekeeper.get().locality.processId())) {
|
|
|
|
TraceEvent("CCHaltPreviousRatekeeper", self->id)
|
|
|
|
.detail("RKID", ratekeeper.get().id())
|
|
|
|
.detail("DcID", printable(self->clusterControllerDcId))
|
|
|
|
.detail("ReqDcID", printable(w.locality.dcId()))
|
|
|
|
.detail("RecruitingRKID",
|
|
|
|
self->recruitingRatekeeperID.present() ? self->recruitingRatekeeperID.get() : UID());
|
|
|
|
self->id_worker[ratekeeper.get().locality.processId()].haltRatekeeper =
|
|
|
|
brokenPromiseToNever(ratekeeper.get().haltRatekeeper.getReply(HaltRatekeeperRequest(self->id)));
|
|
|
|
}
|
|
|
|
if (!ratekeeper.present() || ratekeeper.get().id() != rki.id()) {
|
2019-03-23 08:56:16 +08:00
|
|
|
self->db.setRatekeeper(rki);
|
2019-03-21 04:54:15 +08:00
|
|
|
}
|
2019-03-16 08:06:15 +08:00
|
|
|
}
|
2019-01-29 03:29:39 +08:00
|
|
|
}
|
2019-11-13 05:01:29 +08:00
|
|
|
|
|
|
|
// Notify the worker to register again with new process class/exclusive property
|
2021-03-11 02:06:03 +08:00
|
|
|
if (!req.reply.isSet() && newPriorityInfo != req.priorityInfo) {
|
|
|
|
req.reply.send(RegisterWorkerReply(newProcessClass, newPriorityInfo));
|
2019-11-13 05:01:29 +08:00
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
|
2017-09-28 07:31:38 +08:00
|
|
|
#define TIME_KEEPER_VERSION LiteralStringRef("1")
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
ACTOR Future<Void> timeKeeperSetVersion(ClusterControllerData* self) {
|
2020-11-07 15:50:55 +08:00
|
|
|
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(self->cx);
|
2017-10-19 05:31:31 +08:00
|
|
|
loop {
|
|
|
|
try {
|
|
|
|
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
|
|
|
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
|
2018-01-09 10:21:00 +08:00
|
|
|
tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
2017-10-19 05:31:31 +08:00
|
|
|
tr->set(timeKeeperVersionKey, TIME_KEEPER_VERSION);
|
2018-08-11 04:57:10 +08:00
|
|
|
wait(tr->commit());
|
2017-10-19 05:31:31 +08:00
|
|
|
break;
|
2021-03-11 02:06:03 +08:00
|
|
|
} catch (Error& e) {
|
2018-08-11 04:57:10 +08:00
|
|
|
wait(tr->onError(e));
|
2017-09-28 07:31:38 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return Void();
|
|
|
|
}
|
|
|
|
|
2017-09-26 03:40:24 +08:00
|
|
|
// This actor periodically gets read version and writes it to cluster with current timestamp as key. To avoid running
|
|
|
|
// out of space, it limits the max number of entries and clears old entries on each update. This mapping is used from
|
|
|
|
// backup and restore to get the version information for a timestamp.
|
2021-03-11 02:06:03 +08:00
|
|
|
ACTOR Future<Void> timeKeeper(ClusterControllerData* self) {
|
2017-09-26 03:40:24 +08:00
|
|
|
state KeyBackedMap<int64_t, Version> versionMap(timeKeeperPrefixRange.begin);
|
|
|
|
|
2018-11-03 03:56:29 +08:00
|
|
|
TraceEvent("TimeKeeperStarted");
|
2017-09-28 07:31:38 +08:00
|
|
|
|
2018-08-11 04:57:10 +08:00
|
|
|
wait(timeKeeperSetVersion(self));
|
2017-09-28 07:31:38 +08:00
|
|
|
|
2017-09-26 03:40:24 +08:00
|
|
|
loop {
|
2020-11-07 15:50:55 +08:00
|
|
|
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(self->cx);
|
2017-10-19 05:31:31 +08:00
|
|
|
loop {
|
|
|
|
try {
|
2021-03-11 02:06:03 +08:00
|
|
|
if (!g_network->isSimulated()) {
|
2018-11-05 12:26:23 +08:00
|
|
|
// This is done to provide an arbitrary logged transaction every ~10s.
|
|
|
|
// FIXME: replace or augment this with logging on the proxy which tracks
|
|
|
|
// how long it is taking to hear responses from each other component.
|
|
|
|
|
2019-05-11 05:01:52 +08:00
|
|
|
UID debugID = deterministicRandom()->randomUniqueID();
|
2018-11-03 03:57:03 +08:00
|
|
|
TraceEvent("TimeKeeperCommit", debugID);
|
2018-11-03 03:56:29 +08:00
|
|
|
tr->debugTransaction(debugID);
|
|
|
|
}
|
2017-10-19 05:31:31 +08:00
|
|
|
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
|
|
|
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
|
2018-01-09 10:21:00 +08:00
|
|
|
tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
2017-09-26 03:40:24 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
Optional<Value> disableValue = wait(tr->get(timeKeeperDisableKey));
|
|
|
|
if (disableValue.present()) {
|
2017-10-19 05:31:31 +08:00
|
|
|
break;
|
|
|
|
}
|
2017-09-26 03:40:24 +08:00
|
|
|
|
2017-10-19 05:31:31 +08:00
|
|
|
Version v = tr->getReadVersion().get();
|
|
|
|
int64_t currentTime = (int64_t)now();
|
|
|
|
versionMap.set(tr, currentTime, v);
|
2017-09-29 04:13:24 +08:00
|
|
|
|
2017-10-19 05:31:31 +08:00
|
|
|
int64_t ttl = currentTime - SERVER_KNOBS->TIME_KEEPER_DELAY * SERVER_KNOBS->TIME_KEEPER_MAX_ENTRIES;
|
|
|
|
if (ttl > 0) {
|
|
|
|
versionMap.erase(tr, 0, ttl);
|
2017-09-29 04:13:24 +08:00
|
|
|
}
|
2017-10-19 05:31:31 +08:00
|
|
|
|
2018-08-11 04:57:10 +08:00
|
|
|
wait(tr->commit());
|
2017-10-19 05:31:31 +08:00
|
|
|
break;
|
2021-03-11 02:06:03 +08:00
|
|
|
} catch (Error& e) {
|
2018-08-11 04:57:10 +08:00
|
|
|
wait(tr->onError(e));
|
2017-09-26 03:40:24 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-08-11 04:57:10 +08:00
|
|
|
wait(delay(SERVER_KNOBS->TIME_KEEPER_DELAY));
|
2017-09-26 03:40:24 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Answers StatusRequests in batches.
//
// After the first request of a batch arrives, the actor waits until at least
// STATUS_MIN_TIME_BETWEEN_REQUESTS has elapsed since the previous status computation finished,
// collects every request that is already queued (rejecting the excess with server_overloaded once
// the batch reaches its size cap), computes the cluster status once, and sends that single result
// (or error) to every request in the batch.
ACTOR Future<Void> statusServer(FutureStream<StatusRequest> requests,
                                ClusterControllerData* self,
                                ServerCoordinators coordinators) {
	// Time at which the previous status computation FINISHED
	state double lastStatusFinished = 0.0;

	// Requests that will all be answered by the next status computation
	state std::vector<StatusRequest> pendingBatch;

	loop {
		try {
			// Block until the first request of the batch arrives
			StatusRequest firstReq = waitNext(requests);
			++self->statusRequests;
			pendingBatch.push_back(firstReq);

			// Enforce the minimum spacing between computations; waiting here also lets more
			// requests queue up.
			double earliestStart = lastStatusFinished + SERVER_KNOBS->STATUS_MIN_TIME_BETWEEN_REQUESTS;
			wait(delay(std::max(earliestStart - now(), 0.0)));

			// Take every request that is ready right *now*, before the status computation begins;
			// all of them share its result. When batching is enabled, the batch is capped at
			// STATUS_MIN_TIME_BETWEEN_REQUESTS * MAX_STATUS_REQUESTS_PER_SECOND entries.
			while (requests.isReady()) {
				auto queuedReq = requests.pop();
				const bool batchIsFull =
				    SERVER_KNOBS->STATUS_MIN_TIME_BETWEEN_REQUESTS > 0.0 &&
				    pendingBatch.size() + 1 >
				        SERVER_KNOBS->STATUS_MIN_TIME_BETWEEN_REQUESTS * SERVER_KNOBS->MAX_STATUS_REQUESTS_PER_SECOND;
				if (!batchIsFull) {
					pendingBatch.push_back(queuedReq);
					continue;
				}
				TraceEvent(SevWarnAlways, "TooManyStatusRequests")
				    .suppressFor(1.0)
				    .detail("BatchSize", pendingBatch.size());
				queuedReq.reply.sendError(server_overloaded());
			}

			// Snapshot the inputs of the status computation.
			std::vector<WorkerDetails> workers;
			std::vector<ProcessIssues> workerIssues;

			for (auto& entry : self->id_worker) {
				const auto& worker = entry.second;
				workers.push_back(worker.details);
				if (worker.issues.size()) {
					workerIssues.push_back(ProcessIssues(worker.details.interf.address(), worker.issues));
				}
			}

			// Expire stale incompatible-connection records and report the remaining ones.
			std::vector<NetworkAddress> incompatibleConnections;
			auto conn = self->db.incompatibleConnections.begin();
			while (conn != self->db.incompatibleConnections.end()) {
				if (conn->second < now()) {
					conn = self->db.incompatibleConnections.erase(conn);
				} else {
					incompatibleConnections.push_back(conn->first);
					++conn;
				}
			}

			// Compute the status, capturing errors so they can be forwarded to the clients.
			state ErrorOr<StatusReply> result = wait(errorOr(clusterGetStatus(self->db.serverInfo,
			                                                                  self->cx,
			                                                                  workers,
			                                                                  workerIssues,
			                                                                  &self->db.clientStatus,
			                                                                  coordinators,
			                                                                  incompatibleConnections,
			                                                                  self->datacenterVersionDifference)));

			// Cancellation is not a client-visible error; propagate it.
			if (result.isError() && result.getError().code() == error_code_actor_cancelled)
				throw result.getError();

			// The spacing is measured from the end of one computation to the start of the next.
			lastStatusFinished = now();

			// Answer the batch from the back, yielding between replies.
			while (!pendingBatch.empty()) {
				if (!result.isError())
					pendingBatch.back().reply.send(result.get());
				else
					pendingBatch.back().reply.sendError(result.getError());
				pendingBatch.pop_back();
				wait(yield());
			}
		} catch (Error& e) {
			TraceEvent(SevError, "StatusServerError").error(e);
			throw e;
		}
	}
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Keeps the controller's process class table in sync with the database.
//
// Phase 1 runs once: if processClassVersionKey is absent, the existing process class entries are
// rewritten under keys produced by processClassKeyFor() and the version key is set.
// Phase 2 runs forever: read all process classes, apply any change to the registered workers
// (notifying those whose class changed), then wait on a watch of processClassChangeKey.
ACTOR Future<Void> monitorProcessClasses(ClusterControllerData* self) {

	state ReadYourWritesTransaction upgradeTr(self->db.db);
	loop {
		try {
			upgradeTr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
			upgradeTr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);

			Optional<Value> versionMarker = wait(upgradeTr.get(processClassVersionKey));

			// The version key is already present: no upgrade needed.
			if (versionMarker.present())
				break;

			Standalone<RangeResultRef> processClasses =
			    wait(upgradeTr.getRange(processClassKeys, CLIENT_KNOBS->TOO_MANY));
			ASSERT(!processClasses.more && processClasses.size() < CLIENT_KNOBS->TOO_MANY);

			// Replace the old-format entries with re-keyed ones and stamp the version.
			upgradeTr.clear(processClassKeys);
			upgradeTr.set(processClassVersionKey, processClassVersionValue);
			for (const auto& oldEntry : processClasses) {
				UID processUid = decodeProcessClassKeyOld(oldEntry.key);
				upgradeTr.set(processClassKeyFor(processUid.toString()), oldEntry.value);
			}

			wait(upgradeTr.commit());
			TraceEvent("ProcessClassUpgrade");
			break;
		} catch (Error& e) {
			wait(upgradeTr.onError(e));
		}
	}

	loop {
		state ReadYourWritesTransaction watchTr(self->db.db);

		loop {
			try {
				watchTr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
				watchTr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
				Standalone<RangeResultRef> processClasses =
				    wait(watchTr.getRange(processClassKeys, CLIENT_KNOBS->TOO_MANY));
				ASSERT(!processClasses.more && processClasses.size() < CLIENT_KNOBS->TOO_MANY);

				if (processClasses != self->lastProcessClasses || !self->gotProcessClasses) {
					// Rebuild the processId -> class table from scratch.
					self->id_class.clear();
					for (const auto& classEntry : processClasses) {
						auto c = decodeProcessClassValue(classEntry.value);
						ASSERT(c.classSource() != ProcessClass::CommandLineSource);
						self->id_class[decodeProcessClassKey(classEntry.key)] = c;
					}

					// Re-derive each worker's class; the stored class wins when it comes from the
					// database or when the worker started with no class set.
					for (auto& entry : self->id_worker) {
						auto& worker = entry.second;
						auto classIter = self->id_class.find(entry.first);
						const bool useStoredClass =
						    classIter != self->id_class.end() &&
						    (classIter->second.classSource() == ProcessClass::DBSource ||
						     worker.initialClass.classType() == ProcessClass::UnsetClass);

						ProcessClass newProcessClass;
						if (useStoredClass) {
							newProcessClass = classIter->second;
						} else {
							newProcessClass = worker.initialClass;
						}

						if (newProcessClass != worker.details.processClass) {
							worker.details.processClass = newProcessClass;
							worker.priorityInfo.processClassFitness =
							    newProcessClass.machineClassFitness(ProcessClass::ClusterController);
							if (!worker.reply.isSet()) {
								worker.reply.send(
								    RegisterWorkerReply(worker.details.processClass, worker.priorityInfo));
							}
						}
					}

					self->lastProcessClasses = processClasses;
					self->gotProcessClasses = true;
					checkOutstandingRequests(self);
				}

				// Register the watch, commit so it takes effect, then sleep until it fires.
				state Future<Void> classChangeWatch = watchTr.watch(processClassChangeKey);
				wait(watchTr.commit());
				wait(classChangeWatch);
				break;
			} catch (Error& e) {
				wait(watchTr.onError(e));
			}
		}
	}
}
|
|
|
|
|
2019-01-19 08:18:34 +08:00
|
|
|
// Watches latencyBandConfigKey and mirrors its (parsed) value into ServerDBInfo::latencyBandConfig.
// A new ServerDBInfo with a fresh id and generation is published only when the parsed value differs
// from the one currently in serverInfo.
ACTOR Future<Void> monitorServerInfoConfig(ClusterControllerData::DBInfo* db) {
	loop {
		state ReadYourWritesTransaction configTr(db->db);
		loop {
			try {
				configTr.setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
				configTr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
				configTr.setOption(FDBTransactionOptions::READ_LOCK_AWARE);

				Optional<Value> rawConfig = wait(configTr.get(latencyBandConfigKey));

				// An absent key yields an empty Optional.
				Optional<LatencyBandConfig> parsedConfig;
				if (rawConfig.present()) {
					parsedConfig = LatencyBandConfig::parse(rawConfig.get());
				}

				auto updatedInfo = db->serverInfo->get();
				if (parsedConfig != updatedInfo.latencyBandConfig) {
					TraceEvent("LatencyBandConfigChanged").detail("Present", parsedConfig.present());
					updatedInfo.latencyBandConfig = parsedConfig;
					updatedInfo.id = deterministicRandom()->randomUniqueID();
					updatedInfo.infoGeneration = ++db->dbInfoCount;
					db->serverInfo->set(updatedInfo);
				}

				// Register the watch, commit so it takes effect, then sleep until the key changes.
				state Future<Void> configWatch = configTr.watch(latencyBandConfigKey);

				wait(configTr.commit());
				wait(configWatch);

				// Leave the retry loop so the next read uses a fresh transaction.
				break;
			} catch (Error& e) {
				wait(configTr.onError(e));
			}
		}
	}
}
|
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
// Watches the client transaction-profiling settings (fdbClientInfoTxnSampleRate and
// fdbClientInfoTxnSizeLimit) and mirrors them into ClientDBInfo.
//
// Each pass reads both keys, substituting +infinity for a missing sample rate and -1 for a missing
// size limit. A new ClientDBInfo with a fresh id is published only when one of the two values
// differs from what clientInfo currently holds. The actor then waits until either key's watch
// fires and starts over with a new transaction.
ACTOR Future<Void> monitorClientTxnInfoConfigs(ClusterControllerData::DBInfo* db) {
	loop {
		state ReadYourWritesTransaction tr(db->db);
		loop {
			try {
				tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
				tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
				state Optional<Value> rateVal = wait(tr.get(fdbClientInfoTxnSampleRate));
				state Optional<Value> limitVal = wait(tr.get(fdbClientInfoTxnSizeLimit));
				ClientDBInfo clientInfo = db->clientInfo->get();
				double sampleRate = rateVal.present()
				                        ? BinaryReader::fromStringRef<double>(rateVal.get(), Unversioned())
				                        : std::numeric_limits<double>::infinity();
				int64_t sizeLimit =
				    limitVal.present() ? BinaryReader::fromStringRef<int64_t>(limitVal.get(), Unversioned()) : -1;
				// Compare each setting against its own stored counterpart. (The size limit used to
				// be compared against the stored sample rate, which republished the client info
				// with a new id on almost every pass even though nothing had changed.)
				if (sampleRate != clientInfo.clientTxnInfoSampleRate ||
				    sizeLimit != clientInfo.clientTxnInfoSizeLimit) {
					clientInfo.id = deterministicRandom()->randomUniqueID();
					clientInfo.clientTxnInfoSampleRate = sampleRate;
					clientInfo.clientTxnInfoSizeLimit = sizeLimit;
					db->clientInfo->set(clientInfo);
				}

				// Register both watches, commit so they take effect, then sleep until either fires.
				state Future<Void> watchRateFuture = tr.watch(fdbClientInfoTxnSampleRate);
				state Future<Void> watchLimitFuture = tr.watch(fdbClientInfoTxnSizeLimit);
				wait(tr.commit());
				choose {
					when(wait(watchRateFuture)) { break; }
					when(wait(watchLimitFuture)) { break; }
				}
			} catch (Error& e) {
				wait(tr.onError(e));
			}
		}
	}
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Recomputes `self->changingDcIds` each time `self->desiredDcIds` changes and pushes updated
// datacenter fitness values to registered workers.
//
// For every value of desiredDcIds:
//  - unset: changingDcIds becomes (false, <unset>) and nothing else happens;
//  - set: the cluster controller's own worker entry is scored with calculateDCFitness(). If that score
//    is strictly lower than the entry's current dcFitness, only that entry is updated. Otherwise every
//    other worker is rescored, one processClassFitness tier at a time, pausing CC_CLASS_DELAY after any
//    tier (except the last) in which something changed.
// The loop has no exit of its own.
ACTOR Future<Void> updatedChangingDatacenters(ClusterControllerData* self) {
	// do not change the cluster controller until all the processes have had a chance to register
	wait(delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY));
	loop {
		// Taken before the value is read; waited on at the end of this iteration.
		state Future<Void> desiredChanged = self->desiredDcIds.onChange();
		if (self->desiredDcIds.get().present()) {
			auto& ccWorker = self->id_worker[self->clusterControllerProcessId];
			uint8_t ccFitness = ClusterControllerPriorityInfo::calculateDCFitness(
			    ccWorker.details.interf.locality.dcId(), self->desiredDcIds.get().get());
			// The bool half of the pair records whether the cluster controller's own fitness would improve.
			self->changingDcIds.set(
			    std::make_pair(ccWorker.priorityInfo.dcFitness > ccFitness, self->desiredDcIds.get()));

			TraceEvent("UpdateChangingDatacenter", self->id)
			    .detail("OldFitness", ccWorker.priorityInfo.dcFitness)
			    .detail("NewFitness", ccFitness);
			if (ccWorker.priorityInfo.dcFitness > ccFitness) {
				ccWorker.priorityInfo.dcFitness = ccFitness;
				if (!ccWorker.reply.isSet()) {
					ccWorker.reply.send(RegisterWorkerReply(ccWorker.details.processClass, ccWorker.priorityInfo));
				}
			} else {
				// Walk the fitness tiers from BestFit up to NeverAssign.
				state int fitnessTier = ProcessClass::BestFit;
				while (fitnessTier <= ProcessClass::NeverAssign) {
					bool anyChanged = false;
					for (auto& entry : self->id_worker) {
						auto& info = entry.second;
						// The final (NeverAssign) pass visits every worker, including excluded ones.
						const bool inThisTier =
						    (!info.priorityInfo.isExcluded && info.priorityInfo.processClassFitness == fitnessTier) ||
						    fitnessTier == ProcessClass::NeverAssign;
						if (!inThisTier) {
							continue;
						}
						uint8_t dcFitness = ClusterControllerPriorityInfo::calculateDCFitness(
						    info.details.interf.locality.dcId(), self->changingDcIds.get().second.get());
						// The cluster controller's own entry is never touched in this branch.
						const bool needsUpdate =
						    entry.first != self->clusterControllerProcessId && info.priorityInfo.dcFitness != dcFitness;
						if (!needsUpdate) {
							continue;
						}
						anyChanged = true;
						info.priorityInfo.dcFitness = dcFitness;
						if (!info.reply.isSet()) {
							info.reply.send(RegisterWorkerReply(info.details.processClass, info.priorityInfo));
						}
					}
					if (anyChanged && fitnessTier < ProcessClass::NeverAssign) {
						wait(delay(SERVER_KNOBS->CC_CLASS_DELAY));
					}
					fitnessTier++;
				}
			}
		} else {
			self->changingDcIds.set(std::make_pair(false, self->desiredDcIds.get()));
		}

		wait(desiredChanged);
	}
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Copies `self->changingDcIds` into `self->changedDcIds` once it has been stable for CC_CHANGE_DELAY,
// then applies the matching datacenter fitness to workers.
//
// Every change of changingDcIds restarts the delay. When the delay fires:
//  - if the pair's bool is false, only the cluster controller's own worker entry is rescored;
//  - if it is true, every other worker is rescored tier by tier (BestFit .. NeverAssign), pausing
//    CC_CLASS_DELAY after any tier (except the last) in which something changed.
// Nothing is rescored when the datacenter list in the pair is unset. The loop has no exit of its own.
ACTOR Future<Void> updatedChangedDatacenters(ClusterControllerData* self) {
	state Future<Void> settleTimer = delay(SERVER_KNOBS->CC_CHANGE_DELAY);
	state Future<Void> changingUpdated = self->changingDcIds.onChange();
	loop {
		choose {
			when(wait(changingUpdated)) {
				// Value moved again: restart the settle timer and re-subscribe.
				settleTimer = delay(SERVER_KNOBS->CC_CHANGE_DELAY);
				changingUpdated = self->changingDcIds.onChange();
			}
			when(wait(settleTimer)) {
				// Disarm the timer until the next change of changingDcIds.
				settleTimer = Never();
				changingUpdated = self->changingDcIds.onChange();

				self->changedDcIds.set(self->changingDcIds.get());
				if (self->changedDcIds.get().second.present()) {
					TraceEvent("UpdateChangedDatacenter", self->id).detail("CCFirst", self->changedDcIds.get().first);
					if (self->changedDcIds.get().first) {
						// Rescore everyone except the cluster controller, one fitness tier at a time.
						state int fitnessTier = ProcessClass::BestFit;
						while (fitnessTier <= ProcessClass::NeverAssign) {
							bool anyChanged = false;
							for (auto& entry : self->id_worker) {
								auto& info = entry.second;
								// The final (NeverAssign) pass visits every worker, including excluded ones.
								const bool inThisTier = (!info.priorityInfo.isExcluded &&
								                         info.priorityInfo.processClassFitness == fitnessTier) ||
								                        fitnessTier == ProcessClass::NeverAssign;
								if (!inThisTier) {
									continue;
								}
								uint8_t dcFitness = ClusterControllerPriorityInfo::calculateDCFitness(
								    info.details.interf.locality.dcId(), self->changedDcIds.get().second.get());
								const bool needsUpdate = entry.first != self->clusterControllerProcessId &&
								                         info.priorityInfo.dcFitness != dcFitness;
								if (!needsUpdate) {
									continue;
								}
								anyChanged = true;
								info.priorityInfo.dcFitness = dcFitness;
								if (!info.reply.isSet()) {
									info.reply.send(RegisterWorkerReply(info.details.processClass, info.priorityInfo));
								}
							}
							if (anyChanged && fitnessTier < ProcessClass::NeverAssign) {
								wait(delay(SERVER_KNOBS->CC_CLASS_DELAY));
							}
							fitnessTier++;
						}
					} else {
						// Only the cluster controller's own worker entry is rescored here.
						auto& ccWorker = self->id_worker[self->clusterControllerProcessId];
						uint8_t ccFitness = ClusterControllerPriorityInfo::calculateDCFitness(
						    ccWorker.details.interf.locality.dcId(), self->changedDcIds.get().second.get());
						if (ccWorker.priorityInfo.dcFitness != ccFitness) {
							ccWorker.priorityInfo.dcFitness = ccFitness;
							if (!ccWorker.reply.isSet()) {
								ccWorker.reply.send(
								    RegisterWorkerReply(ccWorker.details.processClass, ccWorker.priorityInfo));
							}
						}
					}
				}
			}
		}
	}
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Keeps `self->datacenterVersionDifference` / `self->versionDifferenceUpdated` current.
//
// With a single usable region (and recovery at ACCEPTING_COMMITS or later) the difference is pinned
// to 0. Otherwise one present tlog is picked from a local non-satellite log set and one from a
// non-local log set, and their queuing metrics are polled every VERSION_LAG_METRIC_INTERVAL; the
// difference is the primary reply's `v` minus the remote reply's `v`. Any change of serverInfo
// restarts the whole procedure. checkOutstandingRequests() is called when the difference moves from
// "unknown or >= MAX_VERSION_DIFFERENCE" to below that bound. The loop has no exit of its own.
ACTOR Future<Void> updateDatacenterVersionDifference(ClusterControllerData* self) {
	state double lastTraceTime = 0;
	loop {
		self->versionDifferenceUpdated = false;
		if (self->db.serverInfo->get().recoveryState >= RecoveryState::ACCEPTING_COMMITS &&
		    self->db.config.usableRegions == 1) {
			bool wasTooLarge = !self->versionDifferenceUpdated ||
			                   self->datacenterVersionDifference >= SERVER_KNOBS->MAX_VERSION_DIFFERENCE;
			self->versionDifferenceUpdated = true;
			self->datacenterVersionDifference = 0;

			if (wasTooLarge) {
				checkOutstandingRequests(self);
			}

			wait(self->db.serverInfo->onChange());
			continue;
		}

		// Pick the first present tlog on each side.
		state Optional<TLogInterface> primaryTLog;
		state Optional<TLogInterface> remoteTLog;
		if (self->db.serverInfo->get().recoveryState >= RecoveryState::ALL_LOGS_RECRUITED) {
			for (auto& logSet : self->db.serverInfo->get().logSystemConfig.tLogs) {
				if (!logSet.isLocal) {
					for (auto& candidate : logSet.tLogs) {
						if (candidate.present()) {
							remoteTLog = candidate.interf();
							break;
						}
					}
				} else if (logSet.locality != tagLocalitySatellite) {
					for (auto& candidate : logSet.tLogs) {
						if (candidate.present()) {
							primaryTLog = candidate.interf();
							break;
						}
					}
				}
			}
		}

		// Without a tlog on both sides there is nothing to compare; retry on the next serverInfo change.
		if (!primaryTLog.present() || !remoteTLog.present()) {
			wait(self->db.serverInfo->onChange());
			continue;
		}

		state Future<Void> infoChanged = self->db.serverInfo->onChange();
		loop {
			state Future<TLogQueuingMetricsReply> primaryReply =
			    brokenPromiseToNever(primaryTLog.get().getQueuingMetrics.getReply(TLogQueuingMetricsRequest()));
			state Future<TLogQueuingMetricsReply> remoteReply =
			    brokenPromiseToNever(remoteTLog.get().getQueuingMetrics.getReply(TLogQueuingMetricsRequest()));

			wait((success(primaryReply) && success(remoteReply)) || infoChanged);
			if (infoChanged.isReady()) {
				break;
			}

			// Replies whose `v` is not positive are ignored for this round.
			if (primaryReply.get().v > 0 && remoteReply.get().v > 0) {
				bool wasTooLarge = !self->versionDifferenceUpdated ||
				                   self->datacenterVersionDifference >= SERVER_KNOBS->MAX_VERSION_DIFFERENCE;
				self->versionDifferenceUpdated = true;
				self->datacenterVersionDifference = primaryReply.get().v - remoteReply.get().v;

				if (wasTooLarge && self->datacenterVersionDifference < SERVER_KNOBS->MAX_VERSION_DIFFERENCE) {
					checkOutstandingRequests(self);
				}

				// Rate-limit the trace event to one per CLUSTER_CONTROLLER_LOGGING_DELAY.
				if (now() - lastTraceTime > SERVER_KNOBS->CLUSTER_CONTROLLER_LOGGING_DELAY) {
					lastTraceTime = now();
					TraceEvent("DatacenterVersionDifference", self->id)
					    .detail("Difference", self->datacenterVersionDifference);
				}
			}

			wait(delay(SERVER_KNOBS->VERSION_LAG_METRIC_INTERVAL) || infoChanged);
			if (infoChanged.isReady()) {
				break;
			}
		}
	}
}
|
|
|
|
|
2019-02-19 06:54:28 +08:00
|
|
|
// Commits a self-conflicting transaction that sets no keys, retrying through Transaction::onError()
// until a commit succeeds. The transaction runs with PRIORITY_SYSTEM_IMMEDIATE and LOCK_AWARE.
ACTOR Future<Void> doEmptyCommit(Database cx) {
	state Transaction emptyTxn(cx);
	loop {
		try {
			// Options are re-applied on every attempt, before the commit.
			emptyTxn.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
			emptyTxn.setOption(FDBTransactionOptions::LOCK_AWARE);
			emptyTxn.makeSelfConflicting();
			wait(emptyTxn.commit());
			return Void();
		} catch (Error& err) {
			// onError() either completes (and we retry) or rethrows.
			wait(emptyTxn.onError(err));
		}
	}
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Serves ForceRecoveryRequest messages one at a time.
//
// For each request an empty commit is attempted. If it has not completed successfully within
// FORCE_RECOVERY_CHECK_DELAY, the actor either asks for the requested datacenter to be preferred
// (when it differs from the cluster controller's own) or flags a forced recovery and triggers a master
// failure, and then waits for the empty commit to finish. The reply is sent after the commit
// succeeds. The loop has no exit of its own.
ACTOR Future<Void> handleForcedRecoveries(ClusterControllerData* self, ClusterControllerFullInterface interf) {
	loop {
		state ForceRecoveryRequest request = waitNext(interf.clientInterface.forceRecovery.getFuture());
		TraceEvent("ForcedRecoveryStart", self->id)
		    .detail("ClusterControllerDcId", self->clusterControllerDcId)
		    .detail("DcId", request.dcId.printable());
		state Future<Void> commitDone = doEmptyCommit(self->cx);
		wait(commitDone || delay(SERVER_KNOBS->FORCE_RECOVERY_CHECK_DELAY));

		const bool committedCleanly = commitDone.isReady() && !commitDone.isError();
		if (!committedCleanly) {
			if (self->clusterControllerDcId != request.dcId) {
				// Prefer the requested datacenter first, the current one second.
				vector<Optional<Key>> preferredDcs;
				preferredDcs.push_back(request.dcId);
				preferredDcs.push_back(self->clusterControllerDcId);
				self->desiredDcIds.set(preferredDcs);
			} else {
				self->db.forceRecovery = true;
				self->db.forceMasterFailure.trigger();
			}
			// Rethrows if the commit actor ended with an error.
			wait(commitDone);
		}
		TraceEvent("ForcedRecoveryFinish", self->id);
		self->db.forceRecovery = false;
		request.reply.send(Void());
	}
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Recruits a data distributor and returns its interface.
//
// Each attempt first waits until the master process id is known, matches the master in serverInfo,
// and recovery has reached ACCEPTING_COMMITS. If a distributor appeared in serverInfo during that
// wait (none was present when the attempt began), that one is returned instead of recruiting.
// Otherwise a worker is chosen in the cluster controller's datacenter (or the master's worker when
// onMasterIsBetter() says so) and asked to initialize a distributor. Attempts repeat every
// ATTEMPT_RECRUITMENT_DELAY. Errors other than no_more_servers are rethrown.
ACTOR Future<DataDistributorInterface> startDataDistributor(ClusterControllerData* self) {
	wait(delay(0.0)); // If master fails at the same time, give it a chance to clear master PID.

	TraceEvent("CCStartDataDistributor", self->id);
	loop {
		try {
			// Snapshot taken before waiting, so a distributor that shows up meanwhile can be detected.
			state bool hadNoDistributor = !self->db.serverInfo->get().distributor.present();
			while (!self->masterProcessId.present() ||
			       self->masterProcessId != self->db.serverInfo->get().master.locality.processId() ||
			       self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
				wait(self->db.serverInfo->onChange() || delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY));
			}
			if (hadNoDistributor && self->db.serverInfo->get().distributor.present()) {
				return self->db.serverInfo->get().distributor.get();
			}

			std::map<Optional<Standalone<StringRef>>, int> usedIds = self->getUsedIds();
			WorkerFitnessInfo bestFit = self->getWorkerForRoleInDatacenter(self->clusterControllerDcId,
			                                                               ProcessClass::DataDistributor,
			                                                               ProcessClass::NeverAssign,
			                                                               self->db.config,
			                                                               usedIds);
			state WorkerDetails target = bestFit.worker;
			if (self->onMasterIsBetter(target, ProcessClass::DataDistributor)) {
				target = self->id_worker[self->masterProcessId.get()].details;
			}

			InitializeDataDistributorRequest initReq(deterministicRandom()->randomUniqueID());
			TraceEvent("CCDataDistributorRecruit", self->id).detail("Addr", target.interf.address());

			ErrorOr<DataDistributorInterface> recruited =
			    wait(target.interf.dataDistributor.getReplyUnlessFailedFor(
			        initReq, SERVER_KNOBS->WAIT_FOR_DISTRIBUTOR_JOIN_DELAY, 0));
			if (recruited.present()) {
				TraceEvent("CCDataDistributorRecruited", self->id).detail("Addr", target.interf.address());
				return recruited.get();
			}
		} catch (Error& err) {
			TraceEvent("CCDataDistributorRecruitError", self->id).error(err);
			// Only "no more servers" is retried; everything else propagates to the caller.
			if (err.code() != error_code_no_more_servers) {
				throw;
			}
		}
		wait(lowPriorityDelay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY));
	}
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Keeps a data distributor present in serverInfo.
//
// After recovery reaches ACCEPTING_COMMITS, alternates between two states: when no distributor is
// registered, recruit one via startDataDistributor() and publish it; when one is registered, wait for
// its failure (DD_FAILURE_TIME) and then clear it. The loop has no exit of its own.
ACTOR Future<Void> monitorDataDistributor(ClusterControllerData* self) {
	while (self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
		wait(self->db.serverInfo->onChange());
	}

	loop {
		if (!self->db.serverInfo->get().distributor.present()) {
			// recruitingDistributor is raised only for the duration of the recruitment.
			self->recruitingDistributor = true;
			DataDistributorInterface recruited = wait(startDataDistributor(self));
			self->recruitingDistributor = false;
			self->db.setDistributor(recruited);
		} else {
			wait(waitFailureClient(self->db.serverInfo->get().distributor.get().waitFailure,
			                       SERVER_KNOBS->DD_FAILURE_TIME));
			TraceEvent("CCDataDistributorDied", self->id)
			    .detail("DistributorId", self->db.serverInfo->get().distributor.get().id());
			self->db.clearInterf(ProcessClass::DataDistributorClass);
		}
	}
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Recruits a ratekeeper and publishes it in serverInfo.
//
// Each attempt first waits until the master process id is known, matches the master in serverInfo,
// and recovery has reached ACCEPTING_COMMITS. If a ratekeeper registered during that wait (none was
// present when the attempt began), the actor returns without recruiting. Otherwise a worker is
// chosen in the cluster controller's datacenter (or the master's worker when onMasterIsBetter() says
// so) and asked to initialize a ratekeeper. On success a different, still-registered previous
// ratekeeper is sent a halt request. Attempts repeat every ATTEMPT_RECRUITMENT_DELAY. Errors other
// than no_more_servers are rethrown.
ACTOR Future<Void> startRatekeeper(ClusterControllerData* self) {
	wait(delay(0.0)); // If master fails at the same time, give it a chance to clear master PID.

	TraceEvent("CCStartRatekeeper", self->id);
	loop {
		try {
			// Snapshot taken before waiting, so a ratekeeper that registers meanwhile can be detected.
			state bool hadNoRatekeeper = !self->db.serverInfo->get().ratekeeper.present();
			while (!self->masterProcessId.present() ||
			       self->masterProcessId != self->db.serverInfo->get().master.locality.processId() ||
			       self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
				wait(self->db.serverInfo->onChange() || delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY));
			}
			if (hadNoRatekeeper && self->db.serverInfo->get().ratekeeper.present()) {
				// Existing ratekeeper registers while waiting, so skip.
				return Void();
			}

			std::map<Optional<Standalone<StringRef>>, int> usedIds = self->getUsedIds();
			WorkerFitnessInfo bestFit = self->getWorkerForRoleInDatacenter(self->clusterControllerDcId,
			                                                               ProcessClass::Ratekeeper,
			                                                               ProcessClass::NeverAssign,
			                                                               self->db.config,
			                                                               usedIds);
			InitializeRatekeeperRequest initReq(deterministicRandom()->randomUniqueID());
			state WorkerDetails target = bestFit.worker;
			if (self->onMasterIsBetter(target, ProcessClass::Ratekeeper)) {
				target = self->id_worker[self->masterProcessId.get()].details;
			}

			// Until the recruit answers, the pending recruitment is identified by the request id.
			self->recruitingRatekeeperID = initReq.reqId;
			TraceEvent("CCRecruitRatekeeper", self->id)
			    .detail("Addr", target.interf.address())
			    .detail("RKID", initReq.reqId);

			ErrorOr<RatekeeperInterface> recruited = wait(target.interf.ratekeeper.getReplyUnlessFailedFor(
			    initReq, SERVER_KNOBS->WAIT_FOR_RATEKEEPER_JOIN_DELAY, 0));
			if (recruited.present()) {
				self->recruitRatekeeper.set(false);
				const auto recruitedId = recruited.get().id();
				self->recruitingRatekeeperID = recruitedId;
				const auto& current = self->db.serverInfo->get().ratekeeper;
				TraceEvent("CCRatekeeperRecruited", self->id)
				    .detail("Addr", target.interf.address())
				    .detail("RKID", recruitedId);
				// Halt the previously published ratekeeper if it is a different one whose process is
				// still registered.
				if (current.present() && current.get().id() != recruitedId &&
				    self->id_worker.count(current.get().locality.processId())) {
					TraceEvent("CCHaltRatekeeperAfterRecruit", self->id)
					    .detail("RKID", current.get().id())
					    .detail("DcID", printable(self->clusterControllerDcId));
					self->id_worker[current.get().locality.processId()].haltRatekeeper =
					    brokenPromiseToNever(current.get().haltRatekeeper.getReply(HaltRatekeeperRequest(self->id)));
				}
				if (!current.present() || current.get().id() != recruitedId) {
					self->db.setRatekeeper(recruited.get());
				}
				checkOutstandingRequests(self);
				return Void();
			}
		} catch (Error& err) {
			TraceEvent("CCRatekeeperRecruitError", self->id).error(err);
			// Only "no more servers" is retried; everything else propagates to the caller.
			if (err.code() != error_code_no_more_servers) {
				throw;
			}
		}
		wait(lowPriorityDelay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY));
	}
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Keeps a ratekeeper present in serverInfo.
//
// After recovery reaches ACCEPTING_COMMITS: if no ratekeeper is registered, or `recruitRatekeeper` is
// set, run startRatekeeper(); otherwise wait until either the registered ratekeeper fails
// (RATEKEEPER_FAILURE_TIME), in which case it is cleared, or `recruitRatekeeper` changes. The loop
// has no exit of its own.
ACTOR Future<Void> monitorRatekeeper(ClusterControllerData* self) {
	while (self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
		wait(self->db.serverInfo->onChange());
	}

	loop {
		const bool mustRecruit =
		    !self->db.serverInfo->get().ratekeeper.present() || self->recruitRatekeeper.get();
		if (mustRecruit) {
			wait(startRatekeeper(self));
		} else {
			choose {
				when(wait(waitFailureClient(self->db.serverInfo->get().ratekeeper.get().waitFailure,
				                            SERVER_KNOBS->RATEKEEPER_FAILURE_TIME))) {
					TraceEvent("CCRatekeeperDied", self->id)
					    .detail("RKID", self->db.serverInfo->get().ratekeeper.get().id());
					self->db.clearInterf(ProcessClass::RatekeeperClass);
				}
				// A change of recruitRatekeeper just re-evaluates the condition above.
				when(wait(self->recruitRatekeeper.onChange())) {}
			}
		}
	}
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Broadcasts the serialized ServerDBInfo to workers.
//
// A round starts when serverInfo changes, or DBINFO_BATCH_DELAY after `self->updateDBInfo` is
// triggered (sooner if serverInfo changes during that delay).
//  - If serverInfo changed, the request targets the updateServerDBInfo endpoint of every entry in
//    `self->id_worker`.
//  - Otherwise it targets `self->updateDBInfoEndpoints` minus `self->removedDBInfoEndpoints`.
// Both endpoint sets are cleared before the broadcast. Endpoints reported back as not updated are
// re-queued and the trigger is fired again. A serverInfo change during the broadcast abandons it and
// starts the next round. The loop has no exit of its own.
ACTOR Future<Void> dbInfoUpdater(ClusterControllerData* self) {
	state Future<Void> dbInfoChange = self->db.serverInfo->onChange();
	state Future<Void> updateDBInfo = self->updateDBInfo.onTrigger();
	loop {
		choose {
			when(wait(updateDBInfo)) { wait(delay(SERVER_KNOBS->DBINFO_BATCH_DELAY) || dbInfoChange); }
			when(wait(dbInfoChange)) {}
		}

		UpdateServerDBInfoRequest req;
		if (dbInfoChange.isReady()) {
			// Full broadcast: one endpoint per registered worker, so size the vector once up front.
			req.broadcastInfo.reserve(self->id_worker.size());
			for (auto& it : self->id_worker) {
				req.broadcastInfo.push_back(it.second.details.interf.updateServerDBInfo.getEndpoint());
			}
		} else {
			// Iterate by const reference: the endpoint is only used as an erase key, no copy needed.
			for (const auto& removed : self->removedDBInfoEndpoints) {
				self->updateDBInfoEndpoints.erase(removed);
			}
			req.broadcastInfo =
			    std::vector<Endpoint>(self->updateDBInfoEndpoints.begin(), self->updateDBInfoEndpoints.end());
		}

		self->updateDBInfoEndpoints.clear();
		self->removedDBInfoEndpoints.clear();

		// Re-arm both wake-up sources before the broadcast starts.
		dbInfoChange = self->db.serverInfo->onChange();
		updateDBInfo = self->updateDBInfo.onTrigger();

		req.serializedDbInfo =
		    BinaryWriter::toValue(self->db.serverInfo->get(), AssumeVersion(g_network->protocolVersion()));

		TraceEvent("DBInfoStartBroadcast", self->id);
		choose {
			when(std::vector<Endpoint> notUpdated =
			         wait(broadcastDBInfoRequest(req, SERVER_KNOBS->DBINFO_SEND_AMOUNT, Optional<Endpoint>(), false))) {
				TraceEvent("DBInfoFinishBroadcast", self->id).detail("NotUpdated", notUpdated.size());
				if (!notUpdated.empty()) {
					// Queue the stragglers for the next triggered round.
					self->updateDBInfoEndpoints.insert(notUpdated.begin(), notUpdated.end());
					self->updateDBInfo.trigger();
				}
			}
			when(wait(dbInfoChange)) {}
		}
	}
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
ACTOR Future<Void> clusterControllerCore(ClusterControllerFullInterface interf,
|
|
|
|
Future<Void> leaderFail,
|
|
|
|
ServerCoordinators coordinators,
|
|
|
|
LocalityData locality) {
|
|
|
|
state ClusterControllerData self(interf, locality);
|
|
|
|
state Future<Void> coordinationPingDelay = delay(SERVER_KNOBS->WORKER_COORDINATION_PING_DELAY);
|
2017-05-26 04:48:44 +08:00
|
|
|
state uint64_t step = 0;
|
2021-03-11 02:06:03 +08:00
|
|
|
state Future<ErrorOr<Void>> error = errorOr(actorCollection(self.addActor.getFuture()));
|
|
|
|
|
|
|
|
self.addActor.send(clusterWatchDatabase(&self, &self.db)); // Start the master database
|
|
|
|
self.addActor.send(self.updateWorkerList.init(self.db.db));
|
|
|
|
self.addActor.send(statusServer(interf.clientInterface.databaseStatus.getFuture(), &self, coordinators));
|
|
|
|
self.addActor.send(timeKeeper(&self));
|
|
|
|
self.addActor.send(monitorProcessClasses(&self));
|
|
|
|
self.addActor.send(monitorServerInfoConfig(&self.db));
|
|
|
|
self.addActor.send(monitorClientTxnInfoConfigs(&self.db));
|
|
|
|
self.addActor.send(updatedChangingDatacenters(&self));
|
|
|
|
self.addActor.send(updatedChangedDatacenters(&self));
|
|
|
|
self.addActor.send(updateDatacenterVersionDifference(&self));
|
|
|
|
self.addActor.send(handleForcedRecoveries(&self, interf));
|
|
|
|
self.addActor.send(monitorDataDistributor(&self));
|
|
|
|
self.addActor.send(monitorRatekeeper(&self));
|
|
|
|
self.addActor.send(dbInfoUpdater(&self));
|
|
|
|
self.addActor.send(traceCounters("ClusterControllerMetrics",
|
|
|
|
self.id,
|
|
|
|
SERVER_KNOBS->STORAGE_LOGGING_DELAY,
|
|
|
|
&self.clusterControllerMetrics,
|
|
|
|
self.id.toString() + "/ClusterControllerMetrics"));
|
|
|
|
self.addActor.send(traceRole(Role::CLUSTER_CONTROLLER, interf.id()));
|
|
|
|
// printf("%s: I am the cluster controller\n", g_network->getLocalAddress().toString().c_str());
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
loop choose {
|
2021-03-11 02:06:03 +08:00
|
|
|
when(ErrorOr<Void> err = wait(error)) {
|
2017-05-26 04:48:44 +08:00
|
|
|
if (err.isError()) {
|
2018-09-06 06:06:14 +08:00
|
|
|
endRole(Role::CLUSTER_CONTROLLER, interf.id(), "Stop Received Error", false, err.getError());
|
2021-03-11 02:06:03 +08:00
|
|
|
} else {
|
2018-09-06 06:06:14 +08:00
|
|
|
endRole(Role::CLUSTER_CONTROLLER, interf.id(), "Stop Received Signal", true);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// We shut down normally even if there was a serious error (so this fdbserver may be re-elected cluster
|
|
|
|
// controller)
|
2017-05-26 04:48:44 +08:00
|
|
|
return Void();
|
|
|
|
}
|
2021-03-11 02:06:03 +08:00
|
|
|
when(OpenDatabaseRequest req = waitNext(interf.clientInterface.openDatabase.getFuture())) {
|
2019-10-04 06:29:11 +08:00
|
|
|
++self.openDatabaseRequests;
|
2019-07-26 08:15:31 +08:00
|
|
|
self.addActor.send(clusterOpenDatabase(&self.db, req));
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2021-03-11 02:06:03 +08:00
|
|
|
when(RecruitFromConfigurationRequest req = waitNext(interf.recruitFromConfiguration.getFuture())) {
|
|
|
|
self.addActor.send(clusterRecruitFromConfiguration(&self, req));
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2021-03-11 02:06:03 +08:00
|
|
|
when(RecruitRemoteFromConfigurationRequest req = waitNext(interf.recruitRemoteFromConfiguration.getFuture())) {
|
|
|
|
self.addActor.send(clusterRecruitRemoteFromConfiguration(&self, req));
|
2017-09-12 08:40:46 +08:00
|
|
|
}
|
2021-03-11 02:06:03 +08:00
|
|
|
when(RecruitStorageRequest req = waitNext(interf.recruitStorage.getFuture())) {
|
|
|
|
clusterRecruitStorage(&self, req);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2021-03-11 02:06:03 +08:00
|
|
|
when(RegisterWorkerRequest req = waitNext(interf.registerWorker.getFuture())) {
|
2019-10-04 06:29:11 +08:00
|
|
|
++self.registerWorkerRequests;
|
2021-03-11 02:06:03 +08:00
|
|
|
registerWorker(req, &self);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2021-03-11 02:06:03 +08:00
|
|
|
when(GetWorkersRequest req = waitNext(interf.getWorkers.getFuture())) {
|
2019-10-04 06:29:11 +08:00
|
|
|
++self.getWorkersRequests;
|
2019-03-09 00:25:07 +08:00
|
|
|
vector<WorkerDetails> workers;
|
2017-10-25 03:58:54 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
for (auto& it : self.id_worker) {
|
|
|
|
if ((req.flags & GetWorkersRequest::NON_EXCLUDED_PROCESSES_ONLY) &&
|
|
|
|
self.db.config.isExcludedServer(it.second.details.interf.addresses())) {
|
2017-10-25 03:58:54 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if ((req.flags & GetWorkersRequest::TESTER_CLASS_ONLY) &&
|
|
|
|
it.second.details.processClass.classType() != ProcessClass::TesterClass) {
|
2017-10-25 03:58:54 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2019-03-09 00:25:07 +08:00
|
|
|
workers.push_back(it.second.details);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2017-10-25 03:58:54 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
req.reply.send(workers);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2021-03-11 02:06:03 +08:00
|
|
|
when(GetClientWorkersRequest req = waitNext(interf.clientInterface.getClientWorkers.getFuture())) {
|
2019-10-04 06:29:11 +08:00
|
|
|
++self.getClientWorkersRequests;
|
2017-05-26 04:48:44 +08:00
|
|
|
vector<ClientWorkerInterface> workers;
|
2021-03-11 02:06:03 +08:00
|
|
|
for (auto& it : self.id_worker) {
|
2019-03-09 00:25:07 +08:00
|
|
|
if (it.second.details.processClass.classType() != ProcessClass::TesterClass) {
|
|
|
|
workers.push_back(it.second.details.interf.clientInterface);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
req.reply.send(workers);
|
|
|
|
}
|
2021-03-11 02:06:03 +08:00
|
|
|
when(wait(coordinationPingDelay)) {
|
2017-05-26 04:48:44 +08:00
|
|
|
CoordinationPingMessage message(self.id, step++);
|
2021-03-11 02:06:03 +08:00
|
|
|
for (auto& it : self.id_worker)
|
2019-03-09 00:25:07 +08:00
|
|
|
it.second.details.interf.coordinationPing.send(message);
|
2021-03-11 02:06:03 +08:00
|
|
|
coordinationPingDelay = delay(SERVER_KNOBS->WORKER_COORDINATION_PING_DELAY);
|
2017-05-26 04:48:44 +08:00
|
|
|
TraceEvent("CoordinationPingSent", self.id).detail("TimeStep", message.timeStep);
|
|
|
|
}
|
2021-03-11 02:06:03 +08:00
|
|
|
when(RegisterMasterRequest req = waitNext(interf.registerMaster.getFuture())) {
|
2019-10-04 06:29:11 +08:00
|
|
|
++self.registerMasterRequests;
|
2021-03-11 02:06:03 +08:00
|
|
|
clusterRegisterMaster(&self, req);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2021-03-11 02:06:03 +08:00
|
|
|
when(GetServerDBInfoRequest req = waitNext(interf.getServerDBInfo.getFuture())) {
|
|
|
|
self.addActor.send(clusterGetServerInfo(&self.db, req.knownServerInfoID, req.reply));
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
2021-03-11 02:06:03 +08:00
|
|
|
when(wait(leaderFail)) {
|
2017-05-26 04:48:44 +08:00
|
|
|
// We are no longer the leader if this has changed.
|
2018-09-06 06:06:14 +08:00
|
|
|
endRole(Role::CLUSTER_CONTROLLER, interf.id(), "Leader Replaced", true);
|
2017-05-26 04:48:44 +08:00
|
|
|
TEST(true); // Lost Cluster Controller Role
|
|
|
|
return Void();
|
|
|
|
}
|
2021-03-11 02:06:03 +08:00
|
|
|
when(ReplyPromise<Void> ping = waitNext(interf.clientInterface.ping.getFuture())) { ping.send(Void()); }
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Polls `interf.hasMessage()` every REPLACE_INTERFACE_CHECK_DELAY. Once it
// reports true, waits a further REPLACE_INTERFACE_DELAY and then completes.
// The returned future therefore becomes ready only after a message has been
// observed on the interface and the extra delay has elapsed.
ACTOR Future<Void> replaceInterface(ClusterControllerFullInterface interf) {
	// Keep re-checking until the interface reports a message.
	while (!interf.hasMessage()) {
		wait(delay(SERVER_KNOBS->REPLACE_INTERFACE_CHECK_DELAY));
	}

	// A message was seen: hold off for the configured delay before signalling.
	wait(delay(SERVER_KNOBS->REPLACE_INTERFACE_DELAY));
	return Void();
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Repeatedly offers a fresh ClusterControllerFullInterface as a leader
// candidate for the given coordinators. Each iteration:
//   1. builds a new interface and registers it through tryBecomeLeader();
//   2. waits until `currentCC` holds that interface, or until
//      replaceInterface() signals that the candidate should be rebuilt;
//   3. if elected (replace signal not ready), starts the CLUSTER_CONTROLLER
//      role and runs clusterControllerCore().
// Any error is logged (through endRole() when the role had been started,
// otherwise as a "ClusterControllerCandidateError" trace event) and rethrown.
ACTOR Future<Void> clusterController(ServerCoordinators coordinators,
                                     Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> currentCC,
                                     bool hasConnected,
                                     Reference<AsyncVar<ClusterControllerPriorityInfo>> asyncPriorityInfo,
                                     LocalityData locality) {
	loop {
		state ClusterControllerFullInterface candidate;
		state bool roleStarted = false;
		candidate.initEndpoints();
		try {
			// Register as a possible leader; wait to be elected
			state Future<Void> lostLeadership =
			    tryBecomeLeader(coordinators, candidate, currentCC, hasConnected, asyncPriorityInfo);
			state Future<Void> replaceSignal = replaceInterface(candidate);

			// Block until the published cluster controller is this candidate.
			while (!currentCC->get().present() || currentCC->get().get() != candidate) {
				choose {
					when(wait(currentCC->onChange())) {}
					when(wait(lostLeadership)) {
						// Not expected to fire while we are still only a candidate.
						ASSERT(false);
						throw internal_error();
					}
					when(wait(replaceSignal)) { break; }
				}
			}

			// If the replace signal fired, skip the role and go around the outer
			// loop to build a new candidate interface.
			if (!replaceSignal.isReady()) {
				// Release our handle on the replaceInterface future.
				replaceSignal = Future<Void>();
				hasConnected = true;
				startRole(Role::CLUSTER_CONTROLLER, candidate.id(), UID());
				roleStarted = true;

				wait(clusterControllerCore(candidate, lostLeadership, coordinators, locality));
			}
		} catch (Error& e) {
			if (!roleStarted) {
				// Failed before the role began: coordinators_changed is logged
				// at SevInfo, everything else at SevError.
				TraceEvent(e.code() == error_code_coordinators_changed ? SevInfo : SevError,
				           "ClusterControllerCandidateError",
				           candidate.id())
				    .error(e);
			} else {
				// Cancellation and coordinator changes are passed to endRole() as
				// an "ok" termination.
				const bool endedOk =
				    e.code() == error_code_actor_cancelled || e.code() == error_code_coordinators_changed;
				endRole(Role::CLUSTER_CONTROLLER, candidate.id(), "Error", endedOk, e);
			}
			throw;
		}
	}
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
// Entry point taking a connection file. Waits for `recoveredDiskFiles`, then
// loops: builds ServerCoordinators from `connFile` and runs the
// coordinator-based clusterController() overload. A coordinators_changed error
// restarts the loop (re-reading the coordinators from `connFile`); any other
// error propagates to the caller. After the first pass, `hasConnected` is
// passed as true to subsequent attempts.
ACTOR Future<Void> clusterController(Reference<ClusterConnectionFile> connFile,
                                     Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> currentCC,
                                     Reference<AsyncVar<ClusterControllerPriorityInfo>> asyncPriorityInfo,
                                     Future<Void> recoveredDiskFiles,
                                     LocalityData locality) {
	wait(recoveredDiskFiles);

	state bool hasConnected = false;
	loop {
		try {
			wait(clusterController(
			    ServerCoordinators(connFile), currentCC, hasConnected, asyncPriorityInfo, locality));
		} catch (Error& e) {
			const bool coordinatorsChanged = (e.code() == error_code_coordinators_changed);
			if (!coordinatorsChanged) {
				throw; // Expected to terminate fdbserver
			}
			// Coordinators changed: fall through and retry with a fresh set.
		}

		hasConnected = true;
	}
}
|