Add support for changing coordinators to the configuration database
Configuration database data lives on the coordinators. When a change-coordinators command is issued, this data must be sent to the new coordinators to keep the database consistent.
This commit is contained in:
parent
b641bd6c04
commit
74ac617a34
|
@ -844,7 +844,7 @@ ACTOR Future<Optional<ClusterConnectionString>> getClusterConnectionStringFromSt
|
||||||
// equal to one of the previously issued requests, there is a bug
|
// equal to one of the previously issued requests, there is a bug
|
||||||
// and we are breaking the promises we make with
|
// and we are breaking the promises we make with
|
||||||
// commit_unknown_result (the transaction must no longer be in
|
// commit_unknown_result (the transaction must no longer be in
|
||||||
// progress when receiving this error).
|
// progress when receiving commit_unknown_result).
|
||||||
int n = connectionStrings.size() > 0 ? connectionStrings.size() - 1 : 0; // avoid underflow
|
int n = connectionStrings.size() > 0 ? connectionStrings.size() - 1 : 0; // avoid underflow
|
||||||
for (int i = 0; i < n; ++i) {
|
for (int i = 0; i < n; ++i) {
|
||||||
ASSERT(currentKey.get() != connectionStrings.at(i));
|
ASSERT(currentKey.get() != connectionStrings.at(i));
|
||||||
|
@ -872,12 +872,58 @@ ACTOR Future<Optional<ClusterConnectionString>> getClusterConnectionStringFromSt
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns once a read against the Paxos configuration database has completed.
// After a failed attempt it either backs off (coordinators_changed) or defers
// to the transaction's own onError(), then tries again with a new transaction.
ACTOR Future<Void> verifyConfigurationDatabaseAlive(Database cx) {
    state Backoff retryBackoff;
    state Reference<ISingleThreadTransaction> probeTr;
    loop {
        try {
            // Every attempt starts from a freshly created transaction.
            probeTr = ISingleThreadTransaction::create(ISingleThreadTransaction::Type::PAXOS_CONFIG, cx);
            // Probe an arbitrary key in the configuration database; only
            // whether the read completes matters, the value itself is unused.
            Tuple probeKey;
            probeKey.appendNull(); // config class
            probeKey << "test"_sr;
            Optional<Value> ignoredValue = wait(probeTr->get(probeKey.pack()));
            TraceEvent("ChangeQuorumCheckerNewCoordinatorsOnline").log();
            return Void();
        } catch (Error& err) {
            TraceEvent("ChangeQuorumCheckerNewCoordinatorsError").error(err);
            if (err.code() != error_code_coordinators_changed) {
                wait(probeTr->onError(err));
            } else {
                // The coordinators changed underneath us: back off, then
                // reset the transaction before the next attempt.
                wait(retryBackoff.onError());
                probeTr->reset();
            }
        }
    }
}
|
||||||
|
|
||||||
|
// Clears previousCoordinatorsKey using a dedicated transaction, looping until
// a commit succeeds.
ACTOR Future<Void> resetPreviousCoordinatorsKey(Database cx) {
    state Reference<ISingleThreadTransaction> cleanupTr;
    loop {
        // A successful change-coordinators transaction reports its outcome to
        // the client through the special key space error message, which means
        // the underlying transaction is never committed. The previous
        // coordinators key therefore has to be cleared from a separate, new
        // transaction created here.
        cleanupTr = ISingleThreadTransaction::create(ISingleThreadTransaction::Type::RYW, cx);
        try {
            cleanupTr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
            cleanupTr->clear(previousCoordinatorsKey);
            wait(cleanupTr->commit());
            return Void();
        } catch (Error& commitErr) {
            wait(cleanupTr->onError(commitErr));
        }
    }
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
|
ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
|
||||||
ClusterConnectionString* conn,
|
ClusterConnectionString* conn,
|
||||||
std::string newName) {
|
std::string newName) {
|
||||||
|
TraceEvent("ChangeQuorumCheckerStart").detail("NewConnectionString", conn->toString());
|
||||||
state Optional<ClusterConnectionString> clusterConnectionStringOptional =
|
state Optional<ClusterConnectionString> clusterConnectionStringOptional =
|
||||||
wait(getClusterConnectionStringFromStorageServer(tr));
|
wait(getClusterConnectionStringFromStorageServer(tr));
|
||||||
|
|
||||||
|
@ -892,7 +938,7 @@ ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
|
||||||
conn->hostnames = old.hostnames;
|
conn->hostnames = old.hostnames;
|
||||||
conn->coords = old.coords;
|
conn->coords = old.coords;
|
||||||
}
|
}
|
||||||
std::vector<NetworkAddress> desiredCoordinators = wait(conn->tryResolveHostnames());
|
state std::vector<NetworkAddress> desiredCoordinators = wait(conn->tryResolveHostnames());
|
||||||
if (desiredCoordinators.size() != conn->hostnames.size() + conn->coords.size()) {
|
if (desiredCoordinators.size() != conn->hostnames.size() + conn->coords.size()) {
|
||||||
TraceEvent("ChangeQuorumCheckerEarlyTermination")
|
TraceEvent("ChangeQuorumCheckerEarlyTermination")
|
||||||
.detail("Reason", "One or more hostnames are unresolvable")
|
.detail("Reason", "One or more hostnames are unresolvable")
|
||||||
|
@ -909,6 +955,8 @@ ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
|
||||||
std::sort(old.coords.begin(), old.coords.end());
|
std::sort(old.coords.begin(), old.coords.end());
|
||||||
if (conn->hostnames == old.hostnames && conn->coords == old.coords && old.clusterKeyName() == newName) {
|
if (conn->hostnames == old.hostnames && conn->coords == old.coords && old.clusterKeyName() == newName) {
|
||||||
connectionStrings.clear();
|
connectionStrings.clear();
|
||||||
|
wait(verifyConfigurationDatabaseAlive(tr->getDatabase()));
|
||||||
|
wait(resetPreviousCoordinatorsKey(tr->getDatabase()));
|
||||||
return CoordinatorsResult::SAME_NETWORK_ADDRESSES;
|
return CoordinatorsResult::SAME_NETWORK_ADDRESSES;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -958,6 +1006,9 @@ ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
|
||||||
when(wait(waitForAll(leaderServers))) {}
|
when(wait(waitForAll(leaderServers))) {}
|
||||||
when(wait(delay(5.0))) { return CoordinatorsResult::COORDINATOR_UNREACHABLE; }
|
when(wait(delay(5.0))) { return CoordinatorsResult::COORDINATOR_UNREACHABLE; }
|
||||||
}
|
}
|
||||||
|
TraceEvent("ChangeQuorumCheckerSetCoordinatorsKey")
|
||||||
|
.detail("CurrentCoordinators", old.toString())
|
||||||
|
.detail("NewCoordinators", conn->toString());
|
||||||
tr->set(coordinatorsKey, conn->toString());
|
tr->set(coordinatorsKey, conn->toString());
|
||||||
return Optional<CoordinatorsResult>();
|
return Optional<CoordinatorsResult>();
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "fdbclient/DatabaseContext.h"
|
#include "fdbclient/DatabaseContext.h"
|
||||||
|
#include "fdbclient/MonitorLeader.h"
|
||||||
#include "fdbclient/PaxosConfigTransaction.h"
|
#include "fdbclient/PaxosConfigTransaction.h"
|
||||||
#include "flow/actorcompiler.h" // must be last include
|
#include "flow/actorcompiler.h" // must be last include
|
||||||
|
|
||||||
|
@ -34,8 +35,8 @@ class CommitQuorum {
|
||||||
Standalone<VectorRef<ConfigMutationRef>> mutations;
|
Standalone<VectorRef<ConfigMutationRef>> mutations;
|
||||||
ConfigCommitAnnotation annotation;
|
ConfigCommitAnnotation annotation;
|
||||||
|
|
||||||
ConfigTransactionCommitRequest getCommitRequest(ConfigGeneration generation) const {
|
ConfigTransactionCommitRequest getCommitRequest(ConfigGeneration generation, size_t coordinatorsHash) const {
|
||||||
return ConfigTransactionCommitRequest(generation, mutations, annotation);
|
return ConfigTransactionCommitRequest(coordinatorsHash, generation, mutations, annotation);
|
||||||
}
|
}
|
||||||
|
|
||||||
void updateResult() {
|
void updateResult() {
|
||||||
|
@ -62,14 +63,16 @@ class CommitQuorum {
|
||||||
|
|
||||||
ACTOR static Future<Void> addRequestActor(CommitQuorum* self,
|
ACTOR static Future<Void> addRequestActor(CommitQuorum* self,
|
||||||
ConfigGeneration generation,
|
ConfigGeneration generation,
|
||||||
|
size_t coordinatorsHash,
|
||||||
ConfigTransactionInterface cti) {
|
ConfigTransactionInterface cti) {
|
||||||
try {
|
try {
|
||||||
if (cti.hostname.present()) {
|
if (cti.hostname.present()) {
|
||||||
wait(timeoutError(retryGetReplyFromHostname(
|
wait(timeoutError(retryGetReplyFromHostname(self->getCommitRequest(generation, coordinatorsHash),
|
||||||
self->getCommitRequest(generation), cti.hostname.get(), WLTOKEN_CONFIGTXN_COMMIT),
|
cti.hostname.get(),
|
||||||
|
WLTOKEN_CONFIGTXN_COMMIT),
|
||||||
CLIENT_KNOBS->COMMIT_QUORUM_TIMEOUT));
|
CLIENT_KNOBS->COMMIT_QUORUM_TIMEOUT));
|
||||||
} else {
|
} else {
|
||||||
wait(timeoutError(cti.commit.getReply(self->getCommitRequest(generation)),
|
wait(timeoutError(cti.commit.getReply(self->getCommitRequest(generation, coordinatorsHash)),
|
||||||
CLIENT_KNOBS->COMMIT_QUORUM_TIMEOUT));
|
CLIENT_KNOBS->COMMIT_QUORUM_TIMEOUT));
|
||||||
}
|
}
|
||||||
++self->successful;
|
++self->successful;
|
||||||
|
@ -109,11 +112,11 @@ public:
|
||||||
}
|
}
|
||||||
void setTimestamp() { annotation.timestamp = now(); }
|
void setTimestamp() { annotation.timestamp = now(); }
|
||||||
size_t expectedSize() const { return annotation.expectedSize() + mutations.expectedSize(); }
|
size_t expectedSize() const { return annotation.expectedSize() + mutations.expectedSize(); }
|
||||||
Future<Void> commit(ConfigGeneration generation) {
|
Future<Void> commit(ConfigGeneration generation, size_t coordinatorsHash) {
|
||||||
// Send commit message to all replicas, even those that did not return the used replica.
|
// Send commit message to all replicas, even those that did not return the used replica.
|
||||||
// This way, slow replicas are kept up to date.
|
// This way, slow replicas are kept up to date.
|
||||||
for (const auto& cti : ctis) {
|
for (const auto& cti : ctis) {
|
||||||
actors.add(addRequestActor(this, generation, cti));
|
actors.add(addRequestActor(this, generation, coordinatorsHash, cti));
|
||||||
}
|
}
|
||||||
return result.getFuture();
|
return result.getFuture();
|
||||||
}
|
}
|
||||||
|
@ -122,11 +125,13 @@ public:
|
||||||
|
|
||||||
class GetGenerationQuorum {
|
class GetGenerationQuorum {
|
||||||
ActorCollection actors{ false };
|
ActorCollection actors{ false };
|
||||||
|
size_t coordinatorsHash;
|
||||||
std::vector<ConfigTransactionInterface> ctis;
|
std::vector<ConfigTransactionInterface> ctis;
|
||||||
std::map<ConfigGeneration, std::vector<ConfigTransactionInterface>> seenGenerations;
|
std::map<ConfigGeneration, std::vector<ConfigTransactionInterface>> seenGenerations;
|
||||||
Promise<ConfigGeneration> result;
|
Promise<ConfigGeneration> result;
|
||||||
size_t totalRepliesReceived{ 0 };
|
size_t totalRepliesReceived{ 0 };
|
||||||
size_t maxAgreement{ 0 };
|
size_t maxAgreement{ 0 };
|
||||||
|
Future<Void> coordinatorsChangedFuture;
|
||||||
Optional<Version> lastSeenLiveVersion;
|
Optional<Version> lastSeenLiveVersion;
|
||||||
Future<ConfigGeneration> getGenerationFuture;
|
Future<ConfigGeneration> getGenerationFuture;
|
||||||
|
|
||||||
|
@ -137,14 +142,15 @@ class GetGenerationQuorum {
|
||||||
if (cti.hostname.present()) {
|
if (cti.hostname.present()) {
|
||||||
wait(timeoutError(store(reply,
|
wait(timeoutError(store(reply,
|
||||||
retryGetReplyFromHostname(
|
retryGetReplyFromHostname(
|
||||||
ConfigTransactionGetGenerationRequest{ self->lastSeenLiveVersion },
|
ConfigTransactionGetGenerationRequest{ self->coordinatorsHash,
|
||||||
|
self->lastSeenLiveVersion },
|
||||||
cti.hostname.get(),
|
cti.hostname.get(),
|
||||||
WLTOKEN_CONFIGTXN_GETGENERATION)),
|
WLTOKEN_CONFIGTXN_GETGENERATION)),
|
||||||
CLIENT_KNOBS->GET_GENERATION_QUORUM_TIMEOUT));
|
CLIENT_KNOBS->GET_GENERATION_QUORUM_TIMEOUT));
|
||||||
} else {
|
} else {
|
||||||
wait(timeoutError(store(reply,
|
wait(timeoutError(store(reply,
|
||||||
cti.getGeneration.getReply(
|
cti.getGeneration.getReply(ConfigTransactionGetGenerationRequest{
|
||||||
ConfigTransactionGetGenerationRequest{ self->lastSeenLiveVersion })),
|
self->coordinatorsHash, self->lastSeenLiveVersion })),
|
||||||
CLIENT_KNOBS->GET_GENERATION_QUORUM_TIMEOUT));
|
CLIENT_KNOBS->GET_GENERATION_QUORUM_TIMEOUT));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -155,6 +161,14 @@ class GetGenerationQuorum {
|
||||||
auto& replicas = self->seenGenerations[gen];
|
auto& replicas = self->seenGenerations[gen];
|
||||||
replicas.push_back(cti);
|
replicas.push_back(cti);
|
||||||
self->maxAgreement = std::max(replicas.size(), self->maxAgreement);
|
self->maxAgreement = std::max(replicas.size(), self->maxAgreement);
|
||||||
|
// TraceEvent("ConfigTransactionGotGenerationReply")
|
||||||
|
// .detail("From", cti.getGeneration.getEndpoint().getPrimaryAddress())
|
||||||
|
// .detail("TotalRepliesReceived", self->totalRepliesReceived)
|
||||||
|
// .detail("ReplyGeneration", gen.toString())
|
||||||
|
// .detail("Replicas", replicas.size())
|
||||||
|
// .detail("Coordinators", self->ctis.size())
|
||||||
|
// .detail("MaxAgreement", self->maxAgreement)
|
||||||
|
// .detail("LastSeenLiveVersion", self->lastSeenLiveVersion);
|
||||||
if (replicas.size() >= self->ctis.size() / 2 + 1 && !self->result.isSet()) {
|
if (replicas.size() >= self->ctis.size() / 2 + 1 && !self->result.isSet()) {
|
||||||
self->result.send(gen);
|
self->result.send(gen);
|
||||||
} else if (self->maxAgreement + (self->ctis.size() - self->totalRepliesReceived) <
|
} else if (self->maxAgreement + (self->ctis.size() - self->totalRepliesReceived) <
|
||||||
|
@ -200,8 +214,18 @@ class GetGenerationQuorum {
|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
if (e.code() == error_code_failed_to_reach_quorum) {
|
if (e.code() == error_code_failed_to_reach_quorum) {
|
||||||
CODE_PROBE(true, "Failed to reach quorum getting generation");
|
CODE_PROBE(true, "Failed to reach quorum getting generation");
|
||||||
|
if (self->coordinatorsChangedFuture.isReady()) {
|
||||||
|
throw coordinators_changed();
|
||||||
|
}
|
||||||
wait(delayJittered(
|
wait(delayJittered(
|
||||||
std::clamp(0.005 * (1 << retries), 0.0, CLIENT_KNOBS->TIMEOUT_RETRY_UPPER_BOUND)));
|
std::clamp(0.005 * (1 << retries), 0.0, CLIENT_KNOBS->TIMEOUT_RETRY_UPPER_BOUND)));
|
||||||
|
if (deterministicRandom()->random01() < 0.05) {
|
||||||
|
// Randomly inject a delay of at least the generation
|
||||||
|
// reply timeout, to try to prevent contention between
|
||||||
|
// clients.
|
||||||
|
wait(delay(CLIENT_KNOBS->GET_GENERATION_QUORUM_TIMEOUT *
|
||||||
|
(deterministicRandom()->random01() + 1.0)));
|
||||||
|
}
|
||||||
++retries;
|
++retries;
|
||||||
self->actors.clear(false);
|
self->actors.clear(false);
|
||||||
self->seenGenerations.clear();
|
self->seenGenerations.clear();
|
||||||
|
@ -217,9 +241,12 @@ class GetGenerationQuorum {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GetGenerationQuorum() = default;
|
GetGenerationQuorum() = default;
|
||||||
explicit GetGenerationQuorum(std::vector<ConfigTransactionInterface> const& ctis,
|
explicit GetGenerationQuorum(size_t coordinatorsHash,
|
||||||
|
std::vector<ConfigTransactionInterface> const& ctis,
|
||||||
|
Future<Void> coordinatorsChangedFuture,
|
||||||
Optional<Version> const& lastSeenLiveVersion = {})
|
Optional<Version> const& lastSeenLiveVersion = {})
|
||||||
: ctis(ctis), lastSeenLiveVersion(lastSeenLiveVersion) {}
|
: coordinatorsHash(coordinatorsHash), ctis(ctis), coordinatorsChangedFuture(coordinatorsChangedFuture),
|
||||||
|
lastSeenLiveVersion(lastSeenLiveVersion) {}
|
||||||
Future<ConfigGeneration> getGeneration() {
|
Future<ConfigGeneration> getGeneration() {
|
||||||
if (!getGenerationFuture.isValid()) {
|
if (!getGenerationFuture.isValid()) {
|
||||||
getGenerationFuture = getGenerationActor(this);
|
getGenerationFuture = getGenerationActor(this);
|
||||||
|
@ -240,12 +267,14 @@ public:
|
||||||
};
|
};
|
||||||
|
|
||||||
class PaxosConfigTransactionImpl {
|
class PaxosConfigTransactionImpl {
|
||||||
|
size_t coordinatorsHash;
|
||||||
std::vector<ConfigTransactionInterface> ctis;
|
std::vector<ConfigTransactionInterface> ctis;
|
||||||
GetGenerationQuorum getGenerationQuorum;
|
GetGenerationQuorum getGenerationQuorum;
|
||||||
CommitQuorum commitQuorum;
|
CommitQuorum commitQuorum;
|
||||||
int numRetries{ 0 };
|
int numRetries{ 0 };
|
||||||
Optional<UID> dID;
|
Optional<UID> dID;
|
||||||
Database cx;
|
Database cx;
|
||||||
|
Future<Void> watchClusterFileFuture;
|
||||||
|
|
||||||
ACTOR static Future<Optional<Value>> get(PaxosConfigTransactionImpl* self, Key key) {
|
ACTOR static Future<Optional<Value>> get(PaxosConfigTransactionImpl* self, Key key) {
|
||||||
state ConfigKey configKey = ConfigKey::decodeKey(key);
|
state ConfigKey configKey = ConfigKey::decodeKey(key);
|
||||||
|
@ -263,18 +292,19 @@ class PaxosConfigTransactionImpl {
|
||||||
}
|
}
|
||||||
wait(waitForAll(fs));
|
wait(waitForAll(fs));
|
||||||
state Reference<ConfigTransactionInfo> configNodes(new ConfigTransactionInfo(readReplicas));
|
state Reference<ConfigTransactionInfo> configNodes(new ConfigTransactionInfo(readReplicas));
|
||||||
ConfigTransactionGetReply reply =
|
ConfigTransactionGetReply reply = wait(timeoutError(
|
||||||
wait(timeoutError(basicLoadBalance(configNodes,
|
basicLoadBalance(configNodes,
|
||||||
&ConfigTransactionInterface::get,
|
&ConfigTransactionInterface::get,
|
||||||
ConfigTransactionGetRequest{ generation, configKey }),
|
ConfigTransactionGetRequest{ self->coordinatorsHash, generation, configKey }),
|
||||||
CLIENT_KNOBS->GET_KNOB_TIMEOUT));
|
CLIENT_KNOBS->GET_KNOB_TIMEOUT));
|
||||||
if (reply.value.present()) {
|
if (reply.value.present()) {
|
||||||
return reply.value.get().toValue();
|
return reply.value.get().toValue();
|
||||||
} else {
|
} else {
|
||||||
return Optional<Value>{};
|
return Optional<Value>{};
|
||||||
}
|
}
|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
if (e.code() != error_code_timed_out && e.code() != error_code_broken_promise) {
|
if (e.code() != error_code_timed_out && e.code() != error_code_broken_promise &&
|
||||||
|
e.code() != error_code_coordinators_changed) {
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
self->reset();
|
self->reset();
|
||||||
|
@ -283,58 +313,87 @@ class PaxosConfigTransactionImpl {
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR static Future<RangeResult> getConfigClasses(PaxosConfigTransactionImpl* self) {
|
ACTOR static Future<RangeResult> getConfigClasses(PaxosConfigTransactionImpl* self) {
|
||||||
state ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
|
loop {
|
||||||
state std::vector<ConfigTransactionInterface> readReplicas = self->getGenerationQuorum.getReadReplicas();
|
try {
|
||||||
std::vector<Future<Void>> fs;
|
state ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
|
||||||
for (ConfigTransactionInterface& readReplica : readReplicas) {
|
state std::vector<ConfigTransactionInterface> readReplicas =
|
||||||
if (readReplica.hostname.present()) {
|
self->getGenerationQuorum.getReadReplicas();
|
||||||
fs.push_back(tryInitializeRequestStream(
|
std::vector<Future<Void>> fs;
|
||||||
&readReplica.getClasses, readReplica.hostname.get(), WLTOKEN_CONFIGTXN_GETCLASSES));
|
for (ConfigTransactionInterface& readReplica : readReplicas) {
|
||||||
|
if (readReplica.hostname.present()) {
|
||||||
|
fs.push_back(tryInitializeRequestStream(
|
||||||
|
&readReplica.getClasses, readReplica.hostname.get(), WLTOKEN_CONFIGTXN_GETCLASSES));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
wait(waitForAll(fs));
|
||||||
|
state Reference<ConfigTransactionInfo> configNodes(new ConfigTransactionInfo(readReplicas));
|
||||||
|
ConfigTransactionGetConfigClassesReply reply =
|
||||||
|
wait(basicLoadBalance(configNodes,
|
||||||
|
&ConfigTransactionInterface::getClasses,
|
||||||
|
ConfigTransactionGetConfigClassesRequest{ generation }));
|
||||||
|
RangeResult result;
|
||||||
|
result.reserve(result.arena(), reply.configClasses.size());
|
||||||
|
for (const auto& configClass : reply.configClasses) {
|
||||||
|
result.push_back_deep(result.arena(), KeyValueRef(configClass, ""_sr));
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
} catch (Error& e) {
|
||||||
|
if (e.code() != error_code_coordinators_changed) {
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
self->reset();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
wait(waitForAll(fs));
|
|
||||||
state Reference<ConfigTransactionInfo> configNodes(new ConfigTransactionInfo(readReplicas));
|
|
||||||
ConfigTransactionGetConfigClassesReply reply =
|
|
||||||
wait(basicLoadBalance(configNodes,
|
|
||||||
&ConfigTransactionInterface::getClasses,
|
|
||||||
ConfigTransactionGetConfigClassesRequest{ generation }));
|
|
||||||
RangeResult result;
|
|
||||||
result.reserve(result.arena(), reply.configClasses.size());
|
|
||||||
for (const auto& configClass : reply.configClasses) {
|
|
||||||
result.push_back_deep(result.arena(), KeyValueRef(configClass, ""_sr));
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR static Future<RangeResult> getKnobs(PaxosConfigTransactionImpl* self, Optional<Key> configClass) {
|
ACTOR static Future<RangeResult> getKnobs(PaxosConfigTransactionImpl* self, Optional<Key> configClass) {
|
||||||
state ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
|
loop {
|
||||||
state std::vector<ConfigTransactionInterface> readReplicas = self->getGenerationQuorum.getReadReplicas();
|
try {
|
||||||
std::vector<Future<Void>> fs;
|
state ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
|
||||||
for (ConfigTransactionInterface& readReplica : readReplicas) {
|
state std::vector<ConfigTransactionInterface> readReplicas =
|
||||||
if (readReplica.hostname.present()) {
|
self->getGenerationQuorum.getReadReplicas();
|
||||||
fs.push_back(tryInitializeRequestStream(
|
std::vector<Future<Void>> fs;
|
||||||
&readReplica.getKnobs, readReplica.hostname.get(), WLTOKEN_CONFIGTXN_GETKNOBS));
|
for (ConfigTransactionInterface& readReplica : readReplicas) {
|
||||||
|
if (readReplica.hostname.present()) {
|
||||||
|
fs.push_back(tryInitializeRequestStream(
|
||||||
|
&readReplica.getKnobs, readReplica.hostname.get(), WLTOKEN_CONFIGTXN_GETKNOBS));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
wait(waitForAll(fs));
|
||||||
|
state Reference<ConfigTransactionInfo> configNodes(new ConfigTransactionInfo(readReplicas));
|
||||||
|
ConfigTransactionGetKnobsReply reply =
|
||||||
|
wait(basicLoadBalance(configNodes,
|
||||||
|
&ConfigTransactionInterface::getKnobs,
|
||||||
|
ConfigTransactionGetKnobsRequest{ generation, configClass }));
|
||||||
|
RangeResult result;
|
||||||
|
result.reserve(result.arena(), reply.knobNames.size());
|
||||||
|
for (const auto& knobName : reply.knobNames) {
|
||||||
|
result.push_back_deep(result.arena(), KeyValueRef(knobName, ""_sr));
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
} catch (Error& e) {
|
||||||
|
if (e.code() != error_code_coordinators_changed) {
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
self->reset();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
wait(waitForAll(fs));
|
|
||||||
state Reference<ConfigTransactionInfo> configNodes(new ConfigTransactionInfo(readReplicas));
|
|
||||||
ConfigTransactionGetKnobsReply reply =
|
|
||||||
wait(basicLoadBalance(configNodes,
|
|
||||||
&ConfigTransactionInterface::getKnobs,
|
|
||||||
ConfigTransactionGetKnobsRequest{ generation, configClass }));
|
|
||||||
RangeResult result;
|
|
||||||
result.reserve(result.arena(), reply.knobNames.size());
|
|
||||||
for (const auto& knobName : reply.knobNames) {
|
|
||||||
result.push_back_deep(result.arena(), KeyValueRef(knobName, ""_sr));
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR static Future<Void> commit(PaxosConfigTransactionImpl* self) {
|
ACTOR static Future<Void> commit(PaxosConfigTransactionImpl* self) {
|
||||||
ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
|
loop {
|
||||||
self->commitQuorum.setTimestamp();
|
try {
|
||||||
wait(self->commitQuorum.commit(generation));
|
ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
|
||||||
return Void();
|
self->commitQuorum.setTimestamp();
|
||||||
|
wait(self->commitQuorum.commit(generation, self->coordinatorsHash));
|
||||||
|
return Void();
|
||||||
|
} catch (Error& e) {
|
||||||
|
if (e.code() != error_code_coordinators_changed) {
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
self->reset();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR static Future<Void> onError(PaxosConfigTransactionImpl* self, Error e) {
|
ACTOR static Future<Void> onError(PaxosConfigTransactionImpl* self, Error e) {
|
||||||
|
@ -350,6 +409,24 @@ class PaxosConfigTransactionImpl {
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns when the cluster interface updates with a new connection string.
|
||||||
|
ACTOR static Future<Void> watchClusterFile(Database cx) {
|
||||||
|
state Reference<AsyncVar<Optional<ClusterInterface>>> clusterInterface(
|
||||||
|
new AsyncVar<Optional<ClusterInterface>>);
|
||||||
|
state Future<Void> _ = monitorLeader<ClusterInterface>(cx->getConnectionRecord(), clusterInterface);
|
||||||
|
state std::string connectionString = cx->getConnectionRecord()->getConnectionString().toString();
|
||||||
|
|
||||||
|
loop {
|
||||||
|
choose {
|
||||||
|
when(wait(clusterInterface->onChange())) {
|
||||||
|
if (cx->getConnectionRecord()->getConnectionString().toString() != connectionString) {
|
||||||
|
return Void();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Future<Version> getReadVersion() {
|
Future<Version> getReadVersion() {
|
||||||
return map(getGenerationQuorum.getGeneration(), [](auto const& gen) { return gen.committedVersion; });
|
return map(getGenerationQuorum.getGeneration(), [](auto const& gen) { return gen.committedVersion; });
|
||||||
|
@ -395,7 +472,21 @@ public:
|
||||||
void debugTransaction(UID dID) { this->dID = dID; }
|
void debugTransaction(UID dID) { this->dID = dID; }
|
||||||
|
|
||||||
void reset() {
|
void reset() {
|
||||||
getGenerationQuorum = GetGenerationQuorum{ ctis };
|
ctis.clear();
|
||||||
|
// Re-read connection string. If the cluster file changed, this will
|
||||||
|
// return the updated value.
|
||||||
|
const ClusterConnectionString& cs = cx->getConnectionRecord()->getConnectionString();
|
||||||
|
ctis.reserve(cs.hostnames.size() + cs.coords.size());
|
||||||
|
for (const auto& h : cs.hostnames) {
|
||||||
|
ctis.emplace_back(h);
|
||||||
|
}
|
||||||
|
for (const auto& c : cs.coords) {
|
||||||
|
ctis.emplace_back(c);
|
||||||
|
}
|
||||||
|
coordinatorsHash = std::hash<std::string>()(cx->getConnectionRecord()->getConnectionString().toString());
|
||||||
|
getGenerationQuorum = GetGenerationQuorum{
|
||||||
|
coordinatorsHash, ctis, watchClusterFile(cx), getGenerationQuorum.getLastSeenLiveVersion()
|
||||||
|
};
|
||||||
commitQuorum = CommitQuorum{ ctis };
|
commitQuorum = CommitQuorum{ ctis };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -416,21 +507,10 @@ public:
|
||||||
|
|
||||||
Future<Void> commit() { return commit(this); }
|
Future<Void> commit() { return commit(this); }
|
||||||
|
|
||||||
PaxosConfigTransactionImpl(Database const& cx) : cx(cx) {
|
PaxosConfigTransactionImpl(Database const& cx) : cx(cx) { reset(); }
|
||||||
const ClusterConnectionString& cs = cx->getConnectionRecord()->getConnectionString();
|
|
||||||
ctis.reserve(cs.hostnames.size() + cs.coords.size());
|
|
||||||
for (const auto& h : cs.hostnames) {
|
|
||||||
ctis.emplace_back(h);
|
|
||||||
}
|
|
||||||
for (const auto& c : cs.coords) {
|
|
||||||
ctis.emplace_back(c);
|
|
||||||
}
|
|
||||||
getGenerationQuorum = GetGenerationQuorum{ ctis };
|
|
||||||
commitQuorum = CommitQuorum{ ctis };
|
|
||||||
}
|
|
||||||
|
|
||||||
PaxosConfigTransactionImpl(std::vector<ConfigTransactionInterface> const& ctis)
|
PaxosConfigTransactionImpl(std::vector<ConfigTransactionInterface> const& ctis)
|
||||||
: ctis(ctis), getGenerationQuorum(ctis), commitQuorum(ctis) {}
|
: ctis(ctis), getGenerationQuorum(0, ctis, Future<Void>()), commitQuorum(ctis) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
Future<Version> PaxosConfigTransaction::getReadVersion() {
|
Future<Version> PaxosConfigTransaction::getReadVersion() {
|
||||||
|
|
|
@ -70,11 +70,12 @@ class SimpleConfigTransactionImpl {
|
||||||
state ConfigTransactionGetReply reply;
|
state ConfigTransactionGetReply reply;
|
||||||
if (self->cti.hostname.present()) {
|
if (self->cti.hostname.present()) {
|
||||||
wait(store(reply,
|
wait(store(reply,
|
||||||
retryGetReplyFromHostname(ConfigTransactionGetRequest{ generation, configKey },
|
retryGetReplyFromHostname(ConfigTransactionGetRequest{ 0, generation, configKey },
|
||||||
self->cti.hostname.get(),
|
self->cti.hostname.get(),
|
||||||
WLTOKEN_CONFIGTXN_GET)));
|
WLTOKEN_CONFIGTXN_GET)));
|
||||||
} else {
|
} else {
|
||||||
wait(store(reply, retryBrokenPromise(self->cti.get, ConfigTransactionGetRequest{ generation, configKey })));
|
wait(store(reply,
|
||||||
|
retryBrokenPromise(self->cti.get, ConfigTransactionGetRequest{ 0, generation, configKey })));
|
||||||
}
|
}
|
||||||
if (self->dID.present()) {
|
if (self->dID.present()) {
|
||||||
TraceEvent("SimpleConfigTransactionGotValue", self->dID.get())
|
TraceEvent("SimpleConfigTransactionGotValue", self->dID.get())
|
||||||
|
|
|
@ -1002,6 +1002,7 @@ std::vector<std::pair<UID, Version>> decodeBackupStartedValue(const ValueRef& va
|
||||||
return ids;
|
return ids;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const KeyRef previousCoordinatorsKey = LiteralStringRef("\xff/previousCoordinators");
|
||||||
const KeyRef coordinatorsKey = LiteralStringRef("\xff/coordinators");
|
const KeyRef coordinatorsKey = LiteralStringRef("\xff/coordinators");
|
||||||
const KeyRef logsKey = LiteralStringRef("\xff/logs");
|
const KeyRef logsKey = LiteralStringRef("\xff/logs");
|
||||||
const KeyRef minRequiredCommitVersionKey = LiteralStringRef("\xff/minRequiredCommitVersion");
|
const KeyRef minRequiredCommitVersionKey = LiteralStringRef("\xff/minRequiredCommitVersion");
|
||||||
|
|
|
@ -55,7 +55,7 @@ Reference<ITenant> ThreadSafeDatabase::openTenant(TenantNameRef tenantName) {
|
||||||
}
|
}
|
||||||
|
|
||||||
Reference<ITransaction> ThreadSafeDatabase::createTransaction() {
|
Reference<ITransaction> ThreadSafeDatabase::createTransaction() {
|
||||||
auto type = isConfigDB ? ISingleThreadTransaction::Type::SIMPLE_CONFIG : ISingleThreadTransaction::Type::RYW;
|
auto type = isConfigDB ? ISingleThreadTransaction::Type::PAXOS_CONFIG : ISingleThreadTransaction::Type::RYW;
|
||||||
return Reference<ITransaction>(new ThreadSafeTransaction(db, type, Optional<TenantName>()));
|
return Reference<ITransaction>(new ThreadSafeTransaction(db, type, Optional<TenantName>()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -224,7 +224,7 @@ ThreadSafeDatabase::~ThreadSafeDatabase() {
|
||||||
}
|
}
|
||||||
|
|
||||||
Reference<ITransaction> ThreadSafeTenant::createTransaction() {
|
Reference<ITransaction> ThreadSafeTenant::createTransaction() {
|
||||||
auto type = db->isConfigDB ? ISingleThreadTransaction::Type::SIMPLE_CONFIG : ISingleThreadTransaction::Type::RYW;
|
auto type = db->isConfigDB ? ISingleThreadTransaction::Type::PAXOS_CONFIG : ISingleThreadTransaction::Type::RYW;
|
||||||
return Reference<ITransaction>(new ThreadSafeTransaction(db->db, type, name));
|
return Reference<ITransaction>(new ThreadSafeTransaction(db->db, type, name));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -65,16 +65,18 @@ struct ConfigTransactionGetGenerationReply {
|
||||||
|
|
||||||
struct ConfigTransactionGetGenerationRequest {
|
struct ConfigTransactionGetGenerationRequest {
|
||||||
static constexpr FileIdentifier file_identifier = 138941;
|
static constexpr FileIdentifier file_identifier = 138941;
|
||||||
|
size_t coordinatorsHash;
|
||||||
// A hint to catch up lagging nodes:
|
// A hint to catch up lagging nodes:
|
||||||
Optional<Version> lastSeenLiveVersion;
|
Optional<Version> lastSeenLiveVersion;
|
||||||
ReplyPromise<ConfigTransactionGetGenerationReply> reply;
|
ReplyPromise<ConfigTransactionGetGenerationReply> reply;
|
||||||
ConfigTransactionGetGenerationRequest() = default;
|
ConfigTransactionGetGenerationRequest() = default;
|
||||||
explicit ConfigTransactionGetGenerationRequest(Optional<Version> const& lastSeenLiveVersion)
|
explicit ConfigTransactionGetGenerationRequest(size_t coordinatorsHash,
|
||||||
: lastSeenLiveVersion(lastSeenLiveVersion) {}
|
Optional<Version> const& lastSeenLiveVersion)
|
||||||
|
: coordinatorsHash(coordinatorsHash), lastSeenLiveVersion(lastSeenLiveVersion) {}
|
||||||
|
|
||||||
template <class Ar>
|
template <class Ar>
|
||||||
void serialize(Ar& ar) {
|
void serialize(Ar& ar) {
|
||||||
serializer(ar, lastSeenLiveVersion, reply);
|
serializer(ar, coordinatorsHash, lastSeenLiveVersion, reply);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -92,39 +94,43 @@ struct ConfigTransactionGetReply {
|
||||||
|
|
||||||
struct ConfigTransactionGetRequest {
|
struct ConfigTransactionGetRequest {
|
||||||
static constexpr FileIdentifier file_identifier = 923040;
|
static constexpr FileIdentifier file_identifier = 923040;
|
||||||
|
size_t coordinatorsHash;
|
||||||
ConfigGeneration generation;
|
ConfigGeneration generation;
|
||||||
ConfigKey key;
|
ConfigKey key;
|
||||||
ReplyPromise<ConfigTransactionGetReply> reply;
|
ReplyPromise<ConfigTransactionGetReply> reply;
|
||||||
|
|
||||||
ConfigTransactionGetRequest() = default;
|
ConfigTransactionGetRequest() = default;
|
||||||
explicit ConfigTransactionGetRequest(ConfigGeneration generation, ConfigKey key)
|
explicit ConfigTransactionGetRequest(size_t coordinatorsHash, ConfigGeneration generation, ConfigKey key)
|
||||||
: generation(generation), key(key) {}
|
: coordinatorsHash(coordinatorsHash), generation(generation), key(key) {}
|
||||||
|
|
||||||
template <class Ar>
|
template <class Ar>
|
||||||
void serialize(Ar& ar) {
|
void serialize(Ar& ar) {
|
||||||
serializer(ar, generation, key, reply);
|
serializer(ar, coordinatorsHash, generation, key, reply);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ConfigTransactionCommitRequest {
|
struct ConfigTransactionCommitRequest {
|
||||||
static constexpr FileIdentifier file_identifier = 103841;
|
static constexpr FileIdentifier file_identifier = 103841;
|
||||||
Arena arena;
|
Arena arena;
|
||||||
|
size_t coordinatorsHash;
|
||||||
ConfigGeneration generation{ ::invalidVersion, ::invalidVersion };
|
ConfigGeneration generation{ ::invalidVersion, ::invalidVersion };
|
||||||
VectorRef<ConfigMutationRef> mutations;
|
VectorRef<ConfigMutationRef> mutations;
|
||||||
ConfigCommitAnnotationRef annotation;
|
ConfigCommitAnnotationRef annotation;
|
||||||
ReplyPromise<Void> reply;
|
ReplyPromise<Void> reply;
|
||||||
|
|
||||||
ConfigTransactionCommitRequest() = default;
|
ConfigTransactionCommitRequest() = default;
|
||||||
explicit ConfigTransactionCommitRequest(ConfigGeneration generation,
|
explicit ConfigTransactionCommitRequest(size_t coordinatorsHash,
|
||||||
|
ConfigGeneration generation,
|
||||||
VectorRef<ConfigMutationRef> mutations,
|
VectorRef<ConfigMutationRef> mutations,
|
||||||
ConfigCommitAnnotationRef annotation)
|
ConfigCommitAnnotationRef annotation)
|
||||||
: generation(generation), mutations(arena, mutations), annotation(arena, annotation) {}
|
: coordinatorsHash(coordinatorsHash), generation(generation), mutations(arena, mutations),
|
||||||
|
annotation(arena, annotation) {}
|
||||||
|
|
||||||
size_t expectedSize() const { return mutations.expectedSize() + annotation.expectedSize(); }
|
size_t expectedSize() const { return mutations.expectedSize() + annotation.expectedSize(); }
|
||||||
|
|
||||||
template <class Ar>
|
template <class Ar>
|
||||||
void serialize(Ar& ar) {
|
void serialize(Ar& ar) {
|
||||||
serializer(ar, generation, mutations, annotation, reply, arena);
|
serializer(ar, coordinatorsHash, generation, mutations, annotation, reply, arena);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -377,6 +377,12 @@ std::vector<std::pair<UID, Version>> decodeBackupStartedValue(const ValueRef& va
|
||||||
// 1 = Send a signal to pause/already paused.
|
// 1 = Send a signal to pause/already paused.
|
||||||
extern const KeyRef backupPausedKey;
|
extern const KeyRef backupPausedKey;
|
||||||
|
|
||||||
|
// "\xff/previousCoordinators" = "[[ClusterConnectionString]]"
|
||||||
|
// Set to the encoded structure of the cluster's previous set of coordinators.
|
||||||
|
// Changed when performing quorumChange.
|
||||||
|
// See "CoordinationInterface.h" struct ClusterConnectionString for more details
|
||||||
|
extern const KeyRef previousCoordinatorsKey;
|
||||||
|
|
||||||
// "\xff/coordinators" = "[[ClusterConnectionString]]"
|
// "\xff/coordinators" = "[[ClusterConnectionString]]"
|
||||||
// Set to the encoded structure of the cluster's current set of coordinators.
|
// Set to the encoded structure of the cluster's current set of coordinators.
|
||||||
// Changed when performing quorumChange.
|
// Changed when performing quorumChange.
|
||||||
|
|
|
@ -48,8 +48,9 @@ enum WellKnownEndpoints {
|
||||||
WLTOKEN_CONFIGFOLLOWER_COMPACT, // 20
|
WLTOKEN_CONFIGFOLLOWER_COMPACT, // 20
|
||||||
WLTOKEN_CONFIGFOLLOWER_ROLLFORWARD, // 21
|
WLTOKEN_CONFIGFOLLOWER_ROLLFORWARD, // 21
|
||||||
WLTOKEN_CONFIGFOLLOWER_GETCOMMITTEDVERSION, // 22
|
WLTOKEN_CONFIGFOLLOWER_GETCOMMITTEDVERSION, // 22
|
||||||
WLTOKEN_PROCESS, // 23
|
WLTOKEN_CONFIGFOLLOWER_LOCK, // 23
|
||||||
WLTOKEN_RESERVED_COUNT // 24
|
WLTOKEN_PROCESS, // 24
|
||||||
|
WLTOKEN_RESERVED_COUNT // 25
|
||||||
};
|
};
|
||||||
|
|
||||||
static_assert(WLTOKEN_PROTOCOL_INFO ==
|
static_assert(WLTOKEN_PROTOCOL_INFO ==
|
||||||
|
|
|
@ -327,7 +327,8 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
void checkSetConfigKeys(MutationRef m) {
|
void checkSetConfigKeys(MutationRef m) {
|
||||||
if (!m.param1.startsWith(configKeysPrefix) && m.param1 != coordinatorsKey) {
|
if (!m.param1.startsWith(configKeysPrefix) && m.param1 != coordinatorsKey &&
|
||||||
|
m.param1 != previousCoordinatorsKey) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (Optional<StringRef>(m.param2) !=
|
if (Optional<StringRef>(m.param2) !=
|
||||||
|
@ -343,7 +344,8 @@ private:
|
||||||
TraceEvent("MutationRequiresRestart", dbgid)
|
TraceEvent("MutationRequiresRestart", dbgid)
|
||||||
.detail("M", m)
|
.detail("M", m)
|
||||||
.detail("PrevValue", t.orDefault("(none)"_sr))
|
.detail("PrevValue", t.orDefault("(none)"_sr))
|
||||||
.detail("ToCommit", toCommit != nullptr);
|
.detail("ToCommit", toCommit != nullptr)
|
||||||
|
.detail("InitialCommit", initialCommit);
|
||||||
confChange = true;
|
confChange = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1116,6 +1118,9 @@ private:
|
||||||
if (initialCommit) {
|
if (initialCommit) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (range.contains(previousCoordinatorsKey)) {
|
||||||
|
txnStateStore->clear(singleKeyRange(previousCoordinatorsKey));
|
||||||
|
}
|
||||||
if (range.contains(coordinatorsKey)) {
|
if (range.contains(coordinatorsKey)) {
|
||||||
txnStateStore->clear(singleKeyRange(coordinatorsKey));
|
txnStateStore->clear(singleKeyRange(coordinatorsKey));
|
||||||
}
|
}
|
||||||
|
|
|
@ -43,7 +43,6 @@
|
||||||
#include "fdbserver/ClusterRecovery.actor.h"
|
#include "fdbserver/ClusterRecovery.actor.h"
|
||||||
#include "fdbserver/DataDistributorInterface.h"
|
#include "fdbserver/DataDistributorInterface.h"
|
||||||
#include "fdbserver/DBCoreState.h"
|
#include "fdbserver/DBCoreState.h"
|
||||||
#include "fdbserver/ConfigBroadcaster.h"
|
|
||||||
#include "fdbserver/MoveKeys.actor.h"
|
#include "fdbserver/MoveKeys.actor.h"
|
||||||
#include "fdbserver/LeaderElection.h"
|
#include "fdbserver/LeaderElection.h"
|
||||||
#include "fdbserver/LogSystem.h"
|
#include "fdbserver/LogSystem.h"
|
||||||
|
@ -196,6 +195,21 @@ struct EncryptKeyProxySingleton : Singleton<EncryptKeyProxyInterface> {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
ACTOR Future<Optional<Value>> getPreviousCoordinators(ClusterControllerData* self) {
|
||||||
|
state ReadYourWritesTransaction tr(self->db.db);
|
||||||
|
loop {
|
||||||
|
try {
|
||||||
|
tr.setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
|
||||||
|
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||||
|
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||||
|
Optional<Value> previousCoordinators = wait(tr.get(previousCoordinatorsKey));
|
||||||
|
return previousCoordinators;
|
||||||
|
} catch (Error& e) {
|
||||||
|
wait(tr.onError(e));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ACTOR Future<Void> clusterWatchDatabase(ClusterControllerData* cluster,
|
ACTOR Future<Void> clusterWatchDatabase(ClusterControllerData* cluster,
|
||||||
ClusterControllerData::DBInfo* db,
|
ClusterControllerData::DBInfo* db,
|
||||||
ServerCoordinators coordinators,
|
ServerCoordinators coordinators,
|
||||||
|
@ -1209,13 +1223,14 @@ ACTOR Future<Void> registerWorker(RegisterWorkerRequest req,
|
||||||
w.locality.processId() == self->db.serverInfo->get().master.locality.processId()) {
|
w.locality.processId() == self->db.serverInfo->get().master.locality.processId()) {
|
||||||
self->masterProcessId = w.locality.processId();
|
self->masterProcessId = w.locality.processId();
|
||||||
}
|
}
|
||||||
if (configBroadcaster != nullptr && isCoordinator) {
|
if (configBroadcaster != nullptr) {
|
||||||
self->addActor.send(configBroadcaster->registerNode(
|
self->addActor.send(configBroadcaster->registerNode(
|
||||||
w,
|
w,
|
||||||
req.lastSeenKnobVersion,
|
req.lastSeenKnobVersion,
|
||||||
req.knobConfigClassSet,
|
req.knobConfigClassSet,
|
||||||
self->id_worker[w.locality.processId()].watcher,
|
self->id_worker[w.locality.processId()].watcher,
|
||||||
self->id_worker[w.locality.processId()].details.interf.configBroadcastInterface));
|
self->id_worker[w.locality.processId()].details.interf.configBroadcastInterface,
|
||||||
|
isCoordinator));
|
||||||
}
|
}
|
||||||
self->updateDBInfoEndpoints.insert(w.updateServerDBInfo.getEndpoint());
|
self->updateDBInfoEndpoints.insert(w.updateServerDBInfo.getEndpoint());
|
||||||
self->updateDBInfo.trigger();
|
self->updateDBInfo.trigger();
|
||||||
|
@ -1246,12 +1261,13 @@ ACTOR Future<Void> registerWorker(RegisterWorkerRequest req,
|
||||||
self->updateDBInfoEndpoints.insert(w.updateServerDBInfo.getEndpoint());
|
self->updateDBInfoEndpoints.insert(w.updateServerDBInfo.getEndpoint());
|
||||||
self->updateDBInfo.trigger();
|
self->updateDBInfo.trigger();
|
||||||
}
|
}
|
||||||
if (configBroadcaster != nullptr && isCoordinator) {
|
if (configBroadcaster != nullptr) {
|
||||||
self->addActor.send(configBroadcaster->registerNode(w,
|
self->addActor.send(configBroadcaster->registerNode(w,
|
||||||
req.lastSeenKnobVersion,
|
req.lastSeenKnobVersion,
|
||||||
req.knobConfigClassSet,
|
req.knobConfigClassSet,
|
||||||
info->second.watcher,
|
info->second.watcher,
|
||||||
info->second.details.interf.configBroadcastInterface));
|
info->second.details.interf.configBroadcastInterface,
|
||||||
|
isCoordinator));
|
||||||
}
|
}
|
||||||
checkOutstandingRequests(self);
|
checkOutstandingRequests(self);
|
||||||
} else {
|
} else {
|
||||||
|
@ -2536,10 +2552,10 @@ ACTOR Future<Void> clusterControllerCore(ClusterControllerFullInterface interf,
|
||||||
ConfigDBType configDBType,
|
ConfigDBType configDBType,
|
||||||
Future<Void> recoveredDiskFiles) {
|
Future<Void> recoveredDiskFiles) {
|
||||||
state ClusterControllerData self(interf, locality, coordinators);
|
state ClusterControllerData self(interf, locality, coordinators);
|
||||||
state ConfigBroadcaster configBroadcaster(coordinators, configDBType);
|
|
||||||
state Future<Void> coordinationPingDelay = delay(SERVER_KNOBS->WORKER_COORDINATION_PING_DELAY);
|
state Future<Void> coordinationPingDelay = delay(SERVER_KNOBS->WORKER_COORDINATION_PING_DELAY);
|
||||||
state uint64_t step = 0;
|
state uint64_t step = 0;
|
||||||
state Future<ErrorOr<Void>> error = errorOr(actorCollection(self.addActor.getFuture()));
|
state Future<ErrorOr<Void>> error = errorOr(actorCollection(self.addActor.getFuture()));
|
||||||
|
state ConfigBroadcaster configBroadcaster(coordinators, configDBType, getPreviousCoordinators(&self));
|
||||||
|
|
||||||
// EncryptKeyProxy is necessary for TLog recovery, recruit it as the first process
|
// EncryptKeyProxy is necessary for TLog recovery, recruit it as the first process
|
||||||
if (SERVER_KNOBS->ENABLE_ENCRYPTION) {
|
if (SERVER_KNOBS->ENABLE_ENCRYPTION) {
|
||||||
|
|
|
@ -521,6 +521,7 @@ ACTOR Future<Void> changeCoordinators(Reference<ClusterRecoveryData> self) {
|
||||||
loop {
|
loop {
|
||||||
ChangeCoordinatorsRequest req = waitNext(self->clusterController.changeCoordinators.getFuture());
|
ChangeCoordinatorsRequest req = waitNext(self->clusterController.changeCoordinators.getFuture());
|
||||||
TraceEvent("ChangeCoordinators", self->dbgid).log();
|
TraceEvent("ChangeCoordinators", self->dbgid).log();
|
||||||
|
|
||||||
++self->changeCoordinatorsRequests;
|
++self->changeCoordinatorsRequests;
|
||||||
state ChangeCoordinatorsRequest changeCoordinatorsRequest = req;
|
state ChangeCoordinatorsRequest changeCoordinatorsRequest = req;
|
||||||
if (self->masterInterface.id() != changeCoordinatorsRequest.masterId) {
|
if (self->masterInterface.id() != changeCoordinatorsRequest.masterId) {
|
||||||
|
@ -1637,6 +1638,11 @@ ACTOR Future<Void> clusterRecoveryCore(Reference<ClusterRecoveryData> self) {
|
||||||
tr.set(
|
tr.set(
|
||||||
recoveryCommitRequest.arena, primaryLocalityKey, BinaryWriter::toValue(self->primaryLocality, Unversioned()));
|
recoveryCommitRequest.arena, primaryLocalityKey, BinaryWriter::toValue(self->primaryLocality, Unversioned()));
|
||||||
tr.set(recoveryCommitRequest.arena, backupVersionKey, backupVersionValue);
|
tr.set(recoveryCommitRequest.arena, backupVersionKey, backupVersionValue);
|
||||||
|
Optional<Value> txnStateStoreCoords = self->txnStateStore->readValue(coordinatorsKey).get();
|
||||||
|
if (txnStateStoreCoords.present() &&
|
||||||
|
txnStateStoreCoords.get() != self->coordinators.ccr->getConnectionString().toString()) {
|
||||||
|
tr.set(recoveryCommitRequest.arena, previousCoordinatorsKey, txnStateStoreCoords.get());
|
||||||
|
}
|
||||||
tr.set(recoveryCommitRequest.arena, coordinatorsKey, self->coordinators.ccr->getConnectionString().toString());
|
tr.set(recoveryCommitRequest.arena, coordinatorsKey, self->coordinators.ccr->getConnectionString().toString());
|
||||||
tr.set(recoveryCommitRequest.arena, logsKey, self->logSystem->getLogsValue());
|
tr.set(recoveryCommitRequest.arena, logsKey, self->logSystem->getLogsValue());
|
||||||
tr.set(recoveryCommitRequest.arena,
|
tr.set(recoveryCommitRequest.arena,
|
||||||
|
|
|
@ -618,7 +618,7 @@ struct CommitBatchContext {
|
||||||
bool isMyFirstBatch;
|
bool isMyFirstBatch;
|
||||||
bool firstStateMutations;
|
bool firstStateMutations;
|
||||||
|
|
||||||
Optional<Value> oldCoordinators;
|
Optional<Value> previousCoordinators;
|
||||||
|
|
||||||
StoreCommit_t storeCommits;
|
StoreCommit_t storeCommits;
|
||||||
|
|
||||||
|
@ -1146,7 +1146,7 @@ ACTOR Future<Void> applyMetadataToCommittedTransactions(CommitBatchContext* self
|
||||||
ASSERT(self->commitVersion);
|
ASSERT(self->commitVersion);
|
||||||
|
|
||||||
if (!self->isMyFirstBatch &&
|
if (!self->isMyFirstBatch &&
|
||||||
pProxyCommitData->txnStateStore->readValue(coordinatorsKey).get().get() != self->oldCoordinators.get()) {
|
pProxyCommitData->txnStateStore->readValue(coordinatorsKey).get().get() != self->previousCoordinators.get()) {
|
||||||
wait(brokenPromiseToNever(pProxyCommitData->db->get().clusterInterface.changeCoordinators.getReply(
|
wait(brokenPromiseToNever(pProxyCommitData->db->get().clusterInterface.changeCoordinators.getReply(
|
||||||
ChangeCoordinatorsRequest(pProxyCommitData->txnStateStore->readValue(coordinatorsKey).get().get(),
|
ChangeCoordinatorsRequest(pProxyCommitData->txnStateStore->readValue(coordinatorsKey).get().get(),
|
||||||
self->pProxyCommitData->master.id()))));
|
self->pProxyCommitData->master.id()))));
|
||||||
|
@ -1374,7 +1374,7 @@ ACTOR Future<Void> postResolution(CommitBatchContext* self) {
|
||||||
}
|
}
|
||||||
|
|
||||||
self->isMyFirstBatch = !pProxyCommitData->version.get();
|
self->isMyFirstBatch = !pProxyCommitData->version.get();
|
||||||
self->oldCoordinators = pProxyCommitData->txnStateStore->readValue(coordinatorsKey).get();
|
self->previousCoordinators = pProxyCommitData->txnStateStore->readValue(coordinatorsKey).get();
|
||||||
|
|
||||||
assertResolutionStateMutationsSizeConsistent(self->resolution);
|
assertResolutionStateMutationsSizeConsistent(self->resolution);
|
||||||
|
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
|
#include "fdbclient/ClusterConnectionMemoryRecord.h"
|
||||||
#include "fdbserver/ConfigBroadcaster.h"
|
#include "fdbserver/ConfigBroadcaster.h"
|
||||||
#include "fdbserver/Knobs.h"
|
#include "fdbserver/Knobs.h"
|
||||||
#include "fdbserver/IConfigConsumer.h"
|
#include "fdbserver/IConfigConsumer.h"
|
||||||
|
@ -77,13 +78,24 @@ class ConfigBroadcasterImpl {
|
||||||
std::deque<VersionedConfigMutation> mutationHistory;
|
std::deque<VersionedConfigMutation> mutationHistory;
|
||||||
std::deque<VersionedConfigCommitAnnotation> annotationHistory;
|
std::deque<VersionedConfigCommitAnnotation> annotationHistory;
|
||||||
Version lastCompactedVersion;
|
Version lastCompactedVersion;
|
||||||
|
Version largestLiveVersion;
|
||||||
Version mostRecentVersion;
|
Version mostRecentVersion;
|
||||||
|
size_t coordinatorsHash;
|
||||||
std::unique_ptr<IConfigConsumer> consumer;
|
std::unique_ptr<IConfigConsumer> consumer;
|
||||||
Future<Void> consumerFuture;
|
Future<Void> consumerFuture;
|
||||||
ActorCollection actors{ false };
|
ActorCollection actors{ false };
|
||||||
std::map<UID, BroadcastClientDetails> clients;
|
std::map<UID, BroadcastClientDetails> clients;
|
||||||
std::map<UID, Future<Void>> clientFailures;
|
std::map<UID, Future<Void>> clientFailures;
|
||||||
|
|
||||||
|
// State related to changing coordinators
|
||||||
|
|
||||||
|
// Used to read a snapshot from the previous coordinators after a change
|
||||||
|
// coordinators command.
|
||||||
|
Version maxLastSeenVersion = ::invalidVersion;
|
||||||
|
Future<Optional<Value>> previousCoordinatorsFuture;
|
||||||
|
std::unique_ptr<IConfigConsumer> previousCoordinatorsConsumer;
|
||||||
|
Future<Void> previousCoordinatorsSnapshotFuture;
|
||||||
|
|
||||||
UID id;
|
UID id;
|
||||||
CounterCollection cc;
|
CounterCollection cc;
|
||||||
Counter compactRequest;
|
Counter compactRequest;
|
||||||
|
@ -95,6 +107,7 @@ class ConfigBroadcasterImpl {
|
||||||
int coordinators = 0;
|
int coordinators = 0;
|
||||||
std::unordered_set<NetworkAddress> activeConfigNodes;
|
std::unordered_set<NetworkAddress> activeConfigNodes;
|
||||||
std::unordered_set<NetworkAddress> registrationResponses;
|
std::unordered_set<NetworkAddress> registrationResponses;
|
||||||
|
std::unordered_set<NetworkAddress> registrationResponsesUnregistered;
|
||||||
bool disallowUnregistered = false;
|
bool disallowUnregistered = false;
|
||||||
Promise<Void> newConfigNodesAllowed;
|
Promise<Void> newConfigNodesAllowed;
|
||||||
|
|
||||||
|
@ -155,8 +168,8 @@ class ConfigBroadcasterImpl {
|
||||||
}
|
}
|
||||||
|
|
||||||
ConfigBroadcasterImpl()
|
ConfigBroadcasterImpl()
|
||||||
: lastCompactedVersion(0), mostRecentVersion(0), id(deterministicRandom()->randomUniqueID()),
|
: lastCompactedVersion(0), largestLiveVersion(0), mostRecentVersion(0),
|
||||||
cc("ConfigBroadcaster"), compactRequest("CompactRequest", cc),
|
id(deterministicRandom()->randomUniqueID()), cc("ConfigBroadcaster"), compactRequest("CompactRequest", cc),
|
||||||
successfulChangeRequest("SuccessfulChangeRequest", cc), failedChangeRequest("FailedChangeRequest", cc),
|
successfulChangeRequest("SuccessfulChangeRequest", cc), failedChangeRequest("FailedChangeRequest", cc),
|
||||||
snapshotRequest("SnapshotRequest", cc) {
|
snapshotRequest("SnapshotRequest", cc) {
|
||||||
logger = traceCounters(
|
logger = traceCounters(
|
||||||
|
@ -183,45 +196,44 @@ class ConfigBroadcasterImpl {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Snapshot>
|
ACTOR static Future<Void> pushSnapshotAndChanges(ConfigBroadcasterImpl* self, Version snapshotVersion) {
|
||||||
Future<Void> setSnapshot(Snapshot&& snapshot, Version snapshotVersion) {
|
|
||||||
this->snapshot = std::forward<Snapshot>(snapshot);
|
|
||||||
this->lastCompactedVersion = snapshotVersion;
|
|
||||||
std::vector<Future<Void>> futures;
|
std::vector<Future<Void>> futures;
|
||||||
for (const auto& [id, client] : clients) {
|
for (const auto& [id, client] : self->clients) {
|
||||||
futures.push_back(brokenPromiseToNever(pushSnapshot(snapshotVersion, client)));
|
futures.push_back(brokenPromiseToNever(self->pushSnapshot(snapshotVersion, client)));
|
||||||
}
|
}
|
||||||
return waitForAll(futures);
|
wait(waitForAll(futures));
|
||||||
}
|
|
||||||
|
|
||||||
ACTOR template <class Snapshot>
|
|
||||||
static Future<Void> pushSnapshotAndChanges(ConfigBroadcasterImpl* self,
|
|
||||||
Snapshot snapshot,
|
|
||||||
Version snapshotVersion,
|
|
||||||
Standalone<VectorRef<VersionedConfigMutationRef>> changes,
|
|
||||||
Version changesVersion,
|
|
||||||
Standalone<VectorRef<VersionedConfigCommitAnnotationRef>> annotations) {
|
|
||||||
// Make sure all snapshot messages were received before sending changes.
|
|
||||||
wait(self->setSnapshot(snapshot, snapshotVersion));
|
|
||||||
self->addChanges(changes, changesVersion, annotations);
|
|
||||||
return Void();
|
return Void();
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR static Future<Void> waitForFailure(ConfigBroadcasterImpl* self,
|
ACTOR static Future<Void> waitForFailure(ConfigBroadcasterImpl* self,
|
||||||
Future<Void> watcher,
|
Future<Void> watcher,
|
||||||
UID clientUID,
|
UID clientUID,
|
||||||
NetworkAddress clientAddress) {
|
NetworkAddress clientAddress,
|
||||||
|
bool isCoordinator) {
|
||||||
wait(watcher);
|
wait(watcher);
|
||||||
TraceEvent(SevDebug, "ConfigBroadcastClientDied", self->id)
|
TraceEvent(SevDebug, "ConfigBroadcastClientDied", self->id)
|
||||||
.detail("ClientID", clientUID)
|
.detail("ClientID", clientUID)
|
||||||
.detail("Address", clientAddress);
|
.detail("Address", clientAddress)
|
||||||
|
.detail("IsUnregistered",
|
||||||
|
self->registrationResponsesUnregistered.find(clientAddress) !=
|
||||||
|
self->registrationResponsesUnregistered.end())
|
||||||
|
.detail("IsActive", self->activeConfigNodes.find(clientAddress) != self->activeConfigNodes.end());
|
||||||
self->clients.erase(clientUID);
|
self->clients.erase(clientUID);
|
||||||
self->clientFailures.erase(clientUID);
|
self->clientFailures.erase(clientUID);
|
||||||
self->activeConfigNodes.erase(clientAddress);
|
if (isCoordinator) {
|
||||||
self->registrationResponses.erase(clientAddress);
|
self->registrationResponses.erase(clientAddress);
|
||||||
// See comment where this promise is reset below.
|
if (self->activeConfigNodes.find(clientAddress) != self->activeConfigNodes.end()) {
|
||||||
if (self->newConfigNodesAllowed.isSet()) {
|
self->activeConfigNodes.erase(clientAddress);
|
||||||
self->newConfigNodesAllowed.reset();
|
if (self->registrationResponsesUnregistered.find(clientAddress) !=
|
||||||
|
self->registrationResponsesUnregistered.end()) {
|
||||||
|
self->registrationResponsesUnregistered.erase(clientAddress);
|
||||||
|
self->disallowUnregistered = false;
|
||||||
|
// See comment where this promise is reset below.
|
||||||
|
if (self->newConfigNodesAllowed.isSet()) {
|
||||||
|
self->newConfigNodesAllowed.reset();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return Void();
|
return Void();
|
||||||
}
|
}
|
||||||
|
@ -231,57 +243,71 @@ class ConfigBroadcasterImpl {
|
||||||
// ensure strict serializability, some nodes may be temporarily restricted
|
// ensure strict serializability, some nodes may be temporarily restricted
|
||||||
// from participation until the other nodes in the system are brought up to
|
// from participation until the other nodes in the system are brought up to
|
||||||
// date.
|
// date.
|
||||||
ACTOR static Future<Void> registerNodeInternal(ConfigBroadcasterImpl* self,
|
ACTOR static Future<Void> registerNodeInternal(ConfigBroadcaster* broadcaster,
|
||||||
WorkerInterface w,
|
ConfigBroadcasterImpl* self,
|
||||||
Version lastSeenVersion) {
|
WorkerInterface w) {
|
||||||
if (self->configDBType == ConfigDBType::SIMPLE) {
|
if (self->configDBType == ConfigDBType::SIMPLE) {
|
||||||
wait(success(retryBrokenPromise(w.configBroadcastInterface.ready, ConfigBroadcastReadyRequest{})));
|
wait(success(
|
||||||
|
brokenPromiseToNever(w.configBroadcastInterface.ready.getReply(ConfigBroadcastReadyRequest{}))));
|
||||||
return Void();
|
return Void();
|
||||||
}
|
}
|
||||||
|
|
||||||
state NetworkAddress address = w.address();
|
state NetworkAddress address = w.address();
|
||||||
|
|
||||||
// Ask the registering ConfigNode whether it has registered in the past.
|
// Ask the registering ConfigNode whether it has registered in the past.
|
||||||
ConfigBroadcastRegisteredReply reply =
|
state ConfigBroadcastRegisteredReply reply = wait(
|
||||||
wait(w.configBroadcastInterface.registered.getReply(ConfigBroadcastRegisteredRequest{}));
|
brokenPromiseToNever(w.configBroadcastInterface.registered.getReply(ConfigBroadcastRegisteredRequest{})));
|
||||||
|
self->maxLastSeenVersion = std::max(self->maxLastSeenVersion, reply.lastSeenVersion);
|
||||||
state bool registered = reply.registered;
|
state bool registered = reply.registered;
|
||||||
|
TraceEvent("ConfigBroadcasterRegisterNodeReceivedRegistrationReply", self->id)
|
||||||
|
.detail("Address", address)
|
||||||
|
.detail("Registered", registered)
|
||||||
|
.detail("DisallowUnregistered", self->disallowUnregistered)
|
||||||
|
.detail("LastSeenVersion", reply.lastSeenVersion);
|
||||||
|
|
||||||
if (self->activeConfigNodes.find(address) != self->activeConfigNodes.end()) {
|
if (self->activeConfigNodes.find(address) != self->activeConfigNodes.end()) {
|
||||||
self->activeConfigNodes.erase(address);
|
self->activeConfigNodes.erase(address);
|
||||||
// Since a node can die and re-register before the broadcaster
|
if (self->registrationResponsesUnregistered.find(address) !=
|
||||||
// receives notice that the node has died, we need to check for
|
self->registrationResponsesUnregistered.end()) {
|
||||||
// re-registration of a node here. There are two places that can
|
self->registrationResponsesUnregistered.erase(address);
|
||||||
// reset the promise to allow new nodes, make sure the promise is
|
// If an unregistered node died which was active, reset the
|
||||||
// actually set before resetting it. This prevents a node from
|
// disallow unregistered flag so if it re-registers it can be
|
||||||
// dying, registering, waiting on the promise, then the broadcaster
|
// set as active again.
|
||||||
// receives the notification the node has died and resets the
|
self->disallowUnregistered = false;
|
||||||
// promise again.
|
// Since a node can die and re-register before the broadcaster
|
||||||
if (self->newConfigNodesAllowed.isSet()) {
|
// receives notice that the node has died, we need to check for
|
||||||
self->newConfigNodesAllowed.reset();
|
// re-registration of a node here. There are two places that can
|
||||||
|
// reset the promise to allow new nodes, so make sure the promise
|
||||||
|
// is actually set before resetting it. This prevents a node from
|
||||||
|
// dying, registering, waiting on the promise, then the broadcaster
|
||||||
|
// receives the notification the node has died and resets the
|
||||||
|
// promise again.
|
||||||
|
if (self->newConfigNodesAllowed.isSet()) {
|
||||||
|
self->newConfigNodesAllowed.reset();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
self->registrationResponses.insert(address);
|
self->registrationResponses.insert(address);
|
||||||
|
|
||||||
if (registered) {
|
if (registered) {
|
||||||
if (!self->disallowUnregistered) {
|
|
||||||
self->activeConfigNodes.clear();
|
|
||||||
}
|
|
||||||
self->activeConfigNodes.insert(address);
|
self->activeConfigNodes.insert(address);
|
||||||
self->disallowUnregistered = true;
|
self->disallowUnregistered = true;
|
||||||
} else if ((self->activeConfigNodes.size() < self->coordinators / 2 + 1 && !self->disallowUnregistered) ||
|
} else if ((self->activeConfigNodes.size() < self->coordinators / 2 + 1 && !self->disallowUnregistered) ||
|
||||||
self->coordinators - self->registrationResponses.size() <=
|
self->registrationResponsesUnregistered.size() < self->coordinators / 2) {
|
||||||
self->coordinators / 2 + 1 - self->activeConfigNodes.size()) {
|
|
||||||
// Received a registration request from an unregistered node. There
|
// Received a registration request from an unregistered node. There
|
||||||
// are two cases where we want to allow unregistered nodes to
|
// are two cases where we want to allow unregistered nodes to
|
||||||
// register:
|
// register:
|
||||||
// * the cluster is just starting and no nodes are registered
|
// * the cluster is just starting and no nodes are registered
|
||||||
// * a minority of nodes are registered and a majority are
|
// * there are registered and unregistered nodes, but the
|
||||||
// unregistered. This situation should only occur in rare
|
// registered nodes may not represent a majority due to previous
|
||||||
// circumstances where the cluster controller dies with only a
|
// data loss. In this case, unregistered nodes must be allowed
|
||||||
// minority of config nodes having received a
|
// to register so they can be rolled forward and form a quorum.
|
||||||
// ConfigBroadcastReadyRequest
|
// But only a minority of unregistered nodes should be allowed
|
||||||
|
// to register so they cannot override the registered nodes as
|
||||||
|
// a source of truth
|
||||||
self->activeConfigNodes.insert(address);
|
self->activeConfigNodes.insert(address);
|
||||||
if (self->activeConfigNodes.size() >= self->coordinators / 2 + 1 &&
|
self->registrationResponsesUnregistered.insert(address);
|
||||||
|
if ((self->activeConfigNodes.size() >= self->coordinators / 2 + 1 ||
|
||||||
|
self->registrationResponsesUnregistered.size() >= self->coordinators / 2 + 1) &&
|
||||||
self->newConfigNodesAllowed.canBeSet()) {
|
self->newConfigNodesAllowed.canBeSet()) {
|
||||||
self->newConfigNodesAllowed.send(Void());
|
self->newConfigNodesAllowed.send(Void());
|
||||||
}
|
}
|
||||||
|
@ -289,11 +315,78 @@ class ConfigBroadcasterImpl {
|
||||||
self->disallowUnregistered = true;
|
self->disallowUnregistered = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!registered) {
|
// Read previous coordinators and fetch snapshot from them if they
|
||||||
|
// exist. This path should only be hit once after the coordinators are
|
||||||
|
// changed.
|
||||||
|
wait(yield());
|
||||||
|
Optional<Value> previousCoordinators = wait(self->previousCoordinatorsFuture);
|
||||||
|
TraceEvent("ConfigBroadcasterRegisterNodeReadPreviousCoordinators", self->id)
|
||||||
|
.detail("PreviousCoordinators", previousCoordinators)
|
||||||
|
.detail("HasStartedConsumer", self->previousCoordinatorsSnapshotFuture.isValid());
|
||||||
|
|
||||||
|
if (previousCoordinators.present()) {
|
||||||
|
if (!self->previousCoordinatorsSnapshotFuture.isValid()) {
|
||||||
|
// Create a consumer to read a snapshot from the previous
|
||||||
|
// coordinators. The snapshot will be forwarded to the new
|
||||||
|
// coordinators to bring them up to date.
|
||||||
|
size_t previousCoordinatorsHash = std::hash<std::string>()(previousCoordinators.get().toString());
|
||||||
|
if (previousCoordinatorsHash != self->coordinatorsHash) {
|
||||||
|
ServerCoordinators previousCoordinatorsData(Reference<IClusterConnectionRecord>(
|
||||||
|
new ClusterConnectionMemoryRecord(previousCoordinators.get().toString())));
|
||||||
|
TraceEvent("ConfigBroadcasterRegisterNodeStartingConsumer", self->id).log();
|
||||||
|
self->previousCoordinatorsConsumer = IConfigConsumer::createPaxos(
|
||||||
|
previousCoordinatorsData, 0.5, SERVER_KNOBS->COMPACTION_INTERVAL, true);
|
||||||
|
self->previousCoordinatorsSnapshotFuture =
|
||||||
|
self->previousCoordinatorsConsumer->readSnapshot(*broadcaster);
|
||||||
|
} else {
|
||||||
|
// If the cluster controller restarts without a coordinator
|
||||||
|
// change having taken place, there is no need to read a
|
||||||
|
// previous snapshot.
|
||||||
|
self->previousCoordinatorsSnapshotFuture = Void();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
wait(self->previousCoordinatorsSnapshotFuture);
|
||||||
|
}
|
||||||
|
|
||||||
|
state bool sendSnapshot =
|
||||||
|
self->previousCoordinatorsConsumer && reply.lastSeenVersion <= self->mostRecentVersion;
|
||||||
|
// Unregistered nodes need to wait for either:
|
||||||
|
// 1. A quorum of registered nodes to register and send their
|
||||||
|
// snapshots, so the unregistered nodes can be rolled forward, or
|
||||||
|
// 2. A quorum of unregistered nodes to contact the broadcaster (this
|
||||||
|
// means there is no previous data in the configuration database)
|
||||||
|
// The above conditions do not apply when changing coordinators, as a
|
||||||
|
// snapshot of the current state of the configuration database needs to
|
||||||
|
// be sent to all new coordinators.
|
||||||
|
TraceEvent("ConfigBroadcasterRegisterNodeDetermineEligibility", self->id)
|
||||||
|
.detail("Registered", registered)
|
||||||
|
.detail("SendSnapshot", sendSnapshot);
|
||||||
|
if (!registered && !sendSnapshot) {
|
||||||
wait(self->newConfigNodesAllowed.getFuture());
|
wait(self->newConfigNodesAllowed.getFuture());
|
||||||
}
|
}
|
||||||
|
|
||||||
wait(success(w.configBroadcastInterface.ready.getReply(ConfigBroadcastReadyRequest{})));
|
TraceEvent("ConfigBroadcasterRegisterNodeSendingReadyRequest", self->id)
|
||||||
|
.detail("ConfigNodeAddress", address)
|
||||||
|
.detail("SendSnapshot", sendSnapshot)
|
||||||
|
.detail("SnapshotVersion", self->mostRecentVersion)
|
||||||
|
.detail("SnapshotSize", self->snapshot.size())
|
||||||
|
.detail("LargestLiveVersion", self->largestLiveVersion);
|
||||||
|
if (sendSnapshot) {
|
||||||
|
Version liveVersion = std::max(self->largestLiveVersion, self->mostRecentVersion);
|
||||||
|
wait(success(brokenPromiseToNever(w.configBroadcastInterface.ready.getReply(ConfigBroadcastReadyRequest{
|
||||||
|
self->coordinatorsHash, self->snapshot, self->mostRecentVersion, liveVersion }))));
|
||||||
|
} else {
|
||||||
|
wait(success(brokenPromiseToNever(w.configBroadcastInterface.ready.getReply(
|
||||||
|
ConfigBroadcastReadyRequest{ self->coordinatorsHash, {}, -1, -1 }))));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start the consumer last, so at least some nodes will be registered.
|
||||||
|
if (!self->consumerFuture.isValid()) {
|
||||||
|
if (sendSnapshot) {
|
||||||
|
self->consumer->allowSpecialCaseRollforward();
|
||||||
|
}
|
||||||
|
self->consumerFuture = self->consumer->consume(*broadcaster);
|
||||||
|
}
|
||||||
return Void();
|
return Void();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -303,12 +396,10 @@ class ConfigBroadcasterImpl {
|
||||||
Version lastSeenVersion,
|
Version lastSeenVersion,
|
||||||
ConfigClassSet configClassSet,
|
ConfigClassSet configClassSet,
|
||||||
Future<Void> watcher,
|
Future<Void> watcher,
|
||||||
ConfigBroadcastInterface broadcastInterface) {
|
ConfigBroadcastInterface broadcastInterface,
|
||||||
|
bool isCoordinator) {
|
||||||
state BroadcastClientDetails client(
|
state BroadcastClientDetails client(
|
||||||
watcher, std::move(configClassSet), lastSeenVersion, std::move(broadcastInterface));
|
watcher, std::move(configClassSet), lastSeenVersion, std::move(broadcastInterface));
|
||||||
if (!impl->consumerFuture.isValid()) {
|
|
||||||
impl->consumerFuture = impl->consumer->consume(*self);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (impl->clients.count(broadcastInterface.id())) {
|
if (impl->clients.count(broadcastInterface.id())) {
|
||||||
// Client already registered
|
// Client already registered
|
||||||
|
@ -317,15 +408,18 @@ class ConfigBroadcasterImpl {
|
||||||
|
|
||||||
TraceEvent(SevDebug, "ConfigBroadcasterRegisteringWorker", impl->id)
|
TraceEvent(SevDebug, "ConfigBroadcasterRegisteringWorker", impl->id)
|
||||||
.detail("ClientID", broadcastInterface.id())
|
.detail("ClientID", broadcastInterface.id())
|
||||||
.detail("MostRecentVersion", impl->mostRecentVersion);
|
.detail("MostRecentVersion", impl->mostRecentVersion)
|
||||||
|
.detail("IsCoordinator", isCoordinator);
|
||||||
|
|
||||||
impl->actors.add(registerNodeInternal(impl, w, lastSeenVersion));
|
if (isCoordinator) {
|
||||||
|
impl->actors.add(registerNodeInternal(self, impl, w));
|
||||||
|
}
|
||||||
|
|
||||||
// Push full snapshot to worker if it isn't up to date.
|
// Push full snapshot to worker if it isn't up to date.
|
||||||
wait(impl->pushSnapshot(impl->mostRecentVersion, client));
|
wait(impl->pushSnapshot(impl->mostRecentVersion, client));
|
||||||
impl->clients[broadcastInterface.id()] = client;
|
impl->clients[broadcastInterface.id()] = client;
|
||||||
impl->clientFailures[broadcastInterface.id()] =
|
impl->clientFailures[broadcastInterface.id()] =
|
||||||
waitForFailure(impl, watcher, broadcastInterface.id(), w.address());
|
waitForFailure(impl, watcher, broadcastInterface.id(), w.address(), isCoordinator);
|
||||||
return Void();
|
return Void();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -335,8 +429,10 @@ public:
|
||||||
Version lastSeenVersion,
|
Version lastSeenVersion,
|
||||||
ConfigClassSet configClassSet,
|
ConfigClassSet configClassSet,
|
||||||
Future<Void> watcher,
|
Future<Void> watcher,
|
||||||
ConfigBroadcastInterface const& broadcastInterface) {
|
ConfigBroadcastInterface const& broadcastInterface,
|
||||||
return registerNode(&self, this, w, lastSeenVersion, configClassSet, watcher, broadcastInterface);
|
bool isCoordinator) {
|
||||||
|
return registerNode(
|
||||||
|
&self, this, w, lastSeenVersion, configClassSet, watcher, broadcastInterface, isCoordinator);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Updates the broadcasters knowledge of which replicas are fully up to
|
// Updates the broadcasters knowledge of which replicas are fully up to
|
||||||
|
@ -377,17 +473,36 @@ public:
|
||||||
Standalone<VectorRef<VersionedConfigMutationRef>> const& changes,
|
Standalone<VectorRef<VersionedConfigMutationRef>> const& changes,
|
||||||
Version changesVersion,
|
Version changesVersion,
|
||||||
Standalone<VectorRef<VersionedConfigCommitAnnotationRef>> const& annotations,
|
Standalone<VectorRef<VersionedConfigCommitAnnotationRef>> const& annotations,
|
||||||
std::vector<ConfigFollowerInterface> const& readReplicas) {
|
std::vector<ConfigFollowerInterface> const& readReplicas,
|
||||||
|
Version largestLiveVersion,
|
||||||
|
bool fromPreviousCoordinators) {
|
||||||
TraceEvent(SevDebug, "ConfigBroadcasterApplyingSnapshotAndChanges", id)
|
TraceEvent(SevDebug, "ConfigBroadcasterApplyingSnapshotAndChanges", id)
|
||||||
.detail("CurrentMostRecentVersion", this->mostRecentVersion)
|
.detail("CurrentMostRecentVersion", this->mostRecentVersion)
|
||||||
.detail("SnapshotSize", snapshot.size())
|
.detail("SnapshotSize", snapshot.size())
|
||||||
.detail("SnapshotVersion", snapshotVersion)
|
.detail("SnapshotVersion", snapshotVersion)
|
||||||
.detail("ChangesSize", changes.size())
|
.detail("ChangesSize", changes.size())
|
||||||
.detail("ChangesVersion", changesVersion)
|
.detail("ChangesVersion", changesVersion)
|
||||||
.detail("ActiveReplicas", readReplicas.size());
|
.detail("ActiveReplicas", readReplicas.size())
|
||||||
actors.add(pushSnapshotAndChanges(this, snapshot, snapshotVersion, changes, changesVersion, annotations));
|
.detail("LargestLiveVersion", largestLiveVersion)
|
||||||
|
.detail("FromPreviousCoordinators", fromPreviousCoordinators);
|
||||||
|
// Avoid updating state if the snapshot contains no mutations, or if it
|
||||||
|
// contains old mutations. This can happen when the set of coordinators
|
||||||
|
// is changed, and a new coordinator comes online that has not yet had
|
||||||
|
// the current configuration database pushed to it, or when a new
|
||||||
|
// coordinator contains state from an old configuration database
|
||||||
|
// generation.
|
||||||
|
if ((snapshot.size() != 0 || changes.size() != 0) &&
|
||||||
|
(snapshotVersion > this->mostRecentVersion || changesVersion > this->mostRecentVersion)) {
|
||||||
|
this->snapshot = std::forward<Snapshot>(snapshot);
|
||||||
|
this->lastCompactedVersion = snapshotVersion;
|
||||||
|
this->largestLiveVersion = std::max(this->largestLiveVersion, largestLiveVersion);
|
||||||
|
addChanges(changes, changesVersion, annotations);
|
||||||
|
actors.add(pushSnapshotAndChanges(this, snapshotVersion));
|
||||||
|
}
|
||||||
|
|
||||||
updateKnownReplicas(readReplicas);
|
if (!fromPreviousCoordinators) {
|
||||||
|
updateKnownReplicas(readReplicas);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ConfigBroadcasterImpl(ConfigFollowerInterface const& cfi) : ConfigBroadcasterImpl() {
|
ConfigBroadcasterImpl(ConfigFollowerInterface const& cfi) : ConfigBroadcasterImpl() {
|
||||||
|
@ -397,18 +512,27 @@ public:
|
||||||
TraceEvent(SevDebug, "ConfigBroadcasterStartingConsumer", id).detail("Consumer", consumer->getID());
|
TraceEvent(SevDebug, "ConfigBroadcasterStartingConsumer", id).detail("Consumer", consumer->getID());
|
||||||
}
|
}
|
||||||
|
|
||||||
ConfigBroadcasterImpl(ServerCoordinators const& coordinators, ConfigDBType configDBType) : ConfigBroadcasterImpl() {
|
ConfigBroadcasterImpl(ServerCoordinators const& coordinators,
|
||||||
|
ConfigDBType configDBType,
|
||||||
|
Future<Optional<Value>> previousCoordinatorsFuture)
|
||||||
|
: ConfigBroadcasterImpl() {
|
||||||
this->configDBType = configDBType;
|
this->configDBType = configDBType;
|
||||||
this->coordinators = coordinators.configServers.size();
|
this->coordinators = coordinators.configServers.size();
|
||||||
if (configDBType != ConfigDBType::DISABLED) {
|
if (configDBType != ConfigDBType::DISABLED) {
|
||||||
if (configDBType == ConfigDBType::SIMPLE) {
|
if (configDBType == ConfigDBType::SIMPLE) {
|
||||||
consumer = IConfigConsumer::createSimple(coordinators, 0.5, SERVER_KNOBS->COMPACTION_INTERVAL);
|
consumer = IConfigConsumer::createSimple(coordinators, 0.5, SERVER_KNOBS->COMPACTION_INTERVAL);
|
||||||
} else {
|
} else {
|
||||||
|
this->previousCoordinatorsFuture = previousCoordinatorsFuture;
|
||||||
consumer = IConfigConsumer::createPaxos(coordinators, 0.5, SERVER_KNOBS->COMPACTION_INTERVAL);
|
consumer = IConfigConsumer::createPaxos(coordinators, 0.5, SERVER_KNOBS->COMPACTION_INTERVAL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
coordinatorsHash = std::hash<std::string>()(coordinators.ccr->getConnectionString().toString());
|
||||||
|
|
||||||
TraceEvent(SevDebug, "ConfigBroadcasterStartingConsumer", id)
|
TraceEvent(SevDebug, "ConfigBroadcasterStartingConsumer", id)
|
||||||
.detail("Consumer", consumer->getID())
|
.detail("Consumer", consumer->getID())
|
||||||
.detail("UsingSimpleConsumer", configDBType == ConfigDBType::SIMPLE);
|
.detail("UsingSimpleConsumer", configDBType == ConfigDBType::SIMPLE)
|
||||||
|
.detail("CoordinatorsCount", this->coordinators)
|
||||||
|
.detail("CoordinatorsHash", coordinatorsHash);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -419,9 +543,12 @@ public:
|
||||||
JsonBuilderObject mutationObject;
|
JsonBuilderObject mutationObject;
|
||||||
mutationObject["version"] = versionedMutation.version;
|
mutationObject["version"] = versionedMutation.version;
|
||||||
const auto& mutation = versionedMutation.mutation;
|
const auto& mutation = versionedMutation.mutation;
|
||||||
|
mutationObject["type"] = mutation.isSet() ? "set" : "clear";
|
||||||
mutationObject["config_class"] = mutation.getConfigClass().orDefault("<global>"_sr);
|
mutationObject["config_class"] = mutation.getConfigClass().orDefault("<global>"_sr);
|
||||||
mutationObject["knob_name"] = mutation.getKnobName();
|
mutationObject["knob_name"] = mutation.getKnobName();
|
||||||
mutationObject["knob_value"] = mutation.getValue().toString();
|
if (mutation.isSet()) {
|
||||||
|
mutationObject["knob_value"] = mutation.getValue().toString();
|
||||||
|
}
|
||||||
mutationsArray.push_back(std::move(mutationObject));
|
mutationsArray.push_back(std::move(mutationObject));
|
||||||
}
|
}
|
||||||
result["mutations"] = std::move(mutationsArray);
|
result["mutations"] = std::move(mutationsArray);
|
||||||
|
@ -477,11 +604,15 @@ public:
|
||||||
static void runPendingRequestStoreTest(bool includeGlobalMutation, int expectedMatches);
|
static void runPendingRequestStoreTest(bool includeGlobalMutation, int expectedMatches);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
ConfigBroadcaster::ConfigBroadcaster() {}
|
||||||
|
|
||||||
ConfigBroadcaster::ConfigBroadcaster(ConfigFollowerInterface const& cfi)
|
ConfigBroadcaster::ConfigBroadcaster(ConfigFollowerInterface const& cfi)
|
||||||
: impl(PImpl<ConfigBroadcasterImpl>::create(cfi)) {}
|
: impl(PImpl<ConfigBroadcasterImpl>::create(cfi)) {}
|
||||||
|
|
||||||
ConfigBroadcaster::ConfigBroadcaster(ServerCoordinators const& coordinators, ConfigDBType configDBType)
|
ConfigBroadcaster::ConfigBroadcaster(ServerCoordinators const& coordinators,
|
||||||
: impl(PImpl<ConfigBroadcasterImpl>::create(coordinators, configDBType)) {}
|
ConfigDBType configDBType,
|
||||||
|
Future<Optional<Value>> previousCoordinatorsFuture)
|
||||||
|
: impl(PImpl<ConfigBroadcasterImpl>::create(coordinators, configDBType, previousCoordinatorsFuture)) {}
|
||||||
|
|
||||||
ConfigBroadcaster::ConfigBroadcaster(ConfigBroadcaster&&) = default;
|
ConfigBroadcaster::ConfigBroadcaster(ConfigBroadcaster&&) = default;
|
||||||
|
|
||||||
|
@ -493,8 +624,9 @@ Future<Void> ConfigBroadcaster::registerNode(WorkerInterface const& w,
|
||||||
Version lastSeenVersion,
|
Version lastSeenVersion,
|
||||||
ConfigClassSet const& configClassSet,
|
ConfigClassSet const& configClassSet,
|
||||||
Future<Void> watcher,
|
Future<Void> watcher,
|
||||||
ConfigBroadcastInterface const& broadcastInterface) {
|
ConfigBroadcastInterface const& broadcastInterface,
|
||||||
return impl->registerNode(*this, w, lastSeenVersion, configClassSet, watcher, broadcastInterface);
|
bool isCoordinator) {
|
||||||
|
return impl->registerNode(*this, w, lastSeenVersion, configClassSet, watcher, broadcastInterface, isCoordinator);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ConfigBroadcaster::applyChanges(Standalone<VectorRef<VersionedConfigMutationRef>> const& changes,
|
void ConfigBroadcaster::applyChanges(Standalone<VectorRef<VersionedConfigMutationRef>> const& changes,
|
||||||
|
@ -510,8 +642,17 @@ void ConfigBroadcaster::applySnapshotAndChanges(
|
||||||
Standalone<VectorRef<VersionedConfigMutationRef>> const& changes,
|
Standalone<VectorRef<VersionedConfigMutationRef>> const& changes,
|
||||||
Version changesVersion,
|
Version changesVersion,
|
||||||
Standalone<VectorRef<VersionedConfigCommitAnnotationRef>> const& annotations,
|
Standalone<VectorRef<VersionedConfigCommitAnnotationRef>> const& annotations,
|
||||||
std::vector<ConfigFollowerInterface> const& readReplicas) {
|
std::vector<ConfigFollowerInterface> const& readReplicas,
|
||||||
impl->applySnapshotAndChanges(snapshot, snapshotVersion, changes, changesVersion, annotations, readReplicas);
|
Version largestLiveVersion,
|
||||||
|
bool fromPreviousCoordinators) {
|
||||||
|
impl->applySnapshotAndChanges(snapshot,
|
||||||
|
snapshotVersion,
|
||||||
|
changes,
|
||||||
|
changesVersion,
|
||||||
|
annotations,
|
||||||
|
readReplicas,
|
||||||
|
largestLiveVersion,
|
||||||
|
fromPreviousCoordinators);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ConfigBroadcaster::applySnapshotAndChanges(
|
void ConfigBroadcaster::applySnapshotAndChanges(
|
||||||
|
@ -520,9 +661,17 @@ void ConfigBroadcaster::applySnapshotAndChanges(
|
||||||
Standalone<VectorRef<VersionedConfigMutationRef>> const& changes,
|
Standalone<VectorRef<VersionedConfigMutationRef>> const& changes,
|
||||||
Version changesVersion,
|
Version changesVersion,
|
||||||
Standalone<VectorRef<VersionedConfigCommitAnnotationRef>> const& annotations,
|
Standalone<VectorRef<VersionedConfigCommitAnnotationRef>> const& annotations,
|
||||||
std::vector<ConfigFollowerInterface> const& readReplicas) {
|
std::vector<ConfigFollowerInterface> const& readReplicas,
|
||||||
impl->applySnapshotAndChanges(
|
Version largestLiveVersion,
|
||||||
std::move(snapshot), snapshotVersion, changes, changesVersion, annotations, readReplicas);
|
bool fromPreviousCoordinators) {
|
||||||
|
impl->applySnapshotAndChanges(std::move(snapshot),
|
||||||
|
snapshotVersion,
|
||||||
|
changes,
|
||||||
|
changesVersion,
|
||||||
|
annotations,
|
||||||
|
readReplicas,
|
||||||
|
largestLiveVersion,
|
||||||
|
fromPreviousCoordinators);
|
||||||
}
|
}
|
||||||
|
|
||||||
Future<Void> ConfigBroadcaster::getError() const {
|
Future<Void> ConfigBroadcaster::getError() const {
|
||||||
|
@ -544,3 +693,27 @@ JsonBuilderObject ConfigBroadcaster::getStatus() const {
|
||||||
void ConfigBroadcaster::compact(Version compactionVersion) {
|
void ConfigBroadcaster::compact(Version compactionVersion) {
|
||||||
impl->compact(compactionVersion);
|
impl->compact(compactionVersion);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Sends a ConfigFollowerLockRequest to every config server listed in
// `coordinators` and completes once a majority of those requests have been
// answered. Each request carries a hash of the connection string of the
// coordinators passed in.
ACTOR static Future<Void> lockConfigNodesImpl(ServerCoordinators coordinators) {
	size_t connectionStringHash = std::hash<std::string>()(coordinators.ccr->getConnectionString().toString());

	std::vector<Future<Void>> pendingLocks;
	pendingLocks.reserve(coordinators.configServers.size());
	for (const auto& configServer : coordinators.configServers) {
		if (!configServer.hostname.present()) {
			// No hostname is set: send directly on the interface's lock stream.
			pendingLocks.push_back(
			    retryBrokenPromise(configServer.lock, ConfigFollowerLockRequest{ connectionStringHash }));
		} else {
			// A hostname is set: address the request to the well-known lock
			// endpoint through the hostname.
			pendingLocks.push_back(retryGetReplyFromHostname(ConfigFollowerLockRequest{ connectionStringHash },
			                                                 configServer.hostname.get(),
			                                                 WLTOKEN_CONFIGFOLLOWER_LOCK));
		}
	}

	// A strict majority of replies is sufficient; the remaining requests are
	// not waited on.
	int majority = pendingLocks.size() / 2 + 1;
	wait(quorum(pendingLocks, majority));
	return Void();
}
|
||||||
|
|
||||||
|
// Public entry point for locking config nodes: forwards `coordinators` to
// lockConfigNodesImpl and hands back the future it produces.
Future<Void> ConfigBroadcaster::lockConfigNodes(ServerCoordinators coordinators) {
	Future<Void> lockDone = lockConfigNodesImpl(coordinators);
	return lockDone;
}
|
||||||
|
|
|
@ -270,7 +270,7 @@ class BroadcasterToLocalConfigEnvironment {
|
||||||
self->cbi = makeReference<AsyncVar<ConfigBroadcastInterface>>();
|
self->cbi = makeReference<AsyncVar<ConfigBroadcastInterface>>();
|
||||||
self->readFrom.connectToBroadcaster(self->cbi);
|
self->readFrom.connectToBroadcaster(self->cbi);
|
||||||
self->broadcastServer = self->broadcaster.registerNode(
|
self->broadcastServer = self->broadcaster.registerNode(
|
||||||
WorkerInterface(), 0, configClassSet, self->workerFailure.getFuture(), self->cbi->get());
|
WorkerInterface(), 0, configClassSet, self->workerFailure.getFuture(), self->cbi->get(), true);
|
||||||
return Void();
|
return Void();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -309,7 +309,8 @@ public:
|
||||||
readFrom.lastSeenVersion(),
|
readFrom.lastSeenVersion(),
|
||||||
readFrom.configClassSet(),
|
readFrom.configClassSet(),
|
||||||
workerFailure.getFuture(),
|
workerFailure.getFuture(),
|
||||||
cbi->get());
|
cbi->get(),
|
||||||
|
true);
|
||||||
}
|
}
|
||||||
|
|
||||||
Future<Void> restartLocalConfig(std::string const& newConfigPath) {
|
Future<Void> restartLocalConfig(std::string const& newConfigPath) {
|
||||||
|
@ -442,7 +443,7 @@ class TransactionToLocalConfigEnvironment {
|
||||||
self->cbi = makeReference<AsyncVar<ConfigBroadcastInterface>>();
|
self->cbi = makeReference<AsyncVar<ConfigBroadcastInterface>>();
|
||||||
self->readFrom.connectToBroadcaster(self->cbi);
|
self->readFrom.connectToBroadcaster(self->cbi);
|
||||||
self->broadcastServer = self->broadcaster.registerNode(
|
self->broadcastServer = self->broadcaster.registerNode(
|
||||||
WorkerInterface(), 0, configClassSet, self->workerFailure.getFuture(), self->cbi->get());
|
WorkerInterface(), 0, configClassSet, self->workerFailure.getFuture(), self->cbi->get(), true);
|
||||||
return Void();
|
return Void();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -465,7 +466,8 @@ public:
|
||||||
readFrom.lastSeenVersion(),
|
readFrom.lastSeenVersion(),
|
||||||
readFrom.configClassSet(),
|
readFrom.configClassSet(),
|
||||||
workerFailure.getFuture(),
|
workerFailure.getFuture(),
|
||||||
cbi->get());
|
cbi->get(),
|
||||||
|
true);
|
||||||
}
|
}
|
||||||
|
|
||||||
Future<Void> restartLocalConfig(std::string const& newConfigPath) {
|
Future<Void> restartLocalConfig(std::string const& newConfigPath) {
|
||||||
|
|
|
@ -29,6 +29,7 @@ void ConfigFollowerInterface::setupWellKnownEndpoints() {
|
||||||
compact.makeWellKnownEndpoint(WLTOKEN_CONFIGFOLLOWER_COMPACT, TaskPriority::Coordination);
|
compact.makeWellKnownEndpoint(WLTOKEN_CONFIGFOLLOWER_COMPACT, TaskPriority::Coordination);
|
||||||
rollforward.makeWellKnownEndpoint(WLTOKEN_CONFIGFOLLOWER_ROLLFORWARD, TaskPriority::Coordination);
|
rollforward.makeWellKnownEndpoint(WLTOKEN_CONFIGFOLLOWER_ROLLFORWARD, TaskPriority::Coordination);
|
||||||
getCommittedVersion.makeWellKnownEndpoint(WLTOKEN_CONFIGFOLLOWER_GETCOMMITTEDVERSION, TaskPriority::Coordination);
|
getCommittedVersion.makeWellKnownEndpoint(WLTOKEN_CONFIGFOLLOWER_GETCOMMITTEDVERSION, TaskPriority::Coordination);
|
||||||
|
lock.makeWellKnownEndpoint(WLTOKEN_CONFIGFOLLOWER_LOCK, TaskPriority::Coordination);
|
||||||
}
|
}
|
||||||
|
|
||||||
ConfigFollowerInterface::ConfigFollowerInterface() : _id(deterministicRandom()->randomUniqueID()) {}
|
ConfigFollowerInterface::ConfigFollowerInterface() : _id(deterministicRandom()->randomUniqueID()) {}
|
||||||
|
@ -39,7 +40,8 @@ ConfigFollowerInterface::ConfigFollowerInterface(NetworkAddress const& remote)
|
||||||
getChanges(Endpoint::wellKnown({ remote }, WLTOKEN_CONFIGFOLLOWER_GETCHANGES)),
|
getChanges(Endpoint::wellKnown({ remote }, WLTOKEN_CONFIGFOLLOWER_GETCHANGES)),
|
||||||
compact(Endpoint::wellKnown({ remote }, WLTOKEN_CONFIGFOLLOWER_COMPACT)),
|
compact(Endpoint::wellKnown({ remote }, WLTOKEN_CONFIGFOLLOWER_COMPACT)),
|
||||||
rollforward(Endpoint::wellKnown({ remote }, WLTOKEN_CONFIGFOLLOWER_ROLLFORWARD)),
|
rollforward(Endpoint::wellKnown({ remote }, WLTOKEN_CONFIGFOLLOWER_ROLLFORWARD)),
|
||||||
getCommittedVersion(Endpoint::wellKnown({ remote }, WLTOKEN_CONFIGFOLLOWER_GETCOMMITTEDVERSION)) {}
|
getCommittedVersion(Endpoint::wellKnown({ remote }, WLTOKEN_CONFIGFOLLOWER_GETCOMMITTEDVERSION)),
|
||||||
|
lock(Endpoint::wellKnown({ remote }, WLTOKEN_CONFIGFOLLOWER_LOCK)) {}
|
||||||
|
|
||||||
ConfigFollowerInterface::ConfigFollowerInterface(Hostname const& remote)
|
ConfigFollowerInterface::ConfigFollowerInterface(Hostname const& remote)
|
||||||
: _id(deterministicRandom()->randomUniqueID()), hostname(remote) {}
|
: _id(deterministicRandom()->randomUniqueID()), hostname(remote) {}
|
||||||
|
|
|
@ -32,9 +32,11 @@
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
|
const KeyRef coordinatorsHashKey = "id"_sr;
|
||||||
const KeyRef lastCompactedVersionKey = "lastCompactedVersion"_sr;
|
const KeyRef lastCompactedVersionKey = "lastCompactedVersion"_sr;
|
||||||
const KeyRef currentGenerationKey = "currentGeneration"_sr;
|
const KeyRef currentGenerationKey = "currentGeneration"_sr;
|
||||||
const KeyRef registeredKey = "registered"_sr;
|
const KeyRef registeredKey = "registered"_sr;
|
||||||
|
const KeyRef lockedKey = "locked"_sr;
|
||||||
const KeyRangeRef kvKeys = KeyRangeRef("kv/"_sr, "kv0"_sr);
|
const KeyRangeRef kvKeys = KeyRangeRef("kv/"_sr, "kv0"_sr);
|
||||||
const KeyRangeRef mutationKeys = KeyRangeRef("mutation/"_sr, "mutation0"_sr);
|
const KeyRangeRef mutationKeys = KeyRangeRef("mutation/"_sr, "mutation0"_sr);
|
||||||
const KeyRangeRef annotationKeys = KeyRangeRef("annotation/"_sr, "annotation0"_sr);
|
const KeyRangeRef annotationKeys = KeyRangeRef("annotation/"_sr, "annotation0"_sr);
|
||||||
|
@ -122,6 +124,7 @@ class ConfigNodeImpl {
|
||||||
Counter failedChangeRequests;
|
Counter failedChangeRequests;
|
||||||
Counter snapshotRequests;
|
Counter snapshotRequests;
|
||||||
Counter getCommittedVersionRequests;
|
Counter getCommittedVersionRequests;
|
||||||
|
Counter lockRequests;
|
||||||
|
|
||||||
// Transaction counters
|
// Transaction counters
|
||||||
Counter successfulCommits;
|
Counter successfulCommits;
|
||||||
|
@ -132,6 +135,22 @@ class ConfigNodeImpl {
|
||||||
Counter getGenerationRequests;
|
Counter getGenerationRequests;
|
||||||
Future<Void> logger;
|
Future<Void> logger;
|
||||||
|
|
||||||
|
// Reads the value stored under coordinatorsHashKey in this node's key-value
// store. Returns the decoded size_t when the key exists, and an empty
// Optional when nothing has been stored under that key.
ACTOR static Future<Optional<size_t>> getCoordinatorsHash(ConfigNodeImpl* self) {
	Optional<Value> storedHash = wait(self->kvStore->readValue(coordinatorsHashKey));
	if (storedHash.present()) {
		return BinaryReader::fromStringRef<size_t>(storedHash.get(), IncludeVersion());
	}
	return Optional<size_t>();
}
|
||||||
|
|
||||||
|
// Reads the value stored under lockedKey in this node's key-value store.
//
// Returns an empty Optional when the key is absent, and otherwise the
// Optional<size_t> that was serialized under the key.
// NOTE(review): the stored size_t is presumably the coordinators hash carried
// by the lock request -- confirm against the lock handler.
//
// The request handlers in this file treat a present() result as "this node is
// locked" and reply with coordinators_changed, so a missing key must map to an
// empty Optional. Returning `false` here would be converted to size_t 0 and
// wrapped in a present Optional, which those handlers would read as locked.
ACTOR static Future<Optional<size_t>> getLocked(ConfigNodeImpl* self) {
	Optional<Value> value = wait(self->kvStore->readValue(lockedKey));
	if (!value.present()) {
		return Optional<size_t>();
	}
	return BinaryReader::fromStringRef<Optional<size_t>>(value.get(), IncludeVersion());
}
|
||||||
|
|
||||||
ACTOR static Future<ConfigGeneration> getGeneration(ConfigNodeImpl* self) {
|
ACTOR static Future<ConfigGeneration> getGeneration(ConfigNodeImpl* self) {
|
||||||
state ConfigGeneration generation;
|
state ConfigGeneration generation;
|
||||||
Optional<Value> value = wait(self->kvStore->readValue(currentGenerationKey));
|
Optional<Value> value = wait(self->kvStore->readValue(currentGenerationKey));
|
||||||
|
@ -216,6 +235,7 @@ class ConfigNodeImpl {
|
||||||
wait(getAnnotations(self, req.lastSeenVersion + 1, committedVersion));
|
wait(getAnnotations(self, req.lastSeenVersion + 1, committedVersion));
|
||||||
TraceEvent(SevDebug, "ConfigNodeSendingChanges", self->id)
|
TraceEvent(SevDebug, "ConfigNodeSendingChanges", self->id)
|
||||||
.detail("ReqLastSeenVersion", req.lastSeenVersion)
|
.detail("ReqLastSeenVersion", req.lastSeenVersion)
|
||||||
|
.detail("ReqMostRecentVersion", req.mostRecentVersion)
|
||||||
.detail("CommittedVersion", committedVersion)
|
.detail("CommittedVersion", committedVersion)
|
||||||
.detail("NumMutations", versionedMutations.size())
|
.detail("NumMutations", versionedMutations.size())
|
||||||
.detail("NumCommits", versionedAnnotations.size());
|
.detail("NumCommits", versionedAnnotations.size());
|
||||||
|
@ -227,6 +247,12 @@ class ConfigNodeImpl {
|
||||||
// New transactions increment the database's current live version. This effectively serves as a lock, providing
|
// New transactions increment the database's current live version. This effectively serves as a lock, providing
|
||||||
// serializability
|
// serializability
|
||||||
ACTOR static Future<Void> getNewGeneration(ConfigNodeImpl* self, ConfigTransactionGetGenerationRequest req) {
|
ACTOR static Future<Void> getNewGeneration(ConfigNodeImpl* self, ConfigTransactionGetGenerationRequest req) {
|
||||||
|
state Optional<size_t> coordinatorsHash = wait(getCoordinatorsHash(self));
|
||||||
|
ASSERT(coordinatorsHash.present());
|
||||||
|
if (req.coordinatorsHash != coordinatorsHash.get()) {
|
||||||
|
req.reply.sendError(coordinators_changed());
|
||||||
|
return Void();
|
||||||
|
}
|
||||||
state ConfigGeneration generation = wait(getGeneration(self));
|
state ConfigGeneration generation = wait(getGeneration(self));
|
||||||
++generation.liveVersion;
|
++generation.liveVersion;
|
||||||
if (req.lastSeenLiveVersion.present()) {
|
if (req.lastSeenLiveVersion.present()) {
|
||||||
|
@ -241,6 +267,18 @@ class ConfigNodeImpl {
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR static Future<Void> get(ConfigNodeImpl* self, ConfigTransactionGetRequest req) {
|
ACTOR static Future<Void> get(ConfigNodeImpl* self, ConfigTransactionGetRequest req) {
|
||||||
|
state Optional<size_t> locked = wait(getLocked(self));
|
||||||
|
if (locked.present()) {
|
||||||
|
CODE_PROBE(true, "attempting to read from a locked ConfigNode");
|
||||||
|
req.reply.sendError(coordinators_changed());
|
||||||
|
return Void();
|
||||||
|
}
|
||||||
|
state Optional<size_t> coordinatorsHash = wait(getCoordinatorsHash(self));
|
||||||
|
ASSERT(coordinatorsHash.present());
|
||||||
|
if (req.coordinatorsHash != coordinatorsHash.get()) {
|
||||||
|
req.reply.sendError(coordinators_changed());
|
||||||
|
return Void();
|
||||||
|
}
|
||||||
ConfigGeneration currentGeneration = wait(getGeneration(self));
|
ConfigGeneration currentGeneration = wait(getGeneration(self));
|
||||||
if (req.generation != currentGeneration) {
|
if (req.generation != currentGeneration) {
|
||||||
// TODO: Also send information about highest seen version
|
// TODO: Also send information about highest seen version
|
||||||
|
@ -273,6 +311,13 @@ class ConfigNodeImpl {
|
||||||
// TODO: Currently it is possible that extra configuration classes may be returned, we
|
// TODO: Currently it is possible that extra configuration classes may be returned, we
|
||||||
// may want to fix this to clean up the contract
|
// may want to fix this to clean up the contract
|
||||||
ACTOR static Future<Void> getConfigClasses(ConfigNodeImpl* self, ConfigTransactionGetConfigClassesRequest req) {
|
ACTOR static Future<Void> getConfigClasses(ConfigNodeImpl* self, ConfigTransactionGetConfigClassesRequest req) {
|
||||||
|
state Optional<size_t> locked = wait(getLocked(self));
|
||||||
|
if (locked.present()) {
|
||||||
|
CODE_PROBE(true, "attempting to read config classes from locked ConfigNode");
|
||||||
|
req.reply.sendError(coordinators_changed());
|
||||||
|
return Void();
|
||||||
|
}
|
||||||
|
|
||||||
ConfigGeneration currentGeneration = wait(getGeneration(self));
|
ConfigGeneration currentGeneration = wait(getGeneration(self));
|
||||||
if (req.generation != currentGeneration) {
|
if (req.generation != currentGeneration) {
|
||||||
req.reply.sendError(transaction_too_old());
|
req.reply.sendError(transaction_too_old());
|
||||||
|
@ -306,6 +351,13 @@ class ConfigNodeImpl {
|
||||||
|
|
||||||
// Retrieve all knobs explicitly defined for the specified configuration class
|
// Retrieve all knobs explicitly defined for the specified configuration class
|
||||||
ACTOR static Future<Void> getKnobs(ConfigNodeImpl* self, ConfigTransactionGetKnobsRequest req) {
|
ACTOR static Future<Void> getKnobs(ConfigNodeImpl* self, ConfigTransactionGetKnobsRequest req) {
|
||||||
|
state Optional<size_t> locked = wait(getLocked(self));
|
||||||
|
if (locked.present()) {
|
||||||
|
CODE_PROBE(true, "attempting to read knobs from locked ConfigNode");
|
||||||
|
req.reply.sendError(coordinators_changed());
|
||||||
|
return Void();
|
||||||
|
}
|
||||||
|
|
||||||
ConfigGeneration currentGeneration = wait(getGeneration(self));
|
ConfigGeneration currentGeneration = wait(getGeneration(self));
|
||||||
if (req.generation != currentGeneration) {
|
if (req.generation != currentGeneration) {
|
||||||
req.reply.sendError(transaction_too_old());
|
req.reply.sendError(transaction_too_old());
|
||||||
|
@ -383,6 +435,19 @@ class ConfigNodeImpl {
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR static Future<Void> commit(ConfigNodeImpl* self, ConfigTransactionCommitRequest req) {
|
ACTOR static Future<Void> commit(ConfigNodeImpl* self, ConfigTransactionCommitRequest req) {
|
||||||
|
state Optional<size_t> locked = wait(getLocked(self));
|
||||||
|
if (locked.present()) {
|
||||||
|
CODE_PROBE(true, "attempting to write to locked ConfigNode");
|
||||||
|
req.reply.sendError(coordinators_changed());
|
||||||
|
return Void();
|
||||||
|
}
|
||||||
|
state Optional<size_t> coordinatorsHash = wait(getCoordinatorsHash(self));
|
||||||
|
ASSERT(coordinatorsHash.present());
|
||||||
|
if (req.coordinatorsHash != coordinatorsHash.get()) {
|
||||||
|
req.reply.sendError(coordinators_changed());
|
||||||
|
return Void();
|
||||||
|
}
|
||||||
|
|
||||||
ConfigGeneration currentGeneration = wait(getGeneration(self));
|
ConfigGeneration currentGeneration = wait(getGeneration(self));
|
||||||
if (req.generation.committedVersion != currentGeneration.committedVersion) {
|
if (req.generation.committedVersion != currentGeneration.committedVersion) {
|
||||||
++self->failedCommits;
|
++self->failedCommits;
|
||||||
|
@ -454,7 +519,7 @@ class ConfigNodeImpl {
|
||||||
// However, commit annotations for compacted mutations are lost
|
// However, commit annotations for compacted mutations are lost
|
||||||
ACTOR static Future<Void> compact(ConfigNodeImpl* self, ConfigFollowerCompactRequest req) {
|
ACTOR static Future<Void> compact(ConfigNodeImpl* self, ConfigFollowerCompactRequest req) {
|
||||||
state Version lastCompactedVersion = wait(getLastCompactedVersion(self));
|
state Version lastCompactedVersion = wait(getLastCompactedVersion(self));
|
||||||
TraceEvent(SevDebug, "ConfigNodeCompacting", self->id)
|
TraceEvent(SevInfo, "ConfigNodeCompacting", self->id)
|
||||||
.detail("Version", req.version)
|
.detail("Version", req.version)
|
||||||
.detail("LastCompacted", lastCompactedVersion);
|
.detail("LastCompacted", lastCompactedVersion);
|
||||||
if (req.version <= lastCompactedVersion) {
|
if (req.version <= lastCompactedVersion) {
|
||||||
|
@ -506,11 +571,13 @@ class ConfigNodeImpl {
|
||||||
req.reply.sendError(transaction_too_old());
|
req.reply.sendError(transaction_too_old());
|
||||||
return Void();
|
return Void();
|
||||||
}
|
}
|
||||||
TraceEvent("ConfigNodeRollforward")
|
TraceEvent("ConfigNodeRollforward", self->id)
|
||||||
.detail("RollbackTo", req.rollback)
|
.detail("RollbackTo", req.rollback)
|
||||||
.detail("Target", req.target)
|
.detail("Target", req.target)
|
||||||
.detail("LastKnownCommitted", req.lastKnownCommitted)
|
.detail("LastKnownCommitted", req.lastKnownCommitted)
|
||||||
.detail("Committed", currentGeneration.committedVersion);
|
.detail("Committed", currentGeneration.committedVersion)
|
||||||
|
.detail("CurrentGeneration", currentGeneration.toString())
|
||||||
|
.detail("LastCompactedVersion", lastCompactedVersion);
|
||||||
// Rollback to prior known committed version to erase any commits not
|
// Rollback to prior known committed version to erase any commits not
|
||||||
// made on a quorum.
|
// made on a quorum.
|
||||||
if (req.rollback.present() && req.rollback.get() < currentGeneration.committedVersion) {
|
if (req.rollback.present() && req.rollback.get() < currentGeneration.committedVersion) {
|
||||||
|
@ -539,8 +606,11 @@ class ConfigNodeImpl {
|
||||||
}
|
}
|
||||||
// Now rollforward by applying all mutations between last known
|
// Now rollforward by applying all mutations between last known
|
||||||
// committed version and rollforward version.
|
// committed version and rollforward version.
|
||||||
ASSERT_GT(req.mutations[0].version, currentGeneration.committedVersion);
|
if (req.mutations.size() > 0) {
|
||||||
wait(commitMutations(self, req.mutations, req.annotations, req.target));
|
ASSERT_GT(req.mutations.size(), 0);
|
||||||
|
ASSERT_GT(req.mutations[0].version, currentGeneration.committedVersion);
|
||||||
|
wait(commitMutations(self, req.mutations, req.annotations, req.target));
|
||||||
|
}
|
||||||
|
|
||||||
req.reply.send(Void());
|
req.reply.send(Void());
|
||||||
return Void();
|
return Void();
|
||||||
|
@ -548,39 +618,20 @@ class ConfigNodeImpl {
|
||||||
|
|
||||||
ACTOR static Future<Void> getCommittedVersion(ConfigNodeImpl* self, ConfigFollowerGetCommittedVersionRequest req) {
|
ACTOR static Future<Void> getCommittedVersion(ConfigNodeImpl* self, ConfigFollowerGetCommittedVersionRequest req) {
|
||||||
state Version lastCompacted = wait(getLastCompactedVersion(self));
|
state Version lastCompacted = wait(getLastCompactedVersion(self));
|
||||||
ConfigGeneration generation = wait(getGeneration(self));
|
state ConfigGeneration generation = wait(getGeneration(self));
|
||||||
req.reply.send(ConfigFollowerGetCommittedVersionReply{ lastCompacted, generation.committedVersion });
|
bool isRegistered = wait(registered(self));
|
||||||
|
req.reply.send(ConfigFollowerGetCommittedVersionReply{
|
||||||
|
isRegistered, lastCompacted, generation.liveVersion, generation.committedVersion });
|
||||||
return Void();
|
return Void();
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR static Future<Void> serve(ConfigNodeImpl* self, ConfigFollowerInterface const* cfi) {
|
// Requires ConfigNodes to register with the ConfigBroadcaster before being
|
||||||
loop {
|
// allowed to respond to most requests. The ConfigBroadcaster will first
|
||||||
choose {
|
// ask the ConfigNode whether it is registered (kickstarted by the worker
|
||||||
when(ConfigFollowerGetSnapshotAndChangesRequest req =
|
// registering with the cluster controller). Then, the ConfigBroadcaster
|
||||||
waitNext(cfi->getSnapshotAndChanges.getFuture())) {
|
// will send the ConfigNode a ready message, containing a snapshot if the
|
||||||
++self->snapshotRequests;
|
// ConfigNode is a new coordinator and needs updated state, or empty
|
||||||
wait(getSnapshotAndChanges(self, req));
|
// otherwise.
|
||||||
}
|
|
||||||
when(ConfigFollowerGetChangesRequest req = waitNext(cfi->getChanges.getFuture())) {
|
|
||||||
wait(getChanges(self, req));
|
|
||||||
}
|
|
||||||
when(ConfigFollowerCompactRequest req = waitNext(cfi->compact.getFuture())) {
|
|
||||||
++self->compactRequests;
|
|
||||||
wait(compact(self, req));
|
|
||||||
}
|
|
||||||
when(ConfigFollowerRollforwardRequest req = waitNext(cfi->rollforward.getFuture())) {
|
|
||||||
++self->rollforwardRequests;
|
|
||||||
wait(rollforward(self, req));
|
|
||||||
}
|
|
||||||
when(ConfigFollowerGetCommittedVersionRequest req = waitNext(cfi->getCommittedVersion.getFuture())) {
|
|
||||||
++self->getCommittedVersionRequests;
|
|
||||||
wait(getCommittedVersion(self, req));
|
|
||||||
}
|
|
||||||
when(wait(self->kvStore->getError())) { ASSERT(false); }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ACTOR static Future<Void> serve(ConfigNodeImpl* self, ConfigBroadcastInterface const* cbi, bool infinite) {
|
ACTOR static Future<Void> serve(ConfigNodeImpl* self, ConfigBroadcastInterface const* cbi, bool infinite) {
|
||||||
loop {
|
loop {
|
||||||
// Normally, the ConfigBroadcaster will first send a
|
// Normally, the ConfigBroadcaster will first send a
|
||||||
|
@ -593,10 +644,61 @@ class ConfigNodeImpl {
|
||||||
// ConfigNode.
|
// ConfigNode.
|
||||||
choose {
|
choose {
|
||||||
when(state ConfigBroadcastRegisteredRequest req = waitNext(cbi->registered.getFuture())) {
|
when(state ConfigBroadcastRegisteredRequest req = waitNext(cbi->registered.getFuture())) {
|
||||||
bool isRegistered = wait(registered(self));
|
state bool isRegistered = wait(registered(self));
|
||||||
req.reply.send(ConfigBroadcastRegisteredReply{ isRegistered });
|
ConfigGeneration generation = wait(getGeneration(self));
|
||||||
|
TraceEvent("ConfigNodeSendingRegisteredReply", self->id)
|
||||||
|
.detail("Generation", generation.toString());
|
||||||
|
req.reply.send(ConfigBroadcastRegisteredReply{ isRegistered, generation.committedVersion });
|
||||||
}
|
}
|
||||||
when(ConfigBroadcastReadyRequest readyReq = waitNext(cbi->ready.getFuture())) {
|
when(state ConfigBroadcastReadyRequest readyReq = waitNext(cbi->ready.getFuture())) {
|
||||||
|
state Optional<size_t> locked = wait(getLocked(self));
|
||||||
|
|
||||||
|
// New ConfigNodes with no previous state should always
|
||||||
|
// apply snapshots from the ConfigBroadcaster. Otherwise,
|
||||||
|
// the ConfigNode must be part of a new generation to
|
||||||
|
// accept a snapshot. An existing ConfigNode that restarts
|
||||||
|
// shouldn't apply a snapshot and overwrite its state if
|
||||||
|
// the set of coordinators hasn't changed.
|
||||||
|
if ((!infinite && !locked.present()) ||
|
||||||
|
(locked.present() && locked.get() != readyReq.coordinatorsHash)) {
|
||||||
|
// Apply snapshot if necessary.
|
||||||
|
if (readyReq.snapshot.size() > 0) {
|
||||||
|
for (const auto& [configKey, knobValue] : readyReq.snapshot) {
|
||||||
|
TraceEvent("ConfigNodeSettingFromSnapshot", self->id)
|
||||||
|
.detail("ConfigClass", configKey.configClass)
|
||||||
|
.detail("KnobName", configKey.knobName)
|
||||||
|
.detail("Value", knobValue.toString())
|
||||||
|
.detail("Version", readyReq.snapshotVersion);
|
||||||
|
self->kvStore->set(KeyValueRef(
|
||||||
|
BinaryWriter::toValue(configKey, IncludeVersion()).withPrefix(kvKeys.begin),
|
||||||
|
ObjectWriter::toValue(knobValue, IncludeVersion())));
|
||||||
|
}
|
||||||
|
ConfigGeneration newGeneration = { readyReq.snapshotVersion, readyReq.liveVersion };
|
||||||
|
self->kvStore->set(KeyValueRef(currentGenerationKey,
|
||||||
|
BinaryWriter::toValue(newGeneration, IncludeVersion())));
|
||||||
|
// Clear out any mutations to the keys. If these
|
||||||
|
// aren't cleared, they will overwrite the
|
||||||
|
// snapshotted values when the knobs are read.
|
||||||
|
self->kvStore->clear(KeyRangeRef(versionedMutationKey(0, 0),
|
||||||
|
versionedMutationKey(readyReq.snapshotVersion + 1, 0)));
|
||||||
|
self->kvStore->clear(KeyRangeRef(versionedAnnotationKey(0),
|
||||||
|
versionedAnnotationKey(readyReq.snapshotVersion + 1)));
|
||||||
|
|
||||||
|
self->kvStore->set(
|
||||||
|
KeyValueRef(lastCompactedVersionKey,
|
||||||
|
BinaryWriter::toValue(readyReq.snapshotVersion, IncludeVersion())));
|
||||||
|
}
|
||||||
|
// Make sure freshly up to date ConfigNode isn't
|
||||||
|
// locked! This is possible if it was a coordinator in
|
||||||
|
// a previous generation.
|
||||||
|
self->kvStore->set(
|
||||||
|
KeyValueRef(lockedKey, BinaryWriter::toValue(Optional<size_t>(), IncludeVersion())));
|
||||||
|
}
|
||||||
|
self->kvStore->set(KeyValueRef(coordinatorsHashKey,
|
||||||
|
BinaryWriter::toValue(readyReq.coordinatorsHash, IncludeVersion())));
|
||||||
|
wait(self->kvStore->commit());
|
||||||
|
|
||||||
|
TraceEvent("ConfigNodeReady", self->id).detail("WasLocked", locked.present());
|
||||||
readyReq.reply.send(ConfigBroadcastReadyReply{});
|
readyReq.reply.send(ConfigBroadcastReadyReply{});
|
||||||
if (!infinite) {
|
if (!infinite) {
|
||||||
return Void();
|
return Void();
|
||||||
|
@ -606,17 +708,73 @@ class ConfigNodeImpl {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ACTOR static Future<Void> serveRegistered(ConfigNodeImpl* self, ConfigFollowerInterface const* cfi) {
|
||||||
|
loop {
|
||||||
|
choose {
|
||||||
|
when(ConfigFollowerCompactRequest req = waitNext(cfi->compact.getFuture())) {
|
||||||
|
++self->compactRequests;
|
||||||
|
wait(compact(self, req));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Many of the ConfigNode interfaces need to be served before the
|
||||||
|
// ConfigNode is officially registered with the ConfigBroadcaster. This is
|
||||||
|
// necessary due to edge cases around coordinator changes. For example, a
|
||||||
|
// ConfigNode that loses its coordinator status but then restarts before
|
||||||
|
// serving its snapshot to the new coordinators needs to be able to
|
||||||
|
// continue serving its snapshot interface when it restarts, even though it
|
||||||
|
// is no longer a coordinator.
|
||||||
|
ACTOR static Future<Void> serveUnregistered(ConfigNodeImpl* self, ConfigFollowerInterface const* cfi) {
|
||||||
|
loop {
|
||||||
|
choose {
|
||||||
|
when(ConfigFollowerGetSnapshotAndChangesRequest req =
|
||||||
|
waitNext(cfi->getSnapshotAndChanges.getFuture())) {
|
||||||
|
++self->snapshotRequests;
|
||||||
|
wait(getSnapshotAndChanges(self, req));
|
||||||
|
}
|
||||||
|
when(ConfigFollowerGetChangesRequest req = waitNext(cfi->getChanges.getFuture())) {
|
||||||
|
wait(getChanges(self, req));
|
||||||
|
}
|
||||||
|
when(ConfigFollowerRollforwardRequest req = waitNext(cfi->rollforward.getFuture())) {
|
||||||
|
++self->rollforwardRequests;
|
||||||
|
wait(rollforward(self, req));
|
||||||
|
}
|
||||||
|
when(ConfigFollowerGetCommittedVersionRequest req = waitNext(cfi->getCommittedVersion.getFuture())) {
|
||||||
|
++self->getCommittedVersionRequests;
|
||||||
|
wait(getCommittedVersion(self, req));
|
||||||
|
}
|
||||||
|
when(state ConfigFollowerLockRequest req = waitNext(cfi->lock.getFuture())) {
|
||||||
|
++self->lockRequests;
|
||||||
|
Optional<size_t> coordinatorsHash = wait(getCoordinatorsHash(self));
|
||||||
|
if (!coordinatorsHash.present() || coordinatorsHash.get() == req.coordinatorsHash) {
|
||||||
|
TraceEvent("ConfigNodeLocking", self->id).log();
|
||||||
|
self->kvStore->set(KeyValueRef(registeredKey, BinaryWriter::toValue(false, IncludeVersion())));
|
||||||
|
self->kvStore->set(KeyValueRef(
|
||||||
|
lockedKey,
|
||||||
|
BinaryWriter::toValue(Optional<size_t>(req.coordinatorsHash), IncludeVersion())));
|
||||||
|
wait(self->kvStore->commit());
|
||||||
|
}
|
||||||
|
req.reply.send(Void());
|
||||||
|
}
|
||||||
|
when(wait(self->kvStore->getError())) { ASSERT(false); }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ACTOR static Future<Void> serve(ConfigNodeImpl* self,
|
ACTOR static Future<Void> serve(ConfigNodeImpl* self,
|
||||||
ConfigBroadcastInterface const* cbi,
|
ConfigBroadcastInterface const* cbi,
|
||||||
ConfigTransactionInterface const* cti,
|
ConfigTransactionInterface const* cti,
|
||||||
ConfigFollowerInterface const* cfi) {
|
ConfigFollowerInterface const* cfi) {
|
||||||
|
state Future<Void> serveUnregisteredFuture = serveUnregistered(self, cfi);
|
||||||
wait(serve(self, cbi, false));
|
wait(serve(self, cbi, false));
|
||||||
|
|
||||||
self->kvStore->set(KeyValueRef(registeredKey, BinaryWriter::toValue(true, IncludeVersion())));
|
self->kvStore->set(KeyValueRef(registeredKey, BinaryWriter::toValue(true, IncludeVersion())));
|
||||||
wait(self->kvStore->commit());
|
wait(self->kvStore->commit());
|
||||||
|
|
||||||
// Shouldn't return (coordinationServer will throw an error if it does).
|
// Shouldn't return (coordinationServer will throw an error if it does).
|
||||||
wait(serve(self, cbi, true) || serve(self, cti) || serve(self, cfi));
|
wait(serve(self, cbi, true) || serve(self, cti) || serveRegistered(self, cfi) || serveUnregisteredFuture);
|
||||||
return Void();
|
return Void();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -631,11 +789,12 @@ public:
|
||||||
compactRequests("CompactRequests", cc), rollbackRequests("RollbackRequests", cc),
|
compactRequests("CompactRequests", cc), rollbackRequests("RollbackRequests", cc),
|
||||||
rollforwardRequests("RollforwardRequests", cc), successfulChangeRequests("SuccessfulChangeRequests", cc),
|
rollforwardRequests("RollforwardRequests", cc), successfulChangeRequests("SuccessfulChangeRequests", cc),
|
||||||
failedChangeRequests("FailedChangeRequests", cc), snapshotRequests("SnapshotRequests", cc),
|
failedChangeRequests("FailedChangeRequests", cc), snapshotRequests("SnapshotRequests", cc),
|
||||||
getCommittedVersionRequests("GetCommittedVersionRequests", cc), successfulCommits("SuccessfulCommits", cc),
|
getCommittedVersionRequests("GetCommittedVersionRequests", cc), lockRequests("LockRequests", cc),
|
||||||
failedCommits("FailedCommits", cc), setMutations("SetMutations", cc), clearMutations("ClearMutations", cc),
|
successfulCommits("SuccessfulCommits", cc), failedCommits("FailedCommits", cc),
|
||||||
|
setMutations("SetMutations", cc), clearMutations("ClearMutations", cc),
|
||||||
getValueRequests("GetValueRequests", cc), getGenerationRequests("GetGenerationRequests", cc) {
|
getValueRequests("GetValueRequests", cc), getGenerationRequests("GetGenerationRequests", cc) {
|
||||||
logger = traceCounters("ConfigNodeMetrics", id, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, &cc, "ConfigNode");
|
logger = traceCounters("ConfigNodeMetrics", id, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, &cc, "ConfigNode");
|
||||||
TraceEvent(SevDebug, "StartingConfigNode", id).detail("KVStoreAlreadyExists", kvStore.exists());
|
TraceEvent(SevInfo, "StartingConfigNode", id).detail("KVStoreAlreadyExists", kvStore.exists());
|
||||||
}
|
}
|
||||||
|
|
||||||
Future<Void> serve(ConfigBroadcastInterface const& cbi,
|
Future<Void> serve(ConfigBroadcastInterface const& cbi,
|
||||||
|
@ -646,7 +805,9 @@ public:
|
||||||
|
|
||||||
Future<Void> serve(ConfigTransactionInterface const& cti) { return serve(this, &cti); }
|
Future<Void> serve(ConfigTransactionInterface const& cti) { return serve(this, &cti); }
|
||||||
|
|
||||||
Future<Void> serve(ConfigFollowerInterface const& cfi) { return serve(this, &cfi); }
|
Future<Void> serve(ConfigFollowerInterface const& cfi) {
|
||||||
|
return serveUnregistered(this, &cfi) && serveRegistered(this, &cfi);
|
||||||
|
}
|
||||||
|
|
||||||
void close() { kvStore.close(); }
|
void close() { kvStore.close(); }
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "fdbclient/ClusterConnectionMemoryRecord.h"
|
#include "fdbclient/ClusterConnectionMemoryRecord.h"
|
||||||
|
#include "fdbserver/ConfigBroadcaster.h"
|
||||||
#include "fdbserver/CoordinatedState.h"
|
#include "fdbserver/CoordinatedState.h"
|
||||||
#include "fdbserver/CoordinationInterface.h"
|
#include "fdbserver/CoordinationInterface.h"
|
||||||
#include "fdbserver/Knobs.h"
|
#include "fdbserver/Knobs.h"
|
||||||
|
@ -343,10 +344,17 @@ struct MovableCoordinatedStateImpl {
|
||||||
if (BUGGIFY)
|
if (BUGGIFY)
|
||||||
wait(delay(5));
|
wait(delay(5));
|
||||||
|
|
||||||
|
if (BUGGIFY_WITH_PROB(0.001)) {
|
||||||
|
// Simulate random cluster controller death during coordinator
|
||||||
|
// change.
|
||||||
|
throw actor_cancelled();
|
||||||
|
}
|
||||||
|
|
||||||
// SOMEDAY: If we are worried about someone magically getting the new cluster ID and interfering, do a second
|
// SOMEDAY: If we are worried about someone magically getting the new cluster ID and interfering, do a second
|
||||||
// cs.setExclusive( encode( ReallyTo, ... ) )
|
// cs.setExclusive( encode( ReallyTo, ... ) )
|
||||||
TraceEvent("ChangingQuorum").detail("ConnectionString", nc.toString());
|
TraceEvent("ChangingQuorum").detail("ConnectionString", nc.toString());
|
||||||
wait(changeLeaderCoordinators(self->coordinators, StringRef(nc.toString())));
|
wait(ConfigBroadcaster::lockConfigNodes(self->coordinators) &&
|
||||||
|
changeLeaderCoordinators(self->coordinators, StringRef(nc.toString())));
|
||||||
TraceEvent("ChangedQuorum").detail("ConnectionString", nc.toString());
|
TraceEvent("ChangedQuorum").detail("ConnectionString", nc.toString());
|
||||||
throw coordinators_changed();
|
throw coordinators_changed();
|
||||||
}
|
}
|
||||||
|
|
|
@ -725,9 +725,9 @@ ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf,
|
||||||
}
|
}
|
||||||
when(ForwardRequest req = waitNext(interf.forward.getFuture())) {
|
when(ForwardRequest req = waitNext(interf.forward.getFuture())) {
|
||||||
Optional<LeaderInfo> forward = regs.getForward(req.key);
|
Optional<LeaderInfo> forward = regs.getForward(req.key);
|
||||||
if (forward.present())
|
if (forward.present()) {
|
||||||
req.reply.send(Void());
|
req.reply.send(Void());
|
||||||
else {
|
} else {
|
||||||
StringRef clusterName = ccr->getConnectionString().clusterKeyName();
|
StringRef clusterName = ccr->getConnectionString().clusterKeyName();
|
||||||
if (!SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT && getClusterDescriptor(req.key).compare(clusterName)) {
|
if (!SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT && getClusterDescriptor(req.key).compare(clusterName)) {
|
||||||
TraceEvent(SevWarn, "CCRMismatch")
|
TraceEvent(SevWarn, "CCRMismatch")
|
||||||
|
@ -761,12 +761,14 @@ ACTOR Future<Void> coordinationServer(std::string dataFolder,
|
||||||
state Future<Void> configDatabaseServer = Never();
|
state Future<Void> configDatabaseServer = Never();
|
||||||
TraceEvent("CoordinationServer", myID)
|
TraceEvent("CoordinationServer", myID)
|
||||||
.detail("MyInterfaceAddr", myInterface.read.getEndpoint().getPrimaryAddress())
|
.detail("MyInterfaceAddr", myInterface.read.getEndpoint().getPrimaryAddress())
|
||||||
.detail("Folder", dataFolder);
|
.detail("Folder", dataFolder)
|
||||||
|
.detail("ConfigNodeValid", configNode.isValid());
|
||||||
|
|
||||||
if (configNode.isValid()) {
|
if (configNode.isValid()) {
|
||||||
configTransactionInterface.setupWellKnownEndpoints();
|
configTransactionInterface.setupWellKnownEndpoints();
|
||||||
configFollowerInterface.setupWellKnownEndpoints();
|
configFollowerInterface.setupWellKnownEndpoints();
|
||||||
configDatabaseServer = configNode->serve(cbi, configTransactionInterface, configFollowerInterface);
|
configDatabaseServer =
|
||||||
|
brokenPromiseToNever(configNode->serve(cbi, configTransactionInterface, configFollowerInterface));
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|
|
@ -36,6 +36,8 @@ std::unique_ptr<IConfigConsumer> IConfigConsumer::createSimple(ServerCoordinator
|
||||||
|
|
||||||
std::unique_ptr<IConfigConsumer> IConfigConsumer::createPaxos(ServerCoordinators const& coordinators,
|
std::unique_ptr<IConfigConsumer> IConfigConsumer::createPaxos(ServerCoordinators const& coordinators,
|
||||||
double pollingInterval,
|
double pollingInterval,
|
||||||
Optional<double> compactionInterval) {
|
Optional<double> compactionInterval,
|
||||||
return std::make_unique<PaxosConfigConsumer>(coordinators, pollingInterval, compactionInterval);
|
bool readPreviousCoordinators) {
|
||||||
|
return std::make_unique<PaxosConfigConsumer>(
|
||||||
|
coordinators, pollingInterval, compactionInterval, readPreviousCoordinators);
|
||||||
}
|
}
|
||||||
|
|
|
@ -53,10 +53,18 @@ class GetCommittedVersionQuorum {
|
||||||
Version largestCompactedResponse{ 0 };
|
Version largestCompactedResponse{ 0 };
|
||||||
// Last durably committed version.
|
// Last durably committed version.
|
||||||
Version lastSeenVersion;
|
Version lastSeenVersion;
|
||||||
|
// Largest compacted version on the existing ConfigNodes.
|
||||||
|
Version largestCompacted;
|
||||||
size_t totalRepliesReceived{ 0 };
|
size_t totalRepliesReceived{ 0 };
|
||||||
size_t maxAgreement{ 0 };
|
size_t maxAgreement{ 0 };
|
||||||
|
// Stores the largest live version out of all the responses.
|
||||||
|
Version largestLive{ 0 };
|
||||||
// Stores the largest committed version out of all responses.
|
// Stores the largest committed version out of all responses.
|
||||||
Version largestCommitted{ 0 };
|
Version largestCommitted{ 0 };
|
||||||
|
bool allowSpecialCaseRollforward_;
|
||||||
|
// True if a quorum has zero as their committed version. See explanation
|
||||||
|
// comment below.
|
||||||
|
bool specialZeroQuorum{ false };
|
||||||
|
|
||||||
// Sends rollback/rollforward messages to any nodes that are not up to date
|
// Sends rollback/rollforward messages to any nodes that are not up to date
|
||||||
// with the latest committed version as determined by the quorum. Should
|
// with the latest committed version as determined by the quorum. Should
|
||||||
|
@ -67,9 +75,18 @@ class GetCommittedVersionQuorum {
|
||||||
Version lastCompacted,
|
Version lastCompacted,
|
||||||
ConfigFollowerInterface cfi) {
|
ConfigFollowerInterface cfi) {
|
||||||
state Version target = quorumVersion.lastCommitted;
|
state Version target = quorumVersion.lastCommitted;
|
||||||
|
// TraceEvent("ConsumerUpdateNodeStart")
|
||||||
|
// .detail("NodeAddress", cfi.address())
|
||||||
|
// .detail("Target", target)
|
||||||
|
// .detail("NodeVersionLastCommitted", nodeVersion.lastCommitted)
|
||||||
|
// .detail("NodeVersionSecondToLastCommitted", nodeVersion.secondToLastCommitted)
|
||||||
|
// .detail("QuorumVersionLastCommitted", quorumVersion.lastCommitted)
|
||||||
|
// .detail("QuorumVersionSecondToLastCommitted", quorumVersion.secondToLastCommitted)
|
||||||
|
// .detail("LargestCompacted", self->largestCompacted);
|
||||||
if (nodeVersion.lastCommitted == target) {
|
if (nodeVersion.lastCommitted == target) {
|
||||||
return Void();
|
return Void();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nodeVersion.lastCommitted < target) {
|
if (nodeVersion.lastCommitted < target) {
|
||||||
state Optional<Version> rollback;
|
state Optional<Version> rollback;
|
||||||
if (nodeVersion.lastCommitted > quorumVersion.secondToLastCommitted) {
|
if (nodeVersion.lastCommitted > quorumVersion.secondToLastCommitted) {
|
||||||
|
@ -83,7 +100,7 @@ class GetCommittedVersionQuorum {
|
||||||
// On the other hand, if the node is on an older committed
|
// On the other hand, if the node is on an older committed
|
||||||
// version, it's possible the version it is on was never made
|
// version, it's possible the version it is on was never made
|
||||||
// durable. To be safe, roll it back by one version.
|
// durable. To be safe, roll it back by one version.
|
||||||
rollback = std::max(nodeVersion.lastCommitted - 1, Version{ 0 });
|
rollback = std::max(nodeVersion.lastCommitted - 1, self->largestCompacted);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rollback.present()) {
|
if (rollback.present()) {
|
||||||
|
@ -118,6 +135,15 @@ class GetCommittedVersionQuorum {
|
||||||
ConfigFollowerGetChangesRequest{ lastSeenVersion, target }),
|
ConfigFollowerGetChangesRequest{ lastSeenVersion, target }),
|
||||||
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
|
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
|
||||||
|
|
||||||
|
// TraceEvent("ConsumerUpdateNodeSendingRollforward")
|
||||||
|
// .detail("NodeAddress", cfi.address())
|
||||||
|
// .detail("RollbackTo", rollback)
|
||||||
|
// .detail("LastKnownCommitted", nodeVersion.lastCommitted)
|
||||||
|
// .detail("Target", target)
|
||||||
|
// .detail("ChangesSize", reply.changes.size())
|
||||||
|
// .detail("AnnotationsSize", reply.annotations.size())
|
||||||
|
// .detail("LargestCompacted", self->largestCompactedResponse)
|
||||||
|
// .detail("SpecialZeroQuorum", self->specialZeroQuorum);
|
||||||
if (cfi.hostname.present()) {
|
if (cfi.hostname.present()) {
|
||||||
wait(timeoutError(
|
wait(timeoutError(
|
||||||
retryGetReplyFromHostname(
|
retryGetReplyFromHostname(
|
||||||
|
@ -139,7 +165,7 @@ class GetCommittedVersionQuorum {
|
||||||
// one of these errors in response to a get changes or
|
// one of these errors in response to a get changes or
|
||||||
// rollforward request. The retry loop should handle this
|
// rollforward request. The retry loop should handle this
|
||||||
// case.
|
// case.
|
||||||
TraceEvent(SevInfo, "ConfigNodeRollforwardError").error(e);
|
TraceEvent(SevInfo, "ConsumerConfigNodeRollforwardError").error(e);
|
||||||
} else {
|
} else {
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
|
@ -163,9 +189,19 @@ class GetCommittedVersionQuorum {
|
||||||
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
|
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!reply.registered) {
|
||||||
|
// ConfigNodes serve their GetCommittedVersion interface before
|
||||||
|
// being registered to allow them to be rolled forward.
|
||||||
|
// However, their responses should not count towards the
|
||||||
|
// quorum.
|
||||||
|
throw future_version();
|
||||||
|
}
|
||||||
|
|
||||||
++self->totalRepliesReceived;
|
++self->totalRepliesReceived;
|
||||||
self->largestCompactedResponse = std::max(self->largestCompactedResponse, reply.lastCompacted);
|
self->largestCompactedResponse = std::max(self->largestCompactedResponse, reply.lastCompacted);
|
||||||
state Version lastCompacted = reply.lastCompacted;
|
state Version lastCompacted = reply.lastCompacted;
|
||||||
|
self->committed[cfi.address()] = reply.lastCommitted;
|
||||||
|
self->largestLive = std::max(self->largestLive, reply.lastLive);
|
||||||
self->largestCommitted = std::max(self->largestCommitted, reply.lastCommitted);
|
self->largestCommitted = std::max(self->largestCommitted, reply.lastCommitted);
|
||||||
state CommittedVersions committedVersions = CommittedVersions{ self->lastSeenVersion, reply.lastCommitted };
|
state CommittedVersions committedVersions = CommittedVersions{ self->lastSeenVersion, reply.lastCommitted };
|
||||||
if (self->priorVersions.find(committedVersions.lastCommitted) == self->priorVersions.end()) {
|
if (self->priorVersions.find(committedVersions.lastCommitted) == self->priorVersions.end()) {
|
||||||
|
@ -174,7 +210,59 @@ class GetCommittedVersionQuorum {
|
||||||
auto& nodes = self->replies[committedVersions.lastCommitted];
|
auto& nodes = self->replies[committedVersions.lastCommitted];
|
||||||
nodes.push_back(cfi);
|
nodes.push_back(cfi);
|
||||||
self->maxAgreement = std::max(nodes.size(), self->maxAgreement);
|
self->maxAgreement = std::max(nodes.size(), self->maxAgreement);
|
||||||
|
// TraceEvent("ConsumerGetCommittedVersionReply")
|
||||||
|
// .detail("From", cfi.address())
|
||||||
|
// .detail("LastCompactedVersion", lastCompacted)
|
||||||
|
// .detail("LastCommittedVersion", reply.lastCommitted)
|
||||||
|
// .detail("LastSeenVersion", self->lastSeenVersion)
|
||||||
|
// .detail("Replies", self->totalRepliesReceived)
|
||||||
|
// .detail("RepliesMatchingVersion", nodes.size())
|
||||||
|
// .detail("Coordinators", self->cfis.size())
|
||||||
|
// .detail("AllowSpecialCaseRollforward", self->allowSpecialCaseRollforward_);
|
||||||
if (nodes.size() >= self->cfis.size() / 2 + 1) {
|
if (nodes.size() >= self->cfis.size() / 2 + 1) {
|
||||||
|
// A quorum at version 0 should use any higher committed
|
||||||
|
// version seen instead of 0. Imagine the following scenario
|
||||||
|
// with three coordinators:
|
||||||
|
//
|
||||||
|
// t0 t1 t2 t3
|
||||||
|
// A 1 1 | 1
|
||||||
|
// B 1 dies | 0
|
||||||
|
// C 0 0 | 0
|
||||||
|
//
|
||||||
|
// At t0, a value at version 1 is committed to A and B. At t1,
|
||||||
|
// B dies, and now the value only exists on A. At t2, a change
|
||||||
|
// coordinators command is executed by a client, causing a
|
||||||
|
// recovery. When the ConfigBroadcaster comes online and
|
||||||
|
// attempts to read the state of the previous coordinators (at
|
||||||
|
// time t3) so it can transfer it to the new coordinators, 2/3
|
||||||
|
// ConfigNodes are unregistered and only know about version 0.
|
||||||
|
// Quorum logic dictates the committed version is, thus,
|
||||||
|
// version 0. But we know a majority committed version 1. This
|
||||||
|
// is a special case error where a ConfigNode losing data is
|
||||||
|
// immediately followed by a coordinator change and recovery,
|
||||||
|
// and 0 is a special case. Imagine the following if C instead
|
||||||
|
// has had some values committed:
|
||||||
|
//
|
||||||
|
// t0 t1 t2 t3 t4
|
||||||
|
// A 1 2 2 | 2
|
||||||
|
// B 1 2 dies | 0
|
||||||
|
// C 1 1 1 | 1
|
||||||
|
//
|
||||||
|
// In this case, there is no quorum, and so all nodes would
|
||||||
|
// (correctly) be rolled forward to version 2. Since a node
|
||||||
|
// losing data is equivalent to saying it has a committed
|
||||||
|
// version of 0, we must treat a quorum of nodes at version 0
|
||||||
|
// as a special case, and instead use the largest committed
|
||||||
|
// version we've seen as the quorum version. This does not
|
||||||
|
// affect correctness because version 0 means nothing was
|
||||||
|
// committed, so there shouldn't be an issue rolling those
|
||||||
|
// nodes forward.
|
||||||
|
if (self->allowSpecialCaseRollforward_ && committedVersions.lastCommitted == 0 &&
|
||||||
|
self->largestCommitted > 0) {
|
||||||
|
self->specialZeroQuorum = true;
|
||||||
|
committedVersions = CommittedVersions{ 0, self->largestCommitted };
|
||||||
|
}
|
||||||
|
|
||||||
// A quorum of ConfigNodes agree on the latest committed version.
|
// A quorum of ConfigNodes agree on the latest committed version.
|
||||||
if (self->quorumVersion.canBeSet()) {
|
if (self->quorumVersion.canBeSet()) {
|
||||||
self->quorumVersion.send(QuorumVersion{ committedVersions, true });
|
self->quorumVersion.send(QuorumVersion{ committedVersions, true });
|
||||||
|
@ -186,7 +274,8 @@ class GetCommittedVersionQuorum {
|
||||||
// but the node we just got a reply from is not one of them. We may
|
// but the node we just got a reply from is not one of them. We may
|
||||||
// need to roll it forward or back.
|
// need to roll it forward or back.
|
||||||
QuorumVersion quorumVersion = wait(self->quorumVersion.getFuture());
|
QuorumVersion quorumVersion = wait(self->quorumVersion.getFuture());
|
||||||
ASSERT(committedVersions.lastCommitted != quorumVersion.versions.lastCommitted);
|
ASSERT(committedVersions.lastCommitted != quorumVersion.versions.lastCommitted ||
|
||||||
|
self->specialZeroQuorum);
|
||||||
wait(self->updateNode(self, committedVersions, quorumVersion.versions, lastCompacted, cfi));
|
wait(self->updateNode(self, committedVersions, quorumVersion.versions, lastCompacted, cfi));
|
||||||
} else if (self->maxAgreement + (self->cfis.size() - self->totalRepliesReceived) <
|
} else if (self->maxAgreement + (self->cfis.size() - self->totalRepliesReceived) <
|
||||||
(self->cfis.size() / 2 + 1)) {
|
(self->cfis.size() / 2 + 1)) {
|
||||||
|
@ -213,13 +302,18 @@ class GetCommittedVersionQuorum {
|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
// Count a timeout as a reply.
|
// Count a timeout as a reply.
|
||||||
++self->totalRepliesReceived;
|
++self->totalRepliesReceived;
|
||||||
|
// TraceEvent("ConsumerGetCommittedVersionError").error(e)
|
||||||
|
// .detail("From", cfi.address())
|
||||||
|
// .detail("Replies", self->totalRepliesReceived)
|
||||||
|
// .detail("Coordinators", self->cfis.size());
|
||||||
if (e.code() == error_code_version_already_compacted) {
|
if (e.code() == error_code_version_already_compacted) {
|
||||||
if (self->quorumVersion.canBeSet()) {
|
if (self->quorumVersion.canBeSet()) {
|
||||||
// Calling sendError could delete self
|
// Calling sendError could delete self
|
||||||
auto local = self->quorumVersion;
|
auto local = self->quorumVersion;
|
||||||
local.sendError(e);
|
local.sendError(e);
|
||||||
}
|
}
|
||||||
} else if (e.code() != error_code_timed_out && e.code() != error_code_broken_promise) {
|
} else if (e.code() != error_code_timed_out && e.code() != error_code_future_version &&
|
||||||
|
e.code() != error_code_broken_promise) {
|
||||||
if (self->quorumVersion.canBeSet()) {
|
if (self->quorumVersion.canBeSet()) {
|
||||||
// Calling sendError could delete self
|
// Calling sendError could delete self
|
||||||
auto local = self->quorumVersion;
|
auto local = self->quorumVersion;
|
||||||
|
@ -231,6 +325,7 @@ class GetCommittedVersionQuorum {
|
||||||
std::accumulate(self->replies.begin(), self->replies.end(), 0, [](int value, auto const& p) {
|
std::accumulate(self->replies.begin(), self->replies.end(), 0, [](int value, auto const& p) {
|
||||||
return value + p.second.size();
|
return value + p.second.size();
|
||||||
});
|
});
|
||||||
|
|
||||||
if (nonTimeoutReplies >= self->cfis.size() / 2 + 1) {
|
if (nonTimeoutReplies >= self->cfis.size() / 2 + 1) {
|
||||||
// Make sure to trigger the quorumVersion if a timeout
|
// Make sure to trigger the quorumVersion if a timeout
|
||||||
// occurred, a quorum disagree on the committed version,
|
// occurred, a quorum disagree on the committed version,
|
||||||
|
@ -239,6 +334,14 @@ class GetCommittedVersionQuorum {
|
||||||
// back the largest committed version seen.
|
// back the largest committed version seen.
|
||||||
self->quorumVersion.send(
|
self->quorumVersion.send(
|
||||||
QuorumVersion{ CommittedVersions{ self->lastSeenVersion, self->largestCommitted }, false });
|
QuorumVersion{ CommittedVersions{ self->lastSeenVersion, self->largestCommitted }, false });
|
||||||
|
|
||||||
|
if (e.code() == error_code_future_version) {
|
||||||
|
wait(self->updateNode(self,
|
||||||
|
CommittedVersions{ self->lastSeenVersion, self->largestCommitted },
|
||||||
|
self->quorumVersion.getFuture().get().versions,
|
||||||
|
self->largestCompactedResponse,
|
||||||
|
cfi));
|
||||||
|
}
|
||||||
} else if (!self->quorumVersion.isSet()) {
|
} else if (!self->quorumVersion.isSet()) {
|
||||||
// Otherwise, if a quorum agree on the committed version,
|
// Otherwise, if a quorum agree on the committed version,
|
||||||
// some other error occurred. Notify the caller of it.
|
// some other error occurred. Notify the caller of it.
|
||||||
|
@ -253,8 +356,10 @@ class GetCommittedVersionQuorum {
|
||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
explicit GetCommittedVersionQuorum(std::vector<ConfigFollowerInterface> const& cfis, Version lastSeenVersion)
|
explicit GetCommittedVersionQuorum(std::vector<ConfigFollowerInterface> const& cfis,
|
||||||
: cfis(cfis), lastSeenVersion(lastSeenVersion) {}
|
Version lastSeenVersion,
|
||||||
|
Version largestCompacted)
|
||||||
|
: cfis(cfis), lastSeenVersion(lastSeenVersion), largestCompacted(largestCompacted) {}
|
||||||
Future<QuorumVersion> getCommittedVersion() {
|
Future<QuorumVersion> getCommittedVersion() {
|
||||||
ASSERT(!isReady()); // ensures this function is not accidentally called before resetting state
|
ASSERT(!isReady()); // ensures this function is not accidentally called before resetting state
|
||||||
for (const auto& cfi : cfis) {
|
for (const auto& cfi : cfis) {
|
||||||
|
@ -273,6 +378,7 @@ public:
|
||||||
ASSERT(isReady());
|
ASSERT(isReady());
|
||||||
return replies.at(quorumVersion.getFuture().get().versions.lastCommitted);
|
return replies.at(quorumVersion.getFuture().get().versions.lastCommitted);
|
||||||
}
|
}
|
||||||
|
Version getLargestLive() const { return largestLive; }
|
||||||
Version getSmallestCommitted() const {
|
Version getSmallestCommitted() const {
|
||||||
if (committed.size() == cfis.size()) {
|
if (committed.size() == cfis.size()) {
|
||||||
Version smallest = MAX_VERSION;
|
Version smallest = MAX_VERSION;
|
||||||
|
@ -283,6 +389,8 @@ public:
|
||||||
}
|
}
|
||||||
return ::invalidVersion;
|
return ::invalidVersion;
|
||||||
}
|
}
|
||||||
|
void allowSpecialCaseRollforward() { allowSpecialCaseRollforward_ = true; }
|
||||||
|
bool isSpecialZeroQuorum() const { return specialZeroQuorum; }
|
||||||
Future<Void> complete() const { return waitForAll(actors); }
|
Future<Void> complete() const { return waitForAll(actors); }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -293,9 +401,14 @@ class PaxosConfigConsumerImpl {
|
||||||
Version compactionVersion{ 0 };
|
Version compactionVersion{ 0 };
|
||||||
double pollingInterval;
|
double pollingInterval;
|
||||||
Optional<double> compactionInterval;
|
Optional<double> compactionInterval;
|
||||||
|
bool allowSpecialCaseRollforward_;
|
||||||
|
bool readPreviousCoordinators;
|
||||||
UID id;
|
UID id;
|
||||||
|
|
||||||
ACTOR static Future<Version> getCommittedVersion(PaxosConfigConsumerImpl* self) {
|
ACTOR static Future<Version> getCommittedVersion(PaxosConfigConsumerImpl* self) {
|
||||||
|
if (self->allowSpecialCaseRollforward_) {
|
||||||
|
self->getCommittedVersionQuorum.allowSpecialCaseRollforward();
|
||||||
|
}
|
||||||
QuorumVersion quorumVersion = wait(self->getCommittedVersionQuorum.getCommittedVersion());
|
QuorumVersion quorumVersion = wait(self->getCommittedVersionQuorum.getCommittedVersion());
|
||||||
if (!quorumVersion.isQuorum) {
|
if (!quorumVersion.isQuorum) {
|
||||||
throw failed_to_reach_quorum();
|
throw failed_to_reach_quorum();
|
||||||
|
@ -357,29 +470,37 @@ class PaxosConfigConsumerImpl {
|
||||||
&ConfigFollowerInterface::getSnapshotAndChanges,
|
&ConfigFollowerInterface::getSnapshotAndChanges,
|
||||||
ConfigFollowerGetSnapshotAndChangesRequest{ committedVersion }),
|
ConfigFollowerGetSnapshotAndChangesRequest{ committedVersion }),
|
||||||
SERVER_KNOBS->GET_SNAPSHOT_AND_CHANGES_TIMEOUT));
|
SERVER_KNOBS->GET_SNAPSHOT_AND_CHANGES_TIMEOUT));
|
||||||
|
Version smallestCommitted = self->getCommittedVersionQuorum.getSmallestCommitted();
|
||||||
TraceEvent(SevDebug, "ConfigConsumerGotSnapshotAndChanges", self->id)
|
TraceEvent(SevDebug, "ConfigConsumerGotSnapshotAndChanges", self->id)
|
||||||
.detail("SnapshotVersion", reply.snapshotVersion)
|
.detail("SnapshotVersion", reply.snapshotVersion)
|
||||||
.detail("SnapshotSize", reply.snapshot.size())
|
.detail("SnapshotSize", reply.snapshot.size())
|
||||||
.detail("ChangesVersion", committedVersion)
|
.detail("ChangesVersion", committedVersion)
|
||||||
.detail("ChangesSize", reply.changes.size())
|
.detail("ChangesSize", reply.changes.size())
|
||||||
.detail("AnnotationsSize", reply.annotations.size());
|
.detail("AnnotationsSize", reply.annotations.size())
|
||||||
|
.detail("LargestLiveVersion", self->getCommittedVersionQuorum.getLargestLive())
|
||||||
|
.detail("SmallestCommitted", smallestCommitted);
|
||||||
ASSERT_GE(committedVersion, self->lastSeenVersion);
|
ASSERT_GE(committedVersion, self->lastSeenVersion);
|
||||||
self->lastSeenVersion = committedVersion;
|
self->lastSeenVersion = committedVersion;
|
||||||
Version smallestCommitted = self->getCommittedVersionQuorum.getSmallestCommitted();
|
|
||||||
self->compactionVersion = std::max(self->compactionVersion, smallestCommitted);
|
self->compactionVersion = std::max(self->compactionVersion, smallestCommitted);
|
||||||
broadcaster->applySnapshotAndChanges(std::move(reply.snapshot),
|
broadcaster->applySnapshotAndChanges(std::move(reply.snapshot),
|
||||||
reply.snapshotVersion,
|
reply.snapshotVersion,
|
||||||
reply.changes,
|
reply.changes,
|
||||||
committedVersion,
|
committedVersion,
|
||||||
reply.annotations,
|
reply.annotations,
|
||||||
self->getCommittedVersionQuorum.getReadReplicas());
|
self->getCommittedVersionQuorum.getReadReplicas(),
|
||||||
|
self->getCommittedVersionQuorum.getLargestLive(),
|
||||||
|
self->readPreviousCoordinators);
|
||||||
wait(self->getCommittedVersionQuorum.complete());
|
wait(self->getCommittedVersionQuorum.complete());
|
||||||
|
if (self->allowSpecialCaseRollforward_) {
|
||||||
|
self->allowSpecialCaseRollforward_ = false;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
if (e.code() == error_code_failed_to_reach_quorum) {
|
if (e.code() == error_code_failed_to_reach_quorum) {
|
||||||
wait(self->getCommittedVersionQuorum.complete());
|
wait(self->getCommittedVersionQuorum.complete());
|
||||||
} else if (e.code() != error_code_timed_out && e.code() != error_code_broken_promise &&
|
} else if (e.code() != error_code_timed_out && e.code() != error_code_broken_promise &&
|
||||||
e.code() != error_code_version_already_compacted && e.code() != error_code_process_behind) {
|
e.code() != error_code_version_already_compacted && e.code() != error_code_process_behind &&
|
||||||
|
e.code() != error_code_future_version) {
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
wait(delayJittered(0.1));
|
wait(delayJittered(0.1));
|
||||||
|
@ -404,7 +525,8 @@ class PaxosConfigConsumerImpl {
|
||||||
// ConfigNodes changes to 1, 1, 2, the committed version
|
// ConfigNodes changes to 1, 1, 2, the committed version
|
||||||
// returned would be 1.
|
// returned would be 1.
|
||||||
if (committedVersion > self->lastSeenVersion) {
|
if (committedVersion > self->lastSeenVersion) {
|
||||||
ASSERT(self->getCommittedVersionQuorum.getReadReplicas().size() >= self->cfis.size() / 2 + 1);
|
ASSERT(self->getCommittedVersionQuorum.getReadReplicas().size() >= self->cfis.size() / 2 + 1 ||
|
||||||
|
self->getCommittedVersionQuorum.isSpecialZeroQuorum());
|
||||||
state std::vector<ConfigFollowerInterface> readReplicas =
|
state std::vector<ConfigFollowerInterface> readReplicas =
|
||||||
self->getCommittedVersionQuorum.getReadReplicas();
|
self->getCommittedVersionQuorum.getReadReplicas();
|
||||||
std::vector<Future<Void>> fs;
|
std::vector<Future<Void>> fs;
|
||||||
|
@ -448,8 +570,8 @@ class PaxosConfigConsumerImpl {
|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
if (e.code() == error_code_version_already_compacted || e.code() == error_code_timed_out ||
|
if (e.code() == error_code_version_already_compacted || e.code() == error_code_timed_out ||
|
||||||
e.code() == error_code_failed_to_reach_quorum || e.code() == error_code_version_already_compacted ||
|
e.code() == error_code_failed_to_reach_quorum || e.code() == error_code_version_already_compacted ||
|
||||||
e.code() == error_code_process_behind) {
|
e.code() == error_code_process_behind || e.code() == error_code_future_version) {
|
||||||
CODE_PROBE(true, "PaxosConfigConsumer get version_already_compacted error");
|
CODE_PROBE(true, "PaxosConfigConsumer fetch error");
|
||||||
if (e.code() == error_code_failed_to_reach_quorum) {
|
if (e.code() == error_code_failed_to_reach_quorum) {
|
||||||
try {
|
try {
|
||||||
wait(self->getCommittedVersionQuorum.complete());
|
wait(self->getCommittedVersionQuorum.complete());
|
||||||
|
@ -483,39 +605,58 @@ class PaxosConfigConsumerImpl {
|
||||||
}
|
}
|
||||||
|
|
||||||
void resetCommittedVersionQuorum() {
|
void resetCommittedVersionQuorum() {
|
||||||
getCommittedVersionQuorum = GetCommittedVersionQuorum{ cfis, lastSeenVersion };
|
getCommittedVersionQuorum = GetCommittedVersionQuorum{ cfis, lastSeenVersion, compactionVersion };
|
||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
Future<Void> readSnapshot(ConfigBroadcaster& broadcaster) { return getSnapshotAndChanges(this, &broadcaster); }
|
||||||
|
|
||||||
Future<Void> consume(ConfigBroadcaster& broadcaster) {
|
Future<Void> consume(ConfigBroadcaster& broadcaster) {
|
||||||
return fetchChanges(this, &broadcaster) || compactor(this, &broadcaster);
|
return fetchChanges(this, &broadcaster) || compactor(this, &broadcaster);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void allowSpecialCaseRollforward() { this->allowSpecialCaseRollforward_ = true; }
|
||||||
|
|
||||||
UID getID() const { return id; }
|
UID getID() const { return id; }
|
||||||
|
|
||||||
PaxosConfigConsumerImpl(std::vector<ConfigFollowerInterface> const& cfis,
|
PaxosConfigConsumerImpl(std::vector<ConfigFollowerInterface> const& cfis,
|
||||||
double pollingInterval,
|
double pollingInterval,
|
||||||
Optional<double> compactionInterval)
|
Optional<double> compactionInterval,
|
||||||
: cfis(cfis), getCommittedVersionQuorum(cfis, 0), pollingInterval(pollingInterval),
|
bool readPreviousCoordinators)
|
||||||
compactionInterval(compactionInterval), id(deterministicRandom()->randomUniqueID()) {}
|
: cfis(cfis), getCommittedVersionQuorum(cfis, 0, 0), pollingInterval(pollingInterval),
|
||||||
|
compactionInterval(compactionInterval), readPreviousCoordinators(readPreviousCoordinators),
|
||||||
|
id(deterministicRandom()->randomUniqueID()) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
PaxosConfigConsumer::PaxosConfigConsumer(std::vector<ConfigFollowerInterface> const& cfis,
|
PaxosConfigConsumer::PaxosConfigConsumer(std::vector<ConfigFollowerInterface> const& cfis,
|
||||||
double pollingInterval,
|
double pollingInterval,
|
||||||
Optional<double> compactionInterval)
|
Optional<double> compactionInterval,
|
||||||
: impl(PImpl<PaxosConfigConsumerImpl>::create(cfis, pollingInterval, compactionInterval)) {}
|
bool readPreviousCoordinators)
|
||||||
|
: impl(PImpl<PaxosConfigConsumerImpl>::create(cfis, pollingInterval, compactionInterval, readPreviousCoordinators)) {}
|
||||||
|
|
||||||
PaxosConfigConsumer::PaxosConfigConsumer(ServerCoordinators const& coordinators,
|
PaxosConfigConsumer::PaxosConfigConsumer(ServerCoordinators const& coordinators,
|
||||||
double pollingInterval,
|
double pollingInterval,
|
||||||
Optional<double> compactionInterval)
|
Optional<double> compactionInterval,
|
||||||
: impl(PImpl<PaxosConfigConsumerImpl>::create(coordinators.configServers, pollingInterval, compactionInterval)) {}
|
bool readPreviousCoordinators)
|
||||||
|
: impl(PImpl<PaxosConfigConsumerImpl>::create(coordinators.configServers,
|
||||||
|
pollingInterval,
|
||||||
|
compactionInterval,
|
||||||
|
readPreviousCoordinators)) {}
|
||||||
|
|
||||||
PaxosConfigConsumer::~PaxosConfigConsumer() = default;
|
PaxosConfigConsumer::~PaxosConfigConsumer() = default;
|
||||||
|
|
||||||
|
Future<Void> PaxosConfigConsumer::readSnapshot(ConfigBroadcaster& broadcaster) {
|
||||||
|
return impl->readSnapshot(broadcaster);
|
||||||
|
}
|
||||||
|
|
||||||
Future<Void> PaxosConfigConsumer::consume(ConfigBroadcaster& broadcaster) {
|
Future<Void> PaxosConfigConsumer::consume(ConfigBroadcaster& broadcaster) {
|
||||||
return impl->consume(broadcaster);
|
return impl->consume(broadcaster);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void PaxosConfigConsumer::allowSpecialCaseRollforward() {
|
||||||
|
impl->allowSpecialCaseRollforward();
|
||||||
|
}
|
||||||
|
|
||||||
UID PaxosConfigConsumer::getID() const {
|
UID PaxosConfigConsumer::getID() const {
|
||||||
return impl->getID();
|
return impl->getID();
|
||||||
}
|
}
|
||||||
|
|
|
@ -145,7 +145,8 @@ class SimpleConfigConsumerImpl {
|
||||||
reply.changes,
|
reply.changes,
|
||||||
committedVersion,
|
committedVersion,
|
||||||
reply.annotations,
|
reply.annotations,
|
||||||
{ self->cfi });
|
{ self->cfi },
|
||||||
|
committedVersion);
|
||||||
return Void();
|
return Void();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -186,10 +187,19 @@ SimpleConfigConsumer::SimpleConfigConsumer(ServerCoordinators const& coordinator
|
||||||
Optional<double> compactionInterval)
|
Optional<double> compactionInterval)
|
||||||
: impl(PImpl<SimpleConfigConsumerImpl>::create(coordinators, pollingInterval, compactionInterval)) {}
|
: impl(PImpl<SimpleConfigConsumerImpl>::create(coordinators, pollingInterval, compactionInterval)) {}
|
||||||
|
|
||||||
|
Future<Void> SimpleConfigConsumer::readSnapshot(ConfigBroadcaster& broadcaster) {
|
||||||
|
ASSERT(false);
|
||||||
|
return Void();
|
||||||
|
}
|
||||||
|
|
||||||
Future<Void> SimpleConfigConsumer::consume(ConfigBroadcaster& broadcaster) {
|
Future<Void> SimpleConfigConsumer::consume(ConfigBroadcaster& broadcaster) {
|
||||||
return impl->consume(broadcaster);
|
return impl->consume(broadcaster);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SimpleConfigConsumer::allowSpecialCaseRollforward() {
|
||||||
|
ASSERT(false);
|
||||||
|
}
|
||||||
|
|
||||||
SimpleConfigConsumer::~SimpleConfigConsumer() = default;
|
SimpleConfigConsumer::~SimpleConfigConsumer() = default;
|
||||||
|
|
||||||
UID SimpleConfigConsumer::getID() const {
|
UID SimpleConfigConsumer::getID() const {
|
||||||
|
|
|
@ -1069,7 +1069,7 @@ struct CLIOptions {
|
||||||
const char* blobCredsFromENV = nullptr;
|
const char* blobCredsFromENV = nullptr;
|
||||||
|
|
||||||
std::string configPath;
|
std::string configPath;
|
||||||
ConfigDBType configDBType{ ConfigDBType::DISABLED };
|
ConfigDBType configDBType{ ConfigDBType::PAXOS };
|
||||||
|
|
||||||
Reference<IClusterConnectionRecord> connectionFile;
|
Reference<IClusterConnectionRecord> connectionFile;
|
||||||
Standalone<StringRef> machineId;
|
Standalone<StringRef> machineId;
|
||||||
|
@ -1627,6 +1627,7 @@ private:
|
||||||
case OPT_USE_TEST_CONFIG_DB:
|
case OPT_USE_TEST_CONFIG_DB:
|
||||||
configDBType = ConfigDBType::SIMPLE;
|
configDBType = ConfigDBType::SIMPLE;
|
||||||
break;
|
break;
|
||||||
|
// TODO: Add no_config_db option which disables the configuration database
|
||||||
case OPT_FLOW_PROCESS_NAME:
|
case OPT_FLOW_PROCESS_NAME:
|
||||||
flowProcessName = args.OptionArg();
|
flowProcessName = args.OptionArg();
|
||||||
std::cout << flowProcessName << std::endl;
|
std::cout << flowProcessName << std::endl;
|
||||||
|
|
|
@ -131,10 +131,10 @@ inline bool containsMetadataMutation(const VectorRef<MutationRef>& mutations) {
|
||||||
(serverTagKeys.intersects(range)) || (serverTagHistoryKeys.intersects(range)) ||
|
(serverTagKeys.intersects(range)) || (serverTagHistoryKeys.intersects(range)) ||
|
||||||
(range.intersects(applyMutationsEndRange)) || (range.intersects(applyMutationsKeyVersionMapRange)) ||
|
(range.intersects(applyMutationsEndRange)) || (range.intersects(applyMutationsKeyVersionMapRange)) ||
|
||||||
(range.intersects(logRangesRange)) || (tssMappingKeys.intersects(range)) ||
|
(range.intersects(logRangesRange)) || (tssMappingKeys.intersects(range)) ||
|
||||||
(tssQuarantineKeys.intersects(range)) || (range.contains(coordinatorsKey)) ||
|
(tssQuarantineKeys.intersects(range)) || (range.contains(previousCoordinatorsKey)) ||
|
||||||
(range.contains(databaseLockedKey)) || (range.contains(metadataVersionKey)) ||
|
(range.contains(coordinatorsKey)) || (range.contains(databaseLockedKey)) ||
|
||||||
(range.contains(mustContainSystemMutationsKey)) || (range.contains(writeRecoveryKey)) ||
|
(range.contains(metadataVersionKey)) || (range.contains(mustContainSystemMutationsKey)) ||
|
||||||
(range.intersects(testOnlyTxnStateStorePrefixRange))) {
|
(range.contains(writeRecoveryKey)) || (range.intersects(testOnlyTxnStateStorePrefixRange))) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -93,6 +93,8 @@ public:
|
||||||
return previousWrite;
|
return previousWrite;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ServerCoordinators getCoordinators() { return coordinators; }
|
||||||
|
|
||||||
Future<Void> move(ClusterConnectionString const& nc) { return cstate.move(nc); }
|
Future<Void> move(ClusterConnectionString const& nc) { return cstate.move(nc); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
|
@ -116,13 +116,15 @@ struct ConfigBroadcastChangesRequest {
|
||||||
struct ConfigBroadcastRegisteredReply {
|
struct ConfigBroadcastRegisteredReply {
|
||||||
static constexpr FileIdentifier file_identifier = 12041047;
|
static constexpr FileIdentifier file_identifier = 12041047;
|
||||||
bool registered;
|
bool registered;
|
||||||
|
Version lastSeenVersion;
|
||||||
|
|
||||||
ConfigBroadcastRegisteredReply() = default;
|
ConfigBroadcastRegisteredReply() = default;
|
||||||
explicit ConfigBroadcastRegisteredReply(bool registered) : registered(registered) {}
|
explicit ConfigBroadcastRegisteredReply(bool registered, Version lastSeenVersion)
|
||||||
|
: registered(registered), lastSeenVersion(lastSeenVersion) {}
|
||||||
|
|
||||||
template <class Ar>
|
template <class Ar>
|
||||||
void serialize(Ar& ar) {
|
void serialize(Ar& ar) {
|
||||||
serializer(ar, registered);
|
serializer(ar, registered, lastSeenVersion);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -151,13 +153,23 @@ struct ConfigBroadcastReadyReply {
|
||||||
|
|
||||||
struct ConfigBroadcastReadyRequest {
|
struct ConfigBroadcastReadyRequest {
|
||||||
static constexpr FileIdentifier file_identifier = 7402862;
|
static constexpr FileIdentifier file_identifier = 7402862;
|
||||||
|
size_t coordinatorsHash;
|
||||||
|
std::map<ConfigKey, KnobValue> snapshot;
|
||||||
|
Version snapshotVersion;
|
||||||
|
Version liveVersion;
|
||||||
ReplyPromise<ConfigBroadcastReadyReply> reply;
|
ReplyPromise<ConfigBroadcastReadyReply> reply;
|
||||||
|
|
||||||
ConfigBroadcastReadyRequest() = default;
|
ConfigBroadcastReadyRequest() = default;
|
||||||
|
ConfigBroadcastReadyRequest(size_t coordinatorsHash,
|
||||||
|
std::map<ConfigKey, KnobValue> const& snapshot,
|
||||||
|
Version snapshotVersion,
|
||||||
|
Version liveVersion)
|
||||||
|
: coordinatorsHash(coordinatorsHash), snapshot(snapshot), snapshotVersion(snapshotVersion),
|
||||||
|
liveVersion(liveVersion) {}
|
||||||
|
|
||||||
template <class Ar>
|
template <class Ar>
|
||||||
void serialize(Ar& ar) {
|
void serialize(Ar& ar) {
|
||||||
serializer(ar, reply);
|
serializer(ar, coordinatorsHash, snapshot, snapshotVersion, liveVersion, reply);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -39,7 +39,8 @@ class ConfigBroadcaster {
|
||||||
PImpl<class ConfigBroadcasterImpl> impl;
|
PImpl<class ConfigBroadcasterImpl> impl;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
explicit ConfigBroadcaster(ServerCoordinators const&, ConfigDBType);
|
ConfigBroadcaster();
|
||||||
|
explicit ConfigBroadcaster(ServerCoordinators const&, ConfigDBType, Future<Optional<Value>>);
|
||||||
ConfigBroadcaster(ConfigBroadcaster&&);
|
ConfigBroadcaster(ConfigBroadcaster&&);
|
||||||
ConfigBroadcaster& operator=(ConfigBroadcaster&&);
|
ConfigBroadcaster& operator=(ConfigBroadcaster&&);
|
||||||
~ConfigBroadcaster();
|
~ConfigBroadcaster();
|
||||||
|
@ -47,7 +48,8 @@ public:
|
||||||
Version lastSeenVersion,
|
Version lastSeenVersion,
|
||||||
ConfigClassSet const& configClassSet,
|
ConfigClassSet const& configClassSet,
|
||||||
Future<Void> watcher,
|
Future<Void> watcher,
|
||||||
ConfigBroadcastInterface const& worker);
|
ConfigBroadcastInterface const& worker,
|
||||||
|
bool isCoordinator);
|
||||||
void applyChanges(Standalone<VectorRef<VersionedConfigMutationRef>> const& changes,
|
void applyChanges(Standalone<VectorRef<VersionedConfigMutationRef>> const& changes,
|
||||||
Version mostRecentVersion,
|
Version mostRecentVersion,
|
||||||
Standalone<VectorRef<VersionedConfigCommitAnnotationRef>> const& annotations,
|
Standalone<VectorRef<VersionedConfigCommitAnnotationRef>> const& annotations,
|
||||||
|
@ -57,18 +59,26 @@ public:
|
||||||
Standalone<VectorRef<VersionedConfigMutationRef>> const& changes,
|
Standalone<VectorRef<VersionedConfigMutationRef>> const& changes,
|
||||||
Version changesVersion,
|
Version changesVersion,
|
||||||
Standalone<VectorRef<VersionedConfigCommitAnnotationRef>> const& annotations,
|
Standalone<VectorRef<VersionedConfigCommitAnnotationRef>> const& annotations,
|
||||||
std::vector<ConfigFollowerInterface> const& readReplicas);
|
std::vector<ConfigFollowerInterface> const& readReplicas,
|
||||||
|
Version largestLiveVersion,
|
||||||
|
bool fromPreviousCoordinators = false);
|
||||||
void applySnapshotAndChanges(std::map<ConfigKey, KnobValue>&& snapshot,
|
void applySnapshotAndChanges(std::map<ConfigKey, KnobValue>&& snapshot,
|
||||||
Version snapshotVersion,
|
Version snapshotVersion,
|
||||||
Standalone<VectorRef<VersionedConfigMutationRef>> const& changes,
|
Standalone<VectorRef<VersionedConfigMutationRef>> const& changes,
|
||||||
Version changesVersion,
|
Version changesVersion,
|
||||||
Standalone<VectorRef<VersionedConfigCommitAnnotationRef>> const& annotations,
|
Standalone<VectorRef<VersionedConfigCommitAnnotationRef>> const& annotations,
|
||||||
std::vector<ConfigFollowerInterface> const& readReplicas);
|
std::vector<ConfigFollowerInterface> const& readReplicas,
|
||||||
|
Version largestLiveVersion,
|
||||||
|
bool fromPreviousCoordinators = false);
|
||||||
Future<Void> getError() const;
|
Future<Void> getError() const;
|
||||||
UID getID() const;
|
UID getID() const;
|
||||||
JsonBuilderObject getStatus() const;
|
JsonBuilderObject getStatus() const;
|
||||||
void compact(Version compactionVersion);
|
void compact(Version compactionVersion);
|
||||||
|
|
||||||
|
// Locks all ConfigNodes running on the given coordinators, returning when
|
||||||
|
// a quorum have successfully locked.
|
||||||
|
static Future<Void> lockConfigNodes(ServerCoordinators coordinators);
|
||||||
|
|
||||||
public: // Testing
|
public: // Testing
|
||||||
explicit ConfigBroadcaster(ConfigFollowerInterface const&);
|
explicit ConfigBroadcaster(ConfigFollowerInterface const&);
|
||||||
Future<Void> getClientFailure(UID clientUID) const;
|
Future<Void> getClientFailure(UID clientUID) const;
|
||||||
|
|
|
@ -177,16 +177,21 @@ struct ConfigFollowerRollforwardRequest {
|
||||||
|
|
||||||
struct ConfigFollowerGetCommittedVersionReply {
|
struct ConfigFollowerGetCommittedVersionReply {
|
||||||
static constexpr FileIdentifier file_identifier = 9214735;
|
static constexpr FileIdentifier file_identifier = 9214735;
|
||||||
|
bool registered;
|
||||||
Version lastCompacted;
|
Version lastCompacted;
|
||||||
|
Version lastLive;
|
||||||
Version lastCommitted;
|
Version lastCommitted;
|
||||||
|
|
||||||
ConfigFollowerGetCommittedVersionReply() = default;
|
ConfigFollowerGetCommittedVersionReply() = default;
|
||||||
explicit ConfigFollowerGetCommittedVersionReply(Version lastCompacted, Version lastCommitted)
|
explicit ConfigFollowerGetCommittedVersionReply(bool registered,
|
||||||
: lastCompacted(lastCompacted), lastCommitted(lastCommitted) {}
|
Version lastCompacted,
|
||||||
|
Version lastLive,
|
||||||
|
Version lastCommitted)
|
||||||
|
: registered(registered), lastCompacted(lastCompacted), lastLive(lastLive), lastCommitted(lastCommitted) {}
|
||||||
|
|
||||||
template <class Ar>
|
template <class Ar>
|
||||||
void serialize(Ar& ar) {
|
void serialize(Ar& ar) {
|
||||||
serializer(ar, lastCompacted, lastCommitted);
|
serializer(ar, registered, lastCompacted, lastLive, lastCommitted);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -202,6 +207,20 @@ struct ConfigFollowerGetCommittedVersionRequest {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct ConfigFollowerLockRequest {
|
||||||
|
static constexpr FileIdentifier file_identifier = 1867800;
|
||||||
|
size_t coordinatorsHash;
|
||||||
|
ReplyPromise<Void> reply;
|
||||||
|
|
||||||
|
ConfigFollowerLockRequest() = default;
|
||||||
|
explicit ConfigFollowerLockRequest(size_t coordinatorsHash) : coordinatorsHash(coordinatorsHash) {}
|
||||||
|
|
||||||
|
template <class Ar>
|
||||||
|
void serialize(Ar& ar) {
|
||||||
|
serializer(ar, coordinatorsHash, reply);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Configuration database nodes serve a ConfigFollowerInterface which contains well known endpoints,
|
* Configuration database nodes serve a ConfigFollowerInterface which contains well known endpoints,
|
||||||
* used by workers to receive configuration database updates
|
* used by workers to receive configuration database updates
|
||||||
|
@ -217,6 +236,7 @@ public:
|
||||||
RequestStream<ConfigFollowerRollforwardRequest> rollforward;
|
RequestStream<ConfigFollowerRollforwardRequest> rollforward;
|
||||||
RequestStream<ConfigFollowerGetCommittedVersionRequest> getCommittedVersion;
|
RequestStream<ConfigFollowerGetCommittedVersionRequest> getCommittedVersion;
|
||||||
Optional<Hostname> hostname;
|
Optional<Hostname> hostname;
|
||||||
|
RequestStream<ConfigFollowerLockRequest> lock;
|
||||||
|
|
||||||
ConfigFollowerInterface();
|
ConfigFollowerInterface();
|
||||||
void setupWellKnownEndpoints();
|
void setupWellKnownEndpoints();
|
||||||
|
@ -229,6 +249,7 @@ public:
|
||||||
|
|
||||||
template <class Ar>
|
template <class Ar>
|
||||||
void serialize(Ar& ar) {
|
void serialize(Ar& ar) {
|
||||||
serializer(ar, _id, getSnapshotAndChanges, getChanges, compact, rollforward, getCommittedVersion, hostname);
|
serializer(
|
||||||
|
ar, _id, getSnapshotAndChanges, getChanges, compact, rollforward, getCommittedVersion, hostname, lock);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -224,6 +224,7 @@ class ConfigNode;
|
||||||
|
|
||||||
class ServerCoordinators : public ClientCoordinators {
|
class ServerCoordinators : public ClientCoordinators {
|
||||||
public:
|
public:
|
||||||
|
ServerCoordinators() {}
|
||||||
explicit ServerCoordinators(Reference<IClusterConnectionRecord> ccr);
|
explicit ServerCoordinators(Reference<IClusterConnectionRecord> ccr);
|
||||||
|
|
||||||
std::vector<LeaderElectionRegInterface> leaderElectionServers;
|
std::vector<LeaderElectionRegInterface> leaderElectionServers;
|
||||||
|
|
|
@ -35,7 +35,9 @@
|
||||||
class IConfigConsumer {
|
class IConfigConsumer {
|
||||||
public:
|
public:
|
||||||
virtual ~IConfigConsumer() = default;
|
virtual ~IConfigConsumer() = default;
|
||||||
|
virtual Future<Void> readSnapshot(ConfigBroadcaster& broadcaster) = 0;
|
||||||
virtual Future<Void> consume(ConfigBroadcaster& broadcaster) = 0;
|
virtual Future<Void> consume(ConfigBroadcaster& broadcaster) = 0;
|
||||||
|
virtual void allowSpecialCaseRollforward() = 0;
|
||||||
virtual UID getID() const = 0;
|
virtual UID getID() const = 0;
|
||||||
|
|
||||||
static std::unique_ptr<IConfigConsumer> createTestSimple(ConfigFollowerInterface const& cfi,
|
static std::unique_ptr<IConfigConsumer> createTestSimple(ConfigFollowerInterface const& cfi,
|
||||||
|
@ -46,5 +48,6 @@ public:
|
||||||
Optional<double> compactionInterval);
|
Optional<double> compactionInterval);
|
||||||
static std::unique_ptr<IConfigConsumer> createPaxos(ServerCoordinators const& coordinators,
|
static std::unique_ptr<IConfigConsumer> createPaxos(ServerCoordinators const& coordinators,
|
||||||
double pollingInterval,
|
double pollingInterval,
|
||||||
Optional<double> compactionInterval);
|
Optional<double> compactionInterval,
|
||||||
|
bool readPreviousCoordinators = false);
|
||||||
};
|
};
|
||||||
|
|
|
@ -32,13 +32,17 @@ class PaxosConfigConsumer : public IConfigConsumer {
|
||||||
public:
|
public:
|
||||||
PaxosConfigConsumer(ServerCoordinators const& coordinators,
|
PaxosConfigConsumer(ServerCoordinators const& coordinators,
|
||||||
double pollingInterval,
|
double pollingInterval,
|
||||||
Optional<double> compactionInterval);
|
Optional<double> compactionInterval,
|
||||||
|
bool readPreviousCoordinators);
|
||||||
~PaxosConfigConsumer();
|
~PaxosConfigConsumer();
|
||||||
|
Future<Void> readSnapshot(ConfigBroadcaster& broadcaster) override;
|
||||||
Future<Void> consume(ConfigBroadcaster& broadcaster) override;
|
Future<Void> consume(ConfigBroadcaster& broadcaster) override;
|
||||||
|
void allowSpecialCaseRollforward() override;
|
||||||
UID getID() const override;
|
UID getID() const override;
|
||||||
|
|
||||||
public: // Testing
|
public: // Testing
|
||||||
PaxosConfigConsumer(std::vector<ConfigFollowerInterface> const& cfis,
|
PaxosConfigConsumer(std::vector<ConfigFollowerInterface> const& cfis,
|
||||||
double pollingInterval,
|
double pollingInterval,
|
||||||
Optional<double> compactionInterval);
|
Optional<double> compactionInterval,
|
||||||
|
bool readPreviousCoordinators);
|
||||||
};
|
};
|
||||||
|
|
|
@ -37,7 +37,9 @@ public:
|
||||||
double pollingInterval,
|
double pollingInterval,
|
||||||
Optional<double> compactionInterval);
|
Optional<double> compactionInterval);
|
||||||
~SimpleConfigConsumer();
|
~SimpleConfigConsumer();
|
||||||
|
Future<Void> readSnapshot(ConfigBroadcaster& broadcaster) override;
|
||||||
Future<Void> consume(ConfigBroadcaster& broadcaster) override;
|
Future<Void> consume(ConfigBroadcaster& broadcaster) override;
|
||||||
|
void allowSpecialCaseRollforward() override;
|
||||||
UID getID() const override;
|
UID getID() const override;
|
||||||
|
|
||||||
public: // Testing
|
public: // Testing
|
||||||
|
|
|
@ -1948,7 +1948,7 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
|
||||||
recoveredDiskFiles));
|
recoveredDiskFiles));
|
||||||
|
|
||||||
if (configNode.isValid()) {
|
if (configNode.isValid()) {
|
||||||
errorForwarders.add(localConfig->consume(interf.configBroadcastInterface));
|
errorForwarders.add(brokenPromiseToNever(localConfig->consume(interf.configBroadcastInterface)));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (SERVER_KNOBS->ENABLE_WORKER_HEALTH_MONITOR) {
|
if (SERVER_KNOBS->ENABLE_WORKER_HEALTH_MONITOR) {
|
||||||
|
@ -3319,8 +3319,8 @@ ACTOR Future<Void> fdbd(Reference<IClusterConnectionRecord> connRecord,
|
||||||
state std::vector<Future<Void>> actors;
|
state std::vector<Future<Void>> actors;
|
||||||
state Promise<Void> recoveredDiskFiles;
|
state Promise<Void> recoveredDiskFiles;
|
||||||
state Reference<ConfigNode> configNode;
|
state Reference<ConfigNode> configNode;
|
||||||
state Reference<LocalConfiguration> localConfig =
|
state Reference<LocalConfiguration> localConfig = makeReference<LocalConfiguration>(
|
||||||
makeReference<LocalConfiguration>(dataFolder, configPath, manualKnobOverrides);
|
dataFolder, configPath, manualKnobOverrides, g_network->isSimulated() ? IsTest::True : IsTest::False);
|
||||||
// setupStackSignal();
|
// setupStackSignal();
|
||||||
getCurrentLineage()->modify(&RoleLineage::role) = ProcessClass::Worker;
|
getCurrentLineage()->modify(&RoleLineage::role) = ProcessClass::Worker;
|
||||||
|
|
||||||
|
@ -3329,11 +3329,9 @@ ACTOR Future<Void> fdbd(Reference<IClusterConnectionRecord> connRecord,
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME: Initializing here causes simulation issues, these must be fixed
|
// FIXME: Initializing here causes simulation issues, these must be fixed
|
||||||
/*
|
// if (configDBType != ConfigDBType::DISABLED) {
|
||||||
if (configDBType != ConfigDBType::DISABLED) {
|
// wait(localConfig->initialize());
|
||||||
wait(localConfig->initialize());
|
// }
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
actors.push_back(serveProtocolInfo());
|
actors.push_back(serveProtocolInfo());
|
||||||
actors.push_back(serveProcess());
|
actors.push_back(serveProcess());
|
||||||
|
|
|
@ -33,6 +33,7 @@ struct ChangeConfigWorkload : TestWorkload {
|
||||||
double minDelayBeforeChange, maxDelayBeforeChange;
|
double minDelayBeforeChange, maxDelayBeforeChange;
|
||||||
std::string configMode; //<\"single\"|\"double\"|\"triple\">
|
std::string configMode; //<\"single\"|\"double\"|\"triple\">
|
||||||
std::string networkAddresses; // comma separated list e.g. "127.0.0.1:4000,127.0.0.1:4001"
|
std::string networkAddresses; // comma separated list e.g. "127.0.0.1:4000,127.0.0.1:4001"
|
||||||
|
int coordinatorChanges; // number of times to change coordinators. Only applied if `coordinators` is set to `auto`
|
||||||
|
|
||||||
ChangeConfigWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {
|
ChangeConfigWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {
|
||||||
minDelayBeforeChange = getOption(options, LiteralStringRef("minDelayBeforeChange"), 0);
|
minDelayBeforeChange = getOption(options, LiteralStringRef("minDelayBeforeChange"), 0);
|
||||||
|
@ -40,6 +41,7 @@ struct ChangeConfigWorkload : TestWorkload {
|
||||||
ASSERT(maxDelayBeforeChange >= minDelayBeforeChange);
|
ASSERT(maxDelayBeforeChange >= minDelayBeforeChange);
|
||||||
configMode = getOption(options, LiteralStringRef("configMode"), StringRef()).toString();
|
configMode = getOption(options, LiteralStringRef("configMode"), StringRef()).toString();
|
||||||
networkAddresses = getOption(options, LiteralStringRef("coordinators"), StringRef()).toString();
|
networkAddresses = getOption(options, LiteralStringRef("coordinators"), StringRef()).toString();
|
||||||
|
coordinatorChanges = getOption(options, LiteralStringRef("coordinatorChanges"), 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string description() const override { return "ChangeConfig"; }
|
std::string description() const override { return "ChangeConfig"; }
|
||||||
|
@ -124,6 +126,15 @@ struct ChangeConfigWorkload : TestWorkload {
|
||||||
wait(CoordinatorsChangeActor(cx, self));
|
wait(CoordinatorsChangeActor(cx, self));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Run additional coordinator changes if specified.
|
||||||
|
if (self->networkAddresses.size() && self->networkAddresses == "auto") {
|
||||||
|
state int i;
|
||||||
|
for (i = 1; i < self->coordinatorChanges; ++i) {
|
||||||
|
wait(delay(20));
|
||||||
|
wait(CoordinatorsChangeActor(cx, self, true));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!extraConfigureBefore) {
|
if (!extraConfigureBefore) {
|
||||||
wait(self->configureExtraDatabases(self));
|
wait(self->configureExtraDatabases(self));
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,12 +39,14 @@ class ConfigIncrementWorkload : public TestWorkload {
|
||||||
static Key getConfigKey() { return Tuple::makeTuple(/* config class */ nullptr, testKnobName).pack(); }
|
static Key getConfigKey() { return Tuple::makeTuple(/* config class */ nullptr, testKnobName).pack(); }
|
||||||
|
|
||||||
ACTOR static Future<int> get(Reference<ISingleThreadTransaction> tr) {
|
ACTOR static Future<int> get(Reference<ISingleThreadTransaction> tr) {
|
||||||
TraceEvent(SevDebug, "ConfigIncrementGet");
|
state TraceEvent te(SevDebug, "ConfigIncrementGet");
|
||||||
Optional<Value> serializedValue = wait(tr->get(getConfigKey()));
|
Optional<Value> serializedValue = wait(tr->get(getConfigKey()));
|
||||||
if (!serializedValue.present()) {
|
if (!serializedValue.present()) {
|
||||||
return 0;
|
return 0;
|
||||||
} else {
|
} else {
|
||||||
return BinaryReader::fromStringRef<int>(serializedValue.get(), Unversioned());
|
int value = BinaryReader::fromStringRef<int>(serializedValue.get(), Unversioned());
|
||||||
|
te.detail("Value", value);
|
||||||
|
return value;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -98,9 +100,9 @@ class ConfigIncrementWorkload : public TestWorkload {
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR static Future<bool> check(ConfigIncrementWorkload* self, Database cx) {
|
ACTOR static Future<bool> check(ConfigIncrementWorkload* self, Database cx) {
|
||||||
state Reference<ISingleThreadTransaction> tr = self->getTransaction(cx);
|
|
||||||
loop {
|
loop {
|
||||||
try {
|
try {
|
||||||
|
state Reference<ISingleThreadTransaction> tr = self->getTransaction(cx);
|
||||||
state int currentValue = wait(get(tr));
|
state int currentValue = wait(get(tr));
|
||||||
auto expectedValue = self->incrementActors * self->incrementsPerActor;
|
auto expectedValue = self->incrementActors * self->incrementsPerActor;
|
||||||
TraceEvent("ConfigIncrementCheck")
|
TraceEvent("ConfigIncrementCheck")
|
||||||
|
|
|
@ -9,7 +9,7 @@ testTitle = 'ConfigIncrement'
|
||||||
incrementActors = 2
|
incrementActors = 2
|
||||||
incrementsPerActor = 10
|
incrementsPerActor = 10
|
||||||
meanSleepWithinTransactions = 0.01
|
meanSleepWithinTransactions = 0.01
|
||||||
meanSleepBetweenTransactions = 0.1
|
meanSleepBetweenTransactions = 10
|
||||||
|
|
||||||
[[test.workload]]
|
[[test.workload]]
|
||||||
testName = 'Attrition'
|
testName = 'Attrition'
|
||||||
|
@ -17,3 +17,9 @@ testTitle = 'ConfigIncrement'
|
||||||
machinesToLeave = 3
|
machinesToLeave = 3
|
||||||
reboot = true
|
reboot = true
|
||||||
testDuration = 10.0
|
testDuration = 10.0
|
||||||
|
|
||||||
|
[[test.workload]]
|
||||||
|
testName = 'ChangeConfig'
|
||||||
|
maxDelayBeforeChange = 120.0
|
||||||
|
coordinators = 'auto'
|
||||||
|
coordinatorChanges = 2
|
|
@ -14,3 +14,9 @@ testTitle = 'ConfigIncrement'
|
||||||
[[test.workload]]
|
[[test.workload]]
|
||||||
testName = 'Attrition'
|
testName = 'Attrition'
|
||||||
reboot = false
|
reboot = false
|
||||||
|
|
||||||
|
[[test.workload]]
|
||||||
|
testName = 'ChangeConfig'
|
||||||
|
maxDelayBeforeChange = 120.0
|
||||||
|
coordinators = 'auto'
|
||||||
|
coordinatorChanges = 2
|
Loading…
Reference in New Issue