Merge branch 'main' into fdb#4271
This commit is contained in:
commit
102dcb30c6
|
@ -22,7 +22,7 @@ Contributing to FoundationDB can be in contributions to the code base, sharing y
|
||||||
|
|
||||||
### Binary downloads
|
### Binary downloads
|
||||||
|
|
||||||
Developers interested in using FoundationDB can get started by downloading and installing a binary package. Please see the [downloads page](https://www.foundationdb.org/download/) for a list of available packages.
|
Developers interested in using FoundationDB can get started by downloading and installing a binary package. Please see the [downloads page](https://github.com/apple/foundationdb/releases) for a list of available packages.
|
||||||
|
|
||||||
|
|
||||||
### Compiling from source
|
### Compiling from source
|
||||||
|
@ -181,4 +181,4 @@ Under Windows, only Visual Studio with ClangCl is supported
|
||||||
1. `mkdir build && cd build`
|
1. `mkdir build && cd build`
|
||||||
1. `cmake -G "Visual Studio 16 2019" -A x64 -T ClangCl <PATH_TO_FOUNDATIONDB_SOURCE>`
|
1. `cmake -G "Visual Studio 16 2019" -A x64 -T ClangCl <PATH_TO_FOUNDATIONDB_SOURCE>`
|
||||||
1. `msbuild /p:Configuration=Release foundationdb.sln`
|
1. `msbuild /p:Configuration=Release foundationdb.sln`
|
||||||
1. To increase build performance, use `/p:UseMultiToolTask=true` and `/p:CL_MPCount=<NUMBER_OF_PARALLEL_JOBS>`
|
1. To increase build performance, use `/p:UseMultiToolTask=true` and `/p:CL_MPCount=<NUMBER_OF_PARALLEL_JOBS>`
|
||||||
|
|
|
@ -176,7 +176,7 @@ ACTOR Future<bool> configureCommandActor(Reference<IDatabase> db,
|
||||||
case ConfigurationResult::STORAGE_MIGRATION_DISABLED:
|
case ConfigurationResult::STORAGE_MIGRATION_DISABLED:
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"ERROR: Storage engine type cannot be changed because "
|
"ERROR: Storage engine type cannot be changed because "
|
||||||
"storage_migration_mode=disabled.\n");
|
"storage_migration_type=disabled.\n");
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"Type `configure perpetual_storage_wiggle=1 storage_migration_type=gradual' to enable gradual "
|
"Type `configure perpetual_storage_wiggle=1 storage_migration_type=gradual' to enable gradual "
|
||||||
"migration with the perpetual wiggle, or `configure "
|
"migration with the perpetual wiggle, or `configure "
|
||||||
|
|
|
@ -100,6 +100,7 @@ ACTOR Future<bool> changeCoordinators(Reference<IDatabase> db, std::vector<Strin
|
||||||
state std::vector<StringRef>::iterator t;
|
state std::vector<StringRef>::iterator t;
|
||||||
for (t = tokens.begin() + 1; t != tokens.end(); ++t) {
|
for (t = tokens.begin() + 1; t != tokens.end(); ++t) {
|
||||||
try {
|
try {
|
||||||
|
// TODO(renxuan): add hostname parsing here.
|
||||||
auto const& addr = NetworkAddress::parse(t->toString());
|
auto const& addr = NetworkAddress::parse(t->toString());
|
||||||
if (new_coordinators_addresses.count(addr)) {
|
if (new_coordinators_addresses.count(addr)) {
|
||||||
fprintf(stderr, "ERROR: passed redundant coordinators: `%s'\n", addr.toString().c_str());
|
fprintf(stderr, "ERROR: passed redundant coordinators: `%s'\n", addr.toString().c_str());
|
||||||
|
|
|
@ -1157,7 +1157,6 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
|
||||||
|
|
||||||
state bool writeMode = false;
|
state bool writeMode = false;
|
||||||
|
|
||||||
state std::string clusterConnectString;
|
|
||||||
state std::map<Key, std::pair<Value, ClientLeaderRegInterface>> address_interface;
|
state std::map<Key, std::pair<Value, ClientLeaderRegInterface>> address_interface;
|
||||||
|
|
||||||
state FdbOptions globalOptions;
|
state FdbOptions globalOptions;
|
||||||
|
@ -1171,6 +1170,7 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
|
||||||
ClusterConnectionFile::lookupClusterFileName(opt.clusterFile);
|
ClusterConnectionFile::lookupClusterFileName(opt.clusterFile);
|
||||||
try {
|
try {
|
||||||
ccf = makeReference<ClusterConnectionFile>(resolvedClusterFile.first);
|
ccf = makeReference<ClusterConnectionFile>(resolvedClusterFile.first);
|
||||||
|
wait(ccf->resolveHostnames());
|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
fprintf(stderr, "%s\n", ClusterConnectionFile::getErrorString(resolvedClusterFile, e).c_str());
|
fprintf(stderr, "%s\n", ClusterConnectionFile::getErrorString(resolvedClusterFile, e).c_str());
|
||||||
return 1;
|
return 1;
|
||||||
|
|
|
@ -28,6 +28,7 @@
|
||||||
|
|
||||||
#include "fdbclient/CoordinationInterface.h"
|
#include "fdbclient/CoordinationInterface.h"
|
||||||
|
|
||||||
|
// Determine public IP address by calling the first coordinator.
|
||||||
IPAddress determinePublicIPAutomatically(ClusterConnectionString& ccs) {
|
IPAddress determinePublicIPAutomatically(ClusterConnectionString& ccs) {
|
||||||
try {
|
try {
|
||||||
using namespace boost::asio;
|
using namespace boost::asio;
|
||||||
|
@ -35,6 +36,7 @@ IPAddress determinePublicIPAutomatically(ClusterConnectionString& ccs) {
|
||||||
io_service ioService;
|
io_service ioService;
|
||||||
ip::udp::socket socket(ioService);
|
ip::udp::socket socket(ioService);
|
||||||
|
|
||||||
|
ccs.resolveHostnamesBlocking();
|
||||||
const auto& coordAddr = ccs.coordinators()[0];
|
const auto& coordAddr = ccs.coordinators()[0];
|
||||||
const auto boostIp = coordAddr.ip.isV6() ? ip::address(ip::address_v6(coordAddr.ip.toV6()))
|
const auto boostIp = coordAddr.ip.isV6() ? ip::address(ip::address_v6(coordAddr.ip.toV6()))
|
||||||
: ip::address(ip::address_v4(coordAddr.ip.toV4()));
|
: ip::address(ip::address_v4(coordAddr.ip.toV4()));
|
||||||
|
|
|
@ -58,13 +58,28 @@ struct ClientLeaderRegInterface {
|
||||||
// - There is no address present more than once
|
// - There is no address present more than once
|
||||||
class ClusterConnectionString {
|
class ClusterConnectionString {
|
||||||
public:
|
public:
|
||||||
|
enum ConnectionStringStatus { RESOLVED, RESOLVING, UNRESOLVED };
|
||||||
|
|
||||||
ClusterConnectionString() {}
|
ClusterConnectionString() {}
|
||||||
ClusterConnectionString(const std::string& connStr);
|
ClusterConnectionString(const std::string& connStr);
|
||||||
ClusterConnectionString(const std::vector<NetworkAddress>& coordinators, Key key);
|
ClusterConnectionString(const std::vector<NetworkAddress>& coordinators, Key key);
|
||||||
ClusterConnectionString(const std::vector<Hostname>& hosts, Key key);
|
ClusterConnectionString(const std::vector<Hostname>& hosts, Key key);
|
||||||
|
|
||||||
|
ClusterConnectionString(const ClusterConnectionString& rhs) { operator=(rhs); }
|
||||||
|
ClusterConnectionString& operator=(const ClusterConnectionString& rhs) {
|
||||||
|
// Copy everything except AsyncTrigger resolveFinish.
|
||||||
|
status = rhs.status;
|
||||||
|
coords = rhs.coords;
|
||||||
|
hostnames = rhs.hostnames;
|
||||||
|
networkAddressToHostname = rhs.networkAddressToHostname;
|
||||||
|
key = rhs.key;
|
||||||
|
keyDesc = rhs.keyDesc;
|
||||||
|
connectionString = rhs.connectionString;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<NetworkAddress> const& coordinators() const { return coords; }
|
std::vector<NetworkAddress> const& coordinators() const { return coords; }
|
||||||
void addResolved(Hostname hostname, NetworkAddress address) {
|
void addResolved(const Hostname& hostname, const NetworkAddress& address) {
|
||||||
coords.push_back(address);
|
coords.push_back(address);
|
||||||
networkAddressToHostname.emplace(address, hostname);
|
networkAddressToHostname.emplace(address, hostname);
|
||||||
}
|
}
|
||||||
|
@ -80,14 +95,15 @@ public:
|
||||||
void resolveHostnamesBlocking();
|
void resolveHostnamesBlocking();
|
||||||
void resetToUnresolved();
|
void resetToUnresolved();
|
||||||
|
|
||||||
bool hasUnresolvedHostnames = false;
|
ConnectionStringStatus status = RESOLVED;
|
||||||
|
AsyncTrigger resolveFinish;
|
||||||
std::vector<NetworkAddress> coords;
|
std::vector<NetworkAddress> coords;
|
||||||
std::vector<Hostname> hostnames;
|
std::vector<Hostname> hostnames;
|
||||||
|
std::unordered_map<NetworkAddress, Hostname> networkAddressToHostname;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void parseConnString();
|
void parseConnString();
|
||||||
void parseKey(const std::string& key);
|
void parseKey(const std::string& key);
|
||||||
std::unordered_map<NetworkAddress, Hostname> networkAddressToHostname;
|
|
||||||
Key key, keyDesc;
|
Key key, keyDesc;
|
||||||
std::string connectionString;
|
std::string connectionString;
|
||||||
};
|
};
|
||||||
|
@ -139,7 +155,7 @@ public:
|
||||||
// Signals to the connection record that it was successfully used to connect to a cluster.
|
// Signals to the connection record that it was successfully used to connect to a cluster.
|
||||||
void notifyConnected();
|
void notifyConnected();
|
||||||
|
|
||||||
bool hasUnresolvedHostnames() const;
|
ClusterConnectionString::ConnectionStringStatus connectionStringStatus() const;
|
||||||
Future<Void> resolveHostnames();
|
Future<Void> resolveHostnames();
|
||||||
// This one should only be used when resolving asynchronously is impossible. For all other cases, resolveHostnames()
|
// This one should only be used when resolving asynchronously is impossible. For all other cases, resolveHostnames()
|
||||||
// should be preferred.
|
// should be preferred.
|
||||||
|
|
|
@ -169,7 +169,7 @@ std::map<std::string, std::string> configForToken(std::string const& mode) {
|
||||||
} else if (value == "gradual") {
|
} else if (value == "gradual") {
|
||||||
type = StorageMigrationType::GRADUAL;
|
type = StorageMigrationType::GRADUAL;
|
||||||
} else {
|
} else {
|
||||||
printf("Error: Only disabled|aggressive|gradual are valid for storage_migration_mode.\n");
|
printf("Error: Only disabled|aggressive|gradual are valid for storage_migration_type.\n");
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
out[p + key] = format("%d", type);
|
out[p + key] = format("%d", type);
|
||||||
|
|
|
@ -77,8 +77,8 @@ void IClusterConnectionRecord::setPersisted() {
|
||||||
connectionStringNeedsPersisted = false;
|
connectionStringNeedsPersisted = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IClusterConnectionRecord::hasUnresolvedHostnames() const {
|
ClusterConnectionString::ConnectionStringStatus IClusterConnectionRecord::connectionStringStatus() const {
|
||||||
return cs.hasUnresolvedHostnames;
|
return cs.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
Future<Void> IClusterConnectionRecord::resolveHostnames() {
|
Future<Void> IClusterConnectionRecord::resolveHostnames() {
|
||||||
|
@ -98,39 +98,56 @@ std::string ClusterConnectionString::getErrorString(std::string const& source, E
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR Future<Void> resolveHostnamesImpl(ClusterConnectionString* self) {
|
ACTOR Future<Void> resolveHostnamesImpl(ClusterConnectionString* self) {
|
||||||
std::vector<Future<Void>> fs;
|
loop {
|
||||||
for (auto const& hostName : self->hostnames) {
|
if (self->status == ClusterConnectionString::UNRESOLVED) {
|
||||||
fs.push_back(map(INetworkConnections::net()->resolveTCPEndpoint(hostName.host, hostName.service),
|
self->status = ClusterConnectionString::RESOLVING;
|
||||||
[=](std::vector<NetworkAddress> const& addresses) -> Void {
|
std::vector<Future<Void>> fs;
|
||||||
NetworkAddress addr = addresses[deterministicRandom()->randomInt(0, addresses.size())];
|
for (auto const& hostname : self->hostnames) {
|
||||||
addr.flags = 0; // Reset the parsed address to public
|
fs.push_back(map(INetworkConnections::net()->resolveTCPEndpoint(hostname.host, hostname.service),
|
||||||
addr.fromHostname = NetworkAddressFromHostname::True;
|
[=](std::vector<NetworkAddress> const& addresses) -> Void {
|
||||||
if (hostName.isTLS) {
|
NetworkAddress address =
|
||||||
addr.flags |= NetworkAddress::FLAG_TLS;
|
addresses[deterministicRandom()->randomInt(0, addresses.size())];
|
||||||
}
|
address.flags = 0; // Reset the parsed address to public
|
||||||
self->addResolved(hostName, addr);
|
address.fromHostname = NetworkAddressFromHostname::True;
|
||||||
return Void();
|
if (hostname.isTLS) {
|
||||||
}));
|
address.flags |= NetworkAddress::FLAG_TLS;
|
||||||
|
}
|
||||||
|
self->addResolved(hostname, address);
|
||||||
|
return Void();
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
wait(waitForAll(fs));
|
||||||
|
std::sort(self->coords.begin(), self->coords.end());
|
||||||
|
if (std::unique(self->coords.begin(), self->coords.end()) != self->coords.end()) {
|
||||||
|
self->status = ClusterConnectionString::UNRESOLVED;
|
||||||
|
self->resolveFinish.trigger();
|
||||||
|
throw connection_string_invalid();
|
||||||
|
}
|
||||||
|
self->status = ClusterConnectionString::RESOLVED;
|
||||||
|
self->resolveFinish.trigger();
|
||||||
|
break;
|
||||||
|
} else if (self->status == ClusterConnectionString::RESOLVING) {
|
||||||
|
wait(self->resolveFinish.onTrigger());
|
||||||
|
if (self->status == ClusterConnectionString::RESOLVED) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// Otherwise, this means other threads failed on resolve, so here we go back to the loop and try to resolve
|
||||||
|
// again.
|
||||||
|
} else {
|
||||||
|
// status is RESOLVED, nothing to do.
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
wait(waitForAll(fs));
|
|
||||||
std::sort(self->coords.begin(), self->coords.end());
|
|
||||||
if (std::unique(self->coords.begin(), self->coords.end()) != self->coords.end()) {
|
|
||||||
throw connection_string_invalid();
|
|
||||||
}
|
|
||||||
self->hasUnresolvedHostnames = false;
|
|
||||||
return Void();
|
return Void();
|
||||||
}
|
}
|
||||||
|
|
||||||
Future<Void> ClusterConnectionString::resolveHostnames() {
|
Future<Void> ClusterConnectionString::resolveHostnames() {
|
||||||
if (!hasUnresolvedHostnames) {
|
return resolveHostnamesImpl(this);
|
||||||
return Void();
|
|
||||||
} else {
|
|
||||||
return resolveHostnamesImpl(this);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ClusterConnectionString::resolveHostnamesBlocking() {
|
void ClusterConnectionString::resolveHostnamesBlocking() {
|
||||||
if (hasUnresolvedHostnames) {
|
if (status != RESOLVED) {
|
||||||
|
status = RESOLVING;
|
||||||
for (auto const& hostname : hostnames) {
|
for (auto const& hostname : hostnames) {
|
||||||
std::vector<NetworkAddress> addresses =
|
std::vector<NetworkAddress> addresses =
|
||||||
INetworkConnections::net()->resolveTCPEndpointBlocking(hostname.host, hostname.service);
|
INetworkConnections::net()->resolveTCPEndpointBlocking(hostname.host, hostname.service);
|
||||||
|
@ -140,14 +157,14 @@ void ClusterConnectionString::resolveHostnamesBlocking() {
|
||||||
if (hostname.isTLS) {
|
if (hostname.isTLS) {
|
||||||
address.flags |= NetworkAddress::FLAG_TLS;
|
address.flags |= NetworkAddress::FLAG_TLS;
|
||||||
}
|
}
|
||||||
coords.push_back(address);
|
addResolved(hostname, address);
|
||||||
networkAddressToHostname.emplace(address, hostname);
|
|
||||||
}
|
}
|
||||||
std::sort(coords.begin(), coords.end());
|
std::sort(coords.begin(), coords.end());
|
||||||
if (std::unique(coords.begin(), coords.end()) != coords.end()) {
|
if (std::unique(coords.begin(), coords.end()) != coords.end()) {
|
||||||
|
status = UNRESOLVED;
|
||||||
throw connection_string_invalid();
|
throw connection_string_invalid();
|
||||||
}
|
}
|
||||||
hasUnresolvedHostnames = false;
|
status = RESOLVED;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -156,7 +173,7 @@ void ClusterConnectionString::resetToUnresolved() {
|
||||||
coords.clear();
|
coords.clear();
|
||||||
hostnames.clear();
|
hostnames.clear();
|
||||||
networkAddressToHostname.clear();
|
networkAddressToHostname.clear();
|
||||||
hasUnresolvedHostnames = true;
|
status = UNRESOLVED;
|
||||||
parseConnString();
|
parseConnString();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -184,7 +201,9 @@ void ClusterConnectionString::parseConnString() {
|
||||||
}
|
}
|
||||||
p = pComma + 1;
|
p = pComma + 1;
|
||||||
}
|
}
|
||||||
hasUnresolvedHostnames = hostnames.size() > 0;
|
if (hostnames.size() > 0) {
|
||||||
|
status = UNRESOLVED;
|
||||||
|
}
|
||||||
ASSERT((coords.size() + hostnames.size()) > 0);
|
ASSERT((coords.size() + hostnames.size()) > 0);
|
||||||
|
|
||||||
std::sort(coords.begin(), coords.end());
|
std::sort(coords.begin(), coords.end());
|
||||||
|
@ -256,7 +275,7 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/hostnames") {
|
||||||
{
|
{
|
||||||
input = "asdf:2345@localhost:1234";
|
input = "asdf:2345@localhost:1234";
|
||||||
ClusterConnectionString cs(input);
|
ClusterConnectionString cs(input);
|
||||||
ASSERT(cs.hasUnresolvedHostnames);
|
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
|
||||||
ASSERT(cs.hostnames.size() == 1);
|
ASSERT(cs.hostnames.size() == 1);
|
||||||
ASSERT(input == cs.toString());
|
ASSERT(input == cs.toString());
|
||||||
}
|
}
|
||||||
|
@ -264,7 +283,7 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/hostnames") {
|
||||||
{
|
{
|
||||||
input = "0xxdeadbeef:100100100@localhost:34534,host-name:23443";
|
input = "0xxdeadbeef:100100100@localhost:34534,host-name:23443";
|
||||||
ClusterConnectionString cs(input);
|
ClusterConnectionString cs(input);
|
||||||
ASSERT(cs.hasUnresolvedHostnames);
|
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
|
||||||
ASSERT(cs.hostnames.size() == 2);
|
ASSERT(cs.hostnames.size() == 2);
|
||||||
ASSERT(input == cs.toString());
|
ASSERT(input == cs.toString());
|
||||||
}
|
}
|
||||||
|
@ -277,7 +296,7 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/hostnames") {
|
||||||
commented += "# asdfasdf ##";
|
commented += "# asdfasdf ##";
|
||||||
|
|
||||||
ClusterConnectionString cs(commented);
|
ClusterConnectionString cs(commented);
|
||||||
ASSERT(cs.hasUnresolvedHostnames);
|
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
|
||||||
ASSERT(cs.hostnames.size() == 2);
|
ASSERT(cs.hostnames.size() == 2);
|
||||||
ASSERT(input == cs.toString());
|
ASSERT(input == cs.toString());
|
||||||
}
|
}
|
||||||
|
@ -290,7 +309,7 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/hostnames") {
|
||||||
commented += "# asdfasdf ##";
|
commented += "# asdfasdf ##";
|
||||||
|
|
||||||
ClusterConnectionString cs(commented);
|
ClusterConnectionString cs(commented);
|
||||||
ASSERT(cs.hasUnresolvedHostnames);
|
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
|
||||||
ASSERT(cs.hostnames.size() == 2);
|
ASSERT(cs.hostnames.size() == 2);
|
||||||
ASSERT(input == cs.toString());
|
ASSERT(input == cs.toString());
|
||||||
}
|
}
|
||||||
|
@ -314,16 +333,16 @@ TEST_CASE("/fdbclient/MonitorLeader/ConnectionString") {
|
||||||
INetworkConnections::net()->addMockTCPEndpoint(hn2, port2, { address2 });
|
INetworkConnections::net()->addMockTCPEndpoint(hn2, port2, { address2 });
|
||||||
|
|
||||||
state ClusterConnectionString cs(hostnames, LiteralStringRef("TestCluster:0"));
|
state ClusterConnectionString cs(hostnames, LiteralStringRef("TestCluster:0"));
|
||||||
ASSERT(cs.hasUnresolvedHostnames);
|
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
|
||||||
ASSERT(cs.hostnames.size() == 2);
|
ASSERT(cs.hostnames.size() == 2);
|
||||||
ASSERT(cs.coordinators().size() == 0);
|
ASSERT(cs.coordinators().size() == 0);
|
||||||
wait(cs.resolveHostnames());
|
wait(cs.resolveHostnames());
|
||||||
ASSERT(!cs.hasUnresolvedHostnames);
|
ASSERT(cs.status == ClusterConnectionString::RESOLVED);
|
||||||
ASSERT(cs.hostnames.size() == 2);
|
ASSERT(cs.hostnames.size() == 2);
|
||||||
ASSERT(cs.coordinators().size() == 2);
|
ASSERT(cs.coordinators().size() == 2);
|
||||||
ASSERT(cs.toString() == connectionString);
|
ASSERT(cs.toString() == connectionString);
|
||||||
cs.resetToUnresolved();
|
cs.resetToUnresolved();
|
||||||
ASSERT(cs.hasUnresolvedHostnames);
|
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
|
||||||
ASSERT(cs.hostnames.size() == 2);
|
ASSERT(cs.hostnames.size() == 2);
|
||||||
ASSERT(cs.coordinators().size() == 0);
|
ASSERT(cs.coordinators().size() == 0);
|
||||||
ASSERT(cs.toString() == connectionString);
|
ASSERT(cs.toString() == connectionString);
|
||||||
|
@ -422,7 +441,7 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/fuzz") {
|
||||||
}
|
}
|
||||||
|
|
||||||
ClusterConnectionString::ClusterConnectionString(const std::vector<NetworkAddress>& servers, Key key)
|
ClusterConnectionString::ClusterConnectionString(const std::vector<NetworkAddress>& servers, Key key)
|
||||||
: coords(servers) {
|
: status(RESOLVED), coords(servers) {
|
||||||
std::string keyString = key.toString();
|
std::string keyString = key.toString();
|
||||||
parseKey(keyString);
|
parseKey(keyString);
|
||||||
connectionString = keyString + "@";
|
connectionString = keyString + "@";
|
||||||
|
@ -435,7 +454,7 @@ ClusterConnectionString::ClusterConnectionString(const std::vector<NetworkAddres
|
||||||
}
|
}
|
||||||
|
|
||||||
ClusterConnectionString::ClusterConnectionString(const std::vector<Hostname>& hosts, Key key)
|
ClusterConnectionString::ClusterConnectionString(const std::vector<Hostname>& hosts, Key key)
|
||||||
: hasUnresolvedHostnames(true), hostnames(hosts) {
|
: status(UNRESOLVED), hostnames(hosts) {
|
||||||
std::string keyString = key.toString();
|
std::string keyString = key.toString();
|
||||||
parseKey(keyString);
|
parseKey(keyString);
|
||||||
connectionString = keyString + "@";
|
connectionString = keyString + "@";
|
||||||
|
@ -497,6 +516,7 @@ std::string ClusterConnectionString::toString() const {
|
||||||
}
|
}
|
||||||
|
|
||||||
ClientCoordinators::ClientCoordinators(Reference<IClusterConnectionRecord> ccr) : ccr(ccr) {
|
ClientCoordinators::ClientCoordinators(Reference<IClusterConnectionRecord> ccr) : ccr(ccr) {
|
||||||
|
ASSERT(ccr->connectionStringStatus() == ClusterConnectionString::RESOLVED);
|
||||||
ClusterConnectionString cs = ccr->getConnectionString();
|
ClusterConnectionString cs = ccr->getConnectionString();
|
||||||
for (auto s = cs.coordinators().begin(); s != cs.coordinators().end(); ++s)
|
for (auto s = cs.coordinators().begin(); s != cs.coordinators().end(); ++s)
|
||||||
clientLeaderServers.push_back(ClientLeaderRegInterface(*s));
|
clientLeaderServers.push_back(ClientLeaderRegInterface(*s));
|
||||||
|
@ -525,15 +545,44 @@ ClientLeaderRegInterface::ClientLeaderRegInterface(INetwork* local) {
|
||||||
|
|
||||||
// Nominee is the worker among all workers that are considered as leader by one coordinator
|
// Nominee is the worker among all workers that are considered as leader by one coordinator
|
||||||
// This function contacts a coordinator coord to ask who is its nominee.
|
// This function contacts a coordinator coord to ask who is its nominee.
|
||||||
|
// Note: for coordinators whose NetworkAddress is parsed out of a hostname, a connection failure will cause this actor
|
||||||
|
// to throw `coordinators_changed()` error
|
||||||
ACTOR Future<Void> monitorNominee(Key key,
|
ACTOR Future<Void> monitorNominee(Key key,
|
||||||
ClientLeaderRegInterface coord,
|
ClientLeaderRegInterface coord,
|
||||||
AsyncTrigger* nomineeChange,
|
AsyncTrigger* nomineeChange,
|
||||||
Optional<LeaderInfo>* info) {
|
Optional<LeaderInfo>* info,
|
||||||
|
Optional<Hostname> hostname = Optional<Hostname>()) {
|
||||||
loop {
|
loop {
|
||||||
state Optional<LeaderInfo> li =
|
state Optional<LeaderInfo> li;
|
||||||
wait(retryBrokenPromise(coord.getLeader,
|
|
||||||
GetLeaderRequest(key, info->present() ? info->get().changeID : UID()),
|
if (coord.getLeader.getEndpoint().getPrimaryAddress().fromHostname) {
|
||||||
TaskPriority::CoordinationReply));
|
state ErrorOr<Optional<LeaderInfo>> rep =
|
||||||
|
wait(coord.getLeader.tryGetReply(GetLeaderRequest(key, info->present() ? info->get().changeID : UID()),
|
||||||
|
TaskPriority::CoordinationReply));
|
||||||
|
if (rep.isError()) {
|
||||||
|
// Connecting to nominee failed, most likely due to connection failed.
|
||||||
|
TraceEvent("MonitorNomineeError")
|
||||||
|
.detail("Hostname", hostname.present() ? hostname.get().toString() : "UnknownHostname")
|
||||||
|
.detail("OldAddr", coord.getLeader.getEndpoint().getPrimaryAddress().toString())
|
||||||
|
.error(rep.getError());
|
||||||
|
if (rep.getError().code() == error_code_request_maybe_delivered) {
|
||||||
|
// 50 milliseconds delay to prevent tight resolving loop due to outdated DNS cache
|
||||||
|
wait(delay(0.05));
|
||||||
|
throw coordinators_changed();
|
||||||
|
} else {
|
||||||
|
throw rep.getError();
|
||||||
|
}
|
||||||
|
} else if (rep.present()) {
|
||||||
|
li = rep.get();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Optional<LeaderInfo> tmp =
|
||||||
|
wait(retryBrokenPromise(coord.getLeader,
|
||||||
|
GetLeaderRequest(key, info->present() ? info->get().changeID : UID()),
|
||||||
|
TaskPriority::CoordinationReply));
|
||||||
|
li = tmp;
|
||||||
|
}
|
||||||
|
|
||||||
wait(Future<Void>(Void())); // Make sure we weren't cancelled
|
wait(Future<Void>(Void())); // Make sure we weren't cancelled
|
||||||
|
|
||||||
TraceEvent("GetLeaderReply")
|
TraceEvent("GetLeaderReply")
|
||||||
|
@ -608,53 +657,74 @@ Optional<std::pair<LeaderInfo, bool>> getLeader(const std::vector<Optional<Leade
|
||||||
ACTOR Future<MonitorLeaderInfo> monitorLeaderOneGeneration(Reference<IClusterConnectionRecord> connRecord,
|
ACTOR Future<MonitorLeaderInfo> monitorLeaderOneGeneration(Reference<IClusterConnectionRecord> connRecord,
|
||||||
Reference<AsyncVar<Value>> outSerializedLeaderInfo,
|
Reference<AsyncVar<Value>> outSerializedLeaderInfo,
|
||||||
MonitorLeaderInfo info) {
|
MonitorLeaderInfo info) {
|
||||||
state ClientCoordinators coordinators(info.intermediateConnRecord);
|
|
||||||
state AsyncTrigger nomineeChange;
|
|
||||||
state std::vector<Optional<LeaderInfo>> nominees;
|
|
||||||
state Future<Void> allActors;
|
|
||||||
|
|
||||||
nominees.resize(coordinators.clientLeaderServers.size());
|
|
||||||
|
|
||||||
std::vector<Future<Void>> actors;
|
|
||||||
// Ask all coordinators if the worker is considered as a leader (leader nominee) by the coordinator.
|
|
||||||
actors.reserve(coordinators.clientLeaderServers.size());
|
|
||||||
for (int i = 0; i < coordinators.clientLeaderServers.size(); i++)
|
|
||||||
actors.push_back(
|
|
||||||
monitorNominee(coordinators.clusterKey, coordinators.clientLeaderServers[i], &nomineeChange, &nominees[i]));
|
|
||||||
allActors = waitForAll(actors);
|
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
Optional<std::pair<LeaderInfo, bool>> leader = getLeader(nominees);
|
wait(connRecord->resolveHostnames());
|
||||||
TraceEvent("MonitorLeaderChange")
|
wait(info.intermediateConnRecord->resolveHostnames());
|
||||||
.detail("NewLeader", leader.present() ? leader.get().first.changeID : UID(1, 1));
|
state ClientCoordinators coordinators(info.intermediateConnRecord);
|
||||||
if (leader.present()) {
|
state AsyncTrigger nomineeChange;
|
||||||
if (leader.get().first.forward) {
|
state std::vector<Optional<LeaderInfo>> nominees;
|
||||||
TraceEvent("MonitorLeaderForwarding")
|
state Future<Void> allActors;
|
||||||
.detail("NewConnStr", leader.get().first.serializedInfo.toString())
|
|
||||||
.detail("OldConnStr", info.intermediateConnRecord->getConnectionString().toString())
|
|
||||||
.trackLatest("MonitorLeaderForwarding");
|
|
||||||
info.intermediateConnRecord = connRecord->makeIntermediateRecord(
|
|
||||||
ClusterConnectionString(leader.get().first.serializedInfo.toString()));
|
|
||||||
return info;
|
|
||||||
}
|
|
||||||
if (connRecord != info.intermediateConnRecord) {
|
|
||||||
if (!info.hasConnected) {
|
|
||||||
TraceEvent(SevWarnAlways, "IncorrectClusterFileContentsAtConnection")
|
|
||||||
.detail("ClusterFile", connRecord->toString())
|
|
||||||
.detail("StoredConnectionString", connRecord->getConnectionString().toString())
|
|
||||||
.detail("CurrentConnectionString",
|
|
||||||
info.intermediateConnRecord->getConnectionString().toString());
|
|
||||||
}
|
|
||||||
connRecord->setAndPersistConnectionString(info.intermediateConnRecord->getConnectionString());
|
|
||||||
info.intermediateConnRecord = connRecord;
|
|
||||||
}
|
|
||||||
|
|
||||||
info.hasConnected = true;
|
nominees.resize(coordinators.clientLeaderServers.size());
|
||||||
connRecord->notifyConnected();
|
|
||||||
|
|
||||||
outSerializedLeaderInfo->set(leader.get().first.serializedInfo);
|
state std::vector<Future<Void>> actors;
|
||||||
|
// Ask all coordinators if the worker is considered as a leader (leader nominee) by the coordinator.
|
||||||
|
actors.reserve(coordinators.clientLeaderServers.size());
|
||||||
|
for (int i = 0; i < coordinators.clientLeaderServers.size(); i++) {
|
||||||
|
Optional<Hostname> hostname;
|
||||||
|
auto r = connRecord->getConnectionString().networkAddressToHostname.find(
|
||||||
|
coordinators.clientLeaderServers[i].getLeader.getEndpoint().getPrimaryAddress());
|
||||||
|
if (r != connRecord->getConnectionString().networkAddressToHostname.end()) {
|
||||||
|
hostname = r->second;
|
||||||
|
}
|
||||||
|
actors.push_back(monitorNominee(
|
||||||
|
coordinators.clusterKey, coordinators.clientLeaderServers[i], &nomineeChange, &nominees[i], hostname));
|
||||||
|
}
|
||||||
|
allActors = waitForAll(actors);
|
||||||
|
|
||||||
|
loop {
|
||||||
|
Optional<std::pair<LeaderInfo, bool>> leader = getLeader(nominees);
|
||||||
|
TraceEvent("MonitorLeaderChange")
|
||||||
|
.detail("NewLeader", leader.present() ? leader.get().first.changeID : UID(1, 1));
|
||||||
|
if (leader.present()) {
|
||||||
|
if (leader.get().first.forward) {
|
||||||
|
TraceEvent("MonitorLeaderForwarding")
|
||||||
|
.detail("NewConnStr", leader.get().first.serializedInfo.toString())
|
||||||
|
.detail("OldConnStr", info.intermediateConnRecord->getConnectionString().toString())
|
||||||
|
.trackLatest("MonitorLeaderForwarding");
|
||||||
|
info.intermediateConnRecord = connRecord->makeIntermediateRecord(
|
||||||
|
ClusterConnectionString(leader.get().first.serializedInfo.toString()));
|
||||||
|
return info;
|
||||||
|
}
|
||||||
|
if (connRecord != info.intermediateConnRecord) {
|
||||||
|
if (!info.hasConnected) {
|
||||||
|
TraceEvent(SevWarnAlways, "IncorrectClusterFileContentsAtConnection")
|
||||||
|
.detail("ClusterFile", connRecord->toString())
|
||||||
|
.detail("StoredConnectionString", connRecord->getConnectionString().toString())
|
||||||
|
.detail("CurrentConnectionString",
|
||||||
|
info.intermediateConnRecord->getConnectionString().toString());
|
||||||
|
}
|
||||||
|
connRecord->setAndPersistConnectionString(info.intermediateConnRecord->getConnectionString());
|
||||||
|
info.intermediateConnRecord = connRecord;
|
||||||
|
}
|
||||||
|
|
||||||
|
info.hasConnected = true;
|
||||||
|
connRecord->notifyConnected();
|
||||||
|
|
||||||
|
outSerializedLeaderInfo->set(leader.get().first.serializedInfo);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
wait(nomineeChange.onTrigger() || allActors);
|
||||||
|
} catch (Error& e) {
|
||||||
|
if (e.code() == error_code_coordinators_changed) {
|
||||||
|
TraceEvent("MonitorLeaderCoordinatorsChanged").suppressFor(1.0);
|
||||||
|
connRecord->getConnectionString().resetToUnresolved();
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
wait(nomineeChange.onTrigger() || allActors);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -774,8 +844,8 @@ ACTOR Future<Void> getClientInfoFromLeader(Reference<AsyncVar<Optional<ClusterCo
|
||||||
when(ClientDBInfo ni =
|
when(ClientDBInfo ni =
|
||||||
wait(brokenPromiseToNever(knownLeader->get().get().clientInterface.openDatabase.getReply(req)))) {
|
wait(brokenPromiseToNever(knownLeader->get().get().clientInterface.openDatabase.getReply(req)))) {
|
||||||
TraceEvent("GetClientInfoFromLeaderGotClientInfo", knownLeader->get().get().clientInterface.id())
|
TraceEvent("GetClientInfoFromLeaderGotClientInfo", knownLeader->get().get().clientInterface.id())
|
||||||
.detail("CommitProxy0", ni.commitProxies.size() ? ni.commitProxies[0].id() : UID())
|
.detail("CommitProxy0", ni.commitProxies.size() ? ni.commitProxies[0].address().toString() : "")
|
||||||
.detail("GrvProxy0", ni.grvProxies.size() ? ni.grvProxies[0].id() : UID())
|
.detail("GrvProxy0", ni.grvProxies.size() ? ni.grvProxies[0].address().toString() : "")
|
||||||
.detail("ClientID", ni.id);
|
.detail("ClientID", ni.id);
|
||||||
clientData->clientInfo->set(CachedSerialization<ClientDBInfo>(ni));
|
clientData->clientInfo->set(CachedSerialization<ClientDBInfo>(ni));
|
||||||
}
|
}
|
||||||
|
@ -787,7 +857,8 @@ ACTOR Future<Void> getClientInfoFromLeader(Reference<AsyncVar<Optional<ClusterCo
|
||||||
ACTOR Future<Void> monitorLeaderAndGetClientInfo(Key clusterKey,
|
ACTOR Future<Void> monitorLeaderAndGetClientInfo(Key clusterKey,
|
||||||
std::vector<NetworkAddress> coordinators,
|
std::vector<NetworkAddress> coordinators,
|
||||||
ClientData* clientData,
|
ClientData* clientData,
|
||||||
Reference<AsyncVar<Optional<LeaderInfo>>> leaderInfo) {
|
Reference<AsyncVar<Optional<LeaderInfo>>> leaderInfo,
|
||||||
|
Reference<AsyncVar<Void>> coordinatorsChanged) {
|
||||||
state std::vector<ClientLeaderRegInterface> clientLeaderServers;
|
state std::vector<ClientLeaderRegInterface> clientLeaderServers;
|
||||||
state AsyncTrigger nomineeChange;
|
state AsyncTrigger nomineeChange;
|
||||||
state std::vector<Optional<LeaderInfo>> nominees;
|
state std::vector<Optional<LeaderInfo>> nominees;
|
||||||
|
@ -835,7 +906,14 @@ ACTOR Future<Void> monitorLeaderAndGetClientInfo(Key clusterKey,
|
||||||
leaderInfo->set(leader.get().first);
|
leaderInfo->set(leader.get().first);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
wait(nomineeChange.onTrigger() || allActors);
|
try {
|
||||||
|
wait(nomineeChange.onTrigger() || allActors);
|
||||||
|
} catch (Error& e) {
|
||||||
|
if (e.code() == error_code_coordinators_changed) {
|
||||||
|
coordinatorsChanged->trigger();
|
||||||
|
}
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -964,9 +1042,15 @@ ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration(
|
||||||
successIndex = index;
|
successIndex = index;
|
||||||
} else {
|
} else {
|
||||||
TEST(rep.getError().code() == error_code_failed_to_progress); // Coordinator cant talk to cluster controller
|
TEST(rep.getError().code() == error_code_failed_to_progress); // Coordinator cant talk to cluster controller
|
||||||
|
if (rep.getError().code() == error_code_coordinators_changed) {
|
||||||
|
throw coordinators_changed();
|
||||||
|
}
|
||||||
index = (index + 1) % addrs.size();
|
index = (index + 1) % addrs.size();
|
||||||
if (index == successIndex) {
|
if (index == successIndex) {
|
||||||
wait(delay(CLIENT_KNOBS->COORDINATOR_RECONNECTION_DELAY));
|
wait(delay(CLIENT_KNOBS->COORDINATOR_RECONNECTION_DELAY));
|
||||||
|
// When the client fails talking to all coordinators, we throw coordinators_changed() and let the caller
|
||||||
|
// re-resolve the connection string and retry.
|
||||||
|
throw coordinators_changed();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -978,16 +1062,27 @@ ACTOR Future<Void> monitorProxies(
|
||||||
Reference<AsyncVar<Optional<ClientLeaderRegInterface>>> coordinator,
|
Reference<AsyncVar<Optional<ClientLeaderRegInterface>>> coordinator,
|
||||||
Reference<ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>> supportedVersions,
|
Reference<ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>> supportedVersions,
|
||||||
Key traceLogGroup) {
|
Key traceLogGroup) {
|
||||||
|
wait(connRecord->get()->resolveHostnames());
|
||||||
state MonitorLeaderInfo info(connRecord->get());
|
state MonitorLeaderInfo info(connRecord->get());
|
||||||
loop {
|
loop {
|
||||||
choose {
|
try {
|
||||||
when(MonitorLeaderInfo _info = wait(monitorProxiesOneGeneration(
|
wait(info.intermediateConnRecord->resolveHostnames());
|
||||||
connRecord->get(), clientInfo, coordinator, info, supportedVersions, traceLogGroup))) {
|
choose {
|
||||||
info = _info;
|
when(MonitorLeaderInfo _info = wait(monitorProxiesOneGeneration(
|
||||||
|
connRecord->get(), clientInfo, coordinator, info, supportedVersions, traceLogGroup))) {
|
||||||
|
info = _info;
|
||||||
|
}
|
||||||
|
when(wait(connRecord->onChange())) {
|
||||||
|
info.hasConnected = false;
|
||||||
|
info.intermediateConnRecord = connRecord->get();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
when(wait(connRecord->onChange())) {
|
} catch (Error& e) {
|
||||||
info.hasConnected = false;
|
if (e.code() == error_code_coordinators_changed) {
|
||||||
info.intermediateConnRecord = connRecord->get();
|
TraceEvent("MonitorProxiesCoordinatorsChanged").suppressFor(1.0);
|
||||||
|
info.intermediateConnRecord->getConnectionString().resetToUnresolved();
|
||||||
|
} else {
|
||||||
|
throw e;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -74,10 +74,11 @@ Future<Void> monitorLeader(Reference<IClusterConnectionRecord> const& connFile,
|
||||||
// This is one place where the leader election algorithm is run. The coodinator contacts all coodinators to collect
|
// This is one place where the leader election algorithm is run. The coodinator contacts all coodinators to collect
|
||||||
// nominees, the nominee with the most nomination is the leader, and collects client data from the leader. This function
|
// nominees, the nominee with the most nomination is the leader, and collects client data from the leader. This function
|
||||||
// also monitors the change of the leader.
|
// also monitors the change of the leader.
|
||||||
Future<Void> monitorLeaderAndGetClientInfo(Value const& key,
|
Future<Void> monitorLeaderAndGetClientInfo(Key const& clusterKey,
|
||||||
std::vector<NetworkAddress> const& coordinators,
|
std::vector<NetworkAddress> const& coordinators,
|
||||||
ClientData* const& clientData,
|
ClientData* const& clientData,
|
||||||
Reference<AsyncVar<Optional<LeaderInfo>>> const& leaderInfo);
|
Reference<AsyncVar<Optional<LeaderInfo>>> const& leaderInfo,
|
||||||
|
Reference<AsyncVar<Void>> const& coordinatorsChanged);
|
||||||
|
|
||||||
Future<Void> monitorProxies(
|
Future<Void> monitorProxies(
|
||||||
Reference<AsyncVar<Reference<IClusterConnectionRecord>>> const& connRecord,
|
Reference<AsyncVar<Reference<IClusterConnectionRecord>>> const& connRecord,
|
||||||
|
|
|
@ -732,16 +732,18 @@ Future<Void> attemptGRVFromOldProxies(std::vector<GrvProxyInterface> oldProxies,
|
||||||
|
|
||||||
ACTOR static Future<Void> monitorClientDBInfoChange(DatabaseContext* cx,
|
ACTOR static Future<Void> monitorClientDBInfoChange(DatabaseContext* cx,
|
||||||
Reference<AsyncVar<ClientDBInfo> const> clientDBInfo,
|
Reference<AsyncVar<ClientDBInfo> const> clientDBInfo,
|
||||||
AsyncTrigger* proxyChangeTrigger) {
|
AsyncTrigger* proxiesChangeTrigger) {
|
||||||
state std::vector<CommitProxyInterface> curCommitProxies;
|
state std::vector<CommitProxyInterface> curCommitProxies;
|
||||||
state std::vector<GrvProxyInterface> curGrvProxies;
|
state std::vector<GrvProxyInterface> curGrvProxies;
|
||||||
state ActorCollection actors(false);
|
state ActorCollection actors(false);
|
||||||
|
state Future<Void> clientDBInfoOnChange = clientDBInfo->onChange();
|
||||||
curCommitProxies = clientDBInfo->get().commitProxies;
|
curCommitProxies = clientDBInfo->get().commitProxies;
|
||||||
curGrvProxies = clientDBInfo->get().grvProxies;
|
curGrvProxies = clientDBInfo->get().grvProxies;
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
choose {
|
choose {
|
||||||
when(wait(clientDBInfo->onChange())) {
|
when(wait(clientDBInfoOnChange)) {
|
||||||
|
clientDBInfoOnChange = clientDBInfo->onChange();
|
||||||
if (clientDBInfo->get().commitProxies != curCommitProxies ||
|
if (clientDBInfo->get().commitProxies != curCommitProxies ||
|
||||||
clientDBInfo->get().grvProxies != curGrvProxies) {
|
clientDBInfo->get().grvProxies != curGrvProxies) {
|
||||||
// This condition is a bit complicated. Here we want to verify that we're unable to receive a read
|
// This condition is a bit complicated. Here we want to verify that we're unable to receive a read
|
||||||
|
@ -758,7 +760,7 @@ ACTOR static Future<Void> monitorClientDBInfoChange(DatabaseContext* cx,
|
||||||
}
|
}
|
||||||
curCommitProxies = clientDBInfo->get().commitProxies;
|
curCommitProxies = clientDBInfo->get().commitProxies;
|
||||||
curGrvProxies = clientDBInfo->get().grvProxies;
|
curGrvProxies = clientDBInfo->get().grvProxies;
|
||||||
proxyChangeTrigger->trigger();
|
proxiesChangeTrigger->trigger();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
when(wait(actors.getResult())) { UNSTOPPABLE_ASSERT(false); }
|
when(wait(actors.getResult())) { UNSTOPPABLE_ASSERT(false); }
|
||||||
|
@ -5713,9 +5715,10 @@ ACTOR Future<GetReadVersionReply> getConsistentReadVersion(SpanID parentSpan,
|
||||||
loop {
|
loop {
|
||||||
try {
|
try {
|
||||||
state GetReadVersionRequest req(span.context, transactionCount, priority, flags, tags, debugID);
|
state GetReadVersionRequest req(span.context, transactionCount, priority, flags, tags, debugID);
|
||||||
|
state Future<Void> onProxiesChanged = cx->onProxiesChanged();
|
||||||
|
|
||||||
choose {
|
choose {
|
||||||
when(wait(cx->onProxiesChanged())) {}
|
when(wait(onProxiesChanged)) { onProxiesChanged = cx->onProxiesChanged(); }
|
||||||
when(GetReadVersionReply v =
|
when(GetReadVersionReply v =
|
||||||
wait(basicLoadBalance(cx->getGrvProxies(UseProvisionalProxies(
|
wait(basicLoadBalance(cx->getGrvProxies(UseProvisionalProxies(
|
||||||
flags & GetReadVersionRequest::FLAG_USE_PROVISIONAL_PROXIES)),
|
flags & GetReadVersionRequest::FLAG_USE_PROVISIONAL_PROXIES)),
|
||||||
|
@ -6881,6 +6884,7 @@ ACTOR Future<bool> checkSafeExclusions(Database cx, std::vector<AddressExclusion
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
TraceEvent("ExclusionSafetyCheckCoordinators").log();
|
TraceEvent("ExclusionSafetyCheckCoordinators").log();
|
||||||
|
wait(cx->getConnectionRecord()->resolveHostnames());
|
||||||
state ClientCoordinators coordinatorList(cx->getConnectionRecord());
|
state ClientCoordinators coordinatorList(cx->getConnectionRecord());
|
||||||
state std::vector<Future<Optional<LeaderInfo>>> leaderServers;
|
state std::vector<Future<Optional<LeaderInfo>>> leaderServers;
|
||||||
leaderServers.reserve(coordinatorList.clientLeaderServers.size());
|
leaderServers.reserve(coordinatorList.clientLeaderServers.size());
|
||||||
|
|
|
@ -29,12 +29,12 @@ void HealthMonitor::reportPeerClosed(const NetworkAddress& peerAddress) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void HealthMonitor::purgeOutdatedHistory() {
|
void HealthMonitor::purgeOutdatedHistory() {
|
||||||
for (auto it = peerClosedHistory.begin(); it != peerClosedHistory.end();) {
|
while (!peerClosedHistory.empty()) {
|
||||||
if (it->first < now() - FLOW_KNOBS->HEALTH_MONITOR_CLIENT_REQUEST_INTERVAL_SECS) {
|
auto const& p = peerClosedHistory.front();
|
||||||
auto& count = peerClosedNum[it->second];
|
if (p.first < now() - FLOW_KNOBS->HEALTH_MONITOR_CLIENT_REQUEST_INTERVAL_SECS) {
|
||||||
|
auto& count = peerClosedNum[p.second];
|
||||||
--count;
|
--count;
|
||||||
ASSERT(count >= 0);
|
ASSERT(count >= 0);
|
||||||
++it; // Increment before pop_front to avoid iterator invalidation
|
|
||||||
peerClosedHistory.pop_front();
|
peerClosedHistory.pop_front();
|
||||||
} else {
|
} else {
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -90,6 +90,7 @@ set(FDBSERVER_SRCS
|
||||||
QuietDatabase.actor.cpp
|
QuietDatabase.actor.cpp
|
||||||
QuietDatabase.h
|
QuietDatabase.h
|
||||||
RadixTree.h
|
RadixTree.h
|
||||||
|
Ratekeeper.h
|
||||||
Ratekeeper.actor.cpp
|
Ratekeeper.actor.cpp
|
||||||
RatekeeperInterface.h
|
RatekeeperInterface.h
|
||||||
RecoveryState.h
|
RecoveryState.h
|
||||||
|
@ -130,6 +131,8 @@ set(FDBSERVER_SRCS
|
||||||
storageserver.actor.cpp
|
storageserver.actor.cpp
|
||||||
TagPartitionedLogSystem.actor.cpp
|
TagPartitionedLogSystem.actor.cpp
|
||||||
TagPartitionedLogSystem.actor.h
|
TagPartitionedLogSystem.actor.h
|
||||||
|
TagThrottler.actor.cpp
|
||||||
|
TagThrottler.h
|
||||||
template_fdb.h
|
template_fdb.h
|
||||||
TCInfo.actor.cpp
|
TCInfo.actor.cpp
|
||||||
TCInfo.h
|
TCInfo.h
|
||||||
|
|
|
@ -96,6 +96,7 @@ LeaderElectionRegInterface::LeaderElectionRegInterface(INetwork* local) : Client
|
||||||
}
|
}
|
||||||
|
|
||||||
ServerCoordinators::ServerCoordinators(Reference<IClusterConnectionRecord> ccr) : ClientCoordinators(ccr) {
|
ServerCoordinators::ServerCoordinators(Reference<IClusterConnectionRecord> ccr) : ClientCoordinators(ccr) {
|
||||||
|
ASSERT(ccr->connectionStringStatus() == ClusterConnectionString::RESOLVED);
|
||||||
ClusterConnectionString cs = ccr->getConnectionString();
|
ClusterConnectionString cs = ccr->getConnectionString();
|
||||||
for (auto s = cs.coordinators().begin(); s != cs.coordinators().end(); ++s) {
|
for (auto s = cs.coordinators().begin(); s != cs.coordinators().end(); ++s) {
|
||||||
leaderElectionServers.emplace_back(*s);
|
leaderElectionServers.emplace_back(*s);
|
||||||
|
@ -205,8 +206,11 @@ ACTOR Future<Void> openDatabase(ClientData* db,
|
||||||
int* clientCount,
|
int* clientCount,
|
||||||
Reference<AsyncVar<bool>> hasConnectedClients,
|
Reference<AsyncVar<bool>> hasConnectedClients,
|
||||||
OpenDatabaseCoordRequest req,
|
OpenDatabaseCoordRequest req,
|
||||||
Future<Void> checkStuck) {
|
Future<Void> checkStuck,
|
||||||
|
Reference<AsyncVar<Void>> coordinatorsChanged) {
|
||||||
state ErrorOr<CachedSerialization<ClientDBInfo>> replyContents;
|
state ErrorOr<CachedSerialization<ClientDBInfo>> replyContents;
|
||||||
|
state Future<Void> coordinatorsChangedOnChange = coordinatorsChanged->onChange();
|
||||||
|
state Future<Void> clientInfoOnChange = db->clientInfo->onChange();
|
||||||
|
|
||||||
++(*clientCount);
|
++(*clientCount);
|
||||||
hasConnectedClients->set(true);
|
hasConnectedClients->set(true);
|
||||||
|
@ -223,7 +227,15 @@ ACTOR Future<Void> openDatabase(ClientData* db,
|
||||||
replyContents = failed_to_progress();
|
replyContents = failed_to_progress();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
when(wait(yieldedFuture(db->clientInfo->onChange()))) { replyContents = db->clientInfo->get(); }
|
when(wait(yieldedFuture(clientInfoOnChange))) {
|
||||||
|
clientInfoOnChange = db->clientInfo->onChange();
|
||||||
|
replyContents = db->clientInfo->get();
|
||||||
|
}
|
||||||
|
when(wait(coordinatorsChangedOnChange)) {
|
||||||
|
coordinatorsChangedOnChange = coordinatorsChanged->onChange();
|
||||||
|
replyContents = coordinators_changed();
|
||||||
|
break;
|
||||||
|
}
|
||||||
when(wait(delayJittered(SERVER_KNOBS->CLIENT_REGISTER_INTERVAL))) {
|
when(wait(delayJittered(SERVER_KNOBS->CLIENT_REGISTER_INTERVAL))) {
|
||||||
if (db->clientInfo->get().read().id.isValid()) {
|
if (db->clientInfo->get().read().id.isValid()) {
|
||||||
replyContents = db->clientInfo->get();
|
replyContents = db->clientInfo->get();
|
||||||
|
@ -254,18 +266,33 @@ ACTOR Future<Void> openDatabase(ClientData* db,
|
||||||
ACTOR Future<Void> remoteMonitorLeader(int* clientCount,
|
ACTOR Future<Void> remoteMonitorLeader(int* clientCount,
|
||||||
Reference<AsyncVar<bool>> hasConnectedClients,
|
Reference<AsyncVar<bool>> hasConnectedClients,
|
||||||
Reference<AsyncVar<Optional<LeaderInfo>>> currentElectedLeader,
|
Reference<AsyncVar<Optional<LeaderInfo>>> currentElectedLeader,
|
||||||
ElectionResultRequest req) {
|
ElectionResultRequest req,
|
||||||
|
Reference<AsyncVar<Void>> coordinatorsChanged) {
|
||||||
|
state bool coordinatorsChangeDetected = false;
|
||||||
|
state Future<Void> coordinatorsChangedOnChange = coordinatorsChanged->onChange();
|
||||||
|
state Future<Void> currentElectedLeaderOnChange = currentElectedLeader->onChange();
|
||||||
++(*clientCount);
|
++(*clientCount);
|
||||||
hasConnectedClients->set(true);
|
hasConnectedClients->set(true);
|
||||||
|
|
||||||
while (!currentElectedLeader->get().present() || req.knownLeader == currentElectedLeader->get().get().changeID) {
|
while (!currentElectedLeader->get().present() || req.knownLeader == currentElectedLeader->get().get().changeID) {
|
||||||
choose {
|
choose {
|
||||||
when(wait(yieldedFuture(currentElectedLeader->onChange()))) {}
|
when(wait(yieldedFuture(currentElectedLeaderOnChange))) {
|
||||||
|
currentElectedLeaderOnChange = currentElectedLeader->onChange();
|
||||||
|
}
|
||||||
|
when(wait(coordinatorsChangedOnChange)) {
|
||||||
|
coordinatorsChangedOnChange = coordinatorsChanged->onChange();
|
||||||
|
coordinatorsChangeDetected = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
when(wait(delayJittered(SERVER_KNOBS->CLIENT_REGISTER_INTERVAL))) { break; }
|
when(wait(delayJittered(SERVER_KNOBS->CLIENT_REGISTER_INTERVAL))) { break; }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
req.reply.send(currentElectedLeader->get());
|
if (coordinatorsChangeDetected) {
|
||||||
|
req.reply.sendError(coordinators_changed());
|
||||||
|
} else {
|
||||||
|
req.reply.send(currentElectedLeader->get());
|
||||||
|
}
|
||||||
|
|
||||||
if (--(*clientCount) == 0) {
|
if (--(*clientCount) == 0) {
|
||||||
hasConnectedClients->set(false);
|
hasConnectedClients->set(false);
|
||||||
|
@ -296,6 +323,9 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
|
||||||
state Reference<AsyncVar<Optional<LeaderInfo>>> currentElectedLeader =
|
state Reference<AsyncVar<Optional<LeaderInfo>>> currentElectedLeader =
|
||||||
makeReference<AsyncVar<Optional<LeaderInfo>>>();
|
makeReference<AsyncVar<Optional<LeaderInfo>>>();
|
||||||
state LivenessChecker canConnectToLeader(SERVER_KNOBS->COORDINATOR_LEADER_CONNECTION_TIMEOUT);
|
state LivenessChecker canConnectToLeader(SERVER_KNOBS->COORDINATOR_LEADER_CONNECTION_TIMEOUT);
|
||||||
|
state Reference<AsyncVar<Void>> coordinatorsChanged = makeReference<AsyncVar<Void>>();
|
||||||
|
state Future<Void> coordinatorsChangedOnChange = coordinatorsChanged->onChange();
|
||||||
|
state Future<Void> hasConnectedClientsOnChange = hasConnectedClients->onChange();
|
||||||
|
|
||||||
loop choose {
|
loop choose {
|
||||||
when(OpenDatabaseCoordRequest req = waitNext(interf.openDatabase.getFuture())) {
|
when(OpenDatabaseCoordRequest req = waitNext(interf.openDatabase.getFuture())) {
|
||||||
|
@ -306,10 +336,14 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
|
||||||
} else {
|
} else {
|
||||||
if (!leaderMon.isValid()) {
|
if (!leaderMon.isValid()) {
|
||||||
leaderMon = monitorLeaderAndGetClientInfo(
|
leaderMon = monitorLeaderAndGetClientInfo(
|
||||||
req.clusterKey, req.coordinators, &clientData, currentElectedLeader);
|
req.clusterKey, req.coordinators, &clientData, currentElectedLeader, coordinatorsChanged);
|
||||||
}
|
}
|
||||||
actors.add(
|
actors.add(openDatabase(&clientData,
|
||||||
openDatabase(&clientData, &clientCount, hasConnectedClients, req, canConnectToLeader.checkStuck()));
|
&clientCount,
|
||||||
|
hasConnectedClients,
|
||||||
|
req,
|
||||||
|
canConnectToLeader.checkStuck(),
|
||||||
|
coordinatorsChanged));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
when(ElectionResultRequest req = waitNext(interf.electionResult.getFuture())) {
|
when(ElectionResultRequest req = waitNext(interf.electionResult.getFuture())) {
|
||||||
|
@ -318,10 +352,11 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
|
||||||
req.reply.send(currentElectedLeader->get());
|
req.reply.send(currentElectedLeader->get());
|
||||||
} else {
|
} else {
|
||||||
if (!leaderMon.isValid()) {
|
if (!leaderMon.isValid()) {
|
||||||
leaderMon =
|
leaderMon = monitorLeaderAndGetClientInfo(
|
||||||
monitorLeaderAndGetClientInfo(req.key, req.coordinators, &clientData, currentElectedLeader);
|
req.key, req.coordinators, &clientData, currentElectedLeader, coordinatorsChanged);
|
||||||
}
|
}
|
||||||
actors.add(remoteMonitorLeader(&clientCount, hasConnectedClients, currentElectedLeader, req));
|
actors.add(remoteMonitorLeader(
|
||||||
|
&clientCount, hasConnectedClients, currentElectedLeader, req, coordinatorsChanged));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
when(GetLeaderRequest req = waitNext(interf.getLeader.getFuture())) {
|
when(GetLeaderRequest req = waitNext(interf.getLeader.getFuture())) {
|
||||||
|
@ -454,13 +489,18 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
|
||||||
notify.pop_front();
|
notify.pop_front();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
when(wait(hasConnectedClients->onChange())) {
|
when(wait(hasConnectedClientsOnChange)) {
|
||||||
|
hasConnectedClientsOnChange = hasConnectedClients->onChange();
|
||||||
if (!hasConnectedClients->get() && !nextInterval.isValid()) {
|
if (!hasConnectedClients->get() && !nextInterval.isValid()) {
|
||||||
TraceEvent("LeaderRegisterUnneeded").detail("Key", key);
|
TraceEvent("LeaderRegisterUnneeded").detail("Key", key);
|
||||||
return Void();
|
return Void();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
when(wait(actors.getResult())) {}
|
when(wait(actors.getResult())) {}
|
||||||
|
when(wait(coordinatorsChangedOnChange)) {
|
||||||
|
leaderMon = Future<Void>();
|
||||||
|
coordinatorsChangedOnChange = coordinatorsChanged->onChange();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -171,6 +171,7 @@ typedef AsyncMap<UID, ServerStatus> ServerStatusMap;
|
||||||
|
|
||||||
class DDTeamCollection : public ReferenceCounted<DDTeamCollection> {
|
class DDTeamCollection : public ReferenceCounted<DDTeamCollection> {
|
||||||
friend class DDTeamCollectionImpl;
|
friend class DDTeamCollectionImpl;
|
||||||
|
friend class DDTeamCollectionUnitTest;
|
||||||
|
|
||||||
enum class Status { NONE = 0, WIGGLING = 1, EXCLUDED = 2, FAILED = 3 };
|
enum class Status { NONE = 0, WIGGLING = 1, EXCLUDED = 2, FAILED = 3 };
|
||||||
|
|
||||||
|
@ -521,6 +522,37 @@ class DDTeamCollection : public ReferenceCounted<DDTeamCollection> {
|
||||||
|
|
||||||
void noHealthyTeams() const;
|
void noHealthyTeams() const;
|
||||||
|
|
||||||
|
// To enable verbose debug info, set shouldPrint to true
|
||||||
|
void traceAllInfo(bool shouldPrint = false) const;
|
||||||
|
|
||||||
|
// Check if the server belongs to a machine; if not, create the machine.
|
||||||
|
// Establish the two-direction link between server and machine
|
||||||
|
Reference<TCMachineInfo> checkAndCreateMachine(Reference<TCServerInfo> server);
|
||||||
|
|
||||||
|
// Group storage servers (process) based on their machineId in LocalityData
|
||||||
|
// All created machines are healthy
|
||||||
|
// Return The number of healthy servers we grouped into machines
|
||||||
|
int constructMachinesFromServers();
|
||||||
|
|
||||||
|
// Create machineTeamsToBuild number of machine teams
|
||||||
|
// No operation if machineTeamsToBuild is 0
|
||||||
|
// Note: The creation of machine teams should not depend on server teams:
|
||||||
|
// No matter how server teams will be created, we will create the same set of machine teams;
|
||||||
|
// We should never use server team number in building machine teams.
|
||||||
|
//
|
||||||
|
// Five steps to create each machine team, which are document in the function
|
||||||
|
// Reuse ReplicationPolicy selectReplicas func to select machine team
|
||||||
|
// return number of added machine teams
|
||||||
|
int addBestMachineTeams(int machineTeamsToBuild);
|
||||||
|
|
||||||
|
// Sanity check the property of teams in unit test
|
||||||
|
// Return true if all server teams belong to machine teams
|
||||||
|
bool sanityCheckTeams() const;
|
||||||
|
|
||||||
|
void disableBuildingTeams() { doBuildTeams = false; }
|
||||||
|
|
||||||
|
void setCheckTeamDelay() { this->checkTeamDelay = Void(); }
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Database cx;
|
Database cx;
|
||||||
|
|
||||||
|
@ -595,39 +627,6 @@ public:
|
||||||
|
|
||||||
void addTeam(std::set<UID> const& team, bool isInitialTeam) { addTeam(team.begin(), team.end(), isInitialTeam); }
|
void addTeam(std::set<UID> const& team, bool isInitialTeam) { addTeam(team.begin(), team.end(), isInitialTeam); }
|
||||||
|
|
||||||
// FIXME: Public for testing only
|
|
||||||
void disableBuildingTeams() { doBuildTeams = false; }
|
|
||||||
|
|
||||||
// FIXME: Public for testing only
|
|
||||||
void setCheckTeamDelay() { this->checkTeamDelay = Void(); }
|
|
||||||
|
|
||||||
// FIXME: Public for testing only
|
|
||||||
// Group storage servers (process) based on their machineId in LocalityData
|
|
||||||
// All created machines are healthy
|
|
||||||
// Return The number of healthy servers we grouped into machines
|
|
||||||
int constructMachinesFromServers();
|
|
||||||
|
|
||||||
// FIXME: Public for testing only
|
|
||||||
// To enable verbose debug info, set shouldPrint to true
|
|
||||||
void traceAllInfo(bool shouldPrint = false) const;
|
|
||||||
|
|
||||||
// FIXME: Public for testing only
|
|
||||||
// Create machineTeamsToBuild number of machine teams
|
|
||||||
// No operation if machineTeamsToBuild is 0
|
|
||||||
// Note: The creation of machine teams should not depend on server teams:
|
|
||||||
// No matter how server teams will be created, we will create the same set of machine teams;
|
|
||||||
// We should never use server team number in building machine teams.
|
|
||||||
//
|
|
||||||
// Five steps to create each machine team, which are document in the function
|
|
||||||
// Reuse ReplicationPolicy selectReplicas func to select machine team
|
|
||||||
// return number of added machine teams
|
|
||||||
int addBestMachineTeams(int machineTeamsToBuild);
|
|
||||||
|
|
||||||
// FIXME: Public for testing only
|
|
||||||
// Sanity check the property of teams in unit test
|
|
||||||
// Return true if all server teams belong to machine teams
|
|
||||||
bool sanityCheckTeams() const;
|
|
||||||
|
|
||||||
// Create server teams based on machine teams
|
// Create server teams based on machine teams
|
||||||
// Before the number of machine teams reaches the threshold, build a machine team for each server team
|
// Before the number of machine teams reaches the threshold, build a machine team for each server team
|
||||||
// When it reaches the threshold, first try to build a server team with existing machine teams; if failed,
|
// When it reaches the threshold, first try to build a server team with existing machine teams; if failed,
|
||||||
|
@ -642,11 +641,6 @@ public:
|
||||||
|
|
||||||
bool removeTeam(Reference<TCTeamInfo> team);
|
bool removeTeam(Reference<TCTeamInfo> team);
|
||||||
|
|
||||||
// FIXME: Public for testing only
|
|
||||||
// Check if the server belongs to a machine; if not, create the machine.
|
|
||||||
// Establish the two-direction link between server and machine
|
|
||||||
Reference<TCMachineInfo> checkAndCreateMachine(Reference<TCServerInfo> server);
|
|
||||||
|
|
||||||
void removeTSS(UID removedServer);
|
void removeTSS(UID removedServer);
|
||||||
|
|
||||||
void removeServer(UID removedServer);
|
void removeServer(UID removedServer);
|
||||||
|
|
|
@ -158,8 +158,9 @@ ACTOR Future<std::vector<WorkerInterface>> getCoordWorkers(Database cx,
|
||||||
if (!coordinators.present()) {
|
if (!coordinators.present()) {
|
||||||
throw operation_failed();
|
throw operation_failed();
|
||||||
}
|
}
|
||||||
std::vector<NetworkAddress> coordinatorsAddr =
|
state ClusterConnectionString ccs(coordinators.get().toString());
|
||||||
ClusterConnectionString(coordinators.get().toString()).coordinators();
|
wait(ccs.resolveHostnames());
|
||||||
|
std::vector<NetworkAddress> coordinatorsAddr = ccs.coordinators();
|
||||||
std::set<NetworkAddress> coordinatorsAddrSet;
|
std::set<NetworkAddress> coordinatorsAddrSet;
|
||||||
for (const auto& addr : coordinatorsAddr) {
|
for (const auto& addr : coordinatorsAddr) {
|
||||||
TraceEvent(SevDebug, "CoordinatorAddress").detail("Addr", addr);
|
TraceEvent(SevDebug, "CoordinatorAddress").detail("Addr", addr);
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,207 @@
|
||||||
|
/*
|
||||||
|
* Ratekeeper.h
|
||||||
|
*
|
||||||
|
* This source file is part of the FoundationDB open source project
|
||||||
|
*
|
||||||
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "fdbclient/DatabaseConfiguration.h"
|
||||||
|
#include "fdbclient/DatabaseContext.h"
|
||||||
|
#include "fdbclient/StorageServerInterface.h"
|
||||||
|
#include "fdbclient/TagThrottle.actor.h"
|
||||||
|
#include "fdbrpc/Smoother.h"
|
||||||
|
#include "fdbserver/Knobs.h"
|
||||||
|
#include "fdbserver/RatekeeperInterface.h"
|
||||||
|
#include "fdbserver/ServerDBInfo.h"
|
||||||
|
#include "fdbserver/TLogInterface.h"
|
||||||
|
|
||||||
|
enum limitReason_t {
|
||||||
|
unlimited, // TODO: rename to workload?
|
||||||
|
storage_server_write_queue_size, // 1
|
||||||
|
storage_server_write_bandwidth_mvcc,
|
||||||
|
storage_server_readable_behind,
|
||||||
|
log_server_mvcc_write_bandwidth,
|
||||||
|
log_server_write_queue, // 5
|
||||||
|
storage_server_min_free_space, // a storage server's normal limits are being reduced by low free space
|
||||||
|
storage_server_min_free_space_ratio, // a storage server's normal limits are being reduced by a low free space ratio
|
||||||
|
log_server_min_free_space,
|
||||||
|
log_server_min_free_space_ratio,
|
||||||
|
storage_server_durability_lag, // 10
|
||||||
|
storage_server_list_fetch_failed,
|
||||||
|
limitReason_t_end
|
||||||
|
};
|
||||||
|
|
||||||
|
// Ratekeeper's per-storage-server queueing state. Raw metrics arrive in
// StorageQueuingMetricsReply messages; byte/version counters are exponentially
// smoothed (Smoother) so rate decisions do not chase transient spikes.
struct StorageQueueInfo {
	bool valid; // false until the first metrics reply has been recorded
	UID id;
	LocalityData locality;
	StorageQueuingMetricsReply lastReply; // most recent metrics reply
	StorageQueuingMetricsReply prevReply; // previous reply, for computing deltas
	Smoother smoothDurableBytes, smoothInputBytes, verySmoothDurableBytes;
	Smoother smoothDurableVersion, smoothLatestVersion;
	Smoother smoothFreeSpace;
	Smoother smoothTotalSpace;
	limitReason_t limitReason; // why this server is (or is not) constraining the rate

	// Busiest read/write transaction tags reported by this storage server,
	// with their fractional busyness and observed rates (0 when absent).
	Optional<TransactionTag> busiestReadTag, busiestWriteTag;
	double busiestReadTagFractionalBusyness = 0, busiestWriteTagFractionalBusyness = 0;
	double busiestReadTagRate = 0, busiestWriteTagRate = 0;

	Reference<EventCacheHolder> busiestWriteTagEventHolder; // caches the "<id>/BusiestWriteTag" trace event

	// refresh periodically
	TransactionTagMap<TransactionCommitCostEstimation> tagCostEst; // per-tag commit cost estimates
	uint64_t totalWriteCosts = 0;
	int totalWriteOps = 0;

	StorageQueueInfo(UID id, LocalityData locality)
	  : valid(false), id(id), locality(locality), smoothDurableBytes(SERVER_KNOBS->SMOOTHING_AMOUNT),
	    smoothInputBytes(SERVER_KNOBS->SMOOTHING_AMOUNT), verySmoothDurableBytes(SERVER_KNOBS->SLOW_SMOOTHING_AMOUNT),
	    smoothDurableVersion(SERVER_KNOBS->SMOOTHING_AMOUNT), smoothLatestVersion(SERVER_KNOBS->SMOOTHING_AMOUNT),
	    smoothFreeSpace(SERVER_KNOBS->SMOOTHING_AMOUNT), smoothTotalSpace(SERVER_KNOBS->SMOOTHING_AMOUNT),
	    limitReason(limitReason_t::unlimited),
	    busiestWriteTagEventHolder(makeReference<EventCacheHolder>(id.toString() + "/BusiestWriteTag")) {
		// FIXME: this is a tacky workaround for a potential uninitialized use in trackStorageServerQueueInfo
		lastReply.instanceID = -1;
	}
};
|
||||||
|
|
||||||
|
// Ratekeeper's per-TLog queueing state: the TLog analogue of StorageQueueInfo,
// fed by TLogQueuingMetricsReply messages and smoothed the same way.
struct TLogQueueInfo {
	bool valid; // false until the first metrics reply has been recorded
	UID id;
	TLogQueuingMetricsReply lastReply; // most recent metrics reply
	TLogQueuingMetricsReply prevReply; // previous reply, for computing deltas
	Smoother smoothDurableBytes, smoothInputBytes, verySmoothDurableBytes;
	Smoother smoothFreeSpace;
	Smoother smoothTotalSpace;
	TLogQueueInfo(UID id)
	  : valid(false), id(id), smoothDurableBytes(SERVER_KNOBS->SMOOTHING_AMOUNT),
	    smoothInputBytes(SERVER_KNOBS->SMOOTHING_AMOUNT), verySmoothDurableBytes(SERVER_KNOBS->SLOW_SMOOTHING_AMOUNT),
	    smoothFreeSpace(SERVER_KNOBS->SMOOTHING_AMOUNT), smoothTotalSpace(SERVER_KNOBS->SMOOTHING_AMOUNT) {
		// FIXME: this is a tacky workaround for a potential uninitialized use in trackTLogQueueInfo (copied from
		// StorageQueueInfo)
		lastReply.instanceID = -1;
	}
};
|
||||||
|
|
||||||
|
// One set of rate-limiting targets and current state for a transaction
// priority class (Ratekeeper keeps separate instances for normal and batch).
// `context` is appended to metric/event names to distinguish the instances.
struct RatekeeperLimits {
	double tpsLimit; // current transactions-per-second limit; starts at infinity (unlimited)
	Int64MetricHandle tpsLimitMetric; // "Ratekeeper.TPSLimit<context>"
	Int64MetricHandle reasonMetric; // "Ratekeeper.Reason<context>"

	// Target/spring queue sizes (bytes) for storage servers and TLogs.
	int64_t storageTargetBytes;
	int64_t storageSpringBytes;
	int64_t logTargetBytes;
	int64_t logSpringBytes;
	double maxVersionDifference;

	int64_t durabilityLagTargetVersions;
	int64_t lastDurabilityLag;
	double durabilityLagLimit;

	TransactionPriority priority;
	std::string context;

	Reference<EventCacheHolder> rkUpdateEventCacheHolder; // caches the "RkUpdate<context>" trace event

	RatekeeperLimits(TransactionPriority priority,
	                 std::string context,
	                 int64_t storageTargetBytes,
	                 int64_t storageSpringBytes,
	                 int64_t logTargetBytes,
	                 int64_t logSpringBytes,
	                 double maxVersionDifference,
	                 int64_t durabilityLagTargetVersions)
	  : tpsLimit(std::numeric_limits<double>::infinity()), tpsLimitMetric(StringRef("Ratekeeper.TPSLimit" + context)),
	    reasonMetric(StringRef("Ratekeeper.Reason" + context)), storageTargetBytes(storageTargetBytes),
	    storageSpringBytes(storageSpringBytes), logTargetBytes(logTargetBytes), logSpringBytes(logSpringBytes),
	    maxVersionDifference(maxVersionDifference),
	    durabilityLagTargetVersions(
	        durabilityLagTargetVersions +
	        SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS), // The read transaction life versions are expected to not
	                                                           // be durable on the storage servers
	    lastDurabilityLag(0), durabilityLagLimit(std::numeric_limits<double>::infinity()), priority(priority),
	    context(context), rkUpdateEventCacheHolder(makeReference<EventCacheHolder>("RkUpdate" + context)) {}
};
|
||||||
|
|
||||||
|
// The Ratekeeper role: observes storage-server and TLog queue metrics and
// computes cluster-wide transaction rate limits (normal and batch priority).
// All member functions are private; the role is driven via the static run().
class Ratekeeper {
	friend class RatekeeperImpl;

	// Differentiate from GrvProxyInfo in DatabaseContext.h
	struct GrvProxyInfo {
		int64_t totalTransactions;
		int64_t batchTransactions;
		uint64_t lastThrottledTagChangeId; // last tag-throttle change id pushed to this proxy

		double lastUpdateTime;
		double lastTagPushTime;

		GrvProxyInfo()
		  : totalTransactions(0), batchTransactions(0), lastThrottledTagChangeId(0), lastUpdateTime(0),
		    lastTagPushTime(0) {}
	};

	UID id;
	Database db;

	// Per-server queue state, keyed by server UID.
	Map<UID, StorageQueueInfo> storageQueueInfo;
	Map<UID, TLogQueueInfo> tlogQueueInfo;

	std::map<UID, Ratekeeper::GrvProxyInfo> grvProxyInfo;
	Smoother smoothReleasedTransactions, smoothBatchReleasedTransactions, smoothTotalDurableBytes;
	HealthMetrics healthMetrics;
	DatabaseConfiguration configuration;
	PromiseStream<Future<Void>> addActor; // actors added here are collected by the role's main loop

	Int64MetricHandle actualTpsMetric;

	double lastWarning;
	double lastSSListFetchedTimestamp;

	std::unique_ptr<class TagThrottler> tagThrottler; // owns all per-tag throttling state

	// Separate limit state for normal- and batch-priority transactions.
	RatekeeperLimits normalLimits;
	RatekeeperLimits batchLimits;

	Deque<double> actualTpsHistory;
	Optional<Key> remoteDC;

	Future<Void> expiredTagThrottleCleanup;

	double lastBusiestCommitTagPick;

	Ratekeeper(UID id, Database db);

	Future<Void> configurationMonitor();
	void updateCommitCostEstimation(UIDTransactionTagMap<TransactionCommitCostEstimation> const& costEstimation);
	void updateRate(RatekeeperLimits* limits);
	Future<Void> refreshStorageServerCommitCost();
	Future<Void> monitorServerListChange(PromiseStream<std::pair<UID, Optional<StorageServerInterface>>> serverChanges);
	Future<Void> trackEachStorageServer(FutureStream<std::pair<UID, Optional<StorageServerInterface>>> serverChanges);

	// SOMEDAY: template trackStorageServerQueueInfo and trackTLogQueueInfo into one function
	Future<Void> trackStorageServerQueueInfo(StorageServerInterface);
	Future<Void> trackTLogQueueInfo(TLogInterface);

	void tryAutoThrottleTag(TransactionTag, double rate, double busyness, TagThrottledReason);
	void tryAutoThrottleTag(StorageQueueInfo&, int64_t storageQueue, int64_t storageDurabilityLag);
	Future<Void> monitorThrottlingChanges();

public:
	static Future<Void> run(RatekeeperInterface rkInterf, Reference<AsyncVar<ServerDBInfo> const> dbInfo);
};
|
|
@ -1905,8 +1905,8 @@ void setupSimulatedSystem(std::vector<Future<Void>>* systemActors,
|
||||||
TEST(useIPv6); // Use IPv6
|
TEST(useIPv6); // Use IPv6
|
||||||
TEST(!useIPv6); // Use IPv4
|
TEST(!useIPv6); // Use IPv4
|
||||||
|
|
||||||
// TODO(renxuan): Use hostname 25% of the time, unless it is disabled
|
// Use hostname 25% of the time, unless it is disabled
|
||||||
bool useHostname = false; // !testConfig.disableHostname && deterministicRandom()->random01() < 0.25;
|
bool useHostname = !testConfig.disableHostname && deterministicRandom()->random01() < 0.25;
|
||||||
TEST(useHostname); // Use hostname
|
TEST(useHostname); // Use hostname
|
||||||
TEST(!useHostname); // Use IP address
|
TEST(!useHostname); // Use IP address
|
||||||
NetworkAddressFromHostname fromHostname =
|
NetworkAddressFromHostname fromHostname =
|
||||||
|
|
|
@ -0,0 +1,598 @@
|
||||||
|
/*
|
||||||
|
 * TagThrottler.actor.cpp
|
||||||
|
*
|
||||||
|
* This source file is part of the FoundationDB open source project
|
||||||
|
*
|
||||||
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "fdbserver/TagThrottler.h"
|
||||||
|
|
||||||
|
// Holds all active per-transaction-tag throttles (both auto and manual) plus
// the smoothed per-tag request rates needed to translate a target TPS into a
// per-client rate. Movable but not copyable; Ratekeeper swaps in a freshly
// built collection when throttle state is re-read from the database.
// NOTE: the TEST(...) trailing comments are coverage descriptions consumed by
// tooling — do not edit them.
class RkTagThrottleCollection : NonCopyable {
	struct RkTagData {
		Smoother requestRate; // smoothed observed request rate for this tag
		RkTagData() : requestRate(CLIENT_KNOBS->TAG_THROTTLE_SMOOTHING_WINDOW) {}
	};

	// State for a single throttle (one tag, and for manual throttles one priority).
	struct RkTagThrottleData {
		ClientTagThrottleLimits limits; // target TPS rate and absolute expiration time
		Smoother clientRate; // smoothed per-client rate derived from limits and request rate

		// Only used by auto-throttles
		double created = now();
		double lastUpdated = 0;
		double lastReduced = now();
		bool rateSet = false; // whether clientRate currently holds a meaningful value

		RkTagThrottleData() : clientRate(CLIENT_KNOBS->TAG_THROTTLE_SMOOTHING_WINDOW) {}

		// Scale the configured TPS limit by the fraction of requests each client
		// may issue; falls back to the raw limit when there is no usable request
		// rate or no rate has been set yet.
		double getTargetRate(Optional<double> requestRate) {
			if (limits.tpsRate == 0.0 || !requestRate.present() || requestRate.get() == 0.0 || !rateSet) {
				return limits.tpsRate;
			} else {
				return std::min(limits.tpsRate, (limits.tpsRate / requestRate.get()) * clientRate.smoothTotal());
			}
		}

		// Recompute the smoothed client rate; returns empty when the throttle has
		// expired (caller is expected to clean it up).
		Optional<double> updateAndGetClientRate(Optional<double> requestRate) {
			if (limits.expiration > now()) {
				double targetRate = getTargetRate(requestRate);
				if (targetRate == std::numeric_limits<double>::max()) {
					// "Unlimited" sentinel: report it but don't feed it into the smoother.
					rateSet = false;
					return targetRate;
				}
				if (!rateSet) {
					rateSet = true;
					clientRate.reset(targetRate);
				} else {
					clientRate.setTotal(targetRate);
				}

				double rate = clientRate.smoothTotal();
				ASSERT(rate >= 0);
				return rate;
			} else {
				TEST(true); // Get throttle rate for expired throttle
				rateSet = false;
				return Optional<double>();
			}
		}
	};

	// Ensure request-rate tracking exists for this tag (no-op if already present).
	void initializeTag(TransactionTag const& tag) { tagData.try_emplace(tag); }

public:
	RkTagThrottleCollection() {}

	RkTagThrottleCollection(RkTagThrottleCollection&& other) {
		autoThrottledTags = std::move(other.autoThrottledTags);
		manualThrottledTags = std::move(other.manualThrottledTags);
		tagData = std::move(other.tagData);
	}

	void operator=(RkTagThrottleCollection&& other) {
		autoThrottledTags = std::move(other.autoThrottledTags);
		manualThrottledTags = std::move(other.manualThrottledTags);
		tagData = std::move(other.tagData);
	}

	// Compute the TPS rate that would bring this tag's busyness from
	// currentBusyness down to targetBusyness, assuming rate and busyness scale
	// proportionally; returns "unlimited" when the target busyness is >= 1.
	double computeTargetTpsRate(double currentBusyness, double targetBusyness, double requestRate) {
		ASSERT(currentBusyness > 0);

		if (targetBusyness < 1) {
			double targetFraction = targetBusyness * (1 - currentBusyness) / ((1 - targetBusyness) * currentBusyness);
			return requestRate * targetFraction;
		} else {
			return std::numeric_limits<double>::max();
		}
	}

	// Returns the TPS rate if the throttle is updated, otherwise returns an empty optional
	Optional<double> autoThrottleTag(UID id,
	                                 TransactionTag const& tag,
	                                 double fractionalBusyness,
	                                 Optional<double> tpsRate = Optional<double>(),
	                                 Optional<double> expiration = Optional<double>()) {
		ASSERT(!tpsRate.present() || tpsRate.get() >= 0);
		ASSERT(!expiration.present() || expiration.get() > now());

		auto itr = autoThrottledTags.find(tag);
		bool present = (itr != autoThrottledTags.end());
		if (!present) {
			// New throttle: respect the configured cap on auto-throttled tags.
			if (autoThrottledTags.size() >= SERVER_KNOBS->MAX_AUTO_THROTTLED_TRANSACTION_TAGS) {
				TEST(true); // Reached auto-throttle limit
				return Optional<double>();
			}

			itr = autoThrottledTags.try_emplace(tag).first;
			initializeTag(tag);
		} else if (itr->second.limits.expiration <= now()) {
			TEST(true); // Re-throttling expired tag that hasn't been cleaned up
			present = false;
			itr->second = RkTagThrottleData(); // treat as a fresh throttle
		}

		auto& throttle = itr->second;

		if (!tpsRate.present()) {
			// Caller didn't supply a rate: derive one, subject to aggregation /
			// update-frequency gating.
			if (now() <= throttle.created + SERVER_KNOBS->AUTO_TAG_THROTTLE_START_AGGREGATION_TIME) {
				// Too young to have a meaningful request-rate estimate yet.
				tpsRate = std::numeric_limits<double>::max();
				if (present) {
					return Optional<double>();
				}
			} else if (now() <= throttle.lastUpdated + SERVER_KNOBS->AUTO_TAG_THROTTLE_UPDATE_FREQUENCY) {
				TEST(true); // Tag auto-throttled too quickly
				return Optional<double>();
			} else {
				tpsRate = computeTargetTpsRate(fractionalBusyness,
				                               SERVER_KNOBS->AUTO_THROTTLE_TARGET_TAG_BUSYNESS,
				                               tagData[tag].requestRate.smoothRate());

				// Never raise the rate of a still-active throttle.
				if (throttle.limits.expiration > now() && tpsRate.get() >= throttle.limits.tpsRate) {
					TEST(true); // Tag auto-throttle rate increase attempt while active
					return Optional<double>();
				}

				throttle.lastUpdated = now();
				if (tpsRate.get() < throttle.limits.tpsRate) {
					throttle.lastReduced = now();
				}
			}
		}
		if (!expiration.present()) {
			expiration = now() + SERVER_KNOBS->AUTO_TAG_THROTTLE_DURATION;
		}

		ASSERT(tpsRate.present() && tpsRate.get() >= 0);

		throttle.limits.tpsRate = tpsRate.get();
		throttle.limits.expiration = expiration.get();

		Optional<double> clientRate = throttle.updateAndGetClientRate(getRequestRate(tag));

		TraceEvent("RkSetAutoThrottle", id)
		    .detail("Tag", tag)
		    .detail("TargetRate", tpsRate.get())
		    .detail("Expiration", expiration.get() - now())
		    .detail("ClientRate", clientRate)
		    .detail("Created", now() - throttle.created)
		    .detail("LastUpdate", now() - throttle.lastUpdated)
		    .detail("LastReduced", now() - throttle.lastReduced);

		if (tpsRate.get() != std::numeric_limits<double>::max()) {
			return tpsRate.get();
		} else {
			return Optional<double>();
		}
	}

	// Install (or replace) a manual throttle for (tag, priority). oldLimits is
	// only used to decide which trace event to emit.
	void manualThrottleTag(UID id,
	                       TransactionTag const& tag,
	                       TransactionPriority priority,
	                       double tpsRate,
	                       double expiration,
	                       Optional<ClientTagThrottleLimits> const& oldLimits) {
		ASSERT(tpsRate >= 0);
		ASSERT(expiration > now());

		auto& priorityThrottleMap = manualThrottledTags[tag];
		auto result = priorityThrottleMap.try_emplace(priority);
		initializeTag(tag);
		ASSERT(result.second); // Updating to the map is done by copying the whole map

		result.first->second.limits.tpsRate = tpsRate;
		result.first->second.limits.expiration = expiration;

		if (!oldLimits.present()) {
			TEST(true); // Transaction tag manually throttled
			TraceEvent("RatekeeperAddingManualThrottle", id)
			    .detail("Tag", tag)
			    .detail("Rate", tpsRate)
			    .detail("Priority", transactionPriorityToString(priority))
			    .detail("SecondsToExpiration", expiration - now());
		} else if (oldLimits.get().tpsRate != tpsRate || oldLimits.get().expiration != expiration) {
			TEST(true); // Manual transaction tag throttle updated
			TraceEvent("RatekeeperUpdatingManualThrottle", id)
			    .detail("Tag", tag)
			    .detail("Rate", tpsRate)
			    .detail("Priority", transactionPriorityToString(priority))
			    .detail("SecondsToExpiration", expiration - now());
		}

		Optional<double> clientRate = result.first->second.updateAndGetClientRate(getRequestRate(tag));
		ASSERT(clientRate.present());
	}

	// Look up the manual throttle limits for (tag, priority), if any.
	Optional<ClientTagThrottleLimits> getManualTagThrottleLimits(TransactionTag const& tag,
	                                                             TransactionPriority priority) {
		auto itr = manualThrottledTags.find(tag);
		if (itr != manualThrottledTags.end()) {
			auto priorityItr = itr->second.find(priority);
			if (priorityItr != itr->second.end()) {
				return priorityItr->second.limits;
			}
		}

		return Optional<ClientTagThrottleLimits>();
	}

	// Build the per-priority, per-tag client rate map to push to GRV proxies,
	// pruning expired throttles and unused tag data as a side effect. Manual
	// throttles apply at their priority and below (lowest rate wins); auto
	// throttles (when enabled) apply at DEFAULT and fully block BATCH.
	PrioritizedTransactionTagMap<ClientTagThrottleLimits> getClientRates(bool autoThrottlingEnabled) {
		PrioritizedTransactionTagMap<ClientTagThrottleLimits> clientRates;

		for (auto tagItr = tagData.begin(); tagItr != tagData.end();) {
			bool tagPresent = false;

			double requestRate = tagItr->second.requestRate.smoothRate();
			auto manualItr = manualThrottledTags.find(tagItr->first);
			if (manualItr != manualThrottledTags.end()) {
				Optional<ClientTagThrottleLimits> manualClientRate;
				// Iterate priorities high-to-low so a higher-priority throttle's
				// rate carries down to lower priorities via manualClientRate.
				for (auto priority = allTransactionPriorities.rbegin(); !(priority == allTransactionPriorities.rend());
				     ++priority) {
					auto priorityItr = manualItr->second.find(*priority);
					if (priorityItr != manualItr->second.end()) {
						Optional<double> priorityClientRate = priorityItr->second.updateAndGetClientRate(requestRate);
						if (!priorityClientRate.present()) {
							TEST(true); // Manual priority throttle expired
							priorityItr = manualItr->second.erase(priorityItr);
						} else {
							if (!manualClientRate.present() ||
							    manualClientRate.get().tpsRate > priorityClientRate.get()) {
								manualClientRate = ClientTagThrottleLimits(priorityClientRate.get(),
								                                           priorityItr->second.limits.expiration);
							} else {
								TEST(true); // Manual throttle overriden by higher priority
							}

							++priorityItr;
						}
					}

					if (manualClientRate.present()) {
						tagPresent = true;
						TEST(true); // Using manual throttle
						clientRates[*priority][tagItr->first] = manualClientRate.get();
					}
				}

				if (manualItr->second.empty()) {
					TEST(true); // All manual throttles expired
					manualThrottledTags.erase(manualItr);
					// NOTE(review): this 'break' exits the outer tag loop, skipping the
					// remaining tags until the next getClientRates call — confirm intended.
					break;
				}
			}

			auto autoItr = autoThrottledTags.find(tagItr->first);
			if (autoItr != autoThrottledTags.end()) {
				Optional<double> autoClientRate = autoItr->second.updateAndGetClientRate(requestRate);
				if (autoClientRate.present()) {
					double adjustedRate = autoClientRate.get();
					// Near expiration, ramp the rate back up toward unthrottled.
					double rampStartTime = autoItr->second.lastReduced + SERVER_KNOBS->AUTO_TAG_THROTTLE_DURATION -
					                       SERVER_KNOBS->AUTO_TAG_THROTTLE_RAMP_UP_TIME;
					if (now() >= rampStartTime && adjustedRate != std::numeric_limits<double>::max()) {
						TEST(true); // Tag auto-throttle ramping up

						double targetBusyness = SERVER_KNOBS->AUTO_THROTTLE_TARGET_TAG_BUSYNESS;
						if (targetBusyness == 0) {
							targetBusyness = 0.01; // avoid division by zero in computeTargetTpsRate
						}

						double rampLocation = (now() - rampStartTime) / SERVER_KNOBS->AUTO_TAG_THROTTLE_RAMP_UP_TIME;
						adjustedRate =
						    computeTargetTpsRate(targetBusyness, pow(targetBusyness, 1 - rampLocation), adjustedRate);
					}

					tagPresent = true;
					if (autoThrottlingEnabled) {
						// Keep whichever of auto/manual rate is lower at DEFAULT priority.
						auto result = clientRates[TransactionPriority::DEFAULT].try_emplace(
						    tagItr->first, adjustedRate, autoItr->second.limits.expiration);
						if (!result.second && result.first->second.tpsRate > adjustedRate) {
							result.first->second =
							    ClientTagThrottleLimits(adjustedRate, autoItr->second.limits.expiration);
						} else {
							TEST(true); // Auto throttle overriden by manual throttle
						}
						// Auto-throttled tags are fully blocked at batch priority.
						clientRates[TransactionPriority::BATCH][tagItr->first] =
						    ClientTagThrottleLimits(0, autoItr->second.limits.expiration);
					}
				} else {
					ASSERT(autoItr->second.limits.expiration <= now());
					TEST(true); // Auto throttle expired
					if (BUGGIFY) { // Temporarily extend the window between expiration and cleanup
						tagPresent = true;
					} else {
						autoThrottledTags.erase(autoItr);
					}
				}
			}

			if (!tagPresent) {
				TEST(true); // All tag throttles expired
				tagItr = tagData.erase(tagItr);
			} else {
				++tagItr;
			}
		}

		return clientRates;
	}

	// Record observed requests for a tag and refresh the client rates of any
	// throttles on it; tags with no reported requests are ignored.
	void addRequests(TransactionTag const& tag, int requests) {
		if (requests > 0) {
			TEST(true); // Requests reported for throttled tag

			auto tagItr = tagData.try_emplace(tag);
			tagItr.first->second.requestRate.addDelta(requests);

			double requestRate = tagItr.first->second.requestRate.smoothRate();

			auto autoItr = autoThrottledTags.find(tag);
			if (autoItr != autoThrottledTags.end()) {
				autoItr->second.updateAndGetClientRate(requestRate);
			}

			auto manualItr = manualThrottledTags.find(tag);
			if (manualItr != manualThrottledTags.end()) {
				for (auto priorityItr = manualItr->second.begin(); priorityItr != manualItr->second.end();
				     ++priorityItr) {
					priorityItr->second.updateAndGetClientRate(requestRate);
				}
			}
		}
	}

	// Smoothed request rate for a tag, or empty if the tag is not tracked.
	Optional<double> getRequestRate(TransactionTag const& tag) {
		auto itr = tagData.find(tag);
		if (itr != tagData.end()) {
			return itr->second.requestRate.smoothRate();
		}
		return Optional<double>();
	}

	int64_t autoThrottleCount() const { return autoThrottledTags.size(); }

	// Total number of (tag, priority) manual throttles across all tags.
	int64_t manualThrottleCount() const {
		int64_t count = 0;
		for (auto itr = manualThrottledTags.begin(); itr != manualThrottledTags.end(); ++itr) {
			count += itr->second.size();
		}

		return count;
	}

	TransactionTagMap<RkTagThrottleData> autoThrottledTags;
	TransactionTagMap<std::map<TransactionPriority, RkTagThrottleData>> manualThrottledTags;
	TransactionTagMap<RkTagData> tagData; // smoothed request rates, one entry per known tag
	uint32_t busyReadTagCount = 0, busyWriteTagCount = 0;
};
|
||||||
|
|
||||||
|
// Implementation behind the TagThrottler PImpl facade. Owns the throttle
// collection, mirrors the on-disk throttle configuration (system keyspace)
// into it, and initiates new auto-throttles.
class TagThrottlerImpl {
	Database db;
	UID id;
	RkTagThrottleCollection throttledTags;
	uint64_t throttledTagChangeId{ 0 }; // bumped whenever throttledTags is rebuilt
	bool autoThrottlingEnabled{ false };

	// Long-running actor: reads all tag throttles from the system keyspace,
	// rebuilds `throttledTags`, then watches tagThrottleSignalKey and repeats
	// whenever the throttle configuration changes.
	ACTOR static Future<Void> monitorThrottlingChanges(TagThrottlerImpl* self) {
		state bool committed = false;
		loop {
			state ReadYourWritesTransaction tr(self->db);

			loop {
				try {
					tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
					tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);

					state Future<RangeResult> throttledTagKeys = tr.getRange(tagThrottleKeys, CLIENT_KNOBS->TOO_MANY);
					state Future<Optional<Value>> autoThrottlingEnabled = tr.get(tagThrottleAutoEnabledKey);

					if (!committed) {
						// First (uncommitted) pass: publish the manual-throttle limit knob.
						BinaryWriter limitWriter(Unversioned());
						limitWriter << SERVER_KNOBS->MAX_MANUAL_THROTTLED_TRANSACTION_TAGS;
						tr.set(tagThrottleLimitKey, limitWriter.toValue());
					}

					wait(success(throttledTagKeys) && success(autoThrottlingEnabled));

					// Resolve the auto-throttling enabled flag: explicit "0"/"1" from
					// the database wins; otherwise fall back to the server knob (and
					// persist the fallback on the first pass).
					if (autoThrottlingEnabled.get().present() &&
					    autoThrottlingEnabled.get().get() == LiteralStringRef("0")) {
						TEST(true); // Auto-throttling disabled
						if (self->autoThrottlingEnabled) {
							TraceEvent("AutoTagThrottlingDisabled", self->id).log();
						}
						self->autoThrottlingEnabled = false;
					} else if (autoThrottlingEnabled.get().present() &&
					           autoThrottlingEnabled.get().get() == LiteralStringRef("1")) {
						TEST(true); // Auto-throttling enabled
						if (!self->autoThrottlingEnabled) {
							TraceEvent("AutoTagThrottlingEnabled", self->id).log();
						}
						self->autoThrottlingEnabled = true;
					} else {
						TEST(true); // Auto-throttling unspecified
						if (autoThrottlingEnabled.get().present()) {
							TraceEvent(SevWarnAlways, "InvalidAutoTagThrottlingValue", self->id)
							    .detail("Value", autoThrottlingEnabled.get().get());
						}
						self->autoThrottlingEnabled = SERVER_KNOBS->AUTO_TAG_THROTTLING_ENABLED;
						if (!committed)
							tr.set(tagThrottleAutoEnabledKey,
							       LiteralStringRef(self->autoThrottlingEnabled ? "1" : "0"));
					}

					RkTagThrottleCollection updatedTagThrottles;

					TraceEvent("RatekeeperReadThrottledTags", self->id)
					    .detail("NumThrottledTags", throttledTagKeys.get().size());
					for (auto entry : throttledTagKeys.get()) {
						TagThrottleKey tagKey = TagThrottleKey::fromKey(entry.key);
						TagThrottleValue tagValue = TagThrottleValue::fromValue(entry.value);

						ASSERT(tagKey.tags.size() == 1); // Currently, only 1 tag per throttle is supported

						if (tagValue.expirationTime == 0 ||
						    tagValue.expirationTime > now() + tagValue.initialDuration) {
							TEST(true); // Converting tag throttle duration to absolute time
							tagValue.expirationTime = now() + tagValue.initialDuration;
							BinaryWriter wr(IncludeVersion(ProtocolVersion::withTagThrottleValueReason()));
							wr << tagValue;
							state Value value = wr.toValue();

							// Write the normalized (absolute-expiration) value back.
							tr.set(entry.key, value);
						}

						if (tagValue.expirationTime > now()) {
							TransactionTag tag = *tagKey.tags.begin();
							Optional<ClientTagThrottleLimits> oldLimits =
							    self->throttledTags.getManualTagThrottleLimits(tag, tagKey.priority);

							if (tagKey.throttleType == TagThrottleType::AUTO) {
								updatedTagThrottles.autoThrottleTag(
								    self->id, tag, 0, tagValue.tpsRate, tagValue.expirationTime);
								if (tagValue.reason == TagThrottledReason::BUSY_READ) {
									updatedTagThrottles.busyReadTagCount++;
								} else if (tagValue.reason == TagThrottledReason::BUSY_WRITE) {
									updatedTagThrottles.busyWriteTagCount++;
								}
							} else {
								updatedTagThrottles.manualThrottleTag(self->id,
								                                      tag,
								                                      tagKey.priority,
								                                      tagValue.tpsRate,
								                                      tagValue.expirationTime,
								                                      oldLimits);
							}
						}
					}

					// Atomically swap in the rebuilt collection and notify readers.
					self->throttledTags = std::move(updatedTagThrottles);
					++self->throttledTagChangeId;

					// Set the watch before committing so no change can be missed
					// between commit and the next iteration.
					state Future<Void> watchFuture = tr.watch(tagThrottleSignalKey);
					wait(tr.commit());
					committed = true;

					wait(watchFuture);
					TraceEvent("RatekeeperThrottleSignaled", self->id).log();
					TEST(true); // Tag throttle changes detected
					break;
				} catch (Error& e) {
					TraceEvent("RatekeeperMonitorThrottlingChangesError", self->id).error(e);
					wait(tr.onError(e));
				}
			}
		}
	}

	// Returns the TPS rate if a new/updated auto-throttle was created, else empty.
	Optional<double> autoThrottleTag(UID id, TransactionTag tag, double busyness) {
		return throttledTags.autoThrottleTag(id, tag, busyness);
	}

	// If the tag is busy enough, create an auto-throttle and persist it to the
	// database via ThrottleApi; otherwise (or if no throttle was created) no-op.
	Future<Void> tryAutoThrottleTag(TransactionTag tag, double rate, double busyness, TagThrottledReason reason) {
		// NOTE: before the comparison with MIN_TAG_COST, the busiest tag rate also compares with MIN_TAG_PAGES_RATE
		// currently MIN_TAG_PAGES_RATE > MIN_TAG_COST in our default knobs.
		if (busyness > SERVER_KNOBS->AUTO_THROTTLE_TARGET_TAG_BUSYNESS && rate > SERVER_KNOBS->MIN_TAG_COST) {
			TEST(true); // Transaction tag auto-throttled
			Optional<double> clientRate = autoThrottleTag(id, tag, busyness);
			if (clientRate.present()) {
				TagSet tags;
				tags.addTag(tag);

				Reference<DatabaseContext> dbRef = Reference<DatabaseContext>::addRef(db.getPtr());
				return ThrottleApi::throttleTags(dbRef,
				                                 tags,
				                                 clientRate.get(),
				                                 SERVER_KNOBS->AUTO_TAG_THROTTLE_DURATION,
				                                 TagThrottleType::AUTO,
				                                 TransactionPriority::DEFAULT,
				                                 now() + SERVER_KNOBS->AUTO_TAG_THROTTLE_DURATION,
				                                 reason);
			}
		}
		return Void();
	}

public:
	TagThrottlerImpl(Database db, UID id) : db(db), id(id) {}
	Future<Void> monitorThrottlingChanges() { return monitorThrottlingChanges(this); }

	void addRequests(TransactionTag tag, int count) { throttledTags.addRequests(tag, count); }
	uint64_t getThrottledTagChangeId() const { return throttledTagChangeId; }
	PrioritizedTransactionTagMap<ClientTagThrottleLimits> getClientRates() {
		return throttledTags.getClientRates(autoThrottlingEnabled);
	}
	int64_t autoThrottleCount() const { return throttledTags.autoThrottleCount(); }
	uint32_t busyReadTagCount() const { return throttledTags.busyReadTagCount; }
	uint32_t busyWriteTagCount() const { return throttledTags.busyWriteTagCount; }
	int64_t manualThrottleCount() const { return throttledTags.manualThrottleCount(); }
	bool isAutoThrottlingEnabled() const { return autoThrottlingEnabled; }

	// When the storage server is saturated (large queue or durability lag), try
	// to throttle its busiest write tag first, then its busiest read tag.
	Future<Void> tryAutoThrottleTag(StorageQueueInfo& ss, int64_t storageQueue, int64_t storageDurabilityLag) {
		// NOTE: we just keep it simple and don't differentiate write-saturation and read-saturation at the moment. In
		// most of situation, this works. More indicators besides queue size and durability lag could be investigated in
		// the future
		if (storageQueue > SERVER_KNOBS->AUTO_TAG_THROTTLE_STORAGE_QUEUE_BYTES ||
		    storageDurabilityLag > SERVER_KNOBS->AUTO_TAG_THROTTLE_DURABILITY_LAG_VERSIONS) {
			if (ss.busiestWriteTag.present()) {
				return tryAutoThrottleTag(ss.busiestWriteTag.get(),
				                          ss.busiestWriteTagRate,
				                          ss.busiestWriteTagFractionalBusyness,
				                          TagThrottledReason::BUSY_WRITE);
			}
			if (ss.busiestReadTag.present()) {
				return tryAutoThrottleTag(ss.busiestReadTag.get(),
				                          ss.busiestReadTagRate,
				                          ss.busiestReadTagFractionalBusyness,
				                          TagThrottledReason::BUSY_READ);
			}
		}
		return Void();
	}

}; // class TagThrottlerImpl
|
||||||
|
|
||||||
|
TagThrottler::TagThrottler(Database db, UID id) : impl(PImpl<TagThrottlerImpl>::create(db, id)) {}
|
||||||
|
TagThrottler::~TagThrottler() = default;
|
||||||
|
Future<Void> TagThrottler::monitorThrottlingChanges() {
|
||||||
|
return impl->monitorThrottlingChanges();
|
||||||
|
}
|
||||||
|
void TagThrottler::addRequests(TransactionTag tag, int count) {
|
||||||
|
impl->addRequests(tag, count);
|
||||||
|
}
|
||||||
|
uint64_t TagThrottler::getThrottledTagChangeId() const {
|
||||||
|
return impl->getThrottledTagChangeId();
|
||||||
|
}
|
||||||
|
PrioritizedTransactionTagMap<ClientTagThrottleLimits> TagThrottler::getClientRates() {
|
||||||
|
return impl->getClientRates();
|
||||||
|
}
|
||||||
|
int64_t TagThrottler::autoThrottleCount() const {
|
||||||
|
return impl->autoThrottleCount();
|
||||||
|
}
|
||||||
|
uint32_t TagThrottler::busyReadTagCount() const {
|
||||||
|
return impl->busyReadTagCount();
|
||||||
|
}
|
||||||
|
uint32_t TagThrottler::busyWriteTagCount() const {
|
||||||
|
return impl->busyWriteTagCount();
|
||||||
|
}
|
||||||
|
int64_t TagThrottler::manualThrottleCount() const {
|
||||||
|
return impl->manualThrottleCount();
|
||||||
|
}
|
||||||
|
bool TagThrottler::isAutoThrottlingEnabled() const {
|
||||||
|
return impl->isAutoThrottlingEnabled();
|
||||||
|
}
|
||||||
|
Future<Void> TagThrottler::tryAutoThrottleTag(StorageQueueInfo& ss,
|
||||||
|
int64_t storageQueue,
|
||||||
|
int64_t storageDurabilityLag) {
|
||||||
|
return impl->tryAutoThrottleTag(ss, storageQueue, storageDurabilityLag);
|
||||||
|
}
|
|
@ -0,0 +1,42 @@
|
||||||
|
/*
|
||||||
|
* TagThrottler.h
|
||||||
|
*
|
||||||
|
* This source file is part of the FoundationDB open source project
|
||||||
|
*
|
||||||
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "fdbclient/PImpl.h"
|
||||||
|
#include "fdbserver/Ratekeeper.h"
|
||||||
|
|
||||||
|
class TagThrottler {
|
||||||
|
PImpl<class TagThrottlerImpl> impl;
|
||||||
|
|
||||||
|
public:
|
||||||
|
TagThrottler(Database db, UID id);
|
||||||
|
~TagThrottler();
|
||||||
|
Future<Void> monitorThrottlingChanges();
|
||||||
|
void addRequests(TransactionTag tag, int count);
|
||||||
|
uint64_t getThrottledTagChangeId() const;
|
||||||
|
PrioritizedTransactionTagMap<ClientTagThrottleLimits> getClientRates();
|
||||||
|
int64_t autoThrottleCount() const;
|
||||||
|
uint32_t busyReadTagCount() const;
|
||||||
|
uint32_t busyWriteTagCount() const;
|
||||||
|
int64_t manualThrottleCount() const;
|
||||||
|
bool isAutoThrottlingEnabled() const;
|
||||||
|
Future<Void> tryAutoThrottleTag(StorageQueueInfo&, int64_t storageQueue, int64_t storageDurabilityLag);
|
||||||
|
};
|
|
@ -833,6 +833,7 @@ std::pair<NetworkAddressList, NetworkAddressList> buildNetworkAddresses(
|
||||||
NetworkAddressList publicNetworkAddresses;
|
NetworkAddressList publicNetworkAddresses;
|
||||||
NetworkAddressList listenNetworkAddresses;
|
NetworkAddressList listenNetworkAddresses;
|
||||||
|
|
||||||
|
connectionRecord.resolveHostnamesBlocking();
|
||||||
auto& coordinators = connectionRecord.getConnectionString().coordinators();
|
auto& coordinators = connectionRecord.getConnectionString().coordinators();
|
||||||
ASSERT(coordinators.size() > 0);
|
ASSERT(coordinators.size() > 0);
|
||||||
|
|
||||||
|
@ -1022,6 +1023,29 @@ struct CLIOptions {
|
||||||
return opts;
|
return opts;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Determine publicAddresses and listenAddresses by calling buildNetworkAddresses().
|
||||||
|
void buildNetwork(const char* name) {
|
||||||
|
try {
|
||||||
|
if (!publicAddressStrs.empty()) {
|
||||||
|
std::tie(publicAddresses, listenAddresses) =
|
||||||
|
buildNetworkAddresses(*connectionFile, publicAddressStrs, listenAddressStrs);
|
||||||
|
}
|
||||||
|
} catch (Error&) {
|
||||||
|
printHelpTeaser(name);
|
||||||
|
flushAndExit(FDB_EXIT_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (role == ServerRole::ConsistencyCheck) {
|
||||||
|
if (!publicAddressStrs.empty()) {
|
||||||
|
fprintf(stderr, "ERROR: Public address cannot be specified for consistency check processes\n");
|
||||||
|
printHelpTeaser(name);
|
||||||
|
flushAndExit(FDB_EXIT_ERROR);
|
||||||
|
}
|
||||||
|
auto publicIP = determinePublicIPAutomatically(connectionFile->getConnectionString());
|
||||||
|
publicAddresses.address = NetworkAddress(publicIP, ::getpid());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
CLIOptions() = default;
|
CLIOptions() = default;
|
||||||
|
|
||||||
|
@ -1594,26 +1618,6 @@ private:
|
||||||
// failmon?
|
// failmon?
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
|
||||||
if (!publicAddressStrs.empty()) {
|
|
||||||
std::tie(publicAddresses, listenAddresses) =
|
|
||||||
buildNetworkAddresses(*connectionFile, publicAddressStrs, listenAddressStrs);
|
|
||||||
}
|
|
||||||
} catch (Error&) {
|
|
||||||
printHelpTeaser(argv[0]);
|
|
||||||
flushAndExit(FDB_EXIT_ERROR);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (role == ServerRole::ConsistencyCheck) {
|
|
||||||
if (!publicAddressStrs.empty()) {
|
|
||||||
fprintf(stderr, "ERROR: Public address cannot be specified for consistency check processes\n");
|
|
||||||
printHelpTeaser(argv[0]);
|
|
||||||
flushAndExit(FDB_EXIT_ERROR);
|
|
||||||
}
|
|
||||||
auto publicIP = determinePublicIPAutomatically(connectionFile->getConnectionString());
|
|
||||||
publicAddresses.address = NetworkAddress(publicIP, ::getpid());
|
|
||||||
}
|
|
||||||
|
|
||||||
if (role == ServerRole::Simulation) {
|
if (role == ServerRole::Simulation) {
|
||||||
Optional<bool> buggifyOverride = checkBuggifyOverride(testFile);
|
Optional<bool> buggifyOverride = checkBuggifyOverride(testFile);
|
||||||
if (buggifyOverride.present())
|
if (buggifyOverride.present())
|
||||||
|
@ -1692,7 +1696,7 @@ int main(int argc, char* argv[]) {
|
||||||
//_set_output_format(_TWO_DIGIT_EXPONENT);
|
//_set_output_format(_TWO_DIGIT_EXPONENT);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
const auto opts = CLIOptions::parseArgs(argc, argv);
|
auto opts = CLIOptions::parseArgs(argc, argv);
|
||||||
const auto role = opts.role;
|
const auto role = opts.role;
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
|
@ -1787,6 +1791,7 @@ int main(int argc, char* argv[]) {
|
||||||
|
|
||||||
if (role == ServerRole::Simulation || role == ServerRole::CreateTemplateDatabase) {
|
if (role == ServerRole::Simulation || role == ServerRole::CreateTemplateDatabase) {
|
||||||
// startOldSimulator();
|
// startOldSimulator();
|
||||||
|
opts.buildNetwork(argv[0]);
|
||||||
startNewSimulator(opts.printSimTime);
|
startNewSimulator(opts.printSimTime);
|
||||||
openTraceFile(NetworkAddress(), opts.rollsize, opts.maxLogsSize, opts.logFolder, "trace", opts.logGroup);
|
openTraceFile(NetworkAddress(), opts.rollsize, opts.maxLogsSize, opts.logFolder, "trace", opts.logGroup);
|
||||||
openTracer(TracerType(deterministicRandom()->randomInt(static_cast<int>(TracerType::DISABLED),
|
openTracer(TracerType(deterministicRandom()->randomInt(static_cast<int>(TracerType::DISABLED),
|
||||||
|
@ -1795,6 +1800,7 @@ int main(int argc, char* argv[]) {
|
||||||
g_network = newNet2(opts.tlsConfig, opts.useThreadPool, true);
|
g_network = newNet2(opts.tlsConfig, opts.useThreadPool, true);
|
||||||
g_network->addStopCallback(Net2FileSystem::stop);
|
g_network->addStopCallback(Net2FileSystem::stop);
|
||||||
FlowTransport::createInstance(false, 1, WLTOKEN_RESERVED_COUNT);
|
FlowTransport::createInstance(false, 1, WLTOKEN_RESERVED_COUNT);
|
||||||
|
opts.buildNetwork(argv[0]);
|
||||||
|
|
||||||
const bool expectsPublicAddress =
|
const bool expectsPublicAddress =
|
||||||
(role == ServerRole::FDBD || role == ServerRole::NetworkTestServer || role == ServerRole::Restore);
|
(role == ServerRole::FDBD || role == ServerRole::NetworkTestServer || role == ServerRole::Restore);
|
||||||
|
|
|
@ -2308,10 +2308,11 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
|
||||||
ACTOR Future<Void> extractClusterInterface(Reference<AsyncVar<Optional<ClusterControllerFullInterface>> const> in,
|
ACTOR Future<Void> extractClusterInterface(Reference<AsyncVar<Optional<ClusterControllerFullInterface>> const> in,
|
||||||
Reference<AsyncVar<Optional<ClusterInterface>>> out) {
|
Reference<AsyncVar<Optional<ClusterInterface>>> out) {
|
||||||
loop {
|
loop {
|
||||||
if (in->get().present())
|
if (in->get().present()) {
|
||||||
out->set(in->get().get().clientInterface);
|
out->set(in->get().get().clientInterface);
|
||||||
else
|
} else {
|
||||||
out->set(Optional<ClusterInterface>());
|
out->set(Optional<ClusterInterface>());
|
||||||
|
}
|
||||||
wait(in->onChange());
|
wait(in->onChange());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2509,9 +2510,14 @@ ACTOR Future<MonitorLeaderInfo> monitorLeaderWithDelayedCandidacyImplOneGenerati
|
||||||
}
|
}
|
||||||
successIndex = index;
|
successIndex = index;
|
||||||
} else {
|
} else {
|
||||||
|
if (leader.isError() && leader.getError().code() == error_code_coordinators_changed) {
|
||||||
|
info.intermediateConnRecord->getConnectionString().resetToUnresolved();
|
||||||
|
throw coordinators_changed();
|
||||||
|
}
|
||||||
index = (index + 1) % addrs.size();
|
index = (index + 1) % addrs.size();
|
||||||
if (index == successIndex) {
|
if (index == successIndex) {
|
||||||
wait(delay(CLIENT_KNOBS->COORDINATOR_RECONNECTION_DELAY));
|
wait(delay(CLIENT_KNOBS->COORDINATOR_RECONNECTION_DELAY));
|
||||||
|
throw coordinators_changed();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2519,11 +2525,22 @@ ACTOR Future<MonitorLeaderInfo> monitorLeaderWithDelayedCandidacyImplOneGenerati
|
||||||
|
|
||||||
ACTOR Future<Void> monitorLeaderWithDelayedCandidacyImplInternal(Reference<IClusterConnectionRecord> connRecord,
|
ACTOR Future<Void> monitorLeaderWithDelayedCandidacyImplInternal(Reference<IClusterConnectionRecord> connRecord,
|
||||||
Reference<AsyncVar<Value>> outSerializedLeaderInfo) {
|
Reference<AsyncVar<Value>> outSerializedLeaderInfo) {
|
||||||
|
wait(connRecord->resolveHostnames());
|
||||||
state MonitorLeaderInfo info(connRecord);
|
state MonitorLeaderInfo info(connRecord);
|
||||||
loop {
|
loop {
|
||||||
MonitorLeaderInfo _info =
|
try {
|
||||||
wait(monitorLeaderWithDelayedCandidacyImplOneGeneration(connRecord, outSerializedLeaderInfo, info));
|
wait(info.intermediateConnRecord->resolveHostnames());
|
||||||
info = _info;
|
MonitorLeaderInfo _info =
|
||||||
|
wait(monitorLeaderWithDelayedCandidacyImplOneGeneration(connRecord, outSerializedLeaderInfo, info));
|
||||||
|
info = _info;
|
||||||
|
} catch (Error& e) {
|
||||||
|
if (e.code() == error_code_coordinators_changed) {
|
||||||
|
TraceEvent("MonitorLeaderWithDelayedCandidacyCoordinatorsChanged").suppressFor(1.0);
|
||||||
|
info.intermediateConnRecord->getConnectionString().resetToUnresolved();
|
||||||
|
} else {
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2657,6 +2674,7 @@ ACTOR Future<Void> fdbd(Reference<IClusterConnectionRecord> connRecord,
|
||||||
actors.push_back(serveProcess());
|
actors.push_back(serveProcess());
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
wait(connRecord->resolveHostnames());
|
||||||
ServerCoordinators coordinators(connRecord);
|
ServerCoordinators coordinators(connRecord);
|
||||||
if (g_network->isSimulated()) {
|
if (g_network->isSimulated()) {
|
||||||
whitelistBinPaths = ",, random_path, /bin/snap_create.sh,,";
|
whitelistBinPaths = ",, random_path, /bin/snap_create.sh,,";
|
||||||
|
|
|
@ -36,9 +36,17 @@ static const char* storageMigrationTypes[] = { "perpetual_storage_wiggle=0 stora
|
||||||
"perpetual_storage_wiggle=1",
|
"perpetual_storage_wiggle=1",
|
||||||
"perpetual_storage_wiggle=1 storage_migration_type=gradual",
|
"perpetual_storage_wiggle=1 storage_migration_type=gradual",
|
||||||
"storage_migration_type=aggressive" };
|
"storage_migration_type=aggressive" };
|
||||||
static const char* logTypes[] = { "log_engine:=1", "log_engine:=2", "log_spill:=1", "log_spill:=2",
|
static const char* logTypes[] = { "log_engine:=1",
|
||||||
"log_version:=2", "log_version:=3", "log_version:=4", "log_version:=5",
|
"log_engine:=2",
|
||||||
"log_version:=6", "log_version:=7" };
|
"log_spill:=1",
|
||||||
|
"log_spill:=2",
|
||||||
|
"log_version:=2",
|
||||||
|
"log_version:=3",
|
||||||
|
"log_version:=4",
|
||||||
|
"log_version:=5",
|
||||||
|
"log_version:=6",
|
||||||
|
// downgrade incompatible log version
|
||||||
|
"log_version:=7" };
|
||||||
static const char* redundancies[] = { "single", "double", "triple" };
|
static const char* redundancies[] = { "single", "double", "triple" };
|
||||||
static const char* backupTypes[] = { "backup_worker_enabled:=0", "backup_worker_enabled:=1" };
|
static const char* backupTypes[] = { "backup_worker_enabled:=0", "backup_worker_enabled:=1" };
|
||||||
|
|
||||||
|
@ -220,6 +228,8 @@ struct ConfigureDatabaseWorkload : TestWorkload {
|
||||||
int additionalDBs;
|
int additionalDBs;
|
||||||
bool allowDescriptorChange;
|
bool allowDescriptorChange;
|
||||||
bool allowTestStorageMigration;
|
bool allowTestStorageMigration;
|
||||||
|
bool waitStoreTypeCheck;
|
||||||
|
bool downgradeTest1; // if this is true, don't pick up downgrade incompatible config
|
||||||
std::vector<Future<Void>> clients;
|
std::vector<Future<Void>> clients;
|
||||||
PerfIntCounter retries;
|
PerfIntCounter retries;
|
||||||
|
|
||||||
|
@ -229,6 +239,8 @@ struct ConfigureDatabaseWorkload : TestWorkload {
|
||||||
getOption(options, LiteralStringRef("allowDescriptorChange"), SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT);
|
getOption(options, LiteralStringRef("allowDescriptorChange"), SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT);
|
||||||
allowTestStorageMigration =
|
allowTestStorageMigration =
|
||||||
getOption(options, "allowTestStorageMigration"_sr, false) && g_simulator.allowStorageMigrationTypeChange;
|
getOption(options, "allowTestStorageMigration"_sr, false) && g_simulator.allowStorageMigrationTypeChange;
|
||||||
|
waitStoreTypeCheck = getOption(options, "waitStoreTypeCheck"_sr, false);
|
||||||
|
downgradeTest1 = getOption(options, "downgradeTest1"_sr, false);
|
||||||
g_simulator.usableRegions = 1;
|
g_simulator.usableRegions = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -273,7 +285,7 @@ struct ConfigureDatabaseWorkload : TestWorkload {
|
||||||
// only storage_migration_type=gradual && perpetual_storage_wiggle=1 need this check because in QuietDatabase
|
// only storage_migration_type=gradual && perpetual_storage_wiggle=1 need this check because in QuietDatabase
|
||||||
// perpetual wiggle will be forced to close For other cases, later ConsistencyCheck will check KV store type
|
// perpetual wiggle will be forced to close For other cases, later ConsistencyCheck will check KV store type
|
||||||
// there
|
// there
|
||||||
if (self->allowTestStorageMigration) {
|
if (self->allowTestStorageMigration || self->waitStoreTypeCheck) {
|
||||||
loop {
|
loop {
|
||||||
// There exists a race where the check can start before the last transaction that singleDB issued
|
// There exists a race where the check can start before the last transaction that singleDB issued
|
||||||
// finishes, if singleDB gets actor cancelled from a timeout at the end of a test. This means the
|
// finishes, if singleDB gets actor cancelled from a timeout at the end of a test. This means the
|
||||||
|
@ -404,8 +416,14 @@ struct ConfigureDatabaseWorkload : TestWorkload {
|
||||||
true)));
|
true)));
|
||||||
} else if (randomChoice == 6) {
|
} else if (randomChoice == 6) {
|
||||||
// Some configurations will be invalid, and that's fine.
|
// Some configurations will be invalid, and that's fine.
|
||||||
wait(success(IssueConfigurationChange(
|
int length = sizeof(logTypes) / sizeof(logTypes[0]);
|
||||||
cx, logTypes[deterministicRandom()->randomInt(0, sizeof(logTypes) / sizeof(logTypes[0]))], false)));
|
|
||||||
|
if (self->downgradeTest1) {
|
||||||
|
length -= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
wait(success(
|
||||||
|
IssueConfigurationChange(cx, logTypes[deterministicRandom()->randomInt(0, length)], false)));
|
||||||
} else if (randomChoice == 7) {
|
} else if (randomChoice == 7) {
|
||||||
wait(success(IssueConfigurationChange(
|
wait(success(IssueConfigurationChange(
|
||||||
cx,
|
cx,
|
||||||
|
|
|
@ -926,10 +926,11 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
|
||||||
wait(tx->get(LiteralStringRef("processes")
|
wait(tx->get(LiteralStringRef("processes")
|
||||||
.withPrefix(SpecialKeySpace::getManagementApiCommandPrefix("coordinators"))));
|
.withPrefix(SpecialKeySpace::getManagementApiCommandPrefix("coordinators"))));
|
||||||
ASSERT(coordinator_processes_key.present());
|
ASSERT(coordinator_processes_key.present());
|
||||||
std::vector<std::string> process_addresses;
|
state std::vector<std::string> process_addresses;
|
||||||
boost::split(
|
boost::split(
|
||||||
process_addresses, coordinator_processes_key.get().toString(), [](char c) { return c == ','; });
|
process_addresses, coordinator_processes_key.get().toString(), [](char c) { return c == ','; });
|
||||||
ASSERT(process_addresses.size() == cs.coordinators().size());
|
ASSERT(process_addresses.size() == cs.coordinators().size() + cs.hostnames.size());
|
||||||
|
wait(cs.resolveHostnames());
|
||||||
// compare the coordinator process network addresses one by one
|
// compare the coordinator process network addresses one by one
|
||||||
for (const auto& network_address : cs.coordinators()) {
|
for (const auto& network_address : cs.coordinators()) {
|
||||||
ASSERT(std::find(process_addresses.begin(), process_addresses.end(), network_address.toString()) !=
|
ASSERT(std::find(process_addresses.begin(), process_addresses.end(), network_address.toString()) !=
|
||||||
|
@ -970,16 +971,15 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
|
||||||
old_coordinators_processes, processes_key.get().toString(), [](char c) { return c == ','; });
|
old_coordinators_processes, processes_key.get().toString(), [](char c) { return c == ','; });
|
||||||
// pick up one non-coordinator process if possible
|
// pick up one non-coordinator process if possible
|
||||||
std::vector<ProcessData> workers = wait(getWorkers(&tx->getTransaction()));
|
std::vector<ProcessData> workers = wait(getWorkers(&tx->getTransaction()));
|
||||||
|
std::string old_coordinators_processes_string = describe(old_coordinators_processes);
|
||||||
TraceEvent(SevDebug, "CoordinatorsManualChange")
|
TraceEvent(SevDebug, "CoordinatorsManualChange")
|
||||||
.detail("OldCoordinators", describe(old_coordinators_processes))
|
.detail("OldCoordinators", old_coordinators_processes_string)
|
||||||
.detail("WorkerSize", workers.size());
|
.detail("WorkerSize", workers.size());
|
||||||
if (workers.size() > old_coordinators_processes.size()) {
|
if (workers.size() > old_coordinators_processes.size()) {
|
||||||
loop {
|
loop {
|
||||||
auto worker = deterministicRandom()->randomChoice(workers);
|
auto worker = deterministicRandom()->randomChoice(workers);
|
||||||
new_coordinator_process = worker.address.toString();
|
new_coordinator_process = worker.address.toString();
|
||||||
if (std::find(old_coordinators_processes.begin(),
|
if (old_coordinators_processes_string.find(new_coordinator_process) == std::string::npos) {
|
||||||
old_coordinators_processes.end(),
|
|
||||||
worker.address.toString()) == old_coordinators_processes.end()) {
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1049,10 +1049,11 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
|
||||||
tx->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
|
tx->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
|
||||||
Optional<Value> res = wait(tx->get(coordinatorsKey));
|
Optional<Value> res = wait(tx->get(coordinatorsKey));
|
||||||
ASSERT(res.present()); // Otherwise, database is in a bad state
|
ASSERT(res.present()); // Otherwise, database is in a bad state
|
||||||
ClusterConnectionString cs(res.get().toString());
|
state ClusterConnectionString csNew(res.get().toString());
|
||||||
ASSERT(cs.coordinators().size() == old_coordinators_processes.size() + 1);
|
wait(csNew.resolveHostnames());
|
||||||
|
ASSERT(csNew.coordinators().size() == old_coordinators_processes.size() + 1);
|
||||||
// verify the coordinators' addresses
|
// verify the coordinators' addresses
|
||||||
for (const auto& network_address : cs.coordinators()) {
|
for (const auto& network_address : csNew.coordinators()) {
|
||||||
std::string address_str = network_address.toString();
|
std::string address_str = network_address.toString();
|
||||||
ASSERT(std::find(old_coordinators_processes.begin(),
|
ASSERT(std::find(old_coordinators_processes.begin(),
|
||||||
old_coordinators_processes.end(),
|
old_coordinators_processes.end(),
|
||||||
|
@ -1060,7 +1061,7 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
|
||||||
new_coordinator_process == address_str);
|
new_coordinator_process == address_str);
|
||||||
}
|
}
|
||||||
// verify the cluster decription
|
// verify the cluster decription
|
||||||
ASSERT(new_cluster_description == cs.clusterKeyName().toString());
|
ASSERT(new_cluster_description == csNew.clusterKeyName().toString());
|
||||||
tx->reset();
|
tx->reset();
|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
wait(tx->onError(e));
|
wait(tx->onError(e));
|
||||||
|
|
|
@ -172,7 +172,7 @@ void FlowKnobs::initialize(Randomize randomize, IsSimulated isSimulated) {
|
||||||
init( MIN_LOGGED_PRIORITY_BUSY_FRACTION, 0.05 );
|
init( MIN_LOGGED_PRIORITY_BUSY_FRACTION, 0.05 );
|
||||||
init( CERT_FILE_MAX_SIZE, 5 * 1024 * 1024 );
|
init( CERT_FILE_MAX_SIZE, 5 * 1024 * 1024 );
|
||||||
init( READY_QUEUE_RESERVED_SIZE, 8192 );
|
init( READY_QUEUE_RESERVED_SIZE, 8192 );
|
||||||
init( ITERATIONS_PER_REACTOR_CHECK, 5 );
|
init( ITERATIONS_PER_REACTOR_CHECK, 100 );
|
||||||
|
|
||||||
//Network
|
//Network
|
||||||
init( PACKET_LIMIT, 100LL<<20 );
|
init( PACKET_LIMIT, 100LL<<20 );
|
||||||
|
|
|
@ -20,12 +20,14 @@
|
||||||
|
|
||||||
#include "flow/StreamCipher.h"
|
#include "flow/StreamCipher.h"
|
||||||
#include "flow/Arena.h"
|
#include "flow/Arena.h"
|
||||||
|
#include "flow/IRandom.h"
|
||||||
#include "flow/ITrace.h"
|
#include "flow/ITrace.h"
|
||||||
#include "flow/UnitTest.h"
|
#include "flow/UnitTest.h"
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
std::unordered_set<EVP_CIPHER_CTX*> StreamCipher::ctxs;
|
UID StreamCipherKey::globalKeyId;
|
||||||
std::unordered_set<StreamCipherKey*> StreamCipherKey::cipherKeys;
|
std::unordered_map<UID, EVP_CIPHER_CTX*> StreamCipher::ctxs;
|
||||||
|
std::unordered_map<UID, StreamCipherKey*> StreamCipherKey::cipherKeys;
|
||||||
std::unique_ptr<StreamCipherKey> StreamCipherKey::globalKey;
|
std::unique_ptr<StreamCipherKey> StreamCipherKey::globalKey;
|
||||||
|
|
||||||
bool StreamCipherKey::isGlobalKeyPresent() {
|
bool StreamCipherKey::isGlobalKeyPresent() {
|
||||||
|
@ -36,8 +38,9 @@ void StreamCipherKey::allocGlobalCipherKey() {
|
||||||
if (StreamCipherKey::isGlobalKeyPresent()) {
|
if (StreamCipherKey::isGlobalKeyPresent()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
StreamCipherKey::globalKeyId = deterministicRandom()->randomUniqueID();
|
||||||
StreamCipherKey::globalKey = std::make_unique<StreamCipherKey>(AES_256_KEY_LENGTH);
|
StreamCipherKey::globalKey = std::make_unique<StreamCipherKey>(AES_256_KEY_LENGTH);
|
||||||
StreamCipherKey::cipherKeys.insert(StreamCipherKey::globalKey.get());
|
StreamCipherKey::cipherKeys[StreamCipherKey::globalKeyId] = StreamCipherKey::globalKey.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
void StreamCipherKey::initializeGlobalRandomTestKey() {
|
void StreamCipherKey::initializeGlobalRandomTestKey() {
|
||||||
|
@ -56,8 +59,8 @@ StreamCipherKey const* StreamCipherKey::getGlobalCipherKey() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void StreamCipherKey::cleanup() noexcept {
|
void StreamCipherKey::cleanup() noexcept {
|
||||||
for (auto cipherKey : cipherKeys) {
|
for (const auto& itr : cipherKeys) {
|
||||||
cipherKey->reset();
|
itr.second->reset();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -67,31 +70,33 @@ void StreamCipherKey::initializeKey(uint8_t* data, int len) {
|
||||||
memcpy(arr.get(), data, copyLen);
|
memcpy(arr.get(), data, copyLen);
|
||||||
}
|
}
|
||||||
|
|
||||||
StreamCipherKey::StreamCipherKey(int size) : arr(std::make_unique<uint8_t[]>(size)), keySize(size) {
|
StreamCipherKey::StreamCipherKey(int size)
|
||||||
|
: id(deterministicRandom()->randomUniqueID()), arr(std::make_unique<uint8_t[]>(size)), keySize(size) {
|
||||||
memset(arr.get(), 0, keySize);
|
memset(arr.get(), 0, keySize);
|
||||||
cipherKeys.insert(this);
|
cipherKeys[id] = this;
|
||||||
}
|
}
|
||||||
|
|
||||||
StreamCipherKey::~StreamCipherKey() {
|
StreamCipherKey::~StreamCipherKey() {
|
||||||
reset();
|
reset();
|
||||||
cipherKeys.erase(this);
|
cipherKeys.erase(this->id);
|
||||||
}
|
}
|
||||||
|
|
||||||
StreamCipher::StreamCipher(int keySize)
|
StreamCipher::StreamCipher(int keySize)
|
||||||
: ctx(EVP_CIPHER_CTX_new()), hmacCtx(HMAC_CTX_new()), cipherKey(std::make_unique<StreamCipherKey>(keySize)) {
|
: id(deterministicRandom()->randomUniqueID()), ctx(EVP_CIPHER_CTX_new()), hmacCtx(HMAC_CTX_new()),
|
||||||
ctxs.insert(ctx);
|
cipherKey(std::make_unique<StreamCipherKey>(keySize)) {
|
||||||
|
ctxs[id] = ctx;
|
||||||
}
|
}
|
||||||
|
|
||||||
StreamCipher::StreamCipher()
|
StreamCipher::StreamCipher()
|
||||||
: ctx(EVP_CIPHER_CTX_new()), hmacCtx(HMAC_CTX_new()),
|
: id(deterministicRandom()->randomUniqueID()), ctx(EVP_CIPHER_CTX_new()), hmacCtx(HMAC_CTX_new()),
|
||||||
cipherKey(std::make_unique<StreamCipherKey>(AES_256_KEY_LENGTH)) {
|
cipherKey(std::make_unique<StreamCipherKey>(AES_256_KEY_LENGTH)) {
|
||||||
ctxs.insert(ctx);
|
ctxs[id] = ctx;
|
||||||
}
|
}
|
||||||
|
|
||||||
StreamCipher::~StreamCipher() {
|
StreamCipher::~StreamCipher() {
|
||||||
HMAC_CTX_free(hmacCtx);
|
HMAC_CTX_free(hmacCtx);
|
||||||
EVP_CIPHER_CTX_free(ctx);
|
EVP_CIPHER_CTX_free(ctx);
|
||||||
ctxs.erase(ctx);
|
ctxs.erase(id);
|
||||||
}
|
}
|
||||||
|
|
||||||
EVP_CIPHER_CTX* StreamCipher::getCtx() {
|
EVP_CIPHER_CTX* StreamCipher::getCtx() {
|
||||||
|
@ -103,8 +108,8 @@ HMAC_CTX* StreamCipher::getHmacCtx() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void StreamCipher::cleanup() noexcept {
|
void StreamCipher::cleanup() noexcept {
|
||||||
for (auto ctx : ctxs) {
|
for (auto itr : ctxs) {
|
||||||
EVP_CIPHER_CTX_free(ctx);
|
EVP_CIPHER_CTX_free(itr.second);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -44,8 +44,10 @@
|
||||||
// Wrapper class for openssl implementation of AES GCM
|
// Wrapper class for openssl implementation of AES GCM
|
||||||
// encryption/decryption
|
// encryption/decryption
|
||||||
class StreamCipherKey : NonCopyable {
|
class StreamCipherKey : NonCopyable {
|
||||||
|
static UID globalKeyId;
|
||||||
static std::unique_ptr<StreamCipherKey> globalKey;
|
static std::unique_ptr<StreamCipherKey> globalKey;
|
||||||
static std::unordered_set<StreamCipherKey*> cipherKeys;
|
static std::unordered_map<UID, StreamCipherKey*> cipherKeys;
|
||||||
|
UID id;
|
||||||
std::unique_ptr<uint8_t[]> arr;
|
std::unique_ptr<uint8_t[]> arr;
|
||||||
int keySize;
|
int keySize;
|
||||||
|
|
||||||
|
@ -67,7 +69,8 @@ public:
|
||||||
};
|
};
|
||||||
|
|
||||||
class StreamCipher final : NonCopyable {
|
class StreamCipher final : NonCopyable {
|
||||||
static std::unordered_set<EVP_CIPHER_CTX*> ctxs;
|
UID id;
|
||||||
|
static std::unordered_map<UID, EVP_CIPHER_CTX*> ctxs;
|
||||||
EVP_CIPHER_CTX* ctx;
|
EVP_CIPHER_CTX* ctx;
|
||||||
HMAC_CTX* hmacCtx;
|
HMAC_CTX* hmacCtx;
|
||||||
std::unique_ptr<StreamCipherKey> cipherKey;
|
std::unique_ptr<StreamCipherKey> cipherKey;
|
||||||
|
|
|
@ -255,6 +255,14 @@ if(WITH_PYTHON)
|
||||||
add_fdb_test(
|
add_fdb_test(
|
||||||
TEST_FILES restarting/from_7.0.0/SnapCycleRestart-1.txt
|
TEST_FILES restarting/from_7.0.0/SnapCycleRestart-1.txt
|
||||||
restarting/from_7.0.0/SnapCycleRestart-2.txt)
|
restarting/from_7.0.0/SnapCycleRestart-2.txt)
|
||||||
|
add_fdb_test(
|
||||||
|
TEST_FILES restarting/to_7.1.0/ConfigureStorageMigrationTestRestart-1.toml
|
||||||
|
restarting/to_7.1.0/ConfigureStorageMigrationTestRestart-2.toml)
|
||||||
|
add_fdb_test(
|
||||||
|
TEST_FILES restarting/from_7.1.0/ConfigureStorageMigrationTestRestart-1.toml
|
||||||
|
restarting/from_7.1.0/ConfigureStorageMigrationTestRestart-2.toml)
|
||||||
|
|
||||||
|
|
||||||
add_fdb_test(TEST_FILES slow/ApiCorrectness.toml)
|
add_fdb_test(TEST_FILES slow/ApiCorrectness.toml)
|
||||||
add_fdb_test(TEST_FILES slow/ApiCorrectnessAtomicRestore.toml)
|
add_fdb_test(TEST_FILES slow/ApiCorrectnessAtomicRestore.toml)
|
||||||
add_fdb_test(TEST_FILES slow/ApiCorrectnessSwitchover.toml)
|
add_fdb_test(TEST_FILES slow/ApiCorrectnessSwitchover.toml)
|
||||||
|
|
|
@ -0,0 +1,27 @@
|
||||||
|
[configuration]
|
||||||
|
extraMachineCountDC = 2
|
||||||
|
|
||||||
|
[[test]]
|
||||||
|
testTitle = 'CloggedConfigureDatabaseTest'
|
||||||
|
clearAfterTest = false
|
||||||
|
|
||||||
|
[[test.workload]]
|
||||||
|
testName = 'ConfigureDatabase'
|
||||||
|
testDuration = 30.0
|
||||||
|
allowTestStorageMigration = true
|
||||||
|
allowDescriptorChange = false
|
||||||
|
|
||||||
|
[[test.workload]]
|
||||||
|
testName = 'RandomClogging'
|
||||||
|
testDuration = 30.0
|
||||||
|
|
||||||
|
[[test.workload]]
|
||||||
|
testName = 'RandomClogging'
|
||||||
|
testDuration = 30.0
|
||||||
|
scale = 0.1
|
||||||
|
clogginess = 2.0
|
||||||
|
|
||||||
|
[[test.workload]]
|
||||||
|
testName='SaveAndKill'
|
||||||
|
restartInfoLocation='simfdb/restartInfo.ini'
|
||||||
|
testDuration=30.0
|
|
@ -0,0 +1,22 @@
|
||||||
|
[configuration]
|
||||||
|
extraMachineCountDC = 2
|
||||||
|
|
||||||
|
[[test]]
|
||||||
|
testTitle = 'CloggedConfigureDatabaseTest'
|
||||||
|
runSetup=false
|
||||||
|
waitForQuiescenceBegin=false
|
||||||
|
|
||||||
|
[[test.workload]]
|
||||||
|
testName = 'ConfigureDatabase'
|
||||||
|
testDuration = 300.0
|
||||||
|
waitStoreTypeCheck = true
|
||||||
|
|
||||||
|
[[test.workload]]
|
||||||
|
testName = 'RandomClogging'
|
||||||
|
testDuration = 300.0
|
||||||
|
|
||||||
|
[[test.workload]]
|
||||||
|
testName = 'RandomClogging'
|
||||||
|
testDuration = 300.0
|
||||||
|
scale = 0.1
|
||||||
|
clogginess = 2.0
|
|
@ -0,0 +1,31 @@
|
||||||
|
[configuration]
|
||||||
|
extraMachineCountDC = 2
|
||||||
|
maxTLogVersion=6
|
||||||
|
disableHostname=true
|
||||||
|
storageEngineExcludeTypes=[4]
|
||||||
|
|
||||||
|
[[test]]
|
||||||
|
testTitle = 'CloggedConfigureDatabaseTest'
|
||||||
|
clearAfterTest = false
|
||||||
|
|
||||||
|
[[test.workload]]
|
||||||
|
testName = 'ConfigureDatabase'
|
||||||
|
testDuration = 30.0
|
||||||
|
allowTestStorageMigration = true
|
||||||
|
allowDescriptorChange = false
|
||||||
|
downgradeTest1 = true
|
||||||
|
|
||||||
|
[[test.workload]]
|
||||||
|
testName = 'RandomClogging'
|
||||||
|
testDuration = 30.0
|
||||||
|
|
||||||
|
[[test.workload]]
|
||||||
|
testName = 'RandomClogging'
|
||||||
|
testDuration = 30.0
|
||||||
|
scale = 0.1
|
||||||
|
clogginess = 2.0
|
||||||
|
|
||||||
|
[[test.workload]]
|
||||||
|
testName='SaveAndKill'
|
||||||
|
restartInfoLocation='simfdb/restartInfo.ini'
|
||||||
|
testDuration=30.0
|
|
@ -0,0 +1,22 @@
|
||||||
|
[configuration]
|
||||||
|
extraMachineCountDC = 2
|
||||||
|
|
||||||
|
[[test]]
|
||||||
|
testTitle = 'CloggedConfigureDatabaseTest'
|
||||||
|
runSetup=false
|
||||||
|
waitForQuiescenceBegin=false
|
||||||
|
|
||||||
|
[[test.workload]]
|
||||||
|
testName = 'ConfigureDatabase'
|
||||||
|
testDuration = 300.0
|
||||||
|
waitStoreTypeCheck = true
|
||||||
|
|
||||||
|
[[test.workload]]
|
||||||
|
testName = 'RandomClogging'
|
||||||
|
testDuration = 300.0
|
||||||
|
|
||||||
|
[[test.workload]]
|
||||||
|
testName = 'RandomClogging'
|
||||||
|
testDuration = 300.0
|
||||||
|
scale = 0.1
|
||||||
|
clogginess = 2.0
|
Loading…
Reference in New Issue