diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e599780e37..525e80a9d9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -36,7 +36,7 @@ Members of the Apple FoundationDB team are part of the core committers helping r ## Contributing ### Opening a Pull Request -We love pull requests! For minor changes, feel free to open up a PR directly. For larger feature development and any changes that may require community discussion, we ask that you discuss your ideas on the [community forums](https://forums.foundationdb.org) prior to opening a PR, and then reference that thread within your PR comment. +We love pull requests! For minor changes, feel free to open up a PR directly. For larger feature development and any changes that may require community discussion, we ask that you discuss your ideas on the [community forums](https://forums.foundationdb.org) prior to opening a PR, and then reference that thread within your PR comment. Please refer to [FoundationDB Commit Process](https://github.com/apple/foundationdb/wiki/FoundationDB-Commit-Process) for more detailed guidelines. CI will be run automatically for core committers, and for community PRs it will be initiated by the request of a core committer. Tests can also be run locally via `ctest`, and core committers can run additional validation on pull requests prior to merging them. 
diff --git a/bindings/c/fdb_c.cpp b/bindings/c/fdb_c.cpp index bf6af3aab7..907f8058b6 100644 --- a/bindings/c/fdb_c.cpp +++ b/bindings/c/fdb_c.cpp @@ -364,6 +364,17 @@ extern "C" DLLEXPORT double fdb_database_get_main_thread_busyness(FDBDatabase* d return DB(d)->getMainThreadBusyness(); } +// Returns the protocol version reported by a quorum of coordinators +// If an expected version is non-zero, the future won't return until the protocol version is different than expected +extern "C" DLLEXPORT FDBFuture* fdb_database_get_server_protocol(FDBDatabase* db, uint64_t expected_version) { + Optional expected; + if (expected_version > 0) { + expected = ProtocolVersion(expected_version); + } + + return (FDBFuture*)(DB(db)->getServerProtocol(expected).extractPtr()); +} + extern "C" DLLEXPORT void fdb_transaction_destroy(FDBTransaction* tr) { try { TXN(tr)->delref(); @@ -583,10 +594,6 @@ extern "C" DLLEXPORT FDBFuture* fdb_transaction_get_approximate_size(FDBTransact return (FDBFuture*)TXN(tr)->getApproximateSize().extractPtr(); } -extern "C" DLLEXPORT FDBFuture* fdb_get_server_protocol(const char* clusterFilePath) { - return (FDBFuture*)(API->getServerProtocol(clusterFilePath ? 
clusterFilePath : "").extractPtr()); -} - extern "C" DLLEXPORT FDBFuture* fdb_transaction_get_versionstamp(FDBTransaction* tr) { return (FDBFuture*)(TXN(tr)->getVersionstamp().extractPtr()); } diff --git a/bindings/c/foundationdb/fdb_c.h b/bindings/c/foundationdb/fdb_c.h index 2086cbd775..4ea59ac11e 100644 --- a/bindings/c/foundationdb/fdb_c.h +++ b/bindings/c/foundationdb/fdb_c.h @@ -189,6 +189,8 @@ DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_database_create_snapshot(FDBDatabase DLLEXPORT WARN_UNUSED_RESULT double fdb_database_get_main_thread_busyness(FDBDatabase* db); +DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_database_get_server_protocol(FDBDatabase* db, uint64_t expected_version); + DLLEXPORT void fdb_transaction_destroy(FDBTransaction* tr); DLLEXPORT void fdb_transaction_cancel(FDBTransaction* tr); @@ -281,8 +283,6 @@ DLLEXPORT WARN_UNUSED_RESULT fdb_error_t fdb_transaction_get_committed_version(F */ DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_transaction_get_approximate_size(FDBTransaction* tr); -DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_get_server_protocol(const char* clusterFilePath); - DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_transaction_get_versionstamp(FDBTransaction* tr); DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_transaction_on_error(FDBTransaction* tr, fdb_error_t error); diff --git a/bindings/c/test/unit/unit_tests.cpp b/bindings/c/test/unit/unit_tests.cpp index f3f97476c2..54f763fb5c 100644 --- a/bindings/c/test/unit/unit_tests.cpp +++ b/bindings/c/test/unit/unit_tests.cpp @@ -1513,17 +1513,17 @@ TEST_CASE("fdb_transaction_get_approximate_size") { } } -TEST_CASE("fdb_get_server_protocol") { +TEST_CASE("fdb_database_get_server_protocol") { // We don't really have any expectations other than "don't crash" here - FDBFuture* protocolFuture = fdb_get_server_protocol(clusterFilePath.c_str()); + FDBFuture* protocolFuture = fdb_database_get_server_protocol(db, 0); uint64_t out; fdb_check(fdb_future_block_until_ready(protocolFuture)); 
fdb_check(fdb_future_get_uint64(protocolFuture, &out)); fdb_future_destroy(protocolFuture); - // "Default" cluster file version - protocolFuture = fdb_get_server_protocol(nullptr); + // Passing in an expected version that's different than the cluster version + protocolFuture = fdb_database_get_server_protocol(db, 0x0FDB00A200090000LL); fdb_check(fdb_future_block_until_ready(protocolFuture)); fdb_check(fdb_future_get_uint64(protocolFuture, &out)); fdb_future_destroy(protocolFuture); diff --git a/bindings/python/fdb/__init__.py b/bindings/python/fdb/__init__.py index 17f697797d..c969b6c70c 100644 --- a/bindings/python/fdb/__init__.py +++ b/bindings/python/fdb/__init__.py @@ -95,7 +95,6 @@ def api_version(ver): 'transactional', 'options', 'StreamingMode', - 'get_server_protocol' ) _add_symbols(fdb.impl, list) diff --git a/bindings/python/fdb/impl.py b/bindings/python/fdb/impl.py index 6e7803777a..e8cc2a79b8 100644 --- a/bindings/python/fdb/impl.py +++ b/bindings/python/fdb/impl.py @@ -1531,9 +1531,6 @@ def init_c_api(): _capi.fdb_transaction_get_approximate_size.argtypes = [ctypes.c_void_p] _capi.fdb_transaction_get_approximate_size.restype = ctypes.c_void_p - _capi.fdb_get_server_protocol.argtypes = [ctypes.c_char_p] - _capi.fdb_get_server_protocol.restype = ctypes.c_void_p - _capi.fdb_transaction_get_versionstamp.argtypes = [ctypes.c_void_p] _capi.fdb_transaction_get_versionstamp.restype = ctypes.c_void_p @@ -1733,13 +1730,6 @@ open_databases = {} cacheLock = threading.Lock() -def get_server_protocol(clusterFilePath=None): - with _network_thread_reentrant_lock: - if not _network_thread: - init() - - return FutureUInt64(_capi.fdb_get_server_protocol(optionalParamToBytes(clusterFilePath)[0])) - def open(cluster_file=None, event_model=None): """Opens the given database (or the default database of the cluster indicated by the fdb.cluster file in a platform-specific location, if no cluster_file diff --git a/build/docker/centos6/devel/Dockerfile 
b/build/docker/centos6/devel/Dockerfile index 82c99d4464..c5c9db2914 100644 --- a/build/docker/centos6/devel/Dockerfile +++ b/build/docker/centos6/devel/Dockerfile @@ -76,4 +76,9 @@ RUN rm -f /root/anaconda-ks.cfg && \ ' j start --tarball $(find ${HOME}/build_output/packages -name correctness\*.tar.gz) "${@}"' \ '}' \ '' \ - >> .bashrc \ No newline at end of file + 'USER_BASHRC="$HOME/src/.bashrc.local"' \ + 'if test -f "$USER_BASHRC"; then' \ + ' source $USER_BASHRC' \ + 'fi' \ + '' \ + >> .bashrc diff --git a/build/docker/centos7/devel/Dockerfile b/build/docker/centos7/devel/Dockerfile index ea60da54e7..98f1923c17 100644 --- a/build/docker/centos7/devel/Dockerfile +++ b/build/docker/centos7/devel/Dockerfile @@ -104,5 +104,10 @@ RUN rm -f /root/anaconda-ks.cfg && \ ' j start --tarball $(find ${HOME}/build_output/packages -name correctness\*.tar.gz) "${@}"' \ '}' \ '' \ + 'USER_BASHRC="$HOME/src/.bashrc.local"' \ + 'if test -f "$USER_BASHRC"; then' \ + ' source $USER_BASHRC' \ + 'fi' \ + '' \ 'bash ${HOME}/docker_proxy.sh' \ - >> .bashrc \ No newline at end of file + >> .bashrc diff --git a/fdbcli/fdbcli.actor.cpp b/fdbcli/fdbcli.actor.cpp index e608e96086..d655601e22 100644 --- a/fdbcli/fdbcli.actor.cpp +++ b/fdbcli/fdbcli.actor.cpp @@ -24,6 +24,7 @@ #include "fdbclient/Status.h" #include "fdbclient/StatusClient.h" #include "fdbclient/DatabaseContext.h" +#include "fdbclient/GlobalConfig.actor.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbclient/ReadYourWrites.h" #include "fdbclient/ClusterInterface.h" @@ -3841,25 +3842,16 @@ ACTOR Future cli(CLIOptions opt, LineNoise* plinenoise) { is_error = true; continue; } - state Future>> sampleRateFuture = - tr->get(fdbClientInfoTxnSampleRate); - state Future>> sizeLimitFuture = - tr->get(fdbClientInfoTxnSizeLimit); - wait(makeInterruptable(success(sampleRateFuture) && success(sizeLimitFuture))); + const double sampleRateDbl = GlobalConfig::globalConfig().get( + fdbClientInfoTxnSampleRate, 
std::numeric_limits::infinity()); + const int64_t sizeLimit = + GlobalConfig::globalConfig().get(fdbClientInfoTxnSizeLimit, -1); std::string sampleRateStr = "default", sizeLimitStr = "default"; - if (sampleRateFuture.get().present()) { - const double sampleRateDbl = - BinaryReader::fromStringRef(sampleRateFuture.get().get(), Unversioned()); - if (!std::isinf(sampleRateDbl)) { - sampleRateStr = boost::lexical_cast(sampleRateDbl); - } + if (!std::isinf(sampleRateDbl)) { + sampleRateStr = boost::lexical_cast(sampleRateDbl); } - if (sizeLimitFuture.get().present()) { - const int64_t sizeLimit = - BinaryReader::fromStringRef(sizeLimitFuture.get().get(), Unversioned()); - if (sizeLimit != -1) { - sizeLimitStr = boost::lexical_cast(sizeLimit); - } + if (sizeLimit != -1) { + sizeLimitStr = boost::lexical_cast(sizeLimit); } printf("Client profiling rate is set to %s and size limit is set to %s.\n", sampleRateStr.c_str(), @@ -3897,8 +3889,12 @@ ACTOR Future cli(CLIOptions opt, LineNoise* plinenoise) { continue; } } - tr->set(fdbClientInfoTxnSampleRate, BinaryWriter::toValue(sampleRate, Unversioned())); - tr->set(fdbClientInfoTxnSizeLimit, BinaryWriter::toValue(sizeLimit, Unversioned())); + + Tuple rate = Tuple().appendDouble(sampleRate); + Tuple size = Tuple().append(sizeLimit); + tr->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES); + tr->set(GlobalConfig::prefixedKey(fdbClientInfoTxnSampleRate), rate.pack()); + tr->set(GlobalConfig::prefixedKey(fdbClientInfoTxnSizeLimit), size.pack()); if (!intrans) { wait(commitTransaction(tr)); } diff --git a/fdbclient/CMakeLists.txt b/fdbclient/CMakeLists.txt index 9b855ff3ae..ee87d08646 100644 --- a/fdbclient/CMakeLists.txt +++ b/fdbclient/CMakeLists.txt @@ -31,6 +31,9 @@ set(FDBCLIENT_SRCS FDBOptions.h FDBTypes.h FileBackupAgent.actor.cpp + GlobalConfig.h + GlobalConfig.actor.h + GlobalConfig.actor.cpp GrvProxyInterface.h HTTP.actor.cpp IClientApi.h diff --git a/fdbclient/CommitProxyInterface.h 
b/fdbclient/CommitProxyInterface.h index a166a87dfa..794b88ceaa 100644 --- a/fdbclient/CommitProxyInterface.h +++ b/fdbclient/CommitProxyInterface.h @@ -31,6 +31,7 @@ #include "fdbclient/CommitTransaction.h" #include "fdbserver/RatekeeperInterface.h" #include "fdbclient/TagThrottle.h" +#include "fdbclient/GlobalConfig.h" #include "fdbrpc/Stats.h" #include "fdbrpc/TimedRequest.h" @@ -113,16 +114,10 @@ struct ClientDBInfo { vector commitProxies; Optional firstCommitProxy; // not serialized, used for commitOnFirstProxy when the commit proxies vector has been shrunk - double clientTxnInfoSampleRate; - int64_t clientTxnInfoSizeLimit; Optional forward; - double transactionTagSampleRate; - double transactionTagSampleCost; + vector history; - ClientDBInfo() - : clientTxnInfoSampleRate(std::numeric_limits::infinity()), clientTxnInfoSizeLimit(-1), - transactionTagSampleRate(CLIENT_KNOBS->READ_TAG_SAMPLE_RATE), - transactionTagSampleCost(CLIENT_KNOBS->COMMIT_SAMPLE_COST) {} + ClientDBInfo() {} bool operator==(ClientDBInfo const& r) const { return id == r.id; } bool operator!=(ClientDBInfo const& r) const { return id != r.id; } @@ -132,15 +127,7 @@ struct ClientDBInfo { if constexpr (!is_fb_function) { ASSERT(ar.protocolVersion().isValid()); } - serializer(ar, - grvProxies, - commitProxies, - id, - clientTxnInfoSampleRate, - clientTxnInfoSizeLimit, - forward, - transactionTagSampleRate, - transactionTagSampleCost); + serializer(ar, grvProxies, commitProxies, id, forward, history); } }; diff --git a/fdbclient/GlobalConfig.actor.cpp b/fdbclient/GlobalConfig.actor.cpp new file mode 100644 index 0000000000..58e032d363 --- /dev/null +++ b/fdbclient/GlobalConfig.actor.cpp @@ -0,0 +1,229 @@ +/* + * GlobalConfig.actor.cpp + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2021 Apple Inc. 
and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "fdbclient/DatabaseContext.h" +#include "fdbclient/GlobalConfig.actor.h" +#include "fdbclient/SpecialKeySpace.actor.h" +#include "fdbclient/SystemData.h" +#include "fdbclient/Tuple.h" +#include "flow/flow.h" +#include "flow/genericactors.actor.h" + +#include "flow/actorcompiler.h" // This must be the last #include. + +const KeyRef fdbClientInfoTxnSampleRate = LiteralStringRef("config/fdb_client_info/client_txn_sample_rate"); +const KeyRef fdbClientInfoTxnSizeLimit = LiteralStringRef("config/fdb_client_info/client_txn_size_limit"); + +const KeyRef transactionTagSampleRate = LiteralStringRef("config/transaction_tag_sample_rate"); +const KeyRef transactionTagSampleCost = LiteralStringRef("config/transaction_tag_sample_cost"); + +GlobalConfig::GlobalConfig() : lastUpdate(0) {} + +void GlobalConfig::create(DatabaseContext* cx, Reference> dbInfo) { + if (g_network->global(INetwork::enGlobalConfig) == nullptr) { + auto config = new GlobalConfig{}; + config->cx = Database(cx); + g_network->setGlobal(INetwork::enGlobalConfig, config); + config->_updater = updater(config, dbInfo); + } +} + +GlobalConfig& GlobalConfig::globalConfig() { + void* res = g_network->global(INetwork::enGlobalConfig); + ASSERT(res); + return *reinterpret_cast(res); +} + +Key GlobalConfig::prefixedKey(KeyRef key) { + return 
key.withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::GLOBALCONFIG).begin); +} + +const Reference GlobalConfig::get(KeyRef name) { + auto it = data.find(name); + if (it == data.end()) { + return Reference(); + } + return it->second; +} + +const std::map> GlobalConfig::get(KeyRangeRef range) { + std::map> results; + for (const auto& [key, value] : data) { + if (range.contains(key)) { + results[key] = value; + } + } + return results; +} + +Future GlobalConfig::onInitialized() { + return initialized.getFuture(); +} + +void GlobalConfig::insert(KeyRef key, ValueRef value) { + data.erase(key); + + Arena arena(key.expectedSize() + value.expectedSize()); + KeyRef stableKey = KeyRef(arena, key); + try { + std::any any; + Tuple t = Tuple::unpack(value); + if (t.getType(0) == Tuple::ElementType::UTF8) { + any = StringRef(arena, t.getString(0).contents()); + } else if (t.getType(0) == Tuple::ElementType::INT) { + any = t.getInt(0); + } else if (t.getType(0) == Tuple::ElementType::FLOAT) { + any = t.getFloat(0); + } else if (t.getType(0) == Tuple::ElementType::DOUBLE) { + any = t.getDouble(0); + } else { + ASSERT(false); + } + data[stableKey] = makeReference(std::move(arena), std::move(any)); + } catch (Error& e) { + TraceEvent("GlobalConfigTupleParseError").detail("What", e.what()); + } +} + +void GlobalConfig::erase(KeyRef key) { + data.erase(key); +} + +void GlobalConfig::erase(KeyRangeRef range) { + auto it = data.begin(); + while (it != data.end()) { + if (range.contains(it->first)) { + it = data.erase(it); + } else { + ++it; + } + } +} + +// Older FDB versions used different keys for client profiling data. This +// function performs a one-time migration of data in these keys to the new +// global configuration key space. 
+ACTOR Future GlobalConfig::migrate(GlobalConfig* self) { + state Reference tr = makeReference(self->cx); + tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); + + state Key migratedKey("\xff\x02/fdbClientInfo/migrated/"_sr); + state Optional migrated = wait(tr->get(migratedKey)); + if (migrated.present()) { + // Already performed migration. + return Void(); + } + + state Optional sampleRate = wait(tr->get(Key("\xff\x02/fdbClientInfo/client_txn_sample_rate/"_sr))); + state Optional sizeLimit = wait(tr->get(Key("\xff\x02/fdbClientInfo/client_txn_size_limit/"_sr))); + + loop { + try { + tr->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES); + // The value doesn't matter too much, as long as the key is set. + tr->set(migratedKey.contents(), "1"_sr); + if (sampleRate.present()) { + const double sampleRateDbl = + BinaryReader::fromStringRef(sampleRate.get().contents(), Unversioned()); + Tuple rate = Tuple().appendDouble(sampleRateDbl); + tr->set(GlobalConfig::prefixedKey(fdbClientInfoTxnSampleRate), rate.pack()); + } + if (sizeLimit.present()) { + const int64_t sizeLimitInt = + BinaryReader::fromStringRef(sizeLimit.get().contents(), Unversioned()); + Tuple size = Tuple().append(sizeLimitInt); + tr->set(GlobalConfig::prefixedKey(fdbClientInfoTxnSizeLimit), size.pack()); + } + + wait(tr->commit()); + return Void(); + } catch (Error& e) { + throw; + } + } +} + +// Updates local copy of global configuration by reading the entire key-range +// from storage. +ACTOR Future GlobalConfig::refresh(GlobalConfig* self) { + self->data.clear(); + + Transaction tr(self->cx); + Standalone result = wait(tr.getRange(globalConfigDataKeys, CLIENT_KNOBS->TOO_MANY)); + for (const auto& kv : result) { + KeyRef systemKey = kv.key.removePrefix(globalConfigKeysPrefix); + self->insert(systemKey, kv.value); + } + return Void(); +} + +// Applies updates to the local copy of the global configuration when this +// process receives an updated history. 
+ACTOR Future GlobalConfig::updater(GlobalConfig* self, Reference> dbInfo) { + wait(self->migrate(self)); + + wait(self->refresh(self)); + self->initialized.send(Void()); + + loop { + try { + wait(dbInfo->onChange()); + + auto& history = dbInfo->get().history; + if (history.size() == 0) { + continue; + } + + if (self->lastUpdate < history[0].version) { + // This process missed too many global configuration + // history updates or the protocol version changed, so it + // must re-read the entire configuration range. + wait(self->refresh(self)); + if (dbInfo->get().history.size() > 0) { + self->lastUpdate = dbInfo->get().history.back().version; + } + } else { + // Apply history in order, from lowest version to highest + // version. Mutation history should already be stored in + // ascending version order. + for (const auto& vh : history) { + if (vh.version <= self->lastUpdate) { + continue; // already applied this mutation + } + + for (const auto& mutation : vh.mutations.contents()) { + if (mutation.type == MutationRef::SetValue) { + self->insert(mutation.param1, mutation.param2); + } else if (mutation.type == MutationRef::ClearRange) { + self->erase(KeyRangeRef(mutation.param1, mutation.param2)); + } else { + ASSERT(false); + } + } + + ASSERT(vh.version > self->lastUpdate); + self->lastUpdate = vh.version; + } + } + } catch (Error& e) { + throw; + } + } +} diff --git a/fdbclient/GlobalConfig.actor.h b/fdbclient/GlobalConfig.actor.h new file mode 100644 index 0000000000..5c3693f450 --- /dev/null +++ b/fdbclient/GlobalConfig.actor.h @@ -0,0 +1,146 @@ +/* + * GlobalConfig.actor.h + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2021 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#if defined(NO_INTELLISENSE) && !defined(FDBCLIENT_GLOBALCONFIG_ACTOR_G_H) +#define FDBCLIENT_GLOBALCONFIG_ACTOR_G_H +#include "fdbclient/GlobalConfig.actor.g.h" +#elif !defined(FDBCLIENT_GLOBALCONFIG_ACTOR_H) +#define FDBCLIENT_GLOBALCONFIG_ACTOR_H + +#include +#include +#include +#include + +#include "fdbclient/CommitProxyInterface.h" +#include "fdbclient/GlobalConfig.h" +#include "fdbclient/ReadYourWrites.h" + +#include "flow/actorcompiler.h" // has to be last include + +// The global configuration is a series of typed key-value pairs synced to all +// nodes (server and client) in an FDB cluster in an eventually consistent +// manner. Only small key-value pairs should be stored in global configuration; +// an excessive amount of data can cause synchronization slowness. + +// Keys +extern const KeyRef fdbClientInfoTxnSampleRate; +extern const KeyRef fdbClientInfoTxnSizeLimit; + +extern const KeyRef transactionTagSampleRate; +extern const KeyRef transactionTagSampleCost; + +// Structure used to hold the values stored by global configuration. The arena +// is used as memory to store both the key and the value (the value is only +// stored in the arena if it is an object; primitives are just copied). +struct ConfigValue : ReferenceCounted { + Arena arena; + std::any value; + + ConfigValue() {} + ConfigValue(Arena&& a, std::any&& v) : arena(a), value(v) {} +}; + +class GlobalConfig : NonCopyable { +public: + // Creates a GlobalConfig singleton, accessed by calling GlobalConfig(). 
+ // This function should only be called once by each process (however, it is + // idempotent and calling it multiple times will have no effect). + static void create(DatabaseContext* cx, Reference> dbInfo); + + // Returns a reference to the global GlobalConfig object. Clients should + // call this function whenever they need to read a value out of the global + // configuration. + static GlobalConfig& globalConfig(); + + // Use this function to turn a global configuration key defined above into + // the full path needed to set the value in the database. + // + // For example, given "config/a", returns "\xff\xff/global_config/config/a". + static Key prefixedKey(KeyRef key); + + // Get a value from the framework. Values are returned as a ConfigValue + // reference which also contains the arena holding the object. As long as + // the caller keeps the ConfigValue reference, the value is guaranteed to + // be readable. An empty reference is returned if the value does not exist. + const Reference get(KeyRef name); + const std::map> get(KeyRangeRef range); + + // For arithmetic value types, returns a copy of the value for the given + // key, or the supplied default value if the framework does not know about + // the key. + template {}, bool>::type = true> + const T get(KeyRef name, T defaultVal) { + try { + auto configValue = get(name); + if (configValue.isValid()) { + if (configValue->value.has_value()) { + return std::any_cast(configValue->value); + } + } + + return defaultVal; + } catch (Error& e) { + throw; + } + } + + // Trying to write into the global configuration keyspace? To write data, + // submit a transaction to \xff\xff/global_config/ with + // encoded using the FDB tuple typecodes. Use the helper + // function `prefixedKey` to correctly prefix your global configuration + // key. + + // Triggers the returned future when the global configuration singleton has + // been created and is ready. 
+ Future onInitialized(); + +private: + GlobalConfig(); + + // The functions below only affect the local copy of the global + // configuration keyspace! To insert or remove values across all nodes you + // must use a transaction (see the note above). + + // Inserts the given key-value pair into the local copy of the global + // configuration keyspace, overwriting the old key-value pair if it exists. + // `value` must be encoded using the FDB tuple typecodes. + void insert(KeyRef key, ValueRef value); + // Removes the given key (and associated value) from the local copy of the + // global configuration keyspace. + void erase(KeyRef key); + // Removes the given key range (and associated values) from the local copy + // of the global configuration keyspace. + void erase(KeyRangeRef range); + + ACTOR static Future migrate(GlobalConfig* self); + ACTOR static Future refresh(GlobalConfig* self); + ACTOR static Future updater(GlobalConfig* self, Reference> dbInfo); + + Database cx; + Future _updater; + Promise initialized; + std::unordered_map> data; + Version lastUpdate; +}; + +#endif diff --git a/fdbclient/GlobalConfig.h b/fdbclient/GlobalConfig.h new file mode 100644 index 0000000000..f68ea2361e --- /dev/null +++ b/fdbclient/GlobalConfig.h @@ -0,0 +1,45 @@ +/* + * GlobalConfig.h + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2021 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "fdbclient/CommitTransaction.h" +#include "fdbclient/FDBTypes.h" + +// Used to store a list of mutations made to the global configuration at a +// specific version. +struct VersionHistory { + constexpr static FileIdentifier file_identifier = 5863456; + + VersionHistory() {} + VersionHistory(Version v) : version(v) {} + + Version version; + Standalone> mutations; + + bool operator<(const VersionHistory& other) const { return version < other.version; } + + int expectedSize() const { return sizeof(version) + mutations.expectedSize(); } + + template + void serialize(Ar& ar) { + serializer(ar, mutations, version); + } +}; diff --git a/fdbclient/IClientApi.h b/fdbclient/IClientApi.h index 6f3ad07cd1..4496eff732 100644 --- a/fdbclient/IClientApi.h +++ b/fdbclient/IClientApi.h @@ -28,6 +28,7 @@ #include "flow/ThreadHelper.actor.h" +// An interface that represents a transaction created by a client class ITransaction { public: virtual ~ITransaction() {} @@ -90,6 +91,7 @@ public: virtual void delref() = 0; }; +// An interface that represents a connection to a cluster made by a client class IDatabase { public: virtual ~IDatabase() {} @@ -98,6 +100,11 @@ public: virtual void setOption(FDBDatabaseOptions::Option option, Optional value = Optional()) = 0; virtual double getMainThreadBusyness() = 0; + // Returns the protocol version reported by a quorum of coordinators + // If an expected version is given, the future won't return until the protocol version is different than expected + virtual ThreadFuture getServerProtocol( + Optional expectedVersion = Optional()) = 0; + virtual void addref() = 0; virtual void delref() = 0; @@ -110,13 +117,16 @@ public: virtual ThreadFuture createSnapshot(const StringRef& uid, const StringRef& snapshot_command) = 0; }; +// An interface that presents the top-level FDB client API as exposed through the C bindings 
+// +// This interface and its associated objects are intended to live outside the network thread, so its asynchronous +// operations use ThreadFutures and implementations should be thread safe. class IClientApi { public: virtual ~IClientApi() {} virtual void selectApiVersion(int apiVersion) = 0; virtual const char* getClientVersion() = 0; - virtual ThreadFuture getServerProtocol(const char* clusterFilePath) = 0; virtual void setNetworkOption(FDBNetworkOptions::Option option, Optional value = Optional()) = 0; diff --git a/fdbclient/MultiVersionTransaction.actor.cpp b/fdbclient/MultiVersionTransaction.actor.cpp index ac1855c811..4b6ba0c27c 100644 --- a/fdbclient/MultiVersionTransaction.actor.cpp +++ b/fdbclient/MultiVersionTransaction.actor.cpp @@ -356,7 +356,32 @@ double DLDatabase::getMainThreadBusyness() { return 0; } +// Returns the protocol version reported by a quorum of coordinators +// If an expected version is given, the future won't return until the protocol version is different than expected +ThreadFuture DLDatabase::getServerProtocol(Optional expectedVersion) { + ASSERT(api->databaseGetServerProtocol != nullptr); + + uint64_t expected = + expectedVersion.map([](const ProtocolVersion& v) { return v.version(); }).orDefault(0); + FdbCApi::FDBFuture* f = api->databaseGetServerProtocol(db, expected); + return toThreadFuture(api, f, [](FdbCApi::FDBFuture* f, FdbCApi* api) { + uint64_t pv; + FdbCApi::fdb_error_t error = api->futureGetUInt64(f, &pv); + ASSERT(!error); + return ProtocolVersion(pv); + }); +} + // DLApi + +// Loads the specified function from a dynamic library +// +// fp - The function pointer where the loaded function will be stored +// lib - The dynamic library where the function is loaded from +// libPath - The path of the dynamic library (used for logging) +// functionName - The function to load +// requireFunction - Determines the behavior if the function is not present. If true, an error is thrown. 
If false, +// the function pointer will be set to nullptr. template void loadClientFunction(T* fp, void* lib, std::string libPath, const char* functionName, bool requireFunction = true) { *(void**)(fp) = loadFunction(lib, functionName); @@ -403,6 +428,8 @@ void DLApi::init() { fdbCPath, "fdb_database_get_main_thread_busyness", headerVersion >= 700); + loadClientFunction( + &api->databaseGetServerProtocol, lib, fdbCPath, "fdb_database_get_server_protocol", headerVersion >= 700); loadClientFunction(&api->databaseDestroy, lib, fdbCPath, "fdb_database_destroy"); loadClientFunction(&api->databaseRebootWorker, lib, fdbCPath, "fdb_database_reboot_worker", headerVersion >= 700); loadClientFunction(&api->databaseForceRecoveryWithDataLoss, @@ -452,7 +479,7 @@ void DLApi::init() { loadClientFunction( &api->futureGetInt64, lib, fdbCPath, headerVersion >= 620 ? "fdb_future_get_int64" : "fdb_future_get_version"); - loadClientFunction(&api->futureGetUInt64, lib, fdbCPath, "fdb_future_get_uint64"); + loadClientFunction(&api->futureGetUInt64, lib, fdbCPath, "fdb_future_get_uint64", headerVersion >= 700); loadClientFunction(&api->futureGetError, lib, fdbCPath, "fdb_future_get_error"); loadClientFunction(&api->futureGetKey, lib, fdbCPath, "fdb_future_get_key"); loadClientFunction(&api->futureGetValue, lib, fdbCPath, "fdb_future_get_value"); @@ -488,11 +515,6 @@ const char* DLApi::getClientVersion() { return api->getClientVersion(); } -ThreadFuture DLApi::getServerProtocol(const char* clusterFilePath) { - ASSERT(false); - return ThreadFuture(); -} - void DLApi::setNetworkOption(FDBNetworkOptions::Option option, Optional value) { throwIfError(api->setNetworkOption( option, value.present() ? value.get().begin() : nullptr, value.present() ? 
value.get().size() : 0)); @@ -856,7 +878,7 @@ MultiVersionDatabase::MultiVersionDatabase(MultiVersionApi* api, std::string clusterFilePath, Reference db, bool openConnectors) - : dbState(new DatabaseState()) { + : dbState(new DatabaseState()), clusterFilePath(clusterFilePath) { dbState->db = db; dbState->dbVar->set(db); @@ -941,6 +963,15 @@ double MultiVersionDatabase::getMainThreadBusyness() { return 0; } +// Returns the protocol version reported by a quorum of coordinators +// If an expected version is given, the future won't return until the protocol version is different than expected +ThreadFuture MultiVersionDatabase::getServerProtocol(Optional expectedVersion) { + // TODO: send this out through the active database + return MultiVersionApi::api->getLocalClient() + ->api->createDatabase(clusterFilePath.c_str()) + ->getServerProtocol(expectedVersion); +} + void MultiVersionDatabase::Connector::connect() { addref(); onMainThreadVoid( @@ -1181,10 +1212,6 @@ const char* MultiVersionApi::getClientVersion() { return localClient->api->getClientVersion(); } -ThreadFuture MultiVersionApi::getServerProtocol(const char* clusterFilePath) { - return api->localClient->api->getServerProtocol(clusterFilePath); -} - void validateOption(Optional value, bool canBePresent, bool canBeAbsent, bool canBeEmpty = true) { ASSERT(canBePresent || canBeAbsent); diff --git a/fdbclient/MultiVersionTransaction.h b/fdbclient/MultiVersionTransaction.h index ea16f4f35e..badb848334 100644 --- a/fdbclient/MultiVersionTransaction.h +++ b/fdbclient/MultiVersionTransaction.h @@ -28,6 +28,8 @@ #include "flow/ThreadHelper.actor.h" +// FdbCApi is used as a wrapper around the FoundationDB C API that gets loaded from an external client library. +// All of the required functions loaded from that external library are stored in function pointers in this struct. 
struct FdbCApi : public ThreadSafeReferenceCounted { typedef struct future FDBFuture; typedef struct cluster FDBCluster; @@ -55,7 +57,6 @@ struct FdbCApi : public ThreadSafeReferenceCounted { // Network fdb_error_t (*selectApiVersion)(int runtimeVersion, int headerVersion); const char* (*getClientVersion)(); - FDBFuture* (*getServerProtocol)(const char* clusterFilePath); fdb_error_t (*setNetworkOption)(FDBNetworkOptions::Option option, uint8_t const* value, int valueLength); fdb_error_t (*setupNetwork)(); fdb_error_t (*runNetwork)(); @@ -81,6 +82,7 @@ struct FdbCApi : public ThreadSafeReferenceCounted { uint8_t const* snapshotCommmand, int snapshotCommandLength); double (*databaseGetMainThreadBusyness)(FDBDatabase* database); + FDBFuture* (*databaseGetServerProtocol)(FDBDatabase* database, uint64_t expectedVersion); // Transaction fdb_error_t (*transactionSetOption)(FDBTransaction* tr, @@ -185,6 +187,8 @@ struct FdbCApi : public ThreadSafeReferenceCounted { fdb_error_t (*futureGetCluster)(FDBFuture* f, FDBCluster** outCluster); }; +// An implementation of ITransaction that wraps a transaction object created on an externally loaded client library. +// All API calls to that transaction are routed through the external library. class DLTransaction : public ITransaction, ThreadSafeReferenceCounted { public: DLTransaction(Reference api, FdbCApi::FDBTransaction* tr) : api(api), tr(tr) {} @@ -249,6 +253,8 @@ private: FdbCApi::FDBTransaction* const tr; }; +// An implementation of IDatabase that wraps a database object created on an externally loaded client library. +// All API calls to that database are routed through the external library. 
class DLDatabase : public IDatabase, ThreadSafeReferenceCounted { public: DLDatabase(Reference api, FdbCApi::FDBDatabase* db) : api(api), db(db), ready(Void()) {} @@ -265,6 +271,11 @@ public: void setOption(FDBDatabaseOptions::Option option, Optional value = Optional()) override; double getMainThreadBusyness() override; + // Returns the protocol version reported by a quorum of coordinators + // If an expected version is given, the future won't return until the protocol version is different than expected + ThreadFuture getServerProtocol( + Optional expectedVersion = Optional()) override; + void addref() override { ThreadSafeReferenceCounted::addref(); } void delref() override { ThreadSafeReferenceCounted::delref(); } @@ -279,13 +290,14 @@ private: ThreadFuture ready; }; +// An implementation of IClientApi that re-issues API calls to the C API of an externally loaded client library. +// The DL prefix stands for "dynamic library". class DLApi : public IClientApi { public: DLApi(std::string fdbCPath, bool unlinkOnLoad = false); void selectApiVersion(int apiVersion) override; const char* getClientVersion() override; - ThreadFuture getServerProtocol(const char* clusterFilePath) override; void setNetworkOption(FDBNetworkOptions::Option option, Optional value = Optional()) override; void setupNetwork() override; @@ -312,6 +324,9 @@ private: class MultiVersionDatabase; +// An implementation of ITransaction that wraps a transaction created either locally or through a dynamically loaded +// external client. When needed (e.g on cluster version change), the MultiVersionTransaction can automatically replace +// its wrapped transaction with one from another client. 
class MultiVersionTransaction : public ITransaction, ThreadSafeReferenceCounted { public: MultiVersionTransaction(Reference db, @@ -413,6 +428,9 @@ struct ClientInfo : ClientDesc, ThreadSafeReferenceCounted { class MultiVersionApi; +// An implementation of IDatabase that wraps a database created either locally or through a dynamically loaded +// external client. The MultiVersionDatabase monitors the protocol version of the cluster and automatically +// replaces the wrapped database when the protocol version changes. class MultiVersionDatabase final : public IDatabase, ThreadSafeReferenceCounted { public: MultiVersionDatabase(MultiVersionApi* api, @@ -426,6 +444,11 @@ public: void setOption(FDBDatabaseOptions::Option option, Optional value = Optional()) override; double getMainThreadBusyness() override; + // Returns the protocol version reported by a quorum of coordinators + // If an expected version is given, the future won't return until the protocol version is different than expected + ThreadFuture getServerProtocol( + Optional expectedVersion = Optional()) override; + void addref() override { ThreadSafeReferenceCounted::addref(); } void delref() override { ThreadSafeReferenceCounted::delref(); } @@ -487,15 +510,19 @@ private: Mutex optionLock; }; + std::string clusterFilePath; const Reference dbState; friend class MultiVersionTransaction; }; +// An implementation of IClientApi that can choose between multiple different client implementations either provided +// locally within the primary loaded fdb_c client or through any number of dynamically loaded clients. +// +// This functionality is used to provide support for multiple protocol versions simultaneously. 
class MultiVersionApi : public IClientApi { public: void selectApiVersion(int apiVersion) override; const char* getClientVersion() override; - ThreadFuture getServerProtocol(const char* clusterFilePath) override; void setNetworkOption(FDBNetworkOptions::Option option, Optional value = Optional()) override; void setupNetwork() override; diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index b761f6c049..ebe9169c17 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -37,6 +37,7 @@ #include "fdbclient/ClusterInterface.h" #include "fdbclient/CoordinationInterface.h" #include "fdbclient/DatabaseContext.h" +#include "fdbclient/GlobalConfig.actor.h" #include "fdbclient/JsonBuilder.h" #include "fdbclient/KeyRangeMap.h" #include "fdbclient/Knobs.h" @@ -506,12 +507,13 @@ ACTOR static Future clientStatusUpdateActor(DatabaseContext* cx) { } } cx->clientStatusUpdater.outStatusQ.clear(); - double clientSamplingProbability = std::isinf(cx->clientInfo->get().clientTxnInfoSampleRate) - ? CLIENT_KNOBS->CSI_SAMPLING_PROBABILITY - : cx->clientInfo->get().clientTxnInfoSampleRate; - int64_t clientTxnInfoSizeLimit = cx->clientInfo->get().clientTxnInfoSizeLimit == -1 - ? CLIENT_KNOBS->CSI_SIZE_LIMIT - : cx->clientInfo->get().clientTxnInfoSizeLimit; + wait(GlobalConfig::globalConfig().onInitialized()); + double sampleRate = GlobalConfig::globalConfig().get(fdbClientInfoTxnSampleRate, + std::numeric_limits::infinity()); + double clientSamplingProbability = + std::isinf(sampleRate) ? CLIENT_KNOBS->CSI_SAMPLING_PROBABILITY : sampleRate; + int64_t sizeLimit = GlobalConfig::globalConfig().get(fdbClientInfoTxnSizeLimit, -1); + int64_t clientTxnInfoSizeLimit = sizeLimit == -1 ? 
CLIENT_KNOBS->CSI_SIZE_LIMIT : sizeLimit; if (!trChunksQ.empty() && deterministicRandom()->random01() < clientSamplingProbability) wait(delExcessClntTxnEntriesActor(&tr, clientTxnInfoSizeLimit)); @@ -957,6 +959,8 @@ DatabaseContext::DatabaseContext(Reference(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::TRACING))); + SpecialKeySpace::MODULE::GLOBALCONFIG, SpecialKeySpace::IMPLTYPE::READWRITE, + std::make_unique( + SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::GLOBALCONFIG))); + registerSpecialKeySpaceModule( + SpecialKeySpace::MODULE::TRACING, SpecialKeySpace::IMPLTYPE::READWRITE, + std::make_unique( + SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::TRACING))); registerSpecialKeySpaceModule( SpecialKeySpace::MODULE::CONFIGURATION, SpecialKeySpace::IMPLTYPE::READWRITE, @@ -1271,14 +1279,16 @@ Future DatabaseContext::onProxiesChanged() { } bool DatabaseContext::sampleReadTags() const { - return clientInfo->get().transactionTagSampleRate > 0 && - deterministicRandom()->random01() <= clientInfo->get().transactionTagSampleRate; + double sampleRate = GlobalConfig::globalConfig().get(transactionTagSampleRate, CLIENT_KNOBS->READ_TAG_SAMPLE_RATE); + return sampleRate > 0 && deterministicRandom()->random01() <= sampleRate; } bool DatabaseContext::sampleOnCost(uint64_t cost) const { - if (clientInfo->get().transactionTagSampleCost <= 0) + double sampleCost = + GlobalConfig::globalConfig().get(transactionTagSampleCost, CLIENT_KNOBS->COMMIT_SAMPLE_COST); + if (sampleCost <= 0) return false; - return deterministicRandom()->random01() <= (double)cost / clientInfo->get().transactionTagSampleCost; + return deterministicRandom()->random01() <= (double)cost / sampleCost; } int64_t extractIntOption(Optional value, int64_t minValue, int64_t maxValue) { @@ -2483,7 +2493,6 @@ ACTOR Future watchValue(Future version, cx->invalidateCache(key); wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, info.taskID)); } else if (e.code() == error_code_watch_cancelled || 
e.code() == error_code_process_behind) { - TEST(e.code() == error_code_watch_cancelled); // Too many watches on the storage server, poll for changes instead TEST(e.code() == error_code_watch_cancelled); // Too many watches on storage server, poll for changes TEST(e.code() == error_code_process_behind); // The storage servers are all behind wait(delay(CLIENT_KNOBS->WATCH_POLLING_TIME, info.taskID)); @@ -4908,9 +4917,18 @@ ACTOR Future coordinatorProtocolsFetcher(Reference getCoordinatorProtocols(Reference f) { - ProtocolVersion protocolVersion = wait(coordinatorProtocolsFetcher(f)); - return protocolVersion.version(); +// Returns the protocol version reported by a quorum of coordinators +// If an expected version is given, the future won't return until the protocol version is different than expected +ACTOR Future getClusterProtocol(Reference f, + Optional expectedVersion) { + loop { + ProtocolVersion protocolVersion = wait(coordinatorProtocolsFetcher(f)); + if (!expectedVersion.present() || protocolVersion != expectedVersion.get()) { + return protocolVersion; + } else { + wait(delay(2.0)); // TODO: this is temporary, so not making into a knob yet + } + } } uint32_t Transaction::getSize() { @@ -5378,9 +5396,8 @@ void Transaction::checkDeferredError() { Reference Transaction::createTrLogInfoProbabilistically(const Database& cx) { if (!cx->isError()) { - double clientSamplingProbability = std::isinf(cx->clientInfo->get().clientTxnInfoSampleRate) - ? 
CLIENT_KNOBS->CSI_SAMPLING_PROBABILITY - : cx->clientInfo->get().clientTxnInfoSampleRate; + double clientSamplingProbability = GlobalConfig::globalConfig().get( + fdbClientInfoTxnSampleRate, CLIENT_KNOBS->CSI_SAMPLING_PROBABILITY); if (((networkOptions.logClientInfo.present() && networkOptions.logClientInfo.get()) || BUGGIFY) && deterministicRandom()->random01() < clientSamplingProbability && (!g_network->isSimulated() || !g_simulator.speedUpSimulation)) { diff --git a/fdbclient/NativeAPI.actor.h b/fdbclient/NativeAPI.actor.h index ac31967d83..51411ae0a2 100644 --- a/fdbclient/NativeAPI.actor.h +++ b/fdbclient/NativeAPI.actor.h @@ -400,7 +400,10 @@ ACTOR Future snapCreate(Database cx, Standalone snapCmd, UID sn // Checks with Data Distributor that it is safe to mark all servers in exclusions as failed ACTOR Future checkSafeExclusions(Database cx, vector exclusions); -ACTOR Future getCoordinatorProtocols(Reference f); +// Returns the protocol version reported by a quorum of coordinators +// If an expected version is given, the future won't return until the protocol version is different than expected +ACTOR Future getClusterProtocol(Reference f, + Optional expectedVersion); inline uint64_t getWriteOperationCost(uint64_t bytes) { return bytes / std::max(1, CLIENT_KNOBS->WRITE_COST_BYTE_FACTOR) + 1; diff --git a/fdbclient/SpecialKeySpace.actor.cpp b/fdbclient/SpecialKeySpace.actor.cpp index b245b049ba..af1f106a66 100644 --- a/fdbclient/SpecialKeySpace.actor.cpp +++ b/fdbclient/SpecialKeySpace.actor.cpp @@ -23,6 +23,7 @@ #include "fdbclient/Knobs.h" #include "fdbclient/ProcessInterface.h" +#include "fdbclient/GlobalConfig.actor.h" #include "fdbclient/SpecialKeySpace.actor.h" #include "flow/Arena.h" #include "flow/UnitTest.h" @@ -65,6 +66,8 @@ std::unordered_map SpecialKeySpace::moduleToB { SpecialKeySpace::MODULE::ERRORMSG, singleKeyRange(LiteralStringRef("\xff\xff/error_message")) }, { SpecialKeySpace::MODULE::CONFIGURATION, 
KeyRangeRef(LiteralStringRef("\xff\xff/configuration/"), LiteralStringRef("\xff\xff/configuration0")) }, + { SpecialKeySpace::MODULE::GLOBALCONFIG, + KeyRangeRef(LiteralStringRef("\xff\xff/global_config/"), LiteralStringRef("\xff\xff/global_config0")) }, { SpecialKeySpace::MODULE::TRACING, KeyRangeRef(LiteralStringRef("\xff\xff/tracing/"), LiteralStringRef("\xff\xff/tracing0")) }, { SpecialKeySpace::MODULE::ACTORLINEAGE, @@ -1372,10 +1375,129 @@ Future> ConsistencyCheckImpl::commit(ReadYourWritesTransac return Optional(); } -TracingOptionsImpl::TracingOptionsImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) { - TraceEvent("TracingOptionsImpl::TracingOptionsImpl").detail("Range", kr); +GlobalConfigImpl::GlobalConfigImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {} + +// Returns key-value pairs for each value stored in the global configuration +// framework within the range specified. The special-key-space getrange +// function should only be used for informational purposes. All values are +// returned as strings regardless of their true type. 
+Future> GlobalConfigImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { + Standalone result; + + auto& globalConfig = GlobalConfig::globalConfig(); + KeyRangeRef modified = + KeyRangeRef(kr.begin.removePrefix(getKeyRange().begin), kr.end.removePrefix(getKeyRange().begin)); + std::map> values = globalConfig.get(modified); + for (const auto& [key, config] : values) { + Key prefixedKey = key.withPrefix(getKeyRange().begin); + if (config.isValid() && config->value.has_value()) { + if (config->value.type() == typeid(StringRef)) { + result.push_back_deep(result.arena(), + KeyValueRef(prefixedKey, std::any_cast(config->value).toString())); + } else if (config->value.type() == typeid(int64_t)) { + result.push_back_deep(result.arena(), + KeyValueRef(prefixedKey, std::to_string(std::any_cast(config->value)))); + } else if (config->value.type() == typeid(float)) { + result.push_back_deep(result.arena(), + KeyValueRef(prefixedKey, std::to_string(std::any_cast(config->value)))); + } else if (config->value.type() == typeid(double)) { + result.push_back_deep(result.arena(), + KeyValueRef(prefixedKey, std::to_string(std::any_cast(config->value)))); + } else { + ASSERT(false); + } + } + } + + return result; } +// Marks the key for insertion into global configuration. +void GlobalConfigImpl::set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) { + ryw->getSpecialKeySpaceWriteMap().insert(key, std::make_pair(true, Optional(value))); +} + +// Writes global configuration changes to durable memory. Also writes the +// changes made in the transaction to a recent history set, and updates the +// latest version which the global configuration was updated at. +ACTOR Future> globalConfigCommitActor(GlobalConfigImpl* globalConfig, + ReadYourWritesTransaction* ryw) { + state Transaction& tr = ryw->getTransaction(); + + // History should only contain three most recent updates. 
If it currently + // has three items, remove the oldest to make room for a new item. + Standalone history = wait(tr.getRange(globalConfigHistoryKeys, CLIENT_KNOBS->TOO_MANY)); + constexpr int kGlobalConfigMaxHistorySize = 3; + if (history.size() > kGlobalConfigMaxHistorySize - 1) { + for (int i = 0; i < history.size() - (kGlobalConfigMaxHistorySize - 1); ++i) { + tr.clear(history[i].key); + } + } + + VersionHistory vh{ 0 }; + + // Transform writes from the special-key-space (\xff\xff/global_config/) to + // the system key space (\xff/globalConfig/), and writes mutations to + // latest version history. + state RangeMap>, KeyRangeRef>::Ranges ranges = + ryw->getSpecialKeySpaceWriteMap().containedRanges(specialKeys); + state RangeMap>, KeyRangeRef>::iterator iter = ranges.begin(); + while (iter != ranges.end()) { + std::pair> entry = iter->value(); + if (entry.first) { + if (entry.second.present() && iter->begin().startsWith(globalConfig->getKeyRange().begin)) { + Key bareKey = iter->begin().removePrefix(globalConfig->getKeyRange().begin); + vh.mutations.emplace_back_deep(vh.mutations.arena(), + MutationRef(MutationRef::SetValue, bareKey, entry.second.get())); + + Key systemKey = bareKey.withPrefix(globalConfigKeysPrefix); + tr.set(systemKey, entry.second.get()); + } else if (!entry.second.present() && iter->range().begin.startsWith(globalConfig->getKeyRange().begin) && + iter->range().end.startsWith(globalConfig->getKeyRange().begin)) { + KeyRef bareRangeBegin = iter->range().begin.removePrefix(globalConfig->getKeyRange().begin); + KeyRef bareRangeEnd = iter->range().end.removePrefix(globalConfig->getKeyRange().begin); + vh.mutations.emplace_back_deep(vh.mutations.arena(), + MutationRef(MutationRef::ClearRange, bareRangeBegin, bareRangeEnd)); + + Key systemRangeBegin = bareRangeBegin.withPrefix(globalConfigKeysPrefix); + Key systemRangeEnd = bareRangeEnd.withPrefix(globalConfigKeysPrefix); + tr.clear(KeyRangeRef(systemRangeBegin, systemRangeEnd)); + } + } + ++iter; + 
} + + // Record the mutations in this commit into the global configuration history. + Key historyKey = addVersionStampAtEnd(globalConfigHistoryPrefix); + ObjectWriter historyWriter(IncludeVersion()); + historyWriter.serialize(vh); + tr.atomicOp(historyKey, historyWriter.toStringRef(), MutationRef::SetVersionstampedKey); + + // Write version key to trigger update in cluster controller. + tr.atomicOp(globalConfigVersionKey, + LiteralStringRef("0123456789\x00\x00\x00\x00"), // versionstamp + MutationRef::SetVersionstampedValue); + + return Optional(); +} + +// Called when a transaction includes keys in the global configuration special-key-space range. +Future> GlobalConfigImpl::commit(ReadYourWritesTransaction* ryw) { + return globalConfigCommitActor(this, ryw); +} + +// Marks the range for deletion from global configuration. +void GlobalConfigImpl::clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) { + ryw->getSpecialKeySpaceWriteMap().insert(range, std::make_pair(true, Optional())); +} + +// Marks the key for deletion from global configuration. 
+void GlobalConfigImpl::clear(ReadYourWritesTransaction* ryw, const KeyRef& key) { + ryw->getSpecialKeySpaceWriteMap().insert(key, std::make_pair(true, Optional())); +} + +TracingOptionsImpl::TracingOptionsImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {} + Future> TracingOptionsImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { Standalone result; for (const auto& option : SpecialKeySpace::getTracingOptions()) { diff --git a/fdbclient/SpecialKeySpace.actor.h b/fdbclient/SpecialKeySpace.actor.h index 051b17470a..08a3c6cfc5 100644 --- a/fdbclient/SpecialKeySpace.actor.h +++ b/fdbclient/SpecialKeySpace.actor.h @@ -147,6 +147,7 @@ public: CONFIGURATION, // Configuration of the cluster CONNECTIONSTRING, ERRORMSG, // A single key space contains a json string which describes the last error in special-key-space + GLOBALCONFIG, // Global configuration options synchronized to all nodes MANAGEMENT, // Management-API METRICS, // data-distribution metrics TESTONLY, // only used by correctness tests @@ -337,6 +338,16 @@ public: Future> commit(ReadYourWritesTransaction* ryw) override; }; +class GlobalConfigImpl : public SpecialKeyRangeRWImpl { +public: + explicit GlobalConfigImpl(KeyRangeRef kr); + Future> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; + void set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) override; + Future> commit(ReadYourWritesTransaction* ryw) override; + void clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) override; + void clear(ReadYourWritesTransaction* ryw, const KeyRef& key) override; +}; + class TracingOptionsImpl : public SpecialKeyRangeRWImpl { public: explicit TracingOptionsImpl(KeyRangeRef kr); diff --git a/fdbclient/SystemData.cpp b/fdbclient/SystemData.cpp index 0b15f8f91d..42fec5f9f2 100644 --- a/fdbclient/SystemData.cpp +++ b/fdbclient/SystemData.cpp @@ -632,7 +632,18 @@ std::string encodeFailedServersKey(AddressExclusion const& addr) { return 
failedServersPrefix.toString() + addr.toString(); } -const KeyRangeRef workerListKeys(LiteralStringRef("\xff/worker/"), LiteralStringRef("\xff/worker0")); +// const KeyRangeRef globalConfigKeys( LiteralStringRef("\xff/globalConfig/"), LiteralStringRef("\xff/globalConfig0") ); +// const KeyRef globalConfigPrefix = globalConfigKeys.begin; + +const KeyRangeRef globalConfigDataKeys( LiteralStringRef("\xff/globalConfig/k/"), LiteralStringRef("\xff/globalConfig/k0") ); +const KeyRef globalConfigKeysPrefix = globalConfigDataKeys.begin; + +const KeyRangeRef globalConfigHistoryKeys( LiteralStringRef("\xff/globalConfig/h/"), LiteralStringRef("\xff/globalConfig/h0") ); +const KeyRef globalConfigHistoryPrefix = globalConfigHistoryKeys.begin; + +const KeyRef globalConfigVersionKey = LiteralStringRef("\xff/globalConfig/v"); + +const KeyRangeRef workerListKeys( LiteralStringRef("\xff/worker/"), LiteralStringRef("\xff/worker0") ); const KeyRef workerListPrefix = workerListKeys.begin; const Key workerListKeyFor(StringRef processID) { @@ -748,8 +759,7 @@ const KeyRef tagThrottleCountKey = LiteralStringRef("\xff\x02/throttledTags/manu // Client status info prefix const KeyRangeRef fdbClientInfoPrefixRange(LiteralStringRef("\xff\x02/fdbClientInfo/"), LiteralStringRef("\xff\x02/fdbClientInfo0")); -const KeyRef fdbClientInfoTxnSampleRate = LiteralStringRef("\xff\x02/fdbClientInfo/client_txn_sample_rate/"); -const KeyRef fdbClientInfoTxnSizeLimit = LiteralStringRef("\xff\x02/fdbClientInfo/client_txn_size_limit/"); +// See remaining fields in GlobalConfig.actor.h // ConsistencyCheck settings const KeyRef fdbShouldConsistencyCheckBeSuspended = LiteralStringRef("\xff\x02/ConsistencyCheck/Suspend"); diff --git a/fdbclient/SystemData.h b/fdbclient/SystemData.h index bbeb7489f9..952e8fcf00 100644 --- a/fdbclient/SystemData.h +++ b/fdbclient/SystemData.h @@ -230,6 +230,30 @@ extern const KeyRef failedServersVersionKey; // The value of this key shall be c const AddressExclusion 
decodeFailedServersKey(KeyRef const& key); // where key.startsWith(failedServersPrefix) std::string encodeFailedServersKey(AddressExclusion const&); +// "\xff/globalConfig/[[option]]" := "value" +// An umbrella prefix for global configuration data synchronized to all nodes. +// extern const KeyRangeRef globalConfigData; +// extern const KeyRef globalConfigDataPrefix; + +// "\xff/globalConfig/k/[[key]]" := "value" +// Key-value pairs that have been set. The range this keyspace represents +// contains all globally configured options. +extern const KeyRangeRef globalConfigDataKeys; +extern const KeyRef globalConfigKeysPrefix; + +// "\xff/globalConfig/h/[[version]]" := "value" +// Maps a commit version to a list of mutations made to the global +// configuration at that commit. Shipped to nodes periodically. In general, +// clients should not write to keys in this keyspace; it will be written +// automatically when updating global configuration keys. +extern const KeyRangeRef globalConfigHistoryKeys; +extern const KeyRef globalConfigHistoryPrefix; + +// "\xff/globalConfig/v" := "version" +// Read-only key which returns the commit version of the most recent mutation +// made to the global configuration keyspace. 
+extern const KeyRef globalConfigVersionKey; + // "\xff/workers/[[processID]]" := "" // Asynchronously updated by the cluster controller, this is a list of fdbserver processes that have joined the cluster // and are currently (recently) available @@ -355,8 +379,6 @@ extern const KeyRangeRef applyMutationsKeyVersionCountRange; // FdbClient Info prefix extern const KeyRangeRef fdbClientInfoPrefixRange; -extern const KeyRef fdbClientInfoTxnSampleRate; -extern const KeyRef fdbClientInfoTxnSizeLimit; // Consistency Check settings extern const KeyRef fdbShouldConsistencyCheckBeSuspended; diff --git a/fdbclient/ThreadSafeTransaction.cpp b/fdbclient/ThreadSafeTransaction.cpp index 0e0877f9af..c5bf2dce87 100644 --- a/fdbclient/ThreadSafeTransaction.cpp +++ b/fdbclient/ThreadSafeTransaction.cpp @@ -97,6 +97,15 @@ double ThreadSafeDatabase::getMainThreadBusyness() { return g_network->networkInfo.metrics.networkBusyness; } +// Returns the protocol version reported by a quorum of coordinators +// If an expected version is given, the future won't return until the protocol version is different than expected +ThreadFuture ThreadSafeDatabase::getServerProtocol(Optional expectedVersion) { + DatabaseContext* db = this->db; + return onMainThread([db, expectedVersion]() -> Future { + return getClusterProtocol(db->getConnectionFile(), expectedVersion); + }); +} + ThreadSafeDatabase::ThreadSafeDatabase(std::string connFilename, int apiVersion) { ClusterConnectionFile* connFile = new ClusterConnectionFile(ClusterConnectionFile::lookupClusterFileName(connFilename).first); @@ -407,16 +416,6 @@ const char* ThreadSafeApi::getClientVersion() { return clientVersion.c_str(); } -// Wait until a quorum of coordinators with the same protocol version are available, and then return that protocol -// version. 
-ThreadFuture ThreadSafeApi::getServerProtocol(const char* clusterFilePath) { - return onMainThread([clusterFilePath = std::string(clusterFilePath)]() -> Future { - auto [clusterFile, isDefault] = ClusterConnectionFile::lookupClusterFileName(clusterFilePath); - Reference f = Reference(new ClusterConnectionFile(clusterFile)); - return getCoordinatorProtocols(f); - }); -} - void ThreadSafeApi::setNetworkOption(FDBNetworkOptions::Option option, Optional value) { if (option == FDBNetworkOptions::EXTERNAL_CLIENT_TRANSPORT_ID) { if (value.present()) { diff --git a/fdbclient/ThreadSafeTransaction.h b/fdbclient/ThreadSafeTransaction.h index a62e503c11..e6360c2a6d 100644 --- a/fdbclient/ThreadSafeTransaction.h +++ b/fdbclient/ThreadSafeTransaction.h @@ -27,6 +27,8 @@ #include "fdbclient/ClusterInterface.h" #include "fdbclient/IClientApi.h" +// An implementation of IDatabase that serializes operations onto the network thread and interacts with the lower-level +// client APIs exposed by NativeAPI and ReadYourWrites. class ThreadSafeDatabase : public IDatabase, public ThreadSafeReferenceCounted { public: ~ThreadSafeDatabase() override; @@ -37,9 +39,14 @@ public: void setOption(FDBDatabaseOptions::Option option, Optional value = Optional()) override; double getMainThreadBusyness() override; - ThreadFuture - onConnected(); // Returns after a majority of coordination servers are available and have reported a leader. The - // cluster file therefore is valid, but the database might be unavailable. + // Returns the protocol version reported by a quorum of coordinators + // If an expected version is given, the future won't return until the protocol version is different than expected + ThreadFuture getServerProtocol( + Optional expectedVersion = Optional()) override; + + // Returns after a majority of coordination servers are available and have reported a leader. The + // cluster file therefore is valid, but the database might be unavailable. 
+ ThreadFuture onConnected(); void addref() override { ThreadSafeReferenceCounted::addref(); } void delref() override { ThreadSafeReferenceCounted::delref(); } @@ -58,6 +65,8 @@ public: // Internal use only DatabaseContext* unsafeGetPtr() const { return db; } }; +// An implementation of ITransaction that serializes operations onto the network thread and interacts with the +// lower-level client APIs exposed by NativeAPI and ReadYourWrites. class ThreadSafeTransaction : public ITransaction, ThreadSafeReferenceCounted, NonCopyable { public: explicit ThreadSafeTransaction(DatabaseContext* cx); @@ -135,11 +144,12 @@ private: ReadYourWritesTransaction* tr; }; +// An implementation of IClientApi that serializes operations onto the network thread and interacts with the lower-level +// client APIs exposed by NativeAPI and ReadYourWrites. class ThreadSafeApi : public IClientApi, ThreadSafeReferenceCounted { public: void selectApiVersion(int apiVersion) override; const char* getClientVersion() override; - ThreadFuture getServerProtocol(const char* clusterFilePath) override; void setNetworkOption(FDBNetworkOptions::Option option, Optional value = Optional()) override; void setupNetwork() override; diff --git a/fdbclient/Tuple.cpp b/fdbclient/Tuple.cpp index 3d4427079f..367a7b80fb 100644 --- a/fdbclient/Tuple.cpp +++ b/fdbclient/Tuple.cpp @@ -20,7 +20,20 @@ #include "fdbclient/Tuple.h" -static size_t find_string_terminator(const StringRef data, size_t offset) { +// TODO: Many functions copied from bindings/flow/Tuple.cpp. Merge at some point. 
+static float bigEndianFloat(float orig) { + int32_t big = *(int32_t*)&orig; + big = bigEndian32(big); + return *(float*)&big; +} + +static double bigEndianDouble(double orig) { + int64_t big = *(int64_t*)&orig; + big = bigEndian64(big); + return *(double*)&big; +} + +static size_t findStringTerminator(const StringRef data, size_t offset) { size_t i = offset; while (i < data.size() - 1 && !(data[i] == '\x00' && data[i + 1] != (uint8_t)'\xff')) { i += (data[i] == '\x00' ? 2 : 1); @@ -29,6 +42,20 @@ static size_t find_string_terminator(const StringRef data, size_t offset) { return i; } +// If encoding and the sign bit is 1 (the number is negative), flip all the bits. +// If decoding and the sign bit is 0 (the number is negative), flip all the bits. +// Otherwise, the number is positive, so flip the sign bit. +static void adjustFloatingPoint(uint8_t* bytes, size_t size, bool encode) { + if ((encode && ((uint8_t)(bytes[0] & 0x80) != (uint8_t)0x00)) || + (!encode && ((uint8_t)(bytes[0] & 0x80) != (uint8_t)0x80))) { + for (size_t i = 0; i < size; i++) { + bytes[i] ^= (uint8_t)0xff; + } + } else { + bytes[0] ^= (uint8_t)0x80; + } +} + Tuple::Tuple(StringRef const& str, bool exclude_incomplete) { data.append(data.arena(), str.begin(), str.size()); @@ -37,9 +64,13 @@ Tuple::Tuple(StringRef const& str, bool exclude_incomplete) { offsets.push_back(i); if (data[i] == '\x01' || data[i] == '\x02') { - i = find_string_terminator(str, i + 1) + 1; + i = findStringTerminator(str, i + 1) + 1; } else if (data[i] >= '\x0c' && data[i] <= '\x1c') { i += abs(data[i] - '\x14') + 1; + } else if (data[i] == 0x20) { + i += sizeof(float) + 1; + } else if (data[i] == 0x21) { + i += sizeof(double) + 1; } else if (data[i] == '\x00') { i += 1; } else { @@ -113,6 +144,29 @@ Tuple& Tuple::append(int64_t value) { return *this; } +Tuple& Tuple::appendFloat(float value) { + offsets.push_back(data.size()); + float swap = bigEndianFloat(value); + uint8_t* bytes = (uint8_t*)&swap; + 
adjustFloatingPoint(bytes, sizeof(float), true); + + data.push_back(data.arena(), 0x20); + data.append(data.arena(), bytes, sizeof(float)); + return *this; +} + +Tuple& Tuple::appendDouble(double value) { + offsets.push_back(data.size()); + double swap = value; + swap = bigEndianDouble(swap); + uint8_t* bytes = (uint8_t*)&swap; + adjustFloatingPoint(bytes, sizeof(double), true); + + data.push_back(data.arena(), 0x21); + data.append(data.arena(), bytes, sizeof(double)); + return *this; +} + Tuple& Tuple::appendNull() { offsets.push_back(data.size()); data.push_back(data.arena(), (uint8_t)'\x00'); @@ -134,6 +188,10 @@ Tuple::ElementType Tuple::getType(size_t index) const { return ElementType::UTF8; } else if (code >= '\x0c' && code <= '\x1c') { return ElementType::INT; + } else if (code == 0x20) { + return ElementType::FLOAT; + } else if (code == 0x21) { + return ElementType::DOUBLE; } else { throw invalid_tuple_data_type(); } @@ -228,6 +286,45 @@ int64_t Tuple::getInt(size_t index, bool allow_incomplete) const { return swap; } +// TODO: Combine with bindings/flow/Tuple.*. This code is copied from there. 
+float Tuple::getFloat(size_t index) const { + if (index >= offsets.size()) { + throw invalid_tuple_index(); + } + ASSERT_LT(offsets[index], data.size()); + uint8_t code = data[offsets[index]]; + if (code != 0x20) { + throw invalid_tuple_data_type(); + } + + float swap; + uint8_t* bytes = (uint8_t*)&swap; + ASSERT_LE(offsets[index] + 1 + sizeof(float), data.size()); + swap = *(float*)(data.begin() + offsets[index] + 1); + adjustFloatingPoint(bytes, sizeof(float), false); + + return bigEndianFloat(swap); +} + +double Tuple::getDouble(size_t index) const { + if (index >= offsets.size()) { + throw invalid_tuple_index(); + } + ASSERT_LT(offsets[index], data.size()); + uint8_t code = data[offsets[index]]; + if (code != 0x21) { + throw invalid_tuple_data_type(); + } + + double swap; + uint8_t* bytes = (uint8_t*)&swap; + ASSERT_LE(offsets[index] + 1 + sizeof(double), data.size()); + swap = *(double*)(data.begin() + offsets[index] + 1); + adjustFloatingPoint(bytes, sizeof(double), false); + + return bigEndianDouble(swap); +} + KeyRange Tuple::range(Tuple const& tuple) const { VectorRef begin; VectorRef end; diff --git a/fdbclient/Tuple.h b/fdbclient/Tuple.h index b44edd73cc..3dc597f262 100644 --- a/fdbclient/Tuple.h +++ b/fdbclient/Tuple.h @@ -38,6 +38,10 @@ struct Tuple { Tuple& append(Tuple const& tuple); Tuple& append(StringRef const& str, bool utf8 = false); Tuple& append(int64_t); + // There are some ambiguous append calls in fdbclient, so to make it easier + // to add append for floats and doubles, name them differently for now. 
+ Tuple& appendFloat(float); + Tuple& appendDouble(double); Tuple& appendNull(); StringRef pack() const { return StringRef(data.begin(), data.size()); } @@ -47,7 +51,7 @@ struct Tuple { return append(t); } - enum ElementType { NULL_TYPE, INT, BYTES, UTF8 }; + enum ElementType { NULL_TYPE, INT, BYTES, UTF8, FLOAT, DOUBLE }; // this is number of elements, not length of data size_t size() const { return offsets.size(); } @@ -55,6 +59,8 @@ struct Tuple { ElementType getType(size_t index) const; Standalone getString(size_t index) const; int64_t getInt(size_t index, bool allow_incomplete = false) const; + float getFloat(size_t index) const; + double getDouble(size_t index) const; KeyRange range(Tuple const& tuple = Tuple()) const; diff --git a/fdbrpc/AsyncFileNonDurable.actor.h b/fdbrpc/AsyncFileNonDurable.actor.h index b682a7741b..848d755fb1 100644 --- a/fdbrpc/AsyncFileNonDurable.actor.h +++ b/fdbrpc/AsyncFileNonDurable.actor.h @@ -197,7 +197,7 @@ private: this->file = file; this->filename = filename; this->diskParameters = diskParameters; - maxWriteDelay = 5.0; + maxWriteDelay = FLOW_KNOBS->NON_DURABLE_MAX_WRITE_DELAY; hasBeenSynced = false; killMode = (KillMode)deterministicRandom()->randomInt(1, 3); @@ -434,7 +434,8 @@ private: state TaskPriority currentTaskID = g_network->getCurrentTask(); wait(g_simulator.onMachine(currentProcess)); - state double delayDuration = deterministicRandom()->random01() * self->maxWriteDelay; + state double delayDuration = + g_simulator.speedUpSimulation ? 0.0001 : (deterministicRandom()->random01() * self->maxWriteDelay); state Standalone dataCopy(StringRef((uint8_t*)data, length)); state Future startSyncFuture = self->startSyncPromise.getFuture(); @@ -606,7 +607,8 @@ private: state TaskPriority currentTaskID = g_network->getCurrentTask(); wait(g_simulator.onMachine(currentProcess)); - state double delayDuration = deterministicRandom()->random01() * self->maxWriteDelay; + state double delayDuration = + g_simulator.speedUpSimulation ? 
0.0001 : (deterministicRandom()->random01() * self->maxWriteDelay); state Future startSyncFuture = self->startSyncPromise.getFuture(); try { diff --git a/fdbrpc/Locality.cpp b/fdbrpc/Locality.cpp index 3cf70943e0..8cdc0751c4 100644 --- a/fdbrpc/Locality.cpp +++ b/fdbrpc/Locality.cpp @@ -63,7 +63,7 @@ ProcessClass::Fitness ProcessClass::machineClassFitness(ClusterRole role) const default: return ProcessClass::NeverAssign; } - case ProcessClass::CommitProxy: + case ProcessClass::CommitProxy: // Resolver, Master, CommitProxy, and GrvProxy need to be the same besides best fit switch (_class) { case ProcessClass::CommitProxyClass: return ProcessClass::BestFit; @@ -71,10 +71,6 @@ ProcessClass::Fitness ProcessClass::machineClassFitness(ClusterRole role) const return ProcessClass::GoodFit; case ProcessClass::UnsetClass: return ProcessClass::UnsetFit; - case ProcessClass::GrvProxyClass: - return ProcessClass::OkayFit; - case ProcessClass::ResolutionClass: - return ProcessClass::OkayFit; case ProcessClass::TransactionClass: return ProcessClass::OkayFit; case ProcessClass::CoordinatorClass: @@ -84,7 +80,7 @@ ProcessClass::Fitness ProcessClass::machineClassFitness(ClusterRole role) const default: return ProcessClass::WorstFit; } - case ProcessClass::GrvProxy: + case ProcessClass::GrvProxy: // Resolver, Master, CommitProxy, and GrvProxy need to be the same besides best fit switch (_class) { case ProcessClass::GrvProxyClass: return ProcessClass::BestFit; @@ -92,10 +88,6 @@ ProcessClass::Fitness ProcessClass::machineClassFitness(ClusterRole role) const return ProcessClass::GoodFit; case ProcessClass::UnsetClass: return ProcessClass::UnsetFit; - case ProcessClass::CommitProxyClass: - return ProcessClass::OkayFit; - case ProcessClass::ResolutionClass: - return ProcessClass::OkayFit; case ProcessClass::TransactionClass: return ProcessClass::OkayFit; case ProcessClass::CoordinatorClass: @@ -105,7 +97,7 @@ ProcessClass::Fitness ProcessClass::machineClassFitness(ClusterRole role) const 
default: return ProcessClass::WorstFit; } - case ProcessClass::Master: + case ProcessClass::Master: // Resolver, Master, CommitProxy, and GrvProxy need to be the same besides best fit switch (_class) { case ProcessClass::MasterClass: return ProcessClass::BestFit; @@ -113,7 +105,7 @@ ProcessClass::Fitness ProcessClass::machineClassFitness(ClusterRole role) const return ProcessClass::GoodFit; case ProcessClass::UnsetClass: return ProcessClass::UnsetFit; - case ProcessClass::ResolutionClass: + case ProcessClass::TransactionClass: return ProcessClass::OkayFit; case ProcessClass::CoordinatorClass: case ProcessClass::TesterClass: @@ -122,7 +114,7 @@ ProcessClass::Fitness ProcessClass::machineClassFitness(ClusterRole role) const default: return ProcessClass::WorstFit; } - case ProcessClass::Resolver: + case ProcessClass::Resolver: // Resolver, Master, CommitProxy, and GrvProxy need to be the same besides best fit switch (_class) { case ProcessClass::ResolutionClass: return ProcessClass::BestFit; @@ -147,8 +139,6 @@ ProcessClass::Fitness ProcessClass::machineClassFitness(ClusterRole role) const return ProcessClass::GoodFit; case ProcessClass::UnsetClass: return ProcessClass::UnsetFit; - case ProcessClass::ResolutionClass: - return ProcessClass::OkayFit; case ProcessClass::TransactionClass: return ProcessClass::OkayFit; case ProcessClass::CoordinatorClass: @@ -167,8 +157,6 @@ ProcessClass::Fitness ProcessClass::machineClassFitness(ClusterRole role) const return ProcessClass::GoodFit; case ProcessClass::UnsetClass: return ProcessClass::UnsetFit; - case ProcessClass::ResolutionClass: - return ProcessClass::OkayFit; case ProcessClass::TransactionClass: return ProcessClass::OkayFit; case ProcessClass::CoordinatorClass: diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 55770d6f3b..8ec3a4d30c 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -41,6 +41,7 @@ #include "fdbserver/Status.h" 
#include "fdbserver/LatencyBandConfig.h" #include "fdbclient/DatabaseContext.h" +#include "fdbclient/GlobalConfig.actor.h" #include "fdbserver/RecoveryState.h" #include "fdbclient/ReadYourWrites.h" #include "fdbrpc/Replication.h" @@ -345,6 +346,7 @@ public: Reference logServerSet; LocalityMap* logServerMap; bool bCompleted = false; + desired = std::max(required, desired); // Construct the list of DCs where the TLog recruitment is happening. This is mainly for logging purpose. std::string dcList; @@ -407,98 +409,44 @@ public: // This worker is a candidate for TLog recruitment. bool inCCDC = worker_details.interf.locality.dcId() == clusterControllerDcId; + // Prefer recruiting a TransactionClass non-degraded process over a LogClass degraded process + if (worker_details.degraded) { + fitness = std::max(fitness, ProcessClass::GoodFit); + } fitness_workers[std::make_tuple(fitness, id_used[worker_process_id], worker_details.degraded, inCCDC)] .push_back(worker_details); } - // FIXME: it's not clear whether this is necessary. 
- for (int fitness = ProcessClass::BestFit; fitness != ProcessClass::NeverAssign; fitness++) { - auto fitnessEnum = (ProcessClass::Fitness)fitness; - for (int addingDegraded = 0; addingDegraded < 2; addingDegraded++) { - fitness_workers[std::make_tuple(fitnessEnum, 0, addingDegraded, false)]; - } - } - results.reserve(results.size() + id_worker.size()); + auto requiredFitness = ProcessClass::BestFit; + int requiredUsed = 0; + bool requiredDegraded = false; + bool requiredInCCDC = false; + + // Determine the minimum fitness and used necessary to fulfill the policy for (auto workerIter = fitness_workers.begin(); workerIter != fitness_workers.end(); ++workerIter) { auto fitness = std::get<0>(workerIter->first); auto used = std::get<1>(workerIter->first); - auto addingDegraded = std::get<2>(workerIter->first); - ASSERT(fitness < ProcessClass::NeverAssign); - if (bCompleted) { - break; + if (fitness > requiredFitness || used > requiredUsed) { + if (logServerSet->size() >= required && logServerSet->validate(policy)) { + bCompleted = true; + break; + } + requiredFitness = fitness; + requiredUsed = used; } + if (std::get<2>(workerIter->first)) { + requiredDegraded = true; + } + if (std::get<3>(workerIter->first)) { + requiredInCCDC = true; + } for (auto& worker : workerIter->second) { logServerMap->add(worker.interf.locality, &worker); } - - if (logServerSet->size() < (std::get<2>(workerIter->first) ? 
required : desired)) { - } else if (logServerSet->size() == required || logServerSet->size() <= desired) { - if (logServerSet->validate(policy)) { - for (auto& object : logServerMap->getObjects()) { - results.push_back(*object); - } - bCompleted = true; - break; - } - TraceEvent(SevWarn, "GWFTADNotAcceptable", id) - .detail("DcIds", dcList) - .detail("Fitness", fitness) - .detail("Processes", logServerSet->size()) - .detail("Required", required) - .detail("TLogPolicy", policy->info()) - .detail("DesiredLogs", desired) - .detail("Used", used) - .detail("AddingDegraded", addingDegraded); - } - // Try to select the desired size, if larger - else { - std::vector bestSet; - std::vector tLocalities; - - // Try to find the best team of servers to fulfill the policy - if (findBestPolicySet(bestSet, - logServerSet, - policy, - desired, - SERVER_KNOBS->POLICY_RATING_TESTS, - SERVER_KNOBS->POLICY_GENERATIONS)) { - results.reserve(results.size() + bestSet.size()); - for (auto& entry : bestSet) { - auto object = logServerMap->getObject(entry); - ASSERT(object); - results.push_back(*object); - tLocalities.push_back(object->interf.locality); - } - TraceEvent("GWFTADBestResults", id) - .detail("DcIds", dcList) - .detail("Fitness", fitness) - .detail("Used", used) - .detail("Processes", logServerSet->size()) - .detail("BestCount", bestSet.size()) - .detail("BestZones", ::describeZones(tLocalities)) - .detail("BestDataHalls", ::describeDataHalls(tLocalities)) - .detail("TLogPolicy", policy->info()) - .detail("TotalResults", results.size()) - .detail("DesiredLogs", desired) - .detail("AddingDegraded", addingDegraded); - bCompleted = true; - break; - } - TraceEvent(SevWarn, "GWFTADNoBest", id) - .detail("DcIds", dcList) - .detail("Fitness", fitness) - .detail("Used", used) - .detail("Processes", logServerSet->size()) - .detail("Required", required) - .detail("TLogPolicy", policy->info()) - .detail("DesiredLogs", desired) - .detail("AddingDegraded", addingDegraded); - } } - // If 
policy cannot be satisfied - if (!bCompleted) { + if (!bCompleted && !(logServerSet->size() >= required && logServerSet->validate(policy))) { std::vector tLocalities; for (auto& object : logServerMap->getObjects()) { tLocalities.push_back(object->interf.locality); @@ -516,33 +464,160 @@ public: .detail("MissingDataHalls", ::describeDataHalls(unavailableLocals)) .detail("Required", required) .detail("DesiredLogs", desired) - .detail("RatingTests", SERVER_KNOBS->POLICY_RATING_TESTS) .detail("CheckStable", checkStable) - .detail("NumExclusionWorkers", exclusionWorkerIds.size()) - .detail("PolicyGenerations", SERVER_KNOBS->POLICY_GENERATIONS) - .backtrace(); + .detail("NumExclusionWorkers", exclusionWorkerIds.size()); logServerSet->clear(); logServerSet.clear(); throw no_more_servers(); } + // If we have less than the desired amount, return all of the processes we have + if (logServerSet->size() <= desired) { + for (auto& object : logServerMap->getObjects()) { + results.push_back(*object); + } + for (auto& result : results) { + id_used[result.interf.locality.processId()]++; + } + TraceEvent("GetTLogTeamDone") + .detail("DcIds", dcList) + .detail("Policy", policy->info()) + .detail("Results", results.size()) + .detail("Processes", logServerSet->size()) + .detail("Workers", id_worker.size()) + .detail("Required", required) + .detail("Desired", desired) + .detail("Fitness", requiredFitness) + .detail("Used", requiredUsed) + .detail("AddingDegraded", requiredDegraded) + .detail("InCCDC", requiredInCCDC); + return results; + } + + // If we have added any degraded processes, try and remove them to see if we can still + // have the desired amount of processes + if (requiredDegraded) { + logServerMap->clear(); + for (auto workerIter = fitness_workers.begin(); workerIter != fitness_workers.end(); ++workerIter) { + auto fitness = std::get<0>(workerIter->first); + auto used = std::get<1>(workerIter->first); + if (fitness > requiredFitness || (fitness == requiredFitness && used > 
requiredUsed)) { + break; + } + auto addingDegraded = std::get<2>(workerIter->first); + if (addingDegraded) { + continue; + } + for (auto& worker : workerIter->second) { + logServerMap->add(worker.interf.locality, &worker); + } + } + if (logServerSet->size() >= desired && logServerSet->validate(policy)) { + requiredDegraded = false; + } + } + + // If we have added any processes in the CC DC, try and remove them to see if we can still + // have the desired amount of processes + if (requiredInCCDC) { + logServerMap->clear(); + for (auto workerIter = fitness_workers.begin(); workerIter != fitness_workers.end(); ++workerIter) { + auto fitness = std::get<0>(workerIter->first); + auto used = std::get<1>(workerIter->first); + if (fitness > requiredFitness || (fitness == requiredFitness && used > requiredUsed)) { + break; + } + auto addingDegraded = std::get<2>(workerIter->first); + auto inCCDC = std::get<3>(workerIter->first); + if (inCCDC || (!requiredDegraded && addingDegraded)) { + continue; + } + for (auto& worker : workerIter->second) { + logServerMap->add(worker.interf.locality, &worker); + } + } + if (logServerSet->size() >= desired && logServerSet->validate(policy)) { + requiredInCCDC = false; + } + } + + logServerMap->clear(); + for (auto workerIter = fitness_workers.begin(); workerIter != fitness_workers.end(); ++workerIter) { + auto fitness = std::get<0>(workerIter->first); + auto used = std::get<1>(workerIter->first); + if (fitness > requiredFitness || (fitness == requiredFitness && used > requiredUsed)) { + break; + } + auto addingDegraded = std::get<2>(workerIter->first); + auto inCCDC = std::get<3>(workerIter->first); + if ((!requiredInCCDC && inCCDC) || (!requiredDegraded && addingDegraded)) { + continue; + } + for (auto& worker : workerIter->second) { + logServerMap->add(worker.interf.locality, &worker); + } + } + + if (logServerSet->size() == desired) { + for (auto& object : logServerMap->getObjects()) { + results.push_back(*object); + } + for (auto& 
result : results) { + id_used[result.interf.locality.processId()]++; + } + TraceEvent("GetTLogTeamDone") + .detail("DcIds", dcList) + .detail("Policy", policy->info()) + .detail("Results", results.size()) + .detail("Processes", logServerSet->size()) + .detail("Workers", id_worker.size()) + .detail("Required", required) + .detail("Desired", desired) + .detail("Fitness", requiredFitness) + .detail("Used", requiredUsed) + .detail("AddingDegraded", requiredDegraded) + .detail("InCCDC", requiredInCCDC); + return results; + } + + std::vector bestSet; + std::vector tLocalities; + + // We have more than the desired number of processes, so use the policy engine to + // pick a diverse subset of them + bCompleted = findBestPolicySet(bestSet, + logServerSet, + policy, + desired, + SERVER_KNOBS->POLICY_RATING_TESTS, + SERVER_KNOBS->POLICY_GENERATIONS); + ASSERT(bCompleted); + results.reserve(results.size() + bestSet.size()); + for (auto& entry : bestSet) { + auto object = logServerMap->getObject(entry); + ASSERT(object); + results.push_back(*object); + tLocalities.push_back(object->interf.locality); + } for (auto& result : results) { id_used[result.interf.locality.processId()]++; } - TraceEvent("GetTLogTeamDone") .detail("DcIds", dcList) - .detail("Completed", bCompleted) .detail("Policy", policy->info()) .detail("Results", results.size()) .detail("Processes", logServerSet->size()) .detail("Workers", id_worker.size()) .detail("Required", required) .detail("Desired", desired) - .detail("RatingTests", SERVER_KNOBS->POLICY_RATING_TESTS) - .detail("PolicyGenerations", SERVER_KNOBS->POLICY_GENERATIONS); - + .detail("Fitness", requiredFitness) + .detail("Used", requiredUsed) + .detail("AddingDegraded", requiredDegraded) + .detail("InCCDC", requiredInCCDC) + .detail("BestCount", bestSet.size()) + .detail("BestZones", ::describeZones(tLocalities)) + .detail("BestDataHalls", ::describeDataHalls(tLocalities)); return results; } @@ -653,14 +728,15 @@ public: return bestFitness; } - 
WorkerFitnessInfo getWorkerForRoleInDatacenter(Optional> const& dcId, - ProcessClass::ClusterRole role, - ProcessClass::Fitness unacceptableFitness, - DatabaseConfiguration const& conf, - std::map>, int>& id_used, - bool checkStable = false) { - std::map, std::pair, vector>> - fitness_workers; + WorkerFitnessInfo getWorkerForRoleInDatacenter( + Optional> const& dcId, + ProcessClass::ClusterRole role, + ProcessClass::Fitness unacceptableFitness, + DatabaseConfiguration const& conf, + std::map>, int>& id_used, + Optional> preferredSharing = Optional>(), + bool checkStable = false) { + std::map, vector> fitness_workers; for (auto& it : id_worker) { auto fitness = it.second.details.processClass.machineClassFitness(role); @@ -669,23 +745,20 @@ public: } if (workerAvailable(it.second, checkStable) && fitness < unacceptableFitness && it.second.details.interf.locality.dcId() == dcId) { - if (isLongLivedStateless(it.first)) { - fitness_workers[std::make_pair(fitness, id_used[it.first])].second.push_back(it.second.details); - } else { - fitness_workers[std::make_pair(fitness, id_used[it.first])].first.push_back(it.second.details); - } + fitness_workers[std::make_tuple(fitness, + id_used[it.first], + isLongLivedStateless(it.first), + preferredSharing != it.first)] + .push_back(it.second.details); } } - for (auto& it : fitness_workers) { - for (int j = 0; j < 2; j++) { - auto& w = j == 0 ? 
it.second.first : it.second.second; - deterministicRandom()->randomShuffle(w); - for (int i = 0; i < w.size(); i++) { - id_used[w[i].interf.locality.processId()]++; - return WorkerFitnessInfo(w[i], std::max(ProcessClass::GoodFit, it.first.first), it.first.second); - } - } + if (fitness_workers.size()) { + auto worker = deterministicRandom()->randomChoice(fitness_workers.begin()->second); + id_used[worker.interf.locality.processId()]++; + return WorkerFitnessInfo(worker, + std::max(ProcessClass::GoodFit, std::get<0>(fitness_workers.begin()->first)), + std::get<1>(fitness_workers.begin()->first)); } throw no_more_servers(); @@ -742,130 +815,113 @@ public: return results; } + // Allows the comparison of two different recruitments to determine which one is better + // Tlog recruitment is different from all the other roles, in that it avoids degraded processes + // And tried to avoid recruitment in the same DC as the cluster controller struct RoleFitness { ProcessClass::Fitness bestFit; ProcessClass::Fitness worstFit; ProcessClass::ClusterRole role; int count; - bool worstIsDegraded; + int worstUsed = 1; + bool degraded = false; + bool inClusterControllerDC = false; RoleFitness(int bestFit, int worstFit, int count, ProcessClass::ClusterRole role) : bestFit((ProcessClass::Fitness)bestFit), worstFit((ProcessClass::Fitness)worstFit), count(count), - role(role), worstIsDegraded(false) {} + role(role) {} RoleFitness(int fitness, int count, ProcessClass::ClusterRole role) - : bestFit((ProcessClass::Fitness)fitness), worstFit((ProcessClass::Fitness)fitness), count(count), role(role), - worstIsDegraded(false) {} + : bestFit((ProcessClass::Fitness)fitness), worstFit((ProcessClass::Fitness)fitness), count(count), + role(role) {} RoleFitness() : bestFit(ProcessClass::NeverAssign), worstFit(ProcessClass::NeverAssign), role(ProcessClass::NoRole), - count(0), worstIsDegraded(false) {} + count(0) {} - RoleFitness(vector workers, ProcessClass::ClusterRole role) : role(role) { + 
RoleFitness(const vector& workers, + ProcessClass::ClusterRole role, + const std::map>, int>& id_used, + Optional> ccDcId) + : role(role) { + // Every recruitment will attempt to recruit the preferred amount through GoodFit, + // So a recruitment which only has BestFit is not better than one that has a GoodFit process worstFit = ProcessClass::GoodFit; - worstIsDegraded = false; + + degraded = false; + inClusterControllerDC = false; bestFit = ProcessClass::NeverAssign; + worstUsed = 1; for (auto& it : workers) { auto thisFit = it.processClass.machineClassFitness(role); - if (thisFit > worstFit) { - worstFit = thisFit; - worstIsDegraded = it.degraded; - } else if (thisFit == worstFit) { - worstIsDegraded = worstIsDegraded || it.degraded; - } + worstFit = std::max(worstFit, thisFit); bestFit = std::min(bestFit, thisFit); + degraded = it.degraded || degraded; + inClusterControllerDC = (it.interf.locality.dcId() == ccDcId) || inClusterControllerDC; + + auto thisUsed = id_used.find(it.interf.locality.processId()); + if (thisUsed == id_used.end()) { + TraceEvent(SevError, "UsedNotFound").detail("ProcessId", it.interf.locality.processId().get()); + ASSERT(false); + } + if (thisUsed->second == 0) { + TraceEvent(SevError, "UsedIsZero").detail("ProcessId", it.interf.locality.processId().get()); + ASSERT(false); + } + worstUsed = std::max(worstUsed, thisUsed->second); } + count = workers.size(); + // degraded is only used for recruitment of tlogs + // only tlogs avoid the cluster controller dc if (role != ProcessClass::TLog) { - worstIsDegraded = false; + degraded = false; + inClusterControllerDC = false; } } bool operator<(RoleFitness const& r) const { if (worstFit != r.worstFit) return worstFit < r.worstFit; - if (worstIsDegraded != r.worstIsDegraded) - return r.worstIsDegraded; + if (worstUsed != r.worstUsed) + return worstUsed < r.worstUsed; + if (count != r.count) + return count > r.count; + if (degraded != r.degraded) + return r.degraded; + if (inClusterControllerDC != 
r.inClusterControllerDC) + return r.inClusterControllerDC; // FIXME: TLog recruitment process does not guarantee the best fit is not worsened. if (role != ProcessClass::TLog && role != ProcessClass::LogRouter && bestFit != r.bestFit) return bestFit < r.bestFit; - return count > r.count; + return false; } bool operator>(RoleFitness const& r) const { return r < *this; } bool operator<=(RoleFitness const& r) const { return !(*this > r); } bool operator>=(RoleFitness const& r) const { return !(*this < r); } - bool betterFitness(RoleFitness const& r) const { - if (worstFit != r.worstFit) - return worstFit < r.worstFit; - if (worstIsDegraded != r.worstIsDegraded) - return r.worstFit; - if (bestFit != r.bestFit) - return bestFit < r.bestFit; - return false; - } - bool betterCount(RoleFitness const& r) const { if (count > r.count) return true; if (worstFit != r.worstFit) return worstFit < r.worstFit; - if (worstIsDegraded != r.worstIsDegraded) - return r.worstFit; + if (worstUsed != r.worstUsed) + return worstUsed < r.worstUsed; + if (degraded != r.degraded) + return r.degraded; + if (inClusterControllerDC != r.inClusterControllerDC) + return r.inClusterControllerDC; return false; } bool operator==(RoleFitness const& r) const { - return worstFit == r.worstFit && bestFit == r.bestFit && count == r.count && - worstIsDegraded == r.worstIsDegraded; + return worstFit == r.worstFit && worstUsed == r.worstUsed && bestFit == r.bestFit && count == r.count && + degraded == r.degraded && inClusterControllerDC == r.inClusterControllerDC; } - std::string toString() const { return format("%d %d %d %d", bestFit, worstFit, count, worstIsDegraded); } - }; - - struct RoleFitnessPair { - RoleFitness proxy; - RoleFitness grvProxy; - RoleFitness resolver; - - RoleFitnessPair() {} - RoleFitnessPair(RoleFitness const& proxy, RoleFitness const& grvProxy, RoleFitness const& resolver) - : proxy(proxy), grvProxy(grvProxy), resolver(resolver) {} - - bool operator<(RoleFitnessPair const& r) const { - 
if (proxy.betterFitness(r.proxy)) { - return true; - } - if (r.proxy.betterFitness(proxy)) { - return false; - } - if (grvProxy.betterFitness(r.grvProxy)) { - return true; - } - if (r.grvProxy.betterFitness(grvProxy)) { - return false; - } - if (resolver.betterFitness(r.resolver)) { - return true; - } - if (r.resolver.betterFitness(resolver)) { - return false; - } - if (proxy.count != r.proxy.count) { - return proxy.count > r.proxy.count; - } - if (grvProxy.count != r.grvProxy.count) { - return grvProxy.count > r.grvProxy.count; - } - return resolver.count > r.resolver.count; - } - bool operator>(RoleFitnessPair const& r) const { return r < *this; } - bool operator<=(RoleFitnessPair const& r) const { return !(*this > r); } - bool operator>=(RoleFitnessPair const& r) const { return !(*this < r); } - - bool operator==(RoleFitnessPair const& r) const { - return proxy == r.proxy && grvProxy == r.grvProxy && resolver == r.resolver; + std::string toString() const { + return format("%d %d %d %d %d %d", worstFit, worstUsed, count, degraded, inClusterControllerDC, bestFit); } }; @@ -914,9 +970,9 @@ public: if (!goodRemoteRecruitmentTime.isReady() && ((RoleFitness( SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredRemoteLogs(), ProcessClass::TLog) - .betterCount(RoleFitness(remoteLogs, ProcessClass::TLog))) || + .betterCount(RoleFitness(remoteLogs, ProcessClass::TLog, id_used, clusterControllerDcId))) || (RoleFitness(SERVER_KNOBS->EXPECTED_LOG_ROUTER_FITNESS, req.logRouterCount, ProcessClass::LogRouter) - .betterCount(RoleFitness(logRouters, ProcessClass::LogRouter))))) { + .betterCount(RoleFitness(logRouters, ProcessClass::LogRouter, id_used, clusterControllerDcId))))) { throw operation_failed(); } @@ -975,10 +1031,25 @@ public: auto first_commit_proxy = getWorkerForRoleInDatacenter( dcId, ProcessClass::CommitProxy, ProcessClass::ExcludeFit, req.configuration, id_used); - auto first_grv_proxy = getWorkerForRoleInDatacenter( - dcId, ProcessClass::GrvProxy, 
ProcessClass::ExcludeFit, req.configuration, id_used); - auto first_resolver = getWorkerForRoleInDatacenter( - dcId, ProcessClass::Resolver, ProcessClass::ExcludeFit, req.configuration, id_used); + auto first_grv_proxy = getWorkerForRoleInDatacenter(dcId, + ProcessClass::GrvProxy, + ProcessClass::ExcludeFit, + req.configuration, + id_used, + first_commit_proxy.worker.interf.locality.processId()); + auto first_resolver = getWorkerForRoleInDatacenter(dcId, + ProcessClass::Resolver, + ProcessClass::ExcludeFit, + req.configuration, + id_used, + first_commit_proxy.worker.interf.locality.processId()); + + // If one of the first process recruitments is forced to share a process, allow all of next recruitments + // to also share a process. + auto maxUsed = std::max({ first_commit_proxy.used, first_grv_proxy.used, first_resolver.used }); + first_commit_proxy.used = maxUsed; + first_grv_proxy.used = maxUsed; + first_resolver.used = maxUsed; auto commit_proxies = getWorkersForRoleInDatacenter(dcId, ProcessClass::CommitProxy, @@ -1031,24 +1102,24 @@ public: if (!goodRecruitmentTime.isReady() && (RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs(), ProcessClass::TLog) - .betterCount(RoleFitness(tlogs, ProcessClass::TLog)) || + .betterCount(RoleFitness(tlogs, ProcessClass::TLog, id_used, clusterControllerDcId)) || (region.satelliteTLogReplicationFactor > 0 && req.configuration.usableRegions > 1 && RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredSatelliteLogs(dcId), ProcessClass::TLog) - .betterCount(RoleFitness(satelliteLogs, ProcessClass::TLog))) || + .betterCount(RoleFitness(satelliteLogs, ProcessClass::TLog, id_used, clusterControllerDcId))) || RoleFitness(SERVER_KNOBS->EXPECTED_COMMIT_PROXY_FITNESS, req.configuration.getDesiredCommitProxies(), ProcessClass::CommitProxy) - .betterCount(RoleFitness(commit_proxies, ProcessClass::CommitProxy)) || + .betterCount(RoleFitness(commit_proxies, ProcessClass::CommitProxy, 
id_used, clusterControllerDcId)) || RoleFitness(SERVER_KNOBS->EXPECTED_GRV_PROXY_FITNESS, req.configuration.getDesiredGrvProxies(), ProcessClass::GrvProxy) - .betterCount(RoleFitness(grv_proxies, ProcessClass::GrvProxy)) || + .betterCount(RoleFitness(grv_proxies, ProcessClass::GrvProxy, id_used, clusterControllerDcId)) || RoleFitness(SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS, req.configuration.getDesiredResolvers(), ProcessClass::Resolver) - .betterCount(RoleFitness(resolvers, ProcessClass::Resolver)))) { + .betterCount(RoleFitness(resolvers, ProcessClass::Resolver, id_used, clusterControllerDcId)))) { return operation_failed(); } @@ -1062,9 +1133,15 @@ public: std::swap(regions[0], regions[1]); } - if (regions[1].dcId == clusterControllerDcId.get() && regions[1].priority >= 0 && + if (regions[1].dcId == clusterControllerDcId.get() && (!versionDifferenceUpdated || datacenterVersionDifference >= SERVER_KNOBS->MAX_VERSION_DIFFERENCE)) { - std::swap(regions[0], regions[1]); + if (regions[1].priority >= 0) { + std::swap(regions[0], regions[1]); + } else { + TraceEvent(SevWarnAlways, "CCDcPriorityNegative") + .detail("DcId", regions[1].dcId) + .detail("Priority", regions[1].priority); + } } bool setPrimaryDesired = false; @@ -1149,7 +1226,7 @@ public: auto datacenters = getDatacenters(req.configuration); - RoleFitnessPair bestFitness; + std::tuple bestFitness; int numEquivalent = 1; Optional bestDC; @@ -1160,10 +1237,27 @@ public: auto used = id_used; auto first_commit_proxy = getWorkerForRoleInDatacenter( dcId, ProcessClass::CommitProxy, ProcessClass::ExcludeFit, req.configuration, used); - auto first_grv_proxy = getWorkerForRoleInDatacenter( - dcId, ProcessClass::GrvProxy, ProcessClass::ExcludeFit, req.configuration, used); - auto first_resolver = getWorkerForRoleInDatacenter( - dcId, ProcessClass::Resolver, ProcessClass::ExcludeFit, req.configuration, used); + auto first_grv_proxy = + getWorkerForRoleInDatacenter(dcId, + ProcessClass::GrvProxy, + 
ProcessClass::ExcludeFit, + req.configuration, + used, + first_commit_proxy.worker.interf.locality.processId()); + auto first_resolver = + getWorkerForRoleInDatacenter(dcId, + ProcessClass::Resolver, + ProcessClass::ExcludeFit, + req.configuration, + used, + first_commit_proxy.worker.interf.locality.processId()); + + // If one of the first process recruitments is forced to share a process, allow all of next + // recruitments to also share a process. + auto maxUsed = std::max({ first_commit_proxy.used, first_grv_proxy.used, first_resolver.used }); + first_commit_proxy.used = maxUsed; + first_grv_proxy.used = maxUsed; + first_resolver.used = maxUsed; auto commit_proxies = getWorkersForRoleInDatacenter(dcId, ProcessClass::CommitProxy, @@ -1186,9 +1280,10 @@ public: used, first_resolver); - RoleFitnessPair fitness(RoleFitness(commit_proxies, ProcessClass::CommitProxy), - RoleFitness(grv_proxies, ProcessClass::GrvProxy), - RoleFitness(resolvers, ProcessClass::Resolver)); + auto fitness = std::make_tuple( + RoleFitness(commit_proxies, ProcessClass::CommitProxy, used, clusterControllerDcId), + RoleFitness(grv_proxies, ProcessClass::GrvProxy, used, clusterControllerDcId), + RoleFitness(resolvers, ProcessClass::Resolver, used, clusterControllerDcId)); if (dcId == clusterControllerDcId) { bestFitness = fitness; @@ -1206,7 +1301,7 @@ public: if (req.configuration.backupWorkerEnabled) { const int nBackup = std::max(tlogs.size(), req.maxOldLogRouters); auto backupWorkers = getWorkersForRoleInDatacenter( - dcId, ProcessClass::Backup, nBackup, req.configuration, id_used); + dcId, ProcessClass::Backup, nBackup, req.configuration, used); std::transform(backupWorkers.begin(), backupWorkers.end(), std::back_inserter(result.backupWorkers), @@ -1254,19 +1349,19 @@ public: if (!goodRecruitmentTime.isReady() && (RoleFitness( SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs(), ProcessClass::TLog) - .betterCount(RoleFitness(tlogs, ProcessClass::TLog)) || + 
.betterCount(RoleFitness(tlogs, ProcessClass::TLog, id_used, clusterControllerDcId)) || RoleFitness(SERVER_KNOBS->EXPECTED_COMMIT_PROXY_FITNESS, req.configuration.getDesiredCommitProxies(), ProcessClass::CommitProxy) - .betterCount(bestFitness.proxy) || + .betterCount(std::get<0>(bestFitness)) || RoleFitness(SERVER_KNOBS->EXPECTED_GRV_PROXY_FITNESS, req.configuration.getDesiredGrvProxies(), ProcessClass::GrvProxy) - .betterCount(bestFitness.grvProxy) || + .betterCount(std::get<1>(bestFitness)) || RoleFitness(SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS, req.configuration.getDesiredResolvers(), ProcessClass::Resolver) - .betterCount(bestFitness.resolver))) { + .betterCount(std::get<2>(bestFitness)))) { throw operation_failed(); } @@ -1284,10 +1379,20 @@ public: try { std::map>, int> id_used; - getWorkerForRoleInDatacenter( - regions[0].dcId, ProcessClass::ClusterController, ProcessClass::ExcludeFit, db.config, id_used, true); - getWorkerForRoleInDatacenter( - regions[0].dcId, ProcessClass::Master, ProcessClass::ExcludeFit, db.config, id_used, true); + getWorkerForRoleInDatacenter(regions[0].dcId, + ProcessClass::ClusterController, + ProcessClass::ExcludeFit, + db.config, + id_used, + Optional>(), + true); + getWorkerForRoleInDatacenter(regions[0].dcId, + ProcessClass::Master, + ProcessClass::ExcludeFit, + db.config, + id_used, + Optional>(), + true); std::set> primaryDC; primaryDC.insert(regions[0].dcId); @@ -1303,12 +1408,27 @@ public: getWorkersForSatelliteLogs(db.config, regions[0], regions[1], id_used, satelliteFallback, true); } - getWorkerForRoleInDatacenter( - regions[0].dcId, ProcessClass::Resolver, ProcessClass::ExcludeFit, db.config, id_used, true); - getWorkerForRoleInDatacenter( - regions[0].dcId, ProcessClass::CommitProxy, ProcessClass::ExcludeFit, db.config, id_used, true); - getWorkerForRoleInDatacenter( - regions[0].dcId, ProcessClass::GrvProxy, ProcessClass::ExcludeFit, db.config, id_used, true); + getWorkerForRoleInDatacenter(regions[0].dcId, + 
ProcessClass::Resolver, + ProcessClass::ExcludeFit, + db.config, + id_used, + Optional>(), + true); + getWorkerForRoleInDatacenter(regions[0].dcId, + ProcessClass::CommitProxy, + ProcessClass::ExcludeFit, + db.config, + id_used, + Optional>(), + true); + getWorkerForRoleInDatacenter(regions[0].dcId, + ProcessClass::GrvProxy, + ProcessClass::ExcludeFit, + db.config, + id_used, + Optional>(), + true); vector> dcPriority; dcPriority.push_back(regions[0].dcId); @@ -1337,6 +1457,12 @@ public: } } + void updateIdUsed(const vector& workers, std::map>, int>& id_used) { + for (auto& it : workers) { + id_used[it.interf.locality.processId()]++; + } + } + // FIXME: determine when to fail the cluster controller when a primaryDC has not been set // This function returns true when the cluster controller determines it is worth forcing @@ -1351,6 +1477,7 @@ public: // Do not trigger better master exists if the cluster controller is excluded, since the master will change // anyways once the cluster controller is moved if (id_worker[clusterControllerProcessId].priorityInfo.isExcluded) { + TraceEvent("NewRecruitmentIsWorse", id).detail("Reason", "ClusterControllerExcluded"); return false; } @@ -1363,6 +1490,9 @@ public: // Get master process auto masterWorker = id_worker.find(dbi.master.locality.processId()); if (masterWorker == id_worker.end()) { + TraceEvent("NewRecruitmentIsWorse", id) + .detail("Reason", "CannotFindMaster") + .detail("ProcessID", dbi.master.locality.processId()); return false; } @@ -1378,10 +1508,18 @@ public: for (auto& logSet : dbi.logSystemConfig.tLogs) { for (auto& it : logSet.tLogs) { auto tlogWorker = id_worker.find(it.interf().filteredLocality.processId()); - if (tlogWorker == id_worker.end()) + if (tlogWorker == id_worker.end()) { + TraceEvent("NewRecruitmentIsWorse", id) + .detail("Reason", "CannotFindTLog") + .detail("ProcessID", it.interf().filteredLocality.processId()); return false; - if (tlogWorker->second.priorityInfo.isExcluded) + } + if 
(tlogWorker->second.priorityInfo.isExcluded) { + TraceEvent("BetterMasterExists", id) + .detail("Reason", "TLogExcluded") + .detail("ProcessID", it.interf().filteredLocality.processId()); return true; + } if (logSet.isLocal && logSet.locality == tagLocalitySatellite) { satellite_tlogs.push_back(tlogWorker->second.details); @@ -1394,10 +1532,18 @@ public: for (auto& it : logSet.logRouters) { auto tlogWorker = id_worker.find(it.interf().filteredLocality.processId()); - if (tlogWorker == id_worker.end()) + if (tlogWorker == id_worker.end()) { + TraceEvent("NewRecruitmentIsWorse", id) + .detail("Reason", "CannotFindLogRouter") + .detail("ProcessID", it.interf().filteredLocality.processId()); return false; - if (tlogWorker->second.priorityInfo.isExcluded) + } + if (tlogWorker->second.priorityInfo.isExcluded) { + TraceEvent("BetterMasterExists", id) + .detail("Reason", "LogRouterExcluded") + .detail("ProcessID", it.interf().filteredLocality.processId()); return true; + } if (!logRouterAddresses.count(tlogWorker->second.details.interf.address())) { logRouterAddresses.insert(tlogWorker->second.details.interf.address()); log_routers.push_back(tlogWorker->second.details); @@ -1406,10 +1552,18 @@ public: for (const auto& worker : logSet.backupWorkers) { auto workerIt = id_worker.find(worker.interf().locality.processId()); - if (workerIt == id_worker.end()) + if (workerIt == id_worker.end()) { + TraceEvent("NewRecruitmentIsWorse", id) + .detail("Reason", "CannotFindBackupWorker") + .detail("ProcessID", worker.interf().locality.processId()); return false; - if (workerIt->second.priorityInfo.isExcluded) + } + if (workerIt->second.priorityInfo.isExcluded) { + TraceEvent("BetterMasterExists", id) + .detail("Reason", "BackupWorkerExcluded") + .detail("ProcessID", worker.interf().locality.processId()); return true; + } if (backup_addresses.count(workerIt->second.details.interf.address()) == 0) { backup_addresses.insert(workerIt->second.details.interf.address()); 
backup_workers.push_back(workerIt->second.details); @@ -1421,10 +1575,18 @@ public: std::vector commitProxyClasses; for (auto& it : dbi.client.commitProxies) { auto commitProxyWorker = id_worker.find(it.processId); - if (commitProxyWorker == id_worker.end()) + if (commitProxyWorker == id_worker.end()) { + TraceEvent("NewRecruitmentIsWorse", id) + .detail("Reason", "CannotFindCommitProxy") + .detail("ProcessID", it.processId); return false; - if (commitProxyWorker->second.priorityInfo.isExcluded) + } + if (commitProxyWorker->second.priorityInfo.isExcluded) { + TraceEvent("BetterMasterExists", id) + .detail("Reason", "CommitProxyExcluded") + .detail("ProcessID", it.processId); return true; + } commitProxyClasses.push_back(commitProxyWorker->second.details); } @@ -1432,10 +1594,18 @@ public: std::vector grvProxyClasses; for (auto& it : dbi.client.grvProxies) { auto grvProxyWorker = id_worker.find(it.processId); - if (grvProxyWorker == id_worker.end()) + if (grvProxyWorker == id_worker.end()) { + TraceEvent("NewRecruitmentIsWorse", id) + .detail("Reason", "CannotFindGrvProxy") + .detail("ProcessID", it.processId); return false; - if (grvProxyWorker->second.priorityInfo.isExcluded) + } + if (grvProxyWorker->second.priorityInfo.isExcluded) { + TraceEvent("BetterMasterExists", id) + .detail("Reason", "GrvProxyExcluded") + .detail("ProcessID", it.processId); return true; + } grvProxyClasses.push_back(grvProxyWorker->second.details); } @@ -1443,10 +1613,18 @@ public: std::vector resolverClasses; for (auto& it : dbi.resolvers) { auto resolverWorker = id_worker.find(it.locality.processId()); - if (resolverWorker == id_worker.end()) + if (resolverWorker == id_worker.end()) { + TraceEvent("NewRecruitmentIsWorse", id) + .detail("Reason", "CannotFindResolver") + .detail("ProcessID", it.locality.processId()); return false; - if (resolverWorker->second.priorityInfo.isExcluded) + } + if (resolverWorker->second.priorityInfo.isExcluded) { + TraceEvent("BetterMasterExists", id) + 
.detail("Reason", "ResolverExcluded") + .detail("ProcessID", it.locality.processId()); return true; + } resolverClasses.push_back(resolverWorker->second.details); } @@ -1459,19 +1637,40 @@ public: } std::map>, int> id_used; + std::map>, int> old_id_used; id_used[clusterControllerProcessId]++; - WorkerFitnessInfo mworker = getWorkerForRoleInDatacenter( - clusterControllerDcId, ProcessClass::Master, ProcessClass::NeverAssign, db.config, id_used, true); + old_id_used[clusterControllerProcessId]++; + WorkerFitnessInfo mworker = getWorkerForRoleInDatacenter(clusterControllerDcId, + ProcessClass::Master, + ProcessClass::NeverAssign, + db.config, + id_used, + Optional>(), + true); auto newMasterFit = mworker.worker.processClass.machineClassFitness(ProcessClass::Master); if (db.config.isExcludedServer(mworker.worker.interf.addresses())) { newMasterFit = std::max(newMasterFit, ProcessClass::ExcludeFit); } - if (oldMasterFit < newMasterFit) + old_id_used[masterWorker->first]++; + if (oldMasterFit < newMasterFit) { + TraceEvent("NewRecruitmentIsWorse", id) + .detail("OldMasterFit", oldMasterFit) + .detail("NewMasterFit", newMasterFit) + .detail("OldIsCC", dbi.master.locality.processId() == clusterControllerProcessId) + .detail("NewIsCC", mworker.worker.interf.locality.processId() == clusterControllerProcessId); + ; return false; + } if (oldMasterFit > newMasterFit || (dbi.master.locality.processId() == clusterControllerProcessId && - mworker.worker.interf.locality.processId() != clusterControllerProcessId)) + mworker.worker.interf.locality.processId() != clusterControllerProcessId)) { + TraceEvent("BetterMasterExists", id) + .detail("OldMasterFit", oldMasterFit) + .detail("NewMasterFit", newMasterFit) + .detail("OldIsCC", dbi.master.locality.processId() == clusterControllerProcessId) + .detail("NewIsCC", mworker.worker.interf.locality.processId() == clusterControllerProcessId); return true; + } std::set> primaryDC; std::set> remoteDC; @@ -1493,7 +1692,8 @@ public: } // Check 
tLog fitness - RoleFitness oldTLogFit(tlogs, ProcessClass::TLog); + updateIdUsed(tlogs, old_id_used); + RoleFitness oldTLogFit(tlogs, ProcessClass::TLog, old_id_used, clusterControllerDcId); auto newTLogs = getWorkersForTlogs(db.config, db.config.tLogReplicationFactor, db.config.getDesiredLogs(), @@ -1501,10 +1701,7 @@ public: id_used, true, primaryDC); - RoleFitness newTLogFit(newTLogs, ProcessClass::TLog); - - if (oldTLogFit < newTLogFit) - return false; + RoleFitness newTLogFit(newTLogs, ProcessClass::TLog, id_used, clusterControllerDcId); bool oldSatelliteFallback = false; @@ -1520,13 +1717,16 @@ public: } } - RoleFitness oldSatelliteTLogFit(satellite_tlogs, ProcessClass::TLog); + updateIdUsed(satellite_tlogs, old_id_used); + RoleFitness oldSatelliteTLogFit(satellite_tlogs, ProcessClass::TLog, old_id_used, clusterControllerDcId); bool newSatelliteFallback = false; - auto newSatelliteTLogs = - (region.satelliteTLogReplicationFactor > 0 && db.config.usableRegions > 1) - ? getWorkersForSatelliteLogs(db.config, region, remoteRegion, id_used, newSatelliteFallback, true) - : satellite_tlogs; - RoleFitness newSatelliteTLogFit(newSatelliteTLogs, ProcessClass::TLog); + auto newSatelliteTLogs = satellite_tlogs; + RoleFitness newSatelliteTLogFit = oldSatelliteTLogFit; + if (region.satelliteTLogReplicationFactor > 0 && db.config.usableRegions > 1) { + newSatelliteTLogs = + getWorkersForSatelliteLogs(db.config, region, remoteRegion, id_used, newSatelliteFallback, true); + newSatelliteTLogFit = RoleFitness(newSatelliteTLogs, ProcessClass::TLog, id_used, clusterControllerDcId); + } std::map, int32_t> satellite_priority; for (auto& r : region.satellites) { @@ -1551,55 +1751,72 @@ public: } } - if (oldSatelliteFallback && !newSatelliteFallback) + if (oldSatelliteFallback && !newSatelliteFallback) { + TraceEvent("BetterMasterExists", id) + .detail("OldSatelliteFallback", oldSatelliteFallback) + .detail("NewSatelliteFallback", newSatelliteFallback); return true; - if 
(!oldSatelliteFallback && newSatelliteFallback) + } + if (!oldSatelliteFallback && newSatelliteFallback) { + TraceEvent("NewRecruitmentIsWorse", id) + .detail("OldSatelliteFallback", oldSatelliteFallback) + .detail("NewSatelliteFallback", newSatelliteFallback); return false; + } - if (oldSatelliteRegionFit < newSatelliteRegionFit) + if (oldSatelliteRegionFit < newSatelliteRegionFit) { + TraceEvent("BetterMasterExists", id) + .detail("OldSatelliteRegionFit", oldSatelliteRegionFit) + .detail("NewSatelliteRegionFit", newSatelliteRegionFit); return true; - if (oldSatelliteRegionFit > newSatelliteRegionFit) + } + if (oldSatelliteRegionFit > newSatelliteRegionFit) { + TraceEvent("NewRecruitmentIsWorse", id) + .detail("OldSatelliteRegionFit", oldSatelliteRegionFit) + .detail("NewSatelliteRegionFit", newSatelliteRegionFit); return false; + } - if (oldSatelliteTLogFit < newSatelliteTLogFit) - return false; - - RoleFitness oldRemoteTLogFit(remote_tlogs, ProcessClass::TLog); + updateIdUsed(remote_tlogs, old_id_used); + RoleFitness oldRemoteTLogFit(remote_tlogs, ProcessClass::TLog, old_id_used, clusterControllerDcId); std::vector exclusionWorkerIds; auto fn = [](const WorkerDetails& in) { return in.interf.id(); }; std::transform(newTLogs.begin(), newTLogs.end(), std::back_inserter(exclusionWorkerIds), fn); std::transform(newSatelliteTLogs.begin(), newSatelliteTLogs.end(), std::back_inserter(exclusionWorkerIds), fn); - RoleFitness newRemoteTLogFit( - (db.config.usableRegions > 1 && (dbi.recoveryState == RecoveryState::ALL_LOGS_RECRUITED || - dbi.recoveryState == RecoveryState::FULLY_RECOVERED)) - ? 
getWorkersForTlogs(db.config, - db.config.getRemoteTLogReplicationFactor(), - db.config.getDesiredRemoteLogs(), - db.config.getRemoteTLogPolicy(), - id_used, - true, - remoteDC, - exclusionWorkerIds) - : remote_tlogs, - ProcessClass::TLog); - if (oldRemoteTLogFit < newRemoteTLogFit) - return false; + RoleFitness newRemoteTLogFit = oldRemoteTLogFit; + if (db.config.usableRegions > 1 && (dbi.recoveryState == RecoveryState::ALL_LOGS_RECRUITED || + dbi.recoveryState == RecoveryState::FULLY_RECOVERED)) { + newRemoteTLogFit = RoleFitness(getWorkersForTlogs(db.config, + db.config.getRemoteTLogReplicationFactor(), + db.config.getDesiredRemoteLogs(), + db.config.getRemoteTLogPolicy(), + id_used, + true, + remoteDC, + exclusionWorkerIds), + ProcessClass::TLog, + id_used, + clusterControllerDcId); + } int oldRouterCount = oldTLogFit.count * std::max(1, db.config.desiredLogRouterCount / std::max(1, oldTLogFit.count)); int newRouterCount = newTLogFit.count * std::max(1, db.config.desiredLogRouterCount / std::max(1, newTLogFit.count)); - RoleFitness oldLogRoutersFit(log_routers, ProcessClass::LogRouter); - RoleFitness newLogRoutersFit( - (db.config.usableRegions > 1 && dbi.recoveryState == RecoveryState::FULLY_RECOVERED) - ? 
getWorkersForRoleInDatacenter(*remoteDC.begin(), - ProcessClass::LogRouter, - newRouterCount, - db.config, - id_used, - Optional(), - true) - : log_routers, - ProcessClass::LogRouter); + updateIdUsed(log_routers, old_id_used); + RoleFitness oldLogRoutersFit(log_routers, ProcessClass::LogRouter, old_id_used, clusterControllerDcId); + RoleFitness newLogRoutersFit = oldLogRoutersFit; + if (db.config.usableRegions > 1 && dbi.recoveryState == RecoveryState::FULLY_RECOVERED) { + newLogRoutersFit = RoleFitness(getWorkersForRoleInDatacenter(*remoteDC.begin(), + ProcessClass::LogRouter, + newRouterCount, + db.config, + id_used, + Optional(), + true), + ProcessClass::LogRouter, + id_used, + clusterControllerDcId); + } if (oldLogRoutersFit.count < oldRouterCount) { oldLogRoutersFit.worstFit = ProcessClass::NeverAssign; @@ -1607,20 +1824,41 @@ public: if (newLogRoutersFit.count < newRouterCount) { newLogRoutersFit.worstFit = ProcessClass::NeverAssign; } - if (oldLogRoutersFit < newLogRoutersFit) - return false; // Check proxy/grvProxy/resolver fitness - RoleFitnessPair oldInFit(RoleFitness(commitProxyClasses, ProcessClass::CommitProxy), - RoleFitness(grvProxyClasses, ProcessClass::GrvProxy), - RoleFitness(resolverClasses, ProcessClass::Resolver)); + updateIdUsed(commitProxyClasses, old_id_used); + updateIdUsed(grvProxyClasses, old_id_used); + updateIdUsed(resolverClasses, old_id_used); + RoleFitness oldCommitProxyFit( + commitProxyClasses, ProcessClass::CommitProxy, old_id_used, clusterControllerDcId); + RoleFitness oldGrvProxyFit(grvProxyClasses, ProcessClass::GrvProxy, old_id_used, clusterControllerDcId); + RoleFitness oldResolverFit(resolverClasses, ProcessClass::Resolver, old_id_used, clusterControllerDcId); - auto first_commit_proxy = getWorkerForRoleInDatacenter( - clusterControllerDcId, ProcessClass::CommitProxy, ProcessClass::ExcludeFit, db.config, id_used, true); - auto first_grv_proxy = getWorkerForRoleInDatacenter( - clusterControllerDcId, ProcessClass::GrvProxy, 
ProcessClass::ExcludeFit, db.config, id_used, true); - auto first_resolver = getWorkerForRoleInDatacenter( - clusterControllerDcId, ProcessClass::Resolver, ProcessClass::ExcludeFit, db.config, id_used, true); + auto first_commit_proxy = getWorkerForRoleInDatacenter(clusterControllerDcId, + ProcessClass::CommitProxy, + ProcessClass::ExcludeFit, + db.config, + id_used, + Optional>(), + true); + auto first_grv_proxy = getWorkerForRoleInDatacenter(clusterControllerDcId, + ProcessClass::GrvProxy, + ProcessClass::ExcludeFit, + db.config, + id_used, + first_commit_proxy.worker.interf.locality.processId(), + true); + auto first_resolver = getWorkerForRoleInDatacenter(clusterControllerDcId, + ProcessClass::Resolver, + ProcessClass::ExcludeFit, + db.config, + id_used, + first_commit_proxy.worker.interf.locality.processId(), + true); + auto maxUsed = std::max({ first_commit_proxy.used, first_grv_proxy.used, first_resolver.used }); + first_commit_proxy.used = maxUsed; + first_grv_proxy.used = maxUsed; + first_resolver.used = maxUsed; auto commit_proxies = getWorkersForRoleInDatacenter(clusterControllerDcId, ProcessClass::CommitProxy, db.config.getDesiredCommitProxies(), @@ -1643,25 +1881,13 @@ public: first_resolver, true); - RoleFitnessPair newInFit(RoleFitness(commit_proxies, ProcessClass::CommitProxy), - RoleFitness(grv_proxies, ProcessClass::GrvProxy), - RoleFitness(resolvers, ProcessClass::Resolver)); - if (oldInFit.proxy.betterFitness(newInFit.proxy) || oldInFit.grvProxy.betterFitness(newInFit.grvProxy) || - oldInFit.resolver.betterFitness(newInFit.resolver)) { - return false; - } - - // Because a configuration with fewer proxies or resolvers does not cause this function to fail, - // we need an extra check to determine if the total number of processes has been reduced. - // This is mainly helpful in avoiding situations where killing a degraded process - // would result in a configuration with less total processes than desired. 
- if (oldTLogFit.count + oldInFit.proxy.count + oldInFit.grvProxy.count + oldInFit.resolver.count > - newTLogFit.count + newInFit.proxy.count + newInFit.grvProxy.count + newInFit.resolver.count) { - return false; - } + RoleFitness newCommitProxyFit(commit_proxies, ProcessClass::CommitProxy, id_used, clusterControllerDcId); + RoleFitness newGrvProxyFit(grv_proxies, ProcessClass::GrvProxy, id_used, clusterControllerDcId); + RoleFitness newResolverFit(resolvers, ProcessClass::Resolver, id_used, clusterControllerDcId); // Check backup worker fitness - RoleFitness oldBackupWorkersFit(backup_workers, ProcessClass::Backup); + updateIdUsed(backup_workers, old_id_used); + RoleFitness oldBackupWorkersFit(backup_workers, ProcessClass::Backup, old_id_used, clusterControllerDcId); const int nBackup = backup_addresses.size(); RoleFitness newBackupWorkersFit(getWorkersForRoleInDatacenter(clusterControllerDcId, ProcessClass::Backup, @@ -1670,35 +1896,75 @@ public: id_used, Optional(), true), - ProcessClass::Backup); + ProcessClass::Backup, + id_used, + clusterControllerDcId); - if (oldTLogFit > newTLogFit || oldInFit > newInFit || oldSatelliteTLogFit > newSatelliteTLogFit || - oldRemoteTLogFit > newRemoteTLogFit || oldLogRoutersFit > newLogRoutersFit || - oldBackupWorkersFit > newBackupWorkersFit) { + auto oldFit = std::make_tuple(oldTLogFit, + oldSatelliteTLogFit, + oldCommitProxyFit, + oldGrvProxyFit, + oldResolverFit, + oldBackupWorkersFit, + oldRemoteTLogFit, + oldLogRoutersFit); + auto newFit = std::make_tuple(newTLogFit, + newSatelliteTLogFit, + newCommitProxyFit, + newGrvProxyFit, + newResolverFit, + newBackupWorkersFit, + newRemoteTLogFit, + newLogRoutersFit); + + if (oldFit > newFit) { TraceEvent("BetterMasterExists", id) .detail("OldMasterFit", oldMasterFit) .detail("NewMasterFit", newMasterFit) .detail("OldTLogFit", oldTLogFit.toString()) .detail("NewTLogFit", newTLogFit.toString()) - .detail("OldProxyFit", oldInFit.proxy.toString()) - .detail("NewProxyFit", 
newInFit.proxy.toString()) - .detail("OldGrvProxyFit", oldInFit.grvProxy.toString()) - .detail("NewGrvProxyFit", newInFit.grvProxy.toString()) - .detail("OldResolverFit", oldInFit.resolver.toString()) - .detail("NewResolverFit", newInFit.resolver.toString()) .detail("OldSatelliteFit", oldSatelliteTLogFit.toString()) .detail("NewSatelliteFit", newSatelliteTLogFit.toString()) + .detail("OldCommitProxyFit", oldCommitProxyFit.toString()) + .detail("NewCommitProxyFit", newCommitProxyFit.toString()) + .detail("OldGrvProxyFit", oldGrvProxyFit.toString()) + .detail("NewGrvProxyFit", newGrvProxyFit.toString()) + .detail("OldResolverFit", oldResolverFit.toString()) + .detail("NewResolverFit", newResolverFit.toString()) + .detail("OldBackupWorkerFit", oldBackupWorkersFit.toString()) + .detail("NewBackupWorkerFit", newBackupWorkersFit.toString()) .detail("OldRemoteFit", oldRemoteTLogFit.toString()) .detail("NewRemoteFit", newRemoteTLogFit.toString()) .detail("OldRouterFit", oldLogRoutersFit.toString()) .detail("NewRouterFit", newLogRoutersFit.toString()) - .detail("OldBackupWorkerFit", oldBackupWorkersFit.toString()) - .detail("NewBackupWorkerFit", newBackupWorkersFit.toString()) .detail("OldSatelliteFallback", oldSatelliteFallback) .detail("NewSatelliteFallback", newSatelliteFallback); return true; } + if (oldFit < newFit) { + TraceEvent("NewRecruitmentIsWorse", id) + .detail("OldMasterFit", oldMasterFit) + .detail("NewMasterFit", newMasterFit) + .detail("OldTLogFit", oldTLogFit.toString()) + .detail("NewTLogFit", newTLogFit.toString()) + .detail("OldSatelliteFit", oldSatelliteTLogFit.toString()) + .detail("NewSatelliteFit", newSatelliteTLogFit.toString()) + .detail("OldCommitProxyFit", oldCommitProxyFit.toString()) + .detail("NewCommitProxyFit", newCommitProxyFit.toString()) + .detail("OldGrvProxyFit", oldGrvProxyFit.toString()) + .detail("NewGrvProxyFit", newGrvProxyFit.toString()) + .detail("OldResolverFit", oldResolverFit.toString()) + .detail("NewResolverFit", 
newResolverFit.toString()) + .detail("OldBackupWorkerFit", oldBackupWorkersFit.toString()) + .detail("NewBackupWorkerFit", newBackupWorkersFit.toString()) + .detail("OldRemoteFit", oldRemoteTLogFit.toString()) + .detail("NewRemoteFit", newRemoteTLogFit.toString()) + .detail("OldRouterFit", oldLogRoutersFit.toString()) + .detail("NewRouterFit", newLogRoutersFit.toString()) + .detail("OldSatelliteFallback", oldSatelliteFallback) + .detail("NewSatelliteFallback", newSatelliteFallback); + } return false; } @@ -2068,6 +2334,7 @@ void checkBetterDDOrRK(ClusterControllerData* self) { ProcessClass::NeverAssign, self->db.config, id_used, + Optional>(), true) .worker; if (self->onMasterIsBetter(newRKWorker, ProcessClass::Ratekeeper)) { @@ -2083,6 +2350,7 @@ void checkBetterDDOrRK(ClusterControllerData* self) { ProcessClass::NeverAssign, self->db.config, id_used, + Optional>(), true) .worker; if (self->onMasterIsBetter(newDDWorker, ProcessClass::DataDistributor)) { @@ -2454,8 +2722,6 @@ void clusterRegisterMaster(ClusterControllerData* self, RegisterMasterRequest co clientInfo.id = deterministicRandom()->randomUniqueID(); clientInfo.commitProxies = req.commitProxies; clientInfo.grvProxies = req.grvProxies; - clientInfo.clientTxnInfoSampleRate = db->clientInfo->get().clientTxnInfoSampleRate; - clientInfo.clientTxnInfoSizeLimit = db->clientInfo->get().clientTxnInfoSizeLimit; db->clientInfo->set(clientInfo); dbInfo.client = db->clientInfo->get(); } @@ -2930,36 +3196,84 @@ ACTOR Future monitorServerInfoConfig(ClusterControllerData::DBInfo* db) { } } -ACTOR Future monitorClientTxnInfoConfigs(ClusterControllerData::DBInfo* db) { +// Monitors the global configuration version key for changes. When changes are +// made, the global configuration history is read and any updates are sent to +// all processes in the system by updating the ClientDBInfo object. The +// GlobalConfig actor class contains the functionality to read the latest +// history and update the processes local view. 
+ACTOR Future monitorGlobalConfig(ClusterControllerData::DBInfo* db) { loop { state ReadYourWritesTransaction tr(db->db); loop { try { tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE); - state Optional rateVal = wait(tr.get(fdbClientInfoTxnSampleRate)); - state Optional limitVal = wait(tr.get(fdbClientInfoTxnSizeLimit)); - ClientDBInfo clientInfo = db->clientInfo->get(); - double sampleRate = rateVal.present() - ? BinaryReader::fromStringRef(rateVal.get(), Unversioned()) - : std::numeric_limits::infinity(); - int64_t sizeLimit = - limitVal.present() ? BinaryReader::fromStringRef(limitVal.get(), Unversioned()) : -1; - if (sampleRate != clientInfo.clientTxnInfoSampleRate || - sizeLimit != clientInfo.clientTxnInfoSampleRate) { + state Optional globalConfigVersion = wait(tr.get(globalConfigVersionKey)); + state ClientDBInfo clientInfo = db->clientInfo->get(); + + if (globalConfigVersion.present()) { + // Since the history keys end with versionstamps, they + // should be sorted correctly (versionstamps are stored in + // big-endian order). + Standalone globalConfigHistory = + wait(tr.getRange(globalConfigHistoryKeys, CLIENT_KNOBS->TOO_MANY)); + // If the global configuration version key has been set, + // the history should contain at least one item. + ASSERT(globalConfigHistory.size() > 0); + clientInfo.history.clear(); + + for (const auto& kv : globalConfigHistory) { + ObjectReader reader(kv.value.begin(), IncludeVersion()); + if (reader.protocolVersion() != g_network->protocolVersion()) { + // If the protocol version has changed, the + // GlobalConfig actor should refresh its view by + // reading the entire global configuration key + // range. Setting the version to the max int64_t + // will always cause the global configuration + // updater to refresh its view of the configuration + // keyspace. 
+ clientInfo.history.clear(); + clientInfo.history.emplace_back(std::numeric_limits::max()); + break; + } + + VersionHistory vh; + reader.deserialize(vh); + + // Read commit version out of versionstamp at end of key. + BinaryReader versionReader = + BinaryReader(kv.key.removePrefix(globalConfigHistoryPrefix), Unversioned()); + Version historyCommitVersion; + versionReader >> historyCommitVersion; + historyCommitVersion = bigEndian64(historyCommitVersion); + vh.version = historyCommitVersion; + + clientInfo.history.push_back(std::move(vh)); + } + + if (clientInfo.history.size() > 0) { + // The first item in the historical list of mutations + // is only used to: + // a) Recognize that some historical changes may have + // been missed, and the entire global + // configuration keyspace needs to be read, or.. + // b) Check which historical updates have already + // been applied. If this is the case, the first + // history item must have a version greater than + // or equal to whatever version the global + // configuration was last updated at, and + // therefore won't need to be applied again. 
+ clientInfo.history[0].mutations = Standalone>(); + } + clientInfo.id = deterministicRandom()->randomUniqueID(); - clientInfo.clientTxnInfoSampleRate = sampleRate; - clientInfo.clientTxnInfoSizeLimit = sizeLimit; db->clientInfo->set(clientInfo); } - state Future watchRateFuture = tr.watch(fdbClientInfoTxnSampleRate); - state Future watchLimitFuture = tr.watch(fdbClientInfoTxnSizeLimit); + state Future globalConfigFuture = tr.watch(globalConfigVersionKey); wait(tr.commit()); - choose { - when(wait(watchRateFuture)) { break; } - when(wait(watchLimitFuture)) { break; } - } + wait(globalConfigFuture); + break; } catch (Error& e) { wait(tr.onError(e)); } @@ -3425,7 +3739,7 @@ ACTOR Future clusterControllerCore(ClusterControllerFullInterface interf, self.addActor.send(timeKeeper(&self)); self.addActor.send(monitorProcessClasses(&self)); self.addActor.send(monitorServerInfoConfig(&self.db)); - self.addActor.send(monitorClientTxnInfoConfigs(&self.db)); + self.addActor.send(monitorGlobalConfig(&self.db)); self.addActor.send(updatedChangingDatacenters(&self)); self.addActor.send(updatedChangedDatacenters(&self)); self.addActor.send(updateDatacenterVersionDifference(&self)); diff --git a/fdbserver/SimulatedCluster.actor.cpp b/fdbserver/SimulatedCluster.actor.cpp index e5ce23da2f..f8d9610f32 100644 --- a/fdbserver/SimulatedCluster.actor.cpp +++ b/fdbserver/SimulatedCluster.actor.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include "fdbrpc/Locality.h" #include "fdbrpc/simulator.h" #include "fdbclient/DatabaseContext.h" @@ -874,7 +875,9 @@ void SimulationConfig::set_config(std::string config) { StringRef StringRefOf(const char* s) { return StringRef((uint8_t*)s, strlen(s)); } - +// Generates and sets an appropriate configuration for the database according to +// the provided testConfig. 
Some attributes are randomly generated for more coverage +// of different combinations void SimulationConfig::generateNormalConfig(const TestConfig& testConfig) { set_config("new"); const bool simple = false; // Set true to simplify simulation configs for easier debugging @@ -897,7 +900,9 @@ void SimulationConfig::generateNormalConfig(const TestConfig& testConfig) { db.resolverCount = deterministicRandom()->randomInt(1, 7); int storage_engine_type = deterministicRandom()->randomInt(0, 4); // Continuously re-pick the storage engine type if it's the one we want to exclude - while (storage_engine_type == testConfig.storageEngineExcludeType) { + while (std::find(testConfig.storageEngineExcludeTypes.begin(), + testConfig.storageEngineExcludeTypes.end(), + storage_engine_type) != testConfig.storageEngineExcludeTypes.end()) { storage_engine_type = deterministicRandom()->randomInt(0, 4); } switch (storage_engine_type) { @@ -989,11 +994,11 @@ void SimulationConfig::generateNormalConfig(const TestConfig& testConfig) { if (deterministicRandom()->random01() < 0.5) { int logSpill = deterministicRandom()->randomInt(TLogSpillType::VALUE, TLogSpillType::END); set_config(format("log_spill:=%d", logSpill)); - int logVersion = deterministicRandom()->randomInt(TLogVersion::MIN_RECRUITABLE, TLogVersion::MAX_SUPPORTED + 1); + int logVersion = deterministicRandom()->randomInt(TLogVersion::MIN_RECRUITABLE, testConfig.maxTLogVersion + 1); set_config(format("log_version:=%d", logVersion)); } else { if (deterministicRandom()->random01() < 0.7) - set_config(format("log_version:=%d", TLogVersion::MAX_SUPPORTED)); + set_config(format("log_version:=%d", testConfig.maxTLogVersion)); if (deterministicRandom()->random01() < 0.5) set_config(format("log_spill:=%d", TLogSpillType::DEFAULT)); } @@ -1663,8 +1668,17 @@ void checkTestConf(const char* testFile, TestConfig* testConfig) { sscanf(value.c_str(), "%d", &testConfig->logAntiQuorum); } - if (attrib == "storageEngineExcludeType") { - 
sscanf(value.c_str(), "%d", &testConfig->storageEngineExcludeType); + if (attrib == "storageEngineExcludeTypes") { + std::stringstream ss(value); + for (int i; ss >> i;) { + testConfig->storageEngineExcludeTypes.push_back(i); + if (ss.peek() == ',') { + ss.ignore(); + } + } + } + if (attrib == "maxTLogVersion") { + sscanf(value.c_str(), "%d", &testConfig->maxTLogVersion); } } diff --git a/fdbserver/TesterInterface.actor.h b/fdbserver/TesterInterface.actor.h index f5e84a2a58..ddfb04da22 100644 --- a/fdbserver/TesterInterface.actor.h +++ b/fdbserver/TesterInterface.actor.h @@ -29,6 +29,7 @@ #include "fdbrpc/fdbrpc.h" #include "fdbrpc/PerfMetric.h" #include "fdbclient/NativeAPI.actor.h" +#include "flow/UnitTest.h" #include "flow/actorcompiler.h" // has to be last include struct CheckReply { constexpr static FileIdentifier file_identifier = 11; @@ -109,12 +110,15 @@ struct TestConfig { bool startIncompatibleProcess = false; int logAntiQuorum = -1; // Storage Engine Types: Verify match with SimulationConfig::generateNormalConfig - // -1 = None // 0 = "ssd" // 1 = "memory" // 2 = "memory-radixtree-beta" // 3 = "ssd-redwood-experimental" - int storageEngineExcludeType = -1; + // Requires a comma-separated list of numbers WITHOUT whitespaces + std::vector storageEngineExcludeTypes; + // Set the maximum TLog version that can be selected for a test + // Refer to FDBTypes.h::TLogVersion. Defaults to the maximum supported version. 
+ int maxTLogVersion = TLogVersion::MAX_SUPPORTED; }; struct TesterInterface { @@ -135,7 +139,7 @@ ACTOR Future testerServerCore(TesterInterface interf, LocalityData locality); enum test_location_t { TEST_HERE, TEST_ON_SERVERS, TEST_ON_TESTERS }; -enum test_type_t { TEST_TYPE_FROM_FILE, TEST_TYPE_CONSISTENCY_CHECK }; +enum test_type_t { TEST_TYPE_FROM_FILE, TEST_TYPE_CONSISTENCY_CHECK, TEST_TYPE_UNIT_TESTS }; ACTOR Future runTests(Reference connFile, test_type_t whatToRun, @@ -143,7 +147,8 @@ ACTOR Future runTests(Reference connFile, int minTestersExpected, std::string fileName = std::string(), StringRef startingConfiguration = StringRef(), - LocalityData locality = LocalityData()); + LocalityData locality = LocalityData(), + UnitTestParameters testOptions = UnitTestParameters()); #include "flow/unactorcompiler.h" #endif diff --git a/fdbserver/VersionedBTree.actor.cpp b/fdbserver/VersionedBTree.actor.cpp index 8051d956b0..fab29a7034 100644 --- a/fdbserver/VersionedBTree.actor.cpp +++ b/fdbserver/VersionedBTree.actor.cpp @@ -6956,7 +6956,7 @@ RedwoodRecordRef randomRedwoodRecordRef(const std::string& keyBuffer, const std: return rec; } -TEST_CASE("!/redwood/correctness/unit/RedwoodRecordRef") { +TEST_CASE("/redwood/correctness/unit/RedwoodRecordRef") { ASSERT(RedwoodRecordRef::Delta::LengthFormatSizes[0] == 3); ASSERT(RedwoodRecordRef::Delta::LengthFormatSizes[1] == 4); ASSERT(RedwoodRecordRef::Delta::LengthFormatSizes[2] == 6); @@ -7029,7 +7029,7 @@ TEST_CASE("!/redwood/correctness/unit/RedwoodRecordRef") { bytes += deltaTest(a, b); } double elapsed = timer() - start; - printf("DeltaTest() on random large records %g M/s %g MB/s\n", count / elapsed / 1e6, bytes / elapsed / 1e6); + printf("DeltaTest() on random large records %f M/s %f MB/s\n", count / elapsed / 1e6, bytes / elapsed / 1e6); keyBuffer.resize(30); valueBuffer.resize(100); @@ -7041,7 +7041,7 @@ TEST_CASE("!/redwood/correctness/unit/RedwoodRecordRef") { RedwoodRecordRef b = 
randomRedwoodRecordRef(keyBuffer, valueBuffer); bytes += deltaTest(a, b); } - printf("DeltaTest() on random small records %g M/s %g MB/s\n", count / elapsed / 1e6, bytes / elapsed / 1e6); + printf("DeltaTest() on random small records %f M/s %f MB/s\n", count / elapsed / 1e6, bytes / elapsed / 1e6); RedwoodRecordRef rec1; RedwoodRecordRef rec2; @@ -7058,7 +7058,7 @@ TEST_CASE("!/redwood/correctness/unit/RedwoodRecordRef") { for (i = 0; i < count; ++i) { total += rec1.getCommonPrefixLen(rec2, 50); } - printf("%" PRId64 " getCommonPrefixLen(skip=50) %g M/s\n", total, count / (timer() - start) / 1e6); + printf("%" PRId64 " getCommonPrefixLen(skip=50) %f M/s\n", total, count / (timer() - start) / 1e6); start = timer(); total = 0; @@ -7066,7 +7066,7 @@ TEST_CASE("!/redwood/correctness/unit/RedwoodRecordRef") { for (i = 0; i < count; ++i) { total += rec1.getCommonPrefixLen(rec2, 0); } - printf("%" PRId64 " getCommonPrefixLen(skip=0) %g M/s\n", total, count / (timer() - start) / 1e6); + printf("%" PRId64 " getCommonPrefixLen(skip=0) %f M/s\n", total, count / (timer() - start) / 1e6); char buf[1000]; RedwoodRecordRef::Delta& d = *(RedwoodRecordRef::Delta*)buf; @@ -7079,7 +7079,7 @@ TEST_CASE("!/redwood/correctness/unit/RedwoodRecordRef") { for (i = 0; i < count; ++i) { total += rec1.writeDelta(d, rec2, commonPrefix); } - printf("%" PRId64 " writeDelta(commonPrefix=%d) %g M/s\n", total, commonPrefix, count / (timer() - start) / 1e6); + printf("%" PRId64 " writeDelta(commonPrefix=%d) %f M/s\n", total, commonPrefix, count / (timer() - start) / 1e6); start = timer(); total = 0; @@ -7087,12 +7087,12 @@ TEST_CASE("!/redwood/correctness/unit/RedwoodRecordRef") { for (i = 0; i < count; ++i) { total += rec1.writeDelta(d, rec2); } - printf("%" PRId64 " writeDelta() %g M/s\n", total, count / (timer() - start) / 1e6); + printf("%" PRId64 " writeDelta() %f M/s\n", total, count / (timer() - start) / 1e6); return Void(); } 
-TEST_CASE("!/redwood/correctness/unit/deltaTree/RedwoodRecordRef") { +TEST_CASE("/redwood/correctness/unit/deltaTree/RedwoodRecordRef") { // Sanity check on delta tree node format ASSERT(DeltaTree::Node::headerSize(false) == 4); ASSERT(DeltaTree::Node::headerSize(true) == 8); @@ -7271,7 +7271,7 @@ TEST_CASE("!/redwood/correctness/unit/deltaTree/RedwoodRecordRef") { return Void(); } -TEST_CASE("!/redwood/correctness/unit/deltaTree/IntIntPair") { +TEST_CASE("/redwood/correctness/unit/deltaTree/IntIntPair") { const int N = 200; IntIntPair prev = { 1, 0 }; IntIntPair next = { 10000, 10000 }; @@ -7615,7 +7615,7 @@ struct SimpleCounter { std::string toString() { return format("%" PRId64 "/%.2f/%.2f", x, rate() / 1e6, avgRate() / 1e6); } }; -TEST_CASE("!/redwood/performance/mutationBuffer") { +TEST_CASE(":/redwood/performance/mutationBuffer") { // This test uses pregenerated short random keys int count = 10e6; @@ -7643,34 +7643,47 @@ TEST_CASE("!/redwood/performance/mutationBuffer") { return Void(); } -TEST_CASE("!/redwood/correctness/btree") { +TEST_CASE("/redwood/correctness/btree") { g_redwoodMetricsActor = Void(); // Prevent trace event metrics from starting g_redwoodMetrics.clear(); - state std::string pagerFile = "unittest_pageFile.redwood"; + state std::string fileName = params.get("fileName").orDefault("unittest_pageFile.redwood"); IPager2* pager; - state bool serialTest = deterministicRandom()->coinflip(); - state bool shortTest = deterministicRandom()->coinflip(); + state bool serialTest = params.getInt("serialTest").orDefault(deterministicRandom()->coinflip()); + state bool shortTest = params.getInt("shortTest").orDefault(deterministicRandom()->coinflip()); state int pageSize = shortTest ? 200 : (deterministicRandom()->coinflip() ? 4096 : deterministicRandom()->randomInt(200, 400)); - state int64_t targetPageOps = shortTest ? 
50000 : 1000000; - state bool pagerMemoryOnly = shortTest && (deterministicRandom()->random01() < .001); - state int maxKeySize = deterministicRandom()->randomInt(1, pageSize * 2); - state int maxValueSize = randomSize(pageSize * 25); - state int maxCommitSize = shortTest ? 1000 : randomSize(std::min((maxKeySize + maxValueSize) * 20000, 10e6)); - state double clearProbability = deterministicRandom()->random01() * .1; - state double clearSingleKeyProbability = deterministicRandom()->random01(); - state double clearPostSetProbability = deterministicRandom()->random01() * .1; - state double coldStartProbability = pagerMemoryOnly ? 0 : (deterministicRandom()->random01() * 0.3); - state double advanceOldVersionProbability = deterministicRandom()->random01(); + state int64_t targetPageOps = params.getInt("targetPageOps").orDefault(shortTest ? 50000 : 1000000); + state bool pagerMemoryOnly = + params.getInt("pagerMemoryOnly").orDefault(shortTest && (deterministicRandom()->random01() < .001)); + state int maxKeySize = params.getInt("maxKeySize").orDefault(deterministicRandom()->randomInt(1, pageSize * 2)); + state int maxValueSize = params.getInt("maxValueSize").orDefault(randomSize(pageSize * 25)); + state int maxCommitSize = + params.getInt("maxCommitSize") + .orDefault(shortTest ? 1000 : randomSize(std::min((maxKeySize + maxValueSize) * 20000, 10e6))); + state double clearProbability = + params.getDouble("clearProbability").orDefault(deterministicRandom()->random01() * .1); + state double clearSingleKeyProbability = + params.getDouble("clearSingleKeyProbability").orDefault(deterministicRandom()->random01()); + state double clearPostSetProbability = + params.getDouble("clearPostSetProbability").orDefault(deterministicRandom()->random01() * .1); + state double coldStartProbability = params.getDouble("coldStartProbability") + .orDefault(pagerMemoryOnly ? 
0 : (deterministicRandom()->random01() * 0.3)); + state double advanceOldVersionProbability = + params.getDouble("advanceOldVersionProbability").orDefault(deterministicRandom()->random01()); state int64_t cacheSizeBytes = - pagerMemoryOnly ? 2e9 : (pageSize * deterministicRandom()->randomInt(1, (BUGGIFY ? 2 : 10000) + 1)); - state Version versionIncrement = deterministicRandom()->randomInt64(1, 1e8); - state Version remapCleanupWindow = BUGGIFY ? 0 : deterministicRandom()->randomInt64(1, versionIncrement * 50); - state int maxVerificationMapEntries = 300e3; + params.getInt("cacheSizeBytes") + .orDefault(pagerMemoryOnly ? 2e9 + : (pageSize * deterministicRandom()->randomInt(1, (BUGGIFY ? 2 : 10000) + 1))); + state Version versionIncrement = + params.getInt("versionIncrement").orDefault(deterministicRandom()->randomInt64(1, 1e8)); + state Version remapCleanupWindow = + params.getInt("remapCleanupWindow") + .orDefault(BUGGIFY ? 0 : deterministicRandom()->randomInt64(1, versionIncrement * 50)); + state int maxVerificationMapEntries = params.getInt("maxVerificationMapEntries").orDefault(300e3); printf("\n"); printf("targetPageOps: %" PRId64 "\n", targetPageOps); @@ -7693,11 +7706,11 @@ TEST_CASE("!/redwood/correctness/btree") { printf("\n"); printf("Deleting existing test data...\n"); - deleteFile(pagerFile); + deleteFile(fileName); printf("Initializing...\n"); - pager = new DWALPager(pageSize, pagerFile, cacheSizeBytes, remapCleanupWindow, pagerMemoryOnly); - state VersionedBTree* btree = new VersionedBTree(pager, pagerFile); + pager = new DWALPager(pageSize, fileName, cacheSizeBytes, remapCleanupWindow, pagerMemoryOnly); + state VersionedBTree* btree = new VersionedBTree(pager, fileName); wait(btree->init()); state std::map, Optional> written; @@ -7900,8 +7913,8 @@ TEST_CASE("!/redwood/correctness/btree") { wait(closedFuture); printf("Reopening btree from disk.\n"); - IPager2* pager = new DWALPager(pageSize, pagerFile, cacheSizeBytes, remapCleanupWindow); - btree = 
new VersionedBTree(pager, pagerFile); + IPager2* pager = new DWALPager(pageSize, fileName, cacheSizeBytes, remapCleanupWindow); + btree = new VersionedBTree(pager, fileName); wait(btree->init()); Version v = btree->getLatestVersion(); @@ -7937,7 +7950,7 @@ TEST_CASE("!/redwood/correctness/btree") { state Future closedFuture = btree->onClosed(); btree->close(); wait(closedFuture); - btree = new VersionedBTree(new DWALPager(pageSize, pagerFile, cacheSizeBytes, 0), pagerFile); + btree = new VersionedBTree(new DWALPager(pageSize, fileName, cacheSizeBytes, 0), fileName); wait(btree->init()); wait(btree->clearAllAndCheckSanity()); @@ -8003,7 +8016,7 @@ ACTOR Future randomScans(VersionedBTree* btree, return Void(); } -TEST_CASE("!/redwood/correctness/pager/cow") { +TEST_CASE(":/redwood/correctness/pager/cow") { state std::string pagerFile = "unittest_pageFile.redwood"; printf("Deleting old test data\n"); deleteFile(pagerFile); @@ -8030,7 +8043,7 @@ TEST_CASE("!/redwood/correctness/pager/cow") { return Void(); } -TEST_CASE("!/redwood/performance/set") { +TEST_CASE(":/redwood/performance/set") { state SignalableActorCollection actors; g_redwoodMetricsActor = Void(); // Prevent trace event metrics from starting @@ -8045,21 +8058,22 @@ TEST_CASE("!/redwood/performance/set") { deleteFile(pagerFile); } - state int pageSize = SERVER_KNOBS->REDWOOD_DEFAULT_PAGE_SIZE; - state int64_t pageCacheBytes = FLOW_KNOBS->PAGE_CACHE_4K; - state int nodeCount = 1e9; - state int maxRecordsPerCommit = 20000; - state int maxKVBytesPerCommit = 20e6; - state int64_t kvBytesTarget = 4e9; - state int minKeyPrefixBytes = 25; - state int maxKeyPrefixBytes = 25; - state int minValueSize = 100; - state int maxValueSize = 500; - state int minConsecutiveRun = 1; - state int maxConsecutiveRun = 100000; - state char firstKeyChar = 'a'; - state char lastKeyChar = 'm'; - state Version remapCleanupWindow = SERVER_KNOBS->REDWOOD_REMAP_CLEANUP_WINDOW; + state int pageSize = 
params.getInt("pageSize").orDefault(SERVER_KNOBS->REDWOOD_DEFAULT_PAGE_SIZE); + state int64_t pageCacheBytes = params.getInt("pageCacheBytes").orDefault(FLOW_KNOBS->PAGE_CACHE_4K); + state int nodeCount = params.getInt("nodeCount").orDefault(1e9); + state int maxRecordsPerCommit = params.getInt("maxRecordsPerCommit").orDefault(20000); + state int maxKVBytesPerCommit = params.getInt("maxKVBytesPerCommit").orDefault(20e6); + state int64_t kvBytesTarget = params.getInt("kvBytesTarget").orDefault(4e9); + state int minKeyPrefixBytes = params.getInt("minKeyPrefixBytes").orDefault(25); + state int maxKeyPrefixBytes = params.getInt("maxKeyPrefixBytes").orDefault(25); + state int minValueSize = params.getInt("minValueSize").orDefault(100); + state int maxValueSize = params.getInt("maxValueSize").orDefault(500); + state int minConsecutiveRun = params.getInt("minConsecutiveRun").orDefault(1); + state int maxConsecutiveRun = params.getInt("maxConsecutiveRun").orDefault(100); + state char firstKeyChar = params.get("firstKeyChar").orDefault("a")[0]; + state char lastKeyChar = params.get("lastKeyChar").orDefault("m")[0]; + state Version remapCleanupWindow = + params.getInt("remapCleanupWindow").orDefault(SERVER_KNOBS->REDWOOD_REMAP_CLEANUP_WINDOW); printf("pageSize: %d\n", pageSize); printf("pageCacheBytes: %" PRId64 "\n", pageCacheBytes); @@ -8541,11 +8555,11 @@ ACTOR Future doPrefixInsertComparison(int suffixSize, return Void(); } -TEST_CASE("!/redwood/performance/prefixSizeComparison") { - state int suffixSize = 12; - state int valueSize = 100; - state int recordCountTarget = 100e6; - state int usePrefixesInOrder = false; +TEST_CASE(":/redwood/performance/prefixSizeComparison") { + state int suffixSize = params.getInt("suffixSize").orDefault(12); + state int valueSize = params.getInt("valueSize").orDefault(100); + state int recordCountTarget = params.getInt("recordCountTarget").orDefault(100e6); + state bool usePrefixesInOrder = 
params.getInt("usePrefixesInOrder").orDefault(0); wait(doPrefixInsertComparison( suffixSize, valueSize, recordCountTarget, usePrefixesInOrder, KVSource({ { 10, 100000 } }))); @@ -8562,10 +8576,10 @@ TEST_CASE("!/redwood/performance/prefixSizeComparison") { return Void(); } -TEST_CASE("!/redwood/performance/sequentialInsert") { - state int prefixLen = 30; - state int valueSize = 100; - state int recordCountTarget = 100e6; +TEST_CASE(":/redwood/performance/sequentialInsert") { + state int prefixLen = params.getInt("prefixLen").orDefault(30); + state int valueSize = params.getInt("valueSize").orDefault(100); + state int recordCountTarget = params.getInt("recordCountTarget").orDefault(100e6); deleteFile("test.redwood"); wait(delay(5)); diff --git a/fdbserver/fdbserver.actor.cpp b/fdbserver/fdbserver.actor.cpp index ce2a903c1f..7785a7a761 100644 --- a/fdbserver/fdbserver.actor.cpp +++ b/fdbserver/fdbserver.actor.cpp @@ -67,6 +67,7 @@ #include "flow/TLSConfig.actor.h" #include "flow/Tracing.h" #include "flow/WriteOnlySet.h" +#include "flow/UnitTest.h" #if defined(__linux__) || defined(__FreeBSD__) #include @@ -89,7 +90,7 @@ enum { OPT_CONNFILE, OPT_SEEDCONNFILE, OPT_SEEDCONNSTRING, OPT_ROLE, OPT_LISTEN, OPT_PUBLICADDR, OPT_DATAFOLDER, OPT_LOGFOLDER, OPT_PARENTPID, OPT_TRACER, OPT_NEWCONSOLE, OPT_NOBOX, OPT_TESTFILE, OPT_RESTARTING, OPT_RESTORING, OPT_RANDOMSEED, OPT_KEY, OPT_MEMLIMIT, OPT_STORAGEMEMLIMIT, OPT_CACHEMEMLIMIT, OPT_MACHINEID, OPT_DCID, OPT_MACHINE_CLASS, OPT_BUGGIFY, OPT_VERSION, OPT_BUILD_FLAGS, OPT_CRASHONERROR, OPT_HELP, OPT_NETWORKIMPL, OPT_NOBUFSTDOUT, OPT_BUFSTDOUTERR, - OPT_TRACECLOCK, OPT_NUMTESTERS, OPT_DEVHELP, OPT_ROLLSIZE, OPT_MAXLOGS, OPT_MAXLOGSSIZE, OPT_KNOB, OPT_TESTSERVERS, OPT_TEST_ON_SERVERS, OPT_METRICSCONNFILE, + OPT_TRACECLOCK, OPT_NUMTESTERS, OPT_DEVHELP, OPT_ROLLSIZE, OPT_MAXLOGS, OPT_MAXLOGSSIZE, OPT_KNOB, OPT_UNITTESTPARAM, OPT_TESTSERVERS, OPT_TEST_ON_SERVERS, OPT_METRICSCONNFILE, OPT_METRICSPREFIX, OPT_LOGGROUP, OPT_LOCALITY, 
OPT_IO_TRUST_SECONDS, OPT_IO_TRUST_WARN_ONLY, OPT_FILESYSTEM, OPT_PROFILER_RSS_SIZE, OPT_KVFILE, OPT_TRACE_FORMAT, OPT_WHITELIST_BINPATH, OPT_BLOB_CREDENTIAL_FILE }; @@ -163,6 +164,7 @@ CSimpleOpt::SOption g_rgOptions[] = { { OPT_HELP, "--help", SO_NONE }, { OPT_DEVHELP, "--dev-help", SO_NONE }, { OPT_KNOB, "--knob_", SO_REQ_SEP }, + { OPT_UNITTESTPARAM, "--test_", SO_REQ_SEP }, { OPT_LOCALITY, "--locality_", SO_REQ_SEP }, { OPT_TESTSERVERS, "--testservers", SO_REQ_SEP }, { OPT_TEST_ON_SERVERS, "--testonservers", SO_NONE }, @@ -623,16 +625,19 @@ static void printUsage(const char* name, bool devhelp) { printOptionUsage("-h, -?, --help", "Display this help and exit."); if (devhelp) { printf(" --build_flags Print build information and exit.\n"); - printOptionUsage("-r ROLE, --role ROLE", - " Server role (valid options are fdbd, test, multitest," - " simulation, networktestclient, networktestserver, restore" - " consistencycheck, kvfileintegritycheck, kvfilegeneratesums). The default is `fdbd'."); + printOptionUsage( + "-r ROLE, --role ROLE", + " Server role (valid options are fdbd, test, multitest," + " simulation, networktestclient, networktestserver, restore" + " consistencycheck, kvfileintegritycheck, kvfilegeneratesums, unittests). The default is `fdbd'."); #ifdef _WIN32 printOptionUsage("-n, --newconsole", " Create a new console."); printOptionUsage("-q, --no_dialog", " Disable error dialog on crash."); printOptionUsage("--parentpid PID", " Specify a process after whose termination to exit."); #endif - printOptionUsage("-f TESTFILE, --testfile", " Testfile to run, defaults to `tests/default.txt'."); + printOptionUsage("-f TESTFILE, --testfile", + " Testfile to run, defaults to `tests/default.txt'. 
If role is `unittests', specifies which " + "unit tests to run as a search prefix."); printOptionUsage("-R, --restarting", " Restart a previous simulation that was cleanly shut down."); printOptionUsage("-s SEED, --seed SEED", " Random seed."); printOptionUsage("-k KEY, --key KEY", "Target key for search role."); @@ -652,6 +657,8 @@ static void printUsage(const char* name, bool devhelp) { printOptionUsage("--num_testers NUM", " A multitester will wait for NUM testers before starting" " (defaults to 1)."); + printOptionUsage("--test_PARAMNAME PARAMVALUE", + " Set a UnitTest named parameter to the given value. Names are case sensitive."); #ifdef __linux__ printOptionUsage("--rsssize SIZE", " Turns on automatic heap profiling when RSS memory size exceeds" @@ -923,6 +930,7 @@ enum class ServerRole { SkipListTest, Test, VersionedMapTest, + UnitTests }; struct CLIOptions { std::string commandLine; @@ -971,6 +979,7 @@ struct CLIOptions { Reference connectionFile; Standalone machineId; + UnitTestParameters testParams; static CLIOptions parseArgs(int argc, char* argv[]) { CLIOptions opts; @@ -1045,6 +1054,15 @@ private: knobs.push_back(std::make_pair(syn, args.OptionArg())); break; } + case OPT_UNITTESTPARAM: { + std::string syn = args.OptionSyntax(); + if (!StringRef(syn).startsWith(LiteralStringRef("--test_"))) { + fprintf(stderr, "ERROR: unable to parse knob option '%s'\n", syn.c_str()); + flushAndExit(FDB_EXIT_ERROR); + } + testParams.set(syn.substr(7), args.OptionArg()); + break; + } case OPT_LOCALITY: { std::string syn = args.OptionSyntax(); if (!StringRef(syn).startsWith(LiteralStringRef("--locality_"))) { @@ -1103,6 +1121,8 @@ private: role = ServerRole::KVFileGenerateIOLogChecksums; else if (!strcmp(sRole, "consistencycheck")) role = ServerRole::ConsistencyCheck; + else if (!strcmp(sRole, "unittests")) + role = ServerRole::UnitTests; else { fprintf(stderr, "ERROR: Unknown role `%s'\n", sRole); printHelpTeaser(argv[0]); @@ -1462,7 +1482,8 @@ private: return 
StringRef(addr).startsWith(LiteralStringRef("auto:")); }); if ((role != ServerRole::Simulation && role != ServerRole::CreateTemplateDatabase && - role != ServerRole::KVFileIntegrityCheck && role != ServerRole::KVFileGenerateIOLogChecksums) || + role != ServerRole::KVFileIntegrityCheck && role != ServerRole::KVFileGenerateIOLogChecksums && + role != ServerRole::UnitTests) || autoPublicAddress) { if (seedSpecified && !fileExists(connFile)) { @@ -1999,6 +2020,18 @@ int main(int argc, char* argv[]) { StringRef(), opts.localities)); g_network->run(); + } else if (role == ServerRole::UnitTests) { + setupRunLoopProfiler(); + auto m = startSystemMonitor(opts.dataFolder, opts.dcId, opts.zoneId, opts.zoneId); + f = stopAfter(runTests(opts.connectionFile, + TEST_TYPE_UNIT_TESTS, + TEST_HERE, + 1, + opts.testFile, + StringRef(), + opts.localities, + opts.testParams)); + g_network->run(); } else if (role == ServerRole::CreateTemplateDatabase) { createTemplateDatabase(); } else if (role == ServerRole::NetworkTestClient) { diff --git a/fdbserver/networktest.actor.cpp b/fdbserver/networktest.actor.cpp index 4acb46a2f0..d9ef7e4857 100644 --- a/fdbserver/networktest.actor.cpp +++ b/fdbserver/networktest.actor.cpp @@ -517,13 +517,6 @@ struct P2PNetworkTest { self->listeners.size(), self->remotes.size(), self->connectionsOut); - printf("Request size: %s\n", self->requestBytes.toString().c_str()); - printf("Response size: %s\n", self->replyBytes.toString().c_str()); - printf("Requests per outgoing session: %d\n", self->requests.toString().c_str()); - printf("Delay before socket read: %s\n", self->waitReadMilliseconds.toString().c_str()); - printf("Delay before socket write: %s\n", self->waitWriteMilliseconds.toString().c_str()); - printf("Delay before session close: %s\n", self->idleMilliseconds.toString().c_str()); - printf("Send/Recv size %d bytes\n", FLOW_KNOBS->MAX_PACKET_SEND_BYTES); for (auto n : self->remotes) { printf("Remote: %s\n", n.toString().c_str()); @@ -534,6 +527,19 @@ 
struct P2PNetworkTest { actors.add(incoming(self, el)); } + printf("Request size: %s\n", self->requestBytes.toString().c_str()); + printf("Response size: %s\n", self->replyBytes.toString().c_str()); + printf("Requests per outgoing session: %s\n", self->requests.toString().c_str()); + printf("Delay before socket read: %s\n", self->waitReadMilliseconds.toString().c_str()); + printf("Delay before socket write: %s\n", self->waitWriteMilliseconds.toString().c_str()); + printf("Delay before session close: %s\n", self->idleMilliseconds.toString().c_str()); + printf("Send/Recv size %d bytes\n", FLOW_KNOBS->MAX_PACKET_SEND_BYTES); + + if ((self->remotes.empty() || self->connectionsOut == 0) && self->listeners.empty()) { + printf("No listeners and no remotes or connectionsOut, so there is nothing to do!\n"); + ASSERT((!self->remotes.empty() && (self->connectionsOut > 0)) || !self->listeners.empty()); + } + if (!self->remotes.empty()) { for (int i = 0; i < self->connectionsOut; ++i) { actors.add(outgoing(self)); @@ -549,27 +555,30 @@ struct P2PNetworkTest { Future run() { return run_impl(this); } }; -int getEnvInt(const char* name, int defaultValue = 0) { - const char* val = getenv(name); - return val != nullptr ? atol(val) : defaultValue; -} - -std::string getEnvStr(const char* name, std::string defaultValue = "") { - const char* val = getenv(name); - return val != nullptr ? val : defaultValue; -} - -// TODO: Remove this hacky thing and make a "networkp2ptest" role in fdbserver -TEST_CASE("!p2ptest") { - state P2PNetworkTest p2p(getEnvStr("listenerAddresses", ""), - getEnvStr("remoteAddresses", ""), - getEnvInt("connectionsOut", 0), - getEnvStr("requestBytes", "0"), - getEnvStr("replyBytes", "0"), - getEnvStr("requests", "0"), - getEnvStr("idleMilliseconds", "0"), - getEnvStr("waitReadMilliseconds", "0"), - getEnvStr("waitWriteMilliseconds", "0")); +// Peer-to-Peer network test. +// One or more instances can be run and set to talk to each other. 
+// Each instance +// - listens on 0 or more listenerAddresses +// - maintains 0 or more connectionsOut at a time, each to a random choice from remoteAddresses +// Address lists are a string of comma-separated IP:port[:tls] strings. +// +// The other arguments can be specified as "fixedValue" or "minValue:maxValue". +// Each outgoing connection will live for a random requests count. +// Each request will +// - send a random requestBytes sized message +// - wait for a random replyBytes sized response. +// The client will close the connection after a random idleMilliseconds. +// Reads and writes can optionally preceded by random delays, waitReadMilliseconds and waitWriteMilliseconds. +TEST_CASE(":/network/p2ptest") { + state P2PNetworkTest p2p(params.get("listenerAddresses").orDefault(""), + params.get("remoteAddresses").orDefault(""), + params.getInt("connectionsOut").orDefault(1), + params.get("requestBytes").orDefault("50:100"), + params.get("replyBytes").orDefault("500:1000"), + params.get("requests").orDefault("10:10000"), + params.get("idleMilliseconds").orDefault("0"), + params.get("waitReadMilliseconds").orDefault("0"), + params.get("waitWriteMilliseconds").orDefault("0")); wait(p2p.run()); return Void(); diff --git a/fdbserver/tester.actor.cpp b/fdbserver/tester.actor.cpp index 839df40999..fa18a376a4 100644 --- a/fdbserver/tester.actor.cpp +++ b/fdbserver/tester.actor.cpp @@ -763,7 +763,7 @@ ACTOR Future runWorkload(Database cx, std::vector> testSpecGlobalKey } }, { "startIncompatibleProcess", [](const std::string& value) { TraceEvent("TestParserTest").detail("ParsedStartIncompatibleProcess", value); } }, - { "storageEngineExcludeType", - [](const std::string& value) { TraceEvent("TestParserTest").detail("ParsedStorageEngineExcludeType", ""); } } + { "storageEngineExcludeTypes", + [](const std::string& value) { TraceEvent("TestParserTest").detail("ParsedStorageEngineExcludeTypes", ""); } }, + { "maxTLogVersion", + [](const std::string& value) { 
TraceEvent("TestParserTest").detail("ParsedMaxTLogVersion", ""); } } }; std::map> testSpecTestKeys = { @@ -1572,13 +1574,16 @@ ACTOR Future runTests(Reference connFile, int minTestersExpected, std::string fileName, StringRef startingConfiguration, - LocalityData locality) { + LocalityData locality, + UnitTestParameters testOptions) { state vector testSpecs; auto cc = makeReference>>(); auto ci = makeReference>>(); vector> actors; - actors.push_back(reportErrors(monitorLeader(connFile, cc), "MonitorLeader")); - actors.push_back(reportErrors(extractClusterInterface(cc, ci), "ExtractClusterInterface")); + if (connFile) { + actors.push_back(reportErrors(monitorLeader(connFile, cc), "MonitorLeader")); + actors.push_back(reportErrors(extractClusterInterface(cc, ci), "ExtractClusterInterface")); + } if (whatToRun == TEST_TYPE_CONSISTENCY_CHECK) { TestSpec spec; @@ -1603,6 +1608,22 @@ ACTOR Future runTests(Reference connFile, KeyValueRef(LiteralStringRef("shuffleShards"), LiteralStringRef("true"))); spec.options.push_back_deep(spec.options.arena(), options); testSpecs.push_back(spec); + } else if (whatToRun == TEST_TYPE_UNIT_TESTS) { + TestSpec spec; + Standalone> options; + spec.title = LiteralStringRef("UnitTests"); + spec.startDelay = 0; + spec.useDB = false; + spec.timeout = 0; + options.push_back_deep(options.arena(), + KeyValueRef(LiteralStringRef("testName"), LiteralStringRef("UnitTests"))); + options.push_back_deep(options.arena(), KeyValueRef(LiteralStringRef("testsMatching"), fileName)); + // Add unit test options as test spec options + for (auto& kv : testOptions.params) { + options.push_back_deep(options.arena(), KeyValueRef(kv.first, kv.second)); + } + spec.options.push_back_deep(spec.options.arena(), options); + testSpecs.push_back(spec); } else { ifstream ifs; ifs.open(fileName.c_str(), ifstream::in); diff --git a/fdbserver/workloads/ClientTransactionProfileCorrectness.actor.cpp b/fdbserver/workloads/ClientTransactionProfileCorrectness.actor.cpp index 
a5d6ca18be..5c99263f58 100644 --- a/fdbserver/workloads/ClientTransactionProfileCorrectness.actor.cpp +++ b/fdbserver/workloads/ClientTransactionProfileCorrectness.actor.cpp @@ -1,5 +1,6 @@ #include "fdbserver/workloads/workloads.actor.h" #include "fdbserver/ServerDBInfo.h" +#include "fdbclient/GlobalConfig.actor.h" #include "fdbclient/ManagementAPI.actor.h" #include "fdbclient/RunTransaction.actor.h" #include "flow/actorcompiler.h" // has to be last include @@ -269,10 +270,12 @@ struct ClientTransactionProfileCorrectnessWorkload : TestWorkload { ACTOR Future changeProfilingParameters(Database cx, int64_t sizeLimit, double sampleProbability) { wait(runRYWTransaction(cx, [=](Reference tr) -> Future { - tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); + tr->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES); tr->setOption(FDBTransactionOptions::LOCK_AWARE); - tr->set(fdbClientInfoTxnSampleRate, BinaryWriter::toValue(sampleProbability, Unversioned())); - tr->set(fdbClientInfoTxnSizeLimit, BinaryWriter::toValue(sizeLimit, Unversioned())); + Tuple rate = Tuple().appendDouble(sampleProbability); + Tuple size = Tuple().append(sizeLimit); + tr->set(GlobalConfig::prefixedKey(fdbClientInfoTxnSampleRate), rate.pack()); + tr->set(GlobalConfig::prefixedKey(fdbClientInfoTxnSizeLimit), size.pack()); return Void(); })); return Void(); diff --git a/fdbserver/workloads/SpecialKeySpaceCorrectness.actor.cpp b/fdbserver/workloads/SpecialKeySpaceCorrectness.actor.cpp index 34c1f32cf4..a6c98910d2 100644 --- a/fdbserver/workloads/SpecialKeySpaceCorrectness.actor.cpp +++ b/fdbserver/workloads/SpecialKeySpaceCorrectness.actor.cpp @@ -21,6 +21,7 @@ #include "boost/lexical_cast.hpp" #include "boost/algorithm/string.hpp" +#include "fdbclient/GlobalConfig.actor.h" #include "fdbclient/ManagementAPI.actor.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbclient/ReadYourWrites.h" diff --git a/fdbserver/workloads/UnitTests.actor.cpp 
b/fdbserver/workloads/UnitTests.actor.cpp index 2cb4210d0f..db816fe4c7 100644 --- a/fdbserver/workloads/UnitTests.actor.cpp +++ b/fdbserver/workloads/UnitTests.actor.cpp @@ -34,6 +34,7 @@ struct UnitTestWorkload : TestWorkload { bool enabled; std::string testPattern; int testRunLimit; + UnitTestParameters testParams; PerfIntCounter testsAvailable, testsExecuted, testsFailed; PerfDoubleCounter totalWallTime, totalSimTime; @@ -45,6 +46,14 @@ struct UnitTestWorkload : TestWorkload { enabled = !clientId; // only do this on the "first" client testPattern = getOption(options, LiteralStringRef("testsMatching"), Value()).toString(); testRunLimit = getOption(options, LiteralStringRef("maxTestCases"), -1); + + // Consume all remaining options as testParams which the unit test can access + for (auto& kv : options) { + if (kv.value.size() != 0) { + testParams.set(kv.key.toString(), getOption(options, kv.key, StringRef()).toString()); + } + } + forceLinkIndexedSetTests(); forceLinkDequeTests(); forceLinkFlowTests(); @@ -94,7 +103,7 @@ struct UnitTestWorkload : TestWorkload { state double start_timer = timer(); try { - wait(test->func()); + wait(test->func(self->testParams)); } catch (Error& e) { ++self->testsFailed; result = e; diff --git a/flow/Knobs.cpp b/flow/Knobs.cpp index a173ba43bf..4a3eb4e2d7 100644 --- a/flow/Knobs.cpp +++ b/flow/Knobs.cpp @@ -135,6 +135,7 @@ void FlowKnobs::initialize(bool randomize, bool isSimulated) { init( DISABLE_POSIX_KERNEL_AIO, 0 ); //AsyncFileNonDurable + init( NON_DURABLE_MAX_WRITE_DELAY, 5.0 ); init( MAX_PRIOR_MODIFICATION_DELAY, 1.0 ); if( randomize && BUGGIFY ) MAX_PRIOR_MODIFICATION_DELAY = 10.0; //GenericActors diff --git a/flow/Knobs.h b/flow/Knobs.h index ab088382f8..67ec3b82b7 100644 --- a/flow/Knobs.h +++ b/flow/Knobs.h @@ -149,6 +149,7 @@ public: int DISABLE_POSIX_KERNEL_AIO; // AsyncFileNonDurable + double NON_DURABLE_MAX_WRITE_DELAY; double MAX_PRIOR_MODIFICATION_DELAY; // GenericActors diff --git a/flow/UnitTest.cpp 
b/flow/UnitTest.cpp index 1d383ac00e..f797fc32c1 100644 --- a/flow/UnitTest.cpp +++ b/flow/UnitTest.cpp @@ -26,3 +26,40 @@ UnitTest::UnitTest(const char* name, const char* file, int line, TestFunction fu : name(name), file(file), line(line), func(func), next(g_unittests.tests) { g_unittests.tests = this; } + +void UnitTestParameters::set(const std::string& name, const std::string& value) { + printf("setting %s = %s\n", name.c_str(), value.c_str()); + params[name] = value; +} + +Optional UnitTestParameters::get(const std::string& name) const { + auto it = params.find(name); + if (it != params.end()) { + return it->second; + } + return {}; +} + +void UnitTestParameters::set(const std::string& name, int64_t value) { + set(name, format("%" PRId64, value)); +}; + +void UnitTestParameters::set(const std::string& name, double value) { + set(name, format("%g", value)); +}; + +Optional UnitTestParameters::getInt(const std::string& name) const { + auto opt = get(name); + if (opt.present()) { + return atoll(opt.get().c_str()); + } + return {}; +} + +Optional UnitTestParameters::getDouble(const std::string& name) const { + auto opt = get(name); + if (opt.present()) { + return atof(opt.get().c_str()); + } + return {}; +} diff --git a/flow/UnitTest.h b/flow/UnitTest.h index c76344e4bb..3a0d4c1db6 100644 --- a/flow/UnitTest.h +++ b/flow/UnitTest.h @@ -45,8 +45,34 @@ #include "flow/flow.h" +#include + +struct UnitTestParameters { + // Map of named case-sensitive parameters + std::map params; + + // Set a named parameter to a string value, replacing any existing value + void set(const std::string& name, const std::string& value); + + // Set a named parameter to an integer converted to a string value, replacing any existing value + void set(const std::string& name, int64_t value); + + // Set a named parameter to a double converted to a string value, replacing any existing value + void set(const std::string& name, double value); + + // Get a parameter's value, will return !present() 
if parameter was not set + Optional get(const std::string& name) const; + + // Get a parameter's value as an integer, will return !present() if parameter was not set + Optional getInt(const std::string& name) const; + + // Get a parameter's value parsed as a double, will return !present() if parameter was not set + Optional getDouble(const std::string& name) const; +}; + +// Unit test definition structured as a linked list item struct UnitTest { - typedef Future (*TestFunction)(); + typedef Future (*TestFunction)(const UnitTestParameters& params); const char* name; const char* file; @@ -57,6 +83,7 @@ struct UnitTest { UnitTest(const char* name, const char* file, int line, TestFunction func); }; +// Collection of unit tests in the form of a linked list struct UnitTestCollection { UnitTest* tests; }; @@ -71,17 +98,17 @@ extern UnitTestCollection g_unittests; #ifdef FLOW_DISABLE_UNIT_TESTS -#define TEST_CASE(name) static Future FILE_UNIQUE_NAME(disabled_testcase_func)() +#define TEST_CASE(name) static Future FILE_UNIQUE_NAME(disabled_testcase_func)(const UnitTestParameters& params) #define ACTOR_TEST_CASE(actorname, name) #else #define TEST_CASE(name) \ - static Future FILE_UNIQUE_NAME(testcase_func)(); \ + static Future FILE_UNIQUE_NAME(testcase_func)(const UnitTestParameters& params); \ namespace { \ static UnitTest FILE_UNIQUE_NAME(testcase)(name, __FILE__, __LINE__, &FILE_UNIQUE_NAME(testcase_func)); \ } \ - static Future FILE_UNIQUE_NAME(testcase_func)() + static Future FILE_UNIQUE_NAME(testcase_func)(const UnitTestParameters& params) // ACTOR_TEST_CASE generated by actorcompiler; don't use directly #define ACTOR_TEST_CASE(actorname, name) \ diff --git a/flow/actorcompiler/ActorParser.cs b/flow/actorcompiler/ActorParser.cs index d92bba9d53..f44b4e433f 100644 --- a/flow/actorcompiler/ActorParser.cs +++ b/flow/actorcompiler/ActorParser.cs @@ -535,7 +535,13 @@ namespace actorcompiler actor.testCaseParameters = str(paramRange); actor.name = "flowTestCase" + 
toks.First().SourceLine; - actor.parameters = new VarDeclaration[] { }; + actor.parameters = new VarDeclaration[] { new VarDeclaration { + name = "params", + type = "UnitTestParameters", + initializer = "", + initializerConstructorSyntax = false + } + }; actor.returnType = "Void"; } diff --git a/flow/network.h b/flow/network.h index ec14167121..e5683e4ca7 100644 --- a/flow/network.h +++ b/flow/network.h @@ -482,7 +482,8 @@ public: enBlobCredentialFiles = 10, enNetworkAddressesFunc = 11, enClientFailureMonitor = 12, - enSQLiteInjectedError = 13 + enSQLiteInjectedError = 13, + enGlobalConfig = 14 }; virtual void longTaskCheck(const char* name) {} diff --git a/tests/RedwoodCorrectness.txt b/tests/RedwoodCorrectness.txt index fbda6b04f4..6f190f2131 100644 --- a/tests/RedwoodCorrectness.txt +++ b/tests/RedwoodCorrectness.txt @@ -4,4 +4,4 @@ useDB=false testName=UnitTests maxTestCases=0 - testsMatching=!/redwood/correctness/ + testsMatching=/redwood/correctness/ diff --git a/tests/RedwoodCorrectnessBTree.txt b/tests/RedwoodCorrectnessBTree.txt index a2495adb7a..92bb3de164 100644 --- a/tests/RedwoodCorrectnessBTree.txt +++ b/tests/RedwoodCorrectnessBTree.txt @@ -4,4 +4,4 @@ useDB=false testName=UnitTests maxTestCases=0 - testsMatching=!/redwood/correctness/btree + testsMatching=/redwood/correctness/btree diff --git a/tests/RedwoodCorrectnessPager.txt b/tests/RedwoodCorrectnessPager.txt index 13f9ef1961..4b94c21cfc 100644 --- a/tests/RedwoodCorrectnessPager.txt +++ b/tests/RedwoodCorrectnessPager.txt @@ -4,4 +4,4 @@ useDB=false testName=UnitTests maxTestCases=0 - testsMatching=!/redwood/correctness/pager + testsMatching=:/redwood/correctness/pager diff --git a/tests/RedwoodCorrectnessUnits.txt b/tests/RedwoodCorrectnessUnits.txt index d32242f3df..ac56735455 100644 --- a/tests/RedwoodCorrectnessUnits.txt +++ b/tests/RedwoodCorrectnessUnits.txt @@ -4,4 +4,4 @@ useDB=false testName=UnitTests maxTestCases=0 - testsMatching=!/redwood/correctness/unit/ + 
testsMatching=/redwood/correctness/unit/ diff --git a/tests/RedwoodPerfPrefixCompression.txt b/tests/RedwoodPerfPrefixCompression.txt index 09bb6a30cc..3383a74c2b 100644 --- a/tests/RedwoodPerfPrefixCompression.txt +++ b/tests/RedwoodPerfPrefixCompression.txt @@ -4,4 +4,4 @@ useDB=false testName=UnitTests maxTestCases=0 - testsMatching=!/redwood/performance/prefixSizeComparison + testsMatching=:/redwood/performance/prefixSizeComparison diff --git a/tests/RedwoodPerfSequentialInsert.txt b/tests/RedwoodPerfSequentialInsert.txt index 2e61df3b53..21c7005951 100644 --- a/tests/RedwoodPerfSequentialInsert.txt +++ b/tests/RedwoodPerfSequentialInsert.txt @@ -4,4 +4,4 @@ useDB=false testName=UnitTests maxTestCases=0 - testsMatching=!/redwood/performance/sequentialInsert + testsMatching=:/redwood/performance/sequentialInsert diff --git a/tests/RedwoodPerfSet.txt b/tests/RedwoodPerfSet.txt index 0694fccdce..f720479ac2 100644 --- a/tests/RedwoodPerfSet.txt +++ b/tests/RedwoodPerfSet.txt @@ -4,4 +4,4 @@ useDB=false testName=UnitTests maxTestCases=0 - testsMatching=!/redwood/performance/set + testsMatching=:/redwood/performance/set diff --git a/tests/RedwoodPerfTests.txt b/tests/RedwoodPerfTests.txt index 91675d4b64..8d56ebc823 100644 --- a/tests/RedwoodPerfTests.txt +++ b/tests/RedwoodPerfTests.txt @@ -4,4 +4,4 @@ useDB=false testName=UnitTests maxTestCases=0 - testsMatching=!/redwood/performance/ + testsMatching=:/redwood/performance/ diff --git a/tests/TestRunner/TestRunner.py b/tests/TestRunner/TestRunner.py index 4e9bb1d5c0..207bec08c0 100755 --- a/tests/TestRunner/TestRunner.py +++ b/tests/TestRunner/TestRunner.py @@ -264,6 +264,40 @@ def process_traces(basedir, testname, path, out, aggregationPolicy, symbolicateB parser.writeObject({'CMakeSEED': str(cmake_seed)}) return res +class RestartTestPolicy: + def __init__(self, name, old_binary, new_binary): + # Default is to use the same binary for the restart test, unless constraints are satisfied. 
+ self._first_binary = new_binary + self._second_binary = new_binary + if old_binary is None: + _logger.info("No old binary provided"); return + old_binary_version_raw = subprocess.check_output([old_binary, '--version']).decode('utf-8') + match = re.match('FoundationDB.*\(v([0-9]+\.[0-9]+\.[0-9]+)\)', old_binary_version_raw) + assert match, old_binary_version_raw + old_binary_version = tuple(map(int, match.group(1).split('.'))) + match = re.match('.*/restarting/from_([0-9]+\.[0-9]+\.[0-9]+)/', name) + if match: # upgrading _from_ + lower_bound = tuple(map(int, match.group(1).split('.'))) + if old_binary_version >= lower_bound: + self._first_binary = old_binary + _logger.info("Using old binary as first binary: {} >= {}".format(old_binary_version, lower_bound)) + else: + _logger.info("Using new binary as first binary: {} < {}".format(old_binary_version, lower_bound)) + match = re.match('.*/restarting/to_([0-9]+\.[0-9]+\.[0-9]+)/', name) + if match: # downgrading _to_ + lower_bound = tuple(map(int, match.group(1).split('.'))) + if old_binary_version >= lower_bound: + self._second_binary = old_binary + _logger.info("Using old binary as second binary: {} >= {}".format(old_binary_version, lower_bound)) + else: + _logger.info("Using new binary as second binary: {} < {}".format(old_binary_version, lower_bound)) + + def first_binary(self): + return self._first_binary + + def second_binary(self): + return self._second_binary + def run_simulation_test(basedir, options): fdbserver = os.path.join(basedir, 'bin', 'fdbserver') pargs = [fdbserver, @@ -298,14 +332,19 @@ def run_simulation_test(basedir, options): os.mkdir(wd) return_codes = {} # {command: return_code} first = True + restart_test_policy = None + if len(options.testfile) > 1: + restart_test_policy = RestartTestPolicy(options.testfile[0], options.old_binary, fdbserver) for testfile in options.testfile: tmp = list(pargs) - # old_binary is not under test, so don't run under valgrind valgrind_args = [] - if first and
options.old_binary is not None and len(options.testfile) > 1: - _logger.info("Run old binary at {}".format(options.old_binary)) - tmp[0] = options.old_binary - elif options.use_valgrind: + if restart_test_policy is not None: + if first: + tmp[0] = restart_test_policy.first_binary() + else: + tmp[0] = restart_test_policy.second_binary() + # old_binary is not under test, so don't run under valgrind + if options.use_valgrind and tmp[0] == fdbserver: valgrind_args = ['valgrind', '--error-exitcode=99', '--'] if not first: tmp.append('-R') diff --git a/tests/rare/RedwoodCorrectnessBTree.toml b/tests/rare/RedwoodCorrectnessBTree.toml index fea0577ee7..c39098e4cc 100644 --- a/tests/rare/RedwoodCorrectnessBTree.toml +++ b/tests/rare/RedwoodCorrectnessBTree.toml @@ -6,4 +6,4 @@ startDelay = 0 [[test.workload]] testName = 'UnitTests' maxTestCases = 0 - testsMatching = '!/redwood/correctness/btree' + testsMatching = '/redwood/correctness/btree' diff --git a/tests/restarting/to_6.3.10/CycleTestRestart-1.txt b/tests/restarting/to_6.3.10/CycleTestRestart-1.txt index 59e764c697..fe2a95fd46 100644 --- a/tests/restarting/to_6.3.10/CycleTestRestart-1.txt +++ b/tests/restarting/to_6.3.10/CycleTestRestart-1.txt @@ -1,4 +1,5 @@ -storageEngineExcludeType=-1 +storageEngineExcludeTypes=-1,-2 +maxTLogVersion=6 testTitle=Clogged clearAfterTest=false testName=Cycle diff --git a/tests/restarting/to_6.3.10/CycleTestRestart-2.txt b/tests/restarting/to_6.3.10/CycleTestRestart-2.txt index ecd3c77b52..8af5b92392 100644 --- a/tests/restarting/to_6.3.10/CycleTestRestart-2.txt +++ b/tests/restarting/to_6.3.10/CycleTestRestart-2.txt @@ -1,4 +1,5 @@ -storageEngineExcludeType=-1 +storageEngineExcludeTypes=-1,-2 +maxTLogVersion=6 testTitle=Clogged runSetup=false testName=Cycle