diff --git a/.gitignore b/.gitignore index 2b74cc1f7c..f555965fab 100644 --- a/.gitignore +++ b/.gitignore @@ -7,7 +7,7 @@ bindings/java/foundationdb-client*.jar bindings/java/foundationdb-tests*.jar bindings/java/fdb-java-*-sources.jar packaging/msi/FDBInstaller.msi - +builds/ # Generated source, build, and packaging files *.g.cpp *.g.h
diff --git a/CMakeLists.txt b/CMakeLists.txt index f6e85984f1..08df8edfe0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,7 @@ # limitations under the License. cmake_minimum_required(VERSION 3.13) project(foundationdb - VERSION 7.0.0 + VERSION 7.1.0 DESCRIPTION "FoundationDB is a scalable, fault-tolerant, ordered key-value store with full ACID transactions." HOMEPAGE_URL "http://www.foundationdb.org/" LANGUAGES C CXX ASM)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e599780e37..525e80a9d9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -36,7 +36,7 @@ Members of the Apple FoundationDB team are part of the core committers helping r ## Contributing ### Opening a Pull Request -We love pull requests! For minor changes, feel free to open up a PR directly. For larger feature development and any changes that may require community discussion, we ask that you discuss your ideas on the [community forums](https://forums.foundationdb.org) prior to opening a PR, and then reference that thread within your PR comment. +We love pull requests! For minor changes, feel free to open up a PR directly. For larger feature development and any changes that may require community discussion, we ask that you discuss your ideas on the [community forums](https://forums.foundationdb.org) prior to opening a PR, and then reference that thread within your PR comment. Please refer to [FoundationDB Commit Process](https://github.com/apple/foundationdb/wiki/FoundationDB-Commit-Process) for more detailed guidelines. CI will be run automatically for core committers, and for community PRs it will be initiated by the request of a core committer. Tests can also be run locally via `ctest`, and core committers can run additional validation on pull requests prior to merging them.
diff --git a/README.md b/README.md index 44b451c135..9e0ddb78a5 100755 --- a/README.md +++ b/README.md @@ -157,11 +157,11 @@ The build under MacOS will work the same way as on Linux. To get boost and ninja cmake -G Ninja ``` -To generate a installable package, you can use cpack: +To generate an installable package: ```sh ninja -cpack -G productbuild +$SRCDIR/packaging/osx/buildpkg.sh . $SRCDIR ``` ### Windows @@ -171,7 +171,7 @@ that Visual Studio is used to compile. 1. Install Visual Studio 2017 (Community Edition is tested) 1. Install cmake Version 3.12 or higher [CMake](https://cmake.org/) -1. Download version 1.72 of [Boost](https://dl.bintray.com/boostorg/release/1.72.0/source/boost_1_72_0.tar.bz2) +1. Download version 1.72 of [Boost](https://boostorg.jfrog.io/artifactory/main/release/1.72.0/source/boost_1_72_0.tar.bz2) 1. Unpack boost (you don't need to compile it) 1. Install [Mono](http://www.mono-project.com/download/stable/) 1. (Optional) Install a [JDK](http://www.oracle.com/technetwork/java/javase/downloads/index.html).
FoundationDB currently builds with Java 8
diff --git a/bindings/CMakeLists.txt b/bindings/CMakeLists.txt index e363695ac2..378ea504b1 100644 --- a/bindings/CMakeLists.txt +++ b/bindings/CMakeLists.txt @@ -1,6 +1,6 @@ -add_subdirectory(c) if(NOT OPEN_FOR_IDE) # flow bindings currently doesn't support that + add_subdirectory(c) add_subdirectory(flow) endif() add_subdirectory(python)
diff --git a/bindings/bindingtester/__init__.py b/bindings/bindingtester/__init__.py index f8ad0030e2..17d06cf4fe 100644 --- a/bindings/bindingtester/__init__.py +++ b/bindings/bindingtester/__init__.py @@ -26,7 +26,7 @@ sys.path[:0] = [os.path.join(os.path.dirname(__file__), '..', '..', 'bindings', import util -FDB_API_VERSION = 700 +FDB_API_VERSION = 710 LOGGING = { 'version': 1,
diff --git a/bindings/bindingtester/bindingtester.py b/bindings/bindingtester/bindingtester.py index 58db70f5db..9c178a09d5 100755 --- a/bindings/bindingtester/bindingtester.py +++ b/bindings/bindingtester/bindingtester.py @@ -157,7 +157,7 @@ def choose_api_version(selected_api_version, tester_min_version, tester_max_vers api_version = min_version elif random.random() < 0.9: api_version = random.choice([v for v in [13, 14, 16, 21, 22, 23, 100, 200, 300, 400, 410, 420, 430, - 440, 450, 460, 500, 510, 520, 600, 610, 620, 630, 700] if v >= min_version and v <= max_version]) + 440, 450, 460, 500, 510, 520, 600, 610, 620, 630, 700, 710] if v >= min_version and v <= max_version]) else: api_version = random.randint(min_version, max_version)
diff --git a/bindings/bindingtester/known_testers.py b/bindings/bindingtester/known_testers.py index e1522039db..0fe5ad638f 100644 --- a/bindings/bindingtester/known_testers.py +++ b/bindings/bindingtester/known_testers.py @@ -20,7 +20,7 @@ import os -MAX_API_VERSION = 700 +MAX_API_VERSION = 710 COMMON_TYPES = ['null', 'bytes', 'string', 'int', 'uuid', 'bool', 'float', 'double', 'tuple'] ALL_TYPES = COMMON_TYPES + ['versionstamp']
diff --git a/bindings/bindingtester/tests/scripted.py b/bindings/bindingtester/tests/scripted.py index c113ebc07f..c250b9d8af 100644 --- a/bindings/bindingtester/tests/scripted.py +++ b/bindings/bindingtester/tests/scripted.py @@ -34,7 +34,7 @@ fdb.api_version(FDB_API_VERSION) class ScriptedTest(Test): - TEST_API_VERSION = 700 + TEST_API_VERSION = 710 def __init__(self, subspace): super(ScriptedTest, self).__init__(subspace, ScriptedTest.TEST_API_VERSION, ScriptedTest.TEST_API_VERSION)
diff --git a/bindings/c/fdb_c.cpp b/bindings/c/fdb_c.cpp index bf6af3aab7..16fbddf1c9 100644 --- a/bindings/c/fdb_c.cpp +++ b/bindings/c/fdb_c.cpp @@ -19,10 +19,11 @@ */ #include -#define FDB_API_VERSION 700 +#define FDB_API_VERSION 710 #define FDB_INCLUDE_LEGACY_TYPES #include "fdbclient/MultiVersionTransaction.h" +#include "fdbclient/MultiVersionAssignmentVars.h" #include "foundationdb/fdb_c.h" int g_api_version = 0; @@ -364,6 +365,22 @@ extern "C" DLLEXPORT double fdb_database_get_main_thread_busyness(FDBDatabase* d return DB(d)->getMainThreadBusyness(); } +// Returns the protocol version reported by the coordinator this client is connected to +// If an expected version is non-zero, the future won't return until the protocol version is different than expected +// Note: this will never return if the server is running a protocol from FDB 5.0 or older +extern "C" DLLEXPORT FDBFuture* fdb_database_get_server_protocol(FDBDatabase* db, uint64_t expected_version) { + Optional<ProtocolVersion> expected; + if (expected_version > 0) { + expected = ProtocolVersion(expected_version); + } + + return (FDBFuture*)(mapThreadFuture<ProtocolVersion, uint64_t>(DB(db)->getServerProtocol(expected), [](ErrorOr<ProtocolVersion> result) { + return result.map<uint64_t>([](ProtocolVersion pv) { return pv.versionWithFlags(); }); + }).extractPtr()); +} + extern "C" DLLEXPORT void fdb_transaction_destroy(FDBTransaction* tr) { try { TXN(tr)->delref(); @@ -583,10 +600,6 @@ extern "C" DLLEXPORT FDBFuture* fdb_transaction_get_approximate_size(FDBTransact return (FDBFuture*)TXN(tr)->getApproximateSize().extractPtr(); } -extern "C" DLLEXPORT FDBFuture* fdb_get_server_protocol(const char* clusterFilePath) { - return (FDBFuture*)(API->getServerProtocol(clusterFilePath ? clusterFilePath : "").extractPtr()); -} - extern "C" DLLEXPORT FDBFuture* fdb_transaction_get_versionstamp(FDBTransaction* tr) { return (FDBFuture*)(TXN(tr)->getVersionstamp().extractPtr()); }
diff --git a/bindings/c/foundationdb/fdb_c.h b/bindings/c/foundationdb/fdb_c.h index 2086cbd775..81bf10d8a8 100644 --- a/bindings/c/foundationdb/fdb_c.h +++ b/bindings/c/foundationdb/fdb_c.h @@ -27,10 +27,10 @@ #endif #if !defined(FDB_API_VERSION) -#error You must #define FDB_API_VERSION prior to including fdb_c.h (current version is 700) +#error You must #define FDB_API_VERSION prior to including fdb_c.h (current version is 710) #elif FDB_API_VERSION < 13 #error API version no longer supported (upgrade to 13) -#elif FDB_API_VERSION > 700 +#elif FDB_API_VERSION > 710 #error Requested API version requires a newer version of this header #endif @@ -97,7 +97,7 @@ typedef struct key { const uint8_t* key; int key_length; } FDBKey; -#if FDB_API_VERSION >= 700 +#if FDB_API_VERSION >= 710 typedef struct keyvalue { const uint8_t* key; int key_length; @@ -189,6 +189,8 @@ DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_database_create_snapshot(FDBDatabase DLLEXPORT WARN_UNUSED_RESULT double fdb_database_get_main_thread_busyness(FDBDatabase* db); +DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_database_get_server_protocol(FDBDatabase* db, uint64_t expected_version); + DLLEXPORT void fdb_transaction_destroy(FDBTransaction* tr); DLLEXPORT void fdb_transaction_cancel(FDBTransaction* tr); @@ -281,8 +283,6 @@ DLLEXPORT WARN_UNUSED_RESULT fdb_error_t fdb_transaction_get_committed_version(F */ DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_transaction_get_approximate_size(FDBTransaction* tr); -DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_get_server_protocol(const char* clusterFilePath); - DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_transaction_get_versionstamp(FDBTransaction* tr); DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_transaction_on_error(FDBTransaction* tr, fdb_error_t error);
diff --git a/bindings/c/test/fdb_c90_test.c b/bindings/c/test/fdb_c90_test.c index 1569d98250..bbfb7f6dbf 100644 --- a/bindings/c/test/fdb_c90_test.c +++ b/bindings/c/test/fdb_c90_test.c @@ -1,9 +1,9 @@ -#define FDB_API_VERSION 700 +#define FDB_API_VERSION 710 #include <foundationdb/fdb_c.h> int main(int argc, char* argv[]) { (void)argc; (void)argv; - fdb_select_api_version(700); + fdb_select_api_version(710); return 0; }
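The hunks above replace the cluster-file-based `fdb_get_server_protocol` with the database-scoped `fdb_database_get_server_protocol`. A minimal usage sketch of the new call (illustrative only; it assumes the API version has been selected, the network thread is running, and `db` came from `fdb_create_database` — the helper name `print_server_protocol` is invented here):

```c
#define FDB_API_VERSION 710
#include <foundationdb/fdb_c.h>
#include <stdio.h>

/* Hypothetical helper: report the coordinator's protocol version.
 * Passing 0 as expected_version means "resolve with the current protocol
 * version" rather than waiting for it to differ from an expected one. */
fdb_error_t print_server_protocol(FDBDatabase* db) {
	FDBFuture* f = fdb_database_get_server_protocol(db, 0);
	fdb_error_t err = fdb_future_block_until_ready(f);
	if (!err) {
		uint64_t version;
		err = fdb_future_get_uint64(f, &version);
		if (!err)
			printf("server protocol: %llx\n", (unsigned long long)version);
	}
	fdb_future_destroy(f);
	return err;
}
```

Passing a non-zero expected version instead turns the future into a change notification: per the comment in the hunk, it only resolves once the reported protocol version differs from the expected one.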
%s\n", fdb_get_error(err)); + } + } + /* enable tracing if specified */ if (args->trace) { fprintf(debugme, @@ -1345,6 +1353,7 @@ int init_args(mako_args_t* args) { args->verbose = 1; args->flatbuffers = 0; /* internal */ args->knobs[0] = '\0'; + args->log_group[0] = '\0'; args->trace = 0; args->tracepath[0] = '\0'; args->traceformat = 0; /* default to client's default (XML) */ @@ -1505,6 +1514,7 @@ void usage() { printf("%-24s %s\n", "-m, --mode=MODE", "Specify the mode (build, run, clean)"); printf("%-24s %s\n", "-z, --zipf", "Use zipfian distribution instead of uniform distribution"); printf("%-24s %s\n", " --commitget", "Commit GETs"); + printf("%-24s %s\n", " --loggroup=LOGGROUP", "Set client log group"); printf("%-24s %s\n", " --trace", "Enable tracing"); printf("%-24s %s\n", " --tracepath=PATH", "Set trace file path"); printf("%-24s %s\n", " --trace_format ", "Set trace format (Default: json)"); @@ -1546,6 +1556,7 @@ int parse_args(int argc, char* argv[], mako_args_t* args) { { "verbose", required_argument, NULL, 'v' }, { "mode", required_argument, NULL, 'm' }, { "knobs", required_argument, NULL, ARG_KNOBS }, + { "loggroup", required_argument, NULL, ARG_LOGGROUP }, { "tracepath", required_argument, NULL, ARG_TRACEPATH }, { "trace_format", required_argument, NULL, ARG_TRACEFORMAT }, { "streaming", required_argument, NULL, ARG_STREAMING_MODE }, @@ -1656,6 +1667,9 @@ int parse_args(int argc, char* argv[], mako_args_t* args) { case ARG_KNOBS: memcpy(args->knobs, optarg, strlen(optarg) + 1); break; + case ARG_LOGGROUP: + memcpy(args->log_group, optarg, strlen(optarg) + 1); + break; case ARG_TRACE: args->trace = 1; break; diff --git a/bindings/c/test/mako/mako.h b/bindings/c/test/mako/mako.h index c065b44c13..214e3e6fc6 100644 --- a/bindings/c/test/mako/mako.h +++ b/bindings/c/test/mako/mako.h @@ -3,7 +3,7 @@ #pragma once #ifndef FDB_API_VERSION -#define FDB_API_VERSION 700 +#define FDB_API_VERSION 710 #endif #include @@ -68,6 +68,7 @@ enum Arguments { ARG_VERSION, ARG_KNOBS, ARG_FLATBUFFERS, + ARG_LOGGROUP, ARG_TRACE, ARG_TRACEPATH, ARG_TRACEFORMAT, @@ -97,6 +98,7 @@ typedef struct { int ops[MAX_OP][3]; } mako_txnspec_t; +#define LOGGROUP_MAX 256 #define KNOB_MAX 256 #define TAGPREFIXLENGTH_MAX 8 @@ -122,6 +124,7 @@ typedef struct { int verbose; mako_txnspec_t txnspec; char cluster_file[PATH_MAX]; + char log_group[LOGGROUP_MAX]; int trace; char tracepath[PATH_MAX]; int traceformat; /* 0 - XML, 1 - JSON */ diff --git a/bindings/c/test/performance_test.c b/bindings/c/test/performance_test.c index d2f8655b87..f73f673bcf 100644 --- a/bindings/c/test/performance_test.c +++ b/bindings/c/test/performance_test.c @@ -641,7 +641,7 @@ void runTests(struct ResultSet* rs) { int main(int argc, char** argv) { srand(time(NULL)); struct ResultSet* rs = newResultSet(); - checkError(fdb_select_api_version(700), "select API version", rs); + checkError(fdb_select_api_version(710), "select API version", rs); printf("Running performance test at client version: %s\n", fdb_get_client_version()); valueStr = (uint8_t*)malloc((sizeof(uint8_t)) * valueSize); diff --git a/bindings/c/test/ryw_benchmark.c b/bindings/c/test/ryw_benchmark.c index 8021a1fc9d..98f92208c0 100644 --- a/bindings/c/test/ryw_benchmark.c +++ b/bindings/c/test/ryw_benchmark.c @@ -285,7 +285,7 @@ void runTests(struct ResultSet* rs) { int main(int argc, char** argv) { srand(time(NULL)); struct ResultSet* rs = newResultSet(); - checkError(fdb_select_api_version(700), "select API version", rs); + checkError(fdb_select_api_version(710), "select API 
version", rs); printf("Running RYW Benchmark test at client version: %s\n", fdb_get_client_version()); keys = generateKeys(numKeys, keySize); diff --git a/bindings/c/test/test.h b/bindings/c/test/test.h index 1e0622dd3a..0b79e232c6 100644 --- a/bindings/c/test/test.h +++ b/bindings/c/test/test.h @@ -29,7 +29,7 @@ #include #ifndef FDB_API_VERSION -#define FDB_API_VERSION 700 +#define FDB_API_VERSION 710 #endif #include diff --git a/bindings/c/test/txn_size_test.c b/bindings/c/test/txn_size_test.c index ca0261edf2..f1c90cd720 100644 --- a/bindings/c/test/txn_size_test.c +++ b/bindings/c/test/txn_size_test.c @@ -97,7 +97,7 @@ void runTests(struct ResultSet* rs) { int main(int argc, char** argv) { srand(time(NULL)); struct ResultSet* rs = newResultSet(); - checkError(fdb_select_api_version(700), "select API version", rs); + checkError(fdb_select_api_version(710), "select API version", rs); printf("Running performance test at client version: %s\n", fdb_get_client_version()); keys = generateKeys(numKeys, KEY_SIZE); diff --git a/bindings/c/test/unit/fdb_api.hpp b/bindings/c/test/unit/fdb_api.hpp index fc4b3e8e6b..17f25d55ee 100644 --- a/bindings/c/test/unit/fdb_api.hpp +++ b/bindings/c/test/unit/fdb_api.hpp @@ -39,7 +39,7 @@ #pragma once -#define FDB_API_VERSION 700 +#define FDB_API_VERSION 710 #include #include diff --git a/bindings/c/test/unit/setup_tests.cpp b/bindings/c/test/unit/setup_tests.cpp index a5109b68f0..602af99845 100644 --- a/bindings/c/test/unit/setup_tests.cpp +++ b/bindings/c/test/unit/setup_tests.cpp @@ -20,7 +20,7 @@ // Unit tests for API setup, network initialization functions from the FDB C API. -#define FDB_API_VERSION 700 +#define FDB_API_VERSION 710 #include #include #include @@ -42,13 +42,13 @@ TEST_CASE("setup") { CHECK(err); // Select current API version - fdb_check(fdb_select_api_version(700)); + fdb_check(fdb_select_api_version(710)); // Error to call again after a successful return - err = fdb_select_api_version(700); + err = fdb_select_api_version(710); CHECK(err); - CHECK(fdb_get_max_api_version() >= 700); + CHECK(fdb_get_max_api_version() >= 710); fdb_check(fdb_setup_network()); // Calling a second time should fail diff --git a/bindings/c/test/unit/unit_tests.cpp b/bindings/c/test/unit/unit_tests.cpp index f3f97476c2..703b1273dd 100644 --- a/bindings/c/test/unit/unit_tests.cpp +++ b/bindings/c/test/unit/unit_tests.cpp @@ -20,7 +20,7 @@ // Unit tests for the FoundationDB C API. 
-#define FDB_API_VERSION 700 +#define FDB_API_VERSION 710 #include #include #include @@ -263,13 +263,15 @@ TEST_CASE("fdb_future_set_callback") { &context)); fdb_error_t err = wait_future(f1); + + context.event.wait(); // Wait until callback is called + if (err) { fdb::EmptyFuture f2 = tr.on_error(err); fdb_check(wait_future(f2)); continue; } - context.event.wait(); break; } } @@ -515,10 +517,10 @@ TEST_CASE("write system key") { fdb::Transaction tr(db); std::string syskey("\xff\x02"); - fdb_check(tr.set_option(FDB_TR_OPTION_ACCESS_SYSTEM_KEYS, nullptr, 0)); - tr.set(syskey, "bar"); while (1) { + fdb_check(tr.set_option(FDB_TR_OPTION_ACCESS_SYSTEM_KEYS, nullptr, 0)); + tr.set(syskey, "bar"); fdb::EmptyFuture f1 = tr.commit(); fdb_error_t err = wait_future(f1); @@ -949,16 +951,25 @@ TEST_CASE("fdb_transaction_clear") { } TEST_CASE("fdb_transaction_atomic_op FDB_MUTATION_TYPE_ADD") { - insert_data(db, create_data({ { "foo", "a" } })); + insert_data(db, create_data({ { "foo", "\x00" } })); fdb::Transaction tr(db); int8_t param = 1; + int potentialCommitCount = 0; while (1) { tr.atomic_op(key("foo"), (const uint8_t*)¶m, sizeof(param), FDB_MUTATION_TYPE_ADD); + if (potentialCommitCount + 1 == 256) { + // Trying to commit again might overflow the one unsigned byte we're looking at + break; + } + ++potentialCommitCount; fdb::EmptyFuture f1 = tr.commit(); fdb_error_t err = wait_future(f1); if (err) { + if (fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE_NOT_COMMITTED, err)) { + --potentialCommitCount; + } fdb::EmptyFuture f2 = tr.on_error(err); fdb_check(wait_future(f2)); continue; @@ -969,7 +980,8 @@ TEST_CASE("fdb_transaction_atomic_op FDB_MUTATION_TYPE_ADD") { auto value = get_value(key("foo"), /* snapshot */ false, {}); REQUIRE(value.has_value()); CHECK(value->size() == 1); - CHECK(value->data()[0] == 'b'); // incrementing 'a' results in 'b' + CHECK(uint8_t(value->data()[0]) > 0); + CHECK(uint8_t(value->data()[0]) <= potentialCommitCount); } TEST_CASE("fdb_transaction_atomic_op FDB_MUTATION_TYPE_BIT_AND") { @@ -1139,14 +1151,19 @@ TEST_CASE("fdb_transaction_atomic_op FDB_MUTATION_TYPE_BIT_XOR") { fdb::Transaction tr(db); char param[] = { 'a', 'd' }; + int potentialCommitCount = 0; while (1) { tr.atomic_op(key("foo"), (const uint8_t*)"b", 1, FDB_MUTATION_TYPE_BIT_XOR); tr.atomic_op(key("bar"), (const uint8_t*)param, 2, FDB_MUTATION_TYPE_BIT_XOR); tr.atomic_op(key("baz"), (const uint8_t*)"d", 1, FDB_MUTATION_TYPE_BIT_XOR); + ++potentialCommitCount; fdb::EmptyFuture f1 = tr.commit(); fdb_error_t err = wait_future(f1); if (err) { + if (fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE_NOT_COMMITTED, err)) { + --potentialCommitCount; + } fdb::EmptyFuture f2 = tr.on_error(err); fdb_check(wait_future(f2)); continue; @@ -1154,6 +1171,11 @@ TEST_CASE("fdb_transaction_atomic_op FDB_MUTATION_TYPE_BIT_XOR") { break; } + if (potentialCommitCount != 1) { + MESSAGE("Transaction may not have committed exactly once. 
Suppressing assertions"); + return; + } + auto value = get_value(key("foo"), /* snapshot */ false, {}); REQUIRE(value.has_value()); CHECK(value->size() == 1); @@ -1204,13 +1226,18 @@ TEST_CASE("fdb_transaction_atomic_op FDB_MUTATION_TYPE_APPEND_IF_FITS") { insert_data(db, create_data({ { "foo", "f" } })); fdb::Transaction tr(db); + int potentialCommitCount = 0; while (1) { tr.atomic_op(key("foo"), (const uint8_t*)"db", 2, FDB_MUTATION_TYPE_APPEND_IF_FITS); tr.atomic_op(key("bar"), (const uint8_t*)"foundation", 10, FDB_MUTATION_TYPE_APPEND_IF_FITS); + ++potentialCommitCount; fdb::EmptyFuture f1 = tr.commit(); fdb_error_t err = wait_future(f1); if (err) { + if (fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE_NOT_COMMITTED, err)) { + --potentialCommitCount; + } fdb::EmptyFuture f2 = tr.on_error(err); fdb_check(wait_future(f2)); continue; @@ -1218,13 +1245,18 @@ TEST_CASE("fdb_transaction_atomic_op FDB_MUTATION_TYPE_APPEND_IF_FITS") { break; } - auto value = get_value(key("foo"), /* snapshot */ false, {}); - REQUIRE(value.has_value()); - CHECK(value->compare("fdb") == 0); + auto value_foo = get_value(key("foo"), /* snapshot */ false, {}); + REQUIRE(value_foo.has_value()); - value = get_value(key("bar"), /* snapshot */ false, {}); - REQUIRE(value.has_value()); - CHECK(value->compare("foundation") == 0); + auto value_bar = get_value(key("bar"), /* snapshot */ false, {}); + REQUIRE(value_bar.has_value()); + + if (potentialCommitCount != 1) { + MESSAGE("Transaction may not have committed exactly once. Suppressing assertions"); + } else { + CHECK(value_foo.value() == "fdb"); + CHECK(value_bar.value() == "foundation"); + } } TEST_CASE("fdb_transaction_atomic_op FDB_MUTATION_TYPE_MAX") { @@ -1513,17 +1545,17 @@ TEST_CASE("fdb_transaction_get_approximate_size") { } } -TEST_CASE("fdb_get_server_protocol") { +TEST_CASE("fdb_database_get_server_protocol") { // We don't really have any expectations other than "don't crash" here - FDBFuture* protocolFuture = fdb_get_server_protocol(clusterFilePath.c_str()); + FDBFuture* protocolFuture = fdb_database_get_server_protocol(db, 0); uint64_t out; fdb_check(fdb_future_block_until_ready(protocolFuture)); fdb_check(fdb_future_get_uint64(protocolFuture, &out)); fdb_future_destroy(protocolFuture); - // "Default" cluster file version - protocolFuture = fdb_get_server_protocol(nullptr); + // Passing in an expected version that's different than the cluster version + protocolFuture = fdb_database_get_server_protocol(db, 0x0FDB00A200090000LL); fdb_check(fdb_future_block_until_ready(protocolFuture)); fdb_check(fdb_future_get_uint64(protocolFuture, &out)); fdb_future_destroy(protocolFuture); @@ -1576,7 +1608,7 @@ TEST_CASE("fdb_transaction_watch max watches") { fdb_check(f1.set_callback( +[](FDBFuture* f, void* param) { fdb_error_t err = fdb_future_get_error(f); - if (err != 1101) { // operation_cancelled + if (err != /*operation_cancelled*/ 1101 && !fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE, err)) { CHECK(err == 1032); // too_many_watches } auto* event = static_cast*>(param); @@ -1587,7 +1619,7 @@ TEST_CASE("fdb_transaction_watch max watches") { fdb_check(f2.set_callback( +[](FDBFuture* f, void* param) { fdb_error_t err = fdb_future_get_error(f); - if (err != 1101) { // operation_cancelled + if (err != /*operation_cancelled*/ 1101 && !fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE, err)) { CHECK(err == 1032); // too_many_watches } auto* event = static_cast*>(param); @@ -1598,7 +1630,7 @@ TEST_CASE("fdb_transaction_watch max watches") { 
fdb_check(f3.set_callback( +[](FDBFuture* f, void* param) { fdb_error_t err = fdb_future_get_error(f); - if (err != 1101) { // operation_cancelled + if (err != /*operation_cancelled*/ 1101 && !fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE, err)) { CHECK(err == 1032); // too_many_watches } auto* event = static_cast*>(param); @@ -1609,7 +1641,7 @@ TEST_CASE("fdb_transaction_watch max watches") { fdb_check(f4.set_callback( +[](FDBFuture* f, void* param) { fdb_error_t err = fdb_future_get_error(f); - if (err != 1101) { // operation_cancelled + if (err != /*operation_cancelled*/ 1101 && !fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE, err)) { CHECK(err == 1032); // too_many_watches } auto* event = static_cast*>(param); @@ -1671,7 +1703,7 @@ TEST_CASE("fdb_transaction_cancel") { // ... until the transaction has been reset. tr.reset(); fdb::ValueFuture f2 = tr.get("foo", /* snapshot */ false); - fdb_check(wait_future(f2)); + CHECK(wait_future(f2) != 1025); // transaction_cancelled } TEST_CASE("fdb_transaction_add_conflict_range") { @@ -2146,22 +2178,29 @@ TEST_CASE("monitor_network_busyness") { } int main(int argc, char** argv) { - if (argc != 3 && argc != 4) { + if (argc < 3) { std::cout << "Unit tests for the FoundationDB C API.\n" - << "Usage: fdb_c_unit_tests /path/to/cluster_file key_prefix [externalClient]" << std::endl; + << "Usage: fdb_c_unit_tests /path/to/cluster_file key_prefix [externalClient] [doctest args]" + << std::endl; return 1; } - fdb_check(fdb_select_api_version(700)); - if (argc == 4) { + fdb_check(fdb_select_api_version(710)); + if (argc >= 4) { std::string externalClientLibrary = argv[3]; - fdb_check(fdb_network_set_option( - FDBNetworkOption::FDB_NET_OPTION_DISABLE_LOCAL_CLIENT, reinterpret_cast(""), 0)); - fdb_check(fdb_network_set_option(FDBNetworkOption::FDB_NET_OPTION_EXTERNAL_CLIENT_LIBRARY, - reinterpret_cast(externalClientLibrary.c_str()), - externalClientLibrary.size())); + if (externalClientLibrary.substr(0, 2) != "--") { + fdb_check(fdb_network_set_option( + FDBNetworkOption::FDB_NET_OPTION_DISABLE_LOCAL_CLIENT, reinterpret_cast(""), 0)); + fdb_check(fdb_network_set_option(FDBNetworkOption::FDB_NET_OPTION_EXTERNAL_CLIENT_LIBRARY, + reinterpret_cast(externalClientLibrary.c_str()), + externalClientLibrary.size())); + } } + /* fdb_check(fdb_network_set_option( */ + /* FDBNetworkOption::FDB_NET_OPTION_CLIENT_BUGGIFY_ENABLE, reinterpret_cast(""), 0)); */ + doctest::Context context; + context.applyCommandLine(argc, argv); fdb_check(fdb_setup_network()); std::thread network_thread{ &fdb_run_network }; diff --git a/bindings/c/test/workloads/SimpleWorkload.cpp b/bindings/c/test/workloads/SimpleWorkload.cpp index 6d1adbefdf..2be433b9c1 100644 --- a/bindings/c/test/workloads/SimpleWorkload.cpp +++ b/bindings/c/test/workloads/SimpleWorkload.cpp @@ -18,7 +18,7 @@ * limitations under the License. 
*/ -#define FDB_API_VERSION 700 +#define FDB_API_VERSION 710 #include "foundationdb/fdb_c.h" #undef DLLEXPORT #include "workloads.h" @@ -266,7 +266,7 @@ struct SimpleWorkload : FDBWorkload { insertsPerTx = context->getOption("insertsPerTx", 100ul); opsPerTx = context->getOption("opsPerTx", 100ul); runFor = context->getOption("runFor", 10.0); - auto err = fdb_select_api_version(700); + auto err = fdb_select_api_version(710); if (err) { context->trace( FDBSeverity::Info, "SelectAPIVersionFailed", { { "Error", std::string(fdb_get_error(err)) } }); diff --git a/bindings/flow/fdb_flow.actor.cpp b/bindings/flow/fdb_flow.actor.cpp index fc753a6fbe..90e1a68621 100644 --- a/bindings/flow/fdb_flow.actor.cpp +++ b/bindings/flow/fdb_flow.actor.cpp @@ -37,7 +37,7 @@ THREAD_FUNC networkThread(void* fdb) { } ACTOR Future _test() { - API* fdb = FDB::API::selectAPIVersion(700); + API* fdb = FDB::API::selectAPIVersion(710); auto db = fdb->createDatabase(); state Reference tr = db->createTransaction(); @@ -81,7 +81,7 @@ ACTOR Future _test() { } void fdb_flow_test() { - API* fdb = FDB::API::selectAPIVersion(700); + API* fdb = FDB::API::selectAPIVersion(710); fdb->setupNetwork(); startThread(networkThread, fdb); diff --git a/bindings/flow/fdb_flow.h b/bindings/flow/fdb_flow.h index 28eab34e3c..f1b87c16ba 100644 --- a/bindings/flow/fdb_flow.h +++ b/bindings/flow/fdb_flow.h @@ -23,7 +23,7 @@ #include -#define FDB_API_VERSION 700 +#define FDB_API_VERSION 710 #include #undef DLLEXPORT diff --git a/bindings/flow/tester/Tester.actor.cpp b/bindings/flow/tester/Tester.actor.cpp index 8a5f5adc26..958ff1a0be 100644 --- a/bindings/flow/tester/Tester.actor.cpp +++ b/bindings/flow/tester/Tester.actor.cpp @@ -1863,7 +1863,7 @@ ACTOR void _test_versionstamp() { try { g_network = newNet2(TLSConfig()); - API* fdb = FDB::API::selectAPIVersion(700); + API* fdb = FDB::API::selectAPIVersion(710); fdb->setupNetwork(); startThread(networkThread, fdb); diff --git a/bindings/go/README.md b/bindings/go/README.md index 8619e1692a..87bf502d36 100644 --- a/bindings/go/README.md +++ b/bindings/go/README.md @@ -9,7 +9,7 @@ This package requires: - [Mono](http://www.mono-project.com/) (macOS or Linux) or [Visual Studio](https://www.visualstudio.com/) (Windows) (build-time only) - FoundationDB C API 2.0.x-6.1.x (part of the [FoundationDB client packages](https://apple.github.io/foundationdb/downloads.html#c)) -Use of this package requires the selection of a FoundationDB API version at runtime. This package currently supports FoundationDB API versions 200-700. +Use of this package requires the selection of a FoundationDB API version at runtime. This package currently supports FoundationDB API versions 200-710. 
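The runtime version-selection contract described above is the same one every binding inherits from the C client. As a sketch of the underlying negotiation (shown in C, since that is the layer the Go package wraps; the fallback logic is illustrative, not part of any binding):

```c
#define FDB_API_VERSION 710
#include <foundationdb/fdb_c.h>

/* Ask for 710, but fall back to the newest API version the loaded
 * client library actually supports; selecting an unsupported version
 * returns an error rather than silently degrading. */
int select_best_api_version(void) {
	int version = 710;
	if (fdb_get_max_api_version() < version)
		version = fdb_get_max_api_version();
	return fdb_select_api_version(version) ? -1 : version;
}
```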
To install this package, you can run the "fdb-go-install.sh" script (for versions 5.0.x and greater): diff --git a/bindings/go/src/fdb/cluster.go b/bindings/go/src/fdb/cluster.go index 5ab17b5273..b5556d93fd 100644 --- a/bindings/go/src/fdb/cluster.go +++ b/bindings/go/src/fdb/cluster.go @@ -22,7 +22,7 @@ package fdb -// #define FDB_API_VERSION 700 +// #define FDB_API_VERSION 710 // #include import "C" diff --git a/bindings/go/src/fdb/database.go b/bindings/go/src/fdb/database.go index 60f3f03d06..0e18ab908c 100644 --- a/bindings/go/src/fdb/database.go +++ b/bindings/go/src/fdb/database.go @@ -22,7 +22,7 @@ package fdb -// #define FDB_API_VERSION 700 +// #define FDB_API_VERSION 710 // #include import "C" diff --git a/bindings/go/src/fdb/doc.go b/bindings/go/src/fdb/doc.go index e1759701ff..2ecf99f200 100644 --- a/bindings/go/src/fdb/doc.go +++ b/bindings/go/src/fdb/doc.go @@ -46,7 +46,7 @@ A basic interaction with the FoundationDB API is demonstrated below: func main() { // Different API versions may expose different runtime behaviors. - fdb.MustAPIVersion(700) + fdb.MustAPIVersion(710) // Open the default database from the system cluster db := fdb.MustOpenDefault() diff --git a/bindings/go/src/fdb/errors.go b/bindings/go/src/fdb/errors.go index 9c9f75b566..9ce11ca150 100644 --- a/bindings/go/src/fdb/errors.go +++ b/bindings/go/src/fdb/errors.go @@ -22,7 +22,7 @@ package fdb -// #define FDB_API_VERSION 700 +// #define FDB_API_VERSION 710 // #include import "C" diff --git a/bindings/go/src/fdb/fdb.go b/bindings/go/src/fdb/fdb.go index bc05a05dba..662951be82 100644 --- a/bindings/go/src/fdb/fdb.go +++ b/bindings/go/src/fdb/fdb.go @@ -22,7 +22,7 @@ package fdb -// #define FDB_API_VERSION 700 +// #define FDB_API_VERSION 710 // #include // #include import "C" @@ -108,7 +108,7 @@ func (opt NetworkOptions) setOpt(code int, param []byte) error { // library, an error will be returned. APIVersion must be called prior to any // other functions in the fdb package. // -// Currently, this package supports API versions 200 through 700. +// Currently, this package supports API versions 200 through 710. // // Warning: When using the multi-version client API, setting an API version that // is not supported by a particular client library will prevent that client from @@ -116,7 +116,7 @@ func (opt NetworkOptions) setOpt(code int, param []byte) error { // the API version of your application after upgrading your client until the // cluster has also been upgraded. 
func APIVersion(version int) error { - headerVersion := 700 + headerVersion := 710 networkMutex.Lock() defer networkMutex.Unlock() @@ -128,7 +128,7 @@ func APIVersion(version int) error { return errAPIVersionAlreadySet } - if version < 200 || version > 700 { + if version < 200 || version > 710 { return errAPIVersionNotSupported } diff --git a/bindings/go/src/fdb/fdb_test.go b/bindings/go/src/fdb/fdb_test.go index e455dba473..d55a3a7d63 100644 --- a/bindings/go/src/fdb/fdb_test.go +++ b/bindings/go/src/fdb/fdb_test.go @@ -32,7 +32,7 @@ import ( func ExampleOpenDefault() { var e error - e = fdb.APIVersion(700) + e = fdb.APIVersion(710) if e != nil { fmt.Printf("Unable to set API version: %v\n", e) return @@ -52,7 +52,7 @@ func ExampleOpenDefault() { } func TestVersionstamp(t *testing.T) { - fdb.MustAPIVersion(700) + fdb.MustAPIVersion(710) db := fdb.MustOpenDefault() setVs := func(t fdb.Transactor, key fdb.Key) (fdb.FutureKey, error) { @@ -98,7 +98,7 @@ func TestVersionstamp(t *testing.T) { } func ExampleTransactor() { - fdb.MustAPIVersion(700) + fdb.MustAPIVersion(710) db := fdb.MustOpenDefault() setOne := func(t fdb.Transactor, key fdb.Key, value []byte) error { @@ -149,7 +149,7 @@ func ExampleTransactor() { } func ExampleReadTransactor() { - fdb.MustAPIVersion(700) + fdb.MustAPIVersion(710) db := fdb.MustOpenDefault() getOne := func(rt fdb.ReadTransactor, key fdb.Key) ([]byte, error) { @@ -202,7 +202,7 @@ func ExampleReadTransactor() { } func ExamplePrefixRange() { - fdb.MustAPIVersion(700) + fdb.MustAPIVersion(710) db := fdb.MustOpenDefault() tr, e := db.CreateTransaction() @@ -241,7 +241,7 @@ func ExamplePrefixRange() { } func ExampleRangeIterator() { - fdb.MustAPIVersion(700) + fdb.MustAPIVersion(710) db := fdb.MustOpenDefault() tr, e := db.CreateTransaction() diff --git a/bindings/go/src/fdb/futures.go b/bindings/go/src/fdb/futures.go index e51d5eaa8d..35115f8594 100644 --- a/bindings/go/src/fdb/futures.go +++ b/bindings/go/src/fdb/futures.go @@ -23,7 +23,7 @@ package fdb // #cgo LDFLAGS: -lfdb_c -lm -// #define FDB_API_VERSION 700 +// #define FDB_API_VERSION 710 // #include // #include // diff --git a/bindings/go/src/fdb/range.go b/bindings/go/src/fdb/range.go index 584f23cb2b..32155eae45 100644 --- a/bindings/go/src/fdb/range.go +++ b/bindings/go/src/fdb/range.go @@ -22,7 +22,7 @@ package fdb -// #define FDB_API_VERSION 700 +// #define FDB_API_VERSION 710 // #include import "C" diff --git a/bindings/go/src/fdb/transaction.go b/bindings/go/src/fdb/transaction.go index 9c64b06ac7..98bfa86c08 100644 --- a/bindings/go/src/fdb/transaction.go +++ b/bindings/go/src/fdb/transaction.go @@ -22,7 +22,7 @@ package fdb -// #define FDB_API_VERSION 700 +// #define FDB_API_VERSION 710 // #include import "C" diff --git a/bindings/java/CMakeLists.txt b/bindings/java/CMakeLists.txt index 2da8639b8d..09012cdf97 100644 --- a/bindings/java/CMakeLists.txt +++ b/bindings/java/CMakeLists.txt @@ -141,8 +141,6 @@ endif() target_include_directories(fdb_java PRIVATE ${JNI_INCLUDE_DIRS}) # libfdb_java.so is loaded by fdb-java.jar and doesn't need to depened on jvm shared libraries. 
target_link_libraries(fdb_java PRIVATE fdb_c) -set_target_properties(fdb_java PROPERTIES - LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib/${SYSTEM_NAME}/amd64/) if(APPLE) set_target_properties(fdb_java PROPERTIES SUFFIX ".jnilib") endif() @@ -217,7 +215,11 @@ if(NOT OPEN_FOR_IDE) elseif(APPLE) set(lib_destination "osx/x86_64") else() - set(lib_destination "linux/amd64") + if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") + set(lib_destination "linux/aarch64") + else() + set(lib_destination "linux/amd64") + endif() endif() set(lib_destination "${unpack_dir}/lib/${lib_destination}") set(jni_package "${CMAKE_BINARY_DIR}/packages/lib") diff --git a/bindings/java/JavaWorkload.cpp b/bindings/java/JavaWorkload.cpp index 7eaf9527b6..b2506965eb 100644 --- a/bindings/java/JavaWorkload.cpp +++ b/bindings/java/JavaWorkload.cpp @@ -19,7 +19,7 @@ */ #include -#define FDB_API_VERSION 700 +#define FDB_API_VERSION 710 #include #include @@ -375,7 +375,7 @@ struct JVM { jmethodID selectMethod = env->GetStaticMethodID(fdbClass, "selectAPIVersion", "(I)Lcom/apple/foundationdb/FDB;"); checkException(); - auto fdbInstance = env->CallStaticObjectMethod(fdbClass, selectMethod, jint(700)); + auto fdbInstance = env->CallStaticObjectMethod(fdbClass, selectMethod, jint(710)); checkException(); env->CallObjectMethod(fdbInstance, getMethod(fdbClass, "disableShutdownHook", "()V")); checkException(); diff --git a/bindings/java/fdbJNI.cpp b/bindings/java/fdbJNI.cpp index 06acae658e..587190d3a5 100644 --- a/bindings/java/fdbJNI.cpp +++ b/bindings/java/fdbJNI.cpp @@ -21,7 +21,7 @@ #include #include -#define FDB_API_VERSION 700 +#define FDB_API_VERSION 710 #include diff --git a/bindings/java/src/integration/com/apple/foundationdb/DirectoryTest.java b/bindings/java/src/integration/com/apple/foundationdb/DirectoryTest.java index ddddd20ad1..5634e7d741 100644 --- a/bindings/java/src/integration/com/apple/foundationdb/DirectoryTest.java +++ b/bindings/java/src/integration/com/apple/foundationdb/DirectoryTest.java @@ -42,7 +42,7 @@ import org.junit.jupiter.api.extension.ExtendWith; */ @ExtendWith(RequiresDatabase.class) class DirectoryTest { - private static final FDB fdb = FDB.selectAPIVersion(700); + private static final FDB fdb = FDB.selectAPIVersion(710); @Test void testCanCreateDirectory() throws Exception { diff --git a/bindings/java/src/integration/com/apple/foundationdb/RangeQueryIntegrationTest.java b/bindings/java/src/integration/com/apple/foundationdb/RangeQueryIntegrationTest.java index e7490fd038..8c9dbc049c 100644 --- a/bindings/java/src/integration/com/apple/foundationdb/RangeQueryIntegrationTest.java +++ b/bindings/java/src/integration/com/apple/foundationdb/RangeQueryIntegrationTest.java @@ -41,7 +41,7 @@ import org.junit.jupiter.api.extension.ExtendWith; */ @ExtendWith(RequiresDatabase.class) class RangeQueryIntegrationTest { - private static final FDB fdb = FDB.selectAPIVersion(700); + private static final FDB fdb = FDB.selectAPIVersion(710); @BeforeEach @AfterEach diff --git a/bindings/java/src/integration/com/apple/foundationdb/RequiresDatabase.java b/bindings/java/src/integration/com/apple/foundationdb/RequiresDatabase.java index 803a25ab1c..69537c8a8d 100644 --- a/bindings/java/src/integration/com/apple/foundationdb/RequiresDatabase.java +++ b/bindings/java/src/integration/com/apple/foundationdb/RequiresDatabase.java @@ -80,7 +80,7 @@ public class RequiresDatabase implements ExecutionCondition, BeforeAllCallback { * assume that if we are here, then canRunIntegrationTest() is returning true and we don't 
have to bother * checking it. */ - try (Database db = FDB.selectAPIVersion(700).open()) { + try (Database db = FDB.selectAPIVersion(710).open()) { db.run(tr -> { CompletableFuture future = tr.get("test".getBytes()); diff --git a/bindings/java/src/junit/com/apple/foundationdb/FDBLibraryRule.java b/bindings/java/src/junit/com/apple/foundationdb/FDBLibraryRule.java index c50899fef9..455cb9c4b6 100644 --- a/bindings/java/src/junit/com/apple/foundationdb/FDBLibraryRule.java +++ b/bindings/java/src/junit/com/apple/foundationdb/FDBLibraryRule.java @@ -37,7 +37,7 @@ public class FDBLibraryRule implements BeforeAllCallback { public FDBLibraryRule(int apiVersion) { this.apiVersion = apiVersion; } - public static FDBLibraryRule current() { return new FDBLibraryRule(700); } + public static FDBLibraryRule current() { return new FDBLibraryRule(710); } public static FDBLibraryRule v63() { return new FDBLibraryRule(630); } diff --git a/bindings/java/src/main/com/apple/foundationdb/FDB.java b/bindings/java/src/main/com/apple/foundationdb/FDB.java index 031a1e2472..1a54e108d5 100644 --- a/bindings/java/src/main/com/apple/foundationdb/FDB.java +++ b/bindings/java/src/main/com/apple/foundationdb/FDB.java @@ -35,7 +35,7 @@ import java.util.concurrent.atomic.AtomicInteger; * This call is required before using any other part of the API. The call allows * an error to be thrown at this point to prevent client code from accessing a later library * with incorrect assumptions from the current version. The API version documented here is version - * {@code 700}.

+ * {@code 710}.

* FoundationDB encapsulates multiple versions of its interface by requiring * the client to explicitly specify the version of the API it uses. The purpose * of this design is to allow you to upgrade the server, client libraries, or @@ -183,8 +183,8 @@ public class FDB { } if(version < 510) throw new IllegalArgumentException("API version not supported (minimum 510)"); - if(version > 700) - throw new IllegalArgumentException("API version not supported (maximum 700)"); + if(version > 710) + throw new IllegalArgumentException("API version not supported (maximum 710)"); Select_API_version(version); singleton = new FDB(version); diff --git a/bindings/java/src/main/com/apple/foundationdb/JNIUtil.java b/bindings/java/src/main/com/apple/foundationdb/JNIUtil.java index 8aa3d9f138..99c2f8a322 100644 --- a/bindings/java/src/main/com/apple/foundationdb/JNIUtil.java +++ b/bindings/java/src/main/com/apple/foundationdb/JNIUtil.java @@ -36,11 +36,7 @@ class JNIUtil { private static final String TEMPFILE_PREFIX = "fdbjni"; private static final String TEMPFILE_SUFFIX = ".library"; - private enum OS { - WIN32("windows", "amd64", false), - LINUX("linux", "amd64", true), - OSX("osx", "x86_64", true); - + private static class OS { private final String name; private final String arch; private final boolean canDeleteEager; @@ -171,13 +167,19 @@ class JNIUtil { private static OS getRunningOS() { String osname = System.getProperty("os.name").toLowerCase(); - if(osname.startsWith("windows")) - return OS.WIN32; - if(osname.startsWith("linux")) - return OS.LINUX; - if(osname.startsWith("mac") || osname.startsWith("darwin")) - return OS.OSX; - throw new IllegalStateException("Unknown or unsupported OS: " + osname); + String arch = System.getProperty("os.arch"); + if (!arch.equals("amd64") && !arch.equals("x86_64") && !arch.equals("aarch64")) { + throw new IllegalStateException("Unknown or unsupported arch: " + arch); + } + if (osname.startsWith("windows")) { + return new OS("windows", arch, /* canDeleteEager */ false); + } else if (osname.startsWith("linux")) { + return new OS("linux", arch, /* canDeleteEager */ true); + } else if (osname.startsWith("mac") || osname.startsWith("darwin")) { + return new OS("osx", arch, /* canDeleteEager */ true); + } else { + throw new IllegalStateException("Unknown or unsupported OS: " + osname); + } } private JNIUtil() {} diff --git a/bindings/java/src/main/overview.html.in b/bindings/java/src/main/overview.html.in index adaedd1a03..fe20448dfb 100644 --- a/bindings/java/src/main/overview.html.in +++ b/bindings/java/src/main/overview.html.in @@ -13,7 +13,7 @@ and then added to your classpath.

Getting started

To start using FoundationDB from Java, create an instance of the {@link com.apple.foundationdb.FDB FoundationDB API interface} with the version of the -API that you want to use (this release of the FoundationDB Java API supports versions between {@code 510} and {@code 700}). +API that you want to use (this release of the FoundationDB Java API supports versions between {@code 510} and {@code 710}). With this API object you can then open {@link com.apple.foundationdb.Cluster Cluster}s and {@link com.apple.foundationdb.Database Database}s and start using {@link com.apple.foundationdb.Transaction Transaction}s. @@ -29,7 +29,7 @@ import com.apple.foundationdb.tuple.Tuple; public class Example { public static void main(String[] args) { - FDB fdb = FDB.selectAPIVersion(700); + FDB fdb = FDB.selectAPIVersion(710); try(Database db = fdb.open()) { // Run an operation on the database diff --git a/bindings/java/src/test/com/apple/foundationdb/test/AbstractTester.java b/bindings/java/src/test/com/apple/foundationdb/test/AbstractTester.java index e27e80b082..8cb1230c2f 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/AbstractTester.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/AbstractTester.java @@ -27,7 +27,7 @@ import com.apple.foundationdb.Database; import com.apple.foundationdb.FDB; public abstract class AbstractTester { - public static final int API_VERSION = 700; + public static final int API_VERSION = 710; protected static final int NUM_RUNS = 25; protected static final Charset ASCII = Charset.forName("ASCII"); diff --git a/bindings/java/src/test/com/apple/foundationdb/test/BlockingBenchmark.java b/bindings/java/src/test/com/apple/foundationdb/test/BlockingBenchmark.java index 68f7d74a95..d9c8c20d23 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/BlockingBenchmark.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/BlockingBenchmark.java @@ -33,7 +33,7 @@ public class BlockingBenchmark { private static final int PARALLEL = 100; public static void main(String[] args) throws InterruptedException { - FDB fdb = FDB.selectAPIVersion(700); + FDB fdb = FDB.selectAPIVersion(710); // The cluster file DOES NOT need to be valid, although it must exist. // This is because the database is never really contacted in this test. 
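The BlockingBenchmark comment above makes a point worth calling out: the cluster file only has to exist, because the client connects lazily and the cluster is never actually contacted in that test. A sketch of the same behavior at the C layer (illustrative; `open_without_contact` is an invented name):

```c
#define FDB_API_VERSION 710
#include <foundationdb/fdb_c.h>
#include <stddef.h>

/* Creating a database handle does not require reachable coordinators;
 * no network round trip happens until a transaction performs an
 * operation. Assumes API selection and network setup already ran. */
FDBDatabase* open_without_contact(const char* cluster_file_path) {
	FDBDatabase* db = NULL;
	if (fdb_create_database(cluster_file_path, &db) != 0)
		return NULL;
	return db;
}
```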
diff --git a/bindings/java/src/test/com/apple/foundationdb/test/ConcurrentGetSetGet.java b/bindings/java/src/test/com/apple/foundationdb/test/ConcurrentGetSetGet.java index bddfd6f57d..046a39f66d 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/ConcurrentGetSetGet.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/ConcurrentGetSetGet.java @@ -48,7 +48,7 @@ public class ConcurrentGetSetGet { } public static void main(String[] args) { - try(Database database = FDB.selectAPIVersion(700).open()) { + try(Database database = FDB.selectAPIVersion(710).open()) { new ConcurrentGetSetGet().apply(database); } } diff --git a/bindings/java/src/test/com/apple/foundationdb/test/Example.java b/bindings/java/src/test/com/apple/foundationdb/test/Example.java index 44e9087b3e..80c35b5ca2 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/Example.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/Example.java @@ -26,7 +26,7 @@ import com.apple.foundationdb.tuple.Tuple; public class Example { public static void main(String[] args) { - FDB fdb = FDB.selectAPIVersion(700); + FDB fdb = FDB.selectAPIVersion(710); try(Database db = fdb.open()) { // Run an operation on the database diff --git a/bindings/java/src/test/com/apple/foundationdb/test/IterableTest.java b/bindings/java/src/test/com/apple/foundationdb/test/IterableTest.java index ce1f623f4c..a9a7a37b66 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/IterableTest.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/IterableTest.java @@ -31,7 +31,7 @@ public class IterableTest { public static void main(String[] args) throws InterruptedException { final int reps = 1000; try { - FDB fdb = FDB.selectAPIVersion(700); + FDB fdb = FDB.selectAPIVersion(710); try(Database db = fdb.open()) { runTests(reps, db); } diff --git a/bindings/java/src/test/com/apple/foundationdb/test/LocalityTests.java b/bindings/java/src/test/com/apple/foundationdb/test/LocalityTests.java index d049ac83f7..a14b466514 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/LocalityTests.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/LocalityTests.java @@ -34,7 +34,7 @@ import com.apple.foundationdb.tuple.ByteArrayUtil; public class LocalityTests { public static void main(String[] args) { - FDB fdb = FDB.selectAPIVersion(700); + FDB fdb = FDB.selectAPIVersion(710); try(Database database = fdb.open(args[0])) { try(Transaction tr = database.createTransaction()) { String[] keyAddresses = LocalityUtil.getAddressesForKey(tr, "a".getBytes()).join(); diff --git a/bindings/java/src/test/com/apple/foundationdb/test/ParallelRandomScan.java b/bindings/java/src/test/com/apple/foundationdb/test/ParallelRandomScan.java index 624566964a..a218a6460e 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/ParallelRandomScan.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/ParallelRandomScan.java @@ -43,7 +43,7 @@ public class ParallelRandomScan { private static final int PARALLELISM_STEP = 5; public static void main(String[] args) throws InterruptedException { - FDB api = FDB.selectAPIVersion(700); + FDB api = FDB.selectAPIVersion(710); try(Database database = api.open(args[0])) { for(int i = PARALLELISM_MIN; i <= PARALLELISM_MAX; i += PARALLELISM_STEP) { runTest(database, i, ROWS, DURATION_MS); diff --git a/bindings/java/src/test/com/apple/foundationdb/test/RangeTest.java b/bindings/java/src/test/com/apple/foundationdb/test/RangeTest.java index 4232a6d664..38eaf7b424 100644 --- 
a/bindings/java/src/test/com/apple/foundationdb/test/RangeTest.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/RangeTest.java @@ -34,7 +34,7 @@ import com.apple.foundationdb.Transaction; import com.apple.foundationdb.async.AsyncIterable; public class RangeTest { - private static final int API_VERSION = 700; + private static final int API_VERSION = 710; public static void main(String[] args) { System.out.println("About to use version " + API_VERSION); diff --git a/bindings/java/src/test/com/apple/foundationdb/test/SerialInsertion.java b/bindings/java/src/test/com/apple/foundationdb/test/SerialInsertion.java index c16599196c..90adea8ac9 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/SerialInsertion.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/SerialInsertion.java @@ -34,7 +34,7 @@ public class SerialInsertion { private static final int NODES = 1000000; public static void main(String[] args) { - FDB api = FDB.selectAPIVersion(700); + FDB api = FDB.selectAPIVersion(710); try(Database database = api.open()) { long start = System.currentTimeMillis(); diff --git a/bindings/java/src/test/com/apple/foundationdb/test/SerialIteration.java b/bindings/java/src/test/com/apple/foundationdb/test/SerialIteration.java index db63999daa..8e4578d97f 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/SerialIteration.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/SerialIteration.java @@ -39,7 +39,7 @@ public class SerialIteration { private static final int THREAD_COUNT = 1; public static void main(String[] args) throws InterruptedException { - FDB api = FDB.selectAPIVersion(700); + FDB api = FDB.selectAPIVersion(710); try(Database database = api.open(args[0])) { for(int i = 1; i <= THREAD_COUNT; i++) { runThreadedTest(database, i); diff --git a/bindings/java/src/test/com/apple/foundationdb/test/SerialTest.java b/bindings/java/src/test/com/apple/foundationdb/test/SerialTest.java index df084d564f..5b89379350 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/SerialTest.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/SerialTest.java @@ -30,7 +30,7 @@ public class SerialTest { public static void main(String[] args) throws InterruptedException { final int reps = 1000; try { - FDB fdb = FDB.selectAPIVersion(700); + FDB fdb = FDB.selectAPIVersion(710); try(Database db = fdb.open()) { runTests(reps, db); } diff --git a/bindings/java/src/test/com/apple/foundationdb/test/SnapshotTransactionTest.java b/bindings/java/src/test/com/apple/foundationdb/test/SnapshotTransactionTest.java index 78de1ae3db..cb58c3e72d 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/SnapshotTransactionTest.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/SnapshotTransactionTest.java @@ -39,7 +39,7 @@ public class SnapshotTransactionTest { private static final Subspace SUBSPACE = new Subspace(Tuple.from("test", "conflict_ranges")); public static void main(String[] args) { - FDB fdb = FDB.selectAPIVersion(700); + FDB fdb = FDB.selectAPIVersion(710); try(Database db = fdb.open()) { snapshotReadShouldNotConflict(db); snapshotShouldNotAddConflictRange(db); diff --git a/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java b/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java index c3ad8313be..2145b88966 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java @@ -37,7 +37,7 @@ public class TupleTest { 
public static void main(String[] args) throws NoSuchFieldException { final int reps = 1000; try { - FDB fdb = FDB.selectAPIVersion(700); + FDB fdb = FDB.selectAPIVersion(710); try(Database db = fdb.open()) { runTests(reps, db); } diff --git a/bindings/java/src/test/com/apple/foundationdb/test/VersionstampSmokeTest.java b/bindings/java/src/test/com/apple/foundationdb/test/VersionstampSmokeTest.java index e50bc9c031..6ed02c008b 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/VersionstampSmokeTest.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/VersionstampSmokeTest.java @@ -32,7 +32,7 @@ import com.apple.foundationdb.tuple.Versionstamp; public class VersionstampSmokeTest { public static void main(String[] args) { - FDB fdb = FDB.selectAPIVersion(700); + FDB fdb = FDB.selectAPIVersion(710); try(Database db = fdb.open()) { db.run(tr -> { tr.clear(Tuple.from("prefix").range()); diff --git a/bindings/java/src/test/com/apple/foundationdb/test/WatchTest.java b/bindings/java/src/test/com/apple/foundationdb/test/WatchTest.java index 14c0aa1d43..eb675d1518 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/WatchTest.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/WatchTest.java @@ -34,7 +34,7 @@ import com.apple.foundationdb.Transaction; public class WatchTest { public static void main(String[] args) { - FDB fdb = FDB.selectAPIVersion(700); + FDB fdb = FDB.selectAPIVersion(710); try(Database database = fdb.open(args[0])) { database.options().setLocationCacheSize(42); try(Transaction tr = database.createTransaction()) { diff --git a/bindings/python/fdb/__init__.py b/bindings/python/fdb/__init__.py index 17f697797d..0054e72808 100644 --- a/bindings/python/fdb/__init__.py +++ b/bindings/python/fdb/__init__.py @@ -52,7 +52,7 @@ def get_api_version(): def api_version(ver): - header_version = 700 + header_version = 710 if '_version' in globals(): if globals()['_version'] != ver: @@ -95,7 +95,6 @@ def api_version(ver): 'transactional', 'options', 'StreamingMode', - 'get_server_protocol' ) _add_symbols(fdb.impl, list) diff --git a/bindings/python/fdb/impl.py b/bindings/python/fdb/impl.py index 6e7803777a..d38582b459 100644 --- a/bindings/python/fdb/impl.py +++ b/bindings/python/fdb/impl.py @@ -253,7 +253,7 @@ def transactional(*tr_args, **tr_kwargs): @functools.wraps(func) def wrapper(*args, **kwargs): # We can't throw this from the decorator, as when a user runs - # >>> import fdb ; fdb.api_version(700) + # >>> import fdb ; fdb.api_version(710) # the code above uses @transactional before the API version is set if fdb.get_api_version() >= 630 and inspect.isgeneratorfunction(func): raise ValueError("Generators can not be wrapped with fdb.transactional") @@ -1531,9 +1531,6 @@ def init_c_api(): _capi.fdb_transaction_get_approximate_size.argtypes = [ctypes.c_void_p] _capi.fdb_transaction_get_approximate_size.restype = ctypes.c_void_p - _capi.fdb_get_server_protocol.argtypes = [ctypes.c_char_p] - _capi.fdb_get_server_protocol.restype = ctypes.c_void_p - _capi.fdb_transaction_get_versionstamp.argtypes = [ctypes.c_void_p] _capi.fdb_transaction_get_versionstamp.restype = ctypes.c_void_p @@ -1733,13 +1730,6 @@ open_databases = {} cacheLock = threading.Lock() -def get_server_protocol(clusterFilePath=None): - with _network_thread_reentrant_lock: - if not _network_thread: - init() - - return FutureUInt64(_capi.fdb_get_server_protocol(optionalParamToBytes(clusterFilePath)[0])) - def open(cluster_file=None, event_model=None): """Opens the given database (or the 
default database of the cluster indicated by the fdb.cluster file in a platform-specific location, if no cluster_file diff --git a/bindings/python/tests/size_limit_tests.py b/bindings/python/tests/size_limit_tests.py index 756d9422e0..fdc9cdaf54 100644 --- a/bindings/python/tests/size_limit_tests.py +++ b/bindings/python/tests/size_limit_tests.py @@ -22,7 +22,7 @@ import fdb import sys if __name__ == '__main__': - fdb.api_version(700) + fdb.api_version(710) @fdb.transactional def setValue(tr, key, value): diff --git a/bindings/ruby/lib/fdb.rb b/bindings/ruby/lib/fdb.rb index df8448ea0b..f96c25945a 100644 --- a/bindings/ruby/lib/fdb.rb +++ b/bindings/ruby/lib/fdb.rb @@ -36,7 +36,7 @@ module FDB end end def self.api_version(version) - header_version = 700 + header_version = 710 if self.is_api_version_selected?() if @@chosen_version != version raise "FDB API already loaded at version #{@@chosen_version}." diff --git a/build/cmake/Dockerfile b/build/cmake/Dockerfile index 3f9d51a29a..0452606a1f 100644 --- a/build/cmake/Dockerfile +++ b/build/cmake/Dockerfile @@ -13,7 +13,7 @@ RUN curl -L https://github.com/Kitware/CMake/releases/download/v3.13.4/cmake-3.1 cd /tmp && tar xf cmake.tar.gz && cp -r cmake-3.13.4-Linux-x86_64/* /usr/local/ # install boost -RUN curl -L https://dl.bintray.com/boostorg/release/1.67.0/source/boost_1_72_0.tar.bz2 > /tmp/boost.tar.bz2 &&\ +RUN curl -L https://boostorg.jfrog.io/artifactory/main/release/1.67.0/source/boost_1_67_0.tar.bz2 > /tmp/boost.tar.bz2 &&\ cd /tmp && echo "2684c972994ee57fc5632e03bf044746f6eb45d4920c343937a465fd67a5adba boost.tar.bz2" > boost-sha.txt &&\ sha256sum -c boost-sha.txt && tar xf boost.tar.bz2 && cp -r boost_1_72_0/boost /usr/local/include/ &&\ rm -rf boost.tar.bz2 boost_1_72_0 diff --git a/build/cmake/package_tester/fdb_c_app/app.c b/build/cmake/package_tester/fdb_c_app/app.c index f26b2513c1..6fe24068f9 100644 --- a/build/cmake/package_tester/fdb_c_app/app.c +++ b/build/cmake/package_tester/fdb_c_app/app.c @@ -1,7 +1,7 @@ -#define FDB_API_VERSION 700 +#define FDB_API_VERSION 710 #include int main(int argc, char* argv[]) { - fdb_select_api_version(700); + fdb_select_api_version(710); return 0; } diff --git a/build/cmake/package_tester/modules/tests.sh b/build/cmake/package_tester/modules/tests.sh index 35ff098a6f..2495e21a94 100644 --- a/build/cmake/package_tester/modules/tests.sh +++ b/build/cmake/package_tester/modules/tests.sh @@ -65,7 +65,7 @@ then python setup.py install successOr "Installing python bindings failed" popd - python -c 'import fdb; fdb.api_version(700)' + python -c 'import fdb; fdb.api_version(710)' successOr "Loading python bindings failed" # Test cmake and pkg-config integration: https://github.com/apple/foundationdb/issues/1483 diff --git a/build/docker/centos6/build/Dockerfile b/build/docker/centos6/build/Dockerfile index 1290160c4f..0a1fbbd70a 100644 --- a/build/docker/centos6/build/Dockerfile +++ b/build/docker/centos6/build/Dockerfile @@ -22,6 +22,8 @@ RUN sed -i -e '/enabled/d' /etc/yum.repos.d/CentOS-Base.repo && \ curl \ debbuild \ devtoolset-8 \ + devtoolset-8-libasan-devel \ + devtoolset-8-libtsan-devel \ devtoolset-8-libubsan-devel \ devtoolset-8-valgrind-devel \ dos2unix \ @@ -37,6 +39,7 @@ RUN sed -i -e '/enabled/d' /etc/yum.repos.d/CentOS-Base.repo && \ lz4-devel \ lz4-static \ mono-devel \ + redhat-lsb-core \ rpm-build \ tcl-devel \ unzip \ @@ -155,7 +158,7 @@ RUN curl -Ls https://github.com/facebook/rocksdb/archive/v6.10.1.tar.gz -o rocks rm -rf /tmp/* # install boost 1.67 to /opt -RUN curl -Ls 
https://dl.bintray.com/boostorg/release/1.67.0/source/boost_1_67_0.tar.bz2 -o boost_1_67_0.tar.bz2 && \ +RUN curl -Ls https://boostorg.jfrog.io/artifactory/main/release/1.67.0/source/boost_1_67_0.tar.bz2 -o boost_1_67_0.tar.bz2 && \ echo "2684c972994ee57fc5632e03bf044746f6eb45d4920c343937a465fd67a5adba boost_1_67_0.tar.bz2" > boost-sha-67.txt && \ sha256sum -c boost-sha-67.txt && \ tar --no-same-owner --directory /opt -xjf boost_1_67_0.tar.bz2 && \ @@ -164,7 +167,7 @@ RUN curl -Ls https://dl.bintray.com/boostorg/release/1.67.0/source/boost_1_67_0. # install boost 1.72 to /opt RUN source /opt/rh/devtoolset-8/enable && \ - curl -Ls https://dl.bintray.com/boostorg/release/1.72.0/source/boost_1_72_0.tar.bz2 -o boost_1_72_0.tar.bz2 && \ + curl -Ls https://boostorg.jfrog.io/artifactory/main/release/1.72.0/source/boost_1_72_0.tar.bz2 -o boost_1_72_0.tar.bz2 && \ echo "59c9b274bc451cf91a9ba1dd2c7fdcaf5d60b1b3aa83f2c9fa143417cc660722 boost_1_72_0.tar.bz2" > boost-sha-72.txt && \ sha256sum -c boost-sha-72.txt && \ tar --no-same-owner --directory /opt -xjf boost_1_72_0.tar.bz2 && \ diff --git a/build/docker/centos6/devel/Dockerfile b/build/docker/centos6/devel/Dockerfile index 82c99d4464..c5c9db2914 100644 --- a/build/docker/centos6/devel/Dockerfile +++ b/build/docker/centos6/devel/Dockerfile @@ -76,4 +76,9 @@ RUN rm -f /root/anaconda-ks.cfg && \ ' j start --tarball $(find ${HOME}/build_output/packages -name correctness\*.tar.gz) "${@}"' \ '}' \ '' \ - >> .bashrc \ No newline at end of file + 'USER_BASHRC="$HOME/src/.bashrc.local"' \ + 'if test -f "$USER_BASHRC"; then' \ + ' source $USER_BASHRC' \ + 'fi' \ + '' \ + >> .bashrc diff --git a/build/docker/centos7/build/Dockerfile b/build/docker/centos7/build/Dockerfile index 3a9ee06938..de376d2557 100644 --- a/build/docker/centos7/build/Dockerfile +++ b/build/docker/centos7/build/Dockerfile @@ -18,6 +18,8 @@ RUN rpmkeys --import mono-project.com.rpmkey.pgp && \ curl \ debbuild \ devtoolset-8 \ + devtoolset-8-libasan-devel \ + devtoolset-8-libtsan-devel \ devtoolset-8-libubsan-devel \ devtoolset-8-systemtap-sdt-devel \ docker-ce \ @@ -34,6 +36,7 @@ RUN rpmkeys --import mono-project.com.rpmkey.pgp && \ lz4-devel \ lz4-static \ mono-devel \ + redhat-lsb-core \ rpm-build \ tcl-devel \ unzip \ @@ -138,7 +141,7 @@ RUN curl -Ls https://github.com/facebook/rocksdb/archive/v6.10.1.tar.gz -o rocks rm -rf /tmp/* # install boost 1.67 to /opt -RUN curl -Ls https://dl.bintray.com/boostorg/release/1.67.0/source/boost_1_67_0.tar.bz2 -o boost_1_67_0.tar.bz2 && \ +RUN curl -Ls https://boostorg.jfrog.io/artifactory/main/release/1.67.0/source/boost_1_67_0.tar.bz2 -o boost_1_67_0.tar.bz2 && \ echo "2684c972994ee57fc5632e03bf044746f6eb45d4920c343937a465fd67a5adba boost_1_67_0.tar.bz2" > boost-sha-67.txt && \ sha256sum -c boost-sha-67.txt && \ tar --no-same-owner --directory /opt -xjf boost_1_67_0.tar.bz2 && \ @@ -147,7 +150,7 @@ RUN curl -Ls https://dl.bintray.com/boostorg/release/1.67.0/source/boost_1_67_0. 
# install boost 1.72 to /opt RUN source /opt/rh/devtoolset-8/enable && \ - curl -Ls https://dl.bintray.com/boostorg/release/1.72.0/source/boost_1_72_0.tar.bz2 -o boost_1_72_0.tar.bz2 && \ + curl -Ls https://boostorg.jfrog.io/artifactory/main/release/1.72.0/source/boost_1_72_0.tar.bz2 -o boost_1_72_0.tar.bz2 && \ echo "59c9b274bc451cf91a9ba1dd2c7fdcaf5d60b1b3aa83f2c9fa143417cc660722 boost_1_72_0.tar.bz2" > boost-sha-72.txt && \ sha256sum -c boost-sha-72.txt && \ tar --no-same-owner --directory /opt -xjf boost_1_72_0.tar.bz2 && \ diff --git a/build/docker/centos7/devel/Dockerfile b/build/docker/centos7/devel/Dockerfile index ea60da54e7..98f1923c17 100644 --- a/build/docker/centos7/devel/Dockerfile +++ b/build/docker/centos7/devel/Dockerfile @@ -104,5 +104,10 @@ RUN rm -f /root/anaconda-ks.cfg && \ ' j start --tarball $(find ${HOME}/build_output/packages -name correctness\*.tar.gz) "${@}"' \ '}' \ '' \ + 'USER_BASHRC="$HOME/src/.bashrc.local"' \ + 'if test -f "$USER_BASHRC"; then' \ + ' source $USER_BASHRC' \ + 'fi' \ + '' \ 'bash ${HOME}/docker_proxy.sh' \ - >> .bashrc \ No newline at end of file + >> .bashrc diff --git a/cmake/CPackConfig.cmake b/cmake/CPackConfig.cmake index 08f90bc0c5..c67059ec65 100644 --- a/cmake/CPackConfig.cmake +++ b/cmake/CPackConfig.cmake @@ -9,24 +9,6 @@ elseif(CPACK_GENERATOR MATCHES "DEB") set(CPACK_COMPONENTS_ALL clients-deb server-deb clients-versioned server-versioned) set(CPACK_RESOURCE_FILE_README ${CMAKE_SOURCE_DIR}/README.md) set(CPACK_RESOURCE_FILE_LICENSE ${CMAKE_SOURCE_DIR}/LICENSE) -elseif(CPACK_GENERATOR MATCHES "productbuild") - set(CPACK_PACKAGING_INSTALL_PREFIX "/") - set(CPACK_COMPONENTS_ALL clients-pm server-pm) - set(CPACK_STRIP_FILES TRUE) - set(CPACK_PREFLIGHT_SERVER_SCRIPT ${CMAKE_SOURCE_DIR}/packaging/osx/scripts-server/preinstall) - set(CPACK_POSTFLIGHT_SERVER_SCRIPT ${CMAKE_SOURCE_DIR}/packaging/osx/scripts-server/postinstall) - set(CPACK_POSTFLIGHT_CLIENTS_SCRIPT ${CMAKE_SOURCE_DIR}/packaging/osx/scripts-server/preinstall) -# Commenting out this readme file until it works within packaging - set(CPACK_RESOURCE_FILE_README ${CMAKE_SOURCE_DIR}/packaging/osx/resources/conclusion.rtf) - set(CPACK_PRODUCTBUILD_RESOURCES_DIR ${CMAKE_SOURCE_DIR}/packaging/osx/resources) -# Changing the path of this file as CMAKE_BINARY_DIR does not seem to be defined - set(CPACK_RESOURCE_FILE_LICENSE ${CMAKE_BINARY_DIR}/License.txt) - if(NOT FDB_RELEASE) - set(prerelease_string "-PRERELEASE") - else() - set(prerelease_string "") - endif() - set(CPACK_PACKAGE_FILE_NAME "FoundationDB-${PROJECT_VERSION}${prerelease_string}") elseif(CPACK_GENERATOR MATCHES "TGZ") set(CPACK_STRIP_FILES TRUE) set(CPACK_COMPONENTS_ALL clients-tgz server-tgz) diff --git a/cmake/CompileBoost.cmake b/cmake/CompileBoost.cmake index 0b1cc68502..687c266f0b 100644 --- a/cmake/CompileBoost.cmake +++ b/cmake/CompileBoost.cmake @@ -38,7 +38,7 @@ function(compile_boost) include(ExternalProject) set(BOOST_INSTALL_DIR "${CMAKE_BINARY_DIR}/boost_install") ExternalProject_add("${MY_TARGET}Project" - URL "https://dl.bintray.com/boostorg/release/1.72.0/source/boost_1_72_0.tar.bz2" + URL "https://boostorg.jfrog.io/artifactory/main/release/1.72.0/source/boost_1_72_0.tar.bz2" URL_HASH SHA256=59c9b274bc451cf91a9ba1dd2c7fdcaf5d60b1b3aa83f2c9fa143417cc660722 CONFIGURE_COMMAND ./bootstrap.sh ${BOOTSTRAP_ARGS} BUILD_COMMAND ${B2_COMMAND} link=static ${MY_BUILD_ARGS} --prefix=${BOOST_INSTALL_DIR} ${USER_CONFIG_FLAG} install diff --git a/cmake/ConfigureCompiler.cmake b/cmake/ConfigureCompiler.cmake index 
4dbe9db816..c14c5011c5 100644 --- a/cmake/ConfigureCompiler.cmake +++ b/cmake/ConfigureCompiler.cmake @@ -280,7 +280,12 @@ else() -Wno-unknown-attributes) endif() add_compile_options( - -Wall -Wextra + -Wall + -Wextra + -Wredundant-move + -Wpessimizing-move + -Woverloaded-virtual + -Wshift-sign-overflow # Here's the current set of warnings we need to explicitly disable to compile warning-free with clang 10 -Wno-comment -Wno-dangling-else @@ -288,16 +293,12 @@ else() -Wno-format -Wno-mismatched-tags -Wno-missing-field-initializers - -Wno-overloaded-virtual -Wno-reorder -Wno-reorder-ctor -Wno-sign-compare -Wno-tautological-pointer-compare -Wno-undefined-var-template -Wno-tautological-pointer-compare - -Wredundant-move - -Wpessimizing-move - -Woverloaded-virtual -Wno-unknown-pragmas -Wno-unknown-warning-option -Wno-unused-function diff --git a/cmake/FDBInstall.cmake b/cmake/FDBInstall.cmake index 263291c433..2dd4be696f 100644 --- a/cmake/FDBInstall.cmake +++ b/cmake/FDBInstall.cmake @@ -214,7 +214,7 @@ endfunction() function(fdb_install) if(NOT WIN32 AND NOT OPEN_FOR_IDE) - set(one_value_options COMPONENT DESTINATION EXPORT DESTINATION_SUFFIX) + set(one_value_options COMPONENT DESTINATION EXPORT DESTINATION_SUFFIX RENAME) set(multi_value_options TARGETS FILES PROGRAMS DIRECTORY) cmake_parse_arguments(IN "${options}" "${one_value_options}" "${multi_value_options}" "${ARGN}") @@ -237,6 +237,9 @@ function(fdb_install) get_install_dest(${pkg} ${destination} install_path) string(TOLOWER "${pkg}" package) if(install_export) + if(IN_RENAME) + message(FATAL_ERROR "RENAME for EXPORT target not implemented") + endif() install( EXPORT "${IN_EXPORT}-${package}" DESTINATION "${install_path}${IN_DESTINATION_SUFFIX}" @@ -248,11 +251,20 @@ function(fdb_install) set(export_args EXPORT "${IN_EXPORT}-${package}") endif() if(NOT ${install_path} STREQUAL "") - install( - ${args} - ${export_args} - DESTINATION "${install_path}${IN_DESTINATION_SUFFIX}" - COMPONENT "${IN_COMPONENT}-${package}") + if(IN_RENAME) + install( + ${args} + ${export_args} + DESTINATION "${install_path}${IN_DESTINATION_SUFFIX}" + COMPONENT "${IN_COMPONENT}-${package}" + RENAME ${IN_RENAME}) + else() + install( + ${args} + ${export_args} + DESTINATION "${install_path}${IN_DESTINATION_SUFFIX}" + COMPONENT "${IN_COMPONENT}-${package}") + endif() endif() endif() endforeach() diff --git a/cmake/InstallLayout.cmake b/cmake/InstallLayout.cmake index f4297f0179..91d39d4125 100644 --- a/cmake/InstallLayout.cmake +++ b/cmake/InstallLayout.cmake @@ -46,10 +46,6 @@ function(install_symlink) TO "../${rel_path}bin/${IN_FILE_NAME}" DESTINATION "usr/lib64/${IN_LINK_NAME}" COMPONENTS "${IN_COMPONENT}-deb") - install_symlink_impl( - TO "../${rel_path}local/bin/${IN_FILE_NAME}" - DESTINATION "usr/lib64/${IN_LINK_NAME}" - COMPONENTS "${IN_COMPONENT}-pm") elseif("${IN_LINK_DIR}" MATCHES "bin") install_symlink_impl( TO "../${rel_path}bin/${IN_FILE_NAME}" @@ -61,10 +57,6 @@ function(install_symlink) COMPONENTS "${IN_COMPONENT}-el6" "${IN_COMPONENT}-el7" "${IN_COMPONENT}-deb") - install_symlink_impl( - TO "../${rel_path}/bin/${IN_FILE_NAME}" - DESTINATION "usr/local/bin/${IN_LINK_NAME}" - COMPONENTS "${IN_COMPONENT}-pm") elseif("${IN_LINK_DIR}" MATCHES "fdbmonitor") install_symlink_impl( TO "../../${rel_path}bin/${IN_FILE_NAME}" @@ -76,10 +68,6 @@ function(install_symlink) COMPONENTS "${IN_COMPONENT}-el6" "${IN_COMPONENT}-el7" "${IN_COMPONENT}-deb") - install_symlink_impl( - TO "../../${rel_path}/bin/${IN_FILE_NAME}" - DESTINATION 
"usr/local/lib/foundationdb/${IN_LINK_NAME}" - COMPONENTS "${IN_COMPONENT}-pm") else() message(FATAL_ERROR "Unknown LINK_DIR ${IN_LINK_DIR}") endif() @@ -103,59 +91,16 @@ function(symlink_files) endif() endfunction() -# 'map' from (destination, package) to path -# format vars like install_destination_for_${destination}_${package} -set(install_destination_for_bin_tgz "bin") -set(install_destination_for_bin_deb "usr/bin") -set(install_destination_for_bin_el6 "usr/bin") -set(install_destination_for_bin_el7 "usr/bin") -set(install_destination_for_bin_pm "usr/local/bin") -set(install_destination_for_sbin_tgz "sbin") -set(install_destination_for_sbin_deb "usr/sbin") -set(install_destination_for_sbin_el6 "usr/sbin") -set(install_destination_for_sbin_el7 "usr/sbin") -set(install_destination_for_sbin_pm "usr/local/libexec") -set(install_destination_for_lib_tgz "lib") -set(install_destination_for_lib_deb "usr/lib") -set(install_destination_for_lib_el6 "usr/lib64") -set(install_destination_for_lib_el7 "usr/lib64") -set(install_destination_for_lib_pm "usr/local/lib") -set(install_destination_for_fdbmonitor_tgz "sbin") -set(install_destination_for_fdbmonitor_deb "usr/lib/foundationdb") -set(install_destination_for_fdbmonitor_el6 "usr/lib/foundationdb") -set(install_destination_for_fdbmonitor_el7 "usr/lib/foundationdb") -set(install_destination_for_fdbmonitor_pm "usr/local/libexec") -set(install_destination_for_include_tgz "include") -set(install_destination_for_include_deb "usr/include") -set(install_destination_for_include_el6 "usr/include") -set(install_destination_for_include_el7 "usr/include") -set(install_destination_for_include_pm "usr/local/include") -set(install_destination_for_etc_tgz "etc/foundationdb") -set(install_destination_for_etc_deb "etc/foundationdb") -set(install_destination_for_etc_el6 "etc/foundationdb") -set(install_destination_for_etc_el7 "etc/foundationdb") -set(install_destination_for_etc_pm "usr/local/etc/foundationdb") -set(install_destination_for_log_tgz "log/foundationdb") -set(install_destination_for_log_deb "var/log/foundationdb") -set(install_destination_for_log_el6 "var/log/foundationdb") -set(install_destination_for_log_el7 "var/log/foundationdb") -set(install_destination_for_log_pm "usr/local/foundationdb/logs") -set(install_destination_for_data_tgz "lib/foundationdb") -set(install_destination_for_data_deb "var/lib/foundationdb/data") -set(install_destination_for_data_el6 "var/lib/foundationdb/data") -set(install_destination_for_data_el7 "var/lib/foundationdb/data") -set(install_destination_for_data_pm "usr/local/foundationdb/data") -fdb_install_packages(TGZ DEB EL7 PM VERSIONED) -fdb_install_dirs(BIN SBIN LIB FDBMONITOR INCLUDE ETC LOG DATA) +fdb_install_packages(TGZ DEB EL7 VERSIONED) +fdb_install_dirs(BIN SBIN LIB FDBMONITOR INCLUDE ETC LOG DATA BACKUPAGENT) message(STATUS "FDB_INSTALL_DIRS -> ${FDB_INSTALL_DIRS}") -# 'map' from (destination, package) to path -# format vars like install_destination_for_${destination}_${package} install_destinations(TGZ BIN bin SBIN sbin LIB lib FDBMONITOR sbin + BACKUPAGENT usr/lib/foundationdb INCLUDE include ETC etc/foundationdb LOG log/foundationdb @@ -166,19 +111,13 @@ install_destinations(DEB SBIN usr/sbin LIB usr/lib FDBMONITOR usr/lib/foundationdb + BACKUPAGENT usr/lib/foundationdb INCLUDE usr/include ETC etc/foundationdb LOG var/log/foundationdb - DATA var/lib/foundationdb) + DATA var/lib/foundationdb/data) copy_install_destinations(DEB EL7) install_destinations(EL7 LIB usr/lib64) -install_destinations(PM - BIN 
usr/local/bin - SBIN usr/local/sbin - LIB lib - FDBMONITOR usr/local/libexec - INCLUDE usr/local/include - ETC usr/local/etc/foundationdb) # This can be used for debugging in case above is behaving funky #print_install_destinations() @@ -186,7 +125,7 @@ install_destinations(PM set(generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated") if(APPLE) - set(CPACK_GENERATOR TGZ productbuild) + set(CPACK_GENERATOR TGZ) else() set(CPACK_GENERATOR RPM DEB TGZ) endif() @@ -227,6 +166,13 @@ set(LIB_DIR lib64) configure_file("${PROJECT_SOURCE_DIR}/packaging/multiversion/clients/postinst" "${script_dir}/clients/postinst-el7" @ONLY) configure_file("${PROJECT_SOURCE_DIR}/packaging/multiversion/clients/prerm" "${script_dir}/clients" @ONLY) + +################################################################################ +# Move Docker Setup +################################################################################ + +file(COPY "${PROJECT_SOURCE_DIR}/packaging/docker" DESTINATION "${PROJECT_BINARY_DIR}/packages/") + ################################################################################ # General CPack configuration ################################################################################ @@ -249,19 +195,16 @@ set(CPACK_PACKAGE_CONTACT "The FoundationDB Community") set(CPACK_COMPONENT_SERVER-EL7_DEPENDS clients-el7) set(CPACK_COMPONENT_SERVER-DEB_DEPENDS clients-deb) set(CPACK_COMPONENT_SERVER-TGZ_DEPENDS clients-tgz) -set(CPACK_COMPONENT_SERVER-PM_DEPENDS clients-pm) set(CPACK_COMPONENT_SERVER-VERSIONED_DEPENDS clients-versioned) set(CPACK_COMPONENT_SERVER-EL7_DISPLAY_NAME "foundationdb-server") set(CPACK_COMPONENT_SERVER-DEB_DISPLAY_NAME "foundationdb-server") set(CPACK_COMPONENT_SERVER-TGZ_DISPLAY_NAME "foundationdb-server") -set(CPACK_COMPONENT_SERVER-PM_DISPLAY_NAME "foundationdb-server") set(CPACK_COMPONENT_SERVER-VERSIONED_DISPLAY_NAME "foundationdb-server-${PROJECT_VERSION}") set(CPACK_COMPONENT_CLIENTS-EL7_DISPLAY_NAME "foundationdb-clients") set(CPACK_COMPONENT_CLIENTS-DEB_DISPLAY_NAME "foundationdb-clients") set(CPACK_COMPONENT_CLIENTS-TGZ_DISPLAY_NAME "foundationdb-clients") -set(CPACK_COMPONENT_CLIENTS-PM_DISPLAY_NAME "foundationdb-clients") set(CPACK_COMPONENT_CLIENTS-VERSIONED_DISPLAY_NAME "foundationdb-clients-${PROJECT_VERSION}") @@ -419,19 +362,6 @@ set(CPACK_DEBIAN_SERVER-VERSIONED_PACKAGE_CONTROL_EXTRA ${CMAKE_BINARY_DIR}/packaging/multiversion/server/postinst ${CMAKE_BINARY_DIR}/packaging/multiversion/server/prerm) -################################################################################ -# MacOS configuration -################################################################################ - -if(APPLE) - install(PROGRAMS ${CMAKE_SOURCE_DIR}/packaging/osx/uninstall-FoundationDB.sh - DESTINATION "usr/local/foundationdb" - COMPONENT clients-pm) - install(FILES ${CMAKE_SOURCE_DIR}/packaging/osx/com.foundationdb.fdbmonitor.plist - DESTINATION "Library/LaunchDaemons" - COMPONENT server-pm) -endif() - ################################################################################ # Configuration for DEB ################################################################################ @@ -450,9 +380,6 @@ set(CLUSTER_DESCRIPTION1 ${description1} CACHE STRING "Cluster description") set(CLUSTER_DESCRIPTION2 ${description2} CACHE STRING "Cluster description") if(NOT WIN32) - install(FILES ${CMAKE_SOURCE_DIR}/packaging/osx/foundationdb.conf.new - DESTINATION "usr/local/etc" - COMPONENT server-pm) fdb_install(FILES ${CMAKE_SOURCE_DIR}/packaging/foundationdb.conf 
DESTINATION etc COMPONENT server) diff --git a/contrib/TestHarness/Program.cs.cmake b/contrib/TestHarness/Program.cs.cmake index 8d666b2725..075a2758d6 100644 --- a/contrib/TestHarness/Program.cs.cmake +++ b/contrib/TestHarness/Program.cs.cmake @@ -717,7 +717,7 @@ namespace SummarizeTest delegate IEnumerable parseDelegate(System.IO.Stream stream, string file, bool keepOriginalElement = false, double startTime = -1, double endTime = Double.MaxValue, - double samplingFactor = 1.0); + double samplingFactor = 1.0, Action nonFatalErrorMessage = null); static int Summarize(string[] traceFiles, string summaryFileName, string errorFileName, bool? killed, List outputErrors, int? exitCode, long? peakMemory, @@ -750,12 +750,14 @@ namespace SummarizeTest { try { + // Use Action to set this because IEnumerables with yield can't have an out variable + string nonFatalParseError = null; parseDelegate parse; if (traceFileName.EndsWith(".json")) parse = Magnesium.JsonParser.Parse; else parse = Magnesium.XmlParser.Parse; - foreach (var ev in parse(traceFile, traceFileName)) + foreach (var ev in parse(traceFile, traceFileName, nonFatalErrorMessage: (x) => { nonFatalParseError = x; })) { Magnesium.Severity newSeverity; if (severityMap.TryGetValue(new KeyValuePair(ev.Type, ev.Severity), out newSeverity)) @@ -876,6 +878,11 @@ namespace SummarizeTest if (ev.Type == "StderrSeverity") stderrSeverity = int.Parse(ev.Details.NewSeverity); } + if (nonFatalParseError != null) { + xout.Add(new XElement("NonFatalParseError", + new XAttribute("Severity", (int)Magnesium.Severity.SevWarnAlways), + new XAttribute("ErrorMessage", nonFatalParseError))); + } } catch (Exception e) diff --git a/contrib/TraceLogHelper/JsonParser.cs b/contrib/TraceLogHelper/JsonParser.cs index 9d7272a37f..84fbab81ab 100644 --- a/contrib/TraceLogHelper/JsonParser.cs +++ b/contrib/TraceLogHelper/JsonParser.cs @@ -1,4 +1,4 @@ -/* +/* * JsonParser.cs * * This source file is part of the FoundationDB open source project @@ -34,9 +34,10 @@ namespace Magnesium { static Random r = new Random(); + // dummy parameter nonFatalParseError to match xml public static IEnumerable Parse(System.IO.Stream stream, string file, bool keepOriginalElement = false, double startTime = -1, double endTime = Double.MaxValue, - double samplingFactor = 1.0) + double samplingFactor = 1.0, Action nonFatalErrorMessage = null) { using (var reader = new System.IO.StreamReader(stream)) { diff --git a/contrib/TraceLogHelper/XmlParser.cs b/contrib/TraceLogHelper/XmlParser.cs index 3728c58c3b..9ab79d920e 100644 --- a/contrib/TraceLogHelper/XmlParser.cs +++ b/contrib/TraceLogHelper/XmlParser.cs @@ -33,14 +33,29 @@ namespace Magnesium public static IEnumerable Parse(System.IO.Stream stream, string file, bool keepOriginalElement = false, double startTime = -1, double endTime = Double.MaxValue, - double samplingFactor = 1.0) + double samplingFactor = 1.0, Action nonFatalErrorMessage = null) { using (var reader = XmlReader.Create(stream)) { reader.ReadToDescendant("Trace"); reader.Read(); - foreach (var xev in StreamElements(reader)) + + // foreach (var xev in StreamElements(reader)) + // need to be able to catch and save non-fatal exceptions in StreamElements, so use explicit iterator instead of foreach + var iter = StreamElements(reader).GetEnumerator(); + while (true) { + try { + if (!iter.MoveNext()) { + break; + } + } catch (Exception e) { + if (nonFatalErrorMessage != null) { + nonFatalErrorMessage(e.Message); + } + break; + } + var xev = iter.Current; Event ev = null; try { @@ -165,28 
+180,20 @@ namespace Magnesium } } + // throws exceptions if xml is invalid private static IEnumerable<XElement> StreamElements(this XmlReader reader) { while (!reader.EOF) { if (reader.NodeType == XmlNodeType.Element) { - XElement node = null; - try - { - node = XElement.ReadFrom(reader) as XElement; - } - catch (Exception) { break; } + XElement node = XElement.ReadFrom(reader) as XElement; if (node != null) yield return node; } else { - try - { reader.Read(); - } - catch (Exception) { break; } } } } diff --git a/contrib/apiversioner.py b/contrib/apiversioner.py new file mode 100755 index 0000000000..9806216671 --- /dev/null +++ b/contrib/apiversioner.py @@ -0,0 +1,219 @@ +#!/usr/bin/env python3 +# +# apiversioner.py +# +# This source file is part of the FoundationDB open source project +# +# Copyright 2013-2021 Apple Inc. and the FoundationDB project authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import argparse +import logging +import os +import re +import sys +import traceback + + +LOG_FORMAT = '%(created)f [%(levelname)s] %(message)s' + +EXCLUDED_FILES = list(map(re.compile, [ + # Output directories + r'\.git/.*', r'bin/.*', r'packages/.*', r'\.objs/.*', r'\.deps/.*', r'bindings/go/build/.*', r'documentation/sphinx/\.out/.*', + + # Generated files + r'.*\.g\.cpp$', r'.*\.g\.h$', r'(^|.*/)generated.mk$', r'.*\.g\.S$', + r'.*/MutationType\.java', r'.*/generated\.go', + + # Binary files + r'.*\.class$', r'.*\.o$', r'.*\.a$', r'.*[\.-]debug', r'.*\.so$', r'.*\.dylib$', r'.*\.dll$', r'.*\.tar[^/]*$', r'.*\.jar$', r'.*pyc$', r'bindings/flow/bin/.*', + r'.*\.pdf$', r'.*\.jp[e]*g', r'.*\.png', r'.*\.ico', + r'packaging/msi/art/.*', + + # Project configuration files + r'.*foundationdb\.VC\.db$', r'.*foundationdb\.VC\.VC\.opendb$', r'.*iml$', + + # Source files from someone else + r'(^|.*/)Hash3\..*', r'(^|.*/)sqlite.*', + r'bindings/go/godoc-resources/.*', + r'bindings/go/src/fdb/tuple/testdata/tuples.golden', + r'fdbcli/linenoise/.*', + r'fdbrpc/rapidjson/.*', r'fdbrpc/rapidxml/.*', r'fdbrpc/zlib/.*', r'fdbrpc/sha1/.*', + r'fdbrpc/xml2json.hpp$', r'fdbrpc/libcoroutine/.*', r'fdbrpc/libeio/.*', r'fdbrpc/lib64/.*', + r'fdbrpc/generated-constants.cpp$', + + # Miscellaneous + r'bindings/nodejs/node_modules/.*', r'bindings/go/godoc/.*', r'.*trace.*xml$', r'.*log$', r'.*\.DS_Store$', r'simfdb/\.*', r'.*~$', r'.*.swp$' +])) + +SUSPECT_PHRASES = list(map(re.compile, [ + r'#define\s+FDB_API_VERSION\s+(\d+)', + r'\.\s*selectApiVersion\s*\(\s*(\d+)\s*\)', + r'\.\s*APIVersion\s*\(\s*(\d+)\s*\)', + r'\.\s*MustAPIVersion\s*\(\s*(\d+)\s*\)', + r'header_version\s+=\s+(\d+)', + r'\.\s*apiVersion\s*\(\s*(\d+)\s*\)', + r'API_VERSION\s*=\s*(\d+)', + r'fdb_select_api_version\s*\((\d+)\)' +])) + +DIM_CODE = '\033[2m' +BOLD_CODE = '\033[1m' +RED_COLOR = '\033[91m' +GREEN_COLOR = '\033[92m' +END_COLOR = '\033[0m' + + +def positive_response(val): + return val.lower() in {'y', 'yes'} + + +# Returns: new line list + a dirty flag +def rewrite_lines(lines, version_re, new_version, suspect_only=True, print_diffs=False,
ask_confirm=False, grayscale=False): + new_lines = [] + dirty = False + new_str = str(new_version) + regexes = SUSPECT_PHRASES if suspect_only else [version_re] + group_index = 1 if suspect_only else 2 + for line_no, line in enumerate(lines): + new_line = line + offset = 0 + + for regex in regexes: + for m in regex.finditer(line): + # Replace suspect code with new version. + start = m.start(group_index) + end = m.end(group_index) + new_line = new_line[:start + offset] + new_str + new_line[end + offset:] + offset += len(new_str) - (end - start) + + if (print_diffs or ask_confirm) and line != new_line: + print('Rewrite:') + print('\n'.join(map(lambda pair: ' {:4d}: {}'.format(line_no - 1 + pair[0], pair[1]), enumerate(lines[line_no - 2:line_no])))) + print((DIM_CODE if grayscale else RED_COLOR) + '-{:4d}: {}'.format(line_no + 1, line) + END_COLOR) + print((BOLD_CODE if grayscale else GREEN_COLOR) + '+{:4d}: {}'.format(line_no + 1, new_line) + END_COLOR) + print('\n'.join(map(lambda pair: ' {:4d}: {}'.format(line_no + 2 + pair[0], pair[1]), enumerate(lines[line_no + 1:line_no + 3])))) + + if ask_confirm: + text = input('Looks good (y/n)? ') + if not positive_response(text): + print('Okay, skipping.') + new_line = line + + dirty = dirty or (new_line != line) + new_lines.append(new_line) + + return new_lines, dirty + + +def address_file(base_path, file_path, version, new_version=None, suspect_only=False, show_diffs=False, + rewrite=False, ask_confirm=True, grayscale=False, paths_only=False): + if any(map(lambda x: x.match(file_path), EXCLUDED_FILES)): + logging.debug('skipping file %s as matches excluded list', file_path) + return True + + # Look for all instances of the version number where it is not part of a larger number + version_re = re.compile('(^|[^\\d])(' + str(version) + ')([^\\d]|$)') + try: + contents = open(os.path.join(base_path, file_path), 'r').read() + lines = contents.split('\n') + new_lines = lines + dirty = False + + if suspect_only: + # Look for suspect lines (lines that attempt to set a version) + found = False + for line_no, line in enumerate(lines): + for suspect_phrase in SUSPECT_PHRASES: + for match in suspect_phrase.finditer(line): + curr_version = int(match.groups()[0]) + if (new_version is None and curr_version < version) or (new_version is not None and curr_version < new_version): + found = True + logging.info('Old version: %s:%d:%s', file_path, line_no + 1, line) + + if found and new_version is not None and (show_diffs or rewrite): + new_lines, dirty = rewrite_lines(lines, version_re, new_version, True, print_diffs=True, + ask_confirm=(rewrite and ask_confirm), grayscale=grayscale) + + else: + # Materialize the filter so an empty result is falsy; a bare filter object is always truthy + matching_lines = list(filter(lambda pair: version_re.search(pair[1]), enumerate(lines))) + + # Look for lines with the version + if matching_lines: + if paths_only: + logging.info('File %s matches', file_path) + else: + for line_no, line in matching_lines: + logging.info('Match: %s:%d:%s', file_path, line_no + 1, line) + if new_version is not None and (show_diffs or rewrite): + new_lines, dirty = rewrite_lines(lines, version_re, new_version, False, print_diffs=True, + ask_confirm=(rewrite and ask_confirm), grayscale=grayscale) + else: + logging.debug('File %s does not match', file_path) + + if dirty and rewrite: + logging.info('Rewriting %s', os.path.join(base_path, file_path)) + with open(os.path.join(base_path, file_path), 'w') as fout: + fout.write('\n'.join(new_lines)) + + return True + except (OSError, UnicodeDecodeError) as e: + logging.exception('Unable to read file %s due
to OSError', os.path.join(base_path, file_path)) + return False + + +def address_path(path, version, new_version=None, suspect_only=False, show_diffs=False, rewrite=False, ask_confirm=True, grayscale=False, paths_only=False): + try: + if os.path.exists(path): + if os.path.isdir(path): + status = True + for dir_path, dir_names, file_names in os.walk(path): + for file_name in file_names: + file_path = os.path.relpath(os.path.join(dir_path, file_name), path) + status = address_file(path, file_path, version, new_version, suspect_only, show_diffs, + rewrite, ask_confirm, grayscale, paths_only) and status + return status + else: + base_name, file_name = os.path.split(path) + return address_file(base_name, file_name, version, new_version, suspect_only, show_diffs, rewrite, ask_confirm, grayscale) + else: + logging.error('Path %s does not exist', path) + return False + except OSError as e: + logging.exception('Unable to find all API versions due to OSError') + return False + + +def run(arg_list): + parser = argparse.ArgumentParser(description='finds and rewrites the API version in FDB source files') + parser.add_argument('path', help='path to search for FDB source files') + parser.add_argument('version', type=int, help='current/old version to search for') + parser.add_argument('--new-version', type=int, default=None, help='new version to update to') + parser.add_argument('--suspect-only', action='store_true', default=False, help='only look for phrases trying to set the API version') + parser.add_argument('--show-diffs', action='store_true', default=False, help='show suggested diffs for fixing version') + parser.add_argument('--rewrite', action='store_true', default=False, help='rewrite offending files') + parser.add_argument('-y', '--skip-confirm', action='store_true', default=False, help='do not ask for confirmation before rewriting') + parser.add_argument('--grayscale', action='store_true', default=False, + help='print diffs using grayscale output instead of red and green') + parser.add_argument('--paths-only', action='store_true', default=False, help='display only the path instead of the offending lines') + args = parser.parse_args(arg_list) + return address_path(args.path, args.version, args.new_version, args.suspect_only, args.show_diffs, + args.rewrite, not args.skip_confirm, args.grayscale, args.paths_only) + + +if __name__ == '__main__': + logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) + if not run(sys.argv[1:]): + exit(1) diff --git a/contrib/grv_proxy_model/grv_test.py b/contrib/grv_proxy_model/grv_test.py new file mode 100755 index 0000000000..1cd0224538 --- /dev/null +++ b/contrib/grv_proxy_model/grv_test.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python3 + +# +# grv_test.py +# +# This source file is part of the FoundationDB open source project +# +# Copyright 2013-2020 Apple Inc. and the FoundationDB project authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
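The core rewrite trick in apiversioner.py above is worth seeing in isolation: search a line for a version-setting phrase, then splice the new version over the matched group so the rest of the line is untouched. A minimal, self-contained sketch of that idea; the two patterns are a subset of SUSPECT_PHRASES, and the sample lines and the 700-to-710 bump are illustrative only:

import re

# Assumption: a two-pattern subset of the SUSPECT_PHRASES list above.
SUSPECT = [re.compile(r'#define\s+FDB_API_VERSION\s+(\d+)'),
           re.compile(r'fdb_select_api_version\s*\((\d+)\)')]

def bump(line, new_version):
    # Splice the new version over the captured group, keeping the rest of the line.
    for rx in SUSPECT:
        m = rx.search(line)
        if m:
            line = line[:m.start(1)] + str(new_version) + line[m.end(1):]
    return line

print(bump('#define FDB_API_VERSION 700', 710))   # -> #define FDB_API_VERSION 710
print(bump('fdb_select_api_version(700);', 710))  # -> fdb_select_api_version(710);

The real tool applies the same splice with a running offset so several matches on one line stay aligned.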
+# + +import argparse +import inspect +import sys + +import rate_model +import workload_model +import proxy_model +import ratekeeper_model +from priority import Priority +from plot import Plotter + +parser = argparse.ArgumentParser() +parser.add_argument('-w', '--workload', type=str, help='Name of workload to run') +parser.add_argument('-r', '--ratekeeper', type=str, help='Name of ratekeeper model') +parser.add_argument('-d', '--duration', type=int, default=240, help='Duration of simulated test, in seconds. Defaults to 240.') +parser.add_argument('-L', '--limiter', type=str, default='Original', help='Name of limiter implementation. Defaults to \'Original\'.') +parser.add_argument('-p', '--proxy', type=str, default='ProxyModel', help='Name of proxy implementation. Defaults to \'ProxyModel\'.') +parser.add_argument('--list', action='store_true', default=False, help='List options for all models.') +parser.add_argument('--no-graph', action='store_true', default=False, help='Disable graphical output.') + +args = parser.parse_args() + +def print_choices_list(context=None): + if context == 'workload' or context is None: + print('Workloads:') + for w in workload_model.predefined_workloads.keys(): + print(' %s' % w) + + if context == 'ratekeeper' or context is None: + print('\nRatekeeper models:') + for r in ratekeeper_model.predefined_ratekeeper.keys(): + print(' %s' % r) + + proxy_model_classes = [c for c in [getattr(proxy_model, a) for a in dir(proxy_model)] if inspect.isclass(c)] + + if context == 'proxy' or context is None: + print('\nProxy models:') + for p in proxy_model_classes: + if issubclass(p, proxy_model.ProxyModel): + print(' %s' % p.__name__) + + if context == 'limiter' or context is None: + print('\nProxy limiters:') + for p in proxy_model_classes: + if issubclass(p, proxy_model.Limiter) and p != proxy_model.Limiter: + name = p.__name__ + if name.endswith('Limiter'): + name = name[0:-len('Limiter')] + print(' %s' % name) + +# Check --list first so it works without -w/-r. +if args.list: + print_choices_list() + sys.exit(0) + +if args.workload is None or args.ratekeeper is None: + print('ERROR: A workload (-w/--workload) and ratekeeper model (-r/--ratekeeper) must be specified.\n') + print_choices_list() + sys.exit(1) + +def validate_class_type(var, name, superclass): + cls = getattr(var, name, None) + return cls is not None and inspect.isclass(cls) and issubclass(cls, superclass) + +if not args.ratekeeper in ratekeeper_model.predefined_ratekeeper: + print('Invalid ratekeeper model `%s\'' % args.ratekeeper) + print_choices_list('ratekeeper') + sys.exit(1) + +if not args.workload in workload_model.predefined_workloads: + print('Invalid workload model `%s\'' % args.workload) + print_choices_list('workload') + sys.exit(1) + +if not validate_class_type(proxy_model, args.proxy, proxy_model.ProxyModel): + print('Invalid proxy model `%s\'' % args.proxy) + print_choices_list('proxy') + sys.exit(1) + +limiter_name = args.limiter +if not validate_class_type(proxy_model, limiter_name, proxy_model.Limiter): + limiter_name += 'Limiter' + if not validate_class_type(proxy_model, limiter_name, proxy_model.Limiter): + print('Invalid proxy limiter `%s\'' % args.limiter) + print_choices_list('limiter') + sys.exit(1) + +ratekeeper = ratekeeper_model.predefined_ratekeeper[args.ratekeeper] +workload = workload_model.predefined_workloads[args.workload] + +limiter = getattr(proxy_model, limiter_name) +proxy = getattr(proxy_model, args.proxy)(args.duration, ratekeeper, workload, limiter) + +proxy.run() + +for priority in workload.priorities(): + latencies =
sorted([p for t in proxy.results.latencies[priority].values() for p in t]) + total_started = sum(proxy.results.started[priority].values()) + still_queued = sum([r.count for r in proxy.request_queue if r.priority == priority]) + + if len(latencies) > 0: + print('\n%s: %d requests in %d seconds (rate=%f). %d still queued.' % (priority, total_started, proxy.time, float(total_started)/proxy.time, still_queued)) + print(' Median latency: %f' % latencies[len(latencies)//2]) + print(' 90%% latency: %f' % latencies[int(0.9*len(latencies))]) + print(' 99%% latency: %f' % latencies[int(0.99*len(latencies))]) + print(' 99.9%% latency: %f' % latencies[int(0.999*len(latencies))]) + print(' Max latency: %f' % latencies[-1]) + +print('') + +if not args.no_graph: + plotter = Plotter(proxy.results) + plotter.display() diff --git a/contrib/grv_proxy_model/plot.py b/contrib/grv_proxy_model/plot.py new file mode 100755 index 0000000000..9334e2c844 --- /dev/null +++ b/contrib/grv_proxy_model/plot.py @@ -0,0 +1,107 @@ +# +# plot.py +# +# This source file is part of the FoundationDB open source project +# +# Copyright 2013-2020 Apple Inc. and the FoundationDB project authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import matplotlib.pyplot as plt + +class Plotter: + def __init__(self, results): + self.results = results + + def add_plot(data, time_resolution, label, use_avg=False): + out_data = {} + counts = {} + for t in data.keys(): + out_data.setdefault(t//time_resolution*time_resolution, 0) + counts.setdefault(t//time_resolution*time_resolution, 0) + out_data[t//time_resolution*time_resolution] += data[t] + counts[t//time_resolution*time_resolution] += 1 + + if use_avg: + out_data = { t: v/counts[t] for t,v in out_data.items() } + + plt.plot(list(out_data.keys()), list(out_data.values()), label=label) + + def add_plot_with_times(data, label): + plt.plot(list(data.keys()), list(data.values()), label=label) + + def display(self, time_resolution=0.1): + plt.figure(figsize=(40,9)) + plt.subplot(3, 3, 1) + for priority in self.results.started.keys(): + Plotter.add_plot(self.results.started[priority], time_resolution, priority) + + plt.xlabel('Time (s)') + plt.ylabel('Released/s') + plt.legend() + + plt.subplot(3, 3, 2) + for priority in self.results.queued.keys(): + Plotter.add_plot(self.results.queued[priority], time_resolution, priority) + + plt.xlabel('Time (s)') + plt.ylabel('Requests/s') + plt.legend() + + plt.subplot(3, 3, 3) + for priority in self.results.unprocessed_queue_sizes.keys(): + data = {k: max(v) for (k,v) in self.results.unprocessed_queue_sizes[priority].items()} + Plotter.add_plot(data, time_resolution, priority) + + plt.xlabel('Time (s)') + plt.ylabel('Max queue size') + plt.legend() + + num = 4 + for priority in self.results.latencies.keys(): + plt.subplot(3, 3, num) + median_latencies = {k: v[int(0.5*len(v))] if len(v) > 0 else 0 for (k,v) in self.results.latencies[priority].items()} + percentile90_latencies = {k: v[int(0.9*len(v))] if len(v) > 0 else 0 for (k,v) in 
self.results.latencies[priority].items()} + max_latencies = {k: max(v) if len(v) > 0 else 0 for (k,v) in self.results.latencies[priority].items()} + + Plotter.add_plot(median_latencies, time_resolution, 'median') + Plotter.add_plot(percentile90_latencies, time_resolution, '90th percentile') + Plotter.add_plot(max_latencies, time_resolution, 'max') + + plt.xlabel('Time (s)') + plt.ylabel(str(priority) + ' Latency (s)') + plt.yscale('log') + plt.legend() + num += 1 + + for priority in self.results.rate.keys(): + plt.subplot(3, 3, num) + if len(self.results.rate[priority]) > 0: + Plotter.add_plot(self.results.rate[priority], time_resolution, 'Rate', use_avg=True) + if len(self.results.released[priority]) > 0: + Plotter.add_plot(self.results.released[priority], time_resolution, 'Released', use_avg=True) + if len(self.results.limit[priority]) > 0: + Plotter.add_plot(self.results.limit[priority], time_resolution, 'Limit', use_avg=True) + if len(self.results.limit_and_budget[priority]) > 0: + Plotter.add_plot(self.results.limit_and_budget[priority], time_resolution, 'Limit and budget', use_avg=True) + if len(self.results.budget[priority]) > 0: + Plotter.add_plot(self.results.budget[priority], time_resolution, 'Budget', use_avg=True) + + plt.xlabel('Time (s)') + plt.ylabel('Value (' + str(priority) + ')') + plt.legend() + num += 1 + + plt.show() + diff --git a/contrib/grv_proxy_model/priority.py b/contrib/grv_proxy_model/priority.py new file mode 100755 index 0000000000..3ba5c05f2e --- /dev/null +++ b/contrib/grv_proxy_model/priority.py @@ -0,0 +1,40 @@ +# +# priority.py +# +# This source file is part of the FoundationDB open source project +# +# Copyright 2013-2020 Apple Inc. and the FoundationDB project authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import functools + +@functools.total_ordering +class Priority: + def __init__(self, priority_value, label): + self.priority_value = priority_value + self.label = label + + def __lt__(self, other): + return self.priority_value < other.priority_value + + def __str__(self): + return self.label + + def __repr__(self): + return repr(self.label) + +Priority.SYSTEM = Priority(0, "System") +Priority.DEFAULT = Priority(1, "Default") +Priority.BATCH = Priority(2, "Batch") diff --git a/contrib/grv_proxy_model/proxy_model.py b/contrib/grv_proxy_model/proxy_model.py new file mode 100755 index 0000000000..9ca2a39bfe --- /dev/null +++ b/contrib/grv_proxy_model/proxy_model.py @@ -0,0 +1,338 @@ +# +# proxy_model.py +# +# This source file is part of the FoundationDB open source project +# +# Copyright 2013-2020 Apple Inc. and the FoundationDB project authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import copy +import functools +import heapq + +from priority import Priority +from smoother import Smoother + +@functools.total_ordering +class Task: + def __init__(self, time, fxn): + self.time = time + self.fxn = fxn + + def __lt__(self, other): + return self.time < other.time + +class Limiter: + class UpdateRateParams: + def __init__(self, time): + self.time = time + + class UpdateLimitParams: + def __init__(self, time, elapsed): + self.time = time + self.elapsed = elapsed + + class CanStartParams: + def __init__(self, time, num_started, count): + self.time = time + self.num_started = num_started + self.count = count + + class UpdateBudgetParams: + def __init__(self, time, num_started, num_started_at_priority, min_priority, last_batch, queue_empty, elapsed): + self.time = time + self.num_started = num_started + self.num_started_at_priority = num_started_at_priority + self.min_priority = min_priority + self.last_batch = last_batch + self.queue_empty = queue_empty + self.elapsed = elapsed + + def __init__(self, priority, ratekeeper_model, proxy_model): + self.priority = priority + self.ratekeeper_model = ratekeeper_model + self.proxy_model = proxy_model + self.limit = 0 + self.rate = self.ratekeeper_model.get_limit(0, self.priority) + + def update_rate(self, params): + pass + + def update_limit(self, params): + pass + + def can_start(self, params): + pass + + def update_budget(self, params): + pass + +class OriginalLimiter(Limiter): + def __init__(self, priority, limit_rate_model, proxy_model): + Limiter.__init__(self, priority, limit_rate_model, proxy_model) + + def update_rate(self, params): + self.rate = self.ratekeeper_model.get_limit(params.time, self.priority) + + def update_limit(self, params): + self.limit = min(0, self.limit) + params.elapsed * self.rate + self.limit = min(self.limit, self.rate * 0.01) + self.limit = min(self.limit, 100000) + + self.proxy_model.results.rate[self.priority][params.time] = self.rate + self.proxy_model.results.limit[self.priority][params.time] = self.limit + + def can_start(self, params): + return params.num_started < self.limit + + def update_budget(self, params): + self.limit -= params.num_started + +class PositiveBudgetLimiter(OriginalLimiter): + def __init__(self, priority, limit_rate_model, proxy_model): + OriginalLimiter.__init__(self, priority, limit_rate_model, proxy_model) + + def update_limit(self, params): + self.limit += params.elapsed * self.rate + self.limit = min(self.limit, 2.0 * self.rate) + +class ClampedBudgetLimiter(PositiveBudgetLimiter): + def __init__(self, priority, limit_rate_model, proxy_model): + PositiveBudgetLimiter.__init__(self, priority, limit_rate_model, proxy_model) + + def update_budget(self, params): + min_budget = -self.rate * 5.0 + if self.limit > min_budget: + self.limit = max(self.limit - params.num_started, min_budget) + +class TimeLimiter(PositiveBudgetLimiter): + def __init__(self, priority, limit_rate_model, proxy_model): + PositiveBudgetLimiter.__init__(self, priority, limit_rate_model, proxy_model) + self.locked_until = 0 + + def can_start(self, params): + return params.time >= 
self.locked_until and PositiveBudgetLimiter.can_start(self, params) + + def update_budget(self, params): + #print('Start update budget: time=%f, limit=%f, locked_until=%f, num_started=%d, priority=%s, min_priority=%s, last_batch=%d' % (params.time, self.limit, self.locked_until, params.num_started, self.priority, params.min_priority, params.last_batch)) + + if params.min_priority >= self.priority or params.num_started < self.limit: + self.limit -= params.num_started + else: + self.limit = min(self.limit, max(self.limit - params.num_started, -params.last_batch)) + self.locked_until = min(params.time + 2.0, max(params.time, self.locked_until) + (params.num_started - self.limit)/self.rate) + + #print('End update budget: time=%f, limit=%f, locked_until=%f, num_started=%d, priority=%s, min_priority=%s' % (params.time, self.limit, self.locked_until, params.num_started, self.priority, params.min_priority)) + +class TimePositiveBudgetLimiter(PositiveBudgetLimiter): + def __init__(self, priority, limit_rate_model, proxy_model): + PositiveBudgetLimiter.__init__(self, priority, limit_rate_model, proxy_model) + self.locked_until = 0 + + def update_limit(self, params): + if params.time >= self.locked_until: + PositiveBudgetLimiter.update_limit(self, params) + + def can_start(self, params): + return params.num_started + params.count <= self.limit + + def update_budget(self, params): + #if params.num_started > 0: + #print('Start update budget: time=%f, limit=%f, locked_until=%f, num_started=%d, priority=%s, min_priority=%s, last_batch=%d' % (params.time, self.limit, self.locked_until, params.num_started, self.priority, params.min_priority, params.last_batch)) + + if params.num_started > self.limit: + # Assumed fix: the original referenced an undefined 'penalty'; treat it as the overshoot past the budget, as in TimeLimiter.update_budget above. + self.locked_until = min(params.time + 2.0, max(params.time, self.locked_until) + (params.num_started - self.limit)/self.rate) + self.limit = 0 + else: + self.limit -= params.num_started + + #if params.num_started > 0: + #print('End update budget: time=%f, limit=%f, locked_until=%f, num_started=%d, priority=%s, min_priority=%s' % (params.time, self.limit, self.locked_until, params.num_started, self.priority, params.min_priority)) + +class SmoothingLimiter(OriginalLimiter): + def __init__(self, priority, limit_rate_model, proxy_model): + OriginalLimiter.__init__(self, priority, limit_rate_model, proxy_model) + self.smooth_released = Smoother(2) + self.smooth_rate_limit = Smoother(2) + self.rate_set = False + + def update_rate(self, params): + OriginalLimiter.update_rate(self, params) + if not self.rate_set: + self.rate_set = True + self.smooth_rate_limit.reset(self.rate) + else: + self.smooth_rate_limit.set_total(params.time, self.rate) + + def update_limit(self, params): + self.limit = 2.0 * (self.smooth_rate_limit.smooth_total(params.time) - self.smooth_released.smooth_rate(params.time)) + + def can_start(self, params): + return params.num_started + params.count <= self.limit + + def update_budget(self, params): + self.smooth_released.add_delta(params.time, params.num_started) + +class SmoothingBudgetLimiter(SmoothingLimiter): + def __init__(self, priority, limit_rate_model, proxy_model): + SmoothingLimiter.__init__(self, priority, limit_rate_model, proxy_model) + #self.smooth_filled = Smoother(2) + self.budget = 0 + + def update_limit(self, params): + release_rate = (self.smooth_rate_limit.smooth_total(params.time) - self.smooth_released.smooth_rate(params.time)) + #self.smooth_filled.set_total(params.time, 1 if release_rate > 0 else 0) + self.limit = 2.0 * release_rate + + self.proxy_model.results.rate[self.priority][params.time] =
self.smooth_rate_limit.smooth_total(params.time) + self.proxy_model.results.released[self.priority][params.time] = self.smooth_released.smooth_rate(params.time) + self.proxy_model.results.limit[self.priority][params.time] = self.limit + self.proxy_model.results.limit_and_budget[self.priority][params.time] = self.limit + self.budget + self.proxy_model.results.budget[self.priority][params.time] = self.budget + + #self.budget = max(0, self.budget + params.elapsed * self.smooth_rate_limit.smooth_total(params.time)) + + #if self.smooth_filled.smooth_total(params.time) >= 0.1: + #self.budget += params.elapsed * self.smooth_rate_limit.smooth_total(params.time) + + #print('Update limit: time=%f, priority=%s, limit=%f, rate=%f, released=%f, budget=%f' % (params.time, self.priority, self.limit, self.smooth_rate_limit.smooth_total(params.time), self.smooth_released.smooth_rate(params.time), self.budget)) + + def can_start(self, params): + return params.num_started + params.count <= self.limit + self.budget #or params.num_started + params.count <= self.budget + + def update_budget(self, params): + self.budget = max(0, self.budget + (self.limit - params.num_started_at_priority) / 2 * params.elapsed) + + if params.queue_empty: + self.budget = min(10, self.budget) + + self.smooth_released.add_delta(params.time, params.num_started_at_priority) + +class ProxyModel: + class Results: + def __init__(self, priorities, duration): + self.started = self.init_result(priorities, 0, duration) + self.queued = self.init_result(priorities, 0, duration) + self.latencies = self.init_result(priorities, [], duration) + self.unprocessed_queue_sizes = self.init_result(priorities, [], duration) + + self.rate = {p:{} for p in priorities} + self.released = {p:{} for p in priorities} + self.limit = {p:{} for p in priorities} + self.limit_and_budget = {p:{} for p in priorities} + self.budget = {p:{} for p in priorities} + + def init_result(self, priorities, starting_value, duration): + return {p: {s: copy.copy(starting_value) for s in range(0, duration)} for p in priorities} + + def __init__(self, duration, ratekeeper_model, workload_model, Limiter): + self.time = 0 + self.log_time = 0 + self.duration = duration + self.priority_limiters = { priority: Limiter(priority, ratekeeper_model, self) for priority in workload_model.priorities() } + self.workload_model = workload_model + self.request_scheduled = { p: False for p in self.workload_model.priorities()} + + self.tasks = [] + self.request_queue = [] + self.results = ProxyModel.Results(self.workload_model.priorities(), duration) + + def run(self): + self.update_rate() + self.process_requests(self.time) + + for priority in self.workload_model.priorities(): + next_request = self.workload_model.next_request(self.time, priority) + assert next_request is not None + heapq.heappush(self.tasks, Task(next_request.time, lambda next_request=next_request: self.receive_request(next_request))) + self.request_scheduled[priority] = True + + while True:# or len(self.request_queue) > 0: + if int(self.time) > self.log_time: + self.log_time = int(self.time) + #print(self.log_time) + + task = heapq.heappop(self.tasks) + self.time = task.time + if self.time >= self.duration: + break + + task.fxn() + + def update_rate(self): + for limiter in self.priority_limiters.values(): + limiter.update_rate(Limiter.UpdateRateParams(self.time)) + + heapq.heappush(self.tasks, Task(self.time + 0.01, lambda: self.update_rate())) + + def receive_request(self, request): + heapq.heappush(self.request_queue, request) + + 
self.results.queued[request.priority][int(self.time)] += request.count + + next_request = self.workload_model.next_request(self.time, request.priority) + if next_request is not None and next_request.time < self.duration: + heapq.heappush(self.tasks, Task(next_request.time, lambda: self.receive_request(next_request))) + else: + self.request_scheduled[request.priority] = False + + def process_requests(self, last_time): + elapsed = self.time - last_time + for limiter in self.priority_limiters.values(): + limiter.update_limit(Limiter.UpdateLimitParams(self.time, elapsed)) + + current_started = 0 + started = {p:0 for p in self.workload_model.priorities()} + + min_priority = Priority.SYSTEM + last_batch = 0 + while len(self.request_queue) > 0: + request = self.request_queue[0] + + if not self.priority_limiters[request.priority].can_start(Limiter.CanStartParams(self.time, current_started, request.count)): + break + + min_priority = request.priority + last_batch = request.count + + if self.workload_model.request_completed(request) and not self.request_scheduled[request.priority]: + next_request = self.workload_model.next_request(self.time, request.priority) + assert next_request is not None + heapq.heappush(self.tasks, Task(next_request.time, lambda next_request=next_request: self.receive_request(next_request))) + self.request_scheduled[request.priority] = True + + current_started += request.count + started[request.priority] += request.count + + heapq.heappop(self.request_queue) + self.results.started[request.priority][int(self.time)] += request.count + self.results.latencies[request.priority][int(self.time)].append(self.time-request.time) + + if len(self.request_queue) == 0: + min_priority = Priority.BATCH + + for priority, limiter in self.priority_limiters.items(): + started_at_priority = sum([v for p,v in started.items() if p <= priority]) + limiter.update_budget(Limiter.UpdateBudgetParams(self.time, current_started, started_at_priority, min_priority, last_batch, len(self.request_queue) == 0 or self.request_queue[0].priority > priority, elapsed)) + + for priority in self.workload_model.priorities(): + self.results.unprocessed_queue_sizes[priority][int(self.time)].append(self.workload_model.workload_models[priority].outstanding) + + current_time = self.time + + delay = 0.001 + heapq.heappush(self.tasks, Task(self.time + delay, lambda: self.process_requests(current_time))) + + diff --git a/contrib/grv_proxy_model/rate_model.py b/contrib/grv_proxy_model/rate_model.py new file mode 100755 index 0000000000..1fabce2c7e --- /dev/null +++ b/contrib/grv_proxy_model/rate_model.py @@ -0,0 +1,83 @@ +# +# rate_model.py +# +# This source file is part of the FoundationDB open source project +# +# Copyright 2013-2020 Apple Inc. and the FoundationDB project authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
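ProxyModel above is a small discrete-event simulator: every future action is a timestamped Task on a heap, and run() repeatedly pops the earliest task and executes it, which may schedule further tasks. A stripped-down sketch of that loop, reusing the Task shape from proxy_model.py; the tick function, log list, and 0.25s period are illustrative only:

import functools
import heapq

@functools.total_ordering
class Task:
    # Same shape as proxy_model.Task above: ordered by time for the heap.
    def __init__(self, time, fxn):
        self.time = time
        self.fxn = fxn
    def __lt__(self, other):
        return self.time < other.time

tasks = []
log = []

def tick(t, period):
    # Record this event and schedule the next one until a 1-second horizon.
    log.append(t)
    if t + period < 1.0:
        heapq.heappush(tasks, Task(t + period, lambda: tick(t + period, period)))

heapq.heappush(tasks, Task(0.0, lambda: tick(0.0, 0.25)))
while tasks:
    task = heapq.heappop(tasks)
    task.fxn()

print(log)  # [0.0, 0.25, 0.5, 0.75]

The model's run() differs only in bookkeeping: its tasks are rate updates, request arrivals, and process_requests passes, and it stops once the popped task's time passes the configured duration.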
+# + +import numpy + +class RateModel: + def __init__(self): + pass + + def get_rate(self, time): + pass + +class FixedRateModel(RateModel): + def __init__(self, rate): + RateModel.__init__(self) + self.rate = rate + + def get_rate(self, time): + return self.rate + +class UnlimitedRateModel(FixedRateModel): + def __init__(self): + self.rate = 1e9 + +class IntervalRateModel(RateModel): + def __init__(self, intervals): + self.intervals = sorted(intervals) + + def get_rate(self, time): + if len(self.intervals) == 0 or time < self.intervals[0][0]: + return 0 + + target_interval = len(self.intervals)-1 + for i in range(1, len(self.intervals)): + if time < self.intervals[i][0]: + target_interval = i-1 + break + + self.intervals = self.intervals[target_interval:] + return self.intervals[0][1] + +class SawtoothRateModel(RateModel): + def __init__(self, low, high, frequency): + self.low = low + self.high = high + self.frequency = frequency + + def get_rate(self, time): + if int(2*time/self.frequency) % 2 == 0: + return self.low + else: + return self.high + +class DistributionRateModel(RateModel): + def __init__(self, distribution, frequency): + self.distribution = distribution + self.frequency = frequency + self.last_change = 0 + self.rate = None + + def get_rate(self, time): + if self.frequency == 0 or int((time - self.last_change) / self.frequency) > int(self.last_change / self.frequency) or self.rate is None: + self.last_change = time + self.rate = self.distribution() + + return self.rate diff --git a/contrib/grv_proxy_model/ratekeeper_model.py b/contrib/grv_proxy_model/ratekeeper_model.py new file mode 100755 index 0000000000..57125dc4c0 --- /dev/null +++ b/contrib/grv_proxy_model/ratekeeper_model.py @@ -0,0 +1,67 @@ +# +# ratekeeper.py +# +# This source file is part of the FoundationDB open source project +# +# Copyright 2013-2020 Apple Inc. and the FoundationDB project authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
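The rate models above are simple functions of time; a quick usage sketch, assuming contrib/grv_proxy_model is on sys.path so rate_model imports (the expected values follow directly from the class definitions):

# Assumes the rate_model module defined above is importable.
import rate_model

saw = rate_model.SawtoothRateModel(10, 200, 1)
# int(2*time/frequency) changes parity every half period, so with
# frequency=1 the rate holds each value for 0.5s before flipping.
print([saw.get_rate(t / 4) for t in range(4)])  # [10, 10, 200, 200]

fixed = rate_model.FixedRateModel(100)
print(fixed.get_rate(123.0))  # 100, independent of time

The ratekeeper models defined next are just per-priority dictionaries of these rate models, which is why get_limit(time, priority) reduces to a single get_rate call.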
+# + +import numpy +import rate_model +from priority import Priority + +class RatekeeperModel: + def __init__(self, limit_models): + self.limit_models = limit_models + + def get_limit(self, time, priority): + return self.limit_models[priority].get_rate(time) + +predefined_ratekeeper = {} + +predefined_ratekeeper['default200_batch100'] = RatekeeperModel( +{ + Priority.SYSTEM: rate_model.UnlimitedRateModel(), + Priority.DEFAULT: rate_model.FixedRateModel(200), + Priority.BATCH: rate_model.FixedRateModel(100) +}) + +predefined_ratekeeper['default_sawtooth'] = RatekeeperModel( +{ + Priority.SYSTEM: rate_model.UnlimitedRateModel(), + Priority.DEFAULT: rate_model.SawtoothRateModel(10, 200, 1), + Priority.BATCH: rate_model.FixedRateModel(0) +}) + +predefined_ratekeeper['default_uniform_random'] = RatekeeperModel( +{ + Priority.SYSTEM: rate_model.UnlimitedRateModel(), + Priority.DEFAULT: rate_model.DistributionRateModel(lambda: numpy.random.uniform(10, 200), 1), + Priority.BATCH: rate_model.FixedRateModel(0) +}) + +predefined_ratekeeper['default_trickle'] = RatekeeperModel( +{ + Priority.SYSTEM: rate_model.UnlimitedRateModel(), + Priority.DEFAULT: rate_model.FixedRateModel(3), + Priority.BATCH: rate_model.FixedRateModel(0) +}) + +predefined_ratekeeper['default1000'] = RatekeeperModel( +{ + Priority.SYSTEM: rate_model.UnlimitedRateModel(), + Priority.DEFAULT: rate_model.FixedRateModel(1000), + Priority.BATCH: rate_model.FixedRateModel(500) +}) diff --git a/contrib/grv_proxy_model/smoother.py b/contrib/grv_proxy_model/smoother.py new file mode 100644 index 0000000000..bc1b32ea12 --- /dev/null +++ b/contrib/grv_proxy_model/smoother.py @@ -0,0 +1,53 @@ +# +# smoother.py +# +# This source file is part of the FoundationDB open source project +# +# Copyright 2013-2020 Apple Inc. and the FoundationDB project authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import math + +class Smoother: + def __init__(self, folding_time): + self.folding_time = folding_time + self.reset(0) + + def reset(self, value): + self.time = 0 + self.total = value + self.estimate = value + + def set_total(self, time, total): + self.add_delta(time, total-self.total) + + def add_delta(self, time, delta): + self.update(time) + self.total += delta + + def smooth_total(self, time): + self.update(time) + return self.estimate + + def smooth_rate(self, time): + self.update(time) + return (self.total-self.estimate) / self.folding_time + + def update(self, time): + elapsed = time - self.time + if elapsed > 0: + self.time = time + self.estimate += (self.total-self.estimate) * (1-math.exp(-elapsed/self.folding_time)) + diff --git a/contrib/grv_proxy_model/workload_model.py b/contrib/grv_proxy_model/workload_model.py new file mode 100755 index 0000000000..63fb4c472e --- /dev/null +++ b/contrib/grv_proxy_model/workload_model.py @@ -0,0 +1,201 @@ +# +# workload_model.py +# +# This source file is part of the FoundationDB open source project +# +# Copyright 2013-2020 Apple Inc. 
and the FoundationDB project authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import functools +import numpy +import math + +import rate_model +from priority import Priority + +@functools.total_ordering +class Request: + def __init__(self, time, count, priority): + self.time = time + self.count = count + self.priority = priority + + def __lt__(self, other): + return self.priority < other.priority + +class PriorityWorkloadModel: + def __init__(self, priority, rate_model, batch_model, generator, max_outstanding=1e9): + self.priority = priority + self.rate_model = rate_model + self.batch_model = batch_model + self.generator = generator + self.max_outstanding = max_outstanding + self.outstanding = 0 + + def next_request(self, time): + if self.outstanding >= self.max_outstanding: + return None + + batch_size = self.batch_model.next_batch() + self.outstanding += batch_size + interval = self.generator.next_request_interval(self.rate_model.get_rate(time)) + return Request(time + interval, batch_size, self.priority) + + def request_completed(self, request): + was_full = self.max_outstanding <= self.outstanding + self.outstanding -= request.count + + return was_full and self.outstanding < self.max_outstanding + +class WorkloadModel: + def __init__(self, workload_models): + self.workload_models = workload_models + + def priorities(self): + return list(self.workload_models.keys()) + + def next_request(self, time, priority): + return self.workload_models[priority].next_request(time) + + def request_completed(self, request): + return self.workload_models[request.priority].request_completed(request) + +class Distribution: + EXPONENTIAL = lambda x: numpy.random.exponential(x) + UNIFORM = lambda x: numpy.random.uniform(0, 2.0*x) + FIXED = lambda x: x + +class BatchGenerator: + def __init__(self): + pass + + def next_batch(self): + pass + +class DistributionBatchGenerator(BatchGenerator): + def __init__(self, distribution, size): + BatchGenerator.__init__(self) + self.distribution = distribution + self.size = size + + def next_batch(self): + return math.ceil(self.distribution(self.size)) + +class RequestGenerator: + def __init__(self): + pass + + def next_request_interval(self, rate): + pass + +class DistributionRequestGenerator(RequestGenerator): + def __init__(self, distribution): + RequestGenerator.__init__(self) + self.distribution = distribution + + def next_request_interval(self, rate): + if rate == 0: + return 1e9 + + return self.distribution(1.0/rate) + +predefined_workloads = {} + +predefined_workloads['slow_exponential'] = WorkloadModel( +{ + Priority.DEFAULT: PriorityWorkloadModel(Priority.DEFAULT, + rate_model.FixedRateModel(100), + DistributionBatchGenerator(Distribution.FIXED, 1), + DistributionRequestGenerator(Distribution.EXPONENTIAL), + max_outstanding=100 + ) +}) + +predefined_workloads['fixed_uniform'] = WorkloadModel( +{ + Priority.SYSTEM: PriorityWorkloadModel(Priority.SYSTEM, + rate_model.FixedRateModel(0), + DistributionBatchGenerator(Distribution.FIXED, 1), + 
DistributionRequestGenerator(Distribution.UNIFORM),
+        max_outstanding=10
+    ),
+    Priority.DEFAULT: PriorityWorkloadModel(Priority.DEFAULT,
+        rate_model.FixedRateModel(95),
+        DistributionBatchGenerator(Distribution.FIXED, 10),
+        DistributionRequestGenerator(Distribution.UNIFORM),
+        max_outstanding=200
+    ),
+    Priority.BATCH: PriorityWorkloadModel(Priority.BATCH,
+        rate_model.FixedRateModel(1),
+        DistributionBatchGenerator(Distribution.UNIFORM, 500),
+        DistributionRequestGenerator(Distribution.UNIFORM),
+        max_outstanding=200
+    )
+})
+
+predefined_workloads['batch_starvation'] = WorkloadModel(
+{
+    Priority.SYSTEM: PriorityWorkloadModel(Priority.SYSTEM,
+        rate_model.FixedRateModel(1),
+        DistributionBatchGenerator(Distribution.FIXED, 1),
+        DistributionRequestGenerator(Distribution.UNIFORM),
+        max_outstanding=10
+    ),
+    Priority.DEFAULT: PriorityWorkloadModel(Priority.DEFAULT,
+        rate_model.IntervalRateModel([(0,50), (60,150), (120,90)]),
+        DistributionBatchGenerator(Distribution.FIXED, 1),
+        DistributionRequestGenerator(Distribution.UNIFORM),
+        max_outstanding=200
+    ),
+    Priority.BATCH: PriorityWorkloadModel(Priority.BATCH,
+        rate_model.FixedRateModel(100),
+        DistributionBatchGenerator(Distribution.FIXED, 1),
+        DistributionRequestGenerator(Distribution.UNIFORM),
+        max_outstanding=200
+    )
+})
+
+predefined_workloads['default_low_high_low'] = WorkloadModel(
+{
+    Priority.SYSTEM: PriorityWorkloadModel(Priority.SYSTEM,
+        rate_model.FixedRateModel(0),
+        DistributionBatchGenerator(Distribution.FIXED, 1),
+        DistributionRequestGenerator(Distribution.UNIFORM),
+        max_outstanding=10
+    ),
+    Priority.DEFAULT: PriorityWorkloadModel(Priority.DEFAULT,
+        rate_model.IntervalRateModel([(0,100), (60,300), (120,100)]),
+        DistributionBatchGenerator(Distribution.FIXED, 1),
+        DistributionRequestGenerator(Distribution.UNIFORM),
+        max_outstanding=200
+    ),
+    Priority.BATCH: PriorityWorkloadModel(Priority.BATCH,
+        rate_model.FixedRateModel(0),
+        DistributionBatchGenerator(Distribution.FIXED, 1),
+        DistributionRequestGenerator(Distribution.UNIFORM),
+        max_outstanding=200
+    )
+})
+
+for rate in [83, 100, 180, 190, 200]:
+    predefined_workloads['default%d' % rate] = WorkloadModel(
+    {
+        Priority.DEFAULT: PriorityWorkloadModel(Priority.DEFAULT,
+            rate_model.FixedRateModel(rate),
+            DistributionBatchGenerator(Distribution.FIXED, 1),
+            DistributionRequestGenerator(Distribution.EXPONENTIAL),
+            max_outstanding=1000
+        )
+    })
diff --git a/design/special-key-space.md b/design/special-key-space.md
index 5d22e9d7f3..7cdcfe460d 100644
--- a/design/special-key-space.md
+++ b/design/special-key-space.md
@@ -20,7 +20,7 @@ Consequently, the special-key-space framework wants to integrate all client func
 If your feature is exposing information to clients and the results are easily formatted as key-value pairs, then you can use special-key-space to implement your client function.

 ## How
-If you choose to use, you need to implement a function class that inherits from `SpecialKeyRangeReadImpl`, which has an abstract method `Future<Standalone<RangeResultRef>> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr)`.
+If you choose to use it, you need to implement a function class that inherits from `SpecialKeyRangeReadImpl`, which has an abstract method `Future<RangeResult> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr)`.
 This method can be treated as a callback, whose implementation details are determined by the developer.
 Once you fill out the method, register the function class to the corresponding key range.
 Below is a detailed example.
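Reading a registered range from a client is then an ordinary range read over the module's prefix. The following is a minimal client-side sketch in Python, not part of the upstream document; it assumes the hypothetical `\xff\xff/example/` module from the example below has actually been registered in the server build:

```python
import fdb

fdb.api_version(710)
db = fdb.open()

@fdb.transactional
def read_example_module(tr):
    # Special keys are fetched with a normal range read on the \xff\xff prefix;
    # the framework routes the request to the module registered for this range.
    # (Hypothetical: requires the \xff\xff/example/ module to be registered.)
    return [(kv.key, kv.value) for kv in tr.get_range(b'\xff\xff/example/', b'\xff\xff/example/\xff')]

for key, value in read_example_module(db):
    print(key, value)
```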
@@ -38,10 +38,10 @@ public:
         CountryToCapitalCity[LiteralStringRef("China")] = LiteralStringRef("Beijing");
     }
     // Implement the getRange interface
-    Future<Standalone<RangeResultRef>> getRange(ReadYourWritesTransaction* ryw,
+    Future<RangeResult> getRange(ReadYourWritesTransaction* ryw,
                                                 KeyRangeRef kr) const override {
-        Standalone<RangeResultRef> result;
+        RangeResult result;
         for (auto const& country : CountryToCapitalCity) {
             // the registered range here: [\xff\xff/example/, \xff\xff/example/\xff]
             Key keyWithPrefix = country.first.withPrefix(range.begin);
@@ -71,7 +71,7 @@ ASSERT(res1.present() && res.getValue() == LiteralStringRef("Tokyo"));
 // getRange
 // Note: for getRange(key1, key2), both key1 and key2 should be prefixed with \xff\xff
 // something like getRange("normal_key", "\xff\xff/...") is not supported yet
-Standalone<RangeResultRef> res2 = wait(tr.getRange(LiteralStringRef("\xff\xff/example/U"), LiteralStringRef("\xff\xff/example/U\xff")));
+RangeResult res2 = wait(tr.getRange(LiteralStringRef("\xff\xff/example/U"), LiteralStringRef("\xff\xff/example/U\xff")));
 // res2 should contain USA and UK
 ASSERT(
     res2.size() == 2 &&
diff --git a/documentation/sphinx/conf.py b/documentation/sphinx/conf.py
index 5ec9238930..ab42fdba6a 100644
--- a/documentation/sphinx/conf.py
+++ b/documentation/sphinx/conf.py
@@ -49,7 +49,7 @@ master_doc = 'index'

 # General information about the project.
 project = u'FoundationDB'
-copyright = u'2013-2018 Apple, Inc and the FoundationDB project authors'
+copyright = u'2013-2021 Apple, Inc and the FoundationDB project authors'

 # Load the version information from 'versions.target'
 import xml.etree.ElementTree as ET
diff --git a/documentation/sphinx/source/administration.rst b/documentation/sphinx/source/administration.rst
index 5f6369d889..7053a78ca0 100644
--- a/documentation/sphinx/source/administration.rst
+++ b/documentation/sphinx/source/administration.rst
@@ -799,3 +799,18 @@ Upgrading from Older Versions
 -----------------------------

 Upgrades from versions older than 5.0.0 are no longer supported.
+
+Version-specific notes on downgrading
+=====================================
+
+In general, downgrades between non-patch releases (e.g. 6.2.x to 6.1.x) are not supported.
+
+.. _downgrade-specific-version:
+
+Downgrading from 6.3.13 to 6.2.33
+---------------------------------
+After upgrading from 6.2 to 6.3, rolling back and downgrading to 6.2 is still possible, provided that the following conditions are met:
+
+* The 6.3 cluster cannot have ``TLogVersion`` greater than V4 (6.2).
+* The 6.3 cluster cannot use storage engine types that are not ``ssd-1``, ``ssd-2``, or ``memory``.
+* The 6.3 cluster must not have any key servers serialized with tag encoding. This condition can only be guaranteed if the ``TAG_ENCODE_KEY_SERVERS`` knob has never been changed to ``true`` on this cluster.
diff --git a/documentation/sphinx/source/api-c.rst b/documentation/sphinx/source/api-c.rst
index 0d02dc18dd..0acafea8ba 100644
--- a/documentation/sphinx/source/api-c.rst
+++ b/documentation/sphinx/source/api-c.rst
@@ -133,7 +133,7 @@ API versioning

 Prior to including ``fdb_c.h``, you must define the ``FDB_API_VERSION`` macro. This, together with the :func:`fdb_select_api_version()` function, allows programs written against an older version of the API to compile and run with newer versions of the C library. The current version of the FoundationDB C API is |api-version|. ::

-    #define FDB_API_VERSION 700
+    #define FDB_API_VERSION 710
     #include <foundationdb/fdb_c.h>

 .. function:: fdb_error_t fdb_select_api_version(int version)
diff --git a/documentation/sphinx/source/api-common.rst.inc b/documentation/sphinx/source/api-common.rst.inc
index 0be8cc30fd..f70e16a5d6 100644
--- a/documentation/sphinx/source/api-common.rst.inc
+++ b/documentation/sphinx/source/api-common.rst.inc
@@ -148,7 +148,7 @@
 .. |atomic-versionstamps-tuple-warning-value| replace:: At this time, versionstamped values are not compatible with the Tuple layer except in Java, Python, and Go. Note that this implies versionstamped values may not be used with the Subspace and Directory layers except in those languages.

-.. |api-version| replace:: 700
+.. |api-version| replace:: 710

 .. |streaming-mode-blurb1| replace:: When using |get-range-func| and similar interfaces, API clients can request large ranges of the database to iterate over. Making such a request doesn't necessarily mean that the client will consume all of the data in the range - sometimes the client doesn't know how far it intends to iterate in advance. FoundationDB tries to balance latency and bandwidth by requesting data for iteration in batches.
diff --git a/documentation/sphinx/source/api-python.rst b/documentation/sphinx/source/api-python.rst
index 59b82406e0..0cd1e8f078 100644
--- a/documentation/sphinx/source/api-python.rst
+++ b/documentation/sphinx/source/api-python.rst
@@ -108,7 +108,7 @@ Opening a database
 After importing the ``fdb`` module and selecting an API version, you probably want to open a :class:`Database` using :func:`open`::

     import fdb
-    fdb.api_version(700)
+    fdb.api_version(710)
     db = fdb.open()

 .. function:: open( cluster_file=None, event_model=None )
diff --git a/documentation/sphinx/source/api-ruby.rst b/documentation/sphinx/source/api-ruby.rst
index 7c707f445b..ddb721a0d0 100644
--- a/documentation/sphinx/source/api-ruby.rst
+++ b/documentation/sphinx/source/api-ruby.rst
@@ -93,7 +93,7 @@ Opening a database
 After requiring the ``FDB`` gem and selecting an API version, you probably want to open a :class:`Database` using :func:`open`::

     require 'fdb'
-    FDB.api_version 700
+    FDB.api_version 710
     db = FDB.open

 .. function:: open( cluster_file=nil ) -> Database
diff --git a/documentation/sphinx/source/api-version-upgrade-guide.rst b/documentation/sphinx/source/api-version-upgrade-guide.rst
index 83486986a6..707d8e3246 100644
--- a/documentation/sphinx/source/api-version-upgrade-guide.rst
+++ b/documentation/sphinx/source/api-version-upgrade-guide.rst
@@ -9,6 +9,14 @@ This document provides an overview of changes that an application developer may

 For more details about API versions, see :ref:`api-versions`.

+.. _api-version-upgrade-guide-710:
+
+API version 710
+===============
+
+General
+-------
+
 .. _api-version-upgrade-guide-700:

 API version 700
diff --git a/documentation/sphinx/source/backups.rst b/documentation/sphinx/source/backups.rst
index 24ae05a124..9f606a2b51 100644
--- a/documentation/sphinx/source/backups.rst
+++ b/documentation/sphinx/source/backups.rst
@@ -244,6 +244,9 @@ The ``start`` subcommand is used to start a backup. If there is already a backu
 ``-s <DURATION>`` or ``--snapshot_interval <DURATION>``
    Specifies the duration, in seconds, of the inconsistent snapshots written to the backup in continuous mode. The default is 864000, which is 10 days.

+``--initial_snapshot_interval <DURATION>``
+   Specifies the duration, in seconds, of the first inconsistent snapshot written to the backup. The default is 0, which means as fast as possible.
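For example, a backup whose first snapshot should complete as quickly as possible, but whose subsequent snapshots should each be spread over an hour, could be started with ``--initial_snapshot_interval 0 --snapshot_interval 3600`` (a hypothetical ``fdbbackup start`` invocation; the destination container, tag, and other required arguments are omitted here).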
+
 ``--partitioned_log_experimental``
    Specifies the backup uses the partitioned mutation logs generated by backup workers. Since FDB version 6.3, this option is experimental and requires using fast restore for restoring the database from the generated files. The default is to use non-partitioned mutation logs generated by backup agents.
@@ -487,6 +490,9 @@ The ``start`` command will start a new restore on the specified (or default) tag
 ``--orig_cluster_file <CONNFILE>``
    The cluster file for the original database from which the backup was created. The original database is only needed to convert a --timestamp argument to a database version.

+``--inconsistent_snapshot_only``
+   Ignore mutation log files during the restore to speed up the process. Because only range files are restored, this option gives an inconsistent snapshot in most cases and is not recommended for use.
+
 .. program:: fdbrestore abort

 ``abort``
diff --git a/documentation/sphinx/source/class-scheduling-go.rst b/documentation/sphinx/source/class-scheduling-go.rst
index 77d9c01e90..4f505d4931 100644
--- a/documentation/sphinx/source/class-scheduling-go.rst
+++ b/documentation/sphinx/source/class-scheduling-go.rst
@@ -29,7 +29,7 @@ Before using the API, we need to specify the API version. This allows programs t

 .. code-block:: go

-    fdb.MustAPIVersion(700)
+    fdb.MustAPIVersion(710)

 Next, we open a FoundationDB database. The API will connect to the FoundationDB cluster indicated by the :ref:`default cluster file <default-cluster-file>`.

@@ -78,7 +78,7 @@ If this is all working, it looks like we are ready to start building a real appl

     func main() {
         // Different API versions may expose different runtime behaviors.
-        fdb.MustAPIVersion(700)
+        fdb.MustAPIVersion(710)

         // Open the default database from the system cluster
         db := fdb.MustOpenDefault()
@@ -666,7 +666,7 @@ Here's the code for the scheduling tutorial:
     }

     func main() {
-        fdb.MustAPIVersion(700)
+        fdb.MustAPIVersion(710)
         db := fdb.MustOpenDefault()
         db.Options().SetTransactionTimeout(60000) // 60,000 ms = 1 minute
         db.Options().SetTransactionRetryLimit(100)
diff --git a/documentation/sphinx/source/class-scheduling-java.rst b/documentation/sphinx/source/class-scheduling-java.rst
index c5dda17d55..dec3b23e18 100644
--- a/documentation/sphinx/source/class-scheduling-java.rst
+++ b/documentation/sphinx/source/class-scheduling-java.rst
@@ -30,7 +30,7 @@ Before using the API, we need to specify the API version. This allows programs t
     private static final Database db;

     static {
-        fdb = FDB.selectAPIVersion(700);
+        fdb = FDB.selectAPIVersion(710);
         db = fdb.open();
     }

@@ -66,7 +66,7 @@ If this is all working, it looks like we are ready to start building a real appl
     private static final Database db;

     static {
-        fdb = FDB.selectAPIVersion(700);
+        fdb = FDB.selectAPIVersion(710);
         db = fdb.open();
     }

@@ -441,7 +441,7 @@ Here's the code for the scheduling tutorial:
     private static final Database db;

     static {
-        fdb = FDB.selectAPIVersion(700);
+        fdb = FDB.selectAPIVersion(710);
         db = fdb.open();
         db.options().setTransactionTimeout(60000); // 60,000 ms = 1 minute
         db.options().setTransactionRetryLimit(100);
diff --git a/documentation/sphinx/source/class-scheduling-ruby.rst b/documentation/sphinx/source/class-scheduling-ruby.rst
index c8d8483aad..f5871578e3 100644
--- a/documentation/sphinx/source/class-scheduling-ruby.rst
+++ b/documentation/sphinx/source/class-scheduling-ruby.rst
@@ -23,7 +23,7 @@ Open a Ruby interactive interpreter and import the FoundationDB API module::

 Before using the API, we need to specify the API version.
This allows programs to maintain compatibility even if the API is modified in future versions:: - > FDB.api_version 700 + > FDB.api_version 710 => nil Next, we open a FoundationDB database. The API will connect to the FoundationDB cluster indicated by the :ref:`default cluster file `. :: @@ -46,7 +46,7 @@ If this is all working, it looks like we are ready to start building a real appl .. code-block:: ruby require 'fdb' - FDB.api_version 700 + FDB.api_version 710 @db = FDB.open @db['hello'] = 'world' print 'hello ', @db['hello'] @@ -373,7 +373,7 @@ Here's the code for the scheduling tutorial: require 'fdb' - FDB.api_version 700 + FDB.api_version 710 #################################### ## Initialization ## diff --git a/documentation/sphinx/source/class-scheduling.rst b/documentation/sphinx/source/class-scheduling.rst index 23615a08a6..bdf3c72680 100644 --- a/documentation/sphinx/source/class-scheduling.rst +++ b/documentation/sphinx/source/class-scheduling.rst @@ -30,7 +30,7 @@ Open a Python interactive interpreter and import the FoundationDB API module:: Before using the API, we need to specify the API version. This allows programs to maintain compatibility even if the API is modified in future versions:: - >>> fdb.api_version(700) + >>> fdb.api_version(710) Next, we open a FoundationDB database. The API will connect to the FoundationDB cluster indicated by the :ref:`default cluster file `. :: @@ -48,7 +48,7 @@ When this command returns without exception, the modification is durably stored If this is all working, it looks like we are ready to start building a real application. For reference, here's the full code for "hello world":: import fdb - fdb.api_version(700) + fdb.api_version(710) db = fdb.open() db[b'hello'] = b'world' print 'hello', db[b'hello'] @@ -91,7 +91,7 @@ FoundationDB includes a few tools that make it easy to model data using this app opening a :ref:`directory ` in the database:: import fdb - fdb.api_version(700) + fdb.api_version(710) db = fdb.open() scheduling = fdb.directory.create_or_open(db, ('scheduling',)) @@ -337,7 +337,7 @@ Here's the code for the scheduling tutorial:: import fdb import fdb.tuple - fdb.api_version(700) + fdb.api_version(710) #################################### diff --git a/documentation/sphinx/source/client-testing.rst b/documentation/sphinx/source/client-testing.rst index caf65a265a..884eff0933 100644 --- a/documentation/sphinx/source/client-testing.rst +++ b/documentation/sphinx/source/client-testing.rst @@ -315,7 +315,7 @@ and pass the test with ``-f``: .. code-block:: sh - fdbserver -r simulator -f testfile.txt + fdbserver -r simulation -f testfile.txt Running a Workload on an actual Cluster diff --git a/documentation/sphinx/source/developer-guide.rst b/documentation/sphinx/source/developer-guide.rst index bb30e9e469..f51db018bb 100644 --- a/documentation/sphinx/source/developer-guide.rst +++ b/documentation/sphinx/source/developer-guide.rst @@ -949,11 +949,33 @@ that process, and wait for necessary data to be moved away. #. ``\xff\xff/management/options/failed/force`` Read/write. Setting this key disables safety checks for writes to ``\xff\xff/management/failed/``. Setting this key only has an effect in the current transaction and is not persisted on commit. #. ``\xff\xff/management/min_required_commit_version`` Read/write. Changing this key will change the corresponding system key ``\xff/minRequiredCommitVersion = [[Version]]``. The value of this special key is the literal text of the underlying ``Version``, which is ``int64_t``. 
If you set the key to a value that cannot be parsed as ``int64_t``, ``special_keys_api_failure`` will be thrown. In addition, the given ``Version`` should be larger than the current read version and smaller than the upper bound (``2**63-1-version_per_second*3600*24*365*1000``). Otherwise, ``special_keys_api_failure`` is thrown. For more details, see help text of ``fdbcli`` command ``advanceversion``.
#. ``\xff\xff/management/profiling/<client_txn_sample_rate|client_txn_size_limit>`` Read/write. Changing these two keys will change the corresponding system keys ``\xff\x02/fdbClientInfo/<client_txn_sample_rate|client_txn_size_limit>``, respectively. The value of ``\xff\xff/management/client_txn_sample_rate`` is a literal text of ``double``, and the value of ``\xff\xff/management/client_txn_size_limit`` is a literal text of ``int64_t``. A special value ``default`` can be set to or read from these two keys, indicating that client profiling is disabled. In addition, ``clear`` in this range is not allowed. For more details, see help text of ``fdbcli`` command ``profile client``.
+#. ``\xff\xff/management/maintenance/<zone_id> := <seconds>`` Read/write. Setting or clearing a key in this range will change the corresponding system key ``\xff\x02/healthyZone``. The value is a literal text of a non-negative ``double`` which represents the remaining time for the zone to be in maintenance. Committing with an invalid value will throw ``special_keys_api_failure``. Only one zone is allowed to be in maintenance at the same time. Setting a new key in the range will override the old one, and the transaction will throw the ``special_keys_api_failure`` error if more than one zone is given. For more details, see help text of ``fdbcli`` command ``maintenance``.
+   In addition, a special key ``\xff\xff/management/maintenance/IgnoreSSFailures`` in the range, if set, will disable data distribution for storage server failures.
+   This has the same effect as the fdbcli command ``datadistribution disable ssfailure``.
+   Maintenance mode cannot be used until the key is cleared, which is the same as the fdbcli command ``datadistribution enable ssfailure``.
+   While the key is set, any commit that tries to set a key in the range will fail with the ``special_keys_api_failure`` error.
+#. ``\xff\xff/management/data_distribution/<mode|rebalance_ignored>`` Read/write. Changing these two keys will change the two corresponding system keys ``\xff/dataDistributionMode`` and ``\xff\x02/rebalanceDDIgnored``. The value of ``\xff\xff/management/data_distribution/mode`` is a literal text of ``0`` (disable) or ``1`` (enable). Transactions committed with invalid values will throw ``special_keys_api_failure``. The value of ``\xff\xff/management/data_distribution/rebalance_ignored`` is empty. If present, it means data distribution is disabled for rebalance. Any transaction committed with a non-empty value for this key will throw ``special_keys_api_failure``. For more details, see help text of ``fdbcli`` command ``datadistribution``.
+#. ``\xff\xff/management/consistency_check_suspended`` Read/write. Setting or reading this key will set or read the underlying system key ``\xff\x02/ConsistencyCheck/Suspend``. The value of this special key is unused; thus, if present, it will be empty. In particular, if the key exists, then the consistency check is suspended. For more details, see help text of ``fdbcli`` command ``consistencycheck``.
+#. ``\xff\xff/management/db_locked`` Read/write. A single key that can be read and modified. Setting the key will lock the database and clearing the key will unlock it. If the database is already locked, then the commit will fail with the ``special_keys_api_failure`` error. For more details, see help text of ``fdbcli`` commands ``lock`` and ``unlock``.
+#. ``\xff\xff/management/auto_coordinators`` Read-only. A single key, if read, will return a set of processes which are able to satisfy the current redundancy level and serve as new coordinators. The return value is formatted as a comma-delimited string of network addresses of coordinators, i.e. ``<ip:port>,<ip:port>,...,<ip:port>``.

An exclusion is syntactically either an ip address (e.g. ``127.0.0.1``), or an ip address and port (e.g. ``127.0.0.1:4500``). If no port is specified, then all processes on that host match the exclusion.

+Configuration module
+~~~~~~~~~~~~~~~~~~~~
+
+The configuration module is for changing the cluster configuration.
+For example, you can change a process type or update coordinators by manipulating related special keys through transactions.
+
+#. ``\xff\xff/configuration/process/class_type/<address> := <class_type>`` Read/write. Reading keys in the range will retrieve processes' class types. Setting keys in the range will update processes' class types. The process matching ``<address>`` will be assigned to the given class type if the commit is successful. The valid class types are ``storage``, ``transaction``, ``resolution``, etc. A full list of class types can be found via the ``fdbcli`` command ``help setclass``. Clearing keys is forbidden in the range. Instead, you can set the type as ``default``, which will clear the assigned class type if one exists. For more details, see help text of ``fdbcli`` command ``setclass``.
+#. ``\xff\xff/configuration/process/class_source/<address> := <class_source>`` Read-only. Reading keys in the range will retrieve processes' class sources. The class source is one of ``command_line``, ``configure_auto``, ``set_class`` and ``invalid``, indicating where the process's class type comes from.
+#. ``\xff\xff/configuration/coordinators/processes := <ip:port>,<ip:port>,...,<ip:port>`` Read/write. A single key, if read, will return a comma-delimited string of the coordinators' network addresses. Thus, to provide a new set of coordinators, set the key to a correctly formatted string of the new coordinators' network addresses. As there is always a need for coordinators, clearing the key is forbidden, and a transaction will fail with the ``special_keys_api_failure`` error if the clear is committed. For more details, see help text of ``fdbcli`` command ``coordinators``.
+#. ``\xff\xff/configuration/coordinators/cluster_description := <new_description>`` Read/write. A single key, if read, will return the cluster description. Thus, modifying the key will update the cluster description. The new description needs to match ``[A-Za-z0-9_]+``; otherwise, the ``special_keys_api_failure`` error will be thrown. In addition, clearing the key is meaningless and thus forbidden. For more details, see help text of ``fdbcli`` command ``coordinators``.
+
+The ``<address>
`` here is the network address of the corresponding process. Thus the general form is ``ip:port``. + Error message module ~~~~~~~~~~~~~~~~~~~~ diff --git a/documentation/sphinx/source/hierarchical-documents-java.rst b/documentation/sphinx/source/hierarchical-documents-java.rst index db33abd4ef..b9869afd01 100644 --- a/documentation/sphinx/source/hierarchical-documents-java.rst +++ b/documentation/sphinx/source/hierarchical-documents-java.rst @@ -69,7 +69,7 @@ Here’s a basic implementation of the recipe. private static final long EMPTY_ARRAY = -1; static { - fdb = FDB.selectAPIVersion(700); + fdb = FDB.selectAPIVersion(710); db = fdb.open(); docSpace = new Subspace(Tuple.from("D")); } diff --git a/documentation/sphinx/source/mr-status-json-schemas.rst.inc b/documentation/sphinx/source/mr-status-json-schemas.rst.inc index 81da2adf83..914a682c4c 100644 --- a/documentation/sphinx/source/mr-status-json-schemas.rst.inc +++ b/documentation/sphinx/source/mr-status-json-schemas.rst.inc @@ -27,6 +27,7 @@ "storage", "transaction", "resolution", + "stateless", "commit_proxy", "grv_proxy", "master", @@ -120,7 +121,17 @@ "counter":0, "roughness":0.0 }, - "grv_latency_statistics":{ + "fetched_versions":{ + "hz":0.0, + "counter":0, + "roughness":0.0 + }, + "fetches_from_logs":{ + "hz":0.0, + "counter":0, + "roughness":0.0 + }, + "grv_latency_statistics":{ // GRV Latency metrics are grouped according to priority (currently batch or default). "default":{ "count":0, "min":0.0, @@ -132,6 +143,18 @@ "p95":0.0, "p99":0.0, "p99.9":0.0 + }, + "batch":{ + "count":0, + "min":0.0, + "max":0.0, + "median":0.0, + "mean":0.0, + "p25":0.0, + "p90":0.0, + "p95":0.0, + "p99":0.0, + "p99.9":0.0 } }, "read_latency_statistics":{ @@ -158,6 +181,18 @@ "p99":0.0, "p99.9":0.0 }, + "commit_batching_window_size":{ + "count":0, + "min":0.0, + "max":0.0, + "median":0.0, + "mean":0.0, + "p25":0.0, + "p90":0.0, + "p95":0.0, + "p99":0.0, + "p99.9":0.0 + }, "grv_latency_bands":{ // How many GRV requests belong to the latency (in seconds) band (e.g., How many requests belong to [0.01,0.1] latency band). The key is the upper bound of the band and the lower bound is the next smallest band (or 0, if none). Example: {0.01: 27, 0.1: 18, 1: 1, inf: 98,filtered: 10}, we have 18 requests in [0.01, 0.1) band. 
"$map_key=upperBoundOfBand": 1 }, @@ -579,6 +614,10 @@ "data_distribution_disabled_for_rebalance":true, "data_distribution_disabled":true, "active_primary_dc":"pv", + "bounce_impact":{ + "can_clean_bounce":true, + "reason":"" + }, "configuration":{ "log_anti_quorum":0, "log_replicas":2, @@ -643,6 +682,16 @@ "ssd-rocksdb-experimental", "memory" ]}, + "tss_count":1, + "tss_storage_engine":{ + "$enum":[ + "ssd", + "ssd-1", + "ssd-2", + "ssd-redwood-experimental", + "ssd-rocksdb-experimental", + "memory" + ]}, "coordinators_count":1, "excluded_servers":[ { diff --git a/documentation/sphinx/source/multimaps-java.rst b/documentation/sphinx/source/multimaps-java.rst index 3c9a46ad3c..d321a8a0aa 100644 --- a/documentation/sphinx/source/multimaps-java.rst +++ b/documentation/sphinx/source/multimaps-java.rst @@ -74,7 +74,7 @@ Here’s a simple implementation of multimaps with multisets as described: private static final int N = 100; static { - fdb = FDB.selectAPIVersion(700); + fdb = FDB.selectAPIVersion(710); db = fdb.open(); multi = new Subspace(Tuple.from("M")); } diff --git a/documentation/sphinx/source/priority-queues-java.rst b/documentation/sphinx/source/priority-queues-java.rst index 0fafb08b4b..37476a3663 100644 --- a/documentation/sphinx/source/priority-queues-java.rst +++ b/documentation/sphinx/source/priority-queues-java.rst @@ -74,7 +74,7 @@ Here's a basic implementation of the model: private static final Random randno; static{ - fdb = FDB.selectAPIVersion(700); + fdb = FDB.selectAPIVersion(710); db = fdb.open(); pq = new Subspace(Tuple.from("P")); diff --git a/documentation/sphinx/source/queues-java.rst b/documentation/sphinx/source/queues-java.rst index b4b60df48b..033f0df88a 100644 --- a/documentation/sphinx/source/queues-java.rst +++ b/documentation/sphinx/source/queues-java.rst @@ -73,7 +73,7 @@ The following is a simple implementation of the basic pattern: private static final Random randno; static{ - fdb = FDB.selectAPIVersion(700); + fdb = FDB.selectAPIVersion(710); db = fdb.open(); queue = new Subspace(Tuple.from("Q")); randno = new Random(); diff --git a/documentation/sphinx/source/release-notes/release-notes-620.rst b/documentation/sphinx/source/release-notes/release-notes-620.rst index 3148eefa97..3e388a8129 100644 --- a/documentation/sphinx/source/release-notes/release-notes-620.rst +++ b/documentation/sphinx/source/release-notes/release-notes-620.rst @@ -8,6 +8,7 @@ Release Notes * Fix backup agent stall when writing to local filesystem with slow metadata operations. `(PR #4428) `_ * Backup agent no longer uses 4k block caching layer on local output files so that write operations are larger. `(PR #4428) `_ * Fix accounting error that could cause commits to incorrectly fail with ``proxy_memory_limit_exceeded``. `(PR #4529) `_ +* Added support for downgrades from FDB version 6.3. For more details, see the :ref:`administration notes `. `(PR #4673) `_ `(PR #4469) `_ 6.2.32 ====== diff --git a/documentation/sphinx/source/release-notes/release-notes-630.rst b/documentation/sphinx/source/release-notes/release-notes-630.rst index 076f85d74d..ca6a8fd029 100644 --- a/documentation/sphinx/source/release-notes/release-notes-630.rst +++ b/documentation/sphinx/source/release-notes/release-notes-630.rst @@ -2,11 +2,30 @@ Release Notes ############# + +6.3.14 +====== +* Fixed fdbbackup start command that automatically configures database with backup workers to only do so when using partitioned logs. 
`(PR #4863) `_ +* Added ``cluster.bounce_impact`` section to status to report if there will be any extra effects when bouncing the cluster, and if so, the reason for those effects. `(PR #4770) `_ +* Added ``fetched_versions`` to the storage metrics section of status to report how fast a storage server is catching up in versions. `(PR #4770) `_ +* Added ``fetches_from_logs`` to the storage metrics section of status to report how frequently a storage server fetches updates from transaction logs. `(PR #4770) `_ +* Added the ``bypass_unreadable`` transaction option which allows ``get`` operations to read from sections of keyspace that have become unreadable because of versionstamp operations. `(PR #4774) `_ +* Fix several packaging issues. The osx package should now install successfully, and the structure of the RPM and DEB packages should match that of 6.2. `(PR #4810) `_ +* Fix an accounting error that could potentially result in inaccuracies in priority busyness metrics. `(PR #4824) `_ + +6.3.13 +====== +* Added ``commit_batching_window_size`` to the proxy roles section of status to record statistics about commit batching window size on each proxy. `(PR #4736) `_ +* The multi-version client now requires at most two client connections with version 6.2 or larger, regardless of how many external clients are configured. Clients older than 6.2 will continue to create an additional connection each. `(PR #4667) `_ + 6.3.12 ====== * Change the default for --knob_tls_server_handshake_threads to 64. The previous was 1000. This avoids starting 1000 threads by default, but may adversely affect recovery time for large clusters using tls. Users with large tls clusters should consider explicitly setting this knob in their foundationdb.conf file. `(PR #4421) `_ * Fix accounting error that could cause commits to incorrectly fail with ``proxy_memory_limit_exceeded``. `(PR #4526) `_ * As an optimization, partial restore using target key ranges now filters backup log data prior to loading it into the database. `(PR #4554) `_ +* Fix fault tolerance calculation when there are no tLogs in LogSet. `(PR #4454) `_ +* Change client's ``iteration_progression`` size defaults from 256 to 4096 bytes for better performance. `(PR #4416) `_ +* Add the ability to instrument java driver actions, such as ``FDBTransaction`` and ``RangeQuery``. `(PR #4385) `_ 6.3.11 ====== diff --git a/documentation/sphinx/source/release-notes/release-notes-700.rst b/documentation/sphinx/source/release-notes/release-notes-700.rst index 431ea14fc2..ea78b9a10b 100644 --- a/documentation/sphinx/source/release-notes/release-notes-700.rst +++ b/documentation/sphinx/source/release-notes/release-notes-700.rst @@ -15,7 +15,8 @@ Features Performance ----------- -* Increased performance of dr_agent when copying the mutation log. The ``COPY_LOG_BLOCK_SIZE``, ``COPY_LOG_BLOCKS_PER_TASK``, ``COPY_LOG_PREFETCH_BLOCKS``, ``COPY_LOG_READ_AHEAD_BYTES`` and ``COPY_LOG_TASK_DURATION_NANOS`` knobs can be set. `(PR 3436) `_ +* Increased performance of dr_agent when copying the mutation log. The ``COPY_LOG_BLOCK_SIZE``, ``COPY_LOG_BLOCKS_PER_TASK``, ``COPY_LOG_PREFETCH_BLOCKS``, ``COPY_LOG_READ_AHEAD_BYTES`` and ``COPY_LOG_TASK_DURATION_NANOS`` knobs can be set. `(PR #3436) `_ +* Reduced the number of connections required by the multi-version client when loading external clients. When connecting to 7.0 clusters, only one connection with version 6.2 or larger will be used. With older clusters, at most two connections with version 6.2 or larger will be used. 
Clients older than version 6.2 will continue to create an additional connection each. `(PR #4667) `_ Reliability ----------- @@ -29,8 +30,10 @@ Fixes Status ------ - - +* Added ``commit_batching_window_size`` to the proxy roles section of status to record statistics about commit batching window size on each proxy. `(PR #4735) `_ +* Added ``cluster.bounce_impact`` section to status to report if there will be any extra effects when bouncing the cluster, and if so, the reason for those effects. `(PR #4770) `_ +* Added ``fetched_versions`` to the storage metrics section of status to report how fast a storage server is catching up in versions. `(PR #4770) `_ +* Added ``fetches_from_logs`` to the storage metrics section of status to report how frequently a storage server fetches updates from transaction logs. `(PR #4770) `_ Bindings -------- diff --git a/documentation/sphinx/source/simple-indexes-java.rst b/documentation/sphinx/source/simple-indexes-java.rst index c5edf02e71..61769ea847 100644 --- a/documentation/sphinx/source/simple-indexes-java.rst +++ b/documentation/sphinx/source/simple-indexes-java.rst @@ -87,7 +87,7 @@ In this example, we’re storing user data based on user ID but sometimes need t private static final Subspace index; static { - fdb = FDB.selectAPIVersion(700); + fdb = FDB.selectAPIVersion(710); db = fdb.open(); main = new Subspace(Tuple.from("user")); index = new Subspace(Tuple.from("zipcode_index")); diff --git a/documentation/sphinx/source/tables-java.rst b/documentation/sphinx/source/tables-java.rst index 235dbd5b47..14cd0348ca 100644 --- a/documentation/sphinx/source/tables-java.rst +++ b/documentation/sphinx/source/tables-java.rst @@ -62,7 +62,7 @@ Here’s a simple implementation of the basic table pattern: private static final Subspace colIndex; static { - fdb = FDB.selectAPIVersion(700); + fdb = FDB.selectAPIVersion(710); db = fdb.open(); table = new Subspace(Tuple.from("T")); rowIndex = table.subspace(Tuple.from("R")); diff --git a/documentation/sphinx/source/vector-java.rst b/documentation/sphinx/source/vector-java.rst index 17da6ebed8..4341948316 100644 --- a/documentation/sphinx/source/vector-java.rst +++ b/documentation/sphinx/source/vector-java.rst @@ -77,7 +77,7 @@ Here’s the basic pattern: private static final Subspace vector; static { - fdb = FDB.selectAPIVersion(700); + fdb = FDB.selectAPIVersion(710); db = fdb.open(); vector = new Subspace(Tuple.from("V")); } diff --git a/documentation/tutorial/tutorial.actor.cpp b/documentation/tutorial/tutorial.actor.cpp index 5ec749b1cb..4213ddd5c2 100644 --- a/documentation/tutorial/tutorial.actor.cpp +++ b/documentation/tutorial/tutorial.actor.cpp @@ -366,7 +366,7 @@ ACTOR Future fdbClient() { // 3. write 10 values in [k, k+100] beginIdx = deterministicRandom()->randomInt(0, 1e8 - 100); startKey = keyPrefix + std::to_string(beginIdx); - Standalone range = wait(tx.getRange(KeyRangeRef(startKey, endKey), 100)); + RangeResult range = wait(tx.getRange(KeyRangeRef(startKey, endKey), 100)); for (int i = 0; i < 10; ++i) { Key k = Key(keyPrefix + std::to_string(beginIdx + deterministicRandom()->randomInt(0, 100))); tx.set(k, LiteralStringRef("foo")); diff --git a/fdbbackup/BackupTLSConfig.cpp b/fdbbackup/BackupTLSConfig.cpp new file mode 100644 index 0000000000..4df47e0b3b --- /dev/null +++ b/fdbbackup/BackupTLSConfig.cpp @@ -0,0 +1,90 @@ +/* + * BackupTLSConfig.cpp + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2021 Apple Inc. 
and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+
+#include "fdbclient/NativeAPI.actor.h"
+#include "flow/Arena.h"
+#include "flow/Error.h"
+#include "flow/network.h"
+
+#include "fdbbackup/BackupTLSConfig.h"
+
+void BackupTLSConfig::setupBlobCredentials() {
+    // Add blob credentials files from the environment to the list collected from the command line.
+    const char* blobCredsFromENV = getenv("FDB_BLOB_CREDENTIALS");
+    if (blobCredsFromENV != nullptr) {
+        StringRef t((uint8_t*)blobCredsFromENV, strlen(blobCredsFromENV));
+        do {
+            StringRef file = t.eat(":");
+            if (file.size() != 0)
+                blobCredentials.push_back(file.toString());
+        } while (t.size() != 0);
+    }
+
+    // Update the global blob credential files list
+    std::vector<std::string>* pFiles = (std::vector<std::string>*)g_network->global(INetwork::enBlobCredentialFiles);
+    if (pFiles != nullptr) {
+        for (auto& f : blobCredentials) {
+            pFiles->push_back(f);
+        }
+    }
+}
+
+bool BackupTLSConfig::setupTLS() {
+    if (tlsCertPath.size()) {
+        try {
+            setNetworkOption(FDBNetworkOptions::TLS_CERT_PATH, tlsCertPath);
+        } catch (Error& e) {
+            std::cerr << "ERROR: cannot set TLS certificate path to " << tlsCertPath << " (" << e.what() << ")\n";
+            return false;
+        }
+    }
+
+    if (tlsCAPath.size()) {
+        try {
+            setNetworkOption(FDBNetworkOptions::TLS_CA_PATH, tlsCAPath);
+        } catch (Error& e) {
+            std::cerr << "ERROR: cannot set TLS CA path to " << tlsCAPath << " (" << e.what() << ")\n";
+            return false;
+        }
+    }
+    if (tlsKeyPath.size()) {
+        try {
+            if (tlsPassword.size())
+                setNetworkOption(FDBNetworkOptions::TLS_PASSWORD, tlsPassword);
+
+            setNetworkOption(FDBNetworkOptions::TLS_KEY_PATH, tlsKeyPath);
+        } catch (Error& e) {
+            std::cerr << "ERROR: cannot set TLS key path to " << tlsKeyPath << " (" << e.what() << ")\n";
+            return false;
+        }
+    }
+    if (tlsVerifyPeers.size()) {
+        try {
+            setNetworkOption(FDBNetworkOptions::TLS_VERIFY_PEERS, tlsVerifyPeers);
+        } catch (Error& e) {
+            std::cerr << "ERROR: cannot set TLS peer verification to " << tlsVerifyPeers << " (" << e.what()
+                      << ")\n";
+            return false;
+        }
+    }
+    return true;
+}
\ No newline at end of file
diff --git a/fdbbackup/BackupTLSConfig.h b/fdbbackup/BackupTLSConfig.h
new file mode 100644
index 0000000000..4222c0c25f
--- /dev/null
+++ b/fdbbackup/BackupTLSConfig.h
@@ -0,0 +1,41 @@
+/*
+ * BackupTLSConfig.h
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2021 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FDBBACKUP_BACKUPTLSCONFIG_H
+#define FDBBACKUP_BACKUPTLSCONFIG_H
+#pragma once
+
+#include <string>
+#include <vector>
+
+// TLS and blob credentials for backups and setup for these credentials.
+struct BackupTLSConfig {
+    std::string tlsCertPath, tlsKeyPath, tlsCAPath, tlsPassword, tlsVerifyPeers;
+    std::vector<std::string> blobCredentials;
+
+    // Returns true if TLS setup is successful
+    bool setupTLS();
+
+    // Sets up blob credentials. Adds the file specified by FDB_BLOB_CREDENTIALS as well.
+    // Note this must be called after g_network is set up.
+    void setupBlobCredentials();
+};
+
+#endif // FDBBACKUP_BACKUPTLSCONFIG_H
diff --git a/fdbbackup/CMakeLists.txt b/fdbbackup/CMakeLists.txt
index 1737b9042b..da2457b850 100644
--- a/fdbbackup/CMakeLists.txt
+++ b/fdbbackup/CMakeLists.txt
@@ -1,5 +1,7 @@
 set(FDBBACKUP_SRCS
-  backup.actor.cpp)
+  BackupTLSConfig.h
+  BackupTLSConfig.cpp
+  backup.actor.cpp)

 add_flow_target(EXECUTABLE NAME fdbbackup SRCS ${FDBBACKUP_SRCS})
 target_link_libraries(fdbbackup PRIVATE fdbclient)
@@ -11,6 +13,8 @@ add_flow_target(EXECUTABLE NAME fdbconvert SRCS ${FDBCONVERT_SRCS})
 target_link_libraries(fdbconvert PRIVATE fdbclient)

 set(FDBDECODE_SRCS
+  BackupTLSConfig.h
+  BackupTLSConfig.cpp
   FileDecoder.actor.cpp
   FileConverter.h)
 add_flow_target(EXECUTABLE NAME fdbdecode SRCS ${FDBDECODE_SRCS})
@@ -19,14 +23,14 @@ target_link_libraries(fdbdecode PRIVATE fdbclient)
 if(NOT OPEN_FOR_IDE)
   if(GENERATE_DEBUG_PACKAGES)
     fdb_install(TARGETS fdbbackup DESTINATION bin COMPONENT clients)
-    fdb_install(PROGRAMS $<TARGET_FILE:fdbbackup> DESTINATION fdbmonitor COMPONENT clients RENAME backup_agent/backup_agent)
+    fdb_install(PROGRAMS $<TARGET_FILE:fdbbackup> DESTINATION backupagent COMPONENT clients RENAME backup_agent/backup_agent)
     fdb_install(PROGRAMS $<TARGET_FILE:fdbbackup> DESTINATION bin COMPONENT clients RENAME fdbrestore)
     fdb_install(PROGRAMS $<TARGET_FILE:fdbbackup> DESTINATION bin COMPONENT clients RENAME dr_agent)
     fdb_install(PROGRAMS $<TARGET_FILE:fdbbackup> DESTINATION bin COMPONENT clients RENAME fdbdr)
   else()
     add_custom_target(prepare_fdbbackup_install ALL DEPENDS strip_only_fdbbackup)
     fdb_install(PROGRAMS ${CMAKE_BINARY_DIR}/packages/bin/fdbbackup DESTINATION bin COMPONENT clients)
-    fdb_install(PROGRAMS ${CMAKE_BINARY_DIR}/packages/bin/fdbbackup DESTINATION fdbmonitor COMPONENT clients RENAME backup_agent/backup_agent)
+    fdb_install(PROGRAMS ${CMAKE_BINARY_DIR}/packages/bin/fdbbackup DESTINATION backupagent COMPONENT clients RENAME backup_agent/backup_agent)
     fdb_install(PROGRAMS ${CMAKE_BINARY_DIR}/packages/bin/fdbbackup DESTINATION bin COMPONENT clients RENAME fdbrestore)
     fdb_install(PROGRAMS ${CMAKE_BINARY_DIR}/packages/bin/fdbbackup DESTINATION bin COMPONENT clients RENAME dr_agent)
     fdb_install(PROGRAMS ${CMAKE_BINARY_DIR}/packages/bin/fdbbackup DESTINATION bin COMPONENT clients RENAME fdbdr)
diff --git a/fdbbackup/FileConverter.h b/fdbbackup/FileConverter.h
index 0f7bfd6b16..e3890cb476 100644
--- a/fdbbackup/FileConverter.h
+++ b/fdbbackup/FileConverter.h
@@ -24,6 +24,7 @@
 #include
 #include "flow/SimpleOpt.h"
+#include "flow/TLSConfig.actor.h"

 namespace file_converter {

@@ -31,6 +32,7 @@ namespace file_converter {
 enum {
     OPT_CONTAINER,
     OPT_BEGIN_VERSION,
+    OPT_BLOB_CREDENTIALS,
     OPT_CRASHONERROR,
     OPT_END_VERSION,
     OPT_TRACE,
@@ -55,6 +57,10 @@ CSimpleOpt::SOption gConverterOptions[] = { { OPT_CONTAINER, "-r", SO_REQ_SEP },
                                             { OPT_TRACE_LOG_GROUP, "--loggroup", SO_REQ_SEP },
                                             { OPT_INPUT_FILE, "-i", SO_REQ_SEP },
                                             { OPT_INPUT_FILE, "--input", SO_REQ_SEP },
+                                            { OPT_BLOB_CREDENTIALS, "--blob_credentials", SO_REQ_SEP },
+#ifndef TLS_DISABLED
+                                            TLS_OPTION_FLAGS
+#endif
                                             { OPT_BUILD_FLAGS, "--build_flags", SO_NONE },
                                             { OPT_HELP, "-?", SO_NONE },
                                             { OPT_HELP, "-h", SO_NONE },
diff --git a/fdbbackup/FileDecoder.actor.cpp b/fdbbackup/FileDecoder.actor.cpp
index 89f1855365..193564d905 100644
--- a/fdbbackup/FileDecoder.actor.cpp
+++ b/fdbbackup/FileDecoder.actor.cpp
@@ -22,10 +22,12 @@
 #include
 #include
+#include "fdbbackup/BackupTLSConfig.h"
 #include "fdbclient/BackupAgent.actor.h"
 #include "fdbclient/BackupContainer.h"
 #include "fdbbackup/FileConverter.h"
 #include "fdbclient/MutationList.h"
+#include "flow/Trace.h"
 #include "flow/flow.h"
 #include "flow/serialize.h"
 #include "fdbclient/BuildFlags.h"
@@ -38,31 +40,52 @@ extern bool g_crashOnError;

 namespace file_converter {

 void printDecodeUsage() {
-    std::cout << "\n"
-                 "  -r, --container Container URL.\n"
-                 "  -i, --input FILE Log file to be decoded.\n"
-                 "  --crash Crash on serious error.\n"
-                 "  --build_flags Print build information and exit.\n"
-                 "\n";
+    std::cout
+        << "Decoder for FoundationDB backup mutation logs.\n"
+           "Usage: fdbdecode [OPTIONS]\n"
+           "  -r, --container URL\n"
+           "                 Backup container URL, e.g., file:///some/path/.\n"
+           "  -i, --input FILE\n"
+           "                 Log file filter, only matched files are decoded.\n"
+           "  --log          Enables trace file logging for the CLI session.\n"
+           "  --logdir PATH  Specifies the output directory for trace files. If\n"
+           "                 unspecified, defaults to the current directory. Has\n"
+           "                 no effect unless --log is specified.\n"
+           "  --loggroup LOG_GROUP\n"
+           "                 Sets the LogGroup field with the specified value for all\n"
+           "                 events in the trace output (defaults to `default').\n"
+           "  --trace_format FORMAT\n"
+           "                 Select the format of the trace files, xml (the default) or json.\n"
+           "                 Has no effect unless --log is specified.\n"
+           "  --crash        Crash on serious error.\n"
+           "  --blob_credentials FILE\n"
+           "                 File containing blob credentials in JSON format.\n"
+           "                 The same credential format/file fdbbackup uses.\n"
+#ifndef TLS_DISABLED
+        TLS_HELP
+#endif
+           "  --build_flags  Print build information and exit.\n"
+           "\n";
     return;
 }

 void printBuildInformation() {
-    printf("%s", jsonBuildInformation().c_str());
+    std::cout << jsonBuildInformation() << "\n";
 }

 struct DecodeParams {
     std::string container_url;
-    std::string file;
+    std::string fileFilter; // only files matching the filter will be decoded
     bool log_enabled = false;
     std::string log_dir, trace_format, trace_log_group;
+    BackupTLSConfig tlsConfig;

     std::string toString() {
         std::string s;
         s.append("ContainerURL: ");
         s.append(container_url);
-        s.append(", File: ");
-        s.append(file);
+        s.append(", FileFilter: ");
+        s.append(fileFilter);
         if (log_enabled) {
             if (!log_dir.empty()) {
                 s.append(" LogDir:").append(log_dir);
@@ -76,6 +99,8 @@ struct DecodeParams {
         }
         return s;
     }
+
+
 };

 int parseDecodeCommandLine(DecodeParams* param, CSimpleOpt* args) {
@@ -93,7 +118,6 @@ int parseDecodeCommandLine(DecodeParams* param, CSimpleOpt* args) {
         int optId = args->OptionId();
         switch (optId) {
         case OPT_HELP:
-            printDecodeUsage();
             return FDB_EXIT_ERROR;

         case OPT_CONTAINER:
@@ -105,7 +129,7 @@
             break;

         case OPT_INPUT_FILE:
-            param->file = args->OptionArg();
+            param->fileFilter = args->OptionArg();
             break;

         case OPT_TRACE:
@@ -127,6 +151,37 @@
         case OPT_TRACE_LOG_GROUP:
             param->trace_log_group = args->OptionArg();
             break;
+
+        case OPT_BLOB_CREDENTIALS:
+            
param->tlsConfig.blobCredentials.push_back(args->OptionArg()); + break; + +#ifndef TLS_DISABLED + case TLSConfig::OPT_TLS_PLUGIN: + args->OptionArg(); + break; + + case TLSConfig::OPT_TLS_CERTIFICATES: + param->tlsConfig.tlsCertPath = args->OptionArg(); + break; + + case TLSConfig::OPT_TLS_PASSWORD: + param->tlsConfig.tlsPassword = args->OptionArg(); + break; + + case TLSConfig::OPT_TLS_CA_FILE: + param->tlsConfig.tlsCAPath = args->OptionArg(); + break; + + case TLSConfig::OPT_TLS_KEY: + param->tlsConfig.tlsKeyPath = args->OptionArg(); + break; + + case TLSConfig::OPT_TLS_VERIFY_PEERS: + param->tlsConfig.tlsVerifyPeers = args->OptionArg(); + break; +#endif + case OPT_BUILD_FLAGS: printBuildInformation(); return FDB_EXIT_ERROR; @@ -147,7 +202,7 @@ void printLogFiles(std::string msg, const std::vector& files) { std::vector getRelevantLogFiles(const std::vector& files, const DecodeParams& params) { std::vector filtered; for (const auto& file : files) { - if (file.fileName.find(params.file) != std::string::npos) { + if (file.fileName.find(params.fileFilter) != std::string::npos) { filtered.push_back(file); } } @@ -515,6 +570,11 @@ int main(int argc, char** argv) { } } + if (!param.tlsConfig.setupTLS()) { + TraceEvent(SevError, "TLSError"); + throw tls_error(); + } + platformInit(); Error::init(); @@ -523,13 +583,14 @@ int main(int argc, char** argv) { TraceEvent::setNetworkThread(); openTraceFile(NetworkAddress(), 10 << 20, 10 << 20, param.log_dir, "decode", param.trace_log_group); + param.tlsConfig.setupBlobCredentials(); auto f = stopAfter(decode_logs(param)); runNetwork(); return status; } catch (Error& e) { - fprintf(stderr, "ERROR: %s\n", e.what()); + std::cerr << "ERROR: " << e.what() << "\n"; return FDB_EXIT_ERROR; } catch (std::exception& e) { TraceEvent(SevError, "MainError").error(unknown_error()).detail("RootException", e.what()); diff --git a/fdbbackup/backup.actor.cpp b/fdbbackup/backup.actor.cpp index 7614324afc..77e4b03f0d 100644 --- a/fdbbackup/backup.actor.cpp +++ b/fdbbackup/backup.actor.cpp @@ -18,6 +18,7 @@ * limitations under the License. 
*/ +#include "fdbbackup/BackupTLSConfig.h" #include "fdbclient/JsonBuilder.h" #include "flow/Arena.h" #include "flow/Error.h" @@ -105,6 +106,7 @@ enum { // Backup constants OPT_DESTCONTAINER, OPT_SNAPSHOTINTERVAL, + OPT_INITIAL_SNAPSHOT_INTERVAL, OPT_ERRORLIMIT, OPT_NOSTOPWHENDONE, OPT_EXPIRE_BEFORE_VERSION, @@ -144,6 +146,7 @@ enum { OPT_RESTORE_CLUSTERFILE_DEST, OPT_RESTORE_CLUSTERFILE_ORIG, OPT_RESTORE_BEGIN_VERSION, + OPT_RESTORE_INCONSISTENT_SNAPSHOT_ONLY, // Shared constants OPT_CLUSTERFILE, @@ -232,6 +235,7 @@ CSimpleOpt::SOption g_rgBackupStartOptions[] = { { OPT_USE_PARTITIONED_LOG, "--partitioned_log_experimental", SO_NONE }, { OPT_SNAPSHOTINTERVAL, "-s", SO_REQ_SEP }, { OPT_SNAPSHOTINTERVAL, "--snapshot_interval", SO_REQ_SEP }, + { OPT_INITIAL_SNAPSHOT_INTERVAL, "--initial_snapshot_interval", SO_REQ_SEP }, { OPT_TAGNAME, "-t", SO_REQ_SEP }, { OPT_TAGNAME, "--tagname", SO_REQ_SEP }, { OPT_BACKUPKEYS, "-k", SO_REQ_SEP }, @@ -691,6 +695,7 @@ CSimpleOpt::SOption g_rgRestoreOptions[] = { { OPT_BLOB_CREDENTIALS, "--blob_credentials", SO_REQ_SEP }, { OPT_INCREMENTALONLY, "--incremental", SO_NONE }, { OPT_RESTORE_BEGIN_VERSION, "--begin_version", SO_REQ_SEP }, + { OPT_RESTORE_INCONSISTENT_SNAPSHOT_ONLY, "--inconsistent_snapshot_only", SO_NONE }, #ifndef TLS_DISABLED TLS_OPTION_FLAGS #endif @@ -1571,7 +1576,7 @@ ACTOR Future getLayerStatus(Reference tr state Reference tr2(new ReadYourWritesTransaction(dest)); tr2->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr2->setOption(FDBTransactionOptions::LOCK_AWARE); - state Standalone tagNames = wait(tr2->getRange(dba.tagNames.range(), 10000, snapshot)); + state RangeResult tagNames = wait(tr2->getRange(dba.tagNames.range(), 10000, snapshot)); state std::vector>> backupVersion; state std::vector> backupStatus; state std::vector> tagRangeBytesDR; @@ -1633,7 +1638,7 @@ ACTOR Future cleanupStatus(Reference tr, std::string name, std::string id, int limit = 1) { - state Standalone docs = wait(tr->getRange(KeyRangeRef(rootKey, strinc(rootKey)), limit, true)); + state RangeResult docs = wait(tr->getRange(KeyRangeRef(rootKey, strinc(rootKey)), limit, true)); state bool readMore = false; state int i; for (i = 0; i < docs.size(); ++i) { @@ -1662,7 +1667,7 @@ ACTOR Future cleanupStatus(Reference tr, } if (readMore) { limit = 10000; - Standalone docs2 = wait(tr->getRange(KeyRangeRef(rootKey, strinc(rootKey)), limit, true)); + RangeResult docs2 = wait(tr->getRange(KeyRangeRef(rootKey, strinc(rootKey)), limit, true)); docs = std::move(docs2); readMore = false; } @@ -1679,7 +1684,7 @@ ACTOR Future getLayerStatus(Database src, std::string root try { tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr.setOption(FDBTransactionOptions::LOCK_AWARE); - state Standalone kvPairs = + state RangeResult kvPairs = wait(tr.getRange(KeyRangeRef(rootKey, strinc(rootKey)), GetRangeLimits::ROW_LIMIT_UNLIMITED)); json_spirit::mObject statusDoc; JSONDoc modifier(statusDoc); @@ -1879,6 +1884,7 @@ ACTOR Future submitDBBackup(Database src, ACTOR Future submitBackup(Database db, std::string url, + int initialSnapshotIntervalSeconds, int snapshotIntervalSeconds, Standalone> backupRanges, std::string tagName, @@ -1935,6 +1941,7 @@ ACTOR Future submitBackup(Database db, else { wait(backupAgent.submitBackup(db, KeyRef(url), + initialSnapshotIntervalSeconds, snapshotIntervalSeconds, tagName, backupRanges, @@ -2251,7 +2258,8 @@ ACTOR Future runRestore(Database db, bool waitForDone, std::string addPrefix, std::string removePrefix, - bool incrementalBackupOnly) { + bool 
onlyAppyMutationLogs, + bool inconsistentSnapshotOnly) { if (ranges.empty()) { ranges.push_back_deep(ranges.arena(), normalKeys); } @@ -2297,7 +2305,7 @@ ACTOR Future runRestore(Database db, BackupDescription desc = wait(bc->describeBackup()); - if (incrementalBackupOnly && desc.contiguousLogEnd.present()) { + if (onlyAppyMutationLogs && desc.contiguousLogEnd.present()) { targetVersion = desc.contiguousLogEnd.get() - 1; } else if (desc.maxRestorableVersion.present()) { targetVersion = desc.maxRestorableVersion.get(); @@ -2322,7 +2330,8 @@ ACTOR Future runRestore(Database db, KeyRef(addPrefix), KeyRef(removePrefix), true, - incrementalBackupOnly, + onlyAppyMutationLogs, + inconsistentSnapshotOnly, beginVersion)); if (waitForDone && verbose) { @@ -3212,6 +3221,8 @@ int main(int argc, char* argv[]) { std::string destinationContainer; bool describeDeep = false; bool describeTimestamps = false; + int initialSnapshotIntervalSeconds = + 0; // The initial snapshot has a desired duration of 0, meaning go as fast as possible. int snapshotIntervalSeconds = CLIENT_KNOBS->BACKUP_DEFAULT_SNAPSHOT_INTERVAL_SEC; std::string clusterFile; std::string sourceClusterFile; @@ -3236,6 +3247,8 @@ int main(int argc, char* argv[]) { bool stopWhenDone = true; bool usePartitionedLog = false; // Set to true to use new backup system bool incrementalBackupOnly = false; + bool onlyAppyMutationLogs = false; + bool inconsistentSnapshotOnly = false; bool forceAction = false; bool trace = false; bool quietDisplay = false; @@ -3251,8 +3264,7 @@ int main(int argc, char* argv[]) { LocalityData localities; uint64_t memLimit = 8LL << 30; Optional ti; - std::vector blobCredentials; - std::string tlsCertPath, tlsKeyPath, tlsCAPath, tlsPassword, tlsVerifyPeers; + BackupTLSConfig tlsConfig; Version dumpBegin = 0; Version dumpEnd = std::numeric_limits::max(); std::string restoreClusterFileDest; @@ -3467,6 +3479,7 @@ int main(int argc, char* argv[]) { modifyOptions.destURL = destinationContainer; break; case OPT_SNAPSHOTINTERVAL: + case OPT_INITIAL_SNAPSHOT_INTERVAL: case OPT_MOD_ACTIVE_INTERVAL: { const char* a = args->OptionArg(); int seconds; @@ -3478,6 +3491,8 @@ int main(int argc, char* argv[]) { if (optId == OPT_SNAPSHOTINTERVAL) { snapshotIntervalSeconds = seconds; modifyOptions.snapshotIntervalSeconds = seconds; + } else if (optId == OPT_INITIAL_SNAPSHOT_INTERVAL) { + initialSnapshotIntervalSeconds = seconds; } else if (optId == OPT_MOD_ACTIVE_INTERVAL) { modifyOptions.activeSnapshotIntervalSeconds = seconds; } @@ -3497,6 +3512,7 @@ int main(int argc, char* argv[]) { break; case OPT_INCREMENTALONLY: incrementalBackupOnly = true; + onlyAppyMutationLogs = true; break; case OPT_RESTORECONTAINER: restoreContainer = args->OptionArg(); @@ -3547,6 +3563,10 @@ int main(int argc, char* argv[]) { restoreVersion = ver; break; } + case OPT_RESTORE_INCONSISTENT_SNAPSHOT_ONLY: { + inconsistentSnapshotOnly = true; + break; + } #ifdef _WIN32 case OPT_PARENTPID: { auto pid_str = args->OptionArg(); @@ -3578,26 +3598,26 @@ int main(int argc, char* argv[]) { memLimit = ti.get(); break; case OPT_BLOB_CREDENTIALS: - blobCredentials.push_back(args->OptionArg()); + tlsConfig.blobCredentials.push_back(args->OptionArg()); break; #ifndef TLS_DISABLED case TLSConfig::OPT_TLS_PLUGIN: args->OptionArg(); break; case TLSConfig::OPT_TLS_CERTIFICATES: - tlsCertPath = args->OptionArg(); + tlsConfig.tlsCertPath = args->OptionArg(); break; case TLSConfig::OPT_TLS_PASSWORD: - tlsPassword = args->OptionArg(); + tlsConfig.tlsPassword = args->OptionArg(); break; case 
TLSConfig::OPT_TLS_CA_FILE: - tlsCAPath = args->OptionArg(); + tlsConfig.tlsCAPath = args->OptionArg(); break; case TLSConfig::OPT_TLS_KEY: - tlsKeyPath = args->OptionArg(); + tlsConfig.tlsKeyPath = args->OptionArg(); break; case TLSConfig::OPT_TLS_VERIFY_PEERS: - tlsVerifyPeers = args->OptionArg(); + tlsConfig.tlsVerifyPeers = args->OptionArg(); break; #endif case OPT_DUMP_BEGIN: @@ -3731,42 +3751,8 @@ int main(int argc, char* argv[]) { setNetworkOption(FDBNetworkOptions::DISABLE_CLIENT_STATISTICS_LOGGING); // deferred TLS options - if (tlsCertPath.size()) { - try { - setNetworkOption(FDBNetworkOptions::TLS_CERT_PATH, tlsCertPath); - } catch (Error& e) { - fprintf(stderr, "ERROR: cannot set TLS certificate path to `%s' (%s)\n", tlsCertPath.c_str(), e.what()); - return 1; - } - } - - if (tlsCAPath.size()) { - try { - setNetworkOption(FDBNetworkOptions::TLS_CA_PATH, tlsCAPath); - } catch (Error& e) { - fprintf(stderr, "ERROR: cannot set TLS CA path to `%s' (%s)\n", tlsCAPath.c_str(), e.what()); - return 1; - } - } - if (tlsKeyPath.size()) { - try { - if (tlsPassword.size()) - setNetworkOption(FDBNetworkOptions::TLS_PASSWORD, tlsPassword); - - setNetworkOption(FDBNetworkOptions::TLS_KEY_PATH, tlsKeyPath); - } catch (Error& e) { - fprintf(stderr, "ERROR: cannot set TLS key path to `%s' (%s)\n", tlsKeyPath.c_str(), e.what()); - return 1; - } - } - if (tlsVerifyPeers.size()) { - try { - setNetworkOption(FDBNetworkOptions::TLS_VERIFY_PEERS, tlsVerifyPeers); - } catch (Error& e) { - fprintf( - stderr, "ERROR: cannot set TLS peer verification to `%s' (%s)\n", tlsVerifyPeers.c_str(), e.what()); - return 1; - } + if (!tlsConfig.setupTLS()) { + return 1; } Error::init(); @@ -3806,25 +3792,8 @@ int main(int argc, char* argv[]) { // are logged. This thread will eventually run the network, so call it now. TraceEvent::setNetworkThread(); - // Add blob credentials files from the environment to the list collected from the command line. - const char* blobCredsFromENV = getenv("FDB_BLOB_CREDENTIALS"); - if (blobCredsFromENV != nullptr) { - StringRef t((uint8_t*)blobCredsFromENV, strlen(blobCredsFromENV)); - do { - StringRef file = t.eat(":"); - if (file.size() != 0) - blobCredentials.push_back(file.toString()); - } while (t.size() != 0); - } - - // Update the global blob credential files list - std::vector* pFiles = - (std::vector*)g_network->global(INetwork::enBlobCredentialFiles); - if (pFiles != nullptr) { - for (auto& f : blobCredentials) { - pFiles->push_back(f); - } - } + // Sets up blob credentials, including one from the environment FDB_BLOB_CREDENTIALS. 
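+ // (Illustrative note: as the removed code above shows, FDB_BLOB_CREDENTIALS is parsed as a
+ // colon-separated list of credential file paths, e.g. "/etc/creds1:/etc/creds2"; these example
+ // paths are hypothetical.)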
+ tlsConfig.setupBlobCredentials(); // Opens a trace file if trace is set (and if a trace file isn't already open) // For most modes, initCluster() will open a trace file, but some fdbbackup operations do not require @@ -3888,6 +3857,7 @@ int main(int argc, char* argv[]) { openBackupContainer(argv[0], destinationContainer); f = stopAfter(submitBackup(db, destinationContainer, + initialSnapshotIntervalSeconds, snapshotIntervalSeconds, backupKeys, tagName, @@ -4064,7 +4034,8 @@ int main(int argc, char* argv[]) { waitForDone, addPrefix, removePrefix, - incrementalBackupOnly)); + onlyAppyMutationLogs, + inconsistentSnapshotOnly)); break; case RestoreType::WAIT: f = stopAfter(success(ba.waitRestore(db, KeyRef(tagName), true))); @@ -4275,4 +4246,4 @@ int main(int argc, char* argv[]) { } flushAndExit(status); -} \ No newline at end of file +} diff --git a/fdbcli/CMakeLists.txt b/fdbcli/CMakeLists.txt index 2b65baf040..d0cab5b178 100644 --- a/fdbcli/CMakeLists.txt +++ b/fdbcli/CMakeLists.txt @@ -1,7 +1,10 @@ set(FDBCLI_SRCS fdbcli.actor.cpp + fdbcli.actor.h + ConsistencyCheckCommand.actor.cpp FlowLineNoise.actor.cpp FlowLineNoise.h + Util.cpp linenoise/linenoise.h) if(NOT WIN32) diff --git a/fdbcli/ConsistencyCheckCommand.actor.cpp b/fdbcli/ConsistencyCheckCommand.actor.cpp new file mode 100644 index 0000000000..892acbb239 --- /dev/null +++ b/fdbcli/ConsistencyCheckCommand.actor.cpp @@ -0,0 +1,63 @@ +/* + * ConsistencyCheckCommand.actor.cpp + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2021 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "fdbcli/fdbcli.actor.h" + +#include "fdbclient/FDBOptions.g.h" +#include "fdbclient/IClientApi.h" + +#include "flow/Arena.h" +#include "flow/FastRef.h" +#include "flow/ThreadHelper.actor.h" +#include "flow/actorcompiler.h" // This must be the last #include. + +namespace fdb_cli { + +const KeyRef consistencyCheckSpecialKey = LiteralStringRef("\xff\xff/management/consistency_check_suspended"); + +ACTOR Future consistencyCheckCommandActor(Reference tr, std::vector tokens) { + // Here we do not proceed in a try-catch loop since the transaction is always supposed to succeed. + // If not, the outer loop catch block(fdbcli.actor.cpp) will handle the error and print out the error message + tr->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES); + if (tokens.size() == 1) { + Optional suspended = wait(safeThreadFutureToFuture(tr->get(consistencyCheckSpecialKey))); + printf("ConsistencyCheck is %s\n", suspended.present() ? 
"off" : "on"); + } else if (tokens.size() == 2 && tokencmp(tokens[1], "off")) { + tr->set(consistencyCheckSpecialKey, Value()); + wait(safeThreadFutureToFuture(tr->commit())); + } else if (tokens.size() == 2 && tokencmp(tokens[1], "on")) { + tr->clear(consistencyCheckSpecialKey); + wait(safeThreadFutureToFuture(tr->commit())); + } else { + printUsage(tokens[0]); + return false; + } + return true; +} + +CommandFactory consistencyCheckFactory( + "consistencycheck", + CommandHelp( + "consistencycheck [on|off]", + "permits or prevents consistency checking", + "Calling this command with `on' permits consistency check processes to run and `off' will halt their checking. " + "Calling this command with no arguments will display if consistency checking is currently allowed.\n")); + +} // namespace fdb_cli diff --git a/flow/ThreadHelper.cpp b/fdbcli/Util.cpp similarity index 50% rename from flow/ThreadHelper.cpp rename to fdbcli/Util.cpp index fe61752ea5..f67f27c774 100644 --- a/flow/ThreadHelper.cpp +++ b/fdbcli/Util.cpp @@ -1,9 +1,9 @@ /* - * ThreadHelper.cpp + * Util.cpp * * This source file is part of the FoundationDB open source project * - * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors + * Copyright 2013-2021 Apple Inc. and the FoundationDB project authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,26 @@ * limitations under the License. */ -#include "flow/ThreadHelper.actor.h" +#include "fdbcli/fdbcli.actor.h" -ThreadCallback* ThreadCallback::addCallback(ThreadCallback* cb) { - return (new ThreadMultiCallback())->addCallback(this)->addCallback(cb); +#include "flow/Arena.h" + +namespace fdb_cli { + +bool tokencmp(StringRef token, const char* command) { + if (token.size() != strlen(command)) + return false; + + return !memcmp(token.begin(), command, token.size()); } + +void printUsage(StringRef command) { + const auto& helpMap = CommandFactory::commands(); + auto i = helpMap.find(command.toString()); + if (i != helpMap.end()) + printf("Usage: %s\n", i->second.usage.c_str()); + else + fprintf(stderr, "ERROR: Unknown command `%s'\n", command.toString().c_str()); +} + +} // namespace fdb_cli diff --git a/fdbcli/fdbcli.actor.cpp b/fdbcli/fdbcli.actor.cpp index 677d431ce9..101bbcf9d5 100644 --- a/fdbcli/fdbcli.actor.cpp +++ b/fdbcli/fdbcli.actor.cpp @@ -21,9 +21,12 @@ #include "boost/lexical_cast.hpp" #include "fdbclient/NativeAPI.actor.h" #include "fdbclient/FDBTypes.h" +#include "fdbclient/IClientApi.h" +#include "fdbclient/MultiVersionTransaction.h" #include "fdbclient/Status.h" #include "fdbclient/StatusClient.h" #include "fdbclient/DatabaseContext.h" +#include "fdbclient/GlobalConfig.actor.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbclient/ReadYourWrites.h" #include "fdbclient/ClusterInterface.h" @@ -34,6 +37,7 @@ #include "fdbclient/TagThrottle.h" #include "fdbclient/Tuple.h" +#include "fdbclient/ThreadSafeTransaction.h" #include "flow/DeterministicRandom.h" #include "flow/Platform.h" @@ -41,6 +45,7 @@ #include "flow/SimpleOpt.h" #include "fdbcli/FlowLineNoise.h" +#include "fdbcli/fdbcli.actor.h" #include #include @@ -56,6 +61,13 @@ #include "flow/actorcompiler.h" // This must be the last #include. +#define FDB_API_VERSION 710 +/* + * While we could just use the MultiVersionApi instance directly, this #define allows us to swap in any other IClientApi + * instance (e.g. 
from ThreadSafeApi) + */ +#define API ((IClientApi*)MultiVersionApi::api) + extern const char* getSourceVersion(); std::vector<std::string> validOptions; @@ -173,6 +185,13 @@ public: } } + // TODO: replace the above function after we refactor all fdbcli code + void apply(Reference<ITransaction> tr) { + for (const auto& [name, value] : transactionOptions.options) { + tr->setOption(name, value.castTo<StringRef>()); + } + } + // Returns true if any options have been set bool hasAnyOptionsEnabled() const { return !transactionOptions.options.empty(); } @@ -320,13 +339,6 @@ static std::string formatStringRef(StringRef item, bool fullEscaping = false) { return ret; } -static bool tokencmp(StringRef token, const char* command) { - if (token.size() != strlen(command)) - return false; - - return !memcmp(token.begin(), command, token.size()); -} - static std::vector<std::vector<StringRef>> parseLine(std::string& line, bool& err, bool& partial) { err = false; partial = false; @@ -453,20 +465,13 @@ static void printProgramUsage(const char* name) { " -h, --help Display this help and exit.\n"); } -struct CommandHelp { - std::string usage; - std::string short_desc; - std::string long_desc; - CommandHelp() {} - CommandHelp(const char* u, const char* s, const char* l) : usage(u), short_desc(s), long_desc(l) {} -}; - -std::map<std::string, CommandHelp> helpMap; -std::set<std::string> hiddenCommands; - #define ESCAPINGK "\n\nFor information on escaping keys, type `help escaping'." #define ESCAPINGKV "\n\nFor information on escaping keys and values, type `help escaping'." +using namespace fdb_cli; +std::map<std::string, CommandHelp>& helpMap = CommandFactory::commands(); +std::set<std::string>& hiddenCommands = CommandFactory::hiddenCommands(); + void initHelp() { helpMap["begin"] = CommandHelp("begin", @@ -492,11 +497,15 @@ void initHelp() { helpMap["configure"] = CommandHelp( "configure [new] " "|" - "commit_proxies=|grv_proxies=|logs=|resolvers=>*", + "commit_proxies=|grv_proxies=|logs=|resolvers=>*|" + "perpetual_storage_wiggle=<speed>", "change the database configuration", "The `new' option, if present, initializes a new database with the given configuration rather than changing " "the configuration of an existing one. When used, both a redundancy mode and a storage engine must be " - "specified.\n\nRedundancy mode:\n single - one copy of the data. Not fault tolerant.\n double - two copies " + "specified.\n\ntss: when enabled, configures the testing storage server for the cluster instead. " + "When used with new to set up tss for the first time, it requires both a count and a storage engine. " + "To disable the testing storage server, run \"configure tss count=0\"\n\n" + "Redundancy mode:\n single - one copy of the data. Not fault tolerant.\n double - two copies " "of data (survive one failure).\n triple - three copies of data (survive two failures).\n three_data_hall - " "See the Admin Guide.\n three_datacenter - See the Admin Guide.\n\nStorage engine:\n ssd - B-Tree storage " "engine optimized for solid state disks.\n memory - Durable in-memory storage engine for small " @@ -513,8 +522,11 @@ void initHelp() { "1, or set to -1 which restores the number of GRV proxies to the default value.\n\nlogs=: Sets the " "desired number of log servers in the cluster. Must be at least 1, or set to -1 which restores the number of " "logs to the default value.\n\nresolvers=: Sets the desired number of resolvers in the cluster.
" - "Must be at least 1, or set to -1 which restores the number of resolvers to the default value.\n\nSee the " - "FoundationDB Administration Guide for more information."); + "Must be at least 1, or set to -1 which restores the number of resolvers to the default value.\n\n" + "perpetual_storage_wiggle=: Set the value speed (a.k.a., the number of processes that the Data " + "Distributor should wiggle at a time). Currently, only 0 and 1 are supported. The value 0 means to disable the " + "perpetual storage wiggle.\n\n" + "See the FoundationDB Administration Guide for more information."); helpMap["fileconfigure"] = CommandHelp( "fileconfigure [new] ", "change the database configuration from a file", @@ -650,11 +662,6 @@ void initHelp() { "SECONDS have elapsed, or after a storage server with a different ZONEID fails. Only one ZONEID can be marked " "for maintenance. Calling this command with no arguments will display any ongoing maintenance. Calling this " "command with `off' will disable maintenance.\n"); - helpMap["consistencycheck"] = CommandHelp( - "consistencycheck [on|off]", - "permits or prevents consistency checking", - "Calling this command with `on' permits consistency check processes to run and `off' will halt their checking. " - "Calling this command with no arguments will display if consistency checking is currently allowed.\n"); helpMap["throttle"] = CommandHelp("throttle [ARGS]", "view and control throttled tags", @@ -720,14 +727,6 @@ void printHelp(StringRef command) { printf("I don't know anything about `%s'\n", formatStringRef(command).c_str()); } -void printUsage(StringRef command) { - auto i = helpMap.find(command.toString()); - if (i != helpMap.end()) - printf("Usage: %s\n", i->second.usage.c_str()); - else - fprintf(stderr, "ERROR: Unknown command `%s'\n", command.toString().c_str()); -} - std::string getCoordinatorsInfoString(StatusObjectReader statusObj) { std::string outputString; try { @@ -1133,6 +1132,17 @@ void printStatus(StatusObjectReader statusObj, if (statusObjConfig.get("log_routers", intVal)) outputString += format("\n Desired Log Routers - %d", intVal); + if (statusObjConfig.get("tss_count", intVal) && intVal > 0) { + int activeTss = 0; + if (statusObjCluster.has("active_tss_count")) { + statusObjCluster.get("active_tss_count", activeTss); + } + outputString += format("\n TSS - %d/%d", activeTss, intVal); + + if (statusObjConfig.get("tss_storage_engine", strVal)) + outputString += format("\n TSS Storage Engine - %s", strVal.c_str()); + } + outputString += "\n Usable Regions - "; if (statusObjConfig.get("usable_regions", intVal)) { outputString += std::to_string(intVal); @@ -2670,6 +2680,27 @@ Reference getTransaction(Database db, return tr; } +// TODO: Update the function to get rid of Database and ReadYourWritesTransaction after refactoring +// The original ReadYourWritesTransaciton handle "tr" is needed as some commands can be called inside a +// transaction and "tr" holds the pointer to the ongoing transaction object. As it's not easy to get ride of "tr" in +// one shot and we are refactoring the code to use Reference (tr2), we need to let "tr2" point to the same +// underlying transaction like "tr". Thus everytime we need to use "tr2", we first update "tr" and let "tr2" points to +// "tr1". 
"tr2" is always having the same lifetime as "tr1" +Reference getTransaction(Database db, + Reference& tr, + Reference& tr2, + FdbOptions* options, + bool intrans) { + // Update "tr" to point to a brand new transaction object when it's not initialized or "intrans" flag is "false", + // which indicates we need a new transaction object + if (!tr || !intrans) { + tr = makeReference(db); + options->apply(tr); + } + tr2 = Reference(new ThreadSafeTransaction(tr.getPtr())); + return tr2; +} + std::string newCompletion(const char* base, const char* name) { return format("%s%s ", base, name); } @@ -2754,6 +2785,7 @@ void configureGenerator(const char* text, const char* line, std::vectorgetFilename().c_str()); + return 1; + } + if (opt.trace) { TraceEvent("CLIProgramStart") .setMaxEventLength(12000) @@ -3633,7 +3677,7 @@ ACTOR Future cli(CLIOptions opt, LineNoise* plinenoise) { if (tokencmp(tokens[0], "kill")) { getTransaction(db, tr, options, intrans); if (tokens.size() == 1) { - Standalone kvs = wait( + RangeResult kvs = wait( makeInterruptable(tr->getRange(KeyRangeRef(LiteralStringRef("\xff\xff/worker_interfaces/"), LiteralStringRef("\xff\xff/worker_interfaces0")), CLIENT_KNOBS->TOO_MANY))); @@ -3700,7 +3744,7 @@ ACTOR Future cli(CLIOptions opt, LineNoise* plinenoise) { if (tokencmp(tokens[0], "suspend")) { getTransaction(db, tr, options, intrans); if (tokens.size() == 1) { - Standalone kvs = wait( + RangeResult kvs = wait( makeInterruptable(tr->getRange(KeyRangeRef(LiteralStringRef("\xff\xff/worker_interfaces/"), LiteralStringRef("\xff\xff/worker_interfaces0")), CLIENT_KNOBS->TOO_MANY))); @@ -3796,29 +3840,9 @@ ACTOR Future cli(CLIOptions opt, LineNoise* plinenoise) { } if (tokencmp(tokens[0], "consistencycheck")) { - getTransaction(db, tr, options, intrans); - tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE); - tr->setOption(FDBTransactionOptions::LOCK_AWARE); - if (tokens.size() == 1) { - state Future>> ccSuspendSettingFuture = - tr->get(fdbShouldConsistencyCheckBeSuspended); - wait(makeInterruptable(success(ccSuspendSettingFuture))); - bool ccSuspendSetting = - ccSuspendSettingFuture.get().present() - ? BinaryReader::fromStringRef(ccSuspendSettingFuture.get().get(), Unversioned()) - : false; - printf("ConsistencyCheck is %s\n", ccSuspendSetting ? 
"off" : "on"); - } else if (tokens.size() == 2 && tokencmp(tokens[1], "off")) { - tr->set(fdbShouldConsistencyCheckBeSuspended, BinaryWriter::toValue(true, Unversioned())); - wait(commitTransaction(tr)); - } else if (tokens.size() == 2 && tokencmp(tokens[1], "on")) { - tr->set(fdbShouldConsistencyCheckBeSuspended, BinaryWriter::toValue(false, Unversioned())); - wait(commitTransaction(tr)); - } else { - printUsage(tokens[0]); - is_error = true; - } + getTransaction(db, tr, tr2, options, intrans); + bool _result = wait(consistencyCheckCommandActor(tr2, tokens)); + is_error = !_result; continue; } @@ -3842,25 +3866,16 @@ ACTOR Future cli(CLIOptions opt, LineNoise* plinenoise) { is_error = true; continue; } - state Future>> sampleRateFuture = - tr->get(fdbClientInfoTxnSampleRate); - state Future>> sizeLimitFuture = - tr->get(fdbClientInfoTxnSizeLimit); - wait(makeInterruptable(success(sampleRateFuture) && success(sizeLimitFuture))); + const double sampleRateDbl = GlobalConfig::globalConfig().get( + fdbClientInfoTxnSampleRate, std::numeric_limits::infinity()); + const int64_t sizeLimit = + GlobalConfig::globalConfig().get(fdbClientInfoTxnSizeLimit, -1); std::string sampleRateStr = "default", sizeLimitStr = "default"; - if (sampleRateFuture.get().present()) { - const double sampleRateDbl = - BinaryReader::fromStringRef(sampleRateFuture.get().get(), Unversioned()); - if (!std::isinf(sampleRateDbl)) { - sampleRateStr = boost::lexical_cast(sampleRateDbl); - } + if (!std::isinf(sampleRateDbl)) { + sampleRateStr = boost::lexical_cast(sampleRateDbl); } - if (sizeLimitFuture.get().present()) { - const int64_t sizeLimit = - BinaryReader::fromStringRef(sizeLimitFuture.get().get(), Unversioned()); - if (sizeLimit != -1) { - sizeLimitStr = boost::lexical_cast(sizeLimit); - } + if (sizeLimit != -1) { + sizeLimitStr = boost::lexical_cast(sizeLimit); } printf("Client profiling rate is set to %s and size limit is set to %s.\n", sampleRateStr.c_str(), @@ -3898,8 +3913,12 @@ ACTOR Future cli(CLIOptions opt, LineNoise* plinenoise) { continue; } } - tr->set(fdbClientInfoTxnSampleRate, BinaryWriter::toValue(sampleRate, Unversioned())); - tr->set(fdbClientInfoTxnSizeLimit, BinaryWriter::toValue(sizeLimit, Unversioned())); + + Tuple rate = Tuple().appendDouble(sampleRate); + Tuple size = Tuple().append(sizeLimit); + tr->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES); + tr->set(GlobalConfig::prefixedKey(fdbClientInfoTxnSampleRate), rate.pack()); + tr->set(GlobalConfig::prefixedKey(fdbClientInfoTxnSizeLimit), size.pack()); if (!intrans) { wait(commitTransaction(tr)); } @@ -3916,7 +3935,7 @@ ACTOR Future cli(CLIOptions opt, LineNoise* plinenoise) { continue; } getTransaction(db, tr, options, intrans); - Standalone kvs = wait( + RangeResult kvs = wait( makeInterruptable(tr->getRange(KeyRangeRef(LiteralStringRef("\xff\xff/worker_interfaces/"), LiteralStringRef("\xff\xff/worker_interfaces0")), CLIENT_KNOBS->TOO_MANY))); @@ -3945,7 +3964,7 @@ ACTOR Future cli(CLIOptions opt, LineNoise* plinenoise) { continue; } getTransaction(db, tr, options, intrans); - Standalone kvs = wait(makeInterruptable( + RangeResult kvs = wait(makeInterruptable( tr->getRange(KeyRangeRef(LiteralStringRef("\xff\xff/worker_interfaces/"), LiteralStringRef("\xff\xff/worker_interfaces0")), CLIENT_KNOBS->TOO_MANY))); @@ -4024,7 +4043,7 @@ ACTOR Future cli(CLIOptions opt, LineNoise* plinenoise) { continue; } getTransaction(db, tr, options, intrans); - Standalone kvs = wait( + RangeResult kvs = wait( 
makeInterruptable(tr->getRange(KeyRangeRef(LiteralStringRef("\xff\xff/worker_interfaces/"), LiteralStringRef("\xff\xff/worker_interfaces0")), CLIENT_KNOBS->TOO_MANY))); @@ -4066,7 +4085,7 @@ ACTOR Future cli(CLIOptions opt, LineNoise* plinenoise) { if (tokencmp(tokens[0], "expensive_data_check")) { getTransaction(db, tr, options, intrans); if (tokens.size() == 1) { - Standalone kvs = wait( + RangeResult kvs = wait( makeInterruptable(tr->getRange(KeyRangeRef(LiteralStringRef("\xff\xff/worker_interfaces/"), LiteralStringRef("\xff\xff/worker_interfaces0")), CLIENT_KNOBS->TOO_MANY))); @@ -4182,7 +4201,7 @@ ACTOR Future cli(CLIOptions opt, LineNoise* plinenoise) { endKey = strinc(tokens[1]); } - Standalone kvs = wait(makeInterruptable( + RangeResult kvs = wait(makeInterruptable( getTransaction(db, tr, options, intrans)->getRange(KeyRangeRef(tokens[1], endKey), limit))); printf("\nRange limited to %d keys\n", limit); @@ -4910,7 +4929,9 @@ int main(int argc, char** argv) { } try { - setupNetwork(); + // Note: refactoring fdbcli, in progress + API->selectApiVersion(FDB_API_VERSION); + API->setupNetwork(); Future cliFuture = runCli(opt); Future timeoutFuture = opt.exit_timeout ? timeExit(opt.exit_timeout) : Never(); auto f = stopNetworkAfter(success(cliFuture) || timeoutFuture); diff --git a/fdbcli/fdbcli.actor.h b/fdbcli/fdbcli.actor.h new file mode 100644 index 0000000000..ceae1263c2 --- /dev/null +++ b/fdbcli/fdbcli.actor.h @@ -0,0 +1,78 @@ +/* + * fdbcli.actor.h + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2021 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +// When actually compiled (NO_INTELLISENSE), include the generated +// version of this file. In intellisense use the source version. +#if defined(NO_INTELLISENSE) && !defined(FDBCLI_FDBCLI_ACTOR_G_H) +#define FDBCLI_FDBCLI_ACTOR_G_H +#include "fdbcli/fdbcli.actor.g.h" +#elif !defined(FDBCLI_FDBCLI_ACTOR_H) +#define FDBCLI_FDBCLI_ACTOR_H + +#include "fdbclient/IClientApi.h" +#include "flow/Arena.h" + +#include "flow/actorcompiler.h" // This must be the last #include. 
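+// A note on the pattern below: each refactored command lives in its own .actor.cpp file and
+// registers its help text through a static CommandFactory instance, as the new
+// ConsistencyCheckCommand.actor.cpp in this change does. A hypothetical new command would follow
+// the same shape (the names here are illustrative, not part of this change):
+//
+//   ACTOR Future<bool> myCommandActor(Reference<ITransaction> tr, std::vector<StringRef> tokens);
+//   CommandFactory myCommandFactory("mycommand",
+//                                   CommandHelp("mycommand [on|off]", "short description", "long description\n"));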
+ +namespace fdb_cli { + +struct CommandHelp { + std::string usage; + std::string short_desc; + std::string long_desc; + CommandHelp() {} + CommandHelp(const char* u, const char* s, const char* l) : usage(u), short_desc(s), long_desc(l) {} +}; + +struct CommandFactory { + CommandFactory(const char* name, CommandHelp help) { commands()[name] = help; } + CommandFactory(const char* name) { hiddenCommands().insert(name); } + static std::map& commands() { + static std::map helpMap; + return helpMap; + } + static std::set& hiddenCommands() { + static std::set commands; + return commands; + } +}; + +// Special keys used by fdbcli commands + +// consistencycheck +extern const KeyRef consistencyCheckSpecialKey; + +// help functions (Copied from fdbcli.actor.cpp) + +// compare StringRef with the given c string +bool tokencmp(StringRef token, const char* command); +// print the usage of the specified command +void printUsage(StringRef command); + +// All fdbcli commands (alphabetically) +// consistency command +ACTOR Future consistencyCheckCommandActor(Reference tr, std::vector tokens); + +} // namespace fdb_cli + +#include "flow/unactorcompiler.h" +#endif diff --git a/fdbclient/BackupAgent.actor.h b/fdbclient/BackupAgent.actor.h index fb8f6b1564..c8903b9fe4 100644 --- a/fdbclient/BackupAgent.actor.h +++ b/fdbclient/BackupAgent.actor.h @@ -294,7 +294,8 @@ public: Key addPrefix = Key(), Key removePrefix = Key(), bool lockDB = true, - bool incrementalBackupOnly = false, + bool onlyAppyMutationLogs = false, + bool inconsistentSnapshotOnly = false, Version beginVersion = -1); Future restore(Database cx, Optional cxOrig, @@ -307,7 +308,8 @@ public: Key addPrefix = Key(), Key removePrefix = Key(), bool lockDB = true, - bool incrementalBackupOnly = false, + bool onlyAppyMutationLogs = false, + bool inconsistentSnapshotOnly = false, Version beginVersion = -1) { Standalone> rangeRef; rangeRef.push_back_deep(rangeRef.arena(), range); @@ -322,7 +324,8 @@ public: addPrefix, removePrefix, lockDB, - incrementalBackupOnly, + onlyAppyMutationLogs, + inconsistentSnapshotOnly, beginVersion); } Future atomicRestore(Database cx, @@ -357,6 +360,7 @@ public: Future submitBackup(Reference tr, Key outContainer, + int initialSnapshotIntervalSeconds, int snapshotIntervalSeconds, std::string tagName, Standalone> backupRanges, @@ -365,6 +369,7 @@ public: bool incrementalBackupOnly = false); Future submitBackup(Database cx, Key outContainer, + int initialSnapshotIntervalSeconds, int snapshotIntervalSeconds, std::string tagName, Standalone> backupRanges, @@ -374,6 +379,7 @@ public: return runRYWTransactionFailIfLocked(cx, [=](Reference tr) { return submitBackup(tr, outContainer, + initialSnapshotIntervalSeconds, snapshotIntervalSeconds, tagName, backupRanges, @@ -404,7 +410,8 @@ public: Future getStatus(Database cx, bool showErrors, std::string tagName); Future getStatusJSON(Database cx, std::string tagName); - Future> getLastRestorable(Reference tr, Key tagName, + Future> getLastRestorable(Reference tr, + Key tagName, bool snapshot = false); void setLastRestorable(Reference tr, Key tagName, Version version); @@ -488,6 +495,14 @@ public: [=](Reference tr) { return unlockBackup(tr, tagName); }); } + // Specifies the action to take on the backup's destination key range + // before the backup begins. + enum PreBackupAction { + NONE = 0, // No action is taken + VERIFY = 1, // Verify the key range being restored to is empty. + CLEAR = 2 // Clear the key range being restored to. 
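+ // (NONE appears to correspond to the old databasesInSync=true path: StartFullBackupTaskFunc
+ // below serializes keyDatabasesInSync as "t" exactly when backupAction == NONE.)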
+ }; + Future submitBackup(Reference tr, Key tagName, Standalone> backupRanges, @@ -495,7 +510,7 @@ public: Key addPrefix = StringRef(), Key removePrefix = StringRef(), bool lockDatabase = false, - bool databasesInSync = false); + PreBackupAction backupAction = PreBackupAction::VERIFY); Future submitBackup(Database cx, Key tagName, Standalone> backupRanges, @@ -503,10 +518,10 @@ public: Key addPrefix = StringRef(), Key removePrefix = StringRef(), bool lockDatabase = false, - bool databasesInSync = false) { + PreBackupAction backupAction = PreBackupAction::VERIFY) { return runRYWTransaction(cx, [=](Reference tr) { return submitBackup( - tr, tagName, backupRanges, stopWhenDone, addPrefix, removePrefix, lockDatabase, databasesInSync); + tr, tagName, backupRanges, stopWhenDone, addPrefix, removePrefix, lockDatabase, backupAction); }); } @@ -578,10 +593,10 @@ public: Reference futureBucket; }; -typedef std::pair, Version> RangeResultWithVersion; +using RangeResultWithVersion = std::pair; struct RCGroup { - Standalone items; + RangeResult items; Version version; uint64_t groupKey; @@ -835,6 +850,11 @@ public: typedef KeyBackedMap RangeDispatchMapT; RangeDispatchMapT snapshotRangeDispatchMap() { return configSpace.pack(LiteralStringRef(__FUNCTION__)); } + // Interval to use for the first (initial) snapshot. + KeyBackedProperty initialSnapshotIntervalSeconds() { + return configSpace.pack(LiteralStringRef(__FUNCTION__)); + } + // Interval to use for determining the target end version for new snapshots KeyBackedProperty snapshotIntervalSeconds() { return configSpace.pack(LiteralStringRef(__FUNCTION__)); } @@ -864,8 +884,9 @@ public: Future beginVersion = tr->getReadVersion(); Future defaultInterval = 0; - if (intervalSeconds < 0) + if (intervalSeconds < 0) { defaultInterval = copy.snapshotIntervalSeconds().getOrThrow(tr); + } // Make sure read version and possibly the snapshot interval value are ready, then clear/init the snapshot // config members diff --git a/fdbclient/BackupAgentBase.actor.cpp b/fdbclient/BackupAgentBase.actor.cpp index d6be426dab..4b00857503 100644 --- a/fdbclient/BackupAgentBase.actor.cpp +++ b/fdbclient/BackupAgentBase.actor.cpp @@ -401,11 +401,17 @@ ACTOR Future readCommitted(Database cx, releaser = FlowLock::Releaser( *lock, limits.bytes + CLIENT_KNOBS->VALUE_SIZE_LIMIT + CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT); - state Standalone values = wait(tr.getRange(begin, end, limits)); + state RangeResult values = wait(tr.getRange(begin, end, limits)); // When this buggify line is enabled, if there are more than 1 result then use half of the results + // Copy the data instead of messing with the results directly to avoid TSS issues. 
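+ // (Presumably the in-place resize was unsafe once storage replies can be compared against a
+ // TSS: deep-copying the first half leaves the original result, and its arena, untouched.)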
if (values.size() > 1 && BUGGIFY) { - values.resize(values.arena(), values.size() / 2); + RangeResult copy; + // only copy first half of values into copy + for (int i = 0; i < values.size() / 2; i++) { + copy.push_back_deep(copy.arena(), values[i]); + } + values = copy; values.more = true; // Half of the time wait for this tr to expire so that the next read is at a different version if (deterministicRandom()->random01() < 0.5) @@ -467,11 +473,17 @@ ACTOR Future readCommitted(Database cx, if (lockAware) tr.setOption(FDBTransactionOptions::LOCK_AWARE); - state Standalone rangevalue = wait(tr.getRange(nextKey, end, limits)); + state RangeResult rangevalue = wait(tr.getRange(nextKey, end, limits)); - // When this buggify line is enabled, if there are more than 1 result then use half of the results + // When this buggify line is enabled, if there are more than 1 result then use half of the results. + // Copy the data instead of messing with the results directly to avoid TSS issues. if (rangevalue.size() > 1 && BUGGIFY) { - rangevalue.resize(rangevalue.arena(), rangevalue.size() / 2); + RangeResult copy; + // only copy first half of rangevalue into copy + for (int i = 0; i < rangevalue.size() / 2; i++) { + copy.push_back_deep(copy.arena(), rangevalue[i]); + } + rangevalue = copy; rangevalue.more = true; // Half of the time wait for this tr to expire so that the next read is at a different version if (deterministicRandom()->random01() < 0.5) @@ -743,6 +755,9 @@ ACTOR Future applyMutations(Database cx, wait(coalesceKeyVersionCache( uid, newEndVersion, keyVersion, commit, committedVersion, addActor, &commitLock)); beginVersion = newEndVersion; + if (BUGGIFY) { + wait(delay(2.0)); + } } } catch (Error& e) { TraceEvent(e.code() == error_code_restore_missing_data ? 
SevWarnAlways : SevError, "ApplyMutationsError") @@ -775,7 +790,7 @@ ACTOR static Future _eraseLogData(Reference tr, return Void(); } - state Standalone backupVersions = wait( + state RangeResult backupVersions = wait( tr->getRange(KeyRangeRef(backupLatestVersionsPath, strinc(backupLatestVersionsPath)), CLIENT_KNOBS->TOO_MANY)); // Make sure version history key does exist and lower the beginVersion if needed @@ -867,7 +882,7 @@ ACTOR static Future _eraseLogData(Reference tr, } if (!endVersion.present() && backupVersions.size() == 1) { - Standalone existingDestUidValues = + RangeResult existingDestUidValues = wait(tr->getRange(KeyRangeRef(destUidLookupPrefix, strinc(destUidLookupPrefix)), CLIENT_KNOBS->TOO_MANY)); for (auto it : existingDestUidValues) { if (it.value == destUidValue) { @@ -900,7 +915,7 @@ ACTOR Future cleanupLogMutations(Database cx, Value destUidValue, bool del tr->setOption(FDBTransactionOptions::LOCK_AWARE); tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - state Standalone backupVersions = wait(tr->getRange( + state RangeResult backupVersions = wait(tr->getRange( KeyRangeRef(backupLatestVersionsPath, strinc(backupLatestVersionsPath)), CLIENT_KNOBS->TOO_MANY)); state Version readVer = tr->getReadVersion().get(); @@ -987,7 +1002,7 @@ ACTOR Future cleanupBackup(Database cx, bool deleteData) { tr->setOption(FDBTransactionOptions::LOCK_AWARE); tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - state Standalone destUids = wait( + state RangeResult destUids = wait( tr->getRange(KeyRangeRef(destUidLookupPrefix, strinc(destUidLookupPrefix)), CLIENT_KNOBS->TOO_MANY)); for (auto destUid : destUids) { diff --git a/fdbclient/CMakeLists.txt b/fdbclient/CMakeLists.txt index 1f8b7efb21..f55f3b7a59 100644 --- a/fdbclient/CMakeLists.txt +++ b/fdbclient/CMakeLists.txt @@ -32,6 +32,9 @@ set(FDBCLIENT_SRCS FDBOptions.h FDBTypes.h FileBackupAgent.actor.cpp + GlobalConfig.h + GlobalConfig.actor.h + GlobalConfig.actor.cpp GrvProxyInterface.h HTTP.actor.cpp IClientApi.h @@ -80,6 +83,7 @@ set(FDBCLIENT_SRCS Status.h StatusClient.actor.cpp StatusClient.h + StorageServerInterface.cpp StorageServerInterface.h Subspace.cpp Subspace.h diff --git a/fdbclient/CommitProxyInterface.h b/fdbclient/CommitProxyInterface.h index d4bc0dea2e..aa07679b78 100644 --- a/fdbclient/CommitProxyInterface.h +++ b/fdbclient/CommitProxyInterface.h @@ -30,6 +30,7 @@ #include "fdbclient/StorageServerInterface.h" #include "fdbclient/CommitTransaction.h" #include "fdbclient/TagThrottle.h" +#include "fdbclient/GlobalConfig.h" #include "fdbrpc/Stats.h" #include "fdbrpc/TimedRequest.h" @@ -112,34 +113,32 @@ struct ClientDBInfo { vector commitProxies; Optional firstCommitProxy; // not serialized, used for commitOnFirstProxy when the commit proxies vector has been shrunk - double clientTxnInfoSampleRate; - int64_t clientTxnInfoSizeLimit; Optional forward; - double transactionTagSampleRate; - double transactionTagSampleCost; + vector history; + vector> + tssMapping; // logically map for all active TSS pairs - ClientDBInfo() - : clientTxnInfoSampleRate(std::numeric_limits::infinity()), clientTxnInfoSizeLimit(-1), - transactionTagSampleRate(CLIENT_KNOBS->READ_TAG_SAMPLE_RATE), - transactionTagSampleCost(CLIENT_KNOBS->COMMIT_SAMPLE_COST) {} + ClientDBInfo() {} bool operator==(ClientDBInfo const& r) const { return id == r.id; } bool operator!=(ClientDBInfo const& r) const { return id != r.id; } + // convenience method to treat tss mapping like a map + Optional getTssPair(UID storageServerID) const { + for (auto& it : 
tssMapping) { + if (it.first == storageServerID) { + return Optional(it.second); + } + } + return Optional(); + } + template void serialize(Archive& ar) { if constexpr (!is_fb_function) { ASSERT(ar.protocolVersion().isValid()); } - serializer(ar, - grvProxies, - commitProxies, - id, - clientTxnInfoSampleRate, - clientTxnInfoSizeLimit, - forward, - transactionTagSampleRate, - transactionTagSampleCost); + serializer(ar, grvProxies, commitProxies, id, forward, history, tssMapping); } }; diff --git a/fdbclient/CoordinationInterface.h b/fdbclient/CoordinationInterface.h index 861d86a515..2c80899aae 100644 --- a/fdbclient/CoordinationInterface.h +++ b/fdbclient/CoordinationInterface.h @@ -30,24 +30,32 @@ const int MAX_CLUSTER_FILE_BYTES = 60000; +// well known endpoints published to the client. constexpr UID WLTOKEN_CLIENTLEADERREG_GETLEADER(-1, 2); constexpr UID WLTOKEN_CLIENTLEADERREG_OPENDATABASE(-1, 3); +// the value of this endpoint should be stable and not change. constexpr UID WLTOKEN_PROTOCOL_INFO(-1, 10); +constexpr UID WLTOKEN_CLIENTLEADERREG_DESCRIPTOR_MUTABLE(-1, 11); -constexpr UID WLTOKEN_CONFIGTXN_GETVERSION(-1, 11); -constexpr UID WLTOKEN_CONFIGTXN_GET(-1, 12); -constexpr UID WLTOKEN_CONFIGTXN_GETCLASSES(-1, 13); -constexpr UID WLTOKEN_CONFIGTXN_GETKNOBS(-1, 14); -constexpr UID WLTOKEN_CONFIGTXN_COMMIT(-1, 15); +constexpr UID WLTOKEN_CONFIGTXN_GETVERSION(-1, 12); +constexpr UID WLTOKEN_CONFIGTXN_GET(-1, 13); +constexpr UID WLTOKEN_CONFIGTXN_GETCLASSES(-1, 14); +constexpr UID WLTOKEN_CONFIGTXN_GETKNOBS(-1, 15); +constexpr UID WLTOKEN_CONFIGTXN_COMMIT(-1, 16); struct ClientLeaderRegInterface { RequestStream getLeader; RequestStream openDatabase; + RequestStream checkDescriptorMutable; ClientLeaderRegInterface() {} ClientLeaderRegInterface(NetworkAddress remote); ClientLeaderRegInterface(INetwork* local); + + bool operator==(const ClientLeaderRegInterface& rhs) const { + return getLeader == rhs.getLeader && openDatabase == rhs.openDatabase; + } }; class ClusterConnectionString { @@ -113,8 +121,9 @@ private: struct LeaderInfo { constexpr static FileIdentifier file_identifier = 8338794; + // The first 7 bits of changeID represent cluster controller process class fitness, the lower the better UID changeID; - static const uint64_t mask = ~(127ll << 57); + static const uint64_t changeIDMask = ~(uint64_t(0b1111111) << 57); Value serializedInfo; bool forward; // If true, serializedInfo is a connection string instead! @@ -131,13 +140,13 @@ struct LeaderInfo { // The first 7 bits of ChangeID represent cluster controller process class fitness, the lower the better void updateChangeID(ClusterControllerPriorityInfo info) { changeID = UID(((uint64_t)info.processClassFitness << 57) | ((uint64_t)info.isExcluded << 60) | - ((uint64_t)info.dcFitness << 61) | (changeID.first() & mask), + ((uint64_t)info.dcFitness << 61) | (changeID.first() & changeIDMask), changeID.second()); } // All but the first 7 bits are used to represent process id bool equalInternalId(LeaderInfo const& leaderInfo) const { - return ((changeID.first() & mask) == (leaderInfo.changeID.first() & mask)) && + return ((changeID.first() & changeIDMask) == (leaderInfo.changeID.first() & changeIDMask)) && changeID.second() == leaderInfo.changeID.second(); } @@ -145,8 +154,10 @@ struct LeaderInfo { // 1. the candidate has better process class fitness and the candidate is not the leader // 2. 
the leader process class fitness becomes worse bool leaderChangeRequired(LeaderInfo const& candidate) const { - return ((changeID.first() & ~mask) > (candidate.changeID.first() & ~mask) && !equalInternalId(candidate)) || - ((changeID.first() & ~mask) < (candidate.changeID.first() & ~mask) && equalInternalId(candidate)); + return ((changeID.first() & ~changeIDMask) > (candidate.changeID.first() & ~changeIDMask) && + !equalInternalId(candidate)) || + ((changeID.first() & ~changeIDMask) < (candidate.changeID.first() & ~changeIDMask) && + equalInternalId(candidate)); } ClusterControllerPriorityInfo getPriorityInfo() const { @@ -234,4 +245,28 @@ struct ProtocolInfoRequest { } }; +// Returns true if the cluster descriptor may be modified. +struct CheckDescriptorMutableReply { + constexpr static FileIdentifier file_identifier = 7784299; + CheckDescriptorMutableReply() = default; + explicit CheckDescriptorMutableReply(bool isMutable) : isMutable(isMutable) {} + bool isMutable; + template + void serialize(Ar& ar) { + serializer(ar, isMutable); + } +}; + +// Allows client to check if allowed to change the cluster descriptor. +struct CheckDescriptorMutableRequest { + constexpr static FileIdentifier file_identifier = 214729; + ReplyPromise reply; + CheckDescriptorMutableRequest() {} + + template + void serialize(Ar& ar) { + serializer(ar, reply); + } +}; + #endif diff --git a/fdbclient/DatabaseBackupAgent.actor.cpp b/fdbclient/DatabaseBackupAgent.actor.cpp index bc29f9e848..20f9c6bcf2 100644 --- a/fdbclient/DatabaseBackupAgent.actor.cpp +++ b/fdbclient/DatabaseBackupAgent.actor.cpp @@ -157,7 +157,7 @@ struct BackupRangeTaskFunc : TaskFuncBase { tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr->setOption(FDBTransactionOptions::LOCK_AWARE); state Standalone> results; - Standalone values = wait(tr->getRange( + RangeResult values = wait(tr->getRange( KeyRangeRef(keyAfter(beginKey.withPrefix(keyServersPrefix)), endKey.withPrefix(keyServersPrefix)), limit)); for (auto& s : values) { @@ -314,19 +314,18 @@ struct BackupRangeTaskFunc : TaskFuncBase { applyMutationsKeyVersionMapRange.begin); state Key rangeCountKey = task->params[BackupAgentBase::keyConfigLogUid].withPrefix( applyMutationsKeyVersionCountRange.begin); - state Future> backupVersions = + state Future backupVersions = krmGetRanges(tr, prefix, KeyRangeRef(rangeBegin, rangeEnd), BUGGIFY ? 
2 : 2000, 1e5); state Future> logVersionValue = tr->get( task->params[BackupAgentBase::keyConfigLogUid].withPrefix(applyMutationsEndRange.begin), true); state Future> rangeCountValue = tr->get(rangeCountKey, true); - state Future> prevRange = tr->getRange( + state Future prevRange = tr->getRange( firstGreaterOrEqual(prefix), lastLessOrEqual(rangeBegin.withPrefix(prefix)), 1, true, true); - state Future> nextRange = - tr->getRange(firstGreaterOrEqual(rangeEnd.withPrefix(prefix)), - firstGreaterOrEqual(strinc(prefix)), - 1, - true, - false); + state Future nextRange = tr->getRange(firstGreaterOrEqual(rangeEnd.withPrefix(prefix)), + firstGreaterOrEqual(strinc(prefix)), + 1, + true, + false); state Future verified = taskBucket->keepRunning(tr, task); wait(checkDatabaseLock(tr, @@ -725,7 +724,7 @@ struct CopyLogRangeTaskFunc : TaskFuncBase { state Subspace conf = Subspace(databaseBackupPrefixRange.begin) .get(BackupAgentBase::keyConfig) .get(task->params[BackupAgentBase::keyConfigLogUid]); - state std::vector> nextMutations; + state std::vector nextMutations; state bool isTimeoutOccured = false; state Optional lastKey; state Version lastVersion; @@ -736,9 +735,9 @@ struct CopyLogRangeTaskFunc : TaskFuncBase { return Optional(); } - state std::vector> mutations = std::move(nextMutations); + state std::vector mutations = std::move(nextMutations); state int64_t mutationSize = nextMutationSize; - nextMutations = std::vector>(); + nextMutations = std::vector(); nextMutationSize = 0; if (!endOfStream) { @@ -1072,7 +1071,7 @@ struct CopyLogsTaskFunc : TaskFuncBase { wait(waitForAll(addTaskVector) && taskBucket->finish(tr, task)); } else { - if (appliedVersion <= stopVersionData) { + if (appliedVersion < applyVersion) { wait(delay(FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY)); wait(success(CopyLogsTaskFunc::addTask( tr, taskBucket, task, prevBeginVersion, beginVersion, TaskCompletionKey::signal(onDone)))); @@ -1470,7 +1469,7 @@ struct OldCopyLogRangeTaskFunc : TaskFuncBase { .get(BackupAgentBase::keyConfig) .get(task->params[BackupAgentBase::keyConfigLogUid]); - state std::vector> nextMutations; + state std::vector nextMutations; state int64_t nextMutationSize = 0; loop { try { @@ -1478,9 +1477,9 @@ struct OldCopyLogRangeTaskFunc : TaskFuncBase { return Void(); } - state std::vector> mutations = std::move(nextMutations); + state std::vector mutations = std::move(nextMutations); state int64_t mutationSize = nextMutationSize; - nextMutations = std::vector>(); + nextMutations = std::vector(); nextMutationSize = 0; if (!endOfStream) { @@ -1819,7 +1818,7 @@ struct CopyDiffLogsUpgradeTaskFunc : TaskFuncBase { } if (backupRanges.size() == 1) { - Standalone existingDestUidValues = wait(srcTr->getRange( + RangeResult existingDestUidValues = wait(srcTr->getRange( KeyRangeRef(destUidLookupPrefix, strinc(destUidLookupPrefix)), CLIENT_KNOBS->TOO_MANY)); bool found = false; for (auto it : existingDestUidValues) { @@ -2063,7 +2062,7 @@ struct StartFullBackupTaskFunc : TaskFuncBase { // Initialize destUid if (backupRanges.size() == 1) { - Standalone existingDestUidValues = wait(srcTr->getRange( + RangeResult existingDestUidValues = wait(srcTr->getRange( KeyRangeRef(destUidLookupPrefix, strinc(destUidLookupPrefix)), CLIENT_KNOBS->TOO_MANY)); bool found = false; for (auto it : existingDestUidValues) { @@ -2243,17 +2242,18 @@ struct StartFullBackupTaskFunc : TaskFuncBase { return Void(); } - ACTOR static Future addTask(Reference tr, - Reference taskBucket, - Key logUid, - Key backupUid, - Key keyAddPrefix, - Key keyRemovePrefix, 
- Key keyConfigBackupRanges, - Key tagName, - TaskCompletionKey completionKey, - Reference waitFor = Reference(), - bool databasesInSync = false) { + ACTOR static Future addTask( + Reference tr, + Reference taskBucket, + Key logUid, + Key backupUid, + Key keyAddPrefix, + Key keyRemovePrefix, + Key keyConfigBackupRanges, + Key tagName, + TaskCompletionKey completionKey, + Reference waitFor = Reference(), + DatabaseBackupAgent::PreBackupAction backupAction = DatabaseBackupAgent::PreBackupAction::VERIFY) { Key doneKey = wait(completionKey.get(tr, taskBucket)); auto task = makeReference(StartFullBackupTaskFunc::name, StartFullBackupTaskFunc::version, doneKey); @@ -2264,7 +2264,7 @@ struct StartFullBackupTaskFunc : TaskFuncBase { task->params[BackupAgentBase::keyConfigBackupRanges] = keyConfigBackupRanges; task->params[BackupAgentBase::keyTagName] = tagName; task->params[DatabaseBackupAgent::keyDatabasesInSync] = - databasesInSync ? LiteralStringRef("t") : LiteralStringRef("f"); + backupAction == DatabaseBackupAgent::PreBackupAction::NONE ? LiteralStringRef("t") : LiteralStringRef("f"); if (!waitFor) { return taskBucket->addTask(tr, @@ -2514,7 +2514,7 @@ public: Key addPrefix, Key removePrefix, bool lockDB, - bool databasesInSync) { + DatabaseBackupAgent::PreBackupAction backupAction) { state UID logUid = deterministicRandom()->randomUniqueID(); state Key logUidValue = BinaryWriter::toValue(logUid, Unversioned()); state UID logUidCurrent = wait(backupAgent->getLogUid(tr, tagName)); @@ -2558,9 +2558,9 @@ public: } } - if (!databasesInSync) { + if (backupAction == DatabaseBackupAgent::PreBackupAction::VERIFY) { // Make sure all of the ranges are empty before we backup into them. - state std::vector>> backupIntoResults; + state std::vector> backupIntoResults; for (auto& backupRange : backupRanges) { backupIntoResults.push_back( tr->getRange(backupRange.removePrefix(removePrefix).withPrefix(addPrefix), 1)); @@ -2572,6 +2572,11 @@ public: throw restore_destination_not_empty(); } } + } else if (backupAction == DatabaseBackupAgent::PreBackupAction::CLEAR) { + // Clear out all ranges before we backup into them. + for (auto& backupRange : backupRanges) { + tr->clear(backupRange.removePrefix(removePrefix).withPrefix(addPrefix)); + } } // Clear the backup ranges for the tag @@ -2610,7 +2615,7 @@ public: tr->clear(KeyRangeRef(mapPrefix, mapEnd)); state Version readVersion = invalidVersion; - if (databasesInSync) { + if (backupAction == DatabaseBackupAgent::PreBackupAction::NONE) { Transaction readTransaction(backupAgent->taskBucket->src); readTransaction.setOption(FDBTransactionOptions::LOCK_AWARE); Version _ = wait(readTransaction.getReadVersion()); @@ -2629,7 +2634,7 @@ public: tagName, TaskCompletionKey::noSignal(), Reference(), - databasesInSync)); + backupAction)); if (lockDB) wait(lockDatabase(tr, logUid)); @@ -2772,8 +2777,14 @@ public: TraceEvent("DBA_SwitchoverVersionUpgraded"); try { - wait(drAgent.submitBackup( - backupAgent->taskBucket->src, tagName, backupRanges, false, addPrefix, removePrefix, true, true)); + wait(drAgent.submitBackup(backupAgent->taskBucket->src, + tagName, + backupRanges, + false, + addPrefix, + removePrefix, + true, + DatabaseBackupAgent::PreBackupAction::NONE)); } catch (Error& e) { if (e.code() != error_code_backup_duplicate) throw; @@ -3048,13 +3059,13 @@ public: tr->setOption(FDBTransactionOptions::LOCK_AWARE); state Future> fPaused = tr->get(backupAgent->taskBucket->getPauseKey()); - state Future> fErrorValues = + state Future fErrorValues = errorLimit > 0 ? 
tr->getRange(backupAgent->errors.get(BinaryWriter::toValue(logUid, Unversioned())).range(), errorLimit, false, true) - : Future>(); + : Future(); state Future> fBackupUid = tr->get(backupAgent->states.get(BinaryWriter::toValue(logUid, Unversioned())) .pack(DatabaseBackupAgent::keyFolderId)); @@ -3129,7 +3140,7 @@ public: // Append the errors, if requested if (errorLimit > 0) { - Standalone values = wait(fErrorValues); + RangeResult values = wait(fErrorValues); // Display the errors, if any if (values.size() > 0) { @@ -3236,9 +3247,9 @@ Future DatabaseBackupAgent::submitBackup(Reference DatabaseBackupAgent::discontinueBackup(Reference tr, Key tagName) { diff --git a/fdbclient/DatabaseConfiguration.cpp b/fdbclient/DatabaseConfiguration.cpp index 4da069b775..a2cfc435b3 100644 --- a/fdbclient/DatabaseConfiguration.cpp +++ b/fdbclient/DatabaseConfiguration.cpp @@ -31,7 +31,8 @@ void DatabaseConfiguration::resetInternal() { commitProxyCount = grvProxyCount = resolverCount = desiredTLogCount = tLogWriteAntiQuorum = tLogReplicationFactor = storageTeamSize = desiredLogRouterCount = -1; tLogVersion = TLogVersion::DEFAULT; - tLogDataStoreType = storageServerStoreType = KeyValueStoreType::END; + tLogDataStoreType = storageServerStoreType = testingStorageServerStoreType = KeyValueStoreType::END; + desiredTSSCount = 0; tLogSpillType = TLogSpillType::DEFAULT; autoCommitProxyCount = CLIENT_KNOBS->DEFAULT_AUTO_COMMIT_PROXIES; autoGrvProxyCount = CLIENT_KNOBS->DEFAULT_AUTO_GRV_PROXIES; @@ -43,6 +44,7 @@ void DatabaseConfiguration::resetInternal() { remoteDesiredTLogCount = -1; remoteTLogReplicationFactor = repopulateRegionAntiQuorum = 0; backupWorkerEnabled = false; + perpetualStorageWiggleSpeed = 0; } void parse(int* i, ValueRef const& v) { @@ -194,9 +196,9 @@ bool DatabaseConfiguration::isValid() const { getDesiredRemoteLogs() >= 1 && remoteTLogReplicationFactor >= 0 && repopulateRegionAntiQuorum >= 0 && repopulateRegionAntiQuorum <= 1 && usableRegions >= 1 && usableRegions <= 2 && regions.size() <= 2 && (usableRegions == 1 || regions.size() == 2) && (regions.size() == 0 || regions[0].priority >= 0) && - (regions.size() == 0 || - tLogPolicy->info() != - "dcid^2 x zoneid^2 x 1"))) { // We cannot specify regions with three_datacenter replication + (regions.size() == 0 || tLogPolicy->info() != "dcid^2 x zoneid^2 x 1") && + // We cannot specify regions with three_datacenter replication + (perpetualStorageWiggleSpeed == 0 || perpetualStorageWiggleSpeed == 1))) { return false; } std::set dcIds; @@ -298,6 +300,25 @@ StatusObject DatabaseConfiguration::toJSON(bool noPolicies) const { result["storage_engine"] = "custom"; } + if (desiredTSSCount > 0) { + result["tss_count"] = desiredTSSCount; + if (testingStorageServerStoreType == KeyValueStoreType::SSD_BTREE_V1) { + result["tss_storage_engine"] = "ssd-1"; + } else if (testingStorageServerStoreType == KeyValueStoreType::SSD_BTREE_V2) { + result["tss_storage_engine"] = "ssd-2"; + } else if (testingStorageServerStoreType == KeyValueStoreType::SSD_REDWOOD_V1) { + result["tss_storage_engine"] = "ssd-redwood-experimental"; + } else if (testingStorageServerStoreType == KeyValueStoreType::SSD_ROCKSDB_V1) { + result["tss_storage_engine"] = "ssd-rocksdb-experimental"; + } else if (testingStorageServerStoreType == KeyValueStoreType::MEMORY_RADIXTREE) { + result["tss_storage_engine"] = "memory-radixtree-beta"; + } else if (testingStorageServerStoreType == KeyValueStoreType::MEMORY) { + result["tss_storage_engine"] = "memory-2"; + } else { + result["tss_storage_engine"] = 
"custom"; + } + } + result["log_spill"] = (int)tLogSpillType; if (remoteTLogReplicationFactor == 1) { @@ -352,7 +373,7 @@ StatusObject DatabaseConfiguration::toJSON(bool noPolicies) const { } result["backup_worker_enabled"] = (int32_t)backupWorkerEnabled; - + result["perpetual_storage_wiggle"] = perpetualStorageWiggleSpeed; return result; } @@ -448,6 +469,8 @@ bool DatabaseConfiguration::setInternal(KeyRef key, ValueRef value) { } } else if (ck == LiteralStringRef("storage_replicas")) { parse(&storageTeamSize, value); + } else if (ck == LiteralStringRef("tss_count")) { + parse(&desiredTSSCount, value); } else if (ck == LiteralStringRef("log_version")) { parse((&type), value); type = std::max((int)TLogVersion::MIN_RECRUITABLE, type); @@ -470,6 +493,9 @@ bool DatabaseConfiguration::setInternal(KeyRef key, ValueRef value) { } else if (ck == LiteralStringRef("storage_engine")) { parse((&type), value); storageServerStoreType = (KeyValueStoreType::StoreType)type; + } else if (ck == LiteralStringRef("tss_storage_engine")) { + parse((&type), value); + testingStorageServerStoreType = (KeyValueStoreType::StoreType)type; } else if (ck == LiteralStringRef("auto_commit_proxies")) { parse(&autoCommitProxyCount, value); } else if (ck == LiteralStringRef("auto_grv_proxies")) { @@ -499,6 +525,8 @@ bool DatabaseConfiguration::setInternal(KeyRef key, ValueRef value) { parse(&repopulateRegionAntiQuorum, value); } else if (ck == LiteralStringRef("regions")) { parse(®ions, value); + } else if (ck == LiteralStringRef("perpetual_storage_wiggle")) { + parse(&perpetualStorageWiggleSpeed, value); } else { return false; } diff --git a/fdbclient/DatabaseConfiguration.h b/fdbclient/DatabaseConfiguration.h index bc64a6c9c5..0df45ce228 100644 --- a/fdbclient/DatabaseConfiguration.h +++ b/fdbclient/DatabaseConfiguration.h @@ -225,6 +225,10 @@ struct DatabaseConfiguration { int32_t storageTeamSize; KeyValueStoreType storageServerStoreType; + // Testing StorageServers + int32_t desiredTSSCount; + KeyValueStoreType testingStorageServerStoreType; + // Remote TLogs int32_t desiredLogRouterCount; int32_t remoteDesiredTLogCount; @@ -239,6 +243,9 @@ struct DatabaseConfiguration { int32_t repopulateRegionAntiQuorum; std::vector regions; + // Perpetual Storage Setting + int32_t perpetualStorageWiggleSpeed; + // Excluded servers (no state should be here) bool isExcludedServer(NetworkAddressList) const; std::set getExcludedServers() const; diff --git a/fdbclient/DatabaseContext.h b/fdbclient/DatabaseContext.h index 0f53f5e40f..703979aeff 100644 --- a/fdbclient/DatabaseContext.h +++ b/fdbclient/DatabaseContext.h @@ -152,6 +152,7 @@ public: return (DatabaseContext*)DatabaseContext::operator new(sizeof(DatabaseContext)); } + // Static constructor used by server processes to create a DatabaseContext // For internal (fdbserver) use only static Database create(Reference> clientInfo, Future clientInfoMonitor, @@ -164,9 +165,11 @@ public: ~DatabaseContext(); + // Constructs a new copy of this DatabaseContext from the parameters of this DatabaseContext Database clone() const { return Database(new DatabaseContext(connectionFile, clientInfo, + coordinator, clientInfoMonitor, taskID, clientLocality, @@ -196,6 +199,11 @@ public: Future onProxiesChanged(); Future getHealthMetrics(bool detailed); + // Returns the protocol version reported by the coordinator this client is connected to + // If an expected version is given, the future won't return until the protocol version is different than expected + // Note: this will never return if the server is 
running a protocol from FDB 5.0 or older + Future getClusterProtocol(Optional expectedVersion = Optional()); + // Update the watch counter for the database void addWatch(); void removeWatch(); @@ -247,6 +255,7 @@ public: // private: explicit DatabaseContext(Reference>> connectionFile, Reference> clientDBInfo, + Reference>> coordinator, Future clientInfoMonitor, TaskPriority taskID, LocalityData const& clientLocality, @@ -264,6 +273,9 @@ public: Reference>> connectionFile; AsyncTrigger proxiesChangeTrigger; Future monitorProxiesInfoChange; + Future monitorTssInfoChange; + Future tssMismatchHandler; + PromiseStream tssMismatchStream; Reference commitProxies; Reference grvProxies; bool proxyProvisional; // Provisional commit proxy and grv proxy are used at the same time. @@ -311,6 +323,8 @@ public: std::map server_interf; + std::map> tssMetrics; + UID dbId; bool internal; // Only contexts created through the C client and fdbcli are non-internal @@ -380,6 +394,9 @@ public: Future clientInfoMonitor; Future connected; + // An AsyncVar that reports the coordinator this DatabaseContext is interacting with + Reference>> coordinator; + Reference>> statusClusterInterface; Future statusLeaderMon; double lastStatusFetch; @@ -408,6 +425,9 @@ public: static bool debugUseTags; static const std::vector debugTransactionTagChoices; std::unordered_map> watchMap; + + void maybeAddTssMapping(StorageServerInterface const& ssi); + void addTssMapping(StorageServerInterface const& ssi, StorageServerInterface const& tssi); }; #endif diff --git a/fdbclient/FDBTypes.h b/fdbclient/FDBTypes.h index b2cd469ab8..e2c8b4cd3b 100644 --- a/fdbclient/FDBTypes.h +++ b/fdbclient/FDBTypes.h @@ -468,11 +468,12 @@ struct Traceable : std::true_type { } }; -typedef Standalone Key; -typedef Standalone Value; -typedef Standalone KeyRange; -typedef Standalone KeyValue; -typedef Standalone KeySelector; +using Key = Standalone; +using Value = Standalone; +using KeyRange = Standalone; +using KeyValue = Standalone; +using KeySelector = Standalone; +using RangeResult = Standalone; enum { invalidVersion = -1, latestVersion = -2, MAX_VERSION = std::numeric_limits::max() }; @@ -706,6 +707,7 @@ struct RangeResultRef : VectorRef { " readToBegin:" + std::to_string(readToBegin) + " readThroughEnd:" + std::to_string(readThroughEnd); } }; +using RangeResult = Standalone; template <> struct Traceable : std::true_type { @@ -866,22 +868,36 @@ struct TLogSpillType { // Contains the amount of free and total space for a storage server, in bytes struct StorageBytes { + // Free space on the filesystem int64_t free; + // Total space on the filesystem int64_t total; - int64_t used; // Used by *this* store, not total-free - int64_t available; // Amount of disk space that can be used by data structure, including free disk space and - // internally reusable space + // Used by *this* store, not total - free + int64_t used; + // Amount of space available for use by the store, which includes free space on the filesystem + // and internal free space within the store data that is immediately reusable. 
+ int64_t available; + // Amount of space that could eventually be available for use after garbage collection + int64_t temp; StorageBytes() {} - StorageBytes(int64_t free, int64_t total, int64_t used, int64_t available) - : free(free), total(total), used(used), available(available) {} + StorageBytes(int64_t free, int64_t total, int64_t used, int64_t available, int64_t temp = 0) + : free(free), total(total), used(used), available(available), temp(temp) {} template void serialize(Ar& ar) { serializer(ar, free, total, used, available); } -}; + std::string toString() const { + return format("{%.2f MB total, %.2f MB free, %.2f MB available, %.2f MB used, %.2f MB temp}", + total / 1e6, + free / 1e6, + available / 1e6, + used / 1e6, + temp / 1e6); + } +}; struct LogMessageVersion { // Each message pushed into the log system has a unique, totally ordered LogMessageVersion // See ILogSystem::push() for how these are assigned diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index 9a09890982..35d6743821 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -142,7 +142,8 @@ public: } KeyBackedProperty addPrefix() { return configSpace.pack(LiteralStringRef(__FUNCTION__)); } KeyBackedProperty removePrefix() { return configSpace.pack(LiteralStringRef(__FUNCTION__)); } - KeyBackedProperty incrementalBackupOnly() { return configSpace.pack(LiteralStringRef(__FUNCTION__)); } + KeyBackedProperty onlyAppyMutationLogs() { return configSpace.pack(LiteralStringRef(__FUNCTION__)); } + KeyBackedProperty inconsistentSnapshotOnly() { return configSpace.pack(LiteralStringRef(__FUNCTION__)); } // XXX: Remove restoreRange() once it is safe to remove. It has been changed to restoreRanges KeyBackedProperty restoreRange() { return configSpace.pack(LiteralStringRef(__FUNCTION__)); } KeyBackedProperty> restoreRanges() { @@ -151,6 +152,7 @@ public: KeyBackedProperty batchFuture() { return configSpace.pack(LiteralStringRef(__FUNCTION__)); } KeyBackedProperty beginVersion() { return configSpace.pack(LiteralStringRef(__FUNCTION__)); } KeyBackedProperty restoreVersion() { return configSpace.pack(LiteralStringRef(__FUNCTION__)); } + KeyBackedProperty firstConsistentVersion() { return configSpace.pack(LiteralStringRef(__FUNCTION__)); } KeyBackedProperty> sourceContainer() { return configSpace.pack(LiteralStringRef(__FUNCTION__)); @@ -304,6 +306,13 @@ public: tr->set(uidPrefixKey(applyMutationsBeginRange.begin, uid), BinaryWriter::toValue(ver, Unversioned())); } + Future getApplyBeginVersion(Reference tr) { + return map(tr->get(uidPrefixKey(applyMutationsBeginRange.begin, uid)), + [=](Optional const& value) -> Version { + return value.present() ? 
BinaryReader::fromStringRef(value.get(), Unversioned()) : 0; + }); + } + void setApplyEndVersion(Reference tr, Version ver) { tr->set(uidPrefixKey(applyMutationsEndRange.begin, uid), BinaryWriter::toValue(ver, Unversioned())); } @@ -315,6 +324,21 @@ public: }); } + ACTOR static Future getCurrentVersion_impl(RestoreConfig* self, Reference tr) { + state ERestoreState status = wait(self->stateEnum().getD(tr)); + state Version version = -1; + if (status == ERestoreState::RUNNING) { + wait(store(version, self->getApplyBeginVersion(tr))); + } else if (status == ERestoreState::COMPLETED) { + wait(store(version, self->restoreVersion().getD(tr))); + } + return version; + } + + Future getCurrentVersion(Reference tr) { + return getCurrentVersion_impl(this, tr); + } + ACTOR static Future getProgress_impl(RestoreConfig restore, Reference tr); Future getProgress(Reference tr) { return getProgress_impl(*this, tr); } @@ -335,15 +359,17 @@ ACTOR Future RestoreConfig::getProgress_impl(RestoreConfig restore, state Future fileBlocksFinished = restore.fileBlocksFinished().getD(tr); state Future bytesWritten = restore.bytesWritten().getD(tr); state Future status = restore.stateText(tr); + state Future currentVersion = restore.getCurrentVersion(tr); state Future lag = restore.getApplyVersionLag(tr); + state Future firstConsistentVersion = restore.firstConsistentVersion().getD(tr); state Future tag = restore.tag().getD(tr); state Future> lastError = restore.lastError().getD(tr); // restore might no longer be valid after the first wait so make sure it is not needed anymore. state UID uid = restore.getUid(); wait(success(fileCount) && success(fileBlockCount) && success(fileBlocksDispatched) && - success(fileBlocksFinished) && success(bytesWritten) && success(status) && success(lag) && success(tag) && - success(lastError)); + success(fileBlocksFinished) && success(bytesWritten) && success(status) && success(currentVersion) && + success(lag) && success(firstConsistentVersion) && success(tag) && success(lastError)); std::string errstr = "None"; if (lastError.get().second != 0) @@ -360,11 +386,13 @@ ACTOR Future RestoreConfig::getProgress_impl(RestoreConfig restore, .detail("FileBlocksTotal", fileBlockCount.get()) .detail("FileBlocksInProgress", fileBlocksDispatched.get() - fileBlocksFinished.get()) .detail("BytesWritten", bytesWritten.get()) + .detail("CurrentVersion", currentVersion.get()) + .detail("FirstConsistentVersion", firstConsistentVersion.get()) .detail("ApplyLag", lag.get()) .detail("TaskInstance", THIS_ADDR); return format("Tag: %s UID: %s State: %s Blocks: %lld/%lld BlocksInProgress: %lld Files: %lld BytesWritten: " - "%lld ApplyVersionLag: %lld LastError: %s", + "%lld CurrentVersion: %lld FirstConsistentVersion: %lld ApplyVersionLag: %lld LastError: %s", tag.get().c_str(), uid.toString().c_str(), status.get().toString().c_str(), @@ -373,6 +401,8 @@ ACTOR Future RestoreConfig::getProgress_impl(RestoreConfig restore, fileBlocksDispatched.get() - fileBlocksFinished.get(), fileCount.get(), bytesWritten.get(), + currentVersion.get(), + firstConsistentVersion.get(), lag.get(), errstr.c_str()); } @@ -996,7 +1026,7 @@ ACTOR static Future>> getBlockOfShards(ReferencesetOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr->setOption(FDBTransactionOptions::LOCK_AWARE); state Standalone> results; - Standalone values = wait(tr->getRange( + RangeResult values = wait(tr->getRange( KeyRangeRef(keyAfter(beginKey.withPrefix(keyServersPrefix)), endKey.withPrefix(keyServersPrefix)), limit)); for (auto& s : values) { @@ 
-2676,13 +2706,17 @@ struct StartFullBackupTaskFunc : BackupTaskFuncBase { wait(checkTaskVersion(cx, task, StartFullBackupTaskFunc::name, StartFullBackupTaskFunc::version)); state Reference tr(new ReadYourWritesTransaction(cx)); + state BackupConfig config(task); + state Future> partitionedLog; loop { try { tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr->setOption(FDBTransactionOptions::LOCK_AWARE); - Version startVersion = wait(tr->getReadVersion()); + partitionedLog = config.partitionedLogEnabled().get(tr); + state Future startVersionFuture = tr->getReadVersion(); + wait(success(partitionedLog) && success(startVersionFuture)); - Params.beginVersion().set(task, startVersion); + Params.beginVersion().set(task, startVersionFuture.get()); break; } catch (Error& e) { wait(tr->onError(e)); @@ -2692,14 +2726,15 @@ struct StartFullBackupTaskFunc : BackupTaskFuncBase { // Check if backup worker is enabled DatabaseConfiguration dbConfig = wait(getDatabaseConfiguration(cx)); state bool backupWorkerEnabled = dbConfig.backupWorkerEnabled; - if (!backupWorkerEnabled) { + if (!backupWorkerEnabled && partitionedLog.get().present() && partitionedLog.get().get()) { + // Change configuration only when we set to use partitioned logs and + // the flag was not set before. wait(success(changeConfig(cx, "backup_worker_enabled:=1", true))); backupWorkerEnabled = true; } // Set the "backupStartedKey" and wait for all backup worker started tr->reset(); - state BackupConfig config(task); loop { state Future watchFuture; try { @@ -2709,7 +2744,7 @@ struct StartFullBackupTaskFunc : BackupTaskFuncBase { state Future> started = tr->get(backupStartedKey); state Future> taskStarted = tr->get(config.allWorkerStarted().key); - state Future> partitionedLog = config.partitionedLogEnabled().get(tr); + partitionedLog = config.partitionedLogEnabled().get(tr); wait(success(started) && success(taskStarted) && success(partitionedLog)); if (!partitionedLog.get().present() || !partitionedLog.get().get()) { @@ -2778,9 +2813,9 @@ struct StartFullBackupTaskFunc : BackupTaskFuncBase { state Reference backupFinished = futureBucket->future(tr); - // Initialize the initial snapshot and create tasks to continually write logs and snapshots - // The initial snapshot has a desired duration of 0, meaning go as fast as possible. - wait(config.initNewSnapshot(tr, 0)); + // Initialize the initial snapshot and create tasks to continually write logs and snapshots. 
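// The interval used just below comes from the new BackupConfig::initialSnapshotIntervalSeconds()
// property, which submitBackup() stores at submit time later in this change; orDefault(0) keeps
// the old go-as-fast-as-possible behavior when the property was never set. A rough sketch of a
// caller requesting a throttled initial snapshot (illustrative only; trailing arguments elided):
//
//   state FileBackupAgent agent;
//   wait(agent.submitBackup(tr,
//                           outContainer,
//                           3600,         // initialSnapshotIntervalSeconds: spread the first snapshot over ~1h
//                           24 * 60 * 60, // snapshotIntervalSeconds
//                           tagName,
//                           backupRanges,
//                           ...));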
+ state Optional initialSnapshotIntervalSeconds = wait(config.initialSnapshotIntervalSeconds().get(tr)); + wait(config.initNewSnapshot(tr, initialSnapshotIntervalSeconds.orDefault(0))); // Using priority 1 for both of these to at least start both tasks soon // Do not add snapshot task if we only want the incremental backup @@ -3545,9 +3580,9 @@ struct RestoreDispatchTaskFunc : RestoreTaskFuncBase { state int64_t remainingInBatch = Params.remainingInBatch().get(task); state bool addingToExistingBatch = remainingInBatch > 0; state Version restoreVersion; - state Future> incrementalBackupOnly = restore.incrementalBackupOnly().get(tr); + state Future> onlyAppyMutationLogs = restore.onlyAppyMutationLogs().get(tr); - wait(store(restoreVersion, restore.restoreVersion().getOrThrow(tr)) && success(incrementalBackupOnly) && + wait(store(restoreVersion, restore.restoreVersion().getOrThrow(tr)) && success(onlyAppyMutationLogs) && checkTaskVersion(tr->getDatabase(), task, name, version)); // If not adding to an existing batch then update the apply mutations end version so the mutations from the @@ -4014,6 +4049,8 @@ struct StartFullRestoreTaskFunc : RestoreTaskFuncBase { state Version beginVersion; state Reference bc; state std::vector ranges; + state bool logsOnly; + state bool inconsistentSnapshotOnly; loop { try { @@ -4021,11 +4058,12 @@ struct StartFullRestoreTaskFunc : RestoreTaskFuncBase { tr->setOption(FDBTransactionOptions::LOCK_AWARE); wait(checkTaskVersion(tr->getDatabase(), task, name, version)); - Optional _beginVersion = wait(restore.beginVersion().get(tr)); - beginVersion = _beginVersion.present() ? _beginVersion.get() : invalidVersion; + wait(store(beginVersion, restore.beginVersion().getD(tr, false, invalidVersion))); wait(store(restoreVersion, restore.restoreVersion().getOrThrow(tr))); wait(store(ranges, restore.getRestoreRangesOrDefault(tr))); + wait(store(logsOnly, restore.onlyAppyMutationLogs().getD(tr, false, false))); + wait(store(inconsistentSnapshotOnly, restore.inconsistentSnapshotOnly().getD(tr, false, false))); wait(taskBucket->keepRunning(tr, task)); @@ -4072,8 +4110,7 @@ struct StartFullRestoreTaskFunc : RestoreTaskFuncBase { } } - Optional _incremental = wait(restore.incrementalBackupOnly().get(tr)); - state bool incremental = _incremental.present() ? _incremental.get() : false; + state Version firstConsistentVersion = invalidVersion; if (beginVersion == invalidVersion) { beginVersion = 0; } @@ -4081,31 +4118,60 @@ struct StartFullRestoreTaskFunc : RestoreTaskFuncBase { for (auto const& r : ranges) { keyRangesFilter.push_back_deep(keyRangesFilter.arena(), KeyRangeRef(r)); } - Optional restorable = - wait(bc->getRestoreSet(restoreVersion, keyRangesFilter, incremental, beginVersion)); - if (!incremental) { - beginVersion = restorable.get().snapshot.beginVersion; - } - + state Optional restorable = + wait(bc->getRestoreSet(restoreVersion, keyRangesFilter, logsOnly, beginVersion)); if (!restorable.present()) throw restore_missing_data(); - // First version for which log data should be applied - Params.firstVersion().set(task, beginVersion); - // Convert the two lists in restorable (logs and ranges) to a single list of RestoreFiles. // Order does not matter, they will be put in order when written to the restoreFileMap below. 
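// The block below also derives firstConsistentVersion for the restore. A condensed restatement
// of its three cases, as a sketch (the helper name is illustrative, not code from this change):
//
//   Version firstConsistentVersionFor(bool logsOnly, bool inconsistentSnapshotOnly,
//                                     Version beginVersion, const std::vector<RangeFile>& ranges) {
//       if (logsOnly)
//           return beginVersion; // logs-only restore: restore.beginVersion()
//       if (ranges.empty())
//           return invalidVersion;
//       if (!inconsistentSnapshotOnly) {
//           Version v = invalidVersion; // snapshot plus logs: highest version of any range file
//           for (const auto& f : ranges)
//               v = std::max(v, f.version);
//           return v;
//       }
//       for (const auto& f : ranges) // inconsistent snapshot: all range files must share one version
//           if (f.version != ranges[0].version)
//               return invalidVersion;
//       return ranges[0].version;
//   }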
state std::vector files; - - for (const RangeFile& f : restorable.get().ranges) { - files.push_back({ f.version, f.fileName, true, f.blockSize, f.fileSize }); + if (!logsOnly) { + beginVersion = restorable.get().snapshot.beginVersion; + if (!inconsistentSnapshotOnly) { + for (const RangeFile& f : restorable.get().ranges) { + files.push_back({ f.version, f.fileName, true, f.blockSize, f.fileSize }); + // In a restore with both snapshots and logs, the firstConsistentVersion is the highest version of + // any range file. + firstConsistentVersion = std::max(firstConsistentVersion, f.version); + } + } else { + for (int i = 0; i < restorable.get().ranges.size(); ++i) { + const RangeFile& f = restorable.get().ranges[i]; + files.push_back({ f.version, f.fileName, true, f.blockSize, f.fileSize }); + // In inconsistentSnapshotOnly mode, if all range files have the same version, then it is the + // firstConsistentVersion, otherwise unknown (use -1). + if (i != 0 && f.version != firstConsistentVersion) { + firstConsistentVersion = invalidVersion; + } else { + firstConsistentVersion = f.version; + } + } + } + } else { + // In logs-only (incremental) mode, the firstConsistentVersion should just be restore.beginVersion(). + firstConsistentVersion = beginVersion; } - - if (!CLIENT_KNOBS->RESTORE_IGNORE_LOG_FILES) { + if (!inconsistentSnapshotOnly) { for (const LogFile& f : restorable.get().logs) { files.push_back({ f.beginVersion, f.fileName, false, f.blockSize, f.fileSize, f.endVersion }); } } + // First version for which log data should be applied + Params.firstVersion().set(task, beginVersion); + + tr->reset(); + loop { + try { + tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); + tr->setOption(FDBTransactionOptions::LOCK_AWARE); + restore.firstConsistentVersion().set(tr, firstConsistentVersion); + wait(tr->commit()); + break; + } catch (Error& e) { + wait(tr->onError(e)); + } + } state std::vector::iterator start = files.begin(); state std::vector::iterator end = files.end(); @@ -4179,7 +4245,7 @@ struct StartFullRestoreTaskFunc : RestoreTaskFuncBase { tr, taskBucket, task, 0, "", 0, CLIENT_KNOBS->RESTORE_DISPATCH_BATCH_SIZE))); wait(taskBucket->finish(tr, task)); - state Future> logsOnly = restore.incrementalBackupOnly().get(tr); + state Future> logsOnly = restore.onlyAppyMutationLogs().get(tr); wait(success(logsOnly)); if (logsOnly.get().present() && logsOnly.get().get()) { // If this is an incremental restore, we need to set the applyMutationsMapPrefix @@ -4443,6 +4509,7 @@ public: ACTOR static Future submitBackup(FileBackupAgent* backupAgent, Reference tr, Key outContainer, + int initialSnapshotIntervalSeconds, int snapshotIntervalSeconds, std::string tagName, Standalone> backupRanges, @@ -4523,7 +4590,7 @@ public: state Key destUidValue(BinaryWriter::toValue(uid, Unversioned())); if (normalizedRanges.size() == 1) { - Standalone existingDestUidValues = wait( + RangeResult existingDestUidValues = wait( tr->getRange(KeyRangeRef(destUidLookupPrefix, strinc(destUidLookupPrefix)), CLIENT_KNOBS->TOO_MANY)); bool found = false; for (auto it : existingDestUidValues) { @@ -4558,6 +4625,7 @@ public: config.backupContainer().set(tr, bc); config.stopWhenDone().set(tr, stopWhenDone); config.backupRanges().set(tr, normalizedRanges); + config.initialSnapshotIntervalSeconds().set(tr, initialSnapshotIntervalSeconds); config.snapshotIntervalSeconds().set(tr, snapshotIntervalSeconds); config.partitionedLogEnabled().set(tr, partitionedLog); config.incrementalBackupOnly().set(tr, incrementalBackupOnly); @@ 
-4577,7 +4645,8 @@ public: Key addPrefix, Key removePrefix, bool lockDB, - bool incrementalBackupOnly, + bool onlyAppyMutationLogs, + bool inconsistentSnapshotOnly, Version beginVersion, UID uid) { KeyRangeMap restoreRangeSet; @@ -4591,8 +4660,9 @@ public: restoreRanges.push_back(KeyRange(KeyRangeRef(restoreRange.range().begin, restoreRange.range().end))); } } - for (auto& restoreRange : restoreRanges) - ASSERT(restoreRange.contains(removePrefix) || removePrefix.size() == 0); + for (auto& restoreRange : restoreRanges) { + ASSERT(restoreRange.begin.startsWith(removePrefix) && restoreRange.end.startsWith(removePrefix)); + } tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr->setOption(FDBTransactionOptions::LOCK_AWARE); @@ -4627,8 +4697,8 @@ public: KeyRange restoreIntoRange = KeyRangeRef(restoreRanges[index].begin, restoreRanges[index].end) .removePrefix(removePrefix) .withPrefix(addPrefix); - Standalone existingRows = wait(tr->getRange(restoreIntoRange, 1)); - if (existingRows.size() > 0 && !incrementalBackupOnly) { + RangeResult existingRows = wait(tr->getRange(restoreIntoRange, 1)); + if (existingRows.size() > 0 && !onlyAppyMutationLogs) { throw restore_destination_not_empty(); } } @@ -4645,7 +4715,8 @@ public: restore.sourceContainer().set(tr, bc); restore.stateEnum().set(tr, ERestoreState::QUEUED); restore.restoreVersion().set(tr, restoreVersion); - restore.incrementalBackupOnly().set(tr, incrementalBackupOnly); + restore.onlyAppyMutationLogs().set(tr, onlyAppyMutationLogs); + restore.inconsistentSnapshotOnly().set(tr, inconsistentSnapshotOnly); restore.beginVersion().set(tr, beginVersion); if (BUGGIFY && restoreRanges.size() == 1) { restore.restoreRange().set(tr, restoreRanges[0]); @@ -5184,7 +5255,8 @@ public: } ACTOR static Future> getLastRestorable(FileBackupAgent* backupAgent, - Reference tr, Key tagName, + Reference tr, + Key tagName, bool snapshot) { tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr->setOption(FDBTransactionOptions::LOCK_AWARE); @@ -5218,7 +5290,9 @@ public: // removePrefix: for each key to be restored, remove this prefix first. // lockDB: if set, lock the database with randomUid before performing the restore; // otherwise, check that the database is locked with the randomUid - // incrementalBackupOnly: only perform incremental backup + // onlyAppyMutationLogs: only perform an incremental restore, by applying just the mutation logs + // inconsistentSnapshotOnly: Ignore mutation log files during the restore to speed up the process. + // When set to true, this gives an inconsistent snapshot, and thus is not recommended // beginVersion: restore's begin version // randomUid: the UID used to lock the database ACTOR static Future restore(FileBackupAgent* backupAgent, @@ -5233,9 +5307,15 @@ public: Key addPrefix, Key removePrefix, bool lockDB, - bool incrementalBackupOnly, + bool onlyAppyMutationLogs, + bool inconsistentSnapshotOnly, Version beginVersion, UID randomUid) { + // The restore command line tool won't allow ranges to be empty, but correctness workloads somehow might.
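// A sketch of driving this entry point with the two new flags (illustrative only; the
// leading arguments are elided):
//
//   Version v = wait(restore(backupAgent, cx, ..., addPrefix, removePrefix,
//                            true,  // lockDB
//                            true,  // onlyAppyMutationLogs: replay mutation logs only
//                            false, // inconsistentSnapshotOnly
//                            beginVersion,
//                            deterministicRandom()->randomUniqueID()));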
+ if (ranges.empty()) { + throw restore_error(); + } + state Reference bc = IBackupContainer::openContainer(url.toString()); state BackupDescription desc = wait(bc->describeBackup(true)); @@ -5247,12 +5327,12 @@ public: if (targetVersion == invalidVersion && desc.maxRestorableVersion.present()) targetVersion = desc.maxRestorableVersion.get(); - if (targetVersion == invalidVersion && incrementalBackupOnly && desc.contiguousLogEnd.present()) { + if (targetVersion == invalidVersion && onlyAppyMutationLogs && desc.contiguousLogEnd.present()) { targetVersion = desc.contiguousLogEnd.get() - 1; } Optional restoreSet = - wait(bc->getRestoreSet(targetVersion, ranges, incrementalBackupOnly, beginVersion)); + wait(bc->getRestoreSet(targetVersion, ranges, onlyAppyMutationLogs, beginVersion)); if (!restoreSet.present()) { TraceEvent(SevWarn, "FileBackupAgentRestoreNotPossible") @@ -5284,7 +5364,8 @@ public: addPrefix, removePrefix, lockDB, - incrementalBackupOnly, + onlyAppyMutationLogs, + inconsistentSnapshotOnly, beginVersion, randomUid)); wait(tr->commit()); @@ -5440,6 +5521,7 @@ public: removePrefix, true, false, + false, invalidVersion, randomUid)); return ver; @@ -5500,7 +5582,8 @@ Future FileBackupAgent::restore(Database cx, Key addPrefix, Key removePrefix, bool lockDB, - bool incrementalBackupOnly, + bool onlyAppyMutationLogs, + bool inconsistentSnapshotOnly, Version beginVersion) { return FileBackupAgentImpl::restore(this, cx, @@ -5514,7 +5597,8 @@ Future FileBackupAgent::restore(Database cx, addPrefix, removePrefix, lockDB, - incrementalBackupOnly, + onlyAppyMutationLogs, + inconsistentSnapshotOnly, beginVersion, deterministicRandom()->randomUniqueID()); } @@ -5545,6 +5629,7 @@ Future FileBackupAgent::waitRestore(Database cx, Key tagName, boo Future FileBackupAgent::submitBackup(Reference tr, Key outContainer, + int initialSnapshotIntervalSeconds, int snapshotIntervalSeconds, std::string tagName, Standalone> backupRanges, @@ -5554,6 +5639,7 @@ Future FileBackupAgent::submitBackup(Reference return FileBackupAgentImpl::submitBackup(this, tr, outContainer, + initialSnapshotIntervalSeconds, snapshotIntervalSeconds, tagName, backupRanges, @@ -5578,7 +5664,8 @@ Future FileBackupAgent::getStatusJSON(Database cx, std::string tagN return FileBackupAgentImpl::getStatusJSON(this, cx, tagName); } -Future> FileBackupAgent::getLastRestorable(Reference tr, Key tagName, +Future> FileBackupAgent::getLastRestorable(Reference tr, + Key tagName, bool snapshot) { return FileBackupAgentImpl::getLastRestorable(this, tr, tagName, snapshot); } @@ -5660,7 +5747,7 @@ ACTOR static Future writeKVs(Database cx, Standalone readKVs = wait(tr.getRange(KeyRangeRef(k1, k2), CLIENT_KNOBS->TOO_MANY)); + RangeResult readKVs = wait(tr.getRange(KeyRangeRef(k1, k2), CLIENT_KNOBS->TOO_MANY)); ASSERT(readKVs.size() > 0 || begin == end); break; } catch (Error& e) { @@ -5692,7 +5779,7 @@ ACTOR static Future transformDatabaseContents(Database cx, tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr.setOption(FDBTransactionOptions::LOCK_AWARE); for (i = 0; i < restoreRanges.size(); ++i) { - Standalone kvs = wait(tr.getRange(restoreRanges[i], CLIENT_KNOBS->TOO_MANY)); + RangeResult kvs = wait(tr.getRange(restoreRanges[i], CLIENT_KNOBS->TOO_MANY)); ASSERT(!kvs.more); for (auto kv : kvs) { oldData.push_back_deep(oldData.arena(), KeyValueRef(kv.key, kv.value)); @@ -5759,7 +5846,7 @@ ACTOR static Future transformDatabaseContents(Database cx, try { tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); 
tr.setOption(FDBTransactionOptions::LOCK_AWARE); - Standalone emptyData = wait(tr.getRange(normalKeys, CLIENT_KNOBS->TOO_MANY)); + RangeResult emptyData = wait(tr.getRange(normalKeys, CLIENT_KNOBS->TOO_MANY)); for (int i = 0; i < emptyData.size(); ++i) { TraceEvent(SevError, "ExpectEmptyData") .detail("Index", i) @@ -5797,7 +5884,7 @@ ACTOR static Future transformDatabaseContents(Database cx, try { tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr.setOption(FDBTransactionOptions::LOCK_AWARE); - Standalone allData = wait(tr.getRange(normalKeys, CLIENT_KNOBS->TOO_MANY)); + RangeResult allData = wait(tr.getRange(normalKeys, CLIENT_KNOBS->TOO_MANY)); TraceEvent(SevFRTestInfo, "SanityCheckData").detail("Size", allData.size()); for (int i = 0; i < allData.size(); ++i) { std::pair backupRestoreValid = insideValidRange(allData[i], restoreRanges, backupRanges); diff --git a/fdbclient/GlobalConfig.actor.cpp b/fdbclient/GlobalConfig.actor.cpp new file mode 100644 index 0000000000..5fa901df0e --- /dev/null +++ b/fdbclient/GlobalConfig.actor.cpp @@ -0,0 +1,229 @@ +/* + * GlobalConfig.actor.cpp + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2021 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "fdbclient/DatabaseContext.h" +#include "fdbclient/GlobalConfig.actor.h" +#include "fdbclient/SpecialKeySpace.actor.h" +#include "fdbclient/SystemData.h" +#include "fdbclient/Tuple.h" +#include "flow/flow.h" +#include "flow/genericactors.actor.h" + +#include "flow/actorcompiler.h" // This must be the last #include. 
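// A minimal usage sketch, not part of this file: once a process has called
// GlobalConfig::create(), any code on that process can read eventually-consistent values
// through the singleton. The fragment is assumed to run inside an actor, and the default
// values are illustrative:
//
//   GlobalConfig& config = GlobalConfig::globalConfig();
//   wait(config.onInitialized());
//   double sampleRate = config.get<double>(fdbClientInfoTxnSampleRate, /*defaultVal*/ 0.0);
//   int64_t sizeLimit = config.get<int64_t>(fdbClientInfoTxnSizeLimit, /*defaultVal*/ 0);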
+ +const KeyRef fdbClientInfoTxnSampleRate = LiteralStringRef("config/fdb_client_info/client_txn_sample_rate"); +const KeyRef fdbClientInfoTxnSizeLimit = LiteralStringRef("config/fdb_client_info/client_txn_size_limit"); + +const KeyRef transactionTagSampleRate = LiteralStringRef("config/transaction_tag_sample_rate"); +const KeyRef transactionTagSampleCost = LiteralStringRef("config/transaction_tag_sample_cost"); + +GlobalConfig::GlobalConfig() : lastUpdate(0) {} + +void GlobalConfig::create(DatabaseContext* cx, Reference> dbInfo) { + if (g_network->global(INetwork::enGlobalConfig) == nullptr) { + auto config = new GlobalConfig{}; + config->cx = Database(cx); + g_network->setGlobal(INetwork::enGlobalConfig, config); + config->_updater = updater(config, dbInfo); + } +} + +GlobalConfig& GlobalConfig::globalConfig() { + void* res = g_network->global(INetwork::enGlobalConfig); + ASSERT(res); + return *reinterpret_cast(res); +} + +Key GlobalConfig::prefixedKey(KeyRef key) { + return key.withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::GLOBALCONFIG).begin); +} + +const Reference GlobalConfig::get(KeyRef name) { + auto it = data.find(name); + if (it == data.end()) { + return Reference(); + } + return it->second; +} + +const std::map> GlobalConfig::get(KeyRangeRef range) { + std::map> results; + for (const auto& [key, value] : data) { + if (range.contains(key)) { + results[key] = value; + } + } + return results; +} + +Future GlobalConfig::onInitialized() { + return initialized.getFuture(); +} + +void GlobalConfig::insert(KeyRef key, ValueRef value) { + data.erase(key); + + Arena arena(key.expectedSize() + value.expectedSize()); + KeyRef stableKey = KeyRef(arena, key); + try { + std::any any; + Tuple t = Tuple::unpack(value); + if (t.getType(0) == Tuple::ElementType::UTF8) { + any = StringRef(arena, t.getString(0).contents()); + } else if (t.getType(0) == Tuple::ElementType::INT) { + any = t.getInt(0); + } else if (t.getType(0) == Tuple::ElementType::FLOAT) { + any = t.getFloat(0); + } else if (t.getType(0) == Tuple::ElementType::DOUBLE) { + any = t.getDouble(0); + } else { + ASSERT(false); + } + data[stableKey] = makeReference(std::move(arena), std::move(any)); + } catch (Error& e) { + TraceEvent("GlobalConfigTupleParseError").detail("What", e.what()); + } +} + +void GlobalConfig::erase(KeyRef key) { + data.erase(key); +} + +void GlobalConfig::erase(KeyRangeRef range) { + auto it = data.begin(); + while (it != data.end()) { + if (range.contains(it->first)) { + it = data.erase(it); + } else { + ++it; + } + } +} + +// Older FDB versions used different keys for client profiling data. This +// function performs a one-time migration of data in these keys to the new +// global configuration key space. +ACTOR Future GlobalConfig::migrate(GlobalConfig* self) { + state Reference tr = makeReference(self->cx); + tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); + + state Key migratedKey("\xff\x02/fdbClientInfo/migrated/"_sr); + state Optional migrated = wait(tr->get(migratedKey)); + if (migrated.present()) { + // Already performed migration. + return Void(); + } + + state Optional sampleRate = wait(tr->get(Key("\xff\x02/fdbClientInfo/client_txn_sample_rate/"_sr))); + state Optional sizeLimit = wait(tr->get(Key("\xff\x02/fdbClientInfo/client_txn_size_limit/"_sr))); + + loop { + try { + tr->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES); + // The value doesn't matter too much, as long as the key is set. 
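// The writes below also double as an example of the general write path: global configuration
// values are tuple-encoded and written through the special key space. A sketch of a client
// setting its own value the same way, with an illustrative rate of 0.01:
//
//   tr->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES);
//   Tuple rate = Tuple().appendDouble(0.01);
//   tr->set(GlobalConfig::prefixedKey(fdbClientInfoTxnSampleRate), rate.pack());
//   wait(tr->commit());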
+ tr->set(migratedKey.contents(), "1"_sr); + if (sampleRate.present()) { + const double sampleRateDbl = + BinaryReader::fromStringRef(sampleRate.get().contents(), Unversioned()); + Tuple rate = Tuple().appendDouble(sampleRateDbl); + tr->set(GlobalConfig::prefixedKey(fdbClientInfoTxnSampleRate), rate.pack()); + } + if (sizeLimit.present()) { + const int64_t sizeLimitInt = + BinaryReader::fromStringRef(sizeLimit.get().contents(), Unversioned()); + Tuple size = Tuple().append(sizeLimitInt); + tr->set(GlobalConfig::prefixedKey(fdbClientInfoTxnSizeLimit), size.pack()); + } + + wait(tr->commit()); + return Void(); + } catch (Error& e) { + throw; + } + } +} + +// Updates local copy of global configuration by reading the entire key-range +// from storage. +ACTOR Future GlobalConfig::refresh(GlobalConfig* self) { + self->data.clear(); + + Transaction tr(self->cx); + RangeResult result = wait(tr.getRange(globalConfigDataKeys, CLIENT_KNOBS->TOO_MANY)); + for (const auto& kv : result) { + KeyRef systemKey = kv.key.removePrefix(globalConfigKeysPrefix); + self->insert(systemKey, kv.value); + } + return Void(); +} + +// Applies updates to the local copy of the global configuration when this +// process receives an updated history. +ACTOR Future GlobalConfig::updater(GlobalConfig* self, Reference> dbInfo) { + wait(self->migrate(self)); + + wait(self->refresh(self)); + self->initialized.send(Void()); + + loop { + try { + wait(dbInfo->onChange()); + + auto& history = dbInfo->get().history; + if (history.size() == 0) { + continue; + } + + if (self->lastUpdate < history[0].version) { + // This process missed too many global configuration + // history updates or the protocol version changed, so it + // must re-read the entire configuration range. + wait(self->refresh(self)); + if (dbInfo->get().history.size() > 0) { + self->lastUpdate = dbInfo->get().history.back().version; + } + } else { + // Apply history in order, from lowest version to highest + // version. Mutation history should already be stored in + // ascending version order. + for (const auto& vh : history) { + if (vh.version <= self->lastUpdate) { + continue; // already applied this mutation + } + + for (const auto& mutation : vh.mutations.contents()) { + if (mutation.type == MutationRef::SetValue) { + self->insert(mutation.param1, mutation.param2); + } else if (mutation.type == MutationRef::ClearRange) { + self->erase(KeyRangeRef(mutation.param1, mutation.param2)); + } else { + ASSERT(false); + } + } + + ASSERT(vh.version > self->lastUpdate); + self->lastUpdate = vh.version; + } + } + } catch (Error& e) { + throw; + } + } +} diff --git a/fdbclient/GlobalConfig.actor.h b/fdbclient/GlobalConfig.actor.h new file mode 100644 index 0000000000..5c3693f450 --- /dev/null +++ b/fdbclient/GlobalConfig.actor.h @@ -0,0 +1,146 @@ +/* + * GlobalConfig.actor.h + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2021 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#if defined(NO_INTELLISENSE) && !defined(FDBCLIENT_GLOBALCONFIG_ACTOR_G_H) +#define FDBCLIENT_GLOBALCONFIG_ACTOR_G_H +#include "fdbclient/GlobalConfig.actor.g.h" +#elif !defined(FDBCLIENT_GLOBALCONFIG_ACTOR_H) +#define FDBCLIENT_GLOBALCONFIG_ACTOR_H + +#include +#include +#include +#include + +#include "fdbclient/CommitProxyInterface.h" +#include "fdbclient/GlobalConfig.h" +#include "fdbclient/ReadYourWrites.h" + +#include "flow/actorcompiler.h" // has to be last include + +// The global configuration is a series of typed key-value pairs synced to all +// nodes (server and client) in an FDB cluster in an eventually consistent +// manner. Only small key-value pairs should be stored in global configuration; +// an excessive amount of data can cause synchronization slowness. + +// Keys +extern const KeyRef fdbClientInfoTxnSampleRate; +extern const KeyRef fdbClientInfoTxnSizeLimit; + +extern const KeyRef transactionTagSampleRate; +extern const KeyRef transactionTagSampleCost; + +// Structure used to hold the values stored by global configuration. The arena +// is used as memory to store both the key and the value (the value is only +// stored in the arena if it is an object; primitives are just copied). +struct ConfigValue : ReferenceCounted { + Arena arena; + std::any value; + + ConfigValue() {} + ConfigValue(Arena&& a, std::any&& v) : arena(a), value(v) {} +}; + +class GlobalConfig : NonCopyable { +public: + // Creates a GlobalConfig singleton, accessed by calling GlobalConfig(). + // This function should only be called once by each process (however, it is + // idempotent and calling it multiple times will have no effect). + static void create(DatabaseContext* cx, Reference> dbInfo); + + // Returns a reference to the global GlobalConfig object. Clients should + // call this function whenever they need to read a value out of the global + // configuration. + static GlobalConfig& globalConfig(); + + // Use this function to turn a global configuration key defined above into + // the full path needed to set the value in the database. + // + // For example, given "config/a", returns "\xff\xff/global_config/config/a". + static Key prefixedKey(KeyRef key); + + // Get a value from the framework. Values are returned as a ConfigValue + // reference which also contains the arena holding the object. As long as + // the caller keeps the ConfigValue reference, the value is guaranteed to + // be readable. An empty reference is returned if the value does not exist. + const Reference get(KeyRef name); + const std::map> get(KeyRangeRef range); + + // For arithmetic value types, returns a copy of the value for the given + // key, or the supplied default value if the framework does not know about + // the key. + template {}, bool>::type = true> + const T get(KeyRef name, T defaultVal) { + try { + auto configValue = get(name); + if (configValue.isValid()) { + if (configValue->value.has_value()) { + return std::any_cast(configValue->value); + } + } + + return defaultVal; + } catch (Error& e) { + throw; + } + } + + // Trying to write into the global configuration keyspace? To write data, + // submit a transaction to \xff\xff/global_config/ with + // encoded using the FDB tuple typecodes. Use the helper + // function `prefixedKey` to correctly prefix your global configuration + // key. + + // Triggers the returned future when the global configuration singleton has + // been created and is ready. 
+ Future onInitialized(); + +private: + GlobalConfig(); + + // The functions below only affect the local copy of the global + // configuration keyspace! To insert or remove values across all nodes you + // must use a transaction (see the note above). + + // Inserts the given key-value pair into the local copy of the global + // configuration keyspace, overwriting the old key-value pair if it exists. + // `value` must be encoded using the FDB tuple typecodes. + void insert(KeyRef key, ValueRef value); + // Removes the given key (and associated value) from the local copy of the + // global configuration keyspace. + void erase(KeyRef key); + // Removes the given key range (and associated values) from the local copy + // of the global configuration keyspace. + void erase(KeyRangeRef range); + + ACTOR static Future migrate(GlobalConfig* self); + ACTOR static Future refresh(GlobalConfig* self); + ACTOR static Future updater(GlobalConfig* self, Reference> dbInfo); + + Database cx; + Future _updater; + Promise initialized; + std::unordered_map> data; + Version lastUpdate; +}; + +#endif diff --git a/fdbclient/GlobalConfig.h b/fdbclient/GlobalConfig.h new file mode 100644 index 0000000000..f68ea2361e --- /dev/null +++ b/fdbclient/GlobalConfig.h @@ -0,0 +1,45 @@ +/* + * GlobalConfig.h + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2021 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "fdbclient/CommitTransaction.h" +#include "fdbclient/FDBTypes.h" + +// Used to store a list of mutations made to the global configuration at a +// specific version. +struct VersionHistory { + constexpr static FileIdentifier file_identifier = 5863456; + + VersionHistory() {} + VersionHistory(Version v) : version(v) {} + + Version version; + Standalone> mutations; + + bool operator<(const VersionHistory& other) const { return version < other.version; } + + int expectedSize() const { return sizeof(version) + mutations.expectedSize(); } + + template + void serialize(Ar& ar) { + serializer(ar, mutations, version); + } +}; diff --git a/fdbclient/IClientApi.h b/fdbclient/IClientApi.h index 6f3ad07cd1..3017a1f3f3 100644 --- a/fdbclient/IClientApi.h +++ b/fdbclient/IClientApi.h @@ -20,7 +20,6 @@ #ifndef FDBCLIENT_ICLIENTAPI_H #define FDBCLIENT_ICLIENTAPI_H -#include "fdbclient/ManagementAPI.actor.h" #pragma once #include "fdbclient/FDBOptions.g.h" @@ -28,6 +27,7 @@ #include "flow/ThreadHelper.actor.h" +// An interface that represents a transaction created by a client class ITransaction { public: virtual ~ITransaction() {} @@ -41,24 +41,24 @@ public: // until the ThreadFuture's ThreadSingleAssignmentVar has its memory released or it is destroyed. 
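// In other words, results borrow memory owned by the future. A sketch of the contract;
// the blockUntilReady() and get() accessors are assumed from ThreadFuture:
//
//   ThreadFuture<Optional<Value>> f = tr->get("some_key"_sr);
//   f.blockUntilReady();
//   Optional<Value> v = f.get(); // v points into memory owned by f's ThreadSingleAssignmentVar
//   // ... use v only while f (or a copy of it) is still alive ...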
virtual ThreadFuture> get(const KeyRef& key, bool snapshot = false) = 0; virtual ThreadFuture getKey(const KeySelectorRef& key, bool snapshot = false) = 0; - virtual ThreadFuture> getRange(const KeySelectorRef& begin, - const KeySelectorRef& end, - int limit, - bool snapshot = false, - bool reverse = false) = 0; - virtual ThreadFuture> getRange(const KeySelectorRef& begin, - const KeySelectorRef& end, - GetRangeLimits limits, - bool snapshot = false, - bool reverse = false) = 0; - virtual ThreadFuture> getRange(const KeyRangeRef& keys, - int limit, - bool snapshot = false, - bool reverse = false) = 0; - virtual ThreadFuture> getRange(const KeyRangeRef& keys, - GetRangeLimits limits, - bool snapshot = false, - bool reverse = false) = 0; + virtual ThreadFuture getRange(const KeySelectorRef& begin, + const KeySelectorRef& end, + int limit, + bool snapshot = false, + bool reverse = false) = 0; + virtual ThreadFuture getRange(const KeySelectorRef& begin, + const KeySelectorRef& end, + GetRangeLimits limits, + bool snapshot = false, + bool reverse = false) = 0; + virtual ThreadFuture getRange(const KeyRangeRef& keys, + int limit, + bool snapshot = false, + bool reverse = false) = 0; + virtual ThreadFuture getRange(const KeyRangeRef& keys, + GetRangeLimits limits, + bool snapshot = false, + bool reverse = false) = 0; virtual ThreadFuture>> getAddressesForKey(const KeyRef& key) = 0; virtual ThreadFuture> getVersionstamp() = 0; @@ -90,6 +90,7 @@ public: virtual void delref() = 0; }; +// An interface that represents a connection to a cluster made by a client class IDatabase { public: virtual ~IDatabase() {} @@ -98,6 +99,12 @@ public: virtual void setOption(FDBDatabaseOptions::Option option, Optional value = Optional()) = 0; virtual double getMainThreadBusyness() = 0; + // Returns the protocol version reported by the coordinator this client is connected to + // If an expected version is given, the future won't return until the protocol version is different than expected + // Note: this will never return if the server is running a protocol from FDB 5.0 or older + virtual ThreadFuture getServerProtocol( + Optional expectedVersion = Optional()) = 0; + virtual void addref() = 0; virtual void delref() = 0; @@ -110,13 +117,16 @@ public: virtual ThreadFuture createSnapshot(const StringRef& uid, const StringRef& snapshot_command) = 0; }; +// An interface that presents the top-level FDB client API as exposed through the C bindings +// +// This interface and its associated objects are intended to live outside the network thread, so its asynchronous +// operations use ThreadFutures and implementations should be thread safe. class IClientApi { public: virtual ~IClientApi() {} virtual void selectApiVersion(int apiVersion) = 0; virtual const char* getClientVersion() = 0; - virtual ThreadFuture getServerProtocol(const char* clusterFilePath) = 0; virtual void setNetworkOption(FDBNetworkOptions::Option option, Optional value = Optional()) = 0; diff --git a/fdbclient/KeyBackedTypes.h b/fdbclient/KeyBackedTypes.h index dd6623e4ef..f92324e4ab 100644 --- a/fdbclient/KeyBackedTypes.h +++ b/fdbclient/KeyBackedTypes.h @@ -280,7 +280,7 @@ public: return map( tr->getRange( KeyRangeRef(s.pack(Codec::pack(begin)), endKey), GetRangeLimits(limit), snapshot, reverse), - [s](Standalone const& kvs) -> PairsType { + [s](RangeResult const& kvs) -> PairsType { PairsType results; for (int i = 0; i < kvs.size(); ++i) { KeyType key = Codec::unpack(s.unpack(kvs[i].key)); @@ -344,7 +344,7 @@ public: Key endKey = end.present() ? 
s.pack(Codec::pack(end.get())) : space.range().end; return map( tr->getRange(KeyRangeRef(s.pack(Codec::pack(begin)), endKey), GetRangeLimits(limit), snapshot), - [s](Standalone const& kvs) -> Values { + [s](RangeResult const& kvs) -> Values { Values results; for (int i = 0; i < kvs.size(); ++i) { results.push_back(Codec::unpack(s.unpack(kvs[i].key))); diff --git a/fdbclient/KeyRangeMap.actor.cpp b/fdbclient/KeyRangeMap.actor.cpp index 67992e3e95..7b7dcdf1e3 100644 --- a/fdbclient/KeyRangeMap.actor.cpp +++ b/fdbclient/KeyRangeMap.actor.cpp @@ -35,7 +35,7 @@ void KeyRangeActorMap::getRangesAffectedByInsertion(const KeyRangeRef& keys, vec affectedRanges.push_back(KeyRangeRef(keys.end, e.end())); } -Standalone krmDecodeRanges(KeyRef mapPrefix, KeyRange keys, Standalone kv) { +RangeResult krmDecodeRanges(KeyRef mapPrefix, KeyRange keys, RangeResult kv) { ASSERT(!kv.more || kv.size() > 1); KeyRange withPrefix = KeyRangeRef(mapPrefix.toString() + keys.begin.toString(), mapPrefix.toString() + keys.end.toString()); @@ -46,7 +46,7 @@ Standalone krmDecodeRanges(KeyRef mapPrefix, KeyRange keys, Stan if (kv.size() && kv.end()[-1].key.startsWith(mapPrefix)) endValue = kv.end()[-1].value; - Standalone result; + RangeResult result; result.arena().dependsOn(kv.arena()); result.arena().dependsOn(keys.arena()); @@ -67,34 +67,28 @@ Standalone krmDecodeRanges(KeyRef mapPrefix, KeyRange keys, Stan } // Returns keys.begin, all transitional points in keys, and keys.end, and their values -ACTOR Future> krmGetRanges(Transaction* tr, - Key mapPrefix, - KeyRange keys, - int limit, - int limitBytes) { +ACTOR Future krmGetRanges(Transaction* tr, Key mapPrefix, KeyRange keys, int limit, int limitBytes) { KeyRange withPrefix = KeyRangeRef(mapPrefix.toString() + keys.begin.toString(), mapPrefix.toString() + keys.end.toString()); state GetRangeLimits limits(limit, limitBytes); limits.minRows = 2; - Standalone kv = - wait(tr->getRange(lastLessOrEqual(withPrefix.begin), firstGreaterThan(withPrefix.end), limits)); + RangeResult kv = wait(tr->getRange(lastLessOrEqual(withPrefix.begin), firstGreaterThan(withPrefix.end), limits)); return krmDecodeRanges(mapPrefix, keys, kv); } -ACTOR Future> krmGetRanges(Reference tr, - Key mapPrefix, - KeyRange keys, - int limit, - int limitBytes) { +ACTOR Future krmGetRanges(Reference tr, + Key mapPrefix, + KeyRange keys, + int limit, + int limitBytes) { KeyRange withPrefix = KeyRangeRef(mapPrefix.toString() + keys.begin.toString(), mapPrefix.toString() + keys.end.toString()); state GetRangeLimits limits(limit, limitBytes); limits.minRows = 2; - Standalone kv = - wait(tr->getRange(lastLessOrEqual(withPrefix.begin), firstGreaterThan(withPrefix.end), limits)); + RangeResult kv = wait(tr->getRange(lastLessOrEqual(withPrefix.begin), firstGreaterThan(withPrefix.end), limits)); return krmDecodeRanges(mapPrefix, keys, kv); } @@ -125,8 +119,7 @@ void krmSetPreviouslyEmptyRange(CommitTransactionRef& tr, ACTOR Future krmSetRange(Transaction* tr, Key mapPrefix, KeyRange range, Value value) { state KeyRange withPrefix = KeyRangeRef(mapPrefix.toString() + range.begin.toString(), mapPrefix.toString() + range.end.toString()); - Standalone old = - wait(tr->getRange(lastLessOrEqual(withPrefix.end), firstGreaterThan(withPrefix.end), 1, true)); + RangeResult old = wait(tr->getRange(lastLessOrEqual(withPrefix.end), firstGreaterThan(withPrefix.end), 1, true)); Value oldValue; bool hasResult = old.size() > 0 && old[0].key.startsWith(mapPrefix); @@ -147,8 +140,7 @@ ACTOR Future krmSetRange(Transaction* tr, Key 
mapPrefix, KeyRange range, V ACTOR Future krmSetRange(Reference tr, Key mapPrefix, KeyRange range, Value value) { state KeyRange withPrefix = KeyRangeRef(mapPrefix.toString() + range.begin.toString(), mapPrefix.toString() + range.end.toString()); - Standalone old = - wait(tr->getRange(lastLessOrEqual(withPrefix.end), firstGreaterThan(withPrefix.end), 1, true)); + RangeResult old = wait(tr->getRange(lastLessOrEqual(withPrefix.end), firstGreaterThan(withPrefix.end), 1, true)); Value oldValue; bool hasResult = old.size() > 0 && old[0].key.startsWith(mapPrefix); @@ -182,7 +174,7 @@ static Future krmSetRangeCoalescing_(Transaction* tr, state KeyRange maxWithPrefix = KeyRangeRef(mapPrefix.toString() + maxRange.begin.toString(), mapPrefix.toString() + maxRange.end.toString()); - state vector>> keys; + state vector> keys; keys.push_back(tr->getRange(lastLessThan(withPrefix.begin), firstGreaterOrEqual(withPrefix.begin), 1, true)); keys.push_back(tr->getRange(lastLessOrEqual(withPrefix.end), firstGreaterThan(withPrefix.end) + 1, 2, true)); wait(waitForAll(keys)); diff --git a/fdbclient/KeyRangeMap.h b/fdbclient/KeyRangeMap.h index 38a340c77b..7016dcfc4d 100644 --- a/fdbclient/KeyRangeMap.h +++ b/fdbclient/KeyRangeMap.h @@ -126,16 +126,16 @@ private: // krm*(): KeyRangeMap-like abstraction stored in the database, accessed through Transactions class Transaction; class ReadYourWritesTransaction; -Future> krmGetRanges(Transaction* const& tr, - Key const& mapPrefix, - KeyRange const& keys, - int const& limit = CLIENT_KNOBS->KRM_GET_RANGE_LIMIT, - int const& limitBytes = CLIENT_KNOBS->KRM_GET_RANGE_LIMIT_BYTES); -Future> krmGetRanges(Reference const& tr, - Key const& mapPrefix, - KeyRange const& keys, - int const& limit = CLIENT_KNOBS->KRM_GET_RANGE_LIMIT, - int const& limitBytes = CLIENT_KNOBS->KRM_GET_RANGE_LIMIT_BYTES); +Future krmGetRanges(Transaction* const& tr, + Key const& mapPrefix, + KeyRange const& keys, + int const& limit = CLIENT_KNOBS->KRM_GET_RANGE_LIMIT, + int const& limitBytes = CLIENT_KNOBS->KRM_GET_RANGE_LIMIT_BYTES); +Future krmGetRanges(Reference const& tr, + Key const& mapPrefix, + KeyRange const& keys, + int const& limit = CLIENT_KNOBS->KRM_GET_RANGE_LIMIT, + int const& limitBytes = CLIENT_KNOBS->KRM_GET_RANGE_LIMIT_BYTES); void krmSetPreviouslyEmptyRange(Transaction* tr, const KeyRef& mapPrefix, const KeyRangeRef& keys, @@ -162,7 +162,7 @@ Future krmSetRangeCoalescing(Reference const& t KeyRange const& range, KeyRange const& maxRange, Value const& value); -Standalone krmDecodeRanges(KeyRef mapPrefix, KeyRange keys, Standalone kv); +RangeResult krmDecodeRanges(KeyRef mapPrefix, KeyRange keys, RangeResult kv); template std::vector> KeyRangeMap::getAffectedRangesAfterInsertion( diff --git a/fdbclient/Knobs.cpp b/fdbclient/Knobs.cpp index 3ebaf5bf80..b269926c74 100644 --- a/fdbclient/Knobs.cpp +++ b/fdbclient/Knobs.cpp @@ -50,6 +50,7 @@ void ClientKnobs::initialize(bool randomize) { init( RECOVERY_DELAY_SECONDS_PER_GENERATION, 60.0 ); init( MAX_GENERATIONS, 100 ); init( MAX_GENERATIONS_OVERRIDE, 0 ); + init( MAX_GENERATIONS_SIM, 50 ); //Disable network connections after this many generations in simulation, should be less than RECOVERY_DELAY_START_GENERATION init( COORDINATOR_RECONNECTION_DELAY, 1.0 ); init( CLIENT_EXAMPLE_AMOUNT, 20 ); diff --git a/fdbclient/Knobs.h b/fdbclient/Knobs.h index f1359e4672..66a78cf017 100644 --- a/fdbclient/Knobs.h +++ b/fdbclient/Knobs.h @@ -42,6 +42,7 @@ public: double RECOVERY_DELAY_SECONDS_PER_GENERATION; double MAX_GENERATIONS; double 
MAX_GENERATIONS_OVERRIDE; + double MAX_GENERATIONS_SIM; double COORDINATOR_RECONNECTION_DELAY; int CLIENT_EXAMPLE_AMOUNT; diff --git a/fdbclient/ManagementAPI.actor.cpp b/fdbclient/ManagementAPI.actor.cpp index 05e1ec95e2..444642de89 100644 --- a/fdbclient/ManagementAPI.actor.cpp +++ b/fdbclient/ManagementAPI.actor.cpp @@ -60,6 +60,13 @@ std::map configForToken(std::string const& mode) { return out; } + if (mode == "tss") { + // Set temporary marker in config map to mark that this is a tss configuration and not a normal storage/log + // configuration. A bit of a hack but reuses the parsing code nicely. + out[p + "istss"] = "1"; + return out; + } + if (mode == "locked") { // Setting this key is interpreted as an instruction to use the normal version-stamp-based mechanism for locking // the database. @@ -119,7 +126,7 @@ std::map configForToken(std::string const& mode) { if ((key == "logs" || key == "commit_proxies" || key == "grv_proxies" || key == "resolvers" || key == "remote_logs" || key == "log_routers" || key == "usable_regions" || - key == "repopulate_anti_quorum") && + key == "repopulate_anti_quorum" || key == "count") && isInteger(value)) { out[p + key] = value; } @@ -134,6 +141,14 @@ std::map configForToken(std::string const& mode) { BinaryWriter::toValue(regionObj, IncludeVersion(ProtocolVersion::withRegionConfiguration())).toString(); } + if (key == "perpetual_storage_wiggle" && isInteger(value)) { + int ppWiggle = atoi(value.c_str()); + if (ppWiggle >= 2 || ppWiggle < 0) { + printf("Error: Only 0 and 1 are valid values of perpetual_storage_wiggle at present.\n"); + return out; + } + out[p + key] = value; + } return out; } @@ -326,6 +341,35 @@ ConfigurationResult buildConfiguration(std::vector const& modeTokens, serializeReplicationPolicy(policyWriter, logPolicy); outConf[p + "log_replication_policy"] = policyWriter.toValue().toString(); } + if (outConf.count(p + "istss")) { + // redo config parameters to be tss config instead of normal config + + // save param values from parsing as a normal config + bool isNew = outConf.count(p + "initialized"); + Optional count; + Optional storageEngine; + if (outConf.count(p + "count")) { + count = Optional(outConf[p + "count"]); + } + if (outConf.count(p + "storage_engine")) { + storageEngine = Optional(outConf[p + "storage_engine"]); + } + + // A new tss setup must have count + storage engine. An adjustment must have at least one. 
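// Illustrative examples (fdbcli syntax; token order is assumed not to matter here):
//
//   configure tss ssd count=2   -> new TSS setup: engine and count present, accepted
//   configure tss count=4       -> adjustment of an existing setup, accepted
//   configure tss               -> neither given, yields INCOMPLETE_CONFIGURATION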
+ if ((isNew && (!count.present() || !storageEngine.present())) || + (!isNew && !count.present() && !storageEngine.present())) { + return ConfigurationResult::INCOMPLETE_CONFIGURATION; + } + + // clear map and only reset tss parameters + outConf.clear(); + if (count.present()) { + outConf[p + "tss_count"] = count.get(); + } + if (storageEngine.present()) { + outConf[p + "tss_storage_engine"] = storageEngine.get(); + } + } return ConfigurationResult::SUCCESS; } @@ -357,7 +401,7 @@ ACTOR Future getDatabaseConfiguration(Database cx) { loop { try { tr.setOption(FDBTransactionOptions::LOCK_AWARE); - Standalone res = wait(tr.getRange(configKeys, CLIENT_KNOBS->TOO_MANY)); + RangeResult res = wait(tr.getRange(configKeys, CLIENT_KNOBS->TOO_MANY)); ASSERT(res.size() < CLIENT_KNOBS->TOO_MANY); DatabaseConfiguration config; config.fromKeyValues((VectorRef)res); @@ -407,7 +451,7 @@ ACTOR Future changeConfig(Database cx, std::map> fConfig = tr.getRange(configKeys, CLIENT_KNOBS->TOO_MANY); + state Future fConfig = tr.getRange(configKeys, CLIENT_KNOBS->TOO_MANY); state Future> fWorkers = getWorkers(&tr); wait(success(fConfig) || tooLong); @@ -458,19 +502,19 @@ ACTOR Future changeConfig(Database cx, std::map> fServerList = - (newConfig.regions.size()) ? tr.getRange(serverListKeys, CLIENT_KNOBS->TOO_MANY) - : Future>(); + state Future fServerList = (newConfig.regions.size()) + ? tr.getRange(serverListKeys, CLIENT_KNOBS->TOO_MANY) + : Future(); if (newConfig.usableRegions == 2) { if (oldReplicationUsesDcId) { - state Future> fLocalityList = + state Future fLocalityList = tr.getRange(tagLocalityListKeys, CLIENT_KNOBS->TOO_MANY); wait(success(fLocalityList) || tooLong); if (!fLocalityList.isReady()) { return ConfigurationResult::DATABASE_UNAVAILABLE; } - Standalone localityList = fLocalityList.get(); + RangeResult localityList = fLocalityList.get(); ASSERT(!localityList.more && localityList.size() < CLIENT_KNOBS->TOO_MANY); std::set localityDcIds; @@ -513,7 +557,7 @@ ACTOR Future changeConfig(Database cx, std::map serverList = fServerList.get(); + RangeResult serverList = fServerList.get(); ASSERT(!serverList.more && serverList.size() < CLIENT_KNOBS->TOO_MANY); std::set newDcIds; @@ -988,8 +1032,8 @@ Future changeConfig(Database const& cx, std::string const& } ACTOR Future> getWorkers(Transaction* tr) { - state Future> processClasses = tr->getRange(processClassKeys, CLIENT_KNOBS->TOO_MANY); - state Future> processData = tr->getRange(workerListKeys, CLIENT_KNOBS->TOO_MANY); + state Future processClasses = tr->getRange(processClassKeys, CLIENT_KNOBS->TOO_MANY); + state Future processData = tr->getRange(workerListKeys, CLIENT_KNOBS->TOO_MANY); wait(success(processClasses) && success(processData)); ASSERT(!processClasses.get().more && processClasses.get().size() < CLIENT_KNOBS->TOO_MANY); @@ -1105,6 +1149,7 @@ ACTOR Future> changeQuorumChecker(Transaction* tr, vector>> leaderServers; ClientCoordinators coord(Reference(new ClusterConnectionFile(conn))); + leaderServers.reserve(coord.clientLeaderServers.size()); for (int i = 0; i < coord.clientLeaderServers.size(); i++) leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader, @@ -1188,14 +1233,20 @@ ACTOR Future changeQuorum(Database cx, Reference>> leaderServers; - ClientCoordinators coord(Reference(new ClusterConnectionFile(conn))); + state vector>> leaderServers; + state ClientCoordinators coord(Reference(new ClusterConnectionFile(conn))); + // check if allowed to modify the cluster descriptor + if 
(!change->getDesiredClusterKeyName().empty()) { + CheckDescriptorMutableReply mutabilityReply = + wait(coord.clientLeaderServers[0].checkDescriptorMutable.getReply(CheckDescriptorMutableRequest())); + if (!mutabilityReply.isMutable) + return CoordinatorsResult::BAD_DATABASE_STATE; + } leaderServers.reserve(coord.clientLeaderServers.size()); for (int i = 0; i < coord.clientLeaderServers.size(); i++) leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader, GetLeaderRequest(coord.clusterKey, UID()), TaskPriority::CoordinationReply)); - choose { when(wait(waitForAll(leaderServers))) {} when(wait(delay(5.0))) { return CoordinatorsResult::COORDINATOR_UNREACHABLE; } @@ -1679,9 +1730,9 @@ ACTOR Future setClass(Database cx, AddressExclusion server, ProcessClass p } ACTOR Future> getExcludedServers(Transaction* tr) { - state Standalone r = wait(tr->getRange(excludedServersKeys, CLIENT_KNOBS->TOO_MANY)); + state RangeResult r = wait(tr->getRange(excludedServersKeys, CLIENT_KNOBS->TOO_MANY)); ASSERT(!r.more && r.size() < CLIENT_KNOBS->TOO_MANY); - state Standalone r2 = wait(tr->getRange(failedServersKeys, CLIENT_KNOBS->TOO_MANY)); + state RangeResult r2 = wait(tr->getRange(failedServersKeys, CLIENT_KNOBS->TOO_MANY)); ASSERT(!r2.more && r2.size() < CLIENT_KNOBS->TOO_MANY); vector exclusions; @@ -1867,7 +1918,7 @@ ACTOR Future checkForExcludingServersTxActor(ReadYourWritesTransaction* tr // recovery // Check that there aren't any storage servers with addresses violating the exclusions - Standalone serverList = wait(tr->getRange(serverListKeys, CLIENT_KNOBS->TOO_MANY)); + RangeResult serverList = wait(tr->getRange(serverListKeys, CLIENT_KNOBS->TOO_MANY)); ASSERT(!serverList.more && serverList.size() < CLIENT_KNOBS->TOO_MANY); state bool ok = true; @@ -1948,7 +1999,7 @@ ACTOR Future waitForFullReplication(Database cx) { tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE); tr.setOption(FDBTransactionOptions::LOCK_AWARE); - Standalone confResults = wait(tr.getRange(configKeys, CLIENT_KNOBS->TOO_MANY)); + RangeResult confResults = wait(tr.getRange(configKeys, CLIENT_KNOBS->TOO_MANY)); ASSERT(!confResults.more && confResults.size() < CLIENT_KNOBS->TOO_MANY); state DatabaseConfiguration config; config.fromKeyValues((VectorRef)confResults); @@ -2203,8 +2254,7 @@ ACTOR Future changeCachedRange(Database cx, KeyRangeRef range, bool add) { tr.clear(sysRangeClear); tr.clear(privateRange); tr.addReadConflictRange(privateRange); - Standalone previous = - wait(tr.getRange(KeyRangeRef(storageCachePrefix, sysRange.begin), 1, true)); + RangeResult previous = wait(tr.getRange(KeyRangeRef(storageCachePrefix, sysRange.begin), 1, true)); bool prevIsCached = false; if (!previous.empty()) { std::vector prevVal; @@ -2220,8 +2270,7 @@ ACTOR Future changeCachedRange(Database cx, KeyRangeRef range, bool add) { tr.set(sysRange.begin, trueValue); tr.set(privateRange.begin, serverKeysTrue); } - Standalone after = - wait(tr.getRange(KeyRangeRef(sysRange.end, storageCacheKeys.end), 1, false)); + RangeResult after = wait(tr.getRange(KeyRangeRef(sysRange.end, storageCacheKeys.end), 1, false)); bool afterIsCached = false; if (!after.empty()) { std::vector afterVal; diff --git a/fdbclient/MonitorLeader.actor.cpp b/fdbclient/MonitorLeader.actor.cpp index af563c68b0..86a09ff424 100644 --- a/fdbclient/MonitorLeader.actor.cpp +++ b/fdbclient/MonitorLeader.actor.cpp @@ -380,11 +380,14 @@ ClientCoordinators::ClientCoordinators(Key clusterKey, std::vector> getLeader(const vector 
monitorLeaderOneGeneration(ReferencegetConnectionString().toString()); + .detail("OldConnStr", info.intermediateConnFile->getConnectionString().toString()) + .trackLatest("MonitorLeaderForwarding"); info.intermediateConnFile = makeReference( connFile->getFilename(), ClusterConnectionString(leader.get().first.serializedInfo.toString())); return info; @@ -757,6 +762,7 @@ void shrinkProxyList(ClientDBInfo& ni, ACTOR Future monitorProxiesOneGeneration( Reference connFile, Reference> clientInfo, + Reference>> coordinator, MonitorLeaderInfo info, Reference>>> supportedVersions, Key traceLogGroup) { @@ -774,6 +780,9 @@ ACTOR Future monitorProxiesOneGeneration( loop { state ClientLeaderRegInterface clientLeaderServer(addrs[idx]); state OpenDatabaseCoordRequest req; + + coordinator->set(clientLeaderServer); + req.clusterKey = cs.clusterKey(); req.coordinators = cs.coordinators(); req.knownClientInfoID = clientInfo->get().id; @@ -840,13 +849,14 @@ ACTOR Future monitorProxiesOneGeneration( ACTOR Future monitorProxies( Reference>> connFile, Reference> clientInfo, + Reference>> coordinator, Reference>>> supportedVersions, Key traceLogGroup) { state MonitorLeaderInfo info(connFile->get()); loop { choose { when(MonitorLeaderInfo _info = wait(monitorProxiesOneGeneration( - connFile->get(), clientInfo, info, supportedVersions, traceLogGroup))) { + connFile->get(), clientInfo, coordinator, info, supportedVersions, traceLogGroup))) { info = _info; } when(wait(connFile->onChange())) { diff --git a/fdbclient/MonitorLeader.h b/fdbclient/MonitorLeader.h index 204b6994f4..b9b195a9da 100644 --- a/fdbclient/MonitorLeader.h +++ b/fdbclient/MonitorLeader.h @@ -76,6 +76,7 @@ Future monitorLeaderForProxies(Value const& key, Future monitorProxies( Reference>> const& connFile, Reference> const& clientInfo, + Reference>> const& coordinator, Reference>>> const& supportedVersions, Key const& traceLogGroup); diff --git a/fdbclient/MultiVersionTransaction.actor.cpp b/fdbclient/MultiVersionTransaction.actor.cpp index ac1855c811..18f7bc71e8 100644 --- a/fdbclient/MultiVersionTransaction.actor.cpp +++ b/fdbclient/MultiVersionTransaction.actor.cpp @@ -89,19 +89,19 @@ ThreadFuture DLTransaction::getKey(const KeySelectorRef& key, bool snapshot }); } -ThreadFuture> DLTransaction::getRange(const KeySelectorRef& begin, - const KeySelectorRef& end, - int limit, - bool snapshot, - bool reverse) { +ThreadFuture DLTransaction::getRange(const KeySelectorRef& begin, + const KeySelectorRef& end, + int limit, + bool snapshot, + bool reverse) { return getRange(begin, end, GetRangeLimits(limit), snapshot, reverse); } -ThreadFuture> DLTransaction::getRange(const KeySelectorRef& begin, - const KeySelectorRef& end, - GetRangeLimits limits, - bool snapshot, - bool reverse) { +ThreadFuture DLTransaction::getRange(const KeySelectorRef& begin, + const KeySelectorRef& end, + GetRangeLimits limits, + bool snapshot, + bool reverse) { FdbCApi::FDBFuture* f = api->transactionGetRange(tr, begin.getKey().begin(), begin.getKey().size(), @@ -117,7 +117,7 @@ ThreadFuture> DLTransaction::getRange(const KeySelect 0, snapshot, reverse); - return toThreadFuture>(api, f, [](FdbCApi::FDBFuture* f, FdbCApi* api) { + return toThreadFuture(api, f, [](FdbCApi::FDBFuture* f, FdbCApi* api) { const FdbCApi::FDBKeyValue* kvs; int count; FdbCApi::fdb_bool_t more; @@ -125,23 +125,19 @@ ThreadFuture> DLTransaction::getRange(const KeySelect ASSERT(!error); // The memory for this is stored in the FDBFuture and is released when the future gets destroyed - return 
Standalone(RangeResultRef(VectorRef((KeyValueRef*)kvs, count), more), - Arena()); + return RangeResult(RangeResultRef(VectorRef((KeyValueRef*)kvs, count), more), Arena()); }); } -ThreadFuture> DLTransaction::getRange(const KeyRangeRef& keys, - int limit, - bool snapshot, - bool reverse) { +ThreadFuture DLTransaction::getRange(const KeyRangeRef& keys, int limit, bool snapshot, bool reverse) { return getRange( firstGreaterOrEqual(keys.begin), firstGreaterOrEqual(keys.end), GetRangeLimits(limit), snapshot, reverse); } -ThreadFuture> DLTransaction::getRange(const KeyRangeRef& keys, - GetRangeLimits limits, - bool snapshot, - bool reverse) { +ThreadFuture DLTransaction::getRange(const KeyRangeRef& keys, + GetRangeLimits limits, + bool snapshot, + bool reverse) { return getRange(firstGreaterOrEqual(keys.begin), firstGreaterOrEqual(keys.end), limits, snapshot, reverse); } @@ -289,12 +285,15 @@ void DLTransaction::reset() { // DLDatabase DLDatabase::DLDatabase(Reference api, ThreadFuture dbFuture) : api(api), db(nullptr) { + addref(); ready = mapThreadFuture(dbFuture, [this](ErrorOr db) { if (db.isError()) { + delref(); return ErrorOr(db.getError()); } this->db = db.get(); + delref(); return ErrorOr(Void()); }); } @@ -356,7 +355,33 @@ double DLDatabase::getMainThreadBusyness() { return 0; } +// Returns the protocol version reported by the coordinator this client is connected to +// If an expected version is given, the future won't return until the protocol version is different than expected +// Note: this will never return if the server is running a protocol from FDB 5.0 or older +ThreadFuture DLDatabase::getServerProtocol(Optional expectedVersion) { + ASSERT(api->databaseGetServerProtocol != nullptr); + + uint64_t expected = + expectedVersion.map([](const ProtocolVersion& v) { return v.version(); }).orDefault(0); + FdbCApi::FDBFuture* f = api->databaseGetServerProtocol(db, expected); + return toThreadFuture(api, f, [](FdbCApi::FDBFuture* f, FdbCApi* api) { + uint64_t pv; + FdbCApi::fdb_error_t error = api->futureGetUInt64(f, &pv); + ASSERT(!error); + return ProtocolVersion(pv); + }); +} + // DLApi + +// Loads the specified function from a dynamic library +// +// fp - The function pointer where the loaded function will be stored +// lib - The dynamic library where the function is loaded from +// libPath - The path of the dynamic library (used for logging) +// functionName - The function to load +// requireFunction - Determines the behavior if the function is not present. If true, an error is thrown. If false, +// the function pointer will be set to nullptr. template void loadClientFunction(T* fp, void* lib, std::string libPath, const char* functionName, bool requireFunction = true) { *(void**)(fp) = loadFunction(lib, functionName); @@ -403,6 +428,8 @@ void DLApi::init() { fdbCPath, "fdb_database_get_main_thread_busyness", headerVersion >= 700); + loadClientFunction( + &api->databaseGetServerProtocol, lib, fdbCPath, "fdb_database_get_server_protocol", headerVersion >= 700); loadClientFunction(&api->databaseDestroy, lib, fdbCPath, "fdb_database_destroy"); loadClientFunction(&api->databaseRebootWorker, lib, fdbCPath, "fdb_database_reboot_worker", headerVersion >= 700); loadClientFunction(&api->databaseForceRecoveryWithDataLoss, @@ -452,7 +479,7 @@ void DLApi::init() { loadClientFunction( &api->futureGetInt64, lib, fdbCPath, headerVersion >= 620 ? 
"fdb_future_get_int64" : "fdb_future_get_version"); - loadClientFunction(&api->futureGetUInt64, lib, fdbCPath, "fdb_future_get_uint64"); + loadClientFunction(&api->futureGetUInt64, lib, fdbCPath, "fdb_future_get_uint64", headerVersion >= 700); loadClientFunction(&api->futureGetError, lib, fdbCPath, "fdb_future_get_error"); loadClientFunction(&api->futureGetKey, lib, fdbCPath, "fdb_future_get_key"); loadClientFunction(&api->futureGetValue, lib, fdbCPath, "fdb_future_get_value"); @@ -488,11 +515,6 @@ const char* DLApi::getClientVersion() { return api->getClientVersion(); } -ThreadFuture DLApi::getServerProtocol(const char* clusterFilePath) { - ASSERT(false); - return ThreadFuture(); -} - void DLApi::setNetworkOption(FDBNetworkOptions::Option option, Optional value) { throwIfError(api->setNetworkOption( option, value.present() ? value.get().begin() : nullptr, value.present() ? value.get().size() : 0)); @@ -659,45 +681,45 @@ ThreadFuture MultiVersionTransaction::getKey(const KeySelectorRef& key, boo return abortableFuture(f, tr.onChange); } -ThreadFuture> MultiVersionTransaction::getRange(const KeySelectorRef& begin, - const KeySelectorRef& end, - int limit, - bool snapshot, - bool reverse) { +ThreadFuture MultiVersionTransaction::getRange(const KeySelectorRef& begin, + const KeySelectorRef& end, + int limit, + bool snapshot, + bool reverse) { auto tr = getTransaction(); auto f = tr.transaction ? tr.transaction->getRange(begin, end, limit, snapshot, reverse) - : ThreadFuture>(Never()); + : ThreadFuture(Never()); return abortableFuture(f, tr.onChange); } -ThreadFuture> MultiVersionTransaction::getRange(const KeySelectorRef& begin, - const KeySelectorRef& end, - GetRangeLimits limits, - bool snapshot, - bool reverse) { +ThreadFuture MultiVersionTransaction::getRange(const KeySelectorRef& begin, + const KeySelectorRef& end, + GetRangeLimits limits, + bool snapshot, + bool reverse) { auto tr = getTransaction(); auto f = tr.transaction ? tr.transaction->getRange(begin, end, limits, snapshot, reverse) - : ThreadFuture>(Never()); + : ThreadFuture(Never()); return abortableFuture(f, tr.onChange); } -ThreadFuture> MultiVersionTransaction::getRange(const KeyRangeRef& keys, - int limit, - bool snapshot, - bool reverse) { +ThreadFuture MultiVersionTransaction::getRange(const KeyRangeRef& keys, + int limit, + bool snapshot, + bool reverse) { auto tr = getTransaction(); - auto f = tr.transaction ? tr.transaction->getRange(keys, limit, snapshot, reverse) - : ThreadFuture>(Never()); + auto f = + tr.transaction ? tr.transaction->getRange(keys, limit, snapshot, reverse) : ThreadFuture(Never()); return abortableFuture(f, tr.onChange); } -ThreadFuture> MultiVersionTransaction::getRange(const KeyRangeRef& keys, - GetRangeLimits limits, - bool snapshot, - bool reverse) { +ThreadFuture MultiVersionTransaction::getRange(const KeyRangeRef& keys, + GetRangeLimits limits, + bool snapshot, + bool reverse) { auto tr = getTransaction(); - auto f = tr.transaction ? tr.transaction->getRange(keys, limits, snapshot, reverse) - : ThreadFuture>(Never()); + auto f = + tr.transaction ? 
tr.transaction->getRange(keys, limits, snapshot, reverse) : ThreadFuture(Never()); return abortableFuture(f, tr.onChange); } @@ -855,35 +877,52 @@ MultiVersionDatabase::MultiVersionDatabase(MultiVersionApi* api, int threadIdx, std::string clusterFilePath, Reference db, + Reference versionMonitorDb, bool openConnectors) - : dbState(new DatabaseState()) { + : dbState(new DatabaseState(clusterFilePath, versionMonitorDb)) { dbState->db = db; dbState->dbVar->set(db); - if (!openConnectors) { - dbState->currentClientIndex = 0; - } else { + if (openConnectors) { if (!api->localClientDisabled) { - dbState->currentClientIndex = 0; - dbState->addConnection(api->getLocalClient(), clusterFilePath); - } else { - dbState->currentClientIndex = -1; + dbState->addClient(api->getLocalClient()); } - api->runOnExternalClients(threadIdx, [this, clusterFilePath](Reference client) { - dbState->addConnection(client, clusterFilePath); + api->runOnExternalClients(threadIdx, [this](Reference client) { dbState->addClient(client); }); + + if (!externalClientsInitialized.test_and_set()) { + api->runOnExternalClientsAllThreads([&clusterFilePath](Reference client) { + // This creates a database to initialize some client state on the external library + // We only do this on 6.2+ clients to avoid some bugs associated with older versions + // This deletes the new database immediately to discard its connections + if (client->protocolVersion.hasCloseUnusedConnection()) { + Reference newDb = client->api->createDatabase(clusterFilePath.c_str()); + } + }); + } + + // For clients older than 6.2 we create and maintain our database connection + api->runOnExternalClients(threadIdx, [this, &clusterFilePath](Reference client) { + if (!client->protocolVersion.hasCloseUnusedConnection()) { + dbState->legacyDatabaseConnections[client->protocolVersion] = + client->api->createDatabase(clusterFilePath.c_str()); + } }); - dbState->startConnections(); + Reference dbStateRef = dbState; + onMainThreadVoid([dbStateRef]() { dbStateRef->protocolVersionMonitor = dbStateRef->monitorProtocolVersion(); }, + nullptr); } } MultiVersionDatabase::~MultiVersionDatabase() { - dbState->cancelConnections(); + dbState->close(); } +// Create a MultiVersionDatabase that wraps an already created IDatabase object +// For internal use in testing Reference MultiVersionDatabase::debugCreateFromExistingDatabase(Reference db) { - return Reference(new MultiVersionDatabase(MultiVersionApi::api, 0, "", db, false)); + return Reference(new MultiVersionDatabase(MultiVersionApi::api, 0, "", db, db, false)); } Reference MultiVersionDatabase::createTransaction() { @@ -941,180 +980,279 @@ double MultiVersionDatabase::getMainThreadBusyness() { return 0; } -void MultiVersionDatabase::Connector::connect() { - addref(); - onMainThreadVoid( - [this]() { - if (!cancelled) { - connected = false; - if (connectionFuture.isValid()) { - connectionFuture.cancel(); - } - - candidateDatabase = client->api->createDatabase(clusterFilePath.c_str()); - if (client->external) { - connectionFuture = candidateDatabase.castTo()->onReady(); - } else { - connectionFuture = ThreadFuture(Void()); - } - - connectionFuture = flatMapThreadFuture(connectionFuture, [this](ErrorOr ready) { - if (ready.isError()) { - return ErrorOr>(ready.getError()); - } - - tr = candidateDatabase->createTransaction(); - return ErrorOr>( - mapThreadFuture(tr->getReadVersion(), [](ErrorOr v) { - // If the version attempt returns an error, we regard that as a connection (except - // operation_cancelled) - if (v.isError() && 
v.getError().code() == error_code_operation_cancelled) { - return ErrorOr(v.getError()); - } else { - return ErrorOr(Void()); - } - })); - }); - - int userParam; - connectionFuture.callOrSetAsCallback(this, userParam, 0); - } else { - delref(); - } - }, - nullptr); +// Returns the protocol version reported by the coordinator this client is connected to +// If an expected version is given, the future won't return until the protocol version is different than expected +// Note: this will never return if the server is running a protocol from FDB 5.0 or older +ThreadFuture MultiVersionDatabase::getServerProtocol(Optional expectedVersion) { + return dbState->versionMonitorDb->getServerProtocol(expectedVersion); } -// Only called from main thread -void MultiVersionDatabase::Connector::cancel() { - connected = false; - cancelled = true; - if (connectionFuture.isValid()) { - connectionFuture.cancel(); - } -} +MultiVersionDatabase::DatabaseState::DatabaseState(std::string clusterFilePath, Reference versionMonitorDb) + : clusterFilePath(clusterFilePath), versionMonitorDb(versionMonitorDb), + dbVar(new ThreadSafeAsyncVar>(Reference(nullptr))) {} -void MultiVersionDatabase::Connector::fire(const Void& unused, int& userParam) { - onMainThreadVoid( - [this]() { - if (!cancelled) { - connected = true; - dbState->stateChanged(); - } - delref(); - }, - nullptr); -} +// Adds a client (local or externally loaded) that can be used to connect to the cluster +void MultiVersionDatabase::DatabaseState::addClient(Reference client) { + ProtocolVersion baseVersion = client->protocolVersion.normalizedVersion(); + auto [itr, inserted] = clients.insert({ baseVersion, client }); + if (!inserted) { + // SOMEDAY: prefer client with higher release version if protocol versions are compatible + Reference keptClient = itr->second; + Reference discardedClient = client; + if (client->canReplace(itr->second)) { + std::swap(keptClient, discardedClient); + clients[baseVersion] = client; + } + + discardedClient->failed = true; + TraceEvent(SevWarn, "DuplicateClientVersion") + .detail("Keeping", keptClient->libPath) + .detail("KeptProtocolVersion", keptClient->protocolVersion) + .detail("Disabling", discardedClient->libPath) + .detail("DisabledProtocolVersion", discardedClient->protocolVersion); -void MultiVersionDatabase::Connector::error(const Error& e, int& userParam) { - if (e.code() != error_code_operation_cancelled) { - // TODO: is it right to abandon this connection attempt? - client->failed = true; MultiVersionApi::api->updateSupportedVersions(); - TraceEvent(SevError, "DatabaseConnectionError").error(e).detail("ClientLibrary", this->client->libPath); } - delref(); + if (!client->protocolVersion.hasInexpensiveMultiVersionClient() && !client->failed) { + TraceEvent("AddingLegacyVersionMonitor") + .detail("LibPath", client->libPath) + .detail("ProtocolVersion", client->protocolVersion); + + legacyVersionMonitors.emplace_back(new LegacyVersionMonitor(client)); + } } -MultiVersionDatabase::DatabaseState::DatabaseState() - : dbVar(new ThreadSafeAsyncVar>(Reference(nullptr))), currentClientIndex(-1) {} +// Watch the cluster protocol version for changes and update the database state when it does. 
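For orientation, the same monitoring can be driven through the C API introduced earlier in this patch; a minimal sketch (error handling elided):

    #define FDB_API_VERSION 710
    #include <foundationdb/fdb_c.h>

    // Blocks until the cluster protocol differs from last_seen (pass 0 to get
    // the current version immediately), mirroring the expected-version
    // semantics documented above.
    uint64_t watchClusterProtocol(FDBDatabase* db, uint64_t last_seen) {
        FDBFuture* f = fdb_database_get_server_protocol(db, last_seen);
        fdb_future_block_until_ready(f);
        uint64_t pv = 0;
        fdb_future_get_uint64(f, &pv); // getter loaded for header version >= 700
        fdb_future_destroy(f);
        return pv;
    }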
+// Must be called from the main thread
+ThreadFuture<Void> MultiVersionDatabase::DatabaseState::monitorProtocolVersion() {
+    startLegacyVersionMonitors();
-// Only called from main thread
-void MultiVersionDatabase::DatabaseState::stateChanged() {
-    int newIndex = -1;
-    for (int i = 0; i < clients.size(); ++i) {
-        if (i != currentClientIndex && connectionAttempts[i]->connected) {
-            if (currentClientIndex >= 0 && !clients[i]->canReplace(clients[currentClientIndex])) {
-                TraceEvent(SevWarn, "DuplicateClientVersion")
-                    .detail("Keeping", clients[currentClientIndex]->libPath)
-                    .detail("KeptClientProtocolVersion", clients[currentClientIndex]->protocolVersion.version())
-                    .detail("Disabling", clients[i]->libPath)
-                    .detail("DisabledClientProtocolVersion", clients[i]->protocolVersion.version());
-                connectionAttempts[i]->connected = false; // Permanently disable this client in favor of the current one
-                clients[i]->failed = true;
-                MultiVersionApi::api->updateSupportedVersions();
-                return;
+    Optional<ProtocolVersion> expected = dbProtocolVersion;
+    ThreadFuture<ProtocolVersion> f = versionMonitorDb->getServerProtocol(dbProtocolVersion);
+
+    Reference<DatabaseState> self = Reference<DatabaseState>::addRef(this);
+    return mapThreadFuture(f, [self, expected](ErrorOr<ProtocolVersion> cv) {
+        if (cv.isError()) {
+            if (cv.getError().code() == error_code_operation_cancelled) {
+                return ErrorOr<Void>(cv.getError());
             }
-            newIndex = i;
-            break;
+            TraceEvent("ErrorGettingClusterProtocolVersion")
+                .detail("ExpectedProtocolVersion", expected)
+                .error(cv.getError());
+        }
+
+        ProtocolVersion clusterVersion =
+            !cv.isError() ? cv.get() : self->dbProtocolVersion.orDefault(currentProtocolVersion);
+        onMainThreadVoid([self, clusterVersion]() { self->protocolVersionChanged(clusterVersion); }, nullptr);
+        return ErrorOr<Void>(Void());
+    });
+}
+
+// Called when a change to the protocol version of the cluster has been detected.
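The decision protocolVersionChanged() makes below boils down to one predicate; a sketch, assuming normalizedVersion() masks the compatibility-irrelevant bits of the version word:

    bool isCompatibleChange(Optional<ProtocolVersion> current, ProtocolVersion reported) {
        return current.present() &&
               reported.normalizedVersion() == current.get().normalizedVersion();
    }
    // true  -> keep the wrapped database, record the new version, restart the monitor
    // false -> look up clients[reported.normalizedVersion()] and swap the wrapped
    //          database via updateDatabase()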
+// Must be called from the main thread +void MultiVersionDatabase::DatabaseState::protocolVersionChanged(ProtocolVersion protocolVersion) { + // If the protocol version changed but is still compatible, update our local version but keep the same connection + if (dbProtocolVersion.present() && + protocolVersion.normalizedVersion() == dbProtocolVersion.get().normalizedVersion()) { + dbProtocolVersion = protocolVersion; + + ASSERT(protocolVersionMonitor.isValid()); + protocolVersionMonitor.cancel(); + protocolVersionMonitor = monitorProtocolVersion(); + } + + // The protocol version has changed to a different, incompatible version + else { + TraceEvent("ProtocolVersionChanged") + .detail("NewProtocolVersion", protocolVersion) + .detail("OldProtocolVersion", dbProtocolVersion); + + dbProtocolVersion = protocolVersion; + + auto itr = clients.find(protocolVersion.normalizedVersion()); + if (itr != clients.end()) { + auto& client = itr->second; + TraceEvent("CreatingDatabaseOnClient") + .detail("LibraryPath", client->libPath) + .detail("Failed", client->failed) + .detail("External", client->external); + + Reference newDb = client->api->createDatabase(clusterFilePath.c_str()); + + if (client->external && !MultiVersionApi::apiVersionAtLeast(610)) { + // Old API versions return a future when creating the database, so we need to wait for it + Reference self = Reference::addRef(this); + dbReady = mapThreadFuture( + newDb.castTo()->onReady(), [self, newDb, client](ErrorOr ready) { + if (!ready.isError()) { + onMainThreadVoid([self, newDb, client]() { self->updateDatabase(newDb, client); }, nullptr); + } else { + onMainThreadVoid([self, client]() { self->updateDatabase(Reference(), client); }, + nullptr); + } + + return ready; + }); + } else { + updateDatabase(newDb, client); + } + } else { + // We don't have a client matching the current protocol + updateDatabase(Reference(), Reference()); } } +} - if (newIndex == -1) { - ASSERT_EQ(currentClientIndex, 0); // This can only happen for the local client, which we set as the current - // connection before we know it's connected - return; - } +// Replaces the active database connection with a new one. Must be called from the main thread. +void MultiVersionDatabase::DatabaseState::updateDatabase(Reference newDb, Reference client) { + if (newDb) { + optionLock.enter(); + for (auto option : options) { + try { + // In practice, this will set a deferred error instead of throwing. If that happens, the database + // will be unusable (attempts to use it will throw errors). + newDb->setOption(option.first, option.second.castTo()); + } catch (Error& e) { + optionLock.leave(); - // Restart connection for replaced client - auto newDb = connectionAttempts[newIndex]->candidateDatabase; - - optionLock.enter(); - for (auto option : options) { - try { - newDb->setOption(option.first, - option.second.castTo()); // In practice, this will set a deferred error instead - // of throwing. If that happens, the database will be - // unusable (attempts to use it will throw errors). 
- } catch (Error& e) { - optionLock.leave(); - TraceEvent(SevError, "ClusterVersionChangeOptionError") - .error(e) - .detail("Option", option.first) - .detail("OptionValue", option.second) - .detail("LibPath", clients[newIndex]->libPath); - connectionAttempts[newIndex]->connected = false; - clients[newIndex]->failed = true; - MultiVersionApi::api->updateSupportedVersions(); - return; // If we can't set all of the options on a cluster, we abandon the client + // If we can't set all of the options on a cluster, we abandon the client + TraceEvent(SevError, "ClusterVersionChangeOptionError") + .error(e) + .detail("Option", option.first) + .detail("OptionValue", option.second) + .detail("LibPath", client->libPath); + client->failed = true; + MultiVersionApi::api->updateSupportedVersions(); + newDb = Reference(); + break; + } } - } - db = newDb; - optionLock.leave(); + db = newDb; + + optionLock.leave(); + + if (dbProtocolVersion.get().hasStableInterfaces() && db) { + versionMonitorDb = db; + } else { + // For older clients that don't have an API to get the protocol version, we have to monitor it locally + versionMonitorDb = MultiVersionApi::api->getLocalClient()->api->createDatabase(clusterFilePath.c_str()); + } + } else { + // We don't have a database connection, so use the local client to monitor the protocol version + db = Reference(); + versionMonitorDb = MultiVersionApi::api->getLocalClient()->api->createDatabase(clusterFilePath.c_str()); + } dbVar->set(db); - if (currentClientIndex >= 0 && connectionAttempts[currentClientIndex]->connected) { - connectionAttempts[currentClientIndex]->connected = false; - connectionAttempts[currentClientIndex]->connect(); - } - - ASSERT(newIndex >= 0 && newIndex < clients.size()); - currentClientIndex = newIndex; + ASSERT(protocolVersionMonitor.isValid()); + protocolVersionMonitor.cancel(); + protocolVersionMonitor = monitorProtocolVersion(); } -void MultiVersionDatabase::DatabaseState::addConnection(Reference client, std::string clusterFilePath) { - clients.push_back(client); - connectionAttempts.push_back( - makeReference(Reference::addRef(this), client, clusterFilePath)); -} - -void MultiVersionDatabase::DatabaseState::startConnections() { - for (auto c : connectionAttempts) { - c->connect(); +// Starts version monitors for old client versions that don't support connect packet monitoring (<= 5.0). 
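How the protocol-monitoring connection is chosen can be distilled into a hypothetical helper (a sketch of the logic in updateDatabase() above; hasStableInterfaces() is the capability gate the patch uses):

    Reference<IDatabase> chooseVersionMonitorDb(Reference<IDatabase> db,
                                                Optional<ProtocolVersion> pv,
                                                std::string const& clusterFilePath) {
        // A connected database on a new-enough client answers protocol queries itself
        if (db && pv.present() && pv.get().hasStableInterfaces())
            return db;
        // Otherwise monitor through a database created on the locally linked client
        return MultiVersionApi::api->getLocalClient()->api->createDatabase(clusterFilePath.c_str());
    }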
+// Must be called from the main thread +void MultiVersionDatabase::DatabaseState::startLegacyVersionMonitors() { + for (auto itr = legacyVersionMonitors.begin(); itr != legacyVersionMonitors.end(); ++itr) { + while (itr != legacyVersionMonitors.end() && (*itr)->client->failed) { + (*itr)->close(); + itr = legacyVersionMonitors.erase(itr); + } + if (itr != legacyVersionMonitors.end() && + (!dbProtocolVersion.present() || (*itr)->client->protocolVersion != dbProtocolVersion.get())) { + (*itr)->startConnectionMonitor(Reference::addRef(this)); + } } } -void MultiVersionDatabase::DatabaseState::cancelConnections() { - addref(); +// Cleans up state for the legacy version monitors to break reference cycles +void MultiVersionDatabase::DatabaseState::close() { + Reference self = Reference::addRef(this); onMainThreadVoid( - [this]() { - for (auto c : connectionAttempts) { - c->cancel(); + [self]() { + if (self->protocolVersionMonitor.isValid()) { + self->protocolVersionMonitor.cancel(); + } + for (auto monitor : self->legacyVersionMonitors) { + monitor->close(); } - connectionAttempts.clear(); - clients.clear(); - delref(); + self->legacyVersionMonitors.clear(); }, nullptr); } -// MultiVersionApi +// Starts the connection monitor by creating a database object at an old version. +// Must be called from the main thread +void MultiVersionDatabase::LegacyVersionMonitor::startConnectionMonitor( + Reference dbState) { + if (!monitorRunning) { + monitorRunning = true; + auto itr = dbState->legacyDatabaseConnections.find(client->protocolVersion); + ASSERT(itr != dbState->legacyDatabaseConnections.end()); + + db = itr->second; + tr = Reference(); + + TraceEvent("StartingLegacyVersionMonitor").detail("ProtocolVersion", client->protocolVersion); + Reference self = Reference::addRef(this); + versionMonitor = + mapThreadFuture(db.castTo()->onReady(), [self, dbState](ErrorOr ready) { + onMainThreadVoid( + [self, ready, dbState]() { + if (ready.isError()) { + if (ready.getError().code() != error_code_operation_cancelled) { + TraceEvent(SevError, "FailedToOpenDatabaseOnClient") + .error(ready.getError()) + .detail("LibPath", self->client->libPath); + + self->client->failed = true; + MultiVersionApi::api->updateSupportedVersions(); + } + } else { + self->runGrvProbe(dbState); + } + }, + nullptr); + + return ready; + }); + } +} + +// Runs a GRV probe on the cluster to determine if the client version is compatible with the cluster. 
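The probe's compatibility rule, isolated for clarity (a sketch; the real callback below also clears monitorRunning and notifies the DatabaseState):

    // Any reply other than operation_cancelled -- including an error -- means a
    // cluster speaking this client's protocol version was reached.
    bool grvProbeIndicatesMatch(ErrorOr<Version> const& v) {
        return !v.isError() || v.getError().code() != error_code_operation_cancelled;
    }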
+// Must be called from main thread +void MultiVersionDatabase::LegacyVersionMonitor::runGrvProbe(Reference dbState) { + tr = db->createTransaction(); + Reference self = Reference::addRef(this); + versionMonitor = mapThreadFuture(tr->getReadVersion(), [self, dbState](ErrorOr v) { + // If the version attempt returns an error, we regard that as a connection (except operation_cancelled) + if (!v.isError() || v.getError().code() != error_code_operation_cancelled) { + onMainThreadVoid( + [self, dbState]() { + self->monitorRunning = false; + dbState->protocolVersionChanged(self->client->protocolVersion); + }, + nullptr); + } + + return v.map([](Version v) { return Void(); }); + }); +} + +void MultiVersionDatabase::LegacyVersionMonitor::close() { + if (versionMonitor.isValid()) { + versionMonitor.cancel(); + } +} + +std::atomic_flag MultiVersionDatabase::externalClientsInitialized = ATOMIC_FLAG_INIT; + +// MultiVersionApi bool MultiVersionApi::apiVersionAtLeast(int minVersion) { ASSERT_NE(MultiVersionApi::api->apiVersion, 0); return MultiVersionApi::api->apiVersion >= minVersion || MultiVersionApi::api->apiVersion < 0; @@ -1181,10 +1319,6 @@ const char* MultiVersionApi::getClientVersion() { return localClient->api->getClientVersion(); } -ThreadFuture MultiVersionApi::getServerProtocol(const char* clusterFilePath) { - return api->localClient->api->getServerProtocol(clusterFilePath); -} - void validateOption(Optional value, bool canBePresent, bool canBeAbsent, bool canBeEmpty = true) { ASSERT(canBePresent || canBeAbsent); @@ -1581,6 +1715,7 @@ void MultiVersionApi::addNetworkThreadCompletionHook(void (*hook)(void*), void* } } +// Creates an IDatabase object that represents a connection to the cluster Reference MultiVersionApi::createDatabase(const char* clusterFilePath) { lock.enter(); if (!networkSetup) { @@ -1595,28 +1730,21 @@ Reference MultiVersionApi::createDatabase(const char* clusterFilePath int threadIdx = nextThread; nextThread = (nextThread + 1) % threadCount; lock.leave(); - for (auto it : externalClients) { - TraceEvent("CreatingDatabaseOnExternalClient") - .detail("LibraryPath", it.first) - .detail("Failed", it.second[threadIdx]->failed); - } - return Reference(new MultiVersionDatabase(this, threadIdx, clusterFile, Reference())); + + Reference localDb = localClient->api->createDatabase(clusterFilePath); + return Reference( + new MultiVersionDatabase(this, threadIdx, clusterFile, Reference(), localDb)); } lock.leave(); ASSERT_LE(threadCount, 1); - auto db = localClient->api->createDatabase(clusterFilePath); + Reference localDb = localClient->api->createDatabase(clusterFilePath); if (bypassMultiClientApi) { - return db; + return localDb; } else { - for (auto it : externalClients) { - TraceEvent("CreatingDatabaseOnExternalClient") - .detail("LibraryPath", it.first) - .detail("Failed", it.second[0]->failed); - } - return Reference(new MultiVersionDatabase(this, 0, clusterFile, db)); + return Reference(new MultiVersionDatabase(this, 0, clusterFile, Reference(), localDb)); } } @@ -1948,6 +2076,12 @@ ACTOR Future checkUndestroyedFutures(std::vector THREAD_FUNC runSingleAssignmentVarTest(void* arg) { noUnseed = true; @@ -1960,6 +2094,9 @@ THREAD_FUNC runSingleAssignmentVarTest(void* arg) { tf.validate(); tf.future.extractPtr(); // leaks + for (auto t : tf.threads) { + waitThread(t); + } } for (int numRuns = 0; numRuns < 25; ++numRuns) { @@ -2030,12 +2167,14 @@ struct AbortableTest { TEST_CASE("/fdbclient/multiversionclient/AbortableSingleAssignmentVar") { state volatile bool done = false; - 
g_network->startThread(runSingleAssignmentVarTest, (void*)&done); + state THREAD_HANDLE thread = g_network->startThread(runSingleAssignmentVarTest, (void*)&done); while (!done) { wait(delay(1.0)); } + waitThread(thread); + return Void(); } @@ -2107,20 +2246,24 @@ TEST_CASE("/fdbclient/multiversionclient/DLSingleAssignmentVar") { state volatile bool done = false; MultiVersionApi::api->callbackOnMainThread = true; - g_network->startThread(runSingleAssignmentVarTest, (void*)&done); + state THREAD_HANDLE thread = g_network->startThread(runSingleAssignmentVarTest, (void*)&done); while (!done) { wait(delay(1.0)); } + waitThread(thread); + done = false; MultiVersionApi::api->callbackOnMainThread = false; - g_network->startThread(runSingleAssignmentVarTest, (void*)&done); + thread = g_network->startThread(runSingleAssignmentVarTest, (void*)&done); while (!done) { wait(delay(1.0)); } + waitThread(thread); + return Void(); } @@ -2145,12 +2288,14 @@ struct MapTest { TEST_CASE("/fdbclient/multiversionclient/MapSingleAssignmentVar") { state volatile bool done = false; - g_network->startThread(runSingleAssignmentVarTest, (void*)&done); + state THREAD_HANDLE thread = g_network->startThread(runSingleAssignmentVarTest, (void*)&done); while (!done) { wait(delay(1.0)); } + waitThread(thread); + return Void(); } @@ -2182,11 +2327,13 @@ struct FlatMapTest { TEST_CASE("/fdbclient/multiversionclient/FlatMapSingleAssignmentVar") { state volatile bool done = false; - g_network->startThread(runSingleAssignmentVarTest, (void*)&done); + state THREAD_HANDLE thread = g_network->startThread(runSingleAssignmentVarTest, (void*)&done); while (!done) { wait(delay(1.0)); } + waitThread(thread); + return Void(); } diff --git a/fdbclient/MultiVersionTransaction.h b/fdbclient/MultiVersionTransaction.h index ea16f4f35e..a98e16b440 100644 --- a/fdbclient/MultiVersionTransaction.h +++ b/fdbclient/MultiVersionTransaction.h @@ -28,6 +28,8 @@ #include "flow/ThreadHelper.actor.h" +// FdbCApi is used as a wrapper around the FoundationDB C API that gets loaded from an external client library. +// All of the required functions loaded from that external library are stored in function pointers in this struct. struct FdbCApi : public ThreadSafeReferenceCounted { typedef struct future FDBFuture; typedef struct cluster FDBCluster; @@ -55,7 +57,6 @@ struct FdbCApi : public ThreadSafeReferenceCounted { // Network fdb_error_t (*selectApiVersion)(int runtimeVersion, int headerVersion); const char* (*getClientVersion)(); - FDBFuture* (*getServerProtocol)(const char* clusterFilePath); fdb_error_t (*setNetworkOption)(FDBNetworkOptions::Option option, uint8_t const* value, int valueLength); fdb_error_t (*setupNetwork)(); fdb_error_t (*runNetwork)(); @@ -81,6 +82,7 @@ struct FdbCApi : public ThreadSafeReferenceCounted { uint8_t const* snapshotCommmand, int snapshotCommandLength); double (*databaseGetMainThreadBusyness)(FDBDatabase* database); + FDBFuture* (*databaseGetServerProtocol)(FDBDatabase* database, uint64_t expectedVersion); // Transaction fdb_error_t (*transactionSetOption)(FDBTransaction* tr, @@ -185,6 +187,8 @@ struct FdbCApi : public ThreadSafeReferenceCounted { fdb_error_t (*futureGetCluster)(FDBFuture* f, FDBCluster** outCluster); }; +// An implementation of ITransaction that wraps a transaction object created on an externally loaded client library. +// All API calls to that transaction are routed through the external library. 
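The struct below is populated by loadClientFunction(); the underlying pattern is plain symbol resolution, roughly as follows (a sketch; loadFunction is the existing dlsym-style helper, the symbol name is as exported by fdb_c, and treating it as required mirrors DLApi::init()):

    FdbCApi api;
    // Store the resolved symbol in the matching function-pointer field; optional
    // functions may legitimately resolve to nullptr on older client libraries.
    *(void**)(&api.getClientVersion) = loadFunction(lib, "fdb_get_client_version");
    if (api.getClientVersion == nullptr) {
        throw platform_error(); // a required function is missing from the library
    }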
class DLTransaction : public ITransaction, ThreadSafeReferenceCounted { public: DLTransaction(Reference api, FdbCApi::FDBTransaction* tr) : api(api), tr(tr) {} @@ -196,24 +200,24 @@ public: ThreadFuture> get(const KeyRef& key, bool snapshot = false) override; ThreadFuture getKey(const KeySelectorRef& key, bool snapshot = false) override; - ThreadFuture> getRange(const KeySelectorRef& begin, - const KeySelectorRef& end, - int limit, - bool snapshot = false, - bool reverse = false) override; - ThreadFuture> getRange(const KeySelectorRef& begin, - const KeySelectorRef& end, - GetRangeLimits limits, - bool snapshot = false, - bool reverse = false) override; - ThreadFuture> getRange(const KeyRangeRef& keys, - int limit, - bool snapshot = false, - bool reverse = false) override; - ThreadFuture> getRange(const KeyRangeRef& keys, - GetRangeLimits limits, - bool snapshot = false, - bool reverse = false) override; + ThreadFuture getRange(const KeySelectorRef& begin, + const KeySelectorRef& end, + int limit, + bool snapshot = false, + bool reverse = false) override; + ThreadFuture getRange(const KeySelectorRef& begin, + const KeySelectorRef& end, + GetRangeLimits limits, + bool snapshot = false, + bool reverse = false) override; + ThreadFuture getRange(const KeyRangeRef& keys, + int limit, + bool snapshot = false, + bool reverse = false) override; + ThreadFuture getRange(const KeyRangeRef& keys, + GetRangeLimits limits, + bool snapshot = false, + bool reverse = false) override; ThreadFuture>> getAddressesForKey(const KeyRef& key) override; ThreadFuture> getVersionstamp() override; ThreadFuture getEstimatedRangeSizeBytes(const KeyRangeRef& keys) override; @@ -249,6 +253,8 @@ private: FdbCApi::FDBTransaction* const tr; }; +// An implementation of IDatabase that wraps a database object created on an externally loaded client library. +// All API calls to that database are routed through the external library. class DLDatabase : public IDatabase, ThreadSafeReferenceCounted { public: DLDatabase(Reference api, FdbCApi::FDBDatabase* db) : api(api), db(db), ready(Void()) {} @@ -265,6 +271,12 @@ public: void setOption(FDBDatabaseOptions::Option option, Optional value = Optional()) override; double getMainThreadBusyness() override; + // Returns the protocol version reported by the coordinator this client is connected to + // If an expected version is given, the future won't return until the protocol version is different than expected + // Note: this will never return if the server is running a protocol from FDB 5.0 or older + ThreadFuture getServerProtocol( + Optional expectedVersion = Optional()) override; + void addref() override { ThreadSafeReferenceCounted::addref(); } void delref() override { ThreadSafeReferenceCounted::delref(); } @@ -279,13 +291,14 @@ private: ThreadFuture ready; }; +// An implementation of IClientApi that re-issues API calls to the C API of an externally loaded client library. +// The DL prefix stands for "dynamic library". 
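A note on the getRange signature churn above: the Standalone-to-RangeResult change throughout this patch is mechanical, with RangeResult understood to be a named alias defined in fdbclient's type headers rather than in this hunk:

    typedef Standalone<RangeResultRef> RangeResult; // assumed alias, not in this patch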
class DLApi : public IClientApi { public: DLApi(std::string fdbCPath, bool unlinkOnLoad = false); void selectApiVersion(int apiVersion) override; const char* getClientVersion() override; - ThreadFuture getServerProtocol(const char* clusterFilePath) override; void setNetworkOption(FDBNetworkOptions::Option option, Optional value = Optional()) override; void setupNetwork() override; @@ -312,6 +325,9 @@ private: class MultiVersionDatabase; +// An implementation of ITransaction that wraps a transaction created either locally or through a dynamically loaded +// external client. When needed (e.g on cluster version change), the MultiVersionTransaction can automatically replace +// its wrapped transaction with one from another client. class MultiVersionTransaction : public ITransaction, ThreadSafeReferenceCounted { public: MultiVersionTransaction(Reference db, @@ -323,24 +339,24 @@ public: ThreadFuture> get(const KeyRef& key, bool snapshot = false) override; ThreadFuture getKey(const KeySelectorRef& key, bool snapshot = false) override; - ThreadFuture> getRange(const KeySelectorRef& begin, - const KeySelectorRef& end, - int limit, - bool snapshot = false, - bool reverse = false) override; - ThreadFuture> getRange(const KeySelectorRef& begin, - const KeySelectorRef& end, - GetRangeLimits limits, - bool snapshot = false, - bool reverse = false) override; - ThreadFuture> getRange(const KeyRangeRef& keys, - int limit, - bool snapshot = false, - bool reverse = false) override; - ThreadFuture> getRange(const KeyRangeRef& keys, - GetRangeLimits limits, - bool snapshot = false, - bool reverse = false) override; + ThreadFuture getRange(const KeySelectorRef& begin, + const KeySelectorRef& end, + int limit, + bool snapshot = false, + bool reverse = false) override; + ThreadFuture getRange(const KeySelectorRef& begin, + const KeySelectorRef& end, + GetRangeLimits limits, + bool snapshot = false, + bool reverse = false) override; + ThreadFuture getRange(const KeyRangeRef& keys, + int limit, + bool snapshot = false, + bool reverse = false) override; + ThreadFuture getRange(const KeyRangeRef& keys, + GetRangeLimits limits, + bool snapshot = false, + bool reverse = false) override; ThreadFuture>> getAddressesForKey(const KeyRef& key) override; ThreadFuture> getVersionstamp() override; @@ -413,89 +429,147 @@ struct ClientInfo : ClientDesc, ThreadSafeReferenceCounted { class MultiVersionApi; +// An implementation of IDatabase that wraps a database created either locally or through a dynamically loaded +// external client. The MultiVersionDatabase monitors the protocol version of the cluster and automatically +// replaces the wrapped database when the protocol version changes. 
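From application code the wrapper described below is transparent; a usage sketch (cluster file path hypothetical):

    // Callers program against IDatabase/ITransaction and never see the switch:
    // on an incompatible protocol change the wrapped database is replaced, and
    // in-flight transactions fail with a retryable cluster_version_changed error.
    Reference<IDatabase> db = MultiVersionApi::api->createDatabase("fdb.cluster");
    Reference<ITransaction> tr = db->createTransaction();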
class MultiVersionDatabase final : public IDatabase, ThreadSafeReferenceCounted { public: MultiVersionDatabase(MultiVersionApi* api, int threadIdx, std::string clusterFilePath, Reference db, + Reference versionMonitorDb, bool openConnectors = true); + ~MultiVersionDatabase() override; Reference createTransaction() override; void setOption(FDBDatabaseOptions::Option option, Optional value = Optional()) override; double getMainThreadBusyness() override; + // Returns the protocol version reported by the coordinator this client is connected to + // If an expected version is given, the future won't return until the protocol version is different than expected + // Note: this will never return if the server is running a protocol from FDB 5.0 or older + ThreadFuture getServerProtocol( + Optional expectedVersion = Optional()) override; + void addref() override { ThreadSafeReferenceCounted::addref(); } void delref() override { ThreadSafeReferenceCounted::delref(); } + // Create a MultiVersionDatabase that wraps an already created IDatabase object + // For internal use in testing static Reference debugCreateFromExistingDatabase(Reference db); ThreadFuture rebootWorker(const StringRef& address, bool check, int duration) override; ThreadFuture forceRecoveryWithDataLoss(const StringRef& dcid) override; ThreadFuture createSnapshot(const StringRef& uid, const StringRef& snapshot_command) override; -private: - struct DatabaseState; + // private: - struct Connector : ThreadCallback, ThreadSafeReferenceCounted { - Connector(Reference dbState, Reference client, std::string clusterFilePath) - : dbState(dbState), client(client), clusterFilePath(clusterFilePath), connected(false), cancelled(false) {} - - void connect(); - void cancel(); - - bool canFire(int notMadeActive) const override { return true; } - void fire(const Void& unused, int& userParam) override; - void error(const Error& e, int& userParam) override; - - const Reference client; - const std::string clusterFilePath; - - const Reference dbState; - - ThreadFuture connectionFuture; - - Reference candidateDatabase; - Reference tr; - - bool connected; - bool cancelled; - }; + struct LegacyVersionMonitor; + // A struct that manages the current connection state of the MultiVersionDatabase. This wraps the underlying + // IDatabase object that is currently interacting with the cluster. struct DatabaseState : ThreadSafeReferenceCounted { - DatabaseState(); + DatabaseState(std::string clusterFilePath, Reference versionMonitorDb); - void stateChanged(); - void addConnection(Reference client, std::string clusterFilePath); - void startConnections(); - void cancelConnections(); + // Replaces the active database connection with a new one. Must be called from the main thread. + void updateDatabase(Reference newDb, Reference client); + + // Called when a change to the protocol version of the cluster has been detected. + // Must be called from the main thread + void protocolVersionChanged(ProtocolVersion protocolVersion); + + // Adds a client (local or externally loaded) that can be used to connect to the cluster + void addClient(Reference client); + + // Watch the cluster protocol version for changes and update the database state when it does. + // Must be called from the main thread + ThreadFuture monitorProtocolVersion(); + + // Starts version monitors for old client versions that don't support connect packet monitoring (<= 5.0). 
+ // Must be called from the main thread + void startLegacyVersionMonitors(); + + // Cleans up state for the legacy version monitors to break reference cycles + void close(); Reference db; const Reference>> dbVar; + std::string clusterFilePath; + + // Used to monitor the cluster protocol version. Will be the same as db unless we have either not connected + // yet or if the client version associated with db does not support protocol monitoring. In those cases, + // this will be a specially created local db. + Reference versionMonitorDb; ThreadFuture changed; bool cancelled; - int currentClientIndex; - std::vector> clients; - std::vector> connectionAttempts; + ThreadFuture dbReady; + ThreadFuture protocolVersionMonitor; + + // Versions older than 6.1 do not benefit from having their database connections closed. Additionally, + // there are various issues that result in negative behavior in some cases if the connections are closed. + // Therefore, we leave them open. + std::map> legacyDatabaseConnections; + + // Versions 5.0 and older do not support connection packet monitoring and require alternate techniques to + // determine the cluster version. + std::list> legacyVersionMonitors; + + Optional dbProtocolVersion; + + // This maps a normalized protocol version to the client associated with it. This prevents compatible + // differences in protocol version not matching each other. + std::map> clients; std::vector>>> options; UniqueOrderedOptionList transactionDefaultOptions; Mutex optionLock; }; + // A struct that enables monitoring whether the cluster is running an old version (<= 5.0) that doesn't support + // connect packet monitoring. + struct LegacyVersionMonitor : ThreadSafeReferenceCounted { + LegacyVersionMonitor(Reference const& client) : client(client), monitorRunning(false) {} + + // Terminates the version monitor to break reference cycles + void close(); + + // Starts the connection monitor by creating a database object at an old version. + // Must be called from the main thread + void startConnectionMonitor(Reference dbState); + + // Runs a GRV probe on the cluster to determine if the client version is compatible with the cluster. + // Must be called from main thread + void runGrvProbe(Reference dbState); + + Reference client; + Reference db; + Reference tr; + + ThreadFuture versionMonitor; + bool monitorRunning; + }; + const Reference dbState; friend class MultiVersionTransaction; + + // Clients must create a database object in order to initialize some of their state. + // This needs to be done only once, and this flag tracks whether that has happened. + static std::atomic_flag externalClientsInitialized; }; +// An implementation of IClientApi that can choose between multiple different client implementations either provided +// locally within the primary loaded fdb_c client or through any number of dynamically loaded clients. +// +// This functionality is used to provide support for multiple protocol versions simultaneously. 
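Applications opt into this machinery through the standard network options, set before the network is started; a C-API sketch (directory path hypothetical):

    const char* dir = "/opt/fdb/clients"; // holds external libfdb_c builds, one per version
    fdb_network_set_option(FDB_NET_OPTION_EXTERNAL_CLIENT_DIRECTORY,
                           (const uint8_t*)dir, (int)strlen(dir));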
class MultiVersionApi : public IClientApi { public: void selectApiVersion(int apiVersion) override; const char* getClientVersion() override; - ThreadFuture getServerProtocol(const char* clusterFilePath) override; void setNetworkOption(FDBNetworkOptions::Option option, Optional value = Optional()) override; void setupNetwork() override; @@ -503,6 +577,7 @@ public: void stopNetwork() override; void addNetworkThreadCompletionHook(void (*hook)(void*), void* hookParameter) override; + // Creates an IDatabase object that represents a connection to the cluster Reference createDatabase(const char* clusterFilePath) override; static MultiVersionApi* api; diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 472c232b95..ebeb6da937 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -36,7 +36,9 @@ #include "fdbclient/ClusterInterface.h" #include "fdbclient/CoordinationInterface.h" #include "fdbclient/DatabaseContext.h" +#include "fdbclient/GlobalConfig.actor.h" #include "fdbclient/JsonBuilder.h" +#include "fdbclient/KeyBackedTypes.h" #include "fdbclient/KeyRangeMap.h" #include "fdbclient/Knobs.h" #include "fdbclient/ManagementAPI.actor.h" @@ -120,6 +122,40 @@ NetworkOptions::NetworkOptions() static const Key CLIENT_LATENCY_INFO_PREFIX = LiteralStringRef("client_latency/"); static const Key CLIENT_LATENCY_INFO_CTR_PREFIX = LiteralStringRef("client_latency_counter/"); +void DatabaseContext::maybeAddTssMapping(StorageServerInterface const& ssi) { + // add tss mapping if server is new + + Optional tssPair = clientInfo->get().getTssPair(ssi.id()); + if (tssPair.present()) { + addTssMapping(ssi, tssPair.get()); + } +} + +// calling getInterface potentially recursively is weird, but since this function is only called when an entry is +// created/changed, the recursive call should never recurse itself. +void DatabaseContext::addTssMapping(StorageServerInterface const& ssi, StorageServerInterface const& tssi) { + Reference tssInfo = StorageServerInfo::getInterface(this, tssi, clientLocality); + Reference ssInfo = StorageServerInfo::getInterface(this, ssi, clientLocality); + + Reference metrics = makeReference(); + tssMetrics[tssi.id()] = metrics; + + // Add each read data request we want to duplicate to TSS to endpoint mapping (getValue, getKey, getKeyValues, + // watchValue) + queueModel.updateTssEndpoint( + ssInfo->interf.getValue.getEndpoint().token.first(), + TSSEndpointData(tssi.id(), tssInfo->interf.getValue.getEndpoint(), metrics, clientInfo->get().id)); + queueModel.updateTssEndpoint( + ssInfo->interf.getKey.getEndpoint().token.first(), + TSSEndpointData(tssi.id(), tssInfo->interf.getKey.getEndpoint(), metrics, clientInfo->get().id)); + queueModel.updateTssEndpoint( + ssInfo->interf.getKeyValues.getEndpoint().token.first(), + TSSEndpointData(tssi.id(), tssInfo->interf.getKeyValues.getEndpoint(), metrics, clientInfo->get().id)); + queueModel.updateTssEndpoint( + ssInfo->interf.watchValue.getEndpoint().token.first(), + TSSEndpointData(tssi.id(), tssInfo->interf.watchValue.getEndpoint(), metrics, clientInfo->get().id)); +} + Reference StorageServerInfo::getInterface(DatabaseContext* cx, StorageServerInterface const& ssi, LocalityData const& locality) { @@ -132,11 +168,14 @@ Reference StorageServerInfo::getInterface(DatabaseContext* cx // pointing to. This is technically correct, but is very unnatural. We may want to refactor load // balance to take an AsyncVar> so that it is notified when the interface // changes. 
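The four registrations in addTssMapping() below follow one pattern; restated as a loop to make it visible (a sketch using the same names as the patch):

    // Pair each mirrored read RPC's storage-server endpoint with its TSS twin,
    // so the queue model can duplicate sampled requests and compare results.
    std::pair<Endpoint, Endpoint> mirrored[] = {
        { ssInfo->interf.getValue.getEndpoint(), tssInfo->interf.getValue.getEndpoint() },
        { ssInfo->interf.getKey.getEndpoint(), tssInfo->interf.getKey.getEndpoint() },
        { ssInfo->interf.getKeyValues.getEndpoint(), tssInfo->interf.getKeyValues.getEndpoint() },
        { ssInfo->interf.watchValue.getEndpoint(), tssInfo->interf.watchValue.getEndpoint() },
    };
    for (auto& [ssEp, tssEp] : mirrored) {
        queueModel.updateTssEndpoint(ssEp.token.first(),
                                     TSSEndpointData(tssi.id(), tssEp, metrics, clientInfo->get().id));
    }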
+ it->second->interf = ssi; + cx->maybeAddTssMapping(ssi); } else { it->second->notifyContextDestroyed(); Reference loc(new StorageServerInfo(cx, ssi, locality)); cx->server_interf[ssi.id()] = loc.getPtr(); + cx->maybeAddTssMapping(ssi); return loc; } } @@ -146,6 +185,7 @@ Reference StorageServerInfo::getInterface(DatabaseContext* cx Reference loc(new StorageServerInfo(cx, ssi, locality)); cx->server_interf[ssi.id()] = loc.getPtr(); + cx->maybeAddTssMapping(ssi); return loc; } @@ -284,6 +324,13 @@ void delref(DatabaseContext* ptr) { ptr->delref(); } +void traceTSSErrors(const char* name, UID tssId, const std::unordered_map& errorsByCode) { + TraceEvent ev(name, tssId); + for (auto& it : errorsByCode) { + ev.detail("E" + std::to_string(it.first), it.second); + } +} + ACTOR Future databaseLogger(DatabaseContext* cx) { state double lastLogged = 0; loop { @@ -326,6 +373,62 @@ ACTOR Future databaseLogger(DatabaseContext* cx) { cx->mutationsPerCommit.clear(); cx->bytesPerCommit.clear(); + for (const auto& it : cx->tssMetrics) { + // TODO could skip this tss if request counter is zero? would potentially complicate elapsed calculation + // though + if (it.second->mismatches.getIntervalDelta()) { + cx->tssMismatchStream.send(it.first); + } + + // do error histograms as separate event + if (it.second->ssErrorsByCode.size()) { + traceTSSErrors("TSS_SSErrors", it.first, it.second->ssErrorsByCode); + } + + if (it.second->tssErrorsByCode.size()) { + traceTSSErrors("TSS_TSSErrors", it.first, it.second->tssErrorsByCode); + } + + TraceEvent tssEv("TSSClientMetrics", cx->dbId); + tssEv.detail("TSSID", it.first) + .detail("Elapsed", (lastLogged == 0) ? 0 : now() - lastLogged) + .detail("Internal", cx->internal); + + it.second->cc.logToTraceEvent(tssEv); + + tssEv.detail("MeanSSGetValueLatency", it.second->SSgetValueLatency.mean()) + .detail("MedianSSGetValueLatency", it.second->SSgetValueLatency.median()) + .detail("SSGetValueLatency90", it.second->SSgetValueLatency.percentile(0.90)) + .detail("SSGetValueLatency99", it.second->SSgetValueLatency.percentile(0.99)); + + tssEv.detail("MeanTSSGetValueLatency", it.second->TSSgetValueLatency.mean()) + .detail("MedianTSSGetValueLatency", it.second->TSSgetValueLatency.median()) + .detail("TSSGetValueLatency90", it.second->TSSgetValueLatency.percentile(0.90)) + .detail("TSSGetValueLatency99", it.second->TSSgetValueLatency.percentile(0.99)); + + tssEv.detail("MeanSSGetKeyLatency", it.second->SSgetKeyLatency.mean()) + .detail("MedianSSGetKeyLatency", it.second->SSgetKeyLatency.median()) + .detail("SSGetKeyLatency90", it.second->SSgetKeyLatency.percentile(0.90)) + .detail("SSGetKeyLatency99", it.second->SSgetKeyLatency.percentile(0.99)); + + tssEv.detail("MeanTSSGetKeyLatency", it.second->TSSgetKeyLatency.mean()) + .detail("MedianTSSGetKeyLatency", it.second->TSSgetKeyLatency.median()) + .detail("TSSGetKeyLatency90", it.second->TSSgetKeyLatency.percentile(0.90)) + .detail("TSSGetKeyLatency99", it.second->TSSgetKeyLatency.percentile(0.99)); + + tssEv.detail("MeanSSGetKeyValuesLatency", it.second->SSgetKeyLatency.mean()) + .detail("MedianSSGetKeyValuesLatency", it.second->SSgetKeyLatency.median()) + .detail("SSGetKeyValuesLatency90", it.second->SSgetKeyLatency.percentile(0.90)) + .detail("SSGetKeyValuesLatency99", it.second->SSgetKeyLatency.percentile(0.99)); + + tssEv.detail("MeanTSSGetKeyValuesLatency", it.second->TSSgetKeyValuesLatency.mean()) + .detail("MedianTSSGetKeyValuesLatency", it.second->TSSgetKeyValuesLatency.median()) + .detail("TSSGetKeyValuesLatency90", 
it.second->TSSgetKeyValuesLatency.percentile(0.90)) + .detail("TSSGetKeyValuesLatency99", it.second->TSSgetKeyValuesLatency.percentile(0.99)); + + it.second->clear(); + } + lastLogged = now(); } } @@ -385,7 +488,7 @@ ACTOR static Future delExcessClntTxnEntriesActor(Transaction* tr, int64_t ? (txInfoSize - clientTxInfoSizeLimit) : CLIENT_KNOBS->TRANSACTION_SIZE_LIMIT; GetRangeLimits limit(GetRangeLimits::ROW_LIMIT_UNLIMITED, getRangeByteLimit); - Standalone txEntries = + RangeResult txEntries = wait(tr->getRange(KeyRangeRef(clientLatencyName, strinc(clientLatencyName)), limit)); state int64_t numBytesToDel = 0; KeyRef endKey; @@ -505,12 +608,13 @@ ACTOR static Future clientStatusUpdateActor(DatabaseContext* cx) { } } cx->clientStatusUpdater.outStatusQ.clear(); - double clientSamplingProbability = std::isinf(cx->clientInfo->get().clientTxnInfoSampleRate) - ? CLIENT_KNOBS->CSI_SAMPLING_PROBABILITY - : cx->clientInfo->get().clientTxnInfoSampleRate; - int64_t clientTxnInfoSizeLimit = cx->clientInfo->get().clientTxnInfoSizeLimit == -1 - ? CLIENT_KNOBS->CSI_SIZE_LIMIT - : cx->clientInfo->get().clientTxnInfoSizeLimit; + wait(GlobalConfig::globalConfig().onInitialized()); + double sampleRate = GlobalConfig::globalConfig().get(fdbClientInfoTxnSampleRate, + std::numeric_limits::infinity()); + double clientSamplingProbability = + std::isinf(sampleRate) ? CLIENT_KNOBS->CSI_SAMPLING_PROBABILITY : sampleRate; + int64_t sizeLimit = GlobalConfig::globalConfig().get(fdbClientInfoTxnSizeLimit, -1); + int64_t clientTxnInfoSizeLimit = sizeLimit == -1 ? CLIENT_KNOBS->CSI_SIZE_LIMIT : sizeLimit; if (!trChunksQ.empty() && deterministicRandom()->random01() < clientSamplingProbability) wait(delExcessClntTxnEntriesActor(&tr, clientTxnInfoSizeLimit)); @@ -594,7 +698,7 @@ ACTOR Future updateCachedRanges(DatabaseContext* self, std::map range = wait(tr.getRange(storageCacheKeys, CLIENT_KNOBS->TOO_MANY)); + RangeResult range = wait(tr.getRange(storageCacheKeys, CLIENT_KNOBS->TOO_MANY)); ASSERT(!range.more); std::vector>> cacheInterfaces; cacheInterfaces.reserve(cacheServers->size()); @@ -671,8 +775,7 @@ ACTOR Future monitorCacheList(DatabaseContext* self) { // the cyclic reference to self. wait(refreshTransaction(self, &tr)); try { - Standalone cacheList = - wait(tr.getRange(storageCacheServerKeys, CLIENT_KNOBS->TOO_MANY)); + RangeResult cacheList = wait(tr.getRange(storageCacheServerKeys, CLIENT_KNOBS->TOO_MANY)); ASSERT(!cacheList.more); bool hasChanges = false; std::map allCacheServers; @@ -710,6 +813,99 @@ ACTOR Future monitorCacheList(DatabaseContext* self) { } } +// updates tss mapping when set of tss servers changes +ACTOR static Future monitorTssChange(DatabaseContext* cx) { + state vector> curTssMapping; + curTssMapping = cx->clientInfo->get().tssMapping; + + loop { + wait(cx->clientInfo->onChange()); + if (cx->clientInfo->get().tssMapping != curTssMapping) { + // To optimize size of the ClientDBInfo payload, we could eventually change CC to just send a tss change + // id/generation, and have client reread the mapping here if it changed. It's a very minor optimization + // though, and would cause extra read load. 
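Earlier in this hunk, clientStatusUpdateActor moves its sampling knobs to GlobalConfig; the lookup pattern, extracted with the template argument the flattened text dropped restored by assumption (the declared type of the sample rate is double):

    wait(GlobalConfig::globalConfig().onInitialized());
    double sampleRate = GlobalConfig::globalConfig().get(fdbClientInfoTxnSampleRate,
                                                         std::numeric_limits<double>::infinity());
    int64_t sizeLimit = GlobalConfig::globalConfig().get(fdbClientInfoTxnSizeLimit, -1);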
+			ClientDBInfo clientInfo = cx->clientInfo->get();
+			curTssMapping = clientInfo.tssMapping;
+
+			std::unordered_set<UID> seenTssIds;
+
+			if (curTssMapping.size()) {
+				for (const auto& it : curTssMapping) {
+					seenTssIds.insert(it.second.id());
+
+					if (cx->server_interf.count(it.first)) {
+						cx->addTssMapping(cx->server_interf[it.first]->interf, it.second);
+					}
+				}
+			}
+
+			for (auto it = cx->tssMetrics.begin(); it != cx->tssMetrics.end();) {
+				if (seenTssIds.count(it->first)) {
+					it++;
+				} else {
+					it = cx->tssMetrics.erase(it);
+				}
+			}
+
+			cx->queueModel.removeOldTssData(clientInfo.id);
+		}
+	}
+}
+
+ACTOR static Future<Void> handleTssMismatches(DatabaseContext* cx) {
+	state Reference<ReadYourWritesTransaction> tr;
+	state KeyBackedMap<UID, UID> tssMapDB = KeyBackedMap<UID, UID>(tssMappingKeys.begin);
+	loop {
+		state UID tssID = waitNext(cx->tssMismatchStream.getFuture());
+		// find the SS pair id so we can remove it from the mapping
+		state UID tssPairID;
+		bool found = false;
+		for (const auto& it : cx->clientInfo->get().tssMapping) {
+			if (it.second.id() == tssID) {
+				tssPairID = it.first;
+				found = true;
+				break;
+			}
+		}
+		if (found) {
+			TraceEvent(SevWarnAlways, "TSS_KillMismatch").detail("TSSID", tssID.toString());
+			TEST(true); // killing TSS because it got a mismatch
+
+			// TODO: we could instead write something to the system keyspace and have DD listen to that keyspace and
+			// do exactly this, but that would just add a middleman (or middle system keys, as it were)
+			tr = makeReference<ReadYourWritesTransaction>(Database(Reference<DatabaseContext>::addRef(cx)));
+			state int tries = 0;
+			loop {
+				try {
+					tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
+					tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
+
+					tr->clear(serverTagKeyFor(tssID));
+					tssMapDB.erase(tr, tssPairID);
+
+					tr->set(tssMappingChangeKey, deterministicRandom()->randomUniqueID().toString());
+					wait(tr->commit());
+
+					break;
+				} catch (Error& e) {
+					wait(tr->onError(e));
+				}
+				tries++;
+				if (tries > 10) {
+					// Give up on trying to kill the TSS; it'll get another mismatch, or a human will investigate
+					// eventually
+					TraceEvent("TSS_KillMismatchGaveUp").detail("TSSID", tssID.toString());
+					break;
+				}
+			}
+			// clear out the txn so that the extra DatabaseContext ref gets decref'd and we can free cx
+			tr = makeReference<ReadYourWritesTransaction>();
+		} else {
+			TEST(true); // Not killing TSS with mismatch because it's already gone
+		}
+	}
+}
+
 ACTOR static Future<HealthMetrics> getHealthMetricsActor(DatabaseContext* cx, bool detailed) {
 	if (now() - cx->healthMetricsLastUpdated < CLIENT_KNOBS->AGGREGATE_HEALTH_METRICS_MAX_STALENESS) {
 		if (detailed) {
@@ -755,16 +951,16 @@ void DatabaseContext::registerSpecialKeySpaceModule(SpecialKeySpace::MODULE modu
 	specialKeySpaceModules.push_back(std::move(impl));
 }
-ACTOR Future<Standalone<RangeResultRef>> getWorkerInterfaces(Reference<ClusterConnectionFile> clusterFile);
+ACTOR Future<RangeResult> getWorkerInterfaces(Reference<ClusterConnectionFile> clusterFile);
 ACTOR Future<Optional<Value>> getJSON(Database db);
 struct WorkerInterfacesSpecialKeyImpl : SpecialKeyRangeReadImpl {
-	Future<Standalone<RangeResultRef>> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override {
+	Future<RangeResult> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override {
 		if (ryw->getDatabase().getPtr() && ryw->getDatabase()->getConnectionFile()) {
 			Key prefix = Key(getKeyRange().begin);
 			return map(getWorkerInterfaces(ryw->getDatabase()->getConnectionFile()),
-			           [prefix = prefix, kr = KeyRange(kr)](const Standalone<RangeResultRef>& in) {
-				           Standalone<RangeResultRef> result;
+			           [prefix = prefix, kr = KeyRange(kr)](const RangeResult& in) {
+				           RangeResult result;
 				           for (const auto& [k_, v] : in) {
 					           auto k = k_.withPrefix(prefix);
 					           if (kr.contains(k))
@@ -775,7 +971,7 @@
struct WorkerInterfacesSpecialKeyImpl : SpecialKeyRangeReadImpl { return result; }); } else { - return Standalone(); + return RangeResult(); } } @@ -783,10 +979,10 @@ struct WorkerInterfacesSpecialKeyImpl : SpecialKeyRangeReadImpl { }; struct SingleSpecialKeyImpl : SpecialKeyRangeReadImpl { - Future> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override { + Future getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override { ASSERT(kr.contains(k)); return map(f(ryw), [k = k](Optional v) { - Standalone result; + RangeResult result; if (v.present()) { result.push_back_deep(result.arena(), KeyValueRef(k, v.get())); } @@ -805,11 +1001,11 @@ private: class HealthMetricsRangeImpl : public SpecialKeyRangeAsyncImpl { public: explicit HealthMetricsRangeImpl(KeyRangeRef kr); - Future> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; + Future getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; }; -static Standalone healthMetricsToKVPairs(const HealthMetrics& metrics, KeyRangeRef kr) { - Standalone result; +static RangeResult healthMetricsToKVPairs(const HealthMetrics& metrics, KeyRangeRef kr) { + RangeResult result; if (CLIENT_BUGGIFY) return result; if (kr.contains(LiteralStringRef("\xff\xff/metrics/health/aggregate")) && metrics.worstStorageDurabilityLag != 0) { @@ -879,8 +1075,7 @@ static Standalone healthMetricsToKVPairs(const HealthMetrics& me return result; } -ACTOR static Future> healthMetricsGetRangeActor(ReadYourWritesTransaction* ryw, - KeyRangeRef kr) { +ACTOR static Future healthMetricsGetRangeActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) { HealthMetrics metrics = wait(ryw->getDatabase()->getHealthMetrics( /*detailed ("per process")*/ kr.intersects(KeyRangeRef(LiteralStringRef("\xff\xff/metrics/health/storage/"), LiteralStringRef("\xff\xff/metrics/health/storage0"))) || @@ -891,13 +1086,13 @@ ACTOR static Future> healthMetricsGetRangeActor(ReadY HealthMetricsRangeImpl::HealthMetricsRangeImpl(KeyRangeRef kr) : SpecialKeyRangeAsyncImpl(kr) {} -Future> HealthMetricsRangeImpl::getRange(ReadYourWritesTransaction* ryw, - KeyRangeRef kr) const { +Future HealthMetricsRangeImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { return healthMetricsGetRangeActor(ryw, kr); } DatabaseContext::DatabaseContext(Reference>> connectionFile, Reference> clientInfo, + Reference>> coordinator, Future clientInfoMonitor, TaskPriority taskID, LocalityData const& clientLocality, @@ -906,9 +1101,10 @@ DatabaseContext::DatabaseContext(Reference( singleKeyRange(LiteralStringRef("consistency_check_suspended")) .withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin))); + registerSpecialKeySpaceModule( + SpecialKeySpace::MODULE::GLOBALCONFIG, + SpecialKeySpace::IMPLTYPE::READWRITE, + std::make_unique(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::GLOBALCONFIG))); registerSpecialKeySpaceModule( SpecialKeySpace::MODULE::TRACING, SpecialKeySpace::IMPLTYPE::READWRITE, @@ -1045,6 +1249,18 @@ DatabaseContext::DatabaseContext(Reference( KeyRangeRef(LiteralStringRef("profiling/"), LiteralStringRef("profiling0")) .withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin))); + registerSpecialKeySpaceModule( + SpecialKeySpace::MODULE::MANAGEMENT, + SpecialKeySpace::IMPLTYPE::READWRITE, + std::make_unique( + KeyRangeRef(LiteralStringRef("maintenance/"), LiteralStringRef("maintenance0")) + 
.withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin))); + registerSpecialKeySpaceModule( + SpecialKeySpace::MODULE::MANAGEMENT, + SpecialKeySpace::IMPLTYPE::READWRITE, + std::make_unique( + KeyRangeRef(LiteralStringRef("data_distribution/"), LiteralStringRef("data_distribution0")) + .withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin))); } if (apiVersionAtLeast(630)) { registerSpecialKeySpaceModule(SpecialKeySpace::MODULE::TRANSACTION, @@ -1156,6 +1372,8 @@ DatabaseContext::DatabaseContext(const Error& err) transactionsExpensiveClearCostEstCount("ExpensiveClearCostEstCount", cc), internal(false), transactionTracingEnabled(true) {} +// Static constructor used by server processes to create a DatabaseContext +// For internal (fdbserver) use only Database DatabaseContext::create(Reference> clientInfo, Future clientInfoMonitor, LocalityData clientLocality, @@ -1166,6 +1384,7 @@ Database DatabaseContext::create(Reference> clientInfo, bool switchable) { return Database(new DatabaseContext(Reference>>(), clientInfo, + makeReference>>(), clientInfoMonitor, taskID, clientLocality, @@ -1179,6 +1398,8 @@ Database DatabaseContext::create(Reference> clientInfo, DatabaseContext::~DatabaseContext() { cacheListMonitor.cancel(); monitorProxiesInfoChange.cancel(); + monitorTssInfoChange.cancel(); + tssMismatchHandler.cancel(); for (auto it = server_interf.begin(); it != server_interf.end(); it = server_interf.erase(it)) it->second->notifyContextDestroyed(); ASSERT_ABORT(server_interf.empty()); @@ -1266,14 +1487,16 @@ Future DatabaseContext::onProxiesChanged() { } bool DatabaseContext::sampleReadTags() const { - return clientInfo->get().transactionTagSampleRate > 0 && - deterministicRandom()->random01() <= clientInfo->get().transactionTagSampleRate; + double sampleRate = GlobalConfig::globalConfig().get(transactionTagSampleRate, CLIENT_KNOBS->READ_TAG_SAMPLE_RATE); + return sampleRate > 0 && deterministicRandom()->random01() <= sampleRate; } bool DatabaseContext::sampleOnCost(uint64_t cost) const { - if (clientInfo->get().transactionTagSampleCost <= 0) + double sampleCost = + GlobalConfig::globalConfig().get(transactionTagSampleCost, CLIENT_KNOBS->COMMIT_SAMPLE_COST); + if (sampleCost <= 0) return false; - return deterministicRandom()->random01() <= (double)cost / clientInfo->get().transactionTagSampleCost; + return deterministicRandom()->random01() <= (double)cost / sampleCost; } int64_t extractIntOption(Optional value, int64_t minValue, int64_t maxValue) { @@ -1450,6 +1673,9 @@ void DatabaseContext::expireThrottles() { extern IPAddress determinePublicIPAutomatically(ClusterConnectionString const& ccs); +// Creates a database object that represents a connection to a cluster +// This constructor uses a preallocated DatabaseContext that may have been created +// on another thread Database Database::createDatabase(Reference connFile, int apiVersion, bool internal, @@ -1496,15 +1722,20 @@ Database Database::createDatabase(Reference connFile, g_network->initTLS(); auto clientInfo = makeReference>(); + auto coordinator = makeReference>>(); auto connectionFile = makeReference>>(); connectionFile->set(connFile); - Future clientInfoMonitor = monitorProxies( - connectionFile, clientInfo, networkOptions.supportedVersions, StringRef(networkOptions.traceLogGroup)); + Future clientInfoMonitor = monitorProxies(connectionFile, + clientInfo, + coordinator, + networkOptions.supportedVersions, + StringRef(networkOptions.traceLogGroup)); DatabaseContext* db; 
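// The branch just below either constructs the DatabaseContext into caller-provided storage
// (placement new) or heap-allocates it. A std-only sketch of that pattern, with hypothetical
// names; not FDB API:
#include <new>

struct Ctx {
	explicit Ctx(int id) : id(id) {}
	int id;
};

// Construct into preallocated storage when provided, else on the heap. Objects created via
// placement new must later be destroyed explicitly (p->~Ctx()) and their storage released by
// whoever allocated it.
Ctx* makeCtx(void* preallocated, int id) {
	return preallocated ? new (preallocated) Ctx(id) : new Ctx(id);
}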
if (preallocatedDb) { db = new (preallocatedDb) DatabaseContext(connectionFile, clientInfo, + coordinator, clientInfoMonitor, TaskPriority::DefaultEndpoint, clientLocality, @@ -1516,6 +1747,7 @@ Database Database::createDatabase(Reference connFile, } else { db = new DatabaseContext(connectionFile, clientInfo, + coordinator, clientInfoMonitor, TaskPriority::DefaultEndpoint, clientLocality, @@ -1935,14 +2167,14 @@ AddressExclusion AddressExclusion::parse(StringRef const& key) { } } -Future> getRange(Database const& cx, - Future const& fVersion, - KeySelector const& begin, - KeySelector const& end, - GetRangeLimits const& limits, - bool const& reverse, - TransactionInfo const& info, - TagSet const& tags); +Future getRange(Database const& cx, + Future const& fVersion, + KeySelector const& begin, + KeySelector const& end, + GetRangeLimits const& limits, + bool const& reverse, + TransactionInfo const& info, + TagSet const& tags); ACTOR Future> getValue(Future version, Key key, @@ -2208,7 +2440,7 @@ ACTOR Future> getValue(Future version, state GetValueReply reply; try { - if (CLIENT_BUGGIFY) { + if (CLIENT_BUGGIFY_WITH_PROB(.01)) { throw deterministicRandom()->randomChoice( std::vector{ transaction_too_old(), future_version() }); } @@ -2318,6 +2550,11 @@ ACTOR Future getKey(Database cx, KeySelector k, Future version, Tr "NativeAPI.getKey.Before"); //.detail("StartKey", // k.getKey()).detail("Offset",k.offset).detail("OrEqual",k.orEqual); ++cx->transactionPhysicalReads; + + GetKeyRequest req( + span.context, k, version.get(), cx->sampleReadTags() ? tags : Optional(), getKeyID); + req.arena.dependsOn(k.arena()); + state GetKeyReply reply; try { choose { @@ -2326,11 +2563,7 @@ ACTOR Future getKey(Database cx, KeySelector k, Future version, Tr wait(loadBalance(cx.getPtr(), ssi.second, &StorageServerInterface::getKey, - GetKeyRequest(span.context, - k, - version.get(), - cx->sampleReadTags() ? tags : Optional(), - getKeyID), + req, TaskPriority::DefaultPromiseEndpoint, false, cx->enableLocalityLoadBalance ? 
&cx->queueModel : nullptr))) { @@ -2481,7 +2714,6 @@ ACTOR Future watchValue(Future version, cx->invalidateCache(key); wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, info.taskID)); } else if (e.code() == error_code_watch_cancelled || e.code() == error_code_process_behind) { - TEST(e.code() == error_code_watch_cancelled); // Too many watches on the storage server, poll for changes instead TEST(e.code() == error_code_watch_cancelled); // Too many watches on storage server, poll for changes TEST(e.code() == error_code_process_behind); // The storage servers are all behind wait(delay(CLIENT_KNOBS->WATCH_POLLING_TIME, info.taskID)); @@ -2667,14 +2899,14 @@ void transformRangeLimits(GetRangeLimits limits, bool reverse, GetKeyValuesReque } } -ACTOR Future> getExactRange(Database cx, - Version version, - KeyRange keys, - GetRangeLimits limits, - bool reverse, - TransactionInfo info, - TagSet tags) { - state Standalone output; +ACTOR Future getExactRange(Database cx, + Version version, + KeyRange keys, + GetRangeLimits limits, + bool reverse, + TransactionInfo info, + TagSet tags) { + state RangeResult output; state Span span("NAPI:getExactRange"_loc, info.spanID); // printf("getExactRange( '%s', '%s' )\n", keys.begin.toString().c_str(), keys.end.toString().c_str()); @@ -2692,6 +2924,9 @@ ACTOR Future> getExactRange(Database cx, req.end = firstGreaterOrEqual(range.end); req.spanContext = span.context; + // keep shard's arena around in case of async tss comparison + req.arena.dependsOn(locations[shard].first.arena()); + transformRangeLimits(limits, reverse, req); ASSERT(req.limitBytes > 0 && req.limit != 0 && req.limit < 0 == reverse); @@ -2846,14 +3081,14 @@ Future resolveKey(Database const& cx, return getKey(cx, key, version, info, tags); } -ACTOR Future> getRangeFallback(Database cx, - Version version, - KeySelector begin, - KeySelector end, - GetRangeLimits limits, - bool reverse, - TransactionInfo info, - TagSet tags) { +ACTOR Future getRangeFallback(Database cx, + Version version, + KeySelector begin, + KeySelector end, + GetRangeLimits limits, + bool reverse, + TransactionInfo info, + TagSet tags) { if (version == latestVersion) { state Transaction transaction(cx); transaction.setOption(FDBTransactionOptions::CAUSAL_READ_RISKY); @@ -2869,15 +3104,15 @@ ACTOR Future> getRangeFallback(Database cx, state Key b = wait(fb); state Key e = wait(fe); if (b >= e) { - return Standalone(); + return RangeResult(); } // if e is allKeys.end, we have read through the end of the database // if b is allKeys.begin, we have either read through the beginning of the database, // or allKeys.begin exists in the database and will be part of the conflict range anyways - Standalone _r = wait(getExactRange(cx, version, KeyRangeRef(b, e), limits, reverse, info, tags)); - Standalone r = _r; + RangeResult _r = wait(getExactRange(cx, version, KeyRangeRef(b, e), limits, reverse, info, tags)); + RangeResult r = _r; if (b == allKeys.begin && ((reverse && !r.more) || !reverse)) r.readToBegin = true; @@ -2909,7 +3144,7 @@ void getRangeFinished(Database cx, bool snapshot, Promise> conflictRange, bool reverse, - Standalone result) { + RangeResult result) { int64_t bytes = 0; for (const KeyValueRef& kv : result) { bytes += kv.key.size() + kv.value.size(); @@ -2955,21 +3190,21 @@ void getRangeFinished(Database cx, } } -ACTOR Future> getRange(Database cx, - Reference trLogInfo, - Future fVersion, - KeySelector begin, - KeySelector end, - GetRangeLimits limits, - Promise> conflictRange, - bool snapshot, - bool reverse, - 
TransactionInfo info,
-                                               TagSet tags) {
+ACTOR Future<RangeResult> getRange(Database cx,
+                                   Reference<TransactionLogInfo> trLogInfo,
+                                   Future<Version> fVersion,
+                                   KeySelector begin,
+                                   KeySelector end,
+                                   GetRangeLimits limits,
+                                   Promise<std::pair<Key, Key>> conflictRange,
+                                   bool snapshot,
+                                   bool reverse,
+                                   TransactionInfo info,
+                                   TagSet tags) {
 	state GetRangeLimits originalLimits(limits);
 	state KeySelector originalBegin = begin;
 	state KeySelector originalEnd = end;
-	state Standalone<RangeResultRef> output;
+	state RangeResult output;
 	state Span span("NAPI:getRange"_loc, info.spanID);
 	try {
@@ -3008,6 +3243,9 @@ ACTOR Future<Standalone<RangeResultRef>> getRange(Database cx,
 			req.isFetchKeys = (info.taskID == TaskPriority::FetchKeys);
 			req.version = readVersion;
+			// In case of async TSS comparison, also make the request arena depend on begin's, end's, and/or the
+			// shard's arena, depending on which are used
+			bool dependOnShard = false;
 			if (reverse && (begin - 1).isDefinitelyLess(shard.begin) &&
 			    (!begin.isFirstGreaterOrEqual() || begin.getKey() != shard.begin)) { // In this case we would be setting modifiedSelectors to true, but
 				req.begin = firstGreaterOrEqual(shard.begin);
 				modifiedSelectors = true;
-			} else
+				req.arena.dependsOn(shard.arena());
+				dependOnShard = true;
+			} else {
 				req.begin = begin;
+				req.arena.dependsOn(begin.arena());
+			}
 			if (!reverse && end.isDefinitelyGreater(shard.end)) {
 				req.end = firstGreaterOrEqual(shard.end);
 				modifiedSelectors = true;
-			} else
+				if (!dependOnShard) {
+					req.arena.dependsOn(shard.arena());
+				}
+			} else {
 				req.end = end;
+				req.arena.dependsOn(end.arena());
+			}
 			transformRangeLimits(limits, reverse, req);
 			ASSERT(req.limitBytes > 0 && req.limit != 0 && req.limit < 0 == reverse);
@@ -3052,7 +3299,7 @@ ACTOR Future<Standalone<RangeResultRef>> getRange(Database cx,
 			++cx->transactionPhysicalReads;
 			state GetKeyValuesReply rep;
 			try {
-				if (CLIENT_BUGGIFY) {
+				if (CLIENT_BUGGIFY_WITH_PROB(.01)) {
 					throw deterministicRandom()->randomChoice(
 					    std::vector<Error>{ transaction_too_old(), future_version() });
 				}
@@ -3101,16 +3348,23 @@
 				bool readToBegin = output.readToBegin;
 				bool readThroughEnd = output.readThroughEnd;
-				output = Standalone<RangeResultRef>(
-				    RangeResultRef(rep.data, modifiedSelectors || limits.isReached() || rep.more), rep.arena);
+				output = RangeResult(RangeResultRef(rep.data, modifiedSelectors || limits.isReached() || rep.more),
+				                     rep.arena);
 				output.readToBegin = readToBegin;
 				output.readThroughEnd = readThroughEnd;
 				if (BUGGIFY && limits.hasByteLimit() && output.size() > std::max(1, originalLimits.minRows)) {
+					// Copy instead of resizing because the TSS may be using output's arena for comparison. This
+					// only happens in simulation, so it's fine
+					RangeResult copy;
+					int newSize =
+					    deterministicRandom()->randomInt(std::max(1, originalLimits.minRows), output.size());
+					for (int i = 0; i < newSize; i++) {
+						copy.push_back_deep(copy.arena(), output[i]);
+					}
+					output = copy;
 					output.more = true;
-					output.resize(
-					    output.arena(),
-					    deterministicRandom()->randomInt(std::max(1, originalLimits.minRows), output.size()));
+
 					getRangeFinished(cx,
 					                 trLogInfo,
 					                 startTime,
@@ -3155,7 +3409,7 @@
 				TEST(true); // !GetKeyValuesReply.more and modifiedSelectors in getRange
 				if (!rep.data.size()) {
-					Standalone<RangeResultRef> result = wait(getRangeFallback(
+					RangeResult result = wait(getRangeFallback(
 					    cx, version, originalBegin, originalEnd, originalLimits, reverse, info, tags));
 					getRangeFinished(cx,
 					                 trLogInfo,
@@ -3192,7 +3446,7 @@
 				reverse ?
(end - 1).isBackward() : begin.isBackward()); if (e.code() == error_code_wrong_shard_server) { - Standalone result = wait(getRangeFallback( + RangeResult result = wait(getRangeFallback( cx, version, originalBegin, originalEnd, originalLimits, reverse, info, tags)); getRangeFinished(cx, trLogInfo, @@ -3228,14 +3482,14 @@ ACTOR Future> getRange(Database cx, } } -Future> getRange(Database const& cx, - Future const& fVersion, - KeySelector const& begin, - KeySelector const& end, - GetRangeLimits const& limits, - bool const& reverse, - TransactionInfo const& info, - TagSet const& tags) { +Future getRange(Database const& cx, + Future const& fVersion, + KeySelector const& begin, + KeySelector const& end, + GetRangeLimits const& limits, + bool const& reverse, + TransactionInfo const& info, + TagSet const& tags) { return getRange(cx, Reference(), fVersion, @@ -3457,18 +3711,18 @@ ACTOR Future>> getAddressesForKeyActor(Key key // serverInterfaces vector being empty, which will cause us to return an empty addresses list. state Key ksKey = keyServersKey(key); - state Standalone serverTagResult = wait(getRange(cx, - ver, - lastLessOrEqual(serverTagKeys.begin), - firstGreaterThan(serverTagKeys.end), - GetRangeLimits(CLIENT_KNOBS->TOO_MANY), - false, - info, - options.readTags)); + state RangeResult serverTagResult = wait(getRange(cx, + ver, + lastLessOrEqual(serverTagKeys.begin), + firstGreaterThan(serverTagKeys.end), + GetRangeLimits(CLIENT_KNOBS->TOO_MANY), + false, + info, + options.readTags)); ASSERT(!serverTagResult.more && serverTagResult.size() < CLIENT_KNOBS->TOO_MANY); - Future> futureServerUids = getRange( + Future futureServerUids = getRange( cx, ver, lastLessOrEqual(ksKey), firstGreaterThan(ksKey), GetRangeLimits(1), false, info, options.readTags); - Standalone serverUids = wait(futureServerUids); + RangeResult serverUids = wait(futureServerUids); ASSERT(serverUids.size()); // every shard needs to have a team @@ -3533,16 +3787,16 @@ Future Transaction::getKey(const KeySelector& key, bool snapshot) { return getKeyAndConflictRange(cx, key, getReadVersion(), conflictRange, info, options.readTags); } -Future> Transaction::getRange(const KeySelector& begin, - const KeySelector& end, - GetRangeLimits limits, - bool snapshot, - bool reverse) { +Future Transaction::getRange(const KeySelector& begin, + const KeySelector& end, + GetRangeLimits limits, + bool snapshot, + bool reverse) { ++cx->transactionLogicalReads; ++cx->transactionGetRangeRequests; if (limits.isReached()) - return Standalone(); + return RangeResult(); if (!limits.isValid()) return range_limits_invalid(); @@ -3563,7 +3817,7 @@ Future> Transaction::getRange(const KeySelector& begi if (b.offset >= e.offset && b.getKey() >= e.getKey()) { TEST(true); // Native range inverted - return Standalone(); + return RangeResult(); } Promise> conflictRange; @@ -3575,11 +3829,11 @@ Future> Transaction::getRange(const KeySelector& begi cx, trLogInfo, getReadVersion(), b, e, limits, conflictRange, snapshot, reverse, info, options.readTags); } -Future> Transaction::getRange(const KeySelector& begin, - const KeySelector& end, - int limit, - bool snapshot, - bool reverse) { +Future Transaction::getRange(const KeySelector& begin, + const KeySelector& end, + int limit, + bool snapshot, + bool reverse) { return getRange(begin, end, GetRangeLimits(limit), snapshot, reverse); } @@ -3935,7 +4189,7 @@ ACTOR void checkWrites(Database cx, if (m.mutated) { checkedRanges++; if (m.cleared) { - Standalone shouldBeEmpty = wait(tr.getRange(it->range(), 1)); + 
RangeResult shouldBeEmpty = wait(tr.getRange(it->range(), 1)); if (shouldBeEmpty.size()) { TraceEvent(SevError, "CheckWritesFailed") .detail("Class", "Clear") @@ -4877,37 +5131,95 @@ Future> Transaction::getVersionstamp() { return versionstampPromise.getFuture(); } -ACTOR Future coordinatorProtocolsFetcher(Reference f) { - state ClientCoordinators coord(f); +// Gets the protocol version reported by a coordinator via the protocol info interface +ACTOR Future getCoordinatorProtocol(NetworkAddressList coordinatorAddresses) { + RequestStream requestStream{ Endpoint{ { coordinatorAddresses }, WLTOKEN_PROTOCOL_INFO } }; + ProtocolInfoReply reply = wait(retryBrokenPromise(requestStream, ProtocolInfoRequest{})); - state vector> coordProtocols; - coordProtocols.reserve(coord.clientLeaderServers.size()); - for (int i = 0; i < coord.clientLeaderServers.size(); i++) { - RequestStream requestStream{ Endpoint{ - { coord.clientLeaderServers[i].getLeader.getEndpoint().addresses }, WLTOKEN_PROTOCOL_INFO } }; - coordProtocols.push_back(retryBrokenPromise(requestStream, ProtocolInfoRequest{})); - } - - wait(smartQuorum(coordProtocols, coordProtocols.size() / 2 + 1, 1.5)); - - std::unordered_map protocolCount; - for (int i = 0; i < coordProtocols.size(); i++) { - if (coordProtocols[i].isReady()) { - protocolCount[coordProtocols[i].get().version.version()]++; - } - } - - uint64_t majorityProtocol = std::max_element(protocolCount.begin(), - protocolCount.end(), - [](const std::pair& l, - const std::pair& r) { return l.second < r.second; }) - ->first; - return ProtocolVersion(majorityProtocol); + return reply.version; } -ACTOR Future getCoordinatorProtocols(Reference f) { - ProtocolVersion protocolVersion = wait(coordinatorProtocolsFetcher(f)); - return protocolVersion.version(); +// Gets the protocol version reported by a coordinator in its connect packet +// If we are unable to get a version from the connect packet (e.g. because we lost connection with the peer), then this +// function will return with an unset result. 
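// A rough std-only analogue of the wait loop below (hypothetical names; a condition_variable
// stands in for AsyncVar::onChange, and the poll interval mirrors re-sending the request after
// CONNECTION_MONITOR_TIMEOUT). It returns once a version different from `expected` is seen, or
// returns empty so the caller can retry when no connect packet has arrived within the interval.
#include <chrono>
#include <condition_variable>
#include <cstdint>
#include <mutex>
#include <optional>

std::optional<uint64_t> awaitDifferentVersion(std::mutex& m,
                                              std::condition_variable& cv,
                                              std::optional<uint64_t>& slot, // published protocol version
                                              std::optional<uint64_t> expected,
                                              std::chrono::seconds pollInterval) {
	std::unique_lock<std::mutex> lock(m);
	for (;;) {
		if (slot.has_value() && slot != expected) {
			return slot; // a version we can report
		}
		if (slot.has_value()) {
			cv.wait(lock); // connected, but the version matches expected; wait for a change
		} else if (cv.wait_for(lock, pollInterval) == std::cv_status::timeout) {
			return std::nullopt; // still no connect packet; the caller retries the request
		}
	}
}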
+// If an expected version is given, this future won't return if the actual protocol version matches the expected version +ACTOR Future> getCoordinatorProtocolFromConnectPacket( + NetworkAddress coordinatorAddress, + Optional expectedVersion) { + + state Reference>> protocolVersion = + FlowTransport::transport().getPeerProtocolAsyncVar(coordinatorAddress); + + loop { + if (protocolVersion->get().present() && protocolVersion->get() != expectedVersion) { + return protocolVersion->get(); + } + + Future change = protocolVersion->onChange(); + if (!protocolVersion->get().present()) { + // If we still don't have any connection info after a timeout, retry sending the protocol version request + change = timeout(change, FLOW_KNOBS->CONNECTION_MONITOR_TIMEOUT, Void()); + } + + wait(change); + + if (!protocolVersion->get().present()) { + return protocolVersion->get(); + } + } +} + +// Returns the protocol version reported by the given coordinator +// If an expected version is given, the future won't return until the protocol version is different than expected +ACTOR Future getClusterProtocolImpl( + Reference>> coordinator, + Optional expectedVersion) { + + state bool needToConnect = true; + state Future protocolVersion = Never(); + + loop { + if (!coordinator->get().present()) { + wait(coordinator->onChange()); + } else { + Endpoint coordinatorEndpoint = coordinator->get().get().getLeader.getEndpoint(); + if (needToConnect) { + // Even though we typically rely on the connect packet to get the protocol version, we need to send some + // request in order to start a connection. This protocol version request serves that purpose. + protocolVersion = getCoordinatorProtocol(coordinatorEndpoint.addresses); + needToConnect = false; + } + choose { + when(wait(coordinator->onChange())) { needToConnect = true; } + + when(ProtocolVersion pv = wait(protocolVersion)) { + if (!expectedVersion.present() || expectedVersion.get() != pv) { + return pv; + } + + protocolVersion = Never(); + } + + // Older versions of FDB don't have an endpoint to return the protocol version, so we get this info from + // the connect packet + when(Optional pv = wait(getCoordinatorProtocolFromConnectPacket( + coordinatorEndpoint.getPrimaryAddress(), expectedVersion))) { + if (pv.present()) { + return pv.get(); + } else { + needToConnect = true; + } + } + } + } + } +} + +// Returns the protocol version reported by the coordinator this client is currently connected to +// If an expected version is given, the future won't return until the protocol version is different than expected +// Note: this will never return if the server is running a protocol from FDB 5.0 or older +Future DatabaseContext::getClusterProtocol(Optional expectedVersion) { + return getClusterProtocolImpl(coordinator, expectedVersion); } uint32_t Transaction::getSize() { @@ -5375,9 +5687,8 @@ void Transaction::checkDeferredError() const { Reference Transaction::createTrLogInfoProbabilistically(const Database& cx) { if (!cx->isError()) { - double clientSamplingProbability = std::isinf(cx->clientInfo->get().clientTxnInfoSampleRate) - ? 
CLIENT_KNOBS->CSI_SAMPLING_PROBABILITY - : cx->clientInfo->get().clientTxnInfoSampleRate; + double clientSamplingProbability = GlobalConfig::globalConfig().get( + fdbClientInfoTxnSampleRate, CLIENT_KNOBS->CSI_SAMPLING_PROBABILITY); if (((networkOptions.logClientInfo.present() && networkOptions.logClientInfo.get()) || BUGGIFY) && deterministicRandom()->random01() < clientSamplingProbability && (!g_network->isSimulated() || !g_simulator.speedUpSimulation)) { @@ -5537,7 +5848,7 @@ ACTOR static Future rebootWorkerActor(DatabaseContext* cx, ValueRef add state std::map> address_interface; if (!cx->getConnectionFile()) return 0; - Standalone kvs = wait(getWorkerInterfaces(cx->getConnectionFile())); + RangeResult kvs = wait(getWorkerInterfaces(cx->getConnectionFile())); ASSERT(!kvs.more); // Note: reuse this knob from fdbcli, change it if necessary Reference connectLock(new FlowLock(CLIENT_KNOBS->CLI_CONNECT_PARALLELISM)); diff --git a/fdbclient/NativeAPI.actor.h b/fdbclient/NativeAPI.actor.h index af07e5fe32..c26fb95ec7 100644 --- a/fdbclient/NativeAPI.actor.h +++ b/fdbclient/NativeAPI.actor.h @@ -76,11 +76,15 @@ class Database { public: enum { API_VERSION_LATEST = -1 }; + // Creates a database object that represents a connection to a cluster + // This constructor uses a preallocated DatabaseContext that may have been created + // on another thread static Database createDatabase(Reference connFile, int apiVersion, bool internal = true, LocalityData const& clientLocality = LocalityData(), DatabaseContext* preallocatedDb = nullptr); + static Database createDatabase(std::string connFileName, int apiVersion, bool internal = true, @@ -248,30 +252,30 @@ public: [[nodiscard]] Future watch(Reference watch); [[nodiscard]] Future getKey(const KeySelector& key, bool snapshot = false); // Future< Optional > get( const KeySelectorRef& key ); - [[nodiscard]] Future> getRange(const KeySelector& begin, - const KeySelector& end, - int limit, - bool snapshot = false, - bool reverse = false); - [[nodiscard]] Future> getRange(const KeySelector& begin, - const KeySelector& end, - GetRangeLimits limits, - bool snapshot = false, - bool reverse = false); - [[nodiscard]] Future> getRange(const KeyRange& keys, - int limit, - bool snapshot = false, - bool reverse = false) { + [[nodiscard]] Future getRange(const KeySelector& begin, + const KeySelector& end, + int limit, + bool snapshot = false, + bool reverse = false); + [[nodiscard]] Future getRange(const KeySelector& begin, + const KeySelector& end, + GetRangeLimits limits, + bool snapshot = false, + bool reverse = false); + [[nodiscard]] Future getRange(const KeyRange& keys, + int limit, + bool snapshot = false, + bool reverse = false) { return getRange(KeySelector(firstGreaterOrEqual(keys.begin), keys.arena()), KeySelector(firstGreaterOrEqual(keys.end), keys.arena()), limit, snapshot, reverse); } - [[nodiscard]] Future> getRange(const KeyRange& keys, - GetRangeLimits limits, - bool snapshot = false, - bool reverse = false) { + [[nodiscard]] Future getRange(const KeyRange& keys, + GetRangeLimits limits, + bool snapshot = false, + bool reverse = false) { return getRange(KeySelector(firstGreaterOrEqual(keys.begin), keys.arena()), KeySelector(firstGreaterOrEqual(keys.end), keys.arena()), limits, @@ -402,8 +406,6 @@ ACTOR Future snapCreate(Database cx, Standalone snapCmd, UID sn // Checks with Data Distributor that it is safe to mark all servers in exclusions as failed ACTOR Future checkSafeExclusions(Database cx, vector exclusions); -ACTOR Future 
getCoordinatorProtocols(Reference f); - inline uint64_t getWriteOperationCost(uint64_t bytes) { return bytes / std::max(1, CLIENT_KNOBS->WRITE_COST_BYTE_FACTOR) + 1; } diff --git a/fdbclient/RYWIterator.cpp b/fdbclient/RYWIterator.cpp index fd3eec35c7..cfd233e3a2 100644 --- a/fdbclient/RYWIterator.cpp +++ b/fdbclient/RYWIterator.cpp @@ -42,7 +42,7 @@ const RYWIterator::SEGMENT_TYPE RYWIterator::typeMap[12] = { }; RYWIterator::SEGMENT_TYPE RYWIterator::type() const { - if (is_unreadable()) + if (is_unreadable() && !bypassUnreadable) throw accessed_unreadable(); return typeMap[writes.type() * 3 + cache.type()]; @@ -72,7 +72,7 @@ ExtStringRef RYWIterator::endKey() { } const KeyValueRef* RYWIterator::kv(Arena& arena) { - if (is_unreadable()) + if (is_unreadable() && !bypassUnreadable) throw accessed_unreadable(); if (writes.is_unmodified_range()) { @@ -347,8 +347,9 @@ void testSnapshotCache() { } /* -ACTOR Standalone getRange( Transaction* tr, KeySelector begin, KeySelector end, SnapshotCache* cache, -WriteMap* writes, GetRangeLimits limits ) { RYWIterator it(cache, writes); RYWIterator itEnd(cache, writes); +ACTOR RangeResult getRange( Transaction* tr, KeySelector begin, KeySelector end, SnapshotCache* cache, +WriteMap* writes, GetRangeLimits limits ) { + RYWIterator it(cache, writes); RYWIterator itEnd(cache, writes); resolveKeySelectorFromCache( begin, it ); resolveKeySelectorFromCache( end, itEnd ); @@ -362,9 +363,8 @@ WriteMap* writes, GetRangeLimits limits ) { RYWIterator it(cache, writes); RYWIt ucEnd.skipUncached(itEnd); state KeySelector read_end = ucEnd==itEnd ? end : -firstGreaterOrEqual(ucEnd.endKey().toStandaloneStringRef()); Standalone snapshot_read = wait( -tr->getRange( begin, read_end, limits, false, false ) ); cache->insert( getKnownKeyRange( snapshot_read, begin, read_end -), snapshot_read ); +firstGreaterOrEqual(ucEnd.endKey().toStandaloneStringRef()); RangeResult snapshot_read = wait(tr->getRange( begin, +read_end, limits, false, false ) ); cache->insert( getKnownKeyRange( snapshot_read, begin, read_end), snapshot_read ); // TODO: Is there a more efficient way to deal with invalidation? it = itEnd = RYWIterator( cache, writes ); diff --git a/fdbclient/RYWIterator.h b/fdbclient/RYWIterator.h index e28b11c033..8bc9091fe2 100644 --- a/fdbclient/RYWIterator.h +++ b/fdbclient/RYWIterator.h @@ -28,7 +28,7 @@ class RYWIterator { public: RYWIterator(SnapshotCache* snapshotCache, WriteMap* writeMap) - : cache(snapshotCache), writes(writeMap), begin_key_cmp(0), end_key_cmp(0) {} + : cache(snapshotCache), writes(writeMap), begin_key_cmp(0), end_key_cmp(0), bypassUnreadable(false) {} enum SEGMENT_TYPE { UNKNOWN_RANGE, EMPTY_RANGE, KV }; static const SEGMENT_TYPE typeMap[12]; @@ -59,6 +59,8 @@ public: void skipContiguousBack(KeyRef key); + void bypassUnreadableProtection() { bypassUnreadable = true; } + WriteMap::iterator& extractWriteMapIterator(); // Really this should return an iterator by value, but for performance it's convenient to actually grab the internal // one. Consider copying the return value if performance isn't critical. 
If you modify the returned iterator, it @@ -72,6 +74,8 @@ private: SnapshotCache::iterator cache; WriteMap::iterator writes; KeyValueRef temp; + bool bypassUnreadable; // When set, allows read from sections of keyspace that have become unreadable because of + // versionstamp operations void updateCmp(); }; diff --git a/fdbclient/ReadYourWrites.actor.cpp b/fdbclient/ReadYourWrites.actor.cpp index a66ce63986..4db07f527b 100644 --- a/fdbclient/ReadYourWrites.actor.cpp +++ b/fdbclient/ReadYourWrites.actor.cpp @@ -71,7 +71,7 @@ public: : begin(begin), end(end), limits(limits) {} KeySelector begin, end; GetRangeLimits limits; - typedef Standalone Result; + using Result = RangeResult; }; // read() Performs a read (get, getKey, getRange, etc), in the context of the given transaction. Snapshot or RYW @@ -84,6 +84,9 @@ public: static Future> read(ReadYourWritesTransaction* ryw, GetValueReq read, Iter* it) { // This overload is required to provide postcondition: it->extractWriteMapIterator().segmentContains(read.key) + if (ryw->options.bypassUnreadable) { + it->bypassUnreadableProtection(); + } it->skip(read.key); state bool dependent = it->is_dependent(); if (it->is_kv()) { @@ -126,7 +129,7 @@ public: ACTOR template static Future read(ReadYourWritesTransaction* ryw, GetKeyReq read, Iter* it) { if (read.key.offset > 0) { - Standalone result = + RangeResult result = wait(getRangeValue(ryw, read.key, firstGreaterOrEqual(ryw->getMaxReadKey()), GetRangeLimits(1), it)); if (result.readToBegin) return allKeys.begin; @@ -135,7 +138,7 @@ public: return result[0].key; } else { read.key.offset++; - Standalone result = + RangeResult result = wait(getRangeValueBack(ryw, firstGreaterOrEqual(allKeys.begin), read.key, GetRangeLimits(1), it)); if (result.readThroughEnd) return ryw->getMaxReadKey(); @@ -146,12 +149,12 @@ public: }; template - static Future> read(ReadYourWritesTransaction* ryw, GetRangeReq read, Iter* it) { + static Future read(ReadYourWritesTransaction* ryw, GetRangeReq read, Iter* it) { return getRangeValue(ryw, read.begin, read.end, read.limits, it); }; template - static Future> read(ReadYourWritesTransaction* ryw, GetRangeReq read, Iter* it) { + static Future read(ReadYourWritesTransaction* ryw, GetRangeReq read, Iter* it) { return getRangeValueBack(ryw, read.begin, read.end, read.limits, it); }; @@ -171,9 +174,7 @@ public: } ACTOR template - static Future> readThrough(ReadYourWritesTransaction* ryw, - GetRangeReq read, - bool snapshot) { + static Future readThrough(ReadYourWritesTransaction* ryw, GetRangeReq read, bool snapshot) { if (Reverse && read.end.offset > 1) { // FIXME: Optimistically assume that this will not run into the system keys, and only reissue if the result // actually does. 
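// The BYPASS_UNREADABLE plumbing above follows a guarded-access pattern: reads throw
// accessed_unreadable unless the caller explicitly opted out. A compact std-only sketch with
// hypothetical names, not the FDB types:
#include <stdexcept>
#include <string>

class GuardedValue {
public:
	void markUnreadable() { unreadable = true; } // e.g. after a versionstamp operation
	void bypassProtection() { bypass = true; } // analogous to bypassUnreadableProtection()
	const std::string& get() const {
		if (unreadable && !bypass)
			throw std::runtime_error("accessed_unreadable");
		return value;
	}

private:
	std::string value = "v";
	bool unreadable = false;
	bool bypass = false; // set only via an explicit opt-in, like the BYPASS_UNREADABLE option
};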
@@ -184,16 +185,15 @@ public: read.end = KeySelector(firstGreaterOrEqual(key), key.arena()); } - Standalone v = wait(ryw->tr.getRange(read.begin, read.end, read.limits, snapshot, Reverse)); + RangeResult v = wait(ryw->tr.getRange(read.begin, read.end, read.limits, snapshot, Reverse)); KeyRef maxKey = ryw->getMaxReadKey(); if (v.size() > 0) { if (!Reverse && v[v.size() - 1].key >= maxKey) { - state Standalone _v = v; + state RangeResult _v = v; int i = _v.size() - 2; for (; i >= 0 && _v[i].key >= maxKey; --i) { } - return Standalone(RangeResultRef(VectorRef(&_v[0], i + 1), false), - _v.arena()); + return RangeResult(RangeResultRef(VectorRef(&_v[0], i + 1), false), _v.arena()); } } @@ -230,7 +230,7 @@ public: static void addConflictRange(ReadYourWritesTransaction* ryw, GetRangeReq read, WriteMap::iterator& it, - Standalone const& result) { + RangeResult const& result) { KeyRef rangeBegin, rangeEnd; bool endInArena = false; @@ -265,7 +265,7 @@ public: static void addConflictRange(ReadYourWritesTransaction* ryw, GetRangeReq read, WriteMap::iterator& it, - Standalone const& result) { + RangeResult const& result) { KeyRef rangeBegin, rangeEnd; bool endInArena = false; @@ -527,14 +527,14 @@ public: // TODO: read to begin, read through end flags for result ACTOR template - static Future> getRangeValue(ReadYourWritesTransaction* ryw, - KeySelector begin, - KeySelector end, - GetRangeLimits limits, - Iter* pit) { + static Future getRangeValue(ReadYourWritesTransaction* ryw, + KeySelector begin, + KeySelector end, + GetRangeLimits limits, + Iter* pit) { state Iter& it(*pit); state Iter itEnd(*pit); - state Standalone result; + state RangeResult result; state int64_t additionalRows = 0; state int itemsPastEnd = 0; state int requestCount = 0; @@ -690,8 +690,7 @@ public: //TraceEvent("RYWIssuing", randomID).detail("Begin", read_begin.toString()).detail("End", read_end.toString()).detail("Bytes", requestLimit.bytes).detail("Rows", requestLimit.rows).detail("Limits", limits.bytes).detail("Reached", limits.isReached()).detail("RequestCount", requestCount).detail("SingleClears", singleClears).detail("UcEnd", ucEnd.beginKey()).detail("MinRows", requestLimit.minRows); additionalRows = 0; - Standalone snapshot_read = - wait(ryw->tr.getRange(read_begin, read_end, requestLimit, true, false)); + RangeResult snapshot_read = wait(ryw->tr.getRange(read_begin, read_end, requestLimit, true, false)); KeyRangeRef range = getKnownKeyRange(snapshot_read, read_begin, read_end, ryw->arena); //TraceEvent("RYWCacheInsert", randomID).detail("Range", range).detail("ExpectedSize", snapshot_read.expectedSize()).detail("Rows", snapshot_read.size()).detail("Results", snapshot_read).detail("More", snapshot_read.more).detail("ReadToBegin", snapshot_read.readToBegin).detail("ReadThroughEnd", snapshot_read.readThroughEnd).detail("ReadThrough", snapshot_read.readThrough); @@ -829,14 +828,14 @@ public: } ACTOR template - static Future> getRangeValueBack(ReadYourWritesTransaction* ryw, - KeySelector begin, - KeySelector end, - GetRangeLimits limits, - Iter* pit) { + static Future getRangeValueBack(ReadYourWritesTransaction* ryw, + KeySelector begin, + KeySelector end, + GetRangeLimits limits, + Iter* pit) { state Iter& it(*pit); state Iter itEnd(*pit); - state Standalone result; + state RangeResult result; state int64_t additionalRows = 0; state int itemsPastBegin = 0; state int requestCount = 0; @@ -994,8 +993,7 @@ public: //TraceEvent("RYWIssuing", randomID).detail("Begin", read_begin.toString()).detail("End", 
read_end.toString()).detail("Bytes", requestLimit.bytes).detail("Rows", requestLimit.rows).detail("Limits", limits.bytes).detail("Reached", limits.isReached()).detail("RequestCount", requestCount).detail("SingleClears", singleClears).detail("UcEnd", ucEnd.beginKey()).detail("MinRows", requestLimit.minRows); additionalRows = 0; - Standalone snapshot_read = - wait(ryw->tr.getRange(read_begin, read_end, requestLimit, true, true)); + RangeResult snapshot_read = wait(ryw->tr.getRange(read_begin, read_end, requestLimit, true, true)); KeyRangeRef range = getKnownKeyRangeBack(snapshot_read, read_begin, read_end, ryw->arena); //TraceEvent("RYWCacheInsert", randomID).detail("Range", range).detail("ExpectedSize", snapshot_read.expectedSize()).detail("Rows", snapshot_read.size()).detail("Results", snapshot_read).detail("More", snapshot_read.more).detail("ReadToBegin", snapshot_read.readToBegin).detail("ReadThroughEnd", snapshot_read.readThroughEnd).detail("ReadThrough", snapshot_read.readThrough); @@ -1329,7 +1327,7 @@ ACTOR Future> getJSON(Database db) { return getValueFromJSON(statusObj); } -ACTOR Future> getWorkerInterfaces(Reference clusterFile) { +ACTOR Future getWorkerInterfaces(Reference clusterFile) { state Reference>> clusterInterface(new AsyncVar>); state Future leaderMon = monitorLeader(clusterFile, clusterInterface); @@ -1340,7 +1338,7 @@ ACTOR Future> getWorkerInterfaces(Referenceget().get().getClientWorkers.getReply(GetClientWorkersRequest())) : Never())) { - Standalone result; + RangeResult result; for (auto& it : workers) { result.push_back_deep( result.arena(), @@ -1434,11 +1432,11 @@ Future ReadYourWritesTransaction::getKey(const KeySelector& key, bool snaps return result; } -Future> ReadYourWritesTransaction::getRange(KeySelector begin, - KeySelector end, - GetRangeLimits limits, - bool snapshot, - bool reverse) { +Future ReadYourWritesTransaction::getRange(KeySelector begin, + KeySelector end, + GetRangeLimits limits, + bool snapshot, + bool reverse) { if (getDatabase()->apiVersionAtLeast(630)) { if (specialKeys.contains(begin.getKey()) && specialKeys.begin <= end.getKey() && end.getKey() <= specialKeys.end) { @@ -1450,7 +1448,7 @@ Future> ReadYourWritesTransaction::getRange(KeySelect if (tr.getDatabase().getPtr() && tr.getDatabase()->getConnectionFile()) { return getWorkerInterfaces(tr.getDatabase()->getConnectionFile()); } else { - return Standalone(); + return RangeResult(); } } } @@ -1469,7 +1467,7 @@ Future> ReadYourWritesTransaction::getRange(KeySelect // This optimization prevents nullptr operations from being added to the conflict range if (limits.isReached()) { TEST(true); // RYW range read limit 0 - return Standalone(); + return RangeResult(); } if (!limits.isValid()) @@ -1483,10 +1481,10 @@ Future> ReadYourWritesTransaction::getRange(KeySelect if (begin.offset >= end.offset && begin.getKey() >= end.getKey()) { TEST(true); // RYW range inverted - return Standalone(); + return RangeResult(); } - Future> result = + Future result = reverse ? 
RYWImpl::readWithConflictRange(this, RYWImpl::GetRangeReq(begin, end, limits), snapshot) : RYWImpl::readWithConflictRange(this, RYWImpl::GetRangeReq(begin, end, limits), snapshot); @@ -1494,11 +1492,11 @@ Future> ReadYourWritesTransaction::getRange(KeySelect return result; } -Future> ReadYourWritesTransaction::getRange(const KeySelector& begin, - const KeySelector& end, - int limit, - bool snapshot, - bool reverse) { +Future ReadYourWritesTransaction::getRange(const KeySelector& begin, + const KeySelector& end, + int limit, + bool snapshot, + bool reverse) { return getRange(begin, end, GetRangeLimits(limit), snapshot, reverse); } @@ -1743,11 +1741,11 @@ void ReadYourWritesTransaction::setToken(uint64_t token) { tr.setToken(token); } -Standalone ReadYourWritesTransaction::getReadConflictRangeIntersecting(KeyRangeRef kr) { +RangeResult ReadYourWritesTransaction::getReadConflictRangeIntersecting(KeyRangeRef kr) { TEST(true); // Special keys read conflict range ASSERT(readConflictRangeKeysRange.contains(kr)); ASSERT(!tr.options.checkWritesEnabled); - Standalone result; + RangeResult result; if (!options.readYourWritesDisabled) { kr = kr.removePrefix(readConflictRangeKeysRange.begin); auto iter = readConflicts.rangeContainingKeyBefore(kr.begin); @@ -1785,10 +1783,10 @@ Standalone ReadYourWritesTransaction::getReadConflictRangeInters return result; } -Standalone ReadYourWritesTransaction::getWriteConflictRangeIntersecting(KeyRangeRef kr) { +RangeResult ReadYourWritesTransaction::getWriteConflictRangeIntersecting(KeyRangeRef kr) { TEST(true); // Special keys write conflict range ASSERT(writeConflictRangeKeysRange.contains(kr)); - Standalone result; + RangeResult result; // Memory owned by result CoalescedKeyRefRangeMap writeConflicts{ LiteralStringRef("0"), specialKeys.end }; @@ -2241,6 +2239,10 @@ void ReadYourWritesTransaction::setOptionImpl(FDBTransactionOptions::Option opti validateOptionValue(value, false); options.specialKeySpaceChangeConfiguration = true; break; + case FDBTransactionOptions::BYPASS_UNREADABLE: + validateOptionValue(value, false); + options.bypassUnreadable = true; + break; default: break; } diff --git a/fdbclient/ReadYourWrites.h b/fdbclient/ReadYourWrites.h index f341a38524..65bb972da9 100644 --- a/fdbclient/ReadYourWrites.h +++ b/fdbclient/ReadYourWrites.h @@ -43,6 +43,7 @@ struct ReadYourWritesTransactionOptions { double timeoutInSeconds; int maxRetries; int snapshotRywEnabled; + bool bypassUnreadable : 1; ReadYourWritesTransactionOptions() {} explicit ReadYourWritesTransactionOptions(Transaction const& tr); @@ -92,10 +93,10 @@ public: snapshot, reverse); } - Future> getRange(const KeyRange& keys, - GetRangeLimits limits, - bool snapshot = false, - bool reverse = false) { + Future getRange(const KeyRange& keys, + GetRangeLimits limits, + bool snapshot = false, + bool reverse = false) { return getRange(KeySelector(firstGreaterOrEqual(keys.begin), keys.arena()), KeySelector(firstGreaterOrEqual(keys.end), keys.arena()), limits, @@ -162,9 +163,9 @@ public: void setToken(uint64_t token); // Read from the special key space readConflictRangeKeysRange - Standalone getReadConflictRangeIntersecting(KeyRangeRef kr); + RangeResult getReadConflictRangeIntersecting(KeyRangeRef kr); // Read from the special key space writeConflictRangeKeysRange - Standalone getWriteConflictRangeIntersecting(KeyRangeRef kr); + RangeResult getWriteConflictRangeIntersecting(KeyRangeRef kr); bool specialKeySpaceRelaxed() const { return options.specialKeySpaceRelaxed; } bool 
specialKeySpaceChangeConfiguration() const { return options.specialKeySpaceChangeConfiguration; } diff --git a/fdbclient/Schemas.cpp b/fdbclient/Schemas.cpp index 866ea4441e..514866fe83 100644 --- a/fdbclient/Schemas.cpp +++ b/fdbclient/Schemas.cpp @@ -47,6 +47,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( "storage", "transaction", "resolution", + "stateless", "commit_proxy", "grv_proxy", "master", @@ -143,6 +144,16 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( "counter":0, "roughness":0.0 }, + "fetched_versions":{ + "hz":0.0, + "counter":0, + "roughness":0.0 + }, + "fetches_from_logs":{ + "hz":0.0, + "counter":0, + "roughness":0.0 + }, "grv_latency_statistics":{ "default":{ "count":0, @@ -155,6 +166,18 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( "p95":0.0, "p99":0.0, "p99.9":0.0 + }, + "batch":{ + "count":0, + "min":0.0, + "max":0.0, + "median":0.0, + "mean":0.0, + "p25":0.0, + "p90":0.0, + "p95":0.0, + "p99":0.0, + "p99.9":0.0 } }, "read_latency_statistics":{ @@ -181,6 +204,18 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( "p99":0.0, "p99.9":0.0 }, + "commit_batching_window_size":{ + "count":0, + "min":0.0, + "max":0.0, + "median":0.0, + "mean":0.0, + "p25":0.0, + "p90":0.0, + "p95":0.0, + "p99":0.0, + "p99.9":0.0 + }, "grv_latency_bands":{ "$map": 1 }, @@ -396,6 +431,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( "seconds" : 1.0, "versions" : 1000000 }, + "active_tss_count":0, "degraded_processes":0, "database_available":true, "database_lock_state": { @@ -623,6 +659,10 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( "data_distribution_disabled_for_rebalance":true, "data_distribution_disabled":true, "active_primary_dc":"pv", + "bounce_impact":{ + "can_clean_bounce":true, + "reason":"" + }, "configuration":{ "log_anti_quorum":0, "log_replicas":2, @@ -690,6 +730,19 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( "memory-2", "memory-radixtree-beta" ]}, + "tss_count":1, + "tss_storage_engine":{ + "$enum":[ + "ssd", + "ssd-1", + "ssd-2", + "ssd-redwood-experimental", + "ssd-rocksdb-experimental", + "memory", + "memory-1", + "memory-2", + "memory-radixtree-beta" + ]}, "coordinators_count":1, "excluded_servers":[ { @@ -702,7 +755,8 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( "auto_logs":3, "commit_proxies":5, "grv_proxies":1, - "backup_worker_enabled":1 + "backup_worker_enabled":1, + "perpetual_storage_wiggle":0 }, "data":{ "least_operating_space_bytes_log_server":0, @@ -762,7 +816,8 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( } } ], - "least_operating_space_bytes_storage_server":0 + "least_operating_space_bytes_storage_server":0, + "max_machine_failures_without_losing_data":0 }, "machines":{ "$map":{ diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp index a54d6cafc3..ec14981151 100644 --- a/fdbclient/ServerKnobs.cpp +++ b/fdbclient/ServerKnobs.cpp @@ -19,8 +19,6 @@ */ #include "fdbclient/ServerKnobs.h" -#include "fdbrpc/Locality.h" -#include #define init(knob, value) initKnob(knob, value, #knob) @@ -80,7 +78,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi init( TLOG_SPILL_REFERENCE_MAX_BYTES_PER_BATCH, 16<<10 ); if ( randomize && BUGGIFY ) TLOG_SPILL_REFERENCE_MAX_BYTES_PER_BATCH = 500; init( DISK_QUEUE_FILE_EXTENSION_BYTES, 10<<20 ); // BUGGIFYd per file within 
the DiskQueue init( DISK_QUEUE_FILE_SHRINK_BYTES, 100<<20 ); // BUGGIFYd per file within the DiskQueue - init( DISK_QUEUE_MAX_TRUNCATE_BYTES, 2<<30 ); if ( randomize && BUGGIFY ) DISK_QUEUE_MAX_TRUNCATE_BYTES = 0; + init( DISK_QUEUE_MAX_TRUNCATE_BYTES, 2LL<<30 ); if ( randomize && BUGGIFY ) DISK_QUEUE_MAX_TRUNCATE_BYTES = 0; init( TLOG_DEGRADED_DURATION, 5.0 ); init( MAX_CACHE_VERSIONS, 10e6 ); init( TLOG_IGNORE_POP_AUTO_ENABLE_DELAY, 300.0 ); @@ -214,6 +212,9 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi init( SERVER_LIST_DELAY, 1.0 ); init( RECRUITMENT_IDLE_DELAY, 1.0 ); init( STORAGE_RECRUITMENT_DELAY, 10.0 ); + init( TSS_HACK_IDENTITY_MAPPING, false ); // THIS SHOULD NEVER BE SET IN PROD. Only for performance testing + init( TSS_RECRUITMENT_TIMEOUT, 3*STORAGE_RECRUITMENT_DELAY ); if (randomize && BUGGIFY ) TSS_RECRUITMENT_TIMEOUT = 1.0; // Super low timeout should cause tss recruitments to fail + init( TSS_DD_KILL_INTERVAL, 60.0 ); if (randomize && BUGGIFY ) TSS_DD_KILL_INTERVAL = 1.0; // May kill all TSS quickly init( DATA_DISTRIBUTION_LOGGING_INTERVAL, 5.0 ); init( DD_ENABLED_CHECK_DELAY, 1.0 ); init( DD_STALL_CHECK_DELAY, 0.4 ); //Must be larger than 2*MAX_BUGGIFIED_DELAY @@ -613,6 +614,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi //Worker init( WORKER_LOGGING_INTERVAL, 5.0 ); init( HEAP_PROFILER_INTERVAL, 30.0 ); + init( UNKNOWN_CC_TIMEOUT, 600.0 ); init( DEGRADED_RESET_INTERVAL, 24*60*60 ); if ( randomize && BUGGIFY ) DEGRADED_RESET_INTERVAL = 10; init( DEGRADED_WARNING_LIMIT, 1 ); init( DEGRADED_WARNING_RESET_DELAY, 7*24*60*60 ); @@ -627,6 +629,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi // Coordination init( COORDINATED_STATE_ONCONFLICT_POLL_INTERVAL, 1.0 ); if( randomize && BUGGIFY ) COORDINATED_STATE_ONCONFLICT_POLL_INTERVAL = 10.0; + init( ENABLE_CROSS_CLUSTER_SUPPORT, true ); if( randomize && BUGGIFY ) ENABLE_CROSS_CLUSTER_SUPPORT = false; // Buggification init( BUGGIFIED_EVENTUAL_CONSISTENCY, 1.0 ); @@ -702,7 +705,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi init( REDWOOD_DEFAULT_PAGE_SIZE, 4096 ); init( REDWOOD_KVSTORE_CONCURRENT_READS, 64 ); init( REDWOOD_COMMIT_CONCURRENT_READS, 64 ); - init( REDWOOD_PAGE_REBUILD_FILL_FACTOR, 0.66 ); + init( REDWOOD_PAGE_REBUILD_MAX_SLACK, 0.33 ); init( REDWOOD_LAZY_CLEAR_BATCH_SIZE_PAGES, 10 ); init( REDWOOD_LAZY_CLEAR_MIN_PAGES, 0 ); init( REDWOOD_LAZY_CLEAR_MAX_PAGES, 1e6 ); diff --git a/fdbclient/ServerKnobs.h b/fdbclient/ServerKnobs.h index 7722832c86..68a261e28e 100644 --- a/fdbclient/ServerKnobs.h +++ b/fdbclient/ServerKnobs.h @@ -22,6 +22,7 @@ #include "flow/Knobs.h" #include "fdbrpc/fdbrpc.h" +#include "fdbrpc/Locality.h" #include "fdbclient/Knobs.h" // Disk queue @@ -82,7 +83,7 @@ public: int64_t TLOG_SPILL_REFERENCE_MAX_BYTES_PER_BATCH; int64_t DISK_QUEUE_FILE_EXTENSION_BYTES; // When we grow the disk queue, by how many bytes should it grow? int64_t DISK_QUEUE_FILE_SHRINK_BYTES; // When we shrink the disk queue, by how many bytes should it shrink? - int DISK_QUEUE_MAX_TRUNCATE_BYTES; // A truncate larger than this will cause the file to be replaced instead. + int64_t DISK_QUEUE_MAX_TRUNCATE_BYTES; // A truncate larger than this will cause the file to be replaced instead. 
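// The DISK_QUEUE_MAX_TRUNCATE_BYTES change above (int -> int64_t here, 2<<30 -> 2LL<<30 in
// ServerKnobs.cpp) avoids a 32-bit shift overflow. A tiny standalone illustration:
#include <cstdint>
#include <iostream>

int main() {
	// With plain int literals, 2 << 30 is evaluated in 32-bit arithmetic and overflows, since
	// 2^31 is not representable in a signed int. Writing 2LL performs the shift in 64 bits.
	int64_t truncateBytes = 2LL << 30; // 2147483648 bytes, i.e. 2 GiB
	std::cout << truncateBytes << "\n";
	return 0;
}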
double TLOG_DEGRADED_DURATION; int64_t MAX_CACHE_VERSIONS; double TXS_POPPED_MAX_DELAY; @@ -165,6 +166,9 @@ public: double SERVER_LIST_DELAY; double RECRUITMENT_IDLE_DELAY; double STORAGE_RECRUITMENT_DELAY; + bool TSS_HACK_IDENTITY_MAPPING; + double TSS_RECRUITMENT_TIMEOUT; + double TSS_DD_KILL_INTERVAL; double DATA_DISTRIBUTION_LOGGING_INTERVAL; double DD_ENABLED_CHECK_DELAY; double DD_STALL_CHECK_DELAY; @@ -541,6 +545,7 @@ public: // Worker double WORKER_LOGGING_INTERVAL; double HEAP_PROFILER_INTERVAL; + double UNKNOWN_CC_TIMEOUT; double DEGRADED_RESET_INTERVAL; double DEGRADED_WARNING_LIMIT; double DEGRADED_WARNING_RESET_DELAY; @@ -556,6 +561,8 @@ public: // Coordination double COORDINATED_STATE_ONCONFLICT_POLL_INTERVAL; + bool ENABLE_CROSS_CLUSTER_SUPPORT; // Allow a coordinator to serve requests whose connection string does not match + // the local descriptor // Buggification double BUGGIFIED_EVENTUAL_CONSISTENCY; @@ -636,7 +643,7 @@ public: int REDWOOD_DEFAULT_PAGE_SIZE; // Page size for new Redwood files int REDWOOD_KVSTORE_CONCURRENT_READS; // Max number of simultaneous point or range reads in progress. int REDWOOD_COMMIT_CONCURRENT_READS; // Max number of concurrent reads done to support commit operations - double REDWOOD_PAGE_REBUILD_FILL_FACTOR; // When rebuilding pages, start a new page after this capacity + double REDWOOD_PAGE_REBUILD_MAX_SLACK; // When rebuilding pages, max slack to allow in page int REDWOOD_LAZY_CLEAR_BATCH_SIZE_PAGES; // Number of pages to try to pop from the lazy delete queue and process at // once int REDWOOD_LAZY_CLEAR_MIN_PAGES; // Minimum number of pages to free before ending a lazy clear cycle, unless the diff --git a/fdbclient/SnapshotCache.h b/fdbclient/SnapshotCache.h index df389eb4ab..eabd289aee 100644 --- a/fdbclient/SnapshotCache.h +++ b/fdbclient/SnapshotCache.h @@ -203,6 +203,7 @@ public: bool is_empty_range() const { return type() == EMPTY_RANGE; } bool is_dependent() const { return false; } bool is_unreadable() const { return false; } + void bypassUnreadableProtection() {} ExtStringRef beginKey() const { if (offset == 0) { diff --git a/fdbclient/SpecialKeySpace.actor.cpp b/fdbclient/SpecialKeySpace.actor.cpp index 5fb7360b0d..6b147eaa07 100644 --- a/fdbclient/SpecialKeySpace.actor.cpp +++ b/fdbclient/SpecialKeySpace.actor.cpp @@ -21,7 +21,7 @@ #include "boost/lexical_cast.hpp" #include "boost/algorithm/string.hpp" -#include "fdbclient/Knobs.h" +#include "fdbclient/GlobalConfig.actor.h" #include "fdbclient/SpecialKeySpace.actor.h" #include "flow/Arena.h" #include "flow/UnitTest.h" @@ -64,6 +64,8 @@ std::unordered_map SpecialKeySpace::moduleToB { SpecialKeySpace::MODULE::ERRORMSG, singleKeyRange(LiteralStringRef("\xff\xff/error_message")) }, { SpecialKeySpace::MODULE::CONFIGURATION, KeyRangeRef(LiteralStringRef("\xff\xff/configuration/"), LiteralStringRef("\xff\xff/configuration0")) }, + { SpecialKeySpace::MODULE::GLOBALCONFIG, + KeyRangeRef(LiteralStringRef("\xff\xff/global_config/"), LiteralStringRef("\xff\xff/global_config0")) }, { SpecialKeySpace::MODULE::TRACING, KeyRangeRef(LiteralStringRef("\xff\xff/tracing/"), LiteralStringRef("\xff\xff/tracing0")) } }; @@ -87,6 +89,12 @@ std::unordered_map SpecialKeySpace::managementApiCommandT .withPrefix(moduleToBoundary[MODULE::MANAGEMENT].begin) }, { "profile", KeyRangeRef(LiteralStringRef("profiling/"), LiteralStringRef("profiling0")) + .withPrefix(moduleToBoundary[MODULE::MANAGEMENT].begin) }, + { "maintenance", + KeyRangeRef(LiteralStringRef("maintenance/"), LiteralStringRef("maintenance0")) + 
.withPrefix(moduleToBoundary[MODULE::MANAGEMENT].begin) }, + { "datadistribution", + KeyRangeRef(LiteralStringRef("data_distribution/"), LiteralStringRef("data_distribution0")) .withPrefix(moduleToBoundary[MODULE::MANAGEMENT].begin) } }; @@ -94,9 +102,7 @@ std::set SpecialKeySpace::options = { "excluded/force", "failed/for std::set SpecialKeySpace::tracingOptions = { kTracingTransactionIdKey, kTracingTokenKey }; -Standalone rywGetRange(ReadYourWritesTransaction* ryw, - const KeyRangeRef& kr, - const Standalone& res); +RangeResult rywGetRange(ReadYourWritesTransaction* ryw, const KeyRangeRef& kr, const RangeResult& res); // This function will move the given KeySelector as far as possible to the standard form: // orEqual == false && offset == 1 (Standard form) @@ -107,13 +113,13 @@ Standalone rywGetRange(ReadYourWritesTransaction* ryw, ACTOR Future moveKeySelectorOverRangeActor(const SpecialKeyRangeReadImpl* skrImpl, ReadYourWritesTransaction* ryw, KeySelector* ks, - Optional>* cache) { + Optional* cache) { ASSERT(!ks->orEqual); // should be removed before calling ASSERT(ks->offset != 1); // never called if the KeySelector is already normalized state Key startKey(skrImpl->getKeyRange().begin); state Key endKey(skrImpl->getKeyRange().end); - state Standalone result; + state RangeResult result; if (ks->offset < 1) { // less than the given key @@ -134,10 +140,10 @@ ACTOR Future moveKeySelectorOverRangeActor(const SpecialKeyRangeReadImpl* if (skrImpl->isAsync()) { const SpecialKeyRangeAsyncImpl* ptr = dynamic_cast(skrImpl); - Standalone result_ = wait(ptr->getRange(ryw, KeyRangeRef(startKey, endKey), cache)); + RangeResult result_ = wait(ptr->getRange(ryw, KeyRangeRef(startKey, endKey), cache)); result = result_; } else { - Standalone result_ = wait(skrImpl->getRange(ryw, KeyRangeRef(startKey, endKey))); + RangeResult result_ = wait(skrImpl->getRange(ryw, KeyRangeRef(startKey, endKey))); result = result_; } @@ -186,8 +192,8 @@ ACTOR Future normalizeKeySelectorActor(SpecialKeySpace* sks, KeySelector* ks, KeyRangeRef boundary, int* actualOffset, - Standalone* result, - Optional>* cache) { + RangeResult* result, + Optional* cache) { // If offset < 1, where we need to move left, iter points to the range containing at least one smaller key // (It's a waste of time to walk through a range whose begin key is the same as ks->key) // (rangeContainingKeyBefore itself handles the case where ks->key == Key()) @@ -257,15 +263,15 @@ void SpecialKeySpace::modulesBoundaryInit() { } } -ACTOR Future> SpecialKeySpace::checkRYWValid(SpecialKeySpace* sks, - ReadYourWritesTransaction* ryw, - KeySelector begin, - KeySelector end, - GetRangeLimits limits, - bool reverse) { +ACTOR Future SpecialKeySpace::checkRYWValid(SpecialKeySpace* sks, + ReadYourWritesTransaction* ryw, + KeySelector begin, + KeySelector end, + GetRangeLimits limits, + bool reverse) { ASSERT(ryw); choose { - when(Standalone result = + when(RangeResult result = wait(SpecialKeySpace::getRangeAggregationActor(sks, ryw, begin, end, limits, reverse))) { return result; } @@ -273,22 +279,22 @@ } -ACTOR Future> SpecialKeySpace::getRangeAggregationActor(SpecialKeySpace* sks, - ReadYourWritesTransaction* ryw, - KeySelector begin, - KeySelector end, - GetRangeLimits limits, - bool reverse) { +ACTOR Future SpecialKeySpace::getRangeAggregationActor(SpecialKeySpace* sks, + ReadYourWritesTransaction* ryw, + KeySelector begin, + KeySelector end, + GetRangeLimits limits, + bool reverse) { // This function
handles ranges which cover more than one keyrange and aggregates all results // KeySelector, GetRangeLimits and reverse are all handled here - state Standalone result; - state Standalone pairs; + state RangeResult result; + state RangeResult pairs; state RangeMap::iterator iter; state int actualBeginOffset; state int actualEndOffset; state KeyRangeRef moduleBoundary; // used to cache result from potential first read - state Optional> cache; + state Optional cache; if (ryw->specialKeySpaceRelaxed()) { moduleBoundary = sks->range; @@ -337,10 +343,10 @@ ACTOR Future> SpecialKeySpace::getRangeAggregationAct KeyRef keyEnd = kr.contains(end.getKey()) ? end.getKey() : kr.end; if (iter->value()->isAsync() && cache.present()) { const SpecialKeyRangeAsyncImpl* ptr = dynamic_cast(iter->value()); - Standalone pairs_ = wait(ptr->getRange(ryw, KeyRangeRef(keyStart, keyEnd), &cache)); + RangeResult pairs_ = wait(ptr->getRange(ryw, KeyRangeRef(keyStart, keyEnd), &cache)); pairs = pairs_; } else { - Standalone pairs_ = wait(iter->value()->getRange(ryw, KeyRangeRef(keyStart, keyEnd))); + RangeResult pairs_ = wait(iter->value()->getRange(ryw, KeyRangeRef(keyStart, keyEnd))); pairs = pairs_; } result.arena().dependsOn(pairs.arena()); @@ -368,10 +374,10 @@ ACTOR Future> SpecialKeySpace::getRangeAggregationAct KeyRef keyEnd = kr.contains(end.getKey()) ? end.getKey() : kr.end; if (iter->value()->isAsync() && cache.present()) { const SpecialKeyRangeAsyncImpl* ptr = dynamic_cast(iter->value()); - Standalone pairs_ = wait(ptr->getRange(ryw, KeyRangeRef(keyStart, keyEnd), &cache)); + RangeResult pairs_ = wait(ptr->getRange(ryw, KeyRangeRef(keyStart, keyEnd), &cache)); pairs = pairs_; } else { - Standalone pairs_ = wait(iter->value()->getRange(ryw, KeyRangeRef(keyStart, keyEnd))); + RangeResult pairs_ = wait(iter->value()->getRange(ryw, KeyRangeRef(keyStart, keyEnd))); pairs = pairs_; } result.arena().dependsOn(pairs.arena()); @@ -394,17 +400,17 @@ ACTOR Future> SpecialKeySpace::getRangeAggregationAct return result; } -Future> SpecialKeySpace::getRange(ReadYourWritesTransaction* ryw, - KeySelector begin, - KeySelector end, - GetRangeLimits limits, - bool reverse) { +Future SpecialKeySpace::getRange(ReadYourWritesTransaction* ryw, + KeySelector begin, + KeySelector end, + GetRangeLimits limits, + bool reverse) { // validate limits here if (!limits.isValid()) return range_limits_invalid(); if (limits.isReached()) { TEST(true); // read limit 0 - return Standalone(); + return RangeResult(); } // make sure orEqual == false begin.removeOrEqual(begin.arena()); @@ -412,7 +418,7 @@ Future> SpecialKeySpace::getRange(ReadYourWritesTrans if (begin.offset >= end.offset && begin.getKey() >= end.getKey()) { TEST(true); // range inverted - return Standalone(); + return RangeResult(); } return checkRYWValid(this, ryw, begin, end, limits, reverse); @@ -422,11 +428,11 @@ ACTOR Future> SpecialKeySpace::getActor(SpecialKeySpace* sks, ReadYourWritesTransaction* ryw, KeyRef key) { // use getRange to workaround this - Standalone result = wait(sks->getRange(ryw, - KeySelector(firstGreaterOrEqual(key)), - KeySelector(firstGreaterOrEqual(keyAfter(key))), - GetRangeLimits(CLIENT_KNOBS->TOO_MANY), - false)); + RangeResult result = wait(sks->getRange(ryw, + KeySelector(firstGreaterOrEqual(key)), + KeySelector(firstGreaterOrEqual(keyAfter(key))), + GetRangeLimits(CLIENT_KNOBS->TOO_MANY), + false)); ASSERT(result.size() <= 1); if (result.size()) { return Optional(result[0].value); @@ -544,16 +550,20 @@ ACTOR Future commitActor(SpecialKeySpace* sks, 
ReadYourWritesTransaction* state RangeMap>, KeyRangeRef>::Ranges ranges = ryw->getSpecialKeySpaceWriteMap().containedRanges(specialKeys); state RangeMap>, KeyRangeRef>::iterator iter = ranges.begin(); - state std::set writeModulePtrs; + state std::vector writeModulePtrs; + std::unordered_set deduplicate; while (iter != ranges.end()) { std::pair> entry = iter->value(); if (entry.first) { auto modulePtr = sks->getRWImpls().rangeContaining(iter->begin())->value(); - writeModulePtrs.insert(modulePtr); + auto [_, inserted] = deduplicate.insert(modulePtr); + if (inserted) { + writeModulePtrs.push_back(modulePtr); + } } ++iter; } - state std::set::const_iterator it; + state std::vector::const_iterator it; for (it = writeModulePtrs.begin(); it != writeModulePtrs.end(); ++it) { Optional msg = wait((*it)->commit(ryw)); if (msg.present()) { @@ -573,7 +583,7 @@ Future SpecialKeySpace::commit(ReadYourWritesTransaction* ryw) { SKSCTestImpl::SKSCTestImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {} -Future> SKSCTestImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { +Future SKSCTestImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { ASSERT(range.contains(kr)); auto resultFuture = ryw->getRange(kr, CLIENT_KNOBS->TOO_MANY); // all keys are written to RYW, since GRV is set, the read should happen locally @@ -591,27 +601,25 @@ Future> SKSCTestImpl::commit(ReadYourWritesTransaction* ry ReadConflictRangeImpl::ReadConflictRangeImpl(KeyRangeRef kr) : SpecialKeyRangeReadImpl(kr) {} -ACTOR static Future> getReadConflictRangeImpl(ReadYourWritesTransaction* ryw, KeyRange kr) { +ACTOR static Future getReadConflictRangeImpl(ReadYourWritesTransaction* ryw, KeyRange kr) { wait(ryw->pendingReads()); return ryw->getReadConflictRangeIntersecting(kr); } -Future> ReadConflictRangeImpl::getRange(ReadYourWritesTransaction* ryw, - KeyRangeRef kr) const { +Future ReadConflictRangeImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { return getReadConflictRangeImpl(ryw, kr); } WriteConflictRangeImpl::WriteConflictRangeImpl(KeyRangeRef kr) : SpecialKeyRangeReadImpl(kr) {} -Future> WriteConflictRangeImpl::getRange(ReadYourWritesTransaction* ryw, - KeyRangeRef kr) const { +Future WriteConflictRangeImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { return ryw->getWriteConflictRangeIntersecting(kr); } ConflictingKeysImpl::ConflictingKeysImpl(KeyRangeRef kr) : SpecialKeyRangeReadImpl(kr) {} -Future> ConflictingKeysImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { - Standalone result; +Future ConflictingKeysImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { + RangeResult result; if (ryw->getTransactionInfo().conflictingKeys) { auto krMapPtr = ryw->getTransactionInfo().conflictingKeys.get(); auto beginIter = krMapPtr->rangeContaining(kr.begin); @@ -627,13 +635,13 @@ Future> ConflictingKeysImpl::getRange(ReadYourWritesT return result; } -ACTOR Future> ddMetricsGetRangeActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) { +ACTOR Future ddMetricsGetRangeActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) { loop { try { auto keys = kr.removePrefix(ddStatsRange.begin); Standalone> resultWithoutPrefix = wait( waitDataDistributionMetricsList(ryw->getDatabase(), keys, CLIENT_KNOBS->STORAGE_METRICS_SHARD_LIMIT)); - Standalone result; + RangeResult result; for (const auto& ddMetricsRef : resultWithoutPrefix) { // each begin key is the previous end key, thus we only encode the begin key in the result KeyRef beginKey = 
ddMetricsRef.beginKey.withPrefix(ddStatsRange.begin, result.arena()); @@ -661,7 +669,7 @@ ACTOR Future> ddMetricsGetRangeActor(ReadYourWritesTr DDStatsRangeImpl::DDStatsRangeImpl(KeyRangeRef kr) : SpecialKeyRangeAsyncImpl(kr) {} -Future> DDStatsRangeImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { +Future DDStatsRangeImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { return ddMetricsGetRangeActor(ryw, kr); } @@ -674,9 +682,8 @@ Key SpecialKeySpace::getManagementApiCommandOptionSpecialKey(const std::string& ManagementCommandsOptionsImpl::ManagementCommandsOptionsImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {} -Future> ManagementCommandsOptionsImpl::getRange(ReadYourWritesTransaction* ryw, - KeyRangeRef kr) const { - Standalone result; +Future ManagementCommandsOptionsImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { + RangeResult result; // Since we only have a limited number of options, a brute-force loop here is enough for (const auto& option : SpecialKeySpace::getManagementApiOptionsSet()) { auto key = getKeyRange().begin.withSuffix(option); @@ -718,14 +725,12 @@ Future> ManagementCommandsOptionsImpl::commit(ReadYourWrit return Optional(); } -Standalone rywGetRange(ReadYourWritesTransaction* ryw, - const KeyRangeRef& kr, - const Standalone& res) { +RangeResult rywGetRange(ReadYourWritesTransaction* ryw, const KeyRangeRef& kr, const RangeResult& res) { // "res" is the read result regardless of your writes; if ryw is disabled, return immediately if (ryw->readYourWritesDisabled()) return res; // If ryw is enabled, we update it with writes from the transaction - Standalone result; + RangeResult result; RangeMap>, KeyRangeRef>::Ranges ranges = ryw->getSpecialKeySpaceWriteMap().containedRanges(kr); RangeMap>, KeyRangeRef>::iterator iter = ranges.begin(); @@ -770,13 +775,13 @@ Standalone rywGetRange(ReadYourWritesTransaction* ryw, } // read from those readwrite modules in which special keys have a one-to-one mapping with real persisted keys -ACTOR Future> rwModuleWithMappingGetRangeActor(ReadYourWritesTransaction* ryw, - const SpecialKeyRangeRWImpl* impl, - KeyRangeRef kr) { - Standalone resultWithoutPrefix = +ACTOR Future rwModuleWithMappingGetRangeActor(ReadYourWritesTransaction* ryw, + const SpecialKeyRangeRWImpl* impl, + KeyRangeRef kr) { + RangeResult resultWithoutPrefix = wait(ryw->getTransaction().getRange(ryw->getDatabase()->specialKeySpace->decode(kr), CLIENT_KNOBS->TOO_MANY)); ASSERT(!resultWithoutPrefix.more && resultWithoutPrefix.size() < CLIENT_KNOBS->TOO_MANY); - Standalone result; + RangeResult result; for (const KeyValueRef& kv : resultWithoutPrefix) result.push_back_deep(result.arena(), KeyValueRef(impl->encode(kv.key), kv.value)); return rywGetRange(ryw, kr, result); @@ -784,8 +789,7 @@ ExcludeServersRangeImpl::ExcludeServersRangeImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {} -Future> ExcludeServersRangeImpl::getRange(ReadYourWritesTransaction* ryw, - KeyRangeRef kr) const { +Future ExcludeServersRangeImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { return rwModuleWithMappingGetRangeActor(ryw, this, kr); } @@ -1024,8 +1028,7 @@ Future> ExcludeServersRangeImpl::commit(ReadYourWritesTran FailedServersRangeImpl::FailedServersRangeImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {} -Future> FailedServersRangeImpl::getRange(ReadYourWritesTransaction*
ryw, KeyRangeRef kr) const { return rwModuleWithMappingGetRangeActor(ryw, this, kr); } @@ -1048,10 +1051,8 @@ Future> FailedServersRangeImpl::commit(ReadYourWritesTrans return excludeCommitActor(ryw, true); } -ACTOR Future> ExclusionInProgressActor(ReadYourWritesTransaction* ryw, - KeyRef prefix, - KeyRangeRef kr) { - state Standalone result; +ACTOR Future ExclusionInProgressActor(ReadYourWritesTransaction* ryw, KeyRef prefix, KeyRangeRef kr) { + state RangeResult result; state Transaction& tr = ryw->getTransaction(); tr.setOption(FDBTransactionOptions::READ_SYSTEM_KEYS); tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE); // necessary? @@ -1062,7 +1063,7 @@ ACTOR Future> ExclusionInProgressActor(ReadYourWrites state std::set inProgressExclusion; // Just getting a consistent read version proves that a set of tlogs satisfying the exclusions has completed recovery. // Check that there aren't any storage servers with addresses violating the exclusions - state Standalone serverList = wait(tr.getRange(serverListKeys, CLIENT_KNOBS->TOO_MANY)); + state RangeResult serverList = wait(tr.getRange(serverListKeys, CLIENT_KNOBS->TOO_MANY)); ASSERT(!serverList.more && serverList.size() < CLIENT_KNOBS->TOO_MANY); for (auto& s : serverList) { @@ -1107,21 +1108,18 @@ ACTOR Future> ExclusionInProgressActor(ReadYourWrites ExclusionInProgressRangeImpl::ExclusionInProgressRangeImpl(KeyRangeRef kr) : SpecialKeyRangeAsyncImpl(kr) {} -Future> ExclusionInProgressRangeImpl::getRange(ReadYourWritesTransaction* ryw, - KeyRangeRef kr) const { +Future ExclusionInProgressRangeImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { return ExclusionInProgressActor(ryw, getKeyRange().begin, kr); } -ACTOR Future> getProcessClassActor(ReadYourWritesTransaction* ryw, - KeyRef prefix, - KeyRangeRef kr) { +ACTOR Future getProcessClassActor(ReadYourWritesTransaction* ryw, KeyRef prefix, KeyRangeRef kr) { vector _workers = wait(getWorkers(&ryw->getTransaction())); auto workers = _workers; // strip const // Note: sorting by the string form is counterintuitive, e.g. 1.1.1.1:11 < 1.1.1.1:5 std::sort(workers.begin(), workers.end(), [](const ProcessData& lhs, const ProcessData& rhs) { return formatIpPort(lhs.address.ip, lhs.address.port) < formatIpPort(rhs.address.ip, rhs.address.port); }); - Standalone result; + RangeResult result; for (auto& w : workers) { // exclude :tls in keys even if the network address is TLS KeyRef k(prefix.withSuffix(formatIpPort(w.address.ip, w.address.port), result.arena())); @@ -1175,8 +1173,7 @@ ACTOR Future> processClassCommitActor(ReadYourWritesTransa ProcessClassRangeImpl::ProcessClassRangeImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {} -Future> ProcessClassRangeImpl::getRange(ReadYourWritesTransaction* ryw, - KeyRangeRef kr) const { +Future ProcessClassRangeImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { return getProcessClassActor(ryw, getKeyRange().begin, kr); } @@ -1229,16 +1226,14 @@ void ProcessClassRangeImpl::clear(ReadYourWritesTransaction* ryw, const KeyRef& ryw, "setclass", "Clear range operation is meaningless thus forbidden for setclass"); } -ACTOR Future> getProcessClassSourceActor(ReadYourWritesTransaction* ryw, - KeyRef prefix, - KeyRangeRef kr) { +ACTOR Future getProcessClassSourceActor(ReadYourWritesTransaction* ryw, KeyRef prefix, KeyRangeRef kr) { vector _workers = wait(getWorkers(&ryw->getTransaction())); auto workers = _workers; // strip const // Note: sorting by the string form is counterintuitive, e.g.
1.1.1.1:11 < 1.1.1.1:5 std::sort(workers.begin(), workers.end(), [](const ProcessData& lhs, const ProcessData& rhs) { return formatIpPort(lhs.address.ip, lhs.address.port) < formatIpPort(rhs.address.ip, rhs.address.port); }); - Standalone result; + RangeResult result; for (auto& w : workers) { // exclude :tls in keys even if the network address is TLS Key k(prefix.withSuffix(formatIpPort(w.address.ip, w.address.port))); @@ -1254,15 +1249,14 @@ ACTOR Future> getProcessClassSourceActor(ReadYourWrit ProcessClassSourceRangeImpl::ProcessClassSourceRangeImpl(KeyRangeRef kr) : SpecialKeyRangeReadImpl(kr) {} -Future> ProcessClassSourceRangeImpl::getRange(ReadYourWritesTransaction* ryw, - KeyRangeRef kr) const { +Future ProcessClassSourceRangeImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { return getProcessClassSourceActor(ryw, getKeyRange().begin, kr); } -ACTOR Future> getLockedKeyActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) { ryw->getTransaction().setOption(FDBTransactionOptions::LOCK_AWARE); Optional val = wait(ryw->getTransaction().get(databaseLockedKey)); - Standalone result; + RangeResult result; if (val.present()) { result.push_back_deep(result.arena(), KeyValueRef(kr.begin, val.get())); } @@ -1271,13 +1265,13 @@ ACTOR Future> getLockedKeyActor(ReadYourWritesTransac LockDatabaseImpl::LockDatabaseImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {} -Future> LockDatabaseImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { +Future LockDatabaseImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { // single key range, the queried range should always be the same as the underlying range ASSERT(kr == getKeyRange()); auto lockEntry = ryw->getSpecialKeySpaceWriteMap()[SpecialKeySpace::getManagementApiCommandPrefix("lock")]; if (!ryw->readYourWritesDisabled() && lockEntry.first) { // ryw enabled and we have written to the special key - Standalone result; + RangeResult result; if (lockEntry.second.present()) { result.push_back_deep(result.arena(), KeyValueRef(kr.begin, lockEntry.second.get())); } @@ -1328,12 +1322,12 @@ Future> LockDatabaseImpl::commit(ReadYourWritesTransaction } } -ACTOR Future> getConsistencyCheckKeyActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) { ryw->getTransaction().setOption(FDBTransactionOptions::LOCK_AWARE); ryw->getTransaction().setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE); Optional val = wait(ryw->getTransaction().get(fdbShouldConsistencyCheckBeSuspended)); bool ccSuspendSetting = val.present() ?
BinaryReader::fromStringRef(val.get(), Unversioned()) : false; - Standalone result; + RangeResult result; if (ccSuspendSetting) { result.push_back_deep(result.arena(), KeyValueRef(kr.begin, ValueRef())); } @@ -1342,14 +1336,13 @@ ACTOR Future> getConsistencyCheckKeyActor(ReadYourWri ConsistencyCheckImpl::ConsistencyCheckImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {} -Future> ConsistencyCheckImpl::getRange(ReadYourWritesTransaction* ryw, - KeyRangeRef kr) const { +Future ConsistencyCheckImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { // single key range, the queried range should always be the same as the underlying range ASSERT(kr == getKeyRange()); auto entry = ryw->getSpecialKeySpaceWriteMap()[SpecialKeySpace::getManagementApiCommandPrefix("consistencycheck")]; if (!ryw->readYourWritesDisabled() && entry.first) { // ryw enabled and we have written to the special key - Standalone result; + RangeResult result; if (entry.second.present()) { result.push_back_deep(result.arena(), KeyValueRef(kr.begin, entry.second.get())); } @@ -1369,12 +1362,131 @@ Future> ConsistencyCheckImpl::commit(ReadYourWritesTransac return Optional(); } -TracingOptionsImpl::TracingOptionsImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) { - TraceEvent("TracingOptionsImpl::TracingOptionsImpl").detail("Range", kr); +GlobalConfigImpl::GlobalConfigImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {} + +// Returns key-value pairs for each value stored in the global configuration +// framework within the range specified. The special-key-space getrange +// function should only be used for informational purposes. All values are +// returned as strings regardless of their true type. +Future GlobalConfigImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { + RangeResult result; + + auto& globalConfig = GlobalConfig::globalConfig(); + KeyRangeRef modified = + KeyRangeRef(kr.begin.removePrefix(getKeyRange().begin), kr.end.removePrefix(getKeyRange().begin)); + std::map> values = globalConfig.get(modified); + for (const auto& [key, config] : values) { + Key prefixedKey = key.withPrefix(getKeyRange().begin); + if (config.isValid() && config->value.has_value()) { + if (config->value.type() == typeid(StringRef)) { + result.push_back_deep(result.arena(), + KeyValueRef(prefixedKey, std::any_cast(config->value).toString())); + } else if (config->value.type() == typeid(int64_t)) { + result.push_back_deep(result.arena(), + KeyValueRef(prefixedKey, std::to_string(std::any_cast(config->value)))); + } else if (config->value.type() == typeid(float)) { + result.push_back_deep(result.arena(), + KeyValueRef(prefixedKey, std::to_string(std::any_cast(config->value)))); + } else if (config->value.type() == typeid(double)) { + result.push_back_deep(result.arena(), + KeyValueRef(prefixedKey, std::to_string(std::any_cast(config->value)))); + } else { + ASSERT(false); + } + } + } + + return result; } -Future> TracingOptionsImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { - Standalone result; +// Marks the key for insertion into global configuration. +void GlobalConfigImpl::set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) { + ryw->getSpecialKeySpaceWriteMap().insert(key, std::make_pair(true, Optional(value))); +} + +// Writes global configuration changes to durable memory. Also writes the +// changes made in the transaction to a recent history set, and updates the +// latest version which the global configuration was updated at. 
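To make the history bookkeeping in the commit path concrete, here is a minimal standalone sketch of the trimming rule described above: at most three history entries survive, and the oldest are cleared to make room before a new entry is appended. The deque, kMaxHistory, and entry strings below are illustrative stand-ins, not FDB identifiers.

```cpp
// Sketch (assumed names, not FDB code) of the global-config history cap:
// keep at most three entries, clearing the oldest before appending a new one.
#include <deque>
#include <iostream>
#include <string>

int main() {
    constexpr std::size_t kMaxHistory = 3; // mirrors the cap of three used below
    std::deque<std::string> history = { "mutations@v10", "mutations@v11", "mutations@v12" };

    auto append = [&](const std::string& entry) {
        while (history.size() > kMaxHistory - 1)
            history.pop_front(); // the real actor issues tr.clear(history[i].key) here
        history.push_back(entry); // the real actor writes a versionstamped history key
    };

    append("mutations@v13");
    for (const auto& h : history)
        std::cout << h << '\n'; // v11, v12, v13 remain
}
```

The versionstamped history key and the version key written by the actor below serve the same purpose as the ordering in this sketch: entries are sorted by commit version without any client-side counter.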
+ACTOR Future> globalConfigCommitActor(GlobalConfigImpl* globalConfig, + ReadYourWritesTransaction* ryw) { + state Transaction& tr = ryw->getTransaction(); + + // History should only contain three most recent updates. If it currently + // has three items, remove the oldest to make room for a new item. + RangeResult history = wait(tr.getRange(globalConfigHistoryKeys, CLIENT_KNOBS->TOO_MANY)); + constexpr int kGlobalConfigMaxHistorySize = 3; + if (history.size() > kGlobalConfigMaxHistorySize - 1) { + for (int i = 0; i < history.size() - (kGlobalConfigMaxHistorySize - 1); ++i) { + tr.clear(history[i].key); + } + } + + VersionHistory vh{ 0 }; + + // Transform writes from the special-key-space (\xff\xff/global_config/) to + // the system key space (\xff/globalConfig/), and writes mutations to + // latest version history. + state RangeMap>, KeyRangeRef>::Ranges ranges = + ryw->getSpecialKeySpaceWriteMap().containedRanges(specialKeys); + state RangeMap>, KeyRangeRef>::iterator iter = ranges.begin(); + while (iter != ranges.end()) { + std::pair> entry = iter->value(); + if (entry.first) { + if (entry.second.present() && iter->begin().startsWith(globalConfig->getKeyRange().begin)) { + Key bareKey = iter->begin().removePrefix(globalConfig->getKeyRange().begin); + vh.mutations.emplace_back_deep(vh.mutations.arena(), + MutationRef(MutationRef::SetValue, bareKey, entry.second.get())); + + Key systemKey = bareKey.withPrefix(globalConfigKeysPrefix); + tr.set(systemKey, entry.second.get()); + } else if (!entry.second.present() && iter->range().begin.startsWith(globalConfig->getKeyRange().begin) && + iter->range().end.startsWith(globalConfig->getKeyRange().begin)) { + KeyRef bareRangeBegin = iter->range().begin.removePrefix(globalConfig->getKeyRange().begin); + KeyRef bareRangeEnd = iter->range().end.removePrefix(globalConfig->getKeyRange().begin); + vh.mutations.emplace_back_deep(vh.mutations.arena(), + MutationRef(MutationRef::ClearRange, bareRangeBegin, bareRangeEnd)); + + Key systemRangeBegin = bareRangeBegin.withPrefix(globalConfigKeysPrefix); + Key systemRangeEnd = bareRangeEnd.withPrefix(globalConfigKeysPrefix); + tr.clear(KeyRangeRef(systemRangeBegin, systemRangeEnd)); + } + } + ++iter; + } + + // Record the mutations in this commit into the global configuration history. + Key historyKey = addVersionStampAtEnd(globalConfigHistoryPrefix); + ObjectWriter historyWriter(IncludeVersion()); + historyWriter.serialize(vh); + tr.atomicOp(historyKey, historyWriter.toStringRef(), MutationRef::SetVersionstampedKey); + + // Write version key to trigger update in cluster controller. + tr.atomicOp(globalConfigVersionKey, + LiteralStringRef("0123456789\x00\x00\x00\x00"), // versionstamp + MutationRef::SetVersionstampedValue); + + return Optional(); +} + +// Called when a transaction includes keys in the global configuration special-key-space range. +Future> GlobalConfigImpl::commit(ReadYourWritesTransaction* ryw) { + return globalConfigCommitActor(this, ryw); +} + +// Marks the range for deletion from global configuration. +void GlobalConfigImpl::clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) { + ryw->getSpecialKeySpaceWriteMap().insert(range, std::make_pair(true, Optional())); +} + +// Marks the key for deletion from global configuration. 
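Note that set() above and both clear() overloads (the single-key form follows below) share one mechanism: they only record intent in the transaction's special-key-space write map, and no real mutation happens until commit() replays that map. A minimal sketch of the pattern, using plain standard-library types in place of FDB's RangeMap and Optional (key names here are made up for illustration):

```cpp
// Sketch (not FDB code) of the buffered-write pattern behind the special
// key space: set/clear record intent, and commit replays it as mutations.
#include <iostream>
#include <map>
#include <optional>
#include <string>

int main() {
    // key -> (dirty flag, value); an empty optional means "clear this key"
    std::map<std::string, std::pair<bool, std::optional<std::string>>> writeMap;

    writeMap["sample_config/knob_a"] = { true, std::string("42") }; // buffered set
    writeMap["sample_config/knob_b"] = { true, std::nullopt };      // buffered clear

    // A commit actor later walks the buffered intents and issues mutations.
    for (const auto& [key, entry] : writeMap) {
        if (!entry.first)
            continue; // never written in this transaction
        if (entry.second.has_value())
            std::cout << "SET " << key << " = " << *entry.second << '\n';
        else
            std::cout << "CLEAR " << key << '\n';
    }
}
```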
+void GlobalConfigImpl::clear(ReadYourWritesTransaction* ryw, const KeyRef& key) { + ryw->getSpecialKeySpaceWriteMap().insert(key, std::make_pair(true, Optional())); +} + +TracingOptionsImpl::TracingOptionsImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {} + +Future TracingOptionsImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { + RangeResult result; for (const auto& option : SpecialKeySpace::getTracingOptions()) { auto key = getKeyRange().begin.withSuffix(option); if (!kr.contains(key)) { @@ -1432,8 +1544,8 @@ void TracingOptionsImpl::clear(ReadYourWritesTransaction* ryw, const KeyRef& key CoordinatorsImpl::CoordinatorsImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {} -Future> CoordinatorsImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { - Standalone result; +Future CoordinatorsImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { + RangeResult result; KeyRef prefix(getKeyRange().begin); // the constructor of ClusterConnectionFile already checks whether the file is valid auto cs = ClusterConnectionFile(ryw->getDatabase()->getConnectionFile()->getFilename()).getConnectionString(); @@ -1578,9 +1690,8 @@ void CoordinatorsImpl::clear(ReadYourWritesTransaction* ryw, const KeyRef& key) CoordinatorsAutoImpl::CoordinatorsAutoImpl(KeyRangeRef kr) : SpecialKeyRangeReadImpl(kr) {} -ACTOR static Future> CoordinatorsAutoImplActor(ReadYourWritesTransaction* ryw, - KeyRangeRef kr) { - state Standalone res; +ACTOR static Future CoordinatorsAutoImplActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) { + state RangeResult res; state std::string autoCoordinatorsKey; state Transaction& tr = ryw->getTransaction(); @@ -1616,18 +1727,16 @@ ACTOR static Future> CoordinatorsAutoImplActor(ReadYo return res; } -Future> CoordinatorsAutoImpl::getRange(ReadYourWritesTransaction* ryw, - KeyRangeRef kr) const { +Future CoordinatorsAutoImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { // single key range, the queried range should always be the same as the underlying range ASSERT(kr == getKeyRange()); return CoordinatorsAutoImplActor(ryw, kr); } -ACTOR static Future> getMinCommitVersionActor(ReadYourWritesTransaction* ryw, - KeyRangeRef kr) { +ACTOR static Future getMinCommitVersionActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) { ryw->getTransaction().setOption(FDBTransactionOptions::LOCK_AWARE); Optional val = wait(ryw->getTransaction().get(minRequiredCommitVersionKey)); - Standalone result; + RangeResult result; if (val.present()) { Version minRequiredCommitVersion = BinaryReader::fromStringRef(val.get(), Unversioned()); ValueRef version(result.arena(), boost::lexical_cast(minRequiredCommitVersion)); @@ -1638,13 +1747,13 @@ ACTOR static Future> getMinCommitVersionActor(ReadYou AdvanceVersionImpl::AdvanceVersionImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {} -Future> AdvanceVersionImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { +Future AdvanceVersionImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { // single key range, the queried range should always be the same as the underlying range ASSERT(kr == getKeyRange()); auto entry = ryw->getSpecialKeySpaceWriteMap()[SpecialKeySpace::getManagementApiCommandPrefix("advanceversion")]; if (!ryw->readYourWritesDisabled() && entry.first) { // ryw enabled and we have written to the special key - Standalone result; + RangeResult result; if (entry.second.present()) { result.push_back_deep(result.arena(), KeyValueRef(kr.begin, entry.second.get())); 
} @@ -1693,10 +1802,10 @@ Future> AdvanceVersionImpl::commit(ReadYourWritesTransacti ClientProfilingImpl::ClientProfilingImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {} -ACTOR static Future> ClientProfilingGetRangeActor(ReadYourWritesTransaction* ryw, - KeyRef prefix, - KeyRangeRef kr) { - state Standalone result; +ACTOR static Future ClientProfilingGetRangeActor(ReadYourWritesTransaction* ryw, + KeyRef prefix, + KeyRangeRef kr) { + state RangeResult result; // client_txn_sample_rate state Key sampleRateKey = LiteralStringRef("client_txn_sample_rate").withPrefix(prefix); if (kr.contains(sampleRateKey)) { @@ -1738,7 +1847,8 @@ ACTOR static Future> ClientProfilingGetRangeActor(Rea return result; } -Future> ClientProfilingImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { +// TODO: add limitations on the set operation +Future ClientProfilingImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { return ClientProfilingGetRangeActor(ryw, getKeyRange().begin, kr); } @@ -1794,3 +1904,195 @@ void ClientProfilingImpl::clear(ReadYourWritesTransaction* ryw, const KeyRef& ke "profile", "Clear operation is forbidden for profile client. You can set it to default to disable profiling."); } + +MaintenanceImpl::MaintenanceImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {} + +// Used to read the healthyZoneKey +// If the key is persisted and its delayed read version is still larger than the current read version, +// we calculate the remaining time (truncated to an integer, the same as fdbcli) and return it as the value +// If the zoneId is the special one `ignoreSSFailuresZoneString`, +// the value will be 0 (same as fdbcli) +ACTOR static Future MaintenanceGetRangeActor(ReadYourWritesTransaction* ryw, + KeyRef prefix, + KeyRangeRef kr) { + state RangeResult result; + // zoneId + ryw->getTransaction().setOption(FDBTransactionOptions::LOCK_AWARE); + Optional val = wait(ryw->getTransaction().get(healthyZoneKey)); + if (val.present()) { + auto healthyZone = decodeHealthyZoneValue(val.get()); + if ((healthyZone.first == ignoreSSFailuresZoneString) || + (healthyZone.second > ryw->getTransaction().getReadVersion().get())) { + Key zone_key = healthyZone.first.withPrefix(prefix); + double seconds = healthyZone.first == ignoreSSFailuresZoneString + ? 0 + : (healthyZone.second - ryw->getTransaction().getReadVersion().get()) / + CLIENT_KNOBS->CORE_VERSIONSPERSECOND; + if (kr.contains(zone_key)) { + result.push_back_deep(result.arena(), + KeyValueRef(zone_key, Value(boost::lexical_cast(seconds)))); + } + } + } + return rywGetRange(ryw, kr, result); +} + +Future MaintenanceImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { + return MaintenanceGetRangeActor(ryw, getKeyRange().begin, kr); +} + +// Commit the change to healthyZoneKey +// We do not allow more than one zone to be put under maintenance in one transaction +// In addition, if the current zoneId is 'ignoreSSFailuresZoneString', +// data distribution is disabled for storage server failures; +// only clearing this specific key is allowed, and any other operation will throw an error +ACTOR static Future> maintenanceCommitActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) { + // read + ryw->getTransaction().setOption(FDBTransactionOptions::LOCK_AWARE); + ryw->getTransaction().setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE); + Optional val = wait(ryw->getTransaction().get(healthyZoneKey)); + Optional> healthyZone = + val.present() ?
decodeHealthyZoneValue(val.get()) : Optional>(); + + state RangeMap>, KeyRangeRef>::Ranges ranges = + ryw->getSpecialKeySpaceWriteMap().containedRanges(kr); + Key zoneId; + double seconds; + bool isSet = false; + // Since maintenance allows only one zone at a time, + // a transaction with more than one set operation on different zone keys + // will throw an error at commit + for (auto iter = ranges.begin(); iter != ranges.end(); ++iter) { + if (!iter->value().first) + continue; + if (iter->value().second.present()) { + if (isSet) + return Optional(ManagementAPIError::toJsonString( + false, "maintenance", "Multiple zones given for maintenance, only one allowed at the same time")); + isSet = true; + zoneId = iter->begin().removePrefix(kr.begin); + seconds = boost::lexical_cast(iter->value().second.get().toString()); + } else { + // if we already have a set operation, all clear operations are meaningless, thus skip them + if (!isSet && healthyZone.present() && iter.range().contains(healthyZone.get().first.withPrefix(kr.begin))) + ryw->getTransaction().clear(healthyZoneKey); + } + } + + if (isSet) { + if (healthyZone.present() && healthyZone.get().first == ignoreSSFailuresZoneString) { + std::string msg = "Maintenance mode cannot be used while data distribution is disabled for storage " + "server failures."; + return Optional(ManagementAPIError::toJsonString(false, "maintenance", msg)); + } else if (seconds < 0) { + std::string msg = + "The specified maintenance time " + boost::lexical_cast(seconds) + " is a negative value"; + return Optional(ManagementAPIError::toJsonString(false, "maintenance", msg)); + } else { + TraceEvent(SevDebug, "SKSMaintenanceSet").detail("ZoneId", zoneId.toString()); + ryw->getTransaction().set(healthyZoneKey, + healthyZoneValue(zoneId, + ryw->getTransaction().getReadVersion().get() + + (seconds * CLIENT_KNOBS->CORE_VERSIONSPERSECOND))); + } + } + return Optional(); +} + +Future> MaintenanceImpl::commit(ReadYourWritesTransaction* ryw) { + return maintenanceCommitActor(ryw, getKeyRange()); +} + +DataDistributionImpl::DataDistributionImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {} + +// Read the system keys dataDistributionModeKey and rebalanceDDIgnoreKey +ACTOR static Future DataDistributionGetRangeActor(ReadYourWritesTransaction* ryw, + KeyRef prefix, + KeyRangeRef kr) { + state RangeResult result; + // dataDistributionModeKey + state Key modeKey = LiteralStringRef("mode").withPrefix(prefix); + if (kr.contains(modeKey)) { + auto entry = ryw->getSpecialKeySpaceWriteMap()[modeKey]; + if (ryw->readYourWritesDisabled() || !entry.first) { + Optional f = wait(ryw->getTransaction().get(dataDistributionModeKey)); + int mode = -1; + if (f.present()) { + mode = BinaryReader::fromStringRef(f.get(), Unversioned()); + } + result.push_back_deep(result.arena(), KeyValueRef(modeKey, Value(boost::lexical_cast(mode)))); + } + } + // rebalanceDDIgnoreKey + state Key rebalanceIgnoredKey = LiteralStringRef("rebalance_ignored").withPrefix(prefix); + if (kr.contains(rebalanceIgnoredKey)) { + auto entry = ryw->getSpecialKeySpaceWriteMap()[rebalanceIgnoredKey]; + if (ryw->readYourWritesDisabled() || !entry.first) { + Optional f = wait(ryw->getTransaction().get(rebalanceDDIgnoreKey)); + if (f.present()) { + result.push_back_deep(result.arena(), KeyValueRef(rebalanceIgnoredKey, Value())); + } + } + } + return rywGetRange(ryw, kr, result); +} + +Future DataDistributionImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { + return
DataDistributionGetRangeActor(ryw, getKeyRange().begin, kr); +} + +Future> DataDistributionImpl::commit(ReadYourWritesTransaction* ryw) { + // there are two valid keys in the range + // /mode -> dataDistributionModeKey, the value is only allowed to be set as "0"(disable) or "1"(enable) + // /rebalance_ignored -> rebalanceDDIgnoreKey, value is unused thus empty + Optional msg; + KeyRangeRef kr = getKeyRange(); + Key modeKey = LiteralStringRef("mode").withPrefix(kr.begin); + Key rebalanceIgnoredKey = LiteralStringRef("rebalance_ignored").withPrefix(kr.begin); + auto ranges = ryw->getSpecialKeySpaceWriteMap().containedRanges(kr); + for (auto iter = ranges.begin(); iter != ranges.end(); ++iter) { + if (!iter->value().first) + continue; + if (iter->value().second.present()) { + if (iter->range() == singleKeyRange(modeKey)) { + try { + int mode = boost::lexical_cast(iter->value().second.get().toString()); + Value modeVal = BinaryWriter::toValue(mode, Unversioned()); + if (mode == 0 || mode == 1) + ryw->getTransaction().set(dataDistributionModeKey, modeVal); + else + msg = ManagementAPIError::toJsonString(false, + "datadistribution", + "Please set the value of the data_distribution/mode to " + "0(disable) or 1(enable), other values are not allowed"); + } catch (boost::bad_lexical_cast& e) { + msg = ManagementAPIError::toJsonString(false, + "datadistribution", + "Invalid datadistribution mode(int): " + + iter->value().second.get().toString()); + } + } else if (iter->range() == singleKeyRange(rebalanceIgnoredKey)) { + if (iter->value().second.get().size()) + msg = + ManagementAPIError::toJsonString(false, + "datadistribution", + "Value is unused for the data_distribution/rebalance_ignored " + "key, please set it to an empty value"); + else + ryw->getTransaction().set(rebalanceDDIgnoreKey, LiteralStringRef("on")); + } else { + msg = ManagementAPIError::toJsonString( + false, + "datadistribution", + "Changing invalid keys, please read the documentation to check valid keys in the range"); + } + } else { + // clear + if (iter->range().contains(modeKey)) + ryw->getTransaction().clear(dataDistributionModeKey); + else if (iter->range().contains(rebalanceIgnoredKey)) + ryw->getTransaction().clear(rebalanceDDIgnoreKey); + } + } + return msg; +} diff --git a/fdbclient/SpecialKeySpace.actor.h b/fdbclient/SpecialKeySpace.actor.h index c760a10724..084135bfb6 100644 --- a/fdbclient/SpecialKeySpace.actor.h +++ b/fdbclient/SpecialKeySpace.actor.h @@ -36,7 +36,7 @@ class SpecialKeyRangeReadImpl { public: // Each derived class only needs to implement this simple version of getRange - virtual Future> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const = 0; + virtual Future getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const = 0; explicit SpecialKeyRangeReadImpl(KeyRangeRef kr) : range(kr) {} KeyRangeRef getKeyRange() const { return range; } @@ -100,28 +100,26 @@ class SpecialKeyRangeAsyncImpl : public SpecialKeyRangeReadImpl { public: explicit SpecialKeyRangeAsyncImpl(KeyRangeRef kr) : SpecialKeyRangeReadImpl(kr) {} - Future> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override = 0; + Future getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override = 0; // calling with a cache object to have consistent results if we need to call rpc - Future> getRange(ReadYourWritesTransaction* ryw, - KeyRangeRef kr, - Optional>* cache) const { + Future getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr, Optional* cache) const { return getRangeAsyncActor(this, ryw, kr, 
cache); } bool isAsync() const override { return true; } - ACTOR static Future> getRangeAsyncActor(const SpecialKeyRangeReadImpl* skrAyncImpl, - ReadYourWritesTransaction* ryw, - KeyRangeRef kr, - Optional>* cache) { + ACTOR static Future getRangeAsyncActor(const SpecialKeyRangeReadImpl* skrAyncImpl, + ReadYourWritesTransaction* ryw, + KeyRangeRef kr, + Optional* cache) { ASSERT(skrAyncImpl->getKeyRange().contains(kr)); ASSERT(cache != nullptr); if (!cache->present()) { // For simplicity, every time we need to cache, we read the whole range // Although sometimes the range can be narrowed, // there is not a general way to do it in complicated scenarios - Standalone result_ = wait(skrAyncImpl->getRange(ryw, skrAyncImpl->getKeyRange())); + RangeResult result_ = wait(skrAyncImpl->getRange(ryw, skrAyncImpl->getKeyRange())); *cache = result_; } const auto& allResults = cache->get(); @@ -131,11 +129,11 @@ public: while (end > 0 && allResults[end - 1].key >= kr.end) --end; if (start < end) { - Standalone result = RangeResultRef(allResults.slice(start, end), false); + RangeResult result = RangeResultRef(allResults.slice(start, end), false); result.arena().dependsOn(allResults.arena()); return result; } else - return Standalone(); + return RangeResult(); } }; @@ -146,6 +144,7 @@ public: CONFIGURATION, // Configuration of the cluster CONNECTIONSTRING, ERRORMSG, // A single key space contains a json string which describes the last error in special-key-space + GLOBALCONFIG, // Global configuration options synchronized to all nodes MANAGEMENT, // Management-API METRICS, // data-distribution metrics TESTONLY, // only used by correctness tests @@ -165,11 +164,11 @@ public: Future> get(ReadYourWritesTransaction* ryw, const Key& key); - Future> getRange(ReadYourWritesTransaction* ryw, - KeySelector begin, - KeySelector end, - GetRangeLimits limits, - bool reverse = false); + Future getRange(ReadYourWritesTransaction* ryw, + KeySelector begin, + KeySelector end, + GetRangeLimits limits, + bool reverse = false); void set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value); @@ -205,18 +204,18 @@ public: private: ACTOR static Future> getActor(SpecialKeySpace* sks, ReadYourWritesTransaction* ryw, KeyRef key); - ACTOR static Future> checkRYWValid(SpecialKeySpace* sks, - ReadYourWritesTransaction* ryw, - KeySelector begin, - KeySelector end, - GetRangeLimits limits, - bool reverse); - ACTOR static Future> getRangeAggregationActor(SpecialKeySpace* sks, - ReadYourWritesTransaction* ryw, - KeySelector begin, - KeySelector end, - GetRangeLimits limits, - bool reverse); + ACTOR static Future checkRYWValid(SpecialKeySpace* sks, + ReadYourWritesTransaction* ryw, + KeySelector begin, + KeySelector end, + GetRangeLimits limits, + bool reverse); + ACTOR static Future getRangeAggregationActor(SpecialKeySpace* sks, + ReadYourWritesTransaction* ryw, + KeySelector begin, + KeySelector end, + GetRangeLimits limits, + bool reverse); KeyRangeMap readImpls; KeyRangeMap modules; @@ -237,7 +236,7 @@ private: class SKSCTestImpl : public SpecialKeyRangeRWImpl { public: explicit SKSCTestImpl(KeyRangeRef kr); - Future> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; + Future getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; Future> commit(ReadYourWritesTransaction* ryw) override; }; @@ -250,31 +249,31 @@ public: class ConflictingKeysImpl : public SpecialKeyRangeReadImpl { public: explicit ConflictingKeysImpl(KeyRangeRef kr); - Future> 
getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; + Future getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; }; class ReadConflictRangeImpl : public SpecialKeyRangeReadImpl { public: explicit ReadConflictRangeImpl(KeyRangeRef kr); - Future> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; + Future getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; }; class WriteConflictRangeImpl : public SpecialKeyRangeReadImpl { public: explicit WriteConflictRangeImpl(KeyRangeRef kr); - Future> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; + Future getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; }; class DDStatsRangeImpl : public SpecialKeyRangeAsyncImpl { public: explicit DDStatsRangeImpl(KeyRangeRef kr); - Future> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; + Future getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; }; class ManagementCommandsOptionsImpl : public SpecialKeyRangeRWImpl { public: explicit ManagementCommandsOptionsImpl(KeyRangeRef kr); - Future> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; + Future getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; void set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) override; void clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) override; void clear(ReadYourWritesTransaction* ryw, const KeyRef& key) override; @@ -284,7 +283,7 @@ public: class ExcludeServersRangeImpl : public SpecialKeyRangeRWImpl { public: explicit ExcludeServersRangeImpl(KeyRangeRef kr); - Future> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; + Future getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; void set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) override; Key decode(const KeyRef& key) const override; Key encode(const KeyRef& key) const override; @@ -294,7 +293,7 @@ public: class FailedServersRangeImpl : public SpecialKeyRangeRWImpl { public: explicit FailedServersRangeImpl(KeyRangeRef kr); - Future> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; + Future getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; void set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) override; Key decode(const KeyRef& key) const override; Key encode(const KeyRef& key) const override; @@ -304,13 +303,13 @@ public: class ExclusionInProgressRangeImpl : public SpecialKeyRangeAsyncImpl { public: explicit ExclusionInProgressRangeImpl(KeyRangeRef kr); - Future> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; + Future getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; }; class ProcessClassRangeImpl : public SpecialKeyRangeRWImpl { public: explicit ProcessClassRangeImpl(KeyRangeRef kr); - Future> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; + Future getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; Future> commit(ReadYourWritesTransaction* ryw) override; void clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) override; void clear(ReadYourWritesTransaction* ryw, const KeyRef& key) override; @@ -319,27 +318,37 @@ public: class ProcessClassSourceRangeImpl : public SpecialKeyRangeReadImpl { public: explicit ProcessClassSourceRangeImpl(KeyRangeRef kr); - Future> 
getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; + Future getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; }; class LockDatabaseImpl : public SpecialKeyRangeRWImpl { public: explicit LockDatabaseImpl(KeyRangeRef kr); - Future> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; + Future getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; Future> commit(ReadYourWritesTransaction* ryw) override; }; class ConsistencyCheckImpl : public SpecialKeyRangeRWImpl { public: explicit ConsistencyCheckImpl(KeyRangeRef kr); - Future> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; + Future getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; Future> commit(ReadYourWritesTransaction* ryw) override; }; +class GlobalConfigImpl : public SpecialKeyRangeRWImpl { +public: + explicit GlobalConfigImpl(KeyRangeRef kr); + Future getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; + void set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) override; + Future> commit(ReadYourWritesTransaction* ryw) override; + void clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) override; + void clear(ReadYourWritesTransaction* ryw, const KeyRef& key) override; +}; + class TracingOptionsImpl : public SpecialKeyRangeRWImpl { public: explicit TracingOptionsImpl(KeyRangeRef kr); - Future> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; + Future getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; void set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) override; Future> commit(ReadYourWritesTransaction* ryw) override; void clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) override; @@ -349,7 +358,7 @@ public: class CoordinatorsImpl : public SpecialKeyRangeRWImpl { public: explicit CoordinatorsImpl(KeyRangeRef kr); - Future> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; + Future getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; Future> commit(ReadYourWritesTransaction* ryw) override; void clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) override; void clear(ReadYourWritesTransaction* ryw, const KeyRef& key) override; @@ -358,24 +367,37 @@ public: class CoordinatorsAutoImpl : public SpecialKeyRangeReadImpl { public: explicit CoordinatorsAutoImpl(KeyRangeRef kr); - Future> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; + Future getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; }; class AdvanceVersionImpl : public SpecialKeyRangeRWImpl { public: explicit AdvanceVersionImpl(KeyRangeRef kr); - Future> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; + Future getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; Future> commit(ReadYourWritesTransaction* ryw) override; }; class ClientProfilingImpl : public SpecialKeyRangeRWImpl { public: explicit ClientProfilingImpl(KeyRangeRef kr); - Future> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; + Future getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; Future> commit(ReadYourWritesTransaction* ryw) override; void clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) override; void clear(ReadYourWritesTransaction* ryw, const KeyRef& key) override; }; +class MaintenanceImpl : public SpecialKeyRangeRWImpl { 
+public: + explicit MaintenanceImpl(KeyRangeRef kr); + Future getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; + Future> commit(ReadYourWritesTransaction* ryw) override; +}; +class DataDistributionImpl : public SpecialKeyRangeRWImpl { +public: + explicit DataDistributionImpl(KeyRangeRef kr); + Future getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; + Future> commit(ReadYourWritesTransaction* ryw) override; +}; + #include "flow/unactorcompiler.h" #endif diff --git a/fdbclient/StorageServerInterface.cpp b/fdbclient/StorageServerInterface.cpp new file mode 100644 index 0000000000..fe5ef4aaeb --- /dev/null +++ b/fdbclient/StorageServerInterface.cpp @@ -0,0 +1,383 @@ +/* + * StorageServerInterface.cpp + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "fdbclient/StorageServerInterface.h" +#include "flow/crc32c.h" // for crc32c_append, to checksum values in tss trace events + +// Includes template specializations for all tss operations on storage server types. +// New StorageServerInterface reply types must be added here or it won't compile. + +// if size + hex of checksum is shorter than value, record that instead of actual value. break-even point is 12 +// characters +std::string traceChecksumValue(ValueRef s) { + return s.size() > 12 ? format("(%d)%08x", s.size(), crc32c_append(0, s.begin(), s.size())) : s.toString(); +} + +template <> +bool TSS_doCompare(const GetValueRequest& req, + const GetValueReply& src, + const GetValueReply& tss, + Severity traceSeverity, + UID tssId) { + if (src.value.present() != tss.value.present() || (src.value.present() && src.value.get() != tss.value.get())) { + TraceEvent(traceSeverity, "TSSMismatchGetValue") + .suppressFor(1.0) + .detail("TSSID", tssId) + .detail("Key", req.key.printable()) + .detail("Version", req.version) + .detail("SSReply", src.value.present() ? traceChecksumValue(src.value.get()) : "missing") + .detail("TSSReply", tss.value.present() ? traceChecksumValue(tss.value.get()) : "missing"); + + return false; + } + return true; +} + +template <> +bool TSS_doCompare(const GetKeyRequest& req, + const GetKeyReply& src, + const GetKeyReply& tss, + Severity traceSeverity, + UID tssId) { + // This process is a bit complicated. Since the tss and ss can return different results if neighboring shards to + // req.sel.key are currently being moved, We validate that the results are the same IF the returned key selectors + // are final. Otherwise, we only mark the request as a mismatch if the difference between the two returned key + // selectors could ONLY be because of different results from the storage engines. We can afford to only partially + // check key selectors that start in a TSS shard and end in a non-TSS shard because the other read queries and the + // consistency check will eventually catch a misbehaving storage engine. 
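As a self-contained restatement of the comparison that the comment above describes, here is the same decision procedure over a simplified selector struct; Sel is an illustrative stand-in for FDB's KeySelectorRef, not a real type from this codebase:

```cpp
// Sketch of the GetKey result comparison: equal selectors must carry equal
// keys once fully resolved; otherwise key ordering must agree with offsets.
#include <cassert>
#include <string>

struct Sel {
    std::string key;
    int offset = 0;
    bool orEqual = false;
};

bool selectorsMatch(const Sel& src, const Sel& tss) {
    if (src.orEqual == tss.orEqual && src.offset == tss.offset) {
        if (src.offset == 0 && src.orEqual)
            return src.key == tss.key; // both resolved: must name the same key
        return true; // both unresolved: differing shard boundaries are legal
    }
    if (src.key == tss.key) {
        // one side resolved exactly where the other stopped, one key short
        return (src.offset == 0 && src.orEqual && tss.offset == 1 && !tss.orEqual) ||
               (tss.offset == 0 && tss.orEqual && src.offset == 1 && !src.orEqual);
    }
    // different keys: ordering of the keys must mirror ordering of the offsets
    bool tssKeyLarger = src.key < tss.key;
    bool tssOffsetLarger = (src.offset == tss.offset) ? tss.orEqual : src.offset < tss.offset;
    return tssKeyLarger != tssOffsetLarger;
}

int main() {
    assert(selectorsMatch({ "a", 0, true }, { "a", 0, true }));  // identical, resolved
    assert(!selectorsMatch({ "a", 0, true }, { "b", 0, true })); // resolved, different keys
    assert(selectorsMatch({ "a", 0, true }, { "a", 1, false })); // complete vs. incomplete
    return 0;
}
```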
+    bool matches = true;
+    if (src.sel.orEqual == tss.sel.orEqual && src.sel.offset == tss.sel.offset) {
+        // full matching case
+        if (src.sel.offset == 0 && src.sel.orEqual) {
+            // found exact key, should be identical
+            matches = src.sel.getKey() == tss.sel.getKey();
+        }
+        // if the query doesn't return the final key, there is an edge case where the ss and tss have different shard
+        // boundaries, so they pass different shard boundary keys back for the same offset
+    } else if (src.sel.getKey() == tss.sel.getKey()) {
+        // There is one case with a positive offset where the shard boundary that the incomplete query stopped at is
+        // the next key in the shard that the complete query returned. This is not possible with a negative offset
+        // because the shard boundary is exclusive in the backwards direction.
+        if (src.sel.offset == 0 && src.sel.orEqual && tss.sel.offset == 1 && !tss.sel.orEqual) {
+            // case where ss was complete and tss was incomplete
+        } else if (tss.sel.offset == 0 && tss.sel.orEqual && src.sel.offset == 1 && !src.sel.orEqual) {
+            // case where tss was complete and ss was incomplete
+        } else {
+            matches = false;
+        }
+    } else {
+        // ss/tss returned different keys, and different offsets and/or orEqual
+        // here we just validate that the ordering of the keys matches the ordering of the offsets
+        bool tssKeyLarger = src.sel.getKey() < tss.sel.getKey();
+        // the only case where the offsets are equal but orEqual differs is with a negative offset,
+        // where one response has <=0 with the actual result and the other has <0 with the shard upper boundary.
+        // So whichever one has the actual result should have the lower key.
+        bool tssOffsetLarger = (src.sel.offset == tss.sel.offset) ? tss.sel.orEqual : src.sel.offset < tss.sel.offset;
+        matches = tssKeyLarger != tssOffsetLarger;
+    }
+    if (!matches) {
+        TraceEvent(traceSeverity, "TSSMismatchGetKey")
+            .suppressFor(1.0)
+            .detail("TSSID", tssId)
+            .detail("KeySelector",
+                    format("%s%s:%d", req.sel.orEqual ? "=" : "", req.sel.getKey().printable().c_str(), req.sel.offset))
+            .detail("Version", req.version)
+            .detail("SSReply",
+                    format("%s%s:%d", src.sel.orEqual ? "=" : "", src.sel.getKey().printable().c_str(), src.sel.offset))
+            .detail(
+                "TSSReply",
+                format("%s%s:%d", tss.sel.orEqual ? "=" : "", tss.sel.getKey().printable().c_str(), tss.sel.offset));
+    }
+    return matches;
+}
+
+template <>
+bool TSS_doCompare(const GetKeyValuesRequest& req,
+                   const GetKeyValuesReply& src,
+                   const GetKeyValuesReply& tss,
+                   Severity traceSeverity,
+                   UID tssId) {
+    if (src.more != tss.more || src.data != tss.data) {
+
+        std::string ssResultsString = format("(%d)%s:\n", src.data.size(), src.more ? "+" : "");
+        for (auto& it : src.data) {
+            ssResultsString += "\n" + it.key.printable() + "=" + traceChecksumValue(it.value);
+        }
+
+        std::string tssResultsString = format("(%d)%s:\n", tss.data.size(), tss.more ? "+" : "");
+        for (auto& it : tss.data) {
+            tssResultsString += "\n" + it.key.printable() + "=" + traceChecksumValue(it.value);
+        }
+
+        TraceEvent(traceSeverity, "TSSMismatchGetKeyValues")
+            .suppressFor(1.0)
+            .detail("TSSID", tssId)
+            .detail(
+                "Begin",
+                format(
+                    "%s%s:%d", req.begin.orEqual ? "=" : "", req.begin.getKey().printable().c_str(), req.begin.offset))
+            .detail("End",
+                    format("%s%s:%d", req.end.orEqual ? "=" : "", req.end.getKey().printable().c_str(), req.end.offset))
+            .detail("Version", req.version)
+            .detail("Limit", req.limit)
+            .detail("LimitBytes", req.limitBytes)
+            .detail("SSReply", ssResultsString)
+            .detail("TSSReply", tssResultsString);
+
+        return false;
+    }
+    return true;
+}
+
+template <>
+bool TSS_doCompare(const WatchValueRequest& req,
+                   const WatchValueReply& src,
+                   const WatchValueReply& tss,
+                   Severity traceSeverity,
+                   UID tssId) {
+    // We duplicate watches just for load, so there is no need to validate replies.
+    return true;
+}
+
+// no-op template specializations for metrics replies
+template <>
+bool TSS_doCompare(const WaitMetricsRequest& req,
+                   const StorageMetrics& src,
+                   const StorageMetrics& tss,
+                   Severity traceSeverity,
+                   UID tssId) {
+    return true;
+}
+
+template <>
+bool TSS_doCompare(const SplitMetricsRequest& req,
+                   const SplitMetricsReply& src,
+                   const SplitMetricsReply& tss,
+                   Severity traceSeverity,
+                   UID tssId) {
+    return true;
+}
+
+template <>
+bool TSS_doCompare(const ReadHotSubRangeRequest& req,
+                   const ReadHotSubRangeReply& src,
+                   const ReadHotSubRangeReply& tss,
+                   Severity traceSeverity,
+                   UID tssId) {
+    return true;
+}
+
+template <>
+bool TSS_doCompare(const SplitRangeRequest& req,
+                   const SplitRangeReply& src,
+                   const SplitRangeReply& tss,
+                   Severity traceSeverity,
+                   UID tssId) {
+    return true;
+}
+
+// only record metrics for data reads
+
+template <>
+void TSSMetrics::recordLatency(const GetValueRequest& req, double ssLatency, double tssLatency) {
+    SSgetValueLatency.addSample(ssLatency);
+    TSSgetValueLatency.addSample(tssLatency);
+}
+
+template <>
+void TSSMetrics::recordLatency(const GetKeyRequest& req, double ssLatency, double tssLatency) {
+    SSgetKeyLatency.addSample(ssLatency);
+    TSSgetKeyLatency.addSample(tssLatency);
+}
+
+template <>
+void TSSMetrics::recordLatency(const GetKeyValuesRequest& req, double ssLatency, double tssLatency) {
+    SSgetKeyValuesLatency.addSample(ssLatency);
+    TSSgetKeyValuesLatency.addSample(tssLatency);
+}
+
+template <>
+void TSSMetrics::recordLatency(const WatchValueRequest& req, double ssLatency, double tssLatency) {}
+
+template <>
+void TSSMetrics::recordLatency(const WaitMetricsRequest& req, double ssLatency, double tssLatency) {}
+
+template <>
+void TSSMetrics::recordLatency(const SplitMetricsRequest& req, double ssLatency, double tssLatency) {}
+
+template <>
+void TSSMetrics::recordLatency(const ReadHotSubRangeRequest& req, double ssLatency, double tssLatency) {}
+
+template <>
+void TSSMetrics::recordLatency(const SplitRangeRequest& req, double ssLatency, double tssLatency) {}
+
+// -------------------
+
+TEST_CASE("/StorageServerInterface/TSSCompare/TestComparison") {
+    printf("testing tss comparisons\n");
+
+    // use std::string to avoid the deprecated StringRef(char*) constructor
+    std::string s_a = "a";
+    std::string s_b = "b";
+    std::string s_c = "c";
+    std::string s_d = "d";
+    std::string s_e = "e";
+
+    // test getValue
+    GetValueRequest gvReq;
+    gvReq.key = StringRef(s_a);
+    gvReq.version = 5;
+
+    UID tssId;
+
+    GetValueReply gvReplyMissing;
+    GetValueReply gvReplyA(Optional<Value>(StringRef(s_a)), false);
+    GetValueReply gvReplyB(Optional<Value>(StringRef(s_b)), false);
+    ASSERT(TSS_doCompare(gvReq, gvReplyMissing, gvReplyMissing, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gvReq, gvReplyA, gvReplyA, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gvReq, gvReplyB, gvReplyB, SevInfo, tssId));
+
+    ASSERT(!TSS_doCompare(gvReq, gvReplyMissing, gvReplyA, SevInfo, tssId));
+    ASSERT(!TSS_doCompare(gvReq, gvReplyA, gvReplyB, SevInfo, tssId));
+    // test GetKeyValues
+    Arena a; // arena for all of the refs; ASAN complains if the refs don't own their memory (they could also be made
+             // Standalone)
+    GetKeyValuesRequest gkvReq;
+    gkvReq.begin = firstGreaterOrEqual(StringRef(a, s_a));
+    gkvReq.end = firstGreaterOrEqual(StringRef(a, s_b));
+    gkvReq.version = 5;
+
+    GetKeyValuesReply gkvReplyEmpty;
+    GetKeyValuesReply gkvReplyOne;
+    KeyValueRef v;
+    v.key = StringRef(a, s_a);
+    v.value = StringRef(a, s_b);
+    gkvReplyOne.data.push_back_deep(gkvReplyOne.arena, v);
+    GetKeyValuesReply gkvReplyOneMore;
+    gkvReplyOneMore.data.push_back_deep(gkvReplyOneMore.arena, v);
+    gkvReplyOneMore.more = true;
+
+    ASSERT(TSS_doCompare(gkvReq, gkvReplyEmpty, gkvReplyEmpty, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkvReq, gkvReplyOne, gkvReplyOne, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkvReq, gkvReplyOneMore, gkvReplyOneMore, SevInfo, tssId));
+    ASSERT(!TSS_doCompare(gkvReq, gkvReplyEmpty, gkvReplyOne, SevInfo, tssId));
+    ASSERT(!TSS_doCompare(gkvReq, gkvReplyOne, gkvReplyOneMore, SevInfo, tssId));
+
+    // test GetKey
+    GetKeyRequest gkReq;
+    gkReq.sel = KeySelectorRef(StringRef(a, s_a), false, 1);
+    gkReq.version = 5;
+
+    GetKeyReply gkReplyA(KeySelectorRef(StringRef(a, s_a), false, 20), false);
+    GetKeyReply gkReplyB(KeySelectorRef(StringRef(a, s_b), false, 10), false);
+    GetKeyReply gkReplyC(KeySelectorRef(StringRef(a, s_c), true, 0), false);
+    GetKeyReply gkReplyD(KeySelectorRef(StringRef(a, s_d), false, -10), false);
+    GetKeyReply gkReplyE(KeySelectorRef(StringRef(a, s_e), false, -20), false);
+
+    // identical cases
+    ASSERT(TSS_doCompare(gkReq, gkReplyA, gkReplyA, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkReq, gkReplyB, gkReplyB, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkReq, gkReplyC, gkReplyC, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkReq, gkReplyD, gkReplyD, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkReq, gkReplyE, gkReplyE, SevInfo, tssId));
+
+    // relative offset cases
+    ASSERT(TSS_doCompare(gkReq, gkReplyA, gkReplyB, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkReq, gkReplyB, gkReplyA, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkReq, gkReplyA, gkReplyC, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkReq, gkReplyC, gkReplyA, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkReq, gkReplyB, gkReplyC, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkReq, gkReplyC, gkReplyB, SevInfo, tssId));
+
+    ASSERT(TSS_doCompare(gkReq, gkReplyC, gkReplyD, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkReq, gkReplyD, gkReplyC, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkReq, gkReplyC, gkReplyE, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkReq, gkReplyE, gkReplyC, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkReq, gkReplyD, gkReplyE, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkReq, gkReplyE, gkReplyD, SevInfo, tssId));
+
+    // test same offset/orEqual with the wrong key
+    ASSERT(!TSS_doCompare(gkReq,
+                          GetKeyReply(KeySelectorRef(StringRef(a, s_a), true, 0), false),
+                          GetKeyReply(KeySelectorRef(StringRef(a, s_b), true, 0), false),
+                          SevInfo,
+                          tssId));
+    // this could be from different shard boundaries, so don't say it's a mismatch
+    ASSERT(TSS_doCompare(gkReq,
+                         GetKeyReply(KeySelectorRef(StringRef(a, s_a), false, 10), false),
+                         GetKeyReply(KeySelectorRef(StringRef(a, s_b), false, 10), false),
+                         SevInfo,
+                         tssId));
+
+    // test where the offsets and the key difference don't match
+    ASSERT(!TSS_doCompare(gkReq,
+                          GetKeyReply(KeySelectorRef(StringRef(a, s_a), false, 0), false),
+                          GetKeyReply(KeySelectorRef(StringRef(a, s_b), false, 10), false),
+                          SevInfo,
+                          tssId));
+    ASSERT(!TSS_doCompare(gkReq,
+                          GetKeyReply(KeySelectorRef(StringRef(a, s_a), false, -10), false),
+                          GetKeyReply(KeySelectorRef(StringRef(a, s_b), false, 0), false),
+                          SevInfo,
+                          tssId));
+
+    // test where the key is the next one over in one shard, and one query found it while the other didn't
+    // positive direction: the query that didn't find it has offset +1
+    ASSERT(TSS_doCompare(gkReq,
+                         GetKeyReply(KeySelectorRef(StringRef(a, s_a), false, 1), false),
+                         GetKeyReply(KeySelectorRef(StringRef(a, s_b), true, 0), false),
+                         SevInfo,
+                         tssId));
+    ASSERT(!TSS_doCompare(gkReq,
+                          GetKeyReply(KeySelectorRef(StringRef(a, s_a), true, 0), false),
+                          GetKeyReply(KeySelectorRef(StringRef(a, s_b), false, 1), false),
+                          SevInfo,
+                          tssId));
+
+    // in the negative direction, the incomplete reply has a zero offset with orEqual unset
+    ASSERT(TSS_doCompare(gkReq,
+                         GetKeyReply(KeySelectorRef(StringRef(a, s_a), true, 0), false),
+                         GetKeyReply(KeySelectorRef(StringRef(a, s_b), false, 0), false),
+                         SevInfo,
+                         tssId));
+    ASSERT(!TSS_doCompare(gkReq,
+                          GetKeyReply(KeySelectorRef(StringRef(a, s_a), false, 0), false),
+                          GetKeyReply(KeySelectorRef(StringRef(a, s_b), true, 0), false),
+                          SevInfo,
+                          tssId));
+
+    // test the shard boundary key returned by the incomplete query being the same as the key found by the other
+    // (only possible in the positive direction)
+    ASSERT(TSS_doCompare(gkReq,
+                         GetKeyReply(KeySelectorRef(StringRef(a, s_a), true, 0), false),
+                         GetKeyReply(KeySelectorRef(StringRef(a, s_a), false, 1), false),
+                         SevInfo,
+                         tssId));
+
+    // explicitly test the checksum function
+    std::string s12 = "ABCDEFGHIJKL";
+    std::string s13 = "ABCDEFGHIJKLO";
+    std::string checksumStart13 = "(13)";
+    ASSERT(s_a == traceChecksumValue(StringRef(s_a)));
+    ASSERT(s12 == traceChecksumValue(StringRef(s12)));
+    ASSERT(checksumStart13 == traceChecksumValue(StringRef(s13)).substr(0, 4));
+    return Void();
+}
\ No newline at end of file
diff --git a/fdbclient/StorageServerInterface.h b/fdbclient/StorageServerInterface.h
index 84971f040b..be1a223453 100644
--- a/fdbclient/StorageServerInterface.h
+++ b/fdbclient/StorageServerInterface.h
@@ -29,7 +29,9 @@
 #include "fdbrpc/LoadBalance.actor.h"
 #include "fdbrpc/Stats.h"
 #include "fdbrpc/TimedRequest.h"
+#include "fdbrpc/TSSComparison.h"
 #include "fdbclient/TagThrottle.h"
+#include "flow/UnitTest.h"
 
 // Dead code, removed in the next protocol version
 struct VersionReply {
@@ -54,6 +56,7 @@ struct StorageServerInterface {
     LocalityData locality;
     UID uniqueID;
+    Optional<UID> tssPairID;
 
     RequestStream<GetValueRequest> getValue;
     RequestStream<GetKeyRequest> getKey;
@@ -80,6 +83,7 @@ struct StorageServerInterface {
     NetworkAddress stableAddress() const { return getValue.getEndpoint().getStableAddress(); }
     Optional<NetworkAddress> secondaryAddress() const { return getValue.getEndpoint().addresses.secondaryAddress; }
     UID id() const { return uniqueID; }
+    bool isTss() const { return tssPairID.present(); }
     std::string toString() const { return id().shortString(); }
     template <class Ar>
     void serialize(Ar& ar) {
@@ -88,7 +92,11 @@
         // considered
 
         if (ar.protocolVersion().hasSmallEndpoints()) {
-            serializer(ar, uniqueID, locality, getValue);
+            if (ar.protocolVersion().hasTSS()) {
+                serializer(ar, uniqueID, locality, getValue, tssPairID);
+            } else {
+                serializer(ar, uniqueID, locality, getValue);
+            }
             if (Ar::isDeserializing) {
                 getKey = RequestStream<GetKeyRequest>(getValue.getEndpoint().getAdjustedEndpoint(1));
                 getKeyValues = RequestStream<GetKeyValuesRequest>(getValue.getEndpoint().getAdjustedEndpoint(2));
@@ -127,8 +135,9 @@ struct StorageServerInterface {
                        waitFailure,
                        getQueuingMetrics,
                        getKeyValueStoreType);
-            if (ar.protocolVersion().hasWatches())
+            if (ar.protocolVersion().hasWatches()) {
                 serializer(ar,
watchValue); + } } } bool operator==(StorageServerInterface const& s) const { return uniqueID == s.uniqueID; } diff --git a/fdbclient/SystemData.cpp b/fdbclient/SystemData.cpp index 2c846403ad..949dc9c912 100644 --- a/fdbclient/SystemData.cpp +++ b/fdbclient/SystemData.cpp @@ -25,6 +25,7 @@ #include "flow/Arena.h" #include "flow/TDMetric.actor.h" #include "flow/serialize.h" +#include "flow/UnitTest.h" const KeyRef systemKeysPrefix = LiteralStringRef("\xff"); const KeyRangeRef normalKeys(KeyRef(), systemKeysPrefix); @@ -48,9 +49,7 @@ const Key keyServersKey(const KeyRef& k) { const KeyRef keyServersKey(const KeyRef& k, Arena& arena) { return k.withPrefix(keyServersPrefix, arena); } -const Value keyServersValue(Standalone result, - const std::vector& src, - const std::vector& dest) { +const Value keyServersValue(RangeResult result, const std::vector& src, const std::vector& dest) { if (!CLIENT_KNOBS->TAG_ENCODE_KEY_SERVERS) { BinaryWriter wr(IncludeVersion(ProtocolVersion::withKeyServerValue())); wr << src << dest; @@ -95,7 +94,7 @@ const Value keyServersValue(const std::vector& srcTag, const std::vector result, +void decodeKeyServersValue(RangeResult result, const ValueRef& value, std::vector& src, std::vector& dest, @@ -347,7 +346,11 @@ uint16_t cacheChangeKeyDecodeIndex(const KeyRef& key) { return idx; } +const KeyRef tssMappingChangeKey = LiteralStringRef("\xff\x02/tssMappingChangeKey"); +const KeyRangeRef tssMappingKeys(LiteralStringRef("\xff/tss/"), LiteralStringRef("\xff/tss0")); + const KeyRangeRef serverTagKeys(LiteralStringRef("\xff/serverTag/"), LiteralStringRef("\xff/serverTag0")); + const KeyRef serverTagPrefix = serverTagKeys.begin; const KeyRangeRef serverTagConflictKeys(LiteralStringRef("\xff/serverTagConflict/"), LiteralStringRef("\xff/serverTagConflict0")); @@ -534,6 +537,7 @@ const Key serverListKeyFor(UID serverID) { return wr.toValue(); } +// TODO use flatbuffers depending on version const Value serverListValue(StorageServerInterface const& server) { BinaryWriter wr(IncludeVersion(ProtocolVersion::withServerListValue())); wr << server; @@ -552,6 +556,17 @@ StorageServerInterface decodeServerListValue(ValueRef const& value) { return s; } +const Value serverListValueFB(StorageServerInterface const& server) { + return ObjectWriter::toValue(server, IncludeVersion()); +} + +StorageServerInterface decodeServerListValueFB(ValueRef const& value) { + StorageServerInterface s; + ObjectReader reader(value.begin(), IncludeVersion()); + reader.deserialize(s); + return s; +} + // processClassKeys.contains(k) iff k.startsWith( processClassKeys.begin ) because '/'+1 == '0' const KeyRangeRef processClassKeys(LiteralStringRef("\xff/processClass/"), LiteralStringRef("\xff/processClass0")); const KeyRef processClassPrefix = processClassKeys.begin; @@ -596,6 +611,9 @@ ProcessClass decodeProcessClassValue(ValueRef const& value) { const KeyRangeRef configKeys(LiteralStringRef("\xff/conf/"), LiteralStringRef("\xff/conf0")); const KeyRef configKeysPrefix = configKeys.begin; +const KeyRef perpetualStorageWiggleKey(LiteralStringRef("\xff/conf/perpetual_storage_wiggle")); +const KeyRef wigglingStorageServerKey(LiteralStringRef("\xff/storageWigglePID")); + const KeyRef triggerDDTeamInfoPrintKey(LiteralStringRef("\xff/triggerDDTeamInfoPrint")); const KeyRangeRef excludedServersKeys(LiteralStringRef("\xff/conf/excluded/"), LiteralStringRef("\xff/conf/excluded0")); @@ -632,6 +650,19 @@ std::string encodeFailedServersKey(AddressExclusion const& addr) { return failedServersPrefix.toString() + 
addr.toString(); } +// const KeyRangeRef globalConfigKeys( LiteralStringRef("\xff/globalConfig/"), LiteralStringRef("\xff/globalConfig0") ); +// const KeyRef globalConfigPrefix = globalConfigKeys.begin; + +const KeyRangeRef globalConfigDataKeys(LiteralStringRef("\xff/globalConfig/k/"), + LiteralStringRef("\xff/globalConfig/k0")); +const KeyRef globalConfigKeysPrefix = globalConfigDataKeys.begin; + +const KeyRangeRef globalConfigHistoryKeys(LiteralStringRef("\xff/globalConfig/h/"), + LiteralStringRef("\xff/globalConfig/h0")); +const KeyRef globalConfigHistoryPrefix = globalConfigHistoryKeys.begin; + +const KeyRef globalConfigVersionKey = LiteralStringRef("\xff/globalConfig/v"); + const KeyRangeRef workerListKeys(LiteralStringRef("\xff/worker/"), LiteralStringRef("\xff/worker0")); const KeyRef workerListPrefix = workerListKeys.begin; @@ -748,8 +779,7 @@ const KeyRef tagThrottleCountKey = LiteralStringRef("\xff\x02/throttledTags/manu // Client status info prefix const KeyRangeRef fdbClientInfoPrefixRange(LiteralStringRef("\xff\x02/fdbClientInfo/"), LiteralStringRef("\xff\x02/fdbClientInfo0")); -const KeyRef fdbClientInfoTxnSampleRate = LiteralStringRef("\xff\x02/fdbClientInfo/client_txn_sample_rate/"); -const KeyRef fdbClientInfoTxnSizeLimit = LiteralStringRef("\xff\x02/fdbClientInfo/client_txn_size_limit/"); +// See remaining fields in GlobalConfig.actor.h // ConsistencyCheck settings const KeyRef fdbShouldConsistencyCheckBeSuspended = LiteralStringRef("\xff\x02/ConsistencyCheck/Suspend"); @@ -963,3 +993,60 @@ const KeyRef configTransactionDescriptionKey = "\xff\xff/description"_sr; const KeyRange globalConfigKnobKeys = singleKeyRange("\xff\xff/globalKnobs"_sr); const KeyRangeRef configKnobKeys("\xff\xff/knobs/"_sr, "\xff\xff/knobs0"_sr); const KeyRangeRef configClassKeys("\xff\xff/configClasses/"_sr, "\xff\xff/configClasses0"_sr); + +// for tests +void testSSISerdes(StorageServerInterface const& ssi, bool useFB) { + printf("ssi=\nid=%s\nlocality=%s\nisTss=%s\ntssId=%s\naddress=%s\ngetValue=%s\n\n\n", + ssi.id().toString().c_str(), + ssi.locality.toString().c_str(), + ssi.isTss() ? "true" : "false", + ssi.isTss() ? ssi.tssPairID.get().toString().c_str() : "", + ssi.address().toString().c_str(), + ssi.getValue.getEndpoint().token.toString().c_str()); + + StorageServerInterface ssi2 = + (useFB) ? decodeServerListValueFB(serverListValueFB(ssi)) : decodeServerListValue(serverListValue(ssi)); + + printf("ssi2=\nid=%s\nlocality=%s\nisTss=%s\ntssId=%s\naddress=%s\ngetValue=%s\n\n\n", + ssi2.id().toString().c_str(), + ssi2.locality.toString().c_str(), + ssi2.isTss() ? "true" : "false", + ssi2.isTss() ? 
ssi2.tssPairID.get().toString().c_str() : "",
+           ssi2.address().toString().c_str(),
+           ssi2.getValue.getEndpoint().token.toString().c_str());
+
+    ASSERT(ssi.id() == ssi2.id());
+    ASSERT(ssi.locality == ssi2.locality);
+    ASSERT(ssi.isTss() == ssi2.isTss());
+    if (ssi.isTss()) {
+        ASSERT(ssi.tssPairID.get() == ssi2.tssPairID.get());
+    }
+    ASSERT(ssi.address() == ssi2.address());
+    ASSERT(ssi.getValue.getEndpoint().token == ssi2.getValue.getEndpoint().token);
+}
+
+// unit test for StorageServerInterface serialization, since the tss fields have had serialization bugs before
+TEST_CASE("/SystemData/SerDes/SSI") {
+    printf("testing ssi serdes\n");
+    LocalityData localityData(Optional<Standalone<StringRef>>(),
+                              Standalone<StringRef>(deterministicRandom()->randomUniqueID().toString()),
+                              Standalone<StringRef>(deterministicRandom()->randomUniqueID().toString()),
+                              Optional<Standalone<StringRef>>());
+
+    // non-tss
+    StorageServerInterface ssi;
+    ssi.uniqueID = UID(0x1234123412341234, 0x5678567856785678);
+    ssi.locality = localityData;
+    ssi.initEndpoints();
+
+    testSSISerdes(ssi, false);
+    testSSISerdes(ssi, true);
+
+    ssi.tssPairID = UID(0x2345234523452345, 0x1238123812381238);
+
+    testSSISerdes(ssi, false);
+    testSSISerdes(ssi, true);
+    printf("ssi serdes test complete\n");
+
+    return Void();
+}
diff --git a/fdbclient/SystemData.h b/fdbclient/SystemData.h
index 448f0b3d1d..c3266540fc 100644
--- a/fdbclient/SystemData.h
+++ b/fdbclient/SystemData.h
@@ -51,12 +51,12 @@ extern const KeyRangeRef keyServersKeys, keyServersKeyServersKeys;
 extern const KeyRef keyServersPrefix, keyServersEnd, keyServersKeyServersKey;
 const Key keyServersKey(const KeyRef& k);
 const KeyRef keyServersKey(const KeyRef& k, Arena& arena);
-const Value keyServersValue(Standalone<RangeResultRef> result,
+const Value keyServersValue(RangeResult result,
                             const std::vector<UID>& src,
                             const std::vector<UID>& dest = std::vector<UID>());
 const Value keyServersValue(const std::vector<Tag>& srcTag,
                             const std::vector<Tag>& destTag = std::vector<Tag>());
 // `result` must be the full result of getting serverTagKeys
-void decodeKeyServersValue(Standalone<RangeResultRef> result,
+void decodeKeyServersValue(RangeResult result,
                            const ValueRef& value,
                            std::vector<UID>& src,
                            std::vector<UID>& dest,
@@ -114,6 +114,11 @@ extern const KeyRef cacheChangePrefix;
 const Key cacheChangeKeyFor(uint16_t idx);
 uint16_t cacheChangeKeyDecodeIndex(const KeyRef& key);
 
+// "\xff/tss/[[serverId]]" := "[[tssId]]"
+extern const KeyRef tssMappingChangeKey;
+extern const KeyRangeRef tssMappingKeys;
+extern const KeyRef tssMappingPrefix;
+
 // "\xff/serverTag/[[serverID]]" = "[[Tag]]"
 // Provides the Tag for the given serverID. Used to access a
 // storage server's corresponding TLog in order to apply mutations.
@@ -195,6 +200,8 @@ UID decodeProcessClassKeyOld(KeyRef const& key);
 extern const KeyRangeRef configKeys;
 extern const KeyRef configKeysPrefix;
+extern const KeyRef perpetualStorageWiggleKey;
+extern const KeyRef wigglingStorageServerKey;
 
 // Change the value of this key to anything and that will trigger detailed data distribution team info log.
 extern const KeyRef triggerDDTeamInfoPrintKey;
 
@@ -229,6 +236,30 @@ extern const KeyRef failedServersVersionKey; // The value of this key shall be c
 const AddressExclusion decodeFailedServersKey(KeyRef const& key); // where key.startsWith(failedServersPrefix)
 std::string encodeFailedServersKey(AddressExclusion const&);
 
+// "\xff/globalConfig/[[option]]" := "value"
+// An umbrella prefix for global configuration data synchronized to all nodes.
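+//
+// Layout sketch (illustrative; the option name "config/a" and value encoding are hypothetical): setting a
+// global option "config/a" to "v" at commit version 100 would roughly produce
+//   "\xff/globalConfig/k/config/a" := "v"
+//   "\xff/globalConfig/h/100"      := (the mutations made at that commit)
+//   "\xff/globalConfig/v"          := 100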
+// extern const KeyRangeRef globalConfigData; +// extern const KeyRef globalConfigDataPrefix; + +// "\xff/globalConfig/k/[[key]]" := "value" +// Key-value pairs that have been set. The range this keyspace represents +// contains all globally configured options. +extern const KeyRangeRef globalConfigDataKeys; +extern const KeyRef globalConfigKeysPrefix; + +// "\xff/globalConfig/h/[[version]]" := "value" +// Maps a commit version to a list of mutations made to the global +// configuration at that commit. Shipped to nodes periodically. In general, +// clients should not write to keys in this keyspace; it will be written +// automatically when updating global configuration keys. +extern const KeyRangeRef globalConfigHistoryKeys; +extern const KeyRef globalConfigHistoryPrefix; + +// "\xff/globalConfig/v" := "version" +// Read-only key which returns the commit version of the most recent mutation +// made to the global configuration keyspace. +extern const KeyRef globalConfigVersionKey; + // "\xff/workers/[[processID]]" := "" // Asynchronously updated by the cluster controller, this is a list of fdbserver processes that have joined the cluster // and are currently (recently) available @@ -354,8 +385,6 @@ extern const KeyRangeRef applyMutationsKeyVersionCountRange; // FdbClient Info prefix extern const KeyRangeRef fdbClientInfoPrefixRange; -extern const KeyRef fdbClientInfoTxnSampleRate; -extern const KeyRef fdbClientInfoTxnSizeLimit; // Consistency Check settings extern const KeyRef fdbShouldConsistencyCheckBeSuspended; diff --git a/fdbclient/TagThrottle.actor.cpp b/fdbclient/TagThrottle.actor.cpp index c278db116d..76adbb5431 100644 --- a/fdbclient/TagThrottle.actor.cpp +++ b/fdbclient/TagThrottle.actor.cpp @@ -179,7 +179,7 @@ ACTOR Future> getThrottledTags(Database db, int lim if (!containsRecommend) { wait(store(reportAuto, getValidAutoEnabled(&tr, db))); } - Standalone throttles = wait(tr.getRange( + RangeResult throttles = wait(tr.getRange( reportAuto ? 
tagThrottleKeys : KeyRangeRef(tagThrottleKeysPrefix, tagThrottleAutoKeysPrefix), limit)); std::vector results; for (auto throttle : throttles) { @@ -202,7 +202,7 @@ ACTOR Future> getRecommendedTags(Database db, int l return std::vector(); } - Standalone throttles = + RangeResult throttles = wait(tr.getRange(KeyRangeRef(tagThrottleAutoKeysPrefix, tagThrottleKeys.end), limit)); std::vector results; for (auto throttle : throttles) { @@ -339,7 +339,7 @@ ACTOR Future unthrottleMatchingThrottles(Database db, loop { try { - state Standalone tags = wait(tr.getRange(begin, end, 1000)); + state RangeResult tags = wait(tr.getRange(begin, end, 1000)); state uint64_t unthrottledTags = 0; uint64_t manualUnthrottledTags = 0; for (auto tag : tags) { diff --git a/fdbclient/TaskBucket.actor.cpp b/fdbclient/TaskBucket.actor.cpp index 6f0f63a7f0..4e17a1c9f7 100644 --- a/fdbclient/TaskBucket.actor.cpp +++ b/fdbclient/TaskBucket.actor.cpp @@ -243,8 +243,7 @@ public: state Reference task(new Task()); task->key = taskUID; - state Standalone values = - wait(tr->getRange(taskAvailableSpace.range(), CLIENT_KNOBS->TOO_MANY)); + state RangeResult values = wait(tr->getRange(taskAvailableSpace.range(), CLIENT_KNOBS->TOO_MANY)); Version version = wait(tr->getReadVersion()); task->timeoutVersion = version + (uint64_t)(taskBucket->timeout * @@ -602,19 +601,19 @@ public: taskBucket->setOptions(tr); // Check all available priorities for keys - state std::vector>> resultFutures; + state std::vector> resultFutures; for (int pri = 0; pri <= CLIENT_KNOBS->TASKBUCKET_MAX_PRIORITY; ++pri) resultFutures.push_back(tr->getRange(taskBucket->getAvailableSpace(pri).range(), 1)); // If any priority levels have any keys then the taskbucket is not empty so return false state int i; for (i = 0; i < resultFutures.size(); ++i) { - Standalone results = wait(resultFutures[i]); + RangeResult results = wait(resultFutures[i]); if (results.size() > 0) return false; } - Standalone values = wait(tr->getRange(taskBucket->timeouts.range(), 1)); + RangeResult values = wait(tr->getRange(taskBucket->timeouts.range(), 1)); if (values.size() > 0) return false; @@ -625,14 +624,14 @@ public: taskBucket->setOptions(tr); // Check all available priorities for emptiness - state std::vector>> resultFutures; + state std::vector> resultFutures; for (int pri = 0; pri <= CLIENT_KNOBS->TASKBUCKET_MAX_PRIORITY; ++pri) resultFutures.push_back(tr->getRange(taskBucket->getAvailableSpace(pri).range(), 1)); // If any priority levels have any keys then return true as the level is 'busy' state int i; for (i = 0; i < resultFutures.size(); ++i) { - Standalone results = wait(resultFutures[i]); + RangeResult results = wait(resultFutures[i]); if (results.size() > 0) return true; } @@ -650,7 +649,7 @@ public: t.append(task->timeoutVersion); t.append(task->key); - Standalone values = wait(tr->getRange(taskBucket->timeouts.range(t), 1)); + RangeResult values = wait(tr->getRange(taskBucket->timeouts.range(t), 1)); if (values.size() > 0) return false; @@ -742,7 +741,7 @@ public: state KeyRange range( KeyRangeRef(taskBucket->timeouts.get(0).range().begin, taskBucket->timeouts.get(end).range().end)); - Standalone values = wait(tr->getRange(range, CLIENT_KNOBS->TASKBUCKET_MAX_TASK_KEYS)); + RangeResult values = wait(tr->getRange(range, CLIENT_KNOBS->TASKBUCKET_MAX_TASK_KEYS)); // Keys will be tuples of (taskUID, param) -> paramValue // Unfortunately we need to know the priority parameter for a taskUID before we can know which available-tasks @@ -793,7 +792,7 @@ public: ACTOR static Future 
debugPrintRange(Reference tr, Subspace subspace, Key msg) { tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr->setOption(FDBTransactionOptions::LOCK_AWARE); - Standalone values = wait(tr->getRange(subspace.range(), CLIENT_KNOBS->TOO_MANY)); + RangeResult values = wait(tr->getRange(subspace.range(), CLIENT_KNOBS->TOO_MANY)); TraceEvent("TaskBucketDebugPrintRange") .detail("Key", subspace.key()) .detail("Count", values.size()) @@ -851,7 +850,7 @@ public: } else { TEST(true); // Extended a task without updating parameters // Otherwise, read and transplant the params from the old to new timeout spaces - Standalone params = wait(tr->getRange(oldTimeoutSpace.range(), CLIENT_KNOBS->TOO_MANY)); + RangeResult params = wait(tr->getRange(oldTimeoutSpace.range(), CLIENT_KNOBS->TOO_MANY)); for (auto& kv : params) { Tuple paramKey = oldTimeoutSpace.unpack(kv.key); tr->set(newTimeoutSpace.pack(paramKey), kv.value); @@ -1114,7 +1113,7 @@ public: ACTOR static Future isSet(Reference tr, Reference taskFuture) { taskFuture->futureBucket->setOptions(tr); - Standalone values = wait(tr->getRange(taskFuture->blocks.range(), 1)); + RangeResult values = wait(tr->getRange(taskFuture->blocks.range(), 1)); if (values.size() > 0) return false; @@ -1177,7 +1176,7 @@ public: Reference taskFuture) { taskFuture->futureBucket->setOptions(tr); - Standalone values = wait(tr->getRange(taskFuture->callbacks.range(), CLIENT_KNOBS->TOO_MANY)); + RangeResult values = wait(tr->getRange(taskFuture->callbacks.range(), CLIENT_KNOBS->TOO_MANY)); tr->clear(taskFuture->callbacks.range()); std::vector> actions; diff --git a/fdbclient/ThreadSafeTransaction.cpp b/fdbclient/ThreadSafeTransaction.cpp index 764a4b33eb..363ca9002d 100644 --- a/fdbclient/ThreadSafeTransaction.cpp +++ b/fdbclient/ThreadSafeTransaction.cpp @@ -101,6 +101,15 @@ double ThreadSafeDatabase::getMainThreadBusyness() { return g_network->networkInfo.metrics.networkBusyness; } +// Returns the protocol version reported by the coordinator this client is connected to +// If an expected version is given, the future won't return until the protocol version is different than expected +// Note: this will never return if the server is running a protocol from FDB 5.0 or older +ThreadFuture ThreadSafeDatabase::getServerProtocol(Optional expectedVersion) { + DatabaseContext* db = this->db; + return onMainThread( + [db, expectedVersion]() -> Future { return db->getClusterProtocol(expectedVersion); }); +} + ThreadSafeDatabase::ThreadSafeDatabase(std::string connFilename, int apiVersion) { ClusterConnectionFile* connFile = new ClusterConnectionFile(ClusterConnectionFile::lookupClusterFileName(connFilename).first); @@ -147,6 +156,12 @@ ThreadSafeTransaction::ThreadSafeTransaction(DatabaseContext* cx, ISingleThreadT nullptr); } +// This constructor is only used while refactoring fdbcli and only called from the main thread +ThreadSafeTransaction::ThreadSafeTransaction(ReadYourWritesTransaction* ryw) : tr(ryw) { + if (tr) + tr->addref(); +} + ThreadSafeTransaction::~ThreadSafeTransaction() { ISingleThreadTransaction* tr = this->tr; if (tr) @@ -212,31 +227,31 @@ ThreadFuture>> ThreadSafeTransaction::getRangeSplit }); } -ThreadFuture> ThreadSafeTransaction::getRange(const KeySelectorRef& begin, - const KeySelectorRef& end, - int limit, - bool snapshot, - bool reverse) { +ThreadFuture ThreadSafeTransaction::getRange(const KeySelectorRef& begin, + const KeySelectorRef& end, + int limit, + bool snapshot, + bool reverse) { KeySelector b = begin; KeySelector e = end; 
ISingleThreadTransaction* tr = this->tr; - return onMainThread([tr, b, e, limit, snapshot, reverse]() -> Future> { + return onMainThread([tr, b, e, limit, snapshot, reverse]() -> Future { tr->checkDeferredError(); return tr->getRange(b, e, limit, snapshot, reverse); }); } -ThreadFuture> ThreadSafeTransaction::getRange(const KeySelectorRef& begin, - const KeySelectorRef& end, - GetRangeLimits limits, - bool snapshot, - bool reverse) { +ThreadFuture ThreadSafeTransaction::getRange(const KeySelectorRef& begin, + const KeySelectorRef& end, + GetRangeLimits limits, + bool snapshot, + bool reverse) { KeySelector b = begin; KeySelector e = end; ISingleThreadTransaction* tr = this->tr; - return onMainThread([tr, b, e, limits, snapshot, reverse]() -> Future> { + return onMainThread([tr, b, e, limits, snapshot, reverse]() -> Future { tr->checkDeferredError(); return tr->getRange(b, e, limits, snapshot, reverse); }); @@ -410,16 +425,6 @@ const char* ThreadSafeApi::getClientVersion() { return clientVersion.c_str(); } -// Wait until a quorum of coordinators with the same protocol version are available, and then return that protocol -// version. -ThreadFuture ThreadSafeApi::getServerProtocol(const char* clusterFilePath) { - return onMainThread([clusterFilePath = std::string(clusterFilePath)]() -> Future { - auto [clusterFile, isDefault] = ClusterConnectionFile::lookupClusterFileName(clusterFilePath); - Reference f = Reference(new ClusterConnectionFile(clusterFile)); - return getCoordinatorProtocols(f); - }); -} - void ThreadSafeApi::setNetworkOption(FDBNetworkOptions::Option option, Optional value) { if (option == FDBNetworkOptions::EXTERNAL_CLIENT_TRANSPORT_ID) { if (value.present()) { diff --git a/fdbclient/ThreadSafeTransaction.h b/fdbclient/ThreadSafeTransaction.h index f199bab982..d063f1f80f 100644 --- a/fdbclient/ThreadSafeTransaction.h +++ b/fdbclient/ThreadSafeTransaction.h @@ -28,6 +28,8 @@ #include "fdbclient/IClientApi.h" #include "fdbclient/ISingleThreadTransaction.h" +// An implementation of IDatabase that serializes operations onto the network thread and interacts with the lower-level +// client APIs exposed by NativeAPI and ReadYourWrites. class ThreadSafeDatabase : public IDatabase, public ThreadSafeReferenceCounted { public: ~ThreadSafeDatabase() override; @@ -38,9 +40,15 @@ public: void setOption(FDBDatabaseOptions::Option option, Optional value = Optional()) override; double getMainThreadBusyness() override; - ThreadFuture - onConnected(); // Returns after a majority of coordination servers are available and have reported a leader. The - // cluster file therefore is valid, but the database might be unavailable. + // Returns the protocol version reported by the coordinator this client is connected to + // If an expected version is given, the future won't return until the protocol version is different than expected + // Note: this will never return if the server is running a protocol from FDB 5.0 or older + ThreadFuture getServerProtocol( + Optional expectedVersion = Optional()) override; + + // Returns after a majority of coordination servers are available and have reported a leader. The + // cluster file therefore is valid, but the database might be unavailable. 
+    ThreadFuture<Void> onConnected();
 
     void addref() override { ThreadSafeReferenceCounted<ThreadSafeDatabase>::addref(); }
     void delref() override { ThreadSafeReferenceCounted<ThreadSafeDatabase>::delref(); }
@@ -60,37 +68,42 @@ public:
     // Internal use only
     DatabaseContext* unsafeGetPtr() const { return db; }
 };
 
+// An implementation of ITransaction that serializes operations onto the network thread and interacts with the
+// lower-level client APIs exposed by NativeAPI and ReadYourWrites.
 class ThreadSafeTransaction : public ITransaction, ThreadSafeReferenceCounted<ThreadSafeTransaction>, NonCopyable {
 public:
     explicit ThreadSafeTransaction(DatabaseContext* cx, ISingleThreadTransaction::Type type);
     ~ThreadSafeTransaction() override;
 
+    // Note: only used while refactoring fdbcli; needs to be removed later
+    explicit ThreadSafeTransaction(ReadYourWritesTransaction* ryw);
+
     void cancel() override;
     void setVersion(Version v) override;
     ThreadFuture<Version> getReadVersion() override;
     ThreadFuture<Optional<Value>> get(const KeyRef& key, bool snapshot = false) override;
     ThreadFuture<Key> getKey(const KeySelectorRef& key, bool snapshot = false) override;
-    ThreadFuture<Standalone<RangeResultRef>> getRange(const KeySelectorRef& begin,
-                                                      const KeySelectorRef& end,
-                                                      int limit,
-                                                      bool snapshot = false,
-                                                      bool reverse = false) override;
-    ThreadFuture<Standalone<RangeResultRef>> getRange(const KeySelectorRef& begin,
-                                                      const KeySelectorRef& end,
-                                                      GetRangeLimits limits,
-                                                      bool snapshot = false,
-                                                      bool reverse = false) override;
-    ThreadFuture<Standalone<RangeResultRef>> getRange(const KeyRangeRef& keys,
-                                                      int limit,
-                                                      bool snapshot = false,
-                                                      bool reverse = false) override {
+    ThreadFuture<RangeResult> getRange(const KeySelectorRef& begin,
+                                       const KeySelectorRef& end,
+                                       int limit,
+                                       bool snapshot = false,
+                                       bool reverse = false) override;
+    ThreadFuture<RangeResult> getRange(const KeySelectorRef& begin,
+                                       const KeySelectorRef& end,
+                                       GetRangeLimits limits,
+                                       bool snapshot = false,
+                                       bool reverse = false) override;
+    ThreadFuture<RangeResult> getRange(const KeyRangeRef& keys,
+                                       int limit,
+                                       bool snapshot = false,
+                                       bool reverse = false) override {
         return getRange(firstGreaterOrEqual(keys.begin), firstGreaterOrEqual(keys.end), limit, snapshot, reverse);
     }
-    ThreadFuture<Standalone<RangeResultRef>> getRange(const KeyRangeRef& keys,
-                                                      GetRangeLimits limits,
-                                                      bool snapshot = false,
-                                                      bool reverse = false) override {
+    ThreadFuture<RangeResult> getRange(const KeyRangeRef& keys,
+                                       GetRangeLimits limits,
+                                       bool snapshot = false,
+                                       bool reverse = false) override {
         return getRange(firstGreaterOrEqual(keys.begin), firstGreaterOrEqual(keys.end), limits, snapshot, reverse);
     }
     ThreadFuture<Standalone<VectorRef<const char*>>> getAddressesForKey(const KeyRef& key) override;
@@ -137,11 +150,12 @@ private:
     ISingleThreadTransaction* tr;
 };
 
+// An implementation of IClientApi that serializes operations onto the network thread and interacts with the
+// lower-level client APIs exposed by NativeAPI and ReadYourWrites.
 class ThreadSafeApi : public IClientApi, ThreadSafeReferenceCounted<ThreadSafeApi> {
 public:
     void selectApiVersion(int apiVersion) override;
     const char* getClientVersion() override;
-    ThreadFuture<uint64_t> getServerProtocol(const char* clusterFilePath) override;
 
     void setNetworkOption(FDBNetworkOptions::Option option, Optional<StringRef> value = Optional<StringRef>()) override;
     void setupNetwork() override;
diff --git a/fdbclient/Tuple.cpp b/fdbclient/Tuple.cpp
index 3d4427079f..367a7b80fb 100644
--- a/fdbclient/Tuple.cpp
+++ b/fdbclient/Tuple.cpp
@@ -20,7 +20,20 @@
 
 #include "fdbclient/Tuple.h"
 
-static size_t find_string_terminator(const StringRef data, size_t offset) {
+// TODO: Many functions copied from bindings/flow/Tuple.cpp. Merge at some point.
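+
+// Encoding sketch for the order-preserving float/double transform below (illustrative, values hand-checked):
+//    1.0f -> big-endian bytes 3f 80 00 00, sign bit 0 -> flip only the sign bit -> bf 80 00 00
+//   -1.0f -> big-endian bytes bf 80 00 00, sign bit 1 -> flip all bits          -> 40 7f ff ff
+//   -2.0f -> big-endian bytes c0 00 00 00, sign bit 1 -> flip all bits          -> 3f ff ff ff
+// After the transform, unsigned byte order matches numeric order: -2.0 < -1.0 < 1.0.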
+static float bigEndianFloat(float orig) { + int32_t big = *(int32_t*)&orig; + big = bigEndian32(big); + return *(float*)&big; +} + +static double bigEndianDouble(double orig) { + int64_t big = *(int64_t*)&orig; + big = bigEndian64(big); + return *(double*)&big; +} + +static size_t findStringTerminator(const StringRef data, size_t offset) { size_t i = offset; while (i < data.size() - 1 && !(data[i] == '\x00' && data[i + 1] != (uint8_t)'\xff')) { i += (data[i] == '\x00' ? 2 : 1); @@ -29,6 +42,20 @@ static size_t find_string_terminator(const StringRef data, size_t offset) { return i; } +// If encoding and the sign bit is 1 (the number is negative), flip all the bits. +// If decoding and the sign bit is 0 (the number is negative), flip all the bits. +// Otherwise, the number is positive, so flip the sign bit. +static void adjustFloatingPoint(uint8_t* bytes, size_t size, bool encode) { + if ((encode && ((uint8_t)(bytes[0] & 0x80) != (uint8_t)0x00)) || + (!encode && ((uint8_t)(bytes[0] & 0x80) != (uint8_t)0x80))) { + for (size_t i = 0; i < size; i++) { + bytes[i] ^= (uint8_t)0xff; + } + } else { + bytes[0] ^= (uint8_t)0x80; + } +} + Tuple::Tuple(StringRef const& str, bool exclude_incomplete) { data.append(data.arena(), str.begin(), str.size()); @@ -37,9 +64,13 @@ Tuple::Tuple(StringRef const& str, bool exclude_incomplete) { offsets.push_back(i); if (data[i] == '\x01' || data[i] == '\x02') { - i = find_string_terminator(str, i + 1) + 1; + i = findStringTerminator(str, i + 1) + 1; } else if (data[i] >= '\x0c' && data[i] <= '\x1c') { i += abs(data[i] - '\x14') + 1; + } else if (data[i] == 0x20) { + i += sizeof(float) + 1; + } else if (data[i] == 0x21) { + i += sizeof(double) + 1; } else if (data[i] == '\x00') { i += 1; } else { @@ -113,6 +144,29 @@ Tuple& Tuple::append(int64_t value) { return *this; } +Tuple& Tuple::appendFloat(float value) { + offsets.push_back(data.size()); + float swap = bigEndianFloat(value); + uint8_t* bytes = (uint8_t*)&swap; + adjustFloatingPoint(bytes, sizeof(float), true); + + data.push_back(data.arena(), 0x20); + data.append(data.arena(), bytes, sizeof(float)); + return *this; +} + +Tuple& Tuple::appendDouble(double value) { + offsets.push_back(data.size()); + double swap = value; + swap = bigEndianDouble(swap); + uint8_t* bytes = (uint8_t*)&swap; + adjustFloatingPoint(bytes, sizeof(double), true); + + data.push_back(data.arena(), 0x21); + data.append(data.arena(), bytes, sizeof(double)); + return *this; +} + Tuple& Tuple::appendNull() { offsets.push_back(data.size()); data.push_back(data.arena(), (uint8_t)'\x00'); @@ -134,6 +188,10 @@ Tuple::ElementType Tuple::getType(size_t index) const { return ElementType::UTF8; } else if (code >= '\x0c' && code <= '\x1c') { return ElementType::INT; + } else if (code == 0x20) { + return ElementType::FLOAT; + } else if (code == 0x21) { + return ElementType::DOUBLE; } else { throw invalid_tuple_data_type(); } @@ -228,6 +286,45 @@ int64_t Tuple::getInt(size_t index, bool allow_incomplete) const { return swap; } +// TODO: Combine with bindings/flow/Tuple.*. This code is copied from there. 
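+
+// Decoding (below) inverts the transform above: an encoded sign bit of 1 means the original number was positive
+// (only the sign bit was flipped), while 0 means it was negative (every bit was flipped). A round-trip sketch,
+// assuming the accessors behave as declared in Tuple.h:
+//   Tuple t;
+//   t.appendFloat(-1.0f).appendDouble(2.5);
+//   ASSERT(t.getFloat(0) == -1.0f && t.getDouble(1) == 2.5);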
+float Tuple::getFloat(size_t index) const { + if (index >= offsets.size()) { + throw invalid_tuple_index(); + } + ASSERT_LT(offsets[index], data.size()); + uint8_t code = data[offsets[index]]; + if (code != 0x20) { + throw invalid_tuple_data_type(); + } + + float swap; + uint8_t* bytes = (uint8_t*)&swap; + ASSERT_LE(offsets[index] + 1 + sizeof(float), data.size()); + swap = *(float*)(data.begin() + offsets[index] + 1); + adjustFloatingPoint(bytes, sizeof(float), false); + + return bigEndianFloat(swap); +} + +double Tuple::getDouble(size_t index) const { + if (index >= offsets.size()) { + throw invalid_tuple_index(); + } + ASSERT_LT(offsets[index], data.size()); + uint8_t code = data[offsets[index]]; + if (code != 0x21) { + throw invalid_tuple_data_type(); + } + + double swap; + uint8_t* bytes = (uint8_t*)&swap; + ASSERT_LE(offsets[index] + 1 + sizeof(double), data.size()); + swap = *(double*)(data.begin() + offsets[index] + 1); + adjustFloatingPoint(bytes, sizeof(double), false); + + return bigEndianDouble(swap); +} + KeyRange Tuple::range(Tuple const& tuple) const { VectorRef begin; VectorRef end; diff --git a/fdbclient/Tuple.h b/fdbclient/Tuple.h index b44edd73cc..3dc597f262 100644 --- a/fdbclient/Tuple.h +++ b/fdbclient/Tuple.h @@ -38,6 +38,10 @@ struct Tuple { Tuple& append(Tuple const& tuple); Tuple& append(StringRef const& str, bool utf8 = false); Tuple& append(int64_t); + // There are some ambiguous append calls in fdbclient, so to make it easier + // to add append for floats and doubles, name them differently for now. + Tuple& appendFloat(float); + Tuple& appendDouble(double); Tuple& appendNull(); StringRef pack() const { return StringRef(data.begin(), data.size()); } @@ -47,7 +51,7 @@ struct Tuple { return append(t); } - enum ElementType { NULL_TYPE, INT, BYTES, UTF8 }; + enum ElementType { NULL_TYPE, INT, BYTES, UTF8, FLOAT, DOUBLE }; // this is number of elements, not length of data size_t size() const { return offsets.size(); } @@ -55,6 +59,8 @@ struct Tuple { ElementType getType(size_t index) const; Standalone getString(size_t index) const; int64_t getInt(size_t index, bool allow_incomplete = false) const; + float getFloat(size_t index) const; + double getDouble(size_t index) const; KeyRange range(Tuple const& tuple = Tuple()) const; diff --git a/fdbclient/vexillographer/fdb.options b/fdbclient/vexillographer/fdb.options index a447b23c69..15ba1250ca 100644 --- a/fdbclient/vexillographer/fdb.options +++ b/fdbclient/vexillographer/fdb.options @@ -192,7 +192,10 @@ description is not currently required but encouraged. description="Enable tracing for all transactions. This is the default." />