diff --git a/bindings/c/fdb_c.cpp b/bindings/c/fdb_c.cpp index c91327e45c..86356ef474 100644 --- a/bindings/c/fdb_c.cpp +++ b/bindings/c/fdb_c.cpp @@ -153,7 +153,7 @@ void fdb_future_destroy( FDBFuture* f ) { extern "C" DLLEXPORT fdb_error_t fdb_future_block_until_ready( FDBFuture* f ) { - CATCH_AND_RETURN( TSAVB(f)->blockUntilReady(); ); + CATCH_AND_RETURN(TSAVB(f)->blockUntilReadyCheckOnMainThread();); } fdb_bool_t fdb_future_is_error_v22( FDBFuture* f ) { @@ -596,7 +596,7 @@ fdb_error_t fdb_transaction_set_option_impl( FDBTransaction* tr, void fdb_transaction_set_option_v13( FDBTransaction* tr, FDBTransactionOption option ) { - fdb_transaction_set_option_impl( tr, option, NULL, 0 ); + fdb_transaction_set_option_impl( tr, option, nullptr, 0 ); } extern "C" DLLEXPORT diff --git a/bindings/c/test/mako/mako.c b/bindings/c/test/mako/mako.c index 06e9f59616..8d69d1af79 100644 --- a/bindings/c/test/mako/mako.c +++ b/bindings/c/test/mako/mako.c @@ -54,7 +54,8 @@ FILE* debugme; /* descriptor used for debug messages */ int err = wait_future(_f); \ if (err) { \ int err2; \ - if ((err != 1020 /* not_committed */) && (err != 1021 /* commit_unknown_result */)) { \ + if ((err != 1020 /* not_committed */) && (err != 1021 /* commit_unknown_result */) && \ + (err != 1213 /* tag_throttled */)) { \ fprintf(stderr, "ERROR: Error %s (%d) occured at %s\n", #_func, err, fdb_get_error(err)); \ } else { \ fprintf(annoyme, "ERROR: Error %s (%d) occured at %s\n", #_func, err, fdb_get_error(err)); \ @@ -698,7 +699,7 @@ retryTxn: } int run_workload(FDBTransaction* transaction, mako_args_t* args, int thread_tps, volatile double* throttle_factor, - int thread_iters, volatile int* signal, mako_stats_t* stats, int dotrace, lat_block_t* block[], + int thread_iters, volatile int* signal, mako_stats_t* stats, int dotrace, int dotagging, lat_block_t* block[], int* elem_size, bool* is_memory_allocated) { int xacts = 0; int64_t total_xacts = 0; @@ -710,6 +711,7 @@ int run_workload(FDBTransaction* transaction, mako_args_t* args, int thread_tps, int current_tps; char* traceid; int tracetimer = 0; + char* tagstr; if (thread_tps < 0) return 0; @@ -717,6 +719,12 @@ int run_workload(FDBTransaction* transaction, mako_args_t* args, int thread_tps, traceid = (char*)malloc(32); } + if(dotagging) { + tagstr = (char*)calloc(16, 1); + memcpy(tagstr, KEYPREFIX, KEYPREFIXLEN); + memcpy(tagstr + KEYPREFIXLEN, args->txntagging_prefix, TAGPREFIXLENGTH_MAX); + } + current_tps = (int)((double)thread_tps * *throttle_factor); keystr = (char*)malloc(sizeof(char) * args->key_length + 1); @@ -774,6 +782,7 @@ int run_workload(FDBTransaction* transaction, mako_args_t* args, int thread_tps, } } + } else { if (thread_tps > 0) { /* 1 second not passed, throttle */ @@ -783,6 +792,17 @@ int run_workload(FDBTransaction* transaction, mako_args_t* args, int thread_tps, } } /* throttle or txntrace */ + /* enable transaction tagging */ + if (dotagging > 0) { + sprintf(tagstr + KEYPREFIXLEN + TAGPREFIXLENGTH_MAX, "%03d", urand(0, args->txntagging - 1)); + fdb_error_t err = fdb_transaction_set_option(transaction, FDB_TR_OPTION_AUTO_THROTTLE_TAG, + (uint8_t*)tagstr, 16); + if (err) { + fprintf(stderr, "ERROR: FDB_TR_OPTION_DEBUG_TRANSACTION_IDENTIFIER: %s\n", + fdb_get_error(err)); + } + } + rc = run_one_transaction(transaction, args, stats, keystr, keystr2, valstr, block, elem_size, is_memory_allocated); if (rc) { @@ -808,6 +828,9 @@ int run_workload(FDBTransaction* transaction, mako_args_t* args, int thread_tps, if (dotrace) { free(traceid); } + if(dotagging) { + 
free(tagstr); + } return rc; } @@ -876,6 +899,7 @@ void* worker_thread(void* thread_args) { int op; int i, size; int dotrace = (worker_id == 0 && thread_id == 0 && args->txntrace) ? args->txntrace : 0; + int dotagging = args->txntagging; volatile int* signal = &((thread_args_t*)thread_args)->process->shm->signal; volatile double* throttle_factor = &((thread_args_t*)thread_args)->process->shm->throttle_factor; volatile int* readycount = &((thread_args_t*)thread_args)->process->shm->readycount; @@ -940,8 +964,8 @@ void* worker_thread(void* thread_args) { /* run the workload */ else if (args->mode == MODE_RUN) { - rc = run_workload(transaction, args, thread_tps, throttle_factor, thread_iters, signal, stats, dotrace, block, - elem_size, is_memory_allocated); + rc = run_workload(transaction, args, thread_tps, throttle_factor, thread_iters, + signal, stats, dotrace, dotagging, block, elem_size, is_memory_allocated); if (rc < 0) { fprintf(stderr, "ERROR: run_workload failed\n"); } @@ -1209,6 +1233,8 @@ int init_args(mako_args_t* args) { args->tracepath[0] = '\0'; args->traceformat = 0; /* default to client's default (XML) */ args->txntrace = 0; + args->txntagging = 0; + memset(args->txntagging_prefix, 0, TAGPREFIXLENGTH_MAX); for (i = 0; i < MAX_OP; i++) { args->txnspec.ops[i][OP_COUNT] = 0; } @@ -1366,6 +1392,8 @@ void usage() { printf("%-24s %s\n", " --tracepath=PATH", "Set trace file path"); printf("%-24s %s\n", " --trace_format ", "Set trace format (Default: json)"); printf("%-24s %s\n", " --txntrace=sec", "Specify transaction tracing interval (Default: 0)"); + printf("%-24s %s\n", " --txntagging", "Specify the number of different transaction tag (Default: 0, max = 1000)"); + printf("%-24s %s\n", " --txntagging_prefix", "Specify the prefix of transaction tag - mako${txntagging_prefix} (Default: '')"); printf("%-24s %s\n", " --knobs=KNOBS", "Set client knobs"); printf("%-24s %s\n", " --flatbuffers", "Use flatbuffers"); } @@ -1407,6 +1435,8 @@ int parse_args(int argc, char* argv[], mako_args_t* args) { { "commitget", no_argument, NULL, ARG_COMMITGET }, { "flatbuffers", no_argument, NULL, ARG_FLATBUFFERS }, { "trace", no_argument, NULL, ARG_TRACE }, + { "txntagging", required_argument, NULL, ARG_TXNTAGGING }, + { "txntagging_prefix", required_argument, NULL, ARG_TXNTAGGINGPREFIX}, { "version", no_argument, NULL, ARG_VERSION }, { NULL, 0, NULL, 0 } }; @@ -1522,8 +1552,25 @@ int parse_args(int argc, char* argv[], mako_args_t* args) { case ARG_TXNTRACE: args->txntrace = atoi(optarg); break; + + case ARG_TXNTAGGING: + args->txntagging = atoi(optarg); + if(args->txntagging > 1000) { + args->txntagging = 1000; + } + break; + case ARG_TXNTAGGINGPREFIX: { + if(strlen(optarg) > TAGPREFIXLENGTH_MAX) { + fprintf(stderr, "Error: the length of txntagging_prefix is larger than %d\n", TAGPREFIXLENGTH_MAX); + exit(0); + } + memcpy(args->txntagging_prefix, optarg, strlen(optarg)); + break; + } + } } + if ((args->tpsmin == -1) || (args->tpsmin > args->tpsmax)) { args->tpsmin = args->tpsmax; } @@ -1580,6 +1627,10 @@ int validate_args(mako_args_t* args) { fprintf(stderr, "ERROR: Must specify either seconds or iteration\n"); return -1; } + if(args->txntagging < 0) { + fprintf(stderr, "ERROR: --txntagging must be a non-negative integer\n"); + return -1; + } } return 0; } diff --git a/bindings/c/test/mako/mako.h b/bindings/c/test/mako/mako.h index e7e94865cd..3677c23e42 100644 --- a/bindings/c/test/mako/mako.h +++ b/bindings/c/test/mako/mako.h @@ -75,7 +75,9 @@ enum Arguments { ARG_TPSMIN, ARG_TPSINTERVAL, 
ARG_TPSCHANGE, - ARG_TXNTRACE + ARG_TXNTRACE, + ARG_TXNTAGGING, + ARG_TXNTAGGINGPREFIX }; enum TPSChangeTypes { TPS_SIN, TPS_SQUARE, TPS_PULSE }; @@ -95,6 +97,7 @@ typedef struct { } mako_txnspec_t; #define KNOB_MAX 256 +#define TAGPREFIXLENGTH_MAX 8 /* benchmark parameters */ typedef struct { @@ -124,6 +127,8 @@ typedef struct { char knobs[KNOB_MAX]; uint8_t flatbuffers; int txntrace; + int txntagging; + char txntagging_prefix[TAGPREFIXLENGTH_MAX]; } mako_args_t; /* shared memory */ diff --git a/bindings/flow/fdb_flow.actor.cpp b/bindings/flow/fdb_flow.actor.cpp index 27355138b1..fd4bdc61e2 100644 --- a/bindings/flow/fdb_flow.actor.cpp +++ b/bindings/flow/fdb_flow.actor.cpp @@ -157,14 +157,14 @@ namespace FDB { void cancel() override; void reset() override; - TransactionImpl() : tr(NULL) {} + TransactionImpl() : tr(nullptr) {} TransactionImpl(TransactionImpl&& r) noexcept { tr = r.tr; - r.tr = NULL; + r.tr = nullptr; } TransactionImpl& operator=(TransactionImpl&& r) noexcept { tr = r.tr; - r.tr = NULL; + r.tr = nullptr; return *this; } @@ -207,10 +207,10 @@ namespace FDB { if ( value.present() ) throw_on_error( fdb_network_set_option( option, value.get().begin(), value.get().size() ) ); else - throw_on_error( fdb_network_set_option( option, NULL, 0 ) ); + throw_on_error( fdb_network_set_option( option, nullptr, 0 ) ); } - API* API::instance = NULL; + API* API::instance = nullptr; API::API(int version) : version(version) {} API* API::selectAPIVersion(int apiVersion) { @@ -234,11 +234,11 @@ namespace FDB { } bool API::isAPIVersionSelected() { - return API::instance != NULL; + return API::instance != nullptr; } API* API::getInstance() { - if(API::instance == NULL) { + if(API::instance == nullptr) { throw api_version_unset(); } else { @@ -280,7 +280,7 @@ namespace FDB { if (value.present()) throw_on_error(fdb_database_set_option(db, option, value.get().begin(), value.get().size())); else - throw_on_error(fdb_database_set_option(db, option, NULL, 0)); + throw_on_error(fdb_database_set_option(db, option, nullptr, 0)); } TransactionImpl::TransactionImpl(FDBDatabase* db) { @@ -417,7 +417,7 @@ namespace FDB { if ( value.present() ) { throw_on_error( fdb_transaction_set_option( tr, option, value.get().begin(), value.get().size() ) ); } else { - throw_on_error( fdb_transaction_set_option( tr, option, NULL, 0 ) ); + throw_on_error( fdb_transaction_set_option( tr, option, nullptr, 0 ) ); } } diff --git a/bindings/flow/fdb_flow.h b/bindings/flow/fdb_flow.h index 66049cae0c..9d079682a1 100644 --- a/bindings/flow/fdb_flow.h +++ b/bindings/flow/fdb_flow.h @@ -31,7 +31,7 @@ namespace FDB { struct CFuture : NonCopyable, ReferenceCounted, FastAllocated { - CFuture() : f(NULL) {} + CFuture() : f(nullptr) {} explicit CFuture(FDBFuture* f) : f(f) {} ~CFuture() { if (f) { diff --git a/bindings/go/src/fdb/subspace/subspace.go b/bindings/go/src/fdb/subspace/subspace.go index 65f97048c8..d4600d725c 100644 --- a/bindings/go/src/fdb/subspace/subspace.go +++ b/bindings/go/src/fdb/subspace/subspace.go @@ -78,8 +78,9 @@ type Subspace interface { // FoundationDB keys (corresponding to the prefix of this Subspace). fdb.KeyConvertible - // All Subspaces implement fdb.ExactRange and fdb.Range, and describe all - // keys logically in this Subspace. + // All Subspaces implement fdb.ExactRange and fdb.Range, and describe all + // keys strictly within the subspace that encode tuples. Specifically, + // this will include all keys in [prefix + '\x00', prefix + '\xff'). 
fdb.ExactRange } diff --git a/bindings/java/CMakeLists.txt b/bindings/java/CMakeLists.txt index cbcbe07c27..17b14e6fd8 100644 --- a/bindings/java/CMakeLists.txt +++ b/bindings/java/CMakeLists.txt @@ -1,3 +1,6 @@ +set(RUN_JAVA_TESTS ON CACHE BOOL "Run Java unit tests") +set(RUN_JUNIT_TESTS OFF CACHE BOOL "Compile and run junit tests") + set(JAVA_BINDING_SRCS src/main/com/apple/foundationdb/async/AsyncIterable.java src/main/com/apple/foundationdb/async/AsyncIterator.java @@ -102,6 +105,10 @@ set(JAVA_TESTS_SRCS src/test/com/apple/foundationdb/test/WatchTest.java src/test/com/apple/foundationdb/test/WhileTrueTest.java) +set(JAVA_JUNIT_TESTS + src/junit/com/apple/foundationdb/tuple/AllTests.java + src/junit/com/apple/foundationdb/tuple/ArrayUtilTests.java) + set(GENERATED_JAVA_DIR ${CMAKE_CURRENT_BINARY_DIR}/src/main/com/apple/foundationdb) file(MAKE_DIRECTORY ${GENERATED_JAVA_DIR}) @@ -173,12 +180,6 @@ add_jar(fdb-java ${JAVA_BINDING_SRCS} ${GENERATED_JAVA_FILES} ${CMAKE_SOURCE_DIR OUTPUT_DIR ${PROJECT_BINARY_DIR}/lib VERSION ${CMAKE_PROJECT_VERSION} MANIFEST ${MANIFEST_FILE}) add_dependencies(fdb-java fdb_java_options fdb_java) -# TODO[mpilman]: The java RPM will require some more effort (mostly on debian). However, -# most people will use the fat-jar, so it is not clear how high this priority is. - -#install_jar(fdb-java DESTINATION ${FDB_SHARE_DIR}/java COMPONENT java) -#install(TARGETS fdb_java DESTINATION ${FDB_LIB_DIR} COMPONENT java) - if(NOT OPEN_FOR_IDE) set(FAT_JAR_BINARIES "NOTFOUND" CACHE STRING "Path of a directory structure with libraries to include in fat jar (a lib directory)") @@ -252,4 +253,30 @@ if(NOT OPEN_FOR_IDE) add_dependencies(fat-jar fdb-java) add_dependencies(fat-jar copy_lib) add_dependencies(packages fat-jar) + + if(RUN_JAVA_TESTS) + set(enabled ENABLED) + else() + set(enabled DISABLED) + endif() + set(TEST_CP ${tests_jar} ${target_jar}) + message(STATUS "TEST_CP ${TEST_CP}") + add_java_test(NAME DirectoryTest CLASS_PATH ${TEST_CP} + CLASS com.apple.foundationdb.test.DirectoryTest ${enabled}) + + if(RUN_JUNIT_TESTS) + file(DOWNLOAD "https://search.maven.org/remotecontent?filepath=junit/junit/4.13/junit-4.13.jar" + ${CMAKE_BINARY_DIR}/packages/junit-4.13.jar + EXPECTED_HASH SHA256=4b8532f63bdc0e0661507f947eb324a954d1dbac631ad19c8aa9a00feed1d863) + file(DOWNLOAD "https://repo1.maven.org/maven2/org/hamcrest/hamcrest-all/1.3/hamcrest-all-1.3.jar" + ${CMAKE_BINARY_DIR}/packages/hamcrest-all-1.3.jar + EXPECTED_HASH SHA256=4877670629ab96f34f5f90ab283125fcd9acb7e683e66319a68be6eb2cca60de) + add_jar(fdb-junit SOURCES ${JAVA_JUNIT_TESTS} INCLUDE_JARS fdb-java ${CMAKE_BINARY_DIR}/packages/junit-4.13.jar) + get_property(junit_jar_path TARGET fdb-junit PROPERTY JAR_FILE) + add_test(NAME junit + COMMAND ${Java_JAVA_EXECUTABLE} + -cp "${target_jar}:${junit_jar_path}:${CMAKE_BINARY_DIR}/packages/junit-4.13.jar:${CMAKE_BINARY_DIR}/packages/hamcrest-all-1.3.jar" + -Djava.library.path=${CMAKE_BINARY_DIR}/lib + org.junit.runner.JUnitCore "com.apple.foundationdb.tuple.AllTests") + endif() endif() diff --git a/bindings/java/fdbJNI.cpp b/bindings/java/fdbJNI.cpp index 19a68fca27..227d881704 100644 --- a/bindings/java/fdbJNI.cpp +++ b/bindings/java/fdbJNI.cpp @@ -1089,13 +1089,13 @@ void JNI_OnUnload(JavaVM *vm, void *reserved) { return; } else { // delete global references so the GC can collect them - if (range_result_summary_class != NULL) { + if (range_result_summary_class != JNI_NULL) { env->DeleteGlobalRef(range_result_summary_class); } - if (range_result_class != NULL) { + if 
(range_result_class != JNI_NULL) { env->DeleteGlobalRef(range_result_class); } - if (string_class != NULL) { + if (string_class != JNI_NULL) { env->DeleteGlobalRef(string_class); } } diff --git a/bindings/java/src/junit/com/apple/foundationdb/tuple/ArrayUtilTests.java b/bindings/java/src/junit/com/apple/foundationdb/tuple/ArrayUtilTests.java index 3cd7125a97..0b964b10dc 100644 --- a/bindings/java/src/junit/com/apple/foundationdb/tuple/ArrayUtilTests.java +++ b/bindings/java/src/junit/com/apple/foundationdb/tuple/ArrayUtilTests.java @@ -27,9 +27,14 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.util.ArrayList; +import java.util.Collections; import java.util.List; +import java.util.Random; +import org.junit.Assert; +import org.junit.Before; import org.junit.Test; +import org.junit.Ignore; /** * @author Ben @@ -251,7 +256,7 @@ public class ArrayUtilTests { /** * Test method for {@link ByteArrayUtil#bisectLeft(java.math.BigInteger[], java.math.BigInteger)}. */ - @Test + @Test @Ignore public void testBisectLeft() { fail("Not yet implemented"); } @@ -259,7 +264,7 @@ public class ArrayUtilTests { /** * Test method for {@link ByteArrayUtil#compareUnsigned(byte[], byte[])}. */ - @Test + @Test @Ignore public void testCompare() { fail("Not yet implemented"); } @@ -267,7 +272,7 @@ public class ArrayUtilTests { /** * Test method for {@link ByteArrayUtil#findNext(byte[], byte, int)}. */ - @Test + @Test @Ignore public void testFindNext() { fail("Not yet implemented"); } @@ -275,7 +280,7 @@ public class ArrayUtilTests { /** * Test method for {@link ByteArrayUtil#findTerminator(byte[], byte, byte, int)}. */ - @Test + @Test @Ignore public void testFindTerminator() { fail("Not yet implemented"); } @@ -283,7 +288,7 @@ public class ArrayUtilTests { /** * Test method for {@link ByteArrayUtil#copyOfRange(byte[], int, int)}. */ - @Test + @Test @Ignore public void testCopyOfRange() { fail("Not yet implemented"); } @@ -291,7 +296,7 @@ public class ArrayUtilTests { /** * Test method for {@link ByteArrayUtil#strinc(byte[])}. */ - @Test + @Test @Ignore public void testStrinc() { fail("Not yet implemented"); } @@ -299,7 +304,7 @@ public class ArrayUtilTests { /** * Test method for {@link ByteArrayUtil#printable(byte[])}. */ - @Test + @Test @Ignore public void testPrintable() { fail("Not yet implemented"); } diff --git a/bindings/java/src/test/com/apple/foundationdb/test/DirectoryTest.java b/bindings/java/src/test/com/apple/foundationdb/test/DirectoryTest.java index 9f838d8eeb..ae701363e5 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/DirectoryTest.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/DirectoryTest.java @@ -34,7 +34,7 @@ public class DirectoryTest { public static void main(String[] args) throws Exception { try { FDB fdb = FDB.selectAPIVersion(700); - try(Database db = fdb.open()) { + try(Database db = args.length > 0 ? fdb.open(args[0]) : fdb.open()) { runTests(db); } } diff --git a/build/Dockerfile b/build/Dockerfile index e99c45357d..ad9669fba9 100644 --- a/build/Dockerfile +++ b/build/Dockerfile @@ -45,13 +45,13 @@ RUN cd /tmp && curl -L https://github.com/ninja-build/ninja/archive/v1.9.0.zip - cd .. 
&& rm -rf ninja-1.9.0 ninja.zip # install openssl -RUN cd /tmp && curl -L https://www.openssl.org/source/openssl-1.1.1d.tar.gz -o openssl.tar.gz &&\ - echo "1e3a91bc1f9dfce01af26026f856e064eab4c8ee0a8f457b5ae30b40b8b711f2 openssl.tar.gz" > openssl-sha.txt &&\ +RUN cd /tmp && curl -L https://www.openssl.org/source/openssl-1.1.1h.tar.gz -o openssl.tar.gz &&\ + echo "5c9ca8774bd7b03e5784f26ae9e9e6d749c9da2438545077e6b3d755a06595d9 openssl.tar.gz" > openssl-sha.txt &&\ sha256sum -c openssl-sha.txt && tar -xzf openssl.tar.gz &&\ - cd openssl-1.1.1d && scl enable devtoolset-8 -- ./config CFLAGS="-fPIC -O3" --prefix=/usr/local &&\ + cd openssl-1.1.1h && scl enable devtoolset-8 -- ./config CFLAGS="-fPIC -O3" --prefix=/usr/local &&\ scl enable devtoolset-8 -- make -j`nproc` && scl enable devtoolset-8 -- make -j1 install &&\ ln -sv /usr/local/lib64/lib*.so.1.1 /usr/lib64/ &&\ - cd /tmp/ && rm -rf /tmp/openssl-1.1.1d /tmp/openssl.tar.gz + cd /tmp/ && rm -rf /tmp/openssl-1.1.1h /tmp/openssl.tar.gz RUN cd /opt/ && curl -L https://github.com/facebook/rocksdb/archive/v6.10.1.tar.gz -o rocksdb.tar.gz &&\ echo "d573d2f15cdda883714f7e0bc87b814a8d4a53a82edde558f08f940e905541ee rocksdb.tar.gz" > rocksdb-sha.txt &&\ @@ -61,8 +61,8 @@ RUN cd /opt/ && curl -L https://github.com/facebook/rocksdb/archive/v6.10.1.tar. ARG TIMEZONEINFO=America/Los_Angeles RUN rm -f /etc/localtime && ln -s /usr/share/zoneinfo/${TIMEZONEINFO} /etc/localtime -LABEL version=0.1.15 -ENV DOCKER_IMAGEVER=0.1.15 +LABEL version=0.1.17 +ENV DOCKER_IMAGEVER=0.1.17 ENV JAVA_HOME=/usr/lib/jvm/java-1.8.0 ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc ENV CXX=/opt/rh/devtoolset-8/root/usr/bin/g++ diff --git a/build/Dockerfile.devel b/build/Dockerfile.devel index 9552a064dc..a88e79b7f4 100644 --- a/build/Dockerfile.devel +++ b/build/Dockerfile.devel @@ -1,4 +1,4 @@ -FROM foundationdb/foundationdb-build:0.1.15 +FROM foundationdb/foundationdb-build:0.1.17 USER root diff --git a/build/docker-compose.yaml b/build/docker-compose.yaml index 8241c48cb3..06b2567147 100644 --- a/build/docker-compose.yaml +++ b/build/docker-compose.yaml @@ -2,7 +2,7 @@ version: "3" services: common: &common - image: foundationdb/foundationdb-build:0.1.15 + image: foundationdb/foundationdb-build:0.1.17 build-setup: &build-setup <<: *common diff --git a/cmake/AddFdbTest.cmake b/cmake/AddFdbTest.cmake index 46134571a3..b740ee7e46 100644 --- a/cmake/AddFdbTest.cmake +++ b/cmake/AddFdbTest.cmake @@ -363,3 +363,60 @@ function(package_bindingtester) add_custom_target(bindingtester ALL DEPENDS ${tar_file}) add_dependencies(bindingtester copy_bindingtester_binaries) endfunction() + +function(add_fdbclient_test) + set(options DISABLED ENABLED) + set(oneValueArgs NAME) + set(multiValueArgs COMMAND) + cmake_parse_arguments(T "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}") + if(NOT T_ENABLED AND T_DISABLED) + return() + endif() + if(NOT T_NAME) + message(FATAL_ERROR "NAME is a required argument for add_fdbclient_test") + endif() + if(NOT T_COMMAND) + message(FATAL_ERROR "COMMAND is a required argument for add_fdbclient_test") + endif() + message(STATUS "Adding Client test ${T_NAME}") + add_test(NAME "${T_NAME}" + COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/tmp_cluster.py + --build-dir ${CMAKE_BINARY_DIR} + -- + ${T_COMMAND}) +endfunction() + +function(add_java_test) + set(options DISABLED ENABLED) + set(oneValueArgs NAME CLASS) + set(multiValueArgs CLASS_PATH) + cmake_parse_arguments(T "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}") + if(NOT T_ENABLED AND 
T_DISABLED) + return() + endif() + if(NOT T_NAME) + message(FATAL_ERROR "NAME is a required argument for add_fdbclient_test") + endif() + if(NOT T_CLASS) + message(FATAL_ERROR "CLASS is a required argument for add_fdbclient_test") + endif() + set(cp "") + set(separator ":") + if (WIN32) + set(separator ";") + endif() + message(STATUS "CLASSPATH ${T_CLASS_PATH}") + foreach(path ${T_CLASS_PATH}) + if(cp) + set(cp "${cp}${separator}${path}") + else() + set(cp "${path}") + endif() + endforeach() + add_fdbclient_test( + NAME ${T_NAME} + COMMAND ${Java_JAVA_EXECUTABLE} + -cp "${cp}" + -Djava.library.path=${CMAKE_BINARY_DIR}/lib + ${T_CLASS} "@CLUSTER_FILE@") +endfunction() diff --git a/cmake/CompileRocksDB.cmake b/cmake/CompileRocksDB.cmake index 3de2696beb..e04e0fed39 100644 --- a/cmake/CompileRocksDB.cmake +++ b/cmake/CompileRocksDB.cmake @@ -59,11 +59,14 @@ else() set(ROCKSDB_LIBRARIES ${BINARY_DIR}/librocksdb.a) + ExternalProject_Get_Property(rocksdb SOURCE_DIR) + set (ROCKSDB_INCLUDE_DIR "${SOURCE_DIR}/include") + set(ROCKSDB_FOUND TRUE) endif() message(STATUS "Found RocksDB library: ${ROCKSDB_LIBRARIES}") -message(STATUS "Found RocksDB includes: ${ROCKSDB_INCLUDE_DIRS}") +message(STATUS "Found RocksDB includes: ${ROCKSDB_INCLUDE_DIR}") mark_as_advanced( ROCKSDB_LIBRARIES diff --git a/cmake/FDBComponents.cmake b/cmake/FDBComponents.cmake index 783d39f541..be6b044a73 100644 --- a/cmake/FDBComponents.cmake +++ b/cmake/FDBComponents.cmake @@ -12,7 +12,7 @@ endif() # SSL ################################################################################ include(CheckSymbolExists) - + set(DISABLE_TLS OFF CACHE BOOL "Don't try to find OpenSSL and always build without TLS support") if(DISABLE_TLS) set(WITH_TLS OFF) @@ -107,7 +107,9 @@ endif() ################################################################################ set(SSD_ROCKSDB_EXPERIMENTAL OFF CACHE BOOL "Build with experimental RocksDB support") -if (SSD_ROCKSDB_EXPERIMENTAL) +# RocksDB is currently enabled by default for GCC but does not build with the latest +# Clang. +if (SSD_ROCKSDB_EXPERIMENTAL OR GCC) set(WITH_ROCKSDB_EXPERIMENTAL ON) else() set(WITH_ROCKSDB_EXPERIMENTAL OFF) diff --git a/contrib/Joshua/scripts/bindingTest.sh b/contrib/Joshua/scripts/bindingTest.sh index 8e2fde1f7d..4a0d7c70da 100755 --- a/contrib/Joshua/scripts/bindingTest.sh +++ b/contrib/Joshua/scripts/bindingTest.sh @@ -1,6 +1,5 @@ #!/bin/bash SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -pkill fdbserver ulimit -S -c unlimited unset FDB_NETWORK_OPTION_EXTERNAL_CLIENT_DIRECTORY @@ -8,4 +7,4 @@ WORKDIR="$(pwd)/tmp/$$" if [ ! 
-d "${WORKDIR}" ] ; then mkdir -p "${WORKDIR}" fi -DEBUGLEVEL=0 DISPLAYERROR=1 RANDOMTEST=1 WORKDIR="${WORKDIR}" FDBSERVERPORT="${PORT_FDBSERVER:-4500}" ${SCRIPTDIR}/bindingTestScript.sh 1 +DEBUGLEVEL=0 DISPLAYERROR=1 RANDOMTEST=1 WORKDIR="${WORKDIR}" ${SCRIPTDIR}/bindingTestScript.sh 1 diff --git a/contrib/Joshua/scripts/bindingTestScript.sh b/contrib/Joshua/scripts/bindingTestScript.sh index 9ef19ab1a6..f4e0e8eb8b 100755 --- a/contrib/Joshua/scripts/bindingTestScript.sh +++ b/contrib/Joshua/scripts/bindingTestScript.sh @@ -7,7 +7,7 @@ SCRIPTID="${$}" SAVEONERROR="${SAVEONERROR:-1}" PYTHONDIR="${BINDIR}/tests/python" testScript="${BINDIR}/tests/bindingtester/run_binding_tester.sh" -VERSION="1.6" +VERSION="1.9" source ${SCRIPTDIR}/localClusterStart.sh @@ -23,19 +23,22 @@ cycles="${1}" if [ "${DEBUGLEVEL}" -gt 0 ] then - echo "Work dir: ${WORKDIR}" - echo "Bin dir: ${BINDIR}" - echo "Log dir: ${LOGDIR}" - echo "Python path: ${PYTHONDIR}" - echo "Lib dir: ${LIBDIR}" - echo "Server port: ${FDBSERVERPORT}" - echo "Script Id: ${SCRIPTID}" - echo "Version: ${VERSION}" + echo "Work dir: ${WORKDIR}" + echo "Bin dir: ${BINDIR}" + echo "Log dir: ${LOGDIR}" + echo "Python path: ${PYTHONDIR}" + echo "Lib dir: ${LIBDIR}" + echo "Cluster String: ${FDBCLUSTERTEXT}" + echo "Script Id: ${SCRIPTID}" + echo "Version: ${VERSION}" fi # Begin the cluster using the logic in localClusterStart.sh. startCluster +# Stop the cluster on exit +trap "stopCluster" EXIT + # Display user message if [ "${status}" -ne 0 ]; then : @@ -58,8 +61,8 @@ fi # Display directory and log information, if an error occurred if [ "${status}" -ne 0 ] then - ls "${WORKDIR}" > "${LOGDIR}/dir.log" - ps -eafw > "${LOGDIR}/process-preclean.log" + ls "${WORKDIR}" &> "${LOGDIR}/dir.log" + ps -eafwH &> "${LOGDIR}/process-preclean.log" if [ -f "${FDBCONF}" ]; then cp -f "${FDBCONF}" "${LOGDIR}/" fi @@ -71,10 +74,15 @@ fi # Save debug information files, environment, and log information, if an error occurred if [ "${status}" -ne 0 ] && [ "${SAVEONERROR}" -gt 0 ]; then - ps -eafw > "${LOGDIR}/process-exit.log" - netstat -na > "${LOGDIR}/netstat.log" - df -h > "${LOGDIR}/disk.log" - env > "${LOGDIR}/env.log" + ps -eafwH &> "${LOGDIR}/process-exit.log" + netstat -na &> "${LOGDIR}/netstat.log" + df -h &> "${LOGDIR}/disk.log" + env &> "${LOGDIR}/env.log" +fi + +# Stop the cluster +if stopCluster; then + unset FDBSERVERID fi exit "${status}" diff --git a/contrib/Joshua/scripts/localClusterStart.sh b/contrib/Joshua/scripts/localClusterStart.sh index 3ba4cb9dcb..d1280267b3 100644 --- a/contrib/Joshua/scripts/localClusterStart.sh +++ b/contrib/Joshua/scripts/localClusterStart.sh @@ -5,311 +5,398 @@ WORKDIR="${WORKDIR:-${SCRIPTDIR}/tmp/fdb.work}" LOGDIR="${WORKDIR}/log" ETCDIR="${WORKDIR}/etc" BINDIR="${BINDIR:-${SCRIPTDIR}}" -FDBSERVERPORT="${FDBSERVERPORT:-4500}" +FDBPORTSTART="${FDBPORTSTART:-4000}" +FDBPORTTOTAL="${FDBPORTTOTAL:-1000}" +SERVERCHECKS="${SERVERCHECKS:-10}" +CONFIGUREWAIT="${CONFIGUREWAIT:-240}" FDBCONF="${ETCDIR}/fdb.cluster" LOGFILE="${LOGFILE:-${LOGDIR}/startcluster.log}" +AUDITCLUSTER="${AUDITCLUSTER:-0}" +AUDITLOG="${AUDITLOG:-/tmp/audit-cluster.log}" # Initialize the variables status=0 messagetime=0 messagecount=0 +# Do nothing, if cluster string is already defined +if [ -n "${FDBCLUSTERTEXT}" ] +then + : +# Otherwise, define the cluster text +else + # Define a random ip address and port on localhost + if [ -z "${IPADDRESS}" ]; then + let index2="${RANDOM} % 256" + let index3="${RANDOM} % 256" + let index4="(${RANDOM} % 255) + 1" + 
IPADDRESS="127.${index2}.${index3}.${index4}" + fi + if [ -z "${FDBPORT}" ]; then + let FDBPORT="(${RANDOM} % ${FDBPORTTOTAL}) + ${FDBPORTSTART}" + fi + FDBCLUSTERTEXT="${IPADDRESS}:${FDBPORT}" +fi + function log { - local status=0 - if [ "$#" -lt 1 ] - then - echo "Usage: log [echo]" - echo - echo "Logs the message and timestamp to LOGFILE (${LOGFILE}) and, if the" - echo "second argument is either not present or is set to 1, stdout." - let status="${status} + 1" - else - # Log to stdout. - if [ "$#" -lt 2 ] || [ "${2}" -ge 1 ] - then - echo "${1}" - fi + local status=0 + if [ "$#" -lt 1 ] + then + echo "Usage: log [echo]" + echo + echo "Logs the message and timestamp to LOGFILE (${LOGFILE}) and, if the" + echo "second argument is either not present or is set to 1, stdout." + let status="${status} + 1" + else + # Log to stdout. + if [ "$#" -lt 2 ] || [ "${2}" -ge 1 ] + then + echo "${1}" + fi - # Log to file. - datestr=$(date +"%Y-%m-%d %H:%M:%S (%s)") - dir=$(dirname "${LOGFILE}") - if ! [ -d "${dir}" ] && ! mkdir -p "${dir}" - then - echo "Could not create directory to log output." - let status="${status} + 1" - elif ! [ -f "${LOGFILE}" ] && ! touch "${LOGFILE}" - then - echo "Could not create file ${LOGFILE} to log output." - let status="${status} + 1" - elif ! echo "[ ${datestr} ] ${1}" >> "${LOGFILE}" - then - echo "Could not log output to ${LOGFILE}." - let status="${status} + 1" - fi - fi + # Log to file. + datestr=$(date +"%Y-%m-%d %H:%M:%S (%s)") + dir=$(dirname "${LOGFILE}") + if ! [ -d "${dir}" ] && ! mkdir -p "${dir}" + then + echo "Could not create directory to log output." + let status="${status} + 1" + elif ! [ -f "${LOGFILE}" ] && ! touch "${LOGFILE}" + then + echo "Could not create file ${LOGFILE} to log output." + let status="${status} + 1" + elif ! echo "[ ${datestr} ] ${1}" >> "${LOGFILE}" + then + echo "Could not log output to ${LOGFILE}." + let status="${status} + 1" + fi + fi - return "${status}" + return "${status}" } # Display a message for the user. function displayMessage { - local status=0 + local status=0 - if [ "$#" -lt 1 ] - then - echo "displayMessage " - let status="${status} + 1" - elif ! log "${1}" 0 - then - log "Could not write message to file." - else - # Increment the message counter - let messagecount="${messagecount} + 1" + if [ "$#" -lt 1 ] + then + echo "displayMessage " + let status="${status} + 1" + elif ! log "${1}" 0 + then + log "Could not write message to file." + else + # Increment the message counter + let messagecount="${messagecount} + 1" - # Display successful message, if previous message - if [ "${messagecount}" -gt 1 ] - then - # Determine the amount of transpired time - let timespent="${SECONDS}-${messagetime}" + # Display successful message, if previous message + if [ "${messagecount}" -gt 1 ] + then + # Determine the amount of transpired time + let timespent="${SECONDS}-${messagetime}" - if [ "${DEBUGLEVEL}" -gt 0 ]; then - printf "... done in %3d seconds\n" "${timespent}" - fi - fi + if [ "${DEBUGLEVEL}" -gt 0 ]; then + printf "... done in %3d seconds\n" "${timespent}" + fi + fi - # Display message - if [ "${DEBUGLEVEL}" -gt 0 ]; then - printf "%-16s %-35s " "$(date "+%F %H-%M-%S")" "$1" - fi + # Display message + if [ "${DEBUGLEVEL}" -gt 0 ]; then + printf "%-16s %-35s " "$(date "+%F %H-%M-%S")" "$1" + fi - # Update the variables - messagetime="${SECONDS}" - fi + # Update the variables + messagetime="${SECONDS}" + fi - return "${status}" + return "${status}" } # Create the directories used by the server. 
-function createDirectories { - # Display user message - if ! displayMessage "Creating directories" - then - echo 'Failed to display user message' - let status="${status} + 1" - - elif ! mkdir -p "${LOGDIR}" "${ETCDIR}" - then - log "Failed to create directories" - let status="${status} + 1" - - # Display user message - elif ! displayMessage "Setting file permissions" - then - log 'Failed to display user message' - let status="${status} + 1" - - elif ! chmod 755 "${BINDIR}/fdbserver" "${BINDIR}/fdbcli" - then - log "Failed to set file permissions" - let status="${status} + 1" - - else - while read filepath - do - if [ -f "${filepath}" ] && [ ! -x "${filepath}" ] - then - # if [ "${DEBUGLEVEL}" -gt 1 ]; then - # log " Enable executable: ${filepath}" - # fi - log " Enable executable: ${filepath}" "${DEBUGLEVEL}" - if ! chmod 755 "${filepath}" - then - log "Failed to set executable for file: ${filepath}" - let status="${status} + 1" - fi - fi - done < <(find "${BINDIR}" -iname '*.py' -o -iname '*.rb' -o -iname 'fdb_flow_tester' -o -iname '_stacktester' -o -iname '*.js' -o -iname '*.sh' -o -iname '*.ksh') - fi +function createDirectories +{ + local status=0 - return ${status} + # Display user message + if ! displayMessage "Creating directories" + then + echo 'Failed to display user message' + let status="${status} + 1" + + elif ! mkdir -p "${LOGDIR}" "${ETCDIR}" + then + log "Failed to create directories" + let status="${status} + 1" + + # Display user message + elif ! displayMessage "Setting file permissions" + then + log 'Failed to display user message' + let status="${status} + 1" + + elif ! chmod 755 "${BINDIR}/fdbserver" "${BINDIR}/fdbcli" + then + log "Failed to set file permissions" + let status="${status} + 1" + + else + while read filepath + do + if [ -f "${filepath}" ] && [ ! -x "${filepath}" ] + then + # if [ "${DEBUGLEVEL}" -gt 1 ]; then + # log " Enable executable: ${filepath}" + # fi + log " Enable executable: ${filepath}" "${DEBUGLEVEL}" + if ! chmod 755 "${filepath}" + then + log "Failed to set executable for file: ${filepath}" + let status="${status} + 1" + fi + fi + done < <(find "${BINDIR}" -iname '*.py' -o -iname '*.rb' -o -iname 'fdb_flow_tester' -o -iname '_stacktester' -o -iname '*.js' -o -iname '*.sh' -o -iname '*.ksh') + fi + + return ${status} } # Create a cluster file for the local cluster. -function createClusterFile { - if [ "${status}" -ne 0 ]; then - : - # Display user message - elif ! displayMessage "Creating Fdb Cluster file" - then - log 'Failed to display user message' - let status="${status} + 1" - else - description=$(LC_CTYPE=C tr -dc A-Za-z0-9 < /dev/urandom 2> /dev/null | head -c 8) - random_str=$(LC_CTYPE=C tr -dc A-Za-z0-9 < /dev/urandom 2> /dev/null | head -c 8) - echo "$description:$random_str@127.0.0.1:${FDBSERVERPORT}" > "${FDBCONF}" - fi +function createClusterFile +{ + local status=0 - if [ "${status}" -ne 0 ]; then - : - elif ! chmod 0664 "${FDBCONF}"; then - log "Failed to set permissions on fdbconf: ${FDBCONF}" - let status="${status} + 1" - fi + if [ "${status}" -ne 0 ]; then + : + # Display user message + elif ! displayMessage "Creating Fdb Cluster file" + then + log 'Failed to display user message' + let status="${status} + 1" + else + description=$(LC_CTYPE=C tr -dc A-Za-z0-9 < /dev/urandom 2> /dev/null | head -c 8) + random_str=$(LC_CTYPE=C tr -dc A-Za-z0-9 < /dev/urandom 2> /dev/null | head -c 8) + echo "${description}:${random_str}@${FDBCLUSTERTEXT}" > "${FDBCONF}" + fi - return ${status} + if [ "${status}" -ne 0 ]; then + : + elif ! 
chmod 0664 "${FDBCONF}"; then + log "Failed to set permissions on fdbconf: ${FDBCONF}" + let status="${status} + 1" + fi + + return ${status} +} + +# Stop the Cluster from running. +function stopCluster +{ + local status=0 + + # Add an audit entry, if enabled + if [ "${AUDITCLUSTER}" -gt 0 ]; then + printf '%-15s (%6s) Stopping cluster %-20s (%6s): %s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "${$}" "${FDBCLUSTERTEXT}" "${FDBSERVERID}" >> "${AUDITLOG}" + fi + if [ -z "${FDBSERVERID}" ]; then + log 'FDB Server process is not defined' + let status="${status} + 1" + elif ! kill -0 "${FDBSERVERID}"; then + log "Failed to locate FDB Server process (${FDBSERVERID})" + let status="${status} + 1" + elif "${BINDIR}/fdbcli" -C "${FDBCONF}" --exec "kill; kill ${FDBCLUSTERTEXT}; sleep 3" --timeout 120 &>> "${LOGDIR}/fdbcli-kill.log" + then + # Ensure that process is dead + if ! kill -0 "${FDBSERVERID}" 2> /dev/null; then + log "Killed cluster (${FDBSERVERID}) via cli" + elif ! kill -9 "${FDBSERVERID}"; then + log "Failed to kill FDB Server process (${FDBSERVERID}) via cli or kill command" + let status="${status} + 1" + else + log "Forcibly killed FDB Server process (${FDBSERVERID}) since cli failed" + fi + elif ! kill -9 "${FDBSERVERID}"; then + log "Failed to forcibly kill FDB Server process (${FDBSERVERID})" + let status="${status} + 1" + else + log "Forcibly killed FDB Server process (${FDBSERVERID})" + fi + return "${status}" } # Start the server running. -function startFdbServer { - if [ "${status}" -ne 0 ]; then - : - elif ! displayMessage "Starting Fdb Server" - then - log 'Failed to display user message' - let status="${status} + 1" +function startFdbServer +{ + local status=0 - elif ! "${BINDIR}/fdbserver" -C "${FDBCONF}" -p "auto:${FDBSERVERPORT}" -L "${LOGDIR}" -d "${WORKDIR}/fdb/$$" &> "${LOGDIR}/fdbserver.log" & - then - log "Failed to start FDB Server" - # Maybe the server is already running - FDBSERVERID="$(pidof fdbserver)" - let status="${status} + 1" - else - FDBSERVERID="${!}" - fi + # Add an audit entry, if enabled + if [ "${AUDITCLUSTER}" -gt 0 ]; then + printf '%-15s (%6s) Starting cluster %-20s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "${$}" "${FDBCLUSTERTEXT}" >> "${AUDITLOG}" + fi - if ! kill -0 ${FDBSERVERID} ; then - log "FDB Server start failed." - let status="${status} + 1" - fi + if ! displayMessage "Starting Fdb Server" + then + log 'Failed to display user message' + let status="${status} + 1" - return ${status} + else + "${BINDIR}/fdbserver" --knob_disable_posix_kernel_aio=1 -C "${FDBCONF}" -p "${FDBCLUSTERTEXT}" -L "${LOGDIR}" -d "${WORKDIR}/fdb/${$}" &> "${LOGDIR}/fdbserver.log" & + if [ "${?}" -ne 0 ] + then + log "Failed to start FDB Server" + let status="${status} + 1" + else + FDBSERVERID="${!}" + fi + fi + + if [ -z "${FDBSERVERID}" ]; then + log "FDB Server start failed because no process" + let status="${status} + 1" + elif ! kill -0 "${FDBSERVERID}" ; then + log "FDB Server start failed because process terminated unexpectedly" + let status="${status} + 1" + fi + + return ${status} } -function getStatus { - if [ "${status}" -ne 0 ]; then - : - elif ! date &>> "${LOGDIR}/fdbclient.log" - then - log 'Failed to get date' - let status="${status} + 1" - elif ! "${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'status json' --timeout 120 &>> "${LOGDIR}/fdbclient.log" - then - log 'Failed to get status from fdbcli' - let status="${status} + 1" - elif ! 
date &>> "${LOGDIR}/fdbclient.log" - then - log 'Failed to get date' - let status="${status} + 1" - fi +function getStatus +{ + local status=0 - return ${status} + if [ "${status}" -ne 0 ]; then + : + elif ! date &>> "${LOGDIR}/fdbclient.log" + then + log 'Failed to get date' + let status="${status} + 1" + elif ! "${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'status json' --timeout 120 &>> "${LOGDIR}/fdbclient.log" + then + log 'Failed to get status from fdbcli' + let status="${status} + 1" + elif ! date &>> "${LOGDIR}/fdbclient.log" + then + log 'Failed to get date' + let status="${status} + 1" + fi + + return ${status} } # Verify that the cluster is available. -function verifyAvailable { - # Verify that the server is running. - if ! kill -0 "${FDBSERVERID}" - then - log "FDB server process (${FDBSERVERID}) is not running" - let status="${status} + 1" - return 1 +function verifyAvailable +{ + local status=0 - # Display user message. - elif ! displayMessage "Checking cluster availability" - then - log 'Failed to display user message' - let status="${status} + 1" - return 1 - - # Determine if status json says the database is available. - else - avail=`"${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'status json' --timeout 10 2> /dev/null | grep -E '"database_available"|"available"' | grep 'true'` - log "Avail value: ${avail}" "${DEBUGLEVEL}" - if [[ -n "${avail}" ]] ; then - return 0 - else - return 1 - fi - fi + if [ -z "${FDBSERVERID}" ]; then + log "FDB Server process is not defined." + let status="${status} + 1" + # Verify that the server is running. + elif ! kill -0 "${FDBSERVERID}" + then + log "FDB server process (${FDBSERVERID}) is not running" + let status="${status} + 1" + # Display user message. + elif ! displayMessage "Checking cluster availability" + then + log 'Failed to display user message' + let status="${status} + 1" + # Determine if status json says the database is available. + else + avail=`"${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'status json' --timeout "${SERVERCHECKS}" 2> /dev/null | grep -E '"database_available"|"available"' | grep 'true'` + log "Avail value: ${avail}" "${DEBUGLEVEL}" + if [[ -n "${avail}" ]] ; then + : + else + let status="${status} + 1" + fi + fi + return "${status}" } # Configure the database on the server. -function createDatabase { - if [ "${status}" -ne 0 ]; then - : - # Ensure that the server is running - elif ! kill -0 "${FDBSERVERID}" - then - log "FDB server process: (${FDBSERVERID}) is not running" - let status="${status} + 1" +function createDatabase +{ + local status=0 - # Display user message - elif ! displayMessage "Creating database" - then - log 'Failed to display user message' - let status="${status} + 1" - elif ! echo "Client log:" &> "${LOGDIR}/fdbclient.log" - then - log 'Failed to create fdbclient.log' - let status="${status} + 1" - elif ! getStatus - then - log 'Failed to get status' - let status="${status} + 1" + if [ "${status}" -ne 0 ]; then + : + # Ensure that the server is running + elif ! kill -0 "${FDBSERVERID}" + then + log "FDB server process: (${FDBSERVERID}) is not running" + let status="${status} + 1" - # Configure the database. - else - "${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'configure new single memory; status' --timeout 240 --log --log-dir "${LOGDIR}" &>> "${LOGDIR}/fdbclient.log" + # Display user message + elif ! displayMessage "Creating database" + then + log 'Failed to display user message' + let status="${status} + 1" + elif ! 
echo "Client log:" &> "${LOGDIR}/fdbclient.log" + then + log 'Failed to create fdbclient.log' + let status="${status} + 1" + elif ! getStatus + then + log 'Failed to get status' + let status="${status} + 1" - if ! displayMessage "Checking if config succeeded" - then - log 'Failed to display user message.' - fi + # Configure the database. + else + "${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'configure new single memory; status' --timeout "${CONFIGUREWAIT}" --log --log-dir "${LOGDIR}" &>> "${LOGDIR}/fdbclient.log" - iteration=0 - while [[ "${iteration}" -lt 10 ]] && ! verifyAvailable - do - log "Database not created (iteration ${iteration})." - let iteration="${iteration} + 1" - done + if ! displayMessage "Checking if config succeeded" + then + log 'Failed to display user message.' + fi - if ! verifyAvailable - then - log "Failed to create database via cli" - getStatus - cat "${LOGDIR}/fdbclient.log" - log "Ignoring -- moving on" - #let status="${status} + 1" - fi - fi + iteration=0 + while [[ "${iteration}" -lt "${SERVERCHECKS}" ]] && ! verifyAvailable + do + log "Database not created (iteration ${iteration})." + let iteration="${iteration} + 1" + done - return ${status} + if ! verifyAvailable + then + log "Failed to create database via cli" + getStatus + cat "${LOGDIR}/fdbclient.log" + log "Ignoring -- moving on" + #let status="${status} + 1" + fi + fi + + return ${status} } # Begin the local cluster from scratch. -function startCluster { - if [ "${status}" -ne 0 ]; then - : - elif ! createDirectories - then - log "Could not create directories." - let status="${status} + 1" - elif ! createClusterFile - then - log "Could not create cluster file." - let status="${status} + 1" - elif ! startFdbServer - then - log "Could not start FDB server." - let status="${status} + 1" - elif ! createDatabase - then - log "Could not create database." - let status="${status} + 1" - fi +function startCluster +{ + local status=0 - return ${status} + if [ "${status}" -ne 0 ]; then + : + elif ! createDirectories + then + log "Could not create directories." + let status="${status} + 1" + elif ! createClusterFile + then + log "Could not create cluster file." + let status="${status} + 1" + elif ! startFdbServer + then + log "Could not start FDB server." + let status="${status} + 1" + elif ! createDatabase + then + log "Could not create database." 
+ let status="${status} + 1" + fi + + return ${status} } diff --git a/contrib/commit_debug.py b/contrib/commit_debug.py index 7f6de3ff91..b37b5260d0 100755 --- a/contrib/commit_debug.py +++ b/contrib/commit_debug.py @@ -24,22 +24,22 @@ def parse_args(): # (e)nd of a span with a better given name locationToPhase = { "NativeAPI.commit.Before": [], - "MasterProxyServer.batcher": [("b", "Commit")], - "MasterProxyServer.commitBatch.Before": [], - "MasterProxyServer.commitBatch.GettingCommitVersion": [("b", "CommitVersion")], - "MasterProxyServer.commitBatch.GotCommitVersion": [("e", "CommitVersion")], + "CommitProxyServer.batcher": [("b", "Commit")], + "CommitProxyServer.commitBatch.Before": [], + "CommitProxyServer.commitBatch.GettingCommitVersion": [("b", "CommitVersion")], + "CommitProxyServer.commitBatch.GotCommitVersion": [("e", "CommitVersion")], "Resolver.resolveBatch.Before": [("b", "Resolver.PipelineWait")], "Resolver.resolveBatch.AfterQueueSizeCheck": [], "Resolver.resolveBatch.AfterOrderer": [("e", "Resolver.PipelineWait"), ("b", "Resolver.Conflicts")], "Resolver.resolveBatch.After": [("e", "Resolver.Conflicts")], - "MasterProxyServer.commitBatch.AfterResolution": [("b", "Proxy.Processing")], - "MasterProxyServer.commitBatch.ProcessingMutations": [], - "MasterProxyServer.commitBatch.AfterStoreCommits": [("e", "Proxy.Processing")], + "CommitProxyServer.commitBatch.AfterResolution": [("b", "Proxy.Processing")], + "CommitProxyServer.commitBatch.ProcessingMutations": [], + "CommitProxyServer.commitBatch.AfterStoreCommits": [("e", "Proxy.Processing")], "TLog.tLogCommit.BeforeWaitForVersion": [("b", "TLog.PipelineWait")], "TLog.tLogCommit.Before": [("e", "TLog.PipelineWait")], "TLog.tLogCommit.AfterTLogCommit": [("b", "TLog.FSync")], "TLog.tLogCommit.After": [("e", "TLog.FSync")], - "MasterProxyServer.commitBatch.AfterLogPush": [("e", "Commit")], + "CommitProxyServer.commitBatch.AfterLogPush": [("e", "Commit")], "NativeAPI.commit.After": [], } diff --git a/design/backup_v2_partitioned_logs.md b/design/backup_v2_partitioned_logs.md index 18369cdd6f..3768643891 100644 --- a/design/backup_v2_partitioned_logs.md +++ b/design/backup_v2_partitioned_logs.md @@ -16,7 +16,7 @@ As an essential component of a database system, backup and restore is commonly u ## Background -FDB backup system continuously scan the database’s key-value space, save key-value pairs and mutations at versions into range files and log files in blob storage. Specifically, mutation logs are generated at Proxy, and are written to transaction logs along with regular mutations. In production clusters like CK clusters, backup system is always on, which means each mutation is written twice to transaction logs, consuming about half of write bandwidth and about 40% of Proxy CPU time. +FDB backup system continuously scan the database’s key-value space, save key-value pairs and mutations at versions into range files and log files in blob storage. Specifically, mutation logs are generated at CommitProxy, and are written to transaction logs along with regular mutations. In production clusters like CK clusters, backup system is always on, which means each mutation is written twice to transaction logs, consuming about half of write bandwidth and about 40% of CommitProxy CPU time. The design of old backup system is [here](https://github.com/apple/foundationdb/blob/master/design/backup.md), and the data format of range files and mutations files is [here](https://github.com/apple/foundationdb/blob/master/design/backup-dataFormat.md). 
The technical overview of FDB is [here](https://github.com/apple/foundationdb/wiki/Technical-Overview-of-the-Database). The FDB recovery is described in this [doc](https://github.com/apple/foundationdb/blob/master/design/recovery-internals.md). @@ -37,7 +37,7 @@ The design of old backup system is [here](https://github.com/apple/foundationdb/ Feature priorities: Feature 1, 2, 3, 4, 5 are must-have; Feature 6 is better to have. -1. **Write bandwidth reduction by half**: removes the requirement to generate backup mutations at the Proxy, thus reduce TLog write bandwidth usage by half and significantly improve Proxy CPU usage; +1. **Write bandwidth reduction by half**: removes the requirement to generate backup mutations at the CommitProxy, thus reduce TLog write bandwidth usage by half and significantly improve CommitProxy CPU usage; 2. **Correctness**: The restored database must be consistent: each *restored* state (i.e., key-value pair) at a version `v` must match the original state at version `v`. 3. **Performance**: The backup system should be performant, mostly measured as a small CPU overhead on transaction logs and backup workers. The version lag on backup workers is an indicator of performance. 4. **Fault-tolerant**: The backup system should be fault-tolerant to node failures in the FDB cluster. @@ -153,9 +153,9 @@ The requirement of the new backup system raises several design challenges: **Master**: The master is responsible for coordinating the transition of the FDB transaction sub-system from one generation to the next. In particular, the master recruits backup workers during the recovery. -**Transaction Logs (TLogs)**: The transaction logs make mutations durable to disk for fast commit latencies. The logs receive commits from the proxy in version order, and only respond to the proxy once the data has been written and fsync'ed to an append only mutation log on disk. Storage servers retrieve mutations from TLogs. Once the storage servers have persisted mutations, storage servers then pop the mutations from the TLogs. +**Transaction Logs (TLogs)**: The transaction logs make mutations durable to disk for fast commit latencies. The logs receive commits from the commit proxy in version order, and only respond to the commit proxy once the data has been written and fsync'ed to an append only mutation log on disk. Storage servers retrieve mutations from TLogs. Once the storage servers have persisted mutations, storage servers then pop the mutations from the TLogs. -**Proxy**: The proxies are responsible for committing transactions, and tracking the storage servers responsible for each range of keys. In the old backup system, Proxies are responsible to group mutations into backup mutations and write them to the database. +**CommitProxy**: The commit proxies are responsible for committing transactions, and tracking the storage servers responsible for each range of keys. In the old backup system, commit proxies are responsible to group mutations into backup mutations and write them to the database. **GrvProxy**: The GRV proxies are responsible for providing read versions. ## System overview @@ -229,7 +229,7 @@ The operator’s backup request can indicate if an old backup or a new backup is 2. All backup workers monitor the key `\xff\x02/backupStarted`, see the change, and start logging mutations. 3. After all backup workers have started, the `fdbbackup` tool initiates the backup of all or specified key ranges by issuing a transaction `Ts`. 
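
The protocol above hinges on every backup worker noticing the change to `\xff\x02/backupStarted` promptly. As a rough illustration of the watch-and-react pattern involved, here is a minimal sketch using the public `fdb_c` API that this change also touches. It is not how backup workers are actually implemented (they are `fdbserver` roles, not clients); the helper name is hypothetical and retry/error handling is omitted.

```c
/* Sketch only: backup workers are fdbserver roles and do not use the client
 * API; this just illustrates the "watch \xff\x02/backupStarted and react"
 * pattern described in steps 1-2.  Retry loops and error checks are omitted. */
#define FDB_API_VERSION 700
#include <foundationdb/fdb_c.h>

static const char BACKUP_STARTED_KEY[] = "\xff\x02/backupStarted";

/* Hypothetical helper: block until the backupStarted key changes. */
void wait_for_backup_started_change(FDBDatabase* db) {
    FDBTransaction* tr = NULL;
    fdb_database_create_transaction(db, &tr);

    /* System keys are only readable with the access-system-keys option. */
    fdb_transaction_set_option(tr, FDB_TR_OPTION_ACCESS_SYSTEM_KEYS, NULL, 0);

    /* Install a watch; it only becomes active once the transaction commits. */
    FDBFuture* watch_f = fdb_transaction_watch(tr, (const uint8_t*)BACKUP_STARTED_KEY,
                                               sizeof(BACKUP_STARTED_KEY) - 1);
    FDBFuture* commit_f = fdb_transaction_commit(tr);
    fdb_future_block_until_ready(commit_f);
    fdb_future_destroy(commit_f);

    /* Fires when the key's value changes (e.g. a new backup is submitted). */
    fdb_future_block_until_ready(watch_f);
    fdb_future_destroy(watch_f);
    fdb_transaction_destroy(tr);
}
```
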
-Compared to the old backup system, the above step 1 and 2 are new and is only triggered if client requests for a new type of backup. The purpose is to allow backup workers to function as no-op if there are no ongoing backups. However, the backup workers should still continuously pop their corresponding tags, otherwise mutations will be kept in the TLog. In order to know the version to pop, backup workers can obtain the read version from any proxy. Because the read version must be a committed version, so popping to this version is safe. +Compared to the old backup system, the above step 1 and 2 are new and is only triggered if client requests for a new type of backup. The purpose is to allow backup workers to function as no-op if there are no ongoing backups. However, the backup workers should still continuously pop their corresponding tags, otherwise mutations will be kept in the TLog. In order to know the version to pop, backup workers can obtain the read version from any GRV proxy. Because the read version must be a committed version, so popping to this version is safe. **Backup Submission Protocol** Protocol for `submitBackup()` to ensure that all backup workers of the current epoch have started logging mutations: diff --git a/design/data-distributor-internals.md b/design/data-distributor-internals.md index 661e35874b..ce432bfe67 100644 --- a/design/data-distributor-internals.md +++ b/design/data-distributor-internals.md @@ -22,7 +22,7 @@ Data distribution manages the lifetime of storage servers, decides which storage **Data distribution queue (`struct DDQueueData`)**: It receives shards to be relocated (i.e., RelocateShards), decides which shard should be moved to which server team, prioritizes the data movement based on relocate shard’s priority, and controls the progress of data movement based on servers’ workload. -**Special keys in the system keyspace**: DD saves its state in the system keyspace to recover from failure and to ensure every process (e.g., proxies, tLogs and storage servers) has a consistent view of which storage server is responsible for which key range. +**Special keys in the system keyspace**: DD saves its state in the system keyspace to recover from failure and to ensure every process (e.g., commit proxies, tLogs and storage servers) has a consistent view of which storage server is responsible for which key range. *serverKeys* sub-space (`\xff/serverKeys/`): It records the start key of each shard a server is responsible for. The format is *\xff/serverKeys/[serverID]/[start_key]*. To get start keys of all shards for a server, DD can read the key range with prefix *\xff/serverKeys/[serverID]/* and decode the value of [start_key]. @@ -32,9 +32,9 @@ Data distribution manages the lifetime of storage servers, decides which storage When a new DD is initialized, it will set itself as the owner by setting its random UID to the `moveKeysLockOwnerKey`. Since the owner key has only one value, at most one DD can own the DD-related system subspace. This avoids the potential race condition between multiple DDs which may co-exit during DD recruitment. -**Transaction State Store (txnStateStore)**: It is a replica of the special keyspace that stores the cluster’s states, such as which SS is responsible for which shard. Because proxies use txnStateStore to decide which tLog and SS should receive a mutation, proxies must have a consistent view of txnStateStore. Therefore, changes to txnStateStore must be populated to all proxies in total order. 
To achieve that, we use the special transaction (`applyMetaMutations`) to update txnStateStore and use resolvers to ensure the total ordering (serializable snapshot isolation). +**Transaction State Store (txnStateStore)**: It is a replica of the special keyspace that stores the cluster’s states, such as which SS is responsible for which shard. Because commit proxies use txnStateStore to decide which tLog and SS should receive a mutation, commit proxies must have a consistent view of txnStateStore. Therefore, changes to txnStateStore must be populated to all commit proxies in total order. To achieve that, we use the special transaction (`applyMetaMutations`) to update txnStateStore and use resolvers to ensure the total ordering (serializable snapshot isolation). -**Private mutation**: A private mutation is a mutation updating a special system key, such as keyServersKey (`\xff/keyServers/`) and serverKeysKey (`\xff/serverKeys/`). Like a normal mutation, a private mutation will be processed by the transaction systems (i.e., proxy, resolver and tLog) and be routed to a set of storage servers, based on the mutation’s tag, to update the key-value in the storage engine. Private mutations also keep the serializable snapshot isolation and consensus: The results of committed concurrent private mutations can be reproduced by sequentially executing the mutations, and all components in FDB have the same view of the mutations. +**Private mutation**: A private mutation is a mutation updating a special system key, such as keyServersKey (`\xff/keyServers/`) and serverKeysKey (`\xff/serverKeys/`). Like a normal mutation, a private mutation will be processed by the transaction systems (i.e., commit proxy, resolver and tLog) and be routed to a set of storage servers, based on the mutation’s tag, to update the key-value in the storage engine. Private mutations also keep the serializable snapshot isolation and consensus: The results of committed concurrent private mutations can be reproduced by sequentially executing the mutations, and all components in FDB have the same view of the mutations. ## Operations @@ -51,7 +51,7 @@ Whenever the team builder is invoked, it aims to build the desired number of ser **Data distribution queue server (`dataDistributionQueue` actor)**: It is created when DD is initialized. It behaves as a server to handle RelocateShard related requests. For example, it waits on the stream of RelocateShard. When a new RelocateShard is sent by teamTracker, it enqueues the new shard, and cancels the inflight shards that overlap with the new relocate shard. -**`applyMetaMutations`**: This is special logic to handle *private transactions* that modify txnStateStore and special system keys. Transaction systems (i.e., proxy, resolver and tLogs) and storage servers perform extra operations for the special transactions. For any update, it will be executed on all proxies in order so that all proxies have a consistent view of the txnStateStore. It will also send special keys to storage servers so that storage servers know the new keyspace they are now responsible for. +**`applyMetaMutations`**: This is special logic to handle *private transactions* that modify txnStateStore and special system keys. Transaction systems (i.e., commit proxy, resolver and tLogs) and storage servers perform extra operations for the special transactions. For any update, it will be executed on all commit proxies in order so that all commit proxies have a consistent view of the txnStateStore. 
It will also send special keys to storage servers so that storage servers know the new keyspace they are now responsible for. A storage server (SS) processes all requests sent to the server in its `storageServerCore` actor. When a (private) mutation request is sent to a SS, the server will call the `update()` function. Eventually, the `StorageUpdater` class will be invoked to apply the mutation in `applyMutation()` function, which handles private mutations `applyPrivateData()` function. @@ -84,9 +84,9 @@ Actors are created to monitor the reasons of key movement: A key range is a shard. A shard is the minimum unit of moving data. The storage server’s ownership of a shard -- which SS owns which shard -- is stored in the system keyspace *serverKeys* (`\xff/serverKeys/`) and *keyServers* (`\xff/keyServers/`). To simplify the explanation, we refer to the storage server’s ownership of a shard as a shard’s ownership. -A shard’s ownership is used in transaction systems (proxy and tLogs) to route mutations to tLogs and storage servers. When a proxy receives a mutation,dd it uses the shard’s ownership to decide which *k* tLogs receive the mutation, assuming *k* is the replias factor. When a storage server pulls mutations from tLogs, it uses the shard’s ownership to decide which shards the SS is responsible for and which tLog the SS should pull the data from. +A shard’s ownership is used in transaction systems (commit proxy and tLogs) to route mutations to tLogs and storage servers. When a commit proxy receives a mutation, it uses the shard’s ownership to decide which *k* tLogs receive the mutation, assuming *k* is the replias factor. When a storage server pulls mutations from tLogs, it uses the shard’s ownership to decide which shards the SS is responsible for and which tLog the SS should pull the data from. -A shard’s ownership must be consistent across transaction systems and SSes, so that mutations can be correctly routed to SSes. Moving keys from a SS to another requires changing the shard’s ownership under ACID property. The ACID property is achieved by using FDB transactions to change the *serverKeys *(`\xff/serverKeys/`) and *keyServers* (`\xff/keyServers/`). The mutation on the *serverKeys *and* keyServers *will be categorized as private mutations in transaction system. Compared to normal mutation, the private mutations will change the transaction state store (txnStateStore) that maintains the *serverKeys* and *keyServers* for transaction systems (proxy and tLog) when it arrives on each transaction component (e.g., tLog). Because mutations are processed in total order with the ACID guarantees, the change to the txnStateStore will be executed in total order on each node and the change on the shard’s ownership will also be consistent. +A shard’s ownership must be consistent across transaction systems and SSes, so that mutations can be correctly routed to SSes. Moving keys from a SS to another requires changing the shard’s ownership under ACID property. The ACID property is achieved by using FDB transactions to change the *serverKeys *(`\xff/serverKeys/`) and *keyServers* (`\xff/keyServers/`). The mutation on the *serverKeys *and* keyServers *will be categorized as private mutations in transaction system. Compared to normal mutation, the private mutations will change the transaction state store (txnStateStore) that maintains the *serverKeys* and *keyServers* for transaction systems (commit proxy and tLog) when it arrives on each transaction component (e.g., tLog). 
Because mutations are processed in total order with the ACID guarantees, the change to the txnStateStore will be executed in total order on each node and the change on the shard’s ownership will also be consistent. The data movement from one server (called source server) to another (called destination server) has four steps: (1) DD adds the destination server as the shard’s new owner; diff --git a/design/recovery-internals.md b/design/recovery-internals.md index 338304f988..cf9cc0b413 100644 --- a/design/recovery-internals.md +++ b/design/recovery-internals.md @@ -8,12 +8,12 @@ This document explains at the high level how the recovery works in a single clus ## `ServerDBInfo` data structure -This data structure contains transient information which is broadcast to all workers for a database, permitting them to communicate with each other. It contains, for example, the interfaces for cluster controller (CC), master, ratekeeper, and resolver, and holds the log system's configuration. Only part of the data structure, such as `ClientDBInfo` that contains the list of proxies, is available to the client. +This data structure contains transient information which is broadcast to all workers for a database, permitting them to communicate with each other. It contains, for example, the interfaces for cluster controller (CC), master, ratekeeper, and resolver, and holds the log system's configuration. Only part of the data structure, such as `ClientDBInfo` that contains the list of GRV proxies and commit proxies, is available to the client. Whenever a field of the `ServerDBInfo`is changed, the new value of the field, say new master's interface, will be sent to the CC and CC will propagate the new `ServerDBInfo` to all workers in the cluster. ## When will recovery happen? -Failure of certain roles in FDB can cause recovery. Those roles are cluster controller, master, proxy, transaction logs (tLog), resolvers, and log router. +Failure of certain roles in FDB can cause recovery. Those roles are cluster controller, master, GRV proxy, commit proxy, transaction logs (tLog), resolvers, log router, and backup workers. Network partition or failures can make CC unable to reach some roles, treating those roles as dead and causing recovery. If CC cannot connect to a majority of coordinators, it will be treated as dead by coordinators and recovery will happen. @@ -97,7 +97,7 @@ Master interface is stored in `serverDBInfo`. Once the CC recruits the master, i Once the master locks the cstate, it will recruit the still-alive tLogs from the previous generation for the benefit of faster recovery. The master gets the old tLogs’ interfaces from the READING_CSTATE phase and uses those interfaces to track which old tLog are still alive, the implementation of which is in `trackRejoins()`. -Once the master gets enough tLogs, it calculates the known committed version (i.e., `knownCommittedVersion` in code). `knownCommittedVersion` is the highest version that a proxy tells a given tLog that it had durably committed on *all* tLogs. The master's is the maximum of all of that. `knownCommittedVersion` is important, because it defines the lower bound of what version range of mutations need to be copied to the new generation. That is, any versions larger than the master's `knownCommittedVersion` is not guaranteed to persist on all replicas. 
The master chooses a *recovery version*, which is the minimum of durable versions on all tLogs of the old generation, and recruits a new set of tLogs that copy all data between `knownCommittedVersion + 1` and `recoveryVersion` from old tLogs. This copy makes sure data within the range has enough replicas to satisfy the replication policy. +Once the master gets enough tLogs, it calculates the known committed version (i.e., `knownCommittedVersion` in code). `knownCommittedVersion` is the highest version that a commit proxy tells a given tLog that it had durably committed on *all* tLogs. The master's is the maximum of all of that. `knownCommittedVersion` is important, because it defines the lower bound of what version range of mutations need to be copied to the new generation. That is, any versions larger than the master's `knownCommittedVersion` is not guaranteed to persist on all replicas. The master chooses a *recovery version*, which is the minimum of durable versions on all tLogs of the old generation, and recruits a new set of tLogs that copy all data between `knownCommittedVersion + 1` and `recoveryVersion` from old tLogs. This copy makes sure data within the range has enough replicas to satisfy the replication policy. Later, the master will use the recruited tLogs to create a new `TagPartitionedLogSystem` for the new generation. @@ -121,9 +121,9 @@ Consider an old generation with three TLogs: `A, B, C`. Their durable versions a Once we have a `knownCommittedVersion`, the master will reconstruct the transaction state store (txnStateStore) by peeking the txnStateTag in oldLogSystem. Recall that the txnStateStore includes the transaction system’s configuration, such as the assignment of shards to SS and to tLogs and that the txnStateStore was durable on disk in the oldLogSystem. -Once we get the txnStateStore, we know the configuration of the transaction system, such as the number of proxies. The master then can ask the CC to recruit roles for the new generation in the `recruitEverything()` function. Those recruited roles includes proxies, tLogs and seed SSes, which are the storage servers created for an empty database in the first generation to host the first shard and serve as the starting point of the bootstrap process to recruit more SSes. Once all roles are recruited, the master starts a new epoch in `newEpoch()`. +Once we get the txnStateStore, we know the configuration of the transaction system, such as the number of GRV proxies and commit proxies. The master then can ask the CC to recruit roles for the new generation in the `recruitEverything()` function. Those recruited roles includes GRV proxies, commit proxies, tLogs and seed SSes, which are the storage servers created for an empty database in the first generation to host the first shard and serve as the starting point of the bootstrap process to recruit more SSes. Once all roles are recruited, the master starts a new epoch in `newEpoch()`. -At this point, we have recovered the txnStateStore, recruited new proxies and tLogs, and copied data from old tLogs to new tLogs. We have a working transaction system in the new generation now. +At this point, we have recovered the txnStateStore, recruited new GRV proxies, commit proxies and tLogs, and copied data from old tLogs to new tLogs. We have a working transaction system in the new generation now. ### Where can the recovery get stuck in this phase? @@ -151,7 +151,7 @@ Not every FDB role participates in the recovery phases 1-3. 
This phase tells the Storage servers (SSes) are not involved in the recovery phase 1 - 3. To notify SSes about the recovery, the master commits a recovery transaction, the first transaction in the new generation, which contains the txnStateStore information. Once storage servers receive the recovery transaction, it will compare its latest data version and the recovery version, and rollback to the recovery version if its data version is newer. Note that storage servers may have newer data than the recovery version because they pre-fetch mutations from tLogs before the mutations are durable to reduce the latency to read newly written data. -Proxies haven’t recovered the transaction system state and cannot accept transactions yet. The master recovers proxies’ states by sending the txnStateStore to proxies through proxies’ (`txnState`) interfaces in `sendIntialCommitToResolvers()` function. Once proxies have recovered their states, they can start processing transactions. The recovery transaction that was waiting on proxies will be processed. +Commit proxies haven’t recovered the transaction system state and cannot accept transactions yet. The master recovers proxies’ states by sending the txnStateStore to commit proxies through commit proxies’ (`txnState`) interfaces in `sendIntialCommitToResolvers()` function. Once commit proxies have recovered their states, they can start processing transactions. The recovery transaction that was waiting on commit proxies will be processed. The resolvers haven’t known the recovery version either. The master needs to send the lastEpochEnd version (i.e., last commit of the previous generation) to resolvers via resolvers’ (`resolve`) interface. @@ -162,7 +162,7 @@ At the end of this phase, every role should be aware of the recovery and start r ## Phase 5: WRITING_CSTATE -Coordinators store the transaction systems’ information. The master needs to write the new tLogs into coordinators’ states to achieve consensus and fault tolerance. Only when the coordinators’ states are updated with the new transaction system’s configuration will the cluster controller tell clients about the new transaction system (such as the new proxies). +Coordinators store the transaction systems’ information. The master needs to write the new tLogs into coordinators’ states to achieve consensus and fault tolerance. Only when the coordinators’ states are updated with the new transaction system’s configuration will the cluster controller tell clients about the new transaction system (such as the new GRV proxies and commit proxies). The master only needs to write the new tLogs to a quorum of coordinators for a running cluster. The only time the master has to write all coordinators is when creating a brand new database. diff --git a/design/tlog-spilling.md.html b/design/tlog-spilling.md.html index aee572b597..4fce3e8e90 100644 --- a/design/tlog-spilling.md.html +++ b/design/tlog-spilling.md.html @@ -7,17 +7,17 @@ (This assumes a basic familiarity with [FoundationDB's architecture](https://www.youtu.be/EMwhsGsxfPU).) Transaction logs are a distributed Write-Ahead-Log for FoundationDB. They -receive commits from proxies, and are responsible for durably storing those -commits, and making them available to storage servers for reading. +receive commits from commit proxies, and are responsible for durably storing +those commits, and making them available to storage servers for reading. Clients send *mutations*, the list of their set, clears, atomic operations, -etc., to proxies. 
Proxies collect mutations into a *batch*, which is the list -of all changes that need to be applied to the database to bring it from version -`N-1` to `N`. Proxies then walk through their in-memory mapping of shard -boundaries to associate one or more *tags*, a small integer uniquely -identifying a destination storage server, with each mutation. They then send a -*commit*, the full list of `(tags, mutation)` for each mutation in a batch, to -the transaction logs. +etc., to commit proxies. Commit proxies collect mutations into a *batch*, which +is the list of all changes that need to be applied to the database to bring it +from version `N-1` to `N`. Commit proxies then walk through their in-memory +mapping of shard boundaries to associate one or more *tags*, a small integer +uniquely identifying a destination storage server, with each mutation. They +then send a *commit*, the full list of `(tags, mutation)` for each mutation in +a batch, to the transaction logs. The transaction log has two responsibilities: it must persist the commits to disk and notify the proxy when a commit is durably stored, and it must make the diff --git a/documentation/sphinx/source/administration.rst b/documentation/sphinx/source/administration.rst index 173f05f312..297e70046f 100644 --- a/documentation/sphinx/source/administration.rst +++ b/documentation/sphinx/source/administration.rst @@ -259,7 +259,8 @@ Use the ``status`` command of ``fdbcli`` to determine if the cluster is up and r Redundancy mode - triple Storage engine - ssd-2 Coordinators - 5 - Desired Proxies - 5 + Desired GRV Proxies - 1 + Desired Commit Proxies - 4 Desired Logs - 8 Cluster: @@ -299,7 +300,8 @@ The summary fields are interpreted as follows: Redundancy mode The currently configured redundancy mode (see the section :ref:`configuration-choosing-redundancy-mode`) Storage engine The currently configured storage engine (see the section :ref:`configuration-configuring-storage-subsystem`) Coordinators The number of FoundationDB coordination servers -Desired Proxies Number of proxies desired. If replication mode is 3 then default number of proxies is 3 +Desired GRV Proxies Number of GRV proxies desired. (default 1) +Desired Commit Proxies Number of commit proxies desired. If replication mode is 3 then default number of commit proxies is 3 Desired Logs Number of logs desired. If replication mode is 3 then default number of logs is 3 FoundationDB processes Number of FoundationDB processes participating in the cluster Machines Number of physical machines running at least one FoundationDB process that is participating in the cluster @@ -565,7 +567,7 @@ When configured, the ``status json`` output will include additional fields to re filtered: 1 } -The ``grv_latency_bands`` and ``commit_latency_bands`` objects will only be logged for ``proxy`` roles, and ``read_latency_bands`` will only be logged for storage roles. Each threshold is represented as a key in the map, and its associated value will be the total number of requests in the lifetime of the process with a latency smaller than the threshold but larger than the next smaller threshold. +The ``grv_latency_bands`` objects will only be logged for ``grv_proxy`` roles, ``commit_latency_bands`` objects will only be logged for ``commit_proxy`` roles, and ``read_latency_bands`` will only be logged for storage roles. 
Each threshold is represented as a key in the map, and its associated value will be the total number of requests in the lifetime of the process with a latency smaller than the threshold but larger than the next smaller threshold. For example, ``0.1: 1`` in ``read_latency_bands`` indicates that there has been 1 read request with a latency in the range ``[0.01, 0.1)``. For the smallest specified threshold, the lower bound is 0 (e.g. ``[0, 0.01)`` in the example above). Requests that took longer than any defined latency band will be reported in the ``inf`` (infinity) band. Requests that were filtered by the configuration (e.g. using ``max_read_bytes``) are reported in the ``filtered`` category. diff --git a/documentation/sphinx/source/api-c.rst b/documentation/sphinx/source/api-c.rst index 02cfaf1682..6fb06a6f18 100644 --- a/documentation/sphinx/source/api-c.rst +++ b/documentation/sphinx/source/api-c.rst @@ -263,9 +263,9 @@ See :ref:`developer-guide-programming-with-futures` for further (language-indepe .. function:: fdb_error_t fdb_future_block_until_ready(FDBFuture* future) - Blocks the calling thread until the given Future is ready. It will return success even if the Future is set to an error -- you must call :func:`fdb_future_get_error()` to determine that. :func:`fdb_future_block_until_ready()` will return an error only in exceptional conditions (e.g. out of memory or other operating system resources). + Blocks the calling thread until the given Future is ready. It will return success even if the Future is set to an error -- you must call :func:`fdb_future_get_error()` to determine that. :func:`fdb_future_block_until_ready()` will return an error only in exceptional conditions (e.g. deadlock detected, out of memory or other operating system resources). - .. warning:: Never call this function from a callback passed to :func:`fdb_future_set_callback()`. This may block the thread on which :func:`fdb_run_network()` was invoked, resulting in a deadlock. + .. warning:: Never call this function from a callback passed to :func:`fdb_future_set_callback()`. This may block the thread on which :func:`fdb_run_network()` was invoked, resulting in a deadlock. In some cases the client can detect the deadlock and throw a ``blocked_from_network_thread`` error. .. function:: fdb_bool_t fdb_future_is_ready(FDBFuture* future) diff --git a/documentation/sphinx/source/api-error-codes.rst b/documentation/sphinx/source/api-error-codes.rst index f013f4aabd..e8f564d80d 100644 --- a/documentation/sphinx/source/api-error-codes.rst +++ b/documentation/sphinx/source/api-error-codes.rst @@ -40,7 +40,7 @@ FoundationDB may return the following error codes from API functions. 
If you nee +-----------------------------------------------+-----+--------------------------------------------------------------------------------+ | external_client_already_loaded | 1040| External client has already been loaded | +-----------------------------------------------+-----+--------------------------------------------------------------------------------+ -| proxy_memory_limit_exceeded | 1042| Proxy commit memory limit exceeded | +| proxy_memory_limit_exceeded | 1042| CommitProxy commit memory limit exceeded | +-----------------------------------------------+-----+--------------------------------------------------------------------------------+ | batch_transaction_throttled | 1051| Batch GRV request rate limit exceeded | +-----------------------------------------------+-----+--------------------------------------------------------------------------------+ @@ -114,8 +114,12 @@ FoundationDB may return the following error codes from API functions. If you nee +-----------------------------------------------+-----+--------------------------------------------------------------------------------+ | transaction_read_only | 2023| Attempted to commit a transaction specified as read-only | +-----------------------------------------------+-----+--------------------------------------------------------------------------------+ +| invalid_cache_eviction_policy | 2024| Invalid cache eviction policy, only random and lru are supported | ++-----------------------------------------------+-----+--------------------------------------------------------------------------------+ | network_cannot_be_restarted | 2025| Network can only be started once | +-----------------------------------------------+-----+--------------------------------------------------------------------------------+ +| blocked_from_network_thread | 2026| Detected a deadlock in a callback called from the network thread | ++-----------------------------------------------+-----+--------------------------------------------------------------------------------+ | incompatible_protocol_version | 2100| Incompatible protocol version | +-----------------------------------------------+-----+--------------------------------------------------------------------------------+ | transaction_too_large | 2101| Transaction exceeds byte limit | diff --git a/documentation/sphinx/source/architecture.rst b/documentation/sphinx/source/architecture.rst index ec964c30e5..f0a902dfe2 100644 --- a/documentation/sphinx/source/architecture.rst +++ b/documentation/sphinx/source/architecture.rst @@ -6,7 +6,7 @@ FoundationDB makes your architecture flexible and easy to operate. Your applicat The following diagram details the logical architecture. -.. image:: /images/Architecture.png +|image0| Detailed FoundationDB Architecture @@ -26,7 +26,7 @@ and servers use the coordinators to connect with the cluster controller. The servers will attempt to become the cluster controller if one does not exist, and register with the cluster controller once one has been elected. Clients use the cluster controller to keep an up-to-date list -of proxies. +of GRV proxies and commit proxies. Cluster Controller ~~~~~~~~~~~~~~~~~~ @@ -42,10 +42,11 @@ Master The master is responsible for coordinating the transition of the write sub-system from one generation to the next. The write sub-system -includes the master, proxies, resolvers, and transaction logs. The three -roles are treated as a unit, and if any of them fail, we will recruit a -replacement for all three roles. 
The master provides the commit versions -for batches of the mutations to the proxies. +includes the master, GRV proxies, commit proxies, resolvers, and +transaction logs. The three roles are treated as a unit, and if any of +them fail, we will recruit a replacement for all three roles. The master +provides the commit versions for batches of the mutations to the commit +proxies. Historically, Ratekeeper and Data Distributor are coupled with Master on the same process. Since 6.2, both have become a singleton in the @@ -53,16 +54,22 @@ cluster. The life time is no longer tied with Master. |image1| -Proxies -~~~~~~~ +GRV Proxies +~~~~~~~~~~~ -The proxies are responsible for providing read versions, committing -transactions, and tracking the storage servers responsible for each -range of keys. To provide a read version, a proxy will ask all other -proxies to see the largest committed version at this point in time, -while simultaneously checking that the transaction logs have not been -stopped. Ratekeeper will artificially slow down the rate at which the -proxy provides read versions. +The GRV proxies are responsible for providing read versions, communicating +with ratekeeper to control the rate providing read versions. To provide a +read version, a GRV proxy will ask all master to see the largest committed +version at this point in time, while simultaneously checking that the +transaction logs have not been stopped. Ratekeeper will artificially slow +down the rate at which the GRV proxy provides read versions. + +Commit Proxies +~~~~~~~~~~~~~~ + +The proxies are responsible for committing transactions, report committed +versions to master and tracking the storage servers responsible for each +range of keys. Commits are accomplished by: @@ -73,20 +80,20 @@ Commits are accomplished by: The key space starting with the ``\xff`` byte is reserved for system metadata. All mutations committed into this key space are distributed to -all of the proxies through the resolvers. This metadata includes a +all of the commit proxies through the resolvers. This metadata includes a mapping between key ranges and the storage servers which have the data -for that range of keys. The proxies provides this information to clients -on-demand. The clients cache this mapping; if they ask a storage server -for a key it does not have, they will clear their cache and get a more -up-to-date list of servers from the proxies. +for that range of keys. The commit proxies provides this information to +clients on-demand. The clients cache this mapping; if they ask a storage +server for a key it does not have, they will clear their cache and get a +more up-to-date list of servers from the commit proxies. Transaction Logs ~~~~~~~~~~~~~~~~ The transaction logs make mutations durable to disk for fast commit -latencies. The logs receive commits from the proxy in version order, and -only respond to the proxy once the data has been written and fsync’ed to -an append only mutation log on disk. Before the data is even written to +latencies. The logs receive commits from the commit proxy in version order, +and only respond to the commit proxy once the data has been written and fsync’ed +to an append only mutation log on disk. Before the data is even written to disk we forward it to the storage servers responsible for that mutation. Once the storage servers have made the mutation durable, they pop it from the log. 
This generally happens roughly 6 seconds after the @@ -153,7 +160,7 @@ Transaction Processing ---------------------- A database transaction in FoundationDB starts by a client contacting one -of the Proxies to obtain a read version, which is guaranteed to be +of the GRV proxies to obtain a read version, which is guaranteed to be larger than any of commit version that client may know about (even through side channels outside the FoundationDB cluster). This is needed so that a client will see the result of previous commits that have @@ -165,64 +172,51 @@ memory without contacting the cluster. By default, reading a key that was written in the same transaction will return the newly written value. At commit time, the client sends the transaction data (all reads and -writes) to one of the Proxies and waits for commit or abort response -from the proxy. If the transaction conflicts with another one and cannot -commit, the client may choose to retry the transaction from the -beginning again. If the transaction commits, the proxy also returns the -commit version back to the client. Note this commit version is larger -than the read version and is chosen by the master. +writes) to one of the commit proxies and waits for commit or abort response +from the commit proxy. If the transaction conflicts with another one and +cannot commit, the client may choose to retry the transaction from the +beginning again. If the transaction commits, the commit proxy also returns +the commit version back to the client and to the master so that GRV proxies can +learn the latest committed version. Note this commit version is +larger than the read version and is chosen by the master. The FoundationDB architecture separates the scaling of client reads and writes (i.e., transaction commits). Because clients directly issue reads to sharded storage servers, reads scale linearly to the number of storage servers. Similarly, writes are scaled by adding more processes -to Proxies, Resolvers, and Log Servers in the transaction system. +to Commit Proxies, Resolvers, and Log Servers in the transaction system. Determine Read Version ~~~~~~~~~~~~~~~~~~~~~~ -When a client requests a read version from a proxy, the proxy asks all -other proxies for their last commit versions, and checks a set of -transaction logs satisfying replication policy are live. Then the proxy -returns the maximum commit version as the read version to the client. +When a client requests a read version from a GRV proxy, the GRV proxy asks +the master for the latest committed version, and checks that a set of transaction +logs satisfying the replication policy are live. Then the GRV proxy returns +the maximum committed version as the read version to the client. |image2| -The reason for the proxy to contact all other proxies for commit -versions is to ensure the read version is larger than any previously -committed version. Consider that if proxy ``A`` commits a transaction, -and then the client asks proxy ``B`` for a read version. The read -version from proxy ``B`` must be larger than the version committed by -proxy ``A``. The only way to get this information is by asking proxy -``A`` for its largest committed version. +The reason for the GRV proxy to contact the master for the latest committed +version is that the master is the central place that keeps the largest of +all commit proxies' committed versions. The reason for checking a set of transaction logs satisfying replication -policy are live is to ensure the proxy is not replaced with newer -generation of proxies.
This is because proxy is a stateless role -recruited in each generation. If a recovery has happened and the old -proxy is still live, this old proxy could still give out read versions. +policy are live is to ensure the GRV proxy is not replaced with newer +generation of GRV proxies. This is because GRV proxy is a stateless role +recruited in each generation. If a recovery has happened and the old GRV +proxy is still live, this old GRV proxy could still give out read versions. As a result, a *read-only* transaction may see stale results (a read-write transaction will be aborted). By checking a set of -transaction logs satisfying replication policy are live, the proxy makes +transaction logs satisfying replication policy are live, the GRV proxy makes sure no recovery has happened, thus the *read-only* transaction sees the latest data. -Note that the client cannot simply ask the master for read versions. The -master gives out versions to proxies to be committed, but the master -does not know when the versions it gives out are durable on the -transaction logs. Therefore it is not safe to do reads at the largest -version the master has provided because that version might be rolled -back in the event of a failure, so the client could end up reading data -that was never committed. In order for the client to use versions from -the master, the client needs to wait until all in-flight -transaction-batches (a write version is used for a batch of -transactions) to commit. This can take a long time and thus is -inefficient. Another drawback of this approach is putting more work -towards the master, because the master role can’t be scaled. Even though -giving out read-versions isn’t very expensive, it still requires the -master to get a transaction budget from the Ratekeeper, batches -requests, and potentially maintains thousands of network connections -from clients. +Note that the client cannot simply ask the master for read versions because +this approach is putting more work towards the master, because the master +role can’t be scaled. Even though giving out read-versions isn’t very +expensive, it still requires the master to get a transaction budget from the +Ratekeeper, batches requests, and potentially maintains thousands of network +connections from clients. |image3| @@ -231,27 +225,27 @@ Transaction Commit A client transaction commits in the following steps: -1. A client sends a transaction to a proxy. -2. The proxy asks the master for a commit version. +1. A client sends a transaction to a commit proxy. +2. The commit proxy asks the master for a commit version. 3. The master sends back a commit version that is higher than any commit version seen before. -4. The proxy sends the read and write conflict ranges to the resolver(s) +4. The commit proxy sends the read and write conflict ranges to the resolver(s) with the commit version included. 5. The resolver responds back with whether the transaction has any conflicts with previous transactions by sorting transactions according to their commit versions and computing if such a serial execution order is conflict-free. - - If there are conflicts, the proxy responds back to the client with + - If there are conflicts, the commit proxy responds back to the client with a not_committed error. - - If there are no conflicts, the proxy sends the mutations and + - If there are no conflicts, the commit proxy sends the mutations and commit version of this transaction to the transaction logs. -6. 
Once the mutations are durable on the logs, the proxy responds back +6. Once the mutations are durable on the logs, the commit proxy responds back success to the user. -Note the proxy sends each resolver their respective key ranges, if any -one of the resolvers detects a conflict then the transaction is not +Note the commit proxy sends each resolver their respective key ranges, if +any one of the resolvers detects a conflict then the transaction is not committed. This has the flaw that if only one of the resolvers detects a conflict, the other resolver will still think the transaction has succeeded and may fail future transactions with overlapping write @@ -273,8 +267,8 @@ Background Work There are a number of background work happening besides the transaction processing: -- **Ratekeeper** collects statistic information from proxies, - transaction logs, and storage servers and compute the target +- **Ratekeeper** collects statistic information from GRV proxies, Commit + proxies, transaction logs, and storage servers and compute the target transaction rate for the cluster. - **Data distribution** monitors all storage servers and perform load @@ -284,7 +278,7 @@ processing: - **Storage servers** pull mutations from transaction logs, write them into storage engine to persist on disks. -- **Proxies** periodically send empty commits to transaction logs to +- **Commit proxies** periodically send empty commits to transaction logs to keep commit versions increasing, in case there is no client generated transactions. @@ -299,9 +293,9 @@ latency. A typical recovery takes about a few hundred milliseconds, but longer recovery time (usually a few seconds) can happen. Whenever there is a failure in the transaction system, a recovery process is performed to restore the transaction system to a new configuration, i.e., a clean -state. Specifically, the Master process monitors the health of Proxies, -Resolvers, and Transaction Logs. If any one of the monitored process -failed, the Master process terminates. The Cluster Controller will +state. Specifically, the Master process monitors the health of GRV Proxies, +Commit Proxies, Resolvers, and Transaction Logs. If any one of the monitored +process failed, the Master process terminates. The Cluster Controller will detect this event, and then recruits a new Master, which coordinates the recovery and recruits a new transaction system instance. In this way, the transaction processing is divided into a number of epochs, where @@ -314,20 +308,20 @@ Coordinators and lock the coordinated states to prevent another Master process from recovering at the same time. Then the Master recovers previous transaction system states, including all Log Servers’ Information, stops these Log Servers from accepting transactions, and -recruits a new set of Proxies, Resolvers, and Transaction Logs. After -previous Log Servers are stopped and new transaction system is -recruited, the Master writes the coordinated states with current +recruits a new set of GRV Proxies, Commit Proxies, Resolvers, and +Transaction Logs. After previous Log Servers are stopped and new transaction +system is recruited, the Master writes the coordinated states with current transaction system information. Finally, the Master accepts new transaction commits. See details in this `documentation `__. -Because Proxies and Resolvers are stateless, their recoveries have no -extra work. 
In contrast, Transaction Logs save the logs of committed -transactions, and we need to ensure all previously committed -transactions are durable and retrievable by storage servers. That is, -for any transactions that the Proxies may have sent back commit -response, their logs are persisted in multiple Log Servers (e.g., three -servers if replication degree is 3). +Because GRV Proxies, Commit Proxies and Resolvers are stateless, their +recoveries have no extra work. In contrast, Transaction Logs save the +logs of committed transactions, and we need to ensure all previously +committed transactions are durable and retrievable by storage servers. +That is, for any transactions that the Commit Proxies may have sent back +commit response, their logs are persisted in multiple Log Servers (e.g., +three servers if replication degree is 3). Finally, a recovery will *fast forward* time by 90 seconds, which would abort any in-progress client transactions with ``transaction_too_old`` @@ -335,7 +329,7 @@ error. During retry, these client transactions will find the new generation of transaction system and commit. **``commit_result_unknown`` error:** If a recovery happened while a -transaction is committing (i.e., a proxy has sent mutations to +transaction is committing (i.e., a commit proxy has sent mutations to transaction logs). A client would have received ``commit_result_unknown``, and then retried the transaction. It’s completely permissible for FDB to commit both the first attempt, and the @@ -362,6 +356,7 @@ Documentation `__ +.. |image0| image:: images/Architecture.png .. |image1| image:: images/architecture-1.jpeg .. |image2| image:: images/architecture-2.jpeg .. |image3| image:: images/architecture-3.jpeg diff --git a/documentation/sphinx/source/command-line-interface.rst b/documentation/sphinx/source/command-line-interface.rst index a051f5cbad..cf695b825a 100644 --- a/documentation/sphinx/source/command-line-interface.rst +++ b/documentation/sphinx/source/command-line-interface.rst @@ -64,7 +64,7 @@ The ``commit`` command commits the current transaction. Any sets or clears execu configure --------- -The ``configure`` command changes the database configuration. Its syntax is ``configure [new] [single|double|triple|three_data_hall|three_datacenter] [ssd|memory] [proxies=] [resolvers=] [logs=]``. +The ``configure`` command changes the database configuration. Its syntax is ``configure [new] [single|double|triple|three_data_hall|three_datacenter] [ssd|memory] [grv_proxies=] [commit_proxies=] [resolvers=] [logs=]``. The ``new`` option, if present, initializes a new database with the given configuration rather than changing the configuration of an existing one. When ``new`` is used, both a redundancy mode and a storage engine must be specified. @@ -98,11 +98,12 @@ A FoundationDB cluster employs server processes of different types. It automatic For large clusters, you can manually set the allocated number of processes of a given type. Valid process types are: -* ``proxies`` +* ``grv_proxies`` +* ``commit_proxies`` * ``resolvers`` * ``logs`` -Set the process using ``configure [proxies|resolvers|logs]=``, where ```` is an integer greater than 0, or -1 to reset the value to its default. +Set the process using ``configure [grv_proxies|commit_proxies|resolvers|logs]=``, where ```` is an integer greater than 0, or -1 to reset the value to its default. For recommendations on appropriate values for process types in large clusters, see :ref:`guidelines-process-class-config`. 
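For example, a write-heavy cluster might be sized with ``configure grv_proxies=2 commit_proxies=8``; these values are illustrative only, and ``configure commit_proxies=-1`` afterwards would return the commit proxy count to its default. Consult the process class guidelines referenced above before choosing counts for a real cluster.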
@@ -357,7 +358,7 @@ setclass The ``setclass`` command can be used to change the :ref:`process class ` for a given process. Its syntax is ``setclass [
]``. If no arguments are specified, then the process classes of all processes are listed. Setting the class to ``default`` to revert to the process class specified on the command line. -The available process classes are ``unset``, ``storage``, ``transaction``, ``resolution``, ``proxy``, ``master``, ``test``, ``unset``, ``stateless``, ``log``, ``router``, ``cluster_controller``, ``fast_restore``, ``data_distributor``, ``coordinator``, ``ratekeeper``, ``storage_cache``, ``backup``, and ``default``. +The available process classes are ``unset``, ``storage``, ``transaction``, ``resolution``, ``grv_proxy``, ``commit_proxy``, ``master``, ``test``, ``stateless``, ``log``, ``router``, ``cluster_controller``, ``fast_restore``, ``data_distributor``, ``coordinator``, ``ratekeeper``, ``storage_cache``, ``backup``, and ``default``. sleep ----- diff --git a/documentation/sphinx/source/configuration.rst b/documentation/sphinx/source/configuration.rst index 0c0cb54332..14ada3a126 100644 --- a/documentation/sphinx/source/configuration.rst +++ b/documentation/sphinx/source/configuration.rst @@ -777,16 +777,17 @@ The 6.2 release still has a number of rough edges related to region configuratio Guidelines for setting process class ==================================== -In a FoundationDB cluster, each of the ``fdbserver`` processes perform different tasks. Each process is recruited to do a particular task based on its process ``class``. For example, processes with ``class=storage`` are given preference to be recruited for doing storage server tasks, ``class=transaction`` are for log server processes and ``class=stateless`` are for stateless processes like proxies, resolvers, etc., +In a FoundationDB cluster, each of the ``fdbserver`` processes performs a different task. Each process is recruited to do a particular task based on its process ``class``. For example, processes with ``class=storage`` are given preference to be recruited for doing storage server tasks, ``class=transaction`` are for log server processes and ``class=stateless`` are for stateless processes like commit proxies, resolvers, etc. -The recommended minimum number of ``class=transaction`` (log server) processes is 8 (active) + 2 (standby) and the recommended minimum number for ``class=stateless`` processes is 4 (proxy) + 1 (resolver) + 1 (cluster controller) + 1 (master) + 2 (standby). It is better to spread the transaction and stateless processes across as many machines as possible. +The recommended minimum number of ``class=transaction`` (log server) processes is 8 (active) + 2 (standby) and the recommended minimum number for ``class=stateless`` processes is 1 (GRV proxy) + 3 (commit proxy) + 1 (resolver) + 1 (cluster controller) + 1 (master) + 2 (standby). It is better to spread the transaction and stateless processes across as many machines as possible. ``fdbcli`` is used to set the desired number of processes of a particular process type. To do so, you would issue the ``fdbcli`` commands:: - fdb> configure proxies=5 + fdb> configure grv_proxies=1 + fdb> configure commit_proxies=4 fdb> configure logs=8 -.. note:: In the present release, the default value for proxies and log servers is 3 and for resolvers is 1. You should not set the value of a process type to less than its default. +.. note:: In the present release, the default value for commit proxies and log servers is 3 and for GRV proxies and resolvers is 1. You should not set the value of a process type to less than its default. ..
warning:: The conflict-resolution algorithm used by FoundationDB is conservative: it guarantees that no conflicting transactions will be committed, but it may fail to commit some transactions that theoretically could have been. The effects of this conservatism may increase as you increase the number of resolvers. It is therefore important to employ the recommended techniques for :ref:`minimizing conflicts ` when increasing the number of resolvers. diff --git a/documentation/sphinx/source/developer-guide.rst b/documentation/sphinx/source/developer-guide.rst index 95e7d3986a..5ec0983166 100644 --- a/documentation/sphinx/source/developer-guide.rst +++ b/documentation/sphinx/source/developer-guide.rst @@ -838,7 +838,7 @@ Caveats #. ``\xff\xff/transaction/read_conflict_range/`` The conflict range for a read is sometimes not known until that read completes (e.g. range reads with limits, key selectors). When you read from these special keys, the returned future first blocks until all pending reads are complete so it can give an accurate response. #. ``\xff\xff/transaction/write_conflict_range/`` The conflict range range for a ``set_versionstamped_key`` atomic op is not known until commit time. You'll get an approximate range (the actual range will be a subset of the approximate range) until the precise range is known. -#. ``\xff\xff/transaction/conflicting_keys/`` Since using this feature costs server (i.e., proxy and resolver) resources, it's disabled by default. You must opt in by setting the ``report_conflicting_keys`` transaction option. +#. ``\xff\xff/transaction/conflicting_keys/`` Since using this feature costs server (i.e., commit proxy and resolver) resources, it's disabled by default. You must opt in by setting the ``report_conflicting_keys`` transaction option. Metrics module -------------- @@ -1059,22 +1059,21 @@ How Versions are Generated and Assigned Versions are generated by the process that runs the *master* role. FoundationDB guarantees that no version will be generated twice and that the versions are monotonically increasing. -In order to assign read and commit versions to transactions, a client will never talk to the master. Instead it will get both from a proxy. Getting a read version is more complex than a commit version. Let's first look at commit versions: +In order to assign read and commit versions to transactions, a client will never talk to the master. Instead it will get them from a GRV proxy and a commit proxy. Getting a read version is more complex than a commit version. Let's first look at commit versions: -#. The client will send a commit message to a proxy. -#. The proxy will put this commit message in a queue in order to build a batch. -#. In parallel, the proxy will ask for a new version from the master (note that this means that only proxies will ever ask for new versions - which scales much better as it puts less stress on the network). -#. The proxy will then resolve all transactions within that batch (discussed later) and assign the version it got from the master to *all* transactions within that batch. It will then write the transactions to the transaction log system to make it durable. +#. The client will send a commit message to a commit proxy. +#. The commit proxy will put this commit message in a queue in order to build a batch. +#. In parallel, the commit proxy will ask for a new version from the master (note that this means that only commit proxies will ever ask for new versions - which scales much better as it puts less stress on the network). +#. 
The commit proxy will then resolve all transactions within that batch (discussed later) and assign the version it got from the master to *all* transactions within that batch. It will then write the transactions to the transaction log system to make it durable. #. If the transaction succeeded, it will send back the version as commit version to the client. Otherwise it will send back an error. -As mentioned before, the algorithm to assign read versions is a bit more complex. At the start of a transaction, a client will ask a proxy server for a read version. The proxy will reply with the last committed version as of the time it received the request - this is important to guarantee external consistency. This is how this is achieved: +As mentioned before, the algorithm to assign read versions is a bit more complex. At the start of a transaction, a client will ask a GRV proxy server for a read version. The GRV proxy will reply with the last committed version as of the time it received the request - this is important to guarantee external consistency. This is how this is achieved: -#. The client will send a GRV (get read version) request to a proxy. -#. The proxy will batch GRV requests for a short amount of time (it depends on load and configuartion how big these batches will be). +#. The client will send a GRV (get read version) request to a GRV proxy. +#. The GRV proxy will batch GRV requests for a short amount of time (how big these batches are depends on load and configuration). #. The proxy will do the following steps in parallel: - * Ask all other proxies for their most recent committed version (the largest version they received from the master for which it successfully wrote the transactions to the transaction log system). - * Send a message to the transaction log system to verify that it is still writable. This is to prevent that we fetch read versions from a proxy that has been declared to be dead. -#. It will then take the largest committed version from all proxies (including its own) and send it back to the clients. + * Ask the master for its most recent committed version (the largest of the commit proxies' committed versions for which the transactions have been successfully written to the transaction log system). + * Send a message to the transaction log system to verify that it is still writable. This is to prevent fetching read versions from a GRV proxy that has been declared dead. Checking whether the log-system is still writeable can be especially expensive if a clusters runs in a multi-region configuration. If a user is fine to sacrifice strict serializability they can use :ref:`option-causal-read-risky `. @@ -1148,8 +1147,8 @@ The ``commit_unknown_result`` Error ``commit_unknown_result`` can be thrown during a commit. This error is difficult to handle as you won't know whether your transaction was committed or not. There are mostly two reasons why you might see this error: -#. The client lost the connection to the proxy to which it did send the commit. So it never got a reply and therefore can't know whether the commit was successful or not. -#. There was a FoundationDB failure - for example a proxy failed during the commit. In that case there is no way for the client know whether the transaction succeeded or not. +#. The client lost the connection to the commit proxy to which it did send the commit. So it never got a reply and therefore can't know whether the commit was successful or not. +#.
There was a FoundationDB failure - for example a commit proxy failed during the commit. In that case there is no way for the client know whether the transaction succeeded or not. However, there is one guarantee FoundationDB gives to the caller: at the point of time where you receive this error, the transaction either committed or not and if it didn't commit, it will never commit in the future. Or: it is guaranteed that the transaction is not in-flight anymore. This is an important guarantee as it means that if your transaction is idempotent you can simply retry. For more explanations see developer-guide-unknown-results_. diff --git a/documentation/sphinx/source/disk-snapshot-backup.rst b/documentation/sphinx/source/disk-snapshot-backup.rst index e5eccd8051..33b97b8c09 100644 --- a/documentation/sphinx/source/disk-snapshot-backup.rst +++ b/documentation/sphinx/source/disk-snapshot-backup.rst @@ -104,7 +104,7 @@ Field Name Description ``Name for the snapshot file`` recommended name for the disk snapshot cluster-name:ip-addr:port:UID ================================ ======================================================== ======================================================== -``snapshot create binary`` will not be invoked on processes which does not have any persistent data (for example, Cluster Controller or Master or MasterProxy). Since these processes are stateless, there is no need for a snapshot. Any specialized configuration knobs used for one of these stateless processes need to be copied and restored externally. +``snapshot create binary`` will not be invoked on processes which does not have any persistent data (for example, Cluster Controller or Master or CommitProxy). Since these processes are stateless, there is no need for a snapshot. Any specialized configuration knobs used for one of these stateless processes need to be copied and restored externally. Management of disk snapshots ---------------------------- diff --git a/documentation/sphinx/source/downloads.rst b/documentation/sphinx/source/downloads.rst index 1aae48d2eb..f4a153086d 100644 --- a/documentation/sphinx/source/downloads.rst +++ b/documentation/sphinx/source/downloads.rst @@ -10,38 +10,38 @@ macOS The macOS installation package is supported on macOS 10.7+. It includes the client and (optionally) the server. -* `FoundationDB-6.3.5.pkg `_ +* `FoundationDB-6.3.8.pkg `_ Ubuntu ------ The Ubuntu packages are supported on 64-bit Ubuntu 12.04+, but beware of the Linux kernel bug in Ubuntu 12.x. -* `foundationdb-clients-6.3.5-1_amd64.deb `_ -* `foundationdb-server-6.3.5-1_amd64.deb `_ (depends on the clients package) +* `foundationdb-clients-6.3.8-1_amd64.deb `_ +* `foundationdb-server-6.3.8-1_amd64.deb `_ (depends on the clients package) RHEL/CentOS EL6 --------------- The RHEL/CentOS EL6 packages are supported on 64-bit RHEL/CentOS 6.x. -* `foundationdb-clients-6.3.5-1.el6.x86_64.rpm `_ -* `foundationdb-server-6.3.5-1.el6.x86_64.rpm `_ (depends on the clients package) +* `foundationdb-clients-6.3.8-1.el6.x86_64.rpm `_ +* `foundationdb-server-6.3.8-1.el6.x86_64.rpm `_ (depends on the clients package) RHEL/CentOS EL7 --------------- The RHEL/CentOS EL7 packages are supported on 64-bit RHEL/CentOS 7.x. 
-* `foundationdb-clients-6.3.5-1.el7.x86_64.rpm `_ -* `foundationdb-server-6.3.5-1.el7.x86_64.rpm `_ (depends on the clients package) +* `foundationdb-clients-6.3.8-1.el7.x86_64.rpm `_ +* `foundationdb-server-6.3.8-1.el7.x86_64.rpm `_ (depends on the clients package) Windows ------- The Windows installer is supported on 64-bit Windows XP and later. It includes the client and (optionally) the server. -* `foundationdb-6.3.5-x64.msi `_ +* `foundationdb-6.3.8-x64.msi `_ API Language Bindings ===================== @@ -58,18 +58,18 @@ On macOS and Windows, the FoundationDB Python API bindings are installed as part If you need to use the FoundationDB Python API from other Python installations or paths, use the Python package manager ``pip`` (``pip install foundationdb``) or download the Python package: -* `foundationdb-6.3.5.tar.gz `_ +* `foundationdb-6.3.8.tar.gz `_ Ruby 1.9.3/2.0.0+ ----------------- -* `fdb-6.3.5.gem `_ +* `fdb-6.3.8.gem `_ Java 8+ ------- -* `fdb-java-6.3.5.jar `_ -* `fdb-java-6.3.5-javadoc.jar `_ +* `fdb-java-6.3.8.jar `_ +* `fdb-java-6.3.8-javadoc.jar `_ Go 1.11+ -------- diff --git a/documentation/sphinx/source/kv-architecture.rst b/documentation/sphinx/source/kv-architecture.rst index 6375072d5f..a30b4b1941 100644 --- a/documentation/sphinx/source/kv-architecture.rst +++ b/documentation/sphinx/source/kv-architecture.rst @@ -5,7 +5,7 @@ FoundationDB Architecture Coordinators ============ -All clients and servers connect to a FoundationDB cluster with a cluster file, which contains the IP:PORT of the coordinators. Both the clients and servers use the coordinators to connect with the cluster controller. The servers will attempt to become the cluster controller if one does not exist, and register with the cluster controller once one has been elected. Clients use the cluster controller to keep an up-to-date list of proxies. +All clients and servers connect to a FoundationDB cluster with a cluster file, which contains the IP:PORT of the coordinators. Both the clients and servers use the coordinators to connect with the cluster controller. The servers will attempt to become the cluster controller if one does not exist, and register with the cluster controller once one has been elected. Clients use the cluster controller to keep an up-to-date list of GRV proxies and commit proxies. Cluster Controller ================== @@ -15,12 +15,12 @@ The cluster controller is a singleton elected by a majority of coordinators. It Master ====== -The master is responsible for coordinating the transition of the write sub-system from one generation to the next. The write sub-system includes the master, proxies, resolvers, and transaction logs. The three roles are treated as a unit, and if any of them fail, we will recruit a replacement for all three roles. The master provides the commit versions for batches of the mutations to the proxies, runs data distribution algorithm, and runs ratekeeper. +The master is responsible for coordinating the transition of the write sub-system from one generation to the next. The write sub-system includes the master, GRV proxies, commit proxies, resolvers, and transaction logs. The three roles are treated as a unit, and if any of them fail, we will recruit a replacement for all three roles. The master keeps commit proxies' committed version, provides read version for GRV proxies, provides the commit versions for batches of the mutations to the commit proxies, runs data distribution algorithm, and runs ratekeeper. 
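To make the division of labor described in this architecture overview concrete, below is a minimal client-side sketch using the FoundationDB C API. It assumes ``fdb_setup_network()`` has been called, ``fdb_run_network()`` is running on its own thread, and a ``FDBDatabase*`` handle has been opened; the key/value pair and the helper names (``wait_and_check``, ``run_one_txn``) are illustrative, and the usual ``fdb_transaction_on_error()`` retry loop is omitted. From the client's point of view the GRV proxy answers the read version request and the commit proxy answers the commit; the client never contacts the master directly.

```c
/* Minimal sketch, assuming network setup has already been done elsewhere. */
#define FDB_API_VERSION 630
#include <foundationdb/fdb_c.h>
#include <stdint.h>

static fdb_error_t wait_and_check(FDBFuture* f) {
	fdb_error_t err = fdb_future_block_until_ready(f);
	if (!err)
		err = fdb_future_get_error(f);
	return err;
}

fdb_error_t run_one_txn(FDBDatabase* db) {
	FDBTransaction* tr = NULL;
	fdb_error_t err = fdb_database_create_transaction(db, &tr);
	if (err)
		return err;

	/* The read version request is answered by a GRV proxy. */
	FDBFuture* rv = fdb_transaction_get_read_version(tr);
	err = wait_and_check(rv);
	int64_t read_version = 0;
	if (!err)
		fdb_future_get_int64(rv, &read_version);
	fdb_future_destroy(rv);
	if (err) {
		fdb_transaction_destroy(tr);
		return err;
	}

	/* Writes are buffered in the client until commit. */
	fdb_transaction_set(tr, (const uint8_t*)"hello", 5, (const uint8_t*)"world", 5);

	/* The commit request is answered by a commit proxy, which obtains the
	 * commit version from the master and resolves conflicts via resolvers. */
	FDBFuture* cf = fdb_transaction_commit(tr);
	err = wait_and_check(cf);
	fdb_future_destroy(cf);

	fdb_transaction_destroy(tr);
	return err;
}
```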
-Proxies -======= +GRV Proxies and Commit Proxies +============================== -The proxies are responsible for providing read versions, committing transactions, and tracking the storage servers responsible for each range of keys. To provide a read version, a proxy will ask all other proxies to see the largest committed version at this point in time, while simultaneously checking that the transaction logs have not been stopped. Ratekeeper will artificially slow down the rate at which the proxy provides read versions. +The GRV proxies are responsible for providing read versions. The commit proxies are responsible for committing transactions, and tracking the storage servers responsible for each range of keys. To provide a read version, a GRV proxy will ask master the largest committed version at this point in time, while simultaneously checking that the transaction logs have not been stopped. Ratekeeper will artificially slow down the rate at which the GRV proxy provides read versions. Commits are accomplished by: @@ -33,7 +33,7 @@ The key space starting with the '\xff' byte is reserved for system metadata. All Transaction Logs ================ -The transaction logs make mutations durable to disk for fast commit latencies. The logs receive commits from the proxy in version order, and only respond to the proxy once the data has been written and fsync'ed to an append only mutation log on disk. Before the data is even written to disk we forward it to the storage servers responsible for that mutation. Once the storage servers have made the mutation durable, they pop it from the log. This generally happens roughly 6 seconds after the mutation was originally committed to the log. We only read from the log's disk when the process has been rebooted. If a storage server has failed, mutations bound for that storage server will build up on the logs. Once data distribution makes a different storage server responsible for all of the missing storage server's data we will discard the log data bound for the failed server. +The transaction logs make mutations durable to disk for fast commit latencies. The logs receive commits from the commit proxy in version order, and only respond to the commit proxy once the data has been written and fsync'ed to an append only mutation log on disk. Before the data is even written to disk we forward it to the storage servers responsible for that mutation. Once the storage servers have made the mutation durable, they pop it from the log. This generally happens roughly 6 seconds after the mutation was originally committed to the log. We only read from the log's disk when the process has been rebooted. If a storage server has failed, mutations bound for that storage server will build up on the logs. Once data distribution makes a different storage server responsible for all of the missing storage server's data we will discard the log data bound for the failed server. Resolvers ========= @@ -48,4 +48,4 @@ The vast majority of processes in a cluster are storage servers. Storage servers Clients ======= -Clients must get a read version at the start of every transaction. During the transaction all of the reads are done at that read version, and write are kept in memory until transaction is committed. When the transaction is committed, all of the reads and writes are sent to the proxy. If the transaction conflicts with another transaction the client is responsible for retrying the transaction. 
By default, reading a key that was written in the same transaction will return the newly written value. +Clients must get a read version at the start of every transaction. During the transaction all of the reads are done at that read version, and write are kept in memory until transaction is committed. When the transaction is committed, all of the reads and writes are sent to the commit proxy. If the transaction conflicts with another transaction the client is responsible for retrying the transaction. By default, reading a key that was written in the same transaction will return the newly written value. diff --git a/documentation/sphinx/source/mr-status-json-schemas.rst.inc b/documentation/sphinx/source/mr-status-json-schemas.rst.inc index 0f4b6a9aa9..d7af4a0885 100644 --- a/documentation/sphinx/source/mr-status-json-schemas.rst.inc +++ b/documentation/sphinx/source/mr-status-json-schemas.rst.inc @@ -27,7 +27,7 @@ "storage", "transaction", "resolution", - "proxy", + "commit_proxy", "grv_proxy", "master", "test", @@ -61,7 +61,7 @@ "role":{ "$enum":[ "master", - "proxy", + "commit_proxy", "grv_proxy", "log", "storage", @@ -447,7 +447,7 @@ ], "recovery_state":{ "required_resolvers":1, - "required_proxies":1, + "required_commit_proxies":1, "required_grv_proxies":1, "name":{ // "fully_recovered" is the healthy state; other states are normal to transition through but not to persist in "$enum":[ @@ -633,11 +633,11 @@ "address":"10.0.4.1" } ], - "auto_proxies":3, + "auto_commit_proxies":3, "auto_resolvers":1, "auto_logs":3, "backup_worker_enabled":1, - "proxies":5 // this field will be absent if a value has not been explicitly set + "commit_proxies":5 // this field will be absent if a value has not been explicitly set }, "data":{ "least_operating_space_bytes_log_server":0, diff --git a/documentation/sphinx/source/release-notes/release-notes-620.rst b/documentation/sphinx/source/release-notes/release-notes-620.rst index 32b9d11e6f..e86de57f6c 100644 --- a/documentation/sphinx/source/release-notes/release-notes-620.rst +++ b/documentation/sphinx/source/release-notes/release-notes-620.rst @@ -2,6 +2,10 @@ Release Notes ############# +6.2.26 +====== +* Attempt to detect when calling :func:`fdb_future_block_until_ready` would cause a deadlock, and throw ``blocked_from_network_thread`` if it would definitely cause a deadlock. + 6.2.25 ====== diff --git a/documentation/sphinx/source/release-notes/release-notes-630.rst b/documentation/sphinx/source/release-notes/release-notes-630.rst index bcbddc91d4..3572fb45a4 100644 --- a/documentation/sphinx/source/release-notes/release-notes-630.rst +++ b/documentation/sphinx/source/release-notes/release-notes-630.rst @@ -2,7 +2,7 @@ Release Notes ############# -6.3.5 +6.3.8 ===== Features @@ -108,6 +108,10 @@ Other Changes * Updated boost to 1.72. `(PR #2684) `_ * Calling ``fdb_run_network`` multiple times in a single run of a client program now returns an error instead of causing undefined behavior. [6.3.1] `(PR #3229) `_ * Blob backup URL parameter ``request_timeout`` changed to ``request_timeout_min``, with prior name still supported. `(PR #3533) `_ +* Support query command in backup CLI that allows users to query restorable files by key ranges. [6.3.6] `(PR #3703) `_ +* Report missing old tlogs information when in recovery before storage servers are fully recovered. [6.3.6] `(PR #3706) `_ +* Updated OpenSSL to version 1.1.1h. [6.3.7] `(PR #3809) `_ +* Lowered the amount of time a watch will remain registered on a storage server from 900 seconds to 30 seconds. 
[6.3.8] `(PR #3833) `_ Fixes from previous versions ---------------------------- @@ -124,6 +128,8 @@ Fixes only impacting 6.3.0+ * Refreshing TLS certificates could cause crashes. [6.3.2] `(PR #3352) `_ * All storage class processes attempted to connect to the same coordinator. [6.3.2] `(PR #3361) `_ * Adjusted the proxy load balancing algorithm to be based on the CPU usage of the process instead of the number of requests processed. [6.3.5] `(PR #3653) `_ +* Only return the error code ``batch_transaction_throttled`` for API versions greater than or equal to 630. [6.3.6] `(PR #3799) `_ +* The fault tolerance calculation in status did not take into account region configurations. [6.3.8] `(PR #3836) `_ Earlier release notes --------------------- diff --git a/fdbbackup/backup.actor.cpp b/fdbbackup/backup.actor.cpp index faef500a83..eb64bcbe3a 100644 --- a/fdbbackup/backup.actor.cpp +++ b/fdbbackup/backup.actor.cpp @@ -18,6 +18,10 @@ * limitations under the License. */ +#include "fdbclient/JsonBuilder.h" +#include "flow/Arena.h" +#include "flow/Error.h" +#include "flow/Trace.h" #define BOOST_DATE_TIME_NO_LIB #include @@ -81,7 +85,22 @@ enum enumProgramExe { }; enum enumBackupType { - BACKUP_UNDEFINED=0, BACKUP_START, BACKUP_MODIFY, BACKUP_STATUS, BACKUP_ABORT, BACKUP_WAIT, BACKUP_DISCONTINUE, BACKUP_PAUSE, BACKUP_RESUME, BACKUP_EXPIRE, BACKUP_DELETE, BACKUP_DESCRIBE, BACKUP_LIST, BACKUP_DUMP, BACKUP_CLEANUP + BACKUP_UNDEFINED = 0, + BACKUP_START, + BACKUP_MODIFY, + BACKUP_STATUS, + BACKUP_ABORT, + BACKUP_WAIT, + BACKUP_DISCONTINUE, + BACKUP_PAUSE, + BACKUP_RESUME, + BACKUP_EXPIRE, + BACKUP_DELETE, + BACKUP_DESCRIBE, + BACKUP_LIST, + BACKUP_QUERY, + BACKUP_DUMP, + BACKUP_CLEANUP }; enum enumDBType { @@ -96,29 +115,68 @@ enum enumRestoreType { // enum { // Backup constants - OPT_DESTCONTAINER, OPT_SNAPSHOTINTERVAL, OPT_ERRORLIMIT, OPT_NOSTOPWHENDONE, - OPT_EXPIRE_BEFORE_VERSION, OPT_EXPIRE_BEFORE_DATETIME, OPT_EXPIRE_DELETE_BEFORE_DAYS, - OPT_EXPIRE_RESTORABLE_AFTER_VERSION, OPT_EXPIRE_RESTORABLE_AFTER_DATETIME, OPT_EXPIRE_MIN_RESTORABLE_DAYS, - OPT_BASEURL, OPT_BLOB_CREDENTIALS, OPT_DESCRIBE_DEEP, OPT_DESCRIBE_TIMESTAMPS, - OPT_DUMP_BEGIN, OPT_DUMP_END, OPT_JSON, OPT_DELETE_DATA, OPT_MIN_CLEANUP_SECONDS, + OPT_DESTCONTAINER, + OPT_SNAPSHOTINTERVAL, + OPT_ERRORLIMIT, + OPT_NOSTOPWHENDONE, + OPT_EXPIRE_BEFORE_VERSION, + OPT_EXPIRE_BEFORE_DATETIME, + OPT_EXPIRE_DELETE_BEFORE_DAYS, + OPT_EXPIRE_RESTORABLE_AFTER_VERSION, + OPT_EXPIRE_RESTORABLE_AFTER_DATETIME, + OPT_EXPIRE_MIN_RESTORABLE_DAYS, + OPT_BASEURL, + OPT_BLOB_CREDENTIALS, + OPT_DESCRIBE_DEEP, + OPT_DESCRIBE_TIMESTAMPS, + OPT_DUMP_BEGIN, + OPT_DUMP_END, + OPT_JSON, + OPT_DELETE_DATA, + OPT_MIN_CLEANUP_SECONDS, OPT_USE_PARTITIONED_LOG, // Backup and Restore constants - OPT_TAGNAME, OPT_BACKUPKEYS, OPT_WAITFORDONE, + OPT_TAGNAME, + OPT_BACKUPKEYS, + OPT_WAITFORDONE, + OPT_BACKUPKEYS_FILTER, + OPT_INCREMENTALONLY, // Backup Modify - OPT_MOD_ACTIVE_INTERVAL, OPT_MOD_VERIFY_UID, + OPT_MOD_ACTIVE_INTERVAL, + OPT_MOD_VERIFY_UID, // Restore constants - OPT_RESTORECONTAINER, OPT_RESTORE_VERSION, OPT_RESTORE_TIMESTAMP, OPT_PREFIX_ADD, OPT_PREFIX_REMOVE, OPT_RESTORE_CLUSTERFILE_DEST, OPT_RESTORE_CLUSTERFILE_ORIG, + OPT_RESTORECONTAINER, + OPT_RESTORE_VERSION, + OPT_RESTORE_TIMESTAMP, + OPT_PREFIX_ADD, + OPT_PREFIX_REMOVE, + OPT_RESTORE_CLUSTERFILE_DEST, + OPT_RESTORE_CLUSTERFILE_ORIG, + OPT_RESTORE_BEGIN_VERSION, // Shared constants - OPT_CLUSTERFILE, OPT_QUIET, OPT_DRYRUN, OPT_FORCE, - OPT_HELP, OPT_DEVHELP, OPT_VERSION, OPT_PARENTPID, 
OPT_CRASHONERROR, - OPT_NOBUFSTDOUT, OPT_BUFSTDOUTERR, OPT_TRACE, OPT_TRACE_DIR, - OPT_KNOB, OPT_TRACE_LOG_GROUP, OPT_MEMLIMIT, OPT_LOCALITY, + OPT_CLUSTERFILE, + OPT_QUIET, + OPT_DRYRUN, + OPT_FORCE, + OPT_HELP, + OPT_DEVHELP, + OPT_VERSION, + OPT_PARENTPID, + OPT_CRASHONERROR, + OPT_NOBUFSTDOUT, + OPT_BUFSTDOUTERR, + OPT_TRACE, + OPT_TRACE_DIR, + OPT_KNOB, + OPT_TRACE_LOG_GROUP, + OPT_MEMLIMIT, + OPT_LOCALITY, - //DB constants + // DB constants OPT_SOURCE_CLUSTER, OPT_DEST_CLUSTER, OPT_CLEANUP, @@ -154,7 +212,7 @@ CSimpleOpt::SOption g_rgAgentOptions[] = { #ifndef TLS_DISABLED TLS_OPTION_FLAGS #endif - SO_END_OF_OPTIONS + SO_END_OF_OPTIONS }; CSimpleOpt::SOption g_rgBackupStartOptions[] = { @@ -197,6 +255,7 @@ CSimpleOpt::SOption g_rgBackupStartOptions[] = { { OPT_DEVHELP, "--dev-help", SO_NONE }, { OPT_KNOB, "--knob_", SO_REQ_SEP }, { OPT_BLOB_CREDENTIALS, "--blob_credentials", SO_REQ_SEP }, + { OPT_INCREMENTALONLY, "--incremental", SO_NONE }, #ifndef TLS_DISABLED TLS_OPTION_FLAGS #endif @@ -585,6 +644,40 @@ CSimpleOpt::SOption g_rgBackupListOptions[] = { SO_END_OF_OPTIONS }; +CSimpleOpt::SOption g_rgBackupQueryOptions[] = { +#ifdef _WIN32 + { OPT_PARENTPID, "--parentpid", SO_REQ_SEP }, +#endif + { OPT_RESTORE_TIMESTAMP, "--query_restore_timestamp", SO_REQ_SEP }, + { OPT_DESTCONTAINER, "-d", SO_REQ_SEP }, + { OPT_DESTCONTAINER, "--destcontainer", SO_REQ_SEP }, + { OPT_RESTORE_VERSION, "-qrv", SO_REQ_SEP }, + { OPT_RESTORE_VERSION, "--query_restore_version", SO_REQ_SEP }, + { OPT_BACKUPKEYS_FILTER, "-k", SO_REQ_SEP }, + { OPT_BACKUPKEYS_FILTER, "--keys", SO_REQ_SEP }, + { OPT_TRACE, "--log", SO_NONE }, + { OPT_TRACE_DIR, "--logdir", SO_REQ_SEP }, + { OPT_TRACE_FORMAT, "--trace_format", SO_REQ_SEP }, + { OPT_TRACE_LOG_GROUP, "--loggroup", SO_REQ_SEP }, + { OPT_QUIET, "-q", SO_NONE }, + { OPT_QUIET, "--quiet", SO_NONE }, + { OPT_VERSION, "-v", SO_NONE }, + { OPT_VERSION, "--version", SO_NONE }, + { OPT_CRASHONERROR, "--crash", SO_NONE }, + { OPT_MEMLIMIT, "-m", SO_REQ_SEP }, + { OPT_MEMLIMIT, "--memory", SO_REQ_SEP }, + { OPT_HELP, "-?", SO_NONE }, + { OPT_HELP, "-h", SO_NONE }, + { OPT_HELP, "--help", SO_NONE }, + { OPT_DEVHELP, "--dev-help", SO_NONE }, + { OPT_BLOB_CREDENTIALS, "--blob_credentials", SO_REQ_SEP }, + { OPT_KNOB, "--knob_", SO_REQ_SEP }, +#ifndef TLS_DISABLED + TLS_OPTION_FLAGS +#endif + SO_END_OF_OPTIONS +}; + // g_rgRestoreOptions is used by fdbrestore and fastrestore_tool CSimpleOpt::SOption g_rgRestoreOptions[] = { #ifdef _WIN32 @@ -603,6 +696,7 @@ CSimpleOpt::SOption g_rgRestoreOptions[] = { { OPT_BACKUPKEYS, "--keys", SO_REQ_SEP }, { OPT_WAITFORDONE, "-w", SO_NONE }, { OPT_WAITFORDONE, "--waitfordone", SO_NONE }, + { OPT_RESTORE_BEGIN_VERSION, "--begin_version", SO_REQ_SEP }, { OPT_RESTORE_VERSION, "--version", SO_REQ_SEP }, { OPT_RESTORE_VERSION, "-v", SO_REQ_SEP }, { OPT_TRACE, "--log", SO_NONE }, @@ -622,6 +716,7 @@ CSimpleOpt::SOption g_rgRestoreOptions[] = { { OPT_HELP, "--help", SO_NONE }, { OPT_DEVHELP, "--dev-help", SO_NONE }, { OPT_BLOB_CREDENTIALS, "--blob_credentials", SO_REQ_SEP }, + { OPT_INCREMENTALONLY, "--incremental", SO_NONE }, #ifndef TLS_DISABLED TLS_OPTION_FLAGS #endif @@ -918,13 +1013,16 @@ void printBackupContainerInfo() { static void printBackupUsage(bool devhelp) { printf("FoundationDB " FDB_VT_PACKAGE_NAME " (v" FDB_VT_VERSION ")\n"); - printf("Usage: %s (start | status | abort | wait | discontinue | pause | resume | expire | delete | describe | list | cleanup) [OPTIONS]\n\n", exeBackup.toString().c_str()); + printf("Usage: %s (start | status 
| abort | wait | discontinue | pause | resume | expire | delete | describe | " + "list | query | cleanup) [OPTIONS]\n\n", + exeBackup.toString().c_str()); printf(" -C CONNFILE The path of a file containing the connection string for the\n" " FoundationDB cluster. The default is first the value of the\n" " FDB_CLUSTER_FILE environment variable, then `./fdb.cluster',\n" " then `%s'.\n", platform::getDefaultClusterFilePath().c_str()); printf(" -d, --destcontainer URL\n" - " The Backup container URL for start, modify, describe, expire, and delete operations.\n"); + " The Backup container URL for start, modify, describe, query, expire, and delete " + "operations.\n"); printBackupContainerInfo(); printf(" -b, --base_url BASEURL\n" " Base backup URL for list operations. This looks like a Backup URL but without a backup name.\n"); @@ -938,6 +1036,12 @@ static void printBackupUsage(bool devhelp) { printf(" --delete_before_days NUM_DAYS\n" " Another way to specify version cutoff for expire operations. Deletes data files containing no data at or after a\n" " version approximately NUM_DAYS days worth of versions prior to the latest log version in the backup.\n"); + printf(" -qrv --query_restore_version VERSION\n" + " For query operations, set target version for restoring a backup. Set -1 for maximum\n" + " restorable version (default) and -2 for minimum restorable version.\n"); + printf(" --query_restore_timestamp DATETIME\n" + " For query operations, instead of a numeric version, use this to specify a timestamp in %s\n", BackupAgentBase::timeFormat().c_str()); + printf(" and it will be converted to a version from that time using metadata in the cluster file.\n"); printf(" --restorable_after_timestamp DATETIME\n" " For expire operations, set minimum acceptable restorability to the version equivalent of DATETIME and later.\n"); printf(" --restorable_after_version VERSION\n" @@ -956,8 +1060,8 @@ static void printBackupUsage(bool devhelp) { " Specifies a UID to verify against the BackupUID of the running backup. If provided, the UID is verified in the same transaction\n" " which sets the new backup parameters (if the UID matches).\n"); printf(" -e ERRORLIMIT The maximum number of errors printed by status (default is 10).\n"); - printf(" -k KEYS List of key ranges to backup.\n" - " If not specified, the entire database will be backed up.\n"); + printf(" -k KEYS List of key ranges to backup or to filter the backup in query operations.\n" + " If not specified, the entire database will be backed up or no filter will be applied.\n"); printf(" --partitioned_log_experimental Starts with new type of backup system using partitioned logs.\n"); printf(" -n, --dryrun For backup start or restore start, performs a trial run with no actual changes made.\n"); printf(" --log Enables trace file logging for the CLI session.\n" @@ -975,6 +1079,9 @@ static void printBackupUsage(bool devhelp) { " remove mutations for it. By default this is set to one hour.\n"); printf(" --delete_data\n" " This flag will cause cleanup to remove mutations for the most stale backup or DR.\n"); + // TODO: Enable this command-line argument once atomics are supported + // printf(" --incremental\n" + // " Performs incremental backup without the base backup.\n"); #ifndef TLS_DISABLED printf(TLS_HELP); #endif @@ -1032,8 +1139,11 @@ static void printRestoreUsage(bool devhelp ) { printf(" --trace_format FORMAT\n" " Select the format of the trace files. 
xml (the default) and json are supported.\n" " Has no effect unless --log is specified.\n"); + // TODO: Enable this command-line argument once atomics are supported + // printf(" --incremental\n" + // " Performs incremental restore without the base backup.\n"); #ifndef TLS_DISABLED - printf(TLS_HELP); + printf(TLS_HELP); #endif printf(" -v DBVERSION The version at which the database will be restored.\n"); printf(" --timestamp Instead of a numeric version, use this to specify a timestamp in %s\n", BackupAgentBase::timeFormat().c_str()); @@ -1273,6 +1383,7 @@ enumBackupType getBackupType(std::string backupType) values["delete"] = BACKUP_DELETE; values["describe"] = BACKUP_DESCRIBE; values["list"] = BACKUP_LIST; + values["query"] = BACKUP_QUERY; values["dump"] = BACKUP_DUMP; values["modify"] = BACKUP_MODIFY; } @@ -1402,7 +1513,7 @@ ACTOR Future getLayerStatus(Reference tr for (KeyBackedTag eachTag : backupTags) { Version last_restorable_version = tagLastRestorableVersions[j].get(); double last_restorable_seconds_behind = ((double)readVer - last_restorable_version) / CLIENT_KNOBS->CORE_VERSIONSPERSECOND; - BackupAgentBase::enumState status = (BackupAgentBase::enumState)tagStates[j].get(); + EBackupState status = tagStates[j].get(); const char *statusText = fba.getStateText(status); // The object for this backup tag inside this instance's subdocument @@ -1411,8 +1522,9 @@ ACTOR Future getLayerStatus(Reference tr tagRoot.create("current_status") = statusText; tagRoot.create("last_restorable_version") = tagLastRestorableVersions[j].get(); tagRoot.create("last_restorable_seconds_behind") = last_restorable_seconds_behind; - tagRoot.create("running_backup") = (status == BackupAgentBase::STATE_RUNNING_DIFFERENTIAL || status == BackupAgentBase::STATE_RUNNING); - tagRoot.create("running_backup_is_restorable") = (status == BackupAgentBase::STATE_RUNNING_DIFFERENTIAL); + tagRoot.create("running_backup") = + (status == EBackupState::STATE_RUNNING_DIFFERENTIAL || status == EBackupState::STATE_RUNNING); + tagRoot.create("running_backup_is_restorable") = (status == EBackupState::STATE_RUNNING_DIFFERENTIAL); tagRoot.create("range_bytes_written") = tagRangeBytes[j].get(); tagRoot.create("mutation_log_bytes_written") = tagLogBytes[j].get(); tagRoot.create("mutation_stream_id") = backupTagUids[j].toString(); @@ -1427,7 +1539,7 @@ ACTOR Future getLayerStatus(Reference tr tr2->setOption(FDBTransactionOptions::LOCK_AWARE); state Standalone tagNames = wait(tr2->getRange(dba.tagNames.range(), 10000, snapshot)); state std::vector>> backupVersion; - state std::vector> backupStatus; + state std::vector> backupStatus; state std::vector> tagRangeBytesDR; state std::vector> tagLogBytesDR; state Future> fDRPaused = tr->get(dba.taskBucket->getPauseKey(), snapshot); @@ -1452,11 +1564,12 @@ ACTOR Future getLayerStatus(Reference tr for (int i = 0; i < tagNames.size(); i++) { std::string tagName = dba.sourceTagNames.unpack(tagNames[i].key).getString(0).toString(); - BackupAgentBase::enumState status = (BackupAgentBase::enumState)backupStatus[i].get(); + auto status = backupStatus[i].get(); JSONDoc tagRoot = tagsRoot.create(tagName); - tagRoot.create("running_backup") = (status == BackupAgentBase::STATE_RUNNING_DIFFERENTIAL || status == BackupAgentBase::STATE_RUNNING); - tagRoot.create("running_backup_is_restorable") = (status == BackupAgentBase::STATE_RUNNING_DIFFERENTIAL); + tagRoot.create("running_backup") = + (status == EBackupState::STATE_RUNNING_DIFFERENTIAL || status == EBackupState::STATE_RUNNING); + 
tagRoot.create("running_backup_is_restorable") = (status == EBackupState::STATE_RUNNING_DIFFERENTIAL); tagRoot.create("range_bytes_written") = tagRangeBytesDR[i].get(); tagRoot.create("mutation_log_bytes_written") = tagLogBytesDR[i].get(); tagRoot.create("mutation_stream_id") = drTagUids[i].toString(); @@ -1721,7 +1834,8 @@ ACTOR Future submitDBBackup(Database src, Database dest, Standalone submitBackup(Database db, std::string url, int snapshotIntervalSeconds, Standalone> backupRanges, std::string tagName, bool dryRun, - bool waitForCompletion, bool stopWhenDone, bool usePartitionedLog) { + bool waitForCompletion, bool stopWhenDone, bool usePartitionedLog, + bool incrementalBackupOnly) { try { state FileBackupAgent backupAgent; @@ -1739,7 +1853,7 @@ ACTOR Future submitBackup(Database db, std::string url, int snapshotInterv EBackupState backupStatus = wait(config.stateEnum().getOrThrow(db)); // Throw error if a backup is currently running until we support parallel backups - if (BackupAgentBase::isRunnable((BackupAgentBase::enumState)backupStatus)) { + if (BackupAgentBase::isRunnable(backupStatus)) { throw backup_duplicate(); } } @@ -1766,7 +1880,7 @@ ACTOR Future submitBackup(Database db, std::string url, int snapshotInterv else { wait(backupAgent.submitBackup(db, KeyRef(url), snapshotIntervalSeconds, tagName, backupRanges, stopWhenDone, - usePartitionedLog)); + usePartitionedLog, incrementalBackupOnly)); // Wait for the backup to complete, if requested if (waitForCompletion) { @@ -1964,10 +2078,10 @@ ACTOR Future waitBackup(Database db, std::string tagName, bool stopWhenDon { state FileBackupAgent backupAgent; - int status = wait(backupAgent.waitBackup(db, tagName, stopWhenDone)); + EBackupState status = wait(backupAgent.waitBackup(db, tagName, stopWhenDone)); printf("The backup on tag `%s' %s.\n", printable(StringRef(tagName)).c_str(), - BackupAgentBase::getStateText((BackupAgentBase::enumState) status)); + BackupAgentBase::getStateText(status)); } catch (Error& e) { if(e.code() == error_code_actor_cancelled) @@ -2077,7 +2191,10 @@ Reference openBackupContainer(const char *name, std::string de return c; } -ACTOR Future runRestore(Database db, std::string originalClusterFile, std::string tagName, std::string container, Standalone> ranges, Version targetVersion, std::string targetTimestamp, bool performRestore, bool verbose, bool waitForDone, std::string addPrefix, std::string removePrefix) { +ACTOR Future runRestore(Database db, std::string originalClusterFile, std::string tagName, std::string container, + Standalone> ranges, Version beginVersion, Version targetVersion, + std::string targetTimestamp, bool performRestore, bool verbose, bool waitForDone, + std::string addPrefix, std::string removePrefix, bool incrementalBackupOnly) { if(ranges.empty()) { ranges.push_back_deep(ranges.arena(), normalKeys); } @@ -2119,19 +2236,23 @@ ACTOR Future runRestore(Database db, std::string originalClusterFile, std: BackupDescription desc = wait(bc->describeBackup()); - if(!desc.maxRestorableVersion.present()) { + if (incrementalBackupOnly && desc.contiguousLogEnd.present()) { + targetVersion = desc.contiguousLogEnd.get() - 1; + } else if (desc.maxRestorableVersion.present()) { + targetVersion = desc.maxRestorableVersion.get(); + } else { fprintf(stderr, "The specified backup is not restorable to any version.\n"); throw restore_error(); } - targetVersion = desc.maxRestorableVersion.get(); - if(verbose) printf("Using target restore version %" PRId64 "\n", targetVersion); } if (performRestore) { - Version 
restoredVersion = wait(backupAgent.restore(db, origDb, KeyRef(tagName), KeyRef(container), ranges, waitForDone, targetVersion, verbose, KeyRef(addPrefix), KeyRef(removePrefix))); + Version restoredVersion = wait(backupAgent.restore( + db, origDb, KeyRef(tagName), KeyRef(container), ranges, waitForDone, targetVersion, verbose, + KeyRef(addPrefix), KeyRef(removePrefix), true, incrementalBackupOnly, beginVersion)); if(waitForDone && verbose) { // If restore is now complete then report version restored @@ -2401,6 +2522,135 @@ ACTOR Future describeBackup(const char *name, std::string destinationConta return Void(); } +static void reportBackupQueryError(UID operationId, JsonBuilderObject& result, std::string errorMessage) { + result["error"] = errorMessage; + printf("%s\n", result.getJson().c_str()); + TraceEvent("BackupQueryFailure").detail("OperationId", operationId).detail("Reason", errorMessage); +} + +// If restoreVersion is invalidVersion or latestVersion, use the maximum or minimum restorable version respectively for +// selected key ranges. If restoreTimestamp is specified, any specified restoreVersion will be overriden to the version +// resolved to that timestamp. +ACTOR Future queryBackup(const char* name, std::string destinationContainer, + Standalone> keyRangesFilter, Version restoreVersion, + std::string originalClusterFile, std::string restoreTimestamp, bool verbose) { + state UID operationId = deterministicRandom()->randomUniqueID(); + state JsonBuilderObject result; + state std::string errorMessage; + result["key_ranges_filter"] = printable(keyRangesFilter); + result["destination_container"] = destinationContainer; + + TraceEvent("BackupQueryStart") + .detail("OperationId", operationId) + .detail("DestinationContainer", destinationContainer) + .detail("KeyRangesFilter", printable(keyRangesFilter)) + .detail("SpecifiedRestoreVersion", restoreVersion) + .detail("RestoreTimestamp", restoreTimestamp) + .detail("BackupClusterFile", originalClusterFile); + + // Resolve restoreTimestamp if given + if (!restoreTimestamp.empty()) { + if (originalClusterFile.empty()) { + reportBackupQueryError( + operationId, result, + format("an original cluster file must be given in order to resolve restore target timestamp '%s'", + restoreTimestamp.c_str())); + return Void(); + } + + if (!fileExists(originalClusterFile)) { + reportBackupQueryError(operationId, result, + format("The specified original source database cluster file '%s' does not exist\n", + originalClusterFile.c_str())); + return Void(); + } + + Database origDb = Database::createDatabase(originalClusterFile, Database::API_VERSION_LATEST); + Version v = wait(timeKeeperVersionFromDatetime(restoreTimestamp, origDb)); + result["restore_timestamp"] = restoreTimestamp; + result["restore_timestamp_resolved_version"] = v; + restoreVersion = v; + } + + try { + state Reference bc = openBackupContainer(name, destinationContainer); + if (restoreVersion == invalidVersion) { + BackupDescription desc = wait(bc->describeBackup()); + if (desc.maxRestorableVersion.present()) { + restoreVersion = desc.maxRestorableVersion.get(); + // Use continuous log end version for the maximum restorable version for the key ranges. 
+ } else if (keyRangesFilter.size() && desc.contiguousLogEnd.present()) { + restoreVersion = desc.contiguousLogEnd.get(); + } else { + reportBackupQueryError( + operationId, result, + errorMessage = format("the backup for the specified key ranges is not restorable to any version")); + } + } + + if (restoreVersion < 0 && restoreVersion != latestVersion) { + reportBackupQueryError(operationId, result, + errorMessage = + format("the specified restorable version %ld is not valid", restoreVersion)); + return Void(); + } + Optional fileSet = wait(bc->getRestoreSet(restoreVersion, keyRangesFilter)); + if (fileSet.present()) { + int64_t totalRangeFilesSize = 0, totalLogFilesSize = 0; + result["restore_version"] = fileSet.get().targetVersion; + JsonBuilderArray rangeFilesJson; + JsonBuilderArray logFilesJson; + for (const auto& rangeFile : fileSet.get().ranges) { + JsonBuilderObject object; + object["file_name"] = rangeFile.fileName; + object["file_size"] = rangeFile.fileSize; + object["version"] = rangeFile.version; + object["key_range"] = fileSet.get().keyRanges.count(rangeFile.fileName) == 0 + ? "none" + : fileSet.get().keyRanges.at(rangeFile.fileName).toString(); + rangeFilesJson.push_back(object); + totalRangeFilesSize += rangeFile.fileSize; + } + for (const auto& log : fileSet.get().logs) { + JsonBuilderObject object; + object["file_name"] = log.fileName; + object["file_size"] = log.fileSize; + object["begin_version"] = log.beginVersion; + object["end_version"] = log.endVersion; + logFilesJson.push_back(object); + totalLogFilesSize += log.fileSize; + } + + result["total_range_files_size"] = totalRangeFilesSize; + result["total_log_files_size"] = totalLogFilesSize; + + if (verbose) { + result["ranges"] = rangeFilesJson; + result["logs"] = logFilesJson; + } + + TraceEvent("BackupQueryReceivedRestorableFilesSet") + .detail("DestinationContainer", destinationContainer) + .detail("KeyRangesFilter", printable(keyRangesFilter)) + .detail("ActualRestoreVersion", fileSet.get().targetVersion) + .detail("NumRangeFiles", fileSet.get().ranges.size()) + .detail("NumLogFiles", fileSet.get().logs.size()) + .detail("RangeFilesBytes", totalRangeFilesSize) + .detail("LogFilesBytes", totalLogFilesSize); + } else { + reportBackupQueryError(operationId, result, "no restorable files set found for specified key ranges"); + return Void(); + } + + } catch (Error& e) { + reportBackupQueryError(operationId, result, e.what()); + return Void(); + } + + printf("%s\n", result.getJson().c_str()); + return Void(); +} + ACTOR Future listBackup(std::string baseUrl) { try { std::vector containers = wait(IBackupContainer::listContainers(baseUrl)); @@ -2770,6 +3020,9 @@ int main(int argc, char* argv[]) { case BACKUP_LIST: args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupListOptions, SO_O_EXACT); break; + case BACKUP_QUERY: + args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupQueryOptions, SO_O_EXACT); + break; case BACKUP_MODIFY: args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupModifyOptions, SO_O_EXACT); break; @@ -2909,12 +3162,15 @@ int main(int argc, char* argv[]) { std::string addPrefix; std::string removePrefix; Standalone> backupKeys; + Standalone> backupKeysFilter; int maxErrors = 20; + Version beginVersion = invalidVersion; Version restoreVersion = invalidVersion; std::string restoreTimestamp; bool waitForDone = false; bool stopWhenDone = true; bool usePartitionedLog = false; // Set to true to use new backup system + bool incrementalBackupOnly = false; bool forceAction = false; bool trace = false; bool quietDisplay 
= false; @@ -3129,6 +3385,15 @@ int main(int argc, char* argv[]) { return FDB_EXIT_ERROR; } break; + case OPT_BACKUPKEYS_FILTER: + try { + addKeyRange(args->OptionArg(), backupKeysFilter); + } + catch (Error &) { + printHelpTeaser(argv[0]); + return FDB_EXIT_ERROR; + } + break; case OPT_DESTCONTAINER: destinationContainer = args->OptionArg(); // If the url starts with '/' then prepend "file://" for backwards compatibility @@ -3167,6 +3432,10 @@ int main(int argc, char* argv[]) { case OPT_USE_PARTITIONED_LOG: usePartitionedLog = true; break; + case OPT_INCREMENTALONLY: + // TODO: Enable this command-line argument once atomics are supported + // incrementalBackupOnly = true; + break; case OPT_RESTORECONTAINER: restoreContainer = args->OptionArg(); // If the url starts with '/' then prepend "file://" for backwards compatibility @@ -3194,6 +3463,17 @@ int main(int argc, char* argv[]) { } break; } + case OPT_RESTORE_BEGIN_VERSION: { + const char* a = args->OptionArg(); + long long ver = 0; + if (!sscanf(a, "%lld", &ver)) { + fprintf(stderr, "ERROR: Could not parse database beginVersion `%s'\n", a); + printHelpTeaser(argv[0]); + return FDB_EXIT_ERROR; + } + beginVersion = ver; + break; + } case OPT_RESTORE_VERSION: { const char* a = args->OptionArg(); long long ver = 0; @@ -3567,7 +3847,8 @@ int main(int argc, char* argv[]) { // Test out the backup url to make sure it parses. Doesn't test to make sure it's actually writeable. openBackupContainer(argv[0], destinationContainer); f = stopAfter(submitBackup(db, destinationContainer, snapshotIntervalSeconds, backupKeys, tagName, - dryRun, waitForDone, stopWhenDone, usePartitionedLog)); + dryRun, waitForDone, stopWhenDone, usePartitionedLog, + incrementalBackupOnly)); break; } @@ -3652,6 +3933,12 @@ int main(int argc, char* argv[]) { f = stopAfter( listBackup(baseUrl) ); break; + case BACKUP_QUERY: + initTraceFile(); + f = stopAfter(queryBackup(argv[0], destinationContainer, backupKeysFilter, restoreVersion, + restoreClusterFileOrig, restoreTimestamp, !quietDisplay)); + break; + case BACKUP_DUMP: initTraceFile(); f = stopAfter( dumpBackupData(argv[0], destinationContainer, dumpBegin, dumpEnd) ); @@ -3697,7 +3984,9 @@ int main(int argc, char* argv[]) { switch(restoreType) { case RESTORE_START: - f = stopAfter( runRestore(db, restoreClusterFileOrig, tagName, restoreContainer, backupKeys, restoreVersion, restoreTimestamp, !dryRun, !quietDisplay, waitForDone, addPrefix, removePrefix) ); + f = stopAfter(runRestore(db, restoreClusterFileOrig, tagName, restoreContainer, backupKeys, + beginVersion, restoreVersion, restoreTimestamp, !dryRun, !quietDisplay, + waitForDone, addPrefix, removePrefix, incrementalBackupOnly)); break; case RESTORE_WAIT: f = stopAfter( success(ba.waitRestore(db, KeyRef(tagName), true)) ); diff --git a/fdbcli/FlowLineNoise.actor.cpp b/fdbcli/FlowLineNoise.actor.cpp index 6c101ca666..4aa38a47a9 100644 --- a/fdbcli/FlowLineNoise.actor.cpp +++ b/fdbcli/FlowLineNoise.actor.cpp @@ -117,7 +117,7 @@ LineNoise::LineNoise( Hint h = onMainThread( [line]() -> Future { return hint_callback(line); }).getBlocking(); - if (!h.valid) return NULL; + if (!h.valid) return nullptr; *color = h.color; *bold = h.bold; return strdup( h.text.c_str() ); diff --git a/fdbcli/fdbcli.actor.cpp b/fdbcli/fdbcli.actor.cpp index 6351219341..9d6d4d0824 100644 --- a/fdbcli/fdbcli.actor.cpp +++ b/fdbcli/fdbcli.actor.cpp @@ -20,6 +20,7 @@ #include "boost/lexical_cast.hpp" #include "fdbclient/NativeAPI.actor.h" +#include "fdbclient/FDBTypes.h" #include "fdbclient/Status.h" 
#include "fdbclient/StatusClient.h" #include "fdbclient/DatabaseContext.h" @@ -102,7 +103,7 @@ CSimpleOpt::SOption g_rgOptions[] = { { OPT_CONNFILE, "-C", SO_REQ_SEP }, void printAtCol(const char* text, int col) { const char* iter = text; const char* start = text; - const char* space = NULL; + const char* space = nullptr; do { iter++; @@ -112,7 +113,7 @@ void printAtCol(const char* text, int col) { printf("%.*s\n", (int)(space - start), start); start = space; if (*start == ' ' || *start == '\n') start++; - space = NULL; + space = nullptr; } } while (*iter); } @@ -120,7 +121,7 @@ void printAtCol(const char* text, int col) { std::string lineWrap(const char* text, int col) { const char* iter = text; const char* start = text; - const char* space = NULL; + const char* space = nullptr; std::string out = ""; do { iter++; @@ -130,7 +131,7 @@ std::string lineWrap(const char* text, int col) { out += format("%.*s\n", (int)(space - start), start); start = space; if (*start == ' '/* || *start == '\n'*/) start++; - space = NULL; + space = nullptr; } } while (*iter); return out; @@ -470,8 +471,8 @@ void initHelp() { "All keys between BEGINKEY (inclusive) and ENDKEY (exclusive) are cleared from the database. This command will succeed even if the specified range is empty, but may fail because of conflicts." ESCAPINGK); helpMap["configure"] = CommandHelp( "configure [new] " - "|grv_" - "proxies=|logs=|resolvers=>*", + "|" + "commit_proxies=|grv_proxies=|logs=|resolvers=>*", "change the database configuration", "The `new' option, if present, initializes a new database with the given configuration rather than changing " "the configuration of an existing one. When used, both a redundancy mode and a storage engine must be " @@ -479,13 +480,19 @@ void initHelp() { "of data (survive one failure).\n triple - three copies of data (survive two failures).\n three_data_hall - " "See the Admin Guide.\n three_datacenter - See the Admin Guide.\n\nStorage engine:\n ssd - B-Tree storage " "engine optimized for solid state disks.\n memory - Durable in-memory storage engine for small " - "datasets.\n\nproxies=: Sets the desired number of proxies in the cluster. Must be at least 1, or set " - "to -1 which restores the number of proxies to the default value.\n\ngrv_proxies=: Sets the " - "desired number of GRV proxies in the cluster. Must be at least 1, or set to -1 which restores the number of " - "proxies to the default value.\n\nlogs=: Sets the desired number of log servers in the cluster. Must be " - "at least 1, or set to -1 which restores the number of logs to the default value.\n\nresolvers=: " - "Sets the desired number of resolvers in the cluster. Must be at least 1, or set to -1 which restores the " - "number of resolvers to the default value.\n\nSee the FoundationDB Administration Guide for more information."); + "datasets.\n\nproxies=: Sets the desired number of proxies in the cluster. The proxy role is being " + "deprecated and split into GRV proxy and Commit proxy, now prefer configure 'grv_proxies' and 'commit_proxies' " + "separately. Generally we should follow that 'commit_proxies' is three times of 'grv_proxies' and 'grv_proxies' " + "should be not more than 4. If 'proxies' is specified, it will be converted to 'grv_proxies' and 'commit_proxies'. " + "Must be at least 2 (1 GRV proxy, 1 Commit proxy), or set to -1 which restores the number of proxies to the " + "default value.\n\ncommit_proxies=: Sets the desired number of commit proxies in the cluster. 
" + "Must be at least 1, or set to -1 which restores the number of commit proxies to the default " + "value.\n\ngrv_proxies=: Sets the desired number of GRV proxies in the cluster. Must be at least " + "1, or set to -1 which restores the number of GRV proxies to the default value.\n\nlogs=: Sets the " + "desired number of log servers in the cluster. Must be at least 1, or set to -1 which restores the number of " + "logs to the default value.\n\nresolvers=: Sets the desired number of resolvers in the cluster. " + "Must be at least 1, or set to -1 which restores the number of resolvers to the default value.\n\nSee the " + "FoundationDB Administration Guide for more information."); helpMap["fileconfigure"] = CommandHelp( "fileconfigure [new] ", "change the database configuration from a file", @@ -871,12 +878,13 @@ void printStatus(StatusObjectReader statusObj, StatusClient::StatusLevel level, fatalRecoveryState = true; if (name == "recruiting_transaction_servers") { - description += format("\nNeed at least %d log servers across unique zones, %d proxies, " - "%d GRV proxies and %d resolvers.", - recoveryState["required_logs"].get_int(), - recoveryState["required_proxies"].get_int(), - recoveryState["required_grv_proxies"].get_int(), - recoveryState["required_resolvers"].get_int()); + description += + format("\nNeed at least %d log servers across unique zones, %d commit proxies, " + "%d GRV proxies and %d resolvers.", + recoveryState["required_logs"].get_int(), + recoveryState["required_commit_proxies"].get_int(), + recoveryState["required_grv_proxies"].get_int(), + recoveryState["required_resolvers"].get_int()); if (statusObjCluster.has("machines") && statusObjCluster.has("processes")) { auto numOfNonExcludedProcessesAndZones = getNumOfNonExcludedProcessAndZones(statusObjCluster); description += format("\nHave %d non-excluded processes on %d machines across %d zones.", numOfNonExcludedProcessesAndZones.first, getNumofNonExcludedMachines(statusObjCluster), numOfNonExcludedProcessesAndZones.second); @@ -1026,8 +1034,8 @@ void printStatus(StatusObjectReader statusObj, StatusClient::StatusLevel level, outputString += format("\n Exclusions - %d (type `exclude' for details)", excludedServersArr.size()); } - if (statusObjConfig.get("proxies", intVal)) - outputString += format("\n Desired Proxies - %d", intVal); + if (statusObjConfig.get("commit_proxies", intVal)) + outputString += format("\n Desired Commit Proxies - %d", intVal); if (statusObjConfig.get("grv_proxies", intVal)) outputString += format("\n Desired GRV Proxies - %d", intVal); @@ -1055,10 +1063,10 @@ void printStatus(StatusObjectReader statusObj, StatusClient::StatusLevel level, if (statusObjConfig.has("regions")) { outputString += "\n Regions: "; regions = statusObjConfig["regions"].get_array(); - bool isPrimary = false; - std::vector regionSatelliteDCs; - std::string regionDC; for (StatusObjectReader region : regions) { + bool isPrimary = false; + std::vector regionSatelliteDCs; + std::string regionDC; for (StatusObjectReader dc : region["datacenters"].get_array()) { if (!dc.has("satellite")) { regionDC = dc["id"].get_str(); @@ -1233,14 +1241,54 @@ void printStatus(StatusObjectReader statusObj, StatusClient::StatusLevel level, int minLoss = std::min(availLoss, dataLoss); const char *faultDomain = machinesAreZones ? 
"machine" : "zone"; - if (minLoss == 1) - outputString += format("1 %s", faultDomain); - else - outputString += format("%d %ss", minLoss, faultDomain); + outputString += format("%d %ss", minLoss, faultDomain); if (dataLoss > availLoss){ outputString += format(" (%d without data loss)", dataLoss); } + + if (dataLoss == -1) { + ASSERT_WE_THINK(availLoss == -1); + outputString += format( + "\n\n Warning: the database may have data loss and availability loss. Please restart " + "following tlog interfaces, otherwise storage servers may never be able to catch " + "up.\n"); + StatusObjectReader logs; + if (statusObjCluster.has("logs")) { + for (StatusObjectReader logEpoch : statusObjCluster.last().get_array()) { + bool possiblyLosingData; + if (logEpoch.get("possibly_losing_data", possiblyLosingData) && + !possiblyLosingData) { + continue; + } + // Current epoch doesn't have an end version. + int64_t epoch, beginVersion, endVersion = invalidVersion; + bool current; + logEpoch.get("epoch", epoch); + logEpoch.get("begin_version", beginVersion); + logEpoch.get("end_version", endVersion); + logEpoch.get("current", current); + std::string missing_log_interfaces; + if (logEpoch.has("log_interfaces")) { + for (StatusObjectReader logInterface : logEpoch.last().get_array()) { + bool healthy; + std::string address, id; + if (logInterface.get("healthy", healthy) && !healthy) { + logInterface.get("id", id); + logInterface.get("address", address); + missing_log_interfaces += format("%s,%s ", id.c_str(), address.c_str()); + } + } + } + outputString += format( + " %s log epoch: %ld begin: %ld end: %s, missing " + "log interfaces(id,address): %s\n", + current ? "Current" : "Old", epoch, beginVersion, + endVersion == invalidVersion ? "(unknown)" : format("%ld", endVersion).c_str(), + missing_log_interfaces.c_str()); + } + } + } } } @@ -1764,7 +1812,7 @@ ACTOR Future commitTransaction( Reference tr ) } ACTOR Future configure( Database db, std::vector tokens, Reference ccf, LineNoise* linenoise, Future warn ) { - state ConfigurationResult::Type result; + state ConfigurationResult result; state int startToken = 1; state bool force = false; if (tokens.size() < 2) @@ -1790,14 +1838,14 @@ ACTOR Future configure( Database db, std::vector tokens, Refere bool noChanges = conf.get().old_replication == conf.get().auto_replication && conf.get().old_logs == conf.get().auto_logs && - conf.get().old_proxies == conf.get().auto_proxies && + conf.get().old_commit_proxies == conf.get().auto_commit_proxies && conf.get().old_grv_proxies == conf.get().auto_grv_proxies && conf.get().old_resolvers == conf.get().auto_resolvers && conf.get().old_processes_with_transaction == conf.get().auto_processes_with_transaction && conf.get().old_machines_with_transaction == conf.get().auto_machines_with_transaction; bool noDesiredChanges = noChanges && conf.get().old_logs == conf.get().desired_logs && - conf.get().old_proxies == conf.get().desired_proxies && + conf.get().old_commit_proxies == conf.get().desired_commit_proxies && conf.get().old_grv_proxies == conf.get().desired_grv_proxies && conf.get().old_resolvers == conf.get().desired_resolvers; @@ -1816,8 +1864,11 @@ ACTOR Future configure( Database db, std::vector tokens, Refere outputString += format("| replication | %16s | %16s |\n", conf.get().old_replication.c_str(), conf.get().auto_replication.c_str()); outputString += format("| logs | %16d | %16d |", conf.get().old_logs, conf.get().auto_logs); outputString += conf.get().auto_logs != conf.get().desired_logs ? 
format(" (manually set; would be %d)\n", conf.get().desired_logs) : "\n"; - outputString += format("| proxies | %16d | %16d |", conf.get().old_proxies, conf.get().auto_proxies); - outputString += conf.get().auto_proxies != conf.get().desired_proxies ? format(" (manually set; would be %d)\n", conf.get().desired_proxies) : "\n"; + outputString += format("| commit_proxies | %16d | %16d |", conf.get().old_commit_proxies, + conf.get().auto_commit_proxies); + outputString += conf.get().auto_commit_proxies != conf.get().desired_commit_proxies + ? format(" (manually set; would be %d)\n", conf.get().desired_commit_proxies) + : "\n"; outputString += format("| grv_proxies | %16d | %16d |", conf.get().old_grv_proxies, conf.get().auto_grv_proxies); outputString += conf.get().auto_grv_proxies != conf.get().desired_grv_proxies @@ -1842,7 +1893,8 @@ ACTOR Future configure( Database db, std::vector tokens, Refere } } - ConfigurationResult::Type r = wait( makeInterruptable( changeConfig( db, std::vector(tokens.begin()+startToken,tokens.end()), conf, force) ) ); + ConfigurationResult r = wait(makeInterruptable( + changeConfig(db, std::vector(tokens.begin() + startToken, tokens.end()), conf, force))); result = r; } @@ -1968,7 +2020,7 @@ ACTOR Future fileConfigure(Database db, std::string filePath, bool isNewDa return true; } } - ConfigurationResult::Type result = wait( makeInterruptable( changeConfig(db, configString, force) ) ); + ConfigurationResult result = wait(makeInterruptable(changeConfig(db, configString, force))); // Real errors get thrown from makeInterruptable and printed by the catch block in cli(), but // there are various results specific to changeConfig() that we need to report: bool ret; @@ -2099,7 +2151,7 @@ ACTOR Future coordinators( Database db, std::vector tokens, boo } if(setName.size()) change = nameQuorumChange( setName.toString(), change ); - CoordinatorsResult::Type r = wait( makeInterruptable( changeQuorum( db, change ) ) ); + CoordinatorsResult r = wait(makeInterruptable(changeQuorum(db, change))); // Real errors get thrown from makeInterruptable and printed by the catch block in cli(), but // there are various results specific to changeConfig() that we need to report: @@ -2472,7 +2524,7 @@ void compGenerator(const char* text, bool help, std::vector& lc) { std::map::const_iterator iter; int len = strlen(text); - const char* helpExtra[] = {"escaping", "options", NULL}; + const char* helpExtra[] = {"escaping", "options", nullptr}; const char** he = helpExtra; @@ -2531,11 +2583,24 @@ void onOffGenerator(const char* text, const char *line, std::vector } void configureGenerator(const char* text, const char *line, std::vector& lc) { - const char* opts[] = { - "new", "single", "double", "triple", "three_data_hall", "three_datacenter", "ssd", - "ssd-1", "ssd-2", "memory", "memory-1", "memory-2", "memory-radixtree-beta", "proxies=", - "grv_proxies=", "logs=", "resolvers=", nullptr - }; + const char* opts[] = { "new", + "single", + "double", + "triple", + "three_data_hall", + "three_datacenter", + "ssd", + "ssd-1", + "ssd-2", + "memory", + "memory-1", + "memory-2", + "memory-radixtree-beta", + "commit_proxies=", + "grv_proxies=", + "logs=", + "resolvers=", + nullptr }; arrayGenerator(text, line, opts, lc); } @@ -2973,7 +3038,7 @@ ACTOR Future cli(CLIOptions opt, LineNoise* plinenoise) { .detail("SourceVersion", getSourceVersion()) .detail("Version", FDB_VT_VERSION) .detail("PackageName", FDB_VT_PACKAGE_NAME) - .detailf("ActualTime", "%lld", DEBUG_DETERMINISM ? 
0 : time(NULL)) + .detailf("ActualTime", "%lld", DEBUG_DETERMINISM ? 0 : time(nullptr)) .detail("ClusterFile", ccf->getFilename().c_str()) .detail("ConnectionString", ccf->getConnectionString().toString()) .setMaxFieldLength(10000) @@ -4548,7 +4613,7 @@ int main(int argc, char **argv) { sigemptyset( &act.sa_mask ); act.sa_flags = 0; act.sa_handler = SIG_IGN; - sigaction(SIGINT, &act, NULL); + sigaction(SIGINT, &act, nullptr); #endif CLIOptions opt(argc, argv); diff --git a/fdbclient/AsyncFileBlobStore.actor.h b/fdbclient/AsyncFileBlobStore.actor.h index 681c28ac6a..b070cd65d9 100644 --- a/fdbclient/AsyncFileBlobStore.actor.h +++ b/fdbclient/AsyncFileBlobStore.actor.h @@ -59,7 +59,7 @@ public: virtual void delref() { ReferenceCounted::delref(); } struct Part : ReferenceCounted { - Part(int n, int minSize) : number(n), writer(content.getWriteBuffer(minSize), NULL, Unversioned()), length(0) { + Part(int n, int minSize) : number(n), writer(content.getWriteBuffer(minSize), nullptr, Unversioned()), length(0) { etag = std::string(); ::MD5_Init(&content_md5_buf); } diff --git a/fdbclient/BackupAgent.actor.h b/fdbclient/BackupAgent.actor.h index ca8b43f337..4b27c9e8d9 100644 --- a/fdbclient/BackupAgent.actor.h +++ b/fdbclient/BackupAgent.actor.h @@ -46,13 +46,15 @@ public: return "YYYY/MM/DD.HH:MI:SS[+/-]HHMM"; } - // Type of program being executed - enum enumActionResult { - RESULT_SUCCESSFUL = 0, RESULT_ERRORED = 1, RESULT_DUPLICATE = 2, RESULT_UNNEEDED = 3 - }; - - enum enumState { - STATE_ERRORED = 0, STATE_SUBMITTED = 1, STATE_RUNNING = 2, STATE_RUNNING_DIFFERENTIAL = 3, STATE_COMPLETED = 4, STATE_NEVERRAN = 5, STATE_ABORTED = 6, STATE_PARTIALLY_ABORTED = 7 + enum class EnumState { + STATE_ERRORED = 0, + STATE_SUBMITTED = 1, + STATE_RUNNING = 2, + STATE_RUNNING_DIFFERENTIAL = 3, + STATE_COMPLETED = 4, + STATE_NEVERRAN = 5, + STATE_ABORTED = 6, + STATE_PARTIALLY_ABORTED = 7 }; static const Key keyFolderId; @@ -85,70 +87,68 @@ public: static const int logHeaderSize; // Convert the status text to an enumerated value - static enumState getState(std::string stateText) - { - enumState enState = STATE_ERRORED; + static EnumState getState(std::string stateText) { + auto enState = EnumState::STATE_ERRORED; if (stateText.empty()) { - enState = STATE_NEVERRAN; + enState = EnumState::STATE_NEVERRAN; } else if (!stateText.compare("has been submitted")) { - enState = STATE_SUBMITTED; + enState = EnumState::STATE_SUBMITTED; } else if (!stateText.compare("has been started")) { - enState = STATE_RUNNING; + enState = EnumState::STATE_RUNNING; } else if (!stateText.compare("is differential")) { - enState = STATE_RUNNING_DIFFERENTIAL; + enState = EnumState::STATE_RUNNING_DIFFERENTIAL; } else if (!stateText.compare("has been completed")) { - enState = STATE_COMPLETED; + enState = EnumState::STATE_COMPLETED; } else if (!stateText.compare("has been aborted")) { - enState = STATE_ABORTED; + enState = EnumState::STATE_ABORTED; } else if (!stateText.compare("has been partially aborted")) { - enState = STATE_PARTIALLY_ABORTED; + enState = EnumState::STATE_PARTIALLY_ABORTED; } return enState; } // Convert the status enum to a text description - static const char* getStateText(enumState enState) - { + static const char* getStateText(EnumState enState) { const char* stateText; switch (enState) { - case STATE_ERRORED: + case EnumState::STATE_ERRORED: stateText = "has errored"; break; - case STATE_NEVERRAN: + case EnumState::STATE_NEVERRAN: stateText = "has never been started"; break; - case STATE_SUBMITTED: + case 
EnumState::STATE_SUBMITTED: stateText = "has been submitted"; break; - case STATE_RUNNING: + case EnumState::STATE_RUNNING: stateText = "has been started"; break; - case STATE_RUNNING_DIFFERENTIAL: + case EnumState::STATE_RUNNING_DIFFERENTIAL: stateText = "is differential"; break; - case STATE_COMPLETED: + case EnumState::STATE_COMPLETED: stateText = "has been completed"; break; - case STATE_ABORTED: + case EnumState::STATE_ABORTED: stateText = "has been aborted"; break; - case STATE_PARTIALLY_ABORTED: + case EnumState::STATE_PARTIALLY_ABORTED: stateText = "has been partially aborted"; break; default: @@ -160,34 +160,33 @@ public: } // Convert the status enum to a name - static const char* getStateName(enumState enState) - { + static const char* getStateName(EnumState enState) { const char* s; switch (enState) { - case STATE_ERRORED: + case EnumState::STATE_ERRORED: s = "Errored"; break; - case STATE_NEVERRAN: + case EnumState::STATE_NEVERRAN: s = "NeverRan"; break; - case STATE_SUBMITTED: + case EnumState::STATE_SUBMITTED: s = "Submitted"; break; - case STATE_RUNNING: + case EnumState::STATE_RUNNING: s = "Running"; break; - case STATE_RUNNING_DIFFERENTIAL: + case EnumState::STATE_RUNNING_DIFFERENTIAL: s = "RunningDifferentially"; break; - case STATE_COMPLETED: + case EnumState::STATE_COMPLETED: s = "Completed"; break; - case STATE_ABORTED: + case EnumState::STATE_ABORTED: s = "Aborted"; break; - case STATE_PARTIALLY_ABORTED: + case EnumState::STATE_PARTIALLY_ABORTED: s = "Aborting"; break; default: @@ -199,16 +198,15 @@ public: } // Determine if the specified state is runnable - static bool isRunnable(enumState enState) - { + static bool isRunnable(EnumState enState) { bool isRunnable = false; switch (enState) { - case STATE_SUBMITTED: - case STATE_RUNNING: - case STATE_RUNNING_DIFFERENTIAL: - case STATE_PARTIALLY_ABORTED: + case EnumState::STATE_SUBMITTED: + case EnumState::STATE_RUNNING: + case EnumState::STATE_RUNNING_DIFFERENTIAL: + case EnumState::STATE_PARTIALLY_ABORTED: isRunnable = true; break; default: @@ -286,11 +284,19 @@ public: // - submit a restore on the given tagName // - Optionally wait for the restore's completion. Will restore_error if restore fails or is aborted. // restore() will return the targetVersion which will be either the valid version passed in or the max restorable version for the given url. 
- Future restore(Database cx, Optional cxOrig, Key tagName, Key url, Standalone> ranges, bool waitForComplete = true, Version targetVersion = -1, bool verbose = true, Key addPrefix = Key(), Key removePrefix = Key(), bool lockDB = true); - Future restore(Database cx, Optional cxOrig, Key tagName, Key url, bool waitForComplete = true, Version targetVersion = -1, bool verbose = true, KeyRange range = normalKeys, Key addPrefix = Key(), Key removePrefix = Key(), bool lockDB = true) { + Future restore(Database cx, Optional cxOrig, Key tagName, Key url, + Standalone> ranges, bool waitForComplete = true, + Version targetVersion = -1, bool verbose = true, Key addPrefix = Key(), + Key removePrefix = Key(), bool lockDB = true, bool incrementalBackupOnly = false, + Version beginVersion = -1); + Future restore(Database cx, Optional cxOrig, Key tagName, Key url, bool waitForComplete = true, + Version targetVersion = -1, bool verbose = true, KeyRange range = normalKeys, + Key addPrefix = Key(), Key removePrefix = Key(), bool lockDB = true, + bool incrementalBackupOnly = false, Version beginVersion = -1) { Standalone> rangeRef; rangeRef.push_back_deep(rangeRef.arena(), range); - return restore(cx, cxOrig, tagName, url, rangeRef, waitForComplete, targetVersion, verbose, addPrefix, removePrefix, lockDB); + return restore(cx, cxOrig, tagName, url, rangeRef, waitForComplete, targetVersion, verbose, addPrefix, + removePrefix, lockDB, incrementalBackupOnly, beginVersion); } Future atomicRestore(Database cx, Key tagName, Standalone> ranges, Key addPrefix = Key(), Key removePrefix = Key()); Future atomicRestore(Database cx, Key tagName, KeyRange range = normalKeys, Key addPrefix = Key(), Key removePrefix = Key()) { @@ -315,13 +321,14 @@ public: Future submitBackup(Reference tr, Key outContainer, int snapshotIntervalSeconds, std::string tagName, Standalone> backupRanges, - bool stopWhenDone = true, bool partitionedLog = false); + bool stopWhenDone = true, bool partitionedLog = false, + bool incrementalBackupOnly = false); Future submitBackup(Database cx, Key outContainer, int snapshotIntervalSeconds, std::string tagName, Standalone> backupRanges, bool stopWhenDone = true, - bool partitionedLog = false) { + bool partitionedLog = false, bool incrementalBackupOnly = false) { return runRYWTransactionFailIfLocked(cx, [=](Reference tr) { return submitBackup(tr, outContainer, snapshotIntervalSeconds, tagName, backupRanges, stopWhenDone, - partitionedLog); + partitionedLog, incrementalBackupOnly); }); } @@ -350,7 +357,8 @@ public: // stopWhenDone will return when the backup is stopped, if enabled. Otherwise, it // will return when the backup directory is restorable. - Future waitBackup(Database cx, std::string tagName, bool stopWhenDone = true, Reference *pContainer = nullptr, UID *pUID = nullptr); + Future waitBackup(Database cx, std::string tagName, bool stopWhenDone = true, + Reference* pContainer = nullptr, UID* pUID = nullptr); static const Key keyLastRestorable; @@ -423,8 +431,8 @@ public: Future getStatus(Database cx, int errorLimit, Key tagName); - Future getStateValue(Reference tr, UID logUid, bool snapshot = false); - Future getStateValue(Database cx, UID logUid) { + Future getStateValue(Reference tr, UID logUid, bool snapshot = false); + Future getStateValue(Database cx, UID logUid) { return runRYWTransaction(cx, [=](Reference tr){ return getStateValue(tr, logUid); }); } @@ -443,8 +451,8 @@ public: // stopWhenDone will return when the backup is stopped, if enabled. 
Otherwise, it // will return when the backup directory is restorable. - Future waitBackup(Database cx, Key tagName, bool stopWhenDone = true); - Future waitSubmitted(Database cx, Key tagName); + Future waitBackup(Database cx, Key tagName, bool stopWhenDone = true); + Future waitSubmitted(Database cx, Key tagName); Future waitUpgradeToLatestDrVersion(Database cx, Key tagName); static const Key keyAddPrefix; @@ -513,9 +521,15 @@ ACTOR Future applyMutations(Database cx, Key uid, Key addPrefix, Key remov NotifiedVersion* committedVersion, Reference> keyVersion); ACTOR Future cleanupBackup(Database cx, bool deleteData); -typedef BackupAgentBase::enumState EBackupState; -template<> inline Tuple Codec::pack(EBackupState const &val) { return Tuple().append(val); } -template<> inline EBackupState Codec::unpack(Tuple const &val) { return (EBackupState)val.getInt(0); } +using EBackupState = BackupAgentBase::EnumState; +template <> +inline Tuple Codec::pack(EBackupState const& val) { + return Tuple().append(static_cast(val)); +} +template <> +inline EBackupState Codec::unpack(Tuple const& val) { + return static_cast(val.getInt(0)); +} // Key backed tags are a single-key slice of the TagUidMap, defined below. // The Value type of the key is a UidAndAbortedFlagT which is a pair of {UID, aborted_flag} @@ -810,6 +824,11 @@ public: return configSpace.pack(LiteralStringRef(__FUNCTION__)); } + // Set to true if only requesting incremental backup without base snapshot. + KeyBackedProperty incrementalBackupOnly() { + return configSpace.pack(LiteralStringRef(__FUNCTION__)); + } + // Latest version for which all prior versions have saved by backup workers. KeyBackedProperty latestBackupWorkerSavedVersion() { return configSpace.pack(LiteralStringRef(__FUNCTION__)); @@ -847,17 +866,25 @@ public: auto workerEnabled = backupWorkerEnabled().get(tr); auto plogEnabled = partitionedLogEnabled().get(tr); auto workerVersion = latestBackupWorkerSavedVersion().get(tr); - return map(success(lastLog) && success(firstSnapshot) && success(workerEnabled) && success(plogEnabled) && success(workerVersion), [=](Void) -> Optional { - // The latest log greater than the oldest snapshot is the restorable version - Optional logVersion = workerEnabled.get().present() && workerEnabled.get().get() && - plogEnabled.get().present() && plogEnabled.get().get() - ? workerVersion.get() - : lastLog.get(); - if (logVersion.present() && firstSnapshot.get().present() && logVersion.get() > firstSnapshot.get().get()) { - return std::max(logVersion.get() - 1, firstSnapshot.get().get()); - } - return {}; - }); + auto incrementalBackup = incrementalBackupOnly().get(tr); + return map(success(lastLog) && success(firstSnapshot) && success(workerEnabled) && success(plogEnabled) && + success(workerVersion) && success(incrementalBackup), + [=](Void) -> Optional { + // The latest log greater than the oldest snapshot is the restorable version + Optional logVersion = workerEnabled.get().present() && workerEnabled.get().get() && + plogEnabled.get().present() && plogEnabled.get().get() + ? 
workerVersion.get() + : lastLog.get(); + if (logVersion.present() && firstSnapshot.get().present() && + logVersion.get() > firstSnapshot.get().get()) { + return std::max(logVersion.get() - 1, firstSnapshot.get().get()); + } + if (logVersion.present() && incrementalBackup.isReady() && incrementalBackup.get().present() && + incrementalBackup.get().get()) { + return logVersion.get() - 1; + } + return {}; + }); } KeyBackedProperty> backupRanges() { @@ -936,5 +963,7 @@ Value makePadding(int size); ACTOR Future transformRestoredDatabase(Database cx, Standalone> backupRanges, Key addPrefix, Key removePrefix); +void simulateBlobFailure(); + #include "flow/unactorcompiler.h" #endif diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index 7eab12ee49..d55232ffe0 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -23,6 +23,7 @@ #include "fdbclient/BackupAgent.actor.h" #include "fdbclient/FDBTypes.h" #include "fdbclient/JsonBuilder.h" +#include "flow/Arena.h" #include "flow/Trace.h" #include "flow/UnitTest.h" #include "flow/Hash3.h" @@ -245,7 +246,7 @@ std::string BackupDescription::toJSON() const { * file written will be after the start version of the snapshot's execution. * * Log files are at file paths like - * /plogs/...log,startVersion,endVersion,UID,tagID-of-N,blocksize + * /plogs/.../log,startVersion,endVersion,UID,tagID-of-N,blocksize * /logs/.../log,startVersion,endVersion,UID,blockSize * where ... is a multi level path which sorts lexically into version order and results in approximately 1 * unique folder per day containing about 5,000 files. Logs after FDB 6.3 are stored in "plogs" @@ -1343,19 +1344,44 @@ public: ACTOR static Future getSnapshotFileKeyRange_impl(Reference bc, RangeFile file) { - state Reference inFile = wait(bc->readFile(file.fileName)); + state int readFileRetries = 0; state bool beginKeySet = false; state Key beginKey; state Key endKey; - state int64_t j = 0; - for (; j < file.fileSize; j += file.blockSize) { - int64_t len = std::min(file.blockSize, file.fileSize - j); - Standalone> blockData = wait(fileBackup::decodeRangeFileBlock(inFile, j, len)); - if (!beginKeySet) { - beginKey = blockData.front().key; - beginKeySet = true; + loop { + try { + state Reference inFile = wait(bc->readFile(file.fileName)); + beginKeySet = false; + state int64_t j = 0; + for (; j < file.fileSize; j += file.blockSize) { + int64_t len = std::min(file.blockSize, file.fileSize - j); + Standalone> blockData = + wait(fileBackup::decodeRangeFileBlock(inFile, j, len)); + if (!beginKeySet) { + beginKey = blockData.front().key; + beginKeySet = true; + } + endKey = blockData.back().key; + } + break; + } catch (Error& e) { + if (e.code() == error_code_restore_bad_read || + e.code() == error_code_restore_unsupported_file_version || + e.code() == error_code_restore_corrupted_data_padding) { // no retriable error + TraceEvent(SevError, "BackupContainerGetSnapshotFileKeyRange").error(e); + throw; + } else if (e.code() == error_code_http_request_failed || e.code() == error_code_connection_failed || + e.code() == error_code_timed_out || e.code() == error_code_lookup_failed) { + // blob http request failure, retry + TraceEvent(SevWarnAlways, "BackupContainerGetSnapshotFileKeyRangeConnectionFailure") + .detail("Retries", ++readFileRetries) + .error(e); + wait(delayJittered(0.1)); + } else { + TraceEvent(SevError, "BackupContainerGetSnapshotFileKeyRangeUnexpectedError").error(e); + throw; + } } - endKey = blockData.back().key; } return 
KeyRange(KeyRangeRef(beginKey, endKey)); } @@ -1365,24 +1391,88 @@ public: return getSnapshotFileKeyRange_impl(Reference::addRef(this), file); } - ACTOR static Future> getRestoreSet_impl(Reference bc, Version targetVersion) { - // Find the most recent keyrange snapshot to end at or before targetVersion - state Optional snapshot; - std::vector snapshots = wait(bc->listKeyspaceSnapshots()); - for(auto const &s : snapshots) { - if(s.endVersion <= targetVersion) - snapshot = s; + static Optional getRestoreSetFromLogs(std::vector logs, Version targetVersion, + RestorableFileSet restorable) { + Version end = logs.begin()->endVersion; + computeRestoreEndVersion(logs, &restorable.logs, &end, targetVersion); + if (end >= targetVersion) { + restorable.continuousBeginVersion = logs.begin()->beginVersion; + restorable.continuousEndVersion = end; + return Optional(restorable); + } + return Optional(); + } + + ACTOR static Future> getRestoreSet_impl(Reference bc, + Version targetVersion, + VectorRef keyRangesFilter, bool logsOnly = false, + Version beginVersion = invalidVersion) { + // Does not support use keyRangesFilter for logsOnly yet + if (logsOnly && !keyRangesFilter.empty()) { + TraceEvent(SevError, "BackupContainerRestoreSetUnsupportedAPI").detail("KeyRangesFilter", keyRangesFilter.size()); + return Optional(); } - if(snapshot.present()) { + if (logsOnly) { + state RestorableFileSet restorableSet; + state std::vector logFiles; + Version begin = beginVersion == invalidVersion ? 0 : beginVersion; + wait(store(logFiles, bc->listLogFiles(begin, targetVersion, false))); + // List logs in version order so log continuity can be analyzed + std::sort(logFiles.begin(), logFiles.end()); + if (!logFiles.empty()) { + return getRestoreSetFromLogs(logFiles, targetVersion, restorableSet); + } + } + + // Find the most recent keyrange snapshot through which we can restore filtered key ranges into targetVersion. + state std::vector snapshots = wait(bc->listKeyspaceSnapshots()); + state int i = snapshots.size() - 1; + for (; i >= 0; i--) { + // The smallest version of filtered range files >= snapshot beginVersion > targetVersion + if (targetVersion >= 0 && snapshots[i].beginVersion > targetVersion) { + continue; + } + state RestorableFileSet restorable; - restorable.snapshot = snapshot.get(); - restorable.targetVersion = targetVersion; + state Version minKeyRangeVersion = MAX_VERSION; + state Version maxKeyRangeVersion = -1; std::pair, std::map> results = - wait(bc->readKeyspaceSnapshot(snapshot.get())); - restorable.ranges = std::move(results.first); - restorable.keyRanges = std::move(results.second); + wait(bc->readKeyspaceSnapshot(snapshots[i])); + + // Old backup does not have metadata about key ranges and can not be filtered with key ranges. + if (keyRangesFilter.size() && results.second.empty() && !results.first.empty()) { + throw backup_not_filterable_with_key_ranges(); + } + + // Filter by keyRangesFilter. 
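
getRestoreSetFromLogs() and the new logsOnly path above reduce to a continuity analysis over the listed log files: sort them by version and check whether an unbroken chain reaches the target version, which is what computeRestoreEndVersion establishes and what makes an incremental (logs-only) restore possible without any range files. A simplified standalone sketch of that check, assuming half-open [beginVersion, endVersion) files, not the actual computeRestoreEndVersion implementation:

```cpp
#include <algorithm>
#include <cstdint>
#include <optional>
#include <utility>
#include <vector>

struct LogFileSketch { int64_t beginVersion, endVersion; };  // simplified stand-in for LogFile

// Sort by version, then extend the contiguous prefix; a restore from logs alone is only
// possible if that prefix reaches targetVersion.
std::optional<std::pair<int64_t, int64_t>> continuousSpan(std::vector<LogFileSketch> logs,
                                                          int64_t targetVersion) {
    if (logs.empty()) return std::nullopt;
    std::sort(logs.begin(), logs.end(),
              [](const LogFileSketch& a, const LogFileSketch& b) { return a.beginVersion < b.beginVersion; });
    int64_t end = logs.front().endVersion;
    for (const auto& f : logs) {
        if (f.beginVersion > end) break;   // gap in the log chain: continuity stops here
        end = std::max(end, f.endVersion);
    }
    if (end >= targetVersion) return std::make_pair(logs.front().beginVersion, end);
    return std::nullopt;                   // cannot restore to targetVersion from these logs
}
```
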
+ if (keyRangesFilter.empty()) { + restorable.ranges = std::move(results.first); + restorable.keyRanges = std::move(results.second); + minKeyRangeVersion = snapshots[i].beginVersion; + maxKeyRangeVersion = snapshots[i].endVersion; + } else { + for (const auto& rangeFile : results.first) { + const auto& keyRange = results.second.at(rangeFile.fileName); + if (keyRange.intersects(keyRangesFilter)) { + restorable.ranges.push_back(rangeFile); + restorable.keyRanges[rangeFile.fileName] = keyRange; + minKeyRangeVersion = std::min(minKeyRangeVersion, rangeFile.version); + maxKeyRangeVersion = std::max(maxKeyRangeVersion, rangeFile.version); + } + } + // No range file matches 'keyRangesFilter'. + if (restorable.ranges.empty()) { + throw backup_not_overlapped_with_keys_filter(); + } + } + // 'latestVersion' represents using the minimum restorable version in a snapshot. + restorable.targetVersion = targetVersion == latestVersion ? maxKeyRangeVersion : targetVersion; + // Any version < maxKeyRangeVersion is not restorable. + if (restorable.targetVersion < maxKeyRangeVersion) continue; + + restorable.snapshot = snapshots[i]; // TODO: Reenable the sanity check after TooManyFiles error is resolved if (false && g_network->isSimulated()) { // Sanity check key ranges @@ -1396,18 +1486,21 @@ public: } } - // No logs needed if there is a complete key space snapshot at the target version. - if (snapshot.get().beginVersion == snapshot.get().endVersion && - snapshot.get().endVersion == targetVersion) { + // No logs needed if there is a complete filtered key space snapshot at the target version. + if (minKeyRangeVersion == maxKeyRangeVersion && maxKeyRangeVersion == restorable.targetVersion) { restorable.continuousBeginVersion = restorable.continuousEndVersion = invalidVersion; + TraceEvent("BackupContainerGetRestorableFilesWithoutLogs") + .detail("KeyRangeVersion", restorable.targetVersion) + .detail("NumberOfRangeFiles", restorable.ranges.size()) + .detail("KeyRangesFilter", printable(keyRangesFilter)); return Optional(restorable); } // FIXME: check if there are tagged logs. for each tag, there is no version gap. state std::vector logs; state std::vector plogs; - wait(store(logs, bc->listLogFiles(snapshot.get().beginVersion, targetVersion, false)) && - store(plogs, bc->listLogFiles(snapshot.get().beginVersion, targetVersion, true))); + wait(store(logs, bc->listLogFiles(minKeyRangeVersion, restorable.targetVersion, false)) && + store(plogs, bc->listLogFiles(minKeyRangeVersion, restorable.targetVersion, true))); if (plogs.size() > 0) { logs.swap(plogs); @@ -1419,13 +1512,12 @@ public: // Remove duplicated log files that can happen for old epochs. 
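
The keyRangesFilter loop above keeps a range file only if its key range intersects at least one filter range, and records the minimum and maximum versions of the files kept. The per-pair test is the usual half-open-interval overlap check that the new KeyRangeRef::intersects(VectorRef<KeyRangeRef>) overload (added to FDBTypes.h later in this diff) wraps in a loop. A minimal standalone sketch, with std::string standing in for KeyRef:

```cpp
#include <algorithm>
#include <cstdint>
#include <string>
#include <vector>

struct RangeSketch { std::string begin, end; };                                   // half-open [begin, end)
struct RangeFileSketch { std::string fileName; int64_t version; RangeSketch keys; };

bool intersects(const RangeSketch& a, const RangeSketch& b) { return a.begin < b.end && b.begin < a.end; }

bool intersectsAny(const RangeSketch& r, const std::vector<RangeSketch>& filter) {
    for (const auto& f : filter)
        if (intersects(r, f)) return true;
    return false;
}

// Keep only the snapshot files touching the filter, tracking the version span they cover,
// as the keyRangesFilter branch of getRestoreSet_impl does above.
std::vector<RangeFileSketch> filterByRanges(const std::vector<RangeFileSketch>& files,
                                            const std::vector<RangeSketch>& filter,
                                            int64_t& minVersion, int64_t& maxVersion) {
    std::vector<RangeFileSketch> kept;
    for (const auto& f : files) {
        if (filter.empty() || intersectsAny(f.keys, filter)) {
            kept.push_back(f);
            minVersion = std::min(minVersion, f.version);
            maxVersion = std::max(maxVersion, f.version);
        }
    }
    return kept;
}
```
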
std::vector filtered = filterDuplicates(logs); - restorable.logs.swap(filtered); // sort by version order again for continuous analysis std::sort(restorable.logs.begin(), restorable.logs.end()); - if (isPartitionedLogsContinuous(restorable.logs, snapshot.get().beginVersion, targetVersion)) { - restorable.continuousBeginVersion = snapshot.get().beginVersion; - restorable.continuousEndVersion = targetVersion + 1; // not inclusive + if (isPartitionedLogsContinuous(restorable.logs, minKeyRangeVersion, restorable.targetVersion)) { + restorable.continuousBeginVersion = minKeyRangeVersion; + restorable.continuousEndVersion = restorable.targetVersion + 1; // not inclusive return Optional(restorable); } return Optional(); @@ -1433,24 +1525,19 @@ public: // List logs in version order so log continuity can be analyzed std::sort(logs.begin(), logs.end()); - - // If there are logs and the first one starts at or before the snapshot begin version then proceed - if(!logs.empty() && logs.front().beginVersion <= snapshot.get().beginVersion) { - Version end = logs.begin()->endVersion; - computeRestoreEndVersion(logs, &restorable.logs, &end, targetVersion); - if (end >= targetVersion) { - restorable.continuousBeginVersion = logs.begin()->beginVersion; - restorable.continuousEndVersion = end; - return Optional(restorable); - } + // If there are logs and the first one starts at or before the keyrange's snapshot begin version, then + // it is valid restore set and proceed + if (!logs.empty() && logs.front().beginVersion <= minKeyRangeVersion) { + return getRestoreSetFromLogs(logs, targetVersion, restorable); } } - return Optional(); } - Future> getRestoreSet(Version targetVersion) final { - return getRestoreSet_impl(Reference::addRef(this), targetVersion); + Future> getRestoreSet(Version targetVersion, VectorRef keyRangesFilter, + bool logsOnly, Version beginVersion) final { + return getRestoreSet_impl(Reference::addRef(this), targetVersion, keyRangesFilter, + logsOnly, beginVersion); } private: diff --git a/fdbclient/BackupContainer.h b/fdbclient/BackupContainer.h index 8ac79937dd..1b7fa7a7eb 100644 --- a/fdbclient/BackupContainer.h +++ b/fdbclient/BackupContainer.h @@ -280,9 +280,13 @@ public: virtual Future dumpFileList(Version begin = 0, Version end = std::numeric_limits::max()) = 0; - // Get exactly the files necessary to restore to targetVersion. Returns non-present if - // restore to given version is not possible. - virtual Future> getRestoreSet(Version targetVersion) = 0; + // Get exactly the files necessary to restore the key space filtered by the specified key ranges to targetVersion. + // If targetVersion is 'latestVersion', use the minimum restorable version in a snapshot. + // If logsOnly is set, only use log files in [beginVersion, targetVervions) in restore set. + // Returns non-present if restoring to the given version is not possible. 
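
As a hedged usage sketch of the widened getRestoreSet interface described above: this fragment would sit inside an ACTOR with fdbclient/BackupContainer.h available, and it assumes the filter parameter is a VectorRef<KeyRangeRef> and the result an Optional<RestorableFileSet>, consistent with the getRestoreSet_impl code above. The URL and key range are placeholders.

```cpp
state Reference<IBackupContainer> bc = IBackupContainer::openContainer(url);
state Standalone<VectorRef<KeyRangeRef>> filter;
filter.push_back_deep(filter.arena(), KeyRangeRef(LiteralStringRef("a"), LiteralStringRef("b")));

// Files needed to restore only keys in [a, b); latestVersion asks for the newest
// restorable version of a snapshot rather than an explicit target version.
Optional<RestorableFileSet> rs = wait(bc->getRestoreSet(latestVersion, filter));
if (!rs.present()) {
    // Restoring the filtered ranges to that version is not possible from this container's files.
}
```
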
+ virtual Future> getRestoreSet(Version targetVersion, + VectorRef keyRangesFilter = {}, + bool logsOnly = false, Version beginVersion = -1) = 0; // Get an IBackupContainer based on a container spec string static Reference openContainer(std::string url); diff --git a/fdbclient/BlobStore.actor.cpp b/fdbclient/BlobStore.actor.cpp index 664d82bd8d..b29af56172 100644 --- a/fdbclient/BlobStore.actor.cpp +++ b/fdbclient/BlobStore.actor.cpp @@ -277,7 +277,7 @@ ACTOR Future bucketExists_impl(Reference b, std::string std::string resource = std::string("/") + bucket; HTTP::Headers headers; - Reference r = wait(b->doRequest("HEAD", resource, headers, NULL, 0, {200, 404})); + Reference r = wait(b->doRequest("HEAD", resource, headers, nullptr, 0, {200, 404})); return r->code == 200; } @@ -291,7 +291,7 @@ ACTOR Future objectExists_impl(Reference b, std::string std::string resource = std::string("/") + bucket + "/" + object; HTTP::Headers headers; - Reference r = wait(b->doRequest("HEAD", resource, headers, NULL, 0, {200, 404})); + Reference r = wait(b->doRequest("HEAD", resource, headers, nullptr, 0, {200, 404})); return r->code == 200; } @@ -305,7 +305,7 @@ ACTOR Future deleteObject_impl(Reference b, std::string std::string resource = std::string("/") + bucket + "/" + object; HTTP::Headers headers; // 200 or 204 means object successfully deleted, 404 means it already doesn't exist, so any of those are considered successful - Reference r = wait(b->doRequest("DELETE", resource, headers, NULL, 0, {200, 204, 404})); + Reference r = wait(b->doRequest("DELETE", resource, headers, nullptr, 0, {200, 204, 404})); // But if the object already did not exist then the 'delete' is assumed to be successful but a warning is logged. if(r->code == 404) { @@ -386,7 +386,7 @@ ACTOR Future createBucket_impl(Reference b, std::string if(!exists) { std::string resource = std::string("/") + bucket; HTTP::Headers headers; - Reference r = wait(b->doRequest("PUT", resource, headers, NULL, 0, {200, 409})); + Reference r = wait(b->doRequest("PUT", resource, headers, nullptr, 0, {200, 409})); } return Void(); } @@ -401,7 +401,7 @@ ACTOR Future objectSize_impl(Reference b, std::strin std::string resource = std::string("/") + bucket + "/" + object; HTTP::Headers headers; - Reference r = wait(b->doRequest("HEAD", resource, headers, NULL, 0, {200, 404})); + Reference r = wait(b->doRequest("HEAD", resource, headers, nullptr, 0, {200, 404})); if(r->code == 404) throw file_not_found(); return r->contentLen; @@ -737,7 +737,7 @@ ACTOR Future listObjectsStream_impl(Reference bstore, s HTTP::Headers headers; state std::string fullResource = resource + HTTP::urlEncode(lastFile); lastFile.clear(); - Reference r = wait(bstore->doRequest("GET", fullResource, headers, NULL, 0, {200})); + Reference r = wait(bstore->doRequest("GET", fullResource, headers, nullptr, 0, {200})); listReleaser.release(); try { @@ -782,7 +782,7 @@ ACTOR Future listObjectsStream_impl(Reference bstore, s if(size == nullptr) { throw http_bad_response(); } - object.size = strtoull(size->value(), NULL, 10); + object.size = strtoull(size->value(), nullptr, 10); listResult.objects.push_back(object); } @@ -893,7 +893,7 @@ ACTOR Future> listBuckets_impl(Reference r = wait(bstore->doRequest("GET", fullResource, headers, NULL, 0, {200})); + Reference r = wait(bstore->doRequest("GET", fullResource, headers, nullptr, 0, {200})); listReleaser.release(); try { @@ -1024,7 +1024,7 @@ ACTOR Future readEntireFile_impl(Reference bstor std::string resource = std::string("/") + bucket + "/" + 
object; HTTP::Headers headers; - Reference r = wait(bstore->doRequest("GET", resource, headers, NULL, 0, {200, 404})); + Reference r = wait(bstore->doRequest("GET", resource, headers, nullptr, 0, {200, 404})); if(r->code == 404) throw file_not_found(); return r->content; @@ -1057,7 +1057,7 @@ ACTOR Future writeEntireFileFromBuffer_impl(Reference b ACTOR Future writeEntireFile_impl(Reference bstore, std::string bucket, std::string object, std::string content) { state UnsentPacketQueue packets; - PacketWriter pw(packets.getWriteBuffer(content.size()), NULL, Unversioned()); + PacketWriter pw(packets.getWriteBuffer(content.size()), nullptr, Unversioned()); pw.serializeBytes(content); if(content.size() > bstore->knobs.multipart_max_part_size) throw file_too_large(); @@ -1095,7 +1095,7 @@ ACTOR Future readObject_impl(Reference bstore, std::stri std::string resource = std::string("/") + bucket + "/" + object; HTTP::Headers headers; headers["Range"] = format("bytes=%lld-%lld", offset, offset + length - 1); - Reference r = wait(bstore->doRequest("GET", resource, headers, NULL, 0, {200, 206, 404})); + Reference r = wait(bstore->doRequest("GET", resource, headers, nullptr, 0, {200, 206, 404})); if(r->code == 404) throw file_not_found(); if(r->contentLen != r->content.size()) // Double check that this wasn't a header-only response, probably unnecessary @@ -1114,7 +1114,7 @@ ACTOR static Future beginMultiPartUpload_impl(Reference r = wait(bstore->doRequest("POST", resource, headers, NULL, 0, {200})); + Reference r = wait(bstore->doRequest("POST", resource, headers, nullptr, 0, {200})); try { xml_document<> doc; @@ -1180,7 +1180,7 @@ ACTOR Future finishMultiPartUpload_impl(Reference bstor std::string resource = format("/%s/%s?uploadId=%s", bucket.c_str(), object.c_str(), uploadID.c_str()); HTTP::Headers headers; - PacketWriter pw(part_list.getWriteBuffer(manifest.size()), NULL, Unversioned()); + PacketWriter pw(part_list.getWriteBuffer(manifest.size()), nullptr, Unversioned()); pw.serializeBytes(manifest); Reference r = wait(bstore->doRequest("POST", resource, headers, &part_list, manifest.size(), {200})); // TODO: In the event that the client times out just before the request completes (so the client is unaware) then the next retry diff --git a/fdbclient/CMakeLists.txt b/fdbclient/CMakeLists.txt index 43f9343b28..3f7333b632 100644 --- a/fdbclient/CMakeLists.txt +++ b/fdbclient/CMakeLists.txt @@ -33,7 +33,7 @@ set(FDBCLIENT_SRCS Knobs.h ManagementAPI.actor.cpp ManagementAPI.actor.h - MasterProxyInterface.h + CommitProxyInterface.h MetricLogger.actor.cpp MetricLogger.h MonitorLeader.actor.cpp diff --git a/fdbclient/ClientWorkerInterface.h b/fdbclient/ClientWorkerInterface.h index 4b4f822fc9..c4bdb2bc1b 100644 --- a/fdbclient/ClientWorkerInterface.h +++ b/fdbclient/ClientWorkerInterface.h @@ -25,7 +25,7 @@ #include "fdbclient/FDBTypes.h" #include "fdbrpc/FailureMonitor.h" #include "fdbclient/Status.h" -#include "fdbclient/MasterProxyInterface.h" +#include "fdbclient/CommitProxyInterface.h" // Streams from WorkerInterface that are safe and useful to call from a client. // A ClientWorkerInterface is embedded as the first element of a WorkerInterface. 
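
The BlobStore hunks above are a mechanical NULL to nullptr cleanup; the practical difference is that nullptr has its own type (std::nullptr_t) and therefore behaves unambiguously in overload resolution and template deduction. A self-contained illustration:

```cpp
#include <cstddef>
#include <cstdio>

void f(int)         { std::puts("f(int)"); }
void f(const char*) { std::puts("f(const char*)"); }

int main() {
    // f(NULL);   // NULL is an integer constant (0 or 0L): ambiguous or silently picks f(int)
    f(nullptr);   // nullptr_t converts only to pointer types, so this always picks f(const char*)
    return 0;
}
```
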
diff --git a/fdbclient/ClusterInterface.h b/fdbclient/ClusterInterface.h index c957ae8633..2570666b12 100644 --- a/fdbclient/ClusterInterface.h +++ b/fdbclient/ClusterInterface.h @@ -25,7 +25,7 @@ #include "fdbclient/FDBTypes.h" #include "fdbrpc/FailureMonitor.h" #include "fdbclient/Status.h" -#include "fdbclient/MasterProxyInterface.h" +#include "fdbclient/CommitProxyInterface.h" #include "fdbclient/ClientWorkerInterface.h" struct ClusterInterface { diff --git a/fdbclient/MasterProxyInterface.h b/fdbclient/CommitProxyInterface.h similarity index 96% rename from fdbclient/MasterProxyInterface.h rename to fdbclient/CommitProxyInterface.h index fbd98c6c2b..1cf63fcd2b 100644 --- a/fdbclient/MasterProxyInterface.h +++ b/fdbclient/CommitProxyInterface.h @@ -1,6 +1,6 @@ /* - * MasterProxyInterface.h + * CommitProxyInterface.h * * This source file is part of the FoundationDB open source project * @@ -19,8 +19,8 @@ * limitations under the License. */ -#ifndef FDBCLIENT_MASTERPROXYINTERFACE_H -#define FDBCLIENT_MASTERPROXYINTERFACE_H +#ifndef FDBCLIENT_COMMITPROXYINTERFACE_H +#define FDBCLIENT_COMMITPROXYINTERFACE_H #pragma once #include @@ -36,7 +36,7 @@ #include "fdbrpc/TimedRequest.h" #include "GrvProxyInterface.h" -struct MasterProxyInterface { +struct CommitProxyInterface { constexpr static FileIdentifier file_identifier = 8954922; enum { LocationAwareLoadBalance = 1 }; enum { AlwaysFresh = 1 }; @@ -59,8 +59,8 @@ struct MasterProxyInterface { UID id() const { return commit.getEndpoint().token; } std::string toString() const { return id().shortString(); } - bool operator == (MasterProxyInterface const& r) const { return id() == r.id(); } - bool operator != (MasterProxyInterface const& r) const { return id() != r.id(); } + bool operator==(CommitProxyInterface const& r) const { return id() == r.id(); } + bool operator!=(CommitProxyInterface const& r) const { return id() != r.id(); } NetworkAddress address() const { return commit.getEndpoint().getPrimaryAddress(); } template @@ -100,9 +100,10 @@ struct MasterProxyInterface { struct ClientDBInfo { constexpr static FileIdentifier file_identifier = 5355080; UID id; // Changes each time anything else changes - vector< GrvProxyInterface > grvProxies; - vector< MasterProxyInterface > masterProxies; - Optional firstProxy; //not serialized, used for commitOnFirstProxy when the proxies vector has been shrunk + vector grvProxies; + vector commitProxies; + Optional + firstCommitProxy; // not serialized, used for commitOnFirstProxy when the commit proxies vector has been shrunk double clientTxnInfoSampleRate; int64_t clientTxnInfoSizeLimit; Optional forward; @@ -122,7 +123,7 @@ struct ClientDBInfo { if constexpr (!is_fb_function) { ASSERT(ar.protocolVersion().isValid()); } - serializer(ar, grvProxies, masterProxies, id, clientTxnInfoSampleRate, clientTxnInfoSizeLimit, forward, + serializer(ar, grvProxies, commitProxies, id, clientTxnInfoSampleRate, clientTxnInfoSizeLimit, forward, transactionTagSampleRate, transactionTagSampleCost); } }; diff --git a/fdbclient/CoordinationInterface.h b/fdbclient/CoordinationInterface.h index 0dc2970ca1..95423bf6ca 100644 --- a/fdbclient/CoordinationInterface.h +++ b/fdbclient/CoordinationInterface.h @@ -25,7 +25,7 @@ #include "fdbclient/FDBTypes.h" #include "fdbrpc/fdbrpc.h" #include "fdbrpc/Locality.h" -#include "fdbclient/MasterProxyInterface.h" +#include "fdbclient/CommitProxyInterface.h" #include "fdbclient/ClusterInterface.h" const int MAX_CLUSTER_FILE_BYTES = 60000; diff --git a/fdbclient/DatabaseBackupAgent.actor.cpp 
b/fdbclient/DatabaseBackupAgent.actor.cpp index 1162986e10..81c865d09a 100644 --- a/fdbclient/DatabaseBackupAgent.actor.cpp +++ b/fdbclient/DatabaseBackupAgent.actor.cpp @@ -965,9 +965,10 @@ namespace dbBackup { tr->clear(KeyRangeRef(logsPath, strinc(logsPath))); tr->clear(conf.range()); - tr->set(states.pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_COMPLETED))); + tr->set(states.pack(DatabaseBackupAgent::keyStateStatus), + StringRef(BackupAgentBase::getStateText(EBackupState::STATE_COMPLETED))); - wait(taskBucket->finish(tr, task)); + wait(taskBucket->finish(tr, task)); return Void(); } @@ -1449,9 +1450,10 @@ namespace dbBackup { try { tr.setOption(FDBTransactionOptions::LOCK_AWARE); tr.addReadConflictRange(singleKeyRange(sourceStates.pack(DatabaseBackupAgent::keyStateStatus))); - tr.set(sourceStates.pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_RUNNING_DIFFERENTIAL))); + tr.set(sourceStates.pack(DatabaseBackupAgent::keyStateStatus), + StringRef(BackupAgentBase::getStateText(EBackupState::STATE_RUNNING_DIFFERENTIAL))); - Key versionKey = task->params[DatabaseBackupAgent::keyConfigLogUid].withPrefix(task->params[BackupAgentBase::destUid]).withPrefix(backupLatestVersionsPrefix); + Key versionKey = task->params[DatabaseBackupAgent::keyConfigLogUid].withPrefix(task->params[BackupAgentBase::destUid]).withPrefix(backupLatestVersionsPrefix); Optional prevBeginVersion = wait(tr.get(versionKey)); if (!prevBeginVersion.present()) { return Void(); @@ -1489,9 +1491,10 @@ namespace dbBackup { wait(success(FinishedFullBackupTaskFunc::addTask(tr, taskBucket, task, TaskCompletionKey::noSignal()))); } else { // Start the writing of logs, if differential - tr->set(states.pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_RUNNING_DIFFERENTIAL))); + tr->set(states.pack(DatabaseBackupAgent::keyStateStatus), + StringRef(BackupAgentBase::getStateText(EBackupState::STATE_RUNNING_DIFFERENTIAL))); - allPartsDone = futureBucket->future(tr); + allPartsDone = futureBucket->future(tr); Version prevBeginVersion = BinaryReader::fromStringRef(task->params[DatabaseBackupAgent::keyPrevBeginVersion], Unversioned()); wait(success(CopyDiffLogsTaskFunc::addTask(tr, taskBucket, task, prevBeginVersion, restoreVersion, TaskCompletionKey::joinWith(allPartsDone)))); @@ -1623,9 +1626,10 @@ namespace dbBackup { srcTr2->set( Subspace(databaseBackupPrefixRange.begin).get(BackupAgentBase::keySourceTagName).pack(task->params[BackupAgentBase::keyTagName]), logUidValue ); srcTr2->set( sourceStates.pack(DatabaseBackupAgent::keyFolderId), task->params[DatabaseBackupAgent::keyFolderId] ); - srcTr2->set( sourceStates.pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_RUNNING))); + srcTr2->set(sourceStates.pack(DatabaseBackupAgent::keyStateStatus), + StringRef(BackupAgentBase::getStateText(EBackupState::STATE_RUNNING))); - state Key destPath = destUidValue.withPrefix(backupLogKeys.begin); + state Key destPath = destUidValue.withPrefix(backupLogKeys.begin); // Start logging the mutations for the specified ranges of the tag for (auto &backupRange : backupRanges) { srcTr2->set(logRangesEncodeKey(backupRange.begin, BinaryReader::fromStringRef(destUidValue, Unversioned())), logRangesEncodeValue(backupRange.end, destPath)); @@ -1666,9 +1670,10 @@ namespace dbBackup { 
tr->set(logUidValue.withPrefix(applyMutationsBeginRange.begin), BinaryWriter::toValue(beginVersion, Unversioned())); tr->set(logUidValue.withPrefix(applyMutationsEndRange.begin), BinaryWriter::toValue(beginVersion, Unversioned())); - tr->set(states.pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_RUNNING))); + tr->set(states.pack(DatabaseBackupAgent::keyStateStatus), + StringRef(BackupAgentBase::getStateText(EBackupState::STATE_RUNNING))); - state Reference kvBackupRangeComplete = futureBucket->future(tr); + state Reference kvBackupRangeComplete = futureBucket->future(tr); state Reference kvBackupComplete = futureBucket->future(tr); state int rangeCount = 0; @@ -1851,7 +1856,8 @@ public: } // This method will return the final status of the backup - ACTOR static Future waitBackup(DatabaseBackupAgent* backupAgent, Database cx, Key tagName, bool stopWhenDone) { + ACTOR static Future waitBackup(DatabaseBackupAgent* backupAgent, Database cx, Key tagName, + bool stopWhenDone) { state std::string backTrace; state UID logUid = wait(backupAgent->getLogUid(cx, tagName)); state Key statusKey = backupAgent->states.get(BinaryWriter::toValue(logUid, Unversioned())).pack(DatabaseBackupAgent::keyStateStatus); @@ -1862,15 +1868,15 @@ public: tr->setOption(FDBTransactionOptions::LOCK_AWARE); try { - state int status = wait(backupAgent->getStateValue(tr, logUid)); + state EBackupState status = wait(backupAgent->getStateValue(tr, logUid)); // Break, if no longer runnable - if (!DatabaseBackupAgent::isRunnable((BackupAgentBase::enumState)status) || BackupAgentBase::STATE_PARTIALLY_ABORTED == status) { + if (!DatabaseBackupAgent::isRunnable(status) || EBackupState::STATE_PARTIALLY_ABORTED == status) { return status; } // Break, if in differential mode (restorable) and stopWhenDone is not enabled - if ((!stopWhenDone) && (BackupAgentBase::STATE_RUNNING_DIFFERENTIAL == status)) { + if ((!stopWhenDone) && (EBackupState::STATE_RUNNING_DIFFERENTIAL == status)) { return status; } @@ -1885,7 +1891,7 @@ public: } // This method will return the final status of the backup - ACTOR static Future waitSubmitted(DatabaseBackupAgent* backupAgent, Database cx, Key tagName) { + ACTOR static Future waitSubmitted(DatabaseBackupAgent* backupAgent, Database cx, Key tagName) { state UID logUid = wait(backupAgent->getLogUid(cx, tagName)); state Key statusKey = backupAgent->states.get(BinaryWriter::toValue(logUid, Unversioned())).pack(DatabaseBackupAgent::keyStateStatus); @@ -1895,10 +1901,10 @@ public: tr->setOption(FDBTransactionOptions::LOCK_AWARE); try { - state int status = wait(backupAgent->getStateValue(tr, logUid)); + state EBackupState status = wait(backupAgent->getStateValue(tr, logUid)); // Break, if no longer runnable - if( BackupAgentBase::STATE_SUBMITTED != status) { + if (EBackupState::STATE_SUBMITTED != status) { return status; } @@ -1924,9 +1930,9 @@ public: tr->setOption(FDBTransactionOptions::COMMIT_ON_FIRST_PROXY); // We will use the global status for now to ensure that multiple backups do not start place with different tags - state int status = wait(backupAgent->getStateValue(tr, logUidCurrent)); + state EBackupState status = wait(backupAgent->getStateValue(tr, logUidCurrent)); - if (DatabaseBackupAgent::isRunnable((BackupAgentBase::enumState)status)) { + if (DatabaseBackupAgent::isRunnable(status)) { throw backup_duplicate(); } @@ -1987,7 +1993,8 @@ public: tr->set(backupAgent->config.get(logUidValue).pack(DatabaseBackupAgent::keyFolderId), backupUid); 
tr->set(backupAgent->states.get(logUidValue).pack(DatabaseBackupAgent::keyFolderId), backupUid); // written to config and states because it's also used by abort tr->set(backupAgent->config.get(logUidValue).pack(DatabaseBackupAgent::keyConfigBackupRanges), BinaryWriter::toValue(backupRanges, IncludeVersion(ProtocolVersion::withDRBackupRanges()))); - tr->set(backupAgent->states.get(logUidValue).pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_SUBMITTED))); + tr->set(backupAgent->states.get(logUidValue).pack(DatabaseBackupAgent::keyStateStatus), + StringRef(BackupAgentBase::getStateText(EBackupState::STATE_SUBMITTED))); if (stopWhenDone) { tr->set(backupAgent->config.get(logUidValue).pack(DatabaseBackupAgent::keyConfigStopWhenDoneKey), StringRef()); } @@ -2033,10 +2040,10 @@ public: ACTOR static Future atomicSwitchover(DatabaseBackupAgent* backupAgent, Database dest, Key tagName, Standalone> backupRanges, Key addPrefix, Key removePrefix, bool forceAction) { state DatabaseBackupAgent drAgent(dest); state UID destlogUid = wait(backupAgent->getLogUid(dest, tagName)); - state int status = wait(backupAgent->getStateValue(dest, destlogUid)); + state EBackupState status = wait(backupAgent->getStateValue(dest, destlogUid)); TraceEvent("DBA_SwitchoverStart").detail("Status", status); - if (status != BackupAgentBase::STATE_RUNNING_DIFFERENTIAL && status != BackupAgentBase::STATE_COMPLETED) { + if (status != EBackupState::STATE_RUNNING_DIFFERENTIAL && status != EBackupState::STATE_COMPLETED) { throw backup_duplicate(); } @@ -2153,10 +2160,10 @@ public: ACTOR static Future discontinueBackup(DatabaseBackupAgent* backupAgent, Reference tr, Key tagName) { tr->setOption(FDBTransactionOptions::LOCK_AWARE); state UID logUid = wait(backupAgent->getLogUid(tr, tagName)); - state int status = wait(backupAgent->getStateValue(tr, logUid)); + state EBackupState status = wait(backupAgent->getStateValue(tr, logUid)); TraceEvent("DBA_Discontinue").detail("Status", status); - if (!DatabaseBackupAgent::isRunnable((BackupAgentBase::enumState)status)) { + if (!DatabaseBackupAgent::isRunnable(status)) { throw backup_unneeded(); } @@ -2189,7 +2196,7 @@ public: logUid = _logUid; logUidValue = BinaryWriter::toValue(logUid, Unversioned()); - state Future statusFuture= backupAgent->getStateValue(tr, logUid); + state Future statusFuture = backupAgent->getStateValue(tr, logUid); state Future destUidFuture = backupAgent->getDestUid(tr, logUid); wait(success(statusFuture) && success(destUidFuture)); @@ -2197,8 +2204,8 @@ public: if (destUid.isValid()) { destUidValue = BinaryWriter::toValue(destUid, Unversioned()); } - int status = statusFuture.get(); - if (!backupAgent->isRunnable((BackupAgentBase::enumState)status)) { + EBackupState status = statusFuture.get(); + if (!backupAgent->isRunnable(status)) { throw backup_unneeded(); } @@ -2213,7 +2220,8 @@ public: tr->clear(prefixRange(logUidValue.withPrefix(applyLogKeys.begin))); - tr->set(StringRef(backupAgent->states.get(logUidValue).pack(DatabaseBackupAgent::keyStateStatus)), StringRef(DatabaseBackupAgent::getStateText(BackupAgentBase::STATE_PARTIALLY_ABORTED))); + tr->set(StringRef(backupAgent->states.get(logUidValue).pack(DatabaseBackupAgent::keyStateStatus)), + StringRef(DatabaseBackupAgent::getStateText(EBackupState::STATE_PARTIALLY_ABORTED))); wait(tr->commit()); TraceEvent("DBA_Abort").detail("CommitVersion", tr->getCommittedVersion()); @@ -2286,7 +2294,8 @@ public: } if (abortOldBackup) { - srcTr->set( 
backupAgent->sourceStates.pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_ABORTED) )); + srcTr->set(backupAgent->sourceStates.pack(DatabaseBackupAgent::keyStateStatus), + StringRef(BackupAgentBase::getStateText(EBackupState::STATE_ABORTED))); srcTr->set( backupAgent->sourceStates.get(logUidValue).pack(DatabaseBackupAgent::keyFolderId), backupUid ); srcTr->clear(prefixRange(logUidValue.withPrefix(backupLogKeys.begin))); srcTr->clear(prefixRange(logUidValue.withPrefix(logRangesRange.begin))); @@ -2307,7 +2316,8 @@ public: break; } - srcTr->set( backupAgent->sourceStates.pack(DatabaseBackupAgent::keyStateStatus), StringRef(DatabaseBackupAgent::getStateText(BackupAgentBase::STATE_PARTIALLY_ABORTED) )); + srcTr->set(backupAgent->sourceStates.pack(DatabaseBackupAgent::keyStateStatus), + StringRef(DatabaseBackupAgent::getStateText(EBackupState::STATE_PARTIALLY_ABORTED))); srcTr->set( backupAgent->sourceStates.get(logUidValue).pack(DatabaseBackupAgent::keyFolderId), backupUid ); wait( eraseLogData(srcTr, logUidValue, destUidValue) || partialTimeout ); @@ -2341,7 +2351,8 @@ public: return Void(); } - tr->set(StringRef(backupAgent->states.get(logUidValue).pack(DatabaseBackupAgent::keyStateStatus)), StringRef(DatabaseBackupAgent::getStateText(BackupAgentBase::STATE_ABORTED))); + tr->set(StringRef(backupAgent->states.get(logUidValue).pack(DatabaseBackupAgent::keyStateStatus)), + StringRef(DatabaseBackupAgent::getStateText(EBackupState::STATE_ABORTED))); wait(tr->commit()); @@ -2382,13 +2393,11 @@ public: state Future> fStopVersionKey = tr->get(backupAgent->states.get(BinaryWriter::toValue(logUid, Unversioned())).pack(BackupAgentBase::keyStateStop)); state Future> fBackupKeysPacked = tr->get(backupAgent->config.get(BinaryWriter::toValue(logUid, Unversioned())).pack(BackupAgentBase::keyConfigBackupRanges)); - int backupStateInt = wait(backupAgent->getStateValue(tr, logUid)); - state BackupAgentBase::enumState backupState = (BackupAgentBase::enumState)backupStateInt; - - if (backupState == DatabaseBackupAgent::STATE_NEVERRAN) { + state EBackupState backupState = wait(backupAgent->getStateValue(tr, logUid)); + + if (backupState == EBackupState::STATE_NEVERRAN) { statusText += "No previous backups found.\n"; - } - else { + } else { state std::string tagNameDisplay; Optional tagName = wait(fTagName); @@ -2408,23 +2417,20 @@ public: } switch (backupState) { - case BackupAgentBase::STATE_SUBMITTED: + case EBackupState::STATE_SUBMITTED: statusText += "The DR on tag `" + tagNameDisplay + "' is NOT a complete copy of the primary database (just started).\n"; break; - case BackupAgentBase::STATE_RUNNING: + case EBackupState::STATE_RUNNING: statusText += "The DR on tag `" + tagNameDisplay + "' is NOT a complete copy of the primary database.\n"; break; - case BackupAgentBase::STATE_RUNNING_DIFFERENTIAL: + case EBackupState::STATE_RUNNING_DIFFERENTIAL: statusText += "The DR on tag `" + tagNameDisplay + "' is a complete copy of the primary database.\n"; break; - case BackupAgentBase::STATE_COMPLETED: - { + case EBackupState::STATE_COMPLETED: { Version stopVersion = stopVersionKey.present() ? 
BinaryReader::fromStringRef(stopVersionKey.get(), Unversioned()) : -1; statusText += "The previous DR on tag `" + tagNameDisplay + "' completed at version " + format("%lld", stopVersion) + ".\n"; - } - break; - case BackupAgentBase::STATE_PARTIALLY_ABORTED: - { + } break; + case EBackupState::STATE_PARTIALLY_ABORTED: { statusText += "The previous DR on tag `" + tagNameDisplay + "' " + BackupAgentBase::getStateText(backupState) + ".\n"; statusText += "Abort the DR with --cleanup before starting a new DR.\n"; break; @@ -2485,13 +2491,15 @@ public: return statusText; } - ACTOR static Future getStateValue(DatabaseBackupAgent* backupAgent, Reference tr, UID logUid, bool snapshot) { + ACTOR static Future getStateValue(DatabaseBackupAgent* backupAgent, + Reference tr, UID logUid, + bool snapshot) { tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr->setOption(FDBTransactionOptions::LOCK_AWARE); state Key statusKey = backupAgent->states.get(BinaryWriter::toValue(logUid, Unversioned())).pack(DatabaseBackupAgent::keyStateStatus); Optional status = wait(tr->get(statusKey, snapshot)); - return (!status.present()) ? DatabaseBackupAgent::STATE_NEVERRAN : BackupAgentBase::getState(status.get().toString()); + return (!status.present()) ? EBackupState::STATE_NEVERRAN : BackupAgentBase::getState(status.get().toString()); } ACTOR static Future getDestUid(DatabaseBackupAgent* backupAgent, Reference tr, UID logUid, bool snapshot) { @@ -2536,7 +2544,8 @@ Future DatabaseBackupAgent::getStatus(Database cx, int errorLimit, return DatabaseBackupAgentImpl::getStatus(this, cx, errorLimit, tagName); } -Future DatabaseBackupAgent::getStateValue(Reference tr, UID logUid, bool snapshot) { +Future DatabaseBackupAgent::getStateValue(Reference tr, UID logUid, + bool snapshot) { return DatabaseBackupAgentImpl::getStateValue(this, tr, logUid, snapshot); } @@ -2552,11 +2561,11 @@ Future DatabaseBackupAgent::waitUpgradeToLatestDrVersion(Database cx, Key return DatabaseBackupAgentImpl::waitUpgradeToLatestDrVersion(this, cx, tagName); } -Future DatabaseBackupAgent::waitBackup(Database cx, Key tagName, bool stopWhenDone) { +Future DatabaseBackupAgent::waitBackup(Database cx, Key tagName, bool stopWhenDone) { return DatabaseBackupAgentImpl::waitBackup(this, cx, tagName, stopWhenDone); } -Future DatabaseBackupAgent::waitSubmitted(Database cx, Key tagName) { +Future DatabaseBackupAgent::waitSubmitted(Database cx, Key tagName) { return DatabaseBackupAgentImpl::waitSubmitted(this, cx, tagName); } diff --git a/fdbclient/DatabaseConfiguration.cpp b/fdbclient/DatabaseConfiguration.cpp index b1580205a0..53769e8140 100644 --- a/fdbclient/DatabaseConfiguration.cpp +++ b/fdbclient/DatabaseConfiguration.cpp @@ -29,12 +29,12 @@ DatabaseConfiguration::DatabaseConfiguration() void DatabaseConfiguration::resetInternal() { // does NOT reset rawConfiguration initialized = false; - proxyCount = grvProxyCount = resolverCount = desiredTLogCount = tLogWriteAntiQuorum = tLogReplicationFactor = + commitProxyCount = grvProxyCount = resolverCount = desiredTLogCount = tLogWriteAntiQuorum = tLogReplicationFactor = storageTeamSize = desiredLogRouterCount = -1; tLogVersion = TLogVersion::DEFAULT; tLogDataStoreType = storageServerStoreType = KeyValueStoreType::END; tLogSpillType = TLogSpillType::DEFAULT; - autoProxyCount = CLIENT_KNOBS->DEFAULT_AUTO_PROXIES; + autoCommitProxyCount = CLIENT_KNOBS->DEFAULT_AUTO_COMMIT_PROXIES; autoGrvProxyCount = CLIENT_KNOBS->DEFAULT_AUTO_GRV_PROXIES; autoResolverCount = CLIENT_KNOBS->DEFAULT_AUTO_RESOLVERS; 
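
resetInternal() above keeps the existing convention under the new names: an explicit count of -1 means "not configured", in which case the auto-tuned knob default is used. A minimal sketch of that convention (the knob value here is a stand-in, not the real default):

```cpp
#include <cstdint>

// Stand-in for CLIENT_KNOBS->DEFAULT_AUTO_COMMIT_PROXIES; the real value comes from the client knobs.
constexpr int32_t kDefaultAutoCommitProxies = 3;

struct ProxyCounts {
    int32_t commitProxyCount = -1;                              // -1 == not explicitly configured
    int32_t autoCommitProxyCount = kDefaultAutoCommitProxies;   // what resetInternal() restores

    // Mirrors getDesiredCommitProxies() later in this diff.
    int32_t desiredCommitProxies() const {
        return commitProxyCount == -1 ? autoCommitProxyCount : commitProxyCount;
    }
};
```
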
autoDesiredTLogCount = CLIENT_KNOBS->DEFAULT_AUTO_LOGS; @@ -165,40 +165,39 @@ void DatabaseConfiguration::setDefaultReplicationPolicy() { bool DatabaseConfiguration::isValid() const { if( !(initialized && - tLogWriteAntiQuorum >= 0 && - tLogWriteAntiQuorum <= tLogReplicationFactor/2 && - tLogReplicationFactor >= 1 && - storageTeamSize >= 1 && - getDesiredProxies() >= 1 && - getDesiredGrvProxies() >= 1 && - getDesiredLogs() >= 1 && - getDesiredResolvers() >= 1 && - tLogVersion != TLogVersion::UNSET && - tLogVersion >= TLogVersion::MIN_RECRUITABLE && - tLogVersion <= TLogVersion::MAX_SUPPORTED && - tLogDataStoreType != KeyValueStoreType::END && - tLogSpillType != TLogSpillType::UNSET && - !(tLogSpillType == TLogSpillType::REFERENCE && tLogVersion < TLogVersion::V3) && - storageServerStoreType != KeyValueStoreType::END && - autoProxyCount >= 1 && - autoGrvProxyCount >= 1 && - autoResolverCount >= 1 && - autoDesiredTLogCount >= 1 && - storagePolicy && - tLogPolicy && - getDesiredRemoteLogs() >= 1 && - remoteTLogReplicationFactor >= 0 && - repopulateRegionAntiQuorum >= 0 && - repopulateRegionAntiQuorum <= 1 && - usableRegions >= 1 && - usableRegions <= 2 && - regions.size() <= 2 && - ( usableRegions == 1 || regions.size() == 2 ) && - ( regions.size() == 0 || regions[0].priority >= 0 ) && - ( regions.size() == 0 || tLogPolicy->info() != "dcid^2 x zoneid^2 x 1") ) ) { //We cannot specify regions with three_datacenter replication + tLogWriteAntiQuorum >= 0 && + tLogWriteAntiQuorum <= tLogReplicationFactor/2 && + tLogReplicationFactor >= 1 && + storageTeamSize >= 1 && + getDesiredCommitProxies() >= 1 && + getDesiredGrvProxies() >= 1 && + getDesiredLogs() >= 1 && + getDesiredResolvers() >= 1 && + tLogVersion != TLogVersion::UNSET && + tLogVersion >= TLogVersion::MIN_RECRUITABLE && + tLogVersion <= TLogVersion::MAX_SUPPORTED && + tLogDataStoreType != KeyValueStoreType::END && + tLogSpillType != TLogSpillType::UNSET && + !(tLogSpillType == TLogSpillType::REFERENCE && tLogVersion < TLogVersion::V3) && + storageServerStoreType != KeyValueStoreType::END && + autoCommitProxyCount >= 1 && + autoGrvProxyCount >= 1 && + autoResolverCount >= 1 && + autoDesiredTLogCount >= 1 && + storagePolicy && + tLogPolicy && + getDesiredRemoteLogs() >= 1 && + remoteTLogReplicationFactor >= 0 && + repopulateRegionAntiQuorum >= 0 && + repopulateRegionAntiQuorum <= 1 && + usableRegions >= 1 && + usableRegions <= 2 && + regions.size() <= 2 && + ( usableRegions == 1 || regions.size() == 2 ) && + ( regions.size() == 0 || regions[0].priority >= 0 ) && + ( regions.size() == 0 || tLogPolicy->info() != "dcid^2 x zoneid^2 x 1") ) ) { //We cannot specify regions with three_datacenter replication return false; } - std::set dcIds; dcIds.insert(Key()); for(auto& r : regions) { @@ -318,11 +317,11 @@ StatusObject DatabaseConfiguration::toJSON(bool noPolicies) const { if (desiredTLogCount != -1 || isOverridden("logs")) { result["logs"] = desiredTLogCount; } - if (proxyCount != -1 || isOverridden("proxies")) { - result["proxies"] = proxyCount; + if (commitProxyCount != -1 || isOverridden("commit_proxies")) { + result["commit_proxies"] = commitProxyCount; } if (grvProxyCount != -1 || isOverridden("grv_proxies")) { - result["grv_proxies"] = proxyCount; + result["grv_proxies"] = grvProxyCount; } if (resolverCount != -1 || isOverridden("resolvers")) { result["resolvers"] = resolverCount; @@ -336,8 +335,8 @@ StatusObject DatabaseConfiguration::toJSON(bool noPolicies) const { if (repopulateRegionAntiQuorum != 0 || 
isOverridden("repopulate_anti_quorum")) { result["repopulate_anti_quorum"] = repopulateRegionAntiQuorum; } - if (autoProxyCount != CLIENT_KNOBS->DEFAULT_AUTO_PROXIES || isOverridden("auto_proxies")) { - result["auto_proxies"] = autoProxyCount; + if (autoCommitProxyCount != CLIENT_KNOBS->DEFAULT_AUTO_COMMIT_PROXIES || isOverridden("auto_commit_proxies")) { + result["auto_commit_proxies"] = autoCommitProxyCount; } if (autoGrvProxyCount != CLIENT_KNOBS->DEFAULT_AUTO_GRV_PROXIES || isOverridden("auto_grv_proxies")) { result["auto_grv_proxies"] = autoGrvProxyCount; @@ -419,8 +418,8 @@ bool DatabaseConfiguration::setInternal(KeyRef key, ValueRef value) { if (ck == LiteralStringRef("initialized")) { initialized = true; - } else if (ck == LiteralStringRef("proxies")) { - parse(&proxyCount, value); + } else if (ck == LiteralStringRef("commit_proxies")) { + parse(&commitProxyCount, value); } else if (ck == LiteralStringRef("grv_proxies")) { parse(&grvProxyCount, value); } else if (ck == LiteralStringRef("resolvers")) { @@ -459,8 +458,8 @@ bool DatabaseConfiguration::setInternal(KeyRef key, ValueRef value) { } else if (ck == LiteralStringRef("storage_engine")) { parse((&type), value); storageServerStoreType = (KeyValueStoreType::StoreType)type; - } else if (ck == LiteralStringRef("auto_proxies")) { - parse(&autoProxyCount, value); + } else if (ck == LiteralStringRef("auto_commit_proxies")) { + parse(&autoCommitProxyCount, value); } else if (ck == LiteralStringRef("auto_grv_proxies")) { parse(&autoGrvProxyCount, value); } else if (ck == LiteralStringRef("auto_resolvers")) { diff --git a/fdbclient/DatabaseConfiguration.h b/fdbclient/DatabaseConfiguration.h index 4a045200e8..0e374457da 100644 --- a/fdbclient/DatabaseConfiguration.h +++ b/fdbclient/DatabaseConfiguration.h @@ -133,15 +133,19 @@ struct DatabaseConfiguration { } //Killing an entire datacenter counts as killing one zone in modes that support it - int32_t maxZoneFailuresTolerated() const { + int32_t maxZoneFailuresTolerated(int fullyReplicatedRegions, bool forAvailability) const { int worstSatellite = regions.size() ? 
std::numeric_limits::max() : 0; + int regionsWithNonNegativePriority = 0; for(auto& r : regions) { + if(r.priority >= 0) { + regionsWithNonNegativePriority++; + } worstSatellite = std::min(worstSatellite, r.satelliteTLogReplicationFactor - r.satelliteTLogWriteAntiQuorum); if(r.satelliteTLogUsableDcsFallback > 0) { worstSatellite = std::min(worstSatellite, r.satelliteTLogReplicationFactorFallback - r.satelliteTLogWriteAntiQuorumFallback); } } - if(usableRegions > 1 && worstSatellite > 0) { + if(usableRegions > 1 && fullyReplicatedRegions > 1 && worstSatellite > 0 && (!forAvailability || regionsWithNonNegativePriority > 1)) { return 1 + std::min(std::max(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, worstSatellite - 1), storageTeamSize - 1); } else if(worstSatellite > 0) { return std::min(tLogReplicationFactor + worstSatellite - 2 - tLogWriteAntiQuorum, storageTeamSize - 1); @@ -149,9 +153,9 @@ struct DatabaseConfiguration { return std::min(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, storageTeamSize - 1); } - // Proxy Servers - int32_t proxyCount; - int32_t autoProxyCount; + // CommitProxy Servers + int32_t commitProxyCount; + int32_t autoCommitProxyCount; int32_t grvProxyCount; int32_t autoGrvProxyCount; @@ -192,7 +196,10 @@ struct DatabaseConfiguration { bool isExcludedServer( NetworkAddressList ) const; std::set getExcludedServers() const; - int32_t getDesiredProxies() const { if(proxyCount == -1) return autoProxyCount; return proxyCount; } + int32_t getDesiredCommitProxies() const { + if (commitProxyCount == -1) return autoCommitProxyCount; + return commitProxyCount; + } int32_t getDesiredGrvProxies() const { if (grvProxyCount == -1) return autoGrvProxyCount; return grvProxyCount; diff --git a/fdbclient/DatabaseContext.h b/fdbclient/DatabaseContext.h index 0f86d41f9e..5652ae7a14 100644 --- a/fdbclient/DatabaseContext.h +++ b/fdbclient/DatabaseContext.h @@ -29,7 +29,7 @@ #include "fdbclient/NativeAPI.actor.h" #include "fdbclient/KeyRangeMap.h" -#include "fdbclient/MasterProxyInterface.h" +#include "fdbclient/CommitProxyInterface.h" #include "fdbclient/SpecialKeySpace.actor.h" #include "fdbrpc/QueueModel.h" #include "fdbrpc/MultiInterface.h" @@ -68,7 +68,7 @@ struct LocationInfo : MultiInterface } }; -using ProxyInfo = ModelInterface; +using CommitProxyInfo = ModelInterface; using GrvProxyInfo = ModelInterface; class ClientTagThrottleData : NonCopyable { @@ -165,8 +165,8 @@ public: bool sampleOnCost(uint64_t cost) const; void updateProxies(); - Reference getMasterProxies(bool useProvisionalProxies); - Future> getMasterProxiesFuture(bool useProvisionalProxies); + Reference getCommitProxies(bool useProvisionalProxies); + Future> getCommitProxiesFuture(bool useProvisionalProxies); Reference getGrvProxies(bool useProvisionalProxies); Future onProxiesChanged(); Future getHealthMetrics(bool detailed); @@ -219,9 +219,9 @@ public: Reference>> connectionFile; AsyncTrigger proxiesChangeTrigger; Future monitorProxiesInfoChange; - Reference masterProxies; + Reference commitProxies; Reference grvProxies; - bool proxyProvisional; + bool proxyProvisional; // Provisional commit proxy and grv proxy are used at the same time. 
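
maxZoneFailuresTolerated() above now only grants the extra multi-region fault tolerance when more than one region is fully replicated and, when the question is about availability, when more than one region has a non-negative (fail-over-able) priority. A simplified sketch of just that gating, with the satellite-TLog terms left out:

```cpp
#include <vector>

struct RegionSketch { int priority; };   // simplified stand-in for the region configuration

// Returns whether the multi-region bonus in maxZoneFailuresTolerated() applies, per the new checks above.
bool multiRegionToleranceApplies(int usableRegions, int fullyReplicatedRegions, bool forAvailability,
                                 const std::vector<RegionSketch>& regions) {
    int regionsWithNonNegativePriority = 0;
    for (const auto& r : regions)
        if (r.priority >= 0) ++regionsWithNonNegativePriority;

    return usableRegions > 1 && fullyReplicatedRegions > 1 &&
           (!forAvailability || regionsWithNonNegativePriority > 1);
}
```
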
UID proxiesLastChange; LocalityData clientLocality; QueueModel queueModel; diff --git a/fdbclient/FDBTypes.h b/fdbclient/FDBTypes.h index 7e16dcd75f..d876a0f489 100644 --- a/fdbclient/FDBTypes.h +++ b/fdbclient/FDBTypes.h @@ -257,6 +257,7 @@ struct Traceable> : std::true_type { std::string printable( const StringRef& val ); std::string printable( const std::string& val ); std::string printable( const KeyRangeRef& range ); +std::string printable(const VectorRef& val); std::string printable( const VectorRef& val ); std::string printable( const VectorRef& val ); std::string printable( const KeyValueRef& val ); @@ -289,6 +290,14 @@ struct KeyRangeRef { bool contains( const KeyRef& key ) const { return begin <= key && key < end; } bool contains( const KeyRangeRef& keys ) const { return begin <= keys.begin && keys.end <= end; } bool intersects( const KeyRangeRef& keys ) const { return begin < keys.end && keys.begin < end; } + bool intersects(const VectorRef& keysVec) const { + for (const auto& keys : keysVec) { + if (intersects(keys)) { + return true; + } + } + return false; + } bool empty() const { return begin == end; } bool singleKeyRange() const { return equalsKeyAfter(begin, end); } diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index 568eef92b6..348c97f779 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -131,6 +131,9 @@ public: KeyBackedProperty removePrefix() { return configSpace.pack(LiteralStringRef(__FUNCTION__)); } + KeyBackedProperty incrementalBackupOnly() { + return configSpace.pack(LiteralStringRef(__FUNCTION__)); + } // XXX: Remove restoreRange() once it is safe to remove. It has been changed to restoreRanges KeyBackedProperty restoreRange() { return configSpace.pack(LiteralStringRef(__FUNCTION__)); @@ -141,6 +144,9 @@ public: KeyBackedProperty batchFuture() { return configSpace.pack(LiteralStringRef(__FUNCTION__)); } + KeyBackedProperty beginVersion() { + return configSpace.pack(LiteralStringRef(__FUNCTION__)); + } KeyBackedProperty restoreVersion() { return configSpace.pack(LiteralStringRef(__FUNCTION__)); } @@ -557,7 +563,9 @@ namespace fileBackup { if(rLen != len) throw restore_bad_read(); - Standalone> results({}, buf.arena()); + simulateBlobFailure(); + + Standalone> results({}, buf.arena()); state StringRefReader reader(buf, restore_corrupted_data()); try { @@ -597,17 +605,17 @@ namespace fileBackup { if(b != 0xFF) throw restore_corrupted_data_padding(); - return results; + return results; } catch(Error &e) { - TraceEvent(SevWarn, "FileRestoreCorruptRangeFileBlock") - .error(e) - .detail("Filename", file->getFilename()) - .detail("BlockOffset", offset) - .detail("BlockLen", len) - .detail("ErrorRelativeOffset", reader.rptr - buf.begin()) - .detail("ErrorAbsoluteOffset", reader.rptr - buf.begin() + offset); - throw; + TraceEvent(SevWarn, "FileRestoreDecodeRangeFileBlockFailed") + .error(e) + .detail("Filename", file->getFilename()) + .detail("BlockOffset", offset) + .detail("BlockLen", len) + .detail("ErrorRelativeOffset", reader.rptr - buf.begin()) + .detail("ErrorAbsoluteOffset", reader.rptr - buf.begin() + offset); + throw; } } @@ -740,9 +748,10 @@ namespace fileBackup { state Subspace newConfigSpace = uidPrefixKey(LiteralStringRef("uid->config/").withPrefix(fileBackupPrefixRange.begin), uid); Optional statusStr = wait(tr->get(statusSpace.pack(FileBackupAgent::keyStateStatus))); - state EBackupState status = !statusStr.present() ? 
FileBackupAgent::STATE_NEVERRAN : BackupAgentBase::getState(statusStr.get().toString()); + state EBackupState status = + !statusStr.present() ? EBackupState::STATE_NEVERRAN : BackupAgentBase::getState(statusStr.get().toString()); - TraceEvent(SevInfo, "FileBackupAbortIncompatibleBackup") + TraceEvent(SevInfo, "FileBackupAbortIncompatibleBackup") .detail("TagName", tagName.c_str()) .detail("Status", BackupAgentBase::getStateText(status)); @@ -762,9 +771,9 @@ namespace fileBackup { // Set old style state key to Aborted if it was Runnable if(backupAgent->isRunnable(status)) - tr->set(statusKey, StringRef(FileBackupAgent::getStateText(BackupAgentBase::STATE_ABORTED))); + tr->set(statusKey, StringRef(FileBackupAgent::getStateText(EBackupState::STATE_ABORTED))); - return Void(); + return Void(); } struct AbortFiveZeroBackupTask : TaskFuncBase { @@ -814,11 +823,11 @@ namespace fileBackup { state BackupConfig config(current.first); EBackupState status = wait(config.stateEnum().getD(tr, false, EBackupState::STATE_NEVERRAN)); - if (!backupAgent->isRunnable((BackupAgentBase::enumState)status)) { - throw backup_unneeded(); - } + if (!backupAgent->isRunnable(status)) { + throw backup_unneeded(); + } - TraceEvent(SevInfo, "FBA_AbortFileOneBackup") + TraceEvent(SevInfo, "FBA_AbortFileOneBackup") .detail("TagName", tagName.c_str()) .detail("Status", BackupAgentBase::getStateText(status)); @@ -2092,10 +2101,10 @@ namespace fileBackup { } // If the backup is restorable but the state is not differential then set state to differential - if(restorableVersion.present() && backupState != BackupAgentBase::STATE_RUNNING_DIFFERENTIAL) - config.stateEnum().set(tr, BackupAgentBase::STATE_RUNNING_DIFFERENTIAL); + if (restorableVersion.present() && backupState != EBackupState::STATE_RUNNING_DIFFERENTIAL) + config.stateEnum().set(tr, EBackupState::STATE_RUNNING_DIFFERENTIAL); - // If stopWhenDone is set and there is a restorable version, set the done future and do not create further tasks. + // If stopWhenDone is set and there is a restorable version, set the done future and do not create further tasks. 
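
The incrementalBackupOnly() and beginVersion() properties added to RestoreConfig and BackupConfig in this diff follow the file's existing idiom of keying each KeyBackedProperty by its accessor's own name via configSpace.pack(LiteralStringRef(__FUNCTION__)). A trivial self-contained illustration of what __FUNCTION__ contributes (the struct here is hypothetical):

```cpp
#include <cstdio>

// With GCC/Clang, __FUNCTION__ in a member function is the unqualified function name (MSVC qualifies it),
// so each accessor yields a distinct, stable key suffix without repeating the name as a string literal.
struct ConfigSketch {
    const char* incrementalBackupOnly() const { return __FUNCTION__; }  // "incrementalBackupOnly"
    const char* beginVersion() const          { return __FUNCTION__; }  // "beginVersion"
};

int main() {
    ConfigSketch c;
    std::printf("%s / %s\n", c.incrementalBackupOnly(), c.beginVersion());
    return 0;
}
```
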
if(stopWhenDone && restorableVersion.present()) { wait(onDone->set(tr, taskBucket) && taskBucket->finish(tr, task)); @@ -2342,10 +2351,10 @@ namespace fileBackup { } // If the backup is restorable and the state isn't differential the set state to differential - if(restorableVersion.present() && backupState != BackupAgentBase::STATE_RUNNING_DIFFERENTIAL) - config.stateEnum().set(tr, BackupAgentBase::STATE_RUNNING_DIFFERENTIAL); + if (restorableVersion.present() && backupState != EBackupState::STATE_RUNNING_DIFFERENTIAL) + config.stateEnum().set(tr, EBackupState::STATE_RUNNING_DIFFERENTIAL); - // Unless we are to stop, start the next snapshot using the default interval + // Unless we are to stop, start the next snapshot using the default interval Reference snapshotDoneFuture = task->getDoneFuture(futureBucket); if(!stopWhenDone) { wait(config.initNewSnapshot(tr) && success(BackupSnapshotDispatchTask::addTask(tr, taskBucket, task, 1, TaskCompletionKey::signal(snapshotDoneFuture)))); @@ -2474,7 +2483,8 @@ namespace fileBackup { state Future> backupRangesFuture = config.backupRanges().getOrThrow(tr); state Future destUidValueFuture = config.destUidValue().getOrThrow(tr); state Future> partitionedLog = config.partitionedLogEnabled().get(tr); - wait(success(backupRangesFuture) && success(destUidValueFuture) && success(partitionedLog)); + state Future> incrementalBackupOnly = config.incrementalBackupOnly().get(tr); + wait(success(backupRangesFuture) && success(destUidValueFuture) && success(partitionedLog) && success(incrementalBackupOnly)); std::vector backupRanges = backupRangesFuture.get(); Key destUidValue = destUidValueFuture.get(); @@ -2494,7 +2504,10 @@ namespace fileBackup { wait(config.initNewSnapshot(tr, 0)); // Using priority 1 for both of these to at least start both tasks soon - wait(success(BackupSnapshotDispatchTask::addTask(tr, taskBucket, task, 1, TaskCompletionKey::joinWith(backupFinished)))); + // Do not add snapshot task if we only want the incremental backup + if (!incrementalBackupOnly.get().present() || !incrementalBackupOnly.get().get()) { + wait(success(BackupSnapshotDispatchTask::addTask(tr, taskBucket, task, 1, TaskCompletionKey::joinWith(backupFinished)))); + } wait(success(BackupLogsDispatchTask::addTask(tr, taskBucket, task, 1, 0, beginVersion, TaskCompletionKey::joinWith(backupFinished)))); // If a clean stop is requested, the log and snapshot tasks will quit after the backup is restorable, then the following @@ -3008,8 +3021,10 @@ namespace fileBackup { state int64_t remainingInBatch = Params.remainingInBatch().get(task); state bool addingToExistingBatch = remainingInBatch > 0; state Version restoreVersion; + state Future> incrementalBackupOnly = restore.incrementalBackupOnly().get(tr); wait(store(restoreVersion, restore.restoreVersion().getOrThrow(tr)) + && success(incrementalBackupOnly) && checkTaskVersion(tr->getDatabase(), task, name, version)); // If not adding to an existing batch then update the apply mutations end version so the mutations from the @@ -3398,6 +3413,7 @@ namespace fileBackup { state Reference tr(new ReadYourWritesTransaction(cx)); state RestoreConfig restore(task); state Version restoreVersion; + state Version beginVersion; state Reference bc; loop { @@ -3408,6 +3424,8 @@ namespace fileBackup { wait(checkTaskVersion(tr->getDatabase(), task, name, version)); Version _restoreVersion = wait(restore.restoreVersion().getOrThrow(tr)); restoreVersion = _restoreVersion; + Optional _beginVersion = wait(restore.beginVersion().get(tr)); + beginVersion = 
_beginVersion.present() ? _beginVersion.get() : invalidVersion; wait(taskBucket->keepRunning(tr, task)); ERestoreState oldState = wait(restore.stateEnum().getD(tr)); @@ -3447,14 +3465,22 @@ namespace fileBackup { wait(tr->onError(e)); } } + Optional _incremental = wait(restore.incrementalBackupOnly().get(tr)); + state bool incremental = _incremental.present() ? _incremental.get() : false; + if (beginVersion == invalidVersion) { + beginVersion = 0; + } + Optional restorable = + wait(bc->getRestoreSet(restoreVersion, VectorRef(), incremental, beginVersion)); + if (!incremental) { + beginVersion = restorable.get().snapshot.beginVersion; + } - Optional restorable = wait(bc->getRestoreSet(restoreVersion)); - - if(!restorable.present()) + if(!restorable.present()) throw restore_missing_data(); // First version for which log data should be applied - Params.firstVersion().set(task, restorable.get().snapshot.beginVersion); + Params.firstVersion().set(task, beginVersion); // Convert the two lists in restorable (logs and ranges) to a single list of RestoreFiles. // Order does not matter, they will be put in order when written to the restoreFileMap below. @@ -3463,6 +3489,7 @@ namespace fileBackup { for(const RangeFile &f : restorable.get().ranges) { files.push_back({f.version, f.fileName, true, f.blockSize, f.fileSize}); } + for(const LogFile &f : restorable.get().logs) { files.push_back({f.beginVersion, f.fileName, false, f.blockSize, f.fileSize, f.endVersion}); } @@ -3526,6 +3553,7 @@ namespace fileBackup { restore.stateEnum().set(tr, ERestoreState::RUNNING); // Set applyMutation versions + restore.setApplyBeginVersion(tr, firstVersion); restore.setApplyEndVersion(tr, firstVersion); @@ -3533,6 +3561,14 @@ namespace fileBackup { wait(success(RestoreDispatchTaskFunc::addTask(tr, taskBucket, task, 0, "", 0, CLIENT_KNOBS->RESTORE_DISPATCH_BATCH_SIZE))); wait(taskBucket->finish(tr, task)); + state Future> logsOnly = restore.incrementalBackupOnly().get(tr); + wait(success(logsOnly)); + if (logsOnly.get().present() && logsOnly.get().get()) { + // If this is an incremental restore, we need to set the applyMutationsMapPrefix + // to the earliest log version so no mutations are missed + Value versionEncoded = BinaryWriter::toValue(Params.firstVersion().get(task), Unversioned()); + wait(krmSetRange(tr, restore.applyMutationsMapPrefix(), normalKeys, versionEncoded)); + } return Void(); } @@ -3712,7 +3748,9 @@ public: // This method will return the final status of the backup at tag, and return the URL that was used on the tag // when that status value was read. 
- ACTOR static Future waitBackup(FileBackupAgent* backupAgent, Database cx, std::string tagName, bool stopWhenDone, Reference *pContainer = nullptr, UID *pUID = nullptr) { + ACTOR static Future waitBackup(FileBackupAgent* backupAgent, Database cx, std::string tagName, + bool stopWhenDone, Reference* pContainer = nullptr, + UID* pUID = nullptr) { state std::string backTrace; state KeyBackedTag tag = makeBackupTag(tagName); @@ -3733,7 +3771,8 @@ public: // Break, if one of the following is true // - no longer runnable // - in differential mode (restorable) and stopWhenDone is not enabled - if( !FileBackupAgent::isRunnable(status) || ((!stopWhenDone) && (BackupAgentBase::STATE_RUNNING_DIFFERENTIAL == status) )) { + if (!FileBackupAgent::isRunnable(status) || + ((!stopWhenDone) && (EBackupState::STATE_RUNNING_DIFFERENTIAL == status))) { if(pContainer != nullptr) { Reference c = wait(config.backupContainer().getOrThrow(tr, false, backup_invalid_info())); @@ -3760,7 +3799,7 @@ public: ACTOR static Future submitBackup(FileBackupAgent* backupAgent, Reference tr, Key outContainer, int snapshotIntervalSeconds, std::string tagName, Standalone> backupRanges, bool stopWhenDone, - bool partitionedLog) { + bool partitionedLog, bool incrementalBackupOnly) { tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr->setOption(FDBTransactionOptions::LOCK_AWARE); tr->setOption(FDBTransactionOptions::COMMIT_ON_FIRST_PROXY); @@ -3863,13 +3902,17 @@ public: config.backupRanges().set(tr, normalizedRanges); config.snapshotIntervalSeconds().set(tr, snapshotIntervalSeconds); config.partitionedLogEnabled().set(tr, partitionedLog); + config.incrementalBackupOnly().set(tr, incrementalBackupOnly); Key taskKey = wait(fileBackup::StartFullBackupTaskFunc::addTask(tr, backupAgent->taskBucket, uid, TaskCompletionKey::noSignal())); return Void(); } - ACTOR static Future submitRestore(FileBackupAgent* backupAgent, Reference tr, Key tagName, Key backupURL, Standalone> ranges, Version restoreVersion, Key addPrefix, Key removePrefix, bool lockDB, UID uid) { + ACTOR static Future submitRestore(FileBackupAgent* backupAgent, Reference tr, + Key tagName, Key backupURL, Standalone> ranges, + Version restoreVersion, Key addPrefix, Key removePrefix, bool lockDB, + bool incrementalBackupOnly, Version beginVersion, UID uid) { KeyRangeMap restoreRangeSet; for (auto& range : ranges) { restoreRangeSet.insert(range, 1); @@ -3917,7 +3960,7 @@ public: for (index = 0; index < restoreRanges.size(); index++) { KeyRange restoreIntoRange = KeyRangeRef(restoreRanges[index].begin, restoreRanges[index].end).removePrefix(removePrefix).withPrefix(addPrefix); Standalone existingRows = wait(tr->getRange(restoreIntoRange, 1)); - if (existingRows.size() > 0) { + if (existingRows.size() > 0 && !incrementalBackupOnly) { throw restore_destination_not_empty(); } } @@ -3934,6 +3977,8 @@ public: restore.sourceContainer().set(tr, bc); restore.stateEnum().set(tr, ERestoreState::QUEUED); restore.restoreVersion().set(tr, restoreVersion); + restore.incrementalBackupOnly().set(tr, incrementalBackupOnly); + restore.beginVersion().set(tr, beginVersion); if (BUGGIFY && restoreRanges.size() == 1) { restore.restoreRange().set(tr, restoreRanges[0]); } @@ -4063,7 +4108,7 @@ public: state Key destUidValue = wait(config.destUidValue().getOrThrow(tr)); EBackupState status = wait(config.stateEnum().getD(tr, false, EBackupState::STATE_NEVERRAN)); - if (!backupAgent->isRunnable((BackupAgentBase::enumState)status)) { + if (!backupAgent->isRunnable(status)) { throw 
backup_unneeded(); } @@ -4166,13 +4211,13 @@ public: JsonBuilderObject statusDoc; statusDoc.setKey("Name", BackupAgentBase::getStateName(backupState)); statusDoc.setKey("Description", BackupAgentBase::getStateText(backupState)); - statusDoc.setKey("Completed", backupState == BackupAgentBase::STATE_COMPLETED); + statusDoc.setKey("Completed", backupState == EBackupState::STATE_COMPLETED); statusDoc.setKey("Running", BackupAgentBase::isRunnable(backupState)); doc.setKey("Status", statusDoc); state Future done = Void(); - if(backupState != BackupAgentBase::STATE_NEVERRAN) { + if (backupState != EBackupState::STATE_NEVERRAN) { state Reference bc; state TimestampedVersion latestRestorable; @@ -4184,7 +4229,7 @@ public: if(latestRestorable.present()) { JsonBuilderObject o = latestRestorable.toJSON(); - if(backupState != BackupAgentBase::STATE_COMPLETED) { + if (backupState != EBackupState::STATE_COMPLETED) { o.setKey("LagSeconds", (recentReadVersion - latestRestorable.version.get()) / CLIENT_KNOBS->CORE_VERSIONSPERSECOND); } doc.setKey("LatestRestorablePoint", o); @@ -4192,7 +4237,8 @@ public: doc.setKey("DestinationURL", bc->getURL()); } - if(backupState == BackupAgentBase::STATE_RUNNING_DIFFERENTIAL || backupState == BackupAgentBase::STATE_RUNNING) { + if (backupState == EBackupState::STATE_RUNNING_DIFFERENTIAL || + backupState == EBackupState::STATE_RUNNING) { state int64_t snapshotInterval; state int64_t logBytesWritten; state int64_t rangeBytesWritten; @@ -4315,23 +4361,28 @@ public: bool snapshotProgress = false; switch (backupState) { - case BackupAgentBase::STATE_SUBMITTED: - statusText += "The backup on tag `" + tagName + "' is in progress (just started) to " + bc->getURL() + ".\n"; - break; - case BackupAgentBase::STATE_RUNNING: - statusText += "The backup on tag `" + tagName + "' is in progress to " + bc->getURL() + ".\n"; - snapshotProgress = true; - break; - case BackupAgentBase::STATE_RUNNING_DIFFERENTIAL: - statusText += "The backup on tag `" + tagName + "' is restorable but continuing to " + bc->getURL() + ".\n"; - snapshotProgress = true; - break; - case BackupAgentBase::STATE_COMPLETED: - statusText += "The previous backup on tag `" + tagName + "' at " + bc->getURL() + " completed at version " + format("%lld", latestRestorableVersion.orDefault(-1)) + ".\n"; - break; - default: - statusText += "The previous backup on tag `" + tagName + "' at " + bc->getURL() + " " + backupStatus + ".\n"; - break; + case EBackupState::STATE_SUBMITTED: + statusText += "The backup on tag `" + tagName + "' is in progress (just started) to " + + bc->getURL() + ".\n"; + break; + case EBackupState::STATE_RUNNING: + statusText += "The backup on tag `" + tagName + "' is in progress to " + bc->getURL() + ".\n"; + snapshotProgress = true; + break; + case EBackupState::STATE_RUNNING_DIFFERENTIAL: + statusText += "The backup on tag `" + tagName + "' is restorable but continuing to " + + bc->getURL() + ".\n"; + snapshotProgress = true; + break; + case EBackupState::STATE_COMPLETED: + statusText += "The previous backup on tag `" + tagName + "' at " + bc->getURL() + + " completed at version " + format("%lld", latestRestorableVersion.orDefault(-1)) + + ".\n"; + break; + default: + statusText += "The previous backup on tag `" + tagName + "' at " + bc->getURL() + " " + + backupStatus + ".\n"; + break; } statusText += format("BackupUID: %s\n", uidAndAbortedFlag.get().first.toString().c_str()); statusText += format("BackupURL: %s\n", bc->getURL().c_str()); @@ -4367,7 +4418,7 @@ public: ); statusText += 
format("Snapshot interval is %lld seconds. ", snapshotInterval); - if(backupState == BackupAgentBase::STATE_RUNNING_DIFFERENTIAL) + if (backupState == EBackupState::STATE_RUNNING_DIFFERENTIAL) statusText += format("Current snapshot progress target is %3.2f%% (>100%% means the snapshot is supposed to be done)\n", 100.0 * (recentReadVersion - snapshotBeginVersion) / (snapshotTargetEndVersion - snapshotBeginVersion)) ; else statusText += "The initial snapshot is still running.\n"; @@ -4451,7 +4502,8 @@ public: ACTOR static Future restore(FileBackupAgent* backupAgent, Database cx, Optional cxOrig, Key tagName, Key url, Standalone> ranges, bool waitForComplete, Version targetVersion, bool verbose, Key addPrefix, - Key removePrefix, bool lockDB, UID randomUid) { + Key removePrefix, bool lockDB, bool incrementalBackupOnly, + Version beginVersion, UID randomUid) { state Reference bc = IBackupContainer::openContainer(url.toString()); state BackupDescription desc = wait(bc->describeBackup()); @@ -4463,7 +4515,12 @@ public: if(targetVersion == invalidVersion && desc.maxRestorableVersion.present()) targetVersion = desc.maxRestorableVersion.get(); - Optional restoreSet = wait(bc->getRestoreSet(targetVersion)); + if (targetVersion == invalidVersion && incrementalBackupOnly && desc.contiguousLogEnd.present()) { + targetVersion = desc.contiguousLogEnd.get() - 1; + } + + Optional restoreSet = + wait(bc->getRestoreSet(targetVersion, VectorRef(), incrementalBackupOnly, beginVersion)); if(!restoreSet.present()) { TraceEvent(SevWarn, "FileBackupAgentRestoreNotPossible") @@ -4482,7 +4539,8 @@ public: try { tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr->setOption(FDBTransactionOptions::LOCK_AWARE); - wait(submitRestore(backupAgent, tr, tagName, url, ranges, targetVersion, addPrefix, removePrefix, lockDB, randomUid)); + wait(submitRestore(backupAgent, tr, tagName, url, ranges, targetVersion, addPrefix, removePrefix, + lockDB, incrementalBackupOnly, beginVersion, randomUid)); wait(tr->commit()); break; } catch(Error &e) { @@ -4518,7 +4576,7 @@ public: backupConfig = BackupConfig(uidFlag.first); state EBackupState status = wait(backupConfig.stateEnum().getOrThrow(ryw_tr)); - if (status != BackupAgentBase::STATE_RUNNING_DIFFERENTIAL ) { + if (status != EBackupState::STATE_RUNNING_DIFFERENTIAL) { throw backup_duplicate(); } @@ -4619,7 +4677,7 @@ public: } else { TraceEvent("AS_StartRestore"); Version ver = wait(restore(backupAgent, cx, cx, tagName, KeyRef(bc->getURL()), ranges, true, -1, true, - addPrefix, removePrefix, true, randomUid)); + addPrefix, removePrefix, true, false, invalidVersion, randomUid)); return ver; } } @@ -4656,8 +4714,13 @@ Future FileBackupAgent::atomicParallelRestore(Database cx, Key tagName, St return FileBackupAgentImpl::atomicParallelRestore(this, cx, tagName, ranges, addPrefix, removePrefix); } -Future FileBackupAgent::restore(Database cx, Optional cxOrig, Key tagName, Key url, Standalone> ranges, bool waitForComplete, Version targetVersion, bool verbose, Key addPrefix, Key removePrefix, bool lockDB) { - return FileBackupAgentImpl::restore(this, cx, cxOrig, tagName, url, ranges, waitForComplete, targetVersion, verbose, addPrefix, removePrefix, lockDB, deterministicRandom()->randomUniqueID()); +Future FileBackupAgent::restore(Database cx, Optional cxOrig, Key tagName, Key url, + Standalone> ranges, bool waitForComplete, + Version targetVersion, bool verbose, Key addPrefix, Key removePrefix, + bool lockDB, bool incrementalBackupOnly, Version beginVersion) { + return 
FileBackupAgentImpl::restore(this, cx, cxOrig, tagName, url, ranges, waitForComplete, targetVersion, verbose, + addPrefix, removePrefix, lockDB, incrementalBackupOnly, beginVersion, + deterministicRandom()->randomUniqueID()); } Future FileBackupAgent::atomicRestore(Database cx, Key tagName, Standalone> ranges, Key addPrefix, Key removePrefix) { @@ -4683,9 +4746,9 @@ Future FileBackupAgent::waitRestore(Database cx, Key tagName, boo Future FileBackupAgent::submitBackup(Reference tr, Key outContainer, int snapshotIntervalSeconds, std::string tagName, Standalone> backupRanges, bool stopWhenDone, - bool partitionedLog) { + bool partitionedLog, bool incrementalBackupOnly) { return FileBackupAgentImpl::submitBackup(this, tr, outContainer, snapshotIntervalSeconds, tagName, backupRanges, - stopWhenDone, partitionedLog); + stopWhenDone, partitionedLog, incrementalBackupOnly); } Future FileBackupAgent::discontinueBackup(Reference tr, Key tagName){ @@ -4714,7 +4777,8 @@ void FileBackupAgent::setLastRestorable(Reference tr, tr->set(lastRestorable.pack(tagName), BinaryWriter::toValue(version, Unversioned())); } -Future FileBackupAgent::waitBackup(Database cx, std::string tagName, bool stopWhenDone, Reference *pContainer, UID *pUID) { +Future FileBackupAgent::waitBackup(Database cx, std::string tagName, bool stopWhenDone, + Reference* pContainer, UID* pUID) { return FileBackupAgentImpl::waitBackup(this, cx, tagName, stopWhenDone, pContainer, pUID); } @@ -4963,3 +5027,18 @@ ACTOR Future transformRestoredDatabase(Database cx, Standalonerandom01() < 0.01) { // Simulate blob failures + double i = deterministicRandom()->random01(); + if (i < 0.5) { + throw http_request_failed(); + } else if (i < 0.7) { + throw connection_failed(); + } else if (i < 0.8) { + throw timed_out(); + } else if (i < 0.9) { + throw lookup_failed(); + } + } +} \ No newline at end of file diff --git a/fdbclient/GrvProxyInterface.h b/fdbclient/GrvProxyInterface.h index 06d4b7e946..94820a175f 100644 --- a/fdbclient/GrvProxyInterface.h +++ b/fdbclient/GrvProxyInterface.h @@ -27,6 +27,8 @@ // with RateKeeper to gather health information of the cluster. 
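A note on the simulateBlobFailure() helper added to FileBackupAgent.actor.cpp above: injection only happens when the initial random01() draw falls below 0.01, and a second uniform draw then selects which error is thrown. The implied per-call rates work out as follows (illustrative arithmetic only, not part of the patch):

    // Per-call injection probabilities implied by simulateBlobFailure():
    double pHttpRequestFailed = 0.01 * 0.5;         // 0.005  (i <  0.5)
    double pConnectionFailed  = 0.01 * (0.7 - 0.5); // 0.002  (0.5 <= i < 0.7)
    double pTimedOut          = 0.01 * (0.8 - 0.7); // 0.001  (0.7 <= i < 0.8)
    double pLookupFailed      = 0.01 * (0.9 - 0.8); // 0.001  (0.8 <= i < 0.9)
    double pGateFiresNoThrow  = 0.01 * (1.0 - 0.9); // 0.001  (0.9 <= i, nothing thrown)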
struct GrvProxyInterface { constexpr static FileIdentifier file_identifier = 8743216; + enum { LocationAwareLoadBalance = 1 }; + enum { AlwaysFresh = 1 }; Optional processId; bool provisional; diff --git a/fdbclient/HTTP.actor.cpp b/fdbclient/HTTP.actor.cpp index 0b02740b17..e61d203444 100644 --- a/fdbclient/HTTP.actor.cpp +++ b/fdbclient/HTTP.actor.cpp @@ -72,7 +72,7 @@ namespace HTTP { } PacketBuffer * writeRequestHeader(std::string const &verb, std::string const &resource, HTTP::Headers const &headers, PacketBuffer *dest) { - PacketWriter writer(dest, NULL, Unversioned()); + PacketWriter writer(dest, nullptr, Unversioned()); writer.serializeBytes(verb); writer.serializeBytes(" ", 1); writer.serializeBytes(resource); @@ -238,7 +238,7 @@ namespace HTTP { { // Read the line that contains the chunk length as text in hex size_t lineLen = wait(read_delimited_into_string(conn, "\r\n", &r->content, pos)); - state int chunkLen = strtol(r->content.substr(pos, lineLen).c_str(), NULL, 16); + state int chunkLen = strtol(r->content.substr(pos, lineLen).c_str(), nullptr, 16); // Instead of advancing pos, erase the chunk length header line (line length + delimiter size) from the content buffer r->content.erase(pos, lineLen + 2); @@ -301,7 +301,7 @@ namespace HTTP { state TraceEvent event(SevDebug, "HTTPRequest"); state UnsentPacketQueue empty; - if(pContent == NULL) + if(pContent == nullptr) pContent = ∅ // There is no standard http request id header field, so either a global default can be set via a knob diff --git a/fdbclient/JSONDoc.h b/fdbclient/JSONDoc.h index aafd1bb87f..0c8fe14bba 100644 --- a/fdbclient/JSONDoc.h +++ b/fdbclient/JSONDoc.h @@ -67,11 +67,11 @@ // // The following would throw if a.b.c did not exist, or if it was not an int. // int x = r["a.b.c"].get_int(); struct JSONDoc { - JSONDoc() : pObj(NULL) {} + JSONDoc() : pObj(nullptr) {} // Construction from const json_spirit::mObject, trivial and will never throw. // Resulting JSONDoc will not allow modifications. - JSONDoc(const json_spirit::mObject &o) : pObj(&o), wpObj(NULL) {} + JSONDoc(const json_spirit::mObject &o) : pObj(&o), wpObj(nullptr) {} // Construction from json_spirit::mObject. Allows modifications. JSONDoc(json_spirit::mObject &o) : pObj(&o), wpObj(&o) {} @@ -79,7 +79,7 @@ struct JSONDoc { // Construction from const json_spirit::mValue (which is a Variant type) which will try to // convert it to an mObject. This will throw if that fails, just as it would // if the caller called get_obj() itself and used the previous constructor instead. - JSONDoc(const json_spirit::mValue &v) : pObj(&v.get_obj()), wpObj(NULL) {} + JSONDoc(const json_spirit::mValue &v) : pObj(&v.get_obj()), wpObj(nullptr) {} // Construction from non-const json_spirit::mValue - will convert the mValue to // an object if it isn't already and then attach to it. @@ -98,13 +98,13 @@ struct JSONDoc { // path into on the "dot" character. // When a path is found, pLast is updated. bool has(std::string path, bool split=true) { - if (pObj == NULL) + if (pObj == nullptr) return false; if (path.empty()) return false; size_t start = 0; - const json_spirit::mValue *curVal = NULL; + const json_spirit::mValue *curVal = nullptr; while (start < path.size()) { // If a path segment is found then curVal must be an object @@ -140,7 +140,7 @@ struct JSONDoc { // Creates the given path (forcing Objects to exist along its depth, replacing whatever else might have been there) // and returns a reference to the Value at that location. 
json_spirit::mValue & create(std::string path, bool split=true) { - if (wpObj == NULL || path.empty()) + if (wpObj == nullptr || path.empty()) throw std::runtime_error("JSON Object not writable or bad JSON path"); size_t start = 0; @@ -280,7 +280,7 @@ struct JSONDoc { } const json_spirit::mValue & last() const { return *pLast; } - bool valid() const { return pObj != NULL; } + bool valid() const { return pObj != nullptr; } const json_spirit::mObject & obj() { // This dummy object is necessary to make working with obj() easier when this does not currently @@ -304,7 +304,7 @@ struct JSONDoc { static uint64_t expires_reference_version; private: const json_spirit::mObject *pObj; - // Writeable pointer to the same object. Will be NULL if initialized from a const object. + // Writeable pointer to the same object. Will be nullptr if initialized from a const object. json_spirit::mObject *wpObj; const json_spirit::mValue *pLast; }; diff --git a/fdbclient/Knobs.cpp b/fdbclient/Knobs.cpp index c2e99f63fb..e194f8827e 100644 --- a/fdbclient/Knobs.cpp +++ b/fdbclient/Knobs.cpp @@ -52,7 +52,7 @@ void ClientKnobs::initialize(bool randomize) { init( COORDINATOR_RECONNECTION_DELAY, 1.0 ); init( CLIENT_EXAMPLE_AMOUNT, 20 ); init( MAX_CLIENT_STATUS_AGE, 1.0 ); - init( MAX_MASTER_PROXY_CONNECTIONS, 5 ); if( randomize && BUGGIFY ) MAX_MASTER_PROXY_CONNECTIONS = 1; + init( MAX_COMMIT_PROXY_CONNECTIONS, 5 ); if( randomize && BUGGIFY ) MAX_COMMIT_PROXY_CONNECTIONS = 1; init( MAX_GRV_PROXY_CONNECTIONS, 3 ); if( randomize && BUGGIFY ) MAX_GRV_PROXY_CONNECTIONS = 1; init( STATUS_IDLE_TIMEOUT, 120.0 ); @@ -104,7 +104,7 @@ void ClientKnobs::initialize(bool randomize) { init( WATCH_POLLING_TIME, 1.0 ); if( randomize && BUGGIFY ) WATCH_POLLING_TIME = 5.0; init( NO_RECENT_UPDATES_DURATION, 20.0 ); if( randomize && BUGGIFY ) NO_RECENT_UPDATES_DURATION = 0.1; init( FAST_WATCH_TIMEOUT, 20.0 ); if( randomize && BUGGIFY ) FAST_WATCH_TIMEOUT = 1.0; - init( WATCH_TIMEOUT, 900.0 ); if( randomize && BUGGIFY ) WATCH_TIMEOUT = 20.0; + init( WATCH_TIMEOUT, 30.0 ); if( randomize && BUGGIFY ) WATCH_TIMEOUT = 20.0; // Core init( CORE_VERSIONSPERSECOND, 1e6 ); @@ -171,10 +171,12 @@ void ClientKnobs::initialize(bool randomize) { init( MIN_CLEANUP_SECONDS, 3600.0 ); // Configuration - init( DEFAULT_AUTO_PROXIES, 3 ); + init( DEFAULT_AUTO_COMMIT_PROXIES, 3 ); init( DEFAULT_AUTO_GRV_PROXIES, 1 ); init( DEFAULT_AUTO_RESOLVERS, 1 ); init( DEFAULT_AUTO_LOGS, 3 ); + init( DEFAULT_COMMIT_GRV_PROXIES_RATIO, 3 ); + init( DEFAULT_MAX_GRV_PROXIES, 4 ); init( IS_ACCEPTABLE_DELAY, 1.5 ); diff --git a/fdbclient/Knobs.h b/fdbclient/Knobs.h index c1d02b9a64..11a06d52cc 100644 --- a/fdbclient/Knobs.h +++ b/fdbclient/Knobs.h @@ -27,8 +27,6 @@ class ClientKnobs : public Knobs { public: - int BYTE_LIMIT_UNLIMITED; - int ROW_LIMIT_UNLIMITED; int TOO_MANY; // FIXME: this should really be split up so we can control these more specifically @@ -48,7 +46,7 @@ public: double COORDINATOR_RECONNECTION_DELAY; int CLIENT_EXAMPLE_AMOUNT; double MAX_CLIENT_STATUS_AGE; - int MAX_MASTER_PROXY_CONNECTIONS; + int MAX_COMMIT_PROXY_CONNECTIONS; int MAX_GRV_PROXY_CONNECTIONS; double STATUS_IDLE_TIMEOUT; @@ -169,8 +167,10 @@ public: double MIN_CLEANUP_SECONDS; // Configuration - int32_t DEFAULT_AUTO_PROXIES; + int32_t DEFAULT_AUTO_COMMIT_PROXIES; int32_t DEFAULT_AUTO_GRV_PROXIES; + int32_t DEFAULT_COMMIT_GRV_PROXIES_RATIO; + int32_t DEFAULT_MAX_GRV_PROXIES; int32_t DEFAULT_AUTO_RESOLVERS; int32_t DEFAULT_AUTO_LOGS; diff --git a/fdbclient/ManagementAPI.actor.cpp 
b/fdbclient/ManagementAPI.actor.cpp index a05fce601e..2dd16588df 100644 --- a/fdbclient/ManagementAPI.actor.cpp +++ b/fdbclient/ManagementAPI.actor.cpp @@ -19,8 +19,10 @@ */ #include +#include #include +#include "fdbclient/Knobs.h" #include "flow/Arena.h" #include "fdbclient/FDBOptions.g.h" #include "fdbclient/FDBTypes.h" @@ -33,6 +35,7 @@ #include "fdbclient/DatabaseContext.h" #include "fdbrpc/simulator.h" #include "fdbclient/StatusClient.h" +#include "flow/Trace.h" #include "flow/UnitTest.h" #include "fdbrpc/ReplicationPolicy.h" #include "fdbrpc/Replication.h" @@ -78,8 +81,42 @@ std::map configForToken( std::string const& mode ) { std::string key = mode.substr(0, pos); std::string value = mode.substr(pos+1); - if ((key == "logs" || key == "proxies" || key == "grv_proxies" || key == "resolvers" || key == "remote_logs" || - key == "log_routers" || key == "usable_regions" || key == "repopulate_anti_quorum") && + if (key == "proxies" && isInteger(value)) { + printf("Warning: Proxy role is being split into GRV Proxy and Commit Proxy, now prefer configuring " + "'grv_proxies' and 'commit_proxies' separately. Generally we should follow that 'commit_proxies'" + " is three times of 'grv_proxies' count and 'grv_proxies' should be not more than 4.\n"); + int proxiesCount = atoi(value.c_str()); + if (proxiesCount == -1) { + proxiesCount = CLIENT_KNOBS->DEFAULT_AUTO_GRV_PROXIES + CLIENT_KNOBS->DEFAULT_AUTO_COMMIT_PROXIES; + ASSERT_WE_THINK(proxiesCount >= 2); + } + + if (proxiesCount < 2) { + printf("Error: At least 2 proxies (1 GRV proxy and 1 Commit proxy) are required.\n"); + return out; + } + + int grvProxyCount = + std::max(1, std::min(CLIENT_KNOBS->DEFAULT_MAX_GRV_PROXIES, + proxiesCount / (CLIENT_KNOBS->DEFAULT_COMMIT_GRV_PROXIES_RATIO + 1))); + int commitProxyCount = proxiesCount - grvProxyCount; + ASSERT_WE_THINK(grvProxyCount >= 1 && commitProxyCount >= 1); + + out[p + "grv_proxies"] = std::to_string(grvProxyCount); + out[p + "commit_proxies"] = std::to_string(commitProxyCount); + printf("%d proxies are automatically converted into %d GRV proxies and %d Commit proxies.\n", proxiesCount, + grvProxyCount, commitProxyCount); + + TraceEvent("DatabaseConfigurationProxiesSpecified") + .detail("SpecifiedProxies", atoi(value.c_str())) + .detail("EffectiveSpecifiedProxies", proxiesCount) + .detail("ConvertedGrvProxies", grvProxyCount) + .detail("ConvertedCommitProxies", commitProxyCount); + } + + if ((key == "logs" || key == "commit_proxies" || key == "grv_proxies" || key == "resolvers" || + key == "remote_logs" || key == "log_routers" || key == "usable_regions" || + key == "repopulate_anti_quorum") && isInteger(value)) { out[p+key] = value; } @@ -229,7 +266,8 @@ std::map configForToken( std::string const& mode ) { return out; } -ConfigurationResult::Type buildConfiguration( std::vector const& modeTokens, std::map& outConf ) { +ConfigurationResult buildConfiguration(std::vector const& modeTokens, + std::map& outConf) { for(auto it : modeTokens) { std::string mode = it.toString(); auto m = configForToken( mode ); @@ -265,7 +303,7 @@ ConfigurationResult::Type buildConfiguration( std::vector const& mode return ConfigurationResult::SUCCESS; } -ConfigurationResult::Type buildConfiguration( std::string const& configMode, std::map& outConf ) { +ConfigurationResult buildConfiguration(std::string const& configMode, std::map& outConf) { std::vector modes; int p = 0; @@ -305,7 +343,7 @@ ACTOR Future getDatabaseConfiguration( Database cx ) { } } -ACTOR Future changeConfig( Database cx, std::map m, bool force ) 
{ +ACTOR Future changeConfig(Database cx, std::map m, bool force) { state StringRef initIdKey = LiteralStringRef( "\xff/init_id" ); state Transaction tr(cx); @@ -656,7 +694,7 @@ ConfigureAutoResult parseConfig( StatusObject const& status ) { } if (processClass.classType() == ProcessClass::TransactionClass || - processClass.classType() == ProcessClass::ProxyClass || + processClass.classType() == ProcessClass::CommitProxyClass || processClass.classType() == ProcessClass::GrvProxyClass || processClass.classType() == ProcessClass::ResolutionClass || processClass.classType() == ProcessClass::StatelessClass || @@ -701,7 +739,7 @@ ConfigureAutoResult parseConfig( StatusObject const& status ) { if (proc.second == ProcessClass::StatelessClass) { existingStatelessCount++; } - if(proc.second == ProcessClass::ProxyClass) { + if (proc.second == ProcessClass::CommitProxyClass) { existingProxyCount++; } if (proc.second == ProcessClass::GrvProxyClass) { @@ -734,19 +772,18 @@ ConfigureAutoResult parseConfig( StatusObject const& status ) { resolverCount = result.old_resolvers; } - result.desired_proxies = std::max(std::min(12, processCount / 15), 1); + result.desired_commit_proxies = std::max(std::min(12, processCount / 15), 1); int proxyCount; - if (!statusObjConfig.get("proxies", result.old_proxies)) { - result.old_proxies = CLIENT_KNOBS->DEFAULT_AUTO_PROXIES; - statusObjConfig.get("auto_proxies", result.old_proxies); - result.auto_proxies = result.desired_proxies; - proxyCount = result.auto_proxies; + if (!statusObjConfig.get("commit_proxies", result.old_commit_proxies)) { + result.old_commit_proxies = CLIENT_KNOBS->DEFAULT_AUTO_COMMIT_PROXIES; + statusObjConfig.get("auto_commit_proxies", result.old_commit_proxies); + result.auto_commit_proxies = result.desired_commit_proxies; + proxyCount = result.auto_commit_proxies; } else { - result.auto_proxies = result.old_proxies; - proxyCount = result.old_proxies; + result.auto_commit_proxies = result.old_commit_proxies; + proxyCount = result.old_commit_proxies; } - // Need to configure a good number. 
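For reference, a worked example of the proxies -> grv_proxies/commit_proxies conversion performed by configForToken() in the ManagementAPI.actor.cpp hunk above, using the knob defaults added in Knobs.cpp (DEFAULT_COMMIT_GRV_PROXIES_RATIO = 3, DEFAULT_MAX_GRV_PROXIES = 4, DEFAULT_AUTO_GRV_PROXIES = 1, DEFAULT_AUTO_COMMIT_PROXIES = 3); illustrative only, not part of the patch:

    // grvProxyCount    = max(1, min(DEFAULT_MAX_GRV_PROXIES, proxiesCount / (DEFAULT_COMMIT_GRV_PROXIES_RATIO + 1)))
    // commitProxyCount = proxiesCount - grvProxyCount
    //
    //   configure proxies=5   ->  grv = max(1, min(4,  5 / 4)) = 1,  commit = 4
    //   configure proxies=12  ->  grv = max(1, min(4, 12 / 4)) = 3,  commit = 9
    //   configure proxies=-1  ->  proxiesCount = 1 + 3 = 4,  so grv = 1,  commit = 3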
result.desired_grv_proxies = std::max(std::min(4, processCount / 20), 1); int grvProxyCount; if (!statusObjConfig.get("grv_proxies", result.old_grv_proxies)) { @@ -823,7 +860,7 @@ ConfigureAutoResult parseConfig( StatusObject const& status ) { return result; } -ACTOR Future autoConfig( Database cx, ConfigureAutoResult conf ) { +ACTOR Future autoConfig(Database cx, ConfigureAutoResult conf) { state Transaction tr(cx); state Key versionKey = BinaryWriter::toValue(deterministicRandom()->randomUniqueID(),Unversioned()); @@ -857,8 +894,8 @@ ACTOR Future autoConfig( Database cx, ConfigureAutoRe if (conf.auto_logs != conf.old_logs) tr.set(configKeysPrefix.toString() + "auto_logs", format("%d", conf.auto_logs)); - if(conf.auto_proxies != conf.old_proxies) - tr.set(configKeysPrefix.toString() + "auto_proxies", format("%d", conf.auto_proxies)); + if (conf.auto_commit_proxies != conf.old_commit_proxies) + tr.set(configKeysPrefix.toString() + "auto_commit_proxies", format("%d", conf.auto_commit_proxies)); if (conf.auto_grv_proxies != conf.old_grv_proxies) tr.set(configKeysPrefix.toString() + "auto_grv_proxies", format("%d", conf.auto_grv_proxies)); @@ -890,7 +927,8 @@ ACTOR Future autoConfig( Database cx, ConfigureAutoRe } } -Future changeConfig( Database const& cx, std::vector const& modes, Optional const& conf, bool force ) { +Future changeConfig(Database const& cx, std::vector const& modes, + Optional const& conf, bool force) { if( modes.size() && modes[0] == LiteralStringRef("auto") && conf.present() ) { return autoConfig(cx, conf.get()); } @@ -902,7 +940,7 @@ Future changeConfig( Database const& cx, std::vector< return changeConfig(cx, m, force); } -Future changeConfig( Database const& cx, std::string const& modes, bool force ) { +Future changeConfig(Database const& cx, std::string const& modes, bool force) { TraceEvent("ChangeConfig").detail("Mode", modes); std::map m; auto r = buildConfiguration( modes, m ); @@ -971,7 +1009,7 @@ ACTOR Future> getCoordinators( Database cx ) { } } -ACTOR Future changeQuorum( Database cx, Reference change ) { +ACTOR Future changeQuorum(Database cx, Reference change) { state Transaction tr(cx); state int retries = 0; state std::vector desiredCoordinators; @@ -991,7 +1029,7 @@ ACTOR Future changeQuorum( Database cx, ReferencegetConnectionFile() && old.clusterKeyName().toString() != cx->getConnectionFile()->getConnectionString().clusterKeyName() ) return CoordinatorsResult::BAD_DATABASE_STATE; // Someone changed the "name" of the database?? 
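The ManagementAPI hunks below convert ConfigurationResult and CoordinatorsResult from classes wrapping a nested Type enum into scoped enums, so return types and the getDesiredCoordinators()/changeQuorum() signatures drop the ::Type suffix. A minimal sketch of the call-site effect of such an enum class, assuming the new declarations; illustrative only, not part of the patch:

    // Spelling of the values is unchanged, but the type is now the enum itself:
    CoordinatorsResult result = CoordinatorsResult::SUCCESS;
    if (result != CoordinatorsResult::SUCCESS) {
        // handle the failure case
    }
    // A scoped enum no longer converts implicitly to int; an explicit cast is
    // needed wherever a numeric code is still required:
    int code = static_cast<int>(result);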
- state CoordinatorsResult::Type result = CoordinatorsResult::SUCCESS; + state CoordinatorsResult result = CoordinatorsResult::SUCCESS; if(!desiredCoordinators.size()) { std::vector _desiredCoordinators = wait( change->getDesiredCoordinators( &tr, old.coordinators(), Reference(new ClusterConnectionFile(old)), result ) ); desiredCoordinators = _desiredCoordinators; @@ -1061,14 +1099,20 @@ ACTOR Future changeQuorum( Database cx, Reference desired; explicit SpecifiedQuorumChange( vector const& desired ) : desired(desired) {} - virtual Future> getDesiredCoordinators( Transaction* tr, vector oldCoordinators, Reference, CoordinatorsResult::Type& ) { + virtual Future> getDesiredCoordinators(Transaction* tr, + vector oldCoordinators, + Reference, + CoordinatorsResult&) { return desired; } }; Reference specifiedQuorumChange(vector const& addresses) { return Reference(new SpecifiedQuorumChange(addresses)); } struct NoQuorumChange : IQuorumChange { - virtual Future> getDesiredCoordinators( Transaction* tr, vector oldCoordinators, Reference, CoordinatorsResult::Type& ) { + virtual Future> getDesiredCoordinators(Transaction* tr, + vector oldCoordinators, + Reference, + CoordinatorsResult&) { return oldCoordinators; } }; @@ -1078,7 +1122,10 @@ struct NameQuorumChange : IQuorumChange { std::string newName; Reference otherChange; explicit NameQuorumChange( std::string const& newName, Reference const& otherChange ) : newName(newName), otherChange(otherChange) {} - virtual Future> getDesiredCoordinators( Transaction* tr, vector oldCoordinators, Reference cf, CoordinatorsResult::Type& t ) { + virtual Future> getDesiredCoordinators(Transaction* tr, + vector oldCoordinators, + Reference cf, + CoordinatorsResult& t) { return otherChange->getDesiredCoordinators(tr, oldCoordinators, cf, t); } virtual std::string getDesiredClusterKeyName() { @@ -1093,7 +1140,10 @@ struct AutoQuorumChange : IQuorumChange { int desired; explicit AutoQuorumChange( int desired ) : desired(desired) {} - virtual Future> getDesiredCoordinators( Transaction* tr, vector oldCoordinators, Reference ccf, CoordinatorsResult::Type& err ) { + virtual Future> getDesiredCoordinators(Transaction* tr, + vector oldCoordinators, + Reference ccf, + CoordinatorsResult& err) { return getDesired( this, tr, oldCoordinators, ccf, &err ); } @@ -1145,7 +1195,10 @@ struct AutoQuorumChange : IQuorumChange { return true; // The status quo seems fine } - ACTOR static Future> getDesired( AutoQuorumChange* self, Transaction* tr, vector oldCoordinators, Reference ccf, CoordinatorsResult::Type* err ) { + ACTOR static Future> getDesired(AutoQuorumChange* self, Transaction* tr, + vector oldCoordinators, + Reference ccf, + CoordinatorsResult* err) { state int desiredCount = self->desired; if(desiredCount == -1) { diff --git a/fdbclient/ManagementAPI.actor.h b/fdbclient/ManagementAPI.actor.h index 20b2a447d9..660cbac4b7 100644 --- a/fdbclient/ManagementAPI.actor.h +++ b/fdbclient/ManagementAPI.actor.h @@ -43,41 +43,35 @@ standard API and some knowledge of the contents of the system key space. // ConfigurationResult enumerates normal outcomes of changeConfig() and various error // conditions specific to it. changeConfig may also throw an Error to report other problems. 
-class ConfigurationResult { -public: - enum Type { - NO_OPTIONS_PROVIDED, - CONFLICTING_OPTIONS, - UNKNOWN_OPTION, - INCOMPLETE_CONFIGURATION, - INVALID_CONFIGURATION, - DATABASE_ALREADY_CREATED, - DATABASE_CREATED, - DATABASE_UNAVAILABLE, - STORAGE_IN_UNKNOWN_DCID, - REGION_NOT_FULLY_REPLICATED, - MULTIPLE_ACTIVE_REGIONS, - REGIONS_CHANGED, - NOT_ENOUGH_WORKERS, - REGION_REPLICATION_MISMATCH, - DCID_MISSING, - LOCKED_NOT_NEW, - SUCCESS, - }; +enum class ConfigurationResult { + NO_OPTIONS_PROVIDED, + CONFLICTING_OPTIONS, + UNKNOWN_OPTION, + INCOMPLETE_CONFIGURATION, + INVALID_CONFIGURATION, + DATABASE_ALREADY_CREATED, + DATABASE_CREATED, + DATABASE_UNAVAILABLE, + STORAGE_IN_UNKNOWN_DCID, + REGION_NOT_FULLY_REPLICATED, + MULTIPLE_ACTIVE_REGIONS, + REGIONS_CHANGED, + NOT_ENOUGH_WORKERS, + REGION_REPLICATION_MISMATCH, + DCID_MISSING, + LOCKED_NOT_NEW, + SUCCESS, }; -class CoordinatorsResult { -public: - enum Type { - INVALID_NETWORK_ADDRESSES, - SAME_NETWORK_ADDRESSES, - NOT_COORDINATORS, //FIXME: not detected - DATABASE_UNREACHABLE, //FIXME: not detected - BAD_DATABASE_STATE, - COORDINATOR_UNREACHABLE, - NOT_ENOUGH_MACHINES, - SUCCESS - }; +enum class CoordinatorsResult { + INVALID_NETWORK_ADDRESSES, + SAME_NETWORK_ADDRESSES, + NOT_COORDINATORS, // FIXME: not detected + DATABASE_UNREACHABLE, // FIXME: not detected + BAD_DATABASE_STATE, + COORDINATOR_UNREACHABLE, + NOT_ENOUGH_MACHINES, + SUCCESS }; struct ConfigureAutoResult { @@ -86,7 +80,7 @@ struct ConfigureAutoResult { int32_t machines; std::string old_replication; - int32_t old_proxies; + int32_t old_commit_proxies; int32_t old_grv_proxies; int32_t old_resolvers; int32_t old_logs; @@ -94,38 +88,46 @@ struct ConfigureAutoResult { int32_t old_machines_with_transaction; std::string auto_replication; - int32_t auto_proxies; + int32_t auto_commit_proxies; int32_t auto_grv_proxies; int32_t auto_resolvers; int32_t auto_logs; int32_t auto_processes_with_transaction; int32_t auto_machines_with_transaction; - int32_t desired_proxies; + int32_t desired_commit_proxies; int32_t desired_grv_proxies; int32_t desired_resolvers; int32_t desired_logs; ConfigureAutoResult() - : processes(-1), machines(-1), old_proxies(-1), old_grv_proxies(-1), old_resolvers(-1), old_logs(-1), - old_processes_with_transaction(-1), old_machines_with_transaction(-1), auto_proxies(-1), auto_grv_proxies(-1), - auto_resolvers(-1), auto_logs(-1), auto_processes_with_transaction(-1), auto_machines_with_transaction(-1), - desired_proxies(-1), desired_grv_proxies(-1), desired_resolvers(-1), desired_logs(-1) {} + : processes(-1), machines(-1), old_commit_proxies(-1), old_grv_proxies(-1), old_resolvers(-1), old_logs(-1), + old_processes_with_transaction(-1), old_machines_with_transaction(-1), auto_commit_proxies(-1), + auto_grv_proxies(-1), auto_resolvers(-1), auto_logs(-1), auto_processes_with_transaction(-1), + auto_machines_with_transaction(-1), desired_commit_proxies(-1), desired_grv_proxies(-1), desired_resolvers(-1), + desired_logs(-1) {} bool isValid() const { return processes != -1; } }; -ConfigurationResult::Type buildConfiguration( std::vector const& modeTokens, std::map& outConf ); // Accepts a vector of configuration tokens -ConfigurationResult::Type buildConfiguration( std::string const& modeString, std::map& outConf ); // Accepts tokens separated by spaces in a single string +ConfigurationResult buildConfiguration( + std::vector const& modeTokens, + std::map& outConf); // Accepts a vector of configuration tokens +ConfigurationResult buildConfiguration( + std::string 
const& modeString, + std::map& outConf); // Accepts tokens separated by spaces in a single string bool isCompleteConfiguration( std::map const& options ); // All versions of changeConfig apply the given set of configuration tokens to the database, and return a ConfigurationResult (or error). -Future changeConfig( Database const& cx, std::string const& configMode, bool force ); // Accepts tokens separated by spaces in a single string +Future changeConfig(Database const& cx, std::string const& configMode, + bool force); // Accepts tokens separated by spaces in a single string ConfigureAutoResult parseConfig( StatusObject const& status ); -Future changeConfig( Database const& cx, std::vector const& modes, Optional const& conf, bool force ); // Accepts a vector of configuration tokens -ACTOR Future changeConfig( +Future changeConfig(Database const& cx, std::vector const& modes, + Optional const& conf, + bool force); // Accepts a vector of configuration tokens +ACTOR Future changeConfig( Database cx, std::map m, bool force); // Accepts a full configuration in key/value format (from buildConfiguration) @@ -134,12 +136,15 @@ ACTOR Future waitForFullReplication(Database cx); struct IQuorumChange : ReferenceCounted { virtual ~IQuorumChange() {} - virtual Future> getDesiredCoordinators( Transaction* tr, vector oldCoordinators, Reference, CoordinatorsResult::Type& ) = 0; + virtual Future> getDesiredCoordinators(Transaction* tr, + vector oldCoordinators, + Reference, + CoordinatorsResult&) = 0; virtual std::string getDesiredClusterKeyName() { return std::string(); } }; // Change to use the given set of coordination servers -ACTOR Future changeQuorum(Database cx, Reference change); +ACTOR Future changeQuorum(Database cx, Reference change); Reference autoQuorumChange(int desired = -1); Reference noQuorumChange(); Reference specifiedQuorumChange(vector const&); diff --git a/fdbclient/MetricLogger.actor.cpp b/fdbclient/MetricLogger.actor.cpp index 7b5a16cb97..8d6778545a 100644 --- a/fdbclient/MetricLogger.actor.cpp +++ b/fdbclient/MetricLogger.actor.cpp @@ -171,7 +171,7 @@ ACTOR Future metricRuleUpdater(Database cx, MetricsConfig *config, TDMetri // Implementation of IMetricDB class MetricDB : public IMetricDB { public: - MetricDB(ReadYourWritesTransaction *tr = NULL) : tr(tr) {} + MetricDB(ReadYourWritesTransaction *tr = nullptr) : tr(tr) {} ~MetricDB() {} // levelKey is the prefix for the entire level, no timestamp at the end diff --git a/fdbclient/MonitorLeader.actor.cpp b/fdbclient/MonitorLeader.actor.cpp index 1e13b18560..e3ac757840 100644 --- a/fdbclient/MonitorLeader.actor.cpp +++ b/fdbclient/MonitorLeader.actor.cpp @@ -624,8 +624,8 @@ ACTOR Future getClientInfoFromLeader( Referenceget().get().clientInterface.openDatabase.getReply( req ) ) ) ) { TraceEvent("MonitorLeaderForProxiesGotClientInfo", knownLeader->get().get().clientInterface.id()) - .detail("MasterProxy0", ni.masterProxies.size() ? ni.masterProxies[0].id() : UID()) - .detail("GrvProxy0", ni.grvProxies.size() ? ni.grvProxies[0].id() : UID()) + .detail("CommitProxy0", ni.commitProxies.size() ? ni.commitProxies[0].id() : UID()) + .detail("GrvProxy0", ni.grvProxies.size() ? 
ni.grvProxies[0].id() : UID()) .detail("ClientID", ni.id); clientData->clientInfo->set(CachedSerialization(ni)); } @@ -681,24 +681,25 @@ ACTOR Future monitorLeaderForProxies( Key clusterKey, vector& lastMasterProxyUIDs, std::vector& lastMasterProxies, - std::vector& lastGrvProxyUIDs, std::vector& lastGrvProxies) { - if(ni.masterProxies.size() > CLIENT_KNOBS->MAX_MASTER_PROXY_CONNECTIONS) { - std::vector masterProxyUIDs; - for(auto& masterProxy : ni.masterProxies) { - masterProxyUIDs.push_back(masterProxy.id()); +void shrinkProxyList(ClientDBInfo& ni, std::vector& lastCommitProxyUIDs, + std::vector& lastCommitProxies, std::vector& lastGrvProxyUIDs, + std::vector& lastGrvProxies) { + if (ni.commitProxies.size() > CLIENT_KNOBS->MAX_COMMIT_PROXY_CONNECTIONS) { + std::vector commitProxyUIDs; + for (auto& commitProxy : ni.commitProxies) { + commitProxyUIDs.push_back(commitProxy.id()); } - if(masterProxyUIDs != lastMasterProxyUIDs) { - lastMasterProxyUIDs.swap(masterProxyUIDs); - lastMasterProxies = ni.masterProxies; - deterministicRandom()->randomShuffle(lastMasterProxies); - lastMasterProxies.resize(CLIENT_KNOBS->MAX_MASTER_PROXY_CONNECTIONS); - for(int i = 0; i < lastMasterProxies.size(); i++) { - TraceEvent("ConnectedMasterProxy").detail("MasterProxy", lastMasterProxies[i].id()); + if (commitProxyUIDs != lastCommitProxyUIDs) { + lastCommitProxyUIDs.swap(commitProxyUIDs); + lastCommitProxies = ni.commitProxies; + deterministicRandom()->randomShuffle(lastCommitProxies); + lastCommitProxies.resize(CLIENT_KNOBS->MAX_COMMIT_PROXY_CONNECTIONS); + for (int i = 0; i < lastCommitProxies.size(); i++) { + TraceEvent("ConnectedCommitProxy").detail("CommitProxy", lastCommitProxies[i].id()); } } - ni.firstProxy = ni.masterProxies[0]; - ni.masterProxies = lastMasterProxies; + ni.firstCommitProxy = ni.commitProxies[0]; + ni.commitProxies = lastCommitProxies; } if(ni.grvProxies.size() > CLIENT_KNOBS->MAX_GRV_PROXY_CONNECTIONS) { std::vector grvProxyUIDs; @@ -719,14 +720,16 @@ void shrinkProxyList( ClientDBInfo& ni, std::vector& lastMasterProxyUIDs, s } // Leader is the process that will be elected by coordinators as the cluster controller -ACTOR Future monitorProxiesOneGeneration( Reference connFile, Reference> clientInfo, MonitorLeaderInfo info, Reference>>> supportedVersions, Key traceLogGroup) { +ACTOR Future monitorProxiesOneGeneration( + Reference connFile, Reference> clientInfo, MonitorLeaderInfo info, + Reference>>> supportedVersions, Key traceLogGroup) { state ClusterConnectionString cs = info.intermediateConnFile->getConnectionString(); state vector addrs = cs.coordinators(); state int idx = 0; state int successIdx = 0; state Optional incorrectTime; - state std::vector lastProxyUIDs; - state std::vector lastProxies; + state std::vector lastCommitProxyUIDs; + state std::vector lastCommitProxies; state std::vector lastGrvProxyUIDs; state std::vector lastGrvProxies; @@ -780,7 +783,7 @@ ACTOR Future monitorProxiesOneGeneration( ReferencenotifyConnected(); auto& ni = rep.get().mutate(); - shrinkProxyList(ni, lastProxyUIDs, lastProxies, lastGrvProxyUIDs, lastGrvProxies); + shrinkProxyList(ni, lastCommitProxyUIDs, lastCommitProxies, lastGrvProxyUIDs, lastGrvProxies); clientInfo->set( ni ); successIdx = idx; } else { diff --git a/fdbclient/MonitorLeader.h b/fdbclient/MonitorLeader.h index 58f1fd3bbd..643cf361c7 100644 --- a/fdbclient/MonitorLeader.h +++ b/fdbclient/MonitorLeader.h @@ -25,7 +25,7 @@ #include "fdbclient/FDBTypes.h" #include "fdbclient/CoordinationInterface.h" #include 
"fdbclient/ClusterInterface.h" -#include "fdbclient/MasterProxyInterface.h" +#include "fdbclient/CommitProxyInterface.h" #define CLUSTER_FILE_ENV_VAR_NAME "FDB_CLUSTER_FILE" @@ -67,8 +67,9 @@ Future monitorLeaderForProxies( Value const& key, vector c Future monitorProxies( Reference>> const& connFile, Reference> const& clientInfo, Reference>>> const& supportedVersions, Key const& traceLogGroup ); -void shrinkProxyList( ClientDBInfo& ni, std::vector& lastMasterProxyUIDs, std::vector& lastMasterProxies, - std::vector& lastGrvProxyUIDs, std::vector& lastGrvProxies); +void shrinkProxyList(ClientDBInfo& ni, std::vector& lastCommitProxyUIDs, + std::vector& lastCommitProxies, std::vector& lastGrvProxyUIDs, + std::vector& lastGrvProxies); #ifndef __INTEL_COMPILER #pragma region Implementation diff --git a/fdbclient/MultiVersionAssignmentVars.h b/fdbclient/MultiVersionAssignmentVars.h index b4c84f11b9..ef63c4d0f7 100644 --- a/fdbclient/MultiVersionAssignmentVars.h +++ b/fdbclient/MultiVersionAssignmentVars.h @@ -163,7 +163,7 @@ public: if(destroyNow) { api->futureDestroy(f); - f = NULL; + f = nullptr; } return destroyNow; @@ -202,7 +202,7 @@ public: auto sav = (DLThreadSingleAssignmentVar*)param; if(MultiVersionApi::api->callbackOnMainThread) { - onMainThreadVoid([sav](){ sav->apply(); }, NULL); + onMainThreadVoid([sav](){ sav->apply(); }, nullptr); } else { sav->apply(); diff --git a/fdbclient/MultiVersionTransaction.actor.cpp b/fdbclient/MultiVersionTransaction.actor.cpp index 7dedd2f267..e8c0a6aa27 100644 --- a/fdbclient/MultiVersionTransaction.actor.cpp +++ b/fdbclient/MultiVersionTransaction.actor.cpp @@ -224,7 +224,7 @@ ThreadFuture DLTransaction::getApproximateSize() { } void DLTransaction::setOption(FDBTransactionOptions::Option option, Optional value) { - throwIfError(api->transactionSetOption(tr, option, value.present() ? value.get().begin() : NULL, value.present() ? value.get().size() : 0)); + throwIfError(api->transactionSetOption(tr, option, value.present() ? value.get().begin() : nullptr, value.present() ? value.get().size() : 0)); } ThreadFuture DLTransaction::onError(Error const& e) { @@ -262,14 +262,14 @@ Reference DLDatabase::createTransaction() { } void DLDatabase::setOption(FDBDatabaseOptions::Option option, Optional value) { - throwIfError(api->databaseSetOption(db, option, value.present() ? value.get().begin() : NULL, value.present() ? value.get().size() : 0)); + throwIfError(api->databaseSetOption(db, option, value.present() ? value.get().begin() : nullptr, value.present() ? 
value.get().size() : 0)); } // DLApi template void loadClientFunction(T *fp, void *lib, std::string libPath, const char *functionName, bool requireFunction = true) { *(void**)(fp) = loadFunction(lib, functionName); - if(*fp == NULL && requireFunction) { + if(*fp == nullptr && requireFunction) { TraceEvent(SevError, "ErrorLoadingFunction").detail("LibraryPath", libPath).detail("Function", functionName); throw platform_error(); } @@ -283,7 +283,7 @@ void DLApi::init() { } void* lib = loadLibrary(fdbCPath.c_str()); - if(lib == NULL) { + if(lib == nullptr) { TraceEvent(SevError, "ErrorLoadingExternalClientLibrary").detail("LibraryPath", fdbCPath); throw platform_error(); } @@ -347,7 +347,7 @@ void DLApi::selectApiVersion(int apiVersion) { init(); throwIfError(api->selectApiVersion(apiVersion, headerVersion)); - throwIfError(api->setNetworkOption(FDBNetworkOptions::EXTERNAL_CLIENT, NULL, 0)); + throwIfError(api->setNetworkOption(FDBNetworkOptions::EXTERNAL_CLIENT, nullptr, 0)); } const char* DLApi::getClientVersion() { @@ -359,7 +359,7 @@ const char* DLApi::getClientVersion() { } void DLApi::setNetworkOption(FDBNetworkOptions::Option option, Optional value) { - throwIfError(api->setNetworkOption(option, value.present() ? value.get().begin() : NULL, value.present() ? value.get().size() : 0)); + throwIfError(api->setNetworkOption(option, value.present() ? value.get().begin() : nullptr, value.present() ? value.get().size() : 0)); } void DLApi::setupNetwork() { @@ -786,7 +786,7 @@ void MultiVersionDatabase::Connector::connect() { else { delref(); } - }, NULL); + }, nullptr); } // Only called from main thread @@ -805,7 +805,7 @@ void MultiVersionDatabase::Connector::fire(const Void &unused, int& userParam) { dbState->stateChanged(); } delref(); - }, NULL); + }, nullptr); } void MultiVersionDatabase::Connector::error(const Error& e, int& userParam) { @@ -820,7 +820,7 @@ void MultiVersionDatabase::Connector::error(const Error& e, int& userParam) { } MultiVersionDatabase::DatabaseState::DatabaseState() - : dbVar(new ThreadSafeAsyncVar>(Reference(NULL))), currentClientIndex(-1) {} + : dbVar(new ThreadSafeAsyncVar>(Reference(nullptr))), currentClientIndex(-1) {} // Only called from main thread void MultiVersionDatabase::DatabaseState::stateChanged() { @@ -898,7 +898,7 @@ void MultiVersionDatabase::DatabaseState::cancelConnections() { connectionAttempts.clear(); clients.clear(); delref(); - }, NULL); + }, nullptr); } // MultiVersionApi @@ -1043,7 +1043,7 @@ void MultiVersionApi::setSupportedClientVersions(Standalone versions) // This option must be set on the main thread because it modifes structures that can be used concurrently by the main thread onMainThreadVoid([this, versions](){ localClient->api->setNetworkOption(FDBNetworkOptions::SUPPORTED_CLIENT_VERSIONS, versions); - }, NULL); + }, nullptr); if(!bypassMultiClientApi) { runOnExternalClients([versions](Reference client) { @@ -1654,7 +1654,7 @@ THREAD_FUNC runSingleAssignmentVarTest(void *arg) { onMainThreadVoid([done](){ *done = true; - }, NULL); + }, nullptr); } catch(Error &e) { printf("Caught error in test: %s\n", e.name()); diff --git a/fdbclient/MultiVersionTransaction.h b/fdbclient/MultiVersionTransaction.h index c803032cc7..cfb3cfe1a8 100644 --- a/fdbclient/MultiVersionTransaction.h +++ b/fdbclient/MultiVersionTransaction.h @@ -286,7 +286,7 @@ struct ClientInfo : ThreadSafeReferenceCounted { bool failed; std::vector> threadCompletionHooks; - ClientInfo() : protocolVersion(0), api(NULL), external(false), failed(true) {} + ClientInfo() : 
protocolVersion(0), api(nullptr), external(false), failed(true) {} ClientInfo(IClientApi *api) : protocolVersion(0), api(api), libPath("internal"), external(false), failed(false) {} ClientInfo(IClientApi *api, std::string libPath) : protocolVersion(0), api(api), libPath(libPath), external(true), failed(false) {} diff --git a/fdbclient/MutationList.h b/fdbclient/MutationList.h index bcc9b0db76..145c50b0f1 100644 --- a/fdbclient/MutationList.h +++ b/fdbclient/MutationList.h @@ -62,7 +62,7 @@ public: auto e = ptr->end(); // e points to the end of the current blob if (e == blob->data.end()) { // the condition sanity checks e is at the end of current blob blob = blob->next; - e = blob ? blob->data.begin() : NULL; + e = blob ? blob->data.begin() : nullptr; } ptr = (Header*)e; decode(); @@ -70,7 +70,7 @@ public: bool operator == ( Iterator const& i ) const { return ptr == i.ptr; } bool operator != ( Iterator const& i) const { return ptr != i.ptr; } - explicit operator bool() const { return blob!=NULL; } + explicit operator bool() const { return blob!=nullptr; } typedef std::forward_iterator_tag iterator_category; typedef const MutationRef value_type; @@ -79,7 +79,7 @@ public: typedef const MutationRef& reference; Iterator( Blob* blob, const Header* ptr ) : blob(blob), ptr(ptr) { decode(); } - Iterator() : blob(NULL), ptr(NULL) { } + Iterator() : blob(nullptr), ptr(nullptr) { } private: friend struct MutationListRef; const Blob* blob; // The blob containing the indicated mutation @@ -95,16 +95,16 @@ public: } }; - MutationListRef() : blob_begin(NULL), blob_end(NULL), totalBytes(0) { + MutationListRef() : blob_begin(nullptr), blob_end(nullptr), totalBytes(0) { } - MutationListRef( Arena& ar, MutationListRef const& r ) : blob_begin(NULL), blob_end(NULL), totalBytes(0) { + MutationListRef( Arena& ar, MutationListRef const& r ) : blob_begin(nullptr), blob_end(nullptr), totalBytes(0) { append_deep(ar, r.begin(), r.end()); } Iterator begin() const { if (blob_begin) return Iterator(blob_begin, (Header*)blob_begin->data.begin()); - return Iterator(NULL, NULL); + return Iterator(nullptr, nullptr); } - Iterator end() const { return Iterator(NULL, NULL); } + Iterator end() const { return Iterator(nullptr, nullptr); } size_t expectedSize() const { return sizeof(Blob) + totalBytes; } int totalSize() const { return totalBytes; } @@ -146,12 +146,13 @@ public: if(totalBytes > 0) { blob_begin = blob_end = new (ar.arena()) Blob; - blob_begin->next = NULL; + blob_begin->next = nullptr; blob_begin->data = StringRef((const uint8_t*)ar.arenaRead(totalBytes), totalBytes); // Zero-copy read when deserializing from an ArenaReader } } - //FIXME: this is re-implemented on the master proxy to include a yield, any changes to this function should also done there + // FIXME: this is re-implemented on the commit proxy to include a yield, any changes to this function should also + // done there template void serialize_save( Ar& ar ) const { serializer(ar, totalBytes); @@ -180,7 +181,7 @@ private: } blob_end->data = StringRef(b, bytes); - blob_end->next = NULL; + blob_end->next = nullptr; return b; } diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 7099d6fbea..6b3b12a39b 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -40,7 +40,7 @@ #include "fdbclient/KeyRangeMap.h" #include "fdbclient/Knobs.h" #include "fdbclient/ManagementAPI.actor.h" -#include "fdbclient/MasterProxyInterface.h" +#include "fdbclient/CommitProxyInterface.h" #include "fdbclient/MonitorLeader.h" 
#include "fdbclient/MutationList.h" #include "fdbclient/ReadYourWrites.h" @@ -95,7 +95,7 @@ Future loadBalance( DatabaseContext* ctx, const Reference alternatives, RequestStream Interface::*channel, const Request& request = Request(), TaskPriority taskID = TaskPriority::DefaultPromiseEndpoint, bool atMostOnce = false, // if true, throws request_maybe_delivered() instead of retrying automatically - QueueModel* model = NULL) { + QueueModel* model = nullptr) { if (alternatives->hasCaches) { return loadBalance(alternatives->locations(), channel, request, taskID, atMostOnce, model); } @@ -147,7 +147,7 @@ Reference StorageServerInfo::getInterface( DatabaseContext *c } void StorageServerInfo::notifyContextDestroyed() { - cx = NULL; + cx = nullptr; } StorageServerInfo::~StorageServerInfo() { @@ -155,7 +155,7 @@ StorageServerInfo::~StorageServerInfo() { auto it = cx->server_interf.find( interf.id() ); if( it != cx->server_interf.end() ) cx->server_interf.erase( it ); - cx = NULL; + cx = nullptr; } } @@ -189,6 +189,12 @@ std::string printable( const KeyRangeRef& range ) { return printable(range.begin) + " - " + printable(range.end); } +std::string printable(const VectorRef& val) { + std::string s; + for (int i = 0; i < val.size(); i++) s = s + printable(val[i]) + " "; + return s; +} + int unhex( char c ) { if (c >= '0' && c <= '9') return c-'0'; @@ -484,15 +490,15 @@ ACTOR static Future clientStatusUpdateActor(DatabaseContext *cx) { } ACTOR static Future monitorProxiesChange(Reference> clientDBInfo, AsyncTrigger *triggerVar) { - state vector< MasterProxyInterface > curProxies; + state vector curCommitProxies; state vector< GrvProxyInterface > curGrvProxies; - curProxies = clientDBInfo->get().masterProxies; + curCommitProxies = clientDBInfo->get().commitProxies; curGrvProxies = clientDBInfo->get().grvProxies; loop{ wait(clientDBInfo->onChange()); - if (clientDBInfo->get().masterProxies != curProxies || clientDBInfo->get().grvProxies != curGrvProxies) { - curProxies = clientDBInfo->get().masterProxies; + if (clientDBInfo->get().commitProxies != curCommitProxies || clientDBInfo->get().grvProxies != curGrvProxies) { + curCommitProxies = clientDBInfo->get().commitProxies; curGrvProxies = clientDBInfo->get().grvProxies; triggerVar->trigger(); } @@ -881,7 +887,7 @@ DatabaseContext::DatabaseContext(Reference(specialKeys.begin, specialKeys.end, /* test */ false)) { dbId = deterministicRandom()->randomUniqueID(); - connected = (clientInfo->get().masterProxies.size() && clientInfo->get().grvProxies.size()) + connected = (clientInfo->get().commitProxies.size() && clientInfo->get().grvProxies.size()) ? 
Void() : clientInfo->onChange(); @@ -930,6 +936,16 @@ DatabaseContext::DatabaseContext(Reference( KeyRangeRef(LiteralStringRef("inProgressExclusion/"), LiteralStringRef("inProgressExclusion0")) .withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin))); + registerSpecialKeySpaceModule( + SpecialKeySpace::MODULE::CONFIGURATION, SpecialKeySpace::IMPLTYPE::READWRITE, + std::make_unique( + KeyRangeRef(LiteralStringRef("process/class_type/"), LiteralStringRef("process/class_type0")) + .withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::CONFIGURATION).begin))); + registerSpecialKeySpaceModule( + SpecialKeySpace::MODULE::CONFIGURATION, SpecialKeySpace::IMPLTYPE::READONLY, + std::make_unique( + KeyRangeRef(LiteralStringRef("process/class_source/"), LiteralStringRef("process/class_source0")) + .withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::CONFIGURATION).begin))); } if (apiVersionAtLeast(630)) { registerSpecialKeySpaceModule(SpecialKeySpace::MODULE::TRANSACTION, SpecialKeySpace::IMPLTYPE::READONLY, @@ -1164,9 +1180,9 @@ void DatabaseContext::setOption( FDBDatabaseOptions::Option option, Optional(value.get()) : Optional>(), clientLocality.machineId(), clientLocality.dcId() ); - if( clientInfo->get().masterProxies.size() ) - masterProxies = Reference( new ProxyInfo( clientInfo->get().masterProxies) ); - if( clientInfo->get().grvProxies.size() ) + if (clientInfo->get().commitProxies.size()) + commitProxies = Reference(new CommitProxyInfo(clientInfo->get().commitProxies)); + if( clientInfo->get().grvProxies.size() ) grvProxies = Reference( new GrvProxyInfo( clientInfo->get().grvProxies ) ); server_interf.clear(); locationCache.insert( allKeys, Reference() ); @@ -1176,9 +1192,9 @@ void DatabaseContext::setOption( FDBDatabaseOptions::Option option, Optional(value.get()) : Optional>()); - if( clientInfo->get().masterProxies.size() ) - masterProxies = Reference( new ProxyInfo( clientInfo->get().masterProxies)); - if( clientInfo->get().grvProxies.size() ) + if (clientInfo->get().commitProxies.size()) + commitProxies = Reference(new CommitProxyInfo(clientInfo->get().commitProxies)); + if( clientInfo->get().grvProxies.size() ) grvProxies = Reference( new GrvProxyInfo( clientInfo->get().grvProxies )); server_interf.clear(); locationCache.insert( allKeys, Reference() ); @@ -1220,13 +1236,13 @@ ACTOR static Future switchConnectionFileImpl(ReferencegetConnectionString().toString()); // Reset state from former cluster. - self->masterProxies.clear(); + self->commitProxies.clear(); self->grvProxies.clear(); self->minAcceptableReadVersion = std::numeric_limits::max(); self->invalidateCache(allKeys); auto clearedClientInfo = self->clientInfo->get(); - clearedClientInfo.masterProxies.clear(); + clearedClientInfo.commitProxies.clear(); clearedClientInfo.grvProxies.clear(); clearedClientInfo.id = deterministicRandom()->randomUniqueID(); self->clientInfo->set(clearedClientInfo); @@ -1307,7 +1323,7 @@ Database Database::createDatabase( Reference connFile, in .detail("PackageName", FDB_VT_PACKAGE_NAME) .detail("ClusterFile", connFile->getFilename().c_str()) .detail("ConnectionString", connFile->getConnectionString().toString()) - .detailf("ActualTime", "%lld", DEBUG_DETERMINISM ? 0 : time(NULL)) + .detailf("ActualTime", "%lld", DEBUG_DETERMINISM ? 
0 : time(nullptr)) .detail("ApiVersion", apiVersion) .detailf("ImageOffset", "%p", platform::getImageOffset()) .trackLatest("ClientStart"); @@ -1561,29 +1577,29 @@ void stopNetwork() { void DatabaseContext::updateProxies() { if (proxiesLastChange == clientInfo->get().id) return; proxiesLastChange = clientInfo->get().id; - masterProxies.clear(); + commitProxies.clear(); grvProxies.clear(); - bool masterProxyProvisional = false, grvProxyProvisional = false; - if (clientInfo->get().masterProxies.size()) { - masterProxies = Reference(new ProxyInfo(clientInfo->get().masterProxies)); - masterProxyProvisional = clientInfo->get().masterProxies[0].provisional; + bool commitProxyProvisional = false, grvProxyProvisional = false; + if (clientInfo->get().commitProxies.size()) { + commitProxies = Reference(new CommitProxyInfo(clientInfo->get().commitProxies)); + commitProxyProvisional = clientInfo->get().commitProxies[0].provisional; } if (clientInfo->get().grvProxies.size()) { grvProxies = Reference(new GrvProxyInfo(clientInfo->get().grvProxies)); grvProxyProvisional = clientInfo->get().grvProxies[0].provisional; } - if (clientInfo->get().masterProxies.size() && clientInfo->get().grvProxies.size()) { - ASSERT(masterProxyProvisional == grvProxyProvisional); - proxyProvisional = masterProxyProvisional; + if (clientInfo->get().commitProxies.size() && clientInfo->get().grvProxies.size()) { + ASSERT(commitProxyProvisional == grvProxyProvisional); + proxyProvisional = commitProxyProvisional; } } -Reference DatabaseContext::getMasterProxies(bool useProvisionalProxies) { +Reference DatabaseContext::getCommitProxies(bool useProvisionalProxies) { updateProxies(); if (proxyProvisional && !useProvisionalProxies) { - return Reference(); + return Reference(); } - return masterProxies; + return commitProxies; } Reference DatabaseContext::getGrvProxies(bool useProvisionalProxies) { @@ -1594,19 +1610,19 @@ Reference DatabaseContext::getGrvProxies(bool useProvisionalProxie return grvProxies; } -//Actor which will wait until the MultiInterface returned by the DatabaseContext cx is not NULL -ACTOR Future> getMasterProxiesFuture(DatabaseContext *cx, bool useProvisionalProxies) { +// Actor which will wait until the MultiInterface returned by the DatabaseContext cx is not nullptr +ACTOR Future> getCommitProxiesFuture(DatabaseContext* cx, bool useProvisionalProxies) { loop{ - Reference proxies = cx->getMasterProxies(useProvisionalProxies); - if (proxies) - return proxies; + Reference commitProxies = cx->getCommitProxies(useProvisionalProxies); + if (commitProxies) + return commitProxies; wait( cx->onProxiesChanged() ); } } -//Returns a future which will not be set until the ProxyInfo of this DatabaseContext is not NULL -Future> DatabaseContext::getMasterProxiesFuture(bool useProvisionalProxies) { - return ::getMasterProxiesFuture(this, useProvisionalProxies); +// Returns a future which will not be set until the CommitProxyInfo of this DatabaseContext is not nullptr +Future> DatabaseContext::getCommitProxiesFuture(bool useProvisionalProxies) { + return ::getCommitProxiesFuture(this, useProvisionalProxies); } void GetRangeLimits::decrement( VectorRef const& data ) { @@ -1733,8 +1749,8 @@ ACTOR Future>> getKeyLocation_internal(Da ++cx->transactionKeyServerLocationRequests; choose { when (wait(cx->onProxiesChanged())) {} - when (GetKeyServerLocationsReply rep = wait(basicLoadBalance( - cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::getKeyServersLocations, + when(GetKeyServerLocationsReply rep = 
wait(basicLoadBalance( + cx->getCommitProxies(info.useProvisionalProxies), &CommitProxyInterface::getKeyServersLocations, GetKeyServerLocationsRequest(span.context, key, Optional(), 100, isBackward, key.arena()), TaskPriority::DefaultPromiseEndpoint))) { ++cx->transactionKeyServerLocationRequestsCompleted; @@ -1782,8 +1798,8 @@ ACTOR Future>>> getKeyRangeLocatio ++cx->transactionKeyServerLocationRequests; choose { when ( wait( cx->onProxiesChanged() ) ) {} - when ( GetKeyServerLocationsReply _rep = wait(basicLoadBalance( - cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::getKeyServersLocations, + when(GetKeyServerLocationsReply _rep = wait(basicLoadBalance( + cx->getCommitProxies(info.useProvisionalProxies), &CommitProxyInterface::getKeyServersLocations, GetKeyServerLocationsRequest(span.context, keys.begin, keys.end, limit, reverse, keys.arena()), TaskPriority::DefaultPromiseEndpoint))) { ++cx->transactionKeyServerLocationRequestsCompleted; @@ -2512,7 +2528,7 @@ ACTOR Future> getRange( Database cx, ReferenceenableLocalityLoadBalance ? &cx->queueModel : NULL)); + cx->enableLocalityLoadBalance ? &cx->queueModel : nullptr)); rep = _rep; ++cx->transactionPhysicalReadsCompleted; } catch(Error&) { @@ -3450,14 +3466,16 @@ ACTOR static Future tryCommit( Database cx, Reference req.debugID = commitID; state Future reply; if (options.commitOnFirstProxy) { - if(cx->clientInfo->get().firstProxy.present()) { - reply = throwErrorOr ( brokenPromiseToMaybeDelivered ( cx->clientInfo->get().firstProxy.get().commit.tryGetReply(req) ) ); + if (cx->clientInfo->get().firstCommitProxy.present()) { + reply = throwErrorOr(brokenPromiseToMaybeDelivered( + cx->clientInfo->get().firstCommitProxy.get().commit.tryGetReply(req))); } else { - const std::vector& proxies = cx->clientInfo->get().masterProxies; + const std::vector& proxies = cx->clientInfo->get().commitProxies; reply = proxies.size() ? throwErrorOr ( brokenPromiseToMaybeDelivered ( proxies[0].commit.tryGetReply(req) ) ) : Never(); } } else { - reply = basicLoadBalance( cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::commit, req, TaskPriority::DefaultPromiseEndpoint, true ); + reply = basicLoadBalance(cx->getCommitProxies(info.useProvisionalProxies), &CommitProxyInterface::commit, + req, TaskPriority::DefaultPromiseEndpoint, true); } choose { @@ -3531,8 +3549,9 @@ ACTOR static Future tryCommit( Database cx, Reference // We don't know if the commit happened, and it might even still be in flight. 
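Aside on the client-visible contract in this commit path, not part of the patch: commit_unknown_result is surfaced as a retryable error, so retry loops driven by onError() must keep their writes idempotent, since the earlier attempt may in fact have committed. A minimal, hedged sketch using the existing Transaction API (the helper name is illustrative):

// Sketch only: blind writes are naturally idempotent, so retrying after
// commit_unknown_result cannot double-apply them.
ACTOR Future<Void> idempotentWrite(Database cx, Key k, Value v) {
    state Transaction tr(cx);
    loop {
        try {
            tr.set(k, v);        // safe to apply twice
            wait(tr.commit());
            return Void();
        } catch (Error& e) {
            wait(tr.onError(e)); // retries commit_unknown_result along with other retryable errors
        }
    }
}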
if (!options.causalWriteRisky) { - // Make sure it's not still in flight, either by ensuring the master we submitted to is dead, or the version we submitted with is dead, or by committing a conflicting transaction successfully - //if ( cx->getMasterProxies()->masterGeneration <= originalMasterGeneration ) + // Make sure it's not still in flight, either by ensuring the master we submitted to is dead, or the + // version we submitted with is dead, or by committing a conflicting transaction successfully + // if ( cx->getCommitProxies()->masterGeneration <= originalMasterGeneration ) // To ensure the original request is not in flight, we need a key range which intersects its read conflict ranges // We pick a key range which also intersects its write conflict ranges, since that avoids potentially creating conflicts where there otherwise would be none @@ -3879,12 +3898,14 @@ ACTOR Future getConsistentReadVersion(SpanID parentSpan, Da TransactionPriority priority, uint32_t flags, TransactionTagMap tags, Optional debugID) { state Span span("NAPI:getConsistentReadVersion"_loc, parentSpan); - try { - ++cx->transactionReadVersionBatches; - if( debugID.present() ) - g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), "NativeAPI.getConsistentReadVersion.Before"); - loop { + + ++cx->transactionReadVersionBatches; + if( debugID.present() ) + g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), "NativeAPI.getConsistentReadVersion.Before"); + loop { + try { state GetReadVersionRequest req( span.context, transactionCount, priority, flags, tags, debugID ); + choose { when ( wait( cx->onProxiesChanged() ) ) {} when ( GetReadVersionReply v = wait( basicLoadBalance( cx->getGrvProxies(flags & GetReadVersionRequest::FLAG_USE_PROVISIONAL_PROXIES), &GrvProxyInterface::getConsistentReadVersion, req, cx->taskID ) ) ) { @@ -3913,12 +3934,17 @@ ACTOR Future getConsistentReadVersion(SpanID parentSpan, Da return v; } } + } catch (Error& e) { + if (e.code() != error_code_broken_promise && e.code() != error_code_batch_transaction_throttled) + TraceEvent(SevError, "GetConsistentReadVersionError").error(e); + if(e.code() == error_code_batch_transaction_throttled && !cx->apiVersionAtLeast(630)) { + wait(delayJittered(5.0)); + } else { + throw; + } } - } catch (Error& e) { - if (e.code() != error_code_broken_promise && e.code() != error_code_batch_transaction_throttled) - TraceEvent(SevError, "GetConsistentReadVersionError").error(e); - throw; } + } ACTOR Future readVersionBatcher( DatabaseContext *cx, FutureStream versionStream, TransactionPriority priority, uint32_t flags ) { @@ -4433,8 +4459,8 @@ ACTOR Future>> waitDataDistributionMetricsLis choose { when(wait(cx->onProxiesChanged())) {} when(ErrorOr rep = - wait(errorOr(basicLoadBalance(cx->getMasterProxies(false), &MasterProxyInterface::getDDMetrics, - GetDDMetricsRequest(keys, shardLimit))))) { + wait(errorOr(basicLoadBalance(cx->getCommitProxies(false), &CommitProxyInterface::getDDMetrics, + GetDDMetricsRequest(keys, shardLimit))))) { if (rep.isError()) { throw rep.getError(); } @@ -4539,7 +4565,9 @@ ACTOR Future snapCreate(Database cx, Standalone snapCmd, UID sn loop { choose { when(wait(cx->onProxiesChanged())) {} - when(wait(basicLoadBalance(cx->getMasterProxies(false), &MasterProxyInterface::proxySnapReq, ProxySnapRequest(snapCmd, snapUID, snapUID), cx->taskID, true /*atmostOnce*/ ))) { + when(wait(basicLoadBalance(cx->getCommitProxies(false), &CommitProxyInterface::proxySnapReq, + ProxySnapRequest(snapCmd, snapUID, snapUID), cx->taskID, + 
true /*atmostOnce*/))) { TraceEvent("SnapCreateExit") .detail("SnapCmd", snapCmd.toString()) .detail("UID", snapUID); @@ -4567,8 +4595,8 @@ ACTOR Future checkSafeExclusions(Database cx, vector exc choose { when(wait(cx->onProxiesChanged())) {} when(ExclusionSafetyCheckReply _ddCheck = - wait(basicLoadBalance(cx->getMasterProxies(false), &MasterProxyInterface::exclusionSafetyCheckReq, - req, cx->taskID))) { + wait(basicLoadBalance(cx->getCommitProxies(false), + &CommitProxyInterface::exclusionSafetyCheckReq, req, cx->taskID))) { ddCheck = _ddCheck.safe; break; } diff --git a/fdbclient/NativeAPI.actor.h b/fdbclient/NativeAPI.actor.h index 2d35022a4a..35338b3c93 100644 --- a/fdbclient/NativeAPI.actor.h +++ b/fdbclient/NativeAPI.actor.h @@ -30,7 +30,7 @@ #include "flow/flow.h" #include "flow/TDMetric.actor.h" #include "fdbclient/FDBTypes.h" -#include "fdbclient/MasterProxyInterface.h" +#include "fdbclient/CommitProxyInterface.h" #include "fdbclient/FDBOptions.g.h" #include "fdbclient/CoordinationInterface.h" #include "fdbclient/ClusterInterface.h" diff --git a/fdbclient/ReadYourWrites.actor.cpp b/fdbclient/ReadYourWrites.actor.cpp index 5693a48ea9..b1484123e9 100644 --- a/fdbclient/ReadYourWrites.actor.cpp +++ b/fdbclient/ReadYourWrites.actor.cpp @@ -1338,7 +1338,7 @@ Future< Standalone > ReadYourWritesTransaction::getRange( if(begin.getKey() > maxKey || end.getKey() > maxKey) return key_outside_legal_range(); - //This optimization prevents NULL operations from being added to the conflict range + //This optimization prevents nullptr operations from being added to the conflict range if( limits.isReached() ) { TEST(true); // RYW range read limit 0 return Standalone(); @@ -2053,9 +2053,6 @@ void ReadYourWritesTransaction::setOptionImpl( FDBTransactionOptions::Option opt case FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES: validateOptionValue(value, false); options.specialKeySpaceChangeConfiguration = true; - // By default, it allows to read system keys - // More options will be implicitly enabled if needed when doing set or clear - options.readSystemKeys = true; break; default: break; diff --git a/fdbclient/RestoreWorkerInterface.actor.h b/fdbclient/RestoreWorkerInterface.actor.h index dde8bd9059..885993cea2 100644 --- a/fdbclient/RestoreWorkerInterface.actor.h +++ b/fdbclient/RestoreWorkerInterface.actor.h @@ -54,6 +54,7 @@ struct RestoreSysInfo; struct RestoreApplierInterface; struct RestoreFinishRequest; struct RestoreSamplesRequest; +struct RestoreUpdateRateRequest; // RestoreSysInfo includes information each (type of) restore roles should know. // At this moment, it only include appliers. We keep the name for future extension. 
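The hunks below add an updateRate endpoint to RestoreApplierInterface together with a RestoreUpdateRateRequest/RestoreUpdateRateReply pair. As a hedged usage sketch (the helper name and trace event are illustrative, not part of the patch), a caller holding an applier interface would drive it like any other request stream:

// Sketch only: ask one applier to cap its write rate and log what remains.
ACTOR Future<Void> updateApplierRate(RestoreApplierInterface applier, int batchIndex, double writeMB) {
    RestoreUpdateRateReply reply =
        wait(applier.updateRate.getReply(RestoreUpdateRateRequest(batchIndex, writeMB)));
    TraceEvent("FastRestoreUpdateRateSketch")
        .detail("NodeID", reply.id)
        .detail("RemainMB", reply.remainMB);
    return Void();
}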
@@ -174,6 +175,7 @@ struct RestoreApplierInterface : RestoreRoleInterface { RequestStream initVersionBatch; RequestStream collectRestoreRoleInterfaces; RequestStream finishRestore; + RequestStream updateRate; bool operator==(RestoreWorkerInterface const& r) const { return id() == r.id(); } bool operator!=(RestoreWorkerInterface const& r) const { return id() != r.id(); } @@ -193,12 +195,13 @@ struct RestoreApplierInterface : RestoreRoleInterface { initVersionBatch.getEndpoint(TaskPriority::LoadBalancedEndpoint); collectRestoreRoleInterfaces.getEndpoint(TaskPriority::LoadBalancedEndpoint); finishRestore.getEndpoint(TaskPriority::LoadBalancedEndpoint); + updateRate.getEndpoint(TaskPriority::LoadBalancedEndpoint); } template void serialize(Ar& ar) { serializer(ar, *(RestoreRoleInterface*)this, heartbeat, sendMutationVector, applyToDB, initVersionBatch, - collectRestoreRoleInterfaces, finishRestore); + collectRestoreRoleInterfaces, finishRestore, updateRate); } std::string toString() const { return nodeID.toString(); } @@ -616,6 +619,50 @@ struct RestoreFinishRequest : TimedRequest { } }; +struct RestoreUpdateRateReply : TimedRequest { + constexpr static FileIdentifier file_identifier = 13018414; + + UID id; + double remainMB; // remaining data in MB to write to DB; + + RestoreUpdateRateReply() = default; + explicit RestoreUpdateRateReply(UID id, double remainMB) : id(id), remainMB(remainMB) {} + + std::string toString() const { + std::stringstream ss; + ss << "RestoreUpdateRateReply NodeID:" << id.toString() << " remainMB:" << remainMB; + return ss.str(); + } + + template + void serialize(Ar& ar) { + serializer(ar, id, remainMB); + } +}; + +struct RestoreUpdateRateRequest : TimedRequest { + constexpr static FileIdentifier file_identifier = 13018415; + + int batchIndex; + double writeMB; + + ReplyPromise reply; + + RestoreUpdateRateRequest() = default; + explicit RestoreUpdateRateRequest(int batchIndex, double writeMB) : batchIndex(batchIndex), writeMB(writeMB) {} + + template + void serialize(Ar& ar) { + serializer(ar, batchIndex, writeMB, reply); + } + + std::string toString() const { + std::stringstream ss; + ss << "RestoreUpdateRateRequest batchIndex:" << batchIndex << " writeMB:" << writeMB; + return ss.str(); + } +}; + struct RestoreRequest { constexpr static FileIdentifier file_identifier = 16035338; diff --git a/fdbclient/Schemas.cpp b/fdbclient/Schemas.cpp index 2e9403dc66..44358766f1 100644 --- a/fdbclient/Schemas.cpp +++ b/fdbclient/Schemas.cpp @@ -47,7 +47,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( "storage", "transaction", "resolution", - "proxy", + "commit_proxy", "grv_proxy", "master", "test", @@ -84,7 +84,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( "role":{ "$enum":[ "master", - "proxy", + "commit_proxy", "grv_proxy", "log", "storage", @@ -191,6 +191,13 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( "estimated_cost":{ "hz": 0.0 } + }, + "busiest_write_tag":{ + "tag": "", + "fractional_cost": 0.0, + "estimated_cost":{ + "hz": 0.0 + } } } ], @@ -271,15 +278,20 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( "run_loop_busy":0.2 } }, - "old_logs":[ + "logs":[ { - "logs":[ + "log_interfaces":[ { "id":"7f8d623d0cb9966e", "healthy":true, "address":"1.2.3.4:1234" } ], + "epoch":1, + "current":false, + "begin_version":23, + "end_version":112315141, + "possibly_losing_data":true, "log_replication_factor":3, "log_write_anti_quorum":0, "log_fault_tolerance":2, @@ 
-346,15 +358,11 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( "auto" : { "busy_read" : 0, "busy_write" : 0, - "count" : 0 + "count" : 0, + "recommended_only": 0 }, "manual" : { "count" : 0 - }, - "recommend" : { - "busy_read" : 0, - "busy_write" : 0, - "count" : 0 } }, "limiting_queue_bytes_storage_server":0, @@ -483,7 +491,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( R"statusSchema( "recovery_state":{ "required_resolvers":1, - "required_proxies":1, + "required_commit_proxies":1, "required_grv_proxies":1, "name":{ "$enum":[ @@ -672,11 +680,11 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( "address":"10.0.4.1" } ], - "auto_proxies":3, + "auto_commit_proxies":3, "auto_grv_proxies":1, "auto_resolvers":1, "auto_logs":3, - "proxies":5, + "commit_proxies":5, "grv_proxies":1, "backup_worker_enabled":1 }, @@ -876,11 +884,11 @@ const KeyRef JSONSchemas::clusterConfigurationSchema = LiteralStringRef(R"config "ssd-2", "memory" ]}, - "auto_proxies":3, + "auto_commit_proxies":3, "auto_grv_proxies":1, "auto_resolvers":1, "auto_logs":3, - "proxies":5 + "commit_proxies":5, "grv_proxies":1 })configSchema"); diff --git a/fdbclient/SpecialKeySpace.actor.cpp b/fdbclient/SpecialKeySpace.actor.cpp index 79a18cfa1e..b8600f2a14 100644 --- a/fdbclient/SpecialKeySpace.actor.cpp +++ b/fdbclient/SpecialKeySpace.actor.cpp @@ -36,7 +36,9 @@ std::unordered_map SpecialKeySpace::moduleToB KeyRangeRef(LiteralStringRef("\xff\xff/metrics/"), LiteralStringRef("\xff\xff/metrics0")) }, { SpecialKeySpace::MODULE::MANAGEMENT, KeyRangeRef(LiteralStringRef("\xff\xff/management/"), LiteralStringRef("\xff\xff/management0")) }, - { SpecialKeySpace::MODULE::ERRORMSG, singleKeyRange(LiteralStringRef("\xff\xff/error_message")) } + { SpecialKeySpace::MODULE::ERRORMSG, singleKeyRange(LiteralStringRef("\xff\xff/error_message")) }, + { SpecialKeySpace::MODULE::CONFIGURATION, + KeyRangeRef(LiteralStringRef("\xff\xff/configuration/"), LiteralStringRef("\xff\xff/configuration0")) } }; std::unordered_map SpecialKeySpace::managementApiCommandToRange = { @@ -48,6 +50,9 @@ std::unordered_map SpecialKeySpace::managementApiCommandT std::set SpecialKeySpace::options = { "excluded/force", "failed/force" }; +Standalone rywGetRange(ReadYourWritesTransaction* ryw, const KeyRangeRef& kr, + const Standalone& res); + // This function will move the given KeySelector as far as possible to the standard form: // orEqual == false && offset == 1 (Standard form) // If the corresponding key is not in the underlying key range, it will move over the range @@ -456,6 +461,24 @@ Future SpecialKeySpace::commit(ReadYourWritesTransaction* ryw) { return commitActor(this, ryw); } +SKSCTestImpl::SKSCTestImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {} + +Future> SKSCTestImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { + ASSERT(range.contains(kr)); + auto resultFuture = ryw->getRange(kr, CLIENT_KNOBS->TOO_MANY); + // all keys are written to RYW, since GRV is set, the read should happen locally + ASSERT(resultFuture.isReady()); + auto result = resultFuture.getValue(); + ASSERT(!result.more && result.size() < CLIENT_KNOBS->TOO_MANY); + auto kvs = resultFuture.getValue(); + return rywGetRange(ryw, kr, kvs); +} + +Future> SKSCTestImpl::commit(ReadYourWritesTransaction* ryw) { + ASSERT(false); + return Optional(); +} + ReadConflictRangeImpl::ReadConflictRangeImpl(KeyRangeRef kr) : SpecialKeyRangeReadImpl(kr) {} ACTOR static Future> 
getReadConflictRangeImpl(ReadYourWritesTransaction* ryw, KeyRange kr) { @@ -570,86 +593,82 @@ void ManagementCommandsOptionsImpl::clear(ReadYourWritesTransaction* ryw, const } } -Key ManagementCommandsOptionsImpl::decode(const KeyRef& key) const { - // Should never be used - ASSERT(false); - return key; -} - -Key ManagementCommandsOptionsImpl::encode(const KeyRef& key) const { - // Should never be used - ASSERT(false); - return key; -} - Future> ManagementCommandsOptionsImpl::commit(ReadYourWritesTransaction* ryw) { // Nothing to do, keys should be used by other impls' commit callback return Optional(); } -// read from rwModule -ACTOR Future> rwModuleGetRangeActor(ReadYourWritesTransaction* ryw, - const SpecialKeyRangeRWImpl* impl, KeyRangeRef kr) { - state KeyRangeRef range = impl->getKeyRange(); - Standalone resultWithoutPrefix = - wait(ryw->getRange(ryw->getDatabase()->specialKeySpace->decode(kr), CLIENT_KNOBS->TOO_MANY)); - ASSERT(!resultWithoutPrefix.more && resultWithoutPrefix.size() < CLIENT_KNOBS->TOO_MANY); +Standalone rywGetRange(ReadYourWritesTransaction* ryw, const KeyRangeRef& kr, + const Standalone& res) { + // "res" is the read result regardless of your writes, if ryw disabled, return immediately + if (ryw->readYourWritesDisabled()) return res; + // If ryw enabled, we update it with writes from the transaction Standalone result; - if (ryw->readYourWritesDisabled()) { - for (const KeyValueRef& kv : resultWithoutPrefix) - result.push_back_deep(result.arena(), KeyValueRef(impl->encode(kv.key), kv.value)); - } else { - RangeMap>, KeyRangeRef>::Ranges ranges = - ryw->getSpecialKeySpaceWriteMap().containedRanges(range); - RangeMap>, KeyRangeRef>::iterator iter = ranges.begin(); - int index = 0; - while (iter != ranges.end()) { - // add all previous entries into result - Key rk = impl->encode(resultWithoutPrefix[index].key); - while (index < resultWithoutPrefix.size() && rk < iter->begin()) { - result.push_back_deep(result.arena(), KeyValueRef(rk, resultWithoutPrefix[index].value)); - ++index; - } + RangeMap>, KeyRangeRef>::Ranges ranges = + ryw->getSpecialKeySpaceWriteMap().containedRanges(kr); + RangeMap>, KeyRangeRef>::iterator iter = ranges.begin(); + auto iter2 = res.begin(); + result.arena().dependsOn(res.arena()); + while (iter != ranges.end() || iter2 != res.end()) { + if (iter == ranges.end()) { + result.push_back(result.arena(), KeyValueRef(iter2->key, iter2->value)); + ++iter2; + } else if (iter2 == res.end()) { + // insert if it is a set entry std::pair> entry = iter->value(); + if (entry.first && entry.second.present()) { + result.push_back_deep(result.arena(), KeyValueRef(iter->begin(), entry.second.get())); + } + ++iter; + } else if (iter->range().contains(iter2->key)) { + std::pair> entry = iter->value(); + // if this is a valid range either for set or clear, move iter2 outside the range if (entry.first) { - // add the writen entries if exists - if (entry.second.present()) { + // insert if this is a set entry + if (entry.second.present()) result.push_back_deep(result.arena(), KeyValueRef(iter->begin(), entry.second.get())); - } - // move index to skip all entries in the iter->range - while (index < resultWithoutPrefix.size() && - iter->range().contains(impl->encode(resultWithoutPrefix[index].key))) - ++index; + // move iter2 outside the range + while (iter2 != res.end() && iter->range().contains(iter2->key)) ++iter2; + } + ++iter; + } else if (iter->begin() > iter2->key) { + result.push_back(result.arena(), KeyValueRef(iter2->key, iter2->value)); + ++iter2; + } else 
if (iter->end() <= iter2->key) { + // insert if it is a set entry + std::pair> entry = iter->value(); + if (entry.first && entry.second.present()) { + result.push_back_deep(result.arena(), KeyValueRef(iter->begin(), entry.second.get())); } ++iter; } - // add all remaining entries into result - while (index < resultWithoutPrefix.size()) { - const KeyValueRef& kv = resultWithoutPrefix[index]; - result.push_back_deep(result.arena(), KeyValueRef(impl->encode(kv.key), kv.value)); - ++index; - } } return result; } +// read from those readwrite modules in which special keys have one-to-one mapping with real persisted keys +ACTOR Future> rwModuleWithMappingGetRangeActor(ReadYourWritesTransaction* ryw, + const SpecialKeyRangeRWImpl* impl, + KeyRangeRef kr) { + Standalone resultWithoutPrefix = + wait(ryw->getTransaction().getRange(ryw->getDatabase()->specialKeySpace->decode(kr), CLIENT_KNOBS->TOO_MANY)); + ASSERT(!resultWithoutPrefix.more && resultWithoutPrefix.size() < CLIENT_KNOBS->TOO_MANY); + Standalone result; + for (const KeyValueRef& kv : resultWithoutPrefix) + result.push_back_deep(result.arena(), KeyValueRef(impl->encode(kv.key), kv.value)); + return rywGetRange(ryw, kr, result); +} + ExcludeServersRangeImpl::ExcludeServersRangeImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {} Future> ExcludeServersRangeImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { - return rwModuleGetRangeActor(ryw, this, kr); + return rwModuleWithMappingGetRangeActor(ryw, this, kr); } void ExcludeServersRangeImpl::set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) { - ryw->getSpecialKeySpaceWriteMap().insert(key, std::make_pair(true, Optional(value))); -} - -void ExcludeServersRangeImpl::clear(ReadYourWritesTransaction* ryw, const KeyRef& key) { - ryw->getSpecialKeySpaceWriteMap().insert(key, std::make_pair(true, Optional())); -} - -void ExcludeServersRangeImpl::clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) { - ryw->getSpecialKeySpaceWriteMap().insert(range, std::make_pair(true, Optional())); + // ignore value + ryw->getSpecialKeySpaceWriteMap().insert(key, std::make_pair(true, Optional(ValueRef()))); } Key ExcludeServersRangeImpl::decode(const KeyRef& key) const { @@ -671,7 +690,7 @@ bool parseNetWorkAddrFromKeys(ReadYourWritesTransaction* ryw, bool failed, std:: while (iter != ranges.end()) { auto entry = iter->value(); // only check for exclude(set) operation, include(clear) are not checked - TraceEvent(SevInfo, "ParseNetworkAddress") + TraceEvent(SevDebug, "ParseNetworkAddress") .detail("Valid", entry.first) .detail("Set", entry.second.present()) .detail("Key", iter->begin().toString()); @@ -810,7 +829,6 @@ ACTOR Future checkExclusion(Database db, std::vector* ad } void includeServers(ReadYourWritesTransaction* ryw) { - ryw->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); ryw->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE); ryw->setOption(FDBTransactionOptions::LOCK_AWARE); ryw->setOption(FDBTransactionOptions::USE_PROVISIONAL_PROXIES); @@ -874,19 +892,12 @@ FailedServersRangeImpl::FailedServersRangeImpl(KeyRangeRef kr) : SpecialKeyRange Future> FailedServersRangeImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const { - return rwModuleGetRangeActor(ryw, this, kr); + return rwModuleWithMappingGetRangeActor(ryw, this, kr); } void FailedServersRangeImpl::set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) { - ryw->getSpecialKeySpaceWriteMap().insert(key, std::make_pair(true, 
Optional(value))); -} - -void FailedServersRangeImpl::clear(ReadYourWritesTransaction* ryw, const KeyRef& key) { - ryw->getSpecialKeySpaceWriteMap().insert(key, std::make_pair(true, Optional())); -} - -void FailedServersRangeImpl::clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) { - ryw->getSpecialKeySpaceWriteMap().insert(range, std::make_pair(true, Optional())); + // ignore value + ryw->getSpecialKeySpaceWriteMap().insert(key, std::make_pair(true, Optional(ValueRef()))); } Key FailedServersRangeImpl::decode(const KeyRef& key) const { @@ -943,8 +954,14 @@ ACTOR Future> ExclusionInProgressActor(ReadYourWrites } } + // sort and remove :tls + std::set inProgressAddresses; for (auto const& address : inProgressExclusion) { - Key addrKey = prefix.withSuffix(address.toString()); + inProgressAddresses.insert(formatIpPort(address.ip, address.port)); + } + + for (auto const& address : inProgressAddresses) { + Key addrKey = prefix.withSuffix(address); if (kr.contains(addrKey)) { result.push_back(result.arena(), KeyValueRef(addrKey, ValueRef())); result.arena().dependsOn(addrKey.arena()); @@ -959,3 +976,148 @@ Future> ExclusionInProgressRangeImpl::getRange(ReadYo KeyRangeRef kr) const { return ExclusionInProgressActor(ryw, getKeyRange().begin, kr); } + +ACTOR Future> getProcessClassActor(ReadYourWritesTransaction* ryw, KeyRef prefix, + KeyRangeRef kr) { + vector _workers = wait(getWorkers(&ryw->getTransaction())); + auto workers = _workers; // strip const + // Note : the sort by string is anti intuition, ex. 1.1.1.1:11 < 1.1.1.1:5 + std::sort(workers.begin(), workers.end(), [](const ProcessData& lhs, const ProcessData& rhs) { + return formatIpPort(lhs.address.ip, lhs.address.port) < formatIpPort(rhs.address.ip, rhs.address.port); + }); + Standalone result; + for (auto& w : workers) { + // exclude :tls in keys even the network addresss is TLS + KeyRef k(prefix.withSuffix(formatIpPort(w.address.ip, w.address.port), result.arena())); + if (kr.contains(k)) { + ValueRef v(result.arena(), w.processClass.toString()); + result.push_back(result.arena(), KeyValueRef(k, v)); + } + } + return rywGetRange(ryw, kr, result); +} + +ACTOR Future> processClassCommitActor(ReadYourWritesTransaction* ryw, KeyRangeRef range) { + // enable related options + ryw->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE); + ryw->setOption(FDBTransactionOptions::LOCK_AWARE); + ryw->setOption(FDBTransactionOptions::USE_PROVISIONAL_PROXIES); + vector workers = wait( + getWorkers(&ryw->getTransaction())); // make sure we use the Transaction object to avoid used_during_commit() + + auto ranges = ryw->getSpecialKeySpaceWriteMap().containedRanges(range); + auto iter = ranges.begin(); + while (iter != ranges.end()) { + auto entry = iter->value(); + // only loop through (set) operation, (clear) not exist + if (entry.first && entry.second.present()) { + // parse network address + Key address = iter->begin().removePrefix(range.begin); + AddressExclusion addr = AddressExclusion::parse(address); + // parse class type + ValueRef processClassType = entry.second.get(); + ProcessClass processClass(processClassType.toString(), ProcessClass::DBSource); + // make sure we use the underlying Transaction object to avoid used_during_commit() + bool foundChange = false; + for (int i = 0; i < workers.size(); i++) { + if (addr.excludes(workers[i].address)) { + if (processClass.classType() != ProcessClass::InvalidClass) + ryw->getTransaction().set(processClassKeyFor(workers[i].locality.processId().get()), + 
processClassValue(processClass)); + else + ryw->getTransaction().clear(processClassKeyFor(workers[i].locality.processId().get())); + foundChange = true; + } + } + if (foundChange) + ryw->getTransaction().set(processClassChangeKey, deterministicRandom()->randomUniqueID().toString()); + } + ++iter; + } + return Optional(); +} + +ProcessClassRangeImpl::ProcessClassRangeImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {} + +Future> ProcessClassRangeImpl::getRange(ReadYourWritesTransaction* ryw, + KeyRangeRef kr) const { + return getProcessClassActor(ryw, getKeyRange().begin, kr); +} + +Future> ProcessClassRangeImpl::commit(ReadYourWritesTransaction* ryw) { + // Validate network address and process class type + Optional errorMsg; + auto ranges = ryw->getSpecialKeySpaceWriteMap().containedRanges(getKeyRange()); + auto iter = ranges.begin(); + while (iter != ranges.end()) { + auto entry = iter->value(); + // only check for setclass(set) operation, (clear) are forbidden thus not exist + if (entry.first && entry.second.present()) { + // validate network address + Key address = iter->begin().removePrefix(range.begin); + AddressExclusion addr = AddressExclusion::parse(address); + if (!addr.isValid()) { + std::string error = "ERROR: \'" + address.toString() + "\' is not a valid network endpoint address\n"; + if (address.toString().find(":tls") != std::string::npos) + error += " Do not include the `:tls' suffix when naming a process\n"; + errorMsg = ManagementAPIError::toJsonString(false, "setclass", error); + return errorMsg; + } + // validate class type + ValueRef processClassType = entry.second.get(); + ProcessClass processClass(processClassType.toString(), ProcessClass::DBSource); + if (processClass.classType() == ProcessClass::InvalidClass && + processClassType != LiteralStringRef("default")) { + std::string error = "ERROR: \'" + processClassType.toString() + "\' is not a valid process class\n"; + errorMsg = ManagementAPIError::toJsonString(false, "setclass", error); + return errorMsg; + } + } + ++iter; + } + return processClassCommitActor(ryw, getKeyRange()); +} + +void throwNotAllowedError(ReadYourWritesTransaction* ryw) { + auto msg = ManagementAPIError::toJsonString(false, "setclass", + "Clear operation is meaningless thus forbidden for setclass"); + ryw->setSpecialKeySpaceErrorMsg(msg); + throw special_keys_api_failure(); +} + +void ProcessClassRangeImpl::clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) { + return throwNotAllowedError(ryw); +} + +void ProcessClassRangeImpl::clear(ReadYourWritesTransaction* ryw, const KeyRef& key) { + return throwNotAllowedError(ryw); +} + +ACTOR Future> getProcessClassSourceActor(ReadYourWritesTransaction* ryw, KeyRef prefix, + KeyRangeRef kr) { + vector _workers = wait(getWorkers(&ryw->getTransaction())); + auto workers = _workers; // strip const + // Note : the sort by string is anti intuition, ex. 
1.1.1.1:11 < 1.1.1.1:5 + std::sort(workers.begin(), workers.end(), [](const ProcessData& lhs, const ProcessData& rhs) { + return formatIpPort(lhs.address.ip, lhs.address.port) < formatIpPort(rhs.address.ip, rhs.address.port); + }); + Standalone result; + for (auto& w : workers) { + // exclude :tls in keys even the network addresss is TLS + Key k(prefix.withSuffix(formatIpPort(w.address.ip, w.address.port))); + if (kr.contains(k)) { + Value v(w.processClass.sourceString()); + result.push_back(result.arena(), KeyValueRef(k, v)); + result.arena().dependsOn(k.arena()); + result.arena().dependsOn(v.arena()); + } + } + return result; +} + +ProcessClassSourceRangeImpl::ProcessClassSourceRangeImpl(KeyRangeRef kr) : SpecialKeyRangeReadImpl(kr) {} + +Future> ProcessClassSourceRangeImpl::getRange(ReadYourWritesTransaction* ryw, + KeyRangeRef kr) const { + return getProcessClassSourceActor(ryw, getKeyRange().begin, kr); +} \ No newline at end of file diff --git a/fdbclient/SpecialKeySpace.actor.h b/fdbclient/SpecialKeySpace.actor.h index 4cbc9c5002..e824958ded 100644 --- a/fdbclient/SpecialKeySpace.actor.h +++ b/fdbclient/SpecialKeySpace.actor.h @@ -67,15 +67,29 @@ private: class SpecialKeyRangeRWImpl : public SpecialKeyRangeReadImpl { public: - virtual void set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) = 0; - virtual void clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) = 0; - virtual void clear(ReadYourWritesTransaction* ryw, const KeyRef& key) = 0; + virtual void set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) { + ryw->getSpecialKeySpaceWriteMap().insert(key, std::make_pair(true, Optional(value))); + } + virtual void clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) { + ryw->getSpecialKeySpaceWriteMap().insert(range, std::make_pair(true, Optional())); + } + virtual void clear(ReadYourWritesTransaction* ryw, const KeyRef& key) { + ryw->getSpecialKeySpaceWriteMap().insert(key, std::make_pair(true, Optional())); + } virtual Future> commit( ReadYourWritesTransaction* ryw) = 0; // all delayed async operations of writes in special-key-space // Given the special key to write, return the real key that needs to be modified - virtual Key decode(const KeyRef& key) const = 0; + virtual Key decode(const KeyRef& key) const { + // Default implementation should never be used + ASSERT(false); + return key; + } // Given the read key, return the corresponding special key - virtual Key encode(const KeyRef& key) const = 0; + virtual Key encode(const KeyRef& key) const { + // Default implementation should never be used + ASSERT(false); + return key; + }; explicit SpecialKeyRangeRWImpl(KeyRangeRef kr) : SpecialKeyRangeReadImpl(kr) {} @@ -125,6 +139,7 @@ class SpecialKeySpace { public: enum class MODULE { CLUSTERFILEPATH, + CONFIGURATION, // Configuration of the cluster CONNECTIONSTRING, ERRORMSG, // A single key space contains a json string which describes the last error in special-key-space MANAGEMENT, // Management-API @@ -201,6 +216,14 @@ private: void modulesBoundaryInit(); }; +// Used for SpecialKeySpaceCorrectnessWorkload +class SKSCTestImpl : public SpecialKeyRangeRWImpl { +public: + explicit SKSCTestImpl(KeyRangeRef kr); + Future> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; + Future> commit(ReadYourWritesTransaction* ryw) override; +}; + // Use special key prefix "\xff\xff/transaction/conflicting_keys/", // to retrieve keys which caused latest not_committed(conflicting with another transaction) 
error. // The returned key value pairs are interpretted as : @@ -238,8 +261,6 @@ public: void set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) override; void clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) override; void clear(ReadYourWritesTransaction* ryw, const KeyRef& key) override; - Key decode(const KeyRef& key) const override; - Key encode(const KeyRef& key) const override; Future> commit(ReadYourWritesTransaction* ryw) override; }; @@ -248,8 +269,6 @@ public: explicit ExcludeServersRangeImpl(KeyRangeRef kr); Future> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; void set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) override; - void clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) override; - void clear(ReadYourWritesTransaction* ryw, const KeyRef& key) override; Key decode(const KeyRef& key) const override; Key encode(const KeyRef& key) const override; Future> commit(ReadYourWritesTransaction* ryw) override; @@ -260,8 +279,6 @@ public: explicit FailedServersRangeImpl(KeyRangeRef kr); Future> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; void set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) override; - void clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) override; - void clear(ReadYourWritesTransaction* ryw, const KeyRef& key) override; Key decode(const KeyRef& key) const override; Key encode(const KeyRef& key) const override; Future> commit(ReadYourWritesTransaction* ryw) override; @@ -273,5 +290,20 @@ public: Future> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; }; +class ProcessClassRangeImpl : public SpecialKeyRangeRWImpl { +public: + explicit ProcessClassRangeImpl(KeyRangeRef kr); + Future> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; + Future> commit(ReadYourWritesTransaction* ryw) override; + void clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) override; + void clear(ReadYourWritesTransaction* ryw, const KeyRef& key) override; +}; + +class ProcessClassSourceRangeImpl : public SpecialKeyRangeReadImpl { +public: + explicit ProcessClassSourceRangeImpl(KeyRangeRef kr); + Future> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override; +}; + #include "flow/unactorcompiler.h" #endif diff --git a/fdbclient/SystemData.h b/fdbclient/SystemData.h index 08bfb6ff88..cdecc0e106 100644 --- a/fdbclient/SystemData.h +++ b/fdbclient/SystemData.h @@ -260,10 +260,10 @@ extern const KeyRangeRef logRangesRange; Key logRangesEncodeKey(KeyRef keyBegin, UID logUid); // Returns the start key and optionally the logRange Uid -KeyRef logRangesDecodeKey(KeyRef key, UID* logUid = NULL); +KeyRef logRangesDecodeKey(KeyRef key, UID* logUid = nullptr); // Returns the end key and optionally the key prefix -Key logRangesDecodeValue(KeyRef keyValue, Key* destKeyPrefix = NULL); +Key logRangesDecodeValue(KeyRef keyValue, Key* destKeyPrefix = nullptr); // Returns the encoded key value comprised of the end key and destination prefix Key logRangesEncodeValue(KeyRef keyEnd, KeyRef destPath); diff --git a/fdbclient/TagThrottle.actor.cpp b/fdbclient/TagThrottle.actor.cpp index a566b2fbfa..224f4839ce 100644 --- a/fdbclient/TagThrottle.actor.cpp +++ b/fdbclient/TagThrottle.actor.cpp @@ -19,7 +19,7 @@ */ #include "fdbclient/TagThrottle.h" -#include "fdbclient/MasterProxyInterface.h" +#include "fdbclient/CommitProxyInterface.h" #include 
"fdbclient/DatabaseContext.h" #include "flow/actorcompiler.h" // has to be last include @@ -104,7 +104,7 @@ TagThrottleKey TagThrottleKey::fromKey(const KeyRef& key) { TagThrottleValue TagThrottleValue::fromValue(const ValueRef& value) { TagThrottleValue throttleValue; - BinaryReader reader(value, IncludeVersion()); + BinaryReader reader(value, IncludeVersion(ProtocolVersion::withTagThrottleValueReason())); reader >> throttleValue; return throttleValue; } @@ -228,7 +228,7 @@ namespace ThrottleApi { } TagThrottleValue throttle(tpsRate, expirationTime.present() ? expirationTime.get() : 0, initialDuration, reason.present() ? reason.get() : TagThrottledReason::UNSET); - BinaryWriter wr(IncludeVersion(ProtocolVersion::withTagThrottleValue())); + BinaryWriter wr(IncludeVersion(ProtocolVersion::withTagThrottleValueReason())); wr << throttle; state Value value = wr.toValue(); @@ -347,6 +347,7 @@ namespace ThrottleApi { removed = true; tr.clear(tag.key); + unthrottledTags ++; } if(manualUnthrottledTags > 0) { diff --git a/fdbclient/TaskBucket.actor.cpp b/fdbclient/TaskBucket.actor.cpp index 66d3b9571f..9f1bb55cc6 100644 --- a/fdbclient/TaskBucket.actor.cpp +++ b/fdbclient/TaskBucket.actor.cpp @@ -234,17 +234,17 @@ public: ACTOR static Future taskVerify(Reference tb, Reference tr, Reference task) { if (task->params.find(Task::reservedTaskParamValidKey) == task->params.end()) { - TraceEvent("TB_TaskVerifyInvalidTask") - .detail("Task", task->params[Task::reservedTaskParamKeyType]) - .detail("ReservedTaskParamValidKey", "missing"); + TraceEvent("TaskBucketTaskVerifyInvalidTask") + .detail("Task", task->params[Task::reservedTaskParamKeyType]) + .detail("ReservedTaskParamValidKey", "missing"); return false; } if (task->params.find(Task::reservedTaskParamValidValue) == task->params.end()) { - TraceEvent("TB_TaskVerifyInvalidTask") - .detail("Task", task->params[Task::reservedTaskParamKeyType]) - .detail("ReservedTaskParamValidKey", task->params[Task::reservedTaskParamValidKey]) - .detail("ReservedTaskParamValidValue", "missing"); + TraceEvent("TaskBucketTaskVerifyInvalidTask") + .detail("Task", task->params[Task::reservedTaskParamKeyType]) + .detail("ReservedTaskParamValidKey", task->params[Task::reservedTaskParamValidKey]) + .detail("ReservedTaskParamValidValue", "missing"); return false; } @@ -253,20 +253,20 @@ public: Optional keyValue = wait(tr->get(task->params[Task::reservedTaskParamValidKey])); if (!keyValue.present()) { - TraceEvent("TB_TaskVerifyInvalidTask") - .detail("Task", task->params[Task::reservedTaskParamKeyType]) - .detail("ReservedTaskParamValidKey", task->params[Task::reservedTaskParamValidKey]) - .detail("ReservedTaskParamValidValue", task->params[Task::reservedTaskParamValidValue]) - .detail("KeyValue", "missing"); + TraceEvent("TaskBucketTaskVerifyInvalidTask") + .detail("Task", task->params[Task::reservedTaskParamKeyType]) + .detail("ReservedTaskParamValidKey", task->params[Task::reservedTaskParamValidKey]) + .detail("ReservedTaskParamValidValue", task->params[Task::reservedTaskParamValidValue]) + .detail("KeyValue", "missing"); return false; } if (keyValue.get().compare(StringRef(task->params[Task::reservedTaskParamValidValue]))) { - TraceEvent("TB_TaskVerifyAbortedTask") - .detail("Task", task->params[Task::reservedTaskParamKeyType]) - .detail("ReservedTaskParamValidKey", task->params[Task::reservedTaskParamValidKey]) - .detail("ReservedTaskParamValidValue", task->params[Task::reservedTaskParamValidValue]) - .detail("KeyValue", keyValue.get()); + 
TraceEvent("TaskBucketTaskVerifyAbortedTask") + .detail("Task", task->params[Task::reservedTaskParamKeyType]) + .detail("ReservedTaskParamValidKey", task->params[Task::reservedTaskParamValidKey]) + .detail("ReservedTaskParamValidValue", task->params[Task::reservedTaskParamValidValue]) + .detail("KeyValue", keyValue.get()); return false; } @@ -332,10 +332,10 @@ public: if(now() - start > 300) { TraceEvent(SevWarnAlways, "TaskBucketLongExtend") - .detail("Duration", now() - start) - .detail("TaskUID", task->key) - .detail("TaskType", task->params[Task::reservedTaskParamKeyType]) - .detail("Priority", task->getPriority()); + .detail("Duration", now() - start) + .detail("TaskUID", task->key) + .detail("TaskType", task->params[Task::reservedTaskParamKeyType]) + .detail("Priority", task->getPriority()); } // Take the extendMutex lock until we either succeed or stop trying to extend due to failure wait(task->extendMutex.take()); @@ -402,19 +402,19 @@ public: })); } } catch(Error &e) { - TraceEvent(SevWarn, "TB_ExecuteFailure") - .error(e) - .detail("TaskUID", task->key) - .detail("TaskType", task->params[Task::reservedTaskParamKeyType].printable()) - .detail("Priority", task->getPriority()); + TraceEvent(SevWarn, "TaskBucketExecuteFailure") + .error(e) + .detail("TaskUID", task->key) + .detail("TaskType", task->params[Task::reservedTaskParamKeyType].printable()) + .detail("Priority", task->getPriority()); try { wait(taskFunc->handleError(cx, task, e)); } catch(Error &e) { - TraceEvent(SevWarn, "TB_ExecuteFailureLogErrorFailed") - .error(e) // output handleError() error instead of original task error - .detail("TaskUID", task->key.printable()) - .detail("TaskType", task->params[Task::reservedTaskParamKeyType].printable()) - .detail("Priority", task->getPriority()); + TraceEvent(SevWarn, "TaskBucketExecuteFailureLogErrorFailed") + .error(e) // output handleError() error instead of original task error + .detail("TaskUID", task->key.printable()) + .detail("TaskType", task->params[Task::reservedTaskParamKeyType].printable()) + .detail("Priority", task->getPriority()); } } @@ -727,14 +727,17 @@ public: tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr->setOption(FDBTransactionOptions::LOCK_AWARE); Standalone values = wait(tr->getRange(subspace.range(), CLIENT_KNOBS->TOO_MANY)); - TraceEvent("TaskBucket").detail("DebugPrintRange", "Print DB Range").detail("Key", subspace.key()).detail("Count", values.size()).detail("Msg", msg); - + TraceEvent("TaskBucketDebugPrintRange") + .detail("Key", subspace.key()) + .detail("Count", values.size()) + .detail("Msg", msg); + /*printf("debugPrintRange key: (%d) %s\n", values.size(), printable(subspace.key()).c_str()); for (auto & s : values) { - printf(" key: %-40s value: %s\n", printable(s.key).c_str(), s.value.c_str()); - TraceEvent("TaskBucket").detail("DebugPrintRange", msg) - .detail("Key", s.key) - .detail("Value", s.value); + printf(" key: %-40s value: %s\n", printable(s.key).c_str(), s.value.c_str()); + TraceEvent("TaskBucketDebugPrintKV").detail("Msg", msg) + .detail("Key", s.key) + .detail("Value", s.value); }*/ return Void(); @@ -870,9 +873,9 @@ ACTOR static Future actorAddTask(TaskBucket* tb, Reference validationValue = wait(tr->get(validationKey)); if (!validationValue.present()) { - TraceEvent(SevError, "TB_AddTaskInvalidKey") - .detail("Task", task->params[Task::reservedTaskParamKeyType]) - .detail("ValidationKey", validationKey); + TraceEvent(SevError, "TaskBucketAddTaskInvalidKey") + .detail("Task", 
task->params[Task::reservedTaskParamKeyType]) + .detail("ValidationKey", validationKey); throw invalid_option_value(); } @@ -1138,9 +1141,9 @@ public: Optional validationValue = wait(tr->get(validationKey)); if (!validationValue.present()) { - TraceEvent(SevError, "TB_OnSetAddTaskInvalidKey") - .detail("Task", task->params[Task::reservedTaskParamKeyType]) - .detail("ValidationKey", validationKey); + TraceEvent(SevError, "TaskBucketOnSetAddTaskInvalidKey") + .detail("Task", task->params[Task::reservedTaskParamKeyType]) + .detail("ValidationKey", validationKey); throw invalid_option_value(); } @@ -1239,6 +1242,6 @@ ACTOR Future getCompletionKey(TaskCompletionKey *self, Future TaskCompletionKey::get(Reference tr, Reference taskBucket) { - ASSERT(key.present() == (joinFuture.getPtr() == NULL)); + ASSERT(key.present() == (joinFuture.getPtr() == nullptr)); return key.present() ? key.get() : getCompletionKey(this, joinFuture->joinedFuture(tr, taskBucket)); } diff --git a/fdbclient/ThreadSafeTransaction.cpp b/fdbclient/ThreadSafeTransaction.cpp index e33bf70f63..b05481b0a4 100644 --- a/fdbclient/ThreadSafeTransaction.cpp +++ b/fdbclient/ThreadSafeTransaction.cpp @@ -84,12 +84,12 @@ ThreadSafeDatabase::ThreadSafeDatabase(std::string connFilename, int apiVersion) catch(...) { new (db) DatabaseContext(unknown_error()); } - }, NULL); + }, nullptr); } ThreadSafeDatabase::~ThreadSafeDatabase() { DatabaseContext *db = this->db; - onMainThreadVoid( [db](){ db->delref(); }, NULL ); + onMainThreadVoid( [db](){ db->delref(); }, nullptr ); } ThreadSafeTransaction::ThreadSafeTransaction(DatabaseContext* cx) { @@ -107,18 +107,18 @@ ThreadSafeTransaction::ThreadSafeTransaction(DatabaseContext* cx) { cx->addref(); new (tr) ReadYourWritesTransaction(Database(cx)); }, - NULL); + nullptr); } ThreadSafeTransaction::~ThreadSafeTransaction() { ReadYourWritesTransaction *tr = this->tr; if (tr) - onMainThreadVoid( [tr](){ tr->delref(); }, NULL ); + onMainThreadVoid( [tr](){ tr->delref(); }, nullptr ); } void ThreadSafeTransaction::cancel() { ReadYourWritesTransaction *tr = this->tr; - onMainThreadVoid( [tr](){ tr->cancel(); }, NULL ); + onMainThreadVoid( [tr](){ tr->cancel(); }, nullptr ); } void ThreadSafeTransaction::setVersion( Version v ) { @@ -328,17 +328,17 @@ ThreadFuture ThreadSafeTransaction::onError( Error const& e ) { void ThreadSafeTransaction::operator=(ThreadSafeTransaction&& r) noexcept { tr = r.tr; - r.tr = NULL; + r.tr = nullptr; } ThreadSafeTransaction::ThreadSafeTransaction(ThreadSafeTransaction&& r) noexcept { tr = r.tr; - r.tr = NULL; + r.tr = nullptr; } void ThreadSafeTransaction::reset() { ReadYourWritesTransaction *tr = this->tr; - onMainThreadVoid( [tr](){ tr->reset(); }, NULL ); + onMainThreadVoid( [tr](){ tr->reset(); }, nullptr ); } extern const char* getSourceVersion(); diff --git a/fdbclient/ThreadSafeTransaction.h b/fdbclient/ThreadSafeTransaction.h index 8e364ed3a3..9b40b03be3 100644 --- a/fdbclient/ThreadSafeTransaction.h +++ b/fdbclient/ThreadSafeTransaction.h @@ -96,7 +96,7 @@ public: ThreadFuture onError( Error const& e ) override; // These are to permit use as state variables in actors: - ThreadSafeTransaction() : tr(NULL) {} + ThreadSafeTransaction() : tr(nullptr) {} void operator=(ThreadSafeTransaction&& r) noexcept; ThreadSafeTransaction(ThreadSafeTransaction&& r) noexcept; diff --git a/fdbclient/VersionedMap.h b/fdbclient/VersionedMap.h index 2208224857..03732d0e34 100644 --- a/fdbclient/VersionedMap.h +++ b/fdbclient/VersionedMap.h @@ -802,7 +802,7 @@ public: void validate() { 
int count=0, height=0; - PTreeImpl::validate>>( root, at, NULL, NULL, count, height ); + PTreeImpl::validate>>( root, at, nullptr, nullptr, count, height ); if ( height > 100 ) TraceEvent(SevWarnAlways, "DiabolicalPTreeSize").detail("Size", count).detail("Height", height); } diff --git a/fdbclient/vexillographer/fdb.options b/fdbclient/vexillographer/fdb.options index f11956d79c..37e57346ee 100644 --- a/fdbclient/vexillographer/fdb.options +++ b/fdbclient/vexillographer/fdb.options @@ -195,7 +195,7 @@ description is not currently required but encouraged.
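The configuration module registered earlier in this patch exposes process classes through the special key space. A hedged sketch of how a client could move a process to the storage class (the address is illustrative; writes require the SPECIAL_KEY_SPACE_ENABLE_WRITES transaction option, and clears on this range are rejected by ProcessClassRangeImpl):

// Sketch only: set a process class through the special key space.
ACTOR Future<Void> setProcessClassViaSpecialKeys(Database cx) {
    state ReadYourWritesTransaction tr(cx);
    loop {
        try {
            tr.setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES);
            tr.set(LiteralStringRef("\xff\xff/configuration/process/class_type/127.0.0.1:4500"),
                   LiteralStringRef("storage"));
            wait(tr.commit());
            return Void();
        } catch (Error& e) {
            wait(tr.onError(e));
        }
    }
}

Reading the companion read-only range \xff\xff/configuration/process/class_source/<ip:port> returns, per worker, how its current class was chosen, mirroring ProcessClassSourceRangeImpl above.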