Merge remote-tracking branch 'upstream/master' into visibility-1

This commit is contained in:
Lukas Joswiak 2020-10-06 18:38:15 -07:00
commit dea7000970
217 changed files with 4997 additions and 2713 deletions

View File

@ -153,7 +153,7 @@ void fdb_future_destroy( FDBFuture* f ) {
extern "C" DLLEXPORT
fdb_error_t fdb_future_block_until_ready( FDBFuture* f ) {
CATCH_AND_RETURN( TSAVB(f)->blockUntilReady(); );
CATCH_AND_RETURN(TSAVB(f)->blockUntilReadyCheckOnMainThread(););
}
fdb_bool_t fdb_future_is_error_v22( FDBFuture* f ) {
@ -596,7 +596,7 @@ fdb_error_t fdb_transaction_set_option_impl( FDBTransaction* tr,
void fdb_transaction_set_option_v13( FDBTransaction* tr,
FDBTransactionOption option )
{
fdb_transaction_set_option_impl( tr, option, NULL, 0 );
fdb_transaction_set_option_impl( tr, option, nullptr, 0 );
}
extern "C" DLLEXPORT
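For reference, the function changed in this hunk is used in the standard blocking pattern of the fdb_c client API. A minimal sketch of that pattern (illustrative, not part of this commit; API version 700 assumed):

```c
#include <stdio.h>
#define FDB_API_VERSION 700
#include <foundationdb/fdb_c.h>

/* Block on a future, then check the error stored in it. This is the call path
 * that the blockUntilReadyCheckOnMainThread() change above affects. */
static fdb_error_t wait_and_check(FDBFuture* f) {
    fdb_error_t err = fdb_future_block_until_ready(f);
    if (!err) err = fdb_future_get_error(f);
    if (err) fprintf(stderr, "future failed: %s (%d)\n", fdb_get_error(err), err);
    return err;
}
```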

View File

@ -54,7 +54,8 @@ FILE* debugme; /* descriptor used for debug messages */
int err = wait_future(_f); \
if (err) { \
int err2; \
if ((err != 1020 /* not_committed */) && (err != 1021 /* commit_unknown_result */)) { \
if ((err != 1020 /* not_committed */) && (err != 1021 /* commit_unknown_result */) && \
(err != 1213 /* tag_throttled */)) { \
fprintf(stderr, "ERROR: Error %s (%d) occured at %s\n", #_func, err, fdb_get_error(err)); \
} else { \
fprintf(annoyme, "ERROR: Error %s (%d) occured at %s\n", #_func, err, fdb_get_error(err)); \
@ -698,7 +699,7 @@ retryTxn:
}
int run_workload(FDBTransaction* transaction, mako_args_t* args, int thread_tps, volatile double* throttle_factor,
int thread_iters, volatile int* signal, mako_stats_t* stats, int dotrace, lat_block_t* block[],
int thread_iters, volatile int* signal, mako_stats_t* stats, int dotrace, int dotagging, lat_block_t* block[],
int* elem_size, bool* is_memory_allocated) {
int xacts = 0;
int64_t total_xacts = 0;
@ -710,6 +711,7 @@ int run_workload(FDBTransaction* transaction, mako_args_t* args, int thread_tps,
int current_tps;
char* traceid;
int tracetimer = 0;
char* tagstr;
if (thread_tps < 0) return 0;
@ -717,6 +719,12 @@ int run_workload(FDBTransaction* transaction, mako_args_t* args, int thread_tps,
traceid = (char*)malloc(32);
}
if(dotagging) {
tagstr = (char*)calloc(16, 1);
memcpy(tagstr, KEYPREFIX, KEYPREFIXLEN);
memcpy(tagstr + KEYPREFIXLEN, args->txntagging_prefix, TAGPREFIXLENGTH_MAX);
}
current_tps = (int)((double)thread_tps * *throttle_factor);
keystr = (char*)malloc(sizeof(char) * args->key_length + 1);
@ -774,6 +782,7 @@ int run_workload(FDBTransaction* transaction, mako_args_t* args, int thread_tps,
}
}
} else {
if (thread_tps > 0) {
/* 1 second not passed, throttle */
@ -783,6 +792,17 @@ int run_workload(FDBTransaction* transaction, mako_args_t* args, int thread_tps,
}
} /* throttle or txntrace */
/* enable transaction tagging */
if (dotagging > 0) {
sprintf(tagstr + KEYPREFIXLEN + TAGPREFIXLENGTH_MAX, "%03d", urand(0, args->txntagging - 1));
fdb_error_t err = fdb_transaction_set_option(transaction, FDB_TR_OPTION_AUTO_THROTTLE_TAG,
(uint8_t*)tagstr, 16);
if (err) {
fprintf(stderr, "ERROR: FDB_TR_OPTION_DEBUG_TRANSACTION_IDENTIFIER: %s\n",
fdb_get_error(err));
}
}
rc = run_one_transaction(transaction, args, stats, keystr, keystr2, valstr, block, elem_size,
is_memory_allocated);
if (rc) {
@ -808,6 +828,9 @@ int run_workload(FDBTransaction* transaction, mako_args_t* args, int thread_tps,
if (dotrace) {
free(traceid);
}
if(dotagging) {
free(tagstr);
}
return rc;
}
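The tagging block added above packs a fixed 16-byte tag: the 4-byte "mako" key prefix, up to TAGPREFIXLENGTH_MAX (8) bytes of the user-supplied --txntagging_prefix, and a zero-padded 3-digit tag index, which is then attached with FDB_TR_OPTION_AUTO_THROTTLE_TAG. A self-contained sketch of the same layout (helper name and constants redefined locally for illustration):

```c
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#define FDB_API_VERSION 700
#include <foundationdb/fdb_c.h>

#define KEYPREFIX "mako"          /* assumed mako key prefix       */
#define KEYPREFIXLEN 4
#define TAGPREFIXLENGTH_MAX 8     /* from mako.h in this commit    */

/* Build "mako" + user prefix + "NNN" into a 16-byte buffer and set it as an
 * auto-throttle tag on the transaction, mirroring run_workload() above. */
static fdb_error_t set_mako_tag(FDBTransaction* tr, const char* user_prefix, int tag_index) {
    char tagstr[16] = { 0 };
    memcpy(tagstr, KEYPREFIX, KEYPREFIXLEN);
    strncpy(tagstr + KEYPREFIXLEN, user_prefix, TAGPREFIXLENGTH_MAX);
    snprintf(tagstr + KEYPREFIXLEN + TAGPREFIXLENGTH_MAX, 4, "%03d", tag_index);
    return fdb_transaction_set_option(tr, FDB_TR_OPTION_AUTO_THROTTLE_TAG,
                                      (const uint8_t*)tagstr, 16);
}
```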
@ -876,6 +899,7 @@ void* worker_thread(void* thread_args) {
int op;
int i, size;
int dotrace = (worker_id == 0 && thread_id == 0 && args->txntrace) ? args->txntrace : 0;
int dotagging = args->txntagging;
volatile int* signal = &((thread_args_t*)thread_args)->process->shm->signal;
volatile double* throttle_factor = &((thread_args_t*)thread_args)->process->shm->throttle_factor;
volatile int* readycount = &((thread_args_t*)thread_args)->process->shm->readycount;
@ -940,8 +964,8 @@ void* worker_thread(void* thread_args) {
/* run the workload */
else if (args->mode == MODE_RUN) {
rc = run_workload(transaction, args, thread_tps, throttle_factor, thread_iters, signal, stats, dotrace, block,
elem_size, is_memory_allocated);
rc = run_workload(transaction, args, thread_tps, throttle_factor, thread_iters,
signal, stats, dotrace, dotagging, block, elem_size, is_memory_allocated);
if (rc < 0) {
fprintf(stderr, "ERROR: run_workload failed\n");
}
@ -1209,6 +1233,8 @@ int init_args(mako_args_t* args) {
args->tracepath[0] = '\0';
args->traceformat = 0; /* default to client's default (XML) */
args->txntrace = 0;
args->txntagging = 0;
memset(args->txntagging_prefix, 0, TAGPREFIXLENGTH_MAX);
for (i = 0; i < MAX_OP; i++) {
args->txnspec.ops[i][OP_COUNT] = 0;
}
@ -1366,6 +1392,8 @@ void usage() {
printf("%-24s %s\n", " --tracepath=PATH", "Set trace file path");
printf("%-24s %s\n", " --trace_format <xml|json>", "Set trace format (Default: json)");
printf("%-24s %s\n", " --txntrace=sec", "Specify transaction tracing interval (Default: 0)");
printf("%-24s %s\n", " --txntagging", "Specify the number of different transaction tag (Default: 0, max = 1000)");
printf("%-24s %s\n", " --txntagging_prefix", "Specify the prefix of transaction tag - mako${txntagging_prefix} (Default: '')");
printf("%-24s %s\n", " --knobs=KNOBS", "Set client knobs");
printf("%-24s %s\n", " --flatbuffers", "Use flatbuffers");
}
@ -1407,6 +1435,8 @@ int parse_args(int argc, char* argv[], mako_args_t* args) {
{ "commitget", no_argument, NULL, ARG_COMMITGET },
{ "flatbuffers", no_argument, NULL, ARG_FLATBUFFERS },
{ "trace", no_argument, NULL, ARG_TRACE },
{ "txntagging", required_argument, NULL, ARG_TXNTAGGING },
{ "txntagging_prefix", required_argument, NULL, ARG_TXNTAGGINGPREFIX},
{ "version", no_argument, NULL, ARG_VERSION },
{ NULL, 0, NULL, 0 }
};
@ -1522,8 +1552,25 @@ int parse_args(int argc, char* argv[], mako_args_t* args) {
case ARG_TXNTRACE:
args->txntrace = atoi(optarg);
break;
case ARG_TXNTAGGING:
args->txntagging = atoi(optarg);
if(args->txntagging > 1000) {
args->txntagging = 1000;
}
break;
case ARG_TXNTAGGINGPREFIX: {
if(strlen(optarg) > TAGPREFIXLENGTH_MAX) {
fprintf(stderr, "Error: the length of txntagging_prefix is larger than %d\n", TAGPREFIXLENGTH_MAX);
exit(0);
}
memcpy(args->txntagging_prefix, optarg, strlen(optarg));
break;
}
}
}
if ((args->tpsmin == -1) || (args->tpsmin > args->tpsmax)) {
args->tpsmin = args->tpsmax;
}
@ -1580,6 +1627,10 @@ int validate_args(mako_args_t* args) {
fprintf(stderr, "ERROR: Must specify either seconds or iteration\n");
return -1;
}
if(args->txntagging < 0) {
fprintf(stderr, "ERROR: --txntagging must be a non-negative integer\n");
return -1;
}
}
return 0;
}

View File

@ -75,7 +75,9 @@ enum Arguments {
ARG_TPSMIN,
ARG_TPSINTERVAL,
ARG_TPSCHANGE,
ARG_TXNTRACE
ARG_TXNTRACE,
ARG_TXNTAGGING,
ARG_TXNTAGGINGPREFIX
};
enum TPSChangeTypes { TPS_SIN, TPS_SQUARE, TPS_PULSE };
@ -95,6 +97,7 @@ typedef struct {
} mako_txnspec_t;
#define KNOB_MAX 256
#define TAGPREFIXLENGTH_MAX 8
/* benchmark parameters */
typedef struct {
@ -124,6 +127,8 @@ typedef struct {
char knobs[KNOB_MAX];
uint8_t flatbuffers;
int txntrace;
int txntagging;
char txntagging_prefix[TAGPREFIXLENGTH_MAX];
} mako_args_t;
/* shared memory */

View File

@ -157,14 +157,14 @@ namespace FDB {
void cancel() override;
void reset() override;
TransactionImpl() : tr(NULL) {}
TransactionImpl() : tr(nullptr) {}
TransactionImpl(TransactionImpl&& r) noexcept {
tr = r.tr;
r.tr = NULL;
r.tr = nullptr;
}
TransactionImpl& operator=(TransactionImpl&& r) noexcept {
tr = r.tr;
r.tr = NULL;
r.tr = nullptr;
return *this;
}
@ -207,10 +207,10 @@ namespace FDB {
if ( value.present() )
throw_on_error( fdb_network_set_option( option, value.get().begin(), value.get().size() ) );
else
throw_on_error( fdb_network_set_option( option, NULL, 0 ) );
throw_on_error( fdb_network_set_option( option, nullptr, 0 ) );
}
API* API::instance = NULL;
API* API::instance = nullptr;
API::API(int version) : version(version) {}
API* API::selectAPIVersion(int apiVersion) {
@ -234,11 +234,11 @@ namespace FDB {
}
bool API::isAPIVersionSelected() {
return API::instance != NULL;
return API::instance != nullptr;
}
API* API::getInstance() {
if(API::instance == NULL) {
if(API::instance == nullptr) {
throw api_version_unset();
}
else {
@ -280,7 +280,7 @@ namespace FDB {
if (value.present())
throw_on_error(fdb_database_set_option(db, option, value.get().begin(), value.get().size()));
else
throw_on_error(fdb_database_set_option(db, option, NULL, 0));
throw_on_error(fdb_database_set_option(db, option, nullptr, 0));
}
TransactionImpl::TransactionImpl(FDBDatabase* db) {
@ -417,7 +417,7 @@ namespace FDB {
if ( value.present() ) {
throw_on_error( fdb_transaction_set_option( tr, option, value.get().begin(), value.get().size() ) );
} else {
throw_on_error( fdb_transaction_set_option( tr, option, NULL, 0 ) );
throw_on_error( fdb_transaction_set_option( tr, option, nullptr, 0 ) );
}
}

View File

@ -31,7 +31,7 @@
namespace FDB {
struct CFuture : NonCopyable, ReferenceCounted<CFuture>, FastAllocated<CFuture> {
CFuture() : f(NULL) {}
CFuture() : f(nullptr) {}
explicit CFuture(FDBFuture* f) : f(f) {}
~CFuture() {
if (f) {

View File

@ -78,8 +78,9 @@ type Subspace interface {
// FoundationDB keys (corresponding to the prefix of this Subspace).
fdb.KeyConvertible
// All Subspaces implement fdb.ExactRange and fdb.Range, and describe all
// keys logically in this Subspace.
// All Subspaces implement fdb.ExactRange and fdb.Range, and describe all
// keys strictly within the subspace that encode tuples. Specifically,
// this will include all keys in [prefix + '\x00', prefix + '\xff').
fdb.ExactRange
}
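The rewritten comment pins the subspace's key range down to the half-open byte interval [prefix + '\x00', prefix + '\xff'). As a language-neutral illustration (a hypothetical helper, not part of the Go binding), the two boundary keys are just the prefix with one extra byte appended:

```c
#include <string.h>
#include <stdint.h>
#include <stddef.h>

/* Hypothetical helper: fill `begin` and `end` (each prefix_len + 1 bytes) with
 * the boundaries of the range covering every tuple-encoded key under `prefix`. */
static void subspace_range(const uint8_t* prefix, size_t prefix_len,
                           uint8_t* begin, uint8_t* end) {
    memcpy(begin, prefix, prefix_len);
    begin[prefix_len] = 0x00; /* first possible tuple key under the prefix  */
    memcpy(end, prefix, prefix_len);
    end[prefix_len] = 0xff;   /* exclusive upper bound of the tuple keyspace */
}
```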

View File

@ -1,3 +1,6 @@
set(RUN_JAVA_TESTS ON CACHE BOOL "Run Java unit tests")
set(RUN_JUNIT_TESTS OFF CACHE BOOL "Compile and run junit tests")
set(JAVA_BINDING_SRCS
src/main/com/apple/foundationdb/async/AsyncIterable.java
src/main/com/apple/foundationdb/async/AsyncIterator.java
@ -102,6 +105,10 @@ set(JAVA_TESTS_SRCS
src/test/com/apple/foundationdb/test/WatchTest.java
src/test/com/apple/foundationdb/test/WhileTrueTest.java)
set(JAVA_JUNIT_TESTS
src/junit/com/apple/foundationdb/tuple/AllTests.java
src/junit/com/apple/foundationdb/tuple/ArrayUtilTests.java)
set(GENERATED_JAVA_DIR ${CMAKE_CURRENT_BINARY_DIR}/src/main/com/apple/foundationdb)
file(MAKE_DIRECTORY ${GENERATED_JAVA_DIR})
@ -173,12 +180,6 @@ add_jar(fdb-java ${JAVA_BINDING_SRCS} ${GENERATED_JAVA_FILES} ${CMAKE_SOURCE_DIR
OUTPUT_DIR ${PROJECT_BINARY_DIR}/lib VERSION ${CMAKE_PROJECT_VERSION} MANIFEST ${MANIFEST_FILE})
add_dependencies(fdb-java fdb_java_options fdb_java)
# TODO[mpilman]: The java RPM will require some more effort (mostly on debian). However,
# most people will use the fat-jar, so it is not clear how high this priority is.
#install_jar(fdb-java DESTINATION ${FDB_SHARE_DIR}/java COMPONENT java)
#install(TARGETS fdb_java DESTINATION ${FDB_LIB_DIR} COMPONENT java)
if(NOT OPEN_FOR_IDE)
set(FAT_JAR_BINARIES "NOTFOUND" CACHE STRING
"Path of a directory structure with libraries to include in fat jar (a lib directory)")
@ -252,4 +253,30 @@ if(NOT OPEN_FOR_IDE)
add_dependencies(fat-jar fdb-java)
add_dependencies(fat-jar copy_lib)
add_dependencies(packages fat-jar)
if(RUN_JAVA_TESTS)
set(enabled ENABLED)
else()
set(enabled DISABLED)
endif()
set(TEST_CP ${tests_jar} ${target_jar})
message(STATUS "TEST_CP ${TEST_CP}")
add_java_test(NAME DirectoryTest CLASS_PATH ${TEST_CP}
CLASS com.apple.foundationdb.test.DirectoryTest ${enabled})
if(RUN_JUNIT_TESTS)
file(DOWNLOAD "https://search.maven.org/remotecontent?filepath=junit/junit/4.13/junit-4.13.jar"
${CMAKE_BINARY_DIR}/packages/junit-4.13.jar
EXPECTED_HASH SHA256=4b8532f63bdc0e0661507f947eb324a954d1dbac631ad19c8aa9a00feed1d863)
file(DOWNLOAD "https://repo1.maven.org/maven2/org/hamcrest/hamcrest-all/1.3/hamcrest-all-1.3.jar"
${CMAKE_BINARY_DIR}/packages/hamcrest-all-1.3.jar
EXPECTED_HASH SHA256=4877670629ab96f34f5f90ab283125fcd9acb7e683e66319a68be6eb2cca60de)
add_jar(fdb-junit SOURCES ${JAVA_JUNIT_TESTS} INCLUDE_JARS fdb-java ${CMAKE_BINARY_DIR}/packages/junit-4.13.jar)
get_property(junit_jar_path TARGET fdb-junit PROPERTY JAR_FILE)
add_test(NAME junit
COMMAND ${Java_JAVA_EXECUTABLE}
-cp "${target_jar}:${junit_jar_path}:${CMAKE_BINARY_DIR}/packages/junit-4.13.jar:${CMAKE_BINARY_DIR}/packages/hamcrest-all-1.3.jar"
-Djava.library.path=${CMAKE_BINARY_DIR}/lib
org.junit.runner.JUnitCore "com.apple.foundationdb.tuple.AllTests")
endif()
endif()

View File

@ -1089,13 +1089,13 @@ void JNI_OnUnload(JavaVM *vm, void *reserved) {
return;
} else {
// delete global references so the GC can collect them
if (range_result_summary_class != NULL) {
if (range_result_summary_class != JNI_NULL) {
env->DeleteGlobalRef(range_result_summary_class);
}
if (range_result_class != NULL) {
if (range_result_class != JNI_NULL) {
env->DeleteGlobalRef(range_result_class);
}
if (string_class != NULL) {
if (string_class != JNI_NULL) {
env->DeleteGlobalRef(string_class);
}
}

View File

@ -27,9 +27,14 @@ import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.Ignore;
/**
* @author Ben
@ -251,7 +256,7 @@ public class ArrayUtilTests {
/**
* Test method for {@link ByteArrayUtil#bisectLeft(java.math.BigInteger[], java.math.BigInteger)}.
*/
@Test
@Test @Ignore
public void testBisectLeft() {
fail("Not yet implemented");
}
@ -259,7 +264,7 @@ public class ArrayUtilTests {
/**
* Test method for {@link ByteArrayUtil#compareUnsigned(byte[], byte[])}.
*/
@Test
@Test @Ignore
public void testCompare() {
fail("Not yet implemented");
}
@ -267,7 +272,7 @@ public class ArrayUtilTests {
/**
* Test method for {@link ByteArrayUtil#findNext(byte[], byte, int)}.
*/
@Test
@Test @Ignore
public void testFindNext() {
fail("Not yet implemented");
}
@ -275,7 +280,7 @@ public class ArrayUtilTests {
/**
* Test method for {@link ByteArrayUtil#findTerminator(byte[], byte, byte, int)}.
*/
@Test
@Test @Ignore
public void testFindTerminator() {
fail("Not yet implemented");
}
@ -283,7 +288,7 @@ public class ArrayUtilTests {
/**
* Test method for {@link ByteArrayUtil#copyOfRange(byte[], int, int)}.
*/
@Test
@Test @Ignore
public void testCopyOfRange() {
fail("Not yet implemented");
}
@ -291,7 +296,7 @@ public class ArrayUtilTests {
/**
* Test method for {@link ByteArrayUtil#strinc(byte[])}.
*/
@Test
@Test @Ignore
public void testStrinc() {
fail("Not yet implemented");
}
@ -299,7 +304,7 @@ public class ArrayUtilTests {
/**
* Test method for {@link ByteArrayUtil#printable(byte[])}.
*/
@Test
@Test @Ignore
public void testPrintable() {
fail("Not yet implemented");
}

View File

@ -34,7 +34,7 @@ public class DirectoryTest {
public static void main(String[] args) throws Exception {
try {
FDB fdb = FDB.selectAPIVersion(700);
try(Database db = fdb.open()) {
try(Database db = args.length > 0 ? fdb.open(args[0]) : fdb.open()) {
runTests(db);
}
}

View File

@ -45,13 +45,13 @@ RUN cd /tmp && curl -L https://github.com/ninja-build/ninja/archive/v1.9.0.zip -
cd .. && rm -rf ninja-1.9.0 ninja.zip
# install openssl
RUN cd /tmp && curl -L https://www.openssl.org/source/openssl-1.1.1d.tar.gz -o openssl.tar.gz &&\
echo "1e3a91bc1f9dfce01af26026f856e064eab4c8ee0a8f457b5ae30b40b8b711f2 openssl.tar.gz" > openssl-sha.txt &&\
RUN cd /tmp && curl -L https://www.openssl.org/source/openssl-1.1.1h.tar.gz -o openssl.tar.gz &&\
echo "5c9ca8774bd7b03e5784f26ae9e9e6d749c9da2438545077e6b3d755a06595d9 openssl.tar.gz" > openssl-sha.txt &&\
sha256sum -c openssl-sha.txt && tar -xzf openssl.tar.gz &&\
cd openssl-1.1.1d && scl enable devtoolset-8 -- ./config CFLAGS="-fPIC -O3" --prefix=/usr/local &&\
cd openssl-1.1.1h && scl enable devtoolset-8 -- ./config CFLAGS="-fPIC -O3" --prefix=/usr/local &&\
scl enable devtoolset-8 -- make -j`nproc` && scl enable devtoolset-8 -- make -j1 install &&\
ln -sv /usr/local/lib64/lib*.so.1.1 /usr/lib64/ &&\
cd /tmp/ && rm -rf /tmp/openssl-1.1.1d /tmp/openssl.tar.gz
cd /tmp/ && rm -rf /tmp/openssl-1.1.1h /tmp/openssl.tar.gz
RUN cd /opt/ && curl -L https://github.com/facebook/rocksdb/archive/v6.10.1.tar.gz -o rocksdb.tar.gz &&\
echo "d573d2f15cdda883714f7e0bc87b814a8d4a53a82edde558f08f940e905541ee rocksdb.tar.gz" > rocksdb-sha.txt &&\
@ -61,8 +61,8 @@ RUN cd /opt/ && curl -L https://github.com/facebook/rocksdb/archive/v6.10.1.tar.
ARG TIMEZONEINFO=America/Los_Angeles
RUN rm -f /etc/localtime && ln -s /usr/share/zoneinfo/${TIMEZONEINFO} /etc/localtime
LABEL version=0.1.15
ENV DOCKER_IMAGEVER=0.1.15
LABEL version=0.1.17
ENV DOCKER_IMAGEVER=0.1.17
ENV JAVA_HOME=/usr/lib/jvm/java-1.8.0
ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc
ENV CXX=/opt/rh/devtoolset-8/root/usr/bin/g++

View File

@ -1,4 +1,4 @@
FROM foundationdb/foundationdb-build:0.1.15
FROM foundationdb/foundationdb-build:0.1.17
USER root

View File

@ -2,7 +2,7 @@ version: "3"
services:
common: &common
image: foundationdb/foundationdb-build:0.1.15
image: foundationdb/foundationdb-build:0.1.17
build-setup: &build-setup
<<: *common

View File

@ -363,3 +363,60 @@ function(package_bindingtester)
add_custom_target(bindingtester ALL DEPENDS ${tar_file})
add_dependencies(bindingtester copy_bindingtester_binaries)
endfunction()
function(add_fdbclient_test)
set(options DISABLED ENABLED)
set(oneValueArgs NAME)
set(multiValueArgs COMMAND)
cmake_parse_arguments(T "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}")
if(NOT T_ENABLED AND T_DISABLED)
return()
endif()
if(NOT T_NAME)
message(FATAL_ERROR "NAME is a required argument for add_fdbclient_test")
endif()
if(NOT T_COMMAND)
message(FATAL_ERROR "COMMAND is a required argument for add_fdbclient_test")
endif()
message(STATUS "Adding Client test ${T_NAME}")
add_test(NAME "${T_NAME}"
COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/tmp_cluster.py
--build-dir ${CMAKE_BINARY_DIR}
--
${T_COMMAND})
endfunction()
function(add_java_test)
set(options DISABLED ENABLED)
set(oneValueArgs NAME CLASS)
set(multiValueArgs CLASS_PATH)
cmake_parse_arguments(T "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}")
if(NOT T_ENABLED AND T_DISABLED)
return()
endif()
if(NOT T_NAME)
message(FATAL_ERROR "NAME is a required argument for add_fdbclient_test")
endif()
if(NOT T_CLASS)
message(FATAL_ERROR "CLASS is a required argument for add_fdbclient_test")
endif()
set(cp "")
set(separator ":")
if (WIN32)
set(separator ";")
endif()
message(STATUS "CLASSPATH ${T_CLASS_PATH}")
foreach(path ${T_CLASS_PATH})
if(cp)
set(cp "${cp}${separator}${path}")
else()
set(cp "${path}")
endif()
endforeach()
add_fdbclient_test(
NAME ${T_NAME}
COMMAND ${Java_JAVA_EXECUTABLE}
-cp "${cp}"
-Djava.library.path=${CMAKE_BINARY_DIR}/lib
${T_CLASS} "@CLUSTER_FILE@")
endfunction()

View File

@ -59,11 +59,14 @@ else()
set(ROCKSDB_LIBRARIES
${BINARY_DIR}/librocksdb.a)
ExternalProject_Get_Property(rocksdb SOURCE_DIR)
set (ROCKSDB_INCLUDE_DIR "${SOURCE_DIR}/include")
set(ROCKSDB_FOUND TRUE)
endif()
message(STATUS "Found RocksDB library: ${ROCKSDB_LIBRARIES}")
message(STATUS "Found RocksDB includes: ${ROCKSDB_INCLUDE_DIRS}")
message(STATUS "Found RocksDB includes: ${ROCKSDB_INCLUDE_DIR}")
mark_as_advanced(
ROCKSDB_LIBRARIES

View File

@ -12,7 +12,7 @@ endif()
# SSL
################################################################################
include(CheckSymbolExists)
set(DISABLE_TLS OFF CACHE BOOL "Don't try to find OpenSSL and always build without TLS support")
if(DISABLE_TLS)
set(WITH_TLS OFF)
@ -107,7 +107,9 @@ endif()
################################################################################
set(SSD_ROCKSDB_EXPERIMENTAL OFF CACHE BOOL "Build with experimental RocksDB support")
if (SSD_ROCKSDB_EXPERIMENTAL)
# RocksDB is currently enabled by default for GCC but does not build with the latest
# Clang.
if (SSD_ROCKSDB_EXPERIMENTAL OR GCC)
set(WITH_ROCKSDB_EXPERIMENTAL ON)
else()
set(WITH_ROCKSDB_EXPERIMENTAL OFF)

View File

@ -1,6 +1,5 @@
#!/bin/bash
SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
pkill fdbserver
ulimit -S -c unlimited
unset FDB_NETWORK_OPTION_EXTERNAL_CLIENT_DIRECTORY
@ -8,4 +7,4 @@ WORKDIR="$(pwd)/tmp/$$"
if [ ! -d "${WORKDIR}" ] ; then
mkdir -p "${WORKDIR}"
fi
DEBUGLEVEL=0 DISPLAYERROR=1 RANDOMTEST=1 WORKDIR="${WORKDIR}" FDBSERVERPORT="${PORT_FDBSERVER:-4500}" ${SCRIPTDIR}/bindingTestScript.sh 1
DEBUGLEVEL=0 DISPLAYERROR=1 RANDOMTEST=1 WORKDIR="${WORKDIR}" ${SCRIPTDIR}/bindingTestScript.sh 1

View File

@ -7,7 +7,7 @@ SCRIPTID="${$}"
SAVEONERROR="${SAVEONERROR:-1}"
PYTHONDIR="${BINDIR}/tests/python"
testScript="${BINDIR}/tests/bindingtester/run_binding_tester.sh"
VERSION="1.6"
VERSION="1.9"
source ${SCRIPTDIR}/localClusterStart.sh
@ -23,19 +23,22 @@ cycles="${1}"
if [ "${DEBUGLEVEL}" -gt 0 ]
then
echo "Work dir: ${WORKDIR}"
echo "Bin dir: ${BINDIR}"
echo "Log dir: ${LOGDIR}"
echo "Python path: ${PYTHONDIR}"
echo "Lib dir: ${LIBDIR}"
echo "Server port: ${FDBSERVERPORT}"
echo "Script Id: ${SCRIPTID}"
echo "Version: ${VERSION}"
echo "Work dir: ${WORKDIR}"
echo "Bin dir: ${BINDIR}"
echo "Log dir: ${LOGDIR}"
echo "Python path: ${PYTHONDIR}"
echo "Lib dir: ${LIBDIR}"
echo "Cluster String: ${FDBCLUSTERTEXT}"
echo "Script Id: ${SCRIPTID}"
echo "Version: ${VERSION}"
fi
# Begin the cluster using the logic in localClusterStart.sh.
startCluster
# Stop the cluster on exit
trap "stopCluster" EXIT
# Display user message
if [ "${status}" -ne 0 ]; then
:
@ -58,8 +61,8 @@ fi
# Display directory and log information, if an error occurred
if [ "${status}" -ne 0 ]
then
ls "${WORKDIR}" > "${LOGDIR}/dir.log"
ps -eafw > "${LOGDIR}/process-preclean.log"
ls "${WORKDIR}" &> "${LOGDIR}/dir.log"
ps -eafwH &> "${LOGDIR}/process-preclean.log"
if [ -f "${FDBCONF}" ]; then
cp -f "${FDBCONF}" "${LOGDIR}/"
fi
@ -71,10 +74,15 @@ fi
# Save debug information files, environment, and log information, if an error occurred
if [ "${status}" -ne 0 ] && [ "${SAVEONERROR}" -gt 0 ]; then
ps -eafw > "${LOGDIR}/process-exit.log"
netstat -na > "${LOGDIR}/netstat.log"
df -h > "${LOGDIR}/disk.log"
env > "${LOGDIR}/env.log"
ps -eafwH &> "${LOGDIR}/process-exit.log"
netstat -na &> "${LOGDIR}/netstat.log"
df -h &> "${LOGDIR}/disk.log"
env &> "${LOGDIR}/env.log"
fi
# Stop the cluster
if stopCluster; then
unset FDBSERVERID
fi
exit "${status}"

View File

@ -5,311 +5,398 @@ WORKDIR="${WORKDIR:-${SCRIPTDIR}/tmp/fdb.work}"
LOGDIR="${WORKDIR}/log"
ETCDIR="${WORKDIR}/etc"
BINDIR="${BINDIR:-${SCRIPTDIR}}"
FDBSERVERPORT="${FDBSERVERPORT:-4500}"
FDBPORTSTART="${FDBPORTSTART:-4000}"
FDBPORTTOTAL="${FDBPORTTOTAL:-1000}"
SERVERCHECKS="${SERVERCHECKS:-10}"
CONFIGUREWAIT="${CONFIGUREWAIT:-240}"
FDBCONF="${ETCDIR}/fdb.cluster"
LOGFILE="${LOGFILE:-${LOGDIR}/startcluster.log}"
AUDITCLUSTER="${AUDITCLUSTER:-0}"
AUDITLOG="${AUDITLOG:-/tmp/audit-cluster.log}"
# Initialize the variables
status=0
messagetime=0
messagecount=0
# Do nothing, if cluster string is already defined
if [ -n "${FDBCLUSTERTEXT}" ]
then
:
# Otherwise, define the cluster text
else
# Define a random ip address and port on localhost
if [ -z "${IPADDRESS}" ]; then
let index2="${RANDOM} % 256"
let index3="${RANDOM} % 256"
let index4="(${RANDOM} % 255) + 1"
IPADDRESS="127.${index2}.${index3}.${index4}"
fi
if [ -z "${FDBPORT}" ]; then
let FDBPORT="(${RANDOM} % ${FDBPORTTOTAL}) + ${FDBPORTSTART}"
fi
FDBCLUSTERTEXT="${IPADDRESS}:${FDBPORT}"
fi
function log
{
local status=0
if [ "$#" -lt 1 ]
then
echo "Usage: log <message> [echo]"
echo
echo "Logs the message and timestamp to LOGFILE (${LOGFILE}) and, if the"
echo "second argument is either not present or is set to 1, stdout."
let status="${status} + 1"
else
# Log to stdout.
if [ "$#" -lt 2 ] || [ "${2}" -ge 1 ]
then
echo "${1}"
fi
local status=0
if [ "$#" -lt 1 ]
then
echo "Usage: log <message> [echo]"
echo
echo "Logs the message and timestamp to LOGFILE (${LOGFILE}) and, if the"
echo "second argument is either not present or is set to 1, stdout."
let status="${status} + 1"
else
# Log to stdout.
if [ "$#" -lt 2 ] || [ "${2}" -ge 1 ]
then
echo "${1}"
fi
# Log to file.
datestr=$(date +"%Y-%m-%d %H:%M:%S (%s)")
dir=$(dirname "${LOGFILE}")
if ! [ -d "${dir}" ] && ! mkdir -p "${dir}"
then
echo "Could not create directory to log output."
let status="${status} + 1"
elif ! [ -f "${LOGFILE}" ] && ! touch "${LOGFILE}"
then
echo "Could not create file ${LOGFILE} to log output."
let status="${status} + 1"
elif ! echo "[ ${datestr} ] ${1}" >> "${LOGFILE}"
then
echo "Could not log output to ${LOGFILE}."
let status="${status} + 1"
fi
fi
# Log to file.
datestr=$(date +"%Y-%m-%d %H:%M:%S (%s)")
dir=$(dirname "${LOGFILE}")
if ! [ -d "${dir}" ] && ! mkdir -p "${dir}"
then
echo "Could not create directory to log output."
let status="${status} + 1"
elif ! [ -f "${LOGFILE}" ] && ! touch "${LOGFILE}"
then
echo "Could not create file ${LOGFILE} to log output."
let status="${status} + 1"
elif ! echo "[ ${datestr} ] ${1}" >> "${LOGFILE}"
then
echo "Could not log output to ${LOGFILE}."
let status="${status} + 1"
fi
fi
return "${status}"
return "${status}"
}
# Display a message for the user.
function displayMessage
{
local status=0
local status=0
if [ "$#" -lt 1 ]
then
echo "displayMessage <message>"
let status="${status} + 1"
elif ! log "${1}" 0
then
log "Could not write message to file."
else
# Increment the message counter
let messagecount="${messagecount} + 1"
if [ "$#" -lt 1 ]
then
echo "displayMessage <message>"
let status="${status} + 1"
elif ! log "${1}" 0
then
log "Could not write message to file."
else
# Increment the message counter
let messagecount="${messagecount} + 1"
# Display successful message, if previous message
if [ "${messagecount}" -gt 1 ]
then
# Determine the amount of transpired time
let timespent="${SECONDS}-${messagetime}"
# Display successful message, if previous message
if [ "${messagecount}" -gt 1 ]
then
# Determine the amount of transpired time
let timespent="${SECONDS}-${messagetime}"
if [ "${DEBUGLEVEL}" -gt 0 ]; then
printf "... done in %3d seconds\n" "${timespent}"
fi
fi
if [ "${DEBUGLEVEL}" -gt 0 ]; then
printf "... done in %3d seconds\n" "${timespent}"
fi
fi
# Display message
if [ "${DEBUGLEVEL}" -gt 0 ]; then
printf "%-16s %-35s " "$(date "+%F %H-%M-%S")" "$1"
fi
# Display message
if [ "${DEBUGLEVEL}" -gt 0 ]; then
printf "%-16s %-35s " "$(date "+%F %H-%M-%S")" "$1"
fi
# Update the variables
messagetime="${SECONDS}"
fi
# Update the variables
messagetime="${SECONDS}"
fi
return "${status}"
return "${status}"
}
# Create the directories used by the server.
function createDirectories {
# Display user message
if ! displayMessage "Creating directories"
then
echo 'Failed to display user message'
let status="${status} + 1"
elif ! mkdir -p "${LOGDIR}" "${ETCDIR}"
then
log "Failed to create directories"
let status="${status} + 1"
# Display user message
elif ! displayMessage "Setting file permissions"
then
log 'Failed to display user message'
let status="${status} + 1"
elif ! chmod 755 "${BINDIR}/fdbserver" "${BINDIR}/fdbcli"
then
log "Failed to set file permissions"
let status="${status} + 1"
else
while read filepath
do
if [ -f "${filepath}" ] && [ ! -x "${filepath}" ]
then
# if [ "${DEBUGLEVEL}" -gt 1 ]; then
# log " Enable executable: ${filepath}"
# fi
log " Enable executable: ${filepath}" "${DEBUGLEVEL}"
if ! chmod 755 "${filepath}"
then
log "Failed to set executable for file: ${filepath}"
let status="${status} + 1"
fi
fi
done < <(find "${BINDIR}" -iname '*.py' -o -iname '*.rb' -o -iname 'fdb_flow_tester' -o -iname '_stacktester' -o -iname '*.js' -o -iname '*.sh' -o -iname '*.ksh')
fi
function createDirectories
{
local status=0
return ${status}
# Display user message
if ! displayMessage "Creating directories"
then
echo 'Failed to display user message'
let status="${status} + 1"
elif ! mkdir -p "${LOGDIR}" "${ETCDIR}"
then
log "Failed to create directories"
let status="${status} + 1"
# Display user message
elif ! displayMessage "Setting file permissions"
then
log 'Failed to display user message'
let status="${status} + 1"
elif ! chmod 755 "${BINDIR}/fdbserver" "${BINDIR}/fdbcli"
then
log "Failed to set file permissions"
let status="${status} + 1"
else
while read filepath
do
if [ -f "${filepath}" ] && [ ! -x "${filepath}" ]
then
# if [ "${DEBUGLEVEL}" -gt 1 ]; then
# log " Enable executable: ${filepath}"
# fi
log " Enable executable: ${filepath}" "${DEBUGLEVEL}"
if ! chmod 755 "${filepath}"
then
log "Failed to set executable for file: ${filepath}"
let status="${status} + 1"
fi
fi
done < <(find "${BINDIR}" -iname '*.py' -o -iname '*.rb' -o -iname 'fdb_flow_tester' -o -iname '_stacktester' -o -iname '*.js' -o -iname '*.sh' -o -iname '*.ksh')
fi
return ${status}
}
# Create a cluster file for the local cluster.
function createClusterFile {
if [ "${status}" -ne 0 ]; then
:
# Display user message
elif ! displayMessage "Creating Fdb Cluster file"
then
log 'Failed to display user message'
let status="${status} + 1"
else
description=$(LC_CTYPE=C tr -dc A-Za-z0-9 < /dev/urandom 2> /dev/null | head -c 8)
random_str=$(LC_CTYPE=C tr -dc A-Za-z0-9 < /dev/urandom 2> /dev/null | head -c 8)
echo "$description:$random_str@127.0.0.1:${FDBSERVERPORT}" > "${FDBCONF}"
fi
function createClusterFile
{
local status=0
if [ "${status}" -ne 0 ]; then
:
elif ! chmod 0664 "${FDBCONF}"; then
log "Failed to set permissions on fdbconf: ${FDBCONF}"
let status="${status} + 1"
fi
if [ "${status}" -ne 0 ]; then
:
# Display user message
elif ! displayMessage "Creating Fdb Cluster file"
then
log 'Failed to display user message'
let status="${status} + 1"
else
description=$(LC_CTYPE=C tr -dc A-Za-z0-9 < /dev/urandom 2> /dev/null | head -c 8)
random_str=$(LC_CTYPE=C tr -dc A-Za-z0-9 < /dev/urandom 2> /dev/null | head -c 8)
echo "${description}:${random_str}@${FDBCLUSTERTEXT}" > "${FDBCONF}"
fi
return ${status}
if [ "${status}" -ne 0 ]; then
:
elif ! chmod 0664 "${FDBCONF}"; then
log "Failed to set permissions on fdbconf: ${FDBCONF}"
let status="${status} + 1"
fi
return ${status}
}
# Stop the Cluster from running.
function stopCluster
{
local status=0
# Add an audit entry, if enabled
if [ "${AUDITCLUSTER}" -gt 0 ]; then
printf '%-15s (%6s) Stopping cluster %-20s (%6s): %s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "${$}" "${FDBCLUSTERTEXT}" "${FDBSERVERID}" >> "${AUDITLOG}"
fi
if [ -z "${FDBSERVERID}" ]; then
log 'FDB Server process is not defined'
let status="${status} + 1"
elif ! kill -0 "${FDBSERVERID}"; then
log "Failed to locate FDB Server process (${FDBSERVERID})"
let status="${status} + 1"
elif "${BINDIR}/fdbcli" -C "${FDBCONF}" --exec "kill; kill ${FDBCLUSTERTEXT}; sleep 3" --timeout 120 &>> "${LOGDIR}/fdbcli-kill.log"
then
# Ensure that process is dead
if ! kill -0 "${FDBSERVERID}" 2> /dev/null; then
log "Killed cluster (${FDBSERVERID}) via cli"
elif ! kill -9 "${FDBSERVERID}"; then
log "Failed to kill FDB Server process (${FDBSERVERID}) via cli or kill command"
let status="${status} + 1"
else
log "Forcibly killed FDB Server process (${FDBSERVERID}) since cli failed"
fi
elif ! kill -9 "${FDBSERVERID}"; then
log "Failed to forcibly kill FDB Server process (${FDBSERVERID})"
let status="${status} + 1"
else
log "Forcibly killed FDB Server process (${FDBSERVERID})"
fi
return "${status}"
}
# Start the server running.
function startFdbServer {
if [ "${status}" -ne 0 ]; then
:
elif ! displayMessage "Starting Fdb Server"
then
log 'Failed to display user message'
let status="${status} + 1"
function startFdbServer
{
local status=0
elif ! "${BINDIR}/fdbserver" -C "${FDBCONF}" -p "auto:${FDBSERVERPORT}" -L "${LOGDIR}" -d "${WORKDIR}/fdb/$$" &> "${LOGDIR}/fdbserver.log" &
then
log "Failed to start FDB Server"
# Maybe the server is already running
FDBSERVERID="$(pidof fdbserver)"
let status="${status} + 1"
else
FDBSERVERID="${!}"
fi
# Add an audit entry, if enabled
if [ "${AUDITCLUSTER}" -gt 0 ]; then
printf '%-15s (%6s) Starting cluster %-20s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "${$}" "${FDBCLUSTERTEXT}" >> "${AUDITLOG}"
fi
if ! kill -0 ${FDBSERVERID} ; then
log "FDB Server start failed."
let status="${status} + 1"
fi
if ! displayMessage "Starting Fdb Server"
then
log 'Failed to display user message'
let status="${status} + 1"
return ${status}
else
"${BINDIR}/fdbserver" --knob_disable_posix_kernel_aio=1 -C "${FDBCONF}" -p "${FDBCLUSTERTEXT}" -L "${LOGDIR}" -d "${WORKDIR}/fdb/${$}" &> "${LOGDIR}/fdbserver.log" &
if [ "${?}" -ne 0 ]
then
log "Failed to start FDB Server"
let status="${status} + 1"
else
FDBSERVERID="${!}"
fi
fi
if [ -z "${FDBSERVERID}" ]; then
log "FDB Server start failed because no process"
let status="${status} + 1"
elif ! kill -0 "${FDBSERVERID}" ; then
log "FDB Server start failed because process terminated unexpectedly"
let status="${status} + 1"
fi
return ${status}
}
function getStatus {
if [ "${status}" -ne 0 ]; then
:
elif ! date &>> "${LOGDIR}/fdbclient.log"
then
log 'Failed to get date'
let status="${status} + 1"
elif ! "${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'status json' --timeout 120 &>> "${LOGDIR}/fdbclient.log"
then
log 'Failed to get status from fdbcli'
let status="${status} + 1"
elif ! date &>> "${LOGDIR}/fdbclient.log"
then
log 'Failed to get date'
let status="${status} + 1"
fi
function getStatus
{
local status=0
return ${status}
if [ "${status}" -ne 0 ]; then
:
elif ! date &>> "${LOGDIR}/fdbclient.log"
then
log 'Failed to get date'
let status="${status} + 1"
elif ! "${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'status json' --timeout 120 &>> "${LOGDIR}/fdbclient.log"
then
log 'Failed to get status from fdbcli'
let status="${status} + 1"
elif ! date &>> "${LOGDIR}/fdbclient.log"
then
log 'Failed to get date'
let status="${status} + 1"
fi
return ${status}
}
# Verify that the cluster is available.
function verifyAvailable {
# Verify that the server is running.
if ! kill -0 "${FDBSERVERID}"
then
log "FDB server process (${FDBSERVERID}) is not running"
let status="${status} + 1"
return 1
function verifyAvailable
{
local status=0
# Display user message.
elif ! displayMessage "Checking cluster availability"
then
log 'Failed to display user message'
let status="${status} + 1"
return 1
# Determine if status json says the database is available.
else
avail=`"${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'status json' --timeout 10 2> /dev/null | grep -E '"database_available"|"available"' | grep 'true'`
log "Avail value: ${avail}" "${DEBUGLEVEL}"
if [[ -n "${avail}" ]] ; then
return 0
else
return 1
fi
fi
if [ -z "${FDBSERVERID}" ]; then
log "FDB Server process is not defined."
let status="${status} + 1"
# Verify that the server is running.
elif ! kill -0 "${FDBSERVERID}"
then
log "FDB server process (${FDBSERVERID}) is not running"
let status="${status} + 1"
# Display user message.
elif ! displayMessage "Checking cluster availability"
then
log 'Failed to display user message'
let status="${status} + 1"
# Determine if status json says the database is available.
else
avail=`"${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'status json' --timeout "${SERVERCHECKS}" 2> /dev/null | grep -E '"database_available"|"available"' | grep 'true'`
log "Avail value: ${avail}" "${DEBUGLEVEL}"
if [[ -n "${avail}" ]] ; then
:
else
let status="${status} + 1"
fi
fi
return "${status}"
}
# Configure the database on the server.
function createDatabase {
if [ "${status}" -ne 0 ]; then
:
# Ensure that the server is running
elif ! kill -0 "${FDBSERVERID}"
then
log "FDB server process: (${FDBSERVERID}) is not running"
let status="${status} + 1"
function createDatabase
{
local status=0
# Display user message
elif ! displayMessage "Creating database"
then
log 'Failed to display user message'
let status="${status} + 1"
elif ! echo "Client log:" &> "${LOGDIR}/fdbclient.log"
then
log 'Failed to create fdbclient.log'
let status="${status} + 1"
elif ! getStatus
then
log 'Failed to get status'
let status="${status} + 1"
if [ "${status}" -ne 0 ]; then
:
# Ensure that the server is running
elif ! kill -0 "${FDBSERVERID}"
then
log "FDB server process: (${FDBSERVERID}) is not running"
let status="${status} + 1"
# Configure the database.
else
"${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'configure new single memory; status' --timeout 240 --log --log-dir "${LOGDIR}" &>> "${LOGDIR}/fdbclient.log"
# Display user message
elif ! displayMessage "Creating database"
then
log 'Failed to display user message'
let status="${status} + 1"
elif ! echo "Client log:" &> "${LOGDIR}/fdbclient.log"
then
log 'Failed to create fdbclient.log'
let status="${status} + 1"
elif ! getStatus
then
log 'Failed to get status'
let status="${status} + 1"
if ! displayMessage "Checking if config succeeded"
then
log 'Failed to display user message.'
fi
# Configure the database.
else
"${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'configure new single memory; status' --timeout "${CONFIGUREWAIT}" --log --log-dir "${LOGDIR}" &>> "${LOGDIR}/fdbclient.log"
iteration=0
while [[ "${iteration}" -lt 10 ]] && ! verifyAvailable
do
log "Database not created (iteration ${iteration})."
let iteration="${iteration} + 1"
done
if ! displayMessage "Checking if config succeeded"
then
log 'Failed to display user message.'
fi
if ! verifyAvailable
then
log "Failed to create database via cli"
getStatus
cat "${LOGDIR}/fdbclient.log"
log "Ignoring -- moving on"
#let status="${status} + 1"
fi
fi
iteration=0
while [[ "${iteration}" -lt "${SERVERCHECKS}" ]] && ! verifyAvailable
do
log "Database not created (iteration ${iteration})."
let iteration="${iteration} + 1"
done
return ${status}
if ! verifyAvailable
then
log "Failed to create database via cli"
getStatus
cat "${LOGDIR}/fdbclient.log"
log "Ignoring -- moving on"
#let status="${status} + 1"
fi
fi
return ${status}
}
# Begin the local cluster from scratch.
function startCluster {
if [ "${status}" -ne 0 ]; then
:
elif ! createDirectories
then
log "Could not create directories."
let status="${status} + 1"
elif ! createClusterFile
then
log "Could not create cluster file."
let status="${status} + 1"
elif ! startFdbServer
then
log "Could not start FDB server."
let status="${status} + 1"
elif ! createDatabase
then
log "Could not create database."
let status="${status} + 1"
fi
function startCluster
{
local status=0
return ${status}
if [ "${status}" -ne 0 ]; then
:
elif ! createDirectories
then
log "Could not create directories."
let status="${status} + 1"
elif ! createClusterFile
then
log "Could not create cluster file."
let status="${status} + 1"
elif ! startFdbServer
then
log "Could not start FDB server."
let status="${status} + 1"
elif ! createDatabase
then
log "Could not create database."
let status="${status} + 1"
fi
return ${status}
}

View File

@ -24,22 +24,22 @@ def parse_args():
# (e)nd of a span with a better given name
locationToPhase = {
"NativeAPI.commit.Before": [],
"MasterProxyServer.batcher": [("b", "Commit")],
"MasterProxyServer.commitBatch.Before": [],
"MasterProxyServer.commitBatch.GettingCommitVersion": [("b", "CommitVersion")],
"MasterProxyServer.commitBatch.GotCommitVersion": [("e", "CommitVersion")],
"CommitProxyServer.batcher": [("b", "Commit")],
"CommitProxyServer.commitBatch.Before": [],
"CommitProxyServer.commitBatch.GettingCommitVersion": [("b", "CommitVersion")],
"CommitProxyServer.commitBatch.GotCommitVersion": [("e", "CommitVersion")],
"Resolver.resolveBatch.Before": [("b", "Resolver.PipelineWait")],
"Resolver.resolveBatch.AfterQueueSizeCheck": [],
"Resolver.resolveBatch.AfterOrderer": [("e", "Resolver.PipelineWait"), ("b", "Resolver.Conflicts")],
"Resolver.resolveBatch.After": [("e", "Resolver.Conflicts")],
"MasterProxyServer.commitBatch.AfterResolution": [("b", "Proxy.Processing")],
"MasterProxyServer.commitBatch.ProcessingMutations": [],
"MasterProxyServer.commitBatch.AfterStoreCommits": [("e", "Proxy.Processing")],
"CommitProxyServer.commitBatch.AfterResolution": [("b", "Proxy.Processing")],
"CommitProxyServer.commitBatch.ProcessingMutations": [],
"CommitProxyServer.commitBatch.AfterStoreCommits": [("e", "Proxy.Processing")],
"TLog.tLogCommit.BeforeWaitForVersion": [("b", "TLog.PipelineWait")],
"TLog.tLogCommit.Before": [("e", "TLog.PipelineWait")],
"TLog.tLogCommit.AfterTLogCommit": [("b", "TLog.FSync")],
"TLog.tLogCommit.After": [("e", "TLog.FSync")],
"MasterProxyServer.commitBatch.AfterLogPush": [("e", "Commit")],
"CommitProxyServer.commitBatch.AfterLogPush": [("e", "Commit")],
"NativeAPI.commit.After": [],
}

View File

@ -16,7 +16,7 @@ As an essential component of a database system, backup and restore is commonly u
## Background
The FDB backup system continuously scans the database's key-value space and saves key-value pairs and mutations at versions into range files and log files in blob storage. Specifically, mutation logs are generated at the Proxy and are written to transaction logs along with regular mutations. In production clusters like CK clusters, the backup system is always on, which means each mutation is written twice to transaction logs, consuming about half of the write bandwidth and about 40% of Proxy CPU time.
The FDB backup system continuously scans the database's key-value space and saves key-value pairs and mutations at versions into range files and log files in blob storage. Specifically, mutation logs are generated at the CommitProxy and are written to transaction logs along with regular mutations. In production clusters like CK clusters, the backup system is always on, which means each mutation is written twice to transaction logs, consuming about half of the write bandwidth and about 40% of CommitProxy CPU time.
The design of old backup system is [here](https://github.com/apple/foundationdb/blob/master/design/backup.md), and the data format of range files and mutations files is [here](https://github.com/apple/foundationdb/blob/master/design/backup-dataFormat.md). The technical overview of FDB is [here](https://github.com/apple/foundationdb/wiki/Technical-Overview-of-the-Database). The FDB recovery is described in this [doc](https://github.com/apple/foundationdb/blob/master/design/recovery-internals.md).
@ -37,7 +37,7 @@ The design of old backup system is [here](https://github.com/apple/foundationdb/
Feature priorities: Feature 1, 2, 3, 4, 5 are must-have; Feature 6 is better to have.
1. **Write bandwidth reduction by half**: removes the requirement to generate backup mutations at the Proxy, thus reducing TLog write bandwidth usage by half and significantly improving Proxy CPU usage;
1. **Write bandwidth reduction by half**: removes the requirement to generate backup mutations at the CommitProxy, thus reducing TLog write bandwidth usage by half and significantly improving CommitProxy CPU usage;
2. **Correctness**: The restored database must be consistent: each *restored* state (i.e., key-value pair) at a version `v` must match the original state at version `v`.
3. **Performance**: The backup system should be performant, mostly measured as a small CPU overhead on transaction logs and backup workers. The version lag on backup workers is an indicator of performance.
4. **Fault-tolerant**: The backup system should be fault-tolerant to node failures in the FDB cluster.
@ -153,9 +153,9 @@ The requirement of the new backup system raises several design challenges:
**Master**: The master is responsible for coordinating the transition of the FDB transaction sub-system from one generation to the next. In particular, the master recruits backup workers during the recovery.
**Transaction Logs (TLogs)**: The transaction logs make mutations durable to disk for fast commit latencies. The logs receive commits from the proxy in version order, and only respond to the proxy once the data has been written and fsync'ed to an append-only mutation log on disk. Storage servers retrieve mutations from TLogs. Once the storage servers have persisted mutations, storage servers then pop the mutations from the TLogs.
**Transaction Logs (TLogs)**: The transaction logs make mutations durable to disk for fast commit latencies. The logs receive commits from the commit proxy in version order, and only respond to the commit proxy once the data has been written and fsync'ed to an append-only mutation log on disk. Storage servers retrieve mutations from TLogs. Once the storage servers have persisted mutations, storage servers then pop the mutations from the TLogs.
**Proxy**: The proxies are responsible for committing transactions and tracking the storage servers responsible for each range of keys. In the old backup system, proxies are responsible for grouping mutations into backup mutations and writing them to the database.
**CommitProxy**: The commit proxies are responsible for committing transactions and tracking the storage servers responsible for each range of keys. In the old backup system, commit proxies are responsible for grouping mutations into backup mutations and writing them to the database.
**GrvProxy**: The GRV proxies are responsible for providing read versions.
## System overview
@ -229,7 +229,7 @@ The operators backup request can indicate if an old backup or a new backup is
2. All backup workers monitor the key `\xff\x02/backupStarted`, see the change, and start logging mutations.
3. After all backup workers have started, the `fdbbackup` tool initiates the backup of all or specified key ranges by issuing a transaction `Ts`.
Compared to the old backup system, steps 1 and 2 above are new and are only triggered if the client requests a new type of backup. The purpose is to allow backup workers to function as a no-op if there are no ongoing backups. However, the backup workers should still continuously pop their corresponding tags, otherwise mutations will be kept in the TLog. To know the version to pop, backup workers can obtain the read version from any proxy. Because the read version must be a committed version, popping to this version is safe.
Compared to the old backup system, steps 1 and 2 above are new and are only triggered if the client requests a new type of backup. The purpose is to allow backup workers to function as a no-op if there are no ongoing backups. However, the backup workers should still continuously pop their corresponding tags, otherwise mutations will be kept in the TLog. To know the version to pop, backup workers can obtain the read version from any GRV proxy. Because the read version must be a committed version, popping to this version is safe.
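A minimal sketch of the idle-popping rule described above, with hypothetical stand-in functions (the real implementation is a Flow actor inside the backup worker, not this C code):

```c
typedef long long Version;

/* Hypothetical stand-ins for the real actors and RPCs. */
static int     backup_started(void)            { return 0;   } /* watch on \xff\x02/backupStarted */
static Version grv_proxy_read_version(void)    { return 100; } /* committed read version          */
static void    pop_tag(int tag, Version up_to) { (void)tag; (void)up_to; }
static void    log_mutations_for_tag(int tag)  { (void)tag; }

/* Even with no ongoing backup, keep popping the worker's tag up to a committed
 * read version so mutations do not accumulate on the TLogs. */
void backup_worker_iteration(int tag) {
    if (backup_started()) {
        log_mutations_for_tag(tag);            /* save mutations to blob storage */
    } else {
        pop_tag(tag, grv_proxy_read_version());
    }
}
```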
**Backup Submission Protocol**
Protocol for `submitBackup()` to ensure that all backup workers of the current epoch have started logging mutations:

View File

@ -22,7 +22,7 @@ Data distribution manages the lifetime of storage servers, decides which storage
**Data distribution queue (`struct DDQueueData`)**: It receives shards to be relocated (i.e., RelocateShards), decides which shard should be moved to which server team, prioritizes the data movement based on the relocate shards' priority, and controls the progress of data movement based on the servers' workload.
**Special keys in the system keyspace**: DD saves its state in the system keyspace to recover from failure and to ensure every process (e.g., proxies, tLogs and storage servers) has a consistent view of which storage server is responsible for which key range.
**Special keys in the system keyspace**: DD saves its state in the system keyspace to recover from failure and to ensure every process (e.g., commit proxies, tLogs and storage servers) has a consistent view of which storage server is responsible for which key range.
*serverKeys* sub-space (`\xff/serverKeys/`): It records the start key of each shard a server is responsible for. The format is *\xff/serverKeys/[serverID]/[start_key]*. To get start keys of all shards for a server, DD can read the key range with prefix *\xff/serverKeys/[serverID]/* and decode the value of [start_key].
@ -32,9 +32,9 @@ Data distribution manages the lifetime of storage servers, decides which storage
When a new DD is initialized, it will set itself as the owner by setting its random UID to the `moveKeysLockOwnerKey`. Since the owner key has only one value, at most one DD can own the DD-related system subspace. This avoids the potential race condition between multiple DDs which may co-exist during DD recruitment.
**Transaction State Store (txnStateStore)**: It is a replica of the special keyspace that stores the cluster's states, such as which SS is responsible for which shard. Because proxies use txnStateStore to decide which tLog and SS should receive a mutation, proxies must have a consistent view of txnStateStore. Therefore, changes to txnStateStore must be propagated to all proxies in total order. To achieve that, we use the special transaction (`applyMetaMutations`) to update txnStateStore and use resolvers to ensure the total ordering (serializable snapshot isolation).
**Transaction State Store (txnStateStore)**: It is a replica of the special keyspace that stores the cluster's states, such as which SS is responsible for which shard. Because commit proxies use txnStateStore to decide which tLog and SS should receive a mutation, commit proxies must have a consistent view of txnStateStore. Therefore, changes to txnStateStore must be propagated to all commit proxies in total order. To achieve that, we use the special transaction (`applyMetaMutations`) to update txnStateStore and use resolvers to ensure the total ordering (serializable snapshot isolation).
**Private mutation**: A private mutation is a mutation updating a special system key, such as keyServersKey (`\xff/keyServers/`) and serverKeysKey (`\xff/serverKeys/`). Like a normal mutation, a private mutation will be processed by the transaction systems (i.e., proxy, resolver and tLog) and be routed to a set of storage servers, based on the mutation's tag, to update the key-value in the storage engine. Private mutations also keep the serializable snapshot isolation and consensus: The results of committed concurrent private mutations can be reproduced by sequentially executing the mutations, and all components in FDB have the same view of the mutations.
**Private mutation**: A private mutation is a mutation updating a special system key, such as keyServersKey (`\xff/keyServers/`) and serverKeysKey (`\xff/serverKeys/`). Like a normal mutation, a private mutation will be processed by the transaction systems (i.e., commit proxy, resolver and tLog) and be routed to a set of storage servers, based on the mutation's tag, to update the key-value in the storage engine. Private mutations also keep the serializable snapshot isolation and consensus: The results of committed concurrent private mutations can be reproduced by sequentially executing the mutations, and all components in FDB have the same view of the mutations.
## Operations
@ -51,7 +51,7 @@ Whenever the team builder is invoked, it aims to build the desired number of ser
**Data distribution queue server (`dataDistributionQueue` actor)**: It is created when DD is initialized. It behaves as a server to handle RelocateShard related requests. For example, it waits on the stream of RelocateShard. When a new RelocateShard is sent by teamTracker, it enqueues the new shard, and cancels the inflight shards that overlap with the new relocate shard.
**`applyMetaMutations`**: This is special logic to handle *private transactions* that modify txnStateStore and special system keys. Transaction systems (i.e., proxy, resolver and tLogs) and storage servers perform extra operations for the special transactions. For any update, it will be executed on all proxies in order so that all proxies have a consistent view of the txnStateStore. It will also send special keys to storage servers so that storage servers know the new keyspace they are now responsible for.
**`applyMetaMutations`**: This is special logic to handle *private transactions* that modify txnStateStore and special system keys. Transaction systems (i.e., commit proxy, resolver and tLogs) and storage servers perform extra operations for the special transactions. For any update, it will be executed on all commit proxies in order so that all commit proxies have a consistent view of the txnStateStore. It will also send special keys to storage servers so that storage servers know the new keyspace they are now responsible for.
A storage server (SS) processes all requests sent to the server in its `storageServerCore` actor. When a (private) mutation request is sent to a SS, the server will call the `update()` function. Eventually, the `StorageUpdater` class will be invoked to apply the mutation in the `applyMutation()` function, which handles private mutations in the `applyPrivateData()` function.
@ -84,9 +84,9 @@ Actors are created to monitor the reasons of key movement:
A key range is a shard. A shard is the minimum unit of moving data. The storage servers' ownership of a shard -- which SS owns which shard -- is stored in the system keyspace *serverKeys* (`\xff/serverKeys/`) and *keyServers* (`\xff/keyServers/`). To simplify the explanation, we refer to the storage servers' ownership of a shard as a shard's ownership.
A shard's ownership is used in transaction systems (proxy and tLogs) to route mutations to tLogs and storage servers. When a proxy receives a mutation, it uses the shard's ownership to decide which *k* tLogs receive the mutation, assuming *k* is the replication factor. When a storage server pulls mutations from tLogs, it uses the shard's ownership to decide which shards the SS is responsible for and which tLog the SS should pull the data from.
A shard's ownership is used in transaction systems (commit proxy and tLogs) to route mutations to tLogs and storage servers. When a commit proxy receives a mutation, it uses the shard's ownership to decide which *k* tLogs receive the mutation, assuming *k* is the replication factor. When a storage server pulls mutations from tLogs, it uses the shard's ownership to decide which shards the SS is responsible for and which tLog the SS should pull the data from.
A shard's ownership must be consistent across transaction systems and SSes, so that mutations can be correctly routed to SSes. Moving keys from one SS to another requires changing the shard's ownership under the ACID property. The ACID property is achieved by using FDB transactions to change the *serverKeys* (`\xff/serverKeys/`) and *keyServers* (`\xff/keyServers/`). The mutation on the *serverKeys* and *keyServers* will be categorized as private mutations in the transaction system. Compared to normal mutations, the private mutations will change the transaction state store (txnStateStore) that maintains the *serverKeys* and *keyServers* for transaction systems (proxy and tLog) when it arrives on each transaction component (e.g., tLog). Because mutations are processed in total order with the ACID guarantees, the change to the txnStateStore will be executed in total order on each node and the change on the shard's ownership will also be consistent.
A shard's ownership must be consistent across transaction systems and SSes, so that mutations can be correctly routed to SSes. Moving keys from one SS to another requires changing the shard's ownership under the ACID property. The ACID property is achieved by using FDB transactions to change the *serverKeys* (`\xff/serverKeys/`) and *keyServers* (`\xff/keyServers/`). The mutation on the *serverKeys* and *keyServers* will be categorized as private mutations in the transaction system. Compared to normal mutations, the private mutations will change the transaction state store (txnStateStore) that maintains the *serverKeys* and *keyServers* for transaction systems (commit proxy and tLog) when it arrives on each transaction component (e.g., tLog). Because mutations are processed in total order with the ACID guarantees, the change to the txnStateStore will be executed in total order on each node and the change on the shard's ownership will also be consistent.
The data movement from one server (called source server) to another (called destination server) has four steps:
(1) DD adds the destination server as the shard's new owner;

View File

@ -8,12 +8,12 @@ This document explains at the high level how the recovery works in a single clus
## `ServerDBInfo` data structure
This data structure contains transient information which is broadcast to all workers for a database, permitting them to communicate with each other. It contains, for example, the interfaces for cluster controller (CC), master, ratekeeper, and resolver, and holds the log system's configuration. Only part of the data structure, such as `ClientDBInfo` that contains the list of proxies, is available to the client.
This data structure contains transient information which is broadcast to all workers for a database, permitting them to communicate with each other. It contains, for example, the interfaces for cluster controller (CC), master, ratekeeper, and resolver, and holds the log system's configuration. Only part of the data structure, such as `ClientDBInfo` that contains the list of GRV proxies and commit proxies, is available to the client.
Whenever a field of the `ServerDBInfo` is changed, the new value of the field, say the new master's interface, will be sent to the CC, and the CC will propagate the new `ServerDBInfo` to all workers in the cluster.
## When will recovery happen?
Failure of certain roles in FDB can cause recovery. Those roles are cluster controller, master, proxy, transaction logs (tLog), resolvers, and log router.
Failure of certain roles in FDB can cause recovery. Those roles are cluster controller, master, GRV proxy, commit proxy, transaction logs (tLog), resolvers, log router, and backup workers.
Network partition or failures can make CC unable to reach some roles, treating those roles as dead and causing recovery. If CC cannot connect to a majority of coordinators, it will be treated as dead by coordinators and recovery will happen.
@ -97,7 +97,7 @@ Master interface is stored in `serverDBInfo`. Once the CC recruits the master, i
Once the master locks the cstate, it will recruit the still-alive tLogs from the previous generation for the benefit of faster recovery. The master gets the old tLogs' interfaces from the READING_CSTATE phase and uses those interfaces to track which old tLogs are still alive; the implementation is in `trackRejoins()`.
Once the master gets enough tLogs, it calculates the known committed version (i.e., `knownCommittedVersion` in code). `knownCommittedVersion` is the highest version that a proxy has told a given tLog was durably committed on *all* tLogs; the master's value is the maximum across all of the old tLogs. `knownCommittedVersion` is important because it defines the lower bound of the version range of mutations that needs to be copied to the new generation. That is, any version larger than the master's `knownCommittedVersion` is not guaranteed to persist on all replicas. The master chooses a *recovery version*, which is the minimum of the durable versions on all tLogs of the old generation, and recruits a new set of tLogs that copy all data between `knownCommittedVersion + 1` and `recoveryVersion` from the old tLogs. This copy makes sure data within the range has enough replicas to satisfy the replication policy.
Once the master gets enough tLogs, it calculates the known committed version (i.e., `knownCommittedVersion` in code). `knownCommittedVersion` is the highest version that a commit proxy has told a given tLog was durably committed on *all* tLogs; the master's value is the maximum across all of the old tLogs. `knownCommittedVersion` is important because it defines the lower bound of the version range of mutations that needs to be copied to the new generation. That is, any version larger than the master's `knownCommittedVersion` is not guaranteed to persist on all replicas. The master chooses a *recovery version*, which is the minimum of the durable versions on all tLogs of the old generation, and recruits a new set of tLogs that copy all data between `knownCommittedVersion + 1` and `recoveryVersion` from the old tLogs. This copy makes sure data within the range has enough replicas to satisfy the replication policy.
Later, the master will use the recruited tLogs to create a new `TagPartitionedLogSystem` for the new generation.
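As a hedged sketch of the version arithmetic described above (the numbers are invented and this is not the actual recovery code), the fragment below derives the master's `knownCommittedVersion` and the recovery version from per-tLog state:

```c
#include <stdint.h>
#include <stdio.h>

int main(void) {
    /* hypothetical state reported by three still-alive old-generation tLogs */
    int64_t known_committed[] = { 90, 92, 95 };    /* per-tLog knownCommittedVersion */
    int64_t durable[]         = { 100, 110, 105 }; /* per-tLog durable version */
    int n = 3;

    int64_t knownCommittedVersion = 0;   /* the master takes the maximum */
    int64_t recoveryVersion = INT64_MAX; /* the minimum durable version  */
    for (int i = 0; i < n; i++) {
        if (known_committed[i] > knownCommittedVersion) knownCommittedVersion = known_committed[i];
        if (durable[i] < recoveryVersion) recoveryVersion = durable[i];
    }

    /* mutations in [knownCommittedVersion + 1, recoveryVersion] are copied to the new tLogs */
    printf("copy versions %lld..%lld to the new generation\n",
           (long long)(knownCommittedVersion + 1), (long long)recoveryVersion);
    return 0;
}
```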
@ -121,9 +121,9 @@ Consider an old generation with three TLogs: `A, B, C`. Their durable versions a
Once we have a `knownCommittedVersion`, the master will reconstruct the transaction state store (txnStateStore) by peeking the txnStateTag in oldLogSystem.
Recall that the txnStateStore includes the transaction system's configuration, such as the assignment of shards to SSes and to tLogs, and that the txnStateStore was durable on disk in the oldLogSystem.
Once we get the txnStateStore, we know the configuration of the transaction system, such as the number of proxies. The master can then ask the CC to recruit roles for the new generation in the `recruitEverything()` function. Those recruited roles include proxies, tLogs and seed SSes, which are the storage servers created for an empty database in the first generation to host the first shard and serve as the starting point of the bootstrap process to recruit more SSes. Once all roles are recruited, the master starts a new epoch in `newEpoch()`.
Once we get the txnStateStore, we know the configuration of the transaction system, such as the number of GRV proxies and commit proxies. The master can then ask the CC to recruit roles for the new generation in the `recruitEverything()` function. Those recruited roles include GRV proxies, commit proxies, tLogs and seed SSes, which are the storage servers created for an empty database in the first generation to host the first shard and serve as the starting point of the bootstrap process to recruit more SSes. Once all roles are recruited, the master starts a new epoch in `newEpoch()`.
At this point, we have recovered the txnStateStore, recruited new proxies and tLogs, and copied data from old tLogs to new tLogs. We have a working transaction system in the new generation now.
At this point, we have recovered the txnStateStore, recruited new GRV proxies, commit proxies and tLogs, and copied data from old tLogs to new tLogs. We have a working transaction system in the new generation now.
### Where can the recovery get stuck in this phase?
@ -151,7 +151,7 @@ Not every FDB role participates in the recovery phases 1-3. This phase tells the
Storage servers (SSes) are not involved in recovery phases 1 - 3. To notify SSes about the recovery, the master commits a recovery transaction, the first transaction in the new generation, which contains the txnStateStore information. Once a storage server receives the recovery transaction, it compares its latest data version with the recovery version, and rolls back to the recovery version if its data version is newer. Note that storage servers may have newer data than the recovery version because they pre-fetch mutations from tLogs before the mutations are durable, to reduce the latency of reading newly written data.
Proxies haven't recovered the transaction system state and cannot accept transactions yet. The master recovers the proxies' states by sending the txnStateStore to the proxies through their (`txnState`) interfaces in the `sendIntialCommitToResolvers()` function. Once the proxies have recovered their states, they can start processing transactions. The recovery transaction that was waiting on the proxies will then be processed.
Commit proxies haven't recovered the transaction system state and cannot accept transactions yet. The master recovers the commit proxies' states by sending the txnStateStore to the commit proxies through their (`txnState`) interfaces in the `sendIntialCommitToResolvers()` function. Once the commit proxies have recovered their states, they can start processing transactions. The recovery transaction that was waiting on the commit proxies will then be processed.
The resolvers don't know the recovery version yet either. The master needs to send the lastEpochEnd version (i.e., the last commit of the previous generation) to the resolvers via the resolvers' (`resolve`) interface.
@ -162,7 +162,7 @@ At the end of this phase, every role should be aware of the recovery and start r
## Phase 5: WRITING_CSTATE
Coordinators store the transaction system's information. The master needs to write the new tLogs into the coordinators' states to achieve consensus and fault tolerance. Only when the coordinators' states are updated with the new transaction system's configuration will the cluster controller tell clients about the new transaction system (such as the new proxies).
Coordinators store the transaction system's information. The master needs to write the new tLogs into the coordinators' states to achieve consensus and fault tolerance. Only when the coordinators' states are updated with the new transaction system's configuration will the cluster controller tell clients about the new transaction system (such as the new GRV proxies and commit proxies).
The master only needs to write the new tLogs to a quorum of coordinators for a running cluster. The only time the master has to write to all coordinators is when creating a brand new database.

View File

@ -7,17 +7,17 @@
(This assumes a basic familiarity with [FoundationDB's architecture](https://www.youtu.be/EMwhsGsxfPU).)
Transaction logs are a distributed Write-Ahead-Log for FoundationDB. They
receive commits from proxies, and are responsible for durably storing those
commits, and making them available to storage servers for reading.
receive commits from commit proxies, and are responsible for durably storing
those commits, and making them available to storage servers for reading.
Clients send *mutations*, the list of their set, clears, atomic operations,
etc., to proxies. Proxies collect mutations into a *batch*, which is the list
of all changes that need to be applied to the database to bring it from version
`N-1` to `N`. Proxies then walk through their in-memory mapping of shard
boundaries to associate one or more *tags*, a small integer uniquely
identifying a destination storage server, with each mutation. They then send a
*commit*, the full list of `(tags, mutation)` for each mutation in a batch, to
the transaction logs.
etc., to commit proxies. Commit proxies collect mutations into a *batch*, which
is the list of all changes that need to be applied to the database to bring it
from version `N-1` to `N`. Commit proxies then walk through their in-memory
mapping of shard boundaries to associate one or more *tags*, a small integer
uniquely identifying a destination storage server, with each mutation. They
then send a *commit*, the full list of `(tags, mutation)` for each mutation in
a batch, to the transaction logs.
The transaction log has two responsibilities: it must persist the commits to
disk and notify the proxy when a commit is durably stored, and it must make the

View File

@ -259,7 +259,8 @@ Use the ``status`` command of ``fdbcli`` to determine if the cluster is up and r
Redundancy mode - triple
Storage engine - ssd-2
Coordinators - 5
Desired Proxies - 5
Desired GRV Proxies - 1
Desired Commit Proxies - 4
Desired Logs - 8
Cluster:
@ -299,7 +300,8 @@ The summary fields are interpreted as follows:
Redundancy mode The currently configured redundancy mode (see the section :ref:`configuration-choosing-redundancy-mode`)
Storage engine The currently configured storage engine (see the section :ref:`configuration-configuring-storage-subsystem`)
Coordinators The number of FoundationDB coordination servers
Desired Proxies Number of proxies desired. If replication mode is 3 then default number of proxies is 3
Desired GRV Proxies Number of GRV proxies desired. (default 1)
Desired Commit Proxies Number of commit proxies desired. If replication mode is 3 then default number of commit proxies is 3
Desired Logs Number of logs desired. If replication mode is 3 then default number of logs is 3
FoundationDB processes Number of FoundationDB processes participating in the cluster
Machines Number of physical machines running at least one FoundationDB process that is participating in the cluster
@ -565,7 +567,7 @@ When configured, the ``status json`` output will include additional fields to re
filtered: 1
}
The ``grv_latency_bands`` and ``commit_latency_bands`` objects will only be logged for ``proxy`` roles, and ``read_latency_bands`` will only be logged for storage roles. Each threshold is represented as a key in the map, and its associated value will be the total number of requests in the lifetime of the process with a latency smaller than the threshold but larger than the next smaller threshold.
The ``grv_latency_bands`` objects will only be logged for ``grv_proxy`` roles, ``commit_latency_bands`` objects will only be logged for ``commit_proxy`` roles, and ``read_latency_bands`` will only be logged for storage roles. Each threshold is represented as a key in the map, and its associated value will be the total number of requests in the lifetime of the process with a latency smaller than the threshold but larger than the next smaller threshold.
For example, ``0.1: 1`` in ``read_latency_bands`` indicates that there has been 1 read request with a latency in the range ``[0.01, 0.1)``. For the smallest specified threshold, the lower bound is 0 (e.g. ``[0, 0.01)`` in the example above). Requests that took longer than any defined latency band will be reported in the ``inf`` (infinity) band. Requests that were filtered by the configuration (e.g. using ``max_read_bytes``) are reported in the ``filtered`` category.

View File

@ -263,9 +263,9 @@ See :ref:`developer-guide-programming-with-futures` for further (language-indepe
.. function:: fdb_error_t fdb_future_block_until_ready(FDBFuture* future)
Blocks the calling thread until the given Future is ready. It will return success even if the Future is set to an error -- you must call :func:`fdb_future_get_error()` to determine that. :func:`fdb_future_block_until_ready()` will return an error only in exceptional conditions (e.g. out of memory or other operating system resources).
Blocks the calling thread until the given Future is ready. It will return success even if the Future is set to an error -- you must call :func:`fdb_future_get_error()` to determine that. :func:`fdb_future_block_until_ready()` will return an error only in exceptional conditions (e.g. deadlock detected, out of memory or other operating system resources).
.. warning:: Never call this function from a callback passed to :func:`fdb_future_set_callback()`. This may block the thread on which :func:`fdb_run_network()` was invoked, resulting in a deadlock.
.. warning:: Never call this function from a callback passed to :func:`fdb_future_set_callback()`. This may block the thread on which :func:`fdb_run_network()` was invoked, resulting in a deadlock. In some cases the client can detect the deadlock and throw a ``blocked_from_network_thread`` error.
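A minimal usage sketch, assuming ``f`` was returned by an earlier API call and that this runs on an application thread rather than inside a callback passed to :func:`fdb_future_set_callback`::

    #include <stdio.h>
    #define FDB_API_VERSION 630
    #include <foundationdb/fdb_c.h>

    /* Block until a future is ready and surface any error it carries.
       Success from fdb_future_block_until_ready() does not imply the
       future itself succeeded, so the future's error is checked separately. */
    fdb_error_t wait_and_check(FDBFuture* f) {
        fdb_error_t err = fdb_future_block_until_ready(f);
        if (!err)
            err = fdb_future_get_error(f);
        if (err)
            fprintf(stderr, "future failed: %s (%d)\n", fdb_get_error(err), err);
        return err;
    }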
.. function:: fdb_bool_t fdb_future_is_ready(FDBFuture* future)

View File

@ -40,7 +40,7 @@ FoundationDB may return the following error codes from API functions. If you nee
+-----------------------------------------------+-----+--------------------------------------------------------------------------------+
| external_client_already_loaded | 1040| External client has already been loaded |
+-----------------------------------------------+-----+--------------------------------------------------------------------------------+
| proxy_memory_limit_exceeded | 1042| Proxy commit memory limit exceeded |
| proxy_memory_limit_exceeded | 1042| CommitProxy commit memory limit exceeded |
+-----------------------------------------------+-----+--------------------------------------------------------------------------------+
| batch_transaction_throttled | 1051| Batch GRV request rate limit exceeded |
+-----------------------------------------------+-----+--------------------------------------------------------------------------------+
@ -114,8 +114,12 @@ FoundationDB may return the following error codes from API functions. If you nee
+-----------------------------------------------+-----+--------------------------------------------------------------------------------+
| transaction_read_only | 2023| Attempted to commit a transaction specified as read-only |
+-----------------------------------------------+-----+--------------------------------------------------------------------------------+
| invalid_cache_eviction_policy | 2024| Invalid cache eviction policy, only random and lru are supported |
+-----------------------------------------------+-----+--------------------------------------------------------------------------------+
| network_cannot_be_restarted | 2025| Network can only be started once |
+-----------------------------------------------+-----+--------------------------------------------------------------------------------+
| blocked_from_network_thread | 2026| Detected a deadlock in a callback called from the network thread |
+-----------------------------------------------+-----+--------------------------------------------------------------------------------+
| incompatible_protocol_version | 2100| Incompatible protocol version |
+-----------------------------------------------+-----+--------------------------------------------------------------------------------+
| transaction_too_large | 2101| Transaction exceeds byte limit |

View File

@ -6,7 +6,7 @@ FoundationDB makes your architecture flexible and easy to operate. Your applicat
The following diagram details the logical architecture.
.. image:: /images/Architecture.png
|image0|
Detailed FoundationDB Architecture
@ -26,7 +26,7 @@ and servers use the coordinators to connect with the cluster controller.
The servers will attempt to become the cluster controller if one does
not exist, and register with the cluster controller once one has been
elected. Clients use the cluster controller to keep an up-to-date list
of proxies.
of GRV proxies and commit proxies.
Cluster Controller
~~~~~~~~~~~~~~~~~~
@ -42,10 +42,11 @@ Master
The master is responsible for coordinating the transition of the write
sub-system from one generation to the next. The write sub-system
includes the master, proxies, resolvers, and transaction logs. The three
roles are treated as a unit, and if any of them fail, we will recruit a
replacement for all three roles. The master provides the commit versions
for batches of the mutations to the proxies.
includes the master, GRV proxies, commit proxies, resolvers, and
transaction logs. These roles are treated as a unit, and if any of
them fail, we will recruit a replacement for all of them. The master
provides the commit versions for batches of the mutations to the commit
proxies.
Historically, Ratekeeper and Data Distributor were coupled with the Master on
the same process. Since 6.2, each has become a singleton in the
@ -53,16 +54,22 @@ cluster. The life time is no longer tied with Master.
|image1|
Proxies
~~~~~~~
GRV Proxies
~~~~~~~~~~~
The proxies are responsible for providing read versions, committing
transactions, and tracking the storage servers responsible for each
range of keys. To provide a read version, a proxy will ask all other
proxies to see the largest committed version at this point in time,
while simultaneously checking that the transaction logs have not been
stopped. Ratekeeper will artificially slow down the rate at which the
proxy provides read versions.
The GRV proxies are responsible for providing read versions and for
communicating with Ratekeeper to control the rate at which read versions
are handed out. To provide a read version, a GRV proxy will ask the master
for the largest committed version at this point in time, while
simultaneously checking that the transaction logs have not been stopped.
Ratekeeper will artificially slow down the rate at which the GRV proxy
provides read versions.
Commit Proxies
~~~~~~~~~~~~~~
The commit proxies are responsible for committing transactions, reporting
committed versions to the master, and tracking the storage servers
responsible for each range of keys.
Commits are accomplished by:
@ -73,20 +80,20 @@ Commits are accomplished by:
The key space starting with the ``\xff`` byte is reserved for system
metadata. All mutations committed into this key space are distributed to
all of the proxies through the resolvers. This metadata includes a
all of the commit proxies through the resolvers. This metadata includes a
mapping between key ranges and the storage servers which have the data
for that range of keys. The proxies provides this information to clients
on-demand. The clients cache this mapping; if they ask a storage server
for a key it does not have, they will clear their cache and get a more
up-to-date list of servers from the proxies.
for that range of keys. The commit proxies provide this information to
clients on-demand. The clients cache this mapping; if they ask a storage
server for a key it does not have, they will clear their cache and get a
more up-to-date list of servers from the commit proxies.
Transaction Logs
~~~~~~~~~~~~~~~~
The transaction logs make mutations durable to disk for fast commit
latencies. The logs receive commits from the proxy in version order, and
only respond to the proxy once the data has been written and fsynced to
an append only mutation log on disk. Before the data is even written to
latencies. The logs receive commits from the commit proxy in version order,
and only respond to the commit proxy once the data has been written and fsynced
to an append only mutation log on disk. Before the data is even written to
disk we forward it to the storage servers responsible for that mutation.
Once the storage servers have made the mutation durable, they pop it
from the log. This generally happens roughly 6 seconds after the
@ -153,7 +160,7 @@ Transaction Processing
----------------------
A database transaction in FoundationDB starts by a client contacting one
of the Proxies to obtain a read version, which is guaranteed to be
of the GRV proxies to obtain a read version, which is guaranteed to be
larger than any commit version that the client may know about (even
through side channels outside the FoundationDB cluster). This is needed
so that a client will see the result of previous commits that have
@ -165,64 +172,51 @@ memory without contacting the cluster. By default, reading a key that
was written in the same transaction will return the newly written value.
At commit time, the client sends the transaction data (all reads and
writes) to one of the Proxies and waits for commit or abort response
from the proxy. If the transaction conflicts with another one and cannot
commit, the client may choose to retry the transaction from the
beginning again. If the transaction commits, the proxy also returns the
commit version back to the client. Note this commit version is larger
than the read version and is chosen by the master.
writes) to one of the commit proxies and waits for commit or abort response
from the commit proxy. If the transaction conflicts with another one and
cannot commit, the client may choose to retry the transaction from the
beginning again. If the transaction commits, the commit proxy also returns
the commit version back to the client and to the master so that GRV proxies can
get access to the latest committed version. Note this commit version is
larger than the read version and is chosen by the master.
The FoundationDB architecture separates the scaling of client reads and
writes (i.e., transaction commits). Because clients directly issue reads
to sharded storage servers, reads scale linearly to the number of
storage servers. Similarly, writes are scaled by adding more processes
to Proxies, Resolvers, and Log Servers in the transaction system.
to Commit Proxies, Resolvers, and Log Servers in the transaction system.
Determine Read Version
~~~~~~~~~~~~~~~~~~~~~~
When a client requests a read version from a proxy, the proxy asks all
other proxies for their last commit versions, and checks a set of
transaction logs satisfying replication policy are live. Then the proxy
returns the maximum commit version as the read version to the client.
When a client requests a read version from a GRV proxy, the GRV proxy asks
the master for the latest committed version, and checks that a set of
transaction logs satisfying the replication policy are live. Then the GRV
proxy returns the maximum committed version as the read version to the
client.
|image2|
The reason for the proxy to contact all other proxies for commit
versions is to ensure the read version is larger than any previously
committed version. Consider that if proxy ``A`` commits a transaction,
and then the client asks proxy ``B`` for a read version. The read
version from proxy ``B`` must be larger than the version committed by
proxy ``A``. The only way to get this information is by asking proxy
``A`` for its largest committed version.
The reason for the GRV proxy to contact the master for the latest committed
version is that the master is the central place that keeps the largest
committed version across all commit proxies.
The reason for checking a set of transaction logs satisfying replication
policy are live is to ensure the proxy is not replaced with newer
generation of proxies. This is because proxy is a stateless role
recruited in each generation. If a recovery has happened and the old
proxy is still live, this old proxy could still give out read versions.
policy are live is to ensure the GRV proxy is not replaced with newer
generation of GRV proxies. This is because GRV proxy is a stateless role
recruited in each generation. If a recovery has happened and the old GRV
proxy is still live, this old GRV proxy could still give out read versions.
As a result, a *read-only* transaction may see stale results (a
read-write transaction will be aborted). By checking a set of
transaction logs satisfying replication policy are live, the proxy makes
transaction logs satisfying replication policy are live, the GRV proxy makes
sure no recovery has happened, thus the *read-only* transaction sees the
latest data.
Note that the client cannot simply ask the master for read versions. The
master gives out versions to proxies to be committed, but the master
does not know when the versions it gives out are durable on the
transaction logs. Therefore it is not safe to do reads at the largest
version the master has provided because that version might be rolled
back in the event of a failure, so the client could end up reading data
that was never committed. In order for the client to use versions from
the master, the client needs to wait until all in-flight
transaction-batches (a write version is used for a batch of
transactions) to commit. This can take a long time and thus is
inefficient. Another drawback of this approach is putting more work
towards the master, because the master role cant be scaled. Even though
giving out read-versions isnt very expensive, it still requires the
master to get a transaction budget from the Ratekeeper, batches
requests, and potentially maintains thousands of network connections
from clients.
Note that the client cannot simply ask the master for read versions, because
this approach would put more work on the master, and the master role can't
be scaled. Even though giving out read versions isn't very expensive, it
still requires the master to get a transaction budget from the Ratekeeper,
batch requests, and potentially maintain thousands of network connections
from clients.
|image3|
@ -231,27 +225,27 @@ Transaction Commit
A client transaction commits in the following steps:
1. A client sends a transaction to a proxy.
2. The proxy asks the master for a commit version.
1. A client sends a transaction to a commit proxy.
2. The commit proxy asks the master for a commit version.
3. The master sends back a commit version that is higher than any commit
version seen before.
4. The proxy sends the read and write conflict ranges to the resolver(s)
4. The commit proxy sends the read and write conflict ranges to the resolver(s)
with the commit version included.
5. The resolver responds back with whether the transaction has any
conflicts with previous transactions by sorting transactions
according to their commit versions and computing if such a serial
execution order is conflict-free.
- If there are conflicts, the proxy responds back to the client with
- If there are conflicts, the commit proxy responds back to the client with
a not_committed error.
- If there are no conflicts, the proxy sends the mutations and
- If there are no conflicts, the commit proxy sends the mutations and
commit version of this transaction to the transaction logs.
6. Once the mutations are durable on the logs, the proxy responds back
6. Once the mutations are durable on the logs, the commit proxy responds back
success to the user.
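The fragment below is a hedged sketch of the client-facing side of this sequence (the helper name is illustrative and ``db`` is assumed to be an already-opened ``FDBDatabase*``): the commit either succeeds, or a retryable error such as ``not_committed`` is passed to ``fdb_transaction_on_error`` and the loop tries again::

    #define FDB_API_VERSION 630
    #include <foundationdb/fdb_c.h>
    #include <stdint.h>
    #include <string.h>

    /* Write one key with the standard commit/retry loop. */
    fdb_error_t set_key_with_retry(FDBDatabase* db, const char* key, const char* val) {
        FDBTransaction* tr = NULL;
        fdb_error_t err = fdb_database_create_transaction(db, &tr);
        if (err) return err;

        for (;;) {
            fdb_transaction_set(tr, (const uint8_t*)key, (int)strlen(key),
                                (const uint8_t*)val, (int)strlen(val));

            FDBFuture* commit = fdb_transaction_commit(tr);
            err = fdb_future_block_until_ready(commit);
            if (!err) err = fdb_future_get_error(commit);
            fdb_future_destroy(commit);
            if (!err) break; /* committed */

            /* on_error retries retryable errors (e.g. 1020 not_committed) after a backoff */
            FDBFuture* retry = fdb_transaction_on_error(tr, err);
            fdb_error_t retry_err = fdb_future_block_until_ready(retry);
            if (!retry_err) retry_err = fdb_future_get_error(retry);
            fdb_future_destroy(retry);
            if (retry_err) { err = retry_err; break; } /* non-retryable: give up */
        }

        fdb_transaction_destroy(tr);
        return err;
    }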
Note the proxy sends each resolver their respective key ranges, if any
one of the resolvers detects a conflict then the transaction is not
Note the commit proxy sends each resolver their respective key ranges, if
any one of the resolvers detects a conflict then the transaction is not
committed. This has the flaw that if only one of the resolvers detects a
conflict, the other resolver will still think the transaction has
succeeded and may fail future transactions with overlapping write
@ -273,8 +267,8 @@ Background Work
A number of background tasks happen besides the transaction
processing:
- **Ratekeeper** collects statistic information from proxies,
transaction logs, and storage servers and compute the target
- **Ratekeeper** collects statistics from GRV proxies, commit
proxies, transaction logs, and storage servers and computes the target
transaction rate for the cluster.
- **Data distribution** monitors all storage servers and performs load
@ -284,7 +278,7 @@ processing:
- **Storage servers** pull mutations from transaction logs, write them
into storage engine to persist on disks.
- **Proxies** periodically send empty commits to transaction logs to
- **Commit proxies** periodically send empty commits to transaction logs to
keep commit versions increasing, in case there is no client generated
transactions.
@ -299,9 +293,9 @@ latency. A typical recovery takes about a few hundred milliseconds, but
longer recovery time (usually a few seconds) can happen. Whenever there
is a failure in the transaction system, a recovery process is performed
to restore the transaction system to a new configuration, i.e., a clean
state. Specifically, the Master process monitors the health of Proxies,
Resolvers, and Transaction Logs. If any one of the monitored process
failed, the Master process terminates. The Cluster Controller will
state. Specifically, the Master process monitors the health of GRV Proxies,
Commit Proxies, Resolvers, and Transaction Logs. If any one of the monitored
processes fails, the Master process terminates. The Cluster Controller will
detect this event, and then recruits a new Master, which coordinates the
recovery and recruits a new transaction system instance. In this way,
the transaction processing is divided into a number of epochs, where
@ -314,20 +308,20 @@ Coordinators and lock the coordinated states to prevent another Master
process from recovering at the same time. Then the Master recovers
previous transaction system states, including all Log Servers
Information, stops these Log Servers from accepting transactions, and
recruits a new set of Proxies, Resolvers, and Transaction Logs. After
previous Log Servers are stopped and new transaction system is
recruited, the Master writes the coordinated states with current
recruits a new set of GRV Proxies, Commit Proxies, Resolvers, and
Transaction Logs. After previous Log Servers are stopped and new transaction
system is recruited, the Master writes the coordinated states with current
transaction system information. Finally, the Master accepts new
transaction commits. See details in this
`documentation <https://github.com/apple/foundationdb/blob/master/design/recovery-internals.md>`__.
Because Proxies and Resolvers are stateless, their recoveries have no
extra work. In contrast, Transaction Logs save the logs of committed
transactions, and we need to ensure all previously committed
transactions are durable and retrievable by storage servers. That is,
for any transactions that the Proxies may have sent back commit
response, their logs are persisted in multiple Log Servers (e.g., three
servers if replication degree is 3).
Because GRV Proxies, Commit Proxies and Resolvers are stateless, their
recoveries have no extra work. In contrast, Transaction Logs save the
logs of committed transactions, and we need to ensure all previously
committed transactions are durable and retrievable by storage servers.
That is, for any transactions that the Commit Proxies may have sent back
commit response, their logs are persisted in multiple Log Servers (e.g.,
three servers if replication degree is 3).
Finally, a recovery will *fast forward* time by 90 seconds, which would
abort any in-progress client transactions with ``transaction_too_old``
@ -335,7 +329,7 @@ error. During retry, these client transactions will find the new
generation of transaction system and commit.
**``commit_result_unknown`` error:** If a recovery happened while a
transaction is committing (i.e., a proxy has sent mutations to
transaction is committing (i.e., a commit proxy has sent mutations to
transaction logs). A client would have received
``commit_result_unknown``, and then retried the transaction. It's
completely permissible for FDB to commit both the first attempt, and the
@ -362,6 +356,7 @@ Documentation <https://github.com/apple/foundationdb/blob/master/design/data-dis
`Recovery
Documentation <https://github.com/apple/foundationdb/blob/master/design/recovery-internals.md>`__
.. |image0| image:: images/Architecture.png
.. |image1| image:: images/architecture-1.jpeg
.. |image2| image:: images/architecture-2.jpeg
.. |image3| image:: images/architecture-3.jpeg

View File

@ -64,7 +64,7 @@ The ``commit`` command commits the current transaction. Any sets or clears execu
configure
---------
The ``configure`` command changes the database configuration. Its syntax is ``configure [new] [single|double|triple|three_data_hall|three_datacenter] [ssd|memory] [proxies=<N>] [resolvers=<N>] [logs=<N>]``.
The ``configure`` command changes the database configuration. Its syntax is ``configure [new] [single|double|triple|three_data_hall|three_datacenter] [ssd|memory] [grv_proxies=<N>] [commit_proxies=<N>] [resolvers=<N>] [logs=<N>]``.
The ``new`` option, if present, initializes a new database with the given configuration rather than changing the configuration of an existing one. When ``new`` is used, both a redundancy mode and a storage engine must be specified.
@ -98,11 +98,12 @@ A FoundationDB cluster employs server processes of different types. It automatic
For large clusters, you can manually set the allocated number of processes of a given type. Valid process types are:
* ``proxies``
* ``grv_proxies``
* ``commit_proxies``
* ``resolvers``
* ``logs``
Set the process using ``configure [proxies|resolvers|logs]=<N>``, where ``<N>`` is an integer greater than 0, or -1 to reset the value to its default.
Set the process using ``configure [grv_proxies|commit_proxies|resolvers|logs]=<N>``, where ``<N>`` is an integer greater than 0, or -1 to reset the value to its default.
For recommendations on appropriate values for process types in large clusters, see :ref:`guidelines-process-class-config`.
@ -357,7 +358,7 @@ setclass
The ``setclass`` command can be used to change the :ref:`process class <guidelines-process-class-config>` for a given process. Its syntax is ``setclass [<ADDRESS> <CLASS>]``. If no arguments are specified, then the process classes of all processes are listed. Set the class to ``default`` to revert to the process class specified on the command line.
The available process classes are ``unset``, ``storage``, ``transaction``, ``resolution``, ``proxy``, ``master``, ``test``, ``unset``, ``stateless``, ``log``, ``router``, ``cluster_controller``, ``fast_restore``, ``data_distributor``, ``coordinator``, ``ratekeeper``, ``storage_cache``, ``backup``, and ``default``.
The available process classes are ``unset``, ``storage``, ``transaction``, ``resolution``, ``grv_proxy``, ``commit_proxy``, ``master``, ``test``, ``stateless``, ``log``, ``router``, ``cluster_controller``, ``fast_restore``, ``data_distributor``, ``coordinator``, ``ratekeeper``, ``storage_cache``, ``backup``, and ``default``.
sleep
-----

View File

@ -777,16 +777,17 @@ The 6.2 release still has a number of rough edges related to region configuratio
Guidelines for setting process class
====================================
In a FoundationDB cluster, each of the ``fdbserver`` processes perform different tasks. Each process is recruited to do a particular task based on its process ``class``. For example, processes with ``class=storage`` are given preference to be recruited for doing storage server tasks, ``class=transaction`` are for log server processes and ``class=stateless`` are for stateless processes like proxies, resolvers, etc.,
In a FoundationDB cluster, each of the ``fdbserver`` processes performs a different task. Each process is recruited to do a particular task based on its process ``class``. For example, processes with ``class=storage`` are given preference to be recruited for doing storage server tasks, ``class=transaction`` are for log server processes, and ``class=stateless`` are for stateless processes like commit proxies, resolvers, etc.
The recommended minimum number of ``class=transaction`` (log server) processes is 8 (active) + 2 (standby) and the recommended minimum number for ``class=stateless`` processes is 4 (proxy) + 1 (resolver) + 1 (cluster controller) + 1 (master) + 2 (standby). It is better to spread the transaction and stateless processes across as many machines as possible.
The recommended minimum number of ``class=transaction`` (log server) processes is 8 (active) + 2 (standby) and the recommended minimum number for ``class=stateless`` processes is 1 (GRV proxy) + 3 (commit proxy) + 1 (resolver) + 1 (cluster controller) + 1 (master) + 2 (standby). It is better to spread the transaction and stateless processes across as many machines as possible.
``fdbcli`` is used to set the desired number of processes of a particular process type. To do so, you would issue the ``fdbcli`` commands::
fdb> configure proxies=5
fdb> configure grv_proxies=1
fdb> configure commit_proxies=4
fdb> configure logs=8
.. note:: In the present release, the default value for proxies and log servers is 3 and for resolvers is 1. You should not set the value of a process type to less than its default.
.. note:: In the present release, the default value for commit proxies and log servers is 3 and for GRV proxies and resolvers is 1. You should not set the value of a process type to less than its default.
.. warning:: The conflict-resolution algorithm used by FoundationDB is conservative: it guarantees that no conflicting transactions will be committed, but it may fail to commit some transactions that theoretically could have been. The effects of this conservatism may increase as you increase the number of resolvers. It is therefore important to employ the recommended techniques for :ref:`minimizing conflicts <developer-guide-transaction-conflicts>` when increasing the number of resolvers.

View File

@ -838,7 +838,7 @@ Caveats
#. ``\xff\xff/transaction/read_conflict_range/`` The conflict range for a read is sometimes not known until that read completes (e.g. range reads with limits, key selectors). When you read from these special keys, the returned future first blocks until all pending reads are complete so it can give an accurate response.
#. ``\xff\xff/transaction/write_conflict_range/`` The conflict range for a ``set_versionstamped_key`` atomic op is not known until commit time. You'll get an approximate range (the actual range will be a subset of the approximate range) until the precise range is known.
#. ``\xff\xff/transaction/conflicting_keys/`` Since using this feature costs server (i.e., proxy and resolver) resources, it's disabled by default. You must opt in by setting the ``report_conflicting_keys`` transaction option.
#. ``\xff\xff/transaction/conflicting_keys/`` Since using this feature costs server (i.e., commit proxy and resolver) resources, it's disabled by default. You must opt in by setting the ``report_conflicting_keys`` transaction option.
Metrics module
--------------
@ -1059,22 +1059,21 @@ How Versions are Generated and Assigned
Versions are generated by the process that runs the *master* role. FoundationDB guarantees that no version will be generated twice and that the versions are monotonically increasing.
In order to assign read and commit versions to transactions, a client will never talk to the master. Instead it will get both from a proxy. Getting a read version is more complex than a commit version. Let's first look at commit versions:
In order to assign read and commit versions to transactions, a client will never talk to the master. Instead it will get them from a GRV proxy and a commit proxy. Getting a read version is more complex than a commit version. Let's first look at commit versions:
#. The client will send a commit message to a proxy.
#. The proxy will put this commit message in a queue in order to build a batch.
#. In parallel, the proxy will ask for a new version from the master (note that this means that only proxies will ever ask for new versions - which scales much better as it puts less stress on the network).
#. The proxy will then resolve all transactions within that batch (discussed later) and assign the version it got from the master to *all* transactions within that batch. It will then write the transactions to the transaction log system to make it durable.
#. The client will send a commit message to a commit proxy.
#. The commit proxy will put this commit message in a queue in order to build a batch.
#. In parallel, the commit proxy will ask for a new version from the master (note that this means that only commit proxies will ever ask for new versions - which scales much better as it puts less stress on the network).
#. The commit proxy will then resolve all transactions within that batch (discussed later) and assign the version it got from the master to *all* transactions within that batch. It will then write the transactions to the transaction log system to make it durable.
#. If the transaction succeeded, it will send back the version as commit version to the client. Otherwise it will send back an error.
As mentioned before, the algorithm to assign read versions is a bit more complex. At the start of a transaction, a client will ask a proxy server for a read version. The proxy will reply with the last committed version as of the time it received the request - this is important to guarantee external consistency. This is how this is achieved:
As mentioned before, the algorithm to assign read versions is a bit more complex. At the start of a transaction, a client will ask a GRV proxy server for a read version. The GRV proxy will reply with the last committed version as of the time it received the request - this is important to guarantee external consistency. This is how this is achieved:
#. The client will send a GRV (get read version) request to a proxy.
#. The proxy will batch GRV requests for a short amount of time (how big these batches are depends on load and configuration).
#. The client will send a GRV (get read version) request to a GRV proxy.
#. The GRV proxy will batch GRV requests for a short amount of time (how big these batches are depends on load and configuration).
#. The proxy will do the following steps in parallel:
* Ask all other proxies for their most recent committed version (the largest version they received from the master for which it successfully wrote the transactions to the transaction log system).
* Send a message to the transaction log system to verify that it is still writable. This is to prevent that we fetch read versions from a proxy that has been declared to be dead.
#. It will then take the largest committed version from all proxies (including its own) and send it back to the clients.
* Ask the master for its most recent committed version (the largest of the commit proxies' committed versions for which the transactions were successfully written to the transaction log system).
* Send a message to the transaction log system to verify that it is still writable. This is to prevent fetching read versions from a GRV proxy that has been declared dead.
Checking whether the log system is still writeable can be especially expensive if a cluster runs in a multi-region configuration. If a user is willing to sacrifice strict serializability, they can use :ref:`option-causal-read-risky <api-python-option-set-causal-read-risky>`.
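For reference, fetching a read version explicitly through the C API looks roughly like the following hedged sketch (``tr`` is assumed to be a live transaction; in normal use the client library issues this GRV request for you)::

    #define FDB_API_VERSION 630
    #include <foundationdb/fdb_c.h>
    #include <stdint.h>

    /* Return the transaction's read version, or -1 on error. */
    int64_t fetch_read_version(FDBTransaction* tr) {
        int64_t version = -1;
        FDBFuture* f = fdb_transaction_get_read_version(tr);
        if (!fdb_future_block_until_ready(f) && !fdb_future_get_error(f))
            fdb_future_get_int64(f, &version); /* API >= 620; older APIs used fdb_future_get_version */
        fdb_future_destroy(f);
        return version;
    }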
@ -1148,8 +1147,8 @@ The ``commit_unknown_result`` Error
``commit_unknown_result`` can be thrown during a commit. This error is difficult to handle as you won't know whether your transaction was committed or not. There are mostly two reasons why you might see this error:
#. The client lost the connection to the proxy to which it did send the commit. So it never got a reply and therefore can't know whether the commit was successful or not.
#. There was a FoundationDB failure - for example a proxy failed during the commit. In that case there is no way for the client know whether the transaction succeeded or not.
#. The client lost the connection to the commit proxy to which it sent the commit. So it never got a reply and therefore can't know whether the commit was successful or not.
#. There was a FoundationDB failure - for example, a commit proxy failed during the commit. In that case there is no way for the client to know whether the transaction succeeded or not.
However, there is one guarantee FoundationDB gives to the caller: at the point in time when you receive this error, the transaction has either committed or it has not, and if it did not commit, it will never commit in the future. In other words, it is guaranteed that the transaction is no longer in flight. This is an important guarantee, as it means that if your transaction is idempotent you can simply retry. For more explanation see developer-guide-unknown-results_.
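One common way to make a transaction safe to retry after ``commit_unknown_result`` is to record a client-chosen id inside the same transaction and check for it on retry. The fragment below is a hedged sketch of that pattern (the key layout and helper name are invented); it shows a single attempt, and the surrounding retry loop is the same as for ``not_committed``::

    #define FDB_API_VERSION 630
    #include <foundationdb/fdb_c.h>
    #include <stdint.h>
    #include <string.h>

    /* One attempt of an idempotent "apply once" transaction: if `txn_id_key` is
       already present, a previous attempt committed and nothing is re-applied. */
    fdb_error_t apply_once(FDBTransaction* tr, const char* txn_id_key,
                           const char* key, const char* val, int* already_applied) {
        FDBFuture* f = fdb_transaction_get(tr, (const uint8_t*)txn_id_key,
                                           (int)strlen(txn_id_key), 0 /* snapshot */);
        fdb_error_t err = fdb_future_block_until_ready(f);
        if (!err) err = fdb_future_get_error(f);

        fdb_bool_t present = 0;
        const uint8_t* value;
        int value_length;
        if (!err) err = fdb_future_get_value(f, &present, &value, &value_length);
        fdb_future_destroy(f);
        if (err) return err;

        *already_applied = present;
        if (!present) {
            /* first attempt: write the payload and the marker in the same transaction */
            fdb_transaction_set(tr, (const uint8_t*)key, (int)strlen(key),
                                (const uint8_t*)val, (int)strlen(val));
            fdb_transaction_set(tr, (const uint8_t*)txn_id_key, (int)strlen(txn_id_key),
                                (const uint8_t*)"1", 1);
        }
        return 0; /* caller commits and runs the usual retry loop */
    }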

View File

@ -104,7 +104,7 @@ Field Name Description
``Name for the snapshot file`` recommended name for the disk snapshot cluster-name:ip-addr:port:UID
================================ ======================================================== ========================================================
``snapshot create binary`` will not be invoked on processes which does not have any persistent data (for example, Cluster Controller or Master or MasterProxy). Since these processes are stateless, there is no need for a snapshot. Any specialized configuration knobs used for one of these stateless processes need to be copied and restored externally.
``snapshot create binary`` will not be invoked on processes which do not have any persistent data (for example, Cluster Controller, Master, or CommitProxy). Since these processes are stateless, there is no need for a snapshot. Any specialized configuration knobs used for one of these stateless processes need to be copied and restored externally.
Management of disk snapshots
----------------------------

View File

@ -10,38 +10,38 @@ macOS
The macOS installation package is supported on macOS 10.7+. It includes the client and (optionally) the server.
* `FoundationDB-6.3.5.pkg <https://www.foundationdb.org/downloads/6.3.5/macOS/installers/FoundationDB-6.3.5.pkg>`_
* `FoundationDB-6.3.8.pkg <https://www.foundationdb.org/downloads/6.3.8/macOS/installers/FoundationDB-6.3.8.pkg>`_
Ubuntu
------
The Ubuntu packages are supported on 64-bit Ubuntu 12.04+, but beware of the Linux kernel bug in Ubuntu 12.x.
* `foundationdb-clients-6.3.5-1_amd64.deb <https://www.foundationdb.org/downloads/6.3.5/ubuntu/installers/foundationdb-clients_6.3.5-1_amd64.deb>`_
* `foundationdb-server-6.3.5-1_amd64.deb <https://www.foundationdb.org/downloads/6.3.5/ubuntu/installers/foundationdb-server_6.3.5-1_amd64.deb>`_ (depends on the clients package)
* `foundationdb-clients-6.3.8-1_amd64.deb <https://www.foundationdb.org/downloads/6.3.8/ubuntu/installers/foundationdb-clients_6.3.8-1_amd64.deb>`_
* `foundationdb-server-6.3.8-1_amd64.deb <https://www.foundationdb.org/downloads/6.3.8/ubuntu/installers/foundationdb-server_6.3.8-1_amd64.deb>`_ (depends on the clients package)
RHEL/CentOS EL6
---------------
The RHEL/CentOS EL6 packages are supported on 64-bit RHEL/CentOS 6.x.
* `foundationdb-clients-6.3.5-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.5/rhel6/installers/foundationdb-clients-6.3.5-1.el6.x86_64.rpm>`_
* `foundationdb-server-6.3.5-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.5/rhel6/installers/foundationdb-server-6.3.5-1.el6.x86_64.rpm>`_ (depends on the clients package)
* `foundationdb-clients-6.3.8-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.8/rhel6/installers/foundationdb-clients-6.3.8-1.el6.x86_64.rpm>`_
* `foundationdb-server-6.3.8-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.8/rhel6/installers/foundationdb-server-6.3.8-1.el6.x86_64.rpm>`_ (depends on the clients package)
RHEL/CentOS EL7
---------------
The RHEL/CentOS EL7 packages are supported on 64-bit RHEL/CentOS 7.x.
* `foundationdb-clients-6.3.5-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.5/rhel7/installers/foundationdb-clients-6.3.5-1.el7.x86_64.rpm>`_
* `foundationdb-server-6.3.5-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.5/rhel7/installers/foundationdb-server-6.3.5-1.el7.x86_64.rpm>`_ (depends on the clients package)
* `foundationdb-clients-6.3.8-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.8/rhel7/installers/foundationdb-clients-6.3.8-1.el7.x86_64.rpm>`_
* `foundationdb-server-6.3.8-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.8/rhel7/installers/foundationdb-server-6.3.8-1.el7.x86_64.rpm>`_ (depends on the clients package)
Windows
-------
The Windows installer is supported on 64-bit Windows XP and later. It includes the client and (optionally) the server.
* `foundationdb-6.3.5-x64.msi <https://www.foundationdb.org/downloads/6.3.5/windows/installers/foundationdb-6.3.5-x64.msi>`_
* `foundationdb-6.3.8-x64.msi <https://www.foundationdb.org/downloads/6.3.8/windows/installers/foundationdb-6.3.8-x64.msi>`_
API Language Bindings
=====================
@ -58,18 +58,18 @@ On macOS and Windows, the FoundationDB Python API bindings are installed as part
If you need to use the FoundationDB Python API from other Python installations or paths, use the Python package manager ``pip`` (``pip install foundationdb``) or download the Python package:
* `foundationdb-6.3.5.tar.gz <https://www.foundationdb.org/downloads/6.3.5/bindings/python/foundationdb-6.3.5.tar.gz>`_
* `foundationdb-6.3.8.tar.gz <https://www.foundationdb.org/downloads/6.3.8/bindings/python/foundationdb-6.3.8.tar.gz>`_
Ruby 1.9.3/2.0.0+
-----------------
* `fdb-6.3.5.gem <https://www.foundationdb.org/downloads/6.3.5/bindings/ruby/fdb-6.3.5.gem>`_
* `fdb-6.3.8.gem <https://www.foundationdb.org/downloads/6.3.8/bindings/ruby/fdb-6.3.8.gem>`_
Java 8+
-------
* `fdb-java-6.3.5.jar <https://www.foundationdb.org/downloads/6.3.5/bindings/java/fdb-java-6.3.5.jar>`_
* `fdb-java-6.3.5-javadoc.jar <https://www.foundationdb.org/downloads/6.3.5/bindings/java/fdb-java-6.3.5-javadoc.jar>`_
* `fdb-java-6.3.8.jar <https://www.foundationdb.org/downloads/6.3.8/bindings/java/fdb-java-6.3.8.jar>`_
* `fdb-java-6.3.8-javadoc.jar <https://www.foundationdb.org/downloads/6.3.8/bindings/java/fdb-java-6.3.8-javadoc.jar>`_
Go 1.11+
--------

View File

@ -5,7 +5,7 @@ FoundationDB Architecture
Coordinators
============
All clients and servers connect to a FoundationDB cluster with a cluster file, which contains the IP:PORT of the coordinators. Both the clients and servers use the coordinators to connect with the cluster controller. The servers will attempt to become the cluster controller if one does not exist, and register with the cluster controller once one has been elected. Clients use the cluster controller to keep an up-to-date list of proxies.
All clients and servers connect to a FoundationDB cluster with a cluster file, which contains the IP:PORT of the coordinators. Both the clients and servers use the coordinators to connect with the cluster controller. The servers will attempt to become the cluster controller if one does not exist, and register with the cluster controller once one has been elected. Clients use the cluster controller to keep an up-to-date list of GRV proxies and commit proxies.
Cluster Controller
==================
@ -15,12 +15,12 @@ The cluster controller is a singleton elected by a majority of coordinators. It
Master
======
The master is responsible for coordinating the transition of the write sub-system from one generation to the next. The write sub-system includes the master, proxies, resolvers, and transaction logs. The three roles are treated as a unit, and if any of them fail, we will recruit a replacement for all three roles. The master provides the commit versions for batches of the mutations to the proxies, runs data distribution algorithm, and runs ratekeeper.
The master is responsible for coordinating the transition of the write sub-system from one generation to the next. The write sub-system includes the master, GRV proxies, commit proxies, resolvers, and transaction logs. These roles are treated as a unit, and if any of them fail, we will recruit a replacement for all of them. The master keeps the commit proxies' committed versions, provides read versions to GRV proxies, provides the commit versions for batches of the mutations to the commit proxies, runs the data distribution algorithm, and runs ratekeeper.
Proxies
=======
GRV Proxies and Commit Proxies
==============================
The proxies are responsible for providing read versions, committing transactions, and tracking the storage servers responsible for each range of keys. To provide a read version, a proxy will ask all other proxies to see the largest committed version at this point in time, while simultaneously checking that the transaction logs have not been stopped. Ratekeeper will artificially slow down the rate at which the proxy provides read versions.
The GRV proxies are responsible for providing read versions. The commit proxies are responsible for committing transactions and tracking the storage servers responsible for each range of keys. To provide a read version, a GRV proxy will ask the master for the largest committed version at this point in time, while simultaneously checking that the transaction logs have not been stopped. Ratekeeper will artificially slow down the rate at which the GRV proxy provides read versions.
Commits are accomplished by:
@ -33,7 +33,7 @@ The key space starting with the '\xff' byte is reserved for system metadata. All
Transaction Logs
================
The transaction logs make mutations durable to disk for fast commit latencies. The logs receive commits from the proxy in version order, and only respond to the proxy once the data has been written and fsync'ed to an append only mutation log on disk. Before the data is even written to disk we forward it to the storage servers responsible for that mutation. Once the storage servers have made the mutation durable, they pop it from the log. This generally happens roughly 6 seconds after the mutation was originally committed to the log. We only read from the log's disk when the process has been rebooted. If a storage server has failed, mutations bound for that storage server will build up on the logs. Once data distribution makes a different storage server responsible for all of the missing storage server's data we will discard the log data bound for the failed server.
The transaction logs make mutations durable to disk for fast commit latencies. The logs receive commits from the commit proxy in version order, and only respond to the commit proxy once the data has been written and fsync'ed to an append only mutation log on disk. Before the data is even written to disk we forward it to the storage servers responsible for that mutation. Once the storage servers have made the mutation durable, they pop it from the log. This generally happens roughly 6 seconds after the mutation was originally committed to the log. We only read from the log's disk when the process has been rebooted. If a storage server has failed, mutations bound for that storage server will build up on the logs. Once data distribution makes a different storage server responsible for all of the missing storage server's data we will discard the log data bound for the failed server.
Resolvers
=========
@ -48,4 +48,4 @@ The vast majority of processes in a cluster are storage servers. Storage servers
Clients
=======
Clients must get a read version at the start of every transaction. During the transaction all of the reads are done at that read version, and write are kept in memory until transaction is committed. When the transaction is committed, all of the reads and writes are sent to the proxy. If the transaction conflicts with another transaction the client is responsible for retrying the transaction. By default, reading a key that was written in the same transaction will return the newly written value.
Clients must get a read version at the start of every transaction. During the transaction all of the reads are done at that read version, and writes are kept in memory until the transaction is committed. When the transaction is committed, all of the reads and writes are sent to the commit proxy. If the transaction conflicts with another transaction, the client is responsible for retrying the transaction. By default, reading a key that was written in the same transaction will return the newly written value.

View File

@ -27,7 +27,7 @@
"storage",
"transaction",
"resolution",
"proxy",
"commit_proxy",
"grv_proxy",
"master",
"test",
@ -61,7 +61,7 @@
"role":{
"$enum":[
"master",
"proxy",
"commit_proxy",
"grv_proxy",
"log",
"storage",
@ -447,7 +447,7 @@
],
"recovery_state":{
"required_resolvers":1,
"required_proxies":1,
"required_commit_proxies":1,
"required_grv_proxies":1,
"name":{ // "fully_recovered" is the healthy state; other states are normal to transition through but not to persist in
"$enum":[
@ -633,11 +633,11 @@
"address":"10.0.4.1"
}
],
"auto_proxies":3,
"auto_commit_proxies":3,
"auto_resolvers":1,
"auto_logs":3,
"backup_worker_enabled":1,
"proxies":5 // this field will be absent if a value has not been explicitly set
"commit_proxies":5 // this field will be absent if a value has not been explicitly set
},
"data":{
"least_operating_space_bytes_log_server":0,

View File

@ -2,6 +2,10 @@
Release Notes
#############
6.2.26
======
* Attempt to detect when calling :func:`fdb_future_block_until_ready` would cause a deadlock, and throw ``blocked_from_network_thread`` if it would definitely cause a deadlock.
6.2.25
======

View File

@ -2,7 +2,7 @@
Release Notes
#############
6.3.5
6.3.8
=====
Features
@ -108,6 +108,10 @@ Other Changes
* Updated boost to 1.72. `(PR #2684) <https://github.com/apple/foundationdb/pull/2684>`_
* Calling ``fdb_run_network`` multiple times in a single run of a client program now returns an error instead of causing undefined behavior. [6.3.1] `(PR #3229) <https://github.com/apple/foundationdb/pull/3229>`_
* Blob backup URL parameter ``request_timeout`` changed to ``request_timeout_min``, with prior name still supported. `(PR #3533) <https://github.com/apple/foundationdb/pull/3533>`_
* Support query command in backup CLI that allows users to query restorable files by key ranges. [6.3.6] `(PR #3703) <https://github.com/apple/foundationdb/pull/3703>`_
* Report missing old tlogs information when in recovery before storage servers are fully recovered. [6.3.6] `(PR #3706) <https://github.com/apple/foundationdb/pull/3706>`_
* Updated OpenSSL to version 1.1.1h. [6.3.7] `(PR #3809) <https://github.com/apple/foundationdb/pull/3809>`_
* Lowered the amount of time a watch will remain registered on a storage server from 900 seconds to 30 seconds. [6.3.8] `(PR #3833) <https://github.com/apple/foundationdb/pull/3833>`_
Fixes from previous versions
----------------------------
@ -124,6 +128,8 @@ Fixes only impacting 6.3.0+
* Refreshing TLS certificates could cause crashes. [6.3.2] `(PR #3352) <https://github.com/apple/foundationdb/pull/3352>`_
* All storage class processes attempted to connect to the same coordinator. [6.3.2] `(PR #3361) <https://github.com/apple/foundationdb/pull/3361>`_
* Adjusted the proxy load balancing algorithm to be based on the CPU usage of the process instead of the number of requests processed. [6.3.5] `(PR #3653) <https://github.com/apple/foundationdb/pull/3653>`_
* Only return the error code ``batch_transaction_throttled`` for API versions greater than or equal to 630. [6.3.6] `(PR #3799) <https://github.com/apple/foundationdb/pull/3799>`_
* The fault tolerance calculation in status did not take into account region configurations. [6.3.8] `(PR #3836) <https://github.com/apple/foundationdb/pull/3836>`_
Earlier release notes
---------------------

View File

@ -18,6 +18,10 @@
* limitations under the License.
*/
#include "fdbclient/JsonBuilder.h"
#include "flow/Arena.h"
#include "flow/Error.h"
#include "flow/Trace.h"
#define BOOST_DATE_TIME_NO_LIB
#include <boost/interprocess/managed_shared_memory.hpp>
@ -81,7 +85,22 @@ enum enumProgramExe {
};
enum enumBackupType {
BACKUP_UNDEFINED=0, BACKUP_START, BACKUP_MODIFY, BACKUP_STATUS, BACKUP_ABORT, BACKUP_WAIT, BACKUP_DISCONTINUE, BACKUP_PAUSE, BACKUP_RESUME, BACKUP_EXPIRE, BACKUP_DELETE, BACKUP_DESCRIBE, BACKUP_LIST, BACKUP_DUMP, BACKUP_CLEANUP
BACKUP_UNDEFINED = 0,
BACKUP_START,
BACKUP_MODIFY,
BACKUP_STATUS,
BACKUP_ABORT,
BACKUP_WAIT,
BACKUP_DISCONTINUE,
BACKUP_PAUSE,
BACKUP_RESUME,
BACKUP_EXPIRE,
BACKUP_DELETE,
BACKUP_DESCRIBE,
BACKUP_LIST,
BACKUP_QUERY,
BACKUP_DUMP,
BACKUP_CLEANUP
};
enum enumDBType {
@ -96,29 +115,68 @@ enum enumRestoreType {
//
enum {
// Backup constants
OPT_DESTCONTAINER, OPT_SNAPSHOTINTERVAL, OPT_ERRORLIMIT, OPT_NOSTOPWHENDONE,
OPT_EXPIRE_BEFORE_VERSION, OPT_EXPIRE_BEFORE_DATETIME, OPT_EXPIRE_DELETE_BEFORE_DAYS,
OPT_EXPIRE_RESTORABLE_AFTER_VERSION, OPT_EXPIRE_RESTORABLE_AFTER_DATETIME, OPT_EXPIRE_MIN_RESTORABLE_DAYS,
OPT_BASEURL, OPT_BLOB_CREDENTIALS, OPT_DESCRIBE_DEEP, OPT_DESCRIBE_TIMESTAMPS,
OPT_DUMP_BEGIN, OPT_DUMP_END, OPT_JSON, OPT_DELETE_DATA, OPT_MIN_CLEANUP_SECONDS,
OPT_DESTCONTAINER,
OPT_SNAPSHOTINTERVAL,
OPT_ERRORLIMIT,
OPT_NOSTOPWHENDONE,
OPT_EXPIRE_BEFORE_VERSION,
OPT_EXPIRE_BEFORE_DATETIME,
OPT_EXPIRE_DELETE_BEFORE_DAYS,
OPT_EXPIRE_RESTORABLE_AFTER_VERSION,
OPT_EXPIRE_RESTORABLE_AFTER_DATETIME,
OPT_EXPIRE_MIN_RESTORABLE_DAYS,
OPT_BASEURL,
OPT_BLOB_CREDENTIALS,
OPT_DESCRIBE_DEEP,
OPT_DESCRIBE_TIMESTAMPS,
OPT_DUMP_BEGIN,
OPT_DUMP_END,
OPT_JSON,
OPT_DELETE_DATA,
OPT_MIN_CLEANUP_SECONDS,
OPT_USE_PARTITIONED_LOG,
// Backup and Restore constants
OPT_TAGNAME, OPT_BACKUPKEYS, OPT_WAITFORDONE,
OPT_TAGNAME,
OPT_BACKUPKEYS,
OPT_WAITFORDONE,
OPT_BACKUPKEYS_FILTER,
OPT_INCREMENTALONLY,
// Backup Modify
OPT_MOD_ACTIVE_INTERVAL, OPT_MOD_VERIFY_UID,
OPT_MOD_ACTIVE_INTERVAL,
OPT_MOD_VERIFY_UID,
// Restore constants
OPT_RESTORECONTAINER, OPT_RESTORE_VERSION, OPT_RESTORE_TIMESTAMP, OPT_PREFIX_ADD, OPT_PREFIX_REMOVE, OPT_RESTORE_CLUSTERFILE_DEST, OPT_RESTORE_CLUSTERFILE_ORIG,
OPT_RESTORECONTAINER,
OPT_RESTORE_VERSION,
OPT_RESTORE_TIMESTAMP,
OPT_PREFIX_ADD,
OPT_PREFIX_REMOVE,
OPT_RESTORE_CLUSTERFILE_DEST,
OPT_RESTORE_CLUSTERFILE_ORIG,
OPT_RESTORE_BEGIN_VERSION,
// Shared constants
OPT_CLUSTERFILE, OPT_QUIET, OPT_DRYRUN, OPT_FORCE,
OPT_HELP, OPT_DEVHELP, OPT_VERSION, OPT_PARENTPID, OPT_CRASHONERROR,
OPT_NOBUFSTDOUT, OPT_BUFSTDOUTERR, OPT_TRACE, OPT_TRACE_DIR,
OPT_KNOB, OPT_TRACE_LOG_GROUP, OPT_MEMLIMIT, OPT_LOCALITY,
OPT_CLUSTERFILE,
OPT_QUIET,
OPT_DRYRUN,
OPT_FORCE,
OPT_HELP,
OPT_DEVHELP,
OPT_VERSION,
OPT_PARENTPID,
OPT_CRASHONERROR,
OPT_NOBUFSTDOUT,
OPT_BUFSTDOUTERR,
OPT_TRACE,
OPT_TRACE_DIR,
OPT_KNOB,
OPT_TRACE_LOG_GROUP,
OPT_MEMLIMIT,
OPT_LOCALITY,
//DB constants
// DB constants
OPT_SOURCE_CLUSTER,
OPT_DEST_CLUSTER,
OPT_CLEANUP,
@ -154,7 +212,7 @@ CSimpleOpt::SOption g_rgAgentOptions[] = {
#ifndef TLS_DISABLED
TLS_OPTION_FLAGS
#endif
SO_END_OF_OPTIONS
SO_END_OF_OPTIONS
};
CSimpleOpt::SOption g_rgBackupStartOptions[] = {
@ -197,6 +255,7 @@ CSimpleOpt::SOption g_rgBackupStartOptions[] = {
{ OPT_DEVHELP, "--dev-help", SO_NONE },
{ OPT_KNOB, "--knob_", SO_REQ_SEP },
{ OPT_BLOB_CREDENTIALS, "--blob_credentials", SO_REQ_SEP },
{ OPT_INCREMENTALONLY, "--incremental", SO_NONE },
#ifndef TLS_DISABLED
TLS_OPTION_FLAGS
#endif
@ -585,6 +644,40 @@ CSimpleOpt::SOption g_rgBackupListOptions[] = {
SO_END_OF_OPTIONS
};
CSimpleOpt::SOption g_rgBackupQueryOptions[] = {
#ifdef _WIN32
{ OPT_PARENTPID, "--parentpid", SO_REQ_SEP },
#endif
{ OPT_RESTORE_TIMESTAMP, "--query_restore_timestamp", SO_REQ_SEP },
{ OPT_DESTCONTAINER, "-d", SO_REQ_SEP },
{ OPT_DESTCONTAINER, "--destcontainer", SO_REQ_SEP },
{ OPT_RESTORE_VERSION, "-qrv", SO_REQ_SEP },
{ OPT_RESTORE_VERSION, "--query_restore_version", SO_REQ_SEP },
{ OPT_BACKUPKEYS_FILTER, "-k", SO_REQ_SEP },
{ OPT_BACKUPKEYS_FILTER, "--keys", SO_REQ_SEP },
{ OPT_TRACE, "--log", SO_NONE },
{ OPT_TRACE_DIR, "--logdir", SO_REQ_SEP },
{ OPT_TRACE_FORMAT, "--trace_format", SO_REQ_SEP },
{ OPT_TRACE_LOG_GROUP, "--loggroup", SO_REQ_SEP },
{ OPT_QUIET, "-q", SO_NONE },
{ OPT_QUIET, "--quiet", SO_NONE },
{ OPT_VERSION, "-v", SO_NONE },
{ OPT_VERSION, "--version", SO_NONE },
{ OPT_CRASHONERROR, "--crash", SO_NONE },
{ OPT_MEMLIMIT, "-m", SO_REQ_SEP },
{ OPT_MEMLIMIT, "--memory", SO_REQ_SEP },
{ OPT_HELP, "-?", SO_NONE },
{ OPT_HELP, "-h", SO_NONE },
{ OPT_HELP, "--help", SO_NONE },
{ OPT_DEVHELP, "--dev-help", SO_NONE },
{ OPT_BLOB_CREDENTIALS, "--blob_credentials", SO_REQ_SEP },
{ OPT_KNOB, "--knob_", SO_REQ_SEP },
#ifndef TLS_DISABLED
TLS_OPTION_FLAGS
#endif
SO_END_OF_OPTIONS
};
// g_rgRestoreOptions is used by fdbrestore and fastrestore_tool
CSimpleOpt::SOption g_rgRestoreOptions[] = {
#ifdef _WIN32
@ -603,6 +696,7 @@ CSimpleOpt::SOption g_rgRestoreOptions[] = {
{ OPT_BACKUPKEYS, "--keys", SO_REQ_SEP },
{ OPT_WAITFORDONE, "-w", SO_NONE },
{ OPT_WAITFORDONE, "--waitfordone", SO_NONE },
{ OPT_RESTORE_BEGIN_VERSION, "--begin_version", SO_REQ_SEP },
{ OPT_RESTORE_VERSION, "--version", SO_REQ_SEP },
{ OPT_RESTORE_VERSION, "-v", SO_REQ_SEP },
{ OPT_TRACE, "--log", SO_NONE },
@ -622,6 +716,7 @@ CSimpleOpt::SOption g_rgRestoreOptions[] = {
{ OPT_HELP, "--help", SO_NONE },
{ OPT_DEVHELP, "--dev-help", SO_NONE },
{ OPT_BLOB_CREDENTIALS, "--blob_credentials", SO_REQ_SEP },
{ OPT_INCREMENTALONLY, "--incremental", SO_NONE },
#ifndef TLS_DISABLED
TLS_OPTION_FLAGS
#endif
@ -918,13 +1013,16 @@ void printBackupContainerInfo() {
static void printBackupUsage(bool devhelp) {
printf("FoundationDB " FDB_VT_PACKAGE_NAME " (v" FDB_VT_VERSION ")\n");
printf("Usage: %s (start | status | abort | wait | discontinue | pause | resume | expire | delete | describe | list | cleanup) [OPTIONS]\n\n", exeBackup.toString().c_str());
printf("Usage: %s (start | status | abort | wait | discontinue | pause | resume | expire | delete | describe | "
"list | query | cleanup) [OPTIONS]\n\n",
exeBackup.toString().c_str());
printf(" -C CONNFILE The path of a file containing the connection string for the\n"
" FoundationDB cluster. The default is first the value of the\n"
" FDB_CLUSTER_FILE environment variable, then `./fdb.cluster',\n"
" then `%s'.\n", platform::getDefaultClusterFilePath().c_str());
printf(" -d, --destcontainer URL\n"
" The Backup container URL for start, modify, describe, expire, and delete operations.\n");
" The Backup container URL for start, modify, describe, query, expire, and delete "
"operations.\n");
printBackupContainerInfo();
printf(" -b, --base_url BASEURL\n"
" Base backup URL for list operations. This looks like a Backup URL but without a backup name.\n");
@ -938,6 +1036,12 @@ static void printBackupUsage(bool devhelp) {
printf(" --delete_before_days NUM_DAYS\n"
" Another way to specify version cutoff for expire operations. Deletes data files containing no data at or after a\n"
" version approximately NUM_DAYS days worth of versions prior to the latest log version in the backup.\n");
printf(" -qrv --query_restore_version VERSION\n"
" For query operations, set target version for restoring a backup. Set -1 for maximum\n"
" restorable version (default) and -2 for minimum restorable version.\n");
printf(" --query_restore_timestamp DATETIME\n"
" For query operations, instead of a numeric version, use this to specify a timestamp in %s\n", BackupAgentBase::timeFormat().c_str());
printf(" and it will be converted to a version from that time using metadata in the cluster file.\n");
printf(" --restorable_after_timestamp DATETIME\n"
" For expire operations, set minimum acceptable restorability to the version equivalent of DATETIME and later.\n");
printf(" --restorable_after_version VERSION\n"
@ -956,8 +1060,8 @@ static void printBackupUsage(bool devhelp) {
" Specifies a UID to verify against the BackupUID of the running backup. If provided, the UID is verified in the same transaction\n"
" which sets the new backup parameters (if the UID matches).\n");
printf(" -e ERRORLIMIT The maximum number of errors printed by status (default is 10).\n");
printf(" -k KEYS List of key ranges to backup.\n"
" If not specified, the entire database will be backed up.\n");
printf(" -k KEYS List of key ranges to backup or to filter the backup in query operations.\n"
" If not specified, the entire database will be backed up or no filter will be applied.\n");
printf(" --partitioned_log_experimental Starts with new type of backup system using partitioned logs.\n");
printf(" -n, --dryrun For backup start or restore start, performs a trial run with no actual changes made.\n");
printf(" --log Enables trace file logging for the CLI session.\n"
@ -975,6 +1079,9 @@ static void printBackupUsage(bool devhelp) {
" remove mutations for it. By default this is set to one hour.\n");
printf(" --delete_data\n"
" This flag will cause cleanup to remove mutations for the most stale backup or DR.\n");
// TODO: Enable this command-line argument once atomics are supported
// printf(" --incremental\n"
// " Performs incremental backup without the base backup.\n");
#ifndef TLS_DISABLED
printf(TLS_HELP);
#endif
@ -1032,8 +1139,11 @@ static void printRestoreUsage(bool devhelp ) {
printf(" --trace_format FORMAT\n"
" Select the format of the trace files. xml (the default) and json are supported.\n"
" Has no effect unless --log is specified.\n");
// TODO: Enable this command-line argument once atomics are supported
// printf(" --incremental\n"
// " Performs incremental restore without the base backup.\n");
#ifndef TLS_DISABLED
printf(TLS_HELP);
printf(TLS_HELP);
#endif
printf(" -v DBVERSION The version at which the database will be restored.\n");
printf(" --timestamp Instead of a numeric version, use this to specify a timestamp in %s\n", BackupAgentBase::timeFormat().c_str());
@ -1273,6 +1383,7 @@ enumBackupType getBackupType(std::string backupType)
values["delete"] = BACKUP_DELETE;
values["describe"] = BACKUP_DESCRIBE;
values["list"] = BACKUP_LIST;
values["query"] = BACKUP_QUERY;
values["dump"] = BACKUP_DUMP;
values["modify"] = BACKUP_MODIFY;
}
@ -1402,7 +1513,7 @@ ACTOR Future<std::string> getLayerStatus(Reference<ReadYourWritesTransaction> tr
for (KeyBackedTag eachTag : backupTags) {
Version last_restorable_version = tagLastRestorableVersions[j].get();
double last_restorable_seconds_behind = ((double)readVer - last_restorable_version) / CLIENT_KNOBS->CORE_VERSIONSPERSECOND;
BackupAgentBase::enumState status = (BackupAgentBase::enumState)tagStates[j].get();
EBackupState status = tagStates[j].get();
const char *statusText = fba.getStateText(status);
// The object for this backup tag inside this instance's subdocument
@ -1411,8 +1522,9 @@ ACTOR Future<std::string> getLayerStatus(Reference<ReadYourWritesTransaction> tr
tagRoot.create("current_status") = statusText;
tagRoot.create("last_restorable_version") = tagLastRestorableVersions[j].get();
tagRoot.create("last_restorable_seconds_behind") = last_restorable_seconds_behind;
tagRoot.create("running_backup") = (status == BackupAgentBase::STATE_RUNNING_DIFFERENTIAL || status == BackupAgentBase::STATE_RUNNING);
tagRoot.create("running_backup_is_restorable") = (status == BackupAgentBase::STATE_RUNNING_DIFFERENTIAL);
tagRoot.create("running_backup") =
(status == EBackupState::STATE_RUNNING_DIFFERENTIAL || status == EBackupState::STATE_RUNNING);
tagRoot.create("running_backup_is_restorable") = (status == EBackupState::STATE_RUNNING_DIFFERENTIAL);
tagRoot.create("range_bytes_written") = tagRangeBytes[j].get();
tagRoot.create("mutation_log_bytes_written") = tagLogBytes[j].get();
tagRoot.create("mutation_stream_id") = backupTagUids[j].toString();
@ -1427,7 +1539,7 @@ ACTOR Future<std::string> getLayerStatus(Reference<ReadYourWritesTransaction> tr
tr2->setOption(FDBTransactionOptions::LOCK_AWARE);
state Standalone<RangeResultRef> tagNames = wait(tr2->getRange(dba.tagNames.range(), 10000, snapshot));
state std::vector<Future<Optional<Key>>> backupVersion;
state std::vector<Future<int>> backupStatus;
state std::vector<Future<EBackupState>> backupStatus;
state std::vector<Future<int64_t>> tagRangeBytesDR;
state std::vector<Future<int64_t>> tagLogBytesDR;
state Future<Optional<Value>> fDRPaused = tr->get(dba.taskBucket->getPauseKey(), snapshot);
@ -1452,11 +1564,12 @@ ACTOR Future<std::string> getLayerStatus(Reference<ReadYourWritesTransaction> tr
for (int i = 0; i < tagNames.size(); i++) {
std::string tagName = dba.sourceTagNames.unpack(tagNames[i].key).getString(0).toString();
BackupAgentBase::enumState status = (BackupAgentBase::enumState)backupStatus[i].get();
auto status = backupStatus[i].get();
JSONDoc tagRoot = tagsRoot.create(tagName);
tagRoot.create("running_backup") = (status == BackupAgentBase::STATE_RUNNING_DIFFERENTIAL || status == BackupAgentBase::STATE_RUNNING);
tagRoot.create("running_backup_is_restorable") = (status == BackupAgentBase::STATE_RUNNING_DIFFERENTIAL);
tagRoot.create("running_backup") =
(status == EBackupState::STATE_RUNNING_DIFFERENTIAL || status == EBackupState::STATE_RUNNING);
tagRoot.create("running_backup_is_restorable") = (status == EBackupState::STATE_RUNNING_DIFFERENTIAL);
tagRoot.create("range_bytes_written") = tagRangeBytesDR[i].get();
tagRoot.create("mutation_log_bytes_written") = tagLogBytesDR[i].get();
tagRoot.create("mutation_stream_id") = drTagUids[i].toString();
@ -1721,7 +1834,8 @@ ACTOR Future<Void> submitDBBackup(Database src, Database dest, Standalone<Vector
ACTOR Future<Void> submitBackup(Database db, std::string url, int snapshotIntervalSeconds,
Standalone<VectorRef<KeyRangeRef>> backupRanges, std::string tagName, bool dryRun,
bool waitForCompletion, bool stopWhenDone, bool usePartitionedLog) {
bool waitForCompletion, bool stopWhenDone, bool usePartitionedLog,
bool incrementalBackupOnly) {
try {
state FileBackupAgent backupAgent;
@ -1739,7 +1853,7 @@ ACTOR Future<Void> submitBackup(Database db, std::string url, int snapshotInterv
EBackupState backupStatus = wait(config.stateEnum().getOrThrow(db));
// Throw error if a backup is currently running until we support parallel backups
if (BackupAgentBase::isRunnable((BackupAgentBase::enumState)backupStatus)) {
if (BackupAgentBase::isRunnable(backupStatus)) {
throw backup_duplicate();
}
}
@ -1766,7 +1880,7 @@ ACTOR Future<Void> submitBackup(Database db, std::string url, int snapshotInterv
else {
wait(backupAgent.submitBackup(db, KeyRef(url), snapshotIntervalSeconds, tagName, backupRanges, stopWhenDone,
usePartitionedLog));
usePartitionedLog, incrementalBackupOnly));
// Wait for the backup to complete, if requested
if (waitForCompletion) {
@ -1964,10 +2078,10 @@ ACTOR Future<Void> waitBackup(Database db, std::string tagName, bool stopWhenDon
{
state FileBackupAgent backupAgent;
int status = wait(backupAgent.waitBackup(db, tagName, stopWhenDone));
EBackupState status = wait(backupAgent.waitBackup(db, tagName, stopWhenDone));
printf("The backup on tag `%s' %s.\n", printable(StringRef(tagName)).c_str(),
BackupAgentBase::getStateText((BackupAgentBase::enumState) status));
BackupAgentBase::getStateText(status));
}
catch (Error& e) {
if(e.code() == error_code_actor_cancelled)
@ -2077,7 +2191,10 @@ Reference<IBackupContainer> openBackupContainer(const char *name, std::string de
return c;
}
ACTOR Future<Void> runRestore(Database db, std::string originalClusterFile, std::string tagName, std::string container, Standalone<VectorRef<KeyRangeRef>> ranges, Version targetVersion, std::string targetTimestamp, bool performRestore, bool verbose, bool waitForDone, std::string addPrefix, std::string removePrefix) {
ACTOR Future<Void> runRestore(Database db, std::string originalClusterFile, std::string tagName, std::string container,
Standalone<VectorRef<KeyRangeRef>> ranges, Version beginVersion, Version targetVersion,
std::string targetTimestamp, bool performRestore, bool verbose, bool waitForDone,
std::string addPrefix, std::string removePrefix, bool incrementalBackupOnly) {
if(ranges.empty()) {
ranges.push_back_deep(ranges.arena(), normalKeys);
}
@ -2119,19 +2236,23 @@ ACTOR Future<Void> runRestore(Database db, std::string originalClusterFile, std:
BackupDescription desc = wait(bc->describeBackup());
if(!desc.maxRestorableVersion.present()) {
if (incrementalBackupOnly && desc.contiguousLogEnd.present()) {
targetVersion = desc.contiguousLogEnd.get() - 1;
} else if (desc.maxRestorableVersion.present()) {
targetVersion = desc.maxRestorableVersion.get();
} else {
fprintf(stderr, "The specified backup is not restorable to any version.\n");
throw restore_error();
}
targetVersion = desc.maxRestorableVersion.get();
if(verbose)
printf("Using target restore version %" PRId64 "\n", targetVersion);
}
if (performRestore) {
Version restoredVersion = wait(backupAgent.restore(db, origDb, KeyRef(tagName), KeyRef(container), ranges, waitForDone, targetVersion, verbose, KeyRef(addPrefix), KeyRef(removePrefix)));
Version restoredVersion = wait(backupAgent.restore(
db, origDb, KeyRef(tagName), KeyRef(container), ranges, waitForDone, targetVersion, verbose,
KeyRef(addPrefix), KeyRef(removePrefix), true, incrementalBackupOnly, beginVersion));
if(waitForDone && verbose) {
// If restore is now complete then report version restored
@ -2401,6 +2522,135 @@ ACTOR Future<Void> describeBackup(const char *name, std::string destinationConta
return Void();
}
static void reportBackupQueryError(UID operationId, JsonBuilderObject& result, std::string errorMessage) {
result["error"] = errorMessage;
printf("%s\n", result.getJson().c_str());
TraceEvent("BackupQueryFailure").detail("OperationId", operationId).detail("Reason", errorMessage);
}
// If restoreVersion is invalidVersion or latestVersion, use the maximum or minimum restorable version respectively for
// the selected key ranges. If restoreTimestamp is specified, any specified restoreVersion will be overridden by the
// version resolved from that timestamp.
ACTOR Future<Void> queryBackup(const char* name, std::string destinationContainer,
Standalone<VectorRef<KeyRangeRef>> keyRangesFilter, Version restoreVersion,
std::string originalClusterFile, std::string restoreTimestamp, bool verbose) {
state UID operationId = deterministicRandom()->randomUniqueID();
state JsonBuilderObject result;
state std::string errorMessage;
result["key_ranges_filter"] = printable(keyRangesFilter);
result["destination_container"] = destinationContainer;
TraceEvent("BackupQueryStart")
.detail("OperationId", operationId)
.detail("DestinationContainer", destinationContainer)
.detail("KeyRangesFilter", printable(keyRangesFilter))
.detail("SpecifiedRestoreVersion", restoreVersion)
.detail("RestoreTimestamp", restoreTimestamp)
.detail("BackupClusterFile", originalClusterFile);
// Resolve restoreTimestamp if given
if (!restoreTimestamp.empty()) {
if (originalClusterFile.empty()) {
reportBackupQueryError(
operationId, result,
format("an original cluster file must be given in order to resolve restore target timestamp '%s'",
restoreTimestamp.c_str()));
return Void();
}
if (!fileExists(originalClusterFile)) {
reportBackupQueryError(operationId, result,
format("The specified original source database cluster file '%s' does not exist\n",
originalClusterFile.c_str()));
return Void();
}
Database origDb = Database::createDatabase(originalClusterFile, Database::API_VERSION_LATEST);
Version v = wait(timeKeeperVersionFromDatetime(restoreTimestamp, origDb));
result["restore_timestamp"] = restoreTimestamp;
result["restore_timestamp_resolved_version"] = v;
restoreVersion = v;
}
try {
state Reference<IBackupContainer> bc = openBackupContainer(name, destinationContainer);
if (restoreVersion == invalidVersion) {
BackupDescription desc = wait(bc->describeBackup());
if (desc.maxRestorableVersion.present()) {
restoreVersion = desc.maxRestorableVersion.get();
// Use continuous log end version for the maximum restorable version for the key ranges.
} else if (keyRangesFilter.size() && desc.contiguousLogEnd.present()) {
restoreVersion = desc.contiguousLogEnd.get();
} else {
reportBackupQueryError(
operationId, result,
errorMessage = format("the backup for the specified key ranges is not restorable to any version"));
}
}
if (restoreVersion < 0 && restoreVersion != latestVersion) {
reportBackupQueryError(operationId, result,
errorMessage =
format("the specified restorable version %ld is not valid", restoreVersion));
return Void();
}
Optional<RestorableFileSet> fileSet = wait(bc->getRestoreSet(restoreVersion, keyRangesFilter));
if (fileSet.present()) {
int64_t totalRangeFilesSize = 0, totalLogFilesSize = 0;
result["restore_version"] = fileSet.get().targetVersion;
JsonBuilderArray rangeFilesJson;
JsonBuilderArray logFilesJson;
for (const auto& rangeFile : fileSet.get().ranges) {
JsonBuilderObject object;
object["file_name"] = rangeFile.fileName;
object["file_size"] = rangeFile.fileSize;
object["version"] = rangeFile.version;
object["key_range"] = fileSet.get().keyRanges.count(rangeFile.fileName) == 0
? "none"
: fileSet.get().keyRanges.at(rangeFile.fileName).toString();
rangeFilesJson.push_back(object);
totalRangeFilesSize += rangeFile.fileSize;
}
for (const auto& log : fileSet.get().logs) {
JsonBuilderObject object;
object["file_name"] = log.fileName;
object["file_size"] = log.fileSize;
object["begin_version"] = log.beginVersion;
object["end_version"] = log.endVersion;
logFilesJson.push_back(object);
totalLogFilesSize += log.fileSize;
}
result["total_range_files_size"] = totalRangeFilesSize;
result["total_log_files_size"] = totalLogFilesSize;
if (verbose) {
result["ranges"] = rangeFilesJson;
result["logs"] = logFilesJson;
}
TraceEvent("BackupQueryReceivedRestorableFilesSet")
.detail("DestinationContainer", destinationContainer)
.detail("KeyRangesFilter", printable(keyRangesFilter))
.detail("ActualRestoreVersion", fileSet.get().targetVersion)
.detail("NumRangeFiles", fileSet.get().ranges.size())
.detail("NumLogFiles", fileSet.get().logs.size())
.detail("RangeFilesBytes", totalRangeFilesSize)
.detail("LogFilesBytes", totalLogFilesSize);
} else {
reportBackupQueryError(operationId, result, "no restorable files set found for specified key ranges");
return Void();
}
} catch (Error& e) {
reportBackupQueryError(operationId, result, e.what());
return Void();
}
printf("%s\n", result.getJson().c_str());
return Void();
}
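// Illustrative invocation of the new command (an assumption about operator usage, not part of this change):
//   fdbbackup query -d <container URL> --query_restore_version -1 -k "<begin> <end>"
// prints a JSON object with the resolved restore_version plus the range and log files (and their total sizes)
// needed to restore the filtered key ranges; -1 selects the maximum restorable version.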
ACTOR Future<Void> listBackup(std::string baseUrl) {
try {
std::vector<std::string> containers = wait(IBackupContainer::listContainers(baseUrl));
@ -2770,6 +3020,9 @@ int main(int argc, char* argv[]) {
case BACKUP_LIST:
args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupListOptions, SO_O_EXACT);
break;
case BACKUP_QUERY:
args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupQueryOptions, SO_O_EXACT);
break;
case BACKUP_MODIFY:
args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupModifyOptions, SO_O_EXACT);
break;
@ -2909,12 +3162,15 @@ int main(int argc, char* argv[]) {
std::string addPrefix;
std::string removePrefix;
Standalone<VectorRef<KeyRangeRef>> backupKeys;
Standalone<VectorRef<KeyRangeRef>> backupKeysFilter;
int maxErrors = 20;
Version beginVersion = invalidVersion;
Version restoreVersion = invalidVersion;
std::string restoreTimestamp;
bool waitForDone = false;
bool stopWhenDone = true;
bool usePartitionedLog = false; // Set to true to use new backup system
bool incrementalBackupOnly = false;
bool forceAction = false;
bool trace = false;
bool quietDisplay = false;
@ -3129,6 +3385,15 @@ int main(int argc, char* argv[]) {
return FDB_EXIT_ERROR;
}
break;
case OPT_BACKUPKEYS_FILTER:
try {
addKeyRange(args->OptionArg(), backupKeysFilter);
}
catch (Error &) {
printHelpTeaser(argv[0]);
return FDB_EXIT_ERROR;
}
break;
case OPT_DESTCONTAINER:
destinationContainer = args->OptionArg();
// If the url starts with '/' then prepend "file://" for backwards compatibility
@ -3167,6 +3432,10 @@ int main(int argc, char* argv[]) {
case OPT_USE_PARTITIONED_LOG:
usePartitionedLog = true;
break;
case OPT_INCREMENTALONLY:
// TODO: Enable this command-line argument once atomics are supported
// incrementalBackupOnly = true;
break;
case OPT_RESTORECONTAINER:
restoreContainer = args->OptionArg();
// If the url starts with '/' then prepend "file://" for backwards compatibility
@ -3194,6 +3463,17 @@ int main(int argc, char* argv[]) {
}
break;
}
case OPT_RESTORE_BEGIN_VERSION: {
const char* a = args->OptionArg();
long long ver = 0;
if (!sscanf(a, "%lld", &ver)) {
fprintf(stderr, "ERROR: Could not parse database beginVersion `%s'\n", a);
printHelpTeaser(argv[0]);
return FDB_EXIT_ERROR;
}
beginVersion = ver;
break;
}
case OPT_RESTORE_VERSION: {
const char* a = args->OptionArg();
long long ver = 0;
@ -3567,7 +3847,8 @@ int main(int argc, char* argv[]) {
// Test out the backup url to make sure it parses. Doesn't test to make sure it's actually writeable.
openBackupContainer(argv[0], destinationContainer);
f = stopAfter(submitBackup(db, destinationContainer, snapshotIntervalSeconds, backupKeys, tagName,
dryRun, waitForDone, stopWhenDone, usePartitionedLog));
dryRun, waitForDone, stopWhenDone, usePartitionedLog,
incrementalBackupOnly));
break;
}
@ -3652,6 +3933,12 @@ int main(int argc, char* argv[]) {
f = stopAfter( listBackup(baseUrl) );
break;
case BACKUP_QUERY:
initTraceFile();
f = stopAfter(queryBackup(argv[0], destinationContainer, backupKeysFilter, restoreVersion,
restoreClusterFileOrig, restoreTimestamp, !quietDisplay));
break;
case BACKUP_DUMP:
initTraceFile();
f = stopAfter( dumpBackupData(argv[0], destinationContainer, dumpBegin, dumpEnd) );
@ -3697,7 +3984,9 @@ int main(int argc, char* argv[]) {
switch(restoreType) {
case RESTORE_START:
f = stopAfter( runRestore(db, restoreClusterFileOrig, tagName, restoreContainer, backupKeys, restoreVersion, restoreTimestamp, !dryRun, !quietDisplay, waitForDone, addPrefix, removePrefix) );
f = stopAfter(runRestore(db, restoreClusterFileOrig, tagName, restoreContainer, backupKeys,
beginVersion, restoreVersion, restoreTimestamp, !dryRun, !quietDisplay,
waitForDone, addPrefix, removePrefix, incrementalBackupOnly));
break;
case RESTORE_WAIT:
f = stopAfter( success(ba.waitRestore(db, KeyRef(tagName), true)) );

View File

@ -117,7 +117,7 @@ LineNoise::LineNoise(
Hint h = onMainThread( [line]() -> Future<Hint> {
return hint_callback(line);
}).getBlocking();
if (!h.valid) return NULL;
if (!h.valid) return nullptr;
*color = h.color;
*bold = h.bold;
return strdup( h.text.c_str() );

View File

@ -20,6 +20,7 @@
#include "boost/lexical_cast.hpp"
#include "fdbclient/NativeAPI.actor.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/Status.h"
#include "fdbclient/StatusClient.h"
#include "fdbclient/DatabaseContext.h"
@ -102,7 +103,7 @@ CSimpleOpt::SOption g_rgOptions[] = { { OPT_CONNFILE, "-C", SO_REQ_SEP },
void printAtCol(const char* text, int col) {
const char* iter = text;
const char* start = text;
const char* space = NULL;
const char* space = nullptr;
do {
iter++;
@ -112,7 +113,7 @@ void printAtCol(const char* text, int col) {
printf("%.*s\n", (int)(space - start), start);
start = space;
if (*start == ' ' || *start == '\n') start++;
space = NULL;
space = nullptr;
}
} while (*iter);
}
@ -120,7 +121,7 @@ void printAtCol(const char* text, int col) {
std::string lineWrap(const char* text, int col) {
const char* iter = text;
const char* start = text;
const char* space = NULL;
const char* space = nullptr;
std::string out = "";
do {
iter++;
@ -130,7 +131,7 @@ std::string lineWrap(const char* text, int col) {
out += format("%.*s\n", (int)(space - start), start);
start = space;
if (*start == ' '/* || *start == '\n'*/) start++;
space = NULL;
space = nullptr;
}
} while (*iter);
return out;
@ -470,8 +471,8 @@ void initHelp() {
"All keys between BEGINKEY (inclusive) and ENDKEY (exclusive) are cleared from the database. This command will succeed even if the specified range is empty, but may fail because of conflicts." ESCAPINGK);
helpMap["configure"] = CommandHelp(
"configure [new] "
"<single|double|triple|three_data_hall|three_datacenter|ssd|memory|memory-radixtree-beta|proxies=<PROXIES>|grv_"
"proxies=<GRV_PROXIES>|logs=<LOGS>|resolvers=<RESOLVERS>>*",
"<single|double|triple|three_data_hall|three_datacenter|ssd|memory|memory-radixtree-beta|proxies=<PROXIES>|"
"commit_proxies=<COMMIT_PROXIES>|grv_proxies=<GRV_PROXIES>|logs=<LOGS>|resolvers=<RESOLVERS>>*",
"change the database configuration",
"The `new' option, if present, initializes a new database with the given configuration rather than changing "
"the configuration of an existing one. When used, both a redundancy mode and a storage engine must be "
@ -479,13 +480,19 @@ void initHelp() {
"of data (survive one failure).\n triple - three copies of data (survive two failures).\n three_data_hall - "
"See the Admin Guide.\n three_datacenter - See the Admin Guide.\n\nStorage engine:\n ssd - B-Tree storage "
"engine optimized for solid state disks.\n memory - Durable in-memory storage engine for small "
"datasets.\n\nproxies=<PROXIES>: Sets the desired number of proxies in the cluster. Must be at least 1, or set "
"to -1 which restores the number of proxies to the default value.\n\ngrv_proxies=<GRV_PROXIES>: Sets the "
"desired number of GRV proxies in the cluster. Must be at least 1, or set to -1 which restores the number of "
"proxies to the default value.\n\nlogs=<LOGS>: Sets the desired number of log servers in the cluster. Must be "
"at least 1, or set to -1 which restores the number of logs to the default value.\n\nresolvers=<RESOLVERS>: "
"Sets the desired number of resolvers in the cluster. Must be at least 1, or set to -1 which restores the "
"number of resolvers to the default value.\n\nSee the FoundationDB Administration Guide for more information.");
"datasets.\n\nproxies=<PROXIES>: Sets the desired number of proxies in the cluster. The proxy role is being "
"deprecated and split into GRV proxy and Commit proxy, now prefer configure 'grv_proxies' and 'commit_proxies' "
"separately. Generally we should follow that 'commit_proxies' is three times of 'grv_proxies' and 'grv_proxies' "
"should be not more than 4. If 'proxies' is specified, it will be converted to 'grv_proxies' and 'commit_proxies'. "
"Must be at least 2 (1 GRV proxy, 1 Commit proxy), or set to -1 which restores the number of proxies to the "
"default value.\n\ncommit_proxies=<COMMIT_PROXIES>: Sets the desired number of commit proxies in the cluster. "
"Must be at least 1, or set to -1 which restores the number of commit proxies to the default "
"value.\n\ngrv_proxies=<GRV_PROXIES>: Sets the desired number of GRV proxies in the cluster. Must be at least "
"1, or set to -1 which restores the number of GRV proxies to the default value.\n\nlogs=<LOGS>: Sets the "
"desired number of log servers in the cluster. Must be at least 1, or set to -1 which restores the number of "
"logs to the default value.\n\nresolvers=<RESOLVERS>: Sets the desired number of resolvers in the cluster. "
"Must be at least 1, or set to -1 which restores the number of resolvers to the default value.\n\nSee the "
"FoundationDB Administration Guide for more information.");
helpMap["fileconfigure"] = CommandHelp(
"fileconfigure [new] <FILENAME>",
"change the database configuration from a file",
@ -871,12 +878,13 @@ void printStatus(StatusObjectReader statusObj, StatusClient::StatusLevel level,
fatalRecoveryState = true;
if (name == "recruiting_transaction_servers") {
description += format("\nNeed at least %d log servers across unique zones, %d proxies, "
"%d GRV proxies and %d resolvers.",
recoveryState["required_logs"].get_int(),
recoveryState["required_proxies"].get_int(),
recoveryState["required_grv_proxies"].get_int(),
recoveryState["required_resolvers"].get_int());
description +=
format("\nNeed at least %d log servers across unique zones, %d commit proxies, "
"%d GRV proxies and %d resolvers.",
recoveryState["required_logs"].get_int(),
recoveryState["required_commit_proxies"].get_int(),
recoveryState["required_grv_proxies"].get_int(),
recoveryState["required_resolvers"].get_int());
if (statusObjCluster.has("machines") && statusObjCluster.has("processes")) {
auto numOfNonExcludedProcessesAndZones = getNumOfNonExcludedProcessAndZones(statusObjCluster);
description += format("\nHave %d non-excluded processes on %d machines across %d zones.", numOfNonExcludedProcessesAndZones.first, getNumofNonExcludedMachines(statusObjCluster), numOfNonExcludedProcessesAndZones.second);
@ -1026,8 +1034,8 @@ void printStatus(StatusObjectReader statusObj, StatusClient::StatusLevel level,
outputString += format("\n Exclusions - %d (type `exclude' for details)", excludedServersArr.size());
}
if (statusObjConfig.get("proxies", intVal))
outputString += format("\n Desired Proxies - %d", intVal);
if (statusObjConfig.get("commit_proxies", intVal))
outputString += format("\n Desired Commit Proxies - %d", intVal);
if (statusObjConfig.get("grv_proxies", intVal))
outputString += format("\n Desired GRV Proxies - %d", intVal);
@ -1055,10 +1063,10 @@ void printStatus(StatusObjectReader statusObj, StatusClient::StatusLevel level,
if (statusObjConfig.has("regions")) {
outputString += "\n Regions: ";
regions = statusObjConfig["regions"].get_array();
bool isPrimary = false;
std::vector<std::string> regionSatelliteDCs;
std::string regionDC;
for (StatusObjectReader region : regions) {
bool isPrimary = false;
std::vector<std::string> regionSatelliteDCs;
std::string regionDC;
for (StatusObjectReader dc : region["datacenters"].get_array()) {
if (!dc.has("satellite")) {
regionDC = dc["id"].get_str();
@ -1233,14 +1241,54 @@ void printStatus(StatusObjectReader statusObj, StatusClient::StatusLevel level,
int minLoss = std::min(availLoss, dataLoss);
const char *faultDomain = machinesAreZones ? "machine" : "zone";
if (minLoss == 1)
outputString += format("1 %s", faultDomain);
else
outputString += format("%d %ss", minLoss, faultDomain);
outputString += format("%d %ss", minLoss, faultDomain);
if (dataLoss > availLoss){
outputString += format(" (%d without data loss)", dataLoss);
}
if (dataLoss == -1) {
ASSERT_WE_THINK(availLoss == -1);
outputString += format(
"\n\n Warning: the database may have data loss and availability loss. Please restart "
"following tlog interfaces, otherwise storage servers may never be able to catch "
"up.\n");
StatusObjectReader logs;
if (statusObjCluster.has("logs")) {
for (StatusObjectReader logEpoch : statusObjCluster.last().get_array()) {
bool possiblyLosingData;
if (logEpoch.get("possibly_losing_data", possiblyLosingData) &&
!possiblyLosingData) {
continue;
}
// Current epoch doesn't have an end version.
int64_t epoch, beginVersion, endVersion = invalidVersion;
bool current;
logEpoch.get("epoch", epoch);
logEpoch.get("begin_version", beginVersion);
logEpoch.get("end_version", endVersion);
logEpoch.get("current", current);
std::string missing_log_interfaces;
if (logEpoch.has("log_interfaces")) {
for (StatusObjectReader logInterface : logEpoch.last().get_array()) {
bool healthy;
std::string address, id;
if (logInterface.get("healthy", healthy) && !healthy) {
logInterface.get("id", id);
logInterface.get("address", address);
missing_log_interfaces += format("%s,%s ", id.c_str(), address.c_str());
}
}
}
outputString += format(
" %s log epoch: %ld begin: %ld end: %s, missing "
"log interfaces(id,address): %s\n",
current ? "Current" : "Old", epoch, beginVersion,
endVersion == invalidVersion ? "(unknown)" : format("%ld", endVersion).c_str(),
missing_log_interfaces.c_str());
}
}
}
}
}
@ -1764,7 +1812,7 @@ ACTOR Future<Void> commitTransaction( Reference<ReadYourWritesTransaction> tr )
}
ACTOR Future<bool> configure( Database db, std::vector<StringRef> tokens, Reference<ClusterConnectionFile> ccf, LineNoise* linenoise, Future<Void> warn ) {
state ConfigurationResult::Type result;
state ConfigurationResult result;
state int startToken = 1;
state bool force = false;
if (tokens.size() < 2)
@ -1790,14 +1838,14 @@ ACTOR Future<bool> configure( Database db, std::vector<StringRef> tokens, Refere
bool noChanges = conf.get().old_replication == conf.get().auto_replication &&
conf.get().old_logs == conf.get().auto_logs &&
conf.get().old_proxies == conf.get().auto_proxies &&
conf.get().old_commit_proxies == conf.get().auto_commit_proxies &&
conf.get().old_grv_proxies == conf.get().auto_grv_proxies &&
conf.get().old_resolvers == conf.get().auto_resolvers &&
conf.get().old_processes_with_transaction == conf.get().auto_processes_with_transaction &&
conf.get().old_machines_with_transaction == conf.get().auto_machines_with_transaction;
bool noDesiredChanges = noChanges && conf.get().old_logs == conf.get().desired_logs &&
conf.get().old_proxies == conf.get().desired_proxies &&
conf.get().old_commit_proxies == conf.get().desired_commit_proxies &&
conf.get().old_grv_proxies == conf.get().desired_grv_proxies &&
conf.get().old_resolvers == conf.get().desired_resolvers;
@ -1816,8 +1864,11 @@ ACTOR Future<bool> configure( Database db, std::vector<StringRef> tokens, Refere
outputString += format("| replication | %16s | %16s |\n", conf.get().old_replication.c_str(), conf.get().auto_replication.c_str());
outputString += format("| logs | %16d | %16d |", conf.get().old_logs, conf.get().auto_logs);
outputString += conf.get().auto_logs != conf.get().desired_logs ? format(" (manually set; would be %d)\n", conf.get().desired_logs) : "\n";
outputString += format("| proxies | %16d | %16d |", conf.get().old_proxies, conf.get().auto_proxies);
outputString += conf.get().auto_proxies != conf.get().desired_proxies ? format(" (manually set; would be %d)\n", conf.get().desired_proxies) : "\n";
outputString += format("| commit_proxies | %16d | %16d |", conf.get().old_commit_proxies,
conf.get().auto_commit_proxies);
outputString += conf.get().auto_commit_proxies != conf.get().desired_commit_proxies
? format(" (manually set; would be %d)\n", conf.get().desired_commit_proxies)
: "\n";
outputString += format("| grv_proxies | %16d | %16d |", conf.get().old_grv_proxies,
conf.get().auto_grv_proxies);
outputString += conf.get().auto_grv_proxies != conf.get().desired_grv_proxies
@ -1842,7 +1893,8 @@ ACTOR Future<bool> configure( Database db, std::vector<StringRef> tokens, Refere
}
}
ConfigurationResult::Type r = wait( makeInterruptable( changeConfig( db, std::vector<StringRef>(tokens.begin()+startToken,tokens.end()), conf, force) ) );
ConfigurationResult r = wait(makeInterruptable(
changeConfig(db, std::vector<StringRef>(tokens.begin() + startToken, tokens.end()), conf, force)));
result = r;
}
@ -1968,7 +2020,7 @@ ACTOR Future<bool> fileConfigure(Database db, std::string filePath, bool isNewDa
return true;
}
}
ConfigurationResult::Type result = wait( makeInterruptable( changeConfig(db, configString, force) ) );
ConfigurationResult result = wait(makeInterruptable(changeConfig(db, configString, force)));
// Real errors get thrown from makeInterruptable and printed by the catch block in cli(), but
// there are various results specific to changeConfig() that we need to report:
bool ret;
@ -2099,7 +2151,7 @@ ACTOR Future<bool> coordinators( Database db, std::vector<StringRef> tokens, boo
}
if(setName.size()) change = nameQuorumChange( setName.toString(), change );
CoordinatorsResult::Type r = wait( makeInterruptable( changeQuorum( db, change ) ) );
CoordinatorsResult r = wait(makeInterruptable(changeQuorum(db, change)));
// Real errors get thrown from makeInterruptable and printed by the catch block in cli(), but
// there are various results specific to changeConfig() that we need to report:
@ -2472,7 +2524,7 @@ void compGenerator(const char* text, bool help, std::vector<std::string>& lc) {
std::map<std::string, CommandHelp>::const_iterator iter;
int len = strlen(text);
const char* helpExtra[] = {"escaping", "options", NULL};
const char* helpExtra[] = {"escaping", "options", nullptr};
const char** he = helpExtra;
@ -2531,11 +2583,24 @@ void onOffGenerator(const char* text, const char *line, std::vector<std::string>
}
void configureGenerator(const char* text, const char *line, std::vector<std::string>& lc) {
const char* opts[] = {
"new", "single", "double", "triple", "three_data_hall", "three_datacenter", "ssd",
"ssd-1", "ssd-2", "memory", "memory-1", "memory-2", "memory-radixtree-beta", "proxies=",
"grv_proxies=", "logs=", "resolvers=", nullptr
};
const char* opts[] = { "new",
"single",
"double",
"triple",
"three_data_hall",
"three_datacenter",
"ssd",
"ssd-1",
"ssd-2",
"memory",
"memory-1",
"memory-2",
"memory-radixtree-beta",
"commit_proxies=",
"grv_proxies=",
"logs=",
"resolvers=",
nullptr };
arrayGenerator(text, line, opts, lc);
}
@ -2973,7 +3038,7 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
.detail("SourceVersion", getSourceVersion())
.detail("Version", FDB_VT_VERSION)
.detail("PackageName", FDB_VT_PACKAGE_NAME)
.detailf("ActualTime", "%lld", DEBUG_DETERMINISM ? 0 : time(NULL))
.detailf("ActualTime", "%lld", DEBUG_DETERMINISM ? 0 : time(nullptr))
.detail("ClusterFile", ccf->getFilename().c_str())
.detail("ConnectionString", ccf->getConnectionString().toString())
.setMaxFieldLength(10000)
@ -4548,7 +4613,7 @@ int main(int argc, char **argv) {
sigemptyset( &act.sa_mask );
act.sa_flags = 0;
act.sa_handler = SIG_IGN;
sigaction(SIGINT, &act, NULL);
sigaction(SIGINT, &act, nullptr);
#endif
CLIOptions opt(argc, argv);

View File

@ -59,7 +59,7 @@ public:
virtual void delref() { ReferenceCounted<AsyncFileBlobStoreWrite>::delref(); }
struct Part : ReferenceCounted<Part> {
Part(int n, int minSize) : number(n), writer(content.getWriteBuffer(minSize), NULL, Unversioned()), length(0) {
Part(int n, int minSize) : number(n), writer(content.getWriteBuffer(minSize), nullptr, Unversioned()), length(0) {
etag = std::string();
::MD5_Init(&content_md5_buf);
}

View File

@ -46,13 +46,15 @@ public:
return "YYYY/MM/DD.HH:MI:SS[+/-]HHMM";
}
// Type of program being executed
enum enumActionResult {
RESULT_SUCCESSFUL = 0, RESULT_ERRORED = 1, RESULT_DUPLICATE = 2, RESULT_UNNEEDED = 3
};
enum enumState {
STATE_ERRORED = 0, STATE_SUBMITTED = 1, STATE_RUNNING = 2, STATE_RUNNING_DIFFERENTIAL = 3, STATE_COMPLETED = 4, STATE_NEVERRAN = 5, STATE_ABORTED = 6, STATE_PARTIALLY_ABORTED = 7
enum class EnumState {
STATE_ERRORED = 0,
STATE_SUBMITTED = 1,
STATE_RUNNING = 2,
STATE_RUNNING_DIFFERENTIAL = 3,
STATE_COMPLETED = 4,
STATE_NEVERRAN = 5,
STATE_ABORTED = 6,
STATE_PARTIALLY_ABORTED = 7
};
static const Key keyFolderId;
@ -85,70 +87,68 @@ public:
static const int logHeaderSize;
// Convert the status text to an enumerated value
static enumState getState(std::string stateText)
{
enumState enState = STATE_ERRORED;
static EnumState getState(std::string stateText) {
auto enState = EnumState::STATE_ERRORED;
if (stateText.empty()) {
enState = STATE_NEVERRAN;
enState = EnumState::STATE_NEVERRAN;
}
else if (!stateText.compare("has been submitted")) {
enState = STATE_SUBMITTED;
enState = EnumState::STATE_SUBMITTED;
}
else if (!stateText.compare("has been started")) {
enState = STATE_RUNNING;
enState = EnumState::STATE_RUNNING;
}
else if (!stateText.compare("is differential")) {
enState = STATE_RUNNING_DIFFERENTIAL;
enState = EnumState::STATE_RUNNING_DIFFERENTIAL;
}
else if (!stateText.compare("has been completed")) {
enState = STATE_COMPLETED;
enState = EnumState::STATE_COMPLETED;
}
else if (!stateText.compare("has been aborted")) {
enState = STATE_ABORTED;
enState = EnumState::STATE_ABORTED;
}
else if (!stateText.compare("has been partially aborted")) {
enState = STATE_PARTIALLY_ABORTED;
enState = EnumState::STATE_PARTIALLY_ABORTED;
}
return enState;
}
// Convert the status enum to a text description
static const char* getStateText(enumState enState)
{
static const char* getStateText(EnumState enState) {
const char* stateText;
switch (enState)
{
case STATE_ERRORED:
case EnumState::STATE_ERRORED:
stateText = "has errored";
break;
case STATE_NEVERRAN:
case EnumState::STATE_NEVERRAN:
stateText = "has never been started";
break;
case STATE_SUBMITTED:
case EnumState::STATE_SUBMITTED:
stateText = "has been submitted";
break;
case STATE_RUNNING:
case EnumState::STATE_RUNNING:
stateText = "has been started";
break;
case STATE_RUNNING_DIFFERENTIAL:
case EnumState::STATE_RUNNING_DIFFERENTIAL:
stateText = "is differential";
break;
case STATE_COMPLETED:
case EnumState::STATE_COMPLETED:
stateText = "has been completed";
break;
case STATE_ABORTED:
case EnumState::STATE_ABORTED:
stateText = "has been aborted";
break;
case STATE_PARTIALLY_ABORTED:
case EnumState::STATE_PARTIALLY_ABORTED:
stateText = "has been partially aborted";
break;
default:
@ -160,34 +160,33 @@ public:
}
// Convert the status enum to a name
static const char* getStateName(enumState enState)
{
static const char* getStateName(EnumState enState) {
const char* s;
switch (enState)
{
case STATE_ERRORED:
case EnumState::STATE_ERRORED:
s = "Errored";
break;
case STATE_NEVERRAN:
case EnumState::STATE_NEVERRAN:
s = "NeverRan";
break;
case STATE_SUBMITTED:
case EnumState::STATE_SUBMITTED:
s = "Submitted";
break;
case STATE_RUNNING:
case EnumState::STATE_RUNNING:
s = "Running";
break;
case STATE_RUNNING_DIFFERENTIAL:
case EnumState::STATE_RUNNING_DIFFERENTIAL:
s = "RunningDifferentially";
break;
case STATE_COMPLETED:
case EnumState::STATE_COMPLETED:
s = "Completed";
break;
case STATE_ABORTED:
case EnumState::STATE_ABORTED:
s = "Aborted";
break;
case STATE_PARTIALLY_ABORTED:
case EnumState::STATE_PARTIALLY_ABORTED:
s = "Aborting";
break;
default:
@ -199,16 +198,15 @@ public:
}
// Determine if the specified state is runnable
static bool isRunnable(enumState enState)
{
static bool isRunnable(EnumState enState) {
bool isRunnable = false;
switch (enState)
{
case STATE_SUBMITTED:
case STATE_RUNNING:
case STATE_RUNNING_DIFFERENTIAL:
case STATE_PARTIALLY_ABORTED:
case EnumState::STATE_SUBMITTED:
case EnumState::STATE_RUNNING:
case EnumState::STATE_RUNNING_DIFFERENTIAL:
case EnumState::STATE_PARTIALLY_ABORTED:
isRunnable = true;
break;
default:
@ -286,11 +284,19 @@ public:
// - submit a restore on the given tagName
// - Optionally wait for the restore's completion. Will restore_error if restore fails or is aborted.
// restore() will return the targetVersion which will be either the valid version passed in or the max restorable version for the given url.
Future<Version> restore(Database cx, Optional<Database> cxOrig, Key tagName, Key url, Standalone<VectorRef<KeyRangeRef>> ranges, bool waitForComplete = true, Version targetVersion = -1, bool verbose = true, Key addPrefix = Key(), Key removePrefix = Key(), bool lockDB = true);
Future<Version> restore(Database cx, Optional<Database> cxOrig, Key tagName, Key url, bool waitForComplete = true, Version targetVersion = -1, bool verbose = true, KeyRange range = normalKeys, Key addPrefix = Key(), Key removePrefix = Key(), bool lockDB = true) {
Future<Version> restore(Database cx, Optional<Database> cxOrig, Key tagName, Key url,
Standalone<VectorRef<KeyRangeRef>> ranges, bool waitForComplete = true,
Version targetVersion = -1, bool verbose = true, Key addPrefix = Key(),
Key removePrefix = Key(), bool lockDB = true, bool incrementalBackupOnly = false,
Version beginVersion = -1);
Future<Version> restore(Database cx, Optional<Database> cxOrig, Key tagName, Key url, bool waitForComplete = true,
Version targetVersion = -1, bool verbose = true, KeyRange range = normalKeys,
Key addPrefix = Key(), Key removePrefix = Key(), bool lockDB = true,
bool incrementalBackupOnly = false, Version beginVersion = -1) {
Standalone<VectorRef<KeyRangeRef>> rangeRef;
rangeRef.push_back_deep(rangeRef.arena(), range);
return restore(cx, cxOrig, tagName, url, rangeRef, waitForComplete, targetVersion, verbose, addPrefix, removePrefix, lockDB);
return restore(cx, cxOrig, tagName, url, rangeRef, waitForComplete, targetVersion, verbose, addPrefix,
removePrefix, lockDB, incrementalBackupOnly, beginVersion);
}
Future<Version> atomicRestore(Database cx, Key tagName, Standalone<VectorRef<KeyRangeRef>> ranges, Key addPrefix = Key(), Key removePrefix = Key());
Future<Version> atomicRestore(Database cx, Key tagName, KeyRange range = normalKeys, Key addPrefix = Key(), Key removePrefix = Key()) {
@ -315,13 +321,14 @@ public:
Future<Void> submitBackup(Reference<ReadYourWritesTransaction> tr, Key outContainer, int snapshotIntervalSeconds,
std::string tagName, Standalone<VectorRef<KeyRangeRef>> backupRanges,
bool stopWhenDone = true, bool partitionedLog = false);
bool stopWhenDone = true, bool partitionedLog = false,
bool incrementalBackupOnly = false);
Future<Void> submitBackup(Database cx, Key outContainer, int snapshotIntervalSeconds, std::string tagName,
Standalone<VectorRef<KeyRangeRef>> backupRanges, bool stopWhenDone = true,
bool partitionedLog = false) {
bool partitionedLog = false, bool incrementalBackupOnly = false) {
return runRYWTransactionFailIfLocked(cx, [=](Reference<ReadYourWritesTransaction> tr) {
return submitBackup(tr, outContainer, snapshotIntervalSeconds, tagName, backupRanges, stopWhenDone,
partitionedLog);
partitionedLog, incrementalBackupOnly);
});
}
@ -350,7 +357,8 @@ public:
// stopWhenDone will return when the backup is stopped, if enabled. Otherwise, it
// will return when the backup directory is restorable.
Future<int> waitBackup(Database cx, std::string tagName, bool stopWhenDone = true, Reference<IBackupContainer> *pContainer = nullptr, UID *pUID = nullptr);
Future<EnumState> waitBackup(Database cx, std::string tagName, bool stopWhenDone = true,
Reference<IBackupContainer>* pContainer = nullptr, UID* pUID = nullptr);
static const Key keyLastRestorable;
@ -423,8 +431,8 @@ public:
Future<std::string> getStatus(Database cx, int errorLimit, Key tagName);
Future<int> getStateValue(Reference<ReadYourWritesTransaction> tr, UID logUid, bool snapshot = false);
Future<int> getStateValue(Database cx, UID logUid) {
Future<EnumState> getStateValue(Reference<ReadYourWritesTransaction> tr, UID logUid, bool snapshot = false);
Future<EnumState> getStateValue(Database cx, UID logUid) {
return runRYWTransaction(cx, [=](Reference<ReadYourWritesTransaction> tr){ return getStateValue(tr, logUid); });
}
@ -443,8 +451,8 @@ public:
// stopWhenDone will return when the backup is stopped, if enabled. Otherwise, it
// will return when the backup directory is restorable.
Future<int> waitBackup(Database cx, Key tagName, bool stopWhenDone = true);
Future<int> waitSubmitted(Database cx, Key tagName);
Future<EnumState> waitBackup(Database cx, Key tagName, bool stopWhenDone = true);
Future<EnumState> waitSubmitted(Database cx, Key tagName);
Future<Void> waitUpgradeToLatestDrVersion(Database cx, Key tagName);
static const Key keyAddPrefix;
@ -513,9 +521,15 @@ ACTOR Future<Void> applyMutations(Database cx, Key uid, Key addPrefix, Key remov
NotifiedVersion* committedVersion, Reference<KeyRangeMap<Version>> keyVersion);
ACTOR Future<Void> cleanupBackup(Database cx, bool deleteData);
typedef BackupAgentBase::enumState EBackupState;
template<> inline Tuple Codec<EBackupState>::pack(EBackupState const &val) { return Tuple().append(val); }
template<> inline EBackupState Codec<EBackupState>::unpack(Tuple const &val) { return (EBackupState)val.getInt(0); }
using EBackupState = BackupAgentBase::EnumState;
template <>
inline Tuple Codec<EBackupState>::pack(EBackupState const& val) {
return Tuple().append(static_cast<int>(val));
}
template <>
inline EBackupState Codec<EBackupState>::unpack(Tuple const& val) {
return static_cast<EBackupState>(val.getInt(0));
}
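// Round-trip sketch: pack(EBackupState::STATE_RUNNING) appends the integer 2 to the Tuple and unpack() casts it
// back to the enum class, so values persisted by the previous plain-int enum remain readable.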
// Key backed tags are a single-key slice of the TagUidMap, defined below.
// The Value type of the key is a UidAndAbortedFlagT which is a pair of {UID, aborted_flag}
@ -810,6 +824,11 @@ public:
return configSpace.pack(LiteralStringRef(__FUNCTION__));
}
// Set to true if only requesting incremental backup without base snapshot.
KeyBackedProperty<bool> incrementalBackupOnly() {
return configSpace.pack(LiteralStringRef(__FUNCTION__));
}
// Latest version for which all prior versions have saved by backup workers.
KeyBackedProperty<Version> latestBackupWorkerSavedVersion() {
return configSpace.pack(LiteralStringRef(__FUNCTION__));
@ -847,17 +866,25 @@ public:
auto workerEnabled = backupWorkerEnabled().get(tr);
auto plogEnabled = partitionedLogEnabled().get(tr);
auto workerVersion = latestBackupWorkerSavedVersion().get(tr);
return map(success(lastLog) && success(firstSnapshot) && success(workerEnabled) && success(plogEnabled) && success(workerVersion), [=](Void) -> Optional<Version> {
// The latest log greater than the oldest snapshot is the restorable version
Optional<Version> logVersion = workerEnabled.get().present() && workerEnabled.get().get() &&
plogEnabled.get().present() && plogEnabled.get().get()
? workerVersion.get()
: lastLog.get();
if (logVersion.present() && firstSnapshot.get().present() && logVersion.get() > firstSnapshot.get().get()) {
return std::max(logVersion.get() - 1, firstSnapshot.get().get());
}
return {};
});
auto incrementalBackup = incrementalBackupOnly().get(tr);
return map(success(lastLog) && success(firstSnapshot) && success(workerEnabled) && success(plogEnabled) &&
success(workerVersion) && success(incrementalBackup),
[=](Void) -> Optional<Version> {
// The latest log greater than the oldest snapshot is the restorable version
Optional<Version> logVersion = workerEnabled.get().present() && workerEnabled.get().get() &&
plogEnabled.get().present() && plogEnabled.get().get()
? workerVersion.get()
: lastLog.get();
if (logVersion.present() && firstSnapshot.get().present() &&
logVersion.get() > firstSnapshot.get().get()) {
return std::max(logVersion.get() - 1, firstSnapshot.get().get());
}
if (logVersion.present() && incrementalBackup.isReady() && incrementalBackup.get().present() &&
incrementalBackup.get().get()) {
return logVersion.get() - 1;
}
return {};
});
}
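Stripped of the future plumbing, the decision above is: take the log frontier (the worker-saved version when backup workers and partitioned logs are enabled, otherwise the last log version), and the backup is restorable once that frontier passes the first snapshot's begin version, or, for an incremental-only backup, as soon as any log version exists. A stand-alone sketch of that rule with plain values; the names here are hypothetical, not the FDB types:

#include <algorithm>
#include <cstdint>
#include <optional>

using Version = int64_t;

std::optional<Version> restorableVersionSketch(std::optional<Version> logVersion,
                                               std::optional<Version> firstSnapshotVersion,
                                               bool incrementalBackupOnly) {
    if (logVersion && firstSnapshotVersion && *logVersion > *firstSnapshotVersion) {
        // Everything up to the log frontier (exclusive) is covered by snapshot + logs.
        return std::max(*logVersion - 1, *firstSnapshotVersion);
    }
    if (logVersion && incrementalBackupOnly) {
        return *logVersion - 1; // no base snapshot is required in incremental-only mode
    }
    return std::nullopt;
}

// restorableVersionSketch(200, 100, false) == 199; restorableVersionSketch(200, {}, true) == 199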
KeyBackedProperty<std::vector<KeyRange>> backupRanges() {
@ -936,5 +963,7 @@ Value makePadding(int size);
ACTOR Future<Void> transformRestoredDatabase(Database cx, Standalone<VectorRef<KeyRangeRef>> backupRanges,
Key addPrefix, Key removePrefix);
void simulateBlobFailure();
#include "flow/unactorcompiler.h"
#endif

@ -23,6 +23,7 @@
#include "fdbclient/BackupAgent.actor.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/JsonBuilder.h"
#include "flow/Arena.h"
#include "flow/Trace.h"
#include "flow/UnitTest.h"
#include "flow/Hash3.h"
@ -245,7 +246,7 @@ std::string BackupDescription::toJSON() const {
* file written will be after the start version of the snapshot's execution.
*
* Log files are at file paths like
* /plogs/...log,startVersion,endVersion,UID,tagID-of-N,blocksize
* /plogs/.../log,startVersion,endVersion,UID,tagID-of-N,blocksize
* /logs/.../log,startVersion,endVersion,UID,blockSize
* where ... is a multi level path which sorts lexically into version order and results in approximately 1
* unique folder per day containing about 5,000 files. Logs after FDB 6.3 are stored in "plogs"
@ -1343,19 +1344,44 @@ public:
ACTOR static Future<KeyRange> getSnapshotFileKeyRange_impl(Reference<BackupContainerFileSystem> bc,
RangeFile file) {
state Reference<IAsyncFile> inFile = wait(bc->readFile(file.fileName));
state int readFileRetries = 0;
state bool beginKeySet = false;
state Key beginKey;
state Key endKey;
state int64_t j = 0;
for (; j < file.fileSize; j += file.blockSize) {
int64_t len = std::min<int64_t>(file.blockSize, file.fileSize - j);
Standalone<VectorRef<KeyValueRef>> blockData = wait(fileBackup::decodeRangeFileBlock(inFile, j, len));
if (!beginKeySet) {
beginKey = blockData.front().key;
beginKeySet = true;
loop {
try {
state Reference<IAsyncFile> inFile = wait(bc->readFile(file.fileName));
beginKeySet = false;
state int64_t j = 0;
for (; j < file.fileSize; j += file.blockSize) {
int64_t len = std::min<int64_t>(file.blockSize, file.fileSize - j);
Standalone<VectorRef<KeyValueRef>> blockData =
wait(fileBackup::decodeRangeFileBlock(inFile, j, len));
if (!beginKeySet) {
beginKey = blockData.front().key;
beginKeySet = true;
}
endKey = blockData.back().key;
}
break;
} catch (Error& e) {
if (e.code() == error_code_restore_bad_read ||
e.code() == error_code_restore_unsupported_file_version ||
e.code() == error_code_restore_corrupted_data_padding) { // non-retryable errors
TraceEvent(SevError, "BackupContainerGetSnapshotFileKeyRange").error(e);
throw;
} else if (e.code() == error_code_http_request_failed || e.code() == error_code_connection_failed ||
e.code() == error_code_timed_out || e.code() == error_code_lookup_failed) {
// blob http request failure, retry
TraceEvent(SevWarnAlways, "BackupContainerGetSnapshotFileKeyRangeConnectionFailure")
.detail("Retries", ++readFileRetries)
.error(e);
wait(delayJittered(0.1));
} else {
TraceEvent(SevError, "BackupContainerGetSnapshotFileKeyRangeUnexpectedError").error(e);
throw;
}
}
endKey = blockData.back().key;
}
return KeyRange(KeyRangeRef(beginKey, endKey));
}
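The loop above separates decode errors, which are rethrown immediately, from transient blob/network errors, which are logged and retried after a short jittered delay. A generic sketch of that pattern; the exception types are hypothetical stand-ins for the FDB error codes:

#include <chrono>
#include <random>
#include <stdexcept>
#include <thread>

struct TransientError : std::runtime_error { using std::runtime_error::runtime_error; };
struct PermanentError : std::runtime_error { using std::runtime_error::runtime_error; };

template <class F>
auto withRetries(F&& f, int maxRetries = 10) -> decltype(f()) {
    std::mt19937 rng{std::random_device{}()};
    std::uniform_real_distribution<double> jitter(0.05, 0.15); // ~0.1s back-off, like delayJittered(0.1)
    for (int attempt = 0;; ++attempt) {
        try {
            return f(); // e.g. open the file and decode every block
        } catch (const TransientError&) {
            if (attempt >= maxRetries) throw;
            std::this_thread::sleep_for(std::chrono::duration<double>(jitter(rng)));
        }
        // PermanentError (and anything unrecognized) propagates to the caller unchanged.
    }
}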
@ -1365,24 +1391,88 @@ public:
return getSnapshotFileKeyRange_impl(Reference<BackupContainerFileSystem>::addRef(this), file);
}
ACTOR static Future<Optional<RestorableFileSet>> getRestoreSet_impl(Reference<BackupContainerFileSystem> bc, Version targetVersion) {
// Find the most recent keyrange snapshot to end at or before targetVersion
state Optional<KeyspaceSnapshotFile> snapshot;
std::vector<KeyspaceSnapshotFile> snapshots = wait(bc->listKeyspaceSnapshots());
for(auto const &s : snapshots) {
if(s.endVersion <= targetVersion)
snapshot = s;
static Optional<RestorableFileSet> getRestoreSetFromLogs(std::vector<LogFile> logs, Version targetVersion,
RestorableFileSet restorable) {
Version end = logs.begin()->endVersion;
computeRestoreEndVersion(logs, &restorable.logs, &end, targetVersion);
if (end >= targetVersion) {
restorable.continuousBeginVersion = logs.begin()->beginVersion;
restorable.continuousEndVersion = end;
return Optional<RestorableFileSet>(restorable);
}
return Optional<RestorableFileSet>();
}
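getRestoreSetFromLogs accepts the sorted logs only if their continuous prefix, starting at the first file, reaches targetVersion. A toy sketch of that continuity idea over half-open [begin, end) version intervals, assuming (but not reproducing) the behavior of computeRestoreEndVersion, which also selects which files to keep:

#include <algorithm>
#include <cstdint>
#include <vector>

using Version = int64_t;
struct ToyLog { Version begin, end; }; // covers versions [begin, end)

// Assumes logs is non-empty; extends the frontier while the next log starts at or before it.
Version continuousEnd(std::vector<ToyLog> logs) {
    std::sort(logs.begin(), logs.end(), [](const ToyLog& a, const ToyLog& b) { return a.begin < b.begin; });
    Version frontier = logs.front().begin;
    for (const ToyLog& l : logs) {
        if (l.begin > frontier) break; // version gap, stop here
        frontier = std::max(frontier, l.end);
    }
    return frontier;
}
// The log set is usable for targetVersion only when continuousEnd(logs) >= targetVersion.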
ACTOR static Future<Optional<RestorableFileSet>> getRestoreSet_impl(Reference<BackupContainerFileSystem> bc,
Version targetVersion,
VectorRef<KeyRangeRef> keyRangesFilter, bool logsOnly = false,
Version beginVersion = invalidVersion) {
// Does not yet support using keyRangesFilter together with logsOnly
if (logsOnly && !keyRangesFilter.empty()) {
TraceEvent(SevError, "BackupContainerRestoreSetUnsupportedAPI").detail("KeyRangesFilter", keyRangesFilter.size());
return Optional<RestorableFileSet>();
}
if(snapshot.present()) {
if (logsOnly) {
state RestorableFileSet restorableSet;
state std::vector<LogFile> logFiles;
Version begin = beginVersion == invalidVersion ? 0 : beginVersion;
wait(store(logFiles, bc->listLogFiles(begin, targetVersion, false)));
// List logs in version order so log continuity can be analyzed
std::sort(logFiles.begin(), logFiles.end());
if (!logFiles.empty()) {
return getRestoreSetFromLogs(logFiles, targetVersion, restorableSet);
}
}
// Find the most recent keyrange snapshot through which we can restore filtered key ranges into targetVersion.
state std::vector<KeyspaceSnapshotFile> snapshots = wait(bc->listKeyspaceSnapshots());
state int i = snapshots.size() - 1;
for (; i >= 0; i--) {
// The smallest version of filtered range files >= snapshot beginVersion > targetVersion
if (targetVersion >= 0 && snapshots[i].beginVersion > targetVersion) {
continue;
}
state RestorableFileSet restorable;
restorable.snapshot = snapshot.get();
restorable.targetVersion = targetVersion;
state Version minKeyRangeVersion = MAX_VERSION;
state Version maxKeyRangeVersion = -1;
std::pair<std::vector<RangeFile>, std::map<std::string, KeyRange>> results =
wait(bc->readKeyspaceSnapshot(snapshot.get()));
restorable.ranges = std::move(results.first);
restorable.keyRanges = std::move(results.second);
wait(bc->readKeyspaceSnapshot(snapshots[i]));
// Old backups do not have metadata about key ranges and cannot be filtered with key ranges.
if (keyRangesFilter.size() && results.second.empty() && !results.first.empty()) {
throw backup_not_filterable_with_key_ranges();
}
// Filter by keyRangesFilter.
if (keyRangesFilter.empty()) {
restorable.ranges = std::move(results.first);
restorable.keyRanges = std::move(results.second);
minKeyRangeVersion = snapshots[i].beginVersion;
maxKeyRangeVersion = snapshots[i].endVersion;
} else {
for (const auto& rangeFile : results.first) {
const auto& keyRange = results.second.at(rangeFile.fileName);
if (keyRange.intersects(keyRangesFilter)) {
restorable.ranges.push_back(rangeFile);
restorable.keyRanges[rangeFile.fileName] = keyRange;
minKeyRangeVersion = std::min(minKeyRangeVersion, rangeFile.version);
maxKeyRangeVersion = std::max(maxKeyRangeVersion, rangeFile.version);
}
}
// No range file matches 'keyRangesFilter'.
if (restorable.ranges.empty()) {
throw backup_not_overlapped_with_keys_filter();
}
}
// 'latestVersion' represents using the minimum restorable version in a snapshot.
restorable.targetVersion = targetVersion == latestVersion ? maxKeyRangeVersion : targetVersion;
// Any version < maxKeyRangeVersion is not restorable.
if (restorable.targetVersion < maxKeyRangeVersion) continue;
restorable.snapshot = snapshots[i];
// TODO: Reenable the sanity check after TooManyFiles error is resolved
if (false && g_network->isSimulated()) {
// Sanity check key ranges
@ -1396,18 +1486,21 @@ public:
}
}
// No logs needed if there is a complete key space snapshot at the target version.
if (snapshot.get().beginVersion == snapshot.get().endVersion &&
snapshot.get().endVersion == targetVersion) {
// No logs needed if there is a complete filtered key space snapshot at the target version.
if (minKeyRangeVersion == maxKeyRangeVersion && maxKeyRangeVersion == restorable.targetVersion) {
restorable.continuousBeginVersion = restorable.continuousEndVersion = invalidVersion;
TraceEvent("BackupContainerGetRestorableFilesWithoutLogs")
.detail("KeyRangeVersion", restorable.targetVersion)
.detail("NumberOfRangeFiles", restorable.ranges.size())
.detail("KeyRangesFilter", printable(keyRangesFilter));
return Optional<RestorableFileSet>(restorable);
}
// FIXME: check if there are tagged logs; for each tag, there should be no version gap.
state std::vector<LogFile> logs;
state std::vector<LogFile> plogs;
wait(store(logs, bc->listLogFiles(snapshot.get().beginVersion, targetVersion, false)) &&
store(plogs, bc->listLogFiles(snapshot.get().beginVersion, targetVersion, true)));
wait(store(logs, bc->listLogFiles(minKeyRangeVersion, restorable.targetVersion, false)) &&
store(plogs, bc->listLogFiles(minKeyRangeVersion, restorable.targetVersion, true)));
if (plogs.size() > 0) {
logs.swap(plogs);
@ -1419,13 +1512,12 @@ public:
// Remove duplicated log files that can happen for old epochs.
std::vector<LogFile> filtered = filterDuplicates(logs);
restorable.logs.swap(filtered);
// sort by version order again for continuity analysis
std::sort(restorable.logs.begin(), restorable.logs.end());
if (isPartitionedLogsContinuous(restorable.logs, snapshot.get().beginVersion, targetVersion)) {
restorable.continuousBeginVersion = snapshot.get().beginVersion;
restorable.continuousEndVersion = targetVersion + 1; // not inclusive
if (isPartitionedLogsContinuous(restorable.logs, minKeyRangeVersion, restorable.targetVersion)) {
restorable.continuousBeginVersion = minKeyRangeVersion;
restorable.continuousEndVersion = restorable.targetVersion + 1; // not inclusive
return Optional<RestorableFileSet>(restorable);
}
return Optional<RestorableFileSet>();
@ -1433,24 +1525,19 @@ public:
// List logs in version order so log continuity can be analyzed
std::sort(logs.begin(), logs.end());
// If there are logs and the first one starts at or before the snapshot begin version then proceed
if(!logs.empty() && logs.front().beginVersion <= snapshot.get().beginVersion) {
Version end = logs.begin()->endVersion;
computeRestoreEndVersion(logs, &restorable.logs, &end, targetVersion);
if (end >= targetVersion) {
restorable.continuousBeginVersion = logs.begin()->beginVersion;
restorable.continuousEndVersion = end;
return Optional<RestorableFileSet>(restorable);
}
// If there are logs and the first one starts at or before the keyrange's snapshot begin version, then
// it is a valid restore set, so proceed
if (!logs.empty() && logs.front().beginVersion <= minKeyRangeVersion) {
return getRestoreSetFromLogs(logs, targetVersion, restorable);
}
}
return Optional<RestorableFileSet>();
}
Future<Optional<RestorableFileSet>> getRestoreSet(Version targetVersion) final {
return getRestoreSet_impl(Reference<BackupContainerFileSystem>::addRef(this), targetVersion);
Future<Optional<RestorableFileSet>> getRestoreSet(Version targetVersion, VectorRef<KeyRangeRef> keyRangesFilter,
bool logsOnly, Version beginVersion) final {
return getRestoreSet_impl(Reference<BackupContainerFileSystem>::addRef(this), targetVersion, keyRangesFilter,
logsOnly, beginVersion);
}
private:
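The key-range filtering step above can be read in isolation: keep only the range files whose key range overlaps the filter, and remember the smallest and largest versions among the kept files, since those bound the target versions the snapshot can serve. A toy sketch with hypothetical types; the real code also falls back to the snapshot's begin/end versions when the filter is empty:

#include <algorithm>
#include <cstdint>
#include <limits>
#include <string>
#include <vector>

using Version = int64_t;
struct ToyRange { std::string begin, end; };                       // [begin, end)
struct ToyRangeFile { std::string fileName; Version version; ToyRange keys; };

bool overlaps(const ToyRange& a, const ToyRange& b) { return a.begin < b.end && b.begin < a.end; }

std::vector<ToyRangeFile> filterRangeFiles(const std::vector<ToyRangeFile>& files,
                                           const std::vector<ToyRange>& filter,
                                           Version& minVersion, Version& maxVersion) {
    minVersion = std::numeric_limits<Version>::max();
    maxVersion = -1;
    std::vector<ToyRangeFile> kept;
    for (const ToyRangeFile& f : files) {
        bool hit = filter.empty() || std::any_of(filter.begin(), filter.end(),
                                                 [&](const ToyRange& r) { return overlaps(f.keys, r); });
        if (!hit) continue;
        kept.push_back(f);
        minVersion = std::min(minVersion, f.version);
        maxVersion = std::max(maxVersion, f.version);
    }
    return kept; // an empty result corresponds to backup_not_overlapped_with_keys_filter
}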

@ -280,9 +280,13 @@ public:
virtual Future<BackupFileList> dumpFileList(Version begin = 0, Version end = std::numeric_limits<Version>::max()) = 0;
// Get exactly the files necessary to restore to targetVersion. Returns non-present if
// restore to given version is not possible.
virtual Future<Optional<RestorableFileSet>> getRestoreSet(Version targetVersion) = 0;
// Get exactly the files necessary to restore the key space filtered by the specified key ranges to targetVersion.
// If targetVersion is 'latestVersion', use the minimum restorable version in a snapshot.
// If logsOnly is set, only use log files in [beginVersion, targetVersion) in the restore set.
// Returns non-present if restoring to the given version is not possible.
virtual Future<Optional<RestorableFileSet>> getRestoreSet(Version targetVersion,
VectorRef<KeyRangeRef> keyRangesFilter = {},
bool logsOnly = false, Version beginVersion = -1) = 0;
// Get an IBackupContainer based on a container spec string
static Reference<IBackupContainer> openContainer(std::string url);

@ -277,7 +277,7 @@ ACTOR Future<bool> bucketExists_impl(Reference<BlobStoreEndpoint> b, std::string
std::string resource = std::string("/") + bucket;
HTTP::Headers headers;
Reference<HTTP::Response> r = wait(b->doRequest("HEAD", resource, headers, NULL, 0, {200, 404}));
Reference<HTTP::Response> r = wait(b->doRequest("HEAD", resource, headers, nullptr, 0, {200, 404}));
return r->code == 200;
}
@ -291,7 +291,7 @@ ACTOR Future<bool> objectExists_impl(Reference<BlobStoreEndpoint> b, std::string
std::string resource = std::string("/") + bucket + "/" + object;
HTTP::Headers headers;
Reference<HTTP::Response> r = wait(b->doRequest("HEAD", resource, headers, NULL, 0, {200, 404}));
Reference<HTTP::Response> r = wait(b->doRequest("HEAD", resource, headers, nullptr, 0, {200, 404}));
return r->code == 200;
}
@ -305,7 +305,7 @@ ACTOR Future<Void> deleteObject_impl(Reference<BlobStoreEndpoint> b, std::string
std::string resource = std::string("/") + bucket + "/" + object;
HTTP::Headers headers;
// 200 or 204 means object successfully deleted, 404 means it already doesn't exist, so any of those are considered successful
Reference<HTTP::Response> r = wait(b->doRequest("DELETE", resource, headers, NULL, 0, {200, 204, 404}));
Reference<HTTP::Response> r = wait(b->doRequest("DELETE", resource, headers, nullptr, 0, {200, 204, 404}));
// But if the object already did not exist then the 'delete' is assumed to be successful but a warning is logged.
if(r->code == 404) {
@ -386,7 +386,7 @@ ACTOR Future<Void> createBucket_impl(Reference<BlobStoreEndpoint> b, std::string
if(!exists) {
std::string resource = std::string("/") + bucket;
HTTP::Headers headers;
Reference<HTTP::Response> r = wait(b->doRequest("PUT", resource, headers, NULL, 0, {200, 409}));
Reference<HTTP::Response> r = wait(b->doRequest("PUT", resource, headers, nullptr, 0, {200, 409}));
}
return Void();
}
@ -401,7 +401,7 @@ ACTOR Future<int64_t> objectSize_impl(Reference<BlobStoreEndpoint> b, std::strin
std::string resource = std::string("/") + bucket + "/" + object;
HTTP::Headers headers;
Reference<HTTP::Response> r = wait(b->doRequest("HEAD", resource, headers, NULL, 0, {200, 404}));
Reference<HTTP::Response> r = wait(b->doRequest("HEAD", resource, headers, nullptr, 0, {200, 404}));
if(r->code == 404)
throw file_not_found();
return r->contentLen;
@ -737,7 +737,7 @@ ACTOR Future<Void> listObjectsStream_impl(Reference<BlobStoreEndpoint> bstore, s
HTTP::Headers headers;
state std::string fullResource = resource + HTTP::urlEncode(lastFile);
lastFile.clear();
Reference<HTTP::Response> r = wait(bstore->doRequest("GET", fullResource, headers, NULL, 0, {200}));
Reference<HTTP::Response> r = wait(bstore->doRequest("GET", fullResource, headers, nullptr, 0, {200}));
listReleaser.release();
try {
@ -782,7 +782,7 @@ ACTOR Future<Void> listObjectsStream_impl(Reference<BlobStoreEndpoint> bstore, s
if(size == nullptr) {
throw http_bad_response();
}
object.size = strtoull(size->value(), NULL, 10);
object.size = strtoull(size->value(), nullptr, 10);
listResult.objects.push_back(object);
}
@ -893,7 +893,7 @@ ACTOR Future<std::vector<std::string>> listBuckets_impl(Reference<BlobStoreEndpo
HTTP::Headers headers;
state std::string fullResource = resource + HTTP::urlEncode(lastName);
Reference<HTTP::Response> r = wait(bstore->doRequest("GET", fullResource, headers, NULL, 0, {200}));
Reference<HTTP::Response> r = wait(bstore->doRequest("GET", fullResource, headers, nullptr, 0, {200}));
listReleaser.release();
try {
@ -1024,7 +1024,7 @@ ACTOR Future<std::string> readEntireFile_impl(Reference<BlobStoreEndpoint> bstor
std::string resource = std::string("/") + bucket + "/" + object;
HTTP::Headers headers;
Reference<HTTP::Response> r = wait(bstore->doRequest("GET", resource, headers, NULL, 0, {200, 404}));
Reference<HTTP::Response> r = wait(bstore->doRequest("GET", resource, headers, nullptr, 0, {200, 404}));
if(r->code == 404)
throw file_not_found();
return r->content;
@ -1057,7 +1057,7 @@ ACTOR Future<Void> writeEntireFileFromBuffer_impl(Reference<BlobStoreEndpoint> b
ACTOR Future<Void> writeEntireFile_impl(Reference<BlobStoreEndpoint> bstore, std::string bucket, std::string object, std::string content) {
state UnsentPacketQueue packets;
PacketWriter pw(packets.getWriteBuffer(content.size()), NULL, Unversioned());
PacketWriter pw(packets.getWriteBuffer(content.size()), nullptr, Unversioned());
pw.serializeBytes(content);
if(content.size() > bstore->knobs.multipart_max_part_size)
throw file_too_large();
@ -1095,7 +1095,7 @@ ACTOR Future<int> readObject_impl(Reference<BlobStoreEndpoint> bstore, std::stri
std::string resource = std::string("/") + bucket + "/" + object;
HTTP::Headers headers;
headers["Range"] = format("bytes=%lld-%lld", offset, offset + length - 1);
Reference<HTTP::Response> r = wait(bstore->doRequest("GET", resource, headers, NULL, 0, {200, 206, 404}));
Reference<HTTP::Response> r = wait(bstore->doRequest("GET", resource, headers, nullptr, 0, {200, 206, 404}));
if(r->code == 404)
throw file_not_found();
if(r->contentLen != r->content.size()) // Double check that this wasn't a header-only response, probably unnecessary
@ -1114,7 +1114,7 @@ ACTOR static Future<std::string> beginMultiPartUpload_impl(Reference<BlobStoreEn
std::string resource = std::string("/") + bucket + "/" + object + "?uploads";
HTTP::Headers headers;
Reference<HTTP::Response> r = wait(bstore->doRequest("POST", resource, headers, NULL, 0, {200}));
Reference<HTTP::Response> r = wait(bstore->doRequest("POST", resource, headers, nullptr, 0, {200}));
try {
xml_document<> doc;
@ -1180,7 +1180,7 @@ ACTOR Future<Void> finishMultiPartUpload_impl(Reference<BlobStoreEndpoint> bstor
std::string resource = format("/%s/%s?uploadId=%s", bucket.c_str(), object.c_str(), uploadID.c_str());
HTTP::Headers headers;
PacketWriter pw(part_list.getWriteBuffer(manifest.size()), NULL, Unversioned());
PacketWriter pw(part_list.getWriteBuffer(manifest.size()), nullptr, Unversioned());
pw.serializeBytes(manifest);
Reference<HTTP::Response> r = wait(bstore->doRequest("POST", resource, headers, &part_list, manifest.size(), {200}));
// TODO: In the event that the client times out just before the request completes (so the client is unaware) then the next retry

@ -33,7 +33,7 @@ set(FDBCLIENT_SRCS
Knobs.h
ManagementAPI.actor.cpp
ManagementAPI.actor.h
MasterProxyInterface.h
CommitProxyInterface.h
MetricLogger.actor.cpp
MetricLogger.h
MonitorLeader.actor.cpp

@ -25,7 +25,7 @@
#include "fdbclient/FDBTypes.h"
#include "fdbrpc/FailureMonitor.h"
#include "fdbclient/Status.h"
#include "fdbclient/MasterProxyInterface.h"
#include "fdbclient/CommitProxyInterface.h"
// Streams from WorkerInterface that are safe and useful to call from a client.
// A ClientWorkerInterface is embedded as the first element of a WorkerInterface.

@ -25,7 +25,7 @@
#include "fdbclient/FDBTypes.h"
#include "fdbrpc/FailureMonitor.h"
#include "fdbclient/Status.h"
#include "fdbclient/MasterProxyInterface.h"
#include "fdbclient/CommitProxyInterface.h"
#include "fdbclient/ClientWorkerInterface.h"
struct ClusterInterface {

@ -1,6 +1,6 @@
/*
* MasterProxyInterface.h
* CommitProxyInterface.h
*
* This source file is part of the FoundationDB open source project
*
@ -19,8 +19,8 @@
* limitations under the License.
*/
#ifndef FDBCLIENT_MASTERPROXYINTERFACE_H
#define FDBCLIENT_MASTERPROXYINTERFACE_H
#ifndef FDBCLIENT_COMMITPROXYINTERFACE_H
#define FDBCLIENT_COMMITPROXYINTERFACE_H
#pragma once
#include <utility>
@ -36,7 +36,7 @@
#include "fdbrpc/TimedRequest.h"
#include "GrvProxyInterface.h"
struct MasterProxyInterface {
struct CommitProxyInterface {
constexpr static FileIdentifier file_identifier = 8954922;
enum { LocationAwareLoadBalance = 1 };
enum { AlwaysFresh = 1 };
@ -59,8 +59,8 @@ struct MasterProxyInterface {
UID id() const { return commit.getEndpoint().token; }
std::string toString() const { return id().shortString(); }
bool operator == (MasterProxyInterface const& r) const { return id() == r.id(); }
bool operator != (MasterProxyInterface const& r) const { return id() != r.id(); }
bool operator==(CommitProxyInterface const& r) const { return id() == r.id(); }
bool operator!=(CommitProxyInterface const& r) const { return id() != r.id(); }
NetworkAddress address() const { return commit.getEndpoint().getPrimaryAddress(); }
template <class Archive>
@ -100,9 +100,10 @@ struct MasterProxyInterface {
struct ClientDBInfo {
constexpr static FileIdentifier file_identifier = 5355080;
UID id; // Changes each time anything else changes
vector< GrvProxyInterface > grvProxies;
vector< MasterProxyInterface > masterProxies;
Optional<MasterProxyInterface> firstProxy; //not serialized, used for commitOnFirstProxy when the proxies vector has been shrunk
vector<GrvProxyInterface> grvProxies;
vector<CommitProxyInterface> commitProxies;
Optional<CommitProxyInterface>
firstCommitProxy; // not serialized, used for commitOnFirstProxy when the commit proxies vector has been shrunk
double clientTxnInfoSampleRate;
int64_t clientTxnInfoSizeLimit;
Optional<Value> forward;
@ -122,7 +123,7 @@ struct ClientDBInfo {
if constexpr (!is_fb_function<Archive>) {
ASSERT(ar.protocolVersion().isValid());
}
serializer(ar, grvProxies, masterProxies, id, clientTxnInfoSampleRate, clientTxnInfoSizeLimit, forward,
serializer(ar, grvProxies, commitProxies, id, clientTxnInfoSampleRate, clientTxnInfoSizeLimit, forward,
transactionTagSampleRate, transactionTagSampleCost);
}
};

@ -25,7 +25,7 @@
#include "fdbclient/FDBTypes.h"
#include "fdbrpc/fdbrpc.h"
#include "fdbrpc/Locality.h"
#include "fdbclient/MasterProxyInterface.h"
#include "fdbclient/CommitProxyInterface.h"
#include "fdbclient/ClusterInterface.h"
const int MAX_CLUSTER_FILE_BYTES = 60000;

@ -965,9 +965,10 @@ namespace dbBackup {
tr->clear(KeyRangeRef(logsPath, strinc(logsPath)));
tr->clear(conf.range());
tr->set(states.pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_COMPLETED)));
tr->set(states.pack(DatabaseBackupAgent::keyStateStatus),
StringRef(BackupAgentBase::getStateText(EBackupState::STATE_COMPLETED)));
wait(taskBucket->finish(tr, task));
return Void();
}
@ -1449,9 +1450,10 @@ namespace dbBackup {
try {
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
tr.addReadConflictRange(singleKeyRange(sourceStates.pack(DatabaseBackupAgent::keyStateStatus)));
tr.set(sourceStates.pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_RUNNING_DIFFERENTIAL)));
tr.set(sourceStates.pack(DatabaseBackupAgent::keyStateStatus),
StringRef(BackupAgentBase::getStateText(EBackupState::STATE_RUNNING_DIFFERENTIAL)));
Key versionKey = task->params[DatabaseBackupAgent::keyConfigLogUid].withPrefix(task->params[BackupAgentBase::destUid]).withPrefix(backupLatestVersionsPrefix);
Optional<Key> prevBeginVersion = wait(tr.get(versionKey));
if (!prevBeginVersion.present()) {
return Void();
@ -1489,9 +1491,10 @@ namespace dbBackup {
wait(success(FinishedFullBackupTaskFunc::addTask(tr, taskBucket, task, TaskCompletionKey::noSignal())));
}
else { // Start the writing of logs, if differential
tr->set(states.pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_RUNNING_DIFFERENTIAL)));
tr->set(states.pack(DatabaseBackupAgent::keyStateStatus),
StringRef(BackupAgentBase::getStateText(EBackupState::STATE_RUNNING_DIFFERENTIAL)));
allPartsDone = futureBucket->future(tr);
Version prevBeginVersion = BinaryReader::fromStringRef<Version>(task->params[DatabaseBackupAgent::keyPrevBeginVersion], Unversioned());
wait(success(CopyDiffLogsTaskFunc::addTask(tr, taskBucket, task, prevBeginVersion, restoreVersion, TaskCompletionKey::joinWith(allPartsDone))));
@ -1623,9 +1626,10 @@ namespace dbBackup {
srcTr2->set( Subspace(databaseBackupPrefixRange.begin).get(BackupAgentBase::keySourceTagName).pack(task->params[BackupAgentBase::keyTagName]), logUidValue );
srcTr2->set( sourceStates.pack(DatabaseBackupAgent::keyFolderId), task->params[DatabaseBackupAgent::keyFolderId] );
srcTr2->set( sourceStates.pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_RUNNING)));
srcTr2->set(sourceStates.pack(DatabaseBackupAgent::keyStateStatus),
StringRef(BackupAgentBase::getStateText(EBackupState::STATE_RUNNING)));
state Key destPath = destUidValue.withPrefix(backupLogKeys.begin);
// Start logging the mutations for the specified ranges of the tag
for (auto &backupRange : backupRanges) {
srcTr2->set(logRangesEncodeKey(backupRange.begin, BinaryReader::fromStringRef<UID>(destUidValue, Unversioned())), logRangesEncodeValue(backupRange.end, destPath));
@ -1666,9 +1670,10 @@ namespace dbBackup {
tr->set(logUidValue.withPrefix(applyMutationsBeginRange.begin), BinaryWriter::toValue(beginVersion, Unversioned()));
tr->set(logUidValue.withPrefix(applyMutationsEndRange.begin), BinaryWriter::toValue(beginVersion, Unversioned()));
tr->set(states.pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_RUNNING)));
tr->set(states.pack(DatabaseBackupAgent::keyStateStatus),
StringRef(BackupAgentBase::getStateText(EBackupState::STATE_RUNNING)));
state Reference<TaskFuture> kvBackupRangeComplete = futureBucket->future(tr);
state Reference<TaskFuture> kvBackupComplete = futureBucket->future(tr);
state int rangeCount = 0;
@ -1851,7 +1856,8 @@ public:
}
// This method will return the final status of the backup
ACTOR static Future<int> waitBackup(DatabaseBackupAgent* backupAgent, Database cx, Key tagName, bool stopWhenDone) {
ACTOR static Future<EBackupState> waitBackup(DatabaseBackupAgent* backupAgent, Database cx, Key tagName,
bool stopWhenDone) {
state std::string backTrace;
state UID logUid = wait(backupAgent->getLogUid(cx, tagName));
state Key statusKey = backupAgent->states.get(BinaryWriter::toValue(logUid, Unversioned())).pack(DatabaseBackupAgent::keyStateStatus);
@ -1862,15 +1868,15 @@ public:
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
try {
state int status = wait(backupAgent->getStateValue(tr, logUid));
state EBackupState status = wait(backupAgent->getStateValue(tr, logUid));
// Break, if no longer runnable
if (!DatabaseBackupAgent::isRunnable((BackupAgentBase::enumState)status) || BackupAgentBase::STATE_PARTIALLY_ABORTED == status) {
if (!DatabaseBackupAgent::isRunnable(status) || EBackupState::STATE_PARTIALLY_ABORTED == status) {
return status;
}
// Break, if in differential mode (restorable) and stopWhenDone is not enabled
if ((!stopWhenDone) && (BackupAgentBase::STATE_RUNNING_DIFFERENTIAL == status)) {
if ((!stopWhenDone) && (EBackupState::STATE_RUNNING_DIFFERENTIAL == status)) {
return status;
}
@ -1885,7 +1891,7 @@ public:
}
// This method will return the final status of the backup
ACTOR static Future<int> waitSubmitted(DatabaseBackupAgent* backupAgent, Database cx, Key tagName) {
ACTOR static Future<EBackupState> waitSubmitted(DatabaseBackupAgent* backupAgent, Database cx, Key tagName) {
state UID logUid = wait(backupAgent->getLogUid(cx, tagName));
state Key statusKey = backupAgent->states.get(BinaryWriter::toValue(logUid, Unversioned())).pack(DatabaseBackupAgent::keyStateStatus);
@ -1895,10 +1901,10 @@ public:
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
try {
state int status = wait(backupAgent->getStateValue(tr, logUid));
state EBackupState status = wait(backupAgent->getStateValue(tr, logUid));
// Break, if no longer runnable
if( BackupAgentBase::STATE_SUBMITTED != status) {
if (EBackupState::STATE_SUBMITTED != status) {
return status;
}
@ -1924,9 +1930,9 @@ public:
tr->setOption(FDBTransactionOptions::COMMIT_ON_FIRST_PROXY);
// We will use the global status for now to ensure that multiple backups do not take place with different tags
state int status = wait(backupAgent->getStateValue(tr, logUidCurrent));
state EBackupState status = wait(backupAgent->getStateValue(tr, logUidCurrent));
if (DatabaseBackupAgent::isRunnable((BackupAgentBase::enumState)status)) {
if (DatabaseBackupAgent::isRunnable(status)) {
throw backup_duplicate();
}
@ -1987,7 +1993,8 @@ public:
tr->set(backupAgent->config.get(logUidValue).pack(DatabaseBackupAgent::keyFolderId), backupUid);
tr->set(backupAgent->states.get(logUidValue).pack(DatabaseBackupAgent::keyFolderId), backupUid); // written to config and states because it's also used by abort
tr->set(backupAgent->config.get(logUidValue).pack(DatabaseBackupAgent::keyConfigBackupRanges), BinaryWriter::toValue(backupRanges, IncludeVersion(ProtocolVersion::withDRBackupRanges())));
tr->set(backupAgent->states.get(logUidValue).pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_SUBMITTED)));
tr->set(backupAgent->states.get(logUidValue).pack(DatabaseBackupAgent::keyStateStatus),
StringRef(BackupAgentBase::getStateText(EBackupState::STATE_SUBMITTED)));
if (stopWhenDone) {
tr->set(backupAgent->config.get(logUidValue).pack(DatabaseBackupAgent::keyConfigStopWhenDoneKey), StringRef());
}
@ -2033,10 +2040,10 @@ public:
ACTOR static Future<Void> atomicSwitchover(DatabaseBackupAgent* backupAgent, Database dest, Key tagName, Standalone<VectorRef<KeyRangeRef>> backupRanges, Key addPrefix, Key removePrefix, bool forceAction) {
state DatabaseBackupAgent drAgent(dest);
state UID destlogUid = wait(backupAgent->getLogUid(dest, tagName));
state int status = wait(backupAgent->getStateValue(dest, destlogUid));
state EBackupState status = wait(backupAgent->getStateValue(dest, destlogUid));
TraceEvent("DBA_SwitchoverStart").detail("Status", status);
if (status != BackupAgentBase::STATE_RUNNING_DIFFERENTIAL && status != BackupAgentBase::STATE_COMPLETED) {
if (status != EBackupState::STATE_RUNNING_DIFFERENTIAL && status != EBackupState::STATE_COMPLETED) {
throw backup_duplicate();
}
@ -2153,10 +2160,10 @@ public:
ACTOR static Future<Void> discontinueBackup(DatabaseBackupAgent* backupAgent, Reference<ReadYourWritesTransaction> tr, Key tagName) {
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
state UID logUid = wait(backupAgent->getLogUid(tr, tagName));
state int status = wait(backupAgent->getStateValue(tr, logUid));
state EBackupState status = wait(backupAgent->getStateValue(tr, logUid));
TraceEvent("DBA_Discontinue").detail("Status", status);
if (!DatabaseBackupAgent::isRunnable((BackupAgentBase::enumState)status)) {
if (!DatabaseBackupAgent::isRunnable(status)) {
throw backup_unneeded();
}
@ -2189,7 +2196,7 @@ public:
logUid = _logUid;
logUidValue = BinaryWriter::toValue(logUid, Unversioned());
state Future<int> statusFuture= backupAgent->getStateValue(tr, logUid);
state Future<EBackupState> statusFuture = backupAgent->getStateValue(tr, logUid);
state Future<UID> destUidFuture = backupAgent->getDestUid(tr, logUid);
wait(success(statusFuture) && success(destUidFuture));
@ -2197,8 +2204,8 @@ public:
if (destUid.isValid()) {
destUidValue = BinaryWriter::toValue(destUid, Unversioned());
}
int status = statusFuture.get();
if (!backupAgent->isRunnable((BackupAgentBase::enumState)status)) {
EBackupState status = statusFuture.get();
if (!backupAgent->isRunnable(status)) {
throw backup_unneeded();
}
@ -2213,7 +2220,8 @@ public:
tr->clear(prefixRange(logUidValue.withPrefix(applyLogKeys.begin)));
tr->set(StringRef(backupAgent->states.get(logUidValue).pack(DatabaseBackupAgent::keyStateStatus)), StringRef(DatabaseBackupAgent::getStateText(BackupAgentBase::STATE_PARTIALLY_ABORTED)));
tr->set(StringRef(backupAgent->states.get(logUidValue).pack(DatabaseBackupAgent::keyStateStatus)),
StringRef(DatabaseBackupAgent::getStateText(EBackupState::STATE_PARTIALLY_ABORTED)));
wait(tr->commit());
TraceEvent("DBA_Abort").detail("CommitVersion", tr->getCommittedVersion());
@ -2286,7 +2294,8 @@ public:
}
if (abortOldBackup) {
srcTr->set( backupAgent->sourceStates.pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_ABORTED) ));
srcTr->set(backupAgent->sourceStates.pack(DatabaseBackupAgent::keyStateStatus),
StringRef(BackupAgentBase::getStateText(EBackupState::STATE_ABORTED)));
srcTr->set( backupAgent->sourceStates.get(logUidValue).pack(DatabaseBackupAgent::keyFolderId), backupUid );
srcTr->clear(prefixRange(logUidValue.withPrefix(backupLogKeys.begin)));
srcTr->clear(prefixRange(logUidValue.withPrefix(logRangesRange.begin)));
@ -2307,7 +2316,8 @@ public:
break;
}
srcTr->set( backupAgent->sourceStates.pack(DatabaseBackupAgent::keyStateStatus), StringRef(DatabaseBackupAgent::getStateText(BackupAgentBase::STATE_PARTIALLY_ABORTED) ));
srcTr->set(backupAgent->sourceStates.pack(DatabaseBackupAgent::keyStateStatus),
StringRef(DatabaseBackupAgent::getStateText(EBackupState::STATE_PARTIALLY_ABORTED)));
srcTr->set( backupAgent->sourceStates.get(logUidValue).pack(DatabaseBackupAgent::keyFolderId), backupUid );
wait( eraseLogData(srcTr, logUidValue, destUidValue) || partialTimeout );
@ -2341,7 +2351,8 @@ public:
return Void();
}
tr->set(StringRef(backupAgent->states.get(logUidValue).pack(DatabaseBackupAgent::keyStateStatus)), StringRef(DatabaseBackupAgent::getStateText(BackupAgentBase::STATE_ABORTED)));
tr->set(StringRef(backupAgent->states.get(logUidValue).pack(DatabaseBackupAgent::keyStateStatus)),
StringRef(DatabaseBackupAgent::getStateText(EBackupState::STATE_ABORTED)));
wait(tr->commit());
@ -2382,13 +2393,11 @@ public:
state Future<Optional<Value>> fStopVersionKey = tr->get(backupAgent->states.get(BinaryWriter::toValue(logUid, Unversioned())).pack(BackupAgentBase::keyStateStop));
state Future<Optional<Key>> fBackupKeysPacked = tr->get(backupAgent->config.get(BinaryWriter::toValue(logUid, Unversioned())).pack(BackupAgentBase::keyConfigBackupRanges));
int backupStateInt = wait(backupAgent->getStateValue(tr, logUid));
state BackupAgentBase::enumState backupState = (BackupAgentBase::enumState)backupStateInt;
if (backupState == DatabaseBackupAgent::STATE_NEVERRAN) {
state EBackupState backupState = wait(backupAgent->getStateValue(tr, logUid));
if (backupState == EBackupState::STATE_NEVERRAN) {
statusText += "No previous backups found.\n";
}
else {
} else {
state std::string tagNameDisplay;
Optional<Key> tagName = wait(fTagName);
@ -2408,23 +2417,20 @@ public:
}
switch (backupState) {
case BackupAgentBase::STATE_SUBMITTED:
case EBackupState::STATE_SUBMITTED:
statusText += "The DR on tag `" + tagNameDisplay + "' is NOT a complete copy of the primary database (just started).\n";
break;
case BackupAgentBase::STATE_RUNNING:
case EBackupState::STATE_RUNNING:
statusText += "The DR on tag `" + tagNameDisplay + "' is NOT a complete copy of the primary database.\n";
break;
case BackupAgentBase::STATE_RUNNING_DIFFERENTIAL:
case EBackupState::STATE_RUNNING_DIFFERENTIAL:
statusText += "The DR on tag `" + tagNameDisplay + "' is a complete copy of the primary database.\n";
break;
case BackupAgentBase::STATE_COMPLETED:
{
case EBackupState::STATE_COMPLETED: {
Version stopVersion = stopVersionKey.present() ? BinaryReader::fromStringRef<Version>(stopVersionKey.get(), Unversioned()) : -1;
statusText += "The previous DR on tag `" + tagNameDisplay + "' completed at version " + format("%lld", stopVersion) + ".\n";
}
break;
case BackupAgentBase::STATE_PARTIALLY_ABORTED:
{
} break;
case EBackupState::STATE_PARTIALLY_ABORTED: {
statusText += "The previous DR on tag `" + tagNameDisplay + "' " + BackupAgentBase::getStateText(backupState) + ".\n";
statusText += "Abort the DR with --cleanup before starting a new DR.\n";
break;
@ -2485,13 +2491,15 @@ public:
return statusText;
}
ACTOR static Future<int> getStateValue(DatabaseBackupAgent* backupAgent, Reference<ReadYourWritesTransaction> tr, UID logUid, bool snapshot) {
ACTOR static Future<EBackupState> getStateValue(DatabaseBackupAgent* backupAgent,
Reference<ReadYourWritesTransaction> tr, UID logUid,
bool snapshot) {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
state Key statusKey = backupAgent->states.get(BinaryWriter::toValue(logUid, Unversioned())).pack(DatabaseBackupAgent::keyStateStatus);
Optional<Value> status = wait(tr->get(statusKey, snapshot));
return (!status.present()) ? DatabaseBackupAgent::STATE_NEVERRAN : BackupAgentBase::getState(status.get().toString());
return (!status.present()) ? EBackupState::STATE_NEVERRAN : BackupAgentBase::getState(status.get().toString());
}
ACTOR static Future<UID> getDestUid(DatabaseBackupAgent* backupAgent, Reference<ReadYourWritesTransaction> tr, UID logUid, bool snapshot) {
@ -2536,7 +2544,8 @@ Future<std::string> DatabaseBackupAgent::getStatus(Database cx, int errorLimit,
return DatabaseBackupAgentImpl::getStatus(this, cx, errorLimit, tagName);
}
Future<int> DatabaseBackupAgent::getStateValue(Reference<ReadYourWritesTransaction> tr, UID logUid, bool snapshot) {
Future<EBackupState> DatabaseBackupAgent::getStateValue(Reference<ReadYourWritesTransaction> tr, UID logUid,
bool snapshot) {
return DatabaseBackupAgentImpl::getStateValue(this, tr, logUid, snapshot);
}
@ -2552,11 +2561,11 @@ Future<Void> DatabaseBackupAgent::waitUpgradeToLatestDrVersion(Database cx, Key
return DatabaseBackupAgentImpl::waitUpgradeToLatestDrVersion(this, cx, tagName);
}
Future<int> DatabaseBackupAgent::waitBackup(Database cx, Key tagName, bool stopWhenDone) {
Future<EBackupState> DatabaseBackupAgent::waitBackup(Database cx, Key tagName, bool stopWhenDone) {
return DatabaseBackupAgentImpl::waitBackup(this, cx, tagName, stopWhenDone);
}
Future<int> DatabaseBackupAgent::waitSubmitted(Database cx, Key tagName) {
Future<EBackupState> DatabaseBackupAgent::waitSubmitted(Database cx, Key tagName) {
return DatabaseBackupAgentImpl::waitSubmitted(this, cx, tagName);
}

@ -29,12 +29,12 @@ DatabaseConfiguration::DatabaseConfiguration()
void DatabaseConfiguration::resetInternal() {
// does NOT reset rawConfiguration
initialized = false;
proxyCount = grvProxyCount = resolverCount = desiredTLogCount = tLogWriteAntiQuorum = tLogReplicationFactor =
commitProxyCount = grvProxyCount = resolverCount = desiredTLogCount = tLogWriteAntiQuorum = tLogReplicationFactor =
storageTeamSize = desiredLogRouterCount = -1;
tLogVersion = TLogVersion::DEFAULT;
tLogDataStoreType = storageServerStoreType = KeyValueStoreType::END;
tLogSpillType = TLogSpillType::DEFAULT;
autoProxyCount = CLIENT_KNOBS->DEFAULT_AUTO_PROXIES;
autoCommitProxyCount = CLIENT_KNOBS->DEFAULT_AUTO_COMMIT_PROXIES;
autoGrvProxyCount = CLIENT_KNOBS->DEFAULT_AUTO_GRV_PROXIES;
autoResolverCount = CLIENT_KNOBS->DEFAULT_AUTO_RESOLVERS;
autoDesiredTLogCount = CLIENT_KNOBS->DEFAULT_AUTO_LOGS;
@ -165,40 +165,39 @@ void DatabaseConfiguration::setDefaultReplicationPolicy() {
bool DatabaseConfiguration::isValid() const {
if( !(initialized &&
tLogWriteAntiQuorum >= 0 &&
tLogWriteAntiQuorum <= tLogReplicationFactor/2 &&
tLogReplicationFactor >= 1 &&
storageTeamSize >= 1 &&
getDesiredProxies() >= 1 &&
getDesiredGrvProxies() >= 1 &&
getDesiredLogs() >= 1 &&
getDesiredResolvers() >= 1 &&
tLogVersion != TLogVersion::UNSET &&
tLogVersion >= TLogVersion::MIN_RECRUITABLE &&
tLogVersion <= TLogVersion::MAX_SUPPORTED &&
tLogDataStoreType != KeyValueStoreType::END &&
tLogSpillType != TLogSpillType::UNSET &&
!(tLogSpillType == TLogSpillType::REFERENCE && tLogVersion < TLogVersion::V3) &&
storageServerStoreType != KeyValueStoreType::END &&
autoProxyCount >= 1 &&
autoGrvProxyCount >= 1 &&
autoResolverCount >= 1 &&
autoDesiredTLogCount >= 1 &&
storagePolicy &&
tLogPolicy &&
getDesiredRemoteLogs() >= 1 &&
remoteTLogReplicationFactor >= 0 &&
repopulateRegionAntiQuorum >= 0 &&
repopulateRegionAntiQuorum <= 1 &&
usableRegions >= 1 &&
usableRegions <= 2 &&
regions.size() <= 2 &&
( usableRegions == 1 || regions.size() == 2 ) &&
( regions.size() == 0 || regions[0].priority >= 0 ) &&
( regions.size() == 0 || tLogPolicy->info() != "dcid^2 x zoneid^2 x 1") ) ) { //We cannot specify regions with three_datacenter replication
tLogWriteAntiQuorum >= 0 &&
tLogWriteAntiQuorum <= tLogReplicationFactor/2 &&
tLogReplicationFactor >= 1 &&
storageTeamSize >= 1 &&
getDesiredCommitProxies() >= 1 &&
getDesiredGrvProxies() >= 1 &&
getDesiredLogs() >= 1 &&
getDesiredResolvers() >= 1 &&
tLogVersion != TLogVersion::UNSET &&
tLogVersion >= TLogVersion::MIN_RECRUITABLE &&
tLogVersion <= TLogVersion::MAX_SUPPORTED &&
tLogDataStoreType != KeyValueStoreType::END &&
tLogSpillType != TLogSpillType::UNSET &&
!(tLogSpillType == TLogSpillType::REFERENCE && tLogVersion < TLogVersion::V3) &&
storageServerStoreType != KeyValueStoreType::END &&
autoCommitProxyCount >= 1 &&
autoGrvProxyCount >= 1 &&
autoResolverCount >= 1 &&
autoDesiredTLogCount >= 1 &&
storagePolicy &&
tLogPolicy &&
getDesiredRemoteLogs() >= 1 &&
remoteTLogReplicationFactor >= 0 &&
repopulateRegionAntiQuorum >= 0 &&
repopulateRegionAntiQuorum <= 1 &&
usableRegions >= 1 &&
usableRegions <= 2 &&
regions.size() <= 2 &&
( usableRegions == 1 || regions.size() == 2 ) &&
( regions.size() == 0 || regions[0].priority >= 0 ) &&
( regions.size() == 0 || tLogPolicy->info() != "dcid^2 x zoneid^2 x 1") ) ) { //We cannot specify regions with three_datacenter replication
return false;
}
std::set<Key> dcIds;
dcIds.insert(Key());
for(auto& r : regions) {
@ -318,11 +317,11 @@ StatusObject DatabaseConfiguration::toJSON(bool noPolicies) const {
if (desiredTLogCount != -1 || isOverridden("logs")) {
result["logs"] = desiredTLogCount;
}
if (proxyCount != -1 || isOverridden("proxies")) {
result["proxies"] = proxyCount;
if (commitProxyCount != -1 || isOverridden("commit_proxies")) {
result["commit_proxies"] = commitProxyCount;
}
if (grvProxyCount != -1 || isOverridden("grv_proxies")) {
result["grv_proxies"] = proxyCount;
result["grv_proxies"] = grvProxyCount;
}
if (resolverCount != -1 || isOverridden("resolvers")) {
result["resolvers"] = resolverCount;
@ -336,8 +335,8 @@ StatusObject DatabaseConfiguration::toJSON(bool noPolicies) const {
if (repopulateRegionAntiQuorum != 0 || isOverridden("repopulate_anti_quorum")) {
result["repopulate_anti_quorum"] = repopulateRegionAntiQuorum;
}
if (autoProxyCount != CLIENT_KNOBS->DEFAULT_AUTO_PROXIES || isOverridden("auto_proxies")) {
result["auto_proxies"] = autoProxyCount;
if (autoCommitProxyCount != CLIENT_KNOBS->DEFAULT_AUTO_COMMIT_PROXIES || isOverridden("auto_commit_proxies")) {
result["auto_commit_proxies"] = autoCommitProxyCount;
}
if (autoGrvProxyCount != CLIENT_KNOBS->DEFAULT_AUTO_GRV_PROXIES || isOverridden("auto_grv_proxies")) {
result["auto_grv_proxies"] = autoGrvProxyCount;
@ -419,8 +418,8 @@ bool DatabaseConfiguration::setInternal(KeyRef key, ValueRef value) {
if (ck == LiteralStringRef("initialized")) {
initialized = true;
} else if (ck == LiteralStringRef("proxies")) {
parse(&proxyCount, value);
} else if (ck == LiteralStringRef("commit_proxies")) {
parse(&commitProxyCount, value);
} else if (ck == LiteralStringRef("grv_proxies")) {
parse(&grvProxyCount, value);
} else if (ck == LiteralStringRef("resolvers")) {
@ -459,8 +458,8 @@ bool DatabaseConfiguration::setInternal(KeyRef key, ValueRef value) {
} else if (ck == LiteralStringRef("storage_engine")) {
parse((&type), value);
storageServerStoreType = (KeyValueStoreType::StoreType)type;
} else if (ck == LiteralStringRef("auto_proxies")) {
parse(&autoProxyCount, value);
} else if (ck == LiteralStringRef("auto_commit_proxies")) {
parse(&autoCommitProxyCount, value);
} else if (ck == LiteralStringRef("auto_grv_proxies")) {
parse(&autoGrvProxyCount, value);
} else if (ck == LiteralStringRef("auto_resolvers")) {

@ -133,15 +133,19 @@ struct DatabaseConfiguration {
}
//Killing an entire datacenter counts as killing one zone in modes that support it
int32_t maxZoneFailuresTolerated() const {
int32_t maxZoneFailuresTolerated(int fullyReplicatedRegions, bool forAvailability) const {
int worstSatellite = regions.size() ? std::numeric_limits<int>::max() : 0;
int regionsWithNonNegativePriority = 0;
for(auto& r : regions) {
if(r.priority >= 0) {
regionsWithNonNegativePriority++;
}
worstSatellite = std::min(worstSatellite, r.satelliteTLogReplicationFactor - r.satelliteTLogWriteAntiQuorum);
if(r.satelliteTLogUsableDcsFallback > 0) {
worstSatellite = std::min(worstSatellite, r.satelliteTLogReplicationFactorFallback - r.satelliteTLogWriteAntiQuorumFallback);
}
}
if(usableRegions > 1 && worstSatellite > 0) {
if(usableRegions > 1 && fullyReplicatedRegions > 1 && worstSatellite > 0 && (!forAvailability || regionsWithNonNegativePriority > 1)) {
return 1 + std::min(std::max(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, worstSatellite - 1), storageTeamSize - 1);
} else if(worstSatellite > 0) {
return std::min(tLogReplicationFactor + worstSatellite - 2 - tLogWriteAntiQuorum, storageTeamSize - 1);
@ -149,9 +153,9 @@ struct DatabaseConfiguration {
return std::min(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, storageTeamSize - 1);
}
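With hypothetical numbers (triple redundancy, no write anti-quorum, satellite replication factor 3), the three branches above evaluate to 3, 2, and 2 tolerated zone failures respectively. A small sketch that just plugs the values in:

#include <algorithm>
#include <iostream>

int main() {
    // Hypothetical configuration, not taken from any real cluster.
    int tLogReplicationFactor = 3, tLogWriteAntiQuorum = 0, storageTeamSize = 3, worstSatellite = 3;

    // Multiple fully replicated regions: losing one whole region still leaves local redundancy.
    int multiRegion = 1 + std::min(std::max(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, worstSatellite - 1),
                                   storageTeamSize - 1); // 3

    // One usable region that has satellites.
    int withSatellites = std::min(tLogReplicationFactor + worstSatellite - 2 - tLogWriteAntiQuorum,
                                  storageTeamSize - 1); // 2

    // No satellites at all.
    int plain = std::min(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, storageTeamSize - 1); // 2

    std::cout << multiRegion << " " << withSatellites << " " << plain << "\n";
    return 0;
}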
// Proxy Servers
int32_t proxyCount;
int32_t autoProxyCount;
// CommitProxy Servers
int32_t commitProxyCount;
int32_t autoCommitProxyCount;
int32_t grvProxyCount;
int32_t autoGrvProxyCount;
@ -192,7 +196,10 @@ struct DatabaseConfiguration {
bool isExcludedServer( NetworkAddressList ) const;
std::set<AddressExclusion> getExcludedServers() const;
int32_t getDesiredProxies() const { if(proxyCount == -1) return autoProxyCount; return proxyCount; }
int32_t getDesiredCommitProxies() const {
if (commitProxyCount == -1) return autoCommitProxyCount;
return commitProxyCount;
}
int32_t getDesiredGrvProxies() const {
if (grvProxyCount == -1) return autoGrvProxyCount;
return grvProxyCount;

@ -29,7 +29,7 @@
#include "fdbclient/NativeAPI.actor.h"
#include "fdbclient/KeyRangeMap.h"
#include "fdbclient/MasterProxyInterface.h"
#include "fdbclient/CommitProxyInterface.h"
#include "fdbclient/SpecialKeySpace.actor.h"
#include "fdbrpc/QueueModel.h"
#include "fdbrpc/MultiInterface.h"
@ -68,7 +68,7 @@ struct LocationInfo : MultiInterface<ReferencedInterface<StorageServerInterface>
}
};
using ProxyInfo = ModelInterface<MasterProxyInterface>;
using CommitProxyInfo = ModelInterface<CommitProxyInterface>;
using GrvProxyInfo = ModelInterface<GrvProxyInterface>;
class ClientTagThrottleData : NonCopyable {
@ -165,8 +165,8 @@ public:
bool sampleOnCost(uint64_t cost) const;
void updateProxies();
Reference<ProxyInfo> getMasterProxies(bool useProvisionalProxies);
Future<Reference<ProxyInfo>> getMasterProxiesFuture(bool useProvisionalProxies);
Reference<CommitProxyInfo> getCommitProxies(bool useProvisionalProxies);
Future<Reference<CommitProxyInfo>> getCommitProxiesFuture(bool useProvisionalProxies);
Reference<GrvProxyInfo> getGrvProxies(bool useProvisionalProxies);
Future<Void> onProxiesChanged();
Future<HealthMetrics> getHealthMetrics(bool detailed);
@ -219,9 +219,9 @@ public:
Reference<AsyncVar<Reference<ClusterConnectionFile>>> connectionFile;
AsyncTrigger proxiesChangeTrigger;
Future<Void> monitorProxiesInfoChange;
Reference<ProxyInfo> masterProxies;
Reference<CommitProxyInfo> commitProxies;
Reference<GrvProxyInfo> grvProxies;
bool proxyProvisional;
bool proxyProvisional; // Provisional commit proxy and grv proxy are used at the same time.
UID proxiesLastChange;
LocalityData clientLocality;
QueueModel queueModel;

@ -257,6 +257,7 @@ struct Traceable<std::set<T>> : std::true_type {
std::string printable( const StringRef& val );
std::string printable( const std::string& val );
std::string printable( const KeyRangeRef& range );
std::string printable(const VectorRef<KeyRangeRef>& val);
std::string printable( const VectorRef<StringRef>& val );
std::string printable( const VectorRef<KeyValueRef>& val );
std::string printable( const KeyValueRef& val );
@ -289,6 +290,14 @@ struct KeyRangeRef {
bool contains( const KeyRef& key ) const { return begin <= key && key < end; }
bool contains( const KeyRangeRef& keys ) const { return begin <= keys.begin && keys.end <= end; }
bool intersects( const KeyRangeRef& keys ) const { return begin < keys.end && keys.begin < end; }
bool intersects(const VectorRef<KeyRangeRef>& keysVec) const {
for (const auto& keys : keysVec) {
if (intersects(keys)) {
return true;
}
}
return false;
}
bool empty() const { return begin == end; }
bool singleKeyRange() const { return equalsKeyAfter(begin, end); }
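The new overload returns true as soon as any range in the vector overlaps, so an empty vector never intersects anything; consistent with that, getRestoreSet_impl handles an empty keyRangesFilter as a separate "no filtering" case rather than calling intersects(). A toy illustration with half-open string ranges (types here are illustrative, not KeyRangeRef itself):

#include <string>
#include <vector>

struct Range { std::string begin, end; }; // [begin, end), mirroring the semantics above

bool intersects(const Range& a, const Range& b) { return a.begin < b.end && b.begin < a.end; }

bool intersectsAny(const Range& a, const std::vector<Range>& v) {
    for (const Range& r : v) {
        if (intersects(a, r)) return true;
    }
    return false; // an empty vector never intersects
}
// intersectsAny({"a","m"}, {{"k","z"}}) == true; intersectsAny({"a","m"}, {}) == false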

@ -131,6 +131,9 @@ public:
KeyBackedProperty<Key> removePrefix() {
return configSpace.pack(LiteralStringRef(__FUNCTION__));
}
KeyBackedProperty<bool> incrementalBackupOnly() {
return configSpace.pack(LiteralStringRef(__FUNCTION__));
}
// XXX: Remove restoreRange() once it is safe to remove. It has been changed to restoreRanges
KeyBackedProperty<KeyRange> restoreRange() {
return configSpace.pack(LiteralStringRef(__FUNCTION__));
@ -141,6 +144,9 @@ public:
KeyBackedProperty<Key> batchFuture() {
return configSpace.pack(LiteralStringRef(__FUNCTION__));
}
KeyBackedProperty<Version> beginVersion() {
return configSpace.pack(LiteralStringRef(__FUNCTION__));
}
KeyBackedProperty<Version> restoreVersion() {
return configSpace.pack(LiteralStringRef(__FUNCTION__));
}
@ -557,7 +563,9 @@ namespace fileBackup {
if(rLen != len)
throw restore_bad_read();
simulateBlobFailure();
Standalone<VectorRef<KeyValueRef>> results({}, buf.arena());
state StringRefReader reader(buf, restore_corrupted_data());
try {
@ -597,17 +605,17 @@ namespace fileBackup {
if(b != 0xFF)
throw restore_corrupted_data_padding();
return results;
} catch(Error &e) {
TraceEvent(SevWarn, "FileRestoreCorruptRangeFileBlock")
.error(e)
.detail("Filename", file->getFilename())
.detail("BlockOffset", offset)
.detail("BlockLen", len)
.detail("ErrorRelativeOffset", reader.rptr - buf.begin())
.detail("ErrorAbsoluteOffset", reader.rptr - buf.begin() + offset);
throw;
TraceEvent(SevWarn, "FileRestoreDecodeRangeFileBlockFailed")
.error(e)
.detail("Filename", file->getFilename())
.detail("BlockOffset", offset)
.detail("BlockLen", len)
.detail("ErrorRelativeOffset", reader.rptr - buf.begin())
.detail("ErrorAbsoluteOffset", reader.rptr - buf.begin() + offset);
throw;
}
}
@ -740,9 +748,10 @@ namespace fileBackup {
state Subspace newConfigSpace = uidPrefixKey(LiteralStringRef("uid->config/").withPrefix(fileBackupPrefixRange.begin), uid);
Optional<Value> statusStr = wait(tr->get(statusSpace.pack(FileBackupAgent::keyStateStatus)));
state EBackupState status = !statusStr.present() ? FileBackupAgent::STATE_NEVERRAN : BackupAgentBase::getState(statusStr.get().toString());
state EBackupState status =
!statusStr.present() ? EBackupState::STATE_NEVERRAN : BackupAgentBase::getState(statusStr.get().toString());
TraceEvent(SevInfo, "FileBackupAbortIncompatibleBackup")
TraceEvent(SevInfo, "FileBackupAbortIncompatibleBackup")
.detail("TagName", tagName.c_str())
.detail("Status", BackupAgentBase::getStateText(status));
@ -762,9 +771,9 @@ namespace fileBackup {
// Set old style state key to Aborted if it was Runnable
if(backupAgent->isRunnable(status))
tr->set(statusKey, StringRef(FileBackupAgent::getStateText(BackupAgentBase::STATE_ABORTED)));
tr->set(statusKey, StringRef(FileBackupAgent::getStateText(EBackupState::STATE_ABORTED)));
return Void();
}
struct AbortFiveZeroBackupTask : TaskFuncBase {
@ -814,11 +823,11 @@ namespace fileBackup {
state BackupConfig config(current.first);
EBackupState status = wait(config.stateEnum().getD(tr, false, EBackupState::STATE_NEVERRAN));
if (!backupAgent->isRunnable((BackupAgentBase::enumState)status)) {
throw backup_unneeded();
}
if (!backupAgent->isRunnable(status)) {
throw backup_unneeded();
}
TraceEvent(SevInfo, "FBA_AbortFileOneBackup")
TraceEvent(SevInfo, "FBA_AbortFileOneBackup")
.detail("TagName", tagName.c_str())
.detail("Status", BackupAgentBase::getStateText(status));
@ -2092,10 +2101,10 @@ namespace fileBackup {
}
// If the backup is restorable but the state is not differential then set state to differential
if(restorableVersion.present() && backupState != BackupAgentBase::STATE_RUNNING_DIFFERENTIAL)
config.stateEnum().set(tr, BackupAgentBase::STATE_RUNNING_DIFFERENTIAL);
if (restorableVersion.present() && backupState != EBackupState::STATE_RUNNING_DIFFERENTIAL)
config.stateEnum().set(tr, EBackupState::STATE_RUNNING_DIFFERENTIAL);
// If stopWhenDone is set and there is a restorable version, set the done future and do not create further tasks.
if(stopWhenDone && restorableVersion.present()) {
wait(onDone->set(tr, taskBucket) && taskBucket->finish(tr, task));
@ -2342,10 +2351,10 @@ namespace fileBackup {
}
// If the backup is restorable and the state isn't differential then set the state to differential
if(restorableVersion.present() && backupState != BackupAgentBase::STATE_RUNNING_DIFFERENTIAL)
config.stateEnum().set(tr, BackupAgentBase::STATE_RUNNING_DIFFERENTIAL);
if (restorableVersion.present() && backupState != EBackupState::STATE_RUNNING_DIFFERENTIAL)
config.stateEnum().set(tr, EBackupState::STATE_RUNNING_DIFFERENTIAL);
// Unless we are to stop, start the next snapshot using the default interval
Reference<TaskFuture> snapshotDoneFuture = task->getDoneFuture(futureBucket);
if(!stopWhenDone) {
wait(config.initNewSnapshot(tr) && success(BackupSnapshotDispatchTask::addTask(tr, taskBucket, task, 1, TaskCompletionKey::signal(snapshotDoneFuture))));
@ -2474,7 +2483,8 @@ namespace fileBackup {
state Future<std::vector<KeyRange>> backupRangesFuture = config.backupRanges().getOrThrow(tr);
state Future<Key> destUidValueFuture = config.destUidValue().getOrThrow(tr);
state Future<Optional<bool>> partitionedLog = config.partitionedLogEnabled().get(tr);
wait(success(backupRangesFuture) && success(destUidValueFuture) && success(partitionedLog));
state Future<Optional<bool>> incrementalBackupOnly = config.incrementalBackupOnly().get(tr);
wait(success(backupRangesFuture) && success(destUidValueFuture) && success(partitionedLog) && success(incrementalBackupOnly));
std::vector<KeyRange> backupRanges = backupRangesFuture.get();
Key destUidValue = destUidValueFuture.get();
@ -2494,7 +2504,10 @@ namespace fileBackup {
wait(config.initNewSnapshot(tr, 0));
// Using priority 1 for both of these to at least start both tasks soon
wait(success(BackupSnapshotDispatchTask::addTask(tr, taskBucket, task, 1, TaskCompletionKey::joinWith(backupFinished))));
// Do not add snapshot task if we only want the incremental backup
if (!incrementalBackupOnly.get().present() || !incrementalBackupOnly.get().get()) {
wait(success(BackupSnapshotDispatchTask::addTask(tr, taskBucket, task, 1, TaskCompletionKey::joinWith(backupFinished))));
}
wait(success(BackupLogsDispatchTask::addTask(tr, taskBucket, task, 1, 0, beginVersion, TaskCompletionKey::joinWith(backupFinished))));
// If a clean stop is requested, the log and snapshot tasks will quit after the backup is restorable, then the following
@ -3008,8 +3021,10 @@ namespace fileBackup {
state int64_t remainingInBatch = Params.remainingInBatch().get(task);
state bool addingToExistingBatch = remainingInBatch > 0;
state Version restoreVersion;
state Future<Optional<bool>> incrementalBackupOnly = restore.incrementalBackupOnly().get(tr);
wait(store(restoreVersion, restore.restoreVersion().getOrThrow(tr))
&& success(incrementalBackupOnly)
&& checkTaskVersion(tr->getDatabase(), task, name, version));
// If not adding to an existing batch then update the apply mutations end version so the mutations from the
@ -3398,6 +3413,7 @@ namespace fileBackup {
state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(cx));
state RestoreConfig restore(task);
state Version restoreVersion;
state Version beginVersion;
state Reference<IBackupContainer> bc;
loop {
@ -3408,6 +3424,8 @@ namespace fileBackup {
wait(checkTaskVersion(tr->getDatabase(), task, name, version));
Version _restoreVersion = wait(restore.restoreVersion().getOrThrow(tr));
restoreVersion = _restoreVersion;
Optional<Version> _beginVersion = wait(restore.beginVersion().get(tr));
beginVersion = _beginVersion.present() ? _beginVersion.get() : invalidVersion;
wait(taskBucket->keepRunning(tr, task));
ERestoreState oldState = wait(restore.stateEnum().getD(tr));
@ -3447,14 +3465,22 @@ namespace fileBackup {
wait(tr->onError(e));
}
}
Optional<bool> _incremental = wait(restore.incrementalBackupOnly().get(tr));
state bool incremental = _incremental.present() ? _incremental.get() : false;
if (beginVersion == invalidVersion) {
beginVersion = 0;
}
Optional<RestorableFileSet> restorable =
wait(bc->getRestoreSet(restoreVersion, VectorRef<KeyRangeRef>(), incremental, beginVersion));
if (!incremental) {
beginVersion = restorable.get().snapshot.beginVersion;
}
Optional<RestorableFileSet> restorable = wait(bc->getRestoreSet(restoreVersion));
if(!restorable.present())
throw restore_missing_data();
// First version for which log data should be applied
Params.firstVersion().set(task, restorable.get().snapshot.beginVersion);
Params.firstVersion().set(task, beginVersion);
// Convert the two lists in restorable (logs and ranges) to a single list of RestoreFiles.
// Order does not matter, they will be put in order when written to the restoreFileMap below.
@ -3463,6 +3489,7 @@ namespace fileBackup {
for(const RangeFile &f : restorable.get().ranges) {
files.push_back({f.version, f.fileName, true, f.blockSize, f.fileSize});
}
for(const LogFile &f : restorable.get().logs) {
files.push_back({f.beginVersion, f.fileName, false, f.blockSize, f.fileSize, f.endVersion});
}
@ -3526,6 +3553,7 @@ namespace fileBackup {
restore.stateEnum().set(tr, ERestoreState::RUNNING);
// Set applyMutation versions
restore.setApplyBeginVersion(tr, firstVersion);
restore.setApplyEndVersion(tr, firstVersion);
@ -3533,6 +3561,14 @@ namespace fileBackup {
wait(success(RestoreDispatchTaskFunc::addTask(tr, taskBucket, task, 0, "", 0, CLIENT_KNOBS->RESTORE_DISPATCH_BATCH_SIZE)));
wait(taskBucket->finish(tr, task));
state Future<Optional<bool>> logsOnly = restore.incrementalBackupOnly().get(tr);
wait(success(logsOnly));
if (logsOnly.get().present() && logsOnly.get().get()) {
// If this is an incremental restore, we need to set the applyMutationsMapPrefix
// to the earliest log version so no mutations are missed
Value versionEncoded = BinaryWriter::toValue(Params.firstVersion().get(task), Unversioned());
wait(krmSetRange(tr, restore.applyMutationsMapPrefix(), normalKeys, versionEncoded));
}
return Void();
}
@ -3712,7 +3748,9 @@ public:
// This method will return the final status of the backup at tag, and return the URL that was used on the tag
// when that status value was read.
ACTOR static Future<int> waitBackup(FileBackupAgent* backupAgent, Database cx, std::string tagName, bool stopWhenDone, Reference<IBackupContainer> *pContainer = nullptr, UID *pUID = nullptr) {
ACTOR static Future<EBackupState> waitBackup(FileBackupAgent* backupAgent, Database cx, std::string tagName,
bool stopWhenDone, Reference<IBackupContainer>* pContainer = nullptr,
UID* pUID = nullptr) {
state std::string backTrace;
state KeyBackedTag tag = makeBackupTag(tagName);
@ -3733,7 +3771,8 @@ public:
// Break, if one of the following is true
// - no longer runnable
// - in differential mode (restorable) and stopWhenDone is not enabled
if( !FileBackupAgent::isRunnable(status) || ((!stopWhenDone) && (BackupAgentBase::STATE_RUNNING_DIFFERENTIAL == status) )) {
if (!FileBackupAgent::isRunnable(status) ||
((!stopWhenDone) && (EBackupState::STATE_RUNNING_DIFFERENTIAL == status))) {
if(pContainer != nullptr) {
Reference<IBackupContainer> c = wait(config.backupContainer().getOrThrow(tr, false, backup_invalid_info()));
@ -3760,7 +3799,7 @@ public:
ACTOR static Future<Void> submitBackup(FileBackupAgent* backupAgent, Reference<ReadYourWritesTransaction> tr,
Key outContainer, int snapshotIntervalSeconds, std::string tagName,
Standalone<VectorRef<KeyRangeRef>> backupRanges, bool stopWhenDone,
bool partitionedLog) {
bool partitionedLog, bool incrementalBackupOnly) {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
tr->setOption(FDBTransactionOptions::COMMIT_ON_FIRST_PROXY);
@ -3863,13 +3902,17 @@ public:
config.backupRanges().set(tr, normalizedRanges);
config.snapshotIntervalSeconds().set(tr, snapshotIntervalSeconds);
config.partitionedLogEnabled().set(tr, partitionedLog);
config.incrementalBackupOnly().set(tr, incrementalBackupOnly);
Key taskKey = wait(fileBackup::StartFullBackupTaskFunc::addTask(tr, backupAgent->taskBucket, uid, TaskCompletionKey::noSignal()));
return Void();
}
ACTOR static Future<Void> submitRestore(FileBackupAgent* backupAgent, Reference<ReadYourWritesTransaction> tr, Key tagName, Key backupURL, Standalone<VectorRef<KeyRangeRef>> ranges, Version restoreVersion, Key addPrefix, Key removePrefix, bool lockDB, UID uid) {
ACTOR static Future<Void> submitRestore(FileBackupAgent* backupAgent, Reference<ReadYourWritesTransaction> tr,
Key tagName, Key backupURL, Standalone<VectorRef<KeyRangeRef>> ranges,
Version restoreVersion, Key addPrefix, Key removePrefix, bool lockDB,
bool incrementalBackupOnly, Version beginVersion, UID uid) {
KeyRangeMap<int> restoreRangeSet;
for (auto& range : ranges) {
restoreRangeSet.insert(range, 1);
@ -3917,7 +3960,7 @@ public:
for (index = 0; index < restoreRanges.size(); index++) {
KeyRange restoreIntoRange = KeyRangeRef(restoreRanges[index].begin, restoreRanges[index].end).removePrefix(removePrefix).withPrefix(addPrefix);
Standalone<RangeResultRef> existingRows = wait(tr->getRange(restoreIntoRange, 1));
if (existingRows.size() > 0) {
if (existingRows.size() > 0 && !incrementalBackupOnly) {
throw restore_destination_not_empty();
}
}
@ -3934,6 +3977,8 @@ public:
restore.sourceContainer().set(tr, bc);
restore.stateEnum().set(tr, ERestoreState::QUEUED);
restore.restoreVersion().set(tr, restoreVersion);
restore.incrementalBackupOnly().set(tr, incrementalBackupOnly);
restore.beginVersion().set(tr, beginVersion);
if (BUGGIFY && restoreRanges.size() == 1) {
restore.restoreRange().set(tr, restoreRanges[0]);
}
@ -4063,7 +4108,7 @@ public:
state Key destUidValue = wait(config.destUidValue().getOrThrow(tr));
EBackupState status = wait(config.stateEnum().getD(tr, false, EBackupState::STATE_NEVERRAN));
if (!backupAgent->isRunnable((BackupAgentBase::enumState)status)) {
if (!backupAgent->isRunnable(status)) {
throw backup_unneeded();
}
@ -4166,13 +4211,13 @@ public:
JsonBuilderObject statusDoc;
statusDoc.setKey("Name", BackupAgentBase::getStateName(backupState));
statusDoc.setKey("Description", BackupAgentBase::getStateText(backupState));
statusDoc.setKey("Completed", backupState == BackupAgentBase::STATE_COMPLETED);
statusDoc.setKey("Completed", backupState == EBackupState::STATE_COMPLETED);
statusDoc.setKey("Running", BackupAgentBase::isRunnable(backupState));
doc.setKey("Status", statusDoc);
state Future<Void> done = Void();
if(backupState != BackupAgentBase::STATE_NEVERRAN) {
if (backupState != EBackupState::STATE_NEVERRAN) {
state Reference<IBackupContainer> bc;
state TimestampedVersion latestRestorable;
@ -4184,7 +4229,7 @@ public:
if(latestRestorable.present()) {
JsonBuilderObject o = latestRestorable.toJSON();
if(backupState != BackupAgentBase::STATE_COMPLETED) {
if (backupState != EBackupState::STATE_COMPLETED) {
o.setKey("LagSeconds", (recentReadVersion - latestRestorable.version.get()) / CLIENT_KNOBS->CORE_VERSIONSPERSECOND);
}
doc.setKey("LatestRestorablePoint", o);
@ -4192,7 +4237,8 @@ public:
doc.setKey("DestinationURL", bc->getURL());
}
if(backupState == BackupAgentBase::STATE_RUNNING_DIFFERENTIAL || backupState == BackupAgentBase::STATE_RUNNING) {
if (backupState == EBackupState::STATE_RUNNING_DIFFERENTIAL ||
backupState == EBackupState::STATE_RUNNING) {
state int64_t snapshotInterval;
state int64_t logBytesWritten;
state int64_t rangeBytesWritten;
@ -4315,23 +4361,28 @@ public:
bool snapshotProgress = false;
switch (backupState) {
case BackupAgentBase::STATE_SUBMITTED:
statusText += "The backup on tag `" + tagName + "' is in progress (just started) to " + bc->getURL() + ".\n";
break;
case BackupAgentBase::STATE_RUNNING:
statusText += "The backup on tag `" + tagName + "' is in progress to " + bc->getURL() + ".\n";
snapshotProgress = true;
break;
case BackupAgentBase::STATE_RUNNING_DIFFERENTIAL:
statusText += "The backup on tag `" + tagName + "' is restorable but continuing to " + bc->getURL() + ".\n";
snapshotProgress = true;
break;
case BackupAgentBase::STATE_COMPLETED:
statusText += "The previous backup on tag `" + tagName + "' at " + bc->getURL() + " completed at version " + format("%lld", latestRestorableVersion.orDefault(-1)) + ".\n";
break;
default:
statusText += "The previous backup on tag `" + tagName + "' at " + bc->getURL() + " " + backupStatus + ".\n";
break;
case EBackupState::STATE_SUBMITTED:
statusText += "The backup on tag `" + tagName + "' is in progress (just started) to " +
bc->getURL() + ".\n";
break;
case EBackupState::STATE_RUNNING:
statusText += "The backup on tag `" + tagName + "' is in progress to " + bc->getURL() + ".\n";
snapshotProgress = true;
break;
case EBackupState::STATE_RUNNING_DIFFERENTIAL:
statusText += "The backup on tag `" + tagName + "' is restorable but continuing to " +
bc->getURL() + ".\n";
snapshotProgress = true;
break;
case EBackupState::STATE_COMPLETED:
statusText += "The previous backup on tag `" + tagName + "' at " + bc->getURL() +
" completed at version " + format("%lld", latestRestorableVersion.orDefault(-1)) +
".\n";
break;
default:
statusText += "The previous backup on tag `" + tagName + "' at " + bc->getURL() + " " +
backupStatus + ".\n";
break;
}
statusText += format("BackupUID: %s\n", uidAndAbortedFlag.get().first.toString().c_str());
statusText += format("BackupURL: %s\n", bc->getURL().c_str());
@ -4367,7 +4418,7 @@ public:
);
statusText += format("Snapshot interval is %lld seconds. ", snapshotInterval);
if(backupState == BackupAgentBase::STATE_RUNNING_DIFFERENTIAL)
if (backupState == EBackupState::STATE_RUNNING_DIFFERENTIAL)
statusText += format("Current snapshot progress target is %3.2f%% (>100%% means the snapshot is supposed to be done)\n", 100.0 * (recentReadVersion - snapshotBeginVersion) / (snapshotTargetEndVersion - snapshotBeginVersion)) ;
else
statusText += "The initial snapshot is still running.\n";
@ -4451,7 +4502,8 @@ public:
ACTOR static Future<Version> restore(FileBackupAgent* backupAgent, Database cx, Optional<Database> cxOrig,
Key tagName, Key url, Standalone<VectorRef<KeyRangeRef>> ranges,
bool waitForComplete, Version targetVersion, bool verbose, Key addPrefix,
Key removePrefix, bool lockDB, UID randomUid) {
Key removePrefix, bool lockDB, bool incrementalBackupOnly,
Version beginVersion, UID randomUid) {
state Reference<IBackupContainer> bc = IBackupContainer::openContainer(url.toString());
state BackupDescription desc = wait(bc->describeBackup());
@ -4463,7 +4515,12 @@ public:
if(targetVersion == invalidVersion && desc.maxRestorableVersion.present())
targetVersion = desc.maxRestorableVersion.get();
Optional<RestorableFileSet> restoreSet = wait(bc->getRestoreSet(targetVersion));
if (targetVersion == invalidVersion && incrementalBackupOnly && desc.contiguousLogEnd.present()) {
targetVersion = desc.contiguousLogEnd.get() - 1;
}
Optional<RestorableFileSet> restoreSet =
wait(bc->getRestoreSet(targetVersion, VectorRef<KeyRangeRef>(), incrementalBackupOnly, beginVersion));
if(!restoreSet.present()) {
TraceEvent(SevWarn, "FileBackupAgentRestoreNotPossible")
@ -4482,7 +4539,8 @@ public:
try {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
wait(submitRestore(backupAgent, tr, tagName, url, ranges, targetVersion, addPrefix, removePrefix, lockDB, randomUid));
wait(submitRestore(backupAgent, tr, tagName, url, ranges, targetVersion, addPrefix, removePrefix,
lockDB, incrementalBackupOnly, beginVersion, randomUid));
wait(tr->commit());
break;
} catch(Error &e) {
@ -4518,7 +4576,7 @@ public:
backupConfig = BackupConfig(uidFlag.first);
state EBackupState status = wait(backupConfig.stateEnum().getOrThrow(ryw_tr));
if (status != BackupAgentBase::STATE_RUNNING_DIFFERENTIAL ) {
if (status != EBackupState::STATE_RUNNING_DIFFERENTIAL) {
throw backup_duplicate();
}
@ -4619,7 +4677,7 @@ public:
} else {
TraceEvent("AS_StartRestore");
Version ver = wait(restore(backupAgent, cx, cx, tagName, KeyRef(bc->getURL()), ranges, true, -1, true,
addPrefix, removePrefix, true, randomUid));
addPrefix, removePrefix, true, false, invalidVersion, randomUid));
return ver;
}
}
@ -4656,8 +4714,13 @@ Future<Void> FileBackupAgent::atomicParallelRestore(Database cx, Key tagName, St
return FileBackupAgentImpl::atomicParallelRestore(this, cx, tagName, ranges, addPrefix, removePrefix);
}
Future<Version> FileBackupAgent::restore(Database cx, Optional<Database> cxOrig, Key tagName, Key url, Standalone<VectorRef<KeyRangeRef>> ranges, bool waitForComplete, Version targetVersion, bool verbose, Key addPrefix, Key removePrefix, bool lockDB) {
return FileBackupAgentImpl::restore(this, cx, cxOrig, tagName, url, ranges, waitForComplete, targetVersion, verbose, addPrefix, removePrefix, lockDB, deterministicRandom()->randomUniqueID());
Future<Version> FileBackupAgent::restore(Database cx, Optional<Database> cxOrig, Key tagName, Key url,
Standalone<VectorRef<KeyRangeRef>> ranges, bool waitForComplete,
Version targetVersion, bool verbose, Key addPrefix, Key removePrefix,
bool lockDB, bool incrementalBackupOnly, Version beginVersion) {
return FileBackupAgentImpl::restore(this, cx, cxOrig, tagName, url, ranges, waitForComplete, targetVersion, verbose,
addPrefix, removePrefix, lockDB, incrementalBackupOnly, beginVersion,
deterministicRandom()->randomUniqueID());
}
Future<Version> FileBackupAgent::atomicRestore(Database cx, Key tagName, Standalone<VectorRef<KeyRangeRef>> ranges, Key addPrefix, Key removePrefix) {
@ -4683,9 +4746,9 @@ Future<ERestoreState> FileBackupAgent::waitRestore(Database cx, Key tagName, boo
Future<Void> FileBackupAgent::submitBackup(Reference<ReadYourWritesTransaction> tr, Key outContainer,
int snapshotIntervalSeconds, std::string tagName,
Standalone<VectorRef<KeyRangeRef>> backupRanges, bool stopWhenDone,
bool partitionedLog) {
bool partitionedLog, bool incrementalBackupOnly) {
return FileBackupAgentImpl::submitBackup(this, tr, outContainer, snapshotIntervalSeconds, tagName, backupRanges,
stopWhenDone, partitionedLog);
stopWhenDone, partitionedLog, incrementalBackupOnly);
}
Future<Void> FileBackupAgent::discontinueBackup(Reference<ReadYourWritesTransaction> tr, Key tagName){
@ -4714,7 +4777,8 @@ void FileBackupAgent::setLastRestorable(Reference<ReadYourWritesTransaction> tr,
tr->set(lastRestorable.pack(tagName), BinaryWriter::toValue<Version>(version, Unversioned()));
}
Future<int> FileBackupAgent::waitBackup(Database cx, std::string tagName, bool stopWhenDone, Reference<IBackupContainer> *pContainer, UID *pUID) {
Future<EBackupState> FileBackupAgent::waitBackup(Database cx, std::string tagName, bool stopWhenDone,
Reference<IBackupContainer>* pContainer, UID* pUID) {
return FileBackupAgentImpl::waitBackup(this, cx, tagName, stopWhenDone, pContainer, pUID);
}
@ -4963,3 +5027,18 @@ ACTOR Future<Void> transformRestoredDatabase(Database cx, Standalone<VectorRef<K
return Void();
}
void simulateBlobFailure() {
if (BUGGIFY && deterministicRandom()->random01() < 0.01) { // Simulate blob failures
double i = deterministicRandom()->random01();
if (i < 0.5) {
throw http_request_failed();
} else if (i < 0.7) {
throw connection_failed();
} else if (i < 0.8) {
throw timed_out();
} else if (i < 0.9) {
throw lookup_failed();
}
}
}
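Note (editorial, derived from the branch above): within the 1% BUGGIFY-gated branch, the inner random draw partitions the injected outcomes roughly as 50% http_request_failed, 20% connection_failed, 10% timed_out, 10% lookup_failed, and 10% no error thrown.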

View File

@ -27,6 +27,8 @@
// with RateKeeper to gather health information of the cluster.
struct GrvProxyInterface {
constexpr static FileIdentifier file_identifier = 8743216;
enum { LocationAwareLoadBalance = 1 };
enum { AlwaysFresh = 1 };
Optional<Key> processId;
bool provisional;

View File

@ -72,7 +72,7 @@ namespace HTTP {
}
PacketBuffer * writeRequestHeader(std::string const &verb, std::string const &resource, HTTP::Headers const &headers, PacketBuffer *dest) {
PacketWriter writer(dest, NULL, Unversioned());
PacketWriter writer(dest, nullptr, Unversioned());
writer.serializeBytes(verb);
writer.serializeBytes(" ", 1);
writer.serializeBytes(resource);
@ -238,7 +238,7 @@ namespace HTTP {
{
// Read the line that contains the chunk length as text in hex
size_t lineLen = wait(read_delimited_into_string(conn, "\r\n", &r->content, pos));
state int chunkLen = strtol(r->content.substr(pos, lineLen).c_str(), NULL, 16);
state int chunkLen = strtol(r->content.substr(pos, lineLen).c_str(), nullptr, 16);
// Instead of advancing pos, erase the chunk length header line (line length + delimiter size) from the content buffer
r->content.erase(pos, lineLen + 2);
@ -301,7 +301,7 @@ namespace HTTP {
state TraceEvent event(SevDebug, "HTTPRequest");
state UnsentPacketQueue empty;
if(pContent == NULL)
if(pContent == nullptr)
pContent = &empty;
// There is no standard http request id header field, so either a global default can be set via a knob

View File

@ -67,11 +67,11 @@
// // The following would throw if a.b.c did not exist, or if it was not an int.
// int x = r["a.b.c"].get_int();
struct JSONDoc {
JSONDoc() : pObj(NULL) {}
JSONDoc() : pObj(nullptr) {}
// Construction from const json_spirit::mObject, trivial and will never throw.
// Resulting JSONDoc will not allow modifications.
JSONDoc(const json_spirit::mObject &o) : pObj(&o), wpObj(NULL) {}
JSONDoc(const json_spirit::mObject &o) : pObj(&o), wpObj(nullptr) {}
// Construction from json_spirit::mObject. Allows modifications.
JSONDoc(json_spirit::mObject &o) : pObj(&o), wpObj(&o) {}
@ -79,7 +79,7 @@ struct JSONDoc {
// Construction from const json_spirit::mValue (which is a Variant type) which will try to
// convert it to an mObject. This will throw if that fails, just as it would
// if the caller called get_obj() itself and used the previous constructor instead.
JSONDoc(const json_spirit::mValue &v) : pObj(&v.get_obj()), wpObj(NULL) {}
JSONDoc(const json_spirit::mValue &v) : pObj(&v.get_obj()), wpObj(nullptr) {}
// Construction from non-const json_spirit::mValue - will convert the mValue to
// an object if it isn't already and then attach to it.
@ -98,13 +98,13 @@ struct JSONDoc {
// path into on the "dot" character.
// When a path is found, pLast is updated.
bool has(std::string path, bool split=true) {
if (pObj == NULL)
if (pObj == nullptr)
return false;
if (path.empty())
return false;
size_t start = 0;
const json_spirit::mValue *curVal = NULL;
const json_spirit::mValue *curVal = nullptr;
while (start < path.size())
{
// If a path segment is found then curVal must be an object
@ -140,7 +140,7 @@ struct JSONDoc {
// Creates the given path (forcing Objects to exist along its depth, replacing whatever else might have been there)
// and returns a reference to the Value at that location.
json_spirit::mValue & create(std::string path, bool split=true) {
if (wpObj == NULL || path.empty())
if (wpObj == nullptr || path.empty())
throw std::runtime_error("JSON Object not writable or bad JSON path");
size_t start = 0;
@ -280,7 +280,7 @@ struct JSONDoc {
}
const json_spirit::mValue & last() const { return *pLast; }
bool valid() const { return pObj != NULL; }
bool valid() const { return pObj != nullptr; }
const json_spirit::mObject & obj() {
// This dummy object is necessary to make working with obj() easier when this does not currently
@ -304,7 +304,7 @@ struct JSONDoc {
static uint64_t expires_reference_version;
private:
const json_spirit::mObject *pObj;
// Writeable pointer to the same object. Will be NULL if initialized from a const object.
// Writeable pointer to the same object. Will be nullptr if initialized from a const object.
json_spirit::mObject *wpObj;
const json_spirit::mValue *pLast;
};

View File

@ -52,7 +52,7 @@ void ClientKnobs::initialize(bool randomize) {
init( COORDINATOR_RECONNECTION_DELAY, 1.0 );
init( CLIENT_EXAMPLE_AMOUNT, 20 );
init( MAX_CLIENT_STATUS_AGE, 1.0 );
init( MAX_MASTER_PROXY_CONNECTIONS, 5 ); if( randomize && BUGGIFY ) MAX_MASTER_PROXY_CONNECTIONS = 1;
init( MAX_COMMIT_PROXY_CONNECTIONS, 5 ); if( randomize && BUGGIFY ) MAX_COMMIT_PROXY_CONNECTIONS = 1;
init( MAX_GRV_PROXY_CONNECTIONS, 3 ); if( randomize && BUGGIFY ) MAX_GRV_PROXY_CONNECTIONS = 1;
init( STATUS_IDLE_TIMEOUT, 120.0 );
@ -104,7 +104,7 @@ void ClientKnobs::initialize(bool randomize) {
init( WATCH_POLLING_TIME, 1.0 ); if( randomize && BUGGIFY ) WATCH_POLLING_TIME = 5.0;
init( NO_RECENT_UPDATES_DURATION, 20.0 ); if( randomize && BUGGIFY ) NO_RECENT_UPDATES_DURATION = 0.1;
init( FAST_WATCH_TIMEOUT, 20.0 ); if( randomize && BUGGIFY ) FAST_WATCH_TIMEOUT = 1.0;
init( WATCH_TIMEOUT, 900.0 ); if( randomize && BUGGIFY ) WATCH_TIMEOUT = 20.0;
init( WATCH_TIMEOUT, 30.0 ); if( randomize && BUGGIFY ) WATCH_TIMEOUT = 20.0;
// Core
init( CORE_VERSIONSPERSECOND, 1e6 );
@ -171,10 +171,12 @@ void ClientKnobs::initialize(bool randomize) {
init( MIN_CLEANUP_SECONDS, 3600.0 );
// Configuration
init( DEFAULT_AUTO_PROXIES, 3 );
init( DEFAULT_AUTO_COMMIT_PROXIES, 3 );
init( DEFAULT_AUTO_GRV_PROXIES, 1 );
init( DEFAULT_AUTO_RESOLVERS, 1 );
init( DEFAULT_AUTO_LOGS, 3 );
init( DEFAULT_COMMIT_GRV_PROXIES_RATIO, 3 );
init( DEFAULT_MAX_GRV_PROXIES, 4 );
init( IS_ACCEPTABLE_DELAY, 1.5 );

View File

@ -27,8 +27,6 @@
class ClientKnobs : public Knobs {
public:
int BYTE_LIMIT_UNLIMITED;
int ROW_LIMIT_UNLIMITED;
int TOO_MANY; // FIXME: this should really be split up so we can control these more specifically
@ -48,7 +46,7 @@ public:
double COORDINATOR_RECONNECTION_DELAY;
int CLIENT_EXAMPLE_AMOUNT;
double MAX_CLIENT_STATUS_AGE;
int MAX_MASTER_PROXY_CONNECTIONS;
int MAX_COMMIT_PROXY_CONNECTIONS;
int MAX_GRV_PROXY_CONNECTIONS;
double STATUS_IDLE_TIMEOUT;
@ -169,8 +167,10 @@ public:
double MIN_CLEANUP_SECONDS;
// Configuration
int32_t DEFAULT_AUTO_PROXIES;
int32_t DEFAULT_AUTO_COMMIT_PROXIES;
int32_t DEFAULT_AUTO_GRV_PROXIES;
int32_t DEFAULT_COMMIT_GRV_PROXIES_RATIO;
int32_t DEFAULT_MAX_GRV_PROXIES;
int32_t DEFAULT_AUTO_RESOLVERS;
int32_t DEFAULT_AUTO_LOGS;

View File

@ -19,8 +19,10 @@
*/
#include <cinttypes>
#include <string>
#include <vector>
#include "fdbclient/Knobs.h"
#include "flow/Arena.h"
#include "fdbclient/FDBOptions.g.h"
#include "fdbclient/FDBTypes.h"
@ -33,6 +35,7 @@
#include "fdbclient/DatabaseContext.h"
#include "fdbrpc/simulator.h"
#include "fdbclient/StatusClient.h"
#include "flow/Trace.h"
#include "flow/UnitTest.h"
#include "fdbrpc/ReplicationPolicy.h"
#include "fdbrpc/Replication.h"
@ -78,8 +81,42 @@ std::map<std::string, std::string> configForToken( std::string const& mode ) {
std::string key = mode.substr(0, pos);
std::string value = mode.substr(pos+1);
if ((key == "logs" || key == "proxies" || key == "grv_proxies" || key == "resolvers" || key == "remote_logs" ||
key == "log_routers" || key == "usable_regions" || key == "repopulate_anti_quorum") &&
if (key == "proxies" && isInteger(value)) {
printf("Warning: Proxy role is being split into GRV Proxy and Commit Proxy, now prefer configuring "
"'grv_proxies' and 'commit_proxies' separately. Generally we should follow that 'commit_proxies'"
" is three times of 'grv_proxies' count and 'grv_proxies' should be not more than 4.\n");
int proxiesCount = atoi(value.c_str());
if (proxiesCount == -1) {
proxiesCount = CLIENT_KNOBS->DEFAULT_AUTO_GRV_PROXIES + CLIENT_KNOBS->DEFAULT_AUTO_COMMIT_PROXIES;
ASSERT_WE_THINK(proxiesCount >= 2);
}
if (proxiesCount < 2) {
printf("Error: At least 2 proxies (1 GRV proxy and 1 Commit proxy) are required.\n");
return out;
}
int grvProxyCount =
std::max(1, std::min(CLIENT_KNOBS->DEFAULT_MAX_GRV_PROXIES,
proxiesCount / (CLIENT_KNOBS->DEFAULT_COMMIT_GRV_PROXIES_RATIO + 1)));
int commitProxyCount = proxiesCount - grvProxyCount;
ASSERT_WE_THINK(grvProxyCount >= 1 && commitProxyCount >= 1);
out[p + "grv_proxies"] = std::to_string(grvProxyCount);
out[p + "commit_proxies"] = std::to_string(commitProxyCount);
printf("%d proxies are automatically converted into %d GRV proxies and %d Commit proxies.\n", proxiesCount,
grvProxyCount, commitProxyCount);
TraceEvent("DatabaseConfigurationProxiesSpecified")
.detail("SpecifiedProxies", atoi(value.c_str()))
.detail("EffectiveSpecifiedProxies", proxiesCount)
.detail("ConvertedGrvProxies", grvProxyCount)
.detail("ConvertedCommitProxies", commitProxyCount);
}
if ((key == "logs" || key == "commit_proxies" || key == "grv_proxies" || key == "resolvers" ||
key == "remote_logs" || key == "log_routers" || key == "usable_regions" ||
key == "repopulate_anti_quorum") &&
isInteger(value)) {
out[p+key] = value;
}
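Note (editorial): a minimal standalone sketch of the `proxies` split performed above, assuming the knob defaults introduced later in this change (DEFAULT_COMMIT_GRV_PROXIES_RATIO = 3, DEFAULT_MAX_GRV_PROXIES = 4). The program below is illustrative only and is not part of the change.

#include <algorithm>
#include <cstdio>

int main() {
	const int ratio = 3;  // DEFAULT_COMMIT_GRV_PROXIES_RATIO (assumed default)
	const int maxGrv = 4; // DEFAULT_MAX_GRV_PROXIES (assumed default)
	// Same arithmetic as the conversion above: grv = clamp(proxies / (ratio + 1)) to [1, maxGrv].
	for (int proxies : { 2, 5, 9, 20 }) {
		int grv = std::max(1, std::min(maxGrv, proxies / (ratio + 1)));
		int commit = proxies - grv;
		printf("proxies=%d -> grv_proxies=%d commit_proxies=%d\n", proxies, grv, commit);
	}
	return 0;
}

For example, configuring `proxies=9` yields 2 GRV proxies and 7 commit proxies under these defaults.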
@ -229,7 +266,8 @@ std::map<std::string, std::string> configForToken( std::string const& mode ) {
return out;
}
ConfigurationResult::Type buildConfiguration( std::vector<StringRef> const& modeTokens, std::map<std::string, std::string>& outConf ) {
ConfigurationResult buildConfiguration(std::vector<StringRef> const& modeTokens,
std::map<std::string, std::string>& outConf) {
for(auto it : modeTokens) {
std::string mode = it.toString();
auto m = configForToken( mode );
@ -265,7 +303,7 @@ ConfigurationResult::Type buildConfiguration( std::vector<StringRef> const& mode
return ConfigurationResult::SUCCESS;
}
ConfigurationResult::Type buildConfiguration( std::string const& configMode, std::map<std::string, std::string>& outConf ) {
ConfigurationResult buildConfiguration(std::string const& configMode, std::map<std::string, std::string>& outConf) {
std::vector<StringRef> modes;
int p = 0;
@ -305,7 +343,7 @@ ACTOR Future<DatabaseConfiguration> getDatabaseConfiguration( Database cx ) {
}
}
ACTOR Future<ConfigurationResult::Type> changeConfig( Database cx, std::map<std::string, std::string> m, bool force ) {
ACTOR Future<ConfigurationResult> changeConfig(Database cx, std::map<std::string, std::string> m, bool force) {
state StringRef initIdKey = LiteralStringRef( "\xff/init_id" );
state Transaction tr(cx);
@ -656,7 +694,7 @@ ConfigureAutoResult parseConfig( StatusObject const& status ) {
}
if (processClass.classType() == ProcessClass::TransactionClass ||
processClass.classType() == ProcessClass::ProxyClass ||
processClass.classType() == ProcessClass::CommitProxyClass ||
processClass.classType() == ProcessClass::GrvProxyClass ||
processClass.classType() == ProcessClass::ResolutionClass ||
processClass.classType() == ProcessClass::StatelessClass ||
@ -701,7 +739,7 @@ ConfigureAutoResult parseConfig( StatusObject const& status ) {
if (proc.second == ProcessClass::StatelessClass) {
existingStatelessCount++;
}
if(proc.second == ProcessClass::ProxyClass) {
if (proc.second == ProcessClass::CommitProxyClass) {
existingProxyCount++;
}
if (proc.second == ProcessClass::GrvProxyClass) {
@ -734,19 +772,18 @@ ConfigureAutoResult parseConfig( StatusObject const& status ) {
resolverCount = result.old_resolvers;
}
result.desired_proxies = std::max(std::min(12, processCount / 15), 1);
result.desired_commit_proxies = std::max(std::min(12, processCount / 15), 1);
int proxyCount;
if (!statusObjConfig.get("proxies", result.old_proxies)) {
result.old_proxies = CLIENT_KNOBS->DEFAULT_AUTO_PROXIES;
statusObjConfig.get("auto_proxies", result.old_proxies);
result.auto_proxies = result.desired_proxies;
proxyCount = result.auto_proxies;
if (!statusObjConfig.get("commit_proxies", result.old_commit_proxies)) {
result.old_commit_proxies = CLIENT_KNOBS->DEFAULT_AUTO_COMMIT_PROXIES;
statusObjConfig.get("auto_commit_proxies", result.old_commit_proxies);
result.auto_commit_proxies = result.desired_commit_proxies;
proxyCount = result.auto_commit_proxies;
} else {
result.auto_proxies = result.old_proxies;
proxyCount = result.old_proxies;
result.auto_commit_proxies = result.old_commit_proxies;
proxyCount = result.old_commit_proxies;
}
// Pick a reasonable default number of GRV proxies based on the cluster's process count.
result.desired_grv_proxies = std::max(std::min(4, processCount / 20), 1);
int grvProxyCount;
if (!statusObjConfig.get("grv_proxies", result.old_grv_proxies)) {
@ -823,7 +860,7 @@ ConfigureAutoResult parseConfig( StatusObject const& status ) {
return result;
}
ACTOR Future<ConfigurationResult::Type> autoConfig( Database cx, ConfigureAutoResult conf ) {
ACTOR Future<ConfigurationResult> autoConfig(Database cx, ConfigureAutoResult conf) {
state Transaction tr(cx);
state Key versionKey = BinaryWriter::toValue(deterministicRandom()->randomUniqueID(),Unversioned());
@ -857,8 +894,8 @@ ACTOR Future<ConfigurationResult::Type> autoConfig( Database cx, ConfigureAutoRe
if (conf.auto_logs != conf.old_logs)
tr.set(configKeysPrefix.toString() + "auto_logs", format("%d", conf.auto_logs));
if(conf.auto_proxies != conf.old_proxies)
tr.set(configKeysPrefix.toString() + "auto_proxies", format("%d", conf.auto_proxies));
if (conf.auto_commit_proxies != conf.old_commit_proxies)
tr.set(configKeysPrefix.toString() + "auto_commit_proxies", format("%d", conf.auto_commit_proxies));
if (conf.auto_grv_proxies != conf.old_grv_proxies)
tr.set(configKeysPrefix.toString() + "auto_grv_proxies", format("%d", conf.auto_grv_proxies));
@ -890,7 +927,8 @@ ACTOR Future<ConfigurationResult::Type> autoConfig( Database cx, ConfigureAutoRe
}
}
Future<ConfigurationResult::Type> changeConfig( Database const& cx, std::vector<StringRef> const& modes, Optional<ConfigureAutoResult> const& conf, bool force ) {
Future<ConfigurationResult> changeConfig(Database const& cx, std::vector<StringRef> const& modes,
Optional<ConfigureAutoResult> const& conf, bool force) {
if( modes.size() && modes[0] == LiteralStringRef("auto") && conf.present() ) {
return autoConfig(cx, conf.get());
}
@ -902,7 +940,7 @@ Future<ConfigurationResult::Type> changeConfig( Database const& cx, std::vector<
return changeConfig(cx, m, force);
}
Future<ConfigurationResult::Type> changeConfig( Database const& cx, std::string const& modes, bool force ) {
Future<ConfigurationResult> changeConfig(Database const& cx, std::string const& modes, bool force) {
TraceEvent("ChangeConfig").detail("Mode", modes);
std::map<std::string,std::string> m;
auto r = buildConfiguration( modes, m );
@ -971,7 +1009,7 @@ ACTOR Future<std::vector<NetworkAddress>> getCoordinators( Database cx ) {
}
}
ACTOR Future<CoordinatorsResult::Type> changeQuorum( Database cx, Reference<IQuorumChange> change ) {
ACTOR Future<CoordinatorsResult> changeQuorum(Database cx, Reference<IQuorumChange> change) {
state Transaction tr(cx);
state int retries = 0;
state std::vector<NetworkAddress> desiredCoordinators;
@ -991,7 +1029,7 @@ ACTOR Future<CoordinatorsResult::Type> changeQuorum( Database cx, Reference<IQuo
if ( cx->getConnectionFile() && old.clusterKeyName().toString() != cx->getConnectionFile()->getConnectionString().clusterKeyName() )
return CoordinatorsResult::BAD_DATABASE_STATE; // Someone changed the "name" of the database??
state CoordinatorsResult::Type result = CoordinatorsResult::SUCCESS;
state CoordinatorsResult result = CoordinatorsResult::SUCCESS;
if(!desiredCoordinators.size()) {
std::vector<NetworkAddress> _desiredCoordinators = wait( change->getDesiredCoordinators( &tr, old.coordinators(), Reference<ClusterConnectionFile>(new ClusterConnectionFile(old)), result ) );
desiredCoordinators = _desiredCoordinators;
@ -1061,14 +1099,20 @@ ACTOR Future<CoordinatorsResult::Type> changeQuorum( Database cx, Reference<IQuo
struct SpecifiedQuorumChange : IQuorumChange {
vector<NetworkAddress> desired;
explicit SpecifiedQuorumChange( vector<NetworkAddress> const& desired ) : desired(desired) {}
virtual Future<vector<NetworkAddress>> getDesiredCoordinators( Transaction* tr, vector<NetworkAddress> oldCoordinators, Reference<ClusterConnectionFile>, CoordinatorsResult::Type& ) {
virtual Future<vector<NetworkAddress>> getDesiredCoordinators(Transaction* tr,
vector<NetworkAddress> oldCoordinators,
Reference<ClusterConnectionFile>,
CoordinatorsResult&) {
return desired;
}
};
Reference<IQuorumChange> specifiedQuorumChange(vector<NetworkAddress> const& addresses) { return Reference<IQuorumChange>(new SpecifiedQuorumChange(addresses)); }
struct NoQuorumChange : IQuorumChange {
virtual Future<vector<NetworkAddress>> getDesiredCoordinators( Transaction* tr, vector<NetworkAddress> oldCoordinators, Reference<ClusterConnectionFile>, CoordinatorsResult::Type& ) {
virtual Future<vector<NetworkAddress>> getDesiredCoordinators(Transaction* tr,
vector<NetworkAddress> oldCoordinators,
Reference<ClusterConnectionFile>,
CoordinatorsResult&) {
return oldCoordinators;
}
};
@ -1078,7 +1122,10 @@ struct NameQuorumChange : IQuorumChange {
std::string newName;
Reference<IQuorumChange> otherChange;
explicit NameQuorumChange( std::string const& newName, Reference<IQuorumChange> const& otherChange ) : newName(newName), otherChange(otherChange) {}
virtual Future<vector<NetworkAddress>> getDesiredCoordinators( Transaction* tr, vector<NetworkAddress> oldCoordinators, Reference<ClusterConnectionFile> cf, CoordinatorsResult::Type& t ) {
virtual Future<vector<NetworkAddress>> getDesiredCoordinators(Transaction* tr,
vector<NetworkAddress> oldCoordinators,
Reference<ClusterConnectionFile> cf,
CoordinatorsResult& t) {
return otherChange->getDesiredCoordinators(tr, oldCoordinators, cf, t);
}
virtual std::string getDesiredClusterKeyName() {
@ -1093,7 +1140,10 @@ struct AutoQuorumChange : IQuorumChange {
int desired;
explicit AutoQuorumChange( int desired ) : desired(desired) {}
virtual Future<vector<NetworkAddress>> getDesiredCoordinators( Transaction* tr, vector<NetworkAddress> oldCoordinators, Reference<ClusterConnectionFile> ccf, CoordinatorsResult::Type& err ) {
virtual Future<vector<NetworkAddress>> getDesiredCoordinators(Transaction* tr,
vector<NetworkAddress> oldCoordinators,
Reference<ClusterConnectionFile> ccf,
CoordinatorsResult& err) {
return getDesired( this, tr, oldCoordinators, ccf, &err );
}
@ -1145,7 +1195,10 @@ struct AutoQuorumChange : IQuorumChange {
return true; // The status quo seems fine
}
ACTOR static Future<vector<NetworkAddress>> getDesired( AutoQuorumChange* self, Transaction* tr, vector<NetworkAddress> oldCoordinators, Reference<ClusterConnectionFile> ccf, CoordinatorsResult::Type* err ) {
ACTOR static Future<vector<NetworkAddress>> getDesired(AutoQuorumChange* self, Transaction* tr,
vector<NetworkAddress> oldCoordinators,
Reference<ClusterConnectionFile> ccf,
CoordinatorsResult* err) {
state int desiredCount = self->desired;
if(desiredCount == -1) {

View File

@ -43,41 +43,35 @@ standard API and some knowledge of the contents of the system key space.
// ConfigurationResult enumerates normal outcomes of changeConfig() and various error
// conditions specific to it. changeConfig may also throw an Error to report other problems.
class ConfigurationResult {
public:
enum Type {
NO_OPTIONS_PROVIDED,
CONFLICTING_OPTIONS,
UNKNOWN_OPTION,
INCOMPLETE_CONFIGURATION,
INVALID_CONFIGURATION,
DATABASE_ALREADY_CREATED,
DATABASE_CREATED,
DATABASE_UNAVAILABLE,
STORAGE_IN_UNKNOWN_DCID,
REGION_NOT_FULLY_REPLICATED,
MULTIPLE_ACTIVE_REGIONS,
REGIONS_CHANGED,
NOT_ENOUGH_WORKERS,
REGION_REPLICATION_MISMATCH,
DCID_MISSING,
LOCKED_NOT_NEW,
SUCCESS,
};
enum class ConfigurationResult {
NO_OPTIONS_PROVIDED,
CONFLICTING_OPTIONS,
UNKNOWN_OPTION,
INCOMPLETE_CONFIGURATION,
INVALID_CONFIGURATION,
DATABASE_ALREADY_CREATED,
DATABASE_CREATED,
DATABASE_UNAVAILABLE,
STORAGE_IN_UNKNOWN_DCID,
REGION_NOT_FULLY_REPLICATED,
MULTIPLE_ACTIVE_REGIONS,
REGIONS_CHANGED,
NOT_ENOUGH_WORKERS,
REGION_REPLICATION_MISMATCH,
DCID_MISSING,
LOCKED_NOT_NEW,
SUCCESS,
};
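Note (editorial): with ConfigurationResult now a scoped enum class, call sites that previously used ConfigurationResult::Type take the enum type directly and must qualify its enumerators. A minimal, hypothetical sketch (handleResult and main are illustrative, not part of this change):

#include <cstdio>

// Mirrors a subset of the scoped enum declared above, for illustration only.
enum class ConfigurationResult {
	DATABASE_ALREADY_CREATED,
	DATABASE_CREATED,
	SUCCESS,
};

// Hypothetical call site: enumerators must be qualified and no longer convert implicitly to int.
void handleResult(ConfigurationResult r) {
	if (r == ConfigurationResult::SUCCESS) {
		printf("configuration change succeeded\n");
	} else if (r == ConfigurationResult::DATABASE_ALREADY_CREATED) {
		printf("database already exists\n");
	}
}

int main() {
	handleResult(ConfigurationResult::SUCCESS);
	return 0;
}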
class CoordinatorsResult {
public:
enum Type {
INVALID_NETWORK_ADDRESSES,
SAME_NETWORK_ADDRESSES,
NOT_COORDINATORS, //FIXME: not detected
DATABASE_UNREACHABLE, //FIXME: not detected
BAD_DATABASE_STATE,
COORDINATOR_UNREACHABLE,
NOT_ENOUGH_MACHINES,
SUCCESS
};
enum class CoordinatorsResult {
INVALID_NETWORK_ADDRESSES,
SAME_NETWORK_ADDRESSES,
NOT_COORDINATORS, // FIXME: not detected
DATABASE_UNREACHABLE, // FIXME: not detected
BAD_DATABASE_STATE,
COORDINATOR_UNREACHABLE,
NOT_ENOUGH_MACHINES,
SUCCESS
};
struct ConfigureAutoResult {
@ -86,7 +80,7 @@ struct ConfigureAutoResult {
int32_t machines;
std::string old_replication;
int32_t old_proxies;
int32_t old_commit_proxies;
int32_t old_grv_proxies;
int32_t old_resolvers;
int32_t old_logs;
@ -94,38 +88,46 @@ struct ConfigureAutoResult {
int32_t old_machines_with_transaction;
std::string auto_replication;
int32_t auto_proxies;
int32_t auto_commit_proxies;
int32_t auto_grv_proxies;
int32_t auto_resolvers;
int32_t auto_logs;
int32_t auto_processes_with_transaction;
int32_t auto_machines_with_transaction;
int32_t desired_proxies;
int32_t desired_commit_proxies;
int32_t desired_grv_proxies;
int32_t desired_resolvers;
int32_t desired_logs;
ConfigureAutoResult()
: processes(-1), machines(-1), old_proxies(-1), old_grv_proxies(-1), old_resolvers(-1), old_logs(-1),
old_processes_with_transaction(-1), old_machines_with_transaction(-1), auto_proxies(-1), auto_grv_proxies(-1),
auto_resolvers(-1), auto_logs(-1), auto_processes_with_transaction(-1), auto_machines_with_transaction(-1),
desired_proxies(-1), desired_grv_proxies(-1), desired_resolvers(-1), desired_logs(-1) {}
: processes(-1), machines(-1), old_commit_proxies(-1), old_grv_proxies(-1), old_resolvers(-1), old_logs(-1),
old_processes_with_transaction(-1), old_machines_with_transaction(-1), auto_commit_proxies(-1),
auto_grv_proxies(-1), auto_resolvers(-1), auto_logs(-1), auto_processes_with_transaction(-1),
auto_machines_with_transaction(-1), desired_commit_proxies(-1), desired_grv_proxies(-1), desired_resolvers(-1),
desired_logs(-1) {}
bool isValid() const { return processes != -1; }
};
ConfigurationResult::Type buildConfiguration( std::vector<StringRef> const& modeTokens, std::map<std::string, std::string>& outConf ); // Accepts a vector of configuration tokens
ConfigurationResult::Type buildConfiguration( std::string const& modeString, std::map<std::string, std::string>& outConf ); // Accepts tokens separated by spaces in a single string
ConfigurationResult buildConfiguration(
std::vector<StringRef> const& modeTokens,
std::map<std::string, std::string>& outConf); // Accepts a vector of configuration tokens
ConfigurationResult buildConfiguration(
std::string const& modeString,
std::map<std::string, std::string>& outConf); // Accepts tokens separated by spaces in a single string
bool isCompleteConfiguration( std::map<std::string, std::string> const& options );
// All versions of changeConfig apply the given set of configuration tokens to the database, and return a ConfigurationResult (or error).
Future<ConfigurationResult::Type> changeConfig( Database const& cx, std::string const& configMode, bool force ); // Accepts tokens separated by spaces in a single string
Future<ConfigurationResult> changeConfig(Database const& cx, std::string const& configMode,
bool force); // Accepts tokens separated by spaces in a single string
ConfigureAutoResult parseConfig( StatusObject const& status );
Future<ConfigurationResult::Type> changeConfig( Database const& cx, std::vector<StringRef> const& modes, Optional<ConfigureAutoResult> const& conf, bool force ); // Accepts a vector of configuration tokens
ACTOR Future<ConfigurationResult::Type> changeConfig(
Future<ConfigurationResult> changeConfig(Database const& cx, std::vector<StringRef> const& modes,
Optional<ConfigureAutoResult> const& conf,
bool force); // Accepts a vector of configuration tokens
ACTOR Future<ConfigurationResult> changeConfig(
Database cx, std::map<std::string, std::string> m,
bool force); // Accepts a full configuration in key/value format (from buildConfiguration)
@ -134,12 +136,15 @@ ACTOR Future<Void> waitForFullReplication(Database cx);
struct IQuorumChange : ReferenceCounted<IQuorumChange> {
virtual ~IQuorumChange() {}
virtual Future<vector<NetworkAddress>> getDesiredCoordinators( Transaction* tr, vector<NetworkAddress> oldCoordinators, Reference<ClusterConnectionFile>, CoordinatorsResult::Type& ) = 0;
virtual Future<vector<NetworkAddress>> getDesiredCoordinators(Transaction* tr,
vector<NetworkAddress> oldCoordinators,
Reference<ClusterConnectionFile>,
CoordinatorsResult&) = 0;
virtual std::string getDesiredClusterKeyName() { return std::string(); }
};
// Change to use the given set of coordination servers
ACTOR Future<CoordinatorsResult::Type> changeQuorum(Database cx, Reference<IQuorumChange> change);
ACTOR Future<CoordinatorsResult> changeQuorum(Database cx, Reference<IQuorumChange> change);
Reference<IQuorumChange> autoQuorumChange(int desired = -1);
Reference<IQuorumChange> noQuorumChange();
Reference<IQuorumChange> specifiedQuorumChange(vector<NetworkAddress> const&);

View File

@ -171,7 +171,7 @@ ACTOR Future<Void> metricRuleUpdater(Database cx, MetricsConfig *config, TDMetri
// Implementation of IMetricDB
class MetricDB : public IMetricDB {
public:
MetricDB(ReadYourWritesTransaction *tr = NULL) : tr(tr) {}
MetricDB(ReadYourWritesTransaction *tr = nullptr) : tr(tr) {}
~MetricDB() {}
// levelKey is the prefix for the entire level, no timestamp at the end

View File

@ -624,8 +624,8 @@ ACTOR Future<Void> getClientInfoFromLeader( Reference<AsyncVar<Optional<ClusterC
choose {
when( ClientDBInfo ni = wait( brokenPromiseToNever( knownLeader->get().get().clientInterface.openDatabase.getReply( req ) ) ) ) {
TraceEvent("MonitorLeaderForProxiesGotClientInfo", knownLeader->get().get().clientInterface.id())
.detail("MasterProxy0", ni.masterProxies.size() ? ni.masterProxies[0].id() : UID())
.detail("GrvProxy0", ni.grvProxies.size() ? ni.grvProxies[0].id() : UID())
.detail("CommitProxy0", ni.commitProxies.size() ? ni.commitProxies[0].id() : UID())
.detail("GrvProxy0", ni.grvProxies.size() ? ni.grvProxies[0].id() : UID())
.detail("ClientID", ni.id);
clientData->clientInfo->set(CachedSerialization<ClientDBInfo>(ni));
}
@ -681,24 +681,25 @@ ACTOR Future<Void> monitorLeaderForProxies( Key clusterKey, vector<NetworkAddres
}
}
void shrinkProxyList( ClientDBInfo& ni, std::vector<UID>& lastMasterProxyUIDs, std::vector<MasterProxyInterface>& lastMasterProxies,
std::vector<UID>& lastGrvProxyUIDs, std::vector<GrvProxyInterface>& lastGrvProxies) {
if(ni.masterProxies.size() > CLIENT_KNOBS->MAX_MASTER_PROXY_CONNECTIONS) {
std::vector<UID> masterProxyUIDs;
for(auto& masterProxy : ni.masterProxies) {
masterProxyUIDs.push_back(masterProxy.id());
void shrinkProxyList(ClientDBInfo& ni, std::vector<UID>& lastCommitProxyUIDs,
std::vector<CommitProxyInterface>& lastCommitProxies, std::vector<UID>& lastGrvProxyUIDs,
std::vector<GrvProxyInterface>& lastGrvProxies) {
if (ni.commitProxies.size() > CLIENT_KNOBS->MAX_COMMIT_PROXY_CONNECTIONS) {
std::vector<UID> commitProxyUIDs;
for (auto& commitProxy : ni.commitProxies) {
commitProxyUIDs.push_back(commitProxy.id());
}
if(masterProxyUIDs != lastMasterProxyUIDs) {
lastMasterProxyUIDs.swap(masterProxyUIDs);
lastMasterProxies = ni.masterProxies;
deterministicRandom()->randomShuffle(lastMasterProxies);
lastMasterProxies.resize(CLIENT_KNOBS->MAX_MASTER_PROXY_CONNECTIONS);
for(int i = 0; i < lastMasterProxies.size(); i++) {
TraceEvent("ConnectedMasterProxy").detail("MasterProxy", lastMasterProxies[i].id());
if (commitProxyUIDs != lastCommitProxyUIDs) {
lastCommitProxyUIDs.swap(commitProxyUIDs);
lastCommitProxies = ni.commitProxies;
deterministicRandom()->randomShuffle(lastCommitProxies);
lastCommitProxies.resize(CLIENT_KNOBS->MAX_COMMIT_PROXY_CONNECTIONS);
for (int i = 0; i < lastCommitProxies.size(); i++) {
TraceEvent("ConnectedCommitProxy").detail("CommitProxy", lastCommitProxies[i].id());
}
}
ni.firstProxy = ni.masterProxies[0];
ni.masterProxies = lastMasterProxies;
ni.firstCommitProxy = ni.commitProxies[0];
ni.commitProxies = lastCommitProxies;
}
if(ni.grvProxies.size() > CLIENT_KNOBS->MAX_GRV_PROXY_CONNECTIONS) {
std::vector<UID> grvProxyUIDs;
@ -719,14 +720,16 @@ void shrinkProxyList( ClientDBInfo& ni, std::vector<UID>& lastMasterProxyUIDs, s
}
// Leader is the process that will be elected by coordinators as the cluster controller
ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration( Reference<ClusterConnectionFile> connFile, Reference<AsyncVar<ClientDBInfo>> clientInfo, MonitorLeaderInfo info, Reference<ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>> supportedVersions, Key traceLogGroup) {
ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration(
Reference<ClusterConnectionFile> connFile, Reference<AsyncVar<ClientDBInfo>> clientInfo, MonitorLeaderInfo info,
Reference<ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>> supportedVersions, Key traceLogGroup) {
state ClusterConnectionString cs = info.intermediateConnFile->getConnectionString();
state vector<NetworkAddress> addrs = cs.coordinators();
state int idx = 0;
state int successIdx = 0;
state Optional<double> incorrectTime;
state std::vector<UID> lastProxyUIDs;
state std::vector<MasterProxyInterface> lastProxies;
state std::vector<UID> lastCommitProxyUIDs;
state std::vector<CommitProxyInterface> lastCommitProxies;
state std::vector<UID> lastGrvProxyUIDs;
state std::vector<GrvProxyInterface> lastGrvProxies;
@ -780,7 +783,7 @@ ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration( Reference<ClusterCo
connFile->notifyConnected();
auto& ni = rep.get().mutate();
shrinkProxyList(ni, lastProxyUIDs, lastProxies, lastGrvProxyUIDs, lastGrvProxies);
shrinkProxyList(ni, lastCommitProxyUIDs, lastCommitProxies, lastGrvProxyUIDs, lastGrvProxies);
clientInfo->set( ni );
successIdx = idx;
} else {

View File

@ -25,7 +25,7 @@
#include "fdbclient/FDBTypes.h"
#include "fdbclient/CoordinationInterface.h"
#include "fdbclient/ClusterInterface.h"
#include "fdbclient/MasterProxyInterface.h"
#include "fdbclient/CommitProxyInterface.h"
#define CLUSTER_FILE_ENV_VAR_NAME "FDB_CLUSTER_FILE"
@ -67,8 +67,9 @@ Future<Void> monitorLeaderForProxies( Value const& key, vector<NetworkAddress> c
Future<Void> monitorProxies( Reference<AsyncVar<Reference<ClusterConnectionFile>>> const& connFile, Reference<AsyncVar<ClientDBInfo>> const& clientInfo, Reference<ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>> const& supportedVersions, Key const& traceLogGroup );
void shrinkProxyList( ClientDBInfo& ni, std::vector<UID>& lastMasterProxyUIDs, std::vector<MasterProxyInterface>& lastMasterProxies,
std::vector<UID>& lastGrvProxyUIDs, std::vector<GrvProxyInterface>& lastGrvProxies);
void shrinkProxyList(ClientDBInfo& ni, std::vector<UID>& lastCommitProxyUIDs,
std::vector<CommitProxyInterface>& lastCommitProxies, std::vector<UID>& lastGrvProxyUIDs,
std::vector<GrvProxyInterface>& lastGrvProxies);
#ifndef __INTEL_COMPILER
#pragma region Implementation

View File

@ -163,7 +163,7 @@ public:
if(destroyNow) {
api->futureDestroy(f);
f = NULL;
f = nullptr;
}
return destroyNow;
@ -202,7 +202,7 @@ public:
auto sav = (DLThreadSingleAssignmentVar<T>*)param;
if(MultiVersionApi::api->callbackOnMainThread) {
onMainThreadVoid([sav](){ sav->apply(); }, NULL);
onMainThreadVoid([sav](){ sav->apply(); }, nullptr);
}
else {
sav->apply();

View File

@ -224,7 +224,7 @@ ThreadFuture<int64_t> DLTransaction::getApproximateSize() {
}
void DLTransaction::setOption(FDBTransactionOptions::Option option, Optional<StringRef> value) {
throwIfError(api->transactionSetOption(tr, option, value.present() ? value.get().begin() : NULL, value.present() ? value.get().size() : 0));
throwIfError(api->transactionSetOption(tr, option, value.present() ? value.get().begin() : nullptr, value.present() ? value.get().size() : 0));
}
ThreadFuture<Void> DLTransaction::onError(Error const& e) {
@ -262,14 +262,14 @@ Reference<ITransaction> DLDatabase::createTransaction() {
}
void DLDatabase::setOption(FDBDatabaseOptions::Option option, Optional<StringRef> value) {
throwIfError(api->databaseSetOption(db, option, value.present() ? value.get().begin() : NULL, value.present() ? value.get().size() : 0));
throwIfError(api->databaseSetOption(db, option, value.present() ? value.get().begin() : nullptr, value.present() ? value.get().size() : 0));
}
// DLApi
template<class T>
void loadClientFunction(T *fp, void *lib, std::string libPath, const char *functionName, bool requireFunction = true) {
*(void**)(fp) = loadFunction(lib, functionName);
if(*fp == NULL && requireFunction) {
if(*fp == nullptr && requireFunction) {
TraceEvent(SevError, "ErrorLoadingFunction").detail("LibraryPath", libPath).detail("Function", functionName);
throw platform_error();
}
@ -283,7 +283,7 @@ void DLApi::init() {
}
void* lib = loadLibrary(fdbCPath.c_str());
if(lib == NULL) {
if(lib == nullptr) {
TraceEvent(SevError, "ErrorLoadingExternalClientLibrary").detail("LibraryPath", fdbCPath);
throw platform_error();
}
@ -347,7 +347,7 @@ void DLApi::selectApiVersion(int apiVersion) {
init();
throwIfError(api->selectApiVersion(apiVersion, headerVersion));
throwIfError(api->setNetworkOption(FDBNetworkOptions::EXTERNAL_CLIENT, NULL, 0));
throwIfError(api->setNetworkOption(FDBNetworkOptions::EXTERNAL_CLIENT, nullptr, 0));
}
const char* DLApi::getClientVersion() {
@ -359,7 +359,7 @@ const char* DLApi::getClientVersion() {
}
void DLApi::setNetworkOption(FDBNetworkOptions::Option option, Optional<StringRef> value) {
throwIfError(api->setNetworkOption(option, value.present() ? value.get().begin() : NULL, value.present() ? value.get().size() : 0));
throwIfError(api->setNetworkOption(option, value.present() ? value.get().begin() : nullptr, value.present() ? value.get().size() : 0));
}
void DLApi::setupNetwork() {
@ -786,7 +786,7 @@ void MultiVersionDatabase::Connector::connect() {
else {
delref();
}
}, NULL);
}, nullptr);
}
// Only called from main thread
@ -805,7 +805,7 @@ void MultiVersionDatabase::Connector::fire(const Void &unused, int& userParam) {
dbState->stateChanged();
}
delref();
}, NULL);
}, nullptr);
}
void MultiVersionDatabase::Connector::error(const Error& e, int& userParam) {
@ -820,7 +820,7 @@ void MultiVersionDatabase::Connector::error(const Error& e, int& userParam) {
}
MultiVersionDatabase::DatabaseState::DatabaseState()
: dbVar(new ThreadSafeAsyncVar<Reference<IDatabase>>(Reference<IDatabase>(NULL))), currentClientIndex(-1) {}
: dbVar(new ThreadSafeAsyncVar<Reference<IDatabase>>(Reference<IDatabase>(nullptr))), currentClientIndex(-1) {}
// Only called from main thread
void MultiVersionDatabase::DatabaseState::stateChanged() {
@ -898,7 +898,7 @@ void MultiVersionDatabase::DatabaseState::cancelConnections() {
connectionAttempts.clear();
clients.clear();
delref();
}, NULL);
}, nullptr);
}
// MultiVersionApi
@ -1043,7 +1043,7 @@ void MultiVersionApi::setSupportedClientVersions(Standalone<StringRef> versions)
// This option must be set on the main thread because it modifies structures that can be used concurrently by the main thread
onMainThreadVoid([this, versions](){
localClient->api->setNetworkOption(FDBNetworkOptions::SUPPORTED_CLIENT_VERSIONS, versions);
}, NULL);
}, nullptr);
if(!bypassMultiClientApi) {
runOnExternalClients([versions](Reference<ClientInfo> client) {
@ -1654,7 +1654,7 @@ THREAD_FUNC runSingleAssignmentVarTest(void *arg) {
onMainThreadVoid([done](){
*done = true;
}, NULL);
}, nullptr);
}
catch(Error &e) {
printf("Caught error in test: %s\n", e.name());

View File

@ -286,7 +286,7 @@ struct ClientInfo : ThreadSafeReferenceCounted<ClientInfo> {
bool failed;
std::vector<std::pair<void (*)(void*), void*>> threadCompletionHooks;
ClientInfo() : protocolVersion(0), api(NULL), external(false), failed(true) {}
ClientInfo() : protocolVersion(0), api(nullptr), external(false), failed(true) {}
ClientInfo(IClientApi *api) : protocolVersion(0), api(api), libPath("internal"), external(false), failed(false) {}
ClientInfo(IClientApi *api, std::string libPath) : protocolVersion(0), api(api), libPath(libPath), external(true), failed(false) {}

View File

@ -62,7 +62,7 @@ public:
auto e = ptr->end(); // e points to the end of the current blob
if (e == blob->data.end()) { // the condition sanity checks e is at the end of current blob
blob = blob->next;
e = blob ? blob->data.begin() : NULL;
e = blob ? blob->data.begin() : nullptr;
}
ptr = (Header*)e;
decode();
@ -70,7 +70,7 @@ public:
bool operator == ( Iterator const& i ) const { return ptr == i.ptr; }
bool operator != ( Iterator const& i) const { return ptr != i.ptr; }
explicit operator bool() const { return blob!=NULL; }
explicit operator bool() const { return blob!=nullptr; }
typedef std::forward_iterator_tag iterator_category;
typedef const MutationRef value_type;
@ -79,7 +79,7 @@ public:
typedef const MutationRef& reference;
Iterator( Blob* blob, const Header* ptr ) : blob(blob), ptr(ptr) { decode(); }
Iterator() : blob(NULL), ptr(NULL) { }
Iterator() : blob(nullptr), ptr(nullptr) { }
private:
friend struct MutationListRef;
const Blob* blob; // The blob containing the indicated mutation
@ -95,16 +95,16 @@ public:
}
};
MutationListRef() : blob_begin(NULL), blob_end(NULL), totalBytes(0) {
MutationListRef() : blob_begin(nullptr), blob_end(nullptr), totalBytes(0) {
}
MutationListRef( Arena& ar, MutationListRef const& r ) : blob_begin(NULL), blob_end(NULL), totalBytes(0) {
MutationListRef( Arena& ar, MutationListRef const& r ) : blob_begin(nullptr), blob_end(nullptr), totalBytes(0) {
append_deep(ar, r.begin(), r.end());
}
Iterator begin() const {
if (blob_begin) return Iterator(blob_begin, (Header*)blob_begin->data.begin());
return Iterator(NULL, NULL);
return Iterator(nullptr, nullptr);
}
Iterator end() const { return Iterator(NULL, NULL); }
Iterator end() const { return Iterator(nullptr, nullptr); }
size_t expectedSize() const { return sizeof(Blob) + totalBytes; }
int totalSize() const { return totalBytes; }
@ -146,12 +146,13 @@ public:
if(totalBytes > 0) {
blob_begin = blob_end = new (ar.arena()) Blob;
blob_begin->next = NULL;
blob_begin->next = nullptr;
blob_begin->data = StringRef((const uint8_t*)ar.arenaRead(totalBytes), totalBytes); // Zero-copy read when deserializing from an ArenaReader
}
}
//FIXME: this is re-implemented on the master proxy to include a yield, any changes to this function should also done there
// FIXME: this is re-implemented on the commit proxy to include a yield; any changes to this function should
// also be done there
template <class Ar>
void serialize_save( Ar& ar ) const {
serializer(ar, totalBytes);
@ -180,7 +181,7 @@ private:
}
blob_end->data = StringRef(b, bytes);
blob_end->next = NULL;
blob_end->next = nullptr;
return b;
}

View File

@ -40,7 +40,7 @@
#include "fdbclient/KeyRangeMap.h"
#include "fdbclient/Knobs.h"
#include "fdbclient/ManagementAPI.actor.h"
#include "fdbclient/MasterProxyInterface.h"
#include "fdbclient/CommitProxyInterface.h"
#include "fdbclient/MonitorLeader.h"
#include "fdbclient/MutationList.h"
#include "fdbclient/ReadYourWrites.h"
@ -95,7 +95,7 @@ Future<REPLY_TYPE(Request)> loadBalance(
DatabaseContext* ctx, const Reference<LocationInfo> alternatives, RequestStream<Request> Interface::*channel,
const Request& request = Request(), TaskPriority taskID = TaskPriority::DefaultPromiseEndpoint,
bool atMostOnce = false, // if true, throws request_maybe_delivered() instead of retrying automatically
QueueModel* model = NULL) {
QueueModel* model = nullptr) {
if (alternatives->hasCaches) {
return loadBalance(alternatives->locations(), channel, request, taskID, atMostOnce, model);
}
@ -147,7 +147,7 @@ Reference<StorageServerInfo> StorageServerInfo::getInterface( DatabaseContext *c
}
void StorageServerInfo::notifyContextDestroyed() {
cx = NULL;
cx = nullptr;
}
StorageServerInfo::~StorageServerInfo() {
@ -155,7 +155,7 @@ StorageServerInfo::~StorageServerInfo() {
auto it = cx->server_interf.find( interf.id() );
if( it != cx->server_interf.end() )
cx->server_interf.erase( it );
cx = NULL;
cx = nullptr;
}
}
@ -189,6 +189,12 @@ std::string printable( const KeyRangeRef& range ) {
return printable(range.begin) + " - " + printable(range.end);
}
std::string printable(const VectorRef<KeyRangeRef>& val) {
std::string s;
for (int i = 0; i < val.size(); i++) s = s + printable(val[i]) + " ";
return s;
}
int unhex( char c ) {
if (c >= '0' && c <= '9')
return c-'0';
@ -484,15 +490,15 @@ ACTOR static Future<Void> clientStatusUpdateActor(DatabaseContext *cx) {
}
ACTOR static Future<Void> monitorProxiesChange(Reference<AsyncVar<ClientDBInfo>> clientDBInfo, AsyncTrigger *triggerVar) {
state vector< MasterProxyInterface > curProxies;
state vector<CommitProxyInterface> curCommitProxies;
state vector< GrvProxyInterface > curGrvProxies;
curProxies = clientDBInfo->get().masterProxies;
curCommitProxies = clientDBInfo->get().commitProxies;
curGrvProxies = clientDBInfo->get().grvProxies;
loop{
wait(clientDBInfo->onChange());
if (clientDBInfo->get().masterProxies != curProxies || clientDBInfo->get().grvProxies != curGrvProxies) {
curProxies = clientDBInfo->get().masterProxies;
if (clientDBInfo->get().commitProxies != curCommitProxies || clientDBInfo->get().grvProxies != curGrvProxies) {
curCommitProxies = clientDBInfo->get().commitProxies;
curGrvProxies = clientDBInfo->get().grvProxies;
triggerVar->trigger();
}
@ -881,7 +887,7 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<ClusterConnectionF
transactionsExpensiveClearCostEstCount("ExpensiveClearCostEstCount", cc),
specialKeySpace(std::make_unique<SpecialKeySpace>(specialKeys.begin, specialKeys.end, /* test */ false)) {
dbId = deterministicRandom()->randomUniqueID();
connected = (clientInfo->get().masterProxies.size() && clientInfo->get().grvProxies.size())
connected = (clientInfo->get().commitProxies.size() && clientInfo->get().grvProxies.size())
? Void()
: clientInfo->onChange();
@ -930,6 +936,16 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<ClusterConnectionF
std::make_unique<ExclusionInProgressRangeImpl>(
KeyRangeRef(LiteralStringRef("inProgressExclusion/"), LiteralStringRef("inProgressExclusion0"))
.withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin)));
registerSpecialKeySpaceModule(
SpecialKeySpace::MODULE::CONFIGURATION, SpecialKeySpace::IMPLTYPE::READWRITE,
std::make_unique<ProcessClassRangeImpl>(
KeyRangeRef(LiteralStringRef("process/class_type/"), LiteralStringRef("process/class_type0"))
.withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::CONFIGURATION).begin)));
registerSpecialKeySpaceModule(
SpecialKeySpace::MODULE::CONFIGURATION, SpecialKeySpace::IMPLTYPE::READONLY,
std::make_unique<ProcessClassSourceRangeImpl>(
KeyRangeRef(LiteralStringRef("process/class_source/"), LiteralStringRef("process/class_source0"))
.withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::CONFIGURATION).begin)));
}
if (apiVersionAtLeast(630)) {
registerSpecialKeySpaceModule(SpecialKeySpace::MODULE::TRANSACTION, SpecialKeySpace::IMPLTYPE::READONLY,
@ -1164,9 +1180,9 @@ void DatabaseContext::setOption( FDBDatabaseOptions::Option option, Optional<Str
break;
case FDBDatabaseOptions::MACHINE_ID:
clientLocality = LocalityData( clientLocality.processId(), value.present() ? Standalone<StringRef>(value.get()) : Optional<Standalone<StringRef>>(), clientLocality.machineId(), clientLocality.dcId() );
if( clientInfo->get().masterProxies.size() )
masterProxies = Reference<ProxyInfo>( new ProxyInfo( clientInfo->get().masterProxies) );
if( clientInfo->get().grvProxies.size() )
if (clientInfo->get().commitProxies.size())
commitProxies = Reference<CommitProxyInfo>(new CommitProxyInfo(clientInfo->get().commitProxies));
if( clientInfo->get().grvProxies.size() )
grvProxies = Reference<GrvProxyInfo>( new GrvProxyInfo( clientInfo->get().grvProxies ) );
server_interf.clear();
locationCache.insert( allKeys, Reference<LocationInfo>() );
@ -1176,9 +1192,9 @@ void DatabaseContext::setOption( FDBDatabaseOptions::Option option, Optional<Str
break;
case FDBDatabaseOptions::DATACENTER_ID:
clientLocality = LocalityData(clientLocality.processId(), clientLocality.zoneId(), clientLocality.machineId(), value.present() ? Standalone<StringRef>(value.get()) : Optional<Standalone<StringRef>>());
if( clientInfo->get().masterProxies.size() )
masterProxies = Reference<ProxyInfo>( new ProxyInfo( clientInfo->get().masterProxies));
if( clientInfo->get().grvProxies.size() )
if (clientInfo->get().commitProxies.size())
commitProxies = Reference<CommitProxyInfo>(new CommitProxyInfo(clientInfo->get().commitProxies));
if( clientInfo->get().grvProxies.size() )
grvProxies = Reference<GrvProxyInfo>( new GrvProxyInfo( clientInfo->get().grvProxies ));
server_interf.clear();
locationCache.insert( allKeys, Reference<LocationInfo>() );
@ -1220,13 +1236,13 @@ ACTOR static Future<Void> switchConnectionFileImpl(Reference<ClusterConnectionFi
.detail("ConnectionString", connFile->getConnectionString().toString());
// Reset state from former cluster.
self->masterProxies.clear();
self->commitProxies.clear();
self->grvProxies.clear();
self->minAcceptableReadVersion = std::numeric_limits<Version>::max();
self->invalidateCache(allKeys);
auto clearedClientInfo = self->clientInfo->get();
clearedClientInfo.masterProxies.clear();
clearedClientInfo.commitProxies.clear();
clearedClientInfo.grvProxies.clear();
clearedClientInfo.id = deterministicRandom()->randomUniqueID();
self->clientInfo->set(clearedClientInfo);
@ -1307,7 +1323,7 @@ Database Database::createDatabase( Reference<ClusterConnectionFile> connFile, in
.detail("PackageName", FDB_VT_PACKAGE_NAME)
.detail("ClusterFile", connFile->getFilename().c_str())
.detail("ConnectionString", connFile->getConnectionString().toString())
.detailf("ActualTime", "%lld", DEBUG_DETERMINISM ? 0 : time(NULL))
.detailf("ActualTime", "%lld", DEBUG_DETERMINISM ? 0 : time(nullptr))
.detail("ApiVersion", apiVersion)
.detailf("ImageOffset", "%p", platform::getImageOffset())
.trackLatest("ClientStart");
@ -1561,29 +1577,29 @@ void stopNetwork() {
void DatabaseContext::updateProxies() {
if (proxiesLastChange == clientInfo->get().id) return;
proxiesLastChange = clientInfo->get().id;
masterProxies.clear();
commitProxies.clear();
grvProxies.clear();
bool masterProxyProvisional = false, grvProxyProvisional = false;
if (clientInfo->get().masterProxies.size()) {
masterProxies = Reference<ProxyInfo>(new ProxyInfo(clientInfo->get().masterProxies));
masterProxyProvisional = clientInfo->get().masterProxies[0].provisional;
bool commitProxyProvisional = false, grvProxyProvisional = false;
if (clientInfo->get().commitProxies.size()) {
commitProxies = Reference<CommitProxyInfo>(new CommitProxyInfo(clientInfo->get().commitProxies));
commitProxyProvisional = clientInfo->get().commitProxies[0].provisional;
}
if (clientInfo->get().grvProxies.size()) {
grvProxies = Reference<GrvProxyInfo>(new GrvProxyInfo(clientInfo->get().grvProxies));
grvProxyProvisional = clientInfo->get().grvProxies[0].provisional;
}
if (clientInfo->get().masterProxies.size() && clientInfo->get().grvProxies.size()) {
ASSERT(masterProxyProvisional == grvProxyProvisional);
proxyProvisional = masterProxyProvisional;
if (clientInfo->get().commitProxies.size() && clientInfo->get().grvProxies.size()) {
ASSERT(commitProxyProvisional == grvProxyProvisional);
proxyProvisional = commitProxyProvisional;
}
}
Reference<ProxyInfo> DatabaseContext::getMasterProxies(bool useProvisionalProxies) {
Reference<CommitProxyInfo> DatabaseContext::getCommitProxies(bool useProvisionalProxies) {
updateProxies();
if (proxyProvisional && !useProvisionalProxies) {
return Reference<ProxyInfo>();
return Reference<CommitProxyInfo>();
}
return masterProxies;
return commitProxies;
}
Reference<GrvProxyInfo> DatabaseContext::getGrvProxies(bool useProvisionalProxies) {
@ -1594,19 +1610,19 @@ Reference<GrvProxyInfo> DatabaseContext::getGrvProxies(bool useProvisionalProxie
return grvProxies;
}
//Actor which will wait until the MultiInterface<MasterProxyInterface> returned by the DatabaseContext cx is not NULL
ACTOR Future<Reference<ProxyInfo>> getMasterProxiesFuture(DatabaseContext *cx, bool useProvisionalProxies) {
// Actor which will wait until the MultiInterface<CommitProxyInterface> returned by the DatabaseContext cx is not nullptr
ACTOR Future<Reference<CommitProxyInfo>> getCommitProxiesFuture(DatabaseContext* cx, bool useProvisionalProxies) {
loop{
Reference<ProxyInfo> proxies = cx->getMasterProxies(useProvisionalProxies);
if (proxies)
return proxies;
Reference<CommitProxyInfo> commitProxies = cx->getCommitProxies(useProvisionalProxies);
if (commitProxies)
return commitProxies;
wait( cx->onProxiesChanged() );
}
}
//Returns a future which will not be set until the ProxyInfo of this DatabaseContext is not NULL
Future<Reference<ProxyInfo>> DatabaseContext::getMasterProxiesFuture(bool useProvisionalProxies) {
return ::getMasterProxiesFuture(this, useProvisionalProxies);
// Returns a future which will not be set until the CommitProxyInfo of this DatabaseContext is not nullptr
Future<Reference<CommitProxyInfo>> DatabaseContext::getCommitProxiesFuture(bool useProvisionalProxies) {
return ::getCommitProxiesFuture(this, useProvisionalProxies);
}
void GetRangeLimits::decrement( VectorRef<KeyValueRef> const& data ) {
@ -1733,8 +1749,8 @@ ACTOR Future<pair<KeyRange, Reference<LocationInfo>>> getKeyLocation_internal(Da
++cx->transactionKeyServerLocationRequests;
choose {
when (wait(cx->onProxiesChanged())) {}
when (GetKeyServerLocationsReply rep = wait(basicLoadBalance(
cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::getKeyServersLocations,
when(GetKeyServerLocationsReply rep = wait(basicLoadBalance(
cx->getCommitProxies(info.useProvisionalProxies), &CommitProxyInterface::getKeyServersLocations,
GetKeyServerLocationsRequest(span.context, key, Optional<KeyRef>(), 100, isBackward, key.arena()),
TaskPriority::DefaultPromiseEndpoint))) {
++cx->transactionKeyServerLocationRequestsCompleted;
@ -1782,8 +1798,8 @@ ACTOR Future<vector<pair<KeyRange, Reference<LocationInfo>>>> getKeyRangeLocatio
++cx->transactionKeyServerLocationRequests;
choose {
when ( wait( cx->onProxiesChanged() ) ) {}
when ( GetKeyServerLocationsReply _rep = wait(basicLoadBalance(
cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::getKeyServersLocations,
when(GetKeyServerLocationsReply _rep = wait(basicLoadBalance(
cx->getCommitProxies(info.useProvisionalProxies), &CommitProxyInterface::getKeyServersLocations,
GetKeyServerLocationsRequest(span.context, keys.begin, keys.end, limit, reverse, keys.arena()),
TaskPriority::DefaultPromiseEndpoint))) {
++cx->transactionKeyServerLocationRequestsCompleted;
@ -2512,7 +2528,7 @@ ACTOR Future<Standalone<RangeResultRef>> getRange( Database cx, Reference<Transa
GetKeyValuesReply _rep =
wait(loadBalance(cx.getPtr(), beginServer.second, &StorageServerInterface::getKeyValues, req,
TaskPriority::DefaultPromiseEndpoint, false,
cx->enableLocalityLoadBalance ? &cx->queueModel : NULL));
cx->enableLocalityLoadBalance ? &cx->queueModel : nullptr));
rep = _rep;
++cx->transactionPhysicalReadsCompleted;
} catch(Error&) {
@ -3450,14 +3466,16 @@ ACTOR static Future<Void> tryCommit( Database cx, Reference<TransactionLogInfo>
req.debugID = commitID;
state Future<CommitID> reply;
if (options.commitOnFirstProxy) {
if(cx->clientInfo->get().firstProxy.present()) {
reply = throwErrorOr ( brokenPromiseToMaybeDelivered ( cx->clientInfo->get().firstProxy.get().commit.tryGetReply(req) ) );
if (cx->clientInfo->get().firstCommitProxy.present()) {
reply = throwErrorOr(brokenPromiseToMaybeDelivered(
cx->clientInfo->get().firstCommitProxy.get().commit.tryGetReply(req)));
} else {
const std::vector<MasterProxyInterface>& proxies = cx->clientInfo->get().masterProxies;
const std::vector<CommitProxyInterface>& proxies = cx->clientInfo->get().commitProxies;
reply = proxies.size() ? throwErrorOr ( brokenPromiseToMaybeDelivered ( proxies[0].commit.tryGetReply(req) ) ) : Never();
}
} else {
reply = basicLoadBalance( cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::commit, req, TaskPriority::DefaultPromiseEndpoint, true );
reply = basicLoadBalance(cx->getCommitProxies(info.useProvisionalProxies), &CommitProxyInterface::commit,
req, TaskPriority::DefaultPromiseEndpoint, true);
}
choose {
@ -3531,8 +3549,9 @@ ACTOR static Future<Void> tryCommit( Database cx, Reference<TransactionLogInfo>
// We don't know if the commit happened, and it might even still be in flight.
if (!options.causalWriteRisky) {
// Make sure it's not still in flight, either by ensuring the master we submitted to is dead, or the version we submitted with is dead, or by committing a conflicting transaction successfully
//if ( cx->getMasterProxies()->masterGeneration <= originalMasterGeneration )
// Make sure it's not still in flight, either by ensuring the master we submitted to is dead, or the
// version we submitted with is dead, or by committing a conflicting transaction successfully
// if ( cx->getCommitProxies()->masterGeneration <= originalMasterGeneration )
// To ensure the original request is not in flight, we need a key range which intersects its read conflict ranges
// We pick a key range which also intersects its write conflict ranges, since that avoids potentially creating conflicts where there otherwise would be none
@ -3879,12 +3898,14 @@ ACTOR Future<GetReadVersionReply> getConsistentReadVersion(SpanID parentSpan, Da
TransactionPriority priority, uint32_t flags,
TransactionTagMap<uint32_t> tags, Optional<UID> debugID) {
state Span span("NAPI:getConsistentReadVersion"_loc, parentSpan);
try {
++cx->transactionReadVersionBatches;
if( debugID.present() )
g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), "NativeAPI.getConsistentReadVersion.Before");
loop {
++cx->transactionReadVersionBatches;
if( debugID.present() )
g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), "NativeAPI.getConsistentReadVersion.Before");
loop {
try {
state GetReadVersionRequest req( span.context, transactionCount, priority, flags, tags, debugID );
choose {
when ( wait( cx->onProxiesChanged() ) ) {}
when ( GetReadVersionReply v = wait( basicLoadBalance( cx->getGrvProxies(flags & GetReadVersionRequest::FLAG_USE_PROVISIONAL_PROXIES), &GrvProxyInterface::getConsistentReadVersion, req, cx->taskID ) ) ) {
@ -3913,12 +3934,17 @@ ACTOR Future<GetReadVersionReply> getConsistentReadVersion(SpanID parentSpan, Da
return v;
}
}
} catch (Error& e) {
if (e.code() != error_code_broken_promise && e.code() != error_code_batch_transaction_throttled)
TraceEvent(SevError, "GetConsistentReadVersionError").error(e);
if(e.code() == error_code_batch_transaction_throttled && !cx->apiVersionAtLeast(630)) {
wait(delayJittered(5.0));
} else {
throw;
}
}
} catch (Error& e) {
if (e.code() != error_code_broken_promise && e.code() != error_code_batch_transaction_throttled)
TraceEvent(SevError, "GetConsistentReadVersionError").error(e);
throw;
}
}
ACTOR Future<Void> readVersionBatcher( DatabaseContext *cx, FutureStream<DatabaseContext::VersionRequest> versionStream, TransactionPriority priority, uint32_t flags ) {
@ -4433,8 +4459,8 @@ ACTOR Future<Standalone<VectorRef<DDMetricsRef>>> waitDataDistributionMetricsLis
choose {
when(wait(cx->onProxiesChanged())) {}
when(ErrorOr<GetDDMetricsReply> rep =
wait(errorOr(basicLoadBalance(cx->getMasterProxies(false), &MasterProxyInterface::getDDMetrics,
GetDDMetricsRequest(keys, shardLimit))))) {
wait(errorOr(basicLoadBalance(cx->getCommitProxies(false), &CommitProxyInterface::getDDMetrics,
GetDDMetricsRequest(keys, shardLimit))))) {
if (rep.isError()) {
throw rep.getError();
}
@ -4539,7 +4565,9 @@ ACTOR Future<Void> snapCreate(Database cx, Standalone<StringRef> snapCmd, UID sn
loop {
choose {
when(wait(cx->onProxiesChanged())) {}
when(wait(basicLoadBalance(cx->getMasterProxies(false), &MasterProxyInterface::proxySnapReq, ProxySnapRequest(snapCmd, snapUID, snapUID), cx->taskID, true /*atmostOnce*/ ))) {
when(wait(basicLoadBalance(cx->getCommitProxies(false), &CommitProxyInterface::proxySnapReq,
ProxySnapRequest(snapCmd, snapUID, snapUID), cx->taskID,
true /*atmostOnce*/))) {
TraceEvent("SnapCreateExit")
.detail("SnapCmd", snapCmd.toString())
.detail("UID", snapUID);
@ -4567,8 +4595,8 @@ ACTOR Future<bool> checkSafeExclusions(Database cx, vector<AddressExclusion> exc
choose {
when(wait(cx->onProxiesChanged())) {}
when(ExclusionSafetyCheckReply _ddCheck =
wait(basicLoadBalance(cx->getMasterProxies(false), &MasterProxyInterface::exclusionSafetyCheckReq,
req, cx->taskID))) {
wait(basicLoadBalance(cx->getCommitProxies(false),
&CommitProxyInterface::exclusionSafetyCheckReq, req, cx->taskID))) {
ddCheck = _ddCheck.safe;
break;
}

View File

@ -30,7 +30,7 @@
#include "flow/flow.h"
#include "flow/TDMetric.actor.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/MasterProxyInterface.h"
#include "fdbclient/CommitProxyInterface.h"
#include "fdbclient/FDBOptions.g.h"
#include "fdbclient/CoordinationInterface.h"
#include "fdbclient/ClusterInterface.h"

View File

@ -1338,7 +1338,7 @@ Future< Standalone<RangeResultRef> > ReadYourWritesTransaction::getRange(
if(begin.getKey() > maxKey || end.getKey() > maxKey)
return key_outside_legal_range();
//This optimization prevents NULL operations from being added to the conflict range
//This optimization prevents nullptr operations from being added to the conflict range
if( limits.isReached() ) {
TEST(true); // RYW range read limit 0
return Standalone<RangeResultRef>();
@ -2053,9 +2053,6 @@ void ReadYourWritesTransaction::setOptionImpl( FDBTransactionOptions::Option opt
case FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES:
validateOptionValue(value, false);
options.specialKeySpaceChangeConfiguration = true;
// By default, it allows to read system keys
// More options will be implicitly enabled if needed when doing set or clear
options.readSystemKeys = true;
break;
default:
break;

View File

@ -54,6 +54,7 @@ struct RestoreSysInfo;
struct RestoreApplierInterface;
struct RestoreFinishRequest;
struct RestoreSamplesRequest;
struct RestoreUpdateRateRequest;
// RestoreSysInfo includes information each (type of) restore roles should know.
// At this moment, it only include appliers. We keep the name for future extension.
@ -174,6 +175,7 @@ struct RestoreApplierInterface : RestoreRoleInterface {
RequestStream<RestoreVersionBatchRequest> initVersionBatch;
RequestStream<RestoreSimpleRequest> collectRestoreRoleInterfaces;
RequestStream<RestoreFinishRequest> finishRestore;
RequestStream<RestoreUpdateRateRequest> updateRate;
bool operator==(RestoreWorkerInterface const& r) const { return id() == r.id(); }
bool operator!=(RestoreWorkerInterface const& r) const { return id() != r.id(); }
@ -193,12 +195,13 @@ struct RestoreApplierInterface : RestoreRoleInterface {
initVersionBatch.getEndpoint(TaskPriority::LoadBalancedEndpoint);
collectRestoreRoleInterfaces.getEndpoint(TaskPriority::LoadBalancedEndpoint);
finishRestore.getEndpoint(TaskPriority::LoadBalancedEndpoint);
updateRate.getEndpoint(TaskPriority::LoadBalancedEndpoint);
}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, *(RestoreRoleInterface*)this, heartbeat, sendMutationVector, applyToDB, initVersionBatch,
collectRestoreRoleInterfaces, finishRestore);
collectRestoreRoleInterfaces, finishRestore, updateRate);
}
std::string toString() const { return nodeID.toString(); }
@ -616,6 +619,50 @@ struct RestoreFinishRequest : TimedRequest {
}
};
struct RestoreUpdateRateReply : TimedRequest {
constexpr static FileIdentifier file_identifier = 13018414;
UID id;
double remainMB; // remaining data in MB to write to DB;
RestoreUpdateRateReply() = default;
explicit RestoreUpdateRateReply(UID id, double remainMB) : id(id), remainMB(remainMB) {}
std::string toString() const {
std::stringstream ss;
ss << "RestoreUpdateRateReply NodeID:" << id.toString() << " remainMB:" << remainMB;
return ss.str();
}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, id, remainMB);
}
};
struct RestoreUpdateRateRequest : TimedRequest {
constexpr static FileIdentifier file_identifier = 13018415;
int batchIndex;
double writeMB;
ReplyPromise<RestoreUpdateRateReply> reply;
RestoreUpdateRateRequest() = default;
explicit RestoreUpdateRateRequest(int batchIndex, double writeMB) : batchIndex(batchIndex), writeMB(writeMB) {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, batchIndex, writeMB, reply);
}
std::string toString() const {
std::stringstream ss;
ss << "RestoreUpdateRateRequest batchIndex:" << batchIndex << " writeMB:" << writeMB;
return ss.str();
}
};
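// Editor's illustrative sketch (not part of this patch): how the new updateRate
// endpoint could be driven from the controller side. The names applierInterf,
// batchIndex and allowedWriteMB are hypothetical; getReply() is the standard flow
// RequestStream call used for the other endpoints in this interface.
//
//   RestoreUpdateRateReply rep =
//       wait(applierInterf.updateRate.getReply(RestoreUpdateRateRequest(batchIndex, allowedWriteMB)));
//   double remaining = rep.remainMB;   // data (in MB) the applier still has to write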
struct RestoreRequest {
constexpr static FileIdentifier file_identifier = 16035338;

View File

@ -47,7 +47,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"storage",
"transaction",
"resolution",
"proxy",
"commit_proxy",
"grv_proxy",
"master",
"test",
@ -84,7 +84,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"role":{
"$enum":[
"master",
"proxy",
"commit_proxy",
"grv_proxy",
"log",
"storage",
@ -191,6 +191,13 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"estimated_cost":{
"hz": 0.0
}
},
"busiest_write_tag":{
"tag": "",
"fractional_cost": 0.0,
"estimated_cost":{
"hz": 0.0
}
}
}
],
@ -271,15 +278,20 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"run_loop_busy":0.2
}
},
"old_logs":[
"logs":[
{
"logs":[
"log_interfaces":[
{
"id":"7f8d623d0cb9966e",
"healthy":true,
"address":"1.2.3.4:1234"
}
],
"epoch":1,
"current":false,
"begin_version":23,
"end_version":112315141,
"possibly_losing_data":true,
"log_replication_factor":3,
"log_write_anti_quorum":0,
"log_fault_tolerance":2,
@ -346,15 +358,11 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"auto" : {
"busy_read" : 0,
"busy_write" : 0,
"count" : 0
"count" : 0,
"recommended_only": 0
},
"manual" : {
"count" : 0
},
"recommend" : {
"busy_read" : 0,
"busy_write" : 0,
"count" : 0
}
},
"limiting_queue_bytes_storage_server":0,
@ -483,7 +491,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
R"statusSchema(
"recovery_state":{
"required_resolvers":1,
"required_proxies":1,
"required_commit_proxies":1,
"required_grv_proxies":1,
"name":{
"$enum":[
@ -672,11 +680,11 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"address":"10.0.4.1"
}
],
"auto_proxies":3,
"auto_commit_proxies":3,
"auto_grv_proxies":1,
"auto_resolvers":1,
"auto_logs":3,
"proxies":5,
"commit_proxies":5,
"grv_proxies":1,
"backup_worker_enabled":1
},
@ -876,11 +884,11 @@ const KeyRef JSONSchemas::clusterConfigurationSchema = LiteralStringRef(R"config
"ssd-2",
"memory"
]},
"auto_proxies":3,
"auto_commit_proxies":3,
"auto_grv_proxies":1,
"auto_resolvers":1,
"auto_logs":3,
"proxies":5
"commit_proxies":5,
"grv_proxies":1
})configSchema");

View File

@ -36,7 +36,9 @@ std::unordered_map<SpecialKeySpace::MODULE, KeyRange> SpecialKeySpace::moduleToB
KeyRangeRef(LiteralStringRef("\xff\xff/metrics/"), LiteralStringRef("\xff\xff/metrics0")) },
{ SpecialKeySpace::MODULE::MANAGEMENT,
KeyRangeRef(LiteralStringRef("\xff\xff/management/"), LiteralStringRef("\xff\xff/management0")) },
{ SpecialKeySpace::MODULE::ERRORMSG, singleKeyRange(LiteralStringRef("\xff\xff/error_message")) }
{ SpecialKeySpace::MODULE::ERRORMSG, singleKeyRange(LiteralStringRef("\xff\xff/error_message")) },
{ SpecialKeySpace::MODULE::CONFIGURATION,
KeyRangeRef(LiteralStringRef("\xff\xff/configuration/"), LiteralStringRef("\xff\xff/configuration0")) }
};
std::unordered_map<std::string, KeyRange> SpecialKeySpace::managementApiCommandToRange = {
@ -48,6 +50,9 @@ std::unordered_map<std::string, KeyRange> SpecialKeySpace::managementApiCommandT
std::set<std::string> SpecialKeySpace::options = { "excluded/force", "failed/force" };
Standalone<RangeResultRef> rywGetRange(ReadYourWritesTransaction* ryw, const KeyRangeRef& kr,
const Standalone<RangeResultRef>& res);
// This function will move the given KeySelector as far as possible to the standard form:
// orEqual == false && offset == 1 (Standard form)
// If the corresponding key is not in the underlying key range, it will move over the range
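// Editor's note illustrating the standard form described above (not part of this
// patch): with orEqual == false, offset == 1 selects the first key >= k, offset == 2
// the key after that, and offset == 0 the last key < k. For keys {"a", "b", "d"},
// the selector ("c", false, 1) resolves to "d" and ("c", false, 0) resolves to "b".
// Normalization moves other selectors toward this shape.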
@ -456,6 +461,24 @@ Future<Void> SpecialKeySpace::commit(ReadYourWritesTransaction* ryw) {
return commitActor(this, ryw);
}
SKSCTestImpl::SKSCTestImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {}
Future<Standalone<RangeResultRef>> SKSCTestImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const {
ASSERT(range.contains(kr));
auto resultFuture = ryw->getRange(kr, CLIENT_KNOBS->TOO_MANY);
// all keys are written to RYW; since GRV is set, the read should happen locally
ASSERT(resultFuture.isReady());
auto result = resultFuture.getValue();
ASSERT(!result.more && result.size() < CLIENT_KNOBS->TOO_MANY);
auto kvs = resultFuture.getValue();
return rywGetRange(ryw, kr, kvs);
}
Future<Optional<std::string>> SKSCTestImpl::commit(ReadYourWritesTransaction* ryw) {
ASSERT(false);
return Optional<std::string>();
}
ReadConflictRangeImpl::ReadConflictRangeImpl(KeyRangeRef kr) : SpecialKeyRangeReadImpl(kr) {}
ACTOR static Future<Standalone<RangeResultRef>> getReadConflictRangeImpl(ReadYourWritesTransaction* ryw, KeyRange kr) {
@ -570,86 +593,82 @@ void ManagementCommandsOptionsImpl::clear(ReadYourWritesTransaction* ryw, const
}
}
Key ManagementCommandsOptionsImpl::decode(const KeyRef& key) const {
// Should never be used
ASSERT(false);
return key;
}
Key ManagementCommandsOptionsImpl::encode(const KeyRef& key) const {
// Should never be used
ASSERT(false);
return key;
}
Future<Optional<std::string>> ManagementCommandsOptionsImpl::commit(ReadYourWritesTransaction* ryw) {
// Nothing to do, keys should be used by other impls' commit callback
return Optional<std::string>();
}
// read from rwModule
ACTOR Future<Standalone<RangeResultRef>> rwModuleGetRangeActor(ReadYourWritesTransaction* ryw,
const SpecialKeyRangeRWImpl* impl, KeyRangeRef kr) {
state KeyRangeRef range = impl->getKeyRange();
Standalone<RangeResultRef> resultWithoutPrefix =
wait(ryw->getRange(ryw->getDatabase()->specialKeySpace->decode(kr), CLIENT_KNOBS->TOO_MANY));
ASSERT(!resultWithoutPrefix.more && resultWithoutPrefix.size() < CLIENT_KNOBS->TOO_MANY);
Standalone<RangeResultRef> rywGetRange(ReadYourWritesTransaction* ryw, const KeyRangeRef& kr,
const Standalone<RangeResultRef>& res) {
// "res" is the read result regardless of your writes, if ryw disabled, return immediately
if (ryw->readYourWritesDisabled()) return res;
// If ryw enabled, we update it with writes from the transaction
Standalone<RangeResultRef> result;
if (ryw->readYourWritesDisabled()) {
for (const KeyValueRef& kv : resultWithoutPrefix)
result.push_back_deep(result.arena(), KeyValueRef(impl->encode(kv.key), kv.value));
} else {
RangeMap<Key, std::pair<bool, Optional<Value>>, KeyRangeRef>::Ranges ranges =
ryw->getSpecialKeySpaceWriteMap().containedRanges(range);
RangeMap<Key, std::pair<bool, Optional<Value>>, KeyRangeRef>::iterator iter = ranges.begin();
int index = 0;
while (iter != ranges.end()) {
// add all previous entries into result
Key rk = impl->encode(resultWithoutPrefix[index].key);
while (index < resultWithoutPrefix.size() && rk < iter->begin()) {
result.push_back_deep(result.arena(), KeyValueRef(rk, resultWithoutPrefix[index].value));
++index;
}
RangeMap<Key, std::pair<bool, Optional<Value>>, KeyRangeRef>::Ranges ranges =
ryw->getSpecialKeySpaceWriteMap().containedRanges(kr);
RangeMap<Key, std::pair<bool, Optional<Value>>, KeyRangeRef>::iterator iter = ranges.begin();
auto iter2 = res.begin();
result.arena().dependsOn(res.arena());
while (iter != ranges.end() || iter2 != res.end()) {
if (iter == ranges.end()) {
result.push_back(result.arena(), KeyValueRef(iter2->key, iter2->value));
++iter2;
} else if (iter2 == res.end()) {
// insert if it is a set entry
std::pair<bool, Optional<Value>> entry = iter->value();
if (entry.first && entry.second.present()) {
result.push_back_deep(result.arena(), KeyValueRef(iter->begin(), entry.second.get()));
}
++iter;
} else if (iter->range().contains(iter2->key)) {
std::pair<bool, Optional<Value>> entry = iter->value();
// if this is a valid range either for set or clear, move iter2 outside the range
if (entry.first) {
// add the writen entries if exists
if (entry.second.present()) {
// insert if this is a set entry
if (entry.second.present())
result.push_back_deep(result.arena(), KeyValueRef(iter->begin(), entry.second.get()));
}
// move index to skip all entries in the iter->range
while (index < resultWithoutPrefix.size() &&
iter->range().contains(impl->encode(resultWithoutPrefix[index].key)))
++index;
// move iter2 outside the range
while (iter2 != res.end() && iter->range().contains(iter2->key)) ++iter2;
}
++iter;
} else if (iter->begin() > iter2->key) {
result.push_back(result.arena(), KeyValueRef(iter2->key, iter2->value));
++iter2;
} else if (iter->end() <= iter2->key) {
// insert if it is a set entry
std::pair<bool, Optional<Value>> entry = iter->value();
if (entry.first && entry.second.present()) {
result.push_back_deep(result.arena(), KeyValueRef(iter->begin(), entry.second.get()));
}
++iter;
}
// add all remaining entries into result
while (index < resultWithoutPrefix.size()) {
const KeyValueRef& kv = resultWithoutPrefix[index];
result.push_back_deep(result.arena(), KeyValueRef(impl->encode(kv.key), kv.value));
++index;
}
}
return result;
}
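// Editor's worked example (not part of this patch) of the merge performed by
// rywGetRange() above. Suppose the underlying read returned
//     res = { ("a", "1"), ("c", "3") }
// and this transaction's special-key-space write map stages set("b" -> "2") and
// clear("c"). With read-your-writes enabled the merged result is
//     { ("a", "1"), ("b", "2") }
// staged sets are inserted in key order, keys covered by a staged clear are dropped,
// and untouched keys from res pass through unchanged.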
// read from those readwrite modules in which special keys have one-to-one mapping with real persisted keys
ACTOR Future<Standalone<RangeResultRef>> rwModuleWithMappingGetRangeActor(ReadYourWritesTransaction* ryw,
const SpecialKeyRangeRWImpl* impl,
KeyRangeRef kr) {
Standalone<RangeResultRef> resultWithoutPrefix =
wait(ryw->getTransaction().getRange(ryw->getDatabase()->specialKeySpace->decode(kr), CLIENT_KNOBS->TOO_MANY));
ASSERT(!resultWithoutPrefix.more && resultWithoutPrefix.size() < CLIENT_KNOBS->TOO_MANY);
Standalone<RangeResultRef> result;
for (const KeyValueRef& kv : resultWithoutPrefix)
result.push_back_deep(result.arena(), KeyValueRef(impl->encode(kv.key), kv.value));
return rywGetRange(ryw, kr, result);
}
ExcludeServersRangeImpl::ExcludeServersRangeImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {}
Future<Standalone<RangeResultRef>> ExcludeServersRangeImpl::getRange(ReadYourWritesTransaction* ryw,
KeyRangeRef kr) const {
return rwModuleGetRangeActor(ryw, this, kr);
return rwModuleWithMappingGetRangeActor(ryw, this, kr);
}
void ExcludeServersRangeImpl::set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) {
ryw->getSpecialKeySpaceWriteMap().insert(key, std::make_pair(true, Optional<Value>(value)));
}
void ExcludeServersRangeImpl::clear(ReadYourWritesTransaction* ryw, const KeyRef& key) {
ryw->getSpecialKeySpaceWriteMap().insert(key, std::make_pair(true, Optional<Value>()));
}
void ExcludeServersRangeImpl::clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) {
ryw->getSpecialKeySpaceWriteMap().insert(range, std::make_pair(true, Optional<Value>()));
// ignore value
ryw->getSpecialKeySpaceWriteMap().insert(key, std::make_pair(true, Optional<Value>(ValueRef())));
}
Key ExcludeServersRangeImpl::decode(const KeyRef& key) const {
@ -671,7 +690,7 @@ bool parseNetWorkAddrFromKeys(ReadYourWritesTransaction* ryw, bool failed, std::
while (iter != ranges.end()) {
auto entry = iter->value();
// only check for exclude(set) operation, include(clear) are not checked
TraceEvent(SevInfo, "ParseNetworkAddress")
TraceEvent(SevDebug, "ParseNetworkAddress")
.detail("Valid", entry.first)
.detail("Set", entry.second.present())
.detail("Key", iter->begin().toString());
@ -810,7 +829,6 @@ ACTOR Future<bool> checkExclusion(Database db, std::vector<AddressExclusion>* ad
}
void includeServers(ReadYourWritesTransaction* ryw) {
ryw->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
ryw->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
ryw->setOption(FDBTransactionOptions::LOCK_AWARE);
ryw->setOption(FDBTransactionOptions::USE_PROVISIONAL_PROXIES);
@ -874,19 +892,12 @@ FailedServersRangeImpl::FailedServersRangeImpl(KeyRangeRef kr) : SpecialKeyRange
Future<Standalone<RangeResultRef>> FailedServersRangeImpl::getRange(ReadYourWritesTransaction* ryw,
KeyRangeRef kr) const {
return rwModuleGetRangeActor(ryw, this, kr);
return rwModuleWithMappingGetRangeActor(ryw, this, kr);
}
void FailedServersRangeImpl::set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) {
ryw->getSpecialKeySpaceWriteMap().insert(key, std::make_pair(true, Optional<Value>(value)));
}
void FailedServersRangeImpl::clear(ReadYourWritesTransaction* ryw, const KeyRef& key) {
ryw->getSpecialKeySpaceWriteMap().insert(key, std::make_pair(true, Optional<Value>()));
}
void FailedServersRangeImpl::clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) {
ryw->getSpecialKeySpaceWriteMap().insert(range, std::make_pair(true, Optional<Value>()));
// ignore value
ryw->getSpecialKeySpaceWriteMap().insert(key, std::make_pair(true, Optional<Value>(ValueRef())));
}
Key FailedServersRangeImpl::decode(const KeyRef& key) const {
@ -943,8 +954,14 @@ ACTOR Future<Standalone<RangeResultRef>> ExclusionInProgressActor(ReadYourWrites
}
}
// sort and remove :tls
std::set<std::string> inProgressAddresses;
for (auto const& address : inProgressExclusion) {
Key addrKey = prefix.withSuffix(address.toString());
inProgressAddresses.insert(formatIpPort(address.ip, address.port));
}
for (auto const& address : inProgressAddresses) {
Key addrKey = prefix.withSuffix(address);
if (kr.contains(addrKey)) {
result.push_back(result.arena(), KeyValueRef(addrKey, ValueRef()));
result.arena().dependsOn(addrKey.arena());
@ -959,3 +976,148 @@ Future<Standalone<RangeResultRef>> ExclusionInProgressRangeImpl::getRange(ReadYo
KeyRangeRef kr) const {
return ExclusionInProgressActor(ryw, getKeyRange().begin, kr);
}
ACTOR Future<Standalone<RangeResultRef>> getProcessClassActor(ReadYourWritesTransaction* ryw, KeyRef prefix,
KeyRangeRef kr) {
vector<ProcessData> _workers = wait(getWorkers(&ryw->getTransaction()));
auto workers = _workers; // strip const
// Note: the sort is by string, which is counter-intuitive, e.g. 1.1.1.1:11 < 1.1.1.1:5
std::sort(workers.begin(), workers.end(), [](const ProcessData& lhs, const ProcessData& rhs) {
return formatIpPort(lhs.address.ip, lhs.address.port) < formatIpPort(rhs.address.ip, rhs.address.port);
});
Standalone<RangeResultRef> result;
for (auto& w : workers) {
// exclude :tls in keys even if the network address is TLS
KeyRef k(prefix.withSuffix(formatIpPort(w.address.ip, w.address.port), result.arena()));
if (kr.contains(k)) {
ValueRef v(result.arena(), w.processClass.toString());
result.push_back(result.arena(), KeyValueRef(k, v));
}
}
return rywGetRange(ryw, kr, result);
}
ACTOR Future<Optional<std::string>> processClassCommitActor(ReadYourWritesTransaction* ryw, KeyRangeRef range) {
// enable related options
ryw->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
ryw->setOption(FDBTransactionOptions::LOCK_AWARE);
ryw->setOption(FDBTransactionOptions::USE_PROVISIONAL_PROXIES);
vector<ProcessData> workers = wait(
getWorkers(&ryw->getTransaction())); // make sure we use the Transaction object to avoid used_during_commit()
auto ranges = ryw->getSpecialKeySpaceWriteMap().containedRanges(range);
auto iter = ranges.begin();
while (iter != ranges.end()) {
auto entry = iter->value();
// only loop through set operations; clear operations do not exist here
if (entry.first && entry.second.present()) {
// parse network address
Key address = iter->begin().removePrefix(range.begin);
AddressExclusion addr = AddressExclusion::parse(address);
// parse class type
ValueRef processClassType = entry.second.get();
ProcessClass processClass(processClassType.toString(), ProcessClass::DBSource);
// make sure we use the underlying Transaction object to avoid used_during_commit()
bool foundChange = false;
for (int i = 0; i < workers.size(); i++) {
if (addr.excludes(workers[i].address)) {
if (processClass.classType() != ProcessClass::InvalidClass)
ryw->getTransaction().set(processClassKeyFor(workers[i].locality.processId().get()),
processClassValue(processClass));
else
ryw->getTransaction().clear(processClassKeyFor(workers[i].locality.processId().get()));
foundChange = true;
}
}
if (foundChange)
ryw->getTransaction().set(processClassChangeKey, deterministicRandom()->randomUniqueID().toString());
}
++iter;
}
return Optional<std::string>();
}
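// Editor's usage sketch (not part of this patch): with the \xff\xff/configuration/
// module registered in DatabaseContext above, a client can read and change process
// classes purely through special keys. The address below is hypothetical; a value of
// "default" clears the database-set class for that process.
//
//   ReadYourWritesTransaction tr(db);
//   tr.setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES);
//   tr.set(LiteralStringRef("\xff\xff/configuration/process/class_type/10.0.0.1:4500"),
//          LiteralStringRef("storage"));
//   wait(tr.commit());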
ProcessClassRangeImpl::ProcessClassRangeImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {}
Future<Standalone<RangeResultRef>> ProcessClassRangeImpl::getRange(ReadYourWritesTransaction* ryw,
KeyRangeRef kr) const {
return getProcessClassActor(ryw, getKeyRange().begin, kr);
}
Future<Optional<std::string>> ProcessClassRangeImpl::commit(ReadYourWritesTransaction* ryw) {
// Validate network address and process class type
Optional<std::string> errorMsg;
auto ranges = ryw->getSpecialKeySpaceWriteMap().containedRanges(getKeyRange());
auto iter = ranges.begin();
while (iter != ranges.end()) {
auto entry = iter->value();
// only check setclass (set) operations; clear operations are forbidden and thus do not exist
if (entry.first && entry.second.present()) {
// validate network address
Key address = iter->begin().removePrefix(range.begin);
AddressExclusion addr = AddressExclusion::parse(address);
if (!addr.isValid()) {
std::string error = "ERROR: \'" + address.toString() + "\' is not a valid network endpoint address\n";
if (address.toString().find(":tls") != std::string::npos)
error += " Do not include the `:tls' suffix when naming a process\n";
errorMsg = ManagementAPIError::toJsonString(false, "setclass", error);
return errorMsg;
}
// validate class type
ValueRef processClassType = entry.second.get();
ProcessClass processClass(processClassType.toString(), ProcessClass::DBSource);
if (processClass.classType() == ProcessClass::InvalidClass &&
processClassType != LiteralStringRef("default")) {
std::string error = "ERROR: \'" + processClassType.toString() + "\' is not a valid process class\n";
errorMsg = ManagementAPIError::toJsonString(false, "setclass", error);
return errorMsg;
}
}
++iter;
}
return processClassCommitActor(ryw, getKeyRange());
}
void throwNotAllowedError(ReadYourWritesTransaction* ryw) {
auto msg = ManagementAPIError::toJsonString(false, "setclass",
"Clear operation is meaningless thus forbidden for setclass");
ryw->setSpecialKeySpaceErrorMsg(msg);
throw special_keys_api_failure();
}
void ProcessClassRangeImpl::clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) {
return throwNotAllowedError(ryw);
}
void ProcessClassRangeImpl::clear(ReadYourWritesTransaction* ryw, const KeyRef& key) {
return throwNotAllowedError(ryw);
}
ACTOR Future<Standalone<RangeResultRef>> getProcessClassSourceActor(ReadYourWritesTransaction* ryw, KeyRef prefix,
KeyRangeRef kr) {
vector<ProcessData> _workers = wait(getWorkers(&ryw->getTransaction()));
auto workers = _workers; // strip const
// Note: the sort is by string, which is counter-intuitive, e.g. 1.1.1.1:11 < 1.1.1.1:5
std::sort(workers.begin(), workers.end(), [](const ProcessData& lhs, const ProcessData& rhs) {
return formatIpPort(lhs.address.ip, lhs.address.port) < formatIpPort(rhs.address.ip, rhs.address.port);
});
Standalone<RangeResultRef> result;
for (auto& w : workers) {
// exclude :tls in keys even if the network address is TLS
Key k(prefix.withSuffix(formatIpPort(w.address.ip, w.address.port)));
if (kr.contains(k)) {
Value v(w.processClass.sourceString());
result.push_back(result.arena(), KeyValueRef(k, v));
result.arena().dependsOn(k.arena());
result.arena().dependsOn(v.arena());
}
}
return result;
}
ProcessClassSourceRangeImpl::ProcessClassSourceRangeImpl(KeyRangeRef kr) : SpecialKeyRangeReadImpl(kr) {}
Future<Standalone<RangeResultRef>> ProcessClassSourceRangeImpl::getRange(ReadYourWritesTransaction* ryw,
KeyRangeRef kr) const {
return getProcessClassSourceActor(ryw, getKeyRange().begin, kr);
}

View File

@ -67,15 +67,29 @@ private:
class SpecialKeyRangeRWImpl : public SpecialKeyRangeReadImpl {
public:
virtual void set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) = 0;
virtual void clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) = 0;
virtual void clear(ReadYourWritesTransaction* ryw, const KeyRef& key) = 0;
virtual void set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) {
ryw->getSpecialKeySpaceWriteMap().insert(key, std::make_pair(true, Optional<Value>(value)));
}
virtual void clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) {
ryw->getSpecialKeySpaceWriteMap().insert(range, std::make_pair(true, Optional<Value>()));
}
virtual void clear(ReadYourWritesTransaction* ryw, const KeyRef& key) {
ryw->getSpecialKeySpaceWriteMap().insert(key, std::make_pair(true, Optional<Value>()));
}
virtual Future<Optional<std::string>> commit(
ReadYourWritesTransaction* ryw) = 0; // all delayed async operations of writes in special-key-space
// Given the special key to write, return the real key that needs to be modified
virtual Key decode(const KeyRef& key) const = 0;
virtual Key decode(const KeyRef& key) const {
// Default implementation should never be used
ASSERT(false);
return key;
}
// Given the read key, return the corresponding special key
virtual Key encode(const KeyRef& key) const = 0;
virtual Key encode(const KeyRef& key) const {
// Default implementation should never be used
ASSERT(false);
return key;
};
explicit SpecialKeyRangeRWImpl(KeyRangeRef kr) : SpecialKeyRangeReadImpl(kr) {}
@ -125,6 +139,7 @@ class SpecialKeySpace {
public:
enum class MODULE {
CLUSTERFILEPATH,
CONFIGURATION, // Configuration of the cluster
CONNECTIONSTRING,
ERRORMSG, // A single key space contains a json string which describes the last error in special-key-space
MANAGEMENT, // Management-API
@ -201,6 +216,14 @@ private:
void modulesBoundaryInit();
};
// Used for SpecialKeySpaceCorrectnessWorkload
class SKSCTestImpl : public SpecialKeyRangeRWImpl {
public:
explicit SKSCTestImpl(KeyRangeRef kr);
Future<Standalone<RangeResultRef>> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override;
Future<Optional<std::string>> commit(ReadYourWritesTransaction* ryw) override;
};
// Use special key prefix "\xff\xff/transaction/conflicting_keys/<some_key>",
// to retrieve keys which caused latest not_committed(conflicting with another transaction) error.
// The returned key value pairs are interpreted as:
@ -238,8 +261,6 @@ public:
void set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) override;
void clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) override;
void clear(ReadYourWritesTransaction* ryw, const KeyRef& key) override;
Key decode(const KeyRef& key) const override;
Key encode(const KeyRef& key) const override;
Future<Optional<std::string>> commit(ReadYourWritesTransaction* ryw) override;
};
@ -248,8 +269,6 @@ public:
explicit ExcludeServersRangeImpl(KeyRangeRef kr);
Future<Standalone<RangeResultRef>> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override;
void set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) override;
void clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) override;
void clear(ReadYourWritesTransaction* ryw, const KeyRef& key) override;
Key decode(const KeyRef& key) const override;
Key encode(const KeyRef& key) const override;
Future<Optional<std::string>> commit(ReadYourWritesTransaction* ryw) override;
@ -260,8 +279,6 @@ public:
explicit FailedServersRangeImpl(KeyRangeRef kr);
Future<Standalone<RangeResultRef>> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override;
void set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) override;
void clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) override;
void clear(ReadYourWritesTransaction* ryw, const KeyRef& key) override;
Key decode(const KeyRef& key) const override;
Key encode(const KeyRef& key) const override;
Future<Optional<std::string>> commit(ReadYourWritesTransaction* ryw) override;
@ -273,5 +290,20 @@ public:
Future<Standalone<RangeResultRef>> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override;
};
class ProcessClassRangeImpl : public SpecialKeyRangeRWImpl {
public:
explicit ProcessClassRangeImpl(KeyRangeRef kr);
Future<Standalone<RangeResultRef>> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override;
Future<Optional<std::string>> commit(ReadYourWritesTransaction* ryw) override;
void clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) override;
void clear(ReadYourWritesTransaction* ryw, const KeyRef& key) override;
};
class ProcessClassSourceRangeImpl : public SpecialKeyRangeReadImpl {
public:
explicit ProcessClassSourceRangeImpl(KeyRangeRef kr);
Future<Standalone<RangeResultRef>> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override;
};
#include "flow/unactorcompiler.h"
#endif

View File

@ -260,10 +260,10 @@ extern const KeyRangeRef logRangesRange;
Key logRangesEncodeKey(KeyRef keyBegin, UID logUid);
// Returns the start key and optionally the logRange Uid
KeyRef logRangesDecodeKey(KeyRef key, UID* logUid = NULL);
KeyRef logRangesDecodeKey(KeyRef key, UID* logUid = nullptr);
// Returns the end key and optionally the key prefix
Key logRangesDecodeValue(KeyRef keyValue, Key* destKeyPrefix = NULL);
Key logRangesDecodeValue(KeyRef keyValue, Key* destKeyPrefix = nullptr);
// Returns the encoded key value comprised of the end key and destination prefix
Key logRangesEncodeValue(KeyRef keyEnd, KeyRef destPath);

View File

@ -19,7 +19,7 @@
*/
#include "fdbclient/TagThrottle.h"
#include "fdbclient/MasterProxyInterface.h"
#include "fdbclient/CommitProxyInterface.h"
#include "fdbclient/DatabaseContext.h"
#include "flow/actorcompiler.h" // has to be last include
@ -104,7 +104,7 @@ TagThrottleKey TagThrottleKey::fromKey(const KeyRef& key) {
TagThrottleValue TagThrottleValue::fromValue(const ValueRef& value) {
TagThrottleValue throttleValue;
BinaryReader reader(value, IncludeVersion());
BinaryReader reader(value, IncludeVersion(ProtocolVersion::withTagThrottleValueReason()));
reader >> throttleValue;
return throttleValue;
}
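// Editor's sketch (not part of this patch): fromValue() pairs with the writer used
// further below in ThrottleApi, which stamps the value with the protocol version that
// introduced the throttle reason field. A round trip using only calls visible in this
// file:
//
//   BinaryWriter wr(IncludeVersion(ProtocolVersion::withTagThrottleValueReason()));
//   wr << throttle;                                        // a TagThrottleValue
//   TagThrottleValue roundTripped = TagThrottleValue::fromValue(wr.toValue());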
@ -228,7 +228,7 @@ namespace ThrottleApi {
}
TagThrottleValue throttle(tpsRate, expirationTime.present() ? expirationTime.get() : 0, initialDuration,
reason.present() ? reason.get() : TagThrottledReason::UNSET);
BinaryWriter wr(IncludeVersion(ProtocolVersion::withTagThrottleValue()));
BinaryWriter wr(IncludeVersion(ProtocolVersion::withTagThrottleValueReason()));
wr << throttle;
state Value value = wr.toValue();
@ -347,6 +347,7 @@ namespace ThrottleApi {
removed = true;
tr.clear(tag.key);
unthrottledTags ++;
}
if(manualUnthrottledTags > 0) {

View File

@ -234,17 +234,17 @@ public:
ACTOR static Future<bool> taskVerify(Reference<TaskBucket> tb, Reference<ReadYourWritesTransaction> tr, Reference<Task> task) {
if (task->params.find(Task::reservedTaskParamValidKey) == task->params.end()) {
TraceEvent("TB_TaskVerifyInvalidTask")
.detail("Task", task->params[Task::reservedTaskParamKeyType])
.detail("ReservedTaskParamValidKey", "missing");
TraceEvent("TaskBucketTaskVerifyInvalidTask")
.detail("Task", task->params[Task::reservedTaskParamKeyType])
.detail("ReservedTaskParamValidKey", "missing");
return false;
}
if (task->params.find(Task::reservedTaskParamValidValue) == task->params.end()) {
TraceEvent("TB_TaskVerifyInvalidTask")
.detail("Task", task->params[Task::reservedTaskParamKeyType])
.detail("ReservedTaskParamValidKey", task->params[Task::reservedTaskParamValidKey])
.detail("ReservedTaskParamValidValue", "missing");
TraceEvent("TaskBucketTaskVerifyInvalidTask")
.detail("Task", task->params[Task::reservedTaskParamKeyType])
.detail("ReservedTaskParamValidKey", task->params[Task::reservedTaskParamValidKey])
.detail("ReservedTaskParamValidValue", "missing");
return false;
}
@ -253,20 +253,20 @@ public:
Optional<Value> keyValue = wait(tr->get(task->params[Task::reservedTaskParamValidKey]));
if (!keyValue.present()) {
TraceEvent("TB_TaskVerifyInvalidTask")
.detail("Task", task->params[Task::reservedTaskParamKeyType])
.detail("ReservedTaskParamValidKey", task->params[Task::reservedTaskParamValidKey])
.detail("ReservedTaskParamValidValue", task->params[Task::reservedTaskParamValidValue])
.detail("KeyValue", "missing");
TraceEvent("TaskBucketTaskVerifyInvalidTask")
.detail("Task", task->params[Task::reservedTaskParamKeyType])
.detail("ReservedTaskParamValidKey", task->params[Task::reservedTaskParamValidKey])
.detail("ReservedTaskParamValidValue", task->params[Task::reservedTaskParamValidValue])
.detail("KeyValue", "missing");
return false;
}
if (keyValue.get().compare(StringRef(task->params[Task::reservedTaskParamValidValue]))) {
TraceEvent("TB_TaskVerifyAbortedTask")
.detail("Task", task->params[Task::reservedTaskParamKeyType])
.detail("ReservedTaskParamValidKey", task->params[Task::reservedTaskParamValidKey])
.detail("ReservedTaskParamValidValue", task->params[Task::reservedTaskParamValidValue])
.detail("KeyValue", keyValue.get());
TraceEvent("TaskBucketTaskVerifyAbortedTask")
.detail("Task", task->params[Task::reservedTaskParamKeyType])
.detail("ReservedTaskParamValidKey", task->params[Task::reservedTaskParamValidKey])
.detail("ReservedTaskParamValidValue", task->params[Task::reservedTaskParamValidValue])
.detail("KeyValue", keyValue.get());
return false;
}
@ -332,10 +332,10 @@ public:
if(now() - start > 300) {
TraceEvent(SevWarnAlways, "TaskBucketLongExtend")
.detail("Duration", now() - start)
.detail("TaskUID", task->key)
.detail("TaskType", task->params[Task::reservedTaskParamKeyType])
.detail("Priority", task->getPriority());
.detail("Duration", now() - start)
.detail("TaskUID", task->key)
.detail("TaskType", task->params[Task::reservedTaskParamKeyType])
.detail("Priority", task->getPriority());
}
// Take the extendMutex lock until we either succeed or stop trying to extend due to failure
wait(task->extendMutex.take());
@ -402,19 +402,19 @@ public:
}));
}
} catch(Error &e) {
TraceEvent(SevWarn, "TB_ExecuteFailure")
.error(e)
.detail("TaskUID", task->key)
.detail("TaskType", task->params[Task::reservedTaskParamKeyType].printable())
.detail("Priority", task->getPriority());
TraceEvent(SevWarn, "TaskBucketExecuteFailure")
.error(e)
.detail("TaskUID", task->key)
.detail("TaskType", task->params[Task::reservedTaskParamKeyType].printable())
.detail("Priority", task->getPriority());
try {
wait(taskFunc->handleError(cx, task, e));
} catch(Error &e) {
TraceEvent(SevWarn, "TB_ExecuteFailureLogErrorFailed")
.error(e) // output handleError() error instead of original task error
.detail("TaskUID", task->key.printable())
.detail("TaskType", task->params[Task::reservedTaskParamKeyType].printable())
.detail("Priority", task->getPriority());
TraceEvent(SevWarn, "TaskBucketExecuteFailureLogErrorFailed")
.error(e) // output handleError() error instead of original task error
.detail("TaskUID", task->key.printable())
.detail("TaskType", task->params[Task::reservedTaskParamKeyType].printable())
.detail("Priority", task->getPriority());
}
}
@ -727,14 +727,17 @@ public:
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
Standalone<RangeResultRef> values = wait(tr->getRange(subspace.range(), CLIENT_KNOBS->TOO_MANY));
TraceEvent("TaskBucket").detail("DebugPrintRange", "Print DB Range").detail("Key", subspace.key()).detail("Count", values.size()).detail("Msg", msg);
TraceEvent("TaskBucketDebugPrintRange")
.detail("Key", subspace.key())
.detail("Count", values.size())
.detail("Msg", msg);
/*printf("debugPrintRange key: (%d) %s\n", values.size(), printable(subspace.key()).c_str());
for (auto & s : values) {
printf(" key: %-40s value: %s\n", printable(s.key).c_str(), s.value.c_str());
TraceEvent("TaskBucket").detail("DebugPrintRange", msg)
.detail("Key", s.key)
.detail("Value", s.value);
printf(" key: %-40s value: %s\n", printable(s.key).c_str(), s.value.c_str());
TraceEvent("TaskBucketDebugPrintKV").detail("Msg", msg)
.detail("Key", s.key)
.detail("Value", s.value);
}*/
return Void();
@ -870,9 +873,9 @@ ACTOR static Future<Key> actorAddTask(TaskBucket* tb, Reference<ReadYourWritesTr
Optional<Value> validationValue = wait(tr->get(validationKey));
if (!validationValue.present()) {
TraceEvent(SevError, "TB_AddTaskInvalidKey")
.detail("Task", task->params[Task::reservedTaskParamKeyType])
.detail("ValidationKey", validationKey);
TraceEvent(SevError, "TaskBucketAddTaskInvalidKey")
.detail("Task", task->params[Task::reservedTaskParamKeyType])
.detail("ValidationKey", validationKey);
throw invalid_option_value();
}
@ -1138,9 +1141,9 @@ public:
Optional<Value> validationValue = wait(tr->get(validationKey));
if (!validationValue.present()) {
TraceEvent(SevError, "TB_OnSetAddTaskInvalidKey")
.detail("Task", task->params[Task::reservedTaskParamKeyType])
.detail("ValidationKey", validationKey);
TraceEvent(SevError, "TaskBucketOnSetAddTaskInvalidKey")
.detail("Task", task->params[Task::reservedTaskParamKeyType])
.detail("ValidationKey", validationKey);
throw invalid_option_value();
}
@ -1239,6 +1242,6 @@ ACTOR Future<Key> getCompletionKey(TaskCompletionKey *self, Future<Reference<Tas
}
Future<Key> TaskCompletionKey::get(Reference<ReadYourWritesTransaction> tr, Reference<TaskBucket> taskBucket) {
ASSERT(key.present() == (joinFuture.getPtr() == NULL));
ASSERT(key.present() == (joinFuture.getPtr() == nullptr));
return key.present() ? key.get() : getCompletionKey(this, joinFuture->joinedFuture(tr, taskBucket));
}

View File

@ -84,12 +84,12 @@ ThreadSafeDatabase::ThreadSafeDatabase(std::string connFilename, int apiVersion)
catch(...) {
new (db) DatabaseContext(unknown_error());
}
}, NULL);
}, nullptr);
}
ThreadSafeDatabase::~ThreadSafeDatabase() {
DatabaseContext *db = this->db;
onMainThreadVoid( [db](){ db->delref(); }, NULL );
onMainThreadVoid( [db](){ db->delref(); }, nullptr );
}
ThreadSafeTransaction::ThreadSafeTransaction(DatabaseContext* cx) {
@ -107,18 +107,18 @@ ThreadSafeTransaction::ThreadSafeTransaction(DatabaseContext* cx) {
cx->addref();
new (tr) ReadYourWritesTransaction(Database(cx));
},
NULL);
nullptr);
}
ThreadSafeTransaction::~ThreadSafeTransaction() {
ReadYourWritesTransaction *tr = this->tr;
if (tr)
onMainThreadVoid( [tr](){ tr->delref(); }, NULL );
onMainThreadVoid( [tr](){ tr->delref(); }, nullptr );
}
void ThreadSafeTransaction::cancel() {
ReadYourWritesTransaction *tr = this->tr;
onMainThreadVoid( [tr](){ tr->cancel(); }, NULL );
onMainThreadVoid( [tr](){ tr->cancel(); }, nullptr );
}
void ThreadSafeTransaction::setVersion( Version v ) {
@ -328,17 +328,17 @@ ThreadFuture<Void> ThreadSafeTransaction::onError( Error const& e ) {
void ThreadSafeTransaction::operator=(ThreadSafeTransaction&& r) noexcept {
tr = r.tr;
r.tr = NULL;
r.tr = nullptr;
}
ThreadSafeTransaction::ThreadSafeTransaction(ThreadSafeTransaction&& r) noexcept {
tr = r.tr;
r.tr = NULL;
r.tr = nullptr;
}
void ThreadSafeTransaction::reset() {
ReadYourWritesTransaction *tr = this->tr;
onMainThreadVoid( [tr](){ tr->reset(); }, NULL );
onMainThreadVoid( [tr](){ tr->reset(); }, nullptr );
}
extern const char* getSourceVersion();

View File

@ -96,7 +96,7 @@ public:
ThreadFuture<Void> onError( Error const& e ) override;
// These are to permit use as state variables in actors:
ThreadSafeTransaction() : tr(NULL) {}
ThreadSafeTransaction() : tr(nullptr) {}
void operator=(ThreadSafeTransaction&& r) noexcept;
ThreadSafeTransaction(ThreadSafeTransaction&& r) noexcept;

View File

@ -802,7 +802,7 @@ public:
void validate() {
int count=0, height=0;
PTreeImpl::validate<MapPair<K,std::pair<T,Version>>>( root, at, NULL, NULL, count, height );
PTreeImpl::validate<MapPair<K,std::pair<T,Version>>>( root, at, nullptr, nullptr, count, height );
if ( height > 100 )
TraceEvent(SevWarnAlways, "DiabolicalPTreeSize").detail("Size", count).detail("Height", height);
}

View File

@ -195,7 +195,7 @@ description is not currently required but encouraged.
<Option name="next_write_no_write_conflict_range" code="30"
description="The next write performed on this transaction will not generate a write conflict range. As a result, other transactions which read the key(s) being modified by the next write will not conflict with this transaction. Care needs to be taken when using this option on a transaction that is shared between multiple threads. When setting this option, write conflict ranges will be disabled on the next write operation, regardless of what thread it is on." />
<Option name="commit_on_first_proxy" code="40"
description="Committing this transaction will bypass the normal load balancing across proxies and go directly to the specifically nominated 'first proxy'."
description="Committing this transaction will bypass the normal load balancing across commit proxies and go directly to the specifically nominated 'first commit proxy'."
hidden="true" />
<Option name="check_writes_enable" code="50"
hidden="true" />

View File

@ -96,7 +96,7 @@ void monitor_fd( fdb_fd_set list, int fd, int* maxfd, void* cmd ) {
/* ignore maxfd */
struct kevent ev;
EV_SET( &ev, fd, EVFILT_READ, EV_ADD, 0, 0, cmd );
kevent( list, &ev, 1, NULL, 0, NULL ); // FIXME: check?
kevent( list, &ev, 1, nullptr, 0, nullptr ); // FIXME: check?
#endif
}
@ -105,15 +105,15 @@ void unmonitor_fd( fdb_fd_set list, int fd ) {
FD_CLR( fd, list );
#elif defined(__APPLE__) || defined(__FreeBSD__)
struct kevent ev;
EV_SET( &ev, fd, EVFILT_READ, EV_DELETE, 0, 0, NULL );
kevent( list, &ev, 1, NULL, 0, NULL ); // FIXME: check?
EV_SET( &ev, fd, EVFILT_READ, EV_DELETE, 0, 0, nullptr );
kevent( list, &ev, 1, nullptr, 0, nullptr ); // FIXME: check?
#endif
}
double get_cur_timestamp() {
struct tm tm_info;
struct timeval tv;
gettimeofday(&tv, NULL);
gettimeofday(&tv, nullptr);
localtime_r(&tv.tv_sec, &tm_info);
return tv.tv_sec + 1e-6*tv.tv_usec;
@ -182,14 +182,14 @@ void log_err(const char* func, int err, const char* format, ...) {
}
const char* get_value_multi(const CSimpleIni& ini, const char* key, ...) {
const char* ret = NULL;
const char* section = NULL;
const char* ret = nullptr;
const char* section = nullptr;
va_list ap;
va_start(ap, key);
while (!ret && (section = va_arg(ap, const char *)))
ret = ini.GetValue(section, key, NULL);
ret = ini.GetValue(section, key, nullptr);
va_end(ap);
@ -378,8 +378,8 @@ public:
// one pair for each of stdout and stderr
int pipes[2][2];
Command() : argv(NULL) { }
Command(const CSimpleIni& ini, std::string _section, uint64_t id, fdb_fd_set fds, int* maxfd) : section(_section), argv(NULL), fork_retry_time(-1), quiet(false), delete_envvars(NULL), fds(fds), deconfigured(false), kill_on_configuration_change(true) {
Command() : argv(nullptr) { }
Command(const CSimpleIni& ini, std::string _section, uint64_t id, fdb_fd_set fds, int* maxfd) : section(_section), argv(nullptr), fork_retry_time(-1), quiet(false), delete_envvars(nullptr), fds(fds), deconfigured(false), kill_on_configuration_change(true) {
char _ssection[strlen(section.c_str()) + 22];
snprintf(_ssection, strlen(section.c_str()) + 22, "%s.%" PRIu64, section.c_str(), id);
ssection = _ssection;
@ -410,7 +410,7 @@ public:
last_start = 0;
char* endptr;
const char* rd = get_value_multi(ini, "restart_delay", ssection.c_str(), section.c_str(), "general", "fdbmonitor", NULL);
const char* rd = get_value_multi(ini, "restart_delay", ssection.c_str(), section.c_str(), "general", "fdbmonitor", nullptr);
if (!rd) {
log_msg(SevError, "Unable to resolve restart delay for %s\n", ssection.c_str());
return;
@ -423,7 +423,7 @@ public:
}
}
const char* mrd = get_value_multi(ini, "initial_restart_delay", ssection.c_str(), section.c_str(), "general", "fdbmonitor", NULL);
const char* mrd = get_value_multi(ini, "initial_restart_delay", ssection.c_str(), section.c_str(), "general", "fdbmonitor", nullptr);
if (!mrd) {
initial_restart_delay = 0;
}
@ -437,7 +437,7 @@ public:
current_restart_delay = initial_restart_delay;
const char* rbo = get_value_multi(ini, "restart_backoff", ssection.c_str(), section.c_str(), "general", "fdbmonitor", NULL);
const char* rbo = get_value_multi(ini, "restart_backoff", ssection.c_str(), section.c_str(), "general", "fdbmonitor", nullptr);
if(!rbo) {
restart_backoff = max_restart_delay;
}
@ -453,7 +453,7 @@ public:
}
}
const char* rdri = get_value_multi(ini, "restart_delay_reset_interval", ssection.c_str(), section.c_str(), "general", "fdbmonitor", NULL);
const char* rdri = get_value_multi(ini, "restart_delay_reset_interval", ssection.c_str(), section.c_str(), "general", "fdbmonitor", nullptr);
if (!rdri) {
restart_delay_reset_interval = max_restart_delay;
}
@ -465,19 +465,19 @@ public:
}
}
const char* q = get_value_multi(ini, "disable_lifecycle_logging", ssection.c_str(), section.c_str(), "general", NULL);
const char* q = get_value_multi(ini, "disable_lifecycle_logging", ssection.c_str(), section.c_str(), "general", nullptr);
if (q && !strcmp(q, "true"))
quiet = true;
const char* del_env = get_value_multi(ini, "delete_envvars", ssection.c_str(), section.c_str(), "general", NULL);
const char* del_env = get_value_multi(ini, "delete_envvars", ssection.c_str(), section.c_str(), "general", nullptr);
delete_envvars = del_env;
const char* kocc = get_value_multi(ini, "kill_on_configuration_change", ssection.c_str(), section.c_str(), "general", NULL);
const char* kocc = get_value_multi(ini, "kill_on_configuration_change", ssection.c_str(), section.c_str(), "general", nullptr);
if(kocc && strcmp(kocc, "true")) {
kill_on_configuration_change = false;
}
const char* binary = get_value_multi(ini, "command", ssection.c_str(), section.c_str(), "general", NULL);
const char* binary = get_value_multi(ini, "command", ssection.c_str(), section.c_str(), "general", nullptr);
if (!binary) {
log_msg(SevError, "Unable to resolve command for %s\n", ssection.c_str());
return;
@ -495,7 +495,7 @@ public:
continue;
}
std::string opt = get_value_multi(ini, i.pItem, ssection.c_str(), section.c_str(), "general", NULL);
std::string opt = get_value_multi(ini, i.pItem, ssection.c_str(), section.c_str(), "general", nullptr);
std::size_t pos = 0;
@ -520,7 +520,7 @@ public:
for (auto itr : commands) {
argv[i++] = strdup(itr.c_str());
}
argv[i] = NULL;
argv[i] = nullptr;
}
~Command() {
delete[] argv;
@ -609,7 +609,7 @@ void start_process(Command* cmd, uint64_t id, uid_t uid, gid_t gid, int delay, s
dup2( cmd->pipes[0][1], fileno(stdout) );
dup2( cmd->pipes[1][1], fileno(stderr) );
if(cmd->delete_envvars != NULL && std::strlen(cmd->delete_envvars) > 0) {
if(cmd->delete_envvars != nullptr && std::strlen(cmd->delete_envvars) > 0) {
std::string vars(cmd->delete_envvars);
size_t start = 0;
do {
@ -630,7 +630,7 @@ void start_process(Command* cmd, uint64_t id, uid_t uid, gid_t gid, int delay, s
#ifdef __linux__
signal(SIGCHLD, SIG_DFL);
sigprocmask(SIG_SETMASK, mask, NULL);
sigprocmask(SIG_SETMASK, mask, nullptr);
/* death of our parent raises SIGHUP */
prctl(PR_SET_PDEATHSIG, SIGHUP);
@ -722,7 +722,7 @@ bool argv_equal(const char** a1, const char** a2)
i++;
}
if (a1[i] != NULL || a2[i] != NULL)
if (a1[i] != nullptr || a2[i] != nullptr)
return false;
return true;
}
@ -734,7 +734,7 @@ void kill_process(uint64_t id, bool wait = true) {
kill(pid, SIGTERM);
if(wait) {
waitpid(pid, NULL, 0);
waitpid(pid, nullptr, 0);
}
pid_id.erase(pid);
@ -758,8 +758,8 @@ void load_conf(const char* confpath, uid_t &uid, gid_t &gid, sigset_t* mask, fdb
uid_t _uid;
gid_t _gid;
const char* user = ini.GetValue("fdbmonitor", "user", NULL);
const char* group = ini.GetValue("fdbmonitor", "group", NULL);
const char* user = ini.GetValue("fdbmonitor", "user", nullptr);
const char* group = ini.GetValue("fdbmonitor", "group", nullptr);
if (user) {
errno = 0;
@ -924,8 +924,8 @@ void watch_conf_dir( int kq, int* confd_fd, std::string confdir ) {
while(true) {
/* If already watching, drop it and close */
if ( *confd_fd >= 0 ) {
EV_SET( &ev, *confd_fd, EVFILT_VNODE, EV_DELETE, NOTE_WRITE, 0, NULL );
kevent( kq, &ev, 1, NULL, 0, NULL );
EV_SET( &ev, *confd_fd, EVFILT_VNODE, EV_DELETE, NOTE_WRITE, 0, nullptr );
kevent( kq, &ev, 1, nullptr, 0, nullptr );
close( *confd_fd );
}
@ -939,8 +939,8 @@ void watch_conf_dir( int kq, int* confd_fd, std::string confdir ) {
}
if ( *confd_fd >= 0 ) {
EV_SET( &ev, *confd_fd, EVFILT_VNODE, EV_ADD | EV_CLEAR, NOTE_WRITE, 0, NULL );
kevent( kq, &ev, 1, NULL, 0, NULL );
EV_SET( &ev, *confd_fd, EVFILT_VNODE, EV_ADD | EV_CLEAR, NOTE_WRITE, 0, nullptr );
kevent( kq, &ev, 1, nullptr, 0, nullptr );
/* If our child appeared since we last tested it, start over from the beginning */
if ( confdir != child && (access(child.c_str(), F_OK) == 0 || errno != ENOENT) ) {
@ -964,16 +964,16 @@ void watch_conf_file( int kq, int* conff_fd, const char* confpath ) {
/* If already watching, drop it and close */
if ( *conff_fd >= 0 ) {
EV_SET( &ev, *conff_fd, EVFILT_VNODE, EV_DELETE, NOTE_WRITE | NOTE_ATTRIB, 0, NULL );
kevent( kq, &ev, 1, NULL, 0, NULL );
EV_SET( &ev, *conff_fd, EVFILT_VNODE, EV_DELETE, NOTE_WRITE | NOTE_ATTRIB, 0, nullptr );
kevent( kq, &ev, 1, nullptr, 0, nullptr );
close( *conff_fd );
}
/* Open and watch */
*conff_fd = open( confpath, O_EVTONLY );
if ( *conff_fd >= 0 ) {
EV_SET( &ev, *conff_fd, EVFILT_VNODE, EV_ADD | EV_CLEAR, NOTE_WRITE | NOTE_ATTRIB, 0, NULL );
kevent( kq, &ev, 1, NULL, 0, NULL );
EV_SET( &ev, *conff_fd, EVFILT_VNODE, EV_ADD | EV_CLEAR, NOTE_WRITE | NOTE_ATTRIB, 0, nullptr );
kevent( kq, &ev, 1, nullptr, 0, nullptr );
}
}
#endif
@ -1194,7 +1194,7 @@ int main(int argc, char** argv) {
lockfile = args.OptionArg();
break;
case OPT_LOGGROUP:
if(strchr(args.OptionArg(), '"') != NULL) {
if(strchr(args.OptionArg(), '"') != nullptr) {
log_msg(SevError, "Invalid log group '%s', cannot contain '\"'\n", args.OptionArg());
exit(1);
}
@ -1226,9 +1226,9 @@ int main(int argc, char** argv) {
_confpath = joinPath(buf, _confpath);
}
// Guaranteed (if non-NULL) to be an absolute path with no
// Guaranteed (if non-nullptr) to be an absolute path with no
// symbolic link, /./ or /../ components
const char *p = realpath(_confpath.c_str(), NULL);
const char *p = realpath(_confpath.c_str(), nullptr);
if (!p) {
log_msg(SevError, "No configuration file at %s\n", _confpath.c_str());
exit(1);
@ -1351,14 +1351,14 @@ int main(int argc, char** argv) {
struct kevent ev;
EV_SET( &ev, SIGHUP, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
kevent( kq, &ev, 1, NULL, 0, NULL );
EV_SET( &ev, SIGINT, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
kevent( kq, &ev, 1, NULL, 0, NULL );
EV_SET( &ev, SIGTERM, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
kevent( kq, &ev, 1, NULL, 0, NULL );
EV_SET( &ev, SIGCHLD, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
kevent( kq, &ev, 1, NULL, 0, NULL );
EV_SET( &ev, SIGHUP, EVFILT_SIGNAL, EV_ADD, 0, 0, nullptr);
kevent( kq, &ev, 1, nullptr, 0, nullptr );
EV_SET( &ev, SIGINT, EVFILT_SIGNAL, EV_ADD, 0, 0, nullptr);
kevent( kq, &ev, 1, nullptr, 0, nullptr );
EV_SET( &ev, SIGTERM, EVFILT_SIGNAL, EV_ADD, 0, 0, nullptr);
kevent( kq, &ev, 1, nullptr, 0, nullptr );
EV_SET( &ev, SIGCHLD, EVFILT_SIGNAL, EV_ADD, 0, 0, nullptr);
kevent( kq, &ev, 1, nullptr, 0, nullptr );
int confd_fd = -1;
int conff_fd = -1;
@ -1383,7 +1383,7 @@ int main(int argc, char** argv) {
pselect, but none blocks all signals while processing events */
sigprocmask(SIG_SETMASK, &full_mask, &normal_mask);
#elif defined(__APPLE__) || defined(__FreeBSD__)
sigprocmask(0, NULL, &normal_mask);
sigprocmask(0, nullptr, &normal_mask);
#endif
#if defined(__APPLE__) || defined(__FreeBSD__)
@ -1474,10 +1474,10 @@ int main(int argc, char** argv) {
srfds = rfds;
nfds = 0;
if(timeout < 0) {
nfds = pselect(maxfd+1, &srfds, NULL, NULL, NULL, &normal_mask);
nfds = pselect(maxfd+1, &srfds, nullptr, nullptr, nullptr, &normal_mask);
}
else if(timeout > 0) {
nfds = pselect(maxfd+1, &srfds, NULL, NULL, &tv, &normal_mask);
nfds = pselect(maxfd+1, &srfds, nullptr, nullptr, &tv, &normal_mask);
}
if(nfds == 0) {
@ -1486,10 +1486,10 @@ int main(int argc, char** argv) {
#elif defined(__APPLE__) || defined(__FreeBSD__)
int nev = 0;
if(timeout < 0) {
nev = kevent( kq, NULL, 0, &ev, 1, NULL );
nev = kevent( kq, nullptr, 0, &ev, 1, nullptr );
}
else if(timeout > 0) {
nev = kevent( kq, NULL, 0, &ev, 1, &tv );
nev = kevent( kq, nullptr, 0, &ev, 1, &tv );
}
if(nev == 0) {
@ -1503,8 +1503,8 @@ int main(int argc, char** argv) {
// This could be the conf dir or conf file
if ( ev.ident == confd_fd ) {
/* Changes in the directory holding the conf file; schedule a future timeout to reset watches and reload the conf */
EV_SET( &timeout, 1, EVFILT_TIMER, EV_ADD | EV_ONESHOT, 0, 200, NULL );
kevent( kq, &timeout, 1, NULL, 0, NULL );
EV_SET( &timeout, 1, EVFILT_TIMER, EV_ADD | EV_ONESHOT, 0, 200, nullptr );
kevent( kq, &timeout, 1, nullptr, 0, nullptr );
} else {
/* Direct writes to the conf file; reload! */
reload = true;
@ -1559,7 +1559,7 @@ int main(int argc, char** argv) {
/* Unblock signals */
signal(SIGCHLD, SIG_IGN);
sigprocmask(SIG_SETMASK, &normal_mask, NULL);
sigprocmask(SIG_SETMASK, &normal_mask, nullptr);
/* If daemonized, setsid() was called earlier so we can just kill our entire new process group */
if(daemonize) {
@ -1578,7 +1578,7 @@ int main(int argc, char** argv) {
/* Wait for all child processes (says POSIX.1-2001) */
/* POSIX.1-2001 specifies that if the disposition of SIGCHLD is set to SIG_IGN, then children that terminate do not become zombies and a call to wait()
will block until all children have terminated, and then fail with errno set to ECHILD */
wait(NULL);
wait(nullptr);
unlink(lockfile.c_str());
exit(0);
@ -1617,7 +1617,7 @@ int main(int argc, char** argv) {
if(search != additional_watch_wds.end() && event->len && search->second.count(event->name)) {
log_msg(SevInfo, "Changes detected on watched symlink `%s': (%d, %#010x)\n", event->name, event->wd, event->mask);
char *redone_confpath = realpath(_confpath.c_str(), NULL);
char *redone_confpath = realpath(_confpath.c_str(), nullptr);
if(!redone_confpath) {
log_msg(SevInfo, "Error calling realpath on `%s', continuing...\n", _confpath.c_str());
// exit(1);

View File

@ -46,7 +46,7 @@ class AsyncFileEIO : public IAsyncFile, public ReferenceCounted<AsyncFileEIO> {
public:
static void init() {
eio_set_max_parallel(FLOW_KNOBS->EIO_MAX_PARALLELISM);
if (eio_init( &eio_want_poll, NULL )) {
if (eio_init( &eio_want_poll, nullptr )) {
TraceEvent("EioInitError").detail("ErrorNo", errno);
throw platform_error();
}
@ -423,8 +423,8 @@ private:
static void eio_want_poll() {
want_poll = 1;
// SOMEDAY: NULL for deferred error, no analysis of correctness (itp)
onMainThreadVoid([](){ poll_eio(); }, NULL, TaskPriority::PollEIO);
// SOMEDAY: nullptr for deferred error, no analysis of correctness (itp)
onMainThreadVoid([](){ poll_eio(); }, nullptr, TaskPriority::PollEIO);
}
static int eio_callback( eio_req* req ) {

View File

@ -55,12 +55,12 @@ public:
HANDLE h = CreateFile( open_filename.c_str(),
GENERIC_READ | ((flags&OPEN_READWRITE) ? GENERIC_WRITE : 0),
FILE_SHARE_READ|FILE_SHARE_WRITE|FILE_SHARE_DELETE, NULL,
FILE_SHARE_READ|FILE_SHARE_WRITE|FILE_SHARE_DELETE, nullptr,
(flags&OPEN_EXCLUSIVE) ? CREATE_NEW :
(flags&OPEN_CREATE) ? OPEN_ALWAYS :
OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL | FILE_FLAG_OVERLAPPED | FILE_FLAG_NO_BUFFERING,
NULL );
nullptr );
if (h == INVALID_HANDLE_VALUE) {
bool notFound = GetLastError() == ERROR_FILE_NOT_FOUND;
Error e = notFound ? file_not_found() : io_error();
@ -141,7 +141,7 @@ public:
}
Future<Void> truncate(int64_t size) override {
// FIXME: Possibly use SetFileInformationByHandle( file.native_handle(), FileEndOfFileInfo, ... ) instead
if (!SetFilePointerEx( file.native_handle(), *(LARGE_INTEGER*)&size, NULL, FILE_BEGIN ))
if (!SetFilePointerEx( file.native_handle(), *(LARGE_INTEGER*)&size, nullptr, FILE_BEGIN ))
throw io_error();
if (!SetEndOfFile(file.native_handle()))
throw io_error();

View File

@ -156,7 +156,7 @@ Future<Void> SimpleFailureMonitor::onStateChanged(Endpoint const& endpoint) {
return endpointKnownFailed.onChange(endpoint);
}
FailureStatus SimpleFailureMonitor::getState(Endpoint const& endpoint) {
FailureStatus SimpleFailureMonitor::getState(Endpoint const& endpoint) const {
if (failedEndpoints.count(endpoint))
return FailureStatus(true);
else {
@ -170,7 +170,7 @@ FailureStatus SimpleFailureMonitor::getState(Endpoint const& endpoint) {
}
}
FailureStatus SimpleFailureMonitor::getState(NetworkAddress const& address) {
FailureStatus SimpleFailureMonitor::getState(NetworkAddress const& address) const {
auto a = addressStatus.find(address);
if (a == addressStatus.end())
return FailureStatus();
@ -178,7 +178,7 @@ FailureStatus SimpleFailureMonitor::getState(NetworkAddress const& address) {
return a->second;
}
bool SimpleFailureMonitor::onlyEndpointFailed(Endpoint const& endpoint) {
bool SimpleFailureMonitor::onlyEndpointFailed(Endpoint const& endpoint) const {
if (!failedEndpoints.count(endpoint)) return false;
auto a = addressStatus.find(endpoint.getPrimaryAddress());
if (a == addressStatus.end())
@ -187,7 +187,7 @@ bool SimpleFailureMonitor::onlyEndpointFailed(Endpoint const& endpoint) {
return !a->second.failed;
}
bool SimpleFailureMonitor::permanentlyFailed(Endpoint const& endpoint) {
bool SimpleFailureMonitor::permanentlyFailed(Endpoint const& endpoint) const {
return failedEndpoints.count(endpoint);
}

View File

@ -87,10 +87,10 @@ struct FailureStatus {
class IFailureMonitor {
public:
// Returns the currently known status for the endpoint
virtual FailureStatus getState(Endpoint const& endpoint) = 0;
virtual FailureStatus getState(Endpoint const& endpoint) const = 0;
// Returns the currently known status for the address
virtual FailureStatus getState(NetworkAddress const& address) = 0;
virtual FailureStatus getState(NetworkAddress const& address) const = 0;
// Only use this function when the endpoint is known to be failed
virtual void endpointNotFound(Endpoint const&) = 0;
@ -102,10 +102,10 @@ public:
virtual Future<Void> onDisconnectOrFailure(Endpoint const& endpoint) = 0;
// Returns true if the endpoint is failed but the address of the endpoint is not failed.
virtual bool onlyEndpointFailed(Endpoint const& endpoint) = 0;
virtual bool onlyEndpointFailed(Endpoint const& endpoint) const = 0;
// Returns true if the endpoint will never become available.
virtual bool permanentlyFailed(Endpoint const& endpoint) = 0;
virtual bool permanentlyFailed(Endpoint const& endpoint) const = 0;
// Called by FlowTransport when a connection closes and a prior request or reply might be lost
virtual void notifyDisconnect(NetworkAddress const&) = 0;
@ -140,14 +140,14 @@ public:
SimpleFailureMonitor();
void setStatus(NetworkAddress const& address, FailureStatus const& status);
void endpointNotFound(Endpoint const&);
virtual void notifyDisconnect(NetworkAddress const&);
void notifyDisconnect(NetworkAddress const&) override;
virtual Future<Void> onStateChanged(Endpoint const& endpoint);
virtual FailureStatus getState(Endpoint const& endpoint);
virtual FailureStatus getState(NetworkAddress const& address);
virtual Future<Void> onDisconnectOrFailure(Endpoint const& endpoint);
virtual bool onlyEndpointFailed(Endpoint const& endpoint);
virtual bool permanentlyFailed(Endpoint const& endpoint);
Future<Void> onStateChanged(Endpoint const& endpoint) override;
FailureStatus getState(Endpoint const& endpoint) const override;
FailureStatus getState(NetworkAddress const& address) const override;
Future<Void> onDisconnectOrFailure(Endpoint const& endpoint) override;
bool onlyEndpointFailed(Endpoint const& endpoint) const override;
bool permanentlyFailed(Endpoint const& endpoint) const override;
void reset();
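A hedged caller-side sketch of the interface above (the stream variable is hypothetical): consult the process-wide monitor before issuing a request, the same pattern loadBalance uses.

    const Endpoint& ep = stream.getEndpoint();
    if (IFailureMonitor::failureMonitor().getState(ep).isAvailable()) {
        // Endpoint is believed healthy; it is reasonable to send the request.
    } else if (IFailureMonitor::failureMonitor().onlyEndpointFailed(ep)) {
        // The process is reachable but this endpoint is gone; stop retrying it.
        IFailureMonitor::failureMonitor().endpointNotFound(ep);
    }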

View File

@ -177,7 +177,7 @@ Future< REPLY_TYPE(Request) > loadBalance(
Request request = Request(),
TaskPriority taskID = TaskPriority::DefaultPromiseEndpoint,
bool atMostOnce = false, // if true, throws request_maybe_delivered() instead of retrying automatically
QueueModel* model = NULL)
QueueModel* model = nullptr)
{
state Future<Optional<REPLY_TYPE(Request)>> firstRequest;
state Optional<uint64_t> firstRequestEndpoint;
@ -296,7 +296,7 @@ Future< REPLY_TYPE(Request) > loadBalance(
}
// Find an alternative, if any, that is not failed, starting with nextAlt
state RequestStream<Request> const* stream = NULL;
state RequestStream<Request> const* stream = nullptr;
for(int alternativeNum=0; alternativeNum<alternatives->size(); alternativeNum++) {
int useAlt = nextAlt;
if( nextAlt == startAlt )
@ -309,7 +309,7 @@ Future< REPLY_TYPE(Request) > loadBalance(
break;
nextAlt = (nextAlt+1) % alternatives->size();
if(nextAlt == startAlt) triedAllOptions = true;
stream=NULL;
stream=nullptr;
}
if(!stream && !firstRequest.isValid() ) {
@ -493,7 +493,7 @@ Future< REPLY_TYPE(Request) > basicLoadBalance(
state int useAlt;
loop {
// Find an alternative, if any, that is not failed, starting with nextAlt
state RequestStream<Request> const* stream = NULL;
state RequestStream<Request> const* stream = nullptr;
for(int alternativeNum=0; alternativeNum<alternatives->size(); alternativeNum++) {
useAlt = nextAlt;
if( nextAlt == startAlt )
@ -505,7 +505,7 @@ Future< REPLY_TYPE(Request) > basicLoadBalance(
if (!IFailureMonitor::failureMonitor().getState( stream->getEndpoint() ).failed)
break;
nextAlt = (nextAlt+1) % alternatives->size();
stream=NULL;
stream=nullptr;
}
if(!stream) {

View File

@ -28,11 +28,11 @@
template <class T>
Reference<T> loadPlugin( std::string const& plugin_name ) {
void *(*get_plugin)(const char*) = NULL;
void *(*get_plugin)(const char*) = nullptr;
void* plugin = loadLibrary( plugin_name.c_str() );
if (plugin)
get_plugin = (void*(*)(const char*))loadFunction( plugin, "get_plugin" );
return (get_plugin) ? Reference<T>( (T*)get_plugin( T::get_plugin_type_name_and_version() ) ) : Reference<T>( NULL );
return (get_plugin) ? Reference<T>( (T*)get_plugin( T::get_plugin_type_name_and_version() ) ) : Reference<T>( nullptr );
}
#endif
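For reference, a hypothetical plugin-side counterpart to loadPlugin() above: the shared library exports a C symbol named get_plugin, which loadPlugin() resolves via loadFunction() and calls with the expected type/version string (MyPlugin is purely illustrative).

    extern "C" void* get_plugin(const char* type_name_and_version) {
        if (strcmp(type_name_and_version, MyPlugin::get_plugin_type_name_and_version()) == 0)
            return new MyPlugin(); // loadPlugin() wraps this pointer in Reference<T>
        return nullptr;            // unknown request; caller gets an empty Reference<T>
    }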

View File

@ -63,9 +63,9 @@ ProcessClass::Fitness ProcessClass::machineClassFitness( ClusterRole role ) cons
default:
return ProcessClass::NeverAssign;
}
case ProcessClass::Proxy:
case ProcessClass::CommitProxy:
switch( _class ) {
case ProcessClass::ProxyClass:
case ProcessClass::CommitProxyClass:
return ProcessClass::BestFit;
case ProcessClass::StatelessClass:
return ProcessClass::GoodFit;
@ -92,7 +92,7 @@ ProcessClass::Fitness ProcessClass::machineClassFitness( ClusterRole role ) cons
return ProcessClass::GoodFit;
case ProcessClass::UnsetClass:
return ProcessClass::UnsetFit;
case ProcessClass::ProxyClass:
case ProcessClass::CommitProxyClass:
return ProcessClass::OkayFit;
case ProcessClass::ResolutionClass:
return ProcessClass::OkayFit;
@ -192,7 +192,7 @@ ProcessClass::Fitness ProcessClass::machineClassFitness( ClusterRole role ) cons
return ProcessClass::OkayFit;
case ProcessClass::TransactionClass:
return ProcessClass::OkayFit;
case ProcessClass::ProxyClass:
case ProcessClass::CommitProxyClass:
return ProcessClass::OkayFit;
case ProcessClass::GrvProxyClass:
return ProcessClass::OkayFit;

View File

@ -33,7 +33,7 @@ struct ProcessClass {
TransactionClass,
ResolutionClass,
TesterClass,
ProxyClass, // Process class of CommitProxy
CommitProxyClass,
GrvProxyClass,
MasterClass,
StatelessClass,
@ -53,7 +53,7 @@ struct ProcessClass {
enum ClusterRole {
Storage,
TLog,
Proxy,
CommitProxy,
GrvProxy,
Master,
Resolver,
@ -77,7 +77,12 @@ public:
if (s=="storage") _class = StorageClass;
else if (s=="transaction") _class = TransactionClass;
else if (s=="resolution") _class = ResolutionClass;
else if (s=="proxy") _class = ProxyClass;
else if (s=="commit_proxy") _class = CommitProxyClass;
else if (s=="proxy") {
_class = CommitProxyClass;
printf("WARNING: 'proxy' machine class is deprecated and will be automatically converted "
"'commit_proxy' machine class. Please use 'grv_proxy' or 'commit_proxy' specifically\n");
}
else if (s=="grv_proxy") _class = GrvProxyClass;
else if (s=="master") _class = MasterClass;
else if (s=="test") _class = TesterClass;
@ -99,7 +104,12 @@ public:
if (classStr=="storage") _class = StorageClass;
else if (classStr=="transaction") _class = TransactionClass;
else if (classStr=="resolution") _class = ResolutionClass;
else if (classStr=="proxy") _class = ProxyClass;
else if (classStr=="commit_proxy") _class = CommitProxyClass;
else if (classStr=="proxy") {
_class = CommitProxyClass;
printf("WARNING: 'proxy' machine class is deprecated and will be automatically converted "
"'commit_proxy' machine class. Please use 'grv_proxy' or 'commit_proxy' specifically\n");
}
else if (classStr=="grv_proxy") _class = GrvProxyClass;
else if (classStr=="master") _class = MasterClass;
else if (classStr=="test") _class = TesterClass;
@ -137,7 +147,7 @@ public:
case StorageClass: return "storage";
case TransactionClass: return "transaction";
case ResolutionClass: return "resolution";
case ProxyClass: return "proxy";
case CommitProxyClass: return "commit_proxy";
case GrvProxyClass: return "grv_proxy";
case MasterClass: return "master";
case TesterClass: return "test";
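A small sketch of the deprecation handling introduced above (purely illustrative): the legacy "proxy" string still parses, but it maps to the commit proxy class and round-trips as the new name.

    ProcessClass pc("proxy", ProcessClass::CommandLineSource); // prints the deprecation warning
    ASSERT(pc.classType() == ProcessClass::CommitProxyClass);
    ASSERT(pc.toString() == "commit_proxy");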

View File

@ -65,7 +65,7 @@ Future< Reference<class IAsyncFile> > Net2FileSystem::open( std::string filename
// EIO.
if ((flags & IAsyncFile::OPEN_UNBUFFERED) && !(flags & IAsyncFile::OPEN_NO_AIO) &&
!FLOW_KNOBS->DISABLE_POSIX_KERNEL_AIO)
f = AsyncFileKAIO::open(filename, flags, mode, NULL);
f = AsyncFileKAIO::open(filename, flags, mode, nullptr);
else
#endif
f = Net2AsyncFile::open(filename, flags, mode, static_cast<boost::asio::io_service*> ((void*) g_network->global(INetwork::enASIOService)));
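An illustrative open call for the dispatch above (the path and mode are hypothetical): requesting OPEN_UNBUFFERED without OPEN_NO_AIO routes the file through AsyncFileKAIO on Linux when kernel AIO is not disabled by knob, and through Net2AsyncFile otherwise.

    Future<Reference<IAsyncFile>> f = IAsyncFileSystem::filesystem()->open(
        "/var/fdb/data/storage.sqlite", IAsyncFile::OPEN_UNBUFFERED | IAsyncFile::OPEN_READWRITE, 0600);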

View File

@ -1057,8 +1057,8 @@ void sleeptest() {
timespec ts;
ts.tv_sec = times[j] / 1000000;
ts.tv_nsec = (times[j] % 1000000)*1000;
clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL);
//nanosleep(&ts, NULL);
clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, nullptr);
//nanosleep(&ts, nullptr);
}
double t = timer() - b;
printf("Sleep test (%dus x %d): %0.1f\n", times[j], n, double(t)/n*1e6);

View File

@ -115,7 +115,7 @@ public:
Future<T> getFuture() const { sav->addFutureRef(); return Future<T>(sav); }
bool isSet() { return sav->isSet(); }
bool isValid() const { return sav != NULL; }
bool isValid() const { return sav != nullptr; }
ReplyPromise() : sav(new NetSAV<T>(0, 1)) {}
ReplyPromise(const ReplyPromise& rhs) : sav(rhs.sav) { sav->addPromiseRef(); }
ReplyPromise(ReplyPromise&& rhs) noexcept : sav(rhs.sav) { rhs.sav = 0; }
@ -144,7 +144,7 @@ public:
}
// Beware, these operations are very unsafe
SAV<T>* extractRawPointer() { auto ptr = sav; sav = NULL; return ptr; }
SAV<T>* extractRawPointer() { auto ptr = sav; sav = nullptr; return ptr; }
explicit ReplyPromise<T>(SAV<T>* ptr) : sav(ptr) {}
int getFutureReferenceCount() const { return sav->getFutureReferenceCount(); }

View File

@ -1062,7 +1062,7 @@ public:
}
}
}
return canKillProcesses(processesLeft, processesDead, KillInstantly, NULL);
return canKillProcesses(processesLeft, processesDead, KillInstantly, nullptr);
}
virtual bool datacenterDead(Optional<Standalone<StringRef>> dcId) const
@ -1622,7 +1622,7 @@ public:
}
Sim2() : time(0.0), timerTime(0.0), taskCount(0), yielded(false), yield_limit(0), currentTaskID(TaskPriority::Zero) {
// Not letting currentProcess be NULL eliminates some annoying special cases
// Not letting currentProcess be nullptr eliminates some annoying special cases
currentProcess = new ProcessInfo("NoMachine", LocalityData(Optional<Standalone<StringRef>>(), StringRef(), StringRef(), StringRef()), ProcessClass(), {NetworkAddress()}, this, "", "");
g_network = net2 = newNet2(TLSConfig(), false, true);
g_network->addStopCallback( Net2FileSystem::stop );
@ -1813,12 +1813,12 @@ Future<Void> waitUntilDiskReady( Reference<DiskParameters> diskParameters, int64
int sf_open( const char* filename, int flags, int convFlags, int mode ) {
HANDLE wh = CreateFile( filename, GENERIC_READ | ((flags&IAsyncFile::OPEN_READWRITE) ? GENERIC_WRITE : 0),
FILE_SHARE_READ|FILE_SHARE_WRITE|FILE_SHARE_DELETE, NULL,
FILE_SHARE_READ|FILE_SHARE_WRITE|FILE_SHARE_DELETE, nullptr,
(flags&IAsyncFile::OPEN_EXCLUSIVE) ? CREATE_NEW :
(flags&IAsyncFile::OPEN_CREATE) ? OPEN_ALWAYS :
OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL,
NULL );
nullptr );
int h = -1;
if (wh != INVALID_HANDLE_VALUE) h = _open_osfhandle( (intptr_t)wh, convFlags );
else errno = GetLastError() == ERROR_FILE_NOT_FOUND ? ENOENT : EFAULT;

View File

@ -34,7 +34,7 @@ enum ClogMode { ClogDefault, ClogAll, ClogSend, ClogReceive };
class ISimulator : public INetwork {
public:
ISimulator() : desiredCoordinators(1), physicalDatacenters(1), processesPerMachine(0), listenersPerProcess(1), isStopped(false), lastConnectionFailure(0), connectionFailuresDisableDuration(0), speedUpSimulation(false), allSwapsDisabled(false), backupAgents(WaitForType), drAgents(WaitForType), extraDB(NULL), allowLogSetKills(true), usableRegions(1) {}
ISimulator() : desiredCoordinators(1), physicalDatacenters(1), processesPerMachine(0), listenersPerProcess(1), isStopped(false), lastConnectionFailure(0), connectionFailuresDisableDuration(0), speedUpSimulation(false), allSwapsDisabled(false), backupAgents(WaitForType), drAgents(WaitForType), extraDB(nullptr), allowLogSetKills(true), usableRegions(1) {}
// Order matters!
enum KillType { KillInstantly, InjectFaults, RebootAndDelete, RebootProcessAndDelete, Reboot, RebootProcess, None };
@ -97,7 +97,8 @@ public:
case ProcessClass::StorageClass: return true;
case ProcessClass::TransactionClass: return true;
case ProcessClass::ResolutionClass: return false;
case ProcessClass::ProxyClass: return false;
case ProcessClass::CommitProxyClass:
return false;
case ProcessClass::GrvProxyClass:
return false;
case ProcessClass::MasterClass:
@ -163,9 +164,9 @@ public:
virtual void rebootProcess(Optional<Standalone<StringRef>> zoneId, bool allProcesses ) = 0;
virtual void rebootProcess( ProcessInfo* process, KillType kt ) = 0;
virtual void killInterface( NetworkAddress address, KillType ) = 0;
virtual bool killMachine(Optional<Standalone<StringRef>> machineId, KillType kt, bool forceKill = false, KillType* ktFinal = NULL) = 0;
virtual bool killZone(Optional<Standalone<StringRef>> zoneId, KillType kt, bool forceKill = false, KillType* ktFinal = NULL) = 0;
virtual bool killDataCenter(Optional<Standalone<StringRef>> dcId, KillType kt, bool forceKill = false, KillType* ktFinal = NULL) = 0;
virtual bool killMachine(Optional<Standalone<StringRef>> machineId, KillType kt, bool forceKill = false, KillType* ktFinal = nullptr) = 0;
virtual bool killZone(Optional<Standalone<StringRef>> zoneId, KillType kt, bool forceKill = false, KillType* ktFinal = nullptr) = 0;
virtual bool killDataCenter(Optional<Standalone<StringRef>> dcId, KillType kt, bool forceKill = false, KillType* ktFinal = nullptr) = 0;
//virtual KillType getMachineKillState( UID zoneID ) = 0;
virtual bool canKillProcesses(std::vector<ProcessInfo*> const& availableProcesses, std::vector<ProcessInfo*> const& deadProcesses, KillType kt, KillType* newKillType) const = 0;
virtual bool isAvailable() const = 0;
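A hedged usage sketch for the kill signatures above (machineId is a placeholder): the optional out-parameter reports which kill type was actually applied.

    ISimulator::KillType ktFinal;
    bool killed = g_simulator.killMachine(machineId, ISimulator::KillInstantly,
                                          /*forceKill=*/false, &ktFinal);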

View File

@ -43,17 +43,16 @@ Reference<StorageInfo> getStorageInfo(UID id, std::map<UID, Reference<StorageInf
}
// It is incredibly important that any modifications to txnStateStore are done in such a way that
// the same operations will be done on all proxies at the same time. Otherwise, the data stored in
// txnStateStore will become corrupted.
// the same operations will be done on all commit proxies at the same time. Otherwise, the data
// stored in txnStateStore will become corrupted.
void applyMetadataMutations(SpanID const& spanContext, UID const& dbgid, Arena& arena,
VectorRef<MutationRef> const& mutations, IKeyValueStore* txnStateStore,
LogPushData* toCommit, bool& confChange, Reference<ILogSystem> logSystem,
Version popVersion, KeyRangeMap<std::set<Key>>* vecBackupKeys,
KeyRangeMap<ServerCacheInfo>* keyInfo, KeyRangeMap<bool>* cacheInfo,
std::map<Key, ApplyMutationsData>* uid_applyMutationsData,
RequestStream<CommitTransactionRequest> commit, Database cx, NotifiedVersion* commitVersion,
std::map<UID, Reference<StorageInfo>>* storageCache, std::map<Tag, Version>* tag_popped,
bool initialCommit) {
KeyRangeMap<ServerCacheInfo>* keyInfo, KeyRangeMap<bool>* cacheInfo, std::map<Key,
ApplyMutationsData>* uid_applyMutationsData, RequestStream<CommitTransactionRequest> commit,
Database cx, NotifiedVersion* commitVersion, std::map<UID, Reference<StorageInfo>>* storageCache,
std::map<Tag, Version>* tag_popped, bool initialCommit) {
//std::map<keyRef, vector<uint16_t>> cacheRangeInfo;
std::map<KeyRef, MutationRef> cachedRangeInfo;
if (toCommit) {

View File

@ -21,7 +21,7 @@
#include "fdbclient/BackupAgent.actor.h"
#include "fdbclient/BackupContainer.h"
#include "fdbclient/DatabaseContext.h"
#include "fdbclient/MasterProxyInterface.h"
#include "fdbclient/CommitProxyInterface.h"
#include "fdbclient/SystemData.h"
#include "fdbserver/BackupInterface.h"
#include "fdbserver/BackupProgress.actor.h"

Some files were not shown because too many files have changed in this diff