Merge remote-tracking branch 'origin/master' into simplify-global-knobs

sfc-gh-tclinkenbeard 2021-01-13 14:39:35 -08:00
commit 8ff14878fe
124 changed files with 6823 additions and 4317 deletions

View File

@ -18,6 +18,7 @@
* limitations under the License.
*/
#include <cstdint>
#define FDB_API_VERSION 700
#define FDB_INCLUDE_LEGACY_TYPES
@ -226,6 +227,11 @@ fdb_error_t fdb_future_get_int64( FDBFuture* f, int64_t* out_value ) {
CATCH_AND_RETURN( *out_value = TSAV(int64_t, f)->get(); );
}
extern "C" DLLEXPORT
fdb_error_t fdb_future_get_uint64(FDBFuture *f, uint64_t *out) {
CATCH_AND_RETURN( *out = TSAV(uint64_t, f)->get(); );
}
extern "C" DLLEXPORT
fdb_error_t fdb_future_get_key( FDBFuture* f, uint8_t const** out_key,
int* out_key_length ) {
@ -598,6 +604,11 @@ FDBFuture* fdb_transaction_get_approximate_size(FDBTransaction* tr) {
return (FDBFuture*)TXN(tr)->getApproximateSize().extractPtr();
}
extern "C" DLLEXPORT
FDBFuture* fdb_get_server_protocol(const char* clusterFilePath){
return (FDBFuture*)( API->getServerProtocol(clusterFilePath ? clusterFilePath : "").extractPtr() );
}
extern "C" DLLEXPORT
FDBFuture* fdb_transaction_get_versionstamp( FDBTransaction* tr )
{

View File

@ -136,6 +136,9 @@ extern "C" {
DLLEXPORT WARN_UNUSED_RESULT fdb_error_t
fdb_future_get_int64( FDBFuture* f, int64_t* out );
DLLEXPORT WARN_UNUSED_RESULT fdb_error_t
fdb_future_get_uint64( FDBFuture* f, uint64_t* out );
DLLEXPORT WARN_UNUSED_RESULT fdb_error_t
fdb_future_get_key( FDBFuture* f, uint8_t const** out_key,
int* out_key_length );
@ -248,6 +251,9 @@ extern "C" {
DLLEXPORT WARN_UNUSED_RESULT FDBFuture*
fdb_transaction_get_approximate_size(FDBTransaction* tr);
DLLEXPORT WARN_UNUSED_RESULT FDBFuture*
fdb_get_server_protocol(const char* clusterFilePath);
DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_transaction_get_versionstamp( FDBTransaction* tr );
DLLEXPORT WARN_UNUSED_RESULT FDBFuture*

View File

@ -365,7 +365,7 @@ int run_op_get(FDBTransaction* transaction, char* keystr, char* valstr, int snap
return FDB_SUCCESS;
}
int run_op_getrange(FDBTransaction* transaction, char* keystr, char* keystr2, char* valstr, int snapshot, int reverse) {
int run_op_getrange(FDBTransaction* transaction, char* keystr, char* keystr2, char* valstr, int snapshot, int reverse, FDBStreamingMode streaming_mode) {
FDBFuture* f;
fdb_error_t err;
FDBKeyValue const* out_kv;
@ -374,7 +374,7 @@ int run_op_getrange(FDBTransaction* transaction, char* keystr, char* keystr2, ch
f = fdb_transaction_get_range(transaction, FDB_KEYSEL_FIRST_GREATER_OR_EQUAL((uint8_t*)keystr, strlen(keystr)),
FDB_KEYSEL_LAST_LESS_OR_EQUAL((uint8_t*)keystr2, strlen(keystr2)) + 1, 0 /* limit */,
0 /* target_bytes */, FDB_STREAMING_MODE_WANT_ALL /* FDBStreamingMode */,
0 /* target_bytes */, streaming_mode /* FDBStreamingMode */,
0 /* iteration */, snapshot, reverse /* reverse */);
fdb_wait_and_handle_error(fdb_transaction_get_range, f, transaction);
@ -488,13 +488,13 @@ retryTxn:
rc = run_op_get(transaction, keystr, valstr, 0);
break;
case OP_GETRANGE:
rc = run_op_getrange(transaction, keystr, keystr2, valstr, 0, args->txnspec.ops[i][OP_REVERSE]);
rc = run_op_getrange(transaction, keystr, keystr2, valstr, 0, args->txnspec.ops[i][OP_REVERSE], args->streaming_mode);
break;
case OP_SGET:
rc = run_op_get(transaction, keystr, valstr, 1);
break;
case OP_SGETRANGE:
rc = run_op_getrange(transaction, keystr, keystr2, valstr, 1, args->txnspec.ops[i][OP_REVERSE]);
rc = run_op_getrange(transaction, keystr, keystr2, valstr, 1, args->txnspec.ops[i][OP_REVERSE], args->streaming_mode);
break;
case OP_UPDATE:
randstr(valstr, args->value_length + 1);
@ -1233,6 +1233,7 @@ int init_args(mako_args_t* args) {
args->trace = 0;
args->tracepath[0] = '\0';
args->traceformat = 0; /* default to client's default (XML) */
args->streaming_mode = FDB_STREAMING_MODE_WANT_ALL;
args->txntrace = 0;
args->txntagging = 0;
memset(args->txntagging_prefix, 0, TAGPREFIXLENGTH_MAX);
@ -1397,6 +1398,7 @@ void usage() {
printf("%-24s %s\n", " --txntagging_prefix", "Specify the prefix of transaction tag - mako${txntagging_prefix} (Default: '')");
printf("%-24s %s\n", " --knobs=KNOBS", "Set client knobs");
printf("%-24s %s\n", " --flatbuffers", "Use flatbuffers");
printf("%-24s %s\n", " --streaming", "Streaming mode: all (default), iterator, small, medium, large, serial");
}
/* parse benchmark parameters */
@ -1428,6 +1430,7 @@ int parse_args(int argc, char* argv[], mako_args_t* args) {
{ "knobs", required_argument, NULL, ARG_KNOBS },
{ "tracepath", required_argument, NULL, ARG_TRACEPATH },
{ "trace_format", required_argument, NULL, ARG_TRACEFORMAT },
{ "streaming", required_argument, NULL, ARG_STREAMING_MODE },
{ "txntrace", required_argument, NULL, ARG_TXNTRACE },
/* no args */
{ "help", no_argument, NULL, 'h' },
@ -1547,7 +1550,25 @@ int parse_args(int argc, char* argv[], mako_args_t* args) {
args->traceformat = 0;
} else {
fprintf(stderr, "Error: Invalid trace_format %s\n", optarg);
exit(0);
return -1;
}
break;
case ARG_STREAMING_MODE:
if (strncmp(optarg, "all", 3) == 0) {
args->streaming_mode = FDB_STREAMING_MODE_WANT_ALL;
} else if (strncmp(optarg, "iterator", 8) == 0) {
args->streaming_mode = FDB_STREAMING_MODE_ITERATOR;
} else if (strncmp(optarg, "small", 5) == 0) {
args->streaming_mode = FDB_STREAMING_MODE_SMALL;
} else if (strncmp(optarg, "medium", 6) == 0) {
args->streaming_mode = FDB_STREAMING_MODE_MEDIUM;
} else if (strncmp(optarg, "large", 5) == 0) {
args->streaming_mode = FDB_STREAMING_MODE_LARGE;
} else if (strncmp(optarg, "serial", 6) == 0) {
args->streaming_mode = FDB_STREAMING_MODE_SERIAL;
} else {
fprintf(stderr, "Error: Invalid streaming mode %s\n", optarg);
return -1;
}
break;
case ARG_TXNTRACE:

View File

@ -77,7 +77,8 @@ enum Arguments {
ARG_TPSCHANGE,
ARG_TXNTRACE,
ARG_TXNTAGGING,
ARG_TXNTAGGINGPREFIX
ARG_TXNTAGGINGPREFIX,
ARG_STREAMING_MODE
};
enum TPSChangeTypes { TPS_SIN, TPS_SQUARE, TPS_PULSE };
@ -129,6 +130,7 @@ typedef struct {
int txntrace;
int txntagging;
char txntagging_prefix[TAGPREFIXLENGTH_MAX];
FDBStreamingMode streaming_mode;
} mako_args_t;
/* shared memory */

View File

@ -39,7 +39,7 @@
#pragma once
#define FDB_API_VERSION 620
#define FDB_API_VERSION 700
#include <foundationdb/fdb_c.h>
#include <string>
@ -89,7 +89,6 @@ class Int64Future : public Future {
Int64Future(FDBFuture* f) : Future(f) {}
};
class KeyFuture : public Future {
public:
// Call this function instead of fdb_future_get_key when using the KeyFuture
@ -148,7 +147,6 @@ class EmptyFuture : public Future {
EmptyFuture(FDBFuture* f) : Future(f) {}
};
// Wrapper around FDBTransaction, providing the same set of calls as the C API.
// Handles cleanup of memory, removing the need to call
// fdb_transaction_destroy.

View File

@ -20,7 +20,7 @@
// Unit tests for API setup, network initialization functions from the FDB C API.
#define FDB_API_VERSION 620
#define FDB_API_VERSION 700
#include <foundationdb/fdb_c.h>
#include <iostream>
#include <thread>
@ -42,13 +42,13 @@ TEST_CASE("setup") {
CHECK(err);
// Select current API version
fdb_check(fdb_select_api_version(620));
fdb_check(fdb_select_api_version(700));
// Error to call again after a successful return
err = fdb_select_api_version(620);
err = fdb_select_api_version(700);
CHECK(err);
CHECK(fdb_get_max_api_version() >= 620);
CHECK(fdb_get_max_api_version() >= 700);
fdb_check(fdb_setup_network());
// Calling a second time should fail

View File

@ -20,7 +20,7 @@
// Unit tests for the FoundationDB C API.
#define FDB_API_VERSION 620
#define FDB_API_VERSION 700
#include <foundationdb/fdb_c.h>
#include <assert.h>
#include <string.h>
@ -55,6 +55,7 @@ FDBDatabase *fdb_open_database(const char *clusterFile) {
static FDBDatabase *db = nullptr;
static std::string prefix;
static std::string clusterFilePath = "";
std::string key(const std::string& key) {
return prefix + key;
@ -1537,6 +1538,15 @@ TEST_CASE("fdb_transaction_get_approximate_size") {
}
}
TEST_CASE("fdb_get_server_protocol") {
FDBFuture* protocolFuture = fdb_get_server_protocol(clusterFilePath.c_str());
uint64_t out;
fdb_check(fdb_future_block_until_ready(protocolFuture));
fdb_check(fdb_future_get_uint64(protocolFuture, &out));
fdb_future_destroy(protocolFuture);
}
TEST_CASE("fdb_transaction_watch read_your_writes_disable") {
// Watches created on a transaction with the option READ_YOUR_WRITES_DISABLE
// should return a watches_disabled error.
@ -1743,6 +1753,220 @@ TEST_CASE("fdb_transaction_add_conflict_range") {
CHECK(success);
}
TEST_CASE("special-key-space valid transaction ID") {
auto value = get_value("\xff\xff/tracing/transaction_id", /* snapshot */ false, {});
REQUIRE(value.has_value());
uint64_t transaction_id = std::stoul(value.value());
CHECK(transaction_id > 0);
}
TEST_CASE("special-key-space custom transaction ID") {
fdb::Transaction tr(db);
fdb_check(tr.set_option(FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES,
nullptr, 0));
while (1) {
tr.set("\xff\xff/tracing/transaction_id", std::to_string(ULONG_MAX));
fdb::ValueFuture f1 = tr.get("\xff\xff/tracing/transaction_id",
/* snapshot */ false);
fdb_error_t err = wait_future(f1);
if (err) {
fdb::EmptyFuture f2 = tr.on_error(err);
fdb_check(wait_future(f2));
continue;
}
int out_present;
char *val;
int vallen;
fdb_check(f1.get(&out_present, (const uint8_t **)&val, &vallen));
REQUIRE(out_present);
uint64_t transaction_id = std::stoul(std::string(val, vallen));
CHECK(transaction_id == ULONG_MAX);
break;
}
}
TEST_CASE("special-key-space set transaction ID after write") {
fdb::Transaction tr(db);
fdb_check(tr.set_option(FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES,
nullptr, 0));
while (1) {
tr.set(key("foo"), "bar");
tr.set("\xff\xff/tracing/transaction_id", "0");
fdb::ValueFuture f1 = tr.get("\xff\xff/tracing/transaction_id",
/* snapshot */ false);
fdb_error_t err = wait_future(f1);
if (err) {
fdb::EmptyFuture f2 = tr.on_error(err);
fdb_check(wait_future(f2));
continue;
}
int out_present;
char *val;
int vallen;
fdb_check(f1.get(&out_present, (const uint8_t **)&val, &vallen));
REQUIRE(out_present);
uint64_t transaction_id = std::stoul(std::string(val, vallen));
CHECK(transaction_id != 0);
break;
}
}
TEST_CASE("special-key-space set token after write") {
fdb::Transaction tr(db);
fdb_check(tr.set_option(FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES,
nullptr, 0));
while (1) {
tr.set(key("foo"), "bar");
tr.set("\xff\xff/tracing/token", "false");
fdb::ValueFuture f1 = tr.get("\xff\xff/tracing/token",
/* snapshot */ false);
fdb_error_t err = wait_future(f1);
if (err) {
fdb::EmptyFuture f2 = tr.on_error(err);
fdb_check(wait_future(f2));
continue;
}
int out_present;
char *val;
int vallen;
fdb_check(f1.get(&out_present, (const uint8_t **)&val, &vallen));
REQUIRE(out_present);
uint64_t token = std::stoul(std::string(val, vallen));
CHECK(token != 0);
break;
}
}
TEST_CASE("special-key-space valid token") {
auto value = get_value("\xff\xff/tracing/token", /* snapshot */ false, {});
REQUIRE(value.has_value());
uint64_t token = std::stoul(value.value());
CHECK(token > 0);
}
TEST_CASE("special-key-space disable tracing") {
fdb::Transaction tr(db);
fdb_check(tr.set_option(FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES,
nullptr, 0));
while (1) {
tr.set("\xff\xff/tracing/token", "false");
fdb::ValueFuture f1 = tr.get("\xff\xff/tracing/token",
/* snapshot */ false);
fdb_error_t err = wait_future(f1);
if (err) {
fdb::EmptyFuture f2 = tr.on_error(err);
fdb_check(wait_future(f2));
continue;
}
int out_present;
char *val;
int vallen;
fdb_check(f1.get(&out_present, (const uint8_t **)&val, &vallen));
REQUIRE(out_present);
uint64_t token = std::stoul(std::string(val, vallen));
CHECK(token == 0);
break;
}
}
TEST_CASE("FDB_DB_OPTION_TRANSACTION_TRACE_DISABLE") {
fdb_check(fdb_database_set_option(db, FDB_DB_OPTION_TRANSACTION_TRACE_DISABLE, nullptr, 0));
auto value = get_value("\xff\xff/tracing/token", /* snapshot */ false, {});
REQUIRE(value.has_value());
uint64_t token = std::stoul(value.value());
CHECK(token == 0);
fdb_check(fdb_database_set_option(db, FDB_DB_OPTION_TRANSACTION_TRACE_ENABLE, nullptr, 0));
}
TEST_CASE("FDB_DB_OPTION_TRANSACTION_TRACE_DISABLE enable tracing for transaction") {
fdb_check(fdb_database_set_option(db, FDB_DB_OPTION_TRANSACTION_TRACE_DISABLE, nullptr, 0));
fdb::Transaction tr(db);
fdb_check(tr.set_option(FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES,
nullptr, 0));
while (1) {
tr.set("\xff\xff/tracing/token", "true");
fdb::ValueFuture f1 = tr.get("\xff\xff/tracing/token",
/* snapshot */ false);
fdb_error_t err = wait_future(f1);
if (err) {
fdb::EmptyFuture f2 = tr.on_error(err);
fdb_check(wait_future(f2));
continue;
}
int out_present;
char *val;
int vallen;
fdb_check(f1.get(&out_present, (const uint8_t **)&val, &vallen));
REQUIRE(out_present);
uint64_t token = std::stoul(std::string(val, vallen));
CHECK(token > 0);
break;
}
fdb_check(fdb_database_set_option(db, FDB_DB_OPTION_TRANSACTION_TRACE_ENABLE, nullptr, 0));
}
TEST_CASE("special-key-space tracing get range") {
std::string tracingBegin = "\xff\xff/tracing/";
std::string tracingEnd = "\xff\xff/tracing0";
fdb::Transaction tr(db);
fdb_check(tr.set_option(FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES,
nullptr, 0));
while (1) {
fdb::KeyValueArrayFuture f1 = tr.get_range(
FDB_KEYSEL_FIRST_GREATER_OR_EQUAL(
(const uint8_t *)tracingBegin.c_str(),
tracingBegin.size()
),
FDB_KEYSEL_LAST_LESS_THAN(
(const uint8_t *)tracingEnd.c_str(),
tracingEnd.size()
) + 1, /* limit */ 0, /* target_bytes */ 0,
/* FDBStreamingMode */ FDB_STREAMING_MODE_WANT_ALL, /* iteration */ 0,
/* snapshot */ false, /* reverse */ 0);
fdb_error_t err = wait_future(f1);
if (err) {
fdb::EmptyFuture f2 = tr.on_error(err);
fdb_check(wait_future(f2));
continue;
}
FDBKeyValue const *out_kv;
int out_count;
int out_more;
fdb_check(f1.get(&out_kv, &out_count, &out_more));
CHECK(!out_more);
CHECK(out_count == 2);
CHECK(std::string((char *)out_kv[0].key, out_kv[0].key_length) == tracingBegin + "token");
CHECK(std::stoul(std::string((char *)out_kv[0].value, out_kv[0].value_length)) > 0);
CHECK(std::string((char *)out_kv[1].key, out_kv[1].key_length) == tracingBegin + "transaction_id");
CHECK(std::stoul(std::string((char *)out_kv[1].value, out_kv[1].value_length)) > 0);
break;
}
}
TEST_CASE("fdb_error_predicate") {
CHECK(fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE, 1007)); // transaction_too_old
CHECK(fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE, 1020)); // not_committed
@ -1785,10 +2009,7 @@ TEST_CASE("fdb_error_predicate") {
CHECK(!fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE_NOT_COMMITTED, 1040)); // proxy_memory_limit_exceeded
}
// Feature not live yet, re-enable when checking if a blocking call is made
// from the network thread is live.
TEST_CASE("block_from_callback"
* doctest::skip(true)) {
TEST_CASE("block_from_callback") {
fdb::Transaction tr(db);
fdb::ValueFuture f1 = tr.get("foo", /*snapshot*/ true);
struct Context {
@ -1803,7 +2024,7 @@ TEST_CASE("block_from_callback"
fdb::ValueFuture f2 = context->tr->get("bar", /*snapshot*/ true);
fdb_error_t error = f2.block_until_ready();
if (error) {
CHECK(error == /*blocked_from_network_thread*/ 2025);
CHECK(error == /*blocked_from_network_thread*/ 2026);
}
context->event.set();
},
@ -1821,11 +2042,12 @@ int main(int argc, char **argv) {
doctest::Context context;
fdb_check(fdb_select_api_version(620));
fdb_check(fdb_select_api_version(700));
fdb_check(fdb_setup_network());
std::thread network_thread{ &fdb_run_network };
db = fdb_open_database(argv[1]);
clusterFilePath = std::string(argv[1]);
prefix = argv[2];
int res = context.run();
fdb_database_destroy(db);

View File

@ -95,6 +95,7 @@ def api_version(ver):
'transactional',
'options',
'StreamingMode',
'get_server_protocol'
)
_add_symbols(fdb.impl, list)

View File

@ -733,6 +733,12 @@ class FutureInt64(Future):
self.capi.fdb_future_get_int64(self.fpointer, ctypes.byref(value))
return value.value
class FutureUInt64(Future):
def wait(self):
self.block_until_ready()
value = ctypes.c_uint64()
self.capi.fdb_future_get_uint64(self.fpointer, ctypes.byref(value))
return value.value
class FutureKeyValueArray(Future):
def wait(self):
@ -1417,6 +1423,10 @@ def init_c_api():
_capi.fdb_future_get_int64.restype = ctypes.c_int
_capi.fdb_future_get_int64.errcheck = check_error_code
_capi.fdb_future_get_uint64.argtypes = [ctypes.c_void_p, ctypes.POINTER(ctypes.c_uint64)]
_capi.fdb_future_get_uint64.restype = ctypes.c_uint
_capi.fdb_future_get_uint64.errcheck = check_error_code
_capi.fdb_future_get_key.argtypes = [ctypes.c_void_p, ctypes.POINTER(ctypes.POINTER(ctypes.c_byte)),
ctypes.POINTER(ctypes.c_int)]
_capi.fdb_future_get_key.restype = ctypes.c_int
@ -1521,6 +1531,9 @@ def init_c_api():
_capi.fdb_transaction_get_approximate_size.argtypes = [ctypes.c_void_p]
_capi.fdb_transaction_get_approximate_size.restype = ctypes.c_void_p
_capi.fdb_get_server_protocol.argtypes = [ctypes.c_char_p]
_capi.fdb_get_server_protocol.restype = ctypes.c_void_p
_capi.fdb_transaction_get_versionstamp.argtypes = [ctypes.c_void_p]
_capi.fdb_transaction_get_versionstamp.restype = ctypes.c_void_p
@ -1720,6 +1733,12 @@ open_databases = {}
cacheLock = threading.Lock()
def get_server_protocol(clusterFilePath=None):
with _network_thread_reentrant_lock:
if not _network_thread:
init()
return FutureUInt64(_capi.fdb_get_server_protocol(optionalParamToBytes(clusterFilePath)[0]))
def open(cluster_file=None, event_model=None):
"""Opens the given database (or the default database of the cluster indicated

View File

@ -4,7 +4,8 @@ RUN yum install -y centos-release-scl scl-utils
RUN rpmkeys --import "http://pool.sks-keyservers.net/pks/lookup?op=get&search=0x3fa7e0328081bff6a14da29aa6a19b38d3d831ef"
RUN curl https://download.mono-project.com/repo/centos7-stable.repo | tee /etc/yum.repos.d/mono-centos7-stable.repo
RUN yum install -y curl rpm-build wget git unzip devtoolset-8 devtoolset-8-libubsan-devel devtoolset-8-valgrind-devel \
rh-ruby26 go-toolset-7 rh-git218 rh-python36-devel java-11-openjdk-devel.x86_64 mono-devel dos2unix dpkg rh-python36
rh-ruby26 go-toolset-7 rh-git218 rh-python36-devel java-11-openjdk-devel.x86_64 mono-devel dos2unix dpkg rh-python36 \
lz4 lz4-devel lz4-static
# install Ninja
RUN cd /tmp && curl -L https://github.com/ninja-build/ninja/archive/v1.9.0.zip -o ninja.zip &&\

View File

@ -5,7 +5,6 @@ env_set(USE_DTRACE ON BOOL "Enable dtrace probes on supported platforms")
env_set(USE_VALGRIND OFF BOOL "Compile for valgrind usage")
env_set(USE_VALGRIND_FOR_CTEST ${USE_VALGRIND} BOOL "Use valgrind for ctest")
env_set(ALLOC_INSTRUMENTATION OFF BOOL "Instrument alloc")
env_set(WITH_UNDODB OFF BOOL "Use rr or undodb")
env_set(USE_ASAN OFF BOOL "Compile with address sanitizer")
env_set(USE_GCOV OFF BOOL "Compile with gcov instrumentation")
env_set(USE_MSAN OFF BOOL "Compile with memory sanitizer. To avoid false positives you need to dynamically link to a msan-instrumented libc++ and libc++abi, which you must compile separately. See https://github.com/google/sanitizers/wiki/MemorySanitizerLibcxxHowTo#instrumented-libc.")
@ -47,35 +46,9 @@ add_compile_definitions(BOOST_ERROR_CODE_HEADER_ONLY BOOST_SYSTEM_NO_DEPRECATED)
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
if(ALLOC_INSTRUMENTATION)
add_compile_options(-DALLOC_INSTRUMENTATION)
endif()
if(WITH_UNDODB)
add_compile_options(-DWITH_UNDODB)
endif()
if(DEBUG_TASKS)
add_compile_options(-DDEBUG_TASKS)
endif()
if(NDEBUG)
add_compile_options(-DNDEBUG)
endif()
if(FDB_RELEASE)
add_compile_options(-DFDB_RELEASE)
add_compile_options(-DFDB_CLEAN_BUILD)
endif()
include_directories(${CMAKE_SOURCE_DIR})
include_directories(${CMAKE_BINARY_DIR})
if (NOT OPEN_FOR_IDE)
add_definitions(-DNO_INTELLISENSE)
endif()
if(WIN32)
add_definitions(-DUSE_USEFIBERS)
else()
add_definitions(-DUSE_UCONTEXT)
endif()
if (USE_CCACHE)
FIND_PROGRAM(CCACHE_FOUND "ccache")
@ -95,6 +68,10 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED ON)
if(NOT OPEN_FOR_IDE)
add_compile_definitions(NO_INTELLISENSE)
endif()
if(NOT WIN32)
include(CheckIncludeFile)
CHECK_INCLUDE_FILE("stdatomic.h" HAS_C11_ATOMICS)
@ -112,7 +89,6 @@ if(WIN32)
string(REGEX REPLACE "/W[0-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
endif()
add_compile_options(/W0 /EHsc /bigobj $<$<CONFIG:Release>:/Zi> /MP /FC /Gm-)
add_compile_definitions(_WIN32_WINNT=${WINDOWS_TARGET} WINVER=${WINDOWS_TARGET} NTDDI_VERSION=0x05020000 BOOST_ALL_NO_LIB)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MT")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MTd")
else()
@ -166,11 +142,7 @@ else()
# and create a debuginfo rpm
add_compile_options(-ggdb -fno-omit-frame-pointer)
if(USE_ASAN)
add_compile_options(
-fsanitize=address
-DUSE_SANITIZER
-DADDRESS_SANITIZER
)
add_compile_options(-fsanitize=address)
add_link_options(-fsanitize=address)
endif()
@ -180,15 +152,11 @@ else()
endif()
add_compile_options(
-fsanitize=memory
-fsanitize-memory-track-origins=2
-DUSE_SANITIZER
-DMEMORY_SANITIZER
)
-fsanitize-memory-track-origins=2)
add_link_options(-fsanitize=memory)
endif()
if(USE_GCOV)
add_compile_options(--coverage -DUSE_GCOV)
add_link_options(--coverage)
endif()
@ -196,20 +164,13 @@ else()
add_compile_options(
-fsanitize=undefined
# TODO(atn34) Re-enable -fsanitize=alignment once https://github.com/apple/foundationdb/issues/1434 is resolved
-fno-sanitize=alignment
-DUSE_SANITIZER
-DUNDEFINED_BEHAVIOR_SANITIZER
)
-fno-sanitize=alignment)
add_link_options(-fsanitize=undefined)
endif()
if(USE_TSAN)
add_compile_options(
-fsanitize=thread
-DUSE_SANITIZER
-DTHREAD_SANITIZER
-DDYNAMIC_ANNOTATIONS_EXTERNAL_IMPL=1
)
-fsanitize=thread)
add_link_options(-fsanitize=thread)
endif()
@ -269,9 +230,6 @@ else()
# for more information.
#add_compile_options(-fno-builtin-memcpy)
if (USE_VALGRIND)
add_compile_options(-DVALGRIND=1 -DUSE_VALGRIND=1)
endif()
if (CLANG)
add_compile_options()
# Clang has link errors unless `atomic` is specifically requested.
@ -280,7 +238,6 @@ else()
endif()
if (APPLE OR USE_LIBCXX)
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-stdlib=libc++>)
add_compile_definitions(WITH_LIBCXX)
if (NOT APPLE)
if (STATIC_LINK_LIBCXX)
add_link_options(-static-libgcc -nostdlib++ -Wl,-Bstatic -lc++ -lc++abi -Wl,-Bdynamic)
@ -366,10 +323,7 @@ else()
check_symbol_exists(aligned_alloc stdlib.h HAS_ALIGNED_ALLOC)
message(STATUS "Has aligned_alloc: ${HAS_ALIGNED_ALLOC}")
if((SUPPORT_DTRACE) AND (USE_DTRACE))
add_compile_definitions(DTRACE_PROBES)
endif()
if(HAS_ALIGNED_ALLOC)
add_compile_definitions(HAS_ALIGNED_ALLOC)
set(DTRACE_PROBES 1)
endif()
if(CMAKE_COMPILER_IS_GNUCXX)

design/flow_transport.md (new file, 91 lines)
View File

@ -0,0 +1,91 @@
# Flow Transport
This section describes the design and implementation of the flow transport wire protocol (as of release 6.3).
## ConnectPacket
The first bytes sent over a TCP connection in flow are the `ConnectPacket`.
This is a variable-length message (though fixed-length at a given protocol
version) designed with forward and backward compatibility in mind. The expected length of the `ConnectPacket` is encoded as the first 4 bytes (unsigned, little-endian). Upon receiving an incoming connection, a peer reads the `ProtocolVersion` from the `ConnectPacket`: the next 8 bytes, unsigned, little-endian, whose most significant 4 bits encode flags and should be zeroed before interpreting the version numerically.
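To make that layout concrete, here is a minimal, illustrative sketch of parsing the prefix from a received buffer. This is not flow's actual implementation; the struct and function names are invented for the example, and a little-endian host is assumed:

```
#include <cstdint>
#include <cstring>

// Illustrative only: parse the leading fields of a ConnectPacket as
// described above. Assumes `buf` holds at least 12 bytes and that the
// host is little-endian (otherwise byte-swap after each memcpy).
struct ConnectPacketPrefix {
    uint32_t expectedLength;  // first 4 bytes: expected ConnectPacket length
    uint64_t protocolVersion; // next 8 bytes: version; top 4 bits are flags
};

ConnectPacketPrefix parseConnectPacketPrefix(const uint8_t* buf) {
    ConnectPacketPrefix p;
    std::memcpy(&p.expectedLength, buf, 4);
    std::memcpy(&p.protocolVersion, buf + 4, 8);
    p.protocolVersion &= 0x0FFFFFFFFFFFFFFFull; // zero the 4 flag bits
    return p;
}
```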
## Protocol compatibility
Based on the incoming connection's `ProtocolVersion`, this connection is either
"compatible" or "incompatible". If this connection is incompatible, then we
will not actually look at any bytes sent after the `ConnectPacket`, but we will
keep the connection open so that the peer does not keep trying to open new
connections.
If this connection is compatible, then we know that our peer is using the same wire protocol as we are and we can proceed.
## Framing and checksumming protocol
As of release 6.3, the structure of subsequent messages is as follows (a decoding sketch follows the list):
* For TLS connections:
1. packet length (4 bytes unsigned little-endian)
2. token (16 opaque bytes that identify the recipient of this message)
3. message contents (packet length - 16 bytes to be interpreted by the recipient)
* For non-TLS connections, there's additionally a crc32 checksum for message integrity:
1. packet length (4 bytes unsigned little-endian)
2. 4 byte crc32 checksum of token + message
3. token
4. message
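As a companion to the list above, here is a hedged, illustrative decoder for one non-TLS frame. Nothing here is flow's real code: `crc32` stands in for whatever crc32 implementation the receiver links against, and all names are invented:

```
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>

// Assumed to be provided elsewhere (e.g. zlib's crc32); illustrative only.
extern uint32_t crc32(const uint8_t* data, size_t len);

struct Frame {
    std::array<uint8_t, 16> token; // identifies the recipient
    const uint8_t* message;        // packetLength - 16 bytes of payload
    uint32_t messageLength;
};

// Decode one non-TLS frame from `buf`. Returns false if the buffer does
// not yet hold a complete, valid frame.
bool decodeNonTlsFrame(const uint8_t* buf, size_t avail, Frame& out) {
    if (avail < 8) return false;
    uint32_t packetLength, checksum;
    std::memcpy(&packetLength, buf, 4); // 1. packet length (LE)
    std::memcpy(&checksum, buf + 4, 4); // 2. crc32 of token + message
    if (packetLength < 16 || avail < 8 + packetLength) return false;
    if (crc32(buf + 8, packetLength) != checksum) return false; // integrity
    std::memcpy(out.token.data(), buf + 8, 16); // 3. token
    out.message = buf + 24;                     // 4. message
    out.messageLength = packetLength - 16;
    return true;
}
```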
## Well-known endpoints
Endpoints are a pair of a 16 byte token that identifies the recipient and a
network address to send a message to. Endpoints are usually obtained over the
network - for example a request conventionally includes the endpoint the
reply should be sent to (like a self-addressed stamped envelope). So if you
can send a message and get endpoints in reply, you can start sending messages
to those endpoints. But how do you send that first message?
That's where the concept of a "well-known" endpoint comes in. Some endpoints
(for example the endpoints coordinators are listening on) use "well-known"
tokens that are agreed upon ahead of time. Technically the value of these
tokens could be changed as part of an incompatible protocol version bump, but
in practice this hasn't happened and shouldn't ever need to happen.
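As a mental model only (flow's real types differ), an endpoint and a well-known token could be pictured like this; both names below are invented for illustration:

```
#include <array>
#include <cstdint>
#include <string>

// Illustrative only. An endpoint pairs a 16-byte token (who should
// handle the message) with where to send it.
struct Endpoint {
    std::array<uint8_t, 16> token; // recipient id; usually learned from a reply
    std::string address;           // network address, e.g. "10.0.0.1:4500"
};

// A "well-known" token is a constant agreed upon ahead of time, so a
// first message (e.g. to a coordinator) can be sent with no prior reply.
constexpr std::array<uint8_t, 16> WLTOKEN_EXAMPLE = { 0, 0, 0, 0, 0, 0, 0, 0,
                                                      0, 0, 0, 0, 0, 0, 0, 1 };
```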
## Flatbuffers
Prior to release-6.2 the structure of messages (e.g. how many fields a
message has) was implicitly part of the protocol version, and so adding a
field to any message required a protocol version bump. Since release-6.2
messages are encoded as flatbuffers messages, and you can technically add
fields without a protocol version bump. This is a powerful and dangerous tool
that needs to be used with caution. If you add a field without a protocol version bump, then you can no longer be certain that this field will always be present (e.g. if you get a message from an old peer it might not include that field.)
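This is the same hazard the serializers later in this diff guard against: a field that cannot be assumed present must be branched on at read time. A condensed sketch, modeled on the `Event::serialize` code in this commit (it assumes flow's `serializer` helper and `Key` type, and shows the version-gated variant of the pattern):

```
// Condensed from the Event::serialize pattern later in this diff.
// A field added at some protocol version (here dcId) must be guarded,
// because a message from an older peer simply will not carry it.
struct Event {
    int type{ 0 };
    double startTs{ 0 };
    Key dcId{};

    template <typename Ar>
    Ar& serialize(Ar& ar) {
        if (ar.protocolVersion().version() >= (uint64_t)0x0FDB00B063010001LL) {
            return serializer(ar, type, startTs, dcId); // new enough: dcId on the wire
        } else {
            return serializer(ar, type, startTs); // older peers omit it
        }
    }
};
```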
We don't have a good way to test two or more fdbserver binaries in
simulation, so we discourage adding fields or otherwise making any protocol
changes without a protocol version bump.
Bumping the protocol version is costly for clients though, since now they need a whole new libfdb_c.so to be able to talk to the cluster _at all_.
## Stable Endpoints
Stable endpoints are a proposal to allow protocol compatibility to be checked
per endpoint rather than per connection. The proposal is to commit to the
current (release-6.3) framing protocol for opening connections, and allow a
newer framing protocol (for example a new checksum) to be negotiated after
the connection has been established. This way even if peers are at different
protocol versions they can still read the token each message is addressed to,
and they can use that token to decide whether or not to attempt to handle the
message. By default, tokens will have the same compatibility requirements as
before where the protocol version must match exactly. But new tokens can
optionally have a different policy - e.g. handle anything from a protocol
version >= release-7.0.
One of the main features motivating "Stable Endpoints" is the ability to download a compatible libfdb_c from a coordinator.
### Changes to flow transport for Stable Endpoints
1. Well known endpoints must never change (this just makes it official)
2. The (initial) framing protocol must remain fixed. If we want to change the checksum, we can add a stable, well known endpoint that advertises what checksums are supported and use this to change the checksum after the connection has already been established.
3. Each endpoint can have a different compatibility policy: e.g. an endpoint can be marked as requiring at least `ProtocolVersion::withStableInterfaces()` like this:
```
ReplyPromise<ProtocolInfoReply> reply{ PeerCompatibilityPolicy{ RequirePeer::AtLeast,
ProtocolVersion::withStableInterfaces() } };
```
4. Well known endpoints no longer need to be added in a particular order. Instead you reserve the number of well known endpoints ahead of time and then you can add them in any order.

View File

@ -2,12 +2,16 @@
Release Notes
#############
6.2.29
======
* Fix invalid memory access on data distributor when snapshotting large clusters. `(PR #4076) <https://github.com/apple/foundationdb/pull/4076>`_
* Add human-readable DateTime to trace events. `(PR #4087) <https://github.com/apple/foundationdb/pull/4087>`_
6.2.28
======
* Log detailed team collection information when median available space ratio of all teams is too low. `(PR #3912) <https://github.com/apple/foundationdb/pull/3912>`_
* Bug fix, blob client did not support authentication key sizes over 64 bytes. `(PR #3964) <https://github.com/apple/foundationdb/pull/3964>`_
6.2.27
======
* For clusters with a large number of shards, avoid slow tasks in the data distributor by adding yields to the shard map destruction. `(PR #3834) <https://github.com/apple/foundationdb/pull/3834>`_

View File

@ -175,7 +175,7 @@ struct MutationFilesReadProgress : public ReferenceCounted<MutationFilesReadProg
int msgSize = bigEndian32(reader.consume<int>());
const uint8_t* message = reader.consume(msgSize);
ArenaReader rd(buf.arena(), StringRef(message, msgSize), AssumeVersion(currentProtocolVersion));
ArenaReader rd(buf.arena(), StringRef(message, msgSize), AssumeVersion(g_network->protocolVersion()));
MutationRef m;
rd >> m;
count++;
@ -433,7 +433,7 @@ ACTOR Future<Void> convert(ConvertParams params) {
state BackupDescription desc = wait(container->describeBackup());
std::cout << "\n" << desc.toString() << "\n";
// std::cout << "Using Protocol Version: 0x" << std::hex << currentProtocolVersion.version() << std::dec << "\n";
// std::cout << "Using Protocol Version: 0x" << std::hex << g_network->protocolVersion().version() << std::dec << "\n";
std::vector<LogFile> logs = getRelevantLogFiles(listing.logs, params.begin, params.end);
printLogFiles("Range has", logs);
@ -460,7 +460,7 @@ ACTOR Future<Void> convert(ConvertParams params) {
arena = Arena();
}
ArenaReader rd(data.arena, data.message, AssumeVersion(currentProtocolVersion));
ArenaReader rd(data.arena, data.message, AssumeVersion(g_network->protocolVersion()));
MutationRef m;
rd >> m;
std::cout << data.version.toString() << " m = " << m.toString() << "\n";

View File

@ -72,45 +72,32 @@ using std::endl;
#include "flow/SimpleOpt.h"
#include "flow/actorcompiler.h" // This must be the last #include.
// Type of program being executed
enum enumProgramExe {
EXE_AGENT,
EXE_BACKUP,
EXE_RESTORE,
EXE_FASTRESTORE_TOOL,
EXE_DR_AGENT,
EXE_DB_BACKUP,
EXE_UNDEFINED
enum class ProgramExe { AGENT, BACKUP, RESTORE, FASTRESTORE_TOOL, DR_AGENT, DB_BACKUP, UNDEFINED };
enum class BackupType {
UNDEFINED = 0,
START,
MODIFY,
STATUS,
ABORT,
WAIT,
DISCONTINUE,
PAUSE,
RESUME,
EXPIRE,
DELETE,
DESCRIBE,
LIST,
QUERY,
DUMP,
CLEANUP
};
enum enumBackupType {
BACKUP_UNDEFINED = 0,
BACKUP_START,
BACKUP_MODIFY,
BACKUP_STATUS,
BACKUP_ABORT,
BACKUP_WAIT,
BACKUP_DISCONTINUE,
BACKUP_PAUSE,
BACKUP_RESUME,
BACKUP_EXPIRE,
BACKUP_DELETE,
BACKUP_DESCRIBE,
BACKUP_LIST,
BACKUP_QUERY,
BACKUP_DUMP,
BACKUP_CLEANUP
};
enum enumDBType {
DB_UNDEFINED=0, DB_START, DB_STATUS, DB_SWITCH, DB_ABORT, DB_PAUSE, DB_RESUME
};
enum class DBType { UNDEFINED = 0, START, STATUS, SWITCH, ABORT, PAUSE, RESUME };
// New fast restore reuses the type from legacy slow restore
enum enumRestoreType {
RESTORE_UNKNOWN, RESTORE_START, RESTORE_STATUS, RESTORE_ABORT, RESTORE_WAIT
};
enum class RestoreType { UNKNOWN, START, STATUS, ABORT, WAIT };
//
enum {
@ -1258,30 +1245,29 @@ static void printDBBackupUsage(bool devhelp) {
return;
}
static void printUsage(enumProgramExe programExe, bool devhelp)
{
static void printUsage(ProgramExe programExe, bool devhelp) {
switch (programExe)
{
case EXE_AGENT:
case ProgramExe::AGENT:
printAgentUsage(devhelp);
break;
case EXE_BACKUP:
case ProgramExe::BACKUP:
printBackupUsage(devhelp);
break;
case EXE_RESTORE:
case ProgramExe::RESTORE:
printRestoreUsage(devhelp);
break;
case EXE_FASTRESTORE_TOOL:
case ProgramExe::FASTRESTORE_TOOL:
printFastRestoreUsage(devhelp);
break;
case EXE_DR_AGENT:
case ProgramExe::DR_AGENT:
printDBAgentUsage(devhelp);
break;
case EXE_DB_BACKUP:
case ProgramExe::DB_BACKUP:
printDBBackupUsage(devhelp);
break;
case EXE_UNDEFINED:
case ProgramExe::UNDEFINED:
default:
break;
}
@ -1292,9 +1278,8 @@ static void printUsage(enumProgramExe programExe, bool devhelp)
extern bool g_crashOnError;
// Return the type of program executable based on the name of executable file
enumProgramExe getProgramType(std::string programExe)
{
enumProgramExe enProgramExe = EXE_UNDEFINED;
ProgramExe getProgramType(std::string programExe) {
ProgramExe enProgramExe = ProgramExe::UNDEFINED;
// lowercase the string
std::transform(programExe.begin(), programExe.end(), programExe.begin(), ::tolower);
@ -1321,71 +1306,70 @@ enumProgramExe getProgramType(std::string programExe)
if ((programExe.length() >= exeAgent.size()) &&
(programExe.compare(programExe.length()-exeAgent.size(), exeAgent.size(), (const char*) exeAgent.begin()) == 0) )
{
enProgramExe = EXE_AGENT;
enProgramExe = ProgramExe::AGENT;
}
// Check if backup
else if ((programExe.length() >= exeBackup.size()) &&
(programExe.compare(programExe.length() - exeBackup.size(), exeBackup.size(), (const char*)exeBackup.begin()) == 0))
{
enProgramExe = EXE_BACKUP;
enProgramExe = ProgramExe::BACKUP;
}
// Check if restore
else if ((programExe.length() >= exeRestore.size()) &&
(programExe.compare(programExe.length() - exeRestore.size(), exeRestore.size(), (const char*)exeRestore.begin()) == 0))
{
enProgramExe = EXE_RESTORE;
enProgramExe = ProgramExe::RESTORE;
}
// Check if restore
else if ((programExe.length() >= exeFastRestoreTool.size()) &&
(programExe.compare(programExe.length() - exeFastRestoreTool.size(), exeFastRestoreTool.size(),
(const char*)exeFastRestoreTool.begin()) == 0)) {
enProgramExe = EXE_FASTRESTORE_TOOL;
enProgramExe = ProgramExe::FASTRESTORE_TOOL;
}
// Check if db agent
else if ((programExe.length() >= exeDatabaseAgent.size()) &&
(programExe.compare(programExe.length() - exeDatabaseAgent.size(), exeDatabaseAgent.size(),
(const char*)exeDatabaseAgent.begin()) == 0)) {
enProgramExe = EXE_DR_AGENT;
enProgramExe = ProgramExe::DR_AGENT;
}
// Check if db backup
else if ((programExe.length() >= exeDatabaseBackup.size()) &&
(programExe.compare(programExe.length() - exeDatabaseBackup.size(), exeDatabaseBackup.size(),
(const char*)exeDatabaseBackup.begin()) == 0)) {
enProgramExe = EXE_DB_BACKUP;
enProgramExe = ProgramExe::DB_BACKUP;
}
return enProgramExe;
}
enumBackupType getBackupType(std::string backupType)
{
enumBackupType enBackupType = BACKUP_UNDEFINED;
BackupType getBackupType(std::string backupType) {
BackupType enBackupType = BackupType::UNDEFINED;
// lowercase the string
std::transform(backupType.begin(), backupType.end(), backupType.begin(), ::tolower);
static std::map<std::string, enumBackupType> values;
static std::map<std::string, BackupType> values;
if(values.empty()) {
values["start"] = BACKUP_START;
values["status"] = BACKUP_STATUS;
values["abort"] = BACKUP_ABORT;
values["cleanup"] = BACKUP_CLEANUP;
values["wait"] = BACKUP_WAIT;
values["discontinue"] = BACKUP_DISCONTINUE;
values["pause"] = BACKUP_PAUSE;
values["resume"] = BACKUP_RESUME;
values["expire"] = BACKUP_EXPIRE;
values["delete"] = BACKUP_DELETE;
values["describe"] = BACKUP_DESCRIBE;
values["list"] = BACKUP_LIST;
values["query"] = BACKUP_QUERY;
values["dump"] = BACKUP_DUMP;
values["modify"] = BACKUP_MODIFY;
values["start"] = BackupType::START;
values["status"] = BackupType::STATUS;
values["abort"] = BackupType::ABORT;
values["cleanup"] = BackupType::CLEANUP;
values["wait"] = BackupType::WAIT;
values["discontinue"] = BackupType::DISCONTINUE;
values["pause"] = BackupType::PAUSE;
values["resume"] = BackupType::RESUME;
values["expire"] = BackupType::EXPIRE;
values["delete"] = BackupType::DELETE;
values["describe"] = BackupType::DESCRIBE;
values["list"] = BackupType::LIST;
values["query"] = BackupType::QUERY;
values["dump"] = BackupType::DUMP;
values["modify"] = BackupType::MODIFY;
}
auto i = values.find(backupType);
@ -1395,29 +1379,28 @@ enumBackupType getBackupType(std::string backupType)
return enBackupType;
}
enumRestoreType getRestoreType(std::string name) {
if(name == "start") return RESTORE_START;
if(name == "abort") return RESTORE_ABORT;
if(name == "status") return RESTORE_STATUS;
if(name == "wait") return RESTORE_WAIT;
return RESTORE_UNKNOWN;
RestoreType getRestoreType(std::string name) {
if (name == "start") return RestoreType::START;
if (name == "abort") return RestoreType::ABORT;
if (name == "status") return RestoreType::STATUS;
if (name == "wait") return RestoreType::WAIT;
return RestoreType::UNKNOWN;
}
enumDBType getDBType(std::string dbType)
{
enumDBType enBackupType = DB_UNDEFINED;
DBType getDBType(std::string dbType) {
DBType enBackupType = DBType::UNDEFINED;
// lowercase the string
std::transform(dbType.begin(), dbType.end(), dbType.begin(), ::tolower);
static std::map<std::string, enumDBType> values;
static std::map<std::string, DBType> values;
if(values.empty()) {
values["start"] = DB_START;
values["status"] = DB_STATUS;
values["switch"] = DB_SWITCH;
values["abort"] = DB_ABORT;
values["pause"] = DB_PAUSE;
values["resume"] = DB_RESUME;
values["start"] = DBType::START;
values["status"] = DBType::STATUS;
values["switch"] = DBType::SWITCH;
values["abort"] = DBType::ABORT;
values["pause"] = DBType::PAUSE;
values["resume"] = DBType::RESUME;
}
auto i = values.find(dbType);
@ -1427,7 +1410,8 @@ enumDBType getDBType(std::string dbType)
return enBackupType;
}
ACTOR Future<std::string> getLayerStatus(Reference<ReadYourWritesTransaction> tr, std::string name, std::string id, enumProgramExe exe, Database dest, bool snapshot = false) {
ACTOR Future<std::string> getLayerStatus(Reference<ReadYourWritesTransaction> tr, std::string name, std::string id,
ProgramExe exe, Database dest, bool snapshot = false) {
// This process will write a document that looks like this:
// { backup : { $expires : {<subdoc>}, version: <version from approximately 30 seconds from now> }
// so that the value under 'backup' will eventually expire to null and thus be ignored by
@ -1459,7 +1443,7 @@ ACTOR Future<std::string> getLayerStatus(Reference<ReadYourWritesTransaction> tr
o.create("process_cpu_seconds") = getProcessorTimeProcess();
o.create("configured_workers") = CLIENT_KNOBS->BACKUP_TASKS_PER_AGENT;
if(exe == EXE_AGENT) {
if (exe == ProgramExe::AGENT) {
static S3BlobStoreEndpoint::Stats last_stats;
static double last_ts = 0;
S3BlobStoreEndpoint::Stats current_stats = S3BlobStoreEndpoint::s_stats;
@ -1531,8 +1515,7 @@ ACTOR Future<std::string> getLayerStatus(Reference<ReadYourWritesTransaction> tr
j++;
}
}
else if(exe == EXE_DR_AGENT) {
} else if (exe == ProgramExe::DR_AGENT) {
state DatabaseBackupAgent dba;
state Reference<ReadYourWritesTransaction> tr2(new ReadYourWritesTransaction(dest));
tr2->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
@ -1676,8 +1659,9 @@ ACTOR Future<Void> updateAgentPollRate(Database src, std::string rootKey, std::s
}
}
ACTOR Future<Void> statusUpdateActor(Database statusUpdateDest, std::string name, enumProgramExe exe, double *pollDelay, Database taskDest = Database(),
std::string id = nondeterministicRandom()->randomUniqueID().toString()) {
ACTOR Future<Void> statusUpdateActor(Database statusUpdateDest, std::string name, ProgramExe exe, double* pollDelay,
Database taskDest = Database(),
std::string id = nondeterministicRandom()->randomUniqueID().toString()) {
state std::string metaKey = layerStatusMetaPrefixRange.begin.toString() + "json/" + name;
state std::string rootKey = backupStatusPrefixRange.begin.toString() + name + "/json";
state std::string instanceKey = rootKey + "/" + "agent-" + id;
@ -1734,8 +1718,9 @@ ACTOR Future<Void> statusUpdateActor(Database statusUpdateDest, std::string name
ACTOR Future<Void> runDBAgent(Database src, Database dest) {
state double pollDelay = 1.0 / CLIENT_KNOBS->BACKUP_AGGREGATE_POLL_RATE;
std::string id = nondeterministicRandom()->randomUniqueID().toString();
state Future<Void> status = statusUpdateActor(src, "dr_backup", EXE_DR_AGENT, &pollDelay, dest, id);
state Future<Void> status_other = statusUpdateActor(dest, "dr_backup_dest", EXE_DR_AGENT, &pollDelay, dest, id);
state Future<Void> status = statusUpdateActor(src, "dr_backup", ProgramExe::DR_AGENT, &pollDelay, dest, id);
state Future<Void> status_other =
statusUpdateActor(dest, "dr_backup_dest", ProgramExe::DR_AGENT, &pollDelay, dest, id);
state DatabaseBackupAgent backupAgent(src);
@ -1760,7 +1745,7 @@ ACTOR Future<Void> runDBAgent(Database src, Database dest) {
ACTOR Future<Void> runAgent(Database db) {
state double pollDelay = 1.0 / CLIENT_KNOBS->BACKUP_AGGREGATE_POLL_RATE;
state Future<Void> status = statusUpdateActor(db, "backup", EXE_AGENT, &pollDelay);
state Future<Void> status = statusUpdateActor(db, "backup", ProgramExe::AGENT, &pollDelay);
state FileBackupAgent backupAgent;
@ -2953,22 +2938,22 @@ int main(int argc, char* argv[]) {
setvbuf(stdout, NULL, _IONBF, 0);
setvbuf(stderr, NULL, _IONBF, 0);
enumProgramExe programExe = getProgramType(argv[0]);
enumBackupType backupType = BACKUP_UNDEFINED;
enumRestoreType restoreType = RESTORE_UNKNOWN;
enumDBType dbType = DB_UNDEFINED;
ProgramExe programExe = getProgramType(argv[0]);
BackupType backupType = BackupType::UNDEFINED;
RestoreType restoreType = RestoreType::UNKNOWN;
DBType dbType = DBType::UNDEFINED;
CSimpleOpt* args = NULL;
switch (programExe)
{
case EXE_AGENT:
case ProgramExe::AGENT:
args = new CSimpleOpt(argc, argv, g_rgAgentOptions, SO_O_EXACT);
break;
case EXE_DR_AGENT:
case ProgramExe::DR_AGENT:
args = new CSimpleOpt(argc, argv, g_rgDBAgentOptions, SO_O_EXACT);
break;
case EXE_BACKUP:
case ProgramExe::BACKUP:
// Display backup help, if no arguments
if (argc < 2) {
printBackupUsage(false);
@ -2981,52 +2966,52 @@ int main(int argc, char* argv[]) {
// Create the appropriate simple opt
switch (backupType)
{
case BACKUP_START:
case BackupType::START:
args = new CSimpleOpt(argc-1, &argv[1], g_rgBackupStartOptions, SO_O_EXACT);
break;
case BACKUP_STATUS:
case BackupType::STATUS:
args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupStatusOptions, SO_O_EXACT);
break;
case BACKUP_ABORT:
case BackupType::ABORT:
args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupAbortOptions, SO_O_EXACT);
break;
case BACKUP_CLEANUP:
case BackupType::CLEANUP:
args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupCleanupOptions, SO_O_EXACT);
break;
case BACKUP_WAIT:
case BackupType::WAIT:
args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupWaitOptions, SO_O_EXACT);
break;
case BACKUP_DISCONTINUE:
case BackupType::DISCONTINUE:
args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupDiscontinueOptions, SO_O_EXACT);
break;
case BACKUP_PAUSE:
case BackupType::PAUSE:
args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupPauseOptions, SO_O_EXACT);
break;
case BACKUP_RESUME:
case BackupType::RESUME:
args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupPauseOptions, SO_O_EXACT);
break;
case BACKUP_EXPIRE:
case BackupType::EXPIRE:
args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupExpireOptions, SO_O_EXACT);
break;
case BACKUP_DELETE:
case BackupType::DELETE:
args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupDeleteOptions, SO_O_EXACT);
break;
case BACKUP_DESCRIBE:
case BackupType::DESCRIBE:
args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupDescribeOptions, SO_O_EXACT);
break;
case BACKUP_DUMP:
case BackupType::DUMP:
args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupDumpOptions, SO_O_EXACT);
break;
case BACKUP_LIST:
case BackupType::LIST:
args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupListOptions, SO_O_EXACT);
break;
case BACKUP_QUERY:
case BackupType::QUERY:
args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupQueryOptions, SO_O_EXACT);
break;
case BACKUP_MODIFY:
case BackupType::MODIFY:
args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupModifyOptions, SO_O_EXACT);
break;
case BACKUP_UNDEFINED:
case BackupType::UNDEFINED:
default:
// Display help, if requested
if ((strcmp(argv[1], "-h") == 0) ||
@ -3044,7 +3029,7 @@ int main(int argc, char* argv[]) {
}
}
break;
case EXE_DB_BACKUP:
case ProgramExe::DB_BACKUP:
// Display backup help, if no arguments
if (argc < 2) {
printDBBackupUsage(false);
@ -3057,25 +3042,25 @@ int main(int argc, char* argv[]) {
// Create the appropriate simple opt
switch (dbType)
{
case DB_START:
case DBType::START:
args = new CSimpleOpt(argc-1, &argv[1], g_rgDBStartOptions, SO_O_EXACT);
break;
case DB_STATUS:
case DBType::STATUS:
args = new CSimpleOpt(argc - 1, &argv[1], g_rgDBStatusOptions, SO_O_EXACT);
break;
case DB_SWITCH:
case DBType::SWITCH:
args = new CSimpleOpt(argc - 1, &argv[1], g_rgDBSwitchOptions, SO_O_EXACT);
break;
case DB_ABORT:
case DBType::ABORT:
args = new CSimpleOpt(argc - 1, &argv[1], g_rgDBAbortOptions, SO_O_EXACT);
break;
case DB_PAUSE:
case DBType::PAUSE:
args = new CSimpleOpt(argc - 1, &argv[1], g_rgDBPauseOptions, SO_O_EXACT);
break;
case DB_RESUME:
case DBType::RESUME:
args = new CSimpleOpt(argc - 1, &argv[1], g_rgDBPauseOptions, SO_O_EXACT);
break;
case DB_UNDEFINED:
case DBType::UNDEFINED:
default:
// Display help, if requested
if ((strcmp(argv[1], "-h") == 0) ||
@ -3093,14 +3078,14 @@ int main(int argc, char* argv[]) {
}
}
break;
case EXE_RESTORE:
case ProgramExe::RESTORE:
if (argc < 2) {
printRestoreUsage(false);
return FDB_EXIT_ERROR;
}
// Get the restore operation type
restoreType = getRestoreType(argv[1]);
if(restoreType == RESTORE_UNKNOWN) {
if (restoreType == RestoreType::UNKNOWN) {
// Display help, if requested
if ((strcmp(argv[1], "-h") == 0) ||
(strcmp(argv[1], "--help") == 0) )
@ -3116,14 +3101,14 @@ int main(int argc, char* argv[]) {
}
args = new CSimpleOpt(argc - 1, argv + 1, g_rgRestoreOptions, SO_O_EXACT);
break;
case EXE_FASTRESTORE_TOOL:
case ProgramExe::FASTRESTORE_TOOL:
if (argc < 2) {
printFastRestoreUsage(false);
return FDB_EXIT_ERROR;
}
// Get the restore operation type
restoreType = getRestoreType(argv[1]);
if (restoreType == RESTORE_UNKNOWN) {
if (restoreType == RestoreType::UNKNOWN) {
// Display help, if requested
if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) {
printFastRestoreUsage(false);
@ -3136,7 +3121,7 @@ int main(int argc, char* argv[]) {
}
args = new CSimpleOpt(argc - 1, argv + 1, g_rgRestoreOptions, SO_O_EXACT);
break;
case EXE_UNDEFINED:
case ProgramExe::UNDEFINED:
default:
fprintf(stderr, "FoundationDB " FDB_VT_PACKAGE_NAME " (v" FDB_VT_VERSION ")\n");
fprintf(stderr, "ERROR: Unable to determine program type based on executable `%s'\n", argv[0]);
@ -3433,8 +3418,7 @@ int main(int argc, char* argv[]) {
usePartitionedLog = true;
break;
case OPT_INCREMENTALONLY:
// TODO: Enable this command-line argument once atomics are supported
// incrementalBackupOnly = true;
incrementalBackupOnly = true;
break;
case OPT_RESTORECONTAINER:
restoreContainer = args->OptionArg();
@ -3555,14 +3539,14 @@ int main(int argc, char* argv[]) {
{
switch (programExe)
{
case EXE_AGENT:
case ProgramExe::AGENT:
fprintf(stderr, "ERROR: Backup Agent does not support argument value `%s'\n", args->File(argLoop));
printHelpTeaser(argv[0]);
return FDB_EXIT_ERROR;
break;
// Add the backup key range
case EXE_BACKUP:
case ProgramExe::BACKUP:
// Error, if the keys option was not specified
if (backupKeys.size() == 0) {
fprintf(stderr, "ERROR: Unknown backup option value `%s'\n", args->File(argLoop));
@ -3581,26 +3565,26 @@ int main(int argc, char* argv[]) {
}
break;
case EXE_RESTORE:
case ProgramExe::RESTORE:
fprintf(stderr, "ERROR: FDB Restore does not support argument value `%s'\n", args->File(argLoop));
printHelpTeaser(argv[0]);
return FDB_EXIT_ERROR;
break;
case EXE_FASTRESTORE_TOOL:
case ProgramExe::FASTRESTORE_TOOL:
fprintf(stderr, "ERROR: FDB Fast Restore Tool does not support argument value `%s'\n",
args->File(argLoop));
printHelpTeaser(argv[0]);
return FDB_EXIT_ERROR;
break;
case EXE_DR_AGENT:
case ProgramExe::DR_AGENT:
fprintf(stderr, "ERROR: DR Agent does not support argument value `%s'\n", args->File(argLoop));
printHelpTeaser(argv[0]);
return FDB_EXIT_ERROR;
break;
case EXE_DB_BACKUP:
case ProgramExe::DB_BACKUP:
// Error, if the keys option was not specified
if (backupKeys.size() == 0) {
fprintf(stderr, "ERROR: Unknown DR option value `%s'\n", args->File(argLoop));
@ -3619,7 +3603,7 @@ int main(int argc, char* argv[]) {
}
break;
case EXE_UNDEFINED:
case ProgramExe::UNDEFINED:
default:
return FDB_EXIT_ERROR;
}
@ -3823,16 +3807,15 @@ int main(int argc, char* argv[]) {
switch (programExe)
{
case EXE_AGENT:
case ProgramExe::AGENT:
if(!initCluster())
return FDB_EXIT_ERROR;
f = stopAfter(runAgent(db));
break;
case EXE_BACKUP:
case ProgramExe::BACKUP:
switch (backupType)
{
case BACKUP_START:
{
case BackupType::START: {
if(!initCluster())
return FDB_EXIT_ERROR;
// Test out the backup url to make sure it parses. Doesn't test to make sure it's actually writeable.
@ -3843,8 +3826,7 @@ int main(int argc, char* argv[]) {
break;
}
case BACKUP_MODIFY:
{
case BackupType::MODIFY: {
if(!initCluster())
return FDB_EXIT_ERROR;
@ -3852,49 +3834,49 @@ int main(int argc, char* argv[]) {
break;
}
case BACKUP_STATUS:
case BackupType::STATUS:
if(!initCluster())
return FDB_EXIT_ERROR;
f = stopAfter( statusBackup(db, tagName, true, jsonOutput) );
break;
case BACKUP_ABORT:
case BackupType::ABORT:
if(!initCluster())
return FDB_EXIT_ERROR;
f = stopAfter( abortBackup(db, tagName) );
break;
case BACKUP_CLEANUP:
case BackupType::CLEANUP:
if(!initCluster())
return FDB_EXIT_ERROR;
f = stopAfter( cleanupMutations(db, deleteData) );
break;
case BACKUP_WAIT:
case BackupType::WAIT:
if(!initCluster())
return FDB_EXIT_ERROR;
f = stopAfter( waitBackup(db, tagName, stopWhenDone) );
break;
case BACKUP_DISCONTINUE:
case BackupType::DISCONTINUE:
if(!initCluster())
return FDB_EXIT_ERROR;
f = stopAfter( discontinueBackup(db, tagName, waitForDone) );
break;
case BACKUP_PAUSE:
case BackupType::PAUSE:
if(!initCluster())
return FDB_EXIT_ERROR;
f = stopAfter( changeBackupResumed(db, true) );
break;
case BACKUP_RESUME:
case BackupType::RESUME:
if(!initCluster())
return FDB_EXIT_ERROR;
f = stopAfter( changeBackupResumed(db, false) );
break;
case BACKUP_EXPIRE:
case BackupType::EXPIRE:
initTraceFile();
// Must have a usable cluster if either expire DateTime options were used
if(!expireDatetime.empty() || !expireRestorableAfterDatetime.empty()) {
@ -3904,12 +3886,12 @@ int main(int argc, char* argv[]) {
f = stopAfter( expireBackupData(argv[0], destinationContainer, expireVersion, expireDatetime, db, forceAction, expireRestorableAfterVersion, expireRestorableAfterDatetime) );
break;
case BACKUP_DELETE:
case BackupType::DELETE:
initTraceFile();
f = stopAfter( deleteBackupContainer(argv[0], destinationContainer) );
break;
case BACKUP_DESCRIBE:
case BackupType::DESCRIBE:
initTraceFile();
// If timestamp lookups are desired, require a cluster file
if(describeTimestamps && !initCluster())
@ -3919,23 +3901,23 @@ int main(int argc, char* argv[]) {
f = stopAfter( describeBackup(argv[0], destinationContainer, describeDeep, describeTimestamps ? Optional<Database>(db) : Optional<Database>(), jsonOutput) );
break;
case BACKUP_LIST:
case BackupType::LIST:
initTraceFile();
f = stopAfter( listBackup(baseUrl) );
break;
case BACKUP_QUERY:
case BackupType::QUERY:
initTraceFile();
f = stopAfter(queryBackup(argv[0], destinationContainer, backupKeysFilter, restoreVersion,
restoreClusterFileOrig, restoreTimestamp, !quietDisplay));
break;
case BACKUP_DUMP:
case BackupType::DUMP:
initTraceFile();
f = stopAfter( dumpBackupData(argv[0], destinationContainer, dumpBegin, dumpEnd) );
break;
case BACKUP_UNDEFINED:
case BackupType::UNDEFINED:
default:
fprintf(stderr, "ERROR: Unsupported backup action %s\n", argv[1]);
printHelpTeaser(argv[0]);
@ -3944,9 +3926,9 @@ int main(int argc, char* argv[]) {
}
break;
case EXE_RESTORE:
case ProgramExe::RESTORE:
if(dryRun) {
if(restoreType != RESTORE_START) {
if (restoreType != RestoreType::START) {
fprintf(stderr, "Restore dry run only works for 'start' command\n");
return FDB_EXIT_ERROR;
}
@ -3974,39 +3956,39 @@ int main(int argc, char* argv[]) {
}
switch(restoreType) {
case RESTORE_START:
f = stopAfter(runRestore(db, restoreClusterFileOrig, tagName, restoreContainer, backupKeys,
beginVersion, restoreVersion, restoreTimestamp, !dryRun, !quietDisplay,
waitForDone, addPrefix, removePrefix, incrementalBackupOnly));
break;
case RESTORE_WAIT:
f = stopAfter( success(ba.waitRestore(db, KeyRef(tagName), true)) );
break;
case RESTORE_ABORT:
f = stopAfter(
case RestoreType::START:
f = stopAfter(runRestore(db, restoreClusterFileOrig, tagName, restoreContainer, backupKeys,
beginVersion, restoreVersion, restoreTimestamp, !dryRun, !quietDisplay,
waitForDone, addPrefix, removePrefix, incrementalBackupOnly));
break;
case RestoreType::WAIT:
f = stopAfter(success(ba.waitRestore(db, KeyRef(tagName), true)));
break;
case RestoreType::ABORT:
f =
stopAfter(
map(ba.abortRestore(db, KeyRef(tagName)), [tagName](FileBackupAgent::ERestoreState s) -> Void {
printf("RESTORE_ABORT Tag: %s State: %s\n", tagName.c_str(),
FileBackupAgent::restoreStateText(s).toString().c_str());
return Void();
}));
break;
case RESTORE_STATUS:
// If no tag is specifically provided then print all tag status, don't just use "default"
if(tagProvided)
tag = tagName;
f = stopAfter( map(ba.restoreStatus(db, KeyRef(tag)), [](std::string s) -> Void {
printf("%s\n", s.c_str());
return Void();
}) );
break;
default:
throw restore_error();
break;
case RestoreType::STATUS:
// If no tag is specifically provided then print all tag status, don't just use "default"
if (tagProvided) tag = tagName;
f = stopAfter(map(ba.restoreStatus(db, KeyRef(tag)), [](std::string s) -> Void {
printf("%s\n", s.c_str());
return Void();
}));
break;
default:
throw restore_error();
}
break;
case EXE_FASTRESTORE_TOOL:
case ProgramExe::FASTRESTORE_TOOL:
// Support --dest_cluster_file option as fdbrestore does
if (dryRun) {
if (restoreType != RESTORE_START) {
if (restoreType != RestoreType::START) {
fprintf(stderr, "Restore dry run only works for 'start' command\n");
return FDB_EXIT_ERROR;
}
@ -4035,16 +4017,16 @@ int main(int argc, char* argv[]) {
}
// TODO: We have not implemented the code commented out in this case
switch (restoreType) {
case RESTORE_START:
case RestoreType::START:
f = stopAfter(runFastRestoreTool(db, tagName, restoreContainer, backupKeys, restoreVersion, !dryRun,
!quietDisplay, waitForDone));
break;
case RESTORE_WAIT:
case RestoreType::WAIT:
printf("[TODO][ERROR] FastRestore does not support RESTORE_WAIT yet!\n");
throw restore_error();
// f = stopAfter( success(ba.waitRestore(db, KeyRef(tagName), true)) );
break;
case RESTORE_ABORT:
case RestoreType::ABORT:
printf("[TODO][ERROR] FastRestore does not support RESTORE_ABORT yet!\n");
throw restore_error();
// f = stopAfter( map(ba.abortRestore(db, KeyRef(tagName)),
@ -4053,7 +4035,7 @@ int main(int argc, char* argv[]) {
// FileBackupAgent::restoreStateText(s).toString().c_str()); return Void();
// }) );
break;
case RESTORE_STATUS:
case RestoreType::STATUS:
printf("[TODO][ERROR] FastRestore does not support RESTORE_STATUS yet!\n");
throw restore_error();
// If no tag is specifically provided then print all tag status, don't just use "default"
@ -4067,35 +4049,35 @@ int main(int argc, char* argv[]) {
throw restore_error();
}
break;
case EXE_DR_AGENT:
case ProgramExe::DR_AGENT:
if(!initCluster())
return FDB_EXIT_ERROR;
f = stopAfter( runDBAgent(sourceDb, db) );
break;
case EXE_DB_BACKUP:
case ProgramExe::DB_BACKUP:
if(!initCluster())
return FDB_EXIT_ERROR;
switch (dbType)
{
case DB_START:
case DBType::START:
f = stopAfter( submitDBBackup(sourceDb, db, backupKeys, tagName) );
break;
case DB_STATUS:
case DBType::STATUS:
f = stopAfter( statusDBBackup(sourceDb, db, tagName, maxErrors) );
break;
case DB_SWITCH:
case DBType::SWITCH:
f = stopAfter( switchDBBackup(sourceDb, db, backupKeys, tagName, forceAction) );
break;
case DB_ABORT:
case DBType::ABORT:
f = stopAfter( abortDBBackup(sourceDb, db, tagName, partial, dstOnly) );
break;
case DB_PAUSE:
case DBType::PAUSE:
f = stopAfter( changeDBBackupResumed(sourceDb, db, true) );
break;
case DB_RESUME:
case DBType::RESUME:
f = stopAfter( changeDBBackupResumed(sourceDb, db, false) );
break;
case DB_UNDEFINED:
case DBType::UNDEFINED:
default:
fprintf(stderr, "ERROR: Unsupported DR action %s\n", argv[1]);
printHelpTeaser(argv[0]);
@ -4103,7 +4085,7 @@ int main(int argc, char* argv[]) {
break;
}
break;
case EXE_UNDEFINED:
case ProgramExe::UNDEFINED:
default:
return FDB_EXIT_ERROR;
}

File diff suppressed because it is too large.

View File

@ -23,46 +23,39 @@
#define FDBCLIENT_CLIENTLOGEVENTS_H
namespace FdbClientLogEvents {
typedef int EventType;
enum { GET_VERSION_LATENCY = 0,
GET_LATENCY = 1,
GET_RANGE_LATENCY = 2,
COMMIT_LATENCY = 3,
ERROR_GET = 4,
ERROR_GET_RANGE = 5,
ERROR_COMMIT = 6,
enum class EventType {
GET_VERSION_LATENCY = 0,
GET_LATENCY = 1,
GET_RANGE_LATENCY = 2,
COMMIT_LATENCY = 3,
ERROR_GET = 4,
ERROR_GET_RANGE = 5,
ERROR_COMMIT = 6,
UNSET
};
EVENTTYPEEND // End of EventType
};
enum class TransactionPriorityType { PRIORITY_DEFAULT = 0, PRIORITY_BATCH = 1, PRIORITY_IMMEDIATE = 2, UNSET };
typedef int TrasactionPriorityType;
enum {
PRIORITY_DEFAULT = 0,
PRIORITY_BATCH = 1,
PRIORITY_IMMEDIATE = 2,
PRIORITY_END
};
struct Event {
Event(EventType t, double ts, const Optional<Standalone<StringRef>>& dc) : type(t), startTs(ts) {
if (dc.present()) dcId = dc.get();
}
Event() {}
struct Event {
Event(EventType t, double ts, const Optional<Standalone<StringRef>> &dc) : type(t), startTs(ts){
if (dc.present())
dcId = dc.get();
}
Event() { }
template <typename Ar> Ar& serialize(Ar &ar) {
if (ar.protocolVersion().version() >= (uint64_t) 0x0FDB00B063010001LL) {
return serializer(ar, type, startTs, dcId);
} else {
return serializer(ar, type, startTs);
}
template <typename Ar>
Ar& serialize(Ar& ar) {
if (ar.protocolVersion().version() >= (uint64_t)0x0FDB00B063010001LL) {
return serializer(ar, type, startTs, dcId);
} else {
return serializer(ar, type, startTs);
}
}
EventType type{ EVENTTYPEEND };
double startTs{ 0 };
Key dcId{};
EventType type{ EventType::UNSET };
double startTs{ 0 };
Key dcId{};
void logEvent(std::string id, int maxFieldLength) const {}
};
struct EventGetVersion : public Event {
@ -96,9 +89,9 @@ namespace FdbClientLogEvents {
}
double latency;
TrasactionPriorityType priorityType {PRIORITY_END};
TransactionPriorityType priorityType{ TransactionPriorityType::UNSET };
void logEvent(std::string id, int maxFieldLength) const {
TraceEvent("TransactionTrace_GetVersion")
.detail("TransactionID", id)
.detail("Latency", latency)
@ -108,23 +101,25 @@ namespace FdbClientLogEvents {
// Version V3 of EventGetVersion starting at 6.3
struct EventGetVersion_V3 : public Event {
EventGetVersion_V3(double ts, const Optional<Standalone<StringRef>> &dcId, double lat, TransactionPriority priority, Version version) : Event(GET_VERSION_LATENCY, ts, dcId), latency(lat), readVersion(version) {
switch(priority) {
EventGetVersion_V3(double ts, const Optional<Standalone<StringRef>>& dcId, double lat,
TransactionPriority priority, Version version)
: Event(EventType::GET_VERSION_LATENCY, ts, dcId), latency(lat), readVersion(version) {
switch(priority) {
// Unfortunately, the enum serialized here disagrees with the enum used elsewhere for the values used by each priority
case TransactionPriority::IMMEDIATE:
priorityType = PRIORITY_IMMEDIATE;
break;
priorityType = TransactionPriorityType::PRIORITY_IMMEDIATE;
break;
case TransactionPriority::DEFAULT:
priorityType = PRIORITY_DEFAULT;
break;
priorityType = TransactionPriorityType::PRIORITY_DEFAULT;
break;
case TransactionPriority::BATCH:
priorityType = PRIORITY_BATCH;
break;
priorityType = TransactionPriorityType::PRIORITY_BATCH;
break;
default:
ASSERT(false);
}
}
EventGetVersion_V3() { }
}
EventGetVersion_V3() { }
template <typename Ar> Ar& serialize(Ar &ar) {
if (!ar.isDeserializing)
@ -134,8 +129,8 @@ namespace FdbClientLogEvents {
}
double latency;
TrasactionPriorityType priorityType {PRIORITY_END};
Version readVersion;
TransactionPriorityType priorityType{ TransactionPriorityType::UNSET };
Version readVersion;
void logEvent(std::string id, int maxFieldLength) const {
TraceEvent("TransactionTrace_GetVersion")
@ -147,8 +142,9 @@ namespace FdbClientLogEvents {
};
struct EventGet : public Event {
EventGet(double ts, const Optional<Standalone<StringRef>> &dcId, double lat, int size, const KeyRef &in_key) : Event(GET_LATENCY, ts, dcId), latency(lat), valueSize(size), key(in_key) { }
EventGet() { }
EventGet(double ts, const Optional<Standalone<StringRef>>& dcId, double lat, int size, const KeyRef& in_key)
: Event(EventType::GET_LATENCY, ts, dcId), latency(lat), valueSize(size), key(in_key) {}
EventGet() { }
template <typename Ar> Ar& serialize(Ar &ar) {
if (!ar.isDeserializing)
@ -173,8 +169,11 @@ namespace FdbClientLogEvents {
};
struct EventGetRange : public Event {
EventGetRange(double ts, const Optional<Standalone<StringRef>> &dcId, double lat, int size, const KeyRef &start_key, const KeyRef & end_key) : Event(GET_RANGE_LATENCY, ts, dcId), latency(lat), rangeSize(size), startKey(start_key), endKey(end_key) { }
EventGetRange() { }
EventGetRange(double ts, const Optional<Standalone<StringRef>>& dcId, double lat, int size,
const KeyRef& start_key, const KeyRef& end_key)
: Event(EventType::GET_RANGE_LATENCY, ts, dcId), latency(lat), rangeSize(size), startKey(start_key),
endKey(end_key) {}
EventGetRange() { }
template <typename Ar> Ar& serialize(Ar &ar) {
if (!ar.isDeserializing)
@ -252,9 +251,11 @@ namespace FdbClientLogEvents {
// Version V2 of EventGetVersion starting at 6.3
struct EventCommit_V2 : public Event {
EventCommit_V2(double ts, const Optional<Standalone<StringRef>> &dcId, double lat, int mut, int bytes, Version version, const CommitTransactionRequest &commit_req)
: Event(COMMIT_LATENCY, ts, dcId), latency(lat), numMutations(mut), commitBytes(bytes), commitVersion(version), req(commit_req) { }
EventCommit_V2() { }
EventCommit_V2(double ts, const Optional<Standalone<StringRef>>& dcId, double lat, int mut, int bytes,
Version version, const CommitTransactionRequest& commit_req)
: Event(EventType::COMMIT_LATENCY, ts, dcId), latency(lat), numMutations(mut), commitBytes(bytes),
commitVersion(version), req(commit_req) {}
EventCommit_V2() { }
template <typename Ar> Ar& serialize(Ar &ar) {
if (!ar.isDeserializing)
@ -306,8 +307,9 @@ namespace FdbClientLogEvents {
};
struct EventGetError : public Event {
EventGetError(double ts, const Optional<Standalone<StringRef>> &dcId, int err_code, const KeyRef &in_key) : Event(ERROR_GET, ts, dcId), errCode(err_code), key(in_key) { }
EventGetError() { }
EventGetError(double ts, const Optional<Standalone<StringRef>>& dcId, int err_code, const KeyRef& in_key)
: Event(EventType::ERROR_GET, ts, dcId), errCode(err_code), key(in_key) {}
EventGetError() { }
template <typename Ar> Ar& serialize(Ar &ar) {
if (!ar.isDeserializing)
@ -330,8 +332,10 @@ namespace FdbClientLogEvents {
};
struct EventGetRangeError : public Event {
EventGetRangeError(double ts, const Optional<Standalone<StringRef>> &dcId, int err_code, const KeyRef &start_key, const KeyRef & end_key) : Event(ERROR_GET_RANGE, ts, dcId), errCode(err_code), startKey(start_key), endKey(end_key) { }
EventGetRangeError() { }
EventGetRangeError(double ts, const Optional<Standalone<StringRef>>& dcId, int err_code,
const KeyRef& start_key, const KeyRef& end_key)
: Event(EventType::ERROR_GET_RANGE, ts, dcId), errCode(err_code), startKey(start_key), endKey(end_key) {}
EventGetRangeError() { }
template <typename Ar> Ar& serialize(Ar &ar) {
if (!ar.isDeserializing)
@ -356,8 +360,10 @@ namespace FdbClientLogEvents {
};
struct EventCommitError : public Event {
EventCommitError(double ts, const Optional<Standalone<StringRef>> &dcId, int err_code, const CommitTransactionRequest &commit_req) : Event(ERROR_COMMIT, ts, dcId), errCode(err_code), req(commit_req) { }
EventCommitError() { }
EventCommitError(double ts, const Optional<Standalone<StringRef>>& dcId, int err_code,
const CommitTransactionRequest& commit_req)
: Event(EventType::ERROR_COMMIT, ts, dcId), errCode(err_code), req(commit_req) {}
EventCommitError() { }
template <typename Ar> Ar& serialize(Ar &ar) {
if (!ar.isDeserializing)

View File

@ -93,7 +93,5 @@ struct ProfilerRequest {
serializer(ar, reply, type, action, duration, outputFile);
}
};
BINARY_SERIALIZABLE( ProfilerRequest::Type );
BINARY_SERIALIZABLE( ProfilerRequest::Action );
#endif

View File

@ -30,6 +30,11 @@
const int MAX_CLUSTER_FILE_BYTES = 60000;
constexpr UID WLTOKEN_CLIENTLEADERREG_GETLEADER(-1, 2);
constexpr UID WLTOKEN_CLIENTLEADERREG_OPENDATABASE(-1, 3);
constexpr UID WLTOKEN_PROTOCOL_INFO(-1, 10);
struct ClientLeaderRegInterface {
RequestStream< struct GetLeaderRequest > getLeader;
RequestStream< struct OpenDatabaseCoordRequest > openDatabase;
@ -186,4 +191,30 @@ public:
ClientCoordinators() {}
};
struct ProtocolInfoReply {
constexpr static FileIdentifier file_identifier = 7784298;
ProtocolVersion version;
template <class Ar>
void serialize(Ar& ar) {
uint64_t version_ = 0;
if (Ar::isSerializing) {
version_ = version.versionWithFlags();
}
serializer(ar, version_);
if (Ar::isDeserializing) {
version = ProtocolVersion(version_);
}
}
};
struct ProtocolInfoRequest {
constexpr static FileIdentifier file_identifier = 13261233;
ReplyPromise<ProtocolInfoReply> reply{ PeerCompatibilityPolicy{ RequirePeer::AtLeast,
ProtocolVersion::withStableInterfaces() } };
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, reply);
}
};
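// Usage sketch (illustrative, not part of this diff): a client can address the
// WLTOKEN_PROTOCOL_INFO well-known endpoint on a coordinator directly, mirroring
// the pattern NativeAPI adopts below. `leader` is an assumed ClientLeaderRegInterface.
//   RequestStream<ProtocolInfoRequest> stream{ Endpoint{
//       { leader.getLeader.getEndpoint().addresses }, WLTOKEN_PROTOCOL_INFO } };
//   Future<ProtocolInfoReply> reply = retryBrokenPromise(stream, ProtocolInfoRequest{});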
#endif

View File

@ -319,6 +319,8 @@ public:
int snapshotRywEnabled;
int transactionTracingEnabled;
Future<Void> logger;
Future<Void> throttleExpirer;

View File

@ -39,16 +39,16 @@ typedef int64_t Generation;
typedef UID SpanID;
enum {
tagLocalitySpecial = -1,
tagLocalitySpecial = -1, // tag with this locality means it is invalidTag (id=0), txsTag (id=1), or cacheTag (id=2)
tagLocalityLogRouter = -2,
tagLocalityRemoteLog = -3,
tagLocalityRemoteLog = -3, // tag created by log router for remote tLogs
tagLocalityUpgraded = -4,
tagLocalitySatellite = -5,
tagLocalityLogRouterMapped = -6, // used by log router to pop from TLogs
tagLocalityLogRouterMapped = -6, // The pseudo tag used by log routers to pop the real LogRouter tag (i.e., -2)
tagLocalityTxs = -7,
tagLocalityBackup = -8, // used by backup role to pop from TLogs
tagLocalityInvalid = -99
}; //The TLog and LogRouter require these number to be as compact as possible
}; // The TLog and LogRouter require these numbers to be as compact as possible
inline bool isPseudoLocality(int8_t locality) {
return locality == tagLocalityLogRouterMapped || locality == tagLocalityBackup;
@ -56,6 +56,11 @@ inline bool isPseudoLocality(int8_t locality) {
#pragma pack(push, 1)
struct Tag {
// if locality > 0,
// locality decides which DC id the tLog is in;
// id decides which SS owns the tag; id <-> SS mapping is in the system keyspace: serverTagKeys.
// if locality < 0, locality decides the type of tLog set: satellite, LR, or remote tLog, etc.
// id decides which tLog in the tLog type will be used.
int8_t locality;
uint16_t id;
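// Illustration of the comment above (hedged; assumes Tag's usual two-argument
// constructor, which this hunk does not show):
//   Tag ssTag(0, 5);                              // DC 0; owned by the SS mapped to id 5
//   Tag lrMapped(tagLocalityLogRouterMapped, 5);  // pseudo tag popped by a log router
//   ASSERT(isPseudoLocality(lrMapped.locality));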

View File

@ -92,6 +92,7 @@ public:
virtual void selectApiVersion(int apiVersion) = 0;
virtual const char* getClientVersion() = 0;
virtual ThreadFuture<uint64_t> getServerProtocol(const char* clusterFilePath) = 0;
virtual void setNetworkOption(FDBNetworkOptions::Option option, Optional<StringRef> value = Optional<StringRef>()) = 0;
virtual void setupNetwork() = 0;

View File

@ -371,10 +371,6 @@ ClientCoordinators::ClientCoordinators( Key clusterKey, std::vector<NetworkAddre
ccf = makeReference<ClusterConnectionFile>(ClusterConnectionString(coordinators, clusterKey));
}
UID WLTOKEN_CLIENTLEADERREG_GETLEADER( -1, 2 );
UID WLTOKEN_CLIENTLEADERREG_OPENDATABASE( -1, 3 );
ClientLeaderRegInterface::ClientLeaderRegInterface( NetworkAddress remote )
: getLeader( Endpoint({remote}, WLTOKEN_CLIENTLEADERREG_GETLEADER) ),
openDatabase( Endpoint({remote}, WLTOKEN_CLIENTLEADERREG_OPENDATABASE) )

View File

@ -18,14 +18,17 @@
* limitations under the License.
*/
#include "fdbclient/CoordinationInterface.h"
#include "fdbclient/MultiVersionTransaction.h"
#include "fdbclient/MultiVersionAssignmentVars.h"
#include "fdbclient/ThreadSafeTransaction.h"
#include "flow/network.h"
#include "flow/Platform.h"
#include "flow/ProtocolVersion.h"
#include "flow/UnitTest.h"
#include "flow/actorcompiler.h" // This must be the last #include.
#include "flow/actorcompiler.h" // This must be the last #include.
void throwIfError(FdbCApi::fdb_error_t e) {
if(e) {
@ -343,6 +346,7 @@ void DLApi::init() {
headerVersion >= 700);
loadClientFunction(&api->futureGetInt64, lib, fdbCPath, headerVersion >= 620 ? "fdb_future_get_int64" : "fdb_future_get_version");
loadClientFunction(&api->futureGetUInt64, lib, fdbCPath, "fdb_future_get_uint64");
loadClientFunction(&api->futureGetError, lib, fdbCPath, "fdb_future_get_error");
loadClientFunction(&api->futureGetKey, lib, fdbCPath, "fdb_future_get_key");
loadClientFunction(&api->futureGetValue, lib, fdbCPath, "fdb_future_get_value");
@ -378,6 +382,11 @@ const char* DLApi::getClientVersion() {
return api->getClientVersion();
}
ThreadFuture<uint64_t> DLApi::getServerProtocol(const char *clusterFilePath) {
ASSERT(false);
return ThreadFuture<uint64_t>();
}
void DLApi::setNetworkOption(FDBNetworkOptions::Option option, Optional<StringRef> value) {
throwIfError(api->setNetworkOption(option, value.present() ? value.get().begin() : nullptr, value.present() ? value.get().size() : 0));
}
@ -990,6 +999,11 @@ const char* MultiVersionApi::getClientVersion() {
return localClient->api->getClientVersion();
}
ThreadFuture<uint64_t> MultiVersionApi::getServerProtocol(const char *clusterFilePath) {
return api->localClient->api->getServerProtocol(clusterFilePath);
}
void validateOption(Optional<StringRef> value, bool canBePresent, bool canBeAbsent, bool canBeEmpty=true) {
ASSERT(canBePresent || canBeAbsent);

View File

@ -55,6 +55,7 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
//Network
fdb_error_t (*selectApiVersion)(int runtimeVersion, int headerVersion);
const char* (*getClientVersion)();
FDBFuture* (*getServerProtocol)(const char* clusterFilePath);
fdb_error_t (*setNetworkOption)(FDBNetworkOptions::Option option, uint8_t const *value, int valueLength);
fdb_error_t (*setupNetwork)();
fdb_error_t (*runNetwork)();
@ -107,6 +108,7 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
//Future
fdb_error_t (*futureGetDatabase)(FDBFuture *f, FDBDatabase **outDb);
fdb_error_t (*futureGetInt64)(FDBFuture *f, int64_t *outValue);
fdb_error_t (*futureGetUInt64)(FDBFuture *f, uint64_t *outValue);
fdb_error_t (*futureGetError)(FDBFuture *f);
fdb_error_t (*futureGetKey)(FDBFuture *f, uint8_t const **outKey, int *outKeyLength);
fdb_error_t (*futureGetValue)(FDBFuture *f, fdb_bool_t *outPresent, uint8_t const **outValue, int *outValueLength);
@ -204,6 +206,7 @@ public:
void selectApiVersion(int apiVersion) override;
const char* getClientVersion() override;
ThreadFuture<uint64_t> getServerProtocol(const char* clusterFilePath) override;
void setNetworkOption(FDBNetworkOptions::Option option, Optional<StringRef> value = Optional<StringRef>()) override;
void setupNetwork() override;
@ -381,6 +384,7 @@ class MultiVersionApi : public IClientApi {
public:
void selectApiVersion(int apiVersion) override;
const char* getClientVersion() override;
ThreadFuture<uint64_t> getServerProtocol(const char* clusterFilePath) override;
void setNetworkOption(FDBNetworkOptions::Option option, Optional<StringRef> value = Optional<StringRef>()) override;
void setupNetwork() override;

View File

@ -873,7 +873,7 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<ClusterConnectionF
transactionsResourceConstrained("ResourceConstrained", cc), transactionsThrottled("Throttled", cc),
transactionsProcessBehind("ProcessBehind", cc), outstandingWatches(0), latencies(1000), readLatencies(1000),
commitLatencies(1000), GRVLatencies(1000), mutationsPerCommit(1000), bytesPerCommit(1000), mvCacheInsertLocation(0),
healthMetricsLastUpdated(0), detailedHealthMetricsLastUpdated(0), internal(internal),
healthMetricsLastUpdated(0), detailedHealthMetricsLastUpdated(0), internal(internal), transactionTracingEnabled(true),
smoothMidShardSize(CLIENT_KNOBS->SHARD_STAT_SMOOTH_AMOUNT),
transactionsExpensiveClearCostEstCount("ExpensiveClearCostEstCount", cc),
specialKeySpace(std::make_unique<SpecialKeySpace>(specialKeys.begin, specialKeys.end, /* test */ false)) {
@ -946,6 +946,10 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<ClusterConnectionF
std::make_unique<ConsistencyCheckImpl>(
singleKeyRange(LiteralStringRef("consistency_check_suspended"))
.withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin)));
registerSpecialKeySpaceModule(
SpecialKeySpace::MODULE::TRACING, SpecialKeySpace::IMPLTYPE::READWRITE,
std::make_unique<TracingOptionsImpl>(
SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::TRACING)));
}
if (apiVersionAtLeast(630)) {
registerSpecialKeySpaceModule(SpecialKeySpace::MODULE::TRANSACTION, SpecialKeySpace::IMPLTYPE::READONLY,
@ -1044,7 +1048,7 @@ DatabaseContext::DatabaseContext(const Error& err)
transactionsProcessBehind("ProcessBehind", cc), latencies(1000), readLatencies(1000), commitLatencies(1000),
GRVLatencies(1000), mutationsPerCommit(1000), bytesPerCommit(1000),
smoothMidShardSize(CLIENT_KNOBS->SHARD_STAT_SMOOTH_AMOUNT),
transactionsExpensiveClearCostEstCount("ExpensiveClearCostEstCount", cc), internal(false) {}
transactionsExpensiveClearCostEstCount("ExpensiveClearCostEstCount", cc), internal(false), transactionTracingEnabled(true) {}
Database DatabaseContext::create(Reference<AsyncVar<ClientDBInfo>> clientInfo, Future<Void> clientInfoMonitor, LocalityData clientLocality, bool enableLocalityLoadBalance, TaskPriority taskID, bool lockAware, int apiVersion, bool switchable) {
return Database( new DatabaseContext( Reference<AsyncVar<Reference<ClusterConnectionFile>>>(), clientInfo, clientInfoMonitor, taskID, clientLocality, enableLocalityLoadBalance, lockAware, true, apiVersion, switchable ) );
@ -1207,6 +1211,14 @@ void DatabaseContext::setOption( FDBDatabaseOptions::Option option, Optional<Str
validateOptionValue(value, false);
snapshotRywEnabled--;
break;
case FDBDatabaseOptions::TRANSACTION_TRACE_ENABLE:
validateOptionValue(value, false);
transactionTracingEnabled++;
break;
case FDBDatabaseOptions::TRANSACTION_TRACE_DISABLE:
validateOptionValue(value, false);
transactionTracingEnabled--;
break;
default:
break;
}
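// Usage sketch (illustrative): the new knobs are ordinary database options, so a
// client could toggle tracing on an assumed Database handle `db` like this:
//   db->setOption(FDBDatabaseOptions::TRANSACTION_TRACE_DISABLE);
//   // ... transactions created here get a zero token in their span id ...
//   db->setOption(FDBDatabaseOptions::TRANSACTION_TRACE_ENABLE);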
@ -1817,7 +1829,12 @@ ACTOR Future<vector<pair<KeyRange, Reference<LocationInfo>>>> getKeyRangeLocatio
}
}
// Returns a vector of <ShardRange, storage server location info> pairs.
// Get the SS locations for each shard in the 'keys' key-range;
// the returned vector's size is the number of shards in the input key-range.
// Each returned vector element is a <ShardRange, storage server location info> pair, where
// ShardRange is the whole shard key-range, not just the portion overlapping the given key range.
// Example: if the function is queried with key range (b, d), the returned list of pairs could be:
// [([a, b1), locationInfo), ([b1, c), locationInfo), ([c, d1), locationInfo)].
template <class F>
Future< vector< pair<KeyRange,Reference<LocationInfo>> > > getKeyRangeLocations( Database const& cx, KeyRange const& keys, int limit, bool reverse, F StorageServerInterface::*member, TransactionInfo const& info ) {
ASSERT (!keys.empty());
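// Usage sketch (illustrative, inside an ACTOR; not part of this diff):
//   vector<pair<KeyRange, Reference<LocationInfo>>> locations =
//       wait(getKeyRangeLocations(cx, keys, CLIENT_KNOBS->TOO_MANY, false,
//                                 &StorageServerInterface::getKeyValues, info));
//   for (const auto& [shardRange, ssLocations] : locations) {
//       // issue one per-shard request, clipping shardRange to the queried range
//   }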
@ -2687,8 +2704,21 @@ void debugAddTags(Transaction *tr) {
}
SpanID generateSpanID(int transactionTracingEnabled) {
uint64_t tid = deterministicRandom()->randomUInt64();
if (transactionTracingEnabled > 0) {
return SpanID(tid, deterministicRandom()->randomUInt64());
} else {
return SpanID(tid, 0);
}
}
Transaction::Transaction()
: info(TaskPriority::DefaultEndpoint, generateSpanID(true)),
span(info.spanID, "Transaction"_loc) {}
Transaction::Transaction(Database const& cx)
: cx(cx), info(cx->taskID, deterministicRandom()->randomUniqueID()), backoff(CLIENT_KNOBS->DEFAULT_BACKOFF),
: cx(cx), info(cx->taskID, generateSpanID(cx->transactionTracingEnabled)), backoff(CLIENT_KNOBS->DEFAULT_BACKOFF),
committedVersion(invalidVersion), versionstampPromise(Promise<Standalone<StringRef>>()), options(cx), numErrors(0),
trLogInfo(createTrLogInfoProbabilistically(cx)), tr(info.spanID), span(info.spanID, "Transaction"_loc) {
if (DatabaseContext::debugUseTags) {
@ -4161,6 +4191,37 @@ Future<Standalone<StringRef>> Transaction::getVersionstamp() {
return versionstampPromise.getFuture();
}
ACTOR Future<ProtocolVersion> coordinatorProtocolsFetcher(Reference<ClusterConnectionFile> f) {
state ClientCoordinators coord(f);
state vector<Future<ProtocolInfoReply>> coordProtocols;
coordProtocols.reserve(coord.clientLeaderServers.size());
for (int i = 0; i < coord.clientLeaderServers.size(); i++) {
RequestStream<ProtocolInfoRequest> requestStream{ Endpoint{
{ coord.clientLeaderServers[i].getLeader.getEndpoint().addresses }, WLTOKEN_PROTOCOL_INFO } };
coordProtocols.push_back(retryBrokenPromise(requestStream, ProtocolInfoRequest{}));
}
wait(smartQuorum(coordProtocols, coordProtocols.size() / 2 + 1, 1.5));
std::unordered_map<uint64_t, int> protocolCount;
for(int i = 0; i<coordProtocols.size(); i++) {
if(coordProtocols[i].isReady()) {
protocolCount[coordProtocols[i].get().version.version()]++;
}
}
uint64_t majorityProtocol = std::max_element(protocolCount.begin(), protocolCount.end(), [](const std::pair<uint64_t, int>& l, const std::pair<uint64_t, int>& r){
return l.second < r.second;
})->first;
return ProtocolVersion(majorityProtocol);
}
ACTOR Future<uint64_t> getCoordinatorProtocols(Reference<ClusterConnectionFile> f) {
ProtocolVersion protocolVersion = wait(coordinatorProtocolsFetcher(f));
return protocolVersion.version();
}
uint32_t Transaction::getSize() {
auto s = tr.transaction.mutations.expectedSize() + tr.transaction.read_conflict_ranges.expectedSize() +
tr.transaction.write_conflict_ranges.expectedSize();
@ -4600,6 +4661,16 @@ Reference<TransactionLogInfo> Transaction::createTrLogInfoProbabilistically(cons
return Reference<TransactionLogInfo>();
}
void Transaction::setTransactionID(uint64_t id) {
ASSERT(getSize() == 0);
info.spanID = SpanID(id, info.spanID.second());
}
void Transaction::setToken(uint64_t token) {
ASSERT(getSize() == 0);
info.spanID = SpanID(info.spanID.first(), token);
}
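// Taken together with generateSpanID above, the first half of the span id carries
// the user-visible transaction id and the second half the tracing token; both
// setters assert that the transaction has done no work yet. Illustrative sketch:
//   Transaction tr(db);
//   tr.setTransactionID(12345);                          // spanID = (12345, <old token>)
//   tr.setToken(deterministicRandom()->randomUInt64());  // fresh random token half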
void enableClientInfoLogging() {
ASSERT(networkOptions.logClientInfo.present() == false);
networkOptions.logClientInfo = true;

View File

@ -284,6 +284,8 @@ public:
[[nodiscard]] Future<Standalone<StringRef>>
getVersionstamp(); // Will be fulfilled only after commit() returns success
Future<uint64_t> getProtocolVersion();
Promise<Standalone<StringRef>> versionstampPromise;
uint32_t getSize();
@ -291,9 +293,7 @@ public:
void flushTrLogsIfEnabled();
// These are to permit use as state variables in actors:
Transaction()
: info(TaskPriority::DefaultEndpoint, deterministicRandom()->randomUniqueID()),
span(info.spanID, "Transaction"_loc) {}
Transaction();
void operator=(Transaction&& r) noexcept;
void reset();
@ -323,6 +323,9 @@ public:
double startTime;
Reference<TransactionLogInfo> trLogInfo;
void setTransactionID(uint64_t id);
void setToken(uint64_t token);
const vector<Future<std::pair<Key, Key>>>& getExtraReadConflictRanges() const { return extraConflictRanges; }
Standalone<VectorRef<KeyRangeRef>> readConflictRanges() const {
return Standalone<VectorRef<KeyRangeRef>>(tr.transaction.read_conflict_ranges, tr.arena);
@ -360,6 +363,8 @@ ACTOR Future<Void> snapCreate(Database cx, Standalone<StringRef> snapCmd, UID sn
// Checks with Data Distributor that it is safe to mark all servers in exclusions as failed
ACTOR Future<bool> checkSafeExclusions(Database cx, vector<AddressExclusion> exclusions);
ACTOR Future<uint64_t> getCoordinatorProtocols(Reference<ClusterConnectionFile> f);
inline uint64_t getWriteOperationCost(uint64_t bytes) {
return bytes / std::max(1, CLIENT_KNOBS->WRITE_COST_BYTE_FACTOR) + 1;
}

View File

@ -1593,6 +1593,14 @@ void ReadYourWritesTransaction::getWriteConflicts( KeyRangeMap<bool> *result ) {
}
}
void ReadYourWritesTransaction::setTransactionID(uint64_t id) {
tr.setTransactionID(id);
}
void ReadYourWritesTransaction::setToken(uint64_t token) {
tr.setToken(token);
}
Standalone<RangeResultRef> ReadYourWritesTransaction::getReadConflictRangeIntersecting(KeyRangeRef kr) {
TEST(true); // Special keys read conflict range
ASSERT(readConflictRangeKeysRange.contains(kr));

View File

@ -144,6 +144,9 @@ public:
return tr.info;
}
void setTransactionID(uint64_t id);
void setToken(uint64_t token);
// Read from the special key space readConflictRangeKeysRange
Standalone<RangeResultRef> getReadConflictRangeIntersecting(KeyRangeRef kr);
// Read from the special key space writeConflictRangeKeysRange

View File

@ -786,7 +786,8 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"coordinators":[
{
"reachable":true,
"address":"127.0.0.1:4701"
"address":"127.0.0.1:4701",
"protocol": "0fdb00b070010001"
}
],
"quorum_reachable":true

View File

@ -24,6 +24,11 @@
#include "fdbclient/StatusClient.h"
#include "flow/actorcompiler.h" // This must be the last #include.
namespace {
const std::string kTracingTransactionIdKey = "transaction_id";
const std::string kTracingTokenKey = "token";
}
std::unordered_map<SpecialKeySpace::MODULE, KeyRange> SpecialKeySpace::moduleToBoundary = {
{ SpecialKeySpace::MODULE::TRANSACTION,
KeyRangeRef(LiteralStringRef("\xff\xff/transaction/"), LiteralStringRef("\xff\xff/transaction0")) },
@ -38,7 +43,9 @@ std::unordered_map<SpecialKeySpace::MODULE, KeyRange> SpecialKeySpace::moduleToB
KeyRangeRef(LiteralStringRef("\xff\xff/management/"), LiteralStringRef("\xff\xff/management0")) },
{ SpecialKeySpace::MODULE::ERRORMSG, singleKeyRange(LiteralStringRef("\xff\xff/error_message")) },
{ SpecialKeySpace::MODULE::CONFIGURATION,
KeyRangeRef(LiteralStringRef("\xff\xff/configuration/"), LiteralStringRef("\xff\xff/configuration0")) }
KeyRangeRef(LiteralStringRef("\xff\xff/configuration/"), LiteralStringRef("\xff\xff/configuration0")) },
{ SpecialKeySpace::MODULE::TRACING,
KeyRangeRef(LiteralStringRef("\xff\xff/tracing/"), LiteralStringRef("\xff\xff/tracing0")) }
};
std::unordered_map<std::string, KeyRange> SpecialKeySpace::managementApiCommandToRange = {
@ -53,6 +60,8 @@ std::unordered_map<std::string, KeyRange> SpecialKeySpace::managementApiCommandT
std::set<std::string> SpecialKeySpace::options = { "excluded/force", "failed/force" };
std::set<std::string> SpecialKeySpace::tracingOptions = { kTracingTransactionIdKey, kTracingTokenKey };
Standalone<RangeResultRef> rywGetRange(ReadYourWritesTransaction* ryw, const KeyRangeRef& kr,
const Standalone<RangeResultRef>& res);
@ -139,27 +148,46 @@ ACTOR Future<Void> normalizeKeySelectorActor(SpecialKeySpace* sks, ReadYourWrite
KeyRangeRef boundary, int* actualOffset,
Standalone<RangeResultRef>* result,
Optional<Standalone<RangeResultRef>>* cache) {
// If offset < 1, we need to move left: iter points to the range containing at least one smaller key
// (it's a waste of time to walk through the range whose begin key is the same as ks->key)
// (rangeContainingKeyBefore itself handles the case where ks->key == Key())
// Otherwise, we only need to move right if offset > 1: iter points to the range containing the key
// Since boundary.end is always a key in the RangeMap, it is always safe to move right
state RangeMap<Key, SpecialKeyRangeReadImpl*, KeyRangeRef>::iterator iter =
ks->offset < 1 ? sks->getReadImpls().rangeContainingKeyBefore(ks->getKey())
: sks->getReadImpls().rangeContaining(ks->getKey());
while ((ks->offset < 1 && iter->begin() > boundary.begin) || (ks->offset > 1 && iter->begin() < boundary.end)) {
while ((ks->offset < 1 && iter->begin() >= boundary.begin) || (ks->offset > 1 && iter->begin() < boundary.end)) {
if (iter->value() != nullptr) {
wait(moveKeySelectorOverRangeActor(iter->value(), ryw, ks, cache));
}
ks->offset < 1 ? --iter : ++iter;
// Check if we can still move the iterator left
if (ks->offset < 1) {
if (iter == sks->getReadImpls().ranges().begin()) {
break;
} else {
--iter;
}
} else if (ks->offset > 1) {
// Always safe to move right
++iter;
}
}
*actualOffset = ks->offset;
if (iter->begin() == boundary.begin || iter->begin() == boundary.end) ks->setKey(iter->begin());
if (!ks->isFirstGreaterOrEqual()) {
// The Key Selector clamps up to the legal key space
TraceEvent(SevDebug, "ReadToBoundary")
.detail("TerminateKey", ks->getKey())
.detail("TerminateOffset", ks->offset);
if (ks->offset < 1)
result->readToBegin = true;
else
result->readThroughEnd = true;
// If still not normalized after moving to the boundary,
// let key selector clamp up to the boundary
if (ks->offset < 1) {
result->readToBegin = true;
ks->setKey(boundary.begin);
}
else {
result->readThroughEnd = true;
ks->setKey(boundary.end);
}
ks->offset = 1;
}
return Void();
@ -1263,3 +1291,63 @@ Future<Optional<std::string>> ConsistencyCheckImpl::commit(ReadYourWritesTransac
BinaryWriter::toValue(entry.present(), Unversioned()));
return Optional<std::string>();
}
TracingOptionsImpl::TracingOptionsImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {
TraceEvent("TracingOptionsImpl::TracingOptionsImpl").detail("Range", kr);
}
Future<Standalone<RangeResultRef>> TracingOptionsImpl::getRange(ReadYourWritesTransaction* ryw,
KeyRangeRef kr) const {
Standalone<RangeResultRef> result;
for (const auto& option : SpecialKeySpace::getTracingOptions()) {
auto key = getKeyRange().begin.withSuffix(option);
if (!kr.contains(key)) {
continue;
}
if (key.endsWith(kTracingTransactionIdKey)) {
result.push_back_deep(result.arena(), KeyValueRef(key, std::to_string(ryw->getTransactionInfo().spanID.first())));
} else if (key.endsWith(kTracingTokenKey)) {
result.push_back_deep(result.arena(), KeyValueRef(key, std::to_string(ryw->getTransactionInfo().spanID.second())));
}
}
return result;
}
void TracingOptionsImpl::set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) {
if (ryw->getApproximateSize() > 0) {
ryw->setSpecialKeySpaceErrorMsg("tracing options must be set first");
ryw->getSpecialKeySpaceWriteMap().insert(key, std::make_pair(true, Optional<Value>()));
return;
}
if (key.endsWith(kTracingTransactionIdKey)) {
ryw->setTransactionID(std::stoul(value.toString()));
} else if (key.endsWith(kTracingTokenKey)) {
if (value.toString() == "true") {
ryw->setToken(deterministicRandom()->randomUInt64());
} else if (value.toString() == "false") {
ryw->setToken(0);
} else {
ryw->setSpecialKeySpaceErrorMsg("token must be set to true/false");
throw special_keys_api_failure();
}
}
}
Future<Optional<std::string>> TracingOptionsImpl::commit(ReadYourWritesTransaction* ryw) {
if (ryw->getSpecialKeySpaceWriteMap().size() > 0) {
throw special_keys_api_failure();
}
return Optional<std::string>();
}
void TracingOptionsImpl::clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) {
ryw->setSpecialKeySpaceErrorMsg("clear range disabled");
throw special_keys_api_failure();
}
void TracingOptionsImpl::clear(ReadYourWritesTransaction* ryw, const KeyRef& key) {
ryw->setSpecialKeySpaceErrorMsg("clear disabled");
throw special_keys_api_failure();
}
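// Client-side sketch (illustrative, not part of this diff): the module is driven
// through the special key space on a ReadYourWritesTransaction `tr`, with
// special-key-space writes enabled and before any other operation:
//   tr.set(LiteralStringRef("\xff\xff/tracing/transaction_id"), LiteralStringRef("100"));
//   tr.set(LiteralStringRef("\xff\xff/tracing/token"), LiteralStringRef("true"));
// Reading the same keys back returns the current span id halves as decimal strings.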

View File

@ -145,6 +145,7 @@ public:
MANAGEMENT, // Management-API
METRICS, // data-distribution metrics
TESTONLY, // only used by correctness tests
TRACING, // Distributed tracing options
TRANSACTION, // transaction related info, conflicting keys, read/write conflict range
STATUSJSON,
UNKNOWN, // default value for all unregistered range
@ -190,6 +191,7 @@ public:
}
static Key getManagementApiCommandOptionSpecialKey(const std::string& command, const std::string& option);
static const std::set<std::string>& getManagementApiOptionsSet() { return options; }
static const std::set<std::string>& getTracingOptions() { return tracingOptions; }
private:
ACTOR static Future<Optional<Value>> getActor(SpecialKeySpace* sks, ReadYourWritesTransaction* ryw, KeyRef key);
@ -211,6 +213,7 @@ private:
static std::unordered_map<std::string, KeyRange>
managementApiCommandToRange; // management command to its special keys' range
static std::set<std::string> options; // "<command>/<option>"
static std::set<std::string> tracingOptions;
// Initialize module boundaries, used to handle cross_module_read
void modulesBoundaryInit();
@ -319,5 +322,15 @@ public:
Future<Optional<std::string>> commit(ReadYourWritesTransaction* ryw) override;
};
class TracingOptionsImpl : public SpecialKeyRangeRWImpl {
public:
explicit TracingOptionsImpl(KeyRangeRef kr);
Future<Standalone<RangeResultRef>> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override;
void set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) override;
Future<Optional<std::string>> commit(ReadYourWritesTransaction* ryw) override;
void clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) override;
void clear(ReadYourWritesTransaction* ryw, const KeyRef& key) override;
};
#include "flow/unactorcompiler.h"
#endif

View File

@ -28,6 +28,7 @@
#include "fdbclient/json_spirit/json_spirit_reader_template.h"
#include "fdbrpc/genericactors.actor.h"
#include "flow/actorcompiler.h" // has to be last include
#include <cstdint>
json_spirit::mValue readJSONStrictly(const std::string &s) {
json_spirit::mValue val;
@ -292,7 +293,17 @@ ACTOR Future<Optional<StatusObject>> clientCoordinatorsStatusFetcher(Reference<C
for (int i = 0; i < coord.clientLeaderServers.size(); i++)
leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader, GetLeaderRequest(coord.clusterKey, UID()), TaskPriority::CoordinationReply));
wait( smartQuorum(leaderServers, leaderServers.size() / 2 + 1, 1.5) || delay(2.0) );
state vector<Future<ProtocolInfoReply>> coordProtocols;
coordProtocols.reserve(coord.clientLeaderServers.size());
for (int i = 0; i < coord.clientLeaderServers.size(); i++) {
RequestStream<ProtocolInfoRequest> requestStream{ Endpoint{
{ coord.clientLeaderServers[i].getLeader.getEndpoint().addresses }, WLTOKEN_PROTOCOL_INFO } };
coordProtocols.push_back(retryBrokenPromise(requestStream, ProtocolInfoRequest{}));
}
wait(smartQuorum(leaderServers, leaderServers.size() / 2 + 1, 1.5) &&
smartQuorum(coordProtocols, coordProtocols.size() / 2 + 1, 1.5) ||
delay(2.0));
statusObj["quorum_reachable"] = *quorum_reachable = quorum(leaderServers, leaderServers.size() / 2 + 1).isReady();
@ -309,12 +320,17 @@ ACTOR Future<Optional<StatusObject>> clientCoordinatorsStatusFetcher(Reference<C
coordinatorsUnavailable++;
coordStatus["reachable"] = false;
}
if (coordProtocols[i].isReady()) {
uint64_t protocolVersionInt = coordProtocols[i].get().version.version();
std::stringstream hexSs;
hexSs << std::hex << std::setw(2*sizeof(protocolVersionInt)) << std::setfill('0') << protocolVersionInt;
coordStatus["protocol"] = hexSs.str();
}
coordsStatus.push_back(coordStatus);
}
statusObj["coordinators"] = coordsStatus;
*coordinatorsFaultTolerance = (leaderServers.size() - 1) / 2 - coordinatorsUnavailable;
return statusObj;
}
catch (Error &e){

View File

@ -203,10 +203,6 @@ const KeyRangeRef writeConflictRangeKeysRange =
LiteralStringRef("\xff\xff/transaction/write_conflict_range/\xff\xff"));
// "\xff/cacheServer/[[UID]] := StorageServerInterface"
// This will be added by the cache server on initialization and removed by DD
// TODO[mpilman]: We will need a way to map uint16_t ids to UIDs in a future
// versions. For now caches simply cache everything so the ids
// are not yet meaningful.
const KeyRangeRef storageCacheServerKeys(LiteralStringRef("\xff/cacheServer/"),
LiteralStringRef("\xff/cacheServer0"));
const KeyRef storageCacheServersPrefix = storageCacheServerKeys.begin;

View File

@ -45,6 +45,9 @@ extern const KeyRangeRef specialKeys; // [FF][FF] to [FF][FF][FF], some client f
extern const KeyRef afterAllKeys;
// "\xff/keyServers/[[begin]]" := "[[vector<serverID>, vector<serverID>]|[vector<Tag>, vector<Tag>]]"
// An internal mapping of where shards are located in the database. [[begin]] is the start of the shard range
// and the result is a list of serverIDs or Tags where these shards are located. These values can be changed
// as data movement occurs.
extern const KeyRangeRef keyServersKeys, keyServersKeyServersKeys;
extern const KeyRef keyServersPrefix, keyServersEnd, keyServersKeyServersKey;
const Key keyServersKey( const KeyRef& k );
@ -63,6 +66,10 @@ void decodeKeyServersValue( std::map<Tag, UID> const& tag_uid, const ValueRef& v
std::vector<UID>& src, std::vector<UID>& dest );
// "\xff/storageCacheServer/[[UID]] := StorageServerInterface"
// This will be added by the cache server on initialization and removed by DD
// TODO[mpilman]: We will need a way to map uint16_t ids to UIDs in a future
// versions. For now caches simply cache everything so the ids
// are not yet meaningful.
extern const KeyRangeRef storageCacheServerKeys;
extern const KeyRef storageCacheServersPrefix, storageCacheServersEnd;
const Key storageCacheServerKey(UID id);
@ -75,7 +82,11 @@ const Key storageCacheKey( const KeyRef& k );
const Value storageCacheValue( const std::vector<uint16_t>& serverIndices );
void decodeStorageCacheValue( const ValueRef& value, std::vector<uint16_t>& serverIndices );
// "\xff/serverKeys/[[serverID]]/[[begin]]" := "" | "1" | "2"
// "\xff/serverKeys/[[serverID]]/[[begin]]" := "[[serverKeysTrue]]" |" [[serverKeysFalse]]"
// An internal mapping of what shards any given server currently has ownership of
// Using the serverID as a prefix, then followed by the beginning of the shard range
// as the key, the value indicates whether the shard does or does not exist on the server.
// These values can be changed as data movement occurs.
extern const KeyRef serverKeysPrefix;
extern const ValueRef serverKeysTrue, serverKeysFalse;
const Key serverKeysKey( UID serverID, const KeyRef& keys );
@ -103,6 +114,8 @@ const Key cacheChangeKeyFor( uint16_t idx );
uint16_t cacheChangeKeyDecodeIndex( const KeyRef& key );
// "\xff/serverTag/[[serverID]]" = "[[Tag]]"
// Provides the Tag for the given serverID. Used to access a
// storage server's corresponding TLog in order to apply mutations.
extern const KeyRangeRef serverTagKeys;
extern const KeyRef serverTagPrefix;
extern const KeyRangeRef serverTagMaxKeys;
@ -122,6 +135,8 @@ Tag decodeServerTagValue( ValueRef const& );
const Key serverTagConflictKeyFor( Tag );
// "\xff/tagLocalityList/[[datacenterID]]" := "[[tagLocality]]"
// Provides the tagLocality for the given datacenterID
// See "FDBTypes.h" struct Tag for more details on tagLocality
extern const KeyRangeRef tagLocalityListKeys;
extern const KeyRef tagLocalityListPrefix;
const Key tagLocalityListKeyFor( Optional<Value> dcID );
@ -130,6 +145,8 @@ Optional<Value> decodeTagLocalityListKey( KeyRef const& );
int8_t decodeTagLocalityListValue( ValueRef const& );
// "\xff\x02/datacenterReplicas/[[datacenterID]]" := "[[replicas]]"
// Provides the number of replicas for the given datacenterID.
// Used in the initialization of the Data Distributor.
extern const KeyRangeRef datacenterReplicasKeys;
extern const KeyRef datacenterReplicasPrefix;
const Key datacenterReplicasKeyFor( Optional<Value> dcID );
@ -138,6 +155,8 @@ Optional<Value> decodeDatacenterReplicasKey( KeyRef const& );
int decodeDatacenterReplicasValue( ValueRef const& );
// "\xff\x02/tLogDatacenters/[[datacenterID]]"
// The existence of an empty string as a value signifies that the datacenterID is valid
// (as opposed to having no value at all)
extern const KeyRangeRef tLogDatacentersKeys;
extern const KeyRef tLogDatacentersPrefix;
const Key tLogDatacentersKeyFor( Optional<Value> dcID );
@ -170,29 +189,43 @@ ProcessClass decodeProcessClassValue( ValueRef const& );
UID decodeProcessClassKeyOld( KeyRef const& key );
// "\xff/conf/[[option]]" := "value"
// An umbrella prefix for options mostly used by the DatabaseConfiguration class.
// See DatabaseConfiguration.cpp ::setInternal for more examples.
extern const KeyRangeRef configKeys;
extern const KeyRef configKeysPrefix;
// The differences between excluded and failed can be found in "command-line-interface.rst"
// and in the help message of the fdbcli command "exclude".
// "\xff/conf/excluded/1.2.3.4" := ""
// "\xff/conf/excluded/1.2.3.4:4000" := ""
// These are inside configKeysPrefix since they represent a form of configuration and they are convenient
// to track in the same way by the tlog and recovery process, but they are ignored by the DatabaseConfiguration
// class.
// The existence of an empty string as a value signifies that the provided IP has been excluded.
// (as opposed to having no value at all)
extern const KeyRef excludedServersPrefix;
extern const KeyRangeRef excludedServersKeys;
extern const KeyRef excludedServersVersionKey; // The value of this key shall be changed by any transaction that modifies the excluded servers list
const AddressExclusion decodeExcludedServersKey( KeyRef const& key ); // where key.startsWith(excludedServersPrefix)
std::string encodeExcludedServersKey( AddressExclusion const& );
// "\xff/conf/failed/1.2.3.4" := ""
// "\xff/conf/failed/1.2.3.4:4000" := ""
// These are inside configKeysPrefix since they represent a form of configuration and they are convenient
// to track in the same way by the tlog and recovery process, but they are ignored by the DatabaseConfiguration
// class.
// The existence of an empty string as a value signifies that the provided IP has been marked as failed.
// (as opposed to having no value at all)
extern const KeyRef failedServersPrefix;
extern const KeyRangeRef failedServersKeys;
extern const KeyRef failedServersVersionKey; // The value of this key shall be changed by any transaction that modifies the failed servers list
const AddressExclusion decodeFailedServersKey( KeyRef const& key ); // where key.startsWith(failedServersPrefix)
std::string encodeFailedServersKey( AddressExclusion const& );
// "\xff/workers/[[processID]]" := ""
// Asynchronously updated by the cluster controller, this is a list of fdbserver processes that have joined the cluster
// and are currently (recently) available
// "\xff/workers/[[processID]]" := ""
// Asynchronously updated by the cluster controller, this is a list of fdbserver processes that have joined the cluster
// and are currently (recently) available
extern const KeyRangeRef workerListKeys;
extern const KeyRef workerListPrefix;
const Key workerListKeyFor(StringRef processID );
@ -200,7 +233,9 @@ const Value workerListValue( ProcessData const& );
Key decodeWorkerListKey( KeyRef const& );
ProcessData decodeWorkerListValue( ValueRef const& );
// "\xff\x02/backupProgress/[[workerID]]" := "[[WorkerBackupStatus]]"
// "\xff\x02/backupProgress/[[workerID]]" := "[[WorkerBackupStatus]]"
// Provides the progress for the given backup worker.
// See "FDBTypes.h" struct WorkerBackupStatus for more details on the return type value.
extern const KeyRangeRef backupProgressKeys;
extern const KeyRef backupProgressPrefix;
const Key backupProgressKeyFor(UID workerID);
@ -214,18 +249,31 @@ extern const KeyRef backupStartedKey;
Value encodeBackupStartedValue(const std::vector<std::pair<UID, Version>>& ids);
std::vector<std::pair<UID, Version>> decodeBackupStartedValue(const ValueRef& value);
// The key to signal backup workers that they should pause or resume.
// The key to signal backup workers that they should resume or pause.
// "\xff\x02/backupPaused" := "[[0|1]]"
// 0 = Send a signal to resume/already resumed.
// 1 = Send a signal to pause/already paused.
extern const KeyRef backupPausedKey;
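// Illustrative sketch (assumes the literal "0"/"1" encoding documented above):
//   tr.set(backupPausedKey, LiteralStringRef("1"));  // ask backup workers to pause
//   tr.set(backupPausedKey, LiteralStringRef("0"));  // ask backup workers to resume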
// "\xff/coordinators" = "[[ClusterConnectionString]]"
// Set to the encoded structure of the cluster's current set of coordinators.
// Changed when performing quorumChange.
// See "CoordinationInterface.h" struct ClusterConnectionString for more details
extern const KeyRef coordinatorsKey;
// "\xff/logs" = "[[LogsValue]]"
// Used during master recovery in order to communicate
// and store info about the logs system.
extern const KeyRef logsKey;
// "\xff/minRequiredCommitVersion" = "[[Version]]"
// Used during backup/recovery to restrict version requirements
extern const KeyRef minRequiredCommitVersionKey;
const Value logsValue( const vector<std::pair<UID, NetworkAddress>>& logs, const vector<std::pair<UID, NetworkAddress>>& oldLogs );
std::pair<vector<std::pair<UID, NetworkAddress>>,vector<std::pair<UID, NetworkAddress>>> decodeLogsValue( const ValueRef& value );
// The "global keys" are send to each storage server any time they are changed
// The "global keys" are sent to each storage server any time they are changed
extern const KeyRef globalKeysPrefix;
extern const KeyRef lastEpochEndKey;
extern const KeyRef lastEpochEndPrivateKey;
@ -253,6 +301,7 @@ extern const KeyRef tagThrottleLimitKey;
extern const KeyRef tagThrottleCountKey;
// Log Range constant variables
// Used in the backup pipeline to track mutations
// \xff/logRanges/[16-byte UID][begin key] := serialize( make_pair([end key], [destination key prefix]), IncludeVersion() )
extern const KeyRangeRef logRangesRange;
@ -397,8 +446,16 @@ std::pair<Key,Version> decodeHealthyZoneValue( ValueRef const& );
extern const KeyRangeRef testOnlyTxnStateStorePrefixRange;
// Snapshot + Incremental Restore
// "\xff/writeRecovery" = "[[writeRecoveryKeyTrue]]"
// Flag used for the snapshot-restore pipeline in order to avoid
// anomalous behaviour with multiple recoveries.
extern const KeyRef writeRecoveryKey;
extern const ValueRef writeRecoveryKeyTrue;
// "\xff/snapshotEndVersion" = "[[Version]]"
// Written by master server during recovery if recovering from a snapshot.
// Allows incremental restore to read and set starting version for consistency.
extern const KeyRef snapshotEndVersionKey;
#pragma clang diagnostic pop

View File

@ -169,7 +169,7 @@ struct TagThrottleValue {
template<class Ar>
void serialize(Ar& ar) {
if(ar.protocolVersion().hasTagThrottleValueReason()) {
serializer(ar, tpsRate, expirationTime, initialDuration, reinterpret_cast<uint8_t&>(reason));
serializer(ar, tpsRate, expirationTime, initialDuration, reason);
}
else if(ar.protocolVersion().hasTagThrottleValue()) {
serializer(ar, tpsRate, expirationTime, initialDuration);
@ -216,8 +216,6 @@ namespace ThrottleApi {
Future<Void> enableAuto(Database const& db, bool const& enabled);
};
BINARY_SERIALIZABLE(TransactionPriority);
template<class Value>
using TransactionTagMap = std::unordered_map<TransactionTag, Value, std::hash<TransactionTagRef>>;

View File

@ -22,6 +22,7 @@
#include "fdbclient/ReadYourWrites.h"
#include "fdbclient/DatabaseContext.h"
#include "fdbclient/versions.h"
#include "fdbclient/NativeAPI.actor.h"
// Users of ThreadSafeTransaction might share Reference<ThreadSafe...> between different threads as long as they don't call addRef (e.g. C API follows this).
// Therefore, it is unsafe to call (explicitly or implicitly) this->addRef in any of these functions.
@ -364,6 +365,15 @@ const char* ThreadSafeApi::getClientVersion() {
return clientVersion.c_str();
}
ThreadFuture<uint64_t> ThreadSafeApi::getServerProtocol(const char* clusterFilePath) {
auto [clusterFile, isDefault] = ClusterConnectionFile::lookupClusterFileName(std::string(clusterFilePath));
Reference<ClusterConnectionFile> f = Reference<ClusterConnectionFile>(new ClusterConnectionFile(clusterFile));
return onMainThread( [f]() -> Future< uint64_t > {
return getCoordinatorProtocols(f);
} );
}
void ThreadSafeApi::setNetworkOption(FDBNetworkOptions::Option option, Optional<StringRef> value) {
if (option == FDBNetworkOptions::EXTERNAL_CLIENT_TRANSPORT_ID) {
if(value.present()) {

View File

@ -92,6 +92,8 @@ public:
Version getCommittedVersion() override;
ThreadFuture<int64_t> getApproximateSize() override;
ThreadFuture<uint64_t> getProtocolVersion();
void setOption( FDBTransactionOptions::Option option, Optional<StringRef> value = Optional<StringRef>() ) override;
ThreadFuture<Void> checkDeferredError();
@ -115,6 +117,7 @@ class ThreadSafeApi : public IClientApi, ThreadSafeReferenceCounted<ThreadSafeAp
public:
void selectApiVersion(int apiVersion);
const char* getClientVersion();
ThreadFuture<uint64_t> getServerProtocol(const char* clusterFilePath) override;
void setNetworkOption(FDBNetworkOptions::Option option, Optional<StringRef> value = Optional<StringRef>());
void setupNetwork();

View File

@ -182,6 +182,10 @@ description is not currently required but encouraged.
<Option name="transaction_include_port_in_address" code="505"
description="Addresses returned by get_addresses_for_key include the port when enabled. As of api version 630, this option is enabled by default and setting this has no effect."
defaultFor="23"/>
<Option name="transaction_trace_enable" code="600"
description="Enable tracing for all transactions. This is the default." />
<Option name="transaction_trace_disable" code="601"
description="Disable tracing for all transactions." />
</Scope>
<Scope name="TransactionOption">

View File

@ -50,7 +50,10 @@ if(NOT WIN32)
endif()
add_library(thirdparty STATIC ${FDBRPC_THIRD_PARTY_SRCS})
if(NOT WIN32)
if(WIN32)
target_compile_definitions(thirdparty PRIVATE USE_FIBERS)
else()
target_compile_definitions(thirdparty PRIVATE USE_UCONTEXT)
target_compile_options(thirdparty BEFORE PRIVATE -w) # disable warnings for third party
endif()
if(USE_VALGRIND)

View File

@ -20,6 +20,7 @@
// Unit tests for the flow language and libraries
#include "flow/ProtocolVersion.h"
#include "flow/UnitTest.h"
#include "flow/DeterministicRandom.h"
#include "flow/IThreadPool.h"
@ -281,6 +282,9 @@ struct YieldMockNetwork final : INetwork, ReferenceCounted<YieldMockNetwork> {
static TLSConfig emptyConfig;
return emptyConfig;
}
ProtocolVersion protocolVersion() override {
return baseNetwork->protocolVersion();
}
};
struct NonserializableThing {};

View File

@ -18,8 +18,11 @@
* limitations under the License.
*/
#include "fdbclient/CoordinationInterface.h"
#include "fdbrpc/FlowTransport.h"
#include "flow/network.h"
#include <cstdint>
#include <unordered_map>
#if VALGRIND
#include <memcheck.h>
@ -38,19 +41,21 @@
#include "flow/TDMetric.actor.h"
#include "flow/ObjectSerializer.h"
#include "flow/ProtocolVersion.h"
#include "flow/UnitTest.h"
#include "flow/actorcompiler.h" // This must be the last #include.
static NetworkAddressList g_currentDeliveryPeerAddress = NetworkAddressList();
const UID WLTOKEN_ENDPOINT_NOT_FOUND(-1, 0);
const UID WLTOKEN_PING_PACKET(-1, 1);
const UID TOKEN_IGNORE_PACKET(0, 2);
constexpr UID WLTOKEN_ENDPOINT_NOT_FOUND(-1, 0);
constexpr UID WLTOKEN_PING_PACKET(-1, 1);
constexpr int PACKET_LEN_WIDTH = sizeof(uint32_t);
const uint64_t TOKEN_STREAM_FLAG = 1;
class EndpointMap : NonCopyable {
public:
EndpointMap();
// Reserve space for this many wellKnownEndpoints
explicit EndpointMap(int wellKnownEndpointCount);
void insertWellKnown(NetworkMessageReceiver* r, const Endpoint::Token& token, TaskPriority priority);
void insert( NetworkMessageReceiver* r, Endpoint::Token& token, TaskPriority priority );
const Endpoint& insert( NetworkAddressList localAddresses, std::vector<std::pair<FlowReceiver*, TaskPriority>> const& streams );
NetworkMessageReceiver* get( Endpoint::Token const& token );
@ -65,17 +70,16 @@ private:
uint64_t uid[2]; // priority packed into lower 32 bits; actual lower 32 bits of token are the index in data[]
uint32_t nextFree;
};
NetworkMessageReceiver* receiver;
NetworkMessageReceiver* receiver = nullptr;
Endpoint::Token& token() { return *(Endpoint::Token*)uid; }
};
int wellKnownEndpointCount;
std::vector<Entry> data;
uint32_t firstFree;
};
EndpointMap::EndpointMap()
: firstFree(-1)
{
}
EndpointMap::EndpointMap(int wellKnownEndpointCount)
: wellKnownEndpointCount(wellKnownEndpointCount), data(wellKnownEndpointCount), firstFree(-1) {}
void EndpointMap::realloc() {
int oldSize = data.size();
@ -88,6 +92,14 @@ void EndpointMap::realloc() {
firstFree = oldSize;
}
void EndpointMap::insertWellKnown(NetworkMessageReceiver* r, const Endpoint::Token& token, TaskPriority priority) {
int index = token.second();
ASSERT(data[index].receiver == nullptr);
data[index].receiver = r;
data[index].token() =
Endpoint::Token(token.first(), (token.second() & 0xffffffff00000000LL) | static_cast<uint32_t>(priority));
}
void EndpointMap::insert( NetworkMessageReceiver* r, Endpoint::Token& token, TaskPriority priority ) {
if (firstFree == uint32_t(-1)) realloc();
int index = firstFree;
@ -135,6 +147,9 @@ const Endpoint& EndpointMap::insert( NetworkAddressList localAddresses, std::vec
NetworkMessageReceiver* EndpointMap::get( Endpoint::Token const& token ) {
uint32_t index = token.second();
if (index < wellKnownEndpointCount && data[index].receiver == nullptr) {
TraceEvent(SevWarnAlways, "WellKnownEndpointNotAdded").detail("Token", token);
}
if ( index < data.size() && data[index].token().first() == token.first() && ((data[index].token().second()&0xffffffff00000000LL)|index)==token.second() )
return data[index].receiver;
return 0;
@ -147,9 +162,13 @@ TaskPriority EndpointMap::getPriority( Endpoint::Token const& token ) {
return TaskPriority::UnknownEndpoint;
}
void EndpointMap::remove( Endpoint::Token const& token, NetworkMessageReceiver* r ) {
void EndpointMap::remove(Endpoint::Token const& token, NetworkMessageReceiver* r) {
uint32_t index = token.second();
if ( index < data.size() && data[index].token().first() == token.first() && ((data[index].token().second()&0xffffffff00000000LL)|index)==token.second() && data[index].receiver == r ) {
if (index < wellKnownEndpointCount) {
data[index].receiver = nullptr;
} else if (index < data.size() && data[index].token().first() == token.first() &&
((data[index].token().second() & 0xffffffff00000000LL) | index) == token.second() &&
data[index].receiver == r) {
data[index].receiver = 0;
data[index].nextFree = firstFree;
firstFree = index;
@ -158,11 +177,9 @@ void EndpointMap::remove( Endpoint::Token const& token, NetworkMessageReceiver*
struct EndpointNotFoundReceiver final : NetworkMessageReceiver {
EndpointNotFoundReceiver(EndpointMap& endpoints) {
//endpoints[WLTOKEN_ENDPOINT_NOT_FOUND] = this;
Endpoint::Token e = WLTOKEN_ENDPOINT_NOT_FOUND;
endpoints.insert(this, e, TaskPriority::DefaultEndpoint);
ASSERT( e == WLTOKEN_ENDPOINT_NOT_FOUND );
endpoints.insertWellKnown(this, WLTOKEN_ENDPOINT_NOT_FOUND, TaskPriority::DefaultEndpoint);
}
void receive(ArenaObjectReader& reader) override {
// Remote machine tells us it doesn't have endpoint e
Endpoint e;
@ -173,9 +190,7 @@ struct EndpointNotFoundReceiver final : NetworkMessageReceiver {
struct PingReceiver final : NetworkMessageReceiver {
PingReceiver(EndpointMap& endpoints) {
Endpoint::Token e = WLTOKEN_PING_PACKET;
endpoints.insert(this, e, TaskPriority::ReadSocket);
ASSERT( e == WLTOKEN_PING_PACKET );
endpoints.insertWellKnown(this, WLTOKEN_PING_PACKET, TaskPriority::ReadSocket);
}
void receive(ArenaObjectReader& reader) override {
ReplyPromise<Void> reply;
@ -214,11 +229,9 @@ public:
Reference<AsyncVar<bool>> degraded;
bool warnAlwaysForLargePacket;
// These declarations must be in exactly this order
EndpointMap endpoints;
EndpointNotFoundReceiver endpointNotFoundReceiver;
PingReceiver pingReceiver;
// End ordered declarations
EndpointNotFoundReceiver endpointNotFoundReceiver{ endpoints };
PingReceiver pingReceiver{ endpoints };
Int64MetricHandle bytesSent;
Int64MetricHandle countPacketsReceived;
@ -294,7 +307,8 @@ ACTOR Future<Void> pingLatencyLogger(TransportData* self) {
}
TransportData::TransportData(uint64_t transportId)
: endpointNotFoundReceiver(endpoints),
: endpoints(/*wellKnownTokenCount*/ 11),
endpointNotFoundReceiver(endpoints),
pingReceiver(endpoints),
warnAlwaysForLargePacket(true),
lastIncompatibleMessage(0),
@ -770,7 +784,7 @@ void Peer::prependConnectPacket() {
}
pkt.connectPacketLength = sizeof(pkt) - sizeof(pkt.connectPacketLength);
pkt.protocolVersion = currentProtocolVersion;
pkt.protocolVersion = g_network->protocolVersion();
pkt.protocolVersion.addObjectSerializerFlag();
pkt.connectionId = transport->transportId;
@ -835,6 +849,15 @@ TransportData::~TransportData() {
}
}
static bool checkCompatible(const PeerCompatibilityPolicy& policy, ProtocolVersion version) {
switch (policy.requirement) {
case RequirePeer::Exactly:
return version.version() == policy.version.version();
case RequirePeer::AtLeast:
return version.version() >= policy.version.version();
}
}
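// For reference: the policy checked here is declared per receiver. For example,
// ProtocolInfoRequest (CoordinationInterface.h above) accepts any peer that is at
// least at the "stable interfaces" version:
//   ReplyPromise<ProtocolInfoReply> reply{ PeerCompatibilityPolicy{
//       RequirePeer::AtLeast, ProtocolVersion::withStableInterfaces() } };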
ACTOR static void deliver(TransportData* self, Endpoint destination, ArenaReader reader, bool inReadSocket) {
TaskPriority priority = self->endpoints.getPriority(destination.token);
if (priority < TaskPriority::ReadSocket || !inReadSocket) {
@ -845,6 +868,9 @@ ACTOR static void deliver(TransportData* self, Endpoint destination, ArenaReader
auto receiver = self->endpoints.get(destination.token);
if (receiver) {
if (!checkCompatible(receiver->peerCompatibilityPolicy(), reader.protocolVersion())) {
return;
}
try {
g_currentDeliveryPeerAddress = destination.addresses;
StringRef data = reader.arenaReadAll();
@ -890,11 +916,11 @@ static void scanPackets(TransportData* transport, uint8_t*& unprocessed_begin, c
//Retrieve packet length and checksum
if (checksumEnabled) {
if (e-p < sizeof(uint32_t) * 2) break;
packetLen = *(uint32_t*)p; p += sizeof(uint32_t);
packetLen = *(uint32_t*)p; p += PACKET_LEN_WIDTH;
packetChecksum = *(uint32_t*)p; p += sizeof(uint32_t);
} else {
if (e-p < sizeof(uint32_t)) break;
packetLen = *(uint32_t*)p; p += sizeof(uint32_t);
packetLen = *(uint32_t*)p; p += PACKET_LEN_WIDTH;
}
if (packetLen > FLOW_KNOBS->PACKET_LIMIT) {
@ -945,7 +971,9 @@ static void scanPackets(TransportData* transport, uint8_t*& unprocessed_begin, c
#if VALGRIND
VALGRIND_CHECK_MEM_IS_DEFINED(p, packetLen);
#endif
ArenaReader reader(arena, StringRef(p, packetLen), AssumeVersion(currentProtocolVersion));
// Remove the object serializer flag to account for flat buffers
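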
peerProtocolVersion.removeObjectSerializerFlag();
ArenaReader reader(arena, StringRef(p, packetLen), AssumeVersion(peerProtocolVersion));
UID token;
reader >> token;
@ -972,9 +1000,9 @@ static void scanPackets(TransportData* transport, uint8_t*& unprocessed_begin, c
// Given unprocessed buffer [begin, end), check if next packet size is known and return
// enough size for the next packet, whose format is: {size, optional_checksum, data} +
// next_packet_size.
static int getNewBufferSize(const uint8_t* begin, const uint8_t* end, const NetworkAddress& peerAddress) {
static int getNewBufferSize(const uint8_t* begin, const uint8_t* end, const NetworkAddress& peerAddress, ProtocolVersion peerProtocolVersion) {
const int len = end - begin;
if (len < sizeof(uint32_t)) {
if (len < PACKET_LEN_WIDTH) {
return FLOW_KNOBS->MIN_PACKET_BUFFER_BYTES;
}
const uint32_t packetLen = *(uint32_t*)begin;
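For reference, a small sketch of the framing parsed here, assuming PACKET_LEN_WIDTH equals sizeof(uint32_t) (the helper name is illustrative, not from this commit):
#include <cstdint>
#include <cstring>

// Wire layout per packet: { uint32_t length, uint32_t checksum (optional), payload... }.
// Until the first PACKET_LEN_WIDTH bytes arrive, the packet length is unknown and
// the reader can only ask for a minimum-sized buffer.
static bool tryPeekPacketLen(const uint8_t* begin, int bytesAvailable, uint32_t* outLen) {
    if (bytesAvailable < (int)sizeof(uint32_t)) return false;
    std::memcpy(outLen, begin, sizeof(uint32_t)); // memcpy avoids unaligned reads
    return true;
}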
@ -1017,7 +1045,7 @@ ACTOR static Future<Void> connectionReader(
if (readAllBytes < FLOW_KNOBS->MIN_PACKET_BUFFER_FREE_BYTES) {
Arena newArena;
const int unproc_len = unprocessed_end - unprocessed_begin;
const int len = getNewBufferSize(unprocessed_begin, unprocessed_end, peerAddress);
const int len = getNewBufferSize(unprocessed_begin, unprocessed_end, peerAddress, peerProtocolVersion);
uint8_t* const newBuffer = new (newArena) uint8_t[ len ];
if (unproc_len > 0) {
memcpy(newBuffer, unprocessed_begin, unproc_len);
@ -1056,8 +1084,8 @@ ACTOR static Future<Void> connectionReader(
uint64_t connectionId = pkt.connectionId;
if (!pkt.protocolVersion.hasObjectSerializerFlag() ||
!pkt.protocolVersion.isCompatible(currentProtocolVersion)) {
incompatibleProtocolVersionNewer = pkt.protocolVersion > currentProtocolVersion;
!pkt.protocolVersion.isCompatible(g_network->protocolVersion())) {
incompatibleProtocolVersionNewer = pkt.protocolVersion > g_network->protocolVersion();
NetworkAddress addr = pkt.canonicalRemotePort
? NetworkAddress(pkt.canonicalRemoteIp(), pkt.canonicalRemotePort)
: conn->getPeerAddress();
@ -1067,9 +1095,8 @@ ACTOR static Future<Void> connectionReader(
if(now() - transport->lastIncompatibleMessage > FLOW_KNOBS->CONNECTION_REJECTED_MESSAGE_DELAY) {
TraceEvent(SevWarn, "ConnectionRejected", conn->getDebugID())
.detail("Reason", "IncompatibleProtocolVersion")
.detail("LocalVersion", currentProtocolVersion.version())
.detail("LocalVersion", g_network->protocolVersion().version())
.detail("RejectedVersion", pkt.protocolVersion.version())
.detail("VersionMask", ProtocolVersion::compatibleProtocolVersionMask)
.detail("Peer", pkt.canonicalRemotePort ? NetworkAddress(pkt.canonicalRemoteIp(), pkt.canonicalRemotePort)
: conn->getPeerAddress())
.detail("ConnectionId", connectionId);
@ -1081,7 +1108,6 @@ ACTOR static Future<Void> connectionReader(
} else if(connectionId > 1) {
transport->multiVersionConnections[connectionId] = now() + FLOW_KNOBS->CONNECTION_ID_TIMEOUT;
}
compatible = false;
if(!protocolVersion.hasMultiVersionClient()) {
// Older versions expected us to hang up. It may work even if we don't hang up here, but it's safer to keep the old behavior.
@ -1133,7 +1159,7 @@ ACTOR static Future<Void> connectionReader(
}
}
}
if (compatible) {
if (compatible || peerProtocolVersion.hasStableInterfaces()) {
scanPackets( transport, unprocessed_begin, unprocessed_end, arena, peerAddress, peerProtocolVersion );
}
else if(!expectConnectPacket) {
@ -1364,10 +1390,8 @@ void FlowTransport::removeEndpoint( const Endpoint& endpoint, NetworkMessageRece
void FlowTransport::addWellKnownEndpoint( Endpoint& endpoint, NetworkMessageReceiver* receiver, TaskPriority taskID ) {
endpoint.addresses = self->localAddresses;
ASSERT( ((endpoint.token.first() & TOKEN_STREAM_FLAG)!=0) == receiver->isStream() );
Endpoint::Token otoken = endpoint.token;
self->endpoints.insert( receiver, endpoint.token, taskID );
ASSERT( endpoint.token == otoken );
ASSERT(receiver->isStream());
self->endpoints.insertWellKnown(receiver, endpoint.token, taskID);
}
static void sendLocal( TransportData* self, ISerializeSource const& what, const Endpoint& destination ) {
@ -1375,7 +1399,7 @@ static void sendLocal( TransportData* self, ISerializeSource const& what, const
// SOMEDAY: Would it be better to avoid (de)serialization by doing this check in flow?
Standalone<StringRef> copy;
ObjectWriter wr(AssumeVersion(currentProtocolVersion));
ObjectWriter wr(AssumeVersion(g_network->protocolVersion()));
what.serializeObjectWriter(wr);
copy = wr.toStringRef();
#if VALGRIND
@ -1383,7 +1407,7 @@ static void sendLocal( TransportData* self, ISerializeSource const& what, const
#endif
ASSERT(copy.size() > 0);
deliver(self, destination, ArenaReader(copy.arena(), copy, AssumeVersion(currentProtocolVersion)), false);
deliver(self, destination, ArenaReader(copy.arena(), copy, AssumeVersion(g_network->protocolVersion())), false);
}
static ReliablePacket* sendPacket(TransportData* self, Reference<Peer> peer, ISerializeSource const& what,
@ -1405,12 +1429,12 @@ static ReliablePacket* sendPacket(TransportData* self, Reference<Peer> peer, ISe
int prevBytesWritten = pb->bytes_written;
PacketBuffer* checksumPb = pb;
PacketWriter wr(pb,rp,AssumeVersion(currentProtocolVersion)); // SOMEDAY: Can we downgrade to talk to older peers?
PacketWriter wr(pb,rp,AssumeVersion(g_network->protocolVersion())); // SOMEDAY: Can we downgrade to talk to older peers?
// Reserve some space for packet length and checksum, write them after serializing data
SplitBuffer packetInfoBuffer;
uint32_t len, checksum = 0;
int packetInfoSize = sizeof(len);
int packetInfoSize = PACKET_LEN_WIDTH;
if (checksumEnabled) {
packetInfoSize += sizeof(checksum);
}

View File

@ -27,6 +27,7 @@
#include "flow/genericactors.actor.h"
#include "flow/network.h"
#include "flow/FileIdentifier.h"
#include "flow/ProtocolVersion.h"
#include "flow/Net2Packet.h"
#include "fdbrpc/ContinuousSample.h"
@ -116,11 +117,21 @@ namespace std
};
}
enum class RequirePeer { Exactly, AtLeast };
struct PeerCompatibilityPolicy {
RequirePeer requirement;
ProtocolVersion version;
};
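As a hypothetical usage sketch (the receiver type and version constant are illustrative assumptions, not part of this commit), a receiver that tolerates newer peers could override the default exact-match policy of the NetworkMessageReceiver declared just below:
// Hypothetical: accept messages from peers running at least the given
// protocol version, rather than requiring an exact match.
struct ExampleReceiver final : NetworkMessageReceiver {
    void receive(ArenaObjectReader&) override {}
    PeerCompatibilityPolicy peerCompatibilityPolicy() const override {
        return { RequirePeer::AtLeast, ProtocolVersion(0x0FDB00B070010001LL) };
    }
};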
class ArenaObjectReader;
class NetworkMessageReceiver {
public:
virtual void receive(ArenaObjectReader&) = 0;
virtual bool isStream() const { return false; }
virtual PeerCompatibilityPolicy peerCompatibilityPolicy() const {
return { RequirePeer::Exactly, g_network->protocolVersion() };
}
};
struct TransportData;

View File

@ -621,7 +621,7 @@ void showArena( ArenaBlock* a, ArenaBlock* parent) {
}
void arenaTest() {
BinaryWriter wr(AssumeVersion(currentProtocolVersion));
BinaryWriter wr(AssumeVersion(g_network->protocolVersion()));
{
Arena arena;
VectorRef<StringRef> test;
@ -639,7 +639,7 @@ void arenaTest() {
{
Arena arena2;
VectorRef<StringRef> test2;
BinaryReader reader(wr.getData(),wr.getLength(), AssumeVersion(currentProtocolVersion));
BinaryReader reader(wr.getData(),wr.getLength(), AssumeVersion(g_network->protocolVersion()));
reader >> test2 >> arena2;
for(auto i = test2.begin(); i != test2.end(); ++i)

View File

@ -66,6 +66,12 @@ struct FlowReceiver : public NetworkMessageReceiver {
endpoint = e;
}
void setPeerCompatibilityPolicy(const PeerCompatibilityPolicy& policy) { peerCompatibilityPolicy_ = policy; }
PeerCompatibilityPolicy peerCompatibilityPolicy() const override {
return peerCompatibilityPolicy_.orDefault(NetworkMessageReceiver::peerCompatibilityPolicy());
}
void makeWellKnownEndpoint(Endpoint::Token token, TaskPriority taskID) {
ASSERT(!endpoint.isValid());
m_isLocalEndpoint = true;
@ -74,6 +80,7 @@ struct FlowReceiver : public NetworkMessageReceiver {
}
private:
Optional<PeerCompatibilityPolicy> peerCompatibilityPolicy_;
Endpoint endpoint;
bool m_isLocalEndpoint;
bool m_stream;
@ -117,6 +124,9 @@ public:
bool isSet() { return sav->isSet(); }
bool isValid() const { return sav != nullptr; }
ReplyPromise() : sav(new NetSAV<T>(0, 1)) {}
explicit ReplyPromise(const PeerCompatibilityPolicy& policy) : ReplyPromise() {
sav->setPeerCompatibilityPolicy(policy);
}
ReplyPromise(const ReplyPromise& rhs) : sav(rhs.sav) { sav->addPromiseRef(); }
ReplyPromise(ReplyPromise&& rhs) noexcept : sav(rhs.sav) { rhs.sav = 0; }
~ReplyPromise() { if (sav) sav->delPromiseRef(); }
@ -354,6 +364,9 @@ public:
FutureStream<T> getFuture() const { queue->addFutureRef(); return FutureStream<T>(queue); }
RequestStream() : queue(new NetNotifiedQueue<T>(0, 1)) {}
explicit RequestStream(PeerCompatibilityPolicy policy) : RequestStream() {
queue->setPeerCompatibilityPolicy(policy);
}
RequestStream(const RequestStream& rhs) : queue(rhs.queue) { queue->addPromiseRef(); }
RequestStream(RequestStream&& rhs) noexcept : queue(rhs.queue) { rhs.queue = 0; }
void operator=(const RequestStream& rhs) {

View File

@ -27,6 +27,7 @@
#include "flow/ActorCollection.h"
#include "flow/IRandom.h"
#include "flow/IThreadPool.h"
#include "flow/ProtocolVersion.h"
#include "flow/Util.h"
#include "fdbrpc/IAsyncFile.h"
#include "fdbrpc/AsyncFileCached.actor.h"
@ -92,8 +93,6 @@ void ISimulator::displayWorkers() const
return;
}
const UID TOKEN_ENDPOINT_NOT_FOUND(-1, -1);
int openCount = 0;
struct SimClogging {
@ -999,8 +998,8 @@ public:
net2->run();
}
ProcessInfo* newProcess(const char* name, IPAddress ip, uint16_t port, bool sslEnabled, uint16_t listenPerProcess,
LocalityData locality, ProcessClass startingClass, const char* dataFolder,
const char* coordinationFolder) override {
LocalityData locality, ProcessClass startingClass, const char* dataFolder,
const char* coordinationFolder, ProtocolVersion protocol) override {
ASSERT( locality.machineId().present() );
MachineInfo& machine = machines[ locality.machineId().get() ];
if (!machine.machineId.present())
@ -1043,6 +1042,7 @@ public:
currentlyRebootingProcesses.erase(addresses.address);
m->excluded = g_simulator.isExcluded(NetworkAddress(ip, port, true, false));
m->cleared = g_simulator.isCleared(addresses.address);
m->protocolVersion = protocol;
m->setGlobal(enTDMetrics, (flowGlobalType) &m->tdmetrics);
m->setGlobal(enNetworkConnections, (flowGlobalType) m->network);
@ -1708,6 +1708,10 @@ public:
return Void();
return delay( 0, taskID, process->machine->machineProcess );
}
ProtocolVersion protocolVersion() override {
return getCurrentProcess()->protocolVersion;
}
//time is guarded by ISimulator::mutex. It is not necessary to guard reads on the main thread because
//time should only be modified from the main thread.
@ -1796,6 +1800,7 @@ public:
: id(deterministicRandom()->randomUniqueID()), process(g_simulator.getCurrentProcess()), peerAddress(peerAddress),
actors(false), _localAddress(localAddress) {
g_sim2.addressMap.emplace(_localAddress, process);
ASSERT(process->boundUDPSockets.find(localAddress) == process->boundUDPSockets.end());
process->boundUDPSockets.emplace(localAddress, this);
}
~UDPSimSocket() {
@ -1909,6 +1914,9 @@ Future<Reference<IUDPSocket>> Sim2::createUDPSocket(NetworkAddress toAddr) {
localAddress.ip = IPAddress(process->address.ip.toV4() + deterministicRandom()->randomInt(0, 256));
}
localAddress.port = deterministicRandom()->randomInt(40000, 60000);
while (process->boundUDPSockets.find(localAddress) != process->boundUDPSockets.end()) {
localAddress.port = deterministicRandom()->randomInt(40000, 60000);
}
return Reference<IUDPSocket>(new UDPSimSocket(localAddress, toAddr));
}

View File

@ -20,6 +20,7 @@
#ifndef FLOW_SIMULATOR_H
#define FLOW_SIMULATOR_H
#include "flow/ProtocolVersion.h"
#pragma once
#include "flow/flow.h"
@ -71,6 +72,8 @@ public:
UID uid;
ProtocolVersion protocolVersion;
ProcessInfo(const char* name, LocalityData locality, ProcessClass startingClass, NetworkAddressList addresses,
INetworkConnections* net, const char* dataFolder, const char* coordinationFolder)
: name(name), locality(locality), startingClass(startingClass), addresses(addresses),
@ -162,7 +165,7 @@ public:
virtual ProcessInfo* newProcess(const char* name, IPAddress ip, uint16_t port, bool sslEnabled, uint16_t listenPerProcess,
LocalityData locality, ProcessClass startingClass, const char* dataFolder,
const char* coordinationFolder) = 0;
const char* coordinationFolder, ProtocolVersion protocol) = 0;
virtual void killProcess( ProcessInfo* machine, KillType ) = 0;
virtual void rebootProcess(Optional<Standalone<StringRef>> zoneId, bool allProcesses ) = 0;
virtual void rebootProcess( ProcessInfo* process, KillType kt ) = 0;
@ -176,6 +179,8 @@ public:
virtual bool datacenterDead(Optional<Standalone<StringRef>> dcId) const = 0;
virtual void displayWorkers() const;
virtual ProtocolVersion protocolVersion() = 0;
virtual void addRole(NetworkAddress const& address, std::string const& role) {
roleAddresses[address][role] ++;
TraceEvent("RoleAdd").detail("Address", address).detail("Role", role).detail("NumRoles", roleAddresses[address].size()).detail("Value", roleAddresses[address][role]);
@ -327,6 +332,9 @@ public:
BackupAgentType backupAgents;
BackupAgentType drAgents;
bool hasDiffProtocolProcess; // true if the simulator is testing a process with a different protocol version
bool setDiffProtocol; // true if a process with a different protocol version has been started
virtual flowGlobalType global(int id) const { return getCurrentProcess()->global(id); };
virtual void setGlobal(size_t id, flowGlobalType v) { getCurrentProcess()->setGlobal(id,v); };

View File

@ -59,7 +59,7 @@ struct VersionedMessage {
}
}
ArenaReader reader(arena, message, AssumeVersion(currentProtocolVersion));
ArenaReader reader(arena, message, AssumeVersion(g_network->protocolVersion()));
// Return false for LogProtocolMessage and SpanContextMessage metadata messages.
if (LogProtocolMessage::isNextIn(reader)) return false;
@ -756,7 +756,7 @@ ACTOR Future<Void> saveMutationsToFile(BackupData* self, Version popVersion, int
const auto& subrange = range.range();
intersectionRange = mutationRange & subrange;
MutationRef subm(MutationRef::Type::ClearRange, intersectionRange.begin, intersectionRange.end);
BinaryWriter wr(AssumeVersion(currentProtocolVersion));
BinaryWriter wr(AssumeVersion(g_network->protocolVersion()));
wr << subm;
mutations.push_back(wr.toValue());
for (int index : range.value()) {

View File

@ -178,6 +178,7 @@ set(FDBSERVER_SRCS
workloads/Performance.actor.cpp
workloads/Ping.actor.cpp
workloads/PopulateTPCC.actor.cpp
workloads/ProtocolVersion.actor.cpp
workloads/PubSubMultiples.actor.cpp
workloads/QueuePush.actor.cpp
workloads/RandomClogging.actor.cpp

View File

@ -2883,7 +2883,7 @@ ACTOR Future<Void> dbInfoUpdater( ClusterControllerData* self ) {
dbInfoChange = self->db.serverInfo->onChange();
updateDBInfo = self->updateDBInfo.onTrigger();
req.serializedDbInfo = BinaryWriter::toValue(self->db.serverInfo->get(), AssumeVersion(currentProtocolVersion));
req.serializedDbInfo = BinaryWriter::toValue(self->db.serverInfo->get(), AssumeVersion(g_network->protocolVersion()));
TraceEvent("DBInfoStartBroadcast", self->id);
choose {

View File

@ -24,10 +24,13 @@
#include "fdbserver/WorkerInterface.actor.h"
#include "fdbserver/Status.h"
#include "flow/ActorCollection.h"
#include "flow/ProtocolVersion.h"
#include "flow/UnitTest.h"
#include "flow/IndexedSet.h"
#include "fdbclient/MonitorLeader.h"
#include "flow/actorcompiler.h" // This must be the last #include.
#include "flow/network.h"
#include <cstdint>
// This module implements coordinationServer() and the interfaces in CoordinationInterface.h
@ -42,17 +45,6 @@ struct GenerationRegVal {
}
};
// The order of UIDs here must match the order in which makeWellKnownEndpoint is called.
// UID WLTOKEN_CLIENTLEADERREG_GETLEADER( -1, 2 ); // from fdbclient/MonitorLeader.actor.cpp
// UID WLTOKEN_CLIENTLEADERREG_OPENDATABASE( -1, 3 ); // from fdbclient/MonitorLeader.actor.cpp
UID WLTOKEN_LEADERELECTIONREG_CANDIDACY( -1, 4 );
UID WLTOKEN_LEADERELECTIONREG_ELECTIONRESULT( -1, 5 );
UID WLTOKEN_LEADERELECTIONREG_LEADERHEARTBEAT( -1, 6 );
UID WLTOKEN_LEADERELECTIONREG_FORWARD( -1, 7 );
UID WLTOKEN_GENERATIONREG_READ( -1, 8 );
UID WLTOKEN_GENERATIONREG_WRITE( -1, 9 );
GenerationRegInterface::GenerationRegInterface( NetworkAddress remote )
: read( Endpoint({remote}, WLTOKEN_GENERATIONREG_READ) ),
write( Endpoint({remote}, WLTOKEN_GENERATIONREG_WRITE) )

View File

@ -24,6 +24,13 @@
#include "fdbclient/CoordinationInterface.h"
constexpr UID WLTOKEN_LEADERELECTIONREG_CANDIDACY(-1, 4);
constexpr UID WLTOKEN_LEADERELECTIONREG_ELECTIONRESULT(-1, 5);
constexpr UID WLTOKEN_LEADERELECTIONREG_LEADERHEARTBEAT(-1, 6);
constexpr UID WLTOKEN_LEADERELECTIONREG_FORWARD(-1, 7);
constexpr UID WLTOKEN_GENERATIONREG_READ(-1, 8);
constexpr UID WLTOKEN_GENERATIONREG_WRITE(-1, 9);
struct GenerationRegInterface {
constexpr static FileIdentifier file_identifier = 16726744;
RequestStream< struct GenerationRegReadRequest > read;

View File

@ -4799,7 +4799,9 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self,
state MoveKeysLock lock;
state Reference<DDTeamCollection> primaryTeamCollection;
state Reference<DDTeamCollection> remoteTeamCollection;
state bool trackerCancelled;
loop {
trackerCancelled = false;
try {
loop {
TraceEvent("DDInitTakingMoveKeysLock", self->ddId);
@ -4966,7 +4968,7 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self,
actors.push_back(reportErrorsExcept(
dataDistributionTracker(initData, cx, output, shardsAffectedByTeamFailure, getShardMetrics,
getShardMetricsList, getAverageShardBytes.getFuture(), readyToStart,
anyZeroHealthyTeams, self->ddId, &shards),
anyZeroHealthyTeams, self->ddId, &shards, &trackerCancelled),
"DDTracker", self->ddId, &normalDDQueueErrors()));
actors.push_back(reportErrorsExcept(
dataDistributionQueue(cx, output, input.getFuture(), getShardMetrics, processingUnhealthy, tcis,
@ -5006,6 +5008,7 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self,
return Void();
}
catch( Error &e ) {
trackerCancelled = true;
state Error err = e;
TraceEvent("DataDistributorDestroyTeamCollections").error(e);
self->teamCollection = nullptr;

View File

@ -216,7 +216,7 @@ struct InitialDataDistribution : ReferenceCounted<InitialDataDistribution> {
struct ShardMetrics {
StorageMetrics metrics;
double lastLowBandwidthStartTime;
int shardCount;
int shardCount; // number of smaller shards whose metrics are aggregated in the ShardMetrics
bool operator==(ShardMetrics const& rhs) const {
return metrics == rhs.metrics && lastLowBandwidthStartTime == rhs.lastLowBandwidthStartTime &&
@ -233,18 +233,15 @@ struct ShardTrackedData {
Reference<AsyncVar<Optional<ShardMetrics>>> stats;
};
ACTOR Future<Void> dataDistributionTracker(
Reference<InitialDataDistribution> initData,
Database cx,
PromiseStream<RelocateShard> output,
Reference<ShardsAffectedByTeamFailure> shardsAffectedByTeamFailure,
PromiseStream<GetMetricsRequest> getShardMetrics,
PromiseStream<GetMetricsListRequest> getShardMetricsList,
FutureStream<Promise<int64_t>> getAverageShardBytes,
Promise<Void> readyToStart,
Reference<AsyncVar<bool>> zeroHealthyTeams,
UID distributorId,
KeyRangeMap<ShardTrackedData>* shards);
ACTOR Future<Void> dataDistributionTracker(Reference<InitialDataDistribution> initData, Database cx,
PromiseStream<RelocateShard> output,
Reference<ShardsAffectedByTeamFailure> shardsAffectedByTeamFailure,
PromiseStream<GetMetricsRequest> getShardMetrics,
PromiseStream<GetMetricsListRequest> getShardMetricsList,
FutureStream<Promise<int64_t>> getAverageShardBytes,
Promise<Void> readyToStart, Reference<AsyncVar<bool>> zeroHealthyTeams,
UID distributorId, KeyRangeMap<ShardTrackedData>* shards,
bool const* trackerCancelled);
ACTOR Future<Void> dataDistributionQueue(
Database cx,

View File

@ -18,8 +18,9 @@
* limitations under the License.
*/
#include <numeric>
#include <limits>
#include <numeric>
#include <vector>
#include "flow/ActorCollection.h"
#include "flow/Util.h"
@ -83,8 +84,8 @@ struct RelocateData {
};
class ParallelTCInfo final : public ReferenceCounted<ParallelTCInfo>, public IDataDistributionTeam {
vector<Reference<IDataDistributionTeam>> teams;
vector<UID> tempServerIDs;
std::vector<Reference<IDataDistributionTeam>> teams;
std::vector<UID> tempServerIDs;
int64_t sum(std::function<int64_t(IDataDistributionTeam const&)> func) const {
int64_t result = 0;
@ -95,11 +96,11 @@ class ParallelTCInfo final : public ReferenceCounted<ParallelTCInfo>, public IDa
}
template <class T>
vector<T> collect(std::function<vector<T>(IDataDistributionTeam const&)> func) const {
vector<T> result;
std::vector<T> collect(std::function<vector<T>(IDataDistributionTeam const&)> func) const {
std::vector<T> result;
for (const auto& team : teams) {
vector<T> newItems = func(*team);
std::vector<T> newItems = func(*team);
result.insert(result.end(), newItems.begin(), newItems.end());
}
return result;
@ -125,7 +126,7 @@ public:
return !any([func](IDataDistributionTeam const& team) { return !func(team); });
}
vector<StorageServerInterface> getLastKnownServerInterfaces() const override {
std::vector<StorageServerInterface> getLastKnownServerInterfaces() const override {
return collect<StorageServerInterface>(
[](IDataDistributionTeam const& team) { return team.getLastKnownServerInterfaces(); });
}
@ -138,11 +139,11 @@ public:
return totalSize;
}
vector<UID> const& getServerIDs() const override {
std::vector<UID> const& getServerIDs() const override {
static vector<UID> tempServerIDs;
tempServerIDs.clear();
for (const auto& team : teams) {
vector<UID> const &childIDs = team->getServerIDs();
std::vector<UID> const& childIDs = team->getServerIDs();
tempServerIDs.insert(tempServerIDs.end(), childIDs.begin(), childIDs.end());
}
return tempServerIDs;
@ -185,7 +186,7 @@ public:
}
Future<Void> updateStorageMetrics() override {
vector<Future<Void>> futures;
std::vector<Future<Void>> futures;
for (auto& team : teams) {
futures.push_back(team->updateStorageMetrics());
@ -248,7 +249,7 @@ public:
};
struct Busyness {
vector<int> ledger;
std::vector<int> ledger;
Busyness() : ledger( 10, 0 ) {}
@ -554,8 +555,8 @@ struct DDQueueData {
if(keyServersEntries.size() < SERVER_KNOBS->DD_QUEUE_MAX_KEY_SERVERS) {
for( int shard = 0; shard < keyServersEntries.size(); shard++ ) {
vector<UID> src, dest;
decodeKeyServersValue( UIDtoTagMap, keyServersEntries[shard].value, src, dest );
std::vector<UID> src, dest;
decodeKeyServersValue(UIDtoTagMap, keyServersEntries[shard].value, src, dest);
ASSERT( src.size() );
for( int i = 0; i < src.size(); i++ ) {
servers.insert( src[i] );
@ -859,7 +860,7 @@ struct DDQueueData {
startedHere++;
// update both inFlightActors and inFlight key range maps, cancelling deleted RelocateShards
vector<KeyRange> ranges;
std::vector<KeyRange> ranges;
inFlightActors.getRangesAffectedByInsertion( rd.keys, ranges );
inFlightActors.cancel( KeyRangeRef( ranges.front().begin, ranges.back().end ) );
inFlight.insert( rd.keys, rd );
@ -1440,7 +1441,7 @@ ACTOR Future<Void> dataDistributionQueue(Database cx, PromiseStream<RelocateShar
state RelocateData launchData;
state Future<Void> recordMetrics = delay(SERVER_KNOBS->DD_QUEUE_LOGGING_INTERVAL);
state vector<Future<Void>> balancingFutures;
state std::vector<Future<Void>> balancingFutures;
state ActorCollectionNoErrors actors;
state PromiseStream<KeyRange> rangesComplete;

View File

@ -91,14 +91,43 @@ struct DataDistributionTracker {
// Read hot detection
PromiseStream<KeyRange> readHotShard;
// The reference to trackerCancelled must be extracted by actors, because
// by the time trackerCancelled == true this object's memory can no longer
// be accessed
bool const& trackerCancelled;
// This class extracts the trackerCancelled reference from a DataDistributionTracker object.
// Because some actors spawned by the dataDistributionTracker outlive the DataDistributionTracker
// object, we must guard against use-after-free errors by going through this SafeAccessor
// functor whenever those actors touch the DataDistributionTracker object.
class SafeAccessor {
bool const& trackerCancelled;
DataDistributionTracker& tracker;
public:
SafeAccessor(DataDistributionTracker* tracker)
: trackerCancelled(tracker->trackerCancelled), tracker(*tracker) {
ASSERT(!trackerCancelled);
}
DataDistributionTracker* operator()() {
if (trackerCancelled) {
TEST(true); // Trying to access DataDistributionTracker after tracker has been cancelled
throw dd_tracker_cancelled();
}
return &tracker;
}
};
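A minimal standalone sketch of this guard pattern, assuming the cancellation flag outlives the tracker (names simplified; FDB throws dd_tracker_cancelled() rather than a std exception):
#include <stdexcept>

struct Tracker { int pendingWork = 0; };

class SafeAccessorSketch {
    const bool& cancelled; // lives in the caller's frame, outliving the tracker
    Tracker& tracker;      // dangles once cancelled == true; never touched then
public:
    SafeAccessorSketch(const bool& cancelledFlag, Tracker* t)
        : cancelled(cancelledFlag), tracker(*t) {}
    Tracker* operator()() {
        if (cancelled) throw std::runtime_error("tracker cancelled");
        return &tracker; // only dereferenced while the tracker is still alive
    }
};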
DataDistributionTracker(Database cx, UID distributorId, Promise<Void> const& readyToStart,
PromiseStream<RelocateShard> const& output,
Reference<ShardsAffectedByTeamFailure> shardsAffectedByTeamFailure,
Reference<AsyncVar<bool>> anyZeroHealthyTeams, KeyRangeMap<ShardTrackedData>& shards)
Reference<AsyncVar<bool>> anyZeroHealthyTeams, KeyRangeMap<ShardTrackedData>& shards,
bool const& trackerCancelled)
: cx(cx), distributorId(distributorId), dbSizeEstimate(new AsyncVar<int64_t>()), systemSizeEstimate(0),
maxShardSize(new AsyncVar<Optional<int64_t>>()), sizeChanges(false), readyToStart(readyToStart), output(output),
shardsAffectedByTeamFailure(shardsAffectedByTeamFailure), anyZeroHealthyTeams(anyZeroHealthyTeams),
shards(shards) {}
shards(shards), trackerCancelled(trackerCancelled) {}
~DataDistributionTracker()
{
@ -150,7 +179,7 @@ int64_t getMaxShardSize( double dbSizeEstimate ) {
(int64_t)SERVER_KNOBS->MAX_SHARD_BYTES);
}
ACTOR Future<Void> trackShardMetrics(DataDistributionTracker* self, KeyRange keys,
ACTOR Future<Void> trackShardMetrics(DataDistributionTracker::SafeAccessor self, KeyRange keys,
Reference<AsyncVar<Optional<ShardMetrics>>> shardMetrics) {
state BandwidthStatus bandwidthStatus = shardMetrics->get().present() ? getBandwidthStatus( shardMetrics->get().get().metrics ) : BandwidthStatusNormal;
state double lastLowBandwidthStartTime = shardMetrics->get().present() ? shardMetrics->get().get().lastLowBandwidthStartTime : now();
@ -209,7 +238,7 @@ ACTOR Future<Void> trackShardMetrics(DataDistributionTracker* self, KeyRange key
// TraceEvent("RHDTriggerReadHotLoggingForShard")
// .detail("ShardBegin", keys.begin.printable().c_str())
// .detail("ShardEnd", keys.end.printable().c_str());
self->readHotShard.send(keys);
self()->readHotShard.send(keys);
} else {
ASSERT(false);
}
@ -230,7 +259,8 @@ ACTOR Future<Void> trackShardMetrics(DataDistributionTracker* self, KeyRange key
bounds.permittedError.iosPerKSecond = bounds.permittedError.infinity;
loop {
Transaction tr(self->cx);
Transaction tr(self()->cx);
// metrics.second is the number of key-ranges (i.e., shards) in the 'keys' key-range
std::pair<Optional<StorageMetrics>, int> metrics = wait( tr.waitStorageMetrics( keys, bounds.min, bounds.max, bounds.permittedError, CLIENT_KNOBS->STORAGE_METRICS_SHARD_LIMIT, shardCount ) );
if(metrics.first.present()) {
BandwidthStatus newBandwidthStatus = getBandwidthStatus( metrics.first.get() );
@ -253,9 +283,11 @@ ACTOR Future<Void> trackShardMetrics(DataDistributionTracker* self, KeyRange key
.detail("TrackerID", trackerID);*/
if( shardMetrics->get().present() ) {
self->dbSizeEstimate->set( self->dbSizeEstimate->get() + metrics.first.get().bytes - shardMetrics->get().get().metrics.bytes );
self()->dbSizeEstimate->set(self()->dbSizeEstimate->get() + metrics.first.get().bytes -
shardMetrics->get().get().metrics.bytes);
if(keys.begin >= systemKeys.begin) {
self->systemSizeEstimate += metrics.first.get().bytes - shardMetrics->get().get().metrics.bytes;
self()->systemSizeEstimate +=
metrics.first.get().bytes - shardMetrics->get().get().metrics.bytes;
}
}
@ -272,8 +304,9 @@ ACTOR Future<Void> trackShardMetrics(DataDistributionTracker* self, KeyRange key
}
}
} catch( Error &e ) {
if (e.code() != error_code_actor_cancelled)
self->output.sendError(e); // Propagate failure to dataDistributionTracker
if (e.code() != error_code_actor_cancelled && e.code() != error_code_dd_tracker_cancelled) {
self()->output.sendError(e); // Propagate failure to dataDistributionTracker
}
throw e;
}
}
@ -382,16 +415,19 @@ ACTOR Future<Void> changeSizes( DataDistributionTracker* self, KeyRange keys, in
}
struct HasBeenTrueFor : ReferenceCounted<HasBeenTrueFor> {
explicit HasBeenTrueFor( Optional<ShardMetrics> value ) {
explicit HasBeenTrueFor(const Optional<ShardMetrics>& value) {
if(value.present()) {
trigger = delayJittered(std::max(0.0, SERVER_KNOBS->DD_MERGE_COALESCE_DELAY + value.get().lastLowBandwidthStartTime - now()), TaskPriority::DataDistributionLow ) || cleared.getFuture();
}
}
Future<Void> set() {
Future<Void> set(double lastLowBandwidthStartTime) {
if( !trigger.isValid() ) {
cleared = Promise<Void>();
trigger = delayJittered( SERVER_KNOBS->DD_MERGE_COALESCE_DELAY, TaskPriority::DataDistributionLow ) || cleared.getFuture();
trigger =
delayJittered(SERVER_KNOBS->DD_MERGE_COALESCE_DELAY + std::max(lastLowBandwidthStartTime - now(), 0.0),
TaskPriority::DataDistributionLow) ||
cleared.getFuture();
}
return trigger;
}
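A worked example of the two delay computations, using a hypothetical DD_MERGE_COALESCE_DELAY of 120 seconds:
#include <algorithm>
#include <cassert>

int main() {
    const double DD_MERGE_COALESCE_DELAY = 120.0; // hypothetical knob value
    double now = 1000.0;
    double lastLowBandwidthStartTime = now - 90.0; // bandwidth went low 90 s ago
    // Constructor path: only the remainder of the coalesce window is waited.
    double ctorDelay = std::max(0.0, DD_MERGE_COALESCE_DELAY + lastLowBandwidthStartTime - now);
    assert(ctorDelay == 30.0);
    // set() path: the start time is in the past, so the max() term is 0 and
    // the full window applies from this call.
    double setDelay = DD_MERGE_COALESCE_DELAY + std::max(lastLowBandwidthStartTime - now, 0.0);
    assert(setDelay == 120.0);
}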
@ -558,6 +594,8 @@ Future<Void> shardMerger(
shardsMerged++;
auto shardBounds = getShardSizeBounds( merged, maxShardSize );
// If we only received the current shard's metrics recently (i.e., less than DD_LOW_BANDWIDTH_DELAY ago),
// the shard's metrics may not be stable yet, so we cannot continue merging in this direction.
if( endingStats.bytes >= shardBounds.min.bytes ||
getBandwidthStatus( endingStats ) != BandwidthStatusLow ||
now() - lastLowBandwidthStartTime < SERVER_KNOBS->DD_LOW_BANDWIDTH_DELAY ||
@ -588,13 +626,21 @@ Future<Void> shardMerger(
// Restarting the shard tracker will dereference values in the shard map, so make a copy
KeyRange mergeRange = merged;
// OldKeys: The original key range whose shards are merged into one shard defined by NewKeys;
// NewKeys: The new key range after the shards are merged;
// EndingSize: The size of the new merged shard in bytes;
// BatchedMerges: The number of shards merged. Each shard is defined in self->shards;
// LastLowBandwidthStartTime: When a shard's bandwidth status became BandwidthStatusLow. If a shard's status
// became BandwidthStatusLow less than DD_LOW_BANDWIDTH_DELAY ago, the merging logic stops at that shard;
// ShardCount: The number of non-splittable shards that are merged. Each shard defined in self->shards may
// itself aggregate more than one shard.
TraceEvent("RelocateShardMergeMetrics", self->distributorId)
.detail("OldKeys", keys)
.detail("NewKeys", mergeRange)
.detail("EndingSize", endingStats.bytes)
.detail("BatchedMerges", shardsMerged)
.detail("LastLowBandwidthStartTime", lastLowBandwidthStartTime)
.detail("ShardCount", shardCount);
.detail("OldKeys", keys)
.detail("NewKeys", mergeRange)
.detail("EndingSize", endingStats.bytes)
.detail("BatchedMerges", shardsMerged)
.detail("LastLowBandwidthStartTime", lastLowBandwidthStartTime)
.detail("ShardCount", shardCount);
if(mergeRange.begin < systemKeys.begin) {
self->systemSizeEstimate -= systemBytes;
@ -629,7 +675,7 @@ ACTOR Future<Void> shardEvaluator(
// Every invocation must set this or clear it
if(shouldMerge && !self->anyZeroHealthyTeams->get()) {
auto whenLongEnough = wantsToMerge->set();
auto whenLongEnough = wantsToMerge->set(shardSize->get().get().lastLowBandwidthStartTime);
if( !wantsToMerge->hasBeenTrueForLongEnough() ) {
onChange = onChange || whenLongEnough;
}
@ -664,18 +710,14 @@ ACTOR Future<Void> shardEvaluator(
return Void();
}
ACTOR Future<Void> shardTracker(
DataDistributionTracker* self,
KeyRange keys,
Reference<AsyncVar<Optional<ShardMetrics>>> shardSize)
{
wait( yieldedFuture(self->readyToStart.getFuture()) );
ACTOR Future<Void> shardTracker(DataDistributionTracker::SafeAccessor self, KeyRange keys,
Reference<AsyncVar<Optional<ShardMetrics>>> shardSize) {
wait(yieldedFuture(self()->readyToStart.getFuture()));
if( !shardSize->get().present() )
wait( shardSize->onChange() );
if( !self->maxShardSize->get().present() )
wait( yieldedFuture(self->maxShardSize->onChange()) );
if (!self()->maxShardSize->get().present()) wait(yieldedFuture(self()->maxShardSize->onChange()));
// Since maxShardSize will become present for all shards at once, avoid slow tasks with a short delay
wait( delay( 0, TaskPriority::DataDistribution ) );
@ -683,27 +725,26 @@ ACTOR Future<Void> shardTracker(
// Survives multiple calls to shardEvaluator and keeps merges from happening too quickly.
state Reference<HasBeenTrueFor> wantsToMerge( new HasBeenTrueFor( shardSize->get() ) );
/*TraceEvent("ShardTracker", self->distributorId)
.detail("Begin", keys.begin)
.detail("End", keys.end)
.detail("TrackerID", trackerID)
.detail("MaxBytes", self->maxShardSize->get().get())
.detail("ShardSize", shardSize->get().get().bytes)
.detail("BytesPerKSec", shardSize->get().get().bytesPerKSecond);*/
/*TraceEvent("ShardTracker", self()->distributorId)
.detail("Begin", keys.begin)
.detail("End", keys.end)
.detail("TrackerID", trackerID)
.detail("MaxBytes", self()->maxShardSize->get().get())
.detail("ShardSize", shardSize->get().get().bytes)
.detail("BytesPerKSec", shardSize->get().get().bytesPerKSecond);*/
try {
loop {
// Use the current known size to check for (and start) splits and merges.
wait( shardEvaluator( self, keys, shardSize, wantsToMerge ) );
wait(shardEvaluator(self(), keys, shardSize, wantsToMerge));
// We could have a lot of actors being released from the previous wait at the same time. Immediately calling
// delay(0) mitigates the resulting SlowTask
wait( delay(0, TaskPriority::DataDistribution) );
}
} catch (Error& e) {
// If e is broken_promise then self may have already been deleted
if (e.code() != error_code_actor_cancelled && e.code() != error_code_broken_promise) {
self->output.sendError(e); // Propagate failure to dataDistributionTracker
if (e.code() != error_code_actor_cancelled && e.code() != error_code_dd_tracker_cancelled) {
self()->output.sendError(e); // Propagate failure to dataDistributionTracker
}
throw e;
}
@ -735,8 +776,8 @@ void restartShardTrackers(DataDistributionTracker* self, KeyRangeRef keys, Optio
ShardTrackedData data;
data.stats = shardMetrics;
data.trackShard = shardTracker(self, ranges[i], shardMetrics);
data.trackBytes = trackShardMetrics(self, ranges[i], shardMetrics);
data.trackShard = shardTracker(DataDistributionTracker::SafeAccessor(self), ranges[i], shardMetrics);
data.trackBytes = trackShardMetrics(DataDistributionTracker::SafeAccessor(self), ranges[i], shardMetrics);
self->shards.insert( ranges[i], data );
}
}
@ -859,9 +900,10 @@ ACTOR Future<Void> dataDistributionTracker(Reference<InitialDataDistribution> in
PromiseStream<GetMetricsListRequest> getShardMetricsList,
FutureStream<Promise<int64_t>> getAverageShardBytes,
Promise<Void> readyToStart, Reference<AsyncVar<bool>> anyZeroHealthyTeams,
UID distributorId, KeyRangeMap<ShardTrackedData>* shards) {
UID distributorId, KeyRangeMap<ShardTrackedData>* shards,
bool const* trackerCancelled) {
state DataDistributionTracker self(cx, distributorId, readyToStart, output, shardsAffectedByTeamFailure,
anyZeroHealthyTeams, *shards);
anyZeroHealthyTeams, *shards, *trackerCancelled);
state Future<Void> loggingTrigger = Void();
state Future<Void> readHotDetect = readHotDetector(&self);
try {

View File

@ -27,6 +27,9 @@ rocksdb::ColumnFamilyOptions getCFOptions() {
rocksdb::ColumnFamilyOptions options;
options.level_compaction_dynamic_level_bytes = true;
options.OptimizeLevelStyleCompaction(SERVER_KNOBS->ROCKSDB_MEMTABLE_BYTES);
if (SERVER_KNOBS->ROCKSDB_PERIODIC_COMPACTION_SECONDS > 0) {
options.periodic_compaction_seconds = SERVER_KNOBS->ROCKSDB_PERIODIC_COMPACTION_SECONDS;
}
// Compact sstables when there's too much deleted stuff.
options.table_properties_collector_factories = { rocksdb::NewCompactOnDeletionCollectorFactory(128, 1) };
return options;
@ -52,7 +55,7 @@ struct RocksDBKeyValueStore : IKeyValueStore {
explicit Writer(DB& db, UID id) : db(db), id(id) {}
~Writer() {
~Writer() override {
if (db) {
delete db;
}
@ -83,24 +86,49 @@ struct RocksDBKeyValueStore : IKeyValueStore {
TraceEvent(SevError, "RocksDBError").detail("Error", status.ToString()).detail("Method", "Open");
a.done.sendError(statusToError(status));
} else {
TraceEvent(SevInfo, "RocksDB").detail("Path", a.path).detail("Method", "Open");
a.done.send(Void());
}
}
struct DeleteVisitor : public rocksdb::WriteBatch::Handler {
VectorRef<KeyRangeRef>& deletes;
Arena& arena;
DeleteVisitor(VectorRef<KeyRangeRef>& deletes, Arena& arena) : deletes(deletes), arena(arena) {}
rocksdb::Status DeleteRangeCF(uint32_t /*column_family_id*/, const rocksdb::Slice& begin,
const rocksdb::Slice& end) override {
KeyRangeRef kr(toStringRef(begin), toStringRef(end));
deletes.push_back_deep(arena, kr);
return rocksdb::Status::OK();
}
};
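A hedged usage sketch of this visitor (standalone types; the real code accumulates KeyRangeRefs into an Arena-backed VectorRef):
#include <cassert>
#include <string>
#include <utility>
#include <vector>
#include <rocksdb/slice.h>
#include <rocksdb/write_batch.h>

int main() {
    rocksdb::WriteBatch batch;
    batch.DeleteRange("a", "m");

    struct Collector : rocksdb::WriteBatch::Handler {
        std::vector<std::pair<std::string, std::string>> ranges;
        rocksdb::Status DeleteRangeCF(uint32_t /*cf*/, const rocksdb::Slice& begin,
                                      const rocksdb::Slice& end) override {
            ranges.emplace_back(begin.ToString(), end.ToString());
            return rocksdb::Status::OK();
        }
    } collector;

    // Iterate replays the batch against the handler, invoking DeleteRangeCF
    // once per range delete recorded in the batch.
    assert(batch.Iterate(&collector).ok());
    assert(collector.ranges.size() == 1);
}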
struct CommitAction : TypedAction<Writer, CommitAction> {
std::unique_ptr<rocksdb::WriteBatch> batchToCommit;
ThreadReturnPromise<Void> done;
double getTimeEstimate() const override { return SERVER_KNOBS->COMMIT_TIME_ESTIMATE; }
};
void action(CommitAction& a) {
Standalone<VectorRef<KeyRangeRef>> deletes;
DeleteVisitor dv(deletes, deletes.arena());
ASSERT(a.batchToCommit->Iterate(&dv).ok());
// If the batch contains any range deletes, the visitor must have collected them.
ASSERT(!deletes.empty() || !a.batchToCommit->HasDeleteRange());
rocksdb::WriteOptions options;
options.sync = true;
options.sync = !SERVER_KNOBS->ROCKSDB_UNSAFE_AUTO_FSYNC;
auto s = db->Write(options, a.batchToCommit.get());
if (!s.ok()) {
TraceEvent(SevError, "RocksDBError").detail("Error", s.ToString()).detail("Method", "Commit");
a.done.sendError(statusToError(s));
} else {
a.done.send(Void());
for (const auto& keyRange : deletes) {
auto begin = toSlice(keyRange.begin);
auto end = toSlice(keyRange.end);
ASSERT(db->SuggestCompactRange(db->DefaultColumnFamily(), &begin, &end).ok());
}
}
}
@ -112,6 +140,10 @@ struct RocksDBKeyValueStore : IKeyValueStore {
double getTimeEstimate() const override { return SERVER_KNOBS->COMMIT_TIME_ESTIMATE; }
};
void action(CloseAction& a) {
if (db == nullptr) {
a.done.send(Void());
return;
}
auto s = db->Close();
if (!s.ok()) {
TraceEvent(SevError, "RocksDBError").detail("Error", s.ToString()).detail("Method", "Close");
@ -119,8 +151,14 @@ struct RocksDBKeyValueStore : IKeyValueStore {
if (a.deleteOnClose) {
std::vector<rocksdb::ColumnFamilyDescriptor> defaultCF = { rocksdb::ColumnFamilyDescriptor{
"default", getCFOptions() } };
rocksdb::DestroyDB(a.path, getOptions(), defaultCF);
s = rocksdb::DestroyDB(a.path, getOptions(), defaultCF);
if (!s.ok()) {
TraceEvent(SevError, "RocksDBError").detail("Error", s.ToString()).detail("Method", "Destroy");
} else {
TraceEvent(SevInfo, "RocksDB").detail("Path", a.path).detail("Method", "Destroy");
}
}
TraceEvent(SevInfo, "RocksDB").detail("Path", a.path).detail("Method", "Close");
a.done.send(Void());
}
};
@ -264,7 +302,6 @@ struct RocksDBKeyValueStore : IKeyValueStore {
UID id;
Reference<IThreadPool> writeThread;
Reference<IThreadPool> readThreads;
unsigned nReaders = 16;
Promise<Void> errorPromise;
Promise<Void> closePromise;
std::unique_ptr<rocksdb::WriteBatch> writeBatch;
@ -276,7 +313,7 @@ struct RocksDBKeyValueStore : IKeyValueStore {
writeThread = createGenericThreadPool();
readThreads = createGenericThreadPool();
writeThread->addThread(new Writer(db, id));
for (unsigned i = 0; i < nReaders; ++i) {
for (unsigned i = 0; i < SERVER_KNOBS->ROCKSDB_READ_PARALLELISM; ++i) {
readThreads->addThread(new Reader(db));
}
}
@ -368,16 +405,14 @@ struct RocksDBKeyValueStore : IKeyValueStore {
}
StorageBytes getStorageBytes() const override {
uint64_t live = 0;
ASSERT(db->GetIntProperty(rocksdb::DB::Properties::kEstimateLiveDataSize, &live));
int64_t free;
int64_t total;
uint64_t sstBytes = 0;
ASSERT(db->GetIntProperty(rocksdb::DB::Properties::kTotalSstFilesSize, &sstBytes));
uint64_t memtableBytes = 0;
ASSERT(db->GetIntProperty(rocksdb::DB::Properties::kSizeAllMemTables, &memtableBytes));
g_network->getDiskBytes(path, free, total);
return StorageBytes(free, total, sstBytes + memtableBytes, free);
return StorageBytes(free, total, live, free);
}
};

View File

@ -25,6 +25,7 @@
#include "fdbserver/CoroFlow.h"
#include "fdbserver/Knobs.h"
#include "flow/Hash3.h"
#include "flow/xxhash.h"
extern "C" {
#include "fdbserver/sqlite/sqliteInt.h"
@ -94,28 +95,54 @@ struct PageChecksumCodec {
SumType *pSumInPage = (SumType *)(pData + dataLen);
if (write) {
// Always write a CRC32 checksum for new pages
pSumInPage->part1 = 0; // Indicates CRC32 is being used
pSumInPage->part2 = crc32c_append(0xfdbeefdb, static_cast<uint8_t*>(data), dataLen);
// Always write an xxHash3 checksum for new pages.
// The first 8 bits of part1 are set to 0 so that, with high probability,
// pages whose checksums were written with hashlittle2 can be recognized
// on read without calculating an xxHash3 checksum
auto xxHash3 = XXH3_64bits(data, dataLen);
pSumInPage->part1 = static_cast<uint32_t>((xxHash3 >> 32) & 0x00ffffff);
pSumInPage->part2 = static_cast<uint32_t>(xxHash3 & 0xffffffff);
return true;
}
SumType sum;
SumType crc32Sum;
if (pSumInPage->part1 == 0) {
// part1 being 0 indicates with high probability that a CRC32 checksum
// part1 being 0 indicates with very high probability that a CRC32 checksum
// was used, so check that first. If this checksum fails, there is still
// some chance the page was written with hashlittle2, so fall back to checking
// hashlittle2
sum.part1 = 0;
sum.part2 = crc32c_append(0xfdbeefdb, static_cast<uint8_t*>(data), dataLen);
if (sum == *pSumInPage) return true;
// some chance the page was written with another checksum algorithm
crc32Sum.part1 = 0;
crc32Sum.part2 = crc32c_append(0xfdbeefdb, static_cast<uint8_t*>(data), dataLen);
if (crc32Sum == *pSumInPage) {
TEST(true); // Read CRC32 checksum
return true;
}
}
// Try xxhash3
SumType xxHash3Sum;
if ((pSumInPage->part1 >> 24) == 0) {
// The first 8 bits of part1 being 0 indicates with high probability that an
// xxHash3 checksum was used, so check that next. If this checksum fails, there is
// still some chance the page was written with hashlittle2, so fall back to checking
// hashlittle2
auto xxHash3 = XXH3_64bits(data, dataLen);
xxHash3Sum.part1 = static_cast<uint32_t>((xxHash3 >> 32) & 0x00ffffff);
xxHash3Sum.part2 = static_cast<uint32_t>(xxHash3 & 0xffffffff);
if (xxHash3Sum == *pSumInPage) {
TEST(true); // Read xxHash3 checksum
return true;
}
}
// Try hashlittle2
SumType hashLittle2Sum;
hashLittle2Sum.part1 = pageNumber; // DO NOT CHANGE
hashLittle2Sum.part2 = 0x5ca1ab1e;
hashlittle2(pData, dataLen, &hashLittle2Sum.part1, &hashLittle2Sum.part2);
if (hashLittle2Sum == *pSumInPage) return true;
if (hashLittle2Sum == *pSumInPage) {
TEST(true); // Read HashLittle2 checksum
return true;
}
if (!silent) {
TraceEvent trEvent(SevError, "SQLitePageChecksumFailure");
@ -127,7 +154,12 @@ struct PageChecksumCodec {
.detail("PageSize", pageLen)
.detail("ChecksumInPage", pSumInPage->toString())
.detail("ChecksumCalculatedHL2", hashLittle2Sum.toString());
if (pSumInPage->part1 == 0) trEvent.detail("ChecksumCalculatedCRC", sum.toString());
if (pSumInPage->part1 == 0) {
trEvent.detail("ChecksumCalculatedCRC", crc32Sum.toString());
}
if (pSumInPage->part1 >> 24 == 0) {
trEvent.detail("ChecksumCalculatedXXHash3", xxHash3Sum.toString());
}
}
return false;
}
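A standalone sketch of the checksum tagging scheme described above (the SumType layout of two uint32_t parts is assumed to match the surrounding code):
#include <cassert>
#include <cstdint>

struct Sum { uint32_t part1, part2; };

// xxHash3 sums zero the top 8 bits of part1; CRC32 sums use part1 == 0 exactly.
// hashlittle2 writes hash output into both parts, so its top 8 bits are nonzero
// with probability 255/256, letting readers pick the right verification path cheaply.
Sum encodeXXHash3(uint64_t h) {
    return { static_cast<uint32_t>((h >> 32) & 0x00ffffff),
             static_cast<uint32_t>(h & 0xffffffff) };
}

int main() {
    Sum s = encodeXXHash3(0xdeadbeefcafef00dULL);
    assert((s.part1 >> 24) == 0); // recognizable as an xxHash3 sum on read
    assert(s.part2 == 0xcafef00d);
}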

View File

@ -98,9 +98,10 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
init( PEEK_STATS_SLOW_RATIO, 0.5 );
init( PUSH_RESET_INTERVAL, 300.0 ); if ( randomize && BUGGIFY ) PUSH_RESET_INTERVAL = 20.0;
init( PUSH_MAX_LATENCY, 0.5 ); if ( randomize && BUGGIFY ) PUSH_MAX_LATENCY = 0.0;
init( PUSH_STATS_INTERVAL, 10.0 );
init( PUSH_STATS_INTERVAL, 10.0 );
init( PUSH_STATS_SLOW_AMOUNT, 2 );
init( PUSH_STATS_SLOW_RATIO, 0.5 );
init( TLOG_POP_BATCH_SIZE, 1000 ); if ( randomize && BUGGIFY ) TLOG_POP_BATCH_SIZE = 10;
// disk snapshot max timeout, to be put in TLog, storage and coordinator nodes
init( SNAP_CREATE_MAX_TIMEOUT, 300.0 );
@ -318,7 +319,10 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
// KeyValueStoreRocksDB
init( ROCKSDB_BACKGROUND_PARALLELISM, 0 );
init( ROCKSDB_READ_PARALLELISM, 4 );
init( ROCKSDB_MEMTABLE_BYTES, 512 * 1024 * 1024 );
init( ROCKSDB_UNSAFE_AUTO_FSYNC, false );
init( ROCKSDB_PERIODIC_COMPACTION_SECONDS, 0 );
// Leader election
bool longLeaderElection = randomize && BUGGIFY;
@ -579,6 +583,9 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
init( TAG_MEASUREMENT_INTERVAL, 30.0 ); if( randomize && BUGGIFY ) TAG_MEASUREMENT_INTERVAL = 1.0;
init( READ_COST_BYTE_FACTOR, 16384 ); if( randomize && BUGGIFY ) READ_COST_BYTE_FACTOR = 4096;
init( PREFIX_COMPRESS_KVS_MEM_SNAPSHOTS, true ); if( randomize && BUGGIFY ) PREFIX_COMPRESS_KVS_MEM_SNAPSHOTS = false;
init( REPORT_DD_METRICS, true );
init( DD_METRICS_REPORT_INTERVAL, 30.0 );
init( FETCH_KEYS_TOO_LONG_TIME_CRITERIA, 300.0 );
//Wait Failure
init( MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS, 250 ); if( randomize && BUGGIFY ) MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS = 2;
@ -611,6 +618,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
init( MAX_STATUS_REQUESTS_PER_SECOND, 256.0 );
init( CONFIGURATION_ROWS_TO_FETCH, 20000 );
init( DISABLE_DUPLICATE_LOG_WARNING, false );
init( HISTOGRAM_REPORT_INTERVAL, 300.0 );
// IPager
init( PAGER_RESERVED_PAGES, 1 );

View File

@ -99,6 +99,7 @@ public:
double PUSH_STATS_INTERVAL;
double PUSH_STATS_SLOW_AMOUNT;
double PUSH_STATS_SLOW_RATIO;
int TLOG_POP_BATCH_SIZE;
// Data distribution queue
double HEALTH_POLL_TIME;
@ -253,7 +254,10 @@ public:
// KeyValueStoreRocksDB
int ROCKSDB_BACKGROUND_PARALLELISM;
int ROCKSDB_READ_PARALLELISM;
int64_t ROCKSDB_MEMTABLE_BYTES;
bool ROCKSDB_UNSAFE_AUTO_FSYNC;
int64_t ROCKSDB_PERIODIC_COMPACTION_SECONDS;
// Leader election
int MAX_NOTIFICATIONS;
@ -508,6 +512,9 @@ public:
double TAG_MEASUREMENT_INTERVAL;
int64_t READ_COST_BYTE_FACTOR;
bool PREFIX_COMPRESS_KVS_MEM_SNAPSHOTS;
bool REPORT_DD_METRICS;
double DD_METRICS_REPORT_INTERVAL;
double FETCH_KEYS_TOO_LONG_TIME_CRITERIA;
//Wait Failure
int MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS;
@ -540,6 +547,7 @@ public:
double MAX_STATUS_REQUESTS_PER_SECOND;
int CONFIGURATION_ROWS_TO_FETCH;
bool DISABLE_DUPLICATE_LOG_WARNING;
double HISTOGRAM_REPORT_INTERVAL;
// IPager
int PAGER_RESERVED_PAGES;

View File

@ -80,20 +80,23 @@ struct LogRouterData {
const UID dbgid;
Reference<AsyncVar<Reference<ILogSystem>>> logSystem;
Optional<UID> primaryPeekLocation;
NotifiedVersion version;
NotifiedVersion minPopped;
NotifiedVersion version; // The largest version at which the log router has peeked mutations
// from the satellite or primary tLogs.
NotifiedVersion minPopped; // The minimum version among all tags that has been popped by the remote tLogs.
const Version startVersion;
Version minKnownCommittedVersion;
Version minKnownCommittedVersion; // The minimum durable version among all LRs.
// An LR's durable version is the maximum version of mutations that have been
// popped by the remote tLogs.
Version poppedVersion;
Deque<std::pair<Version, Standalone<VectorRef<uint8_t>>>> messageBlocks;
Tag routerTag;
bool allowPops;
LogSet logSet;
bool foundEpochEnd;
double waitForVersionTime = 0;
double maxWaitForVersionTime = 0;
double getMoreTime = 0;
double maxGetMoreTime = 0;
bool foundEpochEnd; // The cluster is not fully recovered yet; the LR has to handle recovery.
double waitForVersionTime = 0; // The total amount of time the LR waits for the remote tLogs to peek and pop its data.
double maxWaitForVersionTime = 0; // The max single wait time when the LR must wait for the remote tLogs to pop data.
double getMoreTime = 0; // The total amount of time the LR waits for the satellite tLogs' data to become available.
double maxGetMoreTime = 0; // The max wait time the LR spent in a single pull-data request to the satellite tLogs.
int64_t generation = -1;
Reference<Histogram> peekLatencyDist;
@ -105,7 +108,9 @@ struct LogRouterData {
std::map<UID, PeekTrackerData> peekTracker;
CounterCollection cc;
Counter getMoreCount, getMoreBlockedCount;
Counter getMoreCount; // Incremented when the LR tries to pull data from the satellite tLogs.
Counter getMoreBlockedCount; // Incremented when data is not available while the LR tries to pull data from the satellite tLogs.
Future<Void> logger;
Reference<EventCacheHolder> eventCacheHolder;
@ -150,8 +155,10 @@ struct LogRouterData {
eventCacheHolder = makeReference<EventCacheHolder>(dbgid.shortString() + ".PeekLocation");
specialCounter(cc, "Version", [this](){ return this->version.get(); });
// FetchedVersions: How many versions of mutations are buffered at the LR and have not been popped by remote tLogs
specialCounter(cc, "Version", [this]() { return this->version.get(); });
specialCounter(cc, "MinPopped", [this](){ return this->minPopped.get(); });
// TODO: Add minPopped locality and minPoppedId, similar to tLog metrics
specialCounter(cc, "FetchedVersions", [this](){ return std::max<Version>(0, std::min<Version>(SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS, this->version.get() - this->minPopped.get())); });
specialCounter(cc, "MinKnownCommittedVersion", [this](){ return this->minKnownCommittedVersion; });
specialCounter(cc, "PoppedVersion", [this](){ return this->poppedVersion; });
@ -224,8 +231,15 @@ ACTOR Future<Void> waitForVersion( LogRouterData *self, Version ver ) {
// Since one set of log routers is created per generation of transaction logs, the gap caused by epoch end will be within MAX_VERSIONS_IN_FLIGHT of the log routers start version.
state double startTime = now();
if(self->version.get() < self->startVersion) {
// The log router must wait for the remote tLogs to process data whose version is less than
// self->startVersion before it can pull more data (i.e., data after self->startVersion) from the
// satellite tLogs; this prevents the LR from running out of memory by pulling too much data from
// the satellite tLogs at once.
// Note: each commit writes data to both the primary and satellite tLogs, so the satellite tLogs
// can be viewed as part of the primary tLogs.
if(ver > self->startVersion) {
self->version.set(self->startVersion);
// Wait for the remote tLogs to peek and pop from the LR,
// so that the LR's minPopped version can advance to self->startVersion
wait(self->minPopped.whenAtLeast(self->version.get()));
}
self->waitForVersionTime += now() - startTime;
@ -233,6 +247,9 @@ ACTOR Future<Void> waitForVersion( LogRouterData *self, Version ver ) {
return Void();
}
if(!self->foundEpochEnd) {
// Similar to the proxy, which keeps no more than MAX_READ_TRANSACTION_LIFE_VERSIONS transactions
// outstanding, the log router keeps no more than MAX_READ_TRANSACTION_LIFE_VERSIONS versions
// outstanding, because a remote SS cannot roll back to a version more than
// MAX_READ_TRANSACTION_LIFE_VERSIONS in the past.
wait(self->minPopped.whenAtLeast(std::min(self->version.get(), ver - SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS)));
} else {
while(self->minPopped.get() + SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS < ver) {
@ -252,6 +269,7 @@ ACTOR Future<Void> waitForVersion( LogRouterData *self, Version ver ) {
return Void();
}
// The log router pulls data from the satellite tLogs
ACTOR Future<Void> pullAsyncData( LogRouterData *self ) {
state Future<Void> dbInfoChange = Void();
state Reference<ILogSystem::IPeekCursor> r;
@ -583,6 +601,7 @@ ACTOR Future<Void> logRouterCore(
addActor.send( logRouterPeekMessages( &logRouterData, req ) );
}
when( TLogPopRequest req = waitNext( interf.popMessages.getFuture() ) ) {
// Request from a remote tLog to pop data from the LR
addActor.send( logRouterPop( &logRouterData, req ) );
}
when (wait(error)) {}

View File

@ -849,7 +849,7 @@ struct LogPushData : NonCopyable {
for(auto& log : logSystem->getLogSystemConfig().tLogs) {
if(log.isLocal) {
for(int i = 0; i < log.tLogs.size(); i++) {
messagesWriter.push_back( BinaryWriter( AssumeVersion(currentProtocolVersion) ) );
messagesWriter.push_back( BinaryWriter( AssumeVersion(g_network->protocolVersion()) ) );
}
}
}
@ -916,7 +916,7 @@ struct LogPushData : NonCopyable {
msg_locations.clear();
logSystem->getPushLocations(prev_tags, msg_locations, allLocations);
BinaryWriter bw(AssumeVersion(currentProtocolVersion));
BinaryWriter bw(AssumeVersion(g_network->protocolVersion()));
// Metadata messages (currently LogProtocolMessage is the only metadata
// message) should be written before span information. If this isn't a

View File

@ -203,7 +203,6 @@ enum class LogSystemType {
empty = 0, // Never used.
tagPartitioned = 2,
};
BINARY_SERIALIZABLE(LogSystemType);
struct LogSystemConfig {
constexpr static FileIdentifier file_identifier = 16360847;

File diff suppressed because it is too large

View File

@ -342,7 +342,7 @@ ACTOR Future<Void> updateMetricRegistration(Database cx, MetricsConfig *config,
loop {
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
try {
Value timestamp = BinaryWriter::toValue(CompressedInt<int64_t>(now()), AssumeVersion(currentProtocolVersion));
Value timestamp = BinaryWriter::toValue(CompressedInt<int64_t>(now()), AssumeVersion(g_network->protocolVersion()));
for(auto &key : keys) {
//fprintf(stderr, "%s: register: %s\n", collection->address.toString().c_str(), printable(key).c_str());
tr.set(key, timestamp);

View File

@ -51,7 +51,7 @@ TraceEvent debugKeyRangeEnabled( const char* context, Version version, KeyRangeR
}
TraceEvent debugTagsAndMessageEnabled( const char* context, Version version, StringRef commitBlob ) {
BinaryReader rdr(commitBlob, AssumeVersion(currentProtocolVersion));
BinaryReader rdr(commitBlob, AssumeVersion(g_network->protocolVersion()));
while (!rdr.empty()) {
if (*(int32_t*)rdr.peekBytes(4) == VERSION_HEADER) {
int32_t dummy;

View File

@ -1371,7 +1371,7 @@ void peekMessagesFromMemory( Reference<LogData> self, TLogPeekRequest const& req
ACTOR Future<std::vector<StringRef>> parseMessagesForTag( StringRef commitBlob, Tag tag, int logRouters ) {
// See the comment in LogSystem.cpp for the binary format of commitBlob.
state std::vector<StringRef> relevantMessages;
state BinaryReader rd(commitBlob, AssumeVersion(currentProtocolVersion));
state BinaryReader rd(commitBlob, AssumeVersion(g_network->protocolVersion()));
while (!rd.empty()) {
TagsAndMessage tagsAndMessage;
tagsAndMessage.loadFromArena(&rd, nullptr);
@ -2753,7 +2753,7 @@ ACTOR Future<Void> tLogStart( TLogData* self, InitializeTLogRequest req, Localit
bool recovering = (req.recoverFrom.logSystemType == LogSystemType::tagPartitioned);
state Reference<LogData> logData = Reference<LogData>( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID, currentProtocolVersion, req.allTags, recovering ? "Recovered" : "Recruited") );
state Reference<LogData> logData = Reference<LogData>( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID, g_network->protocolVersion(), req.allTags, recovering ? "Recovered" : "Recruited") );
self->id_data[recruited.id()] = logData;
logData->locality = req.locality;
logData->recoveryCount = req.epoch;

View File

@ -28,7 +28,6 @@
// is slightly more detailed and is used by the status infrastructure. But I'm scared to make changes to the former so close to 1.0 release, so I'm making the latter.
enum class RecoveryState { UNINITIALIZED = 0, READING_CSTATE = 1, LOCKING_CSTATE = 2, RECRUITING = 3, RECOVERY_TRANSACTION = 4, WRITING_CSTATE = 5, ACCEPTING_COMMITS = 6, ALL_LOGS_RECRUITED = 7, STORAGE_RECOVERED = 8, FULLY_RECOVERED = 9 };
BINARY_SERIALIZABLE( RecoveryState );
namespace RecoveryStatus {
enum RecoveryStatus {

View File

@ -397,7 +397,7 @@ ACTOR static Future<Void> _parsePartitionedLogFileOnLoader(
// only one clear mutation is generated (i.e., always inserted).
ASSERT(inserted);
ArenaReader rd(buf.arena(), StringRef(message, msgSize), AssumeVersion(currentProtocolVersion));
ArenaReader rd(buf.arena(), StringRef(message, msgSize), AssumeVersion(g_network->protocolVersion()));
MutationRef mutation;
rd >> mutation;

View File

@ -80,7 +80,6 @@ using VersionedMutationsVec = Standalone<VectorRef<VersionedMutation>>;
using SampledMutationsVec = Standalone<VectorRef<SampledMutation>>;
enum class RestoreRole { Invalid = 0, Controller = 1, Loader, Applier };
BINARY_SERIALIZABLE(RestoreRole);
std::string getRoleStr(RestoreRole role);
extern const std::vector<std::string> RestoreRoleStr;
extern int numRoles;
@ -130,4 +129,4 @@ struct RestoreSimpleRequest : TimedRequest {
bool isRangeMutation(MutationRef m);
#endif // FDBSERVER_RESTOREUTIL_H
#endif // FDBSERVER_RESTOREUTIL_H

View File

@ -18,8 +18,10 @@
* limitations under the License.
*/
#include <cstdint>
#include <fstream>
#include <ostream>
#include "fdbrpc/Locality.h"
#include "fdbrpc/simulator.h"
#include "fdbclient/DatabaseContext.h"
#include "fdbserver/TesterInterface.actor.h"
@ -33,7 +35,9 @@
#include "fdbclient/NativeAPI.actor.h"
#include "fdbclient/BackupAgent.actor.h"
#include "fdbclient/versions.h"
#include "flow/ProtocolVersion.h"
#include "flow/actorcompiler.h" // This must be the last #include.
#include "flow/network.h"
#undef max
#undef min
@ -47,9 +51,9 @@ bool destructed = false;
template <class T>
T simulate( const T& in ) {
BinaryWriter writer(AssumeVersion(currentProtocolVersion));
BinaryWriter writer(AssumeVersion(g_network->protocolVersion()));
writer << in;
BinaryReader reader( writer.getData(), writer.getLength(), AssumeVersion(currentProtocolVersion) );
BinaryReader reader( writer.getData(), writer.getLength(), AssumeVersion(g_network->protocolVersion()) );
T out;
reader >> out;
return out;
@ -137,7 +141,7 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<ClusterConnec
std::string* dataFolder, std::string* coordFolder,
std::string baseFolder, ClusterConnectionString connStr,
bool useSeedFile, AgentMode runBackupAgents,
std::string whitelistBinPaths) {
std::string whitelistBinPaths, ProtocolVersion protocolVersion) {
state ISimulator::ProcessInfo *simProcess = g_simulator.getCurrentProcess();
state UID randomId = nondeterministicRandom()->randomUniqueID();
state int cycles = 0;
@ -154,7 +158,7 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<ClusterConnec
state ISimulator::ProcessInfo* process =
g_simulator.newProcess("Server", ip, port, sslEnabled, listenPerProcess, localities, processClass, dataFolder->c_str(),
coordFolder->c_str());
coordFolder->c_str(), protocolVersion);
wait(g_simulator.onProcess(process,
TaskPriority::DefaultYield)); // Now switch execution to the process on which we will run
state Future<ISimulator::KillType> onShutdown = process->onShutdown();
@ -298,7 +302,7 @@ std::map< Optional<Standalone<StringRef>>, std::vector< std::vector< std::string
// The process count is no longer needed: it is now the length of the vector of IPs, since there is one IP per process
ACTOR Future<Void> simulatedMachine(ClusterConnectionString connStr, std::vector<IPAddress> ips, bool sslEnabled, LocalityData localities,
ProcessClass processClass, std::string baseFolder, bool restarting,
bool useSeedFile, AgentMode runBackupAgents, bool sslOnly, std::string whitelistBinPaths) {
bool useSeedFile, AgentMode runBackupAgents, bool sslOnly, std::string whitelistBinPaths, ProtocolVersion protocolVersion) {
state int bootCount = 0;
state std::vector<std::string> myFolders;
state std::vector<std::string> coordFolders;
@ -341,7 +345,13 @@ ACTOR Future<Void> simulatedMachine(ClusterConnectionString connStr, std::vector
Reference<ClusterConnectionFile> clusterFile(useSeedFile ? new ClusterConnectionFile(path, connStr.toString()) : new ClusterConnectionFile(path));
const int listenPort = i*listenPerProcess + 1;
AgentMode agentMode = runBackupAgents == AgentOnly ? ( i == ips.size()-1 ? AgentOnly : AgentNone ) : runBackupAgents;
processes.push_back(simulatedFDBDRebooter(clusterFile, ips[i], sslEnabled, listenPort, listenPerProcess, localities, processClass, &myFolders[i], &coordFolders[i], baseFolder, connStr, useSeedFile, agentMode, whitelistBinPaths));
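// At most one non-backup-agent process per simulation is launched with the incompatible protocolVersion;
// setDiffProtocol latches after the first assignment so every subsequent process uses the default version.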
if(g_simulator.hasDiffProtocolProcess && !g_simulator.setDiffProtocol && agentMode == AgentNone) {
processes.push_back(simulatedFDBDRebooter(clusterFile, ips[i], sslEnabled, listenPort, listenPerProcess, localities, processClass, &myFolders[i], &coordFolders[i], baseFolder, connStr, useSeedFile, agentMode, whitelistBinPaths, protocolVersion));
g_simulator.setDiffProtocol = true;
}
else {
processes.push_back(simulatedFDBDRebooter(clusterFile, ips[i], sslEnabled, listenPort, listenPerProcess, localities, processClass, &myFolders[i], &coordFolders[i], baseFolder, connStr, useSeedFile, agentMode, whitelistBinPaths, g_network->protocolVersion()));
}
TraceEvent("SimulatedMachineProcess", randomId).detail("Address", NetworkAddress(ips[i], listenPort, true, false)).detail("ZoneId", localities.zoneId()).detail("DataHall", localities.dataHallId()).detail("Folder", myFolders[i]);
}
@ -546,7 +556,7 @@ IPAddress makeIPAddressForSim(bool isIPv6, std::array<int, 4> parts) {
ACTOR Future<Void> restartSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFolder, int* pTesterCount,
Optional<ClusterConnectionString>* pConnString,
Standalone<StringRef>* pStartingConfiguration,
int extraDB, std::string whitelistBinPaths) {
int extraDB, std::string whitelistBinPaths, ProtocolVersion protocolVersion) {
CSimpleIni ini;
ini.SetUnicode();
ini.LoadFile(joinPath(baseFolder, "restartInfo.ini").c_str());
@ -645,7 +655,7 @@ ACTOR Future<Void> restartSimulatedSystem(vector<Future<Void>>* systemActors, st
simulatedMachine(conn, ipAddrs, usingSSL, localities, processClass, baseFolder, true,
i == useSeedForMachine, AgentAddition,
usingSSL && (listenersPerProcess == 1 || processClass == ProcessClass::TesterClass),
whitelistBinPaths),
whitelistBinPaths, protocolVersion),
processClass == ProcessClass::TesterClass ? "SimulatedTesterMachine" : "SimulatedMachine"));
}
@ -1052,8 +1062,8 @@ void SimulationConfig::generateNormalConfig(int minimumReplication, int minimumR
void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFolder, int* pTesterCount,
Optional<ClusterConnectionString>* pConnString, Standalone<StringRef>* pStartingConfiguration,
int extraDB, int minimumReplication, int minimumRegions, std::string whitelistBinPaths,
bool configureLocked, int logAntiQuorum) {
int extraDB, int minimumReplication, int minimumRegions, std::string whitelistBinPaths, bool configureLocked,
int logAntiQuorum, ProtocolVersion protocolVersion) {
// SOMEDAY: this does not test multi-interface configurations
SimulationConfig simconfig(extraDB, minimumReplication, minimumRegions);
if (logAntiQuorum != -1) {
@ -1218,6 +1228,7 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
bool requiresExtraDBMachines = extraDB && g_simulator.extraDB->toString() != conn.toString();
int assignedMachines = 0, nonVersatileMachines = 0;
std::vector<ProcessClass::ClassType> processClassesSubSet = {ProcessClass::UnsetClass, ProcessClass::ResolutionClass, ProcessClass::MasterClass};
for( int dc = 0; dc < dataCenters; dc++ ) {
//FIXME: test unset dcID
Optional<Standalone<StringRef>> dcUID = StringRef(format("%d", dc));
@ -1275,7 +1286,7 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
LocalityData localities(Optional<Standalone<StringRef>>(), zoneId, machineId, dcUID);
localities.set(LiteralStringRef("data_hall"), dcUID);
systemActors->push_back(reportErrors(simulatedMachine(conn, ips, sslEnabled,
localities, processClass, baseFolder, false, machine == useSeedForMachine, requiresExtraDBMachines ? AgentOnly : AgentAddition, sslOnly, whitelistBinPaths ), "SimulatedMachine"));
localities, processClass, baseFolder, false, machine == useSeedForMachine, requiresExtraDBMachines ? AgentOnly : AgentAddition, sslOnly, whitelistBinPaths, protocolVersion ), "SimulatedMachine"));
if (requiresExtraDBMachines) {
std::vector<IPAddress> extraIps;
@ -1289,7 +1300,7 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
localities.set(LiteralStringRef("data_hall"), dcUID);
systemActors->push_back(reportErrors(simulatedMachine(*g_simulator.extraDB, extraIps, sslEnabled,
localities,
processClass, baseFolder, false, machine == useSeedForMachine, AgentNone, sslOnly, whitelistBinPaths ), "SimulatedMachine"));
processClass, baseFolder, false, machine == useSeedForMachine, AgentNone, sslOnly, whitelistBinPaths, protocolVersion ), "SimulatedMachine"));
}
assignedMachines++;
@ -1313,13 +1324,18 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
std::vector<IPAddress> ips;
ips.push_back(makeIPAddressForSim(useIPv6, { 3, 4, 3, i + 1 }));
Standalone<StringRef> newZoneId = Standalone<StringRef>(deterministicRandom()->randomUniqueID().toString());
LocalityData localities(Optional<Standalone<StringRef>>(), newZoneId, newZoneId, Optional<Standalone<StringRef>>());
systemActors->push_back( reportErrors( simulatedMachine(
conn, ips, sslEnabled && sslOnly,
localities, ProcessClass(ProcessClass::TesterClass, ProcessClass::CommandLineSource),
baseFolder, false, i == useSeedForMachine, AgentNone, sslEnabled && sslOnly, whitelistBinPaths ),
baseFolder, false, i == useSeedForMachine, AgentNone, sslEnabled && sslOnly, whitelistBinPaths, protocolVersion ),
"SimulatedTesterMachine") );
}
if(g_simulator.setDiffProtocol) {
--(*pTesterCount);
}
*pStartingConfiguration = startingConfigString;
// save some state that we only need when restarting the simulator.
@ -1337,7 +1353,7 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
}
void checkTestConf(const char* testFile, int& extraDB, int& minimumReplication, int& minimumRegions,
int& configureLocked, int& logAntiQuorum) {
int& configureLocked, int& logAntiQuorum, bool& startIncompatibleProcess) {
std::ifstream ifs;
ifs.open(testFile, std::ifstream::in);
if (!ifs.good())
@ -1371,7 +1387,11 @@ void checkTestConf(const char* testFile, int& extraDB, int& minimumReplication,
}
if (attrib == "configureLocked") {
sscanf(value.c_str(), "%d", &configureLocked);
sscanf( value.c_str(), "%d", &configureLocked );
}
if (attrib == "startIncompatibleProcess") {
startIncompatibleProcess = strcmp(value.c_str(), "true") == 0;
}
if (attrib == "logAntiQuorum") {
sscanf(value.c_str(), "%d", &logAntiQuorum);
@ -1391,7 +1411,17 @@ ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool reboot
state int minimumRegions = 0;
state int configureLocked = 0;
state int logAntiQuorum = -1;
checkTestConf(testFile, extraDB, minimumReplication, minimumRegions, configureLocked, logAntiQuorum);
state bool startIncompatibleProcess = false;
checkTestConf(testFile, extraDB, minimumReplication, minimumRegions, configureLocked, logAntiQuorum, startIncompatibleProcess);
g_simulator.hasDiffProtocolProcess = startIncompatibleProcess;
g_simulator.setDiffProtocol = false;
state ProtocolVersion protocolVersion = currentProtocolVersion;
if(startIncompatibleProcess) {
// isolates the rightmost set bit of compatibleProtocolVersionMask to make this protocolVersion incompatible
uint64_t minAddToMakeIncompatible = ProtocolVersion::compatibleProtocolVersionMask & ~(ProtocolVersion::compatibleProtocolVersionMask-1);
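// For any nonzero mask m, m & ~(m - 1) isolates its lowest set bit: subtracting 1 turns the lowest set
// bit into 0 and every bit below it into 1, so complementing and AND-ing keeps exactly that bit.
// Example: m = 0b1100 -> m - 1 = 0b1011 -> m & ~(m - 1) = 0b0100. Adding this to the version flips a
// bit covered by compatibleProtocolVersionMask, the smallest change that makes the versions incompatible.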
protocolVersion = ProtocolVersion(currentProtocolVersion.version() + minAddToMakeIncompatible);
}
// TODO (IPv6) Use IPv6?
wait(g_simulator.onProcess(
@ -1400,7 +1430,7 @@ ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool reboot
Standalone<StringRef>(deterministicRandom()->randomUniqueID().toString()),
Standalone<StringRef>(deterministicRandom()->randomUniqueID().toString()),
Optional<Standalone<StringRef>>()),
ProcessClass(ProcessClass::TesterClass, ProcessClass::CommandLineSource), "", ""),
ProcessClass(ProcessClass::TesterClass, ProcessClass::CommandLineSource), "", "", currentProtocolVersion),
TaskPriority::DefaultYield));
Sim2FileSystem::newFileSystem();
FlowTransport::createInstance(true, 1);
@ -1409,7 +1439,7 @@ ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool reboot
try {
//systemActors.push_back( startSystemMonitor(dataFolder) );
if (rebooting) {
wait( timeoutError( restartSimulatedSystem( &systemActors, dataFolder, &testerCount, &connFile, &startingConfiguration, extraDB, whitelistBinPaths), 100.0 ) );
wait( timeoutError( restartSimulatedSystem( &systemActors, dataFolder, &testerCount, &connFile, &startingConfiguration, extraDB, whitelistBinPaths, protocolVersion), 100.0 ) );
// FIXME: snapshot restore does not support multi-region restore, hence restore it as single region always
if (restoring) {
startingConfiguration = LiteralStringRef("usable_regions=1");
@ -1418,7 +1448,7 @@ ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool reboot
else {
g_expect_full_pointermap = 1;
setupSimulatedSystem(&systemActors, dataFolder, &testerCount, &connFile, &startingConfiguration, extraDB,
minimumReplication, minimumRegions, whitelistBinPaths, configureLocked, logAntiQuorum);
minimumReplication, minimumRegions, whitelistBinPaths, configureLocked, logAntiQuorum, protocolVersion);
wait( delay(1.0) ); // FIXME: WHY!!! //wait for machines to boot
}
std::string clusterFileDir = joinPath( dataFolder, deterministicRandom()->randomUniqueID().toString() );

View File

@ -2518,7 +2518,7 @@ ACTOR Future<StatusReply> clusterGetStatus(
state JsonBuilderObject qos;
state JsonBuilderObject data_overlay;
statusObj["protocol_version"] = format("%" PRIx64, currentProtocolVersion.version());
statusObj["protocol_version"] = format("%" PRIx64, g_network->protocolVersion().version());
statusObj["connection_string"] = coordinators.ccf->getConnectionString().toString();
state Optional<DatabaseConfiguration> configuration;

View File

@ -1752,7 +1752,7 @@ ACTOR Future<Void> pullAsyncData( StorageCacheData *data ) {
//TODO cache servers should write the LogProtocolMessage when they are created
//cloneCursor1->setProtocolVersion(data->logProtocol);
cloneCursor1->setProtocolVersion(currentProtocolVersion);
cloneCursor1->setProtocolVersion(g_network->protocolVersion());
for (; cloneCursor1->hasMessage(); cloneCursor1->nextMessage()) {
ArenaReader& cloneReader = *cloneCursor1->reader();
@ -1820,7 +1820,7 @@ ACTOR Future<Void> pullAsyncData( StorageCacheData *data ) {
//FIXME: ensure this can only read data from the current version
//cloneCursor2->setProtocolVersion(data->logProtocol);
cloneCursor2->setProtocolVersion(currentProtocolVersion);
cloneCursor2->setProtocolVersion(g_network->protocolVersion());
ver = invalidVersion;
// Now process the mutations

View File

@ -27,6 +27,11 @@
#include "fdbserver/Knobs.h"
#include "flow/actorcompiler.h" // This must be the last #include.
const StringRef STORAGESERVER_HISTOGRAM_GROUP = LiteralStringRef("StorageServer");
const StringRef FETCH_KEYS_LATENCY_HISTOGRAM = LiteralStringRef("FetchKeysLatency");
const StringRef FETCH_KEYS_BYTES_HISTOGRAM = LiteralStringRef("FetchKeysSize");
const StringRef FETCH_KEYS_BYTES_PER_SECOND_HISTOGRAM = LiteralStringRef("FetchKeysBandwidth");
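// Identifiers for the fetchKeys histograms; consumed by StorageServer::FetchKeysHistograms, which
// samples latency, fetched bytes, and bandwidth for each fetchKeys run.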
struct StorageMetricSample {
IndexedSet<Key, int64_t> sample;
int64_t metricUnitsPerSample;

View File

@ -44,6 +44,7 @@
#include "fdbserver/WaitFailure.h"
#include "fdbserver/RecoveryState.h"
#include "fdbserver/FDBExecHelper.actor.h"
#include "flow/Histogram.h"
#include "flow/actorcompiler.h" // This must be the last #include.
using std::pair;
@ -341,6 +342,7 @@ struct TLogData : NonCopyable {
FlowLock concurrentLogRouterReads;
FlowLock persistentDataCommitLock;
// Beginning of fields used by snapshot based backup and restore
bool ignorePopRequest; // ignore pop request from storage servers
double ignorePopDeadline; // time until which the ignorePopRequest will be
// honored
@ -352,19 +354,26 @@ struct TLogData : NonCopyable {
std::map<Tag, Version> toBePopped; // map of Tag->Version for all the pops
// that came when ignorePopRequest was set
Reference<AsyncVar<bool>> degraded;
// End of fields used by snapshot based backup and restore
std::vector<TagsAndMessage> tempTagMessages;
TLogData(UID dbgid, UID workerID, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> dbInfo, Reference<AsyncVar<bool>> degraded, std::string folder)
: dbgid(dbgid), workerID(workerID), instanceID(deterministicRandom()->randomUniqueID().first()),
persistentData(persistentData), rawPersistentQueue(persistentQueue), persistentQueue(new TLogQueue(persistentQueue, dbgid)),
dbInfo(dbInfo), degraded(degraded), queueCommitBegin(0), queueCommitEnd(0),
diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false), bytesInput(0), bytesDurable(0), targetVolatileBytes(SERVER_KNOBS->TLOG_SPILL_THRESHOLD), overheadBytesInput(0), overheadBytesDurable(0),
peekMemoryLimiter(SERVER_KNOBS->TLOG_SPILL_REFERENCE_MAX_PEEK_MEMORY_BYTES),
concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS),
ignorePopRequest(false), ignorePopDeadline(), ignorePopUid(), dataFolder(folder), toBePopped()
{
cx = openDBOnServer(dbInfo, TaskPriority::DefaultEndpoint, true, true);
}
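// Samples the wall-clock duration of each non-duplicate tLogCommit; flushed periodically by histogramReport().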
Reference<Histogram> commitLatencyDist;
TLogData(UID dbgid, UID workerID, IKeyValueStore* persistentData, IDiskQueue* persistentQueue,
Reference<AsyncVar<ServerDBInfo>> dbInfo, Reference<AsyncVar<bool>> degraded, std::string folder)
: dbgid(dbgid), workerID(workerID), instanceID(deterministicRandom()->randomUniqueID().first()),
persistentData(persistentData), rawPersistentQueue(persistentQueue),
persistentQueue(new TLogQueue(persistentQueue, dbgid)), dbInfo(dbInfo), degraded(degraded), queueCommitBegin(0),
queueCommitEnd(0), diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false), bytesInput(0), bytesDurable(0),
targetVolatileBytes(SERVER_KNOBS->TLOG_SPILL_THRESHOLD), overheadBytesInput(0), overheadBytesDurable(0),
peekMemoryLimiter(SERVER_KNOBS->TLOG_SPILL_REFERENCE_MAX_PEEK_MEMORY_BYTES),
concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS), ignorePopRequest(false),
ignorePopDeadline(), ignorePopUid(), dataFolder(folder), toBePopped(),
commitLatencyDist(Histogram::getHistogram(LiteralStringRef("tLog"), LiteralStringRef("commit"),
Histogram::Unit::microseconds)) {
cx = openDBOnServer(dbInfo, TaskPriority::DefaultEndpoint, true, true);
}
};
struct LogData : NonCopyable, public ReferenceCounted<LogData> {
@ -454,13 +463,19 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
bool stopped, initialized;
DBRecoveryCount recoveryCount;
VersionMetricHandle persistentDataVersion, persistentDataDurableVersion; // The last version number in the portion of the log (written|durable) to persistentData
NotifiedVersion version, queueCommittedVersion;
// If persistentDataVersion != persistentDurableDataVersion,
// then spilling is happening from persistentDurableDataVersion to persistentDataVersion.
// Data less than persistentDataDurableVersion is spilled on disk (or fully popped from the TLog);
VersionMetricHandle persistentDataVersion, persistentDataDurableVersion; // The last version number in the portion of the log (written|durable) to persistentData
NotifiedVersion version;
NotifiedVersion queueCommittedVersion; // The disk queue has committed up until the queueCommittedVersion version.
Version queueCommittingVersion;
Version knownCommittedVersion, durableKnownCommittedVersion, minKnownCommittedVersion;
Version queuePoppedVersion;
Version knownCommittedVersion; // The maximum version that a proxy has told us is committed (all TLogs have
// ack'd a commit for this version).
Version durableKnownCommittedVersion, minKnownCommittedVersion;
Version queuePoppedVersion; // The disk queue has been popped up until the location which represents this version.
Version minPoppedTagVersion;
Tag minPoppedTag;
Tag minPoppedTag; // The tag that causes the tLog to hold onto its data, making the tLog's disk queue grow.
Deque<std::pair<Version, Standalone<VectorRef<uint8_t>>>> messageBlocks;
std::vector<std::vector<Reference<TagData>>> tag_data; //tag.locality | tag.id
@ -503,7 +518,8 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
Version unrecoveredBefore, recoveredAt;
struct PeekTrackerData {
std::map<int, Promise<std::pair<Version, bool>>> sequence_version;
std::map<int, Promise<std::pair<Version, bool>>>
sequence_version; // second: the Version is the peek's begin version; the bool is onlySpilled
double lastUpdate;
Tag tag;
@ -578,12 +594,15 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
queueCommittedVersion.initMetric(LiteralStringRef("TLog.QueueCommittedVersion"), cc.id);
specialCounter(cc, "Version", [this](){ return this->version.get(); });
specialCounter(cc, "QueueCommittedVersion", [this](){ return this->queueCommittedVersion.get(); });
specialCounter(cc, "QueueCommittedVersion", [this]() { return this->queueCommittedVersion.get(); });
specialCounter(cc, "PersistentDataVersion", [this](){ return this->persistentDataVersion; });
specialCounter(cc, "PersistentDataDurableVersion", [this](){ return this->persistentDataDurableVersion; });
specialCounter(cc, "KnownCommittedVersion", [this](){ return this->knownCommittedVersion; });
specialCounter(cc, "QueuePoppedVersion", [this](){ return this->queuePoppedVersion; });
specialCounter(cc, "MinPoppedTagVersion", [this](){ return this->minPoppedTagVersion; });
specialCounter(cc, "MinPoppedTagVersion", [this]() { return this->minPoppedTagVersion; });
// The locality and id of the tag that is responsible for making the TLog hold onto its oldest piece of data.
// If disk queues are growing and no one is sure why, look here to find the tag responsible for why the
// TLog thinks it can't throw away data.
specialCounter(cc, "MinPoppedTagLocality", [this](){ return this->minPoppedTag.locality; });
specialCounter(cc, "MinPoppedTagId", [this](){ return this->minPoppedTag.id; });
specialCounter(cc, "SharedBytesInput", [tLogData](){ return tLogData->bytesInput; });
@ -805,6 +824,9 @@ ACTOR Future<Void> updatePoppedLocation( TLogData* self, Reference<LogData> logD
return Void();
}
// popDiskQueue runs against the oldest TLog instance, calculates the first location in the disk queue that contains un-popped
// data, and then issues a pop to the disk queue at that location so that anything earlier can be
// removed/forgotten/overwritten. In effect, it applies the effect of TLogPop RPCs to disk.
ACTOR Future<Void> popDiskQueue( TLogData* self, Reference<LogData> logData ) {
if (!logData->initialized) return Void();
@ -1019,20 +1041,6 @@ ACTOR Future<Void> updatePersistentData( TLogData* self, Reference<LogData> logD
}
ACTOR Future<Void> tLogPopCore( TLogData* self, Tag inputTag, Version to, Reference<LogData> logData ) {
if (self->ignorePopRequest) {
TraceEvent(SevDebug, "IgnoringPopRequest").detail("IgnorePopDeadline", self->ignorePopDeadline);
if (self->toBePopped.find(inputTag) == self->toBePopped.end()
|| to > self->toBePopped[inputTag]) {
self->toBePopped[inputTag] = to;
}
// add the pop to the toBePopped map
TraceEvent(SevDebug, "IgnoringPopRequest")
.detail("IgnorePopDeadline", self->ignorePopDeadline)
.detail("Tag", inputTag.toString())
.detail("Version", to);
return Void();
}
state Version upTo = to;
int8_t tagLocality = inputTag.locality;
if (isPseudoLocality(tagLocality)) {
@ -1068,38 +1076,60 @@ ACTOR Future<Void> tLogPopCore( TLogData* self, Tag inputTag, Version to, Refere
return Void();
}
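// Plays back every pop that accumulated while ignorePopRequest was set. Pops are issued in batches of
// TLOG_POP_BATCH_SIZE with a yield() in between so that draining a large backlog cannot starve the run
// loop, and the function only returns once all replayed pops have completed.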
ACTOR Future<Void> processPopRequests(TLogData* self, Reference<LogData> logData) {
state std::vector<Future<Void>> ignoredPops;
state std::map<Tag, Version>::const_iterator it;
state int ignoredPopsPlayed = 0;
state std::map<Tag, Version> toBePopped;
toBePopped = std::move(self->toBePopped);
self->toBePopped.clear();
self->ignorePopRequest = false;
self->ignorePopDeadline = 0.0;
self->ignorePopUid = "";
for (it = toBePopped.cbegin(); it != toBePopped.cend(); ++it) {
const auto& [tag, version] = *it;
TraceEvent("PlayIgnoredPop").detail("Tag", tag.toString()).detail("Version", version);
ignoredPops.push_back(tLogPopCore(self, tag, version, logData));
if (++ignoredPopsPlayed % SERVER_KNOBS->TLOG_POP_BATCH_SIZE == 0) {
TEST(true); // Yielding while processing pop requests
wait(yield());
}
}
wait(waitForAll(ignoredPops));
return Void();
}
ACTOR Future<Void> tLogPop( TLogData* self, TLogPopRequest req, Reference<LogData> logData ) {
// timeout check for ignorePopRequest
if (self->ignorePopRequest && (g_network->now() > self->ignorePopDeadline)) {
TraceEvent("EnableTLogPlayAllIgnoredPops");
// use toBePopped and issue all the pops
std::map<Tag, Version>::iterator it;
vector<Future<Void>> ignoredPops;
self->ignorePopRequest = false;
self->ignorePopUid = "";
self->ignorePopDeadline = 0.0;
for (it = self->toBePopped.begin(); it != self->toBePopped.end(); it++) {
TraceEvent("PlayIgnoredPop")
.detail("Tag", it->first.toString())
.detail("Version", it->second);
ignoredPops.push_back(tLogPopCore(self, it->first, it->second, logData));
}
self->toBePopped.clear();
wait(waitForAll(ignoredPops));
TraceEvent("EnableTLogPlayAllIgnoredPops").detail("IgnoredPopDeadline", self->ignorePopDeadline);
wait(processPopRequests(self, logData));
TraceEvent("ResetIgnorePopRequest")
.detail("Now", g_network->now())
.detail("IgnorePopRequest", self->ignorePopRequest)
.detail("IgnorePopDeadline", self->ignorePopDeadline);
}
wait(tLogPopCore(self, req.tag, req.to, logData));
if (self->ignorePopRequest) {
TraceEvent(SevDebug, "IgnoringPopRequest").detail("IgnorePopDeadline", self->ignorePopDeadline);
auto& v = self->toBePopped[req.tag];
v = std::max(v, req.to);
TraceEvent(SevDebug, "IgnoringPopRequest")
.detail("IgnorePopDeadline", self->ignorePopDeadline)
.detail("Tag", req.tag.toString())
.detail("Version", req.to);
} else {
wait(tLogPopCore(self, req.tag, req.to, logData));
}
req.reply.send(Void());
return Void();
}
// This function (and updatePersistentData, which is called by this function) run at a low priority and can soak up all CPU resources.
// For this reason, they employ aggressive use of yields to avoid causing slow tasks that could introduce latencies for more important
// work (e.g. commits).
// This function (and updatePersistentData, which is called by this function) run at a low priority and can soak up all
// CPU resources. For this reason, they employ aggressive use of yields to avoid causing slow tasks that could introduce
// latencies for more important work (e.g. commits).
// This actor is just a loop that calls updatePersistentData and popDiskQueue whenever
// (a) there's data to be spilled or (b) we should update metadata after some commits have been fully popped.
ACTOR Future<Void> updateStorage( TLogData* self ) {
while(self->spillOrder.size() && !self->id_data.count(self->spillOrder.front())) {
self->spillOrder.pop_front();
@ -1401,7 +1431,7 @@ void peekMessagesFromMemory( Reference<LogData> self, TLogPeekRequest const& req
ACTOR Future<std::vector<StringRef>> parseMessagesForTag( StringRef commitBlob, Tag tag, int logRouters ) {
// See the comment in LogSystem.cpp for the binary format of commitBlob.
state std::vector<StringRef> relevantMessages;
state BinaryReader rd(commitBlob, AssumeVersion(currentProtocolVersion));
state BinaryReader rd(commitBlob, AssumeVersion(g_network->protocolVersion()));
while (!rd.empty()) {
TagsAndMessage tagsAndMessage;
tagsAndMessage.loadFromArena(&rd, nullptr);
@ -1892,7 +1922,11 @@ ACTOR Future<Void> tLogCommit(
return Void();
}
if (logData->version.get() == req.prevVersion) { // Not a duplicate (check relies on critical section between here self->version.set() below!)
state double beforeCommitT = now();
// Not a duplicate (check relies on the critical section between here and self->version.set() below!)
state bool isNotDuplicate = (logData->version.get() == req.prevVersion);
if (isNotDuplicate) {
if(req.debugID.present())
g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.Before");
@ -1930,6 +1964,10 @@ ACTOR Future<Void> tLogCommit(
return Void();
}
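// Sample the end-to-end commit latency into the tLog commit histogram, but only for requests that
// actually performed the commit; duplicate requests skip the timed path entirely.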
if (isNotDuplicate) {
self->commitLatencyDist->sampleSeconds(now() - beforeCommitT);
}
if(req.debugID.present())
g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.After");
@ -2155,30 +2193,16 @@ tLogEnablePopReq(TLogEnablePopRequest enablePopReq, TLogData* self, Reference<Lo
enablePopReq.reply.sendError(operation_failed());
return Void();
}
TraceEvent("EnableTLogPlayAllIgnoredPops2");
// use toBePopped and issue all the pops
std::map<Tag, Version>::iterator it;
state vector<Future<Void>> ignoredPops;
self->ignorePopRequest = false;
self->ignorePopDeadline = 0.0;
self->ignorePopUid = "";
for (it = self->toBePopped.begin(); it != self->toBePopped.end(); it++) {
TraceEvent("PlayIgnoredPop")
.detail("Tag", it->first.toString())
.detail("Version", it->second);
ignoredPops.push_back(tLogPopCore(self, it->first, it->second, logData));
}
TraceEvent("TLogExecCmdPopEnable")
.detail("UidStr", enablePopReq.snapUID.toString())
.detail("IgnorePopUid", self->ignorePopUid)
.detail("IgnporePopRequest", self->ignorePopRequest)
.detail("IgnporePopDeadline", self->ignorePopDeadline)
.detail("PersistentDataVersion", logData->persistentDataVersion)
.detail("PersistentDatadurableVersion", logData->persistentDataDurableVersion)
.detail("QueueCommittedVersion", logData->queueCommittedVersion.get())
.detail("Version", logData->version.get());
wait(waitForAll(ignoredPops));
self->toBePopped.clear();
TraceEvent("EnableTLogPlayAllIgnoredPops2")
.detail("UidStr", enablePopReq.snapUID.toString())
.detail("IgnorePopUid", self->ignorePopUid)
.detail("IgnorePopRequest", self->ignorePopRequest)
.detail("IgnorePopDeadline", self->ignorePopDeadline)
.detail("PersistentDataVersion", logData->persistentDataVersion)
.detail("PersistentDataDurableVersion", logData->persistentDataDurableVersion)
.detail("QueueCommittedVersion", logData->queueCommittedVersion.get())
.detail("Version", logData->version.get());
wait(processPopRequests(self, logData));
enablePopReq.reply.send(Void());
return Void();
}
@ -2290,6 +2314,7 @@ void removeLog( TLogData* self, Reference<LogData> logData ) {
}
}
// Remote tLogs pull data from log routers.
ACTOR Future<Void> pullAsyncData( TLogData* self, Reference<LogData> logData, std::vector<Tag> tags, Version beginVersion, Optional<Version> endVersion, bool poppedIsKnownCommitted ) {
state Future<Void> dbInfoChange = Void();
state Reference<ILogSystem::IPeekCursor> r;
@ -2809,7 +2834,7 @@ ACTOR Future<Void> tLogStart( TLogData* self, InitializeTLogRequest req, Localit
bool recovering = (req.recoverFrom.logSystemType == LogSystemType::tagPartitioned);
state Reference<LogData> logData = makeReference<LogData>(
self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID,
currentProtocolVersion, req.spillType, req.allTags, recovering ? "Recovered" : "Recruited");
g_network->protocolVersion(), req.spillType, req.allTags, recovering ? "Recovered" : "Recruited");
self->id_data[recruited.id()] = logData;
logData->locality = req.locality;
logData->recoveryCount = req.epoch;

View File

@ -168,7 +168,7 @@ OldTLogCoreData::OldTLogCoreData(const OldLogData& oldData)
struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogSystem> {
const UID dbgid;
LogSystemType logSystemType;
std::vector<Reference<LogSet>> tLogs; // LogSets in different locations: primary, remote or satellite
std::vector<Reference<LogSet>> tLogs; // LogSets in different locations: primary, satellite, or remote
int expectedLogSets;
int logRouterTags;
int txsTags;
@ -197,7 +197,14 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
Version knownCommittedVersion;
Version backupStartVersion = invalidVersion; // max(tLogs[0].startVersion, previous epochEnd).
LocalityData locality;
std::map< std::pair<UID, Tag>, std::pair<Version, Version> > outstandingPops; // For each currently running popFromLog actor, (log server #, tag)->popped version
// For each currently running popFromLog actor, outstandingPops is
// (logID, tag)->(max popped version, durableKnownCommittedVersion).
// Why do we need durableKnownCommittedVersion? knownCommittedVersion gives the lower bound of what data
// will need to be copied into the next generation to restore the replication factor.
// Guess: it probably serves as the minimum version of data that should remain on a TLog in the next
// generation, so sending a pop for anything less than durableKnownCommittedVersion would be pointless.
std::map<std::pair<UID, Tag>, std::pair<Version, Version>> outstandingPops;
Optional<PromiseStream<Future<Void>>> addActor;
ActorCollection popActors;
std::vector<OldLogData> oldLogData; // each element has the log info. in one old epoch.
@ -272,6 +279,9 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
Version& localityVersion = pseudoLocalityPopVersion[tag];
localityVersion = std::max(localityVersion, upTo);
Version minVersion = localityVersion;
// Why do we need to use the minimum popped version among all tags? Reason: suppose, for example, that
// 2 pseudo tags have popped up to versions 100 and 150, respectively. It's only safe to pop min(100, 150),
// because [101,150) is still needed by the pseudo tag that has only popped up to 100.
for (const int8_t locality : pseudoLocalities) {
minVersion = std::min(minVersion, pseudoLocalityPopVersion[Tag(locality, tag.id)]);
}
@ -1159,6 +1169,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
}
}
// pop data of the 'tag.locality' type up to the 'upTo' version
void pop(Version upTo, Tag tag, Version durableKnownCommittedVersion, int8_t popLocality) final {
if (upTo <= 0) return;
if (tag.locality == tagLocalityRemoteLog) {
@ -1184,6 +1195,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
}
}
// pop tag from log up to the version defined in self->outstandingPops[].first
ACTOR static Future<Void> popFromLog(TagPartitionedLogSystem* self,
Reference<AsyncVar<OptionalInterface<TLogInterface>>> log, Tag tag,
double time) {
@ -1191,6 +1203,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
loop {
wait( delay(time, TaskPriority::TLogPop) );
// to: first is the up-to version, second is durableKnownCommittedVersion
state std::pair<Version,Version> to = self->outstandingPops[ std::make_pair(log->get().id(),tag) ];
if (to.first <= last) {

View File

@ -60,6 +60,7 @@
#include "fdbserver/workloads/workloads.actor.h"
#include "flow/DeterministicRandom.h"
#include "flow/Platform.h"
#include "flow/ProtocolVersion.h"
#include "flow/SimpleOpt.h"
#include "flow/SystemMonitor.h"
#include "flow/TLSConfig.actor.h"
@ -316,6 +317,14 @@ void failAfter( Future<Void> trigger, Endpoint e ) {
failAfter( trigger, g_simulator.getProcess( e ) );
}
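// Periodically flushes every histogram registered with the global HistogramRegistry (e.g. the tLog
// commit and storage-server fetchKeys histograms) to the trace log.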
ACTOR Future<Void> histogramReport() {
loop {
wait(delay(SERVER_KNOBS->HISTOGRAM_REPORT_INTERVAL));
GetHistogramRegistry().logReport();
}
}
void testSerializationSpeed() {
double tstart;
double build = 0, serialize = 0, deserialize = 0, copy = 0, deallocate = 0;
@ -879,7 +888,7 @@ void restoreRoleFilesHelper(std::string dirSrc, std::string dirToMove, std::stri
}
namespace {
enum Role {
enum class ServerRole {
ConsistencyCheck,
CreateTemplateDatabase,
DSLTest,
@ -907,7 +916,7 @@ struct CLIOptions {
int maxLogs = 0;
bool maxLogsSet = false;
Role role = FDBD;
ServerRole role = ServerRole::FDBD;
uint32_t randomSeed = platform::getRandomSeed();
const char* testFile = "tests/default.txt";
@ -1042,35 +1051,35 @@ private:
case OPT_ROLE:
sRole = args.OptionArg();
if (!strcmp(sRole, "fdbd"))
role = FDBD;
role = ServerRole::FDBD;
else if (!strcmp(sRole, "simulation"))
role = Simulation;
role = ServerRole::Simulation;
else if (!strcmp(sRole, "test"))
role = Test;
role = ServerRole::Test;
else if (!strcmp(sRole, "multitest"))
role = MultiTester;
role = ServerRole::MultiTester;
else if (!strcmp(sRole, "skiplisttest"))
role = SkipListTest;
role = ServerRole::SkipListTest;
else if (!strcmp(sRole, "search"))
role = SearchMutations;
role = ServerRole::SearchMutations;
else if (!strcmp(sRole, "dsltest"))
role = DSLTest;
role = ServerRole::DSLTest;
else if (!strcmp(sRole, "versionedmaptest"))
role = VersionedMapTest;
role = ServerRole::VersionedMapTest;
else if (!strcmp(sRole, "createtemplatedb"))
role = CreateTemplateDatabase;
role = ServerRole::CreateTemplateDatabase;
else if (!strcmp(sRole, "networktestclient"))
role = NetworkTestClient;
role = ServerRole::NetworkTestClient;
else if (!strcmp(sRole, "networktestserver"))
role = NetworkTestServer;
role = ServerRole::NetworkTestServer;
else if (!strcmp(sRole, "restore"))
role = Restore;
role = ServerRole::Restore;
else if (!strcmp(sRole, "kvfileintegritycheck"))
role = KVFileIntegrityCheck;
role = ServerRole::KVFileIntegrityCheck;
else if (!strcmp(sRole, "kvfilegeneratesums"))
role = KVFileGenerateIOLogChecksums;
role = ServerRole::KVFileGenerateIOLogChecksums;
else if (!strcmp(sRole, "consistencycheck"))
role = ConsistencyCheck;
role = ServerRole::ConsistencyCheck;
else {
fprintf(stderr, "ERROR: Unknown role `%s'\n", sRole);
printHelpTeaser(argv[0]);
@ -1423,8 +1432,8 @@ private:
bool autoPublicAddress =
std::any_of(publicAddressStrs.begin(), publicAddressStrs.end(),
[](const std::string& addr) { return StringRef(addr).startsWith(LiteralStringRef("auto:")); });
if ((role != Simulation && role != CreateTemplateDatabase && role != KVFileIntegrityCheck &&
role != KVFileGenerateIOLogChecksums) ||
if ((role != ServerRole::Simulation && role != ServerRole::CreateTemplateDatabase &&
role != ServerRole::KVFileIntegrityCheck && role != ServerRole::KVFileGenerateIOLogChecksums) ||
autoPublicAddress) {
if (seedSpecified && !fileExists(connFile)) {
@ -1471,7 +1480,7 @@ private:
flushAndExit(FDB_EXIT_ERROR);
}
if (role == ConsistencyCheck) {
if (role == ServerRole::ConsistencyCheck) {
if (!publicAddressStrs.empty()) {
fprintf(stderr, "ERROR: Public address cannot be specified for consistency check processes\n");
printHelpTeaser(argv[0]);
@ -1481,18 +1490,18 @@ private:
publicAddresses.address = NetworkAddress(publicIP, ::getpid());
}
if (role == Simulation) {
if (role == ServerRole::Simulation) {
Optional<bool> buggifyOverride = checkBuggifyOverride(testFile);
if (buggifyOverride.present()) buggifyEnabled = buggifyOverride.get();
}
if (role == SearchMutations && !targetKey) {
if (role == ServerRole::SearchMutations && !targetKey) {
fprintf(stderr, "ERROR: please specify a target key\n");
printHelpTeaser(argv[0]);
flushAndExit(FDB_EXIT_ERROR);
}
if (role == NetworkTestClient && !testServersStr.size()) {
if (role == ServerRole::NetworkTestClient && !testServersStr.size()) {
fprintf(stderr, "ERROR: please specify --testservers\n");
printHelpTeaser(argv[0]);
flushAndExit(FDB_EXIT_ERROR);
@ -1552,7 +1561,7 @@ int main(int argc, char* argv[]) {
const auto opts = CLIOptions::parseArgs(argc, argv);
const auto role = opts.role;
if (role == Simulation) printf("Random seed is %u...\n", opts.randomSeed);
if (role == ServerRole::Simulation) printf("Random seed is %u...\n", opts.randomSeed);
if (opts.zoneId.present())
printf("ZoneId set to %s, dcId to %s\n", printable(opts.zoneId).c_str(), printable(opts.dcId).c_str());
@ -1562,7 +1571,7 @@ int main(int argc, char* argv[]) {
enableBuggify(opts.buggifyEnabled, BuggifyType::General);
if (!globalServerKnobs->setKnob("log_directory", opts.logFolder)) ASSERT(false);
if (role != Simulation) {
if (role != ServerRole::Simulation) {
if (!globalServerKnobs->setKnob("commit_batches_mem_bytes_hard_limit", std::to_string(opts.memLimit)))
ASSERT(false);
}
@ -1588,9 +1597,9 @@ int main(int argc, char* argv[]) {
if (!globalServerKnobs->setKnob("server_mem_limit", std::to_string(opts.memLimit))) ASSERT(false);
// Reinitialize knobs in order to update knobs that are dependent on explicitly set knobs
globalFlowKnobs->initialize(true, role == Simulation);
globalFlowKnobs->initialize(true, role == ServerRole::Simulation);
globalClientKnobs->initialize(true);
globalServerKnobs->initialize(true, globalClientKnobs.get(), role == Simulation);
globalServerKnobs->initialize(true, globalClientKnobs, role == ServerRole::Simulation);
// evictionPolicyStringToEnum will throw an exception if the string is not recognized as a valid
EvictablePageCache::evictionPolicyStringToEnum(FLOW_KNOBS->CACHE_EVICTION_POLICY);
@ -1600,17 +1609,17 @@ int main(int argc, char* argv[]) {
flushAndExit(FDB_EXIT_ERROR);
}
if (role == SkipListTest) {
if (role == ServerRole::SkipListTest) {
skipListTest();
flushAndExit(FDB_EXIT_SUCCESS);
}
if (role == DSLTest) {
if (role == ServerRole::DSLTest) {
dsltest();
flushAndExit(FDB_EXIT_SUCCESS);
}
if (role == VersionedMapTest) {
if (role == ServerRole::VersionedMapTest) {
versionedMapTest();
flushAndExit(FDB_EXIT_SUCCESS);
}
@ -1622,7 +1631,7 @@ int main(int argc, char* argv[]) {
std::vector<Future<Void>> listenErrors;
if (role == Simulation || role == CreateTemplateDatabase) {
if (role == ServerRole::Simulation || role == ServerRole::CreateTemplateDatabase) {
//startOldSimulator();
startNewSimulator();
openTraceFile(NetworkAddress(), opts.rollsize, opts.maxLogsSize, opts.logFolder, "trace", opts.logGroup);
@ -1632,7 +1641,8 @@ int main(int argc, char* argv[]) {
g_network->addStopCallback( Net2FileSystem::stop );
FlowTransport::createInstance(false, 1);
const bool expectsPublicAddress = (role == FDBD || role == NetworkTestServer || role == Restore);
const bool expectsPublicAddress =
(role == ServerRole::FDBD || role == ServerRole::NetworkTestServer || role == ServerRole::Restore);
if (opts.publicAddressStrs.empty()) {
if (expectsPublicAddress) {
fprintf(stderr, "ERROR: The -p or --public_address option is required\n");
@ -1725,9 +1735,11 @@ int main(int argc, char* argv[]) {
Future<Optional<Void>> f;
if (role == Simulation) {
if (role == ServerRole::Simulation) {
TraceEvent("Simulation").detail("TestFile", opts.testFile);
auto histogramReportActor = histogramReport();
CLIENT_KNOBS->trace();
FLOW_KNOBS->trace();
SERVER_KNOBS->trace();
@ -1847,7 +1859,7 @@ int main(int argc, char* argv[]) {
}
setupAndRun(dataFolder, opts.testFile, opts.restarting, (isRestoring >= 1), opts.whitelistBinPaths);
g_simulator.run();
} else if (role == FDBD) {
} else if (role == ServerRole::FDBD) {
// Update the global blob credential files list so that both fast
// restore workers and backup workers can access blob storage.
std::vector<std::string>* pFiles =
@ -1885,45 +1897,46 @@ int main(int argc, char* argv[]) {
actors.push_back(fdbd(opts.connectionFile, opts.localities, opts.processClass, dataFolder, dataFolder,
opts.storageMemLimit, opts.metricsConnFile, opts.metricsPrefix, opts.rsssize,
opts.whitelistBinPaths));
actors.push_back(histogramReport());
// actors.push_back( recurring( []{}, .001 ) ); // for ASIO latency measurement
f = stopAfter(waitForAll(actors));
g_network->run();
}
} else if (role == MultiTester) {
} else if (role == ServerRole::MultiTester) {
setupRunLoopProfiler();
f = stopAfter(runTests(opts.connectionFile, TEST_TYPE_FROM_FILE,
opts.testOnServers ? TEST_ON_SERVERS : TEST_ON_TESTERS, opts.minTesterCount,
opts.testFile, StringRef(), opts.localities));
g_network->run();
} else if (role == Test) {
} else if (role == ServerRole::Test) {
setupRunLoopProfiler();
auto m = startSystemMonitor(opts.dataFolder, opts.dcId, opts.zoneId, opts.zoneId);
f = stopAfter(runTests(opts.connectionFile, TEST_TYPE_FROM_FILE, TEST_HERE, 1, opts.testFile, StringRef(),
opts.localities));
g_network->run();
} else if (role == ConsistencyCheck) {
} else if (role == ServerRole::ConsistencyCheck) {
setupRunLoopProfiler();
auto m = startSystemMonitor(opts.dataFolder, opts.dcId, opts.zoneId, opts.zoneId);
f = stopAfter(runTests(opts.connectionFile, TEST_TYPE_CONSISTENCY_CHECK, TEST_HERE, 1, opts.testFile,
StringRef(), opts.localities));
g_network->run();
} else if (role == CreateTemplateDatabase) {
} else if (role == ServerRole::CreateTemplateDatabase) {
createTemplateDatabase();
} else if (role == NetworkTestClient) {
} else if (role == ServerRole::NetworkTestClient) {
f = stopAfter(networkTestClient(opts.testServersStr));
g_network->run();
} else if (role == NetworkTestServer) {
} else if (role == ServerRole::NetworkTestServer) {
f = stopAfter( networkTestServer() );
g_network->run();
} else if (role == Restore) {
} else if (role == ServerRole::Restore) {
f = stopAfter(restoreWorker(opts.connectionFile, opts.localities, opts.dataFolder));
g_network->run();
} else if (role == KVFileIntegrityCheck) {
} else if (role == ServerRole::KVFileIntegrityCheck) {
f = stopAfter(KVFileCheck(opts.kvFile, true));
g_network->run();
} else if (role == KVFileGenerateIOLogChecksums) {
} else if (role == ServerRole::KVFileGenerateIOLogChecksums) {
Optional<Void> result;
try {
GenerateIOLogChecksumFile(opts.kvFile);
@ -1945,7 +1958,7 @@ int main(int argc, char* argv[]) {
TraceEvent("ElapsedTime").detail("SimTime", now()-startNow).detail("RealTime", timer()-start)
.detail("RandomUnseed", unseed);
if (role==Simulation){
if (role == ServerRole::Simulation) {
printf("Unseed: %d\n", unseed);
printf("Elapsed: %f simsec, %f real seconds\n", now()-startNow, timer()-start);
}
@ -1982,7 +1995,7 @@ int main(int argc, char* argv[]) {
cout << " " << i->second << " " << i->first << endl;*/
// cout << " " << Actor::allActors[i]->getName() << endl;
if (role == Simulation) {
if (role == ServerRole::Simulation) {
unsigned long sevErrorEventsLogged = TraceEvent::CountEventsLoggedAt(SevError);
if (sevErrorEventsLogged > 0) {
printf("%lu SevError events logged\n", sevErrorEventsLogged);

View File

@ -19,15 +19,20 @@
*/
#include <cinttypes>
#include <functional>
#include <type_traits>
#include <unordered_map>
#include "fdbrpc/fdbrpc.h"
#include "fdbrpc/LoadBalance.h"
#include "flow/Arena.h"
#include "flow/IRandom.h"
#include "flow/Tracing.h"
#include "flow/IndexedSet.h"
#include "flow/Hash3.h"
#include "flow/ActorCollection.h"
#include "flow/Arena.h"
#include "flow/Hash3.h"
#include "flow/Histogram.h"
#include "flow/IRandom.h"
#include "flow/IndexedSet.h"
#include "flow/SystemMonitor.h"
#include "flow/Tracing.h"
#include "flow/Util.h"
#include "fdbclient/Atomic.h"
#include "fdbclient/DatabaseContext.h"
@ -57,11 +62,8 @@
#include "fdbrpc/Smoother.h"
#include "fdbrpc/Stats.h"
#include "flow/TDMetric.actor.h"
#include <type_traits>
#include "flow/actorcompiler.h" // This must be the last #include.
using std::pair;
using std::make_pair;
#include "flow/actorcompiler.h" // This must be the last #include.
#ifndef __INTEL_COMPILER
#pragma region Data Structures
@ -229,13 +231,13 @@ struct UpdateEagerReadInfo {
void finishKeyBegin() {
std::sort(keyBegin.begin(), keyBegin.end());
keyBegin.resize( std::unique(keyBegin.begin(), keyBegin.end()) - keyBegin.begin() );
std::sort(keys.begin(), keys.end(), [](const pair<KeyRef, int>& lhs, const pair<KeyRef, int>& rhs) { return (lhs.first < rhs.first) || (lhs.first == rhs.first && lhs.second > rhs.second); } );
keys.resize(std::unique(keys.begin(), keys.end(), [](const pair<KeyRef, int>& lhs, const pair<KeyRef, int>& rhs) { return lhs.first == rhs.first; } ) - keys.begin());
std::sort(keys.begin(), keys.end(), [](const std::pair<KeyRef, int>& lhs, const std::pair<KeyRef, int>& rhs) { return (lhs.first < rhs.first) || (lhs.first == rhs.first && lhs.second > rhs.second); } );
keys.resize(std::unique(keys.begin(), keys.end(), [](const std::pair<KeyRef, int>& lhs, const std::pair<KeyRef, int>& rhs) { return lhs.first == rhs.first; } ) - keys.begin());
//value gets populated in doEagerReads
}
Optional<Value>& getValue(KeyRef key) {
int i = std::lower_bound(keys.begin(), keys.end(), pair<KeyRef, int>(key, 0), [](const pair<KeyRef, int>& lhs, const pair<KeyRef, int>& rhs) { return lhs.first < rhs.first; } ) - keys.begin();
int i = std::lower_bound(keys.begin(), keys.end(),std::pair<KeyRef, int>(key, 0), [](const std::pair<KeyRef, int>& lhs, const std::pair<KeyRef, int>& rhs) { return lhs.first < rhs.first; } ) - keys.begin();
ASSERT( i < keys.size() && keys[i].first == key );
return value[i];
}
@ -289,9 +291,63 @@ private:
std::map<Version, Standalone<VerUpdateRef>> mutationLog; // versions (durableVersion, version]
public:
public:
// Histograms
struct FetchKeysHistograms {
const Reference<Histogram> latency;
const Reference<Histogram> bytes;
const Reference<Histogram> bandwidth;
FetchKeysHistograms()
: latency(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP, FETCH_KEYS_LATENCY_HISTOGRAM,
Histogram::Unit::microseconds)),
bytes(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP, FETCH_KEYS_BYTES_HISTOGRAM,
Histogram::Unit::bytes)),
bandwidth(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP, FETCH_KEYS_BYTES_PER_SECOND_HISTOGRAM,
Histogram::Unit::bytes_per_second)) {}
} fetchKeysHistograms;
class CurrentRunningFetchKeys {
std::unordered_map<UID, double> startTimeMap;
std::unordered_map<UID, KeyRangeRef> keyRangeMap;
static const StringRef emptyString;
static const KeyRangeRef emptyKeyRange;
public:
void recordStart(const UID id, const KeyRange keyRange) {
startTimeMap[id] = now();
keyRangeMap[id] = keyRange;
}
void recordFinish(const UID id) {
startTimeMap.erase(id);
keyRangeMap.erase(id);
}
std::pair<double, KeyRangeRef> longestTime() const {
if (numRunning() == 0) {
return {-1, emptyKeyRange};
}
const double currentTime = now();
double longest = 0;
UID UIDofLongest;
for (const auto kv: startTimeMap) {
const double currentRunningTime = currentTime - kv.second;
if (longest < currentRunningTime) {
longest = currentRunningTime;
UIDofLongest = kv.first;
}
}
return {longest, keyRangeMap.at(UIDofLongest)};
}
int numRunning() const { return startTimeMap.size(); }
} currentRunningFetchKeys;
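// Bookkeeping sketch: fetchKeys (via FetchKeysMetricReporter below) calls recordStart() when a fetch
// begins and recordFinish() when it ends; reportStorageServerState() polls longestTime() to trace the
// longest-running fetch.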
Tag tag;
vector<pair<Version,Tag>> history;
vector<pair<Version,Tag>> allHistory;
vector<std::pair<Version,Tag>> history;
vector<std::pair<Version,Tag>> allHistory;
Version poppedAllAfter;
std::map<Version, Arena> freeable; // for each version, an Arena that must be held until that version is < oldestVersion
Arena lastArena;
@ -338,8 +394,8 @@ public:
poppedAllAfter = std::numeric_limits<Version>::max();
}
vector<pair<Version,Tag>>* hist = &history;
vector<pair<Version,Tag>> allHistoryCopy;
vector<std::pair<Version,Tag>>* hist = &history;
vector<std::pair<Version,Tag>> allHistoryCopy;
if(popAllTags) {
allHistoryCopy = allHistory;
hist = &allHistoryCopy;
@ -604,22 +660,18 @@ public:
}
} counters;
StorageServer(IKeyValueStore* storage, Reference<AsyncVar<ServerDBInfo>> const& db, StorageServerInterface const& ssi)
: instanceID(deterministicRandom()->randomUniqueID().first()),
storage(this, storage), db(db), actors(false),
lastTLogVersion(0), lastVersionWithData(0), restoredVersion(0),
rebootAfterDurableVersion(std::numeric_limits<Version>::max()),
durableInProgress(Void()),
versionLag(0), primaryLocality(tagLocalityInvalid),
updateEagerReads(0),
shardChangeCounter(0),
fetchKeysParallelismLock(SERVER_KNOBS->FETCH_KEYS_PARALLELISM_BYTES),
shuttingDown(false), debug_inApplyUpdate(false), debug_lastValidateTime(0), watchBytes(0), numWatches(0),
logProtocol(0), counters(this), tag(invalidTag), maxQueryQueue(0), thisServerID(ssi.id()),
readQueueSizeMetric(LiteralStringRef("StorageServer.ReadQueueSize")),
behind(false), versionBehind(false), byteSampleClears(false, LiteralStringRef("\xff\xff\xff")), noRecentUpdates(false),
lastUpdate(now()), poppedAllAfter(std::numeric_limits<Version>::max()), cpuUsage(0.0), diskUsage(0.0)
{
StorageServer(IKeyValueStore* storage, Reference<AsyncVar<ServerDBInfo>> const& db,
StorageServerInterface const& ssi)
: fetchKeysHistograms(), instanceID(deterministicRandom()->randomUniqueID().first()), storage(this, storage),
db(db), actors(false), lastTLogVersion(0), lastVersionWithData(0), restoredVersion(0),
rebootAfterDurableVersion(std::numeric_limits<Version>::max()), durableInProgress(Void()), versionLag(0),
primaryLocality(tagLocalityInvalid), updateEagerReads(0), shardChangeCounter(0),
fetchKeysParallelismLock(SERVER_KNOBS->FETCH_KEYS_PARALLELISM_BYTES), shuttingDown(false),
debug_inApplyUpdate(false), debug_lastValidateTime(0), watchBytes(0), numWatches(0), logProtocol(0),
counters(this), tag(invalidTag), maxQueryQueue(0), thisServerID(ssi.id()),
readQueueSizeMetric(LiteralStringRef("StorageServer.ReadQueueSize")), behind(false), versionBehind(false),
byteSampleClears(false, LiteralStringRef("\xff\xff\xff")), noRecentUpdates(false), lastUpdate(now()),
poppedAllAfter(std::numeric_limits<Version>::max()), cpuUsage(0.0), diskUsage(0.0) {
version.initMetric(LiteralStringRef("StorageServer.Version"), counters.cc.id);
oldestVersion.initMetric(LiteralStringRef("StorageServer.OldestVersion"), counters.cc.id);
durableVersion.initMetric(LiteralStringRef("StorageServer.DurableVersion"), counters.cc.id);
@ -732,6 +784,9 @@ public:
}
};
const StringRef StorageServer::CurrentRunningFetchKeys::emptyString = LiteralStringRef("");
const KeyRangeRef StorageServer::CurrentRunningFetchKeys::emptyKeyRange = KeyRangeRef(StorageServer::CurrentRunningFetchKeys::emptyString, StorageServer::CurrentRunningFetchKeys::emptyString);
// If and only if key:=value is in (storage+versionedData), // NOT ACTUALLY: and key < allKeys.end,
// and H(key) < |key+value|/bytesPerSample,
// let sampledSize = max(|key+value|,bytesPerSample)
@ -1826,7 +1881,7 @@ bool changeDurableVersion( StorageServer* data, Version desiredDurableVersion )
setDataDurableVersion(data->thisServerID, data->durableVersion.get());
if (checkFatalError.isReady()) checkFatalError.get();
//TraceEvent("ForgotVersionsBefore", data->thisServerID).detail("Version", nextDurableVersion);
// TraceEvent("ForgotVersionsBefore", data->thisServerID).detail("Version", nextDurableVersion);
validate(data);
return nextDurableVersion == desiredDurableVersion;
@ -2142,16 +2197,56 @@ ACTOR Future<Void> logFetchKeysWarning(AddingShard* shard) {
loop {
state double waitSeconds = BUGGIFY ? 5.0 : 600.0;
wait(delay(waitSeconds));
TraceEvent(waitSeconds > 300.0 ? SevWarnAlways : SevInfo, "FetchKeysTooLong").detail("Duration", now() - startTime).detail("Phase", shard->phase).detail("Begin", shard->keys.begin.printable()).detail("End", shard->keys.end.printable());
const auto traceEventLevel = waitSeconds > SERVER_KNOBS->FETCH_KEYS_TOO_LONG_TIME_CRITERIA ? SevWarnAlways : SevInfo;
TraceEvent(traceEventLevel, "FetchKeysTooLong")
.detail("Duration", now() - startTime)
.detail("Phase", shard->phase)
.detail("Begin", shard->keys.begin.printable())
.detail("End", shard->keys.end.printable());
}
}
class FetchKeysMetricReporter {
const UID uid;
const double startTime;
int fetchedBytes;
StorageServer::FetchKeysHistograms& histograms;
StorageServer::CurrentRunningFetchKeys& currentRunning;
public:
FetchKeysMetricReporter(const UID& uid_, const double startTime_, const KeyRange& keyRange, StorageServer::FetchKeysHistograms& histograms_, StorageServer::CurrentRunningFetchKeys& currentRunning_)
: uid(uid_), startTime(startTime_), fetchedBytes(0), histograms(histograms_), currentRunning(currentRunning_) {
currentRunning.recordStart(uid, keyRange);
}
void addFetchedBytes(const int bytes) { fetchedBytes += bytes; }
~FetchKeysMetricReporter() {
double latency = now() - startTime;
// If fetchKeys did *NOT* actually run (i.e., it returned immediately), still report a record.
if (latency == 0) latency = 1e6;
const uint32_t bandwidth = fetchedBytes / latency;
histograms.latency->sampleSeconds(latency);
histograms.bytes->sample(fetchedBytes);
histograms.bandwidth->sample(bandwidth);
currentRunning.recordFinish(uid);
}
};
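// RAII: the constructor registers the fetch with currentRunningFetchKeys and the destructor samples
// latency, total bytes, and bandwidth into the histograms and deregisters, so every exit path of
// fetchKeys (success, error, or cancellation) is reported exactly once.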
ACTOR Future<Void> fetchKeys( StorageServer *data, AddingShard* shard ) {
state const UID fetchKeysID = deterministicRandom()->randomUniqueID();
state TraceInterval interval("FetchKeys");
state KeyRange keys = shard->keys;
state Future<Void> warningLogger = logFetchKeysWarning(shard);
state double startt = now();
state const double startTime = now();
state int fetchBlockBytes = BUGGIFY ? SERVER_KNOBS->BUGGIFY_BLOCK_BYTES : SERVER_KNOBS->FETCH_BLOCK_BYTES;
state FetchKeysMetricReporter metricReporter(fetchKeysID, startTime, keys, data->fetchKeysHistograms, data->currentRunningFetchKeys);
// delay(0) to force a return to the run loop before the work of fetchKeys is started.
// This allows adding->start() to be called inline with CSK.
@ -2189,7 +2284,7 @@ ACTOR Future<Void> fetchKeys( StorageServer *data, AddingShard* shard ) {
state double executeStart = now();
++data->counters.fetchWaitingCount;
data->counters.fetchWaitingMS += 1000*(executeStart - startt);
data->counters.fetchWaitingMS += 1000 * (executeStart - startTime);
// Fetch keys gets called while the update actor is processing mutations. data->version will not be updated until all mutations for a version
// have been processed. We need to take the durableVersionLock to ensure data->version is greater than the version of the mutation which caused
@ -2231,6 +2326,7 @@ ACTOR Future<Void> fetchKeys( StorageServer *data, AddingShard* shard ) {
DEBUG_KEY_RANGE("fetchRange", fetchVersion, keys);
for(auto k = this_block.begin(); k != this_block.end(); ++k) DEBUG_MUTATION("fetch", fetchVersion, MutationRef(MutationRef::SetValue, k->key, k->value));
metricReporter.addFetchedBytes(expectedSize);
data->counters.bytesFetched += expectedSize;
if( fetchBlockBytes > expectedSize ) {
holdingFKPL.release( fetchBlockBytes - expectedSize );
@ -2298,8 +2394,9 @@ ACTOR Future<Void> fetchKeys( StorageServer *data, AddingShard* shard ) {
while (!shard->updates.empty() && shard->updates[0].version <= fetchVersion) shard->updates.pop_front();
//FIXME: remove when we no longer support upgrades from 5.X
if(debug_getRangeRetries >= 100) {
if (debug_getRangeRetries >= 100) {
data->cx->enableLocalityLoadBalance = false;
// TODO: Add SevWarnAlways to say it was disabled.
}
debug_getRangeRetries++;
@ -2416,7 +2513,7 @@ ACTOR Future<Void> fetchKeys( StorageServer *data, AddingShard* shard ) {
TraceEvent(SevError, "FetchKeysError", data->thisServerID)
.error(e)
.detail("Elapsed", now()-startt)
.detail("Elapsed", now() - startTime)
.detail("KeyBegin", keys.begin)
.detail("KeyEnd",keys.end);
if (e.code() != error_code_actor_cancelled)
@ -3269,7 +3366,9 @@ bool StorageServerDisk::makeVersionMutationsDurable(Version& prevStorageVersion,
void StorageServerDisk::makeVersionDurable( Version version ) {
storage->set( KeyValueRef(persistVersion, BinaryWriter::toValue(version, Unversioned())) );
//TraceEvent("MakeDurable", data->thisServerID).detail("FromVersion", prevStorageVersion).detail("ToVersion", version);
// TraceEvent("MakeDurable", data->thisServerID)
// .detail("FromVersion", prevStorageVersion)
// .detail("ToVersion", version);
}
void StorageServerDisk::changeLogProtocol(Version version, ProtocolVersion protocol) {
@ -3788,6 +3887,35 @@ ACTOR Future<Void> serveWatchValueRequests( StorageServer* self, FutureStream<Wa
}
}
ACTOR Future<Void> reportStorageServerState(StorageServer* self) {
if (!SERVER_KNOBS->REPORT_DD_METRICS) {
return Void();
}
loop {
wait(delay(SERVER_KNOBS->DD_METRICS_REPORT_INTERVAL));
const auto numRunningFetchKeys = self->currentRunningFetchKeys.numRunning();
if (numRunningFetchKeys == 0) {
continue;
}
const auto longestRunningFetchKeys = self->currentRunningFetchKeys.longestTime();
auto level = SevInfo;
if (longestRunningFetchKeys.first >= SERVER_KNOBS->FETCH_KEYS_TOO_LONG_TIME_CRITERIA) {
level = SevWarnAlways;
}
TraceEvent(level, "FetchKeyCurrentStatus")
.detail("Timestamp", now())
.detail("LongestRunningTime", longestRunningFetchKeys.first)
.detail("StartKey", longestRunningFetchKeys.second.begin.printable())
.detail("EndKey", longestRunningFetchKeys.second.end.printable())
.detail("NumRunning", numRunningFetchKeys);
}
}
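reportStorageServerState only consumes two queries, numRunning() and longestTime(), so the underlying CurrentRunningFetchKeys bookkeeping can be as simple as a map from fetch id to start time and key range. A sketch of that assumed shape (the real class lives elsewhere in storageserver.actor.cpp and may differ):

#include <cstddef>
#include <map>
#include <string>
#include <utility>

// Assumed shape, not the actual FDB class: just enough state to answer the
// two questions the actor above asks, i.e. how many fetches are running and
// which one has been running longest.
struct RunningFetchKeysSketch {
    std::map<int, std::pair<double, std::string>> running; // id -> (startTime, keyRange)
    void recordStart(int id, double now, std::string range) { running[id] = { now, std::move(range) }; }
    void recordFinish(int id) { running.erase(id); }
    std::size_t numRunning() const { return running.size(); }
    // (elapsed, range) of the oldest entry; the caller checks numRunning() > 0
    // first, mirroring the early `continue` in the actor above.
    std::pair<double, std::string> longestTime(double now) const {
        double best = -1.0;
        std::string range;
        for (const auto& [id, v] : running)
            if (now - v.first > best) { best = now - v.first; range = v.second; }
        return { best, range };
    }
};

int main() {
    RunningFetchKeysSketch s;
    s.recordStart(1, 0.0, "keyBegin-keyEnd");
    auto longest = s.longestTime(2.5); // {2.5, "keyBegin-keyEnd"}
    s.recordFinish(1);
    return longest.first > 0 ? 0 : 1;
}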
ACTOR Future<Void> storageServerCore( StorageServer* self, StorageServerInterface ssi )
{
state Future<Void> doUpdate = Void();
@ -3808,6 +3936,7 @@ ACTOR Future<Void> storageServerCore( StorageServer* self, StorageServerInterfac
self->actors.add(serveGetKeyRequests(self, ssi.getKey.getFuture()));
self->actors.add(serveWatchValueRequests(self, ssi.watchValue.getFuture()));
self->actors.add(traceRole(Role::STORAGE_SERVER, ssi.id()));
self->actors.add(reportStorageServerState(self));
self->transactionTagCounter.startNewInterval(self->thisServerID);
self->actors.add(recurring([&]() { self->transactionTagCounter.startNewInterval(self->thisServerID); },

View File

@ -908,6 +908,9 @@ std::map<std::string, std::function<void(const std::string&)>> testSpecGlobalKey
// else { } It is enabled by default for the tester.
TraceEvent("TestParserTest").detail("ClientInfoLogging", value);
}},
{"startIncompatibleProcess", [](const std::string& value) {
TraceEvent("TestParserTest").detail("ParsedStartIncompatibleProcess", value);
}}
};
std::map<std::string, std::function<void(const std::string& value, TestSpec* spec)>> testSpecTestKeys = {

View File

@ -25,6 +25,7 @@
#include "fdbclient/StorageServerInterface.h"
#include "fdbserver/Knobs.h"
#include "flow/ActorCollection.h"
#include "flow/ProtocolVersion.h"
#include "flow/SystemMonitor.h"
#include "flow/TDMetric.actor.h"
#include "fdbrpc/simulator.h"
@ -46,6 +47,7 @@
#include "flow/Profiler.h"
#include "flow/ThreadHelper.actor.h"
#include "flow/Trace.h"
#include "flow/network.h"
#ifdef __linux__
#include <fcntl.h>
@ -993,7 +995,7 @@ ACTOR Future<Void> workerServer(
folder, locality.dcId(), locality.zoneId(), locality.machineId(), g_network->getLocalAddress().ip));
{
auto recruited = interf; //ghetto! don't we all love a good #define
auto recruited = interf;
DUMPTOKEN(recruited.clientInterface.reboot);
DUMPTOKEN(recruited.clientInterface.profiler);
DUMPTOKEN(recruited.tLog);
@ -1141,7 +1143,7 @@ ACTOR Future<Void> workerServer(
loop choose {
when( UpdateServerDBInfoRequest req = waitNext( interf.updateServerDBInfo.getFuture() ) ) {
ServerDBInfo localInfo = BinaryReader::fromStringRef<ServerDBInfo>(req.serializedDbInfo, AssumeVersion(currentProtocolVersion));
ServerDBInfo localInfo = BinaryReader::fromStringRef<ServerDBInfo>(req.serializedDbInfo, AssumeVersion(g_network->protocolVersion()));
localInfo.myLocality = locality;
if(localInfo.infoGeneration < dbInfo->get().infoGeneration && localInfo.clusterInterface == dbInfo->get().clusterInterface) {
@ -1796,6 +1798,16 @@ ACTOR Future<Void> monitorLeaderRemotelyWithDelayedCandidacy( Reference<ClusterC
}
}
ACTOR Future<Void> serveProtocolInfo() {
state RequestStream<ProtocolInfoRequest> protocolInfo(
PeerCompatibilityPolicy{ RequirePeer::AtLeast, ProtocolVersion::withStableInterfaces() });
protocolInfo.makeWellKnownEndpoint(WLTOKEN_PROTOCOL_INFO, TaskPriority::DefaultEndpoint);
loop {
state ProtocolInfoRequest req = waitNext(protocolInfo.getFuture());
req.reply.send(ProtocolInfoReply{ g_network->protocolVersion() });
}
}
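The client side of this request appears later in this same diff: the new ProtocolVersionWorkload builds a RequestStream against the WLTOKEN_PROTOCOL_INFO endpoint of a peer and compares the returned version against its own.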
ACTOR Future<Void> fdbd(
Reference<ClusterConnectionFile> connFile,
LocalityData localities,
@ -1811,6 +1823,8 @@ ACTOR Future<Void> fdbd(
state vector<Future<Void>> actors;
state Promise<Void> recoveredDiskFiles;
actors.push_back(serveProtocolInfo());
try {
ServerCoordinators coordinators( connFile );
if (g_network->isSimulated()) {

View File

@ -22,6 +22,7 @@
#include "fdbserver/workloads/workloads.actor.h"
#include "flow/ActorCollection.h"
#include "flow/IRandom.h"
#include "flow/SystemMonitor.h"
#include "fdbserver/workloads/AsyncFile.actor.h"
#include "flow/actorcompiler.h" // This must be the last #include.
@ -374,15 +375,6 @@ struct AsyncFileCorrectnessWorkload : public AsyncFileWorkload
return false;
}
//Populates a buffer with a random sequence of bytes
void generateRandomData(unsigned char* buffer, int length) const {
for(int i = 0; i < length; i+= sizeof(uint32_t))
{
uint32_t val = deterministicRandom()->randomUInt32();
memcpy(&buffer[i], &val, std::min(length - i, (int)sizeof(uint32_t)));
}
}
//Performs an operation on a file and the memory representation of that file
ACTOR Future<OperationInfo> processOperation(AsyncFileCorrectnessWorkload *self, OperationInfo info)
{
@ -413,7 +405,7 @@ struct AsyncFileCorrectnessWorkload : public AsyncFileWorkload
else if(info.operation == WRITE)
{
info.data = self->allocateBuffer(info.length);
self->generateRandomData(info.data->buffer, info.length);
generateRandomData(reinterpret_cast<uint8_t*>(info.data->buffer), info.length);
memcpy(&self->memoryFile->buffer[info.offset], info.data->buffer, info.length);
memset(&self->fileValidityMask[info.offset], 0xFF, info.length);

View File

@ -36,15 +36,14 @@ namespace ClientLogEventsParser {
FdbClientLogEvents::EventGetVersion_V2 gv;
reader >> gv;
ASSERT(gv.latency < 10000);
ASSERT(gv.priorityType >= 0 && gv.priorityType < FdbClientLogEvents::PRIORITY_END);
}
void parseEventGetVersion_V3(BinaryReader &reader) {
FdbClientLogEvents::EventGetVersion_V3 gv;
reader >> gv;
ASSERT(gv.latency < 10000);
ASSERT(gv.priorityType >= 0 && gv.priorityType < FdbClientLogEvents::PRIORITY_END && gv.readVersion > 0);
}
ASSERT(gv.readVersion > 0);
}
void parseEventGet(BinaryReader &reader) {
FdbClientLogEvents::EventGet g;
@ -142,25 +141,25 @@ bool checkTxInfoEntryFormat(BinaryReader &reader) {
reader >> event;
switch (event.type)
{
case FdbClientLogEvents::GET_VERSION_LATENCY:
case FdbClientLogEvents::EventType::GET_VERSION_LATENCY:
parser->parseGetVersion(reader);
break;
case FdbClientLogEvents::GET_LATENCY:
case FdbClientLogEvents::EventType::GET_LATENCY:
parser->parseGet(reader);
break;
case FdbClientLogEvents::GET_RANGE_LATENCY:
case FdbClientLogEvents::EventType::GET_RANGE_LATENCY:
parser->parseGetRange(reader);
break;
case FdbClientLogEvents::COMMIT_LATENCY:
case FdbClientLogEvents::EventType::COMMIT_LATENCY:
parser->parseCommit(reader);
break;
case FdbClientLogEvents::ERROR_GET:
case FdbClientLogEvents::EventType::ERROR_GET:
parser->parseErrorGet(reader);
break;
case FdbClientLogEvents::ERROR_GET_RANGE:
case FdbClientLogEvents::EventType::ERROR_GET_RANGE:
parser->parseErrorGetRange(reader);
break;
case FdbClientLogEvents::ERROR_COMMIT:
case FdbClientLogEvents::EventType::ERROR_COMMIT:
parser->parseErrorCommit(reader);
break;
default:

View File

@ -34,6 +34,7 @@
#include "flow/DeterministicRandom.h"
#include "fdbclient/ManagementAPI.actor.h"
#include "flow/actorcompiler.h" // This must be the last #include.
#include "flow/network.h"
//#define SevCCheckInfo SevVerbose
#define SevCCheckInfo SevInfo
@ -1295,7 +1296,7 @@ struct ConsistencyCheckWorkload : TestWorkload
vector<ISimulator::ProcessInfo*> all = g_simulator.getAllProcesses();
for(int i = 0; i < all.size(); i++) {
if( all[i]->isReliable() && all[i]->name == std::string("Server") && all[i]->startingClass != ProcessClass::TesterClass ) {
if( all[i]->isReliable() && all[i]->name == std::string("Server") && all[i]->startingClass != ProcessClass::TesterClass && all[i]->protocolVersion == g_network->protocolVersion() ) {
if(!workerAddresses.count(all[i]->address)) {
TraceEvent("ConsistencyCheck_WorkerMissingFromList").detail("Addr", all[i]->address);
return false;

View File

@ -69,7 +69,7 @@ struct DowngradeWorkload : TestWorkload {
};
ACTOR static Future<Void> writeOld(Database cx, int numObjects, Key key) {
BinaryWriter writer(IncludeVersion(currentProtocolVersion));
BinaryWriter writer(IncludeVersion(g_network->protocolVersion()));
std::vector<OldStruct> data(numObjects);
for (auto& oldObject : data) {
oldObject.setFields();
@ -90,7 +90,7 @@ struct DowngradeWorkload : TestWorkload {
}
ACTOR static Future<Void> writeNew(Database cx, int numObjects, Key key) {
ProtocolVersion protocolVersion = currentProtocolVersion;
ProtocolVersion protocolVersion = g_network->protocolVersion();
protocolVersion.addObjectSerializerFlag();
ObjectWriter writer(IncludeVersion(protocolVersion));
std::vector<NewStruct> data(numObjects);
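writeOld and writeNew differ only in the version they stamp: IncludeVersion(...) prefixes the serialized bytes with a protocol version so a reader can dispatch on it, which is what lets a downgraded binary still parse the old layout. A toy, self-contained analogue of that version-prefixed framing (not FDB's actual wire format; the constant is illustrative):

#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

// Toy analogue of IncludeVersion: prepend the writer's protocol version so a
// reader can branch on it before decoding the payload. Format invented here.
std::vector<uint8_t> writeWithVersion(uint64_t version, const std::vector<uint8_t>& payload) {
    std::vector<uint8_t> out(sizeof(version));
    std::memcpy(out.data(), &version, sizeof(version));
    out.insert(out.end(), payload.begin(), payload.end());
    return out;
}

int main() {
    const uint64_t kVersion = 0x0FDB00B070000000ULL; // illustrative value only
    auto blob = writeWithVersion(kVersion, { 1, 2, 3 });
    uint64_t seen;
    std::memcpy(&seen, blob.data(), sizeof(seen));
    assert(seen == kVersion); // a reader would pick old vs. new decoding here
}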

View File

@ -49,7 +49,13 @@ struct LowLatencyWorkload : TestWorkload {
std::string description() const override { return "LowLatency"; }
Future<Void> setup(Database const& cx) override { return Void(); }
Future<Void> setup(Database const& cx) override {
if (g_network->isSimulated()) {
ASSERT(const_cast<ServerKnobs*>(SERVER_KNOBS)->setKnob("min_delay_cc_worst_fit_candidacy_seconds", "5"));
ASSERT(const_cast<ServerKnobs*>(SERVER_KNOBS)->setKnob("max_delay_cc_worst_fit_candidacy_seconds", "10"));
}
return Void();
}
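Overriding knobs via const_cast is a test-only idiom: setKnob returns false for an unknown name, and wrapping the call in ASSERT makes a renamed knob fail the test loudly instead of silently running with defaults. A self-contained sketch of that assumed contract (the real Knobs class is more elaborate):

#include <cassert>
#include <map>
#include <string>

// Sketch of the setKnob contract assumed above: unknown names return false,
// so ASSERT(setKnob(...)) catches typos and renames at test time.
struct KnobsSketch {
    std::map<std::string, std::string> values{
        { "min_delay_cc_worst_fit_candidacy_seconds", "0" },
        { "max_delay_cc_worst_fit_candidacy_seconds", "0" },
    };
    bool setKnob(const std::string& name, const std::string& value) {
        auto it = values.find(name);
        if (it == values.end()) return false;
        it->second = value;
        return true;
    }
};

int main() {
    KnobsSketch knobs;
    assert(knobs.setKnob("min_delay_cc_worst_fit_candidacy_seconds", "5"));
    assert(!knobs.setKnob("no_such_knob", "1")); // unknown name rejected
}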
Future<Void> start(Database const& cx) override {
if( clientId == 0 )

View File

@ -0,0 +1,51 @@
/*
* ProtocolVersion.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2019 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "fdbserver/workloads/workloads.actor.h"
struct ProtocolVersionWorkload : TestWorkload {
ProtocolVersionWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {}
std::string description() const override { return "ProtocolVersionWorkload"; }
Future<Void> start(Database const& cx) override { return _start(this, cx); }
ACTOR Future<Void> _start(ProtocolVersionWorkload* self, Database cx) {
state std::vector<ISimulator::ProcessInfo*> allProcesses = g_pSimulator->getAllProcesses();
state std::vector<ISimulator::ProcessInfo*>::iterator diffVersionProcess =
find_if(allProcesses.begin(), allProcesses.end(),
[](const ISimulator::ProcessInfo* p) { return p->protocolVersion != currentProtocolVersion; });
ASSERT(diffVersionProcess != allProcesses.end());
RequestStream<ProtocolInfoRequest> requestStream{ Endpoint{ { (*diffVersionProcess)->addresses },
WLTOKEN_PROTOCOL_INFO } };
ProtocolInfoReply reply = wait(retryBrokenPromise(requestStream, ProtocolInfoRequest{}));
ASSERT(reply.version != g_network->protocolVersion());
return Void();
}
Future<bool> check(Database const& cx) override { return true; }
void getMetrics(vector<PerfMetric>& m) override {}
};
WorkloadFactory<ProtocolVersionWorkload> ProtocolVersionWorkloadFactory("ProtocolVersion");

View File

@ -432,6 +432,20 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
ASSERT(e.code() == error_code_key_outside_legal_range);
tx->reset();
}
// test case when registered range is the same as the underlying module
try {
state Standalone<RangeResultRef> result = wait(tx->getRange(KeyRangeRef(LiteralStringRef("\xff\xff/worker_interfaces/"),
LiteralStringRef("\xff\xff/worker_interfaces0")),
CLIENT_KNOBS->TOO_MANY));
// We should have at least 1 process in the cluster
ASSERT(result.size());
state KeyValueRef entry = deterministicRandom()->randomChoice(result);
Optional<Value> singleRes = wait(tx->get(entry.key));
ASSERT(singleRes.present() && singleRes.get() == entry.value);
tx->reset();
} catch (Error& e) {
wait(tx->onError(e));
}
return Void();
}

View File

@ -166,7 +166,7 @@ struct StatusWorkload : TestWorkload {
state double issued = now();
StatusObject result = wait(StatusClient::statusFetcher(cx));
++self->replies;
BinaryWriter br(AssumeVersion(currentProtocolVersion));
BinaryWriter br(AssumeVersion(g_network->protocolVersion()));
save(br, result);
self->totalSize += br.getLength();
TraceEvent("StatusWorkloadReply").detail("ReplySize", br.getLength()).detail("Latency", now() - issued);//.detail("Reply", json_spirit::write_string(json_spirit::mValue(result)));

View File

@ -156,7 +156,7 @@ struct StorefrontWorkload : TestWorkload {
updaters.clear();
// set value for the order
BinaryWriter wr(AssumeVersion(currentProtocolVersion)); wr << itemList;
BinaryWriter wr(AssumeVersion(g_network->protocolVersion())); wr << itemList;
tr.set(orderKey, wr.toValue());
wait( tr.commit() );
@ -187,7 +187,7 @@ struct StorefrontWorkload : TestWorkload {
int orderIdx;
for(orderIdx=0; orderIdx<values.size(); orderIdx++) {
vector<int> saved;
BinaryReader br( values[orderIdx].value, AssumeVersion(currentProtocolVersion) );
BinaryReader br( values[orderIdx].value, AssumeVersion(g_network->protocolVersion()) );
br >> saved;
for(int c=0; c<saved.size(); c++)
result[saved[c]]++;
@ -247,7 +247,7 @@ struct StorefrontWorkload : TestWorkload {
for( int i=0; i < it->second; i++ )
itemList.push_back( it->first );
}
BinaryWriter wr(AssumeVersion(currentProtocolVersion)); wr << itemList;
BinaryWriter wr(AssumeVersion(g_network->protocolVersion())); wr << itemList;
if( wr.toValue() != val.get().toString() ) {
TraceEvent(SevError, "TestFailure")
.detail("Reason", "OrderContentsMismatch")

View File

@ -86,19 +86,33 @@ set(FLOW_SRCS
rte_memcpy.h
serialize.cpp
serialize.h
stacktrace.amalgamation.cpp
stacktrace.h
test_memcpy.cpp
test_memcpy_perf.cpp
version.cpp)
version.cpp
xxhash.c
xxhash.h)
add_library(stacktrace stacktrace.amalgamation.cpp stacktrace.h)
if (USE_ASAN)
target_compile_definitions(stacktrace PRIVATE ADDRESS_SANITIZER)
elseif(USE_MSAN)
target_compile_definitions(stacktrace PRIVATE MEMORY_SANITIZER)
elseif(USE_UBSAN)
target_compile_definitions(stacktrace PRIVATE UNDEFINED_BEHAVIOR_SANITIZER)
elseif(USE_TSAN)
target_compile_definitions(stacktrace PRIVATE THREAD_SANITIZER DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL=1)
endif()
if(UNIX AND NOT APPLE)
list(APPEND FLOW_SRCS folly_memcpy.S)
endif()
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/SourceVersion.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/SourceVersion.h)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/config.h)
add_flow_target(STATIC_LIBRARY NAME flow SRCS ${FLOW_SRCS})
target_link_libraries(flow PRIVATE stacktrace)
if (NOT APPLE AND NOT WIN32)
set (FLOW_LIBS ${FLOW_LIBS} rt)
elseif(WIN32)

View File

@ -59,7 +59,7 @@ void printBitsBig(size_t const size, void const * const ptr)
template<typename IntType>
void testCompressedInt(IntType n, StringRef rep = StringRef()) {
BinaryWriter w(AssumeVersion(currentProtocolVersion));
BinaryWriter w(AssumeVersion(g_network->protocolVersion()));
CompressedInt<IntType> cn(n);
w << cn;
@ -74,7 +74,7 @@ void testCompressedInt(IntType n, StringRef rep = StringRef()) {
rep = w.toValue();
cn.value = 0;
BinaryReader r(rep, AssumeVersion(currentProtocolVersion));
BinaryReader r(rep, AssumeVersion(g_network->protocolVersion()));
r >> cn;
if(cn.value != n) {

View File

@ -80,6 +80,10 @@ uint32_t DeterministicRandom::randomUInt32() {
return gen64();
}
uint64_t DeterministicRandom::randomUInt64() {
return gen64();
}
uint32_t DeterministicRandom::randomSkewedUInt32(uint32_t min, uint32_t maxPlusOne) {
std::uniform_real_distribution<double> distribution(std::log(min), std::log(maxPlusOne - 1));
double logpower = distribution(random);
@ -120,3 +124,10 @@ void DeterministicRandom::addref() {
void DeterministicRandom::delref() {
ReferenceCounted<DeterministicRandom>::delref();
}
void generateRandomData(uint8_t* buffer, int length) {
for (int i = 0; i < length; i += sizeof(uint32_t)) {
uint32_t val = deterministicRandom()->randomUInt32();
memcpy(&buffer[i], &val, std::min(length - i, (int)sizeof(uint32_t)));
}
}
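generateRandomData fills the buffer one 32-bit word at a time; the std::min on the final memcpy truncates the last word so lengths that are not a multiple of four never write past the end. A self-contained equivalent with a standard PRNG standing in for Flow's deterministicRandom():

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <random>
#include <vector>

// Same chunked-fill pattern as generateRandomData above; std::mt19937 is a
// stand-in for deterministicRandom()->randomUInt32(), which this sketch
// cannot link against.
void fillRandom(uint8_t* buffer, int length) {
    std::mt19937 rng(42); // fixed seed: deterministic, like the Flow version
    for (int i = 0; i < length; i += sizeof(uint32_t)) {
        uint32_t val = rng();
        std::memcpy(&buffer[i], &val, std::min<int>(length - i, sizeof(uint32_t)));
    }
}

int main() {
    std::vector<uint8_t> buf(10); // deliberately not a multiple of 4
    fillRandom(buf.data(), static_cast<int>(buf.size()));
}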

View File

@ -44,6 +44,7 @@ public:
int randomInt(int min, int maxPlusOne) override;
int64_t randomInt64(int64_t min, int64_t maxPlusOne) override;
uint32_t randomUInt32() override;
uint64_t randomUInt64() override;
uint32_t randomSkewedUInt32(uint32_t min, uint32_t maxPlusOne) override;
UID randomUniqueID() override;
char randomAlphaNumeric() override;

View File

@ -24,6 +24,7 @@
#include "flow/Error.h"
#include "flow/Platform.h"
#include "flow/config.h"
// ALLOC_INSTRUMENTATION_STDOUT enables non-sampled logging of all allocations and deallocations to stdout to be processed by tools/alloc_instrumentation.py
//#define ALLOC_INSTRUMENTATION_STDOUT ENABLED(NOT_IN_CLEAN)

View File

@ -41,6 +41,8 @@ thread_local ISimulator::ProcessInfo* ISimulator::currentProcess = nullptr;
// we have a simulated context here; we'd just use the current context regardless.
static HistogramRegistry* globalHistograms = nullptr;
#pragma region HistogramRegistry
HistogramRegistry& GetHistogramRegistry() {
ISimulator::ProcessInfo* info = g_simulator.getCurrentProcess();
@ -89,6 +91,16 @@ void HistogramRegistry::logReport() {
}
}
#pragma endregion // HistogramRegistry
#pragma region Histogram
const std::unordered_map<Histogram::Unit, std::string> Histogram::UnitToStringMapper = {
{ Histogram::Unit::microseconds, "microseconds" },
{ Histogram::Unit::bytes, "bytes" },
{ Histogram::Unit::bytes_per_second, "bytes_per_second" }
};
void Histogram::writeToLog() {
bool active = false;
for (uint32_t i = 0; i < 32; i++) {
@ -102,17 +114,19 @@ void Histogram::writeToLog() {
}
TraceEvent e(SevInfo, "Histogram");
e.detail("Group", group).detail("Op", op);
e.detail("Group", group).detail("Op", op).detail("Unit", UnitToStringMapper.at(unit));
for (uint32_t i = 0; i < 32; i++) {
uint32_t value = ((uint32_t)1) << (i + 1);
if (buckets[i]) {
switch (unit) {
case Unit::microseconds: {
uint32_t usec = ((uint32_t)1) << (i + 1);
e.detail(format("LessThan%u.%03u", usec / 1000, usec % 1000), buckets[i]);
case Unit::microseconds:
e.detail(format("LessThan%u.%03u", value / 1000, value % 1000), buckets[i]);
break;
}
case Unit::bytes:
e.detail(format("LessThan%u", ((uint32_t)1) << (i + 1)), buckets[i]);
case Unit::bytes_per_second:
e.detail(format("LessThan%u", value), buckets[i]);
break;
default:
ASSERT(false);
@ -121,6 +135,8 @@ void Histogram::writeToLog() {
}
}
#pragma endregion // Histogram
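For reference, writeToLog above walks 32 power-of-two buckets and emits a unit-appropriate "LessThan..." detail for each non-empty one; a sample lands in the first bucket whose upper bound 2^(i+1) exceeds it. A dependency-free sketch of just that bucket math:

#include <cstdint>
#include <cstdio>

// Sketch of the power-of-two bucketing implied by the LessThan labels above;
// bucket i covers samples in [2^i, 2^(i+1)). Not Flow code.
int bucketFor(uint32_t n) {
    for (int i = 0; i < 32; i++)
        if (n < (uint64_t(1) << (i + 1))) return i;
    return 31; // unreachable for 32-bit inputs
}

int main() {
    for (uint32_t s : { 3u, 1000u, 1u << 20 }) {
        int b = bucketFor(s);
        printf("%u -> bucket %d (LessThan%llu)\n", s, b,
               (unsigned long long)(uint64_t(1) << (b + 1)));
    }
}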
TEST_CASE("/flow/histogram/smoke_test") {
{
@ -168,4 +184,4 @@ TEST_CASE("/flow/histogram/smoke_test") {
GetHistogramRegistry().logReport();
return Void();
}
}

View File

@ -26,6 +26,7 @@
#include <string>
#include <map>
#include <unordered_map>
#ifdef _WIN32
#include <intrin.h>
@ -57,11 +58,16 @@ HistogramRegistry& GetHistogramRegistry();
*/
class Histogram sealed : public ReferenceCounted<Histogram> {
public:
enum class Unit { microseconds, bytes };
enum class Unit { microseconds, bytes, bytes_per_second };
private:
static const std::unordered_map<Unit, std::string> UnitToStringMapper;
Histogram(std::string group, std::string op, Unit unit, HistogramRegistry& registry)
: group(group), op(op), unit(unit), registry(registry), ReferenceCounted<Histogram>() {
ASSERT(UnitToStringMapper.find(unit) != UnitToStringMapper.end());
clear();
}

View File

@ -66,7 +66,7 @@ class UID {
public:
constexpr static FileIdentifier file_identifier = 15597147;
UID() { part[0] = part[1] = 0; }
UID( uint64_t a, uint64_t b ) { part[0]=a; part[1]=b; }
constexpr UID(uint64_t a, uint64_t b) : part{ a, b } {}
std::string toString() const;
std::string shortString() const;
bool isValid() const { return part[0] || part[1]; }
@ -130,6 +130,7 @@ public:
virtual int randomInt(int min, int maxPlusOne) = 0;
virtual int64_t randomInt64(int64_t min, int64_t maxPlusOne) = 0;
virtual uint32_t randomUInt32() = 0;
virtual uint64_t randomUInt64() = 0;
virtual UID randomUniqueID() = 0;
virtual char randomAlphaNumeric() = 0;
virtual std::string randomAlphaNumeric( int length ) = 0;
@ -174,4 +175,7 @@ Reference<IRandom> deterministicRandom();
// non-deterministic contexts.
Reference<IRandom> nondeterministicRandom();
// Populates a buffer with a random sequence of bytes
void generateRandomData(uint8_t* buffer, int length);
#endif

View File

@ -176,6 +176,7 @@ void FlowKnobs::initialize(bool randomize, bool isSimulated) {
init( TRACE_RETRY_OPEN_INTERVAL, 1.00 );
init( MIN_TRACE_SEVERITY, isSimulated ? 1 : 10 ); // Related to the trace severity in Trace.h
init( MAX_TRACE_SUPPRESSIONS, 1e4 );
init( TRACE_DATETIME_ENABLED, true ); // trace time in human readable format (always real time)
init( TRACE_SYNC_ENABLED, 0 );
init( TRACE_EVENT_METRIC_UNITS_PER_SAMPLE, 500 );
init( TRACE_EVENT_THROTTLER_SAMPLE_EXPIRY, 1800.0 ); // 30 mins

View File

@ -194,6 +194,7 @@ public:
double TRACE_RETRY_OPEN_INTERVAL;
int MIN_TRACE_SEVERITY;
int MAX_TRACE_SUPPRESSIONS;
bool TRACE_DATETIME_ENABLED;
int TRACE_SYNC_ENABLED;
int TRACE_EVENT_METRIC_UNITS_PER_SAMPLE;
int TRACE_EVENT_THROTTLER_SAMPLE_EXPIRY;

View File

@ -170,6 +170,9 @@ public:
virtual flowGlobalType global(int id) const override { return (globals.size() > id) ? globals[id] : nullptr; }
virtual void setGlobal(size_t id, flowGlobalType v) { globals.resize(std::max(globals.size(),id+1)); globals[id] = v; }
ProtocolVersion protocolVersion() override { return currentProtocolVersion; }
std::vector<flowGlobalType> globals;
virtual const TLSConfig& getTLSConfig() const override { return tlsConfig; }
@ -2028,7 +2031,7 @@ void net2_test() {
SendBuffer* pb = unsent.getWriteBuffer();
ReliablePacket* rp = new ReliablePacket; // 0
PacketWriter wr(pb,rp,AssumeVersion(currentProtocolVersion));
PacketWriter wr(pb,rp,AssumeVersion(g_network->protocolVersion()));
//BinaryWriter wr;
SplitBuffer packetLen;
uint32_t len = 0;

Some files were not shown because too many files have changed in this diff.