Merge remote-tracking branch 'upstream/master' into add-c-function-for-management-commands

This commit is contained in:
Chaoguang Lin 2020-12-21 12:32:31 -08:00
commit b68d84aaea
133 changed files with 9956 additions and 847 deletions

.gitignore (vendored)

@ -95,3 +95,6 @@ flow/coveragetool/obj
.DS_Store
temp/
/versions.target
/compile_commands.json
/.ccls-cache
.clangd/


@ -18,6 +18,7 @@
* limitations under the License.
*/
#include <cstdint>
#define FDB_API_VERSION 700
#define FDB_INCLUDE_LEGACY_TYPES
@ -226,6 +227,11 @@ fdb_error_t fdb_future_get_int64( FDBFuture* f, int64_t* out_value ) {
CATCH_AND_RETURN( *out_value = TSAV(int64_t, f)->get(); );
}
extern "C" DLLEXPORT
fdb_error_t fdb_future_get_uint64(FDBFuture *f, uint64_t *out) {
CATCH_AND_RETURN( *out = TSAV(uint64_t, f)->get(); );
}
extern "C" DLLEXPORT
fdb_error_t fdb_future_get_bool( FDBFuture* f, bool* out_value ) {
CATCH_AND_RETURN( *out_value = TSAV(bool, f)->get(); );
@ -608,6 +614,11 @@ FDBFuture* fdb_transaction_get_approximate_size(FDBTransaction* tr) {
return (FDBFuture*)TXN(tr)->getApproximateSize().extractPtr();
}
extern "C" DLLEXPORT
FDBFuture* fdb_get_server_protocol(const char* clusterFilePath){
return (FDBFuture*)( API->getServerProtocol(clusterFilePath ? clusterFilePath : "").extractPtr() );
}
extern "C" DLLEXPORT
FDBFuture* fdb_transaction_get_versionstamp( FDBTransaction* tr )
{

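For orientation before the header and binding changes below, here is a minimal usage sketch of the new entry point, modeled directly on the `fdb_get_server_protocol` unit test added later in this commit (`fdb_check` and `clusterFilePath` are assumed from that test's harness):

```cpp
// Minimal sketch, assuming the unit-test harness below: fdb_check()
// aborts on a non-zero fdb_error_t, the API version has been selected,
// and the network thread is running.
FDBFuture* protocolFuture = fdb_get_server_protocol(clusterFilePath.c_str());
uint64_t protocolVersion;
fdb_check(fdb_future_block_until_ready(protocolFuture));
fdb_check(fdb_future_get_uint64(protocolFuture, &protocolVersion));
fdb_future_destroy(protocolFuture);
```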

@ -137,6 +137,9 @@ extern "C" {
DLLEXPORT WARN_UNUSED_RESULT fdb_error_t
fdb_future_get_int64( FDBFuture* f, int64_t* out );
DLLEXPORT WARN_UNUSED_RESULT fdb_error_t
fdb_future_get_uint64( FDBFuture* f, uint64_t* out );
DLLEXPORT WARN_UNUSED_RESULT fdb_error_t
fdb_future_get_bool( FDBFuture* f, bool* out );
@ -256,6 +259,9 @@ extern "C" {
DLLEXPORT WARN_UNUSED_RESULT FDBFuture*
fdb_transaction_get_approximate_size(FDBTransaction* tr);
DLLEXPORT WARN_UNUSED_RESULT FDBFuture*
fdb_get_server_protocol(const char* clusterFilePath);
DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_transaction_get_versionstamp( FDBTransaction* tr );
DLLEXPORT WARN_UNUSED_RESULT FDBFuture*


@ -365,7 +365,7 @@ int run_op_get(FDBTransaction* transaction, char* keystr, char* valstr, int snap
return FDB_SUCCESS;
}
int run_op_getrange(FDBTransaction* transaction, char* keystr, char* keystr2, char* valstr, int snapshot, int reverse) {
int run_op_getrange(FDBTransaction* transaction, char* keystr, char* keystr2, char* valstr, int snapshot, int reverse, FDBStreamingMode streaming_mode) {
FDBFuture* f;
fdb_error_t err;
FDBKeyValue const* out_kv;
@ -374,7 +374,7 @@ int run_op_getrange(FDBTransaction* transaction, char* keystr, char* keystr2, ch
f = fdb_transaction_get_range(transaction, FDB_KEYSEL_FIRST_GREATER_OR_EQUAL((uint8_t*)keystr, strlen(keystr)),
FDB_KEYSEL_LAST_LESS_OR_EQUAL((uint8_t*)keystr2, strlen(keystr2)) + 1, 0 /* limit */,
0 /* target_bytes */, FDB_STREAMING_MODE_WANT_ALL /* FDBStreamingMode */,
0 /* target_bytes */, streaming_mode /* FDBStreamingMode */,
0 /* iteration */, snapshot, reverse /* reverse */);
fdb_wait_and_handle_error(fdb_transaction_get_range, f, transaction);
@ -488,13 +488,13 @@ retryTxn:
rc = run_op_get(transaction, keystr, valstr, 0);
break;
case OP_GETRANGE:
rc = run_op_getrange(transaction, keystr, keystr2, valstr, 0, args->txnspec.ops[i][OP_REVERSE]);
rc = run_op_getrange(transaction, keystr, keystr2, valstr, 0, args->txnspec.ops[i][OP_REVERSE], args->streaming_mode);
break;
case OP_SGET:
rc = run_op_get(transaction, keystr, valstr, 1);
break;
case OP_SGETRANGE:
rc = run_op_getrange(transaction, keystr, keystr2, valstr, 1, args->txnspec.ops[i][OP_REVERSE]);
rc = run_op_getrange(transaction, keystr, keystr2, valstr, 1, args->txnspec.ops[i][OP_REVERSE], args->streaming_mode);
break;
case OP_UPDATE:
randstr(valstr, args->value_length + 1);
@ -1233,6 +1233,7 @@ int init_args(mako_args_t* args) {
args->trace = 0;
args->tracepath[0] = '\0';
args->traceformat = 0; /* default to client's default (XML) */
args->streaming_mode = FDB_STREAMING_MODE_WANT_ALL;
args->txntrace = 0;
args->txntagging = 0;
memset(args->txntagging_prefix, 0, TAGPREFIXLENGTH_MAX);
@ -1397,6 +1398,7 @@ void usage() {
printf("%-24s %s\n", " --txntagging_prefix", "Specify the prefix of transaction tag - mako${txntagging_prefix} (Default: '')");
printf("%-24s %s\n", " --knobs=KNOBS", "Set client knobs");
printf("%-24s %s\n", " --flatbuffers", "Use flatbuffers");
printf("%-24s %s\n", " --streaming", "Streaming mode: all (default), iterator, small, medium, large, serial");
}
/* parse benchmark parameters */
@ -1428,6 +1430,7 @@ int parse_args(int argc, char* argv[], mako_args_t* args) {
{ "knobs", required_argument, NULL, ARG_KNOBS },
{ "tracepath", required_argument, NULL, ARG_TRACEPATH },
{ "trace_format", required_argument, NULL, ARG_TRACEFORMAT },
{ "streaming", required_argument, NULL, ARG_STREAMING_MODE },
{ "txntrace", required_argument, NULL, ARG_TXNTRACE },
/* no args */
{ "help", no_argument, NULL, 'h' },
@ -1547,7 +1550,25 @@ int parse_args(int argc, char* argv[], mako_args_t* args) {
args->traceformat = 0;
} else {
fprintf(stderr, "Error: Invalid trace_format %s\n", optarg);
exit(0);
return -1;
}
break;
case ARG_STREAMING_MODE:
if (strncmp(optarg, "all", 3) == 0) {
args->streaming_mode = FDB_STREAMING_MODE_WANT_ALL;
} else if (strncmp(optarg, "iterator", 8) == 0) {
args->streaming_mode = FDB_STREAMING_MODE_ITERATOR;
} else if (strncmp(optarg, "small", 5) == 0) {
args->streaming_mode = FDB_STREAMING_MODE_SMALL;
} else if (strncmp(optarg, "medium", 6) == 0) {
args->streaming_mode = FDB_STREAMING_MODE_MEDIUM;
} else if (strncmp(optarg, "large", 5) == 0) {
args->streaming_mode = FDB_STREAMING_MODE_LARGE;
} else if (strncmp(optarg, "serial", 6) == 0) {
args->streaming_mode = FDB_STREAMING_MODE_SERIAL;
} else {
fprintf(stderr, "Error: Invalid streaming mode %s\n", optarg);
return -1;
}
break;
case ARG_TXNTRACE:


@ -77,7 +77,8 @@ enum Arguments {
ARG_TPSCHANGE,
ARG_TXNTRACE,
ARG_TXNTAGGING,
ARG_TXNTAGGINGPREFIX
ARG_TXNTAGGINGPREFIX,
ARG_STREAMING_MODE
};
enum TPSChangeTypes { TPS_SIN, TPS_SQUARE, TPS_PULSE };
@ -129,6 +130,7 @@ typedef struct {
int txntrace;
int txntagging;
char txntagging_prefix[TAGPREFIXLENGTH_MAX];
FDBStreamingMode streaming_mode;
} mako_args_t;
/* shared memory */


@ -39,7 +39,7 @@
#pragma once
#define FDB_API_VERSION 620
#define FDB_API_VERSION 700
#include <foundationdb/fdb_c.h>
#include <string>
@ -99,7 +99,6 @@ class Int64Future : public Future {
Int64Future(FDBFuture* f) : Future(f) {}
};
class KeyFuture : public Future {
public:
// Call this function instead of fdb_future_get_key when using the KeyFuture


@ -20,7 +20,7 @@
// Unit tests for API setup, network initialization functions from the FDB C API.
#define FDB_API_VERSION 620
#define FDB_API_VERSION 700
#include <foundationdb/fdb_c.h>
#include <iostream>
#include <thread>
@ -42,13 +42,13 @@ TEST_CASE("setup") {
CHECK(err);
// Select current API version
fdb_check(fdb_select_api_version(620));
fdb_check(fdb_select_api_version(700));
// Error to call again after a successful return
err = fdb_select_api_version(620);
err = fdb_select_api_version(700);
CHECK(err);
CHECK(fdb_get_max_api_version() >= 620);
CHECK(fdb_get_max_api_version() >= 700);
fdb_check(fdb_setup_network());
// Calling a second time should fail


@ -20,7 +20,7 @@
// Unit tests for the FoundationDB C API.
#define FDB_API_VERSION 620
#define FDB_API_VERSION 700
#include <foundationdb/fdb_c.h>
#include <assert.h>
#include <string.h>
@ -56,6 +56,7 @@ FDBDatabase *fdb_open_database(const char *clusterFile) {
static FDBDatabase *db = nullptr;
static std::string prefix;
static std::string clusterFilePath = "";
std::string key(const std::string& key) {
return prefix + key;
@ -1538,6 +1539,15 @@ TEST_CASE("fdb_transaction_get_approximate_size") {
}
}
TEST_CASE("fdb_get_server_protocol") {
FDBFuture* protocolFuture = fdb_get_server_protocol(clusterFilePath.c_str());
uint64_t out;
fdb_check(fdb_future_block_until_ready(protocolFuture));
fdb_check(fdb_future_get_uint64(protocolFuture, &out));
fdb_future_destroy(protocolFuture);
}
TEST_CASE("fdb_transaction_watch read_your_writes_disable") {
// Watches created on a transaction with the option READ_YOUR_WRITES_DISABLE
// should return a watches_disabled error.
@ -1744,10 +1754,21 @@ TEST_CASE("fdb_transaction_add_conflict_range") {
CHECK(success);
}
std::string get_valid_status_json() {
TEST_CASE("special-key-space valid transaction ID") {
auto value = get_value("\xff\xff/tracing/a/transaction_id", /* snapshot */ false, {});
REQUIRE(value.has_value());
uint64_t transaction_id = std::stoul(value.value());
CHECK(transaction_id > 0);
}
TEST_CASE("special-key-space custom transaction ID") {
fdb::Transaction tr(db);
fdb_check(tr.set_option(FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES,
nullptr, 0));
while (1) {
fdb::ValueFuture f1 = tr.get("\xff\xff/status/json", false);
tr.set("\xff\xff/tracing/a/transaction_id", std::to_string(ULONG_MAX));
fdb::ValueFuture f1 = tr.get("\xff\xff/tracing/a/transaction_id",
/* snapshot */ false);
fdb_error_t err = wait_future(f1);
if (err) {
@ -1761,6 +1782,207 @@ std::string get_valid_status_json() {
int vallen;
fdb_check(f1.get(&out_present, (const uint8_t **)&val, &vallen));
REQUIRE(out_present);
uint64_t transaction_id = std::stoul(std::string(val, vallen));
CHECK(transaction_id == ULONG_MAX);
break;
}
}
TEST_CASE("special-key-space set transaction ID after write") {
fdb::Transaction tr(db);
fdb_check(tr.set_option(FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES,
nullptr, 0));
while (1) {
tr.set(key("foo"), "bar");
tr.set("\xff\xff/tracing/a/transaction_id", "0");
fdb::ValueFuture f1 = tr.get("\xff\xff/tracing/a/transaction_id",
/* snapshot */ false);
fdb_error_t err = wait_future(f1);
if (err) {
fdb::EmptyFuture f2 = tr.on_error(err);
fdb_check(wait_future(f2));
continue;
}
int out_present;
char *val;
int vallen;
fdb_check(f1.get(&out_present, (const uint8_t **)&val, &vallen));
REQUIRE(out_present);
uint64_t transaction_id = std::stoul(std::string(val, vallen));
CHECK(transaction_id != 0);
break;
}
}
TEST_CASE("special-key-space set token after write") {
fdb::Transaction tr(db);
fdb_check(tr.set_option(FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES,
nullptr, 0));
while (1) {
tr.set(key("foo"), "bar");
tr.set("\xff\xff/tracing/a/token", "false");
fdb::ValueFuture f1 = tr.get("\xff\xff/tracing/a/token",
/* snapshot */ false);
fdb_error_t err = wait_future(f1);
if (err) {
fdb::EmptyFuture f2 = tr.on_error(err);
fdb_check(wait_future(f2));
continue;
}
int out_present;
char *val;
int vallen;
fdb_check(f1.get(&out_present, (const uint8_t **)&val, &vallen));
REQUIRE(out_present);
uint64_t token = std::stoul(std::string(val, vallen));
CHECK(token != 0);
break;
}
}
TEST_CASE("special-key-space valid token") {
auto value = get_value("\xff\xff/tracing/a/token", /* snapshot */ false, {});
REQUIRE(value.has_value());
uint64_t token = std::stoul(value.value());
CHECK(token > 0);
}
TEST_CASE("special-key-space disable tracing") {
fdb::Transaction tr(db);
fdb_check(tr.set_option(FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES,
nullptr, 0));
while (1) {
tr.set("\xff\xff/tracing/a/token", "false");
fdb::ValueFuture f1 = tr.get("\xff\xff/tracing/a/token",
/* snapshot */ false);
fdb_error_t err = wait_future(f1);
if (err) {
fdb::EmptyFuture f2 = tr.on_error(err);
fdb_check(wait_future(f2));
continue;
}
int out_present;
char *val;
int vallen;
fdb_check(f1.get(&out_present, (const uint8_t **)&val, &vallen));
REQUIRE(out_present);
uint64_t token = std::stoul(std::string(val, vallen));
CHECK(token == 0);
break;
}
}
TEST_CASE("FDB_DB_OPTION_TRANSACTION_TRACE_DISABLE") {
fdb_check(fdb_database_set_option(db, FDB_DB_OPTION_TRANSACTION_TRACE_DISABLE, nullptr, 0));
auto value = get_value("\xff\xff/tracing/a/token", /* snapshot */ false, {});
REQUIRE(value.has_value());
uint64_t token = std::stoul(value.value());
CHECK(token == 0);
fdb_check(fdb_database_set_option(db, FDB_DB_OPTION_TRANSACTION_TRACE_ENABLE, nullptr, 0));
}
TEST_CASE("FDB_DB_OPTION_TRANSACTION_TRACE_DISABLE enable tracing for transaction") {
fdb_check(fdb_database_set_option(db, FDB_DB_OPTION_TRANSACTION_TRACE_DISABLE, nullptr, 0));
fdb::Transaction tr(db);
fdb_check(tr.set_option(FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES,
nullptr, 0));
while (1) {
tr.set("\xff\xff/tracing/a/token", "true");
fdb::ValueFuture f1 = tr.get("\xff\xff/tracing/a/token",
/* snapshot */ false);
fdb_error_t err = wait_future(f1);
if (err) {
fdb::EmptyFuture f2 = tr.on_error(err);
fdb_check(wait_future(f2));
continue;
}
int out_present;
char *val;
int vallen;
fdb_check(f1.get(&out_present, (const uint8_t **)&val, &vallen));
REQUIRE(out_present);
uint64_t token = std::stoul(std::string(val, vallen));
CHECK(token > 0);
break;
}
fdb_check(fdb_database_set_option(db, FDB_DB_OPTION_TRANSACTION_TRACE_ENABLE, nullptr, 0));
}
TEST_CASE("special-key-space tracing get range") {
std::string tracingBegin = "\xff\xff/tracing/a/";
std::string tracingEnd = "\xff\xff/tracing/a0";
fdb::Transaction tr(db);
fdb_check(tr.set_option(FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES,
nullptr, 0));
while (1) {
fdb::KeyValueArrayFuture f1 = tr.get_range(
FDB_KEYSEL_FIRST_GREATER_OR_EQUAL(
(const uint8_t *)tracingBegin.c_str(),
tracingBegin.size()
),
FDB_KEYSEL_LAST_LESS_OR_EQUAL(
(const uint8_t *)tracingEnd.c_str(),
tracingEnd.size()
) + 1, /* limit */ 0, /* target_bytes */ 0,
/* FDBStreamingMode */ FDB_STREAMING_MODE_WANT_ALL, /* iteration */ 0,
/* snapshot */ false, /* reverse */ 0);
fdb_error_t err = wait_future(f1);
if (err) {
fdb::EmptyFuture f2 = tr.on_error(err);
fdb_check(wait_future(f2));
continue;
}
FDBKeyValue const *out_kv;
int out_count;
int out_more;
fdb_check(f1.get(&out_kv, &out_count, &out_more));
CHECK(!out_more);
CHECK(out_count == 2);
CHECK(std::string((char *)out_kv[0].key, out_kv[0].key_length) == tracingBegin + "token");
CHECK(std::stoul(std::string((char *)out_kv[0].value, out_kv[0].value_length)) > 0);
CHECK(std::string((char *)out_kv[1].key, out_kv[1].key_length) == tracingBegin + "transaction_id");
CHECK(std::stoul(std::string((char *)out_kv[1].value, out_kv[1].value_length)) > 0);
break;
}
}
std::string get_valid_status_json() {
fdb::Transaction tr(db);
while (1) {
fdb::ValueFuture f1 = tr.get("\xff\xff/status/json", false);
fdb_error_t err = wait_future(f1);
if (err) {
fdb::EmptyFuture f2 = tr.on_error(err);
fdb_check(wait_future(f2));
continue;
}
int out_present;
char *val;
int vallen;
fdb_check(f1.get(&out_present, (const uint8_t **)&val, &vallen));
assert(out_present);
std::string statusJsonStr(val, vallen);
rapidjson::Document statusJson;
@ -1847,10 +2069,7 @@ TEST_CASE("fdb_error_predicate") {
CHECK(!fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE_NOT_COMMITTED, 1040)); // proxy_memory_limit_exceeded
}
// Feature not live yet, re-enable when checking if a blocking call is made
// from the network thread is live.
TEST_CASE("block_from_callback"
* doctest::skip(true)) {
TEST_CASE("block_from_callback") {
fdb::Transaction tr(db);
fdb::ValueFuture f1 = tr.get("foo", /*snapshot*/ true);
struct Context {
@ -1865,7 +2084,7 @@ TEST_CASE("block_from_callback"
fdb::ValueFuture f2 = context->tr->get("bar", /*snapshot*/ true);
fdb_error_t error = f2.block_until_ready();
if (error) {
CHECK(error == /*blocked_from_network_thread*/ 2025);
CHECK(error == /*blocked_from_network_thread*/ 2026);
}
context->event.set();
},
@ -1883,11 +2102,12 @@ int main(int argc, char **argv) {
doctest::Context context;
fdb_check(fdb_select_api_version(620));
fdb_check(fdb_select_api_version(700));
fdb_check(fdb_setup_network());
std::thread network_thread{ &fdb_run_network };
db = fdb_open_database(argv[1]);
clusterFilePath = std::string(argv[1]);
prefix = argv[2];
int res = context.run();
fdb_database_destroy(db);


@ -95,6 +95,7 @@ def api_version(ver):
'transactional',
'options',
'StreamingMode',
'get_server_protocol'
)
_add_symbols(fdb.impl, list)


@ -733,12 +733,18 @@ class FutureInt64(Future):
self.capi.fdb_future_get_int64(self.fpointer, ctypes.byref(value))
return value.value
class FutureUInt64(Future):
def wait(self):
self.block_until_ready()
value = ctypes.c_uint64()
self.capi.fdb_future_get_uint64(self.fpointer, ctypes.byref(value))
return value.value
class FutureBool(Future):
def wait(self):
self.block_until_ready()
value = ctypes.c_bool()
self.capi.fdb_future_get_bool(self.fpointer, ctypes.byref(value))
return value.value
class FutureKeyValueArray(Future):
def wait(self):
@ -1428,9 +1434,15 @@ def init_c_api():
_capi.fdb_future_get_int64.restype = ctypes.c_int
_capi.fdb_future_get_int64.errcheck = check_error_code
_capi.fdb_future_get_bool.argtypes = [ctypes.c_void_p, ctypes.POINTER(ctypes.c_bool)]
_capi.fdb_future_get_bool.restype = ctypes.c_int
_capi.fdb_future_get_bool.errcheck = check_error_code
_capi.fdb_future_get_uint64.argtypes = [ctypes.c_void_p, ctypes.POINTER(ctypes.c_uint64)]
_capi.fdb_future_get_uint64.restype = ctypes.c_int
_capi.fdb_future_get_uint64.errcheck = check_error_code
_capi.fdb_future_get_key.argtypes = [ctypes.c_void_p, ctypes.POINTER(ctypes.POINTER(ctypes.c_byte)),
ctypes.POINTER(ctypes.c_int)]
@ -1539,6 +1551,9 @@ def init_c_api():
_capi.fdb_transaction_get_approximate_size.argtypes = [ctypes.c_void_p]
_capi.fdb_transaction_get_approximate_size.restype = ctypes.c_void_p
_capi.fdb_get_server_protocol.argtypes = [ctypes.c_char_p]
_capi.fdb_get_server_protocol.restype = ctypes.c_void_p
_capi.fdb_transaction_get_versionstamp.argtypes = [ctypes.c_void_p]
_capi.fdb_transaction_get_versionstamp.restype = ctypes.c_void_p
@ -1738,6 +1753,12 @@ open_databases = {}
cacheLock = threading.Lock()
def get_server_protocol(clusterFilePath=None):
with _network_thread_reentrant_lock:
if not _network_thread:
init()
return FutureUInt64(_capi.fdb_get_server_protocol(optionalParamToBytes(clusterFilePath)[0]))
def open(cluster_file=None, event_model=None):
"""Opens the given database (or the default database of the cluster indicated


@ -4,7 +4,8 @@ RUN yum install -y centos-release-scl scl-utils
RUN rpmkeys --import "http://pool.sks-keyservers.net/pks/lookup?op=get&search=0x3fa7e0328081bff6a14da29aa6a19b38d3d831ef"
RUN curl https://download.mono-project.com/repo/centos7-stable.repo | tee /etc/yum.repos.d/mono-centos7-stable.repo
RUN yum install -y curl rpm-build wget git unzip devtoolset-8 devtoolset-8-libubsan-devel devtoolset-8-valgrind-devel \
rh-ruby26 go-toolset-7 rh-git218 rh-python36-devel java-11-openjdk-devel.x86_64 mono-devel dos2unix dpkg rh-python36
rh-ruby26 go-toolset-7 rh-git218 rh-python36-devel java-11-openjdk-devel.x86_64 mono-devel dos2unix dpkg rh-python36 \
lz4 lz4-devel lz4-static
# install Ninja
RUN cd /tmp && curl -L https://github.com/ninja-build/ninja/archive/v1.9.0.zip -o ninja.zip &&\


@ -1,24 +1,12 @@
if ((NOT GENERATE_EL6) AND (NOT "$ENV{GENERATE_EL6}" STREQUAL ""))
if (("$ENV{GENERATE_EL6}" STREQUAL "ON") OR ("$ENV{GENERATE_EL6}" STREQUAL "1") OR ("$ENV{GENERATE_EL6}" STREQUAL "YES"))
set(GENERATE_EL6 ON)
endif()
endif()
# RPM specifics
if(CPACK_GENERATOR MATCHES "RPM")
set(CPACK_PACKAGING_INSTALL_PREFIX "/")
if(GENERATE_EL6)
message(STATUS "Building EL6 components")
set(CPACK_COMPONENTS_ALL clients-el6 server-el6)
else()
message(STATUS "Building EL7 components")
set(CPACK_COMPONENTS_ALL clients-el7 server-el7)
endif()
set(CPACK_COMPONENTS_ALL clients-el7 server-el7 clients-versioned server-versioned)
set(CPACK_RESOURCE_FILE_README ${CMAKE_SOURCE_DIR}/README.md)
set(CPACK_RESOURCE_FILE_LICENSE ${CMAKE_SOURCE_DIR}/LICENSE)
elseif(CPACK_GENERATOR MATCHES "DEB")
set(CPACK_PACKAGING_INSTALL_PREFIX "/")
set(CPACK_COMPONENTS_ALL clients-deb server-deb)
set(CPACK_COMPONENTS_ALL clients-deb server-deb clients-versioned server-versioned)
set(CPACK_RESOURCE_FILE_README ${CMAKE_SOURCE_DIR}/README.md)
set(CPACK_RESOURCE_FILE_LICENSE ${CMAKE_SOURCE_DIR}/LICENSE)
elseif(CPACK_GENERATOR MATCHES "productbuild")


@ -5,7 +5,6 @@ env_set(USE_DTRACE ON BOOL "Enable dtrace probes on supported platforms")
env_set(USE_VALGRIND OFF BOOL "Compile for valgrind usage")
env_set(USE_VALGRIND_FOR_CTEST ${USE_VALGRIND} BOOL "Use valgrind for ctest")
env_set(ALLOC_INSTRUMENTATION OFF BOOL "Instrument alloc")
env_set(WITH_UNDODB OFF BOOL "Use rr or undodb")
env_set(USE_ASAN OFF BOOL "Compile with address sanitizer")
env_set(USE_GCOV OFF BOOL "Compile with gcov instrumentation")
env_set(USE_MSAN OFF BOOL "Compile with memory sanitizer. To avoid false positives you need to dynamically link to a msan-instrumented libc++ and libc++abi, which you must compile separately. See https://github.com/google/sanitizers/wiki/MemorySanitizerLibcxxHowTo#instrumented-libc.")
@ -47,35 +46,9 @@ add_compile_definitions(BOOST_ERROR_CODE_HEADER_ONLY BOOST_SYSTEM_NO_DEPRECATED)
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
if(ALLOC_INSTRUMENTATION)
add_compile_options(-DALLOC_INSTRUMENTATION)
endif()
if(WITH_UNDODB)
add_compile_options(-DWITH_UNDODB)
endif()
if(DEBUG_TASKS)
add_compile_options(-DDEBUG_TASKS)
endif()
if(NDEBUG)
add_compile_options(-DNDEBUG)
endif()
if(FDB_RELEASE)
add_compile_options(-DFDB_RELEASE)
add_compile_options(-DFDB_CLEAN_BUILD)
endif()
include_directories(${CMAKE_SOURCE_DIR})
include_directories(${CMAKE_BINARY_DIR})
if (NOT OPEN_FOR_IDE)
add_definitions(-DNO_INTELLISENSE)
endif()
if(WIN32)
add_definitions(-DUSE_USEFIBERS)
else()
add_definitions(-DUSE_UCONTEXT)
endif()
if (USE_CCACHE)
FIND_PROGRAM(CCACHE_FOUND "ccache")
@ -95,6 +68,10 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED ON)
if(NOT OPEN_FOR_IDE)
add_compile_definitions(NO_INTELLISENSE)
endif()
if(NOT WIN32)
include(CheckIncludeFile)
CHECK_INCLUDE_FILE("stdatomic.h" HAS_C11_ATOMICS)
@ -112,7 +89,6 @@ if(WIN32)
string(REGEX REPLACE "/W[0-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
endif()
add_compile_options(/W0 /EHsc /bigobj $<$<CONFIG:Release>:/Zi> /MP /FC /Gm-)
add_compile_definitions(_WIN32_WINNT=${WINDOWS_TARGET} WINVER=${WINDOWS_TARGET} NTDDI_VERSION=0x05020000 BOOST_ALL_NO_LIB)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MT")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MTd")
else()
@ -166,11 +142,7 @@ else()
# and create a debuginfo rpm
add_compile_options(-ggdb -fno-omit-frame-pointer)
if(USE_ASAN)
add_compile_options(
-fsanitize=address
-DUSE_SANITIZER
-DADDRESS_SANITIZER
)
add_compile_options(-fsanitize=address)
add_link_options(-fsanitize=address)
endif()
@ -180,15 +152,11 @@ else()
endif()
add_compile_options(
-fsanitize=memory
-fsanitize-memory-track-origins=2
-DUSE_SANITIZER
-DMEMORY_SANITIZER
)
-fsanitize-memory-track-origins=2)
add_link_options(-fsanitize=memory)
endif()
if(USE_GCOV)
add_compile_options(--coverage -DUSE_GCOV)
add_link_options(--coverage)
endif()
@ -196,20 +164,13 @@ else()
add_compile_options(
-fsanitize=undefined
# TODO(atn34) Re-enable -fsanitize=alignment once https://github.com/apple/foundationdb/issues/1434 is resolved
-fno-sanitize=alignment
-DUSE_SANITIZER
-DUNDEFINED_BEHAVIOR_SANITIZER
)
-fno-sanitize=alignment)
add_link_options(-fsanitize=undefined)
endif()
if(USE_TSAN)
add_compile_options(
-fsanitize=thread
-DUSE_SANITIZER
-DTHREAD_SANITIZER
-DDYNAMIC_ANNOTATIONS_EXTERNAL_IMPL=1
)
-fsanitize=thread)
add_link_options(-fsanitize=thread)
endif()
@ -269,9 +230,6 @@ else()
# for more information.
#add_compile_options(-fno-builtin-memcpy)
if (USE_VALGRIND)
add_compile_options(-DVALGRIND=1 -DUSE_VALGRIND=1)
endif()
if (CLANG)
add_compile_options()
# Clang has link errors unless `atomic` is specifically requested.
@ -280,7 +238,6 @@ else()
endif()
if (APPLE OR USE_LIBCXX)
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-stdlib=libc++>)
add_compile_definitions(WITH_LIBCXX)
if (NOT APPLE)
if (STATIC_LINK_LIBCXX)
add_link_options(-static-libgcc -nostdlib++ -Wl,-Bstatic -lc++ -lc++abi -Wl,-Bdynamic)
@ -366,10 +323,7 @@ else()
check_symbol_exists(aligned_alloc stdlib.h HAS_ALIGNED_ALLOC)
message(STATUS "Has aligned_alloc: ${HAS_ALIGNED_ALLOC}")
if((SUPPORT_DTRACE) AND (USE_DTRACE))
add_compile_definitions(DTRACE_PROBES)
endif()
if(HAS_ALIGNED_ALLOC)
add_compile_definitions(HAS_ALIGNED_ALLOC)
set(DTRACE_PROBES 1)
endif()
if(CMAKE_COMPILER_IS_GNUCXX)

cmake/FDBInstall.cmake (new file)

@ -0,0 +1,260 @@
function(fdb_install_packages)
set(FDB_INSTALL_PACKAGES ${ARGV} PARENT_SCOPE)
endfunction()
function(fdb_install_dirs)
set(FDB_INSTALL_DIRS ${ARGV} PARENT_SCOPE)
endfunction()
function(install_symlink_impl)
if (WIN32)
return()
endif()
set(options "")
set(one_value_options TO DESTINATION)
set(multi_value_options COMPONENTS)
cmake_parse_arguments(SYM "${options}" "${one_value_options}" "${multi_value_options}" "${ARGN}")
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/symlinks)
get_filename_component(fname ${SYM_DESTINATION} NAME)
get_filename_component(dest_dir ${SYM_DESTINATION} DIRECTORY)
set(sl ${CMAKE_CURRENT_BINARY_DIR}/symlinks/${fname})
execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink ${SYM_TO} ${sl})
foreach(component IN LISTS SYM_COMPONENTS)
install(FILES ${sl} DESTINATION ${dest_dir} COMPONENT ${component})
endforeach()
endfunction()
function(install_symlink)
if(WIN32 OR OPEN_FOR_IDE)
return()
endif()
set(options "")
set(one_value_options COMPONENT LINK_DIR FILE_DIR LINK_NAME FILE_NAME)
set(multi_value_options "")
cmake_parse_arguments(IN "${options}" "${one_value_options}" "${multi_value_options}" "${ARGN}")
set(rel_path "")
string(REGEX MATCHALL "\\/" slashes "${IN_LINK_NAME}")
foreach(ignored IN LISTS slashes)
set(rel_path "../${rel_path}")
endforeach()
if("${IN_FILE_DIR}" MATCHES "bin")
if("${IN_LINK_DIR}" MATCHES "lib")
install_symlink_impl(
TO "../${rel_path}bin/${IN_FILE_NAME}"
DESTINATION "lib/${IN_LINK_NAME}"
COMPONENTS "${IN_COMPONENT}-tgz")
install_symlink_impl(
TO "../${rel_path}bin/${IN_FILE_NAME}"
DESTINATION "usr/lib64/${IN_LINK_NAME}"
COMPONENTS "${IN_COMPONENT}-el7")
install_symlink_impl(
TO "../${rel_path}bin/${IN_FILE_NAME}"
DESTINATION "usr/lib/${IN_LINK_NAME}"
COMPONENTS "${IN_COMPONENT}-deb")
elseif("${IN_LINK_DIR}" MATCHES "bin")
install_symlink_impl(
TO "../${rel_path}bin/${IN_FILE_NAME}"
DESTINATION "bin/${IN_LINK_NAME}"
COMPONENTS "${IN_COMPONENT}-tgz")
install_symlink_impl(
TO "../${rel_path}bin/${IN_FILE_NAME}"
DESTINATION "usr/bin/${IN_LINK_NAME}"
COMPONENTS
"${IN_COMPONENT}-el7"
"${IN_COMPONENT}-deb")
elseif("${IN_LINK_DIR}" MATCHES "fdbmonitor")
install_symlink_impl(
TO "../../${rel_path}bin/${IN_FILE_NAME}"
DESTINATION "lib/foundationdb/${IN_LINK_NAME}"
COMPONENTS "${IN_COMPONENT}-tgz")
install_symlink_impl(
TO "../../${rel_path}bin/${IN_FILE_NAME}"
DESTINATION "usr/lib/foundationdb/${IN_LINK_NAME}"
COMPONENTS
"${IN_COMPONENT}-el7"
"${IN_COMPONENT}-deb")
else()
message(FATAL_ERROR "Unknown LINK_DIR ${IN_LINK_DIR}")
endif()
else()
message(FATAL_ERROR "Unknown FILE_DIR ${IN_FILE_DIR}")
endif()
endfunction()
function(symlink_files)
if (NOT WIN32)
set(options "")
set(one_value_options LOCATION SOURCE)
set(multi_value_options TARGETS)
cmake_parse_arguments(SYM "${options}" "${one_value_options}" "${multi_value_options}" "${ARGN}")
file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/${SYM_LOCATION})
foreach(component IN LISTS SYM_TARGETS)
execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink ${SYM_SOURCE} ${CMAKE_BINARY_DIR}/${SYM_LOCATION}/${component} WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${SYM_LOCATION})
endforeach()
endif()
endfunction()
function(pop_front)
if(ARGC LESS 2)
message(FATAL_ERROR "USAGE: pop_front(<list> <out-var> [<count>])")
endif()
set(count ${ARGV2})
if(NOT count)
set(count 1)
endif()
set(result)
foreach(elem IN LISTS ${ARGV0})
if(count GREATER 0)
math(EXPR count "${count} - 1")
else()
list(APPEND result ${elem})
endif()
endforeach()
set(${ARGV1} ${result} PARENT_SCOPE)
endfunction()
function(install_destinations)
if(NOT ARGV0)
message(FATAL_ERROR "No package passed")
endif()
set(package ${ARGV0})
set(REST_ARGS ${ARGV})
pop_front(REST_ARGS REST_ARGS)
list(FIND FDB_INSTALL_PACKAGES ${package} idx)
if(idx LESS 0)
message(FATAL_ERROR "Package ${package} does not exist")
endif()
cmake_parse_arguments(MY "" "${FDB_INSTALL_DIRS}" "" ${REST_ARGS})
foreach(dir IN LISTS FDB_INSTALL_DIRS)
if(MY_${dir})
set(var ${MY_${dir}})
set(__install_dest_${package}_${dir} ${MY_${dir}} PARENT_SCOPE)
endif()
endforeach()
endfunction()
function(get_install_dest)
if(ARGC LESS 3)
message(FATAL_ERROR "USAGE: get_install_dest(<pkg> <dir> <out-var> [<var-name>])")
endif()
set(package ${ARGV0})
set(dir ${ARGV1})
set(out ${ARGV2})
set(${out} ${__install_dest_${package}_${dir}} PARENT_SCOPE)
if(ARGV3)
set(${ARGV3} "__install_dest_${package}_${dir}")
endif()
endfunction()
function(print_install_destinations)
foreach(pkg IN LISTS FDB_INSTALL_PACKAGES)
message(STATUS "Destinations for ${pkg}")
set(old_indent ${CMAKE_MESSAGE_INDENT})
set(CMAKE_MESSAGE_INDENT "${CMAKE_MESSAGE_INDENT} ")
foreach(dir IN LISTS FDB_INSTALL_DIRS)
get_install_dest(${pkg} ${dir} d)
message(STATUS "${dir} -> ${d}")
endforeach()
set(CMAKE_MESSAGE_INDENT ${old_indent})
endforeach()
endfunction()
function(get_install_var)
if(NOT ARGC EQUAL 3)
message(FATAL_ERROR "USAGE: get_install_var(<pkg> <dir> <out-var>)")
endif()
set(${ARGV2} "__install_dest_${ARGV0}_${ARGV1}" PARENT_SCOPE)
endfunction()
function(copy_install_destinations)
if(ARGC LESS 2)
message(FATAL_ERROR "USAGE: copy_install_destinations(<from> <to> [PREFIX prefix])")
endif()
set(from ${ARGV0})
set(to ${ARGV1})
set(REST_ARGS ${ARGV})
pop_front(REST_ARGS REST_ARGS 2)
cmake_parse_arguments(MY "" "PREFIX" "" ${REST_ARGS})
foreach(dir IN LISTS FDB_INSTALL_DIRS)
get_install_dest(${from} ${dir} d)
get_install_var(${to} ${dir} name)
if(MY_PREFIX)
set(d "${MY_PREFIX}${d}")
endif()
set(${name} ${d} PARENT_SCOPE)
endforeach()
endfunction()
function(fdb_configure_and_install)
if(NOT WIN32 AND NOT OPEN_FOR_IDE)
set(one_value_options COMPONENT DESTINATION FILE DESTINATION_SUFFIX)
cmake_parse_arguments(IN "${options}" "${one_value_options}" "${multi_value_options}" "${ARGN}")
foreach(pkg IN LISTS FDB_INSTALL_PACKAGES)
string(TOLOWER "${pkg}" package)
string(TOUPPER "${IN_DESTINATION}" destination)
get_install_dest(${pkg} INCLUDE INCLUDE_DIR)
get_install_dest(${pkg} LIB LIB_DIR)
get_install_dest(${pkg} ${destination} install_path)
string(REGEX REPLACE "\\.in$" "" name "${IN_FILE}")
get_filename_component(name "${name}" NAME)
set(generated_file_name "${generated_dir}/${package}/${name}")
configure_file("${IN_FILE}" "${generated_file_name}" @ONLY)
install(
FILES "${generated_file_name}"
DESTINATION "${install_path}${IN_DESTINATION_SUFFIX}"
COMPONENT "${IN_COMPONENT}-${package}")
endforeach()
endif()
endfunction()
function(fdb_install)
if(NOT WIN32 AND NOT OPEN_FOR_IDE)
set(one_value_options COMPONENT DESTINATION EXPORT DESTINATION_SUFFIX)
set(multi_value_options TARGETS FILES PROGRAMS DIRECTORY)
cmake_parse_arguments(IN "${options}" "${one_value_options}" "${multi_value_options}" "${ARGN}")
set(install_export 0)
if(IN_TARGETS)
set(args TARGETS ${IN_TARGETS})
elseif(IN_FILES)
set(args FILES ${IN_FILES})
elseif(IN_PROGRAMS)
set(args PROGRAMS ${IN_PROGRAMS})
elseif(IN_DIRECTORY)
set(args DIRECTORY ${IN_DIRECTORY})
elseif(IN_EXPORT)
set(install_export 1)
else()
message(FATAL_ERROR "Expected FILES, PROGRAMS, DIRECTORY, or TARGETS")
endif()
string(TOUPPER "${IN_DESTINATION}" destination)
foreach(pkg IN LISTS FDB_INSTALL_PACKAGES)
get_install_dest(${pkg} ${destination} install_path)
string(TOLOWER "${pkg}" package)
if(install_export)
install(
EXPORT "${IN_EXPORT}-${package}"
DESTINATION "${install_path}${IN_DESTINATION_SUFFIX}"
FILE "${IN_EXPORT}.cmake"
COMPONENT "${IN_COMPONENT}-${package}")
else()
set(export_args "")
if (IN_EXPORT)
set(export_args EXPORT "${IN_EXPORT}-${package}")
endif()
if(NOT ${install_path} STREQUAL "")
install(
${args}
${export_args}
DESTINATION "${install_path}${IN_DESTINATION_SUFFIX}"
COMPONENT "${IN_COMPONENT}-${package}")
endif()
endif()
endforeach()
endif()
endfunction()


@ -1,6 +1,4 @@
################################################################################
# Helper Functions
################################################################################
include(FDBInstall)
function(install_symlink_impl)
if (NOT WIN32)
@ -147,72 +145,45 @@ set(install_destination_for_data_deb "var/lib/foundationdb/data")
set(install_destination_for_data_el6 "var/lib/foundationdb/data")
set(install_destination_for_data_el7 "var/lib/foundationdb/data")
set(install_destination_for_data_pm "usr/local/foundationdb/data")
fdb_install_packages(TGZ DEB EL7 PM VERSIONED)
fdb_install_dirs(BIN SBIN LIB FDBMONITOR INCLUDE ETC LOG DATA)
message(STATUS "FDB_INSTALL_DIRS -> ${FDB_INSTALL_DIRS}")
# 'map' from (destination, package) to path
# format vars like install_destination_for_${destination}_${package}
install_destinations(TGZ
BIN bin
SBIN sbin
LIB lib
FDBMONITOR sbin
INCLUDE include
ETC etc/foundationdb
LOG log/foundationdb
DATA lib/foundationdb)
copy_install_destinations(TGZ VERSIONED PREFIX "usr/lib/foundationdb-${PROJECT_VERSION}/")
install_destinations(DEB
BIN usr/bin
SBIN usr/sbin
LIB usr/lib
FDBMONITOR usr/lib/foundationdb
INCLUDE usr/include
ETC etc/foundationdb
LOG var/log/foundationdb
DATA var/lib/foundationdb)
copy_install_destinations(DEB EL7)
install_destinations(EL7 LIB usr/lib64)
install_destinations(PM
BIN usr/local/bin
SBIN usr/local/sbin
LIB lib
FDBMONITOR usr/local/libexec
INCLUDE usr/local/include
ETC usr/local/etc/foundationdb)
# This can be used for debugging in case above is behaving funky
#print_install_destinations()
set(generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated")
function(fdb_configure_and_install)
if(NOT WIN32 AND NOT OPEN_FOR_IDE)
set(one_value_options COMPONENT DESTINATION FILE DESTINATION_SUFFIX)
cmake_parse_arguments(IN "${options}" "${one_value_options}" "${multi_value_options}" "${ARGN}")
foreach(package tgz deb el6 el7 pm)
set(INCLUDE_DIR "${install_destination_for_include_${package}}")
set(LIB_DIR "${install_destination_for_lib_${package}}")
set(install_path "${install_destination_for_${IN_DESTINATION}_${package}}")
string(REGEX REPLACE "\\.in$" "" name "${IN_FILE}")
get_filename_component(name "${name}" NAME)
set(generated_file_name "${generated_dir}/${package}/${name}")
configure_file("${IN_FILE}" "${generated_file_name}" @ONLY)
install(
FILES "${generated_file_name}"
DESTINATION "${install_path}${IN_DESTINATION_SUFFIX}"
COMPONENT "${IN_COMPONENT}-${package}")
endforeach()
endif()
endfunction()
function(fdb_install)
if(NOT WIN32 AND NOT OPEN_FOR_IDE)
set(one_value_options COMPONENT DESTINATION EXPORT DESTINATION_SUFFIX)
set(multi_value_options TARGETS FILES PROGRAMS DIRECTORY)
cmake_parse_arguments(IN "${options}" "${one_value_options}" "${multi_value_options}" "${ARGN}")
set(install_export 0)
if(IN_TARGETS)
set(args TARGETS ${IN_TARGETS})
elseif(IN_FILES)
set(args FILES ${IN_FILES})
elseif(IN_PROGRAMS)
set(args PROGRAMS ${IN_PROGRAMS})
elseif(IN_DIRECTORY)
set(args DIRECTORY ${IN_DIRECTORY})
elseif(IN_EXPORT)
set(install_export 1)
else()
message(FATAL_ERROR "Expected FILES, PROGRAMS, DIRECTORY, or TARGETS")
endif()
foreach(package tgz deb el6 el7 pm)
set(install_path "${install_destination_for_${IN_DESTINATION}_${package}}")
if(install_export)
install(
EXPORT "${IN_EXPORT}-${package}"
DESTINATION "${install_path}${IN_DESTINATION_SUFFIX}"
FILE "${IN_EXPORT}.cmake"
COMPONENT "${IN_COMPONENT}-${package}")
else()
set(export_args "")
if (IN_EXPORT)
set(export_args EXPORT "${IN_EXPORT}-${package}")
endif()
if(NOT ${install_path} STREQUAL "")
install(
${args}
${export_args}
DESTINATION "${install_path}${IN_DESTINATION_SUFFIX}"
COMPONENT "${IN_COMPONENT}-${package}")
endif()
endif()
endforeach()
endif()
endfunction()
if(APPLE)
set(CPACK_GENERATOR TGZ productbuild)
@ -240,6 +211,22 @@ list(GET FDB_VERSION_LIST 0 FDB_MAJOR)
list(GET FDB_VERSION_LIST 1 FDB_MINOR)
list(GET FDB_VERSION_LIST 2 FDB_PATCH)
################################################################################
# Alternatives config
################################################################################
math(EXPR ALTERNATIVES_PRIORITY "(${PROJECT_VERSION_MAJOR} * 1000) + (${PROJECT_VERSION_MINOR} * 100) + ${PROJECT_VERSION_PATCH}")
set(script_dir "${PROJECT_BINARY_DIR}/packaging/multiversion/")
file(MAKE_DIRECTORY "${script_dir}/server" "${script_dir}/clients")
configure_file("${PROJECT_SOURCE_DIR}/packaging/multiversion/server/postinst" "${script_dir}/server" @ONLY)
configure_file("${PROJECT_SOURCE_DIR}/packaging/multiversion/server/prerm" "${script_dir}/server" @ONLY)
set(LIB_DIR lib)
configure_file("${PROJECT_SOURCE_DIR}/packaging/multiversion/clients/postinst" "${script_dir}/clients" @ONLY)
set(LIB_DIR lib64)
configure_file("${PROJECT_SOURCE_DIR}/packaging/multiversion/clients/postinst" "${script_dir}/clients/postinst-el7" @ONLY)
configure_file("${PROJECT_SOURCE_DIR}/packaging/multiversion/clients/prerm" "${script_dir}/clients" @ONLY)
################################################################################
# General CPack configuration
################################################################################
@ -259,23 +246,23 @@ set(CPACK_PACKAGE_DESCRIPTION_SUMMARY
set(CPACK_PACKAGE_ICON ${CMAKE_SOURCE_DIR}/packaging/foundationdb.ico)
set(CPACK_PACKAGE_CONTACT "The FoundationDB Community")
set(CPACK_COMPONENT_SERVER-EL6_DEPENDS clients-el6)
set(CPACK_COMPONENT_SERVER-EL7_DEPENDS clients-el7)
set(CPACK_COMPONENT_SERVER-DEB_DEPENDS clients-deb)
set(CPACK_COMPONENT_SERVER-TGZ_DEPENDS clients-tgz)
set(CPACK_COMPONENT_SERVER-PM_DEPENDS clients-pm)
set(CPACK_COMPONENT_SERVER-VERSIONED_DEPENDS clients-versioned)
set(CPACK_COMPONENT_SERVER-EL6_DISPLAY_NAME "foundationdb-server")
set(CPACK_COMPONENT_SERVER-EL7_DISPLAY_NAME "foundationdb-server")
set(CPACK_COMPONENT_SERVER-DEB_DISPLAY_NAME "foundationdb-server")
set(CPACK_COMPONENT_SERVER-TGZ_DISPLAY_NAME "foundationdb-server")
set(CPACK_COMPONENT_SERVER-PM_DISPLAY_NAME "foundationdb-server")
set(CPACK_COMPONENT_SERVER-VERSIONED_DISPLAY_NAME "foundationdb-server-${PROJECT_VERSION}")
set(CPACK_COMPONENT_CLIENTS-EL6_DISPLAY_NAME "foundationdb-clients")
set(CPACK_COMPONENT_CLIENTS-EL7_DISPLAY_NAME "foundationdb-clients")
set(CPACK_COMPONENT_CLIENTS-DEB_DISPLAY_NAME "foundationdb-clients")
set(CPACK_COMPONENT_CLIENTS-TGZ_DISPLAY_NAME "foundationdb-clients")
set(CPACK_COMPONENT_CLIENTS-PM_DISPLAY_NAME "foundationdb-clients")
set(CPACK_COMPONENT_CLIENTS-VERSIONED_DISPLAY_NAME "foundationdb-clients-${PROJECT_VERSION}")
# MacOS needs a file extension for the LICENSE file
@ -312,39 +299,34 @@ set(deb-server-filename "foundationdb-server_${PROJECT_VERSION}${prerelease_stri
set(CPACK_RPM_PACKAGE_LICENSE "Apache 2.0")
set(CPACK_RPM_PACKAGE_NAME "foundationdb")
set(CPACK_RPM_CLIENTS-EL6_PACKAGE_NAME "foundationdb-clients")
set(CPACK_RPM_CLIENTS-EL7_PACKAGE_NAME "foundationdb-clients")
set(CPACK_RPM_SERVER-EL6_PACKAGE_NAME "foundationdb-server")
set(CPACK_RPM_SERVER-EL7_PACKAGE_NAME "foundationdb-server")
set(CPACK_RPM_SERVER-VERSIONED_PACKAGE_NAME "foundationdb-server-${PROJECT_VERSION}")
set(CPACK_RPM_CLIENTS-EL6_FILE_NAME "${rpm-clients-filename}.el6.x86_64.rpm")
set(CPACK_RPM_CLIENTS-EL7_FILE_NAME "${rpm-clients-filename}.el7.x86_64.rpm")
set(CPACK_RPM_SERVER-EL6_FILE_NAME "${rpm-server-filename}.el6.x86_64.rpm")
set(CPACK_RPM_CLIENTS-VERSIONED_FILE_NAME "${rpm-clients-filename}.versioned.x86_64.rpm")
set(CPACK_RPM_SERVER-EL7_FILE_NAME "${rpm-server-filename}.el7.x86_64.rpm")
set(CPACK_RPM_SERVER-VERSIONED_FILE_NAME "${rpm-server-filename}.versioned.x86_64.rpm")
set(CPACK_RPM_CLIENTS-EL6_DEBUGINFO_FILE_NAME "${rpm-clients-filename}.el6-debuginfo.x86_64.rpm")
set(CPACK_RPM_CLIENTS-EL7_DEBUGINFO_FILE_NAME "${rpm-clients-filename}.el7-debuginfo.x86_64.rpm")
set(CPACK_RPM_SERVER-EL6_DEBUGINFO_FILE_NAME "${rpm-server-filename}.el6-debuginfo.x86_64.rpm")
set(CPACK_RPM_CLIENTS-VERSIONED_DEBUGINFO_FILE_NAME "${rpm-clients-filename}.versioned-debuginfo.x86_64.rpm")
set(CPACK_RPM_SERVER-EL7_DEBUGINFO_FILE_NAME "${rpm-server-filename}.el7-debuginfo.x86_64.rpm")
set(CPACK_RPM_SERVER-VERSIONED_DEBUGINFO_FILE_NAME "${rpm-server-filename}.versioned-debuginfo.x86_64.rpm")
file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/packaging/emptydir")
fdb_install(DIRECTORY "${CMAKE_BINARY_DIR}/packaging/emptydir/" DESTINATION data COMPONENT server)
fdb_install(DIRECTORY "${CMAKE_BINARY_DIR}/packaging/emptydir/" DESTINATION log COMPONENT server)
fdb_install(DIRECTORY "${CMAKE_BINARY_DIR}/packaging/emptydir/" DESTINATION etc COMPONENT clients)
set(CPACK_RPM_SERVER-EL6_USER_FILELIST
"%config(noreplace) /etc/foundationdb/foundationdb.conf"
"%attr(0700,foundationdb,foundationdb) /var/log/foundationdb"
"%attr(0700, foundationdb, foundationdb) /var/lib/foundationdb")
set(CPACK_RPM_SERVER-EL7_USER_FILELIST
"%config(noreplace) /etc/foundationdb/foundationdb.conf"
"%attr(0700,foundationdb,foundationdb) /var/log/foundationdb"
"%attr(0700, foundationdb, foundationdb) /var/lib/foundationdb")
set(CPACK_RPM_CLIENTS-EL6_USER_FILELIST "%dir /etc/foundationdb")
set(CPACK_RPM_CLIENTS-EL7_USER_FILELIST "%dir /etc/foundationdb")
set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION
"/usr/sbin"
"/usr/share/java"
"/usr/lib"
"/usr/lib64/cmake"
"/etc/foundationdb"
"/usr/lib64/pkgconfig"
@ -358,42 +340,38 @@ set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION
"/lib/systemd/system"
"/etc/rc.d/init.d")
set(CPACK_RPM_DEBUGINFO_PACKAGE ${GENERATE_DEBUG_PACKAGES})
#set(CPACK_RPM_BUILD_SOURCE_DIRS_PREFIX /usr/src)
#set(CPACK_RPM_BUILD_SOURCE_FDB_INSTALL_DIRS_PREFIX /usr/src)
set(CPACK_RPM_COMPONENT_INSTALL ON)
set(CPACK_RPM_CLIENTS-EL6_PRE_INSTALL_SCRIPT_FILE
${CMAKE_SOURCE_DIR}/packaging/rpm/scripts/preclients.sh)
set(CPACK_RPM_clients-el7_PRE_INSTALL_SCRIPT_FILE
${CMAKE_SOURCE_DIR}/packaging/rpm/scripts/preclients.sh)
set(CPACK_RPM_CLIENTS-EL6_POST_INSTALL_SCRIPT_FILE
${CMAKE_SOURCE_DIR}/packaging/rpm/scripts/postclients.sh)
set(CPACK_RPM_CLIENTS-EL7_POST_INSTALL_SCRIPT_FILE
${CMAKE_SOURCE_DIR}/packaging/rpm/scripts/postclients.sh)
set(CPACK_RPM_SERVER-EL6_PRE_INSTALL_SCRIPT_FILE
${CMAKE_SOURCE_DIR}/packaging/rpm/scripts/preserver.sh)
set(CPACK_RPM_SERVER-EL7_PRE_INSTALL_SCRIPT_FILE
${CMAKE_SOURCE_DIR}/packaging/rpm/scripts/preserver.sh)
set(CPACK_RPM_SERVER-EL6_POST_INSTALL_SCRIPT_FILE
${CMAKE_SOURCE_DIR}/packaging/rpm/scripts/postserver-el6.sh)
set(CPACK_RPM_SERVER-EL7_POST_INSTALL_SCRIPT_FILE
${CMAKE_SOURCE_DIR}/packaging/rpm/scripts/postserver.sh)
set(CPACK_RPM_SERVER-EL6_PRE_UNINSTALL_SCRIPT_FILE
${CMAKE_SOURCE_DIR}/packaging/rpm/scripts/preunserver.sh)
set(CPACK_RPM_SERVER-EL7_PRE_UNINSTALL_SCRIPT_FILE
${CMAKE_SOURCE_DIR}/packaging/rpm/scripts/preunserver.sh)
set(CPACK_RPM_SERVER-EL6_PACKAGE_REQUIRES
"foundationdb-clients = ${FDB_MAJOR}.${FDB_MINOR}.${FDB_PATCH}")
set(CPACK_RPM_SERVER-EL7_PACKAGE_REQUIRES
"foundationdb-clients = ${FDB_MAJOR}.${FDB_MINOR}.${FDB_PATCH}")
#set(CPACK_RPM_java_PACKAGE_REQUIRES
# "foundationdb-clients = ${FDB_MAJOR}.${FDB_MINOR}.${FDB_PATCH}")
#set(CPACK_RPM_python_PACKAGE_REQUIRES
# "foundationdb-clients = ${FDB_MAJOR}.${FDB_MINOR}.${FDB_PATCH}")
set(CPACK_RPM_SERVER-VERSIONED_POST_INSTALL_SCRIPT_FILE
${CMAKE_BINARY_DIR}/packaging/multiversion/server/postinst)
set(CPACK_RPM_SERVER-VERSIONED_PRE_UNINSTALL_SCRIPT_FILE
${CMAKE_BINARY_DIR}/packaging/multiversion/server/prerm)
set(CPACK_RPM_CLIENTS-VERSIONED_POST_INSTALL_SCRIPT_FILE
${CMAKE_BINARY_DIR}/packaging/multiversion/clients/postinst-el7)
set(CPACK_RPM_CLIENTS-VERSIONED_PRE_UNINSTALL_SCRIPT_FILE
${CMAKE_BINARY_DIR}/packaging/multiversion/clients/prerm)
################################################################################
# Configuration for DEB
@ -408,6 +386,8 @@ set(CPACK_DEBIAN_ENABLE_COMPONENT_DEPENDS ON)
set(CPACK_DEBIAN_SERVER-DEB_PACKAGE_NAME "foundationdb-server")
set(CPACK_DEBIAN_CLIENTS-DEB_PACKAGE_NAME "foundationdb-clients")
set(CPACK_DEBIAN_SERVER-VERSIONED_PACKAGE_NAME "foundationdb-server-${PROJECT_VERSION}")
set(CPACK_DEBIAN_CLIENTS-VERSIONED_PACKAGE_NAME "foundationdb-clients-${PROJECT_VERSION}")
set(CPACK_DEBIAN_SERVER-DEB_PACKAGE_DEPENDS "adduser, libc6 (>= 2.12), foundationdb-clients (= ${FDB_VERSION})")
set(CPACK_DEBIAN_SERVER-DEB_PACKAGE_RECOMMENDS "python (>= 2.6)")
@ -422,6 +402,13 @@ set(CPACK_DEBIAN_SERVER-DEB_PACKAGE_CONTROL_EXTRA
${CMAKE_SOURCE_DIR}/packaging/deb/DEBIAN-foundationdb-server/prerm
${CMAKE_SOURCE_DIR}/packaging/deb/DEBIAN-foundationdb-server/postrm)
set(CPACK_DEBIAN_CLIENTS-VERSIONED_PACKAGE_CONTROL_EXTRA
${CMAKE_BINARY_DIR}/packaging/multiversion/clients/postinst
${CMAKE_BINARY_DIR}/packaging/multiversion/clients/prerm)
set(CPACK_DEBIAN_SERVER-VERSIONED_PACKAGE_CONTROL_EXTRA
${CMAKE_BINARY_DIR}/packaging/multiversion/server/postinst
${CMAKE_BINARY_DIR}/packaging/multiversion/server/prerm)
################################################################################
# MacOS configuration
################################################################################
@ -459,21 +446,21 @@ if(NOT WIN32)
fdb_install(FILES ${CMAKE_SOURCE_DIR}/packaging/foundationdb.conf
DESTINATION etc
COMPONENT server)
install(FILES ${CMAKE_SOURCE_DIR}/packaging/make_public.py
DESTINATION "usr/lib/foundationdb"
COMPONENT server-el6)
install(FILES ${CMAKE_SOURCE_DIR}/packaging/make_public.py
DESTINATION "usr/lib/foundationdb"
COMPONENT server-deb)
install(FILES ${CMAKE_SOURCE_DIR}/packaging/rpm/foundationdb.service
DESTINATION "lib/systemd/system"
COMPONENT server-el7)
install(PROGRAMS ${CMAKE_SOURCE_DIR}/packaging/rpm/foundationdb-init
DESTINATION "etc/rc.d/init.d"
RENAME "foundationdb"
COMPONENT server-el6)
install(PROGRAMS ${CMAKE_SOURCE_DIR}/packaging/deb/foundationdb-init
DESTINATION "etc/init.d"
RENAME "foundationdb"
COMPONENT server-deb)
install(FILES ${CMAKE_SOURCE_DIR}/packaging/rpm/foundationdb.service
DESTINATION "usr/lib/foundationdb-${PROJECT_VERSION}/lib/systemd/system"
COMPONENT server-versioned)
install(PROGRAMS ${CMAKE_SOURCE_DIR}/packaging/deb/foundationdb-init
DESTINATION "usr/lib/foundationdb-${PROJECT_VERSION}/etc/init.d"
RENAME "foundationdb"
COMPONENT server-versioned)
endif()

design/flow_transport.md (new file)

@ -0,0 +1,91 @@
# Flow Transport
This section describes the design and implementation of the flow transport wire protocol (as of release 6.3).
## ConnectPacket
The first bytes sent over a tcp connection in flow are the `ConnectPacket`.
This is a variable-length message (though fixed-length at a given protocol
version) designed with forward and backward compatibility in mind. The expected length of the `ConnectPacket` is encoded in the first 4 bytes (unsigned, little-endian). Upon receiving an incoming connection, a peer reads the `ProtocolVersion` from the `ConnectPacket`: the next 8 bytes (unsigned, little-endian), whose most significant 4 bits encode flags and must be zeroed before the version is interpreted numerically.
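As an illustration, here is a minimal sketch of reading that header from a raw byte buffer. The struct and function names are invented for this example, and it assumes a little-endian host so that `memcpy` matches the wire encoding; flow's real `ConnectPacket` handling lives in `FlowTransport` and is more involved.

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>

// Hypothetical header type for illustration only.
struct ConnectPacketHeader {
    uint32_t connectPacketLength; // expected length of the ConnectPacket
    uint64_t protocolVersion;     // numeric version, flag bits cleared
};

bool parseConnectPacketHeader(const uint8_t* buf, size_t len, ConnectPacketHeader* out) {
    if (len < 12) return false; // 4 length bytes + 8 protocol version bytes
    std::memcpy(&out->connectPacketLength, buf, 4); // little-endian on the wire
    uint64_t rawVersion;
    std::memcpy(&rawVersion, buf + 4, 8);
    // The most significant 4 bits encode flags; zero them before
    // interpreting the version numerically.
    out->protocolVersion = rawVersion & ~(0xFULL << 60);
    return true;
}
```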
## Protocol compatibility
Based on the incoming connection's `ProtocolVersion`, this connection is either
"compatible" or "incompatible". If this connection is incompatible, then we
will not actually look at any bytes sent after the `ConnectPacket`, but we will
keep the connection open so that the peer does not keep trying to open new
connections.
If this connection is compatible, then we know that our peer is using the same wire protocol as we are and we can proceed.
## Framing and checksumming protocol
As of release 6.3, the structure of subsequent messages is as follows:
* For TLS connections:
1. packet length (4 bytes unsigned little-endian)
2. token (16 opaque bytes that identify the recipient of this message)
3. message contents (packet length - 16 bytes to be interpreted by the recipient)
* For non-TLS connections, there's additionally a crc32 checksum for message integrity:
1. packet length (4 bytes unsigned little-endian)
2. 4 byte crc32 checksum of token + message
3. token
4. message
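To make the framing concrete, here is a hedged sketch of splitting one TLS-style frame out of a receive buffer (names invented; a non-TLS variant would additionally read the 4-byte crc32 between the length and the token and verify it over token + message):

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// Illustrative decomposition of one frame: a 4-byte little-endian packet
// length, a 16-byte recipient token, then (packet length - 16) bytes of
// message contents.
struct FramedMessage {
    uint8_t token[16];
    std::vector<uint8_t> contents;
};

// Returns bytes consumed, or 0 if the buffer does not yet hold a complete
// frame. Assumes a little-endian host.
size_t parseTlsFrame(const uint8_t* buf, size_t len, FramedMessage* out) {
    if (len < 4) return 0;
    uint32_t packetLength;
    std::memcpy(&packetLength, buf, 4);
    if (packetLength < 16 || len < 4 + size_t(packetLength)) return 0;
    std::memcpy(out->token, buf + 4, 16);
    out->contents.assign(buf + 20, buf + 4 + packetLength);
    return 4 + packetLength;
}
```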
## Well-known endpoints
Endpoints are a pair of a 16 byte token that identifies the recipient and a
network address to send a message to. Endpoints are usually obtained over the
network - for example a request conventionally includes the endpoint the
reply should be sent to (like a self-addressed stamped envelope). So if you
can send a message and get endpoints in reply, you can start sending messages
to those endpoints. But how do you send that first message?
That's where the concept of a "well-known" endpoint comes in. Some endpoints
(for example the endpoints coordinators are listening on) use "well-known"
tokens that are agreed upon ahead of time. Technically the value of these
tokens could be changed as part of an incompatible protocol version bump, but
in practice this hasn't happened and shouldn't ever need to happen.
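This commit itself adds such a token: `WLTOKEN_PROTOCOL_INFO(-1, 10)` in `CoordinationInterface.h`. As a toy model of the concept (deliberately not flow's real `UID`/`Endpoint` types):

```cpp
#include <cstdint>
#include <string>
#include <utility>

// Toy stand-ins for flow's UID and Endpoint, for illustration only.
struct Token { uint64_t first, second; };
struct Endpoint {
    std::string address; // e.g. "10.0.0.1:4500"
    Token token;         // identifies the recipient at that address
};

// A well-known token is a value agreed upon ahead of time, so the first
// message can be sent before any tokens have been learned over the
// network. The value mirrors WLTOKEN_PROTOCOL_INFO(-1, 10) added in this
// commit.
constexpr Token WLTOKEN_PROTOCOL_INFO{ uint64_t(-1), 10 };

Endpoint protocolInfoEndpoint(std::string coordinatorAddress) {
    return Endpoint{ std::move(coordinatorAddress), WLTOKEN_PROTOCOL_INFO };
}
```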
## Flatbuffers
Prior to release-6.2 the structure of messages (e.g. how many fields a
message has) was implicitly part of the protocol version, and so adding a
field to any message required a protocol version bump. Since release-6.2
messages are encoded as flatbuffers messages, and you can technically add
fields without a protocol version bump. This is a powerful and dangerous tool
that needs to be used with caution. If you add a field without a protocol version bump, then you can no longer be certain that this field will always be present (e.g. if you get a message from an old peer it might not include that field.)
We don't have a good way to test two or more fdbserver binaries in
simulation, so we discourage adding fields or otherwise making any protocol
changes without a protocol version bump.
Bumping the protocol version is costly for clients though, since now they need a whole new libfdb_c.so to be able to talk to the cluster _at all_.
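For concreteness, here is a sketch of what adding a field looks like under the `serializer()` pattern used by `ProtocolInfoRequest`/`ProtocolInfoReply` in this commit. The struct and fields are invented, and `FileIdentifier`, `Optional`, and `serializer` come from flow headers, so this is a fragment rather than a standalone program:

```cpp
// Hypothetical flow message, following the pattern in this diff.
struct ExampleReply {
    constexpr static FileIdentifier file_identifier = 9999999; // illustrative
    int64_t oldField = 0;
    Optional<int64_t> newField; // appended without a protocol version bump

    template <class Ar>
    void serialize(Ar& ar) {
        // Appending a field is wire-compatible under flatbuffers, but a
        // message encoded by an older peer will leave newField unset, so
        // readers must tolerate its absence.
        serializer(ar, oldField, newField);
    }
};
```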
## Stable Endpoints
Stable endpoints are a proposal to allow protocol compatibility to be checked
per endpoint rather than per connection. The proposal is to commit to the
current (release-6.3) framing protocol for opening connections, and allow a
newer framing protocol (for example a new checksum) to be negotiated after
the connection has been established. This way even if peers are at different
protocol versions they can still read the token each message is addressed to,
and they can use that token to decide whether or not to attempt to handle the
message. By default, tokens will have the same compatibility requirements as
before where the protocol version must match exactly. But new tokens can
optionally have a different policy - e.g. handle anything from a protocol
version >= release-7.0.
One of the main features motivating "Stable Endpoints" is the ability to download a compatible libfdb_c from a coordinator.
### Changes to flow transport for Stable Endpoints
1. Well known endpoints must never change (this just makes it official)
2. The (initial) framing protocol must remain fixed. If we want to change the checksum, we can add a stable, well known endpoint that advertises what checksums are supported and use this to change the checksum after the connection has already been established.
3. Each endpoint can have a different compatibility policy: e.g. an endpoint can be marked as requiring at least `ProtocolVersion::withStableInterfaces()` like this:
```
ReplyPromise<ProtocolInfoReply> reply{ PeerCompatibilityPolicy{ RequirePeer::AtLeast,
ProtocolVersion::withStableInterfaces() } };
```
4. Well known endpoints no longer need to be added in a particular order. Instead you reserve the number of well known endpoints ahead of time and then you can add them in any order.


@ -2,12 +2,16 @@
Release Notes
#############
6.2.29
======
* Fix invalid memory access on data distributor when snapshotting large clusters. `(PR #4076) <https://github.com/apple/foundationdb/pull/4076>`_
* Add human-readable DateTime to trace events `(PR #4087) <https://github.com/apple/foundationdb/pull/4087>`_
6.2.28
======
* Log detailed team collection information when median available space ratio of all teams is too low. `(PR #3912) <https://github.com/apple/foundationdb/pull/3912>`_
* Bug fix, blob client did not support authentication key sizes over 64 bytes. `(PR #3964) <https://github.com/apple/foundationdb/pull/3964>`_
6.2.27
======
* For clusters with a large number of shards, avoid slow tasks in the data distributor by adding yields to the shard map destruction. `(PR #3834) <https://github.com/apple/foundationdb/pull/3834>`_


@ -5,6 +5,15 @@ Release Notes
6.3.10
======
Packaging
---------
* Create versioned RPM and DEB packages. This will allow users to install multiple versions of FoundationDB on the same machine and use alternatives to switch between versions. `(PR #3983) <https://github.com/apple/foundationdb/pull/3983>`_
* Remove support for RHEL 6 and CentOS 6. These versions have reached end-of-life and are no longer officially supported by FoundationDB. `(PR #3983) <https://github.com/apple/foundationdb/pull/3983>`_
6.3.9
=====
Features
--------


@ -175,7 +175,7 @@ struct MutationFilesReadProgress : public ReferenceCounted<MutationFilesReadProg
int msgSize = bigEndian32(reader.consume<int>());
const uint8_t* message = reader.consume(msgSize);
ArenaReader rd(buf.arena(), StringRef(message, msgSize), AssumeVersion(currentProtocolVersion));
ArenaReader rd(buf.arena(), StringRef(message, msgSize), AssumeVersion(g_network->protocolVersion()));
MutationRef m;
rd >> m;
count++;
@ -433,7 +433,7 @@ ACTOR Future<Void> convert(ConvertParams params) {
state BackupDescription desc = wait(container->describeBackup());
std::cout << "\n" << desc.toString() << "\n";
// std::cout << "Using Protocol Version: 0x" << std::hex << currentProtocolVersion.version() << std::dec << "\n";
// std::cout << "Using Protocol Version: 0x" << std::hex << g_network->protocolVersion().version() << std::dec << "\n";
std::vector<LogFile> logs = getRelevantLogFiles(listing.logs, params.begin, params.end);
printLogFiles("Range has", logs);
@ -460,7 +460,7 @@ ACTOR Future<Void> convert(ConvertParams params) {
arena = Arena();
}
ArenaReader rd(data.arena, data.message, AssumeVersion(currentProtocolVersion));
ArenaReader rd(data.arena, data.message, AssumeVersion(g_network->protocolVersion()));
MutationRef m;
rd >> m;
std::cout << data.version.toString() << " m = " << m.toString() << "\n";


@ -3433,8 +3433,7 @@ int main(int argc, char* argv[]) {
usePartitionedLog = true;
break;
case OPT_INCREMENTALONLY:
// TODO: Enable this command-line argument once atomics are supported
// incrementalBackupOnly = true;
incrementalBackupOnly = true;
break;
case OPT_RESTORECONTAINER:
restoreContainer = args->OptionArg();


@ -30,6 +30,11 @@
const int MAX_CLUSTER_FILE_BYTES = 60000;
constexpr UID WLTOKEN_CLIENTLEADERREG_GETLEADER(-1, 2);
constexpr UID WLTOKEN_CLIENTLEADERREG_OPENDATABASE(-1, 3);
constexpr UID WLTOKEN_PROTOCOL_INFO(-1, 10);
struct ClientLeaderRegInterface {
RequestStream< struct GetLeaderRequest > getLeader;
RequestStream< struct OpenDatabaseCoordRequest > openDatabase;
@ -186,4 +191,30 @@ public:
ClientCoordinators() {}
};
struct ProtocolInfoReply {
constexpr static FileIdentifier file_identifier = 7784298;
ProtocolVersion version;
template <class Ar>
void serialize(Ar& ar) {
uint64_t version_ = 0;
if (Ar::isSerializing) {
version_ = version.versionWithFlags();
}
serializer(ar, version_);
if (Ar::isDeserializing) {
version = ProtocolVersion(version_);
}
}
};
struct ProtocolInfoRequest {
constexpr static FileIdentifier file_identifier = 13261233;
ReplyPromise<ProtocolInfoReply> reply{ PeerCompatibilityPolicy{ RequirePeer::AtLeast,
ProtocolVersion::withStableInterfaces() } };
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, reply);
}
};
#endif


@ -323,6 +323,8 @@ public:
int snapshotRywEnabled;
int transactionTracingEnabled;
Future<Void> logger;
Future<Void> throttleExpirer;


@ -39,16 +39,16 @@ typedef int64_t Generation;
typedef UID SpanID;
enum {
tagLocalitySpecial = -1,
tagLocalitySpecial = -1, // tag with this locality means it is invalidTag (id=0), txsTag (id=1), or cacheTag (id=2)
tagLocalityLogRouter = -2,
tagLocalityRemoteLog = -3,
tagLocalityRemoteLog = -3, // tag created by log router for remote tLogs
tagLocalityUpgraded = -4,
tagLocalitySatellite = -5,
tagLocalityLogRouterMapped = -6, // used by log router to pop from TLogs
tagLocalityLogRouterMapped = -6, // The pseudo tag used by log routers to pop the real LogRouter tag (i.e., -2)
tagLocalityTxs = -7,
tagLocalityBackup = -8, // used by backup role to pop from TLogs
tagLocalityInvalid = -99
}; //The TLog and LogRouter require these number to be as compact as possible
}; // The TLog and LogRouter require these numbers to be as compact as possible
inline bool isPseudoLocality(int8_t locality) {
return locality == tagLocalityLogRouterMapped || locality == tagLocalityBackup;
@ -56,6 +56,11 @@ inline bool isPseudoLocality(int8_t locality) {
#pragma pack(push, 1)
struct Tag {
// If locality > 0,
// locality decides which DC id the tLog is in;
// id decides which SS owns the tag; the id <-> SS mapping is in the system keyspace: serverTagKeys.
// If locality < 0, locality decides the type of tLog set: satellite, log router (LR), remote tLog, etc.;
// id decides which tLog in that tLog set will be used.
int8_t locality;
uint16_t id;
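Taken together, the comments above fully determine how a Tag is decoded. A toy sketch of that decoding; the struct is simplified and describeLocality is an illustrative helper, not a real FDB identifier:

#include <cstdint>
#include <cstdio>

struct Tag { int8_t locality; uint16_t id; };

const char* describeLocality(int8_t locality) {
    if (locality > 0) return "DC-scoped tag (id picks the owning storage server)";
    switch (locality) {
    case -2: return "log router tag";
    case -3: return "remote tLog tag";
    case -6: return "pseudo tag popped by log routers";
    default: return "other special locality";
    }
}

int main() {
    Tag t{ /*locality=*/1, /*id=*/42 }; // tLog set in DC 1; tag owned by SS 42
    std::printf("%s, id=%u\n", describeLocality(t.locality), t.id);
}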
@ -193,6 +198,10 @@ std::string describe( Reference<T> const& item ) {
return item->toString();
}
static std::string describe(UID const& item) {
return item.shortString();
}
template <class T>
std::string describe( T const& item ) {
return item.toString();

View File

@ -94,6 +94,7 @@ public:
virtual void selectApiVersion(int apiVersion) = 0;
virtual const char* getClientVersion() = 0;
virtual ThreadFuture<uint64_t> getServerProtocol(const char* clusterFilePath) = 0;
virtual void setNetworkOption(FDBNetworkOptions::Option option, Optional<StringRef> value = Optional<StringRef>()) = 0;
virtual void setupNetwork() = 0;

View File

@ -1758,6 +1758,7 @@ ACTOR Future<std::set<NetworkAddress>> checkForExcludingServers(Database cx, vec
wait( delayJittered( 1.0 ) ); // SOMEDAY: watches!
} catch (Error& e) {
TraceEvent("CheckForExcludingServersError").error(e);
wait( tr.onError(e) );
}
}

View File

@ -371,10 +371,6 @@ ClientCoordinators::ClientCoordinators( Key clusterKey, std::vector<NetworkAddre
ccf = makeReference<ClusterConnectionFile>(ClusterConnectionString(coordinators, clusterKey));
}
UID WLTOKEN_CLIENTLEADERREG_GETLEADER( -1, 2 );
UID WLTOKEN_CLIENTLEADERREG_OPENDATABASE( -1, 3 );
ClientLeaderRegInterface::ClientLeaderRegInterface( NetworkAddress remote )
: getLeader( Endpoint({remote}, WLTOKEN_CLIENTLEADERREG_GETLEADER) ),
openDatabase( Endpoint({remote}, WLTOKEN_CLIENTLEADERREG_OPENDATABASE) )

View File

@ -18,14 +18,17 @@
* limitations under the License.
*/
#include "fdbclient/CoordinationInterface.h"
#include "fdbclient/MultiVersionTransaction.h"
#include "fdbclient/MultiVersionAssignmentVars.h"
#include "fdbclient/ThreadSafeTransaction.h"
#include "flow/network.h"
#include "flow/Platform.h"
#include "flow/ProtocolVersion.h"
#include "flow/UnitTest.h"
#include "flow/actorcompiler.h" // This must be the last #include.
#include "flow/actorcompiler.h" // This must be the last #include.
void throwIfError(FdbCApi::fdb_error_t e) {
if(e) {
@ -359,6 +362,7 @@ void DLApi::init() {
loadClientFunction(&api->futureGetBool, lib, fdbCPath, "fdb_future_get_bool", headerVersion >= 700);
loadClientFunction(&api->futureGetInt64, lib, fdbCPath, headerVersion >= 620 ? "fdb_future_get_int64" : "fdb_future_get_version");
loadClientFunction(&api->futureGetUInt64, lib, fdbCPath, "fdb_future_get_uint64");
loadClientFunction(&api->futureGetError, lib, fdbCPath, "fdb_future_get_error");
loadClientFunction(&api->futureGetKey, lib, fdbCPath, "fdb_future_get_key");
loadClientFunction(&api->futureGetValue, lib, fdbCPath, "fdb_future_get_value");
@ -394,6 +398,11 @@ const char* DLApi::getClientVersion() {
return api->getClientVersion();
}
ThreadFuture<uint64_t> DLApi::getServerProtocol(const char *clusterFilePath) {
ASSERT(false);
return ThreadFuture<uint64_t>();
}
void DLApi::setNetworkOption(FDBNetworkOptions::Option option, Optional<StringRef> value) {
throwIfError(api->setNetworkOption(option, value.present() ? value.get().begin() : nullptr, value.present() ? value.get().size() : 0));
}
@ -1013,6 +1022,11 @@ const char* MultiVersionApi::getClientVersion() {
return localClient->api->getClientVersion();
}
ThreadFuture<uint64_t> MultiVersionApi::getServerProtocol(const char *clusterFilePath) {
return api->localClient->api->getServerProtocol(clusterFilePath);
}
void validateOption(Optional<StringRef> value, bool canBePresent, bool canBeAbsent, bool canBeEmpty=true) {
ASSERT(canBePresent || canBeAbsent);

View File

@ -55,6 +55,7 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
//Network
fdb_error_t (*selectApiVersion)(int runtimeVersion, int headerVersion);
const char* (*getClientVersion)();
FDBFuture* (*getServerProtocol)(const char* clusterFilePath);
fdb_error_t (*setNetworkOption)(FDBNetworkOptions::Option option, uint8_t const *value, int valueLength);
fdb_error_t (*setupNetwork)();
fdb_error_t (*runNetwork)();
@ -108,6 +109,7 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
//Future
fdb_error_t (*futureGetDatabase)(FDBFuture *f, FDBDatabase **outDb);
fdb_error_t (*futureGetInt64)(FDBFuture *f, int64_t *outValue);
fdb_error_t (*futureGetUInt64)(FDBFuture *f, uint64_t *outValue);
fdb_error_t (*futureGetBool) (FDBFuture *f, bool *outValue);
fdb_error_t (*futureGetError)(FDBFuture *f);
fdb_error_t (*futureGetKey)(FDBFuture *f, uint8_t const **outKey, int *outKeyLength);
@ -208,6 +210,7 @@ public:
void selectApiVersion(int apiVersion) override;
const char* getClientVersion() override;
ThreadFuture<uint64_t> getServerProtocol(const char* clusterFilePath) override;
void setNetworkOption(FDBNetworkOptions::Option option, Optional<StringRef> value = Optional<StringRef>()) override;
void setupNetwork() override;
@ -387,6 +390,7 @@ class MultiVersionApi : public IClientApi {
public:
void selectApiVersion(int apiVersion) override;
const char* getClientVersion() override;
ThreadFuture<uint64_t> getServerProtocol(const char* clusterFilePath) override;
void setNetworkOption(FDBNetworkOptions::Option option, Optional<StringRef> value = Optional<StringRef>()) override;
void setupNetwork() override;

View File

@ -873,7 +873,7 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<ClusterConnectionF
transactionsResourceConstrained("ResourceConstrained", cc), transactionsThrottled("Throttled", cc),
transactionsProcessBehind("ProcessBehind", cc), outstandingWatches(0), latencies(1000), readLatencies(1000),
commitLatencies(1000), GRVLatencies(1000), mutationsPerCommit(1000), bytesPerCommit(1000), mvCacheInsertLocation(0),
healthMetricsLastUpdated(0), detailedHealthMetricsLastUpdated(0), internal(internal),
healthMetricsLastUpdated(0), detailedHealthMetricsLastUpdated(0), internal(internal), transactionTracingEnabled(true),
smoothMidShardSize(CLIENT_KNOBS->SHARD_STAT_SMOOTH_AMOUNT),
transactionsExpensiveClearCostEstCount("ExpensiveClearCostEstCount", cc),
specialKeySpace(std::make_unique<SpecialKeySpace>(specialKeys.begin, specialKeys.end, /* test */ false)) {
@ -946,6 +946,14 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<ClusterConnectionF
std::make_unique<ConsistencyCheckImpl>(
singleKeyRange(LiteralStringRef("consistency_check_suspended"))
.withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin)));
registerSpecialKeySpaceModule(
SpecialKeySpace::MODULE::TRACING, SpecialKeySpace::IMPLTYPE::READWRITE,
// std::make_unique<TracingOptionsImpl>(
// SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::TRACING)));
// TODO: Temporary fix for an issue with special-key top level ranges.
std::make_unique<TracingOptionsImpl>(
KeyRangeRef(LiteralStringRef("a/"), LiteralStringRef("a0"))
.withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::TRACING).begin)));
}
if (apiVersionAtLeast(630)) {
registerSpecialKeySpaceModule(SpecialKeySpace::MODULE::TRANSACTION, SpecialKeySpace::IMPLTYPE::READONLY,
@ -1044,7 +1052,7 @@ DatabaseContext::DatabaseContext(const Error& err)
transactionsProcessBehind("ProcessBehind", cc), latencies(1000), readLatencies(1000), commitLatencies(1000),
GRVLatencies(1000), mutationsPerCommit(1000), bytesPerCommit(1000),
smoothMidShardSize(CLIENT_KNOBS->SHARD_STAT_SMOOTH_AMOUNT),
transactionsExpensiveClearCostEstCount("ExpensiveClearCostEstCount", cc), internal(false) {}
transactionsExpensiveClearCostEstCount("ExpensiveClearCostEstCount", cc), internal(false), transactionTracingEnabled(true) {}
Database DatabaseContext::create(Reference<AsyncVar<ClientDBInfo>> clientInfo, Future<Void> clientInfoMonitor, LocalityData clientLocality, bool enableLocalityLoadBalance, TaskPriority taskID, bool lockAware, int apiVersion, bool switchable) {
return Database( new DatabaseContext( Reference<AsyncVar<Reference<ClusterConnectionFile>>>(), clientInfo, clientInfoMonitor, taskID, clientLocality, enableLocalityLoadBalance, lockAware, true, apiVersion, switchable ) );
@ -1207,6 +1215,14 @@ void DatabaseContext::setOption( FDBDatabaseOptions::Option option, Optional<Str
validateOptionValue(value, false);
snapshotRywEnabled--;
break;
case FDBDatabaseOptions::TRANSACTION_TRACE_ENABLE:
validateOptionValue(value, false);
transactionTracingEnabled++;
break;
case FDBDatabaseOptions::TRANSACTION_TRACE_DISABLE:
validateOptionValue(value, false);
transactionTracingEnabled--;
break;
default:
break;
}
@ -1822,7 +1838,12 @@ ACTOR Future<vector<pair<KeyRange, Reference<LocationInfo>>>> getKeyRangeLocatio
}
}
// Returns a vector of <ShardRange, storage server location info> pairs.
// Get the SS locations for each shard in the 'keys' key-range;
// the returned vector's size is the number of shards in the input key-range.
// Each element is a <ShardRange, storage server location info> pair, where
// ShardRange is the whole shard's key-range, not just the part inside the given key range.
// Example: querying the function with key range (b, d) could return something like:
// [([a, b1), locationInfo), ([b1, c), locationInfo), ([c, d1), locationInfo)].
template <class F>
Future< vector< pair<KeyRange,Reference<LocationInfo>> > > getKeyRangeLocations( Database const& cx, KeyRange const& keys, int limit, bool reverse, F StorageServerInterface::*member, TransactionInfo const& info ) {
ASSERT (!keys.empty());
@ -2692,8 +2713,21 @@ void debugAddTags(Transaction *tr) {
}
SpanID generateSpanID(int transactionTracingEnabled) {
uint64_t tid = deterministicRandom()->randomUInt64();
if (transactionTracingEnabled > 0) {
return SpanID(tid, deterministicRandom()->randomUInt64());
} else {
return SpanID(tid, 0);
}
}
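generateSpanID encodes the tracing switch into the SpanID itself: the first half is always a random transaction id, and the second half doubles as the trace token, zero when tracing is off. A standalone sketch of the scheme, with a simplified SpanID and std::mt19937_64 standing in for deterministicRandom():

#include <cassert>
#include <cstdint>
#include <random>

struct SpanID {
    uint64_t first_ = 0, second_ = 0;
    uint64_t first() const { return first_; }
    uint64_t second() const { return second_; } // doubles as the trace token
};

static std::mt19937_64 rng{ 42 };

SpanID generateSpanID(bool tracingEnabled) {
    uint64_t tid = rng();
    return tracingEnabled ? SpanID{ tid, rng() } : SpanID{ tid, 0 };
}

int main() {
    assert(generateSpanID(false).second() == 0); // untraced: token is zero
    assert(generateSpanID(true).first() != 0);   // traced: both halves drawn from the RNG
}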
Transaction::Transaction()
: info(TaskPriority::DefaultEndpoint, generateSpanID(true)),
span(info.spanID, "Transaction"_loc) {}
Transaction::Transaction(Database const& cx)
: cx(cx), info(cx->taskID, deterministicRandom()->randomUniqueID()), backoff(CLIENT_KNOBS->DEFAULT_BACKOFF),
: cx(cx), info(cx->taskID, generateSpanID(cx->transactionTracingEnabled)), backoff(CLIENT_KNOBS->DEFAULT_BACKOFF),
committedVersion(invalidVersion), versionstampPromise(Promise<Standalone<StringRef>>()), options(cx), numErrors(0),
trLogInfo(createTrLogInfoProbabilistically(cx)), tr(info.spanID), span(info.spanID, "Transaction"_loc) {
if (DatabaseContext::debugUseTags) {
@ -4166,6 +4200,37 @@ Future<Standalone<StringRef>> Transaction::getVersionstamp() {
return versionstampPromise.getFuture();
}
ACTOR Future<ProtocolVersion> coordinatorProtocolsFetcher(Reference<ClusterConnectionFile> f) {
state ClientCoordinators coord(f);
state vector<Future<ProtocolInfoReply>> coordProtocols;
coordProtocols.reserve(coord.clientLeaderServers.size());
for (int i = 0; i < coord.clientLeaderServers.size(); i++) {
RequestStream<ProtocolInfoRequest> requestStream{ Endpoint{
{ coord.clientLeaderServers[i].getLeader.getEndpoint().addresses }, WLTOKEN_PROTOCOL_INFO } };
coordProtocols.push_back(retryBrokenPromise(requestStream, ProtocolInfoRequest{}));
}
wait(smartQuorum(coordProtocols, coordProtocols.size() / 2 + 1, 1.5));
std::unordered_map<uint64_t, int> protocolCount;
for (int i = 0; i < coordProtocols.size(); i++) {
if (coordProtocols[i].isReady()) {
protocolCount[coordProtocols[i].get().version.version()]++;
}
}
uint64_t majorityProtocol = std::max_element(protocolCount.begin(), protocolCount.end(), [](const std::pair<uint64_t, int>& l, const std::pair<uint64_t, int>& r){
return l.second < r.second;
})->first;
return ProtocolVersion(majorityProtocol);
}
ACTOR Future<uint64_t> getCoordinatorProtocols(Reference<ClusterConnectionFile> f) {
ProtocolVersion protocolVersion = wait(coordinatorProtocolsFetcher(f));
return protocolVersion.version();
}
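coordinatorProtocolsFetcher waits for a simple majority of coordinators (size/2 + 1, with a 1.5-second quorum timeout), then returns whichever protocol version the largest number of ready replies reported. The counting step in isolation, as a self-contained sketch:

#include <algorithm>
#include <cstdint>
#include <unordered_map>
#include <vector>

// Given the protocol versions reported by the coordinators that answered,
// return the most frequently reported one.
uint64_t majorityProtocol(const std::vector<uint64_t>& reported) {
    std::unordered_map<uint64_t, int> protocolCount;
    for (uint64_t v : reported) protocolCount[v]++;
    return std::max_element(protocolCount.begin(), protocolCount.end(),
                            [](const auto& l, const auto& r) { return l.second < r.second; })
        ->first;
}

// e.g. majorityProtocol({ 0x0FDB00B070010001ULL, 0x0FDB00B070010001ULL,
//                         0x0FDB00B063010001ULL }) == 0x0FDB00B070010001ULL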
uint32_t Transaction::getSize() {
auto s = tr.transaction.mutations.expectedSize() + tr.transaction.read_conflict_ranges.expectedSize() +
tr.transaction.write_conflict_ranges.expectedSize();
@ -4605,6 +4670,16 @@ Reference<TransactionLogInfo> Transaction::createTrLogInfoProbabilistically(cons
return Reference<TransactionLogInfo>();
}
void Transaction::setTransactionID(uint64_t id) {
ASSERT(getSize() == 0);
info.spanID = SpanID(id, info.spanID.second());
}
void Transaction::setToken(uint64_t token) {
ASSERT(getSize() == 0);
info.spanID = SpanID(info.spanID.first(), token);
}
void enableClientInfoLogging() {
ASSERT(networkOptions.logClientInfo.present() == false);
networkOptions.logClientInfo = true;

View File

@ -284,6 +284,8 @@ public:
[[nodiscard]] Future<Standalone<StringRef>>
getVersionstamp(); // Will be fulfilled only after commit() returns success
Future<uint64_t> getProtocolVersion();
Promise<Standalone<StringRef>> versionstampPromise;
uint32_t getSize();
@ -291,9 +293,7 @@ public:
void flushTrLogsIfEnabled();
// These are to permit use as state variables in actors:
Transaction()
: info(TaskPriority::DefaultEndpoint, deterministicRandom()->randomUniqueID()),
span(info.spanID, "Transaction"_loc) {}
Transaction();
void operator=(Transaction&& r) noexcept;
void reset();
@ -323,6 +323,9 @@ public:
double startTime;
Reference<TransactionLogInfo> trLogInfo;
void setTransactionID(uint64_t id);
void setToken(uint64_t token);
const vector<Future<std::pair<Key, Key>>>& getExtraReadConflictRanges() const { return extraConflictRanges; }
Standalone<VectorRef<KeyRangeRef>> readConflictRanges() const {
return Standalone<VectorRef<KeyRangeRef>>(tr.transaction.read_conflict_ranges, tr.arena);
@ -360,6 +363,8 @@ ACTOR Future<Void> snapCreate(Database cx, Standalone<StringRef> snapCmd, UID sn
// Checks with Data Distributor that it is safe to mark all servers in exclusions as failed
ACTOR Future<bool> checkSafeExclusions(Database cx, vector<AddressExclusion> exclusions);
ACTOR Future<uint64_t> getCoordinatorProtocols(Reference<ClusterConnectionFile> f);
inline uint64_t getWriteOperationCost(uint64_t bytes) {
return bytes / std::max(1, CLIENT_KNOBS->WRITE_COST_BYTE_FACTOR) + 1;
}

View File

@ -1593,6 +1593,14 @@ void ReadYourWritesTransaction::getWriteConflicts( KeyRangeMap<bool> *result ) {
}
}
void ReadYourWritesTransaction::setTransactionID(uint64_t id) {
tr.setTransactionID(id);
}
void ReadYourWritesTransaction::setToken(uint64_t token) {
tr.setToken(token);
}
Standalone<RangeResultRef> ReadYourWritesTransaction::getReadConflictRangeIntersecting(KeyRangeRef kr) {
TEST(true); // Special keys read conflict range
ASSERT(readConflictRangeKeysRange.contains(kr));

View File

@ -144,6 +144,9 @@ public:
return tr.info;
}
void setTransactionID(uint64_t id);
void setToken(uint64_t token);
// Read from the special key space readConflictRangeKeysRange
Standalone<RangeResultRef> getReadConflictRangeIntersecting(KeyRangeRef kr);
// Read from the special key space writeConflictRangeKeysRange

View File

@ -786,7 +786,8 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"coordinators":[
{
"reachable":true,
"address":"127.0.0.1:4701"
"address":"127.0.0.1:4701",
"protocol": "0fdb00b070010001"
}
],
"quorum_reachable":true

View File

@ -24,6 +24,11 @@
#include "fdbclient/StatusClient.h"
#include "flow/actorcompiler.h" // This must be the last #include.
namespace {
const std::string kTracingTransactionIdKey = "transaction_id";
const std::string kTracingTokenKey = "token";
}
std::unordered_map<SpecialKeySpace::MODULE, KeyRange> SpecialKeySpace::moduleToBoundary = {
{ SpecialKeySpace::MODULE::TRANSACTION,
KeyRangeRef(LiteralStringRef("\xff\xff/transaction/"), LiteralStringRef("\xff\xff/transaction0")) },
@ -38,7 +43,9 @@ std::unordered_map<SpecialKeySpace::MODULE, KeyRange> SpecialKeySpace::moduleToB
KeyRangeRef(LiteralStringRef("\xff\xff/management/"), LiteralStringRef("\xff\xff/management0")) },
{ SpecialKeySpace::MODULE::ERRORMSG, singleKeyRange(LiteralStringRef("\xff\xff/error_message")) },
{ SpecialKeySpace::MODULE::CONFIGURATION,
KeyRangeRef(LiteralStringRef("\xff\xff/configuration/"), LiteralStringRef("\xff\xff/configuration0")) }
KeyRangeRef(LiteralStringRef("\xff\xff/configuration/"), LiteralStringRef("\xff\xff/configuration0")) },
{ SpecialKeySpace::MODULE::TRACING,
KeyRangeRef(LiteralStringRef("\xff\xff/tracing/"), LiteralStringRef("\xff\xff/tracing0")) }
};
std::unordered_map<std::string, KeyRange> SpecialKeySpace::managementApiCommandToRange = {
@ -53,6 +60,8 @@ std::unordered_map<std::string, KeyRange> SpecialKeySpace::managementApiCommandT
std::set<std::string> SpecialKeySpace::options = { "excluded/force", "failed/force" };
std::set<std::string> SpecialKeySpace::tracingOptions = { kTracingTransactionIdKey, kTracingTokenKey };
Standalone<RangeResultRef> rywGetRange(ReadYourWritesTransaction* ryw, const KeyRangeRef& kr,
const Standalone<RangeResultRef>& res);
@ -139,27 +148,46 @@ ACTOR Future<Void> normalizeKeySelectorActor(SpecialKeySpace* sks, ReadYourWrite
KeyRangeRef boundary, int* actualOffset,
Standalone<RangeResultRef>* result,
Optional<Standalone<RangeResultRef>>* cache) {
// If offset < 1, we need to move left: iter points to the range containing at least one smaller key
// (it would be a waste of time to walk through a range whose begin key is the same as ks->key;
// rangeContainingKeyBefore itself handles the case where ks->key == Key())
// Otherwise, if offset > 1, we only need to move right: iter points to the range containing the key
// Since boundary.end is always a key in the RangeMap, it is always safe to move right
state RangeMap<Key, SpecialKeyRangeReadImpl*, KeyRangeRef>::iterator iter =
ks->offset < 1 ? sks->getReadImpls().rangeContainingKeyBefore(ks->getKey())
: sks->getReadImpls().rangeContaining(ks->getKey());
while ((ks->offset < 1 && iter->begin() > boundary.begin) || (ks->offset > 1 && iter->begin() < boundary.end)) {
while ((ks->offset < 1 && iter->begin() >= boundary.begin) || (ks->offset > 1 && iter->begin() < boundary.end)) {
if (iter->value() != nullptr) {
wait(moveKeySelectorOverRangeActor(iter->value(), ryw, ks, cache));
}
ks->offset < 1 ? --iter : ++iter;
// Check if we can still move the iterator left
if (ks->offset < 1) {
if (iter == sks->getReadImpls().ranges().begin()) {
break;
} else {
--iter;
}
} else if (ks->offset > 1) {
// Always safe to move right
++iter;
}
}
*actualOffset = ks->offset;
if (iter->begin() == boundary.begin || iter->begin() == boundary.end) ks->setKey(iter->begin());
if (!ks->isFirstGreaterOrEqual()) {
// The Key Selector clamps up to the legal key space
TraceEvent(SevDebug, "ReadToBoundary")
.detail("TerminateKey", ks->getKey())
.detail("TerminateOffset", ks->offset);
if (ks->offset < 1)
// If still not normalized after moving to the boundary,
// let key selector clamp up to the boundary
if (ks->offset < 1) {
result->readToBegin = true;
else
ks->setKey(boundary.begin);
}
else {
result->readThroughEnd = true;
ks->setKey(boundary.end);
}
ks->offset = 1;
}
return Void();
@ -560,7 +588,7 @@ ACTOR Future<Standalone<RangeResultRef>> ddMetricsGetRangeActor(ReadYourWritesTr
return result;
} catch (Error& e) {
state Error err(e);
if (e.code() == error_code_operation_failed) {
if (e.code() == error_code_dd_not_found) {
TraceEvent(SevWarnAlways, "DataDistributorNotPresent")
.detail("Operation", "DDMetricsReqestThroughSpecialKeys");
wait(delayJittered(FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY));
@ -1263,3 +1291,63 @@ Future<Optional<std::string>> ConsistencyCheckImpl::commit(ReadYourWritesTransac
BinaryWriter::toValue(entry.present(), Unversioned()));
return Optional<std::string>();
}
TracingOptionsImpl::TracingOptionsImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {
TraceEvent("TracingOptionsImpl::TracingOptionsImpl").detail("Range", kr);
}
Future<Standalone<RangeResultRef>> TracingOptionsImpl::getRange(ReadYourWritesTransaction* ryw,
KeyRangeRef kr) const {
Standalone<RangeResultRef> result;
for (const auto& option : SpecialKeySpace::getTracingOptions()) {
auto key = getKeyRange().begin.withSuffix(option);
if (!kr.contains(key)) {
continue;
}
if (key.endsWith(kTracingTransactionIdKey)) {
result.push_back_deep(result.arena(), KeyValueRef(key, std::to_string(ryw->getTransactionInfo().spanID.first())));
} else if (key.endsWith(kTracingTokenKey)) {
result.push_back_deep(result.arena(), KeyValueRef(key, std::to_string(ryw->getTransactionInfo().spanID.second())));
}
}
return result;
}
void TracingOptionsImpl::set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) {
if (ryw->getApproximateSize() > 0) {
ryw->setSpecialKeySpaceErrorMsg("tracing options must be set first");
ryw->getSpecialKeySpaceWriteMap().insert(key, std::make_pair(true, Optional<Value>()));
return;
}
if (key.endsWith(kTracingTransactionIdKey)) {
ryw->setTransactionID(std::stoul(value.toString()));
} else if (key.endsWith(kTracingTokenKey)) {
if (value.toString() == "true") {
ryw->setToken(deterministicRandom()->randomUInt64());
} else if (value.toString() == "false") {
ryw->setToken(0);
} else {
ryw->setSpecialKeySpaceErrorMsg("token must be set to true/false");
throw special_keys_api_failure();
}
}
}
Future<Optional<std::string>> TracingOptionsImpl::commit(ReadYourWritesTransaction* ryw) {
if (ryw->getSpecialKeySpaceWriteMap().size() > 0) {
throw special_keys_api_failure();
}
return Optional<std::string>();
}
void TracingOptionsImpl::clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) {
ryw->setSpecialKeySpaceErrorMsg("clear range disabled");
throw special_keys_api_failure();
}
void TracingOptionsImpl::clear(ReadYourWritesTransaction* ryw, const KeyRef& key) {
ryw->setSpecialKeySpaceErrorMsg("clear disabled");
throw special_keys_api_failure();
}
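From a client's point of view this module is driven entirely by reads and writes under \xff\xff/tracing/. A hedged usage fragment, not a complete program: the writes must precede any other operation on the transaction (the getApproximateSize() guard above), and SPECIAL_KEY_SPACE_ENABLE_WRITES is assumed to be the option gating writable special keys.

// Assumes a ReadYourWritesTransaction tr on an open database.
tr.setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES);
tr.set(LiteralStringRef("\xff\xff/tracing/transaction_id"), LiteralStringRef("12345")); // setTransactionID(12345)
tr.set(LiteralStringRef("\xff\xff/tracing/token"), LiteralStringRef("true")); // draw a random nonzero trace token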

View File

@ -145,6 +145,7 @@ public:
MANAGEMENT, // Management-API
METRICS, // data-distribution metrics
TESTONLY, // only used by correctness tests
TRACING, // Distributed tracing options
TRANSACTION, // transaction related info, conflicting keys, read/write conflict range
STATUSJSON,
UNKNOWN, // default value for all unregistered range
@ -190,6 +191,7 @@ public:
}
static Key getManagementApiCommandOptionSpecialKey(const std::string& command, const std::string& option);
static const std::set<std::string>& getManagementApiOptionsSet() { return options; }
static const std::set<std::string>& getTracingOptions() { return tracingOptions; }
private:
ACTOR static Future<Optional<Value>> getActor(SpecialKeySpace* sks, ReadYourWritesTransaction* ryw, KeyRef key);
@ -211,6 +213,7 @@ private:
static std::unordered_map<std::string, KeyRange>
managementApiCommandToRange; // management command to its special keys' range
static std::set<std::string> options; // "<command>/<option>"
static std::set<std::string> tracingOptions;
// Initialize module boundaries, used to handle cross_module_read
void modulesBoundaryInit();
@ -319,5 +322,15 @@ public:
Future<Optional<std::string>> commit(ReadYourWritesTransaction* ryw) override;
};
class TracingOptionsImpl : public SpecialKeyRangeRWImpl {
public:
explicit TracingOptionsImpl(KeyRangeRef kr);
Future<Standalone<RangeResultRef>> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override;
void set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) override;
Future<Optional<std::string>> commit(ReadYourWritesTransaction* ryw) override;
void clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) override;
void clear(ReadYourWritesTransaction* ryw, const KeyRef& key) override;
};
#include "flow/unactorcompiler.h"
#endif

View File

@ -28,6 +28,7 @@
#include "fdbclient/json_spirit/json_spirit_reader_template.h"
#include "fdbrpc/genericactors.actor.h"
#include "flow/actorcompiler.h" // has to be last include
#include <cstdint>
json_spirit::mValue readJSONStrictly(const std::string &s) {
json_spirit::mValue val;
@ -292,7 +293,17 @@ ACTOR Future<Optional<StatusObject>> clientCoordinatorsStatusFetcher(Reference<C
for (int i = 0; i < coord.clientLeaderServers.size(); i++)
leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader, GetLeaderRequest(coord.clusterKey, UID()), TaskPriority::CoordinationReply));
wait( smartQuorum(leaderServers, leaderServers.size() / 2 + 1, 1.5) || delay(2.0) );
state vector<Future<ProtocolInfoReply>> coordProtocols;
coordProtocols.reserve(coord.clientLeaderServers.size());
for (int i = 0; i < coord.clientLeaderServers.size(); i++) {
RequestStream<ProtocolInfoRequest> requestStream{ Endpoint{
{ coord.clientLeaderServers[i].getLeader.getEndpoint().addresses }, WLTOKEN_PROTOCOL_INFO } };
coordProtocols.push_back(retryBrokenPromise(requestStream, ProtocolInfoRequest{}));
}
wait(smartQuorum(leaderServers, leaderServers.size() / 2 + 1, 1.5) &&
smartQuorum(coordProtocols, coordProtocols.size() / 2 + 1, 1.5) ||
delay(2.0));
statusObj["quorum_reachable"] = *quorum_reachable = quorum(leaderServers, leaderServers.size() / 2 + 1).isReady();
@ -309,12 +320,17 @@ ACTOR Future<Optional<StatusObject>> clientCoordinatorsStatusFetcher(Reference<C
coordinatorsUnavailable++;
coordStatus["reachable"] = false;
}
if (coordProtocols[i].isReady()) {
uint64_t protocolVersionInt = coordProtocols[i].get().version.version();
std::stringstream hexSs;
hexSs << std::hex << std::setw(2*sizeof(protocolVersionInt)) << std::setfill('0') << protocolVersionInt;
coordStatus["protocol"] = hexSs.str();
}
coordsStatus.push_back(coordStatus);
}
statusObj["coordinators"] = coordsStatus;
*coordinatorsFaultTolerance = (leaderServers.size() - 1) / 2 - coordinatorsUnavailable;
return statusObj;
}
catch (Error &e){
@ -463,7 +479,7 @@ ACTOR Future<StatusObject> statusFetcherImpl( Reference<ClusterConnectionFile> f
state int coordinatorsFaultTolerance = 0;
try {
state int64_t clientTime = time(0);
state int64_t clientTime = g_network->timer();
StatusObject _statusObjClient = wait(clientStatusFetcher(f, &clientMessages, &quorum_reachable, &coordinatorsFaultTolerance));
statusObjClient = _statusObjClient;
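The coordinator protocol collected above is rendered as a fixed-width, zero-padded hex string, matching the "protocol": "0fdb00b070010001" field added to the status schema earlier. A standalone check of that formatting:

#include <cstdint>
#include <iomanip>
#include <iostream>
#include <sstream>

int main() {
    uint64_t protocolVersionInt = 0x0FDB00B070010001ULL;
    std::stringstream hexSs;
    hexSs << std::hex << std::setw(2 * sizeof(protocolVersionInt)) << std::setfill('0') << protocolVersionInt;
    std::cout << hexSs.str() << "\n"; // prints 0fdb00b070010001
}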

View File

@ -22,6 +22,7 @@
#include "fdbclient/ReadYourWrites.h"
#include "fdbclient/DatabaseContext.h"
#include "fdbclient/versions.h"
#include "fdbclient/NativeAPI.actor.h"
// Users of ThreadSafeTransaction might share Reference<ThreadSafe...> between different threads as long as they don't call addRef (e.g. C API follows this).
// Therefore, it is unsafe to call (explicitly or implicitly) this->addRef in any of these functions.
@ -372,6 +373,15 @@ const char* ThreadSafeApi::getClientVersion() {
return clientVersion.c_str();
}
ThreadFuture<uint64_t> ThreadSafeApi::getServerProtocol(const char* clusterFilePath) {
auto [clusterFile, isDefault] = ClusterConnectionFile::lookupClusterFileName(std::string(clusterFilePath));
Reference<ClusterConnectionFile> f = Reference<ClusterConnectionFile>(new ClusterConnectionFile(clusterFile));
return onMainThread( [f]() -> Future< uint64_t > {
return getCoordinatorProtocols(f);
} );
}
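The future hops to the main thread and resolves to the majority coordinator protocol computed by getCoordinatorProtocols. A hedged caller-side fragment; ThreadFuture's blocking accessors are assumed to follow their usual shape, and setup/error handling are elided:

// Assumes the network thread is already running.
ThreadFuture<uint64_t> f = api->getServerProtocol("/etc/foundationdb/fdb.cluster");
f.blockUntilReady();
uint64_t protocol = f.get(); // e.g. 0x0FDB00B070010001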
void ThreadSafeApi::setNetworkOption(FDBNetworkOptions::Option option, Optional<StringRef> value) {
if (option == FDBNetworkOptions::EXTERNAL_CLIENT_TRANSPORT_ID) {
if(value.present()) {

View File

@ -94,6 +94,8 @@ public:
Version getCommittedVersion() override;
ThreadFuture<int64_t> getApproximateSize() override;
ThreadFuture<uint64_t> getProtocolVersion();
void setOption( FDBTransactionOptions::Option option, Optional<StringRef> value = Optional<StringRef>() ) override;
ThreadFuture<Void> checkDeferredError();
@ -117,6 +119,7 @@ class ThreadSafeApi : public IClientApi, ThreadSafeReferenceCounted<ThreadSafeAp
public:
void selectApiVersion(int apiVersion);
const char* getClientVersion();
ThreadFuture<uint64_t> getServerProtocol(const char* clusterFilePath) override;
void setNetworkOption(FDBNetworkOptions::Option option, Optional<StringRef> value = Optional<StringRef>());
void setupNetwork();

View File

@ -182,6 +182,10 @@ description is not currently required but encouraged.
<Option name="transaction_include_port_in_address" code="505"
description="Addresses returned by get_addresses_for_key include the port when enabled. As of api version 630, this option is enabled by default and setting this has no effect."
defaultFor="23"/>
<Option name="transaction_trace_enable" code="600"
description="Enable tracing for all transactions. This is the default." />
<Option name="transaction_trace_disable" code="601"
description="Disable tracing for all transactions." />
</Scope>
<Scope name="TransactionOption">

View File

@ -50,7 +50,10 @@ if(NOT WIN32)
endif()
add_library(thirdparty STATIC ${FDBRPC_THIRD_PARTY_SRCS})
if(NOT WIN32)
if(WIN32)
target_compile_definitions(thirdparty PRIVATE USE_FIBERS)
else()
target_compile_definitions(thirdparty PRIVATE USE_UCONTEXT)
target_compile_options(thirdparty BEFORE PRIVATE -w) # disable warnings for third party
endif()
if(USE_VALGRIND)

View File

@ -20,6 +20,7 @@
// Unit tests for the flow language and libraries
#include "flow/ProtocolVersion.h"
#include "flow/UnitTest.h"
#include "flow/DeterministicRandom.h"
#include "flow/IThreadPool.h"
@ -249,6 +250,7 @@ struct YieldMockNetwork final : INetwork, ReferenceCounted<YieldMockNetwork> {
void setCurrentTask(TaskPriority taskID) override { baseNetwork->setCurrentTask(taskID); }
double now() const override { return baseNetwork->now(); }
double timer() override { return baseNetwork->timer(); }
double timer_monotonic() override { return baseNetwork->timer_monotonic(); }
void stop() override { return baseNetwork->stop(); }
void addStopCallback(std::function<void()> fn) override {
ASSERT(false);
@ -280,6 +282,9 @@ struct YieldMockNetwork final : INetwork, ReferenceCounted<YieldMockNetwork> {
static TLSConfig emptyConfig;
return emptyConfig;
}
ProtocolVersion protocolVersion() override {
return baseNetwork->protocolVersion();
}
};
struct NonserializableThing {};

View File

@ -18,8 +18,11 @@
* limitations under the License.
*/
#include "fdbclient/CoordinationInterface.h"
#include "fdbrpc/FlowTransport.h"
#include "flow/network.h"
#include <cstdint>
#include <unordered_map>
#if VALGRIND
#include <memcheck.h>
@ -38,19 +41,21 @@
#include "flow/TDMetric.actor.h"
#include "flow/ObjectSerializer.h"
#include "flow/ProtocolVersion.h"
#include "flow/UnitTest.h"
#include "flow/actorcompiler.h" // This must be the last #include.
static NetworkAddressList g_currentDeliveryPeerAddress = NetworkAddressList();
const UID WLTOKEN_ENDPOINT_NOT_FOUND(-1, 0);
const UID WLTOKEN_PING_PACKET(-1, 1);
const UID TOKEN_IGNORE_PACKET(0, 2);
constexpr UID WLTOKEN_ENDPOINT_NOT_FOUND(-1, 0);
constexpr UID WLTOKEN_PING_PACKET(-1, 1);
constexpr int PACKET_LEN_WIDTH = sizeof(uint32_t);
const uint64_t TOKEN_STREAM_FLAG = 1;
class EndpointMap : NonCopyable {
public:
EndpointMap();
// Reserve space for this many wellKnownEndpoints
explicit EndpointMap(int wellKnownEndpointCount);
void insertWellKnown(NetworkMessageReceiver* r, const Endpoint::Token& token, TaskPriority priority);
void insert( NetworkMessageReceiver* r, Endpoint::Token& token, TaskPriority priority );
const Endpoint& insert( NetworkAddressList localAddresses, std::vector<std::pair<FlowReceiver*, TaskPriority>> const& streams );
NetworkMessageReceiver* get( Endpoint::Token const& token );
@ -65,17 +70,16 @@ private:
uint64_t uid[2]; // priority packed into lower 32 bits; actual lower 32 bits of token are the index in data[]
uint32_t nextFree;
};
NetworkMessageReceiver* receiver;
NetworkMessageReceiver* receiver = nullptr;
Endpoint::Token& token() { return *(Endpoint::Token*)uid; }
};
int wellKnownEndpointCount;
std::vector<Entry> data;
uint32_t firstFree;
};
EndpointMap::EndpointMap()
: firstFree(-1)
{
}
EndpointMap::EndpointMap(int wellKnownEndpointCount)
: wellKnownEndpointCount(wellKnownEndpointCount), data(wellKnownEndpointCount), firstFree(-1) {}
void EndpointMap::realloc() {
int oldSize = data.size();
@ -88,6 +92,14 @@ void EndpointMap::realloc() {
firstFree = oldSize;
}
void EndpointMap::insertWellKnown(NetworkMessageReceiver* r, const Endpoint::Token& token, TaskPriority priority) {
int index = token.second();
ASSERT(data[index].receiver == nullptr);
data[index].receiver = r;
data[index].token() =
Endpoint::Token(token.first(), (token.second() & 0xffffffff00000000LL) | static_cast<uint32_t>(priority));
}
void EndpointMap::insert( NetworkMessageReceiver* r, Endpoint::Token& token, TaskPriority priority ) {
if (firstFree == uint32_t(-1)) realloc();
int index = firstFree;
@ -135,6 +147,9 @@ const Endpoint& EndpointMap::insert( NetworkAddressList localAddresses, std::vec
NetworkMessageReceiver* EndpointMap::get( Endpoint::Token const& token ) {
uint32_t index = token.second();
if (index < wellKnownEndpointCount && data[index].receiver == nullptr) {
TraceEvent(SevWarnAlways, "WellKnownEndpointNotAdded").detail("Token", token);
}
if ( index < data.size() && data[index].token().first() == token.first() && ((data[index].token().second()&0xffffffff00000000LL)|index)==token.second() )
return data[index].receiver;
return 0;
@ -147,9 +162,13 @@ TaskPriority EndpointMap::getPriority( Endpoint::Token const& token ) {
return TaskPriority::UnknownEndpoint;
}
void EndpointMap::remove( Endpoint::Token const& token, NetworkMessageReceiver* r ) {
void EndpointMap::remove(Endpoint::Token const& token, NetworkMessageReceiver* r) {
uint32_t index = token.second();
if ( index < data.size() && data[index].token().first() == token.first() && ((data[index].token().second()&0xffffffff00000000LL)|index)==token.second() && data[index].receiver == r ) {
if (index < wellKnownEndpointCount) {
data[index].receiver = nullptr;
} else if (index < data.size() && data[index].token().first() == token.first() &&
((data[index].token().second() & 0xffffffff00000000LL) | index) == token.second() &&
data[index].receiver == r) {
data[index].receiver = 0;
data[index].nextFree = firstFree;
firstFree = index;
@ -158,11 +177,9 @@ void EndpointMap::remove( Endpoint::Token const& token, NetworkMessageReceiver*
struct EndpointNotFoundReceiver final : NetworkMessageReceiver {
EndpointNotFoundReceiver(EndpointMap& endpoints) {
//endpoints[WLTOKEN_ENDPOINT_NOT_FOUND] = this;
Endpoint::Token e = WLTOKEN_ENDPOINT_NOT_FOUND;
endpoints.insert(this, e, TaskPriority::DefaultEndpoint);
ASSERT( e == WLTOKEN_ENDPOINT_NOT_FOUND );
endpoints.insertWellKnown(this, WLTOKEN_ENDPOINT_NOT_FOUND, TaskPriority::DefaultEndpoint);
}
void receive(ArenaObjectReader& reader) override {
// Remote machine tells us it doesn't have endpoint e
Endpoint e;
@ -173,9 +190,7 @@ struct EndpointNotFoundReceiver final : NetworkMessageReceiver {
struct PingReceiver final : NetworkMessageReceiver {
PingReceiver(EndpointMap& endpoints) {
Endpoint::Token e = WLTOKEN_PING_PACKET;
endpoints.insert(this, e, TaskPriority::ReadSocket);
ASSERT( e == WLTOKEN_PING_PACKET );
endpoints.insertWellKnown(this, WLTOKEN_PING_PACKET, TaskPriority::ReadSocket);
}
void receive(ArenaObjectReader& reader) override {
ReplyPromise<Void> reply;
@ -214,11 +229,9 @@ public:
Reference<AsyncVar<bool>> degraded;
bool warnAlwaysForLargePacket;
// These declarations must be in exactly this order
EndpointMap endpoints;
EndpointNotFoundReceiver endpointNotFoundReceiver;
PingReceiver pingReceiver;
// End ordered declarations
EndpointNotFoundReceiver endpointNotFoundReceiver{ endpoints };
PingReceiver pingReceiver{ endpoints };
Int64MetricHandle bytesSent;
Int64MetricHandle countPacketsReceived;
@ -251,18 +264,36 @@ ACTOR Future<Void> pingLatencyLogger(TransportData* self) {
if(!peer) {
TraceEvent(SevWarnAlways, "MissingNetworkAddress").suppressFor(10.0).detail("PeerAddr", lastAddress);
}
if (peer && peer->lastLoggedTime <= 0.0) {
peer->lastLoggedTime = peer->lastConnectTime;
}
if(peer && peer->pingLatencies.getPopulationSize() >= 10) {
TraceEvent("PingLatency")
.detail("PeerAddr", lastAddress)
.detail("MinLatency", peer->pingLatencies.min())
.detail("MaxLatency", peer->pingLatencies.max())
.detail("MeanLatency", peer->pingLatencies.mean())
.detail("MedianLatency", peer->pingLatencies.median())
.detail("P90Latency", peer->pingLatencies.percentile(0.90))
.detail("Count", peer->pingLatencies.getPopulationSize())
.detail("BytesReceived", peer->bytesReceived - peer->lastLoggedBytesReceived)
.detail("BytesSent", peer->bytesSent - peer->lastLoggedBytesSent);
.detail("Elapsed", now() - peer->lastLoggedTime)
.detail("PeerAddr", lastAddress)
.detail("MinLatency", peer->pingLatencies.min())
.detail("MaxLatency", peer->pingLatencies.max())
.detail("MeanLatency", peer->pingLatencies.mean())
.detail("MedianLatency", peer->pingLatencies.median())
.detail("P90Latency", peer->pingLatencies.percentile(0.90))
.detail("Count", peer->pingLatencies.getPopulationSize())
.detail("BytesReceived", peer->bytesReceived - peer->lastLoggedBytesReceived)
.detail("BytesSent", peer->bytesSent - peer->lastLoggedBytesSent)
.detail("ConnectOutgoingCount", peer->connectOutgoingCount)
.detail("ConnectIncomingCount", peer->connectIncomingCount)
.detail("ConnectFailedCount", peer->connectFailedCount)
.detail("ConnectMinLatency", peer->connectLatencies.min())
.detail("ConnectMaxLatency", peer->connectLatencies.max())
.detail("ConnectMeanLatency", peer->connectLatencies.mean())
.detail("ConnectMedianLatency", peer->connectLatencies.median())
.detail("ConnectP90Latency", peer->connectLatencies.percentile(0.90));
peer->lastLoggedTime = now();
peer->connectOutgoingCount = 0;
peer->connectIncomingCount = 0;
peer->connectFailedCount = 0;
peer->pingLatencies.clear();
peer->connectLatencies.clear();
peer->lastLoggedBytesReceived = peer->bytesReceived;
peer->lastLoggedBytesSent = peer->bytesSent;
wait(delay(FLOW_KNOBS->PING_LOGGING_INTERVAL));
@ -276,7 +307,8 @@ ACTOR Future<Void> pingLatencyLogger(TransportData* self) {
}
TransportData::TransportData(uint64_t transportId)
: endpointNotFoundReceiver(endpoints),
: endpoints(/*wellKnownTokenCount*/ 11),
endpointNotFoundReceiver(endpoints),
pingReceiver(endpoints),
warnAlwaysForLargePacket(true),
lastIncompatibleMessage(0),
@ -558,6 +590,7 @@ ACTOR Future<Void> connectionKeeper( Reference<Peer> self,
.detail("FailureStatus", IFailureMonitor::failureMonitor().getState(self->destination).isAvailable()
? "OK"
: "FAILED");
++self->connectOutgoingCount;
try {
choose {
@ -565,6 +598,10 @@ ACTOR Future<Void> connectionKeeper( Reference<Peer> self,
wait(INetworkConnections::net()->connect(self->destination))) {
conn = _conn;
wait(conn->connectHandshake());
self->connectLatencies.addSample(now() - self->lastConnectTime);
if (FlowTransport::isClient()) {
IFailureMonitor::failureMonitor().setStatus(self->destination, FailureStatus(false));
}
if (self->unsent.empty()) {
delayedHealthUpdateF = delayedHealthUpdate(self->destination);
choose {
@ -588,8 +625,9 @@ ACTOR Future<Void> connectionKeeper( Reference<Peer> self,
throw connection_failed();
}
}
} catch (Error& e) {
if (e.code() != error_code_connection_failed) {
} catch(Error &e) {
++self->connectFailedCount;
if(e.code() != error_code_connection_failed) {
throw;
}
TraceEvent("ConnectionTimedOut", conn ? conn->getDebugID() : UID())
@ -719,7 +757,8 @@ Peer::Peer(TransportData* transport, NetworkAddress const& destination)
reconnectionDelay(FLOW_KNOBS->INITIAL_RECONNECTION_TIME), compatible(true), outstandingReplies(0),
incompatibleProtocolVersionNewer(false), peerReferences(-1), bytesReceived(0), lastDataPacketSentTime(now()),
pingLatencies(destination.isPublic() ? FLOW_KNOBS->PING_SAMPLE_AMOUNT : 1), lastLoggedBytesReceived(0),
bytesSent(0), lastLoggedBytesSent(0) {
bytesSent(0), lastLoggedBytesSent(0), lastLoggedTime(0.0), connectOutgoingCount(0), connectIncomingCount(0),
connectFailedCount(0), connectLatencies(destination.isPublic() ? FLOW_KNOBS->NETWORK_CONNECT_SAMPLE_AMOUNT : 1) {
IFailureMonitor::failureMonitor().setStatus(destination, FailureStatus(false));
}
@ -745,7 +784,7 @@ void Peer::prependConnectPacket() {
}
pkt.connectPacketLength = sizeof(pkt) - sizeof(pkt.connectPacketLength);
pkt.protocolVersion = currentProtocolVersion;
pkt.protocolVersion = g_network->protocolVersion();
pkt.protocolVersion.addObjectSerializerFlag();
pkt.connectionId = transport->transportId;
@ -770,6 +809,7 @@ void Peer::discardUnreliablePackets() {
void Peer::onIncomingConnection( Reference<Peer> self, Reference<IConnection> conn, Future<Void> reader ) {
// In case two processes are trying to connect to each other simultaneously, the process with the larger canonical NetworkAddress
// gets to keep its outgoing connection.
++self->connectIncomingCount;
if ( !destination.isPublic() && !outgoingConnectionIdle ) throw address_in_use();
NetworkAddress compatibleAddr = transport->localAddresses.address;
if(transport->localAddresses.secondaryAddress.present() && transport->localAddresses.secondaryAddress.get().isTLS() == destination.isTLS()) {
@ -809,6 +849,15 @@ TransportData::~TransportData() {
}
}
static bool checkCompatible(const PeerCompatibilityPolicy& policy, ProtocolVersion version) {
switch (policy.requirement) {
case RequirePeer::Exactly:
return version.version() == policy.version.version();
case RequirePeer::AtLeast:
return version.version() >= policy.version.version();
}
}
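checkCompatible is what lets ProtocolInfoRequest through before versions are known to match: its reply promise carries an AtLeast policy pinned to withStableInterfaces(), while the Exactly default preserves the old lockstep behavior. A standalone sketch of the two policies with simplified types:

#include <cassert>
#include <cstdint>

enum class RequirePeer { Exactly, AtLeast };
struct Policy { RequirePeer requirement; uint64_t version; };

bool checkCompatible(const Policy& p, uint64_t peerVersion) {
    switch (p.requirement) {
    case RequirePeer::Exactly: return peerVersion == p.version;
    case RequirePeer::AtLeast: return peerVersion >= p.version;
    }
    return false; // unreachable
}

int main() {
    Policy stable{ RequirePeer::AtLeast, 0x0FDB00B063010001ULL }; // illustrative version
    assert(checkCompatible(stable, 0x0FDB00B070010001ULL));       // newer peer admitted
    assert(!checkCompatible(Policy{ RequirePeer::Exactly, 1 }, 2)); // lockstep rejects
}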
ACTOR static void deliver(TransportData* self, Endpoint destination, ArenaReader reader, bool inReadSocket) {
TaskPriority priority = self->endpoints.getPriority(destination.token);
if (priority < TaskPriority::ReadSocket || !inReadSocket) {
@ -819,6 +868,9 @@ ACTOR static void deliver(TransportData* self, Endpoint destination, ArenaReader
auto receiver = self->endpoints.get(destination.token);
if (receiver) {
if (!checkCompatible(receiver->peerCompatibilityPolicy(), reader.protocolVersion())) {
return;
}
try {
g_currentDeliveryPeerAddress = destination.addresses;
StringRef data = reader.arenaReadAll();
@ -864,11 +916,11 @@ static void scanPackets(TransportData* transport, uint8_t*& unprocessed_begin, c
//Retrieve packet length and checksum
if (checksumEnabled) {
if (e-p < sizeof(uint32_t) * 2) break;
packetLen = *(uint32_t*)p; p += sizeof(uint32_t);
packetLen = *(uint32_t*)p; p += PACKET_LEN_WIDTH;
packetChecksum = *(uint32_t*)p; p += sizeof(uint32_t);
} else {
if (e-p < sizeof(uint32_t)) break;
packetLen = *(uint32_t*)p; p += sizeof(uint32_t);
packetLen = *(uint32_t*)p; p += PACKET_LEN_WIDTH;
}
if (packetLen > FLOW_KNOBS->PACKET_LIMIT) {
@ -919,7 +971,9 @@ static void scanPackets(TransportData* transport, uint8_t*& unprocessed_begin, c
#if VALGRIND
VALGRIND_CHECK_MEM_IS_DEFINED(p, packetLen);
#endif
ArenaReader reader(arena, StringRef(p, packetLen), AssumeVersion(currentProtocolVersion));
// remove object serializer flag to account for flat buffer
peerProtocolVersion.removeObjectSerializerFlag();
ArenaReader reader(arena, StringRef(p, packetLen), AssumeVersion(peerProtocolVersion));
UID token;
reader >> token;
@ -946,9 +1000,9 @@ static void scanPackets(TransportData* transport, uint8_t*& unprocessed_begin, c
// Given unprocessed buffer [begin, end), check if next packet size is known and return
// enough size for the next packet, whose format is: {size, optional_checksum, data} +
// next_packet_size.
static int getNewBufferSize(const uint8_t* begin, const uint8_t* end, const NetworkAddress& peerAddress) {
static int getNewBufferSize(const uint8_t* begin, const uint8_t* end, const NetworkAddress& peerAddress, ProtocolVersion peerProtocolVersion) {
const int len = end - begin;
if (len < sizeof(uint32_t)) {
if (len < PACKET_LEN_WIDTH) {
return FLOW_KNOBS->MIN_PACKET_BUFFER_BYTES;
}
const uint32_t packetLen = *(uint32_t*)begin;
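getNewBufferSize sizes the read buffer around the framing spelled out in the comment above: each packet is a uint32 length, an optional uint32 checksum, then the payload. A self-contained sketch of parsing that header (a hypothetical helper mirroring what scanPackets does):

#include <cstdint>
#include <cstring>

constexpr int PACKET_LEN_WIDTH = sizeof(uint32_t);

// Returns the payload start, or nullptr if the buffer doesn't yet hold a full
// header (the caller would then read more bytes from the connection).
const uint8_t* readPacketHeader(const uint8_t* p, const uint8_t* e, bool checksumEnabled,
                                uint32_t* packetLen, uint32_t* packetChecksum) {
    const size_t headerBytes = checksumEnabled ? 2 * sizeof(uint32_t) : sizeof(uint32_t);
    if (size_t(e - p) < headerBytes) return nullptr;
    std::memcpy(packetLen, p, sizeof(uint32_t));
    p += PACKET_LEN_WIDTH;
    if (checksumEnabled) {
        std::memcpy(packetChecksum, p, sizeof(uint32_t));
        p += sizeof(uint32_t);
    }
    return p;
}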
@ -991,7 +1045,7 @@ ACTOR static Future<Void> connectionReader(
if (readAllBytes < FLOW_KNOBS->MIN_PACKET_BUFFER_FREE_BYTES) {
Arena newArena;
const int unproc_len = unprocessed_end - unprocessed_begin;
const int len = getNewBufferSize(unprocessed_begin, unprocessed_end, peerAddress);
const int len = getNewBufferSize(unprocessed_begin, unprocessed_end, peerAddress, peerProtocolVersion);
uint8_t* const newBuffer = new (newArena) uint8_t[ len ];
if (unproc_len > 0) {
memcpy(newBuffer, unprocessed_begin, unproc_len);
@ -1030,8 +1084,8 @@ ACTOR static Future<Void> connectionReader(
uint64_t connectionId = pkt.connectionId;
if (!pkt.protocolVersion.hasObjectSerializerFlag() ||
!pkt.protocolVersion.isCompatible(currentProtocolVersion)) {
incompatibleProtocolVersionNewer = pkt.protocolVersion > currentProtocolVersion;
!pkt.protocolVersion.isCompatible(g_network->protocolVersion())) {
incompatibleProtocolVersionNewer = pkt.protocolVersion > g_network->protocolVersion();
NetworkAddress addr = pkt.canonicalRemotePort
? NetworkAddress(pkt.canonicalRemoteIp(), pkt.canonicalRemotePort)
: conn->getPeerAddress();
@ -1041,9 +1095,8 @@ ACTOR static Future<Void> connectionReader(
if(now() - transport->lastIncompatibleMessage > FLOW_KNOBS->CONNECTION_REJECTED_MESSAGE_DELAY) {
TraceEvent(SevWarn, "ConnectionRejected", conn->getDebugID())
.detail("Reason", "IncompatibleProtocolVersion")
.detail("LocalVersion", currentProtocolVersion.version())
.detail("LocalVersion", g_network->protocolVersion().version())
.detail("RejectedVersion", pkt.protocolVersion.version())
.detail("VersionMask", ProtocolVersion::compatibleProtocolVersionMask)
.detail("Peer", pkt.canonicalRemotePort ? NetworkAddress(pkt.canonicalRemoteIp(), pkt.canonicalRemotePort)
: conn->getPeerAddress())
.detail("ConnectionId", connectionId);
@ -1055,7 +1108,6 @@ ACTOR static Future<Void> connectionReader(
} else if(connectionId > 1) {
transport->multiVersionConnections[connectionId] = now() + FLOW_KNOBS->CONNECTION_ID_TIMEOUT;
}
compatible = false;
if(!protocolVersion.hasMultiVersionClient()) {
// Older versions expected us to hang up. It may work even if we don't hang up here, but it's safer to keep the old behavior.
@ -1107,7 +1159,7 @@ ACTOR static Future<Void> connectionReader(
}
}
}
if (compatible) {
if (compatible || peerProtocolVersion.hasStableInterfaces()) {
scanPackets( transport, unprocessed_begin, unprocessed_end, arena, peerAddress, peerProtocolVersion );
}
else if(!expectConnectPacket) {
@ -1338,10 +1390,8 @@ void FlowTransport::removeEndpoint( const Endpoint& endpoint, NetworkMessageRece
void FlowTransport::addWellKnownEndpoint( Endpoint& endpoint, NetworkMessageReceiver* receiver, TaskPriority taskID ) {
endpoint.addresses = self->localAddresses;
ASSERT( ((endpoint.token.first() & TOKEN_STREAM_FLAG)!=0) == receiver->isStream() );
Endpoint::Token otoken = endpoint.token;
self->endpoints.insert( receiver, endpoint.token, taskID );
ASSERT( endpoint.token == otoken );
ASSERT(receiver->isStream());
self->endpoints.insertWellKnown(receiver, endpoint.token, taskID);
}
static void sendLocal( TransportData* self, ISerializeSource const& what, const Endpoint& destination ) {
@ -1349,7 +1399,7 @@ static void sendLocal( TransportData* self, ISerializeSource const& what, const
// SOMEDAY: Would it be better to avoid (de)serialization by doing this check in flow?
Standalone<StringRef> copy;
ObjectWriter wr(AssumeVersion(currentProtocolVersion));
ObjectWriter wr(AssumeVersion(g_network->protocolVersion()));
what.serializeObjectWriter(wr);
copy = wr.toStringRef();
#if VALGRIND
@ -1357,7 +1407,7 @@ static void sendLocal( TransportData* self, ISerializeSource const& what, const
#endif
ASSERT(copy.size() > 0);
deliver(self, destination, ArenaReader(copy.arena(), copy, AssumeVersion(currentProtocolVersion)), false);
deliver(self, destination, ArenaReader(copy.arena(), copy, AssumeVersion(g_network->protocolVersion())), false);
}
static ReliablePacket* sendPacket(TransportData* self, Reference<Peer> peer, ISerializeSource const& what,
@ -1379,12 +1429,12 @@ static ReliablePacket* sendPacket(TransportData* self, Reference<Peer> peer, ISe
int prevBytesWritten = pb->bytes_written;
PacketBuffer* checksumPb = pb;
PacketWriter wr(pb,rp,AssumeVersion(currentProtocolVersion)); // SOMEDAY: Can we downgrade to talk to older peers?
PacketWriter wr(pb,rp,AssumeVersion(g_network->protocolVersion())); // SOMEDAY: Can we downgrade to talk to older peers?
// Reserve some space for packet length and checksum, write them after serializing data
SplitBuffer packetInfoBuffer;
uint32_t len, checksum = 0;
int packetInfoSize = sizeof(len);
int packetInfoSize = PACKET_LEN_WIDTH;
if (checksumEnabled) {
packetInfoSize += sizeof(checksum);
}

View File

@ -27,6 +27,7 @@
#include "flow/genericactors.actor.h"
#include "flow/network.h"
#include "flow/FileIdentifier.h"
#include "flow/ProtocolVersion.h"
#include "flow/Net2Packet.h"
#include "fdbrpc/ContinuousSample.h"
@ -116,11 +117,21 @@ namespace std
};
}
enum class RequirePeer { Exactly, AtLeast };
struct PeerCompatibilityPolicy {
RequirePeer requirement;
ProtocolVersion version;
};
class ArenaObjectReader;
class NetworkMessageReceiver {
public:
virtual void receive(ArenaObjectReader&) = 0;
virtual bool isStream() const { return false; }
virtual PeerCompatibilityPolicy peerCompatibilityPolicy() const {
return { RequirePeer::Exactly, g_network->protocolVersion() };
}
};
struct TransportData;
@ -145,8 +156,14 @@ struct Peer : public ReferenceCounted<Peer> {
double lastDataPacketSentTime;
int outstandingReplies;
ContinuousSample<double> pingLatencies;
double lastLoggedTime;
int64_t lastLoggedBytesReceived;
int64_t lastLoggedBytesSent;
// Cleared every time stats are logged for this peer.
int connectOutgoingCount;
int connectIncomingCount;
int connectFailedCount;
ContinuousSample<double> connectLatencies;
explicit Peer(TransportData* transport, NetworkAddress const& destination);

View File

@ -87,7 +87,9 @@ void CounterCollection::logToTraceEvent(TraceEvent &te) const {
}
}
ACTOR Future<Void> traceCounters(std::string traceEventName, UID traceEventID, double interval, CounterCollection* counters, std::string trackLatestName) {
ACTOR Future<Void> traceCounters(std::string traceEventName, UID traceEventID, double interval,
CounterCollection* counters, std::string trackLatestName,
std::function<void(TraceEvent&)> decorator) {
wait(delay(0)); // Give an opportunity for all members used in special counters to be initialized
for (ICounter* c : counters->counters)
@ -100,6 +102,7 @@ ACTOR Future<Void> traceCounters(std::string traceEventName, UID traceEventID, d
te.detail("Elapsed", now() - last_interval);
counters->logToTraceEvent(te);
decorator(te);
if (!trackLatestName.empty()) {
te.trackLatest(trackLatestName);

View File

@ -146,7 +146,9 @@ struct SpecialCounter final : ICounter, FastAllocated<SpecialCounter<F>>, NonCop
template <class F>
static void specialCounter(CounterCollection& collection, std::string const& name, F && f) { new SpecialCounter<F>(collection, name, std::move(f)); }
Future<Void> traceCounters(std::string const& traceEventName, UID const& traceEventID, double const& interval, CounterCollection* const& counters, std::string const& trackLatestName = std::string());
Future<Void> traceCounters(std::string const& traceEventName, UID const& traceEventID, double const& interval,
CounterCollection* const& counters, std::string const& trackLatestName = std::string(),
std::function<void(TraceEvent&)> const& decorator = [](TraceEvent& te) {});
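The new std::function parameter lets a caller append details that aren't themselves counters to every periodic trace event. A hedged call-site fragment; the identifiers (id, cc, the knob) are illustrative:

Future<Void> logger = traceCounters(
    "ProxyMetrics", id, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, &cc, "ProxyMetrics",
    [](TraceEvent& te) { te.detail("QueueLength", 42); });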
class LatencyBands {
public:

View File

@ -621,7 +621,7 @@ void showArena( ArenaBlock* a, ArenaBlock* parent) {
}
void arenaTest() {
BinaryWriter wr(AssumeVersion(currentProtocolVersion));
BinaryWriter wr(AssumeVersion(g_network->protocolVersion()));
{
Arena arena;
VectorRef<StringRef> test;
@ -639,7 +639,7 @@ void arenaTest() {
{
Arena arena2;
VectorRef<StringRef> test2;
BinaryReader reader(wr.getData(),wr.getLength(), AssumeVersion(currentProtocolVersion));
BinaryReader reader(wr.getData(),wr.getLength(), AssumeVersion(g_network->protocolVersion()));
reader >> test2 >> arena2;
for(auto i = test2.begin(); i != test2.end(); ++i)

View File

@ -66,6 +66,12 @@ struct FlowReceiver : public NetworkMessageReceiver {
endpoint = e;
}
void setPeerCompatibilityPolicy(const PeerCompatibilityPolicy& policy) { peerCompatibilityPolicy_ = policy; }
PeerCompatibilityPolicy peerCompatibilityPolicy() const override {
return peerCompatibilityPolicy_.orDefault(NetworkMessageReceiver::peerCompatibilityPolicy());
}
void makeWellKnownEndpoint(Endpoint::Token token, TaskPriority taskID) {
ASSERT(!endpoint.isValid());
m_isLocalEndpoint = true;
@ -74,6 +80,7 @@ struct FlowReceiver : public NetworkMessageReceiver {
}
private:
Optional<PeerCompatibilityPolicy> peerCompatibilityPolicy_;
Endpoint endpoint;
bool m_isLocalEndpoint;
bool m_stream;
@ -117,6 +124,9 @@ public:
bool isSet() { return sav->isSet(); }
bool isValid() const { return sav != nullptr; }
ReplyPromise() : sav(new NetSAV<T>(0, 1)) {}
explicit ReplyPromise(const PeerCompatibilityPolicy& policy) : ReplyPromise() {
sav->setPeerCompatibilityPolicy(policy);
}
ReplyPromise(const ReplyPromise& rhs) : sav(rhs.sav) { sav->addPromiseRef(); }
ReplyPromise(ReplyPromise&& rhs) noexcept : sav(rhs.sav) { rhs.sav = 0; }
~ReplyPromise() { if (sav) sav->delPromiseRef(); }
@ -354,6 +364,9 @@ public:
FutureStream<T> getFuture() const { queue->addFutureRef(); return FutureStream<T>(queue); }
RequestStream() : queue(new NetNotifiedQueue<T>(0, 1)) {}
explicit RequestStream(PeerCompatibilityPolicy policy) : RequestStream() {
queue->setPeerCompatibilityPolicy(policy);
}
RequestStream(const RequestStream& rhs) : queue(rhs.queue) { queue->addPromiseRef(); }
RequestStream(RequestStream&& rhs) noexcept : queue(rhs.queue) { rhs.queue = 0; }
void operator=(const RequestStream& rhs) {

View File

@ -27,6 +27,7 @@
#include "flow/ActorCollection.h"
#include "flow/IRandom.h"
#include "flow/IThreadPool.h"
#include "flow/ProtocolVersion.h"
#include "flow/Util.h"
#include "fdbrpc/IAsyncFile.h"
#include "fdbrpc/AsyncFileCached.actor.h"
@ -92,10 +93,6 @@ void ISimulator::displayWorkers() const
return;
}
const UID TOKEN_ENDPOINT_NOT_FOUND(-1, -1);
ISimulator* g_pSimulator = 0;
thread_local ISimulator::ProcessInfo* ISimulator::currentProcess = 0;
int openCount = 0;
struct SimClogging {
@ -749,6 +746,8 @@ public:
return timerTime;
}
double timer_monotonic() override { return timer(); }
Future<class Void> delay(double seconds, TaskPriority taskID) override {
ASSERT(taskID >= TaskPriority::Min && taskID <= TaskPriority::Max);
return delay( seconds, taskID, currentProcess );
@ -999,8 +998,8 @@ public:
net2->run();
}
ProcessInfo* newProcess(const char* name, IPAddress ip, uint16_t port, bool sslEnabled, uint16_t listenPerProcess,
LocalityData locality, ProcessClass startingClass, const char* dataFolder,
const char* coordinationFolder) override {
LocalityData locality, ProcessClass startingClass, const char* dataFolder,
const char* coordinationFolder, ProtocolVersion protocol) override {
ASSERT( locality.machineId().present() );
MachineInfo& machine = machines[ locality.machineId().get() ];
if (!machine.machineId.present())
@ -1043,6 +1042,7 @@ public:
currentlyRebootingProcesses.erase(addresses.address);
m->excluded = g_simulator.isExcluded(NetworkAddress(ip, port, true, false));
m->cleared = g_simulator.isCleared(addresses.address);
m->protocolVersion = protocol;
m->setGlobal(enTDMetrics, (flowGlobalType) &m->tdmetrics);
m->setGlobal(enNetworkConnections, (flowGlobalType) m->network);
@ -1708,6 +1708,10 @@ public:
return Void();
return delay( 0, taskID, process->machine->machineProcess );
}
ProtocolVersion protocolVersion() override {
return getCurrentProcess()->protocolVersion;
}
//time is guarded by ISimulator::mutex. It is not necessary to guard reads on the main thread because
//time should only be modified from the main thread.
@ -1796,6 +1800,7 @@ public:
: id(deterministicRandom()->randomUniqueID()), process(g_simulator.getCurrentProcess()), peerAddress(peerAddress),
actors(false), _localAddress(localAddress) {
g_sim2.addressMap.emplace(_localAddress, process);
ASSERT(process->boundUDPSockets.find(localAddress) == process->boundUDPSockets.end());
process->boundUDPSockets.emplace(localAddress, this);
}
~UDPSimSocket() {
@ -1909,6 +1914,9 @@ Future<Reference<IUDPSocket>> Sim2::createUDPSocket(NetworkAddress toAddr) {
localAddress.ip = IPAddress(process->address.ip.toV4() + deterministicRandom()->randomInt(0, 256));
}
localAddress.port = deterministicRandom()->randomInt(40000, 60000);
while (process->boundUDPSockets.find(localAddress) != process->boundUDPSockets.end()) {
localAddress.port = deterministicRandom()->randomInt(40000, 60000);
}
return Reference<IUDPSocket>(new UDPSimSocket(localAddress, toAddr));
}

View File

@ -20,9 +20,11 @@
#ifndef FLOW_SIMULATOR_H
#define FLOW_SIMULATOR_H
#include "flow/ProtocolVersion.h"
#pragma once
#include "flow/flow.h"
#include "flow/Histogram.h"
#include "fdbrpc/FailureMonitor.h"
#include "fdbrpc/Locality.h"
#include "fdbrpc/IAsyncFile.h"
@ -54,6 +56,7 @@ public:
LocalityData locality;
ProcessClass startingClass;
TDMetricCollection tdmetrics;
HistogramRegistry histograms;
std::map<NetworkAddress, Reference<IListener>> listenerMap;
std::map<NetworkAddress, Reference<IUDPSocket>> boundUDPSockets;
bool failed;
@ -69,6 +72,8 @@ public:
UID uid;
ProtocolVersion protocolVersion;
ProcessInfo(const char* name, LocalityData locality, ProcessClass startingClass, NetworkAddressList addresses,
INetworkConnections* net, const char* dataFolder, const char* coordinationFolder)
: name(name), locality(locality), startingClass(startingClass), addresses(addresses),
@ -160,7 +165,7 @@ public:
virtual ProcessInfo* newProcess(const char* name, IPAddress ip, uint16_t port, bool sslEnabled, uint16_t listenPerProcess,
LocalityData locality, ProcessClass startingClass, const char* dataFolder,
const char* coordinationFolder) = 0;
const char* coordinationFolder, ProtocolVersion protocol) = 0;
virtual void killProcess( ProcessInfo* machine, KillType ) = 0;
virtual void rebootProcess(Optional<Standalone<StringRef>> zoneId, bool allProcesses ) = 0;
virtual void rebootProcess( ProcessInfo* process, KillType kt ) = 0;
@ -174,6 +179,8 @@ public:
virtual bool datacenterDead(Optional<Standalone<StringRef>> dcId) const = 0;
virtual void displayWorkers() const;
virtual ProtocolVersion protocolVersion() = 0;
virtual void addRole(NetworkAddress const& address, std::string const& role) {
roleAddresses[address][role] ++;
TraceEvent("RoleAdd").detail("Address", address).detail("Role", role).detail("NumRoles", roleAddresses[address].size()).detail("Value", roleAddresses[address][role]);
@ -325,6 +332,9 @@ public:
BackupAgentType backupAgents;
BackupAgentType drAgents;
bool hasDiffProtocolProcess; // true if the simulator is testing a process with a different protocol version
bool setDiffProtocol; // true if a process with a different protocol version has been started
virtual flowGlobalType global(int id) const { return getCurrentProcess()->global(id); };
virtual void setGlobal(size_t id, flowGlobalType v) { getCurrentProcess()->setGlobal(id,v); };

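With ProcessInfo now carrying a protocolVersion, newProcess() accepting one, and ISimulator::protocolVersion() exposing it, simulated code can ask the network layer which wire version the current process speaks. A hedged sketch of the call pattern (ProtocolVersion and g_network are FDB types; the helper and its isValid() check are illustrative assumptions):

#include "flow/ProtocolVersion.h"
#include "flow/network.h"

// Sketch: resolve the serialization version at runtime. Under simulation
// this returns the simulated process's version; in a real process it is
// the binary's own protocol version.
ProtocolVersion currentWireVersion() {
    ProtocolVersion v = g_network->protocolVersion();
    ASSERT(v.isValid()); // assumption: ProtocolVersion exposes isValid()
    return v;
}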
View File

@ -59,7 +59,7 @@ struct VersionedMessage {
}
}
ArenaReader reader(arena, message, AssumeVersion(currentProtocolVersion));
ArenaReader reader(arena, message, AssumeVersion(g_network->protocolVersion()));
// Return false for LogProtocolMessage and SpanContextMessage metadata messages.
if (LogProtocolMessage::isNextIn(reader)) return false;
@ -756,7 +756,7 @@ ACTOR Future<Void> saveMutationsToFile(BackupData* self, Version popVersion, int
const auto& subrange = range.range();
intersectionRange = mutationRange & subrange;
MutationRef subm(MutationRef::Type::ClearRange, intersectionRange.begin, intersectionRange.end);
BinaryWriter wr(AssumeVersion(currentProtocolVersion));
BinaryWriter wr(AssumeVersion(g_network->protocolVersion()));
wr << subm;
mutations.push_back(wr.toValue());
for (int index : range.value()) {

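This hunk and the similar ones below swap the compile-time currentProtocolVersion for the runtime value when serializing, so a process simulating an older protocol encodes accordingly. A minimal sketch under that convention (BinaryWriter, AssumeVersion, and MutationRef are FDB types; the helper function is illustrative):

#include "fdbclient/CommitTransaction.h" // MutationRef
#include "flow/serialize.h"              // BinaryWriter, AssumeVersion

// Encode a mutation with the network's runtime protocol version,
// mirroring the saveMutationsToFile change above.
Standalone<StringRef> encodeMutation(const MutationRef& m) {
    BinaryWriter wr(AssumeVersion(g_network->protocolVersion()));
    wr << m;
    return wr.toValue();
}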
View File

@ -178,6 +178,7 @@ set(FDBSERVER_SRCS
workloads/Performance.actor.cpp
workloads/Ping.actor.cpp
workloads/PopulateTPCC.actor.cpp
workloads/ProtocolVersion.actor.cpp
workloads/PubSubMultiples.actor.cpp
workloads/QueuePush.actor.cpp
workloads/RandomClogging.actor.cpp

View File

@ -2883,7 +2883,7 @@ ACTOR Future<Void> dbInfoUpdater( ClusterControllerData* self ) {
dbInfoChange = self->db.serverInfo->onChange();
updateDBInfo = self->updateDBInfo.onTrigger();
req.serializedDbInfo = BinaryWriter::toValue(self->db.serverInfo->get(), AssumeVersion(currentProtocolVersion));
req.serializedDbInfo = BinaryWriter::toValue(self->db.serverInfo->get(), AssumeVersion(g_network->protocolVersion()));
TraceEvent("DBInfoStartBroadcast", self->id);
choose {

View File

@ -24,10 +24,13 @@
#include "fdbserver/WorkerInterface.actor.h"
#include "fdbserver/Status.h"
#include "flow/ActorCollection.h"
#include "flow/ProtocolVersion.h"
#include "flow/UnitTest.h"
#include "flow/IndexedSet.h"
#include "fdbclient/MonitorLeader.h"
#include "flow/actorcompiler.h" // This must be the last #include.
#include "flow/network.h"
#include <cstdint>
// This module implements coordinationServer() and the interfaces in CoordinationInterface.h
@ -42,17 +45,6 @@ struct GenerationRegVal {
}
};
// The order of UIDs here must match the order in which makeWellKnownEndpoint is called.
// UID WLTOKEN_CLIENTLEADERREG_GETLEADER( -1, 2 ); // from fdbclient/MonitorLeader.actor.cpp
// UID WLTOKEN_CLIENTLEADERREG_OPENDATABASE( -1, 3 ); // from fdbclient/MonitorLeader.actor.cpp
UID WLTOKEN_LEADERELECTIONREG_CANDIDACY( -1, 4 );
UID WLTOKEN_LEADERELECTIONREG_ELECTIONRESULT( -1, 5 );
UID WLTOKEN_LEADERELECTIONREG_LEADERHEARTBEAT( -1, 6 );
UID WLTOKEN_LEADERELECTIONREG_FORWARD( -1, 7 );
UID WLTOKEN_GENERATIONREG_READ( -1, 8 );
UID WLTOKEN_GENERATIONREG_WRITE( -1, 9 );
GenerationRegInterface::GenerationRegInterface( NetworkAddress remote )
: read( Endpoint({remote}, WLTOKEN_GENERATIONREG_READ) ),
write( Endpoint({remote}, WLTOKEN_GENERATIONREG_WRITE) )

View File

@ -24,6 +24,13 @@
#include "fdbclient/CoordinationInterface.h"
constexpr UID WLTOKEN_LEADERELECTIONREG_CANDIDACY(-1, 4);
constexpr UID WLTOKEN_LEADERELECTIONREG_ELECTIONRESULT(-1, 5);
constexpr UID WLTOKEN_LEADERELECTIONREG_LEADERHEARTBEAT(-1, 6);
constexpr UID WLTOKEN_LEADERELECTIONREG_FORWARD(-1, 7);
constexpr UID WLTOKEN_GENERATIONREG_READ(-1, 8);
constexpr UID WLTOKEN_GENERATIONREG_WRITE(-1, 9);
struct GenerationRegInterface {
constexpr static FileIdentifier file_identifier = 16726744;
RequestStream< struct GenerationRegReadRequest > read;

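Hoisting the WLTOKEN_* UIDs into the header as constexpr lets any translation unit build a stream to a coordinator from an address plus a fixed token, as the GenerationRegInterface constructor above does. A sketch of that pattern (RequestStream and Endpoint are FDB types; the helper is illustrative):

// Build a read stream to a remote generation register. The token value
// is part of the wire contract and must stay in makeWellKnownEndpoint
// registration order, per the comment in Coordination.actor.cpp above.
RequestStream<struct GenerationRegReadRequest> generationRegRead(NetworkAddress remote) {
    return RequestStream<struct GenerationRegReadRequest>(
        Endpoint({ remote }, WLTOKEN_GENERATIONREG_READ));
}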
View File

@ -128,8 +128,10 @@ public:
vector<Reference<TCMachineInfo>> machines;
vector<Standalone<StringRef>> machineIDs;
vector<Reference<TCTeamInfo>> serverTeams;
UID id;
explicit TCMachineTeamInfo(vector<Reference<TCMachineInfo>> const& machines) : machines(machines) {
explicit TCMachineTeamInfo(vector<Reference<TCMachineInfo>> const& machines)
: machines(machines), id(deterministicRandom()->randomUniqueID()) {
machineIDs.reserve(machines.size());
for (int i = 0; i < machines.size(); i++) {
machineIDs.push_back(machines[i]->machineID);
@ -164,13 +166,15 @@ class TCTeamInfo final : public ReferenceCounted<TCTeamInfo>, public IDataDistri
bool healthy;
bool wrongConfiguration; //True if any of the servers in the team have the wrong configuration
int priority;
UID id;
public:
Reference<TCMachineTeamInfo> machineTeam;
Future<Void> tracker;
explicit TCTeamInfo(vector<Reference<TCServerInfo>> const& servers)
: servers(servers), healthy(true), priority(SERVER_KNOBS->PRIORITY_TEAM_HEALTHY), wrongConfiguration(false) {
: servers(servers), healthy(true), priority(SERVER_KNOBS->PRIORITY_TEAM_HEALTHY), wrongConfiguration(false),
id(deterministicRandom()->randomUniqueID()) {
if (servers.empty()) {
TraceEvent(SevInfo, "ConstructTCTeamFromEmptyServers");
}
@ -180,6 +184,8 @@ public:
}
}
std::string getTeamID() const override { return id.shortString(); }
vector<StorageServerInterface> getLastKnownServerInterfaces() const override {
vector<StorageServerInterface> v;
v.reserve(servers.size());
@ -623,6 +629,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
int highestUtilizationTeam;
AsyncTrigger printDetailedTeamsInfo;
PromiseStream<GetMetricsRequest> getShardMetrics;
void resetLocalitySet() {
storageServerSet = Reference<LocalitySet>(new LocalityMap<UID>());
@ -654,7 +661,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
DatabaseConfiguration configuration, std::vector<Optional<Key>> includedDCs,
Optional<std::vector<Optional<Key>>> otherTrackedDCs, Future<Void> readyToStart,
Reference<AsyncVar<bool>> zeroHealthyTeams, bool primary,
Reference<AsyncVar<bool>> processingUnhealthy)
Reference<AsyncVar<bool>> processingUnhealthy, PromiseStream<GetMetricsRequest> getShardMetrics)
: cx(cx), distributorId(distributorId), lock(lock), output(output),
shardsAffectedByTeamFailure(shardsAffectedByTeamFailure), doBuildTeams(true), lastBuildTeamsFailed(false),
teamBuilder(Void()), badTeamRemover(Void()), checkInvalidLocalities(Void()), wrongStoreTypeRemover(Void()), configuration(configuration),
@ -666,8 +673,10 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
initializationDoneActor(logOnCompletion(readyToStart && initialFailureReactionDelay, this)),
optimalTeamCount(0), recruitingStream(0), restartRecruiting(SERVER_KNOBS->DEBOUNCE_RECRUITING_DELAY),
unhealthyServers(0), includedDCs(includedDCs), otherTrackedDCs(otherTrackedDCs),
zeroHealthyTeams(zeroHealthyTeams), zeroOptimalTeams(true), primary(primary), medianAvailableSpace(SERVER_KNOBS->MIN_AVAILABLE_SPACE_RATIO),
lastMedianAvailableSpaceUpdate(0), processingUnhealthy(processingUnhealthy), lowestUtilizationTeam(0), highestUtilizationTeam(0) {
zeroHealthyTeams(zeroHealthyTeams), zeroOptimalTeams(true), primary(primary),
medianAvailableSpace(SERVER_KNOBS->MIN_AVAILABLE_SPACE_RATIO), lastMedianAvailableSpaceUpdate(0),
processingUnhealthy(processingUnhealthy), lowestUtilizationTeam(0), highestUtilizationTeam(0),
getShardMetrics(getShardMetrics) {
if(!primary || configuration.usableRegions == 1) {
TraceEvent("DDTrackerStarting", distributorId)
.detail( "State", "Inactive" )
@ -1403,7 +1412,8 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
.detail("TeamIndex", i++)
.detail("Healthy", team->isHealthy())
.detail("TeamSize", team->size())
.detail("MemberIDs", team->getServerIDsStr());
.detail("MemberIDs", team->getServerIDsStr())
.detail("TeamID", team->getTeamID());
}
}
@ -2161,7 +2171,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
.detail("Primary", primary)
.detail("AddedTeams", 0)
.detail("TeamsToBuild", 0)
.detail("CurrentTeams", teams.size())
.detail("CurrentServerTeams", teams.size())
.detail("DesiredTeams", desiredServerTeams)
.detail("MaxTeams", maxServerTeams)
.detail("StorageTeamSize", configuration.storageTeamSize)
@ -2210,11 +2220,11 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
}
}
uniqueMachines = machines.size();
TraceEvent("BuildTeams")
.detail("ServerCount", self->server_info.size())
.detail("UniqueMachines", uniqueMachines)
.detail("Primary", self->primary)
.detail("StorageTeamSize", self->configuration.storageTeamSize);
TraceEvent("BuildTeams", self->distributorId)
.detail("ServerCount", self->server_info.size())
.detail("UniqueMachines", uniqueMachines)
.detail("Primary", self->primary)
.detail("StorageTeamSize", self->configuration.storageTeamSize);
// If there are too few machines to even build teams or there are too few represented datacenters, build no new teams
if( uniqueMachines >= self->configuration.storageTeamSize ) {
@ -2241,11 +2251,11 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
.detail("TeamsToBuild", teamsToBuild)
.detail("DesiredTeams", desiredTeams)
.detail("MaxTeams", maxTeams)
.detail("BadTeams", self->badTeams.size())
.detail("BadServerTeams", self->badTeams.size())
.detail("UniqueMachines", uniqueMachines)
.detail("TeamSize", self->configuration.storageTeamSize)
.detail("Servers", serverCount)
.detail("CurrentTrackedTeams", self->teams.size())
.detail("CurrentTrackedServerTeams", self->teams.size())
.detail("HealthyTeamCount", teamCount)
.detail("TotalTeamCount", totalTeamCount)
.detail("MachineTeamCount", self->machineTeams.size())
@ -2262,9 +2272,9 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
int addedTeams = self->addTeamsBestOf(teamsToBuild, desiredTeams, maxTeams);
if (addedTeams <= 0 && self->teams.size() == 0) {
TraceEvent(SevWarn, "NoTeamAfterBuildTeam")
.detail("TeamNum", self->teams.size())
.detail("Debug", "Check information below");
TraceEvent(SevWarn, "NoTeamAfterBuildTeam", self->distributorId)
.detail("ServerTeamNum", self->teams.size())
.detail("Debug", "Check information below");
// Debug: set true for traceAllInfo() to print out more information
self->traceAllInfo();
}
@ -2282,7 +2292,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
.detail("Primary", self->primary)
.detail("AddedTeams", 0)
.detail("TeamsToBuild", teamsToBuild)
.detail("CurrentTeams", self->teams.size())
.detail("CurrentServerTeams", self->teams.size())
.detail("DesiredTeams", desiredTeams)
.detail("MaxTeams", maxTeams)
.detail("StorageTeamSize", self->configuration.storageTeamSize)
@ -2323,9 +2333,9 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
}
TraceEvent(SevWarn, "NoHealthyTeams", distributorId)
.detail("CurrentTeamCount", teams.size())
.detail("ServerCount", server_info.size())
.detail("NonFailedServerCount", desiredServerSet.size());
.detail("CurrentServerTeamCount", teams.size())
.detail("ServerCount", server_info.size())
.detail("NonFailedServerCount", desiredServerSet.size());
}
bool shouldHandleServer(const StorageServerInterface &newServer) {
@ -2358,7 +2368,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
}
bool removeTeam( Reference<TCTeamInfo> team ) {
TraceEvent("RemovedTeam", distributorId).detail("Team", team->getDesc());
TraceEvent("RemovedServerTeam", distributorId).detail("Team", team->getDesc());
bool found = false;
for(int t=0; t<teams.size(); t++) {
if( teams[t] == team ) {
@ -2552,9 +2562,10 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
int removedCount = 0;
for (int t = 0; t < teams.size(); t++) {
if ( std::count( teams[t]->getServerIDs().begin(), teams[t]->getServerIDs().end(), removedServer ) ) {
TraceEvent("TeamRemoved")
TraceEvent("ServerTeamRemoved")
.detail("Primary", primary)
.detail("TeamServerIDs", teams[t]->getServerIDsStr());
.detail("TeamServerIDs", teams[t]->getServerIDsStr())
.detail("TeamID", teams[t]->getTeamID());
// removeTeam also needs to remove the team from the machine team info.
removeTeam(teams[t]);
t--;
@ -2627,8 +2638,8 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
restartTeamBuilder.trigger();
TraceEvent("DataDistributionTeamCollectionUpdate", distributorId)
.detail("Teams", teams.size())
.detail("BadTeams", badTeams.size())
.detail("ServerTeams", teams.size())
.detail("BadServerTeams", badTeams.size())
.detail("Servers", allServers.size())
.detail("Machines", machine_info.size())
.detail("MachineTeams", machineTeams.size())
@ -2926,7 +2937,7 @@ ACTOR Future<Void> removeBadTeams(DDTeamCollection* self) {
wait(self->initialFailureReactionDelay);
wait(waitUntilHealthy(self));
wait(self->addSubsetComplete.getFuture());
TraceEvent("DDRemovingBadTeams", self->distributorId).detail("Primary", self->primary);
TraceEvent("DDRemovingBadServerTeams", self->distributorId).detail("Primary", self->primary);
for(auto it : self->badTeams) {
it->tracker.cancel();
}
@ -3040,9 +3051,9 @@ ACTOR Future<Void> machineTeamRemover(DDTeamCollection* self) {
// Check if a server will have 0 team after the team is removed
for (auto& s : team->getServers()) {
if (s->teams.size() == 0) {
TraceEvent(SevError, "TeamRemoverTooAggressive")
TraceEvent(SevError, "MachineTeamRemoverTooAggressive", self->distributorId)
.detail("Server", s->id)
.detail("Team", team->getServerIDsStr());
.detail("ServerTeam", team->getDesc());
self->traceAllInfo(true);
}
}
@ -3065,6 +3076,7 @@ ACTOR Future<Void> machineTeamRemover(DDTeamCollection* self) {
}
TraceEvent("MachineTeamRemover", self->distributorId)
.detail("MachineTeamIDToRemove", mt->id.shortString())
.detail("MachineTeamToRemove", mt->getMachineIDsStr())
.detail("NumProcessTeamsOnTheMachineTeam", minNumProcessTeams)
.detail("CurrentMachineTeams", self->machineTeams.size())
@ -3080,7 +3092,7 @@ ACTOR Future<Void> machineTeamRemover(DDTeamCollection* self) {
} else {
if (numMachineTeamRemoved > 0) {
// Only trace the information when we remove a machine team
TraceEvent("TeamRemoverDone")
TraceEvent("MachineTeamRemoverDone", self->distributorId)
.detail("HealthyMachines", healthyMachineCount)
// .detail("CurrentHealthyMachineTeams", currentHealthyMTCount)
.detail("CurrentMachineTeams", self->machineTeams.size())
@ -3144,6 +3156,7 @@ ACTOR Future<Void> serverTeamRemover(DDTeamCollection* self) {
TraceEvent("ServerTeamRemover", self->distributorId)
.detail("ServerTeamToRemove", st->getServerIDsStr())
.detail("ServerTeamID", st->getTeamID())
.detail("NumProcessTeamsOnTheServerTeam", maxNumProcessTeams)
.detail("CurrentServerTeams", self->teams.size())
.detail("DesiredServerTeams", desiredServerTeams);
@ -3163,6 +3176,35 @@ ACTOR Future<Void> serverTeamRemover(DDTeamCollection* self) {
}
}
ACTOR Future<Void> zeroServerLeftLogger_impl(DDTeamCollection* self, Reference<TCTeamInfo> team) {
wait(delay(SERVER_KNOBS->DD_TEAM_ZERO_SERVER_LEFT_LOG_DELAY));
state vector<KeyRange> shards = self->shardsAffectedByTeamFailure->getShardsFor(
ShardsAffectedByTeamFailure::Team(team->getServerIDs(), self->primary));
state std::vector<Future<StorageMetrics>> sizes;
sizes.reserve(shards.size());
for (auto const& shard : shards) {
sizes.emplace_back(brokenPromiseToNever(self->getShardMetrics.getReply(GetMetricsRequest(shard))));
TraceEvent(SevWarnAlways, "DDShardLost", self->distributorId)
.detail("ServerTeamID", team->getTeamID())
.detail("ShardBegin", shard.begin)
.detail("ShardEnd", shard.end);
}
wait(waitForAll(sizes));
int64_t bytesLost = 0;
for (auto const& size : sizes) {
bytesLost += size.get().bytes;
}
TraceEvent(SevWarnAlways, "DDZeroServerLeftInTeam", self->distributorId)
.detail("Team", team->getDesc())
.detail("TotalBytesLost", bytesLost);
return Void();
}
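zeroServerLeftLogger_impl above fans out one getShardMetrics request per shard on the dead team, waits for all of them, then sums the bytes. The gather step in isolation looks like this (flow actor syntax; waitForAll is a flow primitive, the actor name is illustrative):

// Sketch: wait for every per-shard metrics future, then fold the sizes.
ACTOR Future<int64_t> sumShardBytes(std::vector<Future<StorageMetrics>> sizes) {
    wait(waitForAll(sizes));
    int64_t total = 0;
    for (auto const& s : sizes) {
        total += s.get().bytes;
    }
    return total;
}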
bool teamContainsFailedServer(DDTeamCollection* self, Reference<TCTeamInfo> team) {
auto ssis = team->getLastKnownServerInterfaces();
for (const auto &ssi : ssis) {
@ -3198,18 +3240,22 @@ ACTOR Future<Void> teamTracker(DDTeamCollection* self, Reference<TCTeamInfo> tea
state bool lastZeroHealthy = self->zeroHealthyTeams->get();
state bool firstCheck = true;
state Future<Void> zeroServerLeftLogger;
if(logTeamEvents) {
TraceEvent("TeamTrackerStarting", self->distributorId).detail("Reason", "Initial wait complete (sc)").detail("Team", team->getDesc());
TraceEvent("ServerTeamTrackerStarting", self->distributorId)
.detail("Reason", "Initial wait complete (sc)")
.detail("ServerTeam", team->getDesc());
}
self->priority_teams[team->getPriority()]++;
try {
loop {
if(logTeamEvents) {
TraceEvent("TeamHealthChangeDetected", self->distributorId)
.detail("Team", team->getDesc())
.detail("Primary", self->primary)
.detail("IsReady", self->initialFailureReactionDelay.isReady());
TraceEvent("ServerTeamHealthChangeDetected", self->distributorId)
.detail("ServerTeam", team->getDesc())
.detail("Primary", self->primary)
.detail("IsReady", self->initialFailureReactionDelay.isReady());
self->traceTeamCollectionInfo();
}
// Check if the number of degraded machines has changed
@ -3285,10 +3331,13 @@ ACTOR Future<Void> teamTracker(DDTeamCollection* self, Reference<TCTeamInfo> tea
if (serversLeft != lastServersLeft || anyUndesired != lastAnyUndesired ||
anyWrongConfiguration != lastWrongConfiguration || recheck) { // NOTE: do not check wrongSize
if(logTeamEvents) {
TraceEvent("TeamHealthChanged", self->distributorId)
.detail("Team", team->getDesc()).detail("ServersLeft", serversLeft)
.detail("LastServersLeft", lastServersLeft).detail("ContainsUndesiredServer", anyUndesired)
.detail("HealthyTeamsCount", self->healthyTeamCount).detail("IsWrongConfiguration", anyWrongConfiguration);
TraceEvent("ServerTeamHealthChanged", self->distributorId)
.detail("ServerTeam", team->getDesc())
.detail("ServersLeft", serversLeft)
.detail("LastServersLeft", lastServersLeft)
.detail("ContainsUndesiredServer", anyUndesired)
.detail("HealthyTeamsCount", self->healthyTeamCount)
.detail("IsWrongConfiguration", anyWrongConfiguration);
}
team->setWrongConfiguration( anyWrongConfiguration );
@ -3310,18 +3359,18 @@ ACTOR Future<Void> teamTracker(DDTeamCollection* self, Reference<TCTeamInfo> tea
self->zeroHealthyTeams->set(self->healthyTeamCount == 0);
if( self->healthyTeamCount == 0 ) {
TraceEvent(SevWarn, "ZeroTeamsHealthySignalling", self->distributorId)
.detail("SignallingTeam", team->getDesc())
.detail("Primary", self->primary);
TraceEvent(SevWarn, "ZeroServerTeamsHealthySignalling", self->distributorId)
.detail("SignallingTeam", team->getDesc())
.detail("Primary", self->primary);
}
if(logTeamEvents) {
TraceEvent("TeamHealthDifference", self->distributorId)
.detail("Team", team->getDesc())
.detail("LastOptimal", lastOptimal)
.detail("LastHealthy", lastHealthy)
.detail("Optimal", optimal)
.detail("OptimalTeamCount", self->optimalTeamCount);
TraceEvent("ServerTeamHealthDifference", self->distributorId)
.detail("ServerTeam", team->getDesc())
.detail("LastOptimal", lastOptimal)
.detail("LastHealthy", lastHealthy)
.detail("Optimal", optimal)
.detail("OptimalTeamCount", self->optimalTeamCount);
}
}
@ -3358,12 +3407,24 @@ ACTOR Future<Void> teamTracker(DDTeamCollection* self, Reference<TCTeamInfo> tea
if(lastPriority != team->getPriority()) {
self->priority_teams[lastPriority]--;
self->priority_teams[team->getPriority()]++;
if (lastPriority == SERVER_KNOBS->PRIORITY_TEAM_0_LEFT &&
team->getPriority() < SERVER_KNOBS->PRIORITY_TEAM_0_LEFT) {
zeroServerLeftLogger = Void();
}
if (logTeamEvents) {
int dataLoss = team->getPriority() == SERVER_KNOBS->PRIORITY_TEAM_0_LEFT;
Severity severity = dataLoss ? SevWarnAlways : SevInfo;
TraceEvent(severity, "ServerTeamPriorityChange", self->distributorId)
.detail("Priority", team->getPriority())
.detail("Info", team->getDesc())
.detail("ZeroHealthyServerTeams", self->zeroHealthyTeams->get());
if (team->getPriority() == SERVER_KNOBS->PRIORITY_TEAM_0_LEFT) {
// 0 servers left in this team, data might be lost.
zeroServerLeftLogger = zeroServerLeftLogger_impl(self, team);
}
}
}
if(logTeamEvents) {
TraceEvent("TeamPriorityChange", self->distributorId).detail("Priority", team->getPriority())
.detail("Info", team->getDesc()).detail("ZeroHealthyTeams", self->zeroHealthyTeams->get());
}
lastZeroHealthy = self->zeroHealthyTeams->get(); //set this again in case it changed from this teams health changing
if ((self->initialFailureReactionDelay.isReady() && !self->zeroHealthyTeams->get()) || containsFailed) {
@ -3433,17 +3494,19 @@ ACTOR Future<Void> teamTracker(DDTeamCollection* self, Reference<TCTeamInfo> tea
self->output.send(rs);
TraceEvent("SendRelocateToDDQueue", self->distributorId)
.suppressFor(1.0)
.detail("Primary", self->primary)
.detail("Team", team->getDesc())
.detail("ServerPrimary", self->primary)
.detail("ServerTeam", team->getDesc())
.detail("KeyBegin", rs.keys.begin)
.detail("KeyEnd", rs.keys.end)
.detail("Priority", rs.priority)
.detail("TeamFailedMachines", team->size() - serversLeft)
.detail("TeamOKMachines", serversLeft);
.detail("ServerTeamFailedMachines", team->size() - serversLeft)
.detail("ServerTeamOKMachines", serversLeft);
}
} else {
if(logTeamEvents) {
TraceEvent("TeamHealthNotReady", self->distributorId).detail("HealthyTeamCount", self->healthyTeamCount);
TraceEvent("ServerTeamHealthNotReady", self->distributorId)
.detail("HealthyServerTeamCount", self->healthyTeamCount)
.detail("ServerTeamID", team->getTeamID());
}
}
}
@ -3455,7 +3518,7 @@ ACTOR Future<Void> teamTracker(DDTeamCollection* self, Reference<TCTeamInfo> tea
} catch(Error& e) {
if(logTeamEvents) {
TraceEvent("TeamTrackerStopping", self->distributorId)
.detail("Primary", self->primary)
.detail("ServerPrimary", self->primary)
.detail("Team", team->getDesc())
.detail("Priority", team->getPriority());
}
@ -3466,8 +3529,8 @@ ACTOR Future<Void> teamTracker(DDTeamCollection* self, Reference<TCTeamInfo> tea
if( self->healthyTeamCount == 0 ) {
TraceEvent(SevWarn, "ZeroTeamsHealthySignalling", self->distributorId)
.detail("Primary", self->primary)
.detail("SignallingTeam", team->getDesc());
.detail("ServerPrimary", self->primary)
.detail("SignallingServerTeam", team->getDesc());
self->zeroHealthyTeams->set(true);
}
}
@ -4736,7 +4799,9 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self,
state MoveKeysLock lock;
state Reference<DDTeamCollection> primaryTeamCollection;
state Reference<DDTeamCollection> remoteTeamCollection;
state bool trackerCancelled;
loop {
trackerCancelled = false;
try {
loop {
TraceEvent("DDInitTakingMoveKeysLock", self->ddId);
@ -4903,7 +4968,7 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self,
actors.push_back(reportErrorsExcept(
dataDistributionTracker(initData, cx, output, shardsAffectedByTeamFailure, getShardMetrics,
getShardMetricsList, getAverageShardBytes.getFuture(), readyToStart,
anyZeroHealthyTeams, self->ddId, &shards),
anyZeroHealthyTeams, self->ddId, &shards, &trackerCancelled),
"DDTracker", self->ddId, &normalDDQueueErrors()));
actors.push_back(reportErrorsExcept(
dataDistributionQueue(cx, output, input.getFuture(), getShardMetrics, processingUnhealthy, tcis,
@ -4915,13 +4980,13 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self,
primaryTeamCollection = makeReference<DDTeamCollection>(
cx, self->ddId, lock, output, shardsAffectedByTeamFailure, configuration, primaryDcId,
configuration.usableRegions > 1 ? remoteDcIds : std::vector<Optional<Key>>(), readyToStart.getFuture(),
zeroHealthyTeams[0], true, processingUnhealthy);
zeroHealthyTeams[0], true, processingUnhealthy, getShardMetrics);
teamCollectionsPtrs.push_back(primaryTeamCollection.getPtr());
if (configuration.usableRegions > 1) {
remoteTeamCollection = makeReference<DDTeamCollection>(
cx, self->ddId, lock, output, shardsAffectedByTeamFailure, configuration, remoteDcIds,
Optional<std::vector<Optional<Key>>>(), readyToStart.getFuture() && remoteRecovered(self->dbInfo),
zeroHealthyTeams[1], false, processingUnhealthy);
zeroHealthyTeams[1], false, processingUnhealthy, getShardMetrics);
teamCollectionsPtrs.push_back(remoteTeamCollection.getPtr());
remoteTeamCollection->teamCollections = teamCollectionsPtrs;
actors.push_back(
@ -4943,6 +5008,7 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self,
return Void();
}
catch( Error &e ) {
trackerCancelled = true;
state Error err = e;
TraceEvent("DataDistributorDestroyTeamCollections").error(e);
self->teamCollection = nullptr;
@ -5346,7 +5412,8 @@ DDTeamCollection* testTeamCollection(int teamSize, Reference<IReplicationPolicy>
DDTeamCollection* collection =
new DDTeamCollection(database, UID(0, 0), MoveKeysLock(), PromiseStream<RelocateShard>(),
makeReference<ShardsAffectedByTeamFailure>(), conf, {}, {}, Future<Void>(Void()),
makeReference<AsyncVar<bool>>(true), true, makeReference<AsyncVar<bool>>(false));
makeReference<AsyncVar<bool>>(true), true, makeReference<AsyncVar<bool>>(false),
PromiseStream<GetMetricsRequest>());
for (int id = 1; id <= processCount; ++id) {
UID uid(id, 0);
@ -5375,7 +5442,8 @@ DDTeamCollection* testMachineTeamCollection(int teamSize, Reference<IReplication
DDTeamCollection* collection =
new DDTeamCollection(database, UID(0, 0), MoveKeysLock(), PromiseStream<RelocateShard>(),
makeReference<ShardsAffectedByTeamFailure>(), conf, {}, {}, Future<Void>(Void()),
makeReference<AsyncVar<bool>>(true), true, makeReference<AsyncVar<bool>>(false));
makeReference<AsyncVar<bool>>(true), true, makeReference<AsyncVar<bool>>(false),
PromiseStream<GetMetricsRequest>());
for (int id = 1; id <= processCount; id++) {
UID uid(id, 0);

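One subtlety in the teamTracker changes above: zeroServerLeftLogger is started when a team's priority reaches PRIORITY_TEAM_0_LEFT and is cancelled by overwriting the future once the priority recovers, relying on flow's rule that dropping the last reference to an actor's future cancels the actor. A stripped-down fragment of that start/cancel-by-reassignment idiom (to be read inside an actor body, not as standalone code):

// Inside teamTracker's loop (sketch):
if (team->getPriority() == SERVER_KNOBS->PRIORITY_TEAM_0_LEFT) {
    // 0 servers left: start the delayed data-loss logger.
    zeroServerLeftLogger = zeroServerLeftLogger_impl(self, team);
} else if (lastPriority == SERVER_KNOBS->PRIORITY_TEAM_0_LEFT) {
    // Priority recovered: replacing the future cancels the pending logger.
    zeroServerLeftLogger = Void();
}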
View File

@ -58,10 +58,12 @@ struct IDataDistributionTeam {
virtual bool isWrongConfiguration() const = 0;
virtual void setWrongConfiguration(bool) = 0;
virtual void addServers(const vector<UID> &servers) = 0;
virtual std::string getTeamID() const = 0;
std::string getDesc() const {
const auto& servers = getLastKnownServerInterfaces();
std::string s = format("Size %d; ", servers.size());
std::string s = format("TeamID:%s; ", getTeamID().c_str());
s += format("Size %d; ", servers.size());
for(int i=0; i<servers.size(); i++) {
if (i) s += ", ";
s += servers[i].address().toString() + " " + servers[i].id().shortString();
@ -214,7 +216,7 @@ struct InitialDataDistribution : ReferenceCounted<InitialDataDistribution> {
struct ShardMetrics {
StorageMetrics metrics;
double lastLowBandwidthStartTime;
int shardCount;
int shardCount; // number of smaller shards whose metrics are aggregated in the ShardMetrics
bool operator==(ShardMetrics const& rhs) const {
return metrics == rhs.metrics && lastLowBandwidthStartTime == rhs.lastLowBandwidthStartTime &&
@ -231,18 +233,15 @@ struct ShardTrackedData {
Reference<AsyncVar<Optional<ShardMetrics>>> stats;
};
ACTOR Future<Void> dataDistributionTracker(
Reference<InitialDataDistribution> initData,
Database cx,
PromiseStream<RelocateShard> output,
Reference<ShardsAffectedByTeamFailure> shardsAffectedByTeamFailure,
PromiseStream<GetMetricsRequest> getShardMetrics,
PromiseStream<GetMetricsListRequest> getShardMetricsList,
FutureStream<Promise<int64_t>> getAverageShardBytes,
Promise<Void> readyToStart,
Reference<AsyncVar<bool>> zeroHealthyTeams,
UID distributorId,
KeyRangeMap<ShardTrackedData>* shards);
ACTOR Future<Void> dataDistributionTracker(Reference<InitialDataDistribution> initData, Database cx,
PromiseStream<RelocateShard> output,
Reference<ShardsAffectedByTeamFailure> shardsAffectedByTeamFailure,
PromiseStream<GetMetricsRequest> getShardMetrics,
PromiseStream<GetMetricsListRequest> getShardMetricsList,
FutureStream<Promise<int64_t>> getAverageShardBytes,
Promise<Void> readyToStart, Reference<AsyncVar<bool>> zeroHealthyTeams,
UID distributorId, KeyRangeMap<ShardTrackedData>* shards,
bool const* trackerCancelled);
ACTOR Future<Void> dataDistributionQueue(
Database cx,

View File

@ -18,8 +18,9 @@
* limitations under the License.
*/
#include <numeric>
#include <limits>
#include <numeric>
#include <vector>
#include "flow/ActorCollection.h"
#include "flow/Util.h"
@ -83,7 +84,8 @@ struct RelocateData {
};
class ParallelTCInfo final : public ReferenceCounted<ParallelTCInfo>, public IDataDistributionTeam {
vector<Reference<IDataDistributionTeam>> teams;
std::vector<Reference<IDataDistributionTeam>> teams;
std::vector<UID> tempServerIDs;
int64_t sum(std::function<int64_t(IDataDistributionTeam const&)> func) const {
int64_t result = 0;
@ -94,11 +96,11 @@ class ParallelTCInfo final : public ReferenceCounted<ParallelTCInfo>, public IDa
}
template <class T>
vector<T> collect(std::function<vector<T>(IDataDistributionTeam const&)> func) const {
vector<T> result;
std::vector<T> collect(std::function<vector<T>(IDataDistributionTeam const&)> func) const {
std::vector<T> result;
for (const auto& team : teams) {
vector<T> newItems = func(*team);
std::vector<T> newItems = func(*team);
result.insert(result.end(), newItems.begin(), newItems.end());
}
return result;
@ -124,7 +126,7 @@ public:
return !any([func](IDataDistributionTeam const& team) { return !func(team); });
}
vector<StorageServerInterface> getLastKnownServerInterfaces() const override {
std::vector<StorageServerInterface> getLastKnownServerInterfaces() const override {
return collect<StorageServerInterface>(
[](IDataDistributionTeam const& team) { return team.getLastKnownServerInterfaces(); });
}
@ -137,11 +139,11 @@ public:
return totalSize;
}
vector<UID> const& getServerIDs() const override {
std::vector<UID> const& getServerIDs() const override {
static vector<UID> tempServerIDs;
tempServerIDs.clear();
for (const auto& team : teams) {
vector<UID> const &childIDs = team->getServerIDs();
std::vector<UID> const& childIDs = team->getServerIDs();
tempServerIDs.insert(tempServerIDs.end(), childIDs.begin(), childIDs.end());
}
return tempServerIDs;
@ -184,7 +186,7 @@ public:
}
Future<Void> updateStorageMetrics() override {
vector<Future<Void>> futures;
std::vector<Future<Void>> futures;
for (auto& team : teams) {
futures.push_back(team->updateStorageMetrics());
@ -235,10 +237,19 @@ public:
ASSERT(!teams.empty());
teams[0]->addServers(servers);
}
std::string getTeamID() const override {
std::string id;
for (int i = 0; i < teams.size(); i++) {
auto const& team = teams[i];
id += (i == teams.size() - 1) ? team->getTeamID() : format("%s, ", team->getTeamID().c_str());
}
return id;
}
};
struct Busyness {
vector<int> ledger;
std::vector<int> ledger;
Busyness() : ledger( 10, 0 ) {}
@ -544,8 +555,8 @@ struct DDQueueData {
if(keyServersEntries.size() < SERVER_KNOBS->DD_QUEUE_MAX_KEY_SERVERS) {
for( int shard = 0; shard < keyServersEntries.size(); shard++ ) {
vector<UID> src, dest;
decodeKeyServersValue( UIDtoTagMap, keyServersEntries[shard].value, src, dest );
std::vector<UID> src, dest;
decodeKeyServersValue(UIDtoTagMap, keyServersEntries[shard].value, src, dest);
ASSERT( src.size() );
for( int i = 0; i < src.size(); i++ ) {
servers.insert( src[i] );
@ -849,7 +860,7 @@ struct DDQueueData {
startedHere++;
// update both inFlightActors and inFlight key range maps, cancelling deleted RelocateShards
vector<KeyRange> ranges;
std::vector<KeyRange> ranges;
inFlightActors.getRangesAffectedByInsertion( rd.keys, ranges );
inFlightActors.cancel( KeyRangeRef( ranges.front().begin, ranges.back().end ) );
inFlight.insert( rd.keys, rd );
@ -1039,6 +1050,9 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueueData* self, RelocateData rd,
} else {
TraceEvent(relocateShardInterval.severity, "RelocateShardHasDestination", distributorId)
.detail("PairId", relocateShardInterval.pairID)
.detail("KeyBegin", rd.keys.begin)
.detail("KeyEnd", rd.keys.end)
.detail("SourceServers", describe(rd.src))
.detail("DestinationTeam", describe(destIds))
.detail("ExtraIds", describe(extraIds));
}
@ -1427,7 +1441,7 @@ ACTOR Future<Void> dataDistributionQueue(Database cx, PromiseStream<RelocateShar
state RelocateData launchData;
state Future<Void> recordMetrics = delay(SERVER_KNOBS->DD_QUEUE_LOGGING_INTERVAL);
state vector<Future<Void>> balancingFutures;
state std::vector<Future<Void>> balancingFutures;
state ActorCollectionNoErrors actors;
state PromiseStream<KeyRange> rangesComplete;

View File

@ -91,14 +91,43 @@ struct DataDistributionTracker {
// Read hot detection
PromiseStream<KeyRange> readHotShard;
// The reference to trackerCancelled must be extracted by the actors below,
// because by the time trackerCancelled becomes true this object's memory
// can no longer be accessed safely
bool const& trackerCancelled;
// This class extracts the trackerCancelled reference from a DataDistributionTracker object.
// Because some actors spawned by the dataDistributionTracker outlive the DataDistributionTracker
// object, we must guard against use-after-free by accessing the DataDistributionTracker object
// through this SafeAccessor functor.
class SafeAccessor {
bool const& trackerCancelled;
DataDistributionTracker& tracker;
public:
SafeAccessor(DataDistributionTracker* tracker)
: trackerCancelled(tracker->trackerCancelled), tracker(*tracker) {
ASSERT(!trackerCancelled);
}
DataDistributionTracker* operator()() {
if (trackerCancelled) {
TEST(true); // Trying to access DataDistributionTracker after tracker has been cancelled
throw dd_tracker_cancelled();
}
return &tracker;
}
};
DataDistributionTracker(Database cx, UID distributorId, Promise<Void> const& readyToStart,
PromiseStream<RelocateShard> const& output,
Reference<ShardsAffectedByTeamFailure> shardsAffectedByTeamFailure,
Reference<AsyncVar<bool>> anyZeroHealthyTeams, KeyRangeMap<ShardTrackedData>& shards)
Reference<AsyncVar<bool>> anyZeroHealthyTeams, KeyRangeMap<ShardTrackedData>& shards,
bool const& trackerCancelled)
: cx(cx), distributorId(distributorId), dbSizeEstimate(new AsyncVar<int64_t>()), systemSizeEstimate(0),
maxShardSize(new AsyncVar<Optional<int64_t>>()), sizeChanges(false), readyToStart(readyToStart), output(output),
shardsAffectedByTeamFailure(shardsAffectedByTeamFailure), anyZeroHealthyTeams(anyZeroHealthyTeams),
shards(shards) {}
shards(shards), trackerCancelled(trackerCancelled) {}
~DataDistributionTracker()
{
@ -150,7 +179,7 @@ int64_t getMaxShardSize( double dbSizeEstimate ) {
(int64_t)SERVER_KNOBS->MAX_SHARD_BYTES);
}
ACTOR Future<Void> trackShardMetrics(DataDistributionTracker* self, KeyRange keys,
ACTOR Future<Void> trackShardMetrics(DataDistributionTracker::SafeAccessor self, KeyRange keys,
Reference<AsyncVar<Optional<ShardMetrics>>> shardMetrics) {
state BandwidthStatus bandwidthStatus = shardMetrics->get().present() ? getBandwidthStatus( shardMetrics->get().get().metrics ) : BandwidthStatusNormal;
state double lastLowBandwidthStartTime = shardMetrics->get().present() ? shardMetrics->get().get().lastLowBandwidthStartTime : now();
@ -209,7 +238,7 @@ ACTOR Future<Void> trackShardMetrics(DataDistributionTracker* self, KeyRange key
// TraceEvent("RHDTriggerReadHotLoggingForShard")
// .detail("ShardBegin", keys.begin.printable().c_str())
// .detail("ShardEnd", keys.end.printable().c_str());
self->readHotShard.send(keys);
self()->readHotShard.send(keys);
} else {
ASSERT(false);
}
@ -230,7 +259,8 @@ ACTOR Future<Void> trackShardMetrics(DataDistributionTracker* self, KeyRange key
bounds.permittedError.iosPerKSecond = bounds.permittedError.infinity;
loop {
Transaction tr(self->cx);
Transaction tr(self()->cx);
// metrics.second is the number of key-ranges (i.e., shards) in the 'keys' key-range
std::pair<Optional<StorageMetrics>, int> metrics = wait( tr.waitStorageMetrics( keys, bounds.min, bounds.max, bounds.permittedError, CLIENT_KNOBS->STORAGE_METRICS_SHARD_LIMIT, shardCount ) );
if(metrics.first.present()) {
BandwidthStatus newBandwidthStatus = getBandwidthStatus( metrics.first.get() );
@ -253,9 +283,11 @@ ACTOR Future<Void> trackShardMetrics(DataDistributionTracker* self, KeyRange key
.detail("TrackerID", trackerID);*/
if( shardMetrics->get().present() ) {
self->dbSizeEstimate->set( self->dbSizeEstimate->get() + metrics.first.get().bytes - shardMetrics->get().get().metrics.bytes );
self()->dbSizeEstimate->set(self()->dbSizeEstimate->get() + metrics.first.get().bytes -
shardMetrics->get().get().metrics.bytes);
if(keys.begin >= systemKeys.begin) {
self->systemSizeEstimate += metrics.first.get().bytes - shardMetrics->get().get().metrics.bytes;
self()->systemSizeEstimate +=
metrics.first.get().bytes - shardMetrics->get().get().metrics.bytes;
}
}
@ -272,8 +304,9 @@ ACTOR Future<Void> trackShardMetrics(DataDistributionTracker* self, KeyRange key
}
}
} catch( Error &e ) {
if (e.code() != error_code_actor_cancelled)
self->output.sendError(e); // Propagate failure to dataDistributionTracker
if (e.code() != error_code_actor_cancelled && e.code() != error_code_dd_tracker_cancelled) {
self()->output.sendError(e); // Propagate failure to dataDistributionTracker
}
throw e;
}
}
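The rewritten trackShardMetrics shows the SafeAccessor contract: the actor holds the accessor by value and goes through self() on every touch, so a use after dataDistribution tears the tracker down throws dd_tracker_cancelled instead of dereferencing freed memory. In miniature (flow actor syntax; the actor name is illustrative):

// Sketch: every access re-checks trackerCancelled via operator().
ACTOR Future<Void> exampleTrackerUser(DataDistributionTracker::SafeAccessor self, KeyRange keys) {
    loop {
        wait(delay(1.0));
        self()->readHotShard.send(keys); // throws dd_tracker_cancelled if stale
    }
}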
@ -382,16 +415,19 @@ ACTOR Future<Void> changeSizes( DataDistributionTracker* self, KeyRange keys, in
}
struct HasBeenTrueFor : ReferenceCounted<HasBeenTrueFor> {
explicit HasBeenTrueFor( Optional<ShardMetrics> value ) {
explicit HasBeenTrueFor(const Optional<ShardMetrics>& value) {
if(value.present()) {
trigger = delayJittered(std::max(0.0, SERVER_KNOBS->DD_MERGE_COALESCE_DELAY + value.get().lastLowBandwidthStartTime - now()), TaskPriority::DataDistributionLow ) || cleared.getFuture();
}
}
Future<Void> set() {
Future<Void> set(double lastLowBandwidthStartTime) {
if( !trigger.isValid() ) {
cleared = Promise<Void>();
trigger = delayJittered( SERVER_KNOBS->DD_MERGE_COALESCE_DELAY, TaskPriority::DataDistributionLow ) || cleared.getFuture();
trigger =
delayJittered(SERVER_KNOBS->DD_MERGE_COALESCE_DELAY + std::max(lastLowBandwidthStartTime - now(), 0.0),
TaskPriority::DataDistributionLow) ||
cleared.getFuture();
}
return trigger;
}
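Both the constructor and set() now anchor the merge-coalesce timer to lastLowBandwidthStartTime rather than to the moment of the call. As a worked example, with DD_MERGE_COALESCE_DELAY = 60s and a shard whose bandwidth went low 45s ago, the constructor arms the (jittered) trigger for roughly

    max(0, 60 + (now - 45) - now) = max(0, 15) = 15 seconds,

so a shard that has already been quiet for most of the delay is not forced to wait the full 60s again.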
@ -558,6 +594,8 @@ Future<Void> shardMerger(
shardsMerged++;
auto shardBounds = getShardSizeBounds( merged, maxShardSize );
// If we only recently got the current shard's metrics (i.e., less than DD_LOW_BANDWIDTH_DELAY ago),
// the shard's metrics may not be stable yet, so we cannot continue merging in this direction.
if( endingStats.bytes >= shardBounds.min.bytes ||
getBandwidthStatus( endingStats ) != BandwidthStatusLow ||
now() - lastLowBandwidthStartTime < SERVER_KNOBS->DD_LOW_BANDWIDTH_DELAY ||
@ -588,13 +626,21 @@ Future<Void> shardMerger(
//restarting shard tracker will derefenced values in the shard map, so make a copy
KeyRange mergeRange = merged;
// OldKeys: the key range whose shards are merged into the one shard defined by NewKeys;
// NewKeys: the new key range after the shards are merged;
// EndingSize: the size in bytes of the new merged shard;
// BatchedMerges: the number of shards merged, where each shard is an entry in self->shards;
// LastLowBandwidthStartTime: when the shard's bandwidth status became BandwidthStatusLow. If that happened less
// than DD_LOW_BANDWIDTH_DELAY ago, the merging logic stops at the shard;
// ShardCount: the number of non-splittable shards merged; each entry in self->shards may aggregate more than one.
TraceEvent("RelocateShardMergeMetrics", self->distributorId)
.detail("OldKeys", keys)
.detail("NewKeys", mergeRange)
.detail("EndingSize", endingStats.bytes)
.detail("BatchedMerges", shardsMerged)
.detail("LastLowBandwidthStartTime", lastLowBandwidthStartTime)
.detail("ShardCount", shardCount);
.detail("OldKeys", keys)
.detail("NewKeys", mergeRange)
.detail("EndingSize", endingStats.bytes)
.detail("BatchedMerges", shardsMerged)
.detail("LastLowBandwidthStartTime", lastLowBandwidthStartTime)
.detail("ShardCount", shardCount);
if(mergeRange.begin < systemKeys.begin) {
self->systemSizeEstimate -= systemBytes;
@ -629,7 +675,7 @@ ACTOR Future<Void> shardEvaluator(
// Every invocation must set this or clear it
if(shouldMerge && !self->anyZeroHealthyTeams->get()) {
auto whenLongEnough = wantsToMerge->set();
auto whenLongEnough = wantsToMerge->set(shardSize->get().get().lastLowBandwidthStartTime);
if( !wantsToMerge->hasBeenTrueForLongEnough() ) {
onChange = onChange || whenLongEnough;
}
@ -664,18 +710,14 @@ ACTOR Future<Void> shardEvaluator(
return Void();
}
ACTOR Future<Void> shardTracker(
DataDistributionTracker* self,
KeyRange keys,
Reference<AsyncVar<Optional<ShardMetrics>>> shardSize)
{
wait( yieldedFuture(self->readyToStart.getFuture()) );
ACTOR Future<Void> shardTracker(DataDistributionTracker::SafeAccessor self, KeyRange keys,
Reference<AsyncVar<Optional<ShardMetrics>>> shardSize) {
wait(yieldedFuture(self()->readyToStart.getFuture()));
if( !shardSize->get().present() )
wait( shardSize->onChange() );
if( !self->maxShardSize->get().present() )
wait( yieldedFuture(self->maxShardSize->onChange()) );
if (!self()->maxShardSize->get().present()) wait(yieldedFuture(self()->maxShardSize->onChange()));
// Since maxShardSize will become present for all shards at once, avoid slow tasks with a short delay
wait( delay( 0, TaskPriority::DataDistribution ) );
@ -683,26 +725,27 @@ ACTOR Future<Void> shardTracker(
// Survives multiple calls to shardEvaluator and keeps merges from happening too quickly.
state Reference<HasBeenTrueFor> wantsToMerge( new HasBeenTrueFor( shardSize->get() ) );
/*TraceEvent("ShardTracker", self->distributorId)
.detail("Begin", keys.begin)
.detail("End", keys.end)
.detail("TrackerID", trackerID)
.detail("MaxBytes", self->maxShardSize->get().get())
.detail("ShardSize", shardSize->get().get().bytes)
.detail("BytesPerKSec", shardSize->get().get().bytesPerKSecond);*/
/*TraceEvent("ShardTracker", self()->distributorId)
.detail("Begin", keys.begin)
.detail("End", keys.end)
.detail("TrackerID", trackerID)
.detail("MaxBytes", self()->maxShardSize->get().get())
.detail("ShardSize", shardSize->get().get().bytes)
.detail("BytesPerKSec", shardSize->get().get().bytesPerKSecond);*/
try {
loop {
// Use the current known size to check for (and start) splits and merges.
wait( shardEvaluator( self, keys, shardSize, wantsToMerge ) );
wait(shardEvaluator(self(), keys, shardSize, wantsToMerge));
// We could have a lot of actors being released from the previous wait at the same time. Immediately calling
// delay(0) mitigates the resulting SlowTask
wait( delay(0, TaskPriority::DataDistribution) );
}
} catch (Error& e) {
if (e.code() != error_code_actor_cancelled)
self->output.sendError(e); // Propagate failure to dataDistributionTracker
if (e.code() != error_code_actor_cancelled && e.code() != error_code_dd_tracker_cancelled) {
self()->output.sendError(e); // Propagate failure to dataDistributionTracker
}
throw e;
}
}
@ -733,8 +776,8 @@ void restartShardTrackers(DataDistributionTracker* self, KeyRangeRef keys, Optio
ShardTrackedData data;
data.stats = shardMetrics;
data.trackShard = shardTracker(self, ranges[i], shardMetrics);
data.trackBytes = trackShardMetrics(self, ranges[i], shardMetrics);
data.trackShard = shardTracker(DataDistributionTracker::SafeAccessor(self), ranges[i], shardMetrics);
data.trackBytes = trackShardMetrics(DataDistributionTracker::SafeAccessor(self), ranges[i], shardMetrics);
self->shards.insert( ranges[i], data );
}
}
@ -857,9 +900,10 @@ ACTOR Future<Void> dataDistributionTracker(Reference<InitialDataDistribution> in
PromiseStream<GetMetricsListRequest> getShardMetricsList,
FutureStream<Promise<int64_t>> getAverageShardBytes,
Promise<Void> readyToStart, Reference<AsyncVar<bool>> anyZeroHealthyTeams,
UID distributorId, KeyRangeMap<ShardTrackedData>* shards) {
UID distributorId, KeyRangeMap<ShardTrackedData>* shards,
bool const* trackerCancelled) {
state DataDistributionTracker self(cx, distributorId, readyToStart, output, shardsAffectedByTeamFailure,
anyZeroHealthyTeams, *shards);
anyZeroHealthyTeams, *shards, *trackerCancelled);
state Future<Void> loggingTrigger = Void();
state Future<Void> readHotDetect = readHotDetector(&self);
try {

View File

@ -27,6 +27,9 @@ rocksdb::ColumnFamilyOptions getCFOptions() {
rocksdb::ColumnFamilyOptions options;
options.level_compaction_dynamic_level_bytes = true;
options.OptimizeLevelStyleCompaction(SERVER_KNOBS->ROCKSDB_MEMTABLE_BYTES);
if (SERVER_KNOBS->ROCKSDB_PERIODIC_COMPACTION_SECONDS > 0) {
options.periodic_compaction_seconds = SERVER_KNOBS->ROCKSDB_PERIODIC_COMPACTION_SECONDS;
}
// Compact sstables when there's too much deleted stuff.
options.table_properties_collector_factories = { rocksdb::NewCompactOnDeletionCollectorFactory(128, 1) };
return options;
@ -52,7 +55,7 @@ struct RocksDBKeyValueStore : IKeyValueStore {
explicit Writer(DB& db, UID id) : db(db), id(id) {}
~Writer() {
~Writer() override {
if (db) {
delete db;
}
@ -83,24 +86,49 @@ struct RocksDBKeyValueStore : IKeyValueStore {
TraceEvent(SevError, "RocksDBError").detail("Error", status.ToString()).detail("Method", "Open");
a.done.sendError(statusToError(status));
} else {
TraceEvent(SevInfo, "RocksDB").detail("Path", a.path).detail("Method", "Open");
a.done.send(Void());
}
}
struct DeleteVisitor : public rocksdb::WriteBatch::Handler {
VectorRef<KeyRangeRef>& deletes;
Arena& arena;
DeleteVisitor(VectorRef<KeyRangeRef>& deletes, Arena& arena) : deletes(deletes), arena(arena) {}
rocksdb::Status DeleteRangeCF(uint32_t /*column_family_id*/, const rocksdb::Slice& begin,
const rocksdb::Slice& end) override {
KeyRangeRef kr(toStringRef(begin), toStringRef(end));
deletes.push_back_deep(arena, kr);
return rocksdb::Status::OK();
}
};
struct CommitAction : TypedAction<Writer, CommitAction> {
std::unique_ptr<rocksdb::WriteBatch> batchToCommit;
ThreadReturnPromise<Void> done;
double getTimeEstimate() const override { return SERVER_KNOBS->COMMIT_TIME_ESTIMATE; }
};
void action(CommitAction& a) {
Standalone<VectorRef<KeyRangeRef>> deletes;
DeleteVisitor dv(deletes, deletes.arena());
ASSERT(a.batchToCommit->Iterate(&dv).ok());
// If the batch contained any range deletes, the visitor above must have collected them.
ASSERT(!deletes.empty() || !a.batchToCommit->HasDeleteRange());
rocksdb::WriteOptions options;
options.sync = true;
options.sync = !SERVER_KNOBS->ROCKSDB_UNSAFE_AUTO_FSYNC;
auto s = db->Write(options, a.batchToCommit.get());
if (!s.ok()) {
TraceEvent(SevError, "RocksDBError").detail("Error", s.ToString()).detail("Method", "Commit");
a.done.sendError(statusToError(s));
} else {
a.done.send(Void());
for (const auto& keyRange : deletes) {
auto begin = toSlice(keyRange.begin);
auto end = toSlice(keyRange.end);
ASSERT(db->SuggestCompactRange(db->DefaultColumnFamily(), &begin, &end).ok());
}
}
}
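DeleteVisitor above relies on WriteBatch::Iterate replaying each batch operation into a Handler; only DeleteRangeCF is overridden, and the collected ranges are handed to SuggestCompactRange after a successful write so RocksDB compacts tombstoned spans promptly. A self-contained sketch of the same handler idiom against the plain RocksDB API (no FDB types):

#include <rocksdb/write_batch.h>
#include <string>
#include <utility>
#include <vector>

// Collect [begin, end) for every DeleteRange in a batch. Other ops
// (e.g. Put on the default column family) fall through to the base
// class's no-op defaults, so they need no overrides here.
struct RangeDeleteCollector : public rocksdb::WriteBatch::Handler {
    std::vector<std::pair<std::string, std::string>> ranges;
    rocksdb::Status DeleteRangeCF(uint32_t /*column_family_id*/, const rocksdb::Slice& begin,
                                  const rocksdb::Slice& end) override {
        ranges.emplace_back(begin.ToString(), end.ToString());
        return rocksdb::Status::OK();
    }
};

// Usage: batch.Iterate(&collector) returns non-OK if any callback fails.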
@ -112,6 +140,10 @@ struct RocksDBKeyValueStore : IKeyValueStore {
double getTimeEstimate() const override { return SERVER_KNOBS->COMMIT_TIME_ESTIMATE; }
};
void action(CloseAction& a) {
if (db == nullptr) {
a.done.send(Void());
return;
}
auto s = db->Close();
if (!s.ok()) {
TraceEvent(SevError, "RocksDBError").detail("Error", s.ToString()).detail("Method", "Close");
@ -119,8 +151,14 @@ struct RocksDBKeyValueStore : IKeyValueStore {
if (a.deleteOnClose) {
std::vector<rocksdb::ColumnFamilyDescriptor> defaultCF = { rocksdb::ColumnFamilyDescriptor{
"default", getCFOptions() } };
rocksdb::DestroyDB(a.path, getOptions(), defaultCF);
s = rocksdb::DestroyDB(a.path, getOptions(), defaultCF);
if (!s.ok()) {
TraceEvent(SevError, "RocksDBError").detail("Error", s.ToString()).detail("Method", "Destroy");
} else {
TraceEvent(SevInfo, "RocksDB").detail("Path", a.path).detail("Method", "Destroy");
}
}
TraceEvent(SevInfo, "RocksDB").detail("Path", a.path).detail("Method", "Close");
a.done.send(Void());
}
};
@ -264,7 +302,6 @@ struct RocksDBKeyValueStore : IKeyValueStore {
UID id;
Reference<IThreadPool> writeThread;
Reference<IThreadPool> readThreads;
unsigned nReaders = 16;
Promise<Void> errorPromise;
Promise<Void> closePromise;
std::unique_ptr<rocksdb::WriteBatch> writeBatch;
@ -276,7 +313,7 @@ struct RocksDBKeyValueStore : IKeyValueStore {
writeThread = createGenericThreadPool();
readThreads = createGenericThreadPool();
writeThread->addThread(new Writer(db, id));
for (unsigned i = 0; i < nReaders; ++i) {
for (unsigned i = 0; i < SERVER_KNOBS->ROCKSDB_READ_PARALLELISM; ++i) {
readThreads->addThread(new Reader(db));
}
}
@ -368,16 +405,14 @@ struct RocksDBKeyValueStore : IKeyValueStore {
}
StorageBytes getStorageBytes() const override {
uint64_t live = 0;
ASSERT(db->GetIntProperty(rocksdb::DB::Properties::kEstimateLiveDataSize, &live));
int64_t free;
int64_t total;
uint64_t sstBytes = 0;
ASSERT(db->GetIntProperty(rocksdb::DB::Properties::kTotalSstFilesSize, &sstBytes));
uint64_t memtableBytes = 0;
ASSERT(db->GetIntProperty(rocksdb::DB::Properties::kSizeAllMemTables, &memtableBytes));
g_network->getDiskBytes(path, free, total);
return StorageBytes(free, total, sstBytes + memtableBytes, free);
return StorageBytes(free, total, live, free);
}
};

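getStorageBytes now reports RocksDB's own live-data estimate instead of summing SST and memtable sizes, which over-counts obsolete versions awaiting compaction. The property read is a single call (kEstimateLiveDataSize is a published RocksDB property; the wrapper function is illustrative):

#include <rocksdb/db.h>

// Sketch: query RocksDB for its estimate of live (non-obsolete) bytes.
uint64_t liveBytes(rocksdb::DB* db) {
    uint64_t live = 0;
    // GetIntProperty returns false if the property is unavailable.
    bool ok = db->GetIntProperty(rocksdb::DB::Properties::kEstimateLiveDataSize, &live);
    return ok ? live : 0;
}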
View File

@ -25,6 +25,7 @@
#include "fdbserver/CoroFlow.h"
#include "fdbserver/Knobs.h"
#include "flow/Hash3.h"
#include "flow/xxhash.h"
extern "C" {
#include "fdbserver/sqlite/sqliteInt.h"
@ -94,28 +95,54 @@ struct PageChecksumCodec {
SumType *pSumInPage = (SumType *)(pData + dataLen);
if (write) {
// Always write a CRC32 checksum for new pages
pSumInPage->part1 = 0; // Indicates CRC32 is being used
pSumInPage->part2 = crc32c_append(0xfdbeefdb, static_cast<uint8_t*>(data), dataLen);
// Always write an xxHash3 checksum for new pages
// The first 8 bits of part1 are set to 0 so that, with high probability,
// pages written with hashlittle2 fail the xxHash3 format check and do
// not require calculating an xxHash3 checksum on read
auto xxHash3 = XXH3_64bits(data, dataLen);
pSumInPage->part1 = static_cast<uint32_t>((xxHash3 >> 32) & 0x00ffffff);
pSumInPage->part2 = static_cast<uint32_t>(xxHash3 & 0xffffffff);
return true;
}
SumType sum;
SumType crc32Sum;
if (pSumInPage->part1 == 0) {
// part1 being 0 indicates with high probability that a CRC32 checksum
// part1 being 0 indicates with very high probability that a CRC32 checksum
// was used, so check that first. If this checksum fails, there is still
// some chance the page was written with hashlittle2, so fall back to checking
// hashlittle2
sum.part1 = 0;
sum.part2 = crc32c_append(0xfdbeefdb, static_cast<uint8_t*>(data), dataLen);
if (sum == *pSumInPage) return true;
// some chance the page was written with another checksum algorithm
crc32Sum.part1 = 0;
crc32Sum.part2 = crc32c_append(0xfdbeefdb, static_cast<uint8_t*>(data), dataLen);
if (crc32Sum == *pSumInPage) {
TEST(true); // Read CRC32 checksum
return true;
}
}
// Try xxhash3
SumType xxHash3Sum;
if ((pSumInPage->part1 >> 24) == 0) {
// The first 8 bits of part1 being 0 indicates with high probability that an
// xxHash3 checksum was used, so check that next. If this checksum fails, there is
// still some chance the page was written with hashlittle2, so fall back to checking
// hashlittle2
auto xxHash3 = XXH3_64bits(data, dataLen);
xxHash3Sum.part1 = static_cast<uint32_t>((xxHash3 >> 32) & 0x00ffffff);
xxHash3Sum.part2 = static_cast<uint32_t>(xxHash3 & 0xffffffff);
if (xxHash3Sum == *pSumInPage) {
TEST(true); // Read xxHash3 checksum
return true;
}
}
// Try hashlittle2
SumType hashLittle2Sum;
hashLittle2Sum.part1 = pageNumber; // DO NOT CHANGE
hashLittle2Sum.part2 = 0x5ca1ab1e;
hashlittle2(pData, dataLen, &hashLittle2Sum.part1, &hashLittle2Sum.part2);
if (hashLittle2Sum == *pSumInPage) return true;
if (hashLittle2Sum == *pSumInPage) {
TEST(true); // Read HashLittle2 checksum
return true;
}
if (!silent) {
TraceEvent trEvent(SevError, "SQLitePageChecksumFailure");
@ -127,7 +154,12 @@ struct PageChecksumCodec {
.detail("PageSize", pageLen)
.detail("ChecksumInPage", pSumInPage->toString())
.detail("ChecksumCalculatedHL2", hashLittle2Sum.toString());
if (pSumInPage->part1 == 0) trEvent.detail("ChecksumCalculatedCRC", sum.toString());
if (pSumInPage->part1 == 0) {
trEvent.detail("ChecksumCalculatedCRC", crc32Sum.toString());
}
if (pSumInPage->part1 >> 24 == 0) {
trEvent.detail("ChecksumCalculatedXXHash3", xxHash3Sum.toString());
}
}
return false;
}
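The read path above discriminates three trailer formats from the stored bits alone: part1 == 0 suggests CRC32, a zero high byte of part1 suggests xxHash3, and anything else falls through to hashlittle2. The xxHash3 packing, shown standalone (XXH3_64bits comes from the bundled xxhash; the two 32-bit parts match the codec's SumType above):

#include "flow/xxhash.h"
#include <cstdint>

struct Sum { uint32_t part1, part2; };

// Pack a 64-bit xxHash3 into the page trailer, zeroing the high 8 bits
// of part1 so readers can cheaply recognize the format on disk.
Sum packXXHash3(const void* data, int dataLen) {
    const uint64_t h = XXH3_64bits(data, dataLen);
    Sum s;
    s.part1 = static_cast<uint32_t>((h >> 32) & 0x00ffffff);
    s.part2 = static_cast<uint32_t>(h & 0xffffffff);
    return s;
}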

View File

@ -97,9 +97,10 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
init( PEEK_STATS_SLOW_RATIO, 0.5 );
init( PUSH_RESET_INTERVAL, 300.0 ); if ( randomize && BUGGIFY ) PUSH_RESET_INTERVAL = 20.0;
init( PUSH_MAX_LATENCY, 0.5 ); if ( randomize && BUGGIFY ) PUSH_MAX_LATENCY = 0.0;
init( PUSH_STATS_INTERVAL, 10.0 );
init( PUSH_STATS_INTERVAL, 10.0 );
init( PUSH_STATS_SLOW_AMOUNT, 2 );
init( PUSH_STATS_SLOW_RATIO, 0.5 );
init( TLOG_POP_BATCH_SIZE, 1000 ); if ( randomize && BUGGIFY ) TLOG_POP_BATCH_SIZE = 10;
// disk snapshot max timeout, to be put in TLog, storage and coordinator nodes
init( SNAP_CREATE_MAX_TIMEOUT, 300.0 );
@ -245,6 +246,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
init( DD_SS_STUCK_TIME_LIMIT, 300.0 ); if( randomize && BUGGIFY ) { DD_SS_STUCK_TIME_LIMIT = 200.0 + deterministicRandom()->random01() * 100.0; }
init( DD_TEAMS_INFO_PRINT_INTERVAL, 60 ); if( randomize && BUGGIFY ) DD_TEAMS_INFO_PRINT_INTERVAL = 10;
init( DD_TEAMS_INFO_PRINT_YIELD_COUNT, 100 ); if( randomize && BUGGIFY ) DD_TEAMS_INFO_PRINT_YIELD_COUNT = deterministicRandom()->random01() * 1000 + 1;
init( DD_TEAM_ZERO_SERVER_LEFT_LOG_DELAY, 120 ); if( randomize && BUGGIFY ) DD_TEAM_ZERO_SERVER_LEFT_LOG_DELAY = 5;
// TeamRemover
init( TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER, false ); if( randomize && BUGGIFY ) TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER = deterministicRandom()->random01() < 0.1 ? true : false; // false by default. disable the consistency check when it's true
@ -316,7 +318,10 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
// KeyValueStoreRocksDB
init( ROCKSDB_BACKGROUND_PARALLELISM, 0 );
init( ROCKSDB_READ_PARALLELISM, 4 );
init( ROCKSDB_MEMTABLE_BYTES, 512 * 1024 * 1024 );
init( ROCKSDB_UNSAFE_AUTO_FSYNC, false );
init( ROCKSDB_PERIODIC_COMPACTION_SECONDS, 0 );
// Leader election
bool longLeaderElection = randomize && BUGGIFY;
@ -577,6 +582,9 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
init( TAG_MEASUREMENT_INTERVAL, 30.0 ); if( randomize && BUGGIFY ) TAG_MEASUREMENT_INTERVAL = 1.0;
init( READ_COST_BYTE_FACTOR, 16384 ); if( randomize && BUGGIFY ) READ_COST_BYTE_FACTOR = 4096;
init( PREFIX_COMPRESS_KVS_MEM_SNAPSHOTS, true ); if( randomize && BUGGIFY ) PREFIX_COMPRESS_KVS_MEM_SNAPSHOTS = false;
init( REPORT_DD_METRICS, true );
init( DD_METRICS_REPORT_INTERVAL, 30.0 );
init( FETCH_KEYS_TOO_LONG_TIME_CRITERIA, 300.0 );
//Wait Failure
init( MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS, 250 ); if( randomize && BUGGIFY ) MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS = 2;
@ -609,6 +617,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
init( MAX_STATUS_REQUESTS_PER_SECOND, 256.0 );
init( CONFIGURATION_ROWS_TO_FETCH, 20000 );
init( DISABLE_DUPLICATE_LOG_WARNING, false );
init( HISTOGRAM_REPORT_INTERVAL, 300.0 );
// IPager
init( PAGER_RESERVED_PAGES, 1 );

View File

@ -99,6 +99,7 @@ public:
double PUSH_STATS_INTERVAL;
double PUSH_STATS_SLOW_AMOUNT;
double PUSH_STATS_SLOW_RATIO;
int TLOG_POP_BATCH_SIZE;
// Data distribution queue
double HEALTH_POLL_TIME;
@ -193,6 +194,7 @@ public:
double DD_SS_STUCK_TIME_LIMIT; // If a storage server is not getting new versions for this amount of time, then it becomes undesired.
int DD_TEAMS_INFO_PRINT_INTERVAL;
int DD_TEAMS_INFO_PRINT_YIELD_COUNT;
int DD_TEAM_ZERO_SERVER_LEFT_LOG_DELAY;
// TeamRemover to remove redundant teams
bool TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER; // disable the machineTeamRemover actor
@ -252,7 +254,10 @@ public:
// KeyValueStoreRocksDB
int ROCKSDB_BACKGROUND_PARALLELISM;
int ROCKSDB_READ_PARALLELISM;
int64_t ROCKSDB_MEMTABLE_BYTES;
bool ROCKSDB_UNSAFE_AUTO_FSYNC;
int64_t ROCKSDB_PERIODIC_COMPACTION_SECONDS;
// Leader election
int MAX_NOTIFICATIONS;
@ -507,6 +512,9 @@ public:
double TAG_MEASUREMENT_INTERVAL;
int64_t READ_COST_BYTE_FACTOR;
bool PREFIX_COMPRESS_KVS_MEM_SNAPSHOTS;
bool REPORT_DD_METRICS;
double DD_METRICS_REPORT_INTERVAL;
double FETCH_KEYS_TOO_LONG_TIME_CRITERIA;
//Wait Failure
int MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS;
@ -539,6 +547,7 @@ public:
double MAX_STATUS_REQUESTS_PER_SECOND;
int CONFIGURATION_ROWS_TO_FETCH;
bool DISABLE_DUPLICATE_LOG_WARNING;
double HISTOGRAM_REPORT_INTERVAL;
// IPager
int PAGER_RESERVED_PAGES;

View File

@ -30,6 +30,8 @@
#include "fdbserver/ApplyMetadataMutation.h"
#include "fdbserver/RecoveryState.h"
#include "fdbclient/Atomic.h"
#include "flow/Arena.h"
#include "flow/Histogram.h"
#include "flow/TDMetric.actor.h"
#include "flow/actorcompiler.h" // This must be the last #include.
@ -77,20 +79,26 @@ struct LogRouterData {
const UID dbgid;
Reference<AsyncVar<Reference<ILogSystem>>> logSystem;
NotifiedVersion version;
NotifiedVersion minPopped;
Optional<UID> primaryPeekLocation;
NotifiedVersion version; // The largest version at which the log router has peeked mutations
// from satellite tLog or primary tLogs.
NotifiedVersion minPopped; // The minimum version among all tags that has been popped by remote tLogs.
const Version startVersion;
Version minKnownCommittedVersion;
Version minKnownCommittedVersion; // The minimum durable version among all LRs.
// An LR's durable version is the maximum version of mutations that have been
// popped by the remote tLogs.
Version poppedVersion;
Deque<std::pair<Version, Standalone<VectorRef<uint8_t>>>> messageBlocks;
Tag routerTag;
bool allowPops;
LogSet logSet;
bool foundEpochEnd;
double waitForVersionTime = 0;
double maxWaitForVersionTime = 0;
double getMoreTime = 0;
double maxGetMoreTime = 0;
bool foundEpochEnd; // True if the cluster is not fully recovered yet and the LR has to handle recovery.
double waitForVersionTime = 0; // The total amount of time LR waits for remote tLog to peek and pop its data.
double maxWaitForVersionTime = 0; // The max one-instance wait time when LR must wait for remote tLog to pop data.
double getMoreTime = 0; // The total amount of time LR waits for satellite tLog's data to become available.
double maxGetMoreTime = 0; // The max wait time LR spent in a pull-data-request to satellite tLog.
int64_t generation = -1;
Reference<Histogram> peekLatencyDist;
struct PeekTrackerData {
std::map<int, Promise<std::pair<Version, bool>>> sequence_version;
@ -100,7 +108,9 @@ struct LogRouterData {
std::map<UID, PeekTrackerData> peekTracker;
CounterCollection cc;
Counter getMoreCount, getMoreBlockedCount;
Counter getMoreCount; // Incremented each time the LR tries to pull data from the satellite tLog.
Counter
getMoreBlockedCount; // Incremented when data is unavailable while the LR tries to pull from the satellite tLog.
Future<Void> logger;
Reference<EventCacheHolder> eventCacheHolder;
@ -121,9 +131,14 @@ struct LogRouterData {
return newTagData;
}
LogRouterData(UID dbgid, const InitializeLogRouterRequest& req) : dbgid(dbgid), routerTag(req.routerTag), logSystem(new AsyncVar<Reference<ILogSystem>>()),
version(req.startVersion-1), minPopped(0), startVersion(req.startVersion), allowPops(false), minKnownCommittedVersion(0), poppedVersion(0), foundEpochEnd(false),
cc("LogRouter", dbgid.toString()), getMoreCount("GetMoreCount", cc), getMoreBlockedCount("GetMoreBlockedCount", cc) {
LogRouterData(UID dbgid, const InitializeLogRouterRequest& req)
: dbgid(dbgid), routerTag(req.routerTag), logSystem(new AsyncVar<Reference<ILogSystem>>()),
version(req.startVersion - 1), minPopped(0), generation(req.recoveryCount), startVersion(req.startVersion),
allowPops(false), minKnownCommittedVersion(0), poppedVersion(0), foundEpochEnd(false),
cc("LogRouter", dbgid.toString()), getMoreCount("GetMoreCount", cc),
getMoreBlockedCount("GetMoreBlockedCount", cc),
peekLatencyDist(Histogram::getHistogram(LiteralStringRef("LogRouter"), LiteralStringRef("PeekTLogLatency"),
Histogram::Unit::microseconds)) {
//setup just enough of a logSet to be able to call getPushLocations
logSet.logServers.resize(req.tLogLocalities.size());
logSet.tLogPolicy = req.tLogPolicy;
@ -140,8 +155,10 @@ struct LogRouterData {
eventCacheHolder = makeReference<EventCacheHolder>(dbgid.shortString() + ".PeekLocation");
specialCounter(cc, "Version", [this](){ return this->version.get(); });
// FetchedVersions: How many versions of mutations are buffered at the LR and have not yet been popped by remote tLogs
specialCounter(cc, "Version", [this]() { return this->version.get(); });
specialCounter(cc, "MinPopped", [this](){ return this->minPopped.get(); });
// TODO: Add minPopped locality and minPoppedId, similar to tLog metrics
specialCounter(cc, "FetchedVersions", [this](){ return std::max<Version>(0, std::min<Version>(SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS, this->version.get() - this->minPopped.get())); });
specialCounter(cc, "MinKnownCommittedVersion", [this](){ return this->minKnownCommittedVersion; });
specialCounter(cc, "PoppedVersion", [this](){ return this->poppedVersion; });
@ -150,7 +167,12 @@ struct LogRouterData {
specialCounter(cc, "WaitForVersionMaxMS", [this](){ double val = this->maxWaitForVersionTime; this->maxWaitForVersionTime = 0; return 1000*val; });
specialCounter(cc, "GetMoreMS", [this](){ double val = this->getMoreTime; this->getMoreTime = 0; return 1000*val; });
specialCounter(cc, "GetMoreMaxMS", [this](){ double val = this->maxGetMoreTime; this->maxGetMoreTime = 0; return 1000*val; });
logger = traceCounters("LogRouterMetrics", dbgid, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, &cc, "LogRouterMetrics");
specialCounter(cc, "Generation", [this]() { return this->generation; });
logger = traceCounters("LogRouterMetrics", dbgid, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, &cc,
"LogRouterMetrics", [this](TraceEvent& te) {
te.detail("PrimaryPeekLocation", this->primaryPeekLocation);
te.detail("RouterTag", this->routerTag.toString());
});
}
};
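// Illustration (simplified model, not the flow/Histogram.h implementation): peekLatencyDist above
// comes from a process-wide registry keyed by (group, op), and callers sample elapsed seconds.
#include <map>
#include <string>
#include <vector>

struct MiniHistogram {
    std::vector<double> samples; // the real Histogram buckets samples rather than storing them
    void sampleSeconds(double s) { samples.push_back(s); }
};

MiniHistogram& getMiniHistogram(const std::string& group, const std::string& op) {
    static std::map<std::string, MiniHistogram> registry; // one histogram per (group, op) pair
    return registry[group + "/" + op];
}
// Usage mirroring the constructor above: after each peek completes, sample (now() - startTime)
// into getMiniHistogram("LogRouter", "PeekTLogLatency").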
@ -209,8 +231,15 @@ ACTOR Future<Void> waitForVersion( LogRouterData *self, Version ver ) {
// Since one set of log routers is created per generation of transaction logs, the gap caused by epoch end will be within MAX_VERSIONS_IN_FLIGHT of the log routers start version.
state double startTime = now();
if(self->version.get() < self->startVersion) {
// The log router must wait for remote tLogs to process data whose version is less than self->startVersion
// before it can pull more data (i.e., data after self->startVersion) from the satellite tLog.
// This prevents the LR from running out of memory by pulling too much data from the satellite tLog at once.
// Note: each commit writes data to both the primary tLogs and the satellite tLog. The satellite tLog can be
// viewed as a part of the primary tLogs.
if(ver > self->startVersion) {
self->version.set(self->startVersion);
// Wait for the remote tLog to peek and pop from the LR,
// so that the LR's minPopped version can advance to self->startVersion.
wait(self->minPopped.whenAtLeast(self->version.get()));
}
self->waitForVersionTime += now() - startTime;
@ -218,6 +247,9 @@ ACTOR Future<Void> waitForVersion( LogRouterData *self, Version ver ) {
return Void();
}
if(!self->foundEpochEnd) {
// Similar to the proxy, which does not keep more than MAX_READ_TRANSACTION_LIFE_VERSIONS transactions outstanding,
// the log router does not keep more than MAX_READ_TRANSACTION_LIFE_VERSIONS versions outstanding, because
// a remote SS cannot roll back more than MAX_READ_TRANSACTION_LIFE_VERSIONS versions.
wait(self->minPopped.whenAtLeast(std::min(self->version.get(), ver - SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS)));
} else {
while(self->minPopped.get() + SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS < ver) {
@ -237,6 +269,7 @@ ACTOR Future<Void> waitForVersion( LogRouterData *self, Version ver ) {
return Void();
}
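// Illustration (standalone sketch, hypothetical names): the gating above reduces to one
// inequality. The LR may process version `ver` only once remote tLogs have popped to within
// MAX_READ_TRANSACTION_LIFE_VERSIONS of it, which bounds how much data the LR must buffer.
#include <cstdint>
using SketchVersion = int64_t;

bool mayAdvanceTo(SketchVersion ver, SketchVersion minPopped, SketchVersion maxVersionsInFlight) {
    return minPopped + maxVersionsInFlight >= ver; // otherwise wait on minPopped.whenAtLeast(...)
}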
// Log router pulls data from the satellite tLogs
ACTOR Future<Void> pullAsyncData( LogRouterData *self ) {
state Future<Void> dbInfoChange = Void();
state Reference<ILogSystem::IPeekCursor> r;
@ -257,13 +290,16 @@ ACTOR Future<Void> pullAsyncData( LogRouterData *self ) {
state double startTime = now();
choose {
when(wait( getMoreF ) ) {
self->getMoreTime += now() - startTime;
self->maxGetMoreTime = std::max(self->maxGetMoreTime, now() - startTime);
double peekTime = now() - startTime;
self->peekLatencyDist->sampleSeconds(peekTime);
self->getMoreTime += peekTime;
self->maxGetMoreTime = std::max(self->maxGetMoreTime, peekTime);
break;
}
when( wait( dbInfoChange ) ) { //FIXME: does this actually happen?
if( self->logSystem->get() ) {
r = self->logSystem->get()->peekLogRouter( self->dbgid, tagAt, self->routerTag );
self->primaryPeekLocation = r->getPrimaryPeekLocation();
TraceEvent("LogRouterPeekLocation", self->dbgid).detail("LogID", r->getPrimaryPeekLocation()).trackLatest(self->eventCacheHolder->trackingKey);
} else {
r = Reference<ILogSystem::IPeekCursor>();
@ -565,6 +601,7 @@ ACTOR Future<Void> logRouterCore(
addActor.send( logRouterPeekMessages( &logRouterData, req ) );
}
when( TLogPopRequest req = waitNext( interf.popMessages.getFuture() ) ) {
// Request from a remote tLog to pop data from the LR
addActor.send( logRouterPop( &logRouterData, req ) );
}
when (wait(error)) {}

View File

@ -849,7 +849,7 @@ struct LogPushData : NonCopyable {
for(auto& log : logSystem->getLogSystemConfig().tLogs) {
if(log.isLocal) {
for(int i = 0; i < log.tLogs.size(); i++) {
messagesWriter.push_back( BinaryWriter( AssumeVersion(currentProtocolVersion) ) );
messagesWriter.push_back( BinaryWriter( AssumeVersion(g_network->protocolVersion()) ) );
}
}
}
@ -916,7 +916,7 @@ struct LogPushData : NonCopyable {
msg_locations.clear();
logSystem->getPushLocations(prev_tags, msg_locations, allLocations);
BinaryWriter bw(AssumeVersion(currentProtocolVersion));
BinaryWriter bw(AssumeVersion(g_network->protocolVersion()));
// Metadata messages (currently LogProtocolMessage is the only metadata
// message) should be written before span information. If this isn't a

View File

@ -142,8 +142,20 @@ ACTOR Future<Void> resetChecker( ILogSystem::ServerPeekCursor* self, NetworkAddr
self->unknownReplies = 0;
self->fastReplies = 0;
wait(delay(SERVER_KNOBS->PEEK_STATS_INTERVAL));
TraceEvent("SlowPeekStats").detail("PeerAddress", addr).detail("SlowReplies", self->slowReplies).detail("FastReplies", self->fastReplies).detail("UnknownReplies", self->unknownReplies);
if(self->slowReplies >= SERVER_KNOBS->PEEK_STATS_SLOW_AMOUNT && self->slowReplies/double(self->slowReplies+self->fastReplies) >= SERVER_KNOBS->PEEK_STATS_SLOW_RATIO) {
TraceEvent("SlowPeekStats", self->randomID)
.detail("PeerAddress", addr)
.detail("SlowReplies", self->slowReplies)
.detail("FastReplies", self->fastReplies)
.detail("UnknownReplies", self->unknownReplies);
if (self->slowReplies >= SERVER_KNOBS->PEEK_STATS_SLOW_AMOUNT &&
self->slowReplies / double(self->slowReplies + self->fastReplies) >= SERVER_KNOBS->PEEK_STATS_SLOW_RATIO) {
TraceEvent("ConnectionResetSlowPeek", self->randomID)
.detail("PeerAddress", addr)
.detail("SlowReplies", self->slowReplies)
.detail("FastReplies", self->fastReplies)
.detail("UnknownReplies", self->unknownReplies);
FlowTransport::transport().resetConnection(addr);
self->lastReset = now();
}
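// Illustration (standalone model of the check above): a peer connection is reset only when the
// interval saw enough slow replies (PEEK_STATS_SLOW_AMOUNT) and the slow fraction crossed
// PEEK_STATS_SLOW_RATIO.
bool shouldResetConnection(int slowReplies, int fastReplies, int slowAmount, double slowRatio) {
    if (slowReplies < slowAmount) return false; // too little evidence in this interval
    return slowReplies / double(slowReplies + fastReplies) >= slowRatio;
}
// e.g. shouldResetConnection(20, 15, 20, 0.5) is true: 20 slow out of 35 replies is about 57%.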

File diff suppressed because it is too large

View File

@ -342,7 +342,7 @@ ACTOR Future<Void> updateMetricRegistration(Database cx, MetricsConfig *config,
loop {
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
try {
Value timestamp = BinaryWriter::toValue(CompressedInt<int64_t>(now()), AssumeVersion(currentProtocolVersion));
Value timestamp = BinaryWriter::toValue(CompressedInt<int64_t>(now()), AssumeVersion(g_network->protocolVersion()));
for(auto &key : keys) {
//fprintf(stderr, "%s: register: %s\n", collection->address.toString().c_str(), printable(key).c_str());
tr.set(key, timestamp);

View File

@ -51,7 +51,7 @@ TraceEvent debugKeyRangeEnabled( const char* context, Version version, KeyRangeR
}
TraceEvent debugTagsAndMessageEnabled( const char* context, Version version, StringRef commitBlob ) {
BinaryReader rdr(commitBlob, AssumeVersion(currentProtocolVersion));
BinaryReader rdr(commitBlob, AssumeVersion(g_network->protocolVersion()));
while (!rdr.empty()) {
if (*(int32_t*)rdr.peekBytes(4) == VERSION_HEADER) {
int32_t dummy;

View File

@ -1371,7 +1371,7 @@ void peekMessagesFromMemory( Reference<LogData> self, TLogPeekRequest const& req
ACTOR Future<std::vector<StringRef>> parseMessagesForTag( StringRef commitBlob, Tag tag, int logRouters ) {
// See the comment in LogSystem.cpp for the binary format of commitBlob.
state std::vector<StringRef> relevantMessages;
state BinaryReader rd(commitBlob, AssumeVersion(currentProtocolVersion));
state BinaryReader rd(commitBlob, AssumeVersion(g_network->protocolVersion()));
while (!rd.empty()) {
TagsAndMessage tagsAndMessage;
tagsAndMessage.loadFromArena(&rd, nullptr);
@ -2753,7 +2753,7 @@ ACTOR Future<Void> tLogStart( TLogData* self, InitializeTLogRequest req, Localit
bool recovering = (req.recoverFrom.logSystemType == LogSystemType::tagPartitioned);
state Reference<LogData> logData = Reference<LogData>( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID, currentProtocolVersion, req.allTags, recovering ? "Recovered" : "Recruited") );
state Reference<LogData> logData = Reference<LogData>( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID, g_network->protocolVersion(), req.allTags, recovering ? "Recovered" : "Recruited") );
self->id_data[recruited.id()] = logData;
logData->locality = req.locality;
logData->recoveryCount = req.epoch;

View File

@ -397,7 +397,7 @@ ACTOR static Future<Void> _parsePartitionedLogFileOnLoader(
// only one clear mutation is generated (i.e., always inserted).
ASSERT(inserted);
ArenaReader rd(buf.arena(), StringRef(message, msgSize), AssumeVersion(currentProtocolVersion));
ArenaReader rd(buf.arena(), StringRef(message, msgSize), AssumeVersion(g_network->protocolVersion()));
MutationRef mutation;
rd >> mutation;

View File

@ -18,8 +18,10 @@
* limitations under the License.
*/
#include <cstdint>
#include <fstream>
#include <ostream>
#include "fdbrpc/Locality.h"
#include "fdbrpc/simulator.h"
#include "fdbclient/DatabaseContext.h"
#include "fdbserver/TesterInterface.actor.h"
@ -33,7 +35,9 @@
#include "fdbclient/NativeAPI.actor.h"
#include "fdbclient/BackupAgent.actor.h"
#include "fdbclient/versions.h"
#include "flow/ProtocolVersion.h"
#include "flow/actorcompiler.h" // This must be the last #include.
#include "flow/network.h"
#undef max
#undef min
@ -47,9 +51,9 @@ bool destructed = false;
template <class T>
T simulate( const T& in ) {
BinaryWriter writer(AssumeVersion(currentProtocolVersion));
BinaryWriter writer(AssumeVersion(g_network->protocolVersion()));
writer << in;
BinaryReader reader( writer.getData(), writer.getLength(), AssumeVersion(currentProtocolVersion) );
BinaryReader reader( writer.getData(), writer.getLength(), AssumeVersion(g_network->protocolVersion()) );
T out;
reader >> out;
return out;
@ -137,7 +141,7 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<ClusterConnec
std::string* dataFolder, std::string* coordFolder,
std::string baseFolder, ClusterConnectionString connStr,
bool useSeedFile, AgentMode runBackupAgents,
std::string whitelistBinPaths) {
std::string whitelistBinPaths, ProtocolVersion protocolVersion) {
state ISimulator::ProcessInfo *simProcess = g_simulator.getCurrentProcess();
state UID randomId = nondeterministicRandom()->randomUniqueID();
state int cycles = 0;
@ -154,7 +158,7 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<ClusterConnec
state ISimulator::ProcessInfo* process =
g_simulator.newProcess("Server", ip, port, sslEnabled, listenPerProcess, localities, processClass, dataFolder->c_str(),
coordFolder->c_str());
coordFolder->c_str(), protocolVersion);
wait(g_simulator.onProcess(process,
TaskPriority::DefaultYield)); // Now switch execution to the process on which we will run
state Future<ISimulator::KillType> onShutdown = process->onShutdown();
@ -298,7 +302,7 @@ std::map< Optional<Standalone<StringRef>>, std::vector< std::vector< std::string
// process count is no longer needed because it is now the length of the vector of ip's, because it was one ip per process
ACTOR Future<Void> simulatedMachine(ClusterConnectionString connStr, std::vector<IPAddress> ips, bool sslEnabled, LocalityData localities,
ProcessClass processClass, std::string baseFolder, bool restarting,
bool useSeedFile, AgentMode runBackupAgents, bool sslOnly, std::string whitelistBinPaths) {
bool useSeedFile, AgentMode runBackupAgents, bool sslOnly, std::string whitelistBinPaths, ProtocolVersion protocolVersion) {
state int bootCount = 0;
state std::vector<std::string> myFolders;
state std::vector<std::string> coordFolders;
@ -341,7 +345,13 @@ ACTOR Future<Void> simulatedMachine(ClusterConnectionString connStr, std::vector
Reference<ClusterConnectionFile> clusterFile(useSeedFile ? new ClusterConnectionFile(path, connStr.toString()) : new ClusterConnectionFile(path));
const int listenPort = i*listenPerProcess + 1;
AgentMode agentMode = runBackupAgents == AgentOnly ? ( i == ips.size()-1 ? AgentOnly : AgentNone ) : runBackupAgents;
processes.push_back(simulatedFDBDRebooter(clusterFile, ips[i], sslEnabled, listenPort, listenPerProcess, localities, processClass, &myFolders[i], &coordFolders[i], baseFolder, connStr, useSeedFile, agentMode, whitelistBinPaths));
if(g_simulator.hasDiffProtocolProcess && !g_simulator.setDiffProtocol && agentMode == AgentNone) {
processes.push_back(simulatedFDBDRebooter(clusterFile, ips[i], sslEnabled, listenPort, listenPerProcess, localities, processClass, &myFolders[i], &coordFolders[i], baseFolder, connStr, useSeedFile, agentMode, whitelistBinPaths, protocolVersion));
g_simulator.setDiffProtocol = true;
}
else {
processes.push_back(simulatedFDBDRebooter(clusterFile, ips[i], sslEnabled, listenPort, listenPerProcess, localities, processClass, &myFolders[i], &coordFolders[i], baseFolder, connStr, useSeedFile, agentMode, whitelistBinPaths, g_network->protocolVersion()));
}
TraceEvent("SimulatedMachineProcess", randomId).detail("Address", NetworkAddress(ips[i], listenPort, true, false)).detail("ZoneId", localities.zoneId()).detail("DataHall", localities.dataHallId()).detail("Folder", myFolders[i]);
}
@ -546,7 +556,7 @@ IPAddress makeIPAddressForSim(bool isIPv6, std::array<int, 4> parts) {
ACTOR Future<Void> restartSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFolder, int* pTesterCount,
Optional<ClusterConnectionString>* pConnString,
Standalone<StringRef>* pStartingConfiguration,
int extraDB, std::string whitelistBinPaths) {
int extraDB, std::string whitelistBinPaths, ProtocolVersion protocolVersion) {
CSimpleIni ini;
ini.SetUnicode();
ini.LoadFile(joinPath(baseFolder, "restartInfo.ini").c_str());
@ -645,7 +655,7 @@ ACTOR Future<Void> restartSimulatedSystem(vector<Future<Void>>* systemActors, st
simulatedMachine(conn, ipAddrs, usingSSL, localities, processClass, baseFolder, true,
i == useSeedForMachine, AgentAddition,
usingSSL && (listenersPerProcess == 1 || processClass == ProcessClass::TesterClass),
whitelistBinPaths),
whitelistBinPaths, protocolVersion),
processClass == ProcessClass::TesterClass ? "SimulatedTesterMachine" : "SimulatedMachine"));
}
@ -1052,8 +1062,8 @@ void SimulationConfig::generateNormalConfig(int minimumReplication, int minimumR
void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFolder, int* pTesterCount,
Optional<ClusterConnectionString>* pConnString, Standalone<StringRef>* pStartingConfiguration,
int extraDB, int minimumReplication, int minimumRegions, std::string whitelistBinPaths,
bool configureLocked, int logAntiQuorum) {
int extraDB, int minimumReplication, int minimumRegions, std::string whitelistBinPaths, bool configureLocked,
int logAntiQuorum, ProtocolVersion protocolVersion) {
// SOMEDAY: this does not test multi-interface configurations
SimulationConfig simconfig(extraDB, minimumReplication, minimumRegions);
if (logAntiQuorum != -1) {
@ -1218,6 +1228,7 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
bool requiresExtraDBMachines = extraDB && g_simulator.extraDB->toString() != conn.toString();
int assignedMachines = 0, nonVersatileMachines = 0;
std::vector<ProcessClass::ClassType> processClassesSubSet = {ProcessClass::UnsetClass, ProcessClass::ResolutionClass, ProcessClass::MasterClass};
for( int dc = 0; dc < dataCenters; dc++ ) {
//FIXME: test unset dcID
Optional<Standalone<StringRef>> dcUID = StringRef(format("%d", dc));
@ -1275,7 +1286,7 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
LocalityData localities(Optional<Standalone<StringRef>>(), zoneId, machineId, dcUID);
localities.set(LiteralStringRef("data_hall"), dcUID);
systemActors->push_back(reportErrors(simulatedMachine(conn, ips, sslEnabled,
localities, processClass, baseFolder, false, machine == useSeedForMachine, requiresExtraDBMachines ? AgentOnly : AgentAddition, sslOnly, whitelistBinPaths ), "SimulatedMachine"));
localities, processClass, baseFolder, false, machine == useSeedForMachine, requiresExtraDBMachines ? AgentOnly : AgentAddition, sslOnly, whitelistBinPaths, protocolVersion ), "SimulatedMachine"));
if (requiresExtraDBMachines) {
std::vector<IPAddress> extraIps;
@ -1289,7 +1300,7 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
localities.set(LiteralStringRef("data_hall"), dcUID);
systemActors->push_back(reportErrors(simulatedMachine(*g_simulator.extraDB, extraIps, sslEnabled,
localities,
processClass, baseFolder, false, machine == useSeedForMachine, AgentNone, sslOnly, whitelistBinPaths ), "SimulatedMachine"));
processClass, baseFolder, false, machine == useSeedForMachine, AgentNone, sslOnly, whitelistBinPaths, protocolVersion ), "SimulatedMachine"));
}
assignedMachines++;
@ -1313,13 +1324,18 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
std::vector<IPAddress> ips;
ips.push_back(makeIPAddressForSim(useIPv6, { 3, 4, 3, i + 1 }));
Standalone<StringRef> newZoneId = Standalone<StringRef>(deterministicRandom()->randomUniqueID().toString());
LocalityData localities(Optional<Standalone<StringRef>>(), newZoneId, newZoneId, Optional<Standalone<StringRef>>());
LocalityData localities(Optional<Standalone<StringRef>>(), newZoneId, newZoneId, Optional<Standalone<StringRef>>());
systemActors->push_back( reportErrors( simulatedMachine(
conn, ips, sslEnabled && sslOnly,
localities, ProcessClass(ProcessClass::TesterClass, ProcessClass::CommandLineSource),
baseFolder, false, i == useSeedForMachine, AgentNone, sslEnabled && sslOnly, whitelistBinPaths ),
baseFolder, false, i == useSeedForMachine, AgentNone, sslEnabled && sslOnly, whitelistBinPaths, protocolVersion ),
"SimulatedTesterMachine") );
}
if(g_simulator.setDiffProtocol) {
--(*pTesterCount);
}
*pStartingConfiguration = startingConfigString;
// save some state that we only need when restarting the simulator.
@ -1337,7 +1353,7 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
}
void checkTestConf(const char* testFile, int& extraDB, int& minimumReplication, int& minimumRegions,
int& configureLocked, int& logAntiQuorum) {
int& configureLocked, int& logAntiQuorum, bool& startIncompatibleProcess) {
std::ifstream ifs;
ifs.open(testFile, std::ifstream::in);
if (!ifs.good())
@ -1371,7 +1387,11 @@ void checkTestConf(const char* testFile, int& extraDB, int& minimumReplication,
}
if (attrib == "configureLocked") {
sscanf(value.c_str(), "%d", &configureLocked);
sscanf( value.c_str(), "%d", &configureLocked );
}
if (attrib == "startIncompatibleProcess") {
startIncompatibleProcess = strcmp(value.c_str(), "true") == 0;
}
if (attrib == "logAntiQuorum") {
sscanf(value.c_str(), "%d", &logAntiQuorum);
@ -1391,7 +1411,17 @@ ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool reboot
state int minimumRegions = 0;
state int configureLocked = 0;
state int logAntiQuorum = -1;
checkTestConf(testFile, extraDB, minimumReplication, minimumRegions, configureLocked, logAntiQuorum);
state bool startIncompatibleProcess = false;
checkTestConf(testFile, extraDB, minimumReplication, minimumRegions, configureLocked, logAntiQuorum, startIncompatibleProcess);
g_simulator.hasDiffProtocolProcess = startIncompatibleProcess;
g_simulator.setDiffProtocol = false;
state ProtocolVersion protocolVersion = currentProtocolVersion;
if(startIncompatibleProcess) {
// isolate the rightmost 1 bit of compatibleProtocolVersionMask to make this protocolVersion incompatible
uint64_t minAddToMakeIncompatible = ProtocolVersion::compatibleProtocolVersionMask & ~(ProtocolVersion::compatibleProtocolVersionMask-1);
protocolVersion = ProtocolVersion(currentProtocolVersion.version() + minAddToMakeIncompatible);
}
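// Illustration (standalone check; the mask and version values below are examples, not the real
// constants): x & ~(x - 1) isolates the lowest set bit of x, so adding that bit to the version is
// the smallest change that alters the bits the compatibility check compares.
#include <cassert>
#include <cstdint>

int main() {
    const uint64_t mask = 0xFFFFFFFFFFFF0000ULL;   // example: compatible if the upper 48 bits match
    const uint64_t lowestBit = mask & ~(mask - 1); // == 0x10000, the lowest set bit of the mask
    assert(lowestBit == 0x10000ULL);
    const uint64_t version = 0x0FDB00B070010001ULL; // example protocol version value
    assert(((version + lowestBit) & mask) != (version & mask)); // now incompatible
    return 0;
}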
// TODO (IPv6) Use IPv6?
wait(g_simulator.onProcess(
@ -1400,7 +1430,7 @@ ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool reboot
Standalone<StringRef>(deterministicRandom()->randomUniqueID().toString()),
Standalone<StringRef>(deterministicRandom()->randomUniqueID().toString()),
Optional<Standalone<StringRef>>()),
ProcessClass(ProcessClass::TesterClass, ProcessClass::CommandLineSource), "", ""),
ProcessClass(ProcessClass::TesterClass, ProcessClass::CommandLineSource), "", "", currentProtocolVersion),
TaskPriority::DefaultYield));
Sim2FileSystem::newFileSystem();
FlowTransport::createInstance(true, 1);
@ -1409,7 +1439,7 @@ ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool reboot
try {
//systemActors.push_back( startSystemMonitor(dataFolder) );
if (rebooting) {
wait( timeoutError( restartSimulatedSystem( &systemActors, dataFolder, &testerCount, &connFile, &startingConfiguration, extraDB, whitelistBinPaths), 100.0 ) );
wait( timeoutError( restartSimulatedSystem( &systemActors, dataFolder, &testerCount, &connFile, &startingConfiguration, extraDB, whitelistBinPaths, protocolVersion), 100.0 ) );
// FIXME: snapshot restore does not support multi-region restore, hence restore it as single region always
if (restoring) {
startingConfiguration = LiteralStringRef("usable_regions=1");
@ -1418,7 +1448,7 @@ ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool reboot
else {
g_expect_full_pointermap = 1;
setupSimulatedSystem(&systemActors, dataFolder, &testerCount, &connFile, &startingConfiguration, extraDB,
minimumReplication, minimumRegions, whitelistBinPaths, configureLocked, logAntiQuorum);
minimumReplication, minimumRegions, whitelistBinPaths, configureLocked, logAntiQuorum, protocolVersion);
wait( delay(1.0) ); // FIXME: WHY!!! //wait for machines to boot
}
std::string clusterFileDir = joinPath( dataFolder, deterministicRandom()->randomUniqueID().toString() );

View File

@ -1119,7 +1119,7 @@ ACTOR static Future<JsonBuilderObject> recoveryStateStatusFetcher(Database cx, W
}
ACTOR static Future<double> doGrvProbe(Transaction *tr, Optional<FDBTransactionOptions::Option> priority = Optional<FDBTransactionOptions::Option>()) {
state double start = timer_monotonic();
state double start = g_network->timer_monotonic();
loop {
try {
@ -1129,7 +1129,7 @@ ACTOR static Future<double> doGrvProbe(Transaction *tr, Optional<FDBTransactionO
}
wait(success(tr->getReadVersion()));
return timer_monotonic() - start;
return g_network->timer_monotonic() - start;
}
catch(Error &e) {
wait(tr->onError(e));
@ -1143,13 +1143,13 @@ ACTOR static Future<double> doReadProbe(Future<double> grvProbe, Transaction *tr
throw grv.getError();
}
state double start = timer_monotonic();
state double start = g_network->timer_monotonic();
loop {
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
try {
Optional<Standalone<StringRef> > _ = wait(tr->get(LiteralStringRef("\xff/StatusJsonTestKey62793")));
return timer_monotonic() - start;
return g_network->timer_monotonic() - start;
}
catch(Error &e) {
wait(tr->onError(e));
@ -1167,7 +1167,7 @@ ACTOR static Future<double> doCommitProbe(Future<double> grvProbe, Transaction *
ASSERT(sourceTr->getReadVersion().isReady());
tr->setVersion(sourceTr->getReadVersion().get());
state double start = timer_monotonic();
state double start = g_network->timer_monotonic();
loop {
try {
@ -1175,7 +1175,7 @@ ACTOR static Future<double> doCommitProbe(Future<double> grvProbe, Transaction *
tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
tr->makeSelfConflicting();
wait(tr->commit());
return timer_monotonic() - start;
return g_network->timer_monotonic() - start;
}
catch(Error &e) {
wait(tr->onError(e));
@ -2518,7 +2518,7 @@ ACTOR Future<StatusReply> clusterGetStatus(
state JsonBuilderObject qos;
state JsonBuilderObject data_overlay;
statusObj["protocol_version"] = format("%" PRIx64, currentProtocolVersion.version());
statusObj["protocol_version"] = format("%" PRIx64, g_network->protocolVersion().version());
statusObj["connection_string"] = coordinators.ccf->getConnectionString().toString();
state Optional<DatabaseConfiguration> configuration;
@ -2739,7 +2739,7 @@ ACTOR Future<StatusReply> clusterGetStatus(
statusObj["messages"] = messages;
int64_t clusterTime = time(0);
int64_t clusterTime = g_network->timer();
if (clusterTime != -1){
statusObj["cluster_controller_timestamp"] = clusterTime;
}

View File

@ -1752,7 +1752,7 @@ ACTOR Future<Void> pullAsyncData( StorageCacheData *data ) {
//TODO cache servers should write the LogProtocolMessage when they are created
//cloneCursor1->setProtocolVersion(data->logProtocol);
cloneCursor1->setProtocolVersion(currentProtocolVersion);
cloneCursor1->setProtocolVersion(g_network->protocolVersion());
for (; cloneCursor1->hasMessage(); cloneCursor1->nextMessage()) {
ArenaReader& cloneReader = *cloneCursor1->reader();
@ -1820,7 +1820,7 @@ ACTOR Future<Void> pullAsyncData( StorageCacheData *data ) {
//FIXME: ensure this can only read data from the current version
//cloneCursor2->setProtocolVersion(data->logProtocol);
cloneCursor2->setProtocolVersion(currentProtocolVersion);
cloneCursor2->setProtocolVersion(g_network->protocolVersion());
ver = invalidVersion;
// Now process the mutations
@ -1951,7 +1951,7 @@ ACTOR Future<Void> storageCacheStartUpWarmup(StorageCacheData* self) {
state Transaction tr(self->cx);
state Value trueValue = storageCacheValue(std::vector<uint16_t>{ 0 });
state Value falseValue = storageCacheValue(std::vector<uint16_t>{});
state MutationRef privatized;
state Standalone<MutationRef> privatized;
privatized.type = MutationRef::SetValue;
state Version readVersion;
try {
@ -1969,7 +1969,7 @@ ACTOR Future<Void> storageCacheStartUpWarmup(StorageCacheData* self) {
ASSERT(currCached == (kv.value == falseValue));
if (kv.value == trueValue) {
begin = kv.key;
privatized.param1 = begin.withPrefix(systemKeys.begin);
privatized.param1 = begin.withPrefix(systemKeys.begin, privatized.arena());
privatized.param2 = serverKeysTrue;
//TraceEvent(SevDebug, "SCStartupFetch", self->thisServerID).
// detail("BeginKey", begin.substr(storageCacheKeys.begin.size())).
@ -1979,7 +1979,7 @@ ACTOR Future<Void> storageCacheStartUpWarmup(StorageCacheData* self) {
} else {
currCached = false;
end = kv.key;
privatized.param1 = begin.withPrefix(systemKeys.begin);
privatized.param1 = begin.withPrefix(systemKeys.begin, privatized.arena());
privatized.param2 = serverKeysFalse;
//TraceEvent(SevDebug, "SCStartupFetch", self->thisServerID).detail("EndKey", end.substr(storageCacheKeys.begin.size())).
// detail("ReadVersion", readVersion).detail("DataVersion", self->version.get());

View File

@ -27,6 +27,11 @@
#include "fdbserver/Knobs.h"
#include "flow/actorcompiler.h" // This must be the last #include.
const StringRef STORAGESERVER_HISTOGRAM_GROUP = LiteralStringRef("StorageServer");
const StringRef FETCH_KEYS_LATENCY_HISTOGRAM = LiteralStringRef("FetchKeysLatency");
const StringRef FETCH_KEYS_BYTES_HISTOGRAM = LiteralStringRef("FetchKeysSize");
const StringRef FETCH_KEYS_BYTES_PER_SECOND_HISTOGRAM = LiteralStringRef("FetchKeysBandwidth");
struct StorageMetricSample {
IndexedSet<Key, int64_t> sample;
int64_t metricUnitsPerSample;

View File

@ -44,6 +44,7 @@
#include "fdbserver/WaitFailure.h"
#include "fdbserver/RecoveryState.h"
#include "fdbserver/FDBExecHelper.actor.h"
#include "flow/Histogram.h"
#include "flow/actorcompiler.h" // This must be the last #include.
using std::pair;
@ -341,6 +342,7 @@ struct TLogData : NonCopyable {
FlowLock concurrentLogRouterReads;
FlowLock persistentDataCommitLock;
// Beginning of fields used by snapshot based backup and restore
bool ignorePopRequest; // ignore pop request from storage servers
double ignorePopDeadline; // time until which the ignorePopRequest will be
// honored
@ -352,19 +354,26 @@ struct TLogData : NonCopyable {
std::map<Tag, Version> toBePopped; // map of Tag->Version for all the pops
// that came when ignorePopRequest was set
Reference<AsyncVar<bool>> degraded;
// End of fields used by snapshot based backup and restore
std::vector<TagsAndMessage> tempTagMessages;
TLogData(UID dbgid, UID workerID, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> dbInfo, Reference<AsyncVar<bool>> degraded, std::string folder)
: dbgid(dbgid), workerID(workerID), instanceID(deterministicRandom()->randomUniqueID().first()),
persistentData(persistentData), rawPersistentQueue(persistentQueue), persistentQueue(new TLogQueue(persistentQueue, dbgid)),
dbInfo(dbInfo), degraded(degraded), queueCommitBegin(0), queueCommitEnd(0),
diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false), bytesInput(0), bytesDurable(0), targetVolatileBytes(SERVER_KNOBS->TLOG_SPILL_THRESHOLD), overheadBytesInput(0), overheadBytesDurable(0),
peekMemoryLimiter(SERVER_KNOBS->TLOG_SPILL_REFERENCE_MAX_PEEK_MEMORY_BYTES),
concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS),
ignorePopRequest(false), ignorePopDeadline(), ignorePopUid(), dataFolder(folder), toBePopped()
{
cx = openDBOnServer(dbInfo, TaskPriority::DefaultEndpoint, true, true);
}
Reference<Histogram> commitLatencyDist;
TLogData(UID dbgid, UID workerID, IKeyValueStore* persistentData, IDiskQueue* persistentQueue,
Reference<AsyncVar<ServerDBInfo>> dbInfo, Reference<AsyncVar<bool>> degraded, std::string folder)
: dbgid(dbgid), workerID(workerID), instanceID(deterministicRandom()->randomUniqueID().first()),
persistentData(persistentData), rawPersistentQueue(persistentQueue),
persistentQueue(new TLogQueue(persistentQueue, dbgid)), dbInfo(dbInfo), degraded(degraded), queueCommitBegin(0),
queueCommitEnd(0), diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false), bytesInput(0), bytesDurable(0),
targetVolatileBytes(SERVER_KNOBS->TLOG_SPILL_THRESHOLD), overheadBytesInput(0), overheadBytesDurable(0),
peekMemoryLimiter(SERVER_KNOBS->TLOG_SPILL_REFERENCE_MAX_PEEK_MEMORY_BYTES),
concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS), ignorePopRequest(false),
ignorePopDeadline(), ignorePopUid(), dataFolder(folder), toBePopped(),
commitLatencyDist(Histogram::getHistogram(LiteralStringRef("tLog"), LiteralStringRef("commit"),
Histogram::Unit::microseconds)) {
cx = openDBOnServer(dbInfo, TaskPriority::DefaultEndpoint, true, true);
}
};
struct LogData : NonCopyable, public ReferenceCounted<LogData> {
@ -454,13 +463,19 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
bool stopped, initialized;
DBRecoveryCount recoveryCount;
VersionMetricHandle persistentDataVersion, persistentDataDurableVersion; // The last version number in the portion of the log (written|durable) to persistentData
NotifiedVersion version, queueCommittedVersion;
// If persistentDataVersion != persistentDataDurableVersion,
// then spilling is happening from persistentDataDurableVersion to persistentDataVersion.
// Data less than persistentDataDurableVersion is spilled on disk (or fully popped from the TLog).
VersionMetricHandle persistentDataVersion, persistentDataDurableVersion; // The last version number in the portion of the log (written|durable) to persistentData
NotifiedVersion version;
NotifiedVersion queueCommittedVersion; // The disk queue has committed up to queueCommittedVersion.
Version queueCommittingVersion;
Version knownCommittedVersion, durableKnownCommittedVersion, minKnownCommittedVersion;
Version queuePoppedVersion;
Version knownCommittedVersion; // The maximum version that a proxy has told us is committed (all TLogs have
// ack'd a commit for this version).
Version durableKnownCommittedVersion, minKnownCommittedVersion;
Version queuePoppedVersion; // The disk queue has been popped up to the location that corresponds to this version.
Version minPoppedTagVersion;
Tag minPoppedTag;
Tag minPoppedTag; // The tag that causes the TLog to hold onto its data, making the TLog's disk queue grow.
Deque<std::pair<Version, Standalone<VectorRef<uint8_t>>>> messageBlocks;
std::vector<std::vector<Reference<TagData>>> tag_data; //tag.locality | tag.id
@ -503,7 +518,8 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
Version unrecoveredBefore, recoveredAt;
struct PeekTrackerData {
std::map<int, Promise<std::pair<Version, bool>>> sequence_version;
std::map<int, Promise<std::pair<Version, bool>>>
sequence_version; // second: the Version is the begin version of the peek; the bool is onlySpilled
double lastUpdate;
Tag tag;
@ -578,12 +594,15 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
queueCommittedVersion.initMetric(LiteralStringRef("TLog.QueueCommittedVersion"), cc.id);
specialCounter(cc, "Version", [this](){ return this->version.get(); });
specialCounter(cc, "QueueCommittedVersion", [this](){ return this->queueCommittedVersion.get(); });
specialCounter(cc, "QueueCommittedVersion", [this]() { return this->queueCommittedVersion.get(); });
specialCounter(cc, "PersistentDataVersion", [this](){ return this->persistentDataVersion; });
specialCounter(cc, "PersistentDataDurableVersion", [this](){ return this->persistentDataDurableVersion; });
specialCounter(cc, "KnownCommittedVersion", [this](){ return this->knownCommittedVersion; });
specialCounter(cc, "QueuePoppedVersion", [this](){ return this->queuePoppedVersion; });
specialCounter(cc, "MinPoppedTagVersion", [this](){ return this->minPoppedTagVersion; });
specialCounter(cc, "MinPoppedTagVersion", [this]() { return this->minPoppedTagVersion; });
// The locality and id of the tag that is responsible for making the TLog hold onto its oldest piece of data.
// If disk queues are growing and no one is sure why, look at these counters to find the tag that is
// preventing the TLog from throwing away data.
specialCounter(cc, "MinPoppedTagLocality", [this](){ return this->minPoppedTag.locality; });
specialCounter(cc, "MinPoppedTagId", [this](){ return this->minPoppedTag.id; });
specialCounter(cc, "SharedBytesInput", [tLogData](){ return tLogData->bytesInput; });
@ -600,6 +619,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
specialCounter(cc, "QueueDiskBytesTotal", [tLogData](){ return tLogData->rawPersistentQueue->getStorageBytes().total; });
specialCounter(cc, "PeekMemoryReserved", [tLogData]() { return tLogData->peekMemoryLimiter.activePermits(); });
specialCounter(cc, "PeekMemoryRequestsStalled", [tLogData]() { return tLogData->peekMemoryLimiter.waiters(); });
specialCounter(cc, "Geneartion", [this]() { return this->recoveryCount; });
}
~LogData() {
@ -804,6 +824,9 @@ ACTOR Future<Void> updatePoppedLocation( TLogData* self, Reference<LogData> logD
return Void();
}
// It runs against the oldest TLog instance, calculates the first location in the disk queue that contains un-popped
// data, and then issues a pop to the disk queue at that location so that anything earlier can be
// removed/forgotten/overwritten. In effect, it applies TLogPop RPCs to disk.
ACTOR Future<Void> popDiskQueue( TLogData* self, Reference<LogData> logData ) {
if (!logData->initialized) return Void();
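// Illustration (standalone sketch, hypothetical types) of the location computation described
// above: the disk queue can only be trimmed up to the smallest location still referenced by an
// un-popped tag.
#include <algorithm>
#include <cstdint>
#include <vector>
using QueueLocation = int64_t;

QueueLocation computePopLocation(const std::vector<QueueLocation>& firstUnpoppedPerTag,
                                 QueueLocation queueEnd) {
    QueueLocation minLoc = queueEnd; // if every tag is fully popped, the whole queue can go
    for (QueueLocation loc : firstUnpoppedPerTag) minLoc = std::min(minLoc, loc);
    return minLoc; // everything before this location may be removed/forgotten/overwritten
}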
@ -1018,20 +1041,6 @@ ACTOR Future<Void> updatePersistentData( TLogData* self, Reference<LogData> logD
}
ACTOR Future<Void> tLogPopCore( TLogData* self, Tag inputTag, Version to, Reference<LogData> logData ) {
if (self->ignorePopRequest) {
TraceEvent(SevDebug, "IgnoringPopRequest").detail("IgnorePopDeadline", self->ignorePopDeadline);
if (self->toBePopped.find(inputTag) == self->toBePopped.end()
|| to > self->toBePopped[inputTag]) {
self->toBePopped[inputTag] = to;
}
// add the pop to the toBePopped map
TraceEvent(SevDebug, "IgnoringPopRequest")
.detail("IgnorePopDeadline", self->ignorePopDeadline)
.detail("Tag", inputTag.toString())
.detail("Version", to);
return Void();
}
state Version upTo = to;
int8_t tagLocality = inputTag.locality;
if (isPseudoLocality(tagLocality)) {
@ -1067,38 +1076,60 @@ ACTOR Future<Void> tLogPopCore( TLogData* self, Tag inputTag, Version to, Refere
return Void();
}
ACTOR Future<Void> processPopRequests(TLogData* self, Reference<LogData> logData) {
state std::vector<Future<Void>> ignoredPops;
state std::map<Tag, Version>::const_iterator it;
state int ignoredPopsPlayed = 0;
state std::map<Tag, Version> toBePopped;
toBePopped = std::move(self->toBePopped);
self->toBePopped.clear();
self->ignorePopRequest = false;
self->ignorePopDeadline = 0.0;
self->ignorePopUid = "";
for (it = toBePopped.cbegin(); it != toBePopped.cend(); ++it) {
const auto& [tag, version] = *it;
TraceEvent("PlayIgnoredPop").detail("Tag", tag.toString()).detail("Version", version);
ignoredPops.push_back(tLogPopCore(self, tag, version, logData));
if (++ignoredPopsPlayed % SERVER_KNOBS->TLOG_POP_BATCH_SIZE == 0) {
TEST(true); // Yielding while processing pop requests
wait(yield());
}
}
wait(waitForAll(ignoredPops));
return Void();
}
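// Illustration (standalone sketch, hypothetical names): the TLOG_POP_BATCH_SIZE loop above is an
// instance of a general pattern that hands control back to the scheduler every batchSize items so
// a large backlog of queued pops cannot starve higher-priority work.
#include <functional>
#include <vector>

void processInBatches(const std::vector<std::function<void()>>& work, int batchSize,
                      const std::function<void()>& yieldToScheduler) { // stands in for wait(yield())
    int played = 0;
    for (const auto& item : work) {
        item();
        if (++played % batchSize == 0) yieldToScheduler();
    }
}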
ACTOR Future<Void> tLogPop( TLogData* self, TLogPopRequest req, Reference<LogData> logData ) {
// timeout check for ignorePopRequest
if (self->ignorePopRequest && (g_network->now() > self->ignorePopDeadline)) {
TraceEvent("EnableTLogPlayAllIgnoredPops");
// use toBePopped and issue all the pops
std::map<Tag, Version>::iterator it;
vector<Future<Void>> ignoredPops;
self->ignorePopRequest = false;
self->ignorePopUid = "";
self->ignorePopDeadline = 0.0;
for (it = self->toBePopped.begin(); it != self->toBePopped.end(); it++) {
TraceEvent("PlayIgnoredPop")
.detail("Tag", it->first.toString())
.detail("Version", it->second);
ignoredPops.push_back(tLogPopCore(self, it->first, it->second, logData));
}
self->toBePopped.clear();
wait(waitForAll(ignoredPops));
TraceEvent("EnableTLogPlayAllIgnoredPops").detail("IgnoredPopDeadline", self->ignorePopDeadline);
wait(processPopRequests(self, logData));
TraceEvent("ResetIgnorePopRequest")
.detail("Now", g_network->now())
.detail("IgnorePopRequest", self->ignorePopRequest)
.detail("IgnorePopDeadline", self->ignorePopDeadline);
}
wait(tLogPopCore(self, req.tag, req.to, logData));
if (self->ignorePopRequest) {
TraceEvent(SevDebug, "IgnoringPopRequest").detail("IgnorePopDeadline", self->ignorePopDeadline);
auto& v = self->toBePopped[req.tag];
v = std::max(v, req.to);
TraceEvent(SevDebug, "IgnoringPopRequest")
.detail("IgnorePopDeadline", self->ignorePopDeadline)
.detail("Tag", req.tag.toString())
.detail("Version", req.to);
} else {
wait(tLogPopCore(self, req.tag, req.to, logData));
}
req.reply.send(Void());
return Void();
}
// This function (and updatePersistentData, which is called by this function) run at a low priority and can soak up all CPU resources.
// For this reason, they employ aggressive use of yields to avoid causing slow tasks that could introduce latencies for more important
// work (e.g. commits).
// This function (and updatePersistentData, which is called by this function) run at a low priority and can soak up all
// CPU resources. For this reason, they employ aggressive use of yields to avoid causing slow tasks that could introduce
// latencies for more important work (e.g. commits).
// This actor is just a loop that calls updatePersistentData and popDiskQueue whenever
// (a) there's data to be spilled or (b) we should update metadata after some commits have been fully popped.
ACTOR Future<Void> updateStorage( TLogData* self ) {
while(self->spillOrder.size() && !self->id_data.count(self->spillOrder.front())) {
self->spillOrder.pop_front();
@ -1400,7 +1431,7 @@ void peekMessagesFromMemory( Reference<LogData> self, TLogPeekRequest const& req
ACTOR Future<std::vector<StringRef>> parseMessagesForTag( StringRef commitBlob, Tag tag, int logRouters ) {
// See the comment in LogSystem.cpp for the binary format of commitBlob.
state std::vector<StringRef> relevantMessages;
state BinaryReader rd(commitBlob, AssumeVersion(currentProtocolVersion));
state BinaryReader rd(commitBlob, AssumeVersion(g_network->protocolVersion()));
while (!rd.empty()) {
TagsAndMessage tagsAndMessage;
tagsAndMessage.loadFromArena(&rd, nullptr);
@ -1891,7 +1922,11 @@ ACTOR Future<Void> tLogCommit(
return Void();
}
if (logData->version.get() == req.prevVersion) { // Not a duplicate (check relies on critical section between here self->version.set() below!)
state double beforeCommitT = now();
// Not a duplicate (check relies on the critical section between here and self->version.set() below!)
state bool isNotDuplicate = (logData->version.get() == req.prevVersion);
if (isNotDuplicate) {
if(req.debugID.present())
g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.Before");
@ -1929,6 +1964,10 @@ ACTOR Future<Void> tLogCommit(
return Void();
}
if (isNotDuplicate) {
self->commitLatencyDist->sampleSeconds(now() - beforeCommitT);
}
if(req.debugID.present())
g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.After");
@ -2154,30 +2193,16 @@ tLogEnablePopReq(TLogEnablePopRequest enablePopReq, TLogData* self, Reference<Lo
enablePopReq.reply.sendError(operation_failed());
return Void();
}
TraceEvent("EnableTLogPlayAllIgnoredPops2");
// use toBePopped and issue all the pops
std::map<Tag, Version>::iterator it;
state vector<Future<Void>> ignoredPops;
self->ignorePopRequest = false;
self->ignorePopDeadline = 0.0;
self->ignorePopUid = "";
for (it = self->toBePopped.begin(); it != self->toBePopped.end(); it++) {
TraceEvent("PlayIgnoredPop")
.detail("Tag", it->first.toString())
.detail("Version", it->second);
ignoredPops.push_back(tLogPopCore(self, it->first, it->second, logData));
}
TraceEvent("TLogExecCmdPopEnable")
.detail("UidStr", enablePopReq.snapUID.toString())
.detail("IgnorePopUid", self->ignorePopUid)
.detail("IgnporePopRequest", self->ignorePopRequest)
.detail("IgnporePopDeadline", self->ignorePopDeadline)
.detail("PersistentDataVersion", logData->persistentDataVersion)
.detail("PersistentDatadurableVersion", logData->persistentDataDurableVersion)
.detail("QueueCommittedVersion", logData->queueCommittedVersion.get())
.detail("Version", logData->version.get());
wait(waitForAll(ignoredPops));
self->toBePopped.clear();
TraceEvent("EnableTLogPlayAllIgnoredPops2")
.detail("UidStr", enablePopReq.snapUID.toString())
.detail("IgnorePopUid", self->ignorePopUid)
.detail("IgnorePopRequest", self->ignorePopRequest)
.detail("IgnorePopDeadline", self->ignorePopDeadline)
.detail("PersistentDataVersion", logData->persistentDataVersion)
.detail("PersistentDataDurableVersion", logData->persistentDataDurableVersion)
.detail("QueueCommittedVersion", logData->queueCommittedVersion.get())
.detail("Version", logData->version.get());
wait(processPopRequests(self, logData));
enablePopReq.reply.send(Void());
return Void();
}
@ -2289,6 +2314,7 @@ void removeLog( TLogData* self, Reference<LogData> logData ) {
}
}
// Remote tLogs pull data from the log routers
ACTOR Future<Void> pullAsyncData( TLogData* self, Reference<LogData> logData, std::vector<Tag> tags, Version beginVersion, Optional<Version> endVersion, bool poppedIsKnownCommitted ) {
state Future<Void> dbInfoChange = Void();
state Reference<ILogSystem::IPeekCursor> r;
@ -2808,7 +2834,7 @@ ACTOR Future<Void> tLogStart( TLogData* self, InitializeTLogRequest req, Localit
bool recovering = (req.recoverFrom.logSystemType == LogSystemType::tagPartitioned);
state Reference<LogData> logData = makeReference<LogData>(
self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID,
currentProtocolVersion, req.spillType, req.allTags, recovering ? "Recovered" : "Recruited");
g_network->protocolVersion(), req.spillType, req.allTags, recovering ? "Recovered" : "Recruited");
self->id_data[recruited.id()] = logData;
logData->locality = req.locality;
logData->recoveryCount = req.epoch;

View File

@ -168,7 +168,7 @@ OldTLogCoreData::OldTLogCoreData(const OldLogData& oldData)
struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogSystem> {
const UID dbgid;
LogSystemType logSystemType;
std::vector<Reference<LogSet>> tLogs; // LogSets in different locations: primary, remote or satellite
std::vector<Reference<LogSet>> tLogs; // LogSets in different locations: primary, satellite, or remote
int expectedLogSets;
int logRouterTags;
int txsTags;
@ -197,7 +197,14 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
Version knownCommittedVersion;
Version backupStartVersion = invalidVersion; // max(tLogs[0].startVersion, previous epochEnd).
LocalityData locality;
std::map< std::pair<UID, Tag>, std::pair<Version, Version> > outstandingPops; // For each currently running popFromLog actor, (log server #, tag)->popped version
// For each currently running popFromLog actor, outstandingPops is
// (logID, tag)->(max popped version, durableKnownCommittedVersion).
// Why do we need durableKnownCommittedVersion? knownCommittedVersion gives the lower bound of what data
// will need to be copied into the next generation to restore the replication factor.
// Guess: it probably serves as a minimum version of what data should be on a TLog in the next generation, and
// sending a pop for anything less than durableKnownCommittedVersion to that TLog would be pointless.
std::map<std::pair<UID, Tag>, std::pair<Version, Version>> outstandingPops;
Optional<PromiseStream<Future<Void>>> addActor;
ActorCollection popActors;
std::vector<OldLogData> oldLogData; // each element has the log info. in one old epoch.
@ -272,6 +279,9 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
Version& localityVersion = pseudoLocalityPopVersion[tag];
localityVersion = std::max(localityVersion, upTo);
Version minVersion = localityVersion;
// Why do we need to use the minimum popped version among all tags? For example, if
// two pseudo tags pop to versions 100 and 150, respectively, it is only safe to pop min(100, 150),
// because [101, 150) is still needed by the other pseudo tag.
for (const int8_t locality : pseudoLocalities) {
minVersion = std::min(minVersion, pseudoLocalityPopVersion[Tag(locality, tag.id)]);
}
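// Illustration (standalone sketch, hypothetical container) of the rule in the comment above: a
// pop for a pseudo tag only takes effect up to the minimum version popped across all pseudo
// localities sharing the tag id, e.g. pops at 100 and 150 allow popping only up to 100.
#include <algorithm>
#include <cstdint>
#include <limits>
#include <map>
using SketchVersion = int64_t;

SketchVersion safePopVersion(const std::map<int8_t, SketchVersion>& poppedByPseudoLocality) {
    SketchVersion minVersion = std::numeric_limits<SketchVersion>::max();
    for (const auto& [locality, popped] : poppedByPseudoLocality)
        minVersion = std::min(minVersion, popped);
    return minVersion;
}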
@ -1159,6 +1169,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
}
}
// pop 'tag.locality' type data up to the 'upTo' version
void pop(Version upTo, Tag tag, Version durableKnownCommittedVersion, int8_t popLocality) final {
if (upTo <= 0) return;
if (tag.locality == tagLocalityRemoteLog) {
@ -1184,6 +1195,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
}
}
// pop tag from log up to the version defined in self->outstandingPops[].first
ACTOR static Future<Void> popFromLog(TagPartitionedLogSystem* self,
Reference<AsyncVar<OptionalInterface<TLogInterface>>> log, Tag tag,
double time) {
@ -1191,6 +1203,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
loop {
wait( delay(time, TaskPriority::TLogPop) );
// to: first is the up-to version, second is durableKnownCommittedVersion
state std::pair<Version,Version> to = self->outstandingPops[ std::make_pair(log->get().id(),tag) ];
if (to.first <= last) {

View File

@ -60,6 +60,7 @@
#include "fdbserver/workloads/workloads.actor.h"
#include "flow/DeterministicRandom.h"
#include "flow/Platform.h"
#include "flow/ProtocolVersion.h"
#include "flow/SimpleOpt.h"
#include "flow/SystemMonitor.h"
#include "flow/TLSConfig.actor.h"
@ -316,6 +317,14 @@ void failAfter( Future<Void> trigger, Endpoint e ) {
failAfter( trigger, g_simulator.getProcess( e ) );
}
ACTOR Future<Void> histogramReport() {
loop {
wait(delay(SERVER_KNOBS->HISTOGRAM_REPORT_INTERVAL));
GetHistogramRegistry().logReport();
}
}
void testSerializationSpeed() {
double tstart;
double build = 0, serialize = 0, deserialize = 0, copy = 0, deallocate = 0;
@ -435,8 +444,10 @@ ACTOR Future<Void> dumpDatabase( Database cx, std::string outputFilename, KeyRan
void memoryTest();
void skipListTest();
Future<Void> startSystemMonitor(std::string dataFolder, Optional<Standalone<StringRef>> zoneId, Optional<Standalone<StringRef>> machineId) {
initializeSystemMonitorMachineState(SystemMonitorMachineState(dataFolder, zoneId, machineId, g_network->getLocalAddress().ip));
Future<Void> startSystemMonitor(std::string dataFolder, Optional<Standalone<StringRef>> dcId,
Optional<Standalone<StringRef>> zoneId, Optional<Standalone<StringRef>> machineId) {
initializeSystemMonitorMachineState(
SystemMonitorMachineState(dataFolder, dcId, zoneId, machineId, g_network->getLocalAddress().ip));
systemMonitor();
return recurring( &systemMonitor, 5.0, TaskPriority::FlushTrace );
@ -1635,6 +1646,7 @@ int main(int argc, char* argv[]) {
//startOldSimulator();
startNewSimulator();
openTraceFile(NetworkAddress(), opts.rollsize, opts.maxLogsSize, opts.logFolder, "trace", opts.logGroup);
openTracer(TracerType(deterministicRandom()->randomInt(static_cast<int>(TracerType::DISABLED), static_cast<int>(TracerType::END))));
} else {
g_network = newNet2(opts.tlsConfig, opts.useThreadPool, true);
g_network->addStopCallback( Net2FileSystem::stop );
@ -1736,6 +1748,8 @@ int main(int argc, char* argv[]) {
if (role == Simulation) {
TraceEvent("Simulation").detail("TestFile", opts.testFile);
auto histogramReportActor = histogramReport();
clientKnobs->trace();
flowKnobs->trace();
serverKnobs->trace();
@ -1893,6 +1907,7 @@ int main(int argc, char* argv[]) {
actors.push_back(fdbd(opts.connectionFile, opts.localities, opts.processClass, dataFolder, dataFolder,
opts.storageMemLimit, opts.metricsConnFile, opts.metricsPrefix, opts.rsssize,
opts.whitelistBinPaths));
actors.push_back(histogramReport());
// actors.push_back( recurring( []{}, .001 ) ); // for ASIO latency measurement
f = stopAfter(waitForAll(actors));
@ -1906,14 +1921,14 @@ int main(int argc, char* argv[]) {
g_network->run();
} else if (role == Test) {
setupRunLoopProfiler();
auto m = startSystemMonitor(opts.dataFolder, opts.zoneId, opts.zoneId);
auto m = startSystemMonitor(opts.dataFolder, opts.dcId, opts.zoneId, opts.zoneId);
f = stopAfter(runTests(opts.connectionFile, TEST_TYPE_FROM_FILE, TEST_HERE, 1, opts.testFile, StringRef(),
opts.localities));
g_network->run();
} else if (role == ConsistencyCheck) {
setupRunLoopProfiler();
auto m = startSystemMonitor(opts.dataFolder, opts.zoneId, opts.zoneId);
auto m = startSystemMonitor(opts.dataFolder, opts.dcId, opts.zoneId, opts.zoneId);
f = stopAfter(runTests(opts.connectionFile, TEST_TYPE_CONSISTENCY_CHECK, TEST_HERE, 1, opts.testFile,
StringRef(), opts.localities));
g_network->run();

View File

@ -19,15 +19,20 @@
*/
#include <cinttypes>
#include <functional>
#include <type_traits>
#include <unordered_map>
#include "fdbrpc/fdbrpc.h"
#include "fdbrpc/LoadBalance.h"
#include "flow/Arena.h"
#include "flow/IRandom.h"
#include "flow/Tracing.h"
#include "flow/IndexedSet.h"
#include "flow/Hash3.h"
#include "flow/ActorCollection.h"
#include "flow/Arena.h"
#include "flow/Hash3.h"
#include "flow/Histogram.h"
#include "flow/IRandom.h"
#include "flow/IndexedSet.h"
#include "flow/SystemMonitor.h"
#include "flow/Tracing.h"
#include "flow/Util.h"
#include "fdbclient/Atomic.h"
#include "fdbclient/DatabaseContext.h"
@ -57,11 +62,8 @@
#include "fdbrpc/Smoother.h"
#include "fdbrpc/Stats.h"
#include "flow/TDMetric.actor.h"
#include <type_traits>
#include "flow/actorcompiler.h" // This must be the last #include.
using std::pair;
using std::make_pair;
#include "flow/actorcompiler.h" // This must be the last #include.
#ifndef __INTEL_COMPILER
#pragma region Data Structures
@ -229,13 +231,13 @@ struct UpdateEagerReadInfo {
void finishKeyBegin() {
std::sort(keyBegin.begin(), keyBegin.end());
keyBegin.resize( std::unique(keyBegin.begin(), keyBegin.end()) - keyBegin.begin() );
std::sort(keys.begin(), keys.end(), [](const pair<KeyRef, int>& lhs, const pair<KeyRef, int>& rhs) { return (lhs.first < rhs.first) || (lhs.first == rhs.first && lhs.second > rhs.second); } );
keys.resize(std::unique(keys.begin(), keys.end(), [](const pair<KeyRef, int>& lhs, const pair<KeyRef, int>& rhs) { return lhs.first == rhs.first; } ) - keys.begin());
std::sort(keys.begin(), keys.end(), [](const std::pair<KeyRef, int>& lhs, const std::pair<KeyRef, int>& rhs) { return (lhs.first < rhs.first) || (lhs.first == rhs.first && lhs.second > rhs.second); } );
keys.resize(std::unique(keys.begin(), keys.end(), [](const std::pair<KeyRef, int>& lhs, const std::pair<KeyRef, int>& rhs) { return lhs.first == rhs.first; } ) - keys.begin());
//value gets populated in doEagerReads
}
Optional<Value>& getValue(KeyRef key) {
int i = std::lower_bound(keys.begin(), keys.end(), pair<KeyRef, int>(key, 0), [](const pair<KeyRef, int>& lhs, const pair<KeyRef, int>& rhs) { return lhs.first < rhs.first; } ) - keys.begin();
int i = std::lower_bound(keys.begin(), keys.end(), std::pair<KeyRef, int>(key, 0), [](const std::pair<KeyRef, int>& lhs, const std::pair<KeyRef, int>& rhs) { return lhs.first < rhs.first; } ) - keys.begin();
ASSERT( i < keys.size() && keys[i].first == key );
return value[i];
}
@ -289,9 +291,63 @@ private:
std::map<Version, Standalone<VerUpdateRef>> mutationLog; // versions (durableVersion, version]
public:
// Histograms
struct FetchKeysHistograms {
const Reference<Histogram> latency;
const Reference<Histogram> bytes;
const Reference<Histogram> bandwidth;
FetchKeysHistograms()
: latency(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP, FETCH_KEYS_LATENCY_HISTOGRAM,
Histogram::Unit::microseconds)),
bytes(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP, FETCH_KEYS_BYTES_HISTOGRAM,
Histogram::Unit::bytes)),
bandwidth(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP, FETCH_KEYS_BYTES_PER_SECOND_HISTOGRAM,
Histogram::Unit::bytes_per_second)) {}
} fetchKeysHistograms;
class CurrentRunningFetchKeys {
std::unordered_map<UID, double> startTimeMap;
std::unordered_map<UID, KeyRangeRef> keyRangeMap;
static const StringRef emptyString;
static const KeyRangeRef emptyKeyRange;
public:
void recordStart(const UID id, const KeyRange keyRange) {
startTimeMap[id] = now();
keyRangeMap[id] = keyRange;
}
void recordFinish(const UID id) {
startTimeMap.erase(id);
keyRangeMap.erase(id);
}
std::pair<double, KeyRangeRef> longestTime() const {
if (numRunning() == 0) {
return {-1, emptyKeyRange};
}
const double currentTime = now();
double longest = 0;
UID UIDofLongest;
for (const auto& kv : startTimeMap) {
const double currentRunningTime = currentTime - kv.second;
// <= ensures UIDofLongest is set on the first iteration even while all running
// times are still 0; keyRangeMap.at() below requires a valid key.
if (longest <= currentRunningTime) {
longest = currentRunningTime;
UIDofLongest = kv.first;
}
}
return {longest, keyRangeMap.at(UIDofLongest)};
}
int numRunning() const { return startTimeMap.size(); }
} currentRunningFetchKeys;
Tag tag;
vector<pair<Version,Tag>> history;
vector<pair<Version,Tag>> allHistory;
vector<std::pair<Version,Tag>> history;
vector<std::pair<Version,Tag>> allHistory;
Version poppedAllAfter;
std::map<Version, Arena> freeable; // for each version, an Arena that must be held until that version is < oldestVersion
Arena lastArena;
@ -338,8 +394,8 @@ public:
poppedAllAfter = std::numeric_limits<Version>::max();
}
vector<pair<Version,Tag>>* hist = &history;
vector<pair<Version,Tag>> allHistoryCopy;
vector<std::pair<Version,Tag>>* hist = &history;
vector<std::pair<Version,Tag>> allHistoryCopy;
if(popAllTags) {
allHistoryCopy = allHistory;
hist = &allHistoryCopy;
@ -604,22 +660,18 @@ public:
}
} counters;
StorageServer(IKeyValueStore* storage, Reference<AsyncVar<ServerDBInfo>> const& db, StorageServerInterface const& ssi)
: instanceID(deterministicRandom()->randomUniqueID().first()),
storage(this, storage), db(db), actors(false),
lastTLogVersion(0), lastVersionWithData(0), restoredVersion(0),
rebootAfterDurableVersion(std::numeric_limits<Version>::max()),
durableInProgress(Void()),
versionLag(0), primaryLocality(tagLocalityInvalid),
updateEagerReads(0),
shardChangeCounter(0),
fetchKeysParallelismLock(SERVER_KNOBS->FETCH_KEYS_PARALLELISM_BYTES),
shuttingDown(false), debug_inApplyUpdate(false), debug_lastValidateTime(0), watchBytes(0), numWatches(0),
logProtocol(0), counters(this), tag(invalidTag), maxQueryQueue(0), thisServerID(ssi.id()),
readQueueSizeMetric(LiteralStringRef("StorageServer.ReadQueueSize")),
behind(false), versionBehind(false), byteSampleClears(false, LiteralStringRef("\xff\xff\xff")), noRecentUpdates(false),
lastUpdate(now()), poppedAllAfter(std::numeric_limits<Version>::max()), cpuUsage(0.0), diskUsage(0.0)
{
StorageServer(IKeyValueStore* storage, Reference<AsyncVar<ServerDBInfo>> const& db,
StorageServerInterface const& ssi)
: fetchKeysHistograms(), instanceID(deterministicRandom()->randomUniqueID().first()), storage(this, storage),
db(db), actors(false), lastTLogVersion(0), lastVersionWithData(0), restoredVersion(0),
rebootAfterDurableVersion(std::numeric_limits<Version>::max()), durableInProgress(Void()), versionLag(0),
primaryLocality(tagLocalityInvalid), updateEagerReads(0), shardChangeCounter(0),
fetchKeysParallelismLock(SERVER_KNOBS->FETCH_KEYS_PARALLELISM_BYTES), shuttingDown(false),
debug_inApplyUpdate(false), debug_lastValidateTime(0), watchBytes(0), numWatches(0), logProtocol(0),
counters(this), tag(invalidTag), maxQueryQueue(0), thisServerID(ssi.id()),
readQueueSizeMetric(LiteralStringRef("StorageServer.ReadQueueSize")), behind(false), versionBehind(false),
byteSampleClears(false, LiteralStringRef("\xff\xff\xff")), noRecentUpdates(false), lastUpdate(now()),
poppedAllAfter(std::numeric_limits<Version>::max()), cpuUsage(0.0), diskUsage(0.0) {
version.initMetric(LiteralStringRef("StorageServer.Version"), counters.cc.id);
oldestVersion.initMetric(LiteralStringRef("StorageServer.OldestVersion"), counters.cc.id);
durableVersion.initMetric(LiteralStringRef("StorageServer.DurableVersion"), counters.cc.id);
@ -732,6 +784,9 @@ public:
}
};
const StringRef StorageServer::CurrentRunningFetchKeys::emptyString = LiteralStringRef("");
const KeyRangeRef StorageServer::CurrentRunningFetchKeys::emptyKeyRange = KeyRangeRef(StorageServer::CurrentRunningFetchKeys::emptyString, StorageServer::CurrentRunningFetchKeys::emptyString);
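To make the bookkeeping contract above concrete, here is a minimal hedged sketch of driving CurrentRunningFetchKeys directly (the helper, ID, and range are illustrative; in the server this class is only touched through FetchKeysMetricReporter and reportStorageServerState below):

void currentRunningFetchKeysExample() { // hypothetical helper, not part of the diff
	StorageServer::CurrentRunningFetchKeys running;
	const UID id = deterministicRandom()->randomUniqueID();
	running.recordStart(id, KeyRangeRef(LiteralStringRef("a"), LiteralStringRef("b")));
	ASSERT(running.numRunning() == 1);
	auto longest = running.longestTime(); // {elapsed seconds, key range} of the slowest fetch
	running.recordFinish(id);
	ASSERT(running.numRunning() == 0); // longestTime() would now return {-1, emptyKeyRange}
}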
// If and only if key:=value is in (storage+versionedData), // NOT ACTUALLY: and key < allKeys.end,
// and H(key) < |key+value|/bytesPerSample,
// let sampledSize = max(|key+value|,bytesPerSample)
@ -1011,7 +1066,7 @@ ACTOR Future<Void> getValueQ( StorageServer* data, GetValueRequest req ) {
++data->counters.finishedQueries;
--data->readQueueSizeMetric;
double duration = timer() - req.requestTime();
double duration = g_network->timer() - req.requestTime();
data->counters.readLatencySample.addMeasurement(duration);
if(data->latencyBandConfig.present()) {
int maxReadBytes = data->latencyBandConfig.get().readConfig.maxReadBytes.orDefault(std::numeric_limits<int>::max());
@ -1627,7 +1682,7 @@ ACTOR Future<Void> getKeyValuesQ( StorageServer* data, GetKeyValuesRequest req )
++data->counters.finishedQueries;
--data->readQueueSizeMetric;
double duration = timer() - req.requestTime();
double duration = g_network->timer() - req.requestTime();
data->counters.readLatencySample.addMeasurement(duration);
if(data->latencyBandConfig.present()) {
int maxReadBytes = data->latencyBandConfig.get().readConfig.maxReadBytes.orDefault(std::numeric_limits<int>::max());
@ -1700,7 +1755,7 @@ ACTOR Future<Void> getKeyQ( StorageServer* data, GetKeyRequest req ) {
++data->counters.finishedQueries;
--data->readQueueSizeMetric;
double duration = timer() - req.requestTime();
double duration = g_network->timer() - req.requestTime();
data->counters.readLatencySample.addMeasurement(duration);
if(data->latencyBandConfig.present()) {
int maxReadBytes = data->latencyBandConfig.get().readConfig.maxReadBytes.orDefault(std::numeric_limits<int>::max());
@ -1826,7 +1881,7 @@ bool changeDurableVersion( StorageServer* data, Version desiredDurableVersion )
setDataDurableVersion(data->thisServerID, data->durableVersion.get());
if (checkFatalError.isReady()) checkFatalError.get();
//TraceEvent("ForgotVersionsBefore", data->thisServerID).detail("Version", nextDurableVersion);
// TraceEvent("ForgotVersionsBefore", data->thisServerID).detail("Version", nextDurableVersion);
validate(data);
return nextDurableVersion == desiredDurableVersion;
@ -2142,16 +2197,56 @@ ACTOR Future<Void> logFetchKeysWarning(AddingShard* shard) {
loop {
state double waitSeconds = BUGGIFY ? 5.0 : 600.0;
wait(delay(waitSeconds));
TraceEvent(waitSeconds > 300.0 ? SevWarnAlways : SevInfo, "FetchKeysTooLong").detail("Duration", now() - startTime).detail("Phase", shard->phase).detail("Begin", shard->keys.begin.printable()).detail("End", shard->keys.end.printable());
const auto traceEventLevel = waitSeconds > SERVER_KNOBS->FETCH_KEYS_TOO_LONG_TIME_CRITERIA ? SevWarnAlways : SevInfo;
TraceEvent(traceEventLevel, "FetchKeysTooLong")
.detail("Duration", now() - startTime)
.detail("Phase", shard->phase)
.detail("Begin", shard->keys.begin.printable())
.detail("End", shard->keys.end.printable());
}
}
class FetchKeysMetricReporter {
const UID uid;
const double startTime;
int fetchedBytes;
StorageServer::FetchKeysHistograms& histograms;
StorageServer::CurrentRunningFetchKeys& currentRunning;
public:
FetchKeysMetricReporter(const UID& uid_, const double startTime_, const KeyRange& keyRange, StorageServer::FetchKeysHistograms& histograms_, StorageServer::CurrentRunningFetchKeys& currentRunning_)
: uid(uid_), startTime(startTime_), fetchedBytes(0), histograms(histograms_), currentRunning(currentRunning_) {
currentRunning.recordStart(uid, keyRange);
}
void addFetchedBytes(const int bytes) { fetchedBytes += bytes; }
~FetchKeysMetricReporter() {
double latency = now() - startTime;
// If fetchKeys did no measurable work, i.e. returned immediately, still report a
// record; use a tiny non-zero latency so the bandwidth division below stays defined.
if (latency == 0) latency = 1e-6;
const uint32_t bandwidth = fetchedBytes / latency;
histograms.latency->sampleSeconds(latency);
histograms.bytes->sample(fetchedBytes);
histograms.bandwidth->sample(bandwidth);
currentRunning.recordFinish(uid);
}
};
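Since the reporter is RAII, a call site only needs to keep it alive for the duration of the fetch; a hedged sketch (someData and someKeys are hypothetical stand-ins, and the real call site is the metricReporter state variable in fetchKeys below):

void fetchKeysMetricReporterExample(StorageServer* someData, KeyRange someKeys) { // hypothetical
	const UID id = deterministicRandom()->randomUniqueID();
	FetchKeysMetricReporter reporter(id, now(), someKeys, someData->fetchKeysHistograms,
	                                 someData->currentRunningFetchKeys);
	reporter.addFetchedBytes(1024); // accumulate as each block arrives
} // destructor samples latency, bytes, and bandwidth, then calls recordFinish(id)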
ACTOR Future<Void> fetchKeys( StorageServer *data, AddingShard* shard ) {
state const UID fetchKeysID = deterministicRandom()->randomUniqueID();
state TraceInterval interval("FetchKeys");
state KeyRange keys = shard->keys;
state Future<Void> warningLogger = logFetchKeysWarning(shard);
state double startt = now();
state const double startTime = now();
state int fetchBlockBytes = BUGGIFY ? SERVER_KNOBS->BUGGIFY_BLOCK_BYTES : SERVER_KNOBS->FETCH_BLOCK_BYTES;
state FetchKeysMetricReporter metricReporter(fetchKeysID, startTime, keys, data->fetchKeysHistograms, data->currentRunningFetchKeys);
// delay(0) to force a return to the run loop before the work of fetchKeys is started.
// This allows adding->start() to be called inline with CSK.
@ -2189,7 +2284,7 @@ ACTOR Future<Void> fetchKeys( StorageServer *data, AddingShard* shard ) {
state double executeStart = now();
++data->counters.fetchWaitingCount;
data->counters.fetchWaitingMS += 1000*(executeStart - startt);
data->counters.fetchWaitingMS += 1000 * (executeStart - startTime);
// Fetch keys gets called while the update actor is processing mutations. data->version will not be updated until all mutations for a version
// have been processed. We need to take the durableVersionLock to ensure data->version is greater than the version of the mutation which caused
@ -2231,6 +2326,7 @@ ACTOR Future<Void> fetchKeys( StorageServer *data, AddingShard* shard ) {
DEBUG_KEY_RANGE("fetchRange", fetchVersion, keys);
for(auto k = this_block.begin(); k != this_block.end(); ++k) DEBUG_MUTATION("fetch", fetchVersion, MutationRef(MutationRef::SetValue, k->key, k->value));
metricReporter.addFetchedBytes(expectedSize);
data->counters.bytesFetched += expectedSize;
if( fetchBlockBytes > expectedSize ) {
holdingFKPL.release( fetchBlockBytes - expectedSize );
@ -2298,8 +2394,9 @@ ACTOR Future<Void> fetchKeys( StorageServer *data, AddingShard* shard ) {
while (!shard->updates.empty() && shard->updates[0].version <= fetchVersion) shard->updates.pop_front();
//FIXME: remove when we no longer support upgrades from 5.X
if(debug_getRangeRetries >= 100) {
if (debug_getRangeRetries >= 100) {
data->cx->enableLocalityLoadBalance = false;
// TODO: Add SevWarnAlways to say it was disabled.
}
debug_getRangeRetries++;
@ -2416,7 +2513,7 @@ ACTOR Future<Void> fetchKeys( StorageServer *data, AddingShard* shard ) {
TraceEvent(SevError, "FetchKeysError", data->thisServerID)
.error(e)
.detail("Elapsed", now()-startt)
.detail("Elapsed", now() - startTime)
.detail("KeyBegin", keys.begin)
.detail("KeyEnd",keys.end);
if (e.code() != error_code_actor_cancelled)
@ -3269,7 +3366,9 @@ bool StorageServerDisk::makeVersionMutationsDurable(Version& prevStorageVersion,
void StorageServerDisk::makeVersionDurable( Version version ) {
storage->set( KeyValueRef(persistVersion, BinaryWriter::toValue(version, Unversioned())) );
//TraceEvent("MakeDurable", data->thisServerID).detail("FromVersion", prevStorageVersion).detail("ToVersion", version);
// TraceEvent("MakeDurable", data->thisServerID)
// .detail("FromVersion", prevStorageVersion)
// .detail("ToVersion", version);
}
void StorageServerDisk::changeLogProtocol(Version version, ProtocolVersion protocol) {
@ -3665,7 +3764,10 @@ ACTOR Future<Void> metricsCore( StorageServer* self, StorageServerInterface ssi
wait( self->byteSampleRecovery );
self->actors.add(traceCounters("StorageMetrics", self->thisServerID, SERVER_KNOBS->STORAGE_LOGGING_DELAY, &self->counters.cc, self->thisServerID.toString() + "/StorageMetrics"));
Tag tag = self->tag;
self->actors.add(traceCounters("StorageMetrics", self->thisServerID, SERVER_KNOBS->STORAGE_LOGGING_DELAY,
&self->counters.cc, self->thisServerID.toString() + "/StorageMetrics",
[tag](TraceEvent& te) { te.detail("Tag", tag.toString()); }));
loop {
choose {
@ -3785,6 +3887,35 @@ ACTOR Future<Void> serveWatchValueRequests( StorageServer* self, FutureStream<Wa
}
}
ACTOR Future<Void> reportStorageServerState(StorageServer* self) {
if (!SERVER_KNOBS->REPORT_DD_METRICS) {
return Void();
}
loop {
wait(delay(SERVER_KNOBS->DD_METRICS_REPORT_INTERVAL));
const auto numRunningFetchKeys = self->currentRunningFetchKeys.numRunning();
if (numRunningFetchKeys == 0) {
continue;
}
const auto longestRunningFetchKeys = self->currentRunningFetchKeys.longestTime();
auto level = SevInfo;
if (longestRunningFetchKeys.first >= SERVER_KNOBS->FETCH_KEYS_TOO_LONG_TIME_CRITERIA) {
level = SevWarnAlways;
}
TraceEvent(level, "FetchKeyCurrentStatus")
.detail("Timestamp", now())
.detail("LongestRunningTime", longestRunningFetchKeys.first)
.detail("StartKey", longestRunningFetchKeys.second.begin.printable())
.detail("EndKey", longestRunningFetchKeys.second.end.printable())
.detail("NumRunning", numRunningFetchKeys);
}
}
ACTOR Future<Void> storageServerCore( StorageServer* self, StorageServerInterface ssi )
{
state Future<Void> doUpdate = Void();
@ -3805,6 +3936,7 @@ ACTOR Future<Void> storageServerCore( StorageServer* self, StorageServerInterfac
self->actors.add(serveGetKeyRequests(self, ssi.getKey.getFuture()));
self->actors.add(serveWatchValueRequests(self, ssi.watchValue.getFuture()));
self->actors.add(traceRole(Role::STORAGE_SERVER, ssi.id()));
self->actors.add(reportStorageServerState(self));
self->transactionTagCounter.startNewInterval(self->thisServerID);
self->actors.add(recurring([&]() { self->transactionTagCounter.startNewInterval(self->thisServerID); },


@ -908,6 +908,9 @@ std::map<std::string, std::function<void(const std::string&)>> testSpecGlobalKey
// else { } It is enabled by default for the tester
TraceEvent("TestParserTest").detail("ClientInfoLogging", value);
}},
{"startIncompatibleProcess", [](const std::string& value) {
TraceEvent("TestParserTest").detail("ParsedStartIncompatibleProcess", value);
}}
};
std::map<std::string, std::function<void(const std::string& value, TestSpec* spec)>> testSpecTestKeys = {


@ -25,6 +25,7 @@
#include "fdbclient/StorageServerInterface.h"
#include "fdbserver/Knobs.h"
#include "flow/ActorCollection.h"
#include "flow/ProtocolVersion.h"
#include "flow/SystemMonitor.h"
#include "flow/TDMetric.actor.h"
#include "fdbrpc/simulator.h"
@ -46,6 +47,7 @@
#include "flow/Profiler.h"
#include "flow/ThreadHelper.actor.h"
#include "flow/Trace.h"
#include "flow/network.h"
#ifdef __linux__
#include <fcntl.h>
@ -887,7 +889,7 @@ ACTOR Future<Void> monitorTraceLogIssues(Reference<AsyncVar<std::set<std::string
}
}
std::set<std::string> _issues;
retriveTraceLogIssues(_issues);
retrieveTraceLogIssues(_issues);
if (pingTimeout) {
// Ping trace log writer thread timeout.
_issues.insert("trace_log_writer_thread_unresponsive");
@ -989,10 +991,11 @@ ACTOR Future<Void> workerServer(
filesClosed.add(stopping.getFuture());
initializeSystemMonitorMachineState(SystemMonitorMachineState(folder, locality.zoneId(), locality.machineId(), g_network->getLocalAddress().ip));
initializeSystemMonitorMachineState(SystemMonitorMachineState(
folder, locality.dcId(), locality.zoneId(), locality.machineId(), g_network->getLocalAddress().ip));
{
auto recruited = interf; //ghetto! don't we all love a good #define
auto recruited = interf;
DUMPTOKEN(recruited.clientInterface.reboot);
DUMPTOKEN(recruited.clientInterface.profiler);
DUMPTOKEN(recruited.tLog);
@ -1140,7 +1143,7 @@ ACTOR Future<Void> workerServer(
loop choose {
when( UpdateServerDBInfoRequest req = waitNext( interf.updateServerDBInfo.getFuture() ) ) {
ServerDBInfo localInfo = BinaryReader::fromStringRef<ServerDBInfo>(req.serializedDbInfo, AssumeVersion(currentProtocolVersion));
ServerDBInfo localInfo = BinaryReader::fromStringRef<ServerDBInfo>(req.serializedDbInfo, AssumeVersion(g_network->protocolVersion()));
localInfo.myLocality = locality;
if(localInfo.infoGeneration < dbInfo->get().infoGeneration && localInfo.clusterInterface == dbInfo->get().clusterInterface) {
@ -1795,6 +1798,16 @@ ACTOR Future<Void> monitorLeaderRemotelyWithDelayedCandidacy( Reference<ClusterC
}
}
ACTOR Future<Void> serveProtocolInfo() {
state RequestStream<ProtocolInfoRequest> protocolInfo(
PeerCompatibilityPolicy{ RequirePeer::AtLeast, ProtocolVersion::withStableInterfaces() });
protocolInfo.makeWellKnownEndpoint(WLTOKEN_PROTOCOL_INFO, TaskPriority::DefaultEndpoint);
loop {
state ProtocolInfoRequest req = waitNext(protocolInfo.getFuture());
req.reply.send(ProtocolInfoReply{ g_network->protocolVersion() });
}
}
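The matching client side just hits the well-known endpoint and reads the reply; a hedged sketch (fetchProtocolVersion is a hypothetical name, following the same pattern ProtocolVersionWorkload uses below):

ACTOR Future<ProtocolVersion> fetchProtocolVersion(NetworkAddressList addresses) { // hypothetical
	RequestStream<ProtocolInfoRequest> requestStream{ Endpoint{ addresses, WLTOKEN_PROTOCOL_INFO } };
	ProtocolInfoReply reply = wait(retryBrokenPromise(requestStream, ProtocolInfoRequest{}));
	return reply.version;
}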
ACTOR Future<Void> fdbd(
Reference<ClusterConnectionFile> connFile,
LocalityData localities,
@ -1810,6 +1823,8 @@ ACTOR Future<Void> fdbd(
state vector<Future<Void>> actors;
state Promise<Void> recoveredDiskFiles;
actors.push_back(serveProtocolInfo());
try {
ServerCoordinators coordinators( connFile );
if (g_network->isSimulated()) {


@ -22,6 +22,7 @@
#include "fdbserver/workloads/workloads.actor.h"
#include "flow/ActorCollection.h"
#include "flow/IRandom.h"
#include "flow/SystemMonitor.h"
#include "fdbserver/workloads/AsyncFile.actor.h"
#include "flow/actorcompiler.h" // This must be the last #include.
@ -374,15 +375,6 @@ struct AsyncFileCorrectnessWorkload : public AsyncFileWorkload
return false;
}
//Populates a buffer with a random sequence of bytes
void generateRandomData(unsigned char* buffer, int length) const {
for(int i = 0; i < length; i+= sizeof(uint32_t))
{
uint32_t val = deterministicRandom()->randomUInt32();
memcpy(&buffer[i], &val, std::min(length - i, (int)sizeof(uint32_t)));
}
}
//Performs an operation on a file and the memory representation of that file
ACTOR Future<OperationInfo> processOperation(AsyncFileCorrectnessWorkload *self, OperationInfo info)
{
@ -413,7 +405,7 @@ struct AsyncFileCorrectnessWorkload : public AsyncFileWorkload
else if(info.operation == WRITE)
{
info.data = self->allocateBuffer(info.length);
self->generateRandomData(info.data->buffer, info.length);
generateRandomData(reinterpret_cast<uint8_t*>(info.data->buffer), info.length);
memcpy(&self->memoryFile->buffer[info.offset], info.data->buffer, info.length);
memset(&self->fileValidityMask[info.offset], 0xFF, info.length);


@ -34,6 +34,7 @@
#include "flow/DeterministicRandom.h"
#include "fdbclient/ManagementAPI.actor.h"
#include "flow/actorcompiler.h" // This must be the last #include.
#include "flow/network.h"
//#define SevCCheckInfo SevVerbose
#define SevCCheckInfo SevInfo
@ -1295,7 +1296,7 @@ struct ConsistencyCheckWorkload : TestWorkload
vector<ISimulator::ProcessInfo*> all = g_simulator.getAllProcesses();
for(int i = 0; i < all.size(); i++) {
if( all[i]->isReliable() && all[i]->name == std::string("Server") && all[i]->startingClass != ProcessClass::TesterClass ) {
if( all[i]->isReliable() && all[i]->name == std::string("Server") && all[i]->startingClass != ProcessClass::TesterClass && all[i]->protocolVersion == g_network->protocolVersion() ) {
if(!workerAddresses.count(all[i]->address)) {
TraceEvent("ConsistencyCheck_WorkerMissingFromList").detail("Addr", all[i]->address);
return false;


@ -69,7 +69,7 @@ struct DowngradeWorkload : TestWorkload {
};
ACTOR static Future<Void> writeOld(Database cx, int numObjects, Key key) {
BinaryWriter writer(IncludeVersion(currentProtocolVersion));
BinaryWriter writer(IncludeVersion(g_network->protocolVersion()));
std::vector<OldStruct> data(numObjects);
for (auto& oldObject : data) {
oldObject.setFields();
@ -90,7 +90,7 @@ struct DowngradeWorkload : TestWorkload {
}
ACTOR static Future<Void> writeNew(Database cx, int numObjects, Key key) {
ProtocolVersion protocolVersion = currentProtocolVersion;
ProtocolVersion protocolVersion = g_network->protocolVersion();
protocolVersion.addObjectSerializerFlag();
ObjectWriter writer(IncludeVersion(protocolVersion));
std::vector<NewStruct> data(numObjects);


@ -28,9 +28,9 @@
extern IKeyValueStore *makeDummyKeyValueStore();
template <class T>
class Histogram {
class TestHistogram {
public:
Histogram(int minSamples = 100) : minSamples(minSamples) { reset(); }
TestHistogram(int minSamples = 100) : minSamples(minSamples) { reset(); }
void reset() {
N = 0;
@ -145,7 +145,7 @@ struct KVTest {
}
};
ACTOR Future<Void> testKVRead( KVTest* test, Key key, Histogram<float>* latency, PerfIntCounter* count ) {
ACTOR Future<Void> testKVRead(KVTest* test, Key key, TestHistogram<float>* latency, PerfIntCounter* count) {
// state Version s1 = test->lastCommit;
state Version s2 = test->lastDurable;
@ -163,7 +163,7 @@ ACTOR Future<Void> testKVRead( KVTest* test, Key key, Histogram<float>* latency,
return Void();
}
ACTOR Future<Void> testKVReadSaturation( KVTest* test, Histogram<float>* latency, PerfIntCounter* count ) {
ACTOR Future<Void> testKVReadSaturation(KVTest* test, TestHistogram<float>* latency, PerfIntCounter* count) {
while (true) {
state double begin = timer();
Optional<Value> val = wait(test->store->readValue(test->randomKey()));
@ -173,7 +173,7 @@ ACTOR Future<Void> testKVReadSaturation( KVTest* test, Histogram<float>* latency
}
}
ACTOR Future<Void> testKVCommit( KVTest* test, Histogram<float>* latency, PerfIntCounter* count ) {
ACTOR Future<Void> testKVCommit(KVTest* test, TestHistogram<float>* latency, PerfIntCounter* count) {
state Version v = test->lastSet;
test->lastCommit = v;
state double begin = timer();
@ -194,7 +194,7 @@ struct KVStoreTestWorkload : TestWorkload {
bool doSetup, doClear, doCount;
std::string filename;
PerfIntCounter reads, sets, commits;
Histogram<float> readLatency, commitLatency;
TestHistogram<float> readLatency, commitLatency;
double setupTook;
std::string storeType;
@ -222,7 +222,7 @@ struct KVStoreTestWorkload : TestWorkload {
return Void();
}
Future<bool> check(Database const& cx) override { return true; }
void metricsFromHistogram(vector<PerfMetric>& m, std::string name, Histogram<float>& h) const {
void metricsFromHistogram(vector<PerfMetric>& m, std::string name, TestHistogram<float>& h) const {
m.push_back(PerfMetric("Min " + name, 1000.0 * h.min(), true));
m.push_back(PerfMetric("Average " + name, 1000.0 * h.mean(), true));
m.push_back(PerfMetric("Median " + name, 1000.0 * h.medianEstimate(), true));


@ -49,7 +49,13 @@ struct LowLatencyWorkload : TestWorkload {
std::string description() const override { return "LowLatency"; }
Future<Void> setup(Database const& cx) override { return Void(); }
Future<Void> setup(Database const& cx) override {
if (g_network->isSimulated()) {
ASSERT(const_cast<ServerKnobs*>(SERVER_KNOBS)->setKnob("min_delay_cc_worst_fit_candidacy_seconds", "5"));
ASSERT(const_cast<ServerKnobs*>(SERVER_KNOBS)->setKnob("max_delay_cc_worst_fit_candidacy_seconds", "10"));
}
return Void();
}
Future<Void> start(Database const& cx) override {
if( clientId == 0 )


@ -0,0 +1,51 @@
/*
* ProtocolVersion.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2019 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "fdbserver/workloads/workloads.actor.h"
struct ProtocolVersionWorkload : TestWorkload {
ProtocolVersionWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {}
std::string description() const override { return "ProtocolVersionWorkload"; }
Future<Void> start(Database const& cx) override { return _start(this, cx); }
ACTOR Future<Void> _start(ProtocolVersionWorkload* self, Database cx) {
state std::vector<ISimulator::ProcessInfo*> allProcesses = g_pSimulator->getAllProcesses();
state std::vector<ISimulator::ProcessInfo*>::iterator diffVersionProcess =
find_if(allProcesses.begin(), allProcesses.end(),
[](const ISimulator::ProcessInfo* p) { return p->protocolVersion != currentProtocolVersion; });
ASSERT(diffVersionProcess != allProcesses.end());
RequestStream<ProtocolInfoRequest> requestStream{ Endpoint{ { (*diffVersionProcess)->addresses },
WLTOKEN_PROTOCOL_INFO } };
ProtocolInfoReply reply = wait(retryBrokenPromise(requestStream, ProtocolInfoRequest{}));
ASSERT(reply.version != g_network->protocolVersion());
return Void();
}
Future<bool> check(Database const& cx) override { return true; }
void getMetrics(vector<PerfMetric>& m) override {}
};
WorkloadFactory<ProtocolVersionWorkload> ProtocolVersionWorkloadFactory("ProtocolVersion");


@ -448,6 +448,9 @@ struct RemoveServersSafelyWorkload : TestWorkload {
if (coordinators.size() > 2) {
auto randomCoordinator = deterministicRandom()->randomChoice(coordinators);
coordExcl = AddressExclusion(randomCoordinator.ip, randomCoordinator.port);
TraceEvent("RemoveAndKill", functionId)
.detail("Step", "ChooseCoordinator")
.detail("Coordinator", describe(coordExcl));
}
}
std::copy(toKill.begin(), toKill.end(), std::back_inserter(toKillArray));
@ -457,11 +460,12 @@ struct RemoveServersSafelyWorkload : TestWorkload {
state bool safe = false;
state std::set<AddressExclusion> failSet =
random_subset(toKillArray, deterministicRandom()->randomInt(0, toKillArray.size() + 1));
if (coordExcl.isValid()) {
failSet.insert(coordExcl);
}
toKillMarkFailedArray.resize(failSet.size());
std::copy(failSet.begin(), failSet.end(), toKillMarkFailedArray.begin());
std::sort(toKillMarkFailedArray.begin(), toKillMarkFailedArray.end());
if (coordExcl.isValid()) {
toKillMarkFailedArray.push_back(coordExcl);
}
TraceEvent("RemoveAndKill", functionId)
.detail("Step", "SafetyCheck")
.detail("Exclusions", describe(toKillMarkFailedArray));
@ -500,6 +504,7 @@ struct RemoveServersSafelyWorkload : TestWorkload {
toKillMarkFailedArray.erase(removeServer);
}
ASSERT(toKillMarkFailedArray.size() <= toKillArray.size());
std::sort(toKillArray.begin(), toKillArray.end());
auto removeServer = toKill.begin();
TraceEvent("RemoveAndKill", functionId)
.detail("Step", "ReplaceNonFailedKillSet")


@ -432,6 +432,20 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
ASSERT(e.code() == error_code_key_outside_legal_range);
tx->reset();
}
// test case when registered range is the same as the underlying module
try {
state Standalone<RangeResultRef> result = wait(tx->getRange(KeyRangeRef(LiteralStringRef("\xff\xff/worker_interfaces/"),
LiteralStringRef("\xff\xff/worker_interfaces0")),
CLIENT_KNOBS->TOO_MANY));
// We should have at least 1 process in the cluster
ASSERT(result.size());
state KeyValueRef entry = deterministicRandom()->randomChoice(result);
Optional<Value> singleRes = wait(tx->get(entry.key));
ASSERT(singleRes.present() && singleRes.get() == entry.value);
tx->reset();
} catch (Error& e) {
wait(tx->onError(e));
}
return Void();
}


@ -27,8 +27,6 @@
#include "fdbclient/ManagementAPI.actor.h"
#include "flow/actorcompiler.h" // This must be the last #include.
extern bool noUnseed;
struct StatusWorkload : TestWorkload {
double testDuration, requestsPerSecond;
bool enableLatencyBands;
@ -53,8 +51,6 @@ struct StatusWorkload : TestWorkload {
// This is sort of a hack, but generate code coverage *requirements* for everything in schema
schemaCoverageRequirements(parsedSchema.get());
}
noUnseed = true;
}
std::string description() const override { return "StatusWorkload"; }
@ -170,7 +166,7 @@ struct StatusWorkload : TestWorkload {
state double issued = now();
StatusObject result = wait(StatusClient::statusFetcher(cx));
++self->replies;
BinaryWriter br(AssumeVersion(currentProtocolVersion));
BinaryWriter br(AssumeVersion(g_network->protocolVersion()));
save(br, result);
self->totalSize += br.getLength();
TraceEvent("StatusWorkloadReply").detail("ReplySize", br.getLength()).detail("Latency", now() - issued);//.detail("Reply", json_spirit::write_string(json_spirit::mValue(result)));


@ -156,7 +156,7 @@ struct StorefrontWorkload : TestWorkload {
updaters.clear();
// set value for the order
BinaryWriter wr(AssumeVersion(currentProtocolVersion)); wr << itemList;
BinaryWriter wr(AssumeVersion(g_network->protocolVersion())); wr << itemList;
tr.set(orderKey, wr.toValue());
wait( tr.commit() );
@ -187,7 +187,7 @@ struct StorefrontWorkload : TestWorkload {
int orderIdx;
for(orderIdx=0; orderIdx<values.size(); orderIdx++) {
vector<int> saved;
BinaryReader br( values[orderIdx].value, AssumeVersion(currentProtocolVersion) );
BinaryReader br( values[orderIdx].value, AssumeVersion(g_network->protocolVersion()) );
br >> saved;
for(int c=0; c<saved.size(); c++)
result[saved[c]]++;
@ -247,7 +247,7 @@ struct StorefrontWorkload : TestWorkload {
for( int i=0; i < it->second; i++ )
itemList.push_back( it->first );
}
BinaryWriter wr(AssumeVersion(currentProtocolVersion)); wr << itemList;
BinaryWriter wr(AssumeVersion(g_network->protocolVersion())); wr << itemList;
if( wr.toValue() != val.get().toString() ) {
TraceEvent(SevError, "TestFailure")
.detail("Reason", "OrderContentsMismatch")


@ -24,6 +24,8 @@ set(FLOW_SRCS
FileTraceLogWriter.h
Hash3.c
Hash3.h
Histogram.cpp
Histogram.h
IDispatched.h
IRandom.h
IThreadPool.cpp
@ -84,19 +86,33 @@ set(FLOW_SRCS
rte_memcpy.h
serialize.cpp
serialize.h
stacktrace.amalgamation.cpp
stacktrace.h
test_memcpy.cpp
test_memcpy_perf.cpp
version.cpp)
version.cpp
xxhash.c
xxhash.h)
add_library(stacktrace stacktrace.amalgamation.cpp stacktrace.h)
if (USE_ASAN)
target_compile_definitions(stacktrace PRIVATE ADDRESS_SANITIZER)
elseif(USE_MSAN)
target_compile_definitions(stacktrace PRIVATE MEMORY_SANITIZER)
elseif(USE_UBSAN)
target_compile_definitions(stacktrace PRIVATE UNDEFINED_BEHAVIOR_SANITIZER)
elseif(USE_TSAN)
target_compile_definitions(stacktrace PRIVATE THREAD_SANITIZER DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL=1)
endif()
if(UNIX AND NOT APPLE)
list(APPEND FLOW_SRCS folly_memcpy.S)
endif()
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/SourceVersion.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/SourceVersion.h)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/config.h)
add_flow_target(STATIC_LIBRARY NAME flow SRCS ${FLOW_SRCS})
target_link_libraries(flow PRIVATE stacktrace)
if (NOT APPLE AND NOT WIN32)
set (FLOW_LIBS ${FLOW_LIBS} rt)
elseif(WIN32)


@ -59,7 +59,7 @@ void printBitsBig(size_t const size, void const * const ptr)
template<typename IntType>
void testCompressedInt(IntType n, StringRef rep = StringRef()) {
BinaryWriter w(AssumeVersion(currentProtocolVersion));
BinaryWriter w(AssumeVersion(g_network->protocolVersion()));
CompressedInt<IntType> cn(n);
w << cn;
@ -74,7 +74,7 @@ void testCompressedInt(IntType n, StringRef rep = StringRef()) {
rep = w.toValue();
cn.value = 0;
BinaryReader r(rep, AssumeVersion(currentProtocolVersion));
BinaryReader r(rep, AssumeVersion(g_network->protocolVersion()));
r >> cn;
if(cn.value != n) {


@ -80,6 +80,10 @@ uint32_t DeterministicRandom::randomUInt32() {
return gen64();
}
uint64_t DeterministicRandom::randomUInt64() {
return gen64();
}
uint32_t DeterministicRandom::randomSkewedUInt32(uint32_t min, uint32_t maxPlusOne) {
std::uniform_real_distribution<double> distribution(std::log(min), std::log(maxPlusOne - 1));
double logpower = distribution(random);
@ -120,3 +124,10 @@ void DeterministicRandom::addref() {
void DeterministicRandom::delref() {
ReferenceCounted<DeterministicRandom>::delref();
}
void generateRandomData(uint8_t* buffer, int length) {
for (int i = 0; i < length; i += sizeof(uint32_t)) {
uint32_t val = deterministicRandom()->randomUInt32();
memcpy(&buffer[i], &val, std::min(length - i, (int)sizeof(uint32_t)));
}
}
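Note that length need not be a multiple of four; the std::min clamps the final memcpy. A quick hedged usage sketch:

void generateRandomDataExample() { // hypothetical
	uint8_t buf[10];
	generateRandomData(buf, sizeof(buf)); // last iteration copies only the remaining 2 bytes
}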


@ -44,6 +44,7 @@ public:
int randomInt(int min, int maxPlusOne) override;
int64_t randomInt64(int64_t min, int64_t maxPlusOne) override;
uint32_t randomUInt32() override;
uint64_t randomUInt64() override;
uint32_t randomSkewedUInt32(uint32_t min, uint32_t maxPlusOne) override;
UID randomUniqueID() override;
char randomAlphaNumeric() override;


@ -24,6 +24,7 @@
#include "flow/Error.h"
#include "flow/Platform.h"
#include "flow/config.h"
// ALLOC_INSTRUMENTATION_STDOUT enables non-sampled logging of all allocations and deallocations to stdout to be processed by tools/alloc_instrumentation.py
//#define ALLOC_INSTRUMENTATION_STDOUT ENABLED(NOT_IN_CLEAN)

187
flow/Histogram.cpp Normal file

@ -0,0 +1,187 @@
/*
* Histogram.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <flow/Histogram.h>
#include <flow/flow.h>
#include <flow/UnitTest.h>
// TODO: remove dependency on fdbrpc.
// we need to be able to check if we're in simulation so that the histograms are properly
// scoped to the right "machine".
// either we pull g_simulator into flow, or flow (and the I/O path) will be unable to log performance
// metrics.
#include <fdbrpc/simulator.h>
// pull in some global pointers too: These types are implemented in fdbrpc/sim2.actor.cpp, which is not available here.
// Yuck. If you're not using the simulator, these will remain null, and all should be well.
// TODO: create an execution context abstraction that allows independent flow instances within a process.
// The simulator would be the main user of it, and histogram would be the only other user (for now).
ISimulator* g_pSimulator = nullptr;
thread_local ISimulator::ProcessInfo* ISimulator::currentProcess = nullptr;
// Fallback registry when we're not in simulation -- if we had execution contexts we wouldn't need to check if
// we have a simulated context here; we'd just use the current context regardless.
static HistogramRegistry* globalHistograms = nullptr;
#pragma region HistogramRegistry
HistogramRegistry& GetHistogramRegistry() {
ISimulator::ProcessInfo* info = g_simulator.getCurrentProcess();
if (info) {
// in simulator; scope histograms to simulated process
return info->histograms;
}
// avoid link order issues where the registry hasn't been initialized, but we're
// instantiating a histogram
if (globalHistograms == nullptr) {
// Note: This will show up as a leak on shutdown, but we're OK with that.
globalHistograms = new HistogramRegistry();
}
return *globalHistograms;
}
void HistogramRegistry::registerHistogram(Histogram* h) {
if (histograms.find(h->name()) != histograms.end()) {
TraceEvent(SevError, "HistogramDoubleRegistered").detail("group", h->group).detail("op", h->op);
ASSERT(false);
}
histograms.insert(std::pair<std::string, Histogram*>(h->name(), h));
}
void HistogramRegistry::unregisterHistogram(Histogram* h) {
std::string name = h->name();
if (histograms.find(name) == histograms.end()) {
TraceEvent(SevError, "HistogramNotRegistered").detail("group", h->group).detail("op", h->op);
}
int count = histograms.erase(name);
ASSERT(count == 1);
}
Histogram* HistogramRegistry::lookupHistogram(std::string name) {
auto h = histograms.find(name);
if (h == histograms.end()) {
return nullptr;
}
return h->second;
}
void HistogramRegistry::logReport() {
for (auto& i : histograms) {
i.second->writeToLog();
i.second->clear();
}
}
#pragma endregion // HistogramRegistry
#pragma region Histogram
const std::unordered_map<Histogram::Unit, std::string> Histogram::UnitToStringMapper = {
{ Histogram::Unit::microseconds, "microseconds" },
{ Histogram::Unit::bytes, "bytes" },
{ Histogram::Unit::bytes_per_second, "bytes_per_second" }
};
void Histogram::writeToLog() {
bool active = false;
for (uint32_t i = 0; i < 32; i++) {
if (buckets[i]) {
active = true;
break;
}
}
if (!active) {
return;
}
TraceEvent e(SevInfo, "Histogram");
e.detail("Group", group).detail("Op", op).detail("Unit", UnitToStringMapper.at(unit));
for (uint32_t i = 0; i < 32; i++) {
uint32_t value = (i < 31) ? (((uint32_t)1) << (i + 1)) : UINT32_MAX; // shifting by 32 would overflow uint32_t
if (buckets[i]) {
switch (unit) {
case Unit::microseconds:
e.detail(format("LessThan%u.%03u", value / 1000, value % 1000), buckets[i]);
break;
case Unit::bytes:
case Unit::bytes_per_second:
e.detail(format("LessThan%u", value), buckets[i]);
break;
default:
ASSERT(false);
}
}
}
}
#pragma endregion // Histogram
TEST_CASE("/flow/histogram/smoke_test") {
{
Reference<Histogram> h =
Histogram::getHistogram(LiteralStringRef("smoke_test"), LiteralStringRef("counts"), Histogram::Unit::bytes);
h->sample(0);
ASSERT(h->buckets[0] == 1);
h->sample(1);
ASSERT(h->buckets[0] == 2);
h->sample(2);
ASSERT(h->buckets[1] == 1);
GetHistogramRegistry().logReport();
ASSERT(h->buckets[0] == 0);
h->sample(0);
ASSERT(h->buckets[0] == 1);
h = Histogram::getHistogram(LiteralStringRef("smoke_test"), LiteralStringRef("counts2"),
Histogram::Unit::bytes);
// confirm that old h was deallocated.
h = Histogram::getHistogram(LiteralStringRef("smoke_test"), LiteralStringRef("counts"), Histogram::Unit::bytes);
ASSERT(h->buckets[0] == 0);
h = Histogram::getHistogram(LiteralStringRef("smoke_test"), LiteralStringRef("times"),
Histogram::Unit::microseconds);
h->sampleSeconds(0.000000);
h->sampleSeconds(0.0000019);
ASSERT(h->buckets[0] == 2);
h->sampleSeconds(0.0000021);
ASSERT(h->buckets[1] == 1);
h->sampleSeconds(0.000015);
ASSERT(h->buckets[3] == 1);
h->sampleSeconds(4400.0);
ASSERT(h->buckets[31] == 1);
GetHistogramRegistry().logReport();
}
// h has been deallocated. Does this crash?
GetHistogramRegistry().logReport();
return Void();
}

137
flow/Histogram.h Normal file

@ -0,0 +1,137 @@
/*
* Histogram.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FLOW_HISTOGRAM_H
#define FLOW_HISTOGRAM_H
#pragma once
#include <flow/Arena.h>
#include <string>
#include <map>
#include <unordered_map>
#ifdef _WIN32
#include <intrin.h>
#pragma intrinsic(_BitScanReverse)
#endif
class Histogram;
class HistogramRegistry {
public:
void registerHistogram(Histogram* h);
void unregisterHistogram(Histogram* h);
Histogram* lookupHistogram(std::string name);
void logReport();
private:
// This map is ordered by key so that ops within the same group end up
// next to each other in the trace log.
std::map<std::string, Histogram*> histograms;
};
HistogramRegistry& GetHistogramRegistry();
/*
* A fast histogram with power-of-two spaced buckets.
*
* For more information about this technique, see:
* https://www.fsl.cs.stonybrook.edu/project-osprof.html
*/
class Histogram sealed : public ReferenceCounted<Histogram> {
public:
enum class Unit { microseconds, bytes, bytes_per_second };
private:
static const std::unordered_map<Unit, std::string> UnitToStringMapper;
Histogram(std::string group, std::string op, Unit unit, HistogramRegistry& registry)
: group(group), op(op), unit(unit), registry(registry), ReferenceCounted<Histogram>() {
ASSERT(UnitToStringMapper.find(unit) != UnitToStringMapper.end());
clear();
}
static std::string generateName(std::string group, std::string op) { return group + ":" + op; }
public:
~Histogram() { registry.unregisterHistogram(this); }
static Reference<Histogram> getHistogram(StringRef group, StringRef op, Unit unit) {
std::string group_str = group.toString();
std::string op_str = op.toString();
std::string name = generateName(group_str, op_str);
HistogramRegistry& registry = GetHistogramRegistry();
Histogram* h = registry.lookupHistogram(name);
if (!h) {
h = new Histogram(group_str, op_str, unit, registry);
registry.registerHistogram(h);
return Reference<Histogram>(h);
} else {
return Reference<Histogram>::addRef(h);
}
}
// This histogram buckets samples into powers of two.
inline void sample(uint32_t sample) {
size_t idx;
#ifdef _WIN32
unsigned long index;
// _BitScanReverse sets index to the position of the first non-zero bit, so
// _BitScanReverse(sample) ~= log_2(sample). _BitScanReverse returns false if
// sample is zero.
idx = _BitScanReverse(&index, sample) ? index : 0;
#else
// __builtin_clz counts the leading zeros in its uint32_t argument. So, 31-clz ~= log_2(sample).
// __builtin_clz(0) is undefined.
idx = sample ? (31 - __builtin_clz(sample)) : 0;
#endif
ASSERT(idx < 32);
buckets[idx]++;
}
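// Worked example: sample(5000) computes idx = 12, since 2^12 = 4096 <= 5000 < 8192,
// and increments buckets[12]; writeToLog() reports that bucket as "LessThan8192" for Unit::bytes.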
inline void sampleSeconds(double delta) {
uint64_t delta_usec = (delta * 1000000);
if (delta_usec > UINT32_MAX) {
sample(UINT32_MAX);
} else {
sample((uint32_t)delta_usec); // reuse the microsecond conversion computed above
}
}
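// Worked example (matching the smoke test in Histogram.cpp): sampleSeconds(0.0000021)
// records 2 microseconds and lands in buckets[1]; deltas beyond ~4295 seconds saturate at UINT32_MAX.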
void clear() {
for (uint32_t& i : buckets) {
i = 0;
}
}
void writeToLog();
std::string name() { return generateName(this->group, this->op); }
std::string const group;
std::string const op;
Unit const unit;
HistogramRegistry& registry;
uint32_t buckets[32];
};
#endif // FLOW_HISTOGRAM_H
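Putting the pieces together, a minimal hedged usage sketch of the new API (the caller and the group/op names are illustrative, not from the diff):

#include "flow/Histogram.h"

void recordReadExample(int bytes, double seconds) { // hypothetical caller
	// getHistogram looks the histogram up by "group:op" in the current registry,
	// creating it on first use; repeated calls return the same instance.
	Reference<Histogram> readBytes = Histogram::getHistogram(
	    LiteralStringRef("ExampleGroup"), LiteralStringRef("ReadBytes"), Histogram::Unit::bytes);
	Reference<Histogram> readLatency = Histogram::getHistogram(
	    LiteralStringRef("ExampleGroup"), LiteralStringRef("ReadLatency"), Histogram::Unit::microseconds);
	readBytes->sample(bytes);
	readLatency->sampleSeconds(seconds);
	// A periodic reporter (e.g. the histogramReport() actor wired up in main above)
	// eventually writes one TraceEvent per non-empty histogram and clears them:
	GetHistogramRegistry().logReport();
}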


@ -66,7 +66,7 @@ class UID {
public:
constexpr static FileIdentifier file_identifier = 15597147;
UID() { part[0] = part[1] = 0; }
UID( uint64_t a, uint64_t b ) { part[0]=a; part[1]=b; }
constexpr UID(uint64_t a, uint64_t b) : part{ a, b } {}
std::string toString() const;
std::string shortString() const;
bool isValid() const { return part[0] || part[1]; }
@ -130,6 +130,7 @@ public:
virtual int randomInt(int min, int maxPlusOne) = 0;
virtual int64_t randomInt64(int64_t min, int64_t maxPlusOne) = 0;
virtual uint32_t randomUInt32() = 0;
virtual uint64_t randomUInt64() = 0;
virtual UID randomUniqueID() = 0;
virtual char randomAlphaNumeric() = 0;
virtual std::string randomAlphaNumeric( int length ) = 0;
@ -174,4 +175,7 @@ Reference<IRandom> deterministicRandom();
// non-deterministic contexts.
Reference<IRandom> nondeterministicRandom();
// Populates a buffer with a random sequence of bytes
void generateRandomData(uint8_t* buffer, int length);
#endif


@ -86,6 +86,7 @@ void FlowKnobs::initialize(bool randomize, bool isSimulated) {
init( INCOMPATIBLE_PEER_DELAY_BEFORE_LOGGING, 5.0 );
init( PING_LOGGING_INTERVAL, 3.0 );
init( PING_SAMPLE_AMOUNT, 100 );
init( NETWORK_CONNECT_SAMPLE_AMOUNT, 100 );
init( TLS_CERT_REFRESH_DELAY_SECONDS, 12*60*60 );
init( TLS_SERVER_CONNECTION_THROTTLE_TIMEOUT, 9.0 );
@ -174,6 +175,7 @@ void FlowKnobs::initialize(bool randomize, bool isSimulated) {
init( TRACE_RETRY_OPEN_INTERVAL, 1.00 );
init( MIN_TRACE_SEVERITY, isSimulated ? 1 : 10 ); // Related to the trace severity in Trace.h
init( MAX_TRACE_SUPPRESSIONS, 1e4 );
init( TRACE_DATETIME_ENABLED, true ); // trace time in human readable format (always real time)
init( TRACE_SYNC_ENABLED, 0 );
init( TRACE_EVENT_METRIC_UNITS_PER_SAMPLE, 500 );
init( TRACE_EVENT_THROTTLER_SAMPLE_EXPIRY, 1800.0 ); // 30 mins
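For context, knobs initialized here are consumed through the global FLOW_KNOBS pointer; a hedged sketch using the new TRACE_DATETIME_ENABLED knob:

if (FLOW_KNOBS->TRACE_DATETIME_ENABLED) {
	// trace events carry a human-readable DateTime field in addition to Time
}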


@ -100,6 +100,7 @@ public:
double INCOMPATIBLE_PEER_DELAY_BEFORE_LOGGING;
double PING_LOGGING_INTERVAL;
int PING_SAMPLE_AMOUNT;
int NETWORK_CONNECT_SAMPLE_AMOUNT;
int TLS_CERT_REFRESH_DELAY_SECONDS;
double TLS_SERVER_CONNECTION_THROTTLE_TIMEOUT;
@ -193,6 +194,7 @@ public:
double TRACE_RETRY_OPEN_INTERVAL;
int MIN_TRACE_SEVERITY;
int MAX_TRACE_SUPPRESSIONS;
bool TRACE_DATETIME_ENABLED;
int TRACE_SYNC_ENABLED;
int TRACE_EVENT_METRIC_UNITS_PER_SAMPLE;
int TRACE_EVENT_THROTTLER_SAMPLE_EXPIRY;

Some files were not shown because too many files have changed in this diff.