commit 08a5f17660
Merge branch 'master' of https://github.com/apple/foundationdb into feature-sim-time-batching

# Conflicts:
#	fdbserver/DataDistribution.actor.cpp
@@ -7,7 +7,8 @@ bindings/java/foundationdb-client*.jar
 bindings/java/foundationdb-tests*.jar
 bindings/java/fdb-java-*-sources.jar
 packaging/msi/FDBInstaller.msi
+builds/
 cmake-build-debug/
 # Generated source, build, and packaging files
 *.g.cpp
 *.g.h
@@ -78,6 +78,8 @@ if(NOT WIN32)
 test/unit/fdb_api.cpp
 test/unit/fdb_api.hpp)
 
+set(UNIT_TEST_VERSION_510_SRCS test/unit/unit_tests_version_510.cpp)
+
 if(OPEN_FOR_IDE)
 add_library(fdb_c_performance_test OBJECT test/performance_test.c test/test.h)
 add_library(fdb_c_ryw_benchmark OBJECT test/ryw_benchmark.c test/test.h)
@@ -85,6 +87,7 @@ if(NOT WIN32)
 add_library(mako OBJECT ${MAKO_SRCS})
 add_library(fdb_c_setup_tests OBJECT test/unit/setup_tests.cpp)
 add_library(fdb_c_unit_tests OBJECT ${UNIT_TEST_SRCS})
+add_library(fdb_c_unit_tests_version_510 OBJECT ${UNIT_TEST_VERSION_510_SRCS})
 else()
 add_executable(fdb_c_performance_test test/performance_test.c test/test.h)
 add_executable(fdb_c_ryw_benchmark test/ryw_benchmark.c test/test.h)
@@ -92,6 +95,7 @@ if(NOT WIN32)
 add_executable(mako ${MAKO_SRCS})
 add_executable(fdb_c_setup_tests test/unit/setup_tests.cpp)
 add_executable(fdb_c_unit_tests ${UNIT_TEST_SRCS})
+add_executable(fdb_c_unit_tests_version_510 ${UNIT_TEST_VERSION_510_SRCS})
 strip_debug_symbols(fdb_c_performance_test)
 strip_debug_symbols(fdb_c_ryw_benchmark)
 strip_debug_symbols(fdb_c_txn_size_test)
@@ -104,8 +108,10 @@ if(NOT WIN32)
 add_dependencies(fdb_c_unit_tests doctest)
 target_include_directories(fdb_c_setup_tests PUBLIC ${DOCTEST_INCLUDE_DIR})
 target_include_directories(fdb_c_unit_tests PUBLIC ${DOCTEST_INCLUDE_DIR})
+target_include_directories(fdb_c_unit_tests_version_510 PUBLIC ${DOCTEST_INCLUDE_DIR})
 target_link_libraries(fdb_c_setup_tests PRIVATE fdb_c Threads::Threads)
 target_link_libraries(fdb_c_unit_tests PRIVATE fdb_c Threads::Threads)
+target_link_libraries(fdb_c_unit_tests_version_510 PRIVATE fdb_c Threads::Threads)
 
 # do not set RPATH for mako
 set_property(TARGET mako PROPERTY SKIP_BUILD_RPATH TRUE)
@@ -135,6 +141,11 @@ if(NOT WIN32)
 COMMAND $<TARGET_FILE:fdb_c_unit_tests>
 @CLUSTER_FILE@
 fdb)
+add_fdbclient_test(
+NAME fdb_c_unit_tests_version_510
+COMMAND $<TARGET_FILE:fdb_c_unit_tests_version_510>
+@CLUSTER_FILE@
+fdb)
 add_fdbclient_test(
 NAME fdb_c_external_client_unit_tests
 COMMAND $<TARGET_FILE:fdb_c_unit_tests>
@@ -158,6 +169,10 @@ set_target_properties(c_workloads PROPERTIES
 LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/share/foundationdb")
 target_link_libraries(c_workloads PUBLIC fdb_c)
 
+if (NOT WIN32 AND NOT APPLE AND NOT OPEN_FOR_IDE)
+target_link_options(c_workloads PRIVATE "LINKER:--version-script=${CMAKE_CURRENT_SOURCE_DIR}/external_workload.map,-z,nodelete")
+endif()
+
 # TODO: re-enable once the old vcxproj-based build system is removed.
 #generate_export_header(fdb_c EXPORT_MACRO_NAME "DLLEXPORT"
 # EXPORT_FILE_NAME ${CMAKE_CURRENT_BINARY_DIR}/foundationdb/fdb_c_export.h)
@@ -0,0 +1,7 @@
+{
+global:
+workloadFactory;
+local:
+*;
+};
+
@@ -74,10 +74,41 @@ def write_unix_asm(asmfile, functions, prefix):
 for f in functions:
 asmfile.write("\n.globl %s%s\n" % (prefix, f))
 asmfile.write("%s%s:\n" % (prefix, f))
+
+# These assembly implementations of versioned fdb c api functions must have the following properties.
+#
+# 1. Don't require dynamic relocation.
+#
+# 2. Perform a tail-call to the function pointer that works for a
+# function with any number of arguments. For example, since registers x0-x7 are used
+# to pass arguments in the Arm calling convention we must not use x0-x7
+# here.
+#
+# You can compile this example c program to get a rough idea of how to
+# load the extern symbol and make a tail call.
+#
+# $ cat test.c
+# typedef int (*function)();
+# extern function f;
+# int g() { return f(); }
+# $ cc -S -O3 -fPIC test.c && grep -A 10 '^g:' test.[sS]
+# g:
+# .LFB0:
+# .cfi_startproc
+# adrp x0, :got:f
+# ldr x0, [x0, #:got_lo12:f]
+# ldr x0, [x0]
+# br x0
+# .cfi_endproc
+# .LFE0:
+# .size g, .-g
+# .ident "GCC: (GNU) 8.3.1 20190311 (Red Hat 8.3.1-3)"
+
 if platform == "linux-aarch64":
-asmfile.write("\tldr x16, =fdb_api_ptr_%s\n" % (f))
-asmfile.write("\tldr x16, [x16]\n")
-asmfile.write("\tbr x16\n")
+asmfile.write("\tadrp x8, :got:fdb_api_ptr_%s\n" % (f))
+asmfile.write("\tldr x8, [x8, #:got_lo12:fdb_api_ptr_%s]\n" % (f))
+asmfile.write("\tldr x8, [x8]\n")
+asmfile.write("\tbr x8\n")
 else:
 asmfile.write(
 "\tmov r11, qword ptr [%sfdb_api_ptr_%s@GOTPCREL+rip]\n" % (prefix, f))
@@ -219,7 +219,7 @@ GetRangeResult get_range(fdb::Transaction& tr,
 for (int i = 0; i < out_count; ++i) {
 std::string key((const char*)out_kv[i].key, out_kv[i].key_length);
 std::string value((const char*)out_kv[i].value, out_kv[i].value_length);
-results.push_back(std::make_pair(key, value));
+results.emplace_back(key, value);
 }
 return GetRangeResult{ results, out_more != 0, 0 };
 }
@@ -263,13 +263,15 @@ TEST_CASE("fdb_future_set_callback") {
 &context));
 
 fdb_error_t err = wait_future(f1);
 
+context.event.wait(); // Wait until callback is called
+
 if (err) {
 fdb::EmptyFuture f2 = tr.on_error(err);
 fdb_check(wait_future(f2));
 continue;
 }
 
-context.event.wait();
 break;
 }
 }
@@ -515,10 +517,10 @@ TEST_CASE("write system key") {
 fdb::Transaction tr(db);
 
 std::string syskey("\xff\x02");
-fdb_check(tr.set_option(FDB_TR_OPTION_ACCESS_SYSTEM_KEYS, nullptr, 0));
-tr.set(syskey, "bar");
 
 while (1) {
+fdb_check(tr.set_option(FDB_TR_OPTION_ACCESS_SYSTEM_KEYS, nullptr, 0));
+tr.set(syskey, "bar");
 fdb::EmptyFuture f1 = tr.commit();
 
 fdb_error_t err = wait_future(f1);
@@ -949,16 +951,25 @@ TEST_CASE("fdb_transaction_clear") {
 }
 
 TEST_CASE("fdb_transaction_atomic_op FDB_MUTATION_TYPE_ADD") {
-insert_data(db, create_data({ { "foo", "a" } }));
+insert_data(db, create_data({ { "foo", "\x00" } }));
 
 fdb::Transaction tr(db);
 int8_t param = 1;
+int potentialCommitCount = 0;
 while (1) {
 tr.atomic_op(key("foo"), (const uint8_t*)&param, sizeof(param), FDB_MUTATION_TYPE_ADD);
+if (potentialCommitCount + 1 == 256) {
+// Trying to commit again might overflow the one unsigned byte we're looking at
+break;
+}
+++potentialCommitCount;
 fdb::EmptyFuture f1 = tr.commit();
 
 fdb_error_t err = wait_future(f1);
 if (err) {
+if (fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE_NOT_COMMITTED, err)) {
+--potentialCommitCount;
+}
 fdb::EmptyFuture f2 = tr.on_error(err);
 fdb_check(wait_future(f2));
 continue;
@@ -969,7 +980,8 @@ TEST_CASE("fdb_transaction_atomic_op FDB_MUTATION_TYPE_ADD") {
 auto value = get_value(key("foo"), /* snapshot */ false, {});
 REQUIRE(value.has_value());
 CHECK(value->size() == 1);
-CHECK(value->data()[0] == 'b'); // incrementing 'a' results in 'b'
+CHECK(uint8_t(value->data()[0]) > 0);
+CHECK(uint8_t(value->data()[0]) <= potentialCommitCount);
 }
 
 TEST_CASE("fdb_transaction_atomic_op FDB_MUTATION_TYPE_BIT_AND") {
@@ -1139,14 +1151,19 @@ TEST_CASE("fdb_transaction_atomic_op FDB_MUTATION_TYPE_BIT_XOR") {
 
 fdb::Transaction tr(db);
 char param[] = { 'a', 'd' };
+int potentialCommitCount = 0;
 while (1) {
 tr.atomic_op(key("foo"), (const uint8_t*)"b", 1, FDB_MUTATION_TYPE_BIT_XOR);
 tr.atomic_op(key("bar"), (const uint8_t*)param, 2, FDB_MUTATION_TYPE_BIT_XOR);
 tr.atomic_op(key("baz"), (const uint8_t*)"d", 1, FDB_MUTATION_TYPE_BIT_XOR);
+++potentialCommitCount;
 fdb::EmptyFuture f1 = tr.commit();
 
 fdb_error_t err = wait_future(f1);
 if (err) {
+if (fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE_NOT_COMMITTED, err)) {
+--potentialCommitCount;
+}
 fdb::EmptyFuture f2 = tr.on_error(err);
 fdb_check(wait_future(f2));
 continue;
@@ -1154,6 +1171,11 @@ TEST_CASE("fdb_transaction_atomic_op FDB_MUTATION_TYPE_BIT_XOR") {
 break;
 }
 
+if (potentialCommitCount != 1) {
+MESSAGE("Transaction may not have committed exactly once. Suppressing assertions");
+return;
+}
+
 auto value = get_value(key("foo"), /* snapshot */ false, {});
 REQUIRE(value.has_value());
 CHECK(value->size() == 1);
@@ -1204,13 +1226,18 @@ TEST_CASE("fdb_transaction_atomic_op FDB_MUTATION_TYPE_APPEND_IF_FITS") {
 insert_data(db, create_data({ { "foo", "f" } }));
 
 fdb::Transaction tr(db);
+int potentialCommitCount = 0;
 while (1) {
 tr.atomic_op(key("foo"), (const uint8_t*)"db", 2, FDB_MUTATION_TYPE_APPEND_IF_FITS);
 tr.atomic_op(key("bar"), (const uint8_t*)"foundation", 10, FDB_MUTATION_TYPE_APPEND_IF_FITS);
+++potentialCommitCount;
 fdb::EmptyFuture f1 = tr.commit();
 
 fdb_error_t err = wait_future(f1);
 if (err) {
+if (fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE_NOT_COMMITTED, err)) {
+--potentialCommitCount;
+}
 fdb::EmptyFuture f2 = tr.on_error(err);
 fdb_check(wait_future(f2));
 continue;
@@ -1218,13 +1245,18 @@ TEST_CASE("fdb_transaction_atomic_op FDB_MUTATION_TYPE_APPEND_IF_FITS") {
 break;
 }
 
-auto value = get_value(key("foo"), /* snapshot */ false, {});
-REQUIRE(value.has_value());
-CHECK(value->compare("fdb") == 0);
+auto value_foo = get_value(key("foo"), /* snapshot */ false, {});
+REQUIRE(value_foo.has_value());
 
-value = get_value(key("bar"), /* snapshot */ false, {});
-REQUIRE(value.has_value());
-CHECK(value->compare("foundation") == 0);
+auto value_bar = get_value(key("bar"), /* snapshot */ false, {});
+REQUIRE(value_bar.has_value());
+
+if (potentialCommitCount != 1) {
+MESSAGE("Transaction may not have committed exactly once. Suppressing assertions");
+} else {
+CHECK(value_foo.value() == "fdb");
+CHECK(value_bar.value() == "foundation");
+}
 }
 
 TEST_CASE("fdb_transaction_atomic_op FDB_MUTATION_TYPE_MAX") {
@@ -1576,7 +1608,7 @@ TEST_CASE("fdb_transaction_watch max watches") {
 fdb_check(f1.set_callback(
 +[](FDBFuture* f, void* param) {
 fdb_error_t err = fdb_future_get_error(f);
-if (err != 1101) { // operation_cancelled
+if (err != /*operation_cancelled*/ 1101 && !fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE, err)) {
 CHECK(err == 1032); // too_many_watches
 }
 auto* event = static_cast<std::shared_ptr<FdbEvent>*>(param);
@@ -1587,7 +1619,7 @@ TEST_CASE("fdb_transaction_watch max watches") {
 fdb_check(f2.set_callback(
 +[](FDBFuture* f, void* param) {
 fdb_error_t err = fdb_future_get_error(f);
-if (err != 1101) { // operation_cancelled
+if (err != /*operation_cancelled*/ 1101 && !fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE, err)) {
 CHECK(err == 1032); // too_many_watches
 }
 auto* event = static_cast<std::shared_ptr<FdbEvent>*>(param);
@@ -1598,7 +1630,7 @@ TEST_CASE("fdb_transaction_watch max watches") {
 fdb_check(f3.set_callback(
 +[](FDBFuture* f, void* param) {
 fdb_error_t err = fdb_future_get_error(f);
-if (err != 1101) { // operation_cancelled
+if (err != /*operation_cancelled*/ 1101 && !fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE, err)) {
 CHECK(err == 1032); // too_many_watches
 }
 auto* event = static_cast<std::shared_ptr<FdbEvent>*>(param);
@@ -1609,7 +1641,7 @@ TEST_CASE("fdb_transaction_watch max watches") {
 fdb_check(f4.set_callback(
 +[](FDBFuture* f, void* param) {
 fdb_error_t err = fdb_future_get_error(f);
-if (err != 1101) { // operation_cancelled
+if (err != /*operation_cancelled*/ 1101 && !fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE, err)) {
 CHECK(err == 1032); // too_many_watches
 }
 auto* event = static_cast<std::shared_ptr<FdbEvent>*>(param);
@@ -1671,7 +1703,7 @@ TEST_CASE("fdb_transaction_cancel") {
 // ... until the transaction has been reset.
 tr.reset();
 fdb::ValueFuture f2 = tr.get("foo", /* snapshot */ false);
-fdb_check(wait_future(f2));
+CHECK(wait_future(f2) != 1025); // transaction_cancelled
 }
 
 TEST_CASE("fdb_transaction_add_conflict_range") {
@@ -2146,22 +2178,29 @@ TEST_CASE("monitor_network_busyness") {
 }
 
 int main(int argc, char** argv) {
-if (argc != 3 && argc != 4) {
+if (argc < 3) {
 std::cout << "Unit tests for the FoundationDB C API.\n"
-<< "Usage: fdb_c_unit_tests /path/to/cluster_file key_prefix [externalClient]" << std::endl;
+<< "Usage: fdb_c_unit_tests /path/to/cluster_file key_prefix [externalClient] [doctest args]"
+<< std::endl;
 return 1;
 }
 fdb_check(fdb_select_api_version(710));
-if (argc == 4) {
+if (argc >= 4) {
 std::string externalClientLibrary = argv[3];
-fdb_check(fdb_network_set_option(
-FDBNetworkOption::FDB_NET_OPTION_DISABLE_LOCAL_CLIENT, reinterpret_cast<const uint8_t*>(""), 0));
-fdb_check(fdb_network_set_option(FDBNetworkOption::FDB_NET_OPTION_EXTERNAL_CLIENT_LIBRARY,
-reinterpret_cast<const uint8_t*>(externalClientLibrary.c_str()),
-externalClientLibrary.size()));
+if (externalClientLibrary.substr(0, 2) != "--") {
+fdb_check(fdb_network_set_option(
+FDBNetworkOption::FDB_NET_OPTION_DISABLE_LOCAL_CLIENT, reinterpret_cast<const uint8_t*>(""), 0));
+fdb_check(fdb_network_set_option(FDBNetworkOption::FDB_NET_OPTION_EXTERNAL_CLIENT_LIBRARY,
+reinterpret_cast<const uint8_t*>(externalClientLibrary.c_str()),
+externalClientLibrary.size()));
+}
 }
 
+/* fdb_check(fdb_network_set_option( */
+/* FDBNetworkOption::FDB_NET_OPTION_CLIENT_BUGGIFY_ENABLE, reinterpret_cast<const uint8_t*>(""), 0)); */
+
 doctest::Context context;
+context.applyCommandLine(argc, argv);
 
 fdb_check(fdb_setup_network());
 std::thread network_thread{ &fdb_run_network };
@@ -0,0 +1,118 @@
+/*
+ * unit_tests_header_510.cpp
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2021 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Unit tests for the FoundationDB C API, at api header version 510
+
+#include "fdb_c_options.g.h"
+#include <thread>
+
+#define FDB_API_VERSION 510
+static_assert(FDB_API_VERSION == 510, "Don't change this! This test intentionally tests an old api header version");
+
+#include <foundationdb/fdb_c.h>
+
+#define DOCTEST_CONFIG_IMPLEMENT
+#include "doctest.h"
+
+#include "flow/config.h"
+
+void fdb_check(fdb_error_t e) {
+if (e) {
+std::cerr << fdb_get_error(e) << std::endl;
+std::abort();
+}
+}
+
+std::string clusterFilePath;
+std::string prefix;
+
+FDBDatabase* db;
+
+struct Future {
+FDBFuture* f = nullptr;
+Future() = default;
+explicit Future(FDBFuture* f) : f(f) {}
+~Future() {
+if (f)
+fdb_future_destroy(f);
+}
+};
+
+struct Transaction {
+FDBTransaction* tr = nullptr;
+Transaction() = default;
+explicit Transaction(FDBTransaction* tr) : tr(tr) {}
+~Transaction() {
+if (tr)
+fdb_transaction_destroy(tr);
+}
+};
+
+// TODO add more tests. The motivation for this test for now is to test the
+// assembly code that handles emulating older api versions, but there's no
+// reason why this shouldn't also test api version 510 specific behavior.
+
+TEST_CASE("GRV") {
+Transaction tr;
+fdb_check(fdb_database_create_transaction(db, &tr.tr));
+Future grv{ fdb_transaction_get_read_version(tr.tr) };
+fdb_check(fdb_future_block_until_ready(grv.f));
+}
+
+int main(int argc, char** argv) {
+if (argc < 3) {
+std::cout << "Unit tests for the FoundationDB C API.\n"
+<< "Usage: " << argv[0] << " /path/to/cluster_file key_prefix [doctest args]" << std::endl;
+return 1;
+}
+fdb_check(fdb_select_api_version(FDB_API_VERSION));
+
+doctest::Context context;
+context.applyCommandLine(argc, argv);
+
+fdb_check(fdb_setup_network());
+std::thread network_thread{ &fdb_run_network };
+
+{
+FDBCluster* cluster;
+Future clusterFuture{ fdb_create_cluster(argv[1]) };
+fdb_check(fdb_future_block_until_ready(clusterFuture.f));
+fdb_check(fdb_future_get_cluster(clusterFuture.f, &cluster));
+Future databaseFuture{ fdb_cluster_create_database(cluster, (const uint8_t*)"DB", 2) };
+fdb_check(fdb_future_block_until_ready(databaseFuture.f));
+fdb_check(fdb_future_get_database(databaseFuture.f, &db));
+fdb_cluster_destroy(cluster);
+}
+
+clusterFilePath = std::string(argv[1]);
+prefix = argv[2];
+int res = context.run();
+fdb_database_destroy(db);
+
+if (context.shouldExit()) {
+fdb_check(fdb_stop_network());
+network_thread.join();
+return res;
+}
+fdb_check(fdb_stop_network());
+network_thread.join();
+
+return res;
+}
@@ -138,6 +138,11 @@ else()
 add_library(fdb_java SHARED fdbJNI.cpp)
 add_library(java_workloads SHARED JavaWorkload.cpp)
 endif()
+
+if (NOT WIN32 AND NOT APPLE AND NOT OPEN_FOR_IDE)
+target_link_options(java_workloads PRIVATE "LINKER:--version-script=${CMAKE_SOURCE_DIR}/bindings/c/external_workload.map,-z,nodelete")
+endif()
+
 target_include_directories(fdb_java PRIVATE ${JNI_INCLUDE_DIRS})
 # libfdb_java.so is loaded by fdb-java.jar and doesn't need to depened on jvm shared libraries.
 target_link_libraries(fdb_java PRIVATE fdb_c)
@@ -74,3 +74,12 @@ add_custom_command(OUTPUT ${package_file}
 add_custom_target(python_package DEPENDS ${package_file})
 add_dependencies(python_package python_binding)
 add_dependencies(packages python_package)
+
+if (NOT WIN32 AND NOT OPEN_FOR_IDE)
+add_fdbclient_test(
+NAME fdbcli_tests
+COMMAND ${CMAKE_SOURCE_DIR}/bindings/python/tests/fdbcli_tests.py
+${CMAKE_BINARY_DIR}/bin/fdbcli
+@CLUSTER_FILE@
+)
+endif()
@@ -0,0 +1,93 @@
+#!/usr/bin/env python3
+
+import sys
+import subprocess
+import logging
+import functools
+
+def enable_logging(level=logging.ERROR):
+    """Enable logging in the function with the specified logging level
+
+    Args:
+        level (logging.<level>, optional): logging level for the decorated function. Defaults to logging.ERROR.
+    """
+    def func_decorator(func):
+        @functools.wraps(func)
+        def wrapper(*args,**kwargs):
+            # initialize logger
+            logger = logging.getLogger(func.__name__)
+            logger.setLevel(level)
+            # set logging format
+            handler = logging.StreamHandler()
+            handler_format = logging.Formatter('[%(asctime)s] - %(filename)s:%(lineno)d - %(levelname)s - %(name)s - %(message)s')
+            handler.setFormatter(handler_format)
+            handler.setLevel(level)
+            logger.addHandler(handler)
+            # pass the logger to the decorated function
+            result = func(logger, *args,**kwargs)
+            return result
+        return wrapper
+    return func_decorator
+
+def run_fdbcli_command(*args):
+    """run the fdbcli statement: fdbcli --exec '<arg1> <arg2> ... <argN>'.
+
+    Returns:
+        string: Console output from fdbcli
+    """
+    commands = command_template + ["{}".format(' '.join(args))]
+    return subprocess.run(commands, stdout=subprocess.PIPE).stdout.decode('utf-8').strip()
+
+@enable_logging()
+def advanceversion(logger):
+    # get current read version
+    version1 = int(run_fdbcli_command('getversion'))
+    logger.debug("Read version: {}".format(version1))
+    # advance version to a much larger value compared to the current version
+    version2 = version1 * 10000
+    logger.debug("Advanced to version: " + str(version2))
+    run_fdbcli_command('advanceversion', str(version2))
+    # after running the advanceversion command,
+    # check the read version is advanced to the specified value
+    version3 = int(run_fdbcli_command('getversion'))
+    logger.debug("Read version: {}".format(version3))
+    assert version3 >= version2
+    # advance version to a smaller value compared to the current version
+    # this should be a no-op
+    run_fdbcli_command('advanceversion', str(version1))
+    # get the current version to make sure the version did not decrease
+    version4 = int(run_fdbcli_command('getversion'))
+    logger.debug("Read version: {}".format(version4))
+    assert version4 >= version3
+
+@enable_logging()
+def maintenance(logger):
+    # expected fdbcli output when running 'maintenance' while there's no ongoing maintenance
+    no_maintenance_output = 'No ongoing maintenance.'
+    output1 = run_fdbcli_command('maintenance')
+    assert output1 == no_maintenance_output
+    # set maintenance on a fake zone id for 10 seconds
+    run_fdbcli_command('maintenance', 'on', 'fake_zone_id', '10')
+    # show current maintenance status
+    output2 = run_fdbcli_command('maintenance')
+    logger.debug("Maintenance status: " + output2)
+    items = output2.split(' ')
+    # make sure this specific zone id is under maintenance
+    assert 'fake_zone_id' in items
+    logger.debug("Remaining time(seconds): " + items[-2])
+    assert 0 < int(items[-2]) < 10
+    # turn off maintenance
+    run_fdbcli_command('maintenance', 'off')
+    # check maintenance status
+    output3 = run_fdbcli_command('maintenance')
+    assert output3 == no_maintenance_output
+
+if __name__ == '__main__':
+    # fdbcli_tests.py <path_to_fdbcli_binary> <path_to_fdb_cluster_file>
+    assert len(sys.argv) == 3, "Please pass arguments: <path_to_fdbcli_binary> <path_to_fdb_cluster_file>"
+    # shell command template
+    command_template = [sys.argv[1], '-C', sys.argv[2], '--exec']
+    # tests for fdbcli commands
+    # assertions will fail if fdbcli does not work as expected
+    advanceversion()
+    maintenance()
@@ -971,7 +971,7 @@ For example, you can change a process type or update coordinators by manipulatin
 
 #. ``\xff\xff/configuration/process/class_type/<address> := <class_type>`` Read/write. Reading keys in the range will retrieve processes' class types. Setting keys in the range will update processes' class types. The process matching ``<address>`` will be assigned to the given class type if the commit is successful. The valid class types are ``storage``, ``transaction``, ``resolution``, etc. A full list of class type can be found via ``fdbcli`` command ``help setclass``. Clearing keys is forbidden in the range. Instead, you can set the type as ``default``, which will clear the assigned class type if existing. For more details, see help text of ``fdbcli`` command ``setclass``.
 #. ``\xff\xff/configuration/process/class_source/<address> := <class_source>`` Read-only. Reading keys in the range will retrieve processes' class source. The class source is one of ``command_line``, ``configure_auto``, ``set_class`` and ``invalid``, indicating the source that the process's class type comes from.
-#. ``\xff\xff/configuration/coordinators/processes := <ip:port>,<ip:port>,...,<ip:port>`` Read/write. A single key, if read, will return a comma delimited string of coordinators's network addresses. Thus to provide a new set of cooridinators, set the key with a correct formatted string of new coordinators' network addresses. As there's always the need to have coordinators, clear on the key is forbidden and a transaction will fail with the ``special_keys_api_failure`` error if the clear is committed. For more details, see help text of ``fdbcli`` command ``coordinators``.
+#. ``\xff\xff/configuration/coordinators/processes := <ip:port>,<ip:port>,...,<ip:port>`` Read/write. A single key, if read, will return a comma delimited string of coordinators' network addresses. Thus to provide a new set of cooridinators, set the key with a correct formatted string of new coordinators' network addresses. As there's always the need to have coordinators, clear on the key is forbidden and a transaction will fail with the ``special_keys_api_failure`` error if the clear is committed. For more details, see help text of ``fdbcli`` command ``coordinators``.
 #. ``\xff\xff/configuration/coordinators/cluster_description := <new_description>`` Read/write. A single key, if read, will return the cluster description. Thus modifying the key will update the cluster decription. The new description needs to match ``[A-Za-z0-9_]+``, otherwise, the ``special_keys_api_failure`` error will be thrown. In addition, clear on the key is meaningless thus forbidden. For more details, see help text of ``fdbcli`` command ``coordinators``.
 
 The ``<address>`` here is the network address of the corresponding process. Thus the general form is ``ip:port``.
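
Note: the special keys documented in the hunk above can be exercised directly from a client. Below is a minimal sketch (not part of the commit) using the Python bindings; the special_key_space_enable_writes transaction option name and the exact key layout are assumptions taken from the text above, so treat it as illustrative rather than authoritative.

import fdb

fdb.api_version(630)
db = fdb.open()  # default cluster file

@fdb.transactional
def set_process_class(tr, address, class_type):
    # Writes under \xff\xff/configuration/ require enabling special-key-space writes first (assumed option name).
    tr.options.set_special_key_space_enable_writes()
    tr[b'\xff\xff/configuration/process/class_type/' + address.encode()] = class_type.encode()

@fdb.transactional
def list_process_classes(tr):
    # Reading the range returns one key per process, valued with its class type.
    prefix = b'\xff\xff/configuration/process/class_type/'
    return {k[len(prefix):].decode(): v.decode() for k, v in tr.get_range_startswith(prefix)}

set_process_class(db, '127.0.0.1:4500', 'storage')
print(list_process_classes(db))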
@@ -121,6 +121,16 @@
 "counter":0,
 "roughness":0.0
 },
+"fetched_versions":{
+"hz":0.0,
+"counter":0,
+"roughness":0.0
+},
+"fetches_from_logs":{
+"hz":0.0,
+"counter":0,
+"roughness":0.0
+},
 "grv_latency_statistics":{ // GRV Latency metrics are grouped according to priority (currently batch or default).
 "default":{
 "count":0,
@@ -604,6 +614,10 @@
 "data_distribution_disabled_for_rebalance":true,
 "data_distribution_disabled":true,
 "active_primary_dc":"pv",
+"bounce_impact":{
+"can_clean_bounce":true,
+"reason":""
+},
 "configuration":{
 "log_anti_quorum":0,
 "log_replicas":2,
@@ -668,6 +682,16 @@
 "ssd-rocksdb-experimental",
 "memory"
 ]},
+"tss_count":1,
+"tss_storage_engine":{
+"$enum":[
+"ssd",
+"ssd-1",
+"ssd-2",
+"ssd-redwood-experimental",
+"ssd-rocksdb-experimental",
+"memory"
+]},
 "coordinators_count":1,
 "excluded_servers":[
 {
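
Note: the schema hunks above add fetched_versions and fetches_from_logs to the storage role metrics, bounce_impact to the cluster section, and tss_count/tss_storage_engine to the configuration section. A small sketch (not part of the commit) of reading these fields from a client through the \xff\xff/status/json special key; the lookups are written defensively because the fields only appear when the corresponding feature is active.

import json
import fdb

fdb.api_version(630)
db = fdb.open()

@fdb.transactional
def read_status(tr):
    # The whole machine-readable status document is exposed as a single special key.
    return json.loads(tr[b'\xff\xff/status/json'][:])

status = read_status(db)
cluster = status.get('cluster', {})
print('tss_count:', cluster.get('configuration', {}).get('tss_count'))
print('bounce_impact:', cluster.get('bounce_impact'))
for process in cluster.get('processes', {}).values():
    for role in process.get('roles', []):
        if role.get('role') == 'storage':
            print('fetched_versions hz:', role.get('fetched_versions', {}).get('hz'))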
@@ -3,16 +3,29 @@ Release Notes
 #############
 
+6.3.14
+======
+* Fixed fdbbackup start command that automatically configures database with backup workers to only do so when using partitioned logs. `(PR #4863) <https://github.com/apple/foundationdb/pull/4863>`_
+* Added ``cluster.bounce_impact`` section to status to report if there will be any extra effects when bouncing the cluster, and if so, the reason for those effects. `(PR #4770) <https://github.com/apple/foundationdb/pull/4770>`_
+* Added ``fetched_versions`` to the storage metrics section of status to report how fast a storage server is catching up in versions. `(PR #4770) <https://github.com/apple/foundationdb/pull/4770>`_
+* Added ``fetches_from_logs`` to the storage metrics section of status to report how frequently a storage server fetches updates from transaction logs. `(PR #4770) <https://github.com/apple/foundationdb/pull/4770>`_
+* Added the ``bypass_unreadable`` transaction option which allows ``get`` operations to read from sections of keyspace that have become unreadable because of versionstamp operations. `(PR #4774) <https://github.com/apple/foundationdb/pull/4774>`_
+* Fix several packaging issues. The osx package should now install successfully, and the structure of the RPM and DEB packages should match that of 6.2. `(PR #4810) <https://github.com/apple/foundationdb/pull/4810>`_
+* Fix an accounting error that could potentially result in inaccuracies in priority busyness metrics. `(PR #4824) <https://github.com/apple/foundationdb/pull/4824>`_
+
 6.3.13
 ======
+* Added ``commit_batching_window_size`` to the proxy roles section of status to record statistics about commit batching window size on each proxy. `(PR #4736) <https://github.com/apple/foundationdb/pull/4736>`_
 * The multi-version client now requires at most two client connections with version 6.2 or larger, regardless of how many external clients are configured. Clients older than 6.2 will continue to create an additional connection each. `(PR #4667) <https://github.com/apple/foundationdb/pull/4667>`_
-* Fix an accounting error that could potentially result in inaccuracies in priority busyness metrics. `(PR #4824) <https://github.com/apple/foundationdb/pull/4824>`_
 
 6.3.12
 ======
 * Change the default for --knob_tls_server_handshake_threads to 64. The previous was 1000. This avoids starting 1000 threads by default, but may adversely affect recovery time for large clusters using tls. Users with large tls clusters should consider explicitly setting this knob in their foundationdb.conf file. `(PR #4421) <https://github.com/apple/foundationdb/pull/4421>`_
 * Fix accounting error that could cause commits to incorrectly fail with ``proxy_memory_limit_exceeded``. `(PR #4526) <https://github.com/apple/foundationdb/pull/4526>`_
 * As an optimization, partial restore using target key ranges now filters backup log data prior to loading it into the database. `(PR #4554) <https://github.com/apple/foundationdb/pull/4554>`_
+* Fix fault tolerance calculation when there are no tLogs in LogSet. `(PR #4454) <https://github.com/apple/foundationdb/pull/4454>`_
+* Change client's ``iteration_progression`` size defaults from 256 to 4096 bytes for better performance. `(PR #4416) <https://github.com/apple/foundationdb/pull/4416>`_
+* Add the ability to instrument java driver actions, such as ``FDBTransaction`` and ``RangeQuery``. `(PR #4385) <https://github.com/apple/foundationdb/pull/4385>`_
 
 6.3.11
 ======
@@ -31,7 +31,9 @@ Fixes
 Status
 ------
 * Added ``commit_batching_window_size`` to the proxy roles section of status to record statistics about commit batching window size on each proxy. `(PR #4735) <https://github.com/apple/foundationdb/pull/4735>`_
+* Added ``cluster.bounce_impact`` section to status to report if there will be any extra effects when bouncing the cluster, and if so, the reason for those effects. `(PR #4770) <https://github.com/apple/foundationdb/pull/4770>`_
+* Added ``fetched_versions`` to the storage metrics section of status to report how fast a storage server is catching up in versions. `(PR #4770) <https://github.com/apple/foundationdb/pull/4770>`_
+* Added ``fetches_from_logs`` to the storage metrics section of status to report how frequently a storage server fetches updates from transaction logs. `(PR #4770) <https://github.com/apple/foundationdb/pull/4770>`_
 
 Bindings
 --------
@@ -3357,7 +3357,7 @@ int main(int argc, char* argv[]) {
 deleteData = true;
 break;
 case OPT_MIN_CLEANUP_SECONDS:
-knobs.push_back(std::make_pair("min_cleanup_seconds", args->OptionArg()));
+knobs.emplace_back("min_cleanup_seconds", args->OptionArg());
 break;
 case OPT_FORCE:
 forceAction = true;
@@ -3452,7 +3452,7 @@ int main(int argc, char* argv[]) {
 return FDB_EXIT_ERROR;
 }
 syn = syn.substr(7);
-knobs.push_back(std::make_pair(syn, args->OptionArg()));
+knobs.emplace_back(syn, args->OptionArg());
 break;
 }
 case OPT_BACKUPKEYS:
@@ -4212,7 +4212,7 @@ int main(int argc, char* argv[]) {
 s = s.substr(LiteralStringRef("struct ").size());
 #endif
 
-typeNames.push_back(std::make_pair(s, i->first));
+typeNames.emplace_back(s, i->first);
 }
 std::sort(typeNames.begin(), typeNames.end());
 for (int i = 0; i < typeNames.size(); i++) {
@@ -35,6 +35,7 @@
 #include "fdbclient/CoordinationInterface.h"
 #include "fdbclient/FDBOptions.g.h"
 #include "fdbclient/TagThrottle.h"
+#include "fdbclient/Tuple.h"
 
 #include "fdbclient/ThreadSafeTransaction.h"
 #include "flow/DeterministicRandom.h"
@@ -496,11 +497,15 @@ void initHelp() {
 helpMap["configure"] = CommandHelp(
 "configure [new] "
 "<single|double|triple|three_data_hall|three_datacenter|ssd|memory|memory-radixtree-beta|proxies=<PROXIES>|"
-"commit_proxies=<COMMIT_PROXIES>|grv_proxies=<GRV_PROXIES>|logs=<LOGS>|resolvers=<RESOLVERS>>*",
+"commit_proxies=<COMMIT_PROXIES>|grv_proxies=<GRV_PROXIES>|logs=<LOGS>|resolvers=<RESOLVERS>>*|"
+"perpetual_storage_wiggle=<WIGGLE_SPEED>",
 "change the database configuration",
 "The `new' option, if present, initializes a new database with the given configuration rather than changing "
 "the configuration of an existing one. When used, both a redundancy mode and a storage engine must be "
-"specified.\n\nRedundancy mode:\n single - one copy of the data. Not fault tolerant.\n double - two copies "
+"specified.\n\ntss: when enabled, configures the testing storage server for the cluster instead."
+"When used with new to set up tss for the first time, it requires both a count and a storage engine."
+"To disable the testing storage server, run \"configure tss count=0\"\n\n"
+"Redundancy mode:\n single - one copy of the data. Not fault tolerant.\n double - two copies "
 "of data (survive one failure).\n triple - three copies of data (survive two failures).\n three_data_hall - "
 "See the Admin Guide.\n three_datacenter - See the Admin Guide.\n\nStorage engine:\n ssd - B-Tree storage "
 "engine optimized for solid state disks.\n memory - Durable in-memory storage engine for small "
@@ -517,8 +522,11 @@ void initHelp() {
 "1, or set to -1 which restores the number of GRV proxies to the default value.\n\nlogs=<LOGS>: Sets the "
 "desired number of log servers in the cluster. Must be at least 1, or set to -1 which restores the number of "
 "logs to the default value.\n\nresolvers=<RESOLVERS>: Sets the desired number of resolvers in the cluster. "
-"Must be at least 1, or set to -1 which restores the number of resolvers to the default value.\n\nSee the "
-"FoundationDB Administration Guide for more information.");
+"Must be at least 1, or set to -1 which restores the number of resolvers to the default value.\n\n"
+"perpetual_storage_wiggle=<WIGGLE_SPEED>: Set the value speed (a.k.a., the number of processes that the Data "
+"Distributor should wiggle at a time). Currently, only 0 and 1 are supported. The value 0 means to disable the "
+"perpetual storage wiggle.\n\n"
+"See the FoundationDB Administration Guide for more information.");
 helpMap["fileconfigure"] = CommandHelp(
 "fileconfigure [new] <FILENAME>",
 "change the database configuration from a file",
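
Note: the help text above introduces perpetual_storage_wiggle=<WIGGLE_SPEED> and the tss count/storage-engine arguments to configure. A hypothetical sketch of driving both through fdbcli, in the style of the run_fdbcli_command() helper added by this commit's fdbcli_tests.py; the fdbcli binary and cluster-file paths are placeholders, not part of the hunk.

import subprocess

def run_fdbcli_command(*args, fdbcli='fdbcli', cluster_file='fdb.cluster'):
    # fdbcli -C <cluster file> --exec '<command>'
    cmd = [fdbcli, '-C', cluster_file, '--exec', ' '.join(args)]
    return subprocess.run(cmd, stdout=subprocess.PIPE).stdout.decode('utf-8').strip()

# Enable the perpetual storage wiggle; per the help text only 0 and 1 are supported.
print(run_fdbcli_command('configure', 'perpetual_storage_wiggle=1'))
# Disable the testing storage server, as the help text describes.
print(run_fdbcli_command('configure', 'tss', 'count=0'))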
@@ -1124,6 +1132,17 @@ void printStatus(StatusObjectReader statusObj,
 if (statusObjConfig.get("log_routers", intVal))
 outputString += format("\n Desired Log Routers - %d", intVal);
 
+if (statusObjConfig.get("tss_count", intVal) && intVal > 0) {
+int activeTss = 0;
+if (statusObjCluster.has("active_tss_count")) {
+statusObjCluster.get("active_tss_count", activeTss);
+}
+outputString += format("\n TSS - %d/%d", activeTss, intVal);
+if (statusObjConfig.get("tss_storage_engine", strVal))
+outputString += format("\n TSS Storage Engine - %s", strVal.c_str());
+}
+
 outputString += "\n Usable Regions - ";
 if (statusObjConfig.get("usable_regions", intVal)) {
 outputString += std::to_string(intVal);
@@ -2766,6 +2785,7 @@ void configureGenerator(const char* text, const char* line, std::vector<std::string>& lc) {
 "grv_proxies=",
 "logs=",
 "resolvers=",
+"perpetual_storage_wiggle=",
 nullptr };
 arrayGenerator(text, line, opts, lc);
 }
@@ -3088,7 +3108,7 @@ struct CLIOptions {
 return FDB_EXIT_ERROR;
 }
 syn = syn.substr(7);
-knobs.push_back(std::make_pair(syn, args.OptionArg()));
+knobs.emplace_back(syn, args.OptionArg());
 break;
 }
 case OPT_DEBUG_TLS:
@@ -404,8 +404,14 @@ ACTOR Future<Void> readCommitted(Database cx,
 state RangeResult values = wait(tr.getRange(begin, end, limits));
 
 // When this buggify line is enabled, if there are more than 1 result then use half of the results
+// Copy the data instead of messing with the results directly to avoid TSS issues.
 if (values.size() > 1 && BUGGIFY) {
-values.resize(values.arena(), values.size() / 2);
+RangeResult copy;
+// only copy first half of values into copy
+for (int i = 0; i < values.size() / 2; i++) {
+copy.push_back_deep(copy.arena(), values[i]);
+}
+values = copy;
 values.more = true;
 // Half of the time wait for this tr to expire so that the next read is at a different version
 if (deterministicRandom()->random01() < 0.5)
@@ -469,9 +475,15 @@ ACTOR Future<Void> readCommitted(Database cx,
 
 state RangeResult rangevalue = wait(tr.getRange(nextKey, end, limits));
 
-// When this buggify line is enabled, if there are more than 1 result then use half of the results
+// When this buggify line is enabled, if there are more than 1 result then use half of the results.
+// Copy the data instead of messing with the results directly to avoid TSS issues.
 if (rangevalue.size() > 1 && BUGGIFY) {
-rangevalue.resize(rangevalue.arena(), rangevalue.size() / 2);
+RangeResult copy;
+// only copy first half of rangevalue into copy
+for (int i = 0; i < rangevalue.size() / 2; i++) {
+copy.push_back_deep(copy.arena(), rangevalue[i]);
+}
+rangevalue = copy;
 rangevalue.more = true;
 // Half of the time wait for this tr to expire so that the next read is at a different version
 if (deterministicRandom()->random01() < 0.5)
@@ -57,7 +57,8 @@ set(FDBCLIENT_SRCS
 SpecialKeySpace.actor.h
 ReadYourWrites.actor.cpp
 ReadYourWrites.h
-RestoreWorkerInterface.actor.h
+RestoreInterface.cpp
+RestoreInterface.h
 RunTransaction.actor.h
 RYWIterator.cpp
 RYWIterator.h
@@ -68,6 +69,7 @@ set(FDBCLIENT_SRCS
 Status.h
 StatusClient.actor.cpp
 StatusClient.h
+StorageServerInterface.cpp
 StorageServerInterface.h
 Subspace.cpp
 Subspace.h
@@ -29,7 +29,6 @@
 #include "fdbclient/FDBTypes.h"
 #include "fdbclient/StorageServerInterface.h"
 #include "fdbclient/CommitTransaction.h"
-#include "fdbserver/RatekeeperInterface.h"
 #include "fdbclient/TagThrottle.h"
 #include "fdbclient/GlobalConfig.h"
 
@@ -288,9 +287,12 @@ struct GetKeyServerLocationsReply {
 Arena arena;
 std::vector<std::pair<KeyRangeRef, vector<StorageServerInterface>>> results;
 
+// if any storage servers in results have a TSS pair, that mapping is in here
+std::vector<std::pair<UID, StorageServerInterface>> resultsTssMapping;
+
 template <class Ar>
 void serialize(Ar& ar) {
-serializer(ar, results, arena);
+serializer(ar, results, resultsTssMapping, arena);
 }
 };
 
@ -23,7 +23,7 @@
 #pragma once

 #include "fdbclient/FDBTypes.h"
-#include "fdbserver/Knobs.h"
+#include "fdbclient/Knobs.h"

 // The versioned message has wire format : -1, version, messages
 static const int32_t VERSION_HEADER = -1;

@ -95,7 +95,7 @@ struct MutationRef {
 	// Amplify atomicOp size to consider such extra workload.
 	// A good value for FASTRESTORE_ATOMICOP_WEIGHT needs experimental evaluations.
 	if (isAtomicOp()) {
-		return totalSize() * SERVER_KNOBS->FASTRESTORE_ATOMICOP_WEIGHT;
+		return totalSize() * CLIENT_KNOBS->FASTRESTORE_ATOMICOP_WEIGHT;
 	} else {
 		return totalSize();
 	}
@ -33,12 +33,15 @@ const int MAX_CLUSTER_FILE_BYTES = 60000;
 constexpr UID WLTOKEN_CLIENTLEADERREG_GETLEADER(-1, 2);
 constexpr UID WLTOKEN_CLIENTLEADERREG_OPENDATABASE(-1, 3);

+// the value of this endpoint should be stable and not change.
 constexpr UID WLTOKEN_PROTOCOL_INFO(-1, 10);
+constexpr UID WLTOKEN_CLIENTLEADERREG_DESCRIPTOR_MUTABLE(-1, 11);

-// The coordinator interface as exposed to clients
+// well known endpoints published to the client.
 struct ClientLeaderRegInterface {
 	RequestStream<struct GetLeaderRequest> getLeader;
 	RequestStream<struct OpenDatabaseCoordRequest> openDatabase;
+	RequestStream<struct CheckDescriptorMutableRequest> checkDescriptorMutable;

 	ClientLeaderRegInterface() {}
 	ClientLeaderRegInterface(NetworkAddress remote);

@ -236,4 +239,28 @@ struct ProtocolInfoRequest {
 	}
 };

+// Returns true if the cluster descriptor may be modified.
+struct CheckDescriptorMutableReply {
+	constexpr static FileIdentifier file_identifier = 7784299;
+	CheckDescriptorMutableReply() = default;
+	explicit CheckDescriptorMutableReply(bool isMutable) : isMutable(isMutable) {}
+	bool isMutable;
+	template <class Ar>
+	void serialize(Ar& ar) {
+		serializer(ar, isMutable);
+	}
+};
+
+// Allows client to check if allowed to change the cluster descriptor.
+struct CheckDescriptorMutableRequest {
+	constexpr static FileIdentifier file_identifier = 214729;
+	ReplyPromise<CheckDescriptorMutableReply> reply;
+	CheckDescriptorMutableRequest() {}
+
+	template <class Ar>
+	void serialize(Ar& ar) {
+		serializer(ar, reply);
+	}
+};
+
 #endif
@ -31,7 +31,8 @@ void DatabaseConfiguration::resetInternal() {
 	commitProxyCount = grvProxyCount = resolverCount = desiredTLogCount = tLogWriteAntiQuorum = tLogReplicationFactor =
 	    storageTeamSize = desiredLogRouterCount = -1;
 	tLogVersion = TLogVersion::DEFAULT;
-	tLogDataStoreType = storageServerStoreType = KeyValueStoreType::END;
+	tLogDataStoreType = storageServerStoreType = testingStorageServerStoreType = KeyValueStoreType::END;
+	desiredTSSCount = 0;
 	tLogSpillType = TLogSpillType::DEFAULT;
 	autoCommitProxyCount = CLIENT_KNOBS->DEFAULT_AUTO_COMMIT_PROXIES;
 	autoGrvProxyCount = CLIENT_KNOBS->DEFAULT_AUTO_GRV_PROXIES;

@ -43,6 +44,7 @@ void DatabaseConfiguration::resetInternal() {
 	remoteDesiredTLogCount = -1;
 	remoteTLogReplicationFactor = repopulateRegionAntiQuorum = 0;
 	backupWorkerEnabled = false;
+	perpetualStorageWiggleSpeed = 0;
 }

 void parse(int* i, ValueRef const& v) {
@ -194,9 +196,9 @@ bool DatabaseConfiguration::isValid() const {
 	    getDesiredRemoteLogs() >= 1 && remoteTLogReplicationFactor >= 0 && repopulateRegionAntiQuorum >= 0 &&
 	    repopulateRegionAntiQuorum <= 1 && usableRegions >= 1 && usableRegions <= 2 && regions.size() <= 2 &&
 	    (usableRegions == 1 || regions.size() == 2) && (regions.size() == 0 || regions[0].priority >= 0) &&
-	    (regions.size() == 0 ||
-	     tLogPolicy->info() !=
-	         "dcid^2 x zoneid^2 x 1"))) { // We cannot specify regions with three_datacenter replication
+	    (regions.size() == 0 || tLogPolicy->info() != "dcid^2 x zoneid^2 x 1") &&
+	    // We cannot specify regions with three_datacenter replication
+	    (perpetualStorageWiggleSpeed == 0 || perpetualStorageWiggleSpeed == 1))) {
 		return false;
 	}
 	std::set<Key> dcIds;
@ -298,6 +300,25 @@ StatusObject DatabaseConfiguration::toJSON(bool noPolicies) const {
 		result["storage_engine"] = "custom";
 	}

+	if (desiredTSSCount > 0) {
+		result["tss_count"] = desiredTSSCount;
+		if (testingStorageServerStoreType == KeyValueStoreType::SSD_BTREE_V1) {
+			result["tss_storage_engine"] = "ssd-1";
+		} else if (testingStorageServerStoreType == KeyValueStoreType::SSD_BTREE_V2) {
+			result["tss_storage_engine"] = "ssd-2";
+		} else if (testingStorageServerStoreType == KeyValueStoreType::SSD_REDWOOD_V1) {
+			result["tss_storage_engine"] = "ssd-redwood-experimental";
+		} else if (testingStorageServerStoreType == KeyValueStoreType::SSD_ROCKSDB_V1) {
+			result["tss_storage_engine"] = "ssd-rocksdb-experimental";
+		} else if (testingStorageServerStoreType == KeyValueStoreType::MEMORY_RADIXTREE) {
+			result["tss_storage_engine"] = "memory-radixtree-beta";
+		} else if (testingStorageServerStoreType == KeyValueStoreType::MEMORY) {
+			result["tss_storage_engine"] = "memory-2";
+		} else {
+			result["tss_storage_engine"] = "custom";
+		}
+	}
+
 	result["log_spill"] = (int)tLogSpillType;

 	if (remoteTLogReplicationFactor == 1) {
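For status JSON, the new tss_storage_engine field reuses the same engine-name strings that the existing storage_engine field reports. A small self-contained sketch of that enum-to-string mapping (plain C++ with illustrative names, not FDB's KeyValueStoreType API):

    #include <iostream>
    #include <string>

    // Illustrative subset of storage engine types.
    enum class StoreType { SsdBtreeV1, SsdBtreeV2, SsdRedwoodV1, SsdRocksdbV1, MemoryRadixTree, Memory, Unknown };

    // Map an engine type to the string reported in status JSON.
    std::string storeTypeToStatusString(StoreType t) {
        switch (t) {
        case StoreType::SsdBtreeV1:      return "ssd-1";
        case StoreType::SsdBtreeV2:      return "ssd-2";
        case StoreType::SsdRedwoodV1:    return "ssd-redwood-experimental";
        case StoreType::SsdRocksdbV1:    return "ssd-rocksdb-experimental";
        case StoreType::MemoryRadixTree: return "memory-radixtree-beta";
        case StoreType::Memory:          return "memory-2";
        default:                         return "custom";
        }
    }

    int main() {
        std::cout << storeTypeToStatusString(StoreType::SsdRedwoodV1) << "\n"; // ssd-redwood-experimental
    }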
@ -352,7 +373,7 @@ StatusObject DatabaseConfiguration::toJSON(bool noPolicies) const {
 	}

 	result["backup_worker_enabled"] = (int32_t)backupWorkerEnabled;
-
+	result["perpetual_storage_wiggle"] = perpetualStorageWiggleSpeed;
 	return result;
 }

@ -448,6 +469,8 @@ bool DatabaseConfiguration::setInternal(KeyRef key, ValueRef value) {
 		}
 	} else if (ck == LiteralStringRef("storage_replicas")) {
 		parse(&storageTeamSize, value);
+	} else if (ck == LiteralStringRef("tss_count")) {
+		parse(&desiredTSSCount, value);
 	} else if (ck == LiteralStringRef("log_version")) {
 		parse((&type), value);
 		type = std::max((int)TLogVersion::MIN_RECRUITABLE, type);

@ -470,6 +493,9 @@ bool DatabaseConfiguration::setInternal(KeyRef key, ValueRef value) {
 	} else if (ck == LiteralStringRef("storage_engine")) {
 		parse((&type), value);
 		storageServerStoreType = (KeyValueStoreType::StoreType)type;
+	} else if (ck == LiteralStringRef("tss_storage_engine")) {
+		parse((&type), value);
+		testingStorageServerStoreType = (KeyValueStoreType::StoreType)type;
 	} else if (ck == LiteralStringRef("auto_commit_proxies")) {
 		parse(&autoCommitProxyCount, value);
 	} else if (ck == LiteralStringRef("auto_grv_proxies")) {

@ -499,6 +525,8 @@ bool DatabaseConfiguration::setInternal(KeyRef key, ValueRef value) {
 		parse(&repopulateRegionAntiQuorum, value);
 	} else if (ck == LiteralStringRef("regions")) {
 		parse(&regions, value);
+	} else if (ck == LiteralStringRef("perpetual_storage_wiggle")) {
+		parse(&perpetualStorageWiggleSpeed, value);
 	} else {
 		return false;
 	}
@ -225,6 +225,10 @@ struct DatabaseConfiguration {
 	int32_t storageTeamSize;
 	KeyValueStoreType storageServerStoreType;

+	// Testing StorageServers
+	int32_t desiredTSSCount;
+	KeyValueStoreType testingStorageServerStoreType;
+
 	// Remote TLogs
 	int32_t desiredLogRouterCount;
 	int32_t remoteDesiredTLogCount;

@ -239,6 +243,9 @@ struct DatabaseConfiguration {
 	int32_t repopulateRegionAntiQuorum;
 	std::vector<RegionInfo> regions;

+	// Perpetual Storage Setting
+	int32_t perpetualStorageWiggleSpeed;
+
 	// Excluded servers (no state should be here)
 	bool isExcludedServer(NetworkAddressList) const;
 	std::set<AddressExclusion> getExcludedServers() const;
@ -273,6 +273,9 @@ public:
 	Reference<AsyncVar<Reference<ClusterConnectionFile>>> connectionFile;
 	AsyncTrigger proxiesChangeTrigger;
 	Future<Void> monitorProxiesInfoChange;
+	Future<Void> monitorTssInfoChange;
+	Future<Void> tssMismatchHandler;
+	PromiseStream<UID> tssMismatchStream;
 	Reference<CommitProxyInfo> commitProxies;
 	Reference<GrvProxyInfo> grvProxies;
 	bool proxyProvisional; // Provisional commit proxy and grv proxy are used at the same time.

@ -320,6 +323,11 @@ public:

 	std::map<UID, StorageServerInfo*> server_interf;

+	// map from ssid -> tss interface
+	std::unordered_map<UID, StorageServerInterface> tssMapping;
+	// map from tssid -> metrics for that tss pair
+	std::unordered_map<UID, Reference<TSSMetrics>> tssMetrics;
+
 	UID dbId;
 	bool internal; // Only contexts created through the C client and fdbcli are non-internal

@ -419,6 +427,14 @@ public:
 	static bool debugUseTags;
 	static const std::vector<std::string> debugTransactionTagChoices;
 	std::unordered_map<KeyRef, Reference<WatchMetadata>> watchMap;
+
+	// Adds or updates the specified (SS, TSS) pair in the TSS mapping (if not already present).
+	// Requests to the storage server will be duplicated to the TSS.
+	void addTssMapping(StorageServerInterface const& ssi, StorageServerInterface const& tssi);
+
+	// Removes the storage server and its TSS pair from the TSS mapping (if present).
+	// Requests to the storage server will no longer be duplicated to its pair TSS.
+	void removeTssMapping(StorageServerInterface const& ssi);
 };

 #endif
@ -95,7 +95,7 @@ public:
 		if (itr != optionsIndexMap.end()) {
 			options.erase(itr->second);
 		}
-		options.push_back(std::make_pair(option, value));
+		options.emplace_back(option, value);
 		optionsIndexMap[option] = --options.end();
 	}

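Several hunks in this commit replace push_back(std::make_pair(...)) with emplace_back(...), which constructs the element in place instead of building a temporary pair and moving it into the container. A minimal illustration of the difference (plain C++17, illustrative values):

    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    int main() {
        std::vector<std::pair<int, std::string>> options;

        // Old style: builds a temporary pair, then copies/moves it into the vector.
        options.push_back(std::make_pair(1, std::string("timeout")));

        // New style: forwards the arguments and constructs the pair in place.
        options.emplace_back(2, "trace_enable");

        for (const auto& [id, name] : options) {
            std::cout << id << " -> " << name << "\n";
        }
    }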
@ -107,4 +107,4 @@ public:
 	    type::optionInfo.insert( \
 	        var, FDBOptionInfo(name, comment, parameterComment, hasParameter, hidden, persistent, defaultFor));

 #endif
@ -483,7 +483,9 @@ inline Key keyAfter(const KeyRef& key) {

 	Standalone<StringRef> r;
 	uint8_t* s = new (r.arena()) uint8_t[key.size() + 1];
-	memcpy(s, key.begin(), key.size());
+	if (key.size() > 0) {
+		memcpy(s, key.begin(), key.size());
+	}
 	s[key.size()] = 0;
 	((StringRef&)r) = StringRef(s, key.size() + 1);
 	return r;
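The keyAfter change guards the memcpy for the empty-key case: an empty key may carry a null data pointer, and passing a null source to memcpy is undefined behavior even with a zero length, so the copy is skipped when key.size() == 0. A self-contained sketch of the same guard using std containers rather than FDB's arena/StringRef machinery (illustrative names only):

    #include <cstddef>
    #include <cstring>
    #include <iostream>
    #include <vector>

    // Append a 0x00 byte to a key, guarding the copy so that an empty key
    // (which may be represented by a null data pointer) is never passed to memcpy.
    std::vector<unsigned char> keyAfterBytes(const unsigned char* key, std::size_t len) {
        std::vector<unsigned char> out(len + 1);
        if (len > 0) {
            std::memcpy(out.data(), key, len); // only copy when there is something to copy
        }
        out[len] = 0; // trailing zero byte sorts immediately after the original key
        return out;
    }

    int main() {
        std::vector<unsigned char> empty = keyAfterBytes(nullptr, 0);
        unsigned char k[] = { 'a', 'b' };
        std::vector<unsigned char> after = keyAfterBytes(k, sizeof(k));
        std::cout << empty.size() << " " << after.size() << "\n"; // 1 3
    }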
@ -23,6 +23,7 @@
 #include "fdbclient/DatabaseContext.h"
 #include "fdbclient/Knobs.h"
 #include "fdbclient/ManagementAPI.actor.h"
+#include "fdbclient/RestoreInterface.h"
 #include "fdbclient/Status.h"
 #include "fdbclient/SystemData.h"
 #include "fdbclient/KeyBackedTypes.h"

@ -2705,13 +2706,17 @@ struct StartFullBackupTaskFunc : BackupTaskFuncBase {
 		wait(checkTaskVersion(cx, task, StartFullBackupTaskFunc::name, StartFullBackupTaskFunc::version));

 		state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(cx));
+		state BackupConfig config(task);
+		state Future<Optional<bool>> partitionedLog;
 		loop {
 			try {
 				tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
 				tr->setOption(FDBTransactionOptions::LOCK_AWARE);
-				Version startVersion = wait(tr->getReadVersion());
+				partitionedLog = config.partitionedLogEnabled().get(tr);
+				state Future<Version> startVersionFuture = tr->getReadVersion();
+				wait(success(partitionedLog) && success(startVersionFuture));

-				Params.beginVersion().set(task, startVersion);
+				Params.beginVersion().set(task, startVersionFuture.get());
 				break;
 			} catch (Error& e) {
 				wait(tr->onError(e));

@ -2721,14 +2726,15 @@ struct StartFullBackupTaskFunc : BackupTaskFuncBase {
 		// Check if backup worker is enabled
 		DatabaseConfiguration dbConfig = wait(getDatabaseConfiguration(cx));
 		state bool backupWorkerEnabled = dbConfig.backupWorkerEnabled;
-		if (!backupWorkerEnabled) {
+		if (!backupWorkerEnabled && partitionedLog.get().present() && partitionedLog.get().get()) {
+			// Change configuration only when we set to use partitioned logs and
+			// the flag was not set before.
 			wait(success(changeConfig(cx, "backup_worker_enabled:=1", true)));
 			backupWorkerEnabled = true;
 		}

 		// Set the "backupStartedKey" and wait for all backup worker started
 		tr->reset();
-		state BackupConfig config(task);
 		loop {
 			state Future<Void> watchFuture;
 			try {

@ -2738,7 +2744,7 @@ struct StartFullBackupTaskFunc : BackupTaskFuncBase {

 				state Future<Optional<Value>> started = tr->get(backupStartedKey);
 				state Future<Optional<Value>> taskStarted = tr->get(config.allWorkerStarted().key);
-				state Future<Optional<bool>> partitionedLog = config.partitionedLogEnabled().get(tr);
+				partitionedLog = config.partitionedLogEnabled().get(tr);
 				wait(success(started) && success(taskStarted) && success(partitionedLog));

 				if (!partitionedLog.get().present() || !partitionedLog.get().get()) {
@ -34,16 +34,7 @@ const KeyRef fdbClientInfoTxnSizeLimit = LiteralStringRef("config/fdb_client_inf
|
||||||
const KeyRef transactionTagSampleRate = LiteralStringRef("config/transaction_tag_sample_rate");
|
const KeyRef transactionTagSampleRate = LiteralStringRef("config/transaction_tag_sample_rate");
|
||||||
const KeyRef transactionTagSampleCost = LiteralStringRef("config/transaction_tag_sample_cost");
|
const KeyRef transactionTagSampleCost = LiteralStringRef("config/transaction_tag_sample_cost");
|
||||||
|
|
||||||
GlobalConfig::GlobalConfig() : lastUpdate(0) {}
|
GlobalConfig::GlobalConfig(Database& cx) : cx(cx), lastUpdate(0) {}
|
||||||
|
|
||||||
void GlobalConfig::create(DatabaseContext* cx, Reference<AsyncVar<ClientDBInfo>> dbInfo) {
|
|
||||||
if (g_network->global(INetwork::enGlobalConfig) == nullptr) {
|
|
||||||
auto config = new GlobalConfig{};
|
|
||||||
config->cx = Database(cx);
|
|
||||||
g_network->setGlobal(INetwork::enGlobalConfig, config);
|
|
||||||
config->_updater = updater(config, dbInfo);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
GlobalConfig& GlobalConfig::globalConfig() {
|
GlobalConfig& GlobalConfig::globalConfig() {
|
||||||
void* res = g_network->global(INetwork::enGlobalConfig);
|
void* res = g_network->global(INetwork::enGlobalConfig);
|
||||||
|
@ -77,6 +68,14 @@ Future<Void> GlobalConfig::onInitialized() {
|
||||||
return initialized.getFuture();
|
return initialized.getFuture();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Future<Void> GlobalConfig::onChange() {
|
||||||
|
return configChanged.onTrigger();
|
||||||
|
}
|
||||||
|
|
||||||
|
void GlobalConfig::trigger(KeyRef key, std::function<void(std::optional<std::any>)> fn) {
|
||||||
|
callbacks.emplace(key, std::move(fn));
|
||||||
|
}
|
||||||
|
|
||||||
void GlobalConfig::insert(KeyRef key, ValueRef value) {
|
void GlobalConfig::insert(KeyRef key, ValueRef value) {
|
||||||
data.erase(key);
|
data.erase(key);
|
||||||
|
|
||||||
|
@ -89,6 +88,8 @@ void GlobalConfig::insert(KeyRef key, ValueRef value) {
|
||||||
any = StringRef(arena, t.getString(0).contents());
|
any = StringRef(arena, t.getString(0).contents());
|
||||||
} else if (t.getType(0) == Tuple::ElementType::INT) {
|
} else if (t.getType(0) == Tuple::ElementType::INT) {
|
||||||
any = t.getInt(0);
|
any = t.getInt(0);
|
||||||
|
} else if (t.getType(0) == Tuple::ElementType::BOOL) {
|
||||||
|
any = t.getBool(0);
|
||||||
} else if (t.getType(0) == Tuple::ElementType::FLOAT) {
|
} else if (t.getType(0) == Tuple::ElementType::FLOAT) {
|
||||||
any = t.getFloat(0);
|
any = t.getFloat(0);
|
||||||
} else if (t.getType(0) == Tuple::ElementType::DOUBLE) {
|
} else if (t.getType(0) == Tuple::ElementType::DOUBLE) {
|
||||||
|
@ -97,19 +98,26 @@ void GlobalConfig::insert(KeyRef key, ValueRef value) {
 			ASSERT(false);
 		}
 		data[stableKey] = makeReference<ConfigValue>(std::move(arena), std::move(any));
+
+		if (callbacks.find(stableKey) != callbacks.end()) {
+			callbacks[stableKey](data[stableKey]->value);
+		}
 	} catch (Error& e) {
-		TraceEvent("GlobalConfigTupleParseError").detail("What", e.what());
+		TraceEvent(SevWarn, "GlobalConfigTupleParseError").detail("What", e.what());
 	}
 }

-void GlobalConfig::erase(KeyRef key) {
-	data.erase(key);
+void GlobalConfig::erase(Key key) {
+	erase(KeyRangeRef(key, keyAfter(key)));
 }

 void GlobalConfig::erase(KeyRangeRef range) {
 	auto it = data.begin();
 	while (it != data.end()) {
 		if (range.contains(it->first)) {
+			if (callbacks.find(it->first) != callbacks.end()) {
+				callbacks[it->first](std::nullopt);
+			}
 			it = data.erase(it);
 		} else {
 			++it;
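insert() now fires a per-key callback with the new value and erase() fires it with an empty optional, which is the hook that the new GlobalConfig::trigger() registers into. A reduced sketch of that callback registry (plain C++17 with illustrative names, not the actual GlobalConfig class):

    #include <any>
    #include <functional>
    #include <iostream>
    #include <optional>
    #include <string>
    #include <unordered_map>

    // Tiny key-value cache that notifies per-key subscribers on change.
    class ConfigCache {
    public:
        using Callback = std::function<void(std::optional<std::any>)>;

        void trigger(const std::string& key, Callback fn) { callbacks[key] = std::move(fn); }

        void insert(const std::string& key, std::any value) {
            data[key] = std::move(value);
            auto cb = callbacks.find(key);
            if (cb != callbacks.end()) cb->second(data[key]); // pass the updated value
        }

        void erase(const std::string& key) {
            data.erase(key);
            auto cb = callbacks.find(key);
            if (cb != callbacks.end()) cb->second(std::nullopt); // key cleared
        }

    private:
        std::unordered_map<std::string, std::any> data;
        std::unordered_map<std::string, Callback> callbacks;
    };

    int main() {
        ConfigCache cache;
        cache.trigger("transaction_tag_sample_rate", [](std::optional<std::any> v) {
            std::cout << (v.has_value() ? "updated" : "cleared") << "\n";
        });
        cache.insert("transaction_tag_sample_rate", 0.01); // prints "updated"
        cache.erase("transaction_tag_sample_rate");        // prints "cleared"
    }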
@ -134,36 +142,39 @@ ACTOR Future<Void> GlobalConfig::migrate(GlobalConfig* self) {
 	state Optional<Value> sampleRate = wait(tr->get(Key("\xff\x02/fdbClientInfo/client_txn_sample_rate/"_sr)));
 	state Optional<Value> sizeLimit = wait(tr->get(Key("\xff\x02/fdbClientInfo/client_txn_size_limit/"_sr)));

-	loop {
-		try {
-			tr->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES);
-			// The value doesn't matter too much, as long as the key is set.
-			tr->set(migratedKey.contents(), "1"_sr);
-			if (sampleRate.present()) {
-				const double sampleRateDbl =
-				    BinaryReader::fromStringRef<double>(sampleRate.get().contents(), Unversioned());
-				Tuple rate = Tuple().appendDouble(sampleRateDbl);
-				tr->set(GlobalConfig::prefixedKey(fdbClientInfoTxnSampleRate), rate.pack());
-			}
-			if (sizeLimit.present()) {
-				const int64_t sizeLimitInt =
-				    BinaryReader::fromStringRef<int64_t>(sizeLimit.get().contents(), Unversioned());
-				Tuple size = Tuple().append(sizeLimitInt);
-				tr->set(GlobalConfig::prefixedKey(fdbClientInfoTxnSizeLimit), size.pack());
-			}
-
-			wait(tr->commit());
-			return Void();
-		} catch (Error& e) {
-			throw;
-		}
+	try {
+		tr->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES);
+		// The value doesn't matter too much, as long as the key is set.
+		tr->set(migratedKey.contents(), "1"_sr);
+		if (sampleRate.present()) {
+			const double sampleRateDbl =
+			    BinaryReader::fromStringRef<double>(sampleRate.get().contents(), Unversioned());
+			Tuple rate = Tuple().appendDouble(sampleRateDbl);
+			tr->set(GlobalConfig::prefixedKey(fdbClientInfoTxnSampleRate), rate.pack());
+		}
+		if (sizeLimit.present()) {
+			const int64_t sizeLimitInt =
+			    BinaryReader::fromStringRef<int64_t>(sizeLimit.get().contents(), Unversioned());
+			Tuple size = Tuple().append(sizeLimitInt);
+			tr->set(GlobalConfig::prefixedKey(fdbClientInfoTxnSizeLimit), size.pack());
+		}
+
+		wait(tr->commit());
+	} catch (Error& e) {
+		// If multiple fdbserver processes are started at once, they will all
+		// attempt this migration at the same time, sometimes resulting in
+		// aborts due to conflicts. Purposefully avoid retrying, making this
+		// migration best-effort.
+		TraceEvent(SevInfo, "GlobalConfigMigrationError").detail("What", e.what());
 	}
+
+	return Void();
 }

 // Updates local copy of global configuration by reading the entire key-range
 // from storage.
 ACTOR Future<Void> GlobalConfig::refresh(GlobalConfig* self) {
-	self->data.clear();
+	self->erase(KeyRangeRef(""_sr, "\xff"_sr));

 	Transaction tr(self->cx);
 	RangeResult result = wait(tr.getRange(globalConfigDataKeys, CLIENT_KNOBS->TOO_MANY));

@ -176,7 +187,8 @@ ACTOR Future<Void> GlobalConfig::refresh(GlobalConfig* self) {

 // Applies updates to the local copy of the global configuration when this
 // process receives an updated history.
-ACTOR Future<Void> GlobalConfig::updater(GlobalConfig* self, Reference<AsyncVar<ClientDBInfo>> dbInfo) {
+ACTOR Future<Void> GlobalConfig::updater(GlobalConfig* self, const ClientDBInfo* dbInfo) {
+	wait(self->cx->onConnected());
 	wait(self->migrate(self));

 	wait(self->refresh(self));

@ -184,9 +196,9 @@ ACTOR Future<Void> GlobalConfig::updater(GlobalConfig* self, Reference<AsyncVar<

 	loop {
 		try {
-			wait(dbInfo->onChange());
+			wait(self->dbInfoChanged.onTrigger());

-			auto& history = dbInfo->get().history;
+			auto& history = dbInfo->history;
 			if (history.size() == 0) {
 				continue;
 			}

@ -196,8 +208,8 @@ ACTOR Future<Void> GlobalConfig::updater(GlobalConfig* self, Reference<AsyncVar<
 				// history updates or the protocol version changed, so it
 				// must re-read the entire configuration range.
 				wait(self->refresh(self));
-				if (dbInfo->get().history.size() > 0) {
-					self->lastUpdate = dbInfo->get().history.back().version;
+				if (dbInfo->history.size() > 0) {
+					self->lastUpdate = dbInfo->history.back().version;
 				}
 			} else {
 				// Apply history in order, from lowest version to highest

@ -222,6 +234,8 @@ ACTOR Future<Void> GlobalConfig::updater(GlobalConfig* self, Reference<AsyncVar<
 					self->lastUpdate = vh.version;
 				}
 			}
+
+			self->configChanged.trigger();
 		} catch (Error& e) {
 			throw;
 		}
@ -62,10 +62,28 @@ struct ConfigValue : ReferenceCounted<ConfigValue> {

 class GlobalConfig : NonCopyable {
 public:
-	// Creates a GlobalConfig singleton, accessed by calling GlobalConfig().
-	// This function should only be called once by each process (however, it is
-	// idempotent and calling it multiple times will have no effect).
-	static void create(DatabaseContext* cx, Reference<AsyncVar<ClientDBInfo>> dbInfo);
+	// Creates a GlobalConfig singleton, accessed by calling
+	// GlobalConfig::globalConfig(). This function requires a database object
+	// to allow global configuration to run transactions on the database, and
+	// an AsyncVar object to watch for changes on. The ClientDBInfo pointer
+	// should point to a ClientDBInfo object which will contain the updated
+	// global configuration history when the given AsyncVar changes. This
+	// function should be called whenever the database object changes, in order
+	// to allow global configuration to run transactions on the latest
+	// database.
+	template <class T>
+	static void create(Database& cx, Reference<AsyncVar<T>> db, const ClientDBInfo* dbInfo) {
+		if (g_network->global(INetwork::enGlobalConfig) == nullptr) {
+			auto config = new GlobalConfig{ cx };
+			g_network->setGlobal(INetwork::enGlobalConfig, config);
+			config->_updater = updater(config, dbInfo);
+			// Bind changes in `db` to the `dbInfoChanged` AsyncTrigger.
+			forward(db, std::addressof(config->dbInfoChanged));
+		} else {
+			GlobalConfig* config = reinterpret_cast<GlobalConfig*>(g_network->global(INetwork::enGlobalConfig));
+			config->cx = cx;
+		}
+	}

 	// Returns a reference to the global GlobalConfig object. Clients should
 	// call this function whenever they need to read a value out of the global
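create() stays idempotent in the new form: the first call publishes the object through the process-wide slot, and later calls only rebind the database handle. A stripped-down sketch of that guarded singleton pattern (plain C++ with illustrative names; the real version hangs the pointer off g_network->global()):

    #include <iostream>
    #include <string>

    // Stand-in for the process-wide slot that g_network->global() provides.
    static void* g_globalConfigSlot = nullptr;

    struct MiniGlobalConfig {
        std::string database; // stand-in for the Database handle
    };

    // First call creates and publishes the singleton; later calls only update the handle.
    // (The sketch never frees the singleton, mirroring a process-lifetime object.)
    void createGlobalConfig(const std::string& db) {
        if (g_globalConfigSlot == nullptr) {
            g_globalConfigSlot = new MiniGlobalConfig{ db };
        } else {
            static_cast<MiniGlobalConfig*>(g_globalConfigSlot)->database = db;
        }
    }

    int main() {
        createGlobalConfig("cluster_a");
        createGlobalConfig("cluster_b"); // same object, rebound to the new database
        std::cout << static_cast<MiniGlobalConfig*>(g_globalConfigSlot)->database << "\n"; // cluster_b
    }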
@ -114,8 +132,18 @@ public:
 	// been created and is ready.
 	Future<Void> onInitialized();

+	// Triggers the returned future when any key-value pair in the global
+	// configuration changes.
+	Future<Void> onChange();
+
+	// Calls \ref fn when the value associated with \ref key is changed. \ref
+	// fn is passed the updated value for the key, or an empty optional if the
+	// key has been cleared. If the value is an allocated object, its memory
+	// remains in the control of the global configuration.
+	void trigger(KeyRef key, std::function<void(std::optional<std::any>)> fn);
+
 private:
-	GlobalConfig();
+	GlobalConfig(Database& cx);

 	// The functions below only affect the local copy of the global
 	// configuration keyspace! To insert or remove values across all nodes you

@ -127,20 +155,23 @@ private:
 	void insert(KeyRef key, ValueRef value);
 	// Removes the given key (and associated value) from the local copy of the
 	// global configuration keyspace.
-	void erase(KeyRef key);
+	void erase(Key key);
 	// Removes the given key range (and associated values) from the local copy
 	// of the global configuration keyspace.
 	void erase(KeyRangeRef range);

 	ACTOR static Future<Void> migrate(GlobalConfig* self);
 	ACTOR static Future<Void> refresh(GlobalConfig* self);
-	ACTOR static Future<Void> updater(GlobalConfig* self, Reference<AsyncVar<ClientDBInfo>> dbInfo);
+	ACTOR static Future<Void> updater(GlobalConfig* self, const ClientDBInfo* dbInfo);

 	Database cx;
+	AsyncTrigger dbInfoChanged;
 	Future<Void> _updater;
 	Promise<Void> initialized;
+	AsyncTrigger configChanged;
 	std::unordered_map<StringRef, Reference<ConfigValue>> data;
 	Version lastUpdate;
+	std::unordered_map<KeyRef, std::function<void(std::optional<std::any>)>> callbacks;
 };

 #endif
@ -173,6 +173,7 @@ void ClientKnobs::initialize(bool randomize) {
 	init( BACKUP_STATUS_DELAY, 40.0 );
 	init( BACKUP_STATUS_JITTER, 0.05 );
 	init( MIN_CLEANUP_SECONDS, 3600.0 );
+	init( FASTRESTORE_ATOMICOP_WEIGHT, 1 ); if( randomize && BUGGIFY ) { FASTRESTORE_ATOMICOP_WEIGHT = deterministicRandom()->random01() * 200 + 1; }

 	// Configuration
 	init( DEFAULT_AUTO_COMMIT_PROXIES, 3 );

@ -168,6 +168,7 @@ public:
 	double BACKUP_STATUS_DELAY;
 	double BACKUP_STATUS_JITTER;
 	double MIN_CLEANUP_SECONDS;
+	int64_t FASTRESTORE_ATOMICOP_WEIGHT; // workload amplication factor for atomic op

 	// Configuration
 	int32_t DEFAULT_AUTO_COMMIT_PROXIES;
@ -60,6 +60,13 @@ std::map<std::string, std::string> configForToken(std::string const& mode) {
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (mode == "tss") {
|
||||||
|
// Set temporary marker in config map to mark that this is a tss configuration and not a normal storage/log
|
||||||
|
// configuration. A bit of a hack but reuses the parsing code nicely.
|
||||||
|
out[p + "istss"] = "1";
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
if (mode == "locked") {
|
if (mode == "locked") {
|
||||||
// Setting this key is interpreted as an instruction to use the normal version-stamp-based mechanism for locking
|
// Setting this key is interpreted as an instruction to use the normal version-stamp-based mechanism for locking
|
||||||
// the database.
|
// the database.
|
||||||
|
@ -119,7 +126,7 @@ std::map<std::string, std::string> configForToken(std::string const& mode) {
|
||||||
|
|
||||||
if ((key == "logs" || key == "commit_proxies" || key == "grv_proxies" || key == "resolvers" ||
|
if ((key == "logs" || key == "commit_proxies" || key == "grv_proxies" || key == "resolvers" ||
|
||||||
key == "remote_logs" || key == "log_routers" || key == "usable_regions" ||
|
key == "remote_logs" || key == "log_routers" || key == "usable_regions" ||
|
||||||
key == "repopulate_anti_quorum") &&
|
key == "repopulate_anti_quorum" || key == "count") &&
|
||||||
isInteger(value)) {
|
isInteger(value)) {
|
||||||
out[p + key] = value;
|
out[p + key] = value;
|
||||||
}
|
}
|
||||||
|
@ -134,6 +141,14 @@ std::map<std::string, std::string> configForToken(std::string const& mode) {
|
||||||
BinaryWriter::toValue(regionObj, IncludeVersion(ProtocolVersion::withRegionConfiguration())).toString();
|
BinaryWriter::toValue(regionObj, IncludeVersion(ProtocolVersion::withRegionConfiguration())).toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (key == "perpetual_storage_wiggle" && isInteger(value)) {
|
||||||
|
int ppWiggle = atoi(value.c_str());
|
||||||
|
if (ppWiggle >= 2 || ppWiggle < 0) {
|
||||||
|
printf("Error: Only 0 and 1 are valid values of perpetual_storage_wiggle at present.\n");
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
out[p + key] = value;
|
||||||
|
}
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -326,6 +341,35 @@ ConfigurationResult buildConfiguration(std::vector<StringRef> const& modeTokens,
|
||||||
serializeReplicationPolicy(policyWriter, logPolicy);
|
serializeReplicationPolicy(policyWriter, logPolicy);
|
||||||
outConf[p + "log_replication_policy"] = policyWriter.toValue().toString();
|
outConf[p + "log_replication_policy"] = policyWriter.toValue().toString();
|
||||||
}
|
}
|
||||||
|
if (outConf.count(p + "istss")) {
|
||||||
|
// redo config parameters to be tss config instead of normal config
|
||||||
|
|
||||||
|
// save param values from parsing as a normal config
|
||||||
|
bool isNew = outConf.count(p + "initialized");
|
||||||
|
Optional<std::string> count;
|
||||||
|
Optional<std::string> storageEngine;
|
||||||
|
if (outConf.count(p + "count")) {
|
||||||
|
count = Optional<std::string>(outConf[p + "count"]);
|
||||||
|
}
|
||||||
|
if (outConf.count(p + "storage_engine")) {
|
||||||
|
storageEngine = Optional<std::string>(outConf[p + "storage_engine"]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// A new tss setup must have count + storage engine. An adjustment must have at least one.
|
||||||
|
if ((isNew && (!count.present() || !storageEngine.present())) ||
|
||||||
|
(!isNew && !count.present() && !storageEngine.present())) {
|
||||||
|
return ConfigurationResult::INCOMPLETE_CONFIGURATION;
|
||||||
|
}
|
||||||
|
|
||||||
|
// clear map and only reset tss parameters
|
||||||
|
outConf.clear();
|
||||||
|
if (count.present()) {
|
||||||
|
outConf[p + "tss_count"] = count.get();
|
||||||
|
}
|
||||||
|
if (storageEngine.present()) {
|
||||||
|
outConf[p + "tss_storage_engine"] = storageEngine.get();
|
||||||
|
}
|
||||||
|
}
|
||||||
return ConfigurationResult::SUCCESS;
|
return ConfigurationResult::SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
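A `tss` configure command is parsed with the ordinary storage/log code path and then rewritten: the temporary istss marker tells buildConfiguration to move count and storage_engine under tss_count and tss_storage_engine and to drop everything else, failing when the change is incomplete. A compact sketch of that re-keying step (plain C++ with a std::map stand-in for the outConf map; illustrative names):

    #include <iostream>
    #include <map>
    #include <string>
    #include <utility>

    // Rewrite a parsed config map in place when it carries the temporary "istss" marker.
    // Returns false when the tss change is incomplete (mirrors INCOMPLETE_CONFIGURATION).
    bool rewriteTssConfig(std::map<std::string, std::string>& conf, bool isNewDatabase) {
        if (!conf.count("istss")) return true; // not a tss configuration, nothing to do

        const bool haveCount = conf.count("count") > 0;
        const bool haveEngine = conf.count("storage_engine") > 0;
        // A new tss setup needs both; an adjustment needs at least one.
        if ((isNewDatabase && !(haveCount && haveEngine)) || (!isNewDatabase && !haveCount && !haveEngine))
            return false;

        std::map<std::string, std::string> tssOnly;
        if (haveCount) tssOnly["tss_count"] = conf["count"];
        if (haveEngine) tssOnly["tss_storage_engine"] = conf["storage_engine"];
        conf = std::move(tssOnly); // keep only the tss keys
        return true;
    }

    int main() {
        std::map<std::string, std::string> conf{ { "istss", "1" }, { "count", "2" }, { "storage_engine", "1" } };
        std::cout << std::boolalpha << rewriteTssConfig(conf, /*isNewDatabase=*/true) << " "
                  << conf.count("tss_count") << "\n"; // true 1
    }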
@ -741,7 +785,7 @@ ConfigureAutoResult parseConfig(StatusObject const& status) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (processClass.classType() != ProcessClass::TesterClass) {
|
if (processClass.classType() != ProcessClass::TesterClass) {
|
||||||
machine_processes[machineId].push_back(std::make_pair(addr, processClass));
|
machine_processes[machineId].emplace_back(addr, processClass);
|
||||||
processCount++;
|
processCount++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1105,6 +1149,7 @@ ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
|
||||||
|
|
||||||
vector<Future<Optional<LeaderInfo>>> leaderServers;
|
vector<Future<Optional<LeaderInfo>>> leaderServers;
|
||||||
ClientCoordinators coord(Reference<ClusterConnectionFile>(new ClusterConnectionFile(conn)));
|
ClientCoordinators coord(Reference<ClusterConnectionFile>(new ClusterConnectionFile(conn)));
|
||||||
|
|
||||||
leaderServers.reserve(coord.clientLeaderServers.size());
|
leaderServers.reserve(coord.clientLeaderServers.size());
|
||||||
for (int i = 0; i < coord.clientLeaderServers.size(); i++)
|
for (int i = 0; i < coord.clientLeaderServers.size(); i++)
|
||||||
leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader,
|
leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader,
|
||||||
|
@ -1188,14 +1233,20 @@ ACTOR Future<CoordinatorsResult> changeQuorum(Database cx, Reference<IQuorumChan
|
||||||
TEST(old.clusterKeyName() != conn.clusterKeyName()); // Quorum change with new name
|
TEST(old.clusterKeyName() != conn.clusterKeyName()); // Quorum change with new name
|
||||||
TEST(old.clusterKeyName() == conn.clusterKeyName()); // Quorum change with unchanged name
|
TEST(old.clusterKeyName() == conn.clusterKeyName()); // Quorum change with unchanged name
|
||||||
|
|
||||||
vector<Future<Optional<LeaderInfo>>> leaderServers;
|
state vector<Future<Optional<LeaderInfo>>> leaderServers;
|
||||||
ClientCoordinators coord(Reference<ClusterConnectionFile>(new ClusterConnectionFile(conn)));
|
state ClientCoordinators coord(Reference<ClusterConnectionFile>(new ClusterConnectionFile(conn)));
|
||||||
|
// check if allowed to modify the cluster descriptor
|
||||||
|
if (!change->getDesiredClusterKeyName().empty()) {
|
||||||
|
CheckDescriptorMutableReply mutabilityReply =
|
||||||
|
wait(coord.clientLeaderServers[0].checkDescriptorMutable.getReply(CheckDescriptorMutableRequest()));
|
||||||
|
if (!mutabilityReply.isMutable)
|
||||||
|
return CoordinatorsResult::BAD_DATABASE_STATE;
|
||||||
|
}
|
||||||
leaderServers.reserve(coord.clientLeaderServers.size());
|
leaderServers.reserve(coord.clientLeaderServers.size());
|
||||||
for (int i = 0; i < coord.clientLeaderServers.size(); i++)
|
for (int i = 0; i < coord.clientLeaderServers.size(); i++)
|
||||||
leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader,
|
leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader,
|
||||||
GetLeaderRequest(coord.clusterKey, UID()),
|
GetLeaderRequest(coord.clusterKey, UID()),
|
||||||
TaskPriority::CoordinationReply));
|
TaskPriority::CoordinationReply));
|
||||||
|
|
||||||
choose {
|
choose {
|
||||||
when(wait(waitForAll(leaderServers))) {}
|
when(wait(waitForAll(leaderServers))) {}
|
||||||
when(wait(delay(5.0))) { return CoordinatorsResult::COORDINATOR_UNREACHABLE; }
|
when(wait(delay(5.0))) { return CoordinatorsResult::COORDINATOR_UNREACHABLE; }
|
||||||
|
@ -1264,7 +1315,7 @@ struct AutoQuorumChange final : IQuorumChange {
|
||||||
vector<NetworkAddress> oldCoordinators,
|
vector<NetworkAddress> oldCoordinators,
|
||||||
Reference<ClusterConnectionFile> ccf,
|
Reference<ClusterConnectionFile> ccf,
|
||||||
CoordinatorsResult& err) override {
|
CoordinatorsResult& err) override {
|
||||||
return getDesired(this, tr, oldCoordinators, ccf, &err);
|
return getDesired(Reference<AutoQuorumChange>::addRef(this), tr, oldCoordinators, ccf, &err);
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR static Future<int> getRedundancy(AutoQuorumChange* self, Transaction* tr) {
|
ACTOR static Future<int> getRedundancy(AutoQuorumChange* self, Transaction* tr) {
|
||||||
|
@ -1327,7 +1378,7 @@ struct AutoQuorumChange final : IQuorumChange {
|
||||||
return true; // The status quo seems fine
|
return true; // The status quo seems fine
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR static Future<vector<NetworkAddress>> getDesired(AutoQuorumChange* self,
|
ACTOR static Future<vector<NetworkAddress>> getDesired(Reference<AutoQuorumChange> self,
|
||||||
Transaction* tr,
|
Transaction* tr,
|
||||||
vector<NetworkAddress> oldCoordinators,
|
vector<NetworkAddress> oldCoordinators,
|
||||||
Reference<ClusterConnectionFile> ccf,
|
Reference<ClusterConnectionFile> ccf,
|
||||||
|
@ -1335,7 +1386,7 @@ struct AutoQuorumChange final : IQuorumChange {
|
||||||
state int desiredCount = self->desired;
|
state int desiredCount = self->desired;
|
||||||
|
|
||||||
if (desiredCount == -1) {
|
if (desiredCount == -1) {
|
||||||
int redundancy = wait(getRedundancy(self, tr));
|
int redundancy = wait(getRedundancy(self.getPtr(), tr));
|
||||||
desiredCount = redundancy * 2 - 1;
|
desiredCount = redundancy * 2 - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1364,7 +1415,7 @@ struct AutoQuorumChange final : IQuorumChange {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (checkAcceptable) {
|
if (checkAcceptable) {
|
||||||
bool ok = wait(isAcceptable(self, tr, oldCoordinators, ccf, desiredCount, &excluded));
|
bool ok = wait(isAcceptable(self.getPtr(), tr, oldCoordinators, ccf, desiredCount, &excluded));
|
||||||
if (ok)
|
if (ok)
|
||||||
return oldCoordinators;
|
return oldCoordinators;
|
||||||
}
|
}
|
||||||
|
|
|
@ -380,11 +380,14 @@ ClientCoordinators::ClientCoordinators(Key clusterKey, std::vector<NetworkAddres
|
||||||
|
|
||||||
ClientLeaderRegInterface::ClientLeaderRegInterface(NetworkAddress remote)
|
ClientLeaderRegInterface::ClientLeaderRegInterface(NetworkAddress remote)
|
||||||
: getLeader(Endpoint({ remote }, WLTOKEN_CLIENTLEADERREG_GETLEADER)),
|
: getLeader(Endpoint({ remote }, WLTOKEN_CLIENTLEADERREG_GETLEADER)),
|
||||||
openDatabase(Endpoint({ remote }, WLTOKEN_CLIENTLEADERREG_OPENDATABASE)) {}
|
openDatabase(Endpoint({ remote }, WLTOKEN_CLIENTLEADERREG_OPENDATABASE)),
|
||||||
|
checkDescriptorMutable(Endpoint({ remote }, WLTOKEN_CLIENTLEADERREG_DESCRIPTOR_MUTABLE)) {}
|
||||||
|
|
||||||
ClientLeaderRegInterface::ClientLeaderRegInterface(INetwork* local) {
|
ClientLeaderRegInterface::ClientLeaderRegInterface(INetwork* local) {
|
||||||
getLeader.makeWellKnownEndpoint(WLTOKEN_CLIENTLEADERREG_GETLEADER, TaskPriority::Coordination);
|
getLeader.makeWellKnownEndpoint(WLTOKEN_CLIENTLEADERREG_GETLEADER, TaskPriority::Coordination);
|
||||||
openDatabase.makeWellKnownEndpoint(WLTOKEN_CLIENTLEADERREG_OPENDATABASE, TaskPriority::Coordination);
|
openDatabase.makeWellKnownEndpoint(WLTOKEN_CLIENTLEADERREG_OPENDATABASE, TaskPriority::Coordination);
|
||||||
|
checkDescriptorMutable.makeWellKnownEndpoint(WLTOKEN_CLIENTLEADERREG_DESCRIPTOR_MUTABLE,
|
||||||
|
TaskPriority::Coordination);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Nominee is the worker among all workers that are considered as leader by a coordinator
|
// Nominee is the worker among all workers that are considered as leader by a coordinator
|
||||||
|
@ -431,9 +434,9 @@ Optional<std::pair<LeaderInfo, bool>> getLeader(const vector<Optional<LeaderInfo
|
||||||
maskedNominees.reserve(nominees.size());
|
maskedNominees.reserve(nominees.size());
|
||||||
for (int i = 0; i < nominees.size(); i++) {
|
for (int i = 0; i < nominees.size(); i++) {
|
||||||
if (nominees[i].present()) {
|
if (nominees[i].present()) {
|
||||||
maskedNominees.push_back(std::make_pair(
|
maskedNominees.emplace_back(
|
||||||
UID(nominees[i].get().changeID.first() & LeaderInfo::changeIDMask, nominees[i].get().changeID.second()),
|
UID(nominees[i].get().changeID.first() & LeaderInfo::changeIDMask, nominees[i].get().changeID.second()),
|
||||||
i));
|
i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -496,7 +499,8 @@ ACTOR Future<MonitorLeaderInfo> monitorLeaderOneGeneration(Reference<ClusterConn
|
||||||
if (leader.get().first.forward) {
|
if (leader.get().first.forward) {
|
||||||
TraceEvent("MonitorLeaderForwarding")
|
TraceEvent("MonitorLeaderForwarding")
|
||||||
.detail("NewConnStr", leader.get().first.serializedInfo.toString())
|
.detail("NewConnStr", leader.get().first.serializedInfo.toString())
|
||||||
.detail("OldConnStr", info.intermediateConnFile->getConnectionString().toString()).trackLatest("MonitorLeaderForwarding");
|
.detail("OldConnStr", info.intermediateConnFile->getConnectionString().toString())
|
||||||
|
.trackLatest("MonitorLeaderForwarding");
|
||||||
info.intermediateConnFile = makeReference<ClusterConnectionFile>(
|
info.intermediateConnFile = makeReference<ClusterConnectionFile>(
|
||||||
connFile->getFilename(), ClusterConnectionString(leader.get().first.serializedInfo.toString()));
|
connFile->getFilename(), ClusterConnectionString(leader.get().first.serializedInfo.toString()));
|
||||||
return info;
|
return info;
|
||||||
|
@ -582,7 +586,7 @@ OpenDatabaseRequest ClientData::getRequest() {
|
||||||
auto& entry = issueMap[it];
|
auto& entry = issueMap[it];
|
||||||
entry.count++;
|
entry.count++;
|
||||||
if (entry.examples.size() < CLIENT_KNOBS->CLIENT_EXAMPLE_AMOUNT) {
|
if (entry.examples.size() < CLIENT_KNOBS->CLIENT_EXAMPLE_AMOUNT) {
|
||||||
entry.examples.push_back(std::make_pair(ci.first, ci.second.traceLogGroup));
|
entry.examples.emplace_back(ci.first, ci.second.traceLogGroup);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (ci.second.versions.size()) {
|
if (ci.second.versions.size()) {
|
||||||
|
@ -593,19 +597,19 @@ OpenDatabaseRequest ClientData::getRequest() {
 				auto& entry = versionMap[it];
 				entry.count++;
 				if (entry.examples.size() < CLIENT_KNOBS->CLIENT_EXAMPLE_AMOUNT) {
-					entry.examples.push_back(std::make_pair(ci.first, ci.second.traceLogGroup));
+					entry.examples.emplace_back(ci.first, ci.second.traceLogGroup);
 				}
 			}
 			auto& maxEntry = maxProtocolMap[maxProtocol];
 			maxEntry.count++;
 			if (maxEntry.examples.size() < CLIENT_KNOBS->CLIENT_EXAMPLE_AMOUNT) {
-				maxEntry.examples.push_back(std::make_pair(ci.first, ci.second.traceLogGroup));
+				maxEntry.examples.emplace_back(ci.first, ci.second.traceLogGroup);
 			}
 		} else {
 			auto& entry = versionMap[ClientVersionRef()];
 			entry.count++;
 			if (entry.examples.size() < CLIENT_KNOBS->CLIENT_EXAMPLE_AMOUNT) {
-				entry.examples.push_back(std::make_pair(ci.first, ci.second.traceLogGroup));
+				entry.examples.emplace_back(ci.first, ci.second.traceLogGroup);
 			}
 		}
 	}
@ -595,7 +595,7 @@ Reference<IDatabase> DLApi::createDatabase(const char* clusterFilePath) {
|
||||||
|
|
||||||
void DLApi::addNetworkThreadCompletionHook(void (*hook)(void*), void* hookParameter) {
|
void DLApi::addNetworkThreadCompletionHook(void (*hook)(void*), void* hookParameter) {
|
||||||
MutexHolder holder(lock);
|
MutexHolder holder(lock);
|
||||||
threadCompletionHooks.push_back(std::make_pair(hook, hookParameter));
|
threadCompletionHooks.emplace_back(hook, hookParameter);
|
||||||
}
|
}
|
||||||
|
|
||||||
// MultiVersionTransaction
|
// MultiVersionTransaction
|
||||||
|
@ -947,7 +947,7 @@ void MultiVersionDatabase::setOption(FDBDatabaseOptions::Option option, Optional
|
||||||
value.castTo<Standalone<StringRef>>());
|
value.castTo<Standalone<StringRef>>());
|
||||||
}
|
}
|
||||||
|
|
||||||
dbState->options.push_back(std::make_pair(option, value.castTo<Standalone<StringRef>>()));
|
dbState->options.emplace_back(option, value.castTo<Standalone<StringRef>>());
|
||||||
|
|
||||||
if (dbState->db) {
|
if (dbState->db) {
|
||||||
dbState->db->setOption(option, value);
|
dbState->db->setOption(option, value);
|
||||||
|
@ -1559,7 +1559,7 @@ void MultiVersionApi::setNetworkOptionInternal(FDBNetworkOptions::Option option,
|
||||||
runOnExternalClientsAllThreads(
|
runOnExternalClientsAllThreads(
|
||||||
[option, value](Reference<ClientInfo> client) { client->api->setNetworkOption(option, value); });
|
[option, value](Reference<ClientInfo> client) { client->api->setNetworkOption(option, value); });
|
||||||
} else {
|
} else {
|
||||||
options.push_back(std::make_pair(option, value.castTo<Standalone<StringRef>>()));
|
options.emplace_back(option, value.castTo<Standalone<StringRef>>());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -38,6 +38,7 @@
 #include "fdbclient/DatabaseContext.h"
 #include "fdbclient/GlobalConfig.actor.h"
 #include "fdbclient/JsonBuilder.h"
+#include "fdbclient/KeyBackedTypes.h"
 #include "fdbclient/KeyRangeMap.h"
 #include "fdbclient/Knobs.h"
 #include "fdbclient/ManagementAPI.actor.h"
@ -121,6 +122,52 @@ NetworkOptions::NetworkOptions()
 static const Key CLIENT_LATENCY_INFO_PREFIX = LiteralStringRef("client_latency/");
 static const Key CLIENT_LATENCY_INFO_CTR_PREFIX = LiteralStringRef("client_latency_counter/");

+void DatabaseContext::addTssMapping(StorageServerInterface const& ssi, StorageServerInterface const& tssi) {
+	auto result = tssMapping.find(ssi.id());
+	// Update tss endpoint mapping if ss isn't in mapping, or the interface it mapped to changed
+	if (result == tssMapping.end() ||
+	    result->second.getValue.getEndpoint().token.first() != tssi.getValue.getEndpoint().token.first()) {
+		Reference<TSSMetrics> metrics;
+		if (result == tssMapping.end()) {
+			// new TSS pairing
+			metrics = makeReference<TSSMetrics>();
+			tssMetrics[tssi.id()] = metrics;
+			tssMapping[ssi.id()] = tssi;
+		} else {
+			if (result->second.id() == tssi.id()) {
+				metrics = tssMetrics[tssi.id()];
+			} else {
+				TEST(true); // SS now maps to new TSS! This will probably never happen in practice
+				tssMetrics.erase(result->second.id());
+				metrics = makeReference<TSSMetrics>();
+				tssMetrics[tssi.id()] = metrics;
+			}
+			result->second = tssi;
+		}
+
+		queueModel.updateTssEndpoint(ssi.getValue.getEndpoint().token.first(),
+		                             TSSEndpointData(tssi.id(), tssi.getValue.getEndpoint(), metrics));
+		queueModel.updateTssEndpoint(ssi.getKey.getEndpoint().token.first(),
+		                             TSSEndpointData(tssi.id(), tssi.getKey.getEndpoint(), metrics));
+		queueModel.updateTssEndpoint(ssi.getKeyValues.getEndpoint().token.first(),
+		                             TSSEndpointData(tssi.id(), tssi.getKeyValues.getEndpoint(), metrics));
+		queueModel.updateTssEndpoint(ssi.watchValue.getEndpoint().token.first(),
+		                             TSSEndpointData(tssi.id(), tssi.watchValue.getEndpoint(), metrics));
+	}
+}
+
+void DatabaseContext::removeTssMapping(StorageServerInterface const& ssi) {
+	auto result = tssMapping.find(ssi.id());
+	if (result != tssMapping.end()) {
+		tssMetrics.erase(ssi.id());
+		tssMapping.erase(result);
+		queueModel.removeTssEndpoint(ssi.getValue.getEndpoint().token.first());
+		queueModel.removeTssEndpoint(ssi.getKey.getEndpoint().token.first());
+		queueModel.removeTssEndpoint(ssi.getKeyValues.getEndpoint().token.first());
+		queueModel.removeTssEndpoint(ssi.watchValue.getEndpoint().token.first());
+	}
+}
+
 Reference<StorageServerInfo> StorageServerInfo::getInterface(DatabaseContext* cx,
                                                              StorageServerInterface const& ssi,
                                                              LocalityData const& locality) {
@@ -133,6 +180,7 @@ Reference<StorageServerInfo> StorageServerInfo::getInterface(DatabaseContext* cx
 // pointing to. This is technically correct, but is very unnatural. We may want to refactor load
 // balance to take an AsyncVar<Reference<Interface>> so that it is notified when the interface
 // changes.

 it->second->interf = ssi;
 } else {
 it->second->notifyContextDestroyed();
@@ -285,6 +333,13 @@ void delref(DatabaseContext* ptr) {
 ptr->delref();
 }

+void traceTSSErrors(const char* name, UID tssId, const std::unordered_map<int, uint64_t>& errorsByCode) {
+    TraceEvent ev(name, tssId);
+    for (auto& it : errorsByCode) {
+        ev.detail("E" + std::to_string(it.first), it.second);
+    }
+}
+
 ACTOR Future<Void> databaseLogger(DatabaseContext* cx) {
 state double lastLogged = 0;
 loop {
@@ -327,6 +382,62 @@ ACTOR Future<Void> databaseLogger(DatabaseContext* cx) {
 cx->mutationsPerCommit.clear();
 cx->bytesPerCommit.clear();

+        for (const auto& it : cx->tssMetrics) {
+            // TODO could skip this tss if request counter is zero? would potentially complicate elapsed calculation
+            // though
+            if (it.second->mismatches.getIntervalDelta()) {
+                cx->tssMismatchStream.send(it.first);
+            }
+
+            // do error histograms as separate event
+            if (it.second->ssErrorsByCode.size()) {
+                traceTSSErrors("TSS_SSErrors", it.first, it.second->ssErrorsByCode);
+            }
+
+            if (it.second->tssErrorsByCode.size()) {
+                traceTSSErrors("TSS_TSSErrors", it.first, it.second->tssErrorsByCode);
+            }
+
+            TraceEvent tssEv("TSSClientMetrics", cx->dbId);
+            tssEv.detail("TSSID", it.first)
+                .detail("Elapsed", (lastLogged == 0) ? 0 : now() - lastLogged)
+                .detail("Internal", cx->internal);
+
+            it.second->cc.logToTraceEvent(tssEv);
+
+            tssEv.detail("MeanSSGetValueLatency", it.second->SSgetValueLatency.mean())
+                .detail("MedianSSGetValueLatency", it.second->SSgetValueLatency.median())
+                .detail("SSGetValueLatency90", it.second->SSgetValueLatency.percentile(0.90))
+                .detail("SSGetValueLatency99", it.second->SSgetValueLatency.percentile(0.99));
+
+            tssEv.detail("MeanTSSGetValueLatency", it.second->TSSgetValueLatency.mean())
+                .detail("MedianTSSGetValueLatency", it.second->TSSgetValueLatency.median())
+                .detail("TSSGetValueLatency90", it.second->TSSgetValueLatency.percentile(0.90))
+                .detail("TSSGetValueLatency99", it.second->TSSgetValueLatency.percentile(0.99));
+
+            tssEv.detail("MeanSSGetKeyLatency", it.second->SSgetKeyLatency.mean())
+                .detail("MedianSSGetKeyLatency", it.second->SSgetKeyLatency.median())
+                .detail("SSGetKeyLatency90", it.second->SSgetKeyLatency.percentile(0.90))
+                .detail("SSGetKeyLatency99", it.second->SSgetKeyLatency.percentile(0.99));
+
+            tssEv.detail("MeanTSSGetKeyLatency", it.second->TSSgetKeyLatency.mean())
+                .detail("MedianTSSGetKeyLatency", it.second->TSSgetKeyLatency.median())
+                .detail("TSSGetKeyLatency90", it.second->TSSgetKeyLatency.percentile(0.90))
+                .detail("TSSGetKeyLatency99", it.second->TSSgetKeyLatency.percentile(0.99));
+
+            tssEv.detail("MeanSSGetKeyValuesLatency", it.second->SSgetKeyLatency.mean())
+                .detail("MedianSSGetKeyValuesLatency", it.second->SSgetKeyLatency.median())
+                .detail("SSGetKeyValuesLatency90", it.second->SSgetKeyLatency.percentile(0.90))
+                .detail("SSGetKeyValuesLatency99", it.second->SSgetKeyLatency.percentile(0.99));
+
+            tssEv.detail("MeanTSSGetKeyValuesLatency", it.second->TSSgetKeyValuesLatency.mean())
+                .detail("MedianTSSGetKeyValuesLatency", it.second->TSSgetKeyValuesLatency.median())
+                .detail("TSSGetKeyValuesLatency90", it.second->TSSgetKeyValuesLatency.percentile(0.90))
+                .detail("TSSGetKeyValuesLatency99", it.second->TSSgetKeyValuesLatency.percentile(0.99));
+
+            it.second->clear();
+        }
+
 lastLogged = now();
 }
 }
@@ -711,6 +822,59 @@ ACTOR Future<Void> monitorCacheList(DatabaseContext* self) {
 }
 }

+ACTOR static Future<Void> handleTssMismatches(DatabaseContext* cx) {
+    state Reference<ReadYourWritesTransaction> tr;
+    state KeyBackedMap<UID, UID> tssMapDB = KeyBackedMap<UID, UID>(tssMappingKeys.begin);
+    loop {
+        state UID tssID = waitNext(cx->tssMismatchStream.getFuture());
+        // find ss pair id so we can remove it from the mapping
+        state UID tssPairID;
+        bool found = false;
+        for (const auto& it : cx->tssMapping) {
+            if (it.second.id() == tssID) {
+                tssPairID = it.first;
+                found = true;
+                break;
+            }
+        }
+        if (found) {
+            TraceEvent(SevWarnAlways, "TSS_KillMismatch").detail("TSSID", tssID.toString());
+            TEST(true); // killing TSS because it got mismatch
+
+            // TODO we could write something to the system keyspace and then have DD listen to that keyspace and then DD
+            // do exactly this, so why not just cut out the middle man (or the middle system keys, as it were)
+            tr = makeReference<ReadYourWritesTransaction>(Database(Reference<DatabaseContext>::addRef(cx)));
+            state int tries = 0;
+            loop {
+                try {
+                    tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
+                    tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
+
+                    tr->clear(serverTagKeyFor(tssID));
+                    tssMapDB.erase(tr, tssPairID);
+
+                    wait(tr->commit());
+
+                    break;
+                } catch (Error& e) {
+                    wait(tr->onError(e));
+                }
+                tries++;
+                if (tries > 10) {
+                    // Give up on trying to kill the tss, it'll get another mismatch or a human will investigate
+                    // eventually
+                    TraceEvent("TSS_KillMismatchGaveUp").detail("TSSID", tssID.toString());
+                    break;
+                }
+            }
+            // clear out txn so that the extra DatabaseContext ref gets decref'd and we can free cx
+            tr = makeReference<ReadYourWritesTransaction>();
+        } else {
+            TEST(true); // Not killing TSS with mismatch because it's already gone
+        }
+    }
+}
+
 ACTOR static Future<HealthMetrics> getHealthMetricsActor(DatabaseContext* cx, bool detailed) {
 if (now() - cx->healthMetricsLastUpdated < CLIENT_KNOBS->AGGREGATE_HEALTH_METRICS_MAX_STALENESS) {
 if (detailed) {
@@ -957,9 +1121,8 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<ClusterConnectionF
 getValueSubmitted.init(LiteralStringRef("NativeAPI.GetValueSubmitted"));
 getValueCompleted.init(LiteralStringRef("NativeAPI.GetValueCompleted"));

-GlobalConfig::create(this, clientInfo);
-
 monitorProxiesInfoChange = monitorProxiesChange(clientInfo, &proxiesChangeTrigger);
+tssMismatchHandler = handleTssMismatches(this);
 clientStatusUpdater.actor = clientStatusUpdateActor(this);
 cacheListMonitor = monitorCacheList(this);
@@ -1051,14 +1214,16 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<ClusterConnectionF
     SpecialKeySpace::IMPLTYPE::READWRITE,
     std::make_unique<ClientProfilingImpl>(
         KeyRangeRef(LiteralStringRef("profiling/"), LiteralStringRef("profiling0"))
            .withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin)));
 registerSpecialKeySpaceModule(
-    SpecialKeySpace::MODULE::MANAGEMENT, SpecialKeySpace::IMPLTYPE::READWRITE,
+    SpecialKeySpace::MODULE::MANAGEMENT,
+    SpecialKeySpace::IMPLTYPE::READWRITE,
     std::make_unique<MaintenanceImpl>(
         KeyRangeRef(LiteralStringRef("maintenance/"), LiteralStringRef("maintenance0"))
            .withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin)));
 registerSpecialKeySpaceModule(
-    SpecialKeySpace::MODULE::MANAGEMENT, SpecialKeySpace::IMPLTYPE::READWRITE,
+    SpecialKeySpace::MODULE::MANAGEMENT,
+    SpecialKeySpace::IMPLTYPE::READWRITE,
     std::make_unique<DataDistributionImpl>(
         KeyRangeRef(LiteralStringRef("data_distribution/"), LiteralStringRef("data_distribution0"))
            .withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin)));
@@ -1199,6 +1364,8 @@ Database DatabaseContext::create(Reference<AsyncVar<ClientDBInfo>> clientInfo,
 DatabaseContext::~DatabaseContext() {
 cacheListMonitor.cancel();
 monitorProxiesInfoChange.cancel();
+monitorTssInfoChange.cancel();
+tssMismatchHandler.cancel();
 for (auto it = server_interf.begin(); it != server_interf.end(); it = server_interf.erase(it))
     it->second->notifyContextDestroyed();
 ASSERT_ABORT(server_interf.empty());
@@ -1553,7 +1720,9 @@ Database Database::createDatabase(Reference<ClusterConnectionFile> connFile,
     /*switchable*/ true);
 }

-return Database(db);
+auto database = Database(db);
+GlobalConfig::create(database, clientInfo, std::addressof(clientInfo->get()));
+return database;
 }

 Database Database::createDatabase(std::string connFileName,
@@ -2015,6 +2184,29 @@ ACTOR Future<Optional<vector<StorageServerInterface>>> transactionalGetServerInt
 return serverInterfaces;
 }

+void updateTssMappings(Database cx, const GetKeyServerLocationsReply& reply) {
+    // Since a ss -> tss mapping is included in resultsTssMapping iff that SS is in results and has a tss pair,
+    // all SS in results that do not have a mapping present must not have a tss pair.
+    std::unordered_map<UID, const StorageServerInterface*> ssiById;
+    for (const auto& [_, shard] : reply.results) {
+        for (auto& ssi : shard) {
+            ssiById[ssi.id()] = &ssi;
+        }
+    }
+
+    for (const auto& mapping : reply.resultsTssMapping) {
+        auto ssi = ssiById.find(mapping.first);
+        ASSERT(ssi != ssiById.end());
+        cx->addTssMapping(*ssi->second, mapping.second);
+        ssiById.erase(mapping.first);
+    }
+
+    // if SS didn't have a mapping above, it's still in the ssiById map, so remove its tss mapping
+    for (const auto& it : ssiById) {
+        cx->removeTssMapping(*it.second);
+    }
+}
+
 // If isBackward == true, returns the shard containing the key before 'key' (an infinitely long, inexpressible key).
 // Otherwise returns the shard containing key
 ACTOR Future<pair<KeyRange, Reference<LocationInfo>>> getKeyLocation_internal(Database cx,
@@ -2047,6 +2239,7 @@ ACTOR Future<pair<KeyRange, Reference<LocationInfo>>> getKeyLocation_internal(Da
 ASSERT(rep.results.size() == 1);

 auto locationInfo = cx->setCachedLocation(rep.results[0].first, rep.results[0].second);
+updateTssMappings(cx, rep);
 return std::make_pair(KeyRange(rep.results[0].first, rep.arena), locationInfo);
 }
 }
@@ -2110,6 +2303,7 @@ ACTOR Future<vector<pair<KeyRange, Reference<LocationInfo>>>> getKeyRangeLocatio
     cx->setCachedLocation(rep.results[shard].first, rep.results[shard].second));
 wait(yield());
 }
+updateTssMappings(cx, rep);

 return results;
 }
@@ -2235,7 +2429,7 @@ ACTOR Future<Optional<Value>> getValue(Future<Version> version,

 state GetValueReply reply;
 try {
-    if (CLIENT_BUGGIFY) {
+    if (CLIENT_BUGGIFY_WITH_PROB(.01)) {
         throw deterministicRandom()->randomChoice(
             std::vector<Error>{ transaction_too_old(), future_version() });
     }
@@ -2345,6 +2539,11 @@ ACTOR Future<Key> getKey(Database cx, KeySelector k, Future<Version> version, Tr
     "NativeAPI.getKey.Before"); //.detail("StartKey",
     // k.getKey()).detail("Offset",k.offset).detail("OrEqual",k.orEqual);
 ++cx->transactionPhysicalReads;

+GetKeyRequest req(
+    span.context, k, version.get(), cx->sampleReadTags() ? tags : Optional<TagSet>(), getKeyID);
+req.arena.dependsOn(k.arena());
+
 state GetKeyReply reply;
 try {
 choose {
@@ -2353,11 +2552,7 @@
 wait(loadBalance(cx.getPtr(),
     ssi.second,
     &StorageServerInterface::getKey,
-    GetKeyRequest(span.context,
-        k,
-        version.get(),
-        cx->sampleReadTags() ? tags : Optional<TagSet>(),
-        getKeyID),
+    req,
     TaskPriority::DefaultPromiseEndpoint,
     false,
     cx->enableLocalityLoadBalance ? &cx->queueModel : nullptr))) {
@@ -2718,6 +2913,9 @@ ACTOR Future<RangeResult> getExactRange(Database cx,
 req.end = firstGreaterOrEqual(range.end);
 req.spanContext = span.context;

+// keep shard's arena around in case of async tss comparison
+req.arena.dependsOn(locations[shard].first.arena());
+
 transformRangeLimits(limits, reverse, req);
 ASSERT(req.limitBytes > 0 && req.limit != 0 && req.limit < 0 == reverse);
@@ -3034,6 +3232,9 @@ ACTOR Future<RangeResult> getRange(Database cx,
 req.isFetchKeys = (info.taskID == TaskPriority::FetchKeys);
 req.version = readVersion;

+// In case of async tss comparison, also make req arena depend on begin, end, and/or shard's arena depending
+// on which is used
+bool dependOnShard = false;
 if (reverse && (begin - 1).isDefinitelyLess(shard.begin) &&
     (!begin.isFirstGreaterOrEqual() ||
      begin.getKey() != shard.begin)) { // In this case we would be setting modifiedSelectors to true, but
@@ -3041,14 +3242,23 @@ ACTOR Future<RangeResult> getRange(Database cx,

     req.begin = firstGreaterOrEqual(shard.begin);
     modifiedSelectors = true;
-} else
+    req.arena.dependsOn(shard.arena());
+    dependOnShard = true;
+} else {
     req.begin = begin;
+    req.arena.dependsOn(begin.arena());
+}

 if (!reverse && end.isDefinitelyGreater(shard.end)) {
     req.end = firstGreaterOrEqual(shard.end);
     modifiedSelectors = true;
-} else
+    if (!dependOnShard) {
+        req.arena.dependsOn(shard.arena());
+    }
+} else {
     req.end = end;
+    req.arena.dependsOn(end.arena());
+}

 transformRangeLimits(limits, reverse, req);
 ASSERT(req.limitBytes > 0 && req.limit != 0 && req.limit < 0 == reverse);
@@ -3078,7 +3288,7 @@ ACTOR Future<RangeResult> getRange(Database cx,
 ++cx->transactionPhysicalReads;
 state GetKeyValuesReply rep;
 try {
-    if (CLIENT_BUGGIFY) {
+    if (CLIENT_BUGGIFY_WITH_PROB(.01)) {
         throw deterministicRandom()->randomChoice(
             std::vector<Error>{ transaction_too_old(), future_version() });
     }
@@ -3133,10 +3343,17 @@ ACTOR Future<RangeResult> getRange(Database cx,
 output.readThroughEnd = readThroughEnd;

 if (BUGGIFY && limits.hasByteLimit() && output.size() > std::max(1, originalLimits.minRows)) {
+    // Copy instead of resizing because TSS maybe be using output's arena for comparison. This only
+    // happens in simulation so it's fine
+    RangeResult copy;
+    int newSize =
+        deterministicRandom()->randomInt(std::max(1, originalLimits.minRows), output.size());
+    for (int i = 0; i < newSize; i++) {
+        copy.push_back_deep(copy.arena(), output[i]);
+    }
+    output = copy;
     output.more = true;
-    output.resize(
-        output.arena(),
-        deterministicRandom()->randomInt(std::max(1, originalLimits.minRows), output.size()));
     getRangeFinished(cx,
         trLogInfo,
         startTime,
@@ -5659,3 +5876,23 @@ Future<Void> DatabaseContext::createSnapshot(StringRef uid, StringRef snapshot_c
 }
 return createSnapshotActor(this, UID::fromString(uid_str), snapshot_command);
 }

+ACTOR Future<Void> setPerpetualStorageWiggle(Database cx, bool enable, bool lock_aware) {
+    state ReadYourWritesTransaction tr(cx);
+    loop {
+        try {
+            tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
+            if (lock_aware) {
+                tr.setOption(FDBTransactionOptions::LOCK_AWARE);
+            }
+
+            tr.set(perpetualStorageWiggleKey, enable ? LiteralStringRef("1") : LiteralStringRef("0"));
+            wait(tr.commit());
+            break;
+        } catch (Error& e) {
+            wait(tr.onError(e));
+        }
+    }
+    return Void();
+}
@@ -407,5 +407,10 @@ ACTOR Future<bool> checkSafeExclusions(Database cx, vector<AddressExclusion> exc
 inline uint64_t getWriteOperationCost(uint64_t bytes) {
     return bytes / std::max(1, CLIENT_KNOBS->WRITE_COST_BYTE_FACTOR) + 1;
 }

+// Create a transaction to set the value of system key \xff/conf/perpetual_storage_wiggle. If enable == true, the value
+// will be 1. Otherwise, the value will be 0.
+ACTOR Future<Void> setPerpetualStorageWiggle(Database cx, bool enable, bool lock_aware = false);
+
 #include "flow/unactorcompiler.h"
 #endif
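A minimal usage sketch for the two helpers above. The wrapper actor name and the 16384-byte default for WRITE_COST_BYTE_FACTOR are assumptions for illustration, not part of this commit.

// Hypothetical caller, assuming an existing Database handle `db`.
ACTOR Future<Void> enableWiggleExample(Database db) {
    // Turn the perpetual storage wiggle on; lock_aware defaults to false.
    wait(setPerpetualStorageWiggle(db, true));
    // With an assumed WRITE_COST_BYTE_FACTOR of 16384, a 4000-byte write costs
    // 4000 / 16384 + 1 = 1 unit, and a 40000-byte write costs 40000 / 16384 + 1 = 3 units.
    uint64_t cost = getWriteOperationCost(40000);
    (void)cost;
    return Void();
}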
@@ -0,0 +1,56 @@
+/*
+ * RestoreInterface.h
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2021 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "fdbclient/RestoreInterface.h"
+#include "flow/serialize.h"
+
+const KeyRef restoreRequestDoneKey = "\xff\x02/restoreRequestDone"_sr;
+const KeyRef restoreRequestTriggerKey = "\xff\x02/restoreRequestTrigger"_sr;
+const KeyRangeRef restoreRequestKeys("\xff\x02/restoreRequests/"_sr, "\xff\x02/restoreRequests0"_sr);
+
+// Encode and decode restore request value
+Value restoreRequestTriggerValue(UID randomID, int numRequests) {
+    BinaryWriter wr(IncludeVersion(ProtocolVersion::withRestoreRequestTriggerValue()));
+    wr << numRequests;
+    wr << randomID;
+    return wr.toValue();
+}
+
+int decodeRestoreRequestTriggerValue(ValueRef const& value) {
+    int s;
+    UID randomID;
+    BinaryReader reader(value, IncludeVersion());
+    reader >> s;
+    reader >> randomID;
+    return s;
+}
+
+Key restoreRequestKeyFor(int index) {
+    BinaryWriter wr(Unversioned());
+    wr.serializeBytes(restoreRequestKeys.begin);
+    wr << index;
+    return wr.toValue();
+}
+
+Value restoreRequestValue(RestoreRequest const& request) {
+    BinaryWriter wr(IncludeVersion(ProtocolVersion::withRestoreRequestValue()));
+    wr << request;
+    return wr.toValue();
+}
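A small round-trip sketch of the trigger-value helpers above; the wrapper function is hypothetical and only illustrates that decoding reads the fields back in the order they were written.

// Illustrative round-trip check for restoreRequestTriggerValue / decodeRestoreRequestTriggerValue.
void restoreTriggerValueRoundTrip() {
    UID id = deterministicRandom()->randomUniqueID();
    Value v = restoreRequestTriggerValue(id, 7); // writes numRequests, then the UID
    int n = decodeRestoreRequestTriggerValue(v); // reads them back in the same order
    ASSERT(n == 7);
}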
@@ -0,0 +1,99 @@
+/*
+ * RestoreInterface.h
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2021 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "fdbclient/FDBTypes.h"
+#include "fdbrpc/fdbrpc.h"
+
+struct RestoreCommonReply {
+    constexpr static FileIdentifier file_identifier = 5808787;
+    UID id; // unique ID of the server who sends the reply
+    bool isDuplicated;
+
+    RestoreCommonReply() = default;
+    explicit RestoreCommonReply(UID id, bool isDuplicated = false) : id(id), isDuplicated(isDuplicated) {}
+
+    std::string toString() const {
+        std::stringstream ss;
+        ss << "ServerNodeID:" << id.toString() << " isDuplicated:" << isDuplicated;
+        return ss.str();
+    }
+
+    template <class Ar>
+    void serialize(Ar& ar) {
+        serializer(ar, id, isDuplicated);
+    }
+};
+
+struct RestoreRequest {
+    constexpr static FileIdentifier file_identifier = 16035338;
+
+    int index;
+    Key tagName;
+    Key url;
+    Version targetVersion;
+    KeyRange range;
+    UID randomUid;
+
+    // Every key in backup will first removePrefix and then addPrefix;
+    // Simulation testing does not cover when both addPrefix and removePrefix exist yet.
+    Key addPrefix;
+    Key removePrefix;
+
+    ReplyPromise<struct RestoreCommonReply> reply;
+
+    RestoreRequest() = default;
+    explicit RestoreRequest(const int index,
+                            const Key& tagName,
+                            const Key& url,
+                            Version targetVersion,
+                            const KeyRange& range,
+                            const UID& randomUid,
+                            Key& addPrefix,
+                            Key removePrefix)
+      : index(index), tagName(tagName), url(url), targetVersion(targetVersion), range(range), randomUid(randomUid),
+        addPrefix(addPrefix), removePrefix(removePrefix) {}
+
+    // To change this serialization, ProtocolVersion::RestoreRequestValue must be updated, and downgrades need to be
+    // considered
+    template <class Ar>
+    void serialize(Ar& ar) {
+        serializer(ar, index, tagName, url, targetVersion, range, randomUid, addPrefix, removePrefix, reply);
+    }
+
+    std::string toString() const {
+        std::stringstream ss;
+        ss << "index:" << std::to_string(index) << " tagName:" << tagName.contents().toString()
+           << " url:" << url.contents().toString() << " targetVersion:" << std::to_string(targetVersion)
+           << " range:" << range.toString() << " randomUid:" << randomUid.toString()
+           << " addPrefix:" << addPrefix.toString() << " removePrefix:" << removePrefix.toString();
+        return ss.str();
+    }
+};
+
+extern const KeyRef restoreRequestDoneKey;
+extern const KeyRef restoreRequestTriggerKey;
+extern const KeyRangeRef restoreRequestKeys;
+
+Value restoreRequestTriggerValue(UID randomID, int numRequests);
+int decodeRequestRequestTriggerValue(ValueRef const&);
+Key restoreRequestKeyFor(int index);
+Value restoreRequestValue(RestoreRequest const&);
@@ -144,6 +144,16 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
     "counter":0,
     "roughness":0.0
 },
+"fetched_versions":{
+    "hz":0.0,
+    "counter":0,
+    "roughness":0.0
+},
+"fetches_from_logs":{
+    "hz":0.0,
+    "counter":0,
+    "roughness":0.0
+},
 "grv_latency_statistics":{
     "default":{
         "count":0,

@@ -421,6 +431,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
     "seconds" : 1.0,
     "versions" : 1000000
 },
+"active_tss_count":0,
 "degraded_processes":0,
 "database_available":true,
 "database_lock_state": {

@@ -648,6 +659,10 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
 "data_distribution_disabled_for_rebalance":true,
 "data_distribution_disabled":true,
 "active_primary_dc":"pv",
+"bounce_impact":{
+    "can_clean_bounce":true,
+    "reason":""
+},
 "configuration":{
     "log_anti_quorum":0,
     "log_replicas":2,

@@ -715,6 +730,19 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
     "memory-2",
     "memory-radixtree-beta"
 ]},
+"tss_count":1,
+"tss_storage_engine":{
+"$enum":[
+    "ssd",
+    "ssd-1",
+    "ssd-2",
+    "ssd-redwood-experimental",
+    "ssd-rocksdb-experimental",
+    "memory",
+    "memory-1",
+    "memory-2",
+    "memory-radixtree-beta"
+]},
 "coordinators_count":1,
 "excluded_servers":[
     {

@@ -727,7 +755,8 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
 "auto_logs":3,
 "commit_proxies":5,
 "grv_proxies":1,
-"backup_worker_enabled":1
+"backup_worker_enabled":1,
+"perpetual_storage_wiggle":0
 },
 "data":{
 "least_operating_space_bytes_log_server":0,

@@ -787,7 +816,8 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
     }
 }
 ],
-"least_operating_space_bytes_storage_server":0
+"least_operating_space_bytes_storage_server":0,
+"max_machine_failures_without_losing_data":0
 },
 "machines":{
 "$map":{
@@ -1384,6 +1384,9 @@ Future<RangeResult> GlobalConfigImpl::getRange(ReadYourWritesTransaction* ryw, K
 } else if (config->value.type() == typeid(int64_t)) {
     result.push_back_deep(result.arena(),
                           KeyValueRef(prefixedKey, std::to_string(std::any_cast<int64_t>(config->value))));
+} else if (config->value.type() == typeid(bool)) {
+    result.push_back_deep(result.arena(),
+                          KeyValueRef(prefixedKey, std::to_string(std::any_cast<bool>(config->value))));
 } else if (config->value.type() == typeid(float)) {
     result.push_back_deep(result.arena(),
                           KeyValueRef(prefixedKey, std::to_string(std::any_cast<float>(config->value))));

@@ -2058,9 +2061,20 @@ Future<Optional<std::string>> DataDistributionImpl::commit(ReadYourWritesTransac
 try {
     int mode = boost::lexical_cast<int>(iter->value().second.get().toString());
     Value modeVal = BinaryWriter::toValue(mode, Unversioned());
-    if (mode == 0 || mode == 1)
+    if (mode == 0 || mode == 1) {
+        // Whenever configuration changes or DD related system keyspace is changed,
+        // actor must grab the moveKeysLockOwnerKey and update moveKeysLockWriteKey.
+        // This prevents concurrent write to the same system keyspace.
+        // When the owner of the DD related system keyspace changes, DD will reboot
+        BinaryWriter wrMyOwner(Unversioned());
+        wrMyOwner << dataDistributionModeLock;
+        ryw->getTransaction().set(moveKeysLockOwnerKey, wrMyOwner.toValue());
+        BinaryWriter wrLastWrite(Unversioned());
+        wrLastWrite << deterministicRandom()->randomUniqueID();
+        ryw->getTransaction().set(moveKeysLockWriteKey, wrLastWrite.toValue());
+        // set mode
         ryw->getTransaction().set(dataDistributionModeKey, modeVal);
-    else
+    } else
         msg = ManagementAPIError::toJsonString(false,
                                                "datadistribution",
                                                "Please set the value of the data_distribution/mode to "
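A hedged sketch of the other side of the lock protocol described in the comment above: a holder can detect that ownership changed by re-reading the owner key. The actor name and exact read pattern are assumptions based on the write shown above, not part of this commit.

// Sketch only: returns false once someone else has overwritten moveKeysLockOwnerKey.
ACTOR Future<bool> ownerStillHoldsDDLock(Transaction* tr, UID myOwner) {
    tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
    Optional<Value> current = wait(tr->get(moveKeysLockOwnerKey));
    UID currentOwner =
        current.present() ? BinaryReader::fromStringRef<UID>(current.get(), Unversioned()) : UID();
    return currentOwner == myOwner; // a different UID means ownership changed and DD should reboot
}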
@@ -0,0 +1,385 @@
+/*
+ * StorageServerInterface.cpp
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "fdbclient/StorageServerInterface.h"
+#include "flow/crc32c.h" // for crc32c_append, to checksum values in tss trace events
+
+// Includes template specializations for all tss operations on storage server types.
+// New StorageServerInterface reply types must be added here or it won't compile.
+
+// if size + hex of checksum is shorter than value, record that instead of actual value. break-even point is 12
+// characters
+std::string traceChecksumValue(ValueRef s) {
+    return s.size() > 12 ? format("(%d)%08x", s.size(), crc32c_append(0, s.begin(), s.size())) : s.toString();
+}
+
+template <>
+bool TSS_doCompare(const GetValueRequest& req, const GetValueReply& src, const GetValueReply& tss,
+                   Severity traceSeverity, UID tssId) {
+    if (src.value.present() != tss.value.present() || (src.value.present() && src.value.get() != tss.value.get())) {
+        TraceEvent(traceSeverity, "TSSMismatchGetValue")
+            .suppressFor(1.0)
+            .detail("TSSID", tssId)
+            .detail("Key", req.key.printable())
+            .detail("Version", req.version)
+            .detail("SSReply", src.value.present() ? traceChecksumValue(src.value.get()) : "missing")
+            .detail("TSSReply", tss.value.present() ? traceChecksumValue(tss.value.get()) : "missing");
+
+        return false;
+    }
+    return true;
+}
+
+template <>
+bool TSS_doCompare(const GetKeyRequest& req, const GetKeyReply& src, const GetKeyReply& tss,
+                   Severity traceSeverity, UID tssId) {
+    // This process is a bit complicated. Since the tss and ss can return different results if neighboring shards to
+    // req.sel.key are currently being moved, We validate that the results are the same IF the returned key selectors
+    // are final. Otherwise, we only mark the request as a mismatch if the difference between the two returned key
+    // selectors could ONLY be because of different results from the storage engines. We can afford to only partially
+    // check key selectors that start in a TSS shard and end in a non-TSS shard because the other read queries and the
+    // consistency check will eventually catch a misbehaving storage engine.
+    bool matches = true;
+    if (src.sel.orEqual == tss.sel.orEqual && src.sel.offset == tss.sel.offset) {
+        // full matching case
+        if (src.sel.offset == 0 && src.sel.orEqual) {
+            // found exact key, should be identical
+            matches = src.sel.getKey() == tss.sel.getKey();
+        }
+        // if the query doesn't return the final key, there is an edge case where the ss and tss have different shard
+        // boundaries, so they pass different shard boundary keys back for the same offset
+    } else if (src.sel.getKey() == tss.sel.getKey()) {
+        // There is one case with a positive offset where the shard boundary the incomplete query stopped at is the next
+        // key in the shard that the complete query returned. This is not possible with a negative offset because the
+        // shard boundary is exclusive backwards
+        if (src.sel.offset == 0 && src.sel.orEqual && tss.sel.offset == 1 && !tss.sel.orEqual) {
+            // case where ss was complete and tss was incomplete
+        } else if (tss.sel.offset == 0 && tss.sel.orEqual && src.sel.offset == 1 && !src.sel.orEqual) {
+            // case where tss was complete and ss was incomplete
+        } else {
+            matches = false;
+        }
+    } else {
+        // ss/tss returned different keys, and different offsets and/or orEqual
+        // here we just validate that ordering of the keys matches the ordering of the offsets
+        bool tssKeyLarger = src.sel.getKey() < tss.sel.getKey();
+        // the only case offsets are equal and orEqual aren't equal is the case with a negative offset,
+        // where one response has <=0 with the actual result and the other has <0 with the shard upper boundary.
+        // So whichever one has the actual result should have the lower key.
+        bool tssOffsetLarger = (src.sel.offset == tss.sel.offset) ? tss.sel.orEqual : src.sel.offset < tss.sel.offset;
+        matches = tssKeyLarger != tssOffsetLarger;
+    }
+    if (!matches) {
+        TraceEvent(traceSeverity, "TSSMismatchGetKey")
+            .suppressFor(1.0)
+            .detail("TSSID", tssId)
+            .detail("KeySelector",
+                    format("%s%s:%d", req.sel.orEqual ? "=" : "", req.sel.getKey().printable().c_str(), req.sel.offset))
+            .detail("Version", req.version)
+            .detail("SSReply",
+                    format("%s%s:%d", src.sel.orEqual ? "=" : "", src.sel.getKey().printable().c_str(), src.sel.offset))
+            .detail("TSSReply",
+                    format("%s%s:%d", tss.sel.orEqual ? "=" : "", tss.sel.getKey().printable().c_str(), tss.sel.offset));
+    }
+    return matches;
+}
+
+template <>
+bool TSS_doCompare(const GetKeyValuesRequest& req, const GetKeyValuesReply& src, const GetKeyValuesReply& tss,
+                   Severity traceSeverity, UID tssId) {
+    if (src.more != tss.more || src.data != tss.data) {
+        std::string ssResultsString = format("(%d)%s:\n", src.data.size(), src.more ? "+" : "");
+        for (auto& it : src.data) {
+            ssResultsString += "\n" + it.key.printable() + "=" + traceChecksumValue(it.value);
+        }
+
+        std::string tssResultsString = format("(%d)%s:\n", tss.data.size(), tss.more ? "+" : "");
+        for (auto& it : tss.data) {
+            tssResultsString += "\n" + it.key.printable() + "=" + traceChecksumValue(it.value);
+        }
+
+        TraceEvent(traceSeverity, "TSSMismatchGetKeyValues")
+            .suppressFor(1.0)
+            .detail("TSSID", tssId)
+            .detail("Begin",
+                    format("%s%s:%d", req.begin.orEqual ? "=" : "", req.begin.getKey().printable().c_str(), req.begin.offset))
+            .detail("End",
+                    format("%s%s:%d", req.end.orEqual ? "=" : "", req.end.getKey().printable().c_str(), req.end.offset))
+            .detail("Version", req.version)
+            .detail("Limit", req.limit)
+            .detail("LimitBytes", req.limitBytes)
+            .detail("SSReply", ssResultsString)
+            .detail("TSSReply", tssResultsString);
+
+        return false;
+    }
+    return true;
+}
+
+template <>
+bool TSS_doCompare(const WatchValueRequest& req, const WatchValueReply& src, const WatchValueReply& tss,
+                   Severity traceSeverity, UID tssId) {
+    // We duplicate watches just for load, no need to validte replies.
+    return true;
+}
+
+// no-op template specializations for metrics replies
+template <>
+bool TSS_doCompare(const WaitMetricsRequest& req, const StorageMetrics& src, const StorageMetrics& tss,
+                   Severity traceSeverity, UID tssId) {
+    return true;
+}
+
+template <>
+bool TSS_doCompare(const SplitMetricsRequest& req, const SplitMetricsReply& src, const SplitMetricsReply& tss,
+                   Severity traceSeverity, UID tssId) {
+    return true;
+}
+
+template <>
+bool TSS_doCompare(const ReadHotSubRangeRequest& req, const ReadHotSubRangeReply& src, const ReadHotSubRangeReply& tss,
+                   Severity traceSeverity, UID tssId) {
+    return true;
+}
+
+template <>
+bool TSS_doCompare(const SplitRangeRequest& req, const SplitRangeReply& src, const SplitRangeReply& tss,
+                   Severity traceSeverity, UID tssId) {
+    return true;
+}
+
+// only record metrics for data reads
+
+template <>
+void TSSMetrics::recordLatency(const GetValueRequest& req, double ssLatency, double tssLatency) {
+    SSgetValueLatency.addSample(ssLatency);
+    TSSgetValueLatency.addSample(tssLatency);
+}
+
+template <>
+void TSSMetrics::recordLatency(const GetKeyRequest& req, double ssLatency, double tssLatency) {
+    SSgetKeyLatency.addSample(ssLatency);
+    TSSgetKeyLatency.addSample(tssLatency);
+}
+
+template <>
+void TSSMetrics::recordLatency(const GetKeyValuesRequest& req, double ssLatency, double tssLatency) {
+    SSgetKeyValuesLatency.addSample(ssLatency);
+    TSSgetKeyValuesLatency.addSample(tssLatency);
+}
+
+template <>
+void TSSMetrics::recordLatency(const WatchValueRequest& req, double ssLatency, double tssLatency) {}
+
+template <>
+void TSSMetrics::recordLatency(const WaitMetricsRequest& req, double ssLatency, double tssLatency) {}
+
+template <>
+void TSSMetrics::recordLatency(const SplitMetricsRequest& req, double ssLatency, double tssLatency) {}
+
+template <>
+void TSSMetrics::recordLatency(const ReadHotSubRangeRequest& req, double ssLatency, double tssLatency) {}
+
+template <>
+void TSSMetrics::recordLatency(const SplitRangeRequest& req, double ssLatency, double tssLatency) {}
+
+// -------------------
+
+TEST_CASE("/StorageServerInterface/TSSCompare/TestComparison") {
+    printf("testing tss comparisons\n");
+
+    // to avoid compiler issues that StringRef(char* is deprecated)
+    std::string s_a = "a";
+    std::string s_b = "b";
+    std::string s_c = "c";
+    std::string s_d = "d";
+    std::string s_e = "e";
+
+    // test getValue
+    GetValueRequest gvReq;
+    gvReq.key = StringRef(s_a);
+    gvReq.version = 5;
+
+    UID tssId;
+
+    GetValueReply gvReplyMissing;
+    GetValueReply gvReplyA(Optional<Value>(StringRef(s_a)), false);
+    GetValueReply gvReplyB(Optional<Value>(StringRef(s_b)), false);
+    ASSERT(TSS_doCompare(gvReq, gvReplyMissing, gvReplyMissing, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gvReq, gvReplyA, gvReplyA, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gvReq, gvReplyB, gvReplyB, SevInfo, tssId));
+
+    ASSERT(!TSS_doCompare(gvReq, gvReplyMissing, gvReplyA, SevInfo, tssId));
+    ASSERT(!TSS_doCompare(gvReq, gvReplyA, gvReplyB, SevInfo, tssId));
+
+    // test GetKeyValues
+    Arena a; // for all of the refs. ASAN complains if this isn't done. Could also make them all standalone i guess
+    GetKeyValuesRequest gkvReq;
+    gkvReq.begin = firstGreaterOrEqual(StringRef(a, s_a));
+    gkvReq.end = firstGreaterOrEqual(StringRef(a, s_b));
+    gkvReq.version = 5;
+    gkvReq.limit = 100;
+    gkvReq.limitBytes = 1000;
+
+    GetKeyValuesReply gkvReplyEmpty;
+    GetKeyValuesReply gkvReplyOne;
+    KeyValueRef v;
+    v.key = StringRef(a, s_a);
+    v.value = StringRef(a, s_b);
+    gkvReplyOne.data.push_back_deep(gkvReplyOne.arena, v);
+    GetKeyValuesReply gkvReplyOneMore;
+    gkvReplyOneMore.data.push_back_deep(gkvReplyOneMore.arena, v);
+    gkvReplyOneMore.more = true;
+
+    ASSERT(TSS_doCompare(gkvReq, gkvReplyEmpty, gkvReplyEmpty, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkvReq, gkvReplyOne, gkvReplyOne, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkvReq, gkvReplyOneMore, gkvReplyOneMore, SevInfo, tssId));
+    ASSERT(!TSS_doCompare(gkvReq, gkvReplyEmpty, gkvReplyOne, SevInfo, tssId));
+    ASSERT(!TSS_doCompare(gkvReq, gkvReplyOne, gkvReplyOneMore, SevInfo, tssId));
+
+    // test GetKey
+    GetKeyRequest gkReq;
+    gkReq.sel = KeySelectorRef(StringRef(a, s_a), false, 1);
+    gkReq.version = 5;
+
+    GetKeyReply gkReplyA(KeySelectorRef(StringRef(a, s_a), false, 20), false);
+    GetKeyReply gkReplyB(KeySelectorRef(StringRef(a, s_b), false, 10), false);
+    GetKeyReply gkReplyC(KeySelectorRef(StringRef(a, s_c), true, 0), false);
+    GetKeyReply gkReplyD(KeySelectorRef(StringRef(a, s_d), false, -10), false);
+    GetKeyReply gkReplyE(KeySelectorRef(StringRef(a, s_e), false, -20), false);
+
+    // identical cases
+    ASSERT(TSS_doCompare(gkReq, gkReplyA, gkReplyA, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkReq, gkReplyB, gkReplyB, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkReq, gkReplyC, gkReplyC, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkReq, gkReplyD, gkReplyD, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkReq, gkReplyE, gkReplyE, SevInfo, tssId));
+
+    // relative offset cases
+    ASSERT(TSS_doCompare(gkReq, gkReplyA, gkReplyB, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkReq, gkReplyB, gkReplyA, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkReq, gkReplyA, gkReplyC, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkReq, gkReplyC, gkReplyA, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkReq, gkReplyB, gkReplyC, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkReq, gkReplyC, gkReplyB, SevInfo, tssId));
+
+    ASSERT(TSS_doCompare(gkReq, gkReplyC, gkReplyD, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkReq, gkReplyD, gkReplyC, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkReq, gkReplyC, gkReplyE, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkReq, gkReplyE, gkReplyC, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkReq, gkReplyD, gkReplyE, SevInfo, tssId));
+    ASSERT(TSS_doCompare(gkReq, gkReplyE, gkReplyD, SevInfo, tssId));
+
+    // test same offset/orEqual wrong key
+    ASSERT(!TSS_doCompare(gkReq, GetKeyReply(KeySelectorRef(StringRef(a, s_a), true, 0), false),
+                          GetKeyReply(KeySelectorRef(StringRef(a, s_b), true, 0), false), SevInfo, tssId));
+    // this could be from different shard boundaries, so don't say it's a mismatch
+    ASSERT(TSS_doCompare(gkReq, GetKeyReply(KeySelectorRef(StringRef(a, s_a), false, 10), false),
+                         GetKeyReply(KeySelectorRef(StringRef(a, s_b), false, 10), false), SevInfo, tssId));
+
+    // test offsets and key difference don't match
+    ASSERT(!TSS_doCompare(gkReq, GetKeyReply(KeySelectorRef(StringRef(a, s_a), false, 0), false),
+                          GetKeyReply(KeySelectorRef(StringRef(a, s_b), false, 10), false), SevInfo, tssId));
+    ASSERT(!TSS_doCompare(gkReq, GetKeyReply(KeySelectorRef(StringRef(a, s_a), false, -10), false),
+                          GetKeyReply(KeySelectorRef(StringRef(a, s_b), false, 0), false), SevInfo, tssId));
+
+    // test key is next over in one shard, one found it and other didn't
+    // positive
+    // one that didn't find is +1
+    ASSERT(TSS_doCompare(gkReq, GetKeyReply(KeySelectorRef(StringRef(a, s_a), false, 1), false),
+                         GetKeyReply(KeySelectorRef(StringRef(a, s_b), true, 0), false), SevInfo, tssId));
+    ASSERT(!TSS_doCompare(gkReq, GetKeyReply(KeySelectorRef(StringRef(a, s_a), true, 0), false),
+                          GetKeyReply(KeySelectorRef(StringRef(a, s_b), false, 1), false), SevInfo, tssId));
+
+    // negative will have zero offset but not equal set
+    ASSERT(TSS_doCompare(gkReq, GetKeyReply(KeySelectorRef(StringRef(a, s_a), true, 0), false),
+                         GetKeyReply(KeySelectorRef(StringRef(a, s_b), false, 0), false), SevInfo, tssId));
+    ASSERT(!TSS_doCompare(gkReq, GetKeyReply(KeySelectorRef(StringRef(a, s_a), false, 0), false),
+                          GetKeyReply(KeySelectorRef(StringRef(a, s_b), true, 0), false), SevInfo, tssId));
+
+    // test shard boundary key returned by incomplete query is the same as the key found by the other (only possible in
+    // positive direction)
+    ASSERT(TSS_doCompare(gkReq, GetKeyReply(KeySelectorRef(StringRef(a, s_a), true, 0), false),
+                         GetKeyReply(KeySelectorRef(StringRef(a, s_a), false, 1), false), SevInfo, tssId));
+
+    // explictly test checksum function
+    std::string s12 = "ABCDEFGHIJKL";
+    std::string s13 = "ABCDEFGHIJKLO";
+    std::string checksumStart13 = "(13)";
+    ASSERT(s_a == traceChecksumValue(StringRef(s_a)));
+    ASSERT(s12 == traceChecksumValue(StringRef(s12)));
+    ASSERT(checksumStart13 == traceChecksumValue(StringRef(s13)).substr(0, 4));
+    return Void();
+}
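A hedged worked example of the "different keys and different offsets" rule in TSS_doCompare for GetKey above: whichever reply advanced to the larger key must be the one with the smaller remaining offset. The helper function below is a standalone illustration, not part of the commit.

// Sketch: consistent vs. inconsistent key/offset orderings for two GetKey replies.
void tssGetKeyOrderingExample() {
    Arena arena;
    std::string sB = "b", sD = "d";
    GetKeyRequest req;
    req.sel = KeySelectorRef(StringRef(arena, sB), false, 20);
    req.version = 5;
    UID tssId;
    // SS stopped at "b" with 10 left to go, TSS reached "d" with only 5 left: ordering agrees, no mismatch.
    ASSERT(TSS_doCompare(req, GetKeyReply(KeySelectorRef(StringRef(arena, sB), false, 10), false),
                         GetKeyReply(KeySelectorRef(StringRef(arena, sD), false, 5), false), SevInfo, tssId));
    // TSS reports the larger key "d" but also the larger remaining offset: ordering disagrees, mismatch.
    ASSERT(!TSS_doCompare(req, GetKeyReply(KeySelectorRef(StringRef(arena, sB), false, 5), false),
                          GetKeyReply(KeySelectorRef(StringRef(arena, sD), false, 10), false), SevInfo, tssId));
}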
@ -29,7 +29,9 @@
 #include "fdbrpc/LoadBalance.actor.h"
 #include "fdbrpc/Stats.h"
 #include "fdbrpc/TimedRequest.h"
+#include "fdbrpc/TSSComparison.h"
 #include "fdbclient/TagThrottle.h"
+#include "flow/UnitTest.h"

 // Dead code, removed in the next protocol version
 struct VersionReply {

@ -54,6 +56,7 @@ struct StorageServerInterface {
     LocalityData locality;
     UID uniqueID;
+    Optional<UID> tssPairID;

     RequestStream<struct GetValueRequest> getValue;
     RequestStream<struct GetKeyRequest> getKey;

@ -80,6 +83,7 @@ struct StorageServerInterface {
     NetworkAddress stableAddress() const { return getValue.getEndpoint().getStableAddress(); }
     Optional<NetworkAddress> secondaryAddress() const { return getValue.getEndpoint().addresses.secondaryAddress; }
     UID id() const { return uniqueID; }
+    bool isTss() const { return tssPairID.present(); }
     std::string toString() const { return id().shortString(); }
     template <class Ar>
     void serialize(Ar& ar) {

@ -88,7 +92,11 @@ struct StorageServerInterface {
         // considered

         if (ar.protocolVersion().hasSmallEndpoints()) {
-            serializer(ar, uniqueID, locality, getValue);
+            if (ar.protocolVersion().hasTSS()) {
+                serializer(ar, uniqueID, locality, getValue, tssPairID);
+            } else {
+                serializer(ar, uniqueID, locality, getValue);
+            }
             if (Ar::isDeserializing) {
                 getKey = RequestStream<struct GetKeyRequest>(getValue.getEndpoint().getAdjustedEndpoint(1));
                 getKeyValues = RequestStream<struct GetKeyValuesRequest>(getValue.getEndpoint().getAdjustedEndpoint(2));

@ -127,8 +135,9 @@ struct StorageServerInterface {
                        waitFailure,
                        getQueuingMetrics,
                        getKeyValueStoreType);
-            if (ar.protocolVersion().hasWatches())
+            if (ar.protocolVersion().hasWatches()) {
                 serializer(ar, watchValue);
+            }
         }
     }
     bool operator==(StorageServerInterface const& s) const { return uniqueID == s.uniqueID; }
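The serialize() change above follows a common upgrade pattern: the newly added field (`tssPairID`) is only written or read when the negotiated protocol version says both peers understand it, so old and new binaries can still exchange the struct. The sketch below shows the same idea with a toy byte-stream archive instead of FoundationDB's `serializer`; the archive type, the version constant, and the "absent encoded as 0" rule are all invented for illustration.

```cpp
#include <cstdint>
#include <cstring>
#include <optional>
#include <vector>

// Toy archive: a flat byte buffer plus the protocol version agreed with the peer.
struct Archive {
    uint64_t protocolVersion;
    std::vector<uint8_t> bytes;
    size_t readPos = 0;

    void writeU64(uint64_t v) {
        const uint8_t* p = reinterpret_cast<const uint8_t*>(&v);
        bytes.insert(bytes.end(), p, p + sizeof(v));
    }
    uint64_t readU64() {
        uint64_t v;
        std::memcpy(&v, bytes.data() + readPos, sizeof(v));
        readPos += sizeof(v);
        return v;
    }
};

constexpr uint64_t kVersionWithPairId = 7; // assumed version that introduced the field

struct Interface {
    uint64_t id = 0;
    std::optional<uint64_t> pairId; // newer field, unknown to older peers

    void save(Archive& ar) const {
        ar.writeU64(id);
        if (ar.protocolVersion >= kVersionWithPairId) {
            // Only newer peers expect the extra field; encode "absent" as 0 in this sketch.
            ar.writeU64(pairId.value_or(0));
        }
    }
    void load(Archive& ar) {
        id = ar.readU64();
        if (ar.protocolVersion >= kVersionWithPairId) {
            uint64_t raw = ar.readU64();
            pairId = (raw == 0) ? std::nullopt : std::optional<uint64_t>(raw);
        }
    }
};
```

The design choice is the same as in the diff: gate the field on the protocol version at both ends rather than changing the wire format unconditionally, so mixed-version clusters keep working during an upgrade.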
@ -25,6 +25,7 @@
 #include "flow/Arena.h"
 #include "flow/TDMetric.actor.h"
 #include "flow/serialize.h"
+#include "flow/UnitTest.h"

 const KeyRef systemKeysPrefix = LiteralStringRef("\xff");
 const KeyRangeRef normalKeys(KeyRef(), systemKeysPrefix);

@ -345,7 +346,10 @@ uint16_t cacheChangeKeyDecodeIndex(const KeyRef& key) {
     return idx;
 }

+const KeyRangeRef tssMappingKeys(LiteralStringRef("\xff/tss/"), LiteralStringRef("\xff/tss0"));
+
 const KeyRangeRef serverTagKeys(LiteralStringRef("\xff/serverTag/"), LiteralStringRef("\xff/serverTag0"));

 const KeyRef serverTagPrefix = serverTagKeys.begin;
 const KeyRangeRef serverTagConflictKeys(LiteralStringRef("\xff/serverTagConflict/"),
                                         LiteralStringRef("\xff/serverTagConflict0"));

@ -532,6 +536,7 @@ const Key serverListKeyFor(UID serverID) {
     return wr.toValue();
 }

+// TODO use flatbuffers depending on version
 const Value serverListValue(StorageServerInterface const& server) {
     BinaryWriter wr(IncludeVersion(ProtocolVersion::withServerListValue()));
     wr << server;

@ -550,6 +555,17 @@ StorageServerInterface decodeServerListValue(ValueRef const& value) {
     return s;
 }

+const Value serverListValueFB(StorageServerInterface const& server) {
+    return ObjectWriter::toValue(server, IncludeVersion());
+}
+
+StorageServerInterface decodeServerListValueFB(ValueRef const& value) {
+    StorageServerInterface s;
+    ObjectReader reader(value.begin(), IncludeVersion());
+    reader.deserialize(s);
+    return s;
+}
+
 // processClassKeys.contains(k) iff k.startsWith( processClassKeys.begin ) because '/'+1 == '0'
 const KeyRangeRef processClassKeys(LiteralStringRef("\xff/processClass/"), LiteralStringRef("\xff/processClass0"));
 const KeyRef processClassPrefix = processClassKeys.begin;
@ -594,6 +610,9 @@ ProcessClass decodeProcessClassValue(ValueRef const& value) {
 const KeyRangeRef configKeys(LiteralStringRef("\xff/conf/"), LiteralStringRef("\xff/conf0"));
 const KeyRef configKeysPrefix = configKeys.begin;

+const KeyRef perpetualStorageWiggleKey(LiteralStringRef("\xff/conf/perpetual_storage_wiggle"));
+const KeyRef wigglingStorageServerKey(LiteralStringRef("\xff/storageWigglePID"));
+
 const KeyRef triggerDDTeamInfoPrintKey(LiteralStringRef("\xff/triggerDDTeamInfoPrint"));

 const KeyRangeRef excludedServersKeys(LiteralStringRef("\xff/conf/excluded/"), LiteralStringRef("\xff/conf/excluded0"));

@ -633,15 +652,17 @@ std::string encodeFailedServersKey(AddressExclusion const& addr) {
 // const KeyRangeRef globalConfigKeys( LiteralStringRef("\xff/globalConfig/"), LiteralStringRef("\xff/globalConfig0") );
 // const KeyRef globalConfigPrefix = globalConfigKeys.begin;

-const KeyRangeRef globalConfigDataKeys( LiteralStringRef("\xff/globalConfig/k/"), LiteralStringRef("\xff/globalConfig/k0") );
+const KeyRangeRef globalConfigDataKeys(LiteralStringRef("\xff/globalConfig/k/"),
+                                       LiteralStringRef("\xff/globalConfig/k0"));
 const KeyRef globalConfigKeysPrefix = globalConfigDataKeys.begin;

-const KeyRangeRef globalConfigHistoryKeys( LiteralStringRef("\xff/globalConfig/h/"), LiteralStringRef("\xff/globalConfig/h0") );
+const KeyRangeRef globalConfigHistoryKeys(LiteralStringRef("\xff/globalConfig/h/"),
+                                          LiteralStringRef("\xff/globalConfig/h0"));
 const KeyRef globalConfigHistoryPrefix = globalConfigHistoryKeys.begin;

 const KeyRef globalConfigVersionKey = LiteralStringRef("\xff/globalConfig/v");

-const KeyRangeRef workerListKeys( LiteralStringRef("\xff/worker/"), LiteralStringRef("\xff/worker0") );
+const KeyRangeRef workerListKeys(LiteralStringRef("\xff/worker/"), LiteralStringRef("\xff/worker0"));
 const KeyRef workerListPrefix = workerListKeys.begin;

 const Key workerListKeyFor(StringRef processID) {
@ -939,124 +960,8 @@ const KeyRef mustContainSystemMutationsKey = LiteralStringRef("\xff/mustContainS

 const KeyRangeRef monitorConfKeys(LiteralStringRef("\xff\x02/monitorConf/"), LiteralStringRef("\xff\x02/monitorConf0"));

-const KeyRef restoreLeaderKey = LiteralStringRef("\xff\x02/restoreLeader");
-const KeyRangeRef restoreWorkersKeys(LiteralStringRef("\xff\x02/restoreWorkers/"),
-                                     LiteralStringRef("\xff\x02/restoreWorkers0"));
-const KeyRef restoreStatusKey = LiteralStringRef("\xff\x02/restoreStatus/");
-
-const KeyRef restoreRequestTriggerKey = LiteralStringRef("\xff\x02/restoreRequestTrigger");
-const KeyRef restoreRequestDoneKey = LiteralStringRef("\xff\x02/restoreRequestDone");
-const KeyRangeRef restoreRequestKeys(LiteralStringRef("\xff\x02/restoreRequests/"),
-                                     LiteralStringRef("\xff\x02/restoreRequests0"));
-
-const KeyRangeRef restoreApplierKeys(LiteralStringRef("\xff\x02/restoreApplier/"),
-                                     LiteralStringRef("\xff\x02/restoreApplier0"));
-const KeyRef restoreApplierTxnValue = LiteralStringRef("1");
-
-// restoreApplierKeys: track atomic transaction progress to ensure applying atomicOp exactly once
-// Version and batchIndex are passed in as LittleEndian,
-// they must be converted to BigEndian to maintain ordering in lexical order
-const Key restoreApplierKeyFor(UID const& applierID, int64_t batchIndex, Version version) {
-    BinaryWriter wr(Unversioned());
-    wr.serializeBytes(restoreApplierKeys.begin);
-    wr << applierID << bigEndian64(batchIndex) << bigEndian64(version);
-    return wr.toValue();
-}
-
-std::tuple<UID, int64_t, Version> decodeRestoreApplierKey(ValueRef const& key) {
-    BinaryReader rd(key, Unversioned());
-    UID applierID;
-    int64_t batchIndex;
-    Version version;
-    rd >> applierID >> batchIndex >> version;
-    return std::make_tuple(applierID, bigEndian64(batchIndex), bigEndian64(version));
-}
-
-// Encode restore worker key for workerID
-const Key restoreWorkerKeyFor(UID const& workerID) {
-    BinaryWriter wr(Unversioned());
-    wr.serializeBytes(restoreWorkersKeys.begin);
-    wr << workerID;
-    return wr.toValue();
-}
-
-// Encode restore agent value
-const Value restoreWorkerInterfaceValue(RestoreWorkerInterface const& cmdInterf) {
-    BinaryWriter wr(IncludeVersion(ProtocolVersion::withRestoreWorkerInterfaceValue()));
-    wr << cmdInterf;
-    return wr.toValue();
-}
-
-RestoreWorkerInterface decodeRestoreWorkerInterfaceValue(ValueRef const& value) {
-    RestoreWorkerInterface s;
-    BinaryReader reader(value, IncludeVersion());
-    reader >> s;
-    return s;
-}
-
-// Encode and decode restore request value
-// restoreRequestTrigger key
-const Value restoreRequestTriggerValue(UID randomID, int const numRequests) {
-    BinaryWriter wr(IncludeVersion(ProtocolVersion::withRestoreRequestTriggerValue()));
-    wr << numRequests;
-    wr << randomID;
-    return wr.toValue();
-}
-int decodeRestoreRequestTriggerValue(ValueRef const& value) {
-    int s;
-    UID randomID;
-    BinaryReader reader(value, IncludeVersion());
-    reader >> s;
-    reader >> randomID;
-    return s;
-}
-
-// restoreRequestDone key
-const Value restoreRequestDoneVersionValue(Version readVersion) {
-    BinaryWriter wr(IncludeVersion(ProtocolVersion::withRestoreRequestDoneVersionValue()));
-    wr << readVersion;
-    return wr.toValue();
-}
-Version decodeRestoreRequestDoneVersionValue(ValueRef const& value) {
-    Version v;
-    BinaryReader reader(value, IncludeVersion());
-    reader >> v;
-    return v;
-}
-
-const Key restoreRequestKeyFor(int const& index) {
-    BinaryWriter wr(Unversioned());
-    wr.serializeBytes(restoreRequestKeys.begin);
-    wr << index;
-    return wr.toValue();
-}
-
-const Value restoreRequestValue(RestoreRequest const& request) {
-    BinaryWriter wr(IncludeVersion(ProtocolVersion::withRestoreRequestValue()));
-    wr << request;
-    return wr.toValue();
-}
-
-RestoreRequest decodeRestoreRequestValue(ValueRef const& value) {
-    RestoreRequest s;
-    BinaryReader reader(value, IncludeVersion());
-    reader >> s;
-    return s;
-}
-
-// TODO: Register restore performance data to restoreStatus key
-const Key restoreStatusKeyFor(StringRef statusType) {
-    BinaryWriter wr(Unversioned());
-    wr.serializeBytes(restoreStatusKey);
-    wr << statusType;
-    return wr.toValue();
-}
-
-const Value restoreStatusValue(double val) {
-    BinaryWriter wr(IncludeVersion(ProtocolVersion::withRestoreStatusValue()));
-    wr << StringRef(std::to_string(val));
-    return wr.toValue();
-}
 const KeyRef healthyZoneKey = LiteralStringRef("\xff\x02/healthyZone");
 const StringRef ignoreSSFailuresZoneString = LiteralStringRef("IgnoreSSFailures");
 const KeyRef rebalanceDDIgnoreKey = LiteralStringRef("\xff\x02/rebalanceDDIgnored");
@ -1082,3 +987,60 @@ const KeyRangeRef testOnlyTxnStateStorePrefixRange(LiteralStringRef("\xff/TESTON
 const KeyRef writeRecoveryKey = LiteralStringRef("\xff/writeRecovery");
 const ValueRef writeRecoveryKeyTrue = LiteralStringRef("1");
 const KeyRef snapshotEndVersionKey = LiteralStringRef("\xff/snapshotEndVersion");
+
+// for tests
+void testSSISerdes(StorageServerInterface const& ssi, bool useFB) {
+    printf("ssi=\nid=%s\nlocality=%s\nisTss=%s\ntssId=%s\naddress=%s\ngetValue=%s\n\n\n",
+           ssi.id().toString().c_str(),
+           ssi.locality.toString().c_str(),
+           ssi.isTss() ? "true" : "false",
+           ssi.isTss() ? ssi.tssPairID.get().toString().c_str() : "",
+           ssi.address().toString().c_str(),
+           ssi.getValue.getEndpoint().token.toString().c_str());
+
+    StorageServerInterface ssi2 =
+        (useFB) ? decodeServerListValueFB(serverListValueFB(ssi)) : decodeServerListValue(serverListValue(ssi));
+
+    printf("ssi2=\nid=%s\nlocality=%s\nisTss=%s\ntssId=%s\naddress=%s\ngetValue=%s\n\n\n",
+           ssi2.id().toString().c_str(),
+           ssi2.locality.toString().c_str(),
+           ssi2.isTss() ? "true" : "false",
+           ssi2.isTss() ? ssi2.tssPairID.get().toString().c_str() : "",
+           ssi2.address().toString().c_str(),
+           ssi2.getValue.getEndpoint().token.toString().c_str());
+
+    ASSERT(ssi.id() == ssi2.id());
+    ASSERT(ssi.locality == ssi2.locality);
+    ASSERT(ssi.isTss() == ssi2.isTss());
+    if (ssi.isTss()) {
+        ASSERT(ssi.tssPairID.get() == ssi2.tssPairID.get());
+    }
+    ASSERT(ssi.address() == ssi2.address());
+    ASSERT(ssi.getValue.getEndpoint().token == ssi2.getValue.getEndpoint().token);
+}
+
+// unit test for serialization since tss stuff had bugs
+TEST_CASE("/SystemData/SerDes/SSI") {
+    printf("testing ssi serdes\n");
+    LocalityData localityData(Optional<Standalone<StringRef>>(),
+                              Standalone<StringRef>(deterministicRandom()->randomUniqueID().toString()),
+                              Standalone<StringRef>(deterministicRandom()->randomUniqueID().toString()),
+                              Optional<Standalone<StringRef>>());
+
+    // non-tss
+    StorageServerInterface ssi;
+    ssi.uniqueID = UID(0x1234123412341234, 0x5678567856785678);
+    ssi.locality = localityData;
+    ssi.initEndpoints();
+
+    testSSISerdes(ssi, false);
+    testSSISerdes(ssi, true);
+
+    ssi.tssPairID = UID(0x2345234523452345, 0x1238123812381238);
+
+    testSSISerdes(ssi, false);
+    testSSISerdes(ssi, true);
+    printf("ssi serdes test complete\n");
+
+    return Void();
+}
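The added `testSSISerdes` helper is a round-trip check: encode the interface with one codec, decode it again, and assert that every field survived, for both the TSS and non-TSS shape of the value (the original hunk compared `ssi2.tssPairID` with itself, which is corrected above). The same pattern applies to any encode/decode pair; here is a small self-contained sketch in plain C++ rather than the FDB test harness, with an invented `Record` type standing in for `StorageServerInterface`.

```cpp
#include <cassert>
#include <cstdint>
#include <sstream>
#include <string>

struct Record {
    uint64_t id;
    bool hasPair;
    uint64_t pairId;
};

// Stand-in encoder/decoder pair; in the diff these roles are played by
// serverListValue/decodeServerListValue and their flatbuffers (FB) variants.
std::string encode(const Record& r) {
    std::ostringstream out;
    out << r.id << ' ' << r.hasPair << ' ' << r.pairId;
    return out.str();
}
Record decode(const std::string& s) {
    std::istringstream in(s);
    Record r{};
    in >> r.id >> r.hasPair >> r.pairId;
    return r;
}

// Round-trip check in the spirit of testSSISerdes: every field that was encoded
// must come back unchanged.
void roundTripTest(const Record& original) {
    Record copy = decode(encode(original));
    assert(copy.id == original.id);
    assert(copy.hasPair == original.hasPair);
    if (original.hasPair) {
        assert(copy.pairId == original.pairId);
    }
}

int main() {
    roundTripTest({ 0x1234, false, 0 });
    roundTripTest({ 0x1234, true, 0x2345 });
    return 0;
}
```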
@ -26,7 +26,6 @@
 #include "fdbclient/FDBTypes.h"
 #include "fdbclient/StorageServerInterface.h"
-#include "fdbclient/RestoreWorkerInterface.actor.h"

 // Don't warn on constants being defined in this file.
 #pragma clang diagnostic push

@ -115,6 +114,9 @@ extern const KeyRef cacheChangePrefix;
 const Key cacheChangeKeyFor(uint16_t idx);
 uint16_t cacheChangeKeyDecodeIndex(const KeyRef& key);

+// "\xff/tss/[[serverId]]" := "[[tssId]]"
+extern const KeyRangeRef tssMappingKeys;
+
 // "\xff/serverTag/[[serverID]]" = "[[Tag]]"
 // Provides the Tag for the given serverID. Used to access a
 // storage server's corresponding TLog in order to apply mutations.

@ -196,6 +198,8 @@ UID decodeProcessClassKeyOld(KeyRef const& key);
 extern const KeyRangeRef configKeys;
 extern const KeyRef configKeysPrefix;

+extern const KeyRef perpetualStorageWiggleKey;
+extern const KeyRef wigglingStorageServerKey;
 // Change the value of this key to anything and that will trigger detailed data distribution team info log.
 extern const KeyRef triggerDDTeamInfoPrintKey;

@ -442,31 +446,6 @@ extern const KeyRef mustContainSystemMutationsKey;
 // Key range reserved for storing changes to monitor conf files
 extern const KeyRangeRef monitorConfKeys;

-// Fast restore
-extern const KeyRef restoreLeaderKey;
-extern const KeyRangeRef restoreWorkersKeys;
-extern const KeyRef restoreStatusKey; // To be used when we measure fast restore performance
-extern const KeyRef restoreRequestTriggerKey;
-extern const KeyRef restoreRequestDoneKey;
-extern const KeyRangeRef restoreRequestKeys;
-extern const KeyRangeRef restoreApplierKeys;
-extern const KeyRef restoreApplierTxnValue;
-
-const Key restoreApplierKeyFor(UID const& applierID, int64_t batchIndex, Version version);
-std::tuple<UID, int64_t, Version> decodeRestoreApplierKey(ValueRef const& key);
-const Key restoreWorkerKeyFor(UID const& workerID);
-const Value restoreWorkerInterfaceValue(RestoreWorkerInterface const& server);
-RestoreWorkerInterface decodeRestoreWorkerInterfaceValue(ValueRef const& value);
-const Value restoreRequestTriggerValue(UID randomUID, int const numRequests);
-int decodeRestoreRequestTriggerValue(ValueRef const& value);
-const Value restoreRequestDoneVersionValue(Version readVersion);
-Version decodeRestoreRequestDoneVersionValue(ValueRef const& value);
-const Key restoreRequestKeyFor(int const& index);
-const Value restoreRequestValue(RestoreRequest const& server);
-RestoreRequest decodeRestoreRequestValue(ValueRef const& value);
-const Key restoreStatusKeyFor(StringRef statusType);
-const Value restoreStatusValue(double val);
-
 extern const KeyRef healthyZoneKey;
 extern const StringRef ignoreSSFailuresZoneString;
 extern const KeyRef rebalanceDDIgnoreKey;
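Ranges like the new `tssMappingKeys` (`\xff/tss/` to `\xff/tss0`) rely on the trick called out in the `processClassKeys` comment earlier in this diff: `'0'` is the character right after `'/'`, so the half-open range `[prefix + "/", prefix + "0")` contains exactly the keys that start with `prefix + "/"`. A short sketch of that invariant in plain C++, with hypothetical helper names:

```cpp
#include <cassert>
#include <string>
#include <utility>

// Builds the half-open range [begin, end) covering every key that starts with
// prefix + "/": because '/' + 1 == '0' in ASCII, prefix + "0" is the first key
// that sorts after all of them.
std::pair<std::string, std::string> subspaceRange(const std::string& prefix) {
    return { prefix + "/", prefix + "0" };
}

bool inRange(const std::string& key, const std::pair<std::string, std::string>& range) {
    return key >= range.first && key < range.second;
}

int main() {
    auto tss = subspaceRange("\xff/tss");
    assert(inRange("\xff/tss/someServerId", tss)); // keys under the prefix are covered
    assert(!inRange("\xff/tss0", tss));            // the end key itself is excluded
    assert(!inRange("\xff/tst", tss));             // unrelated keys fall outside
    return 0;
}
```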
@ -194,6 +194,40 @@ struct TagThrottleInfo {
     }
 };

+struct ClientTagThrottleLimits {
+    double tpsRate;
+    double expiration;
+
+    ClientTagThrottleLimits() : tpsRate(0), expiration(0) {}
+    ClientTagThrottleLimits(double tpsRate, double expiration) : tpsRate(tpsRate), expiration(expiration) {}
+
+    template <class Archive>
+    void serialize(Archive& ar) {
+        // Convert expiration time to a duration to avoid clock differences
+        double duration = 0;
+        if (!ar.isDeserializing) {
+            duration = expiration - now();
+        }
+
+        serializer(ar, tpsRate, duration);
+
+        if (ar.isDeserializing) {
+            expiration = now() + duration;
+        }
+    }
+};
+
+struct ClientTrCommitCostEstimation {
+    int opsCount = 0;
+    uint64_t writeCosts = 0;
+    std::deque<std::pair<int, uint64_t>> clearIdxCosts;
+    uint32_t expensiveCostEstCount = 0;
+    template <class Ar>
+    void serialize(Ar& ar) {
+        serializer(ar, opsCount, writeCosts, clearIdxCosts, expensiveCostEstCount);
+    }
+};
+
 namespace ThrottleApi {
 Future<std::vector<TagThrottleInfo>> getThrottledTags(Database const& db,
                                                       int const& limit,
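`ClientTagThrottleLimits::serialize` ships a duration rather than the absolute `expiration` timestamp, so a receiver whose clock disagrees with the sender's still ends up with roughly the same remaining validity window. A standalone sketch of the same trick, with explicit clock parameters in place of FDB's `now()`:

```cpp
#include <cassert>
#include <cmath>

// Sender side: convert an absolute deadline into "seconds left" at send time.
double encodeDeadline(double expiration, double senderNow) {
    return expiration - senderNow;
}

// Receiver side: rebuild an absolute deadline on the local clock.
double decodeDeadline(double duration, double receiverNow) {
    return receiverNow + duration;
}

int main() {
    double senderNow = 1000.0;   // sender's clock
    double receiverNow = 2500.0; // receiver's clock, wildly different
    double expiration = 1060.0;  // 60 seconds of validity left on the sender

    double wire = encodeDeadline(expiration, senderNow);
    double rebuilt = decodeDeadline(wire, receiverNow);

    // The receiver still sees ~60 seconds of validity despite the clock skew.
    assert(std::fabs((rebuilt - receiverNow) - 60.0) < 1e-9);
    return 0;
}
```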
@ -474,7 +474,7 @@ void ThreadSafeApi::addNetworkThreadCompletionHook(void (*hook)(void*), void* ho

     MutexHolder holder(lock); // We could use the network thread to protect this action, but then we can't guarantee
                               // upon return that the hook is set.
-    threadCompletionHooks.push_back(std::make_pair(hook, hookParameter));
+    threadCompletionHooks.emplace_back(hook, hookParameter);
 }

 IClientApi* ThreadSafeApi::api = new ThreadSafeApi();
@ -71,6 +71,8 @@ Tuple::Tuple(StringRef const& str, bool exclude_incomplete) {
             i += sizeof(float) + 1;
         } else if (data[i] == 0x21) {
             i += sizeof(double) + 1;
+        } else if (data[i] == 0x26 || data[i] == 0x27) {
+            i += 1;
         } else if (data[i] == '\x00') {
             i += 1;
         } else {

@ -144,6 +146,16 @@ Tuple& Tuple::append(int64_t value) {
     return *this;
 }

+Tuple& Tuple::appendBool(bool value) {
+    offsets.push_back(data.size());
+    if (value) {
+        data.push_back(data.arena(), 0x27);
+    } else {
+        data.push_back(data.arena(), 0x26);
+    }
+    return *this;
+}
+
 Tuple& Tuple::appendFloat(float value) {
     offsets.push_back(data.size());
     float swap = bigEndianFloat(value);

@ -192,6 +204,8 @@ Tuple::ElementType Tuple::getType(size_t index) const {
         return ElementType::FLOAT;
     } else if (code == 0x21) {
         return ElementType::DOUBLE;
+    } else if (code == 0x26 || code == 0x27) {
+        return ElementType::BOOL;
     } else {
         throw invalid_tuple_data_type();
     }

@ -287,6 +301,21 @@ int64_t Tuple::getInt(size_t index, bool allow_incomplete) const {
 }

 // TODO: Combine with bindings/flow/Tuple.*. This code is copied from there.
+bool Tuple::getBool(size_t index) const {
+    if (index >= offsets.size()) {
+        throw invalid_tuple_index();
+    }
+    ASSERT_LT(offsets[index], data.size());
+    uint8_t code = data[offsets[index]];
+    if (code == 0x26) {
+        return false;
+    } else if (code == 0x27) {
+        return true;
+    } else {
+        throw invalid_tuple_data_type();
+    }
+}
+
 float Tuple::getFloat(size_t index) const {
     if (index >= offsets.size()) {
         throw invalid_tuple_index();

@ -40,6 +40,7 @@ struct Tuple {
     Tuple& append(int64_t);
     // There are some ambiguous append calls in fdbclient, so to make it easier
     // to add append for floats and doubles, name them differently for now.
+    Tuple& appendBool(bool);
     Tuple& appendFloat(float);
     Tuple& appendDouble(double);
     Tuple& appendNull();

@ -51,7 +52,7 @@ struct Tuple {
         return append(t);
     }

-    enum ElementType { NULL_TYPE, INT, BYTES, UTF8, FLOAT, DOUBLE };
+    enum ElementType { NULL_TYPE, INT, BYTES, UTF8, BOOL, FLOAT, DOUBLE };

     // this is number of elements, not length of data
     size_t size() const { return offsets.size(); }

@ -59,6 +60,7 @@ struct Tuple {
     ElementType getType(size_t index) const;
     Standalone<StringRef> getString(size_t index) const;
     int64_t getInt(size_t index, bool allow_incomplete = false) const;
+    bool getBool(size_t index) const;
     float getFloat(size_t index) const;
     double getDouble(size_t index) const;
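The tuple changes add a dedicated one-byte encoding for booleans: type code 0x26 means false and 0x27 means true, with no payload bytes, which is why the parser above only advances by one. A minimal sketch of encoding and decoding just that element type, using a plain `std::vector<uint8_t>` rather than the FDB `Tuple` class:

```cpp
#include <cassert>
#include <cstdint>
#include <stdexcept>
#include <vector>

constexpr uint8_t kFalseCode = 0x26;
constexpr uint8_t kTrueCode = 0x27;

// Append a boolean element: the type code *is* the value, so it occupies one byte.
void appendBool(std::vector<uint8_t>& data, bool value) {
    data.push_back(value ? kTrueCode : kFalseCode);
}

// Decode the boolean element starting at offset; throws on any other type code.
bool getBool(const std::vector<uint8_t>& data, size_t offset) {
    uint8_t code = data.at(offset);
    if (code == kFalseCode)
        return false;
    if (code == kTrueCode)
        return true;
    throw std::runtime_error("not a boolean tuple element");
}

int main() {
    std::vector<uint8_t> data;
    appendBool(data, true);
    appendBool(data, false);
    assert(getBool(data, 0) == true);
    assert(getBool(data, 1) == false);
    return 0;
}
```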
@ -856,7 +856,7 @@ void load_conf(const char* confpath, uid_t& uid, gid_t& gid, sigset_t* mask, fdb

             if (id_command[i.first]->kill_on_configuration_change) {
                 kill_ids.push_back(i.first);
-                start_ids.push_back(std::make_pair(i.first, cmd));
+                start_ids.emplace_back(i.first, cmd);
             }
         } else {
             log_msg(SevInfo, "Updated configuration for %s\n", id_command[i.first]->ssection.c_str());
@ -46,7 +46,8 @@ EvictablePage::~EvictablePage() {
     }
 }

-std::map<std::string, OpenFileInfo> AsyncFileCached::openFiles;
+// A map of filename to the file handle for all opened cached files
+std::map<std::string, UnsafeWeakFutureReference<IAsyncFile>> AsyncFileCached::openFiles;

 void AsyncFileCached::remove_page(AFCPage* page) {
     pages.erase(page->pageOffset);

@ -132,39 +132,32 @@ struct EvictablePageCache : ReferenceCounted<EvictablePageCache> {
     const CacheEvictionType cacheEvictionType;
 };

-struct OpenFileInfo : NonCopyable {
-    IAsyncFile* f;
-    Future<Reference<IAsyncFile>> opened; // Only valid until the file is fully opened
-
-    OpenFileInfo() : f(0) {}
-    OpenFileInfo(OpenFileInfo&& r) noexcept : f(r.f), opened(std::move(r.opened)) { r.f = 0; }
-
-    Future<Reference<IAsyncFile>> get() {
-        if (f)
-            return Reference<IAsyncFile>::addRef(f);
-        else
-            return opened;
-    }
-};
-
 struct AFCPage;

 class AsyncFileCached final : public IAsyncFile, public ReferenceCounted<AsyncFileCached> {
     friend struct AFCPage;

 public:
+    // Opens a file that uses the FDB in-memory page cache
     static Future<Reference<IAsyncFile>> open(std::string filename, int flags, int mode) {
         //TraceEvent("AsyncFileCachedOpen").detail("Filename", filename);
-        if (openFiles.find(filename) == openFiles.end()) {
+        auto itr = openFiles.find(filename);
+        if (itr == openFiles.end()) {
             auto f = open_impl(filename, flags, mode);
             if (f.isReady() && f.isError())
                 return f;
-            if (!f.isReady())
-                openFiles[filename].opened = f;
-            else
-                return f.get();
+
+            auto result = openFiles.try_emplace(filename, f);
+
+            // This should be inserting a new entry
+            ASSERT(result.second);
+            itr = result.first;
+
+            // We return here instead of falling through to the outer scope so that we don't delete all references to
+            // the underlying file before returning
+            return itr->second.get();
         }
-        return openFiles[filename].get();
+        return itr->second.get();
     }

     Future<int> read(void* data, int length, int64_t offset) override {

@ -263,7 +256,9 @@ public:
     ~AsyncFileCached() override;

 private:
-    static std::map<std::string, OpenFileInfo> openFiles;
+    // A map of filename to the file handle for all opened cached files
+    static std::map<std::string, UnsafeWeakFutureReference<IAsyncFile>> openFiles;
+
     std::string filename;
     Reference<IAsyncFile> uncached;
     int64_t length;

@ -330,6 +325,7 @@ private:

     static Future<Reference<IAsyncFile>> open_impl(std::string filename, int flags, int mode);

+    // Opens a file that uses the FDB in-memory page cache
     ACTOR static Future<Reference<IAsyncFile>> open_impl(std::string filename,
                                                          int flags,
                                                          int mode,

@ -345,10 +341,7 @@ private:
             TraceEvent("AFCUnderlyingOpenEnd").detail("Filename", filename);
             int64_t l = wait(f->size());
             TraceEvent("AFCUnderlyingSize").detail("Filename", filename).detail("Size", l);
-            auto& of = openFiles[filename];
-            of.f = new AsyncFileCached(f, filename, l, pageCache);
-            of.opened = Future<Reference<IAsyncFile>>();
-            return Reference<IAsyncFile>(of.f);
+            return new AsyncFileCached(f, filename, l, pageCache);
         } catch (Error& e) {
             if (e.code() != error_code_actor_cancelled)
                 openFiles.erase(filename);
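The `openFiles` rework replaces an entry type that held a raw `IAsyncFile*` with `UnsafeWeakFutureReference`, i.e. a cache that hands out the shared handle while somebody still holds it, but does not itself keep the file alive. The closest standard-C++ analogy uses `std::weak_ptr`; the sketch below is that analogy, not the FDB implementation, and the `File` type and cache API are invented for illustration.

```cpp
#include <map>
#include <memory>
#include <string>

struct File {
    explicit File(std::string name) : name(std::move(name)) {}
    std::string name;
};

class FileCache {
public:
    // Returns the already-open file for this name if some caller still holds it,
    // otherwise opens a fresh one and remembers it weakly.
    std::shared_ptr<File> open(const std::string& filename) {
        auto itr = openFiles.find(filename);
        if (itr != openFiles.end()) {
            if (auto existing = itr->second.lock()) {
                return existing; // reuse the live handle
            }
        }
        auto fresh = std::make_shared<File>(filename);
        openFiles[filename] = fresh; // weak entry: the cache does not extend the file's lifetime
        return fresh;
    }

private:
    std::map<std::string, std::weak_ptr<File>> openFiles;
};

int main() {
    FileCache cache;
    auto a = cache.open("data.sqlite");
    auto b = cache.open("data.sqlite");
    return a == b ? 0 : 1; // both callers share one handle while it is alive
}
```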
@ -130,6 +130,9 @@ public:
     UID id;
     std::string filename;

+    // For files that use atomic write and create, they are initially created with an extra suffix
+    std::string initialFilename;
+
     // An approximation of the size of the file; .size() should be used instead of this variable in most cases
     mutable int64_t approximateSize;

@ -182,11 +185,13 @@ private:
         reponses; // cannot call getResult on this actor collection, since the actors will be on different processes

     AsyncFileNonDurable(const std::string& filename,
+                        const std::string& initialFilename,
                         Reference<IAsyncFile> file,
                         Reference<DiskParameters> diskParameters,
                         NetworkAddress openedAddress,
                         bool aio)
-      : openedAddress(openedAddress), pendingModifications(uint64_t(-1)), approximateSize(0), reponses(false),
+      : filename(filename), initialFilename(initialFilename), file(file), diskParameters(diskParameters),
+        openedAddress(openedAddress), pendingModifications(uint64_t(-1)), approximateSize(0), reponses(false),
         aio(aio) {

         // This is only designed to work in simulation

@ -194,9 +199,6 @@ private:
         this->id = deterministicRandom()->randomUniqueID();

         //TraceEvent("AsyncFileNonDurable_Create", id).detail("Filename", filename);
-        this->file = file;
-        this->filename = filename;
-        this->diskParameters = diskParameters;
         maxWriteDelay = FLOW_KNOBS->NON_DURABLE_MAX_WRITE_DELAY;
         hasBeenSynced = false;

@ -236,10 +238,11 @@ public:
             //TraceEvent("AsyncFileNonDurableOpenWaitOnDelete2").detail("Filename", filename);
             if (shutdown.isReady())
                 throw io_error().asInjectedFault();
+            wait(g_simulator.onProcess(currentProcess, currentTaskID));
         }

         state Reference<AsyncFileNonDurable> nonDurableFile(
-            new AsyncFileNonDurable(filename, file, diskParameters, currentProcess->address, aio));
+            new AsyncFileNonDurable(filename, actualFilename, file, diskParameters, currentProcess->address, aio));

         // Causes the approximateSize member to be set
         state Future<int64_t> sizeFuture = nonDurableFile->size();

@ -269,13 +272,38 @@ public:
     }

     void addref() override { ReferenceCounted<AsyncFileNonDurable>::addref(); }
     void delref() override {
         if (delref_no_destroy()) {
-            ASSERT(filesBeingDeleted.count(filename) == 0);
+            if (filesBeingDeleted.count(filename) == 0) {
                 //TraceEvent("AsyncFileNonDurable_StartDelete", id).detail("Filename", filename);
                 Future<Void> deleteFuture = deleteFile(this);
                 if (!deleteFuture.isReady())
                     filesBeingDeleted[filename] = deleteFuture;
+            }
+
+            removeOpenFile(filename, this);
+            if (initialFilename != filename) {
+                removeOpenFile(initialFilename, this);
+            }
         }
     }

+    // Removes a file from the openFiles map
+    static void removeOpenFile(std::string filename, AsyncFileNonDurable* file) {
+        auto& openFiles = g_simulator.getCurrentProcess()->machine->openFiles;
+
+        auto iter = openFiles.find(filename);
+
+        // Various actions (e.g. simulated delete) can remove a file from openFiles prematurely, so it may already
+        // be gone. Renamed files (from atomic write and create) will also be present under only one of the two
+        // names.
+        if (iter != openFiles.end()) {
+            // even if the filename exists, it doesn't mean that it references the same file. It could be that the
+            // file was renamed and later a file with the same name was opened.
+            if (iter->second.getPtrIfReady().orDefault(nullptr) == file) {
+                openFiles.erase(iter);
+            }
+        }
+    }

@ -832,11 +860,9 @@ private:
         //TraceEvent("AsyncFileNonDurable_FinishDelete", self->id).detail("Filename", self->filename);

         delete self;
-        wait(g_simulator.onProcess(currentProcess, currentTaskID));
         return Void();
     } catch (Error& e) {
         state Error err = e;
-        wait(g_simulator.onProcess(currentProcess, currentTaskID));
         throw err;
     }
 }
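`removeOpenFile` erases the map entry only if it still refers to *this* file, because a file can be renamed (atomic write and create) and a different file can later be opened under the same name. The guard boils down to an "erase if identity matches" step, sketched here against the hypothetical weak-pointer cache from the previous example:

```cpp
#include <map>
#include <memory>
#include <string>

struct File {};

using OpenFileMap = std::map<std::string, std::weak_ptr<File>>;

// Remove `filename` from the map, but only if the entry still refers to `self`.
// A stale or re-used name (e.g. after a rename) is left alone.
void removeOpenFile(OpenFileMap& openFiles, const std::string& filename, const File* self) {
    auto iter = openFiles.find(filename);
    if (iter == openFiles.end()) {
        return; // already gone, e.g. removed earlier by a simulated delete
    }
    std::shared_ptr<File> current = iter->second.lock();
    if (current.get() == self) {
        openFiles.erase(iter);
    }
}
```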
@ -29,7 +29,8 @@ set(FDBRPC_SRCS
     sim2.actor.cpp
     sim_validation.cpp
     TimedRequest.h
-    TraceFileIO.cpp)
+    TraceFileIO.cpp
+    TSSComparison.h)

 set(COMPILE_EIO OFF)
@ -51,6 +51,8 @@ constexpr UID WLTOKEN_PING_PACKET(-1, 1);
 constexpr int PACKET_LEN_WIDTH = sizeof(uint32_t);
 const uint64_t TOKEN_STREAM_FLAG = 1;

+const int WLTOKEN_COUNTS = 12; // number of wellKnownEndpoints
+
 class EndpointMap : NonCopyable {
 public:
     // Reserve space for this many wellKnownEndpoints

@ -96,6 +98,7 @@ void EndpointMap::realloc() {

 void EndpointMap::insertWellKnown(NetworkMessageReceiver* r, const Endpoint::Token& token, TaskPriority priority) {
     int index = token.second();
+    ASSERT(index <= WLTOKEN_COUNTS);
     ASSERT(data[index].receiver == nullptr);
     data[index].receiver = r;
     data[index].token() =

@ -334,7 +337,7 @@ ACTOR Future<Void> pingLatencyLogger(TransportData* self) {
 }

 TransportData::TransportData(uint64_t transportId)
-  : endpoints(/*wellKnownTokenCount*/ 11), endpointNotFoundReceiver(endpoints), pingReceiver(endpoints),
+  : endpoints(WLTOKEN_COUNTS), endpointNotFoundReceiver(endpoints), pingReceiver(endpoints),
     warnAlwaysForLargePacket(true), lastIncompatibleMessage(0), transportId(transportId),
     numIncompatibleConnections(0) {
     degraded = makeReference<AsyncVar<bool>>(false);

@ -1215,7 +1218,7 @@ ACTOR static Future<Void> connectionReader(TransportData* transport,
                 }
                 compatible = false;
                 if (!protocolVersion.hasInexpensiveMultiVersionClient()) {
-                    if(peer) {
+                    if (peer) {
                         peer->protocolVersion->set(protocolVersion);
                     }
@ -24,7 +24,7 @@

 void HealthMonitor::reportPeerClosed(const NetworkAddress& peerAddress) {
     purgeOutdatedHistory();
-    peerClosedHistory.push_back(std::make_pair(now(), peerAddress));
+    peerClosedHistory.emplace_back(now(), peerAddress);
     peerClosedNum[peerAddress] += 1;
 }
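`reportPeerClosed` appends a `(timestamp, address)` pair after purging anything older than the monitoring window, so the per-peer counters always describe a bounded recent interval. A compact standalone version of that bookkeeping (class and method names here are illustrative, not the HealthMonitor API):

```cpp
#include <deque>
#include <string>
#include <unordered_map>
#include <utility>

class PeerCloseHistory {
public:
    explicit PeerCloseHistory(double windowSeconds) : window(windowSeconds) {}

    void reportPeerClosed(double now, const std::string& peer) {
        purgeOutdated(now);
        history.emplace_back(now, peer);
        closedCount[peer] += 1;
    }

    int recentCloses(double now, const std::string& peer) {
        purgeOutdated(now);
        auto it = closedCount.find(peer);
        return it == closedCount.end() ? 0 : it->second;
    }

private:
    // Drop events that fell out of the window and decrement their per-peer counters.
    void purgeOutdated(double now) {
        while (!history.empty() && history.front().first < now - window) {
            auto& entry = history.front();
            if (--closedCount[entry.second] == 0) {
                closedCount.erase(entry.second);
            }
            history.pop_front();
        }
    }

    double window;
    std::deque<std::pair<double, std::string>> history;
    std::unordered_map<std::string, int> closedCount;
};
```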
@ -36,6 +36,8 @@
 #include "fdbrpc/Locality.h"
 #include "fdbrpc/QueueModel.h"
 #include "fdbrpc/MultiInterface.h"
+#include "fdbrpc/simulator.h" // for checking tss simulation mode
+#include "fdbrpc/TSSComparison.h"
 #include "flow/actorcompiler.h" // This must be the last #include.

 using std::vector;

@ -75,6 +77,97 @@ struct LoadBalancedReply {
 Optional<LoadBalancedReply> getLoadBalancedReply(const LoadBalancedReply* reply);
 Optional<LoadBalancedReply> getLoadBalancedReply(const void*);

+ACTOR template <class Req, class Resp>
+Future<Void> tssComparison(Req req,
+                           Future<ErrorOr<Resp>> fSource,
+                           Future<ErrorOr<Resp>> fTss,
+                           TSSEndpointData tssData) {
+    state double startTime = now();
+    state Future<Optional<ErrorOr<Resp>>> fTssWithTimeout = timeout(fTss, FLOW_KNOBS->LOAD_BALANCE_TSS_TIMEOUT);
+    state int finished = 0;
+    state double srcEndTime;
+    state double tssEndTime;
+
+    loop {
+        choose {
+            when(state ErrorOr<Resp> src = wait(fSource)) {
+                srcEndTime = now();
+                fSource = Never();
+                finished++;
+                if (finished == 2) {
+                    break;
+                }
+            }
+            when(state Optional<ErrorOr<Resp>> tss = wait(fTssWithTimeout)) {
+                tssEndTime = now();
+                fTssWithTimeout = Never();
+                finished++;
+                if (finished == 2) {
+                    break;
+                }
+            }
+        }
+    }
+
+    // we want to record ss/tss errors to metrics
+    int srcErrorCode = error_code_success;
+    int tssErrorCode = error_code_success;
+
+    ++tssData.metrics->requests;
+
+    if (src.isError()) {
+        srcErrorCode = src.getError().code();
+        tssData.metrics->ssError(srcErrorCode);
+    }
+    if (!tss.present()) {
+        ++tssData.metrics->tssTimeouts;
+    } else if (tss.get().isError()) {
+        tssErrorCode = tss.get().getError().code();
+        tssData.metrics->tssError(tssErrorCode);
+    }
+    if (!src.isError() && tss.present() && !tss.get().isError()) {
+        Optional<LoadBalancedReply> srcLB = getLoadBalancedReply(&src.get());
+        Optional<LoadBalancedReply> tssLB = getLoadBalancedReply(&tss.get().get());
+        ASSERT(srcLB.present() ==
+               tssLB.present()); // getLoadBalancedReply returned different responses for same templated type
+
+        // if Resp is a LoadBalancedReply, only compare if both replies are non-error
+        if (!srcLB.present() || (!srcLB.get().error.present() && !tssLB.get().error.present())) {
+            // only record latency difference if both requests actually succeeded, so that we're comparing apples to
+            // apples
+            tssData.metrics->recordLatency(req, srcEndTime - startTime, tssEndTime - startTime);
+
+            // expect mismatches in drop mutations mode.
+            Severity traceSeverity =
+                (g_network->isSimulated() && g_simulator.tssMode == ISimulator::TSSMode::EnabledDropMutations)
+                    ? SevWarnAlways
+                    : SevError;
+
+            if (!TSS_doCompare(req, src.get(), tss.get().get(), traceSeverity, tssData.tssId)) {
+                TEST(true); // TSS Mismatch
+                ++tssData.metrics->mismatches;
+            }
+        } else if (tssLB.present() && tssLB.get().error.present()) {
+            tssErrorCode = tssLB.get().error.get().code();
+            tssData.metrics->tssError(tssErrorCode);
+        } else if (srcLB.present() && srcLB.get().error.present()) {
+            srcErrorCode = srcLB.get().error.get().code();
+            tssData.metrics->ssError(srcErrorCode);
+        }
+    }
+
+    if (srcErrorCode != error_code_success && tssErrorCode != error_code_success && srcErrorCode != tssErrorCode) {
+        // if ss and tss both got different errors, record them
+        TraceEvent("TSSErrorMismatch")
+            .suppressFor(1.0)
+            .detail("TSSID", tssData.tssId)
+            .detail("SSError", srcErrorCode)
+            .detail("TSSError", tssErrorCode);
+    }
+
+    return Void();
+}
+
 // Stores state for a request made by the load balancer
 template <class Request>
 struct RequestData : NonCopyable {

@ -91,11 +184,30 @@ struct RequestData : NonCopyable {
     // This is true once setupRequest is called, even though at that point the response is Never().
     bool isValid() { return response.isValid(); }

+    static void maybeDuplicateTSSRequest(RequestStream<Request> const* stream,
+                                         Request& request,
+                                         QueueModel* model,
+                                         Future<Reply> ssResponse) {
+        if (model) {
+            // Send parallel request to TSS pair, if it exists
+            Optional<TSSEndpointData> tssData = model->getTssData(stream->getEndpoint().token.first());
+
+            if (tssData.present()) {
+                TEST(true); // duplicating request to TSS
+                resetReply(request);
+                // FIXME: optimize to avoid creating new netNotifiedQueue for each message
+                RequestStream<Request> tssRequestStream(tssData.get().endpoint);
+                Future<ErrorOr<REPLY_TYPE(Request)>> fTssResult = tssRequestStream.tryGetReply(request);
+                model->addActor.send(tssComparison(request, ssResponse, fTssResult, tssData.get()));
+            }
+        }
+    }
+
     // Initializes the request state and starts it, possibly after a backoff delay
     void startRequest(double backoff,
                       bool triedAllOptions,
                       RequestStream<Request> const* stream,
-                      Request const& request,
+                      Request& request,
                       QueueModel* model) {
         modelHolder = Reference<ModelHolder>();
         requestStarted = false;

@ -105,12 +217,15 @@ struct RequestData : NonCopyable {
                 delay(backoff), [this, stream, &request, model](Void _) {
                     requestStarted = true;
                     modelHolder = Reference<ModelHolder>(new ModelHolder(model, stream->getEndpoint().token.first()));
-                    return stream->tryGetReply(request);
+                    Future<Reply> resp = stream->tryGetReply(request);
+                    maybeDuplicateTSSRequest(stream, request, model, resp);
+                    return resp;
                 });
         } else {
             requestStarted = true;
             modelHolder = Reference<ModelHolder>(new ModelHolder(model, stream->getEndpoint().token.first()));
             response = stream->tryGetReply(request);
+            maybeDuplicateTSSRequest(stream, request, model, response);
         }

         requestProcessed = false;
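`maybeDuplicateTSSRequest` fires the same request at the storage server and at its testing pair, returns the storage server's reply to the caller immediately, and hands both futures to `tssComparison`, which waits for both (with a timeout on the TSS side) and records mismatches out of band. Stripped of flow actors, the control flow looks roughly like the `std::async` sketch below; the function names, the 5-second timeout, and the single mismatch counter are simplifications, not the FDB API.

```cpp
#include <atomic>
#include <chrono>
#include <functional>
#include <future>
#include <string>
#include <vector>

std::atomic<int> mismatches{ 0 };

// Compare the two replies once both are available; runs off the caller's reply path.
void compareReplies(std::shared_future<std::string> primary, std::shared_future<std::string> shadow) {
    // Give the shadow a bounded amount of time, mirroring LOAD_BALANCE_TSS_TIMEOUT.
    if (shadow.wait_for(std::chrono::seconds(5)) != std::future_status::ready) {
        return; // the real code counts this as a TSS timeout
    }
    if (primary.get() != shadow.get()) {
        ++mismatches;
    }
}

struct ShadowedClient {
    // Outstanding comparison tasks, analogous to the QueueModel's addTSSActor collection.
    std::vector<std::future<void>> comparisons;

    // Send the request to the primary, optionally duplicate it to a shadow endpoint,
    // and hand back only the primary's future; the caller never waits on the shadow.
    std::shared_future<std::string> send(std::function<std::string()> primaryCall,
                                         std::function<std::string()> shadowCall,
                                         bool hasShadow) {
        auto primary = std::async(std::launch::async, std::move(primaryCall)).share();
        if (hasShadow) {
            auto shadow = std::async(std::launch::async, std::move(shadowCall)).share();
            comparisons.push_back(std::async(std::launch::async, compareReplies, primary, shadow));
        }
        return primary;
    }
};
```

The design point carried over from the diff is that the comparison never sits on the client's critical path: the caller only ever observes the primary's latency, while mismatch accounting happens asynchronously.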
@ -60,6 +60,20 @@ double QueueModel::addRequest(uint64_t id) {
     return d.penalty;
 }

+void QueueModel::updateTssEndpoint(uint64_t endpointId, const TSSEndpointData& tssData) {
+    auto& d = data[endpointId];
+    d.tssData = tssData;
+}
+
+void QueueModel::removeTssEndpoint(uint64_t endpointId) {
+    auto& d = data[endpointId];
+    d.tssData = Optional<TSSEndpointData>();
+}
+
+Optional<TSSEndpointData> QueueModel::getTssData(uint64_t id) {
+    return data[id].tssData;
+}
+
 Optional<LoadBalancedReply> getLoadBalancedReply(const LoadBalancedReply* reply) {
     return *reply;
 }
@ -26,6 +26,17 @@
 #include "fdbrpc/Smoother.h"
 #include "flow/Knobs.h"
 #include "flow/ActorCollection.h"
+#include "fdbrpc/TSSComparison.h" // For TSS Metrics
+#include "fdbrpc/FlowTransport.h" // For Endpoint
+
+struct TSSEndpointData {
+    UID tssId;
+    Endpoint endpoint;
+    Reference<TSSMetrics> metrics;
+
+    TSSEndpointData(UID tssId, Endpoint endpoint, Reference<TSSMetrics> metrics)
+      : tssId(tssId), endpoint(endpoint), metrics(metrics) {}
+};

 // The data structure used for the client-side load balancing algorithm to
 // decide which storage server to read data from. Conceptually, it tracks the

@ -59,6 +70,10 @@ struct QueueData {
     // hasn't returned a valid result, increase above `futureVersionBackoff`
     // to increase the future backoff amount.
     double increaseBackoffTime;
+
+    // a bit of a hack to store this here, but it's the only centralized place for per-endpoint tracking
+    Optional<TSSEndpointData> tssData;
+
     QueueData()
       : latency(0.001), penalty(1.0), smoothOutstanding(FLOW_KNOBS->QUEUE_MODEL_SMOOTHING_AMOUNT), failedUntil(0),
         futureVersionBackoff(FLOW_KNOBS->FUTURE_VERSION_INITIAL_BACKOFF), increaseBackoffTime(0) {}

@ -89,13 +104,29 @@ public:
     double secondBudget;
     PromiseStream<Future<Void>> addActor;
     Future<Void> laggingRequests; // requests for which a different recipient already answered
+    PromiseStream<Future<Void>> addTSSActor;
+    Future<Void> tssComparisons; // requests for which a different recipient already answered
     int laggingRequestCount;
+    int laggingTSSCompareCount;
+
+    // Updates this endpoint data to duplicate requests to the specified TSS endpoint
+    void updateTssEndpoint(uint64_t endpointId, const TSSEndpointData& endpointData);
+
+    // Removes the TSS mapping from this endpoint to stop duplicating requests to a TSS endpoint
+    void removeTssEndpoint(uint64_t endpointId);
+
+    // Retrieves the data for this endpoint's pair TSS endpoint, if present
+    Optional<TSSEndpointData> getTssData(uint64_t endpointId);

     QueueModel() : secondMultiplier(1.0), secondBudget(0), laggingRequestCount(0) {
         laggingRequests = actorCollection(addActor.getFuture(), &laggingRequestCount);
+        tssComparisons = actorCollection(addTSSActor.getFuture(), &laggingTSSCompareCount);
     }

-    ~QueueModel() { laggingRequests.cancel(); }
+    ~QueueModel() {
+        laggingRequests.cancel();
+        tssComparisons.cancel();
+    }

 private:
     std::unordered_map<uint64_t, QueueData> data;

@ -121,4 +152,4 @@ private:
 };
 */

 #endif
@@ -20,6 +20,7 @@

#ifndef FDBRPC_STATS_H
#define FDBRPC_STATS_H
+#include <type_traits>
#pragma once

// Yet another performance statistics interface
@@ -136,7 +137,15 @@ struct SpecialCounter final : ICounter, FastAllocated<SpecialCounter<F>>, NonCop
    void remove() override { delete this; }

    std::string const& getName() const override { return name; }
-   int64_t getValue() const override { return f(); }
+   int64_t getValue() const override {
+       auto result = f();
+       // Disallow conversion from floating point to int64_t, since this has
+       // been a source of confusion - e.g. a percentage represented as a
+       // fraction between 0 and 1 is not meaningful after conversion to
+       // int64_t.
+       static_assert(!std::is_floating_point_v<decltype(result)>);
+       return result;
+   }

    void resetInterval() override {}

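Note: the static_assert added to SpecialCounter::getValue() rejects lambdas that return a floating-point value at compile time; the GrvProxyServer hunk near the end of this diff is the corresponding call-site fix (wrapping rates and percentages in int64_t(...)). A standalone sketch of the same guard, assuming nothing beyond the standard library:

    // Standalone sketch of the compile-time guard; not FDB code.
    #include <cstdint>
    #include <type_traits>

    template <class F>
    int64_t sampleCounterValue(F f) {
        auto result = f();
        // Same idea as the SpecialCounter change: a double (e.g. a fraction between
        // 0 and 1) silently truncated to int64_t is meaningless, so refuse to compile.
        static_assert(!std::is_floating_point_v<decltype(result)>,
                      "counter lambdas must return an integral value");
        return result;
    }

    int main() {
        int64_t ok = sampleCounterValue([] { return int64_t(42); }); // fine
        // int64_t bad = sampleCounterValue([] { return 0.37; });    // would fail to compile
        return ok == 42 ? 0 : 1;
    }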
@@ -0,0 +1,89 @@
+/*
+ * TSSComparison.h
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This header is to declare the tss comparison function that LoadBalance.Actor.h needs to be aware of to call,
+ * But StorageServerInterface.h needs to implement on the types defined in SSI.h.
+ */
+#ifndef FDBRPC_TSS_COMPARISON_H
+#define FDBRPC_TSS_COMPARISON_H
+
+#include "fdbrpc/ContinuousSample.h"
+#include "fdbrpc/Stats.h"
+
+// refcounted + noncopyable because both DatabaseContext and individual endpoints share ownership
+struct TSSMetrics : ReferenceCounted<TSSMetrics>, NonCopyable {
+   CounterCollection cc;
+   Counter requests;
+   Counter ssErrors;
+   Counter tssErrors;
+   Counter tssTimeouts;
+   Counter mismatches;
+
+   // We could probably just ignore getKey as it's seldom used?
+   ContinuousSample<double> SSgetValueLatency;
+   ContinuousSample<double> SSgetKeyLatency;
+   ContinuousSample<double> SSgetKeyValuesLatency;
+
+   ContinuousSample<double> TSSgetValueLatency;
+   ContinuousSample<double> TSSgetKeyLatency;
+   ContinuousSample<double> TSSgetKeyValuesLatency;
+
+   std::unordered_map<int, uint64_t> ssErrorsByCode;
+   std::unordered_map<int, uint64_t> tssErrorsByCode;
+
+   void ssError(int code) {
+       ++ssErrors;
+       ssErrorsByCode[code]++;
+   }
+
+   void tssError(int code) {
+       ++tssErrors;
+       tssErrorsByCode[code]++;
+   }
+
+   template <class Req>
+   void recordLatency(const Req& req, double ssLatency, double tssLatency);
+
+   void clear() {
+       SSgetValueLatency.clear();
+       SSgetKeyLatency.clear();
+       SSgetKeyValuesLatency.clear();
+
+       TSSgetValueLatency.clear();
+       TSSgetKeyLatency.clear();
+       TSSgetKeyValuesLatency.clear();
+
+       tssErrorsByCode.clear();
+       ssErrorsByCode.clear();
+   }
+
+   TSSMetrics()
+     : cc("TSSClientMetrics"), requests("Requests", cc), ssErrors("SSErrors", cc), tssErrors("TSSErrors", cc),
+       tssTimeouts("TSSTimeouts", cc), mismatches("Mismatches", cc), SSgetValueLatency(1000), SSgetKeyLatency(1000),
+       SSgetKeyValuesLatency(1000), TSSgetValueLatency(1000), TSSgetKeyLatency(1000), TSSgetKeyValuesLatency(1000) {}
+};
+
+// part of the contract of this function is that if there is a mismatch, the implementation needs to record a trace
+// event with the specified severity and tssId in the event.
+template <class Req, class Rep>
+bool TSS_doCompare(const Req& req, const Rep& src, const Rep& tss, Severity traceSeverity, UID tssId);
+
+#endif
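Note: TSS_doCompare is only declared here; the implementation is expected to specialize it per reply type, and the comment above states the contract: return whether the two replies match and, on mismatch, record a trace event carrying the severity and tssId. A hedged, self-contained sketch of that contract using plain std:: types in place of the FDB reply and TraceEvent machinery:

    // Sketch of the comparison contract; the real implementation works on FDB
    // reply structs and emits a TraceEvent rather than writing to stderr.
    #include <cstdint>
    #include <iostream>
    #include <string>

    template <class Rep>
    bool compareReplies(const Rep& src, const Rep& tss, uint64_t tssId) {
        bool match = (src == tss);
        if (!match) {
            // Stand-in for the required mismatch trace event carrying the tssId.
            std::cerr << "TSSMismatch tssId=" << tssId << "\n";
        }
        return match;
    }

    int main() {
        std::string ssReply = "value-a";
        std::string tssReply = "value-b";
        return compareReplies(ssReply, tssReply, 42) ? 0 : 1; // mismatch: logs and returns 1
    }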
@@ -537,7 +537,10 @@ public:

    std::string getFilename() const override { return actualFilename; }

-   ~SimpleFile() override { _close(h); }
+   ~SimpleFile() override {
+       _close(h);
+       --openCount;
+   }

private:
    int h;
@@ -1028,8 +1031,8 @@ public:

    // Get the size of all files we've created on the server and subtract them from the free space
    for (auto file = proc->machine->openFiles.begin(); file != proc->machine->openFiles.end(); ++file) {
-       if (file->second.isReady()) {
+       if (file->second.get().isReady()) {
-           totalFileSize += ((AsyncFileNonDurable*)file->second.get().getPtr())->approximateSize;
+           totalFileSize += ((AsyncFileNonDurable*)file->second.get().get().getPtr())->approximateSize;
        }
        numFiles++;
    }
@@ -2490,7 +2493,7 @@ Future<Reference<class IAsyncFile>> Sim2FileSystem::open(const std::string& file
    actualFilename = filename + ".part";
    auto partFile = machineCache.find(actualFilename);
    if (partFile != machineCache.end()) {
-       Future<Reference<IAsyncFile>> f = AsyncFileDetachable::open(partFile->second);
+       Future<Reference<IAsyncFile>> f = AsyncFileDetachable::open(partFile->second.get());
        if (FLOW_KNOBS->PAGE_WRITE_CHECKSUM_HISTORY > 0)
            f = map(f, [=](Reference<IAsyncFile> r) {
                return Reference<IAsyncFile>(new AsyncFileWriteChecker(r));
@@ -2498,19 +2501,26 @@ Future<Reference<class IAsyncFile>> Sim2FileSystem::open(const std::string& file
        return f;
    }
    }
-   if (machineCache.find(actualFilename) == machineCache.end()) {
+   Future<Reference<IAsyncFile>> f;
+   auto itr = machineCache.find(actualFilename);
+   if (itr == machineCache.end()) {
        // Simulated disk parameters are shared by the AsyncFileNonDurable and the underlying SimpleFile.
        // This way, they can both keep up with the time to start the next operation
        auto diskParameters =
            makeReference<DiskParameters>(FLOW_KNOBS->SIM_DISK_IOPS, FLOW_KNOBS->SIM_DISK_BANDWIDTH);
-       machineCache[actualFilename] =
-           AsyncFileNonDurable::open(filename,
+       f = AsyncFileNonDurable::open(filename,
                                      actualFilename,
                                      SimpleFile::open(filename, flags, mode, diskParameters, false),
                                      diskParameters,
                                      (flags & IAsyncFile::OPEN_NO_AIO) == 0);

+       machineCache[actualFilename] = UnsafeWeakFutureReference<IAsyncFile>(f);
+   } else {
+       f = itr->second.get();
    }
-   Future<Reference<IAsyncFile>> f = AsyncFileDetachable::open(machineCache[actualFilename]);
+   f = AsyncFileDetachable::open(f);
    if (FLOW_KNOBS->PAGE_WRITE_CHECKSUM_HISTORY > 0)
        f = map(f, [=](Reference<IAsyncFile> r) { return Reference<IAsyncFile>(new AsyncFileWriteChecker(r)); });
    return f;
@@ -41,7 +41,7 @@ public:
      : desiredCoordinators(1), physicalDatacenters(1), processesPerMachine(0), listenersPerProcess(1),
        isStopped(false), lastConnectionFailure(0), connectionFailuresDisableDuration(0), speedUpSimulation(false),
        allSwapsDisabled(false), backupAgents(BackupAgentType::WaitForType), drAgents(BackupAgentType::WaitForType),
-       extraDB(nullptr), allowLogSetKills(true), usableRegions(1) {}
+       extraDB(nullptr), allowLogSetKills(true), usableRegions(1), tssMode(TSSMode::Disabled) {}

    // Order matters!
    enum KillType {
@@ -55,6 +55,9 @@ public:
        None
    };

+   // Order matters! all modes >= 2 are fault injection modes
+   enum TSSMode { Disabled, EnabledNormal, EnabledAddDelay, EnabledDropMutations };
+
    enum class BackupAgentType { NoBackupAgents, WaitForType, BackupToFile, BackupToDB };

    // Subclasses may subclass ProcessInfo as well
@@ -188,10 +191,14 @@ public:
        Promise<KillType> shutdownSignal;
    };

+   // A set of data associated with a simulated machine
    struct MachineInfo {
        ProcessInfo* machineProcess;
        std::vector<ProcessInfo*> processes;
-       std::map<std::string, Future<Reference<IAsyncFile>>> openFiles;
+       // A map from filename to file handle for all open files on a machine
+       std::map<std::string, UnsafeWeakFutureReference<IAsyncFile>> openFiles;
+
        std::set<std::string> deletingFiles;
        std::set<std::string> closingFiles;
        Optional<Standalone<StringRef>> machineId;
@@ -401,6 +408,7 @@ public:
    int32_t satelliteTLogWriteAntiQuorumFallback;
    std::vector<Optional<Standalone<StringRef>>> primarySatelliteDcIds;
    std::vector<Optional<Standalone<StringRef>>> remoteSatelliteDcIds;
+   TSSMode tssMode;

    // Used by workloads that perform reconfigurations
    int testerCount;
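Note: the comment on the new TSSMode enum ("all modes >= 2 are fault injection modes") is load-bearing: because the enumerators are declared in that order, callers can gate fault-injection behaviour with a single ordered comparison instead of enumerating modes. A small standalone illustration of that idea (plain enum, not the simulator class):

    // Standalone illustration; not the simulator's actual gating code.
    #include <iostream>

    enum TSSMode { Disabled, EnabledNormal, EnabledAddDelay, EnabledDropMutations };

    bool isFaultInjectionMode(TSSMode mode) {
        return mode >= EnabledAddDelay; // relies on the declared order: all modes >= 2 inject faults
    }

    int main() {
        std::cout << std::boolalpha << isFaultInjectionMode(EnabledNormal) << ' '
                  << isFaultInjectionMode(EnabledDropMutations) << '\n'; // prints: false true
        return 0;
    }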
@ -19,6 +19,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "fdbclient/MutationList.h"
|
#include "fdbclient/MutationList.h"
|
||||||
|
#include "fdbclient/KeyBackedTypes.h" // for key backed map codecs for tss mapping
|
||||||
#include "fdbclient/SystemData.h"
|
#include "fdbclient/SystemData.h"
|
||||||
#include "fdbclient/BackupAgent.actor.h"
|
#include "fdbclient/BackupAgent.actor.h"
|
||||||
#include "fdbclient/Notified.h"
|
#include "fdbclient/Notified.h"
|
||||||
|
@ -64,10 +65,19 @@ void applyMetadataMutations(SpanID const& spanContext,
|
||||||
NotifiedVersion* commitVersion,
|
NotifiedVersion* commitVersion,
|
||||||
std::map<UID, Reference<StorageInfo>>* storageCache,
|
std::map<UID, Reference<StorageInfo>>* storageCache,
|
||||||
std::map<Tag, Version>* tag_popped,
|
std::map<Tag, Version>* tag_popped,
|
||||||
|
std::unordered_map<UID, StorageServerInterface>* tssMapping,
|
||||||
bool initialCommit) {
|
bool initialCommit) {
|
||||||
// std::map<keyRef, vector<uint16_t>> cacheRangeInfo;
|
// std::map<keyRef, vector<uint16_t>> cacheRangeInfo;
|
||||||
std::map<KeyRef, MutationRef> cachedRangeInfo;
|
std::map<KeyRef, MutationRef> cachedRangeInfo;
|
||||||
|
|
||||||
|
// Testing Storage Server removal (clearing serverTagKey) needs to read tss server list value to determine it is a
|
||||||
|
// tss + find partner's tag to send the private mutation. Since the removeStorageServer transaction clears both the
|
||||||
|
// storage list and server tag, we have to enforce ordering, proccessing the server tag first, and postpone the
|
||||||
|
// server list clear until the end;
|
||||||
|
// Similarly, the TSS mapping change key needs to read the server list at the end of the commit
|
||||||
|
std::vector<KeyRangeRef> tssServerListToRemove;
|
||||||
|
std::vector<std::pair<UID, UID>> tssMappingToAdd;
|
||||||
|
|
||||||
for (auto const& m : mutations) {
|
for (auto const& m : mutations) {
|
||||||
//TraceEvent("MetadataMutation", dbgid).detail("M", m.toString());
|
//TraceEvent("MetadataMutation", dbgid).detail("M", m.toString());
|
||||||
if (toCommit) {
|
if (toCommit) {
|
||||||
|
@ -95,12 +105,14 @@ void applyMetadataMutations(SpanID const& spanContext,
|
||||||
|
|
||||||
for (const auto& id : src) {
|
for (const auto& id : src) {
|
||||||
auto storageInfo = getStorageInfo(id, storageCache, txnStateStore);
|
auto storageInfo = getStorageInfo(id, storageCache, txnStateStore);
|
||||||
|
ASSERT(!storageInfo->interf.isTss());
|
||||||
ASSERT(storageInfo->tag != invalidTag);
|
ASSERT(storageInfo->tag != invalidTag);
|
||||||
info.tags.push_back(storageInfo->tag);
|
info.tags.push_back(storageInfo->tag);
|
||||||
info.src_info.push_back(storageInfo);
|
info.src_info.push_back(storageInfo);
|
||||||
}
|
}
|
||||||
for (const auto& id : dest) {
|
for (const auto& id : dest) {
|
||||||
auto storageInfo = getStorageInfo(id, storageCache, txnStateStore);
|
auto storageInfo = getStorageInfo(id, storageCache, txnStateStore);
|
||||||
|
ASSERT(!storageInfo->interf.isTss());
|
||||||
ASSERT(storageInfo->tag != invalidTag);
|
ASSERT(storageInfo->tag != invalidTag);
|
||||||
info.tags.push_back(storageInfo->tag);
|
info.tags.push_back(storageInfo->tag);
|
||||||
info.dest_info.push_back(storageInfo);
|
info.dest_info.push_back(storageInfo);
|
||||||
|
@ -113,6 +125,8 @@ void applyMetadataMutations(SpanID const& spanContext,
|
||||||
txnStateStore->set(KeyValueRef(m.param1, m.param2));
|
txnStateStore->set(KeyValueRef(m.param1, m.param2));
|
||||||
} else if (m.param1.startsWith(serverKeysPrefix)) {
|
} else if (m.param1.startsWith(serverKeysPrefix)) {
|
||||||
if (toCommit) {
|
if (toCommit) {
|
||||||
|
Tag tag = decodeServerTagValue(
|
||||||
|
txnStateStore->readValue(serverTagKeyFor(serverKeysDecodeServer(m.param1))).get().get());
|
||||||
MutationRef privatized = m;
|
MutationRef privatized = m;
|
||||||
privatized.param1 = m.param1.withPrefix(systemKeys.begin, arena);
|
privatized.param1 = m.param1.withPrefix(systemKeys.begin, arena);
|
||||||
TraceEvent(SevDebug, "SendingPrivateMutation", dbgid)
|
TraceEvent(SevDebug, "SendingPrivateMutation", dbgid)
|
||||||
|
@ -120,14 +134,9 @@ void applyMetadataMutations(SpanID const& spanContext,
|
||||||
.detail("Privatized", privatized.toString())
|
.detail("Privatized", privatized.toString())
|
||||||
.detail("Server", serverKeysDecodeServer(m.param1))
|
.detail("Server", serverKeysDecodeServer(m.param1))
|
||||||
.detail("TagKey", serverTagKeyFor(serverKeysDecodeServer(m.param1)))
|
.detail("TagKey", serverTagKeyFor(serverKeysDecodeServer(m.param1)))
|
||||||
.detail(
|
.detail("Tag", tag.toString());
|
||||||
"Tag",
|
|
||||||
decodeServerTagValue(
|
|
||||||
txnStateStore->readValue(serverTagKeyFor(serverKeysDecodeServer(m.param1))).get().get())
|
|
||||||
.toString());
|
|
||||||
|
|
||||||
toCommit->addTag(decodeServerTagValue(
|
toCommit->addTag(tag);
|
||||||
txnStateStore->readValue(serverTagKeyFor(serverKeysDecodeServer(m.param1))).get().get()));
|
|
||||||
toCommit->writeTypedMessage(privatized);
|
toCommit->writeTypedMessage(privatized);
|
||||||
}
|
}
|
||||||
} else if (m.param1.startsWith(serverTagPrefix)) {
|
} else if (m.param1.startsWith(serverTagPrefix)) {
|
||||||
|
@ -235,6 +244,29 @@ void applyMetadataMutations(SpanID const& spanContext,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else if (m.param1.startsWith(tssMappingKeys.begin)) {
|
||||||
|
if (!initialCommit) {
|
||||||
|
txnStateStore->set(KeyValueRef(m.param1, m.param2));
|
||||||
|
if (tssMapping) {
|
||||||
|
// Normally uses key backed map, so have to use same unpacking code here.
|
||||||
|
UID ssId = Codec<UID>::unpack(Tuple::unpack(m.param1.removePrefix(tssMappingKeys.begin)));
|
||||||
|
UID tssId = Codec<UID>::unpack(Tuple::unpack(m.param2));
|
||||||
|
|
||||||
|
tssMappingToAdd.push_back(std::pair(ssId, tssId));
|
||||||
|
|
||||||
|
// send private mutation to SS that it now has a TSS pair
|
||||||
|
if (toCommit) {
|
||||||
|
MutationRef privatized = m;
|
||||||
|
privatized.param1 = m.param1.withPrefix(systemKeys.begin, arena);
|
||||||
|
|
||||||
|
Optional<Value> tagV = txnStateStore->readValue(serverTagKeyFor(ssId)).get();
|
||||||
|
if (tagV.present()) {
|
||||||
|
toCommit->addTag(decodeServerTagValue(tagV.get()));
|
||||||
|
toCommit->writeTypedMessage(privatized);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
} else if (m.param1 == databaseLockedKey || m.param1 == metadataVersionKey ||
|
} else if (m.param1 == databaseLockedKey || m.param1 == metadataVersionKey ||
|
||||||
m.param1 == mustContainSystemMutationsKey ||
|
m.param1 == mustContainSystemMutationsKey ||
|
||||||
m.param1.startsWith(applyMutationsBeginRange.begin) ||
|
m.param1.startsWith(applyMutationsBeginRange.begin) ||
|
||||||
|
@ -379,8 +411,20 @@ void applyMetadataMutations(SpanID const& spanContext,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (serverListKeys.intersects(range)) {
|
if (serverListKeys.intersects(range)) {
|
||||||
if (!initialCommit)
|
if (!initialCommit) {
|
||||||
txnStateStore->clear(range & serverListKeys);
|
KeyRangeRef rangeToClear = range & serverListKeys;
|
||||||
|
if (rangeToClear.singleKeyRange()) {
|
||||||
|
UID id = decodeServerListKey(rangeToClear.begin);
|
||||||
|
Optional<Value> ssiV = txnStateStore->readValue(serverListKeyFor(id)).get();
|
||||||
|
if (ssiV.present() && decodeServerListValue(ssiV.get()).isTss()) {
|
||||||
|
tssServerListToRemove.push_back(rangeToClear);
|
||||||
|
} else {
|
||||||
|
txnStateStore->clear(rangeToClear);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
txnStateStore->clear(rangeToClear);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (tagLocalityListKeys.intersects(range)) {
|
if (tagLocalityListKeys.intersects(range)) {
|
||||||
if (!initialCommit)
|
if (!initialCommit)
|
||||||
|
@ -411,6 +455,32 @@ void applyMetadataMutations(SpanID const& spanContext,
|
||||||
toCommit->writeTypedMessage(privatized);
|
toCommit->writeTypedMessage(privatized);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Might be a tss removal, which doesn't store a tag there.
|
||||||
|
// Chained if is a little verbose, but avoids unecessary work
|
||||||
|
if (toCommit && !initialCommit && !serverKeysCleared.size()) {
|
||||||
|
KeyRangeRef maybeTssRange = range & serverTagKeys;
|
||||||
|
if (maybeTssRange.singleKeyRange()) {
|
||||||
|
UID id = decodeServerTagKey(maybeTssRange.begin);
|
||||||
|
Optional<Value> ssiV = txnStateStore->readValue(serverListKeyFor(id)).get();
|
||||||
|
|
||||||
|
if (ssiV.present()) {
|
||||||
|
StorageServerInterface ssi = decodeServerListValue(ssiV.get());
|
||||||
|
if (ssi.isTss()) {
|
||||||
|
Optional<Value> tagV =
|
||||||
|
txnStateStore->readValue(serverTagKeyFor(ssi.tssPairID.get())).get();
|
||||||
|
if (tagV.present()) {
|
||||||
|
MutationRef privatized = m;
|
||||||
|
privatized.param1 = maybeTssRange.begin.withPrefix(systemKeys.begin, arena);
|
||||||
|
privatized.param2 =
|
||||||
|
keyAfter(maybeTssRange.begin, arena).withPrefix(systemKeys.begin, arena);
|
||||||
|
|
||||||
|
toCommit->addTag(decodeServerTagValue(tagV.get()));
|
||||||
|
toCommit->writeTypedMessage(privatized);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (!initialCommit) {
|
if (!initialCommit) {
|
||||||
KeyRangeRef clearRange = range & serverTagKeys;
|
KeyRangeRef clearRange = range & serverTagKeys;
|
||||||
|
@ -439,6 +509,19 @@ void applyMetadataMutations(SpanID const& spanContext,
|
||||||
if (!initialCommit)
|
if (!initialCommit)
|
||||||
txnStateStore->clear(range & serverTagHistoryKeys);
|
txnStateStore->clear(range & serverTagHistoryKeys);
|
||||||
}
|
}
|
||||||
|
if (tssMappingKeys.intersects(range)) {
|
||||||
|
if (!initialCommit) {
|
||||||
|
KeyRangeRef rangeToClear = range & tssMappingKeys;
|
||||||
|
ASSERT(rangeToClear.singleKeyRange());
|
||||||
|
txnStateStore->clear(rangeToClear);
|
||||||
|
if (tssMapping) {
|
||||||
|
// Normally uses key backed map, so have to use same unpacking code here.
|
||||||
|
UID ssId =
|
||||||
|
Codec<UID>::unpack(Tuple::unpack(rangeToClear.begin.removePrefix(tssMappingKeys.begin)));
|
||||||
|
tssMapping->erase(ssId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
if (range.contains(coordinatorsKey)) {
|
if (range.contains(coordinatorsKey)) {
|
||||||
if (!initialCommit)
|
if (!initialCommit)
|
||||||
txnStateStore->clear(singleKeyRange(coordinatorsKey));
|
txnStateStore->clear(singleKeyRange(coordinatorsKey));
|
||||||
|
@ -568,6 +651,17 @@ void applyMetadataMutations(SpanID const& spanContext,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (KeyRangeRef& range : tssServerListToRemove) {
|
||||||
|
txnStateStore->clear(range);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto& tssPair : tssMappingToAdd) {
|
||||||
|
// read tss server list from txn state store and add it to tss mapping
|
||||||
|
StorageServerInterface tssi =
|
||||||
|
decodeServerListValue(txnStateStore->readValue(serverListKeyFor(tssPair.second)).get().get());
|
||||||
|
(*tssMapping)[tssPair.first] = tssi;
|
||||||
|
}
|
||||||
|
|
||||||
// If we accumulated private mutations for cached key-ranges, we also need to
|
// If we accumulated private mutations for cached key-ranges, we also need to
|
||||||
// tag them with the relevant storage servers. This is done to make the storage
|
// tag them with the relevant storage servers. This is done to make the storage
|
||||||
// servers aware of the cached key-ranges
|
// servers aware of the cached key-ranges
|
||||||
|
@ -666,6 +760,7 @@ void applyMetadataMutations(SpanID const& spanContext,
|
||||||
&proxyCommitData.committedVersion,
|
&proxyCommitData.committedVersion,
|
||||||
&proxyCommitData.storageCache,
|
&proxyCommitData.storageCache,
|
||||||
&proxyCommitData.tag_popped,
|
&proxyCommitData.tag_popped,
|
||||||
|
&proxyCommitData.tssMapping,
|
||||||
initialCommit);
|
initialCommit);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -695,5 +790,6 @@ void applyMetadataMutations(SpanID const& spanContext,
|
||||||
/* commitVersion= */ nullptr,
|
/* commitVersion= */ nullptr,
|
||||||
/* storageCache= */ nullptr,
|
/* storageCache= */ nullptr,
|
||||||
/* tag_popped= */ nullptr,
|
/* tag_popped= */ nullptr,
|
||||||
|
/* tssMapping= */ nullptr,
|
||||||
/* initialCommit= */ false);
|
/* initialCommit= */ false);
|
||||||
}
|
}
|
||||||
|
|
|
@@ -25,6 +25,7 @@
#include "fdbclient/SystemData.h"
#include "fdbserver/BackupInterface.h"
#include "fdbserver/BackupProgress.actor.h"
+#include "fdbserver/Knobs.h"
#include "fdbserver/LogProtocolMessage.h"
#include "fdbserver/LogSystem.h"
#include "fdbserver/ServerDBInfo.h"
@@ -83,6 +83,8 @@ set(FDBSERVER_SRCS
  RestoreLoader.actor.cpp
  RestoreWorker.actor.h
  RestoreWorker.actor.cpp
+  RestoreWorkerInterface.actor.cpp
+  RestoreWorkerInterface.actor.h
  Resolver.actor.cpp
  ResolverInterface.h
  ServerDBInfo.actor.h
@@ -103,6 +105,8 @@ set(FDBSERVER_SRCS
  TesterInterface.actor.h
  TLogInterface.h
  TLogServer.actor.cpp
+  TSSMappingUtil.actor.h
+  TSSMappingUtil.actor.cpp
  VersionedBTree.actor.cpp
  VFSAsync.h
  VFSAsync.cpp
@ -599,8 +599,8 @@ public:
|
||||||
std::vector<std::tuple<ProcessClass::Fitness, int, bool, int, Field>> orderedFields;
|
std::vector<std::tuple<ProcessClass::Fitness, int, bool, int, Field>> orderedFields;
|
||||||
for (auto& it : fieldsWithMin) {
|
for (auto& it : fieldsWithMin) {
|
||||||
auto& fitness = field_fitness[it];
|
auto& fitness = field_fitness[it];
|
||||||
orderedFields.push_back(std::make_tuple(
|
orderedFields.emplace_back(
|
||||||
std::get<0>(fitness), std::get<1>(fitness), std::get<2>(fitness), field_count[it], it));
|
std::get<0>(fitness), std::get<1>(fitness), std::get<2>(fitness), field_count[it], it);
|
||||||
}
|
}
|
||||||
std::sort(orderedFields.begin(), orderedFields.end());
|
std::sort(orderedFields.begin(), orderedFields.end());
|
||||||
int totalFields = desired / minPerField;
|
int totalFields = desired / minPerField;
|
||||||
|
@ -1692,20 +1692,37 @@ public:
|
||||||
if (req.configuration.regions.size() > 1) {
|
if (req.configuration.regions.size() > 1) {
|
||||||
std::vector<RegionInfo> regions = req.configuration.regions;
|
std::vector<RegionInfo> regions = req.configuration.regions;
|
||||||
if (regions[0].priority == regions[1].priority && regions[1].dcId == clusterControllerDcId.get()) {
|
if (regions[0].priority == regions[1].priority && regions[1].dcId == clusterControllerDcId.get()) {
|
||||||
|
TraceEvent("CCSwitchPrimaryDc", id)
|
||||||
|
.detail("CCDcId", clusterControllerDcId.get())
|
||||||
|
.detail("OldPrimaryDcId", regions[0].dcId)
|
||||||
|
.detail("NewPrimaryDcId", regions[1].dcId);
|
||||||
std::swap(regions[0], regions[1]);
|
std::swap(regions[0], regions[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (regions[1].dcId == clusterControllerDcId.get() &&
|
if (regions[1].dcId == clusterControllerDcId.get() &&
|
||||||
(!versionDifferenceUpdated || datacenterVersionDifference >= SERVER_KNOBS->MAX_VERSION_DIFFERENCE)) {
|
(!versionDifferenceUpdated || datacenterVersionDifference >= SERVER_KNOBS->MAX_VERSION_DIFFERENCE)) {
|
||||||
if (regions[1].priority >= 0) {
|
if (regions[1].priority >= 0) {
|
||||||
|
TraceEvent("CCSwitchPrimaryDcVersionDifference", id)
|
||||||
|
.detail("CCDcId", clusterControllerDcId.get())
|
||||||
|
.detail("OldPrimaryDcId", regions[0].dcId)
|
||||||
|
.detail("NewPrimaryDcId", regions[1].dcId);
|
||||||
std::swap(regions[0], regions[1]);
|
std::swap(regions[0], regions[1]);
|
||||||
} else {
|
} else {
|
||||||
TraceEvent(SevWarnAlways, "CCDcPriorityNegative")
|
TraceEvent(SevWarnAlways, "CCDcPriorityNegative")
|
||||||
.detail("DcId", regions[1].dcId)
|
.detail("DcId", regions[1].dcId)
|
||||||
.detail("Priority", regions[1].priority);
|
.detail("Priority", regions[1].priority)
|
||||||
|
.detail("FindWorkersInDc", regions[0].dcId)
|
||||||
|
.detail("Warning", "Failover did not happen but CC is in remote DC");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TraceEvent("CCFindWorkersForConfiguration", id)
|
||||||
|
.detail("CCDcId", clusterControllerDcId.get())
|
||||||
|
.detail("Region0DcId", regions[0].dcId)
|
||||||
|
.detail("Region1DcId", regions[1].dcId)
|
||||||
|
.detail("DatacenterVersionDifference", datacenterVersionDifference)
|
||||||
|
.detail("VersionDifferenceUpdated", versionDifferenceUpdated);
|
||||||
|
|
||||||
bool setPrimaryDesired = false;
|
bool setPrimaryDesired = false;
|
||||||
try {
|
try {
|
||||||
auto reply = findWorkersForConfigurationFromDC(req, regions[0].dcId);
|
auto reply = findWorkersForConfigurationFromDC(req, regions[0].dcId);
|
||||||
|
@ -1719,6 +1736,10 @@ public:
|
||||||
} else if (regions[0].dcId == clusterControllerDcId.get()) {
|
} else if (regions[0].dcId == clusterControllerDcId.get()) {
|
||||||
return reply.get();
|
return reply.get();
|
||||||
}
|
}
|
||||||
|
TraceEvent(SevWarn, "CCRecruitmentFailed", id)
|
||||||
|
.detail("Reason", "Recruited Txn system and CC are in different DCs")
|
||||||
|
.detail("CCDcId", clusterControllerDcId.get())
|
||||||
|
.detail("RecruitedTxnSystemDcId", regions[0].dcId);
|
||||||
throw no_more_servers();
|
throw no_more_servers();
|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
if (!goodRemoteRecruitmentTime.isReady() && regions[1].dcId != clusterControllerDcId.get()) {
|
if (!goodRemoteRecruitmentTime.isReady() && regions[1].dcId != clusterControllerDcId.get()) {
|
||||||
|
@ -1728,7 +1749,9 @@ public:
|
||||||
if (e.code() != error_code_no_more_servers || regions[1].priority < 0) {
|
if (e.code() != error_code_no_more_servers || regions[1].priority < 0) {
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
TraceEvent(SevWarn, "AttemptingRecruitmentInRemoteDC", id).error(e);
|
TraceEvent(SevWarn, "AttemptingRecruitmentInRemoteDc", id)
|
||||||
|
.detail("SetPrimaryDesired", setPrimaryDesired)
|
||||||
|
.error(e);
|
||||||
auto reply = findWorkersForConfigurationFromDC(req, regions[1].dcId);
|
auto reply = findWorkersForConfigurationFromDC(req, regions[1].dcId);
|
||||||
if (!setPrimaryDesired) {
|
if (!setPrimaryDesired) {
|
||||||
vector<Optional<Key>> dcPriority;
|
vector<Optional<Key>> dcPriority;
|
||||||
|
@ -3382,6 +3405,7 @@ void clusterRegisterMaster(ClusterControllerData* self, RegisterMasterRequest co
|
||||||
if (db->clientInfo->get().commitProxies != req.commitProxies ||
|
if (db->clientInfo->get().commitProxies != req.commitProxies ||
|
||||||
db->clientInfo->get().grvProxies != req.grvProxies) {
|
db->clientInfo->get().grvProxies != req.grvProxies) {
|
||||||
isChanged = true;
|
isChanged = true;
|
||||||
|
// TODO why construct a new one and not just copy the old one and change proxies + id?
|
||||||
ClientDBInfo clientInfo;
|
ClientDBInfo clientInfo;
|
||||||
clientInfo.id = deterministicRandom()->randomUniqueID();
|
clientInfo.id = deterministicRandom()->randomUniqueID();
|
||||||
clientInfo.commitProxies = req.commitProxies;
|
clientInfo.commitProxies = req.commitProxies;
|
||||||
|
@ -3874,7 +3898,7 @@ ACTOR Future<Void> monitorGlobalConfig(ClusterControllerData::DBInfo* db) {
|
||||||
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||||
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||||
state Optional<Value> globalConfigVersion = wait(tr.get(globalConfigVersionKey));
|
state Optional<Value> globalConfigVersion = wait(tr.get(globalConfigVersionKey));
|
||||||
state ClientDBInfo clientInfo = db->clientInfo->get();
|
state ClientDBInfo clientInfo = db->serverInfo->get().client;
|
||||||
|
|
||||||
if (globalConfigVersion.present()) {
|
if (globalConfigVersion.present()) {
|
||||||
// Since the history keys end with versionstamps, they
|
// Since the history keys end with versionstamps, they
|
||||||
|
@ -3932,6 +3956,14 @@ ACTOR Future<Void> monitorGlobalConfig(ClusterControllerData::DBInfo* db) {
|
||||||
}
|
}
|
||||||
|
|
||||||
clientInfo.id = deterministicRandom()->randomUniqueID();
|
clientInfo.id = deterministicRandom()->randomUniqueID();
|
||||||
|
// Update ServerDBInfo so fdbserver processes receive updated history.
|
||||||
|
ServerDBInfo serverInfo = db->serverInfo->get();
|
||||||
|
serverInfo.id = deterministicRandom()->randomUniqueID();
|
||||||
|
serverInfo.infoGeneration = ++db->dbInfoCount;
|
||||||
|
serverInfo.client = clientInfo;
|
||||||
|
db->serverInfo->set(serverInfo);
|
||||||
|
|
||||||
|
// Update ClientDBInfo so client processes receive updated history.
|
||||||
db->clientInfo->set(clientInfo);
|
db->clientInfo->set(clientInfo);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4411,6 +4443,7 @@ ACTOR Future<Void> clusterControllerCore(ClusterControllerFullInterface interf,
|
||||||
self.addActor.send(handleForcedRecoveries(&self, interf));
|
self.addActor.send(handleForcedRecoveries(&self, interf));
|
||||||
self.addActor.send(monitorDataDistributor(&self));
|
self.addActor.send(monitorDataDistributor(&self));
|
||||||
self.addActor.send(monitorRatekeeper(&self));
|
self.addActor.send(monitorRatekeeper(&self));
|
||||||
|
// self.addActor.send(monitorTSSMapping(&self));
|
||||||
self.addActor.send(dbInfoUpdater(&self));
|
self.addActor.send(dbInfoUpdater(&self));
|
||||||
self.addActor.send(traceCounters("ClusterControllerMetrics",
|
self.addActor.send(traceCounters("ClusterControllerMetrics",
|
||||||
self.id,
|
self.id,
|
||||||
|
|
|
@@ -42,6 +42,7 @@
#include "fdbserver/ProxyCommitData.actor.h"
#include "fdbserver/RatekeeperInterface.h"
#include "fdbserver/RecoveryState.h"
+#include "fdbserver/RestoreUtil.h"
#include "fdbserver/WaitFailure.h"
#include "fdbserver/WorkerInterface.actor.h"
#include "flow/ActorCollection.h"
@@ -1431,11 +1432,26 @@ ACTOR Future<Void> commitBatch(ProxyCommitData* self,
    return Void();
}

+// Add tss mapping data to the reply, if any of the included storage servers have a TSS pair
+void maybeAddTssMapping(GetKeyServerLocationsReply& reply,
+                        ProxyCommitData* commitData,
+                        std::unordered_set<UID>& included,
+                        UID ssId) {
+   if (!included.count(ssId)) {
+       auto mappingItr = commitData->tssMapping.find(ssId);
+       if (mappingItr != commitData->tssMapping.end()) {
+           included.insert(ssId);
+           reply.resultsTssMapping.push_back(*mappingItr);
+       }
+   }
+}
+
ACTOR static Future<Void> doKeyServerLocationRequest(GetKeyServerLocationsRequest req, ProxyCommitData* commitData) {
    // We can't respond to these requests until we have valid txnStateStore
    wait(commitData->validState.getFuture());
    wait(delay(0, TaskPriority::DefaultEndpoint));

+   std::unordered_set<UID> tssMappingsIncluded;
    GetKeyServerLocationsReply rep;
    if (!req.end.present()) {
        auto r = req.reverse ? commitData->keyInfo.rangeContainingKeyBefore(req.begin)
@@ -1444,8 +1460,9 @@ ACTOR static Future<Void> doKeyServerLocationRequest(GetKeyServerLocationsReques
        ssis.reserve(r.value().src_info.size());
        for (auto& it : r.value().src_info) {
            ssis.push_back(it->interf);
+           maybeAddTssMapping(rep, commitData, tssMappingsIncluded, it->interf.id());
        }
-       rep.results.push_back(std::make_pair(r.range(), ssis));
+       rep.results.emplace_back(r.range(), ssis);
    } else if (!req.reverse) {
        int count = 0;
        for (auto r = commitData->keyInfo.rangeContaining(req.begin);
@@ -1455,8 +1472,9 @@ ACTOR static Future<Void> doKeyServerLocationRequest(GetKeyServerLocationsReques
            ssis.reserve(r.value().src_info.size());
            for (auto& it : r.value().src_info) {
                ssis.push_back(it->interf);
+               maybeAddTssMapping(rep, commitData, tssMappingsIncluded, it->interf.id());
            }
-           rep.results.push_back(std::make_pair(r.range(), ssis));
+           rep.results.emplace_back(r.range(), ssis);
            count++;
        }
    } else {
@@ -1467,8 +1485,9 @@ ACTOR static Future<Void> doKeyServerLocationRequest(GetKeyServerLocationsReques
            ssis.reserve(r.value().src_info.size());
            for (auto& it : r.value().src_info) {
                ssis.push_back(it->interf);
+               maybeAddTssMapping(rep, commitData, tssMappingsIncluded, it->interf.id());
            }
-           rep.results.push_back(std::make_pair(r.range(), ssis));
+           rep.results.emplace_back(r.range(), ssis);
            if (r == commitData->keyInfo.ranges().begin()) {
                break;
            }
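Note: maybeAddTssMapping keeps the reply small by adding each storage server's TSS mapping at most once, even though the same server id appears in many key ranges; the unordered_set threaded through the three loops is the de-duplication state. A sketch of that pattern with plain std:: containers (the names below are stand-ins, not the FDB reply or interface types):

    // De-duplication sketch; not the FDB types.
    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <unordered_map>
    #include <unordered_set>
    #include <utility>
    #include <vector>

    using SSId = uint64_t;
    using TssInterface = std::string; // hypothetical stand-in for the TSS interface

    struct Reply {
        std::vector<std::pair<SSId, TssInterface>> resultsTssMapping;
    };

    void maybeAddTssMapping(Reply& reply,
                            const std::unordered_map<SSId, TssInterface>& tssMapping,
                            std::unordered_set<SSId>& included,
                            SSId ssId) {
        if (included.count(ssId))
            return; // this storage server's mapping is already in the reply
        auto it = tssMapping.find(ssId);
        if (it != tssMapping.end()) {
            included.insert(ssId);
            reply.resultsTssMapping.push_back(*it);
        }
    }

    int main() {
        std::unordered_map<SSId, TssInterface> mapping{ { 1, "tss-1" } };
        std::unordered_set<SSId> included;
        Reply reply;
        // The same server shows up for several ranges, but is appended only once.
        for (SSId ss : { 1, 2, 1, 1 })
            maybeAddTssMapping(reply, mapping, included, ss);
        std::cout << reply.resultsTssMapping.size() << "\n"; // prints 1
        return 0;
    }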
@ -406,8 +406,8 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
|
||||||
|
|
||||||
// If the current leader's priority became worse, we still need to notified all clients because now one
|
// If the current leader's priority became worse, we still need to notified all clients because now one
|
||||||
// of them might be better than the leader. In addition, even though FitnessRemote is better than
|
// of them might be better than the leader. In addition, even though FitnessRemote is better than
|
||||||
// FitnessUnknown, we still need to notified clients so that monitorLeaderRemotely has a chance to switch
|
// FitnessUnknown, we still need to notified clients so that monitorLeaderRemotely has a chance to
|
||||||
// from passively monitoring the leader to actively attempting to become the leader.
|
// switch from passively monitoring the leader to actively attempting to become the leader.
|
||||||
if (!currentNominee.present() || !nextNominee.present() ||
|
if (!currentNominee.present() || !nextNominee.present() ||
|
||||||
!currentNominee.get().equalInternalId(nextNominee.get()) ||
|
!currentNominee.get().equalInternalId(nextNominee.get()) ||
|
||||||
nextNominee.get() > currentNominee.get() ||
|
nextNominee.get() > currentNominee.get() ||
|
||||||
|
@ -545,15 +545,30 @@ struct LeaderRegisterCollection {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// extract the prefix descriptor from cluster id
|
||||||
|
StringRef getClusterDescriptor(Key key) {
|
||||||
|
StringRef str = key.contents();
|
||||||
|
return str.eat(":");
|
||||||
|
}
|
||||||
|
|
||||||
// leaderServer multiplexes multiple leaderRegisters onto a single LeaderElectionRegInterface,
|
// leaderServer multiplexes multiple leaderRegisters onto a single LeaderElectionRegInterface,
|
||||||
// creating and destroying them on demand.
|
// creating and destroying them on demand.
|
||||||
ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf, OnDemandStore* pStore, UID id) {
|
ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf,
|
||||||
|
OnDemandStore* pStore,
|
||||||
|
UID id,
|
||||||
|
Reference<ClusterConnectionFile> ccf) {
|
||||||
state LeaderRegisterCollection regs(pStore);
|
state LeaderRegisterCollection regs(pStore);
|
||||||
state ActorCollection forwarders(false);
|
state ActorCollection forwarders(false);
|
||||||
|
|
||||||
wait(LeaderRegisterCollection::init(®s));
|
wait(LeaderRegisterCollection::init(®s));
|
||||||
|
|
||||||
loop choose {
|
loop choose {
|
||||||
|
when(CheckDescriptorMutableRequest req = waitNext(interf.checkDescriptorMutable.getFuture())) {
|
||||||
|
// Note the response returns the value of a knob enforced by checking only one coordinator. It is not
|
||||||
|
// quorum based.
|
||||||
|
CheckDescriptorMutableReply rep(SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT);
|
||||||
|
req.reply.send(rep);
|
||||||
|
}
|
||||||
when(OpenDatabaseCoordRequest req = waitNext(interf.openDatabase.getFuture())) {
|
when(OpenDatabaseCoordRequest req = waitNext(interf.openDatabase.getFuture())) {
|
||||||
Optional<LeaderInfo> forward = regs.getForward(req.clusterKey);
|
Optional<LeaderInfo> forward = regs.getForward(req.clusterKey);
|
||||||
if (forward.present()) {
|
if (forward.present()) {
|
||||||
|
@ -562,7 +577,18 @@ ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf, OnDemandStore
|
||||||
info.forward = forward.get().serializedInfo;
|
info.forward = forward.get().serializedInfo;
|
||||||
req.reply.send(CachedSerialization<ClientDBInfo>(info));
|
req.reply.send(CachedSerialization<ClientDBInfo>(info));
|
||||||
} else {
|
} else {
|
||||||
regs.getInterface(req.clusterKey, id).openDatabase.send(req);
|
StringRef clusterName = ccf->getConnectionString().clusterKeyName();
|
||||||
|
if (!SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT &&
|
||||||
|
getClusterDescriptor(req.clusterKey).compare(clusterName)) {
|
||||||
|
TraceEvent(SevWarn, "CCFMismatch")
|
||||||
|
.detail("RequestType", "OpenDatabaseCoordRequest")
|
||||||
|
.detail("LocalCS", ccf->getConnectionString().toString())
|
||||||
|
.detail("IncomingClusterKey", req.clusterKey)
|
||||||
|
.detail("IncomingCoordinators", describeList(req.coordinators, req.coordinators.size()));
|
||||||
|
req.reply.sendError(wrong_connection_file());
|
||||||
|
} else {
|
||||||
|
regs.getInterface(req.clusterKey, id).openDatabase.send(req);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
when(ElectionResultRequest req = waitNext(interf.electionResult.getFuture())) {
|
when(ElectionResultRequest req = waitNext(interf.electionResult.getFuture())) {
|
||||||
|
@ -570,38 +596,89 @@ ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf, OnDemandStore
|
||||||
if (forward.present()) {
|
if (forward.present()) {
|
||||||
req.reply.send(forward.get());
|
req.reply.send(forward.get());
|
||||||
} else {
|
} else {
|
||||||
regs.getInterface(req.key, id).electionResult.send(req);
|
StringRef clusterName = ccf->getConnectionString().clusterKeyName();
|
||||||
|
if (!SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT && getClusterDescriptor(req.key).compare(clusterName)) {
|
||||||
|
TraceEvent(SevWarn, "CCFMismatch")
|
||||||
|
.detail("RequestType", "ElectionResultRequest")
|
||||||
|
.detail("LocalCS", ccf->getConnectionString().toString())
|
||||||
|
.detail("IncomingClusterKey", req.key)
|
||||||
|
.detail("ClusterKey", ccf->getConnectionString().clusterKey())
|
||||||
|
.detail("IncomingCoordinators", describeList(req.coordinators, req.coordinators.size()));
|
||||||
|
req.reply.sendError(wrong_connection_file());
|
||||||
|
} else {
|
||||||
|
regs.getInterface(req.key, id).electionResult.send(req);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
when(GetLeaderRequest req = waitNext(interf.getLeader.getFuture())) {
|
when(GetLeaderRequest req = waitNext(interf.getLeader.getFuture())) {
|
||||||
Optional<LeaderInfo> forward = regs.getForward(req.key);
|
Optional<LeaderInfo> forward = regs.getForward(req.key);
|
||||||
if (forward.present())
|
if (forward.present())
|
||||||
req.reply.send(forward.get());
|
req.reply.send(forward.get());
|
||||||
else
|
else {
|
||||||
regs.getInterface(req.key, id).getLeader.send(req);
|
StringRef clusterName = ccf->getConnectionString().clusterKeyName();
|
||||||
|
if (!SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT && getClusterDescriptor(req.key).compare(clusterName)) {
|
||||||
|
TraceEvent(SevWarn, "CCFMismatch")
|
||||||
|
.detail("RequestType", "GetLeaderRequest")
|
||||||
|
.detail("LocalCS", ccf->getConnectionString().toString())
|
||||||
|
.detail("IncomingClusterKey", req.key)
|
||||||
|
.detail("ClusterKey", ccf->getConnectionString().clusterKey());
|
||||||
|
req.reply.sendError(wrong_connection_file());
|
||||||
|
} else {
|
||||||
|
regs.getInterface(req.key, id).getLeader.send(req);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
when(CandidacyRequest req = waitNext(interf.candidacy.getFuture())) {
|
when(CandidacyRequest req = waitNext(interf.candidacy.getFuture())) {
|
||||||
Optional<LeaderInfo> forward = regs.getForward(req.key);
|
Optional<LeaderInfo> forward = regs.getForward(req.key);
|
||||||
if (forward.present())
|
if (forward.present())
|
||||||
req.reply.send(forward.get());
|
req.reply.send(forward.get());
|
||||||
else
|
else {
|
||||||
regs.getInterface(req.key, id).candidacy.send(req);
|
StringRef clusterName = ccf->getConnectionString().clusterKeyName();
|
||||||
|
if (!SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT && getClusterDescriptor(req.key).compare(clusterName)) {
|
||||||
|
TraceEvent(SevWarn, "CCFMismatch")
|
||||||
|
.detail("RequestType", "CandidacyRequest")
|
||||||
|
.detail("LocalCS", ccf->getConnectionString().toString())
|
||||||
|
.detail("IncomingClusterKey", req.key);
|
||||||
|
req.reply.sendError(wrong_connection_file());
|
||||||
|
} else {
|
||||||
|
regs.getInterface(req.key, id).candidacy.send(req);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
when(LeaderHeartbeatRequest req = waitNext(interf.leaderHeartbeat.getFuture())) {
|
when(LeaderHeartbeatRequest req = waitNext(interf.leaderHeartbeat.getFuture())) {
|
||||||
Optional<LeaderInfo> forward = regs.getForward(req.key);
|
Optional<LeaderInfo> forward = regs.getForward(req.key);
|
||||||
if (forward.present())
|
if (forward.present())
|
||||||
req.reply.send(LeaderHeartbeatReply{ false });
|
req.reply.send(LeaderHeartbeatReply{ false });
|
||||||
else
|
else {
|
||||||
regs.getInterface(req.key, id).leaderHeartbeat.send(req);
|
StringRef clusterName = ccf->getConnectionString().clusterKeyName();
|
||||||
|
if (!SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT && getClusterDescriptor(req.key).compare(clusterName)) {
|
||||||
|
TraceEvent(SevWarn, "CCFMismatch")
|
||||||
|
.detail("RequestType", "LeaderHeartbeatRequest")
|
||||||
|
.detail("LocalCS", ccf->getConnectionString().toString())
|
||||||
|
.detail("IncomingClusterKey", req.key);
|
||||||
|
req.reply.sendError(wrong_connection_file());
|
||||||
|
} else {
|
||||||
|
regs.getInterface(req.key, id).leaderHeartbeat.send(req);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
when(ForwardRequest req = waitNext(interf.forward.getFuture())) {
|
when(ForwardRequest req = waitNext(interf.forward.getFuture())) {
|
||||||
Optional<LeaderInfo> forward = regs.getForward(req.key);
|
Optional<LeaderInfo> forward = regs.getForward(req.key);
|
||||||
if (forward.present())
|
if (forward.present())
|
||||||
req.reply.send(Void());
|
req.reply.send(Void());
|
||||||
else {
|
else {
|
||||||
forwarders.add(
|
StringRef clusterName = ccf->getConnectionString().clusterKeyName();
|
||||||
LeaderRegisterCollection::setForward(®s, req.key, ClusterConnectionString(req.conn.toString())));
|
if (!SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT && getClusterDescriptor(req.key).compare(clusterName)) {
|
||||||
regs.getInterface(req.key, id).forward.send(req);
|
TraceEvent(SevWarn, "CCFMismatch")
|
||||||
|
.detail("RequestType", "ForwardRequest")
|
||||||
|
.detail("LocalCS", ccf->getConnectionString().toString())
|
||||||
|
.detail("IncomingClusterKey", req.key);
|
||||||
|
req.reply.sendError(wrong_connection_file());
|
||||||
|
} else {
|
||||||
|
forwarders.add(LeaderRegisterCollection::setForward(
|
||||||
|
®s, req.key, ClusterConnectionString(req.conn.toString())));
|
||||||
|
regs.getInterface(req.key, id).forward.send(req);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
when(wait(forwarders.getResult())) {
|
when(wait(forwarders.getResult())) {
|
||||||
|
@ -611,7 +688,7 @@ ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf, OnDemandStore
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR Future<Void> coordinationServer(std::string dataFolder) {
|
ACTOR Future<Void> coordinationServer(std::string dataFolder, Reference<ClusterConnectionFile> ccf) {
|
||||||
state UID myID = deterministicRandom()->randomUniqueID();
|
state UID myID = deterministicRandom()->randomUniqueID();
|
||||||
state LeaderElectionRegInterface myLeaderInterface(g_network);
|
state LeaderElectionRegInterface myLeaderInterface(g_network);
|
||||||
state GenerationRegInterface myInterface(g_network);
|
state GenerationRegInterface myInterface(g_network);
|
||||||
|
@ -622,7 +699,7 @@ ACTOR Future<Void> coordinationServer(std::string dataFolder) {
|
||||||
.detail("Folder", dataFolder);
|
.detail("Folder", dataFolder);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
wait(localGenerationReg(myInterface, &store) || leaderServer(myLeaderInterface, &store, myID) ||
|
wait(localGenerationReg(myInterface, &store) || leaderServer(myLeaderInterface, &store, myID, ccf) ||
|
||||||
store.getError());
|
store.getError());
|
||||||
throw internal_error();
|
throw internal_error();
|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
|
|
|
@ -225,6 +225,6 @@ public:
|
||||||
vector<GenerationRegInterface> stateServers;
|
vector<GenerationRegInterface> stateServers;
|
||||||
};
|
};
|
||||||
|
|
||||||
Future<Void> coordinationServer(std::string const& dataFolder);
|
Future<Void> coordinationServer(std::string const& dataFolder, Reference<ClusterConnectionFile> const& ccf);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
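Note: the coordination changes above gate every leader-election request on a descriptor check: the cluster key has the form "<description>:<id>", getClusterDescriptor takes the part before the first ':', and a mismatch (with ENABLE_CROSS_CLUSTER_SUPPORT off) is answered with wrong_connection_file plus a CCFMismatch trace event. A minimal sketch of the string handling, using std::string_view in place of StringRef::eat(":"):

    // String-handling sketch only; not the coordinator code.
    #include <iostream>
    #include <string_view>

    std::string_view clusterDescriptor(std::string_view clusterKey) {
        // Everything before the first ':' (the whole key if there is no ':').
        return clusterKey.substr(0, clusterKey.find(':'));
    }

    int main() {
        std::string_view localDescriptor = "mycluster";          // from the local cluster file
        std::string_view incomingKey = "othercluster:Abc123xyz"; // key carried by the request
        if (clusterDescriptor(incomingKey) != localDescriptor) {
            // Mirrors the new behaviour: trace CCFMismatch and reply with wrong_connection_file.
            std::cout << "reject: wrong_connection_file\n";
        }
        return 0;
    }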
File diff suppressed because it is too large
@@ -263,6 +263,7 @@ ACTOR Future<Void> dataDistributionQueue(Database cx,
                              Reference<ShardsAffectedByTeamFailure> shardsAffectedByTeamFailure,
                              MoveKeysLock lock,
                              PromiseStream<Promise<int64_t>> getAverageShardBytes,
+                             PromiseStream<Promise<int>> getUnhealthyRelocationCount,
                              UID distributorId,
                              int teamSize,
                              int singleRegionTeamSize,

@@ -1032,7 +1032,7 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueueData* self, RelocateData rd,
                anyWithSource = true;
            }

-           bestTeams.push_back(std::make_pair(bestTeam.first.get(), bestTeam.second));
+           bestTeams.emplace_back(bestTeam.first.get(), bestTeam.second);
            tciIndex++;
        }
        if (foundTeams && anyHealthy) {
@@ -1550,6 +1550,7 @@ ACTOR Future<Void> dataDistributionQueue(Database cx,
                              Reference<ShardsAffectedByTeamFailure> shardsAffectedByTeamFailure,
                              MoveKeysLock lock,
                              PromiseStream<Promise<int64_t>> getAverageShardBytes,
+                             PromiseStream<Promise<int>> getUnhealthyRelocationCount,
                              UID distributorId,
                              int teamSize,
                              int singleRegionTeamSize,
@@ -1679,6 +1680,9 @@ ACTOR Future<Void> dataDistributionQueue(Database cx,
            }
            when(wait(self.error.getFuture())) {} // Propagate errors from dataDistributionRelocator
            when(wait(waitForAll(balancingFutures))) {}
+           when(Promise<int> r = waitNext(getUnhealthyRelocationCount.getFuture())) {
+               r.send(self.unhealthyRelocations);
+           }
        }
    }
} catch (Error& e) {
@@ -176,8 +176,8 @@ ShardSizeBounds getShardSizeBounds(KeyRangeRef shard, int64_t maxShardSize) {
}

int64_t getMaxShardSize(double dbSizeEstimate) {
-   return std::min((SERVER_KNOBS->MIN_SHARD_BYTES +
+   return std::min((SERVER_KNOBS->MIN_SHARD_BYTES + (int64_t)std::sqrt(std::max<double>(dbSizeEstimate, 0)) *
-                    (int64_t)std::sqrt(dbSizeEstimate) * SERVER_KNOBS->SHARD_BYTES_PER_SQRT_BYTES) *
+                        SERVER_KNOBS->SHARD_BYTES_PER_SQRT_BYTES) *
                        SERVER_KNOBS->SHARD_BYTES_RATIO,
                    (int64_t)SERVER_KNOBS->MAX_SHARD_BYTES);
}
@@ -832,7 +832,7 @@ public:
    int count = end - begin;
    numItems = count;
    nodeBytesDeleted = 0;
-   initialHeight = (uint8_t)log2(count) + 1;
+   initialHeight = count ? (uint8_t)log2(count) + 1 : 0;
    maxHeight = 0;

    // The boundary leading to the new page acts as the last time we branched right
@@ -148,7 +148,10 @@ ACTOR Future<int> spawnProcess(std::string path,
 state pid_t pid = pidAndReadFD.first;
 state Optional<int> readFD = pidAndReadFD.second;
 if (pid == -1) {
-TraceEvent(SevWarnAlways, "SpawnProcess: Command failed to spawn").detail("Cmd", path).detail("Args", allArgs);
+TraceEvent(SevWarnAlways, "SpawnProcessFailure")
+.detail("Reason", "Command failed to spawn")
+.detail("Cmd", path)
+.detail("Args", allArgs);
 return -1;
 } else if (pid > 0) {
 state int status = -1;
@@ -160,7 +163,8 @@ ACTOR Future<int> spawnProcess(std::string path,
 if (runTime > maxWaitTime) {
 // timing out

-TraceEvent(SevWarnAlways, "SpawnProcess : Command failed, timeout")
+TraceEvent(SevWarnAlways, "SpawnProcessFailure")
+.detail("Reason", "Command failed, timeout")
 .detail("Cmd", path)
 .detail("Args", allArgs);
 return -1;
@@ -175,9 +179,10 @@ ACTOR Future<int> spawnProcess(std::string path,
 }

 if (err < 0) {
-TraceEvent event(SevWarnAlways, "SpawnProcess : Command failed");
+TraceEvent event(SevWarnAlways, "SpawnProcessFailure");
 setupTraceWithOutput(event, bytesRead, outputBuffer);
-event.detail("Cmd", path)
+event.detail("Reason", "Command failed")
+.detail("Cmd", path)
 .detail("Args", allArgs)
 .detail("Errno", WIFEXITED(status) ? WEXITSTATUS(status) : -1);
 return -1;
@@ -194,14 +199,15 @@ ACTOR Future<int> spawnProcess(std::string path,
 } else {
 // child process completed
 if (!(WIFEXITED(status) && WEXITSTATUS(status) == 0)) {
-TraceEvent event(SevWarnAlways, "SpawnProcess : Command failed");
+TraceEvent event(SevWarnAlways, "SpawnProcessFailure");
 setupTraceWithOutput(event, bytesRead, outputBuffer);
-event.detail("Cmd", path)
+event.detail("Reason", "Command failed")
+.detail("Cmd", path)
 .detail("Args", allArgs)
 .detail("Errno", WIFEXITED(status) ? WEXITSTATUS(status) : -1);
 return WIFEXITED(status) ? WEXITSTATUS(status) : -1;
 }
-TraceEvent event("SpawnProcess : Command status");
+TraceEvent event("SpawnProcessCommandStatus");
 setupTraceWithOutput(event, bytesRead, outputBuffer);
 event.detail("Cmd", path)
 .detail("Args", allArgs)
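These hunks rename the spawnProcess trace events to fixed identifiers and move the variable text into a Reason detail, presumably so the event names stay stable and machine-searchable. A tiny stand-in for that fluent detail pattern (not the real TraceEvent API; the paths and flags are made up):

    #include <cstdio>
    #include <string>

    // Minimal stand-in for TraceEvent: the event name stays a fixed identifier and
    // the variable part goes into a detail field, which is the shape of the change above.
    struct MiniTraceEvent {
        explicit MiniTraceEvent(const std::string& name) { std::printf("Event=%s", name.c_str()); }
        MiniTraceEvent& detail(const char* k, const std::string& v) {
            std::printf(" %s=\"%s\"", k, v.c_str());
            return *this;
        }
        ~MiniTraceEvent() { std::printf("\n"); }
    };

    int main() {
        MiniTraceEvent("SpawnProcessFailure")
            .detail("Reason", "Command failed to spawn")
            .detail("Cmd", "/bin/example")
            .detail("Args", "--flag");
    }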
@@ -109,15 +109,18 @@ struct GrvProxyStats {
 SERVER_KNOBS->LATENCY_SAMPLE_SIZE),
 grvLatencyBands("GRVLatencyMetrics", id, SERVER_KNOBS->STORAGE_LOGGING_DELAY) {
 // The rate at which the limit(budget) is allowed to grow.
-specialCounter(cc, "SystemAndDefaultTxnRateAllowed", [this]() { return this->transactionRateAllowed; });
-specialCounter(cc, "BatchTransactionRateAllowed", [this]() { return this->batchTransactionRateAllowed; });
-specialCounter(cc, "SystemAndDefaultTxnLimit", [this]() { return this->transactionLimit; });
-specialCounter(cc, "BatchTransactionLimit", [this]() { return this->batchTransactionLimit; });
-specialCounter(cc, "PercentageOfDefaultGRVQueueProcessed", [this]() {
-return this->percentageOfDefaultGRVQueueProcessed;
-});
 specialCounter(
-cc, "PercentageOfBatchGRVQueueProcessed", [this]() { return this->percentageOfBatchGRVQueueProcessed; });
+cc, "SystemAndDefaultTxnRateAllowed", [this]() { return int64_t(this->transactionRateAllowed); });
+specialCounter(
+cc, "BatchTransactionRateAllowed", [this]() { return int64_t(this->batchTransactionRateAllowed); });
+specialCounter(cc, "SystemAndDefaultTxnLimit", [this]() { return int64_t(this->transactionLimit); });
+specialCounter(cc, "BatchTransactionLimit", [this]() { return int64_t(this->batchTransactionLimit); });
+specialCounter(cc, "PercentageOfDefaultGRVQueueProcessed", [this]() {
+return int64_t(100 * this->percentageOfDefaultGRVQueueProcessed);
+});
+specialCounter(cc, "PercentageOfBatchGRVQueueProcessed", [this]() {
+return int64_t(100 * this->percentageOfBatchGRVQueueProcessed);
+});

 logger = traceCounters("GrvProxyMetrics", id, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, &cc, "GrvProxyMetrics");
 for (int i = 0; i < FLOW_KNOBS->BASIC_LOAD_BALANCE_BUCKETS; i++) {
@@ -831,8 +834,10 @@ ACTOR static Future<Void> transactionStarter(GrvProxyInterface proxy,
 }
 span = Span(span.location);

-grvProxyData->stats.percentageOfDefaultGRVQueueProcessed = (double)defaultGRVProcessed / defaultQueueSize;
-grvProxyData->stats.percentageOfBatchGRVQueueProcessed = (double)batchGRVProcessed / batchQueueSize;
+grvProxyData->stats.percentageOfDefaultGRVQueueProcessed =
+defaultQueueSize ? (double)defaultGRVProcessed / defaultQueueSize : 1;
+grvProxyData->stats.percentageOfBatchGRVQueueProcessed =
+batchQueueSize ? (double)batchGRVProcessed / batchQueueSize : 1;
 }
 }

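The ternaries guard against an empty GRV queue: dividing by a zero queue size would put NaN into the metric, so an empty queue is now reported as fully processed. A minimal sketch of that guard:

    #include <cstdio>

    // Sketch of the zero-queue guard: an empty queue reports 1.0 (fully processed)
    // instead of dividing by zero and feeding NaN into the stats.
    double fractionProcessed(int processed, int queueSize) {
        return queueSize ? (double)processed / queueSize : 1;
    }

    int main() {
        std::printf("%.2f %.2f\n", fractionProcessed(0, 0), fractionProcessed(3, 4)); // 1.00 0.75
    }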
@@ -56,9 +56,6 @@ public:
 if (userData != nullptr && userDataDestructor != nullptr) {
 userDataDestructor(userData);
 }
-if (buffer != nullptr) {
-VALGRIND_MAKE_MEM_UNDEFINED(buffer, bufferSize);
-}
 }

 uint8_t const* begin() const { return (uint8_t*)buffer; }

@@ -401,7 +401,7 @@ private:
 if (o->op == OpSet) {
 if (sequential) {
 KeyValueMapPair pair(o->p1, o->p2);
-dataSets.push_back(std::make_pair(pair, pair.arena.getSize() + data.getElementBytes()));
+dataSets.emplace_back(pair, pair.arena.getSize() + data.getElementBytes());
 } else {
 data.insert(o->p1, o->p2);
 }

@@ -131,6 +131,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
 init( PRIORITY_RECOVER_MOVE, 110 );
 init( PRIORITY_REBALANCE_UNDERUTILIZED_TEAM, 120 );
 init( PRIORITY_REBALANCE_OVERUTILIZED_TEAM, 121 );
+init( PRIORITY_PERPETUAL_STORAGE_WIGGLE, 140 );
 init( PRIORITY_TEAM_HEALTHY, 140 );
 init( PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER, 150 );
 init( PRIORITY_TEAM_REDUNDANT, 200 );
@@ -217,6 +218,9 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
 init( SERVER_LIST_DELAY, 1.0 );
 init( RECRUITMENT_IDLE_DELAY, 1.0 );
 init( STORAGE_RECRUITMENT_DELAY, 10.0 );
+init( TSS_HACK_IDENTITY_MAPPING, false ); // THIS SHOULD NEVER BE SET IN PROD. Only for performance testing
+init( TSS_RECRUITMENT_TIMEOUT, 3*STORAGE_RECRUITMENT_DELAY ); if (randomize && BUGGIFY ) TSS_RECRUITMENT_TIMEOUT = 1.0; // Super low timeout should cause tss recruitments to fail
+init( TSS_DD_CHECK_INTERVAL, 60.0 ); if (randomize && BUGGIFY ) TSS_DD_CHECK_INTERVAL = 1.0; // May kill all TSS quickly
 init( DATA_DISTRIBUTION_LOGGING_INTERVAL, 5.0 );
 init( DD_ENABLED_CHECK_DELAY, 1.0 );
 init( DD_STALL_CHECK_DELAY, 0.4 ); //Must be larger than 2*MAX_BUGGIFIED_DELAY
@@ -250,6 +254,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
 init( DD_TEAMS_INFO_PRINT_INTERVAL, 60 ); if( randomize && BUGGIFY ) DD_TEAMS_INFO_PRINT_INTERVAL = 10;
 init( DD_TEAMS_INFO_PRINT_YIELD_COUNT, 100 ); if( randomize && BUGGIFY ) DD_TEAMS_INFO_PRINT_YIELD_COUNT = deterministicRandom()->random01() * 1000 + 1;
 init( DD_TEAM_ZERO_SERVER_LEFT_LOG_DELAY, 120 ); if( randomize && BUGGIFY ) DD_TEAM_ZERO_SERVER_LEFT_LOG_DELAY = 5;
+init( DD_STORAGE_WIGGLE_PAUSE_THRESHOLD, 1 ); if( randomize && BUGGIFY ) DD_STORAGE_WIGGLE_PAUSE_THRESHOLD = 10;

 // TeamRemover
 init( TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER, false ); if( randomize && BUGGIFY ) TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER = deterministicRandom()->random01() < 0.1 ? true : false; // false by default. disable the consistency check when it's true
@@ -631,6 +636,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi

 // Coordination
 init( COORDINATED_STATE_ONCONFLICT_POLL_INTERVAL, 1.0 ); if( randomize && BUGGIFY ) COORDINATED_STATE_ONCONFLICT_POLL_INTERVAL = 10.0;
+init( ENABLE_CROSS_CLUSTER_SUPPORT, true ); if( randomize && BUGGIFY ) ENABLE_CROSS_CLUSTER_SUPPORT = false;

 // Buggification
 init( BUGGIFIED_EVENTUAL_CONSISTENCY, 1.0 );

@@ -133,6 +133,7 @@ public:
 int PRIORITY_RECOVER_MOVE;
 int PRIORITY_REBALANCE_UNDERUTILIZED_TEAM;
 int PRIORITY_REBALANCE_OVERUTILIZED_TEAM;
+int PRIORITY_PERPETUAL_STORAGE_WIGGLE;
 int PRIORITY_TEAM_HEALTHY;
 int PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER;
 int PRIORITY_TEAM_REDUNDANT;
@@ -167,6 +168,9 @@ public:
 double SERVER_LIST_DELAY;
 double RECRUITMENT_IDLE_DELAY;
 double STORAGE_RECRUITMENT_DELAY;
+bool TSS_HACK_IDENTITY_MAPPING;
+double TSS_RECRUITMENT_TIMEOUT;
+double TSS_DD_CHECK_INTERVAL;
 double DATA_DISTRIBUTION_LOGGING_INTERVAL;
 double DD_ENABLED_CHECK_DELAY;
 double DD_STALL_CHECK_DELAY;
@@ -200,6 +204,7 @@ public:
 int DD_TEAMS_INFO_PRINT_INTERVAL;
 int DD_TEAMS_INFO_PRINT_YIELD_COUNT;
 int DD_TEAM_ZERO_SERVER_LEFT_LOG_DELAY;
+int DD_STORAGE_WIGGLE_PAUSE_THRESHOLD; // How many unhealthy relocations are ongoing will pause storage wiggle

 // TeamRemover to remove redundant teams
 bool TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER; // disable the machineTeamRemover actor
@@ -559,6 +564,8 @@ public:

 // Coordination
 double COORDINATED_STATE_ONCONFLICT_POLL_INTERVAL;
+bool ENABLE_CROSS_CLUSTER_SUPPORT; // Allow a coordinator to serve requests whose connection string does not match
+// the local descriptor

 // Buggification
 double BUGGIFIED_EVENTUAL_CONSISTENCY;

@@ -21,6 +21,7 @@
 #include "fdbrpc/FailureMonitor.h"
 #include "fdbrpc/Locality.h"
 #include "fdbserver/CoordinationInterface.h"
+#include "fdbserver/Knobs.h"
 #include "fdbclient/MonitorLeader.h"
 #include "flow/actorcompiler.h" // This must be the last #include.

@@ -175,22 +175,22 @@ struct LogRouterData {
 specialCounter(cc, "WaitForVersionMS", [this]() {
 double val = this->waitForVersionTime;
 this->waitForVersionTime = 0;
-return 1000 * val;
+return int64_t(1000 * val);
 });
 specialCounter(cc, "WaitForVersionMaxMS", [this]() {
 double val = this->maxWaitForVersionTime;
 this->maxWaitForVersionTime = 0;
-return 1000 * val;
+return int64_t(1000 * val);
 });
 specialCounter(cc, "GetMoreMS", [this]() {
 double val = this->getMoreTime;
 this->getMoreTime = 0;
-return 1000 * val;
+return int64_t(1000 * val);
 });
 specialCounter(cc, "GetMoreMaxMS", [this]() {
 double val = this->maxGetMoreTime;
 this->maxGetMoreTime = 0;
-return 1000 * val;
+return int64_t(1000 * val);
 });
 specialCounter(cc, "Generation", [this]() { return this->generation; });
 logger = traceCounters("LogRouterMetrics",
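As in the GrvProxyStats hunk, these lambdas now return int64_t so the sampled millisecond values are logged as integers; each one also resets its accumulator, so every sample covers exactly one logging interval. A small sketch of that read-and-reset pattern (the Sampler type below is just a stand-in for specialCounter's registration):

    #include <cstdint>
    #include <cstdio>
    #include <functional>

    // Stand-in for a counter registration: the lambda is polled periodically,
    // converts accumulated seconds to integer milliseconds, and resets the
    // accumulator so each sample covers one interval.
    struct Sampler {
        std::function<int64_t()> read;
    };

    int main() {
        double waitForVersionTime = 0.0425; // seconds accumulated since the last sample
        Sampler waitForVersionMS{ [&]() {
            double val = waitForVersionTime;
            waitForVersionTime = 0;
            return int64_t(1000 * val);
        } };
        std::printf("%lld %lld\n", (long long)waitForVersionMS.read(), (long long)waitForVersionMS.read()); // 42 0
    }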
@@ -410,6 +410,8 @@ struct ILogSystem {

 virtual Optional<UID> getPrimaryPeekLocation() const = 0;

+virtual Optional<UID> getCurrentPeekLocation() const = 0;
+
 virtual void addref() = 0;

 virtual void delref() = 0;
@@ -473,6 +475,7 @@ struct ILogSystem {
 Version popped() const override;
 Version getMinKnownCommittedVersion() const override;
 Optional<UID> getPrimaryPeekLocation() const override;
+Optional<UID> getCurrentPeekLocation() const override;

 void addref() override { ReferenceCounted<ServerPeekCursor>::addref(); }

@@ -534,6 +537,7 @@ struct ILogSystem {
 Version popped() const override;
 Version getMinKnownCommittedVersion() const override;
 Optional<UID> getPrimaryPeekLocation() const override;
+Optional<UID> getCurrentPeekLocation() const override;

 void addref() override { ReferenceCounted<MergedPeekCursor>::addref(); }

@@ -589,6 +593,7 @@ struct ILogSystem {
 Version popped() const override;
 Version getMinKnownCommittedVersion() const override;
 Optional<UID> getPrimaryPeekLocation() const override;
+Optional<UID> getCurrentPeekLocation() const override;

 void addref() override { ReferenceCounted<SetPeekCursor>::addref(); }

@@ -620,6 +625,7 @@ struct ILogSystem {
 Version popped() const override;
 Version getMinKnownCommittedVersion() const override;
 Optional<UID> getPrimaryPeekLocation() const override;
+Optional<UID> getCurrentPeekLocation() const override;

 void addref() override { ReferenceCounted<MultiCursor>::addref(); }

@@ -698,6 +704,7 @@ struct ILogSystem {
 Version popped() const override;
 Version getMinKnownCommittedVersion() const override;
 Optional<UID> getPrimaryPeekLocation() const override;
+Optional<UID> getCurrentPeekLocation() const override;

 void addref() override { ReferenceCounted<BufferedCursor>::addref(); }

@@ -393,12 +393,16 @@ Version ILogSystem::ServerPeekCursor::getMinKnownCommittedVersion() const {
 }

 Optional<UID> ILogSystem::ServerPeekCursor::getPrimaryPeekLocation() const {
-if (interf) {
+if (interf && interf->get().present()) {
 return interf->get().id();
 }
 return Optional<UID>();
 }

+Optional<UID> ILogSystem::ServerPeekCursor::getCurrentPeekLocation() const {
+return ILogSystem::ServerPeekCursor::getPrimaryPeekLocation();
+}
+
 Version ILogSystem::ServerPeekCursor::popped() const {
 return poppedVersion;
 }
@@ -673,6 +677,13 @@ Optional<UID> ILogSystem::MergedPeekCursor::getPrimaryPeekLocation() const {
 return Optional<UID>();
 }

+Optional<UID> ILogSystem::MergedPeekCursor::getCurrentPeekLocation() const {
+if (currentCursor >= 0) {
+return serverCursors[currentCursor]->getPrimaryPeekLocation();
+}
+return Optional<UID>();
+}
+
 Version ILogSystem::MergedPeekCursor::popped() const {
 Version poppedVersion = 0;
 for (auto& c : serverCursors)
@@ -1023,6 +1034,13 @@ Optional<UID> ILogSystem::SetPeekCursor::getPrimaryPeekLocation() const {
 return Optional<UID>();
 }

+Optional<UID> ILogSystem::SetPeekCursor::getCurrentPeekLocation() const {
+if (currentCursor >= 0 && currentSet >= 0) {
+return serverCursors[currentSet][currentCursor]->getPrimaryPeekLocation();
+}
+return Optional<UID>();
+}
+
 Version ILogSystem::SetPeekCursor::popped() const {
 Version poppedVersion = 0;
 for (auto& cursors : serverCursors) {
@@ -1123,6 +1141,10 @@ Optional<UID> ILogSystem::MultiCursor::getPrimaryPeekLocation() const {
 return cursors.back()->getPrimaryPeekLocation();
 }

+Optional<UID> ILogSystem::MultiCursor::getCurrentPeekLocation() const {
+return cursors.back()->getCurrentPeekLocation();
+}
+
 Version ILogSystem::MultiCursor::popped() const {
 return std::max(poppedVersion, cursors.back()->popped());
 }
@@ -1403,6 +1425,10 @@ Optional<UID> ILogSystem::BufferedCursor::getPrimaryPeekLocation() const {
 return Optional<UID>();
 }

+Optional<UID> ILogSystem::BufferedCursor::getCurrentPeekLocation() const {
+return Optional<UID>();
+}
+
 Version ILogSystem::BufferedCursor::popped() const {
 if (initialPoppedVersion == poppedVersion) {
 return 0;
@@ -20,9 +20,11 @@

 #include "flow/Util.h"
 #include "fdbrpc/FailureMonitor.h"
+#include "fdbclient/KeyBackedTypes.h"
 #include "fdbclient/SystemData.h"
 #include "fdbserver/MoveKeys.actor.h"
 #include "fdbserver/Knobs.h"
+#include "fdbserver/TSSMappingUtil.actor.h"
 #include "flow/actorcompiler.h" // This must be the last #include.

 using std::max;
@@ -158,7 +160,7 @@ ACTOR Future<Optional<UID>> checkReadWrite(Future<ErrorOr<GetShardStateReply>> f
 return Optional<UID>(uid);
 }

-Future<Void> removeOldDestinations(Transaction* tr,
+Future<Void> removeOldDestinations(Reference<ReadYourWritesTransaction> tr,
 UID oldDest,
 VectorRef<KeyRangeRef> shards,
 KeyRangeRef currentKeys) {
@@ -235,7 +237,7 @@ ACTOR Future<vector<UID>> addReadWriteDestinations(KeyRangeRef shard,
 }

 ACTOR Future<vector<vector<UID>>> additionalSources(RangeResult shards,
-Transaction* tr,
+Reference<ReadYourWritesTransaction> tr,
 int desiredHealthy,
 int maxServers) {
 state RangeResult UIDtoTagMap = wait(tr->getRange(serverTagKeys, CLIENT_KNOBS->TOO_MANY));
@@ -320,6 +322,7 @@ ACTOR static Future<Void> startMoveKeys(Database occ,
 MoveKeysLock lock,
 FlowLock* startMoveKeysLock,
 UID relocationIntervalId,
+std::map<UID, StorageServerInterface>* tssMapping,
 const DDEnabledState* ddEnabledState) {
 state TraceInterval interval("RelocateShard_StartMoveKeys");
 state Future<Void> warningLogger = logWarningAfter("StartMoveKeysTooLong", 600, servers);
@@ -327,6 +330,7 @@ ACTOR static Future<Void> startMoveKeys(Database occ,

 wait(startMoveKeysLock->take(TaskPriority::DataDistributionLaunch));
 state FlowLock::Releaser releaser(*startMoveKeysLock);
+state bool loadedTssMapping = false;

 TraceEvent(SevDebug, interval.begin(), relocationIntervalId);

@@ -343,7 +347,8 @@ ACTOR static Future<Void> startMoveKeys(Database occ,
 TEST(begin > keys.begin); // Multi-transactional startMoveKeys
 batches++;

-state Transaction tr(occ);
+// RYW to optimize re-reading the same key ranges
+state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(occ);
 state int retries = 0;

 loop {
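Switching startMoveKeys to a ReadYourWritesTransaction lets repeated reads of the same key ranges within one attempt be served from the transaction's local cache. A toy illustration of that caching idea (not FoundationDB's implementation; the key names are made up):

    #include <cstdio>
    #include <map>
    #include <string>

    // Toy read cache illustrating why re-reading the same key inside one
    // read-your-writes transaction is cheap: only the first read goes remote.
    struct ToyRYWTransaction {
        std::map<std::string, std::string> cache;
        int remoteReads = 0;

        std::string get(const std::string& key) {
            auto it = cache.find(key);
            if (it != cache.end())
                return it->second; // cache hit: no extra round trip
            ++remoteReads;
            std::string value = "value-of-" + key; // stand-in for a cluster read
            cache.emplace(key, value);
            return value;
        }
    };

    int main() {
        ToyRYWTransaction tr;
        tr.get("serverTag/abc");
        tr.get("serverTag/abc"); // second read is served locally
        std::printf("remote reads: %d\n", tr.remoteReads); // 1
    }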
@@ -356,15 +361,22 @@ ACTOR static Future<Void> startMoveKeys(Database occ,
 // Keep track of shards for all src servers so that we can preserve their values in serverKeys
 state Map<UID, VectorRef<KeyRangeRef>> shardMap;

-tr.info.taskID = TaskPriority::MoveKeys;
-tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
+tr->getTransaction().info.taskID = TaskPriority::MoveKeys;
+tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
+tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);

-wait(checkMoveKeysLock(&tr, lock, ddEnabledState));
+wait(checkMoveKeysLock(&(tr->getTransaction()), lock, ddEnabledState));

+if (!loadedTssMapping) {
+// share transaction for loading tss mapping with the rest of start move keys
+wait(readTSSMappingRYW(tr, tssMapping));
+loadedTssMapping = true;
+}
+
 vector<Future<Optional<Value>>> serverListEntries;
 serverListEntries.reserve(servers.size());
 for (int s = 0; s < servers.size(); s++)
-serverListEntries.push_back(tr.get(serverListKeyFor(servers[s])));
+serverListEntries.push_back(tr->get(serverListKeyFor(servers[s])));
 state vector<Optional<Value>> serverListValues = wait(getAll(serverListEntries));

 for (int s = 0; s < serverListValues.size(); s++) {
@@ -380,7 +392,8 @@ ACTOR static Future<Void> startMoveKeys(Database occ,
 // Get all existing shards overlapping keys (exclude any that have been processed in a previous
 // iteration of the outer loop)
 state KeyRange currentKeys = KeyRangeRef(begin, keys.end);
-state RangeResult old = wait(krmGetRanges(&tr,
+state RangeResult old = wait(krmGetRanges(tr,
 keyServersPrefix,
 currentKeys,
 SERVER_KNOBS->MOVE_KEYS_KRM_LIMIT,
@@ -399,10 +412,10 @@ ACTOR static Future<Void> startMoveKeys(Database occ,
 // printf("'%s': '%s'\n", old[i].key.toString().c_str(), old[i].value.toString().c_str());

 // Check that enough servers for each shard are in the correct state
-state RangeResult UIDtoTagMap = wait(tr.getRange(serverTagKeys, CLIENT_KNOBS->TOO_MANY));
+state RangeResult UIDtoTagMap = wait(tr->getRange(serverTagKeys, CLIENT_KNOBS->TOO_MANY));
 ASSERT(!UIDtoTagMap.more && UIDtoTagMap.size() < CLIENT_KNOBS->TOO_MANY);
 vector<vector<UID>> addAsSource = wait(additionalSources(
-old, &tr, servers.size(), SERVER_KNOBS->MAX_ADDED_SOURCES_MULTIPLIER * servers.size()));
+old, tr, servers.size(), SERVER_KNOBS->MAX_ADDED_SOURCES_MULTIPLIER * servers.size()));

 // For each intersecting range, update keyServers[range] dest to be servers and clear existing dest
 // servers from serverKeys
@@ -417,7 +430,7 @@ ACTOR static Future<Void> startMoveKeys(Database occ,
 // .detail("KeyEnd", rangeIntersectKeys.end.toString())
 // .detail("OldSrc", describe(src))
 // .detail("OldDest", describe(dest))
-// .detail("ReadVersion", tr.getReadVersion().get());
+// .detail("ReadVersion", tr->getReadVersion().get());

 for (auto& uid : addAsSource[i]) {
 src.push_back(uid);
@@ -425,7 +438,7 @@ ACTOR static Future<Void> startMoveKeys(Database occ,
 uniquify(src);

 // Update dest servers for this range to be equal to servers
-krmSetPreviouslyEmptyRange(&tr,
+krmSetPreviouslyEmptyRange(&(tr->getTransaction()),
 keyServersPrefix,
 rangeIntersectKeys,
 keyServersValue(UIDtoTagMap, src, servers),
@@ -455,7 +468,7 @@ ACTOR static Future<Void> startMoveKeys(Database occ,
 vector<Future<Void>> actors;
 for (oldDest = oldDests.begin(); oldDest != oldDests.end(); ++oldDest)
 if (std::find(servers.begin(), servers.end(), *oldDest) == servers.end())
-actors.push_back(removeOldDestinations(&tr, *oldDest, shardMap[*oldDest], currentKeys));
+actors.push_back(removeOldDestinations(tr, *oldDest, shardMap[*oldDest], currentKeys));

 // Update serverKeys to include keys (or the currently processed subset of keys) for each SS in
 // servers
@@ -464,12 +477,12 @@ ACTOR static Future<Void> startMoveKeys(Database occ,
 // to have the same shard boundaries If that invariant was important, we would have to move this
 // inside the loop above and also set it for the src servers
 actors.push_back(krmSetRangeCoalescing(
-&tr, serverKeysPrefixFor(servers[i]), currentKeys, allKeys, serverKeysTrue));
+tr, serverKeysPrefixFor(servers[i]), currentKeys, allKeys, serverKeysTrue));
 }

 wait(waitForAll(actors));

-wait(tr.commit());
+wait(tr->commit());

 /*TraceEvent("StartMoveKeysCommitDone", relocationIntervalId)
 .detail("CommitVersion", tr.getCommittedVersion())
@@ -481,7 +494,7 @@ ACTOR static Future<Void> startMoveKeys(Database occ,
 state Error err = e;
 if (err.code() == error_code_move_to_removed_server)
 throw;
-wait(tr.onError(e));
+wait(tr->onError(e));

 if (retries % 10 == 0) {
 TraceEvent(
@@ -500,7 +513,7 @@ ACTOR static Future<Void> startMoveKeys(Database occ,
 }

 // printf("Committed moving '%s'-'%s' (version %lld)\n", keys.begin.toString().c_str(),
-// keys.end.toString().c_str(), tr.getCommittedVersion());
+// keys.end.toString().c_str(), tr->getCommittedVersion());
 TraceEvent(SevDebug, interval.end(), relocationIntervalId)
 .detail("Batches", batches)
 .detail("Shards", shards)
@@ -536,11 +549,14 @@ ACTOR Future<Void> waitForShardReady(StorageServerInterface server,
 }
 }

+// best effort to also wait for TSS on data move
+
 ACTOR Future<Void> checkFetchingState(Database cx,
 vector<UID> dest,
 KeyRange keys,
 Promise<Void> dataMovementComplete,
-UID relocationIntervalId) {
+UID relocationIntervalId,
+std::map<UID, StorageServerInterface> tssMapping) {
 state Transaction tr(cx);

 loop {
@@ -557,6 +573,7 @@ ACTOR Future<Void> checkFetchingState(Database cx,
 serverListEntries.push_back(tr.get(serverListKeyFor(dest[s])));
 state vector<Optional<Value>> serverListValues = wait(getAll(serverListEntries));
 vector<Future<Void>> requests;
+state vector<Future<Void>> tssRequests;
 for (int s = 0; s < serverListValues.size(); s++) {
 if (!serverListValues[s].present()) {
 // FIXME: Is this the right behavior? dataMovementComplete will never be sent!
@@ -567,10 +584,25 @@ ACTOR Future<Void> checkFetchingState(Database cx,
 ASSERT(si.id() == dest[s]);
 requests.push_back(
 waitForShardReady(si, keys, tr.getReadVersion().get(), GetShardStateRequest::FETCHING));
+
+auto tssPair = tssMapping.find(si.id());
+if (tssPair != tssMapping.end()) {
+tssRequests.push_back(waitForShardReady(
+tssPair->second, keys, tr.getReadVersion().get(), GetShardStateRequest::FETCHING));
+}
 }

 wait(timeoutError(waitForAll(requests), SERVER_KNOBS->SERVER_READY_QUORUM_TIMEOUT, TaskPriority::MoveKeys));
+
+// If normal servers return normally, give TSS data movement a bit of a chance, but don't block on it, and
+// ignore errors in tss requests
+if (tssRequests.size()) {
+wait(timeout(waitForAllReady(tssRequests),
+SERVER_KNOBS->SERVER_READY_QUORUM_TIMEOUT / 2,
+Void(),
+TaskPriority::MoveKeys));
+}
+
 dataMovementComplete.send(Void());
 return Void();
 } catch (Error& e) {
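checkFetchingState now waits on the required storage servers first and then gives the TSS requests only half the quorum timeout while swallowing their errors, so a slow or broken TSS cannot stall a data move. A standalone sketch of that "required, then best-effort" waiting pattern, using std::async in place of flow futures:

    #include <chrono>
    #include <cstdio>
    #include <future>
    #include <thread>
    #include <vector>

    using namespace std::chrono_literals;

    int main() {
        std::vector<std::future<void>> required, bestEffort;
        required.push_back(std::async(std::launch::async, [] { std::this_thread::sleep_for(50ms); }));
        bestEffort.push_back(std::async(std::launch::async, [] { std::this_thread::sleep_for(400ms); }));

        for (auto& f : required)
            f.wait(); // the data move cannot complete without these

        for (auto& f : bestEffort)
            f.wait_for(100ms); // shorter, best-effort window; ignore whether it finished

        std::printf("data movement considered complete\n");
    }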
@ -593,6 +625,7 @@ ACTOR static Future<Void> finishMoveKeys(Database occ,
|
||||||
FlowLock* finishMoveKeysParallelismLock,
|
FlowLock* finishMoveKeysParallelismLock,
|
||||||
bool hasRemote,
|
bool hasRemote,
|
||||||
UID relocationIntervalId,
|
UID relocationIntervalId,
|
||||||
|
std::map<UID, StorageServerInterface> tssMapping,
|
||||||
const DDEnabledState* ddEnabledState) {
|
const DDEnabledState* ddEnabledState) {
|
||||||
state TraceInterval interval("RelocateShard_FinishMoveKeys");
|
state TraceInterval interval("RelocateShard_FinishMoveKeys");
|
||||||
state TraceInterval waitInterval("");
|
state TraceInterval waitInterval("");
|
||||||
|
@ -602,6 +635,11 @@ ACTOR static Future<Void> finishMoveKeys(Database occ,
|
||||||
state int retries = 0;
|
state int retries = 0;
|
||||||
state FlowLock::Releaser releaser;
|
state FlowLock::Releaser releaser;
|
||||||
|
|
||||||
|
state std::vector<std::pair<UID, UID>> tssToKill;
|
||||||
|
state std::unordered_set<UID> tssToIgnore;
|
||||||
|
// try waiting for tss for a 2 loops, give up if they're stuck to not affect the rest of the cluster
|
||||||
|
state int waitForTSSCounter = 2;
|
||||||
|
|
||||||
ASSERT(!destinationTeam.empty());
|
ASSERT(!destinationTeam.empty());
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
@ -616,9 +654,26 @@ ACTOR static Future<Void> finishMoveKeys(Database occ,
|
||||||
|
|
||||||
state Transaction tr(occ);
|
state Transaction tr(occ);
|
||||||
|
|
||||||
// printf("finishMoveKeys( '%s'-'%s' )\n", keys.begin.toString().c_str(), keys.end.toString().c_str());
|
// printf("finishMoveKeys( '%s'-'%s' )\n", begin.toString().c_str(), keys.end.toString().c_str());
|
||||||
loop {
|
loop {
|
||||||
try {
|
try {
|
||||||
|
if (tssToKill.size()) {
|
||||||
|
TEST(true); // killing TSS because they were unavailable for movekeys
|
||||||
|
|
||||||
|
// Kill tss BEFORE committing main txn so that client requests don't make it to the tss when it
|
||||||
|
// has a different shard set than its pair use a different RYW transaction since i'm too lazy
|
||||||
|
// (and don't want to add bugs) by changing whole method to RYW. Also, using a different
|
||||||
|
// transaction makes it commit earlier which we may need to guarantee causality of tss getting
|
||||||
|
// removed before client sends a request to this key range on the new SS
|
||||||
|
wait(removeTSSPairsFromCluster(occ, tssToKill));
|
||||||
|
|
||||||
|
for (auto& tssPair : tssToKill) {
|
||||||
|
TraceEvent(SevWarnAlways, "TSS_KillMoveKeys").detail("TSSID", tssPair.second);
|
||||||
|
tssToIgnore.insert(tssPair.second);
|
||||||
|
}
|
||||||
|
tssToKill.clear();
|
||||||
|
}
|
||||||
|
|
||||||
tr.info.taskID = TaskPriority::MoveKeys;
|
tr.info.taskID = TaskPriority::MoveKeys;
|
||||||
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||||
|
|
||||||
|
@ -763,6 +818,8 @@ ACTOR static Future<Void> finishMoveKeys(Database occ,
|
||||||
// between
|
// between
|
||||||
// now and when this transaction commits.
|
// now and when this transaction commits.
|
||||||
state vector<Future<Void>> serverReady; // only for count below
|
state vector<Future<Void>> serverReady; // only for count below
|
||||||
|
state vector<Future<Void>> tssReady; // for waiting in parallel with tss
|
||||||
|
state vector<StorageServerInterface> tssReadyInterfs;
|
||||||
state vector<UID> newDestinations;
|
state vector<UID> newDestinations;
|
||||||
std::set<UID> completeSrcSet(completeSrc.begin(), completeSrc.end());
|
std::set<UID> completeSrcSet(completeSrc.begin(), completeSrc.end());
|
||||||
for (auto& it : dest) {
|
for (auto& it : dest) {
|
||||||
|
@ -789,22 +846,95 @@ ACTOR static Future<Void> finishMoveKeys(Database occ,
|
||||||
storageServerInterfaces.push_back(si);
|
storageServerInterfaces.push_back(si);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// update client info in case tss mapping changed or server got updated
|
||||||
|
|
||||||
// Wait for new destination servers to fetch the keys
|
// Wait for new destination servers to fetch the keys
|
||||||
|
|
||||||
serverReady.reserve(storageServerInterfaces.size());
|
serverReady.reserve(storageServerInterfaces.size());
|
||||||
for (int s = 0; s < storageServerInterfaces.size(); s++)
|
tssReady.reserve(storageServerInterfaces.size());
|
||||||
|
tssReadyInterfs.reserve(storageServerInterfaces.size());
|
||||||
|
for (int s = 0; s < storageServerInterfaces.size(); s++) {
|
||||||
serverReady.push_back(waitForShardReady(storageServerInterfaces[s],
|
serverReady.push_back(waitForShardReady(storageServerInterfaces[s],
|
||||||
keys,
|
keys,
|
||||||
tr.getReadVersion().get(),
|
tr.getReadVersion().get(),
|
||||||
GetShardStateRequest::READABLE));
|
GetShardStateRequest::READABLE));
|
||||||
wait(timeout(waitForAll(serverReady),
|
|
||||||
|
auto tssPair = tssMapping.find(storageServerInterfaces[s].id());
|
||||||
|
|
||||||
|
if (tssPair != tssMapping.end() && waitForTSSCounter > 0 &&
|
||||||
|
!tssToIgnore.count(tssPair->second.id())) {
|
||||||
|
tssReadyInterfs.push_back(tssPair->second);
|
||||||
|
tssReady.push_back(waitForShardReady(
|
||||||
|
tssPair->second, keys, tr.getReadVersion().get(), GetShardStateRequest::READABLE));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for all storage server moves, and explicitly swallow errors for tss ones with
|
||||||
|
// waitForAllReady If this takes too long the transaction will time out and retry, which is ok
|
||||||
|
wait(timeout(waitForAll(serverReady) && waitForAllReady(tssReady),
|
||||||
SERVER_KNOBS->SERVER_READY_QUORUM_TIMEOUT,
|
SERVER_KNOBS->SERVER_READY_QUORUM_TIMEOUT,
|
||||||
Void(),
|
Void(),
|
||||||
TaskPriority::MoveKeys));
|
TaskPriority::MoveKeys));
|
||||||
|
|
||||||
|
// Check to see if we're waiting only on tss. If so, decrement the waiting counter.
|
||||||
|
// If the waiting counter is zero, kill the slow/non-responsive tss processes before finalizing the
|
||||||
|
// data move.
|
||||||
|
if (tssReady.size()) {
|
||||||
|
bool allSSDone = true;
|
||||||
|
for (auto& f : serverReady) {
|
||||||
|
allSSDone &= f.isReady() && !f.isError();
|
||||||
|
if (!allSSDone) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (allSSDone) {
|
||||||
|
bool anyTssNotDone = false;
|
||||||
|
|
||||||
|
for (auto& f : tssReady) {
|
||||||
|
if (!f.isReady() || f.isError()) {
|
||||||
|
anyTssNotDone = true;
|
||||||
|
waitForTSSCounter--;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (anyTssNotDone && waitForTSSCounter == 0) {
|
||||||
|
for (int i = 0; i < tssReady.size(); i++) {
|
||||||
|
if (!tssReady[i].isReady() || tssReady[i].isError()) {
|
||||||
|
tssToKill.push_back(
|
||||||
|
std::pair(tssReadyInterfs[i].tssPairID.get(), tssReadyInterfs[i].id()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// repeat loop and go back to start to kill tss' before continuing on
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int count = dest.size() - newDestinations.size();
|
int count = dest.size() - newDestinations.size();
|
||||||
for (int s = 0; s < serverReady.size(); s++)
|
for (int s = 0; s < serverReady.size(); s++)
|
||||||
count += serverReady[s].isReady() && !serverReady[s].isError();
|
count += serverReady[s].isReady() && !serverReady[s].isError();
|
||||||
|
|
||||||
// printf(" fMK: moved data to %d/%d servers\n", count, serverReady.size());
|
int tssCount = 0;
|
||||||
|
for (int s = 0; s < tssReady.size(); s++)
|
||||||
|
tssCount += tssReady[s].isReady() && !tssReady[s].isError();
|
||||||
|
|
||||||
|
/*if (tssReady.size()) {
|
||||||
|
printf(" fMK: [%s - %s) moved data to %d/%d servers and %d/%d tss\n",
|
||||||
|
begin.toString().c_str(),
|
||||||
|
keys.end.toString().c_str(),
|
||||||
|
count,
|
||||||
|
serverReady.size(),
|
||||||
|
tssCount,
|
||||||
|
tssReady.size());
|
||||||
|
} else {
|
||||||
|
printf(" fMK: [%s - %s) moved data to %d/%d servers\n",
|
||||||
|
begin.toString().c_str(),
|
||||||
|
keys.end.toString().c_str(),
|
||||||
|
count,
|
||||||
|
serverReady.size());
|
||||||
|
}*/
|
||||||
TraceEvent(SevDebug, waitInterval.end(), relocationIntervalId).detail("ReadyServers", count);
|
TraceEvent(SevDebug, waitInterval.end(), relocationIntervalId).detail("ReadyServers", count);
|
||||||
|
|
||||||
if (count == dest.size()) {
|
if (count == dest.size()) {
|
||||||
|
@ -862,43 +992,48 @@ ACTOR static Future<Void> finishMoveKeys(Database occ,
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR Future<std::pair<Version, Tag>> addStorageServer(Database cx, StorageServerInterface server) {
|
ACTOR Future<std::pair<Version, Tag>> addStorageServer(Database cx, StorageServerInterface server) {
|
||||||
state Transaction tr(cx);
|
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(cx);
|
||||||
|
state KeyBackedMap<UID, UID> tssMapDB = KeyBackedMap<UID, UID>(tssMappingKeys.begin);
|
||||||
state int maxSkipTags = 1;
|
state int maxSkipTags = 1;
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
try {
|
try {
|
||||||
state Future<RangeResult> fTagLocalities = tr.getRange(tagLocalityListKeys, CLIENT_KNOBS->TOO_MANY);
|
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||||
state Future<Optional<Value>> fv = tr.get(serverListKeyFor(server.id()));
|
|
||||||
|
|
||||||
state Future<Optional<Value>> fExclProc = tr.get(
|
// FIXME: don't fetch tag localities, all tags, and history tags if tss. Just fetch pair's tag
|
||||||
|
state Future<RangeResult> fTagLocalities = tr->getRange(tagLocalityListKeys, CLIENT_KNOBS->TOO_MANY);
|
||||||
|
state Future<Optional<Value>> fv = tr->get(serverListKeyFor(server.id()));
|
||||||
|
|
||||||
|
state Future<Optional<Value>> fExclProc = tr->get(
|
||||||
StringRef(encodeExcludedServersKey(AddressExclusion(server.address().ip, server.address().port))));
|
StringRef(encodeExcludedServersKey(AddressExclusion(server.address().ip, server.address().port))));
|
||||||
state Future<Optional<Value>> fExclIP =
|
state Future<Optional<Value>> fExclIP =
|
||||||
tr.get(StringRef(encodeExcludedServersKey(AddressExclusion(server.address().ip))));
|
tr->get(StringRef(encodeExcludedServersKey(AddressExclusion(server.address().ip))));
|
||||||
state Future<Optional<Value>> fFailProc =
|
state Future<Optional<Value>> fFailProc = tr->get(
|
||||||
tr.get(StringRef(encodeFailedServersKey(AddressExclusion(server.address().ip, server.address().port))));
|
StringRef(encodeFailedServersKey(AddressExclusion(server.address().ip, server.address().port))));
|
||||||
state Future<Optional<Value>> fFailIP =
|
state Future<Optional<Value>> fFailIP =
|
||||||
tr.get(StringRef(encodeFailedServersKey(AddressExclusion(server.address().ip))));
|
tr->get(StringRef(encodeFailedServersKey(AddressExclusion(server.address().ip))));
|
||||||
|
|
||||||
state Future<Optional<Value>> fExclProc2 =
|
state Future<Optional<Value>> fExclProc2 =
|
||||||
server.secondaryAddress().present()
|
server.secondaryAddress().present()
|
||||||
? tr.get(StringRef(encodeExcludedServersKey(
|
? tr->get(StringRef(encodeExcludedServersKey(
|
||||||
AddressExclusion(server.secondaryAddress().get().ip, server.secondaryAddress().get().port))))
|
AddressExclusion(server.secondaryAddress().get().ip, server.secondaryAddress().get().port))))
|
||||||
: Future<Optional<Value>>(Optional<Value>());
|
: Future<Optional<Value>>(Optional<Value>());
|
||||||
state Future<Optional<Value>> fExclIP2 =
|
state Future<Optional<Value>> fExclIP2 =
|
||||||
server.secondaryAddress().present()
|
server.secondaryAddress().present()
|
||||||
? tr.get(StringRef(encodeExcludedServersKey(AddressExclusion(server.secondaryAddress().get().ip))))
|
? tr->get(StringRef(encodeExcludedServersKey(AddressExclusion(server.secondaryAddress().get().ip))))
|
||||||
: Future<Optional<Value>>(Optional<Value>());
|
: Future<Optional<Value>>(Optional<Value>());
|
||||||
state Future<Optional<Value>> fFailProc2 =
|
state Future<Optional<Value>> fFailProc2 =
|
||||||
server.secondaryAddress().present()
|
server.secondaryAddress().present()
|
||||||
? tr.get(StringRef(encodeFailedServersKey(
|
? tr->get(StringRef(encodeFailedServersKey(
|
||||||
AddressExclusion(server.secondaryAddress().get().ip, server.secondaryAddress().get().port))))
|
AddressExclusion(server.secondaryAddress().get().ip, server.secondaryAddress().get().port))))
|
||||||
: Future<Optional<Value>>(Optional<Value>());
|
: Future<Optional<Value>>(Optional<Value>());
|
||||||
state Future<Optional<Value>> fFailIP2 =
|
state Future<Optional<Value>> fFailIP2 =
|
||||||
server.secondaryAddress().present()
|
server.secondaryAddress().present()
|
||||||
? tr.get(StringRef(encodeFailedServersKey(AddressExclusion(server.secondaryAddress().get().ip))))
|
? tr->get(StringRef(encodeFailedServersKey(AddressExclusion(server.secondaryAddress().get().ip))))
|
||||||
: Future<Optional<Value>>(Optional<Value>());
|
: Future<Optional<Value>>(Optional<Value>());
|
||||||
|
|
||||||
state Future<RangeResult> fTags = tr.getRange(serverTagKeys, CLIENT_KNOBS->TOO_MANY, true);
|
state Future<RangeResult> fTags = tr->getRange(serverTagKeys, CLIENT_KNOBS->TOO_MANY, true);
|
||||||
state Future<RangeResult> fHistoryTags = tr.getRange(serverTagHistoryKeys, CLIENT_KNOBS->TOO_MANY, true);
|
state Future<RangeResult> fHistoryTags = tr->getRange(serverTagHistoryKeys, CLIENT_KNOBS->TOO_MANY, true);
|
||||||
|
|
||||||
wait(success(fTagLocalities) && success(fv) && success(fTags) && success(fHistoryTags) &&
|
wait(success(fTagLocalities) && success(fv) && success(fTags) && success(fHistoryTags) &&
|
||||||
success(fExclProc) && success(fExclIP) && success(fFailProc) && success(fFailIP) &&
|
success(fExclProc) && success(fExclIP) && success(fFailProc) && success(fFailIP) &&
|
||||||
|
@ -914,63 +1049,90 @@ ACTOR Future<std::pair<Version, Tag>> addStorageServer(Database cx, StorageServe
|
||||||
if (fTagLocalities.get().more || fTags.get().more || fHistoryTags.get().more)
|
if (fTagLocalities.get().more || fTags.get().more || fHistoryTags.get().more)
|
||||||
ASSERT(false);
|
ASSERT(false);
|
||||||
|
|
||||||
int8_t maxTagLocality = 0;
|
state Tag tag;
|
||||||
state int8_t locality = -1;
|
if (server.isTss()) {
|
||||||
for (auto& kv : fTagLocalities.get()) {
|
bool foundTag = false;
|
||||||
int8_t loc = decodeTagLocalityListValue(kv.value);
|
for (auto& it : fTags.get()) {
|
||||||
if (decodeTagLocalityListKey(kv.key) == server.locality.dcId()) {
|
UID key = decodeServerTagKey(it.key);
|
||||||
locality = loc;
|
if (key == server.tssPairID.get()) {
|
||||||
break;
|
tag = decodeServerTagValue(it.value);
|
||||||
}
|
foundTag = true;
|
||||||
maxTagLocality = std::max(maxTagLocality, loc);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (locality == -1) {
|
|
||||||
locality = maxTagLocality + 1;
|
|
||||||
if (locality < 0)
|
|
||||||
throw recruitment_failed();
|
|
||||||
tr.set(tagLocalityListKeyFor(server.locality.dcId()), tagLocalityListValue(locality));
|
|
||||||
}
|
|
||||||
|
|
||||||
int skipTags = deterministicRandom()->randomInt(0, maxSkipTags);
|
|
||||||
|
|
||||||
state uint16_t tagId = 0;
|
|
||||||
std::vector<uint16_t> usedTags;
|
|
||||||
for (auto& it : fTags.get()) {
|
|
||||||
Tag t = decodeServerTagValue(it.value);
|
|
||||||
if (t.locality == locality) {
|
|
||||||
usedTags.push_back(t.id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (auto& it : fHistoryTags.get()) {
|
|
||||||
Tag t = decodeServerTagValue(it.value);
|
|
||||||
if (t.locality == locality) {
|
|
||||||
usedTags.push_back(t.id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
std::sort(usedTags.begin(), usedTags.end());
|
|
||||||
|
|
||||||
int usedIdx = 0;
|
|
||||||
for (; usedTags.size() > 0 && tagId <= usedTags.end()[-1]; tagId++) {
|
|
||||||
if (tagId < usedTags[usedIdx]) {
|
|
||||||
if (skipTags == 0)
|
|
||||||
break;
|
break;
|
||||||
skipTags--;
|
}
|
||||||
} else {
|
}
|
||||||
usedIdx++;
|
if (!foundTag) {
|
||||||
|
throw recruitment_failed();
|
||||||
|
}
|
||||||
|
|
||||||
|
tssMapDB.set(tr, server.tssPairID.get(), server.id());
|
||||||
|
|
||||||
|
} else {
|
||||||
|
int8_t maxTagLocality = 0;
|
||||||
|
state int8_t locality = -1;
|
||||||
|
for (auto& kv : fTagLocalities.get()) {
|
||||||
|
int8_t loc = decodeTagLocalityListValue(kv.value);
|
||||||
|
if (decodeTagLocalityListKey(kv.key) == server.locality.dcId()) {
|
||||||
|
locality = loc;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
maxTagLocality = std::max(maxTagLocality, loc);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (locality == -1) {
|
||||||
|
locality = maxTagLocality + 1;
|
||||||
|
if (locality < 0) {
|
||||||
|
throw recruitment_failed();
|
||||||
|
}
|
||||||
|
tr->set(tagLocalityListKeyFor(server.locality.dcId()), tagLocalityListValue(locality));
|
||||||
|
}
|
||||||
|
|
||||||
|
int skipTags = deterministicRandom()->randomInt(0, maxSkipTags);
|
||||||
|
|
||||||
|
state uint16_t tagId = 0;
|
||||||
|
std::vector<uint16_t> usedTags;
|
||||||
|
for (auto& it : fTags.get()) {
|
||||||
|
Tag t = decodeServerTagValue(it.value);
|
||||||
|
if (t.locality == locality) {
|
||||||
|
usedTags.push_back(t.id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (auto& it : fHistoryTags.get()) {
|
||||||
|
Tag t = decodeServerTagValue(it.value);
|
||||||
|
if (t.locality == locality) {
|
||||||
|
usedTags.push_back(t.id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::sort(usedTags.begin(), usedTags.end());
|
||||||
|
|
||||||
|
int usedIdx = 0;
|
||||||
|
for (; usedTags.size() > 0 && tagId <= usedTags.end()[-1]; tagId++) {
|
||||||
|
if (tagId < usedTags[usedIdx]) {
|
||||||
|
if (skipTags == 0)
|
||||||
|
break;
|
||||||
|
skipTags--;
|
||||||
|
} else {
|
||||||
|
usedIdx++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tagId += skipTags;
|
||||||
|
|
||||||
|
tag = Tag(locality, tagId);
|
||||||
|
|
||||||
|
tr->set(serverTagKeyFor(server.id()), serverTagValue(tag));
|
||||||
|
KeyRange conflictRange = singleKeyRange(serverTagConflictKeyFor(tag));
|
||||||
|
tr->addReadConflictRange(conflictRange);
|
||||||
|
tr->addWriteConflictRange(conflictRange);
|
||||||
|
|
||||||
|
if (SERVER_KNOBS->TSS_HACK_IDENTITY_MAPPING) {
|
||||||
|
// THIS SHOULD NEVER BE ENABLED IN ANY NON-TESTING ENVIRONMENT
|
||||||
|
TraceEvent(SevError, "TSSIdentityMappingEnabled");
|
||||||
|
tssMapDB.set(tr, server.id(), server.id());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tagId += skipTags;
|
|
||||||
|
|
||||||
state Tag tag(locality, tagId);
|
tr->set(serverListKeyFor(server.id()), serverListValue(server));
|
||||||
tr.set(serverTagKeyFor(server.id()), serverTagValue(tag));
|
wait(tr->commit());
|
||||||
tr.set(serverListKeyFor(server.id()), serverListValue(server));
|
return std::make_pair(tr->getCommittedVersion(), tag);
|
||||||
KeyRange conflictRange = singleKeyRange(serverTagConflictKeyFor(tag));
|
|
||||||
tr.addReadConflictRange(conflictRange);
|
|
||||||
tr.addWriteConflictRange(conflictRange);
|
|
||||||
|
|
||||||
wait(tr.commit());
|
|
||||||
return std::make_pair(tr.getCommittedVersion(), tag);
|
|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
if (e.code() == error_code_commit_unknown_result)
|
if (e.code() == error_code_commit_unknown_result)
|
||||||
throw recruitment_failed(); // There is a remote possibility that we successfully added ourselves and
|
throw recruitment_failed(); // There is a remote possibility that we successfully added ourselves and
|
||||||
|
@@ -980,12 +1142,12 @@ ACTOR Future<std::pair<Version, Tag>> addStorageServer(Database cx, StorageServe
 				maxSkipTags = SERVER_KNOBS->MAX_SKIP_TAGS;
 			}
 
-			wait(tr.onError(e));
+			wait(tr->onError(e));
 		}
 	}
 }
 // A SS can be removed only if all data (shards) on the SS have been moved away from the SS.
-ACTOR Future<bool> canRemoveStorageServer(Transaction* tr, UID serverID) {
+ACTOR Future<bool> canRemoveStorageServer(Reference<ReadYourWritesTransaction> tr, UID serverID) {
 	RangeResult keys = wait(krmGetRanges(tr, serverKeysPrefixFor(serverID), allKeys, 2));
 
 	ASSERT(keys.size() >= 2);
@@ -1005,34 +1167,37 @@ ACTOR Future<bool> canRemoveStorageServer(Transaction* tr, UID serverID) {
 
 ACTOR Future<Void> removeStorageServer(Database cx,
                                        UID serverID,
+                                       Optional<UID> tssPairID,
                                        MoveKeysLock lock,
                                        const DDEnabledState* ddEnabledState) {
-	state Transaction tr(cx);
+	state KeyBackedMap<UID, UID> tssMapDB = KeyBackedMap<UID, UID>(tssMappingKeys.begin);
+	state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(cx);
 	state bool retry = false;
 	state int noCanRemoveCount = 0;
 
 	loop {
 		try {
-			tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
-			wait(checkMoveKeysLock(&tr, lock, ddEnabledState));
+			tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
+			tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
+			wait(checkMoveKeysLock(&(tr->getTransaction()), lock, ddEnabledState));
 			TraceEvent("RemoveStorageServerLocked")
 			    .detail("ServerID", serverID)
-			    .detail("Version", tr.getReadVersion().get());
+			    .detail("Version", tr->getReadVersion().get());
 
-			state bool canRemove = wait(canRemoveStorageServer(&tr, serverID));
+			state bool canRemove = wait(canRemoveStorageServer(tr, serverID));
 			if (!canRemove) {
 				TEST(true); // The caller had a transaction in flight that assigned keys to the server. Wait for it to
 				            // reverse its mistake.
 				TraceEvent(SevWarn, "NoCanRemove").detail("Count", noCanRemoveCount++).detail("ServerID", serverID);
 				wait(delayJittered(SERVER_KNOBS->REMOVE_RETRY_DELAY, TaskPriority::DataDistributionLaunch));
-				tr.reset();
+				tr->reset();
 				TraceEvent("RemoveStorageServerRetrying").detail("CanRemove", canRemove);
 			} else {
-				state Future<Optional<Value>> fListKey = tr.get(serverListKeyFor(serverID));
-				state Future<RangeResult> fTags = tr.getRange(serverTagKeys, CLIENT_KNOBS->TOO_MANY);
-				state Future<RangeResult> fHistoryTags = tr.getRange(serverTagHistoryKeys, CLIENT_KNOBS->TOO_MANY);
-				state Future<RangeResult> fTagLocalities = tr.getRange(tagLocalityListKeys, CLIENT_KNOBS->TOO_MANY);
-				state Future<RangeResult> fTLogDatacenters = tr.getRange(tLogDatacentersKeys, CLIENT_KNOBS->TOO_MANY);
+				state Future<Optional<Value>> fListKey = tr->get(serverListKeyFor(serverID));
+				state Future<RangeResult> fTags = tr->getRange(serverTagKeys, CLIENT_KNOBS->TOO_MANY);
+				state Future<RangeResult> fHistoryTags = tr->getRange(serverTagHistoryKeys, CLIENT_KNOBS->TOO_MANY);
+				state Future<RangeResult> fTagLocalities = tr->getRange(tagLocalityListKeys, CLIENT_KNOBS->TOO_MANY);
+				state Future<RangeResult> fTLogDatacenters = tr->getRange(tLogDatacentersKeys, CLIENT_KNOBS->TOO_MANY);
 
 				wait(success(fListKey) && success(fTags) && success(fHistoryTags) && success(fTagLocalities) &&
 				     success(fTLogDatacenters));
@@ -1072,22 +1237,32 @@ ACTOR Future<Void> removeStorageServer(Database cx,
 				if (locality >= 0 && !allLocalities.count(locality)) {
 					for (auto& it : fTagLocalities.get()) {
 						if (locality == decodeTagLocalityListValue(it.value)) {
-							tr.clear(it.key);
+							tr->clear(it.key);
 							break;
 						}
 					}
 				}
 
-				tr.clear(serverListKeyFor(serverID));
-				tr.clear(serverTagKeyFor(serverID));
-				tr.clear(serverTagHistoryRangeFor(serverID));
+				tr->clear(serverListKeyFor(serverID));
+				tr->clear(serverTagKeyFor(serverID)); // A tss uses this to communicate shutdown but it never has a
+				                                      // server tag key set in the first place
+				tr->clear(serverTagHistoryRangeFor(serverID));
 
+				if (SERVER_KNOBS->TSS_HACK_IDENTITY_MAPPING) {
+					// THIS SHOULD NEVER BE ENABLED IN ANY NON-TESTING ENVIRONMENT
+					TraceEvent(SevError, "TSSIdentityMappingEnabled");
+					tssMapDB.erase(tr, serverID);
+				} else if (tssPairID.present()) {
+					tssMapDB.erase(tr, tssPairID.get());
+				}
+
 				retry = true;
-				wait(tr.commit());
+				wait(tr->commit());
 				return Void();
 			}
 		} catch (Error& e) {
 			state Error err = e;
-			wait(tr.onError(e));
+			wait(tr->onError(e));
 			TraceEvent("RemoveStorageServerRetrying").error(err);
 		}
 	}
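removeStorageServer above loops until canRemoveStorageServer reports that no shards remain assigned, sleeping a jittered delay between attempts so that concurrent removals do not retry in lockstep. A self-contained sketch of that retry shape, assuming a caller-supplied check and base delay (the jitter range here is illustrative, not the exact delayJittered behaviour):

#include <chrono>
#include <functional>
#include <random>
#include <thread>

// Keep calling `attempt` until it succeeds, sleeping a randomly jittered
// delay between tries so concurrent retries spread out in time.
void retryWithJitter(const std::function<bool()>& attempt, double baseDelaySeconds) {
    std::mt19937 rng(std::random_device{}());
    std::uniform_real_distribution<double> jitter(0.8, 1.2); // illustrative +/-20% jitter
    while (!attempt()) {
        std::this_thread::sleep_for(std::chrono::duration<double>(baseDelaySeconds * jitter(rng)));
    }
}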
@@ -1180,11 +1355,20 @@ ACTOR Future<Void> moveKeys(Database cx,
                             const DDEnabledState* ddEnabledState) {
 	ASSERT(destinationTeam.size());
 	std::sort(destinationTeam.begin(), destinationTeam.end());
-	wait(startMoveKeys(
-	    cx, keys, destinationTeam, lock, startMoveKeysParallelismLock, relocationIntervalId, ddEnabledState));
+	state std::map<UID, StorageServerInterface> tssMapping;
 
+	wait(startMoveKeys(cx,
+	                   keys,
+	                   destinationTeam,
+	                   lock,
+	                   startMoveKeysParallelismLock,
+	                   relocationIntervalId,
+	                   &tssMapping,
+	                   ddEnabledState));
 
 	state Future<Void> completionSignaller =
-	    checkFetchingState(cx, healthyDestinations, keys, dataMovementComplete, relocationIntervalId);
+	    checkFetchingState(cx, healthyDestinations, keys, dataMovementComplete, relocationIntervalId, tssMapping);
 
 	wait(finishMoveKeys(cx,
 	                    keys,
@@ -1193,6 +1377,7 @@ ACTOR Future<Void> moveKeys(Database cx,
 	                    finishMoveKeysParallelismLock,
 	                    hasRemote,
 	                    relocationIntervalId,
+	                    tssMapping,
 	                    ddEnabledState));
 
 	// This is defensive, but make sure that we always say that the movement is complete before moveKeys completes
@@ -1228,6 +1413,13 @@ void seedShardServers(Arena& arena, CommitTransactionRef& tr, vector<StorageServ
 	for (auto& s : servers) {
 		tr.set(arena, serverTagKeyFor(s.id()), serverTagValue(server_tag[s.id()]));
 		tr.set(arena, serverListKeyFor(s.id()), serverListValue(s));
+		if (SERVER_KNOBS->TSS_HACK_IDENTITY_MAPPING) {
+			// THIS SHOULD NEVER BE ENABLED IN ANY NON-TESTING ENVIRONMENT
+			TraceEvent(SevError, "TSSIdentityMappingEnabled");
+			// hack key-backed map here since we can't really change CommitTransactionRef to a RYW transaction
+			Key uidRef = Codec<UID>::pack(s.id()).pack();
+			tr.set(arena, uidRef.withPrefix(tssMappingKeys.begin), uidRef);
+		}
 	}
 
 	std::vector<Tag> serverTags;
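The seedShardServers hunk writes the TSS identity-mapping entry by hand, because a CommitTransactionRef cannot go through the key-backed map / RYW path: the key is the packed UID prefixed with tssMappingKeys.begin and the value is the same packed UID. A rough standalone sketch of that key layout, with std::string standing in for FoundationDB's Key type and a placeholder prefix and id:

#include <string>
#include <utility>

// Build a (key, value) pair for a prefix-scoped identity mapping:
// key = prefix + encoded id, value = encoded id.
std::pair<std::string, std::string> identityMappingEntry(const std::string& prefix,
                                                         const std::string& encodedId) {
    return { prefix + encodedId, encodedId };
}

// Usage (placeholder values): identityMappingEntry("tssMapping/", "0123456789abcdef")
// yields key "tssMapping/0123456789abcdef" mapping to value "0123456789abcdef".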
@@ -89,13 +89,14 @@ ACTOR Future<std::pair<Version, Tag>> addStorageServer(Database cx, StorageServe
 
 ACTOR Future<Void> removeStorageServer(Database cx,
                                        UID serverID,
+                                       Optional<UID> tssPairID, // if serverID is a tss, set to its ss pair id
                                        MoveKeysLock lock,
                                        const DDEnabledState* ddEnabledState);
 // Removes the given storage server permanently from the database. It must already
 // have no shards assigned to it. The storage server MUST NOT be added again after this
 // (though a new storage server with a new unique ID may be recruited from the same fdbserver).
 
-ACTOR Future<bool> canRemoveStorageServer(Transaction* tr, UID serverID);
+ACTOR Future<bool> canRemoveStorageServer(Reference<ReadYourWritesTransaction> tr, UID serverID);
 // Returns true if the given storage server has no keys assigned to it and may be safely removed
 // Obviously that could change later!
 ACTOR Future<Void> removeKeysFromFailedServer(Database cx,
@@ -842,7 +842,7 @@ void commitMessages(Reference<LogData> self,
 			TEST(true); // Splitting commit messages across multiple blocks
 			messages1 = StringRef(block.end(), bytes);
 			block.append(block.arena(), messages.begin(), bytes);
-			self->messageBlocks.push_back(std::make_pair(version, block));
+			self->messageBlocks.emplace_back(version, block);
 			addedBytes += int64_t(block.size()) * SERVER_KNOBS->TLOG_MESSAGE_BLOCK_OVERHEAD_FACTOR;
 			messages = messages.substr(bytes);
 		}
@@ -855,7 +855,7 @@ void commitMessages(Reference<LogData> self,
 	// Copy messages into block
 	ASSERT(messages.size() <= block.capacity() - block.size());
 	block.append(block.arena(), messages.begin(), messages.size());
-	self->messageBlocks.push_back(std::make_pair(version, block));
+	self->messageBlocks.emplace_back(version, block);
 	addedBytes += int64_t(block.size()) * SERVER_KNOBS->TLOG_MESSAGE_BLOCK_OVERHEAD_FACTOR;
 	messages = StringRef(block.end() - messages.size(), messages.size());
 
@@ -873,7 +873,7 @@ void commitMessages(Reference<LogData> self,
 			int offs = tag->messageOffsets[m];
 			uint8_t const* p =
 			    offs < messages1.size() ? messages1.begin() + offs : messages.begin() + offs - messages1.size();
-			tsm->value.version_messages.push_back(std::make_pair(version, LengthPrefixedStringRef((uint32_t*)p)));
+			tsm->value.version_messages.emplace_back(version, LengthPrefixedStringRef((uint32_t*)p));
 			if (tsm->value.version_messages.back().second.expectedSize() > SERVER_KNOBS->MAX_MESSAGE_SIZE) {
 				TraceEvent(SevWarnAlways, "LargeMessage")
 				    .detail("Size", tsm->value.version_messages.back().second.expectedSize());
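Several hunks in this commit replace push_back(std::make_pair(...)) with emplace_back(...), which forwards the arguments and constructs the pair directly inside the container instead of building a temporary first. A minimal illustration of the equivalence:

#include <cstdint>
#include <string>
#include <utility>
#include <vector>

int main() {
    std::vector<std::pair<int64_t, std::string>> blocks;

    // Old style: construct a temporary pair, then move it into the vector.
    blocks.push_back(std::make_pair(int64_t{1}, std::string("block-a")));

    // New style: forward the arguments and construct the pair in place.
    blocks.emplace_back(2, "block-b");

    return blocks.size() == 2 ? 0 : 1;
}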
@@ -158,6 +158,7 @@ struct ProxyCommitData {
 	EventMetricHandle<SingleKeyMutation> singleKeyMutationEvent;
 
 	std::map<UID, Reference<StorageInfo>> storageCache;
+	std::unordered_map<UID, StorageServerInterface> tssMapping;
 	std::map<Tag, Version> tag_popped;
 	Deque<std::pair<Version, Version>> txsPopVersions;
 	Version lastTxsPop;
@@ -26,6 +26,7 @@
 #include "fdbclient/NativeAPI.actor.h"
 #include "fdbclient/ReadYourWrites.h"
 #include "fdbclient/RunTransaction.actor.h"
+#include "fdbserver/Knobs.h"
 #include "fdbserver/TesterInterface.actor.h"
 #include "fdbserver/WorkerInterface.actor.h"
 #include "fdbserver/ServerDBInfo.h"
@@ -308,9 +309,13 @@ ACTOR Future<int64_t> getMaxStorageServerQueueSize(Database cx, Reference<AsyncV
 			    .detail("SS", servers[i].id());
 			throw attribute_not_found();
 		}
-		messages.push_back(timeoutError(itr->second.eventLogRequest.getReply(
-		                                    EventLogRequest(StringRef(servers[i].id().toString() + "/StorageMetrics"))),
-		                                1.0));
+		// Ignore TSS in add delay mode since it can purposefully freeze forever
+		if (!servers[i].isTss() || !g_network->isSimulated() ||
+		    g_simulator.tssMode != ISimulator::TSSMode::EnabledAddDelay) {
+			messages.push_back(timeoutError(itr->second.eventLogRequest.getReply(EventLogRequest(
+			                                    StringRef(servers[i].id().toString() + "/StorageMetrics"))),
+			                                1.0));
+		}
 	}
 
 	wait(waitForAll(messages));
@@ -516,7 +521,15 @@ ACTOR Future<bool> getStorageServersRecruiting(Database cx, WorkerInterface dist
 		    1.0));
 
 		TraceEvent("StorageServersRecruiting").detail("Message", recruitingMessage.toString());
-		return recruitingMessage.getValue("State") == "Recruiting";
+		if (recruitingMessage.getValue("State") == "Recruiting") {
+			std::string tssValue;
+			// if we're tss recruiting, that's fine because that can block indefinitely if only 1 free storage process
+			if (!recruitingMessage.tryGetValue("IsTSS", tssValue) || tssValue == "False") {
+				return true;
+			}
+		}
+		return false;
 	} catch (Error& e) {
 		TraceEvent("QuietDatabaseFailure", distributorWorker.id())
 		    .detail("Reason", "Failed to extract StorageServersRecruiting")
@@ -586,6 +599,10 @@ ACTOR Future<Void> waitForQuietDatabase(Database cx,
 	if (g_network->isSimulated())
 		wait(delay(5.0));
 
+	// The quiet database check (which runs at the end of every test) will always time out due to active data movement.
+	// To get around this, quiet Database will disable the perpetual wiggle in the setup phase.
+	wait(setPerpetualStorageWiggle(cx, false, true));
+
 	// Require 3 consecutive successful quiet database checks spaced 2 second apart
 	state int numSuccesses = 0;
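The waitForQuietDatabase context above requires several consecutive successful checks, spaced a couple of seconds apart, before declaring the database quiet; any failed check resets the streak. A simplified standalone sketch of that counting pattern (the check itself is a placeholder callback, not FoundationDB's metric probes):

#include <chrono>
#include <functional>
#include <thread>

// Return once `isQuiet` has succeeded `required` times in a row; a failure
// resets the streak, mirroring the consecutive-success requirement above.
void waitForConsecutiveSuccesses(const std::function<bool()>& isQuiet, int required) {
    int numSuccesses = 0;
    while (numSuccesses < required) {
        if (isQuiet()) {
            ++numSuccesses;
        } else {
            numSuccesses = 0; // start over after any failed check
        }
        std::this_thread::sleep_for(std::chrono::seconds(2));
    }
}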
@@ -54,7 +54,9 @@ StringRef radix_join(const StringRef& key1, const StringRef& key2, Arena& arena)
 	uint8_t* s = new (arena) uint8_t[rsize];
 
 	memcpy(s, key1.begin(), key1.size());
-	memcpy(s + key1.size(), key2.begin(), key2.size());
+	if (key2.size() > 0) {
+		memcpy(s + key1.size(), key2.begin(), key2.size());
+	}
 
 	return StringRef(s, rsize);
 }
@@ -591,7 +593,9 @@ StringRef radix_tree::iterator::getKey(uint8_t* content) const {
 	auto node = m_pointee;
 	uint32_t pos = m_pointee->m_depth;
 	while (true) {
-		memcpy(content + pos, node->getKey().begin(), node->getKeySize());
+		if (node->getKeySize() > 0) {
+			memcpy(content + pos, node->getKey().begin(), node->getKeySize());
+		}
 		node = node->m_parent;
 		if (node == nullptr || pos <= 0)
 			break;
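Both radix-tree hunks guard a memcpy whose source may be an empty key: an empty StringRef can carry a null begin() pointer, and passing a null source to memcpy is undefined behaviour even when the length is zero, so the copy is skipped when the size is zero. A small self-contained version of the guarded join, using std::string spans in place of StringRef:

#include <cstring>
#include <string>
#include <vector>

// Concatenate two byte spans, skipping the copy when a span is empty so that
// memcpy is never handed a potentially-null source pointer.
std::vector<unsigned char> joinKeys(const std::string& key1, const std::string& key2) {
    std::vector<unsigned char> out(key1.size() + key2.size());
    if (!key1.empty()) {
        std::memcpy(out.data(), key1.data(), key1.size());
    }
    if (!key2.empty()) {
        std::memcpy(out.data() + key1.size(), key2.data(), key2.size());
    }
    return out;
}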
@@ -719,9 +719,11 @@ ACTOR Future<Void> trackEachStorageServer(
 		when(state std::pair<UID, Optional<StorageServerInterface>> change = waitNext(serverChanges)) {
 			wait(delay(0)); // prevent storageServerTracker from getting cancelled while on the call stack
 			if (change.second.present()) {
-				auto& a = actors[change.first];
-				a = Future<Void>();
-				a = splitError(trackStorageServerQueueInfo(self, change.second.get()), err);
+				if (!change.second.get().isTss()) {
+					auto& a = actors[change.first];
+					a = Future<Void>();
+					a = splitError(trackStorageServerQueueInfo(self, change.second.get()), err);
+				}
 			} else
 				actors.erase(change.first);
 		}
@@ -21,6 +21,7 @@
 #ifndef FDBSERVER_RATEKEEPERINTERFACE_H
 #define FDBSERVER_RATEKEEPERINTERFACE_H
 
+#include "fdbclient/CommitProxyInterface.h"
 #include "fdbclient/FDBTypes.h"
 #include "fdbrpc/fdbrpc.h"
 #include "fdbrpc/Locality.h"
@@ -49,29 +50,6 @@ struct RatekeeperInterface {
 	}
 };
 
-struct ClientTagThrottleLimits {
-	double tpsRate;
-	double expiration;
-
-	ClientTagThrottleLimits() : tpsRate(0), expiration(0) {}
-	ClientTagThrottleLimits(double tpsRate, double expiration) : tpsRate(tpsRate), expiration(expiration) {}
-
-	template <class Archive>
-	void serialize(Archive& ar) {
-		// Convert expiration time to a duration to avoid clock differences
-		double duration = 0;
-		if (!ar.isDeserializing) {
-			duration = expiration - now();
-		}
-
-		serializer(ar, tpsRate, duration);
-
-		if (ar.isDeserializing) {
-			expiration = now() + duration;
-		}
-	}
-};
-
 struct TransactionCommitCostEstimation {
 	int opsSum = 0;
 	uint64_t costSum = 0;
@@ -91,17 +69,6 @@ struct TransactionCommitCostEstimation {
 	}
 };
 
-struct ClientTrCommitCostEstimation {
-	int opsCount = 0;
-	uint64_t writeCosts = 0;
-	std::deque<std::pair<int, uint64_t>> clearIdxCosts;
-	uint32_t expensiveCostEstCount = 0;
-	template <class Ar>
-	void serialize(Ar& ar) {
-		serializer(ar, opsCount, writeCosts, clearIdxCosts, expensiveCostEstCount);
-	}
-};
-
 struct GetRateInfoReply {
 	constexpr static FileIdentifier file_identifier = 7845006;
 	double transactionRate;
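The ClientTagThrottleLimits struct removed above (it leaves this header in the merge) serializes its absolute expiration time as a remaining duration, so the sender's and receiver's clocks never have to agree; the receiver rebases the duration onto its own clock on deserialization. A standalone sketch of that idea as explicit encode/decode helpers, with now() taken from the local steady clock rather than flow's time source:

#include <chrono>

static double now() {
    using namespace std::chrono;
    return duration<double>(steady_clock::now().time_since_epoch()).count();
}

// Encode: ship the time remaining until expiration rather than an absolute time.
double encodeExpiration(double expiration) {
    return expiration - now();
}

// Decode: rebase the remaining duration onto the receiver's clock.
double decodeExpiration(double duration) {
    return now() + duration;
}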
@@ -233,7 +233,7 @@ ACTOR Future<Void> resolveBatch(Reference<Resolver> self, ResolveTransactionBatc
 	self->resolvedStateBytes += stateBytes;
 
 	if (stateBytes > 0)
-		self->recentStateTransactionSizes.push_back(std::make_pair(req.version, stateBytes));
+		self->recentStateTransactionSizes.emplace_back(req.version, stateBytes);
 
 	ASSERT(req.version >= firstUnseenVersion);
 	ASSERT(firstUnseenVersion >= self->debugMinRecentStateVersion);
@@ -35,10 +35,10 @@
 #include "fdbrpc/Locality.h"
 #include "fdbrpc/Stats.h"
 #include "fdbserver/CoordinationInterface.h"
-#include "fdbclient/RestoreWorkerInterface.actor.h"
 #include "fdbserver/MutationTracking.h"
 #include "fdbserver/RestoreUtil.h"
 #include "fdbserver/RestoreRoleCommon.actor.h"
+#include "fdbserver/RestoreWorkerInterface.actor.h"
 
 #include "flow/actorcompiler.h" // has to be last include
 
@@ -35,6 +35,7 @@
 #include "fdbclient/NativeAPI.actor.h"
 #include "fdbrpc/IAsyncFile.h"
 #include "fdbclient/BackupAgent.actor.h"
+#include "fdbserver/Knobs.h"
 
 #include "flow/actorcompiler.h" // has to be last include
 
@@ -394,4 +395,4 @@ Future<Void> sendBatchRequests(RequestStream<Request> Interface::*channel,
 }
 
 #include "flow/unactorcompiler.h"
 #endif // FDBSERVER_RESTORECOMMON_ACTOR_H
@@ -34,10 +34,10 @@
 #include "fdbrpc/Stats.h"
 #include "fdbserver/CoordinationInterface.h"
 #include "fdbrpc/Locality.h"
-#include "fdbclient/RestoreWorkerInterface.actor.h"
 #include "fdbserver/RestoreUtil.h"
 #include "fdbserver/RestoreCommon.actor.h"
 #include "fdbserver/RestoreRoleCommon.actor.h"
+#include "fdbserver/RestoreWorkerInterface.actor.h"
 #include "fdbclient/BackupContainer.h"
 
 #include "flow/actorcompiler.h" // has to be last include
@@ -37,7 +37,7 @@
 #include "fdbrpc/Locality.h"
 #include "fdbrpc/Stats.h"
 #include "fdbserver/CoordinationInterface.h"
-#include "fdbclient/RestoreWorkerInterface.actor.h"
+#include "fdbserver/RestoreWorkerInterface.actor.h"
 #include "fdbserver/RestoreUtil.h"
 
 #include "flow/actorcompiler.h" // has to be last include
@@ -28,6 +28,7 @@
 
 #include "fdbclient/Tuple.h"
 #include "fdbclient/CommitTransaction.h"
+#include "fdbclient/RestoreInterface.h"
 #include "flow/flow.h"
 #include "fdbrpc/TimedRequest.h"
 #include "fdbrpc/fdbrpc.h"
@@ -88,26 +89,6 @@ std::string getHexString(StringRef input);
 
 bool debugFRMutation(const char* context, Version version, MutationRef const& mutation);
 
-struct RestoreCommonReply {
-	constexpr static FileIdentifier file_identifier = 5808787;
-	UID id; // unique ID of the server who sends the reply
-	bool isDuplicated;
-
-	RestoreCommonReply() = default;
-	explicit RestoreCommonReply(UID id, bool isDuplicated = false) : id(id), isDuplicated(isDuplicated) {}
-
-	std::string toString() const {
-		std::stringstream ss;
-		ss << "ServerNodeID:" << id.toString() << " isDuplicated:" << isDuplicated;
-		return ss.str();
-	}
-
-	template <class Ar>
-	void serialize(Ar& ar) {
-		serializer(ar, id, isDuplicated);
-	}
-};
-
 struct RestoreSimpleRequest : TimedRequest {
 	constexpr static FileIdentifier file_identifier = 16448937;
 
@@ -189,7 +189,7 @@ ACTOR Future<Void> monitorWorkerLiveness(Reference<RestoreWorkerData> self) {
 	loop {
 		std::vector<std::pair<UID, RestoreSimpleRequest>> requests;
 		for (auto& worker : self->workerInterfaces) {
-			requests.push_back(std::make_pair(worker.first, RestoreSimpleRequest()));
+			requests.emplace_back(worker.first, RestoreSimpleRequest());
 		}
 		wait(sendBatchRequests(&RestoreWorkerInterface::heartbeat, self->workerInterfaces, requests));
 		wait(delay(60.0));
@@ -33,12 +33,12 @@
 #include <cstdint>
 #include <cstdarg>
 
-#include "fdbclient/RestoreWorkerInterface.actor.h"
 #include "fdbserver/RestoreUtil.h"
 #include "fdbserver/RestoreCommon.actor.h"
 #include "fdbserver/RestoreRoleCommon.actor.h"
 #include "fdbserver/RestoreLoader.actor.h"
 #include "fdbserver/RestoreApplier.actor.h"
+#include "fdbserver/RestoreWorkerInterface.actor.h"
 
 // Each restore worker (a process) is assigned for a role.
 // MAYBE Later: We will support multiple restore roles on a worker
@@ -0,0 +1,102 @@
+/*
+ * RestoreWorkerInterface.actor.cpp
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "fdbserver/RestoreWorkerInterface.actor.h"
+#include "flow/actorcompiler.h" // must be last include
+
+const KeyRef restoreLeaderKey = "\xff\x02/restoreLeader"_sr;
+const KeyRangeRef restoreWorkersKeys("\xff\x02/restoreWorkers/"_sr, "\xff\x02/restoreWorkers0"_sr);
+const KeyRef restoreStatusKey = "\xff\x02/restoreStatus/"_sr;
+const KeyRangeRef restoreApplierKeys("\xff\x02/restoreApplier/"_sr, "\xff\x02/restoreApplier0"_sr);
+const KeyRef restoreApplierTxnValue = "1"_sr;
+
+// restoreApplierKeys: track atomic transaction progress to ensure applying atomicOp exactly once
+// Version and batchIndex are passed in as LittleEndian,
+// they must be converted to BigEndian to maintain ordering in lexical order
+const Key restoreApplierKeyFor(UID const& applierID, int64_t batchIndex, Version version) {
+	BinaryWriter wr(Unversioned());
+	wr.serializeBytes(restoreApplierKeys.begin);
+	wr << applierID << bigEndian64(batchIndex) << bigEndian64(version);
+	return wr.toValue();
+}
+
+std::tuple<UID, int64_t, Version> decodeRestoreApplierKey(ValueRef const& key) {
+	BinaryReader rd(key, Unversioned());
+	UID applierID;
+	int64_t batchIndex;
+	Version version;
+	rd >> applierID >> batchIndex >> version;
+	return std::make_tuple(applierID, bigEndian64(batchIndex), bigEndian64(version));
+}
+
+// Encode restore worker key for workerID
+const Key restoreWorkerKeyFor(UID const& workerID) {
+	BinaryWriter wr(Unversioned());
+	wr.serializeBytes(restoreWorkersKeys.begin);
+	wr << workerID;
+	return wr.toValue();
+}
+
+// Encode restore agent value
+const Value restoreWorkerInterfaceValue(RestoreWorkerInterface const& cmdInterf) {
+	BinaryWriter wr(IncludeVersion(ProtocolVersion::withRestoreWorkerInterfaceValue()));
+	wr << cmdInterf;
+	return wr.toValue();
+}
+
+RestoreWorkerInterface decodeRestoreWorkerInterfaceValue(ValueRef const& value) {
+	RestoreWorkerInterface s;
+	BinaryReader reader(value, IncludeVersion());
+	reader >> s;
+	return s;
+}
+
+Value restoreRequestDoneVersionValue(Version readVersion) {
+	BinaryWriter wr(IncludeVersion(ProtocolVersion::withRestoreRequestDoneVersionValue()));
+	wr << readVersion;
+	return wr.toValue();
+}
+Version decodeRestoreRequestDoneVersionValue(ValueRef const& value) {
+	Version v;
+	BinaryReader reader(value, IncludeVersion());
+	reader >> v;
+	return v;
+}
+
+RestoreRequest decodeRestoreRequestValue(ValueRef const& value) {
+	RestoreRequest s;
+	BinaryReader reader(value, IncludeVersion());
+	reader >> s;
+	return s;
+}
+
+// TODO: Register restore performance data to restoreStatus key
+const Key restoreStatusKeyFor(StringRef statusType) {
+	BinaryWriter wr(Unversioned());
+	wr.serializeBytes(restoreStatusKey);
+	wr << statusType;
+	return wr.toValue();
+}
+
+const Value restoreStatusValue(double val) {
+	BinaryWriter wr(IncludeVersion(ProtocolVersion::withRestoreStatusValue()));
+	wr << StringRef(std::to_string(val));
+	return wr.toValue();
+}