Merge branch 'master' of github.com:apple/foundationdb into refactor-fdbcli-2
commit 11eed5bc71
@@ -7,7 +7,7 @@ bindings/java/foundationdb-client*.jar
bindings/java/foundationdb-tests*.jar
bindings/java/fdb-java-*-sources.jar
packaging/msi/FDBInstaller.msi
builds/
# Generated source, build, and packaging files
*.g.cpp
*.g.h
@@ -263,13 +263,15 @@ TEST_CASE("fdb_future_set_callback") {
&context));
fdb_error_t err = wait_future(f1);
context.event.wait(); // Wait until callback is called
if (err) {
fdb::EmptyFuture f2 = tr.on_error(err);
fdb_check(wait_future(f2));
continue;
}
context.event.wait();
break;
}
}

@@ -515,10 +517,10 @@ TEST_CASE("write system key") {
fdb::Transaction tr(db);
std::string syskey("\xff\x02");
fdb_check(tr.set_option(FDB_TR_OPTION_ACCESS_SYSTEM_KEYS, nullptr, 0));
tr.set(syskey, "bar");
while (1) {
fdb_check(tr.set_option(FDB_TR_OPTION_ACCESS_SYSTEM_KEYS, nullptr, 0));
tr.set(syskey, "bar");
fdb::EmptyFuture f1 = tr.commit();
fdb_error_t err = wait_future(f1);

@@ -949,16 +951,25 @@ TEST_CASE("fdb_transaction_clear") {
}
TEST_CASE("fdb_transaction_atomic_op FDB_MUTATION_TYPE_ADD") {
insert_data(db, create_data({ { "foo", "a" } }));
insert_data(db, create_data({ { "foo", "\x00" } }));
fdb::Transaction tr(db);
int8_t param = 1;
int potentialCommitCount = 0;
while (1) {
tr.atomic_op(key("foo"), (const uint8_t*)&param, sizeof(param), FDB_MUTATION_TYPE_ADD);
if (potentialCommitCount + 1 == 256) {
// Trying to commit again might overflow the one unsigned byte we're looking at
break;
}
++potentialCommitCount;
fdb::EmptyFuture f1 = tr.commit();
fdb_error_t err = wait_future(f1);
if (err) {
if (fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE_NOT_COMMITTED, err)) {
--potentialCommitCount;
}
fdb::EmptyFuture f2 = tr.on_error(err);
fdb_check(wait_future(f2));
continue;

@@ -969,7 +980,8 @@ TEST_CASE("fdb_transaction_atomic_op FDB_MUTATION_TYPE_ADD") {
auto value = get_value(key("foo"), /* snapshot */ false, {});
REQUIRE(value.has_value());
CHECK(value->size() == 1);
CHECK(value->data()[0] == 'b'); // incrementing 'a' results in 'b'
CHECK(uint8_t(value->data()[0]) > 0);
CHECK(uint8_t(value->data()[0]) <= potentialCommitCount);
}

TEST_CASE("fdb_transaction_atomic_op FDB_MUTATION_TYPE_BIT_AND") {
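The C-API test above exercises FDB_MUTATION_TYPE_ADD directly. For context, the same atomic add is available through the language bindings; the following is a minimal sketch (not part of this commit) using the Python binding's `tr.add`, with the key name and API version chosen only for illustration.

```python
# Sketch (not from this diff): atomic add via the Python binding, mirroring
# what the C test exercises with FDB_MUTATION_TYPE_ADD.
import struct
import fdb

fdb.api_version(630)
db = fdb.open()  # default cluster file

@fdb.transactional
def increment(tr, key):
    # The operand is treated as a little-endian integer, so a single 0x01
    # byte adds one to a one-byte counter.
    tr.add(key, struct.pack('<B', 1))

increment(db, b'counter')
```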
@@ -1139,14 +1151,19 @@ TEST_CASE("fdb_transaction_atomic_op FDB_MUTATION_TYPE_BIT_XOR") {
fdb::Transaction tr(db);
char param[] = { 'a', 'd' };
int potentialCommitCount = 0;
while (1) {
tr.atomic_op(key("foo"), (const uint8_t*)"b", 1, FDB_MUTATION_TYPE_BIT_XOR);
tr.atomic_op(key("bar"), (const uint8_t*)param, 2, FDB_MUTATION_TYPE_BIT_XOR);
tr.atomic_op(key("baz"), (const uint8_t*)"d", 1, FDB_MUTATION_TYPE_BIT_XOR);
++potentialCommitCount;
fdb::EmptyFuture f1 = tr.commit();
fdb_error_t err = wait_future(f1);
if (err) {
if (fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE_NOT_COMMITTED, err)) {
--potentialCommitCount;
}
fdb::EmptyFuture f2 = tr.on_error(err);
fdb_check(wait_future(f2));
continue;

@@ -1154,6 +1171,11 @@ TEST_CASE("fdb_transaction_atomic_op FDB_MUTATION_TYPE_BIT_XOR") {
break;
}
if (potentialCommitCount != 1) {
MESSAGE("Transaction may not have committed exactly once. Suppressing assertions");
return;
}
auto value = get_value(key("foo"), /* snapshot */ false, {});
REQUIRE(value.has_value());
CHECK(value->size() == 1);

@@ -1204,13 +1226,18 @@ TEST_CASE("fdb_transaction_atomic_op FDB_MUTATION_TYPE_APPEND_IF_FITS") {
insert_data(db, create_data({ { "foo", "f" } }));
fdb::Transaction tr(db);
int potentialCommitCount = 0;
while (1) {
tr.atomic_op(key("foo"), (const uint8_t*)"db", 2, FDB_MUTATION_TYPE_APPEND_IF_FITS);
tr.atomic_op(key("bar"), (const uint8_t*)"foundation", 10, FDB_MUTATION_TYPE_APPEND_IF_FITS);
++potentialCommitCount;
fdb::EmptyFuture f1 = tr.commit();
fdb_error_t err = wait_future(f1);
if (err) {
if (fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE_NOT_COMMITTED, err)) {
--potentialCommitCount;
}
fdb::EmptyFuture f2 = tr.on_error(err);
fdb_check(wait_future(f2));
continue;

@@ -1218,13 +1245,18 @@ TEST_CASE("fdb_transaction_atomic_op FDB_MUTATION_TYPE_APPEND_IF_FITS") {
break;
}
auto value = get_value(key("foo"), /* snapshot */ false, {});
REQUIRE(value.has_value());
CHECK(value->compare("fdb") == 0);
auto value_foo = get_value(key("foo"), /* snapshot */ false, {});
REQUIRE(value_foo.has_value());
value = get_value(key("bar"), /* snapshot */ false, {});
REQUIRE(value.has_value());
CHECK(value->compare("foundation") == 0);
auto value_bar = get_value(key("bar"), /* snapshot */ false, {});
REQUIRE(value_bar.has_value());
if (potentialCommitCount != 1) {
MESSAGE("Transaction may not have committed exactly once. Suppressing assertions");
} else {
CHECK(value_foo.value() == "fdb");
CHECK(value_bar.value() == "foundation");
}
}

TEST_CASE("fdb_transaction_atomic_op FDB_MUTATION_TYPE_MAX") {
@@ -1576,7 +1608,7 @@ TEST_CASE("fdb_transaction_watch max watches") {
fdb_check(f1.set_callback(
+[](FDBFuture* f, void* param) {
fdb_error_t err = fdb_future_get_error(f);
if (err != 1101) { // operation_cancelled
if (err != /*operation_cancelled*/ 1101 && !fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE, err)) {
CHECK(err == 1032); // too_many_watches
}
auto* event = static_cast<std::shared_ptr<FdbEvent>*>(param);

@@ -1587,7 +1619,7 @@ TEST_CASE("fdb_transaction_watch max watches") {
fdb_check(f2.set_callback(
+[](FDBFuture* f, void* param) {
fdb_error_t err = fdb_future_get_error(f);
if (err != 1101) { // operation_cancelled
if (err != /*operation_cancelled*/ 1101 && !fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE, err)) {
CHECK(err == 1032); // too_many_watches
}
auto* event = static_cast<std::shared_ptr<FdbEvent>*>(param);

@@ -1598,7 +1630,7 @@ TEST_CASE("fdb_transaction_watch max watches") {
fdb_check(f3.set_callback(
+[](FDBFuture* f, void* param) {
fdb_error_t err = fdb_future_get_error(f);
if (err != 1101) { // operation_cancelled
if (err != /*operation_cancelled*/ 1101 && !fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE, err)) {
CHECK(err == 1032); // too_many_watches
}
auto* event = static_cast<std::shared_ptr<FdbEvent>*>(param);

@@ -1609,7 +1641,7 @@ TEST_CASE("fdb_transaction_watch max watches") {
fdb_check(f4.set_callback(
+[](FDBFuture* f, void* param) {
fdb_error_t err = fdb_future_get_error(f);
if (err != 1101) { // operation_cancelled
if (err != /*operation_cancelled*/ 1101 && !fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE, err)) {
CHECK(err == 1032); // too_many_watches
}
auto* event = static_cast<std::shared_ptr<FdbEvent>*>(param);

@@ -1671,7 +1703,7 @@ TEST_CASE("fdb_transaction_cancel") {
// ... until the transaction has been reset.
tr.reset();
fdb::ValueFuture f2 = tr.get("foo", /* snapshot */ false);
fdb_check(wait_future(f2));
CHECK(wait_future(f2) != 1025); // transaction_cancelled
}

TEST_CASE("fdb_transaction_add_conflict_range") {

@@ -2146,22 +2178,29 @@ TEST_CASE("monitor_network_busyness") {
}
int main(int argc, char** argv) {
if (argc != 3 && argc != 4) {
if (argc < 3) {
std::cout << "Unit tests for the FoundationDB C API.\n"
<< "Usage: fdb_c_unit_tests /path/to/cluster_file key_prefix [externalClient]" << std::endl;
<< "Usage: fdb_c_unit_tests /path/to/cluster_file key_prefix [externalClient] [doctest args]"
<< std::endl;
return 1;
}
fdb_check(fdb_select_api_version(710));
if (argc == 4) {
if (argc >= 4) {
std::string externalClientLibrary = argv[3];
fdb_check(fdb_network_set_option(
FDBNetworkOption::FDB_NET_OPTION_DISABLE_LOCAL_CLIENT, reinterpret_cast<const uint8_t*>(""), 0));
fdb_check(fdb_network_set_option(FDBNetworkOption::FDB_NET_OPTION_EXTERNAL_CLIENT_LIBRARY,
reinterpret_cast<const uint8_t*>(externalClientLibrary.c_str()),
externalClientLibrary.size()));
if (externalClientLibrary.substr(0, 2) != "--") {
fdb_check(fdb_network_set_option(
FDBNetworkOption::FDB_NET_OPTION_DISABLE_LOCAL_CLIENT, reinterpret_cast<const uint8_t*>(""), 0));
fdb_check(fdb_network_set_option(FDBNetworkOption::FDB_NET_OPTION_EXTERNAL_CLIENT_LIBRARY,
reinterpret_cast<const uint8_t*>(externalClientLibrary.c_str()),
externalClientLibrary.size()));
}
}
/* fdb_check(fdb_network_set_option( */
/* FDBNetworkOption::FDB_NET_OPTION_CLIENT_BUGGIFY_ENABLE, reinterpret_cast<const uint8_t*>(""), 0)); */
doctest::Context context;
context.applyCommandLine(argc, argv);
fdb_check(fdb_setup_network());
std::thread network_thread{ &fdb_run_network };
@@ -74,3 +74,12 @@ add_custom_command(OUTPUT ${package_file}
add_custom_target(python_package DEPENDS ${package_file})
add_dependencies(python_package python_binding)
add_dependencies(packages python_package)

if (NOT WIN32 AND NOT OPEN_FOR_IDE)
add_fdbclient_test(
NAME fdbcli_tests
COMMAND ${CMAKE_SOURCE_DIR}/bindings/python/tests/fdbcli_tests.py
${CMAKE_BINARY_DIR}/bin/fdbcli
@CLUSTER_FILE@
)
endif()
@@ -0,0 +1,93 @@
#!/usr/bin/env python3

import sys
import subprocess
import logging
import functools

def enable_logging(level=logging.ERROR):
    """Enable logging in the function with the specified logging level

    Args:
        level (logging.<level>, optional): logging level for the decorated function. Defaults to logging.ERROR.
    """
    def func_decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # initialize logger
            logger = logging.getLogger(func.__name__)
            logger.setLevel(level)
            # set logging format
            handler = logging.StreamHandler()
            handler_format = logging.Formatter('[%(asctime)s] - %(filename)s:%(lineno)d - %(levelname)s - %(name)s - %(message)s')
            handler.setFormatter(handler_format)
            handler.setLevel(level)
            logger.addHandler(handler)
            # pass the logger to the decorated function
            result = func(logger, *args, **kwargs)
            return result
        return wrapper
    return func_decorator

def run_fdbcli_command(*args):
    """run the fdbcli statement: fdbcli --exec '<arg1> <arg2> ... <argN>'.

    Returns:
        string: Console output from fdbcli
    """
    commands = command_template + ["{}".format(' '.join(args))]
    return subprocess.run(commands, stdout=subprocess.PIPE).stdout.decode('utf-8').strip()

@enable_logging()
def advanceversion(logger):
    # get current read version
    version1 = int(run_fdbcli_command('getversion'))
    logger.debug("Read version: {}".format(version1))
    # advance version to a much larger value compared to the current version
    version2 = version1 * 10000
    logger.debug("Advanced to version: " + str(version2))
    run_fdbcli_command('advanceversion', str(version2))
    # after running the advanceversion command,
    # check the read version is advanced to the specified value
    version3 = int(run_fdbcli_command('getversion'))
    logger.debug("Read version: {}".format(version3))
    assert version3 >= version2
    # advance version to a smaller value compared to the current version
    # this should be a no-op
    run_fdbcli_command('advanceversion', str(version1))
    # get the current version to make sure the version did not decrease
    version4 = int(run_fdbcli_command('getversion'))
    logger.debug("Read version: {}".format(version4))
    assert version4 >= version3

@enable_logging()
def maintenance(logger):
    # expected fdbcli output when running 'maintenance' while there's no ongoing maintenance
    no_maintenance_output = 'No ongoing maintenance.'
    output1 = run_fdbcli_command('maintenance')
    assert output1 == no_maintenance_output
    # set maintenance on a fake zone id for 10 seconds
    run_fdbcli_command('maintenance', 'on', 'fake_zone_id', '10')
    # show current maintenance status
    output2 = run_fdbcli_command('maintenance')
    logger.debug("Maintenance status: " + output2)
    items = output2.split(' ')
    # make sure this specific zone id is under maintenance
    assert 'fake_zone_id' in items
    logger.debug("Remaining time(seconds): " + items[-2])
    assert 0 < int(items[-2]) < 10
    # turn off maintenance
    run_fdbcli_command('maintenance', 'off')
    # check maintenance status
    output3 = run_fdbcli_command('maintenance')
    assert output3 == no_maintenance_output

if __name__ == '__main__':
    # fdbcli_tests.py <path_to_fdbcli_binary> <path_to_fdb_cluster_file>
    assert len(sys.argv) == 3, "Please pass arguments: <path_to_fdbcli_binary> <path_to_fdb_cluster_file>"
    # shell command template
    command_template = [sys.argv[1], '-C', sys.argv[2], '--exec']
    # tests for fdbcli commands
    # assertions will fail if fdbcli does not work as expected
    advanceversion()
    maintenance()
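As a usage note, further checks can be added to this harness by following the same pattern. The sketch below is not part of the commit; it reuses the `enable_logging` decorator and `run_fdbcli_command` helper above to assert that read versions reported by `getversion` do not go backwards, and would be invoked from `__main__` like the other tests.

```python
# Sketch of an additional check in the same style as the tests above
# (not part of this commit): read versions should be monotonic.
@enable_logging()
def getversion_monotonic(logger):
    v1 = int(run_fdbcli_command('getversion'))
    v2 = int(run_fdbcli_command('getversion'))
    logger.debug("Read versions: {} -> {}".format(v1, v2))
    assert v2 >= v1
```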
@@ -717,7 +717,7 @@ namespace SummarizeTest
delegate IEnumerable<Magnesium.Event> parseDelegate(System.IO.Stream stream, string file,
bool keepOriginalElement = false, double startTime = -1, double endTime = Double.MaxValue,
double samplingFactor = 1.0);
double samplingFactor = 1.0, Action<string> nonFatalErrorMessage = null);

static int Summarize(string[] traceFiles, string summaryFileName,
string errorFileName, bool? killed, List<string> outputErrors, int? exitCode, long? peakMemory,

@@ -750,12 +750,14 @@ namespace SummarizeTest
{
try
{
// Use Action to set this because IEnumerables with yield can't have an out variable
string nonFatalParseError = null;
parseDelegate parse;
if (traceFileName.EndsWith(".json"))
parse = Magnesium.JsonParser.Parse;
else
parse = Magnesium.XmlParser.Parse;
foreach (var ev in parse(traceFile, traceFileName))
foreach (var ev in parse(traceFile, traceFileName, nonFatalErrorMessage: (x) => { nonFatalParseError = x; }))
{
Magnesium.Severity newSeverity;
if (severityMap.TryGetValue(new KeyValuePair<string, Magnesium.Severity>(ev.Type, ev.Severity), out newSeverity))

@@ -876,6 +878,11 @@ namespace SummarizeTest
if (ev.Type == "StderrSeverity")
stderrSeverity = int.Parse(ev.Details.NewSeverity);
}
if (nonFatalParseError != null) {
xout.Add(new XElement("NonFatalParseError",
new XAttribute("Severity", (int)Magnesium.Severity.SevWarnAlways),
new XAttribute("ErrorMessage", nonFatalParseError)));
}
}
catch (Exception e)
@@ -1,4 +1,4 @@
/*
/*
 * JsonParser.cs
 *
 * This source file is part of the FoundationDB open source project

@@ -34,9 +34,10 @@ namespace Magnesium
{
static Random r = new Random();
// dummy parameter nonFatalParseError to match xml
public static IEnumerable<Event> Parse(System.IO.Stream stream, string file,
bool keepOriginalElement = false, double startTime = -1, double endTime = Double.MaxValue,
double samplingFactor = 1.0)
double samplingFactor = 1.0, Action<string> nonFatalErrorMessage = null)
{
using (var reader = new System.IO.StreamReader(stream))
{
@@ -33,14 +33,29 @@ namespace Magnesium
public static IEnumerable<Event> Parse(System.IO.Stream stream, string file,
bool keepOriginalElement = false, double startTime = -1, double endTime = Double.MaxValue,
double samplingFactor = 1.0)
double samplingFactor = 1.0, Action<string> nonFatalErrorMessage = null)
{
using (var reader = XmlReader.Create(stream))
{
reader.ReadToDescendant("Trace");
reader.Read();
foreach (var xev in StreamElements(reader))
// foreach (var xev in StreamElements(reader))
// need to be able to catch and save non-fatal exceptions in StreamElements, so use explicit iterator instead of foreach
var iter = StreamElements(reader).GetEnumerator();
while (true)
{
try {
if (!iter.MoveNext()) {
break;
}
} catch (Exception e) {
if (nonFatalErrorMessage != null) {
nonFatalErrorMessage(e.Message);
}
break;
}
var xev = iter.Current;
Event ev = null;
try
{

@@ -165,28 +180,20 @@ namespace Magnesium
}
}
// throws exceptions if xml is invalid
private static IEnumerable<XElement> StreamElements(this XmlReader reader)
{
while (!reader.EOF)
{
if (reader.NodeType == XmlNodeType.Element)
{
XElement node = null;
try
{
node = XElement.ReadFrom(reader) as XElement;
}
catch (Exception) { break; }
XElement node = XElement.ReadFrom(reader) as XElement;
if (node != null)
yield return node;
}
else
{
try
{
reader.Read();
}
catch (Exception) { break; }
}
}
}
@@ -49,7 +49,7 @@ master_doc = 'index'
# General information about the project.
project = u'FoundationDB'
copyright = u'2013-2018 Apple, Inc and the FoundationDB project authors'
copyright = u'2013-2021 Apple, Inc and the FoundationDB project authors'

# Load the version information from 'versions.target'
import xml.etree.ElementTree as ET
@@ -971,7 +971,7 @@ For example, you can change a process type or update coordinators by manipulatin
#. ``\xff\xff/configuration/process/class_type/<address> := <class_type>`` Read/write. Reading keys in the range will retrieve processes' class types. Setting keys in the range will update processes' class types. The process matching ``<address>`` will be assigned to the given class type if the commit is successful. The valid class types are ``storage``, ``transaction``, ``resolution``, etc. A full list of class type can be found via ``fdbcli`` command ``help setclass``. Clearing keys is forbidden in the range. Instead, you can set the type as ``default``, which will clear the assigned class type if existing. For more details, see help text of ``fdbcli`` command ``setclass``.
#. ``\xff\xff/configuration/process/class_source/<address> := <class_source>`` Read-only. Reading keys in the range will retrieve processes' class source. The class source is one of ``command_line``, ``configure_auto``, ``set_class`` and ``invalid``, indicating the source that the process's class type comes from.
#. ``\xff\xff/configuration/coordinators/processes := <ip:port>,<ip:port>,...,<ip:port>`` Read/write. A single key, if read, will return a comma delimited string of coordinators's network addresses. Thus to provide a new set of cooridinators, set the key with a correct formatted string of new coordinators' network addresses. As there's always the need to have coordinators, clear on the key is forbidden and a transaction will fail with the ``special_keys_api_failure`` error if the clear is committed. For more details, see help text of ``fdbcli`` command ``coordinators``.
#. ``\xff\xff/configuration/coordinators/processes := <ip:port>,<ip:port>,...,<ip:port>`` Read/write. A single key, if read, will return a comma delimited string of coordinators' network addresses. Thus to provide a new set of cooridinators, set the key with a correct formatted string of new coordinators' network addresses. As there's always the need to have coordinators, clear on the key is forbidden and a transaction will fail with the ``special_keys_api_failure`` error if the clear is committed. For more details, see help text of ``fdbcli`` command ``coordinators``.
#. ``\xff\xff/configuration/coordinators/cluster_description := <new_description>`` Read/write. A single key, if read, will return the cluster description. Thus modifying the key will update the cluster decription. The new description needs to match ``[A-Za-z0-9_]+``, otherwise, the ``special_keys_api_failure`` error will be thrown. In addition, clear on the key is meaningless thus forbidden. For more details, see help text of ``fdbcli`` command ``coordinators``.

The ``<address>`` here is the network address of the corresponding process. Thus the general form is ``ip:port``.
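The configuration module of the special key space described in the documentation hunk above can be driven directly from a client binding. The following is a rough Python sketch, not part of this commit: the process address is a placeholder, and the `special_key_space_enable_writes` transaction option is assumed to be the switch that enables writes to these keys.

```python
# Rough sketch (assumed option name, placeholder address) of using the
# \xff\xff/configuration/ special keys documented above from the Python binding.
import fdb

fdb.api_version(630)
db = fdb.open()

@fdb.transactional
def set_process_class(tr, address, class_type):
    # Writes to the configuration module require enabling special-key-space writes.
    tr.options.set_special_key_space_enable_writes()
    tr[b'\xff\xff/configuration/process/class_type/' + address] = class_type

@fdb.transactional
def read_coordinators(tr):
    # Reads return a comma-delimited list of coordinator addresses.
    return tr[b'\xff\xff/configuration/coordinators/processes']

set_process_class(db, b'10.0.0.1:4500', b'storage')
print(read_coordinators(db))
```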
@@ -121,6 +121,16 @@
"counter":0,
"roughness":0.0
},
"fetched_versions":{
"hz":0.0,
"counter":0,
"roughness":0.0
},
"fetches_from_logs":{
"hz":0.0,
"counter":0,
"roughness":0.0
},
"grv_latency_statistics":{ // GRV Latency metrics are grouped according to priority (currently batch or default).
"default":{
"count":0,

@@ -604,6 +614,10 @@
"data_distribution_disabled_for_rebalance":true,
"data_distribution_disabled":true,
"active_primary_dc":"pv",
"bounce_impact":{
"can_clean_bounce":true,
"reason":""
},
"configuration":{
"log_anti_quorum":0,
"log_replicas":2,

@@ -668,6 +682,16 @@
"ssd-rocksdb-experimental",
"memory"
]},
"tss_count":1,
"tss_storage_engine":{
"$enum":[
"ssd",
"ssd-1",
"ssd-2",
"ssd-redwood-experimental",
"ssd-rocksdb-experimental",
"memory"
]},
"coordinators_count":1,
"excluded_servers":[
{
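The schema fragment above only shows the shape of the new fields. As a hedged sketch (not part of this commit), a client could read the new TSS configuration fields back through the `\xff\xff/status/json` special key; the field paths follow the schema above.

```python
# Sketch (not from this diff): read the new tss_count / tss_storage_engine
# fields out of machine-readable status via the \xff\xff/status/json key.
import json
import fdb

fdb.api_version(630)
db = fdb.open()

status = json.loads(db[b'\xff\xff/status/json'])
config = status['cluster'].get('configuration', {})
print('tss_count:', config.get('tss_count'))
print('tss_storage_engine:', config.get('tss_storage_engine'))
```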
@@ -3,16 +3,29 @@ Release Notes
#############

6.3.14
======
* Fixed fdbbackup start command that automatically configures database with backup workers to only do so when using partitioned logs. `(PR #4863) <https://github.com/apple/foundationdb/pull/4863>`_
* Added ``cluster.bounce_impact`` section to status to report if there will be any extra effects when bouncing the cluster, and if so, the reason for those effects. `(PR #4770) <https://github.com/apple/foundationdb/pull/4770>`_
* Added ``fetched_versions`` to the storage metrics section of status to report how fast a storage server is catching up in versions. `(PR #4770) <https://github.com/apple/foundationdb/pull/4770>`_
* Added ``fetches_from_logs`` to the storage metrics section of status to report how frequently a storage server fetches updates from transaction logs. `(PR #4770) <https://github.com/apple/foundationdb/pull/4770>`_
* Added the ``bypass_unreadable`` transaction option which allows ``get`` operations to read from sections of keyspace that have become unreadable because of versionstamp operations. `(PR #4774) <https://github.com/apple/foundationdb/pull/4774>`_
* Fix several packaging issues. The osx package should now install successfully, and the structure of the RPM and DEB packages should match that of 6.2. `(PR #4810) <https://github.com/apple/foundationdb/pull/4810>`_
* Fix an accounting error that could potentially result in inaccuracies in priority busyness metrics. `(PR #4824) <https://github.com/apple/foundationdb/pull/4824>`_

6.3.13
======
* Added ``commit_batching_window_size`` to the proxy roles section of status to record statistics about commit batching window size on each proxy. `(PR #4736) <https://github.com/apple/foundationdb/pull/4736>`_
* The multi-version client now requires at most two client connections with version 6.2 or larger, regardless of how many external clients are configured. Clients older than 6.2 will continue to create an additional connection each. `(PR #4667) <https://github.com/apple/foundationdb/pull/4667>`_
* Fix an accounting error that could potentially result in inaccuracies in priority busyness metrics. `(PR #4824) <https://github.com/apple/foundationdb/pull/4824>`_

6.3.12
======
* Change the default for --knob_tls_server_handshake_threads to 64. The previous was 1000. This avoids starting 1000 threads by default, but may adversely affect recovery time for large clusters using tls. Users with large tls clusters should consider explicitly setting this knob in their foundationdb.conf file. `(PR #4421) <https://github.com/apple/foundationdb/pull/4421>`_
* Fix accounting error that could cause commits to incorrectly fail with ``proxy_memory_limit_exceeded``. `(PR #4526) <https://github.com/apple/foundationdb/pull/4526>`_
* As an optimization, partial restore using target key ranges now filters backup log data prior to loading it into the database. `(PR #4554) <https://github.com/apple/foundationdb/pull/4554>`_
* Fix fault tolerance calculation when there are no tLogs in LogSet. `(PR #4454) <https://github.com/apple/foundationdb/pull/4454>`_
* Change client's ``iteration_progression`` size defaults from 256 to 4096 bytes for better performance. `(PR #4416) <https://github.com/apple/foundationdb/pull/4416>`_
* Add the ability to instrument java driver actions, such as ``FDBTransaction`` and ``RangeQuery``. `(PR #4385) <https://github.com/apple/foundationdb/pull/4385>`_

6.3.11
======

@@ -31,7 +31,9 @@ Fixes
Status
------
* Added ``commit_batching_window_size`` to the proxy roles section of status to record statistics about commit batching window size on each proxy. `(PR #4735) <https://github.com/apple/foundationdb/pull/4735>`_
* Added ``cluster.bounce_impact`` section to status to report if there will be any extra effects when bouncing the cluster, and if so, the reason for those effects. `(PR #4770) <https://github.com/apple/foundationdb/pull/4770>`_
* Added ``fetched_versions`` to the storage metrics section of status to report how fast a storage server is catching up in versions. `(PR #4770) <https://github.com/apple/foundationdb/pull/4770>`_
* Added ``fetches_from_logs`` to the storage metrics section of status to report how frequently a storage server fetches updates from transaction logs. `(PR #4770) <https://github.com/apple/foundationdb/pull/4770>`_

Bindings
--------
@@ -496,11 +496,15 @@ void initHelp() {
helpMap["configure"] = CommandHelp(
"configure [new] "
"<single|double|triple|three_data_hall|three_datacenter|ssd|memory|memory-radixtree-beta|proxies=<PROXIES>|"
"commit_proxies=<COMMIT_PROXIES>|grv_proxies=<GRV_PROXIES>|logs=<LOGS>|resolvers=<RESOLVERS>>*",
"commit_proxies=<COMMIT_PROXIES>|grv_proxies=<GRV_PROXIES>|logs=<LOGS>|resolvers=<RESOLVERS>>*|"
"perpetual_storage_wiggle=<WIGGLE_SPEED>",
"change the database configuration",
"The `new' option, if present, initializes a new database with the given configuration rather than changing "
"the configuration of an existing one. When used, both a redundancy mode and a storage engine must be "
"specified.\n\nRedundancy mode:\n single - one copy of the data. Not fault tolerant.\n double - two copies "
"specified.\n\ntss: when enabled, configures the testing storage server for the cluster instead."
"When used with new to set up tss for the first time, it requires both a count and a storage engine."
"To disable the testing storage server, run \"configure tss count=0\"\n\n"
"Redundancy mode:\n single - one copy of the data. Not fault tolerant.\n double - two copies "
"of data (survive one failure).\n triple - three copies of data (survive two failures).\n three_data_hall - "
"See the Admin Guide.\n three_datacenter - See the Admin Guide.\n\nStorage engine:\n ssd - B-Tree storage "
"engine optimized for solid state disks.\n memory - Durable in-memory storage engine for small "

@@ -517,8 +521,11 @@ void initHelp() {
"1, or set to -1 which restores the number of GRV proxies to the default value.\n\nlogs=<LOGS>: Sets the "
"desired number of log servers in the cluster. Must be at least 1, or set to -1 which restores the number of "
"logs to the default value.\n\nresolvers=<RESOLVERS>: Sets the desired number of resolvers in the cluster. "
"Must be at least 1, or set to -1 which restores the number of resolvers to the default value.\n\nSee the "
"FoundationDB Administration Guide for more information.");
"Must be at least 1, or set to -1 which restores the number of resolvers to the default value.\n\n"
"perpetual_storage_wiggle=<WIGGLE_SPEED>: Set the value speed (a.k.a., the number of processes that the Data "
"Distributor should wiggle at a time). Currently, only 0 and 1 are supported. The value 0 means to disable the "
"perpetual storage wiggle.\n\n"
"See the FoundationDB Administration Guide for more information.");
helpMap["fileconfigure"] = CommandHelp(
"fileconfigure [new] <FILENAME>",
"change the database configuration from a file",
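The help text above describes the new `configure tss ...` and `configure perpetual_storage_wiggle=...` forms. As a sketch (not part of the commit), these could be exercised from the Python fdbcli test harness added elsewhere in this commit; the command strings follow the help text, and whether they succeed depends on the state of the target cluster.

```python
# Sketch (not from this diff): driving the new configure options through the
# run_fdbcli_command helper from bindings/python/tests/fdbcli_tests.py.
print(run_fdbcli_command('configure', 'perpetual_storage_wiggle=1'))

# Enable the testing storage server with a count and a storage engine,
# then disable it again, as described in the tss help text above.
run_fdbcli_command('configure', 'tss', 'count=2', 'ssd')
run_fdbcli_command('configure', 'tss', 'count=0')
```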
@@ -1101,6 +1108,17 @@ void printStatus(StatusObjectReader statusObj,
if (statusObjConfig.get("log_routers", intVal))
outputString += format("\n Desired Log Routers - %d", intVal);
if (statusObjConfig.get("tss_count", intVal) && intVal > 0) {
int activeTss = 0;
if (statusObjCluster.has("active_tss_count")) {
statusObjCluster.get("active_tss_count", activeTss);
}
outputString += format("\n TSS - %d/%d", activeTss, intVal);
if (statusObjConfig.get("tss_storage_engine", strVal))
outputString += format("\n TSS Storage Engine - %s", strVal.c_str());
}
outputString += "\n Usable Regions - ";
if (statusObjConfig.get("usable_regions", intVal)) {
outputString += std::to_string(intVal);

@@ -2743,6 +2761,7 @@ void configureGenerator(const char* text, const char* line, std::vector<std::str
"grv_proxies=",
"logs=",
"resolvers=",
"perpetual_storage_wiggle=",
nullptr };
arrayGenerator(text, line, opts, lc);
}
@@ -404,8 +404,14 @@ ACTOR Future<Void> readCommitted(Database cx,
state RangeResult values = wait(tr.getRange(begin, end, limits));
// When this buggify line is enabled, if there are more than 1 result then use half of the results
// Copy the data instead of messing with the results directly to avoid TSS issues.
if (values.size() > 1 && BUGGIFY) {
values.resize(values.arena(), values.size() / 2);
RangeResult copy;
// only copy first half of values into copy
for (int i = 0; i < values.size() / 2; i++) {
copy.push_back_deep(copy.arena(), values[i]);
}
values = copy;
values.more = true;
// Half of the time wait for this tr to expire so that the next read is at a different version
if (deterministicRandom()->random01() < 0.5)

@@ -469,9 +475,15 @@ ACTOR Future<Void> readCommitted(Database cx,
state RangeResult rangevalue = wait(tr.getRange(nextKey, end, limits));
// When this buggify line is enabled, if there are more than 1 result then use half of the results
// When this buggify line is enabled, if there are more than 1 result then use half of the results.
// Copy the data instead of messing with the results directly to avoid TSS issues.
if (rangevalue.size() > 1 && BUGGIFY) {
rangevalue.resize(rangevalue.arena(), rangevalue.size() / 2);
RangeResult copy;
// only copy first half of rangevalue into copy
for (int i = 0; i < rangevalue.size() / 2; i++) {
copy.push_back_deep(copy.arena(), rangevalue[i]);
}
rangevalue = copy;
rangevalue.more = true;
// Half of the time wait for this tr to expire so that the next read is at a different version
if (deterministicRandom()->random01() < 0.5)
@@ -68,6 +68,7 @@ set(FDBCLIENT_SRCS
Status.h
StatusClient.actor.cpp
StatusClient.h
StorageServerInterface.cpp
StorageServerInterface.h
Subspace.cpp
Subspace.h
@@ -288,9 +288,12 @@ struct GetKeyServerLocationsReply {
Arena arena;
std::vector<std::pair<KeyRangeRef, vector<StorageServerInterface>>> results;
// if any storage servers in results have a TSS pair, that mapping is in here
std::vector<std::pair<UID, StorageServerInterface>> resultsTssMapping;
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, results, arena);
serializer(ar, results, resultsTssMapping, arena);
}
};
@@ -33,12 +33,15 @@ const int MAX_CLUSTER_FILE_BYTES = 60000;
constexpr UID WLTOKEN_CLIENTLEADERREG_GETLEADER(-1, 2);
constexpr UID WLTOKEN_CLIENTLEADERREG_OPENDATABASE(-1, 3);
// the value of this endpoint should be stable and not change.
constexpr UID WLTOKEN_PROTOCOL_INFO(-1, 10);
constexpr UID WLTOKEN_CLIENTLEADERREG_DESCRIPTOR_MUTABLE(-1, 11);
// The coordinator interface as exposed to clients
// well known endpoints published to the client.
struct ClientLeaderRegInterface {
RequestStream<struct GetLeaderRequest> getLeader;
RequestStream<struct OpenDatabaseCoordRequest> openDatabase;
RequestStream<struct CheckDescriptorMutableRequest> checkDescriptorMutable;
ClientLeaderRegInterface() {}
ClientLeaderRegInterface(NetworkAddress remote);

@@ -236,4 +239,28 @@ struct ProtocolInfoRequest {
}
};
// Returns true if the cluster descriptor may be modified.
struct CheckDescriptorMutableReply {
constexpr static FileIdentifier file_identifier = 7784299;
CheckDescriptorMutableReply() = default;
explicit CheckDescriptorMutableReply(bool isMutable) : isMutable(isMutable) {}
bool isMutable;
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, isMutable);
}
};
// Allows client to check if allowed to change the cluster descriptor.
struct CheckDescriptorMutableRequest {
constexpr static FileIdentifier file_identifier = 214729;
ReplyPromise<CheckDescriptorMutableReply> reply;
CheckDescriptorMutableRequest() {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, reply);
}
};

#endif
@@ -31,7 +31,8 @@ void DatabaseConfiguration::resetInternal() {
commitProxyCount = grvProxyCount = resolverCount = desiredTLogCount = tLogWriteAntiQuorum = tLogReplicationFactor =
storageTeamSize = desiredLogRouterCount = -1;
tLogVersion = TLogVersion::DEFAULT;
tLogDataStoreType = storageServerStoreType = KeyValueStoreType::END;
tLogDataStoreType = storageServerStoreType = testingStorageServerStoreType = KeyValueStoreType::END;
desiredTSSCount = 0;
tLogSpillType = TLogSpillType::DEFAULT;
autoCommitProxyCount = CLIENT_KNOBS->DEFAULT_AUTO_COMMIT_PROXIES;
autoGrvProxyCount = CLIENT_KNOBS->DEFAULT_AUTO_GRV_PROXIES;

@@ -43,6 +44,7 @@ void DatabaseConfiguration::resetInternal() {
remoteDesiredTLogCount = -1;
remoteTLogReplicationFactor = repopulateRegionAntiQuorum = 0;
backupWorkerEnabled = false;
perpetualStorageWiggleSpeed = 0;
}

void parse(int* i, ValueRef const& v) {

@@ -194,9 +196,9 @@ bool DatabaseConfiguration::isValid() const {
getDesiredRemoteLogs() >= 1 && remoteTLogReplicationFactor >= 0 && repopulateRegionAntiQuorum >= 0 &&
repopulateRegionAntiQuorum <= 1 && usableRegions >= 1 && usableRegions <= 2 && regions.size() <= 2 &&
(usableRegions == 1 || regions.size() == 2) && (regions.size() == 0 || regions[0].priority >= 0) &&
(regions.size() == 0 ||
tLogPolicy->info() !=
"dcid^2 x zoneid^2 x 1"))) { // We cannot specify regions with three_datacenter replication
(regions.size() == 0 || tLogPolicy->info() != "dcid^2 x zoneid^2 x 1") &&
// We cannot specify regions with three_datacenter replication
(perpetualStorageWiggleSpeed == 0 || perpetualStorageWiggleSpeed == 1))) {
return false;
}
std::set<Key> dcIds;

@@ -298,6 +300,25 @@ StatusObject DatabaseConfiguration::toJSON(bool noPolicies) const {
result["storage_engine"] = "custom";
}
if (desiredTSSCount > 0) {
result["tss_count"] = desiredTSSCount;
if (testingStorageServerStoreType == KeyValueStoreType::SSD_BTREE_V1) {
result["tss_storage_engine"] = "ssd-1";
} else if (testingStorageServerStoreType == KeyValueStoreType::SSD_BTREE_V2) {
result["tss_storage_engine"] = "ssd-2";
} else if (testingStorageServerStoreType == KeyValueStoreType::SSD_REDWOOD_V1) {
result["tss_storage_engine"] = "ssd-redwood-experimental";
} else if (testingStorageServerStoreType == KeyValueStoreType::SSD_ROCKSDB_V1) {
result["tss_storage_engine"] = "ssd-rocksdb-experimental";
} else if (testingStorageServerStoreType == KeyValueStoreType::MEMORY_RADIXTREE) {
result["tss_storage_engine"] = "memory-radixtree-beta";
} else if (testingStorageServerStoreType == KeyValueStoreType::MEMORY) {
result["tss_storage_engine"] = "memory-2";
} else {
result["tss_storage_engine"] = "custom";
}
}
result["log_spill"] = (int)tLogSpillType;
if (remoteTLogReplicationFactor == 1) {

@@ -352,7 +373,7 @@ StatusObject DatabaseConfiguration::toJSON(bool noPolicies) const {
}
result["backup_worker_enabled"] = (int32_t)backupWorkerEnabled;
result["perpetual_storage_wiggle"] = perpetualStorageWiggleSpeed;
return result;
}

@@ -448,6 +469,8 @@ bool DatabaseConfiguration::setInternal(KeyRef key, ValueRef value) {
}
} else if (ck == LiteralStringRef("storage_replicas")) {
parse(&storageTeamSize, value);
} else if (ck == LiteralStringRef("tss_count")) {
parse(&desiredTSSCount, value);
} else if (ck == LiteralStringRef("log_version")) {
parse((&type), value);
type = std::max((int)TLogVersion::MIN_RECRUITABLE, type);

@@ -470,6 +493,9 @@ bool DatabaseConfiguration::setInternal(KeyRef key, ValueRef value) {
} else if (ck == LiteralStringRef("storage_engine")) {
parse((&type), value);
storageServerStoreType = (KeyValueStoreType::StoreType)type;
} else if (ck == LiteralStringRef("tss_storage_engine")) {
parse((&type), value);
testingStorageServerStoreType = (KeyValueStoreType::StoreType)type;
} else if (ck == LiteralStringRef("auto_commit_proxies")) {
parse(&autoCommitProxyCount, value);
} else if (ck == LiteralStringRef("auto_grv_proxies")) {

@@ -499,6 +525,8 @@ bool DatabaseConfiguration::setInternal(KeyRef key, ValueRef value) {
parse(&repopulateRegionAntiQuorum, value);
} else if (ck == LiteralStringRef("regions")) {
parse(&regions, value);
} else if (ck == LiteralStringRef("perpetual_storage_wiggle")) {
parse(&perpetualStorageWiggleSpeed, value);
} else {
return false;
}
@@ -225,6 +225,10 @@ struct DatabaseConfiguration {
int32_t storageTeamSize;
KeyValueStoreType storageServerStoreType;
// Testing StorageServers
int32_t desiredTSSCount;
KeyValueStoreType testingStorageServerStoreType;
// Remote TLogs
int32_t desiredLogRouterCount;
int32_t remoteDesiredTLogCount;

@@ -239,6 +243,9 @@ struct DatabaseConfiguration {
int32_t repopulateRegionAntiQuorum;
std::vector<RegionInfo> regions;
// Perpetual Storage Setting
int32_t perpetualStorageWiggleSpeed;
// Excluded servers (no state should be here)
bool isExcludedServer(NetworkAddressList) const;
std::set<AddressExclusion> getExcludedServers() const;
@@ -273,6 +273,9 @@ public:
Reference<AsyncVar<Reference<ClusterConnectionFile>>> connectionFile;
AsyncTrigger proxiesChangeTrigger;
Future<Void> monitorProxiesInfoChange;
Future<Void> monitorTssInfoChange;
Future<Void> tssMismatchHandler;
PromiseStream<UID> tssMismatchStream;
Reference<CommitProxyInfo> commitProxies;
Reference<GrvProxyInfo> grvProxies;
bool proxyProvisional; // Provisional commit proxy and grv proxy are used at the same time.

@@ -320,6 +323,11 @@ public:
std::map<UID, StorageServerInfo*> server_interf;
// map from ssid -> tss interface
std::unordered_map<UID, StorageServerInterface> tssMapping;
// map from tssid -> metrics for that tss pair
std::unordered_map<UID, Reference<TSSMetrics>> tssMetrics;
UID dbId;
bool internal; // Only contexts created through the C client and fdbcli are non-internal

@@ -419,6 +427,14 @@ public:
static bool debugUseTags;
static const std::vector<std::string> debugTransactionTagChoices;
std::unordered_map<KeyRef, Reference<WatchMetadata>> watchMap;
// Adds or updates the specified (SS, TSS) pair in the TSS mapping (if not already present).
// Requests to the storage server will be duplicated to the TSS.
void addTssMapping(StorageServerInterface const& ssi, StorageServerInterface const& tssi);
// Removes the storage server and its TSS pair from the TSS mapping (if present).
// Requests to the storage server will no longer be duplicated to its pair TSS.
void removeTssMapping(StorageServerInterface const& ssi);
};

#endif
@@ -2705,13 +2705,17 @@ struct StartFullBackupTaskFunc : BackupTaskFuncBase {
wait(checkTaskVersion(cx, task, StartFullBackupTaskFunc::name, StartFullBackupTaskFunc::version));
state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(cx));
state BackupConfig config(task);
state Future<Optional<bool>> partitionedLog;
loop {
try {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
Version startVersion = wait(tr->getReadVersion());
partitionedLog = config.partitionedLogEnabled().get(tr);
state Future<Version> startVersionFuture = tr->getReadVersion();
wait(success(partitionedLog) && success(startVersionFuture));
Params.beginVersion().set(task, startVersion);
Params.beginVersion().set(task, startVersionFuture.get());
break;
} catch (Error& e) {
wait(tr->onError(e));

@@ -2721,14 +2725,15 @@ struct StartFullBackupTaskFunc : BackupTaskFuncBase {
// Check if backup worker is enabled
DatabaseConfiguration dbConfig = wait(getDatabaseConfiguration(cx));
state bool backupWorkerEnabled = dbConfig.backupWorkerEnabled;
if (!backupWorkerEnabled) {
if (!backupWorkerEnabled && partitionedLog.get().present() && partitionedLog.get().get()) {
// Change configuration only when we set to use partitioned logs and
// the flag was not set before.
wait(success(changeConfig(cx, "backup_worker_enabled:=1", true)));
backupWorkerEnabled = true;
}
// Set the "backupStartedKey" and wait for all backup worker started
tr->reset();
state BackupConfig config(task);
loop {
state Future<Void> watchFuture;
try {

@@ -2738,7 +2743,7 @@ struct StartFullBackupTaskFunc : BackupTaskFuncBase {
state Future<Optional<Value>> started = tr->get(backupStartedKey);
state Future<Optional<Value>> taskStarted = tr->get(config.allWorkerStarted().key);
state Future<Optional<bool>> partitionedLog = config.partitionedLogEnabled().get(tr);
partitionedLog = config.partitionedLogEnabled().get(tr);
wait(success(started) && success(taskStarted) && success(partitionedLog));
if (!partitionedLog.get().present() || !partitionedLog.get().get()) {
@@ -34,16 +34,7 @@ const KeyRef fdbClientInfoTxnSizeLimit = LiteralStringRef("config/fdb_client_inf
const KeyRef transactionTagSampleRate = LiteralStringRef("config/transaction_tag_sample_rate");
const KeyRef transactionTagSampleCost = LiteralStringRef("config/transaction_tag_sample_cost");
GlobalConfig::GlobalConfig() : lastUpdate(0) {}
void GlobalConfig::create(DatabaseContext* cx, Reference<AsyncVar<ClientDBInfo>> dbInfo) {
if (g_network->global(INetwork::enGlobalConfig) == nullptr) {
auto config = new GlobalConfig{};
config->cx = Database(cx);
g_network->setGlobal(INetwork::enGlobalConfig, config);
config->_updater = updater(config, dbInfo);
}
}
GlobalConfig::GlobalConfig(Database& cx) : cx(cx), lastUpdate(0) {}

GlobalConfig& GlobalConfig::globalConfig() {
void* res = g_network->global(INetwork::enGlobalConfig);

@@ -77,6 +68,14 @@ Future<Void> GlobalConfig::onInitialized() {
return initialized.getFuture();
}
Future<Void> GlobalConfig::onChange() {
return configChanged.onTrigger();
}
void GlobalConfig::trigger(KeyRef key, std::function<void(std::optional<std::any>)> fn) {
callbacks.emplace(key, std::move(fn));
}
void GlobalConfig::insert(KeyRef key, ValueRef value) {
data.erase(key);

@@ -89,6 +88,8 @@ void GlobalConfig::insert(KeyRef key, ValueRef value) {
any = StringRef(arena, t.getString(0).contents());
} else if (t.getType(0) == Tuple::ElementType::INT) {
any = t.getInt(0);
} else if (t.getType(0) == Tuple::ElementType::BOOL) {
any = t.getBool(0);
} else if (t.getType(0) == Tuple::ElementType::FLOAT) {
any = t.getFloat(0);
} else if (t.getType(0) == Tuple::ElementType::DOUBLE) {

@@ -97,19 +98,26 @@ void GlobalConfig::insert(KeyRef key, ValueRef value) {
ASSERT(false);
}
data[stableKey] = makeReference<ConfigValue>(std::move(arena), std::move(any));
if (callbacks.find(stableKey) != callbacks.end()) {
callbacks[stableKey](data[stableKey]->value);
}
} catch (Error& e) {
TraceEvent("GlobalConfigTupleParseError").detail("What", e.what());
TraceEvent(SevWarn, "GlobalConfigTupleParseError").detail("What", e.what());
}
}

void GlobalConfig::erase(KeyRef key) {
data.erase(key);
void GlobalConfig::erase(Key key) {
erase(KeyRangeRef(key, keyAfter(key)));
}

void GlobalConfig::erase(KeyRangeRef range) {
auto it = data.begin();
while (it != data.end()) {
if (range.contains(it->first)) {
if (callbacks.find(it->first) != callbacks.end()) {
callbacks[it->first](std::nullopt);
}
it = data.erase(it);
} else {
++it;

@@ -134,36 +142,39 @@ ACTOR Future<Void> GlobalConfig::migrate(GlobalConfig* self) {
state Optional<Value> sampleRate = wait(tr->get(Key("\xff\x02/fdbClientInfo/client_txn_sample_rate/"_sr)));
state Optional<Value> sizeLimit = wait(tr->get(Key("\xff\x02/fdbClientInfo/client_txn_size_limit/"_sr)));
loop {
try {
tr->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES);
// The value doesn't matter too much, as long as the key is set.
tr->set(migratedKey.contents(), "1"_sr);
if (sampleRate.present()) {
const double sampleRateDbl =
BinaryReader::fromStringRef<double>(sampleRate.get().contents(), Unversioned());
Tuple rate = Tuple().appendDouble(sampleRateDbl);
tr->set(GlobalConfig::prefixedKey(fdbClientInfoTxnSampleRate), rate.pack());
}
if (sizeLimit.present()) {
const int64_t sizeLimitInt =
BinaryReader::fromStringRef<int64_t>(sizeLimit.get().contents(), Unversioned());
Tuple size = Tuple().append(sizeLimitInt);
tr->set(GlobalConfig::prefixedKey(fdbClientInfoTxnSizeLimit), size.pack());
}
wait(tr->commit());
return Void();
} catch (Error& e) {
throw;
try {
tr->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES);
// The value doesn't matter too much, as long as the key is set.
tr->set(migratedKey.contents(), "1"_sr);
if (sampleRate.present()) {
const double sampleRateDbl =
BinaryReader::fromStringRef<double>(sampleRate.get().contents(), Unversioned());
Tuple rate = Tuple().appendDouble(sampleRateDbl);
tr->set(GlobalConfig::prefixedKey(fdbClientInfoTxnSampleRate), rate.pack());
}
if (sizeLimit.present()) {
const int64_t sizeLimitInt =
BinaryReader::fromStringRef<int64_t>(sizeLimit.get().contents(), Unversioned());
Tuple size = Tuple().append(sizeLimitInt);
tr->set(GlobalConfig::prefixedKey(fdbClientInfoTxnSizeLimit), size.pack());
}
wait(tr->commit());
} catch (Error& e) {
// If multiple fdbserver processes are started at once, they will all
// attempt this migration at the same time, sometimes resulting in
// aborts due to conflicts. Purposefully avoid retrying, making this
// migration best-effort.
TraceEvent(SevInfo, "GlobalConfigMigrationError").detail("What", e.what());
}
return Void();
}

// Updates local copy of global configuration by reading the entire key-range
// from storage.
ACTOR Future<Void> GlobalConfig::refresh(GlobalConfig* self) {
self->data.clear();
self->erase(KeyRangeRef(""_sr, "\xff"_sr));
Transaction tr(self->cx);
RangeResult result = wait(tr.getRange(globalConfigDataKeys, CLIENT_KNOBS->TOO_MANY));

@@ -176,7 +187,8 @@ ACTOR Future<Void> GlobalConfig::refresh(GlobalConfig* self) {
// Applies updates to the local copy of the global configuration when this
// process receives an updated history.
ACTOR Future<Void> GlobalConfig::updater(GlobalConfig* self, Reference<AsyncVar<ClientDBInfo>> dbInfo) {
ACTOR Future<Void> GlobalConfig::updater(GlobalConfig* self, const ClientDBInfo* dbInfo) {
wait(self->cx->onConnected());
wait(self->migrate(self));
wait(self->refresh(self));

@@ -184,9 +196,9 @@ ACTOR Future<Void> GlobalConfig::updater(GlobalConfig* self, Reference<AsyncVar<
loop {
try {
wait(dbInfo->onChange());
wait(self->dbInfoChanged.onTrigger());
auto& history = dbInfo->get().history;
auto& history = dbInfo->history;
if (history.size() == 0) {
continue;
}

@@ -196,8 +208,8 @@ ACTOR Future<Void> GlobalConfig::updater(GlobalConfig* self, Reference<AsyncVar<
// history updates or the protocol version changed, so it
// must re-read the entire configuration range.
wait(self->refresh(self));
if (dbInfo->get().history.size() > 0) {
self->lastUpdate = dbInfo->get().history.back().version;
if (dbInfo->history.size() > 0) {
self->lastUpdate = dbInfo->history.back().version;
}
} else {
// Apply history in order, from lowest version to highest

@@ -222,6 +234,8 @@ ACTOR Future<Void> GlobalConfig::updater(GlobalConfig* self, Reference<AsyncVar<
self->lastUpdate = vh.version;
}
}
self->configChanged.trigger();
} catch (Error& e) {
throw;
}
@@ -62,10 +62,28 @@ struct ConfigValue : ReferenceCounted<ConfigValue> {
class GlobalConfig : NonCopyable {
public:
// Creates a GlobalConfig singleton, accessed by calling GlobalConfig().
// This function should only be called once by each process (however, it is
// idempotent and calling it multiple times will have no effect).
static void create(DatabaseContext* cx, Reference<AsyncVar<ClientDBInfo>> dbInfo);
// Creates a GlobalConfig singleton, accessed by calling
// GlobalConfig::globalConfig(). This function requires a database object
// to allow global configuration to run transactions on the database, and
// an AsyncVar object to watch for changes on. The ClientDBInfo pointer
// should point to a ClientDBInfo object which will contain the updated
// global configuration history when the given AsyncVar changes. This
// function should be called whenever the database object changes, in order
// to allow global configuration to run transactions on the latest
// database.
template <class T>
static void create(Database& cx, Reference<AsyncVar<T>> db, const ClientDBInfo* dbInfo) {
if (g_network->global(INetwork::enGlobalConfig) == nullptr) {
auto config = new GlobalConfig{ cx };
g_network->setGlobal(INetwork::enGlobalConfig, config);
config->_updater = updater(config, dbInfo);
// Bind changes in `db` to the `dbInfoChanged` AsyncTrigger.
forward(db, std::addressof(config->dbInfoChanged));
} else {
GlobalConfig* config = reinterpret_cast<GlobalConfig*>(g_network->global(INetwork::enGlobalConfig));
config->cx = cx;
}
}

// Returns a reference to the global GlobalConfig object. Clients should
// call this function whenever they need to read a value out of the global

@@ -114,8 +132,18 @@ public:
// been created and is ready.
Future<Void> onInitialized();
// Triggers the returned future when any key-value pair in the global
// configuration changes.
Future<Void> onChange();
// Calls \ref fn when the value associated with \ref key is changed. \ref
// fn is passed the updated value for the key, or an empty optional if the
// key has been cleared. If the value is an allocated object, its memory
// remains in the control of the global configuration.
void trigger(KeyRef key, std::function<void(std::optional<std::any>)> fn);

private:
GlobalConfig();
GlobalConfig(Database& cx);
// The functions below only affect the local copy of the global
// configuration keyspace! To insert or remove values across all nodes you

@@ -127,20 +155,23 @@ private:
void insert(KeyRef key, ValueRef value);
// Removes the given key (and associated value) from the local copy of the
// global configuration keyspace.
void erase(KeyRef key);
void erase(Key key);
// Removes the given key range (and associated values) from the local copy
// of the global configuration keyspace.
void erase(KeyRangeRef range);

ACTOR static Future<Void> migrate(GlobalConfig* self);
ACTOR static Future<Void> refresh(GlobalConfig* self);
ACTOR static Future<Void> updater(GlobalConfig* self, Reference<AsyncVar<ClientDBInfo>> dbInfo);
ACTOR static Future<Void> updater(GlobalConfig* self, const ClientDBInfo* dbInfo);

Database cx;
AsyncTrigger dbInfoChanged;
Future<Void> _updater;
Promise<Void> initialized;
AsyncTrigger configChanged;
std::unordered_map<StringRef, Reference<ConfigValue>> data;
Version lastUpdate;
std::unordered_map<KeyRef, std::function<void(std::optional<std::any>)>> callbacks;
};

#endif
@ -60,6 +60,13 @@ std::map<std::string, std::string> configForToken(std::string const& mode) {
|
|||
return out;
|
||||
}
|
||||
|
||||
if (mode == "tss") {
|
||||
// Set temporary marker in config map to mark that this is a tss configuration and not a normal storage/log
|
||||
// configuration. A bit of a hack but reuses the parsing code nicely.
|
||||
out[p + "istss"] = "1";
|
||||
return out;
|
||||
}
|
||||
|
||||
if (mode == "locked") {
|
||||
// Setting this key is interpreted as an instruction to use the normal version-stamp-based mechanism for locking
|
||||
// the database.
|
||||
|
@ -119,7 +126,7 @@ std::map<std::string, std::string> configForToken(std::string const& mode) {
|
|||
|
||||
if ((key == "logs" || key == "commit_proxies" || key == "grv_proxies" || key == "resolvers" ||
|
||||
key == "remote_logs" || key == "log_routers" || key == "usable_regions" ||
|
||||
key == "repopulate_anti_quorum") &&
|
||||
key == "repopulate_anti_quorum" || key == "count") &&
|
||||
isInteger(value)) {
|
||||
out[p + key] = value;
|
||||
}
|
||||
|
@ -134,6 +141,14 @@ std::map<std::string, std::string> configForToken(std::string const& mode) {
BinaryWriter::toValue(regionObj, IncludeVersion(ProtocolVersion::withRegionConfiguration())).toString();
}

if (key == "perpetual_storage_wiggle" && isInteger(value)) {
int ppWiggle = atoi(value.c_str());
if (ppWiggle >= 2 || ppWiggle < 0) {
printf("Error: Only 0 and 1 are valid values of perpetual_storage_wiggle at present.\n");
return out;
}
out[p + key] = value;
}
return out;
}

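The check above only accepts 0 and 1. A hedged sketch of how the token parser behaves for this key; configForToken() is local to ManagementAPI.actor.cpp, so the forward declaration below is purely for illustration.

#include <map>
#include <string>

std::map<std::string, std::string> configForToken(std::string const& mode); // illustrative forward declaration

void perpetualWiggleTokens() {
	// Accepted: the value "1" (or "0") is stored under the configuration key prefix.
	auto accepted = configForToken("perpetual_storage_wiggle=1");

	// Rejected: any other integer fails the `ppWiggle >= 2 || ppWiggle < 0` check,
	// an error is printed, and no perpetual_storage_wiggle entry is added.
	auto rejected = configForToken("perpetual_storage_wiggle=2");

	(void)accepted;
	(void)rejected;
}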
@ -326,6 +341,35 @@ ConfigurationResult buildConfiguration(std::vector<StringRef> const& modeTokens,
|
|||
serializeReplicationPolicy(policyWriter, logPolicy);
|
||||
outConf[p + "log_replication_policy"] = policyWriter.toValue().toString();
|
||||
}
|
||||
if (outConf.count(p + "istss")) {
|
||||
// redo config parameters to be tss config instead of normal config
|
||||
|
||||
// save param values from parsing as a normal config
|
||||
bool isNew = outConf.count(p + "initialized");
|
||||
Optional<std::string> count;
|
||||
Optional<std::string> storageEngine;
|
||||
if (outConf.count(p + "count")) {
|
||||
count = Optional<std::string>(outConf[p + "count"]);
|
||||
}
|
||||
if (outConf.count(p + "storage_engine")) {
|
||||
storageEngine = Optional<std::string>(outConf[p + "storage_engine"]);
|
||||
}
|
||||
|
||||
// A new tss setup must have count + storage engine. An adjustment must have at least one.
|
||||
if ((isNew && (!count.present() || !storageEngine.present())) ||
|
||||
(!isNew && !count.present() && !storageEngine.present())) {
|
||||
return ConfigurationResult::INCOMPLETE_CONFIGURATION;
|
||||
}
|
||||
|
||||
// clear map and only reset tss parameters
|
||||
outConf.clear();
|
||||
if (count.present()) {
|
||||
outConf[p + "tss_count"] = count.get();
|
||||
}
|
||||
if (storageEngine.present()) {
|
||||
outConf[p + "tss_storage_engine"] = storageEngine.get();
|
||||
}
|
||||
}
|
||||
return ConfigurationResult::SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -1105,6 +1149,7 @@ ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
|
|||
|
||||
vector<Future<Optional<LeaderInfo>>> leaderServers;
|
||||
ClientCoordinators coord(Reference<ClusterConnectionFile>(new ClusterConnectionFile(conn)));
|
||||
|
||||
leaderServers.reserve(coord.clientLeaderServers.size());
|
||||
for (int i = 0; i < coord.clientLeaderServers.size(); i++)
|
||||
leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader,
|
||||
|
@ -1188,14 +1233,20 @@ ACTOR Future<CoordinatorsResult> changeQuorum(Database cx, Reference<IQuorumChan
|
|||
TEST(old.clusterKeyName() != conn.clusterKeyName()); // Quorum change with new name
|
||||
TEST(old.clusterKeyName() == conn.clusterKeyName()); // Quorum change with unchanged name
|
||||
|
||||
vector<Future<Optional<LeaderInfo>>> leaderServers;
|
||||
ClientCoordinators coord(Reference<ClusterConnectionFile>(new ClusterConnectionFile(conn)));
|
||||
state vector<Future<Optional<LeaderInfo>>> leaderServers;
|
||||
state ClientCoordinators coord(Reference<ClusterConnectionFile>(new ClusterConnectionFile(conn)));
|
||||
// check if allowed to modify the cluster descriptor
|
||||
if (!change->getDesiredClusterKeyName().empty()) {
|
||||
CheckDescriptorMutableReply mutabilityReply =
|
||||
wait(coord.clientLeaderServers[0].checkDescriptorMutable.getReply(CheckDescriptorMutableRequest()));
|
||||
if (!mutabilityReply.isMutable)
|
||||
return CoordinatorsResult::BAD_DATABASE_STATE;
|
||||
}
|
||||
leaderServers.reserve(coord.clientLeaderServers.size());
|
||||
for (int i = 0; i < coord.clientLeaderServers.size(); i++)
|
||||
leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader,
|
||||
GetLeaderRequest(coord.clusterKey, UID()),
|
||||
TaskPriority::CoordinationReply));
|
||||
|
||||
choose {
|
||||
when(wait(waitForAll(leaderServers))) {}
|
||||
when(wait(delay(5.0))) { return CoordinatorsResult::COORDINATOR_UNREACHABLE; }
|
||||
|
|
|
@ -380,11 +380,14 @@ ClientCoordinators::ClientCoordinators(Key clusterKey, std::vector<NetworkAddres
|
|||
|
||||
ClientLeaderRegInterface::ClientLeaderRegInterface(NetworkAddress remote)
|
||||
: getLeader(Endpoint({ remote }, WLTOKEN_CLIENTLEADERREG_GETLEADER)),
|
||||
openDatabase(Endpoint({ remote }, WLTOKEN_CLIENTLEADERREG_OPENDATABASE)) {}
|
||||
openDatabase(Endpoint({ remote }, WLTOKEN_CLIENTLEADERREG_OPENDATABASE)),
|
||||
checkDescriptorMutable(Endpoint({ remote }, WLTOKEN_CLIENTLEADERREG_DESCRIPTOR_MUTABLE)) {}
|
||||
|
||||
ClientLeaderRegInterface::ClientLeaderRegInterface(INetwork* local) {
|
||||
getLeader.makeWellKnownEndpoint(WLTOKEN_CLIENTLEADERREG_GETLEADER, TaskPriority::Coordination);
|
||||
openDatabase.makeWellKnownEndpoint(WLTOKEN_CLIENTLEADERREG_OPENDATABASE, TaskPriority::Coordination);
|
||||
checkDescriptorMutable.makeWellKnownEndpoint(WLTOKEN_CLIENTLEADERREG_DESCRIPTOR_MUTABLE,
|
||||
TaskPriority::Coordination);
|
||||
}
|
||||
|
||||
// Nominee is the worker among all workers that are considered as leader by a coordinator
|
||||
|
@ -496,7 +499,8 @@ ACTOR Future<MonitorLeaderInfo> monitorLeaderOneGeneration(Reference<ClusterConn
|
|||
if (leader.get().first.forward) {
|
||||
TraceEvent("MonitorLeaderForwarding")
|
||||
.detail("NewConnStr", leader.get().first.serializedInfo.toString())
|
||||
.detail("OldConnStr", info.intermediateConnFile->getConnectionString().toString()).trackLatest("MonitorLeaderForwarding");
|
||||
.detail("OldConnStr", info.intermediateConnFile->getConnectionString().toString())
|
||||
.trackLatest("MonitorLeaderForwarding");
|
||||
info.intermediateConnFile = makeReference<ClusterConnectionFile>(
|
||||
connFile->getFilename(), ClusterConnectionString(leader.get().first.serializedInfo.toString()));
|
||||
return info;
|
||||
|
|
|
@ -38,6 +38,7 @@
|
|||
#include "fdbclient/DatabaseContext.h"
|
||||
#include "fdbclient/GlobalConfig.actor.h"
|
||||
#include "fdbclient/JsonBuilder.h"
|
||||
#include "fdbclient/KeyBackedTypes.h"
|
||||
#include "fdbclient/KeyRangeMap.h"
|
||||
#include "fdbclient/Knobs.h"
|
||||
#include "fdbclient/ManagementAPI.actor.h"
|
||||
|
@ -121,6 +122,52 @@ NetworkOptions::NetworkOptions()
|
|||
static const Key CLIENT_LATENCY_INFO_PREFIX = LiteralStringRef("client_latency/");
|
||||
static const Key CLIENT_LATENCY_INFO_CTR_PREFIX = LiteralStringRef("client_latency_counter/");
|
||||
|
||||
void DatabaseContext::addTssMapping(StorageServerInterface const& ssi, StorageServerInterface const& tssi) {
|
||||
auto result = tssMapping.find(ssi.id());
|
||||
// Update tss endpoint mapping if ss isn't in mapping, or the interface it mapped to changed
|
||||
if (result == tssMapping.end() ||
|
||||
result->second.getValue.getEndpoint().token.first() != tssi.getValue.getEndpoint().token.first()) {
|
||||
Reference<TSSMetrics> metrics;
|
||||
if (result == tssMapping.end()) {
|
||||
// new TSS pairing
|
||||
metrics = makeReference<TSSMetrics>();
|
||||
tssMetrics[tssi.id()] = metrics;
|
||||
tssMapping[ssi.id()] = tssi;
|
||||
} else {
|
||||
if (result->second.id() == tssi.id()) {
|
||||
metrics = tssMetrics[tssi.id()];
|
||||
} else {
|
||||
TEST(true); // SS now maps to new TSS! This will probably never happen in practice
|
||||
tssMetrics.erase(result->second.id());
|
||||
metrics = makeReference<TSSMetrics>();
|
||||
tssMetrics[tssi.id()] = metrics;
|
||||
}
|
||||
result->second = tssi;
|
||||
}
|
||||
|
||||
queueModel.updateTssEndpoint(ssi.getValue.getEndpoint().token.first(),
|
||||
TSSEndpointData(tssi.id(), tssi.getValue.getEndpoint(), metrics));
|
||||
queueModel.updateTssEndpoint(ssi.getKey.getEndpoint().token.first(),
|
||||
TSSEndpointData(tssi.id(), tssi.getKey.getEndpoint(), metrics));
|
||||
queueModel.updateTssEndpoint(ssi.getKeyValues.getEndpoint().token.first(),
|
||||
TSSEndpointData(tssi.id(), tssi.getKeyValues.getEndpoint(), metrics));
|
||||
queueModel.updateTssEndpoint(ssi.watchValue.getEndpoint().token.first(),
|
||||
TSSEndpointData(tssi.id(), tssi.watchValue.getEndpoint(), metrics));
|
||||
}
|
||||
}
|
||||
|
||||
void DatabaseContext::removeTssMapping(StorageServerInterface const& ssi) {
|
||||
auto result = tssMapping.find(ssi.id());
|
||||
if (result != tssMapping.end()) {
|
||||
tssMetrics.erase(ssi.id());
|
||||
tssMapping.erase(result);
|
||||
queueModel.removeTssEndpoint(ssi.getValue.getEndpoint().token.first());
|
||||
queueModel.removeTssEndpoint(ssi.getKey.getEndpoint().token.first());
|
||||
queueModel.removeTssEndpoint(ssi.getKeyValues.getEndpoint().token.first());
|
||||
queueModel.removeTssEndpoint(ssi.watchValue.getEndpoint().token.first());
|
||||
}
|
||||
}
|
||||
|
||||
Reference<StorageServerInfo> StorageServerInfo::getInterface(DatabaseContext* cx,
|
||||
StorageServerInterface const& ssi,
|
||||
LocalityData const& locality) {
|
||||
|
@ -133,6 +180,7 @@ Reference<StorageServerInfo> StorageServerInfo::getInterface(DatabaseContext* cx
|
|||
// pointing to. This is technically correct, but is very unnatural. We may want to refactor load
|
||||
// balance to take an AsyncVar<Reference<Interface>> so that it is notified when the interface
|
||||
// changes.
|
||||
|
||||
it->second->interf = ssi;
|
||||
} else {
|
||||
it->second->notifyContextDestroyed();
|
||||
|
@ -285,6 +333,13 @@ void delref(DatabaseContext* ptr) {
|
|||
ptr->delref();
|
||||
}
|
||||
|
||||
void traceTSSErrors(const char* name, UID tssId, const std::unordered_map<int, uint64_t>& errorsByCode) {
|
||||
TraceEvent ev(name, tssId);
|
||||
for (auto& it : errorsByCode) {
|
||||
ev.detail("E" + std::to_string(it.first), it.second);
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> databaseLogger(DatabaseContext* cx) {
|
||||
state double lastLogged = 0;
|
||||
loop {
|
||||
|
@ -327,6 +382,62 @@ ACTOR Future<Void> databaseLogger(DatabaseContext* cx) {
|
|||
cx->mutationsPerCommit.clear();
|
||||
cx->bytesPerCommit.clear();
|
||||
|
||||
for (const auto& it : cx->tssMetrics) {
|
||||
// TODO could skip this tss if request counter is zero? would potentially complicate elapsed calculation
|
||||
// though
|
||||
if (it.second->mismatches.getIntervalDelta()) {
|
||||
cx->tssMismatchStream.send(it.first);
|
||||
}
|
||||
|
||||
// do error histograms as separate event
|
||||
if (it.second->ssErrorsByCode.size()) {
|
||||
traceTSSErrors("TSS_SSErrors", it.first, it.second->ssErrorsByCode);
|
||||
}
|
||||
|
||||
if (it.second->tssErrorsByCode.size()) {
|
||||
traceTSSErrors("TSS_TSSErrors", it.first, it.second->tssErrorsByCode);
|
||||
}
|
||||
|
||||
TraceEvent tssEv("TSSClientMetrics", cx->dbId);
|
||||
tssEv.detail("TSSID", it.first)
|
||||
.detail("Elapsed", (lastLogged == 0) ? 0 : now() - lastLogged)
|
||||
.detail("Internal", cx->internal);
|
||||
|
||||
it.second->cc.logToTraceEvent(tssEv);
|
||||
|
||||
tssEv.detail("MeanSSGetValueLatency", it.second->SSgetValueLatency.mean())
|
||||
.detail("MedianSSGetValueLatency", it.second->SSgetValueLatency.median())
|
||||
.detail("SSGetValueLatency90", it.second->SSgetValueLatency.percentile(0.90))
|
||||
.detail("SSGetValueLatency99", it.second->SSgetValueLatency.percentile(0.99));
|
||||
|
||||
tssEv.detail("MeanTSSGetValueLatency", it.second->TSSgetValueLatency.mean())
|
||||
.detail("MedianTSSGetValueLatency", it.second->TSSgetValueLatency.median())
|
||||
.detail("TSSGetValueLatency90", it.second->TSSgetValueLatency.percentile(0.90))
|
||||
.detail("TSSGetValueLatency99", it.second->TSSgetValueLatency.percentile(0.99));
|
||||
|
||||
tssEv.detail("MeanSSGetKeyLatency", it.second->SSgetKeyLatency.mean())
|
||||
.detail("MedianSSGetKeyLatency", it.second->SSgetKeyLatency.median())
|
||||
.detail("SSGetKeyLatency90", it.second->SSgetKeyLatency.percentile(0.90))
|
||||
.detail("SSGetKeyLatency99", it.second->SSgetKeyLatency.percentile(0.99));
|
||||
|
||||
tssEv.detail("MeanTSSGetKeyLatency", it.second->TSSgetKeyLatency.mean())
|
||||
.detail("MedianTSSGetKeyLatency", it.second->TSSgetKeyLatency.median())
|
||||
.detail("TSSGetKeyLatency90", it.second->TSSgetKeyLatency.percentile(0.90))
|
||||
.detail("TSSGetKeyLatency99", it.second->TSSgetKeyLatency.percentile(0.99));
|
||||
|
||||
tssEv.detail("MeanSSGetKeyValuesLatency", it.second->SSgetKeyValuesLatency.mean())
.detail("MedianSSGetKeyValuesLatency", it.second->SSgetKeyValuesLatency.median())
.detail("SSGetKeyValuesLatency90", it.second->SSgetKeyValuesLatency.percentile(0.90))
.detail("SSGetKeyValuesLatency99", it.second->SSgetKeyValuesLatency.percentile(0.99));
|
||||
|
||||
tssEv.detail("MeanTSSGetKeyValuesLatency", it.second->TSSgetKeyValuesLatency.mean())
|
||||
.detail("MedianTSSGetKeyValuesLatency", it.second->TSSgetKeyValuesLatency.median())
|
||||
.detail("TSSGetKeyValuesLatency90", it.second->TSSgetKeyValuesLatency.percentile(0.90))
|
||||
.detail("TSSGetKeyValuesLatency99", it.second->TSSgetKeyValuesLatency.percentile(0.99));
|
||||
|
||||
it.second->clear();
|
||||
}
|
||||
|
||||
lastLogged = now();
|
||||
}
|
||||
}
|
||||
|
@ -711,6 +822,59 @@ ACTOR Future<Void> monitorCacheList(DatabaseContext* self) {
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> handleTssMismatches(DatabaseContext* cx) {
|
||||
state Reference<ReadYourWritesTransaction> tr;
|
||||
state KeyBackedMap<UID, UID> tssMapDB = KeyBackedMap<UID, UID>(tssMappingKeys.begin);
|
||||
loop {
|
||||
state UID tssID = waitNext(cx->tssMismatchStream.getFuture());
|
||||
// find ss pair id so we can remove it from the mapping
|
||||
state UID tssPairID;
|
||||
bool found = false;
|
||||
for (const auto& it : cx->tssMapping) {
|
||||
if (it.second.id() == tssID) {
|
||||
tssPairID = it.first;
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (found) {
|
||||
TraceEvent(SevWarnAlways, "TSS_KillMismatch").detail("TSSID", tssID.toString());
|
||||
TEST(true); // killing TSS because it got mismatch
|
||||
|
||||
// TODO we could write something to the system keyspace and then have DD listen to that keyspace and then DD
|
||||
// do exactly this, so why not just cut out the middle man (or the middle system keys, as it were)
|
||||
tr = makeReference<ReadYourWritesTransaction>(Database(Reference<DatabaseContext>::addRef(cx)));
|
||||
state int tries = 0;
|
||||
loop {
|
||||
try {
|
||||
tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
|
||||
tr->clear(serverTagKeyFor(tssID));
|
||||
tssMapDB.erase(tr, tssPairID);
|
||||
|
||||
wait(tr->commit());
|
||||
|
||||
break;
|
||||
} catch (Error& e) {
|
||||
wait(tr->onError(e));
|
||||
}
|
||||
tries++;
|
||||
if (tries > 10) {
|
||||
// Give up on trying to kill the tss, it'll get another mismatch or a human will investigate
|
||||
// eventually
|
||||
TraceEvent("TSS_KillMismatchGaveUp").detail("TSSID", tssID.toString());
|
||||
break;
|
||||
}
|
||||
}
|
||||
// clear out txn so that the extra DatabaseContext ref gets decref'd and we can free cx
|
||||
tr = makeReference<ReadYourWritesTransaction>();
|
||||
} else {
|
||||
TEST(true); // Not killing TSS with mismatch because it's already gone
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR static Future<HealthMetrics> getHealthMetricsActor(DatabaseContext* cx, bool detailed) {
|
||||
if (now() - cx->healthMetricsLastUpdated < CLIENT_KNOBS->AGGREGATE_HEALTH_METRICS_MAX_STALENESS) {
|
||||
if (detailed) {
|
||||
|
@ -957,9 +1121,8 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<ClusterConnectionF
|
|||
getValueSubmitted.init(LiteralStringRef("NativeAPI.GetValueSubmitted"));
|
||||
getValueCompleted.init(LiteralStringRef("NativeAPI.GetValueCompleted"));
|
||||
|
||||
GlobalConfig::create(this, clientInfo);
|
||||
|
||||
monitorProxiesInfoChange = monitorProxiesChange(clientInfo, &proxiesChangeTrigger);
|
||||
tssMismatchHandler = handleTssMismatches(this);
|
||||
clientStatusUpdater.actor = clientStatusUpdateActor(this);
|
||||
cacheListMonitor = monitorCacheList(this);
|
||||
|
||||
|
@ -1051,14 +1214,16 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<ClusterConnectionF
|
|||
SpecialKeySpace::IMPLTYPE::READWRITE,
|
||||
std::make_unique<ClientProfilingImpl>(
|
||||
KeyRangeRef(LiteralStringRef("profiling/"), LiteralStringRef("profiling0"))
|
||||
.withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin)));
|
||||
.withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin)));
|
||||
registerSpecialKeySpaceModule(
|
||||
SpecialKeySpace::MODULE::MANAGEMENT, SpecialKeySpace::IMPLTYPE::READWRITE,
|
||||
SpecialKeySpace::MODULE::MANAGEMENT,
|
||||
SpecialKeySpace::IMPLTYPE::READWRITE,
|
||||
std::make_unique<MaintenanceImpl>(
|
||||
KeyRangeRef(LiteralStringRef("maintenance/"), LiteralStringRef("maintenance0"))
|
||||
.withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin)));
|
||||
registerSpecialKeySpaceModule(
|
||||
SpecialKeySpace::MODULE::MANAGEMENT, SpecialKeySpace::IMPLTYPE::READWRITE,
|
||||
SpecialKeySpace::MODULE::MANAGEMENT,
|
||||
SpecialKeySpace::IMPLTYPE::READWRITE,
|
||||
std::make_unique<DataDistributionImpl>(
|
||||
KeyRangeRef(LiteralStringRef("data_distribution/"), LiteralStringRef("data_distribution0"))
|
||||
.withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin)));
|
||||
|
@ -1199,6 +1364,8 @@ Database DatabaseContext::create(Reference<AsyncVar<ClientDBInfo>> clientInfo,
|
|||
DatabaseContext::~DatabaseContext() {
|
||||
cacheListMonitor.cancel();
|
||||
monitorProxiesInfoChange.cancel();
|
||||
monitorTssInfoChange.cancel();
|
||||
tssMismatchHandler.cancel();
|
||||
for (auto it = server_interf.begin(); it != server_interf.end(); it = server_interf.erase(it))
|
||||
it->second->notifyContextDestroyed();
|
||||
ASSERT_ABORT(server_interf.empty());
|
||||
|
@ -1553,7 +1720,9 @@ Database Database::createDatabase(Reference<ClusterConnectionFile> connFile,
|
|||
/*switchable*/ true);
|
||||
}
|
||||
|
||||
return Database(db);
|
||||
auto database = Database(db);
|
||||
GlobalConfig::create(database, clientInfo, std::addressof(clientInfo->get()));
|
||||
return database;
|
||||
}
|
||||
|
||||
Database Database::createDatabase(std::string connFileName,
|
||||
|
@ -2015,6 +2184,29 @@ ACTOR Future<Optional<vector<StorageServerInterface>>> transactionalGetServerInt
|
|||
return serverInterfaces;
|
||||
}
|
||||
|
||||
void updateTssMappings(Database cx, const GetKeyServerLocationsReply& reply) {
|
||||
// Since a ss -> tss mapping is included in resultsTssMapping iff that SS is in results and has a tss pair,
|
||||
// all SS in results that do not have a mapping present must not have a tss pair.
|
||||
std::unordered_map<UID, const StorageServerInterface*> ssiById;
|
||||
for (const auto& [_, shard] : reply.results) {
|
||||
for (auto& ssi : shard) {
|
||||
ssiById[ssi.id()] = &ssi;
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto& mapping : reply.resultsTssMapping) {
|
||||
auto ssi = ssiById.find(mapping.first);
|
||||
ASSERT(ssi != ssiById.end());
|
||||
cx->addTssMapping(*ssi->second, mapping.second);
|
||||
ssiById.erase(mapping.first);
|
||||
}
|
||||
|
||||
// if SS didn't have a mapping above, it's still in the ssiById map, so remove its tss mapping
|
||||
for (const auto& it : ssiById) {
|
||||
cx->removeTssMapping(*it.second);
|
||||
}
|
||||
}
|
||||
|
||||
// If isBackward == true, returns the shard containing the key before 'key' (an infinitely long, inexpressible key).
|
||||
// Otherwise returns the shard containing key
|
||||
ACTOR Future<pair<KeyRange, Reference<LocationInfo>>> getKeyLocation_internal(Database cx,
|
||||
|
@ -2047,6 +2239,7 @@ ACTOR Future<pair<KeyRange, Reference<LocationInfo>>> getKeyLocation_internal(Da
|
|||
ASSERT(rep.results.size() == 1);
|
||||
|
||||
auto locationInfo = cx->setCachedLocation(rep.results[0].first, rep.results[0].second);
|
||||
updateTssMappings(cx, rep);
|
||||
return std::make_pair(KeyRange(rep.results[0].first, rep.arena), locationInfo);
|
||||
}
|
||||
}
|
||||
|
@ -2110,6 +2303,7 @@ ACTOR Future<vector<pair<KeyRange, Reference<LocationInfo>>>> getKeyRangeLocatio
|
|||
cx->setCachedLocation(rep.results[shard].first, rep.results[shard].second));
|
||||
wait(yield());
|
||||
}
|
||||
updateTssMappings(cx, rep);
|
||||
|
||||
return results;
|
||||
}
|
||||
|
@ -2235,7 +2429,7 @@ ACTOR Future<Optional<Value>> getValue(Future<Version> version,
|
|||
|
||||
state GetValueReply reply;
|
||||
try {
|
||||
if (CLIENT_BUGGIFY) {
|
||||
if (CLIENT_BUGGIFY_WITH_PROB(.01)) {
|
||||
throw deterministicRandom()->randomChoice(
|
||||
std::vector<Error>{ transaction_too_old(), future_version() });
|
||||
}
|
||||
|
@ -2345,6 +2539,11 @@ ACTOR Future<Key> getKey(Database cx, KeySelector k, Future<Version> version, Tr
|
|||
"NativeAPI.getKey.Before"); //.detail("StartKey",
|
||||
// k.getKey()).detail("Offset",k.offset).detail("OrEqual",k.orEqual);
|
||||
++cx->transactionPhysicalReads;
|
||||
|
||||
GetKeyRequest req(
|
||||
span.context, k, version.get(), cx->sampleReadTags() ? tags : Optional<TagSet>(), getKeyID);
|
||||
req.arena.dependsOn(k.arena());
|
||||
|
||||
state GetKeyReply reply;
|
||||
try {
|
||||
choose {
|
||||
|
@ -2353,11 +2552,7 @@ ACTOR Future<Key> getKey(Database cx, KeySelector k, Future<Version> version, Tr
|
|||
wait(loadBalance(cx.getPtr(),
|
||||
ssi.second,
|
||||
&StorageServerInterface::getKey,
|
||||
GetKeyRequest(span.context,
|
||||
k,
|
||||
version.get(),
|
||||
cx->sampleReadTags() ? tags : Optional<TagSet>(),
|
||||
getKeyID),
|
||||
req,
|
||||
TaskPriority::DefaultPromiseEndpoint,
|
||||
false,
|
||||
cx->enableLocalityLoadBalance ? &cx->queueModel : nullptr))) {
|
||||
|
@ -2718,6 +2913,9 @@ ACTOR Future<RangeResult> getExactRange(Database cx,
|
|||
req.end = firstGreaterOrEqual(range.end);
|
||||
req.spanContext = span.context;
|
||||
|
||||
// keep shard's arena around in case of async tss comparison
|
||||
req.arena.dependsOn(locations[shard].first.arena());
|
||||
|
||||
transformRangeLimits(limits, reverse, req);
|
||||
ASSERT(req.limitBytes > 0 && req.limit != 0 && req.limit < 0 == reverse);
|
||||
|
||||
|
@ -3034,6 +3232,9 @@ ACTOR Future<RangeResult> getRange(Database cx,
|
|||
req.isFetchKeys = (info.taskID == TaskPriority::FetchKeys);
|
||||
req.version = readVersion;
|
||||
|
||||
// In case of async tss comparison, also make req arena depend on begin, end, and/or shard's arena depending
|
||||
// on which is used
|
||||
bool dependOnShard = false;
|
||||
if (reverse && (begin - 1).isDefinitelyLess(shard.begin) &&
|
||||
(!begin.isFirstGreaterOrEqual() ||
|
||||
begin.getKey() != shard.begin)) { // In this case we would be setting modifiedSelectors to true, but
|
||||
|
@ -3041,14 +3242,23 @@ ACTOR Future<RangeResult> getRange(Database cx,
|
|||
|
||||
req.begin = firstGreaterOrEqual(shard.begin);
|
||||
modifiedSelectors = true;
|
||||
} else
|
||||
req.arena.dependsOn(shard.arena());
|
||||
dependOnShard = true;
|
||||
} else {
|
||||
req.begin = begin;
|
||||
req.arena.dependsOn(begin.arena());
|
||||
}
|
||||
|
||||
if (!reverse && end.isDefinitelyGreater(shard.end)) {
|
||||
req.end = firstGreaterOrEqual(shard.end);
|
||||
modifiedSelectors = true;
|
||||
} else
|
||||
if (!dependOnShard) {
|
||||
req.arena.dependsOn(shard.arena());
|
||||
}
|
||||
} else {
|
||||
req.end = end;
|
||||
req.arena.dependsOn(end.arena());
|
||||
}
|
||||
|
||||
transformRangeLimits(limits, reverse, req);
|
||||
ASSERT(req.limitBytes > 0 && req.limit != 0 && req.limit < 0 == reverse);
|
||||
|
@ -3078,7 +3288,7 @@ ACTOR Future<RangeResult> getRange(Database cx,
|
|||
++cx->transactionPhysicalReads;
|
||||
state GetKeyValuesReply rep;
|
||||
try {
|
||||
if (CLIENT_BUGGIFY) {
|
||||
if (CLIENT_BUGGIFY_WITH_PROB(.01)) {
|
||||
throw deterministicRandom()->randomChoice(
|
||||
std::vector<Error>{ transaction_too_old(), future_version() });
|
||||
}
|
||||
|
@ -3133,10 +3343,17 @@ ACTOR Future<RangeResult> getRange(Database cx,
|
|||
output.readThroughEnd = readThroughEnd;
|
||||
|
||||
if (BUGGIFY && limits.hasByteLimit() && output.size() > std::max(1, originalLimits.minRows)) {
|
||||
// Copy instead of resizing because the TSS may be using output's arena for comparison. This only
// happens in simulation, so it's fine.
|
||||
RangeResult copy;
|
||||
int newSize =
|
||||
deterministicRandom()->randomInt(std::max(1, originalLimits.minRows), output.size());
|
||||
for (int i = 0; i < newSize; i++) {
|
||||
copy.push_back_deep(copy.arena(), output[i]);
|
||||
}
|
||||
output = copy;
|
||||
output.more = true;
|
||||
output.resize(
|
||||
output.arena(),
|
||||
deterministicRandom()->randomInt(std::max(1, originalLimits.minRows), output.size()));
|
||||
|
||||
getRangeFinished(cx,
|
||||
trLogInfo,
|
||||
startTime,
|
||||
|
|
|
@ -144,6 +144,16 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
|
|||
"counter":0,
|
||||
"roughness":0.0
|
||||
},
|
||||
"fetched_versions":{
|
||||
"hz":0.0,
|
||||
"counter":0,
|
||||
"roughness":0.0
|
||||
},
|
||||
"fetches_from_logs":{
|
||||
"hz":0.0,
|
||||
"counter":0,
|
||||
"roughness":0.0
|
||||
},
|
||||
"grv_latency_statistics":{
|
||||
"default":{
|
||||
"count":0,
|
||||
|
@ -421,6 +431,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
|
|||
"seconds" : 1.0,
|
||||
"versions" : 1000000
|
||||
},
|
||||
"active_tss_count":0,
|
||||
"degraded_processes":0,
|
||||
"database_available":true,
|
||||
"database_lock_state": {
|
||||
|
@ -648,6 +659,10 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
|
|||
"data_distribution_disabled_for_rebalance":true,
|
||||
"data_distribution_disabled":true,
|
||||
"active_primary_dc":"pv",
|
||||
"bounce_impact":{
|
||||
"can_clean_bounce":true,
|
||||
"reason":""
|
||||
},
|
||||
"configuration":{
|
||||
"log_anti_quorum":0,
|
||||
"log_replicas":2,
|
||||
|
@ -715,6 +730,19 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
|
|||
"memory-2",
|
||||
"memory-radixtree-beta"
|
||||
]},
|
||||
"tss_count":1,
|
||||
"tss_storage_engine":{
|
||||
"$enum":[
|
||||
"ssd",
|
||||
"ssd-1",
|
||||
"ssd-2",
|
||||
"ssd-redwood-experimental",
|
||||
"ssd-rocksdb-experimental",
|
||||
"memory",
|
||||
"memory-1",
|
||||
"memory-2",
|
||||
"memory-radixtree-beta"
|
||||
]},
|
||||
"coordinators_count":1,
|
||||
"excluded_servers":[
|
||||
{
|
||||
|
@ -727,7 +755,8 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
|
|||
"auto_logs":3,
|
||||
"commit_proxies":5,
|
||||
"grv_proxies":1,
|
||||
"backup_worker_enabled":1
|
||||
"backup_worker_enabled":1,
|
||||
"perpetual_storage_wiggle":0
|
||||
},
|
||||
"data":{
|
||||
"least_operating_space_bytes_log_server":0,
|
||||
|
@ -787,7 +816,8 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
|
|||
}
|
||||
}
|
||||
],
|
||||
"least_operating_space_bytes_storage_server":0
|
||||
"least_operating_space_bytes_storage_server":0,
|
||||
"max_machine_failures_without_losing_data":0
|
||||
},
|
||||
"machines":{
|
||||
"$map":{
|
||||
|
|
|
@ -1384,6 +1384,9 @@ Future<RangeResult> GlobalConfigImpl::getRange(ReadYourWritesTransaction* ryw, K
|
|||
} else if (config->value.type() == typeid(int64_t)) {
|
||||
result.push_back_deep(result.arena(),
|
||||
KeyValueRef(prefixedKey, std::to_string(std::any_cast<int64_t>(config->value))));
|
||||
} else if (config->value.type() == typeid(bool)) {
|
||||
result.push_back_deep(result.arena(),
|
||||
KeyValueRef(prefixedKey, std::to_string(std::any_cast<bool>(config->value))));
|
||||
} else if (config->value.type() == typeid(float)) {
|
||||
result.push_back_deep(result.arena(),
|
||||
KeyValueRef(prefixedKey, std::to_string(std::any_cast<float>(config->value))));
|
||||
|
@ -2058,9 +2061,20 @@ Future<Optional<std::string>> DataDistributionImpl::commit(ReadYourWritesTransac
|
|||
try {
|
||||
int mode = boost::lexical_cast<int>(iter->value().second.get().toString());
|
||||
Value modeVal = BinaryWriter::toValue(mode, Unversioned());
|
||||
if (mode == 0 || mode == 1)
|
||||
if (mode == 0 || mode == 1) {
|
||||
// Whenever configuration changes or DD related system keyspace is changed,
|
||||
// actor must grab the moveKeysLockOwnerKey and update moveKeysLockWriteKey.
|
||||
// This prevents concurrent write to the same system keyspace.
|
||||
// When the owner of the DD related system keyspace changes, DD will reboot
|
||||
BinaryWriter wrMyOwner(Unversioned());
|
||||
wrMyOwner << dataDistributionModeLock;
|
||||
ryw->getTransaction().set(moveKeysLockOwnerKey, wrMyOwner.toValue());
|
||||
BinaryWriter wrLastWrite(Unversioned());
|
||||
wrLastWrite << deterministicRandom()->randomUniqueID();
|
||||
ryw->getTransaction().set(moveKeysLockWriteKey, wrLastWrite.toValue());
|
||||
// set mode
|
||||
ryw->getTransaction().set(dataDistributionModeKey, modeVal);
|
||||
else
|
||||
} else
|
||||
msg = ManagementAPIError::toJsonString(false,
|
||||
"datadistribution",
|
||||
"Please set the value of the data_distribution/mode to "
|
||||
|
|
|
@ -0,0 +1,385 @@
|
|||
/*
|
||||
* StorageServerInterface.cpp
|
||||
*
|
||||
* This source file is part of the FoundationDB open source project
|
||||
*
|
||||
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "fdbclient/StorageServerInterface.h"
|
||||
#include "flow/crc32c.h" // for crc32c_append, to checksum values in tss trace events
|
||||
|
||||
// Includes template specializations for all TSS operations on storage server types.
// New StorageServerInterface reply types must be added here or this file won't compile.

// If the size plus hex checksum is shorter than the value itself, record that instead of the actual value.
// The break-even point is 12 characters: e.g. a 13-byte value is logged as "(13)" followed by eight hex digits.
std::string traceChecksumValue(ValueRef s) {
return s.size() > 12 ? format("(%d)%08x", s.size(), crc32c_append(0, s.begin(), s.size())) : s.toString();
}
|
||||
|
||||
template <>
|
||||
bool TSS_doCompare(const GetValueRequest& req,
|
||||
const GetValueReply& src,
|
||||
const GetValueReply& tss,
|
||||
Severity traceSeverity,
|
||||
UID tssId) {
|
||||
if (src.value.present() != tss.value.present() || (src.value.present() && src.value.get() != tss.value.get())) {
|
||||
TraceEvent(traceSeverity, "TSSMismatchGetValue")
|
||||
.suppressFor(1.0)
|
||||
.detail("TSSID", tssId)
|
||||
.detail("Key", req.key.printable())
|
||||
.detail("Version", req.version)
|
||||
.detail("SSReply", src.value.present() ? traceChecksumValue(src.value.get()) : "missing")
|
||||
.detail("TSSReply", tss.value.present() ? traceChecksumValue(tss.value.get()) : "missing");
|
||||
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool TSS_doCompare(const GetKeyRequest& req,
|
||||
const GetKeyReply& src,
|
||||
const GetKeyReply& tss,
|
||||
Severity traceSeverity,
|
||||
UID tssId) {
|
||||
// This process is a bit complicated. Since the tss and ss can return different results if neighboring shards to
// req.sel.key are currently being moved, we validate that the results are the same only if the returned key
// selectors are final. Otherwise, we mark the request as a mismatch only if the difference between the two
// returned key selectors could solely be caused by different results from the storage engines. We can afford to
// only partially check key selectors that start in a TSS shard and end in a non-TSS shard because the other read
// queries and the consistency check will eventually catch a misbehaving storage engine.
|
||||
bool matches = true;
|
||||
if (src.sel.orEqual == tss.sel.orEqual && src.sel.offset == tss.sel.offset) {
|
||||
// full matching case
|
||||
if (src.sel.offset == 0 && src.sel.orEqual) {
|
||||
// found exact key, should be identical
|
||||
matches = src.sel.getKey() == tss.sel.getKey();
|
||||
}
|
||||
// if the query doesn't return the final key, there is an edge case where the ss and tss have different shard
|
||||
// boundaries, so they pass different shard boundary keys back for the same offset
|
||||
} else if (src.sel.getKey() == tss.sel.getKey()) {
|
||||
// There is one case with a positive offset where the shard boundary the incomplete query stopped at is the next
|
||||
// key in the shard that the complete query returned. This is not possible with a negative offset because the
|
||||
// shard boundary is exclusive backwards
|
||||
if (src.sel.offset == 0 && src.sel.orEqual && tss.sel.offset == 1 && !tss.sel.orEqual) {
|
||||
// case where ss was complete and tss was incomplete
|
||||
} else if (tss.sel.offset == 0 && tss.sel.orEqual && src.sel.offset == 1 && !src.sel.orEqual) {
|
||||
// case where tss was complete and ss was incomplete
|
||||
} else {
|
||||
matches = false;
|
||||
}
|
||||
} else {
|
||||
// ss/tss returned different keys, and different offsets and/or orEqual
|
||||
// here we just validate that ordering of the keys matches the ordering of the offsets
|
||||
bool tssKeyLarger = src.sel.getKey() < tss.sel.getKey();
|
||||
// the only case offsets are equal and orEqual aren't equal is the case with a negative offset,
|
||||
// where one response has <=0 with the actual result and the other has <0 with the shard upper boundary.
|
||||
// So whichever one has the actual result should have the lower key.
|
||||
bool tssOffsetLarger = (src.sel.offset == tss.sel.offset) ? tss.sel.orEqual : src.sel.offset < tss.sel.offset;
|
||||
matches = tssKeyLarger != tssOffsetLarger;
|
||||
}
|
||||
if (!matches) {
|
||||
TraceEvent(traceSeverity, "TSSMismatchGetKey")
|
||||
.suppressFor(1.0)
|
||||
.detail("TSSID", tssId)
|
||||
.detail("KeySelector",
|
||||
format("%s%s:%d", req.sel.orEqual ? "=" : "", req.sel.getKey().printable().c_str(), req.sel.offset))
|
||||
.detail("Version", req.version)
|
||||
.detail("SSReply",
|
||||
format("%s%s:%d", src.sel.orEqual ? "=" : "", src.sel.getKey().printable().c_str(), src.sel.offset))
|
||||
.detail(
|
||||
"TSSReply",
|
||||
format("%s%s:%d", tss.sel.orEqual ? "=" : "", tss.sel.getKey().printable().c_str(), tss.sel.offset));
|
||||
}
|
||||
return matches;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool TSS_doCompare(const GetKeyValuesRequest& req,
|
||||
const GetKeyValuesReply& src,
|
||||
const GetKeyValuesReply& tss,
|
||||
Severity traceSeverity,
|
||||
UID tssId) {
|
||||
if (src.more != tss.more || src.data != tss.data) {
|
||||
|
||||
std::string ssResultsString = format("(%d)%s:\n", src.data.size(), src.more ? "+" : "");
|
||||
for (auto& it : src.data) {
|
||||
ssResultsString += "\n" + it.key.printable() + "=" + traceChecksumValue(it.value);
|
||||
}
|
||||
|
||||
std::string tssResultsString = format("(%d)%s:\n", tss.data.size(), tss.more ? "+" : "");
|
||||
for (auto& it : tss.data) {
|
||||
tssResultsString += "\n" + it.key.printable() + "=" + traceChecksumValue(it.value);
|
||||
}
|
||||
|
||||
TraceEvent(traceSeverity, "TSSMismatchGetKeyValues")
|
||||
.suppressFor(1.0)
|
||||
.detail("TSSID", tssId)
|
||||
.detail(
|
||||
"Begin",
|
||||
format(
|
||||
"%s%s:%d", req.begin.orEqual ? "=" : "", req.begin.getKey().printable().c_str(), req.begin.offset))
|
||||
.detail("End",
|
||||
format("%s%s:%d", req.end.orEqual ? "=" : "", req.end.getKey().printable().c_str(), req.end.offset))
|
||||
.detail("Version", req.version)
|
||||
.detail("Limit", req.limit)
|
||||
.detail("LimitBytes", req.limitBytes)
|
||||
.detail("SSReply", ssResultsString)
|
||||
.detail("TSSReply", tssResultsString);
|
||||
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool TSS_doCompare(const WatchValueRequest& req,
|
||||
const WatchValueReply& src,
|
||||
const WatchValueReply& tss,
|
||||
Severity traceSeverity,
|
||||
UID tssId) {
|
||||
// We duplicate watches just for load; no need to validate replies.
|
||||
return true;
|
||||
}
|
||||
|
||||
// no-op template specializations for metrics replies
|
||||
template <>
|
||||
bool TSS_doCompare(const WaitMetricsRequest& req,
|
||||
const StorageMetrics& src,
|
||||
const StorageMetrics& tss,
|
||||
Severity traceSeverity,
|
||||
UID tssId) {
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool TSS_doCompare(const SplitMetricsRequest& req,
|
||||
const SplitMetricsReply& src,
|
||||
const SplitMetricsReply& tss,
|
||||
Severity traceSeverity,
|
||||
UID tssId) {
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool TSS_doCompare(const ReadHotSubRangeRequest& req,
|
||||
const ReadHotSubRangeReply& src,
|
||||
const ReadHotSubRangeReply& tss,
|
||||
Severity traceSeverity,
|
||||
UID tssId) {
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool TSS_doCompare(const SplitRangeRequest& req,
|
||||
const SplitRangeReply& src,
|
||||
const SplitRangeReply& tss,
|
||||
Severity traceSeverity,
|
||||
UID tssId) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// only record metrics for data reads
|
||||
|
||||
template <>
|
||||
void TSSMetrics::recordLatency(const GetValueRequest& req, double ssLatency, double tssLatency) {
|
||||
SSgetValueLatency.addSample(ssLatency);
|
||||
TSSgetValueLatency.addSample(tssLatency);
|
||||
}
|
||||
|
||||
template <>
|
||||
void TSSMetrics::recordLatency(const GetKeyRequest& req, double ssLatency, double tssLatency) {
|
||||
SSgetKeyLatency.addSample(ssLatency);
|
||||
TSSgetKeyLatency.addSample(tssLatency);
|
||||
}
|
||||
|
||||
template <>
|
||||
void TSSMetrics::recordLatency(const GetKeyValuesRequest& req, double ssLatency, double tssLatency) {
|
||||
SSgetKeyValuesLatency.addSample(ssLatency);
|
||||
TSSgetKeyValuesLatency.addSample(tssLatency);
|
||||
}
|
||||
|
||||
template <>
|
||||
void TSSMetrics::recordLatency(const WatchValueRequest& req, double ssLatency, double tssLatency) {}
|
||||
|
||||
template <>
|
||||
void TSSMetrics::recordLatency(const WaitMetricsRequest& req, double ssLatency, double tssLatency) {}
|
||||
|
||||
template <>
|
||||
void TSSMetrics::recordLatency(const SplitMetricsRequest& req, double ssLatency, double tssLatency) {}
|
||||
|
||||
template <>
|
||||
void TSSMetrics::recordLatency(const ReadHotSubRangeRequest& req, double ssLatency, double tssLatency) {}
|
||||
|
||||
template <>
|
||||
void TSSMetrics::recordLatency(const SplitRangeRequest& req, double ssLatency, double tssLatency) {}
|
||||
|
||||
// -------------------
|
||||
|
||||
TEST_CASE("/StorageServerInterface/TSSCompare/TestComparison") {
|
||||
printf("testing tss comparisons\n");
|
||||
|
||||
// Use std::string to avoid compiler warnings about the deprecated StringRef(char*) constructor
|
||||
std::string s_a = "a";
|
||||
std::string s_b = "b";
|
||||
std::string s_c = "c";
|
||||
std::string s_d = "d";
|
||||
std::string s_e = "e";
|
||||
|
||||
// test getValue
|
||||
GetValueRequest gvReq;
|
||||
gvReq.key = StringRef(s_a);
|
||||
gvReq.version = 5;
|
||||
|
||||
UID tssId;
|
||||
|
||||
GetValueReply gvReplyMissing;
|
||||
GetValueReply gvReplyA(Optional<Value>(StringRef(s_a)), false);
|
||||
GetValueReply gvReplyB(Optional<Value>(StringRef(s_b)), false);
|
||||
ASSERT(TSS_doCompare(gvReq, gvReplyMissing, gvReplyMissing, SevInfo, tssId));
|
||||
ASSERT(TSS_doCompare(gvReq, gvReplyA, gvReplyA, SevInfo, tssId));
|
||||
ASSERT(TSS_doCompare(gvReq, gvReplyB, gvReplyB, SevInfo, tssId));
|
||||
|
||||
ASSERT(!TSS_doCompare(gvReq, gvReplyMissing, gvReplyA, SevInfo, tssId));
|
||||
ASSERT(!TSS_doCompare(gvReq, gvReplyA, gvReplyB, SevInfo, tssId));
|
||||
|
||||
// test GetKeyValues
|
||||
Arena a; // for all of the refs; ASAN complains if this isn't done. Alternatively, they could all be Standalone.
|
||||
GetKeyValuesRequest gkvReq;
|
||||
gkvReq.begin = firstGreaterOrEqual(StringRef(a, s_a));
|
||||
gkvReq.end = firstGreaterOrEqual(StringRef(a, s_b));
|
||||
gkvReq.version = 5;
|
||||
gkvReq.limit = 100;
|
||||
gkvReq.limitBytes = 1000;
|
||||
|
||||
GetKeyValuesReply gkvReplyEmpty;
|
||||
GetKeyValuesReply gkvReplyOne;
|
||||
KeyValueRef v;
|
||||
v.key = StringRef(a, s_a);
|
||||
v.value = StringRef(a, s_b);
|
||||
gkvReplyOne.data.push_back_deep(gkvReplyOne.arena, v);
|
||||
GetKeyValuesReply gkvReplyOneMore;
|
||||
gkvReplyOneMore.data.push_back_deep(gkvReplyOneMore.arena, v);
|
||||
gkvReplyOneMore.more = true;
|
||||
|
||||
ASSERT(TSS_doCompare(gkvReq, gkvReplyEmpty, gkvReplyEmpty, SevInfo, tssId));
|
||||
ASSERT(TSS_doCompare(gkvReq, gkvReplyOne, gkvReplyOne, SevInfo, tssId));
|
||||
ASSERT(TSS_doCompare(gkvReq, gkvReplyOneMore, gkvReplyOneMore, SevInfo, tssId));
|
||||
ASSERT(!TSS_doCompare(gkvReq, gkvReplyEmpty, gkvReplyOne, SevInfo, tssId));
|
||||
ASSERT(!TSS_doCompare(gkvReq, gkvReplyOne, gkvReplyOneMore, SevInfo, tssId));
|
||||
|
||||
// test GetKey
|
||||
GetKeyRequest gkReq;
|
||||
gkReq.sel = KeySelectorRef(StringRef(a, s_a), false, 1);
|
||||
gkReq.version = 5;
|
||||
|
||||
GetKeyReply gkReplyA(KeySelectorRef(StringRef(a, s_a), false, 20), false);
|
||||
GetKeyReply gkReplyB(KeySelectorRef(StringRef(a, s_b), false, 10), false);
|
||||
GetKeyReply gkReplyC(KeySelectorRef(StringRef(a, s_c), true, 0), false);
|
||||
GetKeyReply gkReplyD(KeySelectorRef(StringRef(a, s_d), false, -10), false);
|
||||
GetKeyReply gkReplyE(KeySelectorRef(StringRef(a, s_e), false, -20), false);
|
||||
|
||||
// identical cases
|
||||
ASSERT(TSS_doCompare(gkReq, gkReplyA, gkReplyA, SevInfo, tssId));
|
||||
ASSERT(TSS_doCompare(gkReq, gkReplyB, gkReplyB, SevInfo, tssId));
|
||||
ASSERT(TSS_doCompare(gkReq, gkReplyC, gkReplyC, SevInfo, tssId));
|
||||
ASSERT(TSS_doCompare(gkReq, gkReplyD, gkReplyD, SevInfo, tssId));
|
||||
ASSERT(TSS_doCompare(gkReq, gkReplyE, gkReplyE, SevInfo, tssId));
|
||||
|
||||
// relative offset cases
|
||||
ASSERT(TSS_doCompare(gkReq, gkReplyA, gkReplyB, SevInfo, tssId));
|
||||
ASSERT(TSS_doCompare(gkReq, gkReplyB, gkReplyA, SevInfo, tssId));
|
||||
ASSERT(TSS_doCompare(gkReq, gkReplyA, gkReplyC, SevInfo, tssId));
|
||||
ASSERT(TSS_doCompare(gkReq, gkReplyC, gkReplyA, SevInfo, tssId));
|
||||
ASSERT(TSS_doCompare(gkReq, gkReplyB, gkReplyC, SevInfo, tssId));
|
||||
ASSERT(TSS_doCompare(gkReq, gkReplyC, gkReplyB, SevInfo, tssId));
|
||||
|
||||
ASSERT(TSS_doCompare(gkReq, gkReplyC, gkReplyD, SevInfo, tssId));
|
||||
ASSERT(TSS_doCompare(gkReq, gkReplyD, gkReplyC, SevInfo, tssId));
|
||||
ASSERT(TSS_doCompare(gkReq, gkReplyC, gkReplyE, SevInfo, tssId));
|
||||
ASSERT(TSS_doCompare(gkReq, gkReplyE, gkReplyC, SevInfo, tssId));
|
||||
ASSERT(TSS_doCompare(gkReq, gkReplyD, gkReplyE, SevInfo, tssId));
|
||||
ASSERT(TSS_doCompare(gkReq, gkReplyE, gkReplyD, SevInfo, tssId));
|
||||
|
||||
// test same offset/orEqual wrong key
|
||||
ASSERT(!TSS_doCompare(gkReq,
|
||||
GetKeyReply(KeySelectorRef(StringRef(a, s_a), true, 0), false),
|
||||
GetKeyReply(KeySelectorRef(StringRef(a, s_b), true, 0), false),
|
||||
SevInfo,
|
||||
tssId));
|
||||
// this could be from different shard boundaries, so don't say it's a mismatch
|
||||
ASSERT(TSS_doCompare(gkReq,
|
||||
GetKeyReply(KeySelectorRef(StringRef(a, s_a), false, 10), false),
|
||||
GetKeyReply(KeySelectorRef(StringRef(a, s_b), false, 10), false),
|
||||
SevInfo,
|
||||
tssId));
|
||||
|
||||
// test offsets and key difference don't match
|
||||
ASSERT(!TSS_doCompare(gkReq,
|
||||
GetKeyReply(KeySelectorRef(StringRef(a, s_a), false, 0), false),
|
||||
GetKeyReply(KeySelectorRef(StringRef(a, s_b), false, 10), false),
|
||||
SevInfo,
|
||||
tssId));
|
||||
ASSERT(!TSS_doCompare(gkReq,
|
||||
GetKeyReply(KeySelectorRef(StringRef(a, s_a), false, -10), false),
|
||||
GetKeyReply(KeySelectorRef(StringRef(a, s_b), false, 0), false),
|
||||
SevInfo,
|
||||
tssId));
|
||||
|
||||
// test key is next over in one shard, one found it and other didn't
|
||||
// positive
|
||||
// one that didn't find is +1
|
||||
ASSERT(TSS_doCompare(gkReq,
|
||||
GetKeyReply(KeySelectorRef(StringRef(a, s_a), false, 1), false),
|
||||
GetKeyReply(KeySelectorRef(StringRef(a, s_b), true, 0), false),
|
||||
SevInfo,
|
||||
tssId));
|
||||
ASSERT(!TSS_doCompare(gkReq,
|
||||
GetKeyReply(KeySelectorRef(StringRef(a, s_a), true, 0), false),
|
||||
GetKeyReply(KeySelectorRef(StringRef(a, s_b), false, 1), false),
|
||||
SevInfo,
|
||||
tssId));
|
||||
|
||||
// negative will have zero offset but not equal set
|
||||
ASSERT(TSS_doCompare(gkReq,
|
||||
GetKeyReply(KeySelectorRef(StringRef(a, s_a), true, 0), false),
|
||||
GetKeyReply(KeySelectorRef(StringRef(a, s_b), false, 0), false),
|
||||
SevInfo,
|
||||
tssId));
|
||||
ASSERT(!TSS_doCompare(gkReq,
|
||||
GetKeyReply(KeySelectorRef(StringRef(a, s_a), false, 0), false),
|
||||
GetKeyReply(KeySelectorRef(StringRef(a, s_b), true, 0), false),
|
||||
SevInfo,
|
||||
tssId));
|
||||
|
||||
// test shard boundary key returned by incomplete query is the same as the key found by the other (only possible in
|
||||
// positive direction)
|
||||
ASSERT(TSS_doCompare(gkReq,
|
||||
GetKeyReply(KeySelectorRef(StringRef(a, s_a), true, 0), false),
|
||||
GetKeyReply(KeySelectorRef(StringRef(a, s_a), false, 1), false),
|
||||
SevInfo,
|
||||
tssId));
|
||||
|
||||
// explicitly test the checksum function
|
||||
std::string s12 = "ABCDEFGHIJKL";
|
||||
std::string s13 = "ABCDEFGHIJKLO";
|
||||
std::string checksumStart13 = "(13)";
|
||||
ASSERT(s_a == traceChecksumValue(StringRef(s_a)));
|
||||
ASSERT(s12 == traceChecksumValue(StringRef(s12)));
|
||||
ASSERT(checksumStart13 == traceChecksumValue(StringRef(s13)).substr(0, 4));
|
||||
return Void();
|
||||
}
|
|
@ -29,7 +29,9 @@
|
|||
#include "fdbrpc/LoadBalance.actor.h"
|
||||
#include "fdbrpc/Stats.h"
|
||||
#include "fdbrpc/TimedRequest.h"
|
||||
#include "fdbrpc/TSSComparison.h"
|
||||
#include "fdbclient/TagThrottle.h"
|
||||
#include "flow/UnitTest.h"
|
||||
|
||||
// Dead code, removed in the next protocol version
|
||||
struct VersionReply {
|
||||
|
@ -54,6 +56,7 @@ struct StorageServerInterface {
|
|||
|
||||
LocalityData locality;
|
||||
UID uniqueID;
|
||||
Optional<UID> tssPairID;
|
||||
|
||||
RequestStream<struct GetValueRequest> getValue;
|
||||
RequestStream<struct GetKeyRequest> getKey;
|
||||
|
@ -80,6 +83,7 @@ struct StorageServerInterface {
|
|||
NetworkAddress stableAddress() const { return getValue.getEndpoint().getStableAddress(); }
|
||||
Optional<NetworkAddress> secondaryAddress() const { return getValue.getEndpoint().addresses.secondaryAddress; }
|
||||
UID id() const { return uniqueID; }
|
||||
bool isTss() const { return tssPairID.present(); }
|
||||
std::string toString() const { return id().shortString(); }
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
|
@ -88,7 +92,11 @@ struct StorageServerInterface {
|
|||
// considered
|
||||
|
||||
if (ar.protocolVersion().hasSmallEndpoints()) {
|
||||
serializer(ar, uniqueID, locality, getValue);
|
||||
if (ar.protocolVersion().hasTSS()) {
|
||||
serializer(ar, uniqueID, locality, getValue, tssPairID);
|
||||
} else {
|
||||
serializer(ar, uniqueID, locality, getValue);
|
||||
}
|
||||
if (Ar::isDeserializing) {
|
||||
getKey = RequestStream<struct GetKeyRequest>(getValue.getEndpoint().getAdjustedEndpoint(1));
|
||||
getKeyValues = RequestStream<struct GetKeyValuesRequest>(getValue.getEndpoint().getAdjustedEndpoint(2));
|
||||
|
@ -127,8 +135,9 @@ struct StorageServerInterface {
|
|||
waitFailure,
|
||||
getQueuingMetrics,
|
||||
getKeyValueStoreType);
|
||||
if (ar.protocolVersion().hasWatches())
|
||||
if (ar.protocolVersion().hasWatches()) {
|
||||
serializer(ar, watchValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
bool operator==(StorageServerInterface const& s) const { return uniqueID == s.uniqueID; }
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include "flow/Arena.h"
|
||||
#include "flow/TDMetric.actor.h"
|
||||
#include "flow/serialize.h"
|
||||
#include "flow/UnitTest.h"
|
||||
|
||||
const KeyRef systemKeysPrefix = LiteralStringRef("\xff");
|
||||
const KeyRangeRef normalKeys(KeyRef(), systemKeysPrefix);
|
||||
|
@ -345,7 +346,10 @@ uint16_t cacheChangeKeyDecodeIndex(const KeyRef& key) {
|
|||
return idx;
|
||||
}
|
||||
|
||||
const KeyRangeRef tssMappingKeys(LiteralStringRef("\xff/tss/"), LiteralStringRef("\xff/tss0"));
|
||||
|
||||
const KeyRangeRef serverTagKeys(LiteralStringRef("\xff/serverTag/"), LiteralStringRef("\xff/serverTag0"));
|
||||
|
||||
const KeyRef serverTagPrefix = serverTagKeys.begin;
|
||||
const KeyRangeRef serverTagConflictKeys(LiteralStringRef("\xff/serverTagConflict/"),
|
||||
LiteralStringRef("\xff/serverTagConflict0"));
|
||||
|
@ -532,6 +536,7 @@ const Key serverListKeyFor(UID serverID) {
|
|||
return wr.toValue();
|
||||
}
|
||||
|
||||
// TODO use flatbuffers depending on version
|
||||
const Value serverListValue(StorageServerInterface const& server) {
|
||||
BinaryWriter wr(IncludeVersion(ProtocolVersion::withServerListValue()));
|
||||
wr << server;
|
||||
|
@ -550,6 +555,17 @@ StorageServerInterface decodeServerListValue(ValueRef const& value) {
|
|||
return s;
|
||||
}
|
||||
|
||||
const Value serverListValueFB(StorageServerInterface const& server) {
|
||||
return ObjectWriter::toValue(server, IncludeVersion());
|
||||
}
|
||||
|
||||
StorageServerInterface decodeServerListValueFB(ValueRef const& value) {
|
||||
StorageServerInterface s;
|
||||
ObjectReader reader(value.begin(), IncludeVersion());
|
||||
reader.deserialize(s);
|
||||
return s;
|
||||
}
|
||||
|
||||
// processClassKeys.contains(k) iff k.startsWith( processClassKeys.begin ) because '/'+1 == '0'
|
||||
const KeyRangeRef processClassKeys(LiteralStringRef("\xff/processClass/"), LiteralStringRef("\xff/processClass0"));
|
||||
const KeyRef processClassPrefix = processClassKeys.begin;
|
||||
|
@ -594,6 +610,9 @@ ProcessClass decodeProcessClassValue(ValueRef const& value) {
|
|||
const KeyRangeRef configKeys(LiteralStringRef("\xff/conf/"), LiteralStringRef("\xff/conf0"));
|
||||
const KeyRef configKeysPrefix = configKeys.begin;
|
||||
|
||||
const KeyRef perpetualStorageWiggleKey(LiteralStringRef("\xff/conf/perpetual_storage_wiggle"));
|
||||
const KeyRef wigglingStorageServerKey(LiteralStringRef("\xff/storageWigglePID"));
|
||||
|
||||
const KeyRef triggerDDTeamInfoPrintKey(LiteralStringRef("\xff/triggerDDTeamInfoPrint"));
|
||||
|
||||
const KeyRangeRef excludedServersKeys(LiteralStringRef("\xff/conf/excluded/"), LiteralStringRef("\xff/conf/excluded0"));
|
||||
|
@ -633,15 +652,17 @@ std::string encodeFailedServersKey(AddressExclusion const& addr) {
|
|||
// const KeyRangeRef globalConfigKeys( LiteralStringRef("\xff/globalConfig/"), LiteralStringRef("\xff/globalConfig0") );
|
||||
// const KeyRef globalConfigPrefix = globalConfigKeys.begin;
|
||||
|
||||
const KeyRangeRef globalConfigDataKeys( LiteralStringRef("\xff/globalConfig/k/"), LiteralStringRef("\xff/globalConfig/k0") );
|
||||
const KeyRangeRef globalConfigDataKeys(LiteralStringRef("\xff/globalConfig/k/"),
|
||||
LiteralStringRef("\xff/globalConfig/k0"));
|
||||
const KeyRef globalConfigKeysPrefix = globalConfigDataKeys.begin;
|
||||
|
||||
const KeyRangeRef globalConfigHistoryKeys( LiteralStringRef("\xff/globalConfig/h/"), LiteralStringRef("\xff/globalConfig/h0") );
|
||||
const KeyRangeRef globalConfigHistoryKeys(LiteralStringRef("\xff/globalConfig/h/"),
|
||||
LiteralStringRef("\xff/globalConfig/h0"));
|
||||
const KeyRef globalConfigHistoryPrefix = globalConfigHistoryKeys.begin;
|
||||
|
||||
const KeyRef globalConfigVersionKey = LiteralStringRef("\xff/globalConfig/v");
|
||||
|
||||
const KeyRangeRef workerListKeys( LiteralStringRef("\xff/worker/"), LiteralStringRef("\xff/worker0") );
|
||||
const KeyRangeRef workerListKeys(LiteralStringRef("\xff/worker/"), LiteralStringRef("\xff/worker0"));
|
||||
const KeyRef workerListPrefix = workerListKeys.begin;
|
||||
|
||||
const Key workerListKeyFor(StringRef processID) {
|
||||
|
@ -1082,3 +1103,60 @@ const KeyRangeRef testOnlyTxnStateStorePrefixRange(LiteralStringRef("\xff/TESTON
|
|||
const KeyRef writeRecoveryKey = LiteralStringRef("\xff/writeRecovery");
|
||||
const ValueRef writeRecoveryKeyTrue = LiteralStringRef("1");
|
||||
const KeyRef snapshotEndVersionKey = LiteralStringRef("\xff/snapshotEndVersion");
|
||||
|
||||
// for tests
|
||||
void testSSISerdes(StorageServerInterface const& ssi, bool useFB) {
|
||||
printf("ssi=\nid=%s\nlocality=%s\nisTss=%s\ntssId=%s\naddress=%s\ngetValue=%s\n\n\n",
|
||||
ssi.id().toString().c_str(),
|
||||
ssi.locality.toString().c_str(),
|
||||
ssi.isTss() ? "true" : "false",
|
||||
ssi.isTss() ? ssi.tssPairID.get().toString().c_str() : "",
|
||||
ssi.address().toString().c_str(),
|
||||
ssi.getValue.getEndpoint().token.toString().c_str());
|
||||
|
||||
StorageServerInterface ssi2 =
|
||||
(useFB) ? decodeServerListValueFB(serverListValueFB(ssi)) : decodeServerListValue(serverListValue(ssi));
|
||||
|
||||
printf("ssi2=\nid=%s\nlocality=%s\nisTss=%s\ntssId=%s\naddress=%s\ngetValue=%s\n\n\n",
|
||||
ssi2.id().toString().c_str(),
|
||||
ssi2.locality.toString().c_str(),
|
||||
ssi2.isTss() ? "true" : "false",
|
||||
ssi2.isTss() ? ssi2.tssPairID.get().toString().c_str() : "",
|
||||
ssi2.address().toString().c_str(),
|
||||
ssi2.getValue.getEndpoint().token.toString().c_str());
|
||||
|
||||
ASSERT(ssi.id() == ssi2.id());
|
||||
ASSERT(ssi.locality == ssi2.locality);
|
||||
ASSERT(ssi.isTss() == ssi2.isTss());
|
||||
if (ssi.isTss()) {
|
||||
ASSERT(ssi.tssPairID.get() == ssi2.tssPairID.get());
|
||||
}
|
||||
ASSERT(ssi.address() == ssi2.address());
|
||||
ASSERT(ssi.getValue.getEndpoint().token == ssi2.getValue.getEndpoint().token);
|
||||
}
|
||||
|
||||
// unit test for serialization since tss stuff had bugs
|
||||
TEST_CASE("/SystemData/SerDes/SSI") {
|
||||
printf("testing ssi serdes\n");
|
||||
LocalityData localityData(Optional<Standalone<StringRef>>(),
|
||||
Standalone<StringRef>(deterministicRandom()->randomUniqueID().toString()),
|
||||
Standalone<StringRef>(deterministicRandom()->randomUniqueID().toString()),
|
||||
Optional<Standalone<StringRef>>());
|
||||
|
||||
// non-tss
|
||||
StorageServerInterface ssi;
|
||||
ssi.uniqueID = UID(0x1234123412341234, 0x5678567856785678);
|
||||
ssi.locality = localityData;
|
||||
ssi.initEndpoints();
|
||||
|
||||
testSSISerdes(ssi, false);
|
||||
testSSISerdes(ssi, true);
|
||||
|
||||
ssi.tssPairID = UID(0x2345234523452345, 0x1238123812381238);
|
||||
|
||||
testSSISerdes(ssi, false);
|
||||
testSSISerdes(ssi, true);
|
||||
printf("ssi serdes test complete\n");
|
||||
|
||||
return Void();
|
||||
}
|
|
@ -115,6 +115,9 @@ extern const KeyRef cacheChangePrefix;
const Key cacheChangeKeyFor(uint16_t idx);
uint16_t cacheChangeKeyDecodeIndex(const KeyRef& key);

// "\xff/tss/[[serverId]]" := "[[tssId]]"
extern const KeyRangeRef tssMappingKeys;

// "\xff/serverTag/[[serverID]]" = "[[Tag]]"
// Provides the Tag for the given serverID. Used to access a
// storage server's corresponding TLog in order to apply mutations.

@ -196,6 +199,8 @@ UID decodeProcessClassKeyOld(KeyRef const& key);
extern const KeyRangeRef configKeys;
extern const KeyRef configKeysPrefix;

extern const KeyRef perpetualStorageWiggleKey;
extern const KeyRef wigglingStorageServerKey;
// Change the value of this key to anything and that will trigger detailed data distribution team info log.
extern const KeyRef triggerDDTeamInfoPrintKey;
@ -71,6 +71,8 @@ Tuple::Tuple(StringRef const& str, bool exclude_incomplete) {
            i += sizeof(float) + 1;
        } else if (data[i] == 0x21) {
            i += sizeof(double) + 1;
        } else if (data[i] == 0x26 || data[i] == 0x27) {
            i += 1;
        } else if (data[i] == '\x00') {
            i += 1;
        } else {

@ -144,6 +146,16 @@ Tuple& Tuple::append(int64_t value) {
    return *this;
}

Tuple& Tuple::appendBool(bool value) {
    offsets.push_back(data.size());
    if (value) {
        data.push_back(data.arena(), 0x27);
    } else {
        data.push_back(data.arena(), 0x26);
    }
    return *this;
}

Tuple& Tuple::appendFloat(float value) {
    offsets.push_back(data.size());
    float swap = bigEndianFloat(value);

@ -192,6 +204,8 @@ Tuple::ElementType Tuple::getType(size_t index) const {
        return ElementType::FLOAT;
    } else if (code == 0x21) {
        return ElementType::DOUBLE;
    } else if (code == 0x26 || code == 0x27) {
        return ElementType::BOOL;
    } else {
        throw invalid_tuple_data_type();
    }

@ -287,6 +301,21 @@ int64_t Tuple::getInt(size_t index, bool allow_incomplete) const {
}

// TODO: Combine with bindings/flow/Tuple.*. This code is copied from there.
bool Tuple::getBool(size_t index) const {
    if (index >= offsets.size()) {
        throw invalid_tuple_index();
    }
    ASSERT_LT(offsets[index], data.size());
    uint8_t code = data[offsets[index]];
    if (code == 0x26) {
        return false;
    } else if (code == 0x27) {
        return true;
    } else {
        throw invalid_tuple_data_type();
    }
}

float Tuple::getFloat(size_t index) const {
    if (index >= offsets.size()) {
        throw invalid_tuple_index();
@ -40,6 +40,7 @@ struct Tuple {
    Tuple& append(int64_t);
    // There are some ambiguous append calls in fdbclient, so to make it easier
    // to add append for floats and doubles, name them differently for now.
    Tuple& appendBool(bool);
    Tuple& appendFloat(float);
    Tuple& appendDouble(double);
    Tuple& appendNull();

@ -51,7 +52,7 @@ struct Tuple {
        return append(t);
    }

    enum ElementType { NULL_TYPE, INT, BYTES, UTF8, FLOAT, DOUBLE };
    enum ElementType { NULL_TYPE, INT, BYTES, UTF8, BOOL, FLOAT, DOUBLE };

    // this is number of elements, not length of data
    size_t size() const { return offsets.size(); }

@ -59,6 +60,7 @@ struct Tuple {
    ElementType getType(size_t index) const;
    Standalone<StringRef> getString(size_t index) const;
    int64_t getInt(size_t index, bool allow_incomplete = false) const;
    bool getBool(size_t index) const;
    float getFloat(size_t index) const;
    double getDouble(size_t index) const;
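The BOOL element is carried end to end: appendBool() writes type code 0x26 or 0x27, getType() reports ElementType::BOOL for either code, and getBool() decodes it back. A minimal round-trip sketch against the fdbclient Tuple API shown above; the element value and field name are purely illustrative and not part of this change:

```cpp
#include "fdbclient/Tuple.h"

// Sketch: pack a bool into a tuple and decode it again, assuming the
// appendBool/getBool additions from this diff. "isPrimary" is a made-up element.
void tupleBoolRoundTrip() {
    Tuple t;
    t.append(LiteralStringRef("isPrimary")).appendBool(true);

    Standalone<StringRef> packed = t.pack();
    Tuple decoded = Tuple::unpack(packed);

    ASSERT(decoded.getType(1) == Tuple::ElementType::BOOL); // 0x27 decodes as BOOL
    ASSERT(decoded.getBool(1));
}
```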
@ -46,7 +46,8 @@ EvictablePage::~EvictablePage() {
    }
}

std::map<std::string, OpenFileInfo> AsyncFileCached::openFiles;
// A map of filename to the file handle for all opened cached files
std::map<std::string, UnsafeWeakFutureReference<IAsyncFile>> AsyncFileCached::openFiles;

void AsyncFileCached::remove_page(AFCPage* page) {
    pages.erase(page->pageOffset);
@ -132,39 +132,32 @@ struct EvictablePageCache : ReferenceCounted<EvictablePageCache> {
    const CacheEvictionType cacheEvictionType;
};

struct OpenFileInfo : NonCopyable {
    IAsyncFile* f;
    Future<Reference<IAsyncFile>> opened; // Only valid until the file is fully opened

    OpenFileInfo() : f(0) {}
    OpenFileInfo(OpenFileInfo&& r) noexcept : f(r.f), opened(std::move(r.opened)) { r.f = 0; }

    Future<Reference<IAsyncFile>> get() {
        if (f)
            return Reference<IAsyncFile>::addRef(f);
        else
            return opened;
    }
};

struct AFCPage;

class AsyncFileCached final : public IAsyncFile, public ReferenceCounted<AsyncFileCached> {
    friend struct AFCPage;

public:
    // Opens a file that uses the FDB in-memory page cache
    static Future<Reference<IAsyncFile>> open(std::string filename, int flags, int mode) {
        //TraceEvent("AsyncFileCachedOpen").detail("Filename", filename);
        if (openFiles.find(filename) == openFiles.end()) {
        auto itr = openFiles.find(filename);
        if (itr == openFiles.end()) {
            auto f = open_impl(filename, flags, mode);
            if (f.isReady() && f.isError())
                return f;
            if (!f.isReady())
                openFiles[filename].opened = f;
            else
                return f.get();

            auto result = openFiles.try_emplace(filename, f);

            // This should be inserting a new entry
            ASSERT(result.second);
            itr = result.first;

            // We return here instead of falling through to the outer scope so that we don't delete all references to
            // the underlying file before returning
            return itr->second.get();
        }
        return openFiles[filename].get();
        return itr->second.get();
    }

    Future<int> read(void* data, int length, int64_t offset) override {

@ -263,7 +256,9 @@ public:
    ~AsyncFileCached() override;

private:
    static std::map<std::string, OpenFileInfo> openFiles;
    // A map of filename to the file handle for all opened cached files
    static std::map<std::string, UnsafeWeakFutureReference<IAsyncFile>> openFiles;

    std::string filename;
    Reference<IAsyncFile> uncached;
    int64_t length;

@ -330,6 +325,7 @@ private:

    static Future<Reference<IAsyncFile>> open_impl(std::string filename, int flags, int mode);

    // Opens a file that uses the FDB in-memory page cache
    ACTOR static Future<Reference<IAsyncFile>> open_impl(std::string filename,
                                                         int flags,
                                                         int mode,

@ -345,10 +341,7 @@ private:
            TraceEvent("AFCUnderlyingOpenEnd").detail("Filename", filename);
            int64_t l = wait(f->size());
            TraceEvent("AFCUnderlyingSize").detail("Filename", filename).detail("Size", l);
            auto& of = openFiles[filename];
            of.f = new AsyncFileCached(f, filename, l, pageCache);
            of.opened = Future<Reference<IAsyncFile>>();
            return Reference<IAsyncFile>(of.f);
            return new AsyncFileCached(f, filename, l, pageCache);
        } catch (Error& e) {
            if (e.code() != error_code_actor_cancelled)
                openFiles.erase(filename);
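The rewritten open() above changes the caching discipline: instead of an OpenFileInfo holding a raw IAsyncFile* for the lifetime of the cache, the map now stores weak future references, so a cached file is reused only while something else still holds it open. A rough standard-C++ analogy of that pattern, using std::weak_ptr as a stand-in for UnsafeWeakFutureReference; FileHandle and openImpl here are invented purely for illustration:

```cpp
#include <map>
#include <memory>
#include <string>

// Stand-in for an opened file; in AsyncFileCached this would be an IAsyncFile.
struct FileHandle {
    std::string name;
};

// A weak cache: entries do not extend the lifetime of the files they point to.
static std::map<std::string, std::weak_ptr<FileHandle>> openFiles;

// Hypothetical "real" open, called only on a cache miss.
static std::shared_ptr<FileHandle> openImpl(const std::string& name) {
    return std::make_shared<FileHandle>(FileHandle{ name });
}

std::shared_ptr<FileHandle> openCached(const std::string& name) {
    auto itr = openFiles.find(name);
    if (itr != openFiles.end()) {
        if (auto existing = itr->second.lock()) {
            return existing; // still open somewhere else, so reuse it
        }
    }
    auto fresh = openImpl(name);
    openFiles[name] = fresh; // store a non-owning reference
    return fresh;
}
```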
@ -130,6 +130,9 @@ public:
|
|||
UID id;
|
||||
std::string filename;
|
||||
|
||||
// For files that use atomic write and create, they are initially created with an extra suffix
|
||||
std::string initialFilename;
|
||||
|
||||
// An approximation of the size of the file; .size() should be used instead of this variable in most cases
|
||||
mutable int64_t approximateSize;
|
||||
|
||||
|
@ -182,11 +185,13 @@ private:
|
|||
reponses; // cannot call getResult on this actor collection, since the actors will be on different processes
|
||||
|
||||
AsyncFileNonDurable(const std::string& filename,
|
||||
const std::string& initialFilename,
|
||||
Reference<IAsyncFile> file,
|
||||
Reference<DiskParameters> diskParameters,
|
||||
NetworkAddress openedAddress,
|
||||
bool aio)
|
||||
: openedAddress(openedAddress), pendingModifications(uint64_t(-1)), approximateSize(0), reponses(false),
|
||||
: filename(filename), initialFilename(initialFilename), file(file), diskParameters(diskParameters),
|
||||
openedAddress(openedAddress), pendingModifications(uint64_t(-1)), approximateSize(0), reponses(false),
|
||||
aio(aio) {
|
||||
|
||||
// This is only designed to work in simulation
|
||||
|
@ -194,9 +199,6 @@ private:
|
|||
this->id = deterministicRandom()->randomUniqueID();
|
||||
|
||||
//TraceEvent("AsyncFileNonDurable_Create", id).detail("Filename", filename);
|
||||
this->file = file;
|
||||
this->filename = filename;
|
||||
this->diskParameters = diskParameters;
|
||||
maxWriteDelay = FLOW_KNOBS->NON_DURABLE_MAX_WRITE_DELAY;
|
||||
hasBeenSynced = false;
|
||||
|
||||
|
@ -236,10 +238,11 @@ public:
|
|||
//TraceEvent("AsyncFileNonDurableOpenWaitOnDelete2").detail("Filename", filename);
|
||||
if (shutdown.isReady())
|
||||
throw io_error().asInjectedFault();
|
||||
wait(g_simulator.onProcess(currentProcess, currentTaskID));
|
||||
}
|
||||
|
||||
state Reference<AsyncFileNonDurable> nonDurableFile(
|
||||
new AsyncFileNonDurable(filename, file, diskParameters, currentProcess->address, aio));
|
||||
new AsyncFileNonDurable(filename, actualFilename, file, diskParameters, currentProcess->address, aio));
|
||||
|
||||
// Causes the approximateSize member to be set
|
||||
state Future<int64_t> sizeFuture = nonDurableFile->size();
|
||||
|
@ -269,13 +272,38 @@ public:
|
|||
}
|
||||
|
||||
void addref() override { ReferenceCounted<AsyncFileNonDurable>::addref(); }
|
||||
|
||||
void delref() override {
|
||||
if (delref_no_destroy()) {
|
||||
ASSERT(filesBeingDeleted.count(filename) == 0);
|
||||
//TraceEvent("AsyncFileNonDurable_StartDelete", id).detail("Filename", filename);
|
||||
Future<Void> deleteFuture = deleteFile(this);
|
||||
if (!deleteFuture.isReady())
|
||||
filesBeingDeleted[filename] = deleteFuture;
|
||||
if (filesBeingDeleted.count(filename) == 0) {
|
||||
//TraceEvent("AsyncFileNonDurable_StartDelete", id).detail("Filename", filename);
|
||||
Future<Void> deleteFuture = deleteFile(this);
|
||||
if (!deleteFuture.isReady())
|
||||
filesBeingDeleted[filename] = deleteFuture;
|
||||
}
|
||||
|
||||
removeOpenFile(filename, this);
|
||||
if (initialFilename != filename) {
|
||||
removeOpenFile(initialFilename, this);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Removes a file from the openFiles map
|
||||
static void removeOpenFile(std::string filename, AsyncFileNonDurable* file) {
|
||||
auto& openFiles = g_simulator.getCurrentProcess()->machine->openFiles;
|
||||
|
||||
auto iter = openFiles.find(filename);
|
||||
|
||||
// Various actions (e.g. simulated delete) can remove a file from openFiles prematurely, so it may already
|
||||
// be gone. Renamed files (from atomic write and create) will also be present under only one of the two
|
||||
// names.
|
||||
if (iter != openFiles.end()) {
|
||||
// even if the filename exists, it doesn't mean that it references the same file. It could be that the
|
||||
// file was renamed and later a file with the same name was opened.
|
||||
if (iter->second.getPtrIfReady().orDefault(nullptr) == file) {
|
||||
openFiles.erase(iter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -832,11 +860,9 @@ private:
|
|||
//TraceEvent("AsyncFileNonDurable_FinishDelete", self->id).detail("Filename", self->filename);
|
||||
|
||||
delete self;
|
||||
wait(g_simulator.onProcess(currentProcess, currentTaskID));
|
||||
return Void();
|
||||
} catch (Error& e) {
|
||||
state Error err = e;
|
||||
wait(g_simulator.onProcess(currentProcess, currentTaskID));
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,7 +29,8 @@ set(FDBRPC_SRCS
  sim2.actor.cpp
  sim_validation.cpp
  TimedRequest.h
  TraceFileIO.cpp)
  TraceFileIO.cpp
  TSSComparison.h)

set(COMPILE_EIO OFF)
@ -51,6 +51,8 @@ constexpr UID WLTOKEN_PING_PACKET(-1, 1);
constexpr int PACKET_LEN_WIDTH = sizeof(uint32_t);
const uint64_t TOKEN_STREAM_FLAG = 1;

const int WLTOKEN_COUNTS = 12; // number of wellKnownEndpoints

class EndpointMap : NonCopyable {
public:
    // Reserve space for this many wellKnownEndpoints

@ -96,6 +98,7 @@ void EndpointMap::realloc() {

void EndpointMap::insertWellKnown(NetworkMessageReceiver* r, const Endpoint::Token& token, TaskPriority priority) {
    int index = token.second();
    ASSERT(index <= WLTOKEN_COUNTS);
    ASSERT(data[index].receiver == nullptr);
    data[index].receiver = r;
    data[index].token() =

@ -334,7 +337,7 @@ ACTOR Future<Void> pingLatencyLogger(TransportData* self) {
}

TransportData::TransportData(uint64_t transportId)
  : endpoints(/*wellKnownTokenCount*/ 11), endpointNotFoundReceiver(endpoints), pingReceiver(endpoints),
  : endpoints(WLTOKEN_COUNTS), endpointNotFoundReceiver(endpoints), pingReceiver(endpoints),
    warnAlwaysForLargePacket(true), lastIncompatibleMessage(0), transportId(transportId),
    numIncompatibleConnections(0) {
    degraded = makeReference<AsyncVar<bool>>(false);

@ -1215,7 +1218,7 @@ ACTOR static Future<Void> connectionReader(TransportData* transport,
                }
                compatible = false;
                if (!protocolVersion.hasInexpensiveMultiVersionClient()) {
                    if(peer) {
                    if (peer) {
                        peer->protocolVersion->set(protocolVersion);
                    }
@ -36,6 +36,8 @@
|
|||
#include "fdbrpc/Locality.h"
|
||||
#include "fdbrpc/QueueModel.h"
|
||||
#include "fdbrpc/MultiInterface.h"
|
||||
#include "fdbrpc/simulator.h" // for checking tss simulation mode
|
||||
#include "fdbrpc/TSSComparison.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
using std::vector;
|
||||
|
@ -75,6 +77,97 @@ struct LoadBalancedReply {
|
|||
Optional<LoadBalancedReply> getLoadBalancedReply(const LoadBalancedReply* reply);
|
||||
Optional<LoadBalancedReply> getLoadBalancedReply(const void*);
|
||||
|
||||
ACTOR template <class Req, class Resp>
|
||||
Future<Void> tssComparison(Req req,
|
||||
Future<ErrorOr<Resp>> fSource,
|
||||
Future<ErrorOr<Resp>> fTss,
|
||||
TSSEndpointData tssData) {
|
||||
state double startTime = now();
|
||||
state Future<Optional<ErrorOr<Resp>>> fTssWithTimeout = timeout(fTss, FLOW_KNOBS->LOAD_BALANCE_TSS_TIMEOUT);
|
||||
state int finished = 0;
|
||||
state double srcEndTime;
|
||||
state double tssEndTime;
|
||||
|
||||
loop {
|
||||
choose {
|
||||
when(state ErrorOr<Resp> src = wait(fSource)) {
|
||||
srcEndTime = now();
|
||||
fSource = Never();
|
||||
finished++;
|
||||
if (finished == 2) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
when(state Optional<ErrorOr<Resp>> tss = wait(fTssWithTimeout)) {
|
||||
tssEndTime = now();
|
||||
fTssWithTimeout = Never();
|
||||
finished++;
|
||||
if (finished == 2) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// we want to record ss/tss errors to metrics
|
||||
int srcErrorCode = error_code_success;
|
||||
int tssErrorCode = error_code_success;
|
||||
|
||||
++tssData.metrics->requests;
|
||||
|
||||
if (src.isError()) {
|
||||
srcErrorCode = src.getError().code();
|
||||
tssData.metrics->ssError(srcErrorCode);
|
||||
}
|
||||
if (!tss.present()) {
|
||||
++tssData.metrics->tssTimeouts;
|
||||
} else if (tss.get().isError()) {
|
||||
tssErrorCode = tss.get().getError().code();
|
||||
tssData.metrics->tssError(tssErrorCode);
|
||||
}
|
||||
if (!src.isError() && tss.present() && !tss.get().isError()) {
|
||||
Optional<LoadBalancedReply> srcLB = getLoadBalancedReply(&src.get());
|
||||
Optional<LoadBalancedReply> tssLB = getLoadBalancedReply(&tss.get().get());
|
||||
ASSERT(srcLB.present() ==
|
||||
tssLB.present()); // getLoadBalancedReply returned different responses for same templated type
|
||||
|
||||
// if Resp is a LoadBalancedReply, only compare if both replies are non-error
|
||||
if (!srcLB.present() || (!srcLB.get().error.present() && !tssLB.get().error.present())) {
|
||||
// only record latency difference if both requests actually succeeded, so that we're comparing apples to
|
||||
// apples
|
||||
tssData.metrics->recordLatency(req, srcEndTime - startTime, tssEndTime - startTime);
|
||||
|
||||
// expect mismatches in drop mutations mode.
|
||||
Severity traceSeverity =
|
||||
(g_network->isSimulated() && g_simulator.tssMode == ISimulator::TSSMode::EnabledDropMutations)
|
||||
? SevWarnAlways
|
||||
: SevError;
|
||||
|
||||
if (!TSS_doCompare(req, src.get(), tss.get().get(), traceSeverity, tssData.tssId)) {
|
||||
TEST(true); // TSS Mismatch
|
||||
++tssData.metrics->mismatches;
|
||||
}
|
||||
} else if (tssLB.present() && tssLB.get().error.present()) {
|
||||
tssErrorCode = tssLB.get().error.get().code();
|
||||
tssData.metrics->tssError(tssErrorCode);
|
||||
} else if (srcLB.present() && srcLB.get().error.present()) {
|
||||
srcErrorCode = srcLB.get().error.get().code();
|
||||
tssData.metrics->ssError(srcErrorCode);
|
||||
}
|
||||
}
|
||||
|
||||
if (srcErrorCode != error_code_success && tssErrorCode != error_code_success && srcErrorCode != tssErrorCode) {
|
||||
// if ss and tss both got different errors, record them
|
||||
TraceEvent("TSSErrorMismatch")
|
||||
.suppressFor(1.0)
|
||||
.detail("TSSID", tssData.tssId)
|
||||
.detail("SSError", srcErrorCode)
|
||||
.detail("TSSError", tssErrorCode);
|
||||
}
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
// Stores state for a request made by the load balancer
|
||||
template <class Request>
|
||||
struct RequestData : NonCopyable {
|
||||
|
@ -91,11 +184,30 @@ struct RequestData : NonCopyable {
|
|||
// This is true once setupRequest is called, even though at that point the response is Never().
|
||||
bool isValid() { return response.isValid(); }
|
||||
|
||||
static void maybeDuplicateTSSRequest(RequestStream<Request> const* stream,
|
||||
Request& request,
|
||||
QueueModel* model,
|
||||
Future<Reply> ssResponse) {
|
||||
if (model) {
|
||||
// Send parallel request to TSS pair, if it exists
|
||||
Optional<TSSEndpointData> tssData = model->getTssData(stream->getEndpoint().token.first());
|
||||
|
||||
if (tssData.present()) {
|
||||
TEST(true); // duplicating request to TSS
|
||||
resetReply(request);
|
||||
// FIXME: optimize to avoid creating new netNotifiedQueue for each message
|
||||
RequestStream<Request> tssRequestStream(tssData.get().endpoint);
|
||||
Future<ErrorOr<REPLY_TYPE(Request)>> fTssResult = tssRequestStream.tryGetReply(request);
|
||||
model->addActor.send(tssComparison(request, ssResponse, fTssResult, tssData.get()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Initializes the request state and starts it, possibly after a backoff delay
|
||||
void startRequest(double backoff,
|
||||
bool triedAllOptions,
|
||||
RequestStream<Request> const* stream,
|
||||
Request const& request,
|
||||
Request& request,
|
||||
QueueModel* model) {
|
||||
modelHolder = Reference<ModelHolder>();
|
||||
requestStarted = false;
|
||||
|
@ -105,12 +217,15 @@ struct RequestData : NonCopyable {
|
|||
delay(backoff), [this, stream, &request, model](Void _) {
|
||||
requestStarted = true;
|
||||
modelHolder = Reference<ModelHolder>(new ModelHolder(model, stream->getEndpoint().token.first()));
|
||||
return stream->tryGetReply(request);
|
||||
Future<Reply> resp = stream->tryGetReply(request);
|
||||
maybeDuplicateTSSRequest(stream, request, model, resp);
|
||||
return resp;
|
||||
});
|
||||
} else {
|
||||
requestStarted = true;
|
||||
modelHolder = Reference<ModelHolder>(new ModelHolder(model, stream->getEndpoint().token.first()));
|
||||
response = stream->tryGetReply(request);
|
||||
maybeDuplicateTSSRequest(stream, request, model, response);
|
||||
}
|
||||
|
||||
requestProcessed = false;
|
||||
|
|
|
@ -60,6 +60,20 @@ double QueueModel::addRequest(uint64_t id) {
    return d.penalty;
}

void QueueModel::updateTssEndpoint(uint64_t endpointId, const TSSEndpointData& tssData) {
    auto& d = data[endpointId];
    d.tssData = tssData;
}

void QueueModel::removeTssEndpoint(uint64_t endpointId) {
    auto& d = data[endpointId];
    d.tssData = Optional<TSSEndpointData>();
}

Optional<TSSEndpointData> QueueModel::getTssData(uint64_t id) {
    return data[id].tssData;
}

Optional<LoadBalancedReply> getLoadBalancedReply(const LoadBalancedReply* reply) {
    return *reply;
}
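These three hooks make the QueueModel the per-endpoint registry that the load balancer consults when deciding whether to mirror a request to a testing storage server. A hedged sketch of how a caller might register and query a pairing, assuming the QueueModel and TSSEndpointData declarations elsewhere in this change; the endpoint and UID arguments are placeholders:

```cpp
#include "fdbrpc/QueueModel.h"

// Sketch: wire a storage server endpoint to its TSS pair, then check it on the read path.
void registerTssPair(QueueModel& model, Endpoint ssEndpoint, Endpoint tssEndpoint, UID tssId) {
    auto metrics = makeReference<TSSMetrics>();
    // Key the mapping by the first 64 bits of the SS endpoint token, matching getTssData() lookups.
    model.updateTssEndpoint(ssEndpoint.token.first(), TSSEndpointData(tssId, tssEndpoint, metrics));
}

bool shouldDuplicateToTss(QueueModel& model, Endpoint ssEndpoint) {
    // The load balancer duplicates a request only when a pairing is registered for this endpoint.
    return model.getTssData(ssEndpoint.token.first()).present();
}
```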
@ -26,6 +26,17 @@
|
|||
#include "fdbrpc/Smoother.h"
|
||||
#include "flow/Knobs.h"
|
||||
#include "flow/ActorCollection.h"
|
||||
#include "fdbrpc/TSSComparison.h" // For TSS Metrics
|
||||
#include "fdbrpc/FlowTransport.h" // For Endpoint
|
||||
|
||||
struct TSSEndpointData {
|
||||
UID tssId;
|
||||
Endpoint endpoint;
|
||||
Reference<TSSMetrics> metrics;
|
||||
|
||||
TSSEndpointData(UID tssId, Endpoint endpoint, Reference<TSSMetrics> metrics)
|
||||
: tssId(tssId), endpoint(endpoint), metrics(metrics) {}
|
||||
};
|
||||
|
||||
// The data structure used for the client-side load balancing algorithm to
|
||||
// decide which storage server to read data from. Conceptually, it tracks the
|
||||
|
@ -59,6 +70,10 @@ struct QueueData {
|
|||
// hasn't returned a valid result, increase above `futureVersionBackoff`
|
||||
// to increase the future backoff amount.
|
||||
double increaseBackoffTime;
|
||||
|
||||
// a bit of a hack to store this here, but it's the only centralized place for per-endpoint tracking
|
||||
Optional<TSSEndpointData> tssData;
|
||||
|
||||
QueueData()
|
||||
: latency(0.001), penalty(1.0), smoothOutstanding(FLOW_KNOBS->QUEUE_MODEL_SMOOTHING_AMOUNT), failedUntil(0),
|
||||
futureVersionBackoff(FLOW_KNOBS->FUTURE_VERSION_INITIAL_BACKOFF), increaseBackoffTime(0) {}
|
||||
|
@ -89,13 +104,29 @@ public:
|
|||
double secondBudget;
|
||||
PromiseStream<Future<Void>> addActor;
|
||||
Future<Void> laggingRequests; // requests for which a different recipient already answered
|
||||
PromiseStream<Future<Void>> addTSSActor;
|
||||
Future<Void> tssComparisons; // requests for which a different recipient already answered
|
||||
int laggingRequestCount;
|
||||
int laggingTSSCompareCount;
|
||||
|
||||
// Updates this endpoint data to duplicate requests to the specified TSS endpoint
|
||||
void updateTssEndpoint(uint64_t endpointId, const TSSEndpointData& endpointData);
|
||||
|
||||
// Removes the TSS mapping from this endpoint to stop duplicating requests to a TSS endpoint
|
||||
void removeTssEndpoint(uint64_t endpointId);
|
||||
|
||||
// Retrieves the data for this endpoint's pair TSS endpoint, if present
|
||||
Optional<TSSEndpointData> getTssData(uint64_t endpointId);
|
||||
|
||||
QueueModel() : secondMultiplier(1.0), secondBudget(0), laggingRequestCount(0) {
|
||||
laggingRequests = actorCollection(addActor.getFuture(), &laggingRequestCount);
|
||||
tssComparisons = actorCollection(addTSSActor.getFuture(), &laggingTSSCompareCount);
|
||||
}
|
||||
|
||||
~QueueModel() { laggingRequests.cancel(); }
|
||||
~QueueModel() {
|
||||
laggingRequests.cancel();
|
||||
tssComparisons.cancel();
|
||||
}
|
||||
|
||||
private:
|
||||
std::unordered_map<uint64_t, QueueData> data;
|
||||
|
@ -121,4 +152,4 @@ private:
|
|||
};
|
||||
*/
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,89 @@
/*
 * TSSComparison.h
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * This header declares the TSS comparison function that LoadBalance.actor.h needs to call,
 * but which StorageServerInterface.h must implement on the types defined in SSI.h.
 */
#ifndef FDBRPC_TSS_COMPARISON_H
#define FDBRPC_TSS_COMPARISON_H

#include "fdbrpc/ContinuousSample.h"
#include "fdbrpc/Stats.h"

// refcounted + noncopyable because both DatabaseContext and individual endpoints share ownership
struct TSSMetrics : ReferenceCounted<TSSMetrics>, NonCopyable {
    CounterCollection cc;
    Counter requests;
    Counter ssErrors;
    Counter tssErrors;
    Counter tssTimeouts;
    Counter mismatches;

    // We could probably just ignore getKey as it's seldom used?
    ContinuousSample<double> SSgetValueLatency;
    ContinuousSample<double> SSgetKeyLatency;
    ContinuousSample<double> SSgetKeyValuesLatency;

    ContinuousSample<double> TSSgetValueLatency;
    ContinuousSample<double> TSSgetKeyLatency;
    ContinuousSample<double> TSSgetKeyValuesLatency;

    std::unordered_map<int, uint64_t> ssErrorsByCode;
    std::unordered_map<int, uint64_t> tssErrorsByCode;

    void ssError(int code) {
        ++ssErrors;
        ssErrorsByCode[code]++;
    }

    void tssError(int code) {
        ++tssErrors;
        tssErrorsByCode[code]++;
    }

    template <class Req>
    void recordLatency(const Req& req, double ssLatency, double tssLatency);

    void clear() {
        SSgetValueLatency.clear();
        SSgetKeyLatency.clear();
        SSgetKeyValuesLatency.clear();

        TSSgetValueLatency.clear();
        TSSgetKeyLatency.clear();
        TSSgetKeyValuesLatency.clear();

        tssErrorsByCode.clear();
        ssErrorsByCode.clear();
    }

    TSSMetrics()
      : cc("TSSClientMetrics"), requests("Requests", cc), ssErrors("SSErrors", cc), tssErrors("TSSErrors", cc),
        tssTimeouts("TSSTimeouts", cc), mismatches("Mismatches", cc), SSgetValueLatency(1000), SSgetKeyLatency(1000),
        SSgetKeyValuesLatency(1000), TSSgetValueLatency(1000), TSSgetKeyLatency(1000), TSSgetKeyValuesLatency(1000) {}
};

// part of the contract of this function is that if there is a mismatch, the implementation needs to record a trace
// event with the specified severity and tssId in the event.
template <class Req, class Rep>
bool TSS_doCompare(const Req& req, const Rep& src, const Rep& tss, Severity traceSeverity, UID tssId);

#endif
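TSS_doCompare is only declared here; each reply type gets its own specialization where the storage server types are defined. A hedged sketch of what a specialization for a simple value read might look like, with GetValueRequest/GetValueReply standing in for the real storage server types and the trace event name chosen only for illustration:

```cpp
// Sketch of a TSS_doCompare specialization for a value read; the field names and
// trace details are illustrative, not the implementation added by this change.
template <>
bool TSS_doCompare(const GetValueRequest& req,
                   const GetValueReply& src,
                   const GetValueReply& tss,
                   Severity traceSeverity,
                   UID tssId) {
    if (src.value == tss.value) {
        return true;
    }
    // Contract: on mismatch, log a trace event at the requested severity that names the TSS.
    TraceEvent(traceSeverity, "TSSMismatchGetValue")
        .detail("TSSID", tssId.toString())
        .detail("Key", printable(req.key))
        .detail("SSReply", src.value.present() ? printable(src.value.get()) : "missing")
        .detail("TSSReply", tss.value.present() ? printable(tss.value.get()) : "missing");
    return false;
}
```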
@ -630,7 +630,14 @@ void showArena(ArenaBlock* a, ArenaBlock* parent) {
    int o = a->nextBlockOffset;
    while (o) {
        ArenaBlockRef* r = (ArenaBlockRef*)((char*)a->getData() + o);
        showArena(r->next, a);

        // If alignedBuffer is valid then print its pointer and size, else recurse
        if (r->aligned4kBufferSize != 0) {
            printf("AlignedBuffer %p (<-%p) %u bytes\n", r->aligned4kBuffer, a, r->aligned4kBufferSize);
        } else {
            showArena(r->next, a);
        }

        o = r->nextBlockOffset;
    }
}
@ -536,7 +536,10 @@ public:

    std::string getFilename() const override { return actualFilename; }

    ~SimpleFile() override { _close(h); }
    ~SimpleFile() override {
        _close(h);
        --openCount;
    }

private:
    int h;

@ -1015,8 +1018,8 @@ public:

        // Get the size of all files we've created on the server and subtract them from the free space
        for (auto file = proc->machine->openFiles.begin(); file != proc->machine->openFiles.end(); ++file) {
            if (file->second.isReady()) {
                totalFileSize += ((AsyncFileNonDurable*)file->second.get().getPtr())->approximateSize;
            if (file->second.get().isReady()) {
                totalFileSize += ((AsyncFileNonDurable*)file->second.get().get().getPtr())->approximateSize;
            }
            numFiles++;
        }
@ -2440,7 +2443,7 @@ Future<Reference<class IAsyncFile>> Sim2FileSystem::open(const std::string& file
|
|||
actualFilename = filename + ".part";
|
||||
auto partFile = machineCache.find(actualFilename);
|
||||
if (partFile != machineCache.end()) {
|
||||
Future<Reference<IAsyncFile>> f = AsyncFileDetachable::open(partFile->second);
|
||||
Future<Reference<IAsyncFile>> f = AsyncFileDetachable::open(partFile->second.get());
|
||||
if (FLOW_KNOBS->PAGE_WRITE_CHECKSUM_HISTORY > 0)
|
||||
f = map(f, [=](Reference<IAsyncFile> r) {
|
||||
return Reference<IAsyncFile>(new AsyncFileWriteChecker(r));
|
||||
|
@ -2448,19 +2451,26 @@ Future<Reference<class IAsyncFile>> Sim2FileSystem::open(const std::string& file
|
|||
return f;
|
||||
}
|
||||
}
|
||||
if (machineCache.find(actualFilename) == machineCache.end()) {
|
||||
|
||||
Future<Reference<IAsyncFile>> f;
|
||||
auto itr = machineCache.find(actualFilename);
|
||||
if (itr == machineCache.end()) {
|
||||
// Simulated disk parameters are shared by the AsyncFileNonDurable and the underlying SimpleFile.
|
||||
// This way, they can both keep up with the time to start the next operation
|
||||
auto diskParameters =
|
||||
makeReference<DiskParameters>(FLOW_KNOBS->SIM_DISK_IOPS, FLOW_KNOBS->SIM_DISK_BANDWIDTH);
|
||||
machineCache[actualFilename] =
|
||||
AsyncFileNonDurable::open(filename,
|
||||
f = AsyncFileNonDurable::open(filename,
|
||||
actualFilename,
|
||||
SimpleFile::open(filename, flags, mode, diskParameters, false),
|
||||
diskParameters,
|
||||
(flags & IAsyncFile::OPEN_NO_AIO) == 0);
|
||||
|
||||
machineCache[actualFilename] = UnsafeWeakFutureReference<IAsyncFile>(f);
|
||||
} else {
|
||||
f = itr->second.get();
|
||||
}
|
||||
Future<Reference<IAsyncFile>> f = AsyncFileDetachable::open(machineCache[actualFilename]);
|
||||
|
||||
f = AsyncFileDetachable::open(f);
|
||||
if (FLOW_KNOBS->PAGE_WRITE_CHECKSUM_HISTORY > 0)
|
||||
f = map(f, [=](Reference<IAsyncFile> r) { return Reference<IAsyncFile>(new AsyncFileWriteChecker(r)); });
|
||||
return f;
|
||||
|
|
|
@ -41,7 +41,7 @@ public:
|
|||
: desiredCoordinators(1), physicalDatacenters(1), processesPerMachine(0), listenersPerProcess(1),
|
||||
isStopped(false), lastConnectionFailure(0), connectionFailuresDisableDuration(0), speedUpSimulation(false),
|
||||
allSwapsDisabled(false), backupAgents(BackupAgentType::WaitForType), drAgents(BackupAgentType::WaitForType),
|
||||
extraDB(nullptr), allowLogSetKills(true), usableRegions(1) {}
|
||||
extraDB(nullptr), allowLogSetKills(true), usableRegions(1), tssMode(TSSMode::Disabled) {}
|
||||
|
||||
// Order matters!
|
||||
enum KillType {
|
||||
|
@ -55,6 +55,9 @@ public:
|
|||
None
|
||||
};
|
||||
|
||||
// Order matters! all modes >= 2 are fault injection modes
|
||||
enum TSSMode { Disabled, EnabledNormal, EnabledAddDelay, EnabledDropMutations };
|
||||
|
||||
enum class BackupAgentType { NoBackupAgents, WaitForType, BackupToFile, BackupToDB };
|
||||
|
||||
// Subclasses may subclass ProcessInfo as well
|
||||
|
@ -188,10 +191,14 @@ public:
|
|||
Promise<KillType> shutdownSignal;
|
||||
};
|
||||
|
||||
// A set of data associated with a simulated machine
|
||||
struct MachineInfo {
|
||||
ProcessInfo* machineProcess;
|
||||
std::vector<ProcessInfo*> processes;
|
||||
std::map<std::string, Future<Reference<IAsyncFile>>> openFiles;
|
||||
|
||||
// A map from filename to file handle for all open files on a machine
|
||||
std::map<std::string, UnsafeWeakFutureReference<IAsyncFile>> openFiles;
|
||||
|
||||
std::set<std::string> deletingFiles;
|
||||
std::set<std::string> closingFiles;
|
||||
Optional<Standalone<StringRef>> machineId;
|
||||
|
@ -401,6 +408,7 @@ public:
|
|||
int32_t satelliteTLogWriteAntiQuorumFallback;
|
||||
std::vector<Optional<Standalone<StringRef>>> primarySatelliteDcIds;
|
||||
std::vector<Optional<Standalone<StringRef>>> remoteSatelliteDcIds;
|
||||
TSSMode tssMode;
|
||||
|
||||
// Used by workloads that perform reconfigurations
|
||||
int testerCount;
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
*/
|
||||
|
||||
#include "fdbclient/MutationList.h"
|
||||
#include "fdbclient/KeyBackedTypes.h" // for key backed map codecs for tss mapping
|
||||
#include "fdbclient/SystemData.h"
|
||||
#include "fdbclient/BackupAgent.actor.h"
|
||||
#include "fdbclient/Notified.h"
|
||||
|
@ -64,10 +65,19 @@ void applyMetadataMutations(SpanID const& spanContext,
|
|||
NotifiedVersion* commitVersion,
|
||||
std::map<UID, Reference<StorageInfo>>* storageCache,
|
||||
std::map<Tag, Version>* tag_popped,
|
||||
std::unordered_map<UID, StorageServerInterface>* tssMapping,
|
||||
bool initialCommit) {
|
||||
// std::map<keyRef, vector<uint16_t>> cacheRangeInfo;
|
||||
std::map<KeyRef, MutationRef> cachedRangeInfo;
|
||||
|
||||
// Testing Storage Server removal (clearing serverTagKey) needs to read tss server list value to determine it is a
|
||||
// tss + find partner's tag to send the private mutation. Since the removeStorageServer transaction clears both the
|
||||
// storage list and server tag, we have to enforce ordering, proccessing the server tag first, and postpone the
|
||||
// server list clear until the end;
|
||||
// Similarly, the TSS mapping change key needs to read the server list at the end of the commit
|
||||
std::vector<KeyRangeRef> tssServerListToRemove;
|
||||
std::vector<std::pair<UID, UID>> tssMappingToAdd;
|
||||
|
||||
for (auto const& m : mutations) {
|
||||
//TraceEvent("MetadataMutation", dbgid).detail("M", m.toString());
|
||||
if (toCommit) {
|
||||
|
@ -95,12 +105,14 @@ void applyMetadataMutations(SpanID const& spanContext,
|
|||
|
||||
for (const auto& id : src) {
|
||||
auto storageInfo = getStorageInfo(id, storageCache, txnStateStore);
|
||||
ASSERT(!storageInfo->interf.isTss());
|
||||
ASSERT(storageInfo->tag != invalidTag);
|
||||
info.tags.push_back(storageInfo->tag);
|
||||
info.src_info.push_back(storageInfo);
|
||||
}
|
||||
for (const auto& id : dest) {
|
||||
auto storageInfo = getStorageInfo(id, storageCache, txnStateStore);
|
||||
ASSERT(!storageInfo->interf.isTss());
|
||||
ASSERT(storageInfo->tag != invalidTag);
|
||||
info.tags.push_back(storageInfo->tag);
|
||||
info.dest_info.push_back(storageInfo);
|
||||
|
@ -113,6 +125,8 @@ void applyMetadataMutations(SpanID const& spanContext,
|
|||
txnStateStore->set(KeyValueRef(m.param1, m.param2));
|
||||
} else if (m.param1.startsWith(serverKeysPrefix)) {
|
||||
if (toCommit) {
|
||||
Tag tag = decodeServerTagValue(
|
||||
txnStateStore->readValue(serverTagKeyFor(serverKeysDecodeServer(m.param1))).get().get());
|
||||
MutationRef privatized = m;
|
||||
privatized.param1 = m.param1.withPrefix(systemKeys.begin, arena);
|
||||
TraceEvent(SevDebug, "SendingPrivateMutation", dbgid)
|
||||
|
@ -120,14 +134,9 @@ void applyMetadataMutations(SpanID const& spanContext,
|
|||
.detail("Privatized", privatized.toString())
|
||||
.detail("Server", serverKeysDecodeServer(m.param1))
|
||||
.detail("TagKey", serverTagKeyFor(serverKeysDecodeServer(m.param1)))
|
||||
.detail(
|
||||
"Tag",
|
||||
decodeServerTagValue(
|
||||
txnStateStore->readValue(serverTagKeyFor(serverKeysDecodeServer(m.param1))).get().get())
|
||||
.toString());
|
||||
.detail("Tag", tag.toString());
|
||||
|
||||
toCommit->addTag(decodeServerTagValue(
|
||||
txnStateStore->readValue(serverTagKeyFor(serverKeysDecodeServer(m.param1))).get().get()));
|
||||
toCommit->addTag(tag);
|
||||
toCommit->writeTypedMessage(privatized);
|
||||
}
|
||||
} else if (m.param1.startsWith(serverTagPrefix)) {
|
||||
|
@ -235,6 +244,29 @@ void applyMetadataMutations(SpanID const& spanContext,
|
|||
}
|
||||
}
|
||||
}
|
||||
} else if (m.param1.startsWith(tssMappingKeys.begin)) {
|
||||
if (!initialCommit) {
|
||||
txnStateStore->set(KeyValueRef(m.param1, m.param2));
|
||||
if (tssMapping) {
|
||||
// Normally uses key backed map, so have to use same unpacking code here.
|
||||
UID ssId = Codec<UID>::unpack(Tuple::unpack(m.param1.removePrefix(tssMappingKeys.begin)));
|
||||
UID tssId = Codec<UID>::unpack(Tuple::unpack(m.param2));
|
||||
|
||||
tssMappingToAdd.push_back(std::pair(ssId, tssId));
|
||||
|
||||
// send private mutation to SS that it now has a TSS pair
|
||||
if (toCommit) {
|
||||
MutationRef privatized = m;
|
||||
privatized.param1 = m.param1.withPrefix(systemKeys.begin, arena);
|
||||
|
||||
Optional<Value> tagV = txnStateStore->readValue(serverTagKeyFor(ssId)).get();
|
||||
if (tagV.present()) {
|
||||
toCommit->addTag(decodeServerTagValue(tagV.get()));
|
||||
toCommit->writeTypedMessage(privatized);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (m.param1 == databaseLockedKey || m.param1 == metadataVersionKey ||
|
||||
m.param1 == mustContainSystemMutationsKey ||
|
||||
m.param1.startsWith(applyMutationsBeginRange.begin) ||
|
||||
|
@ -379,8 +411,20 @@ void applyMetadataMutations(SpanID const& spanContext,
|
|||
}
|
||||
}
|
||||
if (serverListKeys.intersects(range)) {
|
||||
if (!initialCommit)
|
||||
txnStateStore->clear(range & serverListKeys);
|
||||
if (!initialCommit) {
|
||||
KeyRangeRef rangeToClear = range & serverListKeys;
|
||||
if (rangeToClear.singleKeyRange()) {
|
||||
UID id = decodeServerListKey(rangeToClear.begin);
|
||||
Optional<Value> ssiV = txnStateStore->readValue(serverListKeyFor(id)).get();
|
||||
if (ssiV.present() && decodeServerListValue(ssiV.get()).isTss()) {
|
||||
tssServerListToRemove.push_back(rangeToClear);
|
||||
} else {
|
||||
txnStateStore->clear(rangeToClear);
|
||||
}
|
||||
} else {
|
||||
txnStateStore->clear(rangeToClear);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (tagLocalityListKeys.intersects(range)) {
|
||||
if (!initialCommit)
|
||||
|
@ -411,6 +455,32 @@ void applyMetadataMutations(SpanID const& spanContext,
|
|||
toCommit->writeTypedMessage(privatized);
|
||||
}
|
||||
}
|
||||
// Might be a tss removal, which doesn't store a tag there.
|
||||
// Chained if is a little verbose, but avoids unecessary work
|
||||
if (toCommit && !initialCommit && !serverKeysCleared.size()) {
|
||||
KeyRangeRef maybeTssRange = range & serverTagKeys;
|
||||
if (maybeTssRange.singleKeyRange()) {
|
||||
UID id = decodeServerTagKey(maybeTssRange.begin);
|
||||
Optional<Value> ssiV = txnStateStore->readValue(serverListKeyFor(id)).get();
|
||||
|
||||
if (ssiV.present()) {
|
||||
StorageServerInterface ssi = decodeServerListValue(ssiV.get());
|
||||
if (ssi.isTss()) {
|
||||
Optional<Value> tagV =
|
||||
txnStateStore->readValue(serverTagKeyFor(ssi.tssPairID.get())).get();
|
||||
if (tagV.present()) {
|
||||
MutationRef privatized = m;
|
||||
privatized.param1 = maybeTssRange.begin.withPrefix(systemKeys.begin, arena);
|
||||
privatized.param2 =
|
||||
keyAfter(maybeTssRange.begin, arena).withPrefix(systemKeys.begin, arena);
|
||||
|
||||
toCommit->addTag(decodeServerTagValue(tagV.get()));
|
||||
toCommit->writeTypedMessage(privatized);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!initialCommit) {
|
||||
KeyRangeRef clearRange = range & serverTagKeys;
|
||||
|
@ -439,6 +509,19 @@ void applyMetadataMutations(SpanID const& spanContext,
|
|||
if (!initialCommit)
|
||||
txnStateStore->clear(range & serverTagHistoryKeys);
|
||||
}
|
||||
if (tssMappingKeys.intersects(range)) {
|
||||
if (!initialCommit) {
|
||||
KeyRangeRef rangeToClear = range & tssMappingKeys;
|
||||
ASSERT(rangeToClear.singleKeyRange());
|
||||
txnStateStore->clear(rangeToClear);
|
||||
if (tssMapping) {
|
||||
// Normally uses key backed map, so have to use same unpacking code here.
|
||||
UID ssId =
|
||||
Codec<UID>::unpack(Tuple::unpack(rangeToClear.begin.removePrefix(tssMappingKeys.begin)));
|
||||
tssMapping->erase(ssId);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (range.contains(coordinatorsKey)) {
|
||||
if (!initialCommit)
|
||||
txnStateStore->clear(singleKeyRange(coordinatorsKey));
|
||||
|
@ -568,6 +651,17 @@ void applyMetadataMutations(SpanID const& spanContext,
|
|||
}
|
||||
}
|
||||
|
||||
for (KeyRangeRef& range : tssServerListToRemove) {
|
||||
txnStateStore->clear(range);
|
||||
}
|
||||
|
||||
for (auto& tssPair : tssMappingToAdd) {
|
||||
// read tss server list from txn state store and add it to tss mapping
|
||||
StorageServerInterface tssi =
|
||||
decodeServerListValue(txnStateStore->readValue(serverListKeyFor(tssPair.second)).get().get());
|
||||
(*tssMapping)[tssPair.first] = tssi;
|
||||
}
|
||||
|
||||
// If we accumulated private mutations for cached key-ranges, we also need to
|
||||
// tag them with the relevant storage servers. This is done to make the storage
|
||||
// servers aware of the cached key-ranges
|
||||
|
@ -666,6 +760,7 @@ void applyMetadataMutations(SpanID const& spanContext,
|
|||
&proxyCommitData.committedVersion,
|
||||
&proxyCommitData.storageCache,
|
||||
&proxyCommitData.tag_popped,
|
||||
&proxyCommitData.tssMapping,
|
||||
initialCommit);
|
||||
}
|
||||
|
||||
|
@ -695,5 +790,6 @@ void applyMetadataMutations(SpanID const& spanContext,
|
|||
/* commitVersion= */ nullptr,
|
||||
/* storageCache= */ nullptr,
|
||||
/* tag_popped= */ nullptr,
|
||||
/* tssMapping= */ nullptr,
|
||||
/* initialCommit= */ false);
|
||||
}
|
||||
|
|
|
@ -103,6 +103,8 @@ set(FDBSERVER_SRCS
  TesterInterface.actor.h
  TLogInterface.h
  TLogServer.actor.cpp
  TSSMappingUtil.actor.h
  TSSMappingUtil.actor.cpp
  VersionedBTree.actor.cpp
  VFSAsync.h
  VFSAsync.cpp
@ -458,6 +458,38 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
// Log the reason why the worker is considered as unavailable.
|
||||
void logWorkerUnavailable(const Severity severity,
|
||||
const UID& id,
|
||||
const std::string& method,
|
||||
const std::string& reason,
|
||||
const WorkerDetails& details,
|
||||
const ProcessClass::Fitness& fitness,
|
||||
const std::set<Optional<Key>>& dcIds) {
|
||||
// Construct the list of DCs where the TLog recruitment is happening. This is mainly for logging purpose.
|
||||
std::string dcList;
|
||||
for (const auto& dc : dcIds) {
|
||||
if (!dcList.empty()) {
|
||||
dcList += ',';
|
||||
}
|
||||
dcList += printable(dc);
|
||||
}
|
||||
// Logging every possible options is a lot for every recruitment; logging all of the options with GoodFit or
|
||||
// BestFit may work because there should only be like 30 tlog class processes. Plus, the recruitment happens
|
||||
// only during initial database creation and recovery. So these trace events should be sparse.
|
||||
if (fitness == ProcessClass::GoodFit || fitness == ProcessClass::BestFit ||
|
||||
fitness == ProcessClass::NeverAssign) {
|
||||
TraceEvent(severity, "GetTLogTeamWorkerUnavailable", id)
|
||||
.detail("TLogRecruitMethod", method)
|
||||
.detail("Reason", reason)
|
||||
.detail("WorkerID", details.interf.id())
|
||||
.detail("WorkerDC", details.interf.locality.dcId())
|
||||
.detail("Address", details.interf.addresses().toString())
|
||||
.detail("Fitness", fitness)
|
||||
.detail("RecruitmentDcIds", dcList);
|
||||
}
|
||||
}
|
||||
|
||||
// A TLog recruitment method specialized for three_data_hall and three_datacenter configurations
|
||||
// It attempts to evenly recruit processes from across data_halls or datacenters
|
||||
std::vector<WorkerDetails> getWorkersForTlogsComplex(DatabaseConfiguration const& conf,
|
||||
|
@ -478,11 +510,37 @@ public:
|
|||
auto fitness = worker_details.processClass.machineClassFitness(ProcessClass::TLog);
|
||||
|
||||
if (std::find(exclusionWorkerIds.begin(), exclusionWorkerIds.end(), worker_details.interf.id()) !=
|
||||
exclusionWorkerIds.end() ||
|
||||
!workerAvailable(worker_info, checkStable) ||
|
||||
conf.isExcludedServer(worker_details.interf.addresses()) || fitness == ProcessClass::NeverAssign ||
|
||||
(!dcIds.empty() && dcIds.count(worker_details.interf.locality.dcId()) == 0) ||
|
||||
(!allowDegraded && worker_details.degraded)) {
|
||||
exclusionWorkerIds.end()) {
|
||||
logWorkerUnavailable(SevInfo, id, "complex", "Worker is excluded", worker_details, fitness, dcIds);
|
||||
continue;
|
||||
}
|
||||
if (!workerAvailable(worker_info, checkStable)) {
|
||||
logWorkerUnavailable(SevInfo, id, "complex", "Worker is not available", worker_details, fitness, dcIds);
|
||||
continue;
|
||||
}
|
||||
if (conf.isExcludedServer(worker_details.interf.addresses())) {
|
||||
logWorkerUnavailable(SevInfo,
|
||||
id,
|
||||
"complex",
|
||||
"Worker server is excluded from the cluster",
|
||||
worker_details,
|
||||
fitness,
|
||||
dcIds);
|
||||
continue;
|
||||
}
|
||||
if (fitness == ProcessClass::NeverAssign) {
|
||||
logWorkerUnavailable(
|
||||
SevDebug, id, "complex", "Worker's fitness is NeverAssign", worker_details, fitness, dcIds);
|
||||
continue;
|
||||
}
|
||||
if (!dcIds.empty() && dcIds.count(worker_details.interf.locality.dcId()) == 0) {
|
||||
logWorkerUnavailable(
|
||||
SevDebug, id, "complex", "Worker is not in the target DC", worker_details, fitness, dcIds);
|
||||
continue;
|
||||
}
|
||||
if (!allowDegraded && worker_details.degraded) {
|
||||
logWorkerUnavailable(
|
||||
SevInfo, id, "complex", "Worker is degraded and not allowed", worker_details, fitness, dcIds);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -685,11 +743,34 @@ public:
|
|||
for (const auto& [worker_process_id, worker_info] : id_worker) {
|
||||
const auto& worker_details = worker_info.details;
|
||||
auto fitness = worker_details.processClass.machineClassFitness(ProcessClass::TLog);
|
||||
|
||||
if (std::find(exclusionWorkerIds.begin(), exclusionWorkerIds.end(), worker_details.interf.id()) !=
|
||||
exclusionWorkerIds.end() ||
|
||||
!workerAvailable(worker_info, checkStable) ||
|
||||
conf.isExcludedServer(worker_details.interf.addresses()) || fitness == ProcessClass::NeverAssign ||
|
||||
(!dcIds.empty() && dcIds.count(worker_details.interf.locality.dcId()) == 0)) {
|
||||
exclusionWorkerIds.end()) {
|
||||
logWorkerUnavailable(SevInfo, id, "simple", "Worker is excluded", worker_details, fitness, dcIds);
|
||||
continue;
|
||||
}
|
||||
if (!workerAvailable(worker_info, checkStable)) {
|
||||
logWorkerUnavailable(SevInfo, id, "simple", "Worker is not available", worker_details, fitness, dcIds);
|
||||
continue;
|
||||
}
|
||||
if (conf.isExcludedServer(worker_details.interf.addresses())) {
|
||||
logWorkerUnavailable(SevInfo,
|
||||
id,
|
||||
"simple",
|
||||
"Worker server is excluded from the cluster",
|
||||
worker_details,
|
||||
fitness,
|
||||
dcIds);
|
||||
continue;
|
||||
}
|
||||
if (fitness == ProcessClass::NeverAssign) {
|
||||
logWorkerUnavailable(
|
||||
SevDebug, id, "complex", "Worker's fitness is NeverAssign", worker_details, fitness, dcIds);
|
||||
continue;
|
||||
}
|
||||
if (!dcIds.empty() && dcIds.count(worker_details.interf.locality.dcId()) == 0) {
|
||||
logWorkerUnavailable(
|
||||
SevDebug, id, "simple", "Worker is not in the target DC", worker_details, fitness, dcIds);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -794,11 +875,35 @@ public:
|
|||
for (const auto& [worker_process_id, worker_info] : id_worker) {
|
||||
const auto& worker_details = worker_info.details;
|
||||
auto fitness = worker_details.processClass.machineClassFitness(ProcessClass::TLog);
|
||||
|
||||
if (std::find(exclusionWorkerIds.begin(), exclusionWorkerIds.end(), worker_details.interf.id()) !=
|
||||
exclusionWorkerIds.end() ||
|
||||
!workerAvailable(worker_info, checkStable) ||
|
||||
conf.isExcludedServer(worker_details.interf.addresses()) || fitness == ProcessClass::NeverAssign ||
|
||||
(!dcIds.empty() && dcIds.count(worker_details.interf.locality.dcId()) == 0)) {
|
||||
exclusionWorkerIds.end()) {
|
||||
logWorkerUnavailable(SevInfo, id, "deprecated", "Worker is excluded", worker_details, fitness, dcIds);
|
||||
continue;
|
||||
}
|
||||
if (!workerAvailable(worker_info, checkStable)) {
|
||||
logWorkerUnavailable(
|
||||
SevInfo, id, "deprecated", "Worker is not available", worker_details, fitness, dcIds);
|
||||
continue;
|
||||
}
|
||||
if (conf.isExcludedServer(worker_details.interf.addresses())) {
|
||||
logWorkerUnavailable(SevInfo,
|
||||
id,
|
||||
"deprecated",
|
||||
"Worker server is excluded from the cluster",
|
||||
worker_details,
|
||||
fitness,
|
||||
dcIds);
|
||||
continue;
|
||||
}
|
||||
if (fitness == ProcessClass::NeverAssign) {
|
||||
logWorkerUnavailable(
|
||||
SevDebug, id, "complex", "Worker's fitness is NeverAssign", worker_details, fitness, dcIds);
|
||||
continue;
|
||||
}
|
||||
if (!dcIds.empty() && dcIds.count(worker_details.interf.locality.dcId()) == 0) {
|
||||
logWorkerUnavailable(
|
||||
SevDebug, id, "deprecated", "Worker is not in the target DC", worker_details, fitness, dcIds);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -3091,9 +3196,9 @@ ACTOR Future<Void> workerAvailabilityWatch(WorkerInterface worker,
|
|||
cluster->masterProcessId = Optional<Key>();
|
||||
}
|
||||
TraceEvent("ClusterControllerWorkerFailed", cluster->id)
|
||||
.detail("ProcessId", worker.locality.processId())
|
||||
.detail("ProcessClass", failedWorkerInfo.details.processClass.toString())
|
||||
.detail("Address", worker.address());
|
||||
.detail("ProcessId", worker.locality.processId())
|
||||
.detail("ProcessClass", failedWorkerInfo.details.processClass.toString())
|
||||
.detail("Address", worker.address());
|
||||
cluster->removedDBInfoEndpoints.insert(worker.updateServerDBInfo.getEndpoint());
|
||||
cluster->id_worker.erase(worker.locality.processId());
|
||||
cluster->updateWorkerList.set(worker.locality.processId(), Optional<ProcessData>());
|
||||
|
@ -3277,6 +3382,7 @@ void clusterRegisterMaster(ClusterControllerData* self, RegisterMasterRequest co
|
|||
if (db->clientInfo->get().commitProxies != req.commitProxies ||
|
||||
db->clientInfo->get().grvProxies != req.grvProxies) {
|
||||
isChanged = true;
|
||||
// TODO why construct a new one and not just copy the old one and change proxies + id?
|
||||
ClientDBInfo clientInfo;
|
||||
clientInfo.id = deterministicRandom()->randomUniqueID();
|
||||
clientInfo.commitProxies = req.commitProxies;
|
||||
|
@ -3769,7 +3875,7 @@ ACTOR Future<Void> monitorGlobalConfig(ClusterControllerData::DBInfo* db) {
|
|||
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
state Optional<Value> globalConfigVersion = wait(tr.get(globalConfigVersionKey));
|
||||
state ClientDBInfo clientInfo = db->clientInfo->get();
|
||||
state ClientDBInfo clientInfo = db->serverInfo->get().client;
|
||||
|
||||
if (globalConfigVersion.present()) {
|
||||
// Since the history keys end with versionstamps, they
|
||||
|
@ -3827,6 +3933,14 @@ ACTOR Future<Void> monitorGlobalConfig(ClusterControllerData::DBInfo* db) {
|
|||
}
|
||||
|
||||
clientInfo.id = deterministicRandom()->randomUniqueID();
|
||||
// Update ServerDBInfo so fdbserver processes receive updated history.
|
||||
ServerDBInfo serverInfo = db->serverInfo->get();
|
||||
serverInfo.id = deterministicRandom()->randomUniqueID();
|
||||
serverInfo.infoGeneration = ++db->dbInfoCount;
|
||||
serverInfo.client = clientInfo;
|
||||
db->serverInfo->set(serverInfo);
|
||||
|
||||
// Update ClientDBInfo so client processes receive updated history.
|
||||
db->clientInfo->set(clientInfo);
|
||||
}
|
||||
|
||||
|
@ -4306,6 +4420,7 @@ ACTOR Future<Void> clusterControllerCore(ClusterControllerFullInterface interf,
|
|||
self.addActor.send(handleForcedRecoveries(&self, interf));
|
||||
self.addActor.send(monitorDataDistributor(&self));
|
||||
self.addActor.send(monitorRatekeeper(&self));
|
||||
// self.addActor.send(monitorTSSMapping(&self));
|
||||
self.addActor.send(dbInfoUpdater(&self));
|
||||
self.addActor.send(traceCounters("ClusterControllerMetrics",
|
||||
self.id,
|
||||
|
|
|
@ -1430,11 +1430,26 @@ ACTOR Future<Void> commitBatch(ProxyCommitData* self,
|
|||
return Void();
|
||||
}
|
||||
|
||||
// Add tss mapping data to the reply, if any of the included storage servers have a TSS pair
|
||||
void maybeAddTssMapping(GetKeyServerLocationsReply& reply,
|
||||
ProxyCommitData* commitData,
|
||||
std::unordered_set<UID>& included,
|
||||
UID ssId) {
|
||||
if (!included.count(ssId)) {
|
||||
auto mappingItr = commitData->tssMapping.find(ssId);
|
||||
if (mappingItr != commitData->tssMapping.end()) {
|
||||
included.insert(ssId);
|
||||
reply.resultsTssMapping.push_back(*mappingItr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> doKeyServerLocationRequest(GetKeyServerLocationsRequest req, ProxyCommitData* commitData) {
|
||||
// We can't respond to these requests until we have valid txnStateStore
|
||||
wait(commitData->validState.getFuture());
|
||||
wait(delay(0, TaskPriority::DefaultEndpoint));
|
||||
|
||||
std::unordered_set<UID> tssMappingsIncluded;
|
||||
GetKeyServerLocationsReply rep;
|
||||
if (!req.end.present()) {
|
||||
auto r = req.reverse ? commitData->keyInfo.rangeContainingKeyBefore(req.begin)
|
||||
|
@ -1443,6 +1458,7 @@ ACTOR static Future<Void> doKeyServerLocationRequest(GetKeyServerLocationsReques
|
|||
ssis.reserve(r.value().src_info.size());
|
||||
for (auto& it : r.value().src_info) {
|
||||
ssis.push_back(it->interf);
|
||||
maybeAddTssMapping(rep, commitData, tssMappingsIncluded, it->interf.id());
|
||||
}
|
||||
rep.results.push_back(std::make_pair(r.range(), ssis));
|
||||
} else if (!req.reverse) {
|
||||
|
@ -1454,6 +1470,7 @@ ACTOR static Future<Void> doKeyServerLocationRequest(GetKeyServerLocationsReques
|
|||
ssis.reserve(r.value().src_info.size());
|
||||
for (auto& it : r.value().src_info) {
|
||||
ssis.push_back(it->interf);
|
||||
maybeAddTssMapping(rep, commitData, tssMappingsIncluded, it->interf.id());
|
||||
}
|
||||
rep.results.push_back(std::make_pair(r.range(), ssis));
|
||||
count++;
|
||||
|
@ -1466,6 +1483,7 @@ ACTOR static Future<Void> doKeyServerLocationRequest(GetKeyServerLocationsReques
|
|||
ssis.reserve(r.value().src_info.size());
|
||||
for (auto& it : r.value().src_info) {
|
||||
ssis.push_back(it->interf);
|
||||
maybeAddTssMapping(rep, commitData, tssMappingsIncluded, it->interf.id());
|
||||
}
|
||||
rep.results.push_back(std::make_pair(r.range(), ssis));
|
||||
if (r == commitData->keyInfo.ranges().begin()) {
|
||||
|
|
|
@ -406,8 +406,8 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
|
|||
|
||||
// If the current leader's priority became worse, we still need to notified all clients because now one
|
||||
// of them might be better than the leader. In addition, even though FitnessRemote is better than
|
||||
// FitnessUnknown, we still need to notified clients so that monitorLeaderRemotely has a chance to switch
|
||||
// from passively monitoring the leader to actively attempting to become the leader.
|
||||
// FitnessUnknown, we still need to notified clients so that monitorLeaderRemotely has a chance to
|
||||
// switch from passively monitoring the leader to actively attempting to become the leader.
|
||||
if (!currentNominee.present() || !nextNominee.present() ||
|
||||
!currentNominee.get().equalInternalId(nextNominee.get()) ||
|
||||
nextNominee.get() > currentNominee.get() ||
|
||||
|
@ -545,15 +545,30 @@ struct LeaderRegisterCollection {
|
|||
}
|
||||
};
|
||||
|
||||
// extract the prefix descriptor from cluster id
|
||||
StringRef getClusterDescriptor(Key key) {
|
||||
StringRef str = key.contents();
|
||||
return str.eat(":");
|
||||
}
|
||||
|
||||
// leaderServer multiplexes multiple leaderRegisters onto a single LeaderElectionRegInterface,
// creating and destroying them on demand.
ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf, OnDemandStore* pStore, UID id) {
ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf,
OnDemandStore* pStore,
UID id,
Reference<ClusterConnectionFile> ccf) {
state LeaderRegisterCollection regs(pStore);
state ActorCollection forwarders(false);

wait(LeaderRegisterCollection::init(&regs));

loop choose {
when(CheckDescriptorMutableRequest req = waitNext(interf.checkDescriptorMutable.getFuture())) {
// Note the response returns the value of a knob enforced by checking only one coordinator. It is not
// quorum based.
CheckDescriptorMutableReply rep(SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT);
req.reply.send(rep);
}
when(OpenDatabaseCoordRequest req = waitNext(interf.openDatabase.getFuture())) {
Optional<LeaderInfo> forward = regs.getForward(req.clusterKey);
if (forward.present()) {

@ -562,7 +577,18 @@ ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf, OnDemandStore
info.forward = forward.get().serializedInfo;
req.reply.send(CachedSerialization<ClientDBInfo>(info));
} else {
regs.getInterface(req.clusterKey, id).openDatabase.send(req);
StringRef clusterName = ccf->getConnectionString().clusterKeyName();
if (!SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT &&
getClusterDescriptor(req.clusterKey).compare(clusterName)) {
TraceEvent(SevWarn, "CCFMismatch")
.detail("RequestType", "OpenDatabaseCoordRequest")
.detail("LocalCS", ccf->getConnectionString().toString())
.detail("IncomingClusterKey", req.clusterKey)
.detail("IncomingCoordinators", describeList(req.coordinators, req.coordinators.size()));
req.reply.sendError(wrong_connection_file());
} else {
regs.getInterface(req.clusterKey, id).openDatabase.send(req);
}
}
}
when(ElectionResultRequest req = waitNext(interf.electionResult.getFuture())) {

@ -570,38 +596,89 @@ ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf, OnDemandStore
if (forward.present()) {
req.reply.send(forward.get());
} else {
regs.getInterface(req.key, id).electionResult.send(req);
StringRef clusterName = ccf->getConnectionString().clusterKeyName();
if (!SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT && getClusterDescriptor(req.key).compare(clusterName)) {
TraceEvent(SevWarn, "CCFMismatch")
.detail("RequestType", "ElectionResultRequest")
.detail("LocalCS", ccf->getConnectionString().toString())
.detail("IncomingClusterKey", req.key)
.detail("ClusterKey", ccf->getConnectionString().clusterKey())
.detail("IncomingCoordinators", describeList(req.coordinators, req.coordinators.size()));
req.reply.sendError(wrong_connection_file());
} else {
regs.getInterface(req.key, id).electionResult.send(req);
}
}
}
when(GetLeaderRequest req = waitNext(interf.getLeader.getFuture())) {
Optional<LeaderInfo> forward = regs.getForward(req.key);
if (forward.present())
req.reply.send(forward.get());
else
regs.getInterface(req.key, id).getLeader.send(req);
else {
StringRef clusterName = ccf->getConnectionString().clusterKeyName();
if (!SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT && getClusterDescriptor(req.key).compare(clusterName)) {
TraceEvent(SevWarn, "CCFMismatch")
.detail("RequestType", "GetLeaderRequest")
.detail("LocalCS", ccf->getConnectionString().toString())
.detail("IncomingClusterKey", req.key)
.detail("ClusterKey", ccf->getConnectionString().clusterKey());
req.reply.sendError(wrong_connection_file());
} else {
regs.getInterface(req.key, id).getLeader.send(req);
}
}
}
when(CandidacyRequest req = waitNext(interf.candidacy.getFuture())) {
Optional<LeaderInfo> forward = regs.getForward(req.key);
if (forward.present())
req.reply.send(forward.get());
else
regs.getInterface(req.key, id).candidacy.send(req);
else {
StringRef clusterName = ccf->getConnectionString().clusterKeyName();
if (!SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT && getClusterDescriptor(req.key).compare(clusterName)) {
TraceEvent(SevWarn, "CCFMismatch")
.detail("RequestType", "CandidacyRequest")
.detail("LocalCS", ccf->getConnectionString().toString())
.detail("IncomingClusterKey", req.key);
req.reply.sendError(wrong_connection_file());
} else {
regs.getInterface(req.key, id).candidacy.send(req);
}
}
}
when(LeaderHeartbeatRequest req = waitNext(interf.leaderHeartbeat.getFuture())) {
Optional<LeaderInfo> forward = regs.getForward(req.key);
if (forward.present())
req.reply.send(LeaderHeartbeatReply{ false });
else
regs.getInterface(req.key, id).leaderHeartbeat.send(req);
else {
StringRef clusterName = ccf->getConnectionString().clusterKeyName();
if (!SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT && getClusterDescriptor(req.key).compare(clusterName)) {
TraceEvent(SevWarn, "CCFMismatch")
.detail("RequestType", "LeaderHeartbeatRequest")
.detail("LocalCS", ccf->getConnectionString().toString())
.detail("IncomingClusterKey", req.key);
req.reply.sendError(wrong_connection_file());
} else {
regs.getInterface(req.key, id).leaderHeartbeat.send(req);
}
}
}
when(ForwardRequest req = waitNext(interf.forward.getFuture())) {
Optional<LeaderInfo> forward = regs.getForward(req.key);
if (forward.present())
req.reply.send(Void());
else {
forwarders.add(
LeaderRegisterCollection::setForward(&regs, req.key, ClusterConnectionString(req.conn.toString())));
regs.getInterface(req.key, id).forward.send(req);
StringRef clusterName = ccf->getConnectionString().clusterKeyName();
if (!SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT && getClusterDescriptor(req.key).compare(clusterName)) {
TraceEvent(SevWarn, "CCFMismatch")
.detail("RequestType", "ForwardRequest")
.detail("LocalCS", ccf->getConnectionString().toString())
.detail("IncomingClusterKey", req.key);
req.reply.sendError(wrong_connection_file());
} else {
forwarders.add(LeaderRegisterCollection::setForward(
&regs, req.key, ClusterConnectionString(req.conn.toString())));
regs.getInterface(req.key, id).forward.send(req);
}
}
}
when(wait(forwarders.getResult())) {

@ -611,7 +688,7 @@ ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf, OnDemandStore
}
}

ACTOR Future<Void> coordinationServer(std::string dataFolder) {
ACTOR Future<Void> coordinationServer(std::string dataFolder, Reference<ClusterConnectionFile> ccf) {
state UID myID = deterministicRandom()->randomUniqueID();
state LeaderElectionRegInterface myLeaderInterface(g_network);
state GenerationRegInterface myInterface(g_network);

@ -622,7 +699,7 @@ ACTOR Future<Void> coordinationServer(std::string dataFolder) {
.detail("Folder", dataFolder);

try {
wait(localGenerationReg(myInterface, &store) || leaderServer(myLeaderInterface, &store, myID) ||
wait(localGenerationReg(myInterface, &store) || leaderServer(myLeaderInterface, &store, myID, ccf) ||
store.getError());
throw internal_error();
} catch (Error& e) {
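Every handler in leaderServer now applies the same gate: with ENABLE_CROSS_CLUSTER_SUPPORT off, a request whose cluster key's descriptor differs from the local connection string's clusterKeyName() is traced as CCFMismatch and answered with wrong_connection_file() instead of being forwarded to the per-key register. A sketch of that shared predicate (a hypothetical refactor, not part of this commit):

// Returns true when the request may be served by this coordinator.
bool requestMatchesLocalDescriptor(Reference<ClusterConnectionFile> ccf, Key requestClusterKey) {
	if (SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT) {
		return true; // the knob disables the check entirely
	}
	StringRef localName = ccf->getConnectionString().clusterKeyName();
	return getClusterDescriptor(requestClusterKey).compare(localName) == 0;
}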
@ -225,6 +225,6 @@ public:
vector<GenerationRegInterface> stateServers;
};

Future<Void> coordinationServer(std::string const& dataFolder);
Future<Void> coordinationServer(std::string const& dataFolder, Reference<ClusterConnectionFile> const& ccf);

#endif
@ -66,6 +66,8 @@ struct TCServerInfo : public ReferenceCounted<TCServerInfo> {
Future<std::pair<StorageServerInterface, ProcessClass>> onInterfaceChanged;
Promise<Void> removed;
Future<Void> onRemoved;
Future<Void> onTSSPairRemoved;
Promise<Void> killTss;
Promise<Void> wakeUpTracker;
bool inDesiredDC;
LocalityEntry localityEntry;

@ -83,8 +85,10 @@ struct TCServerInfo : public ReferenceCounted<TCServerInfo> {
Reference<LocalitySet> storageServerSet)
: id(ssi.id()), collection(collection), lastKnownInterface(ssi), lastKnownClass(processClass),
dataInFlightToServer(0), onInterfaceChanged(interfaceChanged.getFuture()), onRemoved(removed.getFuture()),
inDesiredDC(inDesiredDC), storeType(KeyValueStoreType::END) {
localityEntry = ((LocalityMap<UID>*)storageServerSet.getPtr())->add(ssi.locality, &id);
inDesiredDC(inDesiredDC), storeType(KeyValueStoreType::END), onTSSPairRemoved(Never()) {
if (!ssi.isTss()) {
localityEntry = ((LocalityMap<UID>*)storageServerSet.getPtr())->add(ssi.locality, &id);
}
}

bool isCorrectStoreType(KeyValueStoreType configStoreType) {

@ -398,6 +402,7 @@ ACTOR Future<Reference<InitialDataDistribution>> getInitialDataDistribution(Data
state std::map<UID, Optional<Key>> server_dc;
state std::map<vector<UID>, std::pair<vector<UID>, vector<UID>>> team_cache;
state std::vector<std::pair<StorageServerInterface, ProcessClass>> tss_servers;

// Get the server list in its own try/catch block since it modifies result. We don't want a subsequent failure
// causing entries to be duplicated

@ -447,8 +452,12 @@ ACTOR Future<Reference<InitialDataDistribution>> getInitialDataDistribution(Data
for (int i = 0; i < serverList.get().size(); i++) {
auto ssi = decodeServerListValue(serverList.get()[i].value);
result->allServers.push_back(std::make_pair(ssi, id_data[ssi.locality.processId()].processClass));
server_dc[ssi.id()] = ssi.locality.dcId();
if (!ssi.isTss()) {
result->allServers.push_back(std::make_pair(ssi, id_data[ssi.locality.processId()].processClass));
server_dc[ssi.id()] = ssi.locality.dcId();
} else {
tss_servers.push_back(std::make_pair(ssi, id_data[ssi.locality.processId()].processClass));
}
}

break;

@ -559,6 +568,11 @@ ACTOR Future<Reference<InitialDataDistribution>> getInitialDataDistribution(Data
// a dummy shard at the end with no keys or servers makes life easier for trackInitialShards()
result->shards.push_back(DDShardInfo(allKeys.end));

// add tss to server list AFTER teams are built
for (auto& it : tss_servers) {
result->allServers.push_back(it);
}

return result;
}

@ -567,7 +581,8 @@ ACTOR Future<Void> storageServerTracker(struct DDTeamCollection* self,
TCServerInfo* server,
Promise<Void> errorOut,
Version addedVersion,
const DDEnabledState* ddEnabledState);
const DDEnabledState* ddEnabledState,
bool isTss);

Future<Void> teamTracker(struct DDTeamCollection* const& self,
Reference<TCTeamInfo> const& team,

@ -598,6 +613,8 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
int64_t unhealthyServers;
std::map<int,int> priority_teams;
std::map<UID, Reference<TCServerInfo>> server_info;
std::map<UID, Reference<TCServerInfo>> tss_info_by_pair;
std::map<UID, Reference<TCServerInfo>> server_and_tss_info; // TODO could replace this with an efficient way to do a read-only concatenation of 2 data structures?
std::map<Key, int> lagging_zones; // zone to number of storage servers lagging
AsyncVar<bool> disableFailingLaggingServers;

@ -610,7 +627,8 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
vector<Reference<TCTeamInfo>> badTeams;
Reference<ShardsAffectedByTeamFailure> shardsAffectedByTeamFailure;
PromiseStream<UID> removedServers;
std::set<UID> recruitingIds; // The IDs of the SS which are being recruited
PromiseStream<UID> removedTSS;
std::set<UID> recruitingIds; // The IDs of the SS/TSS which are being recruited
std::set<NetworkAddress> recruitingLocalities;
Future<Void> initialFailureReactionDelay;
Future<Void> initializationDoneActor;

@ -624,6 +642,8 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
int optimalTeamCount;
AsyncVar<bool> zeroOptimalTeams;

bool isTssRecruiting; // If tss recruiting is waiting on a pair, don't consider DD recruiting for the purposes of QuietDB

// EXCLUDED if an address is in the excluded list in the database.
// FAILED if an address is permanently failed.
// NONE by default. Updated asynchronously (eventually)

@ -709,7 +729,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
initializationDoneActor(logOnCompletion(readyToStart && initialFailureReactionDelay, this)),
optimalTeamCount(0), recruitingStream(0), restartRecruiting(SERVER_KNOBS->DEBOUNCE_RECRUITING_DELAY),
unhealthyServers(0), includedDCs(includedDCs), otherTrackedDCs(otherTrackedDCs),
zeroHealthyTeams(zeroHealthyTeams), zeroOptimalTeams(true), primary(primary),
zeroHealthyTeams(zeroHealthyTeams), zeroOptimalTeams(true), primary(primary), isTssRecruiting(false),
medianAvailableSpace(SERVER_KNOBS->MIN_AVAILABLE_SPACE_RATIO), lastMedianAvailableSpaceUpdate(0),
processingUnhealthy(processingUnhealthy), lowestUtilizationTeam(0), highestUtilizationTeam(0),
getShardMetrics(getShardMetrics), removeFailedServer(removeFailedServer) {

@ -758,10 +778,11 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
// The following makes sure that, even if a reference to a team is held in the DD Queue, the tracker will be
// stopped
// before the server_status map to which it has a pointer, is destroyed.
for (auto& [_, info] : server_info) {
for (auto& [_, info] : server_and_tss_info) {
info->tracker.cancel();
info->collection = nullptr;
}

// TraceEvent("DDTeamCollectionDestructed", distributorId)
// .detail("Primary", primary)
// .detail("ServerTrackerDestroyed", server_info.size());

@ -1128,6 +1149,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
self->healthyZone.set(initTeams->initHealthyZoneValue);
// SOMEDAY: If some servers have teams and not others (or some servers have more data than others) and there is
// an address/locality collision, should we preferentially mark the least used server as undesirable?

for (auto i = initTeams->allServers.begin(); i != initTeams->allServers.end(); ++i) {
if (self->shouldHandleServer(i->first)) {
if (!self->isValidLocality(self->configuration.storagePolicy, i->first.locality)) {

@ -2419,14 +2441,18 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
if (!shouldHandleServer(newServer)) {
return;
}
allServers.push_back(newServer.id());

TraceEvent("AddedStorageServer", distributorId)
if (!newServer.isTss()) {
allServers.push_back(newServer.id());
}

TraceEvent(newServer.isTss() ? "AddedTSS" : "AddedStorageServer", distributorId)
.detail("ServerID", newServer.id())
.detail("ProcessClass", processClass.toString())
.detail("WaitFailureToken", newServer.waitFailure.getEndpoint().token)
.detail("Address", newServer.waitFailure.getEndpoint().getPrimaryAddress());
auto& r = server_info[newServer.id()] = makeReference<TCServerInfo>(

auto& r = server_and_tss_info[newServer.id()] = makeReference<TCServerInfo>(
newServer,
this,
processClass,

@ -2434,12 +2460,33 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
std::find(includedDCs.begin(), includedDCs.end(), newServer.locality.dcId()) != includedDCs.end(),
storageServerSet);

// Establish the relation between server and machine
checkAndCreateMachine(r);
if (newServer.isTss()) {
tss_info_by_pair[newServer.tssPairID.get()] = r;

r->tracker = storageServerTracker(this, cx, r.getPtr(), errorOut, addedVersion, ddEnabledState);
doBuildTeams = true; // Adding a new server triggers to build new teams
restartTeamBuilder.trigger();
if (server_info.count(newServer.tssPairID.get())) {
r->onTSSPairRemoved = server_info[newServer.tssPairID.get()]->onRemoved;
}
} else {
server_info[newServer.id()] = r;
// Establish the relation between server and machine
checkAndCreateMachine(r);
}

r->tracker =
storageServerTracker(this, cx, r.getPtr(), errorOut, addedVersion, ddEnabledState, newServer.isTss());

if (!newServer.isTss()) {
// link and wake up tss' tracker so it knows when this server gets removed
if (tss_info_by_pair.count(newServer.id())) {
tss_info_by_pair[newServer.id()]->onTSSPairRemoved = r->onRemoved;
if (tss_info_by_pair[newServer.id()]->wakeUpTracker.canBeSet()) {
tss_info_by_pair[newServer.id()]->wakeUpTracker.send(Void());
}
}

doBuildTeams = true; // Adding a new server triggers to build new teams
restartTeamBuilder.trigger();
}
}

bool removeTeam(Reference<TCTeamInfo> team) {
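addServer now wires the pair in whichever order the two processes arrive: a TSS registers itself in tss_info_by_pair under its partner's id and, if the partner is already known, subscribes to its onRemoved; an SS that arrives second hands its onRemoved to the waiting TSS and wakes that TSS's tracker. An illustrative invariant only (this helper is hypothetical, not in the commit), showing where a paired SS with id ssId and TSS with id tssId should end up:

void checkTssPairInvariant(DDTeamCollection* self, UID ssId, UID tssId) {
	ASSERT(self->server_info.count(ssId) && !self->server_info.count(tssId)); // only the SS joins teams
	ASSERT(self->server_and_tss_info.count(ssId) && self->server_and_tss_info.count(tssId)); // both are tracked
	ASSERT(self->tss_info_by_pair.count(ssId) && self->tss_info_by_pair[ssId]->id == tssId); // TSS found via its SS
}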
@ -2605,6 +2652,17 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
return foundMachineTeam;
}

void removeTSS(UID removedServer) {
// much simpler than remove server. tss isn't in any teams, so just remove it from data structures
TraceEvent("RemovedTSS", distributorId).detail("ServerID", removedServer);
Reference<TCServerInfo> removedServerInfo = server_and_tss_info[removedServer];

tss_info_by_pair.erase(removedServerInfo->lastKnownInterface.tssPairID.get());
server_and_tss_info.erase(removedServer);

server_status.clear(removedServer);
}

void removeServer(UID removedServer) {
TraceEvent("RemovedStorageServer", distributorId).detail("ServerID", removedServer);

@ -2703,6 +2761,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
}
}
server_info.erase(removedServer);
server_and_tss_info.erase(removedServer);

if (server_status.get(removedServer).initialized && server_status.get(removedServer).isUnhealthy()) {
unhealthyServers--;

@ -2726,7 +2785,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
};

TCServerInfo::~TCServerInfo() {
if (collection && ssVersionTooFarBehind.get()) {
if (collection && ssVersionTooFarBehind.get() && !lastKnownInterface.isTss()) {
collection->removeLaggingStorageServer(lastKnownInterface.locality.zoneId().get());
}
}

@ -3359,6 +3418,7 @@ ACTOR Future<Void> teamTracker(DDTeamCollection* self, Reference<TCTeamInfo> tea
.detail("IsReady", self->initialFailureReactionDelay.isReady());
self->traceTeamCollectionInfo();
}

// Check if the number of degraded machines has changed
state vector<Future<Void>> change;
bool anyUndesired = false;

@ -3400,6 +3460,7 @@ ACTOR Future<Void> teamTracker(DDTeamCollection* self, Reference<TCTeamInfo> tea
bool containsFailed = teamContainsFailedServer(self, team);
bool recheck = !healthy && (lastReady != self->initialFailureReactionDelay.isReady() ||
(lastZeroHealthy && !self->zeroHealthyTeams->get()) || containsFailed);

// TraceEvent("TeamHealthChangeDetected", self->distributorId)
// .detail("Team", team->getDesc())
// .detail("ServersLeft", serversLeft)

@ -3764,8 +3825,8 @@ ACTOR Future<Void> waitServerListChange(DDTeamCollection* self,
ProcessClass const& processClass = results[i].second;
if (!self->shouldHandleServer(ssi)) {
continue;
} else if (self->server_info.count(serverId)) {
auto& serverInfo = self->server_info[serverId];
} else if (self->server_and_tss_info.count(serverId)) {
auto& serverInfo = self->server_and_tss_info[serverId];
if (ssi.getValue.getEndpoint() != serverInfo->lastKnownInterface.getValue.getEndpoint() ||
processClass != serverInfo->lastKnownClass.classType()) {
Promise<std::pair<StorageServerInterface, ProcessClass>> currentInterfaceChanged =

@ -3783,7 +3844,9 @@ ACTOR Future<Void> waitServerListChange(DDTeamCollection* self,
self->serverTrackerErrorOut,
tr.getReadVersion().get(),
ddEnabledState);
self->doBuildTeams = true;
if (!ssi.isTss()) {
self->doBuildTeams = true;
}
}
}

@ -3886,16 +3949,17 @@ ACTOR Future<Void> keyValueStoreTypeTracker(DDTeamCollection* self, TCServerInfo
}

ACTOR Future<Void> waitForAllDataRemoved(Database cx, UID serverID, Version addedVersion, DDTeamCollection* teams) {
state Transaction tr(cx);
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(cx);
loop {
try {
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
Version ver = wait(tr.getReadVersion());
tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
Version ver = wait(tr->getReadVersion());

// we cannot remove a server immediately after adding it, because a perfectly timed master recovery could
// cause us to not store the mutations sent to the short lived storage server.
if (ver > addedVersion + SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS) {
bool canRemove = wait(canRemoveStorageServer(&tr, serverID));
bool canRemove = wait(canRemoveStorageServer(tr, serverID));
// TraceEvent("WaitForAllDataRemoved")
// .detail("Server", serverID)
// .detail("CanRemove", canRemove)

@ -3908,9 +3972,9 @@ ACTOR Future<Void> waitForAllDataRemoved(Database cx, UID serverID, Version adde

// Wait for any change to the serverKeys for this server
wait(delay(SERVER_KNOBS->ALL_DATA_REMOVED_DELAY, TaskPriority::DataDistribution));
tr.reset();
tr->reset();
} catch (Error& e) {
wait(tr.onError(e));
wait(tr->onError(e));
}
}
}

@ -3941,16 +4005,18 @@ ACTOR Future<Void> storageServerFailureTracker(DDTeamCollection* self,
}
}

if (self->server_status.get(interf.id()).initialized) {
bool unhealthy = self->server_status.get(interf.id()).isUnhealthy();
if (unhealthy && !status->isUnhealthy()) {
self->unhealthyServers--;
}
if (!unhealthy && status->isUnhealthy()) {
if (!interf.isTss()) {
if (self->server_status.get(interf.id()).initialized) {
bool unhealthy = self->server_status.get(interf.id()).isUnhealthy();
if (unhealthy && !status->isUnhealthy()) {
self->unhealthyServers--;
}
if (!unhealthy && status->isUnhealthy()) {
self->unhealthyServers++;
}
} else if (status->isUnhealthy()) {
self->unhealthyServers++;
}
} else if (status->isUnhealthy()) {
self->unhealthyServers++;
}

self->server_status.set(interf.id(), *status);

@ -3971,7 +4037,7 @@ ACTOR Future<Void> storageServerFailureTracker(DDTeamCollection* self,
choose {
when(wait(healthChanged)) {
status->isFailed = !status->isFailed;
if (!status->isFailed &&
if (!status->isFailed && !server->lastKnownInterface.isTss() &&
(server->teams.size() < targetTeamNumPerServer || self->lastBuildTeamsFailed)) {
self->doBuildTeams = true;
}

@ -4014,7 +4080,9 @@ ACTOR Future<Void> storageServerTracker(
TCServerInfo* server, // This actor is owned by this TCServerInfo, point to server_info[id]
Promise<Void> errorOut,
Version addedVersion,
const DDEnabledState* ddEnabledState) {
const DDEnabledState* ddEnabledState,
bool isTss) {

state Future<Void> failureTracker;
state ServerStatus status(false, false, server->lastKnownInterface.locality);
state bool lastIsUnhealthy = false;

@ -4022,7 +4090,7 @@ ACTOR Future<Void> storageServerTracker(

state Future<std::pair<StorageServerInterface, ProcessClass>> interfaceChanged = server->onInterfaceChanged;

state Future<Void> storeTypeTracker = keyValueStoreTypeTracker(self, server);
state Future<Void> storeTypeTracker = (isTss) ? Never() : keyValueStoreTypeTracker(self, server);
state bool hasWrongDC = !isCorrectDC(self, server);
state bool hasInvalidLocality =
!self->isValidLocality(self->configuration.storagePolicy, server->lastKnownInterface.locality);

@ -4042,7 +4110,7 @@ ACTOR Future<Void> storageServerTracker(
// dcLocation, interface) is changed.
state std::vector<Future<Void>> otherChanges;
std::vector<Promise<Void>> wakeUpTrackers;
for (const auto& i : self->server_info) {
for (const auto& i : self->server_and_tss_info) {
if (i.second.getPtr() != server &&
i.second->lastKnownInterface.address() == server->lastKnownInterface.address()) {
auto& statusInfo = self->server_status.get(i.first);

@ -4144,11 +4212,11 @@ ACTOR Future<Void> storageServerTracker(
.detail("Excluded", worstAddr.toString());
status.isUndesired = true;
status.isWrongConfiguration = true;
if (worstStatus == DDTeamCollection::Status::FAILED) {
if (worstStatus == DDTeamCollection::Status::FAILED && !isTss) {
TraceEvent(SevWarn, "FailedServerRemoveKeys", self->distributorId)
.detail("Server", server->id)
.detail("Excluded", worstAddr.toString());
wait(delay(0.0)); //Do not throw an error while still inside trackExcludedServers
wait(delay(0.0)); // Do not throw an error while still inside trackExcludedServers
while (!ddEnabledState->isDDEnabled()) {
wait(delay(1.0));
}

@ -4165,7 +4233,7 @@ ACTOR Future<Void> storageServerTracker(
self->restartRecruiting.trigger();
}

if (lastIsUnhealthy && !status.isUnhealthy() &&
if (lastIsUnhealthy && !status.isUnhealthy() && !isTss &&
(server->teams.size() < targetTeamNumPerServer || self->lastBuildTeamsFailed)) {
self->doBuildTeams = true;
self->restartTeamBuilder.trigger(); // This does not trigger building teams if there exist healthy teams

@ -4174,7 +4242,7 @@ ACTOR Future<Void> storageServerTracker(

state bool recordTeamCollectionInfo = false;
choose {
when(wait(failureTracker)) {
when(wait(failureTracker || server->onTSSPairRemoved || server->killTss.getFuture())) {
// The server is failed AND all data has been removed from it, so permanently remove it.
TraceEvent("StatusMapChange", self->distributorId)
.detail("ServerID", server->id)

@ -4185,7 +4253,8 @@ ACTOR Future<Void> storageServerTracker(
}

// Remove server from FF/serverList
wait(removeStorageServer(cx, server->id, self->lock, ddEnabledState));
wait(removeStorageServer(
cx, server->id, server->lastKnownInterface.tssPairID, self->lock, ddEnabledState));

TraceEvent("StatusMapChange", self->distributorId)
.detail("ServerID", server->id)

@ -4193,7 +4262,11 @@ ACTOR Future<Void> storageServerTracker(
// Sets removeSignal (alerting dataDistributionTeamCollection to remove the storage server from its
// own data structures)
server->removed.send(Void());
self->removedServers.send(server->id);
if (isTss) {
self->removedTSS.send(server->id);
} else {
self->removedServers.send(server->id);
}
return Void();
}
when(std::pair<StorageServerInterface, ProcessClass> newInterface = wait(interfaceChanged)) {

@ -4211,7 +4284,7 @@ ACTOR Future<Void> storageServerTracker(

server->lastKnownInterface = newInterface.first;
server->lastKnownClass = newInterface.second;
if (localityChanged) {
if (localityChanged && !isTss) {
TEST(true); // Server locality changed

// The locality change of a server will affect machine teams related to the server if

@ -4303,7 +4376,7 @@ ACTOR Future<Void> storageServerTracker(
recordTeamCollectionInfo = true;
// Restart the storeTracker for the new interface. This will cancel the previous
// keyValueStoreTypeTracker
storeTypeTracker = keyValueStoreTypeTracker(self, server);
storeTypeTracker = (isTss) ? Never() : keyValueStoreTypeTracker(self, server);
hasWrongDC = !isCorrectDC(self, server);
hasInvalidLocality =
!self->isValidLocality(self->configuration.storagePolicy, server->lastKnownInterface.locality);

@ -4350,6 +4423,7 @@ ACTOR Future<Void> storageServerTracker(
// Monitor whether or not storage servers are being recruited. If so, then a database cannot be considered quiet
ACTOR Future<Void> monitorStorageServerRecruitment(DDTeamCollection* self) {
state bool recruiting = false;
state bool lastIsTss = false;
TraceEvent("StorageServerRecruitment", self->distributorId)
.detail("State", "Idle")
.trackLatest("StorageServerRecruitment_" + self->distributorId.toString());

@ -4360,12 +4434,22 @@ ACTOR Future<Void> monitorStorageServerRecruitment(DDTeamCollection* self) {
}
TraceEvent("StorageServerRecruitment", self->distributorId)
.detail("State", "Recruiting")
.detail("IsTSS", self->isTssRecruiting ? "True" : "False")
.trackLatest("StorageServerRecruitment_" + self->distributorId.toString());
recruiting = true;
lastIsTss = self->isTssRecruiting;
} else {
loop {
choose {
when(wait(self->recruitingStream.onChange())) {}
when(wait(self->recruitingStream.onChange())) {
if (lastIsTss != self->isTssRecruiting) {
TraceEvent("StorageServerRecruitment", self->distributorId)
.detail("State", "Recruiting")
.detail("IsTSS", self->isTssRecruiting ? "True" : "False")
.trackLatest("StorageServerRecruitment_" + self->distributorId.toString());
lastIsTss = self->isTssRecruiting;
}
}
when(wait(self->recruitingStream.get() == 0
? delay(SERVER_KNOBS->RECRUITMENT_IDLE_DELAY, TaskPriority::DataDistribution)
: Future<Void>(Never()))) {

@ -4445,7 +4529,7 @@ ACTOR Future<Void> checkAndRemoveInvalidLocalityAddr(DDTeamCollection* self) {

int numExistingSSOnAddr(DDTeamCollection* self, const AddressExclusion& addr) {
int numExistingSS = 0;
for (auto& server : self->server_info) {
for (auto& server : self->server_and_tss_info) {
const NetworkAddress& netAddr = server.second->lastKnownInterface.stableAddress();
AddressExclusion usedAddr(netAddr.ip, netAddr.port);
if (usedAddr == addr) {

@ -4456,9 +4540,94 @@ int numExistingSSOnAddr(DDTeamCollection* self, const AddressExclusion& addr) {
return numExistingSS;
}

// All state that represents an ongoing tss pair recruitment
struct TSSPairState : ReferenceCounted<TSSPairState>, NonCopyable {
Promise<Optional<std::pair<UID, Version>>>
ssPairInfo; // if set, for ss to pass its id to tss pair once it is successfully recruited
Promise<bool> tssPairDone; // if set, for tss to pass ss that it was successfully recruited
Promise<Void> complete;

Optional<Key> dcId; // dc
Optional<Key> dataHallId; // data hall

bool active;

TSSPairState() : active(false) {}

TSSPairState(const LocalityData& locality)
: active(true), dcId(locality.dcId()), dataHallId(locality.dataHallId()) {}

bool inDataZone(const LocalityData& locality) {
return locality.dcId() == dcId && locality.dataHallId() == dataHallId;
}

void cancel() {
// only cancel if both haven't been set, otherwise one half of pair could think it was successful but the other
// half would think it failed
if (active && ssPairInfo.canBeSet() && tssPairDone.canBeSet()) {
ssPairInfo.send(Optional<std::pair<UID, Version>>());
// callback of ssPairInfo could have cancelled tssPairDone already, so double check before cancelling
if (tssPairDone.canBeSet()) {
tssPairDone.send(false);
}
if (complete.canBeSet()) {
complete.send(Void());
}
}
}

bool tssRecruitSuccess() {
if (active && tssPairDone.canBeSet()) {
tssPairDone.send(true);
return true;
}
return false;
}

bool tssRecruitFailed() {
if (active && tssPairDone.canBeSet()) {
tssPairDone.send(false);
return true;
}
return false;
}

bool ssRecruitSuccess(std::pair<UID, Version> ssInfo) {
if (active && ssPairInfo.canBeSet()) {
ssPairInfo.send(Optional<std::pair<UID, Version>>(ssInfo));
return true;
}
return false;
}

bool ssRecruitFailed() {
if (active && ssPairInfo.canBeSet()) {
ssPairInfo.send(Optional<std::pair<UID, Version>>());
return true;
}
return false;
}

bool markComplete() {
if (active && complete.canBeSet()) {
complete.send(Void());
return true;
}
return false;
}

Future<Optional<std::pair<UID, Version>>> waitOnSS() { return ssPairInfo.getFuture(); }

Future<bool> waitOnTSS() { return tssPairDone.getFuture(); }

Future<Void> waitComplete() { return complete.getFuture(); }
};
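The struct above is the whole SS/TSS recruitment handshake: the SS side resolves ssPairInfo, the TSS side resolves tssPairDone, and complete releases the in-progress accounting. Condensed, the two initializeStorage calls that follow use it roughly like this (a summary of the code below, not new behavior):

// SS side:  recruit the SS, then tssState->ssRecruitSuccess({ ssId, addedVersion });
//           wait(timeout(tssState->waitOnTSS(), SERVER_KNOBS->TSS_RECRUITMENT_TIMEOUT)) before moving on.
// TSS side: Optional<std::pair<UID, Version>> pair = wait(tssState->waitOnSS());
//           if pair.present(), set isr.tssPairIDAndVersion = pair.get() and recruit; otherwise skip recruitment.
//           finish with tssRecruitSuccess() or tssRecruitFailed(), then markComplete().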
ACTOR Future<Void> initializeStorage(DDTeamCollection* self,
RecruitStorageReply candidateWorker,
const DDEnabledState* ddEnabledState) {
const DDEnabledState* ddEnabledState,
bool recruitTss,
Reference<TSSPairState> tssState) {
// SOMEDAY: Cluster controller waits for availability, retry quickly if a server's Locality changes
self->recruitingStream.set(self->recruitingStream.get() + 1);

@ -4470,12 +4639,48 @@ ACTOR Future<Void> initializeStorage(DDTeamCollection* self,
// too many storage server on the same address (i.e., process) can cause OOM.
// Ask the candidateWorker to initialize a SS only if the worker does not have a pending request
state UID interfaceId = deterministicRandom()->randomUniqueID();
InitializeStorageRequest isr;
isr.storeType = self->configuration.storageServerStoreType;

state InitializeStorageRequest isr;
isr.storeType =
recruitTss ? self->configuration.testingStorageServerStoreType : self->configuration.storageServerStoreType;
isr.seedTag = invalidTag;
isr.reqId = deterministicRandom()->randomUniqueID();
isr.interfaceId = interfaceId;

self->recruitingIds.insert(interfaceId);
self->recruitingLocalities.insert(candidateWorker.worker.stableAddress());

// if tss, wait for pair ss to finish and add its id to isr. If pair fails, don't recruit tss
state bool doRecruit = true;
if (recruitTss) {
TraceEvent("TSS_Recruit", self->distributorId)
.detail("TSSID", interfaceId)
.detail("Stage", "TSSWaitingPair")
.detail("Addr", candidateWorker.worker.address())
.detail("Locality", candidateWorker.worker.locality.toString());

Optional<std::pair<UID, Version>> ssPairInfoResult = wait(tssState->waitOnSS());
if (ssPairInfoResult.present()) {
isr.tssPairIDAndVersion = ssPairInfoResult.get();

TraceEvent("TSS_Recruit", self->distributorId)
.detail("SSID", ssPairInfoResult.get().first)
.detail("TSSID", interfaceId)
.detail("Stage", "TSSWaitingPair")
.detail("Addr", candidateWorker.worker.address())
.detail("Version", ssPairInfoResult.get().second)
.detail("Locality", candidateWorker.worker.locality.toString());
} else {
doRecruit = false;

TraceEvent(SevWarnAlways, "TSS_RecruitError", self->distributorId)
.detail("TSSID", interfaceId)
.detail("Reason", "SS recruitment failed for some reason")
.detail("Addr", candidateWorker.worker.address())
.detail("Locality", candidateWorker.worker.locality.toString());
}
}

TraceEvent("DDRecruiting")
.detail("Primary", self->primary)
.detail("State", "Sending request to worker")

@ -4483,19 +4688,53 @@ ACTOR Future<Void> initializeStorage(DDTeamCollection* self,
.detail("WorkerLocality", candidateWorker.worker.locality.toString())
.detail("Interf", interfaceId)
.detail("Addr", candidateWorker.worker.address())
.detail("TSS", recruitTss ? "true" : "false")
.detail("RecruitingStream", self->recruitingStream.get());

self->recruitingIds.insert(interfaceId);
self->recruitingLocalities.insert(candidateWorker.worker.stableAddress());
state ErrorOr<InitializeStorageReply> newServer =
wait(candidateWorker.worker.storage.tryGetReply(isr, TaskPriority::DataDistribution));
if (newServer.isError()) {
Future<ErrorOr<InitializeStorageReply>> fRecruit =
doRecruit ? candidateWorker.worker.storage.tryGetReply(isr, TaskPriority::DataDistribution)
: Future<ErrorOr<InitializeStorageReply>>(ErrorOr<InitializeStorageReply>(recruitment_failed()));

state ErrorOr<InitializeStorageReply> newServer = wait(fRecruit);

if (doRecruit && newServer.isError()) {
TraceEvent(SevWarn, "DDRecruitmentError").error(newServer.getError());
if (!newServer.isError(error_code_recruitment_failed) &&
!newServer.isError(error_code_request_maybe_delivered))
throw newServer.getError();
wait(delay(SERVER_KNOBS->STORAGE_RECRUITMENT_DELAY, TaskPriority::DataDistribution));
}

if (!recruitTss && newServer.present() &&
tssState->ssRecruitSuccess(std::pair(interfaceId, newServer.get().addedVersion))) {
// SS has a tss pair. send it this id, but try to wait for add server until tss is recruited

TraceEvent("TSS_Recruit", self->distributorId)
.detail("SSID", interfaceId)
.detail("Stage", "SSSignaling")
.detail("Addr", candidateWorker.worker.address())
.detail("Locality", candidateWorker.worker.locality.toString());

// wait for timeout, but eventually move on if no TSS pair recruited
Optional<bool> tssSuccessful = wait(timeout(tssState->waitOnTSS(), SERVER_KNOBS->TSS_RECRUITMENT_TIMEOUT));

if (tssSuccessful.present() && tssSuccessful.get()) {
TraceEvent("TSS_Recruit", self->distributorId)
.detail("SSID", interfaceId)
.detail("Stage", "SSGotPair")
.detail("Addr", candidateWorker.worker.address())
.detail("Locality", candidateWorker.worker.locality.toString());
} else {
TraceEvent(SevWarn, "TSS_RecruitError", self->distributorId)
.detail("SSID", interfaceId)
.detail("Reason",
tssSuccessful.present() ? "TSS recruitment failed for some reason"
: "TSS recruitment timed out")
.detail("Addr", candidateWorker.worker.address())
.detail("Locality", candidateWorker.worker.locality.toString());
}
}

self->recruitingIds.erase(interfaceId);
self->recruitingLocalities.erase(candidateWorker.worker.stableAddress());

@ -4509,26 +4748,43 @@ ACTOR Future<Void> initializeStorage(DDTeamCollection* self,
.detail("RecruitingStream", self->recruitingStream.get());

if (newServer.present()) {
if (!self->server_info.count(newServer.get().interf.id()))
self->addServer(newServer.get().interf,
candidateWorker.processClass,
self->serverTrackerErrorOut,
newServer.get().addedVersion,
ddEnabledState);
else
TraceEvent(SevWarn, "DDRecruitmentError").detail("Reason", "Server ID already recruited");

self->doBuildTeams = true;
UID id = newServer.get().interf.id();
if (!self->server_and_tss_info.count(id)) {
if (!recruitTss || tssState->tssRecruitSuccess()) {
self->addServer(newServer.get().interf,
candidateWorker.processClass,
self->serverTrackerErrorOut,
newServer.get().addedVersion,
ddEnabledState);
// signal all done after adding tss to tracking info
tssState->markComplete();
}
} else {
TraceEvent(SevWarn, "DDRecruitmentError")
.detail("Reason", "Server ID already recruited")
.detail("ServerID", id);
}
if (!recruitTss) {
self->doBuildTeams = true;
}
}
}

// SS and/or TSS recruitment failed at this point, update tssState
if (recruitTss && tssState->tssRecruitFailed()) {
tssState->markComplete();
TEST(true); // TSS recruitment failed for some reason
}
if (!recruitTss && tssState->ssRecruitFailed()) {
TEST(true); // SS with pair TSS recruitment failed for some reason
}

self->recruitingStream.set(self->recruitingStream.get() - 1);
self->restartRecruiting.trigger();

return Void();
}

// Recruit a worker as a storage server
ACTOR Future<Void> storageRecruiter(DDTeamCollection* self,
Reference<AsyncVar<struct ServerDBInfo>> db,
const DDEnabledState* ddEnabledState) {

@ -4536,13 +4792,49 @@ ACTOR Future<Void> storageRecruiter(DDTeamCollection* self,
state RecruitStorageRequest lastRequest;
state bool hasHealthyTeam;
state std::map<AddressExclusion, int> numSSPerAddr;

// tss-specific recruitment state
state int32_t targetTSSInDC = 0;
state int32_t tssToRecruit = 0;
state int inProgressTSSCount = 0;
state PromiseStream<Future<Void>> addTSSInProgress;
state Future<Void> inProgressTSS =
actorCollection(addTSSInProgress.getFuture(), &inProgressTSSCount, nullptr, nullptr, nullptr);
state Reference<TSSPairState> tssState = makeReference<TSSPairState>();
state Future<Void> checkTss = self->initialFailureReactionDelay;
state bool pendingTSSCheck = false;

TraceEvent(SevDebug, "TSS_RecruitUpdated", self->distributorId).detail("Count", tssToRecruit);

loop {
try {
// Divide TSS evenly in each DC if there are multiple
// TODO would it be better to put all of them in primary DC?
targetTSSInDC = self->configuration.desiredTSSCount;
if (self->configuration.usableRegions > 1) {
targetTSSInDC /= self->configuration.usableRegions;
if (self->primary) {
// put extras in primary DC if it's uneven
targetTSSInDC += (self->configuration.desiredTSSCount % self->configuration.usableRegions);
}
}
int newTssToRecruit = targetTSSInDC - self->tss_info_by_pair.size() - inProgressTSSCount;
if (newTssToRecruit != tssToRecruit) {
TraceEvent("TSS_RecruitUpdated", self->distributorId).detail("Count", newTssToRecruit);
tssToRecruit = newTssToRecruit;

// if we need to get rid of some TSS processes, signal to either cancel recruitment or kill existing TSS
// processes
if (!pendingTSSCheck && (tssToRecruit < 0 || self->zeroHealthyTeams->get()) &&
(self->isTssRecruiting || (self->zeroHealthyTeams->get() && self->tss_info_by_pair.size() > 0))) {
checkTss = self->initialFailureReactionDelay;
}
}
numSSPerAddr.clear();
hasHealthyTeam = (self->healthyTeamCount != 0);
RecruitStorageRequest rsr;
std::set<AddressExclusion> exclusions;
for (auto s = self->server_info.begin(); s != self->server_info.end(); ++s) {
for (auto s = self->server_and_tss_info.begin(); s != self->server_and_tss_info.end(); ++s) {
auto serverStatus = self->server_status.get(s->second->lastKnownInterface.id());
if (serverStatus.excludeOnRecruit()) {
TraceEvent(SevDebug, "DDRecruitExcl1")

@ -4574,7 +4866,7 @@ ACTOR Future<Void> storageRecruiter(DDTeamCollection* self,
exclusions.insert(addr);
}

rsr.criticalRecruitment = self->healthyTeamCount == 0;
rsr.criticalRecruitment = !hasHealthyTeam;
for (auto it : exclusions) {
rsr.excludeAddresses.push_back(it);
}
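The targetTSSInDC arithmetic above splits desiredTSSCount across usable regions, gives the primary region any remainder, and then recruits only the shortfall after subtracting TSSes that already exist or are in flight. A worked example with assumed numbers:

// Hypothetical configuration, for illustration only:
//   desiredTSSCount = 5, usableRegions = 2
//   primary DC:  5 / 2 + 5 % 2 = 2 + 1 = 3
//   remote  DC:  5 / 2         = 2
// If the primary DC already tracks one TSS and one recruitment is in flight:
//   tssToRecruit = 3 - 1 - 1 = 1      // recruit one more
// A negative tssToRecruit instead arms the checkTss branch below, which cancels or kills extras.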
@ -4611,11 +4903,100 @@ ACTOR Future<Void> storageRecruiter(DDTeamCollection* self,
.detail("Addr", candidateSSAddr.toString())
.detail("NumExistingSS", numExistingSS);
}
self->addActor.send(initializeStorage(self, candidateWorker, ddEnabledState));

if (hasHealthyTeam && !tssState->active && tssToRecruit > 0) {
TraceEvent("TSS_Recruit", self->distributorId)
.detail("Stage", "HoldTSS")
.detail("Addr", candidateSSAddr.toString())
.detail("Locality", candidateWorker.worker.locality.toString());

TEST(true); // Starting TSS recruitment
self->isTssRecruiting = true;
tssState = makeReference<TSSPairState>(candidateWorker.worker.locality);

addTSSInProgress.send(tssState->waitComplete());
self->addActor.send(initializeStorage(self, candidateWorker, ddEnabledState, true, tssState));
checkTss = self->initialFailureReactionDelay;
} else {
if (tssState->active && tssState->inDataZone(candidateWorker.worker.locality)) {
TEST(true); // TSS recruits pair in same dc/datahall
self->isTssRecruiting = false;
TraceEvent("TSS_Recruit", self->distributorId)
.detail("Stage", "PairSS")
.detail("Addr", candidateSSAddr.toString())
.detail("Locality", candidateWorker.worker.locality.toString());
self->addActor.send(
initializeStorage(self, candidateWorker, ddEnabledState, false, tssState));
// successfully started recruitment of pair, reset tss recruitment state
tssState = makeReference<TSSPairState>();
} else {
TEST(tssState->active); // TSS recruitment skipped potential pair because it's in a
// different dc/datahall
self->addActor.send(initializeStorage(
self, candidateWorker, ddEnabledState, false, makeReference<TSSPairState>()));
}
}
}
when(wait(db->onChange())) { // SOMEDAY: only if clusterInterface changes?
fCandidateWorker = Future<RecruitStorageReply>();
}
when(wait(self->zeroHealthyTeams->onChange())) {
if (!pendingTSSCheck && self->zeroHealthyTeams->get() &&
(self->isTssRecruiting || self->tss_info_by_pair.size() > 0)) {
checkTss = self->initialFailureReactionDelay;
}
}
when(wait(checkTss)) {
bool cancelTss = self->isTssRecruiting && (tssToRecruit < 0 || self->zeroHealthyTeams->get());
// Can't kill more tss' than we have. Kill 1 if zero healthy teams, otherwise kill enough to get
// back to the desired amount
int tssToKill = std::min((int)self->tss_info_by_pair.size(),
std::max(-tssToRecruit, self->zeroHealthyTeams->get() ? 1 : 0));
if (cancelTss) {
TEST(tssToRecruit < 0); // tss recruitment cancelled due to too many TSS
TEST(self->zeroHealthyTeams->get()); // tss recruitment cancelled due zero healthy teams

TraceEvent(SevWarn, "TSS_RecruitCancelled", self->distributorId)
.detail("Reason", tssToRecruit <= 0 ? "TooMany" : "ZeroHealthyTeams");
tssState->cancel();
tssState = makeReference<TSSPairState>();
self->isTssRecruiting = false;

pendingTSSCheck = true;
checkTss = delay(SERVER_KNOBS->TSS_DD_CHECK_INTERVAL);
} else if (tssToKill > 0) {
auto itr = self->tss_info_by_pair.begin();
for (int i = 0; i < tssToKill; i++, itr++) {
UID tssId = itr->second->id;
StorageServerInterface tssi = itr->second->lastKnownInterface;

if (self->shouldHandleServer(tssi) && self->server_and_tss_info.count(tssId)) {
Promise<Void> killPromise = itr->second->killTss;
if (killPromise.canBeSet()) {
TEST(tssToRecruit < 0); // Killing TSS due to too many TSS
TEST(self->zeroHealthyTeams->get()); // Killing TSS due zero healthy teams
TraceEvent(SevWarn, "TSS_DDKill", self->distributorId)
.detail("TSSID", tssId)
.detail("Reason",
self->zeroHealthyTeams->get() ? "ZeroHealthyTeams" : "TooMany");
killPromise.send(Void());
}
}
}
// If we're killing a TSS because of zero healthy teams, wait a bit to give the replacing SS a
// change to join teams and stuff before killing another TSS
pendingTSSCheck = true;
checkTss = delay(SERVER_KNOBS->TSS_DD_CHECK_INTERVAL);
} else if (self->isTssRecruiting) {
// check again later in case we need to cancel recruitment
pendingTSSCheck = true;
checkTss = delay(SERVER_KNOBS->TSS_DD_CHECK_INTERVAL);
// FIXME: better way to do this than timer?
} else {
pendingTSSCheck = false;
checkTss = Never();
}
}
when(wait(self->restartRecruiting.onTrigger())) {}
}
wait(delay(FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY, TaskPriority::DataDistribution));

@ -4760,6 +5141,13 @@ ACTOR Future<Void> dataDistributionTeamCollection(Reference<DDTeamCollection> te

self->restartRecruiting.trigger();
}
when(UID removedTSS = waitNext(self->removedTSS.getFuture())) {
TEST(true); // TSS removed from database
self->removeTSS(removedTSS);
serverRemoved.send(Void());

self->restartRecruiting.trigger();
}
when(wait(self->zeroHealthyTeams->onChange())) {
if (self->zeroHealthyTeams->get()) {
self->restartRecruiting.trigger();

@ -5265,11 +5653,13 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self,
if (removeFailedServer.getFuture().isReady() && !removeFailedServer.getFuture().isError()) {
TraceEvent("RemoveFailedServer", removeFailedServer.getFuture().get()).error(err);
wait(removeKeysFromFailedServer(cx, removeFailedServer.getFuture().get(), lock, ddEnabledState));
wait(removeStorageServer(cx, removeFailedServer.getFuture().get(), lock, ddEnabledState));
Optional<UID> tssPairID;
wait(removeStorageServer(cx, removeFailedServer.getFuture().get(), tssPairID, lock, ddEnabledState));
} else {
if (err.code() != error_code_movekeys_conflict) {
throw err;
}

bool ddEnabled = wait(isDataDistributionEnabled(cx, ddEnabledState));
TraceEvent("DataDistributionMoveKeysConflict").detail("DataDistributionEnabled", ddEnabled).error(err);
if (ddEnabled) {

@ -5920,4 +6310,4 @@ TEST_CASE("/DataDistribution/AddTeamsBestOf/NotEnoughServers") {
ASSERT(result == 8);

return Void();
}
}
@ -371,9 +371,9 @@ public:
const T* upperBound() const { return upper; }

DeltaTree* tree;
Arena arena;

private:
Arena arena;
DecodedNode* root;
const T* lower;
const T* upper;
@ -148,7 +148,10 @@ ACTOR Future<int> spawnProcess(std::string path,
state pid_t pid = pidAndReadFD.first;
state Optional<int> readFD = pidAndReadFD.second;
if (pid == -1) {
TraceEvent(SevWarnAlways, "SpawnProcess: Command failed to spawn").detail("Cmd", path).detail("Args", allArgs);
TraceEvent(SevWarnAlways, "SpawnProcessFailure")
.detail("Reason", "Command failed to spawn")
.detail("Cmd", path)
.detail("Args", allArgs);
return -1;
} else if (pid > 0) {
state int status = -1;

@ -160,7 +163,8 @@ ACTOR Future<int> spawnProcess(std::string path,
if (runTime > maxWaitTime) {
// timing out

TraceEvent(SevWarnAlways, "SpawnProcess : Command failed, timeout")
TraceEvent(SevWarnAlways, "SpawnProcessFailure")
.detail("Reason", "Command failed, timeout")
.detail("Cmd", path)
.detail("Args", allArgs);
return -1;

@ -175,9 +179,10 @@ ACTOR Future<int> spawnProcess(std::string path,
}

if (err < 0) {
TraceEvent event(SevWarnAlways, "SpawnProcess : Command failed");
TraceEvent event(SevWarnAlways, "SpawnProcessFailure");
setupTraceWithOutput(event, bytesRead, outputBuffer);
event.detail("Cmd", path)
event.detail("Reason", "Command failed")
.detail("Cmd", path)
.detail("Args", allArgs)
.detail("Errno", WIFEXITED(status) ? WEXITSTATUS(status) : -1);
return -1;

@ -194,14 +199,15 @@ ACTOR Future<int> spawnProcess(std::string path,
} else {
// child process completed
if (!(WIFEXITED(status) && WEXITSTATUS(status) == 0)) {
TraceEvent event(SevWarnAlways, "SpawnProcess : Command failed");
TraceEvent event(SevWarnAlways, "SpawnProcessFailure");
setupTraceWithOutput(event, bytesRead, outputBuffer);
event.detail("Cmd", path)
event.detail("Reason", "Command failed")
.detail("Cmd", path)
.detail("Args", allArgs)
.detail("Errno", WIFEXITED(status) ? WEXITSTATUS(status) : -1);
return WIFEXITED(status) ? WEXITSTATUS(status) : -1;
}
TraceEvent event("SpawnProcess : Command status");
TraceEvent event("SpawnProcessCommandStatus");
setupTraceWithOutput(event, bytesRead, outputBuffer);
event.detail("Cmd", path)
.detail("Args", allArgs)
@ -26,6 +26,7 @@
|
|||
|
||||
#include "flow/flow.h"
|
||||
#include "fdbclient/FDBTypes.h"
|
||||
#include "flow/crc32c.h"
|
||||
|
||||
#ifndef VALGRIND
|
||||
#define VALGRIND_MAKE_MEM_UNDEFINED(x, y)
|
||||
|
@@ -36,39 +37,98 @@ typedef uint32_t LogicalPageID;
typedef uint32_t PhysicalPageID;
#define invalidLogicalPageID std::numeric_limits<LogicalPageID>::max()

class IPage {
// Represents a block of memory in a 4096-byte aligned location held by an Arena.
class ArenaPage : public ReferenceCounted<ArenaPage>, public FastAllocated<ArenaPage> {
public:
IPage() : userData(nullptr) {}
// The page's logical size includes an opaque checksum, use size() to get usable size
ArenaPage(int logicalSize, int bufferSize) : logicalSize(logicalSize), bufferSize(bufferSize), userData(nullptr) {
if (bufferSize > 0) {
buffer = (uint8_t*)arena.allocate4kAlignedBuffer(bufferSize);

virtual uint8_t const* begin() const = 0;
virtual uint8_t* mutate() = 0;
// Mark any unused page portion defined
VALGRIND_MAKE_MEM_DEFINED(buffer + logicalSize, bufferSize - logicalSize);
} else {
buffer = nullptr;
}
};

// Must return the same size for all pages created by the same pager instance
virtual int size() const = 0;

StringRef asStringRef() const { return StringRef(begin(), size()); }

virtual ~IPage() {
~ArenaPage() {
if (userData != nullptr && userDataDestructor != nullptr) {
userDataDestructor(userData);
}
}

virtual Reference<IPage> clone() const = 0;
uint8_t const* begin() const { return (uint8_t*)buffer; }

virtual void addref() const = 0;
virtual void delref() const = 0;
uint8_t* mutate() { return (uint8_t*)buffer; }

typedef uint32_t Checksum;

// Usable size, without checksum
int size() const { return logicalSize - sizeof(Checksum); }

Standalone<StringRef> asStringRef() const { return Standalone<StringRef>(StringRef(begin(), size()), arena); }

// Get an ArenaPage which is a copy of this page, in its own Arena
Reference<ArenaPage> cloneContents() const {
ArenaPage* p = new ArenaPage(logicalSize, bufferSize);
memcpy(p->buffer, buffer, logicalSize);
return Reference<ArenaPage>(p);
}

// Get an ArenaPage which depends on this page's Arena and references some of its memory
Reference<ArenaPage> subPage(int offset, int len) const {
ArenaPage* p = new ArenaPage(len, 0);
p->buffer = buffer + offset;
p->arena.dependsOn(arena);
return Reference<ArenaPage>(p);
}

// Given a vector of pages with the same ->size(), create a new ArenaPage with a ->size() that is
// equivalent to all of the input pages and has all of their contents copied into it.
static Reference<ArenaPage> concatPages(const std::vector<Reference<const ArenaPage>>& pages) {
int usableSize = pages.front()->size();
int totalUsableSize = pages.size() * usableSize;
int totalBufferSize = pages.front()->bufferSize * pages.size();
ArenaPage* superpage = new ArenaPage(totalUsableSize + sizeof(Checksum), totalBufferSize);

uint8_t* wptr = superpage->mutate();
for (auto& p : pages) {
ASSERT(p->size() == usableSize);
memcpy(wptr, p->begin(), usableSize);
wptr += usableSize;
}

return Reference<ArenaPage>(superpage);
}

Checksum& getChecksum() { return *(Checksum*)(buffer + size()); }

Checksum calculateChecksum(LogicalPageID pageID) { return crc32c_append(pageID, buffer, size()); }

void updateChecksum(LogicalPageID pageID) { getChecksum() = calculateChecksum(pageID); }

bool verifyChecksum(LogicalPageID pageID) { return getChecksum() == calculateChecksum(pageID); }

const Arena& getArena() const { return arena; }

private:
Arena arena;
int logicalSize;
int bufferSize;
uint8_t* buffer;

public:
mutable void* userData;
mutable void (*userDataDestructor)(void*);
};
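
A short sketch of how a pager is expected to use this layout: the buffer holds size() bytes of usable data followed by a 4-byte checksum keyed on the page's logical page ID, so a writer fills the page and calls updateChecksum() before flushing, and a reader calls verifyChecksum() after loading. The page sizes and the page ID below are hypothetical, not taken from the pager code:

    // Hypothetical write path for a freshly allocated page.
    const int pageSize = 4096;
    Reference<ArenaPage> page = makeReference<ArenaPage>(pageSize, pageSize);
    memset(page->mutate(), 0, page->size()); // fill only the usable region; size() excludes the checksum
    LogicalPageID id = 42; // hypothetical page ID
    page->updateChecksum(id); // stores crc32c of (id, usable bytes) in the trailing Checksum slot

    // Hypothetical read path after loading the same buffer back from disk.
    if (!page->verifyChecksum(id)) {
        // checksum mismatch: treat the page as corrupt
    }
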
class IPagerSnapshot {
public:
virtual Future<Reference<const IPage>> getPhysicalPage(LogicalPageID pageID,
bool cacheable,
bool nohit,
bool* fromCache = nullptr) = 0;
virtual Future<Reference<const ArenaPage>> getPhysicalPage(LogicalPageID pageID,
bool cacheable,
bool nohit,
bool* fromCache = nullptr) = 0;
virtual bool tryEvictPage(LogicalPageID id) = 0;
virtual Version getVersion() const = 0;

@@ -83,8 +143,8 @@ public:
// This API is probably too customized to the behavior of DWALPager and probably needs some changes to be more generic.
class IPager2 : public IClosable {
public:
// Returns an IPage that can be passed to writePage. The data in the returned IPage might not be zeroed.
virtual Reference<IPage> newPageBuffer() = 0;
// Returns an ArenaPage that can be passed to writePage. The data in the returned ArenaPage might not be zeroed.
virtual Reference<ArenaPage> newPageBuffer() = 0;

// Returns the usable size of pages returned by the pager (i.e. the size of the page that isn't pager overhead).
// For a given pager instance, separate calls to this function must return the same value.

@@ -98,13 +158,13 @@ public:
// Replace the contents of a page with new data across *all* versions.
// Existing holders of a page reference for pageID, read from any version,
// may see the effects of this write.
virtual void updatePage(LogicalPageID pageID, Reference<IPage> data) = 0;
virtual void updatePage(LogicalPageID pageID, Reference<ArenaPage> data) = 0;

// Try to atomically update the contents of a page as of version v in the next commit.
// If the pager is unable to do this at this time, it may choose to write the data to a new page ID
// instead and return the new page ID to the caller. Otherwise the original pageID argument will be returned.
// If a new page ID is returned, the old page ID will be freed as of version v
virtual Future<LogicalPageID> atomicUpdatePage(LogicalPageID pageID, Reference<IPage> data, Version v) = 0;
virtual Future<LogicalPageID> atomicUpdatePage(LogicalPageID pageID, Reference<ArenaPage> data, Version v) = 0;

// Free pageID to be used again after the commit that moves oldestVersion past v
virtual void freePage(LogicalPageID pageID, Version v) = 0;

@@ -120,10 +180,10 @@ public:
// Cacheable indicates that the page should be added to the page cache (if applicable?) as a result of this read.
// NoHit indicates that the read should not be considered a cache hit, such as when preloading pages that are
// considered likely to be needed soon.
virtual Future<Reference<IPage>> readPage(LogicalPageID pageID,
bool cacheable = true,
bool noHit = false,
bool* fromCache = nullptr) = 0;
virtual Future<Reference<ArenaPage>> readPage(LogicalPageID pageID,
bool cacheable = true,
bool noHit = false,
bool* fromCache = nullptr) = 0;

// Get a snapshot of the metakey and all pages as of the version v which must be >= getOldestVersion()
// Note that snapshots at any version may still see the results of updatePage() calls.
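
Because atomicUpdatePage() may redirect the write to a fresh page, callers have to adopt whatever page ID comes back rather than assuming the original one survives. A minimal sketch of that contract in flow actor syntax; the surrounding actor and variable names are hypothetical:

    // Hypothetical caller: update a page in place if possible, otherwise follow the redirect.
    ACTOR Future<LogicalPageID> updateAndFollow(IPager2* pager, LogicalPageID id, Reference<ArenaPage> data, Version v) {
        LogicalPageID newID = wait(pager->atomicUpdatePage(id, data, v));
        if (newID != id) {
            // The pager chose a new page; the old ID is freed as of version v, so any
            // structure referencing it (e.g. a parent B-tree node) must now point at newID.
        }
        return newID;
    }
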

@@ -217,6 +217,9 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
init( SERVER_LIST_DELAY, 1.0 );
init( RECRUITMENT_IDLE_DELAY, 1.0 );
init( STORAGE_RECRUITMENT_DELAY, 10.0 );
init( TSS_HACK_IDENTITY_MAPPING, false ); // THIS SHOULD NEVER BE SET IN PROD. Only for performance testing
init( TSS_RECRUITMENT_TIMEOUT, 3*STORAGE_RECRUITMENT_DELAY ); if (randomize && BUGGIFY ) TSS_RECRUITMENT_TIMEOUT = 1.0; // Super low timeout should cause tss recruitments to fail
init( TSS_DD_CHECK_INTERVAL, 60.0 ); if (randomize && BUGGIFY ) TSS_DD_CHECK_INTERVAL = 1.0; // May kill all TSS quickly
init( DATA_DISTRIBUTION_LOGGING_INTERVAL, 5.0 );
init( DD_ENABLED_CHECK_DELAY, 1.0 );
init( DD_STALL_CHECK_DELAY, 0.4 ); //Must be larger than 2*MAX_BUGGIFIED_DELAY

@@ -631,6 +634,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi

// Coordination
init( COORDINATED_STATE_ONCONFLICT_POLL_INTERVAL, 1.0 ); if( randomize && BUGGIFY ) COORDINATED_STATE_ONCONFLICT_POLL_INTERVAL = 10.0;
init( ENABLE_CROSS_CLUSTER_SUPPORT, true ); if( randomize && BUGGIFY ) ENABLE_CROSS_CLUSTER_SUPPORT = false;

// Buggification
init( BUGGIFIED_EVENTUAL_CONSISTENCY, 1.0 );

@@ -167,6 +167,9 @@ public:
double SERVER_LIST_DELAY;
double RECRUITMENT_IDLE_DELAY;
double STORAGE_RECRUITMENT_DELAY;
bool TSS_HACK_IDENTITY_MAPPING;
double TSS_RECRUITMENT_TIMEOUT;
double TSS_DD_CHECK_INTERVAL;
double DATA_DISTRIBUTION_LOGGING_INTERVAL;
double DD_ENABLED_CHECK_DELAY;
double DD_STALL_CHECK_DELAY;

@@ -559,6 +562,8 @@ public:

// Coordination
double COORDINATED_STATE_ONCONFLICT_POLL_INTERVAL;
bool ENABLE_CROSS_CLUSTER_SUPPORT; // Allow a coordinator to serve requests whose connection string does not match
// the local descriptor

// Buggification
double BUGGIFIED_EVENTUAL_CONSISTENCY;

@@ -410,6 +410,8 @@ struct ILogSystem {

virtual Optional<UID> getPrimaryPeekLocation() const = 0;

virtual Optional<UID> getCurrentPeekLocation() const = 0;

virtual void addref() = 0;

virtual void delref() = 0;

@@ -473,6 +475,7 @@ struct ILogSystem {
Version popped() const override;
Version getMinKnownCommittedVersion() const override;
Optional<UID> getPrimaryPeekLocation() const override;
Optional<UID> getCurrentPeekLocation() const override;

void addref() override { ReferenceCounted<ServerPeekCursor>::addref(); }

@@ -534,6 +537,7 @@ struct ILogSystem {
Version popped() const override;
Version getMinKnownCommittedVersion() const override;
Optional<UID> getPrimaryPeekLocation() const override;
Optional<UID> getCurrentPeekLocation() const override;

void addref() override { ReferenceCounted<MergedPeekCursor>::addref(); }

@@ -589,6 +593,7 @@ struct ILogSystem {
Version popped() const override;
Version getMinKnownCommittedVersion() const override;
Optional<UID> getPrimaryPeekLocation() const override;
Optional<UID> getCurrentPeekLocation() const override;

void addref() override { ReferenceCounted<SetPeekCursor>::addref(); }

@@ -620,6 +625,7 @@ struct ILogSystem {
Version popped() const override;
Version getMinKnownCommittedVersion() const override;
Optional<UID> getPrimaryPeekLocation() const override;
Optional<UID> getCurrentPeekLocation() const override;

void addref() override { ReferenceCounted<MultiCursor>::addref(); }

@@ -698,6 +704,7 @@ struct ILogSystem {
Version popped() const override;
Version getMinKnownCommittedVersion() const override;
Optional<UID> getPrimaryPeekLocation() const override;
Optional<UID> getCurrentPeekLocation() const override;

void addref() override { ReferenceCounted<BufferedCursor>::addref(); }

@@ -393,12 +393,16 @@ Version ILogSystem::ServerPeekCursor::getMinKnownCommittedVersion() const {
}

Optional<UID> ILogSystem::ServerPeekCursor::getPrimaryPeekLocation() const {
if (interf) {
if (interf && interf->get().present()) {
return interf->get().id();
}
return Optional<UID>();
}

Optional<UID> ILogSystem::ServerPeekCursor::getCurrentPeekLocation() const {
return ILogSystem::ServerPeekCursor::getPrimaryPeekLocation();
}

Version ILogSystem::ServerPeekCursor::popped() const {
return poppedVersion;
}

@@ -673,6 +677,13 @@ Optional<UID> ILogSystem::MergedPeekCursor::getPrimaryPeekLocation() const {
return Optional<UID>();
}

Optional<UID> ILogSystem::MergedPeekCursor::getCurrentPeekLocation() const {
if (currentCursor >= 0) {
return serverCursors[currentCursor]->getPrimaryPeekLocation();
}
return Optional<UID>();
}

Version ILogSystem::MergedPeekCursor::popped() const {
Version poppedVersion = 0;
for (auto& c : serverCursors)

@@ -1023,6 +1034,13 @@ Optional<UID> ILogSystem::SetPeekCursor::getPrimaryPeekLocation() const {
return Optional<UID>();
}

Optional<UID> ILogSystem::SetPeekCursor::getCurrentPeekLocation() const {
if (currentCursor >= 0 && currentSet >= 0) {
return serverCursors[currentSet][currentCursor]->getPrimaryPeekLocation();
}
return Optional<UID>();
}

Version ILogSystem::SetPeekCursor::popped() const {
Version poppedVersion = 0;
for (auto& cursors : serverCursors) {

@@ -1123,6 +1141,10 @@ Optional<UID> ILogSystem::MultiCursor::getPrimaryPeekLocation() const {
return cursors.back()->getPrimaryPeekLocation();
}

Optional<UID> ILogSystem::MultiCursor::getCurrentPeekLocation() const {
return cursors.back()->getCurrentPeekLocation();
}

Version ILogSystem::MultiCursor::popped() const {
return std::max(poppedVersion, cursors.back()->popped());
}

@@ -1403,6 +1425,10 @@ Optional<UID> ILogSystem::BufferedCursor::getPrimaryPeekLocation() const {
return Optional<UID>();
}

Optional<UID> ILogSystem::BufferedCursor::getCurrentPeekLocation() const {
return Optional<UID>();
}

Version ILogSystem::BufferedCursor::popped() const {
if (initialPoppedVersion == poppedVersion) {
return 0;

@ -20,9 +20,11 @@
|
|||
|
||||
#include "flow/Util.h"
|
||||
#include "fdbrpc/FailureMonitor.h"
|
||||
#include "fdbclient/KeyBackedTypes.h"
|
||||
#include "fdbclient/SystemData.h"
|
||||
#include "fdbserver/MoveKeys.actor.h"
|
||||
#include "fdbserver/Knobs.h"
|
||||
#include "fdbserver/TSSMappingUtil.actor.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
using std::max;
|
||||
|
@ -158,7 +160,7 @@ ACTOR Future<Optional<UID>> checkReadWrite(Future<ErrorOr<GetShardStateReply>> f
|
|||
return Optional<UID>(uid);
|
||||
}
|
||||
|
||||
Future<Void> removeOldDestinations(Transaction* tr,
|
||||
Future<Void> removeOldDestinations(Reference<ReadYourWritesTransaction> tr,
|
||||
UID oldDest,
|
||||
VectorRef<KeyRangeRef> shards,
|
||||
KeyRangeRef currentKeys) {
|
||||
|
@ -235,7 +237,7 @@ ACTOR Future<vector<UID>> addReadWriteDestinations(KeyRangeRef shard,
|
|||
}
|
||||
|
||||
ACTOR Future<vector<vector<UID>>> additionalSources(RangeResult shards,
|
||||
Transaction* tr,
|
||||
Reference<ReadYourWritesTransaction> tr,
|
||||
int desiredHealthy,
|
||||
int maxServers) {
|
||||
state RangeResult UIDtoTagMap = wait(tr->getRange(serverTagKeys, CLIENT_KNOBS->TOO_MANY));
|
||||
|
@ -320,6 +322,7 @@ ACTOR static Future<Void> startMoveKeys(Database occ,
|
|||
MoveKeysLock lock,
|
||||
FlowLock* startMoveKeysLock,
|
||||
UID relocationIntervalId,
|
||||
std::map<UID, StorageServerInterface>* tssMapping,
|
||||
const DDEnabledState* ddEnabledState) {
|
||||
state TraceInterval interval("RelocateShard_StartMoveKeys");
|
||||
state Future<Void> warningLogger = logWarningAfter("StartMoveKeysTooLong", 600, servers);
|
||||
|
@ -327,6 +330,7 @@ ACTOR static Future<Void> startMoveKeys(Database occ,
|
|||
|
||||
wait(startMoveKeysLock->take(TaskPriority::DataDistributionLaunch));
|
||||
state FlowLock::Releaser releaser(*startMoveKeysLock);
|
||||
state bool loadedTssMapping = false;
|
||||
|
||||
TraceEvent(SevDebug, interval.begin(), relocationIntervalId);
|
||||
|
||||
|
@ -343,7 +347,8 @@ ACTOR static Future<Void> startMoveKeys(Database occ,
|
|||
TEST(begin > keys.begin); // Multi-transactional startMoveKeys
|
||||
batches++;
|
||||
|
||||
state Transaction tr(occ);
|
||||
// RYW to optimize re-reading the same key ranges
|
||||
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(occ);
|
||||
state int retries = 0;
|
||||
|
||||
loop {
|
||||
|
@ -356,15 +361,22 @@ ACTOR static Future<Void> startMoveKeys(Database occ,
|
|||
// Keep track of shards for all src servers so that we can preserve their values in serverKeys
|
||||
state Map<UID, VectorRef<KeyRangeRef>> shardMap;
|
||||
|
||||
tr.info.taskID = TaskPriority::MoveKeys;
|
||||
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
tr->getTransaction().info.taskID = TaskPriority::MoveKeys;
|
||||
tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
|
||||
wait(checkMoveKeysLock(&tr, lock, ddEnabledState));
|
||||
wait(checkMoveKeysLock(&(tr->getTransaction()), lock, ddEnabledState));
|
||||
|
||||
if (!loadedTssMapping) {
|
||||
// share transaction for loading tss mapping with the rest of start move keys
|
||||
wait(readTSSMappingRYW(tr, tssMapping));
|
||||
loadedTssMapping = true;
|
||||
}
|
||||
|
||||
vector<Future<Optional<Value>>> serverListEntries;
|
||||
serverListEntries.reserve(servers.size());
|
||||
for (int s = 0; s < servers.size(); s++)
|
||||
serverListEntries.push_back(tr.get(serverListKeyFor(servers[s])));
|
||||
serverListEntries.push_back(tr->get(serverListKeyFor(servers[s])));
|
||||
state vector<Optional<Value>> serverListValues = wait(getAll(serverListEntries));
|
||||
|
||||
for (int s = 0; s < serverListValues.size(); s++) {
|
||||
|
@ -380,7 +392,8 @@ ACTOR static Future<Void> startMoveKeys(Database occ,
|
|||
// Get all existing shards overlapping keys (exclude any that have been processed in a previous
|
||||
// iteration of the outer loop)
|
||||
state KeyRange currentKeys = KeyRangeRef(begin, keys.end);
|
||||
state RangeResult old = wait(krmGetRanges(&tr,
|
||||
|
||||
state RangeResult old = wait(krmGetRanges(tr,
|
||||
keyServersPrefix,
|
||||
currentKeys,
|
||||
SERVER_KNOBS->MOVE_KEYS_KRM_LIMIT,
|
||||
|
@ -399,10 +412,10 @@ ACTOR static Future<Void> startMoveKeys(Database occ,
|
|||
// printf("'%s': '%s'\n", old[i].key.toString().c_str(), old[i].value.toString().c_str());
|
||||
|
||||
// Check that enough servers for each shard are in the correct state
|
||||
state RangeResult UIDtoTagMap = wait(tr.getRange(serverTagKeys, CLIENT_KNOBS->TOO_MANY));
|
||||
state RangeResult UIDtoTagMap = wait(tr->getRange(serverTagKeys, CLIENT_KNOBS->TOO_MANY));
|
||||
ASSERT(!UIDtoTagMap.more && UIDtoTagMap.size() < CLIENT_KNOBS->TOO_MANY);
|
||||
vector<vector<UID>> addAsSource = wait(additionalSources(
|
||||
old, &tr, servers.size(), SERVER_KNOBS->MAX_ADDED_SOURCES_MULTIPLIER * servers.size()));
|
||||
old, tr, servers.size(), SERVER_KNOBS->MAX_ADDED_SOURCES_MULTIPLIER * servers.size()));
|
||||
|
||||
// For each intersecting range, update keyServers[range] dest to be servers and clear existing dest
|
||||
// servers from serverKeys
|
||||
|
@ -417,7 +430,7 @@ ACTOR static Future<Void> startMoveKeys(Database occ,
|
|||
// .detail("KeyEnd", rangeIntersectKeys.end.toString())
|
||||
// .detail("OldSrc", describe(src))
|
||||
// .detail("OldDest", describe(dest))
|
||||
// .detail("ReadVersion", tr.getReadVersion().get());
|
||||
// .detail("ReadVersion", tr->getReadVersion().get());
|
||||
|
||||
for (auto& uid : addAsSource[i]) {
|
||||
src.push_back(uid);
|
||||
|
@ -425,7 +438,7 @@ ACTOR static Future<Void> startMoveKeys(Database occ,
|
|||
uniquify(src);
|
||||
|
||||
// Update dest servers for this range to be equal to servers
|
||||
krmSetPreviouslyEmptyRange(&tr,
|
||||
krmSetPreviouslyEmptyRange(&(tr->getTransaction()),
|
||||
keyServersPrefix,
|
||||
rangeIntersectKeys,
|
||||
keyServersValue(UIDtoTagMap, src, servers),
|
||||
|
@ -455,7 +468,7 @@ ACTOR static Future<Void> startMoveKeys(Database occ,
|
|||
vector<Future<Void>> actors;
|
||||
for (oldDest = oldDests.begin(); oldDest != oldDests.end(); ++oldDest)
|
||||
if (std::find(servers.begin(), servers.end(), *oldDest) == servers.end())
|
||||
actors.push_back(removeOldDestinations(&tr, *oldDest, shardMap[*oldDest], currentKeys));
|
||||
actors.push_back(removeOldDestinations(tr, *oldDest, shardMap[*oldDest], currentKeys));
|
||||
|
||||
// Update serverKeys to include keys (or the currently processed subset of keys) for each SS in
|
||||
// servers
|
||||
|
@ -464,12 +477,12 @@ ACTOR static Future<Void> startMoveKeys(Database occ,
|
|||
// to have the same shard boundaries If that invariant was important, we would have to move this
|
||||
// inside the loop above and also set it for the src servers
|
||||
actors.push_back(krmSetRangeCoalescing(
|
||||
&tr, serverKeysPrefixFor(servers[i]), currentKeys, allKeys, serverKeysTrue));
|
||||
tr, serverKeysPrefixFor(servers[i]), currentKeys, allKeys, serverKeysTrue));
|
||||
}
|
||||
|
||||
wait(waitForAll(actors));
|
||||
|
||||
wait(tr.commit());
|
||||
wait(tr->commit());
|
||||
|
||||
/*TraceEvent("StartMoveKeysCommitDone", relocationIntervalId)
|
||||
.detail("CommitVersion", tr.getCommittedVersion())
|
||||
|
@ -481,7 +494,7 @@ ACTOR static Future<Void> startMoveKeys(Database occ,
|
|||
state Error err = e;
|
||||
if (err.code() == error_code_move_to_removed_server)
|
||||
throw;
|
||||
wait(tr.onError(e));
|
||||
wait(tr->onError(e));
|
||||
|
||||
if (retries % 10 == 0) {
|
||||
TraceEvent(
|
||||
|
@ -500,7 +513,7 @@ ACTOR static Future<Void> startMoveKeys(Database occ,
|
|||
}
|
||||
|
||||
// printf("Committed moving '%s'-'%s' (version %lld)\n", keys.begin.toString().c_str(),
|
||||
// keys.end.toString().c_str(), tr.getCommittedVersion());
|
||||
// keys.end.toString().c_str(), tr->getCommittedVersion());
|
||||
TraceEvent(SevDebug, interval.end(), relocationIntervalId)
|
||||
.detail("Batches", batches)
|
||||
.detail("Shards", shards)
|
||||
|
@ -536,11 +549,14 @@ ACTOR Future<Void> waitForShardReady(StorageServerInterface server,
|
|||
}
|
||||
}
|
||||
|
||||
// best effort to also wait for TSS on data move
|
||||
|
||||
ACTOR Future<Void> checkFetchingState(Database cx,
|
||||
vector<UID> dest,
|
||||
KeyRange keys,
|
||||
Promise<Void> dataMovementComplete,
|
||||
UID relocationIntervalId) {
|
||||
UID relocationIntervalId,
|
||||
std::map<UID, StorageServerInterface> tssMapping) {
|
||||
state Transaction tr(cx);
|
||||
|
||||
loop {
|
||||
|
@ -557,6 +573,7 @@ ACTOR Future<Void> checkFetchingState(Database cx,
|
|||
serverListEntries.push_back(tr.get(serverListKeyFor(dest[s])));
|
||||
state vector<Optional<Value>> serverListValues = wait(getAll(serverListEntries));
|
||||
vector<Future<Void>> requests;
|
||||
state vector<Future<Void>> tssRequests;
|
||||
for (int s = 0; s < serverListValues.size(); s++) {
|
||||
if (!serverListValues[s].present()) {
|
||||
// FIXME: Is this the right behavior? dataMovementComplete will never be sent!
|
||||
|
@ -567,10 +584,25 @@ ACTOR Future<Void> checkFetchingState(Database cx,
|
|||
ASSERT(si.id() == dest[s]);
|
||||
requests.push_back(
|
||||
waitForShardReady(si, keys, tr.getReadVersion().get(), GetShardStateRequest::FETCHING));
|
||||
|
||||
auto tssPair = tssMapping.find(si.id());
|
||||
if (tssPair != tssMapping.end()) {
|
||||
tssRequests.push_back(waitForShardReady(
|
||||
tssPair->second, keys, tr.getReadVersion().get(), GetShardStateRequest::FETCHING));
|
||||
}
|
||||
}
|
||||
|
||||
wait(timeoutError(waitForAll(requests), SERVER_KNOBS->SERVER_READY_QUORUM_TIMEOUT, TaskPriority::MoveKeys));
|
||||
|
||||
// If normal servers return normally, give TSS data movement a bit of a chance, but don't block on it, and
|
||||
// ignore errors in tss requests
|
||||
if (tssRequests.size()) {
|
||||
wait(timeout(waitForAllReady(tssRequests),
|
||||
SERVER_KNOBS->SERVER_READY_QUORUM_TIMEOUT / 2,
|
||||
Void(),
|
||||
TaskPriority::MoveKeys));
|
||||
}
|
||||
|
||||
dataMovementComplete.send(Void());
|
||||
return Void();
|
||||
} catch (Error& e) {
|
||||
|
@ -593,6 +625,7 @@ ACTOR static Future<Void> finishMoveKeys(Database occ,
|
|||
FlowLock* finishMoveKeysParallelismLock,
|
||||
bool hasRemote,
|
||||
UID relocationIntervalId,
|
||||
std::map<UID, StorageServerInterface> tssMapping,
|
||||
const DDEnabledState* ddEnabledState) {
|
||||
state TraceInterval interval("RelocateShard_FinishMoveKeys");
|
||||
state TraceInterval waitInterval("");
|
||||
|
@ -602,6 +635,11 @@ ACTOR static Future<Void> finishMoveKeys(Database occ,
|
|||
state int retries = 0;
|
||||
state FlowLock::Releaser releaser;
|
||||
|
||||
state std::vector<std::pair<UID, UID>> tssToKill;
|
||||
state std::unordered_set<UID> tssToIgnore;
|
||||
// try waiting for tss for a 2 loops, give up if they're stuck to not affect the rest of the cluster
|
||||
state int waitForTSSCounter = 2;
|
||||
|
||||
ASSERT(!destinationTeam.empty());
|
||||
|
||||
try {
|
||||
|
@ -616,9 +654,26 @@ ACTOR static Future<Void> finishMoveKeys(Database occ,
|
|||
|
||||
state Transaction tr(occ);
|
||||
|
||||
// printf("finishMoveKeys( '%s'-'%s' )\n", keys.begin.toString().c_str(), keys.end.toString().c_str());
|
||||
// printf("finishMoveKeys( '%s'-'%s' )\n", begin.toString().c_str(), keys.end.toString().c_str());
|
||||
loop {
|
||||
try {
|
||||
if (tssToKill.size()) {
|
||||
TEST(true); // killing TSS because they were unavailable for movekeys
|
||||
|
||||
// Kill tss BEFORE committing main txn so that client requests don't make it to the tss when it
|
||||
// has a different shard set than its pair use a different RYW transaction since i'm too lazy
|
||||
// (and don't want to add bugs) by changing whole method to RYW. Also, using a different
|
||||
// transaction makes it commit earlier which we may need to guarantee causality of tss getting
|
||||
// removed before client sends a request to this key range on the new SS
|
||||
wait(removeTSSPairsFromCluster(occ, tssToKill));
|
||||
|
||||
for (auto& tssPair : tssToKill) {
|
||||
TraceEvent(SevWarnAlways, "TSS_KillMoveKeys").detail("TSSID", tssPair.second);
|
||||
tssToIgnore.insert(tssPair.second);
|
||||
}
|
||||
tssToKill.clear();
|
||||
}
|
||||
|
||||
tr.info.taskID = TaskPriority::MoveKeys;
|
||||
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
|
||||
|
@ -763,6 +818,8 @@ ACTOR static Future<Void> finishMoveKeys(Database occ,
|
|||
// between
|
||||
// now and when this transaction commits.
|
||||
state vector<Future<Void>> serverReady; // only for count below
|
||||
state vector<Future<Void>> tssReady; // for waiting in parallel with tss
|
||||
state vector<StorageServerInterface> tssReadyInterfs;
|
||||
state vector<UID> newDestinations;
|
||||
std::set<UID> completeSrcSet(completeSrc.begin(), completeSrc.end());
|
||||
for (auto& it : dest) {
|
||||
|
@ -789,22 +846,95 @@ ACTOR static Future<Void> finishMoveKeys(Database occ,
|
|||
storageServerInterfaces.push_back(si);
|
||||
}
|
||||
|
||||
// update client info in case tss mapping changed or server got updated
|
||||
|
||||
// Wait for new destination servers to fetch the keys
|
||||
|
||||
serverReady.reserve(storageServerInterfaces.size());
|
||||
for (int s = 0; s < storageServerInterfaces.size(); s++)
|
||||
tssReady.reserve(storageServerInterfaces.size());
|
||||
tssReadyInterfs.reserve(storageServerInterfaces.size());
|
||||
for (int s = 0; s < storageServerInterfaces.size(); s++) {
|
||||
serverReady.push_back(waitForShardReady(storageServerInterfaces[s],
|
||||
keys,
|
||||
tr.getReadVersion().get(),
|
||||
GetShardStateRequest::READABLE));
|
||||
wait(timeout(waitForAll(serverReady),
|
||||
|
||||
auto tssPair = tssMapping.find(storageServerInterfaces[s].id());
|
||||
|
||||
if (tssPair != tssMapping.end() && waitForTSSCounter > 0 &&
|
||||
!tssToIgnore.count(tssPair->second.id())) {
|
||||
tssReadyInterfs.push_back(tssPair->second);
|
||||
tssReady.push_back(waitForShardReady(
|
||||
tssPair->second, keys, tr.getReadVersion().get(), GetShardStateRequest::READABLE));
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for all storage server moves, and explicitly swallow errors for tss ones with
|
||||
// waitForAllReady If this takes too long the transaction will time out and retry, which is ok
|
||||
wait(timeout(waitForAll(serverReady) && waitForAllReady(tssReady),
|
||||
SERVER_KNOBS->SERVER_READY_QUORUM_TIMEOUT,
|
||||
Void(),
|
||||
TaskPriority::MoveKeys));
|
||||
|
||||
// Check to see if we're waiting only on tss. If so, decrement the waiting counter.
|
||||
// If the waiting counter is zero, kill the slow/non-responsive tss processes before finalizing the
|
||||
// data move.
|
||||
if (tssReady.size()) {
|
||||
bool allSSDone = true;
|
||||
for (auto& f : serverReady) {
|
||||
allSSDone &= f.isReady() && !f.isError();
|
||||
if (!allSSDone) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (allSSDone) {
|
||||
bool anyTssNotDone = false;
|
||||
|
||||
for (auto& f : tssReady) {
|
||||
if (!f.isReady() || f.isError()) {
|
||||
anyTssNotDone = true;
|
||||
waitForTSSCounter--;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (anyTssNotDone && waitForTSSCounter == 0) {
|
||||
for (int i = 0; i < tssReady.size(); i++) {
|
||||
if (!tssReady[i].isReady() || tssReady[i].isError()) {
|
||||
tssToKill.push_back(
|
||||
std::pair(tssReadyInterfs[i].tssPairID.get(), tssReadyInterfs[i].id()));
|
||||
}
|
||||
}
|
||||
// repeat loop and go back to start to kill tss' before continuing on
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int count = dest.size() - newDestinations.size();
|
||||
for (int s = 0; s < serverReady.size(); s++)
|
||||
count += serverReady[s].isReady() && !serverReady[s].isError();
|
||||
|
||||
// printf(" fMK: moved data to %d/%d servers\n", count, serverReady.size());
|
||||
int tssCount = 0;
|
||||
for (int s = 0; s < tssReady.size(); s++)
|
||||
tssCount += tssReady[s].isReady() && !tssReady[s].isError();
|
||||
|
||||
/*if (tssReady.size()) {
|
||||
printf(" fMK: [%s - %s) moved data to %d/%d servers and %d/%d tss\n",
|
||||
begin.toString().c_str(),
|
||||
keys.end.toString().c_str(),
|
||||
count,
|
||||
serverReady.size(),
|
||||
tssCount,
|
||||
tssReady.size());
|
||||
} else {
|
||||
printf(" fMK: [%s - %s) moved data to %d/%d servers\n",
|
||||
begin.toString().c_str(),
|
||||
keys.end.toString().c_str(),
|
||||
count,
|
||||
serverReady.size());
|
||||
}*/
|
||||
TraceEvent(SevDebug, waitInterval.end(), relocationIntervalId).detail("ReadyServers", count);
|
||||
|
||||
if (count == dest.size()) {
|
||||
|
@ -862,43 +992,48 @@ ACTOR static Future<Void> finishMoveKeys(Database occ,
|
|||
}
|
||||
|
||||
ACTOR Future<std::pair<Version, Tag>> addStorageServer(Database cx, StorageServerInterface server) {
|
||||
state Transaction tr(cx);
|
||||
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(cx);
|
||||
state KeyBackedMap<UID, UID> tssMapDB = KeyBackedMap<UID, UID>(tssMappingKeys.begin);
|
||||
state int maxSkipTags = 1;
|
||||
|
||||
loop {
|
||||
try {
|
||||
state Future<RangeResult> fTagLocalities = tr.getRange(tagLocalityListKeys, CLIENT_KNOBS->TOO_MANY);
|
||||
state Future<Optional<Value>> fv = tr.get(serverListKeyFor(server.id()));
|
||||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
|
||||
state Future<Optional<Value>> fExclProc = tr.get(
|
||||
// FIXME: don't fetch tag localities, all tags, and history tags if tss. Just fetch pair's tag
|
||||
state Future<RangeResult> fTagLocalities = tr->getRange(tagLocalityListKeys, CLIENT_KNOBS->TOO_MANY);
|
||||
state Future<Optional<Value>> fv = tr->get(serverListKeyFor(server.id()));
|
||||
|
||||
state Future<Optional<Value>> fExclProc = tr->get(
|
||||
StringRef(encodeExcludedServersKey(AddressExclusion(server.address().ip, server.address().port))));
|
||||
state Future<Optional<Value>> fExclIP =
|
||||
tr.get(StringRef(encodeExcludedServersKey(AddressExclusion(server.address().ip))));
|
||||
state Future<Optional<Value>> fFailProc =
|
||||
tr.get(StringRef(encodeFailedServersKey(AddressExclusion(server.address().ip, server.address().port))));
|
||||
tr->get(StringRef(encodeExcludedServersKey(AddressExclusion(server.address().ip))));
|
||||
state Future<Optional<Value>> fFailProc = tr->get(
|
||||
StringRef(encodeFailedServersKey(AddressExclusion(server.address().ip, server.address().port))));
|
||||
state Future<Optional<Value>> fFailIP =
|
||||
tr.get(StringRef(encodeFailedServersKey(AddressExclusion(server.address().ip))));
|
||||
tr->get(StringRef(encodeFailedServersKey(AddressExclusion(server.address().ip))));
|
||||
|
||||
state Future<Optional<Value>> fExclProc2 =
|
||||
server.secondaryAddress().present()
|
||||
? tr.get(StringRef(encodeExcludedServersKey(
|
||||
? tr->get(StringRef(encodeExcludedServersKey(
|
||||
AddressExclusion(server.secondaryAddress().get().ip, server.secondaryAddress().get().port))))
|
||||
: Future<Optional<Value>>(Optional<Value>());
|
||||
state Future<Optional<Value>> fExclIP2 =
|
||||
server.secondaryAddress().present()
|
||||
? tr.get(StringRef(encodeExcludedServersKey(AddressExclusion(server.secondaryAddress().get().ip))))
|
||||
? tr->get(StringRef(encodeExcludedServersKey(AddressExclusion(server.secondaryAddress().get().ip))))
|
||||
: Future<Optional<Value>>(Optional<Value>());
|
||||
state Future<Optional<Value>> fFailProc2 =
|
||||
server.secondaryAddress().present()
|
||||
? tr.get(StringRef(encodeFailedServersKey(
|
||||
? tr->get(StringRef(encodeFailedServersKey(
|
||||
AddressExclusion(server.secondaryAddress().get().ip, server.secondaryAddress().get().port))))
|
||||
: Future<Optional<Value>>(Optional<Value>());
|
||||
state Future<Optional<Value>> fFailIP2 =
|
||||
server.secondaryAddress().present()
|
||||
? tr.get(StringRef(encodeFailedServersKey(AddressExclusion(server.secondaryAddress().get().ip))))
|
||||
? tr->get(StringRef(encodeFailedServersKey(AddressExclusion(server.secondaryAddress().get().ip))))
|
||||
: Future<Optional<Value>>(Optional<Value>());
|
||||
|
||||
state Future<RangeResult> fTags = tr.getRange(serverTagKeys, CLIENT_KNOBS->TOO_MANY, true);
|
||||
state Future<RangeResult> fHistoryTags = tr.getRange(serverTagHistoryKeys, CLIENT_KNOBS->TOO_MANY, true);
|
||||
state Future<RangeResult> fTags = tr->getRange(serverTagKeys, CLIENT_KNOBS->TOO_MANY, true);
|
||||
state Future<RangeResult> fHistoryTags = tr->getRange(serverTagHistoryKeys, CLIENT_KNOBS->TOO_MANY, true);
|
||||
|
||||
wait(success(fTagLocalities) && success(fv) && success(fTags) && success(fHistoryTags) &&
|
||||
success(fExclProc) && success(fExclIP) && success(fFailProc) && success(fFailIP) &&
|
||||
|
@ -914,63 +1049,90 @@ ACTOR Future<std::pair<Version, Tag>> addStorageServer(Database cx, StorageServe
|
|||
if (fTagLocalities.get().more || fTags.get().more || fHistoryTags.get().more)
|
||||
ASSERT(false);
|
||||
|
||||
int8_t maxTagLocality = 0;
|
||||
state int8_t locality = -1;
|
||||
for (auto& kv : fTagLocalities.get()) {
|
||||
int8_t loc = decodeTagLocalityListValue(kv.value);
|
||||
if (decodeTagLocalityListKey(kv.key) == server.locality.dcId()) {
|
||||
locality = loc;
|
||||
break;
|
||||
}
|
||||
maxTagLocality = std::max(maxTagLocality, loc);
|
||||
}
|
||||
|
||||
if (locality == -1) {
|
||||
locality = maxTagLocality + 1;
|
||||
if (locality < 0)
|
||||
throw recruitment_failed();
|
||||
tr.set(tagLocalityListKeyFor(server.locality.dcId()), tagLocalityListValue(locality));
|
||||
}
|
||||
|
||||
int skipTags = deterministicRandom()->randomInt(0, maxSkipTags);
|
||||
|
||||
state uint16_t tagId = 0;
|
||||
std::vector<uint16_t> usedTags;
|
||||
for (auto& it : fTags.get()) {
|
||||
Tag t = decodeServerTagValue(it.value);
|
||||
if (t.locality == locality) {
|
||||
usedTags.push_back(t.id);
|
||||
}
|
||||
}
|
||||
for (auto& it : fHistoryTags.get()) {
|
||||
Tag t = decodeServerTagValue(it.value);
|
||||
if (t.locality == locality) {
|
||||
usedTags.push_back(t.id);
|
||||
}
|
||||
}
|
||||
std::sort(usedTags.begin(), usedTags.end());
|
||||
|
||||
int usedIdx = 0;
|
||||
for (; usedTags.size() > 0 && tagId <= usedTags.end()[-1]; tagId++) {
|
||||
if (tagId < usedTags[usedIdx]) {
|
||||
if (skipTags == 0)
|
||||
state Tag tag;
|
||||
if (server.isTss()) {
|
||||
bool foundTag = false;
|
||||
for (auto& it : fTags.get()) {
|
||||
UID key = decodeServerTagKey(it.key);
|
||||
if (key == server.tssPairID.get()) {
|
||||
tag = decodeServerTagValue(it.value);
|
||||
foundTag = true;
|
||||
break;
|
||||
skipTags--;
|
||||
} else {
|
||||
usedIdx++;
|
||||
}
|
||||
}
|
||||
if (!foundTag) {
|
||||
throw recruitment_failed();
|
||||
}
|
||||
|
||||
tssMapDB.set(tr, server.tssPairID.get(), server.id());
|
||||
|
||||
} else {
|
||||
int8_t maxTagLocality = 0;
|
||||
state int8_t locality = -1;
|
||||
for (auto& kv : fTagLocalities.get()) {
|
||||
int8_t loc = decodeTagLocalityListValue(kv.value);
|
||||
if (decodeTagLocalityListKey(kv.key) == server.locality.dcId()) {
|
||||
locality = loc;
|
||||
break;
|
||||
}
|
||||
maxTagLocality = std::max(maxTagLocality, loc);
|
||||
}
|
||||
|
||||
if (locality == -1) {
|
||||
locality = maxTagLocality + 1;
|
||||
if (locality < 0) {
|
||||
throw recruitment_failed();
|
||||
}
|
||||
tr->set(tagLocalityListKeyFor(server.locality.dcId()), tagLocalityListValue(locality));
|
||||
}
|
||||
|
||||
int skipTags = deterministicRandom()->randomInt(0, maxSkipTags);
|
||||
|
||||
state uint16_t tagId = 0;
|
||||
std::vector<uint16_t> usedTags;
|
||||
for (auto& it : fTags.get()) {
|
||||
Tag t = decodeServerTagValue(it.value);
|
||||
if (t.locality == locality) {
|
||||
usedTags.push_back(t.id);
|
||||
}
|
||||
}
|
||||
for (auto& it : fHistoryTags.get()) {
|
||||
Tag t = decodeServerTagValue(it.value);
|
||||
if (t.locality == locality) {
|
||||
usedTags.push_back(t.id);
|
||||
}
|
||||
}
|
||||
std::sort(usedTags.begin(), usedTags.end());
|
||||
|
||||
int usedIdx = 0;
|
||||
for (; usedTags.size() > 0 && tagId <= usedTags.end()[-1]; tagId++) {
|
||||
if (tagId < usedTags[usedIdx]) {
|
||||
if (skipTags == 0)
|
||||
break;
|
||||
skipTags--;
|
||||
} else {
|
||||
usedIdx++;
|
||||
}
|
||||
}
|
||||
tagId += skipTags;
|
||||
|
||||
tag = Tag(locality, tagId);
|
||||
|
||||
tr->set(serverTagKeyFor(server.id()), serverTagValue(tag));
|
||||
KeyRange conflictRange = singleKeyRange(serverTagConflictKeyFor(tag));
|
||||
tr->addReadConflictRange(conflictRange);
|
||||
tr->addWriteConflictRange(conflictRange);
|
||||
|
||||
if (SERVER_KNOBS->TSS_HACK_IDENTITY_MAPPING) {
|
||||
// THIS SHOULD NEVER BE ENABLED IN ANY NON-TESTING ENVIRONMENT
|
||||
TraceEvent(SevError, "TSSIdentityMappingEnabled");
|
||||
tssMapDB.set(tr, server.id(), server.id());
|
||||
}
|
||||
}
|
||||
tagId += skipTags;
|
||||
|
||||
state Tag tag(locality, tagId);
|
||||
tr.set(serverTagKeyFor(server.id()), serverTagValue(tag));
|
||||
tr.set(serverListKeyFor(server.id()), serverListValue(server));
|
||||
KeyRange conflictRange = singleKeyRange(serverTagConflictKeyFor(tag));
|
||||
tr.addReadConflictRange(conflictRange);
|
||||
tr.addWriteConflictRange(conflictRange);
|
||||
|
||||
wait(tr.commit());
|
||||
return std::make_pair(tr.getCommittedVersion(), tag);
|
||||
tr->set(serverListKeyFor(server.id()), serverListValue(server));
|
||||
wait(tr->commit());
|
||||
return std::make_pair(tr->getCommittedVersion(), tag);
|
||||
} catch (Error& e) {
|
||||
if (e.code() == error_code_commit_unknown_result)
|
||||
throw recruitment_failed(); // There is a remote possibility that we successfully added ourselves and
|
||||
|
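
The tag-selection loop above is compact; as a plain illustration of what it computes (a standalone sketch, not FDB code, and the helper name is hypothetical): starting from id 0, it steps over ids already used in this locality, burns skipTags free slots, and settles on the next free id, with any leftover skips landing past the largest used id.

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Hypothetical standalone version of the selection loop: pick the (skipTags + 1)-th
    // tag id within a locality that is not already present in usedTags.
    uint16_t pickTagId(std::vector<uint16_t> usedTags, int skipTags) {
        std::sort(usedTags.begin(), usedTags.end());
        uint16_t tagId = 0;
        int usedIdx = 0;
        for (; usedTags.size() > 0 && tagId <= usedTags.back(); tagId++) {
            if (tagId < usedTags[usedIdx]) {
                if (skipTags == 0)
                    break; // this free id is the one we want
                skipTags--; // consume one free slot and keep scanning
            } else {
                usedIdx++; // tagId is already taken, step past it
            }
        }
        return tagId + skipTags; // leftover skips land beyond the largest used id
    }

    int main() {
        assert(pickTagId({ 0, 1, 3, 4, 7 }, 0) == 2); // lowest free id
        assert(pickTagId({ 0, 1, 3, 4, 7 }, 1) == 5); // second-lowest free id
        assert(pickTagId({ 0, 1, 2 }, 0) == 3); // nothing free below the used ids
        return 0;
    }
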
@ -980,12 +1142,12 @@ ACTOR Future<std::pair<Version, Tag>> addStorageServer(Database cx, StorageServe
|
|||
maxSkipTags = SERVER_KNOBS->MAX_SKIP_TAGS;
|
||||
}
|
||||
|
||||
wait(tr.onError(e));
|
||||
wait(tr->onError(e));
|
||||
}
|
||||
}
|
||||
}
|
||||
// A SS can be removed only if all data (shards) on the SS have been moved away from the SS.
|
||||
ACTOR Future<bool> canRemoveStorageServer(Transaction* tr, UID serverID) {
|
||||
ACTOR Future<bool> canRemoveStorageServer(Reference<ReadYourWritesTransaction> tr, UID serverID) {
|
||||
RangeResult keys = wait(krmGetRanges(tr, serverKeysPrefixFor(serverID), allKeys, 2));
|
||||
|
||||
ASSERT(keys.size() >= 2);
|
||||
|
@ -1005,34 +1167,37 @@ ACTOR Future<bool> canRemoveStorageServer(Transaction* tr, UID serverID) {
|
|||
|
||||
ACTOR Future<Void> removeStorageServer(Database cx,
|
||||
UID serverID,
|
||||
Optional<UID> tssPairID,
|
||||
MoveKeysLock lock,
|
||||
const DDEnabledState* ddEnabledState) {
|
||||
state Transaction tr(cx);
|
||||
state KeyBackedMap<UID, UID> tssMapDB = KeyBackedMap<UID, UID>(tssMappingKeys.begin);
|
||||
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(cx);
|
||||
state bool retry = false;
|
||||
state int noCanRemoveCount = 0;
|
||||
|
||||
loop {
|
||||
try {
|
||||
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
wait(checkMoveKeysLock(&tr, lock, ddEnabledState));
|
||||
tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
wait(checkMoveKeysLock(&(tr->getTransaction()), lock, ddEnabledState));
|
||||
TraceEvent("RemoveStorageServerLocked")
|
||||
.detail("ServerID", serverID)
|
||||
.detail("Version", tr.getReadVersion().get());
|
||||
.detail("Version", tr->getReadVersion().get());
|
||||
|
||||
state bool canRemove = wait(canRemoveStorageServer(&tr, serverID));
|
||||
state bool canRemove = wait(canRemoveStorageServer(tr, serverID));
|
||||
if (!canRemove) {
|
||||
TEST(true); // The caller had a transaction in flight that assigned keys to the server. Wait for it to
|
||||
// reverse its mistake.
|
||||
TraceEvent(SevWarn, "NoCanRemove").detail("Count", noCanRemoveCount++).detail("ServerID", serverID);
|
||||
wait(delayJittered(SERVER_KNOBS->REMOVE_RETRY_DELAY, TaskPriority::DataDistributionLaunch));
|
||||
tr.reset();
|
||||
tr->reset();
|
||||
TraceEvent("RemoveStorageServerRetrying").detail("CanRemove", canRemove);
|
||||
} else {
|
||||
|
||||
state Future<Optional<Value>> fListKey = tr.get(serverListKeyFor(serverID));
|
||||
state Future<RangeResult> fTags = tr.getRange(serverTagKeys, CLIENT_KNOBS->TOO_MANY);
|
||||
state Future<RangeResult> fHistoryTags = tr.getRange(serverTagHistoryKeys, CLIENT_KNOBS->TOO_MANY);
|
||||
state Future<RangeResult> fTagLocalities = tr.getRange(tagLocalityListKeys, CLIENT_KNOBS->TOO_MANY);
|
||||
state Future<RangeResult> fTLogDatacenters = tr.getRange(tLogDatacentersKeys, CLIENT_KNOBS->TOO_MANY);
|
||||
state Future<Optional<Value>> fListKey = tr->get(serverListKeyFor(serverID));
|
||||
state Future<RangeResult> fTags = tr->getRange(serverTagKeys, CLIENT_KNOBS->TOO_MANY);
|
||||
state Future<RangeResult> fHistoryTags = tr->getRange(serverTagHistoryKeys, CLIENT_KNOBS->TOO_MANY);
|
||||
state Future<RangeResult> fTagLocalities = tr->getRange(tagLocalityListKeys, CLIENT_KNOBS->TOO_MANY);
|
||||
state Future<RangeResult> fTLogDatacenters = tr->getRange(tLogDatacentersKeys, CLIENT_KNOBS->TOO_MANY);
|
||||
|
||||
wait(success(fListKey) && success(fTags) && success(fHistoryTags) && success(fTagLocalities) &&
|
||||
success(fTLogDatacenters));
|
||||
|
@ -1072,22 +1237,32 @@ ACTOR Future<Void> removeStorageServer(Database cx,
|
|||
if (locality >= 0 && !allLocalities.count(locality)) {
|
||||
for (auto& it : fTagLocalities.get()) {
|
||||
if (locality == decodeTagLocalityListValue(it.value)) {
|
||||
tr.clear(it.key);
|
||||
tr->clear(it.key);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tr.clear(serverListKeyFor(serverID));
|
||||
tr.clear(serverTagKeyFor(serverID));
|
||||
tr.clear(serverTagHistoryRangeFor(serverID));
|
||||
tr->clear(serverListKeyFor(serverID));
|
||||
tr->clear(serverTagKeyFor(serverID)); // A tss uses this to communicate shutdown but it never has a
|
||||
// server tag key set in the first place
|
||||
tr->clear(serverTagHistoryRangeFor(serverID));
|
||||
|
||||
if (SERVER_KNOBS->TSS_HACK_IDENTITY_MAPPING) {
|
||||
// THIS SHOULD NEVER BE ENABLED IN ANY NON-TESTING ENVIRONMENT
|
||||
TraceEvent(SevError, "TSSIdentityMappingEnabled");
|
||||
tssMapDB.erase(tr, serverID);
|
||||
} else if (tssPairID.present()) {
|
||||
tssMapDB.erase(tr, tssPairID.get());
|
||||
}
|
||||
|
||||
retry = true;
|
||||
wait(tr.commit());
|
||||
wait(tr->commit());
|
||||
return Void();
|
||||
}
|
||||
} catch (Error& e) {
|
||||
state Error err = e;
|
||||
wait(tr.onError(e));
|
||||
wait(tr->onError(e));
|
||||
TraceEvent("RemoveStorageServerRetrying").error(err);
|
||||
}
|
||||
}
|
||||
|
@ -1180,11 +1355,20 @@ ACTOR Future<Void> moveKeys(Database cx,
|
|||
const DDEnabledState* ddEnabledState) {
|
||||
ASSERT(destinationTeam.size());
|
||||
std::sort(destinationTeam.begin(), destinationTeam.end());
|
||||
wait(startMoveKeys(
|
||||
cx, keys, destinationTeam, lock, startMoveKeysParallelismLock, relocationIntervalId, ddEnabledState));
|
||||
|
||||
state std::map<UID, StorageServerInterface> tssMapping;
|
||||
|
||||
wait(startMoveKeys(cx,
|
||||
keys,
|
||||
destinationTeam,
|
||||
lock,
|
||||
startMoveKeysParallelismLock,
|
||||
relocationIntervalId,
|
||||
&tssMapping,
|
||||
ddEnabledState));
|
||||
|
||||
state Future<Void> completionSignaller =
|
||||
checkFetchingState(cx, healthyDestinations, keys, dataMovementComplete, relocationIntervalId);
|
||||
checkFetchingState(cx, healthyDestinations, keys, dataMovementComplete, relocationIntervalId, tssMapping);
|
||||
|
||||
wait(finishMoveKeys(cx,
|
||||
keys,
|
||||
|
@ -1193,6 +1377,7 @@ ACTOR Future<Void> moveKeys(Database cx,
|
|||
finishMoveKeysParallelismLock,
|
||||
hasRemote,
|
||||
relocationIntervalId,
|
||||
tssMapping,
|
||||
ddEnabledState));
|
||||
|
||||
// This is defensive, but make sure that we always say that the movement is complete before moveKeys completes
|
||||
|
@ -1228,6 +1413,13 @@ void seedShardServers(Arena& arena, CommitTransactionRef& tr, vector<StorageServ
|
|||
for (auto& s : servers) {
|
||||
tr.set(arena, serverTagKeyFor(s.id()), serverTagValue(server_tag[s.id()]));
|
||||
tr.set(arena, serverListKeyFor(s.id()), serverListValue(s));
|
||||
if (SERVER_KNOBS->TSS_HACK_IDENTITY_MAPPING) {
|
||||
// THIS SHOULD NEVER BE ENABLED IN ANY NON-TESTING ENVIRONMENT
|
||||
TraceEvent(SevError, "TSSIdentityMappingEnabled");
|
||||
// hack key-backed map here since we can't really change CommitTransactionRef to a RYW transaction
|
||||
Key uidRef = Codec<UID>::pack(s.id()).pack();
|
||||
tr.set(arena, uidRef.withPrefix(tssMappingKeys.begin), uidRef);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<Tag> serverTags;
|
||||
|
|
|
@@ -89,13 +89,14 @@ ACTOR Future<std::pair<Version, Tag>> addStorageServer(Database cx, StorageServe

ACTOR Future<Void> removeStorageServer(Database cx,
UID serverID,
Optional<UID> tssPairID, // if serverID is a tss, set to its ss pair id
MoveKeysLock lock,
const DDEnabledState* ddEnabledState);
// Removes the given storage server permanently from the database. It must already
// have no shards assigned to it. The storage server MUST NOT be added again after this
// (though a new storage server with a new unique ID may be recruited from the same fdbserver).

ACTOR Future<bool> canRemoveStorageServer(Transaction* tr, UID serverID);
ACTOR Future<bool> canRemoveStorageServer(Reference<ReadYourWritesTransaction> tr, UID serverID);
// Returns true if the given storage server has no keys assigned to it and may be safely removed
// Obviously that could change later!
ACTOR Future<Void> removeKeysFromFailedServer(Database cx,

@@ -1498,10 +1498,10 @@ ACTOR Future<Void> doQueueCommit(TLogData* self,

ACTOR Future<Void> commitQueue(TLogData* self) {
state Reference<LogData> logData;
state std::vector<Reference<LogData>> missingFinalCommit;

loop {
int foundCount = 0;
state std::vector<Reference<LogData>> missingFinalCommit;
for (auto it : self->id_data) {
if (!it.second->stopped) {
logData = it.second;

@@ -1925,10 +1925,10 @@ ACTOR Future<Void> doQueueCommit(TLogData* self,

ACTOR Future<Void> commitQueue(TLogData* self) {
state Reference<LogData> logData;
state std::vector<Reference<LogData>> missingFinalCommit;

loop {
int foundCount = 0;
state std::vector<Reference<LogData>> missingFinalCommit;
for (auto it : self->id_data) {
if (!it.second->stopped) {
logData = it.second;

@@ -158,6 +158,7 @@ struct ProxyCommitData {
EventMetricHandle<SingleKeyMutation> singleKeyMutationEvent;

std::map<UID, Reference<StorageInfo>> storageCache;
std::unordered_map<UID, StorageServerInterface> tssMapping;
std::map<Tag, Version> tag_popped;
Deque<std::pair<Version, Version>> txsPopVersions;
Version lastTxsPop;

@@ -308,9 +308,13 @@ ACTOR Future<int64_t> getMaxStorageServerQueueSize(Database cx, Reference<AsyncV
.detail("SS", servers[i].id());
throw attribute_not_found();
}
messages.push_back(timeoutError(itr->second.eventLogRequest.getReply(
EventLogRequest(StringRef(servers[i].id().toString() + "/StorageMetrics"))),
1.0));
// Ignore TSS in add delay mode since it can purposefully freeze forever
if (!servers[i].isTss() || !g_network->isSimulated() ||
g_simulator.tssMode != ISimulator::TSSMode::EnabledAddDelay) {
messages.push_back(timeoutError(itr->second.eventLogRequest.getReply(EventLogRequest(
StringRef(servers[i].id().toString() + "/StorageMetrics"))),
1.0));
}
}

wait(waitForAll(messages));

@@ -516,7 +520,15 @@ ACTOR Future<bool> getStorageServersRecruiting(Database cx, WorkerInterface dist
1.0));

TraceEvent("StorageServersRecruiting").detail("Message", recruitingMessage.toString());
return recruitingMessage.getValue("State") == "Recruiting";

if (recruitingMessage.getValue("State") == "Recruiting") {
std::string tssValue;
// if we're tss recruiting, that's fine because that can block indefinitely if only 1 free storage process
if (!recruitingMessage.tryGetValue("IsTSS", tssValue) || tssValue == "False") {
return true;
}
}
return false;
} catch (Error& e) {
TraceEvent("QuietDatabaseFailure", distributorWorker.id())
.detail("Reason", "Failed to extract StorageServersRecruiting")

@@ -719,9 +719,11 @@ ACTOR Future<Void> trackEachStorageServer(
when(state std::pair<UID, Optional<StorageServerInterface>> change = waitNext(serverChanges)) {
wait(delay(0)); // prevent storageServerTracker from getting cancelled while on the call stack
if (change.second.present()) {
auto& a = actors[change.first];
a = Future<Void>();
a = splitError(trackStorageServerQueueInfo(self, change.second.get()), err);
if (!change.second.get().isTss()) {
auto& a = actors[change.first];
a = Future<Void>();
a = splitError(trackStorageServerQueueInfo(self, change.second.get()), err);
}
} else
actors.erase(change.first);
}

@ -22,6 +22,7 @@
|
|||
#include <fstream>
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
#include <toml.hpp>
|
||||
#include "fdbrpc/Locality.h"
|
||||
#include "fdbrpc/simulator.h"
|
||||
#include "fdbclient/DatabaseContext.h"
|
||||
|
@ -37,8 +38,8 @@
|
|||
#include "fdbclient/BackupAgent.actor.h"
|
||||
#include "fdbclient/versions.h"
|
||||
#include "flow/ProtocolVersion.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
#include "flow/network.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
#undef max
|
||||
#undef min
|
||||
|
@ -46,10 +47,236 @@
|
|||
extern "C" int g_expect_full_pointermap;
|
||||
extern const char* getSourceVersion();
|
||||
|
||||
using namespace std::literals;
|
||||
|
||||
const int MACHINE_REBOOT_TIME = 10;
|
||||
|
||||
bool destructed = false;
|
||||
|
||||
// Configuration details specified in workload test files that change the simulation
|
||||
// environment details
|
||||
class TestConfig {
|
||||
class ConfigBuilder {
|
||||
using value_type = toml::basic_value<toml::discard_comments>;
|
||||
std::unordered_map<std::string_view, std::function<void(value_type const&)>> confMap;
|
||||
|
||||
public:
|
||||
ConfigBuilder& add(std::string_view key, int* value) {
|
||||
confMap.emplace(key, [value](value_type const& v) { *value = v.as_integer(); });
|
||||
return *this;
|
||||
}
|
||||
ConfigBuilder& add(std::string_view key, Optional<int>* value) {
|
||||
confMap.emplace(key, [value](value_type const& v) { *value = v.as_integer(); });
|
||||
return *this;
|
||||
}
|
||||
ConfigBuilder& add(std::string_view key, bool* value) {
|
||||
confMap.emplace(key, [value](value_type const& v) { *value = v.as_boolean(); });
|
||||
return *this;
|
||||
}
|
||||
ConfigBuilder& add(std::string_view key, Optional<bool>* value) {
|
||||
confMap.emplace(key, [value](value_type const& v) { *value = v.as_boolean(); });
|
||||
return *this;
|
||||
}
|
||||
ConfigBuilder& add(std::string_view key, std::string* value) {
|
||||
confMap.emplace(key, [value](value_type const& v) { *value = v.as_string(); });
|
||||
return *this;
|
||||
}
|
||||
ConfigBuilder& add(std::string_view key, Optional<std::string>* value) {
|
||||
confMap.emplace(key, [value](value_type const& v) { *value = v.as_string(); });
|
||||
return *this;
|
||||
}
|
||||
ConfigBuilder& add(std::string_view key, std::vector<int>* value) {
|
||||
confMap.emplace(key, [value](value_type const& v) {
|
||||
auto arr = v.as_array();
|
||||
for (const auto& i : arr) {
|
||||
value->push_back(i.as_integer());
|
||||
}
|
||||
});
|
||||
return *this;
|
||||
}
|
||||
void set(std::string const& key, value_type const& val) {
|
||||
auto iter = confMap.find(key);
|
||||
if (iter == confMap.end()) {
|
||||
std::cerr << "Unknown configuration attribute " << key << std::endl;
|
||||
TraceEvent("UnknownConfigurationAttribute").detail("Name", key);
|
||||
throw unknown_error();
|
||||
}
|
||||
iter->second(val);
|
||||
}
|
||||
};
|
||||
|
||||
bool isIniFile(const char* fileName) {
|
||||
std::string name = fileName;
|
||||
auto pos = name.find_last_of('.');
|
||||
ASSERT(pos != std::string::npos && pos + 1 < name.size());
|
||||
auto extension = name.substr(pos + 1);
|
||||
return extension == "txt"sv;
|
||||
}
|
||||
|
||||
void loadIniFile(const char* testFile) {
|
||||
std::ifstream ifs;
|
||||
ifs.open(testFile, std::ifstream::in);
|
||||
if (!ifs.good())
|
||||
return;
|
||||
|
||||
std::string cline;
|
||||
|
||||
while (ifs.good()) {
|
||||
getline(ifs, cline);
|
||||
std::string line = removeWhitespace(std::string(cline));
|
||||
if (!line.size() || line.find(';') == 0)
|
||||
continue;
|
||||
|
||||
size_t found = line.find('=');
|
||||
if (found == std::string::npos)
|
||||
// hmmm, not good
|
||||
continue;
std::string attrib = removeWhitespace(line.substr(0, found));
std::string value = removeWhitespace(line.substr(found + 1));

if (attrib == "extraDB") {
sscanf(value.c_str(), "%d", &extraDB);
}

if (attrib == "minimumReplication") {
sscanf(value.c_str(), "%d", &minimumReplication);
}

if (attrib == "minimumRegions") {
sscanf(value.c_str(), "%d", &minimumRegions);
}

if (attrib == "configureLocked") {
sscanf(value.c_str(), "%d", &configureLocked);
}

if (attrib == "startIncompatibleProcess") {
startIncompatibleProcess = strcmp(value.c_str(), "true") == 0;
}

if (attrib == "logAntiQuorum") {
sscanf(value.c_str(), "%d", &logAntiQuorum);
}

if (attrib == "storageEngineExcludeTypes") {
std::stringstream ss(value);
for (int i; ss >> i;) {
storageEngineExcludeTypes.push_back(i);
if (ss.peek() == ',') {
ss.ignore();
}
}
}
if (attrib == "maxTLogVersion") {
sscanf(value.c_str(), "%d", &maxTLogVersion);
}
if (attrib == "restartInfoLocation") {
isFirstTestInRestart = true;
}
}

ifs.close();
}

public:
int extraDB = 0;
int minimumReplication = 0;
int minimumRegions = 0;
bool configureLocked = false;
bool startIncompatibleProcess = false;
int logAntiQuorum = -1;
bool isFirstTestInRestart = false;
// Storage Engine Types: Verify match with SimulationConfig::generateNormalConfig
// 0 = "ssd"
// 1 = "memory"
// 2 = "memory-radixtree-beta"
// 3 = "ssd-redwood-experimental"
// Requires a comma-separated list of numbers WITHOUT whitespaces
std::vector<int> storageEngineExcludeTypes;
// Set the maximum TLog version that can be selected for a test
// Refer to FDBTypes.h::TLogVersion. Defaults to the maximum supported version.
int maxTLogVersion = TLogVersion::MAX_SUPPORTED;
// Set true to simplify simulation configs for easier debugging
bool simpleConfig = false;
Optional<bool> generateFearless, buggify;
Optional<int> datacenters, desiredTLogCount, commitProxyCount, grvProxyCount, resolverCount, storageEngineType,
stderrSeverity, machineCount, processesPerMachine, coordinators;
Optional<std::string> config;

bool tomlKeyPresent(const toml::value& data, std::string key) {
if (data.is_table()) {
for (const auto& [k, v] : data.as_table()) {
if (k == key || tomlKeyPresent(v, key)) {
return true;
}
}
} else if (data.is_array()) {
for (const auto& v : data.as_array()) {
if (tomlKeyPresent(v, key)) {
return true;
}
}
}
return false;
}

void readFromConfig(const char* testFile) {
if (isIniFile(testFile)) {
loadIniFile(testFile);
return;
}
ConfigBuilder builder;
builder.add("extraDB", &extraDB)
.add("minimumReplication", &minimumReplication)
.add("minimumRegions", &minimumRegions)
.add("configureLocked", &configureLocked)
.add("startIncompatibleProcess", &startIncompatibleProcess)
.add("logAntiQuorum", &logAntiQuorum)
.add("storageEngineExcludeTypes", &storageEngineExcludeTypes)
.add("maxTLogVersion", &maxTLogVersion)
.add("simpleConfig", &simpleConfig)
.add("generateFearless", &generateFearless)
.add("datacenters", &datacenters)
.add("desiredTLogCount", &desiredTLogCount)
.add("commitProxyCount", &commitProxyCount)
.add("grvProxyCount", &grvProxyCount)
.add("resolverCount", &resolverCount)
.add("storageEngineType", &storageEngineType)
.add("config", &config)
.add("buggify", &buggify)
.add("StderrSeverity", &stderrSeverity)
.add("machineCount", &machineCount)
.add("processesPerMachine", &processesPerMachine)
.add("coordinators", &coordinators);
try {
auto file = toml::parse(testFile);
if (file.contains("configuration") && toml::find(file, "configuration").is_table()) {
auto conf = toml::find(file, "configuration").as_table();
for (const auto& [key, value] : conf) {
if (key == "ClientInfoLogging") {
setNetworkOption(FDBNetworkOptions::DISABLE_CLIENT_STATISTICS_LOGGING);
} else if (key == "restartInfoLocation") {
isFirstTestInRestart = true;
} else {
builder.set(key, value);
}
}
if (stderrSeverity.present()) {
TraceEvent("StderrSeverity").detail("NewSeverity", stderrSeverity.get());
}
}
// look for restartInfoLocation to mark isFirstTestInRestart
if (!isFirstTestInRestart) {
isFirstTestInRestart = tomlKeyPresent(file, "restartInfoLocation");
}
} catch (std::exception& e) {
std::cerr << e.what() << std::endl;
TraceEvent("TOMLParseError").detail("Error", printable(e.what()));
throw unknown_error();
}
}
};
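For readers following the new TOML path above: the ConfigBuilder keys map one-to-one to entries in a test file's [configuration] table, and readFromConfig() falls back to loadIniFile() for the older ini-style specs. A minimal, hypothetical usage sketch (the file path and the printed field are illustrative, not taken from this diff):

// Hedged sketch: populate a TestConfig from a test spec on disk.
// Optional<> members such as generateFearless stay unset unless the
// corresponding key appears under [configuration] in the file.
TestConfig testConfig;
testConfig.readFromConfig("tests/fast/ExampleTest.toml"); // illustrative path
if (testConfig.generateFearless.present()) {
    printf("generateFearless forced to %d\n", int(testConfig.generateFearless.get()));
}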

template <class T>
T simulate(const T& in) {
BinaryWriter writer(AssumeVersion(g_network->protocolVersion()));

@ -507,8 +734,8 @@ ACTOR Future<Void> simulatedMachine(ClusterConnectionString connStr,
// Copy the file pointers to a vector because the map may be modified while we are killing files
std::vector<AsyncFileNonDurable*> files;
for (auto fileItr = machineCache.begin(); fileItr != machineCache.end(); ++fileItr) {
ASSERT(fileItr->second.isReady());
files.push_back((AsyncFileNonDurable*)fileItr->second.get().getPtr());
ASSERT(fileItr->second.get().isReady());
files.push_back((AsyncFileNonDurable*)fileItr->second.get().get().getPtr());
}

std::vector<Future<Void>> killFutures;

@ -524,7 +751,7 @@ ACTOR Future<Void> simulatedMachine(ClusterConnectionString connStr,
for (auto it : machineCache) {
filenames.insert(it.first);
closingStr += it.first + ", ";
ASSERT(it.second.isReady() && !it.second.isError());
ASSERT(it.second.get().canGet());
}

for (auto it : g_simulator.getMachineById(localities.machineId())->deletingFiles) {
@ -885,31 +1112,59 @@ StringRef StringRefOf(const char* s) {
// of different combinations
void SimulationConfig::generateNormalConfig(const TestConfig& testConfig) {
set_config("new");
const bool simple = false; // Set true to simplify simulation configs for easier debugging
// generateMachineTeamTestConfig set up the number of servers per machine and the number of machines such that
// if we do not remove the surplus server and machine teams, the simulation test will report error.
// This is needed to make sure the number of server (and machine) teams is no larger than the desired number.
bool generateMachineTeamTestConfig = BUGGIFY_WITH_PROB(0.1) ? true : false;
bool generateFearless = simple ? false : (testConfig.minimumRegions > 1 || deterministicRandom()->random01() < 0.5);
datacenters = simple ? 1
: (generateFearless
? (testConfig.minimumReplication > 0 || deterministicRandom()->random01() < 0.5 ? 4 : 6)
: deterministicRandom()->randomInt(1, 4));
if (deterministicRandom()->random01() < 0.25)
db.desiredTLogCount = deterministicRandom()->randomInt(1, 7);
if (deterministicRandom()->random01() < 0.25)
db.commitProxyCount = deterministicRandom()->randomInt(1, 7);
if (deterministicRandom()->random01() < 0.25)
db.grvProxyCount = deterministicRandom()->randomInt(1, 4);
if (deterministicRandom()->random01() < 0.25)
db.resolverCount = deterministicRandom()->randomInt(1, 7);
int storage_engine_type = deterministicRandom()->randomInt(0, 4);
// Continuously re-pick the storage engine type if it's the one we want to exclude
while (std::find(testConfig.storageEngineExcludeTypes.begin(),
testConfig.storageEngineExcludeTypes.end(),
storage_engine_type) != testConfig.storageEngineExcludeTypes.end()) {
storage_engine_type = deterministicRandom()->randomInt(0, 4);
bool generateFearless =
testConfig.simpleConfig ? false : (testConfig.minimumRegions > 1 || deterministicRandom()->random01() < 0.5);
if (testConfig.generateFearless.present()) {
// overwrite whatever decision we made before
generateFearless = testConfig.generateFearless.get();
}
datacenters =
testConfig.simpleConfig
? 1
: (generateFearless ? (testConfig.minimumReplication > 0 || deterministicRandom()->random01() < 0.5 ? 4 : 6)
: deterministicRandom()->randomInt(1, 4));
if (testConfig.datacenters.present()) {
datacenters = testConfig.datacenters.get();
}
if (testConfig.desiredTLogCount.present()) {
db.desiredTLogCount = testConfig.desiredTLogCount.get();
} else if (deterministicRandom()->random01() < 0.25) {
db.desiredTLogCount = deterministicRandom()->randomInt(1, 7);
}

if (testConfig.commitProxyCount.present()) {
db.commitProxyCount = testConfig.commitProxyCount.get();
} else if (deterministicRandom()->random01() < 0.25) {
db.commitProxyCount = deterministicRandom()->randomInt(1, 7);
}

if (testConfig.grvProxyCount.present()) {
db.grvProxyCount = testConfig.grvProxyCount.get();
} else if (deterministicRandom()->random01() < 0.25) {
db.grvProxyCount = deterministicRandom()->randomInt(1, 4);
}

if (testConfig.resolverCount.present()) {
db.resolverCount = testConfig.resolverCount.get();
} else if (deterministicRandom()->random01() < 0.25) {
db.resolverCount = deterministicRandom()->randomInt(1, 7);
}
int storage_engine_type = deterministicRandom()->randomInt(0, 4);
if (testConfig.storageEngineType.present()) {
storage_engine_type = testConfig.storageEngineType.get();
} else {
// Continuously re-pick the storage engine type if it's the one we want to exclude
while (std::find(testConfig.storageEngineExcludeTypes.begin(),
testConfig.storageEngineExcludeTypes.end(),
storage_engine_type) != testConfig.storageEngineExcludeTypes.end()) {
storage_engine_type = deterministicRandom()->randomInt(0, 4);
}
}

switch (storage_engine_type) {
case 0: {
TEST(true); // Simulated cluster using ssd storage engine

@ -934,6 +1189,13 @@ void SimulationConfig::generateNormalConfig(const TestConfig& testConfig) {
default:
ASSERT(false); // Programmer forgot to adjust cases.
}

int tssCount = 0;
if (!testConfig.simpleConfig && deterministicRandom()->random01() < 0.25) {
// 1 or 2 tss
tssCount = deterministicRandom()->randomInt(1, 3);
}

// if (deterministicRandom()->random01() < 0.5) {
// set_config("ssd");
// } else {
@ -941,75 +1203,81 @@ void SimulationConfig::generateNormalConfig(const TestConfig& testConfig) {
// }
// set_config("memory");
// set_config("memory-radixtree-beta");
if (simple) {
if (testConfig.simpleConfig) {
db.desiredTLogCount = 1;
db.commitProxyCount = 1;
db.grvProxyCount = 1;
db.resolverCount = 1;
}
int replication_type = simple ? 1
: (std::max(testConfig.minimumReplication,
datacenters > 4 ? deterministicRandom()->randomInt(1, 3)
: std::min(deterministicRandom()->randomInt(0, 6), 3)));
switch (replication_type) {
case 0: {
TEST(true); // Simulated cluster using custom redundancy mode
int storage_servers = deterministicRandom()->randomInt(1, generateFearless ? 4 : 5);
// FIXME: log replicas must be more than storage replicas because otherwise better master exists will not
// recognize it needs to change dcs
int replication_factor = deterministicRandom()->randomInt(storage_servers, generateFearless ? 4 : 5);
int anti_quorum = deterministicRandom()->randomInt(
0,
(replication_factor / 2) + 1); // The anti quorum cannot be more than half of the replication factor, or the
// log system will continue to accept commits when a recovery is impossible
// Go through buildConfiguration, as it sets tLogPolicy/storagePolicy.
set_config(format("storage_replicas:=%d log_replicas:=%d log_anti_quorum:=%d "
"replica_datacenters:=1 min_replica_datacenters:=1",
storage_servers,
replication_factor,
anti_quorum));
break;
}
case 1: {
TEST(true); // Simulated cluster running in single redundancy mode
set_config("single");
break;
}
case 2: {
TEST(true); // Simulated cluster running in double redundancy mode
set_config("double");
break;
}
case 3: {
if (datacenters <= 2 || generateFearless) {
TEST(true); // Simulated cluster running in triple redundancy mode
set_config("triple");
} else if (datacenters == 3) {
TEST(true); // Simulated cluster running in 3 data-hall mode
set_config("three_data_hall");
} else {
ASSERT(false);
}
break;
}
default:
ASSERT(false); // Programmer forgot to adjust cases.
}

if (deterministicRandom()->random01() < 0.5) {
int logSpill = deterministicRandom()->randomInt(TLogSpillType::VALUE, TLogSpillType::END);
set_config(format("log_spill:=%d", logSpill));
int logVersion = deterministicRandom()->randomInt(TLogVersion::MIN_RECRUITABLE, testConfig.maxTLogVersion + 1);
set_config(format("log_version:=%d", logVersion));
int replication_type = testConfig.simpleConfig
? 1
: (std::max(testConfig.minimumReplication,
datacenters > 4 ? deterministicRandom()->randomInt(1, 3)
: std::min(deterministicRandom()->randomInt(0, 6), 3)));
if (testConfig.config.present()) {
set_config(testConfig.config.get());
} else {
if (deterministicRandom()->random01() < 0.7)
set_config(format("log_version:=%d", testConfig.maxTLogVersion));
if (deterministicRandom()->random01() < 0.5)
set_config(format("log_spill:=%d", TLogSpillType::DEFAULT));
}
switch (replication_type) {
case 0: {
TEST(true); // Simulated cluster using custom redundancy mode
int storage_servers = deterministicRandom()->randomInt(1, generateFearless ? 4 : 5);
// FIXME: log replicas must be more than storage replicas because otherwise better master exists will not
// recognize it needs to change dcs
int replication_factor = deterministicRandom()->randomInt(storage_servers, generateFearless ? 4 : 5);
int anti_quorum = deterministicRandom()->randomInt(
0,
(replication_factor / 2) +
1); // The anti quorum cannot be more than half of the replication factor, or the
// log system will continue to accept commits when a recovery is impossible
// Go through buildConfiguration, as it sets tLogPolicy/storagePolicy.
set_config(format("storage_replicas:=%d log_replicas:=%d log_anti_quorum:=%d "
"replica_datacenters:=1 min_replica_datacenters:=1",
storage_servers,
replication_factor,
anti_quorum));
break;
}
case 1: {
TEST(true); // Simulated cluster running in single redundancy mode
set_config("single");
break;
}
case 2: {
TEST(true); // Simulated cluster running in double redundancy mode
set_config("double");
break;
}
case 3: {
if (datacenters <= 2 || generateFearless) {
TEST(true); // Simulated cluster running in triple redundancy mode
set_config("triple");
} else if (datacenters == 3) {
TEST(true); // Simulated cluster running in 3 data-hall mode
set_config("three_data_hall");
} else {
ASSERT(false);
}
break;
}
default:
ASSERT(false); // Programmer forgot to adjust cases.
}
if (deterministicRandom()->random01() < 0.5) {
int logSpill = deterministicRandom()->randomInt(TLogSpillType::VALUE, TLogSpillType::END);
set_config(format("log_spill:=%d", logSpill));
int logVersion =
deterministicRandom()->randomInt(TLogVersion::MIN_RECRUITABLE, testConfig.maxTLogVersion + 1);
set_config(format("log_version:=%d", logVersion));
} else {
if (deterministicRandom()->random01() < 0.7)
set_config(format("log_version:=%d", testConfig.maxTLogVersion));
if (deterministicRandom()->random01() < 0.5)
set_config(format("log_spill:=%d", TLogSpillType::DEFAULT));
}

if (deterministicRandom()->random01() < 0.5) {
set_config("backup_worker_enabled:=1");
if (deterministicRandom()->random01() < 0.5) {
set_config("backup_worker_enabled:=1");
}
}

if (generateFearless || (datacenters == 2 && deterministicRandom()->random01() < 0.5)) {
@ -1211,7 +1479,9 @@ void SimulationConfig::generateNormalConfig(const TestConfig& testConfig) {
}
}

if (generateFearless && testConfig.minimumReplication > 1) {
if (testConfig.machineCount.present()) {
machine_count = testConfig.machineCount.get();
} else if (generateFearless && testConfig.minimumReplication > 1) {
// low latency tests in fearless configurations need 4 machines per datacenter (3 for triple replication, 1 that
// is down during failures).
machine_count = 16;

@ -1234,11 +1504,15 @@ void SimulationConfig::generateNormalConfig(const TestConfig& testConfig) {
}
}

// because we protect a majority of coordinators from being killed, it is better to run with low numbers of
// coordinators to prevent too many processes from being protected
coordinators = (testConfig.minimumRegions <= 1 && BUGGIFY)
? deterministicRandom()->randomInt(1, std::max(machine_count, 2))
: 1;
if (testConfig.coordinators.present()) {
coordinators = testConfig.coordinators.get();
} else {
// because we protect a majority of coordinators from being killed, it is better to run with low numbers of
// coordinators to prevent too many processes from being protected
coordinators = (testConfig.minimumRegions <= 1 && BUGGIFY)
? deterministicRandom()->randomInt(1, std::max(machine_count, 2))
: 1;
}

if (testConfig.minimumReplication > 1 && datacenters == 3) {
// low latency tests in 3 data hall mode need 2 other data centers with 2 machines each to avoid waiting for

@ -1247,11 +1521,35 @@ void SimulationConfig::generateNormalConfig(const TestConfig& testConfig) {
coordinators = 3;
}

if (generateFearless) {
if (testConfig.processesPerMachine.present()) {
processes_per_machine = testConfig.processesPerMachine.get();
} else if (generateFearless) {
processes_per_machine = 1;
} else {
processes_per_machine = deterministicRandom()->randomInt(1, (extraDB ? 14 : 28) / machine_count + 2);
}

// reduce tss to half of extra non-seed servers that can be recruited in usable regions.
tssCount =
std::max(0, std::min(tssCount, (db.usableRegions * (machine_count / datacenters) - replication_type) / 2));

if (!testConfig.config.present() && tssCount > 0) {
std::string confStr = format("tss_count:=%d tss_storage_engine:=%d", tssCount, db.storageServerStoreType);
set_config(confStr);
double tssRandom = deterministicRandom()->random01();
if (tssRandom > 0.5) {
// normal tss mode
g_simulator.tssMode = ISimulator::TSSMode::EnabledNormal;
} else if (tssRandom < 0.25 && !testConfig.isFirstTestInRestart) {
// fault injection - don't enable in first test in restart because second test won't know it intentionally
// lost data
g_simulator.tssMode = ISimulator::TSSMode::EnabledDropMutations;
} else {
// delay injection
g_simulator.tssMode = ISimulator::TSSMode::EnabledAddDelay;
}
printf("enabling tss for simulation in mode %d: %s\n", g_simulator.tssMode, confStr.c_str());
}
}

// Configures the system according to the given specifications in order to run
@ -1275,6 +1573,9 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors,
startingConfigString += " locked";
}
for (auto kv : startingConfigJSON) {
if ("tss_storage_engine" == kv.first) {
continue;
}
startingConfigString += " ";
if (kv.second.type() == json_spirit::int_type) {
startingConfigString += kv.first + ":=" + format("%d", kv.second.get_int());

@ -1289,6 +1590,12 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors,
}
}

// handle tss_storage_engine separately because the passthrough needs the enum ordinal, but it's serialized to json
// as the string name
if (simconfig.db.desiredTSSCount > 0) {
startingConfigString += format(" tss_storage_engine:=%d", simconfig.db.testingStorageServerStoreType);
}

if (g_simulator.originalRegions != "") {
simconfig.set_config(g_simulator.originalRegions);
g_simulator.startingDisabledConfiguration = startingConfigString + " " + g_simulator.disableRemote;

@ -1363,6 +1670,7 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors,
TEST(!useIPv6); // Use IPv4

vector<NetworkAddress> coordinatorAddresses;
vector<NetworkAddress> extraCoordinatorAddresses; // Used by extra DB if the DR db is a new one
if (testConfig.minimumRegions > 1) {
// do not put coordinators in the primary region so that we can kill that region safely
int nonPrimaryDcs = dataCenters / 2;

@ -1372,6 +1680,9 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors,
auto ip = makeIPAddressForSim(useIPv6, { 2, dc, 1, m });
coordinatorAddresses.push_back(
NetworkAddress(ip, sslEnabled && !sslOnly ? 2 : 1, true, sslEnabled && sslOnly));
auto extraIp = makeIPAddressForSim(useIPv6, { 4, dc, 1, m });
extraCoordinatorAddresses.push_back(
NetworkAddress(extraIp, sslEnabled && !sslOnly ? 2 : 1, true, sslEnabled && sslOnly));
TraceEvent("SelectedCoordinator").detail("Address", coordinatorAddresses.back());
}
}

@ -1400,6 +1711,9 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors,
auto ip = makeIPAddressForSim(useIPv6, { 2, dc, 1, m });
coordinatorAddresses.push_back(
NetworkAddress(ip, sslEnabled && !sslOnly ? 2 : 1, true, sslEnabled && sslOnly));
auto extraIp = makeIPAddressForSim(useIPv6, { 4, dc, 1, m });
extraCoordinatorAddresses.push_back(
NetworkAddress(extraIp, sslEnabled && !sslOnly ? 2 : 1, true, sslEnabled && sslOnly));
TraceEvent("SelectedCoordinator")
.detail("Address", coordinatorAddresses.back())
.detail("M", m)

@ -1436,11 +1750,13 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors,
// If extraDB==0, leave g_simulator.extraDB as null because the test does not use DR.
if (testConfig.extraDB == 1) {
// The DR database can be either a new database or itself
g_simulator.extraDB = new ClusterConnectionString(
coordinatorAddresses, BUGGIFY ? LiteralStringRef("TestCluster:0") : LiteralStringRef("ExtraCluster:0"));
g_simulator.extraDB =
BUGGIFY ? new ClusterConnectionString(coordinatorAddresses, LiteralStringRef("TestCluster:0"))
: new ClusterConnectionString(extraCoordinatorAddresses, LiteralStringRef("ExtraCluster:0"));
} else if (testConfig.extraDB == 2) {
// The DR database is a new database
g_simulator.extraDB = new ClusterConnectionString(coordinatorAddresses, LiteralStringRef("ExtraCluster:0"));
g_simulator.extraDB =
new ClusterConnectionString(extraCoordinatorAddresses, LiteralStringRef("ExtraCluster:0"));
} else if (testConfig.extraDB == 3) {
// The DR database is the same database
g_simulator.extraDB = new ClusterConnectionString(coordinatorAddresses, LiteralStringRef("TestCluster:0"));

@ -1626,68 +1942,10 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors,
.detail("StartingConfiguration", pStartingConfiguration->toString());
}

using namespace std::literals;

// Populates the TestConfig fields according to what is found in the test file.
void checkTestConf(const char* testFile, TestConfig* testConfig) {
std::ifstream ifs;
ifs.open(testFile, std::ifstream::in);
if (!ifs.good())
return;

std::string cline;

while (ifs.good()) {
getline(ifs, cline);
std::string line = removeWhitespace(std::string(cline));
if (!line.size() || line.find(';') == 0)
continue;

size_t found = line.find('=');
if (found == std::string::npos)
// hmmm, not good
continue;
std::string attrib = removeWhitespace(line.substr(0, found));
std::string value = removeWhitespace(line.substr(found + 1));

if (attrib == "extraDB") {
sscanf(value.c_str(), "%d", &testConfig->extraDB);
}

if (attrib == "minimumReplication") {
sscanf(value.c_str(), "%d", &testConfig->minimumReplication);
}

if (attrib == "minimumRegions") {
sscanf(value.c_str(), "%d", &testConfig->minimumRegions);
}

if (attrib == "configureLocked") {
sscanf(value.c_str(), "%d", &testConfig->configureLocked);
}

if (attrib == "startIncompatibleProcess") {
testConfig->startIncompatibleProcess = strcmp(value.c_str(), "true") == 0;
}

if (attrib == "logAntiQuorum") {
sscanf(value.c_str(), "%d", &testConfig->logAntiQuorum);
}

if (attrib == "storageEngineExcludeTypes") {
std::stringstream ss(value);
for (int i; ss >> i;) {
testConfig->storageEngineExcludeTypes.push_back(i);
if (ss.peek() == ',') {
ss.ignore();
}
}
}
if (attrib == "maxTLogVersion") {
sscanf(value.c_str(), "%d", &testConfig->maxTLogVersion);
}
}

ifs.close();
}
void checkTestConf(const char* testFile, TestConfig* testConfig) {}

ACTOR void setupAndRun(std::string dataFolder,
const char* testFile,

@ -1699,7 +1957,7 @@ ACTOR void setupAndRun(std::string dataFolder,
state Standalone<StringRef> startingConfiguration;
state int testerCount = 1;
state TestConfig testConfig;
checkTestConf(testFile, &testConfig);
testConfig.readFromConfig(testFile);
g_simulator.hasDiffProtocolProcess = testConfig.startIncompatibleProcess;
g_simulator.setDiffProtocol = false;
@ -387,6 +387,19 @@ JsonBuilderObject getLagObject(int64_t versions) {
return lag;
}

static JsonBuilderObject getBounceImpactInfo(int recoveryStatusCode) {
JsonBuilderObject bounceImpact;

if (recoveryStatusCode == RecoveryStatus::fully_recovered) {
bounceImpact["can_clean_bounce"] = true;
} else {
bounceImpact["can_clean_bounce"] = false;
bounceImpact["reason"] = "cluster hasn't fully recovered yet";
}

return bounceImpact;
}

struct MachineMemoryInfo {
double memoryUsage;
double aggregateLimit;

@ -478,6 +491,8 @@ struct RolesInfo {
obj["mutation_bytes"] = StatusCounter(storageMetrics.getValue("MutationBytes")).getStatus();
obj["mutations"] = StatusCounter(storageMetrics.getValue("Mutations")).getStatus();
obj.setKeyRawNumber("local_rate", storageMetrics.getValue("LocalRate"));
obj["fetched_versions"] = StatusCounter(storageMetrics.getValue("FetchedVersions")).getStatus();
obj["fetches_from_logs"] = StatusCounter(storageMetrics.getValue("FetchesFromLogs")).getStatus();

Version version = storageMetrics.getInt64("Version");
Version durableVersion = storageMetrics.getInt64("DurableVersion");

@ -615,7 +630,7 @@ struct RolesInfo {
TraceEventFields const& commitLatencyBands = metrics.at("CommitLatencyBands");
if (commitLatencyBands.size()) {
obj["commit_latency_bands"] = addLatencyBandInfo(commitLatencyBands);
}
}

TraceEventFields const& commitBatchingWindowSize = metrics.at("CommitBatchingWindowSize");
if (commitBatchingWindowSize.size()) {

@ -1169,6 +1184,7 @@ ACTOR static Future<JsonBuilderObject> recoveryStateStatusFetcher(Database cx,
} else if (mStatusCode == RecoveryStatus::locking_old_transaction_servers) {
message["missing_logs"] = md.getValue("MissingIDs").c_str();
}

// TODO: time_in_recovery: 0.5
// time_in_state: 0.1

@ -1853,10 +1869,10 @@ ACTOR static Future<vector<std::pair<TLogInterface, EventMap>>> getTLogsAndMetri
ACTOR static Future<vector<std::pair<CommitProxyInterface, EventMap>>> getCommitProxiesAndMetrics(
Reference<AsyncVar<ServerDBInfo>> db,
std::unordered_map<NetworkAddress, WorkerInterface> address_workers) {
vector<std::pair<CommitProxyInterface, EventMap>> results =
wait(getServerMetrics(db->get().client.commitProxies,
address_workers,
std::vector<std::string>{ "CommitLatencyMetrics", "CommitLatencyBands", "CommitBatchingWindowSize"}));
vector<std::pair<CommitProxyInterface, EventMap>> results = wait(getServerMetrics(
db->get().client.commitProxies,
address_workers,
std::vector<std::string>{ "CommitLatencyMetrics", "CommitLatencyBands", "CommitBatchingWindowSize" }));

return results;
}

@ -1864,10 +1880,10 @@ ACTOR static Future<vector<std::pair<CommitProxyInterface, EventMap>>> getCommit
ACTOR static Future<vector<std::pair<GrvProxyInterface, EventMap>>> getGrvProxiesAndMetrics(
Reference<AsyncVar<ServerDBInfo>> db,
std::unordered_map<NetworkAddress, WorkerInterface> address_workers) {
vector<std::pair<GrvProxyInterface, EventMap>> results =
wait(getServerMetrics(db->get().client.grvProxies,
address_workers,
std::vector<std::string>{ "GRVLatencyMetrics", "GRVLatencyBands", "GRVBatchLatencyMetrics" }));
vector<std::pair<GrvProxyInterface, EventMap>> results = wait(
getServerMetrics(db->get().client.grvProxies,
address_workers,
std::vector<std::string>{ "GRVLatencyMetrics", "GRVLatencyBands", "GRVBatchLatencyMetrics" }));
return results;
}

@ -2775,6 +2791,7 @@ ACTOR Future<StatusReply> clusterGetStatus(

statusObj["protocol_version"] = format("%" PRIx64, g_network->protocolVersion().version());
statusObj["connection_string"] = coordinators.ccf->getConnectionString().toString();
statusObj["bounce_impact"] = getBounceImpactInfo(statusCode);

state Optional<DatabaseConfiguration> configuration;
state Optional<LoadConfigurationResult> loadResult;

@ -2988,6 +3005,14 @@ ACTOR Future<StatusReply> clusterGetStatus(
statusObj["incompatible_connections"] = incompatibleConnectionsArray;
statusObj["datacenter_lag"] = getLagObject(datacenterVersionDifference);

int activeTSSCount = 0;
for (auto& it : storageServers) {
if (it.first.isTss()) {
activeTSSCount++;
}
}
statusObj["active_tss_count"] = activeTSSCount;

int totalDegraded = 0;
for (auto& it : workers) {
if (it.degraded) {
@ -1965,10 +1965,10 @@ ACTOR Future<Void> doQueueCommit(TLogData* self,

ACTOR Future<Void> commitQueue(TLogData* self) {
state Reference<LogData> logData;
state std::vector<Reference<LogData>> missingFinalCommit;

loop {
int foundCount = 0;
state std::vector<Reference<LogData>> missingFinalCommit;
for (auto it : self->id_data) {
if (!it.second->stopped) {
logData = it.second;
@ -0,0 +1,72 @@
/*
* TSSMappingUtil.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "fdbclient/SystemData.h"
#include "fdbclient/KeyBackedTypes.h"
#include "fdbserver/TSSMappingUtil.actor.h"
#include "flow/actorcompiler.h" // This must be the last #include.

ACTOR Future<Void> readTSSMappingRYW(Reference<ReadYourWritesTransaction> tr, std::map<UID, StorageServerInterface>* tssMapping) {
KeyBackedMap<UID, UID> tssMapDB = KeyBackedMap<UID, UID>(tssMappingKeys.begin);
state std::vector<std::pair<UID, UID>> uidMapping = wait(tssMapDB.getRange(tr, UID(), Optional<UID>(), CLIENT_KNOBS->TOO_MANY));
ASSERT(uidMapping.size() < CLIENT_KNOBS->TOO_MANY);

state std::map<UID, StorageServerInterface> mapping;
for (auto& it : uidMapping) {
state UID ssId = it.first;
Optional<Value> v = wait(tr->get(serverListKeyFor(it.second)));
(*tssMapping)[ssId] = decodeServerListValue(v.get());
}
return Void();
}

ACTOR Future<Void> readTSSMapping(Transaction* tr, std::map<UID, StorageServerInterface>* tssMapping) {
state RangeResult mappingList = wait(tr->getRange(tssMappingKeys, CLIENT_KNOBS->TOO_MANY));
ASSERT(!mappingList.more && mappingList.size() < CLIENT_KNOBS->TOO_MANY);

for (auto& it : mappingList) {
state UID ssId = Codec<UID>::unpack(Tuple::unpack(it.key.removePrefix(tssMappingKeys.begin)));
UID tssId = Codec<UID>::unpack(Tuple::unpack(it.value));
Optional<Value> v = wait(tr->get(serverListKeyFor(tssId)));
(*tssMapping)[ssId] = decodeServerListValue(v.get());
}
return Void();
}

ACTOR Future<Void> removeTSSPairsFromCluster(Database cx, vector<std::pair<UID, UID>> pairsToRemove) {
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(cx);
state KeyBackedMap<UID, UID> tssMapDB = KeyBackedMap<UID, UID>(tssMappingKeys.begin);
loop {
try {
tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
for (auto& tssPair : pairsToRemove) {
// DO NOT remove server list key - that'll break a bunch of stuff. DD will eventually call removeStorageServer
tr->clear(serverTagKeyFor(tssPair.second));
tssMapDB.erase(tr, tssPair.first);
}
wait(tr->commit());
break;
} catch (Error& e) {
wait(tr->onError(e));
}
}
return Void();
}
@ -0,0 +1,48 @@
/*
* TSSMappingUtil.actor.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

// When actually compiled (NO_INTELLISENSE), include the generated version of this file. In intellisense use the source
// version.
#if defined(NO_INTELLISENSE) && !defined(TSS_MAPPING_UTIL_SERVER_G_H)
#define TSS_MAPPING_UTIL_SERVER_G_H
#include "fdbserver/TSSMappingUtil.actor.g.h"
#elif !defined(TSS_MAPPING_UTIL_SERVER_H)
#define TSS_MAPPING_UTIL_SERVER_H

#include "fdbclient/StorageServerInterface.h"
#include "flow/actorcompiler.h" // This must be the last #include.

/*
* Collection of utility functions for dealing with the TSS mapping
*/

// Reads the current cluster TSS mapping as part of the RYW transaction
ACTOR Future<Void> readTSSMappingRYW(Reference<ReadYourWritesTransaction> tr, std::map<UID, StorageServerInterface>* tssMapping);

// Reads the current cluster TSS mapping as part of the given Transaction
ACTOR Future<Void> readTSSMapping(Transaction* tr, std::map<UID, StorageServerInterface>* tssMapping);

// Removes the TSS pairs from the cluster
ACTOR Future<Void> removeTSSPairsFromCluster(Database cx, vector<std::pair<UID, UID>> pairsToRemove);

#include "flow/unactorcompiler.h"
#endif
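A hypothetical caller sketch for the utilities declared above (not part of this diff): reading the TSS mapping with plain-Transaction retry semantics and logging each pair. The trace event name and the READ_SYSTEM_KEYS option are assumptions made for the example, not taken from the change.

ACTOR Future<Void> logTssPairs(Database cx) {
	// Sketch only: readTSSMapping() fills the map with ssId -> interface of its paired TSS.
	state Transaction tr(cx);
	state std::map<UID, StorageServerInterface> tssMapping;
	loop {
		try {
			tr.setOption(FDBTransactionOptions::READ_SYSTEM_KEYS); // tssMappingKeys live in the \xff keyspace
			wait(readTSSMapping(&tr, &tssMapping));
			break;
		} catch (Error& e) {
			wait(tr.onError(e));
		}
	}
	for (auto& it : tssMapping) {
		TraceEvent("TSSPairFound").detail("SS", it.first).detail("TSS", it.second.id());
	}
	return Void();
}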
@ -100,27 +100,6 @@ struct WorkloadRequest {
}
};

// Configuration details specified in workload test files that change the simulation
// environment details
struct TestConfig {
int extraDB = 0;
int minimumReplication = 0;
int minimumRegions = 0;
int configureLocked = 0;
bool startIncompatibleProcess = false;
int logAntiQuorum = -1;
// Storage Engine Types: Verify match with SimulationConfig::generateNormalConfig
// 0 = "ssd"
// 1 = "memory"
// 2 = "memory-radixtree-beta"
// 3 = "ssd-redwood-experimental"
// Requires a comma-separated list of numbers WITHOUT whitespaces
std::vector<int> storageEngineExcludeTypes;
// Set the maximum TLog version that can be selected for a test
// Refer to FDBTypes.h::TLogVersion. Defaults to the maximum supported version.
int maxTLogVersion = TLogVersion::MAX_SUPPORTED;
};

struct TesterInterface {
constexpr static FileIdentifier file_identifier = 4465210;
RequestStream<WorkloadRequest> recruitments;

File diff suppressed because it is too large
@ -614,11 +614,13 @@ struct InitializeStorageRequest {
UID reqId;
UID interfaceId;
KeyValueStoreType storeType;
Optional<std::pair<UID, Version>>
tssPairIDAndVersion; // Only set if recruiting a tss. Will be the UID and Version of its SS pair.
ReplyPromise<InitializeStorageReply> reply;

template <class Ar>
void serialize(Ar& ar) {
serializer(ar, seedTag, reqId, interfaceId, storeType, reply);
serializer(ar, seedTag, reqId, interfaceId, storeType, reply, tssPairIDAndVersion);
}
};

@ -770,6 +772,7 @@ struct DiskStoreRequest {
struct Role {
static const Role WORKER;
static const Role STORAGE_SERVER;
static const Role TESTING_STORAGE_SERVER;
static const Role TRANSACTION_LOG;
static const Role SHARED_TRANSACTION_LOG;
static const Role COMMIT_PROXY;

@ -840,6 +843,7 @@ class IDiskQueue;
ACTOR Future<Void> storageServer(IKeyValueStore* persistentData,
StorageServerInterface ssi,
Tag seedTag,
Version tssSeedVersion,
ReplyPromise<InitializeStorageReply> recruitReply,
Reference<AsyncVar<ServerDBInfo>> db,
std::string folder);
@ -38,6 +38,7 @@
#include "fdbclient/DatabaseContext.h"
#include "fdbclient/KeyRangeMap.h"
#include "fdbclient/CommitProxyInterface.h"
#include "fdbclient/KeyBackedTypes.h"
#include "fdbclient/NativeAPI.actor.h"
#include "fdbclient/Notified.h"
#include "fdbclient/StatusClient.h"

@ -463,7 +464,7 @@ public:
void byteSampleApplyClear(KeyRangeRef range, Version ver);

void popVersion(Version v, bool popAllTags = false) {
if (logSystem) {
if (logSystem && !isTss()) {
if (v > poppedAllAfter) {
popAllTags = true;
poppedAllAfter = std::numeric_limits<Version>::max();

@ -510,6 +511,21 @@ public:
return mLV.push_back_deep(mLV.arena(), m);
}

void setTssPair(UID pairId) {
tssPairID = Optional<UID>(pairId);

// Set up tss fault injection here, only if we are in simulated mode and with fault injection.
// With fault injection enabled, the tss will start acting normal for a bit, then after the specified delay
// start behaving incorrectly.
if (g_network->isSimulated() && !g_simulator.speedUpSimulation &&
g_simulator.tssMode >= ISimulator::TSSMode::EnabledAddDelay) {
tssFaultInjectTime = now() + deterministicRandom()->randomInt(60, 300);
TraceEvent(SevWarnAlways, "TSSInjectFaultEnabled", thisServerID)
.detail("Mode", g_simulator.tssMode)
.detail("At", tssFaultInjectTime.get());
}
}

StorageServerDisk storage;

KeyRangeMap<Reference<ShardInfo>> shards;

@ -544,12 +560,17 @@ public:
int64_t versionLag; // An estimate for how many versions it takes for the data to move from the logs to this storage
// server

Optional<UID> sourceTLogID; // the tLog from which the latest batch of versions were fetched

ProtocolVersion logProtocol;

Reference<ILogSystem> logSystem;
Reference<ILogSystem::IPeekCursor> logCursor;

UID thisServerID;
Optional<UID> tssPairID; // if this server is a tss, this is the id of its (ss) pair
Optional<UID> ssPairID; // if this server is an ss, this is the id of its (tss) pair
Optional<double> tssFaultInjectTime;
Key sk;
Reference<AsyncVar<ServerDBInfo>> db;
Database cx;

@ -677,6 +698,8 @@ public:
Counter loops;
Counter fetchWaitingMS, fetchWaitingCount, fetchExecutingMS, fetchExecutingCount;
Counter readsRejected;
Counter fetchedVersions;
Counter fetchesFromLogs;

LatencySample readLatencySample;
LatencyBands readLatencyBands;

@ -694,10 +717,11 @@ public:
updateBatches("UpdateBatches", cc), updateVersions("UpdateVersions", cc), loops("Loops", cc),
fetchWaitingMS("FetchWaitingMS", cc), fetchWaitingCount("FetchWaitingCount", cc),
fetchExecutingMS("FetchExecutingMS", cc), fetchExecutingCount("FetchExecutingCount", cc),
readsRejected("ReadsRejected", cc), readLatencySample("ReadLatencyMetrics",
self->thisServerID,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SAMPLE_SIZE),
readsRejected("ReadsRejected", cc), fetchedVersions("FetchedVersions", cc),
fetchesFromLogs("FetchesFromLogs", cc), readLatencySample("ReadLatencyMetrics",
self->thisServerID,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SAMPLE_SIZE),
readLatencyBands("ReadLatencyBands", self->thisServerID, SERVER_KNOBS->STORAGE_LOGGING_DELAY) {
specialCounter(cc, "LastTLogVersion", [self]() { return self->lastTLogVersion; });
specialCounter(cc, "Version", [self]() { return self->version.get(); });

@ -780,6 +804,14 @@ public:
mutableData().forgetVersionsBefore(ver);
}

bool isTss() const { return tssPairID.present(); }

bool isSSWithTSSPair() const { return ssPairID.present(); }

void setSSWithTssPair(UID idOfTSS) { ssPairID = Optional<UID>(idOfTSS); }

void clearSSWithTssPair() { ssPairID = Optional<UID>(); }

// This is the maximum version that might be read from storage (the minimum version is durableVersion)
Version storageVersion() const { return oldestVersion.get(); }
@ -1155,13 +1187,12 @@ ACTOR Future<Void> getValueQ(StorageServer* data, GetValueRequest req) {
DEBUG_MUTATION("ShardGetValue",
version,
MutationRef(MutationRef::DebugKey, req.key, v.present() ? v.get() : LiteralStringRef("<null>")));
DEBUG_MUTATION("ShardGetPath",
version,
MutationRef(MutationRef::DebugKey,
req.key,
path == 0 ? LiteralStringRef("0")
: path == 1 ? LiteralStringRef("1")
: LiteralStringRef("2")));
DEBUG_MUTATION(
"ShardGetPath",
version,
MutationRef(MutationRef::DebugKey,
req.key,
path == 0 ? LiteralStringRef("0") : path == 1 ? LiteralStringRef("1") : LiteralStringRef("2")));

/*
StorageMetrics m;

@ -1718,7 +1749,9 @@ ACTOR Future<Key> findKey(StorageServer* data,
if (sel.offset <= 1 && sel.offset >= 0)
maxBytes = std::numeric_limits<int>::max();
else
maxBytes = BUGGIFY ? SERVER_KNOBS->BUGGIFY_LIMIT_BYTES : SERVER_KNOBS->STORAGE_LIMIT_BYTES;
maxBytes = (g_network->isSimulated() && g_simulator.tssMode == ISimulator::TSSMode::Disabled && BUGGIFY)
? SERVER_KNOBS->BUGGIFY_LIMIT_BYTES
: SERVER_KNOBS->STORAGE_LIMIT_BYTES;

state GetKeyValuesReply rep = wait(
readRange(data,

@ -1775,10 +1808,10 @@ ACTOR Future<Key> findKey(StorageServer* data,
// This is possible if key/value pairs are very large and only one result is returned on a last less than
// query SOMEDAY: graceful handling of exceptionally sized values
ASSERT(returnKey != sel.getKey());

return returnKey;
} else
} else {
return forward ? range.end : range.begin;
}
}
}

@ -1849,6 +1882,7 @@ ACTOR Future<Void> getKeyValuesQ(StorageServer* data, GetKeyValuesRequest req)
: findKey(data, req.end, version, shard, &offset2, span.context);
state Key begin = wait(fBegin);
state Key end = wait(fEnd);

if (req.debugID.present())
g_traceBatch.addEvent(
"TransactionDebug", req.debugID.get().first(), "storageserver.getKeyValues.AfterKeys");

@ -1973,6 +2007,7 @@ ACTOR Future<Void> getKeyQ(StorageServer* data, GetKeyRequest req) {

try {
state Version version = wait(waitForVersion(data, req.version, req.spanContext));

state uint64_t changeCounter = data->shardChangeCounter;
state KeyRange shard = getShardKeyRange(data, req.sel);
@ -2927,32 +2962,30 @@ void changeServerKeys(StorageServer* data,
ChangeServerKeysContext context) {
ASSERT(!keys.empty());

//TraceEvent("ChangeServerKeys", data->thisServerID)
// .detail("KeyBegin", keys.begin)
// .detail("KeyEnd", keys.end)
// .detail("NowAssigned", nowAssigned)
// .detail("Version", version)
// .detail("Context", changeServerKeysContextName[(int)context]);
// TraceEvent("ChangeServerKeys", data->thisServerID)
// .detail("KeyBegin", keys.begin)
// .detail("KeyEnd", keys.end)
// .detail("NowAssigned", nowAssigned)
// .detail("Version", version)
// .detail("Context", changeServerKeysContextName[(int)context]);
validate(data);

// TODO(alexmiller): Figure out how to selectively enable spammy data distribution events.
// DEBUG_KEY_RANGE( nowAssigned ? "KeysAssigned" : "KeysUnassigned", version, keys );
DEBUG_KEY_RANGE(nowAssigned ? "KeysAssigned" : "KeysUnassigned", version, keys);

bool isDifferent = false;
auto existingShards = data->shards.intersectingRanges(keys);
for (auto it = existingShards.begin(); it != existingShards.end(); ++it) {
if (nowAssigned != it->value()->assigned()) {
isDifferent = true;
/*TraceEvent("CSKRangeDifferent", data->thisServerID)
.detail("KeyBegin", it->range().begin)
.detail("KeyEnd", it->range().end);*/
TraceEvent("CSKRangeDifferent", data->thisServerID)
.detail("KeyBegin", it->range().begin)
.detail("KeyEnd", it->range().end);
break;
}
}
if (!isDifferent) {
//TraceEvent("CSKShortCircuit", data->thisServerID)
// .detail("KeyBegin", keys.begin)
// .detail("KeyEnd", keys.end);
// TraceEvent("CSKShortCircuit", data->thisServerID).detail("KeyBegin", keys.begin).detail("KeyEnd", keys.end);
return;
}
@ -2990,13 +3023,13 @@ void changeServerKeys(StorageServer* data,
for (auto r = vr.begin(); r != vr.end(); ++r) {
KeyRangeRef range = keys & r->range();
bool dataAvailable = r->value() == latestVersion || r->value() >= version;
/*TraceEvent("CSKRange", data->thisServerID)
.detail("KeyBegin", range.begin)
.detail("KeyEnd", range.end)
.detail("Available", dataAvailable)
.detail("NowAssigned", nowAssigned)
.detail("NewestAvailable", r->value())
.detail("ShardState0", data->shards[range.begin]->debugDescribeState());*/
// TraceEvent("CSKRange", data->thisServerID)
// .detail("KeyBegin", range.begin)
// .detail("KeyEnd", range.end)
// .detail("Available", dataAvailable)
// .detail("NowAssigned", nowAssigned)
// .detail("NewestAvailable", r->value())
// .detail("ShardState0", data->shards[range.begin]->debugDescribeState());
if (!nowAssigned) {
if (dataAvailable) {
ASSERT(r->value() ==

@ -3098,6 +3131,7 @@ static const KeyValueRef persistFormat(LiteralStringRef(PERSIST_PREFIX "Format")
static const KeyRangeRef persistFormatReadableRange(LiteralStringRef("FoundationDB/StorageServer/1/2"),
LiteralStringRef("FoundationDB/StorageServer/1/5"));
static const KeyRef persistID = LiteralStringRef(PERSIST_PREFIX "ID");
static const KeyRef persistTssPairID = LiteralStringRef(PERSIST_PREFIX "tssPairID");

// (Potentially) change with the durable version or when fetchKeys completes
static const KeyRef persistVersion = LiteralStringRef(PERSIST_PREFIX "Version");

@ -3213,10 +3247,17 @@ private:
throw worker_removed();
} else if ((m.type == MutationRef::SetValue || m.type == MutationRef::ClearRange) &&
m.param1.substr(1).startsWith(serverTagPrefix)) {
bool matchesThisServer = decodeServerTagKey(m.param1.substr(1)) == data->thisServerID;
if ((m.type == MutationRef::SetValue && !matchesThisServer) ||
(m.type == MutationRef::ClearRange && matchesThisServer))
UID serverTagKey = decodeServerTagKey(m.param1.substr(1));
bool matchesThisServer = serverTagKey == data->thisServerID;
bool matchesTssPair = data->isTss() ? serverTagKey == data->tssPairID.get() : false;
if ((m.type == MutationRef::SetValue && !data->isTss() && !matchesThisServer) ||
(m.type == MutationRef::ClearRange && (matchesThisServer || (data->isTss() && matchesTssPair)))) {
throw worker_removed();
}
if (!data->isTss() && m.type == MutationRef::ClearRange && data->ssPairID.present() &&
serverTagKey == data->ssPairID.get()) {
data->clearSSWithTssPair();
}
} else if (m.type == MutationRef::SetValue && m.param1 == rebootWhenDurablePrivateKey) {
data->rebootAfterDurableVersion = currentVersion;
TraceEvent("RebootWhenDurableSet", data->thisServerID)

@ -3226,6 +3267,13 @@ private:
data->primaryLocality = BinaryReader::fromStringRef<int8_t>(m.param2, Unversioned());
auto& mLV = data->addVersionToMutationLog(data->data().getLatestVersion());
data->addMutationToMutationLog(mLV, MutationRef(MutationRef::SetValue, persistPrimaryLocality, m.param2));
} else if (m.type == MutationRef::SetValue && m.param1.substr(1).startsWith(tssMappingKeys.begin)) {
if (!data->isTss()) {
UID ssId = Codec<UID>::unpack(Tuple::unpack(m.param1.substr(1).removePrefix(tssMappingKeys.begin)));
UID tssId = Codec<UID>::unpack(Tuple::unpack(m.param2));
ASSERT(ssId == data->thisServerID);
data->setSSWithTssPair(tssId);
}
} else {
ASSERT(false); // Unknown private mutation
}
|
|||
wait(delayJittered(.005, TaskPriority::TLogPeekReply));
|
||||
}
|
||||
|
||||
if (g_network->isSimulated() && data->isTss() && g_simulator.tssMode == ISimulator::TSSMode::EnabledAddDelay &&
|
||||
data->tssFaultInjectTime.present() && data->tssFaultInjectTime.get() < now()) {
|
||||
if (deterministicRandom()->random01() < 0.01) {
|
||||
TraceEvent(SevWarnAlways, "TSSInjectDelayForever", data->thisServerID);
|
||||
// small random chance to just completely get stuck here, each tss should eventually hit this in this
|
||||
// mode
|
||||
wait(Never());
|
||||
} else {
|
||||
// otherwise pause for part of a second
|
||||
double delayTime = deterministicRandom()->random01();
|
||||
TraceEvent(SevWarnAlways, "TSSInjectDelay", data->thisServerID).detail("Delay", delayTime);
|
||||
wait(delay(delayTime));
|
||||
}
|
||||
}
|
||||
|
||||
while (data->byteSampleClearsTooLarge.get()) {
|
||||
wait(data->byteSampleClearsTooLarge.onChange());
|
||||
}
|
||||
|
@ -3295,8 +3358,9 @@ ACTOR Future<Void> update(StorageServer* data, bool* pReceivedUpdate) {
|
|||
break;
|
||||
}
|
||||
}
|
||||
if (cursor->popped() > 0)
|
||||
if (cursor->popped() > 0) {
|
||||
throw worker_removed();
|
||||
}
|
||||
|
||||
++data->counters.updateBatches;
|
||||
data->lastTLogVersion = cursor->getMaxKnownVersion();
|
||||
|
@ -3347,7 +3411,7 @@ ACTOR Future<Void> update(StorageServer* data, bool* pReceivedUpdate) {
|
|||
} else {
|
||||
MutationRef msg;
|
||||
cloneReader >> msg;
|
||||
//TraceEvent(SevDebug, "SSReadingLog", data->thisServerID).detail("Mutation", msg.toString());
|
||||
// TraceEvent(SevDebug, "SSReadingLog", data->thisServerID).detail("Mutation", msg.toString());
|
||||
|
||||
if (firstMutation && msg.param1.startsWith(systemKeys.end))
|
||||
hasPrivateData = true;
|
||||
|
@ -3455,7 +3519,15 @@ ACTOR Future<Void> update(StorageServer* data, bool* pReceivedUpdate) {
|
|||
Span span("SS:update"_loc, { spanContext });
|
||||
span.addTag("key"_sr, msg.param1);
|
||||
|
||||
if (ver != invalidVersion) { // This change belongs to a version < minVersion
|
||||
if (g_network->isSimulated() && data->isTss() &&
|
||||
g_simulator.tssMode == ISimulator::TSSMode::EnabledDropMutations &&
|
||||
data->tssFaultInjectTime.present() && data->tssFaultInjectTime.get() < now() &&
|
||||
(msg.type == MutationRef::SetValue || msg.type == MutationRef::ClearRange) && msg.param1.size() &&
|
||||
msg.param1[0] != 0xff && deterministicRandom()->random01() < 0.05) {
|
||||
TraceEvent(SevWarnAlways, "TSSInjectDropMutation", data->thisServerID)
|
||||
.detail("Mutation", msg.toString())
|
||||
.detail("Version", cloneCursor2->version().toString());
|
||||
} else if (ver != invalidVersion) { // This change belongs to a version < minVersion
|
||||
DEBUG_MUTATION("SSPeek", ver, msg).detail("ServerID", data->thisServerID);
|
||||
if (ver == 1) {
|
||||
TraceEvent("SSPeekMutation", data->thisServerID);
|
||||
|
@ -3519,9 +3591,23 @@ ACTOR Future<Void> update(StorageServer* data, bool* pReceivedUpdate) {
|
|||
if (data->otherError.getFuture().isReady())
|
||||
data->otherError.getFuture().get();
|
||||
|
||||
data->counters.fetchedVersions += (ver - data->version.get());
|
||||
++data->counters.fetchesFromLogs;
|
||||
Optional<UID> curSourceTLogID = cursor->getCurrentPeekLocation();
|
||||
|
||||
if (curSourceTLogID != data->sourceTLogID) {
|
||||
data->sourceTLogID = curSourceTLogID;
|
||||
|
||||
TraceEvent("StorageServerSourceTLogID", data->thisServerID)
|
||||
.detail("SourceTLogID",
|
||||
data->sourceTLogID.present() ? data->sourceTLogID.get().toString() : "unknown")
|
||||
.trackLatest(data->thisServerID.toString() + "/StorageServerSourceTLogID");
|
||||
}
|
||||
|
||||
data->noRecentUpdates.set(false);
|
||||
data->lastUpdate = now();
|
||||
data->version.set(ver); // Triggers replies to waiting gets for new version(s)
|
||||
|
||||
setDataVersion(data->thisServerID, data->version.get());
|
||||
if (data->otherError.getFuture().isReady())
|
||||
data->otherError.getFuture().get();
|
||||
|
@ -3683,6 +3769,9 @@ ACTOR Future<Void> updateStorage(StorageServer* data) {
|
|||
void StorageServerDisk::makeNewStorageServerDurable() {
|
||||
storage->set(persistFormat);
|
||||
storage->set(KeyValueRef(persistID, BinaryWriter::toValue(data->thisServerID, Unversioned())));
|
||||
if (data->tssPairID.present()) {
|
||||
storage->set(KeyValueRef(persistTssPairID, BinaryWriter::toValue(data->tssPairID.get(), Unversioned())));
|
||||
}
|
||||
storage->set(KeyValueRef(persistVersion, BinaryWriter::toValue(data->version.get(), Unversioned())));
|
||||
storage->set(KeyValueRef(persistShardAssignedKeys.begin.toString(), LiteralStringRef("0")));
|
||||
storage->set(KeyValueRef(persistShardAvailableKeys.begin.toString(), LiteralStringRef("0")));
|
||||
|
@ -3911,6 +4000,7 @@ ACTOR Future<Void> restoreByteSample(StorageServer* data,
|
|||
ACTOR Future<bool> restoreDurableState(StorageServer* data, IKeyValueStore* storage) {
|
||||
state Future<Optional<Value>> fFormat = storage->readValue(persistFormat.key);
|
||||
state Future<Optional<Value>> fID = storage->readValue(persistID);
|
||||
state Future<Optional<Value>> ftssPairID = storage->readValue(persistTssPairID);
|
||||
state Future<Optional<Value>> fVersion = storage->readValue(persistVersion);
|
||||
state Future<Optional<Value>> fLogProtocol = storage->readValue(persistLogProtocol);
|
||||
state Future<Optional<Value>> fPrimaryLocality = storage->readValue(persistPrimaryLocality);
|
||||
|
@ -3923,7 +4013,7 @@ ACTOR Future<bool> restoreDurableState(StorageServer* data, IKeyValueStore* stor
|
|||
restoreByteSample(data, storage, byteSampleSampleRecovered, startByteSampleRestore.getFuture());
|
||||
|
||||
TraceEvent("ReadingDurableState", data->thisServerID);
|
||||
wait(waitForAll(std::vector{ fFormat, fID, fVersion, fLogProtocol, fPrimaryLocality }));
|
||||
wait(waitForAll(std::vector{ fFormat, fID, ftssPairID, fVersion, fLogProtocol, fPrimaryLocality }));
|
||||
wait(waitForAll(std::vector{ fShardAssigned, fShardAvailable }));
|
||||
wait(byteSampleSampleRecovered.getFuture());
|
||||
TraceEvent("RestoringDurableState", data->thisServerID);
|
||||
|
@ -3943,7 +4033,12 @@ ACTOR Future<bool> restoreDurableState(StorageServer* data, IKeyValueStore* stor
|
|||
throw worker_recovery_failed();
|
||||
}
|
||||
data->thisServerID = BinaryReader::fromStringRef<UID>(fID.get().get(), Unversioned());
|
||||
data->sk = serverKeysPrefixFor(data->thisServerID).withPrefix(systemKeys.begin); // FFFF/serverKeys/[this server]/
|
||||
if (ftssPairID.get().present()) {
|
||||
data->setTssPair(BinaryReader::fromStringRef<UID>(ftssPairID.get().get(), Unversioned()));
|
||||
}
|
||||
|
||||
data->sk = serverKeysPrefixFor((data->tssPairID.present()) ? data->tssPairID.get() : data->thisServerID)
|
||||
.withPrefix(systemKeys.begin); // FFFF/serverKeys/[this server]/
|
||||
|
||||
if (fLogProtocol.get().present())
|
||||
data->logProtocol = BinaryReader::fromStringRef<ProtocolVersion>(fLogProtocol.get().get(), Unversioned());
|
||||
|
@ -3988,6 +4083,7 @@ ACTOR Future<bool> restoreDurableState(StorageServer* data, IKeyValueStore* stor
|
|||
wait(yield());
|
||||
}
|
||||
|
||||
// TODO: why is this seemingly random delay here?
|
||||
wait(delay(0.0001));
|
||||
|
||||
{
|
||||
|
@ -4235,20 +4331,30 @@ ACTOR Future<Void> metricsCore(StorageServer* self, StorageServerInterface ssi)
|
|||
|
||||
wait(self->byteSampleRecovery);
|
||||
|
||||
Tag tag = self->tag;
|
||||
self->actors.add(traceCounters("StorageMetrics",
|
||||
self->thisServerID,
|
||||
SERVER_KNOBS->STORAGE_LOGGING_DELAY,
|
||||
&self->counters.cc,
|
||||
self->thisServerID.toString() + "/StorageMetrics",
|
||||
[tag, self=self](TraceEvent& te) {
|
||||
te.detail("Tag", tag.toString());
|
||||
[self = self](TraceEvent& te) {
|
||||
te.detail("Tag", self->tag.toString());
|
||||
StorageBytes sb = self->storage.getStorageBytes();
|
||||
te.detail("KvstoreBytesUsed", sb.used);
|
||||
te.detail("KvstoreBytesFree", sb.free);
|
||||
te.detail("KvstoreBytesAvailable", sb.available);
|
||||
te.detail("KvstoreBytesTotal", sb.total);
|
||||
te.detail("KvstoreBytesTemp", sb.temp);
|
||||
if (self->isTss()) {
|
||||
te.detail("TSSPairID", self->tssPairID);
|
||||
te.detail("TSSJointID",
|
||||
UID(self->thisServerID.first() ^ self->tssPairID.get().first(),
|
||||
self->thisServerID.second() ^ self->tssPairID.get().second()));
|
||||
} else if (self->isSSWithTSSPair()) {
|
||||
te.detail("SSPairID", self->ssPairID);
|
||||
te.detail("TSSJointID",
|
||||
UID(self->thisServerID.first() ^ self->ssPairID.get().first(),
|
||||
self->thisServerID.second() ^ self->ssPairID.get().second()));
|
||||
}
|
||||
}));
|
||||
|
||||
loop {
|
||||
|
@ -4352,6 +4458,7 @@ ACTOR Future<Void> serveGetValueRequests(StorageServer* self, FutureStream<GetVa
|
|||
ACTOR Future<Void> serveGetKeyValuesRequests(StorageServer* self, FutureStream<GetKeyValuesRequest> getKeyValues) {
|
||||
loop {
|
||||
GetKeyValuesRequest req = waitNext(getKeyValues);
|
||||
|
||||
// Warning: This code is executed at extremely high priority (TaskPriority::LoadBalancedEndpoint), so downgrade
|
||||
// before doing real work
|
||||
self->actors.add(self->readGuard(req, getKeyValuesQ));
|
||||
|
@ -4649,18 +4756,19 @@ ACTOR Future<Void> memoryStoreRecover(IKeyValueStore* store, Reference<ClusterCo
|
|||
// create a temp client connect to DB
|
||||
Database cx = Database::createDatabase(connFile, Database::API_VERSION_LATEST);
|
||||
|
||||
state Transaction tr(cx);
|
||||
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(cx);
|
||||
state int noCanRemoveCount = 0;
|
||||
loop {
|
||||
try {
|
||||
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
|
||||
state bool canRemove = wait(canRemoveStorageServer(&tr, id));
|
||||
state bool canRemove = wait(canRemoveStorageServer(tr, id));
|
||||
if (!canRemove) {
|
||||
TEST(true); // it's possible that the caller had a transaction in flight that assigned keys to the
|
||||
// server. Wait for it to reverse its mistake.
|
||||
wait(delayJittered(SERVER_KNOBS->REMOVE_RETRY_DELAY, TaskPriority::UpdateStorage));
|
||||
tr.reset();
|
||||
tr->reset();
|
||||
TraceEvent("RemoveStorageServerRetrying")
|
||||
.detail("Count", noCanRemoveCount++)
|
||||
.detail("ServerID", id)
|
||||
|
@ -4670,21 +4778,28 @@ ACTOR Future<Void> memoryStoreRecover(IKeyValueStore* store, Reference<ClusterCo
|
|||
}
|
||||
} catch (Error& e) {
|
||||
state Error err = e;
|
||||
wait(tr.onError(e));
|
||||
wait(tr->onError(e));
|
||||
TraceEvent("RemoveStorageServerRetrying").error(err);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// for creating a new storage server
|
||||
ACTOR Future<Void> storageServer(IKeyValueStore* persistentData,
|
||||
StorageServerInterface ssi,
|
||||
Tag seedTag,
|
||||
Version tssSeedVersion,
|
||||
ReplyPromise<InitializeStorageReply> recruitReply,
|
||||
Reference<AsyncVar<ServerDBInfo>> db,
|
||||
std::string folder) {
|
||||
state StorageServer self(persistentData, db, ssi);
|
||||
if (ssi.isTss()) {
|
||||
self.setTssPair(ssi.tssPairID.get());
|
||||
ASSERT(self.isTss());
|
||||
}
|
||||
|
||||
self.sk = serverKeysPrefixFor(self.thisServerID).withPrefix(systemKeys.begin); // FFFF/serverKeys/[this server]/
|
||||
self.sk = serverKeysPrefixFor(self.tssPairID.present() ? self.tssPairID.get() : self.thisServerID)
|
||||
.withPrefix(systemKeys.begin); // FFFF/serverKeys/[this server]/
|
||||
self.folder = folder;
|
||||
|
||||
try {
|
||||
|
@ -4695,7 +4810,11 @@ ACTOR Future<Void> storageServer(IKeyValueStore* persistentData,
|
|||
std::pair<Version, Tag> verAndTag = wait(addStorageServer(
|
||||
self.cx, ssi)); // Might throw recruitment_failed in case of simultaneous master failure
|
||||
self.tag = verAndTag.second;
|
||||
self.setInitialVersion(verAndTag.first - 1);
|
||||
if (ssi.isTss()) {
|
||||
self.setInitialVersion(tssSeedVersion);
|
||||
} else {
|
||||
self.setInitialVersion(verAndTag.first - 1);
|
||||
}
|
||||
} else {
|
||||
self.tag = seedTag;
|
||||
}
|
||||
|
@ -4705,12 +4824,14 @@ ACTOR Future<Void> storageServer(IKeyValueStore* persistentData,
|
|||
|
||||
TraceEvent("StorageServerInit", ssi.id())
|
||||
.detail("Version", self.version.get())
|
||||
.detail("SeedTag", seedTag.toString());
|
||||
.detail("SeedTag", seedTag.toString())
|
||||
.detail("TssPair", ssi.isTss() ? ssi.tssPairID.get().toString() : "");
|
||||
InitializeStorageReply rep;
|
||||
rep.interf = ssi;
|
||||
rep.addedVersion = self.version.get();
|
||||
recruitReply.send(rep);
|
||||
self.byteSampleRecovery = Void();
|
||||
|
||||
wait(storageServerCore(&self, ssi));
|
||||
|
||||
throw internal_error();
|
||||
|
@ -4726,6 +4847,7 @@ ACTOR Future<Void> storageServer(IKeyValueStore* persistentData,
|
|||
}
|
||||
|
||||
ACTOR Future<Void> replaceInterface(StorageServer* self, StorageServerInterface ssi) {
|
||||
ASSERT(!ssi.isTss());
|
||||
state Transaction tr(self->cx);
|
||||
|
||||
loop {
|
||||
|
@ -4740,6 +4862,7 @@ ACTOR Future<Void> replaceInterface(StorageServer* self, StorageServerInterface
|
|||
GetStorageServerRejoinInfoRequest(ssi.id(), ssi.locality.dcId()))
|
||||
: Never())) {
|
||||
state GetStorageServerRejoinInfoReply rep = _rep;
|
||||
|
||||
try {
|
||||
tr.reset();
|
||||
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
|
@ -4758,6 +4881,7 @@ ACTOR Future<Void> replaceInterface(StorageServer* self, StorageServerInterface
|
|||
tagLocalityListValue(rep.newTag.get().locality));
|
||||
}
|
||||
|
||||
// this only should happen if SS moved datacenters
|
||||
if (rep.newTag.present()) {
|
||||
KeyRange conflictRange = singleKeyRange(serverTagConflictKeyFor(rep.newTag.get()));
|
||||
tr.addReadConflictRange(conflictRange);
|
||||
|
@@ -4813,6 +4937,49 @@ ACTOR Future<Void> replaceInterface(StorageServer* self, StorageServerInterface
    return Void();
}

ACTOR Future<Void> replaceTSSInterface(StorageServer* self, StorageServerInterface ssi) {
    // RYW for KeyBackedMap
    state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(self->cx);
    state KeyBackedMap<UID, UID> tssMapDB = KeyBackedMap<UID, UID>(tssMappingKeys.begin);

    ASSERT(ssi.isTss());

    loop {
        try {
            state Tag myTag;

            tr->reset();
            tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
            tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);

            Optional<Value> pairTagValue = wait(tr->get(serverTagKeyFor(self->tssPairID.get())));

            if (!pairTagValue.present()) {
                TEST(true); // Race where tss was down, pair was removed, tss starts back up
                throw worker_removed();
            }

            myTag = decodeServerTagValue(pairTagValue.get());

            tr->addReadConflictRange(singleKeyRange(serverListKeyFor(ssi.id())));
            tr->set(serverListKeyFor(ssi.id()), serverListValue(ssi));

            // add itself back to tss mapping
            tssMapDB.set(tr, self->tssPairID.get(), ssi.id());

            wait(tr->commit());
            self->tag = myTag;

            break;
        } catch (Error& e) {
            wait(tr->onError(e));
        }
    }

    return Void();
}

// for recovering an existing storage server
ACTOR Future<Void> storageServer(IKeyValueStore* persistentData,
                                 StorageServerInterface ssi,
                                 Reference<AsyncVar<ServerDBInfo>> db,
@ -4821,7 +4988,7 @@ ACTOR Future<Void> storageServer(IKeyValueStore* persistentData,
|
|||
Reference<ClusterConnectionFile> connFile) {
|
||||
state StorageServer self(persistentData, db, ssi);
|
||||
self.folder = folder;
|
||||
self.sk = serverKeysPrefixFor(self.thisServerID).withPrefix(systemKeys.begin); // FFFF/serverKeys/[this server]/
|
||||
|
||||
try {
|
||||
state double start = now();
|
||||
TraceEvent("StorageServerRebootStart", self.thisServerID);
|
||||
|
@@ -4846,13 +5013,30 @@ ACTOR Future<Void> storageServer(IKeyValueStore* persistentData,
        }
        TraceEvent("SSTimeRestoreDurableState", self.thisServerID).detail("TimeTaken", now() - start);

        // if this is a tss storage file, use that as source of truth for this server being a tss instead of the
        // presence of the tss pair key in the storage engine
        if (ssi.isTss()) {
            ASSERT(self.isTss());
            ssi.tssPairID = self.tssPairID.get();
        } else {
            ASSERT(!self.isTss());
        }

        ASSERT(self.thisServerID == ssi.id());

        self.sk = serverKeysPrefixFor(self.tssPairID.present() ? self.tssPairID.get() : self.thisServerID)
                      .withPrefix(systemKeys.begin); // FFFF/serverKeys/[this server]/

        TraceEvent("StorageServerReboot", self.thisServerID).detail("Version", self.version.get());

        if (recovered.canBeSet())
            recovered.send(Void());

        wait(replaceInterface(&self, ssi));
        if (self.isTss()) {
            wait(replaceTSSInterface(&self, ssi));
        } else {
            wait(replaceInterface(&self, ssi));
        }

        TraceEvent("StorageServerStartingCore", self.thisServerID).detail("TimeTaken", now() - start);

@ -869,6 +869,7 @@ ACTOR Future<Void> checkConsistency(Database cx,
|
|||
std::vector<TesterInterface> testers,
|
||||
bool doQuiescentCheck,
|
||||
bool doCacheCheck,
|
||||
bool doTSSCheck,
|
||||
double quiescentWaitTimeout,
|
||||
double softTimeLimit,
|
||||
double databasePingDelay,
|
||||
|
@ -885,12 +886,16 @@ ACTOR Future<Void> checkConsistency(Database cx,
|
|||
Standalone<VectorRef<KeyValueRef>> options;
|
||||
StringRef performQuiescent = LiteralStringRef("false");
|
||||
StringRef performCacheCheck = LiteralStringRef("false");
|
||||
StringRef performTSSCheck = LiteralStringRef("false");
|
||||
if (doQuiescentCheck) {
|
||||
performQuiescent = LiteralStringRef("true");
|
||||
}
|
||||
if (doCacheCheck) {
|
||||
performCacheCheck = LiteralStringRef("true");
|
||||
}
|
||||
if (doTSSCheck) {
|
||||
performTSSCheck = LiteralStringRef("true");
|
||||
}
|
||||
spec.title = LiteralStringRef("ConsistencyCheck");
|
||||
spec.databasePingDelay = databasePingDelay;
|
||||
spec.timeout = 32000;
|
||||
|
@ -898,6 +903,7 @@ ACTOR Future<Void> checkConsistency(Database cx,
|
|||
KeyValueRef(LiteralStringRef("testName"), LiteralStringRef("ConsistencyCheck")));
|
||||
options.push_back_deep(options.arena(), KeyValueRef(LiteralStringRef("performQuiescentChecks"), performQuiescent));
|
||||
options.push_back_deep(options.arena(), KeyValueRef(LiteralStringRef("performCacheCheck"), performCacheCheck));
|
||||
options.push_back_deep(options.arena(), KeyValueRef(LiteralStringRef("performTSSCheck"), performTSSCheck));
|
||||
options.push_back_deep(options.arena(),
|
||||
KeyValueRef(LiteralStringRef("quiescentWaitTimeout"),
|
||||
ValueRef(options.arena(), format("%f", quiescentWaitTimeout))));
|
||||
|
@ -973,6 +979,7 @@ ACTOR Future<bool> runTest(Database cx,
|
|||
testers,
|
||||
quiescent,
|
||||
spec.runConsistencyCheckOnCache,
|
||||
spec.runConsistencyCheckOnTSS,
|
||||
10000.0,
|
||||
18000,
|
||||
spec.databasePingDelay,
|
||||
|
@ -1108,6 +1115,11 @@ std::map<std::string, std::function<void(const std::string& value, TestSpec* spe
|
|||
spec->runConsistencyCheckOnCache = (value == "true");
|
||||
TraceEvent("TestParserTest").detail("ParsedRunConsistencyCheckOnCache", spec->runConsistencyCheckOnCache);
|
||||
} },
|
||||
{ "runConsistencyCheckOnTSS",
|
||||
[](const std::string& value, TestSpec* spec) {
|
||||
spec->runConsistencyCheckOnTSS = (value == "true");
|
||||
TraceEvent("TestParserTest").detail("ParsedRunConsistencyCheckOnTSS", spec->runConsistencyCheckOnTSS);
|
||||
} },
|
||||
{ "waitForQuiescence",
|
||||
[](const std::string& value, TestSpec* spec) {
|
||||
bool toWait = value == "true";
|
||||
|
@ -1249,20 +1261,6 @@ std::vector<TestSpec> readTOMLTests_(std::string fileName) {
|
|||
|
||||
const toml::value& conf = toml::parse(fileName);
|
||||
|
||||
// Handle all global settings
|
||||
for (const auto& [k, v] : conf.as_table()) {
|
||||
if (k == "test") {
|
||||
continue;
|
||||
}
|
||||
if (testSpecGlobalKeys.find(k) != testSpecGlobalKeys.end()) {
|
||||
testSpecGlobalKeys[k](toml_to_string(v));
|
||||
} else {
|
||||
TraceEvent(SevError, "TestSpecUnrecognizedGlobalParam")
|
||||
.detail("Attrib", k)
|
||||
.detail("Value", toml_to_string(v));
|
||||
}
|
||||
}
|
||||
|
||||
// Then parse each test
|
||||
const toml::array& tests = toml::find(conf, "test").as_array();
|
||||
for (const toml::value& test : tests) {
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include <boost/lexical_cast.hpp>
|
||||
|
||||
#include "fdbrpc/Locality.h"
|
||||
#include "fdbclient/GlobalConfig.actor.h"
|
||||
#include "fdbclient/StorageServerInterface.h"
|
||||
#include "fdbserver/Knobs.h"
|
||||
#include "flow/ActorCollection.h"
|
||||
|
@ -139,12 +140,14 @@ Database openDBOnServer(Reference<AsyncVar<ServerDBInfo>> const& db,
|
|||
bool enableLocalityLoadBalance,
|
||||
bool lockAware) {
|
||||
auto info = makeReference<AsyncVar<ClientDBInfo>>();
|
||||
return DatabaseContext::create(info,
|
||||
extractClientInfo(db, info),
|
||||
enableLocalityLoadBalance ? db->get().myLocality : LocalityData(),
|
||||
enableLocalityLoadBalance,
|
||||
taskID,
|
||||
lockAware);
|
||||
auto cx = DatabaseContext::create(info,
|
||||
extractClientInfo(db, info),
|
||||
enableLocalityLoadBalance ? db->get().myLocality : LocalityData(),
|
||||
enableLocalityLoadBalance,
|
||||
taskID,
|
||||
lockAware);
|
||||
GlobalConfig::create(cx, db, std::addressof(db->get().client));
|
||||
return cx;
|
||||
}
|
||||
|
||||
struct ErrorInfo {
|
||||
|
@ -272,6 +275,7 @@ ACTOR Future<Void> loadedPonger(FutureStream<LoadedPingRequest> pings) {
|
|||
}
|
||||
|
||||
StringRef fileStoragePrefix = LiteralStringRef("storage-");
|
||||
StringRef testingStoragePrefix = LiteralStringRef("testingstorage-");
|
||||
StringRef fileLogDataPrefix = LiteralStringRef("log-");
|
||||
StringRef fileVersionedLogDataPrefix = LiteralStringRef("log2-");
|
||||
StringRef fileLogQueuePrefix = LiteralStringRef("logqueue-");
|
||||
|
@ -315,6 +319,7 @@ std::string filenameFromSample(KeyValueStoreType storeType, std::string folder,
|
|||
}
|
||||
|
||||
std::string filenameFromId(KeyValueStoreType storeType, std::string folder, std::string prefix, UID id) {
|
||||
|
||||
if (storeType == KeyValueStoreType::SSD_BTREE_V1)
|
||||
return joinPath(folder, prefix + id.toString() + ".fdb");
|
||||
else if (storeType == KeyValueStoreType::SSD_BTREE_V2)
|
||||
|
@ -326,6 +331,7 @@ std::string filenameFromId(KeyValueStoreType storeType, std::string folder, std:
|
|||
else if (storeType == KeyValueStoreType::SSD_ROCKSDB_V1)
|
||||
return joinPath(folder, prefix + id.toString() + ".rocksdb");
|
||||
|
||||
TraceEvent(SevError, "UnknownStoreType").detail("StoreType", storeType.toString());
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
|
@ -444,6 +450,9 @@ std::vector<DiskStore> getDiskStores(std::string folder,
|
|||
if (filename.startsWith(fileStoragePrefix)) {
|
||||
store.storedComponent = DiskStore::Storage;
|
||||
prefix = fileStoragePrefix;
|
||||
} else if (filename.startsWith(testingStoragePrefix)) {
|
||||
store.storedComponent = DiskStore::Storage;
|
||||
prefix = testingStoragePrefix;
|
||||
} else if (filename.startsWith(fileVersionedLogDataPrefix)) {
|
||||
store.storedComponent = DiskStore::TLogData;
|
||||
// Use the option string that's in the file rather than tLogOptions.toPrefix(),
|
||||
|
@ -739,6 +748,7 @@ ACTOR Future<Void> storageServerRollbackRebooter(Future<Void> prevStorageServer,
|
|||
std::string filename,
|
||||
UID id,
|
||||
LocalityData locality,
|
||||
bool isTss,
|
||||
Reference<AsyncVar<ServerDBInfo>> db,
|
||||
std::string folder,
|
||||
ActorCollection* filesClosed,
|
||||
|
@ -756,6 +766,9 @@ ACTOR Future<Void> storageServerRollbackRebooter(Future<Void> prevStorageServer,
|
|||
StorageServerInterface recruited;
|
||||
recruited.uniqueID = id;
|
||||
recruited.locality = locality;
|
||||
recruited.tssPairID =
|
||||
isTss ? Optional<UID>(UID()) : Optional<UID>(); // set this here since we use its presence to determine
|
||||
// whether this server is a tss or not
|
||||
recruited.initEndpoints();
|
||||
|
||||
DUMPTOKEN(recruited.getValue);
|
||||
|
@@ -1097,14 +1110,27 @@ ACTOR Future<Void> workerServer(Reference<ClusterConnectionFile> connFile,
                Future<Void> kvClosed = kv->onClosed();
                filesClosed.add(kvClosed);

                // std::string doesn't have startsWith
                std::string tssPrefix = testingStoragePrefix.toString();
                // TODO might be more efficient to mark a boolean on DiskStore in getDiskStores, but that kind of breaks
                // the abstraction since DiskStore also applies to storage cache + tlog
                bool isTss = s.filename.find(tssPrefix) != std::string::npos;
                Role ssRole = isTss ? Role::TESTING_STORAGE_SERVER : Role::STORAGE_SERVER;
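
A quick aside on the prefix test above: std::string only gains starts_with in C++20, so the code keys off find(), which matches the "testingstorage-" prefix anywhere in the path (the prefix sits after the folder component, so this is sufficient here). A minimal sketch of a strict prefix check in plain C++17; the helper name is illustrative and not part of the codebase:

    #include <string>

    // Hypothetical helper: strict prefix test, equivalent to C++20 std::string::starts_with.
    inline bool startsWithPrefix(const std::string& s, const std::string& prefix) {
        return s.size() >= prefix.size() && s.compare(0, prefix.size(), prefix) == 0;
    }

    // Usage sketch: apply it to the file's basename, where the store prefix actually begins,
    // e.g. startsWithPrefix("testingstorage-abc123.fdb", "testingstorage-") == true.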

                StorageServerInterface recruited;
                recruited.uniqueID = s.storeID;
                recruited.locality = locality;
                recruited.tssPairID =
                    isTss ? Optional<UID>(UID())
                          : Optional<UID>(); // presence of optional is used as source of truth for tss vs not. Value
                                             // gets overridden later in restoreDurableState
                recruited.initEndpoints();

                std::map<std::string, std::string> details;
                details["StorageEngine"] = s.storeType.toString();
                startRole(Role::STORAGE_SERVER, recruited.id(), interf.id(), details, "Restored");
                details["IsTSS"] = isTss ? "Yes" : "No";

                startRole(ssRole, recruited.id(), interf.id(), details, "Restored");

                DUMPTOKEN(recruited.getValue);
                DUMPTOKEN(recruited.getKey);
@ -1129,12 +1155,13 @@ ACTOR Future<Void> workerServer(Reference<ClusterConnectionFile> connFile,
|
|||
s.filename,
|
||||
recruited.id(),
|
||||
recruited.locality,
|
||||
isTss,
|
||||
dbInfo,
|
||||
folder,
|
||||
&filesClosed,
|
||||
memoryLimit,
|
||||
kv);
|
||||
errorForwarders.add(forwardError(errors, Role::STORAGE_SERVER, recruited.id(), f));
|
||||
errorForwarders.add(forwardError(errors, ssRole, recruited.id(), f));
|
||||
} else if (s.storedComponent == DiskStore::TLogData) {
|
||||
std::string logQueueBasename;
|
||||
const std::string filename = basename(s.filename);
|
||||
|
@ -1268,7 +1295,6 @@ ACTOR Future<Void> workerServer(Reference<ClusterConnectionFile> connFile,
|
|||
notUpdated = interf.updateServerDBInfo.getEndpoint();
|
||||
} else if (localInfo.infoGeneration > dbInfo->get().infoGeneration ||
|
||||
dbInfo->get().clusterInterface != ccInterface->get().get()) {
|
||||
|
||||
TraceEvent("GotServerDBInfoChange")
|
||||
.detail("ChangeID", localInfo.id)
|
||||
.detail("MasterID", localInfo.master.id())
|
||||
|
@ -1487,13 +1513,19 @@ ACTOR Future<Void> workerServer(Reference<ClusterConnectionFile> connFile,
|
|||
}
|
||||
when(InitializeStorageRequest req = waitNext(interf.storage.getFuture())) {
|
||||
if (!storageCache.exists(req.reqId)) {
|
||||
|
||||
bool isTss = req.tssPairIDAndVersion.present();
|
||||
|
||||
StorageServerInterface recruited(req.interfaceId);
|
||||
recruited.locality = locality;
|
||||
recruited.tssPairID = isTss ? req.tssPairIDAndVersion.get().first : Optional<UID>();
|
||||
recruited.initEndpoints();
|
||||
|
||||
std::map<std::string, std::string> details;
|
||||
details["StorageEngine"] = req.storeType.toString();
|
||||
startRole(Role::STORAGE_SERVER, recruited.id(), interf.id(), details);
|
||||
details["IsTSS"] = std::to_string(isTss);
|
||||
Role ssRole = isTss ? Role::TESTING_STORAGE_SERVER : Role::STORAGE_SERVER;
|
||||
startRole(ssRole, recruited.id(), interf.id(), details);
|
||||
|
||||
DUMPTOKEN(recruited.getValue);
|
||||
DUMPTOKEN(recruited.getKey);
|
||||
|
@ -1511,13 +1543,22 @@ ACTOR Future<Void> workerServer(Reference<ClusterConnectionFile> connFile,
|
|||
// printf("Recruited as storageServer\n");
|
||||
|
||||
std::string filename =
|
||||
filenameFromId(req.storeType, folder, fileStoragePrefix.toString(), recruited.id());
|
||||
filenameFromId(req.storeType,
|
||||
folder,
|
||||
isTss ? testingStoragePrefix.toString() : fileStoragePrefix.toString(),
|
||||
recruited.id());
|
||||
IKeyValueStore* data = openKVStore(req.storeType, filename, recruited.id(), memoryLimit);
|
||||
Future<Void> kvClosed = data->onClosed();
|
||||
filesClosed.add(kvClosed);
|
||||
ReplyPromise<InitializeStorageReply> storageReady = req.reply;
|
||||
storageCache.set(req.reqId, storageReady.getFuture());
|
||||
Future<Void> s = storageServer(data, recruited, req.seedTag, storageReady, dbInfo, folder);
|
||||
Future<Void> s = storageServer(data,
|
||||
recruited,
|
||||
req.seedTag,
|
||||
isTss ? req.tssPairIDAndVersion.get().second : 0,
|
||||
storageReady,
|
||||
dbInfo,
|
||||
folder);
|
||||
s = handleIOErrors(s, data, recruited.id(), kvClosed);
|
||||
s = storageCache.removeOnReady(req.reqId, s);
|
||||
s = storageServerRollbackRebooter(s,
|
||||
|
@ -1525,12 +1566,13 @@ ACTOR Future<Void> workerServer(Reference<ClusterConnectionFile> connFile,
|
|||
filename,
|
||||
recruited.id(),
|
||||
recruited.locality,
|
||||
isTss,
|
||||
dbInfo,
|
||||
folder,
|
||||
&filesClosed,
|
||||
memoryLimit,
|
||||
data);
|
||||
errorForwarders.add(forwardError(errors, Role::STORAGE_SERVER, recruited.id(), s));
|
||||
errorForwarders.add(forwardError(errors, ssRole, recruited.id(), s));
|
||||
} else
|
||||
forwardPromise(req.reply, storageCache.get(req.reqId));
|
||||
}
|
||||
|
@ -2047,7 +2089,7 @@ ACTOR Future<Void> fdbd(Reference<ClusterConnectionFile> connFile,
|
|||
if (coordFolder.size()) {
|
||||
// SOMEDAY: remove the fileNotFound wrapper and make DiskQueue construction safe from errors setting up
|
||||
// their files
|
||||
actors.push_back(fileNotFoundToNever(coordinationServer(coordFolder)));
|
||||
actors.push_back(fileNotFoundToNever(coordinationServer(coordFolder, coordinators.ccf)));
|
||||
}
|
||||
|
||||
state UID processIDUid = wait(createAndLockProcessIdFile(dataFolder));
|
||||
|
@ -2111,6 +2153,7 @@ ACTOR Future<Void> fdbd(Reference<ClusterConnectionFile> connFile,
|
|||
|
||||
const Role Role::WORKER("Worker", "WK", false);
|
||||
const Role Role::STORAGE_SERVER("StorageServer", "SS");
|
||||
const Role Role::TESTING_STORAGE_SERVER("TestingStorageServer", "ST");
|
||||
const Role Role::TRANSACTION_LOG("TLog", "TL");
|
||||
const Role Role::SHARED_TRANSACTION_LOG("SharedTLog", "SL", false);
|
||||
const Role Role::COMMIT_PROXY("CommitProxyServer", "CP");
|
||||
|
|
|
@ -270,6 +270,7 @@ struct ConfigureDatabaseWorkload : TestWorkload {
|
|||
return Void();
|
||||
}
|
||||
state int randomChoice = deterministicRandom()->randomInt(0, 8);
|
||||
|
||||
if (randomChoice == 0) {
|
||||
wait(success(
|
||||
runRYWTransaction(cx, [=](Reference<ReadYourWritesTransaction> tr) -> Future<Optional<Value>> {
|
||||
|
@ -316,8 +317,14 @@ struct ConfigureDatabaseWorkload : TestWorkload {
|
|||
} else if (randomChoice == 4) {
|
||||
//TraceEvent("ConfigureTestQuorumBegin").detail("NewQuorum", s);
|
||||
auto ch = autoQuorumChange();
|
||||
std::string desiredClusterName = "NewName%d";
|
||||
if (!SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT) {
|
||||
// if configuration does not allow changing the descriptor, pass empty string (keep old descriptor)
|
||||
desiredClusterName = "";
|
||||
}
|
||||
if (deterministicRandom()->randomInt(0, 2))
|
||||
ch = nameQuorumChange(format("NewName%d", deterministicRandom()->randomInt(0, 100)), ch);
|
||||
ch = nameQuorumChange(format(desiredClusterName.c_str(), deterministicRandom()->randomInt(0, 100)),
|
||||
ch);
|
||||
wait(success(changeQuorum(cx, ch)));
|
||||
//TraceEvent("ConfigureTestConfigureEnd").detail("NewQuorum", s);
|
||||
} else if (randomChoice == 5) {
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
#include "fdbserver/StorageMetrics.h"
|
||||
#include "fdbserver/DataDistribution.actor.h"
|
||||
#include "fdbserver/QuietDatabase.h"
|
||||
#include "fdbserver/TSSMappingUtil.actor.h"
|
||||
#include "flow/DeterministicRandom.h"
|
||||
#include "fdbclient/ManagementAPI.actor.h"
|
||||
#include "fdbclient/StorageServerInterface.h"
|
||||
|
@ -48,6 +49,9 @@ struct ConsistencyCheckWorkload : TestWorkload {
|
|||
// Whether or not perform consistency check between storage cache servers and storage servers
|
||||
bool performCacheCheck;
|
||||
|
||||
// Whether or not to perform consistency check between storage servers and pair TSS
|
||||
bool performTSSCheck;
|
||||
|
||||
// How long to wait for the database to go quiet before failing (if doing quiescent checks)
|
||||
double quiescentWaitTimeout;
|
||||
|
||||
|
@ -94,6 +98,7 @@ struct ConsistencyCheckWorkload : TestWorkload {
|
|||
ConsistencyCheckWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {
|
||||
performQuiescentChecks = getOption(options, LiteralStringRef("performQuiescentChecks"), false);
|
||||
performCacheCheck = getOption(options, LiteralStringRef("performCacheCheck"), false);
|
||||
performTSSCheck = getOption(options, LiteralStringRef("performTSSCheck"), true);
|
||||
quiescentWaitTimeout = getOption(options, LiteralStringRef("quiescentWaitTimeout"), 600.0);
|
||||
distributed = getOption(options, LiteralStringRef("distributed"), true);
|
||||
shardSampleFactor = std::max(getOption(options, LiteralStringRef("shardSampleFactor"), 1), 1);
|
||||
|
@ -205,11 +210,16 @@ struct ConsistencyCheckWorkload : TestWorkload {
|
|||
if (self->firstClient || self->distributed) {
|
||||
try {
|
||||
state DatabaseConfiguration configuration;
|
||||
state std::map<UID, StorageServerInterface> tssMapping;
|
||||
|
||||
state Transaction tr(cx);
|
||||
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
loop {
|
||||
try {
|
||||
if (self->performTSSCheck) {
|
||||
tssMapping.clear();
|
||||
wait(readTSSMapping(&tr, &tssMapping));
|
||||
}
|
||||
RangeResult res = wait(tr.getRange(configKeys, 1000));
|
||||
if (res.size() == 1000) {
|
||||
TraceEvent("ConsistencyCheck_TooManyConfigOptions");
|
||||
|
@ -282,7 +292,7 @@ struct ConsistencyCheckWorkload : TestWorkload {
|
|||
throw;
|
||||
}
|
||||
|
||||
wait(::success(self->checkForStorage(cx, configuration, self)));
|
||||
wait(::success(self->checkForStorage(cx, configuration, tssMapping, self)));
|
||||
wait(::success(self->checkForExtraDataStores(cx, self)));
|
||||
|
||||
// Check that each machine is operating as its desired class
|
||||
|
@ -313,7 +323,7 @@ struct ConsistencyCheckWorkload : TestWorkload {
|
|||
state Standalone<VectorRef<KeyValueRef>> keyLocations = keyLocationPromise.getFuture().get();
|
||||
|
||||
// Check that each shard has the same data on all storage servers that it resides on
|
||||
wait(::success(self->checkDataConsistency(cx, keyLocations, configuration, self)));
|
||||
wait(::success(self->checkDataConsistency(cx, keyLocations, configuration, tssMapping, self)));
|
||||
|
||||
// Cache consistency check
|
||||
if (self->performCacheCheck)
|
||||
|
@ -1057,7 +1067,9 @@ struct ConsistencyCheckWorkload : TestWorkload {
|
|||
TraceEvent("ConsistencyCheck_FailedToFetchMetrics")
|
||||
.detail("Begin", printable(shard.begin))
|
||||
.detail("End", printable(shard.end))
|
||||
.detail("StorageServer", storageServers[i].id());
|
||||
.detail("StorageServer", storageServers[i].id())
|
||||
.detail("IsTSS", storageServers[i].isTss() ? "True" : "False")
|
||||
.error(reply.getError());
|
||||
estimatedBytes.push_back(-1);
|
||||
}
|
||||
|
||||
|
@ -1074,7 +1086,11 @@ struct ConsistencyCheckWorkload : TestWorkload {
|
|||
.detail("Begin", printable(shard.begin))
|
||||
.detail("End", printable(shard.end))
|
||||
.detail("StorageServer1", storageServers[firstValidStorageServer].id())
|
||||
.detail("StorageServer2", storageServers[i].id());
|
||||
.detail("StorageServer2", storageServers[i].id())
|
||||
.detail("IsTSS",
|
||||
storageServers[i].isTss() || storageServers[firstValidStorageServer].isTss()
|
||||
? "True"
|
||||
: "False");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1114,6 +1130,7 @@ struct ConsistencyCheckWorkload : TestWorkload {
|
|||
ACTOR Future<bool> checkDataConsistency(Database cx,
|
||||
VectorRef<KeyValueRef> keyLocations,
|
||||
DatabaseConfiguration configuration,
|
||||
std::map<UID, StorageServerInterface> tssMapping,
|
||||
ConsistencyCheckWorkload* self) {
|
||||
// Stores the total number of bytes on each storage server
|
||||
// In a distributed test, this will be an estimated size
|
||||
|
@@ -1236,6 +1253,19 @@ struct ConsistencyCheckWorkload : TestWorkload {
                    }
                }

                // add TSS to end of list, if configured and if not relocating
                if (!isRelocating && self->performTSSCheck) {
                    int initialSize = storageServers.size();
                    for (int i = 0; i < initialSize; i++) {
                        auto tssPair = tssMapping.find(storageServers[i]);
                        if (tssPair != tssMapping.end()) {
                            TEST(true); // TSS checked in consistency check
                            storageServers.push_back(tssPair->second.id());
                            storageServerInterfaces.push_back(tssPair->second);
                        }
                    }
                }

                state vector<int64_t> estimatedBytes = wait(self->getStorageSizeEstimate(storageServerInterfaces, range));

                // Gets permitted size range of shard
@ -1323,7 +1353,8 @@ struct ConsistencyCheckWorkload : TestWorkload {
|
|||
// Be especially verbose if in simulation
|
||||
if (g_network->isSimulated()) {
|
||||
int invalidIndex = -1;
|
||||
printf("\nSERVER %d (%s); shard = %s - %s:\n",
|
||||
printf("\n%sSERVER %d (%s); shard = %s - %s:\n",
|
||||
storageServerInterfaces[j].isTss() ? "TSS " : "",
|
||||
j,
|
||||
storageServerInterfaces[j].address().toString().c_str(),
|
||||
printable(req.begin.getKey()).c_str(),
|
||||
|
@ -1341,7 +1372,8 @@ struct ConsistencyCheckWorkload : TestWorkload {
|
|||
}
|
||||
|
||||
printf(
|
||||
"\nSERVER %d (%s); shard = %s - %s:\n",
|
||||
"\n%sSERVER %d (%s); shard = %s - %s:\n",
|
||||
storageServerInterfaces[firstValidServer].isTss() ? "TSS " : "",
|
||||
firstValidServer,
|
||||
storageServerInterfaces[firstValidServer].address().toString().c_str(),
|
||||
printable(req.begin.getKey()).c_str(),
|
||||
|
@ -1430,16 +1462,29 @@ struct ConsistencyCheckWorkload : TestWorkload {
|
|||
printable(referenceUniqueKey))
|
||||
.detail("ValueMismatches", valueMismatches)
|
||||
.detail("ValueMismatchKey", printable(valueMismatchKey))
|
||||
.detail("MatchingKVPairs", matchingKVPairs);
|
||||
.detail("MatchingKVPairs", matchingKVPairs)
|
||||
.detail("IsTSS",
|
||||
storageServerInterfaces[j].isTss() ||
|
||||
storageServerInterfaces[firstValidServer].isTss()
|
||||
? "True"
|
||||
: "False");
|
||||
|
||||
self->testFailure("Data inconsistent", true);
|
||||
return false;
|
||||
if ((g_network->isSimulated() &&
|
||||
g_simulator.tssMode != ISimulator::TSSMode::EnabledDropMutations) ||
|
||||
(!storageServerInterfaces[j].isTss() &&
|
||||
!storageServerInterfaces[firstValidServer].isTss())) {
|
||||
self->testFailure("Data inconsistent", true);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If the data is not available and we aren't relocating this shard
|
||||
else if (!isRelocating) {
|
||||
Error e =
|
||||
rangeResult.isError() ? rangeResult.getError() : rangeResult.get().error.get();
|
||||
|
||||
TraceEvent("ConsistencyCheck_StorageServerUnavailable")
|
||||
.suppressFor(1.0)
|
||||
.detail("StorageServer", storageServers[j])
|
||||
|
@ -1448,10 +1493,15 @@ struct ConsistencyCheckWorkload : TestWorkload {
|
|||
.detail("Address", storageServerInterfaces[j].address())
|
||||
.detail("UID", storageServerInterfaces[j].id())
|
||||
.detail("GetKeyValuesToken",
|
||||
storageServerInterfaces[j].getKeyValues.getEndpoint().token);
|
||||
storageServerInterfaces[j].getKeyValues.getEndpoint().token)
|
||||
.detail("IsTSS", storageServerInterfaces[j].isTss() ? "True" : "False")
|
||||
.error(e);
|
||||
|
||||
// All shards should be available in quiscence
|
||||
if (self->performQuiescentChecks) {
|
||||
if (self->performQuiescentChecks &&
|
||||
((g_network->isSimulated() &&
|
||||
g_simulator.tssMode != ISimulator::TSSMode::EnabledAddDelay) ||
|
||||
!storageServerInterfaces[j].isTss())) {
|
||||
self->testFailure("Storage server unavailable");
|
||||
return false;
|
||||
}
|
||||
|
@ -1552,13 +1602,18 @@ struct ConsistencyCheckWorkload : TestWorkload {
|
|||
TraceEvent("ConsistencyCheck_IncorrectEstimate")
|
||||
.detail("EstimatedBytes", estimatedBytes[j])
|
||||
.detail("CorrectSampledBytes", sampledBytes)
|
||||
.detail("StorageServer", storageServers[j]);
|
||||
self->testFailure("Storage servers had incorrect sampled estimate");
|
||||
.detail("StorageServer", storageServers[j])
|
||||
.detail("IsTSS", storageServerInterfaces[j].isTss() ? "True" : "False");
|
||||
|
||||
if (!storageServerInterfaces[j].isTss()) {
|
||||
self->testFailure("Storage servers had incorrect sampled estimate");
|
||||
}
|
||||
|
||||
hasValidEstimate = false;
|
||||
|
||||
break;
|
||||
} else if (estimatedBytes[j] < 0) {
|
||||
} else if (estimatedBytes[j] < 0 &&
|
||||
(g_network->isSimulated() || !storageServerInterfaces[j].isTss())) {
|
||||
self->testFailure("Could not get storage metrics from server");
|
||||
hasValidEstimate = false;
|
||||
break;
|
||||
|
@ -1670,7 +1725,10 @@ struct ConsistencyCheckWorkload : TestWorkload {
|
|||
if (!keyValueStoreType.present()) {
|
||||
TraceEvent("ConsistencyCheck_ServerUnavailable").detail("ServerID", storageServers[i].id());
|
||||
self->testFailure("Storage server unavailable");
|
||||
} else if (keyValueStoreType.get() != configuration.storageServerStoreType) {
|
||||
} else if ((!storageServers[i].isTss() &&
|
||||
keyValueStoreType.get() != configuration.storageServerStoreType) ||
|
||||
(storageServers[i].isTss() &&
|
||||
keyValueStoreType.get() != configuration.testingStorageServerStoreType)) {
|
||||
TraceEvent("ConsistencyCheck_WrongKeyValueStoreType")
|
||||
.detail("ServerID", storageServers[i].id())
|
||||
.detail("StoreType", keyValueStoreType.get().toString())
|
||||
|
@ -1698,10 +1756,11 @@ struct ConsistencyCheckWorkload : TestWorkload {
|
|||
// Returns false if any worker that should have a storage server does not have one
|
||||
ACTOR Future<bool> checkForStorage(Database cx,
|
||||
DatabaseConfiguration configuration,
|
||||
std::map<UID, StorageServerInterface> tssMapping,
|
||||
ConsistencyCheckWorkload* self) {
|
||||
state vector<WorkerDetails> workers = wait(getWorkers(self->dbInfo));
|
||||
state vector<StorageServerInterface> storageServers = wait(getStorageServers(cx));
|
||||
std::set<Optional<Key>> missingStorage;
|
||||
std::vector<Optional<Key>> missingStorage; // vector instead of a set to get the count
|
||||
|
||||
for (int i = 0; i < workers.size(); i++) {
|
||||
NetworkAddress addr = workers[i].interf.stableAddress();
|
||||
|
@ -1720,21 +1779,48 @@ struct ConsistencyCheckWorkload : TestWorkload {
|
|||
.detail("Address", addr)
|
||||
.detail("ProcessClassEqualToStorageClass",
|
||||
(int)(workers[i].processClass == ProcessClass::StorageClass));
|
||||
missingStorage.insert(workers[i].interf.locality.dcId());
|
||||
missingStorage.push_back(workers[i].interf.locality.dcId());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int missingDc0 = configuration.regions.size() == 0
|
||||
? 0
|
||||
: std::count(missingStorage.begin(), missingStorage.end(), configuration.regions[0].dcId);
|
||||
int missingDc1 = configuration.regions.size() < 2
|
||||
? 0
|
||||
: std::count(missingStorage.begin(), missingStorage.end(), configuration.regions[1].dcId);
|
||||
|
||||
if ((configuration.regions.size() == 0 && missingStorage.size()) ||
|
||||
(configuration.regions.size() == 1 && missingStorage.count(configuration.regions[0].dcId)) ||
|
||||
(configuration.regions.size() == 2 && configuration.usableRegions == 1 &&
|
||||
missingStorage.count(configuration.regions[0].dcId) &&
|
||||
missingStorage.count(configuration.regions[1].dcId)) ||
|
||||
(configuration.regions.size() == 2 && configuration.usableRegions > 1 &&
|
||||
(missingStorage.count(configuration.regions[0].dcId) ||
|
||||
missingStorage.count(configuration.regions[1].dcId)))) {
|
||||
self->testFailure("No storage server on worker");
|
||||
return false;
|
||||
(configuration.regions.size() == 1 && missingDc0) ||
|
||||
(configuration.regions.size() == 2 && configuration.usableRegions == 1 && missingDc0 && missingDc1) ||
|
||||
(configuration.regions.size() == 2 && configuration.usableRegions > 1 && (missingDc0 || missingDc1))) {
|
||||
|
||||
// TODO could improve this check by also ensuring DD is currently recruiting a TSS by using quietdb?
|
||||
bool couldExpectMissingTss = (configuration.desiredTSSCount - tssMapping.size()) > 0;
|
||||
|
||||
int countMissing = missingStorage.size();
|
||||
int acceptableTssMissing = 1;
|
||||
if (configuration.regions.size() == 1) {
|
||||
countMissing = missingDc0;
|
||||
} else if (configuration.regions.size() == 2) {
|
||||
if (configuration.usableRegions == 1) {
|
||||
// all processes should be missing from 1, so take the number missing from the other
|
||||
countMissing = std::min(missingDc0, missingDc1);
|
||||
} else if (configuration.usableRegions == 2) {
|
||||
countMissing = missingDc0 + missingDc1;
|
||||
acceptableTssMissing = 2;
|
||||
} else {
|
||||
ASSERT(false); // in case fdb ever adds 3+ region support?
|
||||
}
|
||||
}
|
||||
|
||||
if (!couldExpectMissingTss || countMissing > acceptableTssMissing) {
|
||||
self->testFailure("No storage server on worker");
|
||||
return false;
|
||||
} else {
|
||||
TraceEvent(SevWarn, "ConsistencyCheck_TSSMissing");
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
|
@ -162,12 +162,13 @@ struct MoveKeysWorkload : TestWorkload {
|
|||
// The real data distribution algorithm doesn't want to deal with multiple servers
|
||||
// with the same address having keys. So if there are two servers with the same address,
|
||||
// don't use either one (so we don't have to find out which of them, if any, already has keys).
|
||||
// Also get rid of tss since we don't want to move a shard to a tss.
|
||||
std::map<NetworkAddress, int> count;
|
||||
for (int s = 0; s < servers.size(); s++)
|
||||
count[servers[s].address()]++;
|
||||
int o = 0;
|
||||
for (int s = 0; s < servers.size(); s++)
|
||||
if (count[servers[s].address()] == 1)
|
||||
if (count[servers[s].address()] == 1 && !servers[s].isTss())
|
||||
servers[o++] = servers[s];
|
||||
servers.resize(o);
|
||||
}
|
||||
|
|
|
@ -624,7 +624,7 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
|
|||
|
||||
ACTOR Future<Void> managementApiCorrectnessActor(Database cx_, SpecialKeySpaceCorrectnessWorkload* self) {
|
||||
// All management api related tests
|
||||
Database cx = cx_->clone();
|
||||
state Database cx = cx_->clone();
|
||||
state Reference<ReadYourWritesTransaction> tx = makeReference<ReadYourWritesTransaction>(cx);
|
||||
// test ordered option keys
|
||||
{
|
||||
|
@ -936,7 +936,10 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
|
|||
// test change coordinators and cluster description
|
||||
// we randomly pick one process(not coordinator) and add it, in this case, it should always succeed
|
||||
{
|
||||
state std::string new_cluster_description = deterministicRandom()->randomAlphaNumeric(8);
|
||||
// choose a new description if configuration allows transactions across differently named clusters
|
||||
state std::string new_cluster_description = SERVER_KNOBS->ENABLE_CROSS_CLUSTER_SUPPORT
|
||||
? deterministicRandom()->randomAlphaNumeric(8)
|
||||
: cs.clusterKeyName().toString();
|
||||
state std::string new_coordinator_process;
|
||||
state std::vector<std::string> old_coordinators_processes;
|
||||
state bool possible_to_add_coordinator;
|
||||
|
@@ -1426,6 +1429,40 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
                }
            }
        }
        // make sure when we change dd related special keys, we grab the two system keys,
        // i.e. moveKeysLockOwnerKey and moveKeysLockWriteKey
        {
            state Reference<ReadYourWritesTransaction> tr1(new ReadYourWritesTransaction(cx));
            state Reference<ReadYourWritesTransaction> tr2(new ReadYourWritesTransaction(cx));
            loop {
                try {
                    Version readVersion = wait(tr1->getReadVersion());
                    tr2->setVersion(readVersion);
                    tr1->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES);
                    tr2->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
                    KeyRef ddPrefix = SpecialKeySpace::getManagementApiCommandPrefix("datadistribution");
                    tr1->set(LiteralStringRef("mode").withPrefix(ddPrefix), LiteralStringRef("1"));
                    wait(tr1->commit());
                    // randomly read the moveKeysLockOwnerKey/moveKeysLockWriteKey
                    // both of them should be grabbed when changing dd mode
                    wait(success(
                        tr2->get(deterministicRandom()->coinflip() ? moveKeysLockOwnerKey : moveKeysLockWriteKey)));
                    // tr2 should never succeed, just write to a key to make it not a read-only transaction
                    tr2->set(LiteralStringRef("unused_key"), LiteralStringRef(""));
                    wait(tr2->commit());
                    ASSERT(false); // commit should always fail due to conflict
                } catch (Error& e) {
                    if (e.code() != error_code_not_committed) {
                        // when buggify is enabled, it's possible we get other retriable errors
                        wait(tr2->onError(e));
                        tr1->reset();
                    } else {
                        // loop until we get conflict error
                        break;
                    }
                }
            }
        }
        return Void();
    }
};

@@ -734,7 +734,9 @@ ACTOR Future<Void> randomTransaction(Database cx, WriteDuringReadWorkload* self,
    state bool readAheadDisabled = deterministicRandom()->random01() < 0.5;
    state bool snapshotRYWDisabled = deterministicRandom()->random01() < 0.5;
    state bool useBatchPriority = deterministicRandom()->random01() < 0.5;
    state int64_t timebomb = deterministicRandom()->random01() < 0.01 ? deterministicRandom()->randomInt64(1, 6000) : 0;
    state int64_t timebomb = (FLOW_KNOBS->MAX_BUGGIFIED_DELAY == 0.0 && deterministicRandom()->random01() < 0.01)
                                 ? deterministicRandom()->randomInt64(1, 6000)
                                 : 0; // timebomb check can fail incorrectly if simulation injects delay longer than the timebomb
    state std::vector<Future<Void>> operations;
    state ActorCollection commits(false);
    state std::vector<Future<Void>> watches;

@ -152,6 +152,7 @@ public:
|
|||
databasePingDelay = g_network->isSimulated() ? 0.0 : 15.0;
|
||||
runConsistencyCheck = g_network->isSimulated();
|
||||
runConsistencyCheckOnCache = false;
|
||||
runConsistencyCheckOnTSS = true;
|
||||
waitForQuiescenceBegin = true;
|
||||
waitForQuiescenceEnd = true;
|
||||
simCheckRelocationDuration = false;
|
||||
|
@ -167,8 +168,8 @@ public:
|
|||
double databasePingDelay = -1.0)
|
||||
: title(title), dumpAfterTest(dump), clearAfterTest(clear), startDelay(startDelay), useDB(useDB), timeout(600),
|
||||
databasePingDelay(databasePingDelay), runConsistencyCheck(g_network->isSimulated()),
|
||||
runConsistencyCheckOnCache(false), waitForQuiescenceBegin(true), waitForQuiescenceEnd(true),
|
||||
simCheckRelocationDuration(false), simConnectionFailuresDisableDuration(0),
|
||||
runConsistencyCheckOnCache(false), runConsistencyCheckOnTSS(false), waitForQuiescenceBegin(true),
|
||||
waitForQuiescenceEnd(true), simCheckRelocationDuration(false), simConnectionFailuresDisableDuration(0),
|
||||
simBackupAgents(ISimulator::BackupAgentType::NoBackupAgents),
|
||||
simDrAgents(ISimulator::BackupAgentType::NoBackupAgents) {
|
||||
phases = TestWorkload::SETUP | TestWorkload::EXECUTION | TestWorkload::CHECK | TestWorkload::METRICS;
|
||||
|
@ -187,6 +188,7 @@ public:
|
|||
double databasePingDelay;
|
||||
bool runConsistencyCheck;
|
||||
bool runConsistencyCheckOnCache;
|
||||
bool runConsistencyCheckOnTSS;
|
||||
bool waitForQuiescenceBegin;
|
||||
bool waitForQuiescenceEnd;
|
||||
|
||||
|
|
|
@ -101,6 +101,11 @@ void Arena::dependsOn(const Arena& p) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
void* Arena::allocate4kAlignedBuffer(uint32_t size) {
|
||||
return ArenaBlock::dependOn4kAlignedBuffer(impl, size);
|
||||
}
|
||||
|
||||
size_t Arena::getSize() const {
|
||||
if (impl) {
|
||||
allowAccess(impl.getPtr());
|
||||
|
@ -172,9 +177,13 @@ size_t ArenaBlock::totalSize() {
|
|||
while (o) {
|
||||
ArenaBlockRef* r = (ArenaBlockRef*)((char*)getData() + o);
|
||||
makeDefined(r, sizeof(ArenaBlockRef));
|
||||
allowAccess(r->next);
|
||||
s += r->next->totalSize();
|
||||
disallowAccess(r->next);
|
||||
if (r->aligned4kBufferSize != 0) {
|
||||
s += r->aligned4kBufferSize;
|
||||
} else {
|
||||
allowAccess(r->next);
|
||||
s += r->next->totalSize();
|
||||
disallowAccess(r->next);
|
||||
}
|
||||
o = r->nextBlockOffset;
|
||||
makeNoAccess(r, sizeof(ArenaBlockRef));
|
||||
}
|
||||
|
@ -190,7 +199,12 @@ void ArenaBlock::getUniqueBlocks(std::set<ArenaBlock*>& a) {
|
|||
while (o) {
|
||||
ArenaBlockRef* r = (ArenaBlockRef*)((char*)getData() + o);
|
||||
makeDefined(r, sizeof(ArenaBlockRef));
|
||||
r->next->getUniqueBlocks(a);
|
||||
|
||||
// If next is valid recursively count its blocks
|
||||
if (r->aligned4kBufferSize == 0) {
|
||||
r->next->getUniqueBlocks(a);
|
||||
}
|
||||
|
||||
o = r->nextBlockOffset;
|
||||
makeNoAccess(r, sizeof(ArenaBlockRef));
|
||||
}
|
||||
|
@ -212,6 +226,7 @@ int ArenaBlock::addUsed(int bytes) {
|
|||
void ArenaBlock::makeReference(ArenaBlock* next) {
|
||||
ArenaBlockRef* r = (ArenaBlockRef*)((char*)getData() + bigUsed);
|
||||
makeDefined(r, sizeof(ArenaBlockRef));
|
||||
r->aligned4kBufferSize = 0;
|
||||
r->next = next;
|
||||
r->nextBlockOffset = nextBlockOffset;
|
||||
makeNoAccess(r, sizeof(ArenaBlockRef));
|
||||
|
@@ -219,6 +234,20 @@ void ArenaBlock::makeReference(ArenaBlock* next) {
    bigUsed += sizeof(ArenaBlockRef);
}

void* ArenaBlock::make4kAlignedBuffer(uint32_t size) {
    ArenaBlockRef* r = (ArenaBlockRef*)((char*)getData() + bigUsed);
    makeDefined(r, sizeof(ArenaBlockRef));
    r->aligned4kBufferSize = size;
    r->aligned4kBuffer = allocateFast4kAligned(size);
    // printf("Arena::aligned4kBuffer alloc size=%u ptr=%p\n", size, r->aligned4kBuffer);
    r->nextBlockOffset = nextBlockOffset;
    auto result = r->aligned4kBuffer;
    makeNoAccess(r, sizeof(ArenaBlockRef));
    nextBlockOffset = bigUsed;
    bigUsed += sizeof(ArenaBlockRef);
    return result;
}

void ArenaBlock::dependOn(Reference<ArenaBlock>& self, ArenaBlock* other) {
    other->addref();
    if (!self || self->isTiny() || self->unused() < sizeof(ArenaBlockRef))
@@ -227,6 +256,14 @@ void ArenaBlock::dependOn(Reference<ArenaBlock>& self, ArenaBlock* other) {
        self->makeReference(other);
}

void* ArenaBlock::dependOn4kAlignedBuffer(Reference<ArenaBlock>& self, uint32_t size) {
    if (!self || self->isTiny() || self->unused() < sizeof(ArenaBlockRef)) {
        return create(SMALL, self)->make4kAlignedBuffer(size);
    } else {
        return self->make4kAlignedBuffer(size);
    }
}

void* ArenaBlock::allocate(Reference<ArenaBlock>& self, int bytes) {
    ArenaBlock* b = self.getPtr();
    allowAccess(b);
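
A minimal usage sketch for the new arena-owned aligned buffers (assumes flow/Arena.h; a fragment, not a standalone program). dependOn4kAlignedBuffer above is what backs Arena::allocate4kAlignedBuffer, and ArenaBlock::destroy() below releases the buffer through freeFast4kAligned when the arena's last reference goes away:

    Arena arena;
    void* page = arena.allocate4kAlignedBuffer(4096); // 4096-byte, 4 KiB-aligned allocation
    // ... use `page` as a raw buffer (e.g. as a page-aligned staging area) ...
    // No explicit free: the buffer lives exactly as long as `arena` and is released in destroy().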
|
@ -359,10 +396,18 @@ void ArenaBlock::destroy() {
|
|||
while (o) {
|
||||
ArenaBlockRef* br = (ArenaBlockRef*)((char*)b->getData() + o);
|
||||
makeDefined(br, sizeof(ArenaBlockRef));
|
||||
allowAccess(br->next);
|
||||
if (br->next->delref_no_destroy())
|
||||
stack.push_back(stackArena, br->next);
|
||||
disallowAccess(br->next);
|
||||
|
||||
// If aligned4kBuffer is valid, free it
|
||||
if (br->aligned4kBufferSize != 0) {
|
||||
// printf("Arena::aligned4kBuffer free %p\n", br->aligned4kBuffer);
|
||||
freeFast4kAligned(br->aligned4kBufferSize, br->aligned4kBuffer);
|
||||
} else {
|
||||
allowAccess(br->next);
|
||||
if (br->next->delref_no_destroy())
|
||||
stack.push_back(stackArena, br->next);
|
||||
disallowAccess(br->next);
|
||||
}
|
||||
|
||||
o = br->nextBlockOffset;
|
||||
}
|
||||
}
|
||||
|
|
flow/Arena.h

@@ -102,6 +102,7 @@ public:
    Arena& operator=(Arena&&) noexcept;

    void dependsOn(const Arena& p);
    void* allocate4kAlignedBuffer(uint32_t size);
    size_t getSize() const;

    bool hasFree(size_t size, const void* address);

@@ -129,7 +130,15 @@ struct scalar_traits<Arena> : std::true_type {
};

struct ArenaBlockRef {
    ArenaBlock* next;
    union {
        ArenaBlock* next;
        void* aligned4kBuffer;
    };

    // Only one of (next, aligned4kBuffer) is valid at any one time, as they occupy the same space.
    // If aligned4kBufferSize is not 0, aligned4kBuffer is valid, otherwise next is valid.
    uint32_t aligned4kBufferSize;

    uint32_t nextBlockOffset;
};
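
The union plus the aligned4kBufferSize discriminant above is a compact tagged-union layout: readers must check the size field before deciding which pointer member is live. A self-contained sketch of the same idea in plain C++ (type and field names are illustrative only, not the FDB types):

    #include <cassert>
    #include <cstdint>

    struct Block; // stand-in for ArenaBlock

    struct BlockRef {
        union {
            Block* next;         // live when bufferSize == 0
            void* alignedBuffer; // live when bufferSize != 0
        };
        uint32_t bufferSize;     // discriminant: selects which union member may be read
    };

    inline Block* nextBlock(const BlockRef& r) {
        assert(r.bufferSize == 0); // otherwise alignedBuffer is the live member
        return r.next;
    }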

@@ -160,7 +169,9 @@ struct ArenaBlock : NonCopyable, ThreadSafeReferenceCounted<ArenaBlock> {
    void getUniqueBlocks(std::set<ArenaBlock*>& a);
    int addUsed(int bytes);
    void makeReference(ArenaBlock* next);
    void* make4kAlignedBuffer(uint32_t size);
    static void dependOn(Reference<ArenaBlock>& self, ArenaBlock* other);
    static void* dependOn4kAlignedBuffer(Reference<ArenaBlock>& self, uint32_t size);
    static void* allocate(Reference<ArenaBlock>& self, int bytes);
    // Return an appropriately-sized ArenaBlock to store the given data
    static ArenaBlock* create(int dataSize, Reference<ArenaBlock>& next);

@@ -266,4 +266,26 @@ inline void freeFast(int size, void* ptr) {
    delete[](uint8_t*) ptr;
}

[[nodiscard]] inline void* allocateFast4kAligned(int size) {
    // Use FastAllocator for sizes it supports to avoid internal fragmentation in some implementations of aligned_alloc
    if (size <= 4096)
        return FastAllocator<4096>::allocate();
    if (size <= 8192)
        return FastAllocator<8192>::allocate();
    if (size <= 16384)
        return FastAllocator<16384>::allocate();
    return aligned_alloc(4096, size);
}

inline void freeFast4kAligned(int size, void* ptr) {
    // Sizes supported by FastAllocator must be released via FastAllocator
    if (size <= 4096)
        return FastAllocator<4096>::release(ptr);
    if (size <= 8192)
        return FastAllocator<8192>::release(ptr);
    if (size <= 16384)
        return FastAllocator<16384>::release(ptr);
    aligned_free(ptr);
}
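
Because the requested size selects the FastAllocator size class on both paths, a caller must pass the same size to freeFast4kAligned that it passed to allocateFast4kAligned, otherwise the block would go back to the wrong magazine (or to aligned_free). A minimal usage sketch, assuming this header:

    // Allocate a 12 KB page-aligned scratch buffer and release it through the matching size class.
    constexpr int kScratchBytes = 12 * 1024;   // <= 16384, so FastAllocator<16384> serves it
    void* scratch = allocateFast4kAligned(kScratchBytes);
    // ... use scratch ...
    freeFast4kAligned(kScratchBytes, scratch); // same size => same size class => correct release path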

#endif
|
|
|
@ -234,6 +234,7 @@ void FlowKnobs::initialize(bool randomize, bool isSimulated) {
|
|||
init( BASIC_LOAD_BALANCE_MIN_CPU, 0.05 ); //do not adjust LB probabilities if the proxies are less than 5% utilized
|
||||
init( BASIC_LOAD_BALANCE_BUCKETS, 40 ); //proxies bin recent GRV requests into 40 time bins
|
||||
init( BASIC_LOAD_BALANCE_COMPUTE_PRECISION, 10000 ); //determines how much of the LB usage is holding the CPU usage of the proxy
|
||||
init( LOAD_BALANCE_TSS_TIMEOUT, 5.0 );
|
||||
|
||||
// Health Monitor
|
||||
init( FAILURE_DETECTION_DELAY, 4.0 ); if( randomize && BUGGIFY ) FAILURE_DETECTION_DELAY = 1.0;
|
||||
|
|
|
@ -250,6 +250,7 @@ public:
|
|||
int BASIC_LOAD_BALANCE_COMPUTE_PRECISION;
|
||||
double BASIC_LOAD_BALANCE_MIN_REQUESTS;
|
||||
double BASIC_LOAD_BALANCE_MIN_CPU;
|
||||
double LOAD_BALANCE_TSS_TIMEOUT;
|
||||
|
||||
// Health Monitor
|
||||
int FAILURE_DETECTION_DELAY;
|
||||
|
|
|
@ -121,7 +121,7 @@ public: // introduced features
|
|||
PROTOCOL_VERSION_FEATURE(0x0FDB00B062010001LL, CloseUnusedConnection);
|
||||
PROTOCOL_VERSION_FEATURE(0x0FDB00B063010000LL, DBCoreState);
|
||||
PROTOCOL_VERSION_FEATURE(0x0FDB00B063010000LL, TagThrottleValue);
|
||||
PROTOCOL_VERSION_FEATURE(0x0FDB00B063010000LL, ServerListValue);
|
||||
PROTOCOL_VERSION_FEATURE(0x0FDB00B070010001LL, ServerListValue);
|
||||
PROTOCOL_VERSION_FEATURE(0x0FDB00B063010000LL, StorageCacheValue);
|
||||
PROTOCOL_VERSION_FEATURE(0x0FDB00B063010000LL, RestoreStatusValue);
|
||||
PROTOCOL_VERSION_FEATURE(0x0FDB00B063010000LL, RestoreRequestValue);
|
||||
|
@ -138,6 +138,7 @@ public: // introduced features
|
|||
PROTOCOL_VERSION_FEATURE(0x0FDB00B070010000LL, StableInterfaces);
|
||||
PROTOCOL_VERSION_FEATURE(0x0FDB00B070010001LL, TagThrottleValueReason);
|
||||
PROTOCOL_VERSION_FEATURE(0x0FDB00B070010001LL, SpanContext);
|
||||
PROTOCOL_VERSION_FEATURE(0x0FDB00B070010001LL, TSS);
|
||||
};
|
||||
|
||||
template <>
|
||||
|
|
|
@ -74,6 +74,7 @@ ERROR( disk_adapter_reset, 1050, "The disk queue adpater reset" )
|
|||
ERROR( batch_transaction_throttled, 1051, "Batch GRV request rate limit exceeded")
|
||||
ERROR( dd_cancelled, 1052, "Data distribution components cancelled")
|
||||
ERROR( dd_not_found, 1053, "Data distributor not found")
|
||||
ERROR( wrong_connection_file, 1054, "Connection file mismatch")
|
||||
|
||||
ERROR( broken_promise, 1100, "Broken promise" )
|
||||
ERROR( operation_cancelled, 1101, "Asynchronous operation cancelled" )
|
||||
|
|
|
@ -674,6 +674,8 @@ public:
|
|||
bool isValid() const { return sav != 0; }
|
||||
bool isReady() const { return sav->isSet(); }
|
||||
bool isError() const { return sav->isError(); }
|
||||
// returns true if get can be called on this future (counterpart of canBeSet on Promises)
|
||||
bool canGet() const { return isValid() && isReady() && !isError(); }
|
||||
Error& getError() const {
|
||||
ASSERT(isError());
|
||||
return sav->error_state;
|
||||
|
|
|
@@ -697,6 +697,16 @@ private:
    AsyncVar<Void> v;
};

// Binds an AsyncTrigger object to an AsyncVar, so when the AsyncVar changes
// the AsyncTrigger is triggered.
ACTOR template <class T>
void forward(Reference<AsyncVar<T>> from, AsyncTrigger* to) {
    loop {
        wait(from->onChange());
        to->trigger();
    }
}
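
A small usage sketch for this forward() overload (flow actor code; it assumes the flow runtime and actor compiler, and the variable names are only illustrative):

    // Fire `recheck` whenever the watched value changes.
    Reference<AsyncVar<int>> ddMode = makeReference<AsyncVar<int>>(1);
    AsyncTrigger recheck;
    forward(ddMode, &recheck); // detached void actor: each ddMode->set(...) now calls recheck.trigger()
    ddMode->set(0);            // wakes any actor currently waiting on recheck.onTrigger()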

class Debouncer : NonCopyable {
public:
    explicit Debouncer(double delay) { worker = debounceWorker(this, delay); }

@@ -1334,6 +1344,14 @@ struct FlowLock : NonCopyable, public ReferenceCounted<FlowLock> {
    int64_t activePermits() const { return active; }
    int waiters() const { return takers.size(); }

    // Try to send error to all current and future waiters
    // Only works if broken_on_destruct.canBeSet()
    void kill(Error e = broken_promise()) {
        if (broken_on_destruct.canBeSet()) {
            broken_on_destruct.sendError(e);
        }
    }

private:
    std::list<std::pair<Promise<Void>, int64_t>> takers;
    const int64_t permits;
@@ -1891,6 +1909,59 @@ Future<U> operator>>(Future<T> const& lhs, Future<U> const& rhs) {
    return runAfter(lhs, rhs);
}

// A weak reference type to wrap a future Reference<T> object.
// Once the future is complete, this object holds a pointer to the referenced object but does
// not contribute to its reference count.
//
// WARNING: this class will not be aware when the underlying object is destroyed. It is up to the
// user to make sure that an UnsafeWeakFutureReference is discarded at the same time the object is.
template <class T>
class UnsafeWeakFutureReference {
public:
    UnsafeWeakFutureReference() {}
    UnsafeWeakFutureReference(Future<Reference<T>> future) : data(new UnsafeWeakFutureReferenceData(future)) {}

    // Returns a future to obtain a normal reference handle
    // If the future is ready, this creates a Reference<T> to wrap the object
    Future<Reference<T>> get() {
        if (!data) {
            return Reference<T>();
        } else if (data->ptr.present()) {
            return Reference<T>::addRef(data->ptr.get());
        } else {
            return data->future;
        }
    }

    // Returns the raw pointer, if the object is ready
    // Note: this should be used with care, as this pointer is not counted as a reference to the object and
    // it could be deleted if all normal references are destroyed.
    Optional<T*> getPtrIfReady() { return data->ptr; }

private:
    // A class to hold the state for an UnsafeWeakFutureReference
    struct UnsafeWeakFutureReferenceData : public ReferenceCounted<UnsafeWeakFutureReferenceData>, NonCopyable {
        Optional<T*> ptr;
        Future<Reference<T>> future;
        Future<Void> moveResultFuture;

        UnsafeWeakFutureReferenceData(Future<Reference<T>> future) : future(future) {
            moveResultFuture = moveResult(this);
        }

        // Waits for the future to complete and then stores the pointer in local storage
        // When this completes, we will no longer be counted toward the reference count of the object
        ACTOR Future<Void> moveResult(UnsafeWeakFutureReferenceData* self) {
            Reference<T> result = wait(self->future);
            self->ptr = result.getPtr();
            self->future = Future<Reference<T>>();
            return Void();
        }
    };

    Reference<UnsafeWeakFutureReferenceData> data;
};
|
||||
|
||||
#include "flow/unactorcompiler.h"
|
||||
|
||||
#endif
|
||||
|
|
|
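A hedged usage sketch (not part of this diff): hold a non-owning handle to an object that is still being constructed asynchronously. Connection is a stand-in type used only for illustration.

struct Connection : ReferenceCounted<Connection> {};

ACTOR Future<Void> useConnectionLater(Future<Reference<Connection>> pending) {
	state UnsafeWeakFutureReference<Connection> weak(pending);
	// get() yields a normal Reference<Connection> once pending is ready; after that point
	// the weak handle stores only a raw pointer and does not extend the object's lifetime.
	Reference<Connection> strong = wait(weak.get());
	(void)strong; // use the object only while a real reference like this is held
	return Void();
}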
@ -87,7 +87,9 @@ if(WITH_PYTHON)
add_fdb_test(TEST_FILES SlowTask.txt IGNORE)
add_fdb_test(TEST_FILES SpecificUnitTest.txt IGNORE)
add_fdb_test(TEST_FILES StorageMetricsSampleTests.txt IGNORE)
add_fdb_test(TEST_FILES StorageServerInterface.txt)
add_fdb_test(TEST_FILES StreamingWrite.txt IGNORE)
add_fdb_test(TEST_FILES SystemData.txt)
add_fdb_test(TEST_FILES ThreadSafety.txt IGNORE)
add_fdb_test(TEST_FILES TraceEventMetrics.txt IGNORE)
add_fdb_test(TEST_FILES PopulateTPCC.txt IGNORE)
@ -186,17 +188,17 @@ if(WITH_PYTHON)
TEST_FILES restarting/from_5.0.0/StorefrontTestRestart-1.txt
restarting/from_5.0.0/StorefrontTestRestart-2.txt)
add_fdb_test(
TEST_FILES restarting/from_6.2.29/SnapTestAttrition-1.txt
restarting/from_6.2.29/SnapTestAttrition-2.txt)
TEST_FILES restarting/from_6.2.33/SnapTestAttrition-1.txt
restarting/from_6.2.33/SnapTestAttrition-2.txt)
add_fdb_test(
TEST_FILES restarting/from_6.2.29/SnapTestSimpleRestart-1.txt
restarting/from_6.2.29/SnapTestSimpleRestart-2.txt)
TEST_FILES restarting/from_6.2.33/SnapTestSimpleRestart-1.txt
restarting/from_6.2.33/SnapTestSimpleRestart-2.txt)
add_fdb_test(
TEST_FILES restarting/from_6.2.29/SnapTestRestart-1.txt
restarting/from_6.2.29/SnapTestRestart-2.txt)
TEST_FILES restarting/from_6.2.33/SnapTestRestart-1.txt
restarting/from_6.2.33/SnapTestRestart-2.txt)
add_fdb_test(
TEST_FILES restarting/from_6.2.29/SnapCycleRestart-1.txt
restarting/from_6.2.29/SnapCycleRestart-2.txt)
TEST_FILES restarting/from_6.2.33/SnapCycleRestart-1.txt
restarting/from_6.2.33/SnapCycleRestart-2.txt)
add_fdb_test(
TEST_FILES restarting/from_5.1.7/DrUpgradeRestart-1.txt
restarting/from_5.1.7/DrUpgradeRestart-2.txt)
@ -0,0 +1,7 @@
testTitle=UnitTests
startDelay=0
useDB=false

testName=UnitTests
maxTestCases=0
testsMatching=/StorageServerInterface/
@ -0,0 +1,7 @@
testTitle=UnitTests
startDelay=0
useDB=false

testName=UnitTests
maxTestCases=0
testsMatching=/SystemData/
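A hedged sketch (not part of this diff) of the kind of test these new specs select: the UnitTests workload runs each registered TEST_CASE whose name matches the testsMatching prefix, so a case registered under /SystemData/ or /StorageServerInterface/ is picked up. The test body below is purely illustrative.

#include "flow/UnitTest.h"
#include "flow/Arena.h"

TEST_CASE("/SystemData/illustrativeExample") {
	StringRef s = LiteralStringRef("example");
	ASSERT(s.size() == 7); // "example" is seven bytes
	return Void();
}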
@ -1,3 +1,4 @@
[configuration]
extraDB = 1

[[test]]
@ -1,3 +1,4 @@
[configuration]
extraDB = 1

[[test]]
@ -1,3 +1,4 @@
[configuration]
extraDB = 1

[[test]]
@ -1,4 +1,5 @@
configureLocked = 1
[configuration]
configureLocked = true

[[test]]
testTitle = 'ConfigureLocked'
Some files were not shown because too many files have changed in this diff.