diff --git a/bindings/c/CMakeLists.txt b/bindings/c/CMakeLists.txt index c44fd6208a..edcd48cc32 100644 --- a/bindings/c/CMakeLists.txt +++ b/bindings/c/CMakeLists.txt @@ -217,8 +217,8 @@ if(NOT WIN32) target_link_libraries(fdb_c_unit_tests_version_510 PRIVATE fdb_c Threads::Threads doctest) target_link_libraries(trace_partial_file_suffix_test PRIVATE fdb_c Threads::Threads flow doctest) target_link_libraries(disconnected_timeout_unit_tests PRIVATE fdb_c Threads::Threads doctest) - target_link_libraries(fdb_c_client_config_tester PRIVATE SimpleOpt fdb_cpp fdb_c Threads::Threads fmt::fmt) - target_include_directories(fdb_c_client_config_tester PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}/foundationdb/ ${CMAKE_SOURCE_DIR}/flow/include) + target_link_libraries(fdb_c_client_config_tester PRIVATE SimpleOpt fdb_cpp fdb_c fdbclient Threads::Threads fmt::fmt) + target_include_directories(fdb_c_client_config_tester PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}/foundationdb/) # do not set RPATH for mako set_property(TARGET mako PROPERTY SKIP_BUILD_RPATH TRUE) @@ -423,18 +423,18 @@ if(OPEN_FOR_IDE) target_link_libraries(fdb_c_shim_lib_tester PRIVATE fdb_c_shim SimpleOpt fdb_cpp Threads::Threads) target_include_directories(fdb_c_shim_lib_tester PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}/foundationdb/ ${CMAKE_SOURCE_DIR}/flow/include) -elseif(NOT WIN32 AND NOT APPLE AND NOT USE_SANITIZER) # Linux Only, non-santizer only +elseif(NOT WIN32 AND NOT APPLE) # Linux Only set(SHIM_LIB_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}) set(SHIM_LIB_GEN_SRC - ${SHIM_LIB_OUTPUT_DIR}/libfdb_c.so.init.c + ${SHIM_LIB_OUTPUT_DIR}/libfdb_c.so.init.cpp ${SHIM_LIB_OUTPUT_DIR}/libfdb_c.so.tramp.S) set(IMPLIBSO_SRC_DIR ${CMAKE_SOURCE_DIR}/contrib/Implib.so) set(IMPLIBSO_SRC ${IMPLIBSO_SRC_DIR}/implib-gen.py - ${IMPLIBSO_SRC_DIR}/arch/common/init.c.tpl + ${IMPLIBSO_SRC_DIR}/arch/common/init.cpp.tpl ${IMPLIBSO_SRC_DIR}/arch/${CMAKE_SYSTEM_PROCESSOR}/config.ini ${IMPLIBSO_SRC_DIR}/arch/${CMAKE_SYSTEM_PROCESSOR}/table.S.tpl ${IMPLIBSO_SRC_DIR}/arch/${CMAKE_SYSTEM_PROCESSOR}/trampoline.S.tpl @@ -467,6 +467,11 @@ elseif(NOT WIN32 AND NOT APPLE AND NOT USE_SANITIZER) # Linux Only, non-santizer target_link_libraries(fdb_c_shim_lib_tester PRIVATE fdb_c_shim SimpleOpt fdb_cpp Threads::Threads) target_include_directories(fdb_c_shim_lib_tester PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}/foundationdb/ ${CMAKE_SOURCE_DIR}/flow/include) + set(SHIM_LIB_TEST_EXTRA_OPTIONS "") + if(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR USE_SANITIZER) + list(APPEND SHIM_LIB_TEST_EXTRA_OPTIONS --disable-prev-version-tests) + endif() + add_python_venv_test(NAME fdb_c_shim_library_tests COMMAND python ${CMAKE_CURRENT_SOURCE_DIR}/test/fdb_c_shim_tests.py --build-dir ${CMAKE_BINARY_DIR} @@ -474,6 +479,7 @@ elseif(NOT WIN32 AND NOT APPLE AND NOT USE_SANITIZER) # Linux Only, non-santizer --api-tester-bin $ --shim-lib-tester-bin $ --api-test-dir ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests + ${SHIM_LIB_TEST_EXTRA_OPTIONS} ) endif() # End Linux only, non-sanitizer only diff --git a/bindings/c/test/client_config_tester.cpp b/bindings/c/test/client_config_tester.cpp index 3cb16a4cf0..179dead68d 100644 --- a/bindings/c/test/client_config_tester.cpp +++ b/bindings/c/test/client_config_tester.cpp @@ -34,6 +34,8 @@ #include "SimpleOpt/SimpleOpt.h" #include #include +#include +#include "fdbclient/FDBOptions.g.h" #if (defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__)) #include @@ -43,11 +45,6 @@ #error Unsupported platform #endif -#undef ERROR -#define ERROR(name, number, description) enum { error_code_##name = number }; - -#include "flow/error_definitions.h" - #define API_VERSION_CLIENT_TMP_DIR 720 using namespace std::string_view_literals; @@ -59,17 +56,14 @@ enum TesterOptionId { OPT_CONNFILE, OPT_EXTERNAL_CLIENT_LIBRARY, OPT_EXTERNAL_CLIENT_DIRECTORY, - OPT_DISABLE_LOCAL_CLIENT, - OPT_DISABLE_CLIENT_BYPASS, OPT_API_VERSION, OPT_TRANSACTION_TIMEOUT, OPT_TRACE, OPT_TRACE_DIR, OPT_TMP_DIR, - OPT_IGNORE_EXTERNAL_CLIENT_FAILURES, - OPT_FAIL_INCOMPATIBLE_CLIENT, OPT_EXPECTED_ERROR, - OPT_PRINT_STATUS + OPT_PRINT_STATUS, + OPT_NETWORK_OPTION }; const int MIN_TESTABLE_API_VERSION = 400; @@ -81,17 +75,14 @@ CSimpleOpt::SOption TesterOptionDefs[] = // { OPT_CONNFILE, "--cluster-file", SO_REQ_SEP }, { OPT_EXTERNAL_CLIENT_LIBRARY, "--external-client-library", SO_REQ_SEP }, { OPT_EXTERNAL_CLIENT_DIRECTORY, "--external-client-dir", SO_REQ_SEP }, - { OPT_DISABLE_LOCAL_CLIENT, "--disable-local-client", SO_NONE }, - { OPT_DISABLE_CLIENT_BYPASS, "--disable-client-bypass", SO_NONE }, { OPT_API_VERSION, "--api-version", SO_REQ_SEP }, { OPT_TRANSACTION_TIMEOUT, "--transaction-timeout", SO_REQ_SEP }, { OPT_TRACE, "--log", SO_NONE }, { OPT_TRACE_DIR, "--log-dir", SO_REQ_SEP }, { OPT_TMP_DIR, "--tmp-dir", SO_REQ_SEP }, - { OPT_IGNORE_EXTERNAL_CLIENT_FAILURES, "--ignore-external-client-failures", SO_NONE }, - { OPT_FAIL_INCOMPATIBLE_CLIENT, "--fail-incompatible-client", SO_NONE }, { OPT_EXPECTED_ERROR, "--expected-error", SO_REQ_SEP }, { OPT_PRINT_STATUS, "--print-status", SO_NONE }, + { OPT_NETWORK_OPTION, "--network-option-", SO_REQ_SEP }, SO_END_OF_OPTIONS }; class TesterOptions { @@ -111,6 +102,7 @@ public: bool failIncompatibleClient = false; fdb::Error::CodeType expectedError = 0; bool printStatus = false; + std::vector> networkOptions; }; namespace { @@ -130,10 +122,6 @@ void printProgramUsage(const char* execName) { " Path to the external client library.\n" " --external-client-dir DIR\n" " Directory containing external client libraries.\n" - " --disable-local-client\n" - " Disable the local client, i.e. use only external client libraries.\n" - " --disable-client-bypass\n" - " Disable bypassing Multi-Version Client when using the local client.\n" " --api-version VERSION\n" " Required FDB API version (default %d).\n" " --transaction-timeout MILLISECONDS\n" @@ -144,14 +132,12 @@ void printProgramUsage(const char* execName) { " no effect unless --log is specified.\n" " --tmp-dir DIR\n" " Directory for temporary files of the client.\n" - " --ignore-external-client-failures\n" - " Ignore failures to initialize external clients.\n" - " --fail-incompatible-client\n" - " Fail if there is no client matching the server version.\n" " --expected-error ERR\n" " FDB error code the test expected to fail with (default: 0).\n" " --print-status\n" " Print database client status.\n" + " --network-option-OPTIONNAME OPTIONVALUE\n" + " Changes a network option. OPTIONAME should be lowercase.\n" " -h, --help Display this help and exit.\n", FDB_API_VERSION); } @@ -170,6 +156,19 @@ bool processIntOption(const std::string& optionName, const std::string& value, i return true; } +// Extracts the key for command line arguments that are specified with a prefix (e.g. --knob-). +// This function converts any hyphens in the extracted key to underscores. +bool extractPrefixedArgument(std::string prefix, const std::string& arg, std::string& res) { + if (arg.size() <= prefix.size() || arg.find(prefix) != 0 || + (arg[prefix.size()] != '-' && arg[prefix.size()] != '_')) { + return false; + } + + res = arg.substr(prefix.size() + 1); + std::transform(res.begin(), res.end(), res.begin(), [](int c) { return c == '-' ? '_' : c; }); + return true; +} + bool processArg(const CSimpleOpt& args) { switch (args.OptionId()) { case OPT_CONNFILE: @@ -181,12 +180,6 @@ bool processArg(const CSimpleOpt& args) { case OPT_EXTERNAL_CLIENT_DIRECTORY: options.externalClientDir = args.OptionArg(); break; - case OPT_DISABLE_LOCAL_CLIENT: - options.disableLocalClient = true; - break; - case OPT_DISABLE_CLIENT_BYPASS: - options.disableClientBypass = true; - break; case OPT_API_VERSION: if (!processIntOption( args.OptionText(), args.OptionArg(), MIN_TESTABLE_API_VERSION, FDB_API_VERSION, options.apiVersion)) { @@ -207,12 +200,6 @@ bool processArg(const CSimpleOpt& args) { case OPT_TMP_DIR: options.tmpDir = args.OptionArg(); break; - case OPT_IGNORE_EXTERNAL_CLIENT_FAILURES: - options.ignoreExternalClientFailures = true; - break; - case OPT_FAIL_INCOMPATIBLE_CLIENT: - options.failIncompatibleClient = true; - break; case OPT_EXPECTED_ERROR: if (!processIntOption(args.OptionText(), args.OptionArg(), 0, 10000, options.expectedError)) { return false; @@ -221,6 +208,16 @@ bool processArg(const CSimpleOpt& args) { case OPT_PRINT_STATUS: options.printStatus = true; break; + + case OPT_NETWORK_OPTION: { + std::string optionName; + if (!extractPrefixedArgument("--network-option", args.OptionSyntax(), optionName)) { + fmt::print(stderr, "ERROR: unable to parse network option '{}'\n", args.OptionSyntax()); + return false; + } + options.networkOptions.emplace_back(optionName, args.OptionArg()); + break; + } } return true; } @@ -272,6 +269,12 @@ void fdb_check(fdb::Error e, std::string_view msg) { } } +std::string stringToUpper(const std::string& str) { + std::string outStr(str); + std::transform(outStr.begin(), outStr.end(), outStr.begin(), [](char c) { return std::toupper(c); }); + return outStr; +} + void applyNetworkOptions() { if (!options.tmpDir.empty() && options.apiVersion >= API_VERSION_CLIENT_TMP_DIR) { fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_CLIENT_TMP_DIR, options.tmpDir); @@ -283,20 +286,21 @@ void applyNetworkOptions() { if (!options.externalClientDir.empty()) { fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_EXTERNAL_CLIENT_DIRECTORY, options.externalClientDir); } - if (options.disableLocalClient) { - fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_DISABLE_LOCAL_CLIENT); - } if (options.trace) { fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_TRACE_ENABLE, options.traceDir); } - if (options.ignoreExternalClientFailures) { - fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_IGNORE_EXTERNAL_CLIENT_FAILURES); + + std::unordered_map networkOptionsByName; + for (auto const& [optionCode, optionInfo] : FDBNetworkOptions::optionInfo) { + networkOptionsByName[optionInfo.name] = static_cast(optionCode); } - if (options.failIncompatibleClient) { - fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_FAIL_INCOMPATIBLE_CLIENT); - } - if (options.disableClientBypass) { - fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_DISABLE_CLIENT_BYPASS); + + for (auto const& [optionName, optionVal] : options.networkOptions) { + auto iter = networkOptionsByName.find(stringToUpper(optionName)); + if (iter == networkOptionsByName.end()) { + fmt::print(stderr, "Unknown network option {}\n", optionName); + } + fdb::network::setOption(iter->second, optionVal); } } diff --git a/bindings/c/test/fdb_c_client_config_tests.py b/bindings/c/test/fdb_c_client_config_tests.py index d061d99c25..eed27710ab 100644 --- a/bindings/c/test/fdb_c_client_config_tests.py +++ b/bindings/c/test/fdb_c_client_config_tests.py @@ -8,6 +8,7 @@ import os import glob import unittest import json +import re from threading import Thread import time @@ -99,6 +100,9 @@ class ClientConfigTest: self.expected_error = None self.transaction_timeout = None self.print_status = False + self.trace_file_identifier = None + self.trace_initialize_on_setup = False + self.trace_format = None # ---------------------------- # Configuration methods @@ -208,6 +212,9 @@ class ClientConfigTest: self.tc.assertTrue("Healthy" in self.status_json) self.tc.assertEqual(expected_is_healthy, self.status_json["Healthy"]) + def list_trace_files(self): + return glob.glob(os.path.join(self.log_dir, "*")) + # ---------------------------- # Executing the test # ---------------------------- @@ -222,10 +229,10 @@ class ClientConfigTest: cmd_args += ["--log", "--log-dir", self.log_dir] if self.disable_local_client: - cmd_args += ["--disable-local-client"] + cmd_args += ["--network-option-disable_local_client", ""] if self.disable_client_bypass: - cmd_args += ["--disable-client-bypass"] + cmd_args += ["--network-option-disable_client_bypass", ""] if self.external_lib_path is not None: cmd_args += ["--external-client-library", self.external_lib_path] @@ -234,10 +241,19 @@ class ClientConfigTest: cmd_args += ["--external-client-dir", self.external_lib_dir] if self.ignore_external_client_failures: - cmd_args += ["--ignore-external-client-failures"] + cmd_args += ["--network-option-ignore_external_client_failures", ""] if self.fail_incompatible_client: - cmd_args += ["--fail-incompatible-client"] + cmd_args += ["--network-option-fail_incompatible_client", ""] + + if self.trace_file_identifier is not None: + cmd_args += ["--network-option-trace_file_identifier", self.trace_file_identifier] + + if self.trace_initialize_on_setup: + cmd_args += ["--network-option-trace_initialize_on_setup", ""] + + if self.trace_format is not None: + cmd_args += ["--network-option-trace_format", self.trace_format] if self.api_version is not None: cmd_args += ["--api-version", str(self.api_version)] @@ -252,26 +268,20 @@ class ClientConfigTest: cmd_args += ["--print-status"] print("\nExecuting test command: {}".format(" ".join([str(c) for c in cmd_args])), file=sys.stderr) - try: - tester_proc = subprocess.Popen(cmd_args, stdout=subprocess.PIPE, stderr=sys.stderr) - out, _ = tester_proc.communicate() - self.tc.assertEqual(0, tester_proc.returncode) - if self.print_status: - # Parse the output as status json - try: - self.status_json = json.loads(out) - except json.JSONDecodeError as e: - print("Error '{}' parsing output {}".format(e, out.decode()), file=sys.stderr) - self.tc.assertIsNotNone(self.status_json) - print("Status: ", self.status_json, file=sys.stderr) - else: - # Otherwise redirect the output to the console - print(out.decode(), file=sys.stderr) - finally: - self.cleanup() - - def cleanup(self): - shutil.rmtree(self.test_dir) + tester_proc = subprocess.Popen(cmd_args, stdout=subprocess.PIPE, stderr=sys.stderr) + out, _ = tester_proc.communicate() + self.tc.assertEqual(0, tester_proc.returncode) + if self.print_status: + # Parse the output as status json + try: + self.status_json = json.loads(out) + except json.JSONDecodeError as e: + print("Error '{}' parsing output {}".format(e, out.decode()), file=sys.stderr) + self.tc.assertIsNotNone(self.status_json) + print("Status: ", self.status_json, file=sys.stderr) + else: + # Otherwise redirect the output to the console + print(out.decode(), file=sys.stderr) class ClientConfigTests(unittest.TestCase): @@ -516,6 +526,171 @@ class ClientConfigSeparateCluster(unittest.TestCase): self.cluster.tear_down() +# Test client-side tracing +class ClientTracingTests(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.cluster = TestCluster(CURRENT_VERSION) + cls.cluster.setup() + + @classmethod + def tearDownClass(cls): + cls.cluster.tear_down() + + def test_default_config_normal_case(self): + # Test trace files created with a default trace configuration + # in a normal case + test = self.test + test.create_external_lib_dir([CURRENT_VERSION, PREV_RELEASE_VERSION]) + test.api_version = api_version_from_str(PREV_RELEASE_VERSION) + test.disable_local_client = True + + self.exec_test() + self.assertEqual(3, len(self.trace_files)) + primary_trace = self.find_trace_file(with_ip=True) + self.find_and_check_event(primary_trace, "ClientStart", ["Machine"], []) + cur_ver_trace = self.find_trace_file(with_ip=True, version=CURRENT_VERSION, thread_idx=0) + self.find_and_check_event(cur_ver_trace, "ClientStart", ["Machine"], []) + prev_ver_trace = self.find_trace_file(with_ip=True, version=PREV_RELEASE_VERSION, thread_idx=0) + self.find_and_check_event(prev_ver_trace, "ClientStart", ["Machine"], []) + + def test_default_config_error_case(self): + # Test that no trace files are created with a default configuration + # when an a client fails to initialize + test = self.test + test.create_external_lib_dir([CURRENT_VERSION, PREV_RELEASE_VERSION]) + test.api_version = api_version_from_str(CURRENT_VERSION) + test.disable_local_client = True + test.expected_error = 2204 # API function missing + + self.exec_test() + self.assertEqual(0, len(self.trace_files)) + + def test_init_on_setup_normal_case(self): + # Test trace files created with trace_initialize_on_setup option + # in a normal case + test = self.test + test.create_external_lib_dir([CURRENT_VERSION]) + test.api_version = api_version_from_str(CURRENT_VERSION) + test.disable_local_client = True + test.trace_initialize_on_setup = True + + self.exec_test() + self.assertEqual(2, len(self.trace_files)) + primary_trace = self.find_trace_file() + # The machine address will be available only in the second ClientStart event + self.find_and_check_event(primary_trace, "ClientStart", [], ["Machine"]) + self.find_and_check_event(primary_trace, "ClientStart", ["Machine"], [], seqno=1) + cur_ver_trace = self.find_trace_file(version=CURRENT_VERSION, thread_idx=0) + self.find_and_check_event(cur_ver_trace, "ClientStart", [], ["Machine"]) + self.find_and_check_event(cur_ver_trace, "ClientStart", ["Machine"], [], seqno=1) + + def test_init_on_setup_trace_error_case(self): + # Test trace files created with trace_initialize_on_setup option + # when an a client fails to initialize + test = self.test + test.create_external_lib_dir([CURRENT_VERSION, PREV_RELEASE_VERSION]) + test.api_version = api_version_from_str(CURRENT_VERSION) + test.disable_local_client = True + test.trace_initialize_on_setup = True + test.expected_error = 2204 # API function missing + + self.exec_test() + self.assertEqual(1, len(self.trace_files)) + primary_trace = self.find_trace_file() + self.find_and_check_event(primary_trace, "ClientStart", [], ["Machine"]) + + def test_trace_identifier(self): + # Test trace files created with file identifier + test = self.test + test.create_external_lib_dir([CURRENT_VERSION]) + test.api_version = api_version_from_str(CURRENT_VERSION) + test.disable_local_client = True + test.trace_file_identifier = "fdbclient" + + self.exec_test() + self.assertEqual(2, len(self.trace_files)) + self.find_trace_file(with_ip=True, identifier="fdbclient") + self.find_trace_file(with_ip=True, identifier="fdbclient", version=CURRENT_VERSION, thread_idx=0) + + def test_init_on_setup_and_trace_identifier(self): + # Test trace files created with trace_initialize_on_setup option + # and file identifier + test = self.test + test.create_external_lib_dir([CURRENT_VERSION]) + test.api_version = api_version_from_str(CURRENT_VERSION) + test.disable_local_client = True + test.trace_initialize_on_setup = True + test.trace_file_identifier = "fdbclient" + + self.exec_test() + self.assertEqual(2, len(self.trace_files)) + self.find_trace_file(identifier="fdbclient") + self.find_trace_file(identifier="fdbclient", version=CURRENT_VERSION, thread_idx=0) + + # --------------- + # Helper methods + # --------------- + + def setUp(self): + self.test = ClientConfigTest(self) + self.trace_files = None + self.test.trace_format = "json" + + def exec_test(self): + self.test.exec() + self.trace_files = self.test.list_trace_files() + if self.test.trace_format == "json": + self.load_trace_file_events() + + def load_trace_file_events(self): + self.trace_file_events = {} + for trace in self.trace_files: + events = [] + with open(trace, "r") as f: + for line in f: + events.append(json.loads(line)) + self.trace_file_events[trace] = events + + def find_trace_file(self, with_ip=False, identifier=None, version=None, thread_idx=None): + self.assertIsNotNone(self.trace_files) + for trace_file in self.trace_files: + name = os.path.basename(trace_file) + # trace prefix must be in all files + self.assertTrue(name.startswith("trace.")) + pattern = "^trace\." + if with_ip: + pattern += "127\.0\.0\.1\." + else: + pattern += "0\.0\.0\.0\." + if identifier is not None: + pattern += identifier + else: + pattern += "\d+" + if version is not None: + pattern += "_v{}".format(version.replace(".", "_")) + if thread_idx is not None: + pattern += "t{}".format(thread_idx) + pattern += "\.\d+\.\w+\.\d+\.\d+\.{}$".format(self.test.trace_format) + if re.match(pattern, name): + return trace_file + self.fail("No maching trace file found") + + def find_and_check_event(self, trace_file, event_type, attr_present, attr_missing, seqno=0): + self.assertTrue(trace_file in self.trace_file_events) + for event in self.trace_file_events[trace_file]: + if event["Type"] == event_type: + if seqno > 0: + seqno -= 1 + continue + for attr in attr_present: + self.assertTrue(attr in event) + for attr in attr_missing: + self.assertFalse(attr in event) + return + self.fail("No matching event found") + + if __name__ == "__main__": parser = argparse.ArgumentParser( formatter_class=argparse.RawDescriptionHelpFormatter, diff --git a/bindings/c/test/fdb_c_shim_tests.py b/bindings/c/test/fdb_c_shim_tests.py index 1f9def4a3c..e6e29123f9 100644 --- a/bindings/c/test/fdb_c_shim_tests.py +++ b/bindings/c/test/fdb_c_shim_tests.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 from argparse import ArgumentParser, RawDescriptionHelpFormatter from pathlib import Path -import platform import shutil import subprocess import sys @@ -53,7 +52,7 @@ class TestEnv(LocalCluster): self.downloader.binary_path(version, "fdbcli"), 1, ) - self.set_env_var("LD_LIBRARY_PATH", self.downloader.lib_dir(version)) + self.set_env_var("LD_LIBRARY_PATH", "%s:%s" % (self.downloader.lib_dir(version), os.getenv("LD_LIBRARY_PATH"))) client_lib = self.downloader.lib_path(version) assert client_lib.exists(), "{} does not exist".format(client_lib) self.client_lib_external = self.tmp_dir.joinpath("libfdb_c_external.so") @@ -91,9 +90,8 @@ class FdbCShimTests: self.api_test_dir = Path(args.api_test_dir).resolve() assert self.api_test_dir.exists(), "{} does not exist".format(self.api_test_dir) self.downloader = FdbBinaryDownloader(args.build_dir) - # binary downloads are currently available only for x86_64 - self.platform = platform.machine() - if self.platform == "x86_64": + self.test_prev_versions = not args.disable_prev_version_tests + if self.test_prev_versions: self.downloader.download_old_binaries(PREV_RELEASE_VERSION) self.downloader.download_old_binaries("7.0.0") @@ -182,7 +180,8 @@ class FdbCShimTests: if use_external_lib: cmd_args = cmd_args + ["--disable-local-client", "--external-client-library", test_env.client_lib_external] env_vars = os.environ.copy() - env_vars["LD_LIBRARY_PATH"] = self.downloader.lib_dir(version) if set_ld_lib_path else "" + if set_ld_lib_path: + env_vars["LD_LIBRARY_PATH"] = "%s:%s" % (self.downloader.lib_dir(version), os.getenv("LD_LIBRARY_PATH")) if set_env_path: env_vars["FDB_LOCAL_CLIENT_LIBRARY_PATH"] = ( "dummy" if invalid_lib_path else self.downloader.lib_path(version) @@ -230,8 +229,7 @@ class FdbCShimTests: # Test calling a function that exists in the loaded library, but not for the selected API version self.run_c_shim_lib_tester(CURRENT_VERSION, test_env, call_set_path=True, api_version=700) - # binary downloads are currently available only for x86_64 - if self.platform == "x86_64": + if self.test_prev_versions: # Test the API workload with the release version self.run_c_api_test(PREV_RELEASE_VERSION, DEFAULT_TEST_FILE) @@ -283,6 +281,12 @@ if __name__ == "__main__": parser.add_argument( "--api-test-dir", type=str, help="Path to a directory with api test definitions.", required=True ) + parser.add_argument( + "--disable-prev-version-tests", + action="store_true", + default=False, + help="Disable tests that need binaries of previous versions", + ) args = parser.parse_args() test = FdbCShimTests(args) test.run_tests() diff --git a/bindings/flow/fdb_flow.actor.cpp b/bindings/flow/fdb_flow.actor.cpp index d6e1431c77..7d2ca93e66 100644 --- a/bindings/flow/fdb_flow.actor.cpp +++ b/bindings/flow/fdb_flow.actor.cpp @@ -87,7 +87,7 @@ void fdb_flow_test() { g_network = newNet2(TLSConfig()); - openTraceFile(NetworkAddress(), 1000000, 1000000, "."); + openTraceFile({}, 1000000, 1000000, "."); systemMonitor(); uncancellable(recurring(&systemMonitor, 5.0, TaskPriority::FlushTrace)); diff --git a/bindings/go/src/fdb/generated.go b/bindings/go/src/fdb/generated.go index 2947abe42e..9660884887 100644 --- a/bindings/go/src/fdb/generated.go +++ b/bindings/go/src/fdb/generated.go @@ -107,6 +107,11 @@ func (o NetworkOptions) SetTraceShareAmongClientThreads() error { return o.setOpt(37, nil) } +// Initialize trace files on network setup, determine the local IP later. Otherwise tracing is initialized when opening the first database. +func (o NetworkOptions) SetTraceInitializeOnSetup() error { + return o.setOpt(38, nil) +} + // Set file suffix for partially written log files. // // Parameter: Append this suffix to partially written log files. When a log file is complete, it is renamed to remove the suffix. No separator is added between the file and the suffix. If you want to add a file extension, you should include the separator - e.g. '.tmp' instead of 'tmp' to add the 'tmp' extension. diff --git a/contrib/Implib.so/README.md b/contrib/Implib.so/README.md index 2ced58b3e4..16c00241db 100644 --- a/contrib/Implib.so/README.md +++ b/contrib/Implib.so/README.md @@ -46,27 +46,17 @@ where `TARGET` can be any of * aarch64-linux-gnu, aarch64-none-linux-android * e2k-linux-gnu -Script generates two files: `libxyz.so.tramp.S` and `libxyz.so.init.c` which need to be linked to your application (instead of `-lxyz`): +Script generates two files: `libxyz.so.tramp.S` and `libxyz.so.init.cpp` which need to be linked to your application (instead of `-lxyz`): ``` -$ gcc myfile1.c myfile2.c ... libxyz.so.tramp.S libxyz.so.init.c ... -ldl +$ gcc myfile1.c myfile2.c ... libxyz.so.tramp.S libxyz.so.init.cpp ... -ldl ``` Note that you need to link against libdl.so. On ARM in case your app is compiled to Thumb code (which e.g. Ubuntu's `arm-linux-gnueabihf-gcc` does by default) you'll also need to add `-mthumb-interwork`. Application can then freely call functions from `libxyz.so` _without linking to it_. Library will be loaded (via `dlopen`) on first call to any of its functions. If you want to forcedly resolve all symbols (e.g. if you want to avoid delays further on) you can call `void libxyz_init_all()`. -Above command would perform a _lazy load_ i.e. load library on first call to one of it's symbols. If you want to load it at startup, run - -``` -$ implib-gen.py --no-lazy-load libxyz.so -``` - -If you don't want `dlopen` to be called automatically and prefer to load library yourself at program startup, run script as - -``` -$ implib-gen.py --no-dlopen libxys.so -``` +Above command would perform a _lazy load_ i.e. load library on first call to one of it's symbols. If you do want to load library via `dlopen` but would prefer to call it yourself (e.g. with custom parameters or with modified library name), run script as @@ -100,10 +90,6 @@ $ implib-gen.py --dlopen-callback=mycallback libxyz.so (callback must have signature `void *(*)(const char *lib_name)` and return handle of loaded library). -Finally to force library load and resolution of all symbols, call - - void _LIBNAME_tramp_resolve_all(void); - # Wrapping vtables By default the tool does not try to wrap vtables exported from the library. This can be enabled via `--vtables` flag: @@ -141,7 +127,7 @@ void *mycallback(const char *lib_name) { } $ implib-gen.py --dlopen-callback=mycallback --symbol-list=mysymbols.txt libxyz.so -$ ... # Link your app with libxyz.tramp.S, libxyz.init.c and mycallback.c +$ ... # Link your app with libxyz.tramp.S, libxyz.init.cpp and mycallback.c ``` Similar approach can be used if you want to provide a common interface for several libraries with partially intersecting interfaces (see [this example](tests/multilib/run.sh) for more details). @@ -156,7 +142,7 @@ To achieve this you can generate a wrapper with _renamed_ symbols which call to $ cat mycallback.c ... Same as before ... $ implib-gen.py --dlopen-callback=mycallback --symbol_prefix=MYPREFIX_ libxyz.so -$ ... # Link your app with libxyz.tramp.S, libxyz.init.c and mycallback.c +$ ... # Link your app with libxyz.tramp.S, libxyz.init.cpp and mycallback.c ``` # Linker wrapper diff --git a/contrib/Implib.so/arch/common/init.c.tpl b/contrib/Implib.so/arch/common/init.cpp.tpl similarity index 64% rename from contrib/Implib.so/arch/common/init.c.tpl rename to contrib/Implib.so/arch/common/init.cpp.tpl index d8b9b3bbde..9785129165 100644 --- a/contrib/Implib.so/arch/common/init.c.tpl +++ b/contrib/Implib.so/arch/common/init.cpp.tpl @@ -11,6 +11,7 @@ #include #include #include +#include // Sanity check for ARM to avoid puzzling runtime crashes #ifdef __arm__ @@ -31,22 +32,15 @@ extern "C" { } while(0) #define CALL_USER_CALLBACK $has_dlopen_callback -#define NO_DLOPEN $no_dlopen -#define LAZY_LOAD $lazy_load static void *lib_handle; -static int is_lib_loading; static void *load_library() { if(lib_handle) return lib_handle; - is_lib_loading = 1; - // TODO: dlopen and users callback must be protected w/ critical section (to avoid dlopening lib twice) -#if NO_DLOPEN - CHECK(0, "internal error"); // We shouldn't get here -#elif CALL_USER_CALLBACK +#if CALL_USER_CALLBACK extern void *$dlopen_callback(const char *lib_name); lib_handle = $dlopen_callback("$load_name"); CHECK(lib_handle, "callback '$dlopen_callback' failed to load library"); @@ -55,17 +49,9 @@ static void *load_library() { CHECK(lib_handle, "failed to load library: %s", dlerror()); #endif - is_lib_loading = 0; - return lib_handle; } -#if ! NO_DLOPEN && ! LAZY_LOAD -static void __attribute__((constructor)) load_lib() { - load_library(); -} -#endif - static void __attribute__((destructor)) unload_lib() { if(lib_handle) dlclose(lib_handle); @@ -79,34 +65,35 @@ static const char *const sym_names[] = { extern void *_${lib_suffix}_tramp_table[]; -// Can be sped up by manually parsing library symtab... -void _${lib_suffix}_tramp_resolve(int i) { - assert((unsigned)i + 1 < sizeof(sym_names) / sizeof(sym_names[0])); +// Load library and resolve all symbols +static void load_and_resolve(void) { + static std::mutex load_mutex; + static int is_loaded = false; - CHECK(!is_lib_loading, "library function '%s' called during library load", sym_names[i]); + std::unique_lock lock(load_mutex); + if (is_loaded) + return; void *h = 0; -#if NO_DLOPEN - // FIXME: instead of RTLD_NEXT we should search for loaded lib_handle - // as in https://github.com/jethrogb/ssltrace/blob/bf17c150a7/ssltrace.cpp#L74-L112 - h = RTLD_NEXT; -#elif LAZY_LOAD h = load_library(); -#else - h = lib_handle; - CHECK(h, "failed to resolve symbol '%s', library failed to load", sym_names[i]); -#endif - // Dlsym is thread-safe so don't need to protect it. - _${lib_suffix}_tramp_table[i] = dlsym(h, sym_names[i]); - CHECK(_${lib_suffix}_tramp_table[i], "failed to resolve symbol '%s'", sym_names[i]); -} - -// Helper for user to resolve all symbols -void _${lib_suffix}_tramp_resolve_all(void) { size_t i; for(i = 0; i + 1 < sizeof(sym_names) / sizeof(sym_names[0]); ++i) - _${lib_suffix}_tramp_resolve(i); + // Resolving some of the symbols may fail. We ignore it, because if we are loading + // a library of an older version it may lack certain functions + _${lib_suffix}_tramp_table[i] = dlsym(h, sym_names[i]); + + is_loaded = true; +} + +// The function is called if the table entry for the symbol is not set. +// In that case we load the library and try to resolve all symbols if that was not done yet. +// If the table entry is still missing, then the symbol is not available in the loaded library, +// which is a fatal error on which we immediately exit the process. +void _${lib_suffix}_tramp_resolve(int i) { + assert((unsigned)i + 1 < sizeof(sym_names) / sizeof(sym_names[0])); + load_and_resolve(); + CHECK(_${lib_suffix}_tramp_table[i], "failed to resolve symbol '%s'", sym_names[i]); } #ifdef __cplusplus diff --git a/contrib/Implib.so/implib-gen.py b/contrib/Implib.so/implib-gen.py index 5e9c756643..67aadaf338 100755 --- a/contrib/Implib.so/implib-gen.py +++ b/contrib/Implib.so/implib-gen.py @@ -22,532 +22,530 @@ import configparser me = os.path.basename(__file__) root = os.path.dirname(__file__) + def warn(msg): - """Emits a nicely-decorated warning.""" - sys.stderr.write(f'{me}: warning: {msg}\n') + """Emits a nicely-decorated warning.""" + sys.stderr.write(f"{me}: warning: {msg}\n") + def error(msg): - """Emits a nicely-decorated error and exits.""" - sys.stderr.write(f'{me}: error: {msg}\n') - sys.exit(1) + """Emits a nicely-decorated error and exits.""" + sys.stderr.write(f"{me}: error: {msg}\n") + sys.exit(1) + + +def run(args, stdin=""): + """Runs external program and aborts on error.""" + env = os.environ.copy() + # Force English language + env["LC_ALL"] = "c" + try: + del env["LANG"] + except KeyError: + pass + with subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env) as p: + out, err = p.communicate(input=stdin.encode("utf-8")) + out = out.decode("utf-8") + err = err.decode("utf-8") + if p.returncode != 0 or err: + error(f"{args[0]} failed with retcode {p.returncode}:\n{err}") + return out, err -def run(args, stdin=''): - """Runs external program and aborts on error.""" - env = os.environ.copy() - # Force English language - env['LC_ALL'] = 'c' - try: - del env["LANG"] - except KeyError: - pass - with subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, env=env) as p: - out, err = p.communicate(input=stdin.encode('utf-8')) - out = out.decode('utf-8') - err = err.decode('utf-8') - if p.returncode != 0 or err: - error(f"{args[0]} failed with retcode {p.returncode}:\n{err}") - return out, err def make_toc(words, renames=None): - "Make an mapping of words to their indices in list" - renames = renames or {} - toc = {} - for i, n in enumerate(words): - name = renames.get(n, n) - toc[i] = name - return toc + "Make an mapping of words to their indices in list" + renames = renames or {} + toc = {} + for i, n in enumerate(words): + name = renames.get(n, n) + toc[i] = name + return toc + def parse_row(words, toc, hex_keys): - "Make a mapping from column names to values" - vals = {k: (words[i] if i < len(words) else '') for i, k in toc.items()} - for k in hex_keys: - if vals[k]: - vals[k] = int(vals[k], 16) - return vals + "Make a mapping from column names to values" + vals = {k: (words[i] if i < len(words) else "") for i, k in toc.items()} + for k in hex_keys: + if vals[k]: + vals[k] = int(vals[k], 16) + return vals + def collect_syms(f): - """Collect ELF dynamic symtab.""" + """Collect ELF dynamic symtab.""" - # --dyn-syms does not always work for some reason so dump all symtabs - out, _ = run(['readelf', '-sW', f]) + # --dyn-syms does not always work for some reason so dump all symtabs + out, _ = run(["readelf", "-sW", f]) - toc = None - syms = [] - syms_set = set() - for line in out.splitlines(): - line = line.strip() - if not line: - # Next symtab - toc = None - continue - words = re.split(r' +', line) - if line.startswith('Num'): # Header? - if toc is not None: - error("multiple headers in output of readelf") - # Colons are different across readelf versions so get rid of them. - toc = make_toc(map(lambda n: n.replace(':', ''), words)) - elif toc is not None: - sym = parse_row(words, toc, ['Value']) - name = sym['Name'] - if name in syms_set: - continue - syms_set.add(name) - sym['Size'] = int(sym['Size'], 0) # Readelf is inconistent on Size format - if '@' in name: - sym['Default'] = '@@' in name - name, ver = re.split(r'@+', name) - sym['Name'] = name - sym['Version'] = ver - else: - sym['Default'] = True - sym['Version'] = None - syms.append(sym) + toc = None + syms = [] + syms_set = set() + for line in out.splitlines(): + line = line.strip() + if not line: + # Next symtab + toc = None + continue + words = re.split(r" +", line) + if line.startswith("Num"): # Header? + if toc is not None: + error("multiple headers in output of readelf") + # Colons are different across readelf versions so get rid of them. + toc = make_toc(map(lambda n: n.replace(":", ""), words)) + elif toc is not None: + sym = parse_row(words, toc, ["Value"]) + name = sym["Name"] + if name in syms_set: + continue + syms_set.add(name) + sym["Size"] = int(sym["Size"], 0) # Readelf is inconistent on Size format + if "@" in name: + sym["Default"] = "@@" in name + name, ver = re.split(r"@+", name) + sym["Name"] = name + sym["Version"] = ver + else: + sym["Default"] = True + sym["Version"] = None + syms.append(sym) - if toc is None: - error(f"failed to analyze symbols in {f}") + if toc is None: + error(f"failed to analyze symbols in {f}") - # Also collected demangled names - if syms: - out, _ = run(['c++filt'], '\n'.join((sym['Name'] for sym in syms))) - for i, name in enumerate(out.split("\n")): - syms[i]['Demangled Name'] = name + # Also collected demangled names + if syms: + out, _ = run(["c++filt"], "\n".join((sym["Name"] for sym in syms))) + for i, name in enumerate(out.split("\n")): + syms[i]["Demangled Name"] = name + + return syms - return syms def collect_relocs(f): - """Collect ELF dynamic relocs.""" + """Collect ELF dynamic relocs.""" - out, _ = run(['readelf', '-rW', f]) + out, _ = run(["readelf", "-rW", f]) - toc = None - rels = [] - for line in out.splitlines(): - line = line.strip() - if not line: - toc = None - continue - if line == 'There are no relocations in this file.': - return [] - if re.match(r'^\s*Offset', line): # Header? - if toc is not None: - error("multiple headers in output of readelf") - words = re.split(r'\s\s+', line) # "Symbol's Name + Addend" - toc = make_toc(words) - elif toc is not None: - line = re.sub(r' \+ ', '+', line) - words = re.split(r'\s+', line) - rel = parse_row(words, toc, ['Offset', 'Info']) - rels.append(rel) - # Split symbolic representation - sym_name = 'Symbol\'s Name + Addend' - if sym_name not in rel and 'Symbol\'s Name' in rel: - # Adapt to different versions of readelf - rel[sym_name] = rel['Symbol\'s Name'] + '+0' - if rel[sym_name]: - p = rel[sym_name].split('+') - if len(p) == 1: - p = ['', p[0]] - rel[sym_name] = (p[0], int(p[1], 16)) + toc = None + rels = [] + for line in out.splitlines(): + line = line.strip() + if not line: + toc = None + continue + if line == "There are no relocations in this file.": + return [] + if re.match(r"^\s*Offset", line): # Header? + if toc is not None: + error("multiple headers in output of readelf") + words = re.split(r"\s\s+", line) # "Symbol's Name + Addend" + toc = make_toc(words) + elif toc is not None: + line = re.sub(r" \+ ", "+", line) + words = re.split(r"\s+", line) + rel = parse_row(words, toc, ["Offset", "Info"]) + rels.append(rel) + # Split symbolic representation + sym_name = "Symbol's Name + Addend" + if sym_name not in rel and "Symbol's Name" in rel: + # Adapt to different versions of readelf + rel[sym_name] = rel["Symbol's Name"] + "+0" + if rel[sym_name]: + p = rel[sym_name].split("+") + if len(p) == 1: + p = ["", p[0]] + rel[sym_name] = (p[0], int(p[1], 16)) - if toc is None: - error(f"failed to analyze relocations in {f}") + if toc is None: + error(f"failed to analyze relocations in {f}") + + return rels - return rels def collect_sections(f): - """Collect section info from ELF.""" + """Collect section info from ELF.""" - out, _ = run(['readelf', '-SW', f]) + out, _ = run(["readelf", "-SW", f]) - toc = None - sections = [] - for line in out.splitlines(): - line = line.strip() - if not line: - continue - line = re.sub(r'\[\s+', '[', line) - words = re.split(r' +', line) - if line.startswith('[Nr]'): # Header? - if toc is not None: - error("multiple headers in output of readelf") - toc = make_toc(words, {'Addr' : 'Address'}) - elif line.startswith('[') and toc is not None: - sec = parse_row(words, toc, ['Address', 'Off', 'Size']) - if 'A' in sec['Flg']: # Allocatable section? - sections.append(sec) + toc = None + sections = [] + for line in out.splitlines(): + line = line.strip() + if not line: + continue + line = re.sub(r"\[\s+", "[", line) + words = re.split(r" +", line) + if line.startswith("[Nr]"): # Header? + if toc is not None: + error("multiple headers in output of readelf") + toc = make_toc(words, {"Addr": "Address"}) + elif line.startswith("[") and toc is not None: + sec = parse_row(words, toc, ["Address", "Off", "Size"]) + if "A" in sec["Flg"]: # Allocatable section? + sections.append(sec) - if toc is None: - error(f"failed to analyze sections in {f}") + if toc is None: + error(f"failed to analyze sections in {f}") + + return sections - return sections def read_unrelocated_data(input_name, syms, secs): - """Collect unrelocated data from ELF.""" - data = {} - with open(input_name, 'rb') as f: - def is_symbol_in_section(sym, sec): - sec_end = sec['Address'] + sec['Size'] - is_start_in_section = sec['Address'] <= sym['Value'] < sec_end - is_end_in_section = sym['Value'] + sym['Size'] <= sec_end - return is_start_in_section and is_end_in_section - for name, s in sorted(syms.items(), key=lambda s: s[1]['Value']): - # TODO: binary search (bisect) - sec = [sec for sec in secs if is_symbol_in_section(s, sec)] - if len(sec) != 1: - error(f"failed to locate section for interval [{s['Value']:x}, {s['Value'] + s['Size']:x})") - sec = sec[0] - f.seek(sec['Off']) - data[name] = f.read(s['Size']) - return data + """Collect unrelocated data from ELF.""" + data = {} + with open(input_name, "rb") as f: + + def is_symbol_in_section(sym, sec): + sec_end = sec["Address"] + sec["Size"] + is_start_in_section = sec["Address"] <= sym["Value"] < sec_end + is_end_in_section = sym["Value"] + sym["Size"] <= sec_end + return is_start_in_section and is_end_in_section + + for name, s in sorted(syms.items(), key=lambda s: s[1]["Value"]): + # TODO: binary search (bisect) + sec = [sec for sec in secs if is_symbol_in_section(s, sec)] + if len(sec) != 1: + error(f"failed to locate section for interval [{s['Value']:x}, {s['Value'] + s['Size']:x})") + sec = sec[0] + f.seek(sec["Off"]) + data[name] = f.read(s["Size"]) + return data + def collect_relocated_data(syms, bites, rels, ptr_size, reloc_types): - """Identify relocations for each symbol""" - data = {} - for name, s in sorted(syms.items()): - b = bites.get(name) - assert b is not None - if s['Demangled Name'].startswith('typeinfo name'): - data[name] = [('byte', int(x)) for x in b] - continue - data[name] = [] - for i in range(0, len(b), ptr_size): - val = int.from_bytes(b[i*ptr_size:(i + 1)*ptr_size], byteorder='little') - data[name].append(('offset', val)) - start = s['Value'] - finish = start + s['Size'] - # TODO: binary search (bisect) - for rel in rels: - if rel['Type'] in reloc_types and start <= rel['Offset'] < finish: - i = (rel['Offset'] - start) // ptr_size - assert i < len(data[name]) - data[name][i] = 'reloc', rel - return data + """Identify relocations for each symbol""" + data = {} + for name, s in sorted(syms.items()): + b = bites.get(name) + assert b is not None + if s["Demangled Name"].startswith("typeinfo name"): + data[name] = [("byte", int(x)) for x in b] + continue + data[name] = [] + for i in range(0, len(b), ptr_size): + val = int.from_bytes(b[i * ptr_size : (i + 1) * ptr_size], byteorder="little") + data[name].append(("offset", val)) + start = s["Value"] + finish = start + s["Size"] + # TODO: binary search (bisect) + for rel in rels: + if rel["Type"] in reloc_types and start <= rel["Offset"] < finish: + i = (rel["Offset"] - start) // ptr_size + assert i < len(data[name]) + data[name][i] = "reloc", rel + return data + def generate_vtables(cls_tables, cls_syms, cls_data): - """Generate code for vtables""" - c_types = { - 'reloc' : 'const void *', - 'byte' : 'unsigned char', - 'offset' : 'size_t' - } + """Generate code for vtables""" + c_types = {"reloc": "const void *", "byte": "unsigned char", "offset": "size_t"} - ss = [] - ss.append('''\ + ss = [] + ss.append( + """\ #ifdef __cplusplus extern "C" { #endif -''') +""" + ) - # Print externs + # Print externs - printed = set() - for name, data in sorted(cls_data.items()): - for typ, val in data: - if typ != 'reloc': - continue - sym_name, addend = val['Symbol\'s Name + Addend'] - sym_name = re.sub(r'@.*', '', sym_name) # Can we pin version in C? - if sym_name not in cls_syms and sym_name not in printed: - ss.append(f'''\ + printed = set() + for name, data in sorted(cls_data.items()): + for typ, val in data: + if typ != "reloc": + continue + sym_name, addend = val["Symbol's Name + Addend"] + sym_name = re.sub(r"@.*", "", sym_name) # Can we pin version in C? + if sym_name not in cls_syms and sym_name not in printed: + ss.append( + f"""\ extern const char {sym_name}[]; -''') +""" + ) - # Collect variable infos + # Collect variable infos - code_info = {} + code_info = {} - for name, s in sorted(cls_syms.items()): - data = cls_data[name] - if s['Demangled Name'].startswith('typeinfo name'): - declarator = 'const unsigned char %s[]' - else: - field_types = (f'{c_types[typ]} field_{i};' for i, (typ, _) in enumerate(data)) - declarator = 'const struct { %s } %%s' % ' '.join(field_types) # pylint: disable=C0209 # consider-using-f-string - vals = [] - for typ, val in data: - if typ != 'reloc': - vals.append(str(val) + 'UL') - else: - sym_name, addend = val['Symbol\'s Name + Addend'] - sym_name = re.sub(r'@.*', '', sym_name) # Can we pin version in C? - vals.append(f'(const char *)&{sym_name} + {addend}') - code_info[name] = (declarator, '{ %s }' % ', '.join(vals)) # pylint: disable= C0209 # consider-using-f-string + for name, s in sorted(cls_syms.items()): + data = cls_data[name] + if s["Demangled Name"].startswith("typeinfo name"): + declarator = "const unsigned char %s[]" + else: + field_types = (f"{c_types[typ]} field_{i};" for i, (typ, _) in enumerate(data)) + declarator = "const struct { %s } %%s" % " ".join( + field_types + ) # pylint: disable=C0209 # consider-using-f-string + vals = [] + for typ, val in data: + if typ != "reloc": + vals.append(str(val) + "UL") + else: + sym_name, addend = val["Symbol's Name + Addend"] + sym_name = re.sub(r"@.*", "", sym_name) # Can we pin version in C? + vals.append(f"(const char *)&{sym_name} + {addend}") + code_info[name] = (declarator, "{ %s }" % ", ".join(vals)) # pylint: disable= C0209 # consider-using-f-string - # Print declarations + # Print declarations - for name, (decl, _) in sorted(code_info.items()): - type_name = name + '_type' - type_decl = decl % type_name - ss.append(f'''\ + for name, (decl, _) in sorted(code_info.items()): + type_name = name + "_type" + type_decl = decl % type_name + ss.append( + f"""\ typedef {type_decl}; extern __attribute__((weak)) {type_name} {name}; -''') +""" + ) - # Print definitions + # Print definitions - for name, (_, init) in sorted(code_info.items()): - type_name = name + '_type' - ss.append(f'''\ + for name, (_, init) in sorted(code_info.items()): + type_name = name + "_type" + ss.append( + f"""\ const {type_name} {name} = {init}; -''') +""" + ) - ss.append('''\ + ss.append( + """\ #ifdef __cplusplus } // extern "C" #endif -''') +""" + ) + + return "".join(ss) - return ''.join(ss) def main(): - """Driver function""" - parser = argparse.ArgumentParser(description="Generate wrappers for shared library functions.", - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=f"""\ + """Driver function""" + parser = argparse.ArgumentParser( + description="Generate wrappers for shared library functions.", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=f"""\ Examples: $ python3 {me} /usr/lib/x86_64-linux-gnu/libaccountsservice.so.0 Generating libaccountsservice.so.0.tramp.S... - Generating libaccountsservice.so.0.init.c... -""") + Generating libaccountsservice.so.0.init.cpp... +""", + ) - parser.add_argument('library', - metavar='LIB', - help="Library to be wrapped.") - parser.add_argument('--verbose', '-v', - help="Print diagnostic info", - action='count', - default=0) - parser.add_argument('--dlopen-callback', - help="Call user-provided custom callback to load library instead of dlopen", - default='') - parser.add_argument('--dlopen', - help="Emit dlopen call (default)", - dest='dlopen', action='store_true', default=True) - parser.add_argument('--no-dlopen', - help="Do not emit dlopen call (user must load library himself)", - dest='dlopen', action='store_false') - parser.add_argument('--library-load-name', - help="Use custom name for dlopened library (default is LIB)") - parser.add_argument('--lazy-load', - help="Load library lazily on first call to one of it's functions (default)", - dest='lazy_load', action='store_true', default=True) - parser.add_argument('--no-lazy-load', - help="Load library eagerly at program start", - dest='lazy_load', action='store_false') - parser.add_argument('--vtables', - help="Intercept virtual tables (EXPERIMENTAL)", - dest='vtables', action='store_true', default=False) - parser.add_argument('--no-vtables', - help="Do not intercept virtual tables (default)", - dest='vtables', action='store_false') - parser.add_argument('--target', - help="Target platform triple e.g. x86_64-unknown-linux-gnu or arm-none-eabi " - "(atm x86_64, i[0-9]86, arm/armhf/armeabi, aarch64/armv8 " - "and e2k are supported)", - default=os.uname()[-1]) - parser.add_argument('--symbol-list', - help="Path to file with symbols that should be present in wrapper " - "(all by default)") - parser.add_argument('--symbol-prefix', - metavar='PFX', - help="Prefix wrapper symbols with PFX", - default='') - parser.add_argument('-q', '--quiet', - help="Do not print progress info", - action='store_true') - parser.add_argument('--outdir', '-o', - help="Path to create wrapper at", - default='./') + parser.add_argument("library", metavar="LIB", help="Library to be wrapped.") + parser.add_argument("--verbose", "-v", help="Print diagnostic info", action="count", default=0) + parser.add_argument( + "--dlopen-callback", help="Call user-provided custom callback to load library instead of dlopen", default="" + ) + parser.add_argument("--library-load-name", help="Use custom name for dlopened library (default is LIB)") + parser.add_argument( + "--vtables", help="Intercept virtual tables (EXPERIMENTAL)", dest="vtables", action="store_true", default=False + ) + parser.add_argument( + "--no-vtables", help="Do not intercept virtual tables (default)", dest="vtables", action="store_false" + ) + parser.add_argument( + "--target", + help="Target platform triple e.g. x86_64-unknown-linux-gnu or arm-none-eabi " + "(atm x86_64, i[0-9]86, arm/armhf/armeabi, aarch64/armv8 " + "and e2k are supported)", + default=os.uname()[-1], + ) + parser.add_argument( + "--symbol-list", help="Path to file with symbols that should be present in wrapper " "(all by default)" + ) + parser.add_argument("--symbol-prefix", metavar="PFX", help="Prefix wrapper symbols with PFX", default="") + parser.add_argument("-q", "--quiet", help="Do not print progress info", action="store_true") + parser.add_argument("--outdir", "-o", help="Path to create wrapper at", default="./") - args = parser.parse_args() + args = parser.parse_args() - input_name = args.library - verbose = args.verbose - dlopen_callback = args.dlopen_callback - dlopen = args.dlopen - lazy_load = args.lazy_load - load_name = args.library_load_name or os.path.basename(input_name) - if args.target.startswith('arm'): - target = 'arm' # Handle armhf-..., armel-... - elif re.match(r'^i[0-9]86', args.target): - target = 'i386' - else: - target = args.target.split('-')[0] - quiet = args.quiet - outdir = args.outdir + input_name = args.library + verbose = args.verbose + dlopen_callback = args.dlopen_callback + load_name = args.library_load_name or os.path.basename(input_name) + if args.target.startswith("arm"): + target = "arm" # Handle armhf-..., armel-... + elif re.match(r"^i[0-9]86", args.target): + target = "i386" + else: + target = args.target.split("-")[0] + quiet = args.quiet + outdir = args.outdir - if args.symbol_list is None: - funs = None - else: - with open(args.symbol_list, 'r') as f: - funs = [] - for line in re.split(r'\r?\n', f.read()): - line = re.sub(r'#.*', '', line) - line = line.strip() - if line: - funs.append(line) + if args.symbol_list is None: + funs = None + else: + with open(args.symbol_list, "r") as f: + funs = [] + for line in re.split(r"\r?\n", f.read()): + line = re.sub(r"#.*", "", line) + line = line.strip() + if line: + funs.append(line) - # Collect target info + # Collect target info - target_dir = os.path.join(root, 'arch', target) + target_dir = os.path.join(root, "arch", target) - if not os.path.exists(target_dir): - error(f"unknown architecture '{target}'") + if not os.path.exists(target_dir): + error(f"unknown architecture '{target}'") - cfg = configparser.ConfigParser(inline_comment_prefixes=';') - cfg.read(target_dir + '/config.ini') + cfg = configparser.ConfigParser(inline_comment_prefixes=";") + cfg.read(target_dir + "/config.ini") - ptr_size = int(cfg['Arch']['PointerSize']) - symbol_reloc_types = set(re.split(r'\s*,\s*', cfg['Arch']['SymbolReloc'])) + ptr_size = int(cfg["Arch"]["PointerSize"]) + symbol_reloc_types = set(re.split(r"\s*,\s*", cfg["Arch"]["SymbolReloc"])) - def is_exported(s): - return (s['Bind'] != 'LOCAL' - and s['Type'] != 'NOTYPE' - and s['Ndx'] != 'UND' - and s['Name'] not in ['', '_init', '_fini']) + def is_exported(s): + return ( + s["Bind"] != "LOCAL" + and s["Type"] != "NOTYPE" + and s["Ndx"] != "UND" + and s["Name"] not in ["", "_init", "_fini"] + ) - syms = list(filter(is_exported, collect_syms(input_name))) + syms = list(filter(is_exported, collect_syms(input_name))) - def is_data_symbol(s): - return (s['Type'] == 'OBJECT' + def is_data_symbol(s): + return ( + s["Type"] == "OBJECT" # Allow vtables if --vtables is on - and not (' for ' in s['Demangled Name'] and args.vtables)) + and not (" for " in s["Demangled Name"] and args.vtables) + ) - exported_data = [s['Name'] for s in syms if is_data_symbol(s)] - if exported_data: - # TODO: we can generate wrappers for const data without relocations (or only code relocations) - warn(f"library '{input_name}' contains data symbols which won't be intercepted: " - + ', '.join(exported_data)) + exported_data = [s["Name"] for s in syms if is_data_symbol(s)] + if exported_data: + # TODO: we can generate wrappers for const data without relocations (or only code relocations) + warn(f"library '{input_name}' contains data symbols which won't be intercepted: " + ", ".join(exported_data)) - # Collect functions - # TODO: warn if user-specified functions are missing + # Collect functions + # TODO: warn if user-specified functions are missing - orig_funs = filter(lambda s: s['Type'] == 'FUNC', syms) + orig_funs = filter(lambda s: s["Type"] == "FUNC", syms) - all_funs = set() - warn_versioned = False - for s in orig_funs: - if s['Version'] is not None: - # TODO: support versions - if not warn_versioned: - warn(f"library {input_name} contains versioned symbols which are NYI") - warn_versioned = True - if verbose: - print(f"Skipping versioned symbol {s['Name']}") - continue - all_funs.add(s['Name']) + all_funs = set() + warn_versioned = False + for s in orig_funs: + if s["Version"] is not None: + # TODO: support versions + if not warn_versioned: + warn(f"library {input_name} contains versioned symbols which are NYI") + warn_versioned = True + if verbose: + print(f"Skipping versioned symbol {s['Name']}") + continue + all_funs.add(s["Name"]) - if funs is None: - funs = sorted(list(all_funs)) - if not funs and not quiet: - warn(f"no public functions were found in {input_name}") - else: - missing_funs = [name for name in funs if name not in all_funs] - if missing_funs: - warn("some user-specified functions are not present in library: " + ', '.join(missing_funs)) - funs = [name for name in funs if name in all_funs] - - if verbose: - print("Exported functions:") - for i, fun in enumerate(funs): - print(f" {i}: {fun}") - - # Collect vtables - - if args.vtables: - cls_tables = {} - cls_syms = {} - - for s in syms: - m = re.match(r'^(vtable|typeinfo|typeinfo name) for (.*)', s['Demangled Name']) - if m is not None and is_exported(s): - typ, cls = m.groups() - name = s['Name'] - cls_tables.setdefault(cls, {})[typ] = name - cls_syms[name] = s + if funs is None: + funs = sorted(list(all_funs)) + if not funs and not quiet: + warn(f"no public functions were found in {input_name}") + else: + missing_funs = [name for name in funs if name not in all_funs] + if missing_funs: + warn("some user-specified functions are not present in library: " + ", ".join(missing_funs)) + funs = [name for name in funs if name in all_funs] if verbose: - print("Exported classes:") - for cls, _ in sorted(cls_tables.items()): - print(f" {cls}") + print("Exported functions:") + for i, fun in enumerate(funs): + print(f" {i}: {fun}") - secs = collect_sections(input_name) - if verbose: - print("Sections:") - for sec in secs: - print(f" {sec['Name']}: [{sec['Address']:x}, {sec['Address'] + sec['Size']:x}), " - f"at {sec['Off']:x}") + # Collect vtables - bites = read_unrelocated_data(input_name, cls_syms, secs) - - rels = collect_relocs(input_name) - if verbose: - print("Relocs:") - for rel in rels: - sym_add = rel['Symbol\'s Name + Addend'] - print(f" {rel['Offset']}: {sym_add}") - - cls_data = collect_relocated_data(cls_syms, bites, rels, ptr_size, symbol_reloc_types) - if verbose: - print("Class data:") - for name, data in sorted(cls_data.items()): - demangled_name = cls_syms[name]['Demangled Name'] - print(f" {name} ({demangled_name}):") - for typ, val in data: - print(" " + str(val if typ != 'reloc' else val['Symbol\'s Name + Addend'])) - - # Generate assembly code - - suffix = os.path.basename(load_name) - lib_suffix = re.sub(r'[^a-zA-Z_0-9]+', '_', suffix) - - tramp_file = f'{suffix}.tramp.S' - with open(os.path.join(outdir, tramp_file), 'w') as f: - if not quiet: - print(f"Generating {tramp_file}...") - with open(target_dir + '/table.S.tpl', 'r') as t: - table_text = string.Template(t.read()).substitute( - lib_suffix=lib_suffix, - table_size=ptr_size*(len(funs) + 1)) - f.write(table_text) - - with open(target_dir + '/trampoline.S.tpl', 'r') as t: - tramp_tpl = string.Template(t.read()) - - for i, name in enumerate(funs): - tramp_text = tramp_tpl.substitute( - lib_suffix=lib_suffix, - sym=args.symbol_prefix + name, - offset=i*ptr_size, - number=i) - f.write(tramp_text) - - # Generate C code - - init_file = f'{suffix}.init.c' - with open(os.path.join(outdir, init_file), 'w') as f: - if not quiet: - print(f"Generating {init_file}...") - with open(os.path.join(root, 'arch/common/init.c.tpl'), 'r') as t: - if funs: - sym_names = ',\n '.join(f'"{name}"' for name in funs) + ',' - else: - sym_names = '' - init_text = string.Template(t.read()).substitute( - lib_suffix=lib_suffix, - load_name=load_name, - dlopen_callback=dlopen_callback, - has_dlopen_callback=int(bool(dlopen_callback)), - no_dlopen=not int(dlopen), - lazy_load=int(lazy_load), - sym_names=sym_names) - f.write(init_text) if args.vtables: - vtable_text = generate_vtables(cls_tables, cls_syms, cls_data) - f.write(vtable_text) + cls_tables = {} + cls_syms = {} -if __name__ == '__main__': - main() + for s in syms: + m = re.match(r"^(vtable|typeinfo|typeinfo name) for (.*)", s["Demangled Name"]) + if m is not None and is_exported(s): + typ, cls = m.groups() + name = s["Name"] + cls_tables.setdefault(cls, {})[typ] = name + cls_syms[name] = s + + if verbose: + print("Exported classes:") + for cls, _ in sorted(cls_tables.items()): + print(f" {cls}") + + secs = collect_sections(input_name) + if verbose: + print("Sections:") + for sec in secs: + print(f" {sec['Name']}: [{sec['Address']:x}, {sec['Address'] + sec['Size']:x}), " f"at {sec['Off']:x}") + + bites = read_unrelocated_data(input_name, cls_syms, secs) + + rels = collect_relocs(input_name) + if verbose: + print("Relocs:") + for rel in rels: + sym_add = rel["Symbol's Name + Addend"] + print(f" {rel['Offset']}: {sym_add}") + + cls_data = collect_relocated_data(cls_syms, bites, rels, ptr_size, symbol_reloc_types) + if verbose: + print("Class data:") + for name, data in sorted(cls_data.items()): + demangled_name = cls_syms[name]["Demangled Name"] + print(f" {name} ({demangled_name}):") + for typ, val in data: + print(" " + str(val if typ != "reloc" else val["Symbol's Name + Addend"])) + + # Generate assembly code + + suffix = os.path.basename(load_name) + lib_suffix = re.sub(r"[^a-zA-Z_0-9]+", "_", suffix) + + tramp_file = f"{suffix}.tramp.S" + with open(os.path.join(outdir, tramp_file), "w") as f: + if not quiet: + print(f"Generating {tramp_file}...") + with open(target_dir + "/table.S.tpl", "r") as t: + table_text = string.Template(t.read()).substitute( + lib_suffix=lib_suffix, table_size=ptr_size * (len(funs) + 1) + ) + f.write(table_text) + + with open(target_dir + "/trampoline.S.tpl", "r") as t: + tramp_tpl = string.Template(t.read()) + + for i, name in enumerate(funs): + tramp_text = tramp_tpl.substitute( + lib_suffix=lib_suffix, sym=args.symbol_prefix + name, offset=i * ptr_size, number=i + ) + f.write(tramp_text) + + # Generate C code + + init_file = f"{suffix}.init.cpp" + with open(os.path.join(outdir, init_file), "w") as f: + if not quiet: + print(f"Generating {init_file}...") + with open(os.path.join(root, "arch/common/init.cpp.tpl"), "r") as t: + if funs: + sym_names = ",\n ".join(f'"{name}"' for name in funs) + "," + else: + sym_names = "" + init_text = string.Template(t.read()).substitute( + lib_suffix=lib_suffix, + load_name=load_name, + dlopen_callback=dlopen_callback, + has_dlopen_callback=int(bool(dlopen_callback)), + sym_names=sym_names, + ) + f.write(init_text) + if args.vtables: + vtable_text = generate_vtables(cls_tables, cls_syms, cls_data) + f.write(vtable_text) + + +if __name__ == "__main__": + main() diff --git a/documentation/sphinx/source/command-line-interface.rst b/documentation/sphinx/source/command-line-interface.rst index 14f8eaf1db..b81252994e 100644 --- a/documentation/sphinx/source/command-line-interface.rst +++ b/documentation/sphinx/source/command-line-interface.rst @@ -131,12 +131,35 @@ The default is ``disabled``, which means changing the storage engine will not be ``aggressive`` tries to replace as many storages as it can at once, and will recruit a new storage server on the same process as the old one. This will be faster, but can potentially hit degraded performance or OOM with two storages on the same process. The main benefit over ``gradual`` is that this doesn't need to take one storage out of rotation, so it works for small or development clusters that have the same number of storage processes as the replication factor. Note that ``aggressive`` is not exclusive to running the perpetual wiggle. ``disabled`` means that if the storage engine is changed, fdb will not move the cluster over to the new storage engine. This will disable the perpetual wiggle from rewriting storage files. +consistencyscan +---------------- + +This command controls a native data consistency scan role that is automatically recruited in the FDB cluster. The consistency scan reads all replicas of each shard to verify data consistency. It is useful for finding corrupt cold data by ensuring that all data is read periodically. Any errors found will be logged as TraceEvents with Severity = 40. + +The syntax is + +``consistencyscan [ off | on [maxRate ] [targetInterval ] [restart ] ]`` + +* ``off`` will disable the consistency scan + +* ``on`` will enable the scan and can be accompanied by additional options shown above + + * ``RATE`` - sets the maximum read speed of the scan in bytes/s. + + * ``INTERVAL`` - sets the target completion time, in seconds, for each full pass over all data in the cluster. Scan speed will target this interval with a hard limit of RATE. + + * ``RESTART`` - a 1 or 0 and controls whether the process should restart from the beginning of userspace on startup or not. This should normally be set to 0 which will resume progress from the last time the scan was running. + +The consistency scan role publishes its configuration and metrics in Status JSON under the path ``.cluster.consistency_scan_info``. + consistencycheck ---------------- -The ``consistencycheck`` command enables or disables consistency checking. Its syntax is ``consistencycheck [on|off]``. Calling it with ``on`` enables consistency checking, and ``off`` disables it. Calling it with no arguments displays whether consistency checking is currently enabled. +Note: This command exists for backward compatibility, it is suggested to use the ``consistencyscan`` command to control FDB's internal consistency scan role instead. -You must be running an ``fdbserver`` process with the ``consistencycheck`` role to perform consistency checking. +This command controls a key which controls behavior of any externally configured consistency check roles. You must be running an ``fdbserver`` process with the ``consistencycheck`` role to perform consistency checking. + +The ``consistencycheck`` command enables or disables consistency checking. Its syntax is ``consistencycheck [on|off]``. Calling it with ``on`` enables consistency checking, and ``off`` disables it. Calling it with no arguments displays whether consistency checking is currently enabled. coordinators ------------ diff --git a/fdbbackup/FileConverter.actor.cpp b/fdbbackup/FileConverter.actor.cpp index 14f2adaba3..cbfd6693c9 100644 --- a/fdbbackup/FileConverter.actor.cpp +++ b/fdbbackup/FileConverter.actor.cpp @@ -608,7 +608,7 @@ int main(int argc, char** argv) { setupNetwork(0, UseMetrics::True); TraceEvent::setNetworkThread(); - openTraceFile(NetworkAddress(), 10 << 20, 10 << 20, param.log_dir, "convert", param.trace_log_group); + openTraceFile({}, 10 << 20, 10 << 20, param.log_dir, "convert", param.trace_log_group); auto f = stopAfter(convert(param)); diff --git a/fdbbackup/FileDecoder.actor.cpp b/fdbbackup/FileDecoder.actor.cpp index 248c7d4adf..406d981095 100644 --- a/fdbbackup/FileDecoder.actor.cpp +++ b/fdbbackup/FileDecoder.actor.cpp @@ -641,7 +641,7 @@ int main(int argc, char** argv) { param.updateKnobs(); TraceEvent::setNetworkThread(); - openTraceFile(NetworkAddress(), 10 << 20, 500 << 20, param.log_dir, "decode", param.trace_log_group); + openTraceFile({}, 10 << 20, 500 << 20, param.log_dir, "decode", param.trace_log_group); param.tlsConfig.setupBlobCredentials(); auto f = stopAfter(decode_logs(param)); diff --git a/fdbbackup/backup.actor.cpp b/fdbbackup/backup.actor.cpp index 15ed71a09d..f98b8177d4 100644 --- a/fdbbackup/backup.actor.cpp +++ b/fdbbackup/backup.actor.cpp @@ -3973,7 +3973,7 @@ int main(int argc, char* argv[]) { // a cluster so they should use this instead. auto initTraceFile = [&]() { if (trace) - openTraceFile(NetworkAddress(), traceRollSize, traceMaxLogsSize, traceDir, "trace", traceLogGroup); + openTraceFile({}, traceRollSize, traceMaxLogsSize, traceDir, "trace", traceLogGroup); }; auto initCluster = [&](bool quiet = false) { diff --git a/fdbclient/BackupAgentBase.actor.cpp b/fdbclient/BackupAgentBase.actor.cpp index 9639cbc08a..5c7ffda624 100644 --- a/fdbclient/BackupAgentBase.actor.cpp +++ b/fdbclient/BackupAgentBase.actor.cpp @@ -24,6 +24,7 @@ #include "fdbclient/BackupAgent.actor.h" #include "fdbclient/BlobCipher.h" #include "fdbclient/CommitTransaction.h" +#include "fdbclient/FDBTypes.h" #include "fdbclient/GetEncryptCipherKeys.actor.h" #include "fdbclient/DatabaseContext.h" #include "fdbclient/ManagementAPI.actor.h" @@ -32,7 +33,6 @@ #include "fdbclient/TenantManagement.actor.h" #include "fdbrpc/simulator.h" #include "flow/ActorCollection.h" -#include "flow/Trace.h" #include "flow/actorcompiler.h" // has to be last include FDB_DEFINE_BOOLEAN_PARAM(LockDB); @@ -252,6 +252,34 @@ Version getLogKeyVersion(Key key) { return bigEndian64(*(int64_t*)(key.begin() + backupLogPrefixBytes + sizeof(UID) + sizeof(uint8_t))); } +bool validTenantAccess(std::map* tenantMap, + MutationRef m, + bool provisionalProxy, + Version version) { + if (isSystemKey(m.param1)) { + return true; + } + int64_t tenantId = TenantInfo::INVALID_TENANT; + if (m.isEncrypted()) { + tenantId = m.encryptionHeader()->cipherTextDetails.encryptDomainId; + } else { + tenantId = TenantAPI::extractTenantIdFromMutation(m); + } + ASSERT(tenantMap != nullptr); + if (m.isEncrypted() && isReservedEncryptDomain(tenantId)) { + // These are valid encrypt domains so don't check the tenant map + } else if (tenantMap->find(tenantId) == tenantMap->end()) { + // If a tenant is not found for a given mutation then exclude it from the batch + ASSERT(!provisionalProxy); + TraceEvent(SevWarnAlways, "MutationLogRestoreTenantNotFound") + .detail("Version", version) + .detail("TenantId", tenantId); + CODE_PROBE(true, "mutation log restore tenant not found"); + return false; + } + return true; +} + // Given a key from one of the ranges returned by get_log_ranges, // returns(version, part) where version is the database version number of // the transaction log data in the value, and part is 0 for the first such @@ -320,29 +348,49 @@ ACTOR static Future decodeBackupLogValue(Arena* arena, offset += len2; state Optional encryptedLogValue = Optional(); + // Check for valid tenant in required tenant mode. If the tenant does not exist in our tenant map then + // we EXCLUDE the mutation (of that respective tenant) during the restore. NOTE: This simply allows a + // restore to make progress in the event of tenant deletion, but tenant deletion should be considered + // carefully so that we do not run into this case. We do this check here so if encrypted mutations are not + // found in the tenant map then we exit early without needing to reach out to the EKP. + if (config.tenantMode == TenantMode::REQUIRED && + config.encryptionAtRestMode.mode != EncryptionAtRestMode::CLUSTER_AWARE && + !validTenantAccess(tenantMap, logValue, provisionalProxy, version)) { + consumed += BackupAgentBase::logHeaderSize + len1 + len2; + continue; + } + // Decrypt mutation ref if encrypted if (logValue.isEncrypted()) { encryptedLogValue = logValue; + state EncryptCipherDomainId domainId = logValue.encryptionHeader()->cipherTextDetails.encryptDomainId; Reference const> dbInfo = cx->clientInfo; - TextAndHeaderCipherKeys cipherKeys = - wait(getEncryptCipherKeys(dbInfo, *logValue.encryptionHeader(), BlobCipherMetrics::BACKUP)); - logValue = logValue.decrypt(cipherKeys, tempArena, BlobCipherMetrics::BACKUP); + try { + TextAndHeaderCipherKeys cipherKeys = + wait(getEncryptCipherKeys(dbInfo, *logValue.encryptionHeader(), BlobCipherMetrics::RESTORE)); + logValue = logValue.decrypt(cipherKeys, tempArena, BlobCipherMetrics::BACKUP); + } catch (Error& e) { + // It's possible a tenant was deleted and the encrypt key fetch failed + TraceEvent(SevWarnAlways, "MutationLogRestoreEncryptKeyFetchFailed") + .detail("Version", version) + .detail("TenantId", domainId); + if (e.code() == error_code_encrypt_keys_fetch_failed) { + CODE_PROBE(true, "mutation log restore encrypt keys not found"); + consumed += BackupAgentBase::logHeaderSize + len1 + len2; + continue; + } else { + throw; + } + } } ASSERT(!logValue.isEncrypted()); - if (config.tenantMode == TenantMode::REQUIRED && !isSystemKey(logValue.param1)) { - // If a tenant is not found for a given mutation then exclude it from the batch - int64_t tenantId = TenantAPI::extractTenantIdFromMutation(logValue); - ASSERT(tenantMap != nullptr); - if (tenantMap->find(tenantId) == tenantMap->end()) { - ASSERT(!provisionalProxy); - TraceEvent(SevWarnAlways, "MutationLogRestoreTenantNotFound") - .detail("Version", version) - .detail("TenantId", tenantId); - CODE_PROBE(true, "mutation log restore tenant not found"); - consumed += BackupAgentBase::logHeaderSize + len1 + len2; - continue; - } + // If the mutation was encrypted using cluster aware encryption then check after decryption + if (config.tenantMode == TenantMode::REQUIRED && + config.encryptionAtRestMode.mode == EncryptionAtRestMode::CLUSTER_AWARE && + !validTenantAccess(tenantMap, logValue, provisionalProxy, version)) { + consumed += BackupAgentBase::logHeaderSize + len1 + len2; + continue; } MutationRef originalLogValue = logValue; diff --git a/fdbclient/BackupContainerFileSystem.actor.cpp b/fdbclient/BackupContainerFileSystem.actor.cpp index 7cd267b12c..197f31bf21 100644 --- a/fdbclient/BackupContainerFileSystem.actor.cpp +++ b/fdbclient/BackupContainerFileSystem.actor.cpp @@ -973,23 +973,6 @@ public: continue; restorable.snapshot = snapshots[i]; - // TODO: Reenable the sanity check after TooManyFiles error is resolved - if (false && g_network->isSimulated()) { - // Sanity check key ranges - // TODO: If we want to re-enable this codepath, make sure that we are passing a valid DB object (instead - // of the DB object created on the line below) - ASSERT(false); - state Database cx; - state std::map::iterator rit; - for (rit = restorable.keyRanges.begin(); rit != restorable.keyRanges.end(); rit++) { - auto it = std::find_if(restorable.ranges.begin(), - restorable.ranges.end(), - [file = rit->first](const RangeFile f) { return f.fileName == file; }); - ASSERT(it != restorable.ranges.end()); - KeyRange result = wait(bc->getSnapshotFileKeyRange(*it, cx)); - ASSERT(rit->second.begin <= result.begin && rit->second.end >= result.end); - } - } // No logs needed if there is a complete filtered key space snapshot at the target version. if (minKeyRangeVersion == maxKeyRangeVersion && maxKeyRangeVersion == restorable.targetVersion) { diff --git a/fdbclient/BlobCipher.cpp b/fdbclient/BlobCipher.cpp index 1eb338f13c..25ed22564e 100644 --- a/fdbclient/BlobCipher.cpp +++ b/fdbclient/BlobCipher.cpp @@ -85,6 +85,7 @@ BlobCipherMetrics::BlobCipherMetrics() CounterSet(cc, "KVRedwood"), CounterSet(cc, "BlobGranule"), CounterSet(cc, "Backup"), + CounterSet(cc, "Restore"), CounterSet(cc, "Test") }) { specialCounter(cc, "CacheSize", []() { return BlobCipherKeyCache::getInstance()->getSize(); }); traceFuture = cc.traceCounters("BlobCipherMetrics", UID(), FLOW_KNOBS->ENCRYPT_KEY_CACHE_LOGGING_INTERVAL); @@ -102,6 +103,8 @@ std::string toString(BlobCipherMetrics::UsageType type) { return "BlobGranule"; case BlobCipherMetrics::UsageType::BACKUP: return "Backup"; + case BlobCipherMetrics::UsageType::RESTORE: + return "Restore"; case BlobCipherMetrics::UsageType::TEST: return "Test"; default: diff --git a/fdbclient/ClientKnobs.cpp b/fdbclient/ClientKnobs.cpp index 62554b793c..b4ff5005ad 100644 --- a/fdbclient/ClientKnobs.cpp +++ b/fdbclient/ClientKnobs.cpp @@ -301,6 +301,7 @@ void ClientKnobs::initialize(Randomize randomize) { init( CLIENT_ENABLE_USING_CLUSTER_ID_KEY, false ); init( ENABLE_ENCRYPTION_CPU_TIME_LOGGING, false ); + init( SIMULATION_EKP_TENANT_IDS_TO_DROP, "-1" ); // clang-format on } diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index baa667d5d6..49e23c205b 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -21,6 +21,7 @@ #include "fdbclient/DatabaseConfiguration.h" #include "fdbclient/TenantEntryCache.actor.h" #include "fdbclient/TenantManagement.actor.h" +#include "fdbrpc/TenantInfo.h" #include "fdbrpc/simulator.h" #include "flow/FastRef.h" #include "fmt/format.h" @@ -610,7 +611,7 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter { int64_t dataLen, Arena* arena) { Reference const> dbInfo = cx->clientInfo; - TextAndHeaderCipherKeys cipherKeys = wait(getEncryptCipherKeys(dbInfo, header, BlobCipherMetrics::BACKUP)); + TextAndHeaderCipherKeys cipherKeys = wait(getEncryptCipherKeys(dbInfo, header, BlobCipherMetrics::RESTORE)); ASSERT(cipherKeys.cipherHeaderKey.isValid() && cipherKeys.cipherTextKey.isValid()); validateEncryptionHeader(cipherKeys.cipherHeaderKey, cipherKeys.cipherTextKey, header); DecryptBlobCipherAes256Ctr decryptor( @@ -1131,6 +1132,7 @@ ACTOR Future>> decodeRangeFileBlock(Reference< wait(tenantCache.get()->init()); } state EncryptionAtRestMode encryptMode = config.encryptionAtRestMode; + state int64_t blockTenantId = TenantInfo::INVALID_TENANT; try { // Read header, currently only decoding BACKUP_AGENT_SNAPSHOT_FILE_VERSION or @@ -1142,7 +1144,7 @@ ACTOR Future>> decodeRangeFileBlock(Reference< } else if (file_version == BACKUP_AGENT_ENCRYPTED_SNAPSHOT_FILE_VERSION) { CODE_PROBE(true, "decoding encrypted block"); // decode options struct - uint32_t optionsLen = reader.consumeNetworkUInt32(); + state uint32_t optionsLen = reader.consumeNetworkUInt32(); const uint8_t* o = reader.consume(optionsLen); StringRef optionsStringRef = StringRef(o, optionsLen); EncryptedRangeFileWriter::Options options = @@ -1150,9 +1152,17 @@ ACTOR Future>> decodeRangeFileBlock(Reference< ASSERT(!options.compressionEnabled); // read encryption header - const uint8_t* headerStart = reader.consume(BlobCipherEncryptHeader::headerSize); + state const uint8_t* headerStart = reader.consume(BlobCipherEncryptHeader::headerSize); StringRef headerS = StringRef(headerStart, BlobCipherEncryptHeader::headerSize); state BlobCipherEncryptHeader header = BlobCipherEncryptHeader::fromStringRef(headerS); + blockTenantId = header.cipherTextDetails.encryptDomainId; + if (config.tenantMode == TenantMode::REQUIRED && !isReservedEncryptDomain(blockTenantId)) { + ASSERT(tenantCache.present()); + Optional> payload = wait(tenantCache.get()->getById(blockTenantId)); + if (!payload.present()) { + throw tenant_not_found(); + } + } const uint8_t* dataPayloadStart = headerStart + BlobCipherEncryptHeader::headerSize; // calculate the total bytes read up to (and including) the header int64_t bytesRead = sizeof(int32_t) + sizeof(uint32_t) + optionsLen + BlobCipherEncryptHeader::headerSize; @@ -1167,6 +1177,13 @@ ACTOR Future>> decodeRangeFileBlock(Reference< } return results; } catch (Error& e) { + if (e.code() == error_code_encrypt_keys_fetch_failed) { + TraceEvent(SevWarnAlways, "SnapshotRestoreEncryptKeyFetchFailed").detail("TenantId", blockTenantId); + CODE_PROBE(true, "Snapshot restore encrypt keys not found"); + } else if (e.code() == error_code_tenant_not_found) { + TraceEvent(SevWarnAlways, "EncryptedSnapshotRestoreTenantNotFound").detail("TenantId", blockTenantId); + CODE_PROBE(true, "Encrypted Snapshot restore tenant not found"); + } TraceEvent(SevWarn, "FileRestoreDecodeRangeFileBlockFailed") .error(e) .detail("Filename", file->getFilename()) @@ -3552,9 +3569,6 @@ struct RestoreRangeTaskFunc : RestoreFileTaskFuncBase { } state int64_t tenantId = TenantAPI::extractTenantIdFromKeyRef(key); Optional> payload = wait(tenantCache->getById(tenantId)); - if (!payload.present()) { - TraceEvent(SevError, "SnapshotRestoreInvalidTenantAccess").detail("Tenant", tenantId); - } ASSERT(payload.present()); return Void(); } @@ -3607,8 +3621,17 @@ struct RestoreRangeTaskFunc : RestoreFileTaskFuncBase { } state Reference inFile = wait(bc.get()->readFile(rangeFile.fileName)); - state Standalone> blockData = - wait(decodeRangeFileBlock(inFile, readOffset, readLen, cx)); + state Standalone> blockData; + try { + Standalone> data = wait(decodeRangeFileBlock(inFile, readOffset, readLen, cx)); + blockData = data; + } catch (Error& e) { + // It's possible a tenant was deleted and the encrypt key fetch failed + if (e.code() == error_code_encrypt_keys_fetch_failed || e.code() == error_code_tenant_not_found) { + return Void(); + } + throw; + } state Optional>> tenantCache; state std::vector> validTenantCheckFutures; state Arena arena; diff --git a/fdbclient/MultiVersionTransaction.actor.cpp b/fdbclient/MultiVersionTransaction.actor.cpp index 1748807cc4..1010e2dbfe 100644 --- a/fdbclient/MultiVersionTransaction.actor.cpp +++ b/fdbclient/MultiVersionTransaction.actor.cpp @@ -46,6 +46,7 @@ #include "flow/Platform.h" #include "flow/ProtocolVersion.h" #include "flow/UnitTest.h" +#include "flow/Trace.h" #ifdef __unixish__ #include @@ -2906,123 +2907,129 @@ void MultiVersionApi::setNetworkOptionInternal(FDBNetworkOptions::Option option, } void MultiVersionApi::setupNetwork() { - if (!externalClient) { - loadEnvironmentVariableNetworkOptions(); - } - - uint64_t transportId = 0; - { // lock scope - MutexHolder holder(lock); - if (networkStartSetup) { - throw network_already_setup(); + try { + if (!externalClient) { + loadEnvironmentVariableNetworkOptions(); } - if (threadCount > 1) { - disableLocalClient(); - } + uint64_t transportId = 0; + { // lock scope + MutexHolder holder(lock); + if (networkStartSetup) { + throw network_already_setup(); + } - if (!apiVersion.hasFailOnExternalClientErrors()) { - ignoreExternalClientFailures = true; - } + if (threadCount > 1) { + disableLocalClient(); + } - for (auto i : externalClientDescriptions) { - std::string path = i.second.libPath; - std::string filename = basename(path); - bool useFutureVersion = i.second.useFutureVersion; + networkStartSetup = true; - // Copy external lib for each thread - if (externalClients.count(filename) == 0) { - externalClients[filename] = {}; - auto libCopies = copyExternalLibraryPerThread(path); - for (int idx = 0; idx < libCopies.size(); ++idx) { - bool unlinkOnLoad = libCopies[idx].second && !retainClientLibCopies; - externalClients[filename].push_back(Reference( - new ClientInfo(new DLApi(libCopies[idx].first, unlinkOnLoad /*unlink on load*/), - path, - useFutureVersion, - idx))); + if (externalClientDescriptions.empty() && localClientDisabled) { + TraceEvent(SevWarn, "CannotSetupNetwork") + .detail("Reason", "Local client is disabled and no external clients configured"); + + throw no_external_client_provided(); + } + + if (externalClientDescriptions.empty() && !disableBypass) { + bypassMultiClientApi = true; // SOMEDAY: we won't be able to set this option once it becomes possible to + // add clients after setupNetwork is called + } + + if (!bypassMultiClientApi) { + transportId = + (uint64_t(uint32_t(platform::getRandomSeed())) << 32) ^ uint32_t(platform::getRandomSeed()); + if (transportId <= 1) + transportId += 2; + localClient->api->setNetworkOption(FDBNetworkOptions::EXTERNAL_CLIENT_TRANSPORT_ID, + std::to_string(transportId)); + } + localClient->api->setupNetwork(); + + if (!apiVersion.hasFailOnExternalClientErrors()) { + ignoreExternalClientFailures = true; + } + + for (auto i : externalClientDescriptions) { + std::string path = i.second.libPath; + std::string filename = basename(path); + bool useFutureVersion = i.second.useFutureVersion; + + // Copy external lib for each thread + if (externalClients.count(filename) == 0) { + externalClients[filename] = {}; + auto libCopies = copyExternalLibraryPerThread(path); + for (int idx = 0; idx < libCopies.size(); ++idx) { + bool unlinkOnLoad = libCopies[idx].second && !retainClientLibCopies; + externalClients[filename].push_back(Reference( + new ClientInfo(new DLApi(libCopies[idx].first, unlinkOnLoad /*unlink on load*/), + path, + useFutureVersion, + idx))); + } } } } - if (externalClients.empty() && localClientDisabled) { - TraceEvent(SevWarn, "CannotSetupNetwork") - .detail("Reason", "Local client is disabled and no external clients configured"); + localClient->loadVersion(); - throw no_external_client_provided(); + if (bypassMultiClientApi) { + networkSetup = true; + } else { + runOnExternalClientsAllThreads( + [this](Reference client) { + TraceEvent("InitializingExternalClient").detail("LibraryPath", client->libPath); + client->api->selectApiVersion(apiVersion.version()); + if (client->useFutureVersion) { + client->api->useFutureProtocolVersion(); + } + client->loadVersion(); + }, + false, + !ignoreExternalClientFailures); + + std::string baseTraceFileId; + if (apiVersion.hasTraceFileIdentifier()) { + // TRACE_FILE_IDENTIFIER option is supported since 6.3 + baseTraceFileId = traceFileIdentifier.empty() ? format("%d", getpid()) : traceFileIdentifier; + } + + MutexHolder holder(lock); + runOnExternalClientsAllThreads( + [this, transportId, baseTraceFileId](Reference client) { + for (auto option : options) { + client->api->setNetworkOption(option.first, option.second.castTo()); + } + client->api->setNetworkOption(FDBNetworkOptions::EXTERNAL_CLIENT_TRANSPORT_ID, + std::to_string(transportId)); + if (!baseTraceFileId.empty()) { + client->api->setNetworkOption(FDBNetworkOptions::TRACE_FILE_IDENTIFIER, + traceShareBaseNameAmongThreads + ? baseTraceFileId + : client->getTraceFileIdentifier(baseTraceFileId)); + } + client->api->setupNetwork(); + }, + false, + !ignoreExternalClientFailures); + + if (localClientDisabled && !hasNonFailedExternalClients()) { + TraceEvent(SevWarn, "CannotSetupNetwork") + .detail("Reason", "Local client is disabled and all external clients failed"); + throw all_external_clients_failed(); + } + + networkSetup = true; // Needs to be guarded by mutex } - networkStartSetup = true; - - if (externalClients.empty() && !disableBypass) { - bypassMultiClientApi = true; // SOMEDAY: we won't be able to set this option once it becomes possible to - // add clients after setupNetwork is called - } - - if (!bypassMultiClientApi) { - transportId = (uint64_t(uint32_t(platform::getRandomSeed())) << 32) ^ uint32_t(platform::getRandomSeed()); - if (transportId <= 1) - transportId += 2; - localClient->api->setNetworkOption(FDBNetworkOptions::EXTERNAL_CLIENT_TRANSPORT_ID, - std::to_string(transportId)); - } - localClient->api->setupNetwork(); + options.clear(); + updateSupportedVersions(); + } catch (Error& e) { + // Make sure all error and warning events are traced + flushTraceFileVoid(); + throw e; } - - localClient->loadVersion(); - - if (bypassMultiClientApi) { - networkSetup = true; - } else { - runOnExternalClientsAllThreads( - [this](Reference client) { - TraceEvent("InitializingExternalClient").detail("LibraryPath", client->libPath); - client->api->selectApiVersion(apiVersion.version()); - if (client->useFutureVersion) { - client->api->useFutureProtocolVersion(); - } - client->loadVersion(); - }, - false, - !ignoreExternalClientFailures); - - std::string baseTraceFileId; - if (apiVersion.hasTraceFileIdentifier()) { - // TRACE_FILE_IDENTIFIER option is supported since 6.3 - baseTraceFileId = traceFileIdentifier.empty() ? format("%d", getpid()) : traceFileIdentifier; - } - - MutexHolder holder(lock); - runOnExternalClientsAllThreads( - [this, transportId, baseTraceFileId](Reference client) { - for (auto option : options) { - client->api->setNetworkOption(option.first, option.second.castTo()); - } - client->api->setNetworkOption(FDBNetworkOptions::EXTERNAL_CLIENT_TRANSPORT_ID, - std::to_string(transportId)); - if (!baseTraceFileId.empty()) { - client->api->setNetworkOption(FDBNetworkOptions::TRACE_FILE_IDENTIFIER, - traceShareBaseNameAmongThreads - ? baseTraceFileId - : client->getTraceFileIdentifier(baseTraceFileId)); - } - client->api->setupNetwork(); - }, - false, - !ignoreExternalClientFailures); - - if (localClientDisabled && !hasNonFailedExternalClients()) { - TraceEvent(SevWarn, "CannotSetupNetwork") - .detail("Reason", "Local client is disabled and all external clients failed"); - - throw all_external_clients_failed(); - } - - networkSetup = true; // Needs to be guarded by mutex - } - - options.clear(); - updateSupportedVersions(); } THREAD_FUNC_RETURN runNetworkThread(void* param) { diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 3593d0e61e..f341829c8b 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -161,7 +161,7 @@ TLSConfig tlsConfig(TLSEndpointType::CLIENT); // The default values, TRACE_DEFAULT_ROLL_SIZE and TRACE_DEFAULT_MAX_LOGS_SIZE are located in Trace.h. NetworkOptions::NetworkOptions() : traceRollSize(TRACE_DEFAULT_ROLL_SIZE), traceMaxLogsSize(TRACE_DEFAULT_MAX_LOGS_SIZE), traceLogGroup("default"), - traceFormat("xml"), traceClockSource("now"), + traceFormat("xml"), traceClockSource("now"), traceInitializeOnSetup(false), supportedVersions(new ReferencedObject>>()), runLoopProfilingEnabled(false), primaryClient(true) {} @@ -2217,6 +2217,99 @@ void DatabaseContext::expireThrottles() { extern IPAddress determinePublicIPAutomatically(ClusterConnectionString& ccs); +// Initialize tracing for FDB client +// +// connRecord is necessary for determining the local IP, which is then included in the trace +// file name, and also used to annotate all trace events. +// +// If trace_initialize_on_setup is not set, tracing is initialized when opening a database. +// In that case we can immediatelly determine the IP. Thus, we can use the IP in the +// trace file name and annotate all events with it. +// +// If trace_initialize_on_setup network option is set, tracing is at first initialized without +// connRecord and thus without the local IP. In that case we cannot use the local IP in the +// trace file names. The IP is then provided by a repeated call to initializeClientTracing +// when opening a database. All tracing events from this point are annotated with the local IP +// +// If tracing initialization is completed, further calls to initializeClientTracing are ignored +void initializeClientTracing(Reference connRecord, Optional apiVersion) { + if (!networkOptions.traceDirectory.present()) { + return; + } + + bool initialized = traceFileIsOpen(); + if (initialized && (isTraceLocalAddressSet() || !connRecord)) { + // Tracing initialization is completed + return; + } + + // Network must be created before initializing tracing + ASSERT(g_network); + + Optional localAddress; + if (connRecord) { + auto publicIP = determinePublicIPAutomatically(connRecord->getConnectionString()); + localAddress = NetworkAddress(publicIP, ::getpid()); + } + platform::ImageInfo imageInfo = platform::getImageInfo(); + + if (initialized) { + // Tracing already initialized, just need to update the IP address + setTraceLocalAddress(localAddress.get()); + TraceEvent("ClientStart") + .detail("SourceVersion", getSourceVersion()) + .detail("Version", FDB_VT_VERSION) + .detail("PackageName", FDB_VT_PACKAGE_NAME) + .detailf("ActualTime", "%lld", DEBUG_DETERMINISM ? 0 : time(nullptr)) + .detail("ApiVersion", apiVersion) + .detail("ClientLibrary", imageInfo.fileName) + .detailf("ImageOffset", "%p", imageInfo.offset) + .detail("Primary", networkOptions.primaryClient) + .trackLatest("ClientStart"); + } else { + // Initialize tracing + selectTraceFormatter(networkOptions.traceFormat); + selectTraceClockSource(networkOptions.traceClockSource); + addUniversalTraceField("ClientDescription", + format("%s-%s-%" PRIu64, + networkOptions.primaryClient ? "primary" : "external", + FDB_VT_VERSION, + deterministicRandom()->randomUInt64())); + + std::string identifier = networkOptions.traceFileIdentifier; + openTraceFile(localAddress, + networkOptions.traceRollSize, + networkOptions.traceMaxLogsSize, + networkOptions.traceDirectory.get(), + "trace", + networkOptions.traceLogGroup, + identifier, + networkOptions.tracePartialFileSuffix); + + TraceEvent("ClientStart") + .detail("SourceVersion", getSourceVersion()) + .detail("Version", FDB_VT_VERSION) + .detail("PackageName", FDB_VT_PACKAGE_NAME) + .detailf("ActualTime", "%lld", DEBUG_DETERMINISM ? 0 : time(nullptr)) + .detail("ApiVersion", apiVersion) + .detail("ClientLibrary", imageInfo.fileName) + .detailf("ImageOffset", "%p", imageInfo.offset) + .detail("Primary", networkOptions.primaryClient) + .trackLatest("ClientStart"); + + g_network->initMetrics(); + FlowTransport::transport().initMetrics(); + initTraceEventMetrics(); + } + + // Initialize system monitoring once the local IP is available + if (localAddress.present()) { + initializeSystemMonitorMachineState(SystemMonitorMachineState(IPAddress(localAddress.get().ip))); + systemMonitor(); + uncancellable(recurring(&systemMonitor, CLIENT_KNOBS->SYSTEM_MONITOR_INTERVAL, TaskPriority::FlushTrace)); + } +} + // Creates a database object that represents a connection to a cluster // This constructor uses a preallocated DatabaseContext that may have been created // on another thread @@ -2230,49 +2323,7 @@ Database Database::createDatabase(Reference connRecord ASSERT(TraceEvent::isNetworkThread()); - platform::ImageInfo imageInfo = platform::getImageInfo(); - - if (connRecord) { - if (networkOptions.traceDirectory.present() && !traceFileIsOpen()) { - g_network->initMetrics(); - FlowTransport::transport().initMetrics(); - initTraceEventMetrics(); - - auto publicIP = determinePublicIPAutomatically(connRecord->getConnectionString()); - selectTraceFormatter(networkOptions.traceFormat); - selectTraceClockSource(networkOptions.traceClockSource); - addUniversalTraceField("ClientDescription", - format("%s-%s-%" PRIu64, - networkOptions.primaryClient ? "primary" : "external", - FDB_VT_VERSION, - getTraceThreadId())); - - openTraceFile(NetworkAddress(publicIP, ::getpid()), - networkOptions.traceRollSize, - networkOptions.traceMaxLogsSize, - networkOptions.traceDirectory.get(), - "trace", - networkOptions.traceLogGroup, - networkOptions.traceFileIdentifier, - networkOptions.tracePartialFileSuffix); - - TraceEvent("ClientStart") - .detail("SourceVersion", getSourceVersion()) - .detail("Version", FDB_VT_VERSION) - .detail("PackageName", FDB_VT_PACKAGE_NAME) - .detailf("ActualTime", "%lld", DEBUG_DETERMINISM ? 0 : time(nullptr)) - .detail("ApiVersion", apiVersion) - .detail("ClientLibrary", imageInfo.fileName) - .detailf("ImageOffset", "%p", imageInfo.offset) - .detail("Primary", networkOptions.primaryClient) - .trackLatest("ClientStart"); - - initializeSystemMonitorMachineState(SystemMonitorMachineState(IPAddress(publicIP))); - - systemMonitor(); - uncancellable(recurring(&systemMonitor, CLIENT_KNOBS->SYSTEM_MONITOR_INTERVAL, TaskPriority::FlushTrace)); - } - } + initializeClientTracing(connRecord, apiVersion); g_network->initTLS(); @@ -2324,7 +2375,7 @@ Database Database::createDatabase(Reference connRecord .detail("Version", FDB_VT_VERSION) .detail("ClusterFile", connRecord ? connRecord->toString() : "None") .detail("ConnectionString", connRecord ? connRecord->getConnectionString().toString() : "None") - .detail("ClientLibrary", imageInfo.fileName) + .detail("ClientLibrary", platform::getImageInfo().fileName) .detail("Primary", networkOptions.primaryClient) .detail("Internal", internal) .trackLatest(database->connectToDatabaseEventCacheHolder.trackingKey); @@ -2408,6 +2459,9 @@ void setNetworkOption(FDBNetworkOptions::Option option, Optional valu validateOptionValuePresent(value); networkOptions.tracePartialFileSuffix = value.get().toString(); break; + case FDBNetworkOptions::TRACE_INITIALIZE_ON_SETUP: + networkOptions.traceInitializeOnSetup = true; + break; case FDBNetworkOptions::KNOB: { validateOptionValuePresent(value); @@ -2608,6 +2662,10 @@ void setupNetwork(uint64_t transportId, UseMetrics useMetrics) { FlowTransport::createInstance(true, transportId, WLTOKEN_RESERVED_COUNT); Net2FileSystem::newFileSystem(); + if (networkOptions.traceInitializeOnSetup) { + ::initializeClientTracing({}, {}); + } + uncancellable(monitorNetworkBusyness()); } @@ -8603,24 +8661,36 @@ ACTOR Future>>> splitStorageMetricsWithLoc try { state int i = 0; for (; i < locations.size(); i++) { - SplitMetricsRequest req( - locations[i].range, limit, used, estimated, i == locations.size() - 1, minSplitBytes); - SplitMetricsReply res = wait(loadBalance(locations[i].locations->locations(), - &StorageServerInterface::splitMetrics, - req, - TaskPriority::DataDistribution)); - if (res.splits.size() && res.splits[0] <= results.back()) { // split points are out of order, possibly - // because of moving data, throw error to retry - ASSERT_WE_THINK(false); // FIXME: This seems impossible and doesn't seem to be covered by testing - throw all_alternatives_failed(); - } - if (res.splits.size()) { - results.append(results.arena(), res.splits.begin(), res.splits.size()); - results.arena().dependsOn(res.splits.arena()); - } - used = res.used; + state Key beginKey = locations[i].range.begin; + loop { + KeyRangeRef range(beginKey, locations[i].range.end); + SplitMetricsRequest req(range, limit, used, estimated, i == locations.size() - 1, minSplitBytes); + SplitMetricsReply res = wait(loadBalance(locations[i].locations->locations(), + &StorageServerInterface::splitMetrics, + req, + TaskPriority::DataDistribution)); + if (res.splits.size() && + res.splits[0] <= results.back()) { // split points are out of order, possibly + // because of moving data, throw error to retry + ASSERT_WE_THINK(false); // FIXME: This seems impossible and doesn't seem to be covered by testing + throw all_alternatives_failed(); + } - //TraceEvent("SplitStorageMetricsResult").detail("Used", used.bytes).detail("Location", i).detail("Size", res.splits.size()); + if (res.splits.size()) { + results.append(results.arena(), res.splits.begin(), res.splits.size()); + results.arena().dependsOn(res.splits.arena()); + } + + used = res.used; + + if (res.more && res.splits.size()) { + // Next request will return split points after this one + beginKey = KeyRef(beginKey.arena(), res.splits.back()); + } else { + break; + } + //TraceEvent("SplitStorageMetricsResult").detail("Used", used.bytes).detail("Location", i).detail("Size", res.splits.size()); + } } if (used.allLessOrEqual(limit * CLIENT_KNOBS->STORAGE_METRICS_UNFAIR_SPLIT_LIMIT) && results.size() > 1) { diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp index e34f36f4d0..b9d8e16da9 100644 --- a/fdbclient/ServerKnobs.cpp +++ b/fdbclient/ServerKnobs.cpp @@ -850,6 +850,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi // This exists for flexibility but assigning each ReadType to its own unique priority number makes the most sense // The enumeration is currently: eager, fetch, low, normal, high init( STORAGESERVER_READTYPE_PRIORITY_MAP, "0,1,2,3,4" ); + init( SPLIT_METRICS_MAX_ROWS, 10000 ); //Wait Failure init( MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS, 250 ); if( randomize && BUGGIFY ) MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS = 2; diff --git a/fdbclient/include/fdbclient/BlobCipher.h b/fdbclient/include/fdbclient/BlobCipher.h index f0855d8f8d..1088e9c7e6 100644 --- a/fdbclient/include/fdbclient/BlobCipher.h +++ b/fdbclient/include/fdbclient/BlobCipher.h @@ -70,6 +70,7 @@ public: KV_REDWOOD, BLOB_GRANULE, BACKUP, + RESTORE, TEST, MAX, }; diff --git a/fdbclient/include/fdbclient/ClientKnobs.h b/fdbclient/include/fdbclient/ClientKnobs.h index a4d60e6d85..205436e695 100644 --- a/fdbclient/include/fdbclient/ClientKnobs.h +++ b/fdbclient/include/fdbclient/ClientKnobs.h @@ -297,6 +297,10 @@ public: // Encryption-at-rest bool ENABLE_ENCRYPTION_CPU_TIME_LOGGING; + // This Knob will be a comma-delimited string (i.e 0,1,2,3) that specifies which tenants the the EKP should throw + // key_not_found errors for. If TenantInfo::INVALID_TENANT is contained within the list then no tenants will be + // dropped. This Knob should ONLY be used in simulation for testing purposes + std::string SIMULATION_EKP_TENANT_IDS_TO_DROP; ClientKnobs(Randomize randomize); void initialize(Randomize randomize); diff --git a/fdbclient/include/fdbclient/GetEncryptCipherKeys.actor.h b/fdbclient/include/fdbclient/GetEncryptCipherKeys.actor.h index c1eeadd34d..84f4e2432d 100644 --- a/fdbclient/include/fdbclient/GetEncryptCipherKeys.actor.h +++ b/fdbclient/include/fdbclient/GetEncryptCipherKeys.actor.h @@ -19,6 +19,7 @@ */ #pragma once #include "flow/EncryptUtils.h" +#include "flow/genericactors.actor.h" #if defined(NO_INTELLISENSE) && !defined(FDBCLIENT_GETCIPHERKEYS_ACTOR_G_H) #define FDBCLIENT_GETCIPHERKEYS_ACTOR_G_H #include "fdbclient/GetEncryptCipherKeys.actor.g.h" @@ -27,7 +28,9 @@ #include "fdbclient/BlobCipher.h" #include "fdbclient/EncryptKeyProxyInterface.h" +#include "fdbclient/Knobs.h" #include "fdbrpc/Stats.h" +#include "fdbrpc/TenantInfo.h" #include "flow/Knobs.h" #include "flow/IRandom.h" @@ -182,6 +185,18 @@ Future getUncachedEncryptCipherKeys(ReferenceisSimulated() && usageType == BlobCipherMetrics::RESTORE) { + std::unordered_set tenantIdsToDrop = + parseStringToUnorderedSet(CLIENT_KNOBS->SIMULATION_EKP_TENANT_IDS_TO_DROP, ','); + if (!tenantIdsToDrop.count(TenantInfo::INVALID_TENANT)) { + for (auto& baseCipherInfo : request.baseCipherInfos) { + if (tenantIdsToDrop.count(baseCipherInfo.domainId)) { + TraceEvent("GetEncryptCipherKeys_SimulatedError").detail("DomainId", baseCipherInfo.domainId); + throw encrypt_keys_fetch_failed(); + } + } + } + } return reply; } catch (Error& e) { TraceEvent("GetEncryptCipherKeys_CaughtError").error(e); diff --git a/fdbclient/include/fdbclient/NativeAPI.actor.h b/fdbclient/include/fdbclient/NativeAPI.actor.h index 15d910b5dd..d9681215ee 100644 --- a/fdbclient/include/fdbclient/NativeAPI.actor.h +++ b/fdbclient/include/fdbclient/NativeAPI.actor.h @@ -71,6 +71,7 @@ struct NetworkOptions { std::string traceClockSource; std::string traceFileIdentifier; std::string tracePartialFileSuffix; + bool traceInitializeOnSetup; Optional logClientInfo; Reference>>> supportedVersions; bool runLoopProfilingEnabled; diff --git a/fdbclient/include/fdbclient/ServerKnobs.h b/fdbclient/include/fdbclient/ServerKnobs.h index 6c2a2ebaad..7ba69c1d12 100644 --- a/fdbclient/include/fdbclient/ServerKnobs.h +++ b/fdbclient/include/fdbclient/ServerKnobs.h @@ -791,6 +791,7 @@ public: std::string STORAGESERVER_READ_PRIORITIES; int STORAGE_SERVER_READ_CONCURRENCY; std::string STORAGESERVER_READTYPE_PRIORITY_MAP; + int SPLIT_METRICS_MAX_ROWS; // Wait Failure int MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS; diff --git a/fdbclient/include/fdbclient/StorageServerInterface.h b/fdbclient/include/fdbclient/StorageServerInterface.h index f75cc59394..1d2acf8e1c 100644 --- a/fdbclient/include/fdbclient/StorageServerInterface.h +++ b/fdbclient/include/fdbclient/StorageServerInterface.h @@ -740,10 +740,11 @@ struct SplitMetricsReply { constexpr static FileIdentifier file_identifier = 11530792; Standalone> splits; StorageMetrics used; + bool more = false; template void serialize(Ar& ar) { - serializer(ar, splits, used); + serializer(ar, splits, used, more); } }; diff --git a/fdbclient/include/fdbclient/TenantEntryCache.actor.h b/fdbclient/include/fdbclient/TenantEntryCache.actor.h index 199bdb04fa..4c74a5fbcc 100644 --- a/fdbclient/include/fdbclient/TenantEntryCache.actor.h +++ b/fdbclient/include/fdbclient/TenantEntryCache.actor.h @@ -220,7 +220,7 @@ private: if (!cache->lastTenantId.present()) { return false; } - return cache->lastTenantId.get() > 0; + return cache->lastTenantId.get() >= 0; } return true; } diff --git a/fdbclient/vexillographer/fdb.options b/fdbclient/vexillographer/fdb.options index a564c576e1..824ad8e015 100644 --- a/fdbclient/vexillographer/fdb.options +++ b/fdbclient/vexillographer/fdb.options @@ -59,6 +59,8 @@ description is not currently required but encouraged. description="Once provided, this string will be used to replace the port/PID in the log file names." />