Merge remote-tracking branch 'origin/main' into authz-tenant-name-to-tenant-id

This commit is contained in:
Junhyun Shim 2023-02-06 23:13:43 +01:00
commit be225acd2a
101 changed files with 2692 additions and 1457 deletions

View File

@ -217,8 +217,8 @@ if(NOT WIN32)
target_link_libraries(fdb_c_unit_tests_version_510 PRIVATE fdb_c Threads::Threads doctest)
target_link_libraries(trace_partial_file_suffix_test PRIVATE fdb_c Threads::Threads flow doctest)
target_link_libraries(disconnected_timeout_unit_tests PRIVATE fdb_c Threads::Threads doctest)
target_link_libraries(fdb_c_client_config_tester PRIVATE SimpleOpt fdb_cpp fdb_c Threads::Threads fmt::fmt)
target_include_directories(fdb_c_client_config_tester PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}/foundationdb/ ${CMAKE_SOURCE_DIR}/flow/include)
target_link_libraries(fdb_c_client_config_tester PRIVATE SimpleOpt fdb_cpp fdb_c fdbclient Threads::Threads fmt::fmt)
target_include_directories(fdb_c_client_config_tester PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}/foundationdb/)
# do not set RPATH for mako
set_property(TARGET mako PROPERTY SKIP_BUILD_RPATH TRUE)
@ -423,18 +423,18 @@ if(OPEN_FOR_IDE)
target_link_libraries(fdb_c_shim_lib_tester PRIVATE fdb_c_shim SimpleOpt fdb_cpp Threads::Threads)
target_include_directories(fdb_c_shim_lib_tester PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}/foundationdb/ ${CMAKE_SOURCE_DIR}/flow/include)
elseif(NOT WIN32 AND NOT APPLE AND NOT USE_SANITIZER) # Linux Only, non-sanitizer only
elseif(NOT WIN32 AND NOT APPLE) # Linux Only
set(SHIM_LIB_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR})
set(SHIM_LIB_GEN_SRC
${SHIM_LIB_OUTPUT_DIR}/libfdb_c.so.init.c
${SHIM_LIB_OUTPUT_DIR}/libfdb_c.so.init.cpp
${SHIM_LIB_OUTPUT_DIR}/libfdb_c.so.tramp.S)
set(IMPLIBSO_SRC_DIR ${CMAKE_SOURCE_DIR}/contrib/Implib.so)
set(IMPLIBSO_SRC
${IMPLIBSO_SRC_DIR}/implib-gen.py
${IMPLIBSO_SRC_DIR}/arch/common/init.c.tpl
${IMPLIBSO_SRC_DIR}/arch/common/init.cpp.tpl
${IMPLIBSO_SRC_DIR}/arch/${CMAKE_SYSTEM_PROCESSOR}/config.ini
${IMPLIBSO_SRC_DIR}/arch/${CMAKE_SYSTEM_PROCESSOR}/table.S.tpl
${IMPLIBSO_SRC_DIR}/arch/${CMAKE_SYSTEM_PROCESSOR}/trampoline.S.tpl
@ -467,6 +467,11 @@ elseif(NOT WIN32 AND NOT APPLE AND NOT USE_SANITIZER) # Linux Only, non-sanitizer
target_link_libraries(fdb_c_shim_lib_tester PRIVATE fdb_c_shim SimpleOpt fdb_cpp Threads::Threads)
target_include_directories(fdb_c_shim_lib_tester PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}/foundationdb/ ${CMAKE_SOURCE_DIR}/flow/include)
set(SHIM_LIB_TEST_EXTRA_OPTIONS "")
if(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR USE_SANITIZER)
list(APPEND SHIM_LIB_TEST_EXTRA_OPTIONS --disable-prev-version-tests)
endif()
add_python_venv_test(NAME fdb_c_shim_library_tests
COMMAND python ${CMAKE_CURRENT_SOURCE_DIR}/test/fdb_c_shim_tests.py
--build-dir ${CMAKE_BINARY_DIR}
@ -474,6 +479,7 @@ elseif(NOT WIN32 AND NOT APPLE AND NOT USE_SANITIZER) # Linux Only, non-sanitizer
--api-tester-bin $<TARGET_FILE:fdb_c_shim_api_tester>
--shim-lib-tester-bin $<TARGET_FILE:fdb_c_shim_lib_tester>
--api-test-dir ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests
${SHIM_LIB_TEST_EXTRA_OPTIONS}
)
endif() # End Linux only, non-sanitizer only

View File

@ -70,12 +70,15 @@ void ApiWorkload::start() {
schedule([this]() {
// 1. Clear data
clearData([this]() {
// 2. Workload setup
setup([this]() {
// 3. Populate initial data
populateData([this]() {
// 4. Generate random workload
runTests();
// 2. Create tenants if necessary.
createTenantsIfNecessary([this] {
// 3. Workload setup.
setup([this]() {
// 4. Populate initial data
populateData([this]() {
// 5. Generate random workload
runTests();
});
});
});
});
@ -259,9 +262,17 @@ void ApiWorkload::createTenants(TTaskFct cont) {
[this, cont]() { schedule(cont); });
}
void ApiWorkload::createTenantsIfNecessary(TTaskFct cont) {
if (tenants.size() > 0) {
createTenants(cont);
} else {
schedule(cont);
}
}
void ApiWorkload::populateData(TTaskFct cont) {
if (tenants.size() > 0) {
createTenants([this, cont]() { populateTenantData(cont, std::make_optional(0)); });
populateTenantData(cont, std::make_optional(0));
} else {
populateTenantData(cont, {});
}
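The reordering above makes tenant creation its own phase, run exactly once before setup instead of being folded into populateData. A minimal sketch, in Python for brevity, of the resulting continuation chain; the names here are illustrative, not the tester's actual API:
```
# Sketch of the continuation chain built by ApiWorkload::start() after this
# change. Each phase receives the next phase as a callback ("cont").
def start(workload):
    workload.clear_data(lambda:                      # 1. Clear data
        workload.create_tenants_if_necessary(lambda: # 2. Create tenants if necessary
            workload.setup(lambda:                   # 3. Workload setup
                workload.populate_data(lambda:       # 4. Populate initial data
                    workload.run_tests()))))         # 5. Generate random workload

def create_tenants_if_necessary(workload, cont):
    # Mirrors ApiWorkload::createTenantsIfNecessary: create tenants only
    # when the test is configured with at least one tenant.
    if workload.tenants:
        workload.create_tenants(cont)
    else:
        workload.schedule(cont)
```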

View File

@ -141,6 +141,7 @@ private:
void populateDataTx(TTaskFct cont, std::optional<int> tenantId);
void populateTenantData(TTaskFct cont, std::optional<int> tenantId);
void createTenants(TTaskFct cont);
void createTenantsIfNecessary(TTaskFct cont);
void clearTenantData(TTaskFct cont, std::optional<int> tenantId);

View File

@ -40,7 +40,6 @@ public:
}
private:
// FIXME: add tenant support for DB operations
// FIXME: use other new blob granule apis!
enum OpType {
OP_INSERT,
@ -58,15 +57,8 @@ private:
void setup(TTaskFct cont) override { setupBlobGranules(cont); }
// FIXME: get rid of readSuccess* in this test now that setup is verify()-ing
// Allow reads at the start to get blob_granule_transaction_too_old if BG data isn't initialized yet
std::unordered_set<std::optional<int>> tenantsWithReadSuccess;
std::set<fdb::ByteString> validatedFiles;
inline void setReadSuccess(std::optional<int> tenantId) { tenantsWithReadSuccess.insert(tenantId); }
inline bool seenReadSuccess(std::optional<int> tenantId) { return tenantsWithReadSuccess.count(tenantId); }
void debugOp(std::string opName, fdb::KeyRange keyRange, std::optional<int> tenantId, std::string message) {
if (BG_API_DEBUG_VERBOSE) {
info(fmt::format("{0}: [{1} - {2}) {3}: {4}",
@ -99,30 +91,15 @@ private:
granuleContext);
auto out = fdb::Result::KeyValueRefArray{};
fdb::Error err = res.getKeyValueArrayNothrow(out);
if (err.code() == error_code_blob_granule_transaction_too_old) {
bool previousSuccess = seenReadSuccess(tenantId);
if (previousSuccess) {
error("Read bg too old after read success!\n");
} else {
info("Read bg too old\n");
}
ASSERT(!previousSuccess);
*tooOld = true;
ctx->done();
} else if (err.code() != error_code_success) {
ASSERT(err.code() != error_code_blob_granule_transaction_too_old);
if (err.code() != error_code_success) {
ctx->onError(err);
} else {
auto resCopy = copyKeyValueArray(out);
auto& [resVector, out_more] = resCopy;
ASSERT(!out_more);
results.get()->assign(resVector.begin(), resVector.end());
bool previousSuccess = seenReadSuccess(tenantId);
if (!previousSuccess) {
info(fmt::format("Read {0}: first success\n", debugTenantStr(tenantId)));
setReadSuccess(tenantId);
} else {
debugOp("Read", keyRange, tenantId, "complete");
}
debugOp("Read", keyRange, tenantId, "complete");
ctx->done();
}
},
@ -183,19 +160,13 @@ private:
},
[this, keyRange, tenantId, results, cont]() {
debugOp("GetGranules", keyRange, tenantId, fmt::format("complete with {0} granules", results->size()));
this->validateRanges(results, keyRange, seenReadSuccess(tenantId));
this->validateRanges(results, keyRange);
schedule(cont);
},
getTenant(tenantId));
}
void randomSummarizeOp(TTaskFct cont, std::optional<int> tenantId) {
if (!seenReadSuccess(tenantId)) {
// tester can't handle this throwing bg_txn_too_old, so just don't call it unless we have already seen a
// read success
schedule(cont);
return;
}
fdb::KeyRange keyRange = randomNonEmptyKeyRange();
auto results = std::make_shared<std::vector<fdb::GranuleSummary>>();
@ -231,33 +202,29 @@ private:
ranges->push_back((*results)[i].keyRange);
}
this->validateRanges(ranges, keyRange, true);
this->validateRanges(ranges, keyRange);
schedule(cont);
},
getTenant(tenantId));
}
void validateRanges(std::shared_ptr<std::vector<fdb::KeyRange>> results,
fdb::KeyRange keyRange,
bool shouldBeRanges) {
if (shouldBeRanges) {
if (results->size() == 0) {
error(fmt::format("ValidateRanges: [{0} - {1}): No ranges returned!",
fdb::toCharsRef(keyRange.beginKey),
fdb::toCharsRef(keyRange.endKey)));
}
ASSERT(results->size() > 0);
if (results->front().beginKey > keyRange.beginKey || results->back().endKey < keyRange.endKey) {
error(fmt::format("ValidateRanges: [{0} - {1}): Incomplete range(s) returned [{2} - {3})!",
fdb::toCharsRef(keyRange.beginKey),
fdb::toCharsRef(keyRange.endKey),
fdb::toCharsRef(results->front().beginKey),
fdb::toCharsRef(results->back().endKey)));
}
ASSERT(results->front().beginKey <= keyRange.beginKey);
ASSERT(results->back().endKey >= keyRange.endKey);
void validateRanges(std::shared_ptr<std::vector<fdb::KeyRange>> results, fdb::KeyRange keyRange) {
if (results->size() == 0) {
error(fmt::format("ValidateRanges: [{0} - {1}): No ranges returned!",
fdb::toCharsRef(keyRange.beginKey),
fdb::toCharsRef(keyRange.endKey)));
}
ASSERT(results->size() > 0);
if (results->front().beginKey > keyRange.beginKey || results->back().endKey < keyRange.endKey) {
error(fmt::format("ValidateRanges: [{0} - {1}): Incomplete range(s) returned [{2} - {3})!",
fdb::toCharsRef(keyRange.beginKey),
fdb::toCharsRef(keyRange.endKey),
fdb::toCharsRef(results->front().beginKey),
fdb::toCharsRef(results->back().endKey)));
}
ASSERT(results->front().beginKey <= keyRange.beginKey);
ASSERT(results->back().endKey >= keyRange.endKey);
for (int i = 0; i < results->size(); i++) {
// no empty or inverted ranges
if ((*results)[i].beginKey >= (*results)[i].endKey) {
@ -293,7 +260,6 @@ private:
execOperation(
[keyRange, results](auto ctx) {
// FIXME: add tenant!
fdb::Future f =
ctx->dbOps()->listBlobbifiedRanges(keyRange.beginKey, keyRange.endKey, 1000).eraseType();
ctx->continueAfter(f, [ctx, f, results]() {
@ -303,7 +269,7 @@ private:
},
[this, keyRange, tenantId, results, cont]() {
debugOp("GetBlobRanges", keyRange, tenantId, fmt::format("complete with {0} ranges", results->size()));
this->validateRanges(results, keyRange, seenReadSuccess(tenantId));
this->validateRanges(results, keyRange);
schedule(cont);
},
getTenant(tenantId),
@ -319,7 +285,6 @@ private:
auto verifyVersion = std::make_shared<int64_t>(-1);
execOperation(
[keyRange, verifyVersion](auto ctx) {
// FIXME: add tenant!!
fdb::Future f = ctx->dbOps()
->verifyBlobRange(keyRange.beginKey, keyRange.endKey, -2 /* latest version*/)
.eraseType();
@ -330,13 +295,6 @@ private:
},
[this, keyRange, tenantId, verifyVersion, cont]() {
debugOp("Verify", keyRange, tenantId, fmt::format("Complete @ {0}", *verifyVersion));
bool previousSuccess = seenReadSuccess(tenantId);
if (*verifyVersion == -1) {
ASSERT(!previousSuccess);
} else if (!previousSuccess) {
info(fmt::format("Verify {0}: first success\n", debugTenantStr(tenantId)));
setReadSuccess(tenantId);
}
schedule(cont);
},
getTenant(tenantId),
@ -475,11 +433,6 @@ private:
std::optional<int> tenantId,
int64_t readVersion) {
ASSERT(!results.empty());
ASSERT(results.front().keyRange.beginKey <= keyRange.beginKey);
ASSERT(keyRange.endKey <= results.back().keyRange.endKey);
for (int i = 0; i < results.size() - 1; i++) {
ASSERT(results[i].keyRange.endKey == results[i + 1].keyRange.beginKey);
}
if (tenantId) {
// FIXME: support tenants!!
@ -487,6 +440,12 @@ private:
return;
}
ASSERT(results.front().keyRange.beginKey <= keyRange.beginKey);
ASSERT(keyRange.endKey <= results.back().keyRange.endKey);
for (int i = 0; i < results.size() - 1; i++) {
ASSERT(results[i].keyRange.endKey == results[i + 1].keyRange.beginKey);
}
TesterGranuleContext testerContext(ctx->getBGBasePath());
fdb::native::FDBReadBlobGranuleContext bgCtx = createGranuleContext(&testerContext);
for (int i = 0; i < results.size(); i++) {
@ -495,9 +454,6 @@ private:
}
void randomReadDescription(TTaskFct cont, std::optional<int> tenantId) {
if (!seenReadSuccess(tenantId)) {
return;
}
fdb::KeyRange keyRange = randomNonEmptyKeyRange();
auto results = std::make_shared<std::vector<fdb::GranuleDescription>>();
auto readVersionOut = std::make_shared<int64_t>();

View File

@ -0,0 +1,24 @@
[[test]]
title = 'Blob Granule API Tenant Correctness Multi Threaded'
multiThreaded = true
buggify = true
minFdbThreads = 2
maxFdbThreads = 8
minClients = 1
maxClients = 8
minTenants = 1
maxTenants = 5
[[server]]
blob_granules_enabled = true
[[test.workload]]
name = 'ApiBlobGranuleCorrectness'
minKeyLength = 1
maxKeyLength = 64
minValueLength = 1
maxValueLength = 1000
maxKeysPerTransaction = 50
# TODO - increase initialSize and/or buggify down BG_SNAPSHOT_FILE_TARGET_BYTES to force multiple granules
initialSize = 100
numRandomOperations = 100

View File

@ -34,6 +34,8 @@
#include "SimpleOpt/SimpleOpt.h"
#include <thread>
#include <string_view>
#include <unordered_map>
#include "fdbclient/FDBOptions.g.h"
#if (defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__))
#include <unistd.h>
@ -43,11 +45,6 @@
#error Unsupported platform
#endif
#undef ERROR
#define ERROR(name, number, description) enum { error_code_##name = number };
#include "flow/error_definitions.h"
#define API_VERSION_CLIENT_TMP_DIR 720
using namespace std::string_view_literals;
@ -59,17 +56,14 @@ enum TesterOptionId {
OPT_CONNFILE,
OPT_EXTERNAL_CLIENT_LIBRARY,
OPT_EXTERNAL_CLIENT_DIRECTORY,
OPT_DISABLE_LOCAL_CLIENT,
OPT_DISABLE_CLIENT_BYPASS,
OPT_API_VERSION,
OPT_TRANSACTION_TIMEOUT,
OPT_TRACE,
OPT_TRACE_DIR,
OPT_TMP_DIR,
OPT_IGNORE_EXTERNAL_CLIENT_FAILURES,
OPT_FAIL_INCOMPATIBLE_CLIENT,
OPT_EXPECTED_ERROR,
OPT_PRINT_STATUS
OPT_PRINT_STATUS,
OPT_NETWORK_OPTION
};
const int MIN_TESTABLE_API_VERSION = 400;
@ -81,17 +75,14 @@ CSimpleOpt::SOption TesterOptionDefs[] = //
{ OPT_CONNFILE, "--cluster-file", SO_REQ_SEP },
{ OPT_EXTERNAL_CLIENT_LIBRARY, "--external-client-library", SO_REQ_SEP },
{ OPT_EXTERNAL_CLIENT_DIRECTORY, "--external-client-dir", SO_REQ_SEP },
{ OPT_DISABLE_LOCAL_CLIENT, "--disable-local-client", SO_NONE },
{ OPT_DISABLE_CLIENT_BYPASS, "--disable-client-bypass", SO_NONE },
{ OPT_API_VERSION, "--api-version", SO_REQ_SEP },
{ OPT_TRANSACTION_TIMEOUT, "--transaction-timeout", SO_REQ_SEP },
{ OPT_TRACE, "--log", SO_NONE },
{ OPT_TRACE_DIR, "--log-dir", SO_REQ_SEP },
{ OPT_TMP_DIR, "--tmp-dir", SO_REQ_SEP },
{ OPT_IGNORE_EXTERNAL_CLIENT_FAILURES, "--ignore-external-client-failures", SO_NONE },
{ OPT_FAIL_INCOMPATIBLE_CLIENT, "--fail-incompatible-client", SO_NONE },
{ OPT_EXPECTED_ERROR, "--expected-error", SO_REQ_SEP },
{ OPT_PRINT_STATUS, "--print-status", SO_NONE },
{ OPT_NETWORK_OPTION, "--network-option-", SO_REQ_SEP },
SO_END_OF_OPTIONS };
class TesterOptions {
@ -111,6 +102,7 @@ public:
bool failIncompatibleClient = false;
fdb::Error::CodeType expectedError = 0;
bool printStatus = false;
std::vector<std::pair<std::string, std::string>> networkOptions;
};
namespace {
@ -130,10 +122,6 @@ void printProgramUsage(const char* execName) {
" Path to the external client library.\n"
" --external-client-dir DIR\n"
" Directory containing external client libraries.\n"
" --disable-local-client\n"
" Disable the local client, i.e. use only external client libraries.\n"
" --disable-client-bypass\n"
" Disable bypassing Multi-Version Client when using the local client.\n"
" --api-version VERSION\n"
" Required FDB API version (default %d).\n"
" --transaction-timeout MILLISECONDS\n"
@ -144,14 +132,12 @@ void printProgramUsage(const char* execName) {
" no effect unless --log is specified.\n"
" --tmp-dir DIR\n"
" Directory for temporary files of the client.\n"
" --ignore-external-client-failures\n"
" Ignore failures to initialize external clients.\n"
" --fail-incompatible-client\n"
" Fail if there is no client matching the server version.\n"
" --expected-error ERR\n"
" FDB error code the test expected to fail with (default: 0).\n"
" --print-status\n"
" Print database client status.\n"
" --network-option-OPTIONNAME OPTIONVALUE\n"
" Changes a network option. OPTIONAME should be lowercase.\n"
" -h, --help Display this help and exit.\n",
FDB_API_VERSION);
}
@ -170,6 +156,19 @@ bool processIntOption(const std::string& optionName, const std::string& value, i
return true;
}
// Extracts the key for command line arguments that are specified with a prefix (e.g. --knob-).
// This function converts any hyphens in the extracted key to underscores.
bool extractPrefixedArgument(std::string prefix, const std::string& arg, std::string& res) {
if (arg.size() <= prefix.size() || arg.find(prefix) != 0 ||
(arg[prefix.size()] != '-' && arg[prefix.size()] != '_')) {
return false;
}
res = arg.substr(prefix.size() + 1);
std::transform(res.begin(), res.end(), res.begin(), [](int c) { return c == '-' ? '_' : c; });
return true;
}
bool processArg(const CSimpleOpt& args) {
switch (args.OptionId()) {
case OPT_CONNFILE:
@ -181,12 +180,6 @@ bool processArg(const CSimpleOpt& args) {
case OPT_EXTERNAL_CLIENT_DIRECTORY:
options.externalClientDir = args.OptionArg();
break;
case OPT_DISABLE_LOCAL_CLIENT:
options.disableLocalClient = true;
break;
case OPT_DISABLE_CLIENT_BYPASS:
options.disableClientBypass = true;
break;
case OPT_API_VERSION:
if (!processIntOption(
args.OptionText(), args.OptionArg(), MIN_TESTABLE_API_VERSION, FDB_API_VERSION, options.apiVersion)) {
@ -207,12 +200,6 @@ bool processArg(const CSimpleOpt& args) {
case OPT_TMP_DIR:
options.tmpDir = args.OptionArg();
break;
case OPT_IGNORE_EXTERNAL_CLIENT_FAILURES:
options.ignoreExternalClientFailures = true;
break;
case OPT_FAIL_INCOMPATIBLE_CLIENT:
options.failIncompatibleClient = true;
break;
case OPT_EXPECTED_ERROR:
if (!processIntOption(args.OptionText(), args.OptionArg(), 0, 10000, options.expectedError)) {
return false;
@ -221,6 +208,16 @@ bool processArg(const CSimpleOpt& args) {
case OPT_PRINT_STATUS:
options.printStatus = true;
break;
case OPT_NETWORK_OPTION: {
std::string optionName;
if (!extractPrefixedArgument("--network-option", args.OptionSyntax(), optionName)) {
fmt::print(stderr, "ERROR: unable to parse network option '{}'\n", args.OptionSyntax());
return false;
}
options.networkOptions.emplace_back(optionName, args.OptionArg());
break;
}
}
return true;
}
@ -272,6 +269,12 @@ void fdb_check(fdb::Error e, std::string_view msg) {
}
}
std::string stringToUpper(const std::string& str) {
std::string outStr(str);
std::transform(outStr.begin(), outStr.end(), outStr.begin(), [](char c) { return std::toupper(c); });
return outStr;
}
void applyNetworkOptions() {
if (!options.tmpDir.empty() && options.apiVersion >= API_VERSION_CLIENT_TMP_DIR) {
fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_CLIENT_TMP_DIR, options.tmpDir);
@ -283,20 +286,21 @@ void applyNetworkOptions() {
if (!options.externalClientDir.empty()) {
fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_EXTERNAL_CLIENT_DIRECTORY, options.externalClientDir);
}
if (options.disableLocalClient) {
fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_DISABLE_LOCAL_CLIENT);
}
if (options.trace) {
fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_TRACE_ENABLE, options.traceDir);
}
if (options.ignoreExternalClientFailures) {
fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_IGNORE_EXTERNAL_CLIENT_FAILURES);
std::unordered_map<std::string, FDBNetworkOption> networkOptionsByName;
for (auto const& [optionCode, optionInfo] : FDBNetworkOptions::optionInfo) {
networkOptionsByName[optionInfo.name] = static_cast<FDBNetworkOption>(optionCode);
}
if (options.failIncompatibleClient) {
fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_FAIL_INCOMPATIBLE_CLIENT);
}
if (options.disableClientBypass) {
fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_DISABLE_CLIENT_BYPASS);
for (auto const& [optionName, optionVal] : options.networkOptions) {
auto iter = networkOptionsByName.find(stringToUpper(optionName));
if (iter == networkOptionsByName.end()) {
fmt::print(stderr, "Unknown network option {}\n", optionName);
// Skip unknown options instead of dereferencing the end iterator below.
continue;
}
fdb::network::setOption(iter->second, optionVal);
}
}
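The dedicated switches removed above are subsumed by the generic `--network-option-NAME VALUE` flag: the tester strips the prefix, rewrites hyphens to underscores, upper-cases the result, and looks it up in the table built from `FDBNetworkOptions::optionInfo`. A minimal Python sketch of that normalization, with a made-up two-entry table standing in for the generated one:
```
# Sketch of the option-name normalization performed by the C++ helpers above.
# OPTION_TABLE is a stand-in for the generated FDBNetworkOptions table; the
# codes are illustrative, not authoritative.
OPTION_TABLE = {"TRACE_FORMAT": 34, "DISABLE_LOCAL_CLIENT": 64}

def extract_prefixed_argument(prefix, arg):
    # "--network-option-trace-format" -> "trace_format"
    if len(arg) <= len(prefix) or not arg.startswith(prefix) or arg[len(prefix)] not in "-_":
        return None
    return arg[len(prefix) + 1:].replace("-", "_")

name = extract_prefixed_argument("--network-option", "--network-option-trace_format")
assert name == "trace_format"
code = OPTION_TABLE.get(name.upper())  # mirrors stringToUpper + find; None if unknown
assert code == 34
```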

View File

@ -8,6 +8,7 @@ import os
import glob
import unittest
import json
import re
from threading import Thread
import time
@ -99,6 +100,9 @@ class ClientConfigTest:
self.expected_error = None
self.transaction_timeout = None
self.print_status = False
self.trace_file_identifier = None
self.trace_initialize_on_setup = False
self.trace_format = None
# ----------------------------
# Configuration methods
@ -208,6 +212,9 @@ class ClientConfigTest:
self.tc.assertTrue("Healthy" in self.status_json)
self.tc.assertEqual(expected_is_healthy, self.status_json["Healthy"])
def list_trace_files(self):
return glob.glob(os.path.join(self.log_dir, "*"))
# ----------------------------
# Executing the test
# ----------------------------
@ -222,10 +229,10 @@ class ClientConfigTest:
cmd_args += ["--log", "--log-dir", self.log_dir]
if self.disable_local_client:
cmd_args += ["--disable-local-client"]
cmd_args += ["--network-option-disable_local_client", ""]
if self.disable_client_bypass:
cmd_args += ["--disable-client-bypass"]
cmd_args += ["--network-option-disable_client_bypass", ""]
if self.external_lib_path is not None:
cmd_args += ["--external-client-library", self.external_lib_path]
@ -234,10 +241,19 @@ class ClientConfigTest:
cmd_args += ["--external-client-dir", self.external_lib_dir]
if self.ignore_external_client_failures:
cmd_args += ["--ignore-external-client-failures"]
cmd_args += ["--network-option-ignore_external_client_failures", ""]
if self.fail_incompatible_client:
cmd_args += ["--fail-incompatible-client"]
cmd_args += ["--network-option-fail_incompatible_client", ""]
if self.trace_file_identifier is not None:
cmd_args += ["--network-option-trace_file_identifier", self.trace_file_identifier]
if self.trace_initialize_on_setup:
cmd_args += ["--network-option-trace_initialize_on_setup", ""]
if self.trace_format is not None:
cmd_args += ["--network-option-trace_format", self.trace_format]
if self.api_version is not None:
cmd_args += ["--api-version", str(self.api_version)]
@ -252,26 +268,20 @@ class ClientConfigTest:
cmd_args += ["--print-status"]
print("\nExecuting test command: {}".format(" ".join([str(c) for c in cmd_args])), file=sys.stderr)
try:
tester_proc = subprocess.Popen(cmd_args, stdout=subprocess.PIPE, stderr=sys.stderr)
out, _ = tester_proc.communicate()
self.tc.assertEqual(0, tester_proc.returncode)
if self.print_status:
# Parse the output as status json
try:
self.status_json = json.loads(out)
except json.JSONDecodeError as e:
print("Error '{}' parsing output {}".format(e, out.decode()), file=sys.stderr)
self.tc.assertIsNotNone(self.status_json)
print("Status: ", self.status_json, file=sys.stderr)
else:
# Otherwise redirect the output to the console
print(out.decode(), file=sys.stderr)
finally:
self.cleanup()
def cleanup(self):
shutil.rmtree(self.test_dir)
tester_proc = subprocess.Popen(cmd_args, stdout=subprocess.PIPE, stderr=sys.stderr)
out, _ = tester_proc.communicate()
self.tc.assertEqual(0, tester_proc.returncode)
if self.print_status:
# Parse the output as status json
try:
self.status_json = json.loads(out)
except json.JSONDecodeError as e:
print("Error '{}' parsing output {}".format(e, out.decode()), file=sys.stderr)
self.tc.assertIsNotNone(self.status_json)
print("Status: ", self.status_json, file=sys.stderr)
else:
# Otherwise redirect the output to the console
print(out.decode(), file=sys.stderr)
class ClientConfigTests(unittest.TestCase):
@ -516,6 +526,171 @@ class ClientConfigSeparateCluster(unittest.TestCase):
self.cluster.tear_down()
# Test client-side tracing
class ClientTracingTests(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.cluster = TestCluster(CURRENT_VERSION)
cls.cluster.setup()
@classmethod
def tearDownClass(cls):
cls.cluster.tear_down()
def test_default_config_normal_case(self):
# Test trace files created with a default trace configuration
# in a normal case
test = self.test
test.create_external_lib_dir([CURRENT_VERSION, PREV_RELEASE_VERSION])
test.api_version = api_version_from_str(PREV_RELEASE_VERSION)
test.disable_local_client = True
self.exec_test()
self.assertEqual(3, len(self.trace_files))
primary_trace = self.find_trace_file(with_ip=True)
self.find_and_check_event(primary_trace, "ClientStart", ["Machine"], [])
cur_ver_trace = self.find_trace_file(with_ip=True, version=CURRENT_VERSION, thread_idx=0)
self.find_and_check_event(cur_ver_trace, "ClientStart", ["Machine"], [])
prev_ver_trace = self.find_trace_file(with_ip=True, version=PREV_RELEASE_VERSION, thread_idx=0)
self.find_and_check_event(prev_ver_trace, "ClientStart", ["Machine"], [])
def test_default_config_error_case(self):
# Test that no trace files are created with a default configuration
# when a client fails to initialize
test = self.test
test.create_external_lib_dir([CURRENT_VERSION, PREV_RELEASE_VERSION])
test.api_version = api_version_from_str(CURRENT_VERSION)
test.disable_local_client = True
test.expected_error = 2204 # API function missing
self.exec_test()
self.assertEqual(0, len(self.trace_files))
def test_init_on_setup_normal_case(self):
# Test trace files created with trace_initialize_on_setup option
# in a normal case
test = self.test
test.create_external_lib_dir([CURRENT_VERSION])
test.api_version = api_version_from_str(CURRENT_VERSION)
test.disable_local_client = True
test.trace_initialize_on_setup = True
self.exec_test()
self.assertEqual(2, len(self.trace_files))
primary_trace = self.find_trace_file()
# The machine address will be available only in the second ClientStart event
self.find_and_check_event(primary_trace, "ClientStart", [], ["Machine"])
self.find_and_check_event(primary_trace, "ClientStart", ["Machine"], [], seqno=1)
cur_ver_trace = self.find_trace_file(version=CURRENT_VERSION, thread_idx=0)
self.find_and_check_event(cur_ver_trace, "ClientStart", [], ["Machine"])
self.find_and_check_event(cur_ver_trace, "ClientStart", ["Machine"], [], seqno=1)
def test_init_on_setup_trace_error_case(self):
# Test trace files created with trace_initialize_on_setup option
# when a client fails to initialize
test = self.test
test.create_external_lib_dir([CURRENT_VERSION, PREV_RELEASE_VERSION])
test.api_version = api_version_from_str(CURRENT_VERSION)
test.disable_local_client = True
test.trace_initialize_on_setup = True
test.expected_error = 2204 # API function missing
self.exec_test()
self.assertEqual(1, len(self.trace_files))
primary_trace = self.find_trace_file()
self.find_and_check_event(primary_trace, "ClientStart", [], ["Machine"])
def test_trace_identifier(self):
# Test trace files created with file identifier
test = self.test
test.create_external_lib_dir([CURRENT_VERSION])
test.api_version = api_version_from_str(CURRENT_VERSION)
test.disable_local_client = True
test.trace_file_identifier = "fdbclient"
self.exec_test()
self.assertEqual(2, len(self.trace_files))
self.find_trace_file(with_ip=True, identifier="fdbclient")
self.find_trace_file(with_ip=True, identifier="fdbclient", version=CURRENT_VERSION, thread_idx=0)
def test_init_on_setup_and_trace_identifier(self):
# Test trace files created with trace_initialize_on_setup option
# and file identifier
test = self.test
test.create_external_lib_dir([CURRENT_VERSION])
test.api_version = api_version_from_str(CURRENT_VERSION)
test.disable_local_client = True
test.trace_initialize_on_setup = True
test.trace_file_identifier = "fdbclient"
self.exec_test()
self.assertEqual(2, len(self.trace_files))
self.find_trace_file(identifier="fdbclient")
self.find_trace_file(identifier="fdbclient", version=CURRENT_VERSION, thread_idx=0)
# ---------------
# Helper methods
# ---------------
def setUp(self):
self.test = ClientConfigTest(self)
self.trace_files = None
self.test.trace_format = "json"
def exec_test(self):
self.test.exec()
self.trace_files = self.test.list_trace_files()
if self.test.trace_format == "json":
self.load_trace_file_events()
def load_trace_file_events(self):
self.trace_file_events = {}
for trace in self.trace_files:
events = []
with open(trace, "r") as f:
for line in f:
events.append(json.loads(line))
self.trace_file_events[trace] = events
def find_trace_file(self, with_ip=False, identifier=None, version=None, thread_idx=None):
self.assertIsNotNone(self.trace_files)
for trace_file in self.trace_files:
name = os.path.basename(trace_file)
# trace prefix must be in all files
self.assertTrue(name.startswith("trace."))
pattern = "^trace\."
if with_ip:
pattern += "127\.0\.0\.1\."
else:
pattern += "0\.0\.0\.0\."
if identifier is not None:
pattern += identifier
else:
pattern += "\d+"
if version is not None:
pattern += "_v{}".format(version.replace(".", "_"))
if thread_idx is not None:
pattern += "t{}".format(thread_idx)
pattern += "\.\d+\.\w+\.\d+\.\d+\.{}$".format(self.test.trace_format)
if re.match(pattern, name):
return trace_file
self.fail("No maching trace file found")
def find_and_check_event(self, trace_file, event_type, attr_present, attr_missing, seqno=0):
self.assertTrue(trace_file in self.trace_file_events)
for event in self.trace_file_events[trace_file]:
if event["Type"] == event_type:
if seqno > 0:
seqno -= 1
continue
for attr in attr_present:
self.assertTrue(attr in event)
for attr in attr_missing:
self.assertFalse(attr in event)
return
self.fail("No matching event found")
if __name__ == "__main__":
parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,

View File

@ -1,7 +1,6 @@
#!/usr/bin/env python3
from argparse import ArgumentParser, RawDescriptionHelpFormatter
from pathlib import Path
import platform
import shutil
import subprocess
import sys
@ -53,7 +52,7 @@ class TestEnv(LocalCluster):
self.downloader.binary_path(version, "fdbcli"),
1,
)
self.set_env_var("LD_LIBRARY_PATH", self.downloader.lib_dir(version))
self.set_env_var("LD_LIBRARY_PATH", "%s:%s" % (self.downloader.lib_dir(version), os.getenv("LD_LIBRARY_PATH")))
client_lib = self.downloader.lib_path(version)
assert client_lib.exists(), "{} does not exist".format(client_lib)
self.client_lib_external = self.tmp_dir.joinpath("libfdb_c_external.so")
@ -91,9 +90,8 @@ class FdbCShimTests:
self.api_test_dir = Path(args.api_test_dir).resolve()
assert self.api_test_dir.exists(), "{} does not exist".format(self.api_test_dir)
self.downloader = FdbBinaryDownloader(args.build_dir)
# binary downloads are currently available only for x86_64
self.platform = platform.machine()
if self.platform == "x86_64":
self.test_prev_versions = not args.disable_prev_version_tests
if self.test_prev_versions:
self.downloader.download_old_binaries(PREV_RELEASE_VERSION)
self.downloader.download_old_binaries("7.0.0")
@ -182,7 +180,8 @@ class FdbCShimTests:
if use_external_lib:
cmd_args = cmd_args + ["--disable-local-client", "--external-client-library", test_env.client_lib_external]
env_vars = os.environ.copy()
env_vars["LD_LIBRARY_PATH"] = self.downloader.lib_dir(version) if set_ld_lib_path else ""
if set_ld_lib_path:
env_vars["LD_LIBRARY_PATH"] = "%s:%s" % (self.downloader.lib_dir(version), os.getenv("LD_LIBRARY_PATH"))
if set_env_path:
env_vars["FDB_LOCAL_CLIENT_LIBRARY_PATH"] = (
"dummy" if invalid_lib_path else self.downloader.lib_path(version)
@ -230,8 +229,7 @@ class FdbCShimTests:
# Test calling a function that exists in the loaded library, but not for the selected API version
self.run_c_shim_lib_tester(CURRENT_VERSION, test_env, call_set_path=True, api_version=700)
# binary downloads are currently available only for x86_64
if self.platform == "x86_64":
if self.test_prev_versions:
# Test the API workload with the release version
self.run_c_api_test(PREV_RELEASE_VERSION, DEFAULT_TEST_FILE)
@ -283,6 +281,12 @@ if __name__ == "__main__":
parser.add_argument(
"--api-test-dir", type=str, help="Path to a directory with api test definitions.", required=True
)
parser.add_argument(
"--disable-prev-version-tests",
action="store_true",
default=False,
help="Disable tests that need binaries of previous versions",
)
args = parser.parse_args()
test = FdbCShimTests(args)
test.run_tests()

View File

@ -87,7 +87,7 @@ void fdb_flow_test() {
g_network = newNet2(TLSConfig());
openTraceFile(NetworkAddress(), 1000000, 1000000, ".");
openTraceFile({}, 1000000, 1000000, ".");
systemMonitor();
uncancellable(recurring(&systemMonitor, 5.0, TaskPriority::FlushTrace));

View File

@ -107,6 +107,11 @@ func (o NetworkOptions) SetTraceShareAmongClientThreads() error {
return o.setOpt(37, nil)
}
// Initialize trace files on network setup, determine the local IP later. Otherwise tracing is initialized when opening the first database.
func (o NetworkOptions) SetTraceInitializeOnSetup() error {
return o.setOpt(38, nil)
}
// Set file suffix for partially written log files.
//
// Parameter: Append this suffix to partially written log files. When a log file is complete, it is renamed to remove the suffix. No separator is added between the file and the suffix. If you want to add a file extension, you should include the separator - e.g. '.tmp' instead of 'tmp' to add the 'tmp' extension.

View File

@ -46,27 +46,17 @@ where `TARGET` can be any of
* aarch64-linux-gnu, aarch64-none-linux-android
* e2k-linux-gnu
Script generates two files: `libxyz.so.tramp.S` and `libxyz.so.init.c` which need to be linked to your application (instead of `-lxyz`):
Script generates two files: `libxyz.so.tramp.S` and `libxyz.so.init.cpp` which need to be linked to your application (instead of `-lxyz`):
```
$ gcc myfile1.c myfile2.c ... libxyz.so.tramp.S libxyz.so.init.c ... -ldl
$ gcc myfile1.c myfile2.c ... libxyz.so.tramp.S libxyz.so.init.cpp ... -ldl
```
Note that you need to link against libdl.so. On ARM, if your app is compiled to Thumb code (which e.g. Ubuntu's `arm-linux-gnueabihf-gcc` does by default) you'll also need to add `-mthumb-interwork`.
Your application can then freely call functions from `libxyz.so` _without linking to it_. The library will be loaded (via `dlopen`) on the first call to any of its functions. If you want to forcibly resolve all symbols (e.g. to avoid delays later on) you can call `void libxyz_init_all()`.
The above command would perform a _lazy load_, i.e. load the library on the first call to one of its symbols. If you want to load it at startup, run
```
$ implib-gen.py --no-lazy-load libxyz.so
```
If you don't want `dlopen` to be called automatically and prefer to load library yourself at program startup, run script as
```
$ implib-gen.py --no-dlopen libxys.so
```
The above command would perform a _lazy load_, i.e. load the library on the first call to one of its symbols.
If you do want to load the library via `dlopen` but would prefer to call it yourself (e.g. with custom parameters or with a modified library name), run the script as
@ -100,10 +90,6 @@ $ implib-gen.py --dlopen-callback=mycallback libxyz.so
(callback must have signature `void *(*)(const char *lib_name)` and return handle of loaded library).
Finally to force library load and resolution of all symbols, call
void _LIBNAME_tramp_resolve_all(void);
# Wrapping vtables
By default the tool does not try to wrap vtables exported from the library. This can be enabled via the `--vtables` flag:
@ -141,7 +127,7 @@ void *mycallback(const char *lib_name) {
}
$ implib-gen.py --dlopen-callback=mycallback --symbol-list=mysymbols.txt libxyz.so
$ ... # Link your app with libxyz.tramp.S, libxyz.init.c and mycallback.c
$ ... # Link your app with libxyz.tramp.S, libxyz.init.cpp and mycallback.c
```
A similar approach can be used if you want to provide a common interface for several libraries with partially intersecting interfaces (see [this example](tests/multilib/run.sh) for more details).
@ -156,7 +142,7 @@ To achieve this you can generate a wrapper with _renamed_ symbols which call to
$ cat mycallback.c
... Same as before ...
$ implib-gen.py --dlopen-callback=mycallback --symbol_prefix=MYPREFIX_ libxyz.so
$ ... # Link your app with libxyz.tramp.S, libxyz.init.c and mycallback.c
$ ... # Link your app with libxyz.tramp.S, libxyz.init.cpp and mycallback.c
```
# Linker wrapper

View File

@ -11,6 +11,7 @@
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <mutex>
// Sanity check for ARM to avoid puzzling runtime crashes
#ifdef __arm__
@ -31,22 +32,15 @@ extern "C" {
} while(0)
#define CALL_USER_CALLBACK $has_dlopen_callback
#define NO_DLOPEN $no_dlopen
#define LAZY_LOAD $lazy_load
static void *lib_handle;
static int is_lib_loading;
static void *load_library() {
if(lib_handle)
return lib_handle;
is_lib_loading = 1;
// TODO: dlopen and user's callback must be protected w/ critical section (to avoid dlopening lib twice)
#if NO_DLOPEN
CHECK(0, "internal error"); // We shouldn't get here
#elif CALL_USER_CALLBACK
#if CALL_USER_CALLBACK
extern void *$dlopen_callback(const char *lib_name);
lib_handle = $dlopen_callback("$load_name");
CHECK(lib_handle, "callback '$dlopen_callback' failed to load library");
@ -55,17 +49,9 @@ static void *load_library() {
CHECK(lib_handle, "failed to load library: %s", dlerror());
#endif
is_lib_loading = 0;
return lib_handle;
}
#if ! NO_DLOPEN && ! LAZY_LOAD
static void __attribute__((constructor)) load_lib() {
load_library();
}
#endif
static void __attribute__((destructor)) unload_lib() {
if(lib_handle)
dlclose(lib_handle);
@ -79,34 +65,35 @@ static const char *const sym_names[] = {
extern void *_${lib_suffix}_tramp_table[];
// Can be sped up by manually parsing library symtab...
void _${lib_suffix}_tramp_resolve(int i) {
assert((unsigned)i + 1 < sizeof(sym_names) / sizeof(sym_names[0]));
// Load library and resolve all symbols
static void load_and_resolve(void) {
static std::mutex load_mutex;
static int is_loaded = false;
CHECK(!is_lib_loading, "library function '%s' called during library load", sym_names[i]);
std::unique_lock<std::mutex> lock(load_mutex);
if (is_loaded)
return;
void *h = 0;
#if NO_DLOPEN
// FIXME: instead of RTLD_NEXT we should search for loaded lib_handle
// as in https://github.com/jethrogb/ssltrace/blob/bf17c150a7/ssltrace.cpp#L74-L112
h = RTLD_NEXT;
#elif LAZY_LOAD
h = load_library();
#else
h = lib_handle;
CHECK(h, "failed to resolve symbol '%s', library failed to load", sym_names[i]);
#endif
// Dlsym is thread-safe so we don't need to protect it.
_${lib_suffix}_tramp_table[i] = dlsym(h, sym_names[i]);
CHECK(_${lib_suffix}_tramp_table[i], "failed to resolve symbol '%s'", sym_names[i]);
}
// Helper for user to resolve all symbols
void _${lib_suffix}_tramp_resolve_all(void) {
size_t i;
for(i = 0; i + 1 < sizeof(sym_names) / sizeof(sym_names[0]); ++i)
_${lib_suffix}_tramp_resolve(i);
// Resolving some of the symbols may fail. We ignore it, because if we are loading
// a library of an older version it may lack certain functions
_${lib_suffix}_tramp_table[i] = dlsym(h, sym_names[i]);
is_loaded = true;
}
// The function is called if the table entry for the symbol is not set.
// In that case we load the library and try to resolve all symbols if that was not done yet.
// If the table entry is still missing, then the symbol is not available in the loaded library,
// which is a fatal error on which we immediately exit the process.
void _${lib_suffix}_tramp_resolve(int i) {
assert((unsigned)i + 1 < sizeof(sym_names) / sizeof(sym_names[0]));
load_and_resolve();
CHECK(_${lib_suffix}_tramp_table[i], "failed to resolve symbol '%s'", sym_names[i]);
}
#ifdef __cplusplus
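The rewritten template replaces per-symbol lazy dlopen with a single mutex-guarded `load_and_resolve()` that fills the whole trampoline table once and tolerates symbols missing from older library versions. A sketch of the same once-only pattern in Python (`lib` and `sym_names` are hypothetical stand-ins for the dlopen'ed handle and the generated symbol list):
```
import threading

_lock = threading.Lock()
_loaded = False
_table = {}

def load_and_resolve(lib, sym_names):
    # First caller loads the library and fills the whole symbol table;
    # later callers return immediately.
    global _loaded
    with _lock:
        if _loaded:
            return
        for name in sym_names:
            # Missing symbols are tolerated (older library versions may lack
            # them); calling one later is what becomes a fatal error.
            _table[name] = getattr(lib, name, None)
        _loaded = True
```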

View File

@ -22,532 +22,530 @@ import configparser
me = os.path.basename(__file__)
root = os.path.dirname(__file__)
def warn(msg):
"""Emits a nicely-decorated warning."""
sys.stderr.write(f'{me}: warning: {msg}\n')
"""Emits a nicely-decorated warning."""
sys.stderr.write(f"{me}: warning: {msg}\n")
def error(msg):
"""Emits a nicely-decorated error and exits."""
sys.stderr.write(f'{me}: error: {msg}\n')
sys.exit(1)
"""Emits a nicely-decorated error and exits."""
sys.stderr.write(f"{me}: error: {msg}\n")
sys.exit(1)
def run(args, stdin=""):
"""Runs external program and aborts on error."""
env = os.environ.copy()
# Force English language
env["LC_ALL"] = "c"
try:
del env["LANG"]
except KeyError:
pass
with subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env) as p:
out, err = p.communicate(input=stdin.encode("utf-8"))
out = out.decode("utf-8")
err = err.decode("utf-8")
if p.returncode != 0 or err:
error(f"{args[0]} failed with retcode {p.returncode}:\n{err}")
return out, err
def run(args, stdin=''):
"""Runs external program and aborts on error."""
env = os.environ.copy()
# Force English language
env['LC_ALL'] = 'c'
try:
del env["LANG"]
except KeyError:
pass
with subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.PIPE, env=env) as p:
out, err = p.communicate(input=stdin.encode('utf-8'))
out = out.decode('utf-8')
err = err.decode('utf-8')
if p.returncode != 0 or err:
error(f"{args[0]} failed with retcode {p.returncode}:\n{err}")
return out, err
def make_toc(words, renames=None):
"Make an mapping of words to their indices in list"
renames = renames or {}
toc = {}
for i, n in enumerate(words):
name = renames.get(n, n)
toc[i] = name
return toc
"Make an mapping of words to their indices in list"
renames = renames or {}
toc = {}
for i, n in enumerate(words):
name = renames.get(n, n)
toc[i] = name
return toc
def parse_row(words, toc, hex_keys):
"Make a mapping from column names to values"
vals = {k: (words[i] if i < len(words) else '') for i, k in toc.items()}
for k in hex_keys:
if vals[k]:
vals[k] = int(vals[k], 16)
return vals
"Make a mapping from column names to values"
vals = {k: (words[i] if i < len(words) else "") for i, k in toc.items()}
for k in hex_keys:
if vals[k]:
vals[k] = int(vals[k], 16)
return vals
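A worked example of the two helpers above on a shortened readelf symbol-table row (the row values are made up):
```
# make_toc maps column index -> column name; parse_row turns one data row
# into a dict, converting the listed hex columns to integers.
header = ["Num", "Value", "Size", "Type", "Bind", "Vis", "Ndx", "Name"]
toc = make_toc(header)  # {0: "Num", 1: "Value", ...}
row = ["1", "0000000000001040", "12", "FUNC", "GLOBAL", "DEFAULT", "14", "foo"]
sym = parse_row(row, toc, ["Value"])
assert sym["Name"] == "foo" and sym["Value"] == 0x1040
```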
def collect_syms(f):
"""Collect ELF dynamic symtab."""
"""Collect ELF dynamic symtab."""
# --dyn-syms does not always work for some reason so dump all symtabs
out, _ = run(['readelf', '-sW', f])
# --dyn-syms does not always work for some reason so dump all symtabs
out, _ = run(["readelf", "-sW", f])
toc = None
syms = []
syms_set = set()
for line in out.splitlines():
line = line.strip()
if not line:
# Next symtab
toc = None
continue
words = re.split(r' +', line)
if line.startswith('Num'): # Header?
if toc is not None:
error("multiple headers in output of readelf")
# Colons are different across readelf versions so get rid of them.
toc = make_toc(map(lambda n: n.replace(':', ''), words))
elif toc is not None:
sym = parse_row(words, toc, ['Value'])
name = sym['Name']
if name in syms_set:
continue
syms_set.add(name)
sym['Size'] = int(sym['Size'], 0) # Readelf is inconsistent on Size format
if '@' in name:
sym['Default'] = '@@' in name
name, ver = re.split(r'@+', name)
sym['Name'] = name
sym['Version'] = ver
else:
sym['Default'] = True
sym['Version'] = None
syms.append(sym)
toc = None
syms = []
syms_set = set()
for line in out.splitlines():
line = line.strip()
if not line:
# Next symtab
toc = None
continue
words = re.split(r" +", line)
if line.startswith("Num"): # Header?
if toc is not None:
error("multiple headers in output of readelf")
# Colons are different across readelf versions so get rid of them.
toc = make_toc(map(lambda n: n.replace(":", ""), words))
elif toc is not None:
sym = parse_row(words, toc, ["Value"])
name = sym["Name"]
if name in syms_set:
continue
syms_set.add(name)
sym["Size"] = int(sym["Size"], 0) # Readelf is inconistent on Size format
if "@" in name:
sym["Default"] = "@@" in name
name, ver = re.split(r"@+", name)
sym["Name"] = name
sym["Version"] = ver
else:
sym["Default"] = True
sym["Version"] = None
syms.append(sym)
if toc is None:
error(f"failed to analyze symbols in {f}")
if toc is None:
error(f"failed to analyze symbols in {f}")
# Also collect demangled names
if syms:
out, _ = run(['c++filt'], '\n'.join((sym['Name'] for sym in syms)))
for i, name in enumerate(out.split("\n")):
syms[i]['Demangled Name'] = name
# Also collect demangled names
if syms:
out, _ = run(["c++filt"], "\n".join((sym["Name"] for sym in syms)))
for i, name in enumerate(out.split("\n")):
syms[i]["Demangled Name"] = name
return syms
return syms
def collect_relocs(f):
"""Collect ELF dynamic relocs."""
"""Collect ELF dynamic relocs."""
out, _ = run(['readelf', '-rW', f])
out, _ = run(["readelf", "-rW", f])
toc = None
rels = []
for line in out.splitlines():
line = line.strip()
if not line:
toc = None
continue
if line == 'There are no relocations in this file.':
return []
if re.match(r'^\s*Offset', line): # Header?
if toc is not None:
error("multiple headers in output of readelf")
words = re.split(r'\s\s+', line) # "Symbol's Name + Addend"
toc = make_toc(words)
elif toc is not None:
line = re.sub(r' \+ ', '+', line)
words = re.split(r'\s+', line)
rel = parse_row(words, toc, ['Offset', 'Info'])
rels.append(rel)
# Split symbolic representation
sym_name = 'Symbol\'s Name + Addend'
if sym_name not in rel and 'Symbol\'s Name' in rel:
# Adapt to different versions of readelf
rel[sym_name] = rel['Symbol\'s Name'] + '+0'
if rel[sym_name]:
p = rel[sym_name].split('+')
if len(p) == 1:
p = ['', p[0]]
rel[sym_name] = (p[0], int(p[1], 16))
toc = None
rels = []
for line in out.splitlines():
line = line.strip()
if not line:
toc = None
continue
if line == "There are no relocations in this file.":
return []
if re.match(r"^\s*Offset", line): # Header?
if toc is not None:
error("multiple headers in output of readelf")
words = re.split(r"\s\s+", line) # "Symbol's Name + Addend"
toc = make_toc(words)
elif toc is not None:
line = re.sub(r" \+ ", "+", line)
words = re.split(r"\s+", line)
rel = parse_row(words, toc, ["Offset", "Info"])
rels.append(rel)
# Split symbolic representation
sym_name = "Symbol's Name + Addend"
if sym_name not in rel and "Symbol's Name" in rel:
# Adapt to different versions of readelf
rel[sym_name] = rel["Symbol's Name"] + "+0"
if rel[sym_name]:
p = rel[sym_name].split("+")
if len(p) == 1:
p = ["", p[0]]
rel[sym_name] = (p[0], int(p[1], 16))
if toc is None:
error(f"failed to analyze relocations in {f}")
if toc is None:
error(f"failed to analyze relocations in {f}")
return rels
return rels
def collect_sections(f):
"""Collect section info from ELF."""
"""Collect section info from ELF."""
out, _ = run(['readelf', '-SW', f])
out, _ = run(["readelf", "-SW", f])
toc = None
sections = []
for line in out.splitlines():
line = line.strip()
if not line:
continue
line = re.sub(r'\[\s+', '[', line)
words = re.split(r' +', line)
if line.startswith('[Nr]'): # Header?
if toc is not None:
error("multiple headers in output of readelf")
toc = make_toc(words, {'Addr' : 'Address'})
elif line.startswith('[') and toc is not None:
sec = parse_row(words, toc, ['Address', 'Off', 'Size'])
if 'A' in sec['Flg']: # Allocatable section?
sections.append(sec)
toc = None
sections = []
for line in out.splitlines():
line = line.strip()
if not line:
continue
line = re.sub(r"\[\s+", "[", line)
words = re.split(r" +", line)
if line.startswith("[Nr]"): # Header?
if toc is not None:
error("multiple headers in output of readelf")
toc = make_toc(words, {"Addr": "Address"})
elif line.startswith("[") and toc is not None:
sec = parse_row(words, toc, ["Address", "Off", "Size"])
if "A" in sec["Flg"]: # Allocatable section?
sections.append(sec)
if toc is None:
error(f"failed to analyze sections in {f}")
if toc is None:
error(f"failed to analyze sections in {f}")
return sections
return sections
def read_unrelocated_data(input_name, syms, secs):
"""Collect unrelocated data from ELF."""
data = {}
with open(input_name, 'rb') as f:
def is_symbol_in_section(sym, sec):
sec_end = sec['Address'] + sec['Size']
is_start_in_section = sec['Address'] <= sym['Value'] < sec_end
is_end_in_section = sym['Value'] + sym['Size'] <= sec_end
return is_start_in_section and is_end_in_section
for name, s in sorted(syms.items(), key=lambda s: s[1]['Value']):
# TODO: binary search (bisect)
sec = [sec for sec in secs if is_symbol_in_section(s, sec)]
if len(sec) != 1:
error(f"failed to locate section for interval [{s['Value']:x}, {s['Value'] + s['Size']:x})")
sec = sec[0]
f.seek(sec['Off'])
data[name] = f.read(s['Size'])
return data
"""Collect unrelocated data from ELF."""
data = {}
with open(input_name, "rb") as f:
def is_symbol_in_section(sym, sec):
sec_end = sec["Address"] + sec["Size"]
is_start_in_section = sec["Address"] <= sym["Value"] < sec_end
is_end_in_section = sym["Value"] + sym["Size"] <= sec_end
return is_start_in_section and is_end_in_section
for name, s in sorted(syms.items(), key=lambda s: s[1]["Value"]):
# TODO: binary search (bisect)
sec = [sec for sec in secs if is_symbol_in_section(s, sec)]
if len(sec) != 1:
error(f"failed to locate section for interval [{s['Value']:x}, {s['Value'] + s['Size']:x})")
sec = sec[0]
f.seek(sec["Off"])
data[name] = f.read(s["Size"])
return data
def collect_relocated_data(syms, bites, rels, ptr_size, reloc_types):
"""Identify relocations for each symbol"""
data = {}
for name, s in sorted(syms.items()):
b = bites.get(name)
assert b is not None
if s['Demangled Name'].startswith('typeinfo name'):
data[name] = [('byte', int(x)) for x in b]
continue
data[name] = []
for i in range(0, len(b), ptr_size):
val = int.from_bytes(b[i*ptr_size:(i + 1)*ptr_size], byteorder='little')
data[name].append(('offset', val))
start = s['Value']
finish = start + s['Size']
# TODO: binary search (bisect)
for rel in rels:
if rel['Type'] in reloc_types and start <= rel['Offset'] < finish:
i = (rel['Offset'] - start) // ptr_size
assert i < len(data[name])
data[name][i] = 'reloc', rel
return data
"""Identify relocations for each symbol"""
data = {}
for name, s in sorted(syms.items()):
b = bites.get(name)
assert b is not None
if s["Demangled Name"].startswith("typeinfo name"):
data[name] = [("byte", int(x)) for x in b]
continue
data[name] = []
for i in range(0, len(b), ptr_size):
val = int.from_bytes(b[i * ptr_size : (i + 1) * ptr_size], byteorder="little")
data[name].append(("offset", val))
start = s["Value"]
finish = start + s["Size"]
# TODO: binary search (bisect)
for rel in rels:
if rel["Type"] in reloc_types and start <= rel["Offset"] < finish:
i = (rel["Offset"] - start) // ptr_size
assert i < len(data[name])
data[name][i] = "reloc", rel
return data
def generate_vtables(cls_tables, cls_syms, cls_data):
"""Generate code for vtables"""
c_types = {
'reloc' : 'const void *',
'byte' : 'unsigned char',
'offset' : 'size_t'
}
"""Generate code for vtables"""
c_types = {"reloc": "const void *", "byte": "unsigned char", "offset": "size_t"}
ss = []
ss.append('''\
ss = []
ss.append(
"""\
#ifdef __cplusplus
extern "C" {
#endif
''')
"""
)
# Print externs
# Print externs
printed = set()
for name, data in sorted(cls_data.items()):
for typ, val in data:
if typ != 'reloc':
continue
sym_name, addend = val['Symbol\'s Name + Addend']
sym_name = re.sub(r'@.*', '', sym_name) # Can we pin version in C?
if sym_name not in cls_syms and sym_name not in printed:
ss.append(f'''\
printed = set()
for name, data in sorted(cls_data.items()):
for typ, val in data:
if typ != "reloc":
continue
sym_name, addend = val["Symbol's Name + Addend"]
sym_name = re.sub(r"@.*", "", sym_name) # Can we pin version in C?
if sym_name not in cls_syms and sym_name not in printed:
ss.append(
f"""\
extern const char {sym_name}[];
''')
"""
)
# Collect variable infos
# Collect variable infos
code_info = {}
code_info = {}
for name, s in sorted(cls_syms.items()):
data = cls_data[name]
if s['Demangled Name'].startswith('typeinfo name'):
declarator = 'const unsigned char %s[]'
else:
field_types = (f'{c_types[typ]} field_{i};' for i, (typ, _) in enumerate(data))
declarator = 'const struct { %s } %%s' % ' '.join(field_types) # pylint: disable=C0209 # consider-using-f-string
vals = []
for typ, val in data:
if typ != 'reloc':
vals.append(str(val) + 'UL')
else:
sym_name, addend = val['Symbol\'s Name + Addend']
sym_name = re.sub(r'@.*', '', sym_name) # Can we pin version in C?
vals.append(f'(const char *)&{sym_name} + {addend}')
code_info[name] = (declarator, '{ %s }' % ', '.join(vals)) # pylint: disable= C0209 # consider-using-f-string
for name, s in sorted(cls_syms.items()):
data = cls_data[name]
if s["Demangled Name"].startswith("typeinfo name"):
declarator = "const unsigned char %s[]"
else:
field_types = (f"{c_types[typ]} field_{i};" for i, (typ, _) in enumerate(data))
declarator = "const struct { %s } %%s" % " ".join(
field_types
) # pylint: disable=C0209 # consider-using-f-string
vals = []
for typ, val in data:
if typ != "reloc":
vals.append(str(val) + "UL")
else:
sym_name, addend = val["Symbol's Name + Addend"]
sym_name = re.sub(r"@.*", "", sym_name) # Can we pin version in C?
vals.append(f"(const char *)&{sym_name} + {addend}")
code_info[name] = (declarator, "{ %s }" % ", ".join(vals)) # pylint: disable= C0209 # consider-using-f-string
# Print declarations
# Print declarations
for name, (decl, _) in sorted(code_info.items()):
type_name = name + '_type'
type_decl = decl % type_name
ss.append(f'''\
for name, (decl, _) in sorted(code_info.items()):
type_name = name + "_type"
type_decl = decl % type_name
ss.append(
f"""\
typedef {type_decl};
extern __attribute__((weak)) {type_name} {name};
''')
"""
)
# Print definitions
# Print definitions
for name, (_, init) in sorted(code_info.items()):
type_name = name + '_type'
ss.append(f'''\
for name, (_, init) in sorted(code_info.items()):
type_name = name + "_type"
ss.append(
f"""\
const {type_name} {name} = {init};
''')
"""
)
ss.append('''\
ss.append(
"""\
#ifdef __cplusplus
} // extern "C"
#endif
''')
"""
)
return "".join(ss)
return ''.join(ss)
def main():
"""Driver function"""
parser = argparse.ArgumentParser(description="Generate wrappers for shared library functions.",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=f"""\
"""Driver function"""
parser = argparse.ArgumentParser(
description="Generate wrappers for shared library functions.",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=f"""\
Examples:
$ python3 {me} /usr/lib/x86_64-linux-gnu/libaccountsservice.so.0
Generating libaccountsservice.so.0.tramp.S...
Generating libaccountsservice.so.0.init.c...
""")
Generating libaccountsservice.so.0.init.cpp...
""",
)
parser.add_argument('library',
metavar='LIB',
help="Library to be wrapped.")
parser.add_argument('--verbose', '-v',
help="Print diagnostic info",
action='count',
default=0)
parser.add_argument('--dlopen-callback',
help="Call user-provided custom callback to load library instead of dlopen",
default='')
parser.add_argument('--dlopen',
help="Emit dlopen call (default)",
dest='dlopen', action='store_true', default=True)
parser.add_argument('--no-dlopen',
help="Do not emit dlopen call (user must load library himself)",
dest='dlopen', action='store_false')
parser.add_argument('--library-load-name',
help="Use custom name for dlopened library (default is LIB)")
parser.add_argument('--lazy-load',
help="Load library lazily on first call to one of it's functions (default)",
dest='lazy_load', action='store_true', default=True)
parser.add_argument('--no-lazy-load',
help="Load library eagerly at program start",
dest='lazy_load', action='store_false')
parser.add_argument('--vtables',
help="Intercept virtual tables (EXPERIMENTAL)",
dest='vtables', action='store_true', default=False)
parser.add_argument('--no-vtables',
help="Do not intercept virtual tables (default)",
dest='vtables', action='store_false')
parser.add_argument('--target',
help="Target platform triple e.g. x86_64-unknown-linux-gnu or arm-none-eabi "
"(atm x86_64, i[0-9]86, arm/armhf/armeabi, aarch64/armv8 "
"and e2k are supported)",
default=os.uname()[-1])
parser.add_argument('--symbol-list',
help="Path to file with symbols that should be present in wrapper "
"(all by default)")
parser.add_argument('--symbol-prefix',
metavar='PFX',
help="Prefix wrapper symbols with PFX",
default='')
parser.add_argument('-q', '--quiet',
help="Do not print progress info",
action='store_true')
parser.add_argument('--outdir', '-o',
help="Path to create wrapper at",
default='./')
parser.add_argument("library", metavar="LIB", help="Library to be wrapped.")
parser.add_argument("--verbose", "-v", help="Print diagnostic info", action="count", default=0)
parser.add_argument(
"--dlopen-callback", help="Call user-provided custom callback to load library instead of dlopen", default=""
)
parser.add_argument("--library-load-name", help="Use custom name for dlopened library (default is LIB)")
parser.add_argument(
"--vtables", help="Intercept virtual tables (EXPERIMENTAL)", dest="vtables", action="store_true", default=False
)
parser.add_argument(
"--no-vtables", help="Do not intercept virtual tables (default)", dest="vtables", action="store_false"
)
parser.add_argument(
"--target",
help="Target platform triple e.g. x86_64-unknown-linux-gnu or arm-none-eabi "
"(atm x86_64, i[0-9]86, arm/armhf/armeabi, aarch64/armv8 "
"and e2k are supported)",
default=os.uname()[-1],
)
parser.add_argument(
"--symbol-list", help="Path to file with symbols that should be present in wrapper " "(all by default)"
)
parser.add_argument("--symbol-prefix", metavar="PFX", help="Prefix wrapper symbols with PFX", default="")
parser.add_argument("-q", "--quiet", help="Do not print progress info", action="store_true")
parser.add_argument("--outdir", "-o", help="Path to create wrapper at", default="./")
args = parser.parse_args()
args = parser.parse_args()
input_name = args.library
verbose = args.verbose
dlopen_callback = args.dlopen_callback
dlopen = args.dlopen
lazy_load = args.lazy_load
load_name = args.library_load_name or os.path.basename(input_name)
if args.target.startswith('arm'):
target = 'arm' # Handle armhf-..., armel-...
elif re.match(r'^i[0-9]86', args.target):
target = 'i386'
else:
target = args.target.split('-')[0]
quiet = args.quiet
outdir = args.outdir
input_name = args.library
verbose = args.verbose
dlopen_callback = args.dlopen_callback
load_name = args.library_load_name or os.path.basename(input_name)
if args.target.startswith("arm"):
target = "arm" # Handle armhf-..., armel-...
elif re.match(r"^i[0-9]86", args.target):
target = "i386"
else:
target = args.target.split("-")[0]
quiet = args.quiet
outdir = args.outdir
if args.symbol_list is None:
funs = None
else:
with open(args.symbol_list, 'r') as f:
funs = []
for line in re.split(r'\r?\n', f.read()):
line = re.sub(r'#.*', '', line)
line = line.strip()
if line:
funs.append(line)
if args.symbol_list is None:
funs = None
else:
with open(args.symbol_list, "r") as f:
funs = []
for line in re.split(r"\r?\n", f.read()):
line = re.sub(r"#.*", "", line)
line = line.strip()
if line:
funs.append(line)
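# Example (illustrative contents, not a real file): a symbol list containing
#     fdb_create_database   # trailing comment stripped
#     fdb_get_error
# yields funs == ["fdb_create_database", "fdb_get_error"]: '#' comments and
# blank lines are dropped, and surrounding whitespace is trimmed.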
# Collect target info
# Collect target info
target_dir = os.path.join(root, 'arch', target)
target_dir = os.path.join(root, "arch", target)
if not os.path.exists(target_dir):
error(f"unknown architecture '{target}'")
if not os.path.exists(target_dir):
error(f"unknown architecture '{target}'")
cfg = configparser.ConfigParser(inline_comment_prefixes=';')
cfg.read(target_dir + '/config.ini')
cfg = configparser.ConfigParser(inline_comment_prefixes=";")
cfg.read(target_dir + "/config.ini")
ptr_size = int(cfg['Arch']['PointerSize'])
symbol_reloc_types = set(re.split(r'\s*,\s*', cfg['Arch']['SymbolReloc']))
ptr_size = int(cfg["Arch"]["PointerSize"])
symbol_reloc_types = set(re.split(r"\s*,\s*", cfg["Arch"]["SymbolReloc"]))
def is_exported(s):
return (s['Bind'] != 'LOCAL'
and s['Type'] != 'NOTYPE'
and s['Ndx'] != 'UND'
and s['Name'] not in ['', '_init', '_fini'])
def is_exported(s):
return (
s["Bind"] != "LOCAL"
and s["Type"] != "NOTYPE"
and s["Ndx"] != "UND"
and s["Name"] not in ["", "_init", "_fini"]
)
syms = list(filter(is_exported, collect_syms(input_name)))
syms = list(filter(is_exported, collect_syms(input_name)))
def is_data_symbol(s):
return (s['Type'] == 'OBJECT'
def is_data_symbol(s):
return (
s["Type"] == "OBJECT"
# Allow vtables if --vtables is on
and not (' for ' in s['Demangled Name'] and args.vtables))
and not (" for " in s["Demangled Name"] and args.vtables)
)
exported_data = [s['Name'] for s in syms if is_data_symbol(s)]
if exported_data:
# TODO: we can generate wrappers for const data without relocations (or only code relocations)
warn(f"library '{input_name}' contains data symbols which won't be intercepted: "
+ ', '.join(exported_data))
exported_data = [s["Name"] for s in syms if is_data_symbol(s)]
if exported_data:
# TODO: we can generate wrappers for const data without relocations (or only code relocations)
warn(f"library '{input_name}' contains data symbols which won't be intercepted: " + ", ".join(exported_data))
# Collect functions
# TODO: warn if user-specified functions are missing
# Collect functions
# TODO: warn if user-specified functions are missing
orig_funs = filter(lambda s: s['Type'] == 'FUNC', syms)
orig_funs = filter(lambda s: s["Type"] == "FUNC", syms)
all_funs = set()
warn_versioned = False
for s in orig_funs:
if s['Version'] is not None:
# TODO: support versions
if not warn_versioned:
warn(f"library {input_name} contains versioned symbols which are NYI")
warn_versioned = True
if verbose:
print(f"Skipping versioned symbol {s['Name']}")
continue
all_funs.add(s['Name'])
all_funs = set()
warn_versioned = False
for s in orig_funs:
if s["Version"] is not None:
# TODO: support versions
if not warn_versioned:
warn(f"library {input_name} contains versioned symbols which are NYI")
warn_versioned = True
if verbose:
print(f"Skipping versioned symbol {s['Name']}")
continue
all_funs.add(s["Name"])
if funs is None:
funs = sorted(list(all_funs))
if not funs and not quiet:
warn(f"no public functions were found in {input_name}")
else:
missing_funs = [name for name in funs if name not in all_funs]
if missing_funs:
warn("some user-specified functions are not present in library: " + ', '.join(missing_funs))
funs = [name for name in funs if name in all_funs]
if verbose:
print("Exported functions:")
for i, fun in enumerate(funs):
print(f" {i}: {fun}")
# Collect vtables
if args.vtables:
cls_tables = {}
cls_syms = {}
for s in syms:
m = re.match(r'^(vtable|typeinfo|typeinfo name) for (.*)', s['Demangled Name'])
if m is not None and is_exported(s):
typ, cls = m.groups()
name = s['Name']
cls_tables.setdefault(cls, {})[typ] = name
cls_syms[name] = s
if funs is None:
funs = sorted(list(all_funs))
if not funs and not quiet:
warn(f"no public functions were found in {input_name}")
else:
missing_funs = [name for name in funs if name not in all_funs]
if missing_funs:
warn("some user-specified functions are not present in library: " + ", ".join(missing_funs))
funs = [name for name in funs if name in all_funs]
if verbose:
print("Exported classes:")
for cls, _ in sorted(cls_tables.items()):
print(f" {cls}")
print("Exported functions:")
for i, fun in enumerate(funs):
print(f" {i}: {fun}")
secs = collect_sections(input_name)
if verbose:
print("Sections:")
for sec in secs:
print(f" {sec['Name']}: [{sec['Address']:x}, {sec['Address'] + sec['Size']:x}), "
f"at {sec['Off']:x}")
# Collect vtables
bites = read_unrelocated_data(input_name, cls_syms, secs)
rels = collect_relocs(input_name)
if verbose:
print("Relocs:")
for rel in rels:
sym_add = rel['Symbol\'s Name + Addend']
print(f" {rel['Offset']}: {sym_add}")
cls_data = collect_relocated_data(cls_syms, bites, rels, ptr_size, symbol_reloc_types)
if verbose:
print("Class data:")
for name, data in sorted(cls_data.items()):
demangled_name = cls_syms[name]['Demangled Name']
print(f" {name} ({demangled_name}):")
for typ, val in data:
print(" " + str(val if typ != 'reloc' else val['Symbol\'s Name + Addend']))
# Generate assembly code
suffix = os.path.basename(load_name)
lib_suffix = re.sub(r'[^a-zA-Z_0-9]+', '_', suffix)
tramp_file = f'{suffix}.tramp.S'
with open(os.path.join(outdir, tramp_file), 'w') as f:
if not quiet:
print(f"Generating {tramp_file}...")
with open(target_dir + '/table.S.tpl', 'r') as t:
table_text = string.Template(t.read()).substitute(
lib_suffix=lib_suffix,
table_size=ptr_size*(len(funs) + 1))
f.write(table_text)
with open(target_dir + '/trampoline.S.tpl', 'r') as t:
tramp_tpl = string.Template(t.read())
for i, name in enumerate(funs):
tramp_text = tramp_tpl.substitute(
lib_suffix=lib_suffix,
sym=args.symbol_prefix + name,
offset=i*ptr_size,
number=i)
f.write(tramp_text)
# Generate C code
init_file = f'{suffix}.init.c'
with open(os.path.join(outdir, init_file), 'w') as f:
if not quiet:
print(f"Generating {init_file}...")
with open(os.path.join(root, 'arch/common/init.c.tpl'), 'r') as t:
if funs:
sym_names = ',\n '.join(f'"{name}"' for name in funs) + ','
else:
sym_names = ''
init_text = string.Template(t.read()).substitute(
lib_suffix=lib_suffix,
load_name=load_name,
dlopen_callback=dlopen_callback,
has_dlopen_callback=int(bool(dlopen_callback)),
no_dlopen=not int(dlopen),
lazy_load=int(lazy_load),
sym_names=sym_names)
f.write(init_text)
if args.vtables:
vtable_text = generate_vtables(cls_tables, cls_syms, cls_data)
f.write(vtable_text)
cls_tables = {}
cls_syms = {}
if __name__ == '__main__':
main()
for s in syms:
m = re.match(r"^(vtable|typeinfo|typeinfo name) for (.*)", s["Demangled Name"])
if m is not None and is_exported(s):
typ, cls = m.groups()
name = s["Name"]
cls_tables.setdefault(cls, {})[typ] = name
cls_syms[name] = s
if verbose:
print("Exported classes:")
for cls, _ in sorted(cls_tables.items()):
print(f" {cls}")
secs = collect_sections(input_name)
if verbose:
print("Sections:")
for sec in secs:
print(f" {sec['Name']}: [{sec['Address']:x}, {sec['Address'] + sec['Size']:x}), " f"at {sec['Off']:x}")
bites = read_unrelocated_data(input_name, cls_syms, secs)
rels = collect_relocs(input_name)
if verbose:
print("Relocs:")
for rel in rels:
sym_add = rel["Symbol's Name + Addend"]
print(f" {rel['Offset']}: {sym_add}")
cls_data = collect_relocated_data(cls_syms, bites, rels, ptr_size, symbol_reloc_types)
if verbose:
print("Class data:")
for name, data in sorted(cls_data.items()):
demangled_name = cls_syms[name]["Demangled Name"]
print(f" {name} ({demangled_name}):")
for typ, val in data:
print(" " + str(val if typ != "reloc" else val["Symbol's Name + Addend"]))
# Generate assembly code
suffix = os.path.basename(load_name)
lib_suffix = re.sub(r"[^a-zA-Z_0-9]+", "_", suffix)
tramp_file = f"{suffix}.tramp.S"
with open(os.path.join(outdir, tramp_file), "w") as f:
if not quiet:
print(f"Generating {tramp_file}...")
with open(target_dir + "/table.S.tpl", "r") as t:
table_text = string.Template(t.read()).substitute(
lib_suffix=lib_suffix, table_size=ptr_size * (len(funs) + 1)
)
f.write(table_text)
with open(target_dir + "/trampoline.S.tpl", "r") as t:
tramp_tpl = string.Template(t.read())
for i, name in enumerate(funs):
tramp_text = tramp_tpl.substitute(
lib_suffix=lib_suffix, sym=args.symbol_prefix + name, offset=i * ptr_size, number=i
)
f.write(tramp_text)
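# For orientation (illustrative numbers, not executed here): on a 64-bit target
# with three wrapped functions, table_size = 8 * (3 + 1) = 32 bytes -- one slot
# per function plus a terminating slot -- and the trampoline for function i
# loads its real target from byte offset i * 8 within that table.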
# Generate C code
init_file = f"{suffix}.init.cpp"
with open(os.path.join(outdir, init_file), "w") as f:
if not quiet:
print(f"Generating {init_file}...")
with open(os.path.join(root, "arch/common/init.cpp.tpl"), "r") as t:
if funs:
sym_names = ",\n ".join(f'"{name}"' for name in funs) + ","
else:
sym_names = ""
init_text = string.Template(t.read()).substitute(
lib_suffix=lib_suffix,
load_name=load_name,
dlopen_callback=dlopen_callback,
has_dlopen_callback=int(bool(dlopen_callback)),
sym_names=sym_names,
)
f.write(init_text)
if args.vtables:
vtable_text = generate_vtables(cls_tables, cls_syms, cls_data)
f.write(vtable_text)
if __name__ == "__main__":
main()
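For reference, a hypothetical invocation of the generator (the paths are made up; the flags correspond to the options parsed above):

    import subprocess

    # Illustrative only: wraps libfdb_c.so for x86_64, writing the generated
    # trampoline assembly and init source into build/.
    subprocess.run(
        ["python3", "implib-gen.py",
         "--target", "x86_64-unknown-linux-gnu",
         "--outdir", "build",
         "libfdb_c.so"],
        check=True,
    )
    # Expected outputs: build/libfdb_c.so.tramp.S and build/libfdb_c.so.init.cpp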

View File

@ -60,7 +60,7 @@ class StatFetcher:
class TestPicker:
def __init__(self, test_dir: Path):
if not test_dir.exists():
raise RuntimeError('{} is neither a directory nor a file'.format(test_dir))
raise RuntimeError("{} is neither a directory nor a file".format(test_dir))
self.include_files_regex = re.compile(config.include_test_files)
self.exclude_files_regex = re.compile(config.exclude_test_files)
self.include_tests_regex = re.compile(config.include_test_classes)
@ -78,6 +78,7 @@ class TestPicker:
self.stat_fetcher = StatFetcher(self.tests)
else:
from test_harness.fdb import FDBStatFetcher
self.stat_fetcher = FDBStatFetcher(self.tests)
if config.stats is not None:
self.load_stats(config.stats)
@ -106,50 +107,60 @@ class TestPicker:
break
assert test_name is not None and test_desc is not None
self.stat_fetcher.add_run_time(test_name, run_time, out)
out.attributes['TotalTestTime'] = str(test_desc.total_runtime)
out.attributes['TestRunCount'] = str(test_desc.num_runs)
out.attributes["TotalTestTime"] = str(test_desc.total_runtime)
out.attributes["TestRunCount"] = str(test_desc.num_runs)
def dump_stats(self) -> str:
res = array.array('I')
res = array.array("I")
for _, spec in self.tests.items():
res.append(spec.total_runtime)
return base64.standard_b64encode(res.tobytes()).decode('utf-8')
return base64.standard_b64encode(res.tobytes()).decode("utf-8")
def fetch_stats(self):
self.stat_fetcher.read_stats()
def load_stats(self, serialized: str):
times = array.array('I')
times = array.array("I")
times.frombytes(base64.standard_b64decode(serialized))
assert len(times) == len(self.tests.items())
for idx, (_, spec) in enumerate(self.tests.items()):
spec.total_runtime = times[idx]
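# Round-trip sketch (illustrative values): dump_stats packs each
# spec.total_runtime into array.array("I") and base64-encodes the bytes;
# load_stats reverses it:
#     blob = base64.standard_b64encode(array.array("I", [120, 45, 300]).tobytes())
#     times = array.array("I"); times.frombytes(base64.standard_b64decode(blob))
#     assert list(times) == [120, 45, 300]  # relies on stable ordering of self.tests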
def parse_txt(self, path: Path):
if self.include_files_regex.search(str(path)) is None or self.exclude_files_regex.search(str(path)) is not None:
if (
self.include_files_regex.search(str(path)) is None
or self.exclude_files_regex.search(str(path)) is not None
):
return
with path.open('r') as f:
with path.open("r") as f:
test_name: str | None = None
test_class: str | None = None
priority: float | None = None
for line in f:
line = line.strip()
kv = line.split('=')
kv = line.split("=")
if len(kv) != 2:
continue
kv[0] = kv[0].strip()
kv[1] = kv[1].strip(' \r\n\t\'"')
if kv[0] == 'testTitle' and test_name is None:
kv[1] = kv[1].strip(" \r\n\t'\"")
if kv[0] == "testTitle" and test_name is None:
test_name = kv[1]
if kv[0] == 'testClass' and test_class is None:
if kv[0] == "testClass" and test_class is None:
test_class = kv[1]
if kv[0] == 'testPriority' and priority is None:
if kv[0] == "testPriority" and priority is None:
try:
priority = float(kv[1])
except ValueError:
raise RuntimeError("Can't parse {} -- testPriority in {} should be set to a float".format(kv[1],
path))
if test_name is not None and test_class is not None and priority is not None:
raise RuntimeError(
"Can't parse {} -- testPriority in {} should be set to a float".format(
kv[1], path
)
)
if (
test_name is not None
and test_class is not None
and priority is not None
):
break
if test_name is None:
return
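# Example (hypothetical file contents): a test file with the lines
#     testTitle = 'CycleTest'
#     testClass = 'Cycle'
#     testPriority = '10.0'
# parses to test_name == "CycleTest", test_class == "Cycle", priority == 10.0;
# quotes and surrounding whitespace are stripped and the first occurrence wins.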
@ -157,8 +168,10 @@ class TestPicker:
test_class = test_name
if priority is None:
priority = 1.0
if self.include_tests_regex.search(test_class) is None \
or self.exclude_tests_regex.search(test_class) is not None:
if (
self.include_tests_regex.search(test_class) is None
or self.exclude_tests_regex.search(test_class) is not None
):
return
if test_class not in self.tests:
self.tests[test_class] = TestDescription(path, test_class, priority)
@ -173,12 +186,12 @@ class TestPicker:
# check whether we're looking at a restart test
if self.follow_test.match(test.name) is not None:
return
if test.suffix == '.txt' or test.suffix == '.toml':
if test.suffix == ".txt" or test.suffix == ".toml":
self.parse_txt(test)
@staticmethod
def list_restart_files(start_file: Path) -> List[Path]:
name = re.sub(r'-\d+.(txt|toml)', '', start_file.name)
name = re.sub(r"-\d+.(txt|toml)", "", start_file.name)
res: List[Path] = []
for test_file in start_file.parent.iterdir():
if test_file.name.startswith(name):
@ -209,12 +222,12 @@ class TestPicker:
class OldBinaries:
def __init__(self):
self.first_file_expr = re.compile(r'.*-1\.(txt|toml)')
self.first_file_expr = re.compile(r".*-1\.(txt|toml)")
self.old_binaries_path: Path = config.old_binaries_path
self.binaries: OrderedDict[Version, Path] = collections.OrderedDict()
if not self.old_binaries_path.exists() or not self.old_binaries_path.is_dir():
return
exec_pattern = re.compile(r'fdbserver-\d+\.\d+\.\d+(\.exe)?')
exec_pattern = re.compile(r"fdbserver-\d+\.\d+\.\d+(\.exe)?")
for file in self.old_binaries_path.iterdir():
if not file.is_file() or not os.access(file, os.X_OK):
continue
@ -222,9 +235,9 @@ class OldBinaries:
self._add_file(file)
def _add_file(self, file: Path):
version_str = file.name.split('-')[1]
if version_str.endswith('.exe'):
version_str = version_str[0:-len('.exe')]
version_str = file.name.split("-")[1]
if version_str.endswith(".exe"):
version_str = version_str[0 : -len(".exe")]
ver = Version.parse(version_str)
self.binaries[ver] = file
@ -232,21 +245,21 @@ class OldBinaries:
if len(self.binaries) == 0:
return config.binary
max_version = Version.max_version()
min_version = Version.parse('5.0.0')
min_version = Version.parse("5.0.0")
dirs = test_file.parent.parts
if 'restarting' not in dirs:
if "restarting" not in dirs:
return config.binary
version_expr = dirs[-1].split('_')
version_expr = dirs[-1].split("_")
first_file = self.first_file_expr.match(test_file.name) is not None
if first_file and version_expr[0] == 'to':
if first_file and version_expr[0] == "to":
# downgrade test -- first binary should be current one
return config.binary
if not first_file and version_expr[0] == 'from':
if not first_file and version_expr[0] == "from":
# upgrade test -- we only return an old version for the first test file
return config.binary
if version_expr[0] == 'from' or version_expr[0] == 'to':
if version_expr[0] == "from" or version_expr[0] == "to":
min_version = Version.parse(version_expr[1])
if len(version_expr) == 4 and version_expr[2] == 'until':
if len(version_expr) == 4 and version_expr[2] == "until":
max_version = Version.parse(version_expr[3])
candidates: List[Path] = []
for ver, binary in self.binaries.items():
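# Illustration (made-up path): for tests/restarting/from_7.1.0_until_7.3.0/test-2.txt,
# version_expr == ["from", "7.1.0", "until", "7.3.0"], i.e. an upgrade test:
# only test-1.txt runs on an old binary, picked from versions between 7.1.0 and
# 7.3.0; every later file in the sequence runs on the current binary.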
@ -259,13 +272,13 @@ class OldBinaries:
def is_restarting_test(test_file: Path):
for p in test_file.parts:
if p == 'restarting':
if p == "restarting":
return True
return False
def is_no_sim(test_file: Path):
return test_file.parts[-2] == 'noSim'
return test_file.parts[-2] == "noSim"
class ResourceMonitor(threading.Thread):
@ -291,9 +304,19 @@ class ResourceMonitor(threading.Thread):
class TestRun:
def __init__(self, binary: Path, test_file: Path, random_seed: int, uid: uuid.UUID,
restarting: bool = False, test_determinism: bool = False, buggify_enabled: bool = False,
stats: str | None = None, expected_unseed: int | None = None, will_restart: bool = False):
def __init__(
self,
binary: Path,
test_file: Path,
random_seed: int,
uid: uuid.UUID,
restarting: bool = False,
test_determinism: bool = False,
buggify_enabled: bool = False,
stats: str | None = None,
expected_unseed: int | None = None,
will_restart: bool = False,
):
self.binary = binary
self.test_file = test_file
self.random_seed = random_seed
@ -313,23 +336,31 @@ class TestRun:
self.temp_path = config.run_dir / str(self.uid)
# state for the run
self.retryable_error: bool = False
self.summary: Summary = Summary(binary, uid=self.uid, stats=self.stats, expected_unseed=self.expected_unseed,
will_restart=will_restart, long_running=config.long_running)
self.summary: Summary = Summary(
binary,
uid=self.uid,
stats=self.stats,
expected_unseed=self.expected_unseed,
will_restart=will_restart,
long_running=config.long_running,
)
self.run_time: int = 0
self.success = self.run()
def log_test_plan(self, out: SummaryTree):
test_plan: SummaryTree = SummaryTree('TestPlan')
test_plan.attributes['TestUID'] = str(self.uid)
test_plan.attributes['RandomSeed'] = str(self.random_seed)
test_plan.attributes['TestFile'] = str(self.test_file)
test_plan.attributes['Buggify'] = '1' if self.buggify_enabled else '0'
test_plan.attributes['FaultInjectionEnabled'] = '1' if self.fault_injection_enabled else '0'
test_plan.attributes['DeterminismCheck'] = '1' if self.test_determinism else '0'
test_plan: SummaryTree = SummaryTree("TestPlan")
test_plan.attributes["TestUID"] = str(self.uid)
test_plan.attributes["RandomSeed"] = str(self.random_seed)
test_plan.attributes["TestFile"] = str(self.test_file)
test_plan.attributes["Buggify"] = "1" if self.buggify_enabled else "0"
test_plan.attributes["FaultInjectionEnabled"] = (
"1" if self.fault_injection_enabled else "0"
)
test_plan.attributes["DeterminismCheck"] = "1" if self.test_determinism else "0"
out.append(test_plan)
def delete_simdir(self):
shutil.rmtree(self.temp_path / Path('simfdb'))
shutil.rmtree(self.temp_path / Path("simfdb"))
def run(self):
command: List[str] = []
@ -341,47 +372,68 @@ class TestRun:
# the test take longer. Also old binaries weren't built with
# USE_VALGRIND=ON, and we have seen false positives with valgrind in
# such binaries.
command.append('valgrind')
valgrind_file = self.temp_path / Path('valgrind-{}.xml'.format(self.random_seed))
dbg_path = os.getenv('FDB_VALGRIND_DBGPATH')
command.append("valgrind")
valgrind_file = self.temp_path / Path(
"valgrind-{}.xml".format(self.random_seed)
)
dbg_path = os.getenv("FDB_VALGRIND_DBGPATH")
if dbg_path is not None:
command.append('--extra-debuginfo-path={}'.format(dbg_path))
command += ['--xml=yes', '--xml-file={}'.format(valgrind_file.absolute()), '-q']
command += [str(self.binary.absolute()),
'-r', 'test' if is_no_sim(self.test_file) else 'simulation',
'-f', str(self.test_file),
'-s', str(self.random_seed)]
command.append("--extra-debuginfo-path={}".format(dbg_path))
command += [
"--xml=yes",
"--xml-file={}".format(valgrind_file.absolute()),
"-q",
]
command += [
str(self.binary.absolute()),
"-r",
"test" if is_no_sim(self.test_file) else "simulation",
"-f",
str(self.test_file),
"-s",
str(self.random_seed),
]
if self.trace_format is not None:
command += ['--trace_format', self.trace_format]
command += ["--trace_format", self.trace_format]
if self.use_tls_plugin:
command += ['--tls_plugin', str(config.tls_plugin_path)]
command += ["--tls_plugin", str(config.tls_plugin_path)]
env["FDB_TLS_PLUGIN"] = str(config.tls_plugin_path)
if config.disable_kaio:
command += ['--knob-disable-posix-kernel-aio=1']
if Version.of_binary(self.binary) >= '7.1.0':
command += ['-fi', 'on' if self.fault_injection_enabled else 'off']
command += ["--knob-disable-posix-kernel-aio=1"]
if Version.of_binary(self.binary) >= "7.1.0":
command += ["-fi", "on" if self.fault_injection_enabled else "off"]
if self.restarting:
command.append('--restarting')
command.append("--restarting")
if self.buggify_enabled:
command += ['-b', 'on']
command += ["-b", "on"]
if config.crash_on_error:
command.append('--crash')
command.append("--crash")
if config.long_running:
# disable simulation speedup
command += ['--knob-sim-speedup-after-seconds=36000']
command += ["--knob-sim-speedup-after-seconds=36000"]
# disable traceTooManyLines Error MAX_TRACE_LINES
command += ['--knob-max-trace-lines=1000000000']
command += ["--knob-max-trace-lines=1000000000"]
self.temp_path.mkdir(parents=True, exist_ok=True)
# self.log_test_plan(out)
resources = ResourceMonitor()
resources.start()
process = subprocess.Popen(command, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, cwd=self.temp_path,
text=True, env=env)
process = subprocess.Popen(
command,
stdout=subprocess.DEVNULL,
stderr=subprocess.PIPE,
cwd=self.temp_path,
text=True,
env=env,
)
did_kill = False
# No timeout for long running tests
timeout = 20 * config.kill_seconds if self.use_valgrind else (None if config.long_running else config.kill_seconds)
timeout = (
20 * config.kill_seconds
if self.use_valgrind
else (None if config.long_running else config.kill_seconds)
)
err_out: str
try:
_, err_out = process.communicate(timeout=timeout)
@ -398,7 +450,7 @@ class TestRun:
self.summary.was_killed = did_kill
self.summary.valgrind_out_file = valgrind_file
self.summary.error_out = err_out
self.summary.summarize(self.temp_path, ' '.join(command))
self.summary.summarize(self.temp_path, " ".join(command))
return self.summary.ok()
@ -407,18 +459,18 @@ def decorate_summary(out: SummaryTree, test_file: Path, seed: int, buggify: bool
tests are then hard to reproduce (they can be reproduced through TestHarness but
require the user to run in the joshua docker container). To account for this we
will write the necessary information into the attributes if it is missing."""
if 'TestFile' not in out.attributes:
out.attributes['TestFile'] = str(test_file)
if 'RandomSeed' not in out.attributes:
out.attributes['RandomSeed'] = str(seed)
if 'BuggifyEnabled' not in out.attributes:
out.attributes['BuggifyEnabled'] = '1' if buggify else '0'
if "TestFile" not in out.attributes:
out.attributes["TestFile"] = str(test_file)
if "RandomSeed" not in out.attributes:
out.attributes["RandomSeed"] = str(seed)
if "BuggifyEnabled" not in out.attributes:
out.attributes["BuggifyEnabled"] = "1" if buggify else "0"
class TestRunner:
def __init__(self):
self.uid = uuid.uuid4()
self.test_path: Path = Path('tests')
self.test_path: Path = Path("tests")
self.cluster_file: str | None = None
self.fdb_app_dir: str | None = None
self.binary_chooser = OldBinaries()
@ -426,32 +478,43 @@ class TestRunner:
def backup_sim_dir(self, seed: int):
temp_dir = config.run_dir / str(self.uid)
src_dir = temp_dir / 'simfdb'
src_dir = temp_dir / "simfdb"
assert src_dir.is_dir()
dest_dir = temp_dir / 'simfdb.{}'.format(seed)
dest_dir = temp_dir / "simfdb.{}".format(seed)
assert not dest_dir.exists()
shutil.copytree(src_dir, dest_dir)
def restore_sim_dir(self, seed: int):
temp_dir = config.run_dir / str(self.uid)
src_dir = temp_dir / 'simfdb.{}'.format(seed)
src_dir = temp_dir / "simfdb.{}".format(seed)
assert src_dir.exists()
dest_dir = temp_dir / 'simfdb'
dest_dir = temp_dir / "simfdb"
shutil.rmtree(dest_dir)
shutil.move(src_dir, dest_dir)
def run_tests(self, test_files: List[Path], seed: int, test_picker: TestPicker) -> bool:
def run_tests(
self, test_files: List[Path], seed: int, test_picker: TestPicker
) -> bool:
result: bool = True
for count, file in enumerate(test_files):
will_restart = count + 1 < len(test_files)
binary = self.binary_chooser.choose_binary(file)
unseed_check = not is_no_sim(file) and config.random.random() < config.unseed_check_ratio
unseed_check = (
not is_no_sim(file)
and config.random.random() < config.unseed_check_ratio
)
buggify_enabled: bool = config.random.random() < config.buggify_on_ratio
if unseed_check and count != 0:
# for restarting tests we will need to restore the sim2 after the first run
self.backup_sim_dir(seed + count - 1)
run = TestRun(binary, file.absolute(), seed + count, self.uid, restarting=count != 0,
stats=test_picker.dump_stats(), will_restart=will_restart, buggify_enabled=buggify_enabled)
# FIXME: support unseed checks for restarting tests
run = TestRun(
binary,
file.absolute(),
seed + count,
self.uid,
restarting=count != 0,
stats=test_picker.dump_stats(),
will_restart=will_restart,
buggify_enabled=buggify_enabled,
)
result = result and run.success
test_picker.add_time(test_files[0], run.run_time, run.summary.out)
decorate_summary(run.summary.out, file, seed + count, run.buggify_enabled)
@ -460,14 +523,22 @@ class TestRunner:
run.summary.out.dump(sys.stdout)
if not result:
return False
if unseed_check and run.summary.unseed is not None:
if count != 0:
self.restore_sim_dir(seed + count - 1)
run2 = TestRun(binary, file.absolute(), seed + count, self.uid, restarting=count != 0,
stats=test_picker.dump_stats(), expected_unseed=run.summary.unseed,
will_restart=will_restart, buggify_enabled=buggify_enabled)
if count == 0 and unseed_check and run.summary.unseed is not None:
run2 = TestRun(
binary,
file.absolute(),
seed + count,
self.uid,
restarting=count != 0,
stats=test_picker.dump_stats(),
expected_unseed=run.summary.unseed,
will_restart=will_restart,
buggify_enabled=buggify_enabled,
)
test_picker.add_time(file, run2.run_time, run.summary.out)
decorate_summary(run2.summary.out, file, seed + count, run.buggify_enabled)
decorate_summary(
run2.summary.out, file, seed + count, run.buggify_enabled
)
run2.summary.out.dump(sys.stdout)
result = result and run2.success
if not result:
@ -475,7 +546,11 @@ class TestRunner:
return result
def run(self) -> bool:
seed = config.random_seed if config.random_seed is not None else config.random.randint(0, 2 ** 32 - 1)
seed = (
config.random_seed
if config.random_seed is not None
else config.random.randint(0, 2**32 - 1)
)
test_files = self.test_picker.choose_test()
success = self.run_tests(test_files, seed, self.test_picker)
if config.clean_up:

View File

@ -131,12 +131,35 @@ The default is ``disabled``, which means changing the storage engine will not be
``aggressive`` tries to replace as many storages as it can at once, and will recruit a new storage server on the same process as the old one. This will be faster, but can potentially hit degraded performance or OOM with two storages on the same process. The main benefit over ``gradual`` is that this doesn't need to take one storage out of rotation, so it works for small or development clusters that have the same number of storage processes as the replication factor. Note that ``aggressive`` is not exclusive to running the perpetual wiggle.
``disabled`` means that if the storage engine is changed, fdb will not move the cluster over to the new storage engine. This will disable the perpetual wiggle from rewriting storage files.
consistencyscan
----------------
This command controls a native data consistency scan role that is automatically recruited in the FDB cluster. The consistency scan reads all replicas of each shard to verify data consistency. It is useful for finding corrupt cold data by ensuring that all data is read periodically. Any errors found will be logged as TraceEvents with Severity = 40.
The syntax is
``consistencyscan [ off | on [maxRate <RATE>] [targetInterval <INTERVAL>] [restart <RESTART>] ]``
* ``off`` will disable the consistency scan
* ``on`` will enable the scan and can be accompanied by additional options shown above
* ``RATE`` - sets the maximum read speed of the scan in bytes/s.
* ``INTERVAL`` - sets the target completion time, in seconds, for each full pass over all data in the cluster. Scan speed will target this interval with a hard limit of RATE.
* ``RESTART`` - 1 or 0, controlling whether the scan restarts from the beginning of the user key space on startup. This should normally be set to 0, which resumes progress from the last time the scan was running.
The consistency scan role publishes its configuration and metrics in Status JSON under the path ``.cluster.consistency_scan_info``.
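For example, ``consistencyscan on maxRate 50000000 targetInterval 604800`` would cap the scan at 50 MB/s while targeting one full pass over the cluster per week (the values shown are only illustrative).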
consistencycheck
----------------
The ``consistencycheck`` command enables or disables consistency checking. Its syntax is ``consistencycheck [on|off]``. Calling it with ``on`` enables consistency checking, and ``off`` disables it. Calling it with no arguments displays whether consistency checking is currently enabled.
Note: This command exists for backward compatibility; it is suggested to use the ``consistencyscan`` command to control FDB's internal consistency scan role instead.
You must be running an ``fdbserver`` process with the ``consistencycheck`` role to perform consistency checking.
This command sets a key that controls the behavior of any externally configured consistency check roles. You must be running an ``fdbserver`` process with the ``consistencycheck`` role to perform consistency checking.
The ``consistencycheck`` command enables or disables consistency checking. Its syntax is ``consistencycheck [on|off]``. Calling it with ``on`` enables consistency checking, and ``off`` disables it. Calling it with no arguments displays whether consistency checking is currently enabled.
coordinators
------------

View File

@ -608,7 +608,7 @@ int main(int argc, char** argv) {
setupNetwork(0, UseMetrics::True);
TraceEvent::setNetworkThread();
openTraceFile(NetworkAddress(), 10 << 20, 10 << 20, param.log_dir, "convert", param.trace_log_group);
openTraceFile({}, 10 << 20, 10 << 20, param.log_dir, "convert", param.trace_log_group);
auto f = stopAfter(convert(param));

View File

@ -641,7 +641,7 @@ int main(int argc, char** argv) {
param.updateKnobs();
TraceEvent::setNetworkThread();
openTraceFile(NetworkAddress(), 10 << 20, 500 << 20, param.log_dir, "decode", param.trace_log_group);
openTraceFile({}, 10 << 20, 500 << 20, param.log_dir, "decode", param.trace_log_group);
param.tlsConfig.setupBlobCredentials();
auto f = stopAfter(decode_logs(param));

View File

@ -3973,7 +3973,7 @@ int main(int argc, char* argv[]) {
// a cluster so they should use this instead.
auto initTraceFile = [&]() {
if (trace)
openTraceFile(NetworkAddress(), traceRollSize, traceMaxLogsSize, traceDir, "trace", traceLogGroup);
openTraceFile({}, traceRollSize, traceMaxLogsSize, traceDir, "trace", traceLogGroup);
};
auto initCluster = [&](bool quiet = false) {

View File

@ -21,7 +21,9 @@
#include "fdbcli/FlowLineNoise.h"
#include "flow/IThreadPool.h"
#ifndef BOOST_SYSTEM_NO_LIB
#define BOOST_SYSTEM_NO_LIB
#endif
#define BOOST_DATE_TIME_NO_LIB
#define BOOST_REGEX_NO_LIB
#include "boost/asio.hpp"

View File

@ -21,7 +21,9 @@
#include "flow/Platform.h"
#include <algorithm>
#ifndef BOOST_SYSTEM_NO_LIB
#define BOOST_SYSTEM_NO_LIB
#endif
#define BOOST_DATE_TIME_NO_LIB
#define BOOST_REGEX_NO_LIB
#include "boost/asio.hpp"

View File

@ -24,6 +24,7 @@
#include "fdbclient/BackupAgent.actor.h"
#include "fdbclient/BlobCipher.h"
#include "fdbclient/CommitTransaction.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/GetEncryptCipherKeys.actor.h"
#include "fdbclient/DatabaseContext.h"
#include "fdbclient/ManagementAPI.actor.h"
@ -32,7 +33,6 @@
#include "fdbclient/TenantManagement.actor.h"
#include "fdbrpc/simulator.h"
#include "flow/ActorCollection.h"
#include "flow/Trace.h"
#include "flow/actorcompiler.h" // has to be last include
FDB_DEFINE_BOOLEAN_PARAM(LockDB);
@ -252,6 +252,34 @@ Version getLogKeyVersion(Key key) {
return bigEndian64(*(int64_t*)(key.begin() + backupLogPrefixBytes + sizeof(UID) + sizeof(uint8_t)));
}
bool validTenantAccess(std::map<int64_t, TenantName>* tenantMap,
MutationRef m,
bool provisionalProxy,
Version version) {
if (isSystemKey(m.param1)) {
return true;
}
int64_t tenantId = TenantInfo::INVALID_TENANT;
if (m.isEncrypted()) {
tenantId = m.encryptionHeader()->cipherTextDetails.encryptDomainId;
} else {
tenantId = TenantAPI::extractTenantIdFromMutation(m);
}
ASSERT(tenantMap != nullptr);
if (m.isEncrypted() && isReservedEncryptDomain(tenantId)) {
// These are valid encrypt domains so don't check the tenant map
} else if (tenantMap->find(tenantId) == tenantMap->end()) {
// If a tenant is not found for a given mutation then exclude it from the batch
ASSERT(!provisionalProxy);
TraceEvent(SevWarnAlways, "MutationLogRestoreTenantNotFound")
.detail("Version", version)
.detail("TenantId", tenantId);
CODE_PROBE(true, "mutation log restore tenant not found");
return false;
}
return true;
}
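The checks above reduce to a small predicate; a rough Python model of the same decision, with all names invented for illustration:

    def valid_tenant_access(tenant_map, tenant_id, is_encrypted,
                            is_system_key, is_reserved_encrypt_domain):
        if is_system_key:
            return True                 # system-key mutations are always allowed
        if is_encrypted and is_reserved_encrypt_domain:
            return True                 # reserved encrypt domains skip the map lookup
        return tenant_id in tenant_map  # otherwise the tenant must still exist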
// Given a key from one of the ranges returned by get_log_ranges,
// returns(version, part) where version is the database version number of
// the transaction log data in the value, and part is 0 for the first such
@ -320,29 +348,49 @@ ACTOR static Future<Void> decodeBackupLogValue(Arena* arena,
offset += len2;
state Optional<MutationRef> encryptedLogValue = Optional<MutationRef>();
// Check for valid tenant in required tenant mode. If the tenant does not exist in our tenant map then
// we EXCLUDE the mutation (of that respective tenant) during the restore. NOTE: This simply allows a
// restore to make progress in the event of tenant deletion, but tenant deletion should be considered
// carefully so that we do not run into this case. We do this check here so that, if encrypted mutations
// are not found in the tenant map, we exit early without needing to reach out to the EKP.
if (config.tenantMode == TenantMode::REQUIRED &&
config.encryptionAtRestMode.mode != EncryptionAtRestMode::CLUSTER_AWARE &&
!validTenantAccess(tenantMap, logValue, provisionalProxy, version)) {
consumed += BackupAgentBase::logHeaderSize + len1 + len2;
continue;
}
// Decrypt mutation ref if encrypted
if (logValue.isEncrypted()) {
encryptedLogValue = logValue;
state EncryptCipherDomainId domainId = logValue.encryptionHeader()->cipherTextDetails.encryptDomainId;
Reference<AsyncVar<ClientDBInfo> const> dbInfo = cx->clientInfo;
TextAndHeaderCipherKeys cipherKeys =
wait(getEncryptCipherKeys(dbInfo, *logValue.encryptionHeader(), BlobCipherMetrics::BACKUP));
logValue = logValue.decrypt(cipherKeys, tempArena, BlobCipherMetrics::BACKUP);
try {
TextAndHeaderCipherKeys cipherKeys =
wait(getEncryptCipherKeys(dbInfo, *logValue.encryptionHeader(), BlobCipherMetrics::RESTORE));
logValue = logValue.decrypt(cipherKeys, tempArena, BlobCipherMetrics::BACKUP);
} catch (Error& e) {
// It's possible a tenant was deleted and the encrypt key fetch failed
TraceEvent(SevWarnAlways, "MutationLogRestoreEncryptKeyFetchFailed")
.detail("Version", version)
.detail("TenantId", domainId);
if (e.code() == error_code_encrypt_keys_fetch_failed) {
CODE_PROBE(true, "mutation log restore encrypt keys not found");
consumed += BackupAgentBase::logHeaderSize + len1 + len2;
continue;
} else {
throw;
}
}
}
ASSERT(!logValue.isEncrypted());
if (config.tenantMode == TenantMode::REQUIRED && !isSystemKey(logValue.param1)) {
// If a tenant is not found for a given mutation then exclude it from the batch
int64_t tenantId = TenantAPI::extractTenantIdFromMutation(logValue);
ASSERT(tenantMap != nullptr);
if (tenantMap->find(tenantId) == tenantMap->end()) {
ASSERT(!provisionalProxy);
TraceEvent(SevWarnAlways, "MutationLogRestoreTenantNotFound")
.detail("Version", version)
.detail("TenantId", tenantId);
CODE_PROBE(true, "mutation log restore tenant not found");
consumed += BackupAgentBase::logHeaderSize + len1 + len2;
continue;
}
// If the mutation was encrypted using cluster aware encryption then check after decryption
if (config.tenantMode == TenantMode::REQUIRED &&
config.encryptionAtRestMode.mode == EncryptionAtRestMode::CLUSTER_AWARE &&
!validTenantAccess(tenantMap, logValue, provisionalProxy, version)) {
consumed += BackupAgentBase::logHeaderSize + len1 + len2;
continue;
}
MutationRef originalLogValue = logValue;

View File

@ -973,23 +973,6 @@ public:
continue;
restorable.snapshot = snapshots[i];
// TODO: Reenable the sanity check after TooManyFiles error is resolved
if (false && g_network->isSimulated()) {
// Sanity check key ranges
// TODO: If we want to re-enable this codepath, make sure that we are passing a valid DB object (instead
// of the DB object created on the line below)
ASSERT(false);
state Database cx;
state std::map<std::string, KeyRange>::iterator rit;
for (rit = restorable.keyRanges.begin(); rit != restorable.keyRanges.end(); rit++) {
auto it = std::find_if(restorable.ranges.begin(),
restorable.ranges.end(),
[file = rit->first](const RangeFile f) { return f.fileName == file; });
ASSERT(it != restorable.ranges.end());
KeyRange result = wait(bc->getSnapshotFileKeyRange(*it, cx));
ASSERT(rit->second.begin <= result.begin && rit->second.end >= result.end);
}
}
// No logs needed if there is a complete filtered key space snapshot at the target version.
if (minKeyRangeVersion == maxKeyRangeVersion && maxKeyRangeVersion == restorable.targetVersion) {

View File

@ -85,6 +85,7 @@ BlobCipherMetrics::BlobCipherMetrics()
CounterSet(cc, "KVRedwood"),
CounterSet(cc, "BlobGranule"),
CounterSet(cc, "Backup"),
CounterSet(cc, "Restore"),
CounterSet(cc, "Test") }) {
specialCounter(cc, "CacheSize", []() { return BlobCipherKeyCache::getInstance()->getSize(); });
traceFuture = cc.traceCounters("BlobCipherMetrics", UID(), FLOW_KNOBS->ENCRYPT_KEY_CACHE_LOGGING_INTERVAL);
@ -102,6 +103,8 @@ std::string toString(BlobCipherMetrics::UsageType type) {
return "BlobGranule";
case BlobCipherMetrics::UsageType::BACKUP:
return "Backup";
case BlobCipherMetrics::UsageType::RESTORE:
return "Restore";
case BlobCipherMetrics::UsageType::TEST:
return "Test";
default:

View File

@ -301,6 +301,7 @@ void ClientKnobs::initialize(Randomize randomize) {
init( CLIENT_ENABLE_USING_CLUSTER_ID_KEY, false );
init( ENABLE_ENCRYPTION_CPU_TIME_LOGGING, false );
init( SIMULATION_EKP_TENANT_IDS_TO_DROP, "-1" );
// clang-format on
}

View File

@ -21,6 +21,7 @@
#include "fdbclient/DatabaseConfiguration.h"
#include "fdbclient/TenantEntryCache.actor.h"
#include "fdbclient/TenantManagement.actor.h"
#include "fdbrpc/TenantInfo.h"
#include "fdbrpc/simulator.h"
#include "flow/FastRef.h"
#include "fmt/format.h"
@ -610,7 +611,7 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
int64_t dataLen,
Arena* arena) {
Reference<AsyncVar<ClientDBInfo> const> dbInfo = cx->clientInfo;
TextAndHeaderCipherKeys cipherKeys = wait(getEncryptCipherKeys(dbInfo, header, BlobCipherMetrics::BACKUP));
TextAndHeaderCipherKeys cipherKeys = wait(getEncryptCipherKeys(dbInfo, header, BlobCipherMetrics::RESTORE));
ASSERT(cipherKeys.cipherHeaderKey.isValid() && cipherKeys.cipherTextKey.isValid());
validateEncryptionHeader(cipherKeys.cipherHeaderKey, cipherKeys.cipherTextKey, header);
DecryptBlobCipherAes256Ctr decryptor(
@ -1131,6 +1132,7 @@ ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeRangeFileBlock(Reference<
wait(tenantCache.get()->init());
}
state EncryptionAtRestMode encryptMode = config.encryptionAtRestMode;
state int64_t blockTenantId = TenantInfo::INVALID_TENANT;
try {
// Read header, currently only decoding BACKUP_AGENT_SNAPSHOT_FILE_VERSION or
@ -1142,7 +1144,7 @@ ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeRangeFileBlock(Reference<
} else if (file_version == BACKUP_AGENT_ENCRYPTED_SNAPSHOT_FILE_VERSION) {
CODE_PROBE(true, "decoding encrypted block");
// decode options struct
uint32_t optionsLen = reader.consumeNetworkUInt32();
state uint32_t optionsLen = reader.consumeNetworkUInt32();
const uint8_t* o = reader.consume(optionsLen);
StringRef optionsStringRef = StringRef(o, optionsLen);
EncryptedRangeFileWriter::Options options =
@ -1150,9 +1152,17 @@ ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeRangeFileBlock(Reference<
ASSERT(!options.compressionEnabled);
// read encryption header
const uint8_t* headerStart = reader.consume(BlobCipherEncryptHeader::headerSize);
state const uint8_t* headerStart = reader.consume(BlobCipherEncryptHeader::headerSize);
StringRef headerS = StringRef(headerStart, BlobCipherEncryptHeader::headerSize);
state BlobCipherEncryptHeader header = BlobCipherEncryptHeader::fromStringRef(headerS);
blockTenantId = header.cipherTextDetails.encryptDomainId;
if (config.tenantMode == TenantMode::REQUIRED && !isReservedEncryptDomain(blockTenantId)) {
ASSERT(tenantCache.present());
Optional<TenantEntryCachePayload<Void>> payload = wait(tenantCache.get()->getById(blockTenantId));
if (!payload.present()) {
throw tenant_not_found();
}
}
const uint8_t* dataPayloadStart = headerStart + BlobCipherEncryptHeader::headerSize;
// calculate the total bytes read up to (and including) the header
int64_t bytesRead = sizeof(int32_t) + sizeof(uint32_t) + optionsLen + BlobCipherEncryptHeader::headerSize;
@ -1167,6 +1177,13 @@ ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeRangeFileBlock(Reference<
}
return results;
} catch (Error& e) {
if (e.code() == error_code_encrypt_keys_fetch_failed) {
TraceEvent(SevWarnAlways, "SnapshotRestoreEncryptKeyFetchFailed").detail("TenantId", blockTenantId);
CODE_PROBE(true, "Snapshot restore encrypt keys not found");
} else if (e.code() == error_code_tenant_not_found) {
TraceEvent(SevWarnAlways, "EncryptedSnapshotRestoreTenantNotFound").detail("TenantId", blockTenantId);
CODE_PROBE(true, "Encrypted Snapshot restore tenant not found");
}
TraceEvent(SevWarn, "FileRestoreDecodeRangeFileBlockFailed")
.error(e)
.detail("Filename", file->getFilename())
@ -3552,9 +3569,6 @@ struct RestoreRangeTaskFunc : RestoreFileTaskFuncBase {
}
state int64_t tenantId = TenantAPI::extractTenantIdFromKeyRef(key);
Optional<TenantEntryCachePayload<Void>> payload = wait(tenantCache->getById(tenantId));
if (!payload.present()) {
TraceEvent(SevError, "SnapshotRestoreInvalidTenantAccess").detail("Tenant", tenantId);
}
ASSERT(payload.present());
return Void();
}
@ -3607,8 +3621,17 @@ struct RestoreRangeTaskFunc : RestoreFileTaskFuncBase {
}
state Reference<IAsyncFile> inFile = wait(bc.get()->readFile(rangeFile.fileName));
state Standalone<VectorRef<KeyValueRef>> blockData =
wait(decodeRangeFileBlock(inFile, readOffset, readLen, cx));
state Standalone<VectorRef<KeyValueRef>> blockData;
try {
Standalone<VectorRef<KeyValueRef>> data = wait(decodeRangeFileBlock(inFile, readOffset, readLen, cx));
blockData = data;
} catch (Error& e) {
// It's possible a tenant was deleted and the encrypt key fetch failed
if (e.code() == error_code_encrypt_keys_fetch_failed || e.code() == error_code_tenant_not_found) {
return Void();
}
throw;
}
state Optional<Reference<TenantEntryCache<Void>>> tenantCache;
state std::vector<Future<Void>> validTenantCheckFutures;
state Arena arena;

View File

@ -46,6 +46,7 @@
#include "flow/Platform.h"
#include "flow/ProtocolVersion.h"
#include "flow/UnitTest.h"
#include "flow/Trace.h"
#ifdef __unixish__
#include <fcntl.h>
@ -2913,123 +2914,129 @@ void MultiVersionApi::setNetworkOptionInternal(FDBNetworkOptions::Option option,
}
void MultiVersionApi::setupNetwork() {
if (!externalClient) {
loadEnvironmentVariableNetworkOptions();
}
uint64_t transportId = 0;
{ // lock scope
MutexHolder holder(lock);
if (networkStartSetup) {
throw network_already_setup();
try {
if (!externalClient) {
loadEnvironmentVariableNetworkOptions();
}
if (threadCount > 1) {
disableLocalClient();
}
uint64_t transportId = 0;
{ // lock scope
MutexHolder holder(lock);
if (networkStartSetup) {
throw network_already_setup();
}
if (!apiVersion.hasFailOnExternalClientErrors()) {
ignoreExternalClientFailures = true;
}
if (threadCount > 1) {
disableLocalClient();
}
for (auto i : externalClientDescriptions) {
std::string path = i.second.libPath;
std::string filename = basename(path);
bool useFutureVersion = i.second.useFutureVersion;
networkStartSetup = true;
// Copy external lib for each thread
if (externalClients.count(filename) == 0) {
externalClients[filename] = {};
auto libCopies = copyExternalLibraryPerThread(path);
for (int idx = 0; idx < libCopies.size(); ++idx) {
bool unlinkOnLoad = libCopies[idx].second && !retainClientLibCopies;
externalClients[filename].push_back(Reference<ClientInfo>(
new ClientInfo(new DLApi(libCopies[idx].first, unlinkOnLoad /*unlink on load*/),
path,
useFutureVersion,
idx)));
if (externalClientDescriptions.empty() && localClientDisabled) {
TraceEvent(SevWarn, "CannotSetupNetwork")
.detail("Reason", "Local client is disabled and no external clients configured");
throw no_external_client_provided();
}
if (externalClientDescriptions.empty() && !disableBypass) {
bypassMultiClientApi = true; // SOMEDAY: we won't be able to set this option once it becomes possible to
// add clients after setupNetwork is called
}
if (!bypassMultiClientApi) {
transportId =
(uint64_t(uint32_t(platform::getRandomSeed())) << 32) ^ uint32_t(platform::getRandomSeed());
if (transportId <= 1)
transportId += 2;
localClient->api->setNetworkOption(FDBNetworkOptions::EXTERNAL_CLIENT_TRANSPORT_ID,
std::to_string(transportId));
}
localClient->api->setupNetwork();
if (!apiVersion.hasFailOnExternalClientErrors()) {
ignoreExternalClientFailures = true;
}
for (auto i : externalClientDescriptions) {
std::string path = i.second.libPath;
std::string filename = basename(path);
bool useFutureVersion = i.second.useFutureVersion;
// Copy external lib for each thread
if (externalClients.count(filename) == 0) {
externalClients[filename] = {};
auto libCopies = copyExternalLibraryPerThread(path);
for (int idx = 0; idx < libCopies.size(); ++idx) {
bool unlinkOnLoad = libCopies[idx].second && !retainClientLibCopies;
externalClients[filename].push_back(Reference<ClientInfo>(
new ClientInfo(new DLApi(libCopies[idx].first, unlinkOnLoad /*unlink on load*/),
path,
useFutureVersion,
idx)));
}
}
}
}
if (externalClients.empty() && localClientDisabled) {
TraceEvent(SevWarn, "CannotSetupNetwork")
.detail("Reason", "Local client is disabled and no external clients configured");
localClient->loadVersion();
throw no_external_client_provided();
if (bypassMultiClientApi) {
networkSetup = true;
} else {
runOnExternalClientsAllThreads(
[this](Reference<ClientInfo> client) {
TraceEvent("InitializingExternalClient").detail("LibraryPath", client->libPath);
client->api->selectApiVersion(apiVersion.version());
if (client->useFutureVersion) {
client->api->useFutureProtocolVersion();
}
client->loadVersion();
},
false,
!ignoreExternalClientFailures);
std::string baseTraceFileId;
if (apiVersion.hasTraceFileIdentifier()) {
// TRACE_FILE_IDENTIFIER option is supported since 6.3
baseTraceFileId = traceFileIdentifier.empty() ? format("%d", getpid()) : traceFileIdentifier;
}
MutexHolder holder(lock);
runOnExternalClientsAllThreads(
[this, transportId, baseTraceFileId](Reference<ClientInfo> client) {
for (auto option : options) {
client->api->setNetworkOption(option.first, option.second.castTo<StringRef>());
}
client->api->setNetworkOption(FDBNetworkOptions::EXTERNAL_CLIENT_TRANSPORT_ID,
std::to_string(transportId));
if (!baseTraceFileId.empty()) {
client->api->setNetworkOption(FDBNetworkOptions::TRACE_FILE_IDENTIFIER,
traceShareBaseNameAmongThreads
? baseTraceFileId
: client->getTraceFileIdentifier(baseTraceFileId));
}
client->api->setupNetwork();
},
false,
!ignoreExternalClientFailures);
if (localClientDisabled && !hasNonFailedExternalClients()) {
TraceEvent(SevWarn, "CannotSetupNetwork")
.detail("Reason", "Local client is disabled and all external clients failed");
throw all_external_clients_failed();
}
networkSetup = true; // Needs to be guarded by mutex
}
networkStartSetup = true;
if (externalClients.empty() && !disableBypass) {
bypassMultiClientApi = true; // SOMEDAY: we won't be able to set this option once it becomes possible to
// add clients after setupNetwork is called
}
if (!bypassMultiClientApi) {
transportId = (uint64_t(uint32_t(platform::getRandomSeed())) << 32) ^ uint32_t(platform::getRandomSeed());
if (transportId <= 1)
transportId += 2;
localClient->api->setNetworkOption(FDBNetworkOptions::EXTERNAL_CLIENT_TRANSPORT_ID,
std::to_string(transportId));
}
localClient->api->setupNetwork();
options.clear();
updateSupportedVersions();
} catch (Error& e) {
// Make sure all error and warning events are traced
flushTraceFileVoid();
throw e;
}
localClient->loadVersion();
if (bypassMultiClientApi) {
networkSetup = true;
} else {
runOnExternalClientsAllThreads(
[this](Reference<ClientInfo> client) {
TraceEvent("InitializingExternalClient").detail("LibraryPath", client->libPath);
client->api->selectApiVersion(apiVersion.version());
if (client->useFutureVersion) {
client->api->useFutureProtocolVersion();
}
client->loadVersion();
},
false,
!ignoreExternalClientFailures);
std::string baseTraceFileId;
if (apiVersion.hasTraceFileIdentifier()) {
// TRACE_FILE_IDENTIFIER option is supported since 6.3
baseTraceFileId = traceFileIdentifier.empty() ? format("%d", getpid()) : traceFileIdentifier;
}
MutexHolder holder(lock);
runOnExternalClientsAllThreads(
[this, transportId, baseTraceFileId](Reference<ClientInfo> client) {
for (auto option : options) {
client->api->setNetworkOption(option.first, option.second.castTo<StringRef>());
}
client->api->setNetworkOption(FDBNetworkOptions::EXTERNAL_CLIENT_TRANSPORT_ID,
std::to_string(transportId));
if (!baseTraceFileId.empty()) {
client->api->setNetworkOption(FDBNetworkOptions::TRACE_FILE_IDENTIFIER,
traceShareBaseNameAmongThreads
? baseTraceFileId
: client->getTraceFileIdentifier(baseTraceFileId));
}
client->api->setupNetwork();
},
false,
!ignoreExternalClientFailures);
if (localClientDisabled && !hasNonFailedExternalClients()) {
TraceEvent(SevWarn, "CannotSetupNetwork")
.detail("Reason", "Local client is disabled and all external clients failed");
throw all_external_clients_failed();
}
networkSetup = true; // Needs to be guarded by mutex
}
options.clear();
updateSupportedVersions();
}
THREAD_FUNC_RETURN runNetworkThread(void* param) {

View File

@ -161,7 +161,7 @@ TLSConfig tlsConfig(TLSEndpointType::CLIENT);
// The default values, TRACE_DEFAULT_ROLL_SIZE and TRACE_DEFAULT_MAX_LOGS_SIZE are located in Trace.h.
NetworkOptions::NetworkOptions()
: traceRollSize(TRACE_DEFAULT_ROLL_SIZE), traceMaxLogsSize(TRACE_DEFAULT_MAX_LOGS_SIZE), traceLogGroup("default"),
traceFormat("xml"), traceClockSource("now"),
traceFormat("xml"), traceClockSource("now"), traceInitializeOnSetup(false),
supportedVersions(new ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>()), runLoopProfilingEnabled(false),
primaryClient(true) {}
@ -2217,6 +2217,99 @@ void DatabaseContext::expireThrottles() {
extern IPAddress determinePublicIPAutomatically(ClusterConnectionString& ccs);
// Initialize tracing for FDB client
//
// connRecord is necessary for determining the local IP, which is then included in the trace
// file name, and also used to annotate all trace events.
//
// If trace_initialize_on_setup is not set, tracing is initialized when opening a database.
// In that case we can immediately determine the IP. Thus, we can use the IP in the
// trace file name and annotate all events with it.
//
// If the trace_initialize_on_setup network option is set, tracing is at first initialized without
// connRecord and thus without the local IP. In that case we cannot use the local IP in the
// trace file names. The IP is then provided by a repeated call to initializeClientTracing
// when opening a database. All tracing events from this point on are annotated with the local IP.
//
// Once tracing initialization has completed, further calls to initializeClientTracing are ignored.
void initializeClientTracing(Reference<IClusterConnectionRecord> connRecord, Optional<int> apiVersion) {
if (!networkOptions.traceDirectory.present()) {
return;
}
bool initialized = traceFileIsOpen();
if (initialized && (isTraceLocalAddressSet() || !connRecord)) {
// Tracing initialization is completed
return;
}
// Network must be created before initializing tracing
ASSERT(g_network);
Optional<NetworkAddress> localAddress;
if (connRecord) {
auto publicIP = determinePublicIPAutomatically(connRecord->getConnectionString());
localAddress = NetworkAddress(publicIP, ::getpid());
}
platform::ImageInfo imageInfo = platform::getImageInfo();
if (initialized) {
// Tracing already initialized, just need to update the IP address
setTraceLocalAddress(localAddress.get());
TraceEvent("ClientStart")
.detail("SourceVersion", getSourceVersion())
.detail("Version", FDB_VT_VERSION)
.detail("PackageName", FDB_VT_PACKAGE_NAME)
.detailf("ActualTime", "%lld", DEBUG_DETERMINISM ? 0 : time(nullptr))
.detail("ApiVersion", apiVersion)
.detail("ClientLibrary", imageInfo.fileName)
.detailf("ImageOffset", "%p", imageInfo.offset)
.detail("Primary", networkOptions.primaryClient)
.trackLatest("ClientStart");
} else {
// Initialize tracing
selectTraceFormatter(networkOptions.traceFormat);
selectTraceClockSource(networkOptions.traceClockSource);
addUniversalTraceField("ClientDescription",
format("%s-%s-%" PRIu64,
networkOptions.primaryClient ? "primary" : "external",
FDB_VT_VERSION,
deterministicRandom()->randomUInt64()));
std::string identifier = networkOptions.traceFileIdentifier;
openTraceFile(localAddress,
networkOptions.traceRollSize,
networkOptions.traceMaxLogsSize,
networkOptions.traceDirectory.get(),
"trace",
networkOptions.traceLogGroup,
identifier,
networkOptions.tracePartialFileSuffix);
TraceEvent("ClientStart")
.detail("SourceVersion", getSourceVersion())
.detail("Version", FDB_VT_VERSION)
.detail("PackageName", FDB_VT_PACKAGE_NAME)
.detailf("ActualTime", "%lld", DEBUG_DETERMINISM ? 0 : time(nullptr))
.detail("ApiVersion", apiVersion)
.detail("ClientLibrary", imageInfo.fileName)
.detailf("ImageOffset", "%p", imageInfo.offset)
.detail("Primary", networkOptions.primaryClient)
.trackLatest("ClientStart");
g_network->initMetrics();
FlowTransport::transport().initMetrics();
initTraceEventMetrics();
}
// Initialize system monitoring once the local IP is available
if (localAddress.present()) {
initializeSystemMonitorMachineState(SystemMonitorMachineState(IPAddress(localAddress.get().ip)));
systemMonitor();
uncancellable(recurring(&systemMonitor, CLIENT_KNOBS->SYSTEM_MONITOR_INTERVAL, TaskPriority::FlushTrace));
}
}
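From the client's perspective the intended sequence is: set the trace directory and the new option, set up the network (the trace file opens without a local IP), then open a database (a second initialization call fills the IP in). A rough sketch using the Python bindings; the setter name for the new option is an assumption based on how network options are normally generated:

    import fdb

    fdb.api_version(720)                              # illustrative API version
    fdb.options.set_trace_enable("/var/log/fdb")      # trace directory must be set
    fdb.options.set_trace_initialize_on_setup()       # assumed generated setter name
    db = fdb.open("/etc/foundationdb/fdb.cluster")    # repeated init call adds the IP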
// Creates a database object that represents a connection to a cluster
// This constructor uses a preallocated DatabaseContext that may have been created
// on another thread
@ -2230,49 +2323,7 @@ Database Database::createDatabase(Reference<IClusterConnectionRecord> connRecord
ASSERT(TraceEvent::isNetworkThread());
platform::ImageInfo imageInfo = platform::getImageInfo();
if (connRecord) {
if (networkOptions.traceDirectory.present() && !traceFileIsOpen()) {
g_network->initMetrics();
FlowTransport::transport().initMetrics();
initTraceEventMetrics();
auto publicIP = determinePublicIPAutomatically(connRecord->getConnectionString());
selectTraceFormatter(networkOptions.traceFormat);
selectTraceClockSource(networkOptions.traceClockSource);
addUniversalTraceField("ClientDescription",
format("%s-%s-%" PRIu64,
networkOptions.primaryClient ? "primary" : "external",
FDB_VT_VERSION,
getTraceThreadId()));
openTraceFile(NetworkAddress(publicIP, ::getpid()),
networkOptions.traceRollSize,
networkOptions.traceMaxLogsSize,
networkOptions.traceDirectory.get(),
"trace",
networkOptions.traceLogGroup,
networkOptions.traceFileIdentifier,
networkOptions.tracePartialFileSuffix);
TraceEvent("ClientStart")
.detail("SourceVersion", getSourceVersion())
.detail("Version", FDB_VT_VERSION)
.detail("PackageName", FDB_VT_PACKAGE_NAME)
.detailf("ActualTime", "%lld", DEBUG_DETERMINISM ? 0 : time(nullptr))
.detail("ApiVersion", apiVersion)
.detail("ClientLibrary", imageInfo.fileName)
.detailf("ImageOffset", "%p", imageInfo.offset)
.detail("Primary", networkOptions.primaryClient)
.trackLatest("ClientStart");
initializeSystemMonitorMachineState(SystemMonitorMachineState(IPAddress(publicIP)));
systemMonitor();
uncancellable(recurring(&systemMonitor, CLIENT_KNOBS->SYSTEM_MONITOR_INTERVAL, TaskPriority::FlushTrace));
}
}
initializeClientTracing(connRecord, apiVersion);
g_network->initTLS();
@ -2324,7 +2375,7 @@ Database Database::createDatabase(Reference<IClusterConnectionRecord> connRecord
.detail("Version", FDB_VT_VERSION)
.detail("ClusterFile", connRecord ? connRecord->toString() : "None")
.detail("ConnectionString", connRecord ? connRecord->getConnectionString().toString() : "None")
.detail("ClientLibrary", imageInfo.fileName)
.detail("ClientLibrary", platform::getImageInfo().fileName)
.detail("Primary", networkOptions.primaryClient)
.detail("Internal", internal)
.trackLatest(database->connectToDatabaseEventCacheHolder.trackingKey);
@ -2408,6 +2459,9 @@ void setNetworkOption(FDBNetworkOptions::Option option, Optional<StringRef> valu
validateOptionValuePresent(value);
networkOptions.tracePartialFileSuffix = value.get().toString();
break;
case FDBNetworkOptions::TRACE_INITIALIZE_ON_SETUP:
networkOptions.traceInitializeOnSetup = true;
break;
case FDBNetworkOptions::KNOB: {
validateOptionValuePresent(value);
@ -2608,6 +2662,10 @@ void setupNetwork(uint64_t transportId, UseMetrics useMetrics) {
FlowTransport::createInstance(true, transportId, WLTOKEN_RESERVED_COUNT);
Net2FileSystem::newFileSystem();
if (networkOptions.traceInitializeOnSetup) {
::initializeClientTracing({}, {});
}
uncancellable(monitorNetworkBusyness());
}
@ -8603,24 +8661,36 @@ ACTOR Future<Optional<Standalone<VectorRef<KeyRef>>>> splitStorageMetricsWithLoc
try {
state int i = 0;
for (; i < locations.size(); i++) {
SplitMetricsRequest req(
locations[i].range, limit, used, estimated, i == locations.size() - 1, minSplitBytes);
SplitMetricsReply res = wait(loadBalance(locations[i].locations->locations(),
&StorageServerInterface::splitMetrics,
req,
TaskPriority::DataDistribution));
if (res.splits.size() && res.splits[0] <= results.back()) { // split points are out of order, possibly
// because of moving data, throw error to retry
ASSERT_WE_THINK(false); // FIXME: This seems impossible and doesn't seem to be covered by testing
throw all_alternatives_failed();
}
if (res.splits.size()) {
results.append(results.arena(), res.splits.begin(), res.splits.size());
results.arena().dependsOn(res.splits.arena());
}
used = res.used;
state Key beginKey = locations[i].range.begin;
loop {
KeyRangeRef range(beginKey, locations[i].range.end);
SplitMetricsRequest req(range, limit, used, estimated, i == locations.size() - 1, minSplitBytes);
SplitMetricsReply res = wait(loadBalance(locations[i].locations->locations(),
&StorageServerInterface::splitMetrics,
req,
TaskPriority::DataDistribution));
if (res.splits.size() &&
res.splits[0] <= results.back()) { // split points are out of order, possibly
// because of moving data, throw error to retry
ASSERT_WE_THINK(false); // FIXME: This seems impossible and doesn't seem to be covered by testing
throw all_alternatives_failed();
}
//TraceEvent("SplitStorageMetricsResult").detail("Used", used.bytes).detail("Location", i).detail("Size", res.splits.size());
if (res.splits.size()) {
results.append(results.arena(), res.splits.begin(), res.splits.size());
results.arena().dependsOn(res.splits.arena());
}
used = res.used;
if (res.more && res.splits.size()) {
// Next request will return split points after this one
beginKey = KeyRef(beginKey.arena(), res.splits.back());
} else {
break;
}
//TraceEvent("SplitStorageMetricsResult").detail("Used", used.bytes).detail("Location", i).detail("Size", res.splits.size());
}
}
if (used.allLessOrEqual(limit * CLIENT_KNOBS->STORAGE_METRICS_UNFAIR_SPLIT_LIMIT) && results.size() > 1) {

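The hunk above converts a single SplitMetricsRequest per location into a resume loop. A minimal standalone sketch of the same pagination contract, using hypothetical stand-in types (PageReply, fetchPage) rather than the real request/reply classes: each reply may set more, and the next request resumes after the last split point returned.

#include <string>
#include <vector>

// Hypothetical stand-in for SplitMetricsReply: 'more' is set when the server hit
// its row cap and the client should resume after the last returned split point.
struct PageReply {
    std::vector<std::string> splits;
    bool more = false;
};

template <class Fetch> // Fetch: (beginKey, endKey) -> PageReply
std::vector<std::string> fetchAllSplits(Fetch fetchPage, std::string beginKey, const std::string& endKey) {
    std::vector<std::string> results;
    for (;;) {
        PageReply r = fetchPage(beginKey, endKey);
        results.insert(results.end(), r.splits.begin(), r.splits.end());
        if (r.more && !r.splits.empty())
            beginKey = r.splits.back(); // next request returns split points after this one
        else
            break;
    }
    return results;
}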
View File

@ -70,7 +70,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( MAX_MESSAGE_SIZE, std::max<int>(LOG_SYSTEM_PUSHED_DATA_BLOCK_SIZE, 1e5 + 2e4 + 1) + 8 ); // VALUE_SIZE_LIMIT + SYSTEM_KEY_SIZE_LIMIT + 9 bytes (4 bytes for length, 4 bytes for sequence number, and 1 byte for mutation type)
init( TLOG_MESSAGE_BLOCK_BYTES, 10e6 );
init( TLOG_MESSAGE_BLOCK_OVERHEAD_FACTOR, double(TLOG_MESSAGE_BLOCK_BYTES) / (TLOG_MESSAGE_BLOCK_BYTES - MAX_MESSAGE_SIZE) ); //1.0121466709838096006362758832473
init( PEEK_TRACKER_EXPIRATION_TIME, 600 ); if( randomize && BUGGIFY ) PEEK_TRACKER_EXPIRATION_TIME = deterministicRandom()->coinflip() ? 0.1 : 120;
init( PEEK_TRACKER_EXPIRATION_TIME, 600 ); if( randomize && BUGGIFY ) PEEK_TRACKER_EXPIRATION_TIME = 120; // Cannot be buggified lower without changing the following assert in LogSystemPeekCursor.actor.cpp: ASSERT_WE_THINK(e.code() == error_code_operation_obsolete || SERVER_KNOBS->PEEK_TRACKER_EXPIRATION_TIME < 10);
init( PEEK_USING_STREAMING, false ); if( randomize && isSimulated && BUGGIFY ) PEEK_USING_STREAMING = true;
init( PARALLEL_GET_MORE_REQUESTS, 32 ); if( randomize && BUGGIFY ) PARALLEL_GET_MORE_REQUESTS = 2;
init( MULTI_CURSOR_PRE_FETCH_LIMIT, 10 );
@ -850,6 +850,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
// This exists for flexibility but assigning each ReadType to its own unique priority number makes the most sense
// The enumeration is currently: eager, fetch, low, normal, high
init( STORAGESERVER_READTYPE_PRIORITY_MAP, "0,1,2,3,4" );
init( SPLIT_METRICS_MAX_ROWS, 10000 );
//Wait Failure
init( MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS, 250 ); if( randomize && BUGGIFY ) MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS = 2;
@ -974,7 +975,6 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( REDWOOD_EVICT_UPDATED_PAGES, true ); if( randomize && BUGGIFY ) { REDWOOD_EVICT_UPDATED_PAGES = false; }
init( REDWOOD_DECODECACHE_REUSE_MIN_HEIGHT, 2 ); if( randomize && BUGGIFY ) { REDWOOD_DECODECACHE_REUSE_MIN_HEIGHT = deterministicRandom()->randomInt(1, 7); }
init( REDWOOD_IO_PRIORITIES, "32,32,32,32" );
init( REDWOOD_SPLIT_ENCRYPTED_PAGES_BY_TENANT, false );
// Server request latency measurement
init( LATENCY_SKETCH_ACCURACY, 0.01 );

View File

@ -70,6 +70,7 @@ public:
KV_REDWOOD,
BLOB_GRANULE,
BACKUP,
RESTORE,
TEST,
MAX,
};

View File

@ -297,6 +297,10 @@ public:
// Encryption-at-rest
bool ENABLE_ENCRYPTION_CPU_TIME_LOGGING;
// This Knob will be a comma-delimited string (e.g. 0,1,2,3) that specifies which tenants the EKP should throw
// key_not_found errors for. If TenantInfo::INVALID_TENANT is contained within the list then no tenants will be
// dropped. This Knob should ONLY be used in simulation for testing purposes.
std::string SIMULATION_EKP_TENANT_IDS_TO_DROP;
ClientKnobs(Randomize randomize);
void initialize(Randomize randomize);

View File

@ -512,10 +512,11 @@ struct GetStorageServerRejoinInfoReply {
Optional<Tag> newTag;
bool newLocality;
std::vector<std::pair<Version, Tag>> history;
EncryptionAtRestMode encryptMode;
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, version, tag, newTag, newLocality, history);
serializer(ar, version, tag, newTag, newLocality, history, encryptMode);
}
};

View File

@ -1520,6 +1520,8 @@ struct EncryptionAtRestMode {
bool operator==(const EncryptionAtRestMode& e) const { return isEquals(e); }
bool operator!=(const EncryptionAtRestMode& e) const { return !isEquals(e); }
bool operator==(Mode m) const { return mode == m; }
bool operator!=(Mode m) const { return mode != m; }
bool isEncryptionEnabled() const { return mode != EncryptionAtRestMode::DISABLED; }
@ -1548,6 +1550,11 @@ struct EncryptionAtRestMode {
uint32_t mode;
};
template <>
struct Traceable<EncryptionAtRestMode> : std::true_type {
static std::string toString(const EncryptionAtRestMode& mode) { return mode.toString(); }
};
typedef StringRef ClusterNameRef;
typedef Standalone<ClusterNameRef> ClusterName;

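The Traceable<EncryptionAtRestMode> specialization above is what later lets TraceEvent::detail() accept the type directly (e.g. detail("Mode", commitData.encryptMode) further down in this change). A self-contained sketch of the pattern, with a hypothetical Mode type and a simplified Traceable template standing in for flow's machinery:

#include <string>
#include <type_traits>

// Simplified stand-in for flow's Traceable machinery: a type becomes usable with
// TraceEvent::detail() once a Traceable<T> specialization providing toString() exists.
template <class T>
struct Traceable : std::false_type {};

struct Mode {
    int m = 0;
    std::string toString() const { return m ? "enabled" : "disabled"; }
};

template <>
struct Traceable<Mode> : std::true_type {
    static std::string toString(const Mode& mode) { return mode.toString(); }
};

static_assert(Traceable<Mode>::value, "Mode is now detail()-able");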
View File

@ -19,6 +19,7 @@
*/
#pragma once
#include "flow/EncryptUtils.h"
#include "flow/genericactors.actor.h"
#if defined(NO_INTELLISENSE) && !defined(FDBCLIENT_GETCIPHERKEYS_ACTOR_G_H)
#define FDBCLIENT_GETCIPHERKEYS_ACTOR_G_H
#include "fdbclient/GetEncryptCipherKeys.actor.g.h"
@ -27,7 +28,9 @@
#include "fdbclient/BlobCipher.h"
#include "fdbclient/EncryptKeyProxyInterface.h"
#include "fdbclient/Knobs.h"
#include "fdbrpc/Stats.h"
#include "fdbrpc/TenantInfo.h"
#include "flow/Knobs.h"
#include "flow/IRandom.h"
@ -182,6 +185,18 @@ Future<EKPGetBaseCipherKeysByIdsReply> getUncachedEncryptCipherKeys(Reference<As
TraceEvent(SevWarn, "GetEncryptCipherKeys_RequestFailed").error(reply.error.get());
throw encrypt_keys_fetch_failed();
}
if (g_network && g_network->isSimulated() && usageType == BlobCipherMetrics::RESTORE) {
std::unordered_set<int64_t> tenantIdsToDrop =
parseStringToUnorderedSet<int64_t>(CLIENT_KNOBS->SIMULATION_EKP_TENANT_IDS_TO_DROP, ',');
if (!tenantIdsToDrop.count(TenantInfo::INVALID_TENANT)) {
for (auto& baseCipherInfo : request.baseCipherInfos) {
if (tenantIdsToDrop.count(baseCipherInfo.domainId)) {
TraceEvent("GetEncryptCipherKeys_SimulatedError").detail("DomainId", baseCipherInfo.domainId);
throw encrypt_keys_fetch_failed();
}
}
}
}
return reply;
} catch (Error& e) {
TraceEvent("GetEncryptCipherKeys_CaughtError").error(e);

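The simulated-drop hook above relies on parseStringToUnorderedSet to turn the comma-delimited knob into a set of tenant ids. A plausible standalone equivalent for illustration (the helper name and exact semantics here are assumptions, not the real implementation):

#include <cstdint>
#include <sstream>
#include <string>
#include <unordered_set>

// Hypothetical stand-in for parseStringToUnorderedSet<int64_t>(s, ','):
// split on the delimiter and parse each non-empty token as an integer.
std::unordered_set<int64_t> parseIdList(const std::string& s, char delim) {
    std::unordered_set<int64_t> out;
    std::stringstream ss(s);
    std::string token;
    while (std::getline(ss, token, delim)) {
        if (!token.empty())
            out.insert(std::stoll(token));
    }
    return out; // e.g. parseIdList("0,1,2,3", ',') -> {0, 1, 2, 3}
}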
View File

@ -71,6 +71,7 @@ struct NetworkOptions {
std::string traceClockSource;
std::string traceFileIdentifier;
std::string tracePartialFileSuffix;
bool traceInitializeOnSetup;
Optional<bool> logClientInfo;
Reference<ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>> supportedVersions;
bool runLoopProfilingEnabled;

View File

@ -791,6 +791,7 @@ public:
std::string STORAGESERVER_READ_PRIORITIES;
int STORAGE_SERVER_READ_CONCURRENCY;
std::string STORAGESERVER_READTYPE_PRIORITY_MAP;
int SPLIT_METRICS_MAX_ROWS;
// Wait Failure
int MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS;
@ -937,7 +938,6 @@ public:
double REDWOOD_HISTOGRAM_INTERVAL;
bool REDWOOD_EVICT_UPDATED_PAGES; // Whether to prioritize eviction of updated pages from cache.
int REDWOOD_DECODECACHE_REUSE_MIN_HEIGHT; // Minimum height for which to keep and reuse page decode caches
bool REDWOOD_SPLIT_ENCRYPTED_PAGES_BY_TENANT; // Whether to split pages by tenant if encryption is enabled
std::string REDWOOD_IO_PRIORITIES;

View File

@ -740,10 +740,11 @@ struct SplitMetricsReply {
constexpr static FileIdentifier file_identifier = 11530792;
Standalone<VectorRef<KeyRef>> splits;
StorageMetrics used;
bool more = false;
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, splits, used);
serializer(ar, splits, used, more);
}
};

View File

@ -220,7 +220,7 @@ private:
if (!cache->lastTenantId.present()) {
return false;
}
return cache->lastTenantId.get() > 0;
return cache->lastTenantId.get() >= 0;
}
return true;
}

View File

@ -59,6 +59,8 @@ description is not currently required but encouraged.
description="Once provided, this string will be used to replace the port/PID in the log file names." />
<Option name="trace_share_among_client_threads" code="37"
description="Use the same base trace file name for all client threads as it did before version 7.2. The current default behavior is to use distinct trace file names for client threads by including their version and thread index." />
<Option name="trace_initialize_on_setup" code="38"
description="Initialize trace files on network setup, determine the local IP later. Otherwise tracing is initialized when opening the first database." />
<Option name="trace_partial_file_suffix" code="39"
paramType="String" paramDescription="Append this suffix to partially written log files. When a log file is complete, it is renamed to remove the suffix. No separator is added between the file and the suffix. If you want to add a file extension, you should include the separator - e.g. '.tmp' instead of 'tmp' to add the 'tmp' extension."
description="Set file suffix for partially written log files." />

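From a client's perspective the new option would be set before fdb_setup_network(). A sketch against the C API; the constant name FDB_NET_OPTION_TRACE_INITIALIZE_ON_SETUP is an assumption based on the generated naming for code 38 above, and error handling is omitted:

#define FDB_API_VERSION 720
#include <foundationdb/fdb_c.h>
#include <cstring>

void enableTraceOnSetup(const char* traceDir) {
    fdb_select_api_version(FDB_API_VERSION);
    // trace_enable takes the log directory as its string parameter
    fdb_network_set_option(FDB_NET_OPTION_TRACE_ENABLE, (const uint8_t*)traceDir, (int)strlen(traceDir));
    // trace_initialize_on_setup takes no parameter (assumed generated constant name)
    fdb_network_set_option(FDB_NET_OPTION_TRACE_INITIALIZE_ON_SETUP, nullptr, 0);
    fdb_setup_network(); // trace files open here; the local IP is determined later
}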
View File

@ -22,7 +22,9 @@
// Define boost::asio::io_service
#include <algorithm>
#ifndef BOOST_SYSTEM_NO_LIB
#define BOOST_SYSTEM_NO_LIB
#endif
#define BOOST_DATE_TIME_NO_LIB
#define BOOST_REGEX_NO_LIB
#include <boost/asio.hpp>

View File

@ -18,7 +18,9 @@
* limitations under the License.
*/
#ifndef BOOST_SYSTEM_NO_LIB
#define BOOST_SYSTEM_NO_LIB
#endif
#define BOOST_DATE_TIME_NO_LIB
#define BOOST_REGEX_NO_LIB
#include <boost/asio.hpp>

View File

@ -26,7 +26,9 @@
#include "fmt/format.h"
#include "fdbrpc/simulator.h"
#include "flow/Arena.h"
#ifndef BOOST_SYSTEM_NO_LIB
#define BOOST_SYSTEM_NO_LIB
#endif
#define BOOST_DATE_TIME_NO_LIB
#define BOOST_REGEX_NO_LIB
#include "fdbrpc/SimExternalConnection.h"
@ -1676,7 +1678,7 @@ public:
}
// Reboot if dead machines do fulfill policies
if (tooManyDead) {
if (tooManyDead || (usableRegions > 1 && notEnoughLeft)) {
newKt = KillType::Reboot;
canSurvive = false;
TraceEvent("KillChanged")

View File

@ -272,11 +272,7 @@ int runHost(TLSCreds creds, int addrPipe, int completionPipe, Result expect) {
tlsConfig.setKeyBytes(creds.keyBytes);
}
g_network = newNet2(tlsConfig);
openTraceFile(NetworkAddress(),
10 << 20,
10 << 20,
".",
IsServer ? "authz_tls_unittest_server" : "authz_tls_unittest_client");
openTraceFile({}, 10 << 20, 10 << 20, ".", IsServer ? "authz_tls_unittest_server" : "authz_tls_unittest_client");
FlowTransport::createInstance(!IsServer, 1, WLTOKEN_RESERVED_COUNT);
auto& transport = FlowTransport::transport();
if constexpr (IsServer) {

View File

@ -300,7 +300,6 @@ ACTOR Future<Void> validateGranuleSummaries(Database cx,
// same invariant isn't always true for delta version because of force flushing around granule
// merges
if (it.keyRange == itLast.range()) {
ASSERT(it.deltaVersion >= last.deltaVersion);
if (it.snapshotVersion == last.snapshotVersion) {
ASSERT(it.snapshotSize == last.snapshotSize);
}
@ -308,7 +307,11 @@ ACTOR Future<Void> validateGranuleSummaries(Database cx,
ASSERT(it.snapshotSize == last.snapshotSize);
ASSERT(it.deltaSize == last.deltaSize);
} else if (it.snapshotVersion == last.snapshotVersion) {
ASSERT(it.deltaSize > last.deltaSize);
// empty delta files can cause the version to decrease, or the size to remain
// the same while the version increases
if (it.deltaVersion >= last.deltaVersion) {
ASSERT(it.deltaSize >= last.deltaSize);
} // else can happen because of empty delta file version bump
}
break;
}

View File

@ -140,6 +140,9 @@ private:
for (auto& r : getSystemBackupRanges()) {
ranges.push_back(r);
}
// last updated version for table metadata
ranges.push_back(KeyRangeRef(metadataVersionKey, metadataVersionKeyEnd));
for (auto range : ranges) {
state GetRangeLimits limits(SERVER_KNOBS->BLOB_MANIFEST_RW_ROWS);
limits.minRows = 0;

View File

@ -914,8 +914,7 @@ ACTOR Future<BlobFileIndex> writeDeltaFile(Reference<BlobWorkerData> bwData,
}
if (BUGGIFY && bwData->maybeInjectTargetedRestart()) {
wait(delay(0)); // should be cancelled
ASSERT(false);
wait(Never());
}
if (BUGGIFY_WITH_PROB(0.01)) {
@ -953,6 +952,84 @@ ACTOR Future<BlobFileIndex> writeDeltaFile(Reference<BlobWorkerData> bwData,
}
}
ACTOR Future<BlobFileIndex> writeEmptyDeltaFile(Reference<BlobWorkerData> bwData,
KeyRange keyRange,
UID granuleID,
int64_t epoch,
int64_t seqno,
Version previousVersion,
Version currentDeltaVersion,
Future<BlobFileIndex> previousDeltaFileFuture,
Future<Void> waitCommitted,
Optional<std::pair<KeyRange, UID>> oldGranuleComplete) {
ASSERT(previousVersion < currentDeltaVersion);
wait(delay(0, TaskPriority::BlobWorkerUpdateStorage));
// before updating FDB, wait for the delta file version to be committed and previous delta files to finish
wait(waitCommitted);
BlobFileIndex prev = wait(previousDeltaFileFuture);
wait(delay(0, TaskPriority::BlobWorkerUpdateFDB));
// update FDB with new file
state Key oldDFKey = blobGranuleFileKeyFor(granuleID, previousVersion, 'D');
state Key newDFKey = blobGranuleFileKeyFor(granuleID, currentDeltaVersion, 'D');
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(bwData->db);
state Optional<Value> dfValue;
loop {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
try {
wait(readAndCheckGranuleLock(tr, keyRange, epoch, seqno));
// FIXME: could construct this value from the prev BlobFileIndex, but checking that the key exists in the DB
// is a good sanity check anyway
if (!dfValue.present()) {
// Only check if not seen yet. If we get commit_unknown_result and then retry, we'd see our own delete
wait(store(dfValue, tr->get(oldDFKey)));
ASSERT(dfValue.present());
} else {
tr->addReadConflictRange(singleKeyRange(oldDFKey));
}
tr->clear(oldDFKey);
tr->set(newDFKey, dfValue.get());
if (oldGranuleComplete.present()) {
wait(updateGranuleSplitState(&tr->getTransaction(),
oldGranuleComplete.get().first,
oldGranuleComplete.get().second,
granuleID,
BlobGranuleSplitState::Done));
}
wait(tr->commit());
if (BW_DEBUG) {
fmt::print(
"Granule {0} [{1} - {2}) empty delta file bumped version last delta file from {3} -> {4}, cv={5}\n",
granuleID.toString(),
keyRange.begin.printable(),
keyRange.end.printable(),
previousVersion,
currentDeltaVersion,
tr->getCommittedVersion());
}
if (BUGGIFY && bwData->maybeInjectTargetedRestart()) {
wait(Never());
}
if (BUGGIFY_WITH_PROB(0.01)) {
wait(delay(deterministicRandom()->random01()));
}
return BlobFileIndex(currentDeltaVersion, "", 0, 0, 0, {});
} catch (Error& e) {
wait(tr->onError(e));
}
}
}
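writeEmptyDeltaFile above is essentially a transactional key rename: the last delta-file row moves to a key encoding the new version while its value is preserved, and caching dfValue makes the operation idempotent across commit_unknown_result retries. A stripped-down sketch of just that rename semantics over a plain map (not the real transaction machinery):

#include <cassert>
#include <map>
#include <optional>
#include <string>

using Store = std::map<std::string, std::string>;

// Move the row at oldKey to newKey, preserving its value. cachedValue persists
// across retries so a retry after an ambiguous commit does not re-read its own delete.
void bumpDeltaFileVersion(Store& files, const std::string& oldKey, const std::string& newKey,
                          std::optional<std::string>& cachedValue) {
    if (!cachedValue) { // first attempt: read and remember the old row
        auto it = files.find(oldKey);
        assert(it != files.end());
        cachedValue = it->second;
    }
    files.erase(oldKey);          // no-op when re-applied on retry
    files[newKey] = *cachedValue; // same value, new version key
}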
ACTOR Future<Void> reevaluateInitialSplit(Reference<BlobWorkerData> bwData,
UID granuleID,
KeyRange keyRange,
@ -1158,8 +1235,7 @@ ACTOR Future<BlobFileIndex> writeSnapshot(Reference<BlobWorkerData> bwData,
}
if (BUGGIFY && bwData->maybeInjectTargetedRestart()) {
wait(delay(0)); // should be cancelled
ASSERT(false);
wait(Never());
}
// FIXME: change when we implement multiplexing
@ -1213,8 +1289,7 @@ ACTOR Future<BlobFileIndex> dumpInitialSnapshotFromFDB(Reference<BlobWorkerData>
DEBUG_KEY_RANGE("BlobWorkerFDBSnapshot", readVersion, metadata->keyRange, bwData->id);
if (BUGGIFY && bwData->maybeInjectTargetedRestart()) {
wait(delay(0)); // should be cancelled
ASSERT(false);
wait(Never());
}
// initial snapshot is committed in fdb, we can pop the change feed up to this version
@ -1633,8 +1708,7 @@ ACTOR Future<Void> reevaluateInitialSplit(Reference<BlobWorkerData> bwData,
reply.proposedSplitKey = proposedSplitKey;
bwData->currentManagerStatusStream.get().send(reply);
if (BUGGIFY && bwData->maybeInjectTargetedRestart()) {
wait(delay(0)); // should be cancelled
ASSERT(false);
wait(Never());
}
// if a new manager appears, also tell it about this granule being splittable, or retry after a certain
// amount of time of not hearing back
@ -1755,8 +1829,16 @@ void handleCompletedDeltaFile(Reference<BlobWorkerData> bwData,
Key cfKey,
Version cfStartVersion,
std::deque<std::pair<Version, Version>>* rollbacksCompleted,
std::deque<Future<Void>>& inFlightPops) {
metadata->files.deltaFiles.push_back(completedDeltaFile);
std::deque<Future<Void>>& inFlightPops,
bool emptyDeltaFile) {
if (emptyDeltaFile) {
ASSERT(!metadata->files.deltaFiles.empty());
ASSERT(completedDeltaFile.length == 0);
ASSERT(metadata->files.deltaFiles.back().version < completedDeltaFile.version);
metadata->files.deltaFiles.back().version = completedDeltaFile.version;
} else {
metadata->files.deltaFiles.push_back(completedDeltaFile);
}
ASSERT(metadata->durableDeltaVersion.get() < completedDeltaFile.version);
metadata->durableDeltaVersion.set(completedDeltaFile.version);
@ -1813,9 +1895,10 @@ struct InFlightFile {
Version version;
uint64_t bytes;
bool snapshot;
bool emptyDeltaFile;
InFlightFile(Future<BlobFileIndex> future, Version version, uint64_t bytes, bool snapshot)
: future(future), version(version), bytes(bytes), snapshot(snapshot) {}
InFlightFile(Future<BlobFileIndex> future, Version version, uint64_t bytes, bool snapshot, bool emptyDeltaFile)
: future(future), version(version), bytes(bytes), snapshot(snapshot), emptyDeltaFile(emptyDeltaFile) {}
};
namespace {
@ -2233,7 +2316,7 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
startVersion = startState.previousDurableVersion;
Future<BlobFileIndex> inFlightBlobSnapshot = compactFromBlob(
bwData, bstore, metadata, startState.granuleID, startState.blobFilesToSnapshot, startVersion);
inFlightFiles.push_back(InFlightFile(inFlightBlobSnapshot, startVersion, 0, true));
inFlightFiles.push_back(InFlightFile(inFlightBlobSnapshot, startVersion, 0, true, false));
pendingSnapshots++;
metadata->durableSnapshotVersion.set(minDurableSnapshotV);
@ -2260,6 +2343,7 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
if (inFlightFiles.front().future.isReady()) {
BlobFileIndex completedFile = wait(inFlightFiles.front().future);
if (inFlightFiles.front().snapshot) {
ASSERT(!inFlightFiles.front().emptyDeltaFile);
if (metadata->files.deltaFiles.empty()) {
ASSERT(completedFile.version == metadata->initialSnapshotVersion);
} else {
@ -2343,6 +2427,7 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
if (inFlightFiles.front().future.isReady()) {
BlobFileIndex completedFile = wait(inFlightFiles.front().future);
if (inFlightFiles.front().snapshot) {
ASSERT(!inFlightFiles.front().emptyDeltaFile);
if (metadata->files.deltaFiles.empty()) {
ASSERT(completedFile.version == metadata->initialSnapshotVersion);
} else {
@ -2360,7 +2445,8 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
cfKey,
startState.changeFeedStartVersion,
&rollbacksCompleted,
inFlightPops);
inFlightPops,
inFlightFiles.front().emptyDeltaFile);
}
inFlightFiles.pop_front();
@ -2733,8 +2819,6 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
ASSERT(lastDeltaVersion >= metadata->currentDeltas.back().version);
ASSERT(metadata->pendingDeltaVersion < metadata->currentDeltas.front().version);
} else {
// FIXME: could always write special metadata for empty file, so we don't actually
// write/read a bunch of empty blob files
ASSERT(forceFlush);
ASSERT(!forceFlushVersions.empty());
CODE_PROBE(true, "Force flushing empty delta file!");
@ -2767,24 +2851,40 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
oldChangeFeedDataComplete.present() ? ". Finalizing " : "");
}
int64_t deltaFileBudget =
std::min((int64_t)metadata->bufferedDeltaBytes, SERVER_KNOBS->BLOB_WORKER_DELTA_WRITE_BUDGET_BYTES);
startDeltaFileWrite = bwData->deltaWritesBudget->take(TaskPriority::DefaultYield, deltaFileBudget);
Future<BlobFileIndex> dfFuture =
writeDeltaFile(bwData,
bstore,
metadata->keyRange,
startState.granuleID,
metadata->originalEpoch,
metadata->originalSeqno,
metadata->currentDeltas,
lastDeltaVersion,
previousFuture,
waitVersionCommitted(bwData, metadata, lastDeltaVersion),
oldChangeFeedDataComplete,
startDeltaFileWrite,
deltaFileBudget);
inFlightFiles.push_back(InFlightFile(dfFuture, lastDeltaVersion, metadata->bufferedDeltaBytes, false));
Future<BlobFileIndex> dfFuture;
bool emptyDeltaFile = metadata->bytesInNewDeltaFiles > 0 && metadata->currentDeltas.empty();
if (emptyDeltaFile) {
// Optimization to do a metadata-only update if flushing an empty delta file
dfFuture = writeEmptyDeltaFile(bwData,
metadata->keyRange,
startState.granuleID,
metadata->originalEpoch,
metadata->originalSeqno,
metadata->pendingDeltaVersion,
lastDeltaVersion,
previousFuture,
waitVersionCommitted(bwData, metadata, lastDeltaVersion),
oldChangeFeedDataComplete);
} else {
int64_t deltaFileBudget = std::min((int64_t)metadata->bufferedDeltaBytes,
SERVER_KNOBS->BLOB_WORKER_DELTA_WRITE_BUDGET_BYTES);
startDeltaFileWrite = bwData->deltaWritesBudget->take(TaskPriority::DefaultYield, deltaFileBudget);
dfFuture = writeDeltaFile(bwData,
bstore,
metadata->keyRange,
startState.granuleID,
metadata->originalEpoch,
metadata->originalSeqno,
metadata->currentDeltas,
lastDeltaVersion,
previousFuture,
waitVersionCommitted(bwData, metadata, lastDeltaVersion),
oldChangeFeedDataComplete,
startDeltaFileWrite,
deltaFileBudget);
}
inFlightFiles.push_back(
InFlightFile(dfFuture, lastDeltaVersion, metadata->bufferedDeltaBytes, false, emptyDeltaFile));
// add new pending delta file
ASSERT(metadata->pendingDeltaVersion < lastDeltaVersion);
@ -2864,7 +2964,8 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
reSnapshotNoCheck(bwData, bstore, metadata, startState.granuleID, previousFuture);
writeAmpTarget.decrease(metadata->bytesInNewDeltaFiles);
}
inFlightFiles.push_back(InFlightFile(inFlightBlobSnapshot, metadata->pendingDeltaVersion, 0, true));
inFlightFiles.push_back(
InFlightFile(inFlightBlobSnapshot, metadata->pendingDeltaVersion, 0, true, false));
pendingSnapshots++;
metadata->pendingSnapshotVersion = metadata->pendingDeltaVersion;
@ -2903,6 +3004,7 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
// TODO don't duplicate code
BlobFileIndex completedFile = wait(inFlightFiles.front().future);
if (inFlightFiles.front().snapshot) {
ASSERT(!inFlightFiles.front().emptyDeltaFile);
if (metadata->files.deltaFiles.empty()) {
ASSERT(completedFile.version == metadata->initialSnapshotVersion);
} else {
@ -2919,7 +3021,8 @@ ACTOR Future<Void> blobGranuleUpdateFiles(Reference<BlobWorkerData> bwData,
cfKey,
startState.changeFeedStartVersion,
&rollbacksCompleted,
inFlightPops);
inFlightPops,
inFlightFiles.front().emptyDeltaFile);
}
inFlightFiles.pop_front();
@ -4377,8 +4480,7 @@ ACTOR Future<GranuleStartState> openGranule(Reference<BlobWorkerData> bwData, As
}
if (BUGGIFY && bwData->maybeInjectTargetedRestart()) {
wait(delay(0)); // should be cancelled
ASSERT(false);
wait(Never());
}
return info;

View File

@ -363,6 +363,7 @@ ACTOR Future<Void> newSeedServers(Reference<ClusterRecoveryData> self,
isr.reqId = deterministicRandom()->randomUniqueID();
isr.interfaceId = deterministicRandom()->randomUniqueID();
isr.initialClusterVersion = self->recoveryTransactionVersion;
isr.encryptMode = self->configuration.encryptionAtRestMode;
ErrorOr<InitializeStorageReply> newServer = wait(recruits.storageServers[idx].storage.tryGetReply(isr));

View File

@ -203,6 +203,7 @@ struct ResolutionRequestBuilder {
ASSERT(transactionNumberInBatch >= 0 && transactionNumberInBatch < 32768);
bool isTXNStateTransaction = false;
DisabledTraceEvent("AddTransaction", self->dbgid).detail("TenantMode", (int)self->getTenantMode());
bool needParseTenantId = !trRequest.tenantInfo.hasTenant() && self->getTenantMode() == TenantMode::REQUIRED;
VectorRef<int64_t> tenantIds;
for (auto& m : trIn.mutations) {
@ -2471,6 +2472,7 @@ ACTOR static Future<Void> rejoinServer(CommitProxyInterface proxy, ProxyCommitDa
}
rep.newTag = Tag(maxTagLocality + 1, 0);
}
rep.encryptMode = commitData->encryptMode;
req.reply.send(rep);
} else {
req.reply.sendError(worker_removed());
@ -3058,7 +3060,7 @@ ACTOR Future<Void> commitProxyServerCore(CommitProxyInterface proxy,
state GetHealthMetricsReply healthMetricsReply;
state GetHealthMetricsReply detailedHealthMetricsReply;
TraceEvent("CPEncryptionAtRestMode").detail("Mode", commitData.encryptMode.toString());
TraceEvent("CPEncryptionAtRestMode", proxy.id()).detail("Mode", commitData.encryptMode);
addActor.send(waitFailureServer(proxy.waitFailure.getFuture()));
addActor.send(traceRole(Role::COMMIT_PROXY, proxy.id()));
@ -3223,7 +3225,16 @@ ACTOR Future<Void> updateLocalDbInfo(Reference<AsyncVar<ServerDBInfo> const> in,
// only update the db info if this is the current CP, or before we received the first one including the current CP.
// Several db infos at the beginning just contain the provisional CP
if (isIncluded || !firstValidDbInfo) {
out->set(in->get());
DisabledTraceEvent("UpdateLocalDbInfo", myInterface.id())
.detail("Provisional", myInterface.provisional)
.detail("Included", isIncluded)
.detail("FirstValid", firstValidDbInfo)
.detail("ReceivedRC", in->get().recoveryCount)
.detail("RecoveryCount", recoveryCount)
.detail("TenantMode", (int)in->get().client.tenantMode);
if (in->get().recoveryCount >= out->get().recoveryCount) {
out->set(in->get());
}
}
wait(in->onChange());

View File

@ -2355,6 +2355,7 @@ public:
isr.seedTag = invalidTag;
isr.reqId = deterministicRandom()->randomUniqueID();
isr.interfaceId = interfaceId;
isr.encryptMode = self->configuration.encryptionAtRestMode;
// if tss, wait for pair ss to finish and add its id to isr. If pair fails, don't recruit tss
state bool doRecruit = true;

View File

@ -19,7 +19,9 @@
*/
#if !defined(_WIN32) && !defined(__APPLE__) && !defined(__INTEL_COMPILER)
#ifndef BOOST_SYSTEM_NO_LIB
#define BOOST_SYSTEM_NO_LIB
#endif
#define BOOST_DATE_TIME_NO_LIB
#define BOOST_REGEX_NO_LIB
#include <boost/process.hpp>

View File

@ -81,6 +81,10 @@ struct KeyValueStoreCompressTestData final : IKeyValueStore {
return doReadRange(store, keys, rowLimit, byteLimit, options);
}
Future<EncryptionAtRestMode> encryptionMode() override {
return EncryptionAtRestMode(EncryptionAtRestMode::DISABLED);
}
private:
ACTOR static Future<Optional<Value>> doReadValue(IKeyValueStore* store, Key key, Optional<ReadOptions> options) {
Optional<Value> v = wait(store->readValue(key, options));

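This change threads a new encryptionMode() accessor through the IKeyValueStore implementations; every non-encrypting store answers DISABLED immediately, while Redwood (below) resolves it from its recovered header. A minimal sketch of the interface shape, using std::future and a local enum as stand-ins for flow's Future and the real EncryptionAtRestMode:

#include <future>

enum class RestMode { DISABLED, CLUSTER_AWARE, DOMAIN_AWARE }; // stand-in enum

struct IStore {
    // Encrypting stores resolve this asynchronously from persisted state;
    // all others can answer immediately.
    virtual std::future<RestMode> encryptionMode() = 0;
    virtual ~IStore() = default;
};

struct PlainStore : IStore {
    std::future<RestMode> encryptionMode() override {
        std::promise<RestMode> p;
        p.set_value(RestMode::DISABLED); // ready future, mirroring the overrides above
        return p.get_future();
    }
};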
View File

@ -19,6 +19,7 @@
*/
#include "fdbclient/BlobCipher.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/Knobs.h"
#include "fdbclient/Notified.h"
#include "fdbclient/SystemData.h"
@ -290,6 +291,12 @@ public:
int uncommittedBytes() { return queue.totalSize(); }
// KeyValueStoreMemory does not support encryption-at-rest in general, although it does support encryption
// when used as the TxnStateStore backend.
Future<EncryptionAtRestMode> encryptionMode() override {
return EncryptionAtRestMode(EncryptionAtRestMode::DISABLED);
}
private:
enum OpType {
OpSet,

View File

@ -18,6 +18,7 @@
* limitations under the License.
*/
#include "fdbclient/FDBTypes.h"
#ifdef SSD_ROCKSDB_EXPERIMENTAL
#include <rocksdb/c.h>
@ -2267,6 +2268,10 @@ struct RocksDBKeyValueStore : IKeyValueStore {
return Void();
}
Future<EncryptionAtRestMode> encryptionMode() override {
return EncryptionAtRestMode(EncryptionAtRestMode::DISABLED);
}
DB db = nullptr;
std::shared_ptr<SharedRocksDBState> sharedState;
std::shared_ptr<PerfContextMetrics> perfContextMetrics;

View File

@ -18,6 +18,7 @@
* limitations under the License.
*/
#include "fdbclient/FDBTypes.h"
#define SQLITE_THREADSAFE 0 // also in sqlite3.amalgamation.c!
#include "fmt/format.h"
#include "crc32/crc32c.h"
@ -1639,6 +1640,10 @@ public:
Future<SpringCleaningWorkPerformed> doClean();
void startReadThreads();
Future<EncryptionAtRestMode> encryptionMode() override {
return EncryptionAtRestMode(EncryptionAtRestMode::DISABLED);
}
private:
KeyValueStoreType type;
UID logID;

View File

@ -1,3 +1,4 @@
#include "fdbclient/FDBTypes.h"
#ifdef SSD_ROCKSDB_EXPERIMENTAL
#include "fdbclient/KeyRangeMap.h"
@ -3102,6 +3103,10 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
// Used for debugging shard mapping issue.
std::vector<std::pair<KeyRange, std::string>> getDataMapping() { return shardManager.getDataMapping(); }
Future<EncryptionAtRestMode> encryptionMode() override {
return EncryptionAtRestMode(EncryptionAtRestMode::DISABLED);
}
std::shared_ptr<ShardedRocksDBState> rState;
rocksdb::Options dbOptions;
ShardManager shardManager;

View File

@ -98,7 +98,7 @@ ACTOR Future<Void> dispatchRequests(Reference<RestoreLoaderData> self, Database
state int sendLoadParams = 0;
state int lastLoadReqs = 0;
loop {
TraceEvent(SevDebug, "FastRestoreLoaderDispatchRequests", self->id())
TraceEvent(SevVerbose, "FastRestoreLoaderDispatchRequests", self->id())
.detail("SendingQueue", self->sendingQueue.size())
.detail("LoadingQueue", self->loadingQueue.size())
.detail("SendingLoadParamQueue", self->sendLoadParamQueue.size())
@ -223,7 +223,7 @@ ACTOR Future<Void> dispatchRequests(Reference<RestoreLoaderData> self, Database
updateProcessStats(self);
if (self->loadingQueue.empty() && self->sendingQueue.empty() && self->sendLoadParamQueue.empty()) {
TraceEvent(SevDebug, "FastRestoreLoaderDispatchRequestsWaitOnRequests", self->id())
TraceEvent(SevVerbose, "FastRestoreLoaderDispatchRequestsWaitOnRequests", self->id())
.detail("HasPendingRequests", self->hasPendingRequests->get());
self->hasPendingRequests->set(false);
wait(self->hasPendingRequests->onChange()); // CAREFUL:Improper req release may cause restore stuck here
@ -1041,7 +1041,7 @@ void splitMutation(const KeyRangeMap<UID>& krMap,
VectorRef<MutationRef>& mvector,
Arena& nodeIDs_arena,
VectorRef<UID>& nodeIDs) {
TraceEvent(SevDebug, "FastRestoreSplitMutation").detail("Mutation", m);
TraceEvent(SevVerbose, "FastRestoreSplitMutation").detail("Mutation", m);
ASSERT(mvector.empty());
ASSERT(nodeIDs.empty());
auto r = krMap.intersectingRanges(KeyRangeRef(m.param1, m.param2));

View File

@ -1472,6 +1472,8 @@ private:
void setSimpleConfig();
void setSpecificConfig(const TestConfig& testConfig);
void setDatacenters(const TestConfig& testConfig);
void setTenantMode(const TestConfig& testConfig);
void setEncryptionAtRestMode(const TestConfig& testConfig);
void setStorageEngine(const TestConfig& testConfig);
void setRegions(const TestConfig& testConfig);
void setReplicationType(const TestConfig& testConfig);
@ -1579,12 +1581,60 @@ void SimulationConfig::setDatacenters(const TestConfig& testConfig) {
}
}
void SimulationConfig::setTenantMode(const TestConfig& testConfig) {
TenantMode tenantMode = TenantMode::DISABLED;
if (testConfig.tenantModes.size() > 0) {
tenantMode = TenantMode::fromString(deterministicRandom()->randomChoice(testConfig.tenantModes));
} else if (testConfig.allowDefaultTenant && deterministicRandom()->coinflip()) {
tenantMode = deterministicRandom()->random01() < 0.9 ? TenantMode::REQUIRED : TenantMode::OPTIONAL_TENANT;
} else if (deterministicRandom()->coinflip()) {
tenantMode = TenantMode::OPTIONAL_TENANT;
}
set_config("tenant_mode=" + tenantMode.toString());
}
void SimulationConfig::setEncryptionAtRestMode(const TestConfig& testConfig) {
EncryptionAtRestMode encryptionMode = EncryptionAtRestMode::DISABLED;
// Only Redwood supports encryption. Disable encryption if a non-Redwood storage engine is explicitly specified.
bool disableEncryption = testConfig.disableEncryption ||
(testConfig.storageEngineType.present() && testConfig.storageEngineType.get() != 3);
// TODO: Remove check on the ENABLE_ENCRYPTION knob once the EKP can start using the db config
if (!disableEncryption && (SERVER_KNOBS->ENABLE_ENCRYPTION || !testConfig.encryptModes.empty())) {
TenantMode tenantMode = db.tenantMode;
if (!testConfig.encryptModes.empty()) {
std::vector<EncryptionAtRestMode> validEncryptModes;
// Get the subset of valid encrypt modes given the tenant mode
for (int i = 0; i < testConfig.encryptModes.size(); i++) {
EncryptionAtRestMode encryptMode = EncryptionAtRestMode::fromString(testConfig.encryptModes.at(i));
if (encryptMode != EncryptionAtRestMode::DOMAIN_AWARE || tenantMode == TenantMode::REQUIRED) {
validEncryptModes.push_back(encryptMode);
}
}
if (validEncryptModes.size() > 0) {
encryptionMode = deterministicRandom()->randomChoice(validEncryptModes);
}
} else {
// TODO: These cases should only trigger with probability (BUGGIFY) once the server knob is removed
if (tenantMode == TenantMode::DISABLED || tenantMode == TenantMode::OPTIONAL_TENANT || BUGGIFY) {
// optional and disabled tenant modes currently only support cluster aware encryption
encryptionMode = EncryptionAtRestMode::CLUSTER_AWARE;
} else {
encryptionMode = EncryptionAtRestMode::DOMAIN_AWARE;
}
}
}
set_config("encryption_at_rest_mode=" + encryptionMode.toString());
}
// Sets storage engine based on testConfig details
void SimulationConfig::setStorageEngine(const TestConfig& testConfig) {
// The storage engine type is picked from [0, 6); engines to exclude are re-picked below.
// TODO: Figure out what is broken with the RocksDB engine in simulation.
int storage_engine_type = deterministicRandom()->randomInt(0, 6);
if (testConfig.storageEngineType.present()) {
if (db.encryptionAtRestMode.isEncryptionEnabled()) {
// The only storage engine supporting encryption is Redwood.
storage_engine_type = 3;
} else if (testConfig.storageEngineType.present()) {
storage_engine_type = testConfig.storageEngineType.get();
} else {
// Continuously re-pick the storage engine type if it's the one we want to exclude
@ -2038,7 +2088,8 @@ void SimulationConfig::generateNormalConfig(const TestConfig& testConfig) {
setSimpleConfig();
}
setSpecificConfig(testConfig);
setTenantMode(testConfig);
setEncryptionAtRestMode(testConfig);
setStorageEngine(testConfig);
setReplicationType(testConfig);
if (generateFearless || (datacenters == 2 && deterministicRandom()->random01() < 0.5)) {
@ -2059,15 +2110,6 @@ void SimulationConfig::generateNormalConfig(const TestConfig& testConfig) {
setConfigDB(testConfig);
}
bool validateEncryptAndTenantModePair(EncryptionAtRestMode encryptMode, TenantMode tenantMode) {
// Domain aware encryption is only allowed when the tenant mode is required. Other encryption modes (disabled or
// cluster aware) are allowed regardless of the tenant mode
if (encryptMode.mode == EncryptionAtRestMode::DISABLED || encryptMode.mode == EncryptionAtRestMode::CLUSTER_AWARE) {
return true;
}
return tenantMode == TenantMode::REQUIRED;
}
// Configures the system according to the given specifications in order to run
// simulation under the correct conditions
void setupSimulatedSystem(std::vector<Future<Void>>* systemActors,
@ -2078,49 +2120,22 @@ void setupSimulatedSystem(std::vector<Future<Void>>* systemActors,
std::string whitelistBinPaths,
TestConfig testConfig,
ProtocolVersion protocolVersion,
TenantMode tenantMode) {
Optional<TenantMode>* tenantMode) {
auto& g_knobs = IKnobCollection::getMutableGlobalKnobCollection();
// SOMEDAY: this does not test multi-interface configurations
SimulationConfig simconfig(testConfig);
*tenantMode = simconfig.db.tenantMode;
if (testConfig.logAntiQuorum != -1) {
simconfig.db.tLogWriteAntiQuorum = testConfig.logAntiQuorum;
}
simconfig.db.tenantMode = tenantMode;
simconfig.db.encryptionAtRestMode = EncryptionAtRestMode::DISABLED;
// TODO: Remove check on the ENABLE_ENCRYPTION knob once the EKP can start using the db config
if (!testConfig.disableEncryption && (SERVER_KNOBS->ENABLE_ENCRYPTION || !testConfig.encryptModes.empty())) {
if (!testConfig.encryptModes.empty()) {
std::vector<EncryptionAtRestMode> validEncryptModes;
// Get the subset of valid encrypt modes given the tenant mode
for (int i = 0; i < testConfig.encryptModes.size(); i++) {
EncryptionAtRestMode encryptMode = EncryptionAtRestMode::fromString(testConfig.encryptModes.at(i));
if (validateEncryptAndTenantModePair(encryptMode, tenantMode)) {
validEncryptModes.push_back(encryptMode);
}
}
if (validEncryptModes.size() > 0) {
simconfig.db.encryptionAtRestMode = deterministicRandom()->randomChoice(validEncryptModes);
}
} else {
// TODO: These cases should only trigger with probability (BUGGIFY) once the server knob is removed
if (tenantMode == TenantMode::DISABLED || tenantMode == TenantMode::OPTIONAL_TENANT || BUGGIFY) {
// optional and disabled tenant modes currently only support cluster aware encryption
simconfig.db.encryptionAtRestMode = EncryptionAtRestMode::CLUSTER_AWARE;
} else {
simconfig.db.encryptionAtRestMode = EncryptionAtRestMode::DOMAIN_AWARE;
}
}
}
// TODO: remove knob handling once we move off encryption knobs to db config
if (simconfig.db.encryptionAtRestMode.mode == EncryptionAtRestMode::DISABLED) {
g_knobs.setKnob("enable_encryption", KnobValueRef::create(bool{ false }));
CODE_PROBE(true, "Disabled encryption in simulation");
} else {
g_knobs.setKnob("enable_encryption", KnobValueRef::create(bool{ true }));
g_knobs.setKnob(
"redwood_split_encrypted_pages_by_tenant",
KnobValueRef::create(bool{ simconfig.db.encryptionAtRestMode.mode == EncryptionAtRestMode::DOMAIN_AWARE }));
CODE_PROBE(simconfig.db.encryptionAtRestMode.mode == EncryptionAtRestMode::CLUSTER_AWARE,
"Enabled cluster-aware encryption in simulation");
CODE_PROBE(simconfig.db.encryptionAtRestMode.mode == EncryptionAtRestMode::DOMAIN_AWARE,
@ -2698,28 +2713,7 @@ ACTOR void setupAndRun(std::string dataFolder,
state Optional<TenantName> defaultTenant;
state Standalone<VectorRef<TenantNameRef>> tenantsToCreate;
state TenantMode tenantMode = TenantMode::DISABLED;
// If this is a restarting test, restartInfo.ini is read in restartSimulatedSystem
// where we update the defaultTenant and tenantMode in the testConfig
// Defer setting tenant mode and default tenant until later
if (!rebooting) {
if (testConfig.tenantModes.size()) {
auto randomPick = deterministicRandom()->randomChoice(testConfig.tenantModes);
tenantMode = TenantMode::fromString(randomPick);
if (tenantMode == TenantMode::REQUIRED && allowDefaultTenant) {
defaultTenant = "SimulatedDefaultTenant"_sr;
}
} else if (allowDefaultTenant && deterministicRandom()->coinflip()) {
defaultTenant = "SimulatedDefaultTenant"_sr;
if (deterministicRandom()->random01() < 0.9) {
tenantMode = TenantMode::REQUIRED;
} else {
tenantMode = TenantMode::OPTIONAL_TENANT;
}
} else if (deterministicRandom()->coinflip()) {
tenantMode = TenantMode::OPTIONAL_TENANT;
}
}
state Optional<TenantMode> tenantMode;
try {
// systemActors.push_back( startSystemMonitor(dataFolder) );
@ -2747,17 +2741,28 @@ ACTOR void setupAndRun(std::string dataFolder,
whitelistBinPaths,
testConfig,
protocolVersion,
tenantMode);
&tenantMode);
wait(delay(1.0)); // FIXME: WHY!!! //wait for machines to boot
}
// restartSimulatedSystem can adjust some testConfig params related to tenants
// so set/overwrite those options if necessary here
if (rebooting && testConfig.tenantModes.size()) {
tenantMode = TenantMode::fromString(testConfig.tenantModes[0]);
if (rebooting) {
if (testConfig.tenantModes.size()) {
tenantMode = TenantMode::fromString(testConfig.tenantModes[0]);
} else {
tenantMode = TenantMode::DISABLED;
}
}
if (testConfig.defaultTenant.present() && tenantMode != TenantMode::DISABLED && allowDefaultTenant) {
// setupSimulatedSystem/restartSimulatedSystem should fill tenantMode with a valid value.
ASSERT(tenantMode.present());
if (tenantMode != TenantMode::DISABLED && allowDefaultTenant) {
// Default tenant is set by testConfig or by the restart data in restartInfo.ini
defaultTenant = testConfig.defaultTenant.get();
if (testConfig.defaultTenant.present()) {
defaultTenant = testConfig.defaultTenant.get();
} else if (!rebooting && (tenantMode == TenantMode::REQUIRED || deterministicRandom()->coinflip())) {
defaultTenant = "SimulatedDefaultTenant"_sr;
}
}
if (!rebooting) {
if (defaultTenant.present() && allowDefaultTenant) {
@ -2773,7 +2778,7 @@ ACTOR void setupAndRun(std::string dataFolder,
}
TraceEvent("SimulatedClusterTenantMode")
.detail("UsingTenant", defaultTenant)
.detail("TenantMode", tenantMode.toString())
.detail("TenantMode", tenantMode.get().toString())
.detail("TotalTenants", tenantsToCreate.size());
std::string clusterFileDir = joinPath(dataFolder, deterministicRandom()->randomUniqueID().toString());
platform::createDirectory(clusterFileDir);

View File

@ -292,6 +292,10 @@ void StorageServerMetrics::splitMetrics(SplitMetricsRequest req) const {
if (key == req.keys.end)
break;
reply.splits.push_back_deep(reply.splits.arena(), key);
if (reply.splits.size() > SERVER_KNOBS->SPLIT_METRICS_MAX_ROWS) {
reply.more = true;
break;
}
StorageMetrics diff = (getMetrics(KeyRangeRef(lastKey, key)) + used);
remaining -= diff;
@ -301,7 +305,7 @@ void StorageServerMetrics::splitMetrics(SplitMetricsRequest req) const {
lastKey = key;
}
reply.used = getMetrics(KeyRangeRef(lastKey, req.keys.end)) + used;
reply.used = reply.more ? StorageMetrics() : getMetrics(KeyRangeRef(lastKey, req.keys.end)) + used;
req.reply.send(reply);
} catch (Error& e) {
req.reply.sendError(e);

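This is the server half of the pagination introduced earlier in this change: once the reply holds more than SPLIT_METRICS_MAX_ROWS split points it is marked more=true, and used is zeroed because the unscanned tail was never measured. A compressed sketch of that control flow with hypothetical types:

#include <vector>

struct Reply {
    std::vector<int> splits;
    bool more = false;
    int usedBytes = 0;
};

// candidate split points are emitted in order until the row cap is exceeded
Reply splitMetricsSketch(const std::vector<int>& candidates, int maxRows, int tailUsedBytes) {
    Reply reply;
    for (int key : candidates) {
        reply.splits.push_back(key);
        if ((int)reply.splits.size() > maxRows) { // SPLIT_METRICS_MAX_ROWS analogue
            reply.more = true;
            break;
        }
    }
    // metrics beyond the cap are unknown, so report zero and let the client resume
    reply.usedBytes = reply.more ? 0 : tailUsedBytes;
    return reply;
}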
View File

@ -2017,18 +2017,12 @@ public:
int64_t remapCleanupWindowBytes,
int concurrentExtentReads,
bool memoryOnly,
Reference<IPageEncryptionKeyProvider> keyProvider,
Promise<Void> errorPromise = {})
: keyProvider(keyProvider),
ioLock(makeReference<PriorityMultiLock>(FLOW_KNOBS->MAX_OUTSTANDING, SERVER_KNOBS->REDWOOD_IO_PRIORITIES)),
: ioLock(makeReference<PriorityMultiLock>(FLOW_KNOBS->MAX_OUTSTANDING, SERVER_KNOBS->REDWOOD_IO_PRIORITIES)),
pageCacheBytes(pageCacheSizeBytes), desiredPageSize(desiredPageSize), desiredExtentSize(desiredExtentSize),
filename(filename), memoryOnly(memoryOnly), errorPromise(errorPromise),
remapCleanupWindowBytes(remapCleanupWindowBytes), concurrentExtentReads(new FlowLock(concurrentExtentReads)) {
if (!keyProvider) {
keyProvider = makeReference<NullKeyProvider>();
}
// This sets the page cache size for all PageCacheT instances using the same evictor
pageCache.evictor().sizeLimit = pageCacheBytes;
@ -2043,6 +2037,11 @@ public:
std::string getName() const override { return filename; }
void setEncryptionKeyProvider(Reference<IPageEncryptionKeyProvider> kp) override {
keyProvider = kp;
keyProviderInitialized.send(Void());
}
void setPageSize(int size) {
// Conservative maximum for number of records that can fit in this page size
g_redwoodMetrics.updateMaxRecordCount(315.0 * size / 4096);
@ -2812,6 +2811,10 @@ public:
try {
page->postReadHeader(pageID);
if (page->isEncrypted()) {
if (!self->keyProvider.isValid()) {
wait(self->keyProviderInitialized.getFuture());
ASSERT(self->keyProvider.isValid());
}
ArenaPage::EncryptionKey k = wait(self->keyProvider->getEncryptionKey(page->getEncodingHeader()));
page->encryptionKey = k;
}
@ -3807,6 +3810,7 @@ private:
int pagesPerExtent;
Reference<IPageEncryptionKeyProvider> keyProvider;
Promise<Void> keyProviderInitialized;
Reference<PriorityMultiLock> ioLock;
@ -4889,18 +4893,20 @@ public:
uint8_t height;
LazyClearQueueT::QueueState lazyDeleteQueue;
BTreeNodeLink root;
EncryptionAtRestMode encryptionMode = EncryptionAtRestMode::DISABLED; // since 7.3
std::string toString() {
return format("{formatVersion=%d height=%d root=%s lazyDeleteQueue=%s}",
return format("{formatVersion=%d height=%d root=%s lazyDeleteQueue=%s encryptionMode=%s}",
(int)formatVersion,
(int)height,
::toString(root).c_str(),
lazyDeleteQueue.toString().c_str());
lazyDeleteQueue.toString().c_str(),
encryptionMode.toString().c_str());
}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, formatVersion, encodingType, height, lazyDeleteQueue, root);
serializer(ar, formatVersion, encodingType, height, lazyDeleteQueue, root, encryptionMode);
}
};
@ -4964,35 +4970,21 @@ public:
// VersionedBTree takes ownership of pager
VersionedBTree(IPager2* pager,
std::string name,
EncodingType defaultEncodingType,
Reference<IPageEncryptionKeyProvider> keyProvider)
: m_pager(pager), m_encodingType(defaultEncodingType), m_enforceEncodingType(false), m_keyProvider(keyProvider),
m_pBuffer(nullptr), m_mutationCount(0), m_name(name) {
// For encrypted encoding types, enforce that BTree nodes read from disk use the default encoding type
// This prevents an attack where an encrypted page is replaced by an attacker with an unencrypted page
// or an encrypted page fabricated using a compromised scheme.
if (ArenaPage::isEncodingTypeEncrypted(m_encodingType)) {
ASSERT(keyProvider.isValid());
m_enforceEncodingType = true;
}
// If key provider isn't given, instantiate the null provider
if (!m_keyProvider) {
m_keyProvider = makeReference<NullKeyProvider>();
}
m_pBoundaryVerifier = DecodeBoundaryVerifier::getVerifier(name);
if (m_pBoundaryVerifier != nullptr) {
m_pBoundaryVerifier->setKeyProvider(m_keyProvider);
}
Reference<AsyncVar<ServerDBInfo> const> db,
Optional<EncryptionAtRestMode> expectedEncryptionMode,
EncodingType encodingType = EncodingType::MAX_ENCODING_TYPE,
Reference<IPageEncryptionKeyProvider> keyProvider = {})
: m_pager(pager), m_db(db), m_expectedEncryptionMode(expectedEncryptionMode), m_encodingType(encodingType),
m_enforceEncodingType(false), m_keyProvider(keyProvider), m_pBuffer(nullptr), m_mutationCount(0), m_name(name),
m_pBoundaryVerifier(DecodeBoundaryVerifier::getVerifier(name)) {
m_pDecodeCacheMemory = m_pager->getPageCachePenaltySource();
m_lazyClearActor = 0;
m_init = init_impl(this);
m_latestCommit = m_init;
}
Future<EncryptionAtRestMode> encryptionMode() { return m_encryptionMode.getFuture(); }
ACTOR static Future<Reference<ArenaPage>> makeEmptyRoot(VersionedBTree* self) {
state Reference<ArenaPage> page = self->m_pager->newPageBuffer();
page->init(self->m_encodingType, PageType::BTreeNode, 1);
@ -5115,6 +5107,71 @@ public:
return freedPages;
}
void checkOrUpdateEncodingType(const std::string& event,
const EncryptionAtRestMode& encryptionMode,
EncodingType& encodingType) {
EncodingType expectedEncodingType = EncodingType::MAX_ENCODING_TYPE;
if (encryptionMode == EncryptionAtRestMode::DISABLED) {
expectedEncodingType = EncodingType::XXHash64;
} else {
expectedEncodingType = FLOW_KNOBS->ENCRYPT_HEADER_AUTH_TOKEN_ENABLED ? EncodingType::AESEncryptionWithAuth
: EncodingType::AESEncryption;
}
// Randomly enable XOR encryption in simulation. Also ignore encoding type mismatch if XOR encryption is set but
// default encoding is expected.
if (encodingType == EncodingType::MAX_ENCODING_TYPE) {
encodingType = expectedEncodingType;
if (encodingType == EncodingType::XXHash64 && g_network->isSimulated() && BUGGIFY) {
encodingType = EncodingType::XOREncryption_TestOnly;
}
} else if (encodingType != expectedEncodingType) {
// In simulation we could enable xor encryption for testing. Ignore encoding type mismatch in such a case.
if (!(g_network->isSimulated() && encodingType == EncodingType::XOREncryption_TestOnly &&
expectedEncodingType == EncodingType::XXHash64)) {
TraceEvent(SevWarnAlways, "RedwoodBTreeMismatchEncryptionModeAndEncodingType")
.detail("InstanceName", m_pager->getName())
.detail("Event", event)
.detail("EncryptionMode", encryptionMode)
.detail("ExpectedEncodingType", expectedEncodingType)
.detail("ActualEncodingType", encodingType);
throw encrypt_mode_mismatch();
}
}
}
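The rule enforced by checkOrUpdateEncodingType is compact: encryption disabled maps to XXHash64 pages, otherwise to AES encoding with or without auth tokens, with XOR encryption tolerated only as a simulation-time substitute for XXHash64. A condensed restatement of the happy-path mapping (hypothetical enums, omitting the mismatch/throw branch):

#include <cassert>

enum class Mode { DISABLED, CLUSTER_AWARE, DOMAIN_AWARE };
enum class Encoding { XXHash64, AESEncryption, AESEncryptionWithAuth, XOREncryption_TestOnly };

// mirrors the expectedEncodingType computation above
Encoding expectedEncoding(Mode mode, bool authTokensEnabled) {
    if (mode == Mode::DISABLED)
        return Encoding::XXHash64; // plain checksummed pages
    return authTokensEnabled ? Encoding::AESEncryptionWithAuth : Encoding::AESEncryption;
}

int main() {
    assert(expectedEncoding(Mode::DISABLED, false) == Encoding::XXHash64);
    assert(expectedEncoding(Mode::DOMAIN_AWARE, true) == Encoding::AESEncryptionWithAuth);
    assert(expectedEncoding(Mode::CLUSTER_AWARE, false) == Encoding::AESEncryption);
}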
void initEncryptionKeyProvider() {
if (!m_keyProvider.isValid()) {
switch (m_encodingType) {
case EncodingType::XXHash64:
m_keyProvider = makeReference<NullEncryptionKeyProvider>();
break;
case EncodingType::XOREncryption_TestOnly:
m_keyProvider = makeReference<XOREncryptionKeyProvider_TestOnly>(m_name);
break;
case EncodingType::AESEncryption:
ASSERT(m_expectedEncryptionMode.present());
ASSERT(m_db.isValid());
m_keyProvider =
makeReference<AESEncryptionKeyProvider<AESEncryption>>(m_db, m_expectedEncryptionMode.get());
break;
case EncodingType::AESEncryptionWithAuth:
ASSERT(m_expectedEncryptionMode.present());
ASSERT(m_db.isValid());
m_keyProvider = makeReference<AESEncryptionKeyProvider<AESEncryptionWithAuth>>(
m_db, m_expectedEncryptionMode.get());
break;
default:
ASSERT(false);
}
} else {
ASSERT_EQ(m_encodingType, m_keyProvider->expectedEncodingType());
}
m_pager->setEncryptionKeyProvider(m_keyProvider);
if (m_pBoundaryVerifier != nullptr) {
m_pBoundaryVerifier->setKeyProvider(m_keyProvider);
}
}
ACTOR static Future<Void> init_impl(VersionedBTree* self) {
wait(self->m_pager->init());
self->m_pBuffer.reset(new MutationBuffer());
@ -5134,9 +5191,17 @@ public:
state Value btreeHeader = self->m_pager->getCommitRecord();
if (btreeHeader.size() == 0) {
// Create new BTree
ASSERT(self->m_expectedEncryptionMode.present());
self->m_encryptionMode.send(self->m_expectedEncryptionMode.get());
self->checkOrUpdateEncodingType("NewBTree", self->m_expectedEncryptionMode.get(), self->m_encodingType);
self->initEncryptionKeyProvider();
self->m_enforceEncodingType = isEncodingTypeEncrypted(self->m_encodingType);
self->m_header.formatVersion = BTreeCommitHeader::FORMAT_VERSION;
self->m_header.encodingType = self->m_encodingType;
self->m_header.height = 1;
self->m_header.encryptionMode = self->m_expectedEncryptionMode.get();
LogicalPageID id = wait(self->m_pager->newPageID());
self->m_header.root = BTreeNodeLinkRef((LogicalPageID*)&id, 1);
@ -5166,28 +5231,39 @@ public:
throw e;
}
if (self->m_expectedEncryptionMode.present()) {
if (self->m_header.encryptionMode != self->m_expectedEncryptionMode.get()) {
TraceEvent(SevWarnAlways, "RedwoodBTreeEncryptionModeMismatched")
.detail("InstanceName", self->m_pager->getName())
.detail("ExpectedEncryptionMode", self->m_expectedEncryptionMode)
.detail("StoredEncryptionMode", self->m_header.encryptionMode);
throw encrypt_mode_mismatch();
} else {
self->m_expectedEncryptionMode = self->m_header.encryptionMode;
}
} else {
self->m_expectedEncryptionMode = self->m_header.encryptionMode;
}
self->m_encryptionMode.send(self->m_header.encryptionMode);
ASSERT_NE(EncodingType::MAX_ENCODING_TYPE, self->m_header.encodingType);
if (self->m_encodingType == EncodingType::MAX_ENCODING_TYPE) {
self->m_encodingType = self->m_header.encodingType;
} else if (self->m_encodingType != self->m_header.encodingType) {
TraceEvent(SevWarn, "RedwoodBTreeUnexpectedEncodingType")
.detail("InstanceName", self->m_pager->getName())
.detail("UsingEncodingType", self->m_encodingType)
.detail("ExistingEncodingType", self->m_header.encodingType);
}
// Verify if encryption mode and encoding type in the header are consistent.
// This check can also fail in case of authentication mode mismatch.
self->checkOrUpdateEncodingType("ExistingBTree", self->m_header.encryptionMode, self->m_encodingType);
self->initEncryptionKeyProvider();
self->m_enforceEncodingType = isEncodingTypeEncrypted(self->m_encodingType);
self->m_lazyClearQueue.recover(self->m_pager, self->m_header.lazyDeleteQueue, "LazyClearQueueRecovered");
debug_printf("BTree recovered.\n");
if (ArenaPage::isEncodingTypeEncrypted(self->m_header.encodingType) &&
self->m_encodingType == EncodingType::XXHash64) {
// On restart the encryption config of the cluster could be unknown. In that case if we find the Redwood
// instance is encrypted, we should use the same encryption encoding.
self->m_encodingType = self->m_header.encodingType;
self->m_enforceEncodingType = true;
TraceEvent("RedwoodBTreeNodeForceEncryption")
.detail("InstanceName", self->m_pager->getName())
.detail("EncodingFound", self->m_header.encodingType)
.detail("EncodingDesired", self->m_encodingType);
}
if (self->m_header.encodingType != self->m_encodingType) {
TraceEvent(SevWarn, "RedwoodBTreeNodeEncodingMismatch")
.detail("InstanceName", self->m_pager->getName())
.detail("EncodingFound", self->m_header.encodingType)
.detail("EncodingDesired", self->m_encodingType);
}
}
self->m_lazyClearActor = 0;
TraceEvent e(SevInfo, "RedwoodRecoveredBTree");
@ -5487,6 +5563,10 @@ private:
*/
IPager2* m_pager;
Reference<AsyncVar<ServerDBInfo> const> m_db;
Optional<EncryptionAtRestMode> m_expectedEncryptionMode;
Promise<EncryptionAtRestMode> m_encryptionMode;
EncodingType m_encodingType;
bool m_enforceEncodingType;
Reference<IPageEncryptionKeyProvider> m_keyProvider;
@ -5526,12 +5606,12 @@ private:
int blockSize,
EncodingType encodingType,
unsigned int height,
bool useEncryptionDomain,
bool enableEncryptionDomain,
bool splitByDomain,
IPageEncryptionKeyProvider* keyProvider)
: startIndex(index), count(0), pageSize(blockSize),
largeDeltaTree(pageSize > BTreePage::BinaryTree::SmallSizeLimit), blockSize(blockSize), blockCount(1),
kvBytes(0), encodingType(encodingType), height(height), useEncryptionDomain(useEncryptionDomain),
kvBytes(0), encodingType(encodingType), height(height), enableEncryptionDomain(enableEncryptionDomain),
splitByDomain(splitByDomain), keyProvider(keyProvider) {
// Subtract Page header overhead, BTreePage overhead, and DeltaTree (BTreePage::BinaryTree) overhead.
@ -5541,7 +5621,7 @@ private:
PageToBuild next() {
return PageToBuild(
endIndex(), blockSize, encodingType, height, useEncryptionDomain, splitByDomain, keyProvider);
endIndex(), blockSize, encodingType, height, enableEncryptionDomain, splitByDomain, keyProvider);
}
int startIndex; // Index of the first record
@ -5556,7 +5636,7 @@ private:
EncodingType encodingType;
unsigned int height;
bool useEncryptionDomain;
bool enableEncryptionDomain;
bool splitByDomain;
IPageEncryptionKeyProvider* keyProvider;
@ -5635,7 +5715,7 @@ private:
return false;
}
if (useEncryptionDomain) {
if (enableEncryptionDomain) {
int64_t defaultDomainId = keyProvider->getDefaultEncryptionDomainId();
int64_t currentDomainId;
size_t prefixLength;
@ -5709,7 +5789,7 @@ private:
}
void finish() {
if (useEncryptionDomain && canUseDefaultDomain) {
if (enableEncryptionDomain && canUseDefaultDomain) {
domainId = keyProvider->getDefaultEncryptionDomainId();
}
}
@ -5735,12 +5815,12 @@ private:
std::vector<PageToBuild> pages;
// Whether encryption is used and we need to set encryption domain for a page.
bool useEncryptionDomain =
ArenaPage::isEncodingTypeEncrypted(m_encodingType) && m_keyProvider->enableEncryptionDomain();
bool enableEncryptionDomain =
isEncodingTypeEncrypted(m_encodingType) && m_keyProvider->enableEncryptionDomain();
// Whether we may need to split by encryption domain. It is meant to be an optimization to avoid
// unnecessary domain checks and may not cover all cases.
bool splitByDomain = false;
if (useEncryptionDomain && records.size() > 1) {
if (enableEncryptionDomain && records.size() > 1) {
int64_t firstDomain = std::get<0>(m_keyProvider->getEncryptionDomain(records[0].key));
int64_t lastDomain = std::get<0>(m_keyProvider->getEncryptionDomain(records[records.size() - 1].key));
// If the two records fall in the same non-default domain, we know all the records fall in the
@ -5759,7 +5839,7 @@ private:
}
PageToBuild p(
0, m_blockSize, m_encodingType, height, useEncryptionDomain, splitByDomain, m_keyProvider.getPtr());
0, m_blockSize, m_encodingType, height, enableEncryptionDomain, splitByDomain, m_keyProvider.getPtr());
for (int i = 0; i < records.size();) {
bool force = p.count < minRecords || p.slackFraction() > maxSlack;
@ -5799,8 +5879,8 @@ private:
PageToBuild& b = pages.back();
// We can rebalance the two pages only if they are in the same encryption domain.
ASSERT(!useEncryptionDomain || (a.domainId.present() && b.domainId.present()));
if (!useEncryptionDomain || a.domainId.get() == b.domainId.get()) {
ASSERT(!enableEncryptionDomain || (a.domainId.present() && b.domainId.present()));
if (!enableEncryptionDomain || a.domainId.get() == b.domainId.get()) {
// While the last page has too much slack and the second to last page
// has more than the minimum record count, shift a record from the second
@ -5835,8 +5915,8 @@ private:
state int prefixLen = lowerBound->getCommonPrefixLen(*upperBound);
// Whether encryption is used and we need to set the encryption domain for a page.
state bool useEncryptionDomain =
ArenaPage::isEncodingTypeEncrypted(self->m_encodingType) && self->m_keyProvider->enableEncryptionDomain();
state bool enableEncryptionDomain =
isEncodingTypeEncrypted(self->m_encodingType) && self->m_keyProvider->enableEncryptionDomain();
state std::vector<PageToBuild> pagesToBuild =
self->splitPages(lowerBound, upperBound, prefixLen, entries, height);
@ -5850,7 +5930,7 @@ private:
state int pageIndex;
if (useEncryptionDomain) {
if (enableEncryptionDomain) {
ASSERT(pagesToBuild[0].domainId.present());
int64_t domainId = pagesToBuild[0].domainId.get();
// We make sure the page lower bound fits in the domain of the page.
@ -5885,7 +5965,7 @@ private:
pageUpperBound.truncate(commonPrefix + 1);
}
if (useEncryptionDomain && pageUpperBound.key != dbEnd.key) {
if (enableEncryptionDomain && pageUpperBound.key != dbEnd.key) {
int64_t ubDomainId;
KeyRef ubDomainPrefix;
if (lastPage) {
@ -5916,10 +5996,10 @@ private:
--p->count;
debug_printf("Skipping first null record, new count=%d\n", p->count);
// In case encryption or encryption domain is not enabled, if the page is now empty then it must be the
// last page in pagesToBuild, otherwise there would be more than 1 item since internal pages need to
// have multiple children. In case encryption and encryption domain is enabled, however, because of the
// page split by encryption domain, it may not be the last page.
// In case encryption or the encryption domain is not enabled, if the page is now empty then it must be
// the last page in pagesToBuild; otherwise there would be more than 1 item, since internal pages
// need to have multiple children. When encryption and the encryption domain are enabled, however,
// because of the page split by encryption domain, it may not be the last page.
//
// Either way, a record must be added to the output set because the upper boundary of the last
// page built does not match the upper boundary of the original page that this call to writePages() is
@ -5927,7 +6007,7 @@ private:
// built does not match the upper boundary of the original page that the page set is replacing, so
// adding the extra null link fixes this.
if (p->count == 0) {
ASSERT(useEncryptionDomain || lastPage);
ASSERT(enableEncryptionDomain || lastPage);
records.push_back_deep(records.arena(), pageLowerBound);
pageLowerBound = pageUpperBound;
continue;
@ -5940,8 +6020,8 @@ private:
self->m_encodingType, (p->blockCount == 1) ? PageType::BTreeNode : PageType::BTreeSuperNode, height);
if (page->isEncrypted()) {
ArenaPage::EncryptionKey k =
wait(useEncryptionDomain ? self->m_keyProvider->getLatestEncryptionKey(p->domainId.get())
: self->m_keyProvider->getLatestDefaultEncryptionKey());
wait(enableEncryptionDomain ? self->m_keyProvider->getLatestEncryptionKey(p->domainId.get())
: self->m_keyProvider->getLatestDefaultEncryptionKey());
page->encryptionKey = k;
}
@ -6091,8 +6171,9 @@ private:
records[0].key != dbBegin.key) {
CODE_PROBE(records.size() == 1, "Writing a new root because the current root pointer would be too large");
if (records[0].key != dbBegin.key) {
ASSERT(self->m_keyProvider.isValid() && self->m_keyProvider->enableEncryption() &&
self->m_keyProvider->enableEncryptionDomain());
ASSERT(self->m_expectedEncryptionMode.present() &&
self->m_expectedEncryptionMode.get().isEncryptionEnabled());
ASSERT(self->m_keyProvider.isValid() && self->m_keyProvider->enableEncryptionDomain());
int64_t domainId;
size_t prefixLength;
std::tie(domainId, prefixLength) = self->m_keyProvider->getEncryptionDomain(records[0].key);
@ -6678,9 +6759,9 @@ private:
// TryToUpdate indicates insert and erase operations should be tried on the existing page first
state bool tryToUpdate = btPage->tree()->numItems > 0 && update->boundariesNormal();
state bool useEncryptionDomain = page->isEncrypted() && self->m_keyProvider->enableEncryptionDomain();
state bool enableEncryptionDomain = page->isEncrypted() && self->m_keyProvider->enableEncryptionDomain();
state Optional<int64_t> pageDomainId;
if (useEncryptionDomain) {
if (enableEncryptionDomain) {
pageDomainId = page->getEncryptionDomainId();
}
@ -6803,7 +6884,7 @@ private:
// If updating, first try to add the record to the page
if (updatingDeltaTree) {
bool canInsert = true;
if (useEncryptionDomain) {
if (enableEncryptionDomain) {
ASSERT(pageDomainId.present());
canInsert = self->m_keyProvider->keyFitsInDomain(pageDomainId.get(), rec.key, false);
}
@ -6957,9 +7038,9 @@ private:
debug_print(addPrefix(context, update->toString()));
return Void();
} else {
debug_printf(
"%s Changes were made, writing, but subtree may still be unchanged from parent's perspective.\n",
context.c_str());
debug_printf("%s Changes were made, writing, but subtree may still be unchanged from parent's "
"perspective.\n",
context.c_str());
}
if (updatingDeltaTree) {
@ -7582,7 +7663,7 @@ public:
#if REDWOOD_DEBUG
path.push_back({ p, cursor, link.get().getChildPage() });
#else
path.push_back({ p, cursor });
path.push_back({ p, cursor });
#endif
if (btree->m_pBoundaryVerifier != nullptr) {
@ -7608,7 +7689,7 @@ public:
#if REDWOOD_DEBUG
path.push_back({ p, btree->getCursor(p.getPtr(), dbBegin, dbEnd), id });
#else
path.push_back({ p, btree->getCursor(p.getPtr(), dbBegin, dbEnd) });
path.push_back({ p, btree->getCursor(p.getPtr(), dbBegin, dbEnd) });
#endif
return Void();
});
@ -7848,8 +7929,16 @@ RedwoodRecordRef VersionedBTree::dbEnd("\xff\xff\xff\xff\xff"_sr);
class KeyValueStoreRedwood : public IKeyValueStore {
public:
KeyValueStoreRedwood(std::string filename, UID logID, Reference<IPageEncryptionKeyProvider> encryptionKeyProvider)
KeyValueStoreRedwood(std::string filename,
UID logID,
Reference<AsyncVar<ServerDBInfo> const> db,
Optional<EncryptionAtRestMode> encryptionMode,
EncodingType encodingType = EncodingType::MAX_ENCODING_TYPE,
Reference<IPageEncryptionKeyProvider> keyProvider = {})
: m_filename(filename), prefetch(SERVER_KNOBS->REDWOOD_KVSTORE_RANGE_PREFETCH) {
if (!encryptionMode.present() || encryptionMode.get().isEncryptionEnabled()) {
ASSERT(keyProvider.isValid() || db.isValid());
}
int pageSize =
BUGGIFY ? deterministicRandom()->randomInt(1000, 4096 * 4) : SERVER_KNOBS->REDWOOD_DEFAULT_PAGE_SIZE;
@ -7868,25 +7957,6 @@ public:
: 100 * 1024 * 1024) // 100M
: SERVER_KNOBS->REDWOOD_REMAP_CLEANUP_WINDOW_BYTES;
EncodingType encodingType = EncodingType::XXHash64;
// When reopening Redwood on restart, the cluster encryption config could be unknown at this point,
// for which shouldEnableEncryption will return false. In that case, if the Redwood instance was encrypted
// before, the encoding type in the header page will be used instead.
//
// TODO(yiwu): When the cluster encryption config is available later, fail if the cluster is configured to
// enable encryption, but the Redwood instance is unencrypted.
if (encryptionKeyProvider && encryptionKeyProvider->enableEncryption()) {
encodingType = FLOW_KNOBS->ENCRYPT_HEADER_AUTH_TOKEN_ENABLED ? EncodingType::AESEncryptionWithAuth
: EncodingType::AESEncryption;
ASSERT_EQ(encodingType, encryptionKeyProvider->expectedEncodingType());
m_keyProvider = encryptionKeyProvider;
} else if (g_allowXOREncryptionInSimulation && g_network->isSimulated() && logID.hash() % 2 == 0) {
// Simulation only. Deterministically enable encryption based on uid
encodingType = EncodingType::XOREncryption_TestOnly;
m_keyProvider = makeReference<XOREncryptionKeyProvider_TestOnly>(filename);
}
IPager2* pager = new DWALPager(pageSize,
extentSize,
filename,
@ -7894,9 +7964,8 @@ public:
remapCleanupWindowBytes,
SERVER_KNOBS->REDWOOD_EXTENT_CONCURRENT_READS,
false,
m_keyProvider,
m_error);
m_tree = new VersionedBTree(pager, filename, encodingType, m_keyProvider);
m_tree = new VersionedBTree(pager, filename, db, encryptionMode, encodingType, keyProvider);
m_init = catchError(init_impl(this));
}
@ -7912,6 +7981,8 @@ public:
return Void();
}
Future<EncryptionAtRestMode> encryptionMode() override { return m_tree->encryptionMode(); }
ACTOR void shutdown(KeyValueStoreRedwood* self, bool dispose) {
TraceEvent(SevInfo, "RedwoodShutdown").detail("Filename", self->m_filename).detail("Dispose", dispose);
@ -8189,8 +8260,9 @@ private:
IKeyValueStore* keyValueStoreRedwoodV1(std::string const& filename,
UID logID,
Reference<IPageEncryptionKeyProvider> encryptionKeyProvider) {
return new KeyValueStoreRedwood(filename, logID, encryptionKeyProvider);
Reference<AsyncVar<ServerDBInfo> const> db,
Optional<EncryptionAtRestMode> encryptionMode) {
return new KeyValueStoreRedwood(filename, logID, db, encryptionMode);
}
int randomSize(int max) {
@ -9998,13 +10070,23 @@ TEST_CASE("Lredwood/correctness/btree") {
state int64_t maxRecordsRead = params.getInt("maxRecordsRead").orDefault(300e6);
state EncodingType encodingType = static_cast<EncodingType>(encoding);
state EncryptionAtRestMode encryptionMode =
!isEncodingTypeAESEncrypted(encodingType)
? EncryptionAtRestMode::DISABLED
: (encryptionDomainMode == RandomEncryptionKeyProvider<AESEncryption>::EncryptionDomainMode::DISABLED
? EncryptionAtRestMode::CLUSTER_AWARE
: EncryptionAtRestMode::DOMAIN_AWARE);
state Reference<IPageEncryptionKeyProvider> keyProvider;
auto& g_knobs = IKnobCollection::getMutableGlobalKnobCollection();
if (encodingType == EncodingType::AESEncryption) {
keyProvider = makeReference<RandomEncryptionKeyProvider<AESEncryption>>(
RandomEncryptionKeyProvider<AESEncryption>::EncryptionDomainMode(encryptionDomainMode));
g_knobs.setKnob("encrypt_header_auth_token_enabled", KnobValueRef::create(bool{ false }));
} else if (encodingType == EncodingType::AESEncryptionWithAuth) {
keyProvider = makeReference<RandomEncryptionKeyProvider<AESEncryptionWithAuth>>(
RandomEncryptionKeyProvider<AESEncryptionWithAuth>::EncryptionDomainMode(encryptionDomainMode));
g_knobs.setKnob("encrypt_header_auth_token_enabled", KnobValueRef::create(bool{ true }));
g_knobs.setKnob("encrypt_header_auth_token_algo", KnobValueRef::create(int{ 1 }));
} else if (encodingType == EncodingType::XOREncryption_TestOnly) {
keyProvider = makeReference<XOREncryptionKeyProvider_TestOnly>(file);
}
@ -10042,15 +10124,9 @@ TEST_CASE("Lredwood/correctness/btree") {
deleteFile(file);
printf("Initializing...\n");
pager = new DWALPager(pageSize,
extentSize,
file,
pageCacheBytes,
remapCleanupWindowBytes,
concurrentExtentReads,
pagerMemoryOnly,
keyProvider);
state VersionedBTree* btree = new VersionedBTree(pager, file, encodingType, keyProvider);
pager = new DWALPager(
pageSize, extentSize, file, pageCacheBytes, remapCleanupWindowBytes, concurrentExtentReads, pagerMemoryOnly);
state VersionedBTree* btree = new VersionedBTree(pager, file, {}, encryptionMode, encodingType, keyProvider);
wait(btree->init());
state DecodeBoundaryVerifier* pBoundaries = DecodeBoundaryVerifier::getVerifier(file);
@ -10153,7 +10229,8 @@ TEST_CASE("Lredwood/correctness/btree") {
while (e != eEnd) {
auto w = *e;
++e;
// If e key is different from last and last was present then insert clear for last's key at version
// If e key is different from last and last was present then insert clear for last's key at
// version
if (last != eEnd &&
((e == eEnd || e->first.first != last->first.first) && last->second.present())) {
debug_printf(
@ -10221,8 +10298,8 @@ TEST_CASE("Lredwood/correctness/btree") {
keyBytesCleared.rate() / 1e6,
mutationBytes.rate() / 1e6);
// Sometimes advance the oldest version to close the gap between the oldest and latest versions by a random
// amount.
// Sometimes advance the oldest version to close the gap between the oldest and latest versions by a
// random amount.
if (deterministicRandom()->random01() < advanceOldVersionProbability) {
btree->setOldestReadableVersion(
btree->getLastCommittedVersion() -
@ -10286,15 +10363,9 @@ TEST_CASE("Lredwood/correctness/btree") {
wait(closedFuture);
printf("Reopening btree from disk.\n");
IPager2* pager = new DWALPager(pageSize,
extentSize,
file,
pageCacheBytes,
remapCleanupWindowBytes,
concurrentExtentReads,
false,
keyProvider);
btree = new VersionedBTree(pager, file, encodingType, keyProvider);
IPager2* pager = new DWALPager(
pageSize, extentSize, file, pageCacheBytes, remapCleanupWindowBytes, concurrentExtentReads, false);
btree = new VersionedBTree(pager, file, {}, encryptionMode, encodingType, keyProvider);
wait(btree->init());
@ -10341,9 +10412,10 @@ TEST_CASE("Lredwood/correctness/btree") {
pageCacheBytes,
(BUGGIFY ? 0 : remapCleanupWindowBytes),
concurrentExtentReads,
pagerMemoryOnly,
keyProvider),
pagerMemoryOnly),
file,
{},
{},
encodingType,
keyProvider);
wait(btree->init());
@ -10482,15 +10554,9 @@ TEST_CASE(":/redwood/performance/extentQueue") {
// Do random pushes into the queue and commit periodically
if (reload) {
pager = new DWALPager(pageSize,
extentSize,
fileName,
cacheSizeBytes,
remapCleanupWindowBytes,
concurrentExtentReads,
false,
Reference<IPageEncryptionKeyProvider>());
pager = new DWALPager(
pageSize, extentSize, fileName, cacheSizeBytes, remapCleanupWindowBytes, concurrentExtentReads, false);
pager->setEncryptionKeyProvider(makeReference<NullEncryptionKeyProvider>());
wait(success(pager->init()));
LogicalPageID extID = pager->newLastExtentID();
@ -10540,14 +10606,9 @@ TEST_CASE(":/redwood/performance/extentQueue") {
}
printf("Reopening pager file from disk.\n");
pager = new DWALPager(pageSize,
extentSize,
fileName,
cacheSizeBytes,
remapCleanupWindowBytes,
concurrentExtentReads,
false,
Reference<IPageEncryptionKeyProvider>());
pager = new DWALPager(
pageSize, extentSize, fileName, cacheSizeBytes, remapCleanupWindowBytes, concurrentExtentReads, false);
pager->setEncryptionKeyProvider(makeReference<NullEncryptionKeyProvider>());
wait(success(pager->init()));
printf("Starting ExtentQueue FastPath Recovery from Disk.\n");
@ -10687,16 +10748,10 @@ TEST_CASE(":/redwood/performance/set") {
deleteFile(file);
}
DWALPager* pager = new DWALPager(pageSize,
extentSize,
file,
pageCacheBytes,
remapCleanupWindowBytes,
concurrentExtentReads,
pagerMemoryOnly,
Reference<IPageEncryptionKeyProvider>());
DWALPager* pager = new DWALPager(
pageSize, extentSize, file, pageCacheBytes, remapCleanupWindowBytes, concurrentExtentReads, pagerMemoryOnly);
state VersionedBTree* btree =
new VersionedBTree(pager, file, EncodingType::XXHash64, Reference<IPageEncryptionKeyProvider>());
new VersionedBTree(pager, file, {}, {}, EncodingType::XXHash64, makeReference<NullEncryptionKeyProvider>());
wait(btree->init());
printf("Initialized. StorageBytes=%s\n", btree->getStorageBytes().toString().c_str());
@ -11062,8 +11117,9 @@ ACTOR Future<Void> prefixClusteredInsert(IKeyValueStore* kvs,
}
wait(commit);
// TODO is it desired that not all records are committed? This could commit again to ensure any records set() since
// the last commit are persisted. For the purposes of how this is used currently, I don't think it matters though
// TODO is it desired that not all records are committed? This could commit again to ensure any records set()
// since the last commit are persisted. For the purposes of how this is used currently, I don't think it matters
// though
stats();
printf("\n");
@ -11349,10 +11405,11 @@ void setAuthMode(EncodingType encodingType) {
TEST_CASE("/redwood/correctness/EnforceEncodingType") {
state const std::vector<std::pair<EncodingType, EncodingType>> testCases = {
{ XXHash64, AESEncryption }, { AESEncryption, AESEncryptionWithAuth }
{ XXHash64, XOREncryption_TestOnly }, { AESEncryption, AESEncryptionWithAuth }
};
state const std::map<EncodingType, Reference<IPageEncryptionKeyProvider>> encryptionKeyProviders = {
{ XXHash64, makeReference<NullKeyProvider>() },
{ XXHash64, makeReference<NullEncryptionKeyProvider>() },
{ XOREncryption_TestOnly, makeReference<XOREncryptionKeyProvider_TestOnly>("test.redwood-v1") },
{ AESEncryption, makeReference<RandomEncryptionKeyProvider<AESEncryption>>() },
{ AESEncryptionWithAuth, makeReference<RandomEncryptionKeyProvider<AESEncryptionWithAuth>>() }
};
@ -11362,18 +11419,18 @@ TEST_CASE("/redwood/correctness/EnforceEncodingType") {
state EncodingType initialEncodingType = testCase.first;
state EncodingType reopenEncodingType = testCase.second;
ASSERT_NE(initialEncodingType, reopenEncodingType);
ASSERT(ArenaPage::isEncodingTypeEncrypted(reopenEncodingType));
ASSERT(isEncodingTypeEncrypted(reopenEncodingType));
deleteFile("test.redwood-v1");
printf("Create KV store with encoding type %d\n", initialEncodingType);
setAuthMode(initialEncodingType);
kvs = openKVStore(KeyValueStoreType::SSD_REDWOOD_V1,
"test.redwood-v1",
UID(),
0,
false,
false,
false,
encryptionKeyProviders.at(initialEncodingType));
kvs = new KeyValueStoreRedwood("test.redwood-v1",
UID(),
{}, // db
isEncodingTypeAESEncrypted(initialEncodingType)
? EncryptionAtRestMode::CLUSTER_AWARE
: EncryptionAtRestMode::DISABLED,
initialEncodingType,
encryptionKeyProviders.at(initialEncodingType));
wait(kvs->init());
kvs->set(KeyValueRef("foo"_sr, "bar"_sr));
wait(kvs->commit());
@ -11381,14 +11438,12 @@ TEST_CASE("/redwood/correctness/EnforceEncodingType") {
// Reopen
printf("Reopen KV store with encoding type %d\n", reopenEncodingType);
setAuthMode(reopenEncodingType);
kvs = openKVStore(KeyValueStoreType::SSD_REDWOOD_V1,
"test.redwood-v1",
UID(),
0,
false,
false,
false,
encryptionKeyProviders.at(reopenEncodingType));
kvs = new KeyValueStoreRedwood("test.redwood-v1",
UID(),
{}, // db
{}, // encryptionMode
reopenEncodingType,
encryptionKeyProviders.at(reopenEncodingType));
wait(kvs->init());
try {
Optional<Value> v = wait(kvs->readValue("foo"_sr));

View File

@ -2018,7 +2018,7 @@ int main(int argc, char* argv[]) {
// startOldSimulator();
opts.buildNetwork(argv[0]);
startNewSimulator(opts.printSimTime);
openTraceFile(NetworkAddress(), opts.rollsize, opts.maxLogsSize, opts.logFolder, "trace", opts.logGroup);
openTraceFile({}, opts.rollsize, opts.maxLogsSize, opts.logFolder, "trace", opts.logGroup);
openTracer(TracerType(deterministicRandom()->randomInt(static_cast<int>(TracerType::DISABLED),
static_cast<int>(TracerType::SIM_END))));
} else {
@ -2265,10 +2265,6 @@ int main(int argc, char* argv[]) {
KnobValue::create(ini.GetBoolValue("META", "enableBlobGranuleEncryption", false)));
g_knobs.setKnob("enable_blob_granule_compression",
KnobValue::create(ini.GetBoolValue("META", "enableBlobGranuleEncryption", false)));
// Restart test does not preserve encryption mode (tenant-aware or domain-aware).
// Disable domain-aware encryption in Redwood until encryption mode from db config is being handled.
// TODO(yiwu): clean it up once we cleanup the knob.
g_knobs.setKnob("redwood_split_encrypted_pages_by_tenant", KnobValue::create(bool{ false }));
g_knobs.setKnob("encrypt_header_auth_token_enabled",
KnobValue::create(ini.GetBoolValue("META", "encryptHeaderAuthTokenEnabled", false)));
g_knobs.setKnob("encrypt_header_auth_token_algo",

View File

@ -20,6 +20,7 @@
#ifndef FDBSERVER_IKEYVALUESTORE_H
#define FDBSERVER_IKEYVALUESTORE_H
#include "flow/Trace.h"
#pragma once
#include "fdbclient/FDBTypes.h"
@ -133,6 +134,9 @@ public:
// of a rollback.
virtual Future<Void> init() { return Void(); }
// Obtain the encryption mode of the storage. The encryption mode needs to match the encryption mode of the cluster.
virtual Future<EncryptionAtRestMode> encryptionMode() = 0;
protected:
virtual ~IKeyValueStore() {}
};
@ -144,7 +148,8 @@ extern IKeyValueStore* keyValueStoreSQLite(std::string const& filename,
bool checkIntegrity = false);
extern IKeyValueStore* keyValueStoreRedwoodV1(std::string const& filename,
UID logID,
Reference<IPageEncryptionKeyProvider> encryptionKeyProvider = {});
Reference<AsyncVar<ServerDBInfo> const> db = {},
Optional<EncryptionAtRestMode> encryptionMode = {});
extern IKeyValueStore* keyValueStoreRocksDB(std::string const& path,
UID logID,
KeyValueStoreType storeType,
@ -183,7 +188,16 @@ inline IKeyValueStore* openKVStore(KeyValueStoreType storeType,
bool checkChecksums = false,
bool checkIntegrity = false,
bool openRemotely = false,
Reference<IPageEncryptionKeyProvider> encryptionKeyProvider = {}) {
Reference<AsyncVar<ServerDBInfo> const> db = {},
Optional<EncryptionAtRestMode> encryptionMode = {}) {
// Only Redwood supports encryption currently.
if (encryptionMode.present() && encryptionMode.get().isEncryptionEnabled() &&
storeType != KeyValueStoreType::SSD_REDWOOD_V1) {
TraceEvent(SevWarn, "KVStoreTypeNotSupportingEncryption")
.detail("KVStoreType", storeType)
.detail("EncryptionMode", encryptionMode);
throw encrypt_mode_mismatch();
}
if (openRemotely) {
return openRemoteKVStore(storeType, filename, logID, memoryLimit, checkChecksums, checkIntegrity);
}
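// A hypothetical call-site sketch of the guard above (arguments illustrative;
// only Redwood currently accepts an enabled encryption mode):
//
//   openKVStore(KeyValueStoreType::SSD_ROCKSDB_V1, filename, logID, memoryLimit,
//               false, false, false, {}, EncryptionAtRestMode(EncryptionAtRestMode::CLUSTER_AWARE));
//   // -> traces KVStoreTypeNotSupportingEncryption and throws encrypt_mode_mismatch()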
@ -195,7 +209,7 @@ inline IKeyValueStore* openKVStore(KeyValueStoreType storeType,
case KeyValueStoreType::MEMORY:
return keyValueStoreMemory(filename, logID, memoryLimit);
case KeyValueStoreType::SSD_REDWOOD_V1:
return keyValueStoreRedwoodV1(filename, logID, encryptionKeyProvider);
return keyValueStoreRedwoodV1(filename, logID, db, encryptionMode);
case KeyValueStoreType::SSD_ROCKSDB_V1:
return keyValueStoreRocksDB(filename, logID, storeType);
case KeyValueStoreType::SSD_SHARDED_ROCKSDB:

View File

@ -18,6 +18,8 @@
* limitations under the License.
*/
#include "fdbclient/TenantManagement.actor.h"
#include "fdbrpc/TenantInfo.h"
#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_IPAGEENCRYPTIONKEYPROVIDER_ACTOR_G_H)
#define FDBSERVER_IPAGEENCRYPTIONKEYPROVIDER_ACTOR_G_H
#include "fdbserver/IPageEncryptionKeyProvider.actor.g.h"
@ -68,12 +70,8 @@ public:
// Expected encoding type being used with the encryption key provider.
virtual EncodingType expectedEncodingType() const = 0;
// Checks whether encryption should be enabled. If not, the encryption key provider will not be used by
// the pager, and instead the default non-encrypted encoding type (XXHash64) is used.
virtual bool enableEncryption() const = 0;
// Whether encryption domain is enabled.
virtual bool enableEncryptionDomain() const { return false; }
virtual bool enableEncryptionDomain() const = 0;
// Get an encryption key from given encoding header.
virtual Future<EncryptionKey> getEncryptionKey(const void* encodingHeader) { throw not_implemented(); }
@ -110,11 +108,11 @@ public:
// The null key provider is useful to simplify page decoding.
// It throws an error for any key info requested.
class NullKeyProvider : public IPageEncryptionKeyProvider {
class NullEncryptionKeyProvider : public IPageEncryptionKeyProvider {
public:
virtual ~NullKeyProvider() {}
virtual ~NullEncryptionKeyProvider() {}
EncodingType expectedEncodingType() const override { return EncodingType::XXHash64; }
bool enableEncryption() const override { return false; }
bool enableEncryptionDomain() const override { return false; }
};
// Key provider for dummy XOR encryption scheme
@ -139,7 +137,7 @@ public:
EncodingType expectedEncodingType() const override { return EncodingType::XOREncryption_TestOnly; }
bool enableEncryption() const override { return true; }
bool enableEncryptionDomain() const override { return false; }
Future<EncryptionKey> getEncryptionKey(const void* encodingHeader) override {
@ -188,9 +186,7 @@ public:
EncodingType expectedEncodingType() const override { return encodingType; }
bool enableEncryption() const override { return true; }
bool enableEncryptionDomain() const override { return mode > 1; }
bool enableEncryptionDomain() const override { return mode > 0; }
Future<EncryptionKey> getEncryptionKey(const void* encodingHeader) override {
using Header = typename ArenaPage::AESEncryptionEncoder<encodingType>::Header;
@ -284,26 +280,29 @@ private:
template <EncodingType encodingType,
typename std::enable_if<encodingType == AESEncryption || encodingType == AESEncryptionWithAuth, bool>::type =
true>
class TenantAwareEncryptionKeyProvider : public IPageEncryptionKeyProvider {
class AESEncryptionKeyProvider : public IPageEncryptionKeyProvider {
public:
using EncodingHeader = typename ArenaPage::AESEncryptionEncoder<encodingType>::Header;
const StringRef systemKeysPrefix = systemKeys.begin;
TenantAwareEncryptionKeyProvider(Reference<AsyncVar<ServerDBInfo> const> db) : db(db) {}
AESEncryptionKeyProvider(Reference<AsyncVar<ServerDBInfo> const> db, EncryptionAtRestMode encryptionMode)
: db(db), encryptionMode(encryptionMode) {
ASSERT(encryptionMode != EncryptionAtRestMode::DISABLED);
ASSERT(db.isValid());
}
virtual ~TenantAwareEncryptionKeyProvider() = default;
virtual ~AESEncryptionKeyProvider() = default;
EncodingType expectedEncodingType() const override { return encodingType; }
bool enableEncryption() const override {
return isEncryptionOpSupported(EncryptOperationType::STORAGE_SERVER_ENCRYPTION);
bool enableEncryptionDomain() const override {
// Regardless of encryption mode, system keys are always encrypted using the system key space domain.
// Because of this, AESEncryptionKeyProvider always appears to be domain-aware.
return true;
}
bool enableEncryptionDomain() const override { return SERVER_KNOBS->REDWOOD_SPLIT_ENCRYPTED_PAGES_BY_TENANT; }
ACTOR static Future<EncryptionKey> getEncryptionKey(TenantAwareEncryptionKeyProvider* self,
const void* encodingHeader) {
ACTOR static Future<EncryptionKey> getEncryptionKey(AESEncryptionKeyProvider* self, const void* encodingHeader) {
const BlobCipherEncryptHeader& header = reinterpret_cast<const EncodingHeader*>(encodingHeader)->encryption;
TextAndHeaderCipherKeys cipherKeys =
wait(getEncryptCipherKeys(self->db, header, BlobCipherMetrics::KV_REDWOOD));
@ -320,9 +319,8 @@ public:
return getLatestEncryptionKey(getDefaultEncryptionDomainId());
}
ACTOR static Future<EncryptionKey> getLatestEncryptionKey(TenantAwareEncryptionKeyProvider* self,
int64_t domainId) {
ACTOR static Future<EncryptionKey> getLatestEncryptionKey(AESEncryptionKeyProvider* self, int64_t domainId) {
ASSERT(self->encryptionMode == EncryptionAtRestMode::DOMAIN_AWARE || domainId < 0);
TextAndHeaderCipherKeys cipherKeys =
wait(getLatestEncryptCipherKeysForDomain(self->db, domainId, BlobCipherMetrics::KV_REDWOOD));
EncryptionKey encryptionKey;
@ -341,14 +339,16 @@ public:
if (key.startsWith(systemKeysPrefix)) {
return { SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID, systemKeysPrefix.size() };
}
// Cluster-aware encryption.
if (encryptionMode == EncryptionAtRestMode::CLUSTER_AWARE) {
return { FDB_DEFAULT_ENCRYPT_DOMAIN_ID, 0 };
}
// Keys smaller than the tenant prefix belong to the default domain.
if (key.size() < TenantAPI::PREFIX_SIZE) {
return { FDB_DEFAULT_ENCRYPT_DOMAIN_ID, 0 };
}
StringRef prefix = key.substr(0, TenantAPI::PREFIX_SIZE);
int64_t tenantId = TenantAPI::prefixToId(prefix, EnforceValidTenantId::False);
// Tenant id must be non-negative.
if (tenantId < 0) {
int64_t tenantId = TenantAPI::extractTenantIdFromKeyRef(key);
if (tenantId == TenantInfo::INVALID_TENANT) {
return { FDB_DEFAULT_ENCRYPT_DOMAIN_ID, 0 };
}
return { tenantId, TenantAPI::PREFIX_SIZE };
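// Illustrative outcomes of the mapping above (tenant id values assumed; tenant
// prefixes are TenantAPI::PREFIX_SIZE bytes):
//   system key, any mode                   -> { SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID, systemKeysPrefix.size() }
//   any normal key, CLUSTER_AWARE mode     -> { FDB_DEFAULT_ENCRYPT_DOMAIN_ID, 0 }
//   key shorter than the tenant prefix    -> { FDB_DEFAULT_ENCRYPT_DOMAIN_ID, 0 }
//   tenant-prefixed key, DOMAIN_AWARE mode -> { tenantId, TenantAPI::PREFIX_SIZE }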
@ -362,6 +362,7 @@ public:
private:
Reference<AsyncVar<ServerDBInfo> const> db;
EncryptionAtRestMode encryptionMode;
};
#include "flow/unactorcompiler.h"

View File

@ -22,8 +22,6 @@
#ifndef FDBSERVER_IPAGER_H
#define FDBSERVER_IPAGER_H
#include <cstddef>
#include <stdint.h>
#include "fdbclient/BlobCipher.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/GetEncryptCipherKeys.actor.h"
@ -39,6 +37,10 @@
#define XXH_INLINE_ALL
#include "flow/xxhash.h"
#include <array>
#include <cstddef>
#include <stdint.h>
typedef uint32_t LogicalPageID;
typedef uint32_t PhysicalPageID;
#define invalidLogicalPageID std::numeric_limits<LogicalPageID>::max()
@ -102,6 +104,15 @@ enum EncodingType : uint8_t {
MAX_ENCODING_TYPE = 4
};
static constexpr std::array EncryptedEncodingTypes = { AESEncryption, AESEncryptionWithAuth, XOREncryption_TestOnly };
inline bool isEncodingTypeEncrypted(EncodingType encoding) {
return std::count(EncryptedEncodingTypes.begin(), EncryptedEncodingTypes.end(), encoding) > 0;
}
inline bool isEncodingTypeAESEncrypted(EncodingType encoding) {
return encoding == AESEncryption || encoding == AESEncryptionWithAuth;
}
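// A quick sanity sketch of how the two predicates relate: every AES encoding is
// encrypted, but XOREncryption_TestOnly is encrypted without being AES-based:
//   isEncodingTypeEncrypted(XOREncryption_TestOnly)    == true
//   isEncodingTypeAESEncrypted(XOREncryption_TestOnly) == false
//   isEncodingTypeAESEncrypted(AESEncryptionWithAuth)  == true
//   isEncodingTypeEncrypted(XXHash64)                  == false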
enum PageType : uint8_t {
HeaderPage = 0,
BackupHeaderPage = 1,
@ -615,11 +626,6 @@ public:
const Arena& getArena() const { return arena; }
static bool isEncodingTypeEncrypted(EncodingType t) {
return t == EncodingType::AESEncryption || t == EncodingType::AESEncryptionWithAuth ||
t == EncodingType::XOREncryption_TestOnly;
}
// Returns true if the page's encoding type employs encryption
bool isEncrypted() const { return isEncodingTypeEncrypted(getEncodingType()); }
@ -707,11 +713,16 @@ public:
ArbitraryObject extra;
};
class IPageEncryptionKeyProvider;
// This API is probably too customized to the behavior of DWALPager and probably needs some changes to be more generic.
class IPager2 : public IClosable {
public:
virtual std::string getName() const = 0;
// Set an encryption key provider.
virtual void setEncryptionKeyProvider(Reference<IPageEncryptionKeyProvider> keyProvider) = 0;
// Returns an ArenaPage that can be passed to writePage. The data in the returned ArenaPage might not be zeroed.
virtual Reference<ArenaPage> newPageBuffer(size_t blocks = 1) = 0;

View File

@ -492,6 +492,10 @@ struct RemoteIKeyValueStore : public IKeyValueStore {
}
return Void();
}
Future<EncryptionAtRestMode> encryptionMode() override {
return EncryptionAtRestMode(EncryptionAtRestMode::DISABLED);
}
};
Future<Void> runFlowProcess(std::string const& name, Endpoint endpoint);

View File

@ -238,7 +238,7 @@ struct RestoreControllerData : RestoreRoleData, public ReferenceCounted<RestoreC
int rangeIdx,
const std::vector<RestoreFileFR>& logFiles) {
double size = 0;
TraceEvent(SevDebug, "FastRestoreGetVersionSize")
TraceEvent(SevVerbose, "FastRestoreGetVersionSize")
.detail("PreviousVersion", prevVersion)
.detail("NextVersion", nextVersion)
.detail("RangeFiles", rangeFiles.size())
@ -246,7 +246,7 @@ struct RestoreControllerData : RestoreRoleData, public ReferenceCounted<RestoreC
.detail("LogFiles", logFiles.size());
ASSERT(prevVersion <= nextVersion);
while (rangeIdx < rangeFiles.size()) {
TraceEvent(SevDebug, "FastRestoreGetVersionSize").detail("RangeFile", rangeFiles[rangeIdx].toString());
TraceEvent(SevVerbose, "FastRestoreGetVersionSize").detail("RangeFile", rangeFiles[rangeIdx].toString());
if (rangeFiles[rangeIdx].version < nextVersion) {
ASSERT(rangeFiles[rangeIdx].version >= prevVersion);
size += rangeFiles[rangeIdx].fileSize;

View File

@ -866,10 +866,12 @@ struct InitializeStorageRequest {
tssPairIDAndVersion; // Only set if recruiting a tss. Will be the UID and Version of its SS pair.
Version initialClusterVersion;
ReplyPromise<InitializeStorageReply> reply;
EncryptionAtRestMode encryptMode;
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, seedTag, reqId, interfaceId, storeType, reply, tssPairIDAndVersion, initialClusterVersion);
serializer(
ar, seedTag, reqId, interfaceId, storeType, reply, tssPairIDAndVersion, initialClusterVersion, encryptMode);
}
};
@ -1186,7 +1188,6 @@ ACTOR Future<Void> encryptKeyProxyServer(EncryptKeyProxyInterface ei, Reference<
class IKeyValueStore;
class ServerCoordinators;
class IDiskQueue;
class IPageEncryptionKeyProvider;
ACTOR Future<Void> storageServer(IKeyValueStore* persistentData,
StorageServerInterface ssi,
Tag seedTag,
@ -1194,8 +1195,7 @@ ACTOR Future<Void> storageServer(IKeyValueStore* persistentData,
Version tssSeedVersion,
ReplyPromise<InitializeStorageReply> recruitReply,
Reference<AsyncVar<ServerDBInfo> const> db,
std::string folder,
Reference<IPageEncryptionKeyProvider> encryptionKeyProvider);
std::string folder);
ACTOR Future<Void> storageServer(
IKeyValueStore* persistentData,
StorageServerInterface ssi,
@ -1203,8 +1203,7 @@ ACTOR Future<Void> storageServer(
std::string folder,
Promise<Void> recovered,
Reference<IClusterConnectionRecord>
connRecord, // changes pssi->id() to be the recovered ID); // changes pssi->id() to be the recovered ID
Reference<IPageEncryptionKeyProvider> encryptionKeyProvider);
connRecord); // changes pssi->id() to be the recovered ID
ACTOR Future<Void> masterServer(MasterInterface mi,
Reference<AsyncVar<ServerDBInfo> const> db,
Reference<AsyncVar<Optional<ClusterControllerFullInterface>> const> ccInterface,

View File

@ -437,6 +437,8 @@ struct StorageServerDisk {
StorageBytes getStorageBytes() const { return storage->getStorageBytes(); }
std::tuple<size_t, size_t, size_t> getSize() const { return storage->getSize(); }
Future<EncryptionAtRestMode> encryptionMode() { return storage->encryptionMode(); }
// The following are pointers to the Counters in StorageServer::counters of the same names.
Counter* kvCommitLogicalBytes;
Counter* kvClearRanges;
@ -797,8 +799,6 @@ public:
std::map<Version, std::vector<KeyRange>>
pendingRemoveRanges; // Pending requests to remove ranges from physical shards
Reference<IPageEncryptionKeyProvider> encryptionKeyProvider;
bool shardAware; // True if the storage server is aware of the physical shards.
// Histograms
@ -1162,6 +1162,8 @@ public:
Optional<LatencyBandConfig> latencyBandConfig;
Optional<EncryptionAtRestMode> encryptionMode;
struct Counters {
CounterCollection cc;
Counter allQueries, systemKeyQueries, getKeyQueries, getValueQueries, getRangeQueries, getRangeSystemKeyQueries,
@ -1374,12 +1376,10 @@ public:
StorageServer(IKeyValueStore* storage,
Reference<AsyncVar<ServerDBInfo> const> const& db,
StorageServerInterface const& ssi,
Reference<IPageEncryptionKeyProvider> encryptionKeyProvider)
: encryptionKeyProvider(encryptionKeyProvider), shardAware(false),
tlogCursorReadsLatencyHistogram(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP,
TLOG_CURSOR_READS_LATENCY_HISTOGRAM,
Histogram::Unit::milliseconds)),
StorageServerInterface const& ssi)
: shardAware(false), tlogCursorReadsLatencyHistogram(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP,
TLOG_CURSOR_READS_LATENCY_HISTOGRAM,
Histogram::Unit::milliseconds)),
ssVersionLockLatencyHistogram(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP,
SS_VERSION_LOCK_LATENCY_HISTOGRAM,
Histogram::Unit::milliseconds)),
@ -9197,11 +9197,13 @@ ACTOR Future<Void> update(StorageServer* data, bool* pReceivedUpdate) {
} else {
MutationRef msg;
cloneReader >> msg;
if (g_network && g_network->isSimulated() &&
isEncryptionOpSupported(EncryptOperationType::TLOG_ENCRYPTION) && !msg.isEncrypted() &&
!(isSingleKeyMutation((MutationRef::Type)msg.type) &&
(backupLogKeys.contains(msg.param1) || (applyLogKeys.contains(msg.param1))))) {
ASSERT(false);
if (g_network && g_network->isSimulated()) {
bool isBackupLogMutation =
isSingleKeyMutation((MutationRef::Type)msg.type) &&
(backupLogKeys.contains(msg.param1) || applyLogKeys.contains(msg.param1));
ASSERT(data->encryptionMode.present());
ASSERT(!data->encryptionMode.get().isEncryptionEnabled() || msg.isEncrypted() ||
isBackupLogMutation);
}
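// Condensed, the invariant this simulation-only assertion enforces:
//   encryptionMode.isEncryptionEnabled() => (msg.isEncrypted() || isBackupLogMutation)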
if (msg.isEncrypted()) {
if (!cipherKeys.present()) {
@ -9358,11 +9360,13 @@ ACTOR Future<Void> update(StorageServer* data, bool* pReceivedUpdate) {
MutationRef msg;
MutationRefAndCipherKeys encryptedMutation;
rd >> msg;
if (g_network && g_network->isSimulated() &&
isEncryptionOpSupported(EncryptOperationType::TLOG_ENCRYPTION) && !msg.isEncrypted() &&
!(isSingleKeyMutation((MutationRef::Type)msg.type) &&
(backupLogKeys.contains(msg.param1) || (applyLogKeys.contains(msg.param1))))) {
ASSERT(false);
if (g_network && g_network->isSimulated()) {
bool isBackupLogMutation =
isSingleKeyMutation((MutationRef::Type)msg.type) &&
(backupLogKeys.contains(msg.param1) || applyLogKeys.contains(msg.param1));
ASSERT(data->encryptionMode.present());
ASSERT(!data->encryptionMode.get().isEncryptionEnabled() || msg.isEncrypted() ||
isBackupLogMutation);
}
if (msg.isEncrypted()) {
ASSERT(cipherKeys.present());
@ -11402,6 +11406,7 @@ ACTOR Future<Void> initTenantMap(StorageServer* self) {
ACTOR Future<Void> replaceInterface(StorageServer* self, StorageServerInterface ssi) {
ASSERT(!ssi.isTss());
state EncryptionAtRestMode encryptionMode = wait(self->storage.encryptionMode());
state Transaction tr(self->cx);
loop {
@ -11415,6 +11420,12 @@ ACTOR Future<Void> replaceInterface(StorageServer* self, StorageServerInterface
GetStorageServerRejoinInfoRequest(ssi.id(), ssi.locality.dcId()))
: Never())) {
state GetStorageServerRejoinInfoReply rep = _rep;
if (rep.encryptMode != encryptionMode) {
TraceEvent(SevWarnAlways, "SSEncryptModeMismatch", self->thisServerID)
.detail("StorageEncryptionMode", encryptionMode)
.detail("ClusterEncryptionMode", rep.encryptMode);
throw encrypt_mode_mismatch();
}
try {
tr.reset();
@ -11569,9 +11580,8 @@ ACTOR Future<Void> storageServer(IKeyValueStore* persistentData,
Version tssSeedVersion,
ReplyPromise<InitializeStorageReply> recruitReply,
Reference<AsyncVar<ServerDBInfo> const> db,
std::string folder,
Reference<IPageEncryptionKeyProvider> encryptionKeyProvider) {
state StorageServer self(persistentData, db, ssi, encryptionKeyProvider);
std::string folder) {
state StorageServer self(persistentData, db, ssi);
self.shardAware = SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && persistentData->shardAware();
state Future<Void> ssCore;
self.initialClusterVersion = startVersion;
@ -11589,6 +11599,9 @@ ACTOR Future<Void> storageServer(IKeyValueStore* persistentData,
wait(self.storage.commit());
++self.counters.kvCommits;
EncryptionAtRestMode encryptionMode = wait(self.storage.encryptionMode());
self.encryptionMode = encryptionMode;
if (seedTag == invalidTag) {
ssi.startAcceptingRequests();
self.registerInterfaceAcceptingRequests.send(Void());
@ -11661,9 +11674,8 @@ ACTOR Future<Void> storageServer(IKeyValueStore* persistentData,
Reference<AsyncVar<ServerDBInfo> const> db,
std::string folder,
Promise<Void> recovered,
Reference<IClusterConnectionRecord> connRecord,
Reference<IPageEncryptionKeyProvider> encryptionKeyProvider) {
state StorageServer self(persistentData, db, ssi, encryptionKeyProvider);
Reference<IClusterConnectionRecord> connRecord) {
state StorageServer self(persistentData, db, ssi);
state Future<Void> ssCore;
self.folder = folder;
@ -11684,6 +11696,9 @@ ACTOR Future<Void> storageServer(IKeyValueStore* persistentData,
}
++self.counters.kvCommits;
EncryptionAtRestMode encryptionMode = wait(self.storage.encryptionMode());
self.encryptionMode = encryptionMode;
bool ok = wait(self.storage.restoreDurableState());
if (!ok) {
if (recovered.canBeSet())

View File

@ -1350,8 +1350,7 @@ ACTOR Future<Void> storageServerRollbackRebooter(std::set<std::pair<UID, KeyValu
int64_t memoryLimit,
IKeyValueStore* store,
bool validateDataFiles,
Promise<Void>* rebootKVStore,
Reference<IPageEncryptionKeyProvider> encryptionKeyProvider) {
Promise<Void>* rebootKVStore) {
state TrackRunningStorage _(id, storeType, runningStorages);
loop {
ErrorOr<Void> e = wait(errorOr(prevStorageServer));
@ -1418,13 +1417,8 @@ ACTOR Future<Void> storageServerRollbackRebooter(std::set<std::pair<UID, KeyValu
DUMPTOKEN(recruited.changeFeedPop);
DUMPTOKEN(recruited.changeFeedVersionUpdate);
prevStorageServer = storageServer(store,
recruited,
db,
folder,
Promise<Void>(),
Reference<IClusterConnectionRecord>(nullptr),
encryptionKeyProvider);
prevStorageServer =
storageServer(store, recruited, db, folder, Promise<Void>(), Reference<IClusterConnectionRecord>(nullptr));
prevStorageServer = handleIOErrors(prevStorageServer, store, id, store->onClosed());
}
}
@ -1888,14 +1882,6 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
LocalLineage _;
getCurrentLineage()->modify(&RoleLineage::role) = ProcessClass::ClusterRole::Storage;
Reference<IPageEncryptionKeyProvider> encryptionKeyProvider;
if (FLOW_KNOBS->ENCRYPT_HEADER_AUTH_TOKEN_ENABLED) {
encryptionKeyProvider =
makeReference<TenantAwareEncryptionKeyProvider<EncodingType::AESEncryptionWithAuth>>(dbInfo);
} else {
encryptionKeyProvider =
makeReference<TenantAwareEncryptionKeyProvider<EncodingType::AESEncryption>>(dbInfo);
}
IKeyValueStore* kv = openKVStore(
s.storeType,
s.filename,
@ -1909,7 +1895,7 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
s.storeType != KeyValueStoreType::SSD_SHARDED_ROCKSDB &&
deterministicRandom()->coinflip())
: true),
encryptionKeyProvider);
dbInfo);
Future<Void> kvClosed =
kv->onClosed() ||
rebootKVSPromise.getFuture() /* clear the onClosed() Future in actorCollection when rebooting */;
@ -1957,8 +1943,7 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
DUMPTOKEN(recruited.changeFeedVersionUpdate);
Promise<Void> recovery;
Future<Void> f =
storageServer(kv, recruited, dbInfo, folder, recovery, connRecord, encryptionKeyProvider);
Future<Void> f = storageServer(kv, recruited, dbInfo, folder, recovery, connRecord);
recoveries.push_back(recovery.getFuture());
f = handleIOErrors(f, kv, s.storeID, kvClosed);
f = storageServerRollbackRebooter(&runningStorages,
@ -1974,8 +1959,7 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
memoryLimit,
kv,
validateDataFiles,
&rebootKVSPromise,
encryptionKeyProvider);
&rebootKVSPromise);
errorForwarders.add(forwardError(errors, ssRole, recruited.id(), f));
} else if (s.storedComponent == DiskStore::TLogData) {
LocalLineage _;
@ -2581,15 +2565,6 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
folder,
isTss ? testingStoragePrefix.toString() : fileStoragePrefix.toString(),
recruited.id());
Reference<IPageEncryptionKeyProvider> encryptionKeyProvider;
if (FLOW_KNOBS->ENCRYPT_HEADER_AUTH_TOKEN_ENABLED) {
encryptionKeyProvider =
makeReference<TenantAwareEncryptionKeyProvider<EncodingType::AESEncryptionWithAuth>>(
dbInfo);
} else {
encryptionKeyProvider =
makeReference<TenantAwareEncryptionKeyProvider<EncodingType::AESEncryption>>(dbInfo);
}
IKeyValueStore* data = openKVStore(
req.storeType,
filename,
@ -2603,7 +2578,8 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
req.storeType != KeyValueStoreType::SSD_SHARDED_ROCKSDB &&
deterministicRandom()->coinflip())
: true),
encryptionKeyProvider);
dbInfo,
req.encryptMode);
Future<Void> kvClosed =
data->onClosed() ||
@ -2619,8 +2595,7 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
isTss ? req.tssPairIDAndVersion.get().second : 0,
storageReady,
dbInfo,
folder,
encryptionKeyProvider);
folder);
s = handleIOErrors(s, data, recruited.id(), kvClosed);
s = storageCache.removeOnReady(req.reqId, s);
s = storageServerRollbackRebooter(&runningStorages,
@ -2636,8 +2611,7 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
memoryLimit,
data,
false,
&rebootKVSPromise2,
encryptionKeyProvider);
&rebootKVSPromise2);
errorForwarders.add(forwardError(errors, ssRole, recruited.id(), s));
} else if (storageCache.exists(req.reqId)) {
forwardPromise(req.reply, storageCache.get(req.reqId));

View File

@ -1020,12 +1020,6 @@ struct BlobGranuleCorrectnessWorkload : TestWorkload {
ACTOR Future<bool> _check(Database cx, BlobGranuleCorrectnessWorkload* self) {
// check error counts, and do an availability check at the end
state std::vector<Future<bool>> results;
state Future<Void> checkFeedCleanupFuture;
if (self->clientId == 0) {
checkFeedCleanupFuture = checkFeedCleanup(cx, BGW_DEBUG);
} else {
checkFeedCleanupFuture = Future<Void>(Void());
}
for (auto& it : self->directories) {
results.push_back(self->checkDirectory(cx, self, it));
@ -1035,6 +1029,14 @@ struct BlobGranuleCorrectnessWorkload : TestWorkload {
bool dirSuccess = wait(f);
allSuccessful &= dirSuccess;
}
// do feed cleanup check only after data is guaranteed to be available for each granule
state Future<Void> checkFeedCleanupFuture;
if (self->clientId == 0) {
checkFeedCleanupFuture = checkFeedCleanup(cx, BGW_DEBUG);
} else {
checkFeedCleanupFuture = Future<Void>(Void());
}
wait(checkFeedCleanupFuture);
return allSuccessful;
}

View File

@ -111,7 +111,7 @@ struct BlobGranuleVerifierWorkload : TestWorkload {
sharedRandomNumber /= 3;
// randomly some tests write data first and then turn on blob granules later, to test conversion of existing DB
initAtEnd = !enablePurging && sharedRandomNumber % 10 == 0;
initAtEnd = getOption(options, "initAtEnd"_sr, sharedRandomNumber % 10 == 0);
sharedRandomNumber /= 10;
// FIXME: enable and fix bugs!
// granuleSizeCheck = initAtEnd;
@ -1061,13 +1061,6 @@ struct BlobGranuleVerifierWorkload : TestWorkload {
wait(self->setUpBlobRange(cx));
}
state Future<Void> checkFeedCleanupFuture;
if (self->clientId == 0) {
checkFeedCleanupFuture = checkFeedCleanup(cx, BGV_DEBUG);
} else {
checkFeedCleanupFuture = Future<Void>(Void());
}
state Version readVersion = wait(self->doGrv(&tr));
state Version startReadVersion = readVersion;
state int checks = 0;
@ -1187,6 +1180,14 @@ struct BlobGranuleVerifierWorkload : TestWorkload {
fmt::print("Availability check updated read version from {0} to {1}\n", startReadVersion, readVersion);
}
// start feed cleanup check after there's guaranteed to be data for each granule
state Future<Void> checkFeedCleanupFuture;
if (self->clientId == 0) {
checkFeedCleanupFuture = checkFeedCleanup(cx, BGV_DEBUG);
} else {
checkFeedCleanupFuture = Future<Void>(Void());
}
state bool dataPassed = wait(self->checkAllData(cx, self));
wait(checkFeedCleanupFuture);

View File

@ -18,9 +18,11 @@
* limitations under the License.
*/
#include "fdbclient/ClientKnobs.h"
#include "fdbclient/TenantEntryCache.actor.h"
#include "fdbclient/TenantManagement.actor.h"
#include "fdbrpc/ContinuousSample.h"
#include "fdbrpc/TenantInfo.h"
#include "fdbserver/Knobs.h"
#include "fdbserver/TesterInterface.actor.h"
#include "fdbserver/workloads/workloads.actor.h"
@ -39,6 +41,9 @@ struct BulkSetupWorkload : TestWorkload {
std::vector<Reference<Tenant>> tenants;
bool deleteTenants;
double testDuration;
std::unordered_map<int64_t, std::vector<KeyValueRef>> numKVPairsPerTenant;
bool enableEKPKeyFetchFailure;
Arena arena;
BulkSetupWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {
transactionsPerSecond = getOption(options, "transactionsPerSecond"_sr, 5000.0) / clientCount;
@ -50,6 +55,7 @@ struct BulkSetupWorkload : TestWorkload {
deleteTenants = getOption(options, "deleteTenants"_sr, false);
ASSERT(minNumTenants <= maxNumTenants);
testDuration = getOption(options, "testDuration"_sr, -1);
enableEKPKeyFetchFailure = getOption(options, "enableEKPKeyFetchFailure"_sr, false);
}
void getMetrics(std::vector<PerfMetric>& m) override {}
@ -60,6 +66,30 @@ struct BulkSetupWorkload : TestWorkload {
Standalone<KeyValueRef> operator()(int n) { return KeyValueRef(key(n), value((n + 1) % nodeCount)); }
ACTOR static Future<std::vector<KeyValueRef>> getKVPairsForTenant(BulkSetupWorkload* workload,
Reference<Tenant> tenant,
Database cx) {
state KeySelector begin = firstGreaterOrEqual(normalKeys.begin);
state KeySelector end = firstGreaterOrEqual(normalKeys.end);
state std::vector<KeyValueRef> kvPairs;
state ReadYourWritesTransaction tr = ReadYourWritesTransaction(cx, tenant);
loop {
try {
RangeResult kvRange = wait(tr.getRange(begin, end, 1000));
if (!kvRange.more && kvRange.size() == 0) {
break;
}
for (int i = 0; i < kvRange.size(); i++) {
kvPairs.push_back(KeyValueRef(workload->arena, KeyValueRef(kvRange[i].key, kvRange[i].value)));
}
begin = firstGreaterThan(kvRange.end()[-1].key);
} catch (Error& e) {
wait(tr.onError(e));
}
}
return kvPairs;
}
ACTOR static Future<Void> _setup(BulkSetupWorkload* workload, Database cx) {
// create a bunch of tenants (between min and max tenants)
state int numTenantsToCreate =
@ -70,13 +100,13 @@ struct BulkSetupWorkload : TestWorkload {
state std::vector<Future<Optional<TenantMapEntry>>> tenantFutures;
for (int i = 0; i < numTenantsToCreate; i++) {
TenantName tenantName = TenantNameRef(format("BulkSetupTenant_%04d", i));
TraceEvent("CreatingTenant").detail("Tenant", tenantName);
tenantFutures.push_back(TenantAPI::createTenant(cx.getReference(), tenantName));
}
wait(waitForAll(tenantFutures));
for (auto& f : tenantFutures) {
ASSERT(f.get().present());
workload->tenants.push_back(makeReference<Tenant>(f.get().get().id, f.get().get().tenantName));
TraceEvent("BulkSetupCreatedTenant").detail("Tenant", workload->tenants.back());
}
}
wait(bulkSetup(cx,
@ -94,14 +124,82 @@ struct BulkSetupWorkload : TestWorkload {
0,
workload->tenants));
state int i;
state bool added = false;
for (i = 0; i < workload->tenants.size(); i++) {
std::vector<KeyValueRef> keysForCurTenant = wait(getKVPairsForTenant(workload, workload->tenants[i], cx));
if (workload->enableEKPKeyFetchFailure && keysForCurTenant.size() > 0 && !added) {
IKnobCollection::getMutableGlobalKnobCollection().setKnob(
"simulation_ekp_tenant_ids_to_drop",
KnobValueRef::create(std::to_string(workload->tenants[i]->id())));
TraceEvent("BulkSetupTenantForEKPToDrop")
.detail("Tenant", CLIENT_KNOBS->SIMULATION_EKP_TENANT_IDS_TO_DROP);
added = true;
}
workload->numKVPairsPerTenant[workload->tenants[i]->id()] = keysForCurTenant;
}
return Void();
}
ACTOR static Future<bool> _check(BulkSetupWorkload* workload, Database cx) {
state int i;
state std::unordered_set<int64_t> tenantIdsToDrop =
parseStringToUnorderedSet<int64_t>(CLIENT_KNOBS->SIMULATION_EKP_TENANT_IDS_TO_DROP, ',');
for (i = 0; i < workload->tenants.size(); i++) {
state Reference<Tenant> tenant = workload->tenants[i];
std::vector<KeyValueRef> keysForCurTenant = wait(getKVPairsForTenant(workload, tenant, cx));
if (tenantIdsToDrop.count(tenant->id())) {
// Don't check the tenants that the EKP would throw errors for
continue;
}
std::vector<KeyValueRef> expectedKeysForCurTenant = workload->numKVPairsPerTenant[tenant->id()];
if (keysForCurTenant.size() != expectedKeysForCurTenant.size()) {
TraceEvent(SevError, "BulkSetupNumKeysMismatch")
.detail("TenantName", tenant)
.detail("ActualCount", keysForCurTenant.size())
.detail("ExpectedCount", expectedKeysForCurTenant.size());
return false;
} else {
TraceEvent("BulkSetupNumKeys")
.detail("TenantName", tenant)
.detail("ActualCount", keysForCurTenant.size());
}
for (int j = 0; j < expectedKeysForCurTenant.size(); j++) {
if (expectedKeysForCurTenant[j].key != keysForCurTenant[j].key) {
TraceEvent(SevError, "BulkSetupNumKeyMismatch")
.detail("TenantName", tenant)
.detail("ActualKey", keysForCurTenant[j].key)
.detail("ExpectedKey", expectedKeysForCurTenant[j].key);
return false;
}
if (expectedKeysForCurTenant[j].value != keysForCurTenant[j].value) {
TraceEvent(SevError, "BulkSetupNumValueMismatch")
.detail("TenantName", tenant)
.detail("ActualValue", keysForCurTenant[j].value)
.detail("ExpectedValue", expectedKeysForCurTenant[j].value);
return false;
}
}
}
return true;
}
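// For reference, the knob consumed above holds a comma-separated id list; a
// hedged round-trip sketch (ids illustrative):
//   setKnob("simulation_ekp_tenant_ids_to_drop", KnobValueRef::create(std::string("12,34")));
//   parseStringToUnorderedSet<int64_t>("12,34", ',') -> { 12, 34 }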
ACTOR static Future<Void> _start(BulkSetupWorkload* workload, Database cx) {
// We want to ensure that tenant deletion happens before the restore phase starts
if (workload->deleteTenants) {
state int numTenantsToDelete = deterministicRandom()->randomInt(0, workload->tenants.size() + 1);
// If there is only one tenant, don't delete it
if (workload->deleteTenants && workload->tenants.size() > 1) {
state Reference<TenantEntryCache<Void>> tenantCache =
makeReference<TenantEntryCache<Void>>(cx, TenantEntryCacheRefreshMode::WATCH);
wait(tenantCache->init());
state int numTenantsToDelete = deterministicRandom()->randomInt(0, workload->tenants.size());
TraceEvent("BulkSetupTenantDeletion").detail("NumTenants", numTenantsToDelete);
if (numTenantsToDelete > 0) {
state int i;
for (i = 0; i < numTenantsToDelete; i++) {
state int tenantIndex = deterministicRandom()->randomInt(0, workload->tenants.size());
state Reference<Tenant> tenant = workload->tenants[tenantIndex];
workload->tenants.erase(workload->tenants.begin() + tenantIndex);
TraceEvent("BulkSetupTenantDeletionClearing")
.detail("Tenant", tenant)
.detail("TotalNumTenants", workload->tenants.size());
@ -118,31 +216,35 @@ struct BulkSetupWorkload : TestWorkload {
}
// delete the tenant
wait(success(TenantAPI::deleteTenant(cx.getReference(), tenant->name.get(), tenant->id())));
workload->tenants.erase(workload->tenants.begin() + tenantIndex);
TraceEvent("BulkSetupTenantDeletionDone")
.detail("Tenant", tenant)
.detail("TotalNumTenants", workload->tenants.size());
}
}
}
return Void();
}
Future<Void> setup(Database const& cx) override { return Void(); }
Future<Void> start(Database const& cx) override {
Future<Void> setup(Database const& cx) override {
if (clientId == 0) {
if (testDuration > 0) {
return timeout(_setup(this, cx), testDuration, Void());
} else {
return _setup(this, cx);
}
return _setup(this, cx);
}
return Void();
}
Future<bool> check(Database const& cx) override { return true; }
Future<Void> start(Database const& cx) override {
if (clientId == 0) {
if (testDuration > 0) {
return timeout(_start(this, cx), testDuration, Void());
}
return _start(this, cx);
}
return Void();
}
Future<bool> check(Database const& cx) override {
if (clientId == 0) {
return _check(this, cx);
}
return true;
}
};
WorkloadFactory<BulkSetupWorkload> BulkSetupWorkloadFactory;

View File

@ -273,18 +273,17 @@ struct ConfigureDatabaseWorkload : TestWorkload {
ACTOR Future<Void> _setup(Database cx, ConfigureDatabaseWorkload* self) {
wait(success(ManagementAPI::changeConfig(cx.getReference(), "single storage_migration_type=aggressive", true)));
// Redwood is the only storage engine type supporting encryption.
DatabaseConfiguration config = wait(getDatabaseConfiguration(cx));
if (config.encryptionAtRestMode.isEncryptionEnabled()) {
self->storageEngineExcludeTypes = { 0, 1, 2, 4, 5 };
wait(success(ManagementAPI::changeConfig(cx.getReference(), "ssd-redwood-1-experimental", true)));
}
return Void();
}
ACTOR Future<Void> _start(ConfigureDatabaseWorkload* self, Database cx) {
// Redwood is the only storage engine type supporting encryption.
DatabaseConfiguration config = wait(getDatabaseConfiguration(cx));
TraceEvent("ConfigureDatabase_Config").detail("Config", config.toString());
if (config.encryptionAtRestMode.isEncryptionEnabled()) {
TraceEvent("ConfigureDatabase_EncryptionEnabled");
self->storageEngineExcludeTypes = { 0, 1, 2, 4, 5 };
}
if (self->clientId == 0) {
self->clients.push_back(timeout(self->singleDB(self, cx), self->testDuration, Void()));
wait(waitForAll(self->clients));

View File

@ -471,7 +471,7 @@ struct ReadWriteWorkload : ReadWriteCommon {
}
}
Future<Void> start(Database const& cx) override { return _start(cx, this); }
Future<Void> start(Database const& cx) override { return timeout(_start(cx, this), testDuration, Void()); }
ACTOR template <class Trans>
static Future<Void> readOp(Trans* tr, std::vector<int64_t> keys, ReadWriteWorkload* self, bool shouldRecord) {

View File

@ -100,6 +100,7 @@ struct RestoreBackupWorkload : TestWorkload {
state Transaction tr(cx);
loop {
try {
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr.clear(normalKeys);
for (auto& r : getSystemBackupRanges()) {
tr.clear(r);
@ -120,20 +121,19 @@ struct RestoreBackupWorkload : TestWorkload {
if (config.tenantMode == TenantMode::REQUIRED) {
// restore system keys
VectorRef<KeyRangeRef> systemBackupRanges = getSystemBackupRanges();
state std::vector<Future<Version>> restores;
for (int i = 0; i < systemBackupRanges.size(); i++) {
restores.push_back((self->backupAgent.restore(cx,
cx,
"system_restore"_sr,
Key(self->backupContainer->getURL()),
self->backupContainer->getProxy(),
WaitForComplete::True,
::invalidVersion,
Verbose::True,
systemBackupRanges[i])));
state VectorRef<KeyRangeRef> systemBackupRanges = getSystemBackupRanges();
state int i;
for (i = 0; i < systemBackupRanges.size(); i++) {
wait(success(self->backupAgent.restore(cx,
cx,
"system_restore"_sr,
Key(self->backupContainer->getURL()),
self->backupContainer->getProxy(),
WaitForComplete::True,
::invalidVersion,
Verbose::True,
systemBackupRanges[i])));
}
waitForAll(restores);
// restore non-system keys
wait(success(self->backupAgent.restore(cx,
cx,

View File

@ -18,15 +18,18 @@
* limitations under the License.
*/
#include "fdbclient/DatabaseConfiguration.h"
#include "fdbclient/ManagementAPI.actor.h"
#include "fdbclient/NativeAPI.actor.h"
#include "fdbserver/Knobs.h"
#include "fdbserver/TesterInterface.actor.h"
#include "fdbserver/workloads/workloads.actor.h"
#include "fdbrpc/simulator.h"
#include "flow/Knobs.h"
#include "boost/algorithm/string/predicate.hpp"
#include "flow/IConnection.h"
#include "fdbrpc/SimulatorProcessInfo.h"
#include "flow/Knobs.h"
#undef state
#include "fdbclient/SimpleIni.h"
@ -58,6 +61,7 @@ struct SaveAndKillWorkload : TestWorkload {
ACTOR Future<Void> _start(SaveAndKillWorkload* self, Database cx) {
state int i;
wait(delay(deterministicRandom()->random01() * self->testDuration));
DatabaseConfiguration config = wait(getDatabaseConfiguration(cx));
CSimpleIni ini;
ini.SetUnicode();
@ -71,7 +75,7 @@ struct SaveAndKillWorkload : TestWorkload {
ini.SetValue("META", "testerCount", format("%d", g_simulator->testerCount).c_str());
ini.SetValue("META", "tssMode", format("%d", g_simulator->tssMode).c_str());
ini.SetValue("META", "mockDNS", INetworkConnections::net()->convertMockDNSToString().c_str());
ini.SetValue("META", "tenantMode", cx->clientInfo->get().tenantMode.toString().c_str());
ini.SetValue("META", "tenantMode", config.tenantMode.toString().c_str());
if (cx->defaultTenant.present()) {
ini.SetValue("META", "defaultTenant", cx->defaultTenant.get().toString().c_str());
}
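With this change the tenant mode written to restartInfo.ini comes from the durable DatabaseConfiguration rather than the client's cached clientInfo, so the restarted simulation reads back what the cluster was actually configured with. A stand-in sketch of the META write, in plain C++ instead of CSimpleIni (the path and key names follow the diff; everything else is illustrative):

#include <fstream>
#include <map>
#include <string>

// Sketch: persist META key/value pairs (e.g. tenantMode) for the restart phase.
void writeMeta(const std::string& path, const std::map<std::string, std::string>& meta) {
    std::ofstream out(path); // e.g. "simfdb/restartInfo.ini"
    out << "[META]\n";
    for (const auto& [k, v] : meta)
        out << k << "=" << v << "\n";
}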

View File

@ -74,6 +74,7 @@ public: // introduced features
API_VERSION_FEATURE(@FDB_AV_GET_TOTAL_COST@, GetTotalCost);
API_VERSION_FEATURE(@FDB_AV_FAIL_ON_EXTERNAL_CLIENT_ERRORS@, FailOnExternalClientErrors);
API_VERSION_FEATURE(@FDB_AV_GET_CLIENT_STATUS@, GetClientStatus);
API_VERSION_FEATURE(@FDB_AV_INITIALIZE_TRACE_ON_SETUP@, InitializeTraceOnSetup);
};
#endif // FLOW_CODE_API_VERSION_H

View File

@ -15,3 +15,4 @@ set(FDB_AV_TENANT_BLOB_RANGE_API "720")
set(FDB_AV_GET_TOTAL_COST "730")
set(FDB_AV_FAIL_ON_EXTERNAL_CLIENT_ERRORS "730")
set(FDB_AV_GET_CLIENT_STATUS "730")
set(FDB_AV_INITIALIZE_TRACE_ON_SETUP "730")
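The new FDB_AV_INITIALIZE_TRACE_ON_SETUP value feeds the API_VERSION_FEATURE macro above, gating the feature at API version 730. A self-contained sketch of that gating pattern (the struct and method here are illustrative, not the generated header):

#include <cassert>

// Sketch: a feature is usable only when the selected API version is at least
// the version it was introduced in (730 for InitializeTraceOnSetup).
struct ApiVersion {
    int version;
    bool hasInitializeTraceOnSetup() const { return version >= 730; }
};

int main() {
    assert(!ApiVersion{720}.hasInitializeTraceOnSetup());
    assert(ApiVersion{730}.hasInitializeTraceOnSetup());
}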

View File

@ -128,4 +128,9 @@ EncryptAuthTokenAlgo getRandomAuthTokenAlgo() {
: EncryptAuthTokenAlgo::ENCRYPT_HEADER_AUTH_TOKEN_ALGO_HMAC_SHA;
return algo;
}
bool isReservedEncryptDomain(EncryptCipherDomainId domainId) {
return domainId == SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID || domainId == ENCRYPT_HEADER_DOMAIN_ID ||
domainId == FDB_DEFAULT_ENCRYPT_DOMAIN_ID;
}
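A self-contained restatement of the new predicate with the reserved ids spelled out. The numeric values are assumptions based on FDB's encryption headers, not part of this diff:

#include <cstdint>

using EncryptCipherDomainId = int64_t;

// Assumed values for the reserved domains; treat as illustrative.
constexpr EncryptCipherDomainId FDB_DEFAULT_ENCRYPT_DOMAIN_ID = -1;
constexpr EncryptCipherDomainId SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID = -2;
constexpr EncryptCipherDomainId ENCRYPT_HEADER_DOMAIN_ID = -3;

bool isReservedEncryptDomain(EncryptCipherDomainId domainId) {
    return domainId == SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID || domainId == ENCRYPT_HEADER_DOMAIN_ID ||
           domainId == FDB_DEFAULT_ENCRYPT_DOMAIN_ID;
}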

View File

@ -21,7 +21,9 @@
#include "flow/IThreadPool.h"
#include <algorithm>
#ifndef BOOST_SYSTEM_NO_LIB
#define BOOST_SYSTEM_NO_LIB
#endif
#define BOOST_DATE_TIME_NO_LIB
#define BOOST_REGEX_NO_LIB
#include "boost/asio.hpp"

View File

@ -294,7 +294,7 @@ int main(int argc, char** argv) {
Error::init();
g_network = newNet2(TLSConfig());
if (enableTrace)
openTraceFile(NetworkAddress(), 10 << 20, 10 << 20, ".", "mkcert");
openTraceFile({}, 10 << 20, 10 << 20, ".", "mkcert");
auto thread = std::thread([]() { g_network->run(); });
auto cleanUpGuard = ScopeExit([&thread, enableTrace]() {
g_network->stop();

View File

@ -26,7 +26,9 @@
#include "flow/Trace.h"
#include <algorithm>
#include <memory>
#ifndef BOOST_SYSTEM_NO_LIB
#define BOOST_SYSTEM_NO_LIB
#endif
#define BOOST_DATE_TIME_NO_LIB
#define BOOST_REGEX_NO_LIB
#include <boost/asio.hpp>

View File

@ -574,6 +574,11 @@ public:
universalFields[name] = value;
}
Optional<NetworkAddress> getLocalAddress() {
MutexHolder holder(mutex);
return this->localAddress;
}
void setLocalAddress(const NetworkAddress& addr) {
MutexHolder holder(mutex);
this->localAddress = addr;
@ -763,7 +768,7 @@ void flushTraceFileVoid() {
}
}
void openTraceFile(const NetworkAddress& na,
void openTraceFile(const Optional<NetworkAddress>& na,
uint64_t rollsize,
uint64_t maxLogsSize,
std::string directory,
@ -780,14 +785,23 @@ void openTraceFile(const NetworkAddress& na,
if (baseOfBase.empty())
baseOfBase = "trace";
std::string ip = na.ip.toString();
std::replace(ip.begin(), ip.end(), ':', '_'); // For IPv6, Windows doesn't accept ':' in filenames.
std::string baseName;
if (identifier.size() > 0) {
baseName = format("%s.%s.%s", baseOfBase.c_str(), ip.c_str(), identifier.c_str());
if (na.present()) {
std::string ip = na.get().ip.toString();
std::replace(ip.begin(), ip.end(), ':', '_'); // For IPv6, Windows doesn't accept ':' in filenames.
if (!identifier.empty()) {
baseName = format("%s.%s.%s", baseOfBase.c_str(), ip.c_str(), identifier.c_str());
} else {
baseName = format("%s.%s.%d", baseOfBase.c_str(), ip.c_str(), na.get().port);
}
} else if (!identifier.empty()) {
baseName = format("%s.0.0.0.0.%s", baseOfBase.c_str(), identifier.c_str());
} else {
baseName = format("%s.%s.%d", baseOfBase.c_str(), ip.c_str(), na.port);
// If neither network address nor identifier is provided, use PID for identification
baseName = format("%s.0.0.0.0.%d", baseOfBase.c_str(), ::getpid());
}
g_traceLog.open(directory,
baseName,
logGroup,
@ -829,6 +843,10 @@ void addUniversalTraceField(const std::string& name, const std::string& value) {
g_traceLog.addUniversalTraceField(name, value);
}
bool isTraceLocalAddressSet() {
return g_traceLog.getLocalAddress().present();
}
void setTraceLocalAddress(const NetworkAddress& addr) {
g_traceLog.setLocalAddress(addr);
}
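The naming logic above now has three paths: a known address, an identifier only, and neither (falling back to the process id). A compilable restatement of just the base-name selection, with POSIX getpid assumed and a stand-in address type:

#include <optional>
#include <string>
#include <unistd.h> // ::getpid(), POSIX assumption for this sketch

struct Addr { std::string ip; int port; }; // stand-in for NetworkAddress

std::string traceBaseName(const std::optional<Addr>& na, std::string base, const std::string& identifier) {
    if (base.empty()) base = "trace";
    if (na) {
        std::string ip = na->ip;
        for (auto& c : ip)
            if (c == ':') c = '_'; // IPv6: Windows rejects ':' in filenames
        return identifier.empty() ? base + "." + ip + "." + std::to_string(na->port)
                                  : base + "." + ip + "." + identifier;
    }
    if (!identifier.empty())
        return base + ".0.0.0.0." + identifier;
    return base + ".0.0.0.0." + std::to_string(::getpid()); // last resort: PID
}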

View File

@ -108,4 +108,6 @@ std::string getEncryptDbgTraceKeyWithTS(std::string_view prefix,
int getEncryptHeaderAuthTokenSize(int algo);
bool isReservedEncryptDomain(EncryptCipherDomainId domainId);
#endif

View File

@ -743,7 +743,26 @@ Future<T> safeThreadFutureToFutureImpl(ThreadFuture<T> threadFuture) {
return threadFuture.get();
}
// The allow_anonymous_future type is used to prevent misuse of ThreadFutures.
// The removeArenaFromStandalone() actors simulate the behavior of DLApi. In this case,
// the memory is not owned by the Standalone. If the `future` goes out of scope, subsequent
// access to the memory via the returned standalone will be invalid.
ACTOR template <typename T>
Future<Standalone<T>> removeArenaFromStandalone(Future<Standalone<T>> future) {
Standalone<T> _ = wait(future);
return Standalone<T>(future.get(), Arena());
}
ACTOR template <typename T>
Future<Optional<Standalone<T>>> removeArenaFromStandalone(Future<Optional<Standalone<T>>> future) {
Optional<Standalone<T>> val = wait(future);
if (val.present()) {
return Standalone<T>(future.get().get(), Arena());
} else {
return Optional<Standalone<T>>();
}
}
// The allow_anonymous_future type is used to prevent misuse of ThreadFutures.
// For Standalone types, the memory in some cases is actually stored in the ThreadFuture object,
// in which case we expect the caller to keep that ThreadFuture around until the result is no
// longer needed.
@ -768,7 +787,11 @@ typename std::enable_if<allow_anonymous_future<T>::value, Future<T>>::type safeT
template <class T>
typename std::enable_if<!allow_anonymous_future<T>::value, Future<T>>::type safeThreadFutureToFuture(
ThreadFuture<T>& threadFuture) {
return safeThreadFutureToFutureImpl(threadFuture);
Future<T> f = safeThreadFutureToFutureImpl(threadFuture);
if (BUGGIFY) {
return removeArenaFromStandalone(f);
}
return f;
}
template <class T>
@ -781,7 +804,9 @@ typename std::enable_if<allow_anonymous_future<T>::value, Future<T>>::type safeT
template <class T>
typename std::enable_if<!allow_anonymous_future<T>::value, Future<T>>::type safeThreadFutureToFuture(
Future<T>& future) {
// Do nothing
if (BUGGIFY) {
return removeArenaFromStandalone(future);
}
return future;
}
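The BUGGIFY paths above deliberately hand back a Standalone whose Arena no longer owns the memory, mimicking DLApi so that simulation catches callers who drop the future too early. The hazard is the same one std::string_view makes familiar:

#include <string>
#include <string_view>

// Sketch of the lifetime bug the BUGGIFY is designed to flush out: a view
// (a Standalone without its Arena) outliving its owner (the future's memory).
std::string_view makeDanglingView() {
    std::string owner = "payload";
    return std::string_view(owner); // BUG: owner is destroyed at return
}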

View File

@ -643,7 +643,10 @@ struct EventCacheHolder : public ReferenceCounted<EventCacheHolder> {
#endif
struct NetworkAddress;
void openTraceFile(const NetworkAddress& na,
template <class T>
class Optional;
void openTraceFile(const Optional<NetworkAddress>& na,
uint64_t rollsize,
uint64_t maxLogsSize,
std::string directory = ".",
@ -673,6 +676,7 @@ void removeTraceRole(std::string const& role);
void retrieveTraceLogIssues(std::set<std::string>& out);
void setTraceLogGroup(const std::string& role);
void addUniversalTraceField(std::string const& name, std::string const& value);
bool isTraceLocalAddressSet();
void setTraceLocalAddress(const NetworkAddress& addr);
void disposeTraceFileWriter();
std::string getTraceFormatExtension();

View File

@ -356,6 +356,7 @@ ERROR( encrypt_invalid_id, 2706, "Invalid encryption cipher details" )
ERROR( encrypt_keys_fetch_failed, 2707, "Encryption keys fetch from external KMS failed" )
ERROR( encrypt_invalid_kms_config, 2708, "Invalid encryption/kms configuration: discovery-url, validation-token, endpoint etc." )
ERROR( encrypt_unsupported, 2709, "Encryption not supported" )
ERROR( encrypt_mode_mismatch, 2710, "Encryption mode mismatch with configuration")
// 4xxx Internal errors (those that should be generated only by bugs) are decimal 4xxx
ERROR( unknown_error, 4000, "An unknown error occurred" ) // C++ exception not of type Error

View File

@ -26,6 +26,7 @@
#include "flow/network.h"
#include <utility>
#include <functional>
#include <unordered_set>
#if defined(NO_INTELLISENSE) && !defined(FLOW_GENERICACTORS_ACTOR_G_H)
#define FLOW_GENERICACTORS_ACTOR_G_H
#include "flow/genericactors.actor.g.h"
@ -115,6 +116,21 @@ std::vector<T> parseStringToVector(std::string str, char delim) {
return result;
}
template <class T>
std::unordered_set<T> parseStringToUnorderedSet(std::string str, char delim) {
std::unordered_set<T> result;
std::stringstream stream(str);
std::string token;
while (stream.good()) {
getline(stream, token, delim);
std::istringstream tokenStream(token);
T item;
tokenStream >> item;
result.emplace(item);
}
return result;
}
template <class T>
ErrorOr<T> errorOr(T t) {
return ErrorOr<T>(t);
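parseStringToUnorderedSet mirrors the existing parseStringToVector, streaming each delimited token through operator>> and collapsing duplicates. A standalone copy with a usage example (main() is illustrative):

#include <iostream>
#include <sstream>
#include <string>
#include <unordered_set>

template <class T>
std::unordered_set<T> parseStringToUnorderedSet(std::string str, char delim) {
    std::unordered_set<T> result;
    std::stringstream stream(str);
    std::string token;
    while (stream.good()) {
        getline(stream, token, delim);
        std::istringstream tokenStream(token);
        T item;
        tokenStream >> item;
        result.emplace(item);
    }
    return result;
}

int main() {
    auto ids = parseStringToUnorderedSet<int>("1,2,2,3", ',');
    std::cout << ids.size() << "\n"; // prints 3: the duplicate 2 collapses
}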

View File

@ -131,6 +131,7 @@ if(WITH_PYTHON)
add_fdb_test(TEST_FILES fast/BackupAzureBlobCorrectness.toml IGNORE)
add_fdb_test(TEST_FILES fast/BackupS3BlobCorrectness.toml IGNORE)
add_fdb_test(TEST_FILES fast/BackupCorrectness.toml)
add_fdb_test(TEST_FILES fast/BackupCorrectnessWithEKPKeyFetchFailures.toml)
add_fdb_test(TEST_FILES fast/BackupCorrectnessWithTenantDeletion.toml)
add_fdb_test(TEST_FILES fast/EncryptedBackupCorrectness.toml)
add_fdb_test(TEST_FILES fast/BackupCorrectnessClean.toml)
@ -163,6 +164,7 @@ if(WITH_PYTHON)
add_fdb_test(TEST_FILES fast/FuzzApiCorrectness.toml)
add_fdb_test(TEST_FILES fast/FuzzApiCorrectnessClean.toml)
add_fdb_test(TEST_FILES fast/IncrementalBackup.toml)
add_fdb_test(TEST_FILES fast/IncrementalBackupWithEKPKeyFetchFailures.toml)
add_fdb_test(TEST_FILES fast/IncrementalBackupWithTenantDeletion.toml)
add_fdb_test(TEST_FILES fast/IncrementTest.toml)
add_fdb_test(TEST_FILES fast/InventoryTestAlmostReadOnly.toml)
@ -284,8 +286,8 @@ if(WITH_PYTHON)
TEST_FILES restarting/from_6.3.13_until_7.2.0/DrUpgradeRestart-1.txt
restarting/from_6.3.13_until_7.2.0/DrUpgradeRestart-2.txt)
add_fdb_test(
TEST_FILES restarting/from_7.0.0_until_7.2.0/UpgradeAndBackupRestore-1.toml
restarting/from_7.0.0_until_7.2.0/UpgradeAndBackupRestore-2.toml)
TEST_FILES restarting/from_7.0.0_until_7.1.0/UpgradeAndBackupRestore-1.toml
restarting/from_7.0.0_until_7.1.0/UpgradeAndBackupRestore-2.toml)
add_fdb_test(
TEST_FILES restarting/from_7.1.0_until_7.2.0/ConfigureTestRestart-1.toml
restarting/from_7.1.0_until_7.2.0/ConfigureTestRestart-2.toml)
@ -307,6 +309,9 @@ if(WITH_PYTHON)
add_fdb_test(
TEST_FILES restarting/from_7.1.0_until_7.2.0/ConfigureStorageMigrationTestRestart-1.toml
restarting/from_7.1.0_until_7.2.0/ConfigureStorageMigrationTestRestart-2.toml)
add_fdb_test(
TEST_FILES restarting/from_7.1.0_until_7.2.0/UpgradeAndBackupRestore-1.toml
restarting/from_7.1.0_until_7.2.0/UpgradeAndBackupRestore-2.toml)
add_fdb_test(
TEST_FILES restarting/from_7.1.0_until_7.2.0/VersionVectorDisableRestart-1.toml
restarting/from_7.1.0_until_7.2.0/VersionVectorDisableRestart-2.toml)
@ -329,20 +334,26 @@ if(WITH_PYTHON)
TEST_FILES restarting/from_7.2.0/DrUpgradeRestart-1.toml
restarting/from_7.2.0/DrUpgradeRestart-2.toml)
add_fdb_test(
TEST_FILES restarting/from_7.2.4/UpgradeAndBackupRestore-1.toml
restarting/from_7.2.4/UpgradeAndBackupRestore-2.toml)
TEST_FILES restarting/from_7.2.4_until_7.3.0/UpgradeAndBackupRestore-1.toml
restarting/from_7.2.4_until_7.3.0/UpgradeAndBackupRestore-2.toml)
add_fdb_test(
TEST_FILES restarting/from_7.3.0/ConfigureTestRestart-1.toml
restarting/from_7.3.0/ConfigureTestRestart-2.toml)
add_fdb_test(
TEST_FILES restarting/from_7.3.0/ConfigureStorageMigrationTestRestart-1.toml
restarting/from_7.3.0/ConfigureStorageMigrationTestRestart-2.toml)
add_fdb_test(
TEST_FILES restarting/from_7.3.0/UpgradeAndBackupRestore-1.toml
restarting/from_7.3.0/UpgradeAndBackupRestore-2.toml)
add_fdb_test(
TEST_FILES restarting/from_7.3.0/VersionVectorDisableRestart-1.toml
restarting/from_7.3.0/VersionVectorDisableRestart-2.toml)
add_fdb_test(
TEST_FILES restarting/from_7.3.0/VersionVectorEnableRestart-1.toml
restarting/from_7.3.0/VersionVectorEnableRestart-2.toml)
add_fdb_test(
TEST_FILES restarting/from_7.3.0/BlobGranuleRestartCycle-1.toml
restarting/from_7.3.0/BlobGranuleRestartCycle-2.toml)
add_fdb_test(
TEST_FILES restarting/to_7.1.0_until_7.2.0/ConfigureStorageMigrationTestRestart-1.toml
restarting/to_7.1.0_until_7.2.0/ConfigureStorageMigrationTestRestart-2.toml)

View File

@ -0,0 +1,28 @@
[configuration]
allowDefaultTenant = false
tenantModes = ['required']
allowCreatingTenants = false
encryptModes = ['domain_aware']
[[knobs]]
enable_encryption = true
[[test]]
testTitle = 'BackupAndRestoreWithEKPKeyFetchFailures'
clearAfterTest = false
simBackupAgents = 'BackupToFile'
[[test.workload]]
testName = 'BulkLoadWithTenants'
maxNumTenants = 100
minNumTenants = 1
enableEKPKeyFetchFailure = true
transactionsPerSecond = 2500.0
testDuration = 60.0
[[test.workload]]
testName = 'BackupAndRestoreCorrectness'
defaultBackup = true
backupAfter = 10.0
restoreAfter = 100.0
backupRangesCount = -1

View File

@ -1,4 +1,6 @@
[configuration]
allowDefaultTenant = false
allowCreatingTenants = false
tenantModes = ['required']
encryptModes = ['domain_aware']

View File

@ -0,0 +1,48 @@
[configuration]
allowDefaultTenant = false
tenantModes = ['required']
allowCreatingTenants = false
encryptModes = ['domain_aware']
[[knobs]]
enable_encryption = true
[[test]]
testTitle = 'SubmitBackup'
simBackupAgents = 'BackupToFile'
runConsistencyCheck = false
[[test.workload]]
testName = 'IncrementalBackup'
tag = 'default'
submitOnly = true
waitForBackup = true
[[test]]
testTitle = 'BulkLoad'
clearAfterTest = true
simBackupAgents = 'BackupToFile'
[[test.workload]]
testName = 'BulkLoadWithTenants'
maxNumTenants = 100
minNumTenants = 1
transactionsPerSecond = 3000.0
enableEKPKeyFetchFailure = true
[[test.workload]]
testName = 'IncrementalBackup'
tag = 'default'
waitForBackup = true
stopBackup = true
[[test]]
testTitle = 'SubmitRestore'
clearAfterTest = false
simBackupAgents = 'BackupToFile'
[[test.workload]]
testName = 'IncrementalBackup'
tag = 'default'
restoreOnly = true

View File

@ -8,8 +8,7 @@ proxy_use_resolver_private_mutations = false
[[test]]
testTitle = 'RawTenantAccessClean'
clearAfterTest = true
timeout = 200
clearAfterTest = false
runSetup = true
[[test.workload]]

View File

@ -0,0 +1,57 @@
storageEngineExcludeTypes = 3
[[test]]
testTitle = 'SubmitBackup'
simBackupAgents = 'BackupToFile'
clearAfterTest = false
runConsistencyCheck = false
[[test.workload]]
testName = 'SubmitBackup'
delayFor = 0
stopWhenDone = false
[[test]]
testTitle = 'FirstCycleTest'
clearAfterTest = false
runConsistencyCheck = false
[[test.workload]]
testName = 'Cycle'
nodeCount = 30000
transactionsPerSecond = 2500.0
testDuration = 30.0
expectedRate = 0
keyPrefix = 'BeforeRestart'
[[test.workload]]
testName = 'RandomClogging'
testDuration = 90.0
[[test.workload]]
testName = 'Rollback'
meanDelay = 90.0
testDuration = 90.0
[[test.workload]]
testName = 'Attrition'
machinesToKill = 10
machinesToLeave = 3
reboot = true
testDuration = 90.0
[[test.workload]]
testName = 'Attrition'
machinesToKill = 10
machinesToLeave = 3
reboot = true
testDuration = 90.0
[[test]]
testTitle = 'SaveDatabase'
clearAfterTest = false
[[test.workload]]
testName = 'SaveAndKill'
restartInfoLocation = 'simfdb/restartInfo.ini'
testDuration = 30.0

View File

@ -0,0 +1,61 @@
[[test]]
testTitle = 'SecondCycleTest'
simBackupAgents = 'BackupToFile'
clearAfterTest = false
runConsistencyCheck = false
[[test.workload]]
testName = 'Cycle'
nodeCount = 30000
transactionsPerSecond = 2500.0
testDuration = 30.0
expectedRate = 0
keyPrefix = 'AfterRestart'
[[test.workload]]
testName = 'RandomClogging'
testDuration = 90.0
[[test.workload]]
testName = 'Rollback'
meanDelay = 90.0
testDuration = 90.0
[[test.workload]]
testName = 'Attrition'
machinesToKill = 10
machinesToLeave = 3
reboot = true
testDuration = 90.0
[[test.workload]]
testName = 'Attrition'
machinesToKill = 10
machinesToLeave = 3
reboot = true
testDuration = 90.0
[[test]]
testTitle = 'RestoreBackup'
simBackupAgents = 'BackupToFile'
clearAfterTest = false
[[test.workload]]
testName = 'RestoreBackup'
tag = 'default'
[[test]]
testTitle = 'CheckCycles'
checkOnly = true
[[test.workload]]
testName = 'Cycle'
nodeCount = 30000
keyPrefix = 'AfterRestart'
expectedRate = 0
[[test.workload]]
testName = 'Cycle'
nodeCount = 30000
keyPrefix = 'BeforeRestart'
expectedRate = 0

View File

@ -0,0 +1,68 @@
[configuration]
storageEngineExcludeTypes = [3]
disableEncryption = true
[[test]]
testTitle = 'SubmitBackup'
simBackupAgents = 'BackupToFile'
clearAfterTest = false
runConsistencyCheck = false
disabledFailureInjectionWorkloads = 'Attrition'
[[test.workload]]
testName = 'SubmitBackup'
delayFor = 0
stopWhenDone = false
[[test.workload]]
testName = 'Attrition'
machinesToKill = 10
machinesToLeave = 3
reboot = true
testDuration = 30.0
[[test]]
testTitle = 'FirstCycleTest'
clearAfterTest = false
runConsistencyCheck = false
disabledFailureInjectionWorkloads = 'Attrition'
[[test.workload]]
testName = 'Cycle'
nodeCount = 30000
transactionsPerSecond = 2500.0
testDuration = 30.0
expectedRate = 0
keyPrefix = 'BeforeRestart'
[[test.workload]]
testName = 'RandomClogging'
testDuration = 90.0
[[test.workload]]
testName = 'Rollback'
meanDelay = 90.0
testDuration = 90.0
[[test.workload]]
testName = 'Attrition'
machinesToKill = 10
machinesToLeave = 3
reboot = true
testDuration = 90.0
[[test.workload]]
testName = 'Attrition'
machinesToKill = 10
machinesToLeave = 3
reboot = true
testDuration = 90.0
[[test]]
testTitle = 'SaveDatabase'
clearAfterTest = false
[[test.workload]]
testName = 'SaveAndKill'
restartInfoLocation = 'simfdb/restartInfo.ini'
testDuration = 30.0

View File

@ -0,0 +1,33 @@
# Blob Granules are only upgrade-able as of snowflake/release-71.2.3 and release-7.2
[configuration]
testClass = "BlobGranuleRestart"
blobGranulesEnabled = true
allowDefaultTenant = false
injectTargetedSSRestart = true
injectSSDelay = true
# FIXME: re-enable rocks at some point
storageEngineExcludeTypes = [4, 5]
[[test]]
testTitle = 'BlobGranuleRestartCycle'
clearAfterTest = false
[[test.workload]]
testName = 'Cycle'
transactionsPerSecond = 250.0
testDuration = 30.0
expectedRate = 0
[[test.workload]]
testName = 'BlobGranuleVerifier'
testDuration = 30.0
# don't delete state after test
clearAndMergeCheck = false
doForcePurge = false
initAtEnd = false
[[test.workload]]
testName = 'SaveAndKill'
restartInfoLocation = 'simfdb/restartInfo.ini'
testDuration = 90.0

View File

@ -0,0 +1,27 @@
# Blob Granules are only upgrade-able as of snowflake/release-71.2.3 and release-7.2
[configuration]
testClass = "BlobGranuleRestart"
blobGranulesEnabled = true
allowDefaultTenant = false
injectTargetedSSRestart = true
injectSSDelay = true
# FIXME: re-enable rocks at some point
storageEngineExcludeTypes = [4, 5]
[[test]]
testTitle = 'BlobGranuleRestartCycle'
clearAfterTest = false
runSetup = false
[[test.workload]]
testName = 'Cycle'
transactionsPerSecond = 250.0
testDuration = 30.0
expectedRate = 0
[[test.workload]]
testName = 'BlobGranuleVerifier'
testDuration = 30.0
# cycle does its own workload checking, don't want clear racing with its checking
clearAndMergeCheck = false

Some files were not shown because too many files have changed in this diff