Merge commit '0872cbfb2f00886817f18584d95af217e28ad51d' into storageserver-pml
# Conflicts: # fdbserver/storageserver.actor.cpp
This commit is contained in:
commit
e5a5ec36a4
|
@ -290,6 +290,8 @@ if(NOT WIN32)
|
|||
@TMP_DIR@
|
||||
--log-dir
|
||||
@LOG_DIR@
|
||||
--knob
|
||||
delete-native-lib-after-loading=false # for properly symbolizing xSAN errors
|
||||
)
|
||||
|
||||
add_fdbclient_test(
|
||||
|
@ -306,6 +308,8 @@ if(NOT WIN32)
|
|||
@TMP_DIR@
|
||||
--log-dir
|
||||
@LOG_DIR@
|
||||
--knob
|
||||
delete-native-lib-after-loading=false # for properly symbolizing xSAN errors
|
||||
)
|
||||
|
||||
add_fdbclient_test(
|
||||
|
@ -327,6 +331,8 @@ if(NOT WIN32)
|
|||
@TMP_DIR@
|
||||
--log-dir
|
||||
@LOG_DIR@
|
||||
--knob
|
||||
delete-native-lib-after-loading=false # for properly symbolizing xSAN errors
|
||||
)
|
||||
|
||||
add_fdbclient_test(
|
||||
|
@ -352,6 +358,8 @@ if(NOT WIN32)
|
|||
@CLIENT_KEY_FILE@
|
||||
--tls-ca-file
|
||||
@SERVER_CA_FILE@
|
||||
--knob
|
||||
delete-native-lib-after-loading=false # for properly symbolizing xSAN errors
|
||||
)
|
||||
|
||||
add_test(NAME fdb_c_upgrade_to_future_version
|
||||
|
|
|
@ -585,6 +585,58 @@ extern "C" DLLEXPORT FDBFuture* fdb_tenant_wait_purge_granules_complete(FDBTenan
|
|||
.extractPtr());
|
||||
}
|
||||
|
||||
extern "C" DLLEXPORT FDBFuture* fdb_tenant_blobbify_range(FDBTenant* tenant,
|
||||
uint8_t const* begin_key_name,
|
||||
int begin_key_name_length,
|
||||
uint8_t const* end_key_name,
|
||||
int end_key_name_length) {
|
||||
return (FDBFuture*)(TENANT(tenant)
|
||||
->blobbifyRange(KeyRangeRef(StringRef(begin_key_name, begin_key_name_length),
|
||||
StringRef(end_key_name, end_key_name_length)))
|
||||
.extractPtr());
|
||||
}
|
||||
|
||||
extern "C" DLLEXPORT FDBFuture* fdb_tenant_unblobbify_range(FDBTenant* tenant,
|
||||
uint8_t const* begin_key_name,
|
||||
int begin_key_name_length,
|
||||
uint8_t const* end_key_name,
|
||||
int end_key_name_length) {
|
||||
return (FDBFuture*)(TENANT(tenant)
|
||||
->unblobbifyRange(KeyRangeRef(StringRef(begin_key_name, begin_key_name_length),
|
||||
StringRef(end_key_name, end_key_name_length)))
|
||||
.extractPtr());
|
||||
}
|
||||
|
||||
extern "C" DLLEXPORT FDBFuture* fdb_tenant_list_blobbified_ranges(FDBTenant* tenant,
|
||||
uint8_t const* begin_key_name,
|
||||
int begin_key_name_length,
|
||||
uint8_t const* end_key_name,
|
||||
int end_key_name_length,
|
||||
int rangeLimit) {
|
||||
return (FDBFuture*)(TENANT(tenant)
|
||||
->listBlobbifiedRanges(KeyRangeRef(StringRef(begin_key_name, begin_key_name_length),
|
||||
StringRef(end_key_name, end_key_name_length)),
|
||||
rangeLimit)
|
||||
.extractPtr());
|
||||
}
|
||||
|
||||
extern "C" DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_tenant_verify_blob_range(FDBTenant* tenant,
|
||||
uint8_t const* begin_key_name,
|
||||
int begin_key_name_length,
|
||||
uint8_t const* end_key_name,
|
||||
int end_key_name_length,
|
||||
int64_t version) {
|
||||
Optional<Version> rv;
|
||||
if (version != latestVersion) {
|
||||
rv = version;
|
||||
}
|
||||
return (FDBFuture*)(TENANT(tenant)
|
||||
->verifyBlobRange(KeyRangeRef(StringRef(begin_key_name, begin_key_name_length),
|
||||
StringRef(end_key_name, end_key_name_length)),
|
||||
rv)
|
||||
.extractPtr());
|
||||
}
|
||||
|
||||
extern "C" DLLEXPORT void fdb_tenant_destroy(FDBTenant* tenant) {
|
||||
try {
|
||||
TENANT(tenant)->delref();
|
||||
|
|
|
@ -376,6 +376,39 @@ DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_tenant_wait_purge_granules_complete(
|
|||
uint8_t const* purge_key_name,
|
||||
int purge_key_name_length);
|
||||
|
||||
DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_tenant_blobbify_range(FDBTenant* tenant,
|
||||
uint8_t const* begin_key_name,
|
||||
int begin_key_name_length,
|
||||
uint8_t const* end_key_name,
|
||||
int end_key_name_length);
|
||||
|
||||
DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_tenant_unblobbify_range(FDBTenant* tenant,
|
||||
uint8_t const* begin_key_name,
|
||||
int begin_key_name_length,
|
||||
uint8_t const* end_key_name,
|
||||
int end_key_name_length);
|
||||
|
||||
DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_tenant_list_blobbified_ranges(FDBTenant* tenant,
|
||||
uint8_t const* begin_key_name,
|
||||
int begin_key_name_length,
|
||||
uint8_t const* end_key_name,
|
||||
int end_key_name_length,
|
||||
int rangeLimit);
|
||||
|
||||
DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_tenant_list_blobbified_ranges(FDBTenant* tenant,
|
||||
uint8_t const* begin_key_name,
|
||||
int begin_key_name_length,
|
||||
uint8_t const* end_key_name,
|
||||
int end_key_name_length,
|
||||
int rangeLimit);
|
||||
|
||||
DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_tenant_verify_blob_range(FDBTenant* tenant,
|
||||
uint8_t const* begin_key_name,
|
||||
int begin_key_name_length,
|
||||
uint8_t const* end_key_name,
|
||||
int end_key_name_length,
|
||||
int64_t version);
|
||||
|
||||
DLLEXPORT void fdb_tenant_destroy(FDBTenant* tenant);
|
||||
|
||||
DLLEXPORT void fdb_transaction_destroy(FDBTransaction* tr);
|
||||
|
|
|
@ -166,6 +166,7 @@ void ApiWorkload::populateDataTx(TTaskFct cont, std::optional<int> tenantId) {
|
|||
execTransaction(
|
||||
[kvPairs](auto ctx) {
|
||||
for (const fdb::KeyValue& kv : *kvPairs) {
|
||||
ctx->tx().addReadConflictRange(kv.key, kv.key + fdb::Key(1, '\x00'));
|
||||
ctx->tx().set(kv.key, kv.value);
|
||||
}
|
||||
ctx->commit();
|
||||
|
@ -257,6 +258,7 @@ void ApiWorkload::randomInsertOp(TTaskFct cont, std::optional<int> tenantId) {
|
|||
execTransaction(
|
||||
[kvPairs](auto ctx) {
|
||||
for (const fdb::KeyValue& kv : *kvPairs) {
|
||||
ctx->tx().addReadConflictRange(kv.key, kv.key + fdb::Key(1, '\x00'));
|
||||
ctx->tx().set(kv.key, kv.value);
|
||||
}
|
||||
ctx->commit();
|
||||
|
@ -279,6 +281,7 @@ void ApiWorkload::randomClearOp(TTaskFct cont, std::optional<int> tenantId) {
|
|||
execTransaction(
|
||||
[keys](auto ctx) {
|
||||
for (const auto& key : *keys) {
|
||||
ctx->tx().addReadConflictRange(key, key + fdb::Key(1, '\x00'));
|
||||
ctx->tx().clear(key);
|
||||
}
|
||||
ctx->commit();
|
||||
|
@ -300,6 +303,7 @@ void ApiWorkload::randomClearRangeOp(TTaskFct cont, std::optional<int> tenantId)
|
|||
}
|
||||
execTransaction(
|
||||
[begin, end](auto ctx) {
|
||||
ctx->tx().addReadConflictRange(begin, end);
|
||||
ctx->tx().clearRange(begin, end);
|
||||
ctx->commit();
|
||||
},
|
||||
|
|
|
@ -160,6 +160,7 @@ private:
|
|||
execTransaction(
|
||||
// 1. Set the key to val1
|
||||
[key, val1](auto ctx) {
|
||||
ctx->tx().addReadConflictRange(key, key + fdb::Key(1, '\x00'));
|
||||
ctx->tx().set(key, val1);
|
||||
ctx->commit();
|
||||
},
|
||||
|
@ -296,6 +297,7 @@ private:
|
|||
// 1. Set the key to initial value
|
||||
[key, val](auto ctx) {
|
||||
ctx->tx().set(key, val);
|
||||
ctx->tx().addReadConflictRange(key, key + fdb::Key(1, '\x00'));
|
||||
ctx->commit();
|
||||
},
|
||||
[this, key, val, cont]() {
|
||||
|
|
|
@ -50,6 +50,7 @@ private:
|
|||
execTransaction(
|
||||
[kvPairs](auto ctx) {
|
||||
for (const fdb::KeyValue& kv : *kvPairs) {
|
||||
ctx->tx().addReadConflictRange(kv.key, kv.key + fdb::Key(1, '\x00'));
|
||||
ctx->tx().set(kv.key, kv.value);
|
||||
}
|
||||
ctx->commit();
|
||||
|
|
|
@ -77,10 +77,11 @@ public:
|
|||
int retryLimit,
|
||||
std::string bgBasePath,
|
||||
std::optional<fdb::BytesRef> tenantName,
|
||||
bool transactional)
|
||||
bool transactional,
|
||||
bool restartOnTimeout)
|
||||
: executor(executor), startFct(startFct), contAfterDone(cont), scheduler(scheduler), retryLimit(retryLimit),
|
||||
txState(TxState::IN_PROGRESS), commitCalled(false), bgBasePath(bgBasePath), tenantName(tenantName),
|
||||
transactional(transactional) {
|
||||
transactional(transactional), restartOnTimeout(restartOnTimeout) {
|
||||
databaseCreateErrorInjected = executor->getOptions().injectDatabaseCreateErrors &&
|
||||
Random::get().randomBool(executor->getOptions().databaseCreateErrorRatio);
|
||||
if (databaseCreateErrorInjected) {
|
||||
|
@ -177,7 +178,8 @@ public:
|
|||
|
||||
ASSERT(!onErrorFuture);
|
||||
|
||||
if (databaseCreateErrorInjected && canBeInjectedDatabaseCreateError(err.code())) {
|
||||
if ((databaseCreateErrorInjected && canBeInjectedDatabaseCreateError(err.code())) ||
|
||||
(restartOnTimeout && err.code() == error_code_transaction_timed_out)) {
|
||||
// Failed to create a database because of failure injection
|
||||
// Restart by recreating the transaction in a valid database
|
||||
recreateAndRestartTransaction();
|
||||
|
@ -235,7 +237,11 @@ protected:
|
|||
fdb::Error err = onErrorFuture.error();
|
||||
onErrorFuture = {};
|
||||
if (err) {
|
||||
transactionFailed(err);
|
||||
if (restartOnTimeout && err.code() == error_code_transaction_timed_out) {
|
||||
recreateAndRestartTransaction();
|
||||
} else {
|
||||
transactionFailed(err);
|
||||
}
|
||||
} else {
|
||||
restartTransaction();
|
||||
}
|
||||
|
@ -359,6 +365,9 @@ protected:
|
|||
// Accessed on initialization and in ON_ERROR state only (no need for mutex)
|
||||
bool databaseCreateErrorInjected;
|
||||
|
||||
// Restart the transaction automatically on timeout errors
|
||||
const bool restartOnTimeout;
|
||||
|
||||
// The tenant that we will run this transaction in
|
||||
const std::optional<fdb::BytesRef> tenantName;
|
||||
|
||||
|
@ -378,9 +387,17 @@ public:
|
|||
int retryLimit,
|
||||
std::string bgBasePath,
|
||||
std::optional<fdb::BytesRef> tenantName,
|
||||
bool transactional)
|
||||
: TransactionContextBase(executor, startFct, cont, scheduler, retryLimit, bgBasePath, tenantName, transactional) {
|
||||
}
|
||||
bool transactional,
|
||||
bool restartOnTimeout)
|
||||
: TransactionContextBase(executor,
|
||||
startFct,
|
||||
cont,
|
||||
scheduler,
|
||||
retryLimit,
|
||||
bgBasePath,
|
||||
tenantName,
|
||||
transactional,
|
||||
restartOnTimeout) {}
|
||||
|
||||
protected:
|
||||
void doContinueAfter(fdb::Future f, TTaskFct cont, bool retryOnError) override {
|
||||
|
@ -456,9 +473,17 @@ public:
|
|||
int retryLimit,
|
||||
std::string bgBasePath,
|
||||
std::optional<fdb::BytesRef> tenantName,
|
||||
bool transactional)
|
||||
: TransactionContextBase(executor, startFct, cont, scheduler, retryLimit, bgBasePath, tenantName, transactional) {
|
||||
}
|
||||
bool transactional,
|
||||
bool restartOnTimeout)
|
||||
: TransactionContextBase(executor,
|
||||
startFct,
|
||||
cont,
|
||||
scheduler,
|
||||
retryLimit,
|
||||
bgBasePath,
|
||||
tenantName,
|
||||
transactional,
|
||||
restartOnTimeout) {}
|
||||
|
||||
protected:
|
||||
void doContinueAfter(fdb::Future f, TTaskFct cont, bool retryOnError) override {
|
||||
|
@ -470,7 +495,7 @@ protected:
|
|||
lock.unlock();
|
||||
try {
|
||||
f.then([this](fdb::Future f) { futureReadyCallback(f, this); });
|
||||
} catch (std::runtime_error& err) {
|
||||
} catch (std::exception& err) {
|
||||
lock.lock();
|
||||
callbackMap.erase(f);
|
||||
lock.unlock();
|
||||
|
@ -482,7 +507,7 @@ protected:
|
|||
try {
|
||||
AsyncTransactionContext* txCtx = (AsyncTransactionContext*)param;
|
||||
txCtx->onFutureReady(f);
|
||||
} catch (std::runtime_error& err) {
|
||||
} catch (std::exception& err) {
|
||||
fmt::print("Unexpected exception in callback {}\n", err.what());
|
||||
abort();
|
||||
} catch (...) {
|
||||
|
@ -544,7 +569,7 @@ protected:
|
|||
try {
|
||||
AsyncTransactionContext* txCtx = (AsyncTransactionContext*)param;
|
||||
txCtx->onErrorReady(f);
|
||||
} catch (std::runtime_error& err) {
|
||||
} catch (std::exception& err) {
|
||||
fmt::print("Unexpected exception in callback {}\n", err.what());
|
||||
abort();
|
||||
} catch (...) {
|
||||
|
@ -673,7 +698,8 @@ public:
|
|||
void execute(TOpStartFct startFct,
|
||||
TOpContFct cont,
|
||||
std::optional<fdb::BytesRef> tenantName,
|
||||
bool transactional) override {
|
||||
bool transactional,
|
||||
bool restartOnTimeout) override {
|
||||
try {
|
||||
std::shared_ptr<ITransactionContext> ctx;
|
||||
if (options.blockOnFutures) {
|
||||
|
@ -684,7 +710,8 @@ public:
|
|||
options.transactionRetryLimit,
|
||||
bgBasePath,
|
||||
tenantName,
|
||||
transactional);
|
||||
transactional,
|
||||
restartOnTimeout);
|
||||
} else {
|
||||
ctx = std::make_shared<AsyncTransactionContext>(this,
|
||||
startFct,
|
||||
|
@ -693,7 +720,8 @@ public:
|
|||
options.transactionRetryLimit,
|
||||
bgBasePath,
|
||||
tenantName,
|
||||
transactional);
|
||||
transactional,
|
||||
restartOnTimeout);
|
||||
}
|
||||
startFct(ctx);
|
||||
} catch (...) {
|
||||
|
|
|
@ -116,7 +116,8 @@ public:
|
|||
virtual void execute(TOpStartFct start,
|
||||
TOpContFct cont,
|
||||
std::optional<fdb::BytesRef> tenantName,
|
||||
bool transactional) = 0;
|
||||
bool transactional,
|
||||
bool restartOnTimeout) = 0;
|
||||
virtual fdb::Database selectDatabase() = 0;
|
||||
virtual std::string getClusterFileForErrorInjection() = 0;
|
||||
virtual const TransactionExecutorOptions& getOptions() = 0;
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
|
||||
#include "TesterWorkload.h"
|
||||
#include "TesterUtil.h"
|
||||
#include "fdb_c_options.g.h"
|
||||
#include "fmt/core.h"
|
||||
#include "test/apitester/TesterScheduler.h"
|
||||
#include <cstdlib>
|
||||
|
@ -82,6 +83,8 @@ WorkloadBase::WorkloadBase(const WorkloadConfig& config)
|
|||
: manager(nullptr), tasksScheduled(0), numErrors(0), clientId(config.clientId), numClients(config.numClients),
|
||||
failed(false), numTxCompleted(0), numTxStarted(0), inProgress(false) {
|
||||
maxErrors = config.getIntOption("maxErrors", 10);
|
||||
minTxTimeoutMs = config.getIntOption("minTxTimeoutMs", 0);
|
||||
maxTxTimeoutMs = config.getIntOption("maxTxTimeoutMs", 0);
|
||||
workloadId = fmt::format("{}{}", config.name, clientId);
|
||||
}
|
||||
|
||||
|
@ -129,9 +132,15 @@ void WorkloadBase::doExecute(TOpStartFct startFct,
|
|||
}
|
||||
tasksScheduled++;
|
||||
numTxStarted++;
|
||||
manager->txExecutor->execute(
|
||||
startFct,
|
||||
[this, startFct, cont, failOnError](fdb::Error err) {
|
||||
manager->txExecutor->execute( //
|
||||
[this, transactional, cont, startFct](auto ctx) {
|
||||
if (transactional && maxTxTimeoutMs > 0) {
|
||||
int timeoutMs = Random::get().randomInt(minTxTimeoutMs, maxTxTimeoutMs);
|
||||
ctx->tx().setOption(FDB_TR_OPTION_TIMEOUT, timeoutMs);
|
||||
}
|
||||
startFct(ctx);
|
||||
},
|
||||
[this, cont, failOnError](fdb::Error err) {
|
||||
numTxCompleted++;
|
||||
if (err.code() == error_code_success) {
|
||||
cont();
|
||||
|
@ -148,7 +157,8 @@ void WorkloadBase::doExecute(TOpStartFct startFct,
|
|||
scheduledTaskDone();
|
||||
},
|
||||
tenant,
|
||||
transactional);
|
||||
transactional,
|
||||
maxTxTimeoutMs > 0);
|
||||
}
|
||||
|
||||
void WorkloadBase::info(const std::string& msg) {
|
||||
|
|
|
@ -166,6 +166,12 @@ protected:
|
|||
// The maximum number of errors before stoppoing the workload
|
||||
int maxErrors;
|
||||
|
||||
// The timeout (in ms) automatically set for all transactions to a random value
|
||||
// in the range [minTxTimeoutMs, maxTxTimeoutMs]
|
||||
// If maxTxTimeoutMs <= 0, no timeout is set
|
||||
int minTxTimeoutMs;
|
||||
int maxTxTimeoutMs;
|
||||
|
||||
// Workload identifier, consisting of workload name and client ID
|
||||
std::string workloadId;
|
||||
|
||||
|
|
|
@ -429,7 +429,7 @@ bool runWorkloads(TesterOptions& options) {
|
|||
}
|
||||
workloadMgr.run();
|
||||
return !workloadMgr.failed();
|
||||
} catch (const std::runtime_error& err) {
|
||||
} catch (const std::exception& err) {
|
||||
fmt::print(stderr, "ERROR: {}\n", err.what());
|
||||
return false;
|
||||
}
|
||||
|
@ -461,7 +461,7 @@ int main(int argc, char** argv) {
|
|||
|
||||
fdb_check(fdb::network::stop());
|
||||
network_thread.join();
|
||||
} catch (const std::runtime_error& err) {
|
||||
} catch (const std::exception& err) {
|
||||
fmt::print(stderr, "ERROR: {}\n", err.what());
|
||||
retCode = 1;
|
||||
}
|
||||
|
|
|
@ -93,6 +93,10 @@ def run_tester(args, test_file):
|
|||
if args.tls_cert_file is not None:
|
||||
cmd += ["--tls-cert-file", args.tls_cert_file]
|
||||
|
||||
for knob in args.knobs:
|
||||
knob_name, knob_value = knob.split("=")
|
||||
cmd += ["--knob-" + knob_name, knob_value]
|
||||
|
||||
get_logger().info('\nRunning tester \'%s\'...' % ' '.join(cmd))
|
||||
proc = Popen(cmd, stdout=sys.stdout, stderr=sys.stderr)
|
||||
timed_out = False
|
||||
|
@ -164,6 +168,8 @@ def parse_args(argv):
|
|||
help='Path to client\'s TLS certificate file')
|
||||
parser.add_argument('--tls-key-file', type=str, default=None,
|
||||
help='Path to client\'s TLS private key file')
|
||||
parser.add_argument('--knob', type=str, default=[], action="append", dest="knobs",
|
||||
help='[lowercase-knob-name]=[knob-value] (there may be multiple --knob options)')
|
||||
|
||||
return parser.parse_args(argv)
|
||||
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
[[test]]
|
||||
title = 'Cancel Transactions with Timeouts'
|
||||
multiThreaded = true
|
||||
buggify = true
|
||||
minFdbThreads = 2
|
||||
maxFdbThreads = 8
|
||||
minDatabases = 2
|
||||
maxDatabases = 8
|
||||
minClientThreads = 2
|
||||
maxClientThreads = 8
|
||||
minClients = 2
|
||||
maxClients = 8
|
||||
|
||||
[[test.workload]]
|
||||
name = 'CancelTransaction'
|
||||
minKeyLength = 1
|
||||
maxKeyLength = 64
|
||||
minValueLength = 1
|
||||
maxValueLength = 1000
|
||||
maxKeysPerTransaction = 50
|
||||
initialSize = 100
|
||||
numRandomOperations = 100
|
||||
readExistingKeysRatio = 0.9
|
||||
minTxTimeoutMs = 10
|
||||
maxTxTimeoutMs = 10000
|
|
@ -0,0 +1,33 @@
|
|||
[[test]]
|
||||
title = 'API Correctness with Timeouts'
|
||||
multiThreaded = true
|
||||
buggify = true
|
||||
minFdbThreads = 2
|
||||
maxFdbThreads = 8
|
||||
minDatabases = 2
|
||||
maxDatabases = 8
|
||||
minClientThreads = 2
|
||||
maxClientThreads = 8
|
||||
minClients = 2
|
||||
maxClients = 8
|
||||
|
||||
[[test.workload]]
|
||||
name = 'ApiCorrectness'
|
||||
minKeyLength = 1
|
||||
maxKeyLength = 64
|
||||
minValueLength = 1
|
||||
maxValueLength = 1000
|
||||
maxKeysPerTransaction = 50
|
||||
initialSize = 100
|
||||
numRandomOperations = 100
|
||||
readExistingKeysRatio = 0.9
|
||||
minTxTimeoutMs = 100
|
||||
maxTxTimeoutMs = 10000
|
||||
|
||||
[[test.workload]]
|
||||
name = 'AtomicOpsCorrectness'
|
||||
initialSize = 0
|
||||
numRandomOperations = 100
|
||||
minTxTimeoutMs = 100
|
||||
maxTxTimeoutMs = 10000
|
||||
|
|
@ -875,6 +875,16 @@ int workerProcessMain(Arguments const& args, int worker_id, shared_memory::Acces
|
|||
}
|
||||
}
|
||||
|
||||
if (args.disable_client_bypass) {
|
||||
err = network::setOptionNothrow(FDB_NET_OPTION_DISABLE_CLIENT_BYPASS);
|
||||
if (err) {
|
||||
logr.error("network::setOption (FDB_NET_OPTION_DISABLE_CLIENT_BYPASS): {}",
|
||||
args.disable_client_bypass,
|
||||
err.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Network thread must be setup before doing anything */
|
||||
logr.debug("network::setup()");
|
||||
network::setup();
|
||||
|
@ -1005,6 +1015,7 @@ int initArguments(Arguments& args) {
|
|||
args.txnspec.ops[i][OP_COUNT] = 0;
|
||||
}
|
||||
args.client_threads_per_version = 0;
|
||||
args.disable_client_bypass = false;
|
||||
args.disable_ryw = 0;
|
||||
args.json_output_path[0] = '\0';
|
||||
args.stats_export_path[0] = '\0';
|
||||
|
@ -1188,6 +1199,8 @@ void usage() {
|
|||
printf("%-24s %s\n", " --flatbuffers", "Use flatbuffers");
|
||||
printf("%-24s %s\n", " --streaming", "Streaming mode: all (default), iterator, small, medium, large, serial");
|
||||
printf("%-24s %s\n", " --disable_ryw", "Disable snapshot read-your-writes");
|
||||
printf(
|
||||
"%-24s %s\n", " --disable_client_bypass", "Disable client-bypass forcing mako to use multi-version client");
|
||||
printf("%-24s %s\n", " --json_report=PATH", "Output stats to the specified json file (Default: mako.json)");
|
||||
printf("%-24s %s\n",
|
||||
" --bg_file_path=PATH",
|
||||
|
@ -1248,6 +1261,7 @@ int parseArguments(int argc, char* argv[], Arguments& args) {
|
|||
{ "txntagging_prefix", required_argument, NULL, ARG_TXNTAGGINGPREFIX },
|
||||
{ "version", no_argument, NULL, ARG_VERSION },
|
||||
{ "client_threads_per_version", required_argument, NULL, ARG_CLIENT_THREADS_PER_VERSION },
|
||||
{ "disable_client_bypass", no_argument, NULL, ARG_DISABLE_CLIENT_BYPASS },
|
||||
{ "disable_ryw", no_argument, NULL, ARG_DISABLE_RYW },
|
||||
{ "json_report", optional_argument, NULL, ARG_JSON_REPORT },
|
||||
{ "bg_file_path", required_argument, NULL, ARG_BG_FILE_PATH },
|
||||
|
@ -1446,6 +1460,9 @@ int parseArguments(int argc, char* argv[], Arguments& args) {
|
|||
case ARG_CLIENT_THREADS_PER_VERSION:
|
||||
args.client_threads_per_version = atoi(optarg);
|
||||
break;
|
||||
case ARG_DISABLE_CLIENT_BYPASS:
|
||||
args.disable_client_bypass = true;
|
||||
break;
|
||||
case ARG_DISABLE_RYW:
|
||||
args.disable_ryw = 1;
|
||||
break;
|
||||
|
|
|
@ -75,6 +75,7 @@ enum ArgKind {
|
|||
ARG_STREAMING_MODE,
|
||||
ARG_DISABLE_RYW,
|
||||
ARG_CLIENT_THREADS_PER_VERSION,
|
||||
ARG_DISABLE_CLIENT_BYPASS,
|
||||
ARG_JSON_REPORT,
|
||||
ARG_BG_FILE_PATH, // if blob granule files are stored locally, mako will read and materialize them if this is set
|
||||
ARG_EXPORT_PATH,
|
||||
|
@ -169,6 +170,7 @@ struct Arguments {
|
|||
char txntagging_prefix[TAGPREFIXLENGTH_MAX];
|
||||
FDBStreamingMode streaming_mode;
|
||||
int64_t client_threads_per_version;
|
||||
bool disable_client_bypass;
|
||||
int disable_ryw;
|
||||
char json_output_path[PATH_MAX];
|
||||
bool bg_materialize_files;
|
||||
|
|
|
@ -392,6 +392,11 @@ func (o DatabaseOptions) SetTransactionIncludePortInAddress() error {
|
|||
return o.setOpt(505, nil)
|
||||
}
|
||||
|
||||
// Set a random idempotency id for all transactions. See the transaction option description for more information.
|
||||
func (o DatabaseOptions) SetTransactionAutomaticIdempotency() error {
|
||||
return o.setOpt(506, nil)
|
||||
}
|
||||
|
||||
// Allows ``get`` operations to read from sections of keyspace that have become unreadable because of versionstamp operations. This sets the ``bypass_unreadable`` option of each transaction created by this database. See the transaction option description for more information.
|
||||
func (o DatabaseOptions) SetTransactionBypassUnreadable() error {
|
||||
return o.setOpt(700, nil)
|
||||
|
@ -551,6 +556,18 @@ func (o TransactionOptions) SetSizeLimit(param int64) error {
|
|||
return o.setOpt(503, int64ToBytes(param))
|
||||
}
|
||||
|
||||
// Associate this transaction with this ID for the purpose of checking whether or not this transaction has already committed. Must be at least 16 bytes and less than 256 bytes.
|
||||
//
|
||||
// Parameter: Unique ID
|
||||
func (o TransactionOptions) SetIdempotencyId(param string) error {
|
||||
return o.setOpt(504, []byte(param))
|
||||
}
|
||||
|
||||
// Automatically assign a random 16 byte idempotency id for this transaction. Prevents commits from failing with ``commit_unknown_result``. WARNING: If you are also using the multiversion client or transaction timeouts, if either cluster_version_changed or transaction_timed_out was thrown during a commit, then that commit may have already succeeded or may succeed in the future.
|
||||
func (o TransactionOptions) SetAutomaticIdempotency() error {
|
||||
return o.setOpt(505, nil)
|
||||
}
|
||||
|
||||
// Snapshot read operations will see the results of writes done in the same transaction. This is the default behavior.
|
||||
func (o TransactionOptions) SetSnapshotRywEnable() error {
|
||||
return o.setOpt(600, nil)
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
|
||||
#include "com_apple_foundationdb_FDB.h"
|
||||
#include "com_apple_foundationdb_FDBDatabase.h"
|
||||
#include "com_apple_foundationdb_FDBTenant.h"
|
||||
#include "com_apple_foundationdb_FDBTransaction.h"
|
||||
#include "com_apple_foundationdb_FutureBool.h"
|
||||
#include "com_apple_foundationdb_FutureInt64.h"
|
||||
|
@ -1102,6 +1103,203 @@ JNIEXPORT void JNICALL Java_com_apple_foundationdb_FDBTenant_Tenant_1dispose(JNI
|
|||
fdb_tenant_destroy((FDBTenant*)tPtr);
|
||||
}
|
||||
|
||||
JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBTenant_Tenant_1purgeBlobGranules(JNIEnv* jenv,
|
||||
jobject,
|
||||
jlong tPtr,
|
||||
jbyteArray beginKeyBytes,
|
||||
jbyteArray endKeyBytes,
|
||||
jlong purgeVersion,
|
||||
jboolean force) {
|
||||
if (!tPtr || !beginKeyBytes || !endKeyBytes) {
|
||||
throwParamNotNull(jenv);
|
||||
return 0;
|
||||
}
|
||||
FDBTenant* tenant = (FDBTenant*)tPtr;
|
||||
|
||||
uint8_t* beginKeyArr = (uint8_t*)jenv->GetByteArrayElements(beginKeyBytes, JNI_NULL);
|
||||
if (!beginKeyArr) {
|
||||
if (!jenv->ExceptionOccurred())
|
||||
throwRuntimeEx(jenv, "Error getting handle to native resources");
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint8_t* endKeyArr = (uint8_t*)jenv->GetByteArrayElements(endKeyBytes, JNI_NULL);
|
||||
if (!endKeyArr) {
|
||||
jenv->ReleaseByteArrayElements(beginKeyBytes, (jbyte*)beginKeyArr, JNI_ABORT);
|
||||
if (!jenv->ExceptionOccurred())
|
||||
throwRuntimeEx(jenv, "Error getting handle to native resources");
|
||||
return 0;
|
||||
}
|
||||
|
||||
FDBFuture* f = fdb_tenant_purge_blob_granules(tenant,
|
||||
beginKeyArr,
|
||||
jenv->GetArrayLength(beginKeyBytes),
|
||||
endKeyArr,
|
||||
jenv->GetArrayLength(endKeyBytes),
|
||||
purgeVersion,
|
||||
(fdb_bool_t)force);
|
||||
jenv->ReleaseByteArrayElements(beginKeyBytes, (jbyte*)beginKeyArr, JNI_ABORT);
|
||||
jenv->ReleaseByteArrayElements(endKeyBytes, (jbyte*)endKeyArr, JNI_ABORT);
|
||||
return (jlong)f;
|
||||
}
|
||||
|
||||
JNIEXPORT jlong JNICALL
|
||||
Java_com_apple_foundationdb_FDBTenant_Tenant_1waitPurgeGranulesComplete(JNIEnv* jenv,
|
||||
jobject,
|
||||
jlong tPtr,
|
||||
jbyteArray purgeKeyBytes) {
|
||||
if (!tPtr || !purgeKeyBytes) {
|
||||
throwParamNotNull(jenv);
|
||||
return 0;
|
||||
}
|
||||
FDBTenant* tenant = (FDBTenant*)tPtr;
|
||||
uint8_t* purgeKeyArr = (uint8_t*)jenv->GetByteArrayElements(purgeKeyBytes, JNI_NULL);
|
||||
|
||||
if (!purgeKeyArr) {
|
||||
if (!jenv->ExceptionOccurred())
|
||||
throwRuntimeEx(jenv, "Error getting handle to native resources");
|
||||
return 0;
|
||||
}
|
||||
FDBFuture* f = fdb_tenant_wait_purge_granules_complete(tenant, purgeKeyArr, jenv->GetArrayLength(purgeKeyBytes));
|
||||
jenv->ReleaseByteArrayElements(purgeKeyBytes, (jbyte*)purgeKeyArr, JNI_ABORT);
|
||||
|
||||
return (jlong)f;
|
||||
}
|
||||
|
||||
JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBTenant_Tenant_1blobbifyRange(JNIEnv* jenv,
|
||||
jobject,
|
||||
jlong tPtr,
|
||||
jbyteArray beginKeyBytes,
|
||||
jbyteArray endKeyBytes) {
|
||||
if (!tPtr || !beginKeyBytes || !endKeyBytes) {
|
||||
throwParamNotNull(jenv);
|
||||
return 0;
|
||||
}
|
||||
FDBTenant* tenant = (FDBTenant*)tPtr;
|
||||
|
||||
uint8_t* beginKeyArr = (uint8_t*)jenv->GetByteArrayElements(beginKeyBytes, JNI_NULL);
|
||||
if (!beginKeyArr) {
|
||||
if (!jenv->ExceptionOccurred())
|
||||
throwRuntimeEx(jenv, "Error getting handle to native resources");
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint8_t* endKeyArr = (uint8_t*)jenv->GetByteArrayElements(endKeyBytes, JNI_NULL);
|
||||
if (!endKeyArr) {
|
||||
jenv->ReleaseByteArrayElements(beginKeyBytes, (jbyte*)beginKeyArr, JNI_ABORT);
|
||||
if (!jenv->ExceptionOccurred())
|
||||
throwRuntimeEx(jenv, "Error getting handle to native resources");
|
||||
return 0;
|
||||
}
|
||||
|
||||
FDBFuture* f = fdb_tenant_blobbify_range(
|
||||
tenant, beginKeyArr, jenv->GetArrayLength(beginKeyBytes), endKeyArr, jenv->GetArrayLength(endKeyBytes));
|
||||
jenv->ReleaseByteArrayElements(beginKeyBytes, (jbyte*)beginKeyArr, JNI_ABORT);
|
||||
jenv->ReleaseByteArrayElements(endKeyBytes, (jbyte*)endKeyArr, JNI_ABORT);
|
||||
return (jlong)f;
|
||||
}
|
||||
|
||||
JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBTenant_Tenant_1unblobbifyRange(JNIEnv* jenv,
|
||||
jobject,
|
||||
jlong tPtr,
|
||||
jbyteArray beginKeyBytes,
|
||||
jbyteArray endKeyBytes) {
|
||||
if (!tPtr || !beginKeyBytes || !endKeyBytes) {
|
||||
throwParamNotNull(jenv);
|
||||
return 0;
|
||||
}
|
||||
FDBTenant* tenant = (FDBTenant*)tPtr;
|
||||
|
||||
uint8_t* beginKeyArr = (uint8_t*)jenv->GetByteArrayElements(beginKeyBytes, JNI_NULL);
|
||||
if (!beginKeyArr) {
|
||||
if (!jenv->ExceptionOccurred())
|
||||
throwRuntimeEx(jenv, "Error getting handle to native resources");
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint8_t* endKeyArr = (uint8_t*)jenv->GetByteArrayElements(endKeyBytes, JNI_NULL);
|
||||
if (!endKeyArr) {
|
||||
jenv->ReleaseByteArrayElements(beginKeyBytes, (jbyte*)beginKeyArr, JNI_ABORT);
|
||||
if (!jenv->ExceptionOccurred())
|
||||
throwRuntimeEx(jenv, "Error getting handle to native resources");
|
||||
return 0;
|
||||
}
|
||||
|
||||
FDBFuture* f = fdb_tenant_unblobbify_range(
|
||||
tenant, beginKeyArr, jenv->GetArrayLength(beginKeyBytes), endKeyArr, jenv->GetArrayLength(endKeyBytes));
|
||||
jenv->ReleaseByteArrayElements(beginKeyBytes, (jbyte*)beginKeyArr, JNI_ABORT);
|
||||
jenv->ReleaseByteArrayElements(endKeyBytes, (jbyte*)endKeyArr, JNI_ABORT);
|
||||
return (jlong)f;
|
||||
}
|
||||
|
||||
JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBTenant_Tenant_1listBlobbifiedRanges(JNIEnv* jenv,
|
||||
jobject,
|
||||
jlong tPtr,
|
||||
jbyteArray beginKeyBytes,
|
||||
jbyteArray endKeyBytes,
|
||||
jint rangeLimit) {
|
||||
if (!tPtr || !beginKeyBytes || !endKeyBytes) {
|
||||
throwParamNotNull(jenv);
|
||||
return 0;
|
||||
}
|
||||
FDBTenant* tenant = (FDBTenant*)tPtr;
|
||||
|
||||
uint8_t* startKey = (uint8_t*)jenv->GetByteArrayElements(beginKeyBytes, JNI_NULL);
|
||||
if (!startKey) {
|
||||
if (!jenv->ExceptionOccurred())
|
||||
throwRuntimeEx(jenv, "Error getting handle to native resources");
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint8_t* endKey = (uint8_t*)jenv->GetByteArrayElements(endKeyBytes, JNI_NULL);
|
||||
if (!endKey) {
|
||||
jenv->ReleaseByteArrayElements(beginKeyBytes, (jbyte*)startKey, JNI_ABORT);
|
||||
if (!jenv->ExceptionOccurred())
|
||||
throwRuntimeEx(jenv, "Error getting handle to native resources");
|
||||
return 0;
|
||||
}
|
||||
|
||||
FDBFuture* f = fdb_tenant_list_blobbified_ranges(
|
||||
tenant, startKey, jenv->GetArrayLength(beginKeyBytes), endKey, jenv->GetArrayLength(endKeyBytes), rangeLimit);
|
||||
jenv->ReleaseByteArrayElements(beginKeyBytes, (jbyte*)startKey, JNI_ABORT);
|
||||
jenv->ReleaseByteArrayElements(endKeyBytes, (jbyte*)endKey, JNI_ABORT);
|
||||
return (jlong)f;
|
||||
}
|
||||
|
||||
JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBTenant_Tenant_1verifyBlobRange(JNIEnv* jenv,
|
||||
jobject,
|
||||
jlong tPtr,
|
||||
jbyteArray beginKeyBytes,
|
||||
jbyteArray endKeyBytes,
|
||||
jlong version) {
|
||||
if (!tPtr || !beginKeyBytes || !endKeyBytes) {
|
||||
throwParamNotNull(jenv);
|
||||
return 0;
|
||||
}
|
||||
FDBTenant* tenant = (FDBTenant*)tPtr;
|
||||
|
||||
uint8_t* startKey = (uint8_t*)jenv->GetByteArrayElements(beginKeyBytes, JNI_NULL);
|
||||
if (!startKey) {
|
||||
if (!jenv->ExceptionOccurred())
|
||||
throwRuntimeEx(jenv, "Error getting handle to native resources");
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint8_t* endKey = (uint8_t*)jenv->GetByteArrayElements(endKeyBytes, JNI_NULL);
|
||||
if (!endKey) {
|
||||
jenv->ReleaseByteArrayElements(beginKeyBytes, (jbyte*)startKey, JNI_ABORT);
|
||||
if (!jenv->ExceptionOccurred())
|
||||
throwRuntimeEx(jenv, "Error getting handle to native resources");
|
||||
return 0;
|
||||
}
|
||||
|
||||
FDBFuture* f = fdb_tenant_verify_blob_range(
|
||||
tenant, startKey, jenv->GetArrayLength(beginKeyBytes), endKey, jenv->GetArrayLength(endKeyBytes), version);
|
||||
jenv->ReleaseByteArrayElements(beginKeyBytes, (jbyte*)startKey, JNI_ABORT);
|
||||
jenv->ReleaseByteArrayElements(endKeyBytes, (jbyte*)endKey, JNI_ABORT);
|
||||
return (jlong)f;
|
||||
}
|
||||
|
||||
JNIEXPORT void JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1setVersion(JNIEnv* jenv,
|
||||
jobject,
|
||||
jlong tPtr,
|
||||
|
|
|
@ -138,6 +138,66 @@ class FDBTenant extends NativeObjectWrapper implements Tenant {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public CompletableFuture<byte[]> purgeBlobGranules(byte[] beginKey, byte[] endKey, long purgeVersion, boolean force, Executor e) {
|
||||
pointerReadLock.lock();
|
||||
try {
|
||||
return new FutureKey(Tenant_purgeBlobGranules(getPtr(), beginKey, endKey, purgeVersion, force), e, eventKeeper);
|
||||
} finally {
|
||||
pointerReadLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public CompletableFuture<Void> waitPurgeGranulesComplete(byte[] purgeKey, Executor e) {
|
||||
pointerReadLock.lock();
|
||||
try {
|
||||
return new FutureVoid(Tenant_waitPurgeGranulesComplete(getPtr(), purgeKey), e);
|
||||
} finally {
|
||||
pointerReadLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public CompletableFuture<Boolean> blobbifyRange(byte[] beginKey, byte[] endKey, Executor e) {
|
||||
pointerReadLock.lock();
|
||||
try {
|
||||
return new FutureBool(Tenant_blobbifyRange(getPtr(), beginKey, endKey), e);
|
||||
} finally {
|
||||
pointerReadLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public CompletableFuture<Boolean> unblobbifyRange(byte[] beginKey, byte[] endKey, Executor e) {
|
||||
pointerReadLock.lock();
|
||||
try {
|
||||
return new FutureBool(Tenant_unblobbifyRange(getPtr(), beginKey, endKey), e);
|
||||
} finally {
|
||||
pointerReadLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public CompletableFuture<KeyRangeArrayResult> listBlobbifiedRanges(byte[] beginKey, byte[] endKey, int rangeLimit, Executor e) {
|
||||
pointerReadLock.lock();
|
||||
try {
|
||||
return new FutureKeyRangeArray(Tenant_listBlobbifiedRanges(getPtr(), beginKey, endKey, rangeLimit), e);
|
||||
} finally {
|
||||
pointerReadLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public CompletableFuture<Long> verifyBlobRange(byte[] beginKey, byte[] endKey, long version, Executor e) {
|
||||
pointerReadLock.lock();
|
||||
try {
|
||||
return new FutureInt64(Tenant_verifyBlobRange(getPtr(), beginKey, endKey, version), e);
|
||||
} finally {
|
||||
pointerReadLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getName() {
|
||||
return name;
|
||||
|
@ -155,4 +215,10 @@ class FDBTenant extends NativeObjectWrapper implements Tenant {
|
|||
|
||||
private native long Tenant_createTransaction(long cPtr);
|
||||
private native void Tenant_dispose(long cPtr);
|
||||
private native long Tenant_purgeBlobGranules(long cPtr, byte[] beginKey, byte[] endKey, long purgeVersion, boolean force);
|
||||
private native long Tenant_waitPurgeGranulesComplete(long cPtr, byte[] purgeKey);
|
||||
private native long Tenant_blobbifyRange(long cPtr, byte[] beginKey, byte[] endKey);
|
||||
private native long Tenant_unblobbifyRange(long cPtr, byte[] beginKey, byte[] endKey);
|
||||
private native long Tenant_listBlobbifiedRanges(long cPtr, byte[] beginKey, byte[] endKey, int rangeLimit);
|
||||
private native long Tenant_verifyBlobRange(long cPtr, byte[] beginKey, byte[] endKey, long version);
|
||||
}
|
|
@ -247,6 +247,173 @@ public interface Tenant extends AutoCloseable, TransactionContext {
|
|||
<T> CompletableFuture<T> runAsync(
|
||||
Function<? super Transaction, ? extends CompletableFuture<T>> retryable, Executor e);
|
||||
|
||||
|
||||
/**
|
||||
* Runs {@link #purgeBlobGranules(Function)} on the default executor.
|
||||
*
|
||||
* @param beginKey start of the key range
|
||||
* @param endKey end of the key range
|
||||
* @param force if true delete all data, if not keep data >= purgeVersion
|
||||
*
|
||||
* @return the key to watch for purge complete
|
||||
*/
|
||||
default CompletableFuture<byte[]> purgeBlobGranules(byte[] beginKey, byte[] endKey, boolean force) {
|
||||
return purgeBlobGranules(beginKey, endKey, -2, force, getExecutor());
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs {@link #purgeBlobGranules(Function)} on the default executor.
|
||||
*
|
||||
* @param beginKey start of the key range
|
||||
* @param endKey end of the key range
|
||||
* @param purgeVersion version to purge at
|
||||
* @param force if true delete all data, if not keep data >= purgeVersion
|
||||
*
|
||||
* @return the key to watch for purge complete
|
||||
*/
|
||||
default CompletableFuture<byte[]> purgeBlobGranules(byte[] beginKey, byte[] endKey, long purgeVersion, boolean force) {
|
||||
return purgeBlobGranules(beginKey, endKey, purgeVersion, force, getExecutor());
|
||||
}
|
||||
|
||||
/**
|
||||
* Queues a purge of blob granules for specified key range of this tenant, at the specified version.
|
||||
*
|
||||
* @param beginKey start of the key range
|
||||
* @param endKey end of the key range
|
||||
* @param purgeVersion version to purge at
|
||||
* @param force if true delete all data, if not keep data >= purgeVersion
|
||||
* @param e the {@link Executor} to use for asynchronous callbacks
|
||||
|
||||
* @return the key to watch for purge complete
|
||||
*/
|
||||
CompletableFuture<byte[]> purgeBlobGranules(byte[] beginKey, byte[] endKey, long purgeVersion, boolean force, Executor e);
|
||||
|
||||
|
||||
/**
|
||||
* Runs {@link #waitPurgeGranulesComplete(Function)} on the default executor.
|
||||
*
|
||||
* @param purgeKey key to watch
|
||||
*/
|
||||
default CompletableFuture<Void> waitPurgeGranulesComplete(byte[] purgeKey) {
|
||||
return waitPurgeGranulesComplete(purgeKey, getExecutor());
|
||||
}
|
||||
|
||||
/**
|
||||
* Wait for a previous call to purgeBlobGranules to complete.
|
||||
*
|
||||
* @param purgeKey key to watch
|
||||
* @param e the {@link Executor} to use for asynchronous callbacks
|
||||
*/
|
||||
CompletableFuture<Void> waitPurgeGranulesComplete(byte[] purgeKey, Executor e);
|
||||
|
||||
/**
|
||||
* Runs {@link #blobbifyRange(Function)} on the default executor.
|
||||
*
|
||||
* @param beginKey start of the key range
|
||||
* @param endKey end of the key range
|
||||
|
||||
* @return if the recording of the range was successful
|
||||
*/
|
||||
default CompletableFuture<Boolean> blobbifyRange(byte[] beginKey, byte[] endKey) {
|
||||
return blobbifyRange(beginKey, endKey, getExecutor());
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets a range to be blobbified in this tenant. Must be a completely unblobbified range.
|
||||
*
|
||||
* @param beginKey start of the key range
|
||||
* @param endKey end of the key range
|
||||
* @param e the {@link Executor} to use for asynchronous callbacks
|
||||
|
||||
* @return if the recording of the range was successful
|
||||
*/
|
||||
CompletableFuture<Boolean> blobbifyRange(byte[] beginKey, byte[] endKey, Executor e);
|
||||
|
||||
/**
|
||||
* Runs {@link #unblobbifyRange(Function)} on the default executor.
|
||||
*
|
||||
* @param beginKey start of the key range
|
||||
* @param endKey end of the key range
|
||||
|
||||
* @return if the recording of the range was successful
|
||||
*/
|
||||
default CompletableFuture<Boolean> unblobbifyRange(byte[] beginKey, byte[] endKey) {
|
||||
return unblobbifyRange(beginKey, endKey, getExecutor());
|
||||
}
|
||||
|
||||
/**
|
||||
* Unsets a blobbified range in this tenant. The range must be aligned to known blob ranges.
|
||||
*
|
||||
* @param beginKey start of the key range
|
||||
* @param endKey end of the key range
|
||||
* @param e the {@link Executor} to use for asynchronous callbacks
|
||||
|
||||
* @return if the recording of the range was successful
|
||||
*/
|
||||
CompletableFuture<Boolean> unblobbifyRange(byte[] beginKey, byte[] endKey, Executor e);
|
||||
|
||||
/**
|
||||
* Runs {@link #listBlobbifiedRanges(Function)} on the default executor.
|
||||
*
|
||||
* @param beginKey start of the key range
|
||||
* @param endKey end of the key range
|
||||
* @param rangeLimit batch size
|
||||
* @param e the {@link Executor} to use for asynchronous callbacks
|
||||
|
||||
* @return a future with the list of blobbified ranges: [lastLessThan(beginKey), firstGreaterThanOrEqual(endKey)]
|
||||
*/
|
||||
default CompletableFuture<KeyRangeArrayResult> listBlobbifiedRanges(byte[] beginKey, byte[] endKey, int rangeLimit) {
|
||||
return listBlobbifiedRanges(beginKey, endKey, rangeLimit, getExecutor());
|
||||
}
|
||||
|
||||
/**
|
||||
* Lists blobbified ranges in this tenant. There may be more if result.size() == rangeLimit.
|
||||
*
|
||||
* @param beginKey start of the key range
|
||||
* @param endKey end of the key range
|
||||
* @param rangeLimit batch size
|
||||
* @param e the {@link Executor} to use for asynchronous callbacks
|
||||
|
||||
* @return a future with the list of blobbified ranges: [lastLessThan(beginKey), firstGreaterThanOrEqual(endKey)]
|
||||
*/
|
||||
CompletableFuture<KeyRangeArrayResult> listBlobbifiedRanges(byte[] beginKey, byte[] endKey, int rangeLimit, Executor e);
|
||||
|
||||
/**
|
||||
* Runs {@link #verifyBlobRange(Function)} on the default executor.
|
||||
*
|
||||
* @param beginKey start of the key range
|
||||
* @param endKey end of the key range
|
||||
*
|
||||
* @return a future with the version of the last blob granule.
|
||||
*/
|
||||
default CompletableFuture<Long> verifyBlobRange(byte[] beginKey, byte[] endKey) {
|
||||
return verifyBlobRange(beginKey, endKey, -2, getExecutor());
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs {@link #verifyBlobRange(Function)} on the default executor.
|
||||
*
|
||||
* @param beginKey start of the key range
|
||||
* @param endKey end of the key range
|
||||
* @param version version to read at
|
||||
*
|
||||
* @return a future with the version of the last blob granule.
|
||||
*/
|
||||
default CompletableFuture<Long> verifyBlobRange(byte[] beginKey, byte[] endKey, long version) {
|
||||
return verifyBlobRange(beginKey, endKey, version, getExecutor());
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if a blob range is blobbified in this tenant.
|
||||
*
|
||||
* @param beginKey start of the key range
|
||||
* @param endKey end of the key range
|
||||
* @param version version to read at
|
||||
*
|
||||
* @return a future with the version of the last blob granule.
|
||||
*/
|
||||
CompletableFuture<Long> verifyBlobRange(byte[] beginKey, byte[] endKey, long version, Executor e);
|
||||
|
||||
/**
|
||||
* Close the {@code Tenant} object and release any associated resources. This must be called at
|
||||
* least once after the {@code Tenant} object is no longer in use. This can be called multiple
|
||||
|
|
|
@ -56,7 +56,7 @@ endfunction()
|
|||
# all these tests in serialized order and within the same directory. This is
|
||||
# useful for restart tests
|
||||
function(add_fdb_test)
|
||||
set(options UNIT IGNORE)
|
||||
set(options UNIT IGNORE LONG_RUNNING)
|
||||
set(oneValueArgs TEST_NAME TIMEOUT)
|
||||
set(multiValueArgs TEST_FILES)
|
||||
cmake_parse_arguments(ADD_FDB_TEST "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}")
|
||||
|
@ -106,6 +106,9 @@ function(add_fdb_test)
|
|||
if(ADD_FDB_TEST_UNIT)
|
||||
message(STATUS
|
||||
"ADDING UNIT TEST ${assigned_id} ${test_name}")
|
||||
elseif(ADD_FDB_TEST_LONG_RUNNING)
|
||||
message(STATUS
|
||||
"ADDING LONG RUNNING TEST ${assigned_id} ${test_name}")
|
||||
else()
|
||||
message(STATUS
|
||||
"ADDING SIMULATOR TEST ${assigned_id} ${test_name}")
|
||||
|
@ -150,9 +153,15 @@ function(add_fdb_test)
|
|||
endif()
|
||||
endif()
|
||||
# set variables used for generating test packages
|
||||
set(TEST_NAMES ${TEST_NAMES} ${test_name} PARENT_SCOPE)
|
||||
set(TEST_FILES_${test_name} ${ADD_FDB_TEST_TEST_FILES} PARENT_SCOPE)
|
||||
set(TEST_TYPE_${test_name} ${test_type} PARENT_SCOPE)
|
||||
if(ADD_FDB_TEST_LONG_RUNNING)
|
||||
set(LONG_RUNNING_TEST_NAMES ${LONG_RUNNING_TEST_NAMES} ${test_name} PARENT_SCOPE)
|
||||
set(LONG_RUNNING_TEST_FILES_${test_name} ${ADD_FDB_TEST_TEST_FILES} PARENT_SCOPE)
|
||||
set(LONG_RUNNING_TEST_TYPE_${test_name} ${test_type} PARENT_SCOPE)
|
||||
else()
|
||||
set(TEST_NAMES ${TEST_NAMES} ${test_name} PARENT_SCOPE)
|
||||
set(TEST_FILES_${test_name} ${ADD_FDB_TEST_TEST_FILES} PARENT_SCOPE)
|
||||
set(TEST_TYPE_${test_name} ${test_type} PARENT_SCOPE)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
if(NOT WIN32)
|
||||
|
@ -167,14 +176,21 @@ endif()
|
|||
# - OUT_DIR the directory where files will be staged
|
||||
# - CONTEXT the type of correctness package being built (e.g. 'valgrind correctness')
|
||||
function(stage_correctness_package)
|
||||
set(options LONG_RUNNING)
|
||||
set(oneValueArgs OUT_DIR CONTEXT OUT_FILES)
|
||||
cmake_parse_arguments(STAGE "" "${oneValueArgs}" "" "${ARGN}")
|
||||
set(multiValueArgs TEST_LIST)
|
||||
cmake_parse_arguments(STAGE "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}")
|
||||
file(MAKE_DIRECTORY ${STAGE_OUT_DIR}/bin)
|
||||
string(LENGTH "${CMAKE_SOURCE_DIR}/tests/" base_length)
|
||||
foreach(test IN LISTS TEST_NAMES)
|
||||
foreach(test IN LISTS STAGE_TEST_LIST)
|
||||
if((${test} MATCHES ${TEST_PACKAGE_INCLUDE}) AND
|
||||
(NOT ${test} MATCHES ${TEST_PACKAGE_EXCLUDE}))
|
||||
foreach(file IN LISTS TEST_FILES_${test})
|
||||
string(LENGTH "${CMAKE_SOURCE_DIR}/tests/" base_length)
|
||||
if(STAGE_LONG_RUNNING)
|
||||
set(TEST_FILES_PREFIX "LONG_RUNNING_TEST_FILES")
|
||||
else()
|
||||
set(TEST_FILES_PREFIX "TEST_FILES")
|
||||
endif()
|
||||
foreach(file IN LISTS ${TEST_FILES_PREFIX}_${test})
|
||||
string(SUBSTRING ${file} ${base_length} -1 rel_out_file)
|
||||
set(out_file ${STAGE_OUT_DIR}/tests/${rel_out_file})
|
||||
list(APPEND test_files ${out_file})
|
||||
|
@ -265,7 +281,7 @@ function(create_correctness_package)
|
|||
return()
|
||||
endif()
|
||||
set(out_dir "${CMAKE_BINARY_DIR}/correctness")
|
||||
stage_correctness_package(OUT_DIR ${out_dir} CONTEXT "correctness" OUT_FILES package_files)
|
||||
stage_correctness_package(OUT_DIR ${out_dir} CONTEXT "correctness" OUT_FILES package_files TEST_LIST "${TEST_NAMES}")
|
||||
set(tar_file ${CMAKE_BINARY_DIR}/packages/correctness-${FDB_VERSION}.tar.gz)
|
||||
add_custom_command(
|
||||
OUTPUT ${tar_file}
|
||||
|
@ -294,13 +310,47 @@ function(create_correctness_package)
|
|||
add_dependencies(package_tests_u package_tests)
|
||||
endfunction()
|
||||
|
||||
function(create_long_running_correctness_package)
|
||||
if(WIN32)
|
||||
return()
|
||||
endif()
|
||||
set(out_dir "${CMAKE_BINARY_DIR}/long_running_correctness")
|
||||
stage_correctness_package(OUT_DIR ${out_dir} CONTEXT "long running correctness" OUT_FILES package_files TEST_LIST "${LONG_RUNNING_TEST_NAMES}" LONG_RUNNING)
|
||||
set(tar_file ${CMAKE_BINARY_DIR}/packages/long-running-correctness-${FDB_VERSION}.tar.gz)
|
||||
add_custom_command(
|
||||
OUTPUT ${tar_file}
|
||||
DEPENDS ${package_files}
|
||||
${CMAKE_SOURCE_DIR}/contrib/Joshua/scripts/correctnessTest.sh
|
||||
${CMAKE_SOURCE_DIR}/contrib/Joshua/scripts/correctnessTimeout.sh
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/contrib/Joshua/scripts/correctnessTest.sh
|
||||
${out_dir}/joshua_test
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/contrib/Joshua/scripts/correctnessTimeout.sh
|
||||
${out_dir}/joshua_timeout
|
||||
COMMAND ${CMAKE_COMMAND} -E tar cfz ${tar_file} ${package_files}
|
||||
${out_dir}/joshua_test
|
||||
${out_dir}/joshua_timeout
|
||||
WORKING_DIRECTORY ${out_dir}
|
||||
COMMENT "Package long running correctness archive"
|
||||
)
|
||||
add_custom_target(package_long_running_tests ALL DEPENDS ${tar_file})
|
||||
add_dependencies(package_long_running_tests strip_only_fdbserver TestHarness)
|
||||
set(unversioned_tar_file "${CMAKE_BINARY_DIR}/packages/long_running_correctness.tar.gz")
|
||||
add_custom_command(
|
||||
OUTPUT "${unversioned_tar_file}"
|
||||
DEPENDS "${tar_file}"
|
||||
COMMAND ${CMAKE_COMMAND} -E copy "${tar_file}" "${unversioned_tar_file}"
|
||||
COMMENT "Copy long running correctness package to ${unversioned_tar_file}")
|
||||
add_custom_target(package_long_running_tests_u DEPENDS "${unversioned_tar_file}")
|
||||
add_dependencies(package_long_running_tests_u package_long_running_tests)
|
||||
endfunction()
|
||||
|
||||
function(create_valgrind_correctness_package)
|
||||
if(WIN32)
|
||||
return()
|
||||
endif()
|
||||
if(USE_VALGRIND)
|
||||
set(out_dir "${CMAKE_BINARY_DIR}/valgrind_correctness")
|
||||
stage_correctness_package(OUT_DIR ${out_dir} CONTEXT "valgrind correctness" OUT_FILES package_files)
|
||||
stage_correctness_package(OUT_DIR ${out_dir} CONTEXT "valgrind correctness" OUT_FILES package_files TEST_LIST "${TEST_NAMES}")
|
||||
set(tar_file ${CMAKE_BINARY_DIR}/packages/valgrind-${FDB_VERSION}.tar.gz)
|
||||
add_custom_command(
|
||||
OUTPUT ${tar_file}
|
||||
|
|
|
@ -74,7 +74,7 @@ function(compile_boost)
|
|||
BUILD_IN_SOURCE ON
|
||||
INSTALL_COMMAND ""
|
||||
UPDATE_COMMAND ""
|
||||
BUILD_BYPRODUCTS "${BOOST_INSTALL_DIR}/boost/config.hpp"
|
||||
BUILD_BYPRODUCTS "${BOOST_INSTALL_DIR}/include/boost/config.hpp"
|
||||
"${BOOST_INSTALL_DIR}/lib/libboost_context.a"
|
||||
"${BOOST_INSTALL_DIR}/lib/libboost_filesystem.a"
|
||||
"${BOOST_INSTALL_DIR}/lib/libboost_iostreams.a")
|
||||
|
|
|
@ -23,5 +23,5 @@ function(compile_zstd)
|
|||
endif()
|
||||
endif()
|
||||
|
||||
set(ZSTD_LIB_INCLUDE_DIR ${zstd_SOURCE_DIR}/lib PARENT_SCOPE)
|
||||
set(ZSTD_LIB_INCLUDE_DIR ${zstd_SOURCE_DIR}/lib CACHE INTERNAL ZSTD_LIB_INCLUDE_DIR)
|
||||
endfunction(compile_zstd)
|
||||
|
|
|
@ -26,6 +26,7 @@ env_set(TRACE_PC_GUARD_INSTRUMENTATION_LIB "" STRING "Path to a library containi
|
|||
env_set(PROFILE_INSTR_GENERATE OFF BOOL "If set, build FDB as an instrumentation build to generate profiles")
|
||||
env_set(PROFILE_INSTR_USE "" STRING "If set, build FDB with profile")
|
||||
env_set(FULL_DEBUG_SYMBOLS OFF BOOL "Generate full debug symbols")
|
||||
env_set(ENABLE_LONG_RUNNING_TESTS OFF BOOL "Add a long running tests package")
|
||||
|
||||
set(USE_SANITIZER OFF)
|
||||
if(USE_ASAN OR USE_VALGRIND OR USE_MSAN OR USE_TSAN OR USE_UBSAN)
|
||||
|
@ -291,6 +292,19 @@ else()
|
|||
# for more information.
|
||||
#add_compile_options(-fno-builtin-memcpy)
|
||||
|
||||
if (USE_LIBCXX)
|
||||
# Make sure that libc++ can be found be the platform's loader, so that thing's like cmake's "try_run" work.
|
||||
find_library(LIBCXX_SO_PATH c++ /usr/local/lib)
|
||||
if (LIBCXX_SO_PATH)
|
||||
get_filename_component(LIBCXX_SO_DIR ${LIBCXX_SO_PATH} DIRECTORY)
|
||||
if (APPLE)
|
||||
set(ENV{DYLD_LIBRARY_PATH} "$ENV{DYLD_LIBRARY_PATH}:${LIBCXX_SO_DIR}")
|
||||
else()
|
||||
set(ENV{LD_LIBRARY_PATH} "$ENV{LD_LIBRARY_PATH}:${LIBCXX_SO_DIR}")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (CLANG OR ICX)
|
||||
if (APPLE OR USE_LIBCXX)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
|
||||
|
@ -298,19 +312,6 @@ else()
|
|||
if (STATIC_LINK_LIBCXX)
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -nostdlib++ -Wl,-Bstatic -lc++ -lc++abi -Wl,-Bdynamic")
|
||||
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -static-libgcc -nostdlib++ -Wl,-Bstatic -lc++ -lc++abi -Wl,-Bdynamic")
|
||||
else()
|
||||
# Make sure that libc++ can be found be the platform's loader, so that thing's like cmake's "try_run" work.
|
||||
find_library(LIBCXX_SO_PATH c++ /usr/local/lib)
|
||||
if (LIBCXX_SO_PATH)
|
||||
get_filename_component(LIBCXX_SO_DIR ${LIBCXX_SO_PATH} DIRECTORY)
|
||||
if (APPLE)
|
||||
set(ENV{DYLD_LIBRARY_PATH} "$ENV{DYLD_LIBRARY_PATH}:${LIBCXX_SO_DIR}")
|
||||
elseif(WIN32)
|
||||
set(ENV{PATH} "$ENV{PATH};${LIBCXX_SO_DIR}")
|
||||
else()
|
||||
set(ENV{LD_LIBRARY_PATH} "$ENV{LD_LIBRARY_PATH}:${LIBCXX_SO_DIR}")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libc++ -Wl,-build-id=sha1")
|
||||
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -stdlib=libc++ -Wl,-build-id=sha1")
|
||||
|
|
|
@ -42,9 +42,10 @@ class ToSummaryTree(xml.sax.handler.ContentHandler):
|
|||
|
||||
def _print_summary(summary: SummaryTree, commands: Set[str]):
|
||||
cmd = []
|
||||
is_valgrind_run = False
|
||||
if config.reproduce_prefix is not None:
|
||||
cmd.append(config.reproduce_prefix)
|
||||
cmd.append('fdbserver')
|
||||
cmd.append('bin/fdbserver')
|
||||
if 'TestFile' in summary.attributes:
|
||||
file_name = summary.attributes['TestFile']
|
||||
role = 'test' if test_harness.run.is_no_sim(Path(file_name)) else 'simulation'
|
||||
|
@ -63,11 +64,6 @@ def _print_summary(summary: SummaryTree, commands: Set[str]):
|
|||
else:
|
||||
cmd += ['b', '<ERROR>']
|
||||
cmd += ['--crash', '--trace_format', config.trace_format]
|
||||
key = ' '.join(cmd)
|
||||
count = 1
|
||||
while key in commands:
|
||||
key = '{} # {}'.format(' '.join(cmd), count)
|
||||
count += 1
|
||||
# we want the command as the first attribute
|
||||
attributes = {'Command': ' '.join(cmd)}
|
||||
for k, v in summary.attributes.items():
|
||||
|
@ -76,18 +72,6 @@ def _print_summary(summary: SummaryTree, commands: Set[str]):
|
|||
else:
|
||||
attributes[k] = v
|
||||
summary.attributes = attributes
|
||||
if config.details:
|
||||
key = str(len(commands))
|
||||
str_io = io.StringIO()
|
||||
summary.dump(str_io, prefix=(' ' if config.pretty_print else ''))
|
||||
if config.output_format == 'json':
|
||||
sys.stdout.write('{}"Test{}": {}'.format(' ' if config.pretty_print else '',
|
||||
key, str_io.getvalue()))
|
||||
else:
|
||||
sys.stdout.write(str_io.getvalue())
|
||||
if config.pretty_print:
|
||||
sys.stdout.write('\n' if config.output_format == 'xml' else ',\n')
|
||||
return key
|
||||
error_count = 0
|
||||
warning_count = 0
|
||||
small_summary = SummaryTree('Test')
|
||||
|
@ -98,6 +82,8 @@ def _print_summary(summary: SummaryTree, commands: Set[str]):
|
|||
for child in summary.children:
|
||||
if 'Severity' in child.attributes and child.attributes['Severity'] == '40' and error_count < config.max_errors:
|
||||
error_count += 1
|
||||
if errors.name == 'ValgrindError':
|
||||
is_valgrind_run = True
|
||||
errors.append(child)
|
||||
if 'Severity' in child.attributes and child.attributes[
|
||||
'Severity'] == '30' and warning_count < config.max_warnings:
|
||||
|
@ -122,6 +108,26 @@ def _print_summary(summary: SummaryTree, commands: Set[str]):
|
|||
small_summary.children.append(errors)
|
||||
if len(warnings.children) > 0:
|
||||
small_summary.children.append(warnings)
|
||||
if is_valgrind_run:
|
||||
idx = 0 if config.reproduce_prefix is None else 1
|
||||
cmd.insert(idx, 'valgrind')
|
||||
key = ' '.join(cmd)
|
||||
count = 1
|
||||
while key in commands:
|
||||
key = '{} # {}'.format(' '.join(cmd), count)
|
||||
count += 1
|
||||
if config.details:
|
||||
key = str(len(commands))
|
||||
str_io = io.StringIO()
|
||||
summary.dump(str_io, prefix=(' ' if config.pretty_print else ''))
|
||||
if config.output_format == 'json':
|
||||
sys.stdout.write('{}"Test{}": {}'.format(' ' if config.pretty_print else '',
|
||||
key, str_io.getvalue()))
|
||||
else:
|
||||
sys.stdout.write(str_io.getvalue())
|
||||
if config.pretty_print:
|
||||
sys.stdout.write('\n' if config.output_format == 'xml' else ',\n')
|
||||
return key
|
||||
output = io.StringIO()
|
||||
small_summary.dump(output, prefix=(' ' if config.pretty_print else ''))
|
||||
if config.output_format == 'json':
|
||||
|
|
|
@ -128,6 +128,35 @@ set_knob(db, 'min_trace_severity', '10', None, 'description')
|
|||
set_knob(db, 'min_trace_severity', '20', 'az-1', 'description')
|
||||
```
|
||||
|
||||
### CLI Usage
|
||||
|
||||
Users may also utilize `fdbcli` to set and update knobs dynamically. Usage is as follows
|
||||
```
|
||||
setknob <knob_name> <knob_value> [config_class]
|
||||
getknob <knob_name> [config_class]
|
||||
```
|
||||
Where `knob_name` is an existing knob, `knob_value` is the desired value to set the knob and `config_class` is the optional configuration class. Furthermore, `setknob` may be combined within a `begin\commit` to update multiple knobs atomically. If using this option, a description must follow `commit` otherwise a prompt will be shown asking for a description. The description must be non-empty. An example follows.
|
||||
```
|
||||
begin
|
||||
setknob min_trace_severity 30
|
||||
setknob tracing_udp_listener_addr 192.168.0.1
|
||||
commit "fdbcli change"
|
||||
```
|
||||
Users may only combine knob configuration changes with other knob configuration changes in the same transaction. For example, the following is not permitted and will raise an error.
|
||||
```
|
||||
begin
|
||||
set foo bar
|
||||
setknob max_metric_size 1000
|
||||
commit "change"
|
||||
```
|
||||
Specifically, `set, clear, get, getrange, clearrange` cannot be combined in any transaction with a `setknob` or `getknob`.
|
||||
|
||||
If using an individual `setknob` without being inside a `begin\commit` block, then `fdbcli` will prompt for a description as well.
|
||||
|
||||
#### Type checking
|
||||
Knobs have implicit types attached to them when defined. For example, the knob `tracing_udp_listener_addr` is set to `"127.0.0.1"` as so the type is string. If a user invokes `setknob` on this knob with an incorrect value that is not a string, the transaction will fail.
|
||||
|
||||
|
||||
### Disable the Configuration Database
|
||||
|
||||
The configuration database includes both client and server changes and is
|
||||
|
|
|
@ -47,6 +47,12 @@ Note that the quotas are specified in terms of bytes/second, and internally conv
|
|||
page_cost_quota = ceiling(byte_quota / CLIENT_KNOBS->READ_COST_BYTE_FACTOR)
|
||||
```
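
For example, assuming `READ_COST_BYTE_FACTOR` is 16384 (treat this as an assumption and check the knob's actual value in your build), a total throughput quota of 32768 bytes/second converts to a page cost quota of:

```
page_cost_quota = ceiling(32768 / 16384) = 2
```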
|
||||
|
||||
To clear both the reserved and total throughput quotas for a tag, run:
|
||||
|
||||
```
|
||||
fdbcli> quota clear <tag>
|
||||
```
|
||||
|
||||
### Limit Calculation
|
||||
The transaction budget that ratekeeper calculates and distributes to clients (via GRV proxies) for each tag is based on several intermediate rate calculations, outlined in this section.
|
||||
|
||||
|
|
|
@ -0,0 +1,106 @@
|
|||
# Goals
|
||||
|
||||
The main goal is to make transactions safer and easier to reason about. New users should get a "just works" experience. One of the main selling points of FoundationDB is that it solves the hard distributed systems problems for you, so that you only need to concern yourself with your business logic. Non-idempotent transactions are probably the biggest "gotcha" that users need to be made aware of -- and they won't discover it organically. In order to achieve this "just works" experience, I believe it is necessary to make automatic idempotency have low enough overhead that we can enable it by default.
|
||||
|
||||
As an intermediate goal, I plan to introduce this feature disabled by default. The long-term plan is to make it the default.
|
||||
|
||||
# API
|
||||
|
||||
Introduce a new transaction option `IDEMPOTENCY_ID`, which will be validated to be at most 255 bytes.
|
||||
Add
|
||||
```
|
||||
FDBFuture* fdb_transaction_commit_result(FDBTransaction* tr, uint8_t const* idempotency_id, int idempotency_id_length)
|
||||
```
|
||||
This function can be used to determine the result of a commit that failed with `transaction_timed_out`.
|
||||
|
||||
Commits for transactions with idempotency ids would not fail with `commit_unknown_result`, but in (extremely) rare cases could fail with a new error that clients are expected to handle by restarting the process.
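
A minimal sketch of the intended client flow, assuming the option surfaces in the C API as `FDB_TR_OPTION_IDEMPOTENCY_ID` and that the application supplies its own random-bytes helper (both are assumptions; only the option name `IDEMPOTENCY_ID` and `fdb_transaction_commit_result` are specified above):

```
// Sketch only; `tr` is an existing FDBTransaction*, and names marked as
// assumed are illustrative, not part of the API yet.
uint8_t id[16];
fill_with_random_bytes(id, sizeof(id)); // assumed application-provided helper

fdb_transaction_set_option(tr, FDB_TR_OPTION_IDEMPOTENCY_ID, id, sizeof(id)); // assumed enum name
FDBFuture* commit = fdb_transaction_commit(tr);
fdb_future_block_until_ready(commit);
fdb_error_t err = fdb_future_get_error(commit);
if (err == 1031 /* transaction_timed_out */) {
    // Ask the cluster whether the earlier attempt actually committed.
    FDBFuture* result = fdb_transaction_commit_result(tr, id, sizeof(id));
    fdb_future_block_until_ready(result);
    // Inspect `result` to decide whether to retry or treat the commit as applied.
}
```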
|
||||
# Background
|
||||
|
||||
- https://forums.foundationdb.org/t/automatically-providing-transaction-idempotency/1873
|
||||
- https://github.com/apple/foundationdb/issues/1321
|
||||
- https://docs.google.com/document/d/19LDQuurg4Tt8eUcig3-8g2VOG9ZpQvtWrp_691RqMo8/edit#
|
||||
|
||||
# Data model
|
||||
|
||||
Commit proxies would combine idempotency IDs for transactions within a batch. The purpose of this is to try to limit the number of distinct database keys that need to be written, and to lessen the number of extra mutation bytes for idempotency IDs.
|
||||
|
||||
## Key format
|
||||
```
|
||||
\xff\x02/idmp/${commit_version_big_endian (8 bytes)}${high_order_byte_of_batch_index (1 byte)}
|
||||
```
|
||||
|
||||
- `commit_version_big_endian`: the commit version stored big-endian, so that the cleaner worker can find the oldest idempotency ids easily, and also so that "unknown_committed" transactions can recover their commit version.
|
||||
- `high_order_byte_of_batch_index`: this limits us to 256 idempotency ids per value
|
||||
|
||||
## Value format
|
||||
```
|
||||
${protocol_version}(${n (1 byte)}${idempotency_id (n bytes)}${low_order_byte_of_batch_index})*
|
||||
```
|
||||
|
||||
The batch index for each idempotency id can be reconstructed from the high-order byte and low-order byte stored in the key and value, respectively. This is necessary for an "unknown_committed" transaction to recover its full versionstamp. The batch index is a `short int`, i.e. 2 bytes.
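
A sketch of the reconstruction (function and parameter names chosen for illustration):

```
#include <cstdint>

// Reassemble the 2-byte batch index from the byte stored in the key and the
// byte stored next to the idempotency id in the value.
uint16_t reconstructBatchIndex(uint8_t highOrderByteFromKey, uint8_t lowOrderByteFromValue) {
    return (uint16_t(highOrderByteFromKey) << 8) | lowOrderByteFromValue;
}
```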
|
||||
|
||||
# Cleaning up old idempotency ids
|
||||
|
||||
After learning the result of an attempt to commit a transaction with an
|
||||
idempotency id, the client may inform the cluster that it's no longer interested
|
||||
in that id and the cluster can reclaim the space used to store the idempotency
|
||||
id. The happy-path reply to a CommitTransactionRequest will say which proxy this
|
||||
request should be sent to, and all idempotency ids for a database key will be
|
||||
sent to the same proxy so that it can clear the key once it receives all of
|
||||
them. The first proxy will also periodically clean up the oldest idempotency ids, based on a policy determined by two knobs. One knob will control the minimum lifetime of an idempotency id (i.e. don't delete anything younger than 1 day), and the other will control the target byte size of the idempotency keys (e.g. keep 100 MB of idempotency keys around).
|
||||
|
||||
# Commit protocol
|
||||
|
||||
The basic change will be that a commit future will not become ready until the client confirms whether or not the commit succeeded. (`transaction_timed_out` is an unfortunate exception here)
|
||||
|
||||
The idempotency id will be automatically added to both the read conflict range and the write conflict range, before makeSelfConflicting is called so that we don't duplicate that work. We can reuse the `\xff/SC/` self-conflicting key space here.
|
||||
|
||||
## Did I already commit?
|
||||
|
||||
The first version of this scans the keys in the idmp key range to check for the idempotency ids. The plan for the next version is the following:
|
||||
|
||||
Storage servers would have a new endpoint that clients can use to ask if the transaction for an idempotency id already committed. Clients would need to check every possible shard that their idempotency id may have ended up in.
|
||||
|
||||
Storage servers would maintain a map from idempotency id to versionstamp in memory, and clients would need to contact all storage servers responsible for the `[\xff\x02/idmp/, \xff\x02/idmp0)` keyspace to be sure of their commit status. Assuming an idempotency id + versionstamp is 16 + 10 bytes, and that the lifetime of most idempotency ids is less than 1 second, that corresponds to at least 260 MB of memory on the storage server at 1,000,000 transactions/s, which seems acceptable. Let's double that to account for things like hash table load factor and allocating extra memory to ensure amortized constant time insertion. Still seems acceptable. We probably want to use a hashtable with open addressing to avoid frequent heap allocations. I _think_ [swisstables](https://abseil.io/about/design/swisstables) would work here.
|
||||
|
||||
When a transaction learns that it did in fact commit, the commit future succeeds, and the versionstamp gets filled with the original, successful transaction's versionstamp. After the successful commit is reported, it's no longer necessary to store its idempotency ID. The client will send an RPC to the cleaner role indicating that it can remove this idempotency ID.
|
||||
|
||||
If a transaction learns that it did in fact _not_ commit, the commit future will fail with an error that indicates that the transaction did not commit. Perhaps `transaction_too_old`.
|
||||
|
||||
If a transaction learns that it has been in-flight so long that its idempotency id could have been expired, then it will fail with a new, non-retriable error. It is expected that this will be rare enough that crashing the application is acceptable.
|
||||
|
||||
# Considerations
|
||||
|
||||
- Additional storage space on the cluster. This can be controlled directly via an idempotency id target bytes knob/config.
|
||||
- Potential write hot spot.
|
||||
|
||||
# Multi-version client
|
||||
|
||||
The multi-version client will generate its own idempotency id for a transaction and manage its lifecycle. It will duplicate the logic in NativeApi to achieve the same guarantees. As part of this change we will also ensure that the previous commit attempt is no longer in-flight before allowing the commit future to become ready. This will fix a potential "causal-write-risky" issue if a commit attempt fails with `cluster_version_changed`.
|
||||
|
||||
# Experiments
|
||||
|
||||
- Initial experiments show that this adds about 1% overhead for the worst-case workload, which is transactions that only update a single key.
|
||||
|
||||
```
|
||||
Single replication redwood cluster with dedicated ebs disks for tlog and storage. All tests saturated the tlog disk's IOPs.
|
||||
|
||||
volume_type: gp3
|
||||
volume_size: 384
|
||||
iops: 9000
|
||||
throughput: 250
|
||||
|
||||
$ bin/mako --mode run --rows 1000000 -x u1 -p 8 -t 8 --cluster=$HOME/fdb.cluster --seconds 100 # already warm, but quiesced
|
||||
|
||||
Baseline:
|
||||
|
||||
19714.67 TPS
|
||||
|
||||
"user space" method of writing idempotency id -> versionstamp in every transaction:
|
||||
|
||||
13831.00 TPS
|
||||
|
||||
"combine idempotency ids in transaction batch" method:
|
||||
|
||||
19515.62 TPS
|
||||
```
|
|
@ -203,6 +203,13 @@ The ``get`` command fetches the value of a given key. Its syntax is ``get <KEY>`
|
|||
|
||||
Note that :ref:`characters can be escaped <cli-escaping>` when specifying keys (or values) in ``fdbcli``.
|
||||
|
||||
getknob
|
||||
-------
|
||||
|
||||
The ``getknob`` command fetches the value of a given knob that has been populated by ``setknob``. Its syntax is ``getknob <KNOBNAME> [CONFIGCLASS]``. It displays the value of ``<KNOBNAME>`` if ``<KNOBNAME>`` is present in the database and ``not found`` otherwise.
|
||||
|
||||
Note that :ref:`characters can be escaped <cli-escaping>` when specifying keys (or values) in ``fdbcli``.
|
||||
|
||||
getrange
|
||||
--------
|
||||
|
||||
|
@ -395,6 +402,13 @@ The ``setclass`` command can be used to change the :ref:`process class <guidelin
|
|||
|
||||
The available process classes are ``unset``, ``storage``, ``transaction``, ``resolution``, ``grv_proxy``, ``commit_proxy``, ``master``, ``test``, ``stateless``, ``log``, ``router``, ``cluster_controller``, ``fast_restore``, ``data_distributor``, ``coordinator``, ``ratekeeper``, ``storage_cache``, ``backup``, and ``default``.
|
||||
|
||||
setknob
|
||||
-------
|
||||
|
||||
The ``setknob`` command can be used to set knobs dynamically. Its syntax is ``setknob <KNOBNAME> <KNOBVALUE> [CONFIGCLASS]``. If not present in a ``begin\commit`` block, the CLI will prompt for a description of the change.
|
||||
|
||||
Note that :ref:`characters can be escaped <cli-escaping>` when specifying keys (or values) in ``fdbcli``.
|
||||
|
||||
sleep
|
||||
-----
|
||||
|
||||
|
|
|
@ -528,7 +528,8 @@
|
|||
"duplicate_mutation_fetch_timeout",
|
||||
"primary_dc_missing",
|
||||
"fetch_primary_dc_timeout",
|
||||
"fetch_storage_wiggler_stats_timeout"
|
||||
"fetch_storage_wiggler_stats_timeout",
|
||||
"fetch_consistency_scan_info_timeout"
|
||||
]
|
||||
},
|
||||
"issues":[
|
||||
|
|
|
@ -56,7 +56,7 @@ ACTOR Future<Void> getQuota(Reference<IDatabase> db, TransactionTag tag, LimitTy
|
|||
loop {
|
||||
tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
|
||||
try {
|
||||
state ThreadFuture<Optional<Value>> resultFuture = tr->get(tag.withPrefix(tagQuotaPrefix));
|
||||
state ThreadFuture<Optional<Value>> resultFuture = tr->get(ThrottleApi::getTagQuotaKey(tag));
|
||||
Optional<Value> v = wait(safeThreadFutureToFuture(resultFuture));
|
||||
if (!v.present()) {
|
||||
fmt::print("<empty>\n");
|
||||
|
@ -77,11 +77,10 @@ ACTOR Future<Void> getQuota(Reference<IDatabase> db, TransactionTag tag, LimitTy
|
|||
|
||||
ACTOR Future<Void> setQuota(Reference<IDatabase> db, TransactionTag tag, LimitType limitType, double value) {
|
||||
state Reference<ITransaction> tr = db->createTransaction();
|
||||
state Key key = tag.withPrefix(tagQuotaPrefix);
|
||||
loop {
|
||||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
try {
|
||||
state ThreadFuture<Optional<Value>> resultFuture = tr->get(key);
|
||||
state ThreadFuture<Optional<Value>> resultFuture = tr->get(ThrottleApi::getTagQuotaKey(tag));
|
||||
Optional<Value> v = wait(safeThreadFutureToFuture(resultFuture));
|
||||
ThrottleApi::TagQuotaValue quota;
|
||||
if (v.present()) {
|
||||
|
@ -103,8 +102,22 @@ ACTOR Future<Void> setQuota(Reference<IDatabase> db, TransactionTag tag, LimitTy
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> clearQuota(Reference<IDatabase> db, TransactionTag tag) {
|
||||
state Reference<ITransaction> tr = db->createTransaction();
|
||||
loop {
|
||||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
try {
|
||||
tr->clear(ThrottleApi::getTagQuotaKey(tag));
|
||||
wait(safeThreadFutureToFuture(tr->commit()));
|
||||
return Void();
|
||||
} catch (Error& e) {
|
||||
wait(safeThreadFutureToFuture(tr->onError(e)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
constexpr auto usage = "quota [get <tag> [reserved_throughput|total_throughput] | set <tag> "
|
||||
"[reserved_throughput|total_throughput] <value>]";
|
||||
"[reserved_throughput|total_throughput] <value> | clear <tag>]";
|
||||
|
||||
bool exitFailure() {
|
||||
fmt::print(usage);
|
||||
|
@ -117,30 +130,40 @@ namespace fdb_cli {
|
|||
|
||||
ACTOR Future<bool> quotaCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens) {
|
||||
state bool result = true;
|
||||
if (tokens.size() != 5 && tokens.size() != 6) {
|
||||
if (tokens.size() < 3 || tokens.size() > 5) {
|
||||
return exitFailure();
|
||||
} else {
|
||||
auto tag = parseTag(tokens[2]);
|
||||
auto limitType = parseLimitType(tokens[3]);
|
||||
if (!tag.present() || !limitType.present()) {
|
||||
auto const tag = parseTag(tokens[2]);
|
||||
if (!tag.present()) {
|
||||
return exitFailure();
|
||||
}
|
||||
if (tokens[1] == "get"_sr) {
|
||||
if (tokens.size() != 4) {
|
||||
return exitFailure();
|
||||
}
|
||||
auto const limitType = parseLimitType(tokens[3]);
|
||||
if (!limitType.present()) {
|
||||
return exitFailure();
|
||||
}
|
||||
wait(getQuota(db, tag.get(), limitType.get()));
|
||||
return true;
|
||||
} else if (tokens[1] == "set"_sr) {
|
||||
if (tokens.size() != 5) {
|
||||
return exitFailure();
|
||||
}
|
||||
auto const limitType = parseLimitType(tokens[3]);
|
||||
auto const limitValue = parseLimitValue(tokens[4]);
|
||||
if (!limitValue.present()) {
|
||||
if (!limitType.present() || !limitValue.present()) {
|
||||
return exitFailure();
|
||||
}
|
||||
wait(setQuota(db, tag.get(), limitType.get(), limitValue.get()));
|
||||
return true;
|
||||
} else if (tokens[1] == "clear"_sr) {
|
||||
if (tokens.size() != 3) {
|
||||
return exitFailure();
|
||||
}
|
||||
wait(clearQuota(db, tag.get()));
|
||||
return true;
|
||||
} else {
|
||||
return exitFailure();
|
||||
}
|
||||
|
|
|
@ -442,7 +442,7 @@ void printStatus(StatusObjectReader statusObj,
|
|||
outputString += "\n Blob granules - enabled";
|
||||
}
|
||||
|
||||
outputString += "\n Encryption at-rest - ";
|
||||
outputString += "\n Encryption at-rest - ";
|
||||
if (statusObjConfig.get("encryption_at_rest_mode", strVal)) {
|
||||
outputString += strVal;
|
||||
} else {
|
||||
|
|
|
@ -499,11 +499,14 @@ void initHelp() {
|
|||
"transaction, and are automatically committed for you. By explicitly beginning a transaction, "
|
||||
"successive operations are all performed as part of a single transaction.\n\nTo commit the "
|
||||
"transaction, use the commit command. To discard the transaction, use the reset command.");
|
||||
helpMap["commit"] = CommandHelp("commit",
|
||||
helpMap["commit"] = CommandHelp("commit [description]",
|
||||
"commit the current transaction",
|
||||
"Any sets or clears executed after the start of the current transaction will be "
|
||||
"committed to the database. On success, the committed version number is displayed. "
|
||||
"If commit fails, the error is displayed and the transaction must be retried.");
|
||||
"If commit fails, the error is displayed and the transaction must be retried. The "
|
||||
"command optionally allows for a description in case the transaction targets the "
|
||||
"configuration database. If no description is provided in the command, a prompt "
|
||||
"will be shown asking for a relevant description of the configuration change");
|
||||
helpMap["clear"] = CommandHelp(
|
||||
"clear <KEY>",
|
||||
"clear a key from the database",
|
||||
|
@ -539,8 +542,8 @@ void initHelp() {
|
|||
"Displays the current read version of the database or currently running transaction.");
|
||||
helpMap["quota"] = CommandHelp("quota",
|
||||
"quota [get <tag> [reserved_throughput|total_throughput] | set <tag> "
|
||||
"[reserved_throughput|total_throughput] <value>]",
|
||||
"Get or modify the throughput quota for the specified tag.");
|
||||
"[reserved_throughput|total_throughput] <value> | clear <tag>]",
|
||||
"Get, modify, or clear the throughput quota for the specified tag.");
|
||||
helpMap["reset"] =
|
||||
CommandHelp("reset",
|
||||
"reset the current transaction",
|
||||
|
@ -552,6 +555,14 @@ void initHelp() {
|
|||
helpMap["set"] = CommandHelp("set <KEY> <VALUE>",
|
||||
"set a value for a given key",
|
||||
"If KEY is not already present in the database, it will be created." ESCAPINGKV);
|
||||
|
||||
helpMap["setknob"] = CommandHelp("setknob <KEY> <VALUE> [CONFIG_CLASS]",
|
||||
"updates a knob to specified value",
|
||||
"setknob will prompt for a descrption of the changes" ESCAPINGKV);
|
||||
|
||||
helpMap["getknob"] = CommandHelp(
|
||||
"getknob <KEY> [CONFIG_CLASS]", "gets the value of the specified knob", "CONFIG_CLASS is optional." ESCAPINGK);
|
||||
|
||||
helpMap["option"] = CommandHelp(
|
||||
"option <STATE> <OPTION> <ARG>",
|
||||
"enables or disables an option",
|
||||
|
@ -1050,12 +1061,17 @@ Future<T> stopNetworkAfter(Future<T> what) {
|
|||
}
|
||||
}
|
||||
|
||||
enum TransType { Db = 0, Config, None };
|
||||
|
||||
ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise, Reference<ClusterConnectionFile> ccf) {
|
||||
state LineNoise& linenoise = *plinenoise;
|
||||
state bool intrans = false;
|
||||
state TransType transtype = TransType::None;
|
||||
state bool isCommitDesc = false;
|
||||
|
||||
state Database localDb;
|
||||
state Reference<IDatabase> db;
|
||||
state Reference<IDatabase> configDb;
|
||||
state Reference<ITenant> tenant;
|
||||
state Optional<TenantName> tenantName;
|
||||
state Optional<TenantMapEntry> tenantEntry;
|
||||
|
@ -1064,6 +1080,7 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise, Reference<ClusterCo
|
|||
state const Reference<ITenant> managementTenant;
|
||||
|
||||
state Reference<ITransaction> tr;
|
||||
state Reference<ITransaction> config_tr;
|
||||
state Transaction trx;
|
||||
|
||||
state bool writeMode = false;
|
||||
|
@ -1085,6 +1102,8 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise, Reference<ClusterCo
|
|||
printf("Using cluster file `%s'.\n", ccf->getLocation().c_str());
|
||||
}
|
||||
db = API->createDatabase(opt.clusterFile.c_str());
|
||||
configDb = API->createDatabase(opt.clusterFile.c_str());
|
||||
configDb->setOption(FDBDatabaseOptions::USE_CONFIG_DATABASE);
|
||||
} catch (Error& e) {
|
||||
fprintf(stderr, "ERROR: %s (%d)\n", e.what(), e.code());
|
||||
printf("Unable to connect to cluster from `%s'\n", ccf->getLocation().c_str());
|
||||
|
@ -1442,23 +1461,46 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise, Reference<ClusterCo
|
|||
} else {
|
||||
activeOptions = FdbOptions(globalOptions);
|
||||
options = &activeOptions;
|
||||
getTransaction(db, tenant, tr, options, false);
|
||||
intrans = true;
|
||||
transtype = TransType::None;
|
||||
getTransaction(db, tenant, tr, options, false);
|
||||
printf("Transaction started\n");
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (tokencmp(tokens[0], "commit")) {
|
||||
if (tokens.size() != 1) {
|
||||
if (tokens.size() > 2) {
|
||||
printUsage(tokens[0]);
|
||||
is_error = true;
|
||||
} else if (!intrans) {
|
||||
fprintf(stderr, "ERROR: No active transaction\n");
|
||||
is_error = true;
|
||||
} else {
|
||||
wait(commitTransaction(tr));
|
||||
if (isCommitDesc && tokens.size() == 1) {
|
||||
// prompt for description and add to txn
|
||||
state Optional<std::string> raw;
|
||||
while (!raw.present() || raw.get().empty()) {
|
||||
fprintf(stdout,
|
||||
"Please set a description for the change. Description must be non-empty.\n");
|
||||
state Optional<std::string> rawline =
|
||||
wait(makeInterruptable(linenoise.read("description: ")));
|
||||
raw = rawline;
|
||||
}
|
||||
std::string line = raw.get();
|
||||
config_tr->set("\xff\xff/description"_sr, line);
|
||||
}
|
||||
if (transtype == TransType::Db) {
|
||||
wait(commitTransaction(tr));
|
||||
} else {
|
||||
if (tokens.size() > 1) {
|
||||
config_tr->set("\xff\xff/description"_sr, tokens[1]);
|
||||
}
|
||||
wait(commitTransaction(config_tr));
|
||||
}
|
||||
isCommitDesc = false;
|
||||
intrans = false;
|
||||
transtype = TransType::None;
|
||||
options = &globalOptions;
|
||||
}
|
||||
|
||||
|
@ -1481,10 +1523,16 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise, Reference<ClusterCo
|
|||
fprintf(stderr, "ERROR: No active transaction\n");
|
||||
is_error = true;
|
||||
} else {
|
||||
tr->reset();
|
||||
activeOptions = FdbOptions(globalOptions);
|
||||
options = &activeOptions;
|
||||
options->apply(tr);
|
||||
if (transtype == TransType::Config) {
|
||||
config_tr->reset();
|
||||
} else {
|
||||
tr->reset();
|
||||
activeOptions = FdbOptions(globalOptions);
|
||||
options = &activeOptions;
|
||||
options->apply(tr);
|
||||
}
|
||||
isCommitDesc = false;
|
||||
transtype = TransType::None;
|
||||
printf("Transaction reset\n");
|
||||
}
|
||||
continue;
|
||||
|
@ -1510,6 +1558,15 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise, Reference<ClusterCo
|
|||
printUsage(tokens[0]);
|
||||
is_error = true;
|
||||
} else {
|
||||
if (intrans) {
|
||||
if (transtype == TransType::None) {
|
||||
transtype = TransType::Db;
|
||||
} else if (transtype == TransType::Config) {
|
||||
fprintf(stderr, "ERROR: Cannot perform get in configuration transaction\n");
|
||||
is_error = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
state ThreadFuture<Optional<Value>> valueF =
|
||||
getTransaction(db, tenant, tr, options, intrans)->get(tokens[1]);
|
||||
Optional<Standalone<StringRef>> v = wait(makeInterruptable(safeThreadFutureToFuture(valueF)));
|
||||
|
@ -1618,7 +1675,17 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise, Reference<ClusterCo
|
|||
} else {
|
||||
state int limit;
|
||||
bool valid = true;
|
||||
|
||||
if (intrans) {
|
||||
if (transtype == TransType::None) {
|
||||
transtype = TransType::Db;
|
||||
} else if (transtype == TransType::Config) {
|
||||
fprintf(
|
||||
stderr,
|
||||
"ERROR: Cannot perform getrange or getrangekeys in configuration transaction\n");
|
||||
is_error = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (tokens.size() == 4) {
|
||||
// INT_MAX is 10 digits; rather than
|
||||
// worrying about overflow we'll just cap
|
||||
|
@ -1707,6 +1774,15 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise, Reference<ClusterCo
|
|||
printUsage(tokens[0]);
|
||||
is_error = true;
|
||||
} else {
|
||||
if (intrans) {
|
||||
if (transtype == TransType::None) {
|
||||
transtype = TransType::Db;
|
||||
} else if (transtype == TransType::Config) {
|
||||
fprintf(stderr, "ERROR: Cannot perform set in configuration transaction\n");
|
||||
is_error = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
getTransaction(db, tenant, tr, options, intrans);
|
||||
tr->set(tokens[1], tokens[2]);
|
||||
|
||||
|
@ -1717,6 +1793,91 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise, Reference<ClusterCo
|
|||
continue;
|
||||
}
|
||||
|
||||
if (tokencmp(tokens[0], "setknob")) {
|
||||
if (tokens.size() > 4 || tokens.size() < 3) {
|
||||
printUsage(tokens[0]);
|
||||
is_error = true;
|
||||
} else {
|
||||
if (intrans) {
|
||||
if (transtype == TransType::None) {
|
||||
transtype = TransType::Config;
|
||||
} else if (transtype == TransType::Db) {
|
||||
fprintf(stderr, "ERROR: Cannot perform setknob in database transaction\n");
|
||||
is_error = true;
|
||||
isCommitDesc = false;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Tuple t;
|
||||
if (tokens.size() == 4) {
|
||||
t.append(tokens[3]);
|
||||
} else {
|
||||
t.appendNull();
|
||||
}
|
||||
t.append(tokens[1]);
|
||||
getTransaction(configDb, tenant, config_tr, options, intrans);
|
||||
|
||||
config_tr->set(t.pack(), tokens[2]);
|
||||
if (!intrans) {
|
||||
// prompt for description and add to txn
|
||||
state Optional<std::string> raw_desc;
|
||||
while (!raw_desc.present() || raw_desc.get().empty()) {
|
||||
fprintf(stdout,
|
||||
"Please set a description for the change. Description must be non-empty\n");
|
||||
state Optional<std::string> rawline_knob =
|
||||
wait(makeInterruptable(linenoise.read("description: ")));
|
||||
raw_desc = rawline_knob;
|
||||
}
|
||||
std::string line = raw_desc.get();
|
||||
config_tr->set("\xff\xff/description"_sr, line);
|
||||
wait(commitTransaction(config_tr));
|
||||
} else {
|
||||
isCommitDesc = true;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (tokencmp(tokens[0], "getknob")) {
|
||||
if (tokens.size() > 3 || tokens.size() < 2) {
|
||||
printUsage(tokens[0]);
|
||||
is_error = true;
|
||||
} else {
|
||||
if (intrans) {
|
||||
if (transtype == TransType::None) {
|
||||
transtype = TransType::Config;
|
||||
} else if (transtype == TransType::Db) {
|
||||
fprintf(stderr, "ERROR: Cannot perform getknob in database transaction\n");
|
||||
is_error = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Tuple t;
|
||||
if (tokens.size() == 2) {
|
||||
t.appendNull();
|
||||
} else {
|
||||
t.append(tokens[2]);
|
||||
}
|
||||
t.append(tokens[1]);
|
||||
state ThreadFuture<Optional<Value>> valueF_knob =
|
||||
getTransaction(configDb, tenant, config_tr, options, intrans)->get(t.pack());
|
||||
Optional<Standalone<StringRef>> v =
|
||||
wait(makeInterruptable(safeThreadFutureToFuture(valueF_knob)));
|
||||
std::string knob_class = printable(tokens[1]);
|
||||
if (tokens.size() == 3) {
|
||||
std::string config_class = (" in configuration class " + printable(tokens[2]));
|
||||
knob_class += config_class;
|
||||
}
|
||||
if (v.present())
|
||||
printf("`%s' is `%s'\n",
|
||||
knob_class.c_str(),
|
||||
Tuple::tupleToString(Tuple::unpack(v.get())).c_str());
|
||||
else
|
||||
printf("`%s' is not found\n", knob_class.c_str());
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (tokencmp(tokens[0], "clear")) {
|
||||
if (!writeMode) {
|
||||
fprintf(stderr, "ERROR: writemode must be enabled to set or clear keys in the database.\n");
|
||||
|
@ -1728,6 +1889,15 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise, Reference<ClusterCo
|
|||
printUsage(tokens[0]);
|
||||
is_error = true;
|
||||
} else {
|
||||
if (intrans) {
|
||||
if (transtype == TransType::None) {
|
||||
transtype = TransType::Db;
|
||||
} else if (transtype == TransType::Config) {
|
||||
fprintf(stderr, "ERROR: Cannot perform clear in configuration transaction\n");
|
||||
is_error = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
getTransaction(db, tenant, tr, options, intrans);
|
||||
tr->clear(tokens[1]);
|
||||
|
||||
|
@ -1749,6 +1919,15 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise, Reference<ClusterCo
|
|||
printUsage(tokens[0]);
|
||||
is_error = true;
|
||||
} else {
|
||||
if (intrans) {
|
||||
if (transtype == TransType::None) {
|
||||
transtype = TransType::Db;
|
||||
} else if (transtype == TransType::Config) {
|
||||
fprintf(stderr, "ERROR: Cannot perform clearrange in configuration transaction\n");
|
||||
is_error = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
getTransaction(db, tenant, tr, options, intrans);
|
||||
tr->clear(KeyRangeRef(tokens[1], tokens[2]));
|
||||
|
||||
|
@ -1928,7 +2107,6 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise, Reference<ClusterCo
|
|||
}
|
||||
|
||||
TraceEvent(SevInfo, "CLICommandLog", randomID).detail("Command", line).detail("IsError", is_error);
|
||||
|
||||
} catch (Error& e) {
|
||||
if (e.code() == error_code_operation_cancelled) {
|
||||
throw;
|
||||
|
|
|
@ -103,6 +103,59 @@ def maintenance(logger):
|
|||
output3 = run_fdbcli_command('maintenance')
|
||||
assert output3 == no_maintenance_output
|
||||
|
||||
@enable_logging()
|
||||
def quota(logger):
|
||||
# Should be a noop
|
||||
command = 'quota clear green'
|
||||
output = run_fdbcli_command(command)
|
||||
logger.debug(command + ' : ' + output)
|
||||
assert output == ''
|
||||
|
||||
command = 'quota get green total_throughput'
|
||||
output = run_fdbcli_command(command)
|
||||
logger.debug(command + ' : ' + output)
|
||||
assert output == '<empty>'
|
||||
|
||||
# Ignored update
|
||||
command = 'quota set red total_throughput 49152'
|
||||
output = run_fdbcli_command(command)
|
||||
logger.debug(command + ' : ' + output)
|
||||
assert output == ''
|
||||
|
||||
command = 'quota set green total_throughput 32768'
|
||||
output = run_fdbcli_command(command)
|
||||
logger.debug(command + ' : ' + output)
|
||||
assert output == ''
|
||||
|
||||
command = 'quota set green reserved_throughput 16384'
|
||||
output = run_fdbcli_command(command)
|
||||
logger.debug(command + ' : ' + output)
|
||||
assert output == ''
|
||||
|
||||
command = 'quota get green total_throughput'
|
||||
output = run_fdbcli_command(command)
|
||||
logger.debug(command + ' : ' + output)
|
||||
assert output == '32768'
|
||||
|
||||
command = 'quota get green reserved_throughput'
|
||||
output = run_fdbcli_command(command)
|
||||
logger.debug(command + ' : ' + output)
|
||||
assert output == '16384'
|
||||
|
||||
command = 'quota clear green'
|
||||
output = run_fdbcli_command(command)
|
||||
logger.debug(command + ' : ' + output)
|
||||
assert output == ''
|
||||
|
||||
command = 'quota get green total_throughput'
|
||||
output = run_fdbcli_command(command)
|
||||
logger.debug(command + ' : ' + output)
|
||||
assert output == '<empty>'
|
||||
|
||||
# Too few arguments, should log help message
|
||||
command = 'quota get green'
|
||||
output = run_fdbcli_command(command)
|
||||
logger.debug(command + ' : ' + output)
|
||||
|
||||
@enable_logging()
|
||||
def setclass(logger):
|
||||
|
@ -334,6 +387,57 @@ def consistencycheck(logger):
|
|||
assert output3 == consistency_check_on_output
|
||||
|
||||
|
||||
@enable_logging()
|
||||
def knobmanagement(logger):
|
||||
# this test will set knobs and verify that the knobs are properly set
|
||||
# must use begin/commit to avoid prompt for description
|
||||
|
||||
# Incorrect arguments
|
||||
output = run_fdbcli_command('setknob')
|
||||
assert output == "Usage: setknob <KEY> <VALUE> [CONFIG_CLASS]"
|
||||
output = run_fdbcli_command('setknob', 'min_trace_severity')
|
||||
assert output == "Usage: setknob <KEY> <VALUE> [CONFIG_CLASS]"
|
||||
output = run_fdbcli_command('getknob')
|
||||
assert output == "Usage: getknob <KEY> [CONFIG_CLASS]"
|
||||
logger.debug("incorrect args passed")
|
||||
|
||||
# Invalid knob name
|
||||
err = run_fdbcli_command_and_get_error('begin; setknob dummy_knob 20; commit \"fdbcli change\";')
|
||||
logger.debug("err is: {}".format(err))
|
||||
assert len(err) > 0
|
||||
logger.debug("invalid knob name passed")
|
||||
|
||||
# Invalid type for knob
|
||||
err = run_fdbcli_command_and_get_error('begin; setknob min_trace_severity dummy-text; commit \"fdbcli change\";')
|
||||
logger.debug("err is: {}".format(err))
|
||||
assert len(err) > 0
|
||||
logger.debug("invalid knob type passed")
|
||||
|
||||
# Verifying we can't do a normal set, clear, get, getrange, clearrange
|
||||
# with a setknob
|
||||
err = run_fdbcli_command_and_get_error('writemode on; begin; set foo bar; setknob max_metric_size 1000; commit;')
|
||||
logger.debug("err is: {}".format(err))
|
||||
assert len(err) > 0
|
||||
|
||||
err = run_fdbcli_command_and_get_error('writemode on; begin; clear foo; setknob max_metric_size 1000; commit')
|
||||
logger.debug("err is: {}".format(err))
|
||||
assert len(err) > 0
|
||||
|
||||
# Various setknobs and verified by getknob
|
||||
output = run_fdbcli_command('begin; setknob min_trace_severity 30; setknob max_metric_size 1000; \
|
||||
setknob tracing_udp_listener_addr 192.168.0.1; \
|
||||
setknob tracing_sample_rate 0.3; \
|
||||
commit \"This is an fdbcli test for knobs\";')
|
||||
assert "Committed" in output
|
||||
output = run_fdbcli_command('getknob', 'min_trace_severity')
|
||||
assert r"`min_trace_severity' is `30'" == output
|
||||
output = run_fdbcli_command('getknob', 'max_metric_size')
|
||||
assert r"`max_metric_size' is `1000'" == output
|
||||
output = run_fdbcli_command('getknob', 'tracing_udp_listener_addr')
|
||||
assert r"`tracing_udp_listener_addr' is `'192.168.0.1''" == output
|
||||
output = run_fdbcli_command('getknob', 'tracing_sample_rate')
|
||||
assert r"`tracing_sample_rate' is `0.300000'" == output
|
||||
|
||||
@enable_logging()
|
||||
def cache_range(logger):
|
||||
# this command is currently experimental
|
||||
|
@ -983,6 +1087,8 @@ if __name__ == '__main__':
|
|||
versionepoch()
|
||||
integer_options()
|
||||
tls_address_suffix()
|
||||
knobmanagement()
|
||||
quota()
|
||||
else:
|
||||
assert args.process_number > 1, "Process number should be positive"
|
||||
coordinators()
|
||||
|
|
|
@ -86,6 +86,26 @@ BlobCipherMetrics::BlobCipherMetrics()
|
|||
traceFuture = traceCounters("BlobCipherMetrics", UID(), FLOW_KNOBS->ENCRYPT_KEY_CACHE_LOGGING_INTERVAL, &cc);
|
||||
}
|
||||
|
||||
std::string toString(BlobCipherMetrics::UsageType type) {
|
||||
switch (type) {
|
||||
case BlobCipherMetrics::UsageType::TLOG:
|
||||
return "TLog";
|
||||
case BlobCipherMetrics::UsageType::KV_MEMORY:
|
||||
return "KVMemory";
|
||||
case BlobCipherMetrics::UsageType::KV_REDWOOD:
|
||||
return "KVRedwood";
|
||||
case BlobCipherMetrics::UsageType::BLOB_GRANULE:
|
||||
return "BlobGranule";
|
||||
case BlobCipherMetrics::UsageType::BACKUP:
|
||||
return "Backup";
|
||||
case BlobCipherMetrics::UsageType::TEST:
|
||||
return "Test";
|
||||
default:
|
||||
ASSERT(false);
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
// BlobCipherKey class methods
|
||||
|
||||
BlobCipherKey::BlobCipherKey(const EncryptCipherDomainId& domainId,
|
||||
|
@ -636,34 +656,17 @@ Reference<EncryptBuf> EncryptBlobCipherAes265Ctr::encrypt(const uint8_t* plainte
|
|||
} else {
|
||||
|
||||
// Populate header authToken details
|
||||
if (header->flags.authTokenMode == EncryptAuthTokenMode::ENCRYPT_HEADER_AUTH_TOKEN_MODE_SINGLE) {
|
||||
ASSERT_GE(allocSize, (bytes + finalBytes));
|
||||
ASSERT_GE(encryptBuf->getLogicalSize(), (bytes + finalBytes));
|
||||
ASSERT_EQ(header->flags.authTokenMode, EncryptAuthTokenMode::ENCRYPT_HEADER_AUTH_TOKEN_MODE_SINGLE);
|
||||
ASSERT_GE(allocSize, (bytes + finalBytes));
|
||||
ASSERT_GE(encryptBuf->getLogicalSize(), (bytes + finalBytes));
|
||||
|
||||
computeAuthToken({ { ciphertext, bytes + finalBytes },
|
||||
{ reinterpret_cast<const uint8_t*>(header), sizeof(BlobCipherEncryptHeader) } },
|
||||
headerCipherKey->rawCipher(),
|
||||
AES_256_KEY_LENGTH,
|
||||
&header->singleAuthToken.authToken[0],
|
||||
(EncryptAuthTokenAlgo)header->flags.authTokenAlgo,
|
||||
AUTH_TOKEN_MAX_SIZE);
|
||||
} else {
|
||||
ASSERT_EQ(header->flags.authTokenMode, EncryptAuthTokenMode::ENCRYPT_HEADER_AUTH_TOKEN_MODE_MULTI);
|
||||
|
||||
// TOOD: Use HMAC_SHA encyrption authentication scheme as AES_CMAC needs minimum 16 bytes cipher key
|
||||
computeAuthToken({ { ciphertext, bytes + finalBytes } },
|
||||
reinterpret_cast<const uint8_t*>(&header->cipherTextDetails.salt),
|
||||
sizeof(EncryptCipherRandomSalt),
|
||||
&header->multiAuthTokens.cipherTextAuthToken[0],
|
||||
EncryptAuthTokenAlgo::ENCRYPT_HEADER_AUTH_TOKEN_ALGO_HMAC_SHA,
|
||||
AUTH_TOKEN_MAX_SIZE);
|
||||
computeAuthToken({ { reinterpret_cast<const uint8_t*>(header), sizeof(BlobCipherEncryptHeader) } },
|
||||
headerCipherKey->rawCipher(),
|
||||
AES_256_KEY_LENGTH,
|
||||
&header->multiAuthTokens.headerAuthToken[0],
|
||||
(EncryptAuthTokenAlgo)header->flags.authTokenAlgo,
|
||||
AUTH_TOKEN_MAX_SIZE);
|
||||
}
|
||||
computeAuthToken({ { ciphertext, bytes + finalBytes },
|
||||
{ reinterpret_cast<const uint8_t*>(header), sizeof(BlobCipherEncryptHeader) } },
|
||||
headerCipherKey->rawCipher(),
|
||||
AES_256_KEY_LENGTH,
|
||||
&header->singleAuthToken.authToken[0],
|
||||
(EncryptAuthTokenAlgo)header->flags.authTokenAlgo,
|
||||
AUTH_TOKEN_MAX_SIZE);
|
||||
}
|
||||
|
||||
encryptBuf->setLogicalSize(plaintextLen);
|
||||
|
@ -708,44 +711,6 @@ DecryptBlobCipherAes256Ctr::DecryptBlobCipherAes256Ctr(Reference<BlobCipherKey>
|
|||
}
|
||||
}
|
||||
|
||||
void DecryptBlobCipherAes256Ctr::verifyHeaderAuthToken(const BlobCipherEncryptHeader& header, Arena& arena) {
|
||||
if (header.flags.authTokenMode != ENCRYPT_HEADER_AUTH_TOKEN_MODE_MULTI) {
|
||||
// NoneAuthToken mode; no authToken is generated; nothing to do
|
||||
// SingleAuthToken mode; verification will happen as part of decryption.
|
||||
return;
|
||||
}
|
||||
|
||||
ASSERT_EQ(header.flags.authTokenMode, ENCRYPT_HEADER_AUTH_TOKEN_MODE_MULTI);
|
||||
ASSERT(isEncryptHeaderAuthTokenAlgoValid((EncryptAuthTokenAlgo)header.flags.authTokenAlgo));
|
||||
|
||||
BlobCipherEncryptHeader headerCopy;
|
||||
memcpy(reinterpret_cast<uint8_t*>(&headerCopy),
|
||||
reinterpret_cast<const uint8_t*>(&header),
|
||||
sizeof(BlobCipherEncryptHeader));
|
||||
memset(reinterpret_cast<uint8_t*>(&headerCopy.multiAuthTokens.headerAuthToken), 0, AUTH_TOKEN_MAX_SIZE);
|
||||
uint8_t computedHeaderAuthToken[AUTH_TOKEN_MAX_SIZE]{};
|
||||
computeAuthToken({ { reinterpret_cast<const uint8_t*>(&headerCopy), sizeof(BlobCipherEncryptHeader) } },
|
||||
headerCipherKey->rawCipher(),
|
||||
AES_256_KEY_LENGTH,
|
||||
&computedHeaderAuthToken[0],
|
||||
(EncryptAuthTokenAlgo)header.flags.authTokenAlgo,
|
||||
AUTH_TOKEN_MAX_SIZE);
|
||||
|
||||
int authTokenSize = getEncryptHeaderAuthTokenSize(header.flags.authTokenAlgo);
|
||||
ASSERT_LE(authTokenSize, AUTH_TOKEN_MAX_SIZE);
|
||||
if (memcmp(&header.multiAuthTokens.headerAuthToken[0], &computedHeaderAuthToken[0], authTokenSize) != 0) {
|
||||
TraceEvent(SevWarn, "BlobCipherVerifyEncryptBlobHeaderAuthTokenMismatch")
|
||||
.detail("HeaderVersion", header.flags.headerVersion)
|
||||
.detail("HeaderMode", header.flags.encryptMode)
|
||||
.detail("MultiAuthHeaderAuthToken",
|
||||
StringRef(arena, &header.multiAuthTokens.headerAuthToken[0], AUTH_TOKEN_MAX_SIZE).toString())
|
||||
.detail("ComputedHeaderAuthToken", StringRef(computedHeaderAuthToken, AUTH_TOKEN_MAX_SIZE));
|
||||
throw encrypt_header_authtoken_mismatch();
|
||||
}
|
||||
|
||||
headerAuthTokenValidationDone = true;
|
||||
}
|
||||
|
||||
void DecryptBlobCipherAes256Ctr::verifyHeaderSingleAuthToken(const uint8_t* ciphertext,
|
||||
const int ciphertextLen,
|
||||
const BlobCipherEncryptHeader& header,
|
||||
|
@ -759,7 +724,7 @@ void DecryptBlobCipherAes256Ctr::verifyHeaderSingleAuthToken(const uint8_t* ciph
|
|||
memcpy(reinterpret_cast<uint8_t*>(&headerCopy),
|
||||
reinterpret_cast<const uint8_t*>(&header),
|
||||
sizeof(BlobCipherEncryptHeader));
|
||||
memset(reinterpret_cast<uint8_t*>(&headerCopy.singleAuthToken), 0, 2 * AUTH_TOKEN_MAX_SIZE);
|
||||
memset(reinterpret_cast<uint8_t*>(&headerCopy.singleAuthToken), 0, AUTH_TOKEN_MAX_SIZE);
|
||||
uint8_t computed[AUTH_TOKEN_MAX_SIZE];
|
||||
computeAuthToken({ { ciphertext, ciphertextLen },
|
||||
{ reinterpret_cast<const uint8_t*>(&headerCopy), sizeof(BlobCipherEncryptHeader) } },
|
||||
|
@ -782,43 +747,12 @@ void DecryptBlobCipherAes256Ctr::verifyHeaderSingleAuthToken(const uint8_t* ciph
|
|||
}
|
||||
}
|
||||
|
||||
void DecryptBlobCipherAes256Ctr::verifyHeaderMultiAuthToken(const uint8_t* ciphertext,
|
||||
const int ciphertextLen,
|
||||
const BlobCipherEncryptHeader& header,
|
||||
Arena& arena) {
|
||||
if (!headerAuthTokenValidationDone) {
|
||||
verifyHeaderAuthToken(header, arena);
|
||||
}
|
||||
uint8_t computedCipherTextAuthToken[AUTH_TOKEN_MAX_SIZE];
|
||||
// TOOD: Use HMAC_SHA encyrption authentication scheme as AES_CMAC needs minimum 16 bytes cipher key
|
||||
computeAuthToken({ { ciphertext, ciphertextLen } },
|
||||
reinterpret_cast<const uint8_t*>(&header.cipherTextDetails.salt),
|
||||
sizeof(EncryptCipherRandomSalt),
|
||||
&computedCipherTextAuthToken[0],
|
||||
EncryptAuthTokenAlgo::ENCRYPT_HEADER_AUTH_TOKEN_ALGO_HMAC_SHA,
|
||||
AUTH_TOKEN_MAX_SIZE);
|
||||
if (memcmp(&header.multiAuthTokens.cipherTextAuthToken[0], &computedCipherTextAuthToken[0], AUTH_TOKEN_MAX_SIZE) !=
|
||||
0) {
|
||||
TraceEvent(SevWarn, "BlobCipherVerifyEncryptBlobHeaderAuthTokenMismatch")
|
||||
.detail("HeaderVersion", header.flags.headerVersion)
|
||||
.detail("HeaderMode", header.flags.encryptMode)
|
||||
.detail("MultiAuthCipherTextAuthToken",
|
||||
StringRef(arena, &header.multiAuthTokens.cipherTextAuthToken[0], AUTH_TOKEN_MAX_SIZE).toString())
|
||||
.detail("ComputedCipherTextAuthToken", StringRef(computedCipherTextAuthToken, AUTH_TOKEN_MAX_SIZE));
|
||||
throw encrypt_header_authtoken_mismatch();
|
||||
}
|
||||
}
|
||||
|
||||
void DecryptBlobCipherAes256Ctr::verifyAuthTokens(const uint8_t* ciphertext,
|
||||
const int ciphertextLen,
|
||||
const BlobCipherEncryptHeader& header,
|
||||
Arena& arena) {
|
||||
if (header.flags.authTokenMode == EncryptAuthTokenMode::ENCRYPT_HEADER_AUTH_TOKEN_MODE_SINGLE) {
|
||||
verifyHeaderSingleAuthToken(ciphertext, ciphertextLen, header, arena);
|
||||
} else {
|
||||
ASSERT_EQ(header.flags.authTokenMode, ENCRYPT_HEADER_AUTH_TOKEN_MODE_MULTI);
|
||||
verifyHeaderMultiAuthToken(ciphertext, ciphertextLen, header, arena);
|
||||
}
|
||||
ASSERT_EQ(header.flags.authTokenMode, EncryptAuthTokenMode::ENCRYPT_HEADER_AUTH_TOKEN_MODE_SINGLE);
|
||||
verifyHeaderSingleAuthToken(ciphertext, ciphertextLen, header, arena);
|
||||
|
||||
authTokensValidationDone = true;
|
||||
}
|
||||
|
@ -1504,266 +1438,6 @@ TEST_CASE("flow/BlobCipher") {
|
|||
TraceEvent("SingleAuthModeAesCmacDone");
|
||||
}
|
||||
|
||||
// validate basic encrypt followed by decrypt operation for AUTH_TOKEN_MODE_MULTI
|
||||
// HMAC_SHA authToken algorithm
|
||||
{
|
||||
TraceEvent("MultiAuthModeHmacShaStart").log();
|
||||
|
||||
EncryptBlobCipherAes265Ctr encryptor(cipherKey,
|
||||
headerCipherKey,
|
||||
iv,
|
||||
AES_256_IV_LENGTH,
|
||||
EncryptAuthTokenMode::ENCRYPT_HEADER_AUTH_TOKEN_MODE_MULTI,
|
||||
EncryptAuthTokenAlgo::ENCRYPT_HEADER_AUTH_TOKEN_ALGO_HMAC_SHA,
|
||||
BlobCipherMetrics::TEST);
|
||||
BlobCipherEncryptHeader header;
|
||||
Reference<EncryptBuf> encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena);
|
||||
|
||||
ASSERT_EQ(encrypted->getLogicalSize(), bufLen);
|
||||
ASSERT_NE(memcmp(&orgData[0], encrypted->begin(), bufLen), 0);
|
||||
ASSERT_EQ(header.flags.headerVersion, EncryptBlobCipherAes265Ctr::ENCRYPT_HEADER_VERSION);
|
||||
ASSERT_EQ(header.flags.encryptMode, ENCRYPT_CIPHER_MODE_AES_256_CTR);
|
||||
ASSERT_EQ(header.flags.authTokenMode, ENCRYPT_HEADER_AUTH_TOKEN_MODE_MULTI);
|
||||
ASSERT_EQ(header.flags.authTokenAlgo, EncryptAuthTokenAlgo::ENCRYPT_HEADER_AUTH_TOKEN_ALGO_HMAC_SHA);
|
||||
|
||||
TraceEvent("BlobCipherTestEncryptDone")
|
||||
.detail("HeaderVersion", header.flags.headerVersion)
|
||||
.detail("HeaderEncryptMode", header.flags.encryptMode)
|
||||
.detail("HeaderEncryptAuthTokenMode", header.flags.authTokenMode)
|
||||
.detail("HeaderEncryptAuthTokenAlgo", header.flags.authTokenAlgo)
|
||||
.detail("DomainId", header.cipherTextDetails.encryptDomainId)
|
||||
.detail("BaseCipherId", header.cipherTextDetails.baseCipherId)
|
||||
.detail("HeaderAuthToken",
|
||||
StringRef(arena, &header.singleAuthToken.authToken[0], AUTH_TOKEN_HMAC_SHA_SIZE).toString());
|
||||
|
||||
Reference<BlobCipherKey> tCipherKey = cipherKeyCache->getCipherKey(header.cipherTextDetails.encryptDomainId,
|
||||
header.cipherTextDetails.baseCipherId,
|
||||
header.cipherTextDetails.salt);
|
||||
Reference<BlobCipherKey> hCipherKey = cipherKeyCache->getCipherKey(header.cipherHeaderDetails.encryptDomainId,
|
||||
header.cipherHeaderDetails.baseCipherId,
|
||||
header.cipherHeaderDetails.salt);
|
||||
|
||||
ASSERT(tCipherKey->isEqual(cipherKey));
|
||||
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv, BlobCipherMetrics::TEST);
|
||||
Reference<EncryptBuf> decrypted = decryptor.decrypt(encrypted->begin(), bufLen, header, arena);
|
||||
|
||||
ASSERT_EQ(decrypted->getLogicalSize(), bufLen);
|
||||
ASSERT_EQ(memcmp(decrypted->begin(), &orgData[0], bufLen), 0);
|
||||
|
||||
TraceEvent("BlobCipherTestDecryptDone").log();
|
||||
|
||||
// induce encryption header corruption - headerVersion corrupted
|
||||
encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena);
|
||||
memcpy(reinterpret_cast<uint8_t*>(&headerCopy),
|
||||
reinterpret_cast<const uint8_t*>(&header),
|
||||
sizeof(BlobCipherEncryptHeader));
|
||||
headerCopy.flags.headerVersion += 1;
|
||||
try {
|
||||
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv, BlobCipherMetrics::TEST);
|
||||
decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena);
|
||||
ASSERT(false); // error expected
|
||||
} catch (Error& e) {
|
||||
if (e.code() != error_code_encrypt_header_metadata_mismatch) {
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
// induce encryption header corruption - encryptionMode corrupted
|
||||
encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena);
|
||||
memcpy(reinterpret_cast<uint8_t*>(&headerCopy),
|
||||
reinterpret_cast<const uint8_t*>(&header),
|
||||
sizeof(BlobCipherEncryptHeader));
|
||||
headerCopy.flags.encryptMode += 1;
|
||||
try {
|
||||
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv, BlobCipherMetrics::TEST);
|
||||
decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena);
|
||||
ASSERT(false); // error expected
|
||||
} catch (Error& e) {
|
||||
if (e.code() != error_code_encrypt_header_metadata_mismatch) {
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
// induce encryption header corruption - cipherText authToken mismatch
|
||||
encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena);
|
||||
memcpy(reinterpret_cast<uint8_t*>(&headerCopy),
|
||||
reinterpret_cast<const uint8_t*>(&header),
|
||||
sizeof(BlobCipherEncryptHeader));
|
||||
int hIdx = deterministicRandom()->randomInt(0, AUTH_TOKEN_HMAC_SHA_SIZE - 1);
|
||||
headerCopy.multiAuthTokens.cipherTextAuthToken[hIdx] += 1;
|
||||
try {
|
||||
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv, BlobCipherMetrics::TEST);
|
||||
decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena);
|
||||
ASSERT(false); // error expected
|
||||
} catch (Error& e) {
|
||||
if (e.code() != error_code_encrypt_header_authtoken_mismatch) {
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
// induce encryption header corruption - header authToken mismatch
|
||||
encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena);
|
||||
memcpy(reinterpret_cast<uint8_t*>(&headerCopy),
|
||||
reinterpret_cast<const uint8_t*>(&header),
|
||||
sizeof(BlobCipherEncryptHeader));
|
||||
hIdx = deterministicRandom()->randomInt(0, AUTH_TOKEN_HMAC_SHA_SIZE - 1);
|
||||
headerCopy.multiAuthTokens.headerAuthToken[hIdx] += 1;
|
||||
try {
|
||||
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv, BlobCipherMetrics::TEST);
|
||||
decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena);
|
||||
ASSERT(false); // error expected
|
||||
} catch (Error& e) {
|
||||
if (e.code() != error_code_encrypt_header_authtoken_mismatch) {
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena);
|
||||
uint8_t temp[bufLen];
|
||||
memcpy(encrypted->begin(), &temp[0], bufLen);
|
||||
int tIdx = deterministicRandom()->randomInt(0, bufLen - 1);
|
||||
temp[tIdx] += 1;
|
||||
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv, BlobCipherMetrics::TEST);
|
||||
decrypted = decryptor.decrypt(&temp[0], bufLen, header, arena);
|
||||
} catch (Error& e) {
|
||||
if (e.code() != error_code_encrypt_header_authtoken_mismatch) {
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
TraceEvent("MultiAuthModeHmacShaDone");
|
||||
}
|
||||
// AES_CMAC authToken algorithm
|
||||
{
|
||||
TraceEvent("MultiAuthModeAesCmacStart");
|
||||
|
||||
EncryptBlobCipherAes265Ctr encryptor(cipherKey,
|
||||
headerCipherKey,
|
||||
iv,
|
||||
AES_256_IV_LENGTH,
|
||||
EncryptAuthTokenMode::ENCRYPT_HEADER_AUTH_TOKEN_MODE_MULTI,
|
||||
EncryptAuthTokenAlgo::ENCRYPT_HEADER_AUTH_TOKEN_ALGO_AES_CMAC,
|
||||
BlobCipherMetrics::TEST);
|
||||
BlobCipherEncryptHeader header;
|
||||
Reference<EncryptBuf> encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena);
|
||||
|
||||
ASSERT_EQ(encrypted->getLogicalSize(), bufLen);
|
||||
ASSERT_NE(memcmp(&orgData[0], encrypted->begin(), bufLen), 0);
|
||||
ASSERT_EQ(header.flags.headerVersion, EncryptBlobCipherAes265Ctr::ENCRYPT_HEADER_VERSION);
|
||||
ASSERT_EQ(header.flags.encryptMode, ENCRYPT_CIPHER_MODE_AES_256_CTR);
|
||||
ASSERT_EQ(header.flags.authTokenMode, ENCRYPT_HEADER_AUTH_TOKEN_MODE_MULTI);
|
||||
ASSERT_EQ(header.flags.authTokenAlgo, EncryptAuthTokenAlgo::ENCRYPT_HEADER_AUTH_TOKEN_ALGO_AES_CMAC);
|
||||
|
||||
TraceEvent("BlobCipherTestEncryptDone")
|
||||
.detail("HeaderVersion", header.flags.headerVersion)
|
||||
.detail("HeaderEncryptMode", header.flags.encryptMode)
|
||||
.detail("HeaderEncryptAuthTokenMode", header.flags.authTokenMode)
|
||||
.detail("HeaderEncryptAuthTokenAlgo", header.flags.authTokenAlgo)
|
||||
.detail("DomainId", header.cipherTextDetails.encryptDomainId)
|
||||
.detail("BaseCipherId", header.cipherTextDetails.baseCipherId)
|
||||
.detail("HeaderAuthToken",
|
||||
StringRef(arena, &header.singleAuthToken.authToken[0], AUTH_TOKEN_AES_CMAC_SIZE).toString());
|
||||
|
||||
Reference<BlobCipherKey> tCipherKey = cipherKeyCache->getCipherKey(header.cipherTextDetails.encryptDomainId,
|
||||
header.cipherTextDetails.baseCipherId,
|
||||
header.cipherTextDetails.salt);
|
||||
Reference<BlobCipherKey> hCipherKey = cipherKeyCache->getCipherKey(header.cipherHeaderDetails.encryptDomainId,
|
||||
header.cipherHeaderDetails.baseCipherId,
|
||||
header.cipherHeaderDetails.salt);
|
||||
|
||||
ASSERT(tCipherKey->isEqual(cipherKey));
|
||||
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv, BlobCipherMetrics::TEST);
|
||||
Reference<EncryptBuf> decrypted = decryptor.decrypt(encrypted->begin(), bufLen, header, arena);
|
||||
|
||||
ASSERT_EQ(decrypted->getLogicalSize(), bufLen);
|
||||
ASSERT_EQ(memcmp(decrypted->begin(), &orgData[0], bufLen), 0);
|
||||
|
||||
TraceEvent("BlobCipherTestDecryptDone").log();
|
||||
|
||||
// induce encryption header corruption - headerVersion corrupted
|
||||
encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena);
|
||||
memcpy(reinterpret_cast<uint8_t*>(&headerCopy),
|
||||
reinterpret_cast<const uint8_t*>(&header),
|
||||
sizeof(BlobCipherEncryptHeader));
|
||||
headerCopy.flags.headerVersion += 1;
|
||||
try {
|
||||
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv, BlobCipherMetrics::TEST);
|
||||
decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena);
|
||||
ASSERT(false); // error expected
|
||||
} catch (Error& e) {
|
||||
if (e.code() != error_code_encrypt_header_metadata_mismatch) {
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
// induce encryption header corruption - encryptionMode corrupted
|
||||
encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena);
|
||||
memcpy(reinterpret_cast<uint8_t*>(&headerCopy),
|
||||
reinterpret_cast<const uint8_t*>(&header),
|
||||
sizeof(BlobCipherEncryptHeader));
|
||||
headerCopy.flags.encryptMode += 1;
|
||||
try {
|
||||
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv, BlobCipherMetrics::TEST);
|
||||
decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena);
|
||||
ASSERT(false); // error expected
|
||||
} catch (Error& e) {
|
||||
if (e.code() != error_code_encrypt_header_metadata_mismatch) {
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
// induce encryption header corruption - cipherText authToken mismatch
|
||||
encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena);
|
||||
memcpy(reinterpret_cast<uint8_t*>(&headerCopy),
|
||||
reinterpret_cast<const uint8_t*>(&header),
|
||||
sizeof(BlobCipherEncryptHeader));
|
||||
int hIdx = deterministicRandom()->randomInt(0, AUTH_TOKEN_AES_CMAC_SIZE - 1);
|
||||
headerCopy.multiAuthTokens.cipherTextAuthToken[hIdx] += 1;
|
||||
try {
|
||||
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv, BlobCipherMetrics::TEST);
|
||||
decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena);
|
||||
ASSERT(false); // error expected
|
||||
} catch (Error& e) {
|
||||
if (e.code() != error_code_encrypt_header_authtoken_mismatch) {
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
// induce encryption header corruption - header authToken mismatch
|
||||
encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena);
|
||||
memcpy(reinterpret_cast<uint8_t*>(&headerCopy),
|
||||
reinterpret_cast<const uint8_t*>(&header),
|
||||
sizeof(BlobCipherEncryptHeader));
|
||||
hIdx = deterministicRandom()->randomInt(0, AUTH_TOKEN_AES_CMAC_SIZE - 1);
|
||||
headerCopy.multiAuthTokens.headerAuthToken[hIdx] += 1;
|
||||
try {
|
||||
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv, BlobCipherMetrics::TEST);
|
||||
decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena);
|
||||
ASSERT(false); // error expected
|
||||
} catch (Error& e) {
|
||||
if (e.code() != error_code_encrypt_header_authtoken_mismatch) {
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena);
|
||||
uint8_t temp[bufLen];
|
||||
memcpy(encrypted->begin(), &temp[0], bufLen);
|
||||
int tIdx = deterministicRandom()->randomInt(0, bufLen - 1);
|
||||
temp[tIdx] += 1;
|
||||
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv, BlobCipherMetrics::TEST);
|
||||
decrypted = decryptor.decrypt(&temp[0], bufLen, header, arena);
|
||||
} catch (Error& e) {
|
||||
if (e.code() != error_code_encrypt_header_authtoken_mismatch) {
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
TraceEvent("MultiAuthModeAesCmacDone");
|
||||
}
|
||||
|
||||
// Validate dropping encryptDomainId cached keys
|
||||
const EncryptCipherDomainId candidate = deterministicRandom()->randomInt(minDomainId, maxDomainId);
|
||||
cipherKeyCache->resetEncryptDomainId(candidate);
|
||||
|
|
|
@ -33,6 +33,12 @@ public:
|
|||
|
||||
SingleBlobConnectionProvider(std::string url) { conn = BackupContainerFileSystem::openContainerFS(url, {}, {}); }
|
||||
|
||||
bool needsRefresh() const { return false; }
|
||||
|
||||
bool isExpired() const { return false; }
|
||||
|
||||
void update(Standalone<BlobMetadataDetailsRef> newBlobMetadata) { ASSERT(false); }
|
||||
|
||||
private:
|
||||
Reference<BackupContainerFileSystem> conn;
|
||||
};
|
||||
|
@ -44,18 +50,42 @@ struct PartitionedBlobConnectionProvider : BlobConnectionProvider {
|
|||
return std::pair(conn, metadata.partitions[writePartition].toString() + newFileName);
|
||||
}
|
||||
|
||||
Reference<BackupContainerFileSystem> getForRead(std::string filePath) { return conn; }
|
||||
Reference<BackupContainerFileSystem> getForRead(std::string filePath) {
|
||||
CODE_PROBE(isExpired(), "partitioned blob connection using expired blob metadata for read!");
|
||||
return conn;
|
||||
}
|
||||
|
||||
PartitionedBlobConnectionProvider(const Standalone<BlobMetadataDetailsRef> metadata) : metadata(metadata) {
|
||||
ASSERT(metadata.base.present());
|
||||
ASSERT(metadata.partitions.size() >= 2);
|
||||
conn = BackupContainerFileSystem::openContainerFS(metadata.base.get().toString(), {}, {});
|
||||
for (auto& it : metadata.partitions) {
|
||||
void updateMetadata(const Standalone<BlobMetadataDetailsRef>& newMetadata, bool checkPrevious) {
|
||||
ASSERT(newMetadata.base.present());
|
||||
ASSERT(newMetadata.partitions.size() >= 2);
|
||||
for (auto& it : newMetadata.partitions) {
|
||||
// these should be suffixes, not whole blob urls
|
||||
ASSERT(it.toString().find("://") == std::string::npos);
|
||||
}
|
||||
if (checkPrevious) {
|
||||
if (newMetadata.expireAt <= metadata.expireAt) {
|
||||
return;
|
||||
}
|
||||
// FIXME: validate only the credentials changed and the location is the same
|
||||
ASSERT(newMetadata.partitions.size() == metadata.partitions.size());
|
||||
for (int i = 0; i < newMetadata.partitions.size(); i++) {
|
||||
ASSERT(newMetadata.partitions[i] == metadata.partitions[i]);
|
||||
}
|
||||
}
|
||||
metadata = newMetadata;
|
||||
conn = BackupContainerFileSystem::openContainerFS(metadata.base.get().toString(), {}, {});
|
||||
}
|
||||
|
||||
PartitionedBlobConnectionProvider(const Standalone<BlobMetadataDetailsRef> metadata) {
|
||||
updateMetadata(metadata, false);
|
||||
}
|
||||
|
||||
bool needsRefresh() const { return now() >= metadata.refreshAt; }
|
||||
|
||||
bool isExpired() const { return now() >= metadata.expireAt; }
|
||||
|
||||
void update(Standalone<BlobMetadataDetailsRef> newBlobMetadata) { updateMetadata(newBlobMetadata, true); }
|
||||
|
||||
private:
|
||||
Standalone<BlobMetadataDetailsRef> metadata;
|
||||
Reference<BackupContainerFileSystem> conn;
|
||||
|
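// Illustrative caller sketch (provider, domainId and fetchBlobMetadataForDomain are assumed
// names, not part of this change): a holder of a BlobConnectionProvider polls needsRefresh()
// and hands freshly fetched metadata to update(), which ignores metadata that does not
// extend the current expiration.
if (provider->needsRefresh()) {
    provider->update(fetchBlobMetadataForDomain(domainId)); // hypothetical fetch helper
}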
@ -72,6 +102,7 @@ struct StorageLocationBlobConnectionProvider : BlobConnectionProvider {
|
|||
}
|
||||
|
||||
Reference<BackupContainerFileSystem> getForRead(std::string filePath) {
|
||||
CODE_PROBE(isExpired(), "storage location blob connection using expired blob metadata for read!");
|
||||
size_t slash = filePath.find("/");
|
||||
ASSERT(slash != std::string::npos);
|
||||
int partition = stoi(filePath.substr(0, slash));
|
||||
|
@ -80,9 +111,18 @@ struct StorageLocationBlobConnectionProvider : BlobConnectionProvider {
|
|||
return partitions[partition];
|
||||
}
|
||||
|
||||
StorageLocationBlobConnectionProvider(const Standalone<BlobMetadataDetailsRef> metadata) {
|
||||
ASSERT(!metadata.base.present());
|
||||
ASSERT(metadata.partitions.size() >= 2);
|
||||
void updateMetadata(const Standalone<BlobMetadataDetailsRef>& newMetadata, bool checkPrevious) {
|
||||
ASSERT(!newMetadata.base.present());
|
||||
ASSERT(newMetadata.partitions.size() >= 2);
|
||||
if (checkPrevious) {
|
||||
// FIXME: validate only the credentials changed and the locations are the same
|
||||
ASSERT(newMetadata.partitions.size() == partitions.size());
|
||||
if (newMetadata.expireAt <= metadata.expireAt) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
metadata = newMetadata;
|
||||
partitions.clear();
|
||||
for (auto& it : metadata.partitions) {
|
||||
// these should be whole blob urls
|
||||
ASSERT(it.toString().find("://") != std::string::npos);
|
||||
|
@ -90,7 +130,18 @@ struct StorageLocationBlobConnectionProvider : BlobConnectionProvider {
|
|||
}
|
||||
}
|
||||
|
||||
StorageLocationBlobConnectionProvider(const Standalone<BlobMetadataDetailsRef> metadata) {
|
||||
updateMetadata(metadata, false);
|
||||
}
|
||||
|
||||
bool needsRefresh() const { return now() >= metadata.refreshAt; }
|
||||
|
||||
bool isExpired() const { return now() >= metadata.expireAt; }
|
||||
|
||||
void update(Standalone<BlobMetadataDetailsRef> newBlobMetadata) { updateMetadata(newBlobMetadata, true); }
|
||||
|
||||
private:
|
||||
Standalone<BlobMetadataDetailsRef> metadata;
|
||||
std::vector<Reference<BackupContainerFileSystem>> partitions;
|
||||
};
|
||||
|
||||
|
|
|
@ -1324,7 +1324,8 @@ typedef std::priority_queue<MergeStreamNext, std::vector<MergeStreamNext>, Order
|
|||
|
||||
static RangeResult mergeDeltaStreams(const BlobGranuleChunkRef& chunk,
|
||||
const std::vector<Standalone<VectorRef<ParsedDeltaBoundaryRef>>>& streams,
|
||||
const std::vector<bool> startClears) {
|
||||
const std::vector<bool> startClears,
|
||||
GranuleMaterializeStats& stats) {
|
||||
ASSERT(streams.size() < std::numeric_limits<int16_t>::max());
|
||||
ASSERT(startClears.size() == streams.size());
|
||||
|
||||
|
@ -1357,6 +1358,10 @@ static RangeResult mergeDeltaStreams(const BlobGranuleChunkRef& chunk,
|
|||
}
|
||||
}
|
||||
|
||||
if (chunk.snapshotFile.present()) {
|
||||
stats.snapshotRows += streams[0].size();
|
||||
}
|
||||
|
||||
RangeResult result;
|
||||
std::vector<MergeStreamNext> cur;
|
||||
cur.reserve(streams.size());
|
||||
|
@ -1373,6 +1378,7 @@ static RangeResult mergeDeltaStreams(const BlobGranuleChunkRef& chunk,
|
|||
|
||||
// un-set clears and find latest value for key (if present)
|
||||
bool foundValue = false;
|
||||
bool includesSnapshot = cur.back().streamIdx == 0 && chunk.snapshotFile.present();
|
||||
for (auto& it : cur) {
|
||||
auto& v = streams[it.streamIdx][it.dataIdx];
|
||||
if (clearActive[it.streamIdx]) {
|
||||
|
@ -1392,6 +1398,13 @@ static RangeResult mergeDeltaStreams(const BlobGranuleChunkRef& chunk,
|
|||
KeyRef finalKey =
|
||||
chunk.tenantPrefix.present() ? v.key.removePrefix(chunk.tenantPrefix.get()) : v.key;
|
||||
result.push_back_deep(result.arena(), KeyValueRef(finalKey, v.value));
|
||||
if (!includesSnapshot) {
|
||||
stats.rowsInserted++;
|
||||
} else if (it.streamIdx > 0) {
|
||||
stats.rowsUpdated++;
|
||||
}
|
||||
} else if (includesSnapshot) {
|
||||
stats.rowsCleared++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1413,6 +1426,8 @@ static RangeResult mergeDeltaStreams(const BlobGranuleChunkRef& chunk,
|
|||
}
|
||||
}
|
||||
|
||||
stats.outputBytes += result.expectedSize();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -1421,7 +1436,8 @@ RangeResult materializeBlobGranule(const BlobGranuleChunkRef& chunk,
|
|||
Version beginVersion,
|
||||
Version readVersion,
|
||||
Optional<StringRef> snapshotData,
|
||||
StringRef deltaFileData[]) {
|
||||
StringRef deltaFileData[],
|
||||
GranuleMaterializeStats& stats) {
|
||||
// TODO REMOVE with early replying
|
||||
ASSERT(readVersion == chunk.includedVersion);
|
||||
|
||||
|
@ -1444,6 +1460,7 @@ RangeResult materializeBlobGranule(const BlobGranuleChunkRef& chunk,
|
|||
streams.reserve(chunk.deltaFiles.size() + 2);
|
||||
|
||||
if (snapshotData.present()) {
|
||||
stats.inputBytes += snapshotData.get().size();
|
||||
ASSERT(chunk.snapshotFile.present());
|
||||
Standalone<VectorRef<ParsedDeltaBoundaryRef>> snapshotRows =
|
||||
loadSnapshotFile(chunk.snapshotFile.get().filename,
|
||||
|
@ -1461,6 +1478,7 @@ RangeResult materializeBlobGranule(const BlobGranuleChunkRef& chunk,
|
|||
fmt::print("Applying {} delta files\n", chunk.deltaFiles.size());
|
||||
}
|
||||
for (int deltaIdx = 0; deltaIdx < chunk.deltaFiles.size(); deltaIdx++) {
|
||||
stats.inputBytes += deltaFileData[deltaIdx].size();
|
||||
bool startClear = false;
|
||||
auto deltaRows = loadChunkedDeltaFile(chunk.deltaFiles[deltaIdx].filename,
|
||||
deltaFileData[deltaIdx],
|
||||
|
@ -1480,6 +1498,7 @@ RangeResult materializeBlobGranule(const BlobGranuleChunkRef& chunk,
|
|||
fmt::print("Applying {} memory deltas\n", chunk.newDeltas.size());
|
||||
}
|
||||
if (!chunk.newDeltas.empty()) {
|
||||
stats.inputBytes += chunk.newDeltas.expectedSize();
|
||||
// TODO REMOVE validation
|
||||
ASSERT(beginVersion <= chunk.newDeltas.front().version);
|
||||
ASSERT(readVersion >= chunk.newDeltas.back().version);
|
||||
|
@ -1491,7 +1510,7 @@ RangeResult materializeBlobGranule(const BlobGranuleChunkRef& chunk,
|
|||
}
|
||||
}
|
||||
|
||||
return mergeDeltaStreams(chunk, streams, startClears);
|
||||
return mergeDeltaStreams(chunk, streams, startClears, stats);
|
||||
}
|
||||
|
||||
struct GranuleLoadFreeHandle : NonCopyable, ReferenceCounted<GranuleLoadFreeHandle> {
|
||||
|
@ -1560,8 +1579,6 @@ ErrorOr<RangeResult> loadAndMaterializeBlobGranules(const Standalone<VectorRef<B
|
|||
}
|
||||
|
||||
GranuleLoadIds loadIds[files.size()];
|
||||
int64_t inputBytes = 0;
|
||||
int64_t outputBytes = 0;
|
||||
|
||||
try {
|
||||
// Kick off first file reads if parallelism > 1
|
||||
|
@ -1586,7 +1603,6 @@ ErrorOr<RangeResult> loadAndMaterializeBlobGranules(const Standalone<VectorRef<B
|
|||
if (!snapshotData.get().begin()) {
|
||||
return ErrorOr<RangeResult>(blob_granule_file_load_error());
|
||||
}
|
||||
inputBytes += snapshotData.get().size();
|
||||
}
|
||||
|
||||
// +1 to avoid UBSAN variable length array of size zero
|
||||
|
@ -1599,16 +1615,11 @@ ErrorOr<RangeResult> loadAndMaterializeBlobGranules(const Standalone<VectorRef<B
|
|||
if (!deltaData[i].begin()) {
|
||||
return ErrorOr<RangeResult>(blob_granule_file_load_error());
|
||||
}
|
||||
inputBytes += deltaData[i].size();
|
||||
}
|
||||
|
||||
inputBytes += files[chunkIdx].newDeltas.expectedSize();
|
||||
|
||||
// materialize rows from chunk
|
||||
chunkRows =
|
||||
materializeBlobGranule(files[chunkIdx], keyRange, beginVersion, readVersion, snapshotData, deltaData);
|
||||
|
||||
outputBytes += chunkRows.expectedSize();
|
||||
chunkRows = materializeBlobGranule(
|
||||
files[chunkIdx], keyRange, beginVersion, readVersion, snapshotData, deltaData, stats);
|
||||
|
||||
results.arena().dependsOn(chunkRows.arena());
|
||||
results.append(results.arena(), chunkRows.begin(), chunkRows.size());
|
||||
|
@ -1616,8 +1627,6 @@ ErrorOr<RangeResult> loadAndMaterializeBlobGranules(const Standalone<VectorRef<B
|
|||
// free once done by forcing FreeHandles to trigger
|
||||
loadIds[chunkIdx].freeHandles.clear();
|
||||
}
|
||||
stats.inputBytes = inputBytes;
|
||||
stats.outputBytes = outputBytes;
|
||||
return ErrorOr<RangeResult>(results);
|
||||
} catch (Error& e) {
|
||||
return ErrorOr<RangeResult>(e);
|
||||
|
@ -2303,6 +2312,7 @@ void checkDeltaRead(const KeyValueGen& kvGen,
|
|||
// expected answer
|
||||
std::map<KeyRef, ValueRef> expectedData;
|
||||
Version lastFileEndVersion = 0;
|
||||
GranuleMaterializeStats stats;
|
||||
|
||||
fmt::print("Delta Read [{0} - {1}) @ {2} - {3}\n",
|
||||
range.begin.printable(),
|
||||
|
@ -2322,7 +2332,7 @@ void checkDeltaRead(const KeyValueGen& kvGen,
|
|||
chunk.includedVersion = readVersion;
|
||||
chunk.snapshotVersion = invalidVersion;
|
||||
|
||||
RangeResult actualData = materializeBlobGranule(chunk, range, beginVersion, readVersion, {}, serialized);
|
||||
RangeResult actualData = materializeBlobGranule(chunk, range, beginVersion, readVersion, {}, serialized, stats);
|
||||
|
||||
if (expectedData.size() != actualData.size()) {
|
||||
fmt::print("Expected Data {0}:\n", expectedData.size());
|
||||
|
@ -2430,6 +2440,7 @@ void checkGranuleRead(const KeyValueGen& kvGen,
|
|||
}
|
||||
Version lastFileEndVersion = 0;
|
||||
applyDeltasByVersion(deltaData, range, beginVersion, readVersion, lastFileEndVersion, expectedData);
|
||||
GranuleMaterializeStats stats;
|
||||
|
||||
// actual answer
|
||||
Standalone<BlobGranuleChunkRef> chunk;
|
||||
|
@ -2477,7 +2488,8 @@ void checkGranuleRead(const KeyValueGen& kvGen,
|
|||
if (beginVersion == 0) {
|
||||
snapshotPtr = serializedSnapshot;
|
||||
}
|
||||
RangeResult actualData = materializeBlobGranule(chunk, range, beginVersion, readVersion, snapshotPtr, deltaPtrs);
|
||||
RangeResult actualData =
|
||||
materializeBlobGranule(chunk, range, beginVersion, readVersion, snapshotPtr, deltaPtrs, stats);
|
||||
|
||||
if (expectedData.size() != actualData.size()) {
|
||||
fmt::print("Expected Size {0} != Actual Size {1}\n", expectedData.size(), actualData.size());
|
||||
|
@ -2822,6 +2834,7 @@ std::pair<int64_t, double> doReadBench(const FileSet& fileSet,
|
|||
Version readVersion = std::get<1>(fileSet.deltaFiles.back());
|
||||
|
||||
Standalone<BlobGranuleChunkRef> chunk;
|
||||
GranuleMaterializeStats stats;
|
||||
StringRef deltaPtrs[fileSet.deltaFiles.size()];
|
||||
|
||||
MutationRef clearAllAtEndMutation;
|
||||
|
@ -2875,14 +2888,25 @@ std::pair<int64_t, double> doReadBench(const FileSet& fileSet,
|
|||
}
|
||||
serializedBytes += actualData.expectedSize();
|
||||
} else {
|
||||
RangeResult actualData =
|
||||
materializeBlobGranule(chunk, readRange, 0, readVersion, std::get<2>(fileSet.snapshotFile), deltaPtrs);
|
||||
RangeResult actualData = materializeBlobGranule(
|
||||
chunk, readRange, 0, readVersion, std::get<2>(fileSet.snapshotFile), deltaPtrs, stats);
|
||||
serializedBytes += actualData.expectedSize();
|
||||
}
|
||||
}
|
||||
elapsed += timer_monotonic();
|
||||
elapsed /= READ_RUNS;
|
||||
serializedBytes /= READ_RUNS;
|
||||
|
||||
// TODO REMOVE
|
||||
fmt::print("Materialize stats:\n");
|
||||
fmt::print(" Input bytes: {0}\n", stats.inputBytes);
|
||||
fmt::print(" Output bytes: {0}\n", stats.outputBytes);
|
||||
fmt::print(" Write Amp: {0}\n", (1.0 * stats.inputBytes) / stats.outputBytes);
|
||||
fmt::print(" Snapshot Rows: {0}\n", stats.snapshotRows);
|
||||
fmt::print(" Rows Cleared: {0}\n", stats.rowsCleared);
|
||||
fmt::print(" Rows Inserted: {0}\n", stats.rowsInserted);
|
||||
fmt::print(" Rows Updated: {0}\n", stats.rowsUpdated);
|
||||
|
||||
return { serializedBytes, elapsed };
|
||||
}
|
||||
|
||||
|
|
|
@ -105,7 +105,9 @@ ACTOR Future<RangeResult> readBlobGranule(BlobGranuleChunkRef chunk,
|
|||
arena.dependsOn(data.arena());
|
||||
}
|
||||
|
||||
return materializeBlobGranule(chunk, keyRange, beginVersion, readVersion, snapshotData, deltaData);
|
||||
// TODO do something useful with stats?
|
||||
GranuleMaterializeStats stats;
|
||||
return materializeBlobGranule(chunk, keyRange, beginVersion, readVersion, snapshotData, deltaData, stats);
|
||||
|
||||
} catch (Error& e) {
|
||||
throw e;
|
||||
|
|
|
@ -198,6 +198,7 @@ void ClientKnobs::initialize(Randomize randomize) {
|
|||
init( DEFAULT_AUTO_LOGS, 3 );
|
||||
init( DEFAULT_COMMIT_GRV_PROXIES_RATIO, 3 );
|
||||
init( DEFAULT_MAX_GRV_PROXIES, 4 );
|
||||
init( DELETE_NATIVE_LIB_AFTER_LOADING, true ); // if false, don't delete libfdb_c in tmp directory on client connect.
|
||||
|
||||
init( GLOBAL_CONFIG_REFRESH_BACKOFF, 0.5 );
|
||||
init( GLOBAL_CONFIG_REFRESH_MAX_BACKOFF, 60.0 );
|
||||
|
|
|
@ -44,19 +44,20 @@ ConfigKey ConfigKeyRef::decodeKey(KeyRef const& key) {
|
|||
}
|
||||
|
||||
Value KnobValueRef::ToValueFunc::operator()(int v) const {
|
||||
return BinaryWriter::toValue(v, Unversioned());
|
||||
// return BinaryWriter::toValue(v, Unversioned());
|
||||
return Tuple::makeTuple(v).pack();
|
||||
}
|
||||
Value KnobValueRef::ToValueFunc::operator()(int64_t v) const {
|
||||
return BinaryWriter::toValue(v, Unversioned());
|
||||
return Tuple::makeTuple(v).pack();
|
||||
}
|
||||
Value KnobValueRef::ToValueFunc::operator()(bool v) const {
|
||||
return BinaryWriter::toValue(v, Unversioned());
|
||||
return Tuple::makeTuple(v).pack();
|
||||
}
|
||||
Value KnobValueRef::ToValueFunc::operator()(ValueRef v) const {
|
||||
return v;
|
||||
return Tuple::makeTuple(v).pack();
|
||||
}
|
||||
Value KnobValueRef::ToValueFunc::operator()(double v) const {
|
||||
return BinaryWriter::toValue(v, Unversioned());
|
||||
return Tuple::makeTuple(v).pack();
|
||||
}
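// Illustrative round-trip under the new encoding (sketch, not part of this change): every
// knob value, including the ValueRef overload that previously returned the bytes unchanged,
// is now wrapped in a single-element tuple before being stored.
Value packed = Tuple::makeTuple(int64_t{ 5000 }).pack(); // e.g. an int64 knob override
int64_t restored = Tuple::unpack(packed).getInt(0);      // decoder side unpacks the tuple
ASSERT(restored == 5000);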
|
||||
|
||||
KnobValue KnobValueRef::CreatorFunc::operator()(NoKnobFound) const {
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include "fdbclient/BackupContainer.h"
|
||||
#include "fdbclient/BlobCipher.h"
|
||||
#include "fdbclient/DatabaseContext.h"
|
||||
#include "fdbclient/FDBTypes.h"
|
||||
#include "fdbclient/GetEncryptCipherKeys.actor.h"
|
||||
#include "fdbclient/JsonBuilder.h"
|
||||
#include "fdbclient/KeyBackedTypes.h"
|
||||
|
@ -649,10 +650,8 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
|
|||
return Void();
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> updateEncryptionKeysCtx(EncryptedRangeFileWriter* self,
|
||||
KeyRef key,
|
||||
Reference<TenantEntryCache<Void>> cache) {
|
||||
state std::pair<int64_t, TenantName> curTenantInfo = wait(getEncryptionDomainDetails(key, cache));
|
||||
ACTOR static Future<Void> updateEncryptionKeysCtx(EncryptedRangeFileWriter* self, KeyRef key) {
|
||||
state std::pair<int64_t, TenantName> curTenantInfo = wait(getEncryptionDomainDetails(key, self));
|
||||
state Reference<AsyncVar<ClientDBInfo> const> dbInfo = self->cx->clientInfo;
|
||||
|
||||
// Get text and header cipher key
|
||||
|
@ -694,13 +693,12 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
|
|||
|
||||
static bool isSystemKey(KeyRef key) { return key.size() && key[0] == systemKeys.begin[0]; }
|
||||
|
||||
ACTOR static Future<std::pair<int64_t, TenantName>> getEncryptionDomainDetailsImpl(
|
||||
KeyRef key,
|
||||
Reference<TenantEntryCache<Void>> tenantCache) {
|
||||
ACTOR static Future<std::pair<int64_t, TenantName>>
|
||||
getEncryptionDomainDetailsImpl(KeyRef key, Reference<TenantEntryCache<Void>> tenantCache, bool useTenantCache) {
|
||||
if (isSystemKey(key)) {
|
||||
return std::make_pair(SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID, FDB_SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_NAME);
|
||||
}
|
||||
if (key.size() < TENANT_PREFIX_SIZE) {
|
||||
if (key.size() < TENANT_PREFIX_SIZE || !useTenantCache) {
|
||||
return std::make_pair(FDB_DEFAULT_ENCRYPT_DOMAIN_ID, FDB_DEFAULT_ENCRYPT_DOMAIN_NAME);
|
||||
}
|
||||
KeyRef tenantPrefix = KeyRef(key.begin(), TENANT_PREFIX_SIZE);
|
||||
|
@ -712,10 +710,21 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
|
|||
return std::make_pair(FDB_DEFAULT_ENCRYPT_DOMAIN_ID, FDB_DEFAULT_ENCRYPT_DOMAIN_NAME);
|
||||
}
|
||||
|
||||
static Future<std::pair<int64_t, TenantName>> getEncryptionDomainDetails(
|
||||
KeyRef key,
|
||||
Reference<TenantEntryCache<Void>> tenantCache) {
|
||||
return getEncryptionDomainDetailsImpl(key, tenantCache);
|
||||
static Future<std::pair<int64_t, TenantName>> getEncryptionDomainDetails(KeyRef key,
|
||||
EncryptedRangeFileWriter* self) {
|
||||
// If tenants are disabled on a cluster then don't use the TenantEntryCache as it will result in a lot of
|
||||
// unnecessary cache misses. For a cluster configured in TenantMode::Optional, the backup performance may
|
||||
// degrade if most of the mutations belong to an invalid tenant
|
||||
TenantMode mode = self->cx->clientInfo->get().tenantMode;
|
||||
bool useTenantCache = mode != TenantMode::DISABLED;
|
||||
if (g_network->isSimulated() && mode == TenantMode::OPTIONAL_TENANT) {
|
||||
// TODO: Currently simulation tests run with optional tenant mode but most data does not belong to any
|
||||
// tenant. This results in many timeouts so disable using the tenant cache until optional tenant mode
|
||||
// support with backups is more performant
|
||||
useTenantCache = false;
|
||||
}
|
||||
CODE_PROBE(useTenantCache, "using tenant cache");
|
||||
return getEncryptionDomainDetailsImpl(key, self->tenantCache, useTenantCache);
|
||||
}
|
||||
|
||||
// Handles the first block and internal blocks. Ends current block if needed.
|
||||
|
@ -813,7 +822,7 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
|
|||
appendStringRefWithLenToBuffer(self, &endKey);
|
||||
appendStringRefWithLenToBuffer(self, &newValue);
|
||||
wait(newBlock(self, 0, endKey, writeValue));
|
||||
wait(updateEncryptionKeysCtx(self, self->lastKey, self->tenantCache));
|
||||
wait(updateEncryptionKeysCtx(self, self->lastKey));
|
||||
return Void();
|
||||
}
|
||||
|
||||
|
@ -825,9 +834,8 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
|
|||
if (self->lastKey.size() == 0 || k.size() == 0) {
|
||||
return false;
|
||||
}
|
||||
state std::pair<int64_t, TenantName> curKeyTenantInfo = wait(getEncryptionDomainDetails(k, self->tenantCache));
|
||||
state std::pair<int64_t, TenantName> prevKeyTenantInfo =
|
||||
wait(getEncryptionDomainDetails(self->lastKey, self->tenantCache));
|
||||
state std::pair<int64_t, TenantName> curKeyTenantInfo = wait(getEncryptionDomainDetails(k, self));
|
||||
state std::pair<int64_t, TenantName> prevKeyTenantInfo = wait(getEncryptionDomainDetails(self->lastKey, self));
|
||||
// crossing tenant boundaries so finish the current block using only the tenant prefix of the new key
|
||||
if (curKeyTenantInfo.first != prevKeyTenantInfo.first) {
|
||||
CODE_PROBE(true, "crossed tenant boundaries");
|
||||
|
@ -840,7 +848,7 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
|
|||
// Start a new block if needed, then write the key and value
|
||||
ACTOR static Future<Void> writeKV_impl(EncryptedRangeFileWriter* self, Key k, Value v) {
|
||||
if (!self->cipherKeys.headerCipherKey.isValid() || !self->cipherKeys.textCipherKey.isValid()) {
|
||||
wait(updateEncryptionKeysCtx(self, k, self->tenantCache));
|
||||
wait(updateEncryptionKeysCtx(self, k));
|
||||
}
|
||||
state int toWrite = sizeof(int32_t) + k.size() + sizeof(int32_t) + v.size();
|
||||
wait(newBlockIfNeeded(self, toWrite));
|
||||
|
@ -862,7 +870,7 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
|
|||
// TODO (Nim): Is it possible to write empty begin and end keys?
|
||||
if (k.size() > 0 &&
|
||||
(!self->cipherKeys.headerCipherKey.isValid() || !self->cipherKeys.textCipherKey.isValid())) {
|
||||
wait(updateEncryptionKeysCtx(self, k, self->tenantCache));
|
||||
wait(updateEncryptionKeysCtx(self, k));
|
||||
}
|
||||
|
||||
// Need to account for extra "empty" value being written in the case of crossing tenant boundaries
|
||||
|
@ -1035,8 +1043,7 @@ private:
|
|||
ACTOR static Future<Void> decodeKVPairs(StringRefReader* reader,
|
||||
Standalone<VectorRef<KeyValueRef>>* results,
|
||||
bool encryptedBlock,
|
||||
Optional<Database> cx,
|
||||
Reference<TenantEntryCache<Void>> tenantCache) {
|
||||
Optional<Database> cx) {
|
||||
// Read begin key, if this fails then block was invalid.
|
||||
state uint32_t kLen = reader->consumeNetworkUInt32();
|
||||
state const uint8_t* k = reader->consume(kLen);
|
||||
|
@ -1091,7 +1098,7 @@ ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeRangeFileBlock(Reference<
|
|||
// BACKUP_AGENT_ENCRYPTED_SNAPSHOT_FILE_VERSION
|
||||
int32_t file_version = reader.consume<int32_t>();
|
||||
if (file_version == BACKUP_AGENT_SNAPSHOT_FILE_VERSION) {
|
||||
wait(decodeKVPairs(&reader, &results, false, cx, Reference<TenantEntryCache<Void>>()));
|
||||
wait(decodeKVPairs(&reader, &results, false, cx));
|
||||
} else if (file_version == BACKUP_AGENT_ENCRYPTED_SNAPSHOT_FILE_VERSION) {
|
||||
CODE_PROBE(true, "decoding encrypted block");
|
||||
ASSERT(cx.present());
|
||||
|
@ -1114,8 +1121,7 @@ ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeRangeFileBlock(Reference<
|
|||
StringRef decryptedData =
|
||||
wait(EncryptedRangeFileWriter::decrypt(cx.get(), header, dataPayloadStart, dataLen, &results.arena()));
|
||||
reader = StringRefReader(decryptedData, restore_corrupted_data());
|
||||
Reference<TenantEntryCache<Void>> tenantCache = makeReference<TenantEntryCache<Void>>(cx.get());
|
||||
wait(decodeKVPairs(&reader, &results, true, cx, tenantCache));
|
||||
wait(decodeKVPairs(&reader, &results, true, cx));
|
||||
} else {
|
||||
throw restore_unsupported_file_version();
|
||||
}
|
||||
|
@ -1698,7 +1704,7 @@ struct BackupRangeTaskFunc : BackupTaskFuncBase {
|
|||
state std::unique_ptr<IRangeFileWriter> rangeFile;
|
||||
state BackupConfig backup(task);
|
||||
state Arena arena;
|
||||
state Reference<TenantEntryCache<Void>> tenantCache = makeReference<TenantEntryCache<Void>>(cx);
|
||||
state Reference<TenantEntryCache<Void>> tenantCache;
|
||||
|
||||
// Don't need to check keepRunning(task) here because we will do that while finishing each output file, but
|
||||
// if bc is false then clearly the backup is no longer in progress
|
||||
|
@ -1792,6 +1798,10 @@ struct BackupRangeTaskFunc : BackupTaskFuncBase {
|
|||
// Initialize range file writer and write begin key
|
||||
if (encryptionEnabled) {
|
||||
CODE_PROBE(true, "using encrypted snapshot file writer");
|
||||
if (!tenantCache.isValid()) {
|
||||
tenantCache = makeReference<TenantEntryCache<Void>>(cx, TenantEntryCacheRefreshMode::WATCH);
|
||||
wait(tenantCache->init());
|
||||
}
|
||||
rangeFile = std::make_unique<EncryptedRangeFileWriter>(cx, &arena, tenantCache, outFile, blockSize);
|
||||
} else {
|
||||
rangeFile = std::make_unique<RangeFileWriter>(outFile, blockSize);
|
||||
|
|
|
@ -0,0 +1,175 @@
|
|||
/*
|
||||
* IdempotencyId.cpp
|
||||
*
|
||||
* This source file is part of the FoundationDB open source project
|
||||
*
|
||||
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "fdbclient/IdempotencyId.h"
|
||||
#include "fdbclient/SystemData.h"
|
||||
#include "flow/UnitTest.h"
|
||||
|
||||
struct IdempotencyIdKVBuilderImpl {
|
||||
Optional<Version> commitVersion;
|
||||
Optional<uint8_t> batchIndexHighOrderByte;
|
||||
BinaryWriter value{ IncludeVersion() };
|
||||
};
|
||||
|
||||
IdempotencyIdKVBuilder::IdempotencyIdKVBuilder() : impl(PImpl<IdempotencyIdKVBuilderImpl>::create()) {}
|
||||
|
||||
void IdempotencyIdKVBuilder::setCommitVersion(Version commitVersion) {
|
||||
impl->commitVersion = commitVersion;
|
||||
}
|
||||
|
||||
void IdempotencyIdKVBuilder::add(const IdempotencyIdRef& id, uint16_t batchIndex) {
|
||||
ASSERT(id.valid());
|
||||
if (impl->batchIndexHighOrderByte.present()) {
|
||||
ASSERT((batchIndex >> 8) == impl->batchIndexHighOrderByte.get());
|
||||
} else {
|
||||
impl->batchIndexHighOrderByte = batchIndex >> 8;
|
||||
}
|
||||
StringRef s = id.asStringRefUnsafe();
|
||||
impl->value << uint8_t(s.size());
|
||||
impl->value.serializeBytes(s);
|
||||
impl->value << uint8_t(batchIndex); // Low order byte of batchIndex
|
||||
}
|
||||
|
||||
Optional<KeyValue> IdempotencyIdKVBuilder::buildAndClear() {
|
||||
ASSERT(impl->commitVersion.present());
|
||||
if (!impl->batchIndexHighOrderByte.present()) {
|
||||
return {};
|
||||
}
|
||||
|
||||
BinaryWriter key{ Unversioned() };
|
||||
key.serializeBytes(idempotencyIdKeys.begin);
|
||||
key << bigEndian64(impl->commitVersion.get());
|
||||
key << impl->batchIndexHighOrderByte.get();
|
||||
|
||||
Value v = impl->value.toValue();
|
||||
|
||||
impl->value = BinaryWriter(IncludeVersion());
|
||||
impl->batchIndexHighOrderByte = Optional<uint8_t>();
|
||||
|
||||
Optional<KeyValue> result = KeyValue();
|
||||
result.get().arena() = v.arena();
|
||||
result.get().key = key.toValue(result.get().arena());
|
||||
result.get().value = v;
|
||||
return result;
|
||||
}
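// Resulting layout (described for clarity; derived from the writers above):
//   key   = idempotencyIdKeys.begin + bigEndian64(commitVersion) + highOrderByte(batchIndex)
//   value = IncludeVersion header, then repeated [uint8 idLength][id bytes][uint8 lowOrderByte(batchIndex)]
// All ids added to one builder (same commit version and batch-index high byte) therefore share
// a single key and are scanned linearly by kvContainsIdempotencyId() below.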
|
||||
|
||||
IdempotencyIdKVBuilder::~IdempotencyIdKVBuilder() = default;
|
||||
|
||||
Optional<CommitResult> kvContainsIdempotencyId(const KeyValueRef& kv, const IdempotencyIdRef& id) {
|
||||
ASSERT(id.valid());
|
||||
StringRef needle = id.asStringRefUnsafe();
|
||||
StringRef haystack = kv.value;
|
||||
|
||||
#ifndef _WIN32
|
||||
// The common case is that the kv does not contain the idempotency id, so early return if memmem is available
|
||||
if (memmem(haystack.begin(), haystack.size(), needle.begin(), needle.size()) == nullptr) {
|
||||
return {};
|
||||
}
|
||||
#endif
|
||||
|
||||
// Even if id is a substring of value, it may still not actually contain it.
|
||||
BinaryReader reader(kv.value.begin(), kv.value.size(), IncludeVersion());
|
||||
while (!reader.empty()) {
|
||||
uint8_t length;
|
||||
reader >> length;
|
||||
StringRef candidate{ reinterpret_cast<const uint8_t*>(reader.readBytes(length)), length };
|
||||
uint8_t lowOrderBatchIndex;
|
||||
reader >> lowOrderBatchIndex;
|
||||
if (candidate == needle) {
|
||||
BinaryReader reader(kv.key.begin(), kv.key.size(), Unversioned());
|
||||
reader.readBytes(idempotencyIdKeys.begin.size());
|
||||
Version commitVersion;
|
||||
reader >> commitVersion;
|
||||
commitVersion = bigEndian64(commitVersion);
|
||||
uint8_t highOrderBatchIndex;
|
||||
reader >> highOrderBatchIndex;
|
||||
return CommitResult{ commitVersion,
|
||||
static_cast<uint16_t>((uint16_t(highOrderBatchIndex) << 8) |
|
||||
uint16_t(lowOrderBatchIndex)) };
|
||||
}
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
void forceLinkIdempotencyIdTests() {}
|
||||
|
||||
namespace {
|
||||
IdempotencyIdRef generate(Arena& arena) {
|
||||
int length = deterministicRandom()->coinflip() ? deterministicRandom()->randomInt(16, 256) : 16;
|
||||
StringRef id = makeString(length, arena);
|
||||
deterministicRandom()->randomBytes(mutateString(id), length);
|
||||
return IdempotencyIdRef(id);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
TEST_CASE("/fdbclient/IdempotencyId/basic") {
|
||||
Arena arena;
|
||||
uint16_t firstBatchIndex = deterministicRandom()->randomUInt32();
|
||||
firstBatchIndex &= 0xff7f; // ensure firstBatchIndex+5 won't change the higher order byte
|
||||
uint16_t batchIndex = firstBatchIndex;
|
||||
Version commitVersion = deterministicRandom()->randomInt64(0, std::numeric_limits<Version>::max());
|
||||
std::vector<IdempotencyIdRef> idVector; // Reference
|
||||
std::unordered_set<IdempotencyIdRef> idSet; // Make sure hash+equals works
|
||||
IdempotencyIdKVBuilder builder; // Check kv data format
|
||||
builder.setCommitVersion(commitVersion);
|
||||
|
||||
for (int i = 0; i < 5; ++i) {
|
||||
auto id = generate(arena);
|
||||
idVector.emplace_back(id);
|
||||
idSet.emplace(id);
|
||||
builder.add(id, batchIndex++);
|
||||
}
|
||||
|
||||
batchIndex = firstBatchIndex;
|
||||
Optional<KeyValue> kvOpt = builder.buildAndClear();
|
||||
ASSERT(kvOpt.present());
|
||||
const auto& kv = kvOpt.get();
|
||||
|
||||
ASSERT(idSet.size() == idVector.size());
|
||||
for (const auto& id : idVector) {
|
||||
auto commitResult = kvContainsIdempotencyId(kv, id);
|
||||
ASSERT(commitResult.present());
|
||||
ASSERT(commitResult.get().commitVersion == commitVersion);
|
||||
ASSERT(commitResult.get().batchIndex == batchIndex++);
|
||||
ASSERT(idSet.find(id) != idSet.end());
|
||||
idSet.erase(id);
|
||||
ASSERT(idSet.find(id) == idSet.end());
|
||||
}
|
||||
ASSERT(idSet.size() == 0);
|
||||
|
||||
ASSERT(!kvContainsIdempotencyId(kv, generate(arena)).present());
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
TEST_CASE("/fdbclient/IdempotencyId/serialization") {
|
||||
ASSERT(ObjectReader::fromStringRef<IdempotencyIdRef>(ObjectWriter::toValue(IdempotencyIdRef(), Unversioned()),
|
||||
Unversioned()) == IdempotencyIdRef());
|
||||
for (int i = 0; i < 1000; ++i) {
|
||||
Arena arena;
|
||||
auto id = generate(arena);
|
||||
auto serialized = ObjectWriter::toValue(id, Unversioned());
|
||||
IdempotencyIdRef t;
|
||||
ObjectReader reader(serialized.begin(), Unversioned());
|
||||
reader.deserialize(t);
|
||||
ASSERT(t == id);
|
||||
}
|
||||
return Void();
|
||||
}
|
|
@ -2356,6 +2356,21 @@ ACTOR Future<Void> forceRecovery(Reference<IClusterConnectionRecord> clusterFile
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR Future<UID> auditStorage(Reference<IClusterConnectionRecord> clusterFile, KeyRange range, AuditType type) {
|
||||
state Reference<AsyncVar<Optional<ClusterInterface>>> clusterInterface(new AsyncVar<Optional<ClusterInterface>>);
|
||||
state Future<Void> leaderMon = monitorLeader<ClusterInterface>(clusterFile, clusterInterface);
|
||||
|
||||
loop {
|
||||
while (!clusterInterface->get().present()) {
|
||||
wait(clusterInterface->onChange());
|
||||
}
|
||||
|
||||
UID auditId = wait(clusterInterface->get().get().triggerAudit.getReply(TriggerAuditRequest(type, range)));
|
||||
TraceEvent(SevDebug, "ManagementAPIAuditStorageEnd").detail("AuditID", auditId);
|
||||
return auditId;
|
||||
}
|
||||
}
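// Hypothetical call site (triggerHaAudit and its trace event name are assumed for illustration;
// auditStorage, allKeys and AuditType::ValidateHA exist in fdbclient): request an HA validation
// audit over the full key space and log the returned audit id.
ACTOR Future<Void> triggerHaAudit(Reference<IClusterConnectionRecord> clusterFile) {
    UID auditId = wait(auditStorage(clusterFile, allKeys, AuditType::ValidateHA));
    TraceEvent("ClientTriggeredHaAudit").detail("AuditID", auditId);
    return Void();
}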
|
||||
|
||||
ACTOR Future<Void> waitForPrimaryDC(Database cx, StringRef dcId) {
|
||||
state ReadYourWritesTransaction tr(cx);
|
||||
|
||||
|
|
|
@ -490,6 +490,73 @@ ThreadFuture<Void> DLTenant::waitPurgeGranulesComplete(const KeyRef& purgeKey) {
|
|||
return toThreadFuture<Void>(api, f, [](FdbCApi::FDBFuture* f, FdbCApi* api) { return Void(); });
|
||||
}
|
||||
|
||||
ThreadFuture<bool> DLTenant::blobbifyRange(const KeyRangeRef& keyRange) {
|
||||
if (!api->tenantBlobbifyRange) {
|
||||
return unsupported_operation();
|
||||
}
|
||||
|
||||
FdbCApi::FDBFuture* f = api->tenantBlobbifyRange(
|
||||
tenant, keyRange.begin.begin(), keyRange.begin.size(), keyRange.end.begin(), keyRange.end.size());
|
||||
|
||||
return toThreadFuture<bool>(api, f, [](FdbCApi::FDBFuture* f, FdbCApi* api) {
|
||||
FdbCApi::fdb_bool_t ret = false;
|
||||
ASSERT(!api->futureGetBool(f, &ret));
|
||||
return ret;
|
||||
});
|
||||
}
|
||||
|
||||
ThreadFuture<bool> DLTenant::unblobbifyRange(const KeyRangeRef& keyRange) {
|
||||
if (!api->tenantUnblobbifyRange) {
|
||||
return unsupported_operation();
|
||||
}
|
||||
|
||||
FdbCApi::FDBFuture* f = api->tenantUnblobbifyRange(
|
||||
tenant, keyRange.begin.begin(), keyRange.begin.size(), keyRange.end.begin(), keyRange.end.size());
|
||||
|
||||
return toThreadFuture<bool>(api, f, [](FdbCApi::FDBFuture* f, FdbCApi* api) {
|
||||
FdbCApi::fdb_bool_t ret = false;
|
||||
ASSERT(!api->futureGetBool(f, &ret));
|
||||
return ret;
|
||||
});
|
||||
}
|
||||
|
||||
ThreadFuture<Standalone<VectorRef<KeyRangeRef>>> DLTenant::listBlobbifiedRanges(const KeyRangeRef& keyRange,
|
||||
int rangeLimit) {
|
||||
if (!api->tenantListBlobbifiedRanges) {
|
||||
return unsupported_operation();
|
||||
}
|
||||
|
||||
FdbCApi::FDBFuture* f = api->tenantListBlobbifiedRanges(
|
||||
tenant, keyRange.begin.begin(), keyRange.begin.size(), keyRange.end.begin(), keyRange.end.size(), rangeLimit);
|
||||
|
||||
return toThreadFuture<Standalone<VectorRef<KeyRangeRef>>>(api, f, [](FdbCApi::FDBFuture* f, FdbCApi* api) {
|
||||
const FdbCApi::FDBKeyRange* keyRanges;
|
||||
int keyRangesLength;
|
||||
FdbCApi::fdb_error_t error = api->futureGetKeyRangeArray(f, &keyRanges, &keyRangesLength);
|
||||
ASSERT(!error);
|
||||
// The memory for this is stored in the FDBFuture and is released when the future gets destroyed.
|
||||
return Standalone<VectorRef<KeyRangeRef>>(VectorRef<KeyRangeRef>((KeyRangeRef*)keyRanges, keyRangesLength),
|
||||
Arena());
|
||||
});
|
||||
}
|
||||
|
||||
ThreadFuture<Version> DLTenant::verifyBlobRange(const KeyRangeRef& keyRange, Optional<Version> version) {
|
||||
if (!api->tenantVerifyBlobRange) {
|
||||
return unsupported_operation();
|
||||
}
|
||||
|
||||
Version readVersion = version.present() ? version.get() : latestVersion;
|
||||
|
||||
FdbCApi::FDBFuture* f = api->tenantVerifyBlobRange(
|
||||
tenant, keyRange.begin.begin(), keyRange.begin.size(), keyRange.end.begin(), keyRange.end.size(), readVersion);
|
||||
|
||||
return toThreadFuture<Version>(api, f, [](FdbCApi::FDBFuture* f, FdbCApi* api) {
|
||||
Version version = invalidVersion;
|
||||
ASSERT(!api->futureGetInt64(f, &version));
|
||||
return version;
|
||||
});
|
||||
}
|
||||
|
||||
// DLDatabase
|
||||
DLDatabase::DLDatabase(Reference<FdbCApi> api, ThreadFuture<FdbCApi::FDBDatabase*> dbFuture) : api(api), db(nullptr) {
|
||||
addref();
|
||||
|
@ -827,12 +894,32 @@ void DLApi::init() {
|
|||
lib,
|
||||
fdbCPath,
|
||||
"fdb_tenant_purge_blob_granules",
|
||||
headerVersion >= ApiVersion::withBlobRangeApi().version());
|
||||
headerVersion >= ApiVersion::withTenantBlobRangeApi().version());
|
||||
loadClientFunction(&api->tenantWaitPurgeGranulesComplete,
|
||||
lib,
|
||||
fdbCPath,
|
||||
"fdb_tenant_wait_purge_granules_complete",
|
||||
headerVersion >= ApiVersion::withBlobRangeApi().version());
|
||||
headerVersion >= ApiVersion::withTenantBlobRangeApi().version());
|
||||
loadClientFunction(&api->tenantBlobbifyRange,
|
||||
lib,
|
||||
fdbCPath,
|
||||
"fdb_tenant_blobbify_range",
|
||||
headerVersion >= ApiVersion::withTenantBlobRangeApi().version());
|
||||
loadClientFunction(&api->tenantUnblobbifyRange,
|
||||
lib,
|
||||
fdbCPath,
|
||||
"fdb_tenant_unblobbify_range",
|
||||
headerVersion >= ApiVersion::withTenantBlobRangeApi().version());
|
||||
loadClientFunction(&api->tenantListBlobbifiedRanges,
|
||||
lib,
|
||||
fdbCPath,
|
||||
"fdb_tenant_list_blobbified_ranges",
|
||||
headerVersion >= ApiVersion::withTenantBlobRangeApi().version());
|
||||
loadClientFunction(&api->tenantVerifyBlobRange,
|
||||
lib,
|
||||
fdbCPath,
|
||||
"fdb_tenant_verify_blob_range",
|
||||
headerVersion >= ApiVersion::withTenantBlobRangeApi().version());
|
||||
loadClientFunction(&api->tenantDestroy, lib, fdbCPath, "fdb_tenant_destroy", headerVersion >= 710);
|
||||
|
||||
loadClientFunction(&api->transactionSetOption, lib, fdbCPath, "fdb_transaction_set_option", headerVersion >= 0);
|
||||
|
@ -1608,13 +1695,41 @@ Reference<ITransaction> MultiVersionTenant::createTransaction() {
|
|||
}
|
||||
|
||||
ThreadFuture<Key> MultiVersionTenant::purgeBlobGranules(const KeyRangeRef& keyRange, Version purgeVersion, bool force) {
|
||||
auto f = tenantState->db ? tenantState->db->purgeBlobGranules(keyRange, purgeVersion, force)
|
||||
: ThreadFuture<Key>(Never());
|
||||
return abortableFuture(f, tenantState->db->dbState->dbVar->get().onChange);
|
||||
auto tenantDb = tenantState->tenantVar->get();
|
||||
auto f =
|
||||
tenantDb.value ? tenantDb.value->purgeBlobGranules(keyRange, purgeVersion, force) : ThreadFuture<Key>(Never());
|
||||
return abortableFuture(f, tenantDb.onChange);
|
||||
}
|
||||
ThreadFuture<Void> MultiVersionTenant::waitPurgeGranulesComplete(const KeyRef& purgeKey) {
|
||||
auto f = tenantState->db ? tenantState->db->waitPurgeGranulesComplete(purgeKey) : ThreadFuture<Void>(Never());
|
||||
return abortableFuture(f, tenantState->db->dbState->dbVar->get().onChange);
|
||||
auto tenantDb = tenantState->tenantVar->get();
|
||||
auto f = tenantDb.value ? tenantDb.value->waitPurgeGranulesComplete(purgeKey) : ThreadFuture<Void>(Never());
|
||||
return abortableFuture(f, tenantDb.onChange);
|
||||
}
|
||||
|
||||
ThreadFuture<bool> MultiVersionTenant::blobbifyRange(const KeyRangeRef& keyRange) {
|
||||
auto tenantDb = tenantState->tenantVar->get();
|
||||
auto f = tenantDb.value ? tenantDb.value->blobbifyRange(keyRange) : ThreadFuture<bool>(Never());
|
||||
return abortableFuture(f, tenantDb.onChange);
|
||||
}
|
||||
|
||||
ThreadFuture<bool> MultiVersionTenant::unblobbifyRange(const KeyRangeRef& keyRange) {
|
||||
auto tenantDb = tenantState->tenantVar->get();
|
||||
auto f = tenantDb.value ? tenantDb.value->unblobbifyRange(keyRange) : ThreadFuture<bool>(Never());
|
||||
return abortableFuture(f, tenantDb.onChange);
|
||||
}
|
||||
|
||||
ThreadFuture<Standalone<VectorRef<KeyRangeRef>>> MultiVersionTenant::listBlobbifiedRanges(const KeyRangeRef& keyRange,
|
||||
int rangeLimit) {
|
||||
auto tenantDb = tenantState->tenantVar->get();
|
||||
auto f = tenantDb.value ? tenantDb.value->listBlobbifiedRanges(keyRange, rangeLimit)
|
||||
: ThreadFuture<Standalone<VectorRef<KeyRangeRef>>>(Never());
|
||||
return abortableFuture(f, tenantDb.onChange);
|
||||
}
|
||||
|
||||
ThreadFuture<Version> MultiVersionTenant::verifyBlobRange(const KeyRangeRef& keyRange, Optional<Version> version) {
|
||||
auto tenantDb = tenantState->tenantVar->get();
|
||||
auto f = tenantDb.value ? tenantDb.value->verifyBlobRange(keyRange, version) : ThreadFuture<Version>(Never());
|
||||
return abortableFuture(f, tenantDb.onChange);
|
||||
}
|
||||
|
||||
MultiVersionTenant::TenantState::TenantState(Reference<MultiVersionDatabase> db, TenantNameRef tenantName)
|
||||
|
@ -2546,8 +2661,9 @@ void MultiVersionApi::setupNetwork() {
|
|||
externalClients[filename] = {};
|
||||
auto libCopies = copyExternalLibraryPerThread(path);
|
||||
for (int idx = 0; idx < libCopies.size(); ++idx) {
|
||||
bool unlinkOnLoad = libCopies[idx].second && CLIENT_KNOBS->DELETE_NATIVE_LIB_AFTER_LOADING;
|
||||
externalClients[filename].push_back(Reference<ClientInfo>(
|
||||
new ClientInfo(new DLApi(libCopies[idx].first, libCopies[idx].second /*unlink on load*/),
|
||||
new ClientInfo(new DLApi(libCopies[idx].first, unlinkOnLoad /*unlink on load*/),
|
||||
path,
|
||||
useFutureVersion,
|
||||
idx)));
|
||||
|
|
|
@ -1770,7 +1770,10 @@ Future<int64_t> ReadYourWritesTransaction::getEstimatedRangeSizeBytes(const KeyR
|
|||
if (resetPromise.isSet())
|
||||
return resetPromise.getFuture().getError();
|
||||
|
||||
return map(waitOrError(tr.getDatabase()->getStorageMetrics(keys, -1), resetPromise.getFuture()),
|
||||
// Pass in the TransactionState only if tenant is present
|
||||
Optional<Reference<TransactionState>> trState =
|
||||
tr.trState->hasTenant() ? tr.trState : Optional<Reference<TransactionState>>();
|
||||
return map(waitOrError(tr.getDatabase()->getStorageMetrics(keys, -1, trState), resetPromise.getFuture()),
|
||||
[](const StorageMetrics& m) { return m.bytes; });
|
||||
}
|
||||
|
||||
|
|
|
@ -582,7 +582,8 @@ const KeyRef JSONSchemas::statusSchema = R"statusSchema(
|
|||
"duplicate_mutation_fetch_timeout",
|
||||
"primary_dc_missing",
|
||||
"fetch_primary_dc_timeout",
|
||||
"fetch_storage_wiggler_stats_timeout"
|
||||
"fetch_storage_wiggler_stats_timeout",
|
||||
"fetch_consistency_scan_info_timeout"
|
||||
]
|
||||
},
|
||||
"issues":[
|
||||
|
|
|
@ -39,11 +39,12 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( ENABLE_VERSION_VECTOR, false );
|
||||
init( ENABLE_VERSION_VECTOR_TLOG_UNICAST, false );
|
||||
|
||||
bool buggifyShortReadWindow = randomize && BUGGIFY && !ENABLE_VERSION_VECTOR;
|
||||
bool buggifyShortReadWindow = randomize && BUGGIFY && !ENABLE_VERSION_VECTOR;
|
||||
init( MAX_READ_TRANSACTION_LIFE_VERSIONS, 5 * VERSIONS_PER_SECOND ); if (randomize && BUGGIFY) MAX_READ_TRANSACTION_LIFE_VERSIONS = VERSIONS_PER_SECOND; else if (buggifyShortReadWindow) MAX_READ_TRANSACTION_LIFE_VERSIONS = std::max<int>(1, 0.1 * VERSIONS_PER_SECOND); else if( randomize && BUGGIFY ) MAX_READ_TRANSACTION_LIFE_VERSIONS = 10 * VERSIONS_PER_SECOND;
|
||||
init( MAX_WRITE_TRANSACTION_LIFE_VERSIONS, 5 * VERSIONS_PER_SECOND ); if (randomize && BUGGIFY) MAX_WRITE_TRANSACTION_LIFE_VERSIONS=std::max<int>(1, 1 * VERSIONS_PER_SECOND);
|
||||
init( MAX_COMMIT_BATCH_INTERVAL, 2.0 ); if( randomize && BUGGIFY ) MAX_COMMIT_BATCH_INTERVAL = 0.5; // Each commit proxy generates a CommitTransactionBatchRequest at least this often, so that versions always advance smoothly
|
||||
MAX_COMMIT_BATCH_INTERVAL = std::min(MAX_COMMIT_BATCH_INTERVAL, MAX_READ_TRANSACTION_LIFE_VERSIONS/double(2*VERSIONS_PER_SECOND)); // Ensure that the proxy commits 2 times every MAX_READ_TRANSACTION_LIFE_VERSIONS, otherwise the master will not give out versions fast enough
|
||||
MAX_COMMIT_BATCH_INTERVAL = std::min(MAX_COMMIT_BATCH_INTERVAL, MAX_WRITE_TRANSACTION_LIFE_VERSIONS/double(2*VERSIONS_PER_SECOND)); // Ensure that the proxy commits 2 times every MAX_WRITE_TRANSACTION_LIFE_VERSIONS, otherwise the master will not give out versions fast enough
|
||||
init( MAX_VERSION_RATE_MODIFIER, 0.1 );
|
||||
init( MAX_VERSION_RATE_OFFSET, VERSIONS_PER_SECOND ); // If the calculated version is more than this amount away from the expected version, it will be clamped to this value. This prevents huge version jumps.
|
||||
init( ENABLE_VERSION_VECTOR_HA_OPTIMIZATION, false );
|
||||
|
@ -296,7 +297,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( DD_STORAGE_WIGGLE_MIN_SS_AGE_SEC, isSimulated ? 2 : 21 * 60 * 60 * 24 ); if(randomize && BUGGIFY) DD_STORAGE_WIGGLE_MIN_SS_AGE_SEC = isSimulated ? 0: 120;
|
||||
init( DD_TENANT_AWARENESS_ENABLED, false );
|
||||
init( TENANT_CACHE_LIST_REFRESH_INTERVAL, 2 ); if( randomize && BUGGIFY ) TENANT_CACHE_LIST_REFRESH_INTERVAL = deterministicRandom()->randomInt(1, 10);
|
||||
|
||||
init( TENANT_CACHE_STORAGE_REFRESH_INTERVAL, 2 ); if( randomize && BUGGIFY ) TENANT_CACHE_STORAGE_REFRESH_INTERVAL = deterministicRandom()->randomInt(1, 10);
|
||||
|
||||
// TeamRemover
|
||||
init( TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER, false ); if( randomize && BUGGIFY ) TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER = deterministicRandom()->random01() < 0.1 ? true : false; // false by default. disable the consistency check when it's true
|
||||
|
@ -570,6 +571,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( RATEKEEPER_FAILURE_TIME, 1.0 );
|
||||
init( CONSISTENCYSCAN_FAILURE_TIME, 1.0 );
|
||||
init( BLOB_MANAGER_FAILURE_TIME, 1.0 );
|
||||
init( BLOB_MIGRATOR_FAILURE_TIME, 1.0 );
|
||||
init( REPLACE_INTERFACE_DELAY, 60.0 );
|
||||
init( REPLACE_INTERFACE_CHECK_DELAY, 5.0 );
|
||||
init( COORDINATOR_REGISTER_INTERVAL, 5.0 );
|
||||
|
@ -725,6 +727,8 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( GLOBAL_TAG_THROTTLING_MIN_RATE, 1.0 );
|
||||
init( GLOBAL_TAG_THROTTLING_FOLDING_TIME, 10.0 );
|
||||
init( GLOBAL_TAG_THROTTLING_RW_FUNGIBILITY_RATIO, 5.0 );
|
||||
init( GLOBAL_TAG_THROTTLING_MAX_TAGS_TRACKED, 10 );
|
||||
init( GLOBAL_TAG_THROTTLING_TAG_EXPIRE_AFTER, 240.0 );
|
||||
|
||||
//Storage Metrics
|
||||
init( STORAGE_METRICS_AVERAGE_INTERVAL, 120.0 );
|
||||
|
@ -752,6 +756,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( FETCH_KEYS_PARALLELISM_FULL, 6 );
|
||||
init( FETCH_KEYS_LOWER_PRIORITY, 0 );
|
||||
init( SERVE_FETCH_CHECKPOINT_PARALLELISM, 4 );
|
||||
init( SERVE_AUDIT_STORAGE_PARALLELISM, 2 );
|
||||
init( CHANGE_FEED_DISK_READS_PARALLELISM, 1000 ); if( randomize && BUGGIFY ) CHANGE_FEED_DISK_READS_PARALLELISM = 20;
|
||||
init( BUGGIFY_BLOCK_BYTES, 10000 );
|
||||
init( STORAGE_RECOVERY_VERSION_LAG_LIMIT, 2 * MAX_READ_TRANSACTION_LIFE_VERSIONS );
|
||||
|
@ -991,8 +996,8 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
// Blob Metadata
|
||||
init( BLOB_METADATA_CACHE_TTL, isSimulated ? 120 : 24 * 60 * 60 );
|
||||
if ( randomize && BUGGIFY) { BLOB_METADATA_CACHE_TTL = deterministicRandom()->randomInt(50, 100); }
|
||||
init( BLOB_METADATA_REFRESH_INTERVAL, isSimulated ? 60 : 12 * 60 * 60 );
|
||||
if ( randomize && BUGGIFY) { BLOB_METADATA_REFRESH_INTERVAL = deterministicRandom()->randomInt(20, 40); }
|
||||
init( BLOB_METADATA_REFRESH_INTERVAL, isSimulated ? 60 : 60 * 60 );
|
||||
if ( randomize && BUGGIFY) { BLOB_METADATA_REFRESH_INTERVAL = deterministicRandom()->randomInt(5, 120); }
|
||||
|
||||
// HTTP KMS Connector
|
||||
init( REST_KMS_CONNECTOR_KMS_DISCOVERY_URL_MODE, "file");
|
||||
|
@ -1007,6 +1012,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
// NOTE: Care must be taken when attempting to update below configurations for a up/running FDB cluster.
|
||||
init( REST_KMS_CONNECTOR_DISCOVER_KMS_URL_FILE, "");
|
||||
init( REST_KMS_CONNECTOR_GET_ENCRYPTION_KEYS_ENDPOINT, "");
|
||||
init( REST_KMS_CONNECTOR_GET_BLOB_METADATA_ENDPOINT, "");
|
||||
// Details to fetch validation token from a localhost file
|
||||
// acceptable format: "<token_name1>#<absolute_file_path1>,<token_name2>#<absolute_file_path2>,.."
|
||||
// NOTE: 'token_name' can NOT contain '#' character
|
||||
|
|
|
@ -342,7 +342,7 @@ void TSS_traceMismatch(TraceEvent& event,
|
|||
// change feed
|
||||
template <>
|
||||
bool TSS_doCompare(const OverlappingChangeFeedsReply& src, const OverlappingChangeFeedsReply& tss) {
|
||||
ASSERT(false);
|
||||
// We duplicate for load, no need to validate replies
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -286,6 +286,41 @@ const KeyRangeRef writeConflictRangeKeysRange = KeyRangeRef("\xff\xff/transactio
|
|||
|
||||
const KeyRef clusterIdKey = "\xff/clusterId"_sr;
|
||||
|
||||
const KeyRangeRef auditRange = KeyRangeRef("\xff/audit/"_sr, "\xff/audit0"_sr);
|
||||
const KeyRef auditPrefix = auditRange.begin;
|
||||
|
||||
const Key auditRangeKey(const AuditType type, const UID& auditId, const KeyRef& key) {
|
||||
BinaryWriter wr(Unversioned());
|
||||
wr.serializeBytes(auditPrefix);
|
||||
wr << static_cast<uint8_t>(type);
|
||||
wr.serializeBytes("/"_sr);
|
||||
wr << auditId;
|
||||
wr.serializeBytes("/"_sr);
|
||||
wr.serializeBytes(key);
|
||||
return wr.toValue();
|
||||
}
|
||||
|
||||
const Key auditRangePrefix(const AuditType type, const UID& auditId) {
|
||||
BinaryWriter wr(Unversioned());
|
||||
wr.serializeBytes(auditPrefix);
|
||||
wr << static_cast<uint8_t>(type);
|
||||
wr.serializeBytes("/"_sr);
|
||||
wr << auditId;
|
||||
wr.serializeBytes("/"_sr);
|
||||
return wr.toValue();
|
||||
}
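// Key layout produced by the two helpers above (annotated for clarity):
//   auditRangeKey(type, id, key) -> "\xff/audit/" + uint8(type) + "/" + <serialized UID> + "/" + key
//   auditRangePrefix(type, id)   -> same bytes without the trailing user key, usable as a
//                                   prefix for range scans over one audit's per-key states.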
|
||||
|
||||
const Value auditStorageStateValue(const AuditStorageState& auditStorageState) {
|
||||
return ObjectWriter::toValue(auditStorageState, IncludeVersion());
|
||||
}
|
||||
|
||||
AuditStorageState decodeAuditStorageState(const ValueRef& value) {
|
||||
AuditStorageState auditState;
|
||||
ObjectReader reader(value.begin(), IncludeVersion());
|
||||
reader.deserialize(auditState);
|
||||
return auditState;
|
||||
}
|
||||
|
||||
const KeyRef checkpointPrefix = "\xff/checkpoint/"_sr;
|
||||
|
||||
const Key checkpointKeyFor(UID checkpointID) {
|
||||
|
@ -1629,6 +1664,9 @@ Key storageQuotaKey(StringRef tenantName) {
|
|||
return tenantName.withPrefix(storageQuotaPrefix);
|
||||
}
|
||||
|
||||
const KeyRangeRef idempotencyIdKeys("\xff\x02/idmp/"_sr, "\xff\x02/idmp0"_sr);
|
||||
const KeyRef idempotencyIdsExpiredVersion("\xff\x02/idmpExpiredVersion"_sr);
|
||||
|
||||
// for tests
|
||||
void testSSISerdes(StorageServerInterface const& ssi) {
|
||||
printf("ssi=\nid=%s\nlocality=%s\nisTss=%s\ntssId=%s\nacceptingRequests=%s\naddress=%s\ngetValue=%s\n\n\n",
|
||||
|
|
|
@ -145,13 +145,13 @@ Value ThrottleApi::TagQuotaValue::toValue() const {
|
|||
|
||||
ThrottleApi::TagQuotaValue ThrottleApi::TagQuotaValue::fromValue(ValueRef value) {
|
||||
auto tuple = Tuple::unpack(value);
|
||||
if (tuple.size() != 4) {
|
||||
if (tuple.size() != 2) {
|
||||
throw invalid_throttle_quota_value();
|
||||
}
|
||||
TagQuotaValue result;
|
||||
try {
|
||||
result.reservedQuota = tuple.getDouble(0);
|
||||
result.totalQuota = tuple.getDouble(1);
|
||||
result.reservedQuota = tuple.getInt(0);
|
||||
result.totalQuota = tuple.getInt(1);
|
||||
} catch (Error& e) {
|
||||
TraceEvent(SevWarnAlways, "TagQuotaValueFailedToDeserialize").error(e);
|
||||
throw invalid_throttle_quota_value();
|
||||
|
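// Illustrative encoding matching the parser above (sketch, not part of this change): the
// quota value is now a 2-element tuple of integers rather than a 4-element tuple of doubles.
Value quota = Tuple::makeTuple(int64_t{ 100 }, int64_t{ 1000 }).pack(); // reserved, total
ThrottleApi::TagQuotaValue parsed = ThrottleApi::TagQuotaValue::fromValue(quota);
ASSERT(parsed.reservedQuota == 100 && parsed.totalQuota == 1000);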
|
|
@ -246,6 +246,47 @@ ThreadFuture<Void> ThreadSafeTenant::waitPurgeGranulesComplete(const KeyRef& pur
|
|||
});
|
||||
}
|
||||
|
||||
ThreadFuture<bool> ThreadSafeTenant::blobbifyRange(const KeyRangeRef& keyRange) {
|
||||
DatabaseContext* db = this->db->db;
|
||||
TenantName tenantName = this->name;
|
||||
KeyRange range = keyRange;
|
||||
return onMainThread([=]() -> Future<bool> {
|
||||
db->checkDeferredError();
|
||||
return db->blobbifyRange(range, tenantName);
|
||||
});
|
||||
}
|
||||
|
||||
ThreadFuture<bool> ThreadSafeTenant::unblobbifyRange(const KeyRangeRef& keyRange) {
|
||||
DatabaseContext* db = this->db->db;
|
||||
TenantName tenantName = this->name;
|
||||
KeyRange range = keyRange;
|
||||
return onMainThread([=]() -> Future<bool> {
|
||||
db->checkDeferredError();
|
||||
return db->unblobbifyRange(range, tenantName);
|
||||
});
|
||||
}
|
||||
|
||||
ThreadFuture<Standalone<VectorRef<KeyRangeRef>>> ThreadSafeTenant::listBlobbifiedRanges(const KeyRangeRef& keyRange,
|
||||
int rangeLimit) {
|
||||
DatabaseContext* db = this->db->db;
|
||||
TenantName tenantName = this->name;
|
||||
KeyRange range = keyRange;
|
||||
return onMainThread([=]() -> Future<Standalone<VectorRef<KeyRangeRef>>> {
|
||||
db->checkDeferredError();
|
||||
return db->listBlobbifiedRanges(range, rangeLimit, tenantName);
|
||||
});
|
||||
}
|
||||
|
||||
ThreadFuture<Version> ThreadSafeTenant::verifyBlobRange(const KeyRangeRef& keyRange, Optional<Version> version) {
|
||||
DatabaseContext* db = this->db->db;
|
||||
TenantName tenantName = this->name;
|
||||
KeyRange range = keyRange;
|
||||
return onMainThread([=]() -> Future<Version> {
|
||||
db->checkDeferredError();
|
||||
return db->verifyBlobRange(range, version, tenantName);
|
||||
});
|
||||
}
|
||||
|
||||
ThreadSafeTenant::~ThreadSafeTenant() {}
|
||||
|
||||
ThreadSafeTransaction::ThreadSafeTransaction(DatabaseContext* cx,
|
||||
|
|
|
@ -99,6 +99,48 @@ Tuple Tuple::unpack(StringRef const& str, bool exclude_incomplete) {
|
|||
return Tuple(str, exclude_incomplete);
|
||||
}
|
||||
|
||||
std::string Tuple::tupleToString(const Tuple& tuple) {
|
||||
std::string str;
|
||||
if (tuple.size() > 1) {
|
||||
str += "(";
|
||||
}
|
||||
for (int i = 0; i < tuple.size(); ++i) {
|
||||
Tuple::ElementType type = tuple.getType(i);
|
||||
if (type == Tuple::NULL_TYPE) {
|
||||
str += "NULL";
|
||||
} else if (type == Tuple::BYTES || type == Tuple::UTF8) {
|
||||
if (type == Tuple::UTF8) {
|
||||
str += "u";
|
||||
}
|
||||
str += "\'" + tuple.getString(i).printable() + "\'";
|
||||
} else if (type == Tuple::INT) {
|
||||
str += format("%ld", tuple.getInt(i));
|
||||
} else if (type == Tuple::FLOAT) {
|
||||
str += format("%f", tuple.getFloat(i));
|
||||
} else if (type == Tuple::DOUBLE) {
|
||||
str += format("%f", tuple.getDouble(i));
|
||||
} else if (type == Tuple::BOOL) {
|
||||
str += tuple.getBool(i) ? "true" : "false";
|
||||
} else if (type == Tuple::VERSIONSTAMP) {
|
||||
TupleVersionstamp versionstamp = tuple.getVersionstamp(i);
|
||||
str += format("Transaction Version: '%ld', BatchNumber: '%hd', UserVersion : '%hd'",
|
||||
versionstamp.getVersion(),
|
||||
versionstamp.getBatchNumber(),
|
||||
versionstamp.getUserVersion());
|
||||
} else {
|
||||
ASSERT(false);
|
||||
}
|
||||
|
||||
if (i < tuple.size() - 1) {
|
||||
str += ", ";
|
||||
}
|
||||
}
|
||||
if (tuple.size() > 1) {
|
||||
str += ")";
|
||||
}
|
||||
return str;
|
||||
}
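// Example rendering (illustrative only): tupleToString() is primarily a debugging aid for
// tuple-encoded values such as the knob overrides and tag quotas above.
Tuple t = Tuple::makeTuple("quota"_sr, int64_t{ 42 });
std::string s = Tuple::tupleToString(t); // -> ('quota', 42)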
|
||||
|
||||
Tuple Tuple::unpackUserType(StringRef const& str, bool exclude_incomplete) {
|
||||
return Tuple(str, exclude_incomplete, true);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,111 @@
|
|||
/*
|
||||
* Audit.h
|
||||
*
|
||||
* This source file is part of the FoundationDB open source project
|
||||
*
|
||||
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef FDBCLIENT_AUDIT_H
|
||||
#define FDBCLIENT_AUDIT_H
|
||||
#pragma once
|
||||
|
||||
#include "fdbclient/FDBTypes.h"
|
||||
#include "fdbrpc/fdbrpc.h"
|
||||
|
||||
enum class AuditPhase : uint8_t {
|
||||
Invalid = 0,
|
||||
Running = 1,
|
||||
Complete = 2,
|
||||
Error = 3,
|
||||
Failed = 4,
|
||||
};
|
||||
|
||||
enum class AuditType : uint8_t {
|
||||
Invalid = 0,
|
||||
ValidateHA = 1,
|
||||
};
|
||||
|
||||
struct AuditStorageState {
|
||||
constexpr static FileIdentifier file_identifier = 13804340;
|
||||
|
||||
AuditStorageState() = default;
|
||||
AuditStorageState(UID id, AuditType type) : id(id), type(static_cast<uint8_t>(type)) {}
|
||||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(ar, id, type, phase, error);
|
||||
}
|
||||
|
||||
void setType(AuditType type) { this->type = static_cast<uint8_t>(type); }
|
||||
AuditType getType() const { return static_cast<AuditType>(this->type); }
|
||||
|
||||
void setPhase(AuditPhase phase) { this->phase = static_cast<uint8_t>(phase); }
|
||||
AuditPhase getPhase() const { return static_cast<AuditPhase>(this->phase); }
|
||||
|
||||
UID id;
|
||||
uint8_t type;
|
||||
uint8_t phase;
|
||||
std::string error;
|
||||
};
|
||||
|
||||
struct AuditStorageRequest {
|
||||
constexpr static FileIdentifier file_identifier = 13804341;
|
||||
|
||||
AuditStorageRequest() = default;
|
||||
AuditStorageRequest(UID id, KeyRange range, AuditType type)
|
||||
: id(id), range(range), type(static_cast<uint8_t>(type)) {}
|
||||
|
||||
void setType(AuditType type) { this->type = static_cast<uint8_t>(type); }
|
||||
AuditType getType() const { return static_cast<AuditType>(this->type); }
|
||||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(ar, id, range, type, targetServers, reply);
|
||||
}
|
||||
|
||||
UID id;
|
||||
KeyRange range;
|
||||
uint8_t type;
|
||||
std::vector<UID> targetServers;
|
||||
ReplyPromise<AuditStorageState> reply;
|
||||
};
|
||||
|
||||
// Triggers an audit of the specified type; an audit id is returned if the audit is scheduled successfully.
// If there is already a running audit, the corresponding id will be returned, unless force is true.
// When force is set, the ongoing audit will be cancelled and a new audit will be scheduled.
|
||||
struct TriggerAuditRequest {
|
||||
constexpr static FileIdentifier file_identifier = 1384445;
|
||||
|
||||
TriggerAuditRequest() = default;
|
||||
TriggerAuditRequest(AuditType type, KeyRange range)
|
||||
: type(static_cast<uint8_t>(type)), range(range), force(false), async(false) {}
|
||||
|
||||
void setType(AuditType type) { this->type = static_cast<uint8_t>(type); }
|
||||
AuditType getType() const { return static_cast<AuditType>(this->type); }
|
||||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(ar, type, range, force, async, reply);
|
||||
}
|
||||
|
||||
uint8_t type;
|
||||
KeyRange range;
|
||||
bool force;
|
||||
bool async;
|
||||
ReplyPromise<UID> reply;
|
||||
};
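// A hedged usage sketch (editor's illustration, not part of this diff). It assumes an
// ACTOR that already holds a resolved ClusterInterface in `ccInterface`; the request and
// reply types come from the structs above, and the triggerAudit stream is added to
// ClusterInterface later in this change.
state TriggerAuditRequest req(AuditType::ValidateHA, allKeys);
req.force = false; // reuse the id of an already running audit, if any
UID auditId = wait(ccInterface.triggerAudit.getReply(req));
TraceEvent("AuditScheduled").detail("AuditID", auditId);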
|
||||
|
||||
#endif
|
|
@ -0,0 +1,34 @@
|
|||
/*
|
||||
* AuditUtils.actor.h
|
||||
*
|
||||
* This source file is part of the FoundationDB open source project
|
||||
*
|
||||
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef FDBCLIENT_AUDITUTILS_ACTOR_H
|
||||
#define FDBCLIENT_AUDITUTILS_ACTOR_H
|
||||
#pragma once
|
||||
|
||||
#include "fdbclient/Audit.h"
|
||||
#include "fdbclient/FDBTypes.h"
|
||||
#include "fdbrpc/fdbrpc.h"
|
||||
|
||||
#include "flow/actorcompiler.h" // has to be last include
|
||||
|
||||
ACTOR Future<Void> persistAuditStorageState(Key key, AuditStorageState auditState);
|
||||
|
||||
#include "flow/unactorcompiler.h"
|
||||
#endif
|
|
@ -106,6 +106,8 @@ public:
|
|||
std::array<CounterSet, int(UsageType::MAX)> counterSets;
|
||||
};
|
||||
|
||||
std::string toString(BlobCipherMetrics::UsageType type);
|
||||
|
||||
// Encryption operations buffer management
|
||||
// Approach limits number of copies needed during encryption or decryption operations.
|
||||
// For encryption EncryptBuf is allocated using client supplied Arena and provided to AES library to capture
|
||||
|
@ -185,7 +187,7 @@ struct hash<BlobCipherDetails> {
|
|||
|
||||
#pragma pack(push, 1) // exact fit - no padding
|
||||
typedef struct BlobCipherEncryptHeader {
|
||||
static constexpr int headerSize = 136;
|
||||
static constexpr int headerSize = 104;
|
||||
union {
|
||||
struct {
|
||||
uint8_t size; // reading first byte is sufficient to determine header
|
||||
|
@ -210,29 +212,22 @@ typedef struct BlobCipherEncryptHeader {
|
|||
// reads. FIPS compliance recommendation is to leverage cryptographic digest mechanism to generate 'authentication
|
||||
// token' (crypto-secure) to protect against malicious tampering and/or bit rot/flip scenarios.
|
||||
|
||||
union {
|
||||
// Encryption header support two modes of generation 'authentication tokens':
|
||||
// 1) SingleAuthTokenMode: the scheme generates single crypto-secrure auth token to protect {cipherText +
|
||||
// header} payload. Scheme is geared towards optimizing cost due to crypto-secure auth-token generation,
|
||||
// however, on decryption client needs to be read 'header' + 'encrypted-buffer' to validate the 'auth-token'.
|
||||
// The scheme is ideal for usecases where payload represented by the encryptionHeader is not large and it is
|
||||
// desirable to minimize CPU/latency penalty due to crypto-secure ops, such as: CommitProxies encrypted inline
|
||||
// transactions, StorageServer encrypting pages etc. 2) MultiAuthTokenMode: Scheme generates separate authTokens
|
||||
// for 'encrypted buffer' & 'encryption-header'. The scheme is ideal where payload represented by
|
||||
// encryptionHeader is large enough such that it is desirable to optimize cost of upfront reading full
|
||||
// 'encrypted buffer', compared to reading only encryptionHeader and ensuring its sanity; for instance:
|
||||
// backup-files.
|
||||
// The encryption header supports two modes of generating 'authentication tokens':
|
||||
// 1) SingleAuthTokenMode: the scheme generates a single crypto-secure auth token to protect {cipherText +
|
||||
// header} payload. Scheme is geared towards optimizing cost due to crypto-secure auth-token generation,
|
||||
// however, on decryption the client needs to read 'header' + 'encrypted-buffer' to validate the 'auth-token'.
|
||||
// The scheme is ideal for use cases where the payload represented by the encryptionHeader is not large and it is
|
||||
// desirable to minimize CPU/latency penalty due to crypto-secure ops, such as: CommitProxies encrypted inline
|
||||
// transactions, StorageServer encrypting pages etc.
|
||||
// SOMEDAY: Another potential scheme could be 'MultiAuthTokenMode': Scheme generates separate authTokens
|
||||
// for 'encrypted buffer' & 'encryption-header'. The scheme is ideal where payload represented by
|
||||
// encryptionHeader is large enough such that it is desirable to optimize cost of upfront reading full
|
||||
// 'encrypted buffer', compared to reading only encryptionHeader and ensuring its sanity; for instance:
|
||||
// backup-files.
|
||||
|
||||
struct {
|
||||
// Cipher text authentication token
|
||||
uint8_t cipherTextAuthToken[AUTH_TOKEN_MAX_SIZE]{};
|
||||
uint8_t headerAuthToken[AUTH_TOKEN_MAX_SIZE]{};
|
||||
} multiAuthTokens;
|
||||
struct {
|
||||
uint8_t authToken[AUTH_TOKEN_MAX_SIZE]{};
|
||||
uint8_t _reserved[AUTH_TOKEN_MAX_SIZE]{};
|
||||
} singleAuthToken;
|
||||
};
|
||||
struct {
|
||||
uint8_t authToken[AUTH_TOKEN_MAX_SIZE]{};
|
||||
} singleAuthToken;
|
||||
|
||||
BlobCipherEncryptHeader() {}
|
||||
|
||||
|
@ -628,10 +623,6 @@ private:
|
|||
const int ciphertextLen,
|
||||
const BlobCipherEncryptHeader& header,
|
||||
Arena& arena);
|
||||
void verifyHeaderMultiAuthToken(const uint8_t* ciphertext,
|
||||
const int ciphertextLen,
|
||||
const BlobCipherEncryptHeader& header,
|
||||
Arena& arena);
|
||||
};
|
||||
|
||||
class HmacSha256DigestGen final : NonCopyable {
|
||||
|
|
|
@ -33,13 +33,15 @@ struct BlobConnectionProvider : NonCopyable, ReferenceCounted<BlobConnectionProv
|
|||
// something returned from createForWrite
|
||||
virtual Reference<BackupContainerFileSystem> getForRead(std::string filePath) = 0;
|
||||
|
||||
virtual bool isExpired() const = 0;
|
||||
virtual bool needsRefresh() const = 0;
|
||||
virtual void update(Standalone<BlobMetadataDetailsRef> newBlobMetadata) = 0;
|
||||
|
||||
virtual ~BlobConnectionProvider() {}
|
||||
|
||||
static Reference<BlobConnectionProvider> newBlobConnectionProvider(std::string blobUrl);
|
||||
|
||||
static Reference<BlobConnectionProvider> newBlobConnectionProvider(Standalone<BlobMetadataDetailsRef> blobMetadata);
|
||||
|
||||
// TODO add update impl
|
||||
};
|
||||
|
||||
#endif
|
|
@ -56,10 +56,18 @@ struct GranuleDeltas : VectorRef<MutationsAndVersionRef> {
|
|||
};
|
||||
|
||||
struct GranuleMaterializeStats {
|
||||
// file-level stats
|
||||
int64_t inputBytes;
|
||||
int64_t outputBytes;
|
||||
|
||||
GranuleMaterializeStats() : inputBytes(0), outputBytes(0) {}
|
||||
// merge stats
|
||||
int32_t snapshotRows;
|
||||
int32_t rowsCleared;
|
||||
int32_t rowsInserted;
|
||||
int32_t rowsUpdated;
|
||||
|
||||
GranuleMaterializeStats()
|
||||
: inputBytes(0), outputBytes(0), snapshotRows(0), rowsCleared(0), rowsInserted(0), rowsUpdated(0) {}
|
||||
};
|
||||
|
||||
struct BlobGranuleCipherKeysMeta {
|
||||
|
|
|
@ -51,7 +51,8 @@ RangeResult materializeBlobGranule(const BlobGranuleChunkRef& chunk,
|
|||
Version beginVersion,
|
||||
Version readVersion,
|
||||
Optional<StringRef> snapshotData,
|
||||
StringRef deltaFileData[]);
|
||||
StringRef deltaFileData[],
|
||||
GranuleMaterializeStats& stats);
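// A minimal sketch of the new call shape (editor's illustration; `chunk`, `keyRange`, the
// versions, `snapshotData`, and `deltaData` are placeholders assumed to come from the
// surrounding blob granule read path): callers now pass a GranuleMaterializeStats that
// accumulates the input/output bytes and row counters defined above.
GranuleMaterializeStats stats;
RangeResult rows =
    materializeBlobGranule(chunk, keyRange, beginVersion, readVersion, snapshotData, deltaData, stats);
// stats can then be folded into the client-side bgRead* counters.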
|
||||
|
||||
std::string randomBGFilename(UID blobWorkerID, UID granuleID, Version version, std::string suffix);
|
||||
|
||||
|
|
|
@ -25,6 +25,8 @@
|
|||
#include "flow/FileIdentifier.h"
|
||||
|
||||
using BlobMetadataDomainId = int64_t;
|
||||
using BlobMetadataDomainNameRef = StringRef;
|
||||
using BlobMetadataDomainName = Standalone<BlobMetadataDomainNameRef>;
|
||||
|
||||
/*
|
||||
* There are 3 cases for blob metadata.
|
||||
|
@ -38,26 +40,54 @@ using BlobMetadataDomainId = int64_t;
|
|||
struct BlobMetadataDetailsRef {
|
||||
constexpr static FileIdentifier file_identifier = 6685526;
|
||||
BlobMetadataDomainId domainId;
|
||||
BlobMetadataDomainNameRef domainName;
|
||||
Optional<StringRef> base;
|
||||
VectorRef<StringRef> partitions;
|
||||
|
||||
// cache options
|
||||
double refreshAt;
|
||||
double expireAt;
|
||||
|
||||
BlobMetadataDetailsRef() {}
|
||||
BlobMetadataDetailsRef(Arena& arena, const BlobMetadataDetailsRef& from)
|
||||
: domainId(from.domainId), partitions(arena, from.partitions) {
|
||||
: domainId(from.domainId), domainName(arena, from.domainName), partitions(arena, from.partitions),
|
||||
refreshAt(from.refreshAt), expireAt(from.expireAt) {
|
||||
if (from.base.present()) {
|
||||
base = StringRef(arena, from.base.get());
|
||||
}
|
||||
}
|
||||
explicit BlobMetadataDetailsRef(BlobMetadataDomainId domainId,
|
||||
Optional<StringRef> base,
|
||||
VectorRef<StringRef> partitions)
|
||||
: domainId(domainId), base(base), partitions(partitions) {}
|
||||
|
||||
int expectedSize() const { return sizeof(BlobMetadataDetailsRef) + partitions.expectedSize(); }
|
||||
explicit BlobMetadataDetailsRef(Arena& ar,
|
||||
BlobMetadataDomainId domainId,
|
||||
BlobMetadataDomainNameRef domainName,
|
||||
Optional<StringRef> base,
|
||||
VectorRef<StringRef> partitions,
|
||||
int64_t refreshAt,
|
||||
int64_t expireAt)
|
||||
: domainId(domainId), domainName(ar, domainName), partitions(ar, partitions), refreshAt(refreshAt),
|
||||
expireAt(expireAt) {
|
||||
if (base.present()) {
|
||||
base = StringRef(ar, base.get());
|
||||
}
|
||||
}
|
||||
|
||||
explicit BlobMetadataDetailsRef(BlobMetadataDomainId domainId,
|
||||
BlobMetadataDomainNameRef domainName,
|
||||
Optional<StringRef> base,
|
||||
VectorRef<StringRef> partitions,
|
||||
double refreshAt,
|
||||
double expireAt)
|
||||
: domainId(domainId), domainName(domainName), base(base), partitions(partitions), refreshAt(refreshAt),
|
||||
expireAt(expireAt) {}
|
||||
|
||||
int expectedSize() const {
|
||||
return sizeof(BlobMetadataDetailsRef) + domainName.size() + (base.present() ? base.get().size() : 0) +
|
||||
partitions.expectedSize();
|
||||
}
|
||||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(ar, domainId, base, partitions);
|
||||
serializer(ar, domainId, domainName, base, partitions, refreshAt, expireAt);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -0,0 +1,58 @@
|
|||
/*
|
||||
* BuildIdempotencyIdMutations.h
|
||||
*
|
||||
* This source file is part of the FoundationDB open source project
|
||||
*
|
||||
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef FDBCLIENT_BUILD_IDEMPOTENCY_ID_MUTATIONS_H
|
||||
#define FDBCLIENT_BUILD_IDEMPOTENCY_ID_MUTATIONS_H
|
||||
|
||||
#include "fdbclient/CommitProxyInterface.h"
|
||||
#include "fdbclient/IdempotencyId.h"
|
||||
|
||||
#pragma once
|
||||
|
||||
// Iterate through trs looking for idempotency ids for committed transactions. Call onKvReady for each constructed key
|
||||
// value pair.
|
||||
template <class OnKVReady>
|
||||
void buildIdempotencyIdMutations(const std::vector<CommitTransactionRequest>& trs,
|
||||
IdempotencyIdKVBuilder& idempotencyKVBuilder,
|
||||
Version commitVersion,
|
||||
const std::vector<uint8_t>& committed,
|
||||
uint8_t committedValue,
|
||||
bool locked,
|
||||
const OnKVReady& onKvReady) {
|
||||
idempotencyKVBuilder.setCommitVersion(commitVersion);
|
||||
for (int h = 0; h < trs.size(); h += 256) {
|
||||
int end = std::min<int>(trs.size() - h, 256);
|
||||
for (int l = 0; l < end; ++l) {
|
||||
uint16_t batchIndex = h + l;
|
||||
if ((committed[batchIndex] == committedValue && (!locked || trs[batchIndex].isLockAware()))) {
|
||||
const auto& idempotency_id = trs[batchIndex].idempotencyId;
|
||||
if (idempotency_id.valid()) {
|
||||
idempotencyKVBuilder.add(idempotency_id, batchIndex);
|
||||
}
|
||||
}
|
||||
}
|
||||
Optional<KeyValue> kv = idempotencyKVBuilder.buildAndClear();
|
||||
if (kv.present()) {
|
||||
onKvReady(kv.get());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
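// A hedged illustration of how the commit proxy might drive this helper. The surrounding
// variables (`trs`, `idempotencyKVBuilder`, `commitVersion`, `committed`, `committedValue`,
// `lockedKey`, `storeCommitted`) are placeholders, not taken from this diff.
buildIdempotencyIdMutations(trs,
                            idempotencyKVBuilder,
                            commitVersion,
                            committed,
                            committedValue,
                            lockedKey.present(),
                            [&](const KeyValue& kv) {
                                // storeCommitted stands in for whatever appends the constructed
                                // key/value pair as a mutation of this commit batch.
                                storeCommitted(kv);
                            });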
|
|
@ -199,6 +199,7 @@ public:
|
|||
int32_t DEFAULT_MAX_GRV_PROXIES;
|
||||
int32_t DEFAULT_AUTO_RESOLVERS;
|
||||
int32_t DEFAULT_AUTO_LOGS;
|
||||
bool DELETE_NATIVE_LIB_AFTER_LOADING;
|
||||
|
||||
double GLOBAL_CONFIG_REFRESH_BACKOFF;
|
||||
double GLOBAL_CONFIG_REFRESH_MAX_BACKOFF;
|
||||
|
|
|
@ -40,6 +40,7 @@ struct ClusterInterface {
|
|||
RequestStream<struct MoveShardRequest> moveShard;
|
||||
RequestStream<struct RepairSystemDataRequest> repairSystemData;
|
||||
RequestStream<struct SplitShardRequest> splitShard;
|
||||
RequestStream<struct TriggerAuditRequest> triggerAudit;
|
||||
|
||||
bool operator==(ClusterInterface const& r) const { return id() == r.id(); }
|
||||
bool operator!=(ClusterInterface const& r) const { return id() != r.id(); }
|
||||
|
@ -51,7 +52,7 @@ struct ClusterInterface {
|
|||
databaseStatus.getFuture().isReady() || ping.getFuture().isReady() ||
|
||||
getClientWorkers.getFuture().isReady() || forceRecovery.getFuture().isReady() ||
|
||||
moveShard.getFuture().isReady() || repairSystemData.getFuture().isReady() ||
|
||||
splitShard.getFuture().isReady();
|
||||
splitShard.getFuture().isReady() || triggerAudit.getFuture().isReady();
|
||||
}
|
||||
|
||||
void initEndpoints() {
|
||||
|
@ -64,6 +65,7 @@ struct ClusterInterface {
|
|||
moveShard.getEndpoint(TaskPriority::ClusterController);
|
||||
repairSystemData.getEndpoint(TaskPriority::ClusterController);
|
||||
splitShard.getEndpoint(TaskPriority::ClusterController);
|
||||
triggerAudit.getEndpoint(TaskPriority::ClusterController);
|
||||
}
|
||||
|
||||
template <class Ar>
|
||||
|
@ -77,7 +79,8 @@ struct ClusterInterface {
|
|||
forceRecovery,
|
||||
moveShard,
|
||||
repairSystemData,
|
||||
splitShard);
|
||||
splitShard,
|
||||
triggerAudit);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include "fdbclient/FDBTypes.h"
|
||||
#include "fdbclient/GlobalConfig.h"
|
||||
#include "fdbclient/GrvProxyInterface.h"
|
||||
#include "fdbclient/IdempotencyId.h"
|
||||
#include "fdbclient/StorageServerInterface.h"
|
||||
#include "fdbclient/TagThrottle.actor.h"
|
||||
#include "fdbclient/VersionVector.h"
|
||||
|
@ -186,6 +187,7 @@ struct CommitTransactionRequest : TimedRequest {
|
|||
Optional<UID> debugID;
|
||||
Optional<ClientTrCommitCostEstimation> commitCostEstimation;
|
||||
Optional<TagSet> tagSet;
|
||||
IdempotencyIdRef idempotencyId;
|
||||
|
||||
TenantInfo tenantInfo;
|
||||
|
||||
|
@ -196,8 +198,17 @@ struct CommitTransactionRequest : TimedRequest {
|
|||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(
|
||||
ar, transaction, reply, flags, debugID, commitCostEstimation, tagSet, spanContext, tenantInfo, arena);
|
||||
serializer(ar,
|
||||
transaction,
|
||||
reply,
|
||||
flags,
|
||||
debugID,
|
||||
commitCostEstimation,
|
||||
tagSet,
|
||||
spanContext,
|
||||
tenantInfo,
|
||||
idempotencyId,
|
||||
arena);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -224,6 +235,7 @@ struct GetReadVersionReply : public BasicLoadBalancedReply {
|
|||
bool rkBatchThrottled = false;
|
||||
|
||||
TransactionTagMap<ClientTagThrottleLimits> tagThrottleInfo;
|
||||
double proxyTagThrottledDuration{ 0.0 };
|
||||
|
||||
VersionVector ssVersionVectorDelta;
|
||||
UID proxyId; // GRV proxy ID to detect old GRV proxies at client side
|
||||
|
@ -242,7 +254,8 @@ struct GetReadVersionReply : public BasicLoadBalancedReply {
|
|||
rkDefaultThrottled,
|
||||
rkBatchThrottled,
|
||||
ssVersionVectorDelta,
|
||||
proxyId);
|
||||
proxyId,
|
||||
proxyTagThrottledDuration);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -267,6 +280,10 @@ struct GetReadVersionRequest : TimedRequest {
|
|||
TransactionPriority priority;
|
||||
|
||||
TransactionTagMap<uint32_t> tags;
|
||||
// Not serialized, because this field does not need to be sent to master.
|
||||
// It is used for reporting to clients the amount of time spent delayed by
|
||||
// the TagQueue
|
||||
double proxyTagThrottledDuration{ 0.0 };
|
||||
|
||||
Optional<UID> debugID;
|
||||
ReplyPromise<GetReadVersionReply> reply;
|
||||
|
@ -303,6 +320,8 @@ struct GetReadVersionRequest : TimedRequest {
|
|||
|
||||
bool operator<(GetReadVersionRequest const& rhs) const { return priority < rhs.priority; }
|
||||
|
||||
bool isTagged() const { return !tags.empty(); }
|
||||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(ar, transactionCount, flags, tags, debugID, reply, spanContext, maxVersion);
|
||||
|
|
|
@ -298,13 +298,19 @@ public:
|
|||
Future<Void> onProxiesChanged() const;
|
||||
Future<HealthMetrics> getHealthMetrics(bool detailed);
|
||||
// Pass a negative value for `shardLimit` to indicate no limit on the shard number.
|
||||
Future<StorageMetrics> getStorageMetrics(KeyRange const& keys, int shardLimit);
|
||||
Future<std::pair<Optional<StorageMetrics>, int>> waitStorageMetrics(KeyRange const& keys,
|
||||
StorageMetrics const& min,
|
||||
StorageMetrics const& max,
|
||||
StorageMetrics const& permittedError,
|
||||
int shardLimit,
|
||||
int expectedShardCount);
|
||||
// Pass a valid `trState` with `hasTenant() == true` to make the function tenant-aware.
|
||||
Future<StorageMetrics> getStorageMetrics(
|
||||
KeyRange const& keys,
|
||||
int shardLimit,
|
||||
Optional<Reference<TransactionState>> trState = Optional<Reference<TransactionState>>());
|
||||
Future<std::pair<Optional<StorageMetrics>, int>> waitStorageMetrics(
|
||||
KeyRange const& keys,
|
||||
StorageMetrics const& min,
|
||||
StorageMetrics const& max,
|
||||
StorageMetrics const& permittedError,
|
||||
int shardLimit,
|
||||
int expectedShardCount,
|
||||
Optional<Reference<TransactionState>> trState = Optional<Reference<TransactionState>>());
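// For example (editor's sketch; `cx`, `keys`, and `trState` are placeholders): a negative
// shardLimit disables the shard-count cap, and passing a `trState` with hasTenant() == true
// restricts the returned metrics to that tenant's key space.
StorageMetrics usage = wait(cx->getStorageMetrics(keys, /*shardLimit*/ -1, trState));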
|
||||
Future<Void> splitStorageMetricsStream(PromiseStream<Key> const& resultsStream,
|
||||
KeyRange const& keys,
|
||||
StorageMetrics const& limit,
|
||||
|
@ -388,10 +394,14 @@ public:
|
|||
bool force = false);
|
||||
Future<Void> waitPurgeGranulesComplete(Key purgeKey);
|
||||
|
||||
Future<bool> blobbifyRange(KeyRange range);
|
||||
Future<bool> unblobbifyRange(KeyRange range);
|
||||
Future<Standalone<VectorRef<KeyRangeRef>>> listBlobbifiedRanges(KeyRange range, int rangeLimit);
|
||||
Future<Version> verifyBlobRange(const KeyRange& range, Optional<Version> version);
|
||||
Future<bool> blobbifyRange(KeyRange range, Optional<TenantName> tenantName = {});
|
||||
Future<bool> unblobbifyRange(KeyRange range, Optional<TenantName> tenantName = {});
|
||||
Future<Standalone<VectorRef<KeyRangeRef>>> listBlobbifiedRanges(KeyRange range,
|
||||
int rangeLimit,
|
||||
Optional<TenantName> tenantName = {});
|
||||
Future<Version> verifyBlobRange(const KeyRange& range,
|
||||
Optional<Version> version,
|
||||
Optional<TenantName> tenantName = {});
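// Hedged example (editor's sketch; `db` and the tenant name are placeholders): the new
// optional tenantName scopes blob granule management to that tenant's key space.
TenantName tenant = "bg_tenant"_sr;
bool blobbified = wait(db->blobbifyRange(normalKeys, tenant));
Version verified = wait(db->verifyBlobRange(normalKeys, {}, tenant)); // {} = latest version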
|
||||
|
||||
// private:
|
||||
explicit DatabaseContext(Reference<AsyncVar<Reference<IClusterConnectionRecord>>> connectionRecord,
|
||||
|
@ -544,8 +554,17 @@ public:
|
|||
Counter transactionGrvFullBatches;
|
||||
Counter transactionGrvTimedOutBatches;
|
||||
Counter transactionCommitVersionNotFoundForSS;
|
||||
|
||||
// Blob Granule Read metrics. Omit from logging if not used.
|
||||
bool anyBGReads;
|
||||
CounterCollection ccBG;
|
||||
Counter bgReadInputBytes;
|
||||
Counter bgReadOutputBytes;
|
||||
Counter bgReadSnapshotRows;
|
||||
Counter bgReadRowsCleared;
|
||||
Counter bgReadRowsInserted;
|
||||
Counter bgReadRowsUpdated;
|
||||
ContinuousSample<double> bgLatencies, bgGranulesPerRequest;
|
||||
|
||||
// Change Feed metrics. Omit change feed metrics from logging if not used
|
||||
bool usedAnyChangeFeeds;
|
||||
|
@ -558,7 +577,7 @@ public:
|
|||
Counter feedPopsFallback;
|
||||
|
||||
ContinuousSample<double> latencies, readLatencies, commitLatencies, GRVLatencies, mutationsPerCommit,
|
||||
bytesPerCommit, bgLatencies, bgGranulesPerRequest;
|
||||
bytesPerCommit;
|
||||
|
||||
int outstandingWatches;
|
||||
int maxOutstandingWatches;
|
||||
|
@ -587,7 +606,6 @@ public:
|
|||
bool transactionTracingSample;
|
||||
double verifyCausalReadsProp = 0.0;
|
||||
bool blobGranuleNoMaterialize = false;
|
||||
bool anyBlobGranuleRequests = false;
|
||||
|
||||
Future<Void> logger;
|
||||
Future<Void> throttleExpirer;
|
||||
|
|
|
@ -197,6 +197,7 @@ struct EKPGetLatestBaseCipherKeysReply {
|
|||
}
|
||||
};
|
||||
|
||||
// TODO: also used for blob metadata, fix name
|
||||
struct EKPGetLatestCipherKeysRequestInfo {
|
||||
constexpr static FileIdentifier file_identifier = 2180516;
|
||||
// Encryption domain identifier
|
||||
|
@ -206,7 +207,7 @@ struct EKPGetLatestCipherKeysRequestInfo {
|
|||
EncryptCipherDomainNameRef domainName;
|
||||
|
||||
EKPGetLatestCipherKeysRequestInfo() : domainId(INVALID_ENCRYPT_DOMAIN_ID) {}
|
||||
EKPGetLatestCipherKeysRequestInfo(const EncryptCipherDomainId dId, StringRef name, Arena& arena)
|
||||
explicit EKPGetLatestCipherKeysRequestInfo(Arena& arena, const EncryptCipherDomainId dId, StringRef name)
|
||||
: domainId(dId), domainName(StringRef(arena, name)) {}
|
||||
|
||||
bool operator==(const EKPGetLatestCipherKeysRequestInfo& info) const {
|
||||
|
@ -261,16 +262,15 @@ struct EKPGetLatestBlobMetadataReply {
|
|||
|
||||
struct EKPGetLatestBlobMetadataRequest {
|
||||
constexpr static FileIdentifier file_identifier = 3821549;
|
||||
std::vector<BlobMetadataDomainId> domainIds;
|
||||
Standalone<VectorRef<EKPGetLatestCipherKeysRequestInfo>> domainInfos;
|
||||
Optional<UID> debugId;
|
||||
ReplyPromise<EKPGetLatestBlobMetadataReply> reply;
|
||||
|
||||
EKPGetLatestBlobMetadataRequest() {}
|
||||
explicit EKPGetLatestBlobMetadataRequest(const std::vector<BlobMetadataDomainId>& ids) : domainIds(ids) {}
|
||||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(ar, domainIds, debugId, reply);
|
||||
serializer(ar, domainInfos, debugId, reply);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -1669,8 +1669,8 @@ struct Versionstamp {
|
|||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
int64_t beVersion;
|
||||
int16_t beBatch;
|
||||
uint64_t beVersion;
|
||||
uint16_t beBatch;
|
||||
|
||||
if constexpr (!Ar::isDeserializing) {
|
||||
beVersion = bigEndian64(version);
|
||||
|
@ -1686,4 +1686,14 @@ struct Versionstamp {
|
|||
}
|
||||
};
|
||||
|
||||
template <class Ar>
|
||||
inline void save(Ar& ar, const Versionstamp& value) {
|
||||
return const_cast<Versionstamp&>(value).serialize(ar);
|
||||
}
|
||||
|
||||
template <class Ar>
|
||||
inline void load(Ar& ar, Versionstamp& value) {
|
||||
value.serialize(ar);
|
||||
}
|
||||
|
||||
#endif
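// Hedged sketch (editor's illustration; the key literal and `tr` are placeholders): with
// save/load defined, Versionstamp can back a KeyBackedBinaryValue, which is how the new
// lastTenantModification property further down is written and read.
KeyBackedBinaryValue<Versionstamp> lastMod("\xff/example/lastModification"_sr);
lastMod.setVersionstamp(tr, Versionstamp(), 0); // the commit version is filled in at commit time
Optional<Versionstamp> current = wait(lastMod.get(tr));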
|
||||
|
|
|
@ -59,11 +59,12 @@ Future<Void> onEncryptKeyProxyChange(Reference<AsyncVar<T> const> db) {
|
|||
|
||||
ACTOR template <class T>
|
||||
Future<EKPGetLatestBaseCipherKeysReply> getUncachedLatestEncryptCipherKeys(Reference<AsyncVar<T> const> db,
|
||||
EKPGetLatestBaseCipherKeysRequest request) {
|
||||
EKPGetLatestBaseCipherKeysRequest request,
|
||||
BlobCipherMetrics::UsageType usageType) {
|
||||
Optional<EncryptKeyProxyInterface> proxy = db->get().encryptKeyProxy;
|
||||
if (!proxy.present()) {
|
||||
// Wait for onEncryptKeyProxyChange.
|
||||
TraceEvent("GetLatestEncryptCipherKeys_EncryptKeyProxyNotPresent");
|
||||
TraceEvent("GetLatestEncryptCipherKeys_EncryptKeyProxyNotPresent").detail("UsageType", toString(usageType));
|
||||
return Never();
|
||||
}
|
||||
request.reply.reset();
|
||||
|
@ -108,7 +109,7 @@ Future<std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>>> getL
|
|||
cipherKeys[domain.first] = cachedCipherKey;
|
||||
} else {
|
||||
request.encryptDomainInfos.emplace_back(
|
||||
domain.first /*domainId*/, domain.second /*domainName*/, request.arena);
|
||||
request.arena, domain.first /*domainId*/, domain.second /*domainName*/);
|
||||
}
|
||||
}
|
||||
if (request.encryptDomainInfos.empty()) {
|
||||
|
@ -117,7 +118,7 @@ Future<std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>>> getL
|
|||
// Fetch any uncached cipher keys.
|
||||
state double startTime = now();
|
||||
loop choose {
|
||||
when(EKPGetLatestBaseCipherKeysReply reply = wait(getUncachedLatestEncryptCipherKeys(db, request))) {
|
||||
when(EKPGetLatestBaseCipherKeysReply reply = wait(getUncachedLatestEncryptCipherKeys(db, request, usageType))) {
|
||||
// Insert base cipher keys into cache and construct result.
|
||||
for (const EKPBaseCipherDetails& details : reply.baseCipherDetails) {
|
||||
EncryptCipherDomainId domainId = details.encryptDomainId;
|
||||
|
@ -167,11 +168,12 @@ Future<Reference<BlobCipherKey>> getLatestEncryptCipherKey(Reference<AsyncVar<T>
|
|||
|
||||
ACTOR template <class T>
|
||||
Future<EKPGetBaseCipherKeysByIdsReply> getUncachedEncryptCipherKeys(Reference<AsyncVar<T> const> db,
|
||||
EKPGetBaseCipherKeysByIdsRequest request) {
|
||||
EKPGetBaseCipherKeysByIdsRequest request,
|
||||
BlobCipherMetrics::UsageType usageType) {
|
||||
Optional<EncryptKeyProxyInterface> proxy = db->get().encryptKeyProxy;
|
||||
if (!proxy.present()) {
|
||||
// Wait for onEncryptKeyProxyChange.
|
||||
TraceEvent("GetEncryptCipherKeys_EncryptKeyProxyNotPresent");
|
||||
TraceEvent("GetEncryptCipherKeys_EncryptKeyProxyNotPresent").detail("UsageType", toString(usageType));
|
||||
return Never();
|
||||
}
|
||||
request.reply.reset();
|
||||
|
@ -232,7 +234,7 @@ Future<std::unordered_map<BlobCipherDetails, Reference<BlobCipherKey>>> getEncry
|
|||
// Fetch any uncached cipher keys.
|
||||
state double startTime = now();
|
||||
loop choose {
|
||||
when(EKPGetBaseCipherKeysByIdsReply reply = wait(getUncachedEncryptCipherKeys(db, request))) {
|
||||
when(EKPGetBaseCipherKeysByIdsReply reply = wait(getUncachedEncryptCipherKeys(db, request, usageType))) {
|
||||
std::unordered_map<BaseCipherIndex, EKPBaseCipherDetails, boost::hash<BaseCipherIndex>> baseCipherKeys;
|
||||
for (const EKPBaseCipherDetails& baseDetails : reply.baseCipherDetails) {
|
||||
BaseCipherIndex baseIdx = std::make_pair(baseDetails.encryptDomainId, baseDetails.baseCipherId);
|
||||
|
|
|
@ -150,6 +150,13 @@ public:
|
|||
virtual ThreadFuture<Key> purgeBlobGranules(const KeyRangeRef& keyRange, Version purgeVersion, bool force) = 0;
|
||||
virtual ThreadFuture<Void> waitPurgeGranulesComplete(const KeyRef& purgeKey) = 0;
|
||||
|
||||
virtual ThreadFuture<bool> blobbifyRange(const KeyRangeRef& keyRange) = 0;
|
||||
virtual ThreadFuture<bool> unblobbifyRange(const KeyRangeRef& keyRange) = 0;
|
||||
virtual ThreadFuture<Standalone<VectorRef<KeyRangeRef>>> listBlobbifiedRanges(const KeyRangeRef& keyRange,
|
||||
int rangeLimit) = 0;
|
||||
|
||||
virtual ThreadFuture<Version> verifyBlobRange(const KeyRangeRef& keyRange, Optional<Version> version) = 0;
|
||||
|
||||
virtual void addref() = 0;
|
||||
virtual void delref() = 0;
|
||||
};
|
||||
|
|
|
@ -0,0 +1,166 @@
|
|||
/*
|
||||
* IdempotencyId.h
|
||||
*
|
||||
* This source file is part of the FoundationDB open source project
|
||||
*
|
||||
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef FDBCLIENT_IDEMPOTENCYID_H
|
||||
#define FDBCLIENT_IDEMPOTENCYID_H
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "fdbclient/FDBTypes.h"
|
||||
#include "fdbclient/PImpl.h"
|
||||
#include "flow/Arena.h"
|
||||
#include "flow/IRandom.h"
|
||||
#include "flow/serialize.h"
|
||||
|
||||
struct CommitResult {
|
||||
Version commitVersion;
|
||||
uint16_t batchIndex;
|
||||
};
|
||||
|
||||
// See design/idempotency_ids.md for more information. Designed so that the common case of a random 16 byte id does not
|
||||
// usually require indirection. Either invalid or an id with length >= 16 and < 256.
|
||||
struct IdempotencyIdRef {
|
||||
static constexpr auto file_identifier = 3858470;
|
||||
|
||||
// Create an invalid IdempotencyIdRef
|
||||
IdempotencyIdRef() : first(0) {}
|
||||
|
||||
// Borrows memory from the StringRef
|
||||
explicit IdempotencyIdRef(StringRef id) {
|
||||
if (id.empty()) {
|
||||
first = 0;
|
||||
return;
|
||||
}
|
||||
ASSERT(id.size() >= 16);
|
||||
ASSERT(id.size() < 256);
|
||||
if (id.size() == 16 &&
|
||||
/* If it's 16 bytes but first < 256 we still need to use an indirection to avoid ambiguity. */
|
||||
reinterpret_cast<const uint64_t*>(id.begin())[0] >= 256) {
|
||||
first = reinterpret_cast<const uint64_t*>(id.begin())[0];
|
||||
second.id = reinterpret_cast<const uint64_t*>(id.begin())[1];
|
||||
} else {
|
||||
first = id.size();
|
||||
second.ptr = id.begin();
|
||||
}
|
||||
}
|
||||
|
||||
IdempotencyIdRef(Arena& arena, IdempotencyIdRef t)
|
||||
: IdempotencyIdRef(t.valid() && t.indirect() ? StringRef(arena, t.asStringRefUnsafe()) : t.asStringRefUnsafe()) {}
|
||||
|
||||
int expectedSize() const {
|
||||
if (valid() && indirect()) {
|
||||
return first;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool operator==(const IdempotencyIdRef& other) const { return asStringRefUnsafe() == other.asStringRefUnsafe(); }
|
||||
|
||||
IdempotencyIdRef(IdempotencyIdRef&& other) = default;
|
||||
IdempotencyIdRef& operator=(IdempotencyIdRef&& other) = default;
|
||||
IdempotencyIdRef(const IdempotencyIdRef& other) = default;
|
||||
IdempotencyIdRef& operator=(const IdempotencyIdRef& other) = default;
|
||||
|
||||
template <class Archive>
|
||||
void serialize(Archive& ar) {
|
||||
// Only support network messages/object serializer for now
|
||||
ASSERT(false);
|
||||
}
|
||||
|
||||
bool valid() const { return first != 0; }
|
||||
|
||||
// Result may reference this, so *this must outlive result.
|
||||
StringRef asStringRefUnsafe() const {
|
||||
if (!valid()) {
|
||||
return StringRef();
|
||||
}
|
||||
if (indirect()) {
|
||||
return StringRef(second.ptr, first);
|
||||
} else {
|
||||
return StringRef(reinterpret_cast<const uint8_t*>(this), sizeof(*this));
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
bool indirect() const { return first < 256; }
|
||||
// first == 0 means this id is invalid. This representation is not ambiguous
|
||||
// because if first < 256, then first is the length of the id, but a valid
|
||||
// id is at least 16 bytes long.
|
||||
uint64_t first;
|
||||
union {
|
||||
uint64_t id;
|
||||
const uint8_t* ptr;
|
||||
} second; // If first < 256, then ptr is valid. Otherwise id is valid.
|
||||
};
|
||||
|
||||
using IdempotencyId = Standalone<IdempotencyIdRef>;
|
||||
|
||||
namespace std {
|
||||
template <>
|
||||
struct hash<IdempotencyIdRef> {
|
||||
std::size_t operator()(const IdempotencyIdRef& id) const { return std::hash<StringRef>{}(id.asStringRefUnsafe()); }
|
||||
};
|
||||
template <>
|
||||
struct hash<IdempotencyId> {
|
||||
std::size_t operator()(const IdempotencyId& id) const { return std::hash<StringRef>{}(id.asStringRefUnsafe()); }
|
||||
};
|
||||
} // namespace std
|
||||
|
||||
template <>
|
||||
struct dynamic_size_traits<IdempotencyIdRef> : std::true_type {
|
||||
template <class Context>
|
||||
static size_t size(const IdempotencyIdRef& t, Context&) {
|
||||
return t.asStringRefUnsafe().size();
|
||||
}
|
||||
template <class Context>
|
||||
static void save(uint8_t* out, const IdempotencyIdRef& t, Context&) {
|
||||
StringRef s = t.asStringRefUnsafe();
|
||||
std::copy(s.begin(), s.end(), out);
|
||||
}
|
||||
|
||||
template <class Context>
|
||||
static void load(const uint8_t* ptr, size_t sz, IdempotencyIdRef& id, Context& context) {
|
||||
id = IdempotencyIdRef(StringRef(context.tryReadZeroCopy(ptr, sz), sz));
|
||||
}
|
||||
};
|
||||
|
||||
// The plan is to use this as a key in a potentially large hashtable, so it should be compact.
|
||||
static_assert(sizeof(IdempotencyIdRef) == 16);
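// Illustration only (editor's sketch; the ids below are made-up literals). A 16-byte id
// whose first 8 bytes decode to a value >= 256 is packed inline into the two words above;
// a longer id keeps `first` as its length and `second.ptr` pointing at the borrowed bytes.
IdempotencyIdRef inlineId("0123456789abcdef"_sr);       // 16 bytes -> stored inline
IdempotencyIdRef indirectId("0123456789abcdefghij"_sr); // 20 bytes -> borrows the StringRef
ASSERT(inlineId.valid() && indirectId.valid());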
|
||||
|
||||
// Use in the commit proxy to construct a kv pair according to the format described in design/idempotency_ids.md
|
||||
struct IdempotencyIdKVBuilder : NonCopyable {
|
||||
IdempotencyIdKVBuilder();
|
||||
void setCommitVersion(Version commitVersion);
|
||||
// All calls to add must share the same high order byte of batchIndex (until the next call to buildAndClear)
|
||||
void add(const IdempotencyIdRef& id, uint16_t batchIndex);
|
||||
// Must call setCommitVersion before calling buildAndClear. After calling buildAndClear, this object is ready to
|
||||
// start a new kv pair for the high order byte of batchIndex.
|
||||
Optional<KeyValue> buildAndClear();
|
||||
|
||||
~IdempotencyIdKVBuilder();
|
||||
|
||||
private:
|
||||
PImpl<struct IdempotencyIdKVBuilderImpl> impl;
|
||||
};
|
||||
|
||||
// Check if id is present in kv, and if so return the commit version and batchIndex
|
||||
Optional<CommitResult> kvContainsIdempotencyId(const KeyValueRef& kv, const IdempotencyIdRef& id);
|
||||
|
||||
#endif
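// Hedged recovery-side sketch (editor's illustration; `kv` and `idempotencyId` are
// placeholders): after an unknown commit result, the client can read back the idempotency
// keyspace and probe each entry for its id to learn whether the transaction committed.
Optional<CommitResult> found = kvContainsIdempotencyId(kv, idempotencyId);
if (found.present()) {
    // Already committed at found.get().commitVersion, batch index found.get().batchIndex.
}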
|
|
@ -319,6 +319,11 @@ public:
|
|||
tr->clear(key);
|
||||
}
|
||||
|
||||
template <class Transaction>
|
||||
Future<Void> watch(Transaction tr) {
|
||||
return tr->watch(key);
|
||||
}
|
||||
|
||||
Key key;
|
||||
};
|
||||
|
||||
|
|
|
@ -138,6 +138,9 @@ ACTOR Future<int> setDDMode(Database cx, int mode);
|
|||
|
||||
ACTOR Future<Void> forceRecovery(Reference<IClusterConnectionRecord> clusterFile, Standalone<StringRef> dcId);
|
||||
|
||||
// Start an audit of the given type on a key range.
|
||||
ACTOR Future<UID> auditStorage(Reference<IClusterConnectionRecord> clusterFile, KeyRange range, AuditType type);
|
||||
|
||||
ACTOR Future<Void> printHealthyZone(Database cx);
|
||||
ACTOR Future<bool> clearHealthyZone(Database cx, bool printWarning = false, bool clearSSFailureZoneString = false);
|
||||
ACTOR Future<bool> setHealthyZone(Database cx, StringRef zoneId, double seconds, bool printWarning = false);
|
||||
|
|
|
@ -502,6 +502,7 @@ Future<Void> decommissionMetacluster(Reference<DB> db) {
|
|||
ManagementClusterMetadata::tenantMetadata().lastTenantId.clear(tr);
|
||||
ManagementClusterMetadata::tenantMetadata().tenantTombstones.clear(tr);
|
||||
ManagementClusterMetadata::tenantMetadata().tombstoneCleanupData.clear(tr);
|
||||
ManagementClusterMetadata::tenantMetadata().lastTenantModification.clear(tr);
|
||||
|
||||
wait(managementClusterCheckEmpty(tr));
|
||||
MetaclusterMetadata::metaclusterRegistration().clear(tr);
|
||||
|
@ -797,6 +798,7 @@ struct RemoveClusterImpl {
|
|||
ASSERT(entry.getString(0) == self->ctx.clusterName.get());
|
||||
ManagementClusterMetadata::tenantMetadata().tenantMap.erase(tr, entry.getString(1));
|
||||
ManagementClusterMetadata::tenantMetadata().tenantIdIndex.erase(tr, entry.getInt(2));
|
||||
ManagementClusterMetadata::tenantMetadata().lastTenantModification.setVersionstamp(tr, Versionstamp(), 0);
|
||||
}
|
||||
|
||||
// Erase all of the tenants processed in this transaction from the cluster tenant index
|
||||
|
@ -1262,6 +1264,7 @@ struct CreateTenantImpl {
|
|||
self->tenantEntry.tenantState = TenantState::REGISTERING;
|
||||
ManagementClusterMetadata::tenantMetadata().tenantMap.set(tr, self->tenantName, self->tenantEntry);
|
||||
ManagementClusterMetadata::tenantMetadata().tenantIdIndex.set(tr, self->tenantEntry.id, self->tenantName);
|
||||
ManagementClusterMetadata::tenantMetadata().lastTenantModification.setVersionstamp(tr, Versionstamp(), 0);
|
||||
|
||||
ManagementClusterMetadata::tenantMetadata().tenantCount.atomicOp(tr, 1, MutationRef::AddValue);
|
||||
ManagementClusterMetadata::clusterTenantCount.atomicOp(
|
||||
|
@ -1317,6 +1320,7 @@ struct CreateTenantImpl {
|
|||
TenantMapEntry updatedEntry = managementEntry.get();
|
||||
updatedEntry.tenantState = TenantState::READY;
|
||||
ManagementClusterMetadata::tenantMetadata().tenantMap.set(tr, self->tenantName, updatedEntry);
|
||||
ManagementClusterMetadata::tenantMetadata().lastTenantModification.setVersionstamp(tr, Versionstamp(), 0);
|
||||
}
|
||||
|
||||
return Void();
|
||||
|
@ -1446,6 +1450,7 @@ struct DeleteTenantImpl {
|
|||
}
|
||||
updatedEntry.tenantState = TenantState::REMOVING;
|
||||
ManagementClusterMetadata::tenantMetadata().tenantMap.set(tr, self->tenantName, updatedEntry);
|
||||
ManagementClusterMetadata::tenantMetadata().lastTenantModification.setVersionstamp(tr, Versionstamp(), 0);
|
||||
// If this has a rename pair, also mark the other entry for deletion
|
||||
if (self->pairName.present()) {
|
||||
state Optional<TenantMapEntry> pairEntry = wait(tryGetTenantTransaction(tr, self->pairName.get()));
|
||||
|
@ -1457,6 +1462,8 @@ struct DeleteTenantImpl {
|
|||
CODE_PROBE(true, "marking pair tenant in removing state");
|
||||
updatedPairEntry.tenantState = TenantState::REMOVING;
|
||||
ManagementClusterMetadata::tenantMetadata().tenantMap.set(tr, self->pairName.get(), updatedPairEntry);
|
||||
ManagementClusterMetadata::tenantMetadata().lastTenantModification.setVersionstamp(
|
||||
tr, Versionstamp(), 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1485,6 +1492,7 @@ struct DeleteTenantImpl {
|
|||
// Erase the tenant entry itself
|
||||
ManagementClusterMetadata::tenantMetadata().tenantMap.erase(tr, tenantName);
|
||||
ManagementClusterMetadata::tenantMetadata().tenantIdIndex.erase(tr, tenantEntry.get().id);
|
||||
ManagementClusterMetadata::tenantMetadata().lastTenantModification.setVersionstamp(tr, Versionstamp(), 0);
|
||||
|
||||
// This is idempotent because this function is only called if the tenant is in the map
|
||||
ManagementClusterMetadata::tenantMetadata().tenantCount.atomicOp(tr, -1, MutationRef::AddValue);
|
||||
|
@ -1689,6 +1697,7 @@ struct ConfigureTenantImpl {
|
|||
|
||||
++self->updatedEntry.configurationSequenceNum;
|
||||
ManagementClusterMetadata::tenantMetadata().tenantMap.set(tr, self->tenantName, self->updatedEntry);
|
||||
ManagementClusterMetadata::tenantMetadata().lastTenantModification.setVersionstamp(tr, Versionstamp(), 0);
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
@ -1724,6 +1733,7 @@ struct ConfigureTenantImpl {
|
|||
|
||||
tenantEntry.get().tenantState = TenantState::READY;
|
||||
ManagementClusterMetadata::tenantMetadata().tenantMap.set(tr, self->tenantName, tenantEntry.get());
|
||||
ManagementClusterMetadata::tenantMetadata().lastTenantModification.setVersionstamp(tr, Versionstamp(), 0);
|
||||
return Void();
|
||||
}
|
||||
|
||||
|
@ -1770,6 +1780,7 @@ struct RenameTenantImpl {
|
|||
TenantMapEntry tenantEntry) {
|
||||
// Erase the tenant entry itself
|
||||
ManagementClusterMetadata::tenantMetadata().tenantMap.erase(tr, self->oldName);
|
||||
ManagementClusterMetadata::tenantMetadata().lastTenantModification.setVersionstamp(tr, Versionstamp(), 0);
|
||||
|
||||
// Remove old tenant from tenant count
|
||||
ManagementClusterMetadata::tenantMetadata().tenantCount.atomicOp(tr, -1, MutationRef::AddValue);
|
||||
|
@ -1857,6 +1868,7 @@ struct RenameTenantImpl {
|
|||
|
||||
ManagementClusterMetadata::tenantMetadata().tenantMap.set(tr, self->oldName, updatedOldEntry);
|
||||
ManagementClusterMetadata::tenantMetadata().tenantMap.set(tr, self->newName, updatedNewEntry);
|
||||
ManagementClusterMetadata::tenantMetadata().lastTenantModification.setVersionstamp(tr, Versionstamp(), 0);
|
||||
|
||||
// Add temporary tenant to tenantCount to prevent exceeding capacity during a rename
|
||||
ManagementClusterMetadata::tenantMetadata().tenantCount.atomicOp(tr, 1, MutationRef::AddValue);
|
||||
|
@ -1919,6 +1931,7 @@ struct RenameTenantImpl {
|
|||
updatedNewEntry.renamePair.reset();
|
||||
ManagementClusterMetadata::tenantMetadata().tenantMap.set(tr, self->newName, updatedNewEntry);
|
||||
ManagementClusterMetadata::tenantMetadata().tenantIdIndex.set(tr, self->tenantId, self->newName);
|
||||
ManagementClusterMetadata::tenantMetadata().lastTenantModification.setVersionstamp(tr, Versionstamp(), 0);
|
||||
}
|
||||
|
||||
// We will remove the old entry from the management cluster
|
||||
|
|
|
@ -221,6 +221,32 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
|
|||
uint8_t const* purge_key_name,
|
||||
int purge_key_name_length);
|
||||
|
||||
FDBFuture* (*tenantBlobbifyRange)(FDBTenant* tenant,
|
||||
uint8_t const* begin_key_name,
|
||||
int begin_key_name_length,
|
||||
uint8_t const* end_key_name,
|
||||
int end_key_name_length);
|
||||
|
||||
FDBFuture* (*tenantUnblobbifyRange)(FDBTenant* tenant,
|
||||
uint8_t const* begin_key_name,
|
||||
int begin_key_name_length,
|
||||
uint8_t const* end_key_name,
|
||||
int end_key_name_length);
|
||||
|
||||
FDBFuture* (*tenantListBlobbifiedRanges)(FDBTenant* tenant,
|
||||
uint8_t const* begin_key_name,
|
||||
int begin_key_name_length,
|
||||
uint8_t const* end_key_name,
|
||||
int end_key_name_length,
|
||||
int rangeLimit);
|
||||
|
||||
FDBFuture* (*tenantVerifyBlobRange)(FDBTenant* tenant,
|
||||
uint8_t const* begin_key_name,
|
||||
int begin_key_name_length,
|
||||
uint8_t const* end_key_name,
|
||||
int end_key_name_length,
|
||||
int64_t version);
|
||||
|
||||
void (*tenantDestroy)(FDBTenant* tenant);
|
||||
|
||||
// Transaction
|
||||
|
@ -513,6 +539,13 @@ public:
|
|||
ThreadFuture<Key> purgeBlobGranules(const KeyRangeRef& keyRange, Version purgeVersion, bool force) override;
|
||||
ThreadFuture<Void> waitPurgeGranulesComplete(const KeyRef& purgeKey) override;
|
||||
|
||||
ThreadFuture<bool> blobbifyRange(const KeyRangeRef& keyRange) override;
|
||||
ThreadFuture<bool> unblobbifyRange(const KeyRangeRef& keyRange) override;
|
||||
ThreadFuture<Standalone<VectorRef<KeyRangeRef>>> listBlobbifiedRanges(const KeyRangeRef& keyRange,
|
||||
int rangeLimit) override;
|
||||
|
||||
ThreadFuture<Version> verifyBlobRange(const KeyRangeRef& keyRange, Optional<Version> version) override;
|
||||
|
||||
void addref() override { ThreadSafeReferenceCounted<DLTenant>::addref(); }
|
||||
void delref() override { ThreadSafeReferenceCounted<DLTenant>::delref(); }
|
||||
|
||||
|
@ -560,6 +593,7 @@ public:
|
|||
ThreadFuture<bool> unblobbifyRange(const KeyRangeRef& keyRange) override;
|
||||
ThreadFuture<Standalone<VectorRef<KeyRangeRef>>> listBlobbifiedRanges(const KeyRangeRef& keyRange,
|
||||
int rangeLimit) override;
|
||||
|
||||
ThreadFuture<Version> verifyBlobRange(const KeyRangeRef& keyRange, Optional<Version> version) override;
|
||||
|
||||
ThreadFuture<DatabaseSharedState*> createSharedState() override;
|
||||
|
@ -809,6 +843,12 @@ public:
|
|||
ThreadFuture<Key> purgeBlobGranules(const KeyRangeRef& keyRange, Version purgeVersion, bool force) override;
|
||||
ThreadFuture<Void> waitPurgeGranulesComplete(const KeyRef& purgeKey) override;
|
||||
|
||||
ThreadFuture<bool> blobbifyRange(const KeyRangeRef& keyRange) override;
|
||||
ThreadFuture<bool> unblobbifyRange(const KeyRangeRef& keyRange) override;
|
||||
ThreadFuture<Standalone<VectorRef<KeyRangeRef>>> listBlobbifiedRanges(const KeyRangeRef& keyRange,
|
||||
int rangeLimit) override;
|
||||
ThreadFuture<Version> verifyBlobRange(const KeyRangeRef& keyRange, Optional<Version> version) override;
|
||||
|
||||
void addref() override { ThreadSafeReferenceCounted<MultiVersionTenant>::addref(); }
|
||||
void delref() override { ThreadSafeReferenceCounted<MultiVersionTenant>::delref(); }
|
||||
|
||||
|
|
|
@ -237,6 +237,8 @@ public:
|
|||
DD_STORAGE_WIGGLE_MIN_SS_AGE_SEC; // Minimal age of a correct-configured server before it's chosen to be wiggled
|
||||
bool DD_TENANT_AWARENESS_ENABLED;
|
||||
int TENANT_CACHE_LIST_REFRESH_INTERVAL; // How often the TenantCache is refreshed
|
||||
int TENANT_CACHE_STORAGE_REFRESH_INTERVAL; // How often the storage bytes used by each tenant in the TenantCache is
|
||||
// refreshed
|
||||
|
||||
// TeamRemover to remove redundant teams
|
||||
bool TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER; // disable the machineTeamRemover actor
|
||||
|
@ -480,6 +482,7 @@ public:
|
|||
double RATEKEEPER_FAILURE_TIME;
|
||||
double CONSISTENCYSCAN_FAILURE_TIME;
|
||||
double BLOB_MANAGER_FAILURE_TIME;
|
||||
double BLOB_MIGRATOR_FAILURE_TIME;
|
||||
double REPLACE_INTERFACE_DELAY;
|
||||
double REPLACE_INTERFACE_CHECK_DELAY;
|
||||
double COORDINATOR_REGISTER_INTERVAL;
|
||||
|
@ -623,6 +626,12 @@ public:
|
|||
double GLOBAL_TAG_THROTTLING_FOLDING_TIME;
|
||||
// Cost multiplier for writes (because write operations are more expensive than reads)
|
||||
double GLOBAL_TAG_THROTTLING_RW_FUNGIBILITY_RATIO;
|
||||
// Maximum number of tags tracked by global tag throttler. Additional tags will be ignored
|
||||
// until some existing tags expire
|
||||
int64_t GLOBAL_TAG_THROTTLING_MAX_TAGS_TRACKED;
|
||||
// Global tag throttler forgets about throughput from a tag once no new transactions from that
|
||||
// tag have been received for this duration (in seconds):
|
||||
int64_t GLOBAL_TAG_THROTTLING_TAG_EXPIRE_AFTER;
|
||||
|
||||
double MAX_TRANSACTIONS_PER_BYTE;
|
||||
|
||||
|
@ -703,6 +712,7 @@ public:
|
|||
int FETCH_KEYS_PARALLELISM_FULL;
|
||||
int FETCH_KEYS_LOWER_PRIORITY;
|
||||
int SERVE_FETCH_CHECKPOINT_PARALLELISM;
|
||||
int SERVE_AUDIT_STORAGE_PARALLELISM;
|
||||
int CHANGE_FEED_DISK_READS_PARALLELISM;
|
||||
int BUGGIFY_BLOCK_BYTES;
|
||||
int64_t STORAGE_RECOVERY_VERSION_LAG_LIMIT;
|
||||
|
@ -977,6 +987,7 @@ public:
|
|||
bool REST_KMS_CONNECTOR_REFRESH_KMS_URLS;
|
||||
double REST_KMS_CONNECTOR_REFRESH_KMS_URLS_INTERVAL_SEC;
|
||||
std::string REST_KMS_CONNECTOR_GET_ENCRYPTION_KEYS_ENDPOINT;
|
||||
std::string REST_KMS_CONNECTOR_GET_BLOB_METADATA_ENDPOINT;
|
||||
|
||||
ServerKnobs(Randomize, ClientKnobs*, IsSimulated);
|
||||
void initialize(Randomize, ClientKnobs*, IsSimulated);
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#define FDBCLIENT_STORAGESERVERINTERFACE_H
|
||||
#pragma once
|
||||
|
||||
#include "fdbclient/Audit.h"
|
||||
#include "fdbclient/FDBTypes.h"
|
||||
#include "fdbclient/StorageCheckpoint.h"
|
||||
#include "fdbclient/StorageServerShard.h"
|
||||
|
@ -102,7 +103,7 @@ struct StorageServerInterface {
|
|||
PublicRequestStream<struct GetMappedKeyValuesRequest> getMappedKeyValues;
|
||||
|
||||
RequestStream<struct GetShardStateRequest> getShardState;
|
||||
RequestStream<struct WaitMetricsRequest> waitMetrics;
|
||||
PublicRequestStream<struct WaitMetricsRequest> waitMetrics;
|
||||
RequestStream<struct SplitMetricsRequest> splitMetrics;
|
||||
RequestStream<struct GetStorageMetricsRequest> getStorageMetrics;
|
||||
RequestStream<ReplyPromise<Void>> waitFailure;
|
||||
|
@ -120,8 +121,8 @@ struct StorageServerInterface {
|
|||
RequestStream<struct GetCheckpointRequest> checkpoint;
|
||||
RequestStream<struct FetchCheckpointRequest> fetchCheckpoint;
|
||||
RequestStream<struct FetchCheckpointKeyValuesRequest> fetchCheckpointKeyValues;
|
||||
|
||||
RequestStream<struct UpdateCommitCostRequest> updateCommitCostRequest;
|
||||
RequestStream<struct AuditStorageRequest> auditStorage;
|
||||
|
||||
private:
|
||||
bool acceptingRequests;
|
||||
|
@ -160,7 +161,8 @@ public:
|
|||
PublicRequestStream<struct GetKeyValuesRequest>(getValue.getEndpoint().getAdjustedEndpoint(2));
|
||||
getShardState =
|
||||
RequestStream<struct GetShardStateRequest>(getValue.getEndpoint().getAdjustedEndpoint(3));
|
||||
waitMetrics = RequestStream<struct WaitMetricsRequest>(getValue.getEndpoint().getAdjustedEndpoint(4));
|
||||
waitMetrics =
|
||||
PublicRequestStream<struct WaitMetricsRequest>(getValue.getEndpoint().getAdjustedEndpoint(4));
|
||||
splitMetrics = RequestStream<struct SplitMetricsRequest>(getValue.getEndpoint().getAdjustedEndpoint(5));
|
||||
getStorageMetrics =
|
||||
RequestStream<struct GetStorageMetricsRequest>(getValue.getEndpoint().getAdjustedEndpoint(6));
|
||||
|
@ -194,6 +196,8 @@ public:
|
|||
getValue.getEndpoint().getAdjustedEndpoint(21));
|
||||
updateCommitCostRequest =
|
||||
RequestStream<struct UpdateCommitCostRequest>(getValue.getEndpoint().getAdjustedEndpoint(22));
|
||||
auditStorage =
|
||||
RequestStream<struct AuditStorageRequest>(getValue.getEndpoint().getAdjustedEndpoint(23));
|
||||
}
|
||||
} else {
|
||||
ASSERT(Ar::isDeserializing);
|
||||
|
@ -245,6 +249,7 @@ public:
|
|||
streams.push_back(fetchCheckpoint.getReceiver());
|
||||
streams.push_back(fetchCheckpointKeyValues.getReceiver());
|
||||
streams.push_back(updateCommitCostRequest.getReceiver());
|
||||
streams.push_back(auditStorage.getReceiver());
|
||||
FlowTransport::transport().addEndpoints(streams);
|
||||
}
|
||||
};
|
||||
|
@ -709,18 +714,25 @@ struct WaitMetricsRequest {
|
|||
// Waits for any of the given minimum or maximum metrics to be exceeded, and then returns the current values
|
||||
// Send a reversed range for min, max to receive an immediate report
|
||||
constexpr static FileIdentifier file_identifier = 1795961;
|
||||
// Setting the tenantInfo makes the request tenant-aware.
|
||||
Optional<TenantInfo> tenantInfo;
|
||||
Arena arena;
|
||||
KeyRangeRef keys;
|
||||
StorageMetrics min, max;
|
||||
ReplyPromise<StorageMetrics> reply;
|
||||
|
||||
bool verify() const { return tenantInfo.present() && tenantInfo.get().isAuthorized(); }
|
||||
|
||||
WaitMetricsRequest() {}
|
||||
WaitMetricsRequest(KeyRangeRef const& keys, StorageMetrics const& min, StorageMetrics const& max)
|
||||
: keys(arena, keys), min(min), max(max) {}
|
||||
WaitMetricsRequest(TenantInfo tenantInfo,
|
||||
KeyRangeRef const& keys,
|
||||
StorageMetrics const& min,
|
||||
StorageMetrics const& max)
|
||||
: tenantInfo(tenantInfo), keys(arena, keys), min(min), max(max) {}
|
||||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(ar, keys, min, max, reply, arena);
|
||||
serializer(ar, keys, min, max, reply, tenantInfo, arena);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -94,6 +94,13 @@ void decodeKeyServersValue(RangeResult result,
|
|||
|
||||
extern const KeyRef clusterIdKey;
|
||||
|
||||
extern const KeyRangeRef auditRange;
|
||||
extern const KeyRef auditPrefix;
|
||||
const Key auditRangeKey(const AuditType type, const UID& auditId, const KeyRef& key);
|
||||
const Key auditRangePrefix(const AuditType type, const UID& auditId);
|
||||
const Value auditStorageStateValue(const AuditStorageState& auditStorageState);
|
||||
AuditStorageState decodeAuditStorageState(const ValueRef& value);
|
||||
|
||||
// "\xff/checkpoint/[[UID]] := [[CheckpointMetaData]]"
|
||||
extern const KeyRef checkpointPrefix;
|
||||
const Key checkpointKeyFor(UID checkpointID);
|
||||
|
@ -708,6 +715,9 @@ extern const KeyRangeRef storageQuotaKeys;
|
|||
extern const KeyRef storageQuotaPrefix;
|
||||
Key storageQuotaKey(StringRef tenantName);
|
||||
|
||||
extern const KeyRangeRef idempotencyIdKeys;
|
||||
extern const KeyRef idempotencyIdsExpiredVersion;
|
||||
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
#endif
|
||||
|
|
|
@ -597,8 +597,8 @@ Future<Void> enableAuto(Reference<DB> db, bool enabled) {
|
|||
|
||||
class TagQuotaValue {
|
||||
public:
|
||||
double reservedQuota{ 0.0 };
|
||||
double totalQuota{ 0.0 };
|
||||
int64_t reservedQuota{ 0 };
|
||||
int64_t totalQuota{ 0 };
|
||||
bool isValid() const;
|
||||
Value toValue() const;
|
||||
static TagQuotaValue fromValue(ValueRef);
|
||||
|
|
|
@ -181,6 +181,7 @@ struct TenantMetadataSpecification {
|
|||
KeyBackedObjectProperty<TenantTombstoneCleanupData, decltype(IncludeVersion())> tombstoneCleanupData;
|
||||
KeyBackedSet<Tuple> tenantGroupTenantIndex;
|
||||
KeyBackedObjectMap<TenantGroupName, TenantGroupEntry, decltype(IncludeVersion()), NullCodec> tenantGroupMap;
|
||||
KeyBackedBinaryValue<Versionstamp> lastTenantModification;
|
||||
|
||||
TenantMetadataSpecification(KeyRef prefix)
|
||||
: subspace(prefix.withSuffix("tenant/"_sr)), tenantMap(subspace.withSuffix("map/"_sr), IncludeVersion()),
|
||||
|
@ -188,7 +189,8 @@ struct TenantMetadataSpecification {
|
|||
tenantCount(subspace.withSuffix("count"_sr)), tenantTombstones(subspace.withSuffix("tombstones/"_sr)),
|
||||
tombstoneCleanupData(subspace.withSuffix("tombstoneCleanup"_sr), IncludeVersion()),
|
||||
tenantGroupTenantIndex(subspace.withSuffix("tenantGroup/tenantIndex/"_sr)),
|
||||
tenantGroupMap(subspace.withSuffix("tenantGroup/map/"_sr), IncludeVersion()) {}
|
||||
tenantGroupMap(subspace.withSuffix("tenantGroup/map/"_sr), IncludeVersion()),
|
||||
lastTenantModification(subspace.withSuffix("lastModification"_sr)) {}
|
||||
};
|
||||
|
||||
struct TenantMetadata {
|
||||
|
@ -203,6 +205,7 @@ struct TenantMetadata {
|
|||
static inline auto& tombstoneCleanupData() { return instance().tombstoneCleanupData; }
|
||||
static inline auto& tenantGroupTenantIndex() { return instance().tenantGroupTenantIndex; }
|
||||
static inline auto& tenantGroupMap() { return instance().tenantGroupMap; }
|
||||
static inline auto& lastTenantModification() { return instance().lastTenantModification; }
|
||||
|
||||
static Key tenantMapPrivatePrefix();
|
||||
};
|
||||
|
|
|
@ -44,8 +44,14 @@
|
|||
using TenantNameEntryPair = std::pair<TenantName, TenantMapEntry>;
|
||||
using TenantNameEntryPairVec = std::vector<TenantNameEntryPair>;
|
||||
|
||||
enum class TenantEntryCacheRefreshReason { INIT = 1, PERIODIC_TASK = 2, CACHE_MISS = 3, REMOVE_ENTRY = 4 };
|
||||
enum class TenantEntryCacheRefreshMode { PERIODIC_TASK = 1, NONE = 2 };
|
||||
enum class TenantEntryCacheRefreshReason {
|
||||
INIT = 1,
|
||||
PERIODIC_TASK = 2,
|
||||
CACHE_MISS = 3,
|
||||
REMOVE_ENTRY = 4,
|
||||
WATCH_TRIGGER = 5
|
||||
};
|
||||
enum class TenantEntryCacheRefreshMode { PERIODIC_TASK = 1, WATCH = 2, NONE = 3 };
|
||||
|
||||
template <class T>
|
||||
struct TenantEntryCachePayload {
|
||||
|
@ -62,12 +68,6 @@ using TenantEntryCachePayloadFunc = std::function<TenantEntryCachePayload<T>(con
|
|||
// 1. Lookup by 'TenantId'
|
||||
// 2. Lookup by 'TenantPrefix'
|
||||
// 3. Lookup by 'TenantName'
|
||||
//
|
||||
// TODO:
|
||||
// ----
|
||||
// The cache allows user to construct the 'cached object' by supplying a callback. The cache implements a periodic
|
||||
// refresh mechanism, polling underlying database for updates (add/remove tenants), in future we might want to implement
|
||||
// database range-watch to monitor such updates
|
||||
|
||||
template <class T>
|
||||
class TenantEntryCache : public ReferenceCounted<TenantEntryCache<T>>, NonCopyable {
|
||||
|
@ -78,6 +78,10 @@ private:
|
|||
TenantEntryCacheRefreshMode refreshMode;
|
||||
|
||||
Future<Void> refresher;
|
||||
Future<Void> watchRefresher;
|
||||
Future<Void> lastTenantIdRefresher;
|
||||
Promise<Void> setInitialWatch;
|
||||
Optional<int64_t> lastTenantId;
|
||||
Map<int64_t, TenantEntryCachePayload<T>> mapByTenantId;
|
||||
Map<TenantName, TenantEntryCachePayload<T>> mapByTenantName;
|
||||
|
||||
|
@ -87,6 +91,7 @@ private:
|
|||
Counter refreshByCacheInit;
|
||||
Counter refreshByCacheMiss;
|
||||
Counter numRefreshes;
|
||||
Counter refreshByWatchTrigger;
|
||||
|
||||
ACTOR static Future<TenantNameEntryPairVec> getTenantList(Reference<ReadYourWritesTransaction> tr) {
|
||||
tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
|
||||
|
@ -102,16 +107,166 @@ private:
|
|||
return tenantList.results;
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> refreshCacheById(int64_t tenantId,
|
||||
TenantEntryCache<T>* cache,
|
||||
TenantEntryCacheRefreshReason reason) {
|
||||
TraceEvent(SevDebug, "TenantEntryCacheIDRefreshStart", cache->id()).detail("Reason", static_cast<int>(reason));
|
||||
state Reference<ReadYourWritesTransaction> tr = cache->getDatabase()->createTransaction();
|
||||
loop {
|
||||
try {
|
||||
tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
|
||||
tr->setOption(FDBTransactionOptions::READ_LOCK_AWARE);
|
||||
state Optional<TenantName> name = wait(TenantMetadata::tenantIdIndex().get(tr, tenantId));
|
||||
if (name.present()) {
|
||||
Optional<TenantMapEntry> entry = wait(TenantMetadata::tenantMap().get(tr, name.get()));
|
||||
if (entry.present()) {
|
||||
cache->put(std::make_pair(name.get(), entry.get()));
|
||||
updateCacheRefreshMetrics(cache, reason);
|
||||
}
|
||||
}
|
||||
break;
|
||||
} catch (Error& e) {
|
||||
wait(tr->onError(e));
|
||||
}
|
||||
}
|
||||
TraceEvent(SevDebug, "TenantEntryCacheIDRefreshEnd", cache->id()).detail("Reason", static_cast<int>(reason));
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> refreshCacheByName(TenantName name,
|
||||
TenantEntryCache<T>* cache,
|
||||
TenantEntryCacheRefreshReason reason) {
|
||||
TraceEvent(SevDebug, "TenantEntryCacheNameRefreshStart", cache->id())
|
||||
.detail("Reason", static_cast<int>(reason));
|
||||
state Reference<ReadYourWritesTransaction> tr = cache->getDatabase()->createTransaction();
|
||||
loop {
|
||||
try {
|
||||
tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
|
||||
tr->setOption(FDBTransactionOptions::READ_LOCK_AWARE);
|
||||
Optional<TenantMapEntry> entry = wait(TenantMetadata::tenantMap().get(tr, name));
|
||||
if (entry.present()) {
|
||||
cache->put(std::make_pair(name, entry.get()));
|
||||
updateCacheRefreshMetrics(cache, reason);
|
||||
}
|
||||
break;
|
||||
} catch (Error& e) {
|
||||
wait(tr->onError(e));
|
||||
}
|
||||
}
|
||||
TraceEvent(SevDebug, "TenantEntryCacheNameRefreshEnd", cache->id()).detail("Reason", static_cast<int>(reason));
|
||||
return Void();
|
||||
}
|
||||
|
||||
static void updateCacheRefreshMetrics(TenantEntryCache<T>* cache, TenantEntryCacheRefreshReason reason) {
|
||||
if (reason == TenantEntryCacheRefreshReason::INIT) {
|
||||
cache->refreshByCacheInit += 1;
|
||||
} else if (reason == TenantEntryCacheRefreshReason::CACHE_MISS) {
|
||||
cache->refreshByCacheMiss += 1;
|
||||
} else if (reason == TenantEntryCacheRefreshReason::WATCH_TRIGGER) {
|
||||
cache->refreshByWatchTrigger += 1;
|
||||
}
|
||||
|
||||
cache->numRefreshes += 1;
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> refreshCacheUsingWatch(TenantEntryCache<T>* cache, TenantEntryCacheRefreshReason reason) {
|
||||
TraceEvent(SevDebug, "TenantEntryCacheRefreshUsingWatchStart", cache->id())
|
||||
.detail("Reason", static_cast<int>(reason));
|
||||
|
||||
state Reference<ReadYourWritesTransaction> tr = cache->getDatabase()->createTransaction();
|
||||
loop {
|
||||
try {
|
||||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
state Future<Void> tenantModifiedWatch = TenantMetadata::lastTenantModification().watch(tr);
|
||||
wait(tr->commit());
|
||||
TraceEvent(SevDebug, "TenantEntryCacheRefreshWatchSet", cache->id());
|
||||
// setInitialWatch is set to indicate that an initial watch has been set for the lastTenantModification
// key. Currently this is only used in simulation to avoid a race condition where a tenant is created
// before the initial watch is set. However, it can be enabled by passing waitForInitalWatch = true to
// the init() method.
|
||||
if (cache->setInitialWatch.canBeSet()) {
|
||||
cache->setInitialWatch.send(Void());
|
||||
}
|
||||
wait(tenantModifiedWatch);
|
||||
// If watch triggered then refresh the cache as tenant metadata was updated
|
||||
TraceEvent(SevDebug, "TenantEntryCacheRefreshUsingWatchTriggered", cache->id())
|
||||
.detail("Reason", static_cast<int>(reason));
|
||||
wait(refreshImpl(cache, reason));
|
||||
tr->reset();
|
||||
} catch (Error& e) {
|
||||
if (e.code() != error_code_actor_cancelled) {
|
||||
TraceEvent("TenantEntryCacheRefreshUsingWatchError", cache->id())
|
||||
.errorUnsuppressed(e)
|
||||
.suppressFor(1.0);
|
||||
}
|
||||
wait(tr->onError(e));
|
||||
// If the watch threw an error, refresh the cache in case it was updated
|
||||
wait(refreshImpl(cache, reason));
|
||||
}
|
||||
}
|
||||
}
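
The actor above follows the standard FoundationDB watch idiom: register the watch in a transaction, commit, and only then wait on the watch future, since a watch is not armed until the transaction that created it commits. A minimal, self-contained sketch of that idiom, with a placeholder key and no relation to the tenant metadata above:

ACTOR Future<Void> waitForKeyChange(Database db, Key key) {
	state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(db);
	loop {
		try {
			// Arm the watch, then commit; waiting before the commit would never return.
			state Future<Void> change = tr->watch(key);
			wait(tr->commit());
			wait(change);
			return Void();
		} catch (Error& e) {
			wait(tr->onError(e));
		}
	}
}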
static bool tenantsEnabled(TenantEntryCache<T>* cache) {
|
||||
// Avoid using the cache if the tenant mode is disabled. However, since we use clientInfo, it may sometimes not
// be fully up to date (i.e. it may indicate the tenantMode is disabled when in fact it is required). Thus, if
// at least one tenant has been created on the cluster, use the cache to avoid an incorrect
// miss.
|
||||
if (cache->getDatabase()->clientInfo->get().tenantMode == TenantMode::DISABLED) {
|
||||
if (!cache->lastTenantId.present()) {
|
||||
return false;
|
||||
}
|
||||
return cache->lastTenantId.get() > 0;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> setLastTenantId(TenantEntryCache<T>* cache) {
|
||||
state Reference<ReadYourWritesTransaction> tr = cache->getDatabase()->createTransaction();
|
||||
loop {
|
||||
try {
|
||||
tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
|
||||
tr->setOption(FDBTransactionOptions::READ_LOCK_AWARE);
|
||||
tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
Optional<int64_t> lastTenantId = wait(TenantMetadata::lastTenantId().get(tr));
|
||||
cache->lastTenantId = lastTenantId;
|
||||
return Void();
|
||||
} catch (Error& e) {
|
||||
wait(tr->onError(e));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> lastTenantIdWatch(TenantEntryCache<T>* cache) {
|
||||
TraceEvent(SevDebug, "TenantEntryCacheLastTenantIdWatchStart", cache->id());
|
||||
// monitor for any changes on the last tenant id and update it as necessary
|
||||
state Reference<ReadYourWritesTransaction> tr = cache->getDatabase()->createTransaction();
|
||||
loop {
|
||||
try {
|
||||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
state Future<Void> lastTenantIdWatch = tr->watch(TenantMetadata::lastTenantId().key);
|
||||
wait(tr->commit());
|
||||
wait(lastTenantIdWatch);
|
||||
wait(setLastTenantId(cache));
|
||||
tr->reset();
|
||||
} catch (Error& e) {
|
||||
state Error err(e);
|
||||
if (err.code() != error_code_actor_cancelled) {
|
||||
TraceEvent("TenantEntryCacheLastTenantIdWatchError", cache->id())
|
||||
.errorUnsuppressed(err)
|
||||
.suppressFor(1.0);
|
||||
// In case watch errors out refresh the lastTenantId in case it has changed or we would have missed
|
||||
// an update
|
||||
wait(setLastTenantId(cache));
|
||||
}
|
||||
wait(tr->onError(err));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> refreshImpl(TenantEntryCache<T>* cache, TenantEntryCacheRefreshReason reason) {
|
||||
TraceEvent(SevDebug, "TenantEntryCacheRefreshStart", cache->id()).detail("Reason", static_cast<int>(reason));
|
||||
|
||||
|
@ -130,9 +285,7 @@ private:
|
|||
break;
|
||||
} catch (Error& e) {
|
||||
if (e.code() != error_code_actor_cancelled) {
|
||||
TraceEvent(SevInfo, "TenantEntryCacheRefreshError", cache->id())
|
||||
.errorUnsuppressed(e)
|
||||
.suppressFor(1.0);
|
||||
TraceEvent("TenantEntryCacheRefreshError", cache->id()).errorUnsuppressed(e).suppressFor(1.0);
|
||||
}
|
||||
wait(tr->onError(e));
|
||||
}
|
||||
|
@ -151,12 +304,22 @@ private:
|
|||
return ret;
|
||||
}
|
||||
|
||||
TraceEvent(SevInfo, "TenantEntryCacheGetByIdRefresh").detail("TenantId", tenantId);
|
||||
if (!tenantsEnabled(cache)) {
|
||||
// If tenants are disabled on the cluster avoid using the cache
|
||||
return Optional<TenantEntryCachePayload<T>>();
|
||||
}
|
||||
|
||||
// Entry not found. Refresh cacheEntries by scanning underlying KeyRange.
|
||||
// TODO: Cache will implement a "KeyRange" watch, monitoring notification when a new entry gets added or any
|
||||
// existing entry gets updated within the KeyRange of interest. Hence, misses would be very rare
|
||||
wait(refreshImpl(cache, TenantEntryCacheRefreshReason::CACHE_MISS));
|
||||
TraceEvent("TenantEntryCacheGetByIdRefresh").detail("TenantId", tenantId);
|
||||
|
||||
if (cache->refreshMode == TenantEntryCacheRefreshMode::WATCH) {
|
||||
// Entry not found. Do a point refresh
|
||||
// TODO: Don't initiate refresh if tenantId < maxTenantId (stored as a system key currently) as we know that
|
||||
// such a tenant does not exist (it has either never existed or has been deleted)
|
||||
wait(refreshCacheById(tenantId, cache, TenantEntryCacheRefreshReason::CACHE_MISS));
|
||||
} else {
|
||||
// Entry not found. Refresh cacheEntries by scanning underlying KeyRange.
|
||||
wait(refreshImpl(cache, TenantEntryCacheRefreshReason::CACHE_MISS));
|
||||
}
|
||||
|
||||
cache->misses += 1;
|
||||
return cache->lookupById(tenantId);
|
||||
|
@ -170,12 +333,20 @@ private:
|
|||
return ret;
|
||||
}
|
||||
|
||||
if (!tenantsEnabled(cache)) {
|
||||
// If tenants are disabled on the cluster avoid using the cache
|
||||
return Optional<TenantEntryCachePayload<T>>();
|
||||
}
|
||||
|
||||
TraceEvent("TenantEntryCacheGetByNameRefresh").detail("TenantName", name);
|
||||
|
||||
// Entry not found. Refresh cacheEntries by scanning underlying KeyRange.
|
||||
// TODO: Cache will implement a "KeyRange" watch, monitoring notification when a new entry gets added or any
|
||||
// existing entry gets updated within the KeyRange of interest. Hence, misses would be very rare
|
||||
wait(refreshImpl(cache, TenantEntryCacheRefreshReason::CACHE_MISS));
|
||||
if (cache->refreshMode == TenantEntryCacheRefreshMode::WATCH) {
|
||||
// Entry not found. Do a point refresh
|
||||
wait(refreshCacheByName(name, cache, TenantEntryCacheRefreshReason::CACHE_MISS));
|
||||
} else {
|
||||
// Entry not found. Refresh cacheEntries by scanning underlying KeyRange.
|
||||
wait(refreshImpl(cache, TenantEntryCacheRefreshReason::CACHE_MISS));
|
||||
}
|
||||
|
||||
cache->misses += 1;
|
||||
return cache->lookupByName(name);
|
||||
|
@ -272,7 +443,18 @@ public:
|
|||
hits("TenantEntryCacheHits", metrics), misses("TenantEntryCacheMisses", metrics),
|
||||
refreshByCacheInit("TenantEntryCacheRefreshInit", metrics),
|
||||
refreshByCacheMiss("TenantEntryCacheRefreshMiss", metrics),
|
||||
numRefreshes("TenantEntryCacheNumRefreshes", metrics) {
|
||||
numRefreshes("TenantEntryCacheNumRefreshes", metrics),
|
||||
refreshByWatchTrigger("TenantEntryCacheRefreshWatchTrigger", metrics) {
|
||||
TraceEvent("TenantEntryCacheCreatedDefaultFunc", uid);
|
||||
}
|
||||
|
||||
TenantEntryCache(Database db, TenantEntryCacheRefreshMode mode)
|
||||
: uid(deterministicRandom()->randomUniqueID()), db(db), createPayloadFunc(defaultCreatePayload),
|
||||
refreshMode(mode), metrics("TenantEntryCacheMetrics", uid.toString()), hits("TenantEntryCacheHits", metrics),
|
||||
misses("TenantEntryCacheMisses", metrics), refreshByCacheInit("TenantEntryCacheRefreshInit", metrics),
|
||||
refreshByCacheMiss("TenantEntryCacheRefreshMiss", metrics),
|
||||
numRefreshes("TenantEntryCacheNumRefreshes", metrics),
|
||||
refreshByWatchTrigger("TenantEntryCacheRefreshWatchTrigger", metrics) {
|
||||
TraceEvent("TenantEntryCacheCreatedDefaultFunc", uid);
|
||||
}
|
||||
|
||||
|
@ -282,7 +464,8 @@ public:
|
|||
hits("TenantEntryCacheHits", metrics), misses("TenantEntryCacheMisses", metrics),
|
||||
refreshByCacheInit("TenantEntryCacheRefreshInit", metrics),
|
||||
refreshByCacheMiss("TenantEntryCacheRefreshMiss", metrics),
|
||||
numRefreshes("TenantEntryCacheNumRefreshes", metrics) {
|
||||
numRefreshes("TenantEntryCacheNumRefreshes", metrics),
|
||||
refreshByWatchTrigger("TenantEntryCacheRefreshWatchTrigger", metrics) {
|
||||
TraceEvent("TenantEntryCacheCreated", uid);
|
||||
}
|
||||
|
||||
|
@ -291,7 +474,8 @@ public:
|
|||
metrics("TenantEntryCacheMetrics", uid.toString()), hits("TenantEntryCacheHits", metrics),
|
||||
misses("TenantEntryCacheMisses", metrics), refreshByCacheInit("TenantEntryCacheRefreshInit", metrics),
|
||||
refreshByCacheMiss("TenantEntryCacheRefreshMiss", metrics),
|
||||
numRefreshes("TenantEntryCacheNumRefreshes", metrics) {
|
||||
numRefreshes("TenantEntryCacheNumRefreshes", metrics),
|
||||
refreshByWatchTrigger("TenantEntryCacheRefreshWatchTrigger", metrics) {
|
||||
TraceEvent("TenantEntryCacheCreated", uid);
|
||||
}
|
||||
|
||||
|
@ -300,26 +484,36 @@ public:
|
|||
hits("TenantEntryCacheHits", metrics), misses("TenantEntryCacheMisses", metrics),
|
||||
refreshByCacheInit("TenantEntryCacheRefreshInit", metrics),
|
||||
refreshByCacheMiss("TenantEntryCacheRefreshMiss", metrics),
|
||||
numRefreshes("TenantEntryCacheNumRefreshes", metrics) {
|
||||
numRefreshes("TenantEntryCacheNumRefreshes", metrics),
|
||||
refreshByWatchTrigger("TenantEntryCacheRefreshWatchTrigger", metrics) {
|
||||
TraceEvent("TenantEntryCacheCreated", uid);
|
||||
}
|
||||
|
||||
Future<Void> init() {
|
||||
Future<Void> init(bool waitForInitalWatch = false) {
|
||||
TraceEvent("TenantEntryCacheInit", uid);
|
||||
|
||||
Future<Void> f = refreshImpl(this, TenantEntryCacheRefreshReason::INIT);
|
||||
|
||||
// Launch a recurring task to periodically refresh the cache by scanning the database KeyRange
TenantEntryCacheRefreshReason reason = TenantEntryCacheRefreshReason::PERIODIC_TASK;
Future<Void> initalWatchFuture = Void();
lastTenantIdRefresher = lastTenantIdWatch(this);
if (refreshMode == TenantEntryCacheRefreshMode::PERIODIC_TASK) {
refresher = recurringAsync([&, reason]() { return refresh(reason); },
CLIENT_KNOBS->TENANT_ENTRY_CACHE_LIST_REFRESH_INTERVAL, /* interval */
true, /* absoluteIntervalDelay */
CLIENT_KNOBS->TENANT_ENTRY_CACHE_LIST_REFRESH_INTERVAL, /* initialDelay */
TaskPriority::Worker);
|
||||
} else if (refreshMode == TenantEntryCacheRefreshMode::WATCH) {
|
||||
if (waitForInitalWatch) {
|
||||
initalWatchFuture = setInitialWatch.getFuture();
|
||||
}
|
||||
watchRefresher = refreshCacheUsingWatch(this, TenantEntryCacheRefreshReason::WATCH_TRIGGER);
|
||||
}
|
||||
|
||||
return f;
|
||||
Future<Void> setLastTenant = setLastTenantId(this);
|
||||
|
||||
return f && initalWatchFuture && setLastTenant;
|
||||
}
|
||||
|
||||
Database getDatabase() const { return db; }
|
||||
|
@ -384,6 +578,7 @@ public:
|
|||
Counter::Value numCacheRefreshes() const { return numRefreshes.getValue(); }
|
||||
Counter::Value numRefreshByMisses() const { return refreshByCacheMiss.getValue(); }
|
||||
Counter::Value numRefreshByInit() const { return refreshByCacheInit.getValue(); }
|
||||
Counter::Value numWatchRefreshes() const { return refreshByWatchTrigger.getValue(); }
|
||||
};
|
||||
|
||||
#include "flow/unactorcompiler.h"
@ -178,6 +178,7 @@ Future<std::pair<Optional<TenantMapEntry>, bool>> createTenantTransaction(
|
|||
|
||||
TenantMetadata::tenantMap().set(tr, name, tenantEntry);
|
||||
TenantMetadata::tenantIdIndex().set(tr, tenantEntry.id, name);
|
||||
TenantMetadata::lastTenantModification().setVersionstamp(tr, Versionstamp(), 0);
|
||||
|
||||
if (tenantEntry.tenantGroup.present()) {
|
||||
TenantMetadata::tenantGroupTenantIndex().insert(tr, Tuple::makeTuple(tenantEntry.tenantGroup.get(), name));
|
||||
|
@ -346,6 +347,7 @@ Future<Void> deleteTenantTransaction(Transaction tr,
|
|||
TenantMetadata::tenantMap().erase(tr, name);
|
||||
TenantMetadata::tenantIdIndex().erase(tr, tenantEntry.get().id);
|
||||
TenantMetadata::tenantCount().atomicOp(tr, -1, MutationRef::AddValue);
|
||||
TenantMetadata::lastTenantModification().setVersionstamp(tr, Versionstamp(), 0);
|
||||
|
||||
if (tenantEntry.get().tenantGroup.present()) {
|
||||
TenantMetadata::tenantGroupTenantIndex().erase(tr,
|
||||
|
@ -420,6 +422,7 @@ Future<Void> configureTenantTransaction(Transaction tr,
|
|||
|
||||
tr->setOption(FDBTransactionOptions::RAW_ACCESS);
|
||||
TenantMetadata::tenantMap().set(tr, tenantName, updatedTenantEntry);
|
||||
TenantMetadata::lastTenantModification().setVersionstamp(tr, Versionstamp(), 0);
|
||||
|
||||
// If the tenant group was changed, we need to update the tenant group metadata structures
|
||||
if (originalEntry.tenantGroup != updatedTenantEntry.tenantGroup) {
|
||||
|
@ -523,6 +526,7 @@ Future<Void> renameTenantTransaction(Transaction tr,
|
|||
TenantMetadata::tenantMap().erase(tr, oldName);
|
||||
TenantMetadata::tenantMap().set(tr, newName, oldEntry.get());
|
||||
TenantMetadata::tenantIdIndex().set(tr, oldEntry.get().id, newName);
|
||||
TenantMetadata::lastTenantModification().setVersionstamp(tr, Versionstamp(), 0);
|
||||
|
||||
// Update the tenant group index to reflect the new tenant name
|
||||
if (oldEntry.get().tenantGroup.present()) {
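
Each mutating path in TenantManagement now also bumps TenantMetadata::lastTenantModification with a versionstamp; that is the key the cache's watch is registered on, so any create, delete, configure, or rename wakes the watch once per commit. A stripped-down sketch of the write side (illustrative only; error handling and the surrounding tenant logic are omitted):

ACTOR Future<Void> touchLastTenantModification(Database db) {
	state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(db);
	loop {
		try {
			tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
			// Versionstamped value: every commit produces a new value, so watchers always see a change.
			TenantMetadata::lastTenantModification().setVersionstamp(tr, Versionstamp(), 0);
			wait(tr->commit());
			return Void();
		} catch (Error& e) {
			wait(tr->onError(e));
		}
	}
}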
@ -96,6 +96,13 @@ public:
|
|||
ThreadFuture<Key> purgeBlobGranules(const KeyRangeRef& keyRange, Version purgeVersion, bool force) override;
|
||||
ThreadFuture<Void> waitPurgeGranulesComplete(const KeyRef& purgeKey) override;
|
||||
|
||||
ThreadFuture<bool> blobbifyRange(const KeyRangeRef& keyRange) override;
|
||||
ThreadFuture<bool> unblobbifyRange(const KeyRangeRef& keyRange) override;
|
||||
ThreadFuture<Standalone<VectorRef<KeyRangeRef>>> listBlobbifiedRanges(const KeyRangeRef& keyRange,
|
||||
int rangeLimit) override;
|
||||
|
||||
ThreadFuture<Version> verifyBlobRange(const KeyRangeRef& keyRange, Optional<Version> version) override;
|
||||
|
||||
void addref() override { ThreadSafeReferenceCounted<ThreadSafeTenant>::addref(); }
|
||||
void delref() override { ThreadSafeReferenceCounted<ThreadSafeTenant>::delref(); }
|
||||
|
||||
|
|
|
@ -48,6 +48,7 @@ struct Tuple {
|
|||
// Note that strings can't be incomplete because they are parsed such that the end of the packed
|
||||
// byte string is considered the end of the string in lieu of a specific end.
|
||||
static Tuple unpack(StringRef const& str, bool exclude_incomplete = false);
|
||||
static std::string tupleToString(Tuple const& tuple);
|
||||
static Tuple unpackUserType(StringRef const& str, bool exclude_incomplete = false);
|
||||
|
||||
Tuple& append(Tuple const& tuple);
|
||||
|
|
|
@ -201,6 +201,9 @@ description is not currently required but encouraged.
|
|||
<Option name="transaction_include_port_in_address" code="505"
|
||||
description="Deprecated. Addresses returned by get_addresses_for_key include the port when enabled. As of api version 630, this option is enabled by default and setting this has no effect."
|
||||
defaultFor="23"/>
|
||||
<Option name="transaction_automatic_idempotency" code="506"
|
||||
description="Set a random idempotency id for all transactions. See the transaction option description for more information."
|
||||
defaultFor="505"/>
|
||||
<Option name="transaction_bypass_unreadable" code="700"
|
||||
description="Allows ``get`` operations to read from sections of keyspace that have become unreadable because of versionstamp operations. This sets the ``bypass_unreadable`` option of each transaction created by this database. See the transaction option description for more information."
|
||||
defaultFor="1100"/>
|
||||
|
@ -273,6 +276,11 @@ description is not currently required but encouraged.
|
|||
<Option name="size_limit" code="503"
|
||||
paramType="Int" paramDescription="value in bytes"
|
||||
description="Set the transaction size limit in bytes. The size is calculated by combining the sizes of all keys and values written or mutated, all key ranges cleared, and all read and write conflict ranges. (In other words, it includes the total size of all data included in the request to the cluster to commit the transaction.) Large transactions can cause performance problems on FoundationDB clusters, so setting this limit to a smaller value than the default can help prevent the client from accidentally degrading the cluster's performance. This value must be at least 32 and cannot be set to higher than 10,000,000, the default transaction size limit." />
|
||||
<Option name="idempotency_id" code="504"
|
||||
paramType="String" paramDescription="Unique ID"
|
||||
description="Associate this transaction with this ID for the purpose of checking whether or not this transaction has already committed. Must be at least 16 bytes and less than 256 bytes." />
|
||||
<Option name="automatic_idempotency" code="505"
|
||||
description="Automatically assign a random 16 byte idempotency id for this transaction. Prevents commits from failing with ``commit_unknown_result``. WARNING: If you are also using the multiversion client or transaction timeouts, if either cluster_version_changed or transaction_timed_out was thrown during a commit, then that commit may have already succeeded or may succeed in the future." />
|
||||
<Option name="snapshot_ryw_enable" code="600"
|
||||
description="Snapshot read operations will see the results of writes done in the same transaction. This is the default behavior." />
|
||||
<Option name="snapshot_ryw_disable" code="601"
@ -283,6 +283,15 @@ ProcessClass::Fitness ProcessClass::machineClassFitness(ClusterRole role) const
|
|||
default:
|
||||
return ProcessClass::NeverAssign;
|
||||
}
|
||||
case ProcessClass::BlobMigrator:
|
||||
switch (_class) {
|
||||
case ProcessClass::StatelessClass:
|
||||
return ProcessClass::GoodFit;
|
||||
case ProcessClass::MasterClass:
|
||||
return ProcessClass::OkayFit;
|
||||
default:
|
||||
return ProcessClass::NeverAssign;
|
||||
}
|
||||
case ProcessClass::StorageCache:
|
||||
switch (_class) {
|
||||
case ProcessClass::StorageCacheClass:
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include "flow/network.h"
|
||||
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include <boost/unordered_set.hpp>
|
||||
|
||||
#include <fmt/format.h>
|
||||
#include <list>
|
||||
|
@ -123,20 +124,70 @@ TEST_CASE("/fdbrpc/authz/LRUCache") {
|
|||
return Void();
|
||||
}
|
||||
|
||||
struct TokenCacheImpl {
|
||||
struct CacheEntry {
|
||||
Arena arena;
|
||||
VectorRef<TenantNameRef> tenants;
|
||||
double expirationTime = 0.0;
|
||||
};
|
||||
struct CacheEntry {
|
||||
Arena arena;
|
||||
VectorRef<TenantNameRef> tenants;
|
||||
Optional<StringRef> tokenId;
|
||||
double expirationTime = 0.0;
|
||||
};
|
||||
|
||||
struct AuditEntry {
|
||||
NetworkAddress address;
|
||||
Optional<Standalone<StringRef>> tokenId;
|
||||
explicit AuditEntry(NetworkAddress const& address, CacheEntry const& cacheEntry)
|
||||
: address(address),
|
||||
tokenId(cacheEntry.tokenId.present() ? Standalone<StringRef>(cacheEntry.tokenId.get(), cacheEntry.arena)
|
||||
: Optional<Standalone<StringRef>>()) {}
|
||||
};
|
||||
|
||||
bool operator==(AuditEntry const& lhs, AuditEntry const& rhs) {
|
||||
return (lhs.address == rhs.address) && (lhs.tokenId.present() == rhs.tokenId.present()) &&
|
||||
(!lhs.tokenId.present() || lhs.tokenId.get() == rhs.tokenId.get());
|
||||
}
|
||||
|
||||
std::size_t hash_value(AuditEntry const& value) {
|
||||
std::size_t seed = 0;
|
||||
boost::hash_combine(seed, value.address);
|
||||
if (value.tokenId.present()) {
|
||||
boost::hash_combine(seed, value.tokenId.get());
|
||||
}
|
||||
return seed;
|
||||
}
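
boost::unordered_set picks up hash_value through argument-dependent lookup and uses operator== for equality, so repeated uses of the same token from the same peer collapse into a single AuditEntry per window. The same mechanism with a stand-in type, for illustration only:

#include <boost/unordered_set.hpp>
#include <boost/functional/hash.hpp>
#include <string>

struct Entry {
	std::string address;
	std::string tokenId;
};
bool operator==(Entry const& a, Entry const& b) {
	return a.address == b.address && a.tokenId == b.tokenId;
}
std::size_t hash_value(Entry const& e) {
	std::size_t seed = 0;
	boost::hash_combine(seed, e.address);
	boost::hash_combine(seed, e.tokenId);
	return seed; // found by ADL, just like hash_value(AuditEntry const&) above
}

// boost::unordered_set<Entry> seen;
// seen.insert({ "1.2.3.4:4500", "token-1" });
// seen.insert({ "1.2.3.4:4500", "token-1" }); // duplicate: seen.size() stays 1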
struct TokenCacheImpl {
|
||||
LRUCache<StringRef, CacheEntry> cache;
|
||||
TokenCacheImpl() : cache(FLOW_KNOBS->TOKEN_CACHE_SIZE) {}
|
||||
boost::unordered_set<AuditEntry> usedTokens;
|
||||
Future<Void> auditor;
|
||||
TokenCacheImpl();
|
||||
|
||||
bool validate(TenantNameRef tenant, StringRef token);
|
||||
bool validateAndAdd(double currentTime, StringRef token, NetworkAddress const& peer);
|
||||
};
|
||||
|
||||
ACTOR Future<Void> tokenCacheAudit(TokenCacheImpl* self) {
|
||||
state boost::unordered_set<AuditEntry> audits;
|
||||
state boost::unordered_set<AuditEntry>::iterator iter;
|
||||
state double lastLoggedTime = 0;
|
||||
loop {
|
||||
auto const timeSinceLog = g_network->timer() - lastLoggedTime;
|
||||
if (timeSinceLog < FLOW_KNOBS->AUDIT_TIME_WINDOW) {
|
||||
wait(delay(FLOW_KNOBS->AUDIT_TIME_WINDOW - timeSinceLog));
|
||||
}
|
||||
lastLoggedTime = g_network->timer();
|
||||
audits.swap(self->usedTokens);
|
||||
for (iter = audits.begin(); iter != audits.end(); ++iter) {
|
||||
CODE_PROBE(true, "Audit Logging Running");
|
||||
TraceEvent("AuditTokenUsed").detail("Client", iter->address).detail("TokenId", iter->tokenId).log();
|
||||
wait(yield());
|
||||
}
|
||||
audits.clear();
|
||||
}
|
||||
}
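
The audit loop drains the live set by swapping it into a local set: the swap is O(1), new token usages keep accumulating in usedTokens while the drained entries are logged with yields in between, and each entry is logged at most once per AUDIT_TIME_WINDOW. The same drain pattern in isolation (names are placeholders):

boost::unordered_set<AuditEntry> live;     // filled on the validation path
boost::unordered_set<AuditEntry> draining; // owned by the audit loop
draining.swap(live);                       // O(1) hand-off; live is empty again
for (auto const& entry : draining) {
	// emit one AuditTokenUsed trace per entry, yielding between iterations
}
draining.clear();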
TokenCacheImpl::TokenCacheImpl() : cache(FLOW_KNOBS->TOKEN_CACHE_SIZE) {
|
||||
auditor = tokenCacheAudit(this);
|
||||
}
|
||||
|
||||
TokenCache::TokenCache() : impl(new TokenCacheImpl()) {}
|
||||
TokenCache::~TokenCache() {
|
||||
delete impl;
|
||||
|
@ -212,6 +263,9 @@ bool TokenCacheImpl::validateAndAdd(double currentTime, StringRef token, Network
|
|||
for (auto tenant : t.tenants.get()) {
|
||||
c.tenants.push_back_deep(c.arena, tenant);
|
||||
}
|
||||
if (t.tokenId.present()) {
|
||||
c.tokenId = StringRef(c.arena, t.tokenId.get());
|
||||
}
|
||||
cache.insert(StringRef(c.arena, token), c);
|
||||
return true;
|
||||
}
|
||||
|
@ -250,6 +304,8 @@ bool TokenCacheImpl::validate(TenantNameRef name, StringRef token) {
|
|||
TraceEvent(SevWarn, "TenantTokenMismatch").detail("From", peer).detail("Tenant", name.toString());
|
||||
return false;
|
||||
}
|
||||
// audit logging
|
||||
usedTokens.insert(AuditEntry(peer, *cachedEntry.get()));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -43,16 +43,44 @@ struct ProcessClass {
|
|||
DataDistributorClass,
|
||||
CoordinatorClass,
|
||||
RatekeeperClass,
|
||||
ConsistencyScanClass,
|
||||
StorageCacheClass,
|
||||
BackupClass,
|
||||
GrvProxyClass,
|
||||
BlobManagerClass,
|
||||
BlobWorkerClass,
|
||||
EncryptKeyProxyClass,
|
||||
ConsistencyScanClass,
|
||||
BlobMigratorClass,
|
||||
InvalidClass = -1
|
||||
};
|
||||
|
||||
// class is serialized by enum value, so it's important not to change the
|
||||
// enum value of a class. New classes should only be added to the end.
|
||||
static_assert(ProcessClass::UnsetClass == 0);
|
||||
static_assert(ProcessClass::StorageClass == 1);
|
||||
static_assert(ProcessClass::TransactionClass == 2);
|
||||
static_assert(ProcessClass::ResolutionClass == 3);
|
||||
static_assert(ProcessClass::TesterClass == 4);
|
||||
static_assert(ProcessClass::CommitProxyClass == 5);
|
||||
static_assert(ProcessClass::MasterClass == 6);
|
||||
static_assert(ProcessClass::StatelessClass == 7);
|
||||
static_assert(ProcessClass::LogClass == 8);
|
||||
static_assert(ProcessClass::ClusterControllerClass == 9);
|
||||
static_assert(ProcessClass::LogRouterClass == 10);
|
||||
static_assert(ProcessClass::FastRestoreClass == 11);
|
||||
static_assert(ProcessClass::DataDistributorClass == 12);
|
||||
static_assert(ProcessClass::CoordinatorClass == 13);
|
||||
static_assert(ProcessClass::RatekeeperClass == 14);
|
||||
static_assert(ProcessClass::StorageCacheClass == 15);
|
||||
static_assert(ProcessClass::BackupClass == 16);
|
||||
static_assert(ProcessClass::GrvProxyClass == 17);
|
||||
static_assert(ProcessClass::BlobManagerClass == 18);
|
||||
static_assert(ProcessClass::BlobWorkerClass == 19);
|
||||
static_assert(ProcessClass::EncryptKeyProxyClass == 20);
|
||||
static_assert(ProcessClass::ConsistencyScanClass == 21);
|
||||
static_assert(ProcessClass::BlobMigratorClass == 22);
|
||||
static_assert(ProcessClass::InvalidClass == -1);
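
Because the class (and source) values are persisted and sent over the wire by numeric value, these static_asserts turn an accidental reorder into a compile error rather than a silent corruption of stored process classes. The same guard in miniature, with a hypothetical enum:

// Hypothetical serialized enum: values are pinned, new members may only be appended.
enum class StoredColor : int8_t { Red = 0, Green = 1, Blue = 2 };
static_assert(static_cast<int8_t>(StoredColor::Red) == 0);
static_assert(static_cast<int8_t>(StoredColor::Green) == 1);
static_assert(static_cast<int8_t>(StoredColor::Blue) == 2);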
enum Fitness {
|
||||
BestFit,
|
||||
GoodFit,
|
||||
|
@ -76,6 +104,7 @@ struct ProcessClass {
|
|||
ConsistencyScan,
|
||||
BlobManager,
|
||||
BlobWorker,
|
||||
BlobMigrator,
|
||||
StorageCache,
|
||||
Backup,
|
||||
EncryptKeyProxy,
|
||||
|
@ -86,6 +115,12 @@ struct ProcessClass {
|
|||
int16_t _class;
|
||||
int16_t _source;
|
||||
|
||||
// source is serialized by enum value, so it's important not to change the
|
||||
// enum value of a source. New sources should only be added to the end.
|
||||
static_assert(ProcessClass::CommandLineSource == 0);
|
||||
static_assert(ProcessClass::AutoSource == 1);
|
||||
static_assert(ProcessClass::DBSource == 2);
|
||||
|
||||
public:
|
||||
ProcessClass() : _class(UnsetClass), _source(CommandLineSource) {}
|
||||
ProcessClass(ClassType type, ClassSource source) : _class(type), _source(source) {}
|
||||
|
|
|
@ -42,6 +42,8 @@ struct TenantInfo {
|
|||
// Is set during deserialization. It will be set to true if the tenant
|
||||
// name is set and the client is authorized to use this tenant.
|
||||
bool tenantAuthorized = false;
|
||||
// Number of storage bytes currently used by this tenant.
|
||||
int64_t storageUsage = 0;
|
||||
|
||||
// Helper function for most endpoints that read/write data. This returns true iff
|
||||
// the client is either a) a trusted peer or b) is accessing keyspace belonging to a tenant,
|
||||
|
|
|
@ -59,10 +59,9 @@ public:
|
|||
const UID& dbgid_,
|
||||
Arena& arena_,
|
||||
const VectorRef<MutationRef>& mutations_,
|
||||
IKeyValueStore* txnStateStore_,
|
||||
Reference<AsyncVar<ServerDBInfo> const> db)
|
||||
IKeyValueStore* txnStateStore_)
|
||||
: spanContext(spanContext_), dbgid(dbgid_), arena(arena_), mutations(mutations_), txnStateStore(txnStateStore_),
|
||||
confChange(dummyConfChange), dbInfo(db) {}
|
||||
confChange(dummyConfChange) {}
|
||||
|
||||
ApplyMetadataMutationsImpl(const SpanContext& spanContext_,
|
||||
Arena& arena_,
|
||||
|
@ -84,17 +83,16 @@ public:
|
|||
commit(proxyCommitData_.commit), cx(proxyCommitData_.cx), committedVersion(&proxyCommitData_.committedVersion),
|
||||
storageCache(&proxyCommitData_.storageCache), tag_popped(&proxyCommitData_.tag_popped),
|
||||
tssMapping(&proxyCommitData_.tssMapping), tenantMap(&proxyCommitData_.tenantMap),
|
||||
tenantIdIndex(&proxyCommitData_.tenantIdIndex), initialCommit(initialCommit_), dbInfo(proxyCommitData_.db) {}
|
||||
tenantIdIndex(&proxyCommitData_.tenantIdIndex), initialCommit(initialCommit_) {}
|
||||
|
||||
ApplyMetadataMutationsImpl(const SpanContext& spanContext_,
|
||||
ResolverData& resolverData_,
|
||||
const VectorRef<MutationRef>& mutations_,
|
||||
Reference<AsyncVar<ServerDBInfo> const> db)
|
||||
const VectorRef<MutationRef>& mutations_)
|
||||
: spanContext(spanContext_), dbgid(resolverData_.dbgid), arena(resolverData_.arena), mutations(mutations_),
|
||||
txnStateStore(resolverData_.txnStateStore), toCommit(resolverData_.toCommit),
|
||||
confChange(resolverData_.confChanges), logSystem(resolverData_.logSystem), popVersion(resolverData_.popVersion),
|
||||
keyInfo(resolverData_.keyInfo), storageCache(resolverData_.storageCache),
|
||||
initialCommit(resolverData_.initialCommit), forResolver(true), dbInfo(db) {}
|
||||
initialCommit(resolverData_.initialCommit), forResolver(true) {}
|
||||
|
||||
private:
|
||||
// The following variables are incoming parameters
|
||||
|
@ -142,8 +140,6 @@ private:
|
|||
// true if called from Resolver
|
||||
bool forResolver = false;
|
||||
|
||||
Reference<AsyncVar<ServerDBInfo> const> dbInfo;
|
||||
|
||||
private:
|
||||
// The following variables are used internally
|
||||
|
||||
|
@ -164,7 +160,7 @@ private:
|
|||
|
||||
private:
|
||||
void writeMutation(const MutationRef& m) {
|
||||
if (forResolver || !isEncryptionOpSupported(EncryptOperationType::TLOG_ENCRYPTION, dbInfo->get().client)) {
|
||||
if (forResolver || !isEncryptionOpSupported(EncryptOperationType::TLOG_ENCRYPTION)) {
|
||||
toCommit->writeTypedMessage(m);
|
||||
} else {
|
||||
ASSERT(cipherKeys != nullptr);
|
||||
|
@ -1347,16 +1343,14 @@ void applyMetadataMutations(SpanContext const& spanContext,
|
|||
|
||||
void applyMetadataMutations(SpanContext const& spanContext,
|
||||
ResolverData& resolverData,
|
||||
const VectorRef<MutationRef>& mutations,
|
||||
Reference<AsyncVar<ServerDBInfo> const> dbInfo) {
|
||||
ApplyMetadataMutationsImpl(spanContext, resolverData, mutations, dbInfo).apply();
|
||||
const VectorRef<MutationRef>& mutations) {
|
||||
ApplyMetadataMutationsImpl(spanContext, resolverData, mutations).apply();
|
||||
}
|
||||
|
||||
void applyMetadataMutations(SpanContext const& spanContext,
|
||||
const UID& dbgid,
|
||||
Arena& arena,
|
||||
const VectorRef<MutationRef>& mutations,
|
||||
IKeyValueStore* txnStateStore,
|
||||
Reference<AsyncVar<ServerDBInfo> const> dbInfo) {
|
||||
ApplyMetadataMutationsImpl(spanContext, dbgid, arena, mutations, txnStateStore, dbInfo).apply();
|
||||
IKeyValueStore* txnStateStore) {
|
||||
ApplyMetadataMutationsImpl(spanContext, dbgid, arena, mutations, txnStateStore).apply();
|
||||
}
|
||||
|
|
|
@ -451,12 +451,14 @@ TEST_CASE("/blobgranule/server/common/granulesummary") {
|
|||
}
|
||||
|
||||
// FIXME: if credentials can expire, refresh periodically
|
||||
ACTOR Future<Void> loadBlobMetadataForTenants(BGTenantMap* self, std::vector<TenantMapEntry> tenantMapEntries) {
|
||||
ACTOR Future<Void> loadBlobMetadataForTenants(
|
||||
BGTenantMap* self,
|
||||
std::vector<std::pair<BlobMetadataDomainId, BlobMetadataDomainName>> tenantsToLoad) {
|
||||
ASSERT(SERVER_KNOBS->BG_METADATA_SOURCE == "tenant");
|
||||
ASSERT(!tenantMapEntries.empty());
|
||||
state std::vector<BlobMetadataDomainId> domainIds;
|
||||
for (auto& entry : tenantMapEntries) {
|
||||
domainIds.push_back(entry.id);
|
||||
ASSERT(!tenantsToLoad.empty());
|
||||
state EKPGetLatestBlobMetadataRequest req;
|
||||
for (auto& tenant : tenantsToLoad) {
|
||||
req.domainInfos.emplace_back_deep(req.domainInfos.arena(), tenant.first, StringRef(tenant.second));
|
||||
}
|
||||
|
||||
// FIXME: if one tenant gets an error, don't kill whole process
|
||||
|
@ -464,8 +466,7 @@ ACTOR Future<Void> loadBlobMetadataForTenants(BGTenantMap* self, std::vector<Ten
|
|||
loop {
|
||||
Future<EKPGetLatestBlobMetadataReply> requestFuture;
|
||||
if (self->dbInfo.isValid() && self->dbInfo->get().encryptKeyProxy.present()) {
|
||||
EKPGetLatestBlobMetadataRequest req;
|
||||
req.domainIds = domainIds;
|
||||
req.reply.reset();
|
||||
requestFuture =
|
||||
brokenPromiseToNever(self->dbInfo->get().encryptKeyProxy.get().getLatestBlobMetadata.getReply(req));
|
||||
} else {
|
||||
|
@ -473,7 +474,7 @@ ACTOR Future<Void> loadBlobMetadataForTenants(BGTenantMap* self, std::vector<Ten
|
|||
}
|
||||
choose {
|
||||
when(EKPGetLatestBlobMetadataReply rep = wait(requestFuture)) {
|
||||
ASSERT(rep.blobMetadataDetails.size() == domainIds.size());
|
||||
ASSERT(rep.blobMetadataDetails.size() == req.domainInfos.size());
|
||||
// not guaranteed to be in same order in the request as the response
|
||||
for (auto& metadata : rep.blobMetadataDetails) {
|
||||
auto info = self->tenantInfoById.find(metadata.domainId);
|
||||
|
@ -482,7 +483,7 @@ ACTOR Future<Void> loadBlobMetadataForTenants(BGTenantMap* self, std::vector<Ten
|
|||
}
|
||||
auto dataEntry = self->tenantData.rangeContaining(info->second.prefix);
|
||||
ASSERT(dataEntry.begin() == info->second.prefix);
|
||||
dataEntry.cvalue()->setBStore(BlobConnectionProvider::newBlobConnectionProvider(metadata));
|
||||
dataEntry.cvalue()->updateBStore(metadata);
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
@ -491,9 +492,17 @@ ACTOR Future<Void> loadBlobMetadataForTenants(BGTenantMap* self, std::vector<Ten
|
|||
}
|
||||
}
|
||||
|
||||
Future<Void> loadBlobMetadataForTenant(BGTenantMap* self,
|
||||
BlobMetadataDomainId domainId,
|
||||
BlobMetadataDomainName domainName) {
|
||||
std::vector<std::pair<BlobMetadataDomainId, BlobMetadataDomainName>> toLoad;
|
||||
toLoad.push_back({ domainId, domainName });
|
||||
return loadBlobMetadataForTenants(self, toLoad);
|
||||
}
|
||||
|
||||
// list of tenants that may or may not already exist
|
||||
void BGTenantMap::addTenants(std::vector<std::pair<TenantName, TenantMapEntry>> tenants) {
|
||||
std::vector<TenantMapEntry> tenantsToLoad;
|
||||
std::vector<std::pair<BlobMetadataDomainId, BlobMetadataDomainName>> tenantsToLoad;
|
||||
for (auto entry : tenants) {
|
||||
if (tenantInfoById.insert({ entry.second.id, entry.second }).second) {
|
||||
auto r = makeReference<GranuleTenantData>(entry.first, entry.second);
|
||||
|
@ -501,7 +510,7 @@ void BGTenantMap::addTenants(std::vector<std::pair<TenantName, TenantMapEntry>>
|
|||
if (SERVER_KNOBS->BG_METADATA_SOURCE != "tenant") {
|
||||
r->bstoreLoaded.send(Void());
|
||||
} else {
|
||||
tenantsToLoad.push_back(entry.second);
|
||||
tenantsToLoad.push_back({ entry.second.id, entry.first });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -525,11 +534,41 @@ Optional<TenantMapEntry> BGTenantMap::getTenantById(int64_t id) {
|
|||
}
|
||||
}
|
||||
|
||||
// TODO: handle case where tenant isn't loaded yet
|
||||
Reference<GranuleTenantData> BGTenantMap::getDataForGranule(const KeyRangeRef& keyRange) {
|
||||
auto tenant = tenantData.rangeContaining(keyRange.begin);
|
||||
ASSERT(tenant.begin() <= keyRange.begin);
|
||||
ASSERT(tenant.end() >= keyRange.end);
|
||||
// FIXME: batch requests for refresh?
|
||||
// FIXME: don't double fetch if multiple accesses to refreshing/expired metadata
|
||||
// FIXME: log warning if after refresh, data is still expired!
|
||||
ACTOR Future<Reference<GranuleTenantData>> getDataForGranuleActor(BGTenantMap* self, KeyRange keyRange) {
|
||||
state int loopCount = 0;
|
||||
loop {
|
||||
loopCount++;
|
||||
auto tenant = self->tenantData.rangeContaining(keyRange.begin);
|
||||
ASSERT(tenant.begin() <= keyRange.begin);
|
||||
ASSERT(tenant.end() >= keyRange.end);
|
||||
|
||||
return tenant.cvalue();
|
||||
}
|
||||
if (!tenant.cvalue().isValid() || !tenant.cvalue()->bstore.isValid()) {
|
||||
return tenant.cvalue();
|
||||
} else if (tenant.cvalue()->bstore->isExpired()) {
|
||||
CODE_PROBE(true, "re-fetching expired blob metadata");
|
||||
// fetch again
|
||||
Future<Void> reload = loadBlobMetadataForTenant(self, tenant.cvalue()->entry.id, tenant->cvalue()->name);
|
||||
wait(reload);
|
||||
if (loopCount > 1) {
|
||||
TraceEvent(SevWarn, "BlobMetadataStillExpired").suppressFor(5.0).detail("LoopCount", loopCount);
|
||||
wait(delay(0.001));
|
||||
}
|
||||
} else {
|
||||
// handle refresh in background if tenant needs refresh
|
||||
if (tenant.cvalue()->bstore->needsRefresh()) {
|
||||
Future<Void> reload =
|
||||
loadBlobMetadataForTenant(self, tenant.cvalue()->entry.id, tenant->cvalue()->name);
|
||||
self->addActor.send(reload);
|
||||
}
|
||||
return tenant.cvalue();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: handle case where tenant isn't loaded yet
|
||||
Future<Reference<GranuleTenantData>> BGTenantMap::getDataForGranule(const KeyRangeRef& keyRange) {
|
||||
return getDataForGranuleActor(this, keyRange);
|
||||
}
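
Because getDataForGranule can now re-fetch expired blob metadata, it returns a Future, and call sites move from a plain assignment to the wait/store idiom (the blob manager and blob worker hunks below show the real call sites). An illustrative standalone caller:

ACTOR Future<Void> exampleUseGranuleData(BGTenantMap* tenantMap, KeyRange range) {
	// store() assigns the future's result into the state variable once it is ready.
	state Reference<GranuleTenantData> data;
	wait(store(data, tenantMap->getDataForGranule(range)));
	if (data.isValid()) {
		wait(data->bstoreLoaded.getFuture());
	}
	return Void();
}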
@ -562,11 +562,12 @@ ACTOR Future<BlobGranuleSplitPoints> alignKeys(Reference<BlobManagerData> bmData
|
|||
|
||||
state Transaction tr = Transaction(bmData->db);
|
||||
state int idx = 1;
|
||||
state Reference<GranuleTenantData> tenantData = bmData->tenantData.getDataForGranule(granuleRange);
|
||||
state Reference<GranuleTenantData> tenantData;
|
||||
wait(store(tenantData, bmData->tenantData.getDataForGranule(granuleRange)));
|
||||
while (SERVER_KNOBS->BG_METADATA_SOURCE == "tenant" && !tenantData.isValid()) {
|
||||
// this is a bit of a hack, but if we know this range is supposed to have a tenant, and it doesn't, just wait
|
||||
wait(delay(1.0));
|
||||
tenantData = bmData->tenantData.getDataForGranule(granuleRange);
|
||||
wait(store(tenantData, bmData->tenantData.getDataForGranule(granuleRange)));
|
||||
}
|
||||
for (; idx < splits.size() - 1; idx++) {
|
||||
loop {
|
||||
|
@ -4212,7 +4213,8 @@ ACTOR Future<Reference<BlobConnectionProvider>> getBStoreForGranule(Reference<Bl
|
|||
return self->bstore;
|
||||
}
|
||||
loop {
|
||||
state Reference<GranuleTenantData> data = self->tenantData.getDataForGranule(granuleRange);
|
||||
state Reference<GranuleTenantData> data;
|
||||
wait(store(data, self->tenantData.getDataForGranule(granuleRange)));
|
||||
if (data.isValid()) {
|
||||
wait(data->bstoreLoaded.getFuture());
|
||||
wait(delay(0));
|
||||
|
|
|
@ -0,0 +1,83 @@
|
|||
/*
|
||||
* BlobMigrator.actor.cpp
|
||||
*
|
||||
* This source file is part of the FoundationDB open source project
|
||||
*
|
||||
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "fdbserver/BlobMigratorInterface.h"
|
||||
#include "fdbserver/Knobs.h"
|
||||
#include "flow/ActorCollection.h"
|
||||
#include "flow/FastRef.h"
|
||||
#include "flow/IRandom.h"
|
||||
#include "flow/flow.h"
|
||||
#include "fdbclient/StorageServerInterface.h"
|
||||
#include "fdbclient/BlobConnectionProvider.h"
|
||||
#include "fdbclient/FDBTypes.h"
|
||||
#include "fdbclient/KeyRangeMap.h"
|
||||
#include "fdbclient/SystemData.h"
|
||||
#include "fdbclient/NativeAPI.actor.h"
|
||||
#include "fdbserver/ServerDBInfo.actor.h"
|
||||
#include "fdbserver/WaitFailure.h"
|
||||
|
||||
#include "flow/actorcompiler.h" // has to be last include
|
||||
|
||||
// BlobMigrator manages data migration from blob storage to storage servers. It implements the minimal set of
// StorageServerInterface APIs that the DataDistributor needs to start data migration.
|
||||
class BlobMigrator : public NonCopyable, public ReferenceCounted<BlobMigrator> {
|
||||
public:
|
||||
BlobMigrator(Reference<AsyncVar<ServerDBInfo> const> dbInfo, BlobMigratorInterface interf)
|
||||
: blobMigratorInterf(interf), actors(false) {
|
||||
if (!blobConn.isValid() && SERVER_KNOBS->BG_METADATA_SOURCE != "tenant") {
|
||||
blobConn = BlobConnectionProvider::newBlobConnectionProvider(SERVER_KNOBS->BG_URL);
|
||||
}
|
||||
db = openDBOnServer(dbInfo, TaskPriority::DefaultEndpoint, LockAware::True);
|
||||
}
|
||||
~BlobMigrator() {}
|
||||
|
||||
ACTOR static Future<Void> start(Reference<BlobMigrator> self) {
|
||||
self->actors.add(waitFailureServer(self->blobMigratorInterf.waitFailure.getFuture()));
|
||||
loop {
|
||||
choose {
|
||||
when(HaltBlobMigratorRequest req = waitNext(self->blobMigratorInterf.haltBlobMigrator.getFuture())) {
|
||||
req.reply.send(Void());
|
||||
TraceEvent("BlobMigratorHalted", self->blobMigratorInterf.id()).detail("ReqID", req.requesterID);
|
||||
break;
|
||||
}
|
||||
when(wait(self->actors.getResult())) {}
|
||||
}
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
||||
private:
|
||||
Database db;
|
||||
Reference<BlobConnectionProvider> blobConn;
|
||||
BlobMigratorInterface blobMigratorInterf;
|
||||
ActorCollection actors;
|
||||
};
|
||||
|
||||
// Main entry point
|
||||
ACTOR Future<Void> blobMigrator(BlobMigratorInterface ssi, Reference<AsyncVar<ServerDBInfo> const> dbInfo) {
|
||||
fmt::print("Start blob migrator {} \n", ssi.id().toString());
|
||||
try {
|
||||
Reference<BlobMigrator> self = makeReference<BlobMigrator>(dbInfo, ssi);
|
||||
wait(BlobMigrator::start(self));
|
||||
} catch (Error& e) {
|
||||
fmt::print("unexpected blob migrator error {}\n", e.what());
|
||||
}
|
||||
return Void();
|
||||
}
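
Not shown in this diff is the worker-side dispatch that actually calls this entry point; presumably it mirrors the other singleton roles, roughly along these lines (a hedged sketch only: the BlobMigratorInterface constructor and the surrounding worker plumbing are assumptions):

ACTOR Future<Void> serveBlobMigratorRequests(WorkerInterface interf,
                                             Reference<AsyncVar<ServerDBInfo> const> dbInfo,
                                             LocalityData locality) {
	state ActorCollection migrators(false);
	loop choose {
		when(InitializeBlobMigratorRequest req = waitNext(interf.blobMigrator.getFuture())) {
			BlobMigratorInterface recruited(locality, req.reqId); // constructor shape is an assumption
			migrators.add(blobMigrator(recruited, dbInfo));
			req.reply.send(recruited);
		}
		when(wait(migrators.getResult())) {}
	}
}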
@ -225,8 +225,7 @@ struct BlobWorkerData : NonCopyable, ReferenceCounted<BlobWorkerData> {
|
|||
resnapshotLock(new FlowLock(SERVER_KNOBS->BLOB_WORKER_RESNAPSHOT_PARALLELISM)),
|
||||
deltaWritesLock(new FlowLock(SERVER_KNOBS->BLOB_WORKER_DELTA_FILE_WRITE_PARALLELISM)),
|
||||
stats(id, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, initialSnapshotLock, resnapshotLock, deltaWritesLock),
|
||||
isEncryptionEnabled(
|
||||
isEncryptionOpSupported(EncryptOperationType::BLOB_GRANULE_ENCRYPTION, db->clientInfo->get())) {}
|
||||
isEncryptionEnabled(isEncryptionOpSupported(EncryptOperationType::BLOB_GRANULE_ENCRYPTION)) {}
|
||||
|
||||
bool managerEpochOk(int64_t epoch) {
|
||||
if (epoch < currentManagerEpoch) {
|
||||
|
@ -367,7 +366,7 @@ ACTOR Future<BlobGranuleCipherKeysCtx> getLatestGranuleCipherKeys(Reference<Blob
|
|||
KeyRange keyRange,
|
||||
Arena* arena) {
|
||||
state BlobGranuleCipherKeysCtx cipherKeysCtx;
|
||||
state Reference<GranuleTenantData> tenantData = bwData->tenantData.getDataForGranule(keyRange);
|
||||
state Reference<GranuleTenantData> tenantData = wait(bwData->tenantData.getDataForGranule(keyRange));
|
||||
|
||||
ASSERT(tenantData.isValid());
|
||||
|
||||
|
@ -1195,8 +1194,7 @@ ACTOR Future<BlobFileIndex> compactFromBlob(Reference<BlobWorkerData> bwData,
|
|||
deltaF = files.deltaFiles[deltaIdx];
|
||||
|
||||
if (deltaF.cipherKeysMeta.present()) {
|
||||
ASSERT(isEncryptionOpSupported(EncryptOperationType::BLOB_GRANULE_ENCRYPTION,
|
||||
bwData->dbInfo->get().client));
|
||||
ASSERT(isEncryptionOpSupported(EncryptOperationType::BLOB_GRANULE_ENCRYPTION));
|
||||
|
||||
BlobGranuleCipherKeysCtx keysCtx =
|
||||
wait(getGranuleCipherKeysFromKeysMeta(bwData, deltaF.cipherKeysMeta.get(), &filenameArena));
|
||||
|
@ -4097,7 +4095,8 @@ ACTOR Future<Reference<BlobConnectionProvider>> loadBStoreForTenant(Reference<Bl
|
|||
KeyRange keyRange) {
|
||||
state int retryCount = 0;
|
||||
loop {
|
||||
state Reference<GranuleTenantData> data = bwData->tenantData.getDataForGranule(keyRange);
|
||||
state Reference<GranuleTenantData> data;
|
||||
wait(store(data, bwData->tenantData.getDataForGranule(keyRange)));
|
||||
if (data.isValid()) {
|
||||
wait(data->bstoreLoaded.getFuture());
|
||||
wait(delay(0));
|
||||
|
|
|
@ -29,6 +29,8 @@
|
|||
#include "fdbclient/DatabaseContext.h"
|
||||
#include "fdbrpc/FailureMonitor.h"
|
||||
#include "fdbclient/EncryptKeyProxyInterface.h"
|
||||
#include "fdbserver/BlobGranuleServerCommon.actor.h"
|
||||
#include "fdbserver/BlobMigratorInterface.h"
|
||||
#include "fdbserver/Knobs.h"
|
||||
#include "flow/ActorCollection.h"
|
||||
#include "fdbclient/ClusterConnectionMemoryRecord.h"
|
||||
|
@ -198,6 +200,32 @@ struct BlobManagerSingleton : Singleton<BlobManagerInterface> {
|
|||
}
|
||||
};
|
||||
|
||||
struct BlobMigratorSingleton : Singleton<BlobMigratorInterface> {
|
||||
|
||||
BlobMigratorSingleton(const Optional<BlobMigratorInterface>& interface) : Singleton(interface) {}
|
||||
|
||||
Role getRole() const { return Role::BLOB_MIGRATOR; }
|
||||
ProcessClass::ClusterRole getClusterRole() const { return ProcessClass::BlobMigrator; }
|
||||
|
||||
void setInterfaceToDbInfo(ClusterControllerData* cc) const {
|
||||
if (interface.present()) {
|
||||
TraceEvent("CCMG_SetInf", cc->id).detail("Id", interface.get().id());
|
||||
cc->db.setBlobMigrator(interface.get());
|
||||
}
|
||||
}
|
||||
void halt(ClusterControllerData* cc, Optional<Standalone<StringRef>> pid) const {
|
||||
if (interface.present()) {
|
||||
TraceEvent("CCMG_Halt", cc->id).detail("Id", interface.get().id());
|
||||
cc->id_worker[pid].haltBlobMigrator =
|
||||
brokenPromiseToNever(interface.get().haltBlobMigrator.getReply(HaltBlobMigratorRequest(cc->id)));
|
||||
}
|
||||
}
|
||||
void recruit(ClusterControllerData* cc) const {
|
||||
cc->lastRecruitTime = now();
|
||||
cc->recruitBlobMigrator.set(true);
|
||||
}
|
||||
};
|
||||
|
||||
struct EncryptKeyProxySingleton : Singleton<EncryptKeyProxyInterface> {
|
||||
|
||||
EncryptKeyProxySingleton(const Optional<EncryptKeyProxyInterface>& interface) : Singleton(interface) {}
|
||||
|
@ -275,6 +303,7 @@ ACTOR Future<Void> clusterWatchDatabase(ClusterControllerData* cluster,
|
|||
dbInfo.distributor = db->serverInfo->get().distributor;
|
||||
dbInfo.ratekeeper = db->serverInfo->get().ratekeeper;
|
||||
dbInfo.blobManager = db->serverInfo->get().blobManager;
|
||||
dbInfo.blobMigrator = db->serverInfo->get().blobMigrator;
|
||||
dbInfo.encryptKeyProxy = db->serverInfo->get().encryptKeyProxy;
|
||||
dbInfo.consistencyScan = db->serverInfo->get().consistencyScan;
|
||||
dbInfo.latencyBandConfig = db->serverInfo->get().latencyBandConfig;
|
||||
|
@ -656,8 +685,12 @@ void checkBetterSingletons(ClusterControllerData* self) {
|
|||
WorkerDetails newCSWorker = findNewProcessForSingleton(self, ProcessClass::ConsistencyScan, id_used);
|
||||
|
||||
WorkerDetails newBMWorker;
|
||||
WorkerDetails newMGWorker;
|
||||
if (self->db.blobGranulesEnabled.get()) {
|
||||
newBMWorker = findNewProcessForSingleton(self, ProcessClass::BlobManager, id_used);
|
||||
if (isFullRestoreMode()) {
|
||||
newMGWorker = findNewProcessForSingleton(self, ProcessClass::BlobMigrator, id_used);
|
||||
}
|
||||
}
|
||||
|
||||
WorkerDetails newEKPWorker;
|
||||
|
@ -671,8 +704,12 @@ void checkBetterSingletons(ClusterControllerData* self) {
|
|||
auto bestFitnessForCS = findBestFitnessForSingleton(self, newCSWorker, ProcessClass::ConsistencyScan);
|
||||
|
||||
ProcessClass::Fitness bestFitnessForBM;
|
||||
ProcessClass::Fitness bestFitnessForMG;
|
||||
if (self->db.blobGranulesEnabled.get()) {
|
||||
bestFitnessForBM = findBestFitnessForSingleton(self, newBMWorker, ProcessClass::BlobManager);
|
||||
if (isFullRestoreMode()) {
|
||||
bestFitnessForMG = findBestFitnessForSingleton(self, newMGWorker, ProcessClass::BlobManager);
|
||||
}
|
||||
}
|
||||
|
||||
ProcessClass::Fitness bestFitnessForEKP;
|
||||
|
@ -685,6 +722,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
|
|||
auto ddSingleton = DataDistributorSingleton(db.distributor);
|
||||
ConsistencyScanSingleton csSingleton(db.consistencyScan);
|
||||
BlobManagerSingleton bmSingleton(db.blobManager);
|
||||
BlobMigratorSingleton mgSingleton(db.blobMigrator);
|
||||
EncryptKeyProxySingleton ekpSingleton(db.encryptKeyProxy);
|
||||
|
||||
// Check if the singletons are healthy.
|
||||
|
@ -699,9 +737,14 @@ void checkBetterSingletons(ClusterControllerData* self) {
|
|||
self, newCSWorker, csSingleton, bestFitnessForCS, self->recruitingConsistencyScanID);
|
||||
|
||||
bool bmHealthy = true;
|
||||
bool mgHealthy = true;
|
||||
if (self->db.blobGranulesEnabled.get()) {
|
||||
bmHealthy = isHealthySingleton<BlobManagerInterface>(
|
||||
self, newBMWorker, bmSingleton, bestFitnessForBM, self->recruitingBlobManagerID);
|
||||
if (isFullRestoreMode()) {
|
||||
mgHealthy = isHealthySingleton<BlobMigratorInterface>(
|
||||
self, newMGWorker, mgSingleton, bestFitnessForMG, self->recruitingBlobMigratorID);
|
||||
}
|
||||
}
|
||||
|
||||
bool ekpHealthy = true;
|
||||
|
@ -711,7 +754,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
|
|||
}
|
||||
// if any of the singletons are unhealthy (rerecruited or not stable), then do not
|
||||
// consider any further re-recruitments
|
||||
if (!(rkHealthy && ddHealthy && bmHealthy && ekpHealthy && csHealthy)) {
|
||||
if (!(rkHealthy && ddHealthy && bmHealthy && ekpHealthy && csHealthy && mgHealthy)) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -725,9 +768,14 @@ void checkBetterSingletons(ClusterControllerData* self) {
|
|||
Optional<Standalone<StringRef>> newCSProcessId = newCSWorker.interf.locality.processId();
|
||||
|
||||
Optional<Standalone<StringRef>> currBMProcessId, newBMProcessId;
|
||||
Optional<Standalone<StringRef>> currMGProcessId, newMGProcessId;
|
||||
if (self->db.blobGranulesEnabled.get()) {
|
||||
currBMProcessId = bmSingleton.interface.get().locality.processId();
|
||||
newBMProcessId = newBMWorker.interf.locality.processId();
|
||||
if (isFullRestoreMode()) {
|
||||
currMGProcessId = mgSingleton.interface.get().locality.processId();
|
||||
newMGProcessId = newMGWorker.interf.locality.processId();
|
||||
}
|
||||
}
|
||||
|
||||
Optional<Standalone<StringRef>> currEKPProcessId, newEKPProcessId;
|
||||
|
@ -741,6 +789,10 @@ void checkBetterSingletons(ClusterControllerData* self) {
|
|||
if (self->db.blobGranulesEnabled.get()) {
|
||||
currPids.emplace_back(currBMProcessId);
|
||||
newPids.emplace_back(newBMProcessId);
|
||||
if (isFullRestoreMode()) {
|
||||
currPids.emplace_back(currMGProcessId);
|
||||
newPids.emplace_back(newMGProcessId);
|
||||
}
|
||||
}
|
||||
|
||||
if (SERVER_KNOBS->ENABLE_ENCRYPTION) {
|
||||
|
@ -755,6 +807,10 @@ void checkBetterSingletons(ClusterControllerData* self) {
|
|||
if (!self->db.blobGranulesEnabled.get()) {
|
||||
ASSERT(currColocMap[currBMProcessId] == 0);
|
||||
ASSERT(newColocMap[newBMProcessId] == 0);
|
||||
if (isFullRestoreMode()) {
|
||||
ASSERT(currColocMap[currMGProcessId] == 0);
|
||||
ASSERT(newColocMap[newMGProcessId] == 0);
|
||||
}
|
||||
}
|
||||
|
||||
// if the knob is disabled, the EKP coloc counts should have no effect on the coloc counts check below
|
||||
|
@ -767,6 +823,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
|
|||
if (newColocMap[newRKProcessId] <= currColocMap[currRKProcessId] &&
|
||||
newColocMap[newDDProcessId] <= currColocMap[currDDProcessId] &&
|
||||
newColocMap[newBMProcessId] <= currColocMap[currBMProcessId] &&
|
||||
newColocMap[newMGProcessId] <= currColocMap[currMGProcessId] &&
|
||||
newColocMap[newEKPProcessId] <= currColocMap[currEKPProcessId] &&
|
||||
newColocMap[newCSProcessId] <= currColocMap[currCSProcessId]) {
|
||||
// rerecruit the singleton for which we have found a better process, if any
|
||||
|
@ -776,6 +833,9 @@ void checkBetterSingletons(ClusterControllerData* self) {
|
|||
ddSingleton.recruit(self);
|
||||
} else if (self->db.blobGranulesEnabled.get() && newColocMap[newBMProcessId] < currColocMap[currBMProcessId]) {
|
||||
bmSingleton.recruit(self);
|
||||
} else if (self->db.blobGranulesEnabled.get() && isFullRestoreMode() &&
|
||||
newColocMap[newMGProcessId] < currColocMap[currMGProcessId]) {
|
||||
mgSingleton.recruit(self);
|
||||
} else if (SERVER_KNOBS->ENABLE_ENCRYPTION && newColocMap[newEKPProcessId] < currColocMap[currEKPProcessId]) {
|
||||
ekpSingleton.recruit(self);
|
||||
} else if (newColocMap[newCSProcessId] < currColocMap[currCSProcessId]) {
|
||||
|
@ -1330,12 +1390,18 @@ ACTOR Future<Void> registerWorker(RegisterWorkerRequest req,
|
|||
self, w, currSingleton, registeringSingleton, self->recruitingRatekeeperID);
|
||||
}
|
||||
|
||||
if (self->db.blobGranulesEnabled.get() && req.blobManagerInterf.present()) {
|
||||
if (self->db.blobGranulesEnabled.get() && isFullRestoreMode() && req.blobManagerInterf.present()) {
|
||||
auto currSingleton = BlobManagerSingleton(self->db.serverInfo->get().blobManager);
|
||||
auto registeringSingleton = BlobManagerSingleton(req.blobManagerInterf);
|
||||
haltRegisteringOrCurrentSingleton<BlobManagerInterface>(
|
||||
self, w, currSingleton, registeringSingleton, self->recruitingBlobManagerID);
|
||||
}
|
||||
if (req.blobMigratorInterf.present()) {
|
||||
auto currSingleton = BlobMigratorSingleton(self->db.serverInfo->get().blobMigrator);
|
||||
auto registeringSingleton = BlobMigratorSingleton(req.blobMigratorInterf);
|
||||
haltRegisteringOrCurrentSingleton<BlobMigratorInterface>(
|
||||
self, w, currSingleton, registeringSingleton, self->recruitingBlobMigratorID);
|
||||
}
|
||||
|
||||
if (SERVER_KNOBS->ENABLE_ENCRYPTION && req.encryptKeyProxyInterf.present()) {
|
||||
auto currSingleton = EncryptKeyProxySingleton(self->db.serverInfo->get().encryptKeyProxy);
|
||||
|
@ -2013,6 +2079,53 @@ ACTOR Future<Void> handleForcedRecoveries(ClusterControllerData* self, ClusterCo
    }
}

ACTOR Future<Void> triggerAuditStorage(ClusterControllerData* self, TriggerAuditRequest req) {
    TraceEvent(SevInfo, "CCTriggerAuditStorageBegin", self->id)
        .detail("Range", req.range)
        .detail("AuditType", req.type);
    state UID auditId;

    try {
        while (self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS ||
               !self->db.serverInfo->get().distributor.present()) {
            wait(self->db.serverInfo->onChange());
        }

        TriggerAuditRequest fReq(req.getType(), req.range);
        UID auditId_ = wait(self->db.serverInfo->get().distributor.get().triggerAudit.getReply(fReq));
        auditId = auditId_;
        TraceEvent(SevDebug, "CCTriggerAuditStorageEnd", self->id)
            .detail("AuditID", auditId)
            .detail("Range", req.range)
            .detail("AuditType", req.type);
        if (!req.reply.isSet()) {
            req.reply.send(auditId);
        }
    } catch (Error& e) {
        TraceEvent(SevDebug, "CCTriggerAuditStorageError", self->id)
            .errorUnsuppressed(e)
            .detail("AuditID", auditId)
            .detail("Range", req.range)
            .detail("AuditType", req.type);
        if (!req.reply.isSet()) {
            req.reply.sendError(audit_storage_failed());
        }
    }

    return Void();
}

ACTOR Future<Void> handleTriggerAuditStorage(ClusterControllerData* self, ClusterControllerFullInterface interf) {
    loop {
        TriggerAuditRequest req = waitNext(interf.clientInterface.triggerAudit.getFuture());
        TraceEvent(SevDebug, "TriggerAuditStorageReceived", self->id)
            .detail("ClusterControllerDcId", self->clusterControllerDcId)
            .detail("Range", req.range)
            .detail("AuditType", req.type);
        self->addActor.send(triggerAuditStorage(self, req));
    }
}

struct SingletonRecruitThrottler {
    double lastRecruitStart;
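The new triggerAuditStorage actor above is a forward-and-reply wrapper: wait until recovery reaches ACCEPTING_COMMITS and a data distributor is registered, relay the request, then settle the original reply with either the audit id or audit_storage_failed(). A minimal, framework-free C++ sketch of that shape (std::promise stands in for ReplyPromise; TriggerAuditRequestLite and triggerAuditLite are hypothetical names, not FoundationDB APIs):

#include <future>
#include <iostream>
#include <string>

// Hypothetical stand-in for the real request type; reply resolves to an audit id.
struct TriggerAuditRequestLite {
    std::string range;
    std::promise<std::string> reply;
};

// Forward the request to a "distributor" callback and answer the reply promise,
// mirroring the try/catch structure of the actor above.
template <typename Distributor>
void triggerAuditLite(Distributor&& forwardToDD, TriggerAuditRequestLite& req) {
    try {
        std::string auditId = forwardToDD(req.range); // analogous to triggerAudit.getReply(fReq)
        req.reply.set_value(auditId);
    } catch (...) {
        req.reply.set_exception(std::current_exception()); // analogous to sendError(audit_storage_failed())
    }
}

int main() {
    TriggerAuditRequestLite req{ "allKeys", {} };
    auto fut = req.reply.get_future();
    triggerAuditLite([](const std::string&) { return std::string("audit-42"); }, req);
    std::cout << "audit id: " << fut.get() << "\n";
}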
@ -2426,6 +2539,104 @@ ACTOR Future<int64_t> getNextBMEpoch(ClusterControllerData* self) {
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> startBlobMigrator(ClusterControllerData* self, double waitTime) {
|
||||
// If master fails at the same time, give it a chance to clear master PID.
|
||||
// Also wait to avoid too many consecutive recruits in a small time window.
|
||||
wait(delay(waitTime));
|
||||
|
||||
TraceEvent("CCStartBlobMigrator", self->id).log();
|
||||
loop {
|
||||
try {
|
||||
state bool noBlobMigrator = !self->db.serverInfo->get().blobMigrator.present();
|
||||
while (!self->masterProcessId.present() ||
|
||||
self->masterProcessId != self->db.serverInfo->get().master.locality.processId() ||
|
||||
self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
|
||||
wait(self->db.serverInfo->onChange() || delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY));
|
||||
}
|
||||
if (noBlobMigrator && self->db.serverInfo->get().blobMigrator.present()) {
|
||||
// Existing instance registers while waiting, so skip.
|
||||
return Void();
|
||||
}
|
||||
|
||||
std::map<Optional<Standalone<StringRef>>, int> id_used = self->getUsedIds();
|
||||
WorkerFitnessInfo blobMigratorWorker = self->getWorkerForRoleInDatacenter(self->clusterControllerDcId,
|
||||
ProcessClass::BlobMigrator,
|
||||
ProcessClass::NeverAssign,
|
||||
self->db.config,
|
||||
id_used);
|
||||
InitializeBlobMigratorRequest req(deterministicRandom()->randomUniqueID());
|
||||
state WorkerDetails worker = blobMigratorWorker.worker;
|
||||
if (self->onMasterIsBetter(worker, ProcessClass::BlobMigrator)) {
|
||||
worker = self->id_worker[self->masterProcessId.get()].details;
|
||||
}
|
||||
|
||||
self->recruitingBlobMigratorID = req.reqId;
|
||||
TraceEvent("CCRecruitBlobMigrator", self->id)
|
||||
.detail("Addr", worker.interf.address())
|
||||
.detail("MGID", req.reqId);
|
||||
|
||||
ErrorOr<BlobMigratorInterface> interf = wait(worker.interf.blobMigrator.getReplyUnlessFailedFor(
|
||||
req, SERVER_KNOBS->WAIT_FOR_BLOB_MANAGER_JOIN_DELAY, 0));
|
||||
|
||||
if (interf.present()) {
|
||||
self->recruitBlobMigrator.set(false);
|
||||
self->recruitingBlobMigratorID = interf.get().id();
|
||||
const auto& blobMigrator = self->db.serverInfo->get().blobMigrator;
|
||||
TraceEvent("CCBlobMigratorRecruited", self->id)
|
||||
.detail("Addr", worker.interf.address())
|
||||
.detail("MGID", interf.get().id());
|
||||
if (blobMigrator.present() && blobMigrator.get().id() != interf.get().id() &&
|
||||
self->id_worker.count(blobMigrator.get().locality.processId())) {
|
||||
TraceEvent("CCHaltBlobMigratorAfterRecruit", self->id)
|
||||
.detail("MGID", blobMigrator.get().id())
|
||||
.detail("DcID", printable(self->clusterControllerDcId));
|
||||
BlobMigratorSingleton(blobMigrator).halt(self, blobMigrator.get().locality.processId());
|
||||
}
|
||||
if (!blobMigrator.present() || blobMigrator.get().id() != interf.get().id()) {
|
||||
self->db.setBlobMigrator(interf.get());
|
||||
}
|
||||
checkOutstandingRequests(self);
|
||||
return Void();
|
||||
}
|
||||
} catch (Error& e) {
|
||||
TraceEvent("CCBlobMigratorRecruitError", self->id).error(e);
|
||||
if (e.code() != error_code_no_more_servers) {
|
||||
throw;
|
||||
}
|
||||
}
|
||||
wait(lowPriorityDelay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY));
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> monitorBlobMigrator(ClusterControllerData* self) {
|
||||
state SingletonRecruitThrottler recruitThrottler;
|
||||
while (self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
|
||||
wait(self->db.serverInfo->onChange());
|
||||
}
|
||||
loop {
|
||||
if (self->db.serverInfo->get().blobMigrator.present() && !self->recruitBlobMigrator.get()) {
|
||||
state Future<Void> wfClient = waitFailureClient(self->db.serverInfo->get().blobMigrator.get().waitFailure,
|
||||
SERVER_KNOBS->BLOB_MIGRATOR_FAILURE_TIME);
|
||||
loop {
|
||||
choose {
|
||||
when(wait(wfClient)) {
|
||||
TraceEvent("CCBlobMigratorDied", self->id)
|
||||
.detail("MGID", self->db.serverInfo->get().blobMigrator.get().id());
|
||||
self->db.clearInterf(ProcessClass::BlobMigratorClass);
|
||||
break;
|
||||
}
|
||||
when(wait(self->recruitBlobMigrator.onChange())) {}
|
||||
}
|
||||
}
|
||||
} else if (self->db.blobGranulesEnabled.get() && isFullRestoreMode()) {
|
||||
// if there is no blob migrator present but blob granules are now enabled, recruit a BM
|
||||
wait(startBlobMigrator(self, recruitThrottler.newRecruitment()));
|
||||
} else {
|
||||
wait(self->db.blobGranulesEnabled.onChange());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> startBlobManager(ClusterControllerData* self, double waitTime) {
|
||||
// If master fails at the same time, give it a chance to clear master PID.
|
||||
// Also wait to avoid too many consecutive recruits in a small time window.
|
||||
|
@ -2552,6 +2763,10 @@ ACTOR Future<Void> monitorBlobManager(ClusterControllerData* self) {
                const auto& blobManager = self->db.serverInfo->get().blobManager;
                BlobManagerSingleton(blobManager)
                    .haltBlobGranules(self, blobManager.get().locality.processId());
                if (isFullRestoreMode()) {
                    const auto& blobMigrator = self->db.serverInfo->get().blobMigrator;
                    BlobMigratorSingleton(blobMigrator).halt(self, blobMigrator.get().locality.processId());
                }
                break;
            }
        }
@ -2782,9 +2997,11 @@ ACTOR Future<Void> clusterControllerCore(ClusterControllerFullInterface interf,
    self.addActor.send(updatedChangedDatacenters(&self));
    self.addActor.send(updateDatacenterVersionDifference(&self));
    self.addActor.send(handleForcedRecoveries(&self, interf));
    self.addActor.send(handleTriggerAuditStorage(&self, interf));
    self.addActor.send(monitorDataDistributor(&self));
    self.addActor.send(monitorRatekeeper(&self));
    self.addActor.send(monitorBlobManager(&self));
    self.addActor.send(monitorBlobMigrator(&self));
    self.addActor.send(watchBlobGranulesConfigKey(&self));
    self.addActor.send(monitorConsistencyScan(&self));
    self.addActor.send(metaclusterMetricsUpdater(&self));
@ -1056,18 +1056,19 @@ ACTOR Future<Void> readTransactionSystemState(Reference<ClusterRecoveryData> sel
|
|||
// Sets self->configuration to the configuration (FF/conf/ keys) at self->lastEpochEnd
|
||||
|
||||
// Recover transaction state store
|
||||
bool enableEncryptionForTxnStateStore = isEncryptionOpSupported(EncryptOperationType::TLOG_ENCRYPTION);
|
||||
CODE_PROBE(enableEncryptionForTxnStateStore, "Enable encryption for txnStateStore");
|
||||
if (self->txnStateStore)
|
||||
self->txnStateStore->close();
|
||||
self->txnStateLogAdapter = openDiskQueueAdapter(oldLogSystem, myLocality, txsPoppedVersion);
|
||||
self->txnStateStore = keyValueStoreLogSystem(
|
||||
self->txnStateLogAdapter,
|
||||
self->dbInfo,
|
||||
self->dbgid,
|
||||
self->memoryLimit,
|
||||
false,
|
||||
false,
|
||||
true,
|
||||
isEncryptionOpSupported(EncryptOperationType::TLOG_ENCRYPTION, self->dbInfo->get().client));
|
||||
self->txnStateStore = keyValueStoreLogSystem(self->txnStateLogAdapter,
|
||||
self->dbInfo,
|
||||
self->dbgid,
|
||||
self->memoryLimit,
|
||||
false,
|
||||
false,
|
||||
true,
|
||||
enableEncryptionForTxnStateStore);
|
||||
|
||||
// Version 0 occurs at the version epoch. The version epoch is the number
|
||||
// of microseconds since the Unix epoch. It can be set through fdbcli.
|
||||
|
@ -1688,8 +1689,7 @@ ACTOR Future<Void> clusterRecoveryCore(Reference<ClusterRecoveryData> self) {
                       self->dbgid,
                       recoveryCommitRequest.arena,
                       tr.mutations.slice(mmApplied, tr.mutations.size()),
                       self->txnStateStore,
                       self->dbInfo);
                       self->txnStateStore);
mmApplied = tr.mutations.size();

tr.read_snapshot = self->recoveryTransactionVersion; // lastEpochEnd would make more sense, but isn't in the initial
@ -24,9 +24,11 @@
|
|||
#include "fdbclient/Atomic.h"
|
||||
#include "fdbclient/BackupAgent.actor.h"
|
||||
#include "fdbclient/BlobCipher.h"
|
||||
#include "fdbclient/BuildIdempotencyIdMutations.h"
|
||||
#include "fdbclient/CommitTransaction.h"
|
||||
#include "fdbclient/DatabaseContext.h"
|
||||
#include "fdbclient/FDBTypes.h"
|
||||
#include "fdbclient/IdempotencyId.h"
|
||||
#include "fdbclient/Knobs.h"
|
||||
#include "fdbclient/CommitProxyInterface.h"
|
||||
#include "fdbclient/NativeAPI.actor.h"
|
||||
|
@ -55,6 +57,7 @@
|
|||
#include "fdbserver/WaitFailure.h"
|
||||
#include "fdbserver/WorkerInterface.actor.h"
|
||||
#include "flow/ActorCollection.h"
|
||||
#include "flow/CodeProbe.h"
|
||||
#include "flow/EncryptUtils.h"
|
||||
#include "flow/Error.h"
|
||||
#include "flow/IRandom.h"
|
||||
|
@ -663,6 +666,8 @@ struct CommitBatchContext {
|
|||
// Cipher keys to be used to encrypt mutations
|
||||
std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>> cipherKeys;
|
||||
|
||||
IdempotencyIdKVBuilder idempotencyKVBuilder;
|
||||
|
||||
CommitBatchContext(ProxyCommitData*, const std::vector<CommitTransactionRequest>*, const int);
|
||||
|
||||
void setupTraceBatch();
|
||||
|
@ -998,7 +1003,7 @@ ACTOR Future<Void> getResolution(CommitBatchContext* self) {
    // Fetch cipher keys if needed.
    state Future<std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>>> getCipherKeys;
    if (pProxyCommitData->isEncryptionEnabled) {
        static std::unordered_map<EncryptCipherDomainId, EncryptCipherDomainName> defaultDomains = {
        static const std::unordered_map<EncryptCipherDomainId, EncryptCipherDomainName> defaultDomains = {
            { SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID, FDB_SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_NAME },
            { ENCRYPT_HEADER_DOMAIN_ID, FDB_ENCRYPT_HEADER_DOMAIN_NAME },
            { FDB_DEFAULT_ENCRYPT_DOMAIN_ID, FDB_DEFAULT_ENCRYPT_DOMAIN_NAME }
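The hunk above only adds const to the function-local static map of default encryption domains; a function-local static const map is initialized exactly once (thread-safe since C++11) and is afterwards read-only. A small self-contained sketch of the same pattern, with illustrative ids and names rather than the real FDB domain constants:

#include <cstdint>
#include <iostream>
#include <string>
#include <unordered_map>

// Illustrative ids/names only; the real constants live in fdbclient/EncryptUtils.
const std::string& domainNameFor(int64_t domainId) {
    // Initialized once on first call, never mutated afterwards.
    static const std::unordered_map<int64_t, std::string> defaultDomains = {
        { -1, "FdbSystemKeyspaceEncryptDomain" },
        { -2, "FdbEncryptHeaderDomain" },
        { -3, "FdbDefaultEncryptDomain" },
    };
    static const std::string unknown = "Unknown";
    auto it = defaultDomains.find(domainId);
    return it == defaultDomains.end() ? unknown : it->second;
}

int main() {
    std::cout << domainNameFor(-2) << "\n";
}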
@ -1091,6 +1096,7 @@ void applyMetadataEffect(CommitBatchContext* self) {
|
|||
committed =
|
||||
committed && self->resolution[resolver].stateMutations[versionIndex][transactionIndex].committed;
|
||||
if (committed) {
|
||||
// Note: since we are not to commit, we don't need to pass cipherKeys for encryption.
|
||||
applyMetadataMutations(SpanContext(),
|
||||
*self->pProxyCommitData,
|
||||
self->arena,
|
||||
|
@ -1594,6 +1600,22 @@ ACTOR Future<Void> postResolution(CommitBatchContext* self) {
                                    &self->computeStart));
}

buildIdempotencyIdMutations(self->trs,
                            self->idempotencyKVBuilder,
                            self->commitVersion,
                            self->committed,
                            ConflictBatch::TransactionCommitted,
                            self->locked,
                            [&](const KeyValue& kv) {
                                MutationRef idempotencyIdSet;
                                idempotencyIdSet.type = MutationRef::Type::SetValue;
                                idempotencyIdSet.param1 = kv.key;
                                idempotencyIdSet.param2 = kv.value;
                                auto& tags = pProxyCommitData->tagsForKey(kv.key);
                                self->toCommit.addTags(tags);
                                self->toCommit.writeTypedMessage(idempotencyIdSet);
                            });

self->toCommit.saveTags(self->writtenTags);

pProxyCommitData->stats.mutations += self->mutationCount;
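buildIdempotencyIdMutations hands each generated key/value pair to a caller-supplied lambda, which turns it into a SetValue mutation, tags it, and queues it for commit. A self-contained sketch of that callback pattern, using simplified stand-in types (MutationLite, KeyValueLite, and buildMutations are hypothetical, not the arena-backed FDB types):

#include <cstdint>
#include <functional>
#include <iostream>
#include <string>
#include <vector>

// Simplified stand-ins for MutationRef and KeyValue.
struct MutationLite {
    enum class Type : uint8_t { SetValue, ClearRange };
    Type type;
    std::string param1, param2;
};

struct KeyValueLite {
    std::string key, value;
};

// The builder hands each generated kv to a caller-supplied callback,
// analogous to buildIdempotencyIdMutations() invoking the lambda above.
void buildMutations(const std::vector<KeyValueLite>& kvs,
                    const std::function<void(const KeyValueLite&)>& onKv) {
    for (const auto& kv : kvs) {
        onKv(kv);
    }
}

int main() {
    std::vector<MutationLite> toCommit;
    buildMutations({ { "idmp/1", "abc" }, { "idmp/2", "def" } }, [&](const KeyValueLite& kv) {
        // Mirror of the lambda body: turn the kv into a SetValue mutation and queue it.
        toCommit.push_back({ MutationLite::Type::SetValue, kv.key, kv.value });
    });
    std::cout << "queued " << toCommit.size() << " mutations\n";
}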
@ -2488,6 +2510,17 @@ ACTOR Future<Void> processCompleteTransactionStateRequest(TransactionStateResolv
|
|||
tag_uid[decodeServerTagValue(kv.value)] = decodeServerTagKey(kv.key);
|
||||
}
|
||||
|
||||
state std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>> cipherKeys;
|
||||
if (pContext->pCommitData->isEncryptionEnabled) {
|
||||
static const std::unordered_map<EncryptCipherDomainId, EncryptCipherDomainName> metadataDomains = {
|
||||
{ SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID, FDB_SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_NAME },
|
||||
{ ENCRYPT_HEADER_DOMAIN_ID, FDB_ENCRYPT_HEADER_DOMAIN_NAME }
|
||||
};
|
||||
std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>> cks =
|
||||
wait(getLatestEncryptCipherKeys(pContext->pCommitData->db, metadataDomains, BlobCipherMetrics::TLOG));
|
||||
cipherKeys = cks;
|
||||
}
|
||||
|
||||
loop {
|
||||
wait(yield());
|
||||
|
||||
|
@ -2545,13 +2578,16 @@ ACTOR Future<Void> processCompleteTransactionStateRequest(TransactionStateResolv
|
|||
|
||||
Arena arena;
|
||||
bool confChanges;
|
||||
CODE_PROBE(
|
||||
pContext->pCommitData->isEncryptionEnabled,
|
||||
"Commit proxy apply metadata mutations from txnStateStore on recovery, with encryption-at-rest enabled");
|
||||
applyMetadataMutations(SpanContext(),
|
||||
*pContext->pCommitData,
|
||||
arena,
|
||||
Reference<ILogSystem>(),
|
||||
mutations,
|
||||
/* pToCommit= */ nullptr,
|
||||
/* pCipherKeys= */ nullptr,
|
||||
pContext->pCommitData->isEncryptionEnabled ? &cipherKeys : nullptr,
|
||||
confChanges,
|
||||
/* version= */ 0,
|
||||
/* popVersion= */ 0,
|
||||
|
@ -2643,7 +2679,7 @@ ACTOR Future<Void> commitProxyServerCore(CommitProxyInterface proxy,
|
|||
// Wait until we can load the "real" logsystem, since we don't support switching them currently
|
||||
while (!(masterLifetime.isEqual(commitData.db->get().masterLifetime) &&
|
||||
commitData.db->get().recoveryState >= RecoveryState::RECOVERY_TRANSACTION &&
|
||||
(!isEncryptionOpSupported(EncryptOperationType::TLOG_ENCRYPTION, db->get().client) ||
|
||||
(!isEncryptionOpSupported(EncryptOperationType::TLOG_ENCRYPTION) ||
|
||||
commitData.db->get().encryptKeyProxy.present()))) {
|
||||
//TraceEvent("ProxyInit2", proxy.id()).detail("LSEpoch", db->get().logSystemConfig.epoch).detail("Need", epoch);
|
||||
wait(commitData.db->onChange());
|
||||
|
@ -2668,15 +2704,14 @@ ACTOR Future<Void> commitProxyServerCore(CommitProxyInterface proxy,
|
|||
commitData.logSystem = ILogSystem::fromServerDBInfo(proxy.id(), commitData.db->get(), false, addActor);
|
||||
commitData.logAdapter =
|
||||
new LogSystemDiskQueueAdapter(commitData.logSystem, Reference<AsyncVar<PeekTxsInfo>>(), 1, false);
|
||||
commitData.txnStateStore =
|
||||
keyValueStoreLogSystem(commitData.logAdapter,
|
||||
commitData.db,
|
||||
proxy.id(),
|
||||
2e9,
|
||||
true,
|
||||
true,
|
||||
true,
|
||||
isEncryptionOpSupported(EncryptOperationType::TLOG_ENCRYPTION, db->get().client));
|
||||
commitData.txnStateStore = keyValueStoreLogSystem(commitData.logAdapter,
|
||||
commitData.db,
|
||||
proxy.id(),
|
||||
2e9,
|
||||
true,
|
||||
true,
|
||||
true,
|
||||
isEncryptionOpSupported(EncryptOperationType::TLOG_ENCRYPTION));
|
||||
createWhitelistBinPathVec(whitelistBinPaths, commitData.whitelistedBinPathVec);
|
||||
|
||||
commitData.updateLatencyBandConfig(commitData.db->get().latencyBandConfig);
|
||||
|
|
|
@ -344,7 +344,7 @@ class TransactionEnvironment {
    state Key configKey = encodeConfigKey(configClass, knobName);
    state Optional<Value> value = wait(tr->get(configKey));
    if (expected.present()) {
        ASSERT_EQ(BinaryReader::fromStringRef<int64_t>(value.get(), Unversioned()), expected.get());
        ASSERT_EQ(Tuple::unpack(value.get()).getInt(0), expected.get());
    } else {
        ASSERT(!value.present());
    }
@ -25,6 +25,8 @@
|
|||
#include "fdbclient/DatabaseContext.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
FDB_DEFINE_BOOLEAN_PARAM(SkipDDModeCheck);
|
||||
|
||||
class DDTxnProcessorImpl {
|
||||
friend class DDTxnProcessor;
|
||||
|
||||
|
@ -106,6 +108,62 @@ class DDTxnProcessorImpl {
|
|||
return IDDTxnProcessor::SourceServers{ std::vector<UID>(servers.begin(), servers.end()), completeSources };
|
||||
}
|
||||
|
||||
ACTOR static Future<std::vector<IDDTxnProcessor::DDRangeLocations>> getSourceServerInterfacesForRange(
|
||||
Database cx,
|
||||
KeyRangeRef range) {
|
||||
state std::vector<IDDTxnProcessor::DDRangeLocations> res;
|
||||
state Transaction tr(cx);
|
||||
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
|
||||
loop {
|
||||
res.clear();
|
||||
try {
|
||||
state RangeResult shards = wait(krmGetRanges(&tr,
|
||||
keyServersPrefix,
|
||||
range,
|
||||
SERVER_KNOBS->MOVE_SHARD_KRM_ROW_LIMIT,
|
||||
SERVER_KNOBS->MOVE_SHARD_KRM_BYTE_LIMIT));
|
||||
ASSERT(!shards.empty());
|
||||
|
||||
state RangeResult UIDtoTagMap = wait(tr.getRange(serverTagKeys, CLIENT_KNOBS->TOO_MANY));
|
||||
ASSERT(!UIDtoTagMap.more && UIDtoTagMap.size() < CLIENT_KNOBS->TOO_MANY);
|
||||
|
||||
state int i = 0;
|
||||
for (i = 0; i < shards.size() - 1; ++i) {
|
||||
state std::vector<UID> src;
|
||||
std::vector<UID> dest;
|
||||
UID srcId, destId;
|
||||
decodeKeyServersValue(UIDtoTagMap, shards[i].value, src, dest, srcId, destId);
|
||||
|
||||
std::vector<Future<Optional<Value>>> serverListEntries;
|
||||
for (int j = 0; j < src.size(); ++j) {
|
||||
serverListEntries.push_back(tr.get(serverListKeyFor(src[j])));
|
||||
}
|
||||
std::vector<Optional<Value>> serverListValues = wait(getAll(serverListEntries));
|
||||
IDDTxnProcessor::DDRangeLocations current(KeyRangeRef(shards[i].key, shards[i + 1].key));
|
||||
for (int j = 0; j < serverListValues.size(); ++j) {
|
||||
if (!serverListValues[j].present()) {
|
||||
TraceEvent(SevWarnAlways, "GetSourceServerInterfacesMissing")
|
||||
.detail("StorageServer", src[j])
|
||||
.detail("Range", KeyRangeRef(shards[i].key, shards[i + 1].key));
|
||||
continue;
|
||||
}
|
||||
StorageServerInterface ssi = decodeServerListValue(serverListValues[j].get());
|
||||
current.servers[ssi.locality.describeDcId()].push_back(ssi);
|
||||
}
|
||||
res.push_back(current);
|
||||
}
|
||||
break;
|
||||
} catch (Error& e) {
|
||||
TraceEvent(SevWarnAlways, "GetSourceServerInterfacesError").errorUnsuppressed(e).detail("Range", range);
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
// set the system key space
|
||||
ACTOR static Future<Void> updateReplicaKeys(Database cx,
|
||||
std::vector<Optional<Key>> primaryDcId,
|
||||
|
@ -184,7 +242,8 @@ class DDTxnProcessorImpl {
|
|||
UID distributorId,
|
||||
MoveKeysLock moveKeysLock,
|
||||
std::vector<Optional<Key>> remoteDcIds,
|
||||
const DDEnabledState* ddEnabledState) {
|
||||
const DDEnabledState* ddEnabledState,
|
||||
SkipDDModeCheck skipDDModeCheck) {
|
||||
state Reference<InitialDataDistribution> result = makeReference<InitialDataDistribution>();
|
||||
state Key beginKey = allKeys.begin;
|
||||
|
||||
|
@ -197,6 +256,7 @@ class DDTxnProcessorImpl {
|
|||
state std::vector<std::pair<StorageServerInterface, ProcessClass>> tss_servers;
|
||||
state int numDataMoves = 0;
|
||||
|
||||
CODE_PROBE((bool)skipDDModeCheck, "DD Mode won't prevent read initial data distribution.");
|
||||
// Get the server list in its own try/catch block since it modifies result. We don't want a subsequent failure
|
||||
// causing entries to be duplicated
|
||||
loop {
|
||||
|
@ -229,7 +289,7 @@ class DDTxnProcessorImpl {
|
|||
BinaryReader rd(mode.get(), Unversioned());
|
||||
rd >> result->mode;
|
||||
}
|
||||
if (!result->mode || !ddEnabledState->isDDEnabled()) {
|
||||
if ((!skipDDModeCheck && !result->mode) || !ddEnabledState->isDDEnabled()) {
|
||||
// DD can be disabled persistently (result->mode = 0) or transiently (isDDEnabled() = 0)
|
||||
TraceEvent(SevDebug, "GetInitialDataDistribution_DisabledDD").log();
|
||||
return result;
|
||||
|
@ -537,6 +597,11 @@ Future<IDDTxnProcessor::SourceServers> DDTxnProcessor::getSourceServersForRange(
|
|||
return DDTxnProcessorImpl::getSourceServersForRange(cx, range);
|
||||
}
|
||||
|
||||
Future<std::vector<IDDTxnProcessor::DDRangeLocations>> DDTxnProcessor::getSourceServerInterfacesForRange(
|
||||
const KeyRangeRef range) {
|
||||
return DDTxnProcessorImpl::getSourceServerInterfacesForRange(cx, range);
|
||||
}
|
||||
|
||||
Future<ServerWorkerInfos> DDTxnProcessor::getServerListAndProcessClasses() {
|
||||
return DDTxnProcessorImpl::getServerListAndProcessClasses(cx);
|
||||
}
|
||||
|
@ -559,8 +624,10 @@ Future<Reference<InitialDataDistribution>> DDTxnProcessor::getInitialDataDistrib
|
|||
const UID& distributorId,
|
||||
const MoveKeysLock& moveKeysLock,
|
||||
const std::vector<Optional<Key>>& remoteDcIds,
|
||||
const DDEnabledState* ddEnabledState) {
|
||||
return DDTxnProcessorImpl::getInitialDataDistribution(cx, distributorId, moveKeysLock, remoteDcIds, ddEnabledState);
|
||||
const DDEnabledState* ddEnabledState,
|
||||
SkipDDModeCheck skipDDModeCheck) {
|
||||
return DDTxnProcessorImpl::getInitialDataDistribution(
|
||||
cx, distributorId, moveKeysLock, remoteDcIds, ddEnabledState, skipDDModeCheck);
|
||||
}
|
||||
|
||||
Future<Void> DDTxnProcessor::waitForDataDistributionEnabled(const DDEnabledState* ddEnabledState) const {
|
||||
|
@ -620,6 +687,33 @@ Future<std::vector<ProcessData>> DDTxnProcessor::getWorkers() const {
|
|||
return ::getWorkers(cx);
|
||||
}
|
||||
|
||||
Future<Void> DDTxnProcessor::rawStartMovement(MoveKeysParams& params,
|
||||
std::map<UID, StorageServerInterface>& tssMapping) {
|
||||
return ::rawStartMovement(cx, params, tssMapping);
|
||||
}
|
||||
|
||||
Future<Void> DDTxnProcessor::rawFinishMovement(MoveKeysParams& params,
|
||||
const std::map<UID, StorageServerInterface>& tssMapping) {
|
||||
return ::rawFinishMovement(cx, params, tssMapping);
|
||||
}
|
||||
|
||||
struct DDMockTxnProcessorImpl {
|
||||
ACTOR static Future<Void> moveKeys(DDMockTxnProcessor* self, MoveKeysParams params) {
|
||||
state std::map<UID, StorageServerInterface> tssMapping;
|
||||
self->rawStartMovement(params, tssMapping);
|
||||
ASSERT(tssMapping.empty());
|
||||
|
||||
if (BUGGIFY_WITH_PROB(0.5)) {
|
||||
wait(delayJittered(5.0));
|
||||
}
|
||||
|
||||
self->rawFinishMovement(params, tssMapping);
|
||||
if (!params.dataMovementComplete.isSet())
|
||||
params.dataMovementComplete.send(Void());
|
||||
return Void();
|
||||
}
|
||||
};
|
||||
|
||||
Future<ServerWorkerInfos> DDMockTxnProcessor::getServerListAndProcessClasses() {
|
||||
ServerWorkerInfos res;
|
||||
for (auto& [_, mss] : mgs->allServers) {
|
||||
|
@ -696,7 +790,8 @@ Future<Reference<InitialDataDistribution>> DDMockTxnProcessor::getInitialDataDis
|
|||
const UID& distributorId,
|
||||
const MoveKeysLock& moveKeysLock,
|
||||
const std::vector<Optional<Key>>& remoteDcIds,
|
||||
const DDEnabledState* ddEnabledState) {
|
||||
const DDEnabledState* ddEnabledState,
|
||||
SkipDDModeCheck skipDDModeCheck) {
|
||||
|
||||
// FIXME: now we just ignore ddEnabledState and moveKeysLock, will fix it in the future
|
||||
Reference<InitialDataDistribution> res = makeReference<InitialDataDistribution>();
|
||||
|
@ -756,9 +851,10 @@ void DDMockTxnProcessor::setupMockGlobalState(Reference<InitialDataDistribution>
|
|||
mgs->shardMapping->setCheckMode(ShardsAffectedByTeamFailure::CheckMode::Normal);
|
||||
}
|
||||
|
||||
// FIXME: finish moveKeys implementation
|
||||
Future<Void> DDMockTxnProcessor::moveKeys(const MoveKeysParams& params) {
|
||||
UNREACHABLE();
|
||||
// Not support location metadata yet
|
||||
ASSERT(!SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA);
|
||||
return DDMockTxnProcessorImpl::moveKeys(this, params);
|
||||
}
|
||||
|
||||
// FIXME: finish implementation
|
||||
|
@ -790,3 +886,48 @@ Future<std::pair<Optional<StorageMetrics>, int>> DDMockTxnProcessor::waitStorage
|
|||
Future<std::vector<ProcessData>> DDMockTxnProcessor::getWorkers() const {
|
||||
return Future<std::vector<ProcessData>>();
|
||||
}
|
||||
|
||||
void DDMockTxnProcessor::rawStartMovement(MoveKeysParams& params, std::map<UID, StorageServerInterface>& tssMapping) {
|
||||
FlowLock::Releaser releaser(*params.startMoveKeysParallelismLock);
|
||||
// Add wait(take) would always return immediately because there won’t be parallel rawStart or rawFinish in mock
|
||||
// world due to the fact the following *mock* transaction code will always finish without coroutine switch.
|
||||
ASSERT(params.startMoveKeysParallelismLock->take().isReady());
|
||||
|
||||
std::vector<ShardsAffectedByTeamFailure::Team> destTeams;
|
||||
destTeams.emplace_back(params.destinationTeam, true);
|
||||
mgs->shardMapping->moveShard(params.keys, destTeams);
|
||||
|
||||
for (auto& id : params.destinationTeam) {
|
||||
mgs->allServers.at(id).setShardStatus(params.keys, MockShardStatus::INFLIGHT, mgs->restrictSize);
|
||||
}
|
||||
}
|
||||
|
||||
void DDMockTxnProcessor::rawFinishMovement(MoveKeysParams& params,
|
||||
const std::map<UID, StorageServerInterface>& tssMapping) {
|
||||
FlowLock::Releaser releaser(*params.finishMoveKeysParallelismLock);
|
||||
// Add wait(take) would always return immediately because there won’t be parallel rawStart or rawFinish in mock
|
||||
// world due to the fact the following *mock* transaction code will always finish without coroutine switch.
|
||||
ASSERT(params.finishMoveKeysParallelismLock->take().isReady());
|
||||
|
||||
// get source and dest teams
|
||||
auto [destTeams, srcTeams] = mgs->shardMapping->getTeamsFor(params.keys);
|
||||
|
||||
ASSERT_EQ(destTeams.size(), 0);
|
||||
if (destTeams.front() != ShardsAffectedByTeamFailure::Team{ params.destinationTeam, true }) {
|
||||
TraceEvent(SevError, "MockRawFinishMovementError")
|
||||
.detail("Reason", "InconsistentDestinations")
|
||||
.detail("ShardMappingDest", describe(destTeams.front().servers))
|
||||
.detail("ParamDest", describe(params.destinationTeam));
|
||||
ASSERT(false); // This shouldn't happen because the overlapped key range movement won't be executed in parallel
|
||||
}
|
||||
|
||||
for (auto& id : params.destinationTeam) {
|
||||
mgs->allServers.at(id).setShardStatus(params.keys, MockShardStatus::COMPLETED, mgs->restrictSize);
|
||||
}
|
||||
|
||||
ASSERT_EQ(srcTeams.size(), 0);
|
||||
for (auto& id : srcTeams.front().servers) {
|
||||
mgs->allServers.at(id).removeShard(params.keys);
|
||||
}
|
||||
mgs->shardMapping->finishMove(params.keys);
|
||||
}
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include <set>
|
||||
#include <string>
|
||||
|
||||
#include "fdbclient/Audit.h"
|
||||
#include "fdbclient/DatabaseContext.h"
|
||||
#include "fdbclient/FDBOptions.g.h"
|
||||
#include "fdbclient/FDBTypes.h"
|
||||
|
@ -52,6 +53,17 @@
#include "fdbserver/DDSharedContext.h"
#include "flow/actorcompiler.h" // This must be the last #include.

struct DDAudit {
    DDAudit(UID id, KeyRange range, AuditType type)
      : id(id), range(range), type(type), auditMap(AuditPhase::Invalid, allKeys.end), actors(true) {}

    const UID id;
    KeyRange range;
    const AuditType type;
    KeyRangeMap<AuditPhase> auditMap;
    ActorCollection actors;
};

void DataMove::validateShard(const DDShardInfo& shard, KeyRangeRef range, int priority) {
    if (!valid) {
        if (shard.hasDest && shard.destId != anonymousShardId) {
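DDAudit tracks per-range progress in a KeyRangeMap<AuditPhase>, and scheduleAuditForRange later scans it for the first range still marked Invalid or Failed. A simplified sketch of that scan, assuming a flat std::map keyed by each range's begin key instead of the real KeyRangeMap:

#include <iostream>
#include <map>
#include <string>

// Hypothetical flat stand-in for KeyRangeMap<AuditPhase>: each entry covers
// [key, next key) and records the phase of that slice of the audit.
enum class Phase { Invalid, Running, Complete, Failed };

// Return the begin key of the first range that still needs auditing
// (Invalid or Failed), mimicking the scan in scheduleAuditForRange().
std::string firstUnauditedBegin(const std::map<std::string, Phase>& phases) {
    for (const auto& [begin, phase] : phases) {
        if (phase == Phase::Invalid || phase == Phase::Failed) {
            return begin;
        }
    }
    return ""; // everything audited
}

int main() {
    std::map<std::string, Phase> phases{ { "a", Phase::Complete }, { "m", Phase::Failed }, { "t", Phase::Running } };
    std::cout << "resume audit at: " << firstUnauditedBegin(phases) << "\n"; // prints "m"
}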
@ -276,6 +288,10 @@ public:
|
|||
|
||||
StorageQuotaInfo storageQuotaInfo;
|
||||
|
||||
Promise<Void> initialized;
|
||||
|
||||
std::unordered_map<AuditType, std::vector<std::shared_ptr<DDAudit>>> audits;
|
||||
|
||||
DataDistributor(Reference<AsyncVar<ServerDBInfo> const> const& db, UID id, Reference<DDSharedContext> context)
|
||||
: dbInfo(db), context(context), ddId(id), txnProcessor(nullptr),
|
||||
initialDDEventHolder(makeReference<EventCacheHolder>("InitialDD")),
|
||||
|
@ -300,7 +316,8 @@ public:
|
|||
ddId,
|
||||
lock,
|
||||
configuration.usableRegions > 1 ? remoteDcIds : std::vector<Optional<Key>>(),
|
||||
context->ddEnabledState.get()));
|
||||
context->ddEnabledState.get(),
|
||||
SkipDDModeCheck::False));
|
||||
}
|
||||
|
||||
void initDcInfo() {
|
||||
|
@ -577,6 +594,7 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributor> self,
|
|||
state bool ddIsTenantAware = SERVER_KNOBS->DD_TENANT_AWARENESS_ENABLED;
|
||||
loop {
|
||||
trackerCancelled = false;
|
||||
self->initialized = Promise<Void>();
|
||||
|
||||
// Stored outside of data distribution tracker to avoid slow tasks
|
||||
// when tracker is cancelled
|
||||
|
@ -594,7 +612,6 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributor> self,
|
|||
state PromiseStream<GetTopKMetricsRequest> getTopKShardMetrics;
|
||||
state Reference<AsyncVar<bool>> processingUnhealthy(new AsyncVar<bool>(false));
|
||||
state Reference<AsyncVar<bool>> processingWiggle(new AsyncVar<bool>(false));
|
||||
state Promise<Void> readyToStart;
|
||||
|
||||
state Optional<Reference<TenantCache>> ddTenantCache;
|
||||
if (ddIsTenantAware) {
|
||||
|
@ -636,7 +653,7 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributor> self,
|
|||
getTopKShardMetrics.getFuture(),
|
||||
getShardMetricsList,
|
||||
getAverageShardBytes.getFuture(),
|
||||
readyToStart,
|
||||
self->initialized,
|
||||
anyZeroHealthyTeams,
|
||||
self->ddId,
|
||||
&shards,
|
||||
|
@ -676,6 +693,10 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributor> self,
|
|||
"DDTenantCacheMonitor",
|
||||
self->ddId,
|
||||
&normalDDQueueErrors()));
|
||||
actors.push_back(reportErrorsExcept(ddTenantCache.get()->monitorStorageUsage(),
|
||||
"StorageUsageTracker",
|
||||
self->ddId,
|
||||
&normalDDQueueErrors()));
|
||||
}
|
||||
|
||||
std::vector<DDTeamCollection*> teamCollectionsPtrs;
|
||||
|
@ -688,7 +709,7 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributor> self,
|
|||
self->configuration,
|
||||
self->primaryDcId,
|
||||
self->configuration.usableRegions > 1 ? self->remoteDcIds : std::vector<Optional<Key>>(),
|
||||
readyToStart.getFuture(),
|
||||
self->initialized.getFuture(),
|
||||
zeroHealthyTeams[0],
|
||||
IsPrimary::True,
|
||||
processingUnhealthy,
|
||||
|
@ -709,7 +730,7 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributor> self,
|
|||
self->configuration,
|
||||
self->remoteDcIds,
|
||||
Optional<std::vector<Optional<Key>>>(),
|
||||
readyToStart.getFuture() && remoteRecovered(self->dbInfo),
|
||||
self->initialized.getFuture() && remoteRecovered(self->dbInfo),
|
||||
zeroHealthyTeams[1],
|
||||
IsPrimary::False,
|
||||
processingUnhealthy,
|
||||
|
@ -1327,6 +1348,157 @@ ACTOR Future<Void> ddGetMetrics(GetDataDistributorMetricsRequest req,
|
|||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> auditStorage(Reference<DataDistributor> self, TriggerAuditRequest req);
|
||||
ACTOR Future<Void> scheduleAuditForRange(Reference<DataDistributor> self,
|
||||
std::shared_ptr<DDAudit> audit,
|
||||
KeyRange range);
|
||||
ACTOR Future<Void> doAuditOnStorageServer(Reference<DataDistributor> self,
|
||||
std::shared_ptr<DDAudit> audit,
|
||||
StorageServerInterface ssi,
|
||||
AuditStorageRequest req);
|
||||
|
||||
ACTOR Future<Void> auditStorage(Reference<DataDistributor> self, TriggerAuditRequest req) {
|
||||
// TODO(heliu): Load running audit, and create one if no audit is running.
|
||||
state std::shared_ptr<DDAudit> audit;
|
||||
auto it = self->audits.find(req.getType());
|
||||
if (it != self->audits.end() && !it->second.empty()) {
|
||||
ASSERT_EQ(it->second.size(), 1);
|
||||
auto& currentAudit = it->second.front();
|
||||
if (currentAudit->range.contains(req.range)) {
|
||||
audit = it->second.front();
|
||||
} else {
|
||||
req.reply.sendError(audit_storage_exceeded_request_limit());
|
||||
return Void();
|
||||
}
|
||||
} else {
|
||||
const UID auditId = deterministicRandom()->randomUniqueID();
|
||||
audit = std::make_shared<DDAudit>(auditId, req.range, req.getType());
|
||||
self->audits[req.getType()].push_back(audit);
|
||||
audit->actors.add(scheduleAuditForRange(self, audit, req.range));
|
||||
TraceEvent(SevDebug, "DDAuditStorageBegin", audit->id).detail("Range", req.range).detail("AuditType", req.type);
|
||||
}
|
||||
|
||||
if (req.async && !req.reply.isSet()) {
|
||||
req.reply.send(audit->id);
|
||||
}
|
||||
|
||||
try {
|
||||
wait(audit->actors.getResult());
|
||||
TraceEvent(SevDebug, "DDAuditStorageEnd", audit->id).detail("Range", req.range).detail("AuditType", req.type);
|
||||
// TODO(heliu): Set the audit result, and clear auditId.
|
||||
if (!req.async && !req.reply.isSet()) {
|
||||
TraceEvent(SevDebug, "DDAuditStorageReply", audit->id)
|
||||
.detail("Range", req.range)
|
||||
.detail("AuditType", req.type);
|
||||
req.reply.send(audit->id);
|
||||
}
|
||||
} catch (Error& e) {
|
||||
TraceEvent(SevWarnAlways, "DDAuditStorageOperationError", audit->id)
|
||||
.errorUnsuppressed(e)
|
||||
.detail("Range", req.range)
|
||||
.detail("AuditType", req.type);
|
||||
}
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> scheduleAuditForRange(Reference<DataDistributor> self,
|
||||
std::shared_ptr<DDAudit> audit,
|
||||
KeyRange range) {
|
||||
TraceEvent(SevDebug, "DDScheduleAuditForRangeBegin", audit->id)
|
||||
.detail("Range", range)
|
||||
.detail("AuditType", audit->type);
|
||||
// TODO(heliu): Load the audit map for `range`.
|
||||
state Key begin = range.begin;
|
||||
state KeyRange currentRange = range;
|
||||
|
||||
while (begin < range.end) {
|
||||
currentRange = KeyRangeRef(begin, range.end);
|
||||
|
||||
// Find the first keyrange that hasn't been validated.
|
||||
auto f = audit->auditMap.intersectingRanges(currentRange);
|
||||
for (auto it = f.begin(); it != f.end(); ++it) {
|
||||
if (it->value() != AuditPhase::Invalid && it->value() != AuditPhase::Failed) {
|
||||
begin = it->range().end;
|
||||
currentRange = KeyRangeRef(it->range().end, currentRange.end);
|
||||
} else {
|
||||
currentRange = KeyRangeRef(it->range().begin, it->range().end) & currentRange;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
state std::vector<IDDTxnProcessor::DDRangeLocations> rangeLocations =
|
||||
wait(self->txnProcessor->getSourceServerInterfacesForRange(currentRange));
|
||||
|
||||
state int i = 0;
|
||||
for (i = 0; i < rangeLocations.size(); ++i) {
|
||||
AuditStorageRequest req(audit->id, rangeLocations[i].range, audit->type);
|
||||
if (audit->type == AuditType::ValidateHA && rangeLocations[i].servers.size() >= 2) {
|
||||
auto it = rangeLocations[i].servers.begin();
|
||||
const int idx = deterministicRandom()->randomInt(0, it->second.size());
|
||||
StorageServerInterface& targetServer = it->second[idx];
|
||||
++it;
|
||||
for (; it != rangeLocations[i].servers.end(); ++it) {
|
||||
const int idx = deterministicRandom()->randomInt(0, it->second.size());
|
||||
req.targetServers.push_back(it->second[idx].id());
|
||||
}
|
||||
audit->actors.add(doAuditOnStorageServer(self, audit, targetServer, req));
|
||||
}
|
||||
begin = rangeLocations[i].range.end;
|
||||
wait(delay(0.01));
|
||||
}
|
||||
} catch (Error& e) {
|
||||
TraceEvent(SevWarnAlways, "DDScheduleAuditRangeError", audit->id)
|
||||
.errorUnsuppressed(e)
|
||||
.detail("Range", range);
|
||||
if (e.code() == error_code_actor_cancelled) {
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> doAuditOnStorageServer(Reference<DataDistributor> self,
|
||||
std::shared_ptr<DDAudit> audit,
|
||||
StorageServerInterface ssi,
|
||||
AuditStorageRequest req) {
|
||||
TraceEvent(SevDebug, "DDDoAuditOnStorageServerBegin", req.id)
|
||||
.detail("Range", req.range)
|
||||
.detail("AuditType", req.type)
|
||||
.detail("StorageServer", ssi.toString())
|
||||
.detail("TargetServers", describe(req.targetServers));
|
||||
|
||||
try {
|
||||
audit->auditMap.insert(req.range, AuditPhase::Running);
|
||||
ErrorOr<AuditStorageState> vResult = wait(ssi.auditStorage.getReplyUnlessFailedFor(
|
||||
req, /*sustainedFailureDuration=*/2.0, /*sustainedFailureSlope=*/0));
|
||||
if (vResult.isError()) {
|
||||
throw vResult.getError();
|
||||
}
|
||||
TraceEvent e(vResult.get().error.empty() ? SevInfo : SevWarnAlways, "DDAuditStorageState", req.id);
|
||||
e.detail("Range", req.range);
|
||||
e.detail("StorageServer", ssi.toString());
|
||||
if (!vResult.get().error.empty()) {
|
||||
e.detail("ErrorMessage", vResult.get().error);
|
||||
}
|
||||
} catch (Error& e) {
|
||||
TraceEvent(SevWarn, "DDDoAuditOnStorageServerError", req.id)
|
||||
.errorUnsuppressed(e)
|
||||
.detail("Range", req.range)
|
||||
.detail("StorageServer", ssi.toString())
|
||||
.detail("TargetServers", describe(req.targetServers));
|
||||
if (e.code() != error_code_actor_cancelled) {
|
||||
audit->auditMap.insert(req.range, AuditPhase::Failed);
|
||||
audit->actors.add(scheduleAuditForRange(self, audit, req.range));
|
||||
}
|
||||
}
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> dataDistributor(DataDistributorInterface di, Reference<AsyncVar<ServerDBInfo> const> db) {
|
||||
state Reference<DDSharedContext> context(new DDSharedContext(di.id()));
|
||||
state Reference<DataDistributor> self(new DataDistributor(db, di.id(), context));
|
||||
|
@ -1393,6 +1565,9 @@ ACTOR Future<Void> dataDistributor(DataDistributorInterface di, Reference<AsyncV
            when(GetStorageWigglerStateRequest req = waitNext(di.storageWigglerState.getFuture())) {
                req.reply.send(getStorageWigglerStates(self));
            }
            when(TriggerAuditRequest req = waitNext(di.triggerAudit.getFuture())) {
                actors.add(auditStorage(self, req));
            }
        }
    } catch (Error& err) {
        if (normalDataDistributorErrors().count(err.code()) == 0) {
@ -18,6 +18,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "fdbclient/BlobMetadataUtils.h"
|
||||
#include "fdbclient/EncryptKeyProxyInterface.h"
|
||||
|
||||
#include "fdbrpc/Locality.h"
|
||||
|
@ -30,6 +31,7 @@
|
|||
#include "fdbserver/WorkerInterface.actor.h"
|
||||
#include "fdbserver/ServerDBInfo.h"
|
||||
#include "flow/Arena.h"
|
||||
#include "flow/CodeProbe.h"
|
||||
#include "flow/EncryptUtils.h"
|
||||
#include "flow/Error.h"
|
||||
#include "flow/EventTypes.actor.h"
|
||||
|
@ -451,6 +453,8 @@ ACTOR Future<Void> getCipherKeysByBaseCipherKeyIds(Reference<EncryptKeyProxyData
|
|||
keyIdsReply.numHits = cachedCipherDetails.size();
|
||||
keysByIds.reply.send(keyIdsReply);
|
||||
|
||||
CODE_PROBE(!lookupCipherInfoMap.empty(), "EKP fetch cipherKeys by KeyId from KMS");
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
|
@ -474,13 +478,13 @@ ACTOR Future<Void> getLatestCipherKeys(Reference<EncryptKeyProxyData> ekpProxyDa
|
|||
// Dedup the requested domainIds.
|
||||
// TODO: endpoint serialization of std::unordered_set isn't working at the moment
|
||||
std::unordered_map<EncryptCipherDomainId, EKPGetLatestCipherKeysRequestInfo> dedupedDomainInfos;
|
||||
for (const auto info : req.encryptDomainInfos) {
|
||||
for (const auto& info : req.encryptDomainInfos) {
|
||||
dedupedDomainInfos.emplace(info.domainId, info);
|
||||
}
|
||||
|
||||
if (dbgTrace.present()) {
|
||||
dbgTrace.get().detail("NKeys", dedupedDomainInfos.size());
|
||||
for (const auto info : dedupedDomainInfos) {
|
||||
for (const auto& info : dedupedDomainInfos) {
|
||||
// log encryptDomainIds queried
|
||||
dbgTrace.get().detail(
|
||||
getEncryptDbgTraceKey(ENCRYPT_DBG_TRACE_QUERY_PREFIX, info.first, info.second.domainName), "");
|
||||
|
@ -587,6 +591,8 @@ ACTOR Future<Void> getLatestCipherKeys(Reference<EncryptKeyProxyData> ekpProxyDa
|
|||
latestCipherReply.numHits = cachedCipherDetails.size();
|
||||
latestKeysReq.reply.send(latestCipherReply);
|
||||
|
||||
CODE_PROBE(!lookupCipherDomains.empty(), "EKP fetch latest cipherKeys from KMS");
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
|
@ -594,11 +600,22 @@ bool isCipherKeyEligibleForRefresh(const EncryptBaseCipherKey& cipherKey, int64_
    // Candidate eligible for refresh iff either is true:
    // 1. CipherKey cell is either expired/needs-refresh right now.
    // 2. CipherKey cell 'will' be expired/needs-refresh before next refresh cycle interval (proactive refresh)
    if (BUGGIFY_WITH_PROB(0.01)) {
        return true;
    }
    int64_t nextRefreshCycleTS = currTS + FLOW_KNOBS->ENCRYPT_KEY_REFRESH_INTERVAL;
    return nextRefreshCycleTS > cipherKey.expireAt || nextRefreshCycleTS > cipherKey.refreshAt;
}

ACTOR Future<Void> refreshEncryptionKeysCore(Reference<EncryptKeyProxyData> ekpProxyData,
bool isBlobMetadataEligibleForRefresh(const BlobMetadataDetailsRef& blobMetadata, int64_t currTS) {
    if (BUGGIFY_WITH_PROB(0.01)) {
        return true;
    }
    int64_t nextRefreshCycleTS = currTS + SERVER_KNOBS->BLOB_METADATA_REFRESH_INTERVAL;
    return nextRefreshCycleTS > blobMetadata.expireAt || nextRefreshCycleTS > blobMetadata.refreshAt;
}

ACTOR Future<Void> refreshEncryptionKeysImpl(Reference<EncryptKeyProxyData> ekpProxyData,
                                             KmsConnectorInterface kmsConnectorInf) {
    state UID debugId = deterministicRandom()->randomUniqueID();
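Both eligibility helpers above use the same arithmetic: a key is refreshed proactively if its expireAt or refreshAt timestamp falls before the next refresh cycle (current time plus the refresh interval). A small worked example, assuming an illustrative 600-second interval in place of the real knobs:

#include <cstdint>
#include <iostream>

// Illustrative value; the real intervals are FLOW_KNOBS->ENCRYPT_KEY_REFRESH_INTERVAL
// and SERVER_KNOBS->BLOB_METADATA_REFRESH_INTERVAL.
constexpr int64_t kRefreshIntervalSec = 600;

// A key is refreshed proactively if it would expire (or need refresh) before
// the *next* refresh cycle runs, mirroring the checks above.
bool eligibleForRefresh(int64_t currTS, int64_t expireAt, int64_t refreshAt) {
    const int64_t nextRefreshCycleTS = currTS + kRefreshIntervalSec;
    return nextRefreshCycleTS > expireAt || nextRefreshCycleTS > refreshAt;
}

int main() {
    const int64_t now = 10'000;
    // refreshAt falls inside the next cycle window (10'600 > 10'500), so refresh now.
    std::cout << std::boolalpha << eligibleForRefresh(now, /*expireAt=*/20'000, /*refreshAt=*/10'500) << "\n"; // true
    // Neither boundary is crossed within the window, so no refresh yet.
    std::cout << std::boolalpha << eligibleForRefresh(now, /*expireAt=*/20'000, /*refreshAt=*/12'000) << "\n"; // false
}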
@ -660,6 +677,7 @@ ACTOR Future<Void> refreshEncryptionKeysCore(Reference<EncryptKeyProxyData> ekpP
|
|||
ekpProxyData->baseCipherKeysRefreshed += rep.cipherKeyDetails.size();
|
||||
|
||||
t.detail("NumKeys", rep.cipherKeyDetails.size());
|
||||
CODE_PROBE(!rep.cipherKeyDetails.empty(), "EKP refresh cipherKeys");
|
||||
} catch (Error& e) {
|
||||
if (!canReplyWith(e)) {
|
||||
TraceEvent(SevWarn, "RefreshEKsError").error(e);
|
||||
|
@ -673,7 +691,7 @@ ACTOR Future<Void> refreshEncryptionKeysCore(Reference<EncryptKeyProxyData> ekpP
|
|||
}
|
||||
|
||||
Future<Void> refreshEncryptionKeys(Reference<EncryptKeyProxyData> ekpProxyData, KmsConnectorInterface kmsConnectorInf) {
|
||||
return refreshEncryptionKeysCore(ekpProxyData, kmsConnectorInf);
|
||||
return refreshEncryptionKeysImpl(ekpProxyData, kmsConnectorInf);
|
||||
}
|
||||
|
||||
ACTOR Future<Void> getLatestBlobMetadata(Reference<EncryptKeyProxyData> ekpProxyData,
|
||||
|
@ -690,44 +708,44 @@ ACTOR Future<Void> getLatestBlobMetadata(Reference<EncryptKeyProxyData> ekpProxy
|
|||
}
|
||||
|
||||
// Dedup the requested domainIds.
|
||||
std::unordered_set<BlobMetadataDomainId> dedupedDomainIds;
|
||||
for (auto id : req.domainIds) {
|
||||
dedupedDomainIds.emplace(id);
|
||||
std::unordered_map<BlobMetadataDomainId, BlobMetadataDomainName> dedupedDomainInfos;
|
||||
for (auto info : req.domainInfos) {
|
||||
dedupedDomainInfos.insert({ info.domainId, info.domainName });
|
||||
}
|
||||
|
||||
if (dbgTrace.present()) {
|
||||
dbgTrace.get().detail("NKeys", dedupedDomainIds.size());
|
||||
for (BlobMetadataDomainId id : dedupedDomainIds) {
|
||||
dbgTrace.get().detail("NKeys", dedupedDomainInfos.size());
|
||||
for (auto& info : dedupedDomainInfos) {
|
||||
// log domainids queried
|
||||
dbgTrace.get().detail("BMQ" + std::to_string(id), "");
|
||||
dbgTrace.get().detail("BMQ" + std::to_string(info.first), "");
|
||||
}
|
||||
}
|
||||
|
||||
// First, check if the requested information is already cached by the server.
|
||||
// Ensure the cached information is within SERVER_KNOBS->BLOB_METADATA_CACHE_TTL time window.
|
||||
std::vector<BlobMetadataDomainId> lookupDomains;
|
||||
for (BlobMetadataDomainId id : dedupedDomainIds) {
|
||||
const auto itr = ekpProxyData->blobMetadataDomainIdCache.find(id);
|
||||
if (itr != ekpProxyData->blobMetadataDomainIdCache.end() && itr->second.isValid()) {
|
||||
state KmsConnBlobMetadataReq kmsReq;
|
||||
kmsReq.debugId = req.debugId;
|
||||
|
||||
for (auto& info : dedupedDomainInfos) {
|
||||
const auto itr = ekpProxyData->blobMetadataDomainIdCache.find(info.first);
|
||||
if (itr != ekpProxyData->blobMetadataDomainIdCache.end() && itr->second.isValid() &&
|
||||
now() <= itr->second.metadataDetails.expireAt) {
|
||||
metadataDetails.arena().dependsOn(itr->second.metadataDetails.arena());
|
||||
metadataDetails.push_back(metadataDetails.arena(), itr->second.metadataDetails);
|
||||
|
||||
if (dbgTrace.present()) {
|
||||
dbgTrace.get().detail("BMC" + std::to_string(id), "");
|
||||
dbgTrace.get().detail("BMC" + std::to_string(info.first), "");
|
||||
}
|
||||
++ekpProxyData->blobMetadataCacheHits;
|
||||
} else {
|
||||
lookupDomains.emplace_back(id);
|
||||
++ekpProxyData->blobMetadataCacheMisses;
|
||||
kmsReq.domainInfos.emplace_back(kmsReq.domainInfos.arena(), info.first, info.second);
|
||||
}
|
||||
}
|
||||
|
||||
ekpProxyData->baseCipherDomainIdCacheHits += metadataDetails.size();
|
||||
ekpProxyData->baseCipherDomainIdCacheMisses += lookupDomains.size();
|
||||
ekpProxyData->blobMetadataCacheHits += metadataDetails.size();
|
||||
|
||||
if (!lookupDomains.empty()) {
|
||||
if (!kmsReq.domainInfos.empty()) {
|
||||
ekpProxyData->blobMetadataCacheMisses += kmsReq.domainInfos.size();
|
||||
try {
|
||||
KmsConnBlobMetadataReq kmsReq(lookupDomains, req.debugId);
|
||||
state double startTime = now();
|
||||
KmsConnBlobMetadataRep kmsRep = wait(kmsConnectorInf.blobMetadataReq.getReply(kmsReq));
|
||||
ekpProxyData->kmsBlobMetadataReqLatency.addMeasurement(now() - startTime);
|
||||
|
@ -755,15 +773,15 @@ ACTOR Future<Void> getLatestBlobMetadata(Reference<EncryptKeyProxyData> ekpProxy
|
|||
}
|
||||
|
||||
req.reply.send(EKPGetLatestBlobMetadataReply(metadataDetails));
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> refreshBlobMetadataCore(Reference<EncryptKeyProxyData> ekpProxyData,
|
||||
KmsConnectorInterface kmsConnectorInf) {
|
||||
state UID debugId = deterministicRandom()->randomUniqueID();
|
||||
state double startTime;
|
||||
|
||||
state TraceEvent t("RefreshBlobMetadata_Start", ekpProxyData->myId);
|
||||
state TraceEvent t("RefreshBlobMetadataStart", ekpProxyData->myId);
|
||||
t.setMaxEventLength(SERVER_KNOBS->ENCRYPT_PROXY_MAX_DBG_TRACE_LENGTH);
|
||||
t.detail("KmsConnInf", kmsConnectorInf.id());
|
||||
t.detail("DebugId", debugId);
|
||||
|
@ -771,12 +789,28 @@ ACTOR Future<Void> refreshBlobMetadataCore(Reference<EncryptKeyProxyData> ekpPro
|
|||
try {
|
||||
KmsConnBlobMetadataReq req;
|
||||
req.debugId = debugId;
|
||||
req.domainIds.reserve(ekpProxyData->blobMetadataDomainIdCache.size());
|
||||
|
||||
for (auto& item : ekpProxyData->blobMetadataDomainIdCache) {
|
||||
req.domainIds.emplace_back(item.first);
|
||||
int64_t currTS = (int64_t)now();
|
||||
for (auto itr = ekpProxyData->blobMetadataDomainIdCache.begin();
|
||||
itr != ekpProxyData->blobMetadataDomainIdCache.end();) {
|
||||
if (isBlobMetadataEligibleForRefresh(itr->second.metadataDetails, currTS)) {
|
||||
req.domainInfos.emplace_back_deep(
|
||||
req.domainInfos.arena(), itr->first, itr->second.metadataDetails.domainName);
|
||||
}
|
||||
|
||||
// Garbage collect expired cached Blob Metadata
|
||||
if (itr->second.metadataDetails.expireAt >= currTS) {
|
||||
itr = ekpProxyData->blobMetadataDomainIdCache.erase(itr);
|
||||
} else {
|
||||
itr++;
|
||||
}
|
||||
}
|
||||
state double startTime = now();
|
||||
|
||||
if (req.domainInfos.empty()) {
|
||||
return Void();
|
||||
}
|
||||
|
||||
startTime = now();
|
||||
KmsConnBlobMetadataRep rep = wait(kmsConnectorInf.blobMetadataReq.getReply(req));
|
||||
ekpProxyData->kmsBlobMetadataReqLatency.addMeasurement(now() - startTime);
|
||||
for (auto& item : rep.metadataDetails) {
|
||||
|
@ -789,7 +823,7 @@ ACTOR Future<Void> refreshBlobMetadataCore(Reference<EncryptKeyProxyData> ekpPro
|
|||
t.detail("nKeys", rep.metadataDetails.size());
|
||||
} catch (Error& e) {
|
||||
if (!canReplyWith(e)) {
|
||||
TraceEvent("RefreshBlobMetadata_Error").error(e);
|
||||
TraceEvent("RefreshBlobMetadataError").error(e);
|
||||
throw e;
|
||||
}
|
||||
TraceEvent("RefreshBlobMetadata").detail("ErrorCode", e.code());
|
||||
|
@ -804,24 +838,25 @@ void refreshBlobMetadata(Reference<EncryptKeyProxyData> ekpProxyData, KmsConnect
}

void activateKmsConnector(Reference<EncryptKeyProxyData> ekpProxyData, KmsConnectorInterface kmsConnectorInf) {
    if (g_network->isSimulated() || (SERVER_KNOBS->KMS_CONNECTOR_TYPE.compare(FDB_PREF_KMS_CONNECTOR_TYPE_STR) == 0)) {
        ekpProxyData->kmsConnector = std::make_unique<SimKmsConnector>();
    if (g_network->isSimulated()) {
        ekpProxyData->kmsConnector = std::make_unique<SimKmsConnector>(FDB_SIM_KMS_CONNECTOR_TYPE_STR);
    } else if (SERVER_KNOBS->KMS_CONNECTOR_TYPE.compare(FDB_PREF_KMS_CONNECTOR_TYPE_STR) == 0) {
        ekpProxyData->kmsConnector = std::make_unique<SimKmsConnector>(FDB_PREF_KMS_CONNECTOR_TYPE_STR);
    } else if (SERVER_KNOBS->KMS_CONNECTOR_TYPE.compare(REST_KMS_CONNECTOR_TYPE_STR) == 0) {
        ekpProxyData->kmsConnector = std::make_unique<RESTKmsConnector>();
        ekpProxyData->kmsConnector = std::make_unique<RESTKmsConnector>(REST_KMS_CONNECTOR_TYPE_STR);
    } else {
        throw not_implemented();
    }

    TraceEvent("EKPActiveKmsConnector", ekpProxyData->myId)
        .detail("ConnectorType",
                g_network->isSimulated() ? FDB_SIM_KMS_CONNECTOR_TYPE_STR : SERVER_KNOBS->KMS_CONNECTOR_TYPE)
        .detail("ConnectorType", ekpProxyData->kmsConnector->getConnectorStr())
        .detail("InfId", kmsConnectorInf.id());

    ekpProxyData->addActor.send(ekpProxyData->kmsConnector->connectorCore(kmsConnectorInf));
}

ACTOR Future<Void> encryptKeyProxyServer(EncryptKeyProxyInterface ekpInterface, Reference<AsyncVar<ServerDBInfo>> db) {
    state Reference<EncryptKeyProxyData> self(new EncryptKeyProxyData(ekpInterface.id()));
    state Reference<EncryptKeyProxyData> self = makeReference<EncryptKeyProxyData>(ekpInterface.id());
    state Future<Void> collection = actorCollection(self->addActor.getFuture());
    self->addActor.send(traceRole(Role::ENCRYPT_KEY_PROXY, ekpInterface.id()));
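The reworked activateKmsConnector now passes the connector-type string into each connector and reads it back for tracing via getConnectorStr(). A sketch of that select-by-config-string factory pattern, using hypothetical *Lite types rather than the real SimKmsConnector/RESTKmsConnector classes:

#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>

// Hypothetical minimal connector hierarchy: each implementation records which
// type string it was constructed with so callers can trace the selection.
struct KmsConnectorLite {
    explicit KmsConnectorLite(std::string name) : name(std::move(name)) {}
    virtual ~KmsConnectorLite() = default;
    const std::string& getConnectorStr() const { return name; }
private:
    std::string name;
};
struct SimKmsConnectorLite : KmsConnectorLite { using KmsConnectorLite::KmsConnectorLite; };
struct RestKmsConnectorLite : KmsConnectorLite { using KmsConnectorLite::KmsConnectorLite; };

// Pick an implementation from the simulation flag and the configured type string,
// mirroring the if/else-if chain in activateKmsConnector().
std::unique_ptr<KmsConnectorLite> makeConnector(bool simulated, const std::string& connectorType) {
    if (simulated)
        return std::make_unique<SimKmsConnectorLite>("SimKmsConnector");
    if (connectorType == "RESTKmsConnector")
        return std::make_unique<RestKmsConnectorLite>("RESTKmsConnector");
    throw std::runtime_error("not_implemented");
}

int main() {
    auto connector = makeConnector(/*simulated=*/false, "RESTKmsConnector");
    std::cout << "ConnectorType=" << connector->getConnectorStr() << "\n";
}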