Merge branch 'main' of https://github.com/apple/foundationdb into fix/main/ppwmessage

This commit is contained in:
Xiaoxi Wang 2022-09-07 09:48:25 -07:00
commit 50167b8c13
108 changed files with 1814 additions and 496 deletions

View File

@ -484,7 +484,7 @@ elseif(NOT WIN32 AND NOT APPLE AND NOT USE_SANITIZER) # Linux Only, non-santizer
--outdir ${SHIM_LIB_OUTPUT_DIR}
--dlopen-callback=fdb_shim_dlopen_callback
$<TARGET_FILE:fdb_c>
DEPENDS ${IMPLIBSO_SRC}
DEPENDS ${IMPLIBSO_SRC} fdb_c
COMMENT "Generating source code for C shim library")
add_library(fdb_c_shim STATIC ${SHIM_LIB_GEN_SRC} foundationdb/fdb_c_shim.h fdb_c_shim.cpp)

View File

@ -324,6 +324,15 @@ extern "C" DLLEXPORT fdb_error_t fdb_future_get_key_array(FDBFuture* f, FDBKey c
*out_count = na.size(););
}
// Extracts the blob granule summary array from a ready future.
// On success, *out_summaries points at memory owned by the future (valid until
// the future is destroyed) and *out_count holds the number of entries.
// Renamed the out parameter from out_ranges to out_summaries to match the
// declaration in the public header.
extern "C" DLLEXPORT fdb_error_t fdb_future_get_granule_summary_array(FDBFuture* f,
                                                                      FDBGranuleSummary const** out_summaries,
                                                                      int* out_count) {
	CATCH_AND_RETURN(Standalone<VectorRef<BlobGranuleSummaryRef>> na =
	                     TSAV(Standalone<VectorRef<BlobGranuleSummaryRef>>, f)->get();
	                 *out_summaries = (FDBGranuleSummary*)na.begin();
	                 *out_count = na.size(););
}
// Destroys an FDBResult and releases its memory. Uses CATCH_AND_DIE because
// destruction must never fail; any exception here aborts the process.
extern "C" DLLEXPORT void fdb_result_destroy(FDBResult* r) {
CATCH_AND_DIE(TSAVB(r)->cancel(););
}
@ -994,6 +1003,23 @@ extern "C" DLLEXPORT FDBResult* fdb_transaction_read_blob_granules_finish(FDBTra
.extractPtr());
}
// Starts an asynchronous request summarizing the blob granules overlapping
// [begin_key_name, end_key_name). rangeLimit caps the number of summaries
// returned. Returns a future resolving to an array of granule summaries.
extern "C" DLLEXPORT FDBFuture* fdb_transaction_summarize_blob_granules(FDBTransaction* tr,
uint8_t const* begin_key_name,
int begin_key_name_length,
uint8_t const* end_key_name,
int end_key_name_length,
int64_t summaryVersion,
int rangeLimit) {
RETURN_FUTURE_ON_ERROR(
Standalone<VectorRef<BlobGranuleSummaryRef>>,
// Build the native key range from the raw byte pointers/lengths.
KeyRangeRef range(KeyRef(begin_key_name, begin_key_name_length), KeyRef(end_key_name, end_key_name_length));
// The latestVersion sentinel means "no explicit version": leave the Optional
// empty so the implementation summarizes at the latest version.
Optional<Version> sv;
if (summaryVersion != latestVersion) { sv = summaryVersion; }
return (FDBFuture*)(TXN(tr)->summarizeBlobGranules(range, sv, rangeLimit).extractPtr()););
}
#include "fdb_c_function_pointers.g.h"
#define FDB_API_CHANGED(func, ver) \

View File

@ -179,6 +179,14 @@ typedef struct keyrange {
const uint8_t* end_key;
int end_key_length;
} FDBKeyRange;
/* Summary of one blob granule: its key range, the version and size of its
   most recent snapshot, and the version and size of the deltas applied since
   that snapshot. Declared inside a #pragma pack region (popped below) so the
   layout is a stable part of the C ABI. */
typedef struct granulesummary {
FDBKeyRange key_range;
int64_t snapshot_version;
int64_t snapshot_size;
int64_t delta_version;
int64_t delta_size;
} FDBGranuleSummary;
#pragma pack(pop)
typedef struct readgranulecontext {
@ -264,6 +272,10 @@ DLLEXPORT WARN_UNUSED_RESULT fdb_error_t fdb_future_get_keyrange_array(FDBFuture
FDBKeyRange const** out_ranges,
int* out_count);
/* Extracts the granule summary array from a ready future. The memory behind
   *out_summaries is owned by the future and is valid until it is destroyed. */
DLLEXPORT WARN_UNUSED_RESULT fdb_error_t fdb_future_get_granule_summary_array(FDBFuture* f,
FDBGranuleSummary const** out_summaries,
int* out_count);
/* FDBResult is a synchronous computation result, as opposed to a future that is asynchronous. */
DLLEXPORT void fdb_result_destroy(FDBResult* r);
@ -521,6 +533,14 @@ DLLEXPORT WARN_UNUSED_RESULT FDBResult* fdb_transaction_read_blob_granules(FDBTr
int64_t readVersion,
FDBReadBlobGranuleContext granuleContext);
/* Starts an asynchronous request summarizing the blob granules overlapping
   [begin_key_name, end_key_name) at summaryVersion, returning at most
   rangeLimit summaries. Resolve the result with
   fdb_future_get_granule_summary_array. */
DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_transaction_summarize_blob_granules(FDBTransaction* tr,
uint8_t const* begin_key_name,
int begin_key_name_length,
uint8_t const* end_key_name,
int end_key_name_length,
int64_t summaryVersion,
int rangeLimit);
#define FDB_KEYSEL_LAST_LESS_THAN(k, l) k, l, 0, 0
#define FDB_KEYSEL_LAST_LESS_OR_EQUAL(k, l) k, l, 1, 0
#define FDB_KEYSEL_FIRST_GREATER_THAN(k, l) k, l, 1, 1

View File

@ -32,13 +32,11 @@ public:
if (Random::get().randomInt(0, 1) == 0) {
excludedOpTypes.push_back(OP_CLEAR_RANGE);
}
// FIXME: remove! this bug is fixed in another PR
excludedOpTypes.push_back(OP_GET_RANGES);
}
private:
// FIXME: use other new blob granule apis!
enum OpType { OP_INSERT, OP_CLEAR, OP_CLEAR_RANGE, OP_READ, OP_GET_RANGES, OP_LAST = OP_GET_RANGES };
enum OpType { OP_INSERT, OP_CLEAR, OP_CLEAR_RANGE, OP_READ, OP_GET_RANGES, OP_SUMMARIZE, OP_LAST = OP_SUMMARIZE };
std::vector<OpType> excludedOpTypes;
// Allow reads at the start to get blob_granule_transaction_too_old if BG data isn't initialized yet
@ -48,11 +46,13 @@ private:
void randomReadOp(TTaskFct cont) {
fdb::Key begin = randomKeyName();
fdb::Key end = randomKeyName();
auto results = std::make_shared<std::vector<fdb::KeyValue>>();
auto tooOld = std::make_shared<bool>(false);
if (begin > end) {
std::swap(begin, end);
}
auto results = std::make_shared<std::vector<fdb::KeyValue>>();
auto tooOld = std::make_shared<bool>(false);
execTransaction(
[this, begin, end, results, tooOld](auto ctx) {
ctx->tx().setOption(FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE);
@ -121,10 +121,11 @@ private:
void randomGetRangesOp(TTaskFct cont) {
fdb::Key begin = randomKeyName();
fdb::Key end = randomKeyName();
auto results = std::make_shared<std::vector<fdb::KeyRange>>();
if (begin > end) {
std::swap(begin, end);
}
auto results = std::make_shared<std::vector<fdb::KeyRange>>();
execTransaction(
[begin, end, results](auto ctx) {
fdb::Future f = ctx->tx().getBlobGranuleRanges(begin, end, 1000).eraseType();
@ -171,6 +172,48 @@ private:
});
}
// Issues a summarizeBlobGranules request over a random key range and, once
// the transaction completes, validates the returned summaries: coverage of
// the requested range, ordering/contiguity, and sane versions and sizes.
void randomSummarizeOp(TTaskFct cont) {
fdb::Key begin = randomKeyName();
fdb::Key end = randomKeyName();
// Normalize so begin <= end before issuing the request.
if (begin > end) {
std::swap(begin, end);
}
auto results = std::make_shared<std::vector<fdb::GranuleSummary>>();
execTransaction(
[begin, end, results](auto ctx) {
fdb::Future f = ctx->tx().summarizeBlobGranules(begin, end, -2 /*latest version*/, 1000).eraseType();
ctx->continueAfter(
f,
[ctx, f, results]() {
// Deep-copy the summaries out of the future before it is destroyed.
*results = copyGranuleSummaryArray(f.get<fdb::future_var::GranuleSummaryRefArray>());
ctx->done();
},
true);
},
[this, begin, end, results, cont]() {
// Only validate coverage once a read has succeeded; before that the
// blob granule data may not be initialized yet.
if (seenReadSuccess) {
ASSERT(results->size() > 0);
ASSERT(results->front().keyRange.beginKey <= begin);
ASSERT(results->back().keyRange.endKey >= end);
}
for (int i = 0; i < results->size(); i++) {
// TODO: could do validation of subsequent calls and ensure snapshot version never decreases
ASSERT((*results)[i].keyRange.beginKey < (*results)[i].keyRange.endKey);
ASSERT((*results)[i].snapshotVersion <= (*results)[i].deltaVersion);
ASSERT((*results)[i].snapshotSize > 0);
ASSERT((*results)[i].deltaSize >= 0);
}
for (int i = 1; i < results->size(); i++) {
// summaries must be contiguous: each begins where the previous ended
ASSERT((*results)[i].keyRange.beginKey == (*results)[i - 1].keyRange.endKey);
}
schedule(cont);
});
}
void randomOperation(TTaskFct cont) {
OpType txType = (store.size() == 0) ? OP_INSERT : (OpType)Random::get().randomInt(0, OP_LAST);
while (std::count(excludedOpTypes.begin(), excludedOpTypes.end(), txType)) {
@ -192,6 +235,9 @@ private:
case OP_GET_RANGES:
randomGetRangesOp(cont);
break;
case OP_SUMMARIZE:
randomSummarizeOp(cont);
break;
}
}
};

View File

@ -81,6 +81,8 @@ public:
: fdbTx(tx), txActor(txActor), contAfterDone(cont), scheduler(scheduler), retryLimit(retryLimit),
txState(TxState::IN_PROGRESS), commitCalled(false), bgBasePath(bgBasePath) {}
virtual ~TransactionContextBase() { ASSERT(txState == TxState::DONE); }
// A state machine:
// IN_PROGRESS -> (ON_ERROR -> IN_PROGRESS)* [-> ON_ERROR] -> DONE
enum class TxState { IN_PROGRESS, ON_ERROR, DONE };
@ -114,6 +116,10 @@ public:
}
txState = TxState::DONE;
lock.unlock();
// No need for lock from here on, because only one thread
// can enter DONE state and handle it
if (retriedErrors.size() >= LARGE_NUMBER_OF_RETRIES) {
fmt::print("Transaction succeeded after {} retries on errors: {}\n",
retriedErrors.size(),
@ -124,6 +130,7 @@ public:
fdbTx.cancel();
txActor->complete(fdb::Error::success());
cleanUp();
ASSERT(txState == TxState::DONE);
contAfterDone();
}
@ -150,6 +157,10 @@ protected:
}
txState = TxState::DONE;
lock.unlock();
// No need for lock from here on, because only one thread
// can enter DONE state and handle it
txActor->complete(err);
cleanUp();
contAfterDone();
@ -164,6 +175,7 @@ protected:
transactionFailed(err);
} else {
std::unique_lock<std::mutex> lock(mutex);
ASSERT(txState == TxState::ON_ERROR);
txState = TxState::IN_PROGRESS;
commitCalled = false;
lock.unlock();
@ -197,43 +209,58 @@ protected:
}
// FDB transaction
// Provides a thread safe interface by itself (no need for mutex)
fdb::Transaction fdbTx;
// Actor implementing the transaction workflow
// Set in constructor and reset on cleanup (no need for mutex)
std::shared_ptr<ITransactionActor> txActor;
// Mutex protecting access to shared mutable state
// Only the state that is accessible under the IN_PROGRESS state
// must be protected by mutex
std::mutex mutex;
// Continuation to be called after completion of the transaction
TTaskFct contAfterDone;
// Set in constructor, stays immutable
const TTaskFct contAfterDone;
// Reference to the scheduler
IScheduler* scheduler;
// Set in constructor, stays immutable
// Cannot be accessed in DONE state, workloads can be completed and the scheduler deleted
IScheduler* const scheduler;
// Retry limit
int retryLimit;
// Set in constructor, stays immutable
const int retryLimit;
// Transaction execution state
// Must be accessed under mutex
TxState txState;
// onError future used in ON_ERROR state
// onError future
// used only in ON_ERROR state (no need for mutex)
fdb::Future onErrorFuture;
// The error code on which onError was called
// used only in ON_ERROR state (no need for mutex)
fdb::Error onErrorArg;
// The time point of calling onError
// used only in ON_ERROR state (no need for mutex)
TimePoint onErrorCallTimePoint;
// Transaction is committed or being committed
// Must be accessed under mutex
bool commitCalled;
// A history of errors on which the transaction was retried
// used only in ON_ERROR and DONE states (no need for mutex)
std::vector<fdb::Error> retriedErrors;
// blob granule base path
std::string bgBasePath;
// Set in constructor, stays immutable
const std::string bgBasePath;
};
/**
@ -383,7 +410,6 @@ protected:
if (txState != TxState::IN_PROGRESS) {
return;
}
lock.unlock();
fdb::Error err = f.error();
auto waitTimeUs = timeElapsedInUs(cbInfo.startTime, endTime);
if (waitTimeUs > LONG_WAIT_TIME_US) {
@ -399,6 +425,10 @@ protected:
scheduler->schedule(cbInfo.cont);
return;
}
// We keep lock until here to prevent transitions from the IN_PROGRESS state
// which could possibly lead to completion of the workload and destruction
// of the scheduler
lock.unlock();
onError(err);
}
@ -411,6 +441,9 @@ protected:
txState = TxState::ON_ERROR;
lock.unlock();
// No need to hold the lock from here on, because ON_ERROR state is handled sequentially, and
// other callbacks are simply ignored while it stays in this state
if (!canRetry(err)) {
return;
}
@ -490,9 +523,12 @@ protected:
};
// Map for keeping track of future waits and holding necessary object references
// It can be accessed at any time when callbacks are triggered, so it must always
// be mutex protected
std::unordered_map<fdb::Future, CallbackInfo> callbackMap;
// Holding reference to this for onError future C callback
// Accessed only in ON_ERROR state (no need for mutex)
std::shared_ptr<AsyncTransactionContext> onErrorThisRef;
};

View File

@ -106,4 +106,17 @@ KeyRangeArray copyKeyRangeArray(fdb::future_var::KeyRangeRefArray::Type array) {
return out;
};
// Deep-copies a native granule summary array (pointer + count, with memory
// owned by the originating future) into a vector of owning fdb::GranuleSummary
// values, so the result outlives the future.
// Fixes: reserves up front to avoid reallocations, constructs elements in
// place with emplace_back, and drops the stray ';' after the function body.
GranuleSummaryArray copyGranuleSummaryArray(fdb::future_var::GranuleSummaryRefArray::Type array) {
	auto& [in_summaries, in_count] = array;
	GranuleSummaryArray out;
	out.reserve(in_count);
	for (int i = 0; i < in_count; ++i) {
		// GranuleSummary's converting constructor copies all fields out of the
		// native struct.
		out.emplace_back(*in_summaries++);
	}
	return out;
}
} // namespace FdbApiTester

View File

@ -120,6 +120,9 @@ KeyValueArray copyKeyValueArray(fdb::future_var::KeyValueRefArray::Type array);
using KeyRangeArray = std::vector<fdb::KeyRange>;
KeyRangeArray copyKeyRangeArray(fdb::future_var::KeyRangeRefArray::Type array);
using GranuleSummaryArray = std::vector<fdb::GranuleSummary>;
GranuleSummaryArray copyGranuleSummaryArray(fdb::future_var::GranuleSummaryRefArray::Type array);
static_assert(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__, "Do not support non-little-endian systems");
// Converts a little-endian encoded number into an integral type.

View File

@ -80,13 +80,14 @@ bool WorkloadConfig::getBoolOption(const std::string& name, bool defaultVal) con
WorkloadBase::WorkloadBase(const WorkloadConfig& config)
: manager(nullptr), tasksScheduled(0), numErrors(0), clientId(config.clientId), numClients(config.numClients),
failed(false), numTxCompleted(0) {
failed(false), numTxCompleted(0), numTxStarted(0), inProgress(false) {
maxErrors = config.getIntOption("maxErrors", 10);
workloadId = fmt::format("{}{}", config.name, clientId);
}
void WorkloadBase::init(WorkloadManager* manager) {
this->manager = manager;
inProgress = true;
}
void WorkloadBase::printStats() {
@ -94,6 +95,7 @@ void WorkloadBase::printStats() {
}
void WorkloadBase::schedule(TTaskFct task) {
ASSERT(inProgress);
if (failed) {
return;
}
@ -105,10 +107,12 @@ void WorkloadBase::schedule(TTaskFct task) {
}
void WorkloadBase::execTransaction(std::shared_ptr<ITransactionActor> tx, TTaskFct cont, bool failOnError) {
ASSERT(inProgress);
if (failed) {
return;
}
tasksScheduled++;
numTxStarted++;
manager->txExecutor->execute(tx, [this, tx, cont, failOnError]() {
numTxCompleted++;
fdb::Error err = tx->getError();
@ -143,11 +147,13 @@ void WorkloadBase::error(const std::string& msg) {
void WorkloadBase::scheduledTaskDone() {
if (--tasksScheduled == 0) {
inProgress = false;
if (numErrors > 0) {
error(fmt::format("Workload failed with {} errors", numErrors.load()));
} else {
info("Workload successfully completed");
}
ASSERT(numTxStarted == numTxCompleted);
manager->workloadDone(this, numErrors > 0);
}
}

View File

@ -164,6 +164,12 @@ protected:
// Number of completed transactions
std::atomic<int> numTxCompleted;
// Number of started transactions
std::atomic<int> numTxStarted;
// Workload is in progress (initialized, but not completed)
std::atomic<bool> inProgress;
};
// Workload manager

View File

@ -62,6 +62,22 @@ struct KeyRange {
Key beginKey;
Key endKey;
};
struct GranuleSummary {
KeyRange keyRange;
int64_t snapshotVersion;
int64_t snapshotSize;
int64_t deltaVersion;
int64_t deltaSize;
GranuleSummary(const native::FDBGranuleSummary& nativeSummary) {
keyRange.beginKey = fdb::Key(nativeSummary.key_range.begin_key, nativeSummary.key_range.begin_key_length);
keyRange.endKey = fdb::Key(nativeSummary.key_range.end_key, nativeSummary.key_range.end_key_length);
snapshotVersion = nativeSummary.snapshot_version;
snapshotSize = nativeSummary.snapshot_size;
deltaVersion = nativeSummary.delta_version;
deltaSize = nativeSummary.delta_size;
}
};
// Reinterprets a char pointer as a pointer to unsigned bytes (no copy; same
// address, different static type).
inline const uint8_t* toBytePtr(const char* s) noexcept {
	return reinterpret_cast<const uint8_t*>(s);
}
}
};
// Non-owning view over a native FDBGranuleSummary; adds typed accessors for
// the granule's key range boundaries. The underlying bytes are owned by the
// future that produced the array.
struct GranuleSummaryRef : native::FDBGranuleSummary {
// Begin boundary of the granule's key range (borrowed, not owned).
fdb::KeyRef beginKey() const noexcept {
return fdb::KeyRef(native::FDBGranuleSummary::key_range.begin_key,
native::FDBGranuleSummary::key_range.begin_key_length);
}
// End boundary of the granule's key range (borrowed, not owned).
fdb::KeyRef endKey() const noexcept {
return fdb::KeyRef(native::FDBGranuleSummary::key_range.end_key,
native::FDBGranuleSummary::key_range.end_key_length);
}
};
// future_var adapter: extracts a (pointer, count) pair of GranuleSummaryRef
// from a ready future via the C API. The pointed-to memory stays owned by the
// future.
struct GranuleSummaryRefArray {
using Type = std::tuple<GranuleSummaryRef const*, int>;
static Error extract(native::FDBFuture* f, Type& out) noexcept {
auto& [out_summaries, out_count] = out;
// GranuleSummaryRef derives from FDBGranuleSummary with no added state, so
// reinterpreting the out-pointer is safe here.
auto err = native::fdb_future_get_granule_summary_array(
f, reinterpret_cast<const native::FDBGranuleSummary**>(&out_summaries), &out_count);
return Error(err);
}
};
} // namespace future_var
[[noreturn]] inline void throwError(std::string_view preamble, Error err) {
@ -573,6 +610,14 @@ public:
tr.get(), begin.data(), intSize(begin), end.data(), intSize(end), begin_version, read_version, context));
}
// Asynchronously summarizes the blob granules overlapping [begin, end) at
// summaryVersion, returning at most rangeLimit summaries. The typed future
// yields a GranuleSummaryRefArray view into the future's own memory.
TypedFuture<future_var::GranuleSummaryRefArray> summarizeBlobGranules(KeyRef begin,
KeyRef end,
int64_t summaryVersion,
int rangeLimit) {
return native::fdb_transaction_summarize_blob_granules(
tr.get(), begin.data(), intSize(begin), end.data(), intSize(end), summaryVersion, rangeLimit);
}
TypedFuture<future_var::None> watch(KeyRef key) {
return native::fdb_transaction_watch(tr.get(), key.data(), intSize(key));
}

View File

@ -84,6 +84,12 @@ void Future::cancel() {
return fdb_future_get_keyrange_array(future_, out_keyranges, out_count);
}
// GranuleSummaryArrayFuture
// Thin typed wrapper: forwards to fdb_future_get_granule_summary_array on the
// underlying future handle.
[[nodiscard]] fdb_error_t GranuleSummaryArrayFuture::get(const FDBGranuleSummary** out_summaries, int* out_count) {
return fdb_future_get_granule_summary_array(future_, out_summaries, out_count);
}
// KeyValueArrayFuture
[[nodiscard]] fdb_error_t KeyValueArrayFuture::get(const FDBKeyValue** out_kv, int* out_count, fdb_bool_t* out_more) {
@ -366,6 +372,7 @@ KeyRangeArrayFuture Transaction::get_blob_granule_ranges(std::string_view begin_
end_key.size(),
rangeLimit));
}
KeyValueArrayResult Transaction::read_blob_granules(std::string_view begin_key,
std::string_view end_key,
int64_t beginVersion,
@ -381,4 +388,17 @@ KeyValueArrayResult Transaction::read_blob_granules(std::string_view begin_key,
granuleContext));
}
// Starts a blob granule summary request over [begin_key, end_key) at
// summary_version, returning at most rangeLimit summaries via the typed
// future wrapper.
GranuleSummaryArrayFuture Transaction::summarize_blob_granules(std::string_view begin_key,
                                                               std::string_view end_key,
                                                               int64_t summary_version,
                                                               int rangeLimit) {
	FDBFuture* native_future = fdb_transaction_summarize_blob_granules(tr_,
	                                                                   reinterpret_cast<const uint8_t*>(begin_key.data()),
	                                                                   begin_key.size(),
	                                                                   reinterpret_cast<const uint8_t*>(end_key.data()),
	                                                                   end_key.size(),
	                                                                   summary_version,
	                                                                   rangeLimit);
	return GranuleSummaryArrayFuture(native_future);
}
} // namespace fdb

View File

@ -161,6 +161,18 @@ private:
KeyRangeArrayFuture(FDBFuture* f) : Future(f) {}
};
// Typed future whose result is an array of FDBGranuleSummary.
class GranuleSummaryArrayFuture : public Future {
public:
// Call this function instead of fdb_future_get_granule_summary_array when using
// the GranuleSummaryArrayFuture type. Its behavior is identical to
// fdb_future_get_granule_summary_array.
fdb_error_t get(const FDBGranuleSummary** out_summaries, int* out_count);
private:
// Only Transaction may construct this wrapper (from a raw FDBFuture handle).
friend class Transaction;
GranuleSummaryArrayFuture(FDBFuture* f) : Future(f) {}
};
class EmptyFuture : public Future {
private:
friend class Transaction;
@ -354,6 +366,10 @@ public:
int64_t beginVersion,
int64_t endVersion,
FDBReadBlobGranuleContext granule_context);
// Starts a granule summary request over [begin_key, end_key); see
// fdb_transaction_summarize_blob_granules for parameter semantics.
GranuleSummaryArrayFuture summarize_blob_granules(std::string_view begin_key,
std::string_view end_key,
int64_t summaryVersion,
int rangeLimit);
private:
FDBTransaction* tr_;

View File

@ -2761,6 +2761,7 @@ TEST_CASE("Blob Granule Functions") {
auto confValue =
get_value("\xff/conf/blob_granules_enabled", /* snapshot */ false, { FDB_TR_OPTION_READ_SYSTEM_KEYS });
if (!confValue.has_value() || confValue.value() != "1") {
// std::cout << "skipping blob granule test" << std::endl;
return;
}
@ -2817,7 +2818,6 @@ TEST_CASE("Blob Granule Functions") {
fdb::KeyValueArrayResult r =
tr.read_blob_granules(key("bg"), key("bh"), originalReadVersion, -2, granuleContext);
fdb_error_t err = r.get(&out_kv, &out_count, &out_more);
;
if (err && err != 2037 /* blob_granule_not_materialized */) {
fdb::EmptyFuture f2 = tr.on_error(err);
fdb_check(wait_future(f2));
@ -2865,6 +2865,10 @@ TEST_CASE("Blob Granule Functions") {
int out_count;
fdb_check(f.get(&out_kr, &out_count));
CHECK(std::string((const char*)out_kr[0].begin_key, out_kr[0].begin_key_length) <= key("bg"));
CHECK(std::string((const char*)out_kr[out_count - 1].end_key, out_kr[out_count - 1].end_key_length) >=
key("bh"));
CHECK(out_count >= 1);
// check key ranges are in order
for (int i = 0; i < out_count; i++) {
@ -2872,9 +2876,9 @@ TEST_CASE("Blob Granule Functions") {
CHECK(std::string((const char*)out_kr[i].begin_key, out_kr[i].begin_key_length) <
std::string((const char*)out_kr[i].end_key, out_kr[i].end_key_length));
}
// Ranges themselves are sorted
// Ranges themselves are sorted and contiguous
for (int i = 0; i < out_count - 1; i++) {
CHECK(std::string((const char*)out_kr[i].end_key, out_kr[i].end_key_length) <=
CHECK(std::string((const char*)out_kr[i].end_key, out_kr[i].end_key_length) ==
std::string((const char*)out_kr[i + 1].begin_key, out_kr[i + 1].begin_key_length));
}
@ -2900,7 +2904,6 @@ TEST_CASE("Blob Granule Functions") {
fdb_check(wait_future(waitPurgeFuture));
// re-read again at the purge version to make sure it is still valid
while (1) {
fdb_check(tr.set_option(FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE, nullptr, 0));
fdb::KeyValueArrayResult r =
@ -2917,6 +2920,56 @@ TEST_CASE("Blob Granule Functions") {
tr.reset();
break;
}
// check granule summary
while (1) {
fdb::GranuleSummaryArrayFuture f = tr.summarize_blob_granules(key("bg"), key("bh"), originalReadVersion, 100);
fdb_error_t err = wait_future(f);
if (err) {
fdb::EmptyFuture f2 = tr.on_error(err);
fdb_check(wait_future(f2));
continue;
}
const FDBGranuleSummary* out_summaries;
int out_count;
fdb_check(f.get(&out_summaries, &out_count));
CHECK(out_count >= 1);
CHECK(out_count <= 100);
// check that ranges cover requested range
CHECK(std::string((const char*)out_summaries[0].key_range.begin_key,
out_summaries[0].key_range.begin_key_length) <= key("bg"));
CHECK(std::string((const char*)out_summaries[out_count - 1].key_range.end_key,
out_summaries[out_count - 1].key_range.end_key_length) >= key("bh"));
// check key ranges are in order
for (int i = 0; i < out_count; i++) {
// key range start < end
CHECK(std::string((const char*)out_summaries[i].key_range.begin_key,
out_summaries[i].key_range.begin_key_length) <
std::string((const char*)out_summaries[i].key_range.end_key,
out_summaries[i].key_range.end_key_length));
// sanity check versions and sizes
CHECK(out_summaries[i].snapshot_version <= originalReadVersion);
CHECK(out_summaries[i].delta_version <= originalReadVersion);
CHECK(out_summaries[i].snapshot_version <= out_summaries[i].delta_version);
CHECK(out_summaries[i].snapshot_size > 0);
CHECK(out_summaries[i].delta_size >= 0);
}
// Ranges themselves are sorted and contiguous
for (int i = 0; i < out_count - 1; i++) {
CHECK(std::string((const char*)out_summaries[i].key_range.end_key,
out_summaries[i].key_range.end_key_length) ==
std::string((const char*)out_summaries[i + 1].key_range.begin_key,
out_summaries[i + 1].key_range.begin_key_length));
}
tr.reset();
break;
}
}
int main(int argc, char** argv) {

View File

@ -102,6 +102,11 @@ func (o NetworkOptions) SetTraceFileIdentifier(param string) error {
return o.setOpt(36, []byte(param))
}
// Use the same base trace file name for all client threads as it did before version 7.2. The current default behavior is to use distinct trace file names for client threads by including their version and thread index.
// Network option code 37; takes no parameter.
func (o NetworkOptions) SetTraceShareAmongClientThreads() error {
return o.setOpt(37, nil)
}
// Set file suffix for partially written log files.
//
// Parameter: Append this suffix to partially written log files. When a log file is complete, it is renamed to remove the suffix. No separator is added between the file and the suffix. If you want to add a file extension, you should include the separator - e.g. '.tmp' instead of 'tmp' to add the 'tmp' extension.

View File

@ -7,6 +7,7 @@ import subprocess
import logging
import functools
import json
import tempfile
import time
import random
from argparse import ArgumentParser, RawDescriptionHelpFormatter
@ -770,6 +771,68 @@ def integer_options():
assert lines[1].startswith('Committed')
assert error_output == b''
def tls_address_suffix():
    """Verify fdbcli's TLS/coordinator-address consistency check.

    fdbcli shall prevent a non-TLS fdbcli run from connecting to an all-TLS
    cluster, and vice versa. For each testcase we write a cluster file whose
    coordinator addresses do or don't carry the ``:tls`` suffix, run fdbcli
    with the opposite TLS configuration, and assert it exits non-zero with the
    expected error message. Relies on module-level ``args`` (for the build
    dir) and ``command_template`` (the fdbcli invocation prefix).
    """
    preamble = 'eNW1yf1M:eNW1yf1M@'

    def make_addr(port: int, tls: bool = False):
        # Coordinator address; the ":tls" suffix marks a TLS listener.
        return "127.0.0.1:{}{}".format(port, ":tls" if tls else "")
    testcases = [
        # IsServerTLS, NumServerAddrs
        (True, 1),
        (False, 1),
        (True, 3),
        (False, 3),
    ]
    err_output_server_no_tls = "ERROR: fdbcli is configured with TLS, but none of the coordinators have TLS addresses."
    err_output_server_tls = "ERROR: fdbcli is not configured with TLS, but all of the coordinators have TLS addresses."

    # technically the contents of the certs and key files are not evaluated
    # before tls-suffix check against tls configuration takes place,
    # but we generate the certs and keys anyway to avoid
    # imposing nuanced TLSConfig evaluation ordering requirement on the testcase
    with tempfile.TemporaryDirectory() as tmpdir:
        cert_file = tmpdir + "/client-cert.pem"
        key_file = tmpdir + "/client-key.pem"
        ca_file = tmpdir + "/server-ca.pem"
        mkcert_process = subprocess.run([
            args.build_dir + "/bin/mkcert",
            "--server-chain-length", "1",
            "--client-chain-length", "1",
            "--server-cert-file", tmpdir + "/server-cert.pem",
            "--client-cert-file", tmpdir + "/client-cert.pem",
            "--server-key-file", tmpdir + "/server-key.pem",
            "--client-key-file", tmpdir + "/client-key.pem",
            "--server-ca-file", tmpdir + "/server-ca.pem",
            "--client-ca-file", tmpdir + "/client-ca.pem",
        ],
            capture_output=True)
        if mkcert_process.returncode != 0:
            print("mkcert returned with code {}".format(mkcert_process.returncode))
            print("Output:\n{}{}\n".format(
                mkcert_process.stdout.decode("utf8").strip(),
                mkcert_process.stderr.decode("utf8").strip()))
            assert False
        cluster_fn = tmpdir + "/fdb.cluster"
        for testcase in testcases:
            is_server_tls, num_server_addrs = testcase
            # The with-block closes the file; no explicit close() needed.
            with open(cluster_fn, "w") as fp:
                fp.write(preamble + ",".join(
                    [make_addr(port=4000 + addr_idx, tls=is_server_tls) for addr_idx in range(num_server_addrs)]))
            # When the server side is non-TLS, configure the client WITH TLS
            # (and vice versa) to trigger the mismatch error.
            tls_args = ["--tls-certificate-file",
                        cert_file,
                        "--tls-key-file",
                        key_file,
                        "--tls-ca-file",
                        ca_file] if not is_server_tls else []
            fdbcli_process = subprocess.run(command_template[:2] + [cluster_fn] + tls_args, capture_output=True)
            assert fdbcli_process.returncode != 0
            err_out = fdbcli_process.stderr.decode("utf8").strip()
            if is_server_tls:
                assert err_out == err_output_server_tls, f"unexpected output: {err_out}"
            else:
                assert err_out == err_output_server_no_tls, f"unexpected output: {err_out}"
if __name__ == '__main__':
parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
description="""
@ -816,6 +879,7 @@ if __name__ == '__main__':
tenants()
versionepoch()
integer_options()
tls_address_suffix()
else:
assert args.process_number > 1, "Process number should be positive"
coordinators()

View File

@ -64,7 +64,7 @@ The ``commit`` command commits the current transaction. Any sets or clears execu
configure
---------
The ``configure`` command changes the database configuration. Its syntax is ``configure [new|tss] [single|double|triple|three_data_hall|three_datacenter] [ssd|memory] [grv_proxies=<N>] [commit_proxies=<N>] [resolvers=<N>] [logs=<N>] [count=<TSS_COUNT>] [perpetual_storage_wiggle=<WIGGLE_SPEED>] [perpetual_storage_wiggle_locality=<<LOCALITY_KEY>:<LOCALITY_VALUE>|0>] [storage_migration_type={disabled|aggressive|gradual}] [tenant_mode={disabled|optional_experimental|required_experimental}]``.
The ``configure`` command changes the database configuration. Its syntax is ``configure [new|tss] [single|double|triple|three_data_hall|three_datacenter] [ssd|memory] [grv_proxies=<N>] [commit_proxies=<N>] [resolvers=<N>] [logs=<N>] [count=<TSS_COUNT>] [perpetual_storage_wiggle=<WIGGLE_SPEED>] [perpetual_storage_wiggle_locality=<<LOCALITY_KEY>:<LOCALITY_VALUE>|0>] [storage_migration_type={disabled|aggressive|gradual}] [tenant_mode={disabled|optional_experimental|required_experimental}] [encryption_at_rest_mode={aes_256_ctr|disabled}]``.
The ``new`` option, if present, initializes a new database with the given configuration rather than changing the configuration of an existing one. When ``new`` is used, both a redundancy mode and a storage engine must be specified.

View File

@ -790,6 +790,11 @@
"disabled",
"optional_experimental",
"required_experimental"
]},
"encryption_at_rest_mode": {
"$enum":[
"disabled",
"aes_256_ctr"
]}
},
"data":{

View File

@ -18,6 +18,7 @@
* limitations under the License.
*/
#include "flow/ApiVersion.h"
#include "fmt/format.h"
#include "fdbbackup/BackupTLSConfig.h"
#include "fdbclient/JsonBuilder.h"
@ -2314,7 +2315,7 @@ ACTOR Future<Void> runRestore(Database db,
throw restore_error();
}
origDb = Database::createDatabase(originalClusterFile, Database::API_VERSION_LATEST);
origDb = Database::createDatabase(originalClusterFile, ApiVersion::LATEST_VERSION);
Version v = wait(timeKeeperVersionFromDatetime(targetTimestamp, origDb.get()));
fmt::print("Timestamp '{0}' resolves to version {1}\n", targetTimestamp, v);
targetVersion = v;
@ -2720,7 +2721,7 @@ ACTOR Future<Void> queryBackup(const char* name,
return Void();
}
Database origDb = Database::createDatabase(originalClusterFile, Database::API_VERSION_LATEST);
Database origDb = Database::createDatabase(originalClusterFile, ApiVersion::LATEST_VERSION);
Version v = wait(timeKeeperVersionFromDatetime(restoreTimestamp, origDb));
result["restore_timestamp"] = restoreTimestamp;
result["restore_timestamp_resolved_version"] = v;
@ -3127,7 +3128,7 @@ Optional<Database> connectToCluster(std::string const& clusterFile,
}
try {
db = Database::createDatabase(ccf, -1, IsInternal::True, localities);
db = Database::createDatabase(ccf, ApiVersion::LATEST_VERSION, IsInternal::True, localities);
} catch (Error& e) {
if (!quiet) {
fprintf(stderr, "ERROR: %s\n", e.what());
@ -4123,7 +4124,7 @@ int main(int argc, char* argv[]) {
}
try {
db = Database::createDatabase(restoreClusterFileDest, Database::API_VERSION_LATEST);
db = Database::createDatabase(restoreClusterFileDest, ApiVersion::LATEST_VERSION);
} catch (Error& e) {
fprintf(stderr,
"Restore destination cluster file '%s' invalid: %s\n",
@ -4202,7 +4203,7 @@ int main(int argc, char* argv[]) {
}
try {
db = Database::createDatabase(restoreClusterFileDest, Database::API_VERSION_LATEST);
db = Database::createDatabase(restoreClusterFileDest, ApiVersion::LATEST_VERSION);
} catch (Error& e) {
fprintf(stderr,
"Restore destination cluster file '%s' invalid: %s\n",

View File

@ -275,6 +275,10 @@ ACTOR Future<bool> configureCommandActor(Reference<IDatabase> db,
fprintf(stderr, "ERROR: A cluster cannot change its tenant mode while part of a metacluster.\n");
ret = false;
break;
case ConfigurationResult::ENCRYPTION_AT_REST_MODE_ALREADY_SET:
fprintf(stderr, "ERROR: A cluster cannot change its encryption_at_rest state after database creation.\n");
ret = false;
break;
default:
ASSERT(false);
ret = false;
@ -308,6 +312,7 @@ void configureGenerator(const char* text,
"storage_migration_type=",
"tenant_mode=",
"blob_granules_enabled=",
"encryption_at_rest_mode=",
nullptr };
arrayGenerator(text, line, opts, lc);
}
@ -320,7 +325,8 @@ CommandFactory configureFactory(
"commit_proxies=<COMMIT_PROXIES>|grv_proxies=<GRV_PROXIES>|logs=<LOGS>|resolvers=<RESOLVERS>>*|"
"count=<TSS_COUNT>|perpetual_storage_wiggle=<WIGGLE_SPEED>|perpetual_storage_wiggle_locality="
"<<LOCALITY_KEY>:<LOCALITY_VALUE>|0>|storage_migration_type={disabled|gradual|aggressive}"
"|tenant_mode={disabled|optional_experimental|required_experimental}|blob_granules_enabled={0|1}",
"|tenant_mode={disabled|optional_experimental|required_experimental}|blob_granules_enabled={0|1}"
"|encryption_at_rest_mode={disabled|aes_256_ctr}",
"change the database configuration",
"The `new' option, if present, initializes a new database with the given configuration rather than changing "
"the configuration of an existing one. When used, both a redundancy mode and a storage engine must be "
@ -354,6 +360,9 @@ CommandFactory configureFactory(
"tenant_mode=<disabled|optional_experimental|required_experimental>: Sets the tenant mode for the cluster. If "
"optional, then transactions can be run with or without specifying tenants. If required, all data must be "
"accessed using tenants.\n\n"
"encryption_at_rest_mode=<disabled|aes_256_ctr>: Sets the cluster encryption data at-rest support for the "
"database. The configuration can be updated ONLY at the time of database creation and once set can't be "
"updated for the lifetime of the database.\n\n"
"See the FoundationDB Administration Guide for more information."),
&configureGenerator);

View File

@ -442,6 +442,13 @@ void printStatus(StatusObjectReader statusObj,
outputString += "\n Blob granules - enabled";
}
outputString += "\n Encryption at-rest - ";
if (statusObjConfig.get("encryption_at_rest_mode", strVal)) {
outputString += strVal;
} else {
outputString += "disabled";
}
outputString += "\n Coordinators - ";
if (statusObjConfig.get("coordinators_count", intVal)) {
outputString += std::to_string(intVal);

View File

@ -1050,7 +1050,7 @@ Future<T> stopNetworkAfter(Future<T> what) {
}
}
ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise, Reference<ClusterConnectionFile> ccf) {
state LineNoise& linenoise = *plinenoise;
state bool intrans = false;
@ -1075,20 +1075,6 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
state FdbOptions* options = &globalOptions;
state Reference<ClusterConnectionFile> ccf;
state std::pair<std::string, bool> resolvedClusterFile =
ClusterConnectionFile::lookupClusterFileName(opt.clusterFile);
try {
ccf = makeReference<ClusterConnectionFile>(resolvedClusterFile.first);
} catch (Error& e) {
if (e.code() == error_code_operation_cancelled) {
throw;
}
fprintf(stderr, "%s\n", ClusterConnectionFile::getErrorString(resolvedClusterFile, e).c_str());
return 1;
}
// Ordinarily, this is done when the network is run. However, network thread should be set before TraceEvents are
// logged. This thread will eventually run the network, so call it now.
TraceEvent::setNetworkThread();
@ -1987,7 +1973,7 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
}
}
ACTOR Future<int> runCli(CLIOptions opt) {
ACTOR Future<int> runCli(CLIOptions opt, Reference<ClusterConnectionFile> ccf) {
state LineNoise linenoise(
[](std::string const& line, std::vector<std::string>& completions) { fdbcliCompCmd(line, completions); },
[enabled = opt.cliHints](std::string const& line) -> LineNoise::Hint {
@ -2051,7 +2037,7 @@ ACTOR Future<int> runCli(CLIOptions opt) {
.GetLastError();
}
state int result = wait(cli(opt, &linenoise));
state int result = wait(cli(opt, &linenoise, ccf));
if (!historyFilename.empty()) {
try {
@ -2073,6 +2059,33 @@ ACTOR Future<Void> timeExit(double duration) {
return Void();
}
// Validates that fdbcli's TLS state is consistent with the coordinator
// addresses in the connection string. Returns a static error message when
// there is a mismatch (TLS configured but no TLS coordinators, or no TLS
// configured while every coordinator requires TLS), or nullptr when the
// configuration looks coherent.
const char* checkTlsConfigAgainstCoordAddrs(const ClusterConnectionString& ccs) {
	// Resolve TLS config and inspect whether any of the certificate, key, ca bytes has been set
	extern TLSConfig tlsConfig;
	auto const loaded = tlsConfig.loadSync();
	const bool tlsConfigured =
	    !loaded.getCertificateBytes().empty() || !loaded.getKeyBytes().empty() || !loaded.getCABytes().empty();
	// Count how many coordinator endpoints (numeric addresses plus hostnames) request TLS.
	int numTls = 0;
	int numTotal = 0;
	for (const auto& coord : ccs.coords) {
		++numTotal;
		if (coord.isTLS()) {
			++numTls;
		}
	}
	for (const auto& hostname : ccs.hostnames) {
		++numTotal;
		if (hostname.isTLS) {
			++numTls;
		}
	}
	if (tlsConfigured && numTls == 0) {
		return "fdbcli is configured with TLS, but none of the coordinators have TLS addresses.";
	}
	if (!tlsConfigured && numTls == numTotal) {
		return "fdbcli is not configured with TLS, but all of the coordinators have TLS addresses.";
	}
	return nullptr;
}
int main(int argc, char** argv) {
platformInit();
Error::init();
@ -2177,6 +2190,25 @@ int main(int argc, char** argv) {
return 0;
}
Reference<ClusterConnectionFile> ccf;
std::pair<std::string, bool> resolvedClusterFile = ClusterConnectionFile::lookupClusterFileName(opt.clusterFile);
try {
ccf = makeReference<ClusterConnectionFile>(resolvedClusterFile.first);
} catch (Error& e) {
if (e.code() == error_code_operation_cancelled) {
throw;
}
fprintf(stderr, "%s\n", ClusterConnectionFile::getErrorString(resolvedClusterFile, e).c_str());
return 1;
}
// Make sure that TLS configuration lines up with ":tls" prefix on coordinator addresses
if (auto errorMsg = checkTlsConfigAgainstCoordAddrs(ccf->getConnectionString())) {
fprintf(stderr, "ERROR: %s\n", errorMsg);
return 1;
}
try {
API->selectApiVersion(opt.apiVersion);
if (opt.useFutureProtocolVersion) {
@ -2188,7 +2220,7 @@ int main(int argc, char** argv) {
return opt.exit_code;
}
Future<Void> memoryUsageMonitor = startMemoryUsageMonitor(opt.memLimit);
Future<int> cliFuture = runCli(opt);
Future<int> cliFuture = runCli(opt, ccf);
Future<Void> timeoutFuture = opt.exit_timeout ? timeExit(opt.exit_timeout) : Never();
auto f = stopNetworkAfter(success(cliFuture) || timeoutFuture);
API->runNetwork();

View File

@ -290,7 +290,7 @@ void ClientKnobs::initialize(Randomize randomize) {
init( METACLUSTER_ASSIGNMENT_CLUSTERS_TO_CHECK, 5 ); if ( randomize && BUGGIFY ) METACLUSTER_ASSIGNMENT_CLUSTERS_TO_CHECK = 1;
init( METACLUSTER_ASSIGNMENT_FIRST_CHOICE_DELAY, 1.0 ); if ( randomize && BUGGIFY ) METACLUSTER_ASSIGNMENT_FIRST_CHOICE_DELAY = deterministicRandom()->random01() * 60;
init( METACLUSTER_ASSIGNMENT_AVAILABILITY_TIMEOUT, 10.0 ); if ( randomize && BUGGIFY ) METACLUSTER_ASSIGNMENT_AVAILABILITY_TIMEOUT = 1 + deterministicRandom()->random01() * 59;
init( TENANT_ENTRY_CACHE_LIST_REFRESH_INTERVAL, 2 ); if( randomize && BUGGIFY ) TENANT_ENTRY_CACHE_LIST_REFRESH_INTERVAL = deterministicRandom()->randomInt(1, 10);
// clang-format on
}

View File

@ -19,6 +19,7 @@
*/
#include "fdbclient/DatabaseConfiguration.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/SystemData.h"
#include "flow/ITrace.h"
#include "flow/Trace.h"
@ -53,6 +54,7 @@ void DatabaseConfiguration::resetInternal() {
storageMigrationType = StorageMigrationType::DEFAULT;
blobGranulesEnabled = false;
tenantMode = TenantMode::DISABLED;
encryptionAtRestMode = EncryptionAtRestMode::DISABLED;
}
int toInt(ValueRef const& v) {
@ -213,7 +215,8 @@ bool DatabaseConfiguration::isValid() const {
(perpetualStorageWiggleSpeed == 0 || perpetualStorageWiggleSpeed == 1) &&
isValidPerpetualStorageWiggleLocality(perpetualStorageWiggleLocality) &&
storageMigrationType != StorageMigrationType::UNSET && tenantMode >= TenantMode::DISABLED &&
tenantMode < TenantMode::END)) {
tenantMode < TenantMode::END && encryptionAtRestMode >= EncryptionAtRestMode::DISABLED &&
encryptionAtRestMode < EncryptionAtRestMode::END)) {
return false;
}
std::set<Key> dcIds;
@ -413,6 +416,7 @@ StatusObject DatabaseConfiguration::toJSON(bool noPolicies) const {
result["storage_migration_type"] = storageMigrationType.toString();
result["blob_granules_enabled"] = (int32_t)blobGranulesEnabled;
result["tenant_mode"] = tenantMode.toString();
result["encryption_at_rest_mode"] = encryptionAtRestMode.toString();
return result;
}
@ -643,6 +647,8 @@ bool DatabaseConfiguration::setInternal(KeyRef key, ValueRef value) {
} else if (ck == LiteralStringRef("blob_granules_enabled")) {
parse((&type), value);
blobGranulesEnabled = (type != 0);
} else if (ck == LiteralStringRef("encryption_at_rest_mode")) {
encryptionAtRestMode = EncryptionAtRestMode::fromValue(value);
} else {
return false;
}

View File

@ -201,6 +201,20 @@ std::map<std::string, std::string> configForToken(std::string const& mode) {
}
out[p + key] = format("%d", tenantMode);
}
if (key == "encryption_at_rest_mode") {
EncryptionAtRestMode mode;
if (value == "disabled") {
mode = EncryptionAtRestMode::DISABLED;
} else if (value == "aes_256_ctr") {
mode = EncryptionAtRestMode::AES_256_CTR;
} else {
printf("Error: Only disabled|aes_256_ctr are valid for encryption_at_rest_mode.\n");
return out;
}
out[p + key] = format("%d", mode);
}
return out;
}

View File

@ -342,6 +342,29 @@ ThreadResult<RangeResult> DLTransaction::readBlobGranulesFinish(
return ThreadResult<RangeResult>((ThreadSingleAssignmentVar<RangeResult>*)(r));
};
// Issues a blob-granule summary request through the dynamically-loaded C API.
// Returns unsupported_operation() when the loaded client library predates the
// fdb_transaction_summarize_blob_granules entry point. An absent summaryVersion
// is encoded as latestVersion for the C ABI, which takes a plain int64_t.
ThreadFuture<Standalone<VectorRef<BlobGranuleSummaryRef>>>
DLTransaction::summarizeBlobGranules(const KeyRangeRef& keyRange, Optional<Version> summaryVersion, int rangeLimit) {
	if (!api->transactionSummarizeBlobGranules) {
		return unsupported_operation();
	}
	int64_t sv = summaryVersion.present() ? summaryVersion.get() : latestVersion;
	FdbCApi::FDBFuture* f = api->transactionSummarizeBlobGranules(
	    tr, keyRange.begin.begin(), keyRange.begin.size(), keyRange.end.begin(), keyRange.end.size(), sv, rangeLimit);
	// Adapt the raw FDBFuture into a ThreadFuture; the extraction callback runs
	// once the future is ready and reinterprets the C summary array in place.
	return toThreadFuture<Standalone<VectorRef<BlobGranuleSummaryRef>>>(
	    api, f, [](FdbCApi::FDBFuture* f, FdbCApi* api) {
		    const FdbCApi::FDBGranuleSummary* summaries;
		    int summariesLength;
		    FdbCApi::fdb_error_t error = api->futureGetGranuleSummaryArray(f, &summaries, &summariesLength);
		    ASSERT(!error);
		    // The memory for this is stored in the FDBFuture and is released when the future gets destroyed
		    return Standalone<VectorRef<BlobGranuleSummaryRef>>(
		        VectorRef<BlobGranuleSummaryRef>((BlobGranuleSummaryRef*)summaries, summariesLength), Arena());
	    });
}
void DLTransaction::addReadConflictRange(const KeyRangeRef& keys) {
throwIfError(api->transactionAddConflictRange(
tr, keys.begin.begin(), keys.begin.size(), keys.end.begin(), keys.end.size(), FDB_CONFLICT_RANGE_TYPE_READ));
@ -728,8 +751,11 @@ void DLApi::init() {
loadClientFunction(&api->selectApiVersion, lib, fdbCPath, "fdb_select_api_version_impl", headerVersion >= 0);
loadClientFunction(&api->getClientVersion, lib, fdbCPath, "fdb_get_client_version", headerVersion >= 410);
loadClientFunction(
&api->useFutureProtocolVersion, lib, fdbCPath, "fdb_use_future_protocol_version", headerVersion >= 720);
loadClientFunction(&api->useFutureProtocolVersion,
lib,
fdbCPath,
"fdb_use_future_protocol_version",
headerVersion >= ApiVersion::withFutureProtocolVersionApi().version());
loadClientFunction(&api->setNetworkOption, lib, fdbCPath, "fdb_network_set_option", headerVersion >= 0);
loadClientFunction(&api->setupNetwork, lib, fdbCPath, "fdb_setup_network", headerVersion >= 0);
loadClientFunction(&api->runNetwork, lib, fdbCPath, "fdb_run_network", headerVersion >= 0);
@ -739,7 +765,7 @@ void DLApi::init() {
lib,
fdbCPath,
"fdb_create_database_from_connection_string",
headerVersion >= 720);
headerVersion >= ApiVersion::withCreateDBFromConnString().version());
loadClientFunction(&api->databaseOpenTenant, lib, fdbCPath, "fdb_database_open_tenant", headerVersion >= 710);
loadClientFunction(
@ -772,23 +798,39 @@ void DLApi::init() {
fdbCPath,
"fdb_database_wait_purge_granules_complete",
headerVersion >= 710);
loadClientFunction(&api->databaseBlobbifyRange, lib, fdbCPath, "fdb_database_blobbify_range", headerVersion >= 720);
loadClientFunction(
&api->databaseUnblobbifyRange, lib, fdbCPath, "fdb_database_unblobbify_range", headerVersion >= 720);
loadClientFunction(
&api->databaseListBlobbifiedRanges, lib, fdbCPath, "fdb_database_list_blobbified_ranges", headerVersion >= 720);
loadClientFunction(
&api->databaseVerifyBlobRange, lib, fdbCPath, "fdb_database_verify_blob_range", headerVersion >= 720);
loadClientFunction(&api->databaseBlobbifyRange,
lib,
fdbCPath,
"fdb_database_blobbify_range",
headerVersion >= ApiVersion::withBlobRangeApi().version());
loadClientFunction(&api->databaseUnblobbifyRange,
lib,
fdbCPath,
"fdb_database_unblobbify_range",
headerVersion >= ApiVersion::withBlobRangeApi().version());
loadClientFunction(&api->databaseListBlobbifiedRanges,
lib,
fdbCPath,
"fdb_database_list_blobbified_ranges",
headerVersion >= ApiVersion::withBlobRangeApi().version());
loadClientFunction(&api->databaseVerifyBlobRange,
lib,
fdbCPath,
"fdb_database_verify_blob_range",
headerVersion >= ApiVersion::withBlobRangeApi().version());
loadClientFunction(
&api->tenantCreateTransaction, lib, fdbCPath, "fdb_tenant_create_transaction", headerVersion >= 710);
loadClientFunction(
&api->tenantPurgeBlobGranules, lib, fdbCPath, "fdb_tenant_purge_blob_granules", headerVersion >= 720);
loadClientFunction(&api->tenantPurgeBlobGranules,
lib,
fdbCPath,
"fdb_tenant_purge_blob_granules",
headerVersion >= ApiVersion::withBlobRangeApi().version());
loadClientFunction(&api->tenantWaitPurgeGranulesComplete,
lib,
fdbCPath,
"fdb_tenant_wait_purge_granules_complete",
headerVersion >= 720);
headerVersion >= ApiVersion::withBlobRangeApi().version());
loadClientFunction(&api->tenantDestroy, lib, fdbCPath, "fdb_tenant_destroy", headerVersion >= 710);
loadClientFunction(&api->transactionSetOption, lib, fdbCPath, "fdb_transaction_set_option", headerVersion >= 0);
@ -852,18 +894,27 @@ void DLApi::init() {
lib,
fdbCPath,
"fdb_transaction_read_blob_granules_start",
headerVersion >= 720);
headerVersion >= ApiVersion::withBlobRangeApi().version());
loadClientFunction(&api->transactionReadBlobGranulesFinish,
lib,
fdbCPath,
"fdb_transaction_read_blob_granules_finish",
headerVersion >= 720);
headerVersion >= ApiVersion::withBlobRangeApi().version());
loadClientFunction(&api->transactionSummarizeBlobGranules,
lib,
fdbCPath,
"fdb_transaction_summarize_blob_granules",
headerVersion >= ApiVersion::withBlobRangeApi().version());
loadClientFunction(&api->futureGetInt64,
lib,
fdbCPath,
headerVersion >= 620 ? "fdb_future_get_int64" : "fdb_future_get_version",
headerVersion >= 0);
loadClientFunction(&api->futureGetBool, lib, fdbCPath, "fdb_future_get_bool", headerVersion >= 720);
loadClientFunction(&api->futureGetBool,
lib,
fdbCPath,
"fdb_future_get_bool",
headerVersion >= ApiVersion::withFutureGetBool().version());
loadClientFunction(&api->futureGetUInt64, lib, fdbCPath, "fdb_future_get_uint64", headerVersion >= 700);
loadClientFunction(&api->futureGetError, lib, fdbCPath, "fdb_future_get_error", headerVersion >= 0);
loadClientFunction(&api->futureGetKey, lib, fdbCPath, "fdb_future_get_key", headerVersion >= 0);
@ -876,6 +927,11 @@ void DLApi::init() {
&api->futureGetKeyValueArray, lib, fdbCPath, "fdb_future_get_keyvalue_array", headerVersion >= 0);
loadClientFunction(
&api->futureGetMappedKeyValueArray, lib, fdbCPath, "fdb_future_get_mappedkeyvalue_array", headerVersion >= 710);
loadClientFunction(&api->futureGetGranuleSummaryArray,
lib,
fdbCPath,
"fdb_future_get_granule_summary_array",
headerVersion >= ApiVersion::withBlobRangeApi().version());
loadClientFunction(&api->futureGetSharedState, lib, fdbCPath, "fdb_future_get_shared_state", headerVersion >= 710);
loadClientFunction(&api->futureSetCallback, lib, fdbCPath, "fdb_future_set_callback", headerVersion >= 0);
loadClientFunction(&api->futureCancel, lib, fdbCPath, "fdb_future_cancel", headerVersion >= 0);
@ -1250,6 +1306,16 @@ ThreadResult<RangeResult> MultiVersionTransaction::readBlobGranulesFinish(
return ThreadResult<RangeResult>(unsupported_operation());
}
// Forwards the blob-granule summary request to the currently selected
// underlying client's transaction; when no underlying transaction is
// available yet, produces a timeout future instead. The returned future is
// aborted if the underlying client/database changes while it is outstanding.
ThreadFuture<Standalone<VectorRef<BlobGranuleSummaryRef>>> MultiVersionTransaction::summarizeBlobGranules(
    const KeyRangeRef& keyRange,
    Optional<Version> summaryVersion,
    int rangeLimit) {
	auto currentTr = getTransaction();
	ThreadFuture<Standalone<VectorRef<BlobGranuleSummaryRef>>> result;
	if (currentTr.transaction) {
		result = currentTr.transaction->summarizeBlobGranules(keyRange, summaryVersion, rangeLimit);
	} else {
		result = makeTimeout<Standalone<VectorRef<BlobGranuleSummaryRef>>>();
	}
	return abortableFuture(result, currentTr.onChange);
}
void MultiVersionTransaction::atomicOp(const KeyRef& key, const ValueRef& value, uint32_t operationType) {
auto tr = getTransaction();
if (tr.transaction) {
@ -1344,7 +1410,7 @@ void MultiVersionTransaction::setOption(FDBTransactionOptions::Option option, Op
throw invalid_option();
}
if (MultiVersionApi::apiVersionAtLeast(610) && itr->second.persistent) {
if (MultiVersionApi::api->getApiVersion().hasPersistentOptions() && itr->second.persistent) {
persistentOptions.emplace_back(option, value.castTo<Standalone<StringRef>>());
}
@ -1862,7 +1928,7 @@ void MultiVersionDatabase::DatabaseState::protocolVersionChanged(ProtocolVersion
.detail("OldProtocolVersion", dbProtocolVersion);
// When the protocol version changes, clear the corresponding entry in the shared state map
// so it can be re-initialized. Only do so if there was a valid previous protocol version.
if (dbProtocolVersion.present() && MultiVersionApi::apiVersionAtLeast(710)) {
if (dbProtocolVersion.present() && MultiVersionApi::api->getApiVersion().hasClusterSharedStateMap()) {
MultiVersionApi::api->clearClusterSharedStateMapEntry(clusterId, dbProtocolVersion.get());
}
@ -1891,7 +1957,7 @@ void MultiVersionDatabase::DatabaseState::protocolVersionChanged(ProtocolVersion
return;
}
if (client->external && !MultiVersionApi::apiVersionAtLeast(610)) {
if (client->external && !MultiVersionApi::api->getApiVersion().hasInlineUpdateDatabase()) {
// Old API versions return a future when creating the database, so we need to wait for it
Reference<DatabaseState> self = Reference<DatabaseState>::addRef(this);
dbReady = mapThreadFuture<Void, Void>(
@ -1975,7 +2041,8 @@ void MultiVersionDatabase::DatabaseState::updateDatabase(Reference<IDatabase> ne
.detail("ConnectionRecord", connectionRecord);
}
}
if (db.isValid() && dbProtocolVersion.present() && MultiVersionApi::apiVersionAtLeast(710)) {
if (db.isValid() && dbProtocolVersion.present() &&
MultiVersionApi::api->getApiVersion().hasClusterSharedStateMap()) {
Future<std::string> updateResult =
MultiVersionApi::api->updateClusterSharedStateMap(connectionRecord, dbProtocolVersion.get(), db);
sharedStateUpdater = map(errorOr(updateResult), [this](ErrorOr<std::string> result) {
@ -2095,11 +2162,6 @@ void MultiVersionDatabase::LegacyVersionMonitor::close() {
}
// MultiVersionApi
bool MultiVersionApi::apiVersionAtLeast(int minVersion) {
ASSERT_NE(MultiVersionApi::api->apiVersion, 0);
return MultiVersionApi::api->apiVersion >= minVersion || MultiVersionApi::api->apiVersion < 0;
}
void MultiVersionApi::runOnExternalClientsAllThreads(std::function<void(Reference<ClientInfo>)> func,
bool runOnFailedClients) {
for (int i = 0; i < threadCount; i++) {
@ -2145,17 +2207,18 @@ Reference<ClientInfo> MultiVersionApi::getLocalClient() {
}
void MultiVersionApi::selectApiVersion(int apiVersion) {
ApiVersion newApiVersion(apiVersion);
if (!localClient) {
localClient = makeReference<ClientInfo>(getLocalClientAPI());
ASSERT(localClient);
}
if (this->apiVersion != 0 && this->apiVersion != apiVersion) {
if (this->apiVersion.isValid() && this->apiVersion != newApiVersion) {
throw api_version_already_set();
}
localClient->api->selectApiVersion(apiVersion);
this->apiVersion = apiVersion;
this->apiVersion = newApiVersion;
}
const char* MultiVersionApi::getClientVersion() {
@ -2408,6 +2471,18 @@ void MultiVersionApi::setNetworkOptionInternal(FDBNetworkOptions::Option option,
} else if (option == FDBNetworkOptions::FUTURE_VERSION_CLIENT_LIBRARY) {
validateOption(value, true, false, false);
addExternalLibrary(abspath(value.get().toString()), true);
} else if (option == FDBNetworkOptions::TRACE_FILE_IDENTIFIER) {
validateOption(value, true, false, true);
traceFileIdentifier = value.get().toString();
{
MutexHolder holder(lock);
// Forward the option unmodified only to the local client and let it validate it.
// While for external clients the trace file identifiers are determined in setupNetwork
localClient->api->setNetworkOption(option, value);
}
} else if (option == FDBNetworkOptions::TRACE_SHARE_AMONG_CLIENT_THREADS) {
validateOption(value, false, true);
traceShareBaseNameAmongThreads = true;
} else {
forwardOption = true;
}
@ -2451,9 +2526,13 @@ void MultiVersionApi::setupNetwork() {
// Copy external lib for each thread
if (externalClients.count(filename) == 0) {
externalClients[filename] = {};
for (const auto& tmp : copyExternalLibraryPerThread(path)) {
auto libCopies = copyExternalLibraryPerThread(path);
for (int idx = 0; idx < libCopies.size(); ++idx) {
externalClients[filename].push_back(Reference<ClientInfo>(
new ClientInfo(new DLApi(tmp.first, tmp.second /*unlink on load*/), path, useFutureVersion)));
new ClientInfo(new DLApi(libCopies[idx].first, libCopies[idx].second /*unlink on load*/),
path,
useFutureVersion,
idx)));
}
}
}
@ -2492,20 +2571,30 @@ void MultiVersionApi::setupNetwork() {
if (!bypassMultiClientApi) {
runOnExternalClientsAllThreads([this](Reference<ClientInfo> client) {
TraceEvent("InitializingExternalClient").detail("LibraryPath", client->libPath);
client->api->selectApiVersion(apiVersion);
client->api->selectApiVersion(apiVersion.version());
if (client->useFutureVersion) {
client->api->useFutureProtocolVersion();
}
client->loadVersion();
});
std::string baseTraceFileId;
if (apiVersion.hasTraceFileIdentifier()) {
// TRACE_FILE_IDENTIFIER option is supported since 6.3
baseTraceFileId = traceFileIdentifier.empty() ? format("%d", getpid()) : traceFileIdentifier;
}
MutexHolder holder(lock);
runOnExternalClientsAllThreads([this, transportId](Reference<ClientInfo> client) {
runOnExternalClientsAllThreads([this, transportId, baseTraceFileId](Reference<ClientInfo> client) {
for (auto option : options) {
client->api->setNetworkOption(option.first, option.second.castTo<StringRef>());
}
client->api->setNetworkOption(FDBNetworkOptions::EXTERNAL_CLIENT_TRANSPORT_ID, std::to_string(transportId));
if (!baseTraceFileId.empty()) {
client->api->setNetworkOption(
FDBNetworkOptions::TRACE_FILE_IDENTIFIER,
traceShareBaseNameAmongThreads ? baseTraceFileId : client->getTraceFileIdentifier(baseTraceFileId));
}
client->api->setupNetwork();
});
@ -2544,21 +2633,17 @@ void MultiVersionApi::runNetwork() {
std::vector<THREAD_HANDLE> handles;
if (!bypassMultiClientApi) {
for (int threadNum = 0; threadNum < threadCount; threadNum++) {
runOnExternalClients(threadNum, [&handles, threadNum](Reference<ClientInfo> client) {
if (client->external) {
std::string threadName = format("fdb-%s-%d", client->releaseVersion.c_str(), threadNum);
if (threadName.size() > 15) {
threadName = format("fdb-%s", client->releaseVersion.c_str());
if (threadName.size() > 15) {
threadName = "fdb-external";
}
}
handles.push_back(
g_network->startThread(&runNetworkThread, client.getPtr(), 0, threadName.c_str()));
runOnExternalClientsAllThreads([&handles](Reference<ClientInfo> client) {
ASSERT(client->external);
std::string threadName = format("fdb-%s-%d", client->releaseVersion.c_str(), client->threadIndex);
if (threadName.size() > 15) {
threadName = format("fdb-%s", client->releaseVersion.c_str());
if (threadName.size() > 15) {
threadName = "fdb-external";
}
});
}
}
handles.push_back(g_network->startThread(&runNetworkThread, client.getPtr(), 0, threadName.c_str()));
});
}
localClient->api->runNetwork();
@ -2673,7 +2758,7 @@ ACTOR Future<std::string> updateClusterSharedStateMapImpl(MultiVersionApi* self,
// The cluster ID will be the connection record string (either a filename or the connection string itself)
// in API versions before we could read the cluster ID.
state std::string clusterId = connectionRecord.toString();
if (MultiVersionApi::apiVersionAtLeast(720)) {
if (MultiVersionApi::api->getApiVersion().hasCreateDBFromConnString()) {
state Reference<ITransaction> tr = db->createTransaction();
loop {
try {
@ -2848,7 +2933,7 @@ void MultiVersionApi::loadEnvironmentVariableNetworkOptions() {
MultiVersionApi::MultiVersionApi()
: callbackOnMainThread(true), localClientDisabled(false), networkStartSetup(false), networkSetup(false),
bypassMultiClientApi(false), externalClient(false), apiVersion(0), threadCount(0), tmpDir("/tmp"),
envOptionsLoaded(false) {}
traceShareBaseNameAmongThreads(false), envOptionsLoaded(false) {}
MultiVersionApi* MultiVersionApi::api = new MultiVersionApi();
@ -2885,6 +2970,12 @@ bool ClientInfo::canReplace(Reference<ClientInfo> other) const {
return !protocolVersion.isCompatible(other->protocolVersion);
}
// Builds a per-client trace file identifier of the form
// <base>_v<releaseVersion with '.' replaced by '_'>t<threadIndex>, so that
// each external client thread writes to a distinct trace file.
std::string ClientInfo::getTraceFileIdentifier(const std::string& baseIdentifier) {
	std::string sanitizedVersion = releaseVersion;
	for (auto& ch : sanitizedVersion) {
		if (ch == '.') {
			ch = '_';
		}
	}
	return format("%s_v%st%d", baseIdentifier.c_str(), sanitizedVersion.c_str(), threadIndex);
}
// UNIT TESTS
TEST_CASE("/fdbclient/multiversionclient/EnvironmentVariableParsing") {
auto vals = parseOptionValues("a");

View File

@ -1273,7 +1273,7 @@ void DatabaseContext::registerSpecialKeysImpl(SpecialKeySpace::MODULE module,
std::unique_ptr<SpecialKeyRangeReadImpl>&& impl,
int deprecatedVersion) {
// if deprecated, add the implementation when the api version is less than the deprecated version
if (deprecatedVersion == -1 || apiVersion < deprecatedVersion) {
if (deprecatedVersion == -1 || apiVersion.version() < deprecatedVersion) {
specialKeySpace->registerKeyRange(module, type, impl->getKeyRange(), impl.get());
specialKeySpaceModules.push_back(std::move(impl));
}
@ -1426,7 +1426,7 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<IClusterConnection
EnableLocalityLoadBalance enableLocalityLoadBalance,
LockAware lockAware,
IsInternal internal,
int apiVersion,
int _apiVersion,
IsSwitchable switchable,
Optional<TenantName> defaultTenant)
: lockAware(lockAware), switchable(switchable), connectionRecord(connectionRecord), proxyProvisional(false),
@ -1466,7 +1466,7 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<IClusterConnection
bgGranulesPerRequest(1000), outstandingWatches(0), sharedStatePtr(nullptr), lastGrvTime(0.0), cachedReadVersion(0),
lastRkBatchThrottleTime(0.0), lastRkDefaultThrottleTime(0.0), lastProxyRequestTime(0.0),
transactionTracingSample(false), taskID(taskID), clientInfo(clientInfo), clientInfoMonitor(clientInfoMonitor),
coordinator(coordinator), apiVersion(apiVersion), mvCacheInsertLocation(0), healthMetricsLastUpdated(0),
coordinator(coordinator), apiVersion(_apiVersion), mvCacheInsertLocation(0), healthMetricsLastUpdated(0),
detailedHealthMetricsLastUpdated(0), smoothMidShardSize(CLIENT_KNOBS->SHARD_STAT_SMOOTH_AMOUNT),
specialKeySpace(std::make_unique<SpecialKeySpace>(specialKeys.begin, specialKeys.end, /* test */ false)),
connectToDatabaseEventCacheHolder(format("ConnectToDatabase/%s", dbId.toString().c_str())) {
@ -1482,7 +1482,7 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<IClusterConnection
metadataVersionCache.resize(CLIENT_KNOBS->METADATA_VERSION_CACHE_SIZE);
maxOutstandingWatches = CLIENT_KNOBS->DEFAULT_MAX_OUTSTANDING_WATCHES;
snapshotRywEnabled = apiVersionAtLeast(300) ? 1 : 0;
snapshotRywEnabled = apiVersion.hasSnapshotRYW() ? 1 : 0;
logger = databaseLogger(this) && tssLogger(this);
locationCacheSize = g_network->isSimulated() ? CLIENT_KNOBS->LOCATION_CACHE_EVICTION_SIZE_SIM
@ -1501,7 +1501,7 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<IClusterConnection
smoothMidShardSize.reset(CLIENT_KNOBS->INIT_MID_SHARD_BYTES);
globalConfig = std::make_unique<GlobalConfig>(this);
if (apiVersionAtLeast(720)) {
if (apiVersion.hasTenantsV2()) {
registerSpecialKeysImpl(
SpecialKeySpace::MODULE::CLUSTERID,
SpecialKeySpace::IMPLTYPE::READONLY,
@ -1521,14 +1521,13 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<IClusterConnection
SpecialKeySpace::MODULE::MANAGEMENT,
SpecialKeySpace::IMPLTYPE::READWRITE,
std::make_unique<TenantRangeImpl<true>>(SpecialKeySpace::getManagementApiCommandRange("tenant")));
}
if (apiVersionAtLeast(710) && !apiVersionAtLeast(720)) {
} else if (apiVersion.hasTenantsV1()) {
registerSpecialKeysImpl(
SpecialKeySpace::MODULE::MANAGEMENT,
SpecialKeySpace::IMPLTYPE::READWRITE,
std::make_unique<TenantRangeImpl<false>>(SpecialKeySpace::getManagementApiCommandRange("tenantmap")));
}
if (apiVersionAtLeast(700)) {
if (apiVersion.version() >= 700) {
registerSpecialKeysImpl(SpecialKeySpace::MODULE::ERRORMSG,
SpecialKeySpace::IMPLTYPE::READONLY,
std::make_unique<SingleSpecialKeyImpl>(
@ -1651,7 +1650,7 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<IClusterConnection
std::make_unique<ActorProfilerConf>(
SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::ACTOR_PROFILER_CONF)));
}
if (apiVersionAtLeast(630)) {
if (apiVersion.version() >= 630) {
registerSpecialKeysImpl(SpecialKeySpace::MODULE::TRANSACTION,
SpecialKeySpace::IMPLTYPE::READONLY,
std::make_unique<ConflictingKeysImpl>(conflictingKeysRange));
@ -4981,7 +4980,7 @@ ACTOR Future<Void> getRangeStreamFragment(Reference<TransactionState> trState,
throw;
}
if (e.code() == error_code_wrong_shard_server || e.code() == error_code_all_alternatives_failed ||
e.code() == error_code_connection_failed) {
e.code() == error_code_connection_failed || e.code() == error_code_request_maybe_delivered) {
const KeyRangeRef& range = locations[shard].range;
if (reverse)
@ -5542,7 +5541,7 @@ Future<RangeResult> Transaction::getRange(const KeySelector& begin,
// A method for streaming data from the storage server that is more efficient than getRange when reading large amounts
// of data
Future<Void> Transaction::getRangeStream(const PromiseStream<RangeResult>& results,
Future<Void> Transaction::getRangeStream(PromiseStream<RangeResult>& results,
const KeySelector& begin,
const KeySelector& end,
GetRangeLimits limits,
@ -5581,7 +5580,7 @@ Future<Void> Transaction::getRangeStream(const PromiseStream<RangeResult>& resul
::getRangeStream(trState, results, getReadVersion(), b, e, limits, conflictRange, snapshot, reverse), results);
}
Future<Void> Transaction::getRangeStream(const PromiseStream<RangeResult>& results,
Future<Void> Transaction::getRangeStream(PromiseStream<RangeResult>& results,
const KeySelector& begin,
const KeySelector& end,
int limit,
@ -8024,13 +8023,13 @@ Future<Standalone<VectorRef<BlobGranuleChunkRef>>> Transaction::readBlobGranules
ACTOR Future<Standalone<VectorRef<BlobGranuleSummaryRef>>> summarizeBlobGranulesActor(Transaction* self,
KeyRange range,
Version summaryVersion,
Optional<Version> summaryVersion,
int rangeLimit) {
state Version readVersionOut;
Standalone<VectorRef<BlobGranuleChunkRef>> chunks =
wait(readBlobGranulesActor(self, range, 0, summaryVersion, &readVersionOut, rangeLimit, true));
ASSERT(chunks.size() <= rangeLimit);
ASSERT(readVersionOut == summaryVersion);
ASSERT(!summaryVersion.present() || readVersionOut == summaryVersion.get());
Standalone<VectorRef<BlobGranuleSummaryRef>> summaries;
summaries.reserve(summaries.arena(), chunks.size());
for (auto& it : chunks) {
@ -8040,9 +8039,8 @@ ACTOR Future<Standalone<VectorRef<BlobGranuleSummaryRef>>> summarizeBlobGranules
return summaries;
}
Future<Standalone<VectorRef<BlobGranuleSummaryRef>>> Transaction::summarizeBlobGranules(const KeyRange& range,
Version summaryVersion,
int rangeLimit) {
// Public entry point for summarizing blob granules over a key range; delegates
// to summarizeBlobGranulesActor. An absent summaryVersion lets the actor pick
// the read version; rangeLimit bounds the number of granules summarized.
Future<Standalone<VectorRef<BlobGranuleSummaryRef>>>
Transaction::summarizeBlobGranules(const KeyRange& range, Optional<Version> summaryVersion, int rangeLimit) {
	return summarizeBlobGranulesActor(this, range, summaryVersion, rangeLimit);
}
@ -9543,6 +9541,10 @@ ACTOR Future<Void> getChangeFeedStreamActor(Reference<DatabaseContext> db,
if (useIdx >= 0) {
chosenLocations[loc] = useIdx;
loc++;
if (g_network->isSimulated() && !g_simulator.speedUpSimulation && BUGGIFY_WITH_PROB(0.01)) {
// simulate as if we had to wait for all alternatives delayed, before the next one
wait(delay(deterministicRandom()->random01()));
}
continue;
}
@ -9604,7 +9606,8 @@ ACTOR Future<Void> getChangeFeedStreamActor(Reference<DatabaseContext> db,
if (e.code() == error_code_wrong_shard_server || e.code() == error_code_all_alternatives_failed ||
e.code() == error_code_connection_failed || e.code() == error_code_unknown_change_feed ||
e.code() == error_code_broken_promise || e.code() == error_code_future_version) {
e.code() == error_code_broken_promise || e.code() == error_code_future_version ||
e.code() == error_code_request_maybe_delivered) {
db->changeFeedCache.erase(rangeID);
cx->invalidateCache(Key(), keys);
if (begin == lastBeginVersion) {

View File

@ -1823,6 +1823,25 @@ Future<Standalone<VectorRef<BlobGranuleChunkRef>>> ReadYourWritesTransaction::re
return waitOrError(tr.readBlobGranules(range, begin, readVersion, readVersionOut), resetPromise.getFuture());
}
// RYW wrapper for Transaction::summarizeBlobGranules. Runs the standard RYW
// guard checks in order — commit in progress, prior reset/error, key-range
// legality — then delegates to the underlying transaction. The result also
// fails if the transaction is reset while the request is outstanding.
Future<Standalone<VectorRef<BlobGranuleSummaryRef>>> ReadYourWritesTransaction::summarizeBlobGranules(
    const KeyRange& range,
    Optional<Version> summaryVersion,
    int rangeLimit) {
	if (checkUsedDuringCommit()) {
		return used_during_commit();
	}
	if (resetPromise.isSet()) {
		return resetPromise.getFuture().getError();
	}
	KeyRef legalLimit = getMaxReadKey();
	if (range.begin > legalLimit || range.end > legalLimit) {
		return key_outside_legal_range();
	}
	return waitOrError(tr.summarizeBlobGranules(range, summaryVersion, rangeLimit), resetPromise.getFuture());
}
void ReadYourWritesTransaction::addReadConflictRange(KeyRangeRef const& keys) {
if (checkUsedDuringCommit()) {
throw used_during_commit();

View File

@ -848,6 +848,11 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"disabled",
"optional_experimental",
"required_experimental"
]},
"encryption_at_rest_mode": {
"$enum":[
"disabled",
"aes_256_ctr"
]}
},
"data":{

View File

@ -775,7 +775,6 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( MAX_PARALLEL_QUICK_GET_VALUE, 50 ); if ( randomize && BUGGIFY ) MAX_PARALLEL_QUICK_GET_VALUE = deterministicRandom()->randomInt(1, 100);
init( QUICK_GET_KEY_VALUES_LIMIT, 2000 );
init( QUICK_GET_KEY_VALUES_LIMIT_BYTES, 1e7 );
init( STORAGE_SERVER_SHARD_AWARE, true );
//Wait Failure
init( MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS, 250 ); if( randomize && BUGGIFY ) MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS = 2;

View File

@ -852,6 +852,8 @@ const KeyRef perpetualStorageWiggleStatsPrefix(
const KeyRef triggerDDTeamInfoPrintKey(LiteralStringRef("\xff/triggerDDTeamInfoPrint"));
const KeyRef encryptionAtRestModeConfKey(LiteralStringRef("\xff/conf/encryption_at_rest_mode"));
const KeyRangeRef excludedServersKeys(LiteralStringRef("\xff/conf/excluded/"), LiteralStringRef("\xff/conf/excluded0"));
const KeyRef excludedServersPrefix = excludedServersKeys.begin;
const KeyRef excludedServersVersionKey = LiteralStringRef("\xff/conf/excluded");

View File

@ -22,6 +22,7 @@
#include "fdbclient/SystemData.h"
#include "fdbclient/Tenant.h"
#include "libb64/encode.h"
#include "flow/ApiVersion.h"
#include "flow/UnitTest.h"
Key TenantMapEntry::idToPrefix(int64_t id) {
@ -127,7 +128,7 @@ std::string TenantMapEntry::toJson(int apiVersion) const {
tenantEntry["id"] = id;
tenantEntry["encrypted"] = encrypted;
if (apiVersion >= 720 || apiVersion == Database::API_VERSION_LATEST) {
if (apiVersion >= ApiVersion::withTenantsV2().version()) {
json_spirit::mObject prefixObject;
std::string encodedPrefix = base64::encoder::from_string(prefix.toString());
// Remove trailing newline

View File

@ -418,6 +418,7 @@ ThreadFuture<Standalone<VectorRef<BlobGranuleChunkRef>>> ThreadSafeTransaction::
return tr->readBlobGranules(r, beginVersion, readVersion, readVersionOut);
});
}
ThreadResult<RangeResult> ThreadSafeTransaction::readBlobGranulesFinish(
ThreadFuture<Standalone<VectorRef<BlobGranuleChunkRef>>> startFuture,
const KeyRangeRef& keyRange,
@ -429,6 +430,19 @@ ThreadResult<RangeResult> ThreadSafeTransaction::readBlobGranulesFinish(
return loadAndMaterializeBlobGranules(files, keyRange, beginVersion, readVersion, granuleContext);
}
ThreadFuture<Standalone<VectorRef<BlobGranuleSummaryRef>>> ThreadSafeTransaction::summarizeBlobGranules(
    const KeyRangeRef& keyRange,
    Optional<Version> summaryVersion,
    int rangeLimit) {
	// Copy the range so the closure owns its memory, and capture the raw
	// transaction pointer; the request itself must run on the network thread.
	ISingleThreadTransaction* txn = this->tr;
	KeyRange ownedRange = keyRange;
	return onMainThread(
	    [txn, ownedRange, summaryVersion, rangeLimit]() -> Future<Standalone<VectorRef<BlobGranuleSummaryRef>>> {
		    txn->checkDeferredError();
		    return txn->summarizeBlobGranules(ownedRange, summaryVersion, rangeLimit);
	    });
}
void ThreadSafeTransaction::addReadConflictRange(const KeyRangeRef& keys) {
KeyRange r = keys;
@ -601,7 +615,7 @@ extern const char* getSourceVersion();
ThreadSafeApi::ThreadSafeApi() : apiVersion(-1), transportId(0) {}
void ThreadSafeApi::selectApiVersion(int apiVersion) {
this->apiVersion = apiVersion;
this->apiVersion = ApiVersion(apiVersion);
}
const char* ThreadSafeApi::getClientVersion() {
@ -673,12 +687,12 @@ void ThreadSafeApi::stopNetwork() {
Reference<IDatabase> ThreadSafeApi::createDatabase(const char* clusterFilePath) {
return Reference<IDatabase>(
new ThreadSafeDatabase(ThreadSafeDatabase::ConnectionRecordType::FILE, clusterFilePath, apiVersion));
new ThreadSafeDatabase(ThreadSafeDatabase::ConnectionRecordType::FILE, clusterFilePath, apiVersion.version()));
}
Reference<IDatabase> ThreadSafeApi::createDatabaseFromConnectionString(const char* connectionString) {
return Reference<IDatabase>(new ThreadSafeDatabase(
ThreadSafeDatabase::ConnectionRecordType::CONNECTION_STRING, connectionString, apiVersion));
ThreadSafeDatabase::ConnectionRecordType::CONNECTION_STRING, connectionString, apiVersion.version()));
}
void ThreadSafeApi::addNetworkThreadCompletionHook(void (*hook)(void*), void* hookParameter) {

View File

@ -284,6 +284,7 @@ public:
int METACLUSTER_ASSIGNMENT_CLUSTERS_TO_CHECK;
double METACLUSTER_ASSIGNMENT_FIRST_CHOICE_DELAY;
double METACLUSTER_ASSIGNMENT_AVAILABILITY_TIMEOUT;
int TENANT_ENTRY_CACHE_LIST_REFRESH_INTERVAL; // How often the TenantEntryCache is refreshed
ClientKnobs(Randomize randomize);
void initialize(Randomize randomize);

View File

@ -124,7 +124,7 @@ struct ConfigTransactionCommitRequest {
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, arena, generation, mutations, annotation, reply);
serializer(ar, generation, mutations, annotation, reply, arena);
}
};

View File

@ -256,6 +256,8 @@ struct DatabaseConfiguration {
bool blobGranulesEnabled;
TenantMode tenantMode;
EncryptionAtRestMode encryptionAtRestMode;
// Excluded servers (no state should be here)
bool isExcludedServer(NetworkAddressList) const;
bool isExcludedLocality(const LocalityData& locality) const;

View File

@ -21,6 +21,7 @@
#ifndef DatabaseContext_h
#define DatabaseContext_h
#include "fdbclient/Notified.h"
#include "flow/ApiVersion.h"
#include "flow/FastAlloc.h"
#include "flow/FastRef.h"
#include "fdbclient/GlobalConfig.actor.h"
@ -237,7 +238,7 @@ public:
EnableLocalityLoadBalance,
TaskPriority taskID = TaskPriority::DefaultEndpoint,
LockAware = LockAware::False,
int apiVersion = Database::API_VERSION_LATEST,
int _apiVersion = ApiVersion::LATEST_VERSION,
IsSwitchable = IsSwitchable::False);
~DatabaseContext();
@ -253,7 +254,7 @@ public:
enableLocalityLoadBalance,
lockAware,
internal,
apiVersion,
apiVersion.version(),
switchable,
defaultTenant));
cx->globalConfig->init(Reference<AsyncVar<ClientDBInfo> const>(cx->clientInfo),
@ -344,7 +345,7 @@ public:
}
}
int apiVersionAtLeast(int minVersion) const { return apiVersion < 0 || apiVersion >= minVersion; }
int apiVersionAtLeast(int minVersion) const { return apiVersion.version() >= minVersion; }
Future<Void> onConnected(); // Returns after a majority of coordination servers are available and have reported a
// leader. The cluster file therefore is valid, but the database might be unavailable.
@ -402,7 +403,7 @@ public:
EnableLocalityLoadBalance,
LockAware,
IsInternal = IsInternal::True,
int apiVersion = Database::API_VERSION_LATEST,
int _apiVersion = ApiVersion::LATEST_VERSION,
IsSwitchable = IsSwitchable::False,
Optional<TenantName> defaultTenant = Optional<TenantName>());
@ -595,7 +596,7 @@ public:
Future<Void> statusLeaderMon;
double lastStatusFetch;
int apiVersion;
ApiVersion apiVersion;
int mvCacheInsertLocation;
std::vector<std::pair<Version, Optional<Value>>> metadataVersionCache;

View File

@ -144,7 +144,7 @@ struct EKPGetBaseCipherKeysRequestInfo {
EncryptCipherBaseKeyId baseCipherId;
// Encryption domain name - ancillary metadata information, an encryption key should be uniquely identified by
// {domainId, cipherBaseId} tuple
EncryptCipherDomainName domainName;
EncryptCipherDomainNameRef domainName;
EKPGetBaseCipherKeysRequestInfo()
: domainId(ENCRYPT_INVALID_DOMAIN_ID), baseCipherId(ENCRYPT_INVALID_CIPHER_KEY_ID) {}
@ -176,7 +176,7 @@ struct EKPGetBaseCipherKeysByIdsRequest {
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, arena, baseCipherInfos, debugId, reply);
serializer(ar, baseCipherInfos, debugId, reply, arena);
}
};
@ -193,7 +193,7 @@ struct EKPGetLatestBaseCipherKeysReply {
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, arena, baseCipherDetails, numHits, error);
serializer(ar, baseCipherDetails, numHits, error, arena);
}
};
@ -203,7 +203,7 @@ struct EKPGetLatestCipherKeysRequestInfo {
EncryptCipherDomainId domainId;
// Encryption domain name - ancillary metadata information, an encryption key should be uniquely identified by
// {domainId, cipherBaseId} tuple
EncryptCipherDomainName domainName;
EncryptCipherDomainNameRef domainName;
EKPGetLatestCipherKeysRequestInfo() : domainId(ENCRYPT_INVALID_DOMAIN_ID) {}
EKPGetLatestCipherKeysRequestInfo(const EncryptCipherDomainId dId, StringRef name, Arena& arena)
@ -239,7 +239,7 @@ struct EKPGetLatestBaseCipherKeysRequest {
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, arena, encryptDomainInfos, debugId, reply);
serializer(ar, encryptDomainInfos, debugId, reply, arena);
}
};

View File

@ -1408,6 +1408,55 @@ struct TenantMode {
uint32_t mode;
};
// Database-wide encryption-at-rest configuration setting.
struct EncryptionAtRestMode {
	// These enumerated values are stored in the database configuration, so can NEVER be changed. Only add new ones
	// just before END.
	enum Mode { DISABLED = 0, AES_256_CTR = 1, END = 2 };

	EncryptionAtRestMode() : mode(DISABLED) {}
	EncryptionAtRestMode(Mode mode) : mode(mode) {
		// Out-of-range values (e.g. written by a newer release) collapse to DISABLED.
		if ((uint32_t)mode >= END) {
			this->mode = DISABLED;
		}
	}
	operator Mode() const { return Mode(mode); }

	template <class Ar>
	void serialize(Ar& ar) {
		serializer(ar, mode);
	}

	// Human-readable name of the mode (e.g. for status JSON).
	std::string toString() const {
		if (mode == DISABLED) {
			return "disabled";
		}
		if (mode == AES_256_CTR) {
			return "aes_256_ctr";
		}
		ASSERT(false);
		return "";
	}

	// Encode the mode as the decimal string stored in the configuration.
	Value toValue() const { return ValueRef(format("%d", (int)mode)); }

	// Decode a stored configuration value; an absent or unparsable value maps to DISABLED.
	static EncryptionAtRestMode fromValue(Optional<ValueRef> val) {
		if (!val.present()) {
			return DISABLED;
		}
		// A failed parsing returns 0 (DISABLED)
		int num = atoi(val.get().toString().c_str());
		return (num < 0 || num >= END) ? EncryptionAtRestMode(DISABLED)
		                               : EncryptionAtRestMode(static_cast<Mode>(num));
	}

	uint32_t mode;
};
typedef StringRef ClusterNameRef;
typedef Standalone<ClusterNameRef> ClusterName;

View File

@ -70,7 +70,8 @@ enum class ConfigurationResult {
SUCCESS_WARN_SHARDED_ROCKSDB_EXPERIMENTAL,
DATABASE_CREATED_WARN_ROCKSDB_EXPERIMENTAL,
DATABASE_CREATED_WARN_SHARDED_ROCKSDB_EXPERIMENTAL,
DATABASE_IS_REGISTERED
DATABASE_IS_REGISTERED,
ENCRYPTION_AT_REST_MODE_ALREADY_SET
};
enum class CoordinatorsResult {
@ -274,6 +275,9 @@ Future<ConfigurationResult> changeConfig(Reference<DB> db, std::map<std::string,
if (!isCompleteConfiguration(m)) {
return ConfigurationResult::INCOMPLETE_CONFIGURATION;
}
} else if (m.count(encryptionAtRestModeConfKey.toString()) != 0) {
// Encryption data at-rest mode can be set only at the time of database creation
return ConfigurationResult::ENCRYPTION_AT_REST_MODE_ALREADY_SET;
}
state Future<Void> tooLong = delay(60);

View File

@ -88,7 +88,7 @@ Future<EKPGetLatestBaseCipherKeysReply> getUncachedLatestEncryptCipherKeys(Refer
ACTOR template <class T>
Future<std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>>> getLatestEncryptCipherKeys(
Reference<AsyncVar<T> const> db,
std::unordered_map<EncryptCipherDomainId, EncryptCipherDomainName> domains) {
std::unordered_map<EncryptCipherDomainId, EncryptCipherDomainNameRef> domains) {
state Reference<BlobCipherKeyCache> cipherKeyCache = BlobCipherKeyCache::getInstance();
state std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>> cipherKeys;
state EKPGetLatestBaseCipherKeysRequest request;
@ -253,8 +253,8 @@ struct TextAndHeaderCipherKeys {
ACTOR template <class T>
Future<TextAndHeaderCipherKeys> getLatestEncryptCipherKeysForDomain(Reference<AsyncVar<T> const> db,
EncryptCipherDomainId domainId,
EncryptCipherDomainName domainName) {
std::unordered_map<EncryptCipherDomainId, EncryptCipherDomainName> domains;
EncryptCipherDomainNameRef domainName) {
std::unordered_map<EncryptCipherDomainId, EncryptCipherDomainNameRef> domains;
domains[domainId] = domainName;
domains[ENCRYPT_HEADER_DOMAIN_ID] = FDB_DEFAULT_ENCRYPT_DOMAIN_NAME;
std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>> cipherKeys =

View File

@ -100,6 +100,9 @@ public:
Version readVersion,
ReadBlobGranuleContext granuleContext) = 0;
virtual ThreadFuture<Standalone<VectorRef<BlobGranuleSummaryRef>>>
summarizeBlobGranules(const KeyRangeRef& keyRange, Optional<Version> summaryVersion, int rangeLimit) = 0;
virtual void atomicOp(const KeyRef& key, const ValueRef& value, uint32_t operationType) = 0;
virtual void set(const KeyRef& key, const ValueRef& value) = 0;
virtual void clear(const KeyRef& begin, const KeyRef& end) = 0;

View File

@ -64,6 +64,11 @@ public:
Version* readVersionOut) override {
throw client_invalid_operation();
}
Future<Standalone<VectorRef<BlobGranuleSummaryRef>>> summarizeBlobGranules(KeyRange const& range,
                                                                           Optional<Version> readVersion,
                                                                           int rangeLimit) override {
	// Unconditionally rejects the call: blob granule summarization is not
	// available on this transaction type.
	throw client_invalid_operation();
}
Future<int64_t> getEstimatedRangeSizeBytes(KeyRange const& keys) override { throw client_invalid_operation(); }
void addReadConflictRange(KeyRangeRef const& keys) override { throw client_invalid_operation(); }
void makeSelfConflicting() override { throw client_invalid_operation(); }

View File

@ -85,6 +85,9 @@ public:
Version begin,
Optional<Version> readVersion,
Version* readVersionOut = nullptr) = 0;
virtual Future<Standalone<VectorRef<BlobGranuleSummaryRef>>> summarizeBlobGranules(KeyRange const& range,
Optional<Version> summaryVersion,
int rangeLimit) = 0;
virtual void addReadConflictRange(KeyRangeRef const& keys) = 0;
virtual void makeSelfConflicting() = 0;
virtual void atomicOp(KeyRef const& key, ValueRef const& operand, uint32_t operationType) = 0;

View File

@ -26,6 +26,7 @@
#include "fdbclient/FDBOptions.g.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/IClientApi.h"
#include "flow/ApiVersion.h"
#include "flow/ProtocolVersion.h"
#include "flow/ThreadHelper.actor.h"
@ -89,6 +90,14 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
const void* endKey;
int endKeyLength;
} FDBKeyRange;
typedef struct granulesummary {
FDBKeyRange key_range;
int64_t snapshot_version;
int64_t snapshot_size;
int64_t delta_version;
int64_t delta_size;
} FDBGranuleSummary;
#pragma pack(pop)
typedef struct readgranulecontext {
@ -332,6 +341,14 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
int64_t readVersion,
FDBReadBlobGranuleContext* granule_context);
FDBFuture* (*transactionSummarizeBlobGranules)(FDBTransaction* tr,
uint8_t const* begin_key_name,
int begin_key_name_length,
uint8_t const* end_key_name,
int end_key_name_length,
int64_t summaryVersion,
int rangeLimit);
FDBFuture* (*transactionCommit)(FDBTransaction* tr);
fdb_error_t (*transactionGetCommittedVersion)(FDBTransaction* tr, int64_t* outVersion);
FDBFuture* (*transactionGetApproximateSize)(FDBTransaction* tr);
@ -363,6 +380,7 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
FDBMappedKeyValue const** outKVM,
int* outCount,
fdb_bool_t* outMore);
fdb_error_t (*futureGetGranuleSummaryArray)(FDBFuture* f, const FDBGranuleSummary** out_summaries, int* outCount);
fdb_error_t (*futureGetSharedState)(FDBFuture* f, DatabaseSharedState** outPtr);
fdb_error_t (*futureSetCallback)(FDBFuture* f, FDBCallback callback, void* callback_parameter);
void (*futureCancel)(FDBFuture* f);
@ -441,6 +459,10 @@ public:
Version readVersion,
ReadBlobGranuleContext granuleContext) override;
ThreadFuture<Standalone<VectorRef<BlobGranuleSummaryRef>>> summarizeBlobGranules(const KeyRangeRef& keyRange,
Optional<Version> summaryVersion,
int rangeLimit) override;
void addReadConflictRange(const KeyRangeRef& keys) override;
void atomicOp(const KeyRef& key, const ValueRef& value, uint32_t operationType) override;
@ -658,6 +680,10 @@ public:
Version readVersion,
ReadBlobGranuleContext granuleContext) override;
ThreadFuture<Standalone<VectorRef<BlobGranuleSummaryRef>>> summarizeBlobGranules(const KeyRangeRef& keyRange,
Optional<Version> summaryVersion,
int rangeLimit) override;
void atomicOp(const KeyRef& key, const ValueRef& value, uint32_t operationType) override;
void set(const KeyRef& key, const ValueRef& value) override;
void clear(const KeyRef& begin, const KeyRef& end) override;
@ -750,17 +776,22 @@ struct ClientInfo : ClientDesc, ThreadSafeReferenceCounted<ClientInfo> {
IClientApi* api;
bool failed;
std::atomic_bool initialized;
int threadIndex;
std::vector<std::pair<void (*)(void*), void*>> threadCompletionHooks;
ClientInfo()
: ClientDesc(std::string(), false, false), protocolVersion(0), api(nullptr), failed(true), initialized(false) {}
: ClientDesc(std::string(), false, false), protocolVersion(0), api(nullptr), failed(true), initialized(false),
threadIndex(0) {}
ClientInfo(IClientApi* api)
: ClientDesc("internal", false, false), protocolVersion(0), api(api), failed(false), initialized(false) {}
ClientInfo(IClientApi* api, std::string libPath, bool useFutureVersion)
: ClientDesc(libPath, true, useFutureVersion), protocolVersion(0), api(api), failed(false), initialized(false) {}
: ClientDesc("internal", false, false), protocolVersion(0), api(api), failed(false), initialized(false),
threadIndex(0) {}
ClientInfo(IClientApi* api, std::string libPath, bool useFutureVersion, int threadIndex)
: ClientDesc(libPath, true, useFutureVersion), protocolVersion(0), api(api), failed(false), initialized(false),
threadIndex(threadIndex) {}
void loadVersion();
bool canReplace(Reference<ClientInfo> other) const;
std::string getTraceFileIdentifier(const std::string& baseIdentifier);
};
class MultiVersionApi;
@ -1048,7 +1079,7 @@ public:
};
std::map<std::string, SharedStateInfo> clusterSharedStateMap;
static bool apiVersionAtLeast(int minVersion);
ApiVersion getApiVersion() { return apiVersion; }
private:
MultiVersionApi();
@ -1075,11 +1106,13 @@ private:
volatile bool networkSetup;
volatile bool bypassMultiClientApi;
volatile bool externalClient;
int apiVersion;
ApiVersion apiVersion;
int nextThread = 0;
int threadCount;
std::string tmpDir;
bool traceShareBaseNameAmongThreads;
std::string traceFileIdentifier;
Mutex lock;
std::vector<std::pair<FDBNetworkOptions::Option, Optional<Standalone<StringRef>>>> options;

View File

@ -82,8 +82,6 @@ struct NetworkOptions {
class Database {
public:
enum { API_VERSION_LATEST = -1 };
// Creates a database object that represents a connection to a cluster
// This constructor uses a preallocated DatabaseContext that may have been created
// on another thread
@ -365,19 +363,19 @@ private:
public:
// A method for streaming data from the storage server that is more efficient than getRange when reading large
// amounts of data
[[nodiscard]] Future<Void> getRangeStream(const PromiseStream<Standalone<RangeResultRef>>& results,
[[nodiscard]] Future<Void> getRangeStream(PromiseStream<Standalone<RangeResultRef>>& results,
const KeySelector& begin,
const KeySelector& end,
int limit,
Snapshot = Snapshot::False,
Reverse = Reverse::False);
[[nodiscard]] Future<Void> getRangeStream(const PromiseStream<Standalone<RangeResultRef>>& results,
[[nodiscard]] Future<Void> getRangeStream(PromiseStream<Standalone<RangeResultRef>>& results,
const KeySelector& begin,
const KeySelector& end,
GetRangeLimits limits,
Snapshot = Snapshot::False,
Reverse = Reverse::False);
[[nodiscard]] Future<Void> getRangeStream(const PromiseStream<Standalone<RangeResultRef>>& results,
[[nodiscard]] Future<Void> getRangeStream(PromiseStream<Standalone<RangeResultRef>>& results,
const KeyRange& keys,
int limit,
Snapshot snapshot = Snapshot::False,
@ -389,7 +387,7 @@ public:
snapshot,
reverse);
}
[[nodiscard]] Future<Void> getRangeStream(const PromiseStream<Standalone<RangeResultRef>>& results,
[[nodiscard]] Future<Void> getRangeStream(PromiseStream<Standalone<RangeResultRef>>& results,
const KeyRange& keys,
GetRangeLimits limits,
Snapshot snapshot = Snapshot::False,
@ -422,7 +420,7 @@ public:
Version* readVersionOut = nullptr);
Future<Standalone<VectorRef<BlobGranuleSummaryRef>>> summarizeBlobGranules(const KeyRange& range,
Version summaryVersion,
Optional<Version> summaryVersion,
int rangeLimit);
// If checkWriteConflictRanges is true, existing write conflict ranges will be searched for this key

View File

@ -127,6 +127,10 @@ public:
Optional<Version> readVersion,
Version* readVersionOut) override;
Future<Standalone<VectorRef<BlobGranuleSummaryRef>>> summarizeBlobGranules(const KeyRange& range,
Optional<Version> summaryVersion,
int rangeLimit) override;
void addReadConflictRange(KeyRangeRef const& keys) override;
void makeSelfConflicting() override { tr.makeSelfConflicting(); }

View File

@ -730,7 +730,6 @@ public:
int CHECKPOINT_TRANSFER_BLOCK_BYTES;
int QUICK_GET_KEY_VALUES_LIMIT;
int QUICK_GET_KEY_VALUES_LIMIT_BYTES;
bool STORAGE_SERVER_SHARD_AWARE;
// Wait Failure
int MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS;

View File

@ -273,6 +273,9 @@ extern const KeyRef perpetualStorageWiggleStatsPrefix;
// Change the value of this key to anything and that will trigger detailed data distribution team info log.
extern const KeyRef triggerDDTeamInfoPrintKey;
// Encryption data at-rest config key
extern const KeyRef encryptionAtRestModeConfKey;
// The differences between excluded and failed can be found in "command-line-interface.rst"
// and in the help message of the fdbcli command "exclude".

View File

@ -18,11 +18,11 @@
* limitations under the License.
*/
#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_TENANTENTRYCACHE_ACTOR_G_H)
#define FDBSERVER_TENANTENTRYCACHE_ACTOR_G_H
#include "fdbserver/TenantEntryCache.actor.g.h"
#elif !defined(FDBSERVER_TENANTENTRYCACHE_ACTOR_H)
#define FDBSERVER_TENANTENTRYCACHE_ACTOR_H
#if defined(NO_INTELLISENSE) && !defined(FDBCLIENT_TENANTENTRYCACHE_ACTOR_G_H)
#define FDBCLIENT_TENANTENTRYCACHE_ACTOR_G_H
#include "fdbclient/TenantEntryCache.actor.g.h"
#elif !defined(FDBCLIENT_TENANTENTRYCACHE_ACTOR_H)
#define FDBCLIENT_TENANTENTRYCACHE_ACTOR_H
#pragma once
@ -32,7 +32,7 @@
#include "fdbclient/RunTransaction.actor.h"
#include "fdbclient/Tenant.h"
#include "fdbclient/TenantManagement.actor.h"
#include "fdbserver/Knobs.h"
#include "fdbclient/Knobs.h"
#include "fdbrpc/TenantName.h"
#include "flow/IndexedSet.h"
@ -313,9 +313,9 @@ public:
TenantEntryCacheRefreshReason reason = TenantEntryCacheRefreshReason::PERIODIC_TASK;
if (refreshMode == TenantEntryCacheRefreshMode::PERIODIC_TASK) {
refresher = recurringAsync([&, reason]() { return refresh(reason); },
SERVER_KNOBS->TENANT_CACHE_LIST_REFRESH_INTERVAL, /* interval */
CLIENT_KNOBS->TENANT_ENTRY_CACHE_LIST_REFRESH_INTERVAL, /* interval */
true, /* absoluteIntervalDelay */
SERVER_KNOBS->TENANT_CACHE_LIST_REFRESH_INTERVAL, /* intialDelay */
CLIENT_KNOBS->TENANT_ENTRY_CACHE_LIST_REFRESH_INTERVAL, /* intialDelay */
TaskPriority::Worker);
}
@ -387,4 +387,4 @@ public:
};
#include "flow/unactorcompiler.h"
#endif // FDBSERVER_TENANTENTRYCACHE_ACTOR_H
#endif // FDBCLIENT_TENANTENTRYCACHE_ACTOR_H

View File

@ -76,7 +76,7 @@ private:
wait(TenantAPI::listTenantsTransaction(&ryw->getTransaction(), kr.begin, kr.end, limitsHint.rows));
for (auto tenant : tenants) {
std::string jsonString = tenant.second.toJson(ryw->getDatabase()->apiVersion);
std::string jsonString = tenant.second.toJson(ryw->getDatabase()->apiVersion.version());
ValueRef tenantEntryBytes(results->arena(), jsonString);
results->push_back(results->arena(),
KeyValueRef(withTenantMapPrefix(tenant.first, results->arena()), tenantEntryBytes));

View File

@ -20,6 +20,7 @@
#ifndef FDBCLIENT_THREADSAFETRANSACTION_H
#define FDBCLIENT_THREADSAFETRANSACTION_H
#include "flow/ApiVersion.h"
#include "flow/ProtocolVersion.h"
#pragma once
@ -176,6 +177,10 @@ public:
Version readVersion,
ReadBlobGranuleContext granuleContext) override;
ThreadFuture<Standalone<VectorRef<BlobGranuleSummaryRef>>> summarizeBlobGranules(const KeyRangeRef& keyRange,
Optional<Version> summaryVersion,
int rangeLimit) override;
void addReadConflictRange(const KeyRangeRef& keys) override;
void makeSelfConflicting();
@ -242,7 +247,7 @@ private:
friend IClientApi* getLocalClientAPI();
ThreadSafeApi();
int apiVersion;
ApiVersion apiVersion;
std::string clientVersion;
uint64_t transportId;

View File

@ -50,8 +50,7 @@ struct SpanContext {
SpanContext() : traceID(UID()), spanID(0), m_Flags(TraceFlags::unsampled) {}
SpanContext(UID traceID, uint64_t spanID, TraceFlags flags) : traceID(traceID), spanID(spanID), m_Flags(flags) {}
SpanContext(UID traceID, uint64_t spanID) : traceID(traceID), spanID(spanID), m_Flags(TraceFlags::unsampled) {}
SpanContext(Arena arena, const SpanContext& span)
: traceID(span.traceID), spanID(span.spanID), m_Flags(span.m_Flags) {}
SpanContext(const SpanContext& span) = default;
bool isSampled() const { return (m_Flags & TraceFlags::sampled) == TraceFlags::sampled; }
std::string toString() const { return format("%016llx%016llx%016llx", traceID.first(), traceID.second(), spanID); };
bool isValid() const { return traceID.first() != 0 && traceID.second() != 0 && spanID != 0; }
@ -62,6 +61,9 @@ struct SpanContext {
}
};
template <>
struct flow_ref<SpanContext> : std::false_type {};
// Span
//
// Span is a tracing implementation which, for the most part, complies with the W3C Trace Context specification
@ -155,7 +157,7 @@ public:
// We've determined for initial tracing release, spans with only a location will not be traced.
// Generally these are for background processes, some are called infrequently, while others may be high volume.
// TODO: review and address in subsequent PRs.
Span(const Location& location) : location(location), begin(g_network->now()) {}
explicit Span(const Location& location) : Span(location, SpanContext()) {}
Span(const Span&) = delete;
Span(Span&& o) {

View File

@ -57,6 +57,8 @@ description is not currently required but encouraged.
<Option name="trace_file_identifier" code="36"
paramType="String" paramDescription="The identifier that will be part of all trace file names"
description="Once provided, this string will be used to replace the port/PID in the log file names." />
<Option name="trace_share_among_client_threads" code="37"
description="Use the same base trace file name for all client threads as it did before version 7.2. The current default behavior is to use distinct trace file names for client threads by including their version and thread index." />
<Option name="trace_partial_file_suffix" code="39"
paramType="String" paramDescription="Append this suffix to partially written log files. When a log file is complete, it is renamed to remove the suffix. No separator is added between the file and the suffix. If you want to add a file extension, you should include the separator - e.g. '.tmp' instead of 'tmp' to add the 'tmp' extension."
description="Set file suffix for partially written log files." />

View File

@ -139,7 +139,8 @@ public:
pair_type endPair(endKey, Val());
map.insert(endPair, true, mf(endPair));
}
Val const& operator[](const Key& k) { return rangeContaining(k).value(); }
Val const& operator[](const Key& k) const { return rangeContaining(k).value(); }
Val& operator[](const Key& k) { return rangeContaining(k).value(); }
Ranges ranges() { return Ranges(iterator(map.begin()), iterator(map.lastItem())); }
ConstRanges ranges() const { return ConstRanges(const_iterator(map.begin()), const_iterator(map.lastItem())); }

View File

@ -811,8 +811,13 @@ public:
Future<Void> disc =
makeDependent<T>(IFailureMonitor::failureMonitor()).onDisconnectOrFailure(getEndpoint());
auto& p = getReplyPromiseStream(value);
if (disc.isReady()) {
p.sendError(request_maybe_delivered());
// FIXME: buggify only in simulation/not during speed up simulation?
if (disc.isReady() || BUGGIFY_WITH_PROB(0.01)) {
if (disc.isReady() && IFailureMonitor::failureMonitor().knownUnauthorized(getEndpoint())) {
p.sendError(unauthorized_attempt());
} else {
p.sendError(request_maybe_delivered());
}
} else {
Reference<Peer> peer =
FlowTransport::transport().sendUnreliable(SerializeSource<T>(value), getEndpoint(), true);

View File

@ -1,6 +1,8 @@
if(NOT WIN32)
add_flow_target(EXECUTABLE NAME authz_tls_unittest SRCS AuthzTlsTest.actor.cpp)
target_link_libraries(authz_tls_unittest PRIVATE flow fdbrpc fmt::fmt)
add_test(NAME authorization_tls_unittest
COMMAND $<TARGET_FILE:authz_tls_unittest>)
if(NOT OPEN_FOR_IDE)
add_test(NAME authorization_tls_unittest
COMMAND $<TARGET_FILE:authz_tls_unittest>)
endif()
endif()

View File

@ -357,7 +357,7 @@ ACTOR Future<BlobGranuleCipherKeysCtx> getLatestGranuleCipherKeys(Reference<Blob
ASSERT(tenantData.isValid());
std::unordered_map<EncryptCipherDomainId, EncryptCipherDomainName> domains;
std::unordered_map<EncryptCipherDomainId, EncryptCipherDomainNameRef> domains;
domains.emplace(tenantData->entry.id, StringRef(*arena, tenantData->name));
std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>> domainKeyMap =
wait(getLatestEncryptCipherKeys(bwData->dbInfo, domains));

View File

@ -196,21 +196,9 @@ struct EncryptKeyProxySingleton : Singleton<EncryptKeyProxyInterface> {
}
};
// Waits on leaderFail; when it fires, this process is no longer the elected
// leader, so flag the controller data for suicide and restart the cluster
// controller by throwing.
ACTOR Future<Void> handleLeaderReplacement(Reference<ClusterRecoveryData> self, Future<Void> leaderFail) {
	loop choose {
		when(wait(leaderFail)) {
			TraceEvent("LeaderReplaced", self->controllerData->id).log();
			// We are no longer the leader if this has changed.
			self->controllerData->shouldCommitSuicide = true;
			throw restart_cluster_controller();
		}
	}
}
ACTOR Future<Void> clusterWatchDatabase(ClusterControllerData* cluster,
ClusterControllerData::DBInfo* db,
ServerCoordinators coordinators,
Future<Void> leaderFail,
Future<Void> recoveredDiskFiles) {
state MasterInterface iMaster;
state Reference<ClusterRecoveryData> recoveryData;
@ -307,7 +295,6 @@ ACTOR Future<Void> clusterWatchDatabase(ClusterControllerData* cluster,
TraceEvent(SevDebug, "BackupWorkerDoneRequest", cluster->id).log();
}
when(wait(collection)) { throw internal_error(); }
when(wait(handleLeaderReplacement(recoveryData, leaderFail))) { throw internal_error(); }
}
// failed master (better master exists) could happen while change-coordinators request processing is
// in-progress
@ -2558,8 +2545,8 @@ ACTOR Future<Void> clusterControllerCore(ClusterControllerFullInterface interf,
if (SERVER_KNOBS->ENABLE_ENCRYPTION) {
self.addActor.send(monitorEncryptKeyProxy(&self));
}
self.addActor.send(clusterWatchDatabase(
&self, &self.db, coordinators, leaderFail, recoveredDiskFiles)); // Start the master database
self.addActor.send(
clusterWatchDatabase(&self, &self.db, coordinators, recoveredDiskFiles)); // Start the master database
self.addActor.send(self.updateWorkerList.init(self.db.db));
self.addActor.send(statusServer(interf.clientInterface.databaseStatus.getFuture(),
&self,
@ -2669,6 +2656,12 @@ ACTOR Future<Void> clusterControllerCore(ClusterControllerFullInterface interf,
when(GetServerDBInfoRequest req = waitNext(interf.getServerDBInfo.getFuture())) {
self.addActor.send(clusterGetServerInfo(&self.db, req.knownServerInfoID, req.reply));
}
when(wait(leaderFail)) {
// We are no longer the leader if this has changed.
endRole(Role::CLUSTER_CONTROLLER, interf.id(), "Leader Replaced", true);
CODE_PROBE(true, "Leader replaced");
return Void();
}
when(ReplyPromise<Void> ping = waitNext(interf.clientInterface.ping.getFuture())) { ping.send(Void()); }
}
}

View File

@ -917,11 +917,11 @@ ACTOR Future<Void> getResolution(CommitBatchContext* self) {
// Fetch cipher keys if needed.
state Future<std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>>> getCipherKeys;
if (pProxyCommitData->isEncryptionEnabled) {
static std::unordered_map<EncryptCipherDomainId, EncryptCipherDomainName> defaultDomains = {
static std::unordered_map<EncryptCipherDomainId, EncryptCipherDomainNameRef> defaultDomains = {
{ SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID, FDB_DEFAULT_ENCRYPT_DOMAIN_NAME },
{ ENCRYPT_HEADER_DOMAIN_ID, FDB_DEFAULT_ENCRYPT_DOMAIN_NAME }
};
std::unordered_map<EncryptCipherDomainId, EncryptCipherDomainName> encryptDomains = defaultDomains;
std::unordered_map<EncryptCipherDomainId, EncryptCipherDomainNameRef> encryptDomains = defaultDomains;
for (int t = 0; t < trs.size(); t++) {
TenantInfo const& tenantInfo = trs[t].tenantInfo;
int64_t tenantId = tenantInfo.tenantId;

View File

@ -18,13 +18,17 @@
* limitations under the License.
*/
#include "fdbclient/FDBTypes.h"
#include "fdbclient/StorageServerInterface.h"
#include "fdbrpc/FailureMonitor.h"
#include "fdbclient/SystemData.h"
#include "fdbserver/DataDistribution.actor.h"
#include "fdbserver/DDSharedContext.h"
#include "fdbserver/TenantCache.h"
#include "fdbserver/Knobs.h"
#include "fdbclient/DatabaseContext.h"
#include "flow/ActorCollection.h"
#include "flow/Arena.h"
#include "flow/FastRef.h"
#include "flow/Trace.h"
#include "flow/actorcompiler.h" // This must be the last #include.
@ -121,6 +125,8 @@ struct DataDistributionTracker : public IDDShardTracker {
}
};
Optional<Reference<TenantCache>> ddTenantCache;
DataDistributionTracker(Database cx,
UID distributorId,
Promise<Void> const& readyToStart,
@ -129,12 +135,13 @@ struct DataDistributionTracker : public IDDShardTracker {
Reference<PhysicalShardCollection> physicalShardCollection,
Reference<AsyncVar<bool>> anyZeroHealthyTeams,
KeyRangeMap<ShardTrackedData>* shards,
bool* trackerCancelled)
bool* trackerCancelled,
Optional<Reference<TenantCache>> ddTenantCache)
: IDDShardTracker(), cx(cx), distributorId(distributorId), shards(shards), sizeChanges(false),
systemSizeEstimate(0), dbSizeEstimate(new AsyncVar<int64_t>()), maxShardSize(new AsyncVar<Optional<int64_t>>()),
output(output), shardsAffectedByTeamFailure(shardsAffectedByTeamFailure),
physicalShardCollection(physicalShardCollection), readyToStart(readyToStart),
anyZeroHealthyTeams(anyZeroHealthyTeams), trackerCancelled(trackerCancelled) {}
anyZeroHealthyTeams(anyZeroHealthyTeams), trackerCancelled(trackerCancelled), ddTenantCache(ddTenantCache) {}
~DataDistributionTracker() override {
*trackerCancelled = true;
@ -501,6 +508,375 @@ private:
Promise<Void> cleared;
};
// Render a shard split as a human-readable string "[begin, end) -> k0 k1 k2 " for trace events.
std::string describeSplit(KeyRange keys, Standalone<VectorRef<KeyRef>>& splitKeys) {
	std::string description = "[" + keys.begin.toString() + ", " + keys.end.toString() + ") -> ";
	for (const auto& splitPoint : splitKeys) {
		description += splitPoint.printable();
		description += " ";
	}
	return description;
}
// Emit a SevInfo trace event describing the split of `keys` at `splitKeys`.
void traceSplit(KeyRange keys, Standalone<VectorRef<KeyRef>>& splitKeys) {
	TraceEvent(SevInfo, "ExecutingShardSplit").detail("AtKeys", describeSplit(keys, splitKeys));
}
// Split the shard covering `keys` at the `splitKeys` boundaries: restart a tracker for each
// resulting sub-range, redefine the sub-ranges in shardsAffectedByTeamFailure, and (when
// `relocate` is set) enqueue RelocateShard requests for them. One randomly chosen sub-range
// (`skipRange`) is not sent to the relocation queue.
void executeShardSplit(DataDistributionTracker* self,
                       KeyRange keys,
                       Standalone<VectorRef<KeyRef>> splitKeys,
                       Reference<AsyncVar<Optional<ShardMetrics>>> shardSize,
                       bool relocate,
                       RelocateReason reason) {

	// splitKeys includes both endpoints of `keys`, so N+1 keys describe N shards.
	int numShards = splitKeys.size() - 1;
	ASSERT(numShards > 1);

	int skipRange = deterministicRandom()->randomInt(0, numShards);

	auto s = describeSplit(keys, splitKeys);
	TraceEvent(SevInfo, "ExecutingShardSplit").suppressFor(0.5).detail("Splitting", s).detail("NumShards", numShards);

	// The queue can't deal with RelocateShard requests which split an existing shard into three pieces, so
	// we have to send the unskipped ranges in this order (nibbling in from the edges of the old range)
	for (int i = 0; i < skipRange; i++)
		restartShardTrackers(self, KeyRangeRef(splitKeys[i], splitKeys[i + 1]));
	restartShardTrackers(self, KeyRangeRef(splitKeys[skipRange], splitKeys[skipRange + 1]));
	for (int i = numShards - 1; i > skipRange; i--)
		restartShardTrackers(self, KeyRangeRef(splitKeys[i], splitKeys[i + 1]));

	// Redefine the team-failure shards (and optionally relocate) in the same edges-inward order,
	// skipping `skipRange`.
	for (int i = 0; i < skipRange; i++) {
		KeyRangeRef r(splitKeys[i], splitKeys[i + 1]);
		self->shardsAffectedByTeamFailure->defineShard(r);
		if (relocate) {
			self->output.send(RelocateShard(r, DataMovementReason::SPLIT_SHARD, reason));
		}
	}
	for (int i = numShards - 1; i > skipRange; i--) {
		KeyRangeRef r(splitKeys[i], splitKeys[i + 1]);
		self->shardsAffectedByTeamFailure->defineShard(r);
		if (relocate) {
			self->output.send(RelocateShard(r, DataMovementReason::SPLIT_SHARD, reason));
		}
	}

	// Account for the size change of the original range using its last known byte count.
	self->sizeChanges.add(changeSizes(self, keys, shardSize->get().get().metrics.bytes));
}
// A shard (an iterator into the tracker's shard map) paired with the fault lines — the split
// points, which include the shard's own begin and end keys — at which it should be split.
struct RangeToSplit {
	RangeMap<Standalone<StringRef>, ShardTrackedData, KeyRangeRef>::iterator shard;
	Standalone<VectorRef<KeyRef>> faultLines;

	RangeToSplit(RangeMap<Standalone<StringRef>, ShardTrackedData, KeyRangeRef>::iterator shard,
	             Standalone<VectorRef<KeyRef>> faultLines)
	  : shard(shard), faultLines(faultLines) {}
};
// Compute the split points for a shard [shardBegin, shardEnd) that straddles one or both of the
// tenant boundaries [tenantBegin, tenantEnd). The result always includes the shard's own
// endpoints, plus each tenant boundary that falls strictly inside the shard.
Standalone<VectorRef<KeyRef>> findShardFaultLines(KeyRef shardBegin,
                                                  KeyRef shardEnd,
                                                  KeyRef tenantBegin,
                                                  KeyRef tenantEnd) {
	const bool straddlesTenantBegin = shardBegin < tenantBegin && shardEnd > tenantBegin;
	const bool straddlesTenantEnd = shardBegin < tenantEnd && shardEnd > tenantEnd;

	// Callers only invoke this for shards that actually cross a tenant boundary.
	ASSERT(straddlesTenantBegin || straddlesTenantEnd);

	Standalone<VectorRef<KeyRef>> faultLines;
	faultLines.push_back_deep(faultLines.arena(), shardBegin);
	if (straddlesTenantBegin) {
		faultLines.push_back_deep(faultLines.arena(), tenantBegin);
	}
	if (straddlesTenantEnd) {
		faultLines.push_back_deep(faultLines.arena(), tenantEnd);
	}
	faultLines.push_back_deep(faultLines.arena(), shardEnd);

	return faultLines;
}
// Determine which existing shards must be split so that shard boundaries align with
// `tenantKeys`. At most two shards are returned: the one containing tenantKeys.begin and the one
// containing the last key before tenantKeys.end (a single entry when they are the same shard).
// Shards whose size stats are not yet present are not offered for splitting.
std::vector<RangeToSplit> findTenantShardBoundaries(KeyRangeMap<ShardTrackedData>* shards, KeyRange tenantKeys) {
	std::vector<RangeToSplit> result;
	auto shardContainingTenantStart = shards->rangeContaining(tenantKeys.begin);
	// rangeContainingKeyBefore: the tenant's end key is exclusive, so the relevant shard is the
	// one holding the key just before it.
	auto shardContainingTenantEnd = shards->rangeContainingKeyBefore(tenantKeys.end);

	// same shard
	if (shardContainingTenantStart == shardContainingTenantEnd) {
		// If shard boundaries are not aligned with tenantKeys
		if (shardContainingTenantStart.begin() != tenantKeys.begin ||
		    shardContainingTenantStart.end() != tenantKeys.end) {

			// Only consider the split once the shard's metrics are available.
			auto startShardSize = shardContainingTenantStart->value().stats;
			if (startShardSize->get().present()) {
				auto faultLines = findShardFaultLines(shardContainingTenantStart->begin(),
				                                      shardContainingTenantStart->end(),
				                                      tenantKeys.begin,
				                                      tenantKeys.end);
				result.emplace_back(shardContainingTenantStart, faultLines);
			}
		}
	} else {
		auto startShardSize = shardContainingTenantStart->value().stats;
		auto endShardSize = shardContainingTenantEnd->value().stats;

		// NOTE(review): both shards' stats must be present before either split is proposed —
		// presumably so the two splits are issued together; confirm with callers.
		if (startShardSize->get().present() && endShardSize->get().present()) {
			// Split the first shard only if its begin is not already aligned with the tenant's begin.
			if (shardContainingTenantStart->begin() != tenantKeys.begin) {
				auto faultLines = findShardFaultLines(shardContainingTenantStart->begin(),
				                                      shardContainingTenantStart->end(),
				                                      tenantKeys.begin,
				                                      tenantKeys.end);
				result.emplace_back(shardContainingTenantStart, faultLines);
			}

			// Split the last shard only if its end is not already aligned with the tenant's end.
			if (shardContainingTenantEnd->end() != tenantKeys.end) {
				auto faultLines = findShardFaultLines(shardContainingTenantEnd->begin(),
				                                      shardContainingTenantEnd->end(),
				                                      tenantKeys.begin,
				                                      tenantKeys.end);
				result.emplace_back(shardContainingTenantEnd, faultLines);
			}
		}
	}

	return result;
}
bool faultLinesMatch(std::vector<RangeToSplit>& ranges, std::vector<std::vector<KeyRef>>& expectedFaultLines) {
if (ranges.size() != expectedFaultLines.size()) {
return false;
}
for (auto& range : ranges) {
KeyRangeRef keys = KeyRangeRef(range.shard->begin(), range.shard->end());
traceSplit(keys, range.faultLines);
}
for (int r = 0; r < ranges.size(); r++) {
if (ranges[r].faultLines.size() != expectedFaultLines[r].size()) {
return false;
}
for (int fl = 0; fl < ranges[r].faultLines.size(); fl++) {
if (ranges[r].faultLines[fl] != expectedFaultLines[r][fl]) {
return false;
}
}
}
return true;
}
// Tenant [b, c) lies strictly inside shard [a, f): the shard must split at both tenant boundaries.
TEST_CASE("/DataDistribution/Tenant/SingleShardSplit") {
	wait(Future<Void>(Void()));
	ShardTrackedData data;
	ShardMetrics sm(StorageMetrics(), now(), 1);
	data.stats = makeReference<AsyncVar<Optional<ShardMetrics>>>();

	KeyRangeMap<ShardTrackedData> shards;
	KeyRef begin = "a"_sr, end = "f"_sr;
	KeyRangeRef k(begin, end);
	shards.insert(k, data);

	KeyRangeRef tenantKeys("b"_sr, "c"_sr);
	// Publish metrics so findTenantShardBoundaries considers the shard splittable.
	data.stats->set(sm);

	std::vector<RangeToSplit> result = findTenantShardBoundaries(&shards, tenantKeys);
	std::vector<std::vector<KeyRef>> expectedFaultLines = { { "a"_sr, "b"_sr, "c"_sr, "f"_sr } };
	ASSERT(faultLinesMatch(result, expectedFaultLines));

	return Void();
}
// Tenant [a, f) exactly matches shard [a, f): no split is required.
TEST_CASE("/DataDistribution/Tenant/SingleShardTenantAligned") {
	wait(Future<Void>(Void()));
	ShardTrackedData data;
	ShardMetrics sm(StorageMetrics(), now(), 1);
	data.stats = makeReference<AsyncVar<Optional<ShardMetrics>>>();

	KeyRangeMap<ShardTrackedData> shards;
	KeyRef begin = "a"_sr, end = "f"_sr;
	KeyRangeRef k(begin, end);
	shards.insert(k, data);

	KeyRangeRef tenantKeys("a"_sr, "f"_sr);
	// Publish metrics so the alignment check is the only reason no split is returned.
	data.stats->set(sm);

	std::vector<RangeToSplit> result = findTenantShardBoundaries(&shards, tenantKeys);
	std::vector<std::vector<KeyRef>> expectedFaultLines = {};
	ASSERT(faultLinesMatch(result, expectedFaultLines));

	return Void();
}
// Tenant [a, d) shares its begin with shard [a, f): only the tenant end "d" becomes a split point.
TEST_CASE("/DataDistribution/Tenant/SingleShardTenantAlignedAtStart") {
	wait(Future<Void>(Void()));
	ShardTrackedData data;
	ShardMetrics sm(StorageMetrics(), now(), 1);
	data.stats = makeReference<AsyncVar<Optional<ShardMetrics>>>();

	KeyRangeMap<ShardTrackedData> shards;
	KeyRef begin = "a"_sr, end = "f"_sr;
	KeyRangeRef k(begin, end);
	shards.insert(k, data);

	KeyRangeRef tenantKeys("a"_sr, "d"_sr);
	// Publish metrics so findTenantShardBoundaries considers the shard splittable.
	data.stats->set(sm);

	std::vector<RangeToSplit> result = findTenantShardBoundaries(&shards, tenantKeys);
	std::vector<std::vector<KeyRef>> expectedFaultLines = { { "a"_sr, "d"_sr, "f"_sr } };
	ASSERT(faultLinesMatch(result, expectedFaultLines));

	return Void();
}
// Tenant [b, f) shares its end with shard [a, f): only the tenant begin "b" becomes a split point.
TEST_CASE("/DataDistribution/Tenant/SingleShardTenantAlignedAtEnd") {
	wait(Future<Void>(Void()));
	ShardTrackedData data;
	ShardMetrics sm(StorageMetrics(), now(), 1);
	data.stats = makeReference<AsyncVar<Optional<ShardMetrics>>>();

	KeyRangeMap<ShardTrackedData> shards;
	KeyRef begin = "a"_sr, end = "f"_sr;
	KeyRangeRef k(begin, end);
	shards.insert(k, data);

	KeyRangeRef tenantKeys("b"_sr, "f"_sr);
	// Publish metrics so findTenantShardBoundaries considers the shard splittable.
	data.stats->set(sm);

	std::vector<RangeToSplit> result = findTenantShardBoundaries(&shards, tenantKeys);
	std::vector<std::vector<KeyRef>> expectedFaultLines = { { "a"_sr, "b"_sr, "f"_sr } };
	ASSERT(faultLinesMatch(result, expectedFaultLines));

	return Void();
}
// Tenant [b, e) straddles two shards [a, c) and [d, f): each boundary shard is split at the
// tenant boundary that falls inside it.
TEST_CASE("/DataDistribution/Tenant/DoubleShardSplit") {
	wait(Future<Void>(Void()));
	ShardTrackedData data1, data2;
	ShardMetrics sm(StorageMetrics(), now(), 1);
	data1.stats = makeReference<AsyncVar<Optional<ShardMetrics>>>();
	data2.stats = makeReference<AsyncVar<Optional<ShardMetrics>>>();

	KeyRangeMap<ShardTrackedData> shards;
	KeyRef begin1 = "a"_sr, end1 = "c"_sr;
	KeyRef begin2 = "d"_sr, end2 = "f"_sr;
	KeyRangeRef k1(begin1, end1);
	KeyRangeRef k2(begin2, end2);

	shards.insert(k1, data1);
	shards.insert(k2, data2);

	KeyRangeRef tenantKeys("b"_sr, "e"_sr);
	// Both shards need metrics present for any split to be proposed.
	data1.stats->set(sm);
	data2.stats->set(sm);

	std::vector<RangeToSplit> result = findTenantShardBoundaries(&shards, tenantKeys);

	for (auto& range : result) {
		KeyRangeRef keys = KeyRangeRef(range.shard->begin(), range.shard->end());
		traceSplit(keys, range.faultLines);
	}

	std::vector<std::vector<KeyRef>> expectedFaultLines = { { "a"_sr, "b"_sr, "c"_sr }, { "d"_sr, "e"_sr, "f"_sr } };
	ASSERT(faultLinesMatch(result, expectedFaultLines));

	return Void();
}
// Tenant [a, e) begins exactly at shard [a, c): only the ending shard [d, f) is split at "e".
TEST_CASE("/DataDistribution/Tenant/DoubleShardTenantAlignedAtStart") {
	wait(Future<Void>(Void()));
	ShardTrackedData data1, data2;
	ShardMetrics sm(StorageMetrics(), now(), 1);
	data1.stats = makeReference<AsyncVar<Optional<ShardMetrics>>>();
	data2.stats = makeReference<AsyncVar<Optional<ShardMetrics>>>();

	KeyRangeMap<ShardTrackedData> shards;
	KeyRef begin1 = "a"_sr, end1 = "c"_sr;
	KeyRef begin2 = "d"_sr, end2 = "f"_sr;
	KeyRangeRef k1(begin1, end1);
	KeyRangeRef k2(begin2, end2);

	shards.insert(k1, data1);
	shards.insert(k2, data2);

	KeyRangeRef tenantKeys("a"_sr, "e"_sr);
	// Both shards need metrics present for any split to be proposed.
	data1.stats->set(sm);
	data2.stats->set(sm);

	std::vector<RangeToSplit> result = findTenantShardBoundaries(&shards, tenantKeys);
	std::vector<std::vector<KeyRef>> expectedFaultLines = { { "d"_sr, "e"_sr, "f"_sr } };
	ASSERT(faultLinesMatch(result, expectedFaultLines));

	return Void();
}
// Tenant [b, f) ends exactly at shard [d, f): only the starting shard [a, c) is split at "b".
TEST_CASE("/DataDistribution/Tenant/DoubleShardTenantAlignedAtEnd") {
	wait(Future<Void>(Void()));
	ShardTrackedData data1, data2;
	ShardMetrics sm(StorageMetrics(), now(), 1);
	data1.stats = makeReference<AsyncVar<Optional<ShardMetrics>>>();
	data2.stats = makeReference<AsyncVar<Optional<ShardMetrics>>>();

	KeyRangeMap<ShardTrackedData> shards;
	KeyRef begin1 = "a"_sr, end1 = "c"_sr;
	KeyRef begin2 = "d"_sr, end2 = "f"_sr;
	KeyRangeRef k1(begin1, end1);
	KeyRangeRef k2(begin2, end2);

	shards.insert(k1, data1);
	shards.insert(k2, data2);

	KeyRangeRef tenantKeys("b"_sr, "f"_sr);
	// Both shards need metrics present for any split to be proposed.
	data1.stats->set(sm);
	data2.stats->set(sm);

	std::vector<RangeToSplit> result = findTenantShardBoundaries(&shards, tenantKeys);
	std::vector<std::vector<KeyRef>> expectedFaultLines = { { "a"_sr, "b"_sr, "c"_sr } };
	ASSERT(faultLinesMatch(result, expectedFaultLines));

	return Void();
}
// Split any shards that straddle the boundaries of `tenantKeys` so shard boundaries align with
// the tenant keyspace. Each qualifying shard is traced and then split via executeShardSplit
// (with relocation requested, reason TENANT_SPLIT).
ACTOR Future<Void> tenantShardSplitter(DataDistributionTracker* self, KeyRange tenantKeys) {
	// NOTE(review): wait on an immediately-ready future — presumably just a yield point; confirm.
	wait(Future<Void>(Void()));
	std::vector<RangeToSplit> rangesToSplit = findTenantShardBoundaries(self->shards, tenantKeys);

	for (auto& range : rangesToSplit) {
		KeyRangeRef keys = KeyRangeRef(range.shard->begin(), range.shard->end());
		traceSplit(keys, range.faultLines);
		executeShardSplit(self, keys, range.faultLines, range.shard->value().stats, true, RelocateReason::TENANT_SPLIT);
	}

	return Void();
}
// Handle a tenant-creation notification from the tenant cache: align shard boundaries with the
// new tenant's key range, then acknowledge the request with `true`.
ACTOR Future<Void> tenantCreationHandling(DataDistributionTracker* self, TenantCacheTenantCreated req) {
	TraceEvent(SevInfo, "TenantCacheTenantCreated").detail("Begin", req.keys.begin).detail("End", req.keys.end);

	wait(tenantShardSplitter(self, req.keys));
	req.reply.send(true);
	return Void();
}
ACTOR Future<Void> shardSplitter(DataDistributionTracker* self,
KeyRange keys,
Reference<AsyncVar<Optional<ShardMetrics>>> shardSize,
@ -540,27 +916,7 @@ ACTOR Future<Void> shardSplitter(DataDistributionTracker* self,
.detail("NumShards", numShards);
if (numShards > 1) {
int skipRange = deterministicRandom()->randomInt(0, numShards);
// The queue can't deal with RelocateShard requests which split an existing shard into three pieces, so
// we have to send the unskipped ranges in this order (nibbling in from the edges of the old range)
for (int i = 0; i < skipRange; i++)
restartShardTrackers(self, KeyRangeRef(splitKeys[i], splitKeys[i + 1]));
restartShardTrackers(self, KeyRangeRef(splitKeys[skipRange], splitKeys[skipRange + 1]));
for (int i = numShards - 1; i > skipRange; i--)
restartShardTrackers(self, KeyRangeRef(splitKeys[i], splitKeys[i + 1]));
for (int i = 0; i < skipRange; i++) {
KeyRangeRef r(splitKeys[i], splitKeys[i + 1]);
self->shardsAffectedByTeamFailure->defineShard(r);
self->output.send(RelocateShard(r, DataMovementReason::SPLIT_SHARD, reason));
}
for (int i = numShards - 1; i > skipRange; i--) {
KeyRangeRef r(splitKeys[i], splitKeys[i + 1]);
self->shardsAffectedByTeamFailure->defineShard(r);
self->output.send(RelocateShard(r, DataMovementReason::SPLIT_SHARD, reason));
}
self->sizeChanges.add(changeSizes(self, keys, shardSize->get().get().metrics.bytes));
executeShardSplit(self, keys, splitKeys, shardSize, true, reason);
} else {
wait(delay(1.0, TaskPriority::DataDistribution)); // In case the reason the split point was off was due to a
// discrepancy between storage servers
@ -579,6 +935,43 @@ ACTOR Future<Void> brokenPromiseToReady(Future<Void> f) {
return Void();
}
// Decide whether the shard at `keys` may merge with the adjacent range `adjRange` without
// violating tenant keyspace boundaries. Always feasible when no tenant cache is configured.
static bool shardMergeFeasible(DataDistributionTracker* self, KeyRange const& keys, KeyRangeRef adjRange) {
	// No tenant cache => no tenant boundaries to honor.
	if (!self->ddTenantCache.present()) {
		return true;
	}

	Optional<Reference<TCTenantInfo>> rangeTenant = self->ddTenantCache.get()->tenantOwning(keys.begin);
	Optional<Reference<TCTenantInfo>> adjRangeTenant = self->ddTenantCache.get()->tenantOwning(adjRange.begin);

	// A merge is only allowed when both ranges are untenanted, or both belong to the same tenant.
	if (rangeTenant.present() != adjRangeTenant.present()) {
		return false;
	}
	if (rangeTenant.present() && rangeTenant != adjRangeTenant) {
		return false;
	}

	return true;
}
// A forward merge (with the next range) is impossible at the very end of the keyspace;
// otherwise defer to the tenant-boundary check.
static bool shardForwardMergeFeasible(DataDistributionTracker* self, KeyRange const& keys, KeyRangeRef nextRange) {
	return keys.end != allKeys.end && shardMergeFeasible(self, keys, nextRange);
}
// A backward merge (with the previous range) is impossible at the very beginning of the
// keyspace; otherwise defer to the tenant-boundary check.
static bool shardBackwardMergeFeasible(DataDistributionTracker* self, KeyRange const& keys, KeyRangeRef prevRange) {
	return keys.begin != allKeys.begin && shardMergeFeasible(self, keys, prevRange);
}
Future<Void> shardMerger(DataDistributionTracker* self,
KeyRange const& keys,
Reference<AsyncVar<Optional<ShardMetrics>>> shardSize) {
@ -594,6 +987,7 @@ Future<Void> shardMerger(DataDistributionTracker* self,
int shardsMerged = 1;
bool forwardComplete = false;
KeyRangeRef merged;
StorageMetrics endingStats = shardSize->get().get().metrics;
int shardCount = shardSize->get().get().shardCount;
double lastLowBandwidthStartTime = shardSize->get().get().lastLowBandwidthStartTime;
@ -614,11 +1008,20 @@ Future<Void> shardMerger(DataDistributionTracker* self,
forwardComplete = true;
continue;
}
++nextIter;
if (!shardForwardMergeFeasible(self, keys, nextIter->range())) {
--nextIter;
forwardComplete = true;
continue;
}
newMetrics = nextIter->value().stats->get();
// If going forward, give up when the next shard's stats are not yet present.
if (!newMetrics.present() || shardCount + newMetrics.get().shardCount >= CLIENT_KNOBS->SHARD_COUNT_LIMIT) {
// If going forward, give up when the next shard's stats are not yet present, or if the
// the shard is already over the merge bounds.
if (!newMetrics.present() || shardCount + newMetrics.get().shardCount >= CLIENT_KNOBS->SHARD_COUNT_LIMIT ||
(endingStats.bytes + newMetrics.get().metrics.bytes > maxShardSize)) {
--nextIter;
forwardComplete = true;
continue;
@ -627,10 +1030,16 @@ Future<Void> shardMerger(DataDistributionTracker* self,
--prevIter;
newMetrics = prevIter->value().stats->get();
if (!shardBackwardMergeFeasible(self, keys, prevIter->range())) {
++prevIter;
break;
}
// If going backward, stop when the stats are not present or if the shard is already over the merge
// bounds. If this check triggers right away (if we have not merged anything) then return a trigger
// on the previous shard changing "size".
if (!newMetrics.present() || shardCount + newMetrics.get().shardCount >= CLIENT_KNOBS->SHARD_COUNT_LIMIT) {
if (!newMetrics.present() || shardCount + newMetrics.get().shardCount >= CLIENT_KNOBS->SHARD_COUNT_LIMIT ||
(endingStats.bytes + newMetrics.get().metrics.bytes > maxShardSize)) {
if (shardsMerged == 1) {
CODE_PROBE(true, "shardMerger cannot merge anything");
return brokenPromiseToReady(prevIter->value().stats->onChange());
@ -651,8 +1060,8 @@ Future<Void> shardMerger(DataDistributionTracker* self,
shardsMerged++;
auto shardBounds = getShardSizeBounds(merged, maxShardSize);
// If we just recently get the current shard's metrics (i.e., less than DD_LOW_BANDWIDTH_DELAY ago), it means
// the shard's metric may not be stable yet. So we cannot continue merging in this direction.
// If we just recently get the current shard's metrics (i.e., less than DD_LOW_BANDWIDTH_DELAY ago), it
// means the shard's metric may not be stable yet. So we cannot continue merging in this direction.
if (endingStats.bytes >= shardBounds.min.bytes || getBandwidthStatus(endingStats) != BandwidthStatusLow ||
now() - lastLowBandwidthStartTime < SERVER_KNOBS->DD_LOW_BANDWIDTH_DELAY ||
shardsMerged >= SERVER_KNOBS->DD_MERGE_LIMIT) {
@ -679,6 +1088,10 @@ Future<Void> shardMerger(DataDistributionTracker* self,
}
}
if (shardsMerged == 1) {
return brokenPromiseToReady(nextIter->value().stats->onChange());
}
// restarting the shard tracker will dereference values in the shard map, so make a copy
KeyRange mergeRange = merged;
@ -686,9 +1099,11 @@ Future<Void> shardMerger(DataDistributionTracker* self,
// NewKeys: New key range after shards are merged;
// EndingSize: The new merged shard size in bytes;
// BatchedMerges: The number of shards merged. Each shard is defined in self->shards;
// LastLowBandwidthStartTime: When does a shard's bandwidth status becomes BandwidthStatusLow. If a shard's status
// LastLowBandwidthStartTime: When does a shard's bandwidth status becomes BandwidthStatusLow. If a shard's
// status
// becomes BandwidthStatusLow less than DD_LOW_BANDWIDTH_DELAY ago, the merging logic will stop at the shard;
// ShardCount: The number of non-splittable shards that are merged. Each shard is defined in self->shards may have
// ShardCount: The number of non-splittable shards that are merged. Each shard is defined in self->shards may
// have
// more than 1 shards.
TraceEvent("RelocateShardMergeMetrics", self->distributorId)
.detail("OldKeys", keys)
@ -721,10 +1136,22 @@ ACTOR Future<Void> shardEvaluator(DataDistributionTracker* self,
ShardSizeBounds shardBounds = getShardSizeBounds(keys, self->maxShardSize->get().get());
StorageMetrics const& stats = shardSize->get().get().metrics;
auto bandwidthStatus = getBandwidthStatus(stats);
bool sizeSplit = stats.bytes > shardBounds.max.bytes,
writeSplit = bandwidthStatus == BandwidthStatusHigh && keys.begin < keyServersKeys.begin;
bool shouldSplit = sizeSplit || writeSplit;
bool shouldMerge = stats.bytes < shardBounds.min.bytes && bandwidthStatus == BandwidthStatusLow;
auto prevIter = self->shards->rangeContaining(keys.begin);
if (keys.begin > allKeys.begin)
--prevIter;
auto nextIter = self->shards->rangeContaining(keys.begin);
if (keys.end < allKeys.end)
++nextIter;
bool shouldMerge = stats.bytes < shardBounds.min.bytes && bandwidthStatus == BandwidthStatusLow &&
(shardForwardMergeFeasible(self, keys, nextIter.range()) ||
shardBackwardMergeFeasible(self, keys, prevIter.range()));
// Every invocation must set this or clear it
if (shouldMerge && !self->anyZeroHealthyTeams->get()) {
@ -793,8 +1220,8 @@ ACTOR Future<Void> shardTracker(DataDistributionTracker::SafeAccessor self,
// Use the current known size to check for (and start) splits and merges.
wait(shardEvaluator(self(), keys, shardSize, wantsToMerge));
// We could have a lot of actors being released from the previous wait at the same time. Immediately calling
// delay(0) mitigates the resulting SlowTask
// We could have a lot of actors being released from the previous wait at the same time. Immediately
// calling delay(0) mitigates the resulting SlowTask
wait(delay(0, TaskPriority::DataDistribution));
}
} catch (Error& e) {
@ -1042,7 +1469,8 @@ ACTOR Future<Void> dataDistributionTracker(Reference<InitialDataDistribution> in
Reference<AsyncVar<bool>> anyZeroHealthyTeams,
UID distributorId,
KeyRangeMap<ShardTrackedData>* shards,
bool* trackerCancelled) {
bool* trackerCancelled,
Optional<Reference<TenantCache>> ddTenantCache) {
state DataDistributionTracker self(cx,
distributorId,
readyToStart,
@ -1051,7 +1479,8 @@ ACTOR Future<Void> dataDistributionTracker(Reference<InitialDataDistribution> in
physicalShardCollection,
anyZeroHealthyTeams,
shards,
trackerCancelled);
trackerCancelled,
ddTenantCache);
state Future<Void> loggingTrigger = Void();
state Future<Void> readHotDetect = readHotDetector(&self);
state Reference<EventCacheHolder> ddTrackerStatsEventHolder = makeReference<EventCacheHolder>("DDTrackerStats");
@ -1059,6 +1488,11 @@ ACTOR Future<Void> dataDistributionTracker(Reference<InitialDataDistribution> in
wait(trackInitialShards(&self, initData));
initData = Reference<InitialDataDistribution>();
state PromiseStream<TenantCacheTenantCreated> tenantCreationSignal;
if (self.ddTenantCache.present()) {
tenantCreationSignal = self.ddTenantCache.get()->tenantCreationSignal;
}
loop choose {
when(Promise<int64_t> req = waitNext(getAverageShardBytes)) { req.send(self.getAverageShardBytes()); }
when(wait(loggingTrigger)) {
@ -1080,6 +1514,11 @@ ACTOR Future<Void> dataDistributionTracker(Reference<InitialDataDistribution> in
self.sizeChanges.add(fetchShardMetricsList(&self, req));
}
when(wait(self.sizeChanges.getResult())) {}
when(TenantCacheTenantCreated newTenant = waitNext(tenantCreationSignal.getFuture())) {
self.sizeChanges.add(tenantCreationHandling(&self, newTenant));
}
when(KeyRange req = waitNext(self.shardsAffectedByTeamFailure->restartShardTracker.getFuture())) {
restartShardTrackers(&self, req);
}

View File

@ -584,12 +584,6 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributor> self,
try {
wait(DataDistributor::init(self));
state Reference<TenantCache> ddTenantCache;
if (ddIsTenantAware) {
ddTenantCache = makeReference<TenantCache>(cx, self->ddId);
wait(ddTenantCache->build(cx));
}
// When/If this assertion fails, Evan owes Ben a pat on the back for his foresight
ASSERT(self->configuration.storageTeamSize > 0);
@ -601,6 +595,12 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributor> self,
state Reference<AsyncVar<bool>> processingWiggle(new AsyncVar<bool>(false));
state Promise<Void> readyToStart;
state Optional<Reference<TenantCache>> ddTenantCache;
if (ddIsTenantAware) {
ddTenantCache = makeReference<TenantCache>(cx, self->ddId);
wait(ddTenantCache.get()->build());
}
self->shardsAffectedByTeamFailure = makeReference<ShardsAffectedByTeamFailure>();
self->physicalShardCollection = makeReference<PhysicalShardCollection>();
wait(self->resumeRelocations());
@ -624,10 +624,6 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributor> self,
} else {
anyZeroHealthyTeams = zeroHealthyTeams[0];
}
if (ddIsTenantAware) {
actors.push_back(reportErrorsExcept(
ddTenantCache->monitorTenantMap(), "DDTenantCacheMonitor", self->ddId, &normalDDQueueErrors()));
}
actors.push_back(self->pollMoveKeysLock());
actors.push_back(reportErrorsExcept(dataDistributionTracker(self->initData,
@ -643,7 +639,8 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributor> self,
anyZeroHealthyTeams,
self->ddId,
&shards,
&trackerCancelled),
&trackerCancelled,
ddTenantCache),
"DDTracker",
self->ddId,
&normalDDQueueErrors()));
@ -673,6 +670,13 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributor> self,
self->ddId,
&normalDDQueueErrors()));
if (ddIsTenantAware) {
actors.push_back(reportErrorsExcept(ddTenantCache.get()->monitorTenantMap(),
"DDTenantCacheMonitor",
self->ddId,
&normalDDQueueErrors()));
}
std::vector<DDTeamCollection*> teamCollectionsPtrs;
primaryTeamCollection = makeReference<DDTeamCollection>(
cx,

View File

@ -141,7 +141,7 @@ CipherKeyValidityTS getCipherKeyValidityTS(Optional<int64_t> refreshInterval, Op
struct EncryptBaseCipherKey {
EncryptCipherDomainId domainId;
Standalone<EncryptCipherDomainName> domainName;
Standalone<EncryptCipherDomainNameRef> domainName;
EncryptCipherBaseKeyId baseCipherId;
Standalone<StringRef> baseCipherKey;
// Timestamp after which the cached CipherKey is eligible for KMS refresh
@ -159,13 +159,13 @@ struct EncryptBaseCipherKey {
EncryptBaseCipherKey() : domainId(0), baseCipherId(0), baseCipherKey(StringRef()), refreshAt(0), expireAt(0) {}
explicit EncryptBaseCipherKey(EncryptCipherDomainId dId,
EncryptCipherDomainName dName,
Standalone<EncryptCipherDomainNameRef> dName,
EncryptCipherBaseKeyId cipherId,
StringRef cipherKey,
Standalone<StringRef> cipherKey,
int64_t refAtTS,
int64_t expAtTS)
: domainId(dId), domainName(Standalone<StringRef>(dName)), baseCipherId(cipherId),
baseCipherKey(Standalone<StringRef>(cipherKey)), refreshAt(refAtTS), expireAt(expAtTS) {}
: domainId(dId), domainName(dName), baseCipherId(cipherId), baseCipherKey(cipherKey), refreshAt(refAtTS),
expireAt(expAtTS) {}
bool isValid() const {
int64_t currTS = (int64_t)now();
@ -244,9 +244,9 @@ public:
}
void insertIntoBaseDomainIdCache(const EncryptCipherDomainId domainId,
EncryptCipherDomainName domainName,
Standalone<EncryptCipherDomainNameRef> domainName,
const EncryptCipherBaseKeyId baseCipherId,
StringRef baseCipherKey,
Standalone<StringRef> baseCipherKey,
int64_t refreshAtTS,
int64_t expireAtTS) {
// Entries in domainId cache are eligible for periodic refreshes to support 'limiting lifetime of encryption
@ -263,9 +263,9 @@ public:
}
void insertIntoBaseCipherIdCache(const EncryptCipherDomainId domainId,
EncryptCipherDomainName domainName,
Standalone<EncryptCipherDomainNameRef> domainName,
const EncryptCipherBaseKeyId baseCipherId,
const StringRef baseCipherKey,
const Standalone<StringRef> baseCipherKey,
int64_t refreshAtTS,
int64_t expireAtTS) {
// Given an cipherKey is immutable, it is OK to NOT expire cached information.

View File

@ -382,7 +382,8 @@ ACTOR Future<Void> serverPeekStreamGetMore(ILogSystem::ServerPeekCursor* self, T
DebugLogTraceEvent(SevDebug, "SPC_GetMoreB_Error", self->randomID)
.errorUnsuppressed(e)
.detail("Tag", self->tag);
if (e.code() == error_code_connection_failed || e.code() == error_code_operation_obsolete) {
if (e.code() == error_code_connection_failed || e.code() == error_code_operation_obsolete ||
e.code() == error_code_request_maybe_delivered) {
// NOTE: delay in order to avoid the endless retry loop block other tasks
self->peekReplyStream.reset();
wait(delay(0));

View File

@ -19,6 +19,7 @@
*/
#include <cmath>
#include "flow/ApiVersion.h"
#include "flow/UnitTest.h"
#include "flow/TDMetric.actor.h"
#include "fdbclient/DatabaseContext.h"
@ -417,7 +418,7 @@ TEST_CASE("/fdbserver/metrics/TraceEvents") {
}
fprintf(stdout, "Using environment variables METRICS_CONNFILE and METRICS_PREFIX.\n");
state Database metricsDb = Database::createDatabase(metricsConnFile, Database::API_VERSION_LATEST);
state Database metricsDb = Database::createDatabase(metricsConnFile, ApiVersion::LATEST_VERSION);
TDMetricCollection::getTDMetrics()->address = LiteralStringRef("0.0.0.0:0");
state Future<Void> metrics = runMetrics(metricsDb, KeyRef(metricsPrefix));
state int64_t x = 0;

View File

@ -768,7 +768,7 @@ ACTOR Future<Void> waitForQuietDatabase(Database cx,
int64_t maxDataDistributionQueueSize = 0,
int64_t maxPoppedVersionLag = 30e6,
int64_t maxVersionOffset = 1e6) {
state QuietDatabaseChecker checker(isBuggifyEnabled(BuggifyType::General) ? 3600.0 : 1000.0);
state QuietDatabaseChecker checker(isBuggifyEnabled(BuggifyType::General) ? 4000.0 : 1000.0);
state Future<Void> reconfig =
reconfigureAfter(cx, 100 + (deterministicRandom()->random01() * 100), dbInfo, "QuietDatabase");
state Future<int64_t> dataInFlight;

View File

@ -276,7 +276,7 @@ ACTOR Future<Void> discoverKmsUrls(Reference<RESTKmsConnectorCtx> ctx, bool refr
void parseKmsResponse(Reference<RESTKmsConnectorCtx> ctx,
Reference<HTTP::Response> resp,
Arena* arena,
VectorRef<EncryptCipherKeyDetails>* outCipherKeyDetails) {
VectorRef<EncryptCipherKeyDetailsRef>* outCipherKeyDetails) {
// Acceptable response payload json format:
//
// response_json_payload {
@ -542,7 +542,7 @@ ACTOR
Future<Void> fetchEncryptionKeys_impl(Reference<RESTKmsConnectorCtx> ctx,
StringRef requestBodyRef,
Arena* arena,
VectorRef<EncryptCipherKeyDetails>* outCipherKeyDetails) {
VectorRef<EncryptCipherKeyDetailsRef>* outCipherKeyDetails) {
state Reference<HTTP::Response> resp;
// Follow 2-phase scheme:
@ -1096,15 +1096,15 @@ void validateKmsUrls(Reference<RESTKmsConnectorCtx> ctx) {
ASSERT_EQ(urlCtx->url.compare(KMS_URL_NAME_TEST), 0);
}
void testGetEncryptKeysByKeyIdsRequestBody(Reference<RESTKmsConnectorCtx> ctx, Arena arena) {
void testGetEncryptKeysByKeyIdsRequestBody(Reference<RESTKmsConnectorCtx> ctx, Arena& arena) {
KmsConnLookupEKsByKeyIdsReq req;
std::unordered_map<EncryptCipherBaseKeyId, EncryptCipherDomainId> keyMap;
const int nKeys = deterministicRandom()->randomInt(7, 8);
for (int i = 1; i < nKeys; i++) {
EncryptCipherDomainId domainId = getRandomDomainId();
EncryptCipherDomainName domainName = domainId < 0
? StringRef(arena, std::string(FDB_DEFAULT_ENCRYPT_DOMAIN_NAME))
: StringRef(arena, std::to_string(domainId));
EncryptCipherDomainNameRef domainName = domainId < 0
? StringRef(arena, std::string(FDB_DEFAULT_ENCRYPT_DOMAIN_NAME))
: StringRef(arena, std::to_string(domainId));
req.encryptKeyInfos.emplace_back_deep(req.arena, domainId, i, domainName);
keyMap[i] = domainId;
}
@ -1121,7 +1121,7 @@ void testGetEncryptKeysByKeyIdsRequestBody(Reference<RESTKmsConnectorCtx> ctx, A
getFakeKmsResponse(requestBodyRef, true, httpResp);
TraceEvent("FetchKeysByKeyIds", ctx->uid).setMaxFieldLength(100000).detail("HttpRespStr", httpResp->content);
VectorRef<EncryptCipherKeyDetails> cipherDetails;
VectorRef<EncryptCipherKeyDetailsRef> cipherDetails;
parseKmsResponse(ctx, httpResp, &arena, &cipherDetails);
ASSERT_EQ(cipherDetails.size(), keyMap.size());
for (const auto& detail : cipherDetails) {
@ -1135,16 +1135,16 @@ void testGetEncryptKeysByKeyIdsRequestBody(Reference<RESTKmsConnectorCtx> ctx, A
}
}
void testGetEncryptKeysByDomainIdsRequestBody(Reference<RESTKmsConnectorCtx> ctx, Arena arena) {
void testGetEncryptKeysByDomainIdsRequestBody(Reference<RESTKmsConnectorCtx> ctx, Arena& arena) {
KmsConnLookupEKsByDomainIdsReq req;
std::unordered_map<EncryptCipherDomainId, KmsConnLookupDomainIdsReqInfo> domainInfoMap;
std::unordered_map<EncryptCipherDomainId, KmsConnLookupDomainIdsReqInfoRef> domainInfoMap;
const int nKeys = deterministicRandom()->randomInt(7, 25);
for (int i = 1; i < nKeys; i++) {
EncryptCipherDomainId domainId = getRandomDomainId();
EncryptCipherDomainName domainName = domainId < 0
? StringRef(arena, std::string(FDB_DEFAULT_ENCRYPT_DOMAIN_NAME))
: StringRef(arena, std::to_string(domainId));
KmsConnLookupDomainIdsReqInfo reqInfo(req.arena, domainId, domainName);
EncryptCipherDomainNameRef domainName = domainId < 0
? StringRef(arena, std::string(FDB_DEFAULT_ENCRYPT_DOMAIN_NAME))
: StringRef(arena, std::to_string(domainId));
KmsConnLookupDomainIdsReqInfoRef reqInfo(req.arena, domainId, domainName);
if (domainInfoMap.insert({ domainId, reqInfo }).second) {
req.encryptDomainInfos.push_back(req.arena, reqInfo);
}
@ -1159,7 +1159,7 @@ void testGetEncryptKeysByDomainIdsRequestBody(Reference<RESTKmsConnectorCtx> ctx
getFakeKmsResponse(jsonReqRef, false, httpResp);
TraceEvent("FetchKeysByDomainIds", ctx->uid).detail("HttpRespStr", httpResp->content);
VectorRef<EncryptCipherKeyDetails> cipherDetails;
VectorRef<EncryptCipherKeyDetailsRef> cipherDetails;
parseKmsResponse(ctx, httpResp, &arena, &cipherDetails);
ASSERT_EQ(domainInfoMap.size(), cipherDetails.size());
for (const auto& detail : cipherDetails) {
@ -1174,7 +1174,7 @@ void testGetEncryptKeysByDomainIdsRequestBody(Reference<RESTKmsConnectorCtx> ctx
void testMissingCipherDetailsTag(Reference<RESTKmsConnectorCtx> ctx) {
Arena arena;
VectorRef<EncryptCipherKeyDetails> cipherDetails;
VectorRef<EncryptCipherKeyDetailsRef> cipherDetails;
rapidjson::Document doc;
doc.SetObject();
@ -1201,7 +1201,7 @@ void testMissingCipherDetailsTag(Reference<RESTKmsConnectorCtx> ctx) {
void testMalformedCipherDetails(Reference<RESTKmsConnectorCtx> ctx) {
Arena arena;
VectorRef<EncryptCipherKeyDetails> cipherDetails;
VectorRef<EncryptCipherKeyDetailsRef> cipherDetails;
rapidjson::Document doc;
doc.SetObject();
@ -1228,7 +1228,7 @@ void testMalformedCipherDetails(Reference<RESTKmsConnectorCtx> ctx) {
void testMalfromedCipherDetailObj(Reference<RESTKmsConnectorCtx> ctx) {
Arena arena;
VectorRef<EncryptCipherKeyDetails> cipherDetails;
VectorRef<EncryptCipherKeyDetailsRef> cipherDetails;
rapidjson::Document doc;
doc.SetObject();
@ -1260,7 +1260,7 @@ void testMalfromedCipherDetailObj(Reference<RESTKmsConnectorCtx> ctx) {
void testKMSErrorResponse(Reference<RESTKmsConnectorCtx> ctx) {
Arena arena;
VectorRef<EncryptCipherKeyDetails> cipherDetails;
VectorRef<EncryptCipherKeyDetailsRef> cipherDetails;
rapidjson::Document doc;
doc.SetObject();

View File

@ -31,6 +31,7 @@
#include "fdbclient/ManagementAPI.actor.h"
#include "fdbclient/MutationList.h"
#include "fdbclient/BackupContainer.h"
#include "flow/ApiVersion.h"
#include "flow/IAsyncFile.h"
#include "fdbrpc/simulator.h"
#include "flow/genericactors.actor.h"
@ -410,7 +411,7 @@ ACTOR Future<Void> restoreWorker(Reference<IClusterConnectionRecord> connRecord,
LocalityData locality,
std::string coordFolder) {
try {
Database cx = Database::createDatabase(connRecord, Database::API_VERSION_LATEST, IsInternal::True, locality);
Database cx = Database::createDatabase(connRecord, ApiVersion::LATEST_VERSION, IsInternal::True, locality);
wait(reportErrors(_restoreWorker(cx, locality), "RestoreWorker"));
} catch (Error& e) {
TraceEvent("FastRestoreWorker").detail("Error", e.what());

View File

@ -295,7 +295,7 @@ ACTOR Future<Void> testRunWorkload(KmsConnectorInterface inf, uint32_t nEncrypti
for (i = 0; i < maxDomainIds; i++) {
// domainIdsReq.encryptDomainIds.push_back(i);
EncryptCipherDomainId domainId = i;
EncryptCipherDomainName domainName = StringRef(domainIdsReq.arena, std::to_string(domainId));
EncryptCipherDomainNameRef domainName = StringRef(domainIdsReq.arena, std::to_string(domainId));
domainIdsReq.encryptDomainInfos.emplace_back(domainIdsReq.arena, i, domainName);
}
KmsConnLookupEKsByDomainIdsRep domainIdsRep = wait(inf.ekLookupByDomainIds.getReply(domainIdsReq));

View File

@ -24,6 +24,7 @@
#include <sstream>
#include <string_view>
#include <toml.hpp>
#include "fdbclient/FDBTypes.h"
#include "fdbrpc/Locality.h"
#include "fdbrpc/simulator.h"
#include "fdbrpc/IPAllowList.h"
@ -473,7 +474,7 @@ ACTOR Future<Void> runBackup(Reference<IClusterConnectionRecord> connRecord) {
}
if (g_simulator.backupAgents == ISimulator::BackupAgentType::BackupToFile) {
Database cx = Database::createDatabase(connRecord, -1);
Database cx = Database::createDatabase(connRecord, ApiVersion::LATEST_VERSION);
state FileBackupAgent fileAgent;
agentFutures.push_back(fileAgent.run(
@ -501,11 +502,11 @@ ACTOR Future<Void> runDr(Reference<IClusterConnectionRecord> connRecord) {
if (g_simulator.drAgents == ISimulator::BackupAgentType::BackupToDB) {
ASSERT(g_simulator.extraDatabases.size() == 1);
Database cx = Database::createDatabase(connRecord, -1);
Database cx = Database::createDatabase(connRecord, ApiVersion::LATEST_VERSION);
auto extraFile =
makeReference<ClusterConnectionMemoryRecord>(ClusterConnectionString(g_simulator.extraDatabases[0]));
state Database drDatabase = Database::createDatabase(extraFile, -1);
state Database drDatabase = Database::createDatabase(extraFile, ApiVersion::LATEST_VERSION);
TraceEvent("StartingDrAgents")
.detail("ConnectionString", connRecord->getConnectionString().toString())
@ -1914,6 +1915,7 @@ void setupSimulatedSystem(std::vector<Future<Void>>* systemActors,
}
simconfig.db.tenantMode = tenantMode;
simconfig.db.encryptionAtRestMode = EncryptionAtRestMode::DISABLED;
StatusObject startingConfigJSON = simconfig.db.toJSON(true);
std::string startingConfigString = "new";
@ -1948,7 +1950,8 @@ void setupSimulatedSystem(std::vector<Future<Void>>* systemActors,
if (kv.second.type() == json_spirit::int_type) {
startingConfigString += kv.first + ":=" + format("%d", kv.second.get_int());
} else if (kv.second.type() == json_spirit::str_type) {
if ("storage_migration_type" == kv.first || "tenant_mode" == kv.first) {
if ("storage_migration_type" == kv.first || "tenant_mode" == kv.first ||
"encryption_at_rest_mode" == kv.first) {
startingConfigString += kv.first + "=" + kv.second.get_str();
} else {
startingConfigString += kv.second.get_str();

View File

@ -18,8 +18,11 @@
* limitations under the License.
*/
#include "fdbclient/SystemData.h"
#include "fdbclient/FDBTypes.h"
#include "fdbserver/DDTeamCollection.h"
#include "fdbserver/TenantCache.h"
#include "flow/flow.h"
#include <limits>
#include <string>
#include "flow/actorcompiler.h"
@ -87,6 +90,8 @@ public:
for (int i = 0; i < tenantList.size(); i++) {
if (tenantCache->update(tenantList[i].first, tenantList[i].second)) {
tenantListUpdated = true;
TenantCacheTenantCreated req(tenantList[i].second.prefix);
tenantCache->tenantCreationSignal.send(req);
}
}
@ -174,7 +179,7 @@ std::string TenantCache::desc() const {
s += ", ";
}
s += "Name: " + tenant->name().toString() + " Prefix: " + tenantPrefix.printable();
s += "Name: " + tenant->name().toString() + " Prefix: " + tenantPrefix.toString();
count++;
}
@ -194,10 +199,23 @@ bool TenantCache::isTenantKey(KeyRef key) const {
return true;
}
Future<Void> TenantCache::build(Database cx) {
// Kick off construction of the tenant cache (initial read of the tenant map);
// delegates to the actor implementation in TenantCacheImpl.
Future<Void> TenantCache::build() {
return TenantCacheImpl::build(this);
}
// Find the cached tenant (if any) whose prefix covers the given key.
// Returns an empty Optional when no tenant in the cache owns the key.
Optional<Reference<TCTenantInfo>> TenantCache::tenantOwning(KeyRef key) const {
	// If any tenant owns the key, it is the one with the greatest prefix <= key.
	auto candidate = tenantCache.lastLessOrEqual(key);
	if (candidate == tenantCache.end() || !key.startsWith(candidate->key)) {
		return {};
	}
	return candidate->value;
}
// Start the long-running actor that watches the tenant map for changes and
// keeps this cache up to date; delegates to TenantCacheImpl.
Future<Void> TenantCache::monitorTenantMap() {
return TenantCacheImpl::monitorTenantMap(this);
}

View File

@ -25,6 +25,9 @@
#define FDBSERVER_DATA_DISTRIBUTION_ACTOR_H
#include "fdbclient/NativeAPI.actor.h"
#include "fdbserver/MoveKeys.actor.h"
#include "fdbserver/TenantCache.h"
#include "fdbserver/TCInfo.h"
#include "fdbclient/RunTransaction.actor.h"
#include "fdbserver/DDTxnProcessor.h"
#include "fdbserver/Knobs.h"
@ -45,7 +48,16 @@
// RelocateReason to DataMovementReason is one-to-N mapping
class RelocateReason {
public:
enum Value : int8_t { OTHER = 0, REBALANCE_DISK, REBALANCE_READ, MERGE_SHARD, SIZE_SPLIT, WRITE_SPLIT, __COUNT };
enum Value : int8_t {
OTHER = 0,
REBALANCE_DISK,
REBALANCE_READ,
MERGE_SHARD,
SIZE_SPLIT,
WRITE_SPLIT,
TENANT_SPLIT,
__COUNT
};
RelocateReason(Value v) : value(v) { ASSERT(value != __COUNT); }
explicit RelocateReason(int v) : value((Value)v) { ASSERT(value != __COUNT); }
std::string toString() const {
@ -62,6 +74,8 @@ public:
return "SizeSplit";
case WRITE_SPLIT:
return "WriteSplit";
case TENANT_SPLIT:
return "TenantSplit";
case __COUNT:
ASSERT(false);
}
@ -153,130 +167,6 @@ private:
moveReason(DataMovementReason::INVALID) {}
};
// Abstract view of a storage-server team used by the data distributor.
// Declarations only here; the accessors are implemented by the team types in
// DDTeamCollection elsewhere in the project. Exposes load/space accounting
// used when scoring candidate teams for shard relocation.
struct IDataDistributionTeam {
virtual std::vector<StorageServerInterface> getLastKnownServerInterfaces() const = 0;
virtual int size() const = 0;
virtual std::vector<UID> const& getServerIDs() const = 0;
// In-flight data/read accounting (deltas may be negative to retire work).
virtual void addDataInFlightToTeam(int64_t delta) = 0;
virtual void addReadInFlightToTeam(int64_t delta) = 0;
virtual int64_t getDataInFlightToTeam() const = 0;
virtual int64_t getLoadBytes(bool includeInFlight = true, double inflightPenalty = 1.0) const = 0;
virtual int64_t getReadInFlightToTeam() const = 0;
virtual double getLoadReadBandwidth(bool includeInFlight = true, double inflightPenalty = 1.0) const = 0;
virtual int64_t getMinAvailableSpace(bool includeInFlight = true) const = 0;
virtual double getMinAvailableSpaceRatio(bool includeInFlight = true) const = 0;
virtual bool hasHealthyAvailableSpace(double minRatio) const = 0;
virtual Future<Void> updateStorageMetrics() = 0;
// Reference counting hooks so Reference<IDataDistributionTeam> can manage lifetime.
virtual void addref() const = 0;
virtual void delref() const = 0;
virtual bool isHealthy() const = 0;
virtual void setHealthy(bool) = 0;
virtual int getPriority() const = 0;
virtual void setPriority(int) = 0;
virtual bool isOptimal() const = 0;
virtual bool isWrongConfiguration() const = 0;
virtual void setWrongConfiguration(bool) = 0;
virtual void addServers(const std::vector<UID>& servers) = 0;
virtual std::string getTeamID() const = 0;
// Human-readable summary (team ID, size, each member's address and short
// server ID) for tracing/debugging output.
// NOTE(review): "%d" is paired with servers.size() (a size_t) — relies on
// the project's format() tolerating this; confirm or cast to int.
std::string getDesc() const {
const auto& servers = getLastKnownServerInterfaces();
std::string s = format("TeamID %s; ", getTeamID().c_str());
s += format("Size %d; ", servers.size());
for (int i = 0; i < servers.size(); i++) {
if (i)
s += ", ";
s += servers[i].address().toString() + " " + servers[i].id().shortString();
}
return s;
}
};
FDB_DECLARE_BOOLEAN_PARAM(WantNewServers);
FDB_DECLARE_BOOLEAN_PARAM(WantTrueBest);
FDB_DECLARE_BOOLEAN_PARAM(PreferLowerDiskUtil);
FDB_DECLARE_BOOLEAN_PARAM(TeamMustHaveShards);
FDB_DECLARE_BOOLEAN_PARAM(ForReadBalance);
FDB_DECLARE_BOOLEAN_PARAM(PreferLowerReadUtil);
FDB_DECLARE_BOOLEAN_PARAM(FindTeamByServers);
// Request asking the team collection to pick a destination team for a shard
// relocation (or, when findTeamByServers is set, to look a team up by its
// exact member servers). The flags tune how candidate teams are scored; the
// selected team — or an empty Optional when none qualifies — is delivered
// through `reply` along with a bool flag.
struct GetTeamRequest {
	bool wantsNewServers; // In additional to servers in completeSources, try to find teams with new server
	bool wantsTrueBest;
	bool preferLowerDiskUtil; // if true, lower utilized team has higher score
	bool teamMustHaveShards;
	bool forReadBalance;
	bool preferLowerReadUtil; // only make sense when forReadBalance is true
	double inflightPenalty;
	bool findTeamByServers;
	std::vector<UID> completeSources;
	std::vector<UID> src;
	Promise<std::pair<Optional<Reference<IDataDistributionTeam>>, bool>> reply;

	typedef Reference<IDataDistributionTeam> TeamRef;

	GetTeamRequest() {}
	GetTeamRequest(WantNewServers wantsNewServers,
	               WantTrueBest wantsTrueBest,
	               PreferLowerDiskUtil preferLowerDiskUtil,
	               TeamMustHaveShards teamMustHaveShards,
	               ForReadBalance forReadBalance = ForReadBalance::False,
	               PreferLowerReadUtil preferLowerReadUtil = PreferLowerReadUtil::False,
	               double inflightPenalty = 1.0)
	  : wantsNewServers(wantsNewServers), wantsTrueBest(wantsTrueBest), preferLowerDiskUtil(preferLowerDiskUtil),
	    teamMustHaveShards(teamMustHaveShards), forReadBalance(forReadBalance),
	    preferLowerReadUtil(preferLowerReadUtil), inflightPenalty(inflightPenalty),
	    findTeamByServers(FindTeamByServers::False) {}
	// Lookup-by-servers form: find the team whose members are exactly `servers`.
	GetTeamRequest(std::vector<UID> servers)
	  : wantsNewServers(WantNewServers::False), wantsTrueBest(WantTrueBest::False),
	    preferLowerDiskUtil(PreferLowerDiskUtil::False), teamMustHaveShards(TeamMustHaveShards::False),
	    forReadBalance(ForReadBalance::False), preferLowerReadUtil(PreferLowerReadUtil::False), inflightPenalty(1.0),
	    findTeamByServers(FindTeamByServers::True), src(std::move(servers)) {}

	// return true if a.score < b.score
	[[nodiscard]] bool lessCompare(TeamRef a, TeamRef b, int64_t aLoadBytes, int64_t bLoadBytes) const {
		int res = 0;
		if (forReadBalance) {
			res = preferLowerReadUtil ? greaterReadLoad(a, b) : lessReadLoad(a, b);
		}
		return res == 0 ? lessCompareByLoad(aLoadBytes, bLoadBytes) : res < 0;
	}

	// Human-readable dump of the request's flags and complete sources, for tracing.
	std::string getDesc() const {
		std::stringstream ss;

		// FIX: the original streamed `"forReadBalance" << forReadBalance` with no
		// leading space or ':' separator, fusing the field onto the previous one
		// in trace output; restore the " name:" convention used by every other field.
		ss << "WantsNewServers:" << wantsNewServers << " WantsTrueBest:" << wantsTrueBest
		   << " PreferLowerDiskUtil:" << preferLowerDiskUtil << " teamMustHaveShards:" << teamMustHaveShards
		   << " forReadBalance:" << forReadBalance << " inflightPenalty:" << inflightPenalty
		   << " findTeamByServers:" << findTeamByServers << ";";
		ss << "CompleteSources:";
		for (const auto& cs : completeSources) {
			ss << cs.toString() << ",";
		}

		return std::move(ss).str();
	}

private:
	// return true if preferHigherUtil && aLoadBytes <= bLoadBytes (higher load bytes has larger score)
	// or preferLowerUtil && aLoadBytes > bLoadBytes
	bool lessCompareByLoad(int64_t aLoadBytes, int64_t bLoadBytes) const {
		bool lessLoad = aLoadBytes <= bLoadBytes;
		return preferLowerDiskUtil ? !lessLoad : lessLoad;
	}

	// return -1 if a.readload > b.readload
	static int greaterReadLoad(TeamRef a, TeamRef b) {
		auto r1 = a->getLoadReadBandwidth(true), r2 = b->getLoadReadBandwidth(true);
		return r1 == r2 ? 0 : (r1 > r2 ? -1 : 1);
	}
	// return -1 if a.readload < b.readload
	static int lessReadLoad(TeamRef a, TeamRef b) {
		auto r1 = a->getLoadReadBandwidth(false), r2 = b->getLoadReadBandwidth(false);
		return r1 == r2 ? 0 : (r1 < r2 ? -1 : 1);
	}
};
struct GetMetricsRequest {
KeyRange keys;
Promise<StorageMetrics> reply;
@ -621,7 +511,8 @@ ACTOR Future<Void> dataDistributionTracker(Reference<InitialDataDistribution> in
Reference<AsyncVar<bool>> zeroHealthyTeams,
UID distributorId,
KeyRangeMap<ShardTrackedData>* shards,
bool* trackerCancelled);
bool* trackerCancelled,
Optional<Reference<TenantCache>> ddTenantCache);
ACTOR Future<Void> dataDistributionQueue(Database cx,
PromiseStream<RelocateShard> output,

View File

@ -0,0 +1,147 @@
/*
* DataDistributionTeam.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "fdbclient/StorageServerInterface.h"
// Abstract view of a storage-server team used by the data distributor.
// Declarations only here; the accessors are implemented by the team types in
// DDTeamCollection elsewhere in the project. Exposes load/space accounting
// used when scoring candidate teams for shard relocation.
struct IDataDistributionTeam {
virtual std::vector<StorageServerInterface> getLastKnownServerInterfaces() const = 0;
virtual int size() const = 0;
virtual std::vector<UID> const& getServerIDs() const = 0;
// In-flight data/read accounting (deltas may be negative to retire work).
virtual void addDataInFlightToTeam(int64_t delta) = 0;
virtual void addReadInFlightToTeam(int64_t delta) = 0;
virtual int64_t getDataInFlightToTeam() const = 0;
virtual int64_t getLoadBytes(bool includeInFlight = true, double inflightPenalty = 1.0) const = 0;
virtual int64_t getReadInFlightToTeam() const = 0;
virtual double getLoadReadBandwidth(bool includeInFlight = true, double inflightPenalty = 1.0) const = 0;
virtual int64_t getMinAvailableSpace(bool includeInFlight = true) const = 0;
virtual double getMinAvailableSpaceRatio(bool includeInFlight = true) const = 0;
virtual bool hasHealthyAvailableSpace(double minRatio) const = 0;
virtual Future<Void> updateStorageMetrics() = 0;
// Reference counting hooks so Reference<IDataDistributionTeam> can manage lifetime.
virtual void addref() const = 0;
virtual void delref() const = 0;
virtual bool isHealthy() const = 0;
virtual void setHealthy(bool) = 0;
virtual int getPriority() const = 0;
virtual void setPriority(int) = 0;
virtual bool isOptimal() const = 0;
virtual bool isWrongConfiguration() const = 0;
virtual void setWrongConfiguration(bool) = 0;
virtual void addServers(const std::vector<UID>& servers) = 0;
virtual std::string getTeamID() const = 0;
// Human-readable summary (team ID, size, each member's address and short
// server ID) for tracing/debugging output.
// NOTE(review): "%d" is paired with servers.size() (a size_t) — relies on
// the project's format() tolerating this; confirm or cast to int.
std::string getDesc() const {
const auto& servers = getLastKnownServerInterfaces();
std::string s = format("TeamID %s; ", getTeamID().c_str());
s += format("Size %d; ", servers.size());
for (int i = 0; i < servers.size(); i++) {
if (i)
s += ", ";
s += servers[i].address().toString() + " " + servers[i].id().shortString();
}
return s;
}
};
FDB_DECLARE_BOOLEAN_PARAM(WantNewServers);
FDB_DECLARE_BOOLEAN_PARAM(WantTrueBest);
FDB_DECLARE_BOOLEAN_PARAM(PreferLowerDiskUtil);
FDB_DECLARE_BOOLEAN_PARAM(TeamMustHaveShards);
FDB_DECLARE_BOOLEAN_PARAM(ForReadBalance);
FDB_DECLARE_BOOLEAN_PARAM(PreferLowerReadUtil);
FDB_DECLARE_BOOLEAN_PARAM(FindTeamByServers);
// Request asking the team collection to pick a destination team for a shard
// relocation (or, when findTeamByServers is set, to look a team up by its
// exact member servers). The flags tune how candidate teams are scored; the
// selected team — or an empty Optional when none qualifies — is delivered
// through `reply` along with a bool flag.
struct GetTeamRequest {
	bool wantsNewServers; // In additional to servers in completeSources, try to find teams with new server
	bool wantsTrueBest;
	bool preferLowerDiskUtil; // if true, lower utilized team has higher score
	bool teamMustHaveShards;
	bool forReadBalance;
	bool preferLowerReadUtil; // only make sense when forReadBalance is true
	double inflightPenalty;
	bool findTeamByServers;
	std::vector<UID> completeSources;
	std::vector<UID> src;
	Promise<std::pair<Optional<Reference<IDataDistributionTeam>>, bool>> reply;

	typedef Reference<IDataDistributionTeam> TeamRef;

	GetTeamRequest() {}
	GetTeamRequest(WantNewServers wantsNewServers,
	               WantTrueBest wantsTrueBest,
	               PreferLowerDiskUtil preferLowerDiskUtil,
	               TeamMustHaveShards teamMustHaveShards,
	               ForReadBalance forReadBalance = ForReadBalance::False,
	               PreferLowerReadUtil preferLowerReadUtil = PreferLowerReadUtil::False,
	               double inflightPenalty = 1.0)
	  : wantsNewServers(wantsNewServers), wantsTrueBest(wantsTrueBest), preferLowerDiskUtil(preferLowerDiskUtil),
	    teamMustHaveShards(teamMustHaveShards), forReadBalance(forReadBalance),
	    preferLowerReadUtil(preferLowerReadUtil), inflightPenalty(inflightPenalty),
	    findTeamByServers(FindTeamByServers::False) {}
	// Lookup-by-servers form: find the team whose members are exactly `servers`.
	GetTeamRequest(std::vector<UID> servers)
	  : wantsNewServers(WantNewServers::False), wantsTrueBest(WantTrueBest::False),
	    preferLowerDiskUtil(PreferLowerDiskUtil::False), teamMustHaveShards(TeamMustHaveShards::False),
	    forReadBalance(ForReadBalance::False), preferLowerReadUtil(PreferLowerReadUtil::False), inflightPenalty(1.0),
	    findTeamByServers(FindTeamByServers::True), src(std::move(servers)) {}

	// return true if a.score < b.score
	[[nodiscard]] bool lessCompare(TeamRef a, TeamRef b, int64_t aLoadBytes, int64_t bLoadBytes) const {
		int res = 0;
		if (forReadBalance) {
			res = preferLowerReadUtil ? greaterReadLoad(a, b) : lessReadLoad(a, b);
		}
		return res == 0 ? lessCompareByLoad(aLoadBytes, bLoadBytes) : res < 0;
	}

	// Human-readable dump of the request's flags and complete sources, for tracing.
	std::string getDesc() const {
		std::stringstream ss;

		// FIX: the original streamed `"forReadBalance" << forReadBalance` with no
		// leading space or ':' separator, fusing the field onto the previous one
		// in trace output; restore the " name:" convention used by every other field.
		ss << "WantsNewServers:" << wantsNewServers << " WantsTrueBest:" << wantsTrueBest
		   << " PreferLowerDiskUtil:" << preferLowerDiskUtil << " teamMustHaveShards:" << teamMustHaveShards
		   << " forReadBalance:" << forReadBalance << " inflightPenalty:" << inflightPenalty
		   << " findTeamByServers:" << findTeamByServers << ";";
		ss << "CompleteSources:";
		for (const auto& cs : completeSources) {
			ss << cs.toString() << ",";
		}

		return std::move(ss).str();
	}

private:
	// return true if preferHigherUtil && aLoadBytes <= bLoadBytes (higher load bytes has larger score)
	// or preferLowerUtil && aLoadBytes > bLoadBytes
	bool lessCompareByLoad(int64_t aLoadBytes, int64_t bLoadBytes) const {
		bool lessLoad = aLoadBytes <= bLoadBytes;
		return preferLowerDiskUtil ? !lessLoad : lessLoad;
	}

	// return -1 if a.readload > b.readload
	static int greaterReadLoad(TeamRef a, TeamRef b) {
		auto r1 = a->getLoadReadBandwidth(true), r2 = b->getLoadReadBandwidth(true);
		return r1 == r2 ? 0 : (r1 > r2 ? -1 : 1);
	}
	// return -1 if a.readload < b.readload
	static int lessReadLoad(TeamRef a, TeamRef b) {
		auto r1 = a->getLoadReadBandwidth(false), r2 = b->getLoadReadBandwidth(false);
		return r1 == r2 ? 0 : (r1 < r2 ? -1 : 1);
	}
};

View File

@ -216,7 +216,7 @@ public:
Future<EncryptionKey> getSecrets(const EncryptionKeyRef& key) override { return getSecrets(this, key); }
ACTOR static Future<EncryptionKey> getByRange(TenantAwareEncryptionKeyProvider* self, KeyRef begin, KeyRef end) {
EncryptCipherDomainName domainName;
EncryptCipherDomainNameRef domainName;
EncryptCipherDomainId domainId = self->getEncryptionDomainId(begin, end, &domainName);
TextAndHeaderCipherKeys cipherKeys = wait(getLatestEncryptCipherKeysForDomain(self->db, domainId, domainName));
EncryptionKey s;
@ -236,7 +236,7 @@ public:
private:
EncryptCipherDomainId getEncryptionDomainId(const KeyRef& begin,
const KeyRef& end,
EncryptCipherDomainName* domainName) {
EncryptCipherDomainNameRef* domainName) {
int64_t domainId = SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID;
int64_t beginTenantId = getTenant(begin, true /*inclusive*/);
int64_t endTenantId = getTenant(end, false /*inclusive*/);

View File

@ -67,7 +67,7 @@ struct KmsConnectorInterface {
}
};
struct EncryptCipherKeyDetails {
struct EncryptCipherKeyDetailsRef {
constexpr static FileIdentifier file_identifier = 1227025;
EncryptCipherDomainId encryptDomainId;
EncryptCipherBaseKeyId encryptKeyId;
@ -75,33 +75,33 @@ struct EncryptCipherKeyDetails {
Optional<int64_t> refreshAfterSec;
Optional<int64_t> expireAfterSec;
EncryptCipherKeyDetails() {}
explicit EncryptCipherKeyDetails(Arena& arena,
EncryptCipherDomainId dId,
EncryptCipherBaseKeyId keyId,
StringRef key)
EncryptCipherKeyDetailsRef() {}
explicit EncryptCipherKeyDetailsRef(Arena& arena,
EncryptCipherDomainId dId,
EncryptCipherBaseKeyId keyId,
StringRef key)
: encryptDomainId(dId), encryptKeyId(keyId), encryptKey(StringRef(arena, key)),
refreshAfterSec(Optional<int64_t>()), expireAfterSec(Optional<int64_t>()) {}
explicit EncryptCipherKeyDetails(EncryptCipherDomainId dId, EncryptCipherBaseKeyId keyId, StringRef key)
explicit EncryptCipherKeyDetailsRef(EncryptCipherDomainId dId, EncryptCipherBaseKeyId keyId, StringRef key)
: encryptDomainId(dId), encryptKeyId(keyId), encryptKey(key), refreshAfterSec(Optional<int64_t>()),
expireAfterSec(Optional<int64_t>()) {}
explicit EncryptCipherKeyDetails(Arena& arena,
EncryptCipherDomainId dId,
EncryptCipherBaseKeyId keyId,
StringRef key,
Optional<int64_t> refAfterSec,
Optional<int64_t> expAfterSec)
explicit EncryptCipherKeyDetailsRef(Arena& arena,
EncryptCipherDomainId dId,
EncryptCipherBaseKeyId keyId,
StringRef key,
Optional<int64_t> refAfterSec,
Optional<int64_t> expAfterSec)
: encryptDomainId(dId), encryptKeyId(keyId), encryptKey(StringRef(arena, key)), refreshAfterSec(refAfterSec),
expireAfterSec(expAfterSec) {}
explicit EncryptCipherKeyDetails(EncryptCipherDomainId dId,
EncryptCipherBaseKeyId keyId,
StringRef key,
Optional<int64_t> refAfterSec,
Optional<int64_t> expAfterSec)
explicit EncryptCipherKeyDetailsRef(EncryptCipherDomainId dId,
EncryptCipherBaseKeyId keyId,
StringRef key,
Optional<int64_t> refAfterSec,
Optional<int64_t> expAfterSec)
: encryptDomainId(dId), encryptKeyId(keyId), encryptKey(key), refreshAfterSec(refAfterSec),
expireAfterSec(expAfterSec) {}
bool operator==(const EncryptCipherKeyDetails& toCompare) {
bool operator==(const EncryptCipherKeyDetailsRef& toCompare) {
return encryptDomainId == toCompare.encryptDomainId && encryptKeyId == toCompare.encryptKeyId &&
encryptKey.compare(toCompare.encryptKey) == 0;
}
@ -115,30 +115,31 @@ struct EncryptCipherKeyDetails {
struct KmsConnLookupEKsByKeyIdsRep {
constexpr static FileIdentifier file_identifier = 2313778;
Arena arena;
VectorRef<EncryptCipherKeyDetails> cipherKeyDetails;
VectorRef<EncryptCipherKeyDetailsRef> cipherKeyDetails;
KmsConnLookupEKsByKeyIdsRep() {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, arena, cipherKeyDetails);
serializer(ar, cipherKeyDetails, arena);
}
};
struct KmsConnLookupKeyIdsReqInfo {
struct KmsConnLookupKeyIdsReqInfoRef {
constexpr static FileIdentifier file_identifier = 3092256;
EncryptCipherDomainId domainId;
EncryptCipherBaseKeyId baseCipherId;
EncryptCipherDomainName domainName;
EncryptCipherDomainNameRef domainName;
KmsConnLookupKeyIdsReqInfo() : domainId(ENCRYPT_INVALID_DOMAIN_ID), baseCipherId(ENCRYPT_INVALID_CIPHER_KEY_ID) {}
explicit KmsConnLookupKeyIdsReqInfo(Arena& arena,
const EncryptCipherDomainId dId,
const EncryptCipherBaseKeyId bCId,
StringRef name)
KmsConnLookupKeyIdsReqInfoRef()
: domainId(ENCRYPT_INVALID_DOMAIN_ID), baseCipherId(ENCRYPT_INVALID_CIPHER_KEY_ID) {}
explicit KmsConnLookupKeyIdsReqInfoRef(Arena& arena,
const EncryptCipherDomainId dId,
const EncryptCipherBaseKeyId bCId,
StringRef name)
: domainId(dId), baseCipherId(bCId), domainName(StringRef(arena, name)) {}
bool operator==(const KmsConnLookupKeyIdsReqInfo& info) const {
bool operator==(const KmsConnLookupKeyIdsReqInfoRef& info) const {
return domainId == info.domainId && baseCipherId == info.baseCipherId &&
(domainName.compare(info.domainName) == 0);
}
@ -152,45 +153,45 @@ struct KmsConnLookupKeyIdsReqInfo {
struct KmsConnLookupEKsByKeyIdsReq {
constexpr static FileIdentifier file_identifier = 6913396;
Arena arena;
VectorRef<KmsConnLookupKeyIdsReqInfo> encryptKeyInfos;
VectorRef<KmsConnLookupKeyIdsReqInfoRef> encryptKeyInfos;
Optional<UID> debugId;
ReplyPromise<KmsConnLookupEKsByKeyIdsRep> reply;
KmsConnLookupEKsByKeyIdsReq() {}
explicit KmsConnLookupEKsByKeyIdsReq(VectorRef<KmsConnLookupKeyIdsReqInfo> keyInfos, Optional<UID> dbgId)
explicit KmsConnLookupEKsByKeyIdsReq(VectorRef<KmsConnLookupKeyIdsReqInfoRef> keyInfos, Optional<UID> dbgId)
: encryptKeyInfos(keyInfos), debugId(dbgId) {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, arena, encryptKeyInfos, debugId, reply);
serializer(ar, encryptKeyInfos, debugId, reply, arena);
}
};
struct KmsConnLookupEKsByDomainIdsRep {
constexpr static FileIdentifier file_identifier = 3009025;
Arena arena;
VectorRef<EncryptCipherKeyDetails> cipherKeyDetails;
VectorRef<EncryptCipherKeyDetailsRef> cipherKeyDetails;
KmsConnLookupEKsByDomainIdsRep() {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, arena, cipherKeyDetails);
serializer(ar, cipherKeyDetails, arena);
}
};
struct KmsConnLookupDomainIdsReqInfo {
struct KmsConnLookupDomainIdsReqInfoRef {
constexpr static FileIdentifier file_identifier = 8980149;
EncryptCipherDomainId domainId;
EncryptCipherDomainName domainName;
EncryptCipherDomainNameRef domainName;
KmsConnLookupDomainIdsReqInfo() : domainId(ENCRYPT_INVALID_DOMAIN_ID) {}
explicit KmsConnLookupDomainIdsReqInfo(Arena& arena, const EncryptCipherDomainId dId, StringRef name)
KmsConnLookupDomainIdsReqInfoRef() : domainId(ENCRYPT_INVALID_DOMAIN_ID) {}
explicit KmsConnLookupDomainIdsReqInfoRef(Arena& arena, const EncryptCipherDomainId dId, StringRef name)
: domainId(dId), domainName(StringRef(arena, name)) {}
explicit KmsConnLookupDomainIdsReqInfo(const EncryptCipherDomainId dId, StringRef name)
explicit KmsConnLookupDomainIdsReqInfoRef(const EncryptCipherDomainId dId, StringRef name)
: domainId(dId), domainName(name) {}
bool operator==(const KmsConnLookupDomainIdsReqInfo& info) const {
bool operator==(const KmsConnLookupDomainIdsReqInfoRef& info) const {
return domainId == info.domainId && (domainName.compare(info.domainName) == 0);
}
@ -203,17 +204,17 @@ struct KmsConnLookupDomainIdsReqInfo {
struct KmsConnLookupEKsByDomainIdsReq {
constexpr static FileIdentifier file_identifier = 9918682;
Arena arena;
VectorRef<KmsConnLookupDomainIdsReqInfo> encryptDomainInfos;
VectorRef<KmsConnLookupDomainIdsReqInfoRef> encryptDomainInfos;
Optional<UID> debugId;
ReplyPromise<KmsConnLookupEKsByDomainIdsRep> reply;
KmsConnLookupEKsByDomainIdsReq() {}
explicit KmsConnLookupEKsByDomainIdsReq(VectorRef<KmsConnLookupDomainIdsReqInfo>& infos, Optional<UID> dbgId)
explicit KmsConnLookupEKsByDomainIdsReq(VectorRef<KmsConnLookupDomainIdsReqInfoRef>& infos, Optional<UID> dbgId)
: encryptDomainInfos(infos), debugId(dbgId) {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, arena, encryptDomainInfos, debugId, reply);
serializer(ar, encryptDomainInfos, debugId, reply, arena);
}
};

View File

@ -98,7 +98,7 @@ struct SimGetEncryptKeysByKeyIdsReply {
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, arena, encryptKeyDetails);
serializer(ar, encryptKeyDetails, arena);
}
};
@ -127,7 +127,7 @@ struct SimGetEncryptKeyByDomainIdReply {
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, arena, encryptKeyDetails);
serializer(ar, encryptKeyDetails, arena);
}
};

View File

@ -20,8 +20,11 @@
#pragma once
#include "fdbclient/SystemData.h"
#include "fdbclient/Tenant.h"
#include "fdbserver/DDTeamCollection.h"
#include "fdbrpc/ReplicationTypes.h"
#include "fdbserver/DataDistributionTeam.h"
#include "flow/Arena.h"
#include "flow/FastRef.h"
@ -29,6 +32,7 @@ class TCTeamInfo;
class TCTenantInfo;
class TCMachineInfo;
class TCMachineTeamInfo;
class DDTeamCollection;
class TCServerInfo : public ReferenceCounted<TCServerInfo> {
friend class TCServerInfoImpl;
@ -257,8 +261,8 @@ public:
TCTenantInfo(TenantInfo tinfo, Key prefix) : m_tenantInfo(tinfo), m_prefix(prefix) {}
std::vector<Reference<TCTeamInfo>>& teams() { return m_tenantTeams; }
TenantName name() { return m_tenantInfo.name.get(); }
std::string prefixDesc() { return m_prefix.printable(); }
TenantName name() const { return m_tenantInfo.name.get(); }
std::string prefixDesc() const { return m_prefix.printable(); }
void addTeam(TCTeamInfo team);
void removeTeam(TCTeamInfo team);

View File

@ -189,7 +189,7 @@ struct TLogPeekReply {
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, arena, messages, end, popped, maxKnownVersion, minKnownCommittedVersion, begin, onlySpilled);
serializer(ar, messages, end, popped, maxKnownVersion, minKnownCommittedVersion, begin, onlySpilled, arena);
}
};

View File

@ -18,17 +18,26 @@
* limitations under the License.
*/
#pragma once
#include "fdbclient/FDBTypes.h"
#include "fdbclient/NativeAPI.actor.h"
#include "fdbclient/Tenant.h"
#include "fdbserver/DDTeamCollection.h"
#include "fdbserver/TCInfo.h"
#include "flow/IRandom.h"
#include "flow/IndexedSet.h"
#include "flow/flow.h"
#include <limits>
#include <string>
typedef Map<KeyRef, Reference<TCTenantInfo>> TenantMapByPrefix;
// Notification payload sent through TenantCache::tenantCreationSignal when a
// new tenant shows up in the tenant map.
struct TenantCacheTenantCreated {
	KeyRange keys; // the full key range owned by the newly created tenant
	Promise<bool> reply;
	TenantCacheTenantCreated(Key prefix) : keys(prefixRange(prefix)) {}
};
class TenantCache : public ReferenceCounted<TenantCache> {
friend class TenantCacheImpl;
friend class TenantCacheUnitTest;
@ -62,11 +71,15 @@ public:
generation = deterministicRandom()->randomUInt32();
}
Future<Void> build(Database cx);
PromiseStream<TenantCacheTenantCreated> tenantCreationSignal;
Future<Void> build();
Future<Void> monitorTenantMap();
std::string desc() const;
bool isTenantKey(KeyRef key) const;
Optional<Reference<TCTenantInfo>> tenantOwning(KeyRef key) const;
};

View File

@ -291,7 +291,7 @@ struct ApiWorkload : TestWorkload {
ASSERT(g_simulator.extraDatabases.size() == 1);
auto extraFile =
makeReference<ClusterConnectionMemoryRecord>(ClusterConnectionString(g_simulator.extraDatabases[0]));
extraDB = Database::createDatabase(extraFile, -1);
extraDB = Database::createDatabase(extraFile, ApiVersion::LATEST_VERSION);
}
}

View File

@ -193,7 +193,8 @@ ACTOR Future<Void> testClientStream(std::vector<NetworkTestInterface> interfs,
ASSERT(rep.index == j++);
}
} catch (Error& e) {
ASSERT(e.code() == error_code_end_of_stream || e.code() == error_code_connection_failed);
ASSERT(e.code() == error_code_end_of_stream || e.code() == error_code_connection_failed ||
e.code() == error_code_request_maybe_delivered);
}
latency->tock(sample);
(*completed)++;

View File

@ -24,6 +24,7 @@
#include <unordered_map>
#include "fdbclient/BlobGranuleCommon.h"
#include "flow/ApiVersion.h"
#include "fmt/format.h"
#include "fdbclient/CommitTransaction.h"
#include "fdbclient/FDBTypes.h"
@ -1265,8 +1266,7 @@ public:
newestAvailableVersion.insert(allKeys, invalidVersion);
newestDirtyVersion.insert(allKeys, invalidVersion);
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA &&
(SERVER_KNOBS->STORAGE_SERVER_SHARD_AWARE || storage->shardAware())) {
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && storage->shardAware()) {
addShard(ShardInfo::newShard(this, StorageServerShard::notAssigned(allKeys)));
} else {
addShard(ShardInfo::newNotAssigned(allKeys));
@ -10359,7 +10359,7 @@ ACTOR Future<Void> memoryStoreRecover(IKeyValueStore* store, Reference<IClusterC
}
// create a temp client connect to DB
Database cx = Database::createDatabase(connRecord, Database::API_VERSION_LATEST);
Database cx = Database::createDatabase(connRecord, ApiVersion::LATEST_VERSION);
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(cx);
state int noCanRemoveCount = 0;
@ -10595,8 +10595,7 @@ ACTOR Future<Void> storageServer(IKeyValueStore* persistentData,
std::string folder,
Reference<IEncryptionKeyProvider> encryptionKeyProvider) {
state StorageServer self(persistentData, db, ssi, encryptionKeyProvider);
self.shardAware = SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA &&
(SERVER_KNOBS->STORAGE_SERVER_SHARD_AWARE || persistentData->shardAware());
self.shardAware = SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && persistentData->shardAware();
state Future<Void> ssCore;
self.clusterId.send(clusterId);
self.initialClusterVersion = startVersion;

View File

@ -684,7 +684,7 @@ ACTOR Future<Void> testerServerWorkload(WorkloadRequest work,
startRole(Role::TESTER, workIface.id(), UID(), details);
if (work.useDatabase) {
cx = Database::createDatabase(ccr, -1, IsInternal::True, locality);
cx = Database::createDatabase(ccr, ApiVersion::LATEST_VERSION, IsInternal::True, locality);
cx->defaultTenant = work.defaultTenant.castTo<TenantName>();
wait(delay(1.0));
}

View File

@ -23,6 +23,7 @@
#include <boost/lexical_cast.hpp>
#include "fdbclient/FDBTypes.h"
#include "flow/ApiVersion.h"
#include "flow/IAsyncFile.h"
#include "fdbrpc/Locality.h"
#include "fdbclient/GlobalConfig.actor.h"
@ -1663,7 +1664,7 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
if (metricsConnFile.size() > 0) {
try {
state Database db =
Database::createDatabase(metricsConnFile, Database::API_VERSION_LATEST, IsInternal::True, locality);
Database::createDatabase(metricsConnFile, ApiVersion::LATEST_VERSION, IsInternal::True, locality);
metricsLogger = runMetrics(db, KeyRef(metricsPrefix));
db->globalConfig->trigger(samplingFrequency, samplingProfilerUpdateFrequency);
} catch (Error& e) {

View File

@ -307,7 +307,7 @@ ACTOR Future<Void> chooseTransactionFactory(Database cx, std::vector<Transaction
new TransactionFactory<ThreadTransactionWrapper, Reference<IDatabase>>(dbHandle, dbHandle, false));
} else if (transactionType == MULTI_VERSION) {
printf("client %d: Running Multi-Version Transactions\n", self->clientPrefixInt);
MultiVersionApi::api->selectApiVersion(cx->apiVersion);
MultiVersionApi::api->selectApiVersion(cx->apiVersion.version());
Reference<IDatabase> threadSafeHandle =
wait(unsafeThreadFutureToFuture(ThreadSafeDatabase::createFromExistingDatabase(cx)));
Reference<IDatabase> dbHandle = MultiVersionDatabase::debugCreateFromExistingDatabase(threadSafeHandle);

View File

@ -111,7 +111,7 @@ struct AtomicOpsWorkload : TestWorkload {
Future<Void> setup(Database const& cx) override {
if (apiVersion500)
cx->apiVersion = 500;
cx->apiVersion = ApiVersion(500);
if (clientId != 0)
return Void();

View File

@ -29,9 +29,9 @@ struct AtomicOpsApiCorrectnessWorkload : TestWorkload {
uint32_t opType;
private:
static int getApiVersion(const Database& cx) { return cx->apiVersion; }
static int getApiVersion(const Database& cx) { return cx->apiVersion.version(); }
static void setApiVersion(Database* cx, int version) { (*cx)->apiVersion = version; }
static void setApiVersion(Database* cx, int version) { (*cx)->apiVersion = ApiVersion(version); }
Key getTestKey(std::string prefix) {
std::string key = prefix + std::to_string(clientId);

View File

@ -42,7 +42,7 @@ struct AtomicSwitchoverWorkload : TestWorkload {
ASSERT(g_simulator.extraDatabases.size() == 1);
auto extraFile =
makeReference<ClusterConnectionMemoryRecord>(ClusterConnectionString(g_simulator.extraDatabases[0]));
extraDB = Database::createDatabase(extraFile, -1);
extraDB = Database::createDatabase(extraFile, ApiVersion::LATEST_VERSION);
}
std::string description() const override { return "AtomicSwitchover"; }

View File

@ -23,6 +23,7 @@
#include "fdbclient/ManagementAPI.actor.h"
#include "fdbclient/NativeAPI.actor.h"
#include "fdbserver/workloads/workloads.actor.h"
#include "flow/ApiVersion.h"
#include "flow/actorcompiler.h" // This must be the last #include.
struct BackupToDBAbort : TestWorkload {
@ -39,7 +40,7 @@ struct BackupToDBAbort : TestWorkload {
ASSERT(g_simulator.extraDatabases.size() == 1);
auto extraFile =
makeReference<ClusterConnectionMemoryRecord>(ClusterConnectionString(g_simulator.extraDatabases[0]));
extraDB = Database::createDatabase(extraFile, -1);
extraDB = Database::createDatabase(extraFile, ApiVersion::LATEST_VERSION);
lockid = UID(0xbeeffeed, 0xdecaf00d);
}

View File

@ -23,6 +23,7 @@
#include "fdbclient/ClusterConnectionMemoryRecord.h"
#include "fdbserver/workloads/workloads.actor.h"
#include "fdbserver/workloads/BulkSetup.actor.h"
#include "flow/ApiVersion.h"
#include "flow/actorcompiler.h" // This must be the last #include.
// A workload which tests the correctness of the backup and restore process. The
@ -131,7 +132,7 @@ struct BackupToDBCorrectnessWorkload : TestWorkload {
ASSERT(g_simulator.extraDatabases.size() == 1);
auto extraFile =
makeReference<ClusterConnectionMemoryRecord>(ClusterConnectionString(g_simulator.extraDatabases[0]));
extraDB = Database::createDatabase(extraFile, -1);
extraDB = Database::createDatabase(extraFile, ApiVersion::LATEST_VERSION);
TraceEvent("BARW_Start").detail("Locked", locked);
}

View File

@ -24,6 +24,7 @@
#include "fdbserver/workloads/workloads.actor.h"
#include "fdbserver/workloads/BulkSetup.actor.h"
#include "fdbclient/ManagementAPI.actor.h"
#include "flow/ApiVersion.h"
#include "flow/actorcompiler.h" // This must be the last #include.
// A workload which tests the correctness of upgrading DR from 5.1 to 5.2
@ -79,7 +80,7 @@ struct BackupToDBUpgradeWorkload : TestWorkload {
ASSERT(g_simulator.extraDatabases.size() == 1);
auto extraFile =
makeReference<ClusterConnectionMemoryRecord>(ClusterConnectionString(g_simulator.extraDatabases[0]));
extraDB = Database::createDatabase(extraFile, -1);
extraDB = Database::createDatabase(extraFile, ApiVersion::LATEST_VERSION);
TraceEvent("DRU_Start").log();
}

View File

@ -26,6 +26,7 @@
#include "fdbserver/workloads/workloads.actor.h"
#include "fdbrpc/simulator.h"
#include "fdbclient/Schemas.h"
#include "flow/ApiVersion.h"
#include "flow/actorcompiler.h" // This must be the last #include.
struct ChangeConfigWorkload : TestWorkload {
@ -85,7 +86,7 @@ struct ChangeConfigWorkload : TestWorkload {
if (g_network->isSimulated()) {
for (auto extraDatabase : g_simulator.extraDatabases) {
auto extraFile = makeReference<ClusterConnectionMemoryRecord>(ClusterConnectionString(extraDatabase));
Database db = Database::createDatabase(extraFile, -1);
Database db = Database::createDatabase(extraFile, ApiVersion::LATEST_VERSION);
futures.push_back(configureExtraDatabase(self, db));
}
}

View File

@ -24,6 +24,7 @@
#include <fmt/format.h>
#include "flow/ApiVersion.h"
#include "flow/actorcompiler.h" // has to be last include
class WorkloadProcessState {
@ -130,7 +131,7 @@ struct WorkloadProcess {
try {
child = childCreator(wcx);
TraceEvent("ClientWorkloadOpenDatabase", id).detail("ClusterFileLocation", child->ccr->getLocation());
cx = Database::createDatabase(child->ccr, -1);
cx = Database::createDatabase(child->ccr, ApiVersion::LATEST_VERSION);
desc = child->description();
} catch (Error&) {
throw;

View File

@ -23,6 +23,7 @@
#include "fdbclient/RunTransaction.actor.h"
#include "fdbrpc/simulator.h"
#include "fdbserver/workloads/workloads.actor.h"
#include "flow/ApiVersion.h"
#include "flow/genericactors.actor.h"
#include "flow/actorcompiler.h" // This must be the last #include.
@ -40,7 +41,7 @@ struct DifferentClustersSameRVWorkload : TestWorkload {
ASSERT(g_simulator.extraDatabases.size() == 1);
auto extraFile =
makeReference<ClusterConnectionMemoryRecord>(ClusterConnectionString(g_simulator.extraDatabases[0]));
extraDB = Database::createDatabase(extraFile, -1);
extraDB = Database::createDatabase(extraFile, ApiVersion::LATEST_VERSION);
testDuration = getOption(options, LiteralStringRef("testDuration"), 100.0);
switchAfter = getOption(options, LiteralStringRef("switchAfter"), 50.0);
keyToRead = getOption(options, LiteralStringRef("keyToRead"), LiteralStringRef("someKey"));

View File

@ -96,7 +96,7 @@ struct MetaclusterManagementWorkload : TestWorkload {
Reference<IDatabase> threadSafeHandle =
wait(unsafeThreadFutureToFuture(ThreadSafeDatabase::createFromExistingDatabase(cx)));
MultiVersionApi::api->selectApiVersion(cx->apiVersion);
MultiVersionApi::api->selectApiVersion(cx->apiVersion.version());
self->managementDb = MultiVersionDatabase::debugCreateFromExistingDatabase(threadSafeHandle);
ASSERT(g_simulator.extraDatabases.size() > 0);
@ -104,7 +104,8 @@ struct MetaclusterManagementWorkload : TestWorkload {
ClusterConnectionString ccs(connectionString);
auto extraFile = makeReference<ClusterConnectionMemoryRecord>(ccs);
self->dataDbIndex.push_back(ClusterName(format("cluster_%08d", self->dataDbs.size())));
self->dataDbs[self->dataDbIndex.back()] = DataClusterData(Database::createDatabase(extraFile, -1));
self->dataDbs[self->dataDbIndex.back()] =
DataClusterData(Database::createDatabase(extraFile, ApiVersion::LATEST_VERSION));
}
wait(success(MetaclusterAPI::createMetacluster(cx.getReference(), "management_cluster"_sr)));

View File

@ -107,8 +107,9 @@ struct PhysicalShardMoveWorkLoad : TestWorkload {
teamSize,
includes,
excludes));
includes.insert(teamA.begin(), teamA.end());
// Move range [TestKeyB, TestKeyC) to sh1, on the same server.
includes.insert(teamA.begin(), teamA.end());
state std::vector<UID> teamB = wait(self->moveShard(self,
cx,
UID(sh1, deterministicRandom()->randomUInt64()),

View File

@ -22,8 +22,8 @@
#include "fdbclient/NativeAPI.actor.h"
#include "fdbclient/TenantManagement.actor.h"
#include "fdbserver/Knobs.h"
#include "fdbserver/TenantEntryCache.actor.h"
#include "fdbclient/Knobs.h"
#include "fdbclient/TenantEntryCache.actor.h"
#include "fdbserver/workloads/workloads.actor.h"
#include "flow/Error.h"
@ -220,7 +220,7 @@ struct TenantEntryCacheWorkload : TestWorkload {
ASSERT_GE(cache->numCacheRefreshes(), 1);
int refreshWait =
SERVER_KNOBS->TENANT_CACHE_LIST_REFRESH_INTERVAL * 10; // initial delay + multiple refresh runs
CLIENT_KNOBS->TENANT_ENTRY_CACHE_LIST_REFRESH_INTERVAL * 10; // initial delay + multiple refresh runs
wait(delay(refreshWait));
// InitRefresh + multiple timer based invocations
@ -277,7 +277,7 @@ struct TenantEntryCacheWorkload : TestWorkload {
Future<Void> setup(Database const& cx) override {
if (clientId == 0 && g_network->isSimulated() && BUGGIFY) {
IKnobCollection::getMutableGlobalKnobCollection().setKnob("tenant_cache_list_refresh_interval",
IKnobCollection::getMutableGlobalKnobCollection().setKnob("tenant_entry_cache_list_refresh_interval",
KnobValueRef::create(int{ 2 }));
}

View File

@ -96,7 +96,7 @@ struct TenantManagementConcurrencyWorkload : TestWorkload {
Reference<IDatabase> threadSafeHandle =
wait(unsafeThreadFutureToFuture(ThreadSafeDatabase::createFromExistingDatabase(cx)));
MultiVersionApi::api->selectApiVersion(cx->apiVersion);
MultiVersionApi::api->selectApiVersion(cx->apiVersion.version());
self->mvDb = MultiVersionDatabase::debugCreateFromExistingDatabase(threadSafeHandle);
if (self->useMetacluster && self->clientId == 0) {
@ -143,7 +143,7 @@ struct TenantManagementConcurrencyWorkload : TestWorkload {
if (self->useMetacluster) {
ASSERT(g_simulator.extraDatabases.size() == 1);
auto extraFile = makeReference<ClusterConnectionMemoryRecord>(connectionString);
self->dataDb = Database::createDatabase(extraFile, -1);
self->dataDb = Database::createDatabase(extraFile, ApiVersion::LATEST_VERSION);
} else {
self->dataDb = cx;
}

View File

@ -36,6 +36,7 @@
#include "fdbserver/workloads/TenantConsistency.actor.h"
#include "fdbserver/workloads/workloads.actor.h"
#include "fdbserver/Knobs.h"
#include "flow/ApiVersion.h"
#include "flow/Error.h"
#include "flow/IRandom.h"
#include "flow/ThreadHelper.actor.h"
@ -171,7 +172,7 @@ struct TenantManagementWorkload : TestWorkload {
Reference<IDatabase> threadSafeHandle =
wait(unsafeThreadFutureToFuture(ThreadSafeDatabase::createFromExistingDatabase(cx)));
MultiVersionApi::api->selectApiVersion(cx->apiVersion);
MultiVersionApi::api->selectApiVersion(cx->apiVersion.version());
self->mvDb = MultiVersionDatabase::debugCreateFromExistingDatabase(threadSafeHandle);
if (self->useMetacluster && self->clientId == 0) {
@ -219,7 +220,7 @@ struct TenantManagementWorkload : TestWorkload {
if (self->useMetacluster) {
ASSERT(g_simulator.extraDatabases.size() == 1);
auto extraFile = makeReference<ClusterConnectionMemoryRecord>(g_simulator.extraDatabases[0]);
self->dataDb = Database::createDatabase(extraFile, -1);
self->dataDb = Database::createDatabase(extraFile, ApiVersion::LATEST_VERSION);
} else {
self->dataDb = cx;
}

View File

@ -149,7 +149,7 @@ struct ThreadSafetyWorkload : TestWorkload {
self->db = dbRef;
if (deterministicRandom()->coinflip()) {
MultiVersionApi::api->selectApiVersion(cx->apiVersion);
MultiVersionApi::api->selectApiVersion(cx->apiVersion.version());
self->db = MultiVersionDatabase::debugCreateFromExistingDatabase(dbRef);
}

View File

@ -25,6 +25,7 @@
#include "fdbserver/workloads/BulkSetup.actor.h"
#include "fdbclient/ReadYourWrites.h"
#include "fdbserver/workloads/workloads.actor.h"
#include "flow/ApiVersion.h"
#include "flow/actorcompiler.h" // This must be the last #include.
struct VersionStampWorkload : TestWorkload {
@ -72,13 +73,13 @@ struct VersionStampWorkload : TestWorkload {
} else if (choice < 0.3) {
apiVersion = 520;
} else {
apiVersion = Database::API_VERSION_LATEST;
apiVersion = ApiVersion::LATEST_VERSION;
}
TraceEvent("VersionStampApiVersion").detail("ApiVersion", apiVersion);
allowMetadataVersionKey = apiVersion >= 610 || apiVersion == Database::API_VERSION_LATEST;
allowMetadataVersionKey = apiVersion >= 610 || apiVersion == ApiVersion::LATEST_VERSION;
cx->apiVersion = apiVersion;
cx->apiVersion = ApiVersion(apiVersion);
if (clientId == 0)
return _start(cx, this, 1 / transactionsPerSecond);
return Void();
@ -158,7 +159,7 @@ struct VersionStampWorkload : TestWorkload {
ASSERT(g_simulator.extraDatabases.size() == 1);
auto extraFile =
makeReference<ClusterConnectionMemoryRecord>(ClusterConnectionString(g_simulator.extraDatabases[0]));
cx = Database::createDatabase(extraFile, -1);
cx = Database::createDatabase(extraFile, ApiVersion::LATEST_VERSION);
}
state ReadYourWritesTransaction tr(cx);
// We specifically wish to grab the smallest read version that we can get and maintain it, to
@ -318,7 +319,7 @@ struct VersionStampWorkload : TestWorkload {
ASSERT(g_simulator.extraDatabases.size() == 1);
auto extraFile =
makeReference<ClusterConnectionMemoryRecord>(ClusterConnectionString(g_simulator.extraDatabases[0]));
extraDB = Database::createDatabase(extraFile, -1);
extraDB = Database::createDatabase(extraFile, ApiVersion::LATEST_VERSION);
}
state Future<Void> metadataWatch = Void();

Some files were not shown because too many files have changed in this diff Show More