Merge remote-tracking branch 'origin/main' into features/private-request-streams

This commit is contained in:
Markus Pilman 2022-03-15 17:17:47 +01:00
commit 117ee637db
87 changed files with 3107 additions and 1060 deletions

View File

@ -281,6 +281,16 @@ fdb_error_t fdb_future_get_keyvalue_array_v13(FDBFuture* f, FDBKeyValue const**
*out_count = rrr.size(););
}
extern "C" DLLEXPORT fdb_error_t fdb_future_get_mappedkeyvalue_array(FDBFuture* f,
FDBMappedKeyValue const** out_kvm,
int* out_count,
fdb_bool_t* out_more) {
CATCH_AND_RETURN(Standalone<MappedRangeResultRef> rrr = TSAV(Standalone<MappedRangeResultRef>, f)->get();
*out_kvm = (FDBMappedKeyValue*)rrr.begin();
*out_count = rrr.size();
*out_more = rrr.more;);
}
extern "C" DLLEXPORT fdb_error_t fdb_future_get_string_array(FDBFuture* f, const char*** out_strings, int* out_count) {
CATCH_AND_RETURN(Standalone<VectorRef<const char*>> na = TSAV(Standalone<VectorRef<const char*>>, f)->get();
*out_strings = (const char**)na.begin();
@ -571,29 +581,29 @@ FDBFuture* fdb_transaction_get_range_impl(FDBTransaction* tr,
.extractPtr());
}
FDBFuture* fdb_transaction_get_range_and_flat_map_impl(FDBTransaction* tr,
uint8_t const* begin_key_name,
int begin_key_name_length,
fdb_bool_t begin_or_equal,
int begin_offset,
uint8_t const* end_key_name,
int end_key_name_length,
fdb_bool_t end_or_equal,
int end_offset,
uint8_t const* mapper_name,
int mapper_name_length,
int limit,
int target_bytes,
FDBStreamingMode mode,
int iteration,
fdb_bool_t snapshot,
fdb_bool_t reverse) {
FDBFuture* fdb_transaction_get_mapped_range_impl(FDBTransaction* tr,
uint8_t const* begin_key_name,
int begin_key_name_length,
fdb_bool_t begin_or_equal,
int begin_offset,
uint8_t const* end_key_name,
int end_key_name_length,
fdb_bool_t end_or_equal,
int end_offset,
uint8_t const* mapper_name,
int mapper_name_length,
int limit,
int target_bytes,
FDBStreamingMode mode,
int iteration,
fdb_bool_t snapshot,
fdb_bool_t reverse) {
FDBFuture* r = validate_and_update_parameters(limit, target_bytes, mode, iteration, reverse);
if (r != nullptr)
return r;
return (
FDBFuture*)(TXN(tr)
->getRangeAndFlatMap(
->getMappedRange(
KeySelectorRef(KeyRef(begin_key_name, begin_key_name_length), begin_or_equal, begin_offset),
KeySelectorRef(KeyRef(end_key_name, end_key_name_length), end_or_equal, end_offset),
StringRef(mapper_name, mapper_name_length),
@ -604,23 +614,23 @@ FDBFuture* fdb_transaction_get_range_and_flat_map_impl(FDBTransaction* tr,
}
// TODO: Support FDB_API_ADDED in generate_asm.py and then this can be replaced with fdb_api_ptr_unimpl.
FDBFuture* fdb_transaction_get_range_and_flat_map_v699(FDBTransaction* tr,
uint8_t const* begin_key_name,
int begin_key_name_length,
fdb_bool_t begin_or_equal,
int begin_offset,
uint8_t const* end_key_name,
int end_key_name_length,
fdb_bool_t end_or_equal,
int end_offset,
uint8_t const* mapper_name,
int mapper_name_length,
int limit,
int target_bytes,
FDBStreamingMode mode,
int iteration,
fdb_bool_t snapshot,
fdb_bool_t reverse) {
FDBFuture* fdb_transaction_get_mapped_range_v699(FDBTransaction* tr,
uint8_t const* begin_key_name,
int begin_key_name_length,
fdb_bool_t begin_or_equal,
int begin_offset,
uint8_t const* end_key_name,
int end_key_name_length,
fdb_bool_t end_or_equal,
int end_offset,
uint8_t const* mapper_name,
int mapper_name_length,
int limit,
int target_bytes,
FDBStreamingMode mode,
int iteration,
fdb_bool_t snapshot,
fdb_bool_t reverse) {
fprintf(stderr, "UNIMPLEMENTED FDB API FUNCTION\n");
abort();
}
@ -857,7 +867,7 @@ extern "C" DLLEXPORT fdb_error_t fdb_select_api_version_impl(int runtime_version
// WARNING: use caution when implementing removed functions by calling public API functions. This can lead to
// undesired behavior when using the multi-version API. Instead, it is better to have both the removed and public
// functions call an internal implementation function. See fdb_create_database_impl for an example.
FDB_API_CHANGED(fdb_transaction_get_range_and_flat_map, 700);
FDB_API_CHANGED(fdb_transaction_get_mapped_range, 700);
FDB_API_REMOVED(fdb_future_get_version, 620);
FDB_API_REMOVED(fdb_create_cluster, 610);
FDB_API_REMOVED(fdb_cluster_create_database, 610);

View File

@ -113,6 +113,64 @@ typedef struct keyvalue {
int value_length;
} FDBKeyValue;
#endif
#pragma pack(pop)
/* Memory layout of KeySelectorRef. */
typedef struct keyselector {
FDBKey key;
/* orEqual and offset have not be tested in C binding. Just a placeholder. */
fdb_bool_t orEqual;
int offset;
} FDBKeySelector;
/* Memory layout of GetRangeReqAndResultRef. */
typedef struct getrangereqandresult {
FDBKeySelector begin;
FDBKeySelector end;
FDBKeyValue* data;
int m_size, m_capacity;
} FDBGetRangeReqAndResult;
/* Memory layout of MappedKeyValueRef.
Total 112 bytes
- key (12 bytes)
:74:8F:8E:5F:AE:7F:00:00
:4A:00:00:00
- value (12 bytes)
:70:8F:8E:5F:AE:7F:00:00
:00:00:00:00
- begin selector (20 bytes)
:30:8F:8E:5F:AE:7F:00:00
:2D:00:00:00
:00:7F:00:00
:01:00:00:00
- end selector (20 bytes)
:EC:8E:8E:5F:AE:7F:00:00
:2D:00:00:00
:00:2B:3C:60
:01:00:00:00
- vector (16 bytes)
:74:94:8E:5F:AE:7F:00:00
:01:00:00:00
:01:00:00:00
- buffer (32 bytes)
:00:20:D1:61:00:00:00:00
:00:00:00:00:00:00:00:00
:00:00:00:00:00:00:00:00
:01:00:00:00:AE:7F:00:00
*/
typedef struct mappedkeyvalue {
FDBKey key;
FDBKey value;
/* It's complicated to map a std::variant to C. For now we assume the underlying requests are always getRange and
* take the shortcut. */
FDBGetRangeReqAndResult getRange;
unsigned char buffer[32];
} FDBMappedKeyValue;
#pragma pack(push, 4)
typedef struct keyrange {
const uint8_t* begin_key;
int begin_key_length;
@ -176,6 +234,12 @@ DLLEXPORT WARN_UNUSED_RESULT fdb_error_t fdb_future_get_keyvalue_array(FDBFuture
int* out_count,
fdb_bool_t* out_more);
#endif
DLLEXPORT WARN_UNUSED_RESULT fdb_error_t fdb_future_get_mappedkeyvalue_array(FDBFuture* f,
FDBMappedKeyValue const** out_kv,
int* out_count,
fdb_bool_t* out_more);
DLLEXPORT WARN_UNUSED_RESULT fdb_error_t fdb_future_get_key_array(FDBFuture* f,
FDBKey const** out_key_array,
int* out_count);
@ -283,23 +347,23 @@ DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_transaction_get_range(FDBTransaction
fdb_bool_t reverse);
#endif
DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_transaction_get_range_and_flat_map(FDBTransaction* tr,
uint8_t const* begin_key_name,
int begin_key_name_length,
fdb_bool_t begin_or_equal,
int begin_offset,
uint8_t const* end_key_name,
int end_key_name_length,
fdb_bool_t end_or_equal,
int end_offset,
uint8_t const* mapper_name,
int mapper_name_length,
int limit,
int target_bytes,
FDBStreamingMode mode,
int iteration,
fdb_bool_t snapshot,
fdb_bool_t reverse);
DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_transaction_get_mapped_range(FDBTransaction* tr,
uint8_t const* begin_key_name,
int begin_key_name_length,
fdb_bool_t begin_or_equal,
int begin_offset,
uint8_t const* end_key_name,
int end_key_name_length,
fdb_bool_t end_or_equal,
int end_offset,
uint8_t const* mapper_name,
int mapper_name_length,
int limit,
int target_bytes,
FDBStreamingMode mode,
int iteration,
fdb_bool_t snapshot,
fdb_bool_t reverse);
DLLEXPORT void fdb_transaction_set(FDBTransaction* tr,
uint8_t const* key_name,

View File

@ -90,6 +90,14 @@ void Future::cancel() {
return fdb_future_get_keyvalue_array(future_, out_kv, out_count, out_more);
}
// MappedKeyValueArrayFuture
[[nodiscard]] fdb_error_t MappedKeyValueArrayFuture::get(const FDBMappedKeyValue** out_kv,
int* out_count,
fdb_bool_t* out_more) {
return fdb_future_get_mappedkeyvalue_array(future_, out_kv, out_count, out_more);
}
// Result
Result::~Result() {
@ -210,7 +218,7 @@ KeyValueArrayFuture Transaction::get_range(const uint8_t* begin_key_name,
reverse));
}
KeyValueArrayFuture Transaction::get_range_and_flat_map(const uint8_t* begin_key_name,
MappedKeyValueArrayFuture Transaction::get_mapped_range(const uint8_t* begin_key_name,
int begin_key_name_length,
fdb_bool_t begin_or_equal,
int begin_offset,
@ -226,7 +234,7 @@ KeyValueArrayFuture Transaction::get_range_and_flat_map(const uint8_t* begin_key
int iteration,
fdb_bool_t snapshot,
fdb_bool_t reverse) {
return KeyValueArrayFuture(fdb_transaction_get_range_and_flat_map(tr_,
return MappedKeyValueArrayFuture(fdb_transaction_get_mapped_range(tr_,
begin_key_name,
begin_key_name_length,
begin_or_equal,

View File

@ -135,6 +135,18 @@ private:
KeyValueArrayFuture(FDBFuture* f) : Future(f) {}
};
class MappedKeyValueArrayFuture : public Future {
public:
// Call this function instead of fdb_future_get_mappedkeyvalue_array when using
// the MappedKeyValueArrayFuture type. Its behavior is identical to
// fdb_future_get_mappedkeyvalue_array.
fdb_error_t get(const FDBMappedKeyValue** out_kv, int* out_count, fdb_bool_t* out_more);
private:
friend class Transaction;
MappedKeyValueArrayFuture(FDBFuture* f) : Future(f) {}
};
class KeyRangeArrayFuture : public Future {
public:
// Call this function instead of fdb_future_get_keyrange_array when using
@ -254,7 +266,7 @@ public:
// WARNING: This feature is considered experimental at this time. It is only allowed when using snapshot isolation
// AND disabling read-your-writes. Returns a future which will be set to an FDBKeyValue array.
KeyValueArrayFuture get_range_and_flat_map(const uint8_t* begin_key_name,
MappedKeyValueArrayFuture get_mapped_range(const uint8_t* begin_key_name,
int begin_key_name_length,
fdb_bool_t begin_or_equal,
int begin_offset,

View File

@ -173,6 +173,20 @@ struct GetRangeResult {
fdb_error_t err;
};
struct GetMappedRangeResult {
std::vector<std::tuple<std::string, // key
std::string, // value
std::string, // begin
std::string, // end
std::vector<std::pair<std::string, std::string>> // range results
>>
mkvs;
// True if values remain in the key range requested.
bool more;
// Set to a non-zero value if an error occurred during the transaction.
fdb_error_t err;
};
// Helper function to get a range of kv pairs. Returns a GetRangeResult struct
// containing the results of the range read. Caller is responsible for checking
// error on failure and retrying if necessary.
@ -225,7 +239,11 @@ GetRangeResult get_range(fdb::Transaction& tr,
return GetRangeResult{ results, out_more != 0, 0 };
}
GetRangeResult get_range_and_flat_map(fdb::Transaction& tr,
static inline std::string extractString(FDBKey key) {
return std::string((const char*)key.key, key.key_length);
}
GetMappedRangeResult get_mapped_range(fdb::Transaction& tr,
const uint8_t* begin_key_name,
int begin_key_name_length,
fdb_bool_t begin_or_equal,
@ -242,7 +260,7 @@ GetRangeResult get_range_and_flat_map(fdb::Transaction& tr,
int iteration,
fdb_bool_t snapshot,
fdb_bool_t reverse) {
fdb::KeyValueArrayFuture f1 = tr.get_range_and_flat_map(begin_key_name,
fdb::MappedKeyValueArrayFuture f1 = tr.get_mapped_range(begin_key_name,
begin_key_name_length,
begin_or_equal,
begin_offset,
@ -261,21 +279,41 @@ GetRangeResult get_range_and_flat_map(fdb::Transaction& tr,
fdb_error_t err = wait_future(f1);
if (err) {
return GetRangeResult{ {}, false, err };
return GetMappedRangeResult{ {}, false, err };
}
const FDBKeyValue* out_kv;
const FDBMappedKeyValue* out_mkv;
int out_count;
fdb_bool_t out_more;
fdb_check(f1.get(&out_kv, &out_count, &out_more));
std::vector<std::pair<std::string, std::string>> results;
fdb_check(f1.get(&out_mkv, &out_count, &out_more));
GetMappedRangeResult result;
result.more = (out_more != 0);
result.err = 0;
// std::cout << "out_count:" << out_count << " out_more:" << out_more << " out_mkv:" << (void*)out_mkv <<
// std::endl;
for (int i = 0; i < out_count; ++i) {
std::string key((const char*)out_kv[i].key, out_kv[i].key_length);
std::string value((const char*)out_kv[i].value, out_kv[i].value_length);
results.emplace_back(key, value);
FDBMappedKeyValue mkv = out_mkv[i];
auto key = extractString(mkv.key);
auto value = extractString(mkv.value);
auto begin = extractString(mkv.getRange.begin.key);
auto end = extractString(mkv.getRange.end.key);
// std::cout << "key:" << key << " value:" << value << " begin:" << begin << " end:" << end << std::endl;
std::vector<std::pair<std::string, std::string>> range_results;
for (int i = 0; i < mkv.getRange.m_size; ++i) {
const auto& kv = mkv.getRange.data[i];
std::string k((const char*)kv.key, kv.key_length);
std::string v((const char*)kv.value, kv.value_length);
range_results.emplace_back(k, v);
// std::cout << "[" << i << "]" << k << " -> " << v << std::endl;
}
result.mkvs.emplace_back(key, value, begin, end, range_results);
}
return GetRangeResult{ results, out_more != 0, 0 };
return result;
}
// Clears all data in the database.
@ -888,32 +926,35 @@ static Value dataOfRecord(const int i) {
static std::string indexEntryKey(const int i) {
return Tuple().append(StringRef(prefix)).append(INDEX).append(indexKey(i)).append(primaryKey(i)).pack().toString();
}
static std::string recordKey(const int i) {
return Tuple().append(prefix).append(RECORD).append(primaryKey(i)).pack().toString();
static std::string recordKey(const int i, const int split) {
return Tuple().append(prefix).append(RECORD).append(primaryKey(i)).append(split).pack().toString();
}
static std::string recordValue(const int i) {
return Tuple().append(dataOfRecord(i)).pack().toString();
static std::string recordValue(const int i, const int split) {
return Tuple().append(dataOfRecord(i)).append(split).pack().toString();
}
const static int SPLIT_SIZE = 3;
std::map<std::string, std::string> fillInRecords(int n) {
// Note: The user requested `prefix` should be added as the first element of the tuple that forms the key, rather
// than the prefix of the key. So we don't use key() or create_data() in this test.
std::map<std::string, std::string> data;
for (int i = 0; i < n; i++) {
data[indexEntryKey(i)] = EMPTY;
data[recordKey(i)] = recordValue(i);
for (int split = 0; split < SPLIT_SIZE; split++) {
data[recordKey(i, split)] = recordValue(i, split);
}
}
insert_data(db, data);
return data;
}
GetRangeResult getIndexEntriesAndMap(int beginId, int endId, fdb::Transaction& tr) {
GetMappedRangeResult getMappedIndexEntries(int beginId, int endId, fdb::Transaction& tr) {
std::string indexEntryKeyBegin = indexEntryKey(beginId);
std::string indexEntryKeyEnd = indexEntryKey(endId);
std::string mapper = Tuple().append(prefix).append(RECORD).append("{K[3]}"_sr).pack().toString();
std::string mapper = Tuple().append(prefix).append(RECORD).append("{K[3]}"_sr).append("{...}"_sr).pack().toString();
return get_range_and_flat_map(
return get_mapped_range(
tr,
FDB_KEYSEL_FIRST_GREATER_OR_EQUAL((const uint8_t*)indexEntryKeyBegin.c_str(), indexEntryKeyBegin.size()),
FDB_KEYSEL_FIRST_GREATER_OR_EQUAL((const uint8_t*)indexEntryKeyEnd.c_str(), indexEntryKeyEnd.size()),
@ -923,20 +964,20 @@ GetRangeResult getIndexEntriesAndMap(int beginId, int endId, fdb::Transaction& t
/* target_bytes */ 0,
/* FDBStreamingMode */ FDB_STREAMING_MODE_WANT_ALL,
/* iteration */ 0,
/* snapshot */ true,
/* snapshot */ false,
/* reverse */ 0);
}
TEST_CASE("fdb_transaction_get_range_and_flat_map") {
fillInRecords(20);
TEST_CASE("fdb_transaction_get_mapped_range") {
const int TOTAL_RECORDS = 20;
fillInRecords(TOTAL_RECORDS);
fdb::Transaction tr(db);
// get_range_and_flat_map is only support without RYW. This is a must!!!
fdb_check(tr.set_option(FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE, nullptr, 0));
// RYW should be enabled.
while (1) {
int beginId = 1;
int endId = 19;
auto result = getIndexEntriesAndMap(beginId, endId, tr);
auto result = getMappedIndexEntries(beginId, endId, tr);
if (result.err) {
fdb::EmptyFuture f1 = tr.on_error(result.err);
@ -945,32 +986,30 @@ TEST_CASE("fdb_transaction_get_range_and_flat_map") {
}
int expectSize = endId - beginId;
CHECK(result.kvs.size() == expectSize);
CHECK(result.mkvs.size() == expectSize);
CHECK(!result.more);
int id = beginId;
for (int i = 0; i < result.kvs.size(); i++, id++) {
const auto& [key, value] = result.kvs[i];
CHECK(recordKey(id).compare(key) == 0);
CHECK(recordValue(id).compare(value) == 0);
for (int i = 0; i < expectSize; i++, id++) {
const auto& [key, value, begin, end, range_results] = result.mkvs[i];
CHECK(indexEntryKey(id).compare(key) == 0);
CHECK(EMPTY.compare(value) == 0);
CHECK(range_results.size() == SPLIT_SIZE);
for (int split = 0; split < SPLIT_SIZE; split++) {
auto& [k, v] = range_results[split];
CHECK(recordKey(id, split).compare(k) == 0);
CHECK(recordValue(id, split).compare(v) == 0);
}
}
break;
}
}
TEST_CASE("fdb_transaction_get_range_and_flat_map get_key_values_and_map_has_more") {
fillInRecords(2000);
fdb::Transaction tr(db);
fdb_check(tr.set_option(FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE, nullptr, 0));
auto result = getIndexEntriesAndMap(100, 1900, tr);
CHECK(result.err == error_code_get_key_values_and_map_has_more);
}
TEST_CASE("fdb_transaction_get_range_and_flat_map_restricted_to_snapshot") {
TEST_CASE("fdb_transaction_get_mapped_range_restricted_to_serializable") {
std::string mapper = Tuple().append(prefix).append(RECORD).append("{K[3]}"_sr).pack().toString();
fdb::Transaction tr(db);
fdb_check(tr.set_option(FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE, nullptr, 0));
auto result = get_range_and_flat_map(
auto result = get_mapped_range(
tr,
FDB_KEYSEL_FIRST_GREATER_OR_EQUAL((const uint8_t*)indexEntryKey(0).c_str(), indexEntryKey(0).size()),
FDB_KEYSEL_FIRST_GREATER_THAN((const uint8_t*)indexEntryKey(1).c_str(), indexEntryKey(1).size()),
@ -980,16 +1019,16 @@ TEST_CASE("fdb_transaction_get_range_and_flat_map_restricted_to_snapshot") {
/* target_bytes */ 0,
/* FDBStreamingMode */ FDB_STREAMING_MODE_WANT_ALL,
/* iteration */ 0,
/* snapshot */ false, // Set snapshot to false
/* snapshot */ true, // Set snapshot to true
/* reverse */ 0);
ASSERT(result.err == error_code_client_invalid_operation);
ASSERT(result.err == error_code_unsupported_operation);
}
TEST_CASE("fdb_transaction_get_range_and_flat_map_restricted_to_ryw_disable") {
TEST_CASE("fdb_transaction_get_mapped_range_restricted_to_ryw_enable") {
std::string mapper = Tuple().append(prefix).append(RECORD).append("{K[3]}"_sr).pack().toString();
fdb::Transaction tr(db);
// Not set FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE.
auto result = get_range_and_flat_map(
fdb_check(tr.set_option(FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE, nullptr, 0)); // Not disable RYW
auto result = get_mapped_range(
tr,
FDB_KEYSEL_FIRST_GREATER_OR_EQUAL((const uint8_t*)indexEntryKey(0).c_str(), indexEntryKey(0).size()),
FDB_KEYSEL_FIRST_GREATER_THAN((const uint8_t*)indexEntryKey(1).c_str(), indexEntryKey(1).size()),
@ -1001,7 +1040,7 @@ TEST_CASE("fdb_transaction_get_range_and_flat_map_restricted_to_ryw_disable") {
/* iteration */ 0,
/* snapshot */ true,
/* reverse */ 0);
ASSERT(result.err == error_code_client_invalid_operation);
ASSERT(result.err == error_code_unsupported_operation);
}
TEST_CASE("fdb_transaction_get_range reverse") {

View File

@ -1,5 +1,5 @@
/*
* workloads.h
* SimpleWorkload.cpp
*
* This source file is part of the FoundationDB open source project
*

View File

@ -1,5 +1,5 @@
/*
* workloads.h
* workloads.cpp
*
* This source file is part of the FoundationDB open source project
*

View File

@ -27,6 +27,8 @@ set(JAVA_BINDING_SRCS
src/main/com/apple/foundationdb/directory/package-info.java
src/main/com/apple/foundationdb/directory/PathUtil.java
src/main/com/apple/foundationdb/DirectBufferIterator.java
src/main/com/apple/foundationdb/RangeResultDirectBufferIterator.java
src/main/com/apple/foundationdb/MappedRangeResultDirectBufferIterator.java
src/main/com/apple/foundationdb/DirectBufferPool.java
src/main/com/apple/foundationdb/FDB.java
src/main/com/apple/foundationdb/FDBDatabase.java
@ -36,11 +38,13 @@ set(JAVA_BINDING_SRCS
src/main/com/apple/foundationdb/FutureKeyArray.java
src/main/com/apple/foundationdb/FutureResult.java
src/main/com/apple/foundationdb/FutureResults.java
src/main/com/apple/foundationdb/FutureMappedResults.java
src/main/com/apple/foundationdb/FutureStrings.java
src/main/com/apple/foundationdb/FutureVoid.java
src/main/com/apple/foundationdb/JNIUtil.java
src/main/com/apple/foundationdb/KeySelector.java
src/main/com/apple/foundationdb/KeyValue.java
src/main/com/apple/foundationdb/MappedKeyValue.java
src/main/com/apple/foundationdb/LocalityUtil.java
src/main/com/apple/foundationdb/NativeFuture.java
src/main/com/apple/foundationdb/NativeObjectWrapper.java
@ -49,9 +53,12 @@ set(JAVA_BINDING_SRCS
src/main/com/apple/foundationdb/package-info.java
src/main/com/apple/foundationdb/Range.java
src/main/com/apple/foundationdb/RangeQuery.java
src/main/com/apple/foundationdb/MappedRangeQuery.java
src/main/com/apple/foundationdb/KeyArrayResult.java
src/main/com/apple/foundationdb/RangeResult.java
src/main/com/apple/foundationdb/MappedRangeResult.java
src/main/com/apple/foundationdb/RangeResultInfo.java
src/main/com/apple/foundationdb/MappedRangeResultInfo.java
src/main/com/apple/foundationdb/RangeResultSummary.java
src/main/com/apple/foundationdb/ReadTransaction.java
src/main/com/apple/foundationdb/ReadTransactionContext.java

View File

@ -20,6 +20,7 @@
#include <jni.h>
#include <string.h>
#include <functional>
#include "com_apple_foundationdb_FDB.h"
#include "com_apple_foundationdb_FDBDatabase.h"
@ -50,10 +51,14 @@ static thread_local jmethodID g_IFutureCallback_call_methodID = JNI_NULL;
static thread_local bool is_external = false;
static jclass range_result_summary_class;
static jclass range_result_class;
static jclass mapped_range_result_class;
static jclass mapped_key_value_class;
static jclass string_class;
static jclass key_array_result_class;
static jmethodID key_array_result_init;
static jmethodID range_result_init;
static jmethodID mapped_range_result_init;
static jmethodID mapped_key_value_from_bytes;
static jmethodID range_result_summary_init;
void detachIfExternalThread(void* ignore) {
@ -478,6 +483,127 @@ JNIEXPORT jobject JNICALL Java_com_apple_foundationdb_FutureResults_FutureResult
return result;
}
class ExecuteOnLeave {
std::function<void()> func;
public:
explicit ExecuteOnLeave(std::function<void()> func) : func(func) {}
~ExecuteOnLeave() { func(); }
};
void cpBytesAndLengthInner(uint8_t*& pByte, jint*& pLength, const uint8_t* data, const int& length) {
*pLength = length;
pLength++;
memcpy(pByte, data, length);
pByte += length;
}
void cpBytesAndLength(uint8_t*& pByte, jint*& pLength, const FDBKey& key) {
cpBytesAndLengthInner(pByte, pLength, key.key, key.key_length);
}
JNIEXPORT jobject JNICALL Java_com_apple_foundationdb_FutureMappedResults_FutureMappedResults_1get(JNIEnv* jenv,
jobject,
jlong future) {
if (!future) {
throwParamNotNull(jenv);
return JNI_NULL;
}
FDBFuture* f = (FDBFuture*)future;
const FDBMappedKeyValue* kvms;
int count;
fdb_bool_t more;
fdb_error_t err = fdb_future_get_mappedkeyvalue_array(f, &kvms, &count, &more);
if (err) {
safeThrow(jenv, getThrowable(jenv, err));
return JNI_NULL;
}
jobjectArray mrr_values = jenv->NewObjectArray(count, mapped_key_value_class, NULL);
if (!mrr_values) {
if (!jenv->ExceptionOccurred())
throwOutOfMem(jenv);
return JNI_NULL;
}
for (int i = 0; i < count; i++) {
FDBMappedKeyValue kvm = kvms[i];
int kvm_count = kvm.getRange.m_size;
const int totalLengths = 4 + kvm_count * 2;
int totalBytes = kvm.key.key_length + kvm.value.key_length + kvm.getRange.begin.key.key_length +
kvm.getRange.end.key.key_length;
for (int i = 0; i < kvm_count; i++) {
auto kv = kvm.getRange.data[i];
totalBytes += kv.key_length + kv.value_length;
}
jbyteArray bytesArray = jenv->NewByteArray(totalBytes);
if (!bytesArray) {
if (!jenv->ExceptionOccurred())
throwOutOfMem(jenv);
return JNI_NULL;
}
jintArray lengthArray = jenv->NewIntArray(totalLengths);
if (!lengthArray) {
if (!jenv->ExceptionOccurred())
throwOutOfMem(jenv);
return JNI_NULL;
}
uint8_t* bytes_barr = (uint8_t*)jenv->GetByteArrayElements(bytesArray, JNI_NULL);
if (!bytes_barr) {
throwRuntimeEx(jenv, "Error getting handle to native resources");
return JNI_NULL;
}
{
ExecuteOnLeave e([&]() { jenv->ReleaseByteArrayElements(bytesArray, (jbyte*)bytes_barr, 0); });
jint* length_barr = jenv->GetIntArrayElements(lengthArray, JNI_NULL);
if (!length_barr) {
if (!jenv->ExceptionOccurred())
throwOutOfMem(jenv);
return JNI_NULL;
}
{
ExecuteOnLeave e([&]() { jenv->ReleaseIntArrayElements(lengthArray, length_barr, 0); });
uint8_t* pByte = bytes_barr;
jint* pLength = length_barr;
cpBytesAndLength(pByte, pLength, kvm.key);
cpBytesAndLength(pByte, pLength, kvm.value);
cpBytesAndLength(pByte, pLength, kvm.getRange.begin.key);
cpBytesAndLength(pByte, pLength, kvm.getRange.end.key);
for (int kvm_i = 0; kvm_i < kvm_count; kvm_i++) {
auto kv = kvm.getRange.data[kvm_i];
cpBytesAndLengthInner(pByte, pLength, kv.key, kv.key_length);
cpBytesAndLengthInner(pByte, pLength, kv.value, kv.value_length);
}
}
}
// After native arrays are released
jobject mkv = jenv->CallStaticObjectMethod(
mapped_key_value_class, mapped_key_value_from_bytes, (jbyteArray)bytesArray, (jintArray)lengthArray);
if (jenv->ExceptionOccurred())
return JNI_NULL;
jenv->SetObjectArrayElement(mrr_values, i, mkv);
if (jenv->ExceptionOccurred())
return JNI_NULL;
}
jobject mrr = jenv->NewObject(mapped_range_result_class, mapped_range_result_init, mrr_values, (jboolean)more);
if (jenv->ExceptionOccurred())
return JNI_NULL;
return mrr;
}
// SOMEDAY: explore doing this more efficiently with Direct ByteBuffers
JNIEXPORT jbyteArray JNICALL Java_com_apple_foundationdb_FutureResult_FutureResult_1get(JNIEnv* jenv,
jobject,
@ -767,23 +893,22 @@ JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1
return (jlong)f;
}
JNIEXPORT jlong JNICALL
Java_com_apple_foundationdb_FDBTransaction_Transaction_1getRangeAndFlatMap(JNIEnv* jenv,
jobject,
jlong tPtr,
jbyteArray keyBeginBytes,
jboolean orEqualBegin,
jint offsetBegin,
jbyteArray keyEndBytes,
jboolean orEqualEnd,
jint offsetEnd,
jbyteArray mapperBytes,
jint rowLimit,
jint targetBytes,
jint streamingMode,
jint iteration,
jboolean snapshot,
jboolean reverse) {
JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1getMappedRange(JNIEnv* jenv,
jobject,
jlong tPtr,
jbyteArray keyBeginBytes,
jboolean orEqualBegin,
jint offsetBegin,
jbyteArray keyEndBytes,
jboolean orEqualEnd,
jint offsetEnd,
jbyteArray mapperBytes,
jint rowLimit,
jint targetBytes,
jint streamingMode,
jint iteration,
jboolean snapshot,
jboolean reverse) {
if (!tPtr || !keyBeginBytes || !keyEndBytes || !mapperBytes) {
throwParamNotNull(jenv);
return 0;
@ -814,23 +939,23 @@ Java_com_apple_foundationdb_FDBTransaction_Transaction_1getRangeAndFlatMap(JNIEn
return 0;
}
FDBFuture* f = fdb_transaction_get_range_and_flat_map(tr,
barrBegin,
jenv->GetArrayLength(keyBeginBytes),
orEqualBegin,
offsetBegin,
barrEnd,
jenv->GetArrayLength(keyEndBytes),
orEqualEnd,
offsetEnd,
barrMapper,
jenv->GetArrayLength(mapperBytes),
rowLimit,
targetBytes,
(FDBStreamingMode)streamingMode,
iteration,
snapshot,
reverse);
FDBFuture* f = fdb_transaction_get_mapped_range(tr,
barrBegin,
jenv->GetArrayLength(keyBeginBytes),
orEqualBegin,
offsetBegin,
barrEnd,
jenv->GetArrayLength(keyEndBytes),
orEqualEnd,
offsetEnd,
barrMapper,
jenv->GetArrayLength(mapperBytes),
rowLimit,
targetBytes,
(FDBStreamingMode)streamingMode,
iteration,
snapshot,
reverse);
jenv->ReleaseByteArrayElements(keyBeginBytes, (jbyte*)barrBegin, JNI_ABORT);
jenv->ReleaseByteArrayElements(keyEndBytes, (jbyte*)barrEnd, JNI_ABORT);
jenv->ReleaseByteArrayElements(mapperBytes, (jbyte*)barrMapper, JNI_ABORT);
@ -842,7 +967,6 @@ JNIEXPORT void JNICALL Java_com_apple_foundationdb_FutureResults_FutureResults_1
jlong future,
jobject jbuffer,
jint bufferCapacity) {
if (!future) {
throwParamNotNull(jenv);
return;
@ -902,6 +1026,92 @@ JNIEXPORT void JNICALL Java_com_apple_foundationdb_FutureResults_FutureResults_1
}
}
void memcpyStringInner(uint8_t* buffer, int& offset, const uint8_t* data, const int& length) {
memcpy(buffer + offset, &length, sizeof(jint));
offset += sizeof(jint);
memcpy(buffer + offset, data, length);
offset += length;
}
void memcpyString(uint8_t* buffer, int& offset, const FDBKey& key) {
memcpyStringInner(buffer, offset, key.key, key.key_length);
}
JNIEXPORT void JNICALL
Java_com_apple_foundationdb_FutureMappedResults_FutureMappedResults_1getDirect(JNIEnv* jenv,
jobject,
jlong future,
jobject jbuffer,
jint bufferCapacity) {
if (!future) {
throwParamNotNull(jenv);
return;
}
uint8_t* buffer = (uint8_t*)jenv->GetDirectBufferAddress(jbuffer);
if (!buffer) {
if (!jenv->ExceptionOccurred())
throwRuntimeEx(jenv, "Error getting handle to native resources");
return;
}
FDBFuture* f = (FDBFuture*)future;
const FDBMappedKeyValue* kvms;
int count;
fdb_bool_t more;
fdb_error_t err = fdb_future_get_mappedkeyvalue_array(f, &kvms, &count, &more);
if (err) {
safeThrow(jenv, getThrowable(jenv, err));
return;
}
int totalCapacityNeeded = 2 * sizeof(jint);
for (int i = 0; i < count; i++) {
const FDBMappedKeyValue& kvm = kvms[i];
totalCapacityNeeded += kvm.key.key_length + kvm.value.key_length + kvm.getRange.begin.key.key_length +
kvm.getRange.end.key.key_length +
5 * sizeof(jint); // Besides the 4 lengths above, also one for kvm_count.
int kvm_count = kvm.getRange.m_size;
for (int i = 0; i < kvm_count; i++) {
auto kv = kvm.getRange.data[i];
totalCapacityNeeded += kv.key_length + kv.value_length + 2 * sizeof(jint);
}
if (bufferCapacity < totalCapacityNeeded) {
count = i; /* Only fit first `i` K/V pairs */
more = true;
break;
}
}
int offset = 0;
// First copy RangeResultSummary, i.e. [keyCount, more]
memcpy(buffer + offset, &count, sizeof(jint));
offset += sizeof(jint);
memcpy(buffer + offset, &more, sizeof(jint));
offset += sizeof(jint);
for (int i = 0; i < count; i++) {
const FDBMappedKeyValue& kvm = kvms[i];
memcpyString(buffer, offset, kvm.key);
memcpyString(buffer, offset, kvm.value);
memcpyString(buffer, offset, kvm.getRange.begin.key);
memcpyString(buffer, offset, kvm.getRange.end.key);
int kvm_count = kvm.getRange.m_size;
memcpy(buffer + offset, &kvm_count, sizeof(jint));
offset += sizeof(jint);
for (int i = 0; i < kvm_count; i++) {
auto kv = kvm.getRange.data[i];
memcpyStringInner(buffer, offset, kv.key, kv.key_length);
memcpyStringInner(buffer, offset, kv.value, kv.value_length);
}
}
}
JNIEXPORT jlong JNICALL
Java_com_apple_foundationdb_FDBTransaction_Transaction_1getEstimatedRangeSizeBytes(JNIEnv* jenv,
jobject,
@ -1396,6 +1606,16 @@ jint JNI_OnLoad(JavaVM* vm, void* reserved) {
range_result_init = env->GetMethodID(local_range_result_class, "<init>", "([B[IZ)V");
range_result_class = (jclass)(env)->NewGlobalRef(local_range_result_class);
jclass local_mapped_range_result_class = env->FindClass("com/apple/foundationdb/MappedRangeResult");
mapped_range_result_init =
env->GetMethodID(local_mapped_range_result_class, "<init>", "([Lcom/apple/foundationdb/MappedKeyValue;Z)V");
mapped_range_result_class = (jclass)(env)->NewGlobalRef(local_mapped_range_result_class);
jclass local_mapped_key_value_class = env->FindClass("com/apple/foundationdb/MappedKeyValue");
mapped_key_value_from_bytes = env->GetStaticMethodID(
local_mapped_key_value_class, "fromBytes", "([B[I)Lcom/apple/foundationdb/MappedKeyValue;");
mapped_key_value_class = (jclass)(env)->NewGlobalRef(local_mapped_key_value_class);
jclass local_key_array_result_class = env->FindClass("com/apple/foundationdb/KeyArrayResult");
key_array_result_init = env->GetMethodID(local_key_array_result_class, "<init>", "([B[I)V");
key_array_result_class = (jclass)(env)->NewGlobalRef(local_key_array_result_class);
@ -1424,6 +1644,12 @@ void JNI_OnUnload(JavaVM* vm, void* reserved) {
if (range_result_class != JNI_NULL) {
env->DeleteGlobalRef(range_result_class);
}
if (mapped_range_result_class != JNI_NULL) {
env->DeleteGlobalRef(mapped_range_result_class);
}
if (mapped_key_value_class != JNI_NULL) {
env->DeleteGlobalRef(mapped_key_value_class);
}
if (string_class != JNI_NULL) {
env->DeleteGlobalRef(string_class);
}

View File

@ -1,5 +1,5 @@
/*
* RangeAndFlatMapQueryIntegrationTest.java
* MappedRangeQueryIntegrationTest.java
*
* This source file is part of the FoundationDB open source project
*
@ -40,7 +40,7 @@ import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
@ExtendWith(RequiresDatabase.class)
class RangeAndFlatMapQueryIntegrationTest {
class MappedRangeQueryIntegrationTest {
private static final FDB fdb = FDB.selectAPIVersion(710);
public String databaseArg = null;
private Database openFDB() { return fdb.open(databaseArg); }
@ -67,16 +67,27 @@ class RangeAndFlatMapQueryIntegrationTest {
static private String indexKey(int i) { return String.format("index-key-of-record-%08d", i); }
static private String dataOfRecord(int i) { return String.format("data-of-record-%08d", i); }
static byte[] MAPPER = Tuple.from(PREFIX, RECORD, "{K[3]}").pack();
static byte[] MAPPER = Tuple.from(PREFIX, RECORD, "{K[3]}", "{...}").pack();
static int SPLIT_SIZE = 3;
static private byte[] indexEntryKey(final int i) {
return Tuple.from(PREFIX, INDEX, indexKey(i), primaryKey(i)).pack();
}
static private byte[] recordKey(final int i) { return Tuple.from(PREFIX, RECORD, primaryKey(i)).pack(); }
static private byte[] recordValue(final int i) { return Tuple.from(dataOfRecord(i)).pack(); }
static private byte[] recordKeyPrefix(final int i) {
return Tuple.from(PREFIX, RECORD, primaryKey(i)).pack();
}
static private byte[] recordKey(final int i, final int split) {
return Tuple.from(PREFIX, RECORD, primaryKey(i), split).pack();
}
static private byte[] recordValue(final int i, final int split) {
return Tuple.from(dataOfRecord(i), split).pack();
}
static private void insertRecordWithIndex(final Transaction tr, final int i) {
tr.set(indexEntryKey(i), EMPTY);
tr.set(recordKey(i), recordValue(i));
for (int split = 0; split < SPLIT_SIZE; split++) {
tr.set(recordKey(i, split), recordValue(i, split));
}
}
private static String getArgFromEnv() {
@ -86,7 +97,7 @@ class RangeAndFlatMapQueryIntegrationTest {
return cluster;
}
public static void main(String[] args) throws Exception {
final RangeAndFlatMapQueryIntegrationTest test = new RangeAndFlatMapQueryIntegrationTest();
final MappedRangeQueryIntegrationTest test = new MappedRangeQueryIntegrationTest();
test.databaseArg = getArgFromEnv();
test.clearDatabase();
test.comparePerformance();
@ -94,21 +105,21 @@ class RangeAndFlatMapQueryIntegrationTest {
}
int numRecords = 10000;
int numQueries = 10000;
int numQueries = 1;
int numRecordsPerQuery = 100;
boolean validate = false;
boolean validate = true;
@Test
void comparePerformance() {
FDB fdb = FDB.selectAPIVersion(710);
try (Database db = openFDB()) {
insertRecordsWithIndexes(numRecords, db);
instrument(rangeQueryAndGet, "rangeQueryAndGet", db);
instrument(rangeQueryAndFlatMap, "rangeQueryAndFlatMap", db);
instrument(rangeQueryAndThenRangeQueries, "rangeQueryAndThenRangeQueries", db);
instrument(mappedRangeQuery, "mappedRangeQuery", db);
}
}
private void instrument(final RangeQueryWithIndex query, final String name, final Database db) {
System.out.printf("Starting %s (numQueries:%d, numRecordsPerQuery:%d)\n", name, numQueries, numRecordsPerQuery);
System.out.printf("Starting %s (numQueries:%d, numRecordsPerQuery:%d, validation:%s)\n", name, numQueries, numRecordsPerQuery, validate ? "on" : "off");
long startTime = System.currentTimeMillis();
for (int queryId = 0; queryId < numQueries; queryId++) {
int begin = ThreadLocalRandom.current().nextInt(numRecords - numRecordsPerQuery);
@ -140,7 +151,7 @@ class RangeAndFlatMapQueryIntegrationTest {
void run(int begin, int end, Database db);
}
RangeQueryWithIndex rangeQueryAndGet = (int begin, int end, Database db) -> db.run(tr -> {
RangeQueryWithIndex rangeQueryAndThenRangeQueries = (int begin, int end, Database db) -> db.run(tr -> {
try {
List<KeyValue> kvs = tr.getRange(KeySelector.firstGreaterOrEqual(indexEntryKey(begin)),
KeySelector.firstGreaterOrEqual(indexEntryKey(end)),
@ -150,22 +161,25 @@ class RangeAndFlatMapQueryIntegrationTest {
Assertions.assertEquals(end - begin, kvs.size());
// Get the records of each index entry IN PARALLEL.
List<CompletableFuture<byte[]>> resultFutures = new ArrayList<>();
List<CompletableFuture<List<KeyValue>>> resultFutures = new ArrayList<>();
// In reality, we need to get the record key by parsing the index entry key. But considering this is a
// performance test, we just ignore the returned key and simply generate it from recordKey.
for (int id = begin; id < end; id++) {
resultFutures.add(tr.get(recordKey(id)));
resultFutures.add(tr.getRange(Range.startsWith(recordKeyPrefix(id)),
ReadTransaction.ROW_LIMIT_UNLIMITED, false, StreamingMode.WANT_ALL).asList());
}
AsyncUtil.whenAll(resultFutures).get();
if (validate) {
final Iterator<KeyValue> indexes = kvs.iterator();
final Iterator<CompletableFuture<byte[]>> records = resultFutures.iterator();
final Iterator<CompletableFuture<List<KeyValue>>> records = resultFutures.iterator();
for (int id = begin; id < end; id++) {
Assertions.assertTrue(indexes.hasNext());
assertByteArrayEquals(indexEntryKey(id), indexes.next().getKey());
Assertions.assertTrue(records.hasNext());
assertByteArrayEquals(recordValue(id), records.next().get());
List<KeyValue> rangeResult = records.next().get();
validateRangeResult(id, rangeResult);
}
Assertions.assertFalse(indexes.hasNext());
Assertions.assertFalse(records.hasNext());
@ -176,23 +190,32 @@ class RangeAndFlatMapQueryIntegrationTest {
return null;
});
RangeQueryWithIndex rangeQueryAndFlatMap = (int begin, int end, Database db) -> db.run(tr -> {
RangeQueryWithIndex mappedRangeQuery = (int begin, int end, Database db) -> db.run(tr -> {
try {
tr.options().setReadYourWritesDisable();
List<KeyValue> kvs =
tr.snapshot()
.getRangeAndFlatMap(KeySelector.firstGreaterOrEqual(indexEntryKey(begin)),
KeySelector.firstGreaterOrEqual(indexEntryKey(end)), MAPPER,
ReadTransaction.ROW_LIMIT_UNLIMITED, false, StreamingMode.WANT_ALL)
List<MappedKeyValue> kvs =
tr.getMappedRange(KeySelector.firstGreaterOrEqual(indexEntryKey(begin)),
KeySelector.firstGreaterOrEqual(indexEntryKey(end)), MAPPER,
ReadTransaction.ROW_LIMIT_UNLIMITED, false, StreamingMode.WANT_ALL)
.asList()
.get();
Assertions.assertEquals(end - begin, kvs.size());
if (validate) {
final Iterator<KeyValue> results = kvs.iterator();
final Iterator<MappedKeyValue> results = kvs.iterator();
for (int id = begin; id < end; id++) {
Assertions.assertTrue(results.hasNext());
assertByteArrayEquals(recordValue(id), results.next().getValue());
MappedKeyValue mappedKeyValue = results.next();
assertByteArrayEquals(indexEntryKey(id), mappedKeyValue.getKey());
assertByteArrayEquals(EMPTY, mappedKeyValue.getValue());
assertByteArrayEquals(indexEntryKey(id), mappedKeyValue.getKey());
byte[] prefix = recordKeyPrefix(id);
assertByteArrayEquals(prefix, mappedKeyValue.getRangeBegin());
prefix[prefix.length - 1] = (byte)0x01;
assertByteArrayEquals(prefix, mappedKeyValue.getRangeEnd());
List<KeyValue> rangeResult = mappedKeyValue.getRangeResult();
validateRangeResult(id, rangeResult);
}
Assertions.assertFalse(results.hasNext());
}
@ -202,55 +225,16 @@ class RangeAndFlatMapQueryIntegrationTest {
return null;
});
void validateRangeResult(int id, List<KeyValue> rangeResult) {
Assertions.assertEquals(rangeResult.size(), SPLIT_SIZE);
for (int split = 0; split < SPLIT_SIZE; split++) {
KeyValue keyValue = rangeResult.get(split);
assertByteArrayEquals(recordKey(id, split), keyValue.getKey());
assertByteArrayEquals(recordValue(id, split), keyValue.getValue());
}
}
void assertByteArrayEquals(byte[] expected, byte[] actual) {
Assertions.assertEquals(ByteArrayUtil.printable(expected), ByteArrayUtil.printable(actual));
}
@Test
void rangeAndFlatMapQueryOverMultipleRows() throws Exception {
try (Database db = openFDB()) {
insertRecordsWithIndexes(3, db);
List<byte[]> expected_data_of_records = new ArrayList<>();
for (int i = 0; i <= 1; i++) {
expected_data_of_records.add(recordValue(i));
}
db.run(tr -> {
// getRangeAndFlatMap is only support without RYW. This is a must!!!
tr.options().setReadYourWritesDisable();
// getRangeAndFlatMap is only supported with snapshot.
Iterator<KeyValue> kvs =
tr.snapshot()
.getRangeAndFlatMap(KeySelector.firstGreaterOrEqual(indexEntryKey(0)),
KeySelector.firstGreaterThan(indexEntryKey(1)), MAPPER,
ReadTransaction.ROW_LIMIT_UNLIMITED, false, StreamingMode.WANT_ALL)
.iterator();
Iterator<byte[]> expected_data_of_records_iter = expected_data_of_records.iterator();
while (expected_data_of_records_iter.hasNext()) {
Assertions.assertTrue(kvs.hasNext(), "iterator ended too early");
KeyValue kv = kvs.next();
byte[] actual_data_of_record = kv.getValue();
byte[] expected_data_of_record = expected_data_of_records_iter.next();
// System.out.println("result key:" + ByteArrayUtil.printable(kv.getKey()) + " value:" +
// ByteArrayUtil.printable(kv.getValue())); Output:
// result
// key:\x02prefix\x00\x02INDEX\x00\x02index-key-of-record-0\x00\x02primary-key-of-record-0\x00
// value:\x02data-of-record-0\x00
// result
// key:\x02prefix\x00\x02INDEX\x00\x02index-key-of-record-1\x00\x02primary-key-of-record-1\x00
// value:\x02data-of-record-1\x00
// For now, we don't guarantee what that the returned keys mean.
Assertions.assertArrayEquals(expected_data_of_record, actual_data_of_record,
"Incorrect data of record!");
}
Assertions.assertFalse(kvs.hasNext(), "Iterator returned too much data");
return null;
});
}
}
}

View File

@ -89,8 +89,6 @@ public class FakeFDBTransaction extends FDBTransaction {
@Override
protected FutureResults getRange_internal(KeySelector begin, KeySelector end,
// TODO: map is not supported in FakeFDBTransaction yet.
byte[] mapper, // Nullable
int rowLimit, int targetBytes, int streamingMode, int iteration,
boolean isSnapshot, boolean reverse) {
numRangeCalls++;

View File

@ -32,11 +32,11 @@ import java.util.NoSuchElementException;
* The serialization format of result is =>
* [int keyCount, boolean more, ListOf<(int keyLen, int valueLen, byte[] key, byte[] value)>]
*/
class DirectBufferIterator implements Iterator<KeyValue>, AutoCloseable {
private ByteBuffer byteBuffer;
private int current = 0;
private int keyCount = -1;
private boolean more = false;
abstract class DirectBufferIterator implements AutoCloseable {
protected ByteBuffer byteBuffer;
protected int current = 0;
protected int keyCount = -1;
protected boolean more = false;
public DirectBufferIterator(ByteBuffer buffer) {
byteBuffer = buffer;
@ -55,31 +55,11 @@ class DirectBufferIterator implements Iterator<KeyValue>, AutoCloseable {
return keyCount > -1;
}
@Override
public boolean hasNext() {
assert (hasResultReady());
return current < keyCount;
}
@Override
public KeyValue next() {
assert (hasResultReady()); // Must be called once its ready.
if (!hasNext()) {
throw new NoSuchElementException();
}
final int keyLen = byteBuffer.getInt();
final int valueLen = byteBuffer.getInt();
byte[] key = new byte[keyLen];
byteBuffer.get(key);
byte[] value = new byte[valueLen];
byteBuffer.get(value);
current += 1;
return new KeyValue(key, value);
}
public ByteBuffer getBuffer() {
return byteBuffer;
}

View File

@ -92,12 +92,10 @@ class FDBTransaction extends NativeObjectWrapper implements Transaction, OptionC
}
@Override
public AsyncIterable<KeyValue> getRangeAndFlatMap(KeySelector begin, KeySelector end, byte[] mapper, int limit,
boolean reverse, StreamingMode mode) {
if (mapper == null) {
throw new IllegalArgumentException("Mapper must be non-null");
}
return new RangeQuery(FDBTransaction.this, true, begin, end, mapper, limit, reverse, mode, eventKeeper);
public AsyncIterable<MappedKeyValue> getMappedRange(KeySelector begin, KeySelector end, byte[] mapper,
int limit, boolean reverse, StreamingMode mode) {
throw new UnsupportedOperationException("getMappedRange is only supported in serializable");
}
///////////////////
@ -348,9 +346,12 @@ class FDBTransaction extends NativeObjectWrapper implements Transaction, OptionC
}
@Override
public AsyncIterable<KeyValue> getRangeAndFlatMap(KeySelector begin, KeySelector end, byte[] mapper, int limit,
boolean reverse, StreamingMode mode) {
throw new UnsupportedOperationException("getRangeAndFlatMap is only supported in snapshot");
public AsyncIterable<MappedKeyValue> getMappedRange(KeySelector begin, KeySelector end, byte[] mapper,
int limit, boolean reverse, StreamingMode mode) {
if (mapper == null) {
throw new IllegalArgumentException("Mapper must be non-null");
}
return new MappedRangeQuery(FDBTransaction.this, false, begin, end, mapper, limit, reverse, mode, eventKeeper);
}
///////////////////
@ -431,7 +432,6 @@ class FDBTransaction extends NativeObjectWrapper implements Transaction, OptionC
// Users of this function must close the returned FutureResults when finished
protected FutureResults getRange_internal(KeySelector begin, KeySelector end,
byte[] mapper, // Nullable
int rowLimit, int targetBytes, int streamingMode, int iteration,
boolean isSnapshot, boolean reverse) {
if (eventKeeper != null) {
@ -443,14 +443,33 @@ class FDBTransaction extends NativeObjectWrapper implements Transaction, OptionC
" -- range get: (%s, %s) limit: %d, bytes: %d, mode: %d, iteration: %d, snap: %s, reverse %s",
begin.toString(), end.toString(), rowLimit, targetBytes, streamingMode,
iteration, Boolean.toString(isSnapshot), Boolean.toString(reverse)));*/
return new FutureResults(
mapper == null
? Transaction_getRange(getPtr(), begin.getKey(), begin.orEqual(), begin.getOffset(), end.getKey(),
end.orEqual(), end.getOffset(), rowLimit, targetBytes, streamingMode,
iteration, isSnapshot, reverse)
: Transaction_getRangeAndFlatMap(getPtr(), begin.getKey(), begin.orEqual(), begin.getOffset(),
end.getKey(), end.orEqual(), end.getOffset(), mapper, rowLimit,
targetBytes, streamingMode, iteration, isSnapshot, reverse),
return new FutureResults(Transaction_getRange(getPtr(), begin.getKey(), begin.orEqual(), begin.getOffset(),
end.getKey(), end.orEqual(), end.getOffset(), rowLimit,
targetBytes, streamingMode, iteration, isSnapshot, reverse),
FDB.instance().isDirectBufferQueriesEnabled(), executor, eventKeeper);
} finally {
pointerReadLock.unlock();
}
}
// Users of this function must close the returned FutureResults when finished
protected FutureMappedResults getMappedRange_internal(KeySelector begin, KeySelector end,
byte[] mapper, // Nullable
int rowLimit, int targetBytes, int streamingMode,
int iteration, boolean isSnapshot, boolean reverse) {
if (eventKeeper != null) {
eventKeeper.increment(Events.JNI_CALL);
}
pointerReadLock.lock();
try {
/*System.out.println(String.format(
" -- range get: (%s, %s) limit: %d, bytes: %d, mode: %d, iteration: %d, snap: %s, reverse %s",
begin.toString(), end.toString(), rowLimit, targetBytes, streamingMode,
iteration, Boolean.toString(isSnapshot), Boolean.toString(reverse)));*/
return new FutureMappedResults(
Transaction_getMappedRange(getPtr(), begin.getKey(), begin.orEqual(), begin.getOffset(),
end.getKey(), end.orEqual(), end.getOffset(), mapper, rowLimit,
targetBytes, streamingMode, iteration, isSnapshot, reverse),
FDB.instance().isDirectBufferQueriesEnabled(), executor, eventKeeper);
} finally {
pointerReadLock.unlock();
@ -790,7 +809,7 @@ class FDBTransaction extends NativeObjectWrapper implements Transaction, OptionC
byte[] keyEnd, boolean orEqualEnd, int offsetEnd,
int rowLimit, int targetBytes, int streamingMode, int iteration,
boolean isSnapshot, boolean reverse);
private native long Transaction_getRangeAndFlatMap(long cPtr, byte[] keyBegin, boolean orEqualBegin,
private native long Transaction_getMappedRange(long cPtr, byte[] keyBegin, boolean orEqualBegin,
int offsetBegin, byte[] keyEnd, boolean orEqualEnd,
int offsetEnd,
byte[] mapper, // Nonnull

View File

@ -0,0 +1,87 @@
/*
* FutureMappedResults.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.apple.foundationdb;
import java.nio.ByteBuffer;
import java.util.concurrent.Executor;
import com.apple.foundationdb.EventKeeper.Events;
/**
 * A {@code NativeFuture} wrapping the native result of a mapped-range request.
 * Marshals results on demand: the native payload is only converted into a
 * {@link MappedRangeResult} when {@link #getResults()} is called, optionally
 * through a pooled direct {@code ByteBuffer} to avoid an extra JNI copy.
 */
class FutureMappedResults extends NativeFuture<MappedRangeResultInfo> {
	// Optional instrumentation sink; may be null, every use is null-checked.
	private final EventKeeper eventKeeper;
	FutureMappedResults(long cPtr, boolean enableDirectBufferQueries, Executor executor, EventKeeper eventKeeper) {
		super(cPtr);
		registerMarshalCallback(executor);
		this.enableDirectBufferQueries = enableDirectBufferQueries;
		this.eventKeeper = eventKeeper;
	}

	@Override
	protected void postMarshal(MappedRangeResultInfo rri) {
		// We can't close because this class actually marshals on-demand
	}

	/**
	 * Checks the native future for an error and, on success, returns a handle
	 * back to this future so the payload can be marshaled later.
	 *
	 * @param cPtr native pointer to the underlying FDBFuture
	 * @return a {@code MappedRangeResultInfo} referring to this future
	 * @throws FDBException if the native future completed with an error
	 */
	@Override
	protected MappedRangeResultInfo getIfDone_internal(long cPtr) throws FDBException {
		if (eventKeeper != null) {
			eventKeeper.increment(Events.JNI_CALL);
		}
		FDBException err = Future_getError(cPtr);
		if (err != null && !err.isSuccess()) {
			throw err;
		}

		return new MappedRangeResultInfo(this);
	}

	/**
	 * Marshals the native result into a {@link MappedRangeResult}.
	 * If direct-buffer queries are enabled and a pooled buffer is available,
	 * the result is decoded from that buffer; otherwise it is built entirely
	 * on the JNI side via {@code FutureMappedResults_get}.
	 */
	public MappedRangeResult getResults() {
		ByteBuffer buffer = enableDirectBufferQueries ? DirectBufferPool.getInstance().poll() : null;
		// A null buffer here means either the feature is off or the pool is
		// exhausted (a "miss"); both paths still count the JNI call.
		if (buffer != null && eventKeeper != null) {
			eventKeeper.increment(Events.RANGE_QUERY_DIRECT_BUFFER_HIT);
			eventKeeper.increment(Events.JNI_CALL);
		} else if (eventKeeper != null) {
			eventKeeper.increment(Events.RANGE_QUERY_DIRECT_BUFFER_MISS);
			eventKeeper.increment(Events.JNI_CALL);
		}

		try {
			// Hold the pointer read lock across the native calls so the
			// future cannot be disposed while we are reading from it.
			pointerReadLock.lock();
			if (buffer != null) {
				// try-with-resources returns the buffer to the pool on close.
				try (MappedRangeResultDirectBufferIterator directIterator =
				         new MappedRangeResultDirectBufferIterator(buffer)) {
					FutureMappedResults_getDirect(getPtr(), directIterator.getBuffer(),
					                              directIterator.getBuffer().capacity());
					return new MappedRangeResult(directIterator);
				}
			} else {
				return FutureMappedResults_get(getPtr());
			}
		} finally {
			pointerReadLock.unlock();
		}
	}

	private boolean enableDirectBufferQueries = false;

	private native MappedRangeResult FutureMappedResults_get(long cPtr) throws FDBException;
	private native void FutureMappedResults_getDirect(long cPtr, ByteBuffer buffer, int capacity) throws FDBException;
}

View File

@ -66,7 +66,7 @@ class FutureResults extends NativeFuture<RangeResultInfo> {
try {
pointerReadLock.lock();
if (buffer != null) {
try (DirectBufferIterator directIterator = new DirectBufferIterator(buffer)) {
try (RangeResultDirectBufferIterator directIterator = new RangeResultDirectBufferIterator(buffer)) {
FutureResults_getDirect(getPtr(), directIterator.getBuffer(), directIterator.getBuffer().capacity());
return new RangeResult(directIterator);
}

View File

@ -20,6 +20,8 @@
package com.apple.foundationdb;
import com.apple.foundationdb.tuple.ByteArrayUtil;
import java.util.Arrays;
/**
@ -77,4 +79,13 @@ public class KeyValue {
public int hashCode() {
return 17 + (37 * Arrays.hashCode(key) + Arrays.hashCode(value));
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("KeyValue{");
sb.append("key=").append(ByteArrayUtil.printable(key));
sb.append(", value=").append(ByteArrayUtil.printable(value));
sb.append('}');
return sb.toString();
}
}

View File

@ -0,0 +1,96 @@
/*
* MappedKeyValue.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.apple.foundationdb;
import com.apple.foundationdb.tuple.ByteArrayUtil;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
/**
 * A key/value pair returned by a mapped-range ({@code getMappedRange}) query,
 * together with the secondary range it expanded to: the boundary keys of the
 * underlying range read and the key/value pairs that read produced.
 */
public class MappedKeyValue extends KeyValue {
	private final byte[] rangeBegin;
	private final byte[] rangeEnd;
	private final List<KeyValue> rangeResult;

	MappedKeyValue(byte[] key, byte[] value, byte[] rangeBegin, byte[] rangeEnd, List<KeyValue> rangeResult) {
		super(key, value);
		this.rangeBegin = rangeBegin;
		this.rangeEnd = rangeEnd;
		this.rangeResult = rangeResult;
	}

	public byte[] getRangeBegin() { return rangeBegin; }

	public byte[] getRangeEnd() { return rangeEnd; }

	public List<KeyValue> getRangeResult() { return rangeResult; }

	/**
	 * Deserializes a {@code MappedKeyValue} from the flat byte/length encoding
	 * produced by the JNI layer.
	 *
	 * @param bytes concatenated field bytes
	 * @param lengths per-field lengths: key, value, rangeBegin, rangeEnd,
	 *                then (underlying key, underlying value) per range entry
	 * @return the decoded {@code MappedKeyValue}
	 * @throws IllegalArgumentException if {@code lengths} cannot describe the
	 *                                  four metadata fields plus whole pairs
	 */
	public static MappedKeyValue fromBytes(byte[] bytes, int[] lengths) {
		// Lengths include: key, value, rangeBegin, rangeEnd, count * (underlying key, underlying value)
		if (lengths.length < 4) {
			throw new IllegalArgumentException("There needs to be at least 4 lengths to cover the metadata");
		}
		if ((lengths.length - 4) % 2 != 0) {
			throw new IllegalArgumentException("There needs to be an even number of lengths!");
		}

		Offset offset = new Offset();
		byte[] key = takeBytes(offset, bytes, lengths);
		byte[] value = takeBytes(offset, bytes, lengths);
		byte[] rangeBegin = takeBytes(offset, bytes, lengths);
		byte[] rangeEnd = takeBytes(offset, bytes, lengths);

		int count = (lengths.length - 4) / 2;
		List<KeyValue> rangeResult = new ArrayList<>(count);
		for (int i = 0; i < count; i++) {
			byte[] k = takeBytes(offset, bytes, lengths);
			byte[] v = takeBytes(offset, bytes, lengths);
			rangeResult.add(new KeyValue(k, v));
		}
		return new MappedKeyValue(key, value, rangeBegin, rangeEnd, rangeResult);
	}

	// Cursor tracking the current position in both the byte stream and the
	// lengths array while decoding.
	static class Offset {
		int bytes = 0;
		int lengths = 0;
	}

	// Consumes the next length-prefixed field from bytes, advancing the cursor.
	static byte[] takeBytes(Offset offset, byte[] bytes, int[] lengths) {
		int len = lengths[offset.lengths];
		byte[] b = new byte[len];
		System.arraycopy(bytes, offset.bytes, b, 0, len);
		offset.lengths++;
		offset.bytes += len;
		return b;
	}

	// Fix: the class adds rangeBegin/rangeEnd/rangeResult but previously
	// inherited KeyValue.equals/hashCode, so instances with identical keys but
	// different range results compared equal. Include the extra fields here,
	// consistent with KeyValue's Arrays-based comparison.
	@Override
	public boolean equals(Object obj) {
		if (this == obj) {
			return true;
		}
		if (!(obj instanceof MappedKeyValue) || !super.equals(obj)) {
			return false;
		}
		MappedKeyValue other = (MappedKeyValue) obj;
		return Arrays.equals(rangeBegin, other.rangeBegin) && Arrays.equals(rangeEnd, other.rangeEnd) &&
		       Objects.equals(rangeResult, other.rangeResult);
	}

	@Override
	public int hashCode() {
		int h = super.hashCode();
		h = 31 * h + Arrays.hashCode(rangeBegin);
		h = 31 * h + Arrays.hashCode(rangeEnd);
		h = 31 * h + Objects.hashCode(rangeResult);
		return h;
	}

	@Override
	public String toString() {
		final StringBuilder sb = new StringBuilder("MappedKeyValue{");
		sb.append("rangeBegin=").append(ByteArrayUtil.printable(rangeBegin));
		sb.append(", rangeEnd=").append(ByteArrayUtil.printable(rangeEnd));
		sb.append(", rangeResult=").append(rangeResult);
		sb.append('}');
		return super.toString() + "->" + sb.toString();
	}
}

View File

@ -0,0 +1,333 @@
/*
* RangeQuery.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.apple.foundationdb;
import com.apple.foundationdb.EventKeeper.Events;
import com.apple.foundationdb.async.AsyncIterable;
import com.apple.foundationdb.async.AsyncIterator;
import com.apple.foundationdb.async.AsyncUtil;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.concurrent.CancellationException;
import java.util.concurrent.CompletableFuture;
import java.util.function.BiConsumer;
// TODO: Share code with RangeQuery?
/**
* Represents a query against FoundationDB for a range of keys. The
* result of this query can be iterated over in a blocking fashion with a call to
* {@link #iterator()} (as specified by {@link Iterable}).
* If the calling program uses an asynchronous paradigm, a non-blocking
* {@link AsyncIterator} is returned from {@link #iterator()}. Both of these
* constructions will not begin to query the database until the first call to
* {@code hasNext()}. As the query uses its {@link Transaction} of origin to fetch
* all the data, the use of this query object must not span more than a few seconds.
*
* <br><br><b>NOTE:</b> although resulting {@code Iterator}s do support the {@code remove()}
* operation, the remove is not durable until {@code commit()} on the {@code Transaction}
* that yielded this query returns <code>true</code>.
*/
class MappedRangeQuery implements AsyncIterable<MappedKeyValue> {
private final FDBTransaction tr;
private final KeySelector begin;
private final KeySelector end;
private final byte[] mapper; // Nonnull
private final boolean snapshot;
private final int rowLimit;
private final boolean reverse;
private final StreamingMode streamingMode;
private final EventKeeper eventKeeper;
MappedRangeQuery(FDBTransaction transaction, boolean isSnapshot, KeySelector begin, KeySelector end, byte[] mapper,
int rowLimit, boolean reverse, StreamingMode streamingMode, EventKeeper eventKeeper) {
this.tr = transaction;
this.begin = begin;
this.end = end;
this.mapper = mapper;
this.snapshot = isSnapshot;
this.rowLimit = rowLimit;
this.reverse = reverse;
this.streamingMode = streamingMode;
this.eventKeeper = eventKeeper;
}
/**
* Returns all the results from the range requested as a {@code List}. If there were no
* limits on the original query and there is a large amount of data in the database
* this call could use a very large amount of memory.
*
* @return a {@code CompletableFuture} that will be set to the contents of the database
* constrained by the query parameters.
*/
@Override
public CompletableFuture<List<MappedKeyValue>> asList() {
StreamingMode mode = this.streamingMode;
if (mode == StreamingMode.ITERATOR) mode = (this.rowLimit == 0) ? StreamingMode.WANT_ALL : StreamingMode.EXACT;
// if the streaming mode is EXACT, try and grab things as one chunk
if (mode == StreamingMode.EXACT) {
FutureMappedResults range =
tr.getMappedRange_internal(this.begin, this.end, this.mapper, this.rowLimit, 0,
StreamingMode.EXACT.code(), 1, this.snapshot, this.reverse);
return range.thenApply(result -> result.get().values).whenComplete((result, e) -> range.close());
}
// If the streaming mode is not EXACT, simply collect the results of an
// iteration into a list
return AsyncUtil.collect(
new MappedRangeQuery(tr, snapshot, begin, end, mapper, rowLimit, reverse, mode, eventKeeper),
tr.getExecutor());
}
/**
* Returns an {@code Iterator} over the results of this query against FoundationDB.
*
* @return an {@code Iterator} over type {@code MappedKeyValue}.
*/
@Override
public AsyncRangeIterator iterator() {
return new AsyncRangeIterator(this.rowLimit, this.reverse, this.streamingMode);
}
private class AsyncRangeIterator implements AsyncIterator<MappedKeyValue> {
// immutable aspects of this iterator
private final boolean rowsLimited;
private final boolean reverse;
private final StreamingMode streamingMode;
// There is the chance for parallelism in the two "chunks" for fetched data
private MappedRangeResult chunk = null;
private MappedRangeResult nextChunk = null;
private boolean fetchOutstanding = false;
private byte[] prevKey = null;
private int index = 0;
private int iteration = 0;
private KeySelector begin;
private KeySelector end;
private int rowsRemaining;
private FutureMappedResults fetchingChunk;
private CompletableFuture<Boolean> nextFuture;
private boolean isCancelled = false;
private AsyncRangeIterator(int rowLimit, boolean reverse, StreamingMode streamingMode) {
this.begin = MappedRangeQuery.this.begin;
this.end = MappedRangeQuery.this.end;
this.rowsLimited = rowLimit != 0;
this.rowsRemaining = rowLimit;
this.reverse = reverse;
this.streamingMode = streamingMode;
startNextFetch();
}
private synchronized boolean mainChunkIsTheLast() { return !chunk.more || (rowsLimited && rowsRemaining < 1); }
class FetchComplete implements BiConsumer<MappedRangeResultInfo, Throwable> {
final FutureMappedResults fetchingChunk;
final CompletableFuture<Boolean> promise;
FetchComplete(FutureMappedResults fetch, CompletableFuture<Boolean> promise) {
this.fetchingChunk = fetch;
this.promise = promise;
}
@Override
public void accept(MappedRangeResultInfo data, Throwable error) {
try {
if (error != null) {
if (eventKeeper != null) {
eventKeeper.increment(Events.RANGE_QUERY_CHUNK_FAILED);
}
promise.completeExceptionally(error);
if (error instanceof Error) {
throw(Error) error;
}
return;
}
final MappedRangeResult rangeResult = data.get();
final RangeResultSummary summary = rangeResult.getSummary();
if (summary.lastKey == null) {
promise.complete(Boolean.FALSE);
return;
}
synchronized (MappedRangeQuery.AsyncRangeIterator.this) {
fetchOutstanding = false;
// adjust the total number of rows we should ever fetch
rowsRemaining -= summary.keyCount;
// set up the next fetch
if (reverse) {
end = KeySelector.firstGreaterOrEqual(summary.lastKey);
} else {
begin = KeySelector.firstGreaterThan(summary.lastKey);
}
// If this is the first fetch or the main chunk is exhausted
if (chunk == null || index == chunk.values.size()) {
nextChunk = null;
chunk = rangeResult;
index = 0;
} else {
nextChunk = rangeResult;
}
}
promise.complete(Boolean.TRUE);
} finally {
fetchingChunk.close();
}
}
}
private synchronized void startNextFetch() {
if (fetchOutstanding)
throw new IllegalStateException("Reentrant call not allowed"); // This can not be called reentrantly
if (isCancelled) return;
if (chunk != null && mainChunkIsTheLast()) return;
fetchOutstanding = true;
nextChunk = null;
nextFuture = new CompletableFuture<>();
final long sTime = System.nanoTime();
fetchingChunk = tr.getMappedRange_internal(begin, end, mapper, rowsLimited ? rowsRemaining : 0, 0,
streamingMode.code(), ++iteration, snapshot, reverse);
BiConsumer<MappedRangeResultInfo, Throwable> cons = new FetchComplete(fetchingChunk, nextFuture);
if (eventKeeper != null) {
eventKeeper.increment(Events.RANGE_QUERY_FETCHES);
cons = cons.andThen((r, t) -> {
eventKeeper.timeNanos(Events.RANGE_QUERY_FETCH_TIME_NANOS, System.nanoTime() - sTime);
});
}
fetchingChunk.whenComplete(cons);
}
@Override
public synchronized CompletableFuture<Boolean> onHasNext() {
if (isCancelled) throw new CancellationException();
// This will only happen before the first fetch has completed
if (chunk == null) {
return nextFuture;
}
// We have a chunk and are still working though it
if (index < chunk.values.size()) {
return AsyncUtil.READY_TRUE;
}
// If we are at the end of the current chunk there is either:
// - no more data -or-
// - we are already fetching the next block
return mainChunkIsTheLast() ? AsyncUtil.READY_FALSE : nextFuture;
}
@Override
public boolean hasNext() {
return onHasNext().join();
}
@Override
public MappedKeyValue next() {
	// Fast path: serve directly out of the already-fetched chunk while holding
	// the monitor; otherwise fall back to waiting on the pending fetch below.
	CompletableFuture<Boolean> nextFuture;
	synchronized (this) {
		if (isCancelled) throw new CancellationException();
		// at least the first chunk has been fetched and there is at least one
		// available result
		if (chunk != null && index < chunk.values.size()) {
			// If this is the first call to next() on a chunk, then we will want to
			// start fetching the data for the next block
			boolean initialNext = index == 0;
			MappedKeyValue result = chunk.values.get(index);
			// Remember the key so remove() can clear the row we just returned.
			prevKey = result.getKey();
			index++;
			if (eventKeeper != null) {
				// We record the BYTES_FETCHED here, rather than at a lower level,
				// because some parts of the construction of a MappedRangeResult occur underneath
				// the JNI boundary, and we don't want to pass the eventKeeper down there
				// (note: account for the length fields as well when recording the bytes
				// fetched)
				eventKeeper.count(Events.BYTES_FETCHED, result.getKey().length + result.getValue().length + 8);
				eventKeeper.increment(Events.RANGE_QUERY_RECORDS_FETCHED);
			}
			// If this is the first call to next() on a chunk there cannot
			// be another waiting, since we could not have issued a request
			assert (!(initialNext && nextChunk != null));
			// we are at the end of the current chunk and there is more to be had already,
			// so swap the prefetched chunk in and reset the cursor
			if (index == chunk.values.size() && nextChunk != null) {
				index = 0;
				chunk = nextChunk;
				nextChunk = null;
			}
			if (initialNext) {
				startNextFetch();
			}
			return result;
		}
		nextFuture = onHasNext();
	}
	// If there was no result ready then we need to wait on the future
	// and return the proper result, throwing if there are no more elements.
	// Note: next() recurses (outside the synchronized block) once the future
	// reports that another element is available.
	return nextFuture
	    .thenApply(hasNext -> {
		    if (hasNext) {
			    return next();
		    }
		    throw new NoSuchElementException();
	    })
	    .join();
}
@Override
public synchronized void remove() {
	// next() must have returned at least one row so prevKey identifies it.
	if (prevKey == null) {
		throw new IllegalStateException("No value has been fetched from database");
	}
	// Queue a clear of the most recently returned key on the owning transaction.
	tr.clear(prevKey);
}
@Override
public synchronized void cancel() {
	// Flag the iterator first so concurrent hasNext()/next() calls fail fast,
	// then interrupt both outstanding futures.
	isCancelled = true;
	nextFuture.cancel(true);
	fetchingChunk.cancel(true);
}
}
}

View File

@ -0,0 +1,64 @@
/*
* MappedRangeResult.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.apple.foundationdb;
import com.apple.foundationdb.tuple.ByteArrayUtil;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * A batch of mapped key/value entries returned by a mapped-range read,
 * together with a flag indicating whether more results remain past this batch.
 */
class MappedRangeResult {
	final List<MappedKeyValue> values;
	final boolean more;

	public MappedRangeResult(MappedKeyValue[] values, boolean more) {
		this.values = Arrays.asList(values);
		this.more = more;
	}

	MappedRangeResult(MappedRangeResultDirectBufferIterator iterator) {
		// The summary (entry count and the "more" flag) precedes the entries.
		iterator.readResultsSummary();
		more = iterator.hasMore();
		final int entryCount = iterator.count();
		final List<MappedKeyValue> collected = new ArrayList<>(entryCount);
		for (int idx = 0; idx < entryCount; idx++) {
			collected.add(iterator.next());
		}
		values = collected;
	}

	/** Summarizes this batch: last key (or null when empty), count, and the more flag. */
	public RangeResultSummary getSummary() {
		final int count = values.size();
		if (count == 0) {
			return new RangeResultSummary(null, 0, more);
		}
		return new RangeResultSummary(values.get(count - 1).getKey(), count, more);
	}

	@Override
	public String toString() {
		return "MappedRangeResult{" + "values=" + values + ", more=" + more + '}';
	}
}

View File

@ -0,0 +1,71 @@
/*
* MappedRangeResultDirectBufferIterator.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2015-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.apple.foundationdb;
import java.io.Closeable;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
/**
* Holds the direct buffer that is shared with JNI wrapper.
*/
/**
 * Holds the direct buffer that is shared with the JNI wrapper and decodes
 * MappedKeyValue entries out of it.
 *
 * Each entry is read as four length-prefixed byte strings (key, value, range
 * begin, range end), followed by an int count and that many length-prefixed
 * (key, value) pairs forming the nested range result.
 */
class MappedRangeResultDirectBufferIterator extends DirectBufferIterator implements Iterator<KeyValue> {
	MappedRangeResultDirectBufferIterator(ByteBuffer buffer) { super(buffer); }

	@Override
	public boolean hasNext() {
		return super.hasNext();
	}

	@Override
	public MappedKeyValue next() {
		assert (hasResultReady()); // Must be called once it's ready.
		if (!hasNext()) {
			throw new NoSuchElementException();
		}
		final byte[] key = getString();
		final byte[] value = getString();
		final byte[] rangeBegin = getString();
		final byte[] rangeEnd = getString();
		final int rangeResultSize = byteBuffer.getInt();
		// Use the diamond operator (not a raw ArrayList) and pre-size to the
		// known element count to avoid internal resizing.
		final List<KeyValue> rangeResult = new ArrayList<>(rangeResultSize);
		for (int i = 0; i < rangeResultSize; i++) {
			final byte[] k = getString();
			final byte[] v = getString();
			rangeResult.add(new KeyValue(k, v));
		}
		current += 1;
		return new MappedKeyValue(key, value, rangeBegin, rangeEnd, rangeResult);
	}

	/** Reads a length-prefixed byte string from the current buffer position. */
	private byte[] getString() {
		final int len = byteBuffer.getInt();
		final byte[] s = new byte[len];
		byteBuffer.get(s);
		return s;
	}
}

View File

@ -0,0 +1,29 @@
/*
* MappedRangeResultInfo.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.apple.foundationdb;
/** Thin adapter exposing the results held by a FutureMappedResults. */
class MappedRangeResultInfo {
	// Never reassigned after construction, so declare it final.
	private final FutureMappedResults f;

	MappedRangeResultInfo(FutureMappedResults f) { this.f = f; }

	/** Returns the mapped-range results backing this object. */
	MappedRangeResult get() { return f.getResults(); }
}

View File

@ -49,19 +49,17 @@ class RangeQuery implements AsyncIterable<KeyValue> {
private final FDBTransaction tr;
private final KeySelector begin;
private final KeySelector end;
private final byte[] mapper; // Nullable
private final boolean snapshot;
private final int rowLimit;
private final boolean reverse;
private final StreamingMode streamingMode;
private final EventKeeper eventKeeper;
RangeQuery(FDBTransaction transaction, boolean isSnapshot, KeySelector begin, KeySelector end, byte[] mapper,
int rowLimit, boolean reverse, StreamingMode streamingMode, EventKeeper eventKeeper) {
RangeQuery(FDBTransaction transaction, boolean isSnapshot, KeySelector begin, KeySelector end, int rowLimit,
boolean reverse, StreamingMode streamingMode, EventKeeper eventKeeper) {
this.tr = transaction;
this.begin = begin;
this.end = end;
this.mapper = mapper;
this.snapshot = isSnapshot;
this.rowLimit = rowLimit;
this.reverse = reverse;
@ -69,12 +67,6 @@ class RangeQuery implements AsyncIterable<KeyValue> {
this.eventKeeper = eventKeeper;
}
// RangeQueryAndFlatMap
RangeQuery(FDBTransaction transaction, boolean isSnapshot, KeySelector begin, KeySelector end, int rowLimit,
boolean reverse, StreamingMode streamingMode, EventKeeper eventKeeper) {
this(transaction, isSnapshot, begin, end, null, rowLimit, reverse, streamingMode, eventKeeper);
}
/**
* Returns all the results from the range requested as a {@code List}. If there were no
* limits on the original query and there is a large amount of data in the database
@ -92,7 +84,7 @@ class RangeQuery implements AsyncIterable<KeyValue> {
// if the streaming mode is EXACT, try and grab things as one chunk
if(mode == StreamingMode.EXACT) {
FutureResults range = tr.getRange_internal(this.begin, this.end, this.mapper, this.rowLimit, 0,
FutureResults range = tr.getRange_internal(this.begin, this.end, this.rowLimit, 0,
StreamingMode.EXACT.code(), 1, this.snapshot, this.reverse);
return range.thenApply(result -> result.get().values)
.whenComplete((result, e) -> range.close());
@ -100,7 +92,7 @@ class RangeQuery implements AsyncIterable<KeyValue> {
// If the streaming mode is not EXACT, simply collect the results of an
// iteration into a list
return AsyncUtil.collect(new RangeQuery(tr, snapshot, begin, end, mapper, rowLimit, reverse, mode, eventKeeper),
return AsyncUtil.collect(new RangeQuery(tr, snapshot, begin, end, rowLimit, reverse, mode, eventKeeper),
tr.getExecutor());
}
@ -229,8 +221,8 @@ class RangeQuery implements AsyncIterable<KeyValue> {
nextFuture = new CompletableFuture<>();
final long sTime = System.nanoTime();
fetchingChunk = tr.getRange_internal(begin, end, mapper, rowsLimited ? rowsRemaining : 0, 0,
streamingMode.code(), ++iteration, snapshot, reverse);
fetchingChunk = tr.getRange_internal(begin, end, rowsLimited ? rowsRemaining : 0, 0, streamingMode.code(),
++iteration, snapshot, reverse);
BiConsumer<RangeResultInfo,Throwable> cons = new FetchComplete(fetchingChunk,nextFuture);
if(eventKeeper!=null){

View File

@ -58,7 +58,7 @@ class RangeResult {
this.more = more;
}
RangeResult(DirectBufferIterator iterator) {
RangeResult(RangeResultDirectBufferIterator iterator) {
iterator.readResultsSummary();
more = iterator.hasMore();

View File

@ -0,0 +1,62 @@
/*
* RangeResultDirectBufferIterator.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2015-2020 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.apple.foundationdb;
import java.io.Closeable;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Iterator;
import java.util.NoSuchElementException;
/**
* Holds the direct buffer that is shared with JNI wrapper. A typical usage is as follows:
*
* The serialization format of result is =>
* [int keyCount, boolean more, ListOf<(int keyLen, int valueLen, byte[] key, byte[] value)>]
*/
/**
 * Holds the direct buffer that is shared with JNI wrapper. A typical usage is as follows:
 *
 * The serialization format of result is =>
 * [int keyCount, boolean more, ListOf<(int keyLen, int valueLen, byte[] key, byte[] value)>]
 */
class RangeResultDirectBufferIterator extends DirectBufferIterator implements Iterator<KeyValue> {
	RangeResultDirectBufferIterator(ByteBuffer buffer) { super(buffer); }

	@Override
	public boolean hasNext() {
		return super.hasNext();
	}

	@Override
	public KeyValue next() {
		assert (hasResultReady()); // Must be called once it's ready.
		if (!hasNext()) {
			throw new NoSuchElementException();
		}
		// Both lengths precede both payloads in the buffer.
		final int keyLength = byteBuffer.getInt();
		final int valueLength = byteBuffer.getInt();
		final byte[] keyBytes = new byte[keyLength];
		byteBuffer.get(keyBytes);
		final byte[] valueBytes = new byte[valueLength];
		byteBuffer.get(valueBytes);
		current += 1;
		return new KeyValue(keyBytes, valueBytes);
	}
}

View File

@ -20,6 +20,8 @@
package com.apple.foundationdb;
import com.apple.foundationdb.tuple.ByteArrayUtil;
class RangeResultSummary {
final byte[] lastKey;
final int keyCount;
@ -30,4 +32,14 @@ class RangeResultSummary {
this.keyCount = keyCount;
this.more = more;
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("RangeResultSummary{");
sb.append("lastKey=").append(ByteArrayUtil.printable(lastKey));
sb.append(", keyCount=").append(keyCount);
sb.append(", more=").append(more);
sb.append('}');
return sb.toString();
}
}

View File

@ -457,8 +457,8 @@ public interface ReadTransaction extends ReadTransactionContext {
* </p>
* @return a handle to access the results of the asynchronous call
*/
AsyncIterable<KeyValue> getRangeAndFlatMap(KeySelector begin, KeySelector end, byte[] mapper, int limit,
boolean reverse, StreamingMode mode);
AsyncIterable<MappedKeyValue> getMappedRange(KeySelector begin, KeySelector end, byte[] mapper, int limit,
boolean reverse, StreamingMode mode);
/**
* Gets an estimate for the number of bytes stored in the given range.

View File

@ -52,7 +52,7 @@ set(JAVA_INTEGRATION_TESTS
src/integration/com/apple/foundationdb/CycleMultiClientIntegrationTest.java
src/integration/com/apple/foundationdb/SidebandMultiThreadClientTest.java
src/integration/com/apple/foundationdb/RepeatableReadMultiThreadClientTest.java
src/integration/com/apple/foundationdb/RangeAndFlatMapQueryIntegrationTest.java
src/integration/com/apple/foundationdb/MappedRangeQueryIntegrationTest.java
)
# Resources that are used in integration testing, but are not explicitly test files (JUnit rules,

View File

@ -5,6 +5,7 @@ env_set(USE_DTRACE ON BOOL "Enable dtrace probes on supported platforms")
env_set(USE_VALGRIND OFF BOOL "Compile for valgrind usage")
env_set(USE_VALGRIND_FOR_CTEST ${USE_VALGRIND} BOOL "Use valgrind for ctest")
env_set(ALLOC_INSTRUMENTATION OFF BOOL "Instrument alloc")
env_set(USE_JEMALLOC ON BOOL "Link with jemalloc")
env_set(USE_ASAN OFF BOOL "Compile with address sanitizer")
env_set(USE_GCOV OFF BOOL "Compile with gcov instrumentation")
env_set(USE_MSAN OFF BOOL "Compile with memory sanitizer. To avoid false positives you need to dynamically link to a msan-instrumented libc++ and libc++abi, which you must compile separately. See https://github.com/google/sanitizers/wiki/MemorySanitizerLibcxxHowTo#instrumented-libc.")

View File

@ -1,6 +1,5 @@
add_library(jemalloc INTERFACE)
set(USE_JEMALLOC ON)
# We don't want to use jemalloc on Windows
# Nor on FreeBSD, where jemalloc is the default system allocator
if(USE_SANITIZER OR WIN32 OR (CMAKE_SYSTEM_NAME STREQUAL "FreeBSD") OR APPLE)
@ -8,6 +7,11 @@ if(USE_SANITIZER OR WIN32 OR (CMAKE_SYSTEM_NAME STREQUAL "FreeBSD") OR APPLE)
return()
endif()
if(NOT USE_JEMALLOC)
return()
endif()
add_definitions(-DUSE_JEMALLOC)
find_path(JEMALLOC_INCLUDE_DIR
NAMES
jemalloc/jemalloc.h

View File

@ -6,13 +6,13 @@ Data distribution manages the lifetime of storage servers, decides which storage
## Components
**Storage server (`struct TCServerInfo`):** DD creates a TCServerInfo object for each storage server (SS). The TCServerInfo includes: (1) the SS locality, which includes the processID that is unique to ip:port, the zoneId that specifies which rack the SS is on, and the dcId that specifies which DC the SS is in; (2) the server's teams, which will be discussed in the following paragraph; (3) the tracker that monitors the status of the server; and (4) extra information related to the server's interface and preference. A server is healthy if its storage engine on the process is the same as the configured storage engine, and it is marked as desired by DD.
**Storage server (`class TCServerInfo`):** DD creates a TCServerInfo object for each storage server (SS). The TCServerInfo includes: (1) the SS locality, which includes the processID that is unique to ip:port, the zoneId that specifies which rack the SS is on, and the dcId that specifies which DC the SS is in; (2) the server's teams, which will be discussed in the following paragraph; (3) the tracker that monitors the status of the server; and (4) extra information related to the server's interface and preference. A server is healthy if its storage engine on the process is the same as the configured storage engine, and it is marked as desired by DD.
**Machine (`struct TCMachineInfo`)**: A machine in FDB is considered as a rack, because a typical FDB cluster will only use one physical host from each rack in the datacenter to reduce the impact of regular rack-maintenance events on the cluster. All servers on the same rack belong to the same machine. A machine is healthy if there exists a healthy server on the machine.
**Machine (`class TCMachineInfo`)**: A machine in FDB is considered as a rack, because a typical FDB cluster will only use one physical host from each rack in the datacenter to reduce the impact of regular rack-maintenance events on the cluster. All servers on the same rack belong to the same machine. A machine is healthy if there exists a healthy server on the machine.
**Server team (`struct TCTeamInfo`)**: A server team is a group of *k* servers that host the same key ranges, where *k* is the replication factor that is usually three. A server team is healthy if every server in the team is healthy and those servers localities satisfy the replication requirement. Servers are grouped into server teams to reduce the possibility of data unavailability events at the event of *k* server failures.
**Server team (`class TCTeamInfo`)**: A server team is a group of *k* servers that host the same key ranges, where *k* is the replication factor that is usually three. A server team is healthy if every server in the team is healthy and those servers localities satisfy the replication requirement. Servers are grouped into server teams to reduce the possibility of data unavailability events at the event of *k* server failures.
**Machine team (`struct TCMachineTeamInfo`)**: A machine team is a group of k machines, where k is the replication factor. Each server team must be on a machine team, meaning that each server in the server team is on a machine in the machine team and that no two servers are on the same machine. Similar to the purpose of server teams, machine teams are used to reduce the possibility of data unavailability events at the event of *k* machine failures. A machine team is healthy if every machine on the team is healthy and machines localities satisfy the replication policy.
**Machine team (`class TCMachineTeamInfo`)**: A machine team is a group of k machines, where k is the replication factor. Each server team must be on a machine team, meaning that each server in the server team is on a machine in the machine team and that no two servers are on the same machine. Similar to the purpose of server teams, machine teams are used to reduce the possibility of data unavailability events at the event of *k* machine failures. A machine team is healthy if every machine on the team is healthy and machines localities satisfy the replication policy.
**`TeamCollection`**: It has a global view of all servers and server teams, machines and machine teams. With the information, it creates server teams and machine teams. It also maintains the configuration settings for DD, which is used to create teams and decide which type of storage servers to recruit.
@ -30,7 +30,7 @@ Data distribution manages the lifetime of storage servers, decides which storage
*`moveKeysLockOwnerKey`* (`\xff/moveKeysLock/Owner`) and *moveKeysLockWriteKey* (`\xff/moveKeysLock/Write`): When DD moves keys, it must grab the moveKeysLock, which consists of an owner key and a write key. The owner key (i.e., `moveKeysLockOwnerKey`) specifies which DD currently owns the lock. The write key (i.e., `moveKeysLockWriteKey`) specifies which DD is currently changing the mapping between keys and servers (i.e., operating on serverKeys and keyServers subspace). If DD finds it does not own both keys when it tries to move keys, it will kill itself by throwing an error. The cluster controller will recruit a new one.
When a new DD is initialized, it will set itself as the owner by setting its random UID to the `moveKeysLockOwnerKey`. Since the owner key has only one value, at most one DD can own the DD-related system subspace. This avoids the potential race condition between multiple DDs which may co-exit during DD recruitment.
When a new DD is initialized, it will set itself as the owner by setting its random UID to the `moveKeysLockOwnerKey`. Since the owner key has only one value, at most one DD can own the DD-related system subspace. This avoids the potential race condition between multiple DDs which may co-exist during DD recruitment.
**Transaction State Store (txnStateStore)**: It is a replica of the special keyspace that stores the cluster's states, such as which SS is responsible for which shard. Because commit proxies use txnStateStore to decide which tLog and SS should receive a mutation, commit proxies must have a consistent view of txnStateStore. Therefore, changes to txnStateStore must be propagated to all commit proxies in total order. To achieve that, we use the special transaction (`applyMetaMutations`) to update txnStateStore and use resolvers to ensure the total ordering (serializable snapshot isolation).

View File

@ -132,7 +132,7 @@ ACTOR Future<bool> changeCoordinators(Reference<IDatabase> db, std::vector<Strin
throw;
}
}
std::string new_coordinators_str = boost::algorithm::join(newCoordinatorslist, ", ");
std::string new_coordinators_str = boost::algorithm::join(newCoordinatorslist, ",");
tr->set(fdb_cli::coordinatorsProcessSpecialKey, new_coordinators_str);
}
wait(safeThreadFutureToFuture(tr->commit()));

View File

@ -446,7 +446,7 @@ public:
Counter transactionGetKeyRequests;
Counter transactionGetValueRequests;
Counter transactionGetRangeRequests;
Counter transactionGetRangeAndFlatMapRequests;
Counter transactionGetMappedRangeRequests;
Counter transactionGetRangeStreamRequests;
Counter transactionWatchRequests;
Counter transactionGetAddressesForKeyRequests;

View File

@ -475,6 +475,7 @@ using KeyRange = Standalone<KeyRangeRef>;
using KeyValue = Standalone<KeyValueRef>;
using KeySelector = Standalone<struct KeySelectorRef>;
using RangeResult = Standalone<struct RangeResultRef>;
using MappedRangeResult = Standalone<struct MappedRangeResultRef>;
enum { invalidVersion = -1, latestVersion = -2, MAX_VERSION = std::numeric_limits<int64_t>::max() };
@ -616,6 +617,8 @@ KeyRangeWith<Val> keyRangeWith(const KeyRangeRef& range, const Val& value) {
return KeyRangeWith<Val>(range, value);
}
struct MappedKeyValueRef;
struct GetRangeLimits {
enum { ROW_LIMIT_UNLIMITED = -1, BYTE_LIMIT_UNLIMITED = -1 };
@ -629,6 +632,8 @@ struct GetRangeLimits {
void decrement(VectorRef<KeyValueRef> const& data);
void decrement(KeyValueRef const& data);
void decrement(VectorRef<MappedKeyValueRef> const& data);
void decrement(MappedKeyValueRef const& data);
// True if either the row or byte limit has been reached
bool isReached();
@ -689,6 +694,114 @@ struct Traceable<RangeResultRef> : std::true_type {
}
};
// Similar to KeyValueRef, but result can be empty.
// Captures a point-read request (the key) together with its outcome;
// NOTE(review): an absent `result` presumably means the key was not present —
// confirm against the producer of these records.
struct GetValueReqAndResultRef {
	KeyRef key;
	Optional<ValueRef> result;

	GetValueReqAndResultRef() {}
	// Deep-copies both fields into arena `a` so the copy owns its memory.
	GetValueReqAndResultRef(Arena& a, const GetValueReqAndResultRef& copyFrom)
	  : key(a, copyFrom.key), result(a, copyFrom.result) {}

	bool operator==(const GetValueReqAndResultRef& rhs) const { return key == rhs.key && result == rhs.result; }
	bool operator!=(const GetValueReqAndResultRef& rhs) const { return !(rhs == *this); }
	int expectedSize() const { return key.expectedSize() + result.expectedSize(); }

	template <class Ar>
	void serialize(Ar& ar) {
		serializer(ar, key, result);
	}
};
// Captures a range-read request (begin/end selectors) together with its result.
struct GetRangeReqAndResultRef {
	KeySelectorRef begin, end;
	RangeResultRef result;

	GetRangeReqAndResultRef() {}
	// Deep-copies the selectors and the result into arena `a` so the copy
	// owns its memory.
	GetRangeReqAndResultRef(Arena& a, const GetRangeReqAndResultRef& copyFrom)
	  : begin(a, copyFrom.begin), end(a, copyFrom.end), result(a, copyFrom.result) {}

	bool operator==(const GetRangeReqAndResultRef& rhs) const {
		return begin == rhs.begin && end == rhs.end && result == rhs.result;
	}
	bool operator!=(const GetRangeReqAndResultRef& rhs) const { return !(rhs == *this); }

	template <class Ar>
	void serialize(Ar& ar) {
		serializer(ar, begin, end, result);
	}
};
using MappedReqAndResultRef = std::variant<GetValueReqAndResultRef, GetRangeReqAndResultRef>;
// A key/value pair (the base KeyValueRef) augmented with the secondary
// request and its result produced while mapping this entry.
struct MappedKeyValueRef : KeyValueRef {
	// Save the original key value at the base (KeyValueRef).
	MappedReqAndResultRef reqAndResult;

	MappedKeyValueRef() = default;
	// Deep-copies the base pair and whichever alternative the variant holds
	// into arena `a`.
	MappedKeyValueRef(Arena& a, const MappedKeyValueRef& copyFrom) : KeyValueRef(a, copyFrom) {
		const auto& reqAndResultCopyFrom = copyFrom.reqAndResult;
		if (std::holds_alternative<GetValueReqAndResultRef>(reqAndResultCopyFrom)) {
			// Bind by const reference to avoid an extra by-value copy before
			// the arena-backed deep copy below.
			const auto& getValue = std::get<GetValueReqAndResultRef>(reqAndResultCopyFrom);
			reqAndResult = GetValueReqAndResultRef(a, getValue);
		} else if (std::holds_alternative<GetRangeReqAndResultRef>(reqAndResultCopyFrom)) {
			const auto& getRange = std::get<GetRangeReqAndResultRef>(reqAndResultCopyFrom);
			reqAndResult = GetRangeReqAndResultRef(a, getRange);
		} else {
			// Defensive: the variant has exactly these two alternatives today,
			// so this branch should be unreachable.
			throw internal_error();
		}
	}

	bool operator==(const MappedKeyValueRef& rhs) const {
		return static_cast<const KeyValueRef&>(*this) == static_cast<const KeyValueRef&>(rhs) &&
		       reqAndResult == rhs.reqAndResult;
	}
	bool operator!=(const MappedKeyValueRef& rhs) const { return !(rhs == *this); }

	// It relies on the base to provide the expectedSize. TODO: Consider add the underlying request and key values into
	// expected size?

	template <class Ar>
	void serialize(Ar& ar) {
		serializer(ar, ((KeyValueRef&)*this), reqAndResult);
	}
};
// A vector of MappedKeyValueRef entries plus the pagination flags carried by
// ordinary range results.
struct MappedRangeResultRef : VectorRef<MappedKeyValueRef> {
	// Additional information on range result. See comments on RangeResultRef.
	bool more;
	Optional<KeyRef> readThrough;
	bool readToBegin;
	bool readThroughEnd;

	MappedRangeResultRef() : more(false), readToBegin(false), readThroughEnd(false) {}
	// Deep-copy constructor: clones the vector contents and, when present, the
	// readThrough key into arena `p`.
	MappedRangeResultRef(Arena& p, const MappedRangeResultRef& toCopy)
	  : VectorRef<MappedKeyValueRef>(p, toCopy), more(toCopy.more),
	    readThrough(toCopy.readThrough.present() ? KeyRef(p, toCopy.readThrough.get()) : Optional<KeyRef>()),
	    readToBegin(toCopy.readToBegin), readThroughEnd(toCopy.readThroughEnd) {}
	// Shallow wrapper around an existing vector of entries.
	MappedRangeResultRef(const VectorRef<MappedKeyValueRef>& value,
	                     bool more,
	                     Optional<KeyRef> readThrough = Optional<KeyRef>())
	  : VectorRef<MappedKeyValueRef>(value), more(more), readThrough(readThrough), readToBegin(false),
	    readThroughEnd(false) {}
	// Empty result that only records the boundary flags.
	MappedRangeResultRef(bool readToBegin, bool readThroughEnd)
	  : more(false), readToBegin(readToBegin), readThroughEnd(readThroughEnd) {}

	template <class Ar>
	void serialize(Ar& ar) {
		serializer(ar, ((VectorRef<MappedKeyValueRef>&)*this), more, readThrough, readToBegin, readThroughEnd);
	}

	// Human-readable summary of the flags; does not print the entries.
	std::string toString() const {
		return "more:" + std::to_string(more) +
		       " readThrough:" + (readThrough.present() ? readThrough.get().toString() : "[unset]") +
		       " readToBegin:" + std::to_string(readToBegin) + " readThroughEnd:" + std::to_string(readThroughEnd);
	}
};
struct KeyValueStoreType {
constexpr static FileIdentifier file_identifier = 6560359;
// These enumerated values are stored in the database configuration, so should NEVER be changed.

View File

@ -59,12 +59,12 @@ public:
GetRangeLimits limits,
bool snapshot = false,
bool reverse = false) = 0;
virtual ThreadFuture<RangeResult> getRangeAndFlatMap(const KeySelectorRef& begin,
const KeySelectorRef& end,
const StringRef& mapper,
GetRangeLimits limits,
bool snapshot = false,
bool reverse = false) = 0;
virtual ThreadFuture<MappedRangeResult> getMappedRange(const KeySelectorRef& begin,
const KeySelectorRef& end,
const StringRef& mapper,
GetRangeLimits limits,
bool snapshot = false,
bool reverse = false) = 0;
virtual ThreadFuture<Standalone<VectorRef<const char*>>> getAddressesForKey(const KeyRef& key) = 0;
virtual ThreadFuture<Standalone<StringRef>> getVersionstamp() = 0;

View File

@ -63,12 +63,12 @@ public:
GetRangeLimits limits,
Snapshot = Snapshot::False,
Reverse = Reverse::False) = 0;
virtual Future<RangeResult> getRangeAndFlatMap(KeySelector begin,
KeySelector end,
Key mapper,
GetRangeLimits limits,
Snapshot = Snapshot::False,
Reverse = Reverse::False) = 0;
virtual Future<MappedRangeResult> getMappedRange(KeySelector begin,
KeySelector end,
Key mapper,
GetRangeLimits limits,
Snapshot = Snapshot::False,
Reverse = Reverse::False) = 0;
virtual Future<Standalone<VectorRef<const char*>>> getAddressesForKey(Key const& key) = 0;
virtual Future<Standalone<VectorRef<KeyRef>>> getRangeSplitPoints(KeyRange const& range, int64_t chunkSize) = 0;
virtual Future<int64_t> getEstimatedRangeSizeBytes(KeyRange const& keys) = 0;

View File

@ -146,38 +146,39 @@ ThreadFuture<RangeResult> DLTransaction::getRange(const KeyRangeRef& keys,
return getRange(firstGreaterOrEqual(keys.begin), firstGreaterOrEqual(keys.end), limits, snapshot, reverse);
}
ThreadFuture<RangeResult> DLTransaction::getRangeAndFlatMap(const KeySelectorRef& begin,
const KeySelectorRef& end,
const StringRef& mapper,
GetRangeLimits limits,
bool snapshot,
bool reverse) {
FdbCApi::FDBFuture* f = api->transactionGetRangeAndFlatMap(tr,
begin.getKey().begin(),
begin.getKey().size(),
begin.orEqual,
begin.offset,
end.getKey().begin(),
end.getKey().size(),
end.orEqual,
end.offset,
mapper.begin(),
mapper.size(),
limits.rows,
limits.bytes,
FDB_STREAMING_MODE_EXACT,
0,
snapshot,
reverse);
return toThreadFuture<RangeResult>(api, f, [](FdbCApi::FDBFuture* f, FdbCApi* api) {
const FdbCApi::FDBKeyValue* kvs;
ThreadFuture<MappedRangeResult> DLTransaction::getMappedRange(const KeySelectorRef& begin,
const KeySelectorRef& end,
const StringRef& mapper,
GetRangeLimits limits,
bool snapshot,
bool reverse) {
FdbCApi::FDBFuture* f = api->transactionGetMappedRange(tr,
begin.getKey().begin(),
begin.getKey().size(),
begin.orEqual,
begin.offset,
end.getKey().begin(),
end.getKey().size(),
end.orEqual,
end.offset,
mapper.begin(),
mapper.size(),
limits.rows,
limits.bytes,
FDB_STREAMING_MODE_EXACT,
0,
snapshot,
reverse);
return toThreadFuture<MappedRangeResult>(api, f, [](FdbCApi::FDBFuture* f, FdbCApi* api) {
const FdbCApi::FDBMappedKeyValue* kvms;
int count;
FdbCApi::fdb_bool_t more;
FdbCApi::fdb_error_t error = api->futureGetKeyValueArray(f, &kvs, &count, &more);
FdbCApi::fdb_error_t error = api->futureGetMappedKeyValueArray(f, &kvms, &count, &more);
ASSERT(!error);
// The memory for this is stored in the FDBFuture and is released when the future gets destroyed
return RangeResult(RangeResultRef(VectorRef<KeyValueRef>((KeyValueRef*)kvs, count), more), Arena());
return MappedRangeResult(
MappedRangeResultRef(VectorRef<MappedKeyValueRef>((MappedKeyValueRef*)kvms, count), more), Arena());
});
}
@ -555,11 +556,8 @@ void DLApi::init() {
"fdb_transaction_get_addresses_for_key",
headerVersion >= 0);
loadClientFunction(&api->transactionGetRange, lib, fdbCPath, "fdb_transaction_get_range", headerVersion >= 0);
loadClientFunction(&api->transactionGetRangeAndFlatMap,
lib,
fdbCPath,
"fdb_transaction_get_range_and_flat_map",
headerVersion >= 700);
loadClientFunction(
&api->transactionGetMappedRange, lib, fdbCPath, "fdb_transaction_get_mapped_range", headerVersion >= 700);
loadClientFunction(
&api->transactionGetVersionstamp, lib, fdbCPath, "fdb_transaction_get_versionstamp", headerVersion >= 410);
loadClientFunction(&api->transactionSet, lib, fdbCPath, "fdb_transaction_set", headerVersion >= 0);
@ -616,6 +614,8 @@ void DLApi::init() {
loadClientFunction(&api->futureGetKeyArray, lib, fdbCPath, "fdb_future_get_key_array", headerVersion >= 700);
loadClientFunction(
&api->futureGetKeyValueArray, lib, fdbCPath, "fdb_future_get_keyvalue_array", headerVersion >= 0);
loadClientFunction(
&api->futureGetMappedKeyValueArray, lib, fdbCPath, "fdb_future_get_mappedkeyvalue_array", headerVersion >= 700);
loadClientFunction(&api->futureSetCallback, lib, fdbCPath, "fdb_future_set_callback", headerVersion >= 0);
loadClientFunction(&api->futureCancel, lib, fdbCPath, "fdb_future_cancel", headerVersion >= 0);
loadClientFunction(&api->futureDestroy, lib, fdbCPath, "fdb_future_destroy", headerVersion >= 0);
@ -861,15 +861,15 @@ ThreadFuture<RangeResult> MultiVersionTransaction::getRange(const KeyRangeRef& k
return abortableFuture(f, tr.onChange);
}
ThreadFuture<RangeResult> MultiVersionTransaction::getRangeAndFlatMap(const KeySelectorRef& begin,
const KeySelectorRef& end,
const StringRef& mapper,
GetRangeLimits limits,
bool snapshot,
bool reverse) {
ThreadFuture<MappedRangeResult> MultiVersionTransaction::getMappedRange(const KeySelectorRef& begin,
const KeySelectorRef& end,
const StringRef& mapper,
GetRangeLimits limits,
bool snapshot,
bool reverse) {
auto tr = getTransaction();
auto f = tr.transaction ? tr.transaction->getRangeAndFlatMap(begin, end, mapper, limits, snapshot, reverse)
: makeTimeout<RangeResult>();
auto f = tr.transaction ? tr.transaction->getMappedRange(begin, end, mapper, limits, snapshot, reverse)
: makeTimeout<MappedRangeResult>();
return abortableFuture(f, tr.onChange);
}

View File

@ -38,6 +38,9 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
typedef struct FDB_database FDBDatabase;
typedef struct FDB_transaction FDBTransaction;
typedef int fdb_error_t;
typedef int fdb_bool_t;
#pragma pack(push, 4)
typedef struct key {
const uint8_t* key;
@ -49,6 +52,35 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
const void* value;
int valueLength;
} FDBKeyValue;
#pragma pack(pop)
/* Memory layout of KeySelectorRef. */
typedef struct keyselector {
FDBKey key;
/* orEqual and offset have not be tested in C binding. Just a placeholder. */
fdb_bool_t orEqual;
int offset;
} FDBKeySelector;
/* Memory layout of GetRangeReqAndResultRef. */
typedef struct getrangereqandresult {
FDBKeySelector begin;
FDBKeySelector end;
FDBKeyValue* data;
int m_size, m_capacity;
} FDBGetRangeReqAndResult;
typedef struct mappedkeyvalue {
FDBKey key;
FDBKey value;
/* It's complicated to map a std::variant to C. For now we assume the underlying requests are always getRange
* and take the shortcut. */
FDBGetRangeReqAndResult getRange;
unsigned char buffer[32];
} FDBMappedKeyValue;
#pragma pack(push, 4)
typedef struct keyrange {
const void* beginKey;
int beginKeyLength;
@ -57,9 +89,6 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
} FDBKeyRange;
#pragma pack(pop)
typedef int fdb_error_t;
typedef int fdb_bool_t;
typedef struct readgranulecontext {
// User context to pass along to functions
void* userContext;
@ -144,23 +173,23 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
int iteration,
fdb_bool_t snapshot,
fdb_bool_t reverse);
FDBFuture* (*transactionGetRangeAndFlatMap)(FDBTransaction* tr,
uint8_t const* beginKeyName,
int beginKeyNameLength,
fdb_bool_t beginOrEqual,
int beginOffset,
uint8_t const* endKeyName,
int endKeyNameLength,
fdb_bool_t endOrEqual,
int endOffset,
uint8_t const* mapper_name,
int mapper_name_length,
int limit,
int targetBytes,
FDBStreamingMode mode,
int iteration,
fdb_bool_t snapshot,
fdb_bool_t reverse);
FDBFuture* (*transactionGetMappedRange)(FDBTransaction* tr,
uint8_t const* beginKeyName,
int beginKeyNameLength,
fdb_bool_t beginOrEqual,
int beginOffset,
uint8_t const* endKeyName,
int endKeyNameLength,
fdb_bool_t endOrEqual,
int endOffset,
uint8_t const* mapper_name,
int mapper_name_length,
int limit,
int targetBytes,
FDBStreamingMode mode,
int iteration,
fdb_bool_t snapshot,
fdb_bool_t reverse);
FDBFuture* (*transactionGetVersionstamp)(FDBTransaction* tr);
void (*transactionSet)(FDBTransaction* tr,
@ -236,6 +265,10 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
fdb_error_t (*futureGetKeyRangeArray)(FDBFuture* f, const FDBKeyRange** out_keyranges, int* outCount);
fdb_error_t (*futureGetKeyArray)(FDBFuture* f, FDBKey const** outKeys, int* outCount);
fdb_error_t (*futureGetKeyValueArray)(FDBFuture* f, FDBKeyValue const** outKV, int* outCount, fdb_bool_t* outMore);
fdb_error_t (*futureGetMappedKeyValueArray)(FDBFuture* f,
FDBMappedKeyValue const** outKVM,
int* outCount,
fdb_bool_t* outMore);
fdb_error_t (*futureSetCallback)(FDBFuture* f, FDBCallback callback, void* callback_parameter);
void (*futureCancel)(FDBFuture* f);
void (*futureDestroy)(FDBFuture* f);
@ -281,12 +314,12 @@ public:
GetRangeLimits limits,
bool snapshot = false,
bool reverse = false) override;
ThreadFuture<RangeResult> getRangeAndFlatMap(const KeySelectorRef& begin,
const KeySelectorRef& end,
const StringRef& mapper,
GetRangeLimits limits,
bool snapshot,
bool reverse) override;
ThreadFuture<MappedRangeResult> getMappedRange(const KeySelectorRef& begin,
const KeySelectorRef& end,
const StringRef& mapper,
GetRangeLimits limits,
bool snapshot,
bool reverse) override;
ThreadFuture<Standalone<VectorRef<const char*>>> getAddressesForKey(const KeyRef& key) override;
ThreadFuture<Standalone<StringRef>> getVersionstamp() override;
ThreadFuture<int64_t> getEstimatedRangeSizeBytes(const KeyRangeRef& keys) override;
@ -434,12 +467,12 @@ public:
GetRangeLimits limits,
bool snapshot = false,
bool reverse = false) override;
ThreadFuture<RangeResult> getRangeAndFlatMap(const KeySelectorRef& begin,
const KeySelectorRef& end,
const StringRef& mapper,
GetRangeLimits limits,
bool snapshot,
bool reverse) override;
ThreadFuture<MappedRangeResult> getMappedRange(const KeySelectorRef& begin,
const KeySelectorRef& end,
const StringRef& mapper,
GetRangeLimits limits,
bool snapshot,
bool reverse) override;
ThreadFuture<Standalone<VectorRef<const char*>>> getAddressesForKey(const KeyRef& key) override;
ThreadFuture<Standalone<StringRef>> getVersionstamp() override;

View File

@ -171,8 +171,8 @@ void DatabaseContext::addTssMapping(StorageServerInterface const& ssi, StorageSe
TSSEndpointData(tssi.id(), tssi.getKey.getEndpoint(), metrics));
queueModel.updateTssEndpoint(ssi.getKeyValues.getEndpoint().token.first(),
TSSEndpointData(tssi.id(), tssi.getKeyValues.getEndpoint(), metrics));
queueModel.updateTssEndpoint(ssi.getKeyValuesAndFlatMap.getEndpoint().token.first(),
TSSEndpointData(tssi.id(), tssi.getKeyValuesAndFlatMap.getEndpoint(), metrics));
queueModel.updateTssEndpoint(ssi.getMappedKeyValues.getEndpoint().token.first(),
TSSEndpointData(tssi.id(), tssi.getMappedKeyValues.getEndpoint(), metrics));
queueModel.updateTssEndpoint(ssi.getKeyValuesStream.getEndpoint().token.first(),
TSSEndpointData(tssi.id(), tssi.getKeyValuesStream.getEndpoint(), metrics));
@ -196,7 +196,7 @@ void DatabaseContext::removeTssMapping(StorageServerInterface const& ssi) {
queueModel.removeTssEndpoint(ssi.getValue.getEndpoint().token.first());
queueModel.removeTssEndpoint(ssi.getKey.getEndpoint().token.first());
queueModel.removeTssEndpoint(ssi.getKeyValues.getEndpoint().token.first());
queueModel.removeTssEndpoint(ssi.getKeyValuesAndFlatMap.getEndpoint().token.first());
queueModel.removeTssEndpoint(ssi.getMappedKeyValues.getEndpoint().token.first());
queueModel.removeTssEndpoint(ssi.getKeyValuesStream.getEndpoint().token.first());
queueModel.removeTssEndpoint(ssi.watchValue.getEndpoint().token.first());
@ -476,9 +476,9 @@ ACTOR Future<Void> tssLogger(DatabaseContext* cx) {
tssEv, "GetKeyValuesLatency", it.second->SSgetKeyValuesLatency, it.second->TSSgetKeyValuesLatency);
traceTSSPercentiles(tssEv, "GetKeyLatency", it.second->SSgetKeyLatency, it.second->TSSgetKeyLatency);
traceTSSPercentiles(tssEv,
"GetKeyValuesAndFlatMapLatency",
it.second->SSgetKeyValuesAndFlatMapLatency,
it.second->TSSgetKeyValuesAndFlatMapLatency);
"GetMappedKeyValuesLatency",
it.second->SSgetMappedKeyValuesLatency,
it.second->TSSgetMappedKeyValuesLatency);
it.second->clear();
}
@ -1314,7 +1314,7 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<IClusterConnection
transactionPhysicalReadsCompleted("PhysicalReadRequestsCompleted", cc),
transactionGetKeyRequests("GetKeyRequests", cc), transactionGetValueRequests("GetValueRequests", cc),
transactionGetRangeRequests("GetRangeRequests", cc),
transactionGetRangeAndFlatMapRequests("GetRangeAndFlatMapRequests", cc),
transactionGetMappedRangeRequests("GetMappedRangeRequests", cc),
transactionGetRangeStreamRequests("GetRangeStreamRequests", cc), transactionWatchRequests("WatchRequests", cc),
transactionGetAddressesForKeyRequests("GetAddressesForKeyRequests", cc), transactionBytesRead("BytesRead", cc),
transactionKeysRead("KeysRead", cc), transactionMetadataVersionReads("MetadataVersionReads", cc),
@ -1572,7 +1572,7 @@ DatabaseContext::DatabaseContext(const Error& err)
transactionPhysicalReadsCompleted("PhysicalReadRequestsCompleted", cc),
transactionGetKeyRequests("GetKeyRequests", cc), transactionGetValueRequests("GetValueRequests", cc),
transactionGetRangeRequests("GetRangeRequests", cc),
transactionGetRangeAndFlatMapRequests("GetRangeAndFlatMapRequests", cc),
transactionGetMappedRangeRequests("GetMappedRangeRequests", cc),
transactionGetRangeStreamRequests("GetRangeStreamRequests", cc), transactionWatchRequests("WatchRequests", cc),
transactionGetAddressesForKeyRequests("GetAddressesForKeyRequests", cc), transactionBytesRead("BytesRead", cc),
transactionKeysRead("KeysRead", cc), transactionMetadataVersionReads("MetadataVersionReads", cc),
@ -2428,6 +2428,30 @@ void GetRangeLimits::decrement(KeyValueRef const& data) {
bytes = std::max(0, bytes - (int)8 - (int)data.expectedSize());
}
void GetRangeLimits::decrement(VectorRef<MappedKeyValueRef> const& data) {
if (rows != GetRangeLimits::ROW_LIMIT_UNLIMITED) {
ASSERT(data.size() <= rows);
rows -= data.size();
}
minRows = std::max(0, minRows - data.size());
// TODO: For now, expectedSize only considers the size of the original key values, but not the underlying queries or
// results. Also, double check it is correct when dealing with sizeof(MappedKeyValueRef).
if (bytes != GetRangeLimits::BYTE_LIMIT_UNLIMITED)
bytes = std::max(0, bytes - (int)data.expectedSize() - (8 - (int)sizeof(MappedKeyValueRef)) * data.size());
}
void GetRangeLimits::decrement(MappedKeyValueRef const& data) {
minRows = std::max(0, minRows - 1);
if (rows != GetRangeLimits::ROW_LIMIT_UNLIMITED)
rows--;
// TODO: For now, expectedSize only considers the size of the original key values, but not the underlying queries or
// results. Also, double check it is correct when dealing with sizeof(MappedKeyValueRef).
if (bytes != GetRangeLimits::BYTE_LIMIT_UNLIMITED)
bytes = std::max(0, bytes - (int)8 - (int)data.expectedSize());
}
// True if either the row or byte limit has been reached
bool GetRangeLimits::isReached() {
return rows == 0 || (bytes == 0 && minRows == 0);
@ -3355,21 +3379,21 @@ template <class GetKeyValuesFamilyRequest>
PublicRequestStream<GetKeyValuesFamilyRequest> StorageServerInterface::*getRangeRequestStream() {
if constexpr (std::is_same<GetKeyValuesFamilyRequest, GetKeyValuesRequest>::value) {
return &StorageServerInterface::getKeyValues;
} else if (std::is_same<GetKeyValuesFamilyRequest, GetKeyValuesAndFlatMapRequest>::value) {
return &StorageServerInterface::getKeyValuesAndFlatMap;
} else if (std::is_same<GetKeyValuesFamilyRequest, GetMappedKeyValuesRequest>::value) {
return &StorageServerInterface::getMappedKeyValues;
} else {
UNREACHABLE();
}
}
ACTOR template <class GetKeyValuesFamilyRequest, class GetKeyValuesFamilyReply>
Future<RangeResult> getExactRange(Reference<TransactionState> trState,
Version version,
KeyRange keys,
Key mapper,
GetRangeLimits limits,
Reverse reverse) {
state RangeResult output;
ACTOR template <class GetKeyValuesFamilyRequest, class GetKeyValuesFamilyReply, class RangeResultFamily>
Future<RangeResultFamily> getExactRange(Reference<TransactionState> trState,
Version version,
KeyRange keys,
Key mapper,
GetRangeLimits limits,
Reverse reverse) {
state RangeResultFamily output;
state Span span("NAPI:getExactRange"_loc, trState->spanID);
// printf("getExactRange( '%s', '%s' )\n", keys.begin.toString().c_str(), keys.end.toString().c_str());
@ -3547,14 +3571,14 @@ Future<Key> resolveKey(Reference<TransactionState> trState, KeySelector const& k
return getKey(trState, key, version);
}
ACTOR template <class GetKeyValuesFamilyRequest, class GetKeyValuesFamilyReply>
Future<RangeResult> getRangeFallback(Reference<TransactionState> trState,
Version version,
KeySelector begin,
KeySelector end,
Key mapper,
GetRangeLimits limits,
Reverse reverse) {
ACTOR template <class GetKeyValuesFamilyRequest, class GetKeyValuesFamilyReply, class RangeResultFamily>
Future<RangeResultFamily> getRangeFallback(Reference<TransactionState> trState,
Version version,
KeySelector begin,
KeySelector end,
Key mapper,
GetRangeLimits limits,
Reverse reverse) {
if (version == latestVersion) {
state Transaction transaction(trState->cx);
transaction.setOption(FDBTransactionOptions::CAUSAL_READ_RISKY);
@ -3570,16 +3594,16 @@ Future<RangeResult> getRangeFallback(Reference<TransactionState> trState,
state Key b = wait(fb);
state Key e = wait(fe);
if (b >= e) {
return RangeResult();
return RangeResultFamily();
}
// if e is allKeys.end, we have read through the end of the database
// if b is allKeys.begin, we have either read through the beginning of the database,
// or allKeys.begin exists in the database and will be part of the conflict range anyways
RangeResult _r = wait(getExactRange<GetKeyValuesFamilyRequest, GetKeyValuesFamilyReply>(
RangeResultFamily _r = wait(getExactRange<GetKeyValuesFamilyRequest, GetKeyValuesFamilyReply, RangeResultFamily>(
trState, version, KeyRangeRef(b, e), mapper, limits, reverse));
RangeResult r = _r;
RangeResultFamily r = _r;
if (b == allKeys.begin && ((reverse && !r.more) || !reverse))
r.readToBegin = true;
@ -3603,7 +3627,31 @@ Future<RangeResult> getRangeFallback(Reference<TransactionState> trState,
return r;
}
int64_t inline getRangeResultFamilyBytes(RangeResultRef result) {
return result.expectedSize();
}
int64_t inline getRangeResultFamilyBytes(MappedRangeResultRef result) {
int64_t bytes = 0;
for (const MappedKeyValueRef& mappedKeyValue : result) {
bytes += mappedKeyValue.key.size() + mappedKeyValue.value.size();
auto& reqAndResult = mappedKeyValue.reqAndResult;
if (std::holds_alternative<GetValueReqAndResultRef>(reqAndResult)) {
auto getValue = std::get<GetValueReqAndResultRef>(reqAndResult);
bytes += getValue.expectedSize();
} else if (std::holds_alternative<GetRangeReqAndResultRef>(reqAndResult)) {
auto getRange = std::get<GetRangeReqAndResultRef>(reqAndResult);
bytes += getRange.result.expectedSize();
} else {
throw internal_error();
}
}
return bytes;
}
// TODO: Client should add mapped keys to conflict ranges.
ACTOR template <class RangeResultFamily> // RangeResult or MappedRangeResult
void getRangeFinished(Reference<TransactionState> trState,
double startTime,
KeySelector begin,
@ -3611,11 +3659,8 @@ void getRangeFinished(Reference<TransactionState> trState,
Snapshot snapshot,
Promise<std::pair<Key, Key>> conflictRange,
Reverse reverse,
RangeResult result) {
int64_t bytes = 0;
for (const KeyValueRef& kv : result) {
bytes += kv.key.size() + kv.value.size();
}
RangeResultFamily result) {
int64_t bytes = getRangeResultFamilyBytes(result);
trState->cx->transactionBytesRead += bytes;
trState->cx->transactionKeysRead += result.size();
@ -3657,24 +3702,26 @@ void getRangeFinished(Reference<TransactionState> trState,
}
}
// GetKeyValuesFamilyRequest: GetKeyValuesRequest or GetKeyValuesAndFlatMapRequest
// GetKeyValuesFamilyReply: GetKeyValuesReply or GetKeyValuesAndFlatMapReply
// Sadly we need GetKeyValuesFamilyReply because cannot do something like: state
// REPLY_TYPE(GetKeyValuesFamilyRequest) rep;
ACTOR template <class GetKeyValuesFamilyRequest, class GetKeyValuesFamilyReply>
Future<RangeResult> getRange(Reference<TransactionState> trState,
Future<Version> fVersion,
KeySelector begin,
KeySelector end,
Key mapper,
GetRangeLimits limits,
Promise<std::pair<Key, Key>> conflictRange,
Snapshot snapshot,
Reverse reverse) {
ACTOR template <class GetKeyValuesFamilyRequest, // GetKeyValuesRequest or GetMappedKeyValuesRequest
class GetKeyValuesFamilyReply, // GetKeyValuesReply or GetMappedKeyValuesReply (It would be nice if
// we could use REPLY_TYPE(GetKeyValuesFamilyRequest) instead of specify
// it as a separate template element)
class RangeResultFamily // RangeResult or MappedRangeResult
>
Future<RangeResultFamily> getRange(Reference<TransactionState> trState,
Future<Version> fVersion,
KeySelector begin,
KeySelector end,
Key mapper,
GetRangeLimits limits,
Promise<std::pair<Key, Key>> conflictRange,
Snapshot snapshot,
Reverse reverse) {
// state using RangeResultRefFamily = typename RangeResultFamily::RefType;
state GetRangeLimits originalLimits(limits);
state KeySelector originalBegin = begin;
state KeySelector originalEnd = end;
state RangeResult output;
state RangeResultFamily output;
state Span span("NAPI:getRange"_loc, trState->spanID);
try {
@ -3822,15 +3869,16 @@ Future<RangeResult> getRange(Reference<TransactionState> trState,
bool readToBegin = output.readToBegin;
bool readThroughEnd = output.readThroughEnd;
output = RangeResult(RangeResultRef(rep.data, modifiedSelectors || limits.isReached() || rep.more),
rep.arena);
using RangeResultRefFamily = typename RangeResultFamily::RefType;
output = RangeResultFamily(
RangeResultRefFamily(rep.data, modifiedSelectors || limits.isReached() || rep.more), rep.arena);
output.readToBegin = readToBegin;
output.readThroughEnd = readThroughEnd;
if (BUGGIFY && limits.hasByteLimit() && output.size() > std::max(1, originalLimits.minRows)) {
// Copy instead of resizing because TSS maybe be using output's arena for comparison. This only
// happens in simulation so it's fine
RangeResult copy;
RangeResultFamily copy;
int newSize =
deterministicRandom()->randomInt(std::max(1, originalLimits.minRows), output.size());
for (int i = 0; i < newSize; i++) {
@ -3876,8 +3924,9 @@ Future<RangeResult> getRange(Reference<TransactionState> trState,
TEST(true); // !GetKeyValuesFamilyReply.more and modifiedSelectors in getRange
if (!rep.data.size()) {
RangeResult result = wait(getRangeFallback<GetKeyValuesFamilyRequest, GetKeyValuesFamilyReply>(
trState, version, originalBegin, originalEnd, mapper, originalLimits, reverse));
RangeResultFamily result = wait(
getRangeFallback<GetKeyValuesFamilyRequest, GetKeyValuesFamilyReply, RangeResultFamily>(
trState, version, originalBegin, originalEnd, mapper, originalLimits, reverse));
getRangeFinished(
trState, startTime, originalBegin, originalEnd, snapshot, conflictRange, reverse, result);
return result;
@ -3907,8 +3956,9 @@ Future<RangeResult> getRange(Reference<TransactionState> trState,
Reverse{ reverse ? (end - 1).isBackward() : begin.isBackward() });
if (e.code() == error_code_wrong_shard_server) {
RangeResult result = wait(getRangeFallback<GetKeyValuesFamilyRequest, GetKeyValuesFamilyReply>(
trState, version, originalBegin, originalEnd, mapper, originalLimits, reverse));
RangeResultFamily result = wait(
getRangeFallback<GetKeyValuesFamilyRequest, GetKeyValuesFamilyReply, RangeResultFamily>(
trState, version, originalBegin, originalEnd, mapper, originalLimits, reverse));
getRangeFinished(
trState, startTime, originalBegin, originalEnd, snapshot, conflictRange, reverse, result);
return result;
@ -4461,7 +4511,7 @@ Future<RangeResult> getRange(Reference<TransactionState> const& trState,
KeySelector const& end,
GetRangeLimits const& limits,
Reverse const& reverse) {
return getRange<GetKeyValuesRequest, GetKeyValuesReply>(
return getRange<GetKeyValuesRequest, GetKeyValuesReply, RangeResult>(
trState, fVersion, begin, end, ""_sr, limits, Promise<std::pair<Key, Key>>(), Snapshot::True, reverse);
}
@ -4755,25 +4805,25 @@ template <class GetKeyValuesFamilyRequest>
void increaseCounterForRequest(Database cx) {
if constexpr (std::is_same<GetKeyValuesFamilyRequest, GetKeyValuesRequest>::value) {
++cx->transactionGetRangeRequests;
} else if (std::is_same<GetKeyValuesFamilyRequest, GetKeyValuesAndFlatMapRequest>::value) {
++cx->transactionGetRangeAndFlatMapRequests;
} else if (std::is_same<GetKeyValuesFamilyRequest, GetMappedKeyValuesRequest>::value) {
++cx->transactionGetMappedRangeRequests;
} else {
UNREACHABLE();
}
}
template <class GetKeyValuesFamilyRequest, class GetKeyValuesFamilyReply>
Future<RangeResult> Transaction::getRangeInternal(const KeySelector& begin,
const KeySelector& end,
const Key& mapper,
GetRangeLimits limits,
Snapshot snapshot,
Reverse reverse) {
template <class GetKeyValuesFamilyRequest, class GetKeyValuesFamilyReply, class RangeResultFamily>
Future<RangeResultFamily> Transaction::getRangeInternal(const KeySelector& begin,
const KeySelector& end,
const Key& mapper,
GetRangeLimits limits,
Snapshot snapshot,
Reverse reverse) {
++trState->cx->transactionLogicalReads;
increaseCounterForRequest<GetKeyValuesFamilyRequest>(trState->cx);
if (limits.isReached())
return RangeResult();
return RangeResultFamily();
if (!limits.isValid())
return range_limits_invalid();
@ -4794,15 +4844,21 @@ Future<RangeResult> Transaction::getRangeInternal(const KeySelector& begin,
if (b.offset >= e.offset && b.getKey() >= e.getKey()) {
TEST(true); // Native range inverted
return RangeResult();
return RangeResultFamily();
}
if (!snapshot && !std::is_same_v<GetKeyValuesFamilyRequest, GetKeyValuesRequest>) {
// Currently, NativeAPI does not support serialization for getMappedRange. You should consider use
// ReadYourWrites APIs which wraps around NativeAPI and provides serialization for getMappedRange. (Even if
// you don't want RYW, you may use ReadYourWrites APIs with RYW disabled.)
throw unsupported_operation();
}
Promise<std::pair<Key, Key>> conflictRange;
if (!snapshot) {
extraConflictRanges.push_back(conflictRange.getFuture());
}
return ::getRange<GetKeyValuesFamilyRequest, GetKeyValuesFamilyReply>(
return ::getRange<GetKeyValuesFamilyRequest, GetKeyValuesFamilyReply, RangeResultFamily>(
trState, getReadVersion(), b, e, mapper, limits, conflictRange, snapshot, reverse);
}
@ -4811,16 +4867,17 @@ Future<RangeResult> Transaction::getRange(const KeySelector& begin,
GetRangeLimits limits,
Snapshot snapshot,
Reverse reverse) {
return getRangeInternal<GetKeyValuesRequest, GetKeyValuesReply>(begin, end, ""_sr, limits, snapshot, reverse);
return getRangeInternal<GetKeyValuesRequest, GetKeyValuesReply, RangeResult>(
begin, end, ""_sr, limits, snapshot, reverse);
}
Future<RangeResult> Transaction::getRangeAndFlatMap(const KeySelector& begin,
const KeySelector& end,
const Key& mapper,
GetRangeLimits limits,
Snapshot snapshot,
Reverse reverse) {
return getRangeInternal<GetKeyValuesAndFlatMapRequest, GetKeyValuesAndFlatMapReply>(
Future<MappedRangeResult> Transaction::getMappedRange(const KeySelector& begin,
const KeySelector& end,
const Key& mapper,
GetRangeLimits limits,
Snapshot snapshot,
Reverse reverse) {
return getRangeInternal<GetMappedKeyValuesRequest, GetMappedKeyValuesReply, MappedRangeResult>(
begin, end, mapper, limits, snapshot, reverse);
}

View File

@ -309,13 +309,23 @@ public:
reverse);
}
[[nodiscard]] Future<RangeResult> getRangeAndFlatMap(const KeySelector& begin,
const KeySelector& end,
const Key& mapper,
GetRangeLimits limits,
Snapshot = Snapshot::False,
Reverse = Reverse::False);
[[nodiscard]] Future<MappedRangeResult> getMappedRange(const KeySelector& begin,
const KeySelector& end,
const Key& mapper,
GetRangeLimits limits,
Snapshot = Snapshot::False,
Reverse = Reverse::False);
private:
template <class GetKeyValuesFamilyRequest, class GetKeyValuesFamilyReply, class RangeResultFamily>
Future<RangeResultFamily> getRangeInternal(const KeySelector& begin,
const KeySelector& end,
const Key& mapper,
GetRangeLimits limits,
Snapshot snapshot,
Reverse reverse);
public:
// A method for streaming data from the storage server that is more efficient than getRange when reading large
// amounts of data
[[nodiscard]] Future<Void> getRangeStream(const PromiseStream<Standalone<RangeResultRef>>& results,

View File

@ -50,12 +50,12 @@ public:
GetRangeLimits limits,
Snapshot = Snapshot::False,
Reverse = Reverse::False) override;
Future<RangeResult> getRangeAndFlatMap(KeySelector begin,
KeySelector end,
Key mapper,
GetRangeLimits limits,
Snapshot = Snapshot::False,
Reverse = Reverse::False) override {
Future<MappedRangeResult> getMappedRange(KeySelector begin,
KeySelector end,
Key mapper,
GetRangeLimits limits,
Snapshot = Snapshot::False,
Reverse = Reverse::False) override {
throw client_invalid_operation();
}
void set(KeyRef const& key, ValueRef const& value) override;

View File

@ -44,7 +44,7 @@ public:
ExtStringRef beginKey();
ExtStringRef endKey();
const KeyValueRef* kv(Arena& arena);
virtual const KeyValueRef* kv(Arena& arena);
RYWIterator& operator++();
@ -61,14 +61,14 @@ public:
void bypassUnreadableProtection() { bypassUnreadable = true; }
WriteMap::iterator& extractWriteMapIterator();
virtual WriteMap::iterator& extractWriteMapIterator();
// Really this should return an iterator by value, but for performance it's convenient to actually grab the internal
// one. Consider copying the return value if performance isn't critical. If you modify the returned iterator, it
// invalidates this iterator until the next call to skip()
void dbg();
private:
protected:
int begin_key_cmp; // -1 if cache.beginKey() < writes.beginKey(), 0 if ==, +1 if >
int end_key_cmp; //
SnapshotCache::iterator cache;

View File

@ -75,13 +75,13 @@ public:
};
template <bool reverse>
struct GetRangeAndFlatMapReq {
GetRangeAndFlatMapReq(KeySelector begin, KeySelector end, Key mapper, GetRangeLimits limits)
struct GetMappedRangeReq {
GetMappedRangeReq(KeySelector begin, KeySelector end, Key mapper, GetRangeLimits limits)
: begin(begin), end(end), mapper(mapper), limits(limits) {}
KeySelector begin, end;
Key mapper;
GetRangeLimits limits;
using Result = RangeResult;
using Result = MappedRangeResult;
};
// read() Performs a read (get, getKey, getRange, etc), in the context of the given transaction. Snapshot or RYW
@ -213,46 +213,17 @@ public:
return v;
}
ACTOR template <bool backwards>
static Future<RangeResult> readThroughAndFlatMap(ReadYourWritesTransaction* ryw,
GetRangeAndFlatMapReq<backwards> read,
Snapshot snapshot) {
if (backwards && read.end.offset > 1) {
// FIXME: Optimistically assume that this will not run into the system keys, and only reissue if the result
// actually does.
Key key = wait(ryw->tr.getKey(read.end, snapshot));
if (key > ryw->getMaxReadKey())
read.end = firstGreaterOrEqual(ryw->getMaxReadKey());
else
read.end = KeySelector(firstGreaterOrEqual(key), key.arena());
}
RangeResult v = wait(ryw->tr.getRangeAndFlatMap(
read.begin, read.end, read.mapper, read.limits, snapshot, backwards ? Reverse::True : Reverse::False));
KeyRef maxKey = ryw->getMaxReadKey();
if (v.size() > 0) {
if (!backwards && v[v.size() - 1].key >= maxKey) {
state RangeResult _v = v;
int i = _v.size() - 2;
for (; i >= 0 && _v[i].key >= maxKey; --i) {
}
return RangeResult(RangeResultRef(VectorRef<KeyValueRef>(&_v[0], i + 1), false), _v.arena());
}
}
return v;
}
// addConflictRange(ryw,read,result) is called after a serializable read and is responsible for adding the relevant
// conflict range
template <bool mustUnmodified = false>
static void addConflictRange(ReadYourWritesTransaction* ryw,
GetValueReq read,
WriteMap::iterator& it,
Optional<Value> result) {
// it will already point to the right segment (see the calling code in read()), so we don't need to skip
// read.key will be copied into ryw->arena inside of updateConflictMap if it is being added
ryw->updateConflictMap(read.key, it);
updateConflictMap<mustUnmodified>(ryw, read.key, it);
}
static void addConflictRange(ReadYourWritesTransaction* ryw, GetKeyReq read, WriteMap::iterator& it, Key result) {
@ -270,10 +241,11 @@ public:
ryw->updateConflictMap(readRange, it);
}
template <bool mustUnmodified = false, class RangeResultFamily = RangeResult>
static void addConflictRange(ReadYourWritesTransaction* ryw,
GetRangeReq<false> read,
WriteMap::iterator& it,
RangeResult const& result) {
RangeResultFamily& result) {
KeyRef rangeBegin, rangeEnd;
bool endInArena = false;
@ -302,13 +274,15 @@ public:
KeyRangeRef readRange =
KeyRangeRef(KeyRef(ryw->arena, rangeBegin), endInArena ? rangeEnd : KeyRef(ryw->arena, rangeEnd));
it.skip(readRange.begin);
ryw->updateConflictMap(readRange, it);
updateConflictMap<mustUnmodified>(ryw, readRange, it);
}
// In the case where RangeResultFamily is MappedRangeResult, it only adds the primary range to conflict.
template <bool mustUnmodified = false, class RangeResultFamily = RangeResult>
static void addConflictRange(ReadYourWritesTransaction* ryw,
GetRangeReq<true> read,
WriteMap::iterator& it,
RangeResult const& result) {
RangeResultFamily& result) {
KeyRef rangeBegin, rangeEnd;
bool endInArena = false;
@ -336,7 +310,39 @@ public:
KeyRangeRef readRange =
KeyRangeRef(KeyRef(ryw->arena, rangeBegin), endInArena ? rangeEnd : KeyRef(ryw->arena, rangeEnd));
it.skip(readRange.begin);
ryw->updateConflictMap(readRange, it);
updateConflictMap<mustUnmodified>(ryw, readRange, it);
}
template <bool mustUnmodified = false>
static void updateConflictMap(ReadYourWritesTransaction* ryw, KeyRef const& key, WriteMap::iterator& it) {
// it.skip( key );
// ASSERT( it.beginKey() <= key && key < it.endKey() );
if (mustUnmodified && !it.is_unmodified_range()) {
throw get_mapped_range_reads_your_writes();
}
if (it.is_unmodified_range() || (it.is_operation() && !it.is_independent())) {
ryw->approximateSize += 2 * key.expectedSize() + 1 + sizeof(KeyRangeRef);
ryw->readConflicts.insert(singleKeyRange(key, ryw->arena), true);
}
}
template <bool mustUnmodified = false>
static void updateConflictMap(ReadYourWritesTransaction* ryw, KeyRangeRef const& keys, WriteMap::iterator& it) {
// it.skip( keys.begin );
// ASSERT( it.beginKey() <= keys.begin && keys.begin < it.endKey() );
for (; it.beginKey() < keys.end; ++it) {
if (mustUnmodified && !it.is_unmodified_range()) {
throw get_mapped_range_reads_your_writes();
}
if (it.is_unmodified_range() || (it.is_operation() && !it.is_independent())) {
KeyRangeRef insert_range = KeyRangeRef(std::max(keys.begin, it.beginKey().toArenaOrRef(ryw->arena)),
std::min(keys.end, it.endKey().toArenaOrRef(ryw->arena)));
if (!insert_range.empty()) {
ryw->approximateSize += keys.expectedSize() + sizeof(KeyRangeRef);
ryw->readConflicts.insert(insert_range, true);
}
}
}
}
ACTOR template <class Req>
@ -349,15 +355,6 @@ public:
}
}
ACTOR template <class Req>
static Future<typename Req::Result> readWithConflictRangeThroughAndFlatMap(ReadYourWritesTransaction* ryw,
Req req,
Snapshot snapshot) {
choose {
when(typename Req::Result result = wait(readThroughAndFlatMap(ryw, req, snapshot))) { return result; }
when(wait(ryw->resetPromise.getFuture())) { throw internal_error(); }
}
}
ACTOR template <class Req>
static Future<typename Req::Result> readWithConflictRangeSnapshot(ReadYourWritesTransaction* ryw, Req req) {
state SnapshotCache::iterator it(&ryw->cache, &ryw->writes);
choose {
@ -393,19 +390,6 @@ public:
return readWithConflictRangeRYW(ryw, req, snapshot);
}
template <class Req>
static inline Future<typename Req::Result> readWithConflictRangeAndFlatMap(ReadYourWritesTransaction* ryw,
Req const& req,
Snapshot snapshot) {
// For now, getRangeAndFlatMap is only supported if transaction use snapshot isolation AND read-your-writes is
// disabled.
if (snapshot && ryw->options.readYourWritesDisabled) {
return readWithConflictRangeThroughAndFlatMap(ryw, req, snapshot);
}
TEST(true); // readWithConflictRangeRYW not supported for getRangeAndFlatMap
throw client_invalid_operation();
}
template <class Iter>
static void resolveKeySelectorFromCache(KeySelector& key,
Iter& it,
@ -1126,6 +1110,119 @@ public:
return result;
}
#ifndef __INTEL_COMPILER
#pragma region GetMappedRange
#endif
template <class Iter>
static Future<MappedRangeResult> read(ReadYourWritesTransaction* ryw, GetMappedRangeReq<false> read, Iter* it) {
return getMappedRangeValue(ryw, read.begin, read.end, read.mapper, read.limits, it);
};
template <class Iter>
static Future<MappedRangeResult> read(ReadYourWritesTransaction* ryw, GetMappedRangeReq<true> read, Iter* it) {
throw unsupported_operation();
// TODO: Support reverse. return getMappedRangeValueBack(ryw, read.begin, read.end, read.mapper,
// read.limits, it);
};
ACTOR template <bool backwards>
static Future<MappedRangeResult> readThrough(ReadYourWritesTransaction* ryw,
GetMappedRangeReq<backwards> read,
Snapshot snapshot) {
if (backwards && read.end.offset > 1) {
// FIXME: Optimistically assume that this will not run into the system keys, and only reissue if the result
// actually does.
Key key = wait(ryw->tr.getKey(read.end, snapshot));
if (key > ryw->getMaxReadKey())
read.end = firstGreaterOrEqual(ryw->getMaxReadKey());
else
read.end = KeySelector(firstGreaterOrEqual(key), key.arena());
}
MappedRangeResult v = wait(ryw->tr.getMappedRange(
read.begin, read.end, read.mapper, read.limits, snapshot, backwards ? Reverse::True : Reverse::False));
return v;
}
// After a getMappedRange completes, registers read conflict ranges for everything the
// request touched and verifies none of it was modified by this transaction (RYW is not
// implemented for mapped reads, so reading your own writes must be detected rather than
// answered). This covers both the primary range scan and every secondary
// getValue/getRange that the mapper generated per result row.
template <bool backwards>
static void addConflictRangeAndMustUnmodified(ReadYourWritesTransaction* ryw,
GetMappedRangeReq<backwards> read,
WriteMap::iterator& it,
MappedRangeResult result) {
// Primary getRange.
addConflictRange<true, MappedRangeResult>(
ryw, GetRangeReq<backwards>(read.begin, read.end, read.limits), it, result);
// Secondary getValue/getRanges: each mapped row carries the secondary request it issued
// together with that request's result, as a variant.
for (const auto& mappedKeyValue : result) {
const auto& reqAndResult = mappedKeyValue.reqAndResult;
if (std::holds_alternative<GetValueReqAndResultRef>(reqAndResult)) {
auto getValue = std::get<GetValueReqAndResultRef>(reqAndResult);
// GetValueReq variation of addConflictRange require it to point at the right segment.
it.skip(getValue.key);
// The result is not used in GetValueReq variation of addConflictRange. Let's just pass in a
// placeholder.
addConflictRange<true>(ryw, GetValueReq(getValue.key), it, Optional<Value>());
} else if (std::holds_alternative<GetRangeReqAndResultRef>(reqAndResult)) {
auto getRange = std::get<GetRangeReqAndResultRef>(reqAndResult);
// We only support forward scan for secondary getRange requests.
// The limits are not used in addConflictRange. Let's just pass in a placeholder.
addConflictRange<true>(
ryw, GetRangeReq<false>(getRange.begin, getRange.end, GetRangeLimits()), it, getRange.result);
} else {
// The variant should only ever hold one of the two request types above.
throw internal_error();
}
}
}
// For Snapshot::True and NOT readYourWritesDisabled.
// Reads through to the native transaction, racing against the transaction's reset
// promise so a reset/cancel aborts the read.
ACTOR template <bool backwards>
static Future<MappedRangeResult> readWithConflictRangeRYW(ReadYourWritesTransaction* ryw,
GetMappedRangeReq<backwards> req,
Snapshot snapshot) {
choose {
when(MappedRangeResult result = wait(readThrough(ryw, req, Snapshot::True))) {
// Insert read conflicts (so that Snapshot::True is supported) and check that the results were
// not modified by this transaction (so RYW semantics are not silently broken while RYW is not
// implemented for mapped reads), for both the primary getRange and all underlying
// getValue/getRanges.
WriteMap::iterator writes(&ryw->writes);
addConflictRangeAndMustUnmodified<backwards>(ryw, req, writes, result);
return result;
}
when(wait(ryw->resetPromise.getFuture())) { throw internal_error(); }
}
}
// Entry point used by ReadYourWritesTransaction::getMappedRange. Validates the
// isolation and RYW preconditions, then reads through to the native transaction.
template <bool backwards>
static inline Future<MappedRangeResult> readWithConflictRangeForGetMappedRange(
ReadYourWritesTransaction* ryw,
GetMappedRangeReq<backwards> const& req,
Snapshot snapshot) {
// For now, getMappedRange requires serializable isolation. (Technically it is trivial to add snapshot
// isolation support. But it is not default and is rarely used. So we disallow it until we have thorough test
// coverage for it.)
if (snapshot) {
TEST(true); // getMappedRange not supported for snapshot.
throw unsupported_operation();
}
// For now, getMappedRange requires read-your-writes being NOT disabled. But the support of RYW is limited
// to throwing get_mapped_range_reads_your_writes error when getMappedRange actually reads your own writes.
// Applications should fall back in their own ways. This is different from what is usually expected from RYW,
// which returns the written value transparently. In other words, it makes sure not to break RYW semantics
// without actually implementing reading from the writes.
if (ryw->options.readYourWritesDisabled) {
TEST(true); // getMappedRange not supported for read-your-writes disabled.
throw unsupported_operation();
}
return readWithConflictRangeRYW(ryw, req, snapshot);
}
#ifndef __INTEL_COMPILER
#pragma endregion
#endif
static void triggerWatches(ReadYourWritesTransaction* ryw,
KeyRangeRef range,
Optional<ValueRef> val,
@ -1571,16 +1668,16 @@ Future<RangeResult> ReadYourWritesTransaction::getRange(const KeySelector& begin
return getRange(begin, end, GetRangeLimits(limit), snapshot, reverse);
}
Future<RangeResult> ReadYourWritesTransaction::getRangeAndFlatMap(KeySelector begin,
KeySelector end,
Key mapper,
GetRangeLimits limits,
Snapshot snapshot,
Reverse reverse) {
Future<MappedRangeResult> ReadYourWritesTransaction::getMappedRange(KeySelector begin,
KeySelector end,
Key mapper,
GetRangeLimits limits,
Snapshot snapshot,
Reverse reverse) {
if (getDatabase()->apiVersionAtLeast(630)) {
if (specialKeys.contains(begin.getKey()) && specialKeys.begin <= end.getKey() &&
end.getKey() <= specialKeys.end) {
TEST(true); // Special key space get range (FlatMap)
TEST(true); // Special key space get range (getMappedRange)
throw client_invalid_operation(); // Not support special keys.
}
} else {
@ -1602,8 +1699,8 @@ Future<RangeResult> ReadYourWritesTransaction::getRangeAndFlatMap(KeySelector be
// This optimization prevents nullptr operations from being added to the conflict range
if (limits.isReached()) {
TEST(true); // RYW range read limit 0 (FlatMap)
return RangeResult();
TEST(true); // RYW range read limit 0 (getMappedRange)
return MappedRangeResult();
}
if (!limits.isValid())
@ -1616,17 +1713,16 @@ Future<RangeResult> ReadYourWritesTransaction::getRangeAndFlatMap(KeySelector be
end.removeOrEqual(end.arena());
if (begin.offset >= end.offset && begin.getKey() >= end.getKey()) {
TEST(true); // RYW range inverted (FlatMap)
return RangeResult();
TEST(true); // RYW range inverted (getMappedRange)
return MappedRangeResult();
}
Future<RangeResult> result =
reverse ? RYWImpl::readWithConflictRangeAndFlatMap(
this, RYWImpl::GetRangeAndFlatMapReq<true>(begin, end, mapper, limits), snapshot)
: RYWImpl::readWithConflictRangeAndFlatMap(
this, RYWImpl::GetRangeAndFlatMapReq<false>(begin, end, mapper, limits), snapshot);
Future<MappedRangeResult> result =
reverse ? RYWImpl::readWithConflictRangeForGetMappedRange(
this, RYWImpl::GetMappedRangeReq<true>(begin, end, mapper, limits), snapshot)
: RYWImpl::readWithConflictRangeForGetMappedRange(
this, RYWImpl::GetMappedRangeReq<false>(begin, end, mapper, limits), snapshot);
reading.add(success(result));
return result;
}
@ -1761,27 +1857,11 @@ void ReadYourWritesTransaction::addReadConflictRange(KeyRangeRef const& keys) {
}
void ReadYourWritesTransaction::updateConflictMap(KeyRef const& key, WriteMap::iterator& it) {
// it.skip( key );
// ASSERT( it.beginKey() <= key && key < it.endKey() );
if (it.is_unmodified_range() || (it.is_operation() && !it.is_independent())) {
approximateSize += 2 * key.expectedSize() + 1 + sizeof(KeyRangeRef);
readConflicts.insert(singleKeyRange(key, arena), true);
}
RYWImpl::updateConflictMap(this, key, it);
}
void ReadYourWritesTransaction::updateConflictMap(KeyRangeRef const& keys, WriteMap::iterator& it) {
// it.skip( keys.begin );
// ASSERT( it.beginKey() <= keys.begin && keys.begin < it.endKey() );
for (; it.beginKey() < keys.end; ++it) {
if (it.is_unmodified_range() || (it.is_operation() && !it.is_independent())) {
KeyRangeRef insert_range = KeyRangeRef(std::max(keys.begin, it.beginKey().toArenaOrRef(arena)),
std::min(keys.end, it.endKey().toArenaOrRef(arena)));
if (!insert_range.empty()) {
approximateSize += keys.expectedSize() + sizeof(KeyRangeRef);
readConflicts.insert(insert_range, true);
}
}
}
RYWImpl::updateConflictMap(this, keys, it);
}
void ReadYourWritesTransaction::writeRangeToNativeTransaction(KeyRangeRef const& keys) {

View File

@ -61,6 +61,9 @@ struct TransactionDebugInfo : public ReferenceCounted<TransactionDebugInfo> {
// Values returned by a ReadYourWritesTransaction will contain a reference to the transaction's arena. Therefore,
// keeping a reference to a value longer than its creating transaction would hold all of the memory generated by the
// transaction
// If options.readYourWritesDisabled, rely on NativeAPI to handle everything. Otherwise, read NativeAPI with
// Snapshot::True and handle read conflicts at ReadYourWritesTransaction, write NativeAPI with AddConflictRange::False
// and handle write conflicts at ReadYourWritesTransaction, eventually send this information to NativeAPI on commit.
class ReadYourWritesTransaction final : NonCopyable,
public ISingleThreadTransaction,
public FastAllocated<ReadYourWritesTransaction> {
@ -104,12 +107,12 @@ public:
snapshot,
reverse);
}
Future<RangeResult> getRangeAndFlatMap(KeySelector begin,
KeySelector end,
Key mapper,
GetRangeLimits limits,
Snapshot = Snapshot::False,
Reverse = Reverse::False) override;
Future<MappedRangeResult> getMappedRange(KeySelector begin,
KeySelector end,
Key mapper,
GetRangeLimits limits,
Snapshot = Snapshot::False,
Reverse = Reverse::False) override;
[[nodiscard]] Future<Standalone<VectorRef<const char*>>> getAddressesForKey(const Key& key) override;
Future<Standalone<VectorRef<KeyRef>>> getRangeSplitPoints(const KeyRange& range, int64_t chunkSize) override;

View File

@ -676,8 +676,10 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( MAX_STORAGE_COMMIT_TIME, 120.0 ); //The max fsync stall time on the storage server and tlog before marking a disk as failed
init( RANGESTREAM_LIMIT_BYTES, 2e6 ); if( randomize && BUGGIFY ) RANGESTREAM_LIMIT_BYTES = 1;
init( ENABLE_CLEAR_RANGE_EAGER_READS, true );
init( QUICK_GET_VALUE_FALLBACK, false );
init( QUICK_GET_KEY_VALUES_FALLBACK, false );
init( QUICK_GET_VALUE_FALLBACK, true );
init( QUICK_GET_KEY_VALUES_FALLBACK, true );
init( QUICK_GET_KEY_VALUES_LIMIT, 2000 );
init( QUICK_GET_KEY_VALUES_LIMIT_BYTES, 1e7 );
//Wait Failure
init( MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS, 250 ); if( randomize && BUGGIFY ) MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS = 2;
@ -814,7 +816,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init ( CLUSTER_RECOVERY_EVENT_NAME_PREFIX, "Master");
// encrypt key proxy
init( ENABLE_ENCRYPT_KEY_PROXY, false );
init( ENABLE_ENCRYPTION, false );
// Blob granules
init( BG_URL, "" ); // TODO: store in system key space, eventually

View File

@ -615,6 +615,8 @@ public:
bool ENABLE_CLEAR_RANGE_EAGER_READS;
bool QUICK_GET_VALUE_FALLBACK;
bool QUICK_GET_KEY_VALUES_FALLBACK;
int QUICK_GET_KEY_VALUES_LIMIT;
int QUICK_GET_KEY_VALUES_LIMIT_BYTES;
// Wait Failure
int MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS;
@ -763,7 +765,7 @@ public:
std::string CLUSTER_RECOVERY_EVENT_NAME_PREFIX;
// encrypt key proxy
bool ENABLE_ENCRYPT_KEY_PROXY;
bool ENABLE_ENCRYPTION;
// blob granule stuff
// FIXME: configure url with database configuration instead of knob eventually

View File

@ -59,12 +59,12 @@ public:
GetRangeLimits limits,
Snapshot = Snapshot::False,
Reverse = Reverse::False) override;
Future<RangeResult> getRangeAndFlatMap(KeySelector begin,
KeySelector end,
Key mapper,
GetRangeLimits limits,
Snapshot = Snapshot::False,
Reverse = Reverse::False) override {
Future<MappedRangeResult> getMappedRange(KeySelector begin,
KeySelector end,
Key mapper,
GetRangeLimits limits,
Snapshot = Snapshot::False,
Reverse = Reverse::False) override {
throw client_invalid_operation();
}
Future<Void> commit() override;

View File

@ -41,10 +41,6 @@
namespace {
const std::string kTracingTransactionIdKey = "transaction_id";
const std::string kTracingTokenKey = "token";
// Max version we can set for minRequiredCommitVersionKey,
// making sure the cluster can still be alive for 1000 years after the recovery
const Version maxAllowedVerion =
std::numeric_limits<int64_t>::max() - 1 - CLIENT_KNOBS->VERSIONS_PER_SECOND * 3600 * 24 * 365 * 1000;
static bool isAlphaNumeric(const std::string& key) {
// [A-Za-z0-9_]+
@ -1865,6 +1861,11 @@ Future<RangeResult> AdvanceVersionImpl::getRange(ReadYourWritesTransaction* ryw,
}
ACTOR static Future<Optional<std::string>> advanceVersionCommitActor(ReadYourWritesTransaction* ryw, Version v) {
// Max version we can set for minRequiredCommitVersionKey,
// making sure the cluster can still be alive for 1000 years after the recovery
static const Version maxAllowedVerion =
std::numeric_limits<int64_t>::max() - 1 - CLIENT_KNOBS->VERSIONS_PER_SECOND * 3600 * 24 * 365 * 1000;
ryw->getTransaction().setOption(FDBTransactionOptions::LOCK_AWARE);
ryw->getTransaction().setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
TraceEvent(SevDebug, "AdvanceVersion").detail("MaxAllowedVersion", maxAllowedVerion);

View File

@ -28,7 +28,7 @@
// if size + hex of checksum is shorter than value, record that instead of actual value. break-even point is 12
// characters
std::string traceChecksumValue(ValueRef s) {
std::string traceChecksumValue(const ValueRef& s) {
return s.size() > 12 ? format("(%d)%08x", s.size(), crc32c_append(0, s.begin(), s.size())) : s.toString();
}
@ -49,6 +49,7 @@ void TSS_traceMismatch(TraceEvent& event,
const GetValueReply& src,
const GetValueReply& tss) {
event.detail("Key", req.key.printable())
.detail("Tenant", req.tenantInfo.name)
.detail("Version", req.version)
.detail("SSReply", src.value.present() ? traceChecksumValue(src.value.get()) : "missing")
.detail("TSSReply", tss.value.present() ? traceChecksumValue(tss.value.get()) : "missing");
@ -125,73 +126,107 @@ const char* TSS_mismatchTraceName(const GetKeyValuesRequest& req) {
return "TSSMismatchGetKeyValues";
}
// Emits a one-line summary of an SS/TSS reply pair onto `event`: the request's begin/end
// selectors (rendered as "[=]key:offset"), tenant, version, and limits, plus per-reply
// summaries of the form "(<count>)" with a trailing "+" when that reply had more == true.
//
// Note: `format` is printf-style, so the size_t counts must not be passed to "%d"
// directly (undefined behavior in varargs on LP64); they are cast to int explicitly.
static void traceKeyValuesSummary(TraceEvent& event,
                                  const KeySelectorRef& begin,
                                  const KeySelectorRef& end,
                                  Optional<TenantName> tenant,
                                  Version version,
                                  int limit,
                                  int limitBytes,
                                  size_t ssSize,
                                  bool ssMore,
                                  size_t tssSize,
                                  bool tssMore) {
    std::string ssSummaryString = format("(%d)%s", static_cast<int>(ssSize), ssMore ? "+" : "");
    std::string tssSummaryString = format("(%d)%s", static_cast<int>(tssSize), tssMore ? "+" : "");
    event.detail("Begin", format("%s%s:%d", begin.orEqual ? "=" : "", begin.getKey().printable().c_str(), begin.offset))
        .detail("End", format("%s%s:%d", end.orEqual ? "=" : "", end.getKey().printable().c_str(), end.offset))
        .detail("Tenant", tenant)
        .detail("Version", version)
        .detail("Limit", limit)
        .detail("LimitBytes", limitBytes)
        .detail("SSReplySummary", ssSummaryString)
        .detail("TSSReplySummary", tssSummaryString);
}
// Traces a summary of the SS vs TSS replies, then reports the first index at which the
// two result vectors diverge: either a key-level mismatch (one side is missing the row,
// or the keys differ) or a value mismatch at a shared key (values reported via
// traceChecksumValue as length + crc32c). Callers must only invoke this when the
// replies actually differ — asserted via mismatchFound at the end.
static void traceKeyValuesDiff(TraceEvent& event,
const KeySelectorRef& begin,
const KeySelectorRef& end,
Optional<TenantName> tenant,
Version version,
int limit,
int limitBytes,
const VectorRef<KeyValueRef>& ssKV,
bool ssMore,
const VectorRef<KeyValueRef>& tssKV,
bool tssMore) {
traceKeyValuesSummary(
event, begin, end, tenant, version, limit, limitBytes, ssKV.size(), ssMore, tssKV.size(), tssMore);
bool mismatchFound = false;
// Scan in lockstep up to the longer of the two replies; stop at the first divergence.
for (int i = 0; i < std::max(ssKV.size(), tssKV.size()); i++) {
if (i >= ssKV.size() || i >= tssKV.size() || ssKV[i] != tssKV[i]) {
event.detail("MismatchIndex", i);
if (i >= ssKV.size() || i >= tssKV.size() || ssKV[i].key != tssKV[i].key) {
// Row missing on one side, or keys differ: report both keys (or "missing").
event.detail("MismatchSSKey", i < ssKV.size() ? ssKV[i].key.printable() : "missing");
event.detail("MismatchTSSKey", i < tssKV.size() ? tssKV[i].key.printable() : "missing");
} else {
// Same key, different value: report the key and both value checksums.
event.detail("MismatchKey", ssKV[i].key.printable());
event.detail("MismatchSSValue", traceChecksumValue(ssKV[i].value));
event.detail("MismatchTSSValue", traceChecksumValue(tssKV[i].value));
}
mismatchFound = true;
break;
}
}
ASSERT(mismatchFound);
}
template <>
void TSS_traceMismatch(TraceEvent& event,
const GetKeyValuesRequest& req,
const GetKeyValuesReply& src,
const GetKeyValuesReply& tss) {
std::string ssResultsString = format("(%d)%s:\n", src.data.size(), src.more ? "+" : "");
for (auto& it : src.data) {
ssResultsString += "\n" + it.key.printable() + "=" + traceChecksumValue(it.value);
}
std::string tssResultsString = format("(%d)%s:\n", tss.data.size(), tss.more ? "+" : "");
for (auto& it : tss.data) {
tssResultsString += "\n" + it.key.printable() + "=" + traceChecksumValue(it.value);
}
event
.detail(
"Begin",
format("%s%s:%d", req.begin.orEqual ? "=" : "", req.begin.getKey().printable().c_str(), req.begin.offset))
.detail("End",
format("%s%s:%d", req.end.orEqual ? "=" : "", req.end.getKey().printable().c_str(), req.end.offset))
.detail("Tenant", req.tenantInfo.name)
.detail("Version", req.version)
.detail("Limit", req.limit)
.detail("LimitBytes", req.limitBytes)
.setMaxFieldLength(FLOW_KNOBS->TSS_LARGE_TRACE_SIZE * 4 / 10)
.detail("SSReply", ssResultsString)
.detail("TSSReply", tssResultsString);
traceKeyValuesDiff(event,
req.begin,
req.end,
req.tenantInfo.name,
req.version,
req.limit,
req.limitBytes,
src.data,
src.more,
tss.data,
tss.more);
}
// range reads and flat map
template <>
bool TSS_doCompare(const GetKeyValuesAndFlatMapReply& src, const GetKeyValuesAndFlatMapReply& tss) {
bool TSS_doCompare(const GetMappedKeyValuesReply& src, const GetMappedKeyValuesReply& tss) {
return src.more == tss.more && src.data == tss.data;
}
template <>
const char* TSS_mismatchTraceName(const GetKeyValuesAndFlatMapRequest& req) {
return "TSSMismatchGetKeyValuesAndFlatMap";
const char* TSS_mismatchTraceName(const GetMappedKeyValuesRequest& req) {
return "TSSMismatchGetMappedKeyValues";
}
template <>
void TSS_traceMismatch(TraceEvent& event,
const GetKeyValuesAndFlatMapRequest& req,
const GetKeyValuesAndFlatMapReply& src,
const GetKeyValuesAndFlatMapReply& tss) {
std::string ssResultsString = format("(%d)%s:\n", src.data.size(), src.more ? "+" : "");
for (auto& it : src.data) {
ssResultsString += "\n" + it.key.printable() + "=" + traceChecksumValue(it.value);
}
std::string tssResultsString = format("(%d)%s:\n", tss.data.size(), tss.more ? "+" : "");
for (auto& it : tss.data) {
tssResultsString += "\n" + it.key.printable() + "=" + traceChecksumValue(it.value);
}
event
.detail(
"Begin",
format("%s%s:%d", req.begin.orEqual ? "=" : "", req.begin.getKey().printable().c_str(), req.begin.offset))
.detail("End",
format("%s%s:%d", req.end.orEqual ? "=" : "", req.end.getKey().printable().c_str(), req.end.offset))
.detail("Tenant", req.tenantInfo.name)
.detail("Version", req.version)
.detail("Limit", req.limit)
.detail("LimitBytes", req.limitBytes)
.setMaxFieldLength(FLOW_KNOBS->TSS_LARGE_TRACE_SIZE * 4 / 10)
.detail("SSReply", ssResultsString)
.detail("TSSReply", tssResultsString);
const GetMappedKeyValuesRequest& req,
const GetMappedKeyValuesReply& src,
const GetMappedKeyValuesReply& tss) {
traceKeyValuesSummary(event,
req.begin,
req.end,
req.tenantInfo.name,
req.version,
req.limit,
req.limitBytes,
src.data.size(),
src.more,
tss.data.size(),
tss.more);
// FIXME: trace details for TSS mismatch of mapped data
}
// streaming range reads
@ -211,28 +246,17 @@ void TSS_traceMismatch(TraceEvent& event,
const GetKeyValuesStreamRequest& req,
const GetKeyValuesStreamReply& src,
const GetKeyValuesStreamReply& tss) {
std::string ssResultsString = format("(%d)%s:\n", src.data.size(), src.more ? "+" : "");
for (auto& it : src.data) {
ssResultsString += "\n" + it.key.printable() + "=" + traceChecksumValue(it.value);
}
std::string tssResultsString = format("(%d)%s:\n", tss.data.size(), tss.more ? "+" : "");
for (auto& it : tss.data) {
tssResultsString += "\n" + it.key.printable() + "=" + traceChecksumValue(it.value);
}
event
.detail(
"Begin",
format("%s%s:%d", req.begin.orEqual ? "=" : "", req.begin.getKey().printable().c_str(), req.begin.offset))
.detail("End",
format("%s%s:%d", req.end.orEqual ? "=" : "", req.end.getKey().printable().c_str(), req.end.offset))
.detail("Tenant", req.tenantInfo.name)
.detail("Version", req.version)
.detail("Limit", req.limit)
.detail("LimitBytes", req.limitBytes)
.setMaxFieldLength(FLOW_KNOBS->TSS_LARGE_TRACE_SIZE * 4 / 10)
.detail("SSReply", ssResultsString)
.detail("TSSReply", tssResultsString);
traceKeyValuesDiff(event,
req.begin,
req.end,
req.tenantInfo.name,
req.version,
req.limit,
req.limitBytes,
src.data,
src.more,
tss.data,
tss.more);
}
template <>
@ -400,9 +424,9 @@ void TSSMetrics::recordLatency(const GetKeyValuesRequest& req, double ssLatency,
}
template <>
void TSSMetrics::recordLatency(const GetKeyValuesAndFlatMapRequest& req, double ssLatency, double tssLatency) {
SSgetKeyValuesAndFlatMapLatency.addSample(ssLatency);
TSSgetKeyValuesAndFlatMapLatency.addSample(tssLatency);
void TSSMetrics::recordLatency(const GetMappedKeyValuesRequest& req, double ssLatency, double tssLatency) {
SSgetMappedKeyValuesLatency.addSample(ssLatency);
TSSgetMappedKeyValuesLatency.addSample(tssLatency);
}
template <>

View File

@ -181,7 +181,7 @@ struct StorageServerInterface {
streams.push_back(getReadHotRanges.getReceiver());
streams.push_back(getRangeSplitPoints.getReceiver());
streams.push_back(getKeyValuesStream.getReceiver(TaskPriority::LoadBalancedEndpoint));
streams.push_back(getKeyValuesAndFlatMap.getReceiver(TaskPriority::LoadBalancedEndpoint));
streams.push_back(getMappedKeyValues.getReceiver(TaskPriority::LoadBalancedEndpoint));
streams.push_back(changeFeedStream.getReceiver());
streams.push_back(overlappingChangeFeeds.getReceiver());
streams.push_back(changeFeedPop.getReceiver());
@ -364,15 +364,17 @@ struct GetKeyValuesRequest : TimedRequest {
}
};
struct GetKeyValuesAndFlatMapReply : public LoadBalancedReply {
struct GetMappedKeyValuesReply : public LoadBalancedReply {
constexpr static FileIdentifier file_identifier = 1783067;
Arena arena;
VectorRef<KeyValueRef, VecSerStrategy::String> data;
// MappedKeyValueRef is not string_serialized_traits, so we have to use FlatBuffers.
VectorRef<MappedKeyValueRef, VecSerStrategy::FlatBuffers> data;
Version version; // useful when latestVersion was requested
bool more;
bool cached = false;
GetKeyValuesAndFlatMapReply() : version(invalidVersion), more(false), cached(false) {}
GetMappedKeyValuesReply() : version(invalidVersion), more(false), cached(false) {}
template <class Ar>
void serialize(Ar& ar) {
@ -380,7 +382,7 @@ struct GetKeyValuesAndFlatMapReply : public LoadBalancedReply {
}
};
struct GetKeyValuesAndFlatMapRequest : TimedRequest {
struct GetMappedKeyValuesRequest : TimedRequest {
constexpr static FileIdentifier file_identifier = 6795747;
SpanID spanContext;
Arena arena;
@ -392,10 +394,9 @@ struct GetKeyValuesAndFlatMapRequest : TimedRequest {
bool isFetchKeys;
Optional<TagSet> tags;
Optional<UID> debugID;
ReplyPromise<GetKeyValuesAndFlatMapReply> reply;
GetKeyValuesAndFlatMapRequest() : isFetchKeys(false) {}
ReplyPromise<GetMappedKeyValuesReply> reply;
GetMappedKeyValuesRequest() : isFetchKeys(false) {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar,

View File

@ -258,20 +258,20 @@ ThreadFuture<RangeResult> ThreadSafeTransaction::getRange(const KeySelectorRef&
});
}
ThreadFuture<RangeResult> ThreadSafeTransaction::getRangeAndFlatMap(const KeySelectorRef& begin,
const KeySelectorRef& end,
const StringRef& mapper,
GetRangeLimits limits,
bool snapshot,
bool reverse) {
ThreadFuture<MappedRangeResult> ThreadSafeTransaction::getMappedRange(const KeySelectorRef& begin,
const KeySelectorRef& end,
const StringRef& mapper,
GetRangeLimits limits,
bool snapshot,
bool reverse) {
KeySelector b = begin;
KeySelector e = end;
Key h = mapper;
ISingleThreadTransaction* tr = this->tr;
return onMainThread([tr, b, e, h, limits, snapshot, reverse]() -> Future<RangeResult> {
return onMainThread([tr, b, e, h, limits, snapshot, reverse]() -> Future<MappedRangeResult> {
tr->checkDeferredError();
return tr->getRangeAndFlatMap(b, e, h, limits, Snapshot{ snapshot }, Reverse{ reverse });
return tr->getMappedRange(b, e, h, limits, Snapshot{ snapshot }, Reverse{ reverse });
});
}

View File

@ -106,12 +106,12 @@ public:
bool reverse = false) override {
return getRange(firstGreaterOrEqual(keys.begin), firstGreaterOrEqual(keys.end), limits, snapshot, reverse);
}
ThreadFuture<RangeResult> getRangeAndFlatMap(const KeySelectorRef& begin,
const KeySelectorRef& end,
const StringRef& mapper,
GetRangeLimits limits,
bool snapshot,
bool reverse) override;
ThreadFuture<MappedRangeResult> getMappedRange(const KeySelectorRef& begin,
const KeySelectorRef& end,
const StringRef& mapper,
GetRangeLimits limits,
bool snapshot,
bool reverse) override;
ThreadFuture<Standalone<VectorRef<const char*>>> getAddressesForKey(const KeyRef& key) override;
ThreadFuture<Standalone<StringRef>> getVersionstamp() override;
ThreadFuture<int64_t> getEstimatedRangeSizeBytes(const KeyRangeRef& keys) override;

View File

@ -29,10 +29,14 @@ static std::map<NetworkAddress, std::pair<Reference<EvictablePageCache>, Referen
EvictablePage::~EvictablePage() {
if (data) {
#if defined(USE_JEMALLOC)
aligned_free(data);
#else
if (pageCache->pageSize == 4096)
FastAllocator<4096>::release(data);
else
aligned_free(data);
#endif
}
if (EvictablePageCache::RANDOM == pageCache->cacheEvictionType) {
if (index > -1) {
@ -169,10 +173,14 @@ void AsyncFileCached::releaseZeroCopy(void* data, int length, int64_t offset) {
if (o != orphanedPages.end()) {
if (o->second == 1) {
if (data) {
#if defined(USE_JEMALLOC)
aligned_free(data);
#else
if (length == 4096)
FastAllocator<4096>::release(data);
else
aligned_free(data);
#endif
}
} else {
--o->second;

View File

@ -79,7 +79,11 @@ struct EvictablePageCache : ReferenceCounted<EvictablePageCache> {
void allocate(EvictablePage* page) {
try_evict();
try_evict();
#if defined(USE_JEMALLOC)
page->data = aligned_alloc(4096, pageSize);
#else
page->data = pageSize == 4096 ? FastAllocator<4096>::allocate() : aligned_alloc(4096, pageSize);
#endif
if (RANDOM == cacheEvictionType) {
page->index = pages.size();
pages.push_back(page);
@ -387,7 +391,11 @@ struct AFCPage : public EvictablePage, public FastAllocated<AFCPage> {
owner->orphanedPages[data] = zeroCopyRefCount;
zeroCopyRefCount = 0;
notReading = Void();
#if defined(USE_JEMALLOC)
data = aligned_alloc(4096, pageCache->pageSize);
#else
data = pageCache->pageSize == 4096 ? FastAllocator<4096>::allocate() : aligned_alloc(4096, pageCache->pageSize);
#endif
}
Future<Void> write(void const* data, int length, int offset) {

View File

@ -37,7 +37,7 @@ set(FDBRPC_SRCS
TraceFileIO.cpp
TSSComparison.h)
if(WITH_TLS AND NOT WIN32)
if(WITH_TLS)
set(FDBRPC_SRCS
${FDBRPC_SRCS}
AsyncFileEncrypted.actor.cpp)

View File

@ -51,12 +51,12 @@ struct TSSMetrics : ReferenceCounted<TSSMetrics>, NonCopyable {
ContinuousSample<double> SSgetValueLatency;
ContinuousSample<double> SSgetKeyLatency;
ContinuousSample<double> SSgetKeyValuesLatency;
ContinuousSample<double> SSgetKeyValuesAndFlatMapLatency;
ContinuousSample<double> SSgetMappedKeyValuesLatency;
ContinuousSample<double> TSSgetValueLatency;
ContinuousSample<double> TSSgetKeyLatency;
ContinuousSample<double> TSSgetKeyValuesLatency;
ContinuousSample<double> TSSgetKeyValuesAndFlatMapLatency;
ContinuousSample<double> TSSgetMappedKeyValuesLatency;
std::unordered_map<int, uint64_t> ssErrorsByCode;
std::unordered_map<int, uint64_t> tssErrorsByCode;
@ -90,12 +90,12 @@ struct TSSMetrics : ReferenceCounted<TSSMetrics>, NonCopyable {
SSgetValueLatency.clear();
SSgetKeyLatency.clear();
SSgetKeyValuesLatency.clear();
SSgetKeyValuesAndFlatMapLatency.clear();
SSgetMappedKeyValuesLatency.clear();
TSSgetValueLatency.clear();
TSSgetKeyLatency.clear();
TSSgetKeyValuesLatency.clear();
TSSgetKeyValuesAndFlatMapLatency.clear();
TSSgetMappedKeyValuesLatency.clear();
tssErrorsByCode.clear();
ssErrorsByCode.clear();
@ -107,8 +107,8 @@ struct TSSMetrics : ReferenceCounted<TSSMetrics>, NonCopyable {
: cc("TSSClientMetrics"), requests("Requests", cc), streamComparisons("StreamComparisons", cc),
ssErrors("SSErrors", cc), tssErrors("TSSErrors", cc), tssTimeouts("TSSTimeouts", cc),
mismatches("Mismatches", cc), SSgetValueLatency(1000), SSgetKeyLatency(1000), SSgetKeyValuesLatency(1000),
SSgetKeyValuesAndFlatMapLatency(1000), TSSgetValueLatency(1000), TSSgetKeyLatency(1000),
TSSgetKeyValuesLatency(1000), TSSgetKeyValuesAndFlatMapLatency(1000) {}
SSgetMappedKeyValuesLatency(1000), TSSgetValueLatency(1000), TSSgetKeyLatency(1000),
TSSgetKeyValuesLatency(1000), TSSgetMappedKeyValuesLatency(1000) {}
};
template <class Rep>

View File

@ -1,5 +1,5 @@
/*
* BackupProgress.h
* BackupProgress.actor.h
*
* This source file is part of the FoundationDB open source project
*

View File

@ -117,6 +117,8 @@ set(FDBSERVER_SRCS
ServerDBInfo.actor.h
ServerDBInfo.h
SigStack.cpp
SimEncryptVaultProxy.actor.h
SimEncryptVaultProxy.actor.cpp
SimpleConfigConsumer.actor.cpp
SimpleConfigConsumer.h
SimulatedCluster.actor.cpp

View File

@ -599,7 +599,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
}
WorkerDetails newEKPWorker;
if (SERVER_KNOBS->ENABLE_ENCRYPT_KEY_PROXY) {
if (SERVER_KNOBS->ENABLE_ENCRYPTION) {
newEKPWorker = findNewProcessForSingleton(self, ProcessClass::EncryptKeyProxy, id_used);
}
@ -613,7 +613,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
}
ProcessClass::Fitness bestFitnessForEKP;
if (SERVER_KNOBS->ENABLE_ENCRYPT_KEY_PROXY) {
if (SERVER_KNOBS->ENABLE_ENCRYPTION) {
bestFitnessForEKP = findBestFitnessForSingleton(self, newEKPWorker, ProcessClass::EncryptKeyProxy);
}
@ -638,7 +638,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
}
bool ekpHealthy = true;
if (SERVER_KNOBS->ENABLE_ENCRYPT_KEY_PROXY) {
if (SERVER_KNOBS->ENABLE_ENCRYPTION) {
ekpHealthy = isHealthySingleton<EncryptKeyProxyInterface>(
self, newEKPWorker, ekpSingleton, bestFitnessForEKP, self->recruitingEncryptKeyProxyID);
}
@ -662,7 +662,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
}
Optional<Standalone<StringRef>> currEKPProcessId, newEKPProcessId;
if (SERVER_KNOBS->ENABLE_ENCRYPT_KEY_PROXY) {
if (SERVER_KNOBS->ENABLE_ENCRYPTION) {
currEKPProcessId = ekpSingleton.interface.get().locality.processId();
newEKPProcessId = newEKPWorker.interf.locality.processId();
}
@ -674,7 +674,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
newPids.emplace_back(newBMProcessId);
}
if (SERVER_KNOBS->ENABLE_ENCRYPT_KEY_PROXY) {
if (SERVER_KNOBS->ENABLE_ENCRYPTION) {
currPids.emplace_back(currEKPProcessId);
newPids.emplace_back(newEKPProcessId);
}
@ -689,7 +689,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
}
// if the knob is disabled, the EKP coloc counts should have no affect on the coloc counts check below
if (!SERVER_KNOBS->ENABLE_ENCRYPT_KEY_PROXY) {
if (!SERVER_KNOBS->ENABLE_ENCRYPTION) {
ASSERT(currColocMap[currEKPProcessId] == 0);
ASSERT(newColocMap[newEKPProcessId] == 0);
}
@ -706,8 +706,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
ddSingleton.recruit(self);
} else if (CLIENT_KNOBS->ENABLE_BLOB_GRANULES && newColocMap[newBMProcessId] < currColocMap[currBMProcessId]) {
bmSingleton.recruit(self);
} else if (SERVER_KNOBS->ENABLE_ENCRYPT_KEY_PROXY &&
newColocMap[newEKPProcessId] < currColocMap[currEKPProcessId]) {
} else if (SERVER_KNOBS->ENABLE_ENCRYPTION && newColocMap[newEKPProcessId] < currColocMap[currEKPProcessId]) {
ekpSingleton.recruit(self);
}
}
@ -1240,7 +1239,7 @@ void registerWorker(RegisterWorkerRequest req,
self, w, currSingleton, registeringSingleton, self->recruitingBlobManagerID);
}
if (SERVER_KNOBS->ENABLE_ENCRYPT_KEY_PROXY && req.encryptKeyProxyInterf.present()) {
if (SERVER_KNOBS->ENABLE_ENCRYPTION && req.encryptKeyProxyInterf.present()) {
auto currSingleton = EncryptKeyProxySingleton(self->db.serverInfo->get().encryptKeyProxy);
auto registeringSingleton = EncryptKeyProxySingleton(req.encryptKeyProxyInterf);
haltRegisteringOrCurrentSingleton<EncryptKeyProxyInterface>(
@ -2416,7 +2415,7 @@ ACTOR Future<Void> clusterControllerCore(ClusterControllerFullInterface interf,
state Future<ErrorOr<Void>> error = errorOr(actorCollection(self.addActor.getFuture()));
// EncryptKeyProxy is necessary for TLog recovery, recruit it as the first process
if (SERVER_KNOBS->ENABLE_ENCRYPT_KEY_PROXY) {
if (SERVER_KNOBS->ENABLE_ENCRYPTION) {
self.addActor.send(monitorEncryptKeyProxy(&self));
}
self.addActor.send(clusterWatchDatabase(&self, &self.db, coordinators, leaderFail)); // Start the master database

View File

@ -5316,10 +5316,10 @@ public:
collection->disableBuildingTeams();
collection->setCheckTeamDelay();
collection->server_info[UID(1, 0)]->setServerMetrics(mid_avail);
collection->server_info[UID(2, 0)]->setServerMetrics(high_avail);
collection->server_info[UID(3, 0)]->setServerMetrics(high_avail);
collection->server_info[UID(4, 0)]->setServerMetrics(high_avail);
collection->server_info[UID(1, 0)]->setMetrics(mid_avail);
collection->server_info[UID(2, 0)]->setMetrics(high_avail);
collection->server_info[UID(3, 0)]->setMetrics(high_avail);
collection->server_info[UID(4, 0)]->setMetrics(high_avail);
/*
* Suppose 1, 2 and 3 are complete sources, i.e., they have all shards in
@ -5372,10 +5372,10 @@ public:
collection->disableBuildingTeams();
collection->setCheckTeamDelay();
collection->server_info[UID(1, 0)]->setServerMetrics(mid_avail);
collection->server_info[UID(2, 0)]->setServerMetrics(high_avail);
collection->server_info[UID(3, 0)]->setServerMetrics(high_avail);
collection->server_info[UID(4, 0)]->setServerMetrics(high_avail);
collection->server_info[UID(1, 0)]->setMetrics(mid_avail);
collection->server_info[UID(2, 0)]->setMetrics(high_avail);
collection->server_info[UID(3, 0)]->setMetrics(high_avail);
collection->server_info[UID(4, 0)]->setMetrics(high_avail);
collection->server_info[UID(1, 0)]->markTeamUnhealthy(0);
/*
@ -5435,10 +5435,10 @@ public:
* least utilized, if the caller says they preferLowerUtilization.
*/
collection->server_info[UID(1, 0)]->setServerMetrics(mid_avail);
collection->server_info[UID(2, 0)]->setServerMetrics(high_avail);
collection->server_info[UID(3, 0)]->setServerMetrics(high_avail);
collection->server_info[UID(4, 0)]->setServerMetrics(high_avail);
collection->server_info[UID(1, 0)]->setMetrics(mid_avail);
collection->server_info[UID(2, 0)]->setMetrics(high_avail);
collection->server_info[UID(3, 0)]->setMetrics(high_avail);
collection->server_info[UID(4, 0)]->setMetrics(high_avail);
bool wantsNewServers = true;
bool wantsTrueBest = true;
@ -5485,10 +5485,10 @@ public:
collection->disableBuildingTeams();
collection->setCheckTeamDelay();
collection->server_info[UID(1, 0)]->setServerMetrics(mid_avail);
collection->server_info[UID(2, 0)]->setServerMetrics(high_avail);
collection->server_info[UID(3, 0)]->setServerMetrics(high_avail);
collection->server_info[UID(4, 0)]->setServerMetrics(high_avail);
collection->server_info[UID(1, 0)]->setMetrics(mid_avail);
collection->server_info[UID(2, 0)]->setMetrics(high_avail);
collection->server_info[UID(3, 0)]->setMetrics(high_avail);
collection->server_info[UID(4, 0)]->setMetrics(high_avail);
/*
* Among server teams that have healthy space available, pick the team that is
@ -5539,10 +5539,10 @@ public:
collection->disableBuildingTeams();
collection->setCheckTeamDelay();
collection->server_info[UID(1, 0)]->setServerMetrics(high_avail);
collection->server_info[UID(2, 0)]->setServerMetrics(low_avail);
collection->server_info[UID(3, 0)]->setServerMetrics(high_avail);
collection->server_info[UID(4, 0)]->setServerMetrics(low_avail);
collection->server_info[UID(1, 0)]->setMetrics(high_avail);
collection->server_info[UID(2, 0)]->setMetrics(low_avail);
collection->server_info[UID(3, 0)]->setMetrics(high_avail);
collection->server_info[UID(4, 0)]->setMetrics(low_avail);
collection->server_info[UID(1, 0)]->markTeamUnhealthy(0);
/*
@ -5599,11 +5599,11 @@ public:
collection->disableBuildingTeams();
collection->setCheckTeamDelay();
collection->server_info[UID(1, 0)]->setServerMetrics(high_avail);
collection->server_info[UID(2, 0)]->setServerMetrics(low_avail);
collection->server_info[UID(3, 0)]->setServerMetrics(high_avail);
collection->server_info[UID(4, 0)]->setServerMetrics(low_avail);
collection->server_info[UID(5, 0)]->setServerMetrics(high_avail);
collection->server_info[UID(1, 0)]->setMetrics(high_avail);
collection->server_info[UID(2, 0)]->setMetrics(low_avail);
collection->server_info[UID(3, 0)]->setMetrics(high_avail);
collection->server_info[UID(4, 0)]->setMetrics(low_avail);
collection->server_info[UID(5, 0)]->setMetrics(high_avail);
collection->server_info[UID(1, 0)]->markTeamUnhealthy(0);
/*

View File

@ -19,14 +19,18 @@
*/
#include "fdbserver/EncryptKeyProxyInterface.h"
#include "fdbserver/SimEncryptVaultProxy.actor.h"
#include "fdbserver/WorkerInterface.actor.h"
#include "fdbserver/ServerDBInfo.h"
#include "flow/Arena.h"
#include "flow/Error.h"
#include "flow/EventTypes.actor.h"
#include "flow/FastRef.h"
#include "flow/IRandom.h"
#include "flow/Trace.h"
#include "flow/genericactors.actor.h"
#include "flow/network.h"
#include "flow/actorcompiler.h" // This must be the last #include.
struct EncryptKeyProxyData : NonCopyable, ReferenceCounted<EncryptKeyProxyData> {
@ -42,6 +46,17 @@ ACTOR Future<Void> encryptKeyProxyServer(EncryptKeyProxyInterface ekpInterface,
state Future<Void> collection = actorCollection(self->addActor.getFuture());
self->addActor.send(traceRole(Role::ENCRYPT_KEY_PROXY, ekpInterface.id()));
SimEncryptVaultProxyInterface simEncryptVaultProxyInf;
if (g_network->isSimulated()) {
// In simulation construct an EncryptVaultProxy actor to satisfy encryption keys lookups otherwise satisfied by
// integrating external Encryption Key Management solutions.
const uint32_t maxEncryptKeys = deterministicRandom()->randomInt(1024, 2048);
simEncryptVaultProxyInf.initEndpoints();
self->addActor.send(simEncryptVaultProxyCore(simEncryptVaultProxyInf, maxEncryptKeys));
}
TraceEvent("EKP_Start", self->myId).log();
// TODO(ahusain): skeleton implementation, more to come

View File

@ -20,6 +20,7 @@
#ifndef FDBSERVER_ENCRYPTKEYPROXYINTERFACE_H
#define FDBSERVER_ENCRYPTKEYPROXYINTERFACE_H
#include "flow/FileIdentifier.h"
#include "flow/network.h"
#pragma once

View File

@ -1,5 +1,5 @@
/*
* ProxyCommitData.h
* ProxyCommitData.actor.h
*
* This source file is part of the FoundationDB open source project
*

View File

@ -1,5 +1,5 @@
/*
* RestoreController.h
* RestoreController.actor.h
*
* This source file is part of the FoundationDB open source project
*

View File

@ -0,0 +1,153 @@
/*
* SimEncryptVaultProxy.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2021 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <memory>
#include <unordered_map>
#include "fdbrpc/sim_validation.h"
#include "fdbserver/SimEncryptVaultProxy.actor.h"
#include "flow/ActorCollection.h"
#include "flow/Error.h"
#include "flow/IRandom.h"
#include "flow/ITrace.h"
#include "flow/StreamCipher.h"
#include "flow/UnitTest.h"
#include "flow/actorcompiler.h" // This must be the last #include.
// Holds one simulated encryption key together with its identifier.
struct SimEncryptKeyCtx {
    SimEncryptKeyId id = 0;
    SimEncryptKey key;

    SimEncryptKeyCtx() = default;
    // NOTE: 'data' must be NUL-terminated — the key is copied up to (not including) the
    // first NUL byte, per std::string's const char* constructor.
    explicit SimEncryptKeyCtx(SimEncryptKeyId kId, const char* data) : id(kId), key(data) {}
};
struct SimEncyrptVaultProxyContext {
uint32_t maxEncryptionKeys;
std::unordered_map<SimEncryptKeyId, std::unique_ptr<SimEncryptKeyCtx>> simEncryptKeyStore;
SimEncyrptVaultProxyContext() : maxEncryptionKeys(0) {}
explicit SimEncyrptVaultProxyContext(uint32_t keyCount) : maxEncryptionKeys(keyCount) {
uint8_t buffer[AES_256_KEY_LENGTH];
// Construct encryption keyStore.
for (int i = 0; i < maxEncryptionKeys; i++) {
generateRandomData(&buffer[0], AES_256_KEY_LENGTH);
SimEncryptKeyCtx ctx(i, reinterpret_cast<const char*>(buffer));
simEncryptKeyStore[i] = std::make_unique<SimEncryptKeyCtx>(i, reinterpret_cast<const char*>(buffer));
}
}
};
// Core loop of the simulated Encrypt Vault Proxy: serves encryption-key lookups, either by
// an explicit keyId or by an encryption domainId, out of an in-memory keyStore of
// 'maxEncryptKeys' randomly generated keys. Never returns on its own.
ACTOR Future<Void> simEncryptVaultProxyCore(SimEncryptVaultProxyInterface interf, uint32_t maxEncryptKeys) {
    state SimEncyrptVaultProxyContext vaultProxyCtx(maxEncryptKeys);

    ASSERT(vaultProxyCtx.simEncryptKeyStore.size() == maxEncryptKeys);

    // FIX: trace detail key was misspelled "MaxEncrptKeys".
    TraceEvent("SimEncryptVaultProxy_Init", interf.id()).detail("MaxEncryptKeys", maxEncryptKeys);

    loop {
        choose {
            when(SimGetEncryptKeyByKeyIdRequest req = waitNext(interf.encryptKeyLookupByKeyId.getFuture())) {
                SimGetEncryptKeyByKeyIdReply reply;

                // Lookup the EncryptKeyCtx for the requested keyId; unknown ids are an error.
                // Single map probe (previously: find() followed by a second operator[] lookup).
                auto itr = vaultProxyCtx.simEncryptKeyStore.find(req.encryptKeyId);
                if (itr != vaultProxyCtx.simEncryptKeyStore.end()) {
                    reply.encryptKey = StringRef(itr->second->key);
                    req.reply.send(reply);
                } else {
                    req.reply.sendError(key_not_found());
                }
            }
            when(SimGetEncryptKeyByDomainIdRequest req = waitNext(interf.encryptKeyLookupByDomainId.getFuture())) {
                SimGetEncryptKeyByDomainIdReply reply;

                // Map encryptionDomainId to an EncryptKeyCtx element via a modulo operation.
                // Multiple domains may map to the same encryption key, which is fine: the
                // keyStore guarantees the keyId -> plaintext encryptKey mapping is idempotent.
                reply.encryptKeyId = req.encryptDomainId % maxEncryptKeys;
                reply.encryptKey = StringRef(vaultProxyCtx.simEncryptKeyStore[reply.encryptKeyId].get()->key);
                req.reply.send(reply);
            }
        }
    }
}
// Intentionally empty: referenced from outside this translation unit so the linker keeps
// this object file — presumably so the TEST_CASEs below are registered; mirrors other
// forceLink* hooks in the codebase (NOTE(review): confirm the caller exists).
void forceLinkSimEncryptVaultProxyTests() {}
namespace {
// Test workload: (1) builds a domainId -> key map via lookup-by-domainId, (2) replays a
// random sample of those ids through lookup-by-keyId and checks the answers agree, and
// (3) verifies that an out-of-range keyId fails with key_not_found.
ACTOR Future<Void> testRunWorkload(SimEncryptVaultProxyInterface inf, uint32_t nEncryptionKeys) {
    state uint32_t maxEncryptionKeys = nEncryptionKeys;
    state int maxDomainIds = deterministicRandom()->randomInt(121, 295);
    state int maxIterations = deterministicRandom()->randomInt(786, 1786);
    state std::unordered_map<SimEncryptDomainId, std::unique_ptr<SimEncryptKeyCtx>> domainIdKeyMap;
    state int i = 0;

    TraceEvent("RunWorkloadStart").detail("MaxDomainIds", maxDomainIds).detail("MaxIterations", maxIterations);

    {
        // Construct the domainId -> EncryptKeyCtx map from the proxy's answers.
        for (i = 0; i < maxDomainIds; i++) {
            SimGetEncryptKeyByDomainIdRequest req;
            req.encryptDomainId = i;
            SimGetEncryptKeyByDomainIdReply reply = wait(inf.encryptKeyLookupByDomainId.getReply(req));
            domainIdKeyMap[i] =
                std::make_unique<SimEncryptKeyCtx>(reply.encryptKeyId, reply.encryptKey.toString().c_str());
        }

        // Randomly pick domainIds and validate that lookupByKeyId returns the same key.
        for (i = 0; i < maxIterations; i++) {
            state int idx = deterministicRandom()->randomInt(0, maxDomainIds);
            state SimEncryptKeyCtx* ctx = domainIdKeyMap[idx].get();
            SimGetEncryptKeyByKeyIdRequest req(ctx->id);
            SimGetEncryptKeyByKeyIdReply reply = wait(inf.encryptKeyLookupByKeyId.getReply(req));
            ASSERT(reply.encryptKey.compare(ctx->key) == 0);
        }
    }

    {
        // Verify that an unknown-key access returns key_not_found. Key ids are
        // 0..maxEncryptionKeys-1, so maxEncryptionKeys + 1 is guaranteed absent.
        state SimGetEncryptKeyByKeyIdRequest req;
        state bool errorCaught = false;
        req.encryptKeyId = maxEncryptionKeys + 1;
        try {
            SimGetEncryptKeyByKeyIdReply reply = wait(inf.encryptKeyLookupByKeyId.getReply(req));
            (void)reply;
        } catch (Error& e) {
            ASSERT(e.code() == error_code_key_not_found);
            errorCaught = true;
        }
        // FIX: previously a successful (non-throwing) lookup of the bogus key would pass
        // silently; the negative path must actually be exercised.
        ASSERT(errorCaught);
    }

    TraceEvent("RunWorkloadDone").log();
    return Void();
}
} // namespace
// Unit test: runs the proxy core and the lookup workload concurrently. The proxy core's
// loop never returns on its own, so its completing first is an internal error; the
// workload completing ends the test.
TEST_CASE("fdbserver/SimEncryptVaultProxy") {
    state SimEncryptVaultProxyInterface inf;
    state uint32_t maxEncryptKeys = 64;
    // NOTE(review): inf.initEndpoints() is never called here — requests appear to be
    // served purely through the in-process streams; confirm that is intended.
    loop choose {
        when(wait(simEncryptVaultProxyCore(inf, maxEncryptKeys))) { throw internal_error(); }
        when(wait(testRunWorkload(inf, maxEncryptKeys))) { break; }
    }
    return Void();
}

View File

@ -0,0 +1,132 @@
/*
* SimEncryptVaultProxy.actor.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2021 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_SIMENCRYPTVAULTPROXY_ACTOR_G_H)
#define FDBSERVER_SIMENCRYPTVAULTPROXY_ACTOR_G_H
#include "fdbserver/SimEncryptVaultProxy.actor.g.h"
#elif !defined(FDBSERVER_SIMENCRYPTVAULTPROXY_ACTOR_H)
#define FDBSERVER_SIMENCRYPTVAULTPROXY_ACTOR_H
#include "fdbclient/FDBTypes.h"
#include "fdbrpc/fdbrpc.h"
#include "flow/FileIdentifier.h"
#include "flow/Trace.h"
#include "flow/flow.h"
#include "flow/network.h"
#include "flow/actorcompiler.h" // This must be the last #include.
using SimEncryptKeyId = uint64_t;
using SimEncryptDomainId = uint64_t;
using SimEncryptKey = std::string;
// RPC interface of the simulated Encrypt Vault Proxy: key lookups by keyId and by domainId,
// plus a waitFailure stream used as the base endpoint for the adjusted-endpoint scheme.
struct SimEncryptVaultProxyInterface {
    constexpr static FileIdentifier file_identifier = 2416711;
    RequestStream<ReplyPromise<Void>> waitFailure;
    RequestStream<struct SimGetEncryptKeyByKeyIdRequest> encryptKeyLookupByKeyId;
    RequestStream<struct SimGetEncryptKeyByDomainIdRequest> encryptKeyLookupByDomainId;

    SimEncryptVaultProxyInterface() {}

    // The proxy is identified by the token of its keyId-lookup endpoint.
    UID id() const { return encryptKeyLookupByKeyId.getEndpoint().token; }

    template <class Archive>
    void serialize(Archive& ar) {
        if constexpr (!is_fb_function<Archive>) {
            ASSERT(ar.protocolVersion().isValid());
        }
        // Only waitFailure crosses the wire; the other streams are reconstructed on the
        // receiving side from endpoints adjacent to it.
        serializer(ar, waitFailure);
        if (Archive::isDeserializing) {
            // BUG FIX: these streams were previously constructed as
            // RequestStream<GetCommitVersionRequest> / RequestStream<GetRawCommittedVersionRequest>
            // (apparently copy-pasted from another interface) — the wrong request types, which
            // cannot be assigned to the Sim* streams once this template is instantiated.
            encryptKeyLookupByKeyId =
                RequestStream<struct SimGetEncryptKeyByKeyIdRequest>(waitFailure.getEndpoint().getAdjustedEndpoint(1));
            encryptKeyLookupByDomainId = RequestStream<struct SimGetEncryptKeyByDomainIdRequest>(
                waitFailure.getEndpoint().getAdjustedEndpoint(2));
        }
    }

    // Registers all receivers with FlowTransport; the order here must match the
    // getAdjustedEndpoint() indices used in serialize().
    void initEndpoints() {
        std::vector<std::pair<FlowReceiver*, TaskPriority>> streams;
        streams.push_back(waitFailure.getReceiver());
        streams.push_back(encryptKeyLookupByKeyId.getReceiver(TaskPriority::DefaultPromiseEndpoint));
        streams.push_back(encryptKeyLookupByDomainId.getReceiver(TaskPriority::DefaultPromiseEndpoint));
        FlowTransport::transport().addEndpoints(streams);
    }
};
// Reply to a keyId-based lookup: the plaintext encryption key material.
struct SimGetEncryptKeyByKeyIdReply {
    constexpr static FileIdentifier file_identifier = 2313778;
    Standalone<StringRef> encryptKey;

    SimGetEncryptKeyByKeyIdReply() : encryptKey(StringRef()) {}
    explicit SimGetEncryptKeyByKeyIdReply(Standalone<StringRef> k) : encryptKey(k) {}

    template <class Ar>
    void serialize(Ar& ar) {
        serializer(ar, encryptKey);
    }
};
// Request the plaintext encryption key for a specific keyId.
struct SimGetEncryptKeyByKeyIdRequest {
    constexpr static FileIdentifier file_identifier = 6913396;
    SimEncryptKeyId encryptKeyId = 0;
    ReplyPromise<SimGetEncryptKeyByKeyIdReply> reply;

    SimGetEncryptKeyByKeyIdRequest() = default;
    explicit SimGetEncryptKeyByKeyIdRequest(SimEncryptKeyId keyId) : encryptKeyId(keyId) {}

    template <class Ar>
    void serialize(Ar& ar) {
        serializer(ar, encryptKeyId, reply);
    }
};
struct SimGetEncryptKeyByDomainIdReply {
constexpr static FileIdentifier file_identifier = 3009025;
SimEncryptDomainId encryptKeyId;
Standalone<StringRef> encryptKey;
SimGetEncryptKeyByDomainIdReply() : encryptKeyId(0), encryptKey(StringRef()) {}
explicit SimGetEncryptKeyByDomainIdReply(SimEncryptKeyId keyId, Standalone<StringRef> key)
: encryptKeyId(keyId), encryptKey(key) {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, encryptKeyId, encryptKey);
}
};
// Request the encryption key assigned to an encryption domain.
struct SimGetEncryptKeyByDomainIdRequest {
    constexpr static FileIdentifier file_identifier = 9918682;
    SimEncryptDomainId encryptDomainId = 0;
    ReplyPromise<SimGetEncryptKeyByDomainIdReply> reply;

    SimGetEncryptKeyByDomainIdRequest() = default;
    explicit SimGetEncryptKeyByDomainIdRequest(SimEncryptDomainId domainId) : encryptDomainId(domainId) {}

    template <class Ar>
    void serialize(Ar& ar) {
        serializer(ar, encryptDomainId, reply);
    }
};
ACTOR Future<Void> simEncryptVaultProxyCore(struct SimEncryptVaultProxyInterface interf, uint32_t maxEncryptKeys);
#include "flow/unactorcompiler.h"
#endif // FDBSERVER_SIMENCRYPTVAULTPROXY_ACTOR_H

View File

@ -2249,7 +2249,7 @@ void setupSimulatedSystem(std::vector<Future<Void>>* systemActors,
using namespace std::literals;
#if defined(SSD_ROCKSDB_EXPERIMENTAL) && !VALGRIND
#if defined(SSD_ROCKSDB_EXPERIMENTAL)
bool rocksDBEnabled = true;
#else
bool rocksDBEnabled = false;

View File

@ -813,7 +813,7 @@ ACTOR static Future<JsonBuilderObject> processStatusFetcher(
roles.addRole("blob_manager", db->get().blobManager.get());
}
if (SERVER_KNOBS->ENABLE_ENCRYPT_KEY_PROXY && db->get().encryptKeyProxy.present()) {
if (SERVER_KNOBS->ENABLE_ENCRYPTION && db->get().encryptKeyProxy.present()) {
roles.addRole("encrypt_key_proxy", db->get().encryptKeyProxy.get());
}

View File

@ -37,7 +37,7 @@ public:
choose {
when(ErrorOr<GetStorageMetricsReply> rep = wait(metricsRequest)) {
if (rep.present()) {
server->serverMetrics = rep;
server->metrics = rep;
if (server->updated.canBeSet()) {
server->updated.send(Void());
}
@ -65,27 +65,27 @@ public:
}
}
if (server->serverMetrics.get().lastUpdate < now() - SERVER_KNOBS->DD_SS_STUCK_TIME_LIMIT) {
if (server->metrics.get().lastUpdate < now() - SERVER_KNOBS->DD_SS_STUCK_TIME_LIMIT) {
if (server->ssVersionTooFarBehind.get() == false) {
TraceEvent("StorageServerStuck", server->collection->getDistributorId())
.detail("ServerId", server->id.toString())
.detail("LastUpdate", server->serverMetrics.get().lastUpdate);
.detail("LastUpdate", server->metrics.get().lastUpdate);
server->ssVersionTooFarBehind.set(true);
server->collection->addLaggingStorageServer(server->lastKnownInterface.locality.zoneId().get());
}
} else if (server->serverMetrics.get().versionLag > SERVER_KNOBS->DD_SS_FAILURE_VERSIONLAG) {
} else if (server->metrics.get().versionLag > SERVER_KNOBS->DD_SS_FAILURE_VERSIONLAG) {
if (server->ssVersionTooFarBehind.get() == false) {
TraceEvent(SevWarn, "SSVersionDiffLarge", server->collection->getDistributorId())
.detail("ServerId", server->id.toString())
.detail("VersionLag", server->serverMetrics.get().versionLag);
.detail("VersionLag", server->metrics.get().versionLag);
server->ssVersionTooFarBehind.set(true);
server->collection->addLaggingStorageServer(server->lastKnownInterface.locality.zoneId().get());
}
} else if (server->serverMetrics.get().versionLag < SERVER_KNOBS->DD_SS_ALLOWED_VERSIONLAG) {
} else if (server->metrics.get().versionLag < SERVER_KNOBS->DD_SS_ALLOWED_VERSIONLAG) {
if (server->ssVersionTooFarBehind.get() == true) {
TraceEvent("SSVersionDiffNormal", server->collection->getDistributorId())
.detail("ServerId", server->id.toString())
.detail("VersionLag", server->serverMetrics.get().versionLag);
.detail("VersionLag", server->metrics.get().versionLag);
server->ssVersionTooFarBehind.set(false);
server->collection->removeLaggingStorageServer(server->lastKnownInterface.locality.zoneId().get());
}
@ -138,9 +138,9 @@ TCServerInfo::TCServerInfo(StorageServerInterface ssi,
}
bool TCServerInfo::hasHealthyAvailableSpace(double minAvailableSpaceRatio) const {
ASSERT(serverMetricsPresent());
ASSERT(metricsPresent());
auto& metrics = getServerMetrics();
auto& metrics = getMetrics();
ASSERT(metrics.available.bytes >= 0);
ASSERT(metrics.capacity.bytes >= 0);
@ -198,6 +198,23 @@ void TCServerInfo::removeTeamsContainingServer(UID removedServer) {
}
}
std::pair<int64_t, int64_t> TCServerInfo::spaceBytes(bool includeInFlight) const {
auto& metrics = getMetrics();
ASSERT(metrics.capacity.bytes >= 0);
ASSERT(metrics.available.bytes >= 0);
int64_t bytesAvailable = metrics.available.bytes;
if (includeInFlight) {
bytesAvailable -= getDataInFlightToServer();
}
return std::make_pair(bytesAvailable, metrics.capacity.bytes); // bytesAvailable could be negative
}
int64_t TCServerInfo::loadBytes() const {
return getMetrics().load.bytes;
}
void TCServerInfo::removeTeam(Reference<TCTeamInfo> team) {
for (int t = 0; t < teams.size(); t++) {
if (teams[t] == team) {
@ -356,17 +373,8 @@ int64_t TCTeamInfo::getLoadBytes(bool includeInFlight, double inflightPenalty) c
int64_t TCTeamInfo::getMinAvailableSpace(bool includeInFlight) const {
int64_t minAvailableSpace = std::numeric_limits<int64_t>::max();
for (const auto& server : servers) {
if (server->serverMetricsPresent()) {
auto& replyValue = server->getServerMetrics();
ASSERT(replyValue.available.bytes >= 0);
ASSERT(replyValue.capacity.bytes >= 0);
int64_t bytesAvailable = replyValue.available.bytes;
if (includeInFlight) {
bytesAvailable -= server->getDataInFlightToServer();
}
if (server->metricsPresent()) {
const auto [bytesAvailable, bytesCapacity] = server->spaceBytes(includeInFlight);
minAvailableSpace = std::min(bytesAvailable, minAvailableSpace);
}
}
@ -377,21 +385,14 @@ int64_t TCTeamInfo::getMinAvailableSpace(bool includeInFlight) const {
double TCTeamInfo::getMinAvailableSpaceRatio(bool includeInFlight) const {
double minRatio = 1.0;
for (const auto& server : servers) {
if (server->serverMetricsPresent()) {
auto const& replyValue = server->getServerMetrics();
if (server->metricsPresent()) {
auto [bytesAvailable, bytesCapacity] = server->spaceBytes(includeInFlight);
bytesAvailable = std::max((int64_t)0, bytesAvailable);
ASSERT(replyValue.available.bytes >= 0);
ASSERT(replyValue.capacity.bytes >= 0);
int64_t bytesAvailable = replyValue.available.bytes;
if (includeInFlight) {
bytesAvailable = std::max((int64_t)0, bytesAvailable - server->getDataInFlightToServer());
}
if (replyValue.capacity.bytes == 0)
if (bytesCapacity == 0)
minRatio = 0;
else
minRatio = std::min(minRatio, ((double)bytesAvailable) / replyValue.capacity.bytes);
minRatio = std::min(minRatio, ((double)bytesAvailable) / bytesCapacity);
}
}
@ -403,7 +404,7 @@ bool TCTeamInfo::allServersHaveHealthyAvailableSpace() const {
double minAvailableSpaceRatio =
SERVER_KNOBS->MIN_AVAILABLE_SPACE_RATIO + SERVER_KNOBS->MIN_AVAILABLE_SPACE_RATIO_SAFETY_BUFFER;
for (const auto& server : servers) {
if (!server->serverMetricsPresent() || !server->hasHealthyAvailableSpace(minAvailableSpaceRatio)) {
if (!server->metricsPresent() || !server->hasHealthyAvailableSpace(minAvailableSpaceRatio)) {
result = false;
break;
}
@ -440,11 +441,12 @@ void TCTeamInfo::addServers(const std::vector<UID>& servers) {
int64_t TCTeamInfo::getLoadAverage() const {
int64_t bytesSum = 0;
int added = 0;
for (int i = 0; i < servers.size(); i++)
if (servers[i]->serverMetricsPresent()) {
for (const auto& server : servers) {
if (server->metricsPresent()) {
added++;
bytesSum += servers[i]->getServerMetrics().load.bytes;
bytesSum += server->loadBytes();
}
}
if (added < servers.size())
bytesSum *= 2;

View File

@ -28,6 +28,7 @@ class TCMachineTeamInfo;
class TCServerInfo : public ReferenceCounted<TCServerInfo> {
friend class TCServerInfoImpl;
friend class DDTeamCollectionUnitTest;
UID id;
bool inDesiredDC;
DDTeamCollection* collection;
@ -45,7 +46,12 @@ class TCServerInfo : public ReferenceCounted<TCServerInfo> {
int64_t dataInFlightToServer;
std::vector<Reference<TCTeamInfo>> teams;
ErrorOr<GetStorageMetricsReply> serverMetrics;
ErrorOr<GetStorageMetricsReply> metrics;
GetStorageMetricsReply const& getMetrics() const { return metrics.get(); }
void setMetrics(GetStorageMetricsReply serverMetrics) { this->metrics = serverMetrics; }
void markTeamUnhealthy(int teamIndex);
public:
Reference<TCMachineInfo> machine;
@ -84,8 +90,7 @@ public:
void addTeam(Reference<TCTeamInfo> team) { teams.push_back(team); }
void removeTeamsContainingServer(UID removedServer);
void removeTeam(Reference<TCTeamInfo>);
GetStorageMetricsReply const& getServerMetrics() const { return serverMetrics.get(); }
bool serverMetricsPresent() const { return serverMetrics.present(); }
bool metricsPresent() const { return metrics.present(); }
bool isCorrectStoreType(KeyValueStoreType configStoreType) const {
// A new storage server's store type may not be set immediately.
@ -93,18 +98,14 @@ public:
return (storeType == configStoreType || storeType == KeyValueStoreType::END);
}
std::pair<int64_t, int64_t> spaceBytes(bool includeInFlight = true) const;
int64_t loadBytes() const;
bool hasHealthyAvailableSpace(double minAvailableSpaceRatio) const;
Future<Void> updateServerMetrics();
static Future<Void> updateServerMetrics(Reference<TCServerInfo> server);
Future<Void> serverMetricsPolling();
// FIXME: Public for testing only:
void setServerMetrics(GetStorageMetricsReply serverMetrics) { this->serverMetrics = serverMetrics; }
// FIXME: Public for testing only:
void markTeamUnhealthy(int teamIndex);
~TCServerInfo();
};

View File

@ -88,14 +88,14 @@ bool canReplyWith(Error e) {
case error_code_server_overloaded:
case error_code_tenant_name_required:
case error_code_unknown_tenant:
// getRangeAndMap related exceptions that are not retriable:
// getMappedRange related exceptions that are not retriable:
case error_code_mapper_bad_index:
case error_code_mapper_no_such_key:
case error_code_mapper_bad_range_decriptor:
case error_code_quick_get_key_values_has_more:
case error_code_quick_get_value_miss:
case error_code_quick_get_key_values_miss:
case error_code_get_key_values_and_map_has_more:
case error_code_get_mapped_key_values_has_more:
// case error_code_all_alternatives_failed:
return true;
default:
@ -820,7 +820,7 @@ public:
struct Counters {
CounterCollection cc;
Counter allQueries, getKeyQueries, getValueQueries, getRangeQueries, getRangeAndFlatMapQueries,
Counter allQueries, getKeyQueries, getValueQueries, getRangeQueries, getMappedRangeQueries,
getRangeStreamQueries, finishedQueries, lowPriorityQueries, rowsQueried, bytesQueried, watchQueries,
emptyQueries;
@ -862,7 +862,7 @@ public:
Counter wrongShardServer;
Counter fetchedVersions;
Counter fetchesFromLogs;
// The following counters measure how many of lookups in the getRangeAndFlatMapQueries are effective. "Miss"
// The following counters measure how many of lookups in the getMappedRangeQueries are effective. "Miss"
// means fallback if fallback is enabled, otherwise means failure (so that another layer could implement
// fallback).
Counter quickGetValueHit, quickGetValueMiss, quickGetKeyValuesHit, quickGetKeyValuesMiss;
@ -886,7 +886,7 @@ public:
Counters(StorageServer* self)
: cc("StorageServer", self->thisServerID.toString()), allQueries("QueryQueue", cc),
getKeyQueries("GetKeyQueries", cc), getValueQueries("GetValueQueries", cc),
getRangeQueries("GetRangeQueries", cc), getRangeAndFlatMapQueries("GetRangeAndFlatMapQueries", cc),
getRangeQueries("GetRangeQueries", cc), getMappedRangeQueries("GetMappedRangeQueries", cc),
getRangeStreamQueries("GetRangeStreamQueries", cc), finishedQueries("FinishedQueries", cc),
lowPriorityQueries("LowPriorityQueries", cc), rowsQueried("RowsQueried", cc),
bytesQueried("BytesQueried", cc), watchQueries("WatchQueries", cc), emptyQueries("EmptyQueries", cc),
@ -2203,11 +2203,24 @@ void merge(Arena& arena,
}
}
ACTOR Future<Optional<Value>> quickGetValue(StorageServer* data,
StringRef key,
Version version,
// To provide span context, tags, debug ID to underlying lookups.
GetKeyValuesAndFlatMapRequest* pOriginalReq) {
static inline void copyOptionalValue(Arena* a,
GetValueReqAndResultRef& getValue,
const Optional<Value>& optionalValue) {
std::function<StringRef(Value)> contents = [](Value value) { return value.contents(); };
getValue.result = optionalValue.map(contents);
if (optionalValue.present()) {
a->dependsOn(optionalValue.get().arena());
}
}
ACTOR Future<GetValueReqAndResultRef> quickGetValue(StorageServer* data,
StringRef key,
Version version,
Arena* a,
// To provide span context, tags, debug ID to underlying lookups.
GetMappedKeyValuesRequest* pOriginalReq) {
state GetValueReqAndResultRef getValue;
getValue.key = key;
if (data->shards[key]->isReadable()) {
try {
// TODO: Use a lower level API may be better? Or tweak priorities?
@ -2220,7 +2233,8 @@ ACTOR Future<Optional<Value>> quickGetValue(StorageServer* data,
GetValueReply reply = wait(req.reply.getFuture());
if (!reply.error.present()) {
++data->counters.quickGetValueHit;
return reply.value;
copyOptionalValue(a, getValue, reply.value);
return getValue;
}
// Otherwise fallback.
} catch (Error& e) {
@ -2237,8 +2251,9 @@ ACTOR Future<Optional<Value>> quickGetValue(StorageServer* data,
tr.trState->taskID = TaskPriority::DefaultPromiseEndpoint;
Future<Optional<Value>> valueFuture = tr.get(key, Snapshot::True);
// TODO: async in case it needs to read from other servers.
state Optional<Value> valueOption = wait(valueFuture);
return valueOption;
Optional<Value> valueOption = wait(valueFuture);
copyOptionalValue(a, getValue, valueOption);
return getValue;
} else {
throw quick_get_value_miss();
}
@ -2783,19 +2798,29 @@ ACTOR Future<Void> getKeyValuesQ(StorageServer* data, GetKeyValuesRequest req)
return Void();
}
ACTOR Future<RangeResult> quickGetKeyValues(StorageServer* data,
StringRef prefix,
Version version,
// To provide span context, tags, debug ID to underlying lookups.
GetKeyValuesAndFlatMapRequest* pOriginalReq) {
ACTOR Future<GetRangeReqAndResultRef> quickGetKeyValues(
StorageServer* data,
StringRef prefix,
Version version,
Arena* a,
// To provide span context, tags, debug ID to underlying lookups.
GetMappedKeyValuesRequest* pOriginalReq) {
state GetRangeReqAndResultRef getRange;
getRange.begin = firstGreaterOrEqual(KeyRef(*a, prefix));
getRange.end = firstGreaterOrEqual(strinc(prefix, *a));
try {
// TODO: Use a lower level API may be better? Or tweak priorities?
GetKeyValuesRequest req;
req.spanContext = pOriginalReq->spanContext;
req.arena = Arena();
req.begin = firstGreaterOrEqual(KeyRef(req.arena, prefix));
req.end = firstGreaterOrEqual(strinc(prefix, req.arena));
req.arena = *a;
req.begin = getRange.begin;
req.end = getRange.end;
req.version = version;
// TODO: Validate when the underlying range query exceeds the limit.
// TODO: Use remainingLimit, remainingLimitBytes rather than separate knobs.
req.limit = SERVER_KNOBS->QUICK_GET_KEY_VALUES_LIMIT;
req.limitBytes = SERVER_KNOBS->QUICK_GET_KEY_VALUES_LIMIT_BYTES;
req.isFetchKeys = false;
req.tags = pOriginalReq->tags;
req.debugID = pOriginalReq->debugID;
@ -2807,7 +2832,9 @@ ACTOR Future<RangeResult> quickGetKeyValues(StorageServer* data,
if (!reply.error.present()) {
++data->counters.quickGetKeyValuesHit;
// Convert GetKeyValuesReply to RangeResult.
return RangeResult(RangeResultRef(reply.data, reply.more), reply.arena);
a->dependsOn(reply.arena);
getRange.result = RangeResultRef(reply.data, reply.more);
return getRange;
}
// Otherwise fallback.
} catch (Error& e) {
@ -2823,7 +2850,9 @@ ACTOR Future<RangeResult> quickGetKeyValues(StorageServer* data,
Future<RangeResult> rangeResultFuture = tr.getRange(prefixRange(prefix), Snapshot::True);
// TODO: async in case it needs to read from other servers.
RangeResult rangeResult = wait(rangeResultFuture);
return rangeResult;
a->dependsOn(rangeResult.arena());
getRange.result = rangeResult;
return getRange;
} else {
throw quick_get_key_values_miss();
}
@ -3039,73 +3068,59 @@ TEST_CASE("/fdbserver/storageserver/constructMappedKey") {
return Void();
}
ACTOR Future<GetKeyValuesAndFlatMapReply> flatMap(StorageServer* data,
GetKeyValuesReply input,
StringRef mapper,
// To provide span context, tags, debug ID to underlying lookups.
GetKeyValuesAndFlatMapRequest* pOriginalReq,
Optional<Key> tenantPrefix) {
state GetKeyValuesAndFlatMapReply result;
ACTOR Future<GetMappedKeyValuesReply> mapKeyValues(StorageServer* data,
GetKeyValuesReply input,
StringRef mapper,
// To provide span context, tags, debug ID to underlying lookups.
GetMappedKeyValuesRequest* pOriginalReq,
Optional<Key> tenantPrefix) {
state GetMappedKeyValuesReply result;
result.version = input.version;
if (input.more) {
throw get_key_values_and_map_has_more();
}
result.more = input.more;
result.cached = input.cached;
result.arena.dependsOn(input.arena);
result.data.reserve(result.arena, input.data.size());
state bool isRangeQuery = false;
state Tuple mappedKeyFormatTuple = Tuple::unpack(mapper);
state KeyValueRef* it = input.data.begin();
for (; it != input.data.end(); it++) {
state MappedKeyValueRef kvm;
kvm.key = it->key;
kvm.value = it->value;
state bool isRangeQuery = false;
state Key mappedKey = constructMappedKey(it, mappedKeyFormatTuple, isRangeQuery, tenantPrefix);
// Make sure the mappedKey is always available, so that it's good even we want to get key asynchronously.
result.arena.dependsOn(mappedKey.arena());
// std::cout << "key:" << printable(kvm.key) << ", value:" << printable(kvm.value)
// << ", mappedKey:" << printable(mappedKey) << std::endl;
if (isRangeQuery) {
// Use the mappedKey as the prefix of the range query.
RangeResult rangeResult = wait(quickGetKeyValues(data, mappedKey, input.version, pOriginalReq));
if (rangeResult.more) {
// Probably the fan out is too large. The user should use the old way to query.
throw quick_get_key_values_has_more();
}
result.arena.dependsOn(rangeResult.arena());
for (int i = 0; i < rangeResult.size(); i++) {
KeyRef key = rangeResult[i].key;
if (tenantPrefix.present()) {
key = key.removePrefix(tenantPrefix.get());
}
result.data.emplace_back(result.arena, key, rangeResult[i].value);
}
GetRangeReqAndResultRef getRange =
wait(quickGetKeyValues(data, mappedKey, input.version, &(result.arena), pOriginalReq));
// TODO: Remove tenant prefixes in the keys if they haven't been removed?
kvm.reqAndResult = getRange;
} else {
Optional<Value> valueOption = wait(quickGetValue(data, mappedKey, input.version, pOriginalReq));
if (valueOption.present()) {
Value value = valueOption.get();
result.arena.dependsOn(value.arena());
KeyRef key = mappedKey;
if (tenantPrefix.present()) {
key = key.removePrefix(tenantPrefix.get());
}
result.data.emplace_back(result.arena, key, value);
} else {
// TODO: Shall we throw exception if the key doesn't exist or the range is empty?
}
GetValueReqAndResultRef getValue =
wait(quickGetValue(data, mappedKey, input.version, &(result.arena), pOriginalReq));
// TODO: Remove tenant prefixes in the keys if they haven't been removed?
kvm.reqAndResult = getValue;
}
result.data.push_back(result.arena, kvm);
}
return result;
}
// Most of the actor is copied from getKeyValuesQ. I tried to use templates but things become nearly impossible after
// combining actor shenanigans with template shenanigans.
ACTOR Future<Void> getKeyValuesAndFlatMapQ(StorageServer* data, GetKeyValuesAndFlatMapRequest req)
ACTOR Future<Void> getMappedKeyValuesQ(StorageServer* data, GetMappedKeyValuesRequest req)
// Throws a wrong_shard_server if the keys in the request or result depend on data outside this server OR if a large
// selector offset prevents all data from being read in one range read
{
state Span span("SS:getKeyValuesAndFlatMap"_loc, { req.spanContext });
state Span span("SS:getMappedKeyValues"_loc, { req.spanContext });
state int64_t resultSize = 0;
state IKeyValueStore::ReadType type =
req.isFetchKeys ? IKeyValueStore::ReadType::FETCH : IKeyValueStore::ReadType::NORMAL;
@ -3116,7 +3131,7 @@ ACTOR Future<Void> getKeyValuesAndFlatMapQ(StorageServer* data, GetKeyValuesAndF
getCurrentLineage()->modify(&TransactionLineage::txID) = req.spanContext.first();
++data->counters.getRangeAndFlatMapQueries;
++data->counters.getMappedRangeQueries;
++data->counters.allQueries;
++data->readQueueSizeMetric;
data->maxQueryQueue = std::max<int>(
@ -3133,7 +3148,7 @@ ACTOR Future<Void> getKeyValuesAndFlatMapQ(StorageServer* data, GetKeyValuesAndF
try {
if (req.debugID.present())
g_traceBatch.addEvent(
"TransactionDebug", req.debugID.get().first(), "storageserver.getKeyValuesAndFlatMap.Before");
"TransactionDebug", req.debugID.get().first(), "storageserver.getMappedKeyValues.Before");
state Version version = wait(waitForVersion(data, req.version, span.context));
state Optional<TenantMapEntry> tenantEntry = data->getTenantEntry(req.version, req.tenantInfo);
@ -3149,16 +3164,16 @@ ACTOR Future<Void> getKeyValuesAndFlatMapQ(StorageServer* data, GetKeyValuesAndF
if (req.debugID.present())
g_traceBatch.addEvent(
"TransactionDebug", req.debugID.get().first(), "storageserver.getKeyValuesAndFlatMap.AfterVersion");
"TransactionDebug", req.debugID.get().first(), "storageserver.getMappedKeyValues.AfterVersion");
//.detail("ShardBegin", shard.begin).detail("ShardEnd", shard.end);
//} catch (Error& e) { TraceEvent("WrongShardServer", data->thisServerID).detail("Begin",
// req.begin.toString()).detail("End", req.end.toString()).detail("Version", version).detail("Shard",
//"None").detail("In", "getKeyValuesAndFlatMap>getShardKeyRange"); throw e; }
//"None").detail("In", "getMappedKeyValues>getShardKeyRange"); throw e; }
if (!selectorInRange(req.end, shard) && !(req.end.isFirstGreaterOrEqual() && req.end.getKey() == shard.end)) {
// TraceEvent("WrongShardServer1", data->thisServerID).detail("Begin",
// req.begin.toString()).detail("End", req.end.toString()).detail("Version", version).detail("ShardBegin",
// shard.begin).detail("ShardEnd", shard.end).detail("In", "getKeyValuesAndFlatMap>checkShardExtents");
// shard.begin).detail("ShardEnd", shard.end).detail("In", "getMappedKeyValues>checkShardExtents");
throw wrong_shard_server();
}
@ -3196,7 +3211,7 @@ ACTOR Future<Void> getKeyValuesAndFlatMapQ(StorageServer* data, GetKeyValuesAndF
if (req.debugID.present())
g_traceBatch.addEvent(
"TransactionDebug", req.debugID.get().first(), "storageserver.getKeyValuesAndFlatMap.AfterKeys");
"TransactionDebug", req.debugID.get().first(), "storageserver.getMappedKeyValues.AfterKeys");
//.detail("Off1",offset1).detail("Off2",offset2).detail("ReqBegin",req.begin.getKey()).detail("ReqEnd",req.end.getKey());
// Offsets of zero indicate begin/end keys in this shard, which obviously means we can answer the query
@ -3204,22 +3219,22 @@ ACTOR Future<Void> getKeyValuesAndFlatMapQ(StorageServer* data, GetKeyValuesAndF
// end the last actual key returned must be from this shard. A begin offset of 1 is also OK because then either
// begin is past end or equal to end (so the result is definitely empty)
if ((offset1 && offset1 != 1) || (offset2 && offset2 != 1)) {
TEST(true); // wrong_shard_server due to offset in getKeyValuesAndFlatMapQ
TEST(true); // wrong_shard_server due to offset in getMappedKeyValuesQ
// We could detect when offset1 takes us off the beginning of the database or offset2 takes us off the end,
// and return a clipped range rather than an error (since that is what the NativeAPI.getRange will do anyway
// via its "slow path"), but we would have to add some flags to the response to encode whether we went off
// the beginning and the end, since it needs that information.
//TraceEvent("WrongShardServer2", data->thisServerID).detail("Begin", req.begin.toString()).detail("End", req.end.toString()).detail("Version", version).detail("ShardBegin", shard.begin).detail("ShardEnd", shard.end).detail("In", "getKeyValuesAndFlatMap>checkOffsets").detail("BeginKey", begin).detail("EndKey", end).detail("BeginOffset", offset1).detail("EndOffset", offset2);
//TraceEvent("WrongShardServer2", data->thisServerID).detail("Begin", req.begin.toString()).detail("End", req.end.toString()).detail("Version", version).detail("ShardBegin", shard.begin).detail("ShardEnd", shard.end).detail("In", "getMappedKeyValues>checkOffsets").detail("BeginKey", begin).detail("EndKey", end).detail("BeginOffset", offset1).detail("EndOffset", offset2);
throw wrong_shard_server();
}
if (begin >= end) {
if (req.debugID.present())
g_traceBatch.addEvent(
"TransactionDebug", req.debugID.get().first(), "storageserver.getKeyValuesAndFlatMap.Send");
"TransactionDebug", req.debugID.get().first(), "storageserver.getMappedKeyValues.Send");
//.detail("Begin",begin).detail("End",end);
GetKeyValuesAndFlatMapReply none;
GetMappedKeyValuesReply none;
none.version = version;
none.more = false;
none.penalty = data->getPenalty();
@ -3240,27 +3255,27 @@ ACTOR Future<Void> getKeyValuesAndFlatMapQ(StorageServer* data, GetKeyValuesAndF
type,
tenantPrefix));
state GetKeyValuesAndFlatMapReply r;
state GetMappedKeyValuesReply r;
try {
// Map the scanned range to another list of keys and look up.
GetKeyValuesAndFlatMapReply _r = wait(flatMap(data, getKeyValuesReply, req.mapper, &req, tenantPrefix));
GetMappedKeyValuesReply _r =
wait(mapKeyValues(data, getKeyValuesReply, req.mapper, &req, tenantPrefix));
r = _r;
} catch (Error& e) {
TraceEvent("FlatMapError").error(e);
TraceEvent("MapError").error(e);
throw;
}
if (req.debugID.present())
g_traceBatch.addEvent("TransactionDebug",
req.debugID.get().first(),
"storageserver.getKeyValuesAndFlatMap.AfterReadRange");
g_traceBatch.addEvent(
"TransactionDebug", req.debugID.get().first(), "storageserver.getMappedKeyValues.AfterReadRange");
//.detail("Begin",begin).detail("End",end).detail("SizeOf",r.data.size());
data->checkChangeCounter(
changeCounter,
KeyRangeRef(std::min<KeyRef>(begin, std::min<KeyRef>(req.begin.getKey(), req.end.getKey())),
std::max<KeyRef>(end, std::max<KeyRef>(req.begin.getKey(), req.end.getKey()))));
if (EXPENSIVE_VALIDATION) {
// TODO: GetKeyValuesWithFlatMapRequest doesn't respect limit yet.
// TODO: GetMappedKeyValuesRequest doesn't respect limit yet.
// ASSERT(r.data.size() <= std::abs(req.limit));
}
@ -4159,6 +4174,7 @@ static const KeyRangeRef persistFormatReadableRange(LiteralStringRef("Foundation
LiteralStringRef("FoundationDB/StorageServer/1/5"));
static const KeyRef persistID = LiteralStringRef(PERSIST_PREFIX "ID");
static const KeyRef persistTssPairID = LiteralStringRef(PERSIST_PREFIX "tssPairID");
static const KeyRef persistSSPairID = LiteralStringRef(PERSIST_PREFIX "ssWithTSSPairID");
static const KeyRef persistTssQuarantine = LiteralStringRef(PERSIST_PREFIX "tssQ");
static const KeyRef persistClusterIdKey = LiteralStringRef(PERSIST_PREFIX "clusterId");
@ -5163,6 +5179,10 @@ private:
if (!data->isTss() && m.type == MutationRef::ClearRange && data->ssPairID.present() &&
serverTagKey == data->ssPairID.get()) {
data->clearSSWithTssPair();
// Add the SS pair ID change to the mutation log so it becomes durable
auto& mLV = data->addVersionToMutationLog(data->data().getLatestVersion());
data->addMutationToMutationLog(
mLV, MutationRef(MutationRef::ClearRange, persistSSPairID, keyAfter(persistSSPairID)));
}
} else if (m.type == MutationRef::SetValue && m.param1 == rebootWhenDurablePrivateKey) {
data->rebootAfterDurableVersion = currentVersion;
@ -5266,11 +5286,19 @@ private:
if (!data->isTss()) {
UID ssId = Codec<UID>::unpack(Tuple::unpack(m.param1.substr(1).removePrefix(tssMappingKeys.begin)));
ASSERT(ssId == data->thisServerID);
// Add the SS pair ID change to the mutation log so it becomes durable
auto& mLV = data->addVersionToMutationLog(data->data().getLatestVersion());
if (m.type == MutationRef::SetValue) {
UID tssId = Codec<UID>::unpack(Tuple::unpack(m.param2));
data->setSSWithTssPair(tssId);
data->addMutationToMutationLog(mLV,
MutationRef(MutationRef::SetValue,
persistSSPairID,
BinaryWriter::toValue(tssId, Unversioned())));
} else {
data->clearSSWithTssPair();
data->addMutationToMutationLog(
mLV, MutationRef(MutationRef::ClearRange, persistSSPairID, keyAfter(persistSSPairID)));
}
}
} else if (m.param1.substr(1).startsWith(tssQuarantineKeys.begin) &&
@ -6261,6 +6289,7 @@ ACTOR Future<bool> restoreDurableState(StorageServer* data, IKeyValueStore* stor
state Future<Optional<Value>> fID = storage->readValue(persistID);
state Future<Optional<Value>> fClusterID = storage->readValue(persistClusterIdKey);
state Future<Optional<Value>> ftssPairID = storage->readValue(persistTssPairID);
state Future<Optional<Value>> fssPairID = storage->readValue(persistSSPairID);
state Future<Optional<Value>> fTssQuarantine = storage->readValue(persistTssQuarantine);
state Future<Optional<Value>> fVersion = storage->readValue(persistVersion);
state Future<Optional<Value>> fLogProtocol = storage->readValue(persistLogProtocol);
@ -6276,8 +6305,8 @@ ACTOR Future<bool> restoreDurableState(StorageServer* data, IKeyValueStore* stor
restoreByteSample(data, storage, byteSampleSampleRecovered, startByteSampleRestore.getFuture());
TraceEvent("ReadingDurableState", data->thisServerID).log();
wait(waitForAll(
std::vector{ fFormat, fID, fClusterID, ftssPairID, fTssQuarantine, fVersion, fLogProtocol, fPrimaryLocality }));
wait(waitForAll(std::vector{
fFormat, fID, fClusterID, ftssPairID, fssPairID, fTssQuarantine, fVersion, fLogProtocol, fPrimaryLocality }));
wait(waitForAll(std::vector{ fShardAssigned, fShardAvailable, fChangeFeeds, fTenantMap }));
wait(byteSampleSampleRecovered.getFuture());
TraceEvent("RestoringDurableState", data->thisServerID).log();
@ -6304,6 +6333,11 @@ ACTOR Future<bool> restoreDurableState(StorageServer* data, IKeyValueStore* stor
data->bytesRestored += ftssPairID.get().expectedSize();
}
if (fssPairID.get().present()) {
data->setSSWithTssPair(BinaryReader::fromStringRef<UID>(fssPairID.get().get(), Unversioned()));
data->bytesRestored += fssPairID.get().expectedSize();
}
if (fClusterID.get().present()) {
data->clusterId.send(BinaryReader::fromStringRef<UID>(fClusterID.get().get(), Unversioned()));
data->bytesRestored += fClusterID.get().expectedSize();
@ -6813,17 +6847,16 @@ ACTOR Future<Void> serveGetKeyValuesRequests(StorageServer* self, FutureStream<G
}
}
ACTOR Future<Void> serveGetKeyValuesAndFlatMapRequests(
StorageServer* self,
FutureStream<GetKeyValuesAndFlatMapRequest> getKeyValuesAndFlatMap) {
ACTOR Future<Void> serveGetMappedKeyValuesRequests(StorageServer* self,
FutureStream<GetMappedKeyValuesRequest> getMappedKeyValues) {
// TODO: Is it fine to keep TransactionLineage::Operation::GetKeyValues here?
getCurrentLineage()->modify(&TransactionLineage::operation) = TransactionLineage::Operation::GetKeyValues;
loop {
GetKeyValuesAndFlatMapRequest req = waitNext(getKeyValuesAndFlatMap);
GetMappedKeyValuesRequest req = waitNext(getMappedKeyValues);
// Warning: This code is executed at extremely high priority (TaskPriority::LoadBalancedEndpoint), so downgrade
// before doing real work
self->actors.add(self->readGuard(req, getKeyValuesAndFlatMapQ));
self->actors.add(self->readGuard(req, getMappedKeyValuesQ));
}
}
@ -7049,7 +7082,7 @@ ACTOR Future<Void> storageServerCore(StorageServer* self, StorageServerInterface
self->actors.add(checkBehind(self));
self->actors.add(serveGetValueRequests(self, ssi.getValue.getFuture()));
self->actors.add(serveGetKeyValuesRequests(self, ssi.getKeyValues.getFuture()));
self->actors.add(serveGetKeyValuesAndFlatMapRequests(self, ssi.getKeyValuesAndFlatMap.getFuture()));
self->actors.add(serveGetMappedKeyValuesRequests(self, ssi.getMappedKeyValues.getFuture()));
self->actors.add(serveGetKeyValuesStreamRequests(self, ssi.getKeyValuesStream.getFuture()));
self->actors.add(serveGetKeyRequests(self, ssi.getKey.getFuture()));
self->actors.add(serveWatchValueRequests(self, ssi.watchValue.getFuture()));

View File

@ -1126,7 +1126,7 @@ ACTOR Future<Void> storageServerRollbackRebooter(std::set<std::pair<UID, KeyValu
DUMPTOKEN(recruited.getValue);
DUMPTOKEN(recruited.getKey);
DUMPTOKEN(recruited.getKeyValues);
DUMPTOKEN(recruited.getKeyValuesAndFlatMap);
DUMPTOKEN(recruited.getMappedKeyValues);
DUMPTOKEN(recruited.getShardState);
DUMPTOKEN(recruited.waitMetrics);
DUMPTOKEN(recruited.splitMetrics);
@ -1138,7 +1138,7 @@ ACTOR Future<Void> storageServerRollbackRebooter(std::set<std::pair<UID, KeyValu
DUMPTOKEN(recruited.getKeyValueStoreType);
DUMPTOKEN(recruited.watchValue);
DUMPTOKEN(recruited.getKeyValuesStream);
DUMPTOKEN(recruited.getKeyValuesAndFlatMap);
DUMPTOKEN(recruited.getMappedKeyValues);
prevStorageServer =
storageServer(store, recruited, db, folder, Promise<Void>(), Reference<IClusterConnectionRecord>(nullptr));
@ -1540,7 +1540,7 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
DUMPTOKEN(recruited.getKeyValueStoreType);
DUMPTOKEN(recruited.watchValue);
DUMPTOKEN(recruited.getKeyValuesStream);
DUMPTOKEN(recruited.getKeyValuesAndFlatMap);
DUMPTOKEN(recruited.getMappedKeyValues);
Promise<Void> recovery;
Future<Void> f = storageServer(kv, recruited, dbInfo, folder, recovery, connRecord);
@ -1636,7 +1636,7 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
DUMPTOKEN(recruited.getValue);
DUMPTOKEN(recruited.getKey);
DUMPTOKEN(recruited.getKeyValues);
DUMPTOKEN(recruited.getKeyValuesAndFlatMap);
DUMPTOKEN(recruited.getMappedKeyValues);
DUMPTOKEN(recruited.getShardState);
DUMPTOKEN(recruited.waitMetrics);
DUMPTOKEN(recruited.splitMetrics);
@ -2039,7 +2039,7 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
DUMPTOKEN(recruited.getKeyValueStoreType);
DUMPTOKEN(recruited.watchValue);
DUMPTOKEN(recruited.getKeyValuesStream);
DUMPTOKEN(recruited.getKeyValuesAndFlatMap);
DUMPTOKEN(recruited.getMappedKeyValues);
// printf("Recruited as storageServer\n");
std::string filename =

View File

@ -287,6 +287,7 @@ Value ApiWorkload::generateValue() {
// Creates a random transaction factory to produce transaction of one of the TransactionType choices
ACTOR Future<Void> chooseTransactionFactory(Database cx, std::vector<TransactionType> choices, ApiWorkload* self) {
TransactionType transactionType = deterministicRandom()->randomChoice(choices);
self->transactionType = transactionType;
if (transactionType == NATIVE) {
printf("client %d: Running NativeAPI Transactions\n", self->clientPrefixInt);

View File

@ -52,6 +52,13 @@ struct TransactionWrapper : public ReferenceCounted<TransactionWrapper> {
// Gets a range of key-value pairs from the database specified by a pair of key selectors
virtual Future<RangeResult> getRange(KeySelectorRef& begin, KeySelectorRef& end, int limit, Reverse reverse) = 0;
virtual Future<MappedRangeResult> getMappedRange(KeySelector& begin,
KeySelector& end,
Key& mapper,
GetRangeLimits limits,
Snapshot snapshot,
Reverse reverse) = 0;
// Gets the key from the database specified by a given key selector
virtual Future<Key> getKey(KeySelectorRef& key) = 0;
@ -111,6 +118,15 @@ struct FlowTransactionWrapper : public TransactionWrapper {
return transaction.getRange(begin, end, limit, Snapshot::False, reverse);
}
Future<MappedRangeResult> getMappedRange(KeySelector& begin,
KeySelector& end,
Key& mapper,
GetRangeLimits limits,
Snapshot snapshot,
Reverse reverse) override {
return transaction.getMappedRange(begin, end, mapper, limits, snapshot, reverse);
}
// Gets the key from the database specified by a given key selector
Future<Key> getKey(KeySelectorRef& key) override { return transaction.getKey(key); }
@ -171,6 +187,15 @@ struct ThreadTransactionWrapper : public TransactionWrapper {
return unsafeThreadFutureToFuture(transaction->getRange(begin, end, limit, Snapshot::False, reverse));
}
Future<MappedRangeResult> getMappedRange(KeySelector& begin,
KeySelector& end,
Key& mapper,
GetRangeLimits limits,
Snapshot snapshot,
Reverse reverse) override {
return unsafeThreadFutureToFuture(transaction->getMappedRange(begin, end, mapper, limits, snapshot, reverse));
}
// Gets the key from the database specified by a given key selector
Future<Key> getKey(KeySelectorRef& key) override { return unsafeThreadFutureToFuture(transaction->getKey(key)); }
@ -347,6 +372,9 @@ struct ApiWorkload : TestWorkload {
// The transaction factory used to create transactions in this run
Reference<TransactionFactoryInterface> transactionFactory;
// Transaction type of the transaction factory above.
TransactionType transactionType;
};
#include "flow/unactorcompiler.h"

View File

@ -2374,7 +2374,7 @@ struct ConsistencyCheckWorkload : TestWorkload {
}
// Check EncryptKeyProxy
if (SERVER_KNOBS->ENABLE_ENCRYPT_KEY_PROXY && db.encryptKeyProxy.present() &&
if (SERVER_KNOBS->ENABLE_ENCRYPTION && db.encryptKeyProxy.present() &&
(!nonExcludedWorkerProcessMap.count(db.encryptKeyProxy.get().address()) ||
nonExcludedWorkerProcessMap[db.encryptKeyProxy.get().address()].processClass.machineClassFitness(
ProcessClass::EncryptKeyProxy) > fitnessLowerBound)) {

View File

@ -0,0 +1,406 @@
/*
* GetMappedRange.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cstdint>
#include <limits>
#include <algorithm>
#include "fdbrpc/simulator.h"
#include "fdbclient/MutationLogReader.actor.h"
#include "fdbclient/Tuple.h"
#include "fdbserver/workloads/ApiWorkload.h"
#include "fdbserver/workloads/workloads.actor.h"
#include "fdbserver/Knobs.h"
#include "flow/Error.h"
#include "flow/IRandom.h"
#include "flow/flow.h"
#include "flow/actorcompiler.h" // This must be the last #include.
const Value EMPTY = Tuple().pack();
ValueRef SOMETHING = "SOMETHING"_sr;
const KeyRef prefix = "prefix"_sr;
const KeyRef RECORD = "RECORD"_sr;
const KeyRef INDEX = "INDEX"_sr;
struct GetMappedRangeWorkload : ApiWorkload {
bool enabled;
Snapshot snapshot = Snapshot::False;
// const bool BAD_MAPPER = deterministicRandom()->random01() < 0.1;
const bool BAD_MAPPER = false;
// const bool SPLIT_RECORDS = deterministicRandom()->random01() < 0.5;
const bool SPLIT_RECORDS = true;
const static int SPLIT_SIZE = 3;
GetMappedRangeWorkload(WorkloadContext const& wcx) : ApiWorkload(wcx) {
enabled = !clientId; // only do this on the "first" client
}
std::string description() const override { return "GetMappedRange"; }
Future<Void> start(Database const& cx) override {
// This workload is generated different from typical ApiWorkload. So don't use ApiWorkload::_start.
if (enabled) {
return GetMappedRangeWorkload::_start(cx, this);
}
return Void();
}
ACTOR Future<Void> performSetup(Database cx, GetMappedRangeWorkload* self) {
std::vector<TransactionType> types;
types.push_back(NATIVE);
types.push_back(READ_YOUR_WRITES);
wait(self->chooseTransactionFactory(cx, types));
return Void();
}
Future<Void> performSetup(Database const& cx) override { return performSetup(cx, this); }
Future<Void> performTest(Database const& cx, Standalone<VectorRef<KeyValueRef>> const& data) override {
// Ignore this because we are not using ApiWorkload's default ::start.
return Future<Void>();
}
static Key primaryKey(int i) { return Key(format("primary-key-of-record-%08d", i)); }
static Key indexKey(int i) { return Key(format("index-key-of-record-%08d", i)); }
static Value dataOfRecord(int i) { return Key(format("data-of-record-%08d", i)); }
static Value dataOfRecord(int i, int split) { return Key(format("data-of-record-%08d-split-%08d", i, split)); }
static Key indexEntryKey(int i) {
return Tuple().append(prefix).append(INDEX).append(indexKey(i)).append(primaryKey(i)).pack();
}
static Key recordKey(int i) { return Tuple().append(prefix).append(RECORD).append(primaryKey(i)).pack(); }
static Key recordKey(int i, int split) {
return Tuple().append(prefix).append(RECORD).append(primaryKey(i)).append(split).pack();
}
static Value recordValue(int i) { return Tuple().append(dataOfRecord(i)).pack(); }
static Value recordValue(int i, int split) { return Tuple().append(dataOfRecord(i, split)).pack(); }
ACTOR Future<Void> fillInRecords(Database cx, int n, GetMappedRangeWorkload* self) {
state Transaction tr(cx);
loop {
std::cout << "start fillInRecords n=" << n << std::endl;
// TODO: When n is large, split into multiple transactions.
try {
for (int i = 0; i < n; i++) {
if (self->SPLIT_RECORDS) {
for (int split = 0; split < SPLIT_SIZE; split++) {
tr.set(recordKey(i, split), recordValue(i, split));
}
} else {
tr.set(recordKey(i), recordValue(i));
}
tr.set(indexEntryKey(i), EMPTY);
}
wait(tr.commit());
std::cout << "finished fillInRecords with version " << tr.getCommittedVersion() << std::endl;
break;
} catch (Error& e) {
std::cout << "failed fillInRecords, retry" << std::endl;
wait(tr.onError(e));
}
}
return Void();
}
static void showResult(const RangeResult& result) {
std::cout << "result size: " << result.size() << std::endl;
for (const KeyValueRef* it = result.begin(); it != result.end(); it++) {
std::cout << "key=" << it->key.printable() << ", value=" << it->value.printable() << std::endl;
}
}
ACTOR Future<Void> scanRange(Database cx, KeyRangeRef range) {
std::cout << "start scanRange " << range.toString() << std::endl;
// TODO: When n is large, split into multiple transactions.
state Transaction tr(cx);
loop {
try {
RangeResult result = wait(tr.getRange(range, CLIENT_KNOBS->TOO_MANY));
// showResult(result);
break;
} catch (Error& e) {
wait(tr.onError(e));
}
}
std::cout << "finished scanRange" << std::endl;
return Void();
}
static void validateRecord(int expectedId, const MappedKeyValueRef* it, GetMappedRangeWorkload* self) {
// std::cout << "validateRecord expectedId " << expectedId << " it->key " << printable(it->key) << "
// indexEntryKey(expectedId) " << printable(indexEntryKey(expectedId)) << std::endl;
ASSERT(it->key == indexEntryKey(expectedId));
ASSERT(it->value == EMPTY);
if (self->SPLIT_RECORDS) {
ASSERT(std::holds_alternative<GetRangeReqAndResultRef>(it->reqAndResult));
auto& getRange = std::get<GetRangeReqAndResultRef>(it->reqAndResult);
auto& rangeResult = getRange.result;
// std::cout << "rangeResult.size()=" << rangeResult.size() << std::endl;
ASSERT(rangeResult.more == false);
ASSERT(rangeResult.size() == SPLIT_SIZE);
for (int split = 0; split < SPLIT_SIZE; split++) {
auto& kv = rangeResult[split];
// std::cout << "kv.key=" << printable(kv.key)
// << ", recordKey(id, split)=" << printable(recordKey(id, split)) <<
// std::endl; std::cout << "kv.value=" << printable(kv.value)
// << ", recordValue(id, split)=" << printable(recordValue(id, split)) <<
// std::endl;
ASSERT(kv.key == recordKey(expectedId, split));
ASSERT(kv.value == recordValue(expectedId, split));
}
} else {
ASSERT(std::holds_alternative<GetValueReqAndResultRef>(it->reqAndResult));
auto& getValue = std::get<GetValueReqAndResultRef>(it->reqAndResult);
ASSERT(getValue.key == recordKey(expectedId));
ASSERT(getValue.result.present());
ASSERT(getValue.result.get() == recordValue(expectedId));
}
}
ACTOR Future<MappedRangeResult> scanMappedRangeWithLimits(Database cx,
KeySelector beginSelector,
KeySelector endSelector,
Key mapper,
int limit,
int expectedBeginId,
GetMappedRangeWorkload* self) {
std::cout << "start scanMappedRangeWithLimits beginSelector:" << beginSelector.toString()
<< " endSelector:" << endSelector.toString() << " expectedBeginId:" << expectedBeginId
<< " limit:" << limit << std::endl;
loop {
state Reference<TransactionWrapper> tr = self->createTransaction();
try {
MappedRangeResult result = wait(tr->getMappedRange(
beginSelector, endSelector, mapper, GetRangeLimits(limit), self->snapshot, Reverse::False));
// showResult(result);
if (self->BAD_MAPPER) {
TraceEvent("GetMappedRangeWorkloadShouldNotReachable").detail("ResultSize", result.size());
}
std::cout << "result.size()=" << result.size() << std::endl;
std::cout << "result.more=" << result.more << std::endl;
ASSERT(result.size() <= limit);
int expectedId = expectedBeginId;
for (const MappedKeyValueRef* it = result.begin(); it != result.end(); it++) {
validateRecord(expectedId, it, self);
expectedId++;
}
std::cout << "finished scanMappedRangeWithLimits" << std::endl;
return result;
} catch (Error& e) {
if ((self->BAD_MAPPER && e.code() == error_code_mapper_bad_index) ||
(!SERVER_KNOBS->QUICK_GET_VALUE_FALLBACK && e.code() == error_code_quick_get_value_miss) ||
(!SERVER_KNOBS->QUICK_GET_KEY_VALUES_FALLBACK &&
e.code() == error_code_quick_get_key_values_miss)) {
TraceEvent("GetMappedRangeWorkloadExpectedErrorDetected").error(e);
return MappedRangeResult();
} else {
std::cout << "error " << e.what() << std::endl;
wait(tr->onError(e));
}
std::cout << "failed scanMappedRangeWithLimits" << std::endl;
}
}
}
ACTOR Future<Void> scanMappedRange(Database cx, int beginId, int endId, Key mapper, GetMappedRangeWorkload* self) {
Key beginTuple = Tuple().append(prefix).append(INDEX).append(indexKey(beginId)).getDataAsStandalone();
state KeySelector beginSelector = KeySelector(firstGreaterOrEqual(beginTuple));
Key endTuple = Tuple().append(prefix).append(INDEX).append(indexKey(endId)).getDataAsStandalone();
state KeySelector endSelector = KeySelector(firstGreaterOrEqual(endTuple));
state int limit = 100;
state int expectedBeginId = beginId;
while (true) {
MappedRangeResult result = wait(
self->scanMappedRangeWithLimits(cx, beginSelector, endSelector, mapper, limit, expectedBeginId, self));
expectedBeginId += result.size();
if (result.more) {
if (result.empty()) {
// This is usually not expected.
std::cout << "not result but have more, try again" << std::endl;
} else {
beginSelector = KeySelector(firstGreaterThan(result.back().key));
}
} else {
// No more, finished.
break;
}
}
ASSERT(expectedBeginId == endId);
return Void();
}
static void conflictWriteOnRecord(int conflictRecordId,
Reference<TransactionWrapper>& tr,
GetMappedRangeWorkload* self) {
Key writeKey;
if (deterministicRandom()->random01() < 0.5) {
// Concurrent write to the primary scanned range
writeKey = indexEntryKey(conflictRecordId);
} else {
// Concurrent write to the underlying scanned ranges/keys
if (self->SPLIT_RECORDS) {
// Update one of the splits is sufficient.
writeKey = recordKey(conflictRecordId, 0);
} else {
writeKey = recordKey(conflictRecordId);
}
}
tr->set(writeKey, SOMETHING);
std::cout << "conflict write to " << printable(writeKey) << std::endl;
}
static Future<MappedRangeResult> runGetMappedRange(int beginId,
int endId,
Reference<TransactionWrapper>& tr,
GetMappedRangeWorkload* self) {
Key mapper = getMapper(self);
Key beginTuple = Tuple().append(prefix).append(INDEX).append(indexKey(beginId)).getDataAsStandalone();
KeySelector beginSelector = KeySelector(firstGreaterOrEqual(beginTuple));
Key endTuple = Tuple().append(prefix).append(INDEX).append(indexKey(endId)).getDataAsStandalone();
KeySelector endSelector = KeySelector(firstGreaterOrEqual(endTuple));
return tr->getMappedRange(beginSelector,
endSelector,
mapper,
GetRangeLimits(GetRangeLimits::ROW_LIMIT_UNLIMITED),
self->snapshot,
Reverse::False);
}
// If another transaction writes to our read set (the scanned ranges) before we commit, the transaction should
// fail.
ACTOR Future<Void> testSerializableConflicts(GetMappedRangeWorkload* self) {
std::cout << "testSerializableConflicts" << std::endl;
loop {
state Reference<TransactionWrapper> tr1 = self->createTransaction();
try {
MappedRangeResult result = wait(runGetMappedRange(5, 10, tr1, self));
// Commit another transaction that has conflict writes.
loop {
state Reference<TransactionWrapper> tr2 = self->createTransaction();
try {
conflictWriteOnRecord(7, tr2, self);
wait(tr2->commit());
break;
} catch (Error& e) {
std::cout << "tr2 error " << e.what() << std::endl;
wait(tr2->onError(e));
}
}
// Do some writes so that tr1 is not read-only.
tr1->set(SOMETHING, SOMETHING);
wait(tr1->commit());
UNREACHABLE();
} catch (Error& e) {
if (e.code() == error_code_not_committed) {
std::cout << "tr1 failed because of conflicts (as expected)" << std::endl;
TraceEvent("GetMappedRangeWorkloadExpectedErrorDetected").error(e);
return Void();
} else {
std::cout << "tr1 error " << e.what() << std::endl;
wait(tr1->onError(e));
}
}
}
}
// If the same transaction writes to the read set (the scanned ranges) before reading, it should throw read your
// write exception.
ACTOR Future<Void> testRYW(GetMappedRangeWorkload* self) {
std::cout << "testRYW" << std::endl;
loop {
state Reference<TransactionWrapper> tr1 = self->createTransaction();
try {
// Write something that will be read in getMappedRange.
conflictWriteOnRecord(7, tr1, self);
MappedRangeResult result = wait(runGetMappedRange(5, 10, tr1, self));
UNREACHABLE();
} catch (Error& e) {
if (e.code() == error_code_get_mapped_range_reads_your_writes) {
std::cout << "tr1 failed because of read your writes (as expected)" << std::endl;
TraceEvent("GetMappedRangeWorkloadExpectedErrorDetected").error(e);
return Void();
} else {
std::cout << "tr1 error " << e.what() << std::endl;
wait(tr1->onError(e));
}
}
}
}
ACTOR Future<Void> _start(Database cx, GetMappedRangeWorkload* self) {
TraceEvent("GetMappedRangeWorkloadConfig").detail("BadMapper", self->BAD_MAPPER);
// TODO: Use toml to config
wait(self->fillInRecords(cx, 500, self));
if (self->transactionType == NATIVE) {
self->snapshot = Snapshot::True;
} else if (self->transactionType == READ_YOUR_WRITES) {
self->snapshot = Snapshot::False;
const double rand = deterministicRandom()->random01();
if (rand < 0.1) {
wait(self->testSerializableConflicts(self));
return Void();
} else if (rand < 0.2) {
wait(self->testRYW(self));
return Void();
} else {
// Test the happy path where there is no conflicts or RYW
}
} else {
UNREACHABLE();
}
std::cout << "Test configuration: transactionType:" << self->transactionType << " snapshot:" << self->snapshot
<< "bad_mapper:" << self->BAD_MAPPER << std::endl;
Key mapper = getMapper(self);
// The scanned range cannot be too large to hit get_mapped_key_values_has_more. We have a unit validating the
// error is thrown when the range is large.
wait(self->scanMappedRange(cx, 10, 490, mapper, self));
return Void();
}
static Key getMapper(GetMappedRangeWorkload* self) {
Tuple mapperTuple;
if (self->BAD_MAPPER) {
mapperTuple << prefix << RECORD << "{K[xxx]}"_sr;
} else {
mapperTuple << prefix << RECORD << "{K[3]}"_sr;
if (self->SPLIT_RECORDS) {
mapperTuple << "{...}"_sr;
}
}
Key mapper = mapperTuple.getDataAsStandalone();
return mapper;
}
// Validation is performed inline by the scan/error-path actors; nothing left to check.
Future<bool> check(Database const& cx) override { return true; }
// This workload reports no custom metrics.
void getMetrics(std::vector<PerfMetric>& m) override {}
};
// Register the workload so test specs can reference it as "GetMappedRange".
WorkloadFactory<GetMappedRangeWorkload> GetMappedRangeWorkloadFactory("GetMappedRange");

View File

@ -1,186 +0,0 @@
/*
* GetRangeAndMap.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cstdint>
#include <limits>
#include "fdbrpc/simulator.h"
#include "fdbclient/MutationLogReader.actor.h"
#include "fdbclient/Tuple.h"
#include "fdbserver/workloads/workloads.actor.h"
#include "fdbserver/Knobs.h"
#include "flow/Error.h"
#include "flow/IRandom.h"
#include "flow/flow.h"
#include "flow/actorcompiler.h" // This must be the last #include.
// Tuple-packed empty value used for index entries (the key alone carries the data).
const Value EMPTY = Tuple().pack();
// Key-space components: all test keys live under "prefix", with RECORD for
// primary records and INDEX for secondary-index entries.
const KeyRef prefix = "prefix"_sr;
const KeyRef RECORD = "RECORD"_sr;
const KeyRef INDEX = "INDEX"_sr;
// Workload exercising Transaction::getRangeAndFlatMap: writes RECORD rows plus
// INDEX entries, then scans the index range with a mapper and checks that the
// flattened result is exactly the corresponding records.
struct GetRangeAndMapWorkload : TestWorkload {
bool enabled; // true only on client 0 so the test runs once
// With 10% probability use a mapper with a non-numeric tuple index to
// exercise the mapper_bad_index error path.
const bool BAD_MAPPER = deterministicRandom()->random01() < 0.1;
GetRangeAndMapWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {
enabled = !clientId; // only do this on the "first" client
}
std::string description() const override { return "GetRangeAndMap"; }
Future<Void> start(Database const& cx) override {
if (enabled) {
return _start(cx, this);
}
return Void();
}
// Key/value layout helpers: record i has a primary key, an index key, and a
// tuple-packed value; its index entry maps (prefix, INDEX, indexKey, primaryKey) -> EMPTY.
static Key primaryKey(int i) { return Key(format("primary-key-of-record-%08d", i)); }
static Key indexKey(int i) { return Key(format("index-key-of-record-%08d", i)); }
static Value dataOfRecord(int i) { return Key(format("data-of-record-%08d", i)); }
static Key indexEntryKey(int i) {
return Tuple().append(prefix).append(INDEX).append(indexKey(i)).append(primaryKey(i)).pack();
}
static Key recordKey(int i) { return Tuple().append(prefix).append(RECORD).append(primaryKey(i)).pack(); }
static Value recordValue(int i) { return Tuple().append(dataOfRecord(i)).pack(); }
// Populate n records and their index entries in one transaction, retrying
// via onError on retryable failures.
ACTOR Future<Void> fillInRecords(Database cx, int n) {
loop {
std::cout << "start fillInRecords n=" << n << std::endl;
// TODO: When n is large, split into multiple transactions.
state Transaction tr(cx);
try {
tr.reset();
for (int i = 0; i < n; i++) {
tr.set(recordKey(i), recordValue(i));
tr.set(indexEntryKey(i), EMPTY);
}
wait(tr.commit());
std::cout << "finished fillInRecords with version " << tr.getCommittedVersion() << std::endl;
break;
} catch (Error& e) {
std::cout << "failed fillInRecords, retry" << std::endl;
wait(tr.onError(e));
}
}
return Void();
}
// Debug helper: print every key/value of a RangeResult (calls are commented out below).
static void showResult(const RangeResult& result) {
std::cout << "result size: " << result.size() << std::endl;
for (const KeyValueRef* it = result.begin(); it != result.end(); it++) {
std::cout << "key=" << it->key.printable() << ", value=" << it->value.printable() << std::endl;
}
}
// Plain getRange over `range` as a sanity scan; a failing read is handed to
// onError once (best-effort, no retry loop around the try).
ACTOR Future<Void> scanRange(Database cx, KeyRangeRef range) {
std::cout << "start scanRange " << range.toString() << std::endl;
// TODO: When n is large, split into multiple transactions.
state Transaction tr(cx);
try {
tr.reset();
RangeResult result = wait(tr.getRange(range, CLIENT_KNOBS->TOO_MANY));
// showResult(result);
} catch (Error& e) {
wait(tr.onError(e));
}
std::cout << "finished scanRange" << std::endl;
return Void();
}
// Scan index entries for ids [beginId, endId) via getRangeAndFlatMap and
// verify the mapped result is exactly records beginId..endId-1. With
// BAD_MAPPER, or with the quick-get fallback knobs disabled, specific
// errors are expected instead of a result.
ACTOR Future<Void> scanRangeAndFlatMap(Database cx,
int beginId,
int endId,
Key mapper,
GetRangeAndMapWorkload* self) {
Key someIndexesBegin = Tuple().append(prefix).append(INDEX).append(indexKey(beginId)).getDataAsStandalone();
Key someIndexesEnd = Tuple().append(prefix).append(INDEX).append(indexKey(endId)).getDataAsStandalone();
state KeyRange range = KeyRangeRef(someIndexesBegin, someIndexesEnd);
std::cout << "start scanRangeAndFlatMap " << range.toString() << std::endl;
// TODO: When n is large, split into multiple transactions.
state Transaction tr(cx);
try {
tr.reset();
RangeResult result =
wait(tr.getRangeAndFlatMap(KeySelector(firstGreaterOrEqual(range.begin), range.arena()),
KeySelector(firstGreaterOrEqual(range.end), range.arena()),
mapper,
GetRangeLimits(CLIENT_KNOBS->TOO_MANY),
Snapshot::True));
// showResult(result);
if (self->BAD_MAPPER) {
// A bad mapper should have thrown before producing a result.
TraceEvent("GetRangeAndMapWorkloadShouldNotReachable").detail("ResultSize", result.size());
}
// Examples:
// key=\x01prefix\x00\x01RECORD\x00\x01primary-key-of-record-2\x00, value=\x01data-of-record-2\x00
// key=\x01prefix\x00\x01RECORD\x00\x01primary-key-of-record-3\x00, value=\x01data-of-record-3\x00
std::cout << "result.size()=" << result.size() << std::endl;
std::cout << "result.more=" << result.more << std::endl;
ASSERT(result.size() == endId - beginId);
int id = beginId;
for (const KeyValueRef* it = result.begin(); it != result.end(); it++) {
ASSERT(it->key == recordKey(id));
ASSERT(it->value == recordValue(id));
id++;
}
} catch (Error& e) {
// Expected errors: bad mapper index, or quick-get misses when the
// server-side fallbacks are disabled; anything else goes to onError.
if ((self->BAD_MAPPER && e.code() == error_code_mapper_bad_index) ||
(!SERVER_KNOBS->QUICK_GET_VALUE_FALLBACK && e.code() == error_code_quick_get_value_miss) ||
(!SERVER_KNOBS->QUICK_GET_KEY_VALUES_FALLBACK && e.code() == error_code_quick_get_key_values_miss)) {
TraceEvent("GetRangeAndMapWorkloadExpectedErrorDetected").error(e);
} else {
wait(tr.onError(e));
}
}
std::cout << "finished scanRangeAndFlatMap" << std::endl;
return Void();
}
// Test driver: fill 200 records, sanity-scan all keys, build the mapper
// ("{K[3]}" selects the primary key out of an index entry; "{K[xxx]}" is
// intentionally invalid), then scan-and-map ids [10, 190).
ACTOR Future<Void> _start(Database cx, GetRangeAndMapWorkload* self) {
TraceEvent("GetRangeAndMapWorkloadConfig").detail("BadMapper", self->BAD_MAPPER);
// TODO: Use toml to config
wait(self->fillInRecords(cx, 200));
wait(self->scanRange(cx, normalKeys));
// wait(self->scanRange(cx, someIndexes));
Tuple mapperTuple;
if (self->BAD_MAPPER) {
mapperTuple << prefix << RECORD << "{K[xxx]}"_sr;
} else {
mapperTuple << prefix << RECORD << "{K[3]}"_sr;
}
Key mapper = mapperTuple.getDataAsStandalone();
// The scanned range cannot be too large to hit get_key_values_and_map_has_more. We have a unit validating the
// error is thrown when the range is large.
wait(self->scanRangeAndFlatMap(cx, 10, 190, mapper, self));
return Void();
}
// Validation is done inline by scanRangeAndFlatMap; no custom metrics.
Future<bool> check(Database const& cx) override { return true; }
void getMetrics(std::vector<PerfMetric>& m) override {}
};
// Register the workload so test specs can reference it as "GetRangeAndMap".
WorkloadFactory<GetRangeAndMapWorkload> GetRangeAndMapWorkloadFactory("GetRangeAndMap");

View File

@ -34,6 +34,7 @@ void forceLinkStreamCipherTests();
void forceLinkParallelStreamTests();
void forceLinkSimExternalConnectionTests();
void forceLinkMutationLogReaderTests();
void forceLinkSimEncryptVaultProxyTests();
void forceLinkIThreadPoolTests();
struct UnitTestWorkload : TestWorkload {
@ -79,6 +80,7 @@ struct UnitTestWorkload : TestWorkload {
forceLinkParallelStreamTests();
forceLinkSimExternalConnectionTests();
forceLinkMutationLogReaderTests();
forceLinkSimEncryptVaultProxyTests();
forceLinkIThreadPoolTests();
}

View File

@ -349,6 +349,8 @@ struct union_like_traits<Optional<T>> : std::true_type {
template <class T>
class Standalone : private Arena, public T {
public:
using RefType = T;
// T must have no destructor
Arena& arena() { return *(Arena*)this; }
const Arena& arena() const { return *(const Arena*)this; }

View File

@ -1,5 +1,5 @@
/*
* Arena.h
* ArgParseUtil.h
*
* This source file is part of the FoundationDB open source project
*
@ -37,4 +37,4 @@ Optional<std::string> extractPrefixedArgument(std::string prefix, std::string ar
return arg;
}
#endif
#endif

View File

@ -1,5 +1,5 @@
/*
* Arena.h
* BooleanParam.h
*
* This source file is part of the FoundationDB open source project
*

View File

@ -100,7 +100,7 @@ set(FLOW_SRCS
xxhash.c
xxhash.h)
if(WITH_TLS AND NOT WIN32)
if(WITH_TLS)
set(FLOW_SRCS
${FLOW_SRCS}
StreamCipher.cpp)

View File

@ -278,6 +278,7 @@ inline void freeFast(int size, void* ptr) {
}
[[nodiscard]] inline void* allocateFast4kAligned(int size) {
#if !defined(USE_JEMALLOC)
// Use FastAllocator for sizes it supports to avoid internal fragmentation in some implementations of aligned_alloc
if (size <= 4096)
return FastAllocator<4096>::allocate();
@ -285,10 +286,12 @@ inline void freeFast(int size, void* ptr) {
return FastAllocator<8192>::allocate();
if (size <= 16384)
return FastAllocator<16384>::allocate();
#endif
return aligned_alloc(4096, size);
}
inline void freeFast4kAligned(int size, void* ptr) {
#if !defined(USE_JEMALLOC)
// Sizes supported by FastAllocator must be release via FastAllocator
if (size <= 4096)
return FastAllocator<4096>::release(ptr);
@ -296,6 +299,7 @@ inline void freeFast4kAligned(int size, void* ptr) {
return FastAllocator<8192>::release(ptr);
if (size <= 16384)
return FastAllocator<16384>::release(ptr);
#endif
aligned_free(ptr);
}

View File

@ -20,7 +20,7 @@
#pragma once
#if (!defined(TLS_DISABLED) && !defined(_WIN32))
#if (!defined(TLS_DISABLED))
#define ENCRYPTION_ENABLED 1
#else
#define ENCRYPTION_ENABLED 0

View File

@ -172,7 +172,8 @@ ERROR( quick_get_value_miss, 2034, "Found a mapped key that is not served in the
ERROR( quick_get_key_values_miss, 2035, "Found a mapped range that is not served in the same SS" )
ERROR( blob_granule_no_ryw, 2036, "Blob Granule Read Transactions must be specified as ryw-disabled" )
ERROR( blob_granule_not_materialized, 2037, "Blob Granule Read Transactions must be specified as ryw-disabled" )
ERROR( get_key_values_and_map_has_more, 2038, "getRangeAndFlatMap does not support continuation for now" )
ERROR( get_mapped_key_values_has_more, 2038, "getMappedRange does not support continuation for now" )
ERROR( get_mapped_range_reads_your_writes, 2039, "getMappedRange tries to read data that were previously written in the transaction" )
ERROR( incompatible_protocol_version, 2100, "Incompatible protocol version" )
ERROR( transaction_too_large, 2101, "Transaction exceeds byte limit" )

View File

@ -1,5 +1,5 @@
/*
* flow.h
* folly_memcpy.h
*
* This source file is part of the FoundationDB open source project
*

View File

@ -0,0 +1,6 @@
# Simulation test spec: run the GetMappedRange workload against a database.
[[test]]
testTitle = 'GetMappedRange'
useDB = true
[[test.workload]]
testName = 'GetMappedRange'

View File

@ -1,6 +0,0 @@
[[test]]
testTitle = 'GetRangeAndMap'
useDB = true
[[test.workload]]
testName = 'GetRangeAndMap'