Merge branch 'main' into blob_integration

This commit is contained in:
Josh Slocum 2022-03-17 18:45:42 -05:00
commit 37e7c80f26
252 changed files with 14139 additions and 2615 deletions

View File

@ -91,11 +91,35 @@ if(NOT WIN32)
set(UNIT_TEST_VERSION_510_SRCS test/unit/unit_tests_version_510.cpp)
set(TRACE_PARTIAL_FILE_SUFFIX_TEST_SRCS test/unit/trace_partial_file_suffix_test.cpp)
set(DISCONNECTED_TIMEOUT_UNIT_TEST_SRCS
set(DISCONNECTED_TIMEOUT_UNIT_TEST_SRCS
test/unit/disconnected_timeout_tests.cpp
test/unit/fdb_api.cpp
test/unit/fdb_api.hpp)
# Source and header files of the fdb_c_api_tester target (everything under test/apitester,
# plus the SimpleOpt.h header taken from the flow directory).
set(API_TESTER_SRCS
test/apitester/fdb_c_api_tester.cpp
test/apitester/TesterApiWorkload.cpp
test/apitester/TesterApiWorkload.h
test/apitester/TesterApiWrapper.cpp
test/apitester/TesterApiWrapper.h
test/apitester/TesterTestSpec.cpp
test/apitester/TesterTestSpec.h
test/apitester/TesterCancelTransactionWorkload.cpp
test/apitester/TesterCorrectnessWorkload.cpp
test/apitester/TesterKeyValueStore.cpp
test/apitester/TesterKeyValueStore.h
test/apitester/TesterOptions.h
test/apitester/TesterScheduler.cpp
test/apitester/TesterScheduler.h
test/apitester/TesterTransactionExecutor.cpp
test/apitester/TesterTransactionExecutor.h
test/apitester/TesterUtil.cpp
test/apitester/TesterUtil.h
test/apitester/TesterWorkload.cpp
test/apitester/TesterWorkload.h
../../flow/SimpleOpt.h
)
if(OPEN_FOR_IDE)
add_library(fdb_c_performance_test OBJECT test/performance_test.c test/test.h)
add_library(fdb_c_ryw_benchmark OBJECT test/ryw_benchmark.c test/test.h)
@ -106,6 +130,7 @@ if(NOT WIN32)
add_library(fdb_c_unit_tests_version_510 OBJECT ${UNIT_TEST_VERSION_510_SRCS})
add_library(trace_partial_file_suffix_test OBJECT ${TRACE_PARTIAL_FILE_SUFFIX_TEST_SRCS})
add_library(disconnected_timeout_unit_tests OBJECT ${DISCONNECTED_TIMEOUT_UNIT_TEST_SRCS})
add_library(fdb_c_api_tester OBJECT ${API_TESTER_SRCS})
else()
add_executable(fdb_c_performance_test test/performance_test.c test/test.h)
add_executable(fdb_c_ryw_benchmark test/ryw_benchmark.c test/test.h)
@ -116,6 +141,7 @@ if(NOT WIN32)
add_executable(fdb_c_unit_tests_version_510 ${UNIT_TEST_VERSION_510_SRCS})
add_executable(trace_partial_file_suffix_test ${TRACE_PARTIAL_FILE_SUFFIX_TEST_SRCS})
add_executable(disconnected_timeout_unit_tests ${DISCONNECTED_TIMEOUT_UNIT_TEST_SRCS})
add_executable(fdb_c_api_tester ${API_TESTER_SRCS})
strip_debug_symbols(fdb_c_performance_test)
strip_debug_symbols(fdb_c_ryw_benchmark)
strip_debug_symbols(fdb_c_txn_size_test)
@ -138,6 +164,12 @@ if(NOT WIN32)
target_link_libraries(trace_partial_file_suffix_test PRIVATE fdb_c Threads::Threads flow)
target_link_libraries(disconnected_timeout_unit_tests PRIVATE fdb_c Threads::Threads)
# Link the API tester; the only difference between the two branches is that boost_asan
# is linked instead of boost_target when USE_SANITIZER is set.
if(USE_SANITIZER)
target_link_libraries(fdb_c_api_tester PRIVATE fdb_c toml11_target Threads::Threads fmt::fmt boost_asan)
else()
target_link_libraries(fdb_c_api_tester PRIVATE fdb_c toml11_target Threads::Threads fmt::fmt boost_target)
endif()
# do not set RPATH for mako
set_property(TARGET mako PROPERTY SKIP_BUILD_RPATH TRUE)
target_link_libraries(mako PRIVATE fdb_c fdbclient)
@ -163,6 +195,7 @@ if(NOT WIN32)
add_custom_target(external_client DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libfdb_c_external.so)
add_dependencies(fdb_c_unit_tests external_client)
add_dependencies(disconnected_timeout_unit_tests external_client)
add_dependencies(fdb_c_api_tester external_client)
add_fdbclient_test(
NAME fdb_c_setup_tests
@ -200,6 +233,19 @@ if(NOT WIN32)
@CLUSTER_FILE@
${CMAKE_CURRENT_BINARY_DIR}/libfdb_c_external.so
)
# Registers the fdb_c_api_tests test: it runs run_c_api_tests.py, passing it the cluster file,
# the fdb_c_api_tester binary, the external client library and the apitester/tests directory.
add_fdbclient_test(
NAME fdb_c_api_tests
DISABLE_LOG_DUMP
COMMAND ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/run_c_api_tests.py
--cluster-file
@CLUSTER_FILE@
--tester-binary
$<TARGET_FILE:fdb_c_api_tester>
--external-client-library
${CMAKE_CURRENT_BINARY_DIR}/libfdb_c_external.so
--test-dir
${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests
)
endif()
set(c_workloads_srcs

View File

@ -37,12 +37,14 @@ int g_api_version = 0;
* FDBFuture -> ThreadSingleAssignmentVarBase
* FDBResult -> ThreadSingleAssignmentVarBase
* FDBDatabase -> IDatabase
* FDBTenant -> ITenant
* FDBTransaction -> ITransaction
*/
#define TSAVB(f) ((ThreadSingleAssignmentVarBase*)(f))
#define TSAV(T, f) ((ThreadSingleAssignmentVar<T>*)(f))
#define DB(d) ((IDatabase*)d)
#define TENANT(t) ((ITenant*)t)
#define TXN(t) ((ITransaction*)t)
// Legacy (pre API version 610)
@ -281,6 +283,16 @@ fdb_error_t fdb_future_get_keyvalue_array_v13(FDBFuture* f, FDBKeyValue const**
*out_count = rrr.size(););
}
// Extracts the result of a mapped-range future.
// Stores in *out_kvm a pointer to the first element of the result (reinterpreted as an array of
// FDBMappedKeyValue), in *out_count the number of elements and in *out_more the result's `more` flag.
// NOTE(review): the pointer cast presumably relies on FDBMappedKeyValue mirroring the in-memory layout of the
// elements of MappedRangeResultRef -- confirm against the struct definition in fdb_c.h.
// NOTE(review): CATCH_AND_RETURN is defined outside this view; presumably it turns a thrown error into the
// returned fdb_error_t -- confirm.
extern "C" DLLEXPORT fdb_error_t fdb_future_get_mappedkeyvalue_array(FDBFuture* f,
FDBMappedKeyValue const** out_kvm,
int* out_count,
fdb_bool_t* out_more) {
CATCH_AND_RETURN(Standalone<MappedRangeResultRef> rrr = TSAV(Standalone<MappedRangeResultRef>, f)->get();
*out_kvm = (FDBMappedKeyValue*)rrr.begin();
*out_count = rrr.size();
*out_more = rrr.more;);
}
extern "C" DLLEXPORT fdb_error_t fdb_future_get_string_array(FDBFuture* f, const char*** out_strings, int* out_count) {
CATCH_AND_RETURN(Standalone<VectorRef<const char*>> na = TSAV(Standalone<VectorRef<const char*>>, f)->get();
*out_strings = (const char**)na.begin();
@ -376,6 +388,14 @@ extern "C" DLLEXPORT void fdb_database_destroy(FDBDatabase* d) {
CATCH_AND_DIE(DB(d)->delref(););
}
// Opens the tenant named by the tenant_name_length bytes at tenant_name on database d and stores the
// resulting handle in *out_tenant.
// The pointer is taken out of the reference returned by openTenant() with extractPtr(), so the caller receives
// a raw FDBTenant*. NOTE(review): presumably the caller must release it with fdb_tenant_destroy() -- confirm.
extern "C" DLLEXPORT fdb_error_t fdb_database_open_tenant(FDBDatabase* d,
uint8_t const* tenant_name,
int tenant_name_length,
FDBTenant** out_tenant) {
CATCH_AND_RETURN(*out_tenant =
(FDBTenant*)DB(d)->openTenant(TenantNameRef(tenant_name, tenant_name_length)).extractPtr(););
}
extern "C" DLLEXPORT fdb_error_t fdb_database_create_transaction(FDBDatabase* d, FDBTransaction** out_transaction) {
CATCH_AND_RETURN(Reference<ITransaction> tr = DB(d)->createTransaction();
if (g_api_version <= 15) tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
@ -429,6 +449,17 @@ extern "C" DLLEXPORT FDBFuture* fdb_database_get_server_protocol(FDBDatabase* db
}).extractPtr());
}
// Creates a transaction on the given tenant and stores the raw transaction handle in *out_transaction.
// The pointer is taken out of the reference returned by createTransaction() with extractPtr().
extern "C" DLLEXPORT fdb_error_t fdb_tenant_create_transaction(FDBTenant* tenant, FDBTransaction** out_transaction) {
CATCH_AND_RETURN(*out_transaction = (FDBTransaction*)TENANT(tenant)->createTransaction().extractPtr(););
}
// Releases one reference on the tenant handle by calling delref() on the underlying ITenant.
// Anything thrown by delref() is swallowed by the empty catch-all, so this function never propagates an
// exception to the C caller.
extern "C" DLLEXPORT void fdb_tenant_destroy(FDBTenant* tenant) {
try {
TENANT(tenant)->delref();
} catch (...) {
}
}
extern "C" DLLEXPORT void fdb_transaction_destroy(FDBTransaction* tr) {
try {
TXN(tr)->delref();
@ -571,29 +602,29 @@ FDBFuture* fdb_transaction_get_range_impl(FDBTransaction* tr,
.extractPtr());
}
FDBFuture* fdb_transaction_get_range_and_flat_map_impl(FDBTransaction* tr,
uint8_t const* begin_key_name,
int begin_key_name_length,
fdb_bool_t begin_or_equal,
int begin_offset,
uint8_t const* end_key_name,
int end_key_name_length,
fdb_bool_t end_or_equal,
int end_offset,
uint8_t const* mapper_name,
int mapper_name_length,
int limit,
int target_bytes,
FDBStreamingMode mode,
int iteration,
fdb_bool_t snapshot,
fdb_bool_t reverse) {
FDBFuture* fdb_transaction_get_mapped_range_impl(FDBTransaction* tr,
uint8_t const* begin_key_name,
int begin_key_name_length,
fdb_bool_t begin_or_equal,
int begin_offset,
uint8_t const* end_key_name,
int end_key_name_length,
fdb_bool_t end_or_equal,
int end_offset,
uint8_t const* mapper_name,
int mapper_name_length,
int limit,
int target_bytes,
FDBStreamingMode mode,
int iteration,
fdb_bool_t snapshot,
fdb_bool_t reverse) {
FDBFuture* r = validate_and_update_parameters(limit, target_bytes, mode, iteration, reverse);
if (r != nullptr)
return r;
return (
FDBFuture*)(TXN(tr)
->getRangeAndFlatMap(
->getMappedRange(
KeySelectorRef(KeyRef(begin_key_name, begin_key_name_length), begin_or_equal, begin_offset),
KeySelectorRef(KeyRef(end_key_name, end_key_name_length), end_or_equal, end_offset),
StringRef(mapper_name, mapper_name_length),
@ -604,23 +635,23 @@ FDBFuture* fdb_transaction_get_range_and_flat_map_impl(FDBTransaction* tr,
}
// TODO: Support FDB_API_ADDED in generate_asm.py and then this can be replaced with fdb_api_ptr_unimpl.
FDBFuture* fdb_transaction_get_range_and_flat_map_v699(FDBTransaction* tr,
uint8_t const* begin_key_name,
int begin_key_name_length,
fdb_bool_t begin_or_equal,
int begin_offset,
uint8_t const* end_key_name,
int end_key_name_length,
fdb_bool_t end_or_equal,
int end_offset,
uint8_t const* mapper_name,
int mapper_name_length,
int limit,
int target_bytes,
FDBStreamingMode mode,
int iteration,
fdb_bool_t snapshot,
fdb_bool_t reverse) {
// Stub with the same parameter list as fdb_transaction_get_mapped_range_impl.
// It ignores all of its arguments, prints "UNIMPLEMENTED FDB API FUNCTION" to stderr and aborts the process;
// it never returns.
// NOTE(review): presumably selected for API versions below 700 through FDB_API_CHANGED -- confirm.
FDBFuture* fdb_transaction_get_mapped_range_v699(FDBTransaction* tr,
uint8_t const* begin_key_name,
int begin_key_name_length,
fdb_bool_t begin_or_equal,
int begin_offset,
uint8_t const* end_key_name,
int end_key_name_length,
fdb_bool_t end_or_equal,
int end_offset,
uint8_t const* mapper_name,
int mapper_name_length,
int limit,
int target_bytes,
FDBStreamingMode mode,
int iteration,
fdb_bool_t snapshot,
fdb_bool_t reverse) {
fprintf(stderr, "UNIMPLEMENTED FDB API FUNCTION\n");
abort();
}
@ -857,7 +888,7 @@ extern "C" DLLEXPORT fdb_error_t fdb_select_api_version_impl(int runtime_version
// WARNING: use caution when implementing removed functions by calling public API functions. This can lead to
// undesired behavior when using the multi-version API. Instead, it is better to have both the removed and public
// functions call an internal implementation function. See fdb_create_database_impl for an example.
FDB_API_CHANGED(fdb_transaction_get_range_and_flat_map, 700);
FDB_API_CHANGED(fdb_transaction_get_mapped_range, 700);
FDB_API_REMOVED(fdb_future_get_version, 620);
FDB_API_REMOVED(fdb_create_cluster, 610);
FDB_API_REMOVED(fdb_cluster_create_database, 610);

View File

@ -67,6 +67,7 @@ extern "C" {
typedef struct FDB_future FDBFuture;
typedef struct FDB_result FDBResult;
typedef struct FDB_database FDBDatabase;
typedef struct FDB_tenant FDBTenant;
typedef struct FDB_transaction FDBTransaction;
typedef int fdb_error_t;
@ -113,6 +114,64 @@ typedef struct keyvalue {
int value_length;
} FDBKeyValue;
#endif
#pragma pack(pop)
/* Memory layout of KeySelectorRef. */
typedef struct keyselector {
/* The key the selector refers to. */
FDBKey key;
/* orEqual and offset have not been tested in the C binding. They are just placeholders. */
fdb_bool_t orEqual;
int offset;
} FDBKeySelector;
/* Memory layout of GetRangeReqAndResultRef. */
typedef struct getrangereqandresult {
/* Begin and end key selectors of the range request. */
FDBKeySelector begin;
FDBKeySelector end;
/* Pointer to the key-value data of the result.
   NOTE(review): m_size is presumably the number of elements at data and m_capacity the allocated
   capacity -- confirm against GetRangeReqAndResultRef. */
FDBKeyValue* data;
int m_size, m_capacity;
} FDBGetRangeReqAndResult;
/* Memory layout of MappedKeyValueRef.
Total 112 bytes
- key (12 bytes)
:74:8F:8E:5F:AE:7F:00:00
:4A:00:00:00
- value (12 bytes)
:70:8F:8E:5F:AE:7F:00:00
:00:00:00:00
- begin selector (20 bytes)
:30:8F:8E:5F:AE:7F:00:00
:2D:00:00:00
:00:7F:00:00
:01:00:00:00
- end selector (20 bytes)
:EC:8E:8E:5F:AE:7F:00:00
:2D:00:00:00
:00:2B:3C:60
:01:00:00:00
- vector (16 bytes)
:74:94:8E:5F:AE:7F:00:00
:01:00:00:00
:01:00:00:00
- buffer (32 bytes)
:00:20:D1:61:00:00:00:00
:00:00:00:00:00:00:00:00
:00:00:00:00:00:00:00:00
:01:00:00:00:AE:7F:00:00
*/
/* One element of a mapped range result; the field order follows the byte map in the comment above. */
typedef struct mappedkeyvalue {
FDBKey key;
FDBKey value;
/* It's complicated to map a std::variant to C. For now we assume the underlying requests are always getRange and
 * take the shortcut. */
FDBGetRangeReqAndResult getRange;
/* The trailing 32-byte "buffer" region of the layout described above. */
unsigned char buffer[32];
} FDBMappedKeyValue;
#pragma pack(push, 4)
typedef struct keyrange {
const uint8_t* begin_key;
int begin_key_length;
@ -176,6 +235,12 @@ DLLEXPORT WARN_UNUSED_RESULT fdb_error_t fdb_future_get_keyvalue_array(FDBFuture
int* out_count,
fdb_bool_t* out_more);
#endif
/* Extracts the mapped key-value array from a future.
   On success *out_kv points to the first FDBMappedKeyValue, *out_count holds the number of elements and
   *out_more receives the result's "more" flag. */
DLLEXPORT WARN_UNUSED_RESULT fdb_error_t fdb_future_get_mappedkeyvalue_array(FDBFuture* f,
FDBMappedKeyValue const** out_kv,
int* out_count,
fdb_bool_t* out_more);
DLLEXPORT WARN_UNUSED_RESULT fdb_error_t fdb_future_get_key_array(FDBFuture* f,
FDBKey const** out_key_array,
int* out_count);
@ -207,6 +272,11 @@ DLLEXPORT WARN_UNUSED_RESULT fdb_error_t fdb_database_set_option(FDBDatabase* d,
uint8_t const* value,
int value_length);
/* Opens the tenant whose name is given by the tenant_name_length bytes at tenant_name and stores the
   tenant handle in *out_tenant. */
DLLEXPORT WARN_UNUSED_RESULT fdb_error_t fdb_database_open_tenant(FDBDatabase* d,
uint8_t const* tenant_name,
int tenant_name_length,
FDBTenant** out_tenant);
DLLEXPORT WARN_UNUSED_RESULT fdb_error_t fdb_database_create_transaction(FDBDatabase* d,
FDBTransaction** out_transaction);
@ -230,6 +300,11 @@ DLLEXPORT WARN_UNUSED_RESULT double fdb_database_get_main_thread_busyness(FDBDat
DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_database_get_server_protocol(FDBDatabase* db, uint64_t expected_version);
/* Creates a transaction on the given tenant and stores its handle in *out_transaction. */
DLLEXPORT WARN_UNUSED_RESULT fdb_error_t fdb_tenant_create_transaction(FDBTenant* tenant,
FDBTransaction** out_transaction);
/* Releases the tenant handle. */
DLLEXPORT void fdb_tenant_destroy(FDBTenant* tenant);
DLLEXPORT void fdb_transaction_destroy(FDBTransaction* tr);
DLLEXPORT void fdb_transaction_cancel(FDBTransaction* tr);
@ -283,23 +358,23 @@ DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_transaction_get_range(FDBTransaction
fdb_bool_t reverse);
#endif
DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_transaction_get_range_and_flat_map(FDBTransaction* tr,
uint8_t const* begin_key_name,
int begin_key_name_length,
fdb_bool_t begin_or_equal,
int begin_offset,
uint8_t const* end_key_name,
int end_key_name_length,
fdb_bool_t end_or_equal,
int end_offset,
uint8_t const* mapper_name,
int mapper_name_length,
int limit,
int target_bytes,
FDBStreamingMode mode,
int iteration,
fdb_bool_t snapshot,
fdb_bool_t reverse);
/* Starts a mapped range read and returns a future for its result.
   (begin_key_name, begin_key_name_length, begin_or_equal, begin_offset) and the corresponding end_*
   parameters describe the begin and end key selectors; (mapper_name, mapper_name_length) is the mapper
   passed along with the range. The remaining parameters (limit, target_bytes, mode, iteration, snapshot,
   reverse) have the same names as those of fdb_transaction_get_range. */
DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_transaction_get_mapped_range(FDBTransaction* tr,
uint8_t const* begin_key_name,
int begin_key_name_length,
fdb_bool_t begin_or_equal,
int begin_offset,
uint8_t const* end_key_name,
int end_key_name_length,
fdb_bool_t end_or_equal,
int end_offset,
uint8_t const* mapper_name,
int mapper_name_length,
int limit,
int target_bytes,
FDBStreamingMode mode,
int iteration,
fdb_bool_t snapshot,
fdb_bool_t reverse);
DLLEXPORT void fdb_transaction_set(FDBTransaction* tr,
uint8_t const* key_name,

View File

@ -0,0 +1,129 @@
/*
* TesterApiWorkload.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "TesterApiWorkload.h"
#include "TesterUtil.h"
#include <fmt/format.h>
namespace FdbApiTester {
// Reads the shared workload options from the configuration (the second argument of each getter is
// presumably the value used when the option is absent) and derives the key prefix "<workloadId>/".
// The initializers are listed in the same order as the members are declared in the class.
ApiWorkload::ApiWorkload(const WorkloadConfig& config)
  : WorkloadBase(config),
    minKeyLength(config.getIntOption("minKeyLength", 1)),
    maxKeyLength(config.getIntOption("maxKeyLength", 64)),
    minValueLength(config.getIntOption("minValueLength", 1)),
    maxValueLength(config.getIntOption("maxValueLength", 1000)),
    maxKeysPerTransaction(config.getIntOption("maxKeysPerTransaction", 50)),
    initialSize(config.getIntOption("initialSize", 1000)),
    readExistingKeysRatio(config.getFloatOption("readExistingKeysRatio", 0.9)),
    keyPrefix(fmt::format("{}/", workloadId)) {}
void ApiWorkload::start() {
schedule([this]() {
// 1. Clear data
clearData([this]() {
// 2. Populate initial data
populateData([this]() {
// 3. Generate random workload
runTests();
});
});
});
}
// Returns the workload's key prefix followed by a random lower-case string whose length is drawn
// using the configured minKeyLength/maxKeyLength.
std::string ApiWorkload::randomKeyName() {
    std::string name = keyPrefix;
    name += Random::get().randomStringLowerCase(minKeyLength, maxKeyLength);
    return name;
}
// Returns a random lower-case string whose length is drawn using the configured
// minValueLength/maxValueLength.
std::string ApiWorkload::randomValue() {
    auto&& rng = Random::get();
    return rng.randomStringLowerCase(minValueLength, maxValueLength);
}
// Keeps generating random key names until one is found that is not present in the in-memory store,
// and returns it.
std::string ApiWorkload::randomNotExistingKey() {
    std::string candidate;
    do {
        candidate = randomKeyName();
    } while (store.exists(candidate));
    return candidate;
}
// Picks a key that is present in the in-memory store by resolving a random key name against it.
// The lookup with offset 1 is tried first; if it yields the store's end key, the lookup with offset 0 is
// tried; if that yields the store's start key, the freshly generated name itself is returned.
// NOTE(review): getKey(key, true, offset) presumably follows key-selector semantics -- confirm in KeyValueStore.
std::string ApiWorkload::randomExistingKey() {
    std::string probe = randomKeyName();
    std::string resolved = store.getKey(probe, true, 1);
    const bool firstLookupHit = resolved != store.endKey();
    if (!firstLookupHit) {
        resolved = store.getKey(probe, true, 0);
        const bool secondLookupHit = resolved != store.startKey();
        if (!secondLookupHit) {
            info("No existing key found, using a new random key.");
            return probe;
        }
    }
    return resolved;
}
// Returns an existing key when randomBool(existingKeyRatio) yields true, and a key that is not in the
// store otherwise.
std::string ApiWorkload::randomKey(double existingKeyRatio) {
    const bool pickExisting = Random::get().randomBool(existingKeyRatio);
    return pickExisting ? randomExistingKey() : randomNotExistingKey();
}
// Inserts one batch of maxKeysPerTransaction new random key-value pairs in a single transaction.
// After the transaction completes, the same pairs are applied to the in-memory store and `cont` is scheduled.
void ApiWorkload::populateDataTx(TTaskFct cont) {
    // The batch is shared between the transaction body and the completion callback
    auto batch = std::make_shared<std::vector<KeyValue>>();
    for (int remaining = maxKeysPerTransaction; remaining > 0; --remaining) {
        batch->push_back(KeyValue{ randomNotExistingKey(), randomValue() });
    }

    auto writeBatch = [batch](auto ctx) {
        for (const auto& entry : *batch) {
            ctx->tx()->set(entry.key, entry.value);
        }
        ctx->commit();
    };
    auto onCompleted = [this, batch, cont]() {
        for (const auto& entry : *batch) {
            store.set(entry.key, entry.value);
        }
        schedule(cont);
    };
    execTransaction(writeBatch, onCompleted);
}
// Clears the key range [keyPrefix, keyPrefix + "\xff") in one transaction and schedules `cont` once the
// transaction has completed.
void ApiWorkload::clearData(TTaskFct cont) {
    auto clearPrefixRange = [this](auto ctx) {
        const std::string rangeEnd = fmt::format("{}\xff", keyPrefix);
        ctx->tx()->clearRange(keyPrefix, rangeEnd);
        ctx->commit();
    };
    auto onCompleted = [this, cont]() { schedule(cont); };
    execTransaction(clearPrefixRange, onCompleted);
}
// Fills the database batch by batch until the in-memory store holds at least initialSize entries, then
// schedules `cont`. Each batch re-enters this function from its completion callback.
void ApiWorkload::populateData(TTaskFct cont) {
    const bool needMoreData = store.size() < initialSize;
    if (!needMoreData) {
        info("Data population completed");
        schedule(cont);
        return;
    }
    populateDataTx([this, cont]() { populateData(cont); });
}
} // namespace FdbApiTester

View File

@ -0,0 +1,89 @@
/*
* TesterApiWorkload.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef APITESTER_API_WORKLOAD_H
#define APITESTER_API_WORKLOAD_H
#include "TesterWorkload.h"
#include "TesterKeyValueStore.h"
namespace FdbApiTester {
/**
* Base class for implementing API testing workloads.
* Provides various helper methods and reusable configuration parameters
*/
class ApiWorkload : public WorkloadBase {
public:
// Entry point of the workload: clears the workload's data, populates the initial data set and then
// calls runTests()
void start() override;
// Method to be overridden to run specific tests
virtual void runTests() = 0;
protected:
// The minimum length of a key
int minKeyLength;
// The maximum length of a key
int maxKeyLength;
// The minimum length of a value
int minValueLength;
// The maximum length of a value
int maxValueLength;
// Maximum number of keys to be accessed by a transaction
int maxKeysPerTransaction;
// Initial data size (number of key-value pairs)
int initialSize;
// The ratio of reading existing keys
double readExistingKeysRatio;
// Key prefix
std::string keyPrefix;
// In-memory store maintaining expected database state
KeyValueStore store;
// Initializes the options above from the workload configuration
ApiWorkload(const WorkloadConfig& config);
// Methods for generating random keys and values
// Random key name: the key prefix followed by a random lower-case string
std::string randomKeyName();
// Random lower-case value
std::string randomValue();
// Random key name that is not present in the in-memory store
std::string randomNotExistingKey();
// Key taken from the in-memory store; falls back to a new random key name if none is found
std::string randomExistingKey();
// Existing key if randomBool(existingKeyRatio) yields true, otherwise a not existing one
std::string randomKey(double existingKeyRatio);
// Generate initial random data for the workload
void populateData(TTaskFct cont);
// Clear the data of the workload
void clearData(TTaskFct cont);
private:
// Inserts a single batch of random key-value pairs in one transaction
void populateDataTx(TTaskFct cont);
};
} // namespace FdbApiTester
#endif

View File

@ -0,0 +1,124 @@
/*
* TesterApiWrapper.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "TesterApiWrapper.h"
#include "TesterUtil.h"
#include <cstdint>
#include <fmt/format.h>
namespace FdbApiTester {
namespace {
// Aborts the process if `e` is a non-zero FDB error code, after printing the error description
// returned by fdb_get_error() to stderr. Does nothing when `e` is zero.
void fdb_check(fdb_error_t e) {
    if (e) {
        // fmt only substitutes "{}" replacement fields; a printf-style "%s" would be printed literally
        // and the error description would be dropped.
        fmt::print(stderr, "Unexpected error: {}\n", fdb_get_error(e));
        std::abort();
    }
}
} // namespace
// Takes ownership of the given future handle: fdb_future_destroy is installed as the shared_ptr deleter,
// so the handle is destroyed when the last copy of this wrapper releases it.
Future::Future(FDBFuture* f) : future_(f, fdb_future_destroy) {}
// Drops this wrapper's reference to the underlying future.
void Future::reset() {
future_.reset();
}
// Cancels the underlying future; the wrapper must hold a future.
void Future::cancel() {
ASSERT(future_);
fdb_future_cancel(future_.get());
}
// Returns the error code reported by fdb_future_get_error for the underlying future;
// the wrapper must hold a future.
fdb_error_t Future::getError() const {
ASSERT(future_);
return fdb_future_get_error(future_.get());
}
// Returns the value held by the future, or std::nullopt if the future reports that no value is present.
// Any error returned by fdb_future_get_value is passed to fdb_check.
std::optional<std::string> ValueFuture::getValue() const {
    ASSERT(future_);
    int isPresent;
    const std::uint8_t* bytes;
    int length;
    fdb_check(fdb_future_get_value(future_.get(), &isPresent, &bytes, &length));
    if (!isPresent) {
        return std::nullopt;
    }
    // Copy the bytes out of the future into an owned string
    return std::make_optional(std::string(reinterpret_cast<const char*>(bytes), length));
}
// Wraps an existing FDBTransaction handle. fdb_transaction_destroy is installed as the shared_ptr deleter,
// so the handle is destroyed when the last copy of this wrapper releases it.
Transaction::Transaction(FDBTransaction* tx) : tx_(tx, fdb_transaction_destroy) {}
// Starts a read of `key` and returns the future for its value.
ValueFuture Transaction::get(std::string_view key, fdb_bool_t snapshot) {
ASSERT(tx_);
return ValueFuture(fdb_transaction_get(tx_.get(), (const uint8_t*)key.data(), key.size(), snapshot));
}
// Sets `key` to `value` in this transaction.
void Transaction::set(std::string_view key, std::string_view value) {
ASSERT(tx_);
fdb_transaction_set(tx_.get(), (const uint8_t*)key.data(), key.size(), (const uint8_t*)value.data(), value.size());
}
// Clears `key` in this transaction.
void Transaction::clear(std::string_view key) {
ASSERT(tx_);
fdb_transaction_clear(tx_.get(), (const uint8_t*)key.data(), key.size());
}
// Clears the key range given by `begin` and `end` in this transaction.
void Transaction::clearRange(std::string_view begin, std::string_view end) {
ASSERT(tx_);
fdb_transaction_clear_range(
tx_.get(), (const uint8_t*)begin.data(), begin.size(), (const uint8_t*)end.data(), end.size());
}
// Starts committing the transaction and returns the future of the commit.
Future Transaction::commit() {
ASSERT(tx_);
return Future(fdb_transaction_commit(tx_.get()));
}
// Cancels the transaction.
void Transaction::cancel() {
ASSERT(tx_);
fdb_transaction_cancel(tx_.get());
}
// Passes `err` to fdb_transaction_on_error and returns the resulting future.
Future Transaction::onError(fdb_error_t err) {
ASSERT(tx_);
return Future(fdb_transaction_on_error(tx_.get(), err));
}
// Resets the transaction via fdb_transaction_reset.
void Transaction::reset() {
ASSERT(tx_);
fdb_transaction_reset(tx_.get());
}
// Sets a transaction option that carries no value (an empty value of length 0 is passed).
fdb_error_t Transaction::setOption(FDBTransactionOption option) {
ASSERT(tx_);
return fdb_transaction_set_option(tx_.get(), option, reinterpret_cast<const uint8_t*>(""), 0);
}
// Sets a network option whose value is the given byte string.
fdb_error_t FdbApi::setOption(FDBNetworkOption option, std::string_view value) {
return fdb_network_set_option(option, reinterpret_cast<const uint8_t*>(value.data()), value.size());
}
// Sets a network option whose value is a 64-bit integer; the in-memory bytes of `value` are passed as is.
fdb_error_t FdbApi::setOption(FDBNetworkOption option, int64_t value) {
return fdb_network_set_option(option, reinterpret_cast<const uint8_t*>(&value), sizeof(value));
}
// Sets a network option that carries no value (an empty value of length 0 is passed).
fdb_error_t FdbApi::setOption(FDBNetworkOption option) {
return fdb_network_set_option(option, reinterpret_cast<const uint8_t*>(""), 0);
}
} // namespace FdbApiTester

View File

@ -0,0 +1,92 @@
/*
* TesterApiWrapper.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#ifndef APITESTER_API_WRAPPER_H
#define APITESTER_API_WRAPPER_H
#include <string_view>
#include <optional>
#include <memory>
#define FDB_API_VERSION 710
#include "bindings/c/foundationdb/fdb_c.h"
#undef ERROR
#define ERROR(name, number, description) enum { error_code_##name = number };
#include "flow/error_definitions.h"
namespace FdbApiTester {
// Wrapper parent class to manage memory of an FDBFuture pointer. The pointer is held in a
// std::shared_ptr, so copies of a Future share the same FDBFuture, which is cleaned up
// when the last copy goes out of scope or is reset.
class Future {
public:
// Creates an empty wrapper that holds no future
Future() = default;
// Wraps (and takes ownership of) the given future handle; intentionally implicit
Future(FDBFuture* f);
// Raw handle of the wrapped future (nullptr if empty)
FDBFuture* fdbFuture() { return future_.get(); };
// Error code of the wrapped future
fdb_error_t getError() const;
// True if the wrapper holds a future
explicit operator bool() const { return future_ != nullptr; };
// Releases this wrapper's reference to the future
void reset();
// Cancels the wrapped future
void cancel();
protected:
std::shared_ptr<FDBFuture> future_;
};
// Future of a single-key read. Adds no data members to Future.
class ValueFuture : public Future {
public:
ValueFuture() = default;
ValueFuture(FDBFuture* f) : Future(f) {}
// The value read, or std::nullopt if the future reports that no value is present
std::optional<std::string> getValue() const;
};
// Wrapper around an FDBTransaction handle. The handle is held in a std::shared_ptr, so copies of a
// Transaction refer to the same underlying transaction.
class Transaction {
public:
// Creates an empty wrapper that holds no transaction
Transaction() = default;
// Wraps (and takes ownership of) the given transaction handle
Transaction(FDBTransaction* tx);
// Starts reading the value of a key
ValueFuture get(std::string_view key, fdb_bool_t snapshot);
// Sets a key to a value
void set(std::string_view key, std::string_view value);
// Clears a single key
void clear(std::string_view key);
// Clears the key range given by begin and end
void clearRange(std::string_view begin, std::string_view end);
// Starts committing the transaction
Future commit();
// Cancels the transaction
void cancel();
// Passes the error to fdb_transaction_on_error and returns the resulting future
Future onError(fdb_error_t err);
// Resets the transaction
void reset();
// Sets a transaction option that carries no value
fdb_error_t setOption(FDBTransactionOption option);
private:
std::shared_ptr<FDBTransaction> tx_;
};
// Static helpers for setting network options; each overload forwards to fdb_network_set_option
class FdbApi {
public:
// Option with a byte-string value
static fdb_error_t setOption(FDBNetworkOption option, std::string_view value);
// Option with a 64-bit integer value
static fdb_error_t setOption(FDBNetworkOption option, int64_t value);
// Option without a value
static fdb_error_t setOption(FDBNetworkOption option);
};
} // namespace FdbApiTester
#endif

View File

@ -0,0 +1,113 @@
/*
* TesterCancelTransactionWorkload.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "TesterApiWorkload.h"
#include "TesterUtil.h"
namespace FdbApiTester {
class CancelTransactionWorkload : public ApiWorkload {
public:
CancelTransactionWorkload(const WorkloadConfig& config) : ApiWorkload(config) {
numRandomOperations = config.getIntOption("numRandomOperations", 1000);
numOpLeft = numRandomOperations;
}
void runTests() override { randomOperations(); }
private:
enum OpType { OP_CANCEL_GET, OP_CANCEL_AFTER_FIRST_GET, OP_LAST = OP_CANCEL_AFTER_FIRST_GET };
// The number of operations to be executed
int numRandomOperations;
// Operations counter
int numOpLeft;
// Start multiple concurrent gets and cancel the transaction
void randomCancelGetTx(TTaskFct cont) {
int numKeys = Random::get().randomInt(1, maxKeysPerTransaction);
auto keys = std::make_shared<std::vector<std::string>>();
for (int i = 0; i < numKeys; i++) {
keys->push_back(randomKey(readExistingKeysRatio));
}
execTransaction(
[keys](auto ctx) {
std::vector<Future> futures;
for (const auto& key : *keys) {
futures.push_back(ctx->tx()->get(key, false));
}
ctx->done();
},
[this, cont]() { schedule(cont); });
}
// Start multiple concurrent gets and cancel the transaction after the first get returns
void randomCancelAfterFirstResTx(TTaskFct cont) {
int numKeys = Random::get().randomInt(1, maxKeysPerTransaction);
auto keys = std::make_shared<std::vector<std::string>>();
for (int i = 0; i < numKeys; i++) {
keys->push_back(randomKey(readExistingKeysRatio));
}
execTransaction(
[this, keys](auto ctx) {
std::vector<ValueFuture> futures;
for (const auto& key : *keys) {
futures.push_back(ctx->tx()->get(key, false));
}
for (int i = 0; i < keys->size(); i++) {
ValueFuture f = futures[i];
auto expectedVal = store.get((*keys)[i]);
ctx->continueAfter(f, [expectedVal, f, this, ctx]() {
auto val = f.getValue();
if (expectedVal != val) {
error(fmt::format(
"cancelAfterFirstResTx mismatch. expected: {:.80} actual: {:.80}", expectedVal, val));
}
ctx->done();
});
}
},
[this, cont]() { schedule(cont); });
}
void randomOperation(TTaskFct cont) {
OpType txType = (OpType)Random::get().randomInt(0, OP_LAST);
switch (txType) {
case OP_CANCEL_GET:
randomCancelGetTx(cont);
break;
case OP_CANCEL_AFTER_FIRST_GET:
randomCancelAfterFirstResTx(cont);
break;
}
}
void randomOperations() {
if (numOpLeft == 0)
return;
numOpLeft--;
randomOperation([this]() { randomOperations(); });
}
};
WorkloadFactory<CancelTransactionWorkload> MiscTestWorkloadFactory("CancelTransaction");
} // namespace FdbApiTester

View File

@ -0,0 +1,227 @@
/*
* TesterCorrectnessWorkload.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "TesterApiWorkload.h"
#include "TesterUtil.h"
#include <memory>
#include <fmt/format.h>
namespace FdbApiTester {
class ApiCorrectnessWorkload : public ApiWorkload {
public:
// Reads the "numRandomOperations" option (1000 is passed as the second argument) and starts the
// counter of remaining operations at that value. numRandomOperations is declared before numOpLeft,
// so it is initialized first.
ApiCorrectnessWorkload(const WorkloadConfig& config)
  : ApiWorkload(config),
    numRandomOperations(config.getIntOption("numRandomOperations", 1000)),
    numOpLeft(numRandomOperations) {}
// Test body of the workload: starts the chain of random operations
void runTests() override { randomOperations(); }
private:
// Types of random operations. OP_LAST aliases the last operation so that randomInt(0, OP_LAST)
// covers every operation type.
enum OpType { OP_INSERT, OP_GET, OP_CLEAR, OP_CLEAR_RANGE, OP_COMMIT_READ, OP_LAST = OP_COMMIT_READ };
// The number of operations to be executed
int numRandomOperations;
// Operations counter
int numOpLeft;
// Inserts a random number (1..maxKeysPerTransaction, as passed to randomInt) of new key-value pairs in
// one transaction. After the transaction completes, the pairs are applied to the in-memory store and
// `cont` is scheduled.
void randomInsertOp(TTaskFct cont) {
    const int count = Random::get().randomInt(1, maxKeysPerTransaction);
    // Shared between the transaction body and the completion callback
    auto batch = std::make_shared<std::vector<KeyValue>>();
    for (int remaining = count; remaining > 0; --remaining) {
        batch->push_back(KeyValue{ randomNotExistingKey(), randomValue() });
    }

    auto writeBatch = [batch](auto ctx) {
        for (const auto& entry : *batch) {
            ctx->tx()->set(entry.key, entry.value);
        }
        ctx->commit();
    };
    auto onCompleted = [this, batch, cont]() {
        for (const auto& entry : *batch) {
            store.set(entry.key, entry.value);
        }
        schedule(cont);
    };
    execTransaction(writeBatch, onCompleted);
}
// Writes a random batch of key-value pairs in one transaction and, once it has completed, reads the same
// keys back in a second transaction and compares them with the in-memory store.
// A mismatch is reported through error() and then fails the ASSERT. `cont` is scheduled after the
// comparison.
void randomCommitReadOp(TTaskFct cont) {
    int numKeys = Random::get().randomInt(1, maxKeysPerTransaction);
    auto kvPairs = std::make_shared<std::vector<KeyValue>>();
    for (int i = 0; i < numKeys; i++) {
        kvPairs->push_back(KeyValue{ randomKey(readExistingKeysRatio), randomValue() });
    }
    execTransaction(
        [kvPairs](auto ctx) {
            for (const KeyValue& kv : *kvPairs) {
                ctx->tx()->set(kv.key, kv.value);
            }
            ctx->commit();
        },
        [this, kvPairs, cont]() {
            // Mirror the committed writes in the in-memory store before reading them back
            for (const KeyValue& kv : *kvPairs) {
                store.set(kv.key, kv.value);
            }
            auto results = std::make_shared<std::vector<std::optional<std::string>>>();
            execTransaction(
                [kvPairs, results](auto ctx) {
                    // TODO: Enable after merging with GRV caching
                    // ctx->tx()->setOption(FDB_TR_OPTION_USE_GRV_CACHE);
                    auto futures = std::make_shared<std::vector<Future>>();
                    for (const auto& kv : *kvPairs) {
                        futures->push_back(ctx->tx()->get(kv.key, false));
                    }
                    ctx->continueAfterAll(*futures, [ctx, futures, results]() {
                        // Start from an empty list (presumably because this body can run more than once)
                        results->clear();
                        for (auto& f : *futures) {
                            // The futures were stored as base Future objects; the cast re-exposes getValue()
                            results->push_back(((ValueFuture&)f).getValue());
                        }
                        ASSERT(results->size() == futures->size());
                        ctx->done();
                    });
                },
                [this, kvPairs, results, cont]() {
                    ASSERT(results->size() == kvPairs->size());
                    // The pair count originates from an int (numKeys), so the conversion is lossless and
                    // avoids comparing a signed index with an unsigned size.
                    const int pairCount = static_cast<int>(kvPairs->size());
                    for (int i = 0; i < pairCount; i++) {
                        auto expected = store.get((*kvPairs)[i].key);
                        auto actual = (*results)[i];
                        if (actual != expected) {
                            error(
                                fmt::format("randomCommitReadOp mismatch. key: {} expected: {:.80} actual: {:.80}",
                                            (*kvPairs)[i].key,
                                            expected,
                                            actual));
                            ASSERT(false);
                        }
                    }
                    schedule(cont);
                });
        });
}
// Reads a random batch of keys in one transaction and compares every value read with the in-memory
// store. Mismatches are reported through error(). `cont` is scheduled after the comparison.
void randomGetOp(TTaskFct cont) {
    int numKeys = Random::get().randomInt(1, maxKeysPerTransaction);
    auto keys = std::make_shared<std::vector<std::string>>();
    auto results = std::make_shared<std::vector<std::optional<std::string>>>();
    for (int i = 0; i < numKeys; i++) {
        keys->push_back(randomKey(readExistingKeysRatio));
    }
    execTransaction(
        [keys, results](auto ctx) {
            auto futures = std::make_shared<std::vector<Future>>();
            for (const auto& key : *keys) {
                futures->push_back(ctx->tx()->get(key, false));
            }
            ctx->continueAfterAll(*futures, [ctx, futures, results]() {
                // Start from an empty list (presumably because this body can run more than once)
                results->clear();
                for (auto& f : *futures) {
                    // The futures were stored as base Future objects; the cast re-exposes getValue()
                    results->push_back(((ValueFuture&)f).getValue());
                }
                ASSERT(results->size() == futures->size());
                ctx->done();
            });
        },
        [this, keys, results, cont]() {
            ASSERT(results->size() == keys->size());
            // The key count originates from an int (numKeys), so the conversion is lossless and
            // avoids comparing a signed index with an unsigned size.
            const int keyCount = static_cast<int>(keys->size());
            for (int i = 0; i < keyCount; i++) {
                auto expected = store.get((*keys)[i]);
                if ((*results)[i] != expected) {
                    error(fmt::format("randomGetOp mismatch. key: {} expected: {:.80} actual: {:.80}",
                                      (*keys)[i],
                                      expected,
                                      (*results)[i]));
                }
            }
            schedule(cont);
        });
}
// Clear a random batch of keys obtained from randomExistingKey() in one committed
// transaction, then remove the same keys from the local store and schedule cont.
void randomClearOp(TTaskFct cont) {
    const int keyCount = Random::get().randomInt(1, maxKeysPerTransaction);
    auto keys = std::make_shared<std::vector<std::string>>();
    for (int n = 0; n < keyCount; ++n) {
        keys->push_back(randomExistingKey());
    }

    // Transaction body: clear every selected key and commit
    auto clearBatch = [keys](auto txCtx) {
        for (const auto& name : *keys) {
            txCtx->tx()->clear(name);
        }
        txCtx->commit();
    };

    // Runs after the transaction has completed: apply the same clears locally
    auto mirrorToStore = [this, keys, cont]() {
        for (const auto& name : *keys) {
            store.clear(name);
        }
        schedule(cont);
    };

    execTransaction(clearBatch, mirrorToStore);
}
// Clear the range between two random key names in one committed transaction, then clear
// the same range in the local store and schedule cont.
void randomClearRangeOp(TTaskFct cont) {
    std::string lower = randomKeyName();
    std::string upper = randomKeyName();
    // Order the two bounds so that lower <= upper
    if (upper < lower) {
        lower.swap(upper);
    }

    auto clearInDatabase = [lower, upper](auto txCtx) {
        txCtx->tx()->clearRange(lower, upper);
        txCtx->commit();
    };

    auto clearInStore = [this, lower, upper, cont]() {
        store.clear(lower, upper);
        schedule(cont);
    };

    execTransaction(clearInDatabase, clearInStore);
}
// Run one randomly chosen operation and pass cont on to it.
// While the local store is empty the operation is always an insert and no random
// number is drawn.
void randomOperation(TTaskFct cont) {
    OpType txType = OP_INSERT;
    if (store.size() != 0) {
        txType = (OpType)Random::get().randomInt(0, OP_LAST);
    }

    if (txType == OP_INSERT) {
        randomInsertOp(cont);
    } else if (txType == OP_GET) {
        randomGetOp(cont);
    } else if (txType == OP_CLEAR) {
        randomClearOp(cont);
    } else if (txType == OP_CLEAR_RANGE) {
        randomClearRangeOp(cont);
    } else if (txType == OP_COMMIT_READ) {
        randomCommitReadOp(cont);
    }
    // Any other value starts no operation, so cont is not invoked in that case
}
// Run the remaining random operations one after another: each operation's continuation
// starts the next one until numOpLeft reaches zero.
void randomOperations() {
    if (numOpLeft != 0) {
        numOpLeft--;
        randomOperation([this]() { randomOperations(); });
    }
}
};
// Global factory object for ApiCorrectnessWorkload, constructed with the name "ApiCorrectness"
// (presumably the name by which test specifications select this workload - confirm in WorkloadFactory)
WorkloadFactory<ApiCorrectnessWorkload> ApiCorrectnessWorkloadFactory("ApiCorrectness");
} // namespace FdbApiTester

View File

@ -0,0 +1,167 @@
/*
* TesterKeyValueStore.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "TesterKeyValueStore.h"
namespace FdbApiTester {
// Get the value associated with a key
// Returns an empty optional when the key is not stored
std::optional<std::string> KeyValueStore::get(std::string_view key) const {
    std::unique_lock<std::mutex> lock(mutex);
    // The map is declared with the transparent comparator std::less<>, so it can be
    // searched with the string_view itself; no temporary std::string key is needed
    auto value = store.find(key);
    if (value == store.end())
        return std::nullopt;
    return value->second;
}
// Checks if the key exists
bool KeyValueStore::exists(std::string_view key) {
    std::unique_lock<std::mutex> lock(mutex);
    // The map is declared with the transparent comparator std::less<>, so it can be
    // searched with the string_view itself; no temporary std::string key is needed
    return store.find(key) != store.end();
}
// Returns the key designated by a key selector
//
// The selector first resolves to a base position: the last stored key that is less than
// keyName, or less than or equal to keyName when orEqual is set. It then moves offset
// entries from that base: forward for a positive offset, backward for a negative one.
//
// Returns startKey() when the selector runs off the front of the store and endKey() when
// it runs off the back.
std::string KeyValueStore::getKey(std::string_view keyName, bool orEqual, int offset) const {
std::unique_lock<std::mutex> lock(mutex);
// Begin by getting the start key referenced by the key selector
// (lower_bound yields the first stored key that is not less than keyName)
// NOTE(review): the declared iterator type names a map with the default comparator while
// the store uses std::less<>; this relies on both maps sharing one iterator type - confirm
// on all supported standard libraries
std::map<std::string, std::string>::const_iterator mapItr = store.lower_bound(keyName);
// Update the iterator position if necessary based on the value of orEqual
// count is the number of forward steps already accounted for by the current position
int count = 0;
if (offset <= 0) {
// Step back to the base position unless the iterator already sits on keyName itself
// and orEqual allows an exact match
if (mapItr == store.end() || keyName != mapItr->first || !orEqual) {
// Nothing precedes the iterator: the selector falls off the front
if (mapItr == store.begin())
return startKey();
mapItr--;
}
} else {
// No stored key at or after keyName: the selector falls off the back
if (mapItr == store.end())
return endKey();
// With orEqual an exact match is the base itself, so skip past it
if (keyName == mapItr->first && orEqual) {
mapItr++;
}
// The iterator is now one entry past the base, which is the first of the offset steps
count++;
}
// Increment the map iterator until the desired offset is reached
// (stops early when either end of the map is hit)
for (; count < abs(offset); count++) {
if (offset < 0) {
if (mapItr == store.begin())
break;
mapItr--;
} else {
if (mapItr == store.end())
break;
mapItr++;
}
}
// end() means the walk went past the last key; a full count means the target key was
// reached; otherwise the backward walk stopped early at the first key
if (mapItr == store.end())
return endKey();
else if (count == abs(offset))
return mapItr->first;
else
return startKey();
}
// Gets the key-value pairs whose keys lie in [begin, end), returning a maximum of <limit>
// results; the pairs are in ascending key order, or in descending order when <reverse> is set
std::vector<KeyValue> KeyValueStore::getRange(std::string_view begin,
                                              std::string_view end,
                                              int limit,
                                              bool reverse) const {
    std::unique_lock<std::mutex> lock(mutex);
    std::vector<KeyValue> results;

    if (reverse) {
        // Support for reverse getRange queries is supported, but not tested at this time. This is because
        // reverse range queries have been disallowed by the database at the API level
        auto cursor = store.lower_bound(end);
        // No key precedes <end>, so the range is empty
        if (cursor == store.begin())
            return results;
        // Start at the last key below <end> and walk towards the front
        --cursor;
        while (cursor->first >= begin && results.size() < abs(limit)) {
            results.push_back(KeyValue{ cursor->first, cursor->second });
            // The first key has just been consumed; stepping further back is not allowed
            if (cursor == store.begin())
                break;
            --cursor;
        }
        return results;
    }

    auto cursor = store.lower_bound(begin);
    while (cursor != store.end() && cursor->first < end && results.size() < limit) {
        results.push_back(KeyValue{ cursor->first, cursor->second });
        ++cursor;
    }
    return results;
}
// Stores a key-value pair in the database, replacing the value of a key that is already present
void KeyValueStore::set(std::string_view key, std::string_view value) {
    std::unique_lock<std::mutex> lock(mutex);
    store.insert_or_assign(std::string(key), std::string(value));
}
// Removes a key from the database; does nothing when the key is not stored
void KeyValueStore::clear(std::string_view key) {
    std::unique_lock<std::mutex> lock(mutex);
    if (auto pos = store.find(key); pos != store.end()) {
        store.erase(pos);
    }
}
// Removes a range of keys from the database
// Every key in [begin, end) is removed. An empty or inverted range (begin >= end) removes nothing.
void KeyValueStore::clear(std::string_view begin, std::string_view end) {
    // With begin > end the first iterator would lie after the second one, and erasing
    // such an iterator range is undefined behavior
    if (begin >= end)
        return;
    std::unique_lock<std::mutex> lock(mutex);
    store.erase(store.lower_bound(begin), store.lower_bound(end));
}
// The number of keys in the database
uint64_t KeyValueStore::size() const {
    std::lock_guard<std::mutex> guard(mutex);
    const uint64_t keyCount = store.size();
    return keyCount;
}
// The first key in the database (the empty string); returned by key selectors that choose a key off the front
std::string KeyValueStore::startKey() const {
    return std::string();
}
// The last key in the database (the single byte 0xff); returned by key selectors that choose a key off the back
std::string KeyValueStore::endKey() const {
    return std::string("\xff");
}
// Debugging function that prints a "Contents:" header followed by every stored key, one per line
void KeyValueStore::printContents() const {
    std::unique_lock<std::mutex> lock(mutex);
    printf("Contents:\n");
    for (const auto& entry : store) {
        printf("%s\n", entry.first.c_str());
    }
}
} // namespace FdbApiTester

View File

@ -0,0 +1,83 @@
/*
* TesterKeyValueStore.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#ifndef APITESTER_KEY_VALUE_STORE_H
#define APITESTER_KEY_VALUE_STORE_H
#include <map>
#include <optional>
#include <string>
#include <string_view>
#include <vector>
#include <mutex>
namespace FdbApiTester {
// A key together with the value stored under it
struct KeyValue {
// The key
std::string key;
// The value associated with the key
std::string value;
};
// An in-memory ordered key-value store: a std::map of string keys to string values paired
// with a mutex that the accessors in TesterKeyValueStore.cpp lock while touching the map
class KeyValueStore {
public:
// Get the value associated with a key
// (an empty optional when the key is not stored)
std::optional<std::string> get(std::string_view key) const;
// Checks if the key exists
// NOTE(review): unlike the other read-only accessors this one is not declared const
bool exists(std::string_view key);
// Returns the key designated by a key selector
std::string getKey(std::string_view keyName, bool orEqual, int offset) const;
// Gets a range of key-value pairs, returning a maximum of <limit> results
std::vector<KeyValue> getRange(std::string_view begin, std::string_view end, int limit, bool reverse) const;
// Stores a key-value pair in the database
void set(std::string_view key, std::string_view value);
// Removes a key from the database
void clear(std::string_view key);
// Removes a range of keys from the database
void clear(std::string_view begin, std::string_view end);
// The number of keys in the database
uint64_t size() const;
// The first key in the database; returned by key selectors that choose a key off the front
std::string startKey() const;
// The last key in the database; returned by key selectors that choose a key off the back
std::string endKey() const;
// Debugging function that prints all key-value pairs
void printContents() const;
private:
// A map holding the key-value pairs
// (std::less<> is a transparent comparator, which lets lookups take a std::string_view)
std::map<std::string, std::string, std::less<>> store;
// Guards store; mutable so that const accessors can lock it
mutable std::mutex mutex;
};
} // namespace FdbApiTester
#endif

View File

@ -0,0 +1,49 @@
/*
* TesterOptions.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#ifndef APITESTER_TESTER_OPTIONS_H
#define APITESTER_TESTER_OPTIONS_H
#include "TesterTestSpec.h"
namespace FdbApiTester {
// Settings of a single tester run, together with the test specification it executes
class TesterOptions {
public:
    // Cluster file name
    std::string clusterFile;
    // Tracing flag and the trace settings that go with it
    bool trace = false;
    std::string traceDir;
    std::string traceFormat;
    std::string logGroup;
    // Name of the external client library
    std::string externalClientLibrary;
    // Name of the test file
    std::string testFile;
    // Concrete sizes used by this run. They are zero-initialized so that a value that was
    // never assigned is a well-defined 0 instead of an indeterminate int.
    // NOTE(review): presumably filled in from the TestSpec [min,max] ranges by start-up code
    // that is not visible here - confirm that nothing relies on a non-zero default
    int numFdbThreads = 0;
    int numClientThreads = 0;
    int numDatabases = 0;
    int numClients = 0;
    // Pairs of strings describing knobs (presumably name and value - confirm against the option parser)
    std::vector<std::pair<std::string, std::string>> knobs;
    // The test specification
    TestSpec testSpec;
};
} // namespace FdbApiTester
#endif

View File

@ -0,0 +1,67 @@
/*
* TesterScheduler.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "TesterScheduler.h"
#include "TesterUtil.h"
#include <memory>
#include <thread>
#include <boost/asio.hpp>
using namespace boost::asio;
namespace FdbApiTester {
// A task that does nothing when invoked
const TTaskFct NO_OP_TASK = []() {};
class AsioScheduler : public IScheduler {
public:
AsioScheduler(int numThreads) : numThreads(numThreads) {}
void start() override {
work = require(io_ctx.get_executor(), execution::outstanding_work.tracked);
for (int i = 0; i < numThreads; i++) {
threads.emplace_back([this]() { io_ctx.run(); });
}
}
void schedule(TTaskFct task) override { post(io_ctx, task); }
void stop() override { work = any_io_executor(); }
void join() override {
for (auto& th : threads) {
th.join();
}
}
private:
int numThreads;
std::vector<std::thread> threads;
io_context io_ctx;
any_io_executor work;
};
std::unique_ptr<IScheduler> createScheduler(int numThreads) {
ASSERT(numThreads > 0 && numThreads <= 1000);
return std::make_unique<AsioScheduler>(numThreads);
}
} // namespace FdbApiTester

View File

@ -0,0 +1,60 @@
/*
* TesterScheduler.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#ifndef APITESTER_SCHEDULER_H
#define APITESTER_SCHEDULER_H
#include <functional>
#include <memory>
namespace FdbApiTester {
// A task: a callable taking no arguments and returning nothing
using TTaskFct = std::function<void(void)>;
// A predefined task object, defined in TesterScheduler.cpp as an empty lambda
extern const TTaskFct NO_OP_TASK;
/**
* Scheduler for asynchronous execution of tasks on a pool of threads
*/
class IScheduler {
public:
virtual ~IScheduler() {}
// Create scheduler threads and begin accepting tasks
virtual void start() = 0;
// Schedule a task for asynchronous execution
virtual void schedule(TTaskFct task) = 0;
// Gracefully stop the scheduler
// NOTE(review): the original comment said this waits for already running tasks to finish;
// the AsioScheduler implementation only releases its work tracker and returns, leaving the
// waiting to join() - confirm the intended contract
virtual void stop() = 0;
// Join with all threads of the scheduler
virtual void join() = 0;
};
// Create a scheduler using the given number of threads
// (the implementation in TesterScheduler.cpp asserts that numThreads is between 1 and 1000)
std::unique_ptr<IScheduler> createScheduler(int numThreads);
} // namespace FdbApiTester
#endif

View File

@ -0,0 +1,169 @@
/*
* TesterTestSpec.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "TesterTestSpec.h"
#include "TesterUtil.h"
#include <toml.hpp>
#include <fmt/format.h>
#include <functional>
namespace FdbApiTester {
namespace {
// Parse a decimal integer option and store it in res
//
// value      - textual value taken from the test file
// optionName - name of the option, used in error messages
// res        - receives the parsed value; left untouched when an error is thrown
// minVal, maxVal - inclusive range of accepted values
//
// Throws TesterError when value is empty, is not entirely a decimal integer, or lies
// outside [minVal, maxVal].
void processIntOption(const std::string& value, const std::string& optionName, int& res, int minVal, int maxVal) {
    char* endptr = nullptr;
    // Keep the full width of strtol's result until the range check is done: narrowing to int
    // first can wrap an out-of-range value such as "4294967297" around to a small number
    // that then passes the check
    const long parsed = strtol(value.c_str(), &endptr, 10);
    // endptr still at the start means that no digits were consumed (for example an empty value)
    if (endptr == value.c_str() || *endptr != '\0') {
        throw TesterError(fmt::format("Invalid test file. Invalid value {} for {}", value, optionName));
    }
    if (parsed < minVal || parsed > maxVal) {
        throw TesterError(
            fmt::format("Invalid test file. Value for {} must be between {} and {}", optionName, minVal, maxVal));
    }
    // Within [minVal, maxVal], so the value fits into an int
    res = static_cast<int>(parsed);
}
// Handlers for the test-level settings of a test file, keyed by setting name. Each handler
// receives the textual value and stores the converted result in the TestSpec.
// Boolean settings become true only for the exact text "true"; integer settings are
// parsed and range-checked by processIntOption.
std::unordered_map<std::string, std::function<void(const std::string& value, TestSpec* spec)>> testSpecTestKeys = {
    { "title", [](const std::string& text, TestSpec* out) { out->title = text; } },
    { "apiVersion",
      [](const std::string& text, TestSpec* out) { processIntOption(text, "apiVersion", out->apiVersion, 700, 710); } },
    { "blockOnFutures", [](const std::string& text, TestSpec* out) { out->blockOnFutures = (text == "true"); } },
    { "buggify", [](const std::string& text, TestSpec* out) { out->buggify = (text == "true"); } },
    { "multiThreaded", [](const std::string& text, TestSpec* out) { out->multiThreaded = (text == "true"); } },
    { "fdbCallbacksOnExternalThreads",
      [](const std::string& text, TestSpec* out) { out->fdbCallbacksOnExternalThreads = (text == "true"); } },
    { "databasePerTransaction",
      [](const std::string& text, TestSpec* out) { out->databasePerTransaction = (text == "true"); } },
    { "minFdbThreads",
      [](const std::string& text, TestSpec* out) {
          processIntOption(text, "minFdbThreads", out->minFdbThreads, 1, 1000);
      } },
    { "maxFdbThreads",
      [](const std::string& text, TestSpec* out) {
          processIntOption(text, "maxFdbThreads", out->maxFdbThreads, 1, 1000);
      } },
    { "minClientThreads",
      [](const std::string& text, TestSpec* out) {
          processIntOption(text, "minClientThreads", out->minClientThreads, 1, 1000);
      } },
    { "maxClientThreads",
      [](const std::string& text, TestSpec* out) {
          processIntOption(text, "maxClientThreads", out->maxClientThreads, 1, 1000);
      } },
    { "minDatabases",
      [](const std::string& text, TestSpec* out) {
          processIntOption(text, "minDatabases", out->minDatabases, 1, 1000);
      } },
    { "maxDatabases",
      [](const std::string& text, TestSpec* out) {
          processIntOption(text, "maxDatabases", out->maxDatabases, 1, 1000);
      } },
    { "minClients",
      [](const std::string& text, TestSpec* out) { processIntOption(text, "minClients", out->minClients, 1, 1000); } },
    { "maxClients",
      [](const std::string& text, TestSpec* out) { processIntOption(text, "maxClients", out->maxClients, 1, 1000); } }
};
// Convert a TOML value to the plain text form used by the test specification.
// TOML formatting converts numbers to strings exactly how they're in the file, so the
// formatted text is used as is. Strings are formatted with surrounding quotes, which are
// removed here by dropping the first and the last character.
template <typename T>
std::string toml_to_string(const T& value) {
    const bool isString = (value.type() == toml::value_t::string);
    std::string text = toml::format(value);
    if (!isString) {
        return text;
    }
    return text.substr(1, text.size() - 2);
}
} // namespace
// Read a test specification from a TOML file.
// The file must contain exactly one [test] section. Every key of that section other than
// "workload" must be a known test-level setting; each entry of the "workload" array
// becomes a WorkloadSpec holding all its attributes as options and must specify a "name".
// Throws TesterError for a file that violates these rules.
TestSpec readTomlTestSpec(std::string fileName) {
    const toml::value& conf = toml::parse(fileName);

    // Locate the single [test] section
    const toml::array& tests = toml::find(conf, "test").as_array();
    if (tests.empty()) {
        throw TesterError("Invalid test file. No [test] section found");
    }
    if (tests.size() > 1) {
        throw TesterError("Invalid test file. More than one [test] section found");
    }
    const toml::value& test = tests.front();

    // Apply the test-level settings through their handlers
    TestSpec spec;
    for (const auto& [k, v] : test.as_table()) {
        if (k == "workload") {
            continue;
        }
        auto handler = testSpecTestKeys.find(k);
        if (handler == testSpecTestKeys.end()) {
            throw TesterError(fmt::format(
                "Invalid test file. Unrecognized test parameter. Name: {}, value {}", k, toml_to_string(v)));
        }
        handler->second(toml_to_string(v), &spec);
    }

    // Copy the attributes of every workload into its options
    const toml::array& workloads = toml::find(test, "workload").as_array();
    for (const toml::value& workload : workloads) {
        WorkloadSpec workloadSpec;
        for (const auto& [attrib, v] : workload.as_table()) {
            workloadSpec.options[attrib] = toml_to_string(v);
        }
        auto nameEntry = workloadSpec.options.find("name");
        if (nameEntry == workloadSpec.options.end()) {
            throw TesterError("Invalid test file. Unspecified workload name.");
        }
        workloadSpec.name = nameEntry->second;
        spec.workloads.push_back(workloadSpec);
    }
    return spec;
}
} // namespace FdbApiTester

View File

@ -0,0 +1,90 @@
/*
* TesterTestSpec.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#ifndef APITESTER_CONFIG_READER_H
#define APITESTER_CONFIG_READER_H
#include <string>
#include <unordered_map>
#include <vector>
#define FDB_API_VERSION 710
namespace FdbApiTester {
/// Workload specification
struct WorkloadSpec {
// Name of the workload
std::string name;
// Options of the workload as name -> textual value
std::unordered_map<std::string, std::string> options;
};
// Test specification loaded from a *.toml file
struct TestSpec {
// Title of the test
std::string title;
// FDB API version, using the latest version by default
int apiVersion = FDB_API_VERSION;
// Use blocking waits on futures instead of scheduling callbacks
bool blockOnFutures = false;
// Use multi-threaded FDB client
bool multiThreaded = false;
// Enable injection of errors in FDB client
bool buggify = false;
// Execute future callbacks on the threads of the external FDB library
// rather than on the main thread of the local FDB client library
bool fdbCallbacksOnExternalThreads = false;
// Execute each transaction in a separate database instance
bool databasePerTransaction = false;
// Size of the FDB client thread pool (a random number in the [min,max] range)
int minFdbThreads = 1;
int maxFdbThreads = 1;
// Size of the thread pool for test workloads (a random number in the [min,max] range)
int minClientThreads = 1;
int maxClientThreads = 1;
// Size of the database instance pool (a random number in the [min,max] range)
// Each transaction is assigned randomly to one of the databases in the pool
int minDatabases = 1;
int maxDatabases = 1;
// Number of workload clients (a random number in the [min,max] range)
// Note that the default maximum here is 10, unlike the other ranges
int minClients = 1;
int maxClients = 10;
// List of workloads with their options
std::vector<WorkloadSpec> workloads;
};
// Read the test specification from a *.toml file
TestSpec readTomlTestSpec(std::string fileName);
} // namespace FdbApiTester
#endif

View File

@ -0,0 +1,471 @@
/*
* TesterTransactionExecutor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "TesterTransactionExecutor.h"
#include "TesterUtil.h"
#include "test/apitester/TesterScheduler.h"
#include <memory>
#include <unordered_map>
#include <mutex>
#include <atomic>
#include <chrono>
#include <thread>
#include <fmt/format.h>
namespace FdbApiTester {
// Record the final result of the transaction and drop the reference to its context
// NOTE(review): keep this statement order. The transaction contexts hold a shared_ptr to
// their actor, so clearing `context` may release the last owner of this object; `error`
// therefore has to be written first. Confirm against the declaration of `context`.
void TransactionActorBase::complete(fdb_error_t err) {
error = err;
context = {};
}
void ITransactionContext::continueAfterAll(std::vector<Future> futures, TTaskFct cont) {
auto counter = std::make_shared<std::atomic<int>>(futures.size());
auto errorCode = std::make_shared<std::atomic<fdb_error_t>>(error_code_success);
auto thisPtr = shared_from_this();
for (auto& f : futures) {
continueAfter(
f,
[thisPtr, f, counter, errorCode, cont]() {
if (f.getError() != error_code_success) {
(*errorCode) = f.getError();
}
if (--(*counter) == 0) {
if (*errorCode == error_code_success) {
// all futures successful -> continue
cont();
} else {
// at least one future failed -> retry the transaction
thisPtr->onError(*errorCode);
}
}
},
false);
}
}
/**
* Transaction context base class, containing reusable functionality
*
* Holds the FDB transaction, the actor driving it, the continuation to run after
* completion and the execution state. Subclasses provide doContinueAfter(), i.e. the way
* of waiting for a future.
*/
class TransactionContextBase : public ITransactionContext {
public:
// tx        - FDB transaction handle wrapped by fdbTx
// txActor   - actor implementing the transaction workflow
// cont      - continuation to call after the transaction has completed
// scheduler - scheduler stored for use by the subclasses
TransactionContextBase(FDBTransaction* tx,
std::shared_ptr<ITransactionActor> txActor,
TTaskFct cont,
IScheduler* scheduler)
: fdbTx(tx), txActor(txActor), contAfterDone(cont), scheduler(scheduler), txState(TxState::IN_PROGRESS) {}
// A state machine:
// IN_PROGRESS -> (ON_ERROR -> IN_PROGRESS)* [-> ON_ERROR] -> DONE
enum class TxState { IN_PROGRESS, ON_ERROR, DONE };
// Access the wrapped transaction
Transaction* tx() override { return &fdbTx; }
// Set a continuation to be executed when a future gets ready
void continueAfter(Future f, TTaskFct cont, bool retryOnError) override { doContinueAfter(f, cont, retryOnError); }
// Complete the transaction with a commit
// Does nothing unless the transaction is IN_PROGRESS. The transaction is completed through
// done() once the commit future is ready; commit errors are retried (retryOnError = true).
void commit() override {
std::unique_lock<std::mutex> lock(mutex);
if (txState != TxState::IN_PROGRESS) {
return;
}
// The state check is done under the mutex; the commit itself is issued without holding it
lock.unlock();
Future f = fdbTx.commit();
auto thisRef = shared_from_this();
doContinueAfter(
f, [thisRef]() { thisRef->done(); }, true);
}
// Complete the transaction without a commit (for read transactions)
// Does nothing unless the transaction is IN_PROGRESS
void done() override {
std::unique_lock<std::mutex> lock(mutex);
if (txState != TxState::IN_PROGRESS) {
return;
}
txState = TxState::DONE;
lock.unlock();
// cancel transaction so that any pending operations on it
// fail gracefully
fdbTx.cancel();
txActor->complete(error_code_success);
cleanUp();
contAfterDone();
}
protected:
// Arrange for cont to be executed when f gets ready; implemented by the subclasses
virtual void doContinueAfter(Future f, TTaskFct cont, bool retryOnError) = 0;
// Clean up transaction state after completing the transaction
// Note that the object may live longer, because it is referenced
// by not yet triggered callbacks
virtual void cleanUp() {
// NOTE(review): txState is read here without holding the mutex
ASSERT(txState == TxState::DONE);
ASSERT(!onErrorFuture);
txActor = {};
}
// Complete the transaction with an (unretriable) error
// Does nothing if the transaction is already DONE
void transactionFailed(fdb_error_t err) {
ASSERT(err != error_code_success);
std::unique_lock<std::mutex> lock(mutex);
if (txState == TxState::DONE) {
return;
}
txState = TxState::DONE;
lock.unlock();
txActor->complete(err);
cleanUp();
contAfterDone();
}
// Handle the result of a transaction onError call
// If the onError future failed, the transaction fails with that error; otherwise the
// transaction goes back to IN_PROGRESS and the actor is started again
void handleOnErrorResult() {
// NOTE(review): txState is read here without holding the mutex
ASSERT(txState == TxState::ON_ERROR);
fdb_error_t err = onErrorFuture.getError();
// Release the future before leaving the ON_ERROR state; cleanUp() asserts that it is unset
onErrorFuture = {};
if (err) {
transactionFailed(err);
} else {
std::unique_lock<std::mutex> lock(mutex);
txState = TxState::IN_PROGRESS;
lock.unlock();
txActor->start();
}
}
// FDB transaction
Transaction fdbTx;
// Actor implementing the transaction workflow
std::shared_ptr<ITransactionActor> txActor;
// Mutex protecting access to shared mutable state
std::mutex mutex;
// Continuation to be called after completion of the transaction
TTaskFct contAfterDone;
// Reference to the scheduler
IScheduler* scheduler;
// Transaction execution state
TxState txState;
// onError future used in ON_ERROR state
Future onErrorFuture;
};
/**
* Transaction context using blocking waits to implement continuations on futures
*/
class BlockingTransactionContext : public TransactionContextBase {
public:
BlockingTransactionContext(FDBTransaction* tx,
std::shared_ptr<ITransactionActor> txActor,
TTaskFct cont,
IScheduler* scheduler)
: TransactionContextBase(tx, txActor, cont, scheduler) {}
protected:
// Schedule a task that blocks until f is ready and then handles its result.
// The scheduled task holds a reference to this context.
void doContinueAfter(Future f, TTaskFct cont, bool retryOnError) override {
auto thisRef = std::static_pointer_cast<BlockingTransactionContext>(shared_from_this());
scheduler->schedule(
[thisRef, f, cont, retryOnError]() mutable { thisRef->blockingContinueAfter(f, cont, retryOnError); });
}
// Block until f is ready, then either schedule cont or start error handling.
// Does nothing unless the transaction is IN_PROGRESS.
void blockingContinueAfter(Future f, TTaskFct cont, bool retryOnError) {
std::unique_lock<std::mutex> lock(mutex);
if (txState != TxState::IN_PROGRESS) {
return;
}
lock.unlock();
// A failure of the wait itself fails the transaction without a retry
fdb_error_t err = fdb_future_block_until_ready(f.fdbFuture());
if (err) {
transactionFailed(err);
return;
}
err = f.getError();
// A cancelled future is ignored: neither the continuation nor error handling runs
if (err == error_code_transaction_cancelled) {
return;
}
// With retryOnError unset the continuation runs even if the future failed
if (err == error_code_success || !retryOnError) {
scheduler->schedule([cont]() { cont(); });
return;
}
onError(err);
}
// Call onError on the transaction, block until its future is ready and schedule the
// handling of the result
virtual void onError(fdb_error_t err) override {
std::unique_lock<std::mutex> lock(mutex);
if (txState != TxState::IN_PROGRESS) {
// Ignore further errors, if the transaction is in the error handling mode or completed
return;
}
txState = TxState::ON_ERROR;
lock.unlock();
ASSERT(!onErrorFuture);
onErrorFuture = fdbTx.onError(err);
fdb_error_t err2 = fdb_future_block_until_ready(onErrorFuture.fdbFuture());
if (err2) {
// NOTE(review): onErrorFuture is still set on this path, while cleanUp(), which is
// called by transactionFailed(), asserts that it is unset - confirm
transactionFailed(err2);
return;
}
auto thisRef = std::static_pointer_cast<BlockingTransactionContext>(shared_from_this());
scheduler->schedule([thisRef]() { thisRef->handleOnErrorResult(); });
}
};
/**
 * Transaction context using callbacks to implement continuations on futures
 */
class AsyncTransactionContext : public TransactionContextBase {
public:
    AsyncTransactionContext(FDBTransaction* tx,
                            std::shared_ptr<ITransactionActor> txActor,
                            TTaskFct cont,
                            IScheduler* scheduler)
      : TransactionContextBase(tx, txActor, cont, scheduler) {}

protected:
    // Register a callback on f that schedules cont once the future is ready.
    // Does nothing unless the transaction is IN_PROGRESS.
    void doContinueAfter(Future f, TTaskFct cont, bool retryOnError) override {
        std::unique_lock<std::mutex> lock(mutex);
        if (txState != TxState::IN_PROGRESS) {
            return;
        }
        // The map entry holds the future, the continuation and a reference to this context
        // until the callback has fired
        callbackMap[f.fdbFuture()] = CallbackInfo{ f, cont, shared_from_this(), retryOnError };
        lock.unlock();
        fdb_error_t err = fdb_future_set_callback(f.fdbFuture(), futureReadyCallback, this);
        if (err) {
            // The callback was not registered: remove the entry again and fail the transaction
            lock.lock();
            callbackMap.erase(f.fdbFuture());
            lock.unlock();
            transactionFailed(err);
        }
    }

    // C callback of a future registered by doContinueAfter; param is the context
    static void futureReadyCallback(FDBFuture* f, void* param) {
        AsyncTransactionContext* txCtx = (AsyncTransactionContext*)param;
        txCtx->onFutureReady(f);
    }

    // Handle a ready future: remove its callback entry, then schedule the continuation or
    // start error handling
    void onFutureReady(FDBFuture* f) {
        injectRandomSleep();
        // Hold a reference to this to avoid it to be
        // destroyed before releasing the mutex
        auto thisRef = shared_from_this();
        std::unique_lock<std::mutex> lock(mutex);
        auto iter = callbackMap.find(f);
        ASSERT(iter != callbackMap.end());
        // Copy the entry before erasing it; the copy keeps the future and the continuation alive
        CallbackInfo cbInfo = iter->second;
        callbackMap.erase(iter);
        if (txState != TxState::IN_PROGRESS) {
            return;
        }
        lock.unlock();
        fdb_error_t err = fdb_future_get_error(f);
        // A cancelled future is ignored: neither the continuation nor error handling runs
        if (err == error_code_transaction_cancelled) {
            return;
        }
        // With retryOnError unset the continuation runs even if the future failed
        if (err == error_code_success || !cbInfo.retryOnError) {
            scheduler->schedule(cbInfo.cont);
            return;
        }
        onError(err);
    }

    // Call onError on the transaction and register a callback handling its result
    virtual void onError(fdb_error_t err) override {
        std::unique_lock<std::mutex> lock(mutex);
        if (txState != TxState::IN_PROGRESS) {
            // Ignore further errors, if the transaction is in the error handling mode or completed
            return;
        }
        txState = TxState::ON_ERROR;
        lock.unlock();
        ASSERT(!onErrorFuture);
        onErrorFuture = tx()->onError(err);
        // Keeps this context alive until the C callback has run
        onErrorThisRef = std::static_pointer_cast<AsyncTransactionContext>(shared_from_this());
        fdb_error_t err2 = fdb_future_set_callback(onErrorFuture.fdbFuture(), onErrorReadyCallback, this);
        if (err2) {
            // The callback was not registered, so onErrorReady() will never run and release
            // the self-reference. Release it here, otherwise the context would keep itself
            // alive forever. The local copy keeps the object valid until
            // transactionFailed() has returned.
            auto thisRef = onErrorThisRef;
            onErrorThisRef = {};
            onErrorFuture = {};
            transactionFailed(err2);
        }
    }

    // C callback of the onError future; param is the context
    static void onErrorReadyCallback(FDBFuture* f, void* param) {
        AsyncTransactionContext* txCtx = (AsyncTransactionContext*)param;
        txCtx->onErrorReady(f);
    }

    // Hand the self-reference over to a scheduled task that handles the onError result
    void onErrorReady(FDBFuture* f) {
        injectRandomSleep();
        auto thisRef = onErrorThisRef;
        onErrorThisRef = {};
        scheduler->schedule([thisRef]() { thisRef->handleOnErrorResult(); });
    }

    void cleanUp() override {
        TransactionContextBase::cleanUp();
        // Cancel all pending operations
        // Note that the callbacks of the cancelled futures will still be called
        std::unique_lock<std::mutex> lock(mutex);
        // Collect the futures under the mutex and cancel them after releasing it
        std::vector<Future> futures;
        for (auto& iter : callbackMap) {
            futures.push_back(iter.second.future);
        }
        lock.unlock();
        for (auto& f : futures) {
            f.cancel();
        }
    }

    // Inject a random sleep with a low probability
    void injectRandomSleep() {
        if (Random::get().randomBool(0.01)) {
            std::this_thread::sleep_for(std::chrono::milliseconds(Random::get().randomInt(1, 5)));
        }
    }

    // Object references for a future callback
    struct CallbackInfo {
        Future future;
        TTaskFct cont;
        std::shared_ptr<ITransactionContext> thisRef;
        bool retryOnError;
    };

    // Map for keeping track of future waits and holding necessary object references
    std::unordered_map<FDBFuture*, CallbackInfo> callbackMap;
    // Holding reference to this for onError future C callback
    std::shared_ptr<AsyncTransactionContext> onErrorThisRef;
};
/**
* Transaction executor base class, containing reusable functionality
*/
class TransactionExecutorBase : public ITransactionExecutor {
public:
TransactionExecutorBase(const TransactionExecutorOptions& options) : options(options), scheduler(nullptr) {}
void init(IScheduler* scheduler, const char* clusterFile) override {
this->scheduler = scheduler;
this->clusterFile = clusterFile;
}
protected:
// Execute the transaction on the given database instance
void executeOnDatabase(FDBDatabase* db, std::shared_ptr<ITransactionActor> txActor, TTaskFct cont) {
FDBTransaction* tx;
fdb_error_t err = fdb_database_create_transaction(db, &tx);
if (err != error_code_success) {
txActor->complete(err);
cont();
} else {
std::shared_ptr<ITransactionContext> ctx;
if (options.blockOnFutures) {
ctx = std::make_shared<BlockingTransactionContext>(tx, txActor, cont, scheduler);
} else {
ctx = std::make_shared<AsyncTransactionContext>(tx, txActor, cont, scheduler);
}
txActor->init(ctx);
txActor->start();
}
}
protected:
TransactionExecutorOptions options;
std::string clusterFile;
IScheduler* scheduler;
};
/**
* Transaction executor load balancing transactions over a fixed pool of databases
*/
class DBPoolTransactionExecutor : public TransactionExecutorBase {
public:
DBPoolTransactionExecutor(const TransactionExecutorOptions& options) : TransactionExecutorBase(options) {}
~DBPoolTransactionExecutor() override { release(); }
void init(IScheduler* scheduler, const char* clusterFile) override {
TransactionExecutorBase::init(scheduler, clusterFile);
for (int i = 0; i < options.numDatabases; i++) {
FDBDatabase* db;
fdb_error_t err = fdb_create_database(clusterFile, &db);
if (err != error_code_success) {
throw TesterError(fmt::format("Failed create database with the cluster file '{}'. Error: {}({})",
clusterFile,
err,
fdb_get_error(err)));
}
databases.push_back(db);
}
}
void execute(std::shared_ptr<ITransactionActor> txActor, TTaskFct cont) override {
int idx = Random::get().randomInt(0, options.numDatabases - 1);
executeOnDatabase(databases[idx], txActor, cont);
}
void release() {
for (FDBDatabase* db : databases) {
fdb_database_destroy(db);
}
}
private:
std::vector<FDBDatabase*> databases;
};
/**
 * Transaction executor executing each transaction on a separate database
 */
class DBPerTransactionExecutor : public TransactionExecutorBase {
public:
	DBPerTransactionExecutor(const TransactionExecutorOptions& options) : TransactionExecutorBase(options) {}

	// Create a fresh database instance, execute the transaction on it and destroy the
	// database just before the continuation is invoked
	void execute(std::shared_ptr<ITransactionActor> txActor, TTaskFct cont) override {
		FDBDatabase* db = nullptr;
		fdb_error_t err = fdb_create_database(clusterFile.c_str(), &db);
		if (err != error_code_success) {
			txActor->complete(err);
			cont();
			// Nothing more to do: without this return the transaction would be started on a
			// null database and the continuation would be invoked a second time
			return;
		}
		executeOnDatabase(db, txActor, [cont, db]() {
			fdb_database_destroy(db);
			cont();
		});
	}
};
// Factory: one database per transaction if requested by the options, a database pool otherwise
std::unique_ptr<ITransactionExecutor> createTransactionExecutor(const TransactionExecutorOptions& options) {
	if (!options.databasePerTransaction) {
		return std::make_unique<DBPoolTransactionExecutor>(options);
	}
	return std::make_unique<DBPerTransactionExecutor>(options);
}
} // namespace FdbApiTester

View File

@ -0,0 +1,145 @@
/*
* TesterTransactionExecutor.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#ifndef APITESTER_TRANSACTION_EXECUTOR_H
#define APITESTER_TRANSACTION_EXECUTOR_H
#include "TesterOptions.h"
#include "TesterApiWrapper.h"
#include "TesterScheduler.h"
#include <string_view>
#include <memory>
namespace FdbApiTester {
/**
 * Interface to be used for implementation of a concrete transaction
 *
 * Derives from std::enable_shared_from_this, so an implementation can obtain a
 * shared_ptr to itself through shared_from_this().
 */
class ITransactionContext : public std::enable_shared_from_this<ITransactionContext> {
public:
virtual ~ITransactionContext() {}
// Current FDB transaction
virtual Transaction* tx() = 0;
// Schedule a continuation to be executed when the future gets ready
// retryOnError controls whether the transaction is retried in case of an error instead
// of calling the continuation
virtual void continueAfter(Future f, TTaskFct cont, bool retryOnError = true) = 0;
// Complete the transaction with a commit
virtual void commit() = 0;
// Retry the transaction on error
virtual void onError(fdb_error_t err) = 0;
// Mark the transaction as completed without committing it (for read transactions)
virtual void done() = 0;
// A continuation to be executed when all of the given futures get ready
// Unlike the other operations this one is not pure virtual; its default implementation
// is defined outside of this header
virtual void continueAfterAll(std::vector<Future> futures, TTaskFct cont);
};
/**
 * Interface of an actor object implementing a concrete transaction
 *
 * An executor first calls init() with the transaction context and then start();
 * complete() reports the final outcome of the transaction.
 */
class ITransactionActor {
public:
virtual ~ITransactionActor() {}
// Initialize with the given transaction context
virtual void init(std::shared_ptr<ITransactionContext> ctx) = 0;
// Start execution of the transaction, also called on retries
virtual void start() = 0;
// Transaction completion result (error_code_success in case of success)
virtual fdb_error_t getErrorCode() = 0;
// Notification about the completion of the transaction
// err is the completion result; executors also call this when a transaction could not be created
virtual void complete(fdb_error_t err) = 0;
};
/**
 * A helper base class for transaction actors
 *
 * Stores the transaction context and the completion result, leaving only start()
 * to be implemented by concrete actors.
 */
class TransactionActorBase : public ITransactionActor {
public:
// Keep the context for use by the derived class (see ctx())
void init(std::shared_ptr<ITransactionContext> ctx) override { context = ctx; }
// The stored completion result; error_code_success until changed
fdb_error_t getErrorCode() override { return error; }
// Defined outside of this header
// NOTE(review): presumably stores err in `error` and may release the context - confirm in the .cpp
void complete(fdb_error_t err) override;
protected:
// Access to the transaction context for derived classes
std::shared_ptr<ITransactionContext> ctx() { return context; }
private:
// Context given to init()
std::shared_ptr<ITransactionContext> context;
// Completion result returned by getErrorCode()
fdb_error_t error = error_code_success;
};
// Type of the lambda functions implementing a transaction
// The function receives the transaction context it has to operate on
using TTxStartFct = std::function<void(std::shared_ptr<ITransactionContext>)>;
/**
* A wrapper class for transactions implemented by lambda functions
*/
class TransactionFct : public TransactionActorBase {
public:
TransactionFct(TTxStartFct startFct) : startFct(startFct) {}
void start() override { startFct(this->ctx()); }
private:
TTxStartFct startFct;
};
/**
 * Configuration of transaction execution mode
 */
struct TransactionExecutorOptions {
// Use blocking waits on futures
// (selects the blocking transaction context instead of the asynchronous one)
bool blockOnFutures = false;
// Create each transaction in a separate database instance
// (selects the per-transaction executor instead of the database pool)
bool databasePerTransaction = false;
// The size of the database instance pool
// Only read by the pool executor
int numDatabases = 1;
};
/**
 * Transaction executor provides an interface for executing transactions
 * It is responsible for instantiating FDB databases and transactions and managing their lifecycle
 * according to the provided options
 */
class ITransactionExecutor {
public:
virtual ~ITransactionExecutor() {}
// Provide the scheduler and the path of the cluster file to be used by the executor
virtual void init(IScheduler* sched, const char* clusterFile) = 0;
// Execute the transaction implemented by the given actor
// cont is the continuation associated with the transaction; the executors invoke it
// directly when the transaction cannot even be created
virtual void execute(std::shared_ptr<ITransactionActor> tx, TTaskFct cont) = 0;
};
// Create a transaction executor for the given options
// The caller owns the returned executor and has to call init() on it before use
std::unique_ptr<ITransactionExecutor> createTransactionExecutor(const TransactionExecutorOptions& options);
} // namespace FdbApiTester
#endif

View File

@ -0,0 +1,58 @@
/*
* TesterUtil.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "TesterUtil.h"
#include <cstdio>
namespace FdbApiTester {
// Seed the generator with a value obtained from std::random_device
Random::Random() {
	random.seed(std::random_device{}());
}
// Uniformly distributed integer from the closed range [min, max]
int Random::randomInt(int min, int max) {
	std::uniform_int_distribution<int> dist(min, max);
	return dist(random);
}
// Generator instance of the calling thread, created on the first call made by that thread
Random& Random::get() {
	thread_local Random instance;
	return instance;
}
// Random string of 'a'..'z' characters with a length drawn from [minLength, maxLength]
std::string Random::randomStringLowerCase(int minLength, int maxLength) {
	const int length = randomInt(minLength, maxLength);
	std::string result;
	result.reserve(length);
	for (int remaining = length; remaining > 0; --remaining) {
		result.push_back(static_cast<char>(randomInt('a', 'z')));
	}
	return result;
}
// Returns true with a probability of (approximately) trueRatio
bool Random::randomBool(double trueRatio) {
	std::uniform_real_distribution<double> unit(0.0, 1.0);
	const double sample = unit(random);
	return sample <= trueRatio;
}
// Report a failed assertion on stderr: the failed expression followed by its source location
void print_internal_error(const char* msg, const char* file, int line) {
	fprintf(stderr,
	        "Assertion %s failed @ %s %d:\n",
	        msg,
	        file,
	        line);
}
} // namespace FdbApiTester

View File

@ -0,0 +1,87 @@
/*
* TesterUtil.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#ifndef APITESTER_UTIL_H
#define APITESTER_UTIL_H
#include <random>
#include <ostream>
#include <optional>
#include <fmt/format.h>
namespace fmt {

// fmt formatter for std::optional<T>
// An engaged optional is formatted exactly like the contained T (the format specification is
// parsed by the inherited formatter<T>), an empty one is printed as "<empty>"
template <typename T>
struct formatter<std::optional<T>> : fmt::formatter<T> {

	template <typename FormatContext>
	auto format(const std::optional<T>& opt, FormatContext& ctx) {
		if (opt) {
			// Return the iterator produced by the underlying formatter: ctx.out() is not advanced
			// by it, so returning ctx.out() would report a stale output position
			return fmt::formatter<T>::format(*opt, ctx);
		}
		return fmt::format_to(ctx.out(), "<empty>");
	}
};

} // namespace fmt
namespace FdbApiTester {
// Random number helper built on std::mt19937
// Use Random::get() to obtain the instance of the calling thread
class Random {
public:
// Seeds the generator from std::random_device
Random();
// Thread-local instance
static Random& get();
// Uniformly distributed integer from the closed range [min, max]
int randomInt(int min, int max);
// Random string of lower case letters with a length from [minLength, maxLength]
std::string randomStringLowerCase(int minLength, int maxLength);
// Returns true with a probability of (approximately) trueRatio
bool randomBool(double trueRatio);
// The underlying generator
std::mt19937 random;
};
// Exception type used by the tester for reporting errors
// Derives from std::runtime_error, so a handler for std::runtime_error catches it as well
class TesterError : public std::runtime_error {
public:
explicit TesterError(const char* message) : std::runtime_error(message) {}
explicit TesterError(const std::string& message) : std::runtime_error(message) {}
// Copy and move operations are explicitly defaulted
TesterError(const TesterError&) = default;
TesterError& operator=(const TesterError&) = default;
TesterError(TesterError&&) = default;
TesterError& operator=(TesterError&&) = default;
};
// Print a message about a failed assertion (expression text, file and line) to stderr
void print_internal_error(const char* msg, const char* file, int line);
// Assertion that is always active: if the condition evaluates to false, the failure is
// reported through print_internal_error and the process is aborted
// The macro refers to print_internal_error without qualification, so it has to be used in a
// scope where that name is visible (inside FdbApiTester or after a using-directive)
#define ASSERT(condition) \
do { \
if (!(condition)) { \
print_internal_error(#condition, __FILE__, __LINE__); \
abort(); \
} \
} while (false) // For use in destructors, where throwing exceptions is extremely dangerous
} // namespace FdbApiTester
#endif

View File

@ -0,0 +1,184 @@
/*
* TesterWorkload.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "TesterWorkload.h"
#include "TesterUtil.h"
#include "test/apitester/TesterScheduler.h"
#include <cstdlib>
#include <memory>
#include <fmt/format.h>
#include <vector>
namespace FdbApiTester {
// Get the value of an integer option
// Returns defaultVal if the option is not set. Throws TesterError if the value is not a
// complete base-10 integer (this includes an empty value) or does not fit into an int.
int WorkloadConfig::getIntOption(const std::string& name, int defaultVal) const {
	auto iter = options.find(name);
	if (iter == options.end()) {
		return defaultVal;
	}
	const char* text = iter->second.c_str();
	char* endptr = nullptr;
	const long longVal = strtol(text, &endptr, 10);
	const int intVal = static_cast<int>(longVal);
	// endptr == text: no digits were consumed (empty or blank value)
	// *endptr != '\0': trailing characters after the number
	// intVal != longVal: the value was changed by narrowing it to int
	if (endptr == text || *endptr != '\0' || intVal != longVal) {
		throw TesterError(
		    fmt::format("Invalid workload configuration. Invalid value {} for {}", iter->second, name));
	}
	return intVal;
}
// Get the value of a floating point option
// Returns defaultVal if the option is not set. Throws TesterError if the value is not a
// complete floating point number (this includes an empty value).
double WorkloadConfig::getFloatOption(const std::string& name, double defaultVal) const {
	auto iter = options.find(name);
	if (iter == options.end()) {
		return defaultVal;
	}
	const char* text = iter->second.c_str();
	char* endptr = nullptr;
	const double floatVal = strtod(text, &endptr);
	// endptr == text: nothing was parsed (empty or blank value)
	// *endptr != '\0': trailing characters after the number
	if (endptr == text || *endptr != '\0') {
		throw TesterError(
		    fmt::format("Invalid workload configuration. Invalid value {} for {}", iter->second, name));
	}
	return floatVal;
}
// Set up the workload from its configuration
// maxErrors is taken from the "maxErrors" option (10 if not set),
// the workload identifier is the workload name directly followed by the client ID
WorkloadBase::WorkloadBase(const WorkloadConfig& config)
  : manager(nullptr),
    tasksScheduled(0),
    numErrors(0),
    clientId(config.clientId),
    numClients(config.numClients),
    maxErrors(config.getIntOption("maxErrors", 10)),
    workloadId(fmt::format("{}{}", config.name, config.clientId)),
    failed(false) {}
void WorkloadBase::init(WorkloadManager* manager) {
this->manager = manager;
}
// Run a task on the scheduler as a part of this workload
// Nothing is scheduled any more once the workload has failed
void WorkloadBase::schedule(TTaskFct task) {
	if (!failed) {
		// Accounted for by scheduledTaskDone() after the task has run
		tasksScheduled++;
		manager->scheduler->schedule([this, task]() {
			task();
			scheduledTaskDone();
		});
	}
}
void WorkloadBase::execTransaction(std::shared_ptr<ITransactionActor> tx, TTaskFct cont, bool failOnError) {
if (failed) {
return;
}
tasksScheduled++;
manager->txExecutor->execute(tx, [this, tx, cont, failOnError]() {
fdb_error_t err = tx->getErrorCode();
if (tx->getErrorCode() == error_code_success) {
cont();
} else {
std::string msg = fmt::format("Transaction failed with error: {} ({}})", err, fdb_get_error(err));
if (failOnError) {
error(msg);
failed = true;
} else {
info(msg);
cont();
}
}
scheduledTaskDone();
});
}
// Log an info message to stderr, prefixed with the workload identifier in brackets
void WorkloadBase::info(const std::string& msg) {
fmt::print(stderr, "[{}] {}\n", workloadId, msg);
}
// Log an error message to stderr and count it
// The workload is marked as failed as soon as more than maxErrors errors have been logged
void WorkloadBase::error(const std::string& msg) {
	fmt::print(stderr, "[{}] ERROR: {}\n", workloadId, msg);
	numErrors++;
	if (failed || numErrors <= maxErrors) {
		return;
	}
	fmt::print(stderr, "[{}] ERROR: Stopping workload after {} errors\n", workloadId, numErrors);
	failed = true;
}
// Account for a finished task; the task bringing the counter down to zero ends the workload:
// the outcome is logged and the workload manager is notified
void WorkloadBase::scheduledTaskDone() {
	if (--tasksScheduled != 0) {
		return;
	}
	if (numErrors > 0) {
		error(fmt::format("Workload failed with {} errors", numErrors.load()));
	} else {
		info("Workload successfully completed");
	}
	manager->workloadDone(this, numErrors > 0);
}
void WorkloadManager::add(std::shared_ptr<IWorkload> workload, TTaskFct cont) {
std::unique_lock<std::mutex> lock(mutex);
workloads[workload.get()] = WorkloadInfo{ workload, cont };
}
void WorkloadManager::run() {
std::vector<std::shared_ptr<IWorkload>> initialWorkloads;
for (auto iter : workloads) {
initialWorkloads.push_back(iter.second.ref);
}
for (auto iter : initialWorkloads) {
iter->init(this);
}
for (auto iter : initialWorkloads) {
iter->start();
}
scheduler->join();
if (failed()) {
fmt::print(stderr, "{} workloads failed\n", numWorkloadsFailed);
} else {
fprintf(stderr, "All workloads succesfully completed\n");
}
}
// To be called by a workload to notify that it is done
// Runs the continuation registered for the workload, removes the workload from the set of
// running workloads and stops the scheduler once no workloads are left.
void WorkloadManager::workloadDone(IWorkload* workload, bool failed) {
	std::unique_lock<std::mutex> lock(mutex);
	auto iter = workloads.find(workload);
	ASSERT(iter != workloads.end());
	// Copy the continuation while the lock is held: once it is released, a concurrent add()
	// may rehash the map and invalidate the iterator
	TTaskFct cont = iter->second.cont;
	lock.unlock();
	// The continuation runs without the lock, it may register further workloads
	cont();
	lock.lock();
	// Erase by key instead of through the possibly invalidated iterator
	workloads.erase(workload);
	if (failed) {
		numWorkloadsFailed++;
	}
	bool done = workloads.empty();
	lock.unlock();
	if (done) {
		scheduler->stop();
	}
}
// Create a workload by the name its factory was registered with
// Returns an empty pointer if no factory is registered under that name
std::shared_ptr<IWorkload> IWorkloadFactory::create(std::string const& name, const WorkloadConfig& config) {
	auto& registry = factories();
	auto found = registry.find(name);
	if (found != registry.end()) {
		return found->second->create(config);
	}
	return {}; // or throw?
}
// The singleton registry of workload factories, keyed by workload name
// Kept as a function-local static, i.e. it is constructed on the first call
std::unordered_map<std::string, IWorkloadFactory*>& IWorkloadFactory::factories() {
	static std::unordered_map<std::string, IWorkloadFactory*> registry;
	return registry;
}
} // namespace FdbApiTester

View File

@ -0,0 +1,205 @@
/*
* TesterWorkload.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <memory>
#ifndef APITESTER_WORKLOAD_H
#define APITESTER_WORKLOAD_H
#include "TesterTransactionExecutor.h"
#include "TesterUtil.h"
#include <atomic>
#include <unordered_map>
#include <mutex>
namespace FdbApiTester {
class WorkloadManager;
// Workload interface
// The workload manager first calls init() on all workloads and then start() on all of them
class IWorkload {
public:
virtual ~IWorkload() {}
// Initialize the workload
virtual void init(WorkloadManager* manager) = 0;
// Start executing the workload
virtual void start() = 0;
};
// Workload configuration
struct WorkloadConfig {
// Workload name
std::string name;
// Client ID assigned to the workload (a number from 0 to numClients-1)
int clientId;
// Total number of clients
int numClients;
// Workload options: as key-value pairs
std::unordered_map<std::string, std::string> options;
// Get option of a certain type by name. Throws an exception if the value is of a wrong type
// defaultVal is returned if the option is not set
int getIntOption(const std::string& name, int defaultVal) const;
double getFloatOption(const std::string& name, double defaultVal) const;
};
// A base class for test workloads
// Tracks if workload is active, notifies the workload manager when the workload completes
class WorkloadBase : public IWorkload {
public:
WorkloadBase(const WorkloadConfig& config);
// Initialize the workload
void init(WorkloadManager* manager) override;
protected:
// Schedule a task as a part of the workload
void schedule(TTaskFct task);
// Execute a transaction within the workload
// failOnError controls whether a failed transaction fails the workload; if it is not set,
// the failure is only logged and the continuation is still executed
void execTransaction(std::shared_ptr<ITransactionActor> tx, TTaskFct cont, bool failOnError = true);
// Execute a transaction within the workload, a convenience method for a transaction defined by a lambda function
void execTransaction(TTxStartFct start, TTaskFct cont, bool failOnError = true) {
execTransaction(std::make_shared<TransactionFct>(start), cont, failOnError);
}
// Log an error message, increase error counter
void error(const std::string& msg);
// Log an info message
void info(const std::string& msg);
private:
// The manager this workload was initialized with; provides the scheduler and the transaction executor
WorkloadManager* manager;
// Decrease scheduled task counter, notify the workload manager
// that the workload is done if no more tasks are scheduled
void scheduledTaskDone();
// Keep track of tasks scheduled by the workload
// End workload when this number falls to 0
std::atomic<int> tasksScheduled;
// Number of errors logged
std::atomic<int> numErrors;
protected:
// Client ID assigned to the workload (a number from 0 to numClients-1)
int clientId;
// Total number of clients
int numClients;
// The maximum number of errors before stopping the workload
int maxErrors;
// Workload identifier, consisting of workload name and client ID
std::string workloadId;
// Workload is failed, no further transactions or continuations will be scheduled by the workload
std::atomic<bool> failed;
};
// Workload manager
// Keeps track of active workloads, stops the scheduler after all workloads complete
class WorkloadManager {
public:
// The executor and the scheduler are not owned by the manager, only pointers to them are stored
WorkloadManager(ITransactionExecutor* txExecutor, IScheduler* scheduler)
: txExecutor(txExecutor), scheduler(scheduler), numWorkloadsFailed(0) {}
// Add a workload
// A continuation is to be specified for subworkloads
void add(std::shared_ptr<IWorkload> workload, TTaskFct cont = NO_OP_TASK);
// Run all workloads. Blocks until all workloads complete
void run();
// True if at least one workload has failed
bool failed() {
std::unique_lock<std::mutex> lock(mutex);
return numWorkloadsFailed > 0;
}
private:
// WorkloadBase accesses the scheduler, the executor and workloadDone()
friend WorkloadBase;
// Info about a running workload
struct WorkloadInfo {
// Reference to the workload for ownership
std::shared_ptr<IWorkload> ref;
// Continuation to be executed after completing the workload
TTaskFct cont;
};
// To be called by a workload to notify that it is done
void workloadDone(IWorkload* workload, bool failed);
// Transaction executor to be used by the workloads
ITransactionExecutor* txExecutor;
// A scheduler to be used by the workloads
IScheduler* scheduler;
// Mutex protects access to workloads & numWorkloadsFailed
std::mutex mutex;
// A map of currently running workloads
std::unordered_map<IWorkload*, WorkloadInfo> workloads;
// Number of workloads failed
int numWorkloadsFailed;
};
// A workload factory
// Combines a static registry of factories with the interface of a single factory
struct IWorkloadFactory {
// create a workload by name
// Returns an empty pointer if no factory is registered under the given name
static std::shared_ptr<IWorkload> create(std::string const& name, const WorkloadConfig& config);
// a singleton registry of workload factories
// Maps workload names to factory objects; the registry does not own the factories
static std::unordered_map<std::string, IWorkloadFactory*>& factories();
// Interface to be implemented by a workload factory
virtual ~IWorkloadFactory() = default;
virtual std::shared_ptr<IWorkload> create(const WorkloadConfig& config) = 0;
};
/**
 * A template for a workload factory for creating workloads of a certain type
 *
 * Declare a global instance of the factory for a workload type as follows:
 * WorkloadFactory<MyWorkload> MyWorkloadFactory("myWorkload");
 *
 * WorkloadType has to be constructible from a const WorkloadConfig&.
 */
template <class WorkloadType>
struct WorkloadFactory : IWorkloadFactory {
// Register this factory under the given name, replacing a factory registered earlier
// under the same name
WorkloadFactory(const char* name) { factories()[name] = this; }
// Create a workload of the type WorkloadType for the given configuration
std::shared_ptr<IWorkload> create(const WorkloadConfig& config) override {
return std::make_shared<WorkloadType>(config);
}
};
} // namespace FdbApiTester
#endif

View File

@ -0,0 +1,284 @@
/*
* fdb_c_api_tester.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "TesterOptions.h"
#include "TesterWorkload.h"
#include "TesterScheduler.h"
#include "TesterTransactionExecutor.h"
#include "TesterTestSpec.h"
#include "TesterUtil.h"
#include "flow/SimpleOpt.h"
#include "bindings/c/foundationdb/fdb_c.h"
#include <memory>
#include <stdexcept>
#include <thread>
#include <fmt/format.h>
namespace FdbApiTester {
namespace {
// Identifiers of the command line options; the mapping to the option syntax is
// given by TesterOptionDefs
enum TesterOptionId {
OPT_CONNFILE,
OPT_HELP,
OPT_TRACE,
OPT_TRACE_DIR,
OPT_LOGGROUP,
OPT_TRACE_FORMAT,
OPT_KNOB,
OPT_EXTERNAL_CLIENT_LIBRARY,
OPT_TEST_FILE
};
// Command line option table for CSimpleOpt: option ID, option text and argument type
// SO_NONE options take no argument, SO_REQ_SEP options take a required argument
// Several option texts may map to the same ID (short and long form)
// The list is terminated by SO_END_OF_OPTIONS
CSimpleOpt::SOption TesterOptionDefs[] = //
{ { OPT_CONNFILE, "-C", SO_REQ_SEP },
{ OPT_CONNFILE, "--cluster-file", SO_REQ_SEP },
{ OPT_TRACE, "--log", SO_NONE },
{ OPT_TRACE_DIR, "--log-dir", SO_REQ_SEP },
{ OPT_LOGGROUP, "--log-group", SO_REQ_SEP },
{ OPT_HELP, "-h", SO_NONE },
{ OPT_HELP, "--help", SO_NONE },
{ OPT_TRACE_FORMAT, "--trace-format", SO_REQ_SEP },
{ OPT_KNOB, "--knob-", SO_REQ_SEP },
{ OPT_EXTERNAL_CLIENT_LIBRARY, "--external-client-library", SO_REQ_SEP },
{ OPT_TEST_FILE, "-f", SO_REQ_SEP },
{ OPT_TEST_FILE, "--test-file", SO_REQ_SEP },
SO_END_OF_OPTIONS };
// Print the usage line followed by the description of all command line options to stdout
// execName is the name of the executable shown in the usage line
void printProgramUsage(const char* execName) {
printf("usage: %s [OPTIONS]\n"
"\n",
execName);
// The option descriptions are one string literal without format specifiers
printf(" -C, --cluster-file FILE\n"
" The path of a file containing the connection string for the\n"
" FoundationDB cluster. The default is `fdb.cluster'\n"
" --log Enables trace file logging for the CLI session.\n"
" --log-dir PATH Specifes the output directory for trace files. If\n"
" unspecified, defaults to the current directory. Has\n"
" no effect unless --log is specified.\n"
" --log-group LOG_GROUP\n"
" Sets the LogGroup field with the specified value for all\n"
" events in the trace output (defaults to `default').\n"
" --trace-format FORMAT\n"
" Select the format of the log files. xml (the default) and json\n"
" are supported. Has no effect unless --log is specified.\n"
" --knob-KNOBNAME KNOBVALUE\n"
" Changes a knob option. KNOBNAME should be lowercase.\n"
" --external-client-library FILE\n"
" Path to the external client library.\n"
" -f, --test-file FILE\n"
" Test file to run.\n"
" -h, --help Display this help and exit.\n");
}
// Extracts the key of a command line argument given in the prefixed form (e.g. --knob-KEY).
// The prefix has to be followed by '-' or '_'; everything after that separator is the key.
// Hyphens in the key are replaced by underscores.
// Returns false, leaving res untouched, if arg does not have the expected form.
bool extractPrefixedArgument(std::string prefix, const std::string& arg, std::string& res) {
	const std::size_t prefixLen = prefix.size();
	if (arg.size() <= prefixLen) {
		return false;
	}
	if (arg.compare(0, prefixLen, prefix) != 0) {
		return false;
	}
	const char separator = arg[prefixLen];
	if (separator != '-' && separator != '_') {
		return false;
	}
	res = arg.substr(prefixLen + 1);
	std::replace(res.begin(), res.end(), '-', '_');
	return true;
}
// True if the given trace format is one of the supported ones: "xml" or "json" (exact match)
bool validateTraceFormat(std::string_view format) {
	if (format == "xml") {
		return true;
	}
	return format == "json";
}
// Apply the option the parser is currently positioned at to the tester options
// Returns false (after printing an error message) if the option value is invalid
bool processArg(TesterOptions& options, const CSimpleOpt& args) {
	switch (args.OptionId()) {
	case OPT_CONNFILE:
		options.clusterFile = args.OptionArg();
		break;

	case OPT_TRACE:
		options.trace = true;
		break;

	case OPT_TRACE_DIR:
		options.traceDir = args.OptionArg();
		break;

	case OPT_LOGGROUP:
		options.logGroup = args.OptionArg();
		break;

	case OPT_TRACE_FORMAT:
		if (validateTraceFormat(args.OptionArg())) {
			options.traceFormat = args.OptionArg();
			break;
		}
		fmt::print(stderr, "ERROR: Unrecognized trace format `{}'\n", args.OptionArg());
		return false;

	case OPT_KNOB: {
		// The knob name is a part of the option syntax itself (--knob-NAME)
		std::string knobName;
		if (extractPrefixedArgument("--knob", args.OptionSyntax(), knobName)) {
			options.knobs.emplace_back(knobName, args.OptionArg());
			break;
		}
		fmt::print(stderr, "ERROR: unable to parse knob option '{}'\n", args.OptionSyntax());
		return false;
	}

	case OPT_EXTERNAL_CLIENT_LIBRARY:
		options.externalClientLibrary = args.OptionArg();
		break;

	case OPT_TEST_FILE:
		// The test specification is loaded as soon as the option is seen
		options.testFile = args.OptionArg();
		options.testSpec = readTomlTestSpec(options.testFile);
		break;
	}
	return true;
}
// Parse the command line into the tester options
// Returns false if the program should not continue: an argument is invalid (the error and
// the usage are printed) or the help was requested (the usage is printed)
bool parseArgs(TesterOptions& options, int argc, char** argv) {
	// The parser works on the arguments of main and the table of valid options
	CSimpleOpt args(argc, argv, TesterOptionDefs);

	// Process the arguments one by one
	while (args.Next()) {
		if (args.LastError() != SO_SUCCESS) {
			fmt::print(stderr, "ERROR: Invalid argument: {}\n", args.OptionText());
			printProgramUsage(argv[0]);
			return false;
		}
		if (args.OptionId() == OPT_HELP) {
			printProgramUsage(argv[0]);
			return false;
		}
		if (!processArg(options, args)) {
			return false;
		}
	}
	return true;
}
// Abort the process with a message on stderr if the given FDB error code signals an error
void fdb_check(fdb_error_t e) {
	if (!e) {
		return;
	}
	fmt::print(stderr, "Unexpected FDB error: {}({})\n", e, fdb_get_error(e));
	std::abort();
}
// Translate the tester options and the test specification into FDB network options
// Any failure to set an option aborts the process (see fdb_check)
void applyNetworkOptions(TesterOptions& options) {
	// An external client library replaces the local client
	if (!options.externalClientLibrary.empty()) {
		fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_DISABLE_LOCAL_CLIENT));
		fdb_check(
		    FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_EXTERNAL_CLIENT_LIBRARY, options.externalClientLibrary));
	}

	if (options.testSpec.multiThreaded) {
		fdb_check(
		    FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_CLIENT_THREADS_PER_VERSION, options.numFdbThreads));
	}

	if (options.testSpec.fdbCallbacksOnExternalThreads) {
		fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_CALLBACKS_ON_EXTERNAL_THREADS));
	}

	if (options.testSpec.buggify) {
		fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_CLIENT_BUGGIFY_ENABLE));
	}

	// Tracing: directory, format and log group are only applied if tracing is enabled
	if (options.trace) {
		fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_TRACE_ENABLE, options.traceDir));
		fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_TRACE_FORMAT, options.traceFormat));
		fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_TRACE_LOG_GROUP, options.logGroup));
	}

	// Knobs are passed in the form NAME=VALUE
	for (const auto& knob : options.knobs) {
		const auto knobSetting = fmt::format("{}={}", knob.first.c_str(), knob.second.c_str());
		fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_KNOB, knobSetting));
	}
}
// Pick the numbers of FDB threads, client threads, databases and clients randomly
// from the [min, max] ranges given by the test specification
void randomizeOptions(TesterOptions& options) {
	const auto& spec = options.testSpec;
	Random& rng = Random::get();
	options.numFdbThreads = rng.randomInt(spec.minFdbThreads, spec.maxFdbThreads);
	options.numClientThreads = rng.randomInt(spec.minClientThreads, spec.maxClientThreads);
	options.numDatabases = rng.randomInt(spec.minDatabases, spec.maxDatabases);
	options.numClients = rng.randomInt(spec.minClients, spec.maxClients);
}
// Run all workloads of the test specification, each one with options.numClients clients
// Returns true if none of the workloads has failed.
// Throws TesterError if the specification names an unknown workload.
bool runWorkloads(TesterOptions& options) {
	TransactionExecutorOptions txExecOptions;
	txExecOptions.blockOnFutures = options.testSpec.blockOnFutures;
	txExecOptions.numDatabases = options.numDatabases;
	txExecOptions.databasePerTransaction = options.testSpec.databasePerTransaction;

	// The scheduler is started before the executor is initialized with it
	std::unique_ptr<IScheduler> scheduler = createScheduler(options.numClientThreads);
	std::unique_ptr<ITransactionExecutor> txExecutor = createTransactionExecutor(txExecOptions);
	scheduler->start();
	txExecutor->init(scheduler.get(), options.clusterFile.c_str());

	WorkloadManager workloadMgr(txExecutor.get(), scheduler.get());
	for (const auto& workloadSpec : options.testSpec.workloads) {
		// One workload instance per client
		for (int clientId = 0; clientId < options.numClients; ++clientId) {
			WorkloadConfig config;
			config.name = workloadSpec.name;
			config.options = workloadSpec.options;
			config.clientId = clientId;
			config.numClients = options.numClients;

			std::shared_ptr<IWorkload> workload = IWorkloadFactory::create(workloadSpec.name, config);
			if (!workload) {
				throw TesterError(fmt::format("Unknown workload '{}'", workloadSpec.name));
			}
			workloadMgr.add(workload);
		}
	}

	workloadMgr.run();
	const bool anyFailed = workloadMgr.failed();
	return !anyFailed;
}
} // namespace
} // namespace FdbApiTester
using namespace FdbApiTester;
int main(int argc, char** argv) {
int retCode = 0;
try {
TesterOptions options;
if (!parseArgs(options, argc, argv)) {
return 1;
}
randomizeOptions(options);
fdb_check(fdb_select_api_version(options.testSpec.apiVersion));
applyNetworkOptions(options);
fdb_check(fdb_setup_network());
std::thread network_thread{ &fdb_run_network };
if (!runWorkloads(options)) {
retCode = 1;
}
fdb_check(fdb_stop_network());
network_thread.join();
} catch (const std::runtime_error& err) {
fmt::print(stderr, "ERROR: {}\n", err.what());
retCode = 1;
}
return retCode;
}

View File

@ -0,0 +1,125 @@
#!/usr/bin/env python3
#
# run_c_api_tests.py
#
# This source file is part of the FoundationDB open source project
#
# Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import sys
import subprocess
import argparse
import os
from subprocess import Popen, TimeoutExpired
import logging
import signal
def get_logger():
    """Return the logger used by this script."""
    logger_name = 'foundationdb.run_c_api_tests'
    return logging.getLogger(logger_name)
def initialize_logger_level(logging_level):
    """Configure message-only log output and set the level of the script logger.

    logging_level is one of 'DEBUG', 'INFO', 'WARNING' or 'ERROR'; anything
    else fails the assertion.
    """
    levels = {
        'DEBUG': logging.DEBUG,
        'INFO': logging.INFO,
        'WARNING': logging.WARNING,
        'ERROR': logging.ERROR,
    }
    logger = get_logger()

    assert logging_level in ['DEBUG', 'INFO', 'WARNING', 'ERROR']

    logging.basicConfig(format='%(message)s')
    # With assertions disabled an unknown level leaves the logger level unchanged
    level = levels.get(logging_level)
    if level is not None:
        logger.setLevel(level)
def run_tester(args, test_file):
    """Run the tester binary against one test file and report how it ended.

    args supplies tester_binary, cluster_file, external_client_library
    (may be None) and timeout (seconds to wait for the process).

    Returns 0 on success. On a timeout the process is killed and 1 is
    returned; otherwise the process's own non-zero return code is returned
    (negative when it was terminated by a signal).

    Raises Exception if waiting for the process fails for any reason other
    than the timeout.
    """
    cmd = [args.tester_binary, "--cluster-file",
           args.cluster_file, "--test-file", test_file]
    if args.external_client_library is not None:
        cmd += ["--external-client-library", args.external_client_library]

    get_logger().info('\nRunning tester \'%s\'...' % ' '.join(cmd))
    proc = Popen(cmd, stdout=sys.stdout, stderr=sys.stderr)
    timed_out = False
    try:
        ret_code = proc.wait(args.timeout)
    except TimeoutExpired:
        proc.kill()
        # Reap the killed child so it does not stay around as a zombie.
        proc.wait()
        timed_out = True
        # wait() raised before returning a code, so define the failure
        # result here; without this the check below hits an unbound name.
        ret_code = 1
    except Exception as e:
        raise Exception('Unable to run tester (%s)' % e)

    if ret_code != 0:
        if timed_out:
            reason = 'timed out after %d seconds' % args.timeout
        elif ret_code < 0:
            # A negative return code means the child died from that signal.
            reason = signal.Signals(-ret_code).name
        else:
            reason = 'exit code: %d' % ret_code
        get_logger().error('\n\'%s\' did not complete succesfully (%s)' %
                           (cmd[0], reason))

    get_logger().info('')
    return ret_code
def run_tests(args):
    """Run every '.toml' test definition found directly in args.test_dir.

    Each regular file whose name ends with '.toml' is passed to run_tester.
    Returns the number of test files for which run_tester returned non-zero.
    """
    def is_test_definition(name):
        return os.path.isfile(os.path.join(args.test_dir, name)) and name.endswith(".toml")

    separator = '========================================================='
    failures = 0
    # Materialise the selection before running anything, as the listing is
    # taken once up front.
    for name in list(filter(is_test_definition, os.listdir(args.test_dir))):
        get_logger().info(separator)
        get_logger().info('Running test %s' % name)
        get_logger().info(separator)
        if run_tester(args, os.path.join(args.test_dir, name)) != 0:
            failures += 1
    return failures
def parse_args(argv):
    """Parse the script's command-line options from argv and return the namespace."""
    option_specs = [
        ('--cluster-file', dict(
            type=str, default="fdb.cluster",
            help='The cluster file for the cluster being connected to. (default: fdb.cluster)')),
        ('--tester-binary', dict(
            type=str, default="fdb_c_api_tester",
            help='Path to the fdb_c_api_tester executable. (default: fdb_c_api_tester)')),
        ('--external-client-library', dict(
            type=str, default=None,
            help='Path to the external client library. (default: None)')),
        ('--test-dir', dict(
            type=str, default="./",
            help='Path to a directory with test definitions. (default: ./)')),
        ('--timeout', dict(
            type=int, default=300,
            help='The timeout in seconds for running each individual test. (default 300)')),
        ('--logging-level', dict(
            type=str, default='INFO',
            choices=['ERROR', 'WARNING', 'INFO', 'DEBUG'],
            help='Specifies the level of detail in the tester output (default=\'INFO\').')),
    ]

    parser = argparse.ArgumentParser(description='FoundationDB C API Tester')
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)
    return parser.parse_args(argv)
def main(argv):
    """Parse argv, configure logging, run all tests and return the failure count."""
    options = parse_args(argv)
    initialize_logger_level(options.logging_level)
    failed_count = run_tests(options)
    return failed_count
# Script entry point: exit with whatever main() returns for the given arguments.
if __name__ == '__main__':
    raise SystemExit(main(sys.argv[1:]))

View File

@ -0,0 +1,24 @@
[[test]]
title = 'Cancel Transaction with Blocking Waits'
multiThreaded = true
buggify = true
blockOnFutures = true
minFdbThreads = 2
maxFdbThreads = 8
minDatabases = 2
maxDatabases = 8
minClientThreads = 2
maxClientThreads = 8
minClients = 2
maxClients = 8
[[test.workload]]
name = 'CancelTransaction'
minKeyLength = 1
maxKeyLength = 64
minValueLength = 1
maxValueLength = 1000
maxKeysPerTransaction = 50
initialSize = 100
numRandomOperations = 100
readExistingKeysRatio = 0.9

View File

@ -0,0 +1,23 @@
[[test]]
title = 'Cancel Transactions with Future Callbacks'
multiThreaded = true
buggify = true
minFdbThreads = 2
maxFdbThreads = 8
minDatabases = 2
maxDatabases = 8
minClientThreads = 2
maxClientThreads = 8
minClients = 2
maxClients = 8
[[test.workload]]
name = 'CancelTransaction'
minKeyLength = 1
maxKeyLength = 64
minValueLength = 1
maxValueLength = 1000
maxKeysPerTransaction = 50
initialSize = 100
numRandomOperations = 100
readExistingKeysRatio = 0.9

View File

@ -0,0 +1,24 @@
[[test]]
title = 'Cancel Transaction with Database per Transaction'
multiThreaded = true
buggify = true
databasePerTransaction = true
minFdbThreads = 2
maxFdbThreads = 8
minDatabases = 2
maxDatabases = 8
minClientThreads = 2
maxClientThreads = 8
minClients = 2
maxClients = 8
[[test.workload]]
name = 'CancelTransaction'
minKeyLength = 1
maxKeyLength = 64
minValueLength = 1
maxValueLength = 1000
maxKeysPerTransaction = 50
initialSize = 100
numRandomOperations = 100
readExistingKeysRatio = 0.9

View File

@ -0,0 +1,25 @@
[[test]]
title = 'API Correctness Blocking'
multiThreaded = true
buggify = true
blockOnFutures = true
minFdbThreads = 2
maxFdbThreads = 8
minDatabases = 2
maxDatabases = 8
minClientThreads = 2
maxClientThreads = 8
minClients = 2
maxClients = 8
[[test.workload]]
name = 'ApiCorrectness'
minKeyLength = 1
maxKeyLength = 64
minValueLength = 1
maxValueLength = 1000
maxKeysPerTransaction = 50
initialSize = 100
numRandomOperations = 100
readExistingKeysRatio = 0.9

View File

@ -0,0 +1,24 @@
[[test]]
title = 'API Correctness Callbacks On External Threads'
multiThreaded = true
fdbCallbacksOnExternalThreads = true
buggify = true
minFdbThreads = 2
maxFdbThreads = 8
minDatabases = 2
maxDatabases = 8
minClientThreads = 2
maxClientThreads = 8
minClients = 2
maxClients = 8
[[test.workload]]
name = 'ApiCorrectness'
minKeyLength = 1
maxKeyLength = 64
minValueLength = 1
maxValueLength = 1000
maxKeysPerTransaction = 50
initialSize = 100
numRandomOperations = 100
readExistingKeysRatio = 0.9

View File

@ -0,0 +1,24 @@
[[test]]
title = 'API Correctness Database Per Transaction'
multiThreaded = true
buggify = true
databasePerTransaction = true
minFdbThreads = 2
maxFdbThreads = 8
minDatabases = 2
maxDatabases = 8
minClientThreads = 2
maxClientThreads = 8
minClients = 2
maxClients = 8
[[test.workload]]
name = 'ApiCorrectness'
minKeyLength = 1
maxKeyLength = 64
minValueLength = 1
maxValueLength = 1000
maxKeysPerTransaction = 50
initialSize = 100
numRandomOperations = 100
readExistingKeysRatio = 0.9

View File

@ -0,0 +1,23 @@
[[test]]
title = 'API Correctness Multi Threaded'
multiThreaded = true
buggify = true
minFdbThreads = 2
maxFdbThreads = 8
minDatabases = 2
maxDatabases = 8
minClientThreads = 2
maxClientThreads = 8
minClients = 2
maxClients = 8
[[test.workload]]
name = 'ApiCorrectness'
minKeyLength = 1
maxKeyLength = 64
minValueLength = 1
maxValueLength = 1000
maxKeysPerTransaction = 50
initialSize = 100
numRandomOperations = 100
readExistingKeysRatio = 0.9

View File

@ -0,0 +1,16 @@
[[test]]
title = 'API Correctness Single Threaded'
minClients = 1
maxClients = 3
multiThreaded = false
[[test.workload]]
name = 'ApiCorrectness'
minKeyLength = 1
maxKeyLength = 64
minValueLength = 1
maxValueLength = 1000
maxKeysPerTransaction = 50
initialSize = 100
numRandomOperations = 100
readExistingKeysRatio = 0.9

View File

@ -149,7 +149,7 @@ Format
------
| One operation type is defined as ``<Type><Count>`` or ``<Type><Count>:<Range>``.
| When Count is omitted, it's equivalent to setting it to 1. (e.g. ``g`` is equivalent to ``g1``)
| Multiple operation types within the same trancaction can be concatenated. (e.g. ``g9u1`` = 9 GETs and 1 update)
| Multiple operation types within the same transaction can be concatenated. (e.g. ``g9u1`` = 9 GETs and 1 update)
Transaction Specification Examples
----------------------------------

View File

@ -90,6 +90,14 @@ void Future::cancel() {
return fdb_future_get_keyvalue_array(future_, out_kv, out_count, out_more);
}
// MappedKeyValueArrayFuture
// Forwards to fdb_future_get_mappedkeyvalue_array for the wrapped future and
// returns its error code unchanged; the three out-parameters are filled by
// that call.
[[nodiscard]] fdb_error_t MappedKeyValueArrayFuture::get(const FDBMappedKeyValue** out_kv,
                                                         int* out_count,
                                                         fdb_bool_t* out_more) {
    const fdb_error_t status = fdb_future_get_mappedkeyvalue_array(future_, out_kv, out_count, out_more);
    return status;
}
// Result
Result::~Result() {
@ -122,8 +130,15 @@ EmptyFuture Database::create_snapshot(FDBDatabase* db,
return EmptyFuture(fdb_database_create_snapshot(db, uid, uid_length, snap_command, snap_command_length));
}
// Transaction
// Tenant
// Opens the tenant called `name` (name_length bytes) on `db` and stores the
// resulting handle. If the open call reports an error, its message is
// printed to stderr and the process is aborted.
Tenant::Tenant(FDBDatabase* db, const uint8_t* name, int name_length) {
    const fdb_error_t status = fdb_database_open_tenant(db, name, name_length, &tenant);
    if (status != 0) {
        std::cerr << fdb_get_error(status) << std::endl;
        std::abort();
    }
}
// Transaction
Transaction::Transaction(FDBDatabase* db) {
if (fdb_error_t err = fdb_database_create_transaction(db, &tr_)) {
std::cerr << fdb_get_error(err) << std::endl;
@ -131,6 +146,13 @@ Transaction::Transaction(FDBDatabase* db) {
}
}
// Creates a transaction on the given tenant. If transaction creation
// reports an error, its message is printed to stderr and the process is
// aborted.
Transaction::Transaction(Tenant tenant) {
    const fdb_error_t status = fdb_tenant_create_transaction(tenant.tenant, &tr_);
    if (status != 0) {
        std::cerr << fdb_get_error(status) << std::endl;
        std::abort();
    }
}
// Destroys the wrapped FDBTransaction handle via fdb_transaction_destroy.
Transaction::~Transaction() {
fdb_transaction_destroy(tr_);
}
@ -210,7 +232,7 @@ KeyValueArrayFuture Transaction::get_range(const uint8_t* begin_key_name,
reverse));
}
KeyValueArrayFuture Transaction::get_range_and_flat_map(const uint8_t* begin_key_name,
MappedKeyValueArrayFuture Transaction::get_mapped_range(const uint8_t* begin_key_name,
int begin_key_name_length,
fdb_bool_t begin_or_equal,
int begin_offset,
@ -226,7 +248,7 @@ KeyValueArrayFuture Transaction::get_range_and_flat_map(const uint8_t* begin_key
int iteration,
fdb_bool_t snapshot,
fdb_bool_t reverse) {
return KeyValueArrayFuture(fdb_transaction_get_range_and_flat_map(tr_,
return MappedKeyValueArrayFuture(fdb_transaction_get_mapped_range(tr_,
begin_key_name,
begin_key_name_length,
begin_or_equal,

View File

@ -135,6 +135,18 @@ private:
KeyValueArrayFuture(FDBFuture* f) : Future(f) {}
};
// Future whose result is an array of FDBMappedKeyValue entries.
// Instances can only be constructed by Transaction (private constructor,
// Transaction is a friend).
class MappedKeyValueArrayFuture : public Future {
public:
// Call this function instead of fdb_future_get_mappedkeyvalue_array when using
// the MappedKeyValueArrayFuture type. Its behavior is identical to
// fdb_future_get_mappedkeyvalue_array.
//
// out_kv receives the array, out_count its length and out_more whether more
// results remain; the return value is the error code of the underlying call.
fdb_error_t get(const FDBMappedKeyValue** out_kv, int* out_count, fdb_bool_t* out_more);
private:
friend class Transaction;
// Wraps the raw future `f` by passing it to the Future base class.
MappedKeyValueArrayFuture(FDBFuture* f) : Future(f) {}
};
class KeyRangeArrayFuture : public Future {
public:
// Call this function instead of fdb_future_get_keyrange_array when using
@ -191,6 +203,15 @@ public:
int snap_command_length);
};
// Wrapper around an FDBTenant handle. Transaction is a friend so that it can
// read the raw handle when creating a tenant-scoped transaction.
// NOTE(review): no destructor is declared here, so nothing in this class
// releases the handle — confirm whether it should be destroyed and, if so,
// whether Transaction should keep taking Tenant by value.
class Tenant final {
public:
// Opens the tenant identified by `name` (name_length bytes) on database `db`.
Tenant(FDBDatabase* db, const uint8_t* name, int name_length);
private:
friend class Transaction;
// Raw tenant handle.
FDBTenant* tenant;
};
// Wrapper around FDBTransaction, providing the same set of calls as the C API.
// Handles cleanup of memory, removing the need to call
// fdb_transaction_destroy.
@ -198,6 +219,7 @@ class Transaction final {
public:
// Given an FDBDatabase, initializes a new transaction.
Transaction(FDBDatabase* db);
Transaction(Tenant tenant);
~Transaction();
// Wrapper around fdb_transaction_reset.
@ -254,7 +276,7 @@ public:
// WARNING: This feature is considered experimental at this time. It is only allowed with serializable isolation
// (snapshot = false) AND read-your-writes enabled. Returns a future which will be set to an FDBMappedKeyValue array.
KeyValueArrayFuture get_range_and_flat_map(const uint8_t* begin_key_name,
MappedKeyValueArrayFuture get_mapped_range(const uint8_t* begin_key_name,
int begin_key_name_length,
fdb_bool_t begin_or_equal,
int begin_offset,

View File

@ -173,6 +173,20 @@ struct GetRangeResult {
fdb_error_t err;
};
// Result of a mapped range read. Each entry of `mkvs` is one mapped
// key-value: its key and value, the begin and end keys of the associated
// range, and the key-value pairs returned for that range.
struct GetMappedRangeResult {
std::vector<std::tuple<std::string, // key
std::string, // value
std::string, // begin
std::string, // end
std::vector<std::pair<std::string, std::string>> // range results
>>
mkvs;
// True if values remain in the key range requested.
bool more;
// Set to a non-zero value if an error occurred during the transaction.
fdb_error_t err;
};
// Helper function to get a range of kv pairs. Returns a GetRangeResult struct
// containing the results of the range read. Caller is responsible for checking
// error on failure and retrying if necessary.
@ -225,7 +239,11 @@ GetRangeResult get_range(fdb::Transaction& tr,
return GetRangeResult{ results, out_more != 0, 0 };
}
GetRangeResult get_range_and_flat_map(fdb::Transaction& tr,
// Copies the bytes referenced by `key` (key.key_length of them) into a std::string.
static inline std::string extractString(FDBKey key) {
    const char* const bytes = (const char*)key.key;
    return std::string(bytes, key.key_length);
}
GetMappedRangeResult get_mapped_range(fdb::Transaction& tr,
const uint8_t* begin_key_name,
int begin_key_name_length,
fdb_bool_t begin_or_equal,
@ -242,7 +260,7 @@ GetRangeResult get_range_and_flat_map(fdb::Transaction& tr,
int iteration,
fdb_bool_t snapshot,
fdb_bool_t reverse) {
fdb::KeyValueArrayFuture f1 = tr.get_range_and_flat_map(begin_key_name,
fdb::MappedKeyValueArrayFuture f1 = tr.get_mapped_range(begin_key_name,
begin_key_name_length,
begin_or_equal,
begin_offset,
@ -261,21 +279,41 @@ GetRangeResult get_range_and_flat_map(fdb::Transaction& tr,
fdb_error_t err = wait_future(f1);
if (err) {
return GetRangeResult{ {}, false, err };
return GetMappedRangeResult{ {}, false, err };
}
const FDBKeyValue* out_kv;
const FDBMappedKeyValue* out_mkv;
int out_count;
fdb_bool_t out_more;
fdb_check(f1.get(&out_kv, &out_count, &out_more));
std::vector<std::pair<std::string, std::string>> results;
fdb_check(f1.get(&out_mkv, &out_count, &out_more));
GetMappedRangeResult result;
result.more = (out_more != 0);
result.err = 0;
// std::cout << "out_count:" << out_count << " out_more:" << out_more << " out_mkv:" << (void*)out_mkv <<
// std::endl;
for (int i = 0; i < out_count; ++i) {
std::string key((const char*)out_kv[i].key, out_kv[i].key_length);
std::string value((const char*)out_kv[i].value, out_kv[i].value_length);
results.emplace_back(key, value);
FDBMappedKeyValue mkv = out_mkv[i];
auto key = extractString(mkv.key);
auto value = extractString(mkv.value);
auto begin = extractString(mkv.getRange.begin.key);
auto end = extractString(mkv.getRange.end.key);
// std::cout << "key:" << key << " value:" << value << " begin:" << begin << " end:" << end << std::endl;
std::vector<std::pair<std::string, std::string>> range_results;
for (int i = 0; i < mkv.getRange.m_size; ++i) {
const auto& kv = mkv.getRange.data[i];
std::string k((const char*)kv.key, kv.key_length);
std::string v((const char*)kv.value, kv.value_length);
range_results.emplace_back(k, v);
// std::cout << "[" << i << "]" << k << " -> " << v << std::endl;
}
result.mkvs.emplace_back(key, value, begin, end, range_results);
}
return GetRangeResult{ results, out_more != 0, 0 };
return result;
}
// Clears all data in the database.
@ -888,32 +926,35 @@ static Value dataOfRecord(const int i) {
static std::string indexEntryKey(const int i) {
return Tuple().append(StringRef(prefix)).append(INDEX).append(indexKey(i)).append(primaryKey(i)).pack().toString();
}
static std::string recordKey(const int i) {
return Tuple().append(prefix).append(RECORD).append(primaryKey(i)).pack().toString();
static std::string recordKey(const int i, const int split) {
return Tuple().append(prefix).append(RECORD).append(primaryKey(i)).append(split).pack().toString();
}
static std::string recordValue(const int i) {
return Tuple().append(dataOfRecord(i)).pack().toString();
static std::string recordValue(const int i, const int split) {
return Tuple().append(dataOfRecord(i)).append(split).pack().toString();
}
const static int SPLIT_SIZE = 3;
std::map<std::string, std::string> fillInRecords(int n) {
// Note: The user requested `prefix` should be added as the first element of the tuple that forms the key, rather
// than the prefix of the key. So we don't use key() or create_data() in this test.
std::map<std::string, std::string> data;
for (int i = 0; i < n; i++) {
data[indexEntryKey(i)] = EMPTY;
data[recordKey(i)] = recordValue(i);
for (int split = 0; split < SPLIT_SIZE; split++) {
data[recordKey(i, split)] = recordValue(i, split);
}
}
insert_data(db, data);
return data;
}
GetRangeResult getIndexEntriesAndMap(int beginId, int endId, fdb::Transaction& tr) {
GetMappedRangeResult getMappedIndexEntries(int beginId, int endId, fdb::Transaction& tr) {
std::string indexEntryKeyBegin = indexEntryKey(beginId);
std::string indexEntryKeyEnd = indexEntryKey(endId);
std::string mapper = Tuple().append(prefix).append(RECORD).append("{K[3]}"_sr).pack().toString();
std::string mapper = Tuple().append(prefix).append(RECORD).append("{K[3]}"_sr).append("{...}"_sr).pack().toString();
return get_range_and_flat_map(
return get_mapped_range(
tr,
FDB_KEYSEL_FIRST_GREATER_OR_EQUAL((const uint8_t*)indexEntryKeyBegin.c_str(), indexEntryKeyBegin.size()),
FDB_KEYSEL_FIRST_GREATER_OR_EQUAL((const uint8_t*)indexEntryKeyEnd.c_str(), indexEntryKeyEnd.size()),
@ -923,20 +964,20 @@ GetRangeResult getIndexEntriesAndMap(int beginId, int endId, fdb::Transaction& t
/* target_bytes */ 0,
/* FDBStreamingMode */ FDB_STREAMING_MODE_WANT_ALL,
/* iteration */ 0,
/* snapshot */ true,
/* snapshot */ false,
/* reverse */ 0);
}
TEST_CASE("fdb_transaction_get_range_and_flat_map") {
fillInRecords(20);
TEST_CASE("fdb_transaction_get_mapped_range") {
const int TOTAL_RECORDS = 20;
fillInRecords(TOTAL_RECORDS);
fdb::Transaction tr(db);
// get_range_and_flat_map is only support without RYW. This is a must!!!
fdb_check(tr.set_option(FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE, nullptr, 0));
// RYW should be enabled.
while (1) {
int beginId = 1;
int endId = 19;
auto result = getIndexEntriesAndMap(beginId, endId, tr);
auto result = getMappedIndexEntries(beginId, endId, tr);
if (result.err) {
fdb::EmptyFuture f1 = tr.on_error(result.err);
@ -945,32 +986,30 @@ TEST_CASE("fdb_transaction_get_range_and_flat_map") {
}
int expectSize = endId - beginId;
CHECK(result.kvs.size() == expectSize);
CHECK(result.mkvs.size() == expectSize);
CHECK(!result.more);
int id = beginId;
for (int i = 0; i < result.kvs.size(); i++, id++) {
const auto& [key, value] = result.kvs[i];
CHECK(recordKey(id).compare(key) == 0);
CHECK(recordValue(id).compare(value) == 0);
for (int i = 0; i < expectSize; i++, id++) {
const auto& [key, value, begin, end, range_results] = result.mkvs[i];
CHECK(indexEntryKey(id).compare(key) == 0);
CHECK(EMPTY.compare(value) == 0);
CHECK(range_results.size() == SPLIT_SIZE);
for (int split = 0; split < SPLIT_SIZE; split++) {
auto& [k, v] = range_results[split];
CHECK(recordKey(id, split).compare(k) == 0);
CHECK(recordValue(id, split).compare(v) == 0);
}
}
break;
}
}
TEST_CASE("fdb_transaction_get_range_and_flat_map get_key_values_and_map_has_more") {
fillInRecords(2000);
fdb::Transaction tr(db);
fdb_check(tr.set_option(FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE, nullptr, 0));
auto result = getIndexEntriesAndMap(100, 1900, tr);
CHECK(result.err == error_code_get_key_values_and_map_has_more);
}
TEST_CASE("fdb_transaction_get_range_and_flat_map_restricted_to_snapshot") {
TEST_CASE("fdb_transaction_get_mapped_range_restricted_to_serializable") {
std::string mapper = Tuple().append(prefix).append(RECORD).append("{K[3]}"_sr).pack().toString();
fdb::Transaction tr(db);
fdb_check(tr.set_option(FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE, nullptr, 0));
auto result = get_range_and_flat_map(
auto result = get_mapped_range(
tr,
FDB_KEYSEL_FIRST_GREATER_OR_EQUAL((const uint8_t*)indexEntryKey(0).c_str(), indexEntryKey(0).size()),
FDB_KEYSEL_FIRST_GREATER_THAN((const uint8_t*)indexEntryKey(1).c_str(), indexEntryKey(1).size()),
@ -980,16 +1019,16 @@ TEST_CASE("fdb_transaction_get_range_and_flat_map_restricted_to_snapshot") {
/* target_bytes */ 0,
/* FDBStreamingMode */ FDB_STREAMING_MODE_WANT_ALL,
/* iteration */ 0,
/* snapshot */ false, // Set snapshot to false
/* snapshot */ true, // Set snapshot to true
/* reverse */ 0);
ASSERT(result.err == error_code_client_invalid_operation);
ASSERT(result.err == error_code_unsupported_operation);
}
TEST_CASE("fdb_transaction_get_range_and_flat_map_restricted_to_ryw_disable") {
TEST_CASE("fdb_transaction_get_mapped_range_restricted_to_ryw_enable") {
std::string mapper = Tuple().append(prefix).append(RECORD).append("{K[3]}"_sr).pack().toString();
fdb::Transaction tr(db);
// Not set FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE.
auto result = get_range_and_flat_map(
fdb_check(tr.set_option(FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE, nullptr, 0)); // Not disable RYW
auto result = get_mapped_range(
tr,
FDB_KEYSEL_FIRST_GREATER_OR_EQUAL((const uint8_t*)indexEntryKey(0).c_str(), indexEntryKey(0).size()),
FDB_KEYSEL_FIRST_GREATER_THAN((const uint8_t*)indexEntryKey(1).c_str(), indexEntryKey(1).size()),
@ -1001,7 +1040,7 @@ TEST_CASE("fdb_transaction_get_range_and_flat_map_restricted_to_ryw_disable") {
/* iteration */ 0,
/* snapshot */ true,
/* reverse */ 0);
ASSERT(result.err == error_code_client_invalid_operation);
ASSERT(result.err == error_code_unsupported_operation);
}
TEST_CASE("fdb_transaction_get_range reverse") {
@ -2371,6 +2410,101 @@ TEST_CASE("Fast alloc thread cleanup") {
}
}
// End-to-end tenant test: creates a tenant through the management special
// key space, writes, reads back and clears a key inside the tenant, deletes
// the tenant, and finally checks that reads through the tenant transaction
// fail with tenant_not_found.
TEST_CASE("Tenant create, access, and delete") {
    std::string tenantName = "tenant";
    std::string testKey = "foo";
    std::string testValue = "bar";

    // Create the tenant.
    fdb::Transaction tr(db);
    while (1) {
        fdb_check(tr.set_option(FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES, nullptr, 0));
        tr.set("\xff\xff/management/tenant_map/" + tenantName, "");
        fdb::EmptyFuture commitFuture = tr.commit();
        fdb_error_t err = wait_future(commitFuture);
        if (err) {
            fdb::EmptyFuture f = tr.on_error(err);
            fdb_check(wait_future(f));
            continue;
        }
        tr.reset();
        break;
    }

    fdb::Tenant tenant(db, reinterpret_cast<const uint8_t*>(tenantName.c_str()), tenantName.size());
    fdb::Transaction tr2(tenant);

    // Write a key inside the tenant.
    while (1) {
        tr2.set(testKey, testValue);
        fdb::EmptyFuture commitFuture = tr2.commit();
        fdb_error_t err = wait_future(commitFuture);
        if (err) {
            fdb::EmptyFuture f = tr2.on_error(err);
            fdb_check(wait_future(f));
            continue;
        }
        tr2.reset();
        break;
    }

    // Read the key back, verify it, then clear it.
    while (1) {
        fdb::ValueFuture f1 = tr2.get(testKey, false);
        fdb_error_t err = wait_future(f1);
        if (err) {
            // The failed read belongs to tr2, so tr2 is the transaction that
            // has to handle the error.
            fdb::EmptyFuture f2 = tr2.on_error(err);
            fdb_check(wait_future(f2));
            continue;
        }

        int out_present;
        char* val;
        int vallen;
        fdb_check(f1.get(&out_present, (const uint8_t**)&val, &vallen));
        CHECK(out_present == 1);
        if (out_present) {
            CHECK(vallen == testValue.size());
            // The value is handed back as pointer + length, so compare with an
            // explicit length instead of treating it as a NUL-terminated C string.
            CHECK(testValue == std::string(val, vallen));
        }

        tr2.clear(testKey);
        fdb::EmptyFuture commitFuture = tr2.commit();
        err = wait_future(commitFuture);
        if (err) {
            fdb::EmptyFuture f = tr2.on_error(err);
            fdb_check(wait_future(f));
            continue;
        }

        tr2.reset();
        break;
    }

    // Delete the tenant.
    while (1) {
        fdb_check(tr.set_option(FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES, nullptr, 0));
        tr.clear("\xff\xff/management/tenant_map/" + tenantName);
        fdb::EmptyFuture commitFuture = tr.commit();
        fdb_error_t err = wait_future(commitFuture);
        if (err) {
            fdb::EmptyFuture f = tr.on_error(err);
            fdb_check(wait_future(f));
            continue;
        }
        tr.reset();
        break;
    }

    // Reads through the tenant must now fail with tenant_not_found.
    // NOTE(review): a read that succeeds is retried without resetting tr2, so
    // this loop only ends on tenant_not_found — confirm that is intended.
    while (1) {
        fdb::ValueFuture f1 = tr2.get(testKey, false);
        fdb_error_t err = wait_future(f1);
        if (err == error_code_tenant_not_found) {
            tr2.reset();
            break;
        }
        if (err) {
            // As above: handle the error on the transaction that produced it.
            fdb::EmptyFuture f2 = tr2.on_error(err);
            fdb_check(wait_future(f2));
            continue;
        }
    }
}
int main(int argc, char** argv) {
if (argc < 3) {
std::cout << "Unit tests for the FoundationDB C API.\n"

View File

@ -1,5 +1,5 @@
/*
* workloads.h
* SimpleWorkload.cpp
*
* This source file is part of the FoundationDB open source project
*

View File

@ -1,5 +1,5 @@
/*
* workloads.h
* workloads.cpp
*
* This source file is part of the FoundationDB open source project
*

View File

@ -161,7 +161,7 @@ struct RangeResultRef : VectorRef<KeyValueRef> {
// False implies that no such values remain
Optional<KeyRef> readThrough; // Only present when 'more' is true. When present, this value represent the end (or
// beginning if reverse) of the range
// which was read to produce these results. This is guarenteed to be less than the requested range.
// which was read to produce these results. This is guaranteed to be less than the requested range.
bool readToBegin;
bool readThroughEnd;

View File

@ -448,16 +448,21 @@ func (o TransactionOptions) SetInitializeNewDatabase() error {
return o.setOpt(300, nil)
}
// Allows this transaction to read and modify system keys (those that start with the byte 0xFF)
// Allows this transaction to read and modify system keys (those that start with the byte 0xFF). Implies raw_access.
func (o TransactionOptions) SetAccessSystemKeys() error {
return o.setOpt(301, nil)
}
// Allows this transaction to read system keys (those that start with the byte 0xFF)
// Allows this transaction to read system keys (those that start with the byte 0xFF). Implies raw_access.
func (o TransactionOptions) SetReadSystemKeys() error {
return o.setOpt(302, nil)
}
// Allows this transaction to access the raw key-space when tenant mode is on.
func (o TransactionOptions) SetRawAccess() error {
return o.setOpt(303, nil)
}
// Not yet implemented.
func (o TransactionOptions) SetDebugRetryLogging(param string) error {
return o.setOpt(401, []byte(param))

View File

@ -27,6 +27,8 @@ set(JAVA_BINDING_SRCS
src/main/com/apple/foundationdb/directory/package-info.java
src/main/com/apple/foundationdb/directory/PathUtil.java
src/main/com/apple/foundationdb/DirectBufferIterator.java
src/main/com/apple/foundationdb/RangeResultDirectBufferIterator.java
src/main/com/apple/foundationdb/MappedRangeResultDirectBufferIterator.java
src/main/com/apple/foundationdb/DirectBufferPool.java
src/main/com/apple/foundationdb/FDB.java
src/main/com/apple/foundationdb/FDBDatabase.java
@ -36,11 +38,13 @@ set(JAVA_BINDING_SRCS
src/main/com/apple/foundationdb/FutureKeyArray.java
src/main/com/apple/foundationdb/FutureResult.java
src/main/com/apple/foundationdb/FutureResults.java
src/main/com/apple/foundationdb/FutureMappedResults.java
src/main/com/apple/foundationdb/FutureStrings.java
src/main/com/apple/foundationdb/FutureVoid.java
src/main/com/apple/foundationdb/JNIUtil.java
src/main/com/apple/foundationdb/KeySelector.java
src/main/com/apple/foundationdb/KeyValue.java
src/main/com/apple/foundationdb/MappedKeyValue.java
src/main/com/apple/foundationdb/LocalityUtil.java
src/main/com/apple/foundationdb/NativeFuture.java
src/main/com/apple/foundationdb/NativeObjectWrapper.java
@ -49,9 +53,12 @@ set(JAVA_BINDING_SRCS
src/main/com/apple/foundationdb/package-info.java
src/main/com/apple/foundationdb/Range.java
src/main/com/apple/foundationdb/RangeQuery.java
src/main/com/apple/foundationdb/MappedRangeQuery.java
src/main/com/apple/foundationdb/KeyArrayResult.java
src/main/com/apple/foundationdb/RangeResult.java
src/main/com/apple/foundationdb/MappedRangeResult.java
src/main/com/apple/foundationdb/RangeResultInfo.java
src/main/com/apple/foundationdb/MappedRangeResultInfo.java
src/main/com/apple/foundationdb/RangeResultSummary.java
src/main/com/apple/foundationdb/ReadTransaction.java
src/main/com/apple/foundationdb/ReadTransactionContext.java

View File

@ -20,6 +20,7 @@
#include <jni.h>
#include <string.h>
#include <functional>
#include "com_apple_foundationdb_FDB.h"
#include "com_apple_foundationdb_FDBDatabase.h"
@ -50,10 +51,14 @@ static thread_local jmethodID g_IFutureCallback_call_methodID = JNI_NULL;
static thread_local bool is_external = false;
static jclass range_result_summary_class;
static jclass range_result_class;
static jclass mapped_range_result_class;
static jclass mapped_key_value_class;
static jclass string_class;
static jclass key_array_result_class;
static jmethodID key_array_result_init;
static jmethodID range_result_init;
static jmethodID mapped_range_result_init;
static jmethodID mapped_key_value_from_bytes;
static jmethodID range_result_summary_init;
void detachIfExternalThread(void* ignore) {
@ -478,6 +483,127 @@ JNIEXPORT jobject JNICALL Java_com_apple_foundationdb_FutureResults_FutureResult
return result;
}
// Scope guard: stores a callable and invokes it exactly once, when the guard
// is destroyed. Copying is disabled because a copy would invoke the same
// cleanup a second time.
class ExecuteOnLeave {
    std::function<void()> func;

public:
    explicit ExecuteOnLeave(std::function<void()> func) : func(func) {}
    ExecuteOnLeave(const ExecuteOnLeave&) = delete;
    ExecuteOnLeave& operator=(const ExecuteOnLeave&) = delete;
    ~ExecuteOnLeave() { func(); }
};
// Records `length` at *pLength and copies `length` bytes from `data` to
// pByte, then advances both cursors past what was written.
void cpBytesAndLengthInner(uint8_t*& pByte, jint*& pLength, const uint8_t* data, const int& length) {
    *pLength++ = length;
    memcpy(pByte, data, length);
    pByte += length;
}
// FDBKey convenience overload: appends the key's bytes and length via
// cpBytesAndLengthInner.
void cpBytesAndLength(uint8_t*& pByte, jint*& pLength, const FDBKey& key) {
    const uint8_t* const keyBytes = key.key;
    cpBytesAndLengthInner(pByte, pLength, keyBytes, key.key_length);
}
// Converts the mapped key-value array held by `future` into a Java
// MappedRangeResult.
//
// For every FDBMappedKeyValue a byte array (all keys/values concatenated) and
// an int array (their lengths, in the same order) are built and handed to the
// static factory referenced by mapped_key_value_from_bytes. The order is:
// key, value, range begin key, range end key, then key/value of each range
// entry.
//
// Returns JNI_NULL with a pending Java exception on any failure.
JNIEXPORT jobject JNICALL Java_com_apple_foundationdb_FutureMappedResults_FutureMappedResults_1get(JNIEnv* jenv,
                                                                                                   jobject,
                                                                                                   jlong future) {
    if (!future) {
        throwParamNotNull(jenv);
        return JNI_NULL;
    }

    FDBFuture* f = (FDBFuture*)future;

    const FDBMappedKeyValue* kvms;
    int count;
    fdb_bool_t more;
    fdb_error_t err = fdb_future_get_mappedkeyvalue_array(f, &kvms, &count, &more);
    if (err) {
        safeThrow(jenv, getThrowable(jenv, err));
        return JNI_NULL;
    }

    jobjectArray mrr_values = jenv->NewObjectArray(count, mapped_key_value_class, NULL);
    if (!mrr_values) {
        if (!jenv->ExceptionCheck())
            throwOutOfMem(jenv);
        return JNI_NULL;
    }

    for (int i = 0; i < count; i++) {
        const FDBMappedKeyValue& kvm = kvms[i];
        const int kvm_count = kvm.getRange.m_size;

        // Four fixed entries (key, value, begin, end) plus key and value of each range entry.
        const int totalLengths = 4 + kvm_count * 2;

        int totalBytes = kvm.key.key_length + kvm.value.key_length + kvm.getRange.begin.key.key_length +
                         kvm.getRange.end.key.key_length;
        for (int kvm_i = 0; kvm_i < kvm_count; kvm_i++) {
            auto kv = kvm.getRange.data[kvm_i];
            totalBytes += kv.key_length + kv.value_length;
        }

        // ExceptionCheck is used instead of ExceptionOccurred inside this loop
        // because the latter returns a new local reference on every call.
        jbyteArray bytesArray = jenv->NewByteArray(totalBytes);
        if (!bytesArray) {
            if (!jenv->ExceptionCheck())
                throwOutOfMem(jenv);
            return JNI_NULL;
        }

        jintArray lengthArray = jenv->NewIntArray(totalLengths);
        if (!lengthArray) {
            if (!jenv->ExceptionCheck())
                throwOutOfMem(jenv);
            return JNI_NULL;
        }

        uint8_t* bytes_barr = (uint8_t*)jenv->GetByteArrayElements(bytesArray, JNI_NULL);
        if (!bytes_barr) {
            throwRuntimeEx(jenv, "Error getting handle to native resources");
            return JNI_NULL;
        }
        {
            ExecuteOnLeave releaseBytes([&]() { jenv->ReleaseByteArrayElements(bytesArray, (jbyte*)bytes_barr, 0); });

            jint* length_barr = jenv->GetIntArrayElements(lengthArray, JNI_NULL);
            if (!length_barr) {
                if (!jenv->ExceptionCheck())
                    throwOutOfMem(jenv);
                return JNI_NULL;
            }
            {
                ExecuteOnLeave releaseLengths([&]() { jenv->ReleaseIntArrayElements(lengthArray, length_barr, 0); });

                uint8_t* pByte = bytes_barr;
                jint* pLength = length_barr;

                cpBytesAndLength(pByte, pLength, kvm.key);
                cpBytesAndLength(pByte, pLength, kvm.value);
                cpBytesAndLength(pByte, pLength, kvm.getRange.begin.key);
                cpBytesAndLength(pByte, pLength, kvm.getRange.end.key);
                for (int kvm_i = 0; kvm_i < kvm_count; kvm_i++) {
                    auto kv = kvm.getRange.data[kvm_i];
                    cpBytesAndLengthInner(pByte, pLength, kv.key, kv.key_length);
                    cpBytesAndLengthInner(pByte, pLength, kv.value, kv.value_length);
                }
            }
        }
        // After native arrays are released
        jobject mkv = jenv->CallStaticObjectMethod(
            mapped_key_value_class, mapped_key_value_from_bytes, (jbyteArray)bytesArray, (jintArray)lengthArray);
        if (jenv->ExceptionCheck())
            return JNI_NULL;
        jenv->SetObjectArrayElement(mrr_values, i, mkv);
        if (jenv->ExceptionCheck())
            return JNI_NULL;

        // Local references are otherwise only released when this native
        // method returns; drop the per-row ones now so that a large result
        // does not accumulate three live references per row.
        jenv->DeleteLocalRef(mkv);
        jenv->DeleteLocalRef(lengthArray);
        jenv->DeleteLocalRef(bytesArray);
    }

    jobject mrr = jenv->NewObject(mapped_range_result_class, mapped_range_result_init, mrr_values, (jboolean)more);
    if (jenv->ExceptionCheck())
        return JNI_NULL;

    return mrr;
}
// SOMEDAY: explore doing this more efficiently with Direct ByteBuffers
JNIEXPORT jbyteArray JNICALL Java_com_apple_foundationdb_FutureResult_FutureResult_1get(JNIEnv* jenv,
jobject,
@ -767,23 +893,22 @@ JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1
return (jlong)f;
}
JNIEXPORT jlong JNICALL
Java_com_apple_foundationdb_FDBTransaction_Transaction_1getRangeAndFlatMap(JNIEnv* jenv,
jobject,
jlong tPtr,
jbyteArray keyBeginBytes,
jboolean orEqualBegin,
jint offsetBegin,
jbyteArray keyEndBytes,
jboolean orEqualEnd,
jint offsetEnd,
jbyteArray mapperBytes,
jint rowLimit,
jint targetBytes,
jint streamingMode,
jint iteration,
jboolean snapshot,
jboolean reverse) {
JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1getMappedRange(JNIEnv* jenv,
jobject,
jlong tPtr,
jbyteArray keyBeginBytes,
jboolean orEqualBegin,
jint offsetBegin,
jbyteArray keyEndBytes,
jboolean orEqualEnd,
jint offsetEnd,
jbyteArray mapperBytes,
jint rowLimit,
jint targetBytes,
jint streamingMode,
jint iteration,
jboolean snapshot,
jboolean reverse) {
if (!tPtr || !keyBeginBytes || !keyEndBytes || !mapperBytes) {
throwParamNotNull(jenv);
return 0;
@ -814,23 +939,23 @@ Java_com_apple_foundationdb_FDBTransaction_Transaction_1getRangeAndFlatMap(JNIEn
return 0;
}
FDBFuture* f = fdb_transaction_get_range_and_flat_map(tr,
barrBegin,
jenv->GetArrayLength(keyBeginBytes),
orEqualBegin,
offsetBegin,
barrEnd,
jenv->GetArrayLength(keyEndBytes),
orEqualEnd,
offsetEnd,
barrMapper,
jenv->GetArrayLength(mapperBytes),
rowLimit,
targetBytes,
(FDBStreamingMode)streamingMode,
iteration,
snapshot,
reverse);
FDBFuture* f = fdb_transaction_get_mapped_range(tr,
barrBegin,
jenv->GetArrayLength(keyBeginBytes),
orEqualBegin,
offsetBegin,
barrEnd,
jenv->GetArrayLength(keyEndBytes),
orEqualEnd,
offsetEnd,
barrMapper,
jenv->GetArrayLength(mapperBytes),
rowLimit,
targetBytes,
(FDBStreamingMode)streamingMode,
iteration,
snapshot,
reverse);
jenv->ReleaseByteArrayElements(keyBeginBytes, (jbyte*)barrBegin, JNI_ABORT);
jenv->ReleaseByteArrayElements(keyEndBytes, (jbyte*)barrEnd, JNI_ABORT);
jenv->ReleaseByteArrayElements(mapperBytes, (jbyte*)barrMapper, JNI_ABORT);
@ -842,7 +967,6 @@ JNIEXPORT void JNICALL Java_com_apple_foundationdb_FutureResults_FutureResults_1
jlong future,
jobject jbuffer,
jint bufferCapacity) {
if (!future) {
throwParamNotNull(jenv);
return;
@ -902,6 +1026,92 @@ JNIEXPORT void JNICALL Java_com_apple_foundationdb_FutureResults_FutureResults_1
}
}
// Appends one length-prefixed byte string to `buffer`, starting at `offset`.
// The prefix is `length` itself, copied as sizeof(jint) raw bytes; it is
// followed by the `length` bytes of `data`. `offset` is advanced past each
// piece as it is written. No capacity check is performed here.
void memcpyStringInner(uint8_t* buffer, int& offset, const uint8_t* data, const int& length) {
	// Length prefix.
	uint8_t* cursor = buffer + offset;
	memcpy(cursor, &length, sizeof(jint));
	offset += sizeof(jint);

	// Payload bytes.
	cursor = buffer + offset;
	memcpy(cursor, data, length);
	offset += length;
}
// Convenience overload for FDBKey: appends `key.key` / `key.key_length` to
// `buffer` as a length-prefixed string via memcpyStringInner, advancing `offset`.
void memcpyString(uint8_t* buffer, int& offset, const FDBKey& key) {
	const uint8_t* const bytes = key.key;
	const int byteCount = key.key_length;
	memcpyStringInner(buffer, offset, bytes, byteCount);
}
// Serializes the mapped key-value array held by `future` into the direct
// ByteBuffer `jbuffer`.
//
// Layout written (every integer is copied as sizeof(jint) raw bytes):
//   [count, more,
//    count * ( len key | len value | len rangeBegin | len rangeEnd |
//              n | n * ( len key | len value ) )]
//
// Only the leading entries whose cumulative size fits into `bufferCapacity`
// are written; when the result is cut short that way, `count` is reduced and
// `more` is set to true.
//
// A Java exception is raised (and the function returns without writing) when
// `future` is null, when the buffer address cannot be obtained, or when the
// future reports an error.
JNIEXPORT void JNICALL
Java_com_apple_foundationdb_FutureMappedResults_FutureMappedResults_1getDirect(JNIEnv* jenv,
                                                                               jobject,
                                                                               jlong future,
                                                                               jobject jbuffer,
                                                                               jint bufferCapacity) {
	if (!future) {
		throwParamNotNull(jenv);
		return;
	}

	uint8_t* buffer = (uint8_t*)jenv->GetDirectBufferAddress(jbuffer);
	if (!buffer) {
		// Keep an exception that is already pending; otherwise raise our own.
		if (!jenv->ExceptionOccurred())
			throwRuntimeEx(jenv, "Error getting handle to native resources");
		return;
	}

	FDBFuture* f = (FDBFuture*)future;
	const FDBMappedKeyValue* kvms;
	int count;
	fdb_bool_t more;
	fdb_error_t err = fdb_future_get_mappedkeyvalue_array(f, &kvms, &count, &more);
	if (err) {
		safeThrow(jenv, getThrowable(jenv, err));
		return;
	}

	// Pass 1: find how many leading entries fit into the caller's buffer.
	int totalCapacityNeeded = 2 * sizeof(jint); // the [count, more] header
	for (int i = 0; i < count; i++) {
		const FDBMappedKeyValue& kvm = kvms[i];
		totalCapacityNeeded += kvm.key.key_length + kvm.value.key_length + kvm.getRange.begin.key.key_length +
		                       kvm.getRange.end.key.key_length +
		                       5 * sizeof(jint); // Besides the 4 lengths above, also one for kvm_count.
		const int kvm_count = kvm.getRange.m_size;
		// `j`, not `i`: the outer index is read just below (`count = i`), so it must not be shadowed.
		for (int j = 0; j < kvm_count; j++) {
			const auto& kv = kvm.getRange.data[j];
			totalCapacityNeeded += kv.key_length + kv.value_length + 2 * sizeof(jint);
		}
		if (bufferCapacity < totalCapacityNeeded) {
			count = i; /* Only fit first `i` K/V pairs */
			more = true;
			break;
		}
	}

	// Pass 2: write the header followed by the entries that fit.
	int offset = 0;
	// First copy RangeResultSummary, i.e. [keyCount, more]
	memcpy(buffer + offset, &count, sizeof(jint));
	offset += sizeof(jint);
	memcpy(buffer + offset, &more, sizeof(jint));
	offset += sizeof(jint);

	for (int i = 0; i < count; i++) {
		const FDBMappedKeyValue& kvm = kvms[i];
		memcpyString(buffer, offset, kvm.key);
		memcpyString(buffer, offset, kvm.value);
		memcpyString(buffer, offset, kvm.getRange.begin.key);
		memcpyString(buffer, offset, kvm.getRange.end.key);

		const int kvm_count = kvm.getRange.m_size;
		memcpy(buffer + offset, &kvm_count, sizeof(jint));
		offset += sizeof(jint);

		for (int j = 0; j < kvm_count; j++) {
			const auto& kv = kvm.getRange.data[j];
			memcpyStringInner(buffer, offset, kv.key, kv.key_length);
			memcpyStringInner(buffer, offset, kv.value, kv.value_length);
		}
	}
}
JNIEXPORT jlong JNICALL
Java_com_apple_foundationdb_FDBTransaction_Transaction_1getEstimatedRangeSizeBytes(JNIEnv* jenv,
jobject,
@ -1396,6 +1606,16 @@ jint JNI_OnLoad(JavaVM* vm, void* reserved) {
range_result_init = env->GetMethodID(local_range_result_class, "<init>", "([B[IZ)V");
range_result_class = (jclass)(env)->NewGlobalRef(local_range_result_class);
jclass local_mapped_range_result_class = env->FindClass("com/apple/foundationdb/MappedRangeResult");
mapped_range_result_init =
env->GetMethodID(local_mapped_range_result_class, "<init>", "([Lcom/apple/foundationdb/MappedKeyValue;Z)V");
mapped_range_result_class = (jclass)(env)->NewGlobalRef(local_mapped_range_result_class);
jclass local_mapped_key_value_class = env->FindClass("com/apple/foundationdb/MappedKeyValue");
mapped_key_value_from_bytes = env->GetStaticMethodID(
local_mapped_key_value_class, "fromBytes", "([B[I)Lcom/apple/foundationdb/MappedKeyValue;");
mapped_key_value_class = (jclass)(env)->NewGlobalRef(local_mapped_key_value_class);
jclass local_key_array_result_class = env->FindClass("com/apple/foundationdb/KeyArrayResult");
key_array_result_init = env->GetMethodID(local_key_array_result_class, "<init>", "([B[I)V");
key_array_result_class = (jclass)(env)->NewGlobalRef(local_key_array_result_class);
@ -1424,6 +1644,12 @@ void JNI_OnUnload(JavaVM* vm, void* reserved) {
if (range_result_class != JNI_NULL) {
env->DeleteGlobalRef(range_result_class);
}
if (mapped_range_result_class != JNI_NULL) {
env->DeleteGlobalRef(mapped_range_result_class);
}
if (mapped_key_value_class != JNI_NULL) {
env->DeleteGlobalRef(mapped_key_value_class);
}
if (string_class != JNI_NULL) {
env->DeleteGlobalRef(string_class);
}

View File

@ -1,5 +1,5 @@
/*
* RangeAndFlatMapQueryIntegrationTest.java
* MappedRangeQueryIntegrationTest.java
*
* This source file is part of the FoundationDB open source project
*
@ -40,7 +40,7 @@ import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
@ExtendWith(RequiresDatabase.class)
class RangeAndFlatMapQueryIntegrationTest {
class MappedRangeQueryIntegrationTest {
private static final FDB fdb = FDB.selectAPIVersion(710);
public String databaseArg = null;
// Opens a database handle using the configured databaseArg.
private Database openFDB() {
	final Database opened = fdb.open(databaseArg);
	return opened;
}
@ -67,16 +67,27 @@ class RangeAndFlatMapQueryIntegrationTest {
// Index-key text for record i, zero-padded to 8 digits.
static private String indexKey(int i) {
	final String text = String.format("index-key-of-record-%08d", i);
	return text;
}
// Payload text for record i, zero-padded to 8 digits.
static private String dataOfRecord(int i) {
	final String text = String.format("data-of-record-%08d", i);
	return text;
}
static byte[] MAPPER = Tuple.from(PREFIX, RECORD, "{K[3]}").pack();
static byte[] MAPPER = Tuple.from(PREFIX, RECORD, "{K[3]}", "{...}").pack();
static int SPLIT_SIZE = 3;
// Packed key of the index entry for record i: (PREFIX, INDEX, indexKey(i), primaryKey(i)).
static private byte[] indexEntryKey(final int i) {
	final Tuple entry = Tuple.from(PREFIX, INDEX, indexKey(i), primaryKey(i));
	return entry.pack();
}
static private byte[] recordKey(final int i) { return Tuple.from(PREFIX, RECORD, primaryKey(i)).pack(); }
static private byte[] recordValue(final int i) { return Tuple.from(dataOfRecord(i)).pack(); }
// Packed tuple (PREFIX, RECORD, primaryKey(i)); recordKey(i, split) packs the same elements plus the split number.
static private byte[] recordKeyPrefix(final int i) {
	final Tuple prefix = Tuple.from(PREFIX, RECORD, primaryKey(i));
	return prefix.pack();
}
// Packed key of one split of record i: (PREFIX, RECORD, primaryKey(i), split).
static private byte[] recordKey(final int i, final int split) {
	final Tuple key = Tuple.from(PREFIX, RECORD, primaryKey(i), split);
	return key.pack();
}
// Packed value of one split of record i: (dataOfRecord(i), split).
static private byte[] recordValue(final int i, final int split) {
	final Tuple value = Tuple.from(dataOfRecord(i), split);
	return value.pack();
}
static private void insertRecordWithIndex(final Transaction tr, final int i) {
tr.set(indexEntryKey(i), EMPTY);
tr.set(recordKey(i), recordValue(i));
for (int split = 0; split < SPLIT_SIZE; split++) {
tr.set(recordKey(i, split), recordValue(i, split));
}
}
private static String getArgFromEnv() {
@ -86,7 +97,7 @@ class RangeAndFlatMapQueryIntegrationTest {
return cluster;
}
public static void main(String[] args) throws Exception {
final RangeAndFlatMapQueryIntegrationTest test = new RangeAndFlatMapQueryIntegrationTest();
final MappedRangeQueryIntegrationTest test = new MappedRangeQueryIntegrationTest();
test.databaseArg = getArgFromEnv();
test.clearDatabase();
test.comparePerformance();
@ -94,21 +105,21 @@ class RangeAndFlatMapQueryIntegrationTest {
}
int numRecords = 10000;
int numQueries = 10000;
int numQueries = 1;
int numRecordsPerQuery = 100;
boolean validate = false;
boolean validate = true;
@Test
void comparePerformance() {
FDB fdb = FDB.selectAPIVersion(710);
try (Database db = openFDB()) {
insertRecordsWithIndexes(numRecords, db);
instrument(rangeQueryAndGet, "rangeQueryAndGet", db);
instrument(rangeQueryAndFlatMap, "rangeQueryAndFlatMap", db);
instrument(rangeQueryAndThenRangeQueries, "rangeQueryAndThenRangeQueries", db);
instrument(mappedRangeQuery, "mappedRangeQuery", db);
}
}
private void instrument(final RangeQueryWithIndex query, final String name, final Database db) {
System.out.printf("Starting %s (numQueries:%d, numRecordsPerQuery:%d)\n", name, numQueries, numRecordsPerQuery);
System.out.printf("Starting %s (numQueries:%d, numRecordsPerQuery:%d, validation:%s)\n", name, numQueries, numRecordsPerQuery, validate ? "on" : "off");
long startTime = System.currentTimeMillis();
for (int queryId = 0; queryId < numQueries; queryId++) {
int begin = ThreadLocalRandom.current().nextInt(numRecords - numRecordsPerQuery);
@ -140,7 +151,7 @@ class RangeAndFlatMapQueryIntegrationTest {
void run(int begin, int end, Database db);
}
RangeQueryWithIndex rangeQueryAndGet = (int begin, int end, Database db) -> db.run(tr -> {
RangeQueryWithIndex rangeQueryAndThenRangeQueries = (int begin, int end, Database db) -> db.run(tr -> {
try {
List<KeyValue> kvs = tr.getRange(KeySelector.firstGreaterOrEqual(indexEntryKey(begin)),
KeySelector.firstGreaterOrEqual(indexEntryKey(end)),
@ -150,22 +161,25 @@ class RangeAndFlatMapQueryIntegrationTest {
Assertions.assertEquals(end - begin, kvs.size());
// Get the records of each index entry IN PARALLEL.
List<CompletableFuture<byte[]>> resultFutures = new ArrayList<>();
List<CompletableFuture<List<KeyValue>>> resultFutures = new ArrayList<>();
// In reality, we need to get the record key by parsing the index entry key. But considering this is a
// performance test, we just ignore the returned key and simply generate it from recordKey.
for (int id = begin; id < end; id++) {
resultFutures.add(tr.get(recordKey(id)));
resultFutures.add(tr.getRange(Range.startsWith(recordKeyPrefix(id)),
ReadTransaction.ROW_LIMIT_UNLIMITED, false, StreamingMode.WANT_ALL).asList());
}
AsyncUtil.whenAll(resultFutures).get();
if (validate) {
final Iterator<KeyValue> indexes = kvs.iterator();
final Iterator<CompletableFuture<byte[]>> records = resultFutures.iterator();
final Iterator<CompletableFuture<List<KeyValue>>> records = resultFutures.iterator();
for (int id = begin; id < end; id++) {
Assertions.assertTrue(indexes.hasNext());
assertByteArrayEquals(indexEntryKey(id), indexes.next().getKey());
Assertions.assertTrue(records.hasNext());
assertByteArrayEquals(recordValue(id), records.next().get());
List<KeyValue> rangeResult = records.next().get();
validateRangeResult(id, rangeResult);
}
Assertions.assertFalse(indexes.hasNext());
Assertions.assertFalse(records.hasNext());
@ -176,23 +190,32 @@ class RangeAndFlatMapQueryIntegrationTest {
return null;
});
RangeQueryWithIndex rangeQueryAndFlatMap = (int begin, int end, Database db) -> db.run(tr -> {
RangeQueryWithIndex mappedRangeQuery = (int begin, int end, Database db) -> db.run(tr -> {
try {
tr.options().setReadYourWritesDisable();
List<KeyValue> kvs =
tr.snapshot()
.getRangeAndFlatMap(KeySelector.firstGreaterOrEqual(indexEntryKey(begin)),
KeySelector.firstGreaterOrEqual(indexEntryKey(end)), MAPPER,
ReadTransaction.ROW_LIMIT_UNLIMITED, false, StreamingMode.WANT_ALL)
List<MappedKeyValue> kvs =
tr.getMappedRange(KeySelector.firstGreaterOrEqual(indexEntryKey(begin)),
KeySelector.firstGreaterOrEqual(indexEntryKey(end)), MAPPER,
ReadTransaction.ROW_LIMIT_UNLIMITED, false, StreamingMode.WANT_ALL)
.asList()
.get();
Assertions.assertEquals(end - begin, kvs.size());
if (validate) {
final Iterator<KeyValue> results = kvs.iterator();
final Iterator<MappedKeyValue> results = kvs.iterator();
for (int id = begin; id < end; id++) {
Assertions.assertTrue(results.hasNext());
assertByteArrayEquals(recordValue(id), results.next().getValue());
MappedKeyValue mappedKeyValue = results.next();
assertByteArrayEquals(indexEntryKey(id), mappedKeyValue.getKey());
assertByteArrayEquals(EMPTY, mappedKeyValue.getValue());
assertByteArrayEquals(indexEntryKey(id), mappedKeyValue.getKey());
byte[] prefix = recordKeyPrefix(id);
assertByteArrayEquals(prefix, mappedKeyValue.getRangeBegin());
prefix[prefix.length - 1] = (byte)0x01;
assertByteArrayEquals(prefix, mappedKeyValue.getRangeEnd());
List<KeyValue> rangeResult = mappedKeyValue.getRangeResult();
validateRangeResult(id, rangeResult);
}
Assertions.assertFalse(results.hasNext());
}
@ -202,55 +225,16 @@ class RangeAndFlatMapQueryIntegrationTest {
return null;
});
/**
 * Asserts that {@code rangeResult} holds exactly {@code SPLIT_SIZE} entries and that entry
 * {@code split} carries {@code recordKey(id, split)} / {@code recordValue(id, split)}.
 *
 * @param id          record number the range result is expected to belong to
 * @param rangeResult key-values returned for that record
 */
void validateRangeResult(int id, List<KeyValue> rangeResult) {
	// JUnit's contract is assertEquals(expected, actual); keeping that order makes a failure
	// report "expected: <SPLIT_SIZE> but was: <actual size>" rather than the reverse.
	Assertions.assertEquals(SPLIT_SIZE, rangeResult.size());
	for (int split = 0; split < SPLIT_SIZE; split++) {
		KeyValue keyValue = rangeResult.get(split);
		assertByteArrayEquals(recordKey(id, split), keyValue.getKey());
		assertByteArrayEquals(recordValue(id, split), keyValue.getValue());
	}
}
// Compares two byte arrays through their printable form, so a failure message shows readable text.
void assertByteArrayEquals(byte[] expected, byte[] actual) {
	final String expectedText = ByteArrayUtil.printable(expected);
	final String actualText = ByteArrayUtil.printable(actual);
	Assertions.assertEquals(expectedText, actualText);
}
// Inserts 3 indexed records, then reads index entries 0..1 through getRangeAndFlatMap and checks that
// the returned values are the record values of records 0 and 1, in order, with nothing extra.
@Test
void rangeAndFlatMapQueryOverMultipleRows() throws Exception {
try (Database db = openFDB()) {
insertRecordsWithIndexes(3, db);
// Expected record values for records 0 and 1.
List<byte[]> expected_data_of_records = new ArrayList<>();
for (int i = 0; i <= 1; i++) {
expected_data_of_records.add(recordValue(i));
}
db.run(tr -> {
// getRangeAndFlatMap is only supported without RYW (read-your-writes), so it must be disabled.
tr.options().setReadYourWritesDisable();
// getRangeAndFlatMap is only supported with snapshot.
Iterator<KeyValue> kvs =
tr.snapshot()
.getRangeAndFlatMap(KeySelector.firstGreaterOrEqual(indexEntryKey(0)),
KeySelector.firstGreaterThan(indexEntryKey(1)), MAPPER,
ReadTransaction.ROW_LIMIT_UNLIMITED, false, StreamingMode.WANT_ALL)
.iterator();
Iterator<byte[]> expected_data_of_records_iter = expected_data_of_records.iterator();
while (expected_data_of_records_iter.hasNext()) {
Assertions.assertTrue(kvs.hasNext(), "iterator ended too early");
KeyValue kv = kvs.next();
byte[] actual_data_of_record = kv.getValue();
byte[] expected_data_of_record = expected_data_of_records_iter.next();
// Debugging aid (disabled):
// System.out.println("result key:" + ByteArrayUtil.printable(kv.getKey()) + " value:" +
// ByteArrayUtil.printable(kv.getValue()));
// Output recorded by the original author:
// result
// key:\x02prefix\x00\x02INDEX\x00\x02index-key-of-record-0\x00\x02primary-key-of-record-0\x00
// value:\x02data-of-record-0\x00
// result
// key:\x02prefix\x00\x02INDEX\x00\x02index-key-of-record-1\x00\x02primary-key-of-record-1\x00
// value:\x02data-of-record-1\x00
// For now, we don't guarantee what the returned keys mean, so only the values are checked.
Assertions.assertArrayEquals(expected_data_of_record, actual_data_of_record,
"Incorrect data of record!");
}
Assertions.assertFalse(kvs.hasNext(), "Iterator returned too much data");
return null;
});
}
}
}

View File

@ -89,8 +89,6 @@ public class FakeFDBTransaction extends FDBTransaction {
@Override
protected FutureResults getRange_internal(KeySelector begin, KeySelector end,
// TODO: map is not supported in FakeFDBTransaction yet.
byte[] mapper, // Nullable
int rowLimit, int targetBytes, int streamingMode, int iteration,
boolean isSnapshot, boolean reverse) {
numRangeCalls++;

View File

@ -32,11 +32,11 @@ import java.util.NoSuchElementException;
* The serialization format of result is =>
* [int keyCount, boolean more, ListOf<(int keyLen, int valueLen, byte[] key, byte[] value)>]
*/
class DirectBufferIterator implements Iterator<KeyValue>, AutoCloseable {
private ByteBuffer byteBuffer;
private int current = 0;
private int keyCount = -1;
private boolean more = false;
abstract class DirectBufferIterator implements AutoCloseable {
protected ByteBuffer byteBuffer;
protected int current = 0;
protected int keyCount = -1;
protected boolean more = false;
public DirectBufferIterator(ByteBuffer buffer) {
byteBuffer = buffer;
@ -55,31 +55,11 @@ class DirectBufferIterator implements Iterator<KeyValue>, AutoCloseable {
return keyCount > -1;
}
// True while fewer than keyCount entries have been consumed.
@Override
public boolean hasNext() {
	assert (hasResultReady());
	final boolean exhausted = current >= keyCount;
	return !exhausted;
}
// Decodes the next entry from the buffer: two ints (key length, value length) followed by the
// key bytes and then the value bytes. Throws NoSuchElementException when no entry is left.
@Override
public KeyValue next() {
	assert (hasResultReady()); // Must be called once its ready.
	final boolean available = hasNext();
	if (!available) {
		throw new NoSuchElementException();
	}

	final int keySize = byteBuffer.getInt();
	final int valueSize = byteBuffer.getInt();

	final byte[] keyBytes = new byte[keySize];
	byteBuffer.get(keyBytes);

	final byte[] valueBytes = new byte[valueSize];
	byteBuffer.get(valueBytes);

	current++;
	return new KeyValue(keyBytes, valueBytes);
}
// Exposes the underlying buffer this iterator reads from.
public ByteBuffer getBuffer() {
	return this.byteBuffer;
}

View File

@ -92,12 +92,10 @@ class FDBTransaction extends NativeObjectWrapper implements Transaction, OptionC
}
@Override
public AsyncIterable<KeyValue> getRangeAndFlatMap(KeySelector begin, KeySelector end, byte[] mapper, int limit,
boolean reverse, StreamingMode mode) {
if (mapper == null) {
throw new IllegalArgumentException("Mapper must be non-null");
}
return new RangeQuery(FDBTransaction.this, true, begin, end, mapper, limit, reverse, mode, eventKeeper);
public AsyncIterable<MappedKeyValue> getMappedRange(KeySelector begin, KeySelector end, byte[] mapper,
int limit, boolean reverse, StreamingMode mode) {
throw new UnsupportedOperationException("getMappedRange is only supported in serializable");
}
///////////////////
@ -348,9 +346,12 @@ class FDBTransaction extends NativeObjectWrapper implements Transaction, OptionC
}
@Override
public AsyncIterable<KeyValue> getRangeAndFlatMap(KeySelector begin, KeySelector end, byte[] mapper, int limit,
boolean reverse, StreamingMode mode) {
throw new UnsupportedOperationException("getRangeAndFlatMap is only supported in snapshot");
public AsyncIterable<MappedKeyValue> getMappedRange(KeySelector begin, KeySelector end, byte[] mapper,
int limit, boolean reverse, StreamingMode mode) {
if (mapper == null) {
throw new IllegalArgumentException("Mapper must be non-null");
}
return new MappedRangeQuery(FDBTransaction.this, false, begin, end, mapper, limit, reverse, mode, eventKeeper);
}
///////////////////
@ -431,7 +432,6 @@ class FDBTransaction extends NativeObjectWrapper implements Transaction, OptionC
// Users of this function must close the returned FutureResults when finished
protected FutureResults getRange_internal(KeySelector begin, KeySelector end,
byte[] mapper, // Nullable
int rowLimit, int targetBytes, int streamingMode, int iteration,
boolean isSnapshot, boolean reverse) {
if (eventKeeper != null) {
@ -443,14 +443,33 @@ class FDBTransaction extends NativeObjectWrapper implements Transaction, OptionC
" -- range get: (%s, %s) limit: %d, bytes: %d, mode: %d, iteration: %d, snap: %s, reverse %s",
begin.toString(), end.toString(), rowLimit, targetBytes, streamingMode,
iteration, Boolean.toString(isSnapshot), Boolean.toString(reverse)));*/
return new FutureResults(
mapper == null
? Transaction_getRange(getPtr(), begin.getKey(), begin.orEqual(), begin.getOffset(), end.getKey(),
end.orEqual(), end.getOffset(), rowLimit, targetBytes, streamingMode,
iteration, isSnapshot, reverse)
: Transaction_getRangeAndFlatMap(getPtr(), begin.getKey(), begin.orEqual(), begin.getOffset(),
end.getKey(), end.orEqual(), end.getOffset(), mapper, rowLimit,
targetBytes, streamingMode, iteration, isSnapshot, reverse),
return new FutureResults(Transaction_getRange(getPtr(), begin.getKey(), begin.orEqual(), begin.getOffset(),
end.getKey(), end.orEqual(), end.getOffset(), rowLimit,
targetBytes, streamingMode, iteration, isSnapshot, reverse),
FDB.instance().isDirectBufferQueriesEnabled(), executor, eventKeeper);
} finally {
pointerReadLock.unlock();
}
}
// Users of this function must close the returned FutureResults when finished
protected FutureMappedResults getMappedRange_internal(KeySelector begin, KeySelector end,
byte[] mapper, // Nullable
int rowLimit, int targetBytes, int streamingMode,
int iteration, boolean isSnapshot, boolean reverse) {
if (eventKeeper != null) {
eventKeeper.increment(Events.JNI_CALL);
}
pointerReadLock.lock();
try {
/*System.out.println(String.format(
" -- range get: (%s, %s) limit: %d, bytes: %d, mode: %d, iteration: %d, snap: %s, reverse %s",
begin.toString(), end.toString(), rowLimit, targetBytes, streamingMode,
iteration, Boolean.toString(isSnapshot), Boolean.toString(reverse)));*/
return new FutureMappedResults(
Transaction_getMappedRange(getPtr(), begin.getKey(), begin.orEqual(), begin.getOffset(),
end.getKey(), end.orEqual(), end.getOffset(), mapper, rowLimit,
targetBytes, streamingMode, iteration, isSnapshot, reverse),
FDB.instance().isDirectBufferQueriesEnabled(), executor, eventKeeper);
} finally {
pointerReadLock.unlock();
@ -790,7 +809,7 @@ class FDBTransaction extends NativeObjectWrapper implements Transaction, OptionC
byte[] keyEnd, boolean orEqualEnd, int offsetEnd,
int rowLimit, int targetBytes, int streamingMode, int iteration,
boolean isSnapshot, boolean reverse);
private native long Transaction_getRangeAndFlatMap(long cPtr, byte[] keyBegin, boolean orEqualBegin,
private native long Transaction_getMappedRange(long cPtr, byte[] keyBegin, boolean orEqualBegin,
int offsetBegin, byte[] keyEnd, boolean orEqualEnd,
int offsetEnd,
byte[] mapper, // Nonnull

View File

@ -0,0 +1,87 @@
/*
* FutureMappedResults.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.apple.foundationdb;
import java.nio.ByteBuffer;
import java.util.concurrent.Executor;
import com.apple.foundationdb.EventKeeper.Events;
/**
 * Native future for a mapped range read. Its value is a {@link MappedRangeResultInfo} that refers
 * back to this future; the key-values themselves are pulled from native code on demand through
 * {@link #getResults()}.
 */
class FutureMappedResults extends NativeFuture<MappedRangeResultInfo> {
	// May be null, in which case no events are recorded.
	private final EventKeeper eventKeeper;

	/**
	 * @param cPtr                      pointer to the native future
	 * @param enableDirectBufferQueries whether {@link #getResults()} may try a pooled direct buffer
	 * @param executor                  executor handed to {@code registerMarshalCallback}
	 * @param eventKeeper               optional event counter sink; may be null
	 */
	FutureMappedResults(long cPtr, boolean enableDirectBufferQueries, Executor executor, EventKeeper eventKeeper) {
		super(cPtr);
		registerMarshalCallback(executor);
		this.enableDirectBufferQueries = enableDirectBufferQueries;
		this.eventKeeper = eventKeeper;
	}

	@Override
	protected void postMarshal(MappedRangeResultInfo rri) {
		// We can't close because this class actually marshals on-demand
	}

	/**
	 * Throws the future's error if it has a non-success one; otherwise returns an info object
	 * wrapping this future.
	 */
	@Override
	protected MappedRangeResultInfo getIfDone_internal(long cPtr) throws FDBException {
		if (eventKeeper != null) {
			eventKeeper.increment(Events.JNI_CALL);
		}
		FDBException err = Future_getError(cPtr);

		if (err != null && !err.isSuccess()) {
			throw err;
		}

		return new MappedRangeResultInfo(this);
	}

	/**
	 * Fetches the results from native code. When direct-buffer queries are enabled and the pool
	 * hands out a buffer, the results are read through that buffer; otherwise the plain JNI getter
	 * is used.
	 *
	 * @return the mapped range result of this future
	 */
	public MappedRangeResult getResults() {
		ByteBuffer buffer = enableDirectBufferQueries ? DirectBufferPool.getInstance().poll() : null;
		if (buffer != null && eventKeeper != null) {
			eventKeeper.increment(Events.RANGE_QUERY_DIRECT_BUFFER_HIT);
			eventKeeper.increment(Events.JNI_CALL);
		} else if (eventKeeper != null) {
			eventKeeper.increment(Events.RANGE_QUERY_DIRECT_BUFFER_MISS);
			eventKeeper.increment(Events.JNI_CALL);
		}

		// Acquire the lock BEFORE entering the try block: if lock() itself fails, the finally
		// clause must not call unlock() on a lock this thread never obtained.
		pointerReadLock.lock();
		try {
			if (buffer != null) {
				try (MappedRangeResultDirectBufferIterator directIterator =
				         new MappedRangeResultDirectBufferIterator(buffer)) {
					FutureMappedResults_getDirect(getPtr(), directIterator.getBuffer(),
					                              directIterator.getBuffer().capacity());
					return new MappedRangeResult(directIterator);
				}
			} else {
				return FutureMappedResults_get(getPtr());
			}
		} finally {
			pointerReadLock.unlock();
		}
	}

	private boolean enableDirectBufferQueries = false;

	private native MappedRangeResult FutureMappedResults_get(long cPtr) throws FDBException;
	private native void FutureMappedResults_getDirect(long cPtr, ByteBuffer buffer, int capacity) throws FDBException;
}

View File

@ -66,7 +66,7 @@ class FutureResults extends NativeFuture<RangeResultInfo> {
try {
pointerReadLock.lock();
if (buffer != null) {
try (DirectBufferIterator directIterator = new DirectBufferIterator(buffer)) {
try (RangeResultDirectBufferIterator directIterator = new RangeResultDirectBufferIterator(buffer)) {
FutureResults_getDirect(getPtr(), directIterator.getBuffer(), directIterator.getBuffer().capacity());
return new RangeResult(directIterator);
}

View File

@ -20,6 +20,8 @@
package com.apple.foundationdb;
import com.apple.foundationdb.tuple.ByteArrayUtil;
import java.util.Arrays;
/**
@ -77,4 +79,13 @@ public class KeyValue {
public int hashCode() {
	// Combine the content hashes of both arrays.
	final int keyHash = Arrays.hashCode(key);
	final int valueHash = Arrays.hashCode(value);
	return 17 + (37 * keyHash + valueHash);
}
// Renders as KeyValue{key=<printable key>, value=<printable value>}.
@Override
public String toString() {
	return "KeyValue{"
	    + "key=" + ByteArrayUtil.printable(key)
	    + ", value=" + ByteArrayUtil.printable(value)
	    + '}';
}
}

View File

@ -0,0 +1,96 @@
/*
* MappedKeyValue.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.apple.foundationdb;
import com.apple.foundationdb.tuple.ByteArrayUtil;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
/**
 * A {@link KeyValue} that additionally carries a secondary range (begin key, end key) and the
 * key-values belonging to that range.
 */
public class MappedKeyValue extends KeyValue {
	private final byte[] rangeBegin;
	private final byte[] rangeEnd;
	private final List<KeyValue> rangeResult;

	MappedKeyValue(byte[] key, byte[] value, byte[] rangeBegin, byte[] rangeEnd, List<KeyValue> rangeResult) {
		super(key, value);
		this.rangeResult = rangeResult;
		this.rangeEnd = rangeEnd;
		this.rangeBegin = rangeBegin;
	}

	public byte[] getRangeBegin() {
		return rangeBegin;
	}

	public byte[] getRangeEnd() {
		return rangeEnd;
	}

	public List<KeyValue> getRangeResult() {
		return rangeResult;
	}

	/**
	 * Rebuilds a {@code MappedKeyValue} from one concatenated byte array plus the length of each
	 * piece. The pieces appear in this order: key, value, rangeBegin, rangeEnd, and then one
	 * (key, value) pair per nested range entry.
	 *
	 * @param bytes   all pieces concatenated back to back
	 * @param lengths length of each piece, in the order described above
	 * @return the decoded value
	 * @throws IllegalArgumentException if fewer than 4 lengths are given, or if the lengths left
	 *                                  after the first 4 do not pair up
	 */
	public static MappedKeyValue fromBytes(byte[] bytes, int[] lengths) {
		// Lengths include: key, value, rangeBegin, rangeEnd, count * (underlying key, underlying value)
		final int headerPieces = 4;
		if (lengths.length < headerPieces) {
			throw new IllegalArgumentException("There needs to be at least 4 lengths to cover the metadata");
		}

		final Offset cursor = new Offset();
		final byte[] key = takeBytes(cursor, bytes, lengths);
		final byte[] value = takeBytes(cursor, bytes, lengths);
		final byte[] rangeBegin = takeBytes(cursor, bytes, lengths);
		final byte[] rangeEnd = takeBytes(cursor, bytes, lengths);

		final int nestedPieces = lengths.length - headerPieces;
		if (nestedPieces % 2 != 0) {
			throw new IllegalArgumentException("There needs to be an even number of lengths!");
		}

		final int pairCount = nestedPieces / 2;
		final List<KeyValue> nested = new ArrayList<>(pairCount);
		int pair = 0;
		while (pair < pairCount) {
			final byte[] nestedKey = takeBytes(cursor, bytes, lengths);
			final byte[] nestedValue = takeBytes(cursor, bytes, lengths);
			nested.add(new KeyValue(nestedKey, nestedValue));
			pair++;
		}
		return new MappedKeyValue(key, value, rangeBegin, rangeEnd, nested);
	}

	/** Read position while decoding: byte position in the data and index into the lengths array. */
	static class Offset {
		int bytes = 0;
		int lengths = 0;
	}

	/**
	 * Copies the next piece out of {@code bytes} - its size is {@code lengths[offset.lengths]} and
	 * it starts at {@code offset.bytes} - and then moves {@code offset} past it.
	 */
	static byte[] takeBytes(Offset offset, byte[] bytes, int[] lengths) {
		final int start = offset.bytes;
		final int size = lengths[offset.lengths];
		final byte[] piece = new byte[size];
		System.arraycopy(bytes, start, piece, 0, size);
		// The cursor moves only after the copy has succeeded.
		offset.lengths += 1;
		offset.bytes = start + size;
		return piece;
	}

	@Override
	public String toString() {
		final String mappedPart = "MappedKeyValue{"
		    + "rangeBegin=" + ByteArrayUtil.printable(rangeBegin)
		    + ", rangeEnd=" + ByteArrayUtil.printable(rangeEnd)
		    + ", rangeResult=" + rangeResult
		    + '}';
		return super.toString() + "->" + mappedPart;
	}
}

View File

@ -0,0 +1,333 @@
/*
* MappedRangeQuery.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.apple.foundationdb;
import com.apple.foundationdb.EventKeeper.Events;
import com.apple.foundationdb.async.AsyncIterable;
import com.apple.foundationdb.async.AsyncIterator;
import com.apple.foundationdb.async.AsyncUtil;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.concurrent.CancellationException;
import java.util.concurrent.CompletableFuture;
import java.util.function.BiConsumer;
// TODO: Share code with RangeQuery?
/**
* Represents a query against FoundationDB for a range of keys. The
* result of this query can be iterated over in a blocking fashion with a call to
* {@link #iterator()} (as specified by {@link Iterable}).
* If the calling program uses an asynchronous paradigm, a non-blocking
* {@link AsyncIterator} is returned from {@link #iterator()}. Both of these
* constructions will not begin to query the database until the first call to
* {@code hasNext()}. As the query uses its {@link Transaction} of origin to fetch
* all the data, the use of this query object must not span more than a few seconds.
*
* <br><br><b>NOTE:</b> although resulting {@code Iterator}s do support the {@code remove()}
* operation, the remove is not durable until {@code commit()} on the {@code Transaction}
* that yielded this query returns <code>true</code>.
*/
class MappedRangeQuery implements AsyncIterable<MappedKeyValue> {
private final FDBTransaction tr;
private final KeySelector begin;
private final KeySelector end;
private final byte[] mapper; // Nonnull
private final boolean snapshot;
private final int rowLimit;
private final boolean reverse;
private final StreamingMode streamingMode;
private final EventKeeper eventKeeper;
// Captures the transaction and every query parameter; nothing is fetched here.
MappedRangeQuery(FDBTransaction transaction, boolean isSnapshot, KeySelector begin, KeySelector end, byte[] mapper,
                 int rowLimit, boolean reverse, StreamingMode streamingMode, EventKeeper eventKeeper) {
	// where to read from
	this.tr = transaction;
	this.snapshot = isSnapshot;

	// what to read
	this.begin = begin;
	this.end = end;
	this.mapper = mapper;

	// how to read it
	this.rowLimit = rowLimit;
	this.reverse = reverse;
	this.streamingMode = streamingMode;

	this.eventKeeper = eventKeeper;
}
/**
 * Collects every result of the requested range into a single {@code List}. With no row limit
 * and a lot of data in the range, the resulting list can be very large.
 *
 * <p>{@code ITERATOR} mode is replaced by {@code WANT_ALL} when there is no row limit and by
 * {@code EXACT} otherwise. In {@code EXACT} mode the range is requested as a single chunk; in
 * every other mode an equivalent query is iterated and its results are collected.
 *
 * @return a {@code CompletableFuture} that will be set to the contents of the database
 *  constrained by the query parameters.
 */
@Override
public CompletableFuture<List<MappedKeyValue>> asList() {
	final StreamingMode effectiveMode;
	if (this.streamingMode != StreamingMode.ITERATOR) {
		effectiveMode = this.streamingMode;
	} else if (this.rowLimit == 0) {
		effectiveMode = StreamingMode.WANT_ALL;
	} else {
		effectiveMode = StreamingMode.EXACT;
	}

	if (effectiveMode != StreamingMode.EXACT) {
		// Not EXACT: simply collect the results of an iteration into a list.
		final MappedRangeQuery equivalent =
		    new MappedRangeQuery(tr, snapshot, begin, end, mapper, rowLimit, reverse, effectiveMode, eventKeeper);
		return AsyncUtil.collect(equivalent, tr.getExecutor());
	}

	// EXACT: try and grab things as one chunk, closing the native future once it completes.
	final FutureMappedResults range =
	    tr.getMappedRange_internal(this.begin, this.end, this.mapper, this.rowLimit, 0,
	                               StreamingMode.EXACT.code(), 1, this.snapshot, this.reverse);
	return range.thenApply(info -> info.get().values).whenComplete((values, failure) -> range.close());
}
/**
 * Creates a new iterator over the results of this query, configured with this query's row
 * limit, direction and streaming mode.
 *
 * @return an {@code Iterator} over type {@code MappedKeyValue}.
 */
@Override
public AsyncRangeIterator iterator() {
	final AsyncRangeIterator results = new AsyncRangeIterator(rowLimit, reverse, streamingMode);
	return results;
}
/**
 * Asynchronous iterator over the results of the enclosing {@code MappedRangeQuery}.
 *
 * Results are obtained chunk by chunk through {@code tr.getMappedRange_internal}. The first
 * fetch is issued from the constructor; a further fetch is started on the first call to
 * {@link #next()} for each chunk, so a fetch for the following chunk can be in flight while
 * the current chunk is being consumed. Methods that read or write the mutable state below
 * synchronize on this iterator.
 */
private class AsyncRangeIterator implements AsyncIterator<MappedKeyValue> {
// immutable aspects of this iterator
// true when a non-zero row limit was supplied
private final boolean rowsLimited;
private final boolean reverse;
private final StreamingMode streamingMode;
// There is the chance for parallelism in the two "chunks" for fetched data
// chunk is the one currently being consumed; nextChunk holds a result that arrived
// while chunk still had unread entries
private MappedRangeResult chunk = null;
private MappedRangeResult nextChunk = null;
// set when a fetch is started, cleared when a non-empty fetch result is processed
private boolean fetchOutstanding = false;
// key of the entry most recently returned by next(); used by remove()
private byte[] prevKey = null;
// position within chunk of the entry that next() will return
private int index = 0;
// incremented for, and passed along with, every fetch
private int iteration = 0;
// selectors for the part of the range still to be fetched; narrowed after each chunk
private KeySelector begin;
private KeySelector end;
// rows that may still be fetched; reduced by the key count of each received chunk
private int rowsRemaining;
// the fetch most recently started, and the future that reports its outcome
private FutureMappedResults fetchingChunk;
private CompletableFuture<Boolean> nextFuture;
private boolean isCancelled = false;
/**
 * Initializes the iterator from the enclosing query's selectors and immediately starts
 * the first fetch.
 *
 * @param rowLimit maximum number of rows to fetch; 0 is treated as "not limited"
 * @param reverse passed to each fetch and used to decide which selector is advanced
 * @param streamingMode mode whose code is passed to each fetch
 */
private AsyncRangeIterator(int rowLimit, boolean reverse, StreamingMode streamingMode) {
this.begin = MappedRangeQuery.this.begin;
this.end = MappedRangeQuery.this.end;
this.rowsLimited = rowLimit != 0;
this.rowsRemaining = rowLimit;
this.reverse = reverse;
this.streamingMode = streamingMode;
startNextFetch();
}
// True when nothing should be fetched after the current chunk: the chunk reports no more
// data, or a row limit is in force and has been used up. Dereferences chunk, so it must
// only be called once chunk is non-null.
private synchronized boolean mainChunkIsTheLast() { return !chunk.more || (rowsLimited && rowsRemaining < 1); }
/**
 * Completion callback for a single fetch. It publishes the outcome through
 * {@code promise} and always closes the fetch it was created for.
 */
class FetchComplete implements BiConsumer<MappedRangeResultInfo, Throwable> {
final FutureMappedResults fetchingChunk;
final CompletableFuture<Boolean> promise;
FetchComplete(FutureMappedResults fetch, CompletableFuture<Boolean> promise) {
this.fetchingChunk = fetch;
this.promise = promise;
}
/**
 * Handles the result of the fetch.
 *
 * On error the promise is completed exceptionally (and an {@code Error} is rethrown).
 * When the result contains no entries the promise is completed with {@code FALSE}.
 * Otherwise the iterator state is advanced under its lock and the promise is completed
 * with {@code TRUE}.
 */
@Override
public void accept(MappedRangeResultInfo data, Throwable error) {
try {
if (error != null) {
if (eventKeeper != null) {
eventKeeper.increment(Events.RANGE_QUERY_CHUNK_FAILED);
}
// complete the promise before rethrowing so waiters are released either way
promise.completeExceptionally(error);
if (error instanceof Error) {
throw(Error) error;
}
return;
}
final MappedRangeResult rangeResult = data.get();
final RangeResultSummary summary = rangeResult.getSummary();
// getSummary() reports a null lastKey when the result holds no entries
// NOTE(review): fetchOutstanding is left set on this path and on the error path above
if (summary.lastKey == null) {
promise.complete(Boolean.FALSE);
return;
}
synchronized (MappedRangeQuery.AsyncRangeIterator.this) {
fetchOutstanding = false;
// adjust the total number of rows we should ever fetch
rowsRemaining -= summary.keyCount;
// set up the next fetch
// reverse scans move the end selector to the last key received;
// forward scans move the begin selector just past it
if (reverse) {
end = KeySelector.firstGreaterOrEqual(summary.lastKey);
} else {
begin = KeySelector.firstGreaterThan(summary.lastKey);
}
// If this is the first fetch or the main chunk is exhausted
if (chunk == null || index == chunk.values.size()) {
nextChunk = null;
chunk = rangeResult;
index = 0;
} else {
// the main chunk still has unread entries; park this result until next() drains it
nextChunk = rangeResult;
}
}
promise.complete(Boolean.TRUE);
} finally {
fetchingChunk.close();
}
}
}
/**
 * Starts fetching the next chunk unless the iterator is cancelled or the current chunk
 * is already the last one. Replaces {@code nextFuture} and {@code fetchingChunk} with
 * those of the new fetch.
 *
 * @throws IllegalStateException if a fetch is already marked as outstanding
 */
private synchronized void startNextFetch() {
if (fetchOutstanding)
throw new IllegalStateException("Reentrant call not allowed"); // This can not be called reentrantly
if (isCancelled) return;
if (chunk != null && mainChunkIsTheLast()) return;
fetchOutstanding = true;
nextChunk = null;
nextFuture = new CompletableFuture<>();
// start time, used only for the fetch-time event recorded below
final long sTime = System.nanoTime();
// a limit of 0 is passed when the query is not row-limited
fetchingChunk = tr.getMappedRange_internal(begin, end, mapper, rowsLimited ? rowsRemaining : 0, 0,
streamingMode.code(), ++iteration, snapshot, reverse);
BiConsumer<MappedRangeResultInfo, Throwable> cons = new FetchComplete(fetchingChunk, nextFuture);
if (eventKeeper != null) {
eventKeeper.increment(Events.RANGE_QUERY_FETCHES);
// chained after FetchComplete so the elapsed time is recorded once it has run
cons = cons.andThen((r, t) -> {
eventKeeper.timeNanos(Events.RANGE_QUERY_FETCH_TIME_NANOS, System.nanoTime() - sTime);
});
}
fetchingChunk.whenComplete(cons);
}
/**
 * Reports whether another entry is available without blocking.
 *
 * @return a completed future when the answer is already known, otherwise the future of
 *         the fetch currently in flight
 * @throws CancellationException if the iterator has been cancelled
 */
@Override
public synchronized CompletableFuture<Boolean> onHasNext() {
if (isCancelled) throw new CancellationException();
// This will only happen before the first fetch has completed
if (chunk == null) {
return nextFuture;
}
// We have a chunk and are still working through it
if (index < chunk.values.size()) {
return AsyncUtil.READY_TRUE;
}
// If we are at the end of the current chunk there is either:
// - no more data -or-
// - we are already fetching the next block
return mainChunkIsTheLast() ? AsyncUtil.READY_FALSE : nextFuture;
}
/**
 * Blocking form of {@link #onHasNext()}: waits for the returned future via {@code join()}.
 */
@Override
public boolean hasNext() {
return onHasNext().join();
}
/**
 * Returns the next entry. If the current chunk has an unread entry it is returned
 * directly; otherwise this call blocks on {@link #onHasNext()} and retries.
 *
 * @throws CancellationException if the iterator has been cancelled
 * @throws NoSuchElementException if there are no more entries
 *         (NOTE(review): it is thrown inside {@code thenApply} and surfaced through
 *         {@code join()}, so callers presumably see it wrapped in a
 *         {@code CompletionException}; confirm)
 */
@Override
public MappedKeyValue next() {
CompletableFuture<Boolean> nextFuture;
synchronized (this) {
if (isCancelled) throw new CancellationException();
// at least the first chunk has been fetched and there is at least one
// available result
if (chunk != null && index < chunk.values.size()) {
// If this is the first call to next() on a chunk, then we will want to
// start fetching the data for the next block
boolean initialNext = index == 0;
MappedKeyValue result = chunk.values.get(index);
prevKey = result.getKey();
index++;
if (eventKeeper != null) {
// We record the BYTES_FETCHED here, rather than at a lower level,
// because some parts of the construction of a MappedRangeResult occur underneath
// the JNI boundary, and we don't want to pass the eventKeeper down there
// (note: account for the length fields as well when recording the bytes
// fetched)
eventKeeper.count(Events.BYTES_FETCHED, result.getKey().length + result.getValue().length + 8);
eventKeeper.increment(Events.RANGE_QUERY_RECORDS_FETCHED);
}
// If this is the first call to next() on a chunk there cannot
// be another waiting, since we could not have issued a request
assert (!(initialNext && nextChunk != null));
// we are at the end of the current chunk and there is more to be had already
if (index == chunk.values.size() && nextChunk != null) {
index = 0;
chunk = nextChunk;
nextChunk = null;
}
if (initialNext) {
startNextFetch();
}
return result;
}
// nothing buffered: take the future to wait on while still holding the lock
nextFuture = onHasNext();
}
// If there was no result ready then we need to wait on the future
// and return the proper result, throwing if there are no more elements
return nextFuture
.thenApply(hasNext -> {
if (hasNext) {
return next();
}
throw new NoSuchElementException();
})
.join();
}
/**
 * Clears, through the transaction, the key of the entry most recently returned by
 * {@link #next()}.
 *
 * @throws IllegalStateException if {@code next()} has not returned an entry yet
 */
@Override
public synchronized void remove() {
if (prevKey == null) throw new IllegalStateException("No value has been fetched from database");
tr.clear(prevKey);
}
/**
 * Marks the iterator as cancelled and cancels both the pending result future and the
 * most recently started fetch. Later calls to {@code onHasNext()} and {@code next()}
 * throw {@code CancellationException}.
 */
@Override
public synchronized void cancel() {
isCancelled = true;
nextFuture.cancel(true);
fetchingChunk.cancel(true);
}
}
}

View File

@ -0,0 +1,64 @@
/*
* MappedRangeResult.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.apple.foundationdb;
import com.apple.foundationdb.tuple.ByteArrayUtil;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * One chunk of mapped range-read results: the decoded entries plus a flag telling
 * whether more data is reported to exist beyond this chunk.
 */
class MappedRangeResult {
	final List<MappedKeyValue> values;
	final boolean more;

	/**
	 * Wraps an already decoded array of entries. The array is exposed as a list view
	 * ({@code Arrays.asList}); it is not copied.
	 */
	public MappedRangeResult(MappedKeyValue[] values, boolean more) {
		this.values = Arrays.asList(values);
		this.more = more;
	}

	/**
	 * Decodes a chunk from the given iterator: reads the results summary first, then
	 * pulls exactly {@code iterator.count()} entries.
	 */
	MappedRangeResult(MappedRangeResultDirectBufferIterator iterator) {
		iterator.readResultsSummary();
		this.more = iterator.hasMore();

		final int expected = iterator.count();
		final List<MappedKeyValue> decoded = new ArrayList<>(expected);
		int remaining = expected;
		while (remaining > 0) {
			decoded.add(iterator.next());
			remaining--;
		}
		this.values = decoded;
	}

	/**
	 * Summarizes this chunk: the key of its last entry ({@code null} when the chunk is
	 * empty), the number of entries, and the {@code more} flag.
	 */
	public RangeResultSummary getSummary() {
		final int total = values.size();
		if (total > 0) {
			return new RangeResultSummary(values.get(total - 1).getKey(), total, more);
		}
		return new RangeResultSummary(null, total, more);
	}

	@Override
	public String toString() {
		return "MappedRangeResult{"
		    + "values=" + values
		    + ", more=" + more
		    + '}';
	}
}

View File

@ -0,0 +1,71 @@
/*
* MappedRangeResultDirectBufferIterator.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2015-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.apple.foundationdb;
import java.io.Closeable;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
/**
 * Holds the direct buffer that is shared with JNI wrapper and decodes mapped range
 * results from it.
 *
 * As consumed by {@link #next()}, each entry in the buffer is laid out as:
 * key, value, rangeBegin, rangeEnd (each an int length followed by that many bytes),
 * then an int count followed by that many (key, value) pairs in the same
 * length-prefixed encoding.
 */
class MappedRangeResultDirectBufferIterator extends DirectBufferIterator implements Iterator<KeyValue> {
	MappedRangeResultDirectBufferIterator(ByteBuffer buffer) { super(buffer); }

	@Override
	public boolean hasNext() {
		return super.hasNext();
	}

	/**
	 * Decodes and returns the next mapped entry, advancing the buffer position and the
	 * entry counter.
	 *
	 * @return the decoded entry together with its secondary range bounds and results
	 * @throws NoSuchElementException if no entries are left
	 */
	@Override
	public MappedKeyValue next() {
		assert (hasResultReady()); // Must be called once its ready.
		if (!hasNext()) {
			throw new NoSuchElementException();
		}

		// The reads below must stay in exactly this order: it is the wire layout.
		final byte[] key = getString();
		final byte[] value = getString();
		final byte[] rangeBegin = getString();
		final byte[] rangeEnd = getString();
		final int rangeResultSize = byteBuffer.getInt();
		// Parameterized (diamond) instead of a raw ArrayList to keep the assignment type-checked.
		final List<KeyValue> rangeResult = new ArrayList<>();
		for (int i = 0; i < rangeResultSize; i++) {
			final byte[] k = getString();
			final byte[] v = getString();
			rangeResult.add(new KeyValue(k, v));
		}
		current += 1;
		return new MappedKeyValue(key, value, rangeBegin, rangeEnd, rangeResult);
	}

	// Reads one length-prefixed byte string: an int length followed by that many bytes.
	private byte[] getString() {
		final int len = byteBuffer.getInt();
		byte[] s = new byte[len];
		byteBuffer.get(s);
		return s;
	}
}

View File

@ -0,0 +1,29 @@
/*
* MappedRangeResultInfo.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.apple.foundationdb;
/**
 * Thin handle around a {@code FutureMappedResults} that defers reading the results
 * until {@link #get()} is called.
 */
class MappedRangeResultInfo {
	// Assigned once at construction; final so the reference cannot be swapped out and
	// is safely visible to whichever thread later invokes get().
	private final FutureMappedResults f;

	MappedRangeResultInfo(FutureMappedResults f) { this.f = f; }

	/** Returns the results obtained from the wrapped future via {@code getResults()}. */
	MappedRangeResult get() { return f.getResults(); }
}

View File

@ -49,19 +49,17 @@ class RangeQuery implements AsyncIterable<KeyValue> {
private final FDBTransaction tr;
private final KeySelector begin;
private final KeySelector end;
private final byte[] mapper; // Nullable
private final boolean snapshot;
private final int rowLimit;
private final boolean reverse;
private final StreamingMode streamingMode;
private final EventKeeper eventKeeper;
RangeQuery(FDBTransaction transaction, boolean isSnapshot, KeySelector begin, KeySelector end, byte[] mapper,
int rowLimit, boolean reverse, StreamingMode streamingMode, EventKeeper eventKeeper) {
RangeQuery(FDBTransaction transaction, boolean isSnapshot, KeySelector begin, KeySelector end, int rowLimit,
boolean reverse, StreamingMode streamingMode, EventKeeper eventKeeper) {
this.tr = transaction;
this.begin = begin;
this.end = end;
this.mapper = mapper;
this.snapshot = isSnapshot;
this.rowLimit = rowLimit;
this.reverse = reverse;
@ -69,12 +67,6 @@ class RangeQuery implements AsyncIterable<KeyValue> {
this.eventKeeper = eventKeeper;
}
// RangeQueryAndFlatMap
RangeQuery(FDBTransaction transaction, boolean isSnapshot, KeySelector begin, KeySelector end, int rowLimit,
boolean reverse, StreamingMode streamingMode, EventKeeper eventKeeper) {
this(transaction, isSnapshot, begin, end, null, rowLimit, reverse, streamingMode, eventKeeper);
}
/**
* Returns all the results from the range requested as a {@code List}. If there were no
* limits on the original query and there is a large amount of data in the database
@ -92,7 +84,7 @@ class RangeQuery implements AsyncIterable<KeyValue> {
// if the streaming mode is EXACT, try and grab things as one chunk
if(mode == StreamingMode.EXACT) {
FutureResults range = tr.getRange_internal(this.begin, this.end, this.mapper, this.rowLimit, 0,
FutureResults range = tr.getRange_internal(this.begin, this.end, this.rowLimit, 0,
StreamingMode.EXACT.code(), 1, this.snapshot, this.reverse);
return range.thenApply(result -> result.get().values)
.whenComplete((result, e) -> range.close());
@ -100,7 +92,7 @@ class RangeQuery implements AsyncIterable<KeyValue> {
// If the streaming mode is not EXACT, simply collect the results of an
// iteration into a list
return AsyncUtil.collect(new RangeQuery(tr, snapshot, begin, end, mapper, rowLimit, reverse, mode, eventKeeper),
return AsyncUtil.collect(new RangeQuery(tr, snapshot, begin, end, rowLimit, reverse, mode, eventKeeper),
tr.getExecutor());
}
@ -229,8 +221,8 @@ class RangeQuery implements AsyncIterable<KeyValue> {
nextFuture = new CompletableFuture<>();
final long sTime = System.nanoTime();
fetchingChunk = tr.getRange_internal(begin, end, mapper, rowsLimited ? rowsRemaining : 0, 0,
streamingMode.code(), ++iteration, snapshot, reverse);
fetchingChunk = tr.getRange_internal(begin, end, rowsLimited ? rowsRemaining : 0, 0, streamingMode.code(),
++iteration, snapshot, reverse);
BiConsumer<RangeResultInfo,Throwable> cons = new FetchComplete(fetchingChunk,nextFuture);
if(eventKeeper!=null){

View File

@ -58,7 +58,7 @@ class RangeResult {
this.more = more;
}
RangeResult(DirectBufferIterator iterator) {
RangeResult(RangeResultDirectBufferIterator iterator) {
iterator.readResultsSummary();
more = iterator.hasMore();

View File

@ -0,0 +1,62 @@
/*
* RangeResultDirectBufferIterator.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2015-2020 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.apple.foundationdb;
import java.io.Closeable;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Iterator;
import java.util.NoSuchElementException;
/**
 * Iterator over plain key-value results held in the direct buffer that is shared with
 * the JNI wrapper.
 *
 * The serialization format of result is =>
 *     [int keyCount, boolean more, ListOf<(int keyLen, int valueLen, byte[] key, byte[] value)>]
 */
class RangeResultDirectBufferIterator extends DirectBufferIterator implements Iterator<KeyValue> {
	RangeResultDirectBufferIterator(ByteBuffer buffer) {
		super(buffer);
	}

	@Override
	public boolean hasNext() {
		return super.hasNext();
	}

	/**
	 * Decodes the next (key, value) pair from the buffer and advances the entry counter.
	 *
	 * @throws NoSuchElementException if no entries are left
	 */
	@Override
	public KeyValue next() {
		assert (hasResultReady()); // Must be called once its ready.
		if (hasNext()) {
			// Both lengths precede the payload bytes of the entry.
			final int keySize = byteBuffer.getInt();
			final int valueSize = byteBuffer.getInt();

			final byte[] keyBytes = new byte[keySize];
			byteBuffer.get(keyBytes);

			final byte[] valueBytes = new byte[valueSize];
			byteBuffer.get(valueBytes);

			current += 1;
			return new KeyValue(keyBytes, valueBytes);
		}
		throw new NoSuchElementException();
	}
}

View File

@ -20,6 +20,8 @@
package com.apple.foundationdb;
import com.apple.foundationdb.tuple.ByteArrayUtil;
class RangeResultSummary {
final byte[] lastKey;
final int keyCount;
@ -30,4 +32,14 @@ class RangeResultSummary {
this.keyCount = keyCount;
this.more = more;
}
/**
 * Renders the summary for debugging: the last key passed through
 * {@code ByteArrayUtil.printable}, followed by the key count and the more flag.
 */
@Override
public String toString() {
	final String shownKey = ByteArrayUtil.printable(lastKey);
	return "RangeResultSummary{"
	    + "lastKey=" + shownKey
	    + ", keyCount=" + keyCount
	    + ", more=" + more
	    + '}';
}
}

View File

@ -457,8 +457,8 @@ public interface ReadTransaction extends ReadTransactionContext {
* </p>
* @return a handle to access the results of the asynchronous call
*/
AsyncIterable<KeyValue> getRangeAndFlatMap(KeySelector begin, KeySelector end, byte[] mapper, int limit,
boolean reverse, StreamingMode mode);
AsyncIterable<MappedKeyValue> getMappedRange(KeySelector begin, KeySelector end, byte[] mapper, int limit,
boolean reverse, StreamingMode mode);
/**
* Gets an estimate for the number of bytes stored in the given range.

View File

@ -24,7 +24,6 @@ import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Arrays;
/**
* Used to represent values written by versionstamp operations with a {@link Tuple}.
* This wraps a single array which should contain twelve bytes. The first ten bytes
@ -37,7 +36,7 @@ import java.util.Arrays;
* over time. The final two bytes are the "user" version and should be set by the client.
* This allows the user to use this class to impose a total order of items across multiple
* transactions in the database in a consistent and conflict-free way. The user can elect to
* ignore this parameter by instantiating the class with the paramaterless {@link #incomplete() incomplete()}
* ignore this parameter by instantiating the class with the parameterless {@link #incomplete() incomplete()}
* and one-parameter {@link #complete(byte[]) complete} static initializers. If they do so,
* then versions are written with a default (constant) user version.
*

View File

@ -52,7 +52,7 @@ set(JAVA_INTEGRATION_TESTS
src/integration/com/apple/foundationdb/CycleMultiClientIntegrationTest.java
src/integration/com/apple/foundationdb/SidebandMultiThreadClientTest.java
src/integration/com/apple/foundationdb/RepeatableReadMultiThreadClientTest.java
src/integration/com/apple/foundationdb/RangeAndFlatMapQueryIntegrationTest.java
src/integration/com/apple/foundationdb/MappedRangeQueryIntegrationTest.java
)
# Resources that are used in integration testing, but are not explicitly test files (JUnit rules,

View File

@ -129,7 +129,7 @@ function(add_fdb_test)
-n ${test_name}
-b ${PROJECT_BINARY_DIR}
-t ${test_type}
-O ${OLD_FDBSERVER_BINARY}
-O ${OLD_FDBSERVER_BINARY}
--config "@CTEST_CONFIGURATION_TYPE@"
--crash
--aggregate-traces ${TEST_AGGREGATE_TRACES}
@ -404,7 +404,7 @@ endfunction()
# Creates a single cluster before running the specified command (usually a ctest test)
function(add_fdbclient_test)
set(options DISABLED ENABLED)
set(options DISABLED ENABLED DISABLE_LOG_DUMP)
set(oneValueArgs NAME PROCESS_NUMBER TEST_TIMEOUT WORKING_DIRECTORY)
set(multiValueArgs COMMAND)
cmake_parse_arguments(T "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}")
@ -423,23 +423,20 @@ function(add_fdbclient_test)
if(NOT T_COMMAND)
message(FATAL_ERROR "COMMAND is a required argument for add_fdbclient_test")
endif()
message(STATUS "Adding Client test ${T_NAME}")
if (T_PROCESS_NUMBER)
add_test(NAME "${T_NAME}"
WORKING_DIRECTORY ${T_WORKING_DIRECTORY}
COMMAND ${Python_EXECUTABLE} ${CMAKE_SOURCE_DIR}/tests/TestRunner/tmp_cluster.py
--build-dir ${CMAKE_BINARY_DIR}
--process-number ${T_PROCESS_NUMBER}
--
${T_COMMAND})
else()
add_test(NAME "${T_NAME}"
WORKING_DIRECTORY ${T_WORKING_DIRECTORY}
COMMAND ${Python_EXECUTABLE} ${CMAKE_SOURCE_DIR}/tests/TestRunner/tmp_cluster.py
--build-dir ${CMAKE_BINARY_DIR}
--
${T_COMMAND})
set(TMP_CLUSTER_CMD ${CMAKE_SOURCE_DIR}/tests/TestRunner/tmp_cluster.py
--build-dir ${CMAKE_BINARY_DIR})
if(T_PROCESS_NUMBER)
list(APPEND TMP_CLUSTER_CMD --process-number ${T_PROCESS_NUMBER})
endif()
if(T_DISABLE_LOG_DUMP)
list(APPEND TMP_CLUSTER_CMD --disable-log-dump)
endif()
message(STATUS "Adding Client test ${T_NAME}")
add_test(NAME "${T_NAME}"
WORKING_DIRECTORY ${T_WORKING_DIRECTORY}
COMMAND ${Python_EXECUTABLE} ${TMP_CLUSTER_CMD}
--
${T_COMMAND})
if (T_TEST_TIMEOUT)
set_tests_properties("${T_NAME}" PROPERTIES TIMEOUT ${T_TEST_TIMEOUT})
else()
@ -449,7 +446,7 @@ function(add_fdbclient_test)
set_tests_properties("${T_NAME}" PROPERTIES ENVIRONMENT UBSAN_OPTIONS=print_stacktrace=1:halt_on_error=1)
endfunction()
# Creates a cluster file for a nonexistent cluster before running the specified command
# Creates a cluster file for a nonexistent cluster before running the specified command
# (usually a ctest test)
function(add_unavailable_fdbclient_test)
set(options DISABLED ENABLED)

View File

@ -5,6 +5,7 @@ env_set(USE_DTRACE ON BOOL "Enable dtrace probes on supported platforms")
env_set(USE_VALGRIND OFF BOOL "Compile for valgrind usage")
env_set(USE_VALGRIND_FOR_CTEST ${USE_VALGRIND} BOOL "Use valgrind for ctest")
env_set(ALLOC_INSTRUMENTATION OFF BOOL "Instrument alloc")
env_set(USE_JEMALLOC ON BOOL "Link with jemalloc")
env_set(USE_ASAN OFF BOOL "Compile with address sanitizer")
env_set(USE_GCOV OFF BOOL "Compile with gcov instrumentation")
env_set(USE_MSAN OFF BOOL "Compile with memory sanitizer. To avoid false positives you need to dynamically link to a msan-instrumented libc++ and libc++abi, which you must compile separately. See https://github.com/google/sanitizers/wiki/MemorySanitizerLibcxxHowTo#instrumented-libc.")

View File

@ -1,6 +1,5 @@
add_library(jemalloc INTERFACE)
set(USE_JEMALLOC ON)
# We don't want to use jemalloc on Windows
# Nor on FreeBSD, where jemalloc is the default system allocator
if(USE_SANITIZER OR WIN32 OR (CMAKE_SYSTEM_NAME STREQUAL "FreeBSD") OR APPLE)
@ -8,42 +7,29 @@ if(USE_SANITIZER OR WIN32 OR (CMAKE_SYSTEM_NAME STREQUAL "FreeBSD") OR APPLE)
return()
endif()
find_path(JEMALLOC_INCLUDE_DIR
NAMES
jemalloc/jemalloc.h
PATH_SUFFIXES
include
)
find_library(JEMALLOC NAMES libjemalloc.a)
find_library(JEMALLOC_PIC NAMES libjemalloc_pic.a)
if(NOT USE_JEMALLOC)
return()
endif()
add_library(im_jemalloc_pic STATIC IMPORTED)
add_library(im_jemalloc STATIC IMPORTED)
if(JEMALLOC_INCLUDE_DIR AND JEMALLOC AND JEMALLOC_PIC)
set_target_properties(im_jemalloc_pic PROPERTIES IMPORTED_LOCATION "${JEMALLOC_PIC}")
set_target_properties(im_jemalloc PROPERTIES IMPORTED_LOCATION "${JEMALLOC}")
target_include_directories(jemalloc INTERFACE "${JEMALLOC_INCLUDE_DIR}")
# the ordering here is important: for dynamic libraries we have to use all
# symbols that are in the library which was compiled with PIC (for executables
# we could omit the pic-library)
target_link_libraries(jemalloc INTERFACE im_jemalloc_pic im_jemalloc)
else()
include(ExternalProject)
set(JEMALLOC_DIR "${CMAKE_BINARY_DIR}/jemalloc")
ExternalProject_add(Jemalloc_project
URL "https://github.com/jemalloc/jemalloc/releases/download/5.2.1/jemalloc-5.2.1.tar.bz2"
URL_HASH SHA256=34330e5ce276099e2e8950d9335db5a875689a4c6a56751ef3b1d8c537f887f6
BUILD_BYPRODUCTS "${JEMALLOC_DIR}/include/jemalloc/jemalloc.h"
"${JEMALLOC_DIR}/lib/libjemalloc.a"
"${JEMALLOC_DIR}/lib/libjemalloc_pic.a"
CONFIGURE_COMMAND ./configure --prefix=${JEMALLOC_DIR} --enable-static --disable-cxx --enable-prof
BUILD_IN_SOURCE ON
BUILD_COMMAND make
INSTALL_DIR "${JEMALLOC_DIR}"
INSTALL_COMMAND make install)
add_dependencies(im_jemalloc Jemalloc_project)
add_dependencies(im_jemalloc_pic Jemalloc_project)
set_target_properties(im_jemalloc_pic PROPERTIES IMPORTED_LOCATION "${JEMALLOC_DIR}/lib/libjemalloc_pic.a")
set_target_properties(im_jemalloc PROPERTIES IMPORTED_LOCATION "${JEMALLOC_DIR}/lib/libjemalloc.a")
target_include_directories(jemalloc INTERFACE "${JEMALLOC_DIR}/include")
target_link_libraries(jemalloc INTERFACE im_jemalloc_pic im_jemalloc)
endif()
include(ExternalProject)
set(JEMALLOC_DIR "${CMAKE_BINARY_DIR}/jemalloc")
ExternalProject_add(Jemalloc_project
URL "https://github.com/jemalloc/jemalloc/releases/download/5.2.1/jemalloc-5.2.1.tar.bz2"
URL_HASH SHA256=34330e5ce276099e2e8950d9335db5a875689a4c6a56751ef3b1d8c537f887f6
BUILD_BYPRODUCTS "${JEMALLOC_DIR}/include/jemalloc/jemalloc.h"
"${JEMALLOC_DIR}/lib/libjemalloc.a"
"${JEMALLOC_DIR}/lib/libjemalloc_pic.a"
PATCH_COMMAND patch -p1 < ${CMAKE_SOURCE_DIR}/cmake/jemalloc.patch
CONFIGURE_COMMAND ./configure --prefix=${JEMALLOC_DIR} --enable-static --disable-cxx --enable-prof
BUILD_IN_SOURCE ON
BUILD_COMMAND make
INSTALL_DIR "${JEMALLOC_DIR}"
INSTALL_COMMAND make install)
add_dependencies(im_jemalloc Jemalloc_project)
add_dependencies(im_jemalloc_pic Jemalloc_project)
set_target_properties(im_jemalloc_pic PROPERTIES IMPORTED_LOCATION "${JEMALLOC_DIR}/lib/libjemalloc_pic.a")
set_target_properties(im_jemalloc PROPERTIES IMPORTED_LOCATION "${JEMALLOC_DIR}/lib/libjemalloc.a")
target_include_directories(jemalloc INTERFACE "${JEMALLOC_DIR}/include")
target_link_libraries(jemalloc INTERFACE im_jemalloc_pic im_jemalloc)

38
cmake/jemalloc.patch Normal file
View File

@ -0,0 +1,38 @@
diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h
index 68e558ab..87bb2280 100644
--- a/include/jemalloc/internal/sz.h
+++ b/include/jemalloc/internal/sz.h
@@ -266,7 +266,7 @@ sz_sa2u(size_t size, size_t alignment) {
assert(alignment != 0 && ((alignment - 1) & alignment) == 0);
/* Try for a small size class. */
- if (size <= SC_SMALL_MAXCLASS && alignment < PAGE) {
+ if (size <= SC_SMALL_MAXCLASS && alignment <= PAGE) {
/*
* Round size up to the nearest multiple of alignment.
*
diff --git a/src/arena.c b/src/arena.c
index ba50e410..dc7646e6 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -1533,10 +1533,17 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
bool zero, tcache_t *tcache) {
void *ret;
- if (usize <= SC_SMALL_MAXCLASS
- && (alignment < PAGE
- || (alignment == PAGE && (usize & PAGE_MASK) == 0))) {
+ if (usize <= SC_SMALL_MAXCLASS) {
/* Small; alignment doesn't require special slab placement. */
+
+ /* usize should be a result of sz_sa2u() */
+ assert((usize & (alignment - 1)) == 0);
+
+ /*
+ * Small usize can't come from an alignment larger than a page.
+ */
+ assert(alignment <= PAGE);
+
ret = arena_malloc(tsdn, arena, usize, sz_size2index(usize),
zero, tcache, true);
} else {

View File

@ -359,7 +359,7 @@ namespace SummarizeTest
}
int result = 0;
bool unseedCheck = random.NextDouble() < unseedRatio;
bool unseedCheck = !noSim && random.NextDouble() < unseedRatio;
for (int i = 0; i < maxTries; ++i)
{
bool logOnRetryableError = i == maxTries - 1;

View File

@ -41,10 +41,10 @@ def print_stacks(stack_count, sort_by_count):
sort_dict = counts if sort_by_count else sizes
ordered_list = [(val, backtrace) for (backtrace, val) in sort_dict.items()]
ordered_list.sort(reverse=True)
ordered_list.sort()
if stack_count:
ordered_list = ordered_list[:stack_count]
ordered_list = ordered_list[-stack_count:]
for size, backtrace in ordered_list:
print(str.format('bytes={0:<10} count={1:<8} {2}', sizes[backtrace], counts[backtrace], backtrace))

View File

@ -192,6 +192,8 @@ class BaseInfo(object):
self.start_timestamp = bb.get_double()
if protocol_version >= PROTOCOL_VERSION_6_3:
self.dc_id = bb.get_bytes_with_length()
if protocol_version >= PROTOCOL_VERSION_7_1:
self.tenant = bb.get_bytes_with_length()
class GetVersionInfo(BaseInfo):
def __init__(self, bb, protocol_version):

View File

@ -6,13 +6,13 @@ Data distribution manages the lifetime of storage servers, decides which storage
## Components
**Storage server (`struct TCServerInfo`):** DD creates a TCServerInfo object for each storage server (SS). The TCServerInfo includes: (1) the SS locality, which includes the processID that is unique to ip:port, the zoneId that specifies which rack the SS is on, and the dcId that specifies which DC the SS is in; (2) the servers teams, which will be discussed in the following paragraph; (3) the tracker that monitor the status of the server; and (4) extra information related to the servers interface and preference. A server is healthy if its storage engine on the process is the same with the configured storage engine, and it is marked as desired by DD.
**Storage server (`class TCServerInfo`):** DD creates a TCServerInfo object for each storage server (SS). The TCServerInfo includes: (1) the SS locality, which includes the processID that is unique to ip:port, the zoneId that specifies which rack the SS is on, and the dcId that specifies which DC the SS is in; (2) the server's teams, which will be discussed in the following paragraph; (3) the tracker that monitors the status of the server; and (4) extra information related to the server's interface and preference. A server is healthy if its storage engine on the process is the same as the configured storage engine, and it is marked as desired by DD.
**Machine (`struct TCMachineInfo`)**: A machine in FDB is considered as a rack, because a typical FDB cluster will only use one physical host from each rack in the datacenter to reduce the impact of regular rack-maintenance events on the cluster. All servers on the same rack belong to the same machine. A machine is healthy if there exists a healthy server on the machine.
**Machine (`class TCMachineInfo`)**: A machine in FDB is considered as a rack, because a typical FDB cluster will only use one physical host from each rack in the datacenter to reduce the impact of regular rack-maintenance events on the cluster. All servers on the same rack belong to the same machine. A machine is healthy if there exists a healthy server on the machine.
**Server team (`struct TCTeamInfo`)**: A server team is a group of *k* servers that host the same key ranges, where *k* is the replication factor that is usually three. A server team is healthy if every server in the team is healthy and those servers localities satisfy the replication requirement. Servers are grouped into server teams to reduce the possibility of data unavailability events at the event of *k* server failures.
**Server team (`class TCTeamInfo`)**: A server team is a group of *k* servers that host the same key ranges, where *k* is the replication factor that is usually three. A server team is healthy if every server in the team is healthy and those servers' localities satisfy the replication requirement. Servers are grouped into server teams to reduce the possibility of data unavailability events in the event of *k* server failures.
**Machine team (`struct TCMachineTeamInfo`)**: A machine team is a group of k machines, where k is the replication factor. Each server team must be on a machine team, meaning that each server in the server team is on a machine in the machine team and that no two servers are on the same machine. Similar to the purpose of server teams, machine teams are used to reduce the possibility of data unavailability events at the event of *k* machine failures. A machine team is healthy if every machine on the team is healthy and machines localities satisfy the replication policy.
**Machine team (`class TCMachineTeamInfo`)**: A machine team is a group of *k* machines, where *k* is the replication factor. Each server team must be on a machine team, meaning that each server in the server team is on a machine in the machine team and that no two servers are on the same machine. Similar to the purpose of server teams, machine teams are used to reduce the possibility of data unavailability events in the event of *k* machine failures. A machine team is healthy if every machine on the team is healthy and the machines' localities satisfy the replication policy.
**`TeamCollection`**: It has a global view of all servers and server teams, machines and machine teams. With the information, it creates server teams and machine teams. It also maintains the configuration settings for DD, which is used to create teams and decide which type of storage servers to recruit.
@ -30,7 +30,7 @@ Data distribution manages the lifetime of storage servers, decides which storage
*`moveKeysLockOwnerKey`* (`\xff/moveKeysLock/Owner`) and *moveKeysLockWriteKey* (`\xff/moveKeysLock/Write`): When DD moves keys, it must grab the moveKeysLock, which consists of an owner key and a write key. The owner key (i.e., `moveKeysLockOwnerKey`) specifies which DD currently owns the lock. The write key (i.e., `moveKeysLockWriteKey`) specifies which DD is currently changing the mapping between keys and servers (i.e., operating on serverKeys and keyServers subspace). If DD finds it does not own both keys when it tries to move keys, it will kill itself by throwing an error. The cluster controller will recruit a new one.
When a new DD is initialized, it will set itself as the owner by setting its random UID to the `moveKeysLockOwnerKey`. Since the owner key has only one value, at most one DD can own the DD-related system subspace. This avoids the potential race condition between multiple DDs which may co-exit during DD recruitment.
When a new DD is initialized, it will set itself as the owner by setting its random UID to the `moveKeysLockOwnerKey`. Since the owner key has only one value, at most one DD can own the DD-related system subspace. This avoids the potential race condition between multiple DDs which may co-exist during DD recruitment.
**Transaction State Store (txnStateStore)**: It is a replica of the special keyspace that stores the cluster's state, such as which SS is responsible for which shard. Because commit proxies use txnStateStore to decide which tLog and SS should receive a mutation, commit proxies must have a consistent view of txnStateStore. Therefore, changes to txnStateStore must be propagated to all commit proxies in total order. To achieve that, we use the special transaction (`applyMetaMutations`) to update txnStateStore and use resolvers to ensure the total ordering (serializable snapshot isolation).

View File

@ -6,6 +6,7 @@
.. |database-type| replace:: ``FDBDatabase``
.. |database-class| replace:: :type:`FDBDatabase`
.. |database-auto| replace:: FIXME
.. |tenant-type| replace:: ``FDBTenant``
.. |transaction-class| replace:: FIXME
.. |get-key-func| replace:: :func:`fdb_transaction_get_key()`
.. |get-range-func| replace:: :func:`fdb_transaction_get_range()`
@ -419,9 +420,20 @@ An |database-blurb1| Modifications to a database are performed via transactions.
|option-doc|
.. function:: fdb_error_t fdb_database_open_tenant(FDBDatabase* database, uint8_t const* tenant_name, int tenant_name_length, FDBTenant** out_tenant)
Opens a tenant on the given database. All transactions created by this tenant will operate on the tenant's key-space. The caller assumes ownership of the :type:`FDBTenant` object and must destroy it with :func:`fdb_tenant_destroy()`.
``tenant_name``
The name of the tenant being accessed, as a byte string.
``tenant_name_length``
The length of the tenant name byte string.
``*out_tenant``
Set to point to the newly created :type:`FDBTenant`.
.. function:: fdb_error_t fdb_database_create_transaction(FDBDatabase* database, FDBTransaction** out_transaction)
Creates a new transaction on the given database. The caller assumes ownership of the :type:`FDBTransaction` object and must destroy it with :func:`fdb_transaction_destroy()`.
Creates a new transaction on the given database without using a tenant, meaning that it will operate on the entire database key-space. The caller assumes ownership of the :type:`FDBTransaction` object and must destroy it with :func:`fdb_transaction_destroy()`.
``*out_transaction``
Set to point to the newly created :type:`FDBTransaction`.
@ -454,7 +466,7 @@ An |database-blurb1| Modifications to a database are performed via transactions.
The function will change the region configuration to have a positive priority for the chosen dcId, and a negative priority for all other dcIds.
In particular, no error will be thrown if the given dcId does not exist. It will just not attemp to force a recovery.
In particular, no error will be thrown if the given dcId does not exist. It will just not attempt to force a recovery.
If the database has already recovered, the function does nothing. Thus it's safe to call it multiple times.
@ -486,6 +498,26 @@ An |database-blurb1| Modifications to a database are performed via transactions.
Returns a value where 0 indicates that the client is idle and 1 (or larger) indicates that the client is saturated. By default, this value is updated every second.
Tenant
======
|tenant-blurb1|
.. type:: FDBTenant
An opaque type that represents a tenant in the FoundationDB C API.
.. function:: void fdb_tenant_destroy(FDBTenant* tenant)
Destroys an :type:`FDBTenant` object. It must be called exactly once for each successful call to :func:`fdb_database_open_tenant()`. This function only destroys a handle to the tenant -- the tenant and its data will be fine!
.. function:: fdb_error_t fdb_tenant_create_transaction(FDBTenant* tenant, FDBTransaction** out_transaction)
Creates a new transaction on the given tenant. This transaction will operate within the tenant's key-space and cannot access data outside the tenant. The caller assumes ownership of the :type:`FDBTransaction` object and must destroy it with :func:`fdb_transaction_destroy()`.
``*out_transaction``
Set to point to the newly created :type:`FDBTransaction`.
Transaction
===========

View File

@ -74,6 +74,9 @@
.. |database-sync| replace::
The convenience methods provided by |database-type| have the same signature as the corresponding methods of ``Transaction``. However, most of the |database-type| methods are fully synchronous. (An exception is the methods for watches.) As a result, the |database-type| methods do not support the use of :ref:`implicit parallelism with futures <developer-guide-programming-with-futures>`.
.. |tenant-blurb1| replace::
|tenant-type| represents a FoundationDB tenant. Tenants are optional named transaction domains that can be used to provide multiple disjoint key-spaces to client applications. A transaction created in a tenant will be limited to the keys contained within that tenant, and transactions operating on different tenants can use the same key names without interfering with each other.
.. |keysel-blurb1| replace::
FoundationDB's lexicographically ordered data model permits finding keys based on their order (for example, finding the first key in the database greater than a given key). Key selectors represent a description of a key in the database that could be resolved to an actual key by |get-key-func| or used directly as the beginning or end of a range in |get-range-func|.
@ -627,4 +630,4 @@
.. |option-set-distributed-client-tracer| replace::
Sets a tracer to run on the client. Should be set to the same value as the tracer set on the server.
Sets a tracer to run on the client. Should be set to the same value as the tracer set on the server.

View File

@ -7,6 +7,7 @@
.. |database-type| replace:: ``Database``
.. |database-class| replace:: :class:`Database`
.. |database-auto| replace:: the :func:`@fdb.transactional <transactional>` decorator
.. |tenant-type| replace:: FIXME
.. |transaction-class| replace:: :class:`Transaction`
.. |get-key-func| replace:: :func:`Transaction.get_key`
.. |get-range-func| replace:: :func:`Transaction.get_range`

View File

@ -5,6 +5,7 @@
.. |database-type| replace:: ``Database``
.. |database-class| replace:: :class:`Database`
.. |database-auto| replace:: :meth:`Database.transact`
.. |tenant-type| replace:: FIXME
.. |transaction-class| replace:: :class:`Transaction`
.. |get-key-func| replace:: :meth:`Transaction.get_key`
.. |get-range-func| replace:: :meth:`Transaction.get_range`

View File

@ -115,7 +115,7 @@ Here is a complete list of valid parameters:
*request_timeout_min* (or *rtom*) - Minimum number of seconds to wait for a request to succeed after a connection is established.
*request_tries* (or *rt*) - Number of times to try each request until a parseable HTTP response other than 429 is received.
*request_tries* (or *rt*) - Number of times to try each request until a parsable HTTP response other than 429 is received.
*requests_per_second* (or *rps*) - Max number of requests to start per second.

View File

@ -11,7 +11,7 @@ Testing Error Handling with Buggify
FoundationDB clients need to handle errors correctly. Wrong error handling can lead to many bugs - in the worst case it can
lead to a corrupted database. Because of this it is important that an application or layer author properly tests their
application during failure scenarios. But this is non-trivial. In a developement environment cluster failures are very
application during failure scenarios. But this is non-trivial. In a development environment cluster failures are very
unlikely and it is therefore possible that certain types of exceptions are never tested in a controlled environment.
The simplest way of testing for these kinds of errors is a simple mechanism called ``Buggify``. If this option is enabled
@ -327,7 +327,7 @@ processes with the class test. So above 2-step process becomes a bit more comple
1. Write the test (same as above).
2. Set up a cluster with as many test clients as you want.
3. Run the orchestor to actually execute the test.
3. Run the orchestrator to actually execute the test.
Step 1. is explained further up. For step 2., please refer to the general FoundationDB
configuration. The main difference to a normal FoundationDB cluster is that some processes

View File

@ -8,6 +8,7 @@
.. |database-type| replace:: ``Database``
.. |database-class| replace:: ``Database``
.. |database-auto| replace:: FIXME
.. |tenant-type| replace:: FIXME
.. |transaction-class| replace:: ``Transaction``
.. |get-key-func| replace:: get_key()
.. |get-range-func| replace:: get_range()

View File

@ -8,6 +8,7 @@
.. |database-type| replace:: ``Database``
.. |database-class| replace:: ``Database``
.. |database-auto| replace:: FIXME
.. |tenant-type| replace:: FIXME
.. |transaction-class| replace:: ``Transaction``
.. |get-key-func| replace:: get_key()
.. |get-range-func| replace:: get_range()
@ -915,7 +916,7 @@ When using FoundationDB we strongly recommend users to use the retry-loop. In Py
except FDBError as e:
tr.on_error(e.code).wait()
This is also what the transaction decoration in python does, if you pass a ``Database`` object to a decorated function. There are some interesting properies of this retry loop:
This is also what the transaction decoration in python does, if you pass a ``Database`` object to a decorated function. There are some interesting properties of this retry loop:
* We never create a new transaction within that loop. Instead ``tr.on_error`` will create a soft reset on the transaction.
* ``tr.on_error`` returns a future. This is because ``on_error`` will do back off to make sure we don't overwhelm the cluster.

View File

@ -38,6 +38,8 @@ The latest changes are detailed in :ref:`release-notes`. The documentation has t
* :doc:`administration` contains documentation on administering FoundationDB.
* :doc:`monitored-metrics` contains documentation on monitoring and alerting for FoundationDB.
* :doc:`redwood` contains documentation on Redwood Storage Engine.
* :doc:`visibility` contains documentation related to Visibility into FoundationDB.
@ -55,6 +57,7 @@ The latest changes are detailed in :ref:`release-notes`. The documentation has t
api-reference
tutorials
administration
monitored-metrics
redwood
visibility
earlier-release-notes

File diff suppressed because it is too large Load Diff

View File

@ -121,8 +121,8 @@ Aggregate stats about cluster health. Reading this key alone is slightly cheaper
**Field** **Type** **Description**
----------------------------------- -------- ---------------
batch_limited boolean Whether or not the cluster is limiting batch priority transactions
limiting_storage_durability_lag number storage_durability_lag that ratekeeper is using to determing throttling (see the description for storage_durability_lag)
limiting_storage_queue number storage_queue that ratekeeper is using to determing throttling (see the description for storage_queue)
limiting_storage_durability_lag number storage_durability_lag that ratekeeper is using to determine throttling (see the description for storage_durability_lag)
limiting_storage_queue number storage_queue that ratekeeper is using to determine throttling (see the description for storage_queue)
tps_limit number The rate at which normal priority transactions are allowed to start
worst_storage_durability_lag number See the description for storage_durability_lag
worst_storage_queue number See the description for storage_queue
@ -205,6 +205,7 @@ that process, and wait for necessary data to be moved away.
#. ``\xff\xff/management/failed_locality/<locality>`` Read/write. Indicates that the cluster should consider matching processes as permanently failed. This allows the cluster to avoid maintaining extra state and doing extra work in the hope that these processes come back. See :ref:`removing machines from a cluster <removing-machines-from-a-cluster>` for documentation for the corresponding fdbcli command.
#. ``\xff\xff/management/options/excluded_locality/force`` Read/write. Setting this key disables safety checks for writes to ``\xff\xff/management/excluded_locality/<locality>``. Setting this key only has an effect in the current transaction and is not persisted on commit.
#. ``\xff\xff/management/options/failed_locality/force`` Read/write. Setting this key disables safety checks for writes to ``\xff\xff/management/failed_locality/<locality>``. Setting this key only has an effect in the current transaction and is not persisted on commit.
#. ``\xff\xff/management/tenant_map/<tenant>`` Read/write. Setting a key in this range to any value will result in a tenant being created with name ``<tenant>``. Clearing a key in this range will delete the tenant with name ``<tenant>``. Reading all or a portion of this range will return the list of tenants currently present in the cluster, excluding any changes in this transaction. Values read in this range will be JSON objects containing the metadata for the associated tenants.
An exclusion is syntactically either an ip address (e.g. ``127.0.0.1``), or
an ip address and port (e.g. ``127.0.0.1:4500``) or any locality (e.g. ``locality_dcid:primary-satellite`` or

View File

@ -13,7 +13,7 @@ This document covers the operation and architecture of the Testing Storage Serve
Summary
============
The TSS feature allows FoundationDB to run an "untrusted" storage engine (the *testing storage engine*) directly in a QA or production envronment with identical workload to the current storage engine, with zero impact on durability or correctness, and minimal impact on performance.
The TSS feature allows FoundationDB to run an "untrusted" storage engine (the *testing storage engine*) directly in a QA or production environment with identical workload to the current storage engine, with zero impact on durability or correctness, and minimal impact on performance.
This allows a FoundationDB cluster operator to validate the correctness and performance of a different storage engine on the exact cluster workload before migrating data to the different storage engine.
@ -44,10 +44,10 @@ The ``status`` command in the FDB :ref:`command line interface <command-line-int
Trace Events
----------------------
Whenever a client detects a *TSS Mismatch*, or when the SS and TSS response differ, and the difference can only be explained by different storage engine contents, it will emit an error-level trace event with a type starting with ``TSSMismatch``, with a different type for each read request. This trace event will include all of the information necessary to investgate the mismatch, such as the TSS storage ID, the full request data, and the summarized replies (full keys and checksummed values) from both the SS and TSS.
Whenever a client detects a *TSS Mismatch*, or when the SS and TSS response differ, and the difference can only be explained by different storage engine contents, it will emit an error-level trace event with a type starting with ``TSSMismatch``, with a different type for each read request. This trace event will include all of the information necessary to investigate the mismatch, such as the TSS storage ID, the full request data, and the summarized replies (full keys and checksummed values) from both the SS and TSS.
Each client emits a ``TSSClientMetrics`` trace event for each TSS pair in the cluster that it has sent requests to recently, similar to the ``TransactionMetrics`` trace event.
It contains the TSS storage ID, and latency statistics for each type of read request. It also includes a count of any mismatches, and a histogram of error codes recieved by the SS and TSS to ensure the storage engines have similar error rates and types.
It contains the TSS storage ID, and latency statistics for each type of read request. It also includes a count of any mismatches, and a histogram of error codes received by the SS and TSS to ensure the storage engines have similar error rates and types.
The ``StorageMetrics`` trace event emitted by storage servers includes the storage ID of its pair if part of a TSS pairing, and includes a ``TSSJointID`` detail with a unique id for the SS/TSS pair that enables correlating the separate StorageMetrics events from the SS and TSS.
@ -101,7 +101,7 @@ The pair recruitment logic is as follows:
* Once DD gets a candidate worker from the Cluster Controller, hold that worker as a desired TSS process.
* Once DD gets a second candidate worker from the Cluster Controller, initialize that worker as a normal SS.
* Once the second candidate worker is successfully initialized, initialize the first candidate worker as a TSS, passing it the storage ID, starting tag + version, and other information from its SS pair. Because the TSS reads from the same tag starting at the same version, it is guaranteed to recieve the same mutations and data movements as its pair.
* Once the second candidate worker is successfully initialized, initialize the first candidate worker as a TSS, passing it the storage ID, starting tag + version, and other information from its SS pair. Because the TSS reads from the same tag starting at the same version, it is guaranteed to receive the same mutations and data movements as its pair.
One implication of this is, during TSS recruitment, the cluster is effectively down one storage process until a second storage process becomes available.
While clusters should be able to handle being down a single storage process anyway to tolerate machine failure, an active TSS recruitment will be cancelled if the lack of that single storage process is causing the cluster to be unhealthy. Similarly, if the cluster is unhealthy and unable to find new teams to replicate data to, any existing TSS processes may be killed to make room for new storage servers.
@ -121,4 +121,4 @@ Because it is only enabled on a small percentage of the cluster and only compare
TSS testing using the recommended small number of TSS pairs may also miss performance pathologies from workloads not experienced by the specific storage teams with TSS pairs in their membership.
TSS testing is not a substitute for full-cluster performance and correctness testing or simulation testing.
TSS testing is not a substitute for full-cluster performance and correctness testing or simulation testing.

View File

@ -1661,7 +1661,7 @@ ACTOR Future<std::string> getLayerStatus(Reference<ReadYourWritesTransaction> tr
return json;
}
// Check for unparseable or expired statuses and delete them.
// Check for unparsable or expired statuses and delete them.
// First checks the first doc in the key range, and if it is valid, alive and not "me" then
// returns. Otherwise, checks the rest of the range as well.
ACTOR Future<Void> cleanupStatus(Reference<ReadYourWritesTransaction> tr,

View File

@ -132,7 +132,7 @@ ACTOR Future<bool> changeCoordinators(Reference<IDatabase> db, std::vector<Strin
throw;
}
}
std::string new_coordinators_str = boost::algorithm::join(newCoordinatorslist, ", ");
std::string new_coordinators_str = boost::algorithm::join(newCoordinatorslist, ",");
tr->set(fdb_cli::coordinatorsProcessSpecialKey, new_coordinators_str);
}
wait(safeThreadFutureToFuture(tr->commit()));

View File

@ -1201,7 +1201,7 @@ void printStatus(StatusObjectReader statusObj,
// "db" is the handler to the multiversion database
// localDb is the native Database object
// localDb is rarely needed except the "db" has not establised a connection to the cluster where the operation will
// localDb is rarely needed, except when "db" has not established a connection to the cluster, in which case the
// operation will return Never; since we expect the status command to always return, we use "localDb" to return the
// default result
ACTOR Future<bool> statusCommandActor(Reference<IDatabase> db,
Database localDb,
@ -1255,4 +1255,4 @@ CommandFactory statusFactory(
"statistics.\n\nSpecifying `minimal' will provide a minimal description of the status of your "
"database.\n\nSpecifying `details' will provide load information for individual "
"workers.\n\nSpecifying `json' will provide status information in a machine readable JSON format."));
} // namespace fdb_cli
} // namespace fdb_cli

View File

@ -30,7 +30,7 @@
#include "fdbclient/BlobWorkerInterface.h"
#include "flow/actorcompiler.h" // This must be the last #include.
// TODO more efficient data structure besides std::map? PTree is unecessary since this isn't versioned, but some other
// TODO more efficient data structure besides std::map? PTree is unnecessary since this isn't versioned, but some other
// sorted thing could work. And if it used arenas it'd probably be more efficient with allocations, since everything
// else is in 1 arena and discarded at the end.

View File

@ -128,6 +128,7 @@ set(FDBCLIENT_SRCS
StatusClient.h
StorageServerInterface.cpp
StorageServerInterface.h
StorageCheckpoint.h
Subspace.cpp
Subspace.h
StackLineage.h
@ -138,6 +139,7 @@ set(FDBCLIENT_SRCS
TagThrottle.actor.h
TaskBucket.actor.cpp
TaskBucket.h
Tenant.cpp
Tenant.h
TestKnobCollection.cpp
TestKnobCollection.h

View File

@ -61,6 +61,7 @@ void ClientKnobs::initialize(Randomize randomize) {
init( WRONG_SHARD_SERVER_DELAY, .01 ); if( randomize && BUGGIFY ) WRONG_SHARD_SERVER_DELAY = deterministicRandom()->random01(); // FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY; // SOMEDAY: This delay can limit performance of retrieving data when the cache is mostly wrong (e.g. dumping the database after a test)
init( FUTURE_VERSION_RETRY_DELAY, .01 ); if( randomize && BUGGIFY ) FUTURE_VERSION_RETRY_DELAY = deterministicRandom()->random01();// FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY;
init( UNKNOWN_TENANT_RETRY_DELAY, 0.0 ); if( randomize && BUGGIFY ) UNKNOWN_TENANT_RETRY_DELAY = deterministicRandom()->random01();
init( REPLY_BYTE_LIMIT, 80000 );
init( DEFAULT_BACKOFF, .01 ); if( randomize && BUGGIFY ) DEFAULT_BACKOFF = deterministicRandom()->random01();
init( DEFAULT_MAX_BACKOFF, 1.0 );
@ -90,6 +91,8 @@ void ClientKnobs::initialize(Randomize randomize) {
init( LOCATION_CACHE_EVICTION_SIZE_SIM, 10 ); if( randomize && BUGGIFY ) LOCATION_CACHE_EVICTION_SIZE_SIM = 3;
init( LOCATION_CACHE_ENDPOINT_FAILURE_GRACE_PERIOD, 60 );
init( LOCATION_CACHE_FAILED_ENDPOINT_RETRY_INTERVAL, 60 );
init( TENANT_CACHE_EVICTION_SIZE, 100000 );
init( TENANT_CACHE_EVICTION_SIZE_SIM, 10 ); if( randomize && BUGGIFY ) TENANT_CACHE_EVICTION_SIZE_SIM = 3;
init( GET_RANGE_SHARD_LIMIT, 2 );
init( WARM_RANGE_SHARD_LIMIT, 100 );

View File

@ -60,6 +60,7 @@ public:
double WRONG_SHARD_SERVER_DELAY; // SOMEDAY: This delay can limit performance of retrieving data when the cache is
// mostly wrong (e.g. dumping the database after a test)
double FUTURE_VERSION_RETRY_DELAY;
double UNKNOWN_TENANT_RETRY_DELAY;
int REPLY_BYTE_LIMIT;
double DEFAULT_BACKOFF;
double DEFAULT_MAX_BACKOFF;
@ -90,6 +91,8 @@ public:
int LOCATION_CACHE_EVICTION_SIZE_SIM;
double LOCATION_CACHE_ENDPOINT_FAILURE_GRACE_PERIOD;
double LOCATION_CACHE_FAILED_ENDPOINT_RETRY_INTERVAL;
int TENANT_CACHE_EVICTION_SIZE;
int TENANT_CACHE_EVICTION_SIZE_SIM;
int GET_RANGE_SHARD_LIMIT;
int WARM_RANGE_SHARD_LIMIT;

View File

@ -41,7 +41,8 @@ enum class TransactionPriorityType : int { PRIORITY_DEFAULT = 0, PRIORITY_BATCH
static_assert(sizeof(TransactionPriorityType) == 4, "transaction_profiling_analyzer.py assumes this field has size 4");
struct Event {
Event(EventType t, double ts, const Optional<Standalone<StringRef>>& dc) : type(t), startTs(ts) {
Event(EventType t, double ts, const Optional<Standalone<StringRef>>& dc, const Optional<TenantName>& tenant)
: type(t), startTs(ts), tenant(tenant) {
if (dc.present())
dcId = dc.get();
}
@ -49,7 +50,9 @@ struct Event {
template <typename Ar>
Ar& serialize(Ar& ar) {
if (ar.protocolVersion().version() >= (uint64_t)0x0FDB00B063010001LL) {
if (ar.protocolVersion().hasTenants()) {
return serializer(ar, type, startTs, dcId, tenant);
} else if (ar.protocolVersion().version() >= (uint64_t)0x0FDB00B063010001LL) {
return serializer(ar, type, startTs, dcId);
} else {
return serializer(ar, type, startTs);
@ -59,8 +62,10 @@ struct Event {
EventType type{ EventType::UNSET };
double startTs{ 0 };
Key dcId{};
Optional<TenantName> tenant{};
void logEvent(std::string id, int maxFieldLength) const {}
void augmentTraceEvent(TraceEvent& event) const { event.detail("Tenant", tenant); }
};
struct EventGetVersion : public Event {
@ -77,7 +82,9 @@ struct EventGetVersion : public Event {
double latency;
void logEvent(std::string id, int maxFieldLength) const {
TraceEvent("TransactionTrace_GetVersion").detail("TransactionID", id).detail("Latency", latency);
TraceEvent event("TransactionTrace_GetVersion");
event.detail("TransactionID", id).detail("Latency", latency);
augmentTraceEvent(event);
}
};
@ -97,10 +104,9 @@ struct EventGetVersion_V2 : public Event {
TransactionPriorityType priorityType{ TransactionPriorityType::UNSET };
void logEvent(std::string id, int maxFieldLength) const {
TraceEvent("TransactionTrace_GetVersion")
.detail("TransactionID", id)
.detail("Latency", latency)
.detail("PriorityType", priorityType);
TraceEvent event("TransactionTrace_GetVersion");
event.detail("TransactionID", id).detail("Latency", latency).detail("PriorityType", priorityType);
augmentTraceEvent(event);
}
};
@ -110,8 +116,9 @@ struct EventGetVersion_V3 : public Event {
const Optional<Standalone<StringRef>>& dcId,
double lat,
TransactionPriority priority,
Version version)
: Event(EventType::GET_VERSION_LATENCY, ts, dcId), latency(lat), readVersion(version) {
Version version,
const Optional<TenantName>& tenant)
: Event(EventType::GET_VERSION_LATENCY, ts, dcId, tenant), latency(lat), readVersion(version) {
switch (priority) {
// Unfortunately, the enum serialized here disagrees with the enum used elsewhere for the values used by each
// priority
@ -143,17 +150,23 @@ struct EventGetVersion_V3 : public Event {
Version readVersion;
void logEvent(std::string id, int maxFieldLength) const {
TraceEvent("TransactionTrace_GetVersion")
.detail("TransactionID", id)
TraceEvent event("TransactionTrace_GetVersion");
event.detail("TransactionID", id)
.detail("Latency", latency)
.detail("PriorityType", priorityType)
.detail("ReadVersion", readVersion);
augmentTraceEvent(event);
}
};
struct EventGet : public Event {
EventGet(double ts, const Optional<Standalone<StringRef>>& dcId, double lat, int size, const KeyRef& in_key)
: Event(EventType::GET_LATENCY, ts, dcId), latency(lat), valueSize(size), key(in_key) {}
EventGet(double ts,
const Optional<Standalone<StringRef>>& dcId,
double lat,
int size,
const KeyRef& in_key,
const Optional<TenantName>& tenant)
: Event(EventType::GET_LATENCY, ts, dcId, tenant), latency(lat), valueSize(size), key(in_key) {}
EventGet() {}
template <typename Ar>
@ -169,13 +182,14 @@ struct EventGet : public Event {
Key key;
void logEvent(std::string id, int maxFieldLength) const {
TraceEvent("TransactionTrace_Get")
.setMaxEventLength(-1)
TraceEvent event("TransactionTrace_Get");
event.setMaxEventLength(-1)
.detail("TransactionID", id)
.detail("Latency", latency)
.detail("ValueSizeBytes", valueSize)
.setMaxFieldLength(maxFieldLength)
.detail("Key", key);
augmentTraceEvent(event);
}
};
@ -185,8 +199,9 @@ struct EventGetRange : public Event {
double lat,
int size,
const KeyRef& start_key,
const KeyRef& end_key)
: Event(EventType::GET_RANGE_LATENCY, ts, dcId), latency(lat), rangeSize(size), startKey(start_key),
const KeyRef& end_key,
const Optional<TenantName>& tenant)
: Event(EventType::GET_RANGE_LATENCY, ts, dcId, tenant), latency(lat), rangeSize(size), startKey(start_key),
endKey(end_key) {}
EventGetRange() {}
@ -204,14 +219,15 @@ struct EventGetRange : public Event {
Key endKey;
void logEvent(std::string id, int maxFieldLength) const {
TraceEvent("TransactionTrace_GetRange")
.setMaxEventLength(-1)
TraceEvent event("TransactionTrace_GetRange");
event.setMaxEventLength(-1)
.detail("TransactionID", id)
.detail("Latency", latency)
.detail("RangeSizeBytes", rangeSize)
.setMaxFieldLength(maxFieldLength)
.detail("StartKey", startKey)
.detail("EndKey", endKey);
augmentTraceEvent(event);
}
};
@ -234,36 +250,40 @@ struct EventCommit : public Event {
void logEvent(std::string id, int maxFieldLength) const {
for (auto& read_range : req.transaction.read_conflict_ranges) {
TraceEvent("TransactionTrace_Commit_ReadConflictRange")
.setMaxEventLength(-1)
TraceEvent ev1("TransactionTrace_Commit_ReadConflictRange");
ev1.setMaxEventLength(-1)
.detail("TransactionID", id)
.setMaxFieldLength(maxFieldLength)
.detail("Begin", read_range.begin)
.detail("End", read_range.end);
augmentTraceEvent(ev1);
}
for (auto& write_range : req.transaction.write_conflict_ranges) {
TraceEvent("TransactionTrace_Commit_WriteConflictRange")
.setMaxEventLength(-1)
TraceEvent ev2("TransactionTrace_Commit_WriteConflictRange");
ev2.setMaxEventLength(-1)
.detail("TransactionID", id)
.setMaxFieldLength(maxFieldLength)
.detail("Begin", write_range.begin)
.detail("End", write_range.end);
augmentTraceEvent(ev2);
}
for (auto& mutation : req.transaction.mutations) {
TraceEvent("TransactionTrace_Commit_Mutation")
.setMaxEventLength(-1)
TraceEvent ev3("TransactionTrace_Commit_Mutation");
ev3.setMaxEventLength(-1)
.detail("TransactionID", id)
.setMaxFieldLength(maxFieldLength)
.detail("Mutation", mutation);
augmentTraceEvent(ev3);
}
TraceEvent("TransactionTrace_Commit")
.detail("TransactionID", id)
TraceEvent ev4("TransactionTrace_Commit");
ev4.detail("TransactionID", id)
.detail("Latency", latency)
.detail("NumMutations", numMutations)
.detail("CommitSizeBytes", commitBytes);
augmentTraceEvent(ev4);
}
};
@ -275,8 +295,9 @@ struct EventCommit_V2 : public Event {
int mut,
int bytes,
Version version,
const CommitTransactionRequest& commit_req)
: Event(EventType::COMMIT_LATENCY, ts, dcId), latency(lat), numMutations(mut), commitBytes(bytes),
const CommitTransactionRequest& commit_req,
const Optional<TenantName>& tenant)
: Event(EventType::COMMIT_LATENCY, ts, dcId, tenant), latency(lat), numMutations(mut), commitBytes(bytes),
commitVersion(version), req(commit_req) {}
EventCommit_V2() {}
@ -298,43 +319,51 @@ struct EventCommit_V2 : public Event {
void logEvent(std::string id, int maxFieldLength) const {
for (auto& read_range : req.transaction.read_conflict_ranges) {
TraceEvent("TransactionTrace_Commit_ReadConflictRange")
.setMaxEventLength(-1)
TraceEvent ev1("TransactionTrace_Commit_ReadConflictRange");
ev1.setMaxEventLength(-1)
.detail("TransactionID", id)
.setMaxFieldLength(maxFieldLength)
.detail("Begin", read_range.begin)
.detail("End", read_range.end);
augmentTraceEvent(ev1);
}
for (auto& write_range : req.transaction.write_conflict_ranges) {
TraceEvent("TransactionTrace_Commit_WriteConflictRange")
.setMaxEventLength(-1)
TraceEvent ev2("TransactionTrace_Commit_WriteConflictRange");
ev2.setMaxEventLength(-1)
.detail("TransactionID", id)
.setMaxFieldLength(maxFieldLength)
.detail("Begin", write_range.begin)
.detail("End", write_range.end);
augmentTraceEvent(ev2);
}
for (auto& mutation : req.transaction.mutations) {
TraceEvent("TransactionTrace_Commit_Mutation")
.setMaxEventLength(-1)
TraceEvent ev3("TransactionTrace_Commit_Mutation");
ev3.setMaxEventLength(-1)
.detail("TransactionID", id)
.setMaxFieldLength(maxFieldLength)
.detail("Mutation", mutation);
augmentTraceEvent(ev3);
}
TraceEvent("TransactionTrace_Commit")
.detail("TransactionID", id)
TraceEvent ev4("TransactionTrace_Commit");
ev4.detail("TransactionID", id)
.detail("CommitVersion", commitVersion)
.detail("Latency", latency)
.detail("NumMutations", numMutations)
.detail("CommitSizeBytes", commitBytes);
augmentTraceEvent(ev4);
}
};
struct EventGetError : public Event {
EventGetError(double ts, const Optional<Standalone<StringRef>>& dcId, int err_code, const KeyRef& in_key)
: Event(EventType::ERROR_GET, ts, dcId), errCode(err_code), key(in_key) {}
EventGetError(double ts,
const Optional<Standalone<StringRef>>& dcId,
int err_code,
const KeyRef& in_key,
const Optional<TenantName>& tenant)
: Event(EventType::ERROR_GET, ts, dcId, tenant), errCode(err_code), key(in_key) {}
EventGetError() {}
template <typename Ar>
@ -349,12 +378,13 @@ struct EventGetError : public Event {
Key key;
void logEvent(std::string id, int maxFieldLength) const {
TraceEvent("TransactionTrace_GetError")
.setMaxEventLength(-1)
TraceEvent event("TransactionTrace_GetError");
event.setMaxEventLength(-1)
.detail("TransactionID", id)
.detail("ErrCode", errCode)
.setMaxFieldLength(maxFieldLength)
.detail("Key", key);
augmentTraceEvent(event);
}
};
@ -363,8 +393,9 @@ struct EventGetRangeError : public Event {
const Optional<Standalone<StringRef>>& dcId,
int err_code,
const KeyRef& start_key,
const KeyRef& end_key)
: Event(EventType::ERROR_GET_RANGE, ts, dcId), errCode(err_code), startKey(start_key), endKey(end_key) {}
const KeyRef& end_key,
const Optional<TenantName>& tenant)
: Event(EventType::ERROR_GET_RANGE, ts, dcId, tenant), errCode(err_code), startKey(start_key), endKey(end_key) {}
EventGetRangeError() {}
template <typename Ar>
@ -380,13 +411,14 @@ struct EventGetRangeError : public Event {
Key endKey;
void logEvent(std::string id, int maxFieldLength) const {
TraceEvent("TransactionTrace_GetRangeError")
.setMaxEventLength(-1)
TraceEvent event("TransactionTrace_GetRangeError");
event.setMaxEventLength(-1)
.detail("TransactionID", id)
.detail("ErrCode", errCode)
.setMaxFieldLength(maxFieldLength)
.detail("StartKey", startKey)
.detail("EndKey", endKey);
augmentTraceEvent(event);
}
};
@ -394,8 +426,9 @@ struct EventCommitError : public Event {
EventCommitError(double ts,
const Optional<Standalone<StringRef>>& dcId,
int err_code,
const CommitTransactionRequest& commit_req)
: Event(EventType::ERROR_COMMIT, ts, dcId), errCode(err_code), req(commit_req) {}
const CommitTransactionRequest& commit_req,
const Optional<TenantName>& tenant)
: Event(EventType::ERROR_COMMIT, ts, dcId, tenant), errCode(err_code), req(commit_req) {}
EventCommitError() {}
template <typename Ar>
@ -412,32 +445,37 @@ struct EventCommitError : public Event {
void logEvent(std::string id, int maxFieldLength) const {
for (auto& read_range : req.transaction.read_conflict_ranges) {
TraceEvent("TransactionTrace_CommitError_ReadConflictRange")
.setMaxEventLength(-1)
TraceEvent ev1("TransactionTrace_CommitError_ReadConflictRange");
ev1.setMaxEventLength(-1)
.detail("TransactionID", id)
.setMaxFieldLength(maxFieldLength)
.detail("Begin", read_range.begin)
.detail("End", read_range.end);
augmentTraceEvent(ev1);
}
for (auto& write_range : req.transaction.write_conflict_ranges) {
TraceEvent("TransactionTrace_CommitError_WriteConflictRange")
.setMaxEventLength(-1)
TraceEvent ev2("TransactionTrace_CommitError_WriteConflictRange");
ev2.setMaxEventLength(-1)
.detail("TransactionID", id)
.setMaxFieldLength(maxFieldLength)
.detail("Begin", write_range.begin)
.detail("End", write_range.end);
augmentTraceEvent(ev2);
}
for (auto& mutation : req.transaction.mutations) {
TraceEvent("TransactionTrace_CommitError_Mutation")
.setMaxEventLength(-1)
TraceEvent ev3("TransactionTrace_CommitError_Mutation");
ev3.setMaxEventLength(-1)
.detail("TransactionID", id)
.setMaxFieldLength(maxFieldLength)
.detail("Mutation", mutation);
augmentTraceEvent(ev3);
}
TraceEvent("TransactionTrace_CommitError").detail("TransactionID", id).detail("ErrCode", errCode);
TraceEvent ev4("TransactionTrace_CommitError");
ev4.detail("TransactionID", id).detail("ErrCode", errCode);
augmentTraceEvent(ev4);
}
};
} // namespace FdbClientLogEvents

View File

@ -308,7 +308,7 @@ struct SplitShardReply {
};
// Split keyrange [shard.begin, shard.end) into num shards.
// Split points are chosen as the arithmeticlly equal division points of the given range.
// Split points are chosen as the arithmetically equal division points of the given range.
struct SplitShardRequest {
constexpr static FileIdentifier file_identifier = 1384443;
KeyRange shard;

View File

@ -171,9 +171,8 @@ struct CommitTransactionRequest : TimedRequest {
TenantInfo tenantInfo;
CommitTransactionRequest() : CommitTransactionRequest(TenantInfo(), SpanID()) {}
CommitTransactionRequest(TenantInfo const& tenantInfo, SpanID const& context)
: spanContext(context), flags(0), tenantInfo(tenantInfo) {}
CommitTransactionRequest() : CommitTransactionRequest(SpanID()) {}
CommitTransactionRequest(SpanID const& context) : spanContext(context), flags(0) {}
template <class Ar>
void serialize(Ar& ar) {

View File

@ -32,10 +32,11 @@
#include <numeric>
#include "fdbclient/ManagementAPI.actor.h"
#include "fdbclient/KeyBackedTypes.h"
#include "flow/actorcompiler.h" // has to be last include
#include <inttypes.h>
#include <map>
#include "flow/actorcompiler.h" // has to be last include
const Key DatabaseBackupAgent::keyAddPrefix = LiteralStringRef("add_prefix");
const Key DatabaseBackupAgent::keyRemovePrefix = LiteralStringRef("remove_prefix");
const Key DatabaseBackupAgent::keyRangeVersions = LiteralStringRef("range_versions");

View File

@ -107,13 +107,13 @@ public:
void addReleased(int released) { smoothReleased.addDelta(released); }
bool expired() { return expiration <= now(); }
bool expired() const { return expiration <= now(); }
void updateChecked() { lastCheck = now(); }
bool canRecheck() { return lastCheck < now() - CLIENT_KNOBS->TAG_THROTTLE_RECHECK_INTERVAL; }
bool canRecheck() const { return lastCheck < now() - CLIENT_KNOBS->TAG_THROTTLE_RECHECK_INTERVAL; }
double throttleDuration() {
double throttleDuration() const {
if (expiration <= now()) {
return 0.0;
}
@ -133,6 +133,7 @@ public:
};
struct WatchParameters : public ReferenceCounted<WatchParameters> {
const TenantInfo tenant;
const Key key;
const Optional<Value> value;
@ -143,7 +144,8 @@ struct WatchParameters : public ReferenceCounted<WatchParameters> {
const Optional<UID> debugID;
const UseProvisionalProxies useProvisionalProxies;
WatchParameters(Key key,
WatchParameters(TenantInfo tenant,
Key key,
Optional<Value> value,
Version version,
TagSet tags,
@ -151,8 +153,8 @@ struct WatchParameters : public ReferenceCounted<WatchParameters> {
TaskPriority taskID,
Optional<UID> debugID,
UseProvisionalProxies useProvisionalProxies)
: key(key), value(value), version(version), tags(tags), spanID(spanID), taskID(taskID), debugID(debugID),
useProvisionalProxies(useProvisionalProxies) {}
: tenant(tenant), key(key), value(value), version(version), tags(tags), spanID(spanID), taskID(taskID),
debugID(debugID), useProvisionalProxies(useProvisionalProxies) {}
};
class WatchMetadata : public ReferenceCounted<WatchMetadata> {
@ -208,6 +210,16 @@ struct EndpointFailureInfo {
double lastRefreshTime = 0;
};
struct KeyRangeLocationInfo {
TenantMapEntry tenantEntry;
KeyRange range;
Reference<LocationInfo> locations;
KeyRangeLocationInfo() {}
KeyRangeLocationInfo(TenantMapEntry tenantEntry, KeyRange range, Reference<LocationInfo> locations)
: tenantEntry(tenantEntry), range(range), locations(locations) {}
};
class DatabaseContext : public ReferenceCounted<DatabaseContext>, public FastAllocated<DatabaseContext>, NonCopyable {
public:
static DatabaseContext* allocateOnForeignThread() {
@ -239,17 +251,26 @@ public:
lockAware,
internal,
apiVersion,
switchable));
switchable,
defaultTenant));
}
std::pair<KeyRange, Reference<LocationInfo>> getCachedLocation(const KeyRef&, Reverse isBackward = Reverse::False);
bool getCachedLocations(const KeyRangeRef&,
std::vector<std::pair<KeyRange, Reference<LocationInfo>>>&,
Optional<KeyRangeLocationInfo> getCachedLocation(const Optional<TenantName>& tenant,
const KeyRef&,
Reverse isBackward = Reverse::False);
bool getCachedLocations(const Optional<TenantName>& tenant,
const KeyRangeRef&,
std::vector<KeyRangeLocationInfo>&,
int limit,
Reverse reverse);
Reference<LocationInfo> setCachedLocation(const KeyRangeRef&, const std::vector<struct StorageServerInterface>&);
void invalidateCache(const KeyRef&, Reverse isBackward = Reverse::False);
void invalidateCache(const KeyRangeRef&);
void cacheTenant(const TenantName& tenant, const TenantMapEntry& tenantEntry);
Reference<LocationInfo> setCachedLocation(const Optional<TenantName>& tenant,
const TenantMapEntry& tenantEntry,
const KeyRangeRef&,
const std::vector<struct StorageServerInterface>&);
void invalidateCachedTenant(const TenantNameRef& tenant);
void invalidateCache(const KeyRef& tenantPrefix, const KeyRef& key, Reverse isBackward = Reverse::False);
void invalidateCache(const KeyRef& tenantPrefix, const KeyRangeRef& keys);
// Records that `endpoint` is failed on a healthy server.
void setFailedEndpointOnHealthyServer(const Endpoint& endpoint);
@ -296,9 +317,9 @@ public:
void removeWatch();
// watch map operations
Reference<WatchMetadata> getWatchMetadata(KeyRef key) const;
Key setWatchMetadata(Reference<WatchMetadata> metadata);
void deleteWatchMetadata(KeyRef key);
Reference<WatchMetadata> getWatchMetadata(int64_t tenantId, KeyRef key) const;
void setWatchMetadata(Reference<WatchMetadata> metadata);
void deleteWatchMetadata(int64_t tenant, KeyRef key);
void clearWatchMetadata();
void setOption(FDBDatabaseOptions::Option option, Optional<StringRef> value);
@ -361,7 +382,8 @@ public:
LockAware,
IsInternal = IsInternal::True,
int apiVersion = Database::API_VERSION_LATEST,
IsSwitchable = IsSwitchable::False);
IsSwitchable = IsSwitchable::False,
Optional<TenantName> defaultTenant = Optional<TenantName>());
explicit DatabaseContext(const Error& err);
@ -383,6 +405,10 @@ public:
QueueModel queueModel;
EnableLocalityLoadBalance enableLocalityLoadBalance{ EnableLocalityLoadBalance::False };
// The tenant used when none is specified for a transaction. Ordinarily this is unspecified, in which case the raw
// key-space is used.
Optional<TenantName> defaultTenant;
struct VersionRequest {
SpanID spanContext;
Promise<GetReadVersionReply> reply;
@ -418,8 +444,10 @@ public:
// Cache of location information
int locationCacheSize;
int tenantCacheSize;
CoalescedKeyRangeMap<Reference<LocationInfo>> locationCache;
std::unordered_map<Endpoint, EndpointFailureInfo> failedEndpointsOnHealthyServersInfo;
std::unordered_map<TenantName, TenantMapEntry> tenantCache;
std::map<UID, StorageServerInfo*> server_interf;
std::map<UID, BlobWorkerInterface> blobWorker_interf; // blob workers don't change endpoints for the same ID
@ -457,7 +485,7 @@ public:
Counter transactionGetKeyRequests;
Counter transactionGetValueRequests;
Counter transactionGetRangeRequests;
Counter transactionGetRangeAndFlatMapRequests;
Counter transactionGetMappedRangeRequests;
Counter transactionGetRangeStreamRequests;
Counter transactionWatchRequests;
Counter transactionGetAddressesForKeyRequests;
@ -569,7 +597,8 @@ public:
EventCacheHolder connectToDatabaseEventCacheHolder;
private:
std::unordered_map<Key, Reference<WatchMetadata>> watchMap;
std::unordered_map<std::pair<int64_t, Key>, Reference<WatchMetadata>, boost::hash<std::pair<int64_t, Key>>>
watchMap;
};
#endif

View File

@ -475,6 +475,7 @@ using KeyRange = Standalone<KeyRangeRef>;
using KeyValue = Standalone<KeyValueRef>;
using KeySelector = Standalone<struct KeySelectorRef>;
using RangeResult = Standalone<struct RangeResultRef>;
using MappedRangeResult = Standalone<struct MappedRangeResultRef>;
enum { invalidVersion = -1, latestVersion = -2, MAX_VERSION = std::numeric_limits<int64_t>::max() };
@ -616,6 +617,8 @@ KeyRangeWith<Val> keyRangeWith(const KeyRangeRef& range, const Val& value) {
return KeyRangeWith<Val>(range, value);
}
struct MappedKeyValueRef;
struct GetRangeLimits {
enum { ROW_LIMIT_UNLIMITED = -1, BYTE_LIMIT_UNLIMITED = -1 };
@ -629,6 +632,8 @@ struct GetRangeLimits {
void decrement(VectorRef<KeyValueRef> const& data);
void decrement(KeyValueRef const& data);
void decrement(VectorRef<MappedKeyValueRef> const& data);
void decrement(MappedKeyValueRef const& data);
// True if either the row or byte limit has been reached
bool isReached();
@ -651,7 +656,7 @@ struct RangeResultRef : VectorRef<KeyValueRef> {
// limits requested) False implies that no such values remain
Optional<KeyRef> readThrough; // Only present when 'more' is true. When present, this value represent the end (or
// beginning if reverse) of the range which was read to produce these results. This is
// guarenteed to be less than the requested range.
// guaranteed to be less than the requested range.
bool readToBegin;
bool readThroughEnd;
@ -689,6 +694,114 @@ struct Traceable<RangeResultRef> : std::true_type {
}
};
// A key together with the (possibly absent) value read for it. Similar to KeyValueRef, except that the
// result side is an Optional and may therefore be empty.
struct GetValueReqAndResultRef {
	KeyRef key;
	Optional<ValueRef> result;

	GetValueReqAndResultRef() {}

	// Builds both members from `copyFrom`, passing arena `a` to each member's arena constructor.
	GetValueReqAndResultRef(Arena& a, const GetValueReqAndResultRef& copyFrom)
	  : key(a, copyFrom.key), result(a, copyFrom.result) {}

	// Two instances are equal when both the key and the optional result compare equal.
	bool operator==(const GetValueReqAndResultRef& rhs) const {
		if (!(key == rhs.key)) {
			return false;
		}
		return result == rhs.result;
	}
	bool operator!=(const GetValueReqAndResultRef& rhs) const {
		const bool same = (rhs == *this);
		return !same;
	}

	// Sum of the expected sizes reported by the key and by the optional result.
	int expectedSize() const {
		return key.expectedSize() + result.expectedSize();
	}

	template <class Ar>
	void serialize(Ar& ar) {
		serializer(ar, key, result);
	}
};
// A range request, expressed as a pair of key selectors, together with the range result read for it.
struct GetRangeReqAndResultRef {
	KeySelectorRef begin, end;
	RangeResultRef result;

	GetRangeReqAndResultRef() {}

	// Builds all three members from `copyFrom`, passing arena `a` to each member's arena constructor.
	GetRangeReqAndResultRef(Arena& a, const GetRangeReqAndResultRef& copyFrom)
	  : begin(a, copyFrom.begin), end(a, copyFrom.end), result(a, copyFrom.result) {}

	// Equal only when both selectors and the result all compare equal (checked in that order).
	bool operator==(const GetRangeReqAndResultRef& rhs) const {
		if (!(begin == rhs.begin)) {
			return false;
		}
		if (!(end == rhs.end)) {
			return false;
		}
		return result == rhs.result;
	}
	bool operator!=(const GetRangeReqAndResultRef& rhs) const {
		const bool same = (rhs == *this);
		return !same;
	}

	template <class Ar>
	void serialize(Ar& ar) {
		serializer(ar, begin, end, result);
	}
};
using MappedReqAndResultRef = std::variant<GetValueReqAndResultRef, GetRangeReqAndResultRef>;
struct MappedKeyValueRef : KeyValueRef {
// Save the original key value at the base (KeyValueRef).
MappedReqAndResultRef reqAndResult;
MappedKeyValueRef() = default;
MappedKeyValueRef(Arena& a, const MappedKeyValueRef& copyFrom) : KeyValueRef(a, copyFrom) {
const auto& reqAndResultCopyFrom = copyFrom.reqAndResult;
if (std::holds_alternative<GetValueReqAndResultRef>(reqAndResultCopyFrom)) {
auto getValue = std::get<GetValueReqAndResultRef>(reqAndResultCopyFrom);
reqAndResult = GetValueReqAndResultRef(a, getValue);
} else if (std::holds_alternative<GetRangeReqAndResultRef>(reqAndResultCopyFrom)) {
auto getRange = std::get<GetRangeReqAndResultRef>(reqAndResultCopyFrom);
reqAndResult = GetRangeReqAndResultRef(a, getRange);
} else {
throw internal_error();
}
}
bool operator==(const MappedKeyValueRef& rhs) const {
return static_cast<const KeyValueRef&>(*this) == static_cast<const KeyValueRef&>(rhs) &&
reqAndResult == rhs.reqAndResult;
}
bool operator!=(const MappedKeyValueRef& rhs) const { return !(rhs == *this); }
// It relies on the base to provide the expectedSize. TODO: Consider add the underlying request and key values into
// expected size?
// int expectedSize() const { return ((KeyValueRef*)this)->expectedSisze() + reqA }
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, ((KeyValueRef&)*this), reqAndResult);
}
};
// A vector of MappedKeyValueRef plus the same bookkeeping flags carried by RangeResultRef; see the
// comments on RangeResultRef for the meaning of each flag.
struct MappedRangeResultRef : VectorRef<MappedKeyValueRef> {
	bool more;
	Optional<KeyRef> readThrough;
	bool readToBegin;
	bool readThroughEnd;

	// Empty result with every flag cleared.
	MappedRangeResultRef() : more(false), readToBegin(false), readThroughEnd(false) {}

	// Arena constructor: the vector and, when present, the readThrough key are constructed with arena `p`;
	// the boolean flags are copied as-is.
	MappedRangeResultRef(Arena& p, const MappedRangeResultRef& toCopy)
	  : VectorRef<MappedKeyValueRef>(p, toCopy), more(toCopy.more),
	    readThrough(toCopy.readThrough.present() ? KeyRef(p, toCopy.readThrough.get()) : Optional<KeyRef>()),
	    readToBegin(toCopy.readToBegin), readThroughEnd(toCopy.readThroughEnd) {}

	// Wraps an existing vector; readToBegin and readThroughEnd start out false.
	MappedRangeResultRef(const VectorRef<MappedKeyValueRef>& value,
	                     bool more,
	                     Optional<KeyRef> readThrough = Optional<KeyRef>())
	  : VectorRef<MappedKeyValueRef>(value), more(more), readThrough(readThrough), readToBegin(false),
	    readThroughEnd(false) {}

	// Empty result carrying only the two boundary flags; `more` is false.
	MappedRangeResultRef(bool readToBegin, bool readThroughEnd)
	  : more(false), readToBegin(readToBegin), readThroughEnd(readThroughEnd) {}

	template <class Ar>
	void serialize(Ar& ar) {
		serializer(
		    ar, static_cast<VectorRef<MappedKeyValueRef>&>(*this), more, readThrough, readToBegin, readThroughEnd);
	}

	// Renders the flags (not the rows) as "more:<n> readThrough:<key|[unset]> readToBegin:<n> readThroughEnd:<n>".
	std::string toString() const {
		std::string text = "more:";
		text += std::to_string(more);
		text += " readThrough:";
		if (readThrough.present()) {
			text += readThrough.get().toString();
		} else {
			text += "[unset]";
		}
		text += " readToBegin:";
		text += std::to_string(readToBegin);
		text += " readThroughEnd:";
		text += std::to_string(readThroughEnd);
		return text;
	}
};
struct KeyValueStoreType {
constexpr static FileIdentifier file_identifier = 6560359;
// These enumerated values are stored in the database configuration, so should NEVER be changed.

View File

@ -37,6 +37,9 @@ the contents of the system key space.
#include "fdbclient/ClientBooleanParams.h"
#include "fdbclient/DatabaseConfiguration.h"
#include "fdbclient/Status.h"
#include "fdbclient/Subspace.h"
#include "fdbclient/DatabaseConfiguration.h"
#include "fdbclient/Status.h"
#include "fdbclient/SystemData.h"
#include "flow/actorcompiler.h" // has to be last include
@ -626,6 +629,231 @@ Future<ConfigurationResult> changeConfig(Reference<DB> db,
// used by special keys and fdbcli
std::string generateErrorMessage(const CoordinatorsResult& res);
// Reads the tenant map entry for `name` using the caller-supplied transaction `tr`.
// The entry is looked up at key `tenantMapPrefix + name`; READ_SYSTEM_KEYS and READ_LOCK_AWARE are set on
// `tr` before the read. The optional value read is passed through decodeTenantEntry via Optional::map, so
// the result is presumably empty when the key is absent (confirm against Optional::map).
// No retry or error handling happens here: any error from the read propagates to the caller.
ACTOR template <class Transaction>
Future<Optional<TenantMapEntry>> tryGetTenantTransaction(Transaction tr, TenantName name) {
state Key tenantMapKey = name.withPrefix(tenantMapPrefix);
tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::READ_LOCK_AWARE);
Optional<Value> val = wait(safeThreadFutureToFuture(tr->get(tenantMapKey)));
return val.map<TenantMapEntry>([](Optional<Value> v) { return decodeTenantEntry(v.get()); });
}
// Database-level wrapper around tryGetTenantTransaction: creates its own transaction on `db` and retries
// the lookup in a loop. Returns the optional tenant entry from the first attempt that succeeds.
ACTOR template <class DB>
Future<Optional<TenantMapEntry>> tryGetTenant(Reference<DB> db, TenantName name) {
state Reference<typename DB::TransactionT> tr = db->createTransaction();
loop {
try {
Optional<TenantMapEntry> entry = wait(tryGetTenantTransaction(tr, name));
return entry;
} catch (Error& e) {
// Hand the error to the transaction's onError and loop again once that completes.
// NOTE(review): presumably onError rethrows non-retryable errors - confirm.
wait(safeThreadFutureToFuture(tr->onError(e)));
}
}
}
// Same lookup as tryGetTenantTransaction, but a missing tenant is an error:
// throws tenant_not_found() when no entry is present, otherwise returns the entry.
ACTOR template <class Transaction>
Future<TenantMapEntry> getTenantTransaction(Transaction tr, TenantName name) {
Optional<TenantMapEntry> entry = wait(tryGetTenantTransaction(tr, name));
if (!entry.present()) {
throw tenant_not_found();
}
return entry.get();
}
// Database-level counterpart of getTenantTransaction: delegates to tryGetTenant (which owns the
// transaction and retry loop) and throws tenant_not_found() when no entry is present.
ACTOR template <class DB>
Future<TenantMapEntry> getTenant(Reference<DB> db, TenantName name) {
Optional<TenantMapEntry> entry = wait(tryGetTenant(db, name));
if (!entry.present()) {
throw tenant_not_found();
}
return entry.get();
}
// Creates a tenant with the given name. If the tenant already exists, an empty optional will be returned.
//
// All reads and writes go through the caller-supplied transaction `tr`; nothing is committed here.
// Throws:
//   invalid_tenant_name()              - `name` starts with "\xff"
//   tenants_disabled()                 - the "tenant_mode" configuration key is absent or equals DISABLED
//   tenant_prefix_allocator_conflict() - the key range under the new tenant's prefix is not empty
// On success, returns the new entry after writing both tenantLastIdKey and the tenant map key.
ACTOR template <class Transaction>
Future<Optional<TenantMapEntry>> createTenantTransaction(Transaction tr, TenantNameRef name) {
state Key tenantMapKey = name.withPrefix(tenantMapPrefix);
if (name.startsWith("\xff"_sr)) {
throw invalid_tenant_name();
}
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
// Start the three reads before waiting on any of them; each is waited on further down.
state Future<Optional<TenantMapEntry>> tenantEntryFuture = tryGetTenantTransaction(tr, name);
state Future<Optional<Value>> tenantDataPrefixFuture = safeThreadFutureToFuture(tr->get(tenantDataPrefixKey));
state Future<Optional<Value>> lastIdFuture = safeThreadFutureToFuture(tr->get(tenantLastIdKey));
Optional<Value> tenantMode = wait(safeThreadFutureToFuture(tr->get(configKeysPrefix.withSuffix("tenant_mode"_sr))));
// The stored mode is compared against the decimal string form of TenantMode::DISABLED.
if (!tenantMode.present() || tenantMode.get() == StringRef(format("%d", TenantMode::DISABLED))) {
throw tenants_disabled();
}
Optional<TenantMapEntry> tenantEntry = wait(tenantEntryFuture);
if (tenantEntry.present()) {
return Optional<TenantMapEntry>();
}
state Optional<Value> lastIdVal = wait(lastIdFuture);
Optional<Value> tenantDataPrefix = wait(tenantDataPrefixFuture);
// New id is (last allocated id + 1), or 0 when no id has been recorded yet; the data prefix defaults to
// the empty string when tenantDataPrefixKey is unset.
state TenantMapEntry newTenant(lastIdVal.present() ? TenantMapEntry::prefixToId(lastIdVal.get()) + 1 : 0,
tenantDataPrefix.present() ? (KeyRef)tenantDataPrefix.get() : ""_sr);
// Read at most one key under the new prefix; any hit means the prefix is already in use.
RangeResult contents = wait(safeThreadFutureToFuture(tr->getRange(prefixRange(newTenant.prefix), 1)));
if (!contents.empty()) {
throw tenant_prefix_allocator_conflict();
}
tr->set(tenantLastIdKey, TenantMapEntry::idToPrefix(newTenant.id));
tr->set(tenantMapKey, encodeTenantEntry(newTenant));
return newTenant;
}
// Creates tenant `name` on `db`, owning the transaction, the commit and the retry loop.
// Only on the first attempt is an existing tenant reported via tenant_already_exists(); on later
// attempts createTenantTransaction simply returns an empty optional for an existing tenant and the
// function still commits and returns.
// NOTE(review): presumably this lets a retry after an unknown commit result succeed when the earlier
// attempt actually created the tenant - confirm.
ACTOR template <class DB>
Future<Void> createTenant(Reference<DB> db, TenantName name) {
state Reference<typename DB::TransactionT> tr = db->createTransaction();
state bool firstTry = true;
loop {
try {
if (firstTry) {
Optional<TenantMapEntry> entry = wait(tryGetTenantTransaction(tr, name));
if (entry.present()) {
throw tenant_already_exists();
}
firstTry = false;
}
state Optional<TenantMapEntry> newTenant = wait(createTenantTransaction(tr, name));
// Throws commit_unknown_result before the commit when BUGGIFY is true.
// NOTE(review): presumably test-only fault injection - confirm.
if (BUGGIFY) {
throw commit_unknown_result();
}
wait(safeThreadFutureToFuture(tr->commit()));
// Same injected error, this time after the commit has completed.
if (BUGGIFY) {
throw commit_unknown_result();
}
// newTenant is empty when the tenant already existed on this attempt; log placeholders in that case.
TraceEvent("CreatedTenant")
.detail("Tenant", name)
.detail("TenantId", newTenant.present() ? newTenant.get().id : -1)
.detail("Prefix", newTenant.present() ? (StringRef)newTenant.get().prefix : "Unknown"_sr)
.detail("Version", tr->getCommittedVersion());
return Void();
} catch (Error& e) {
// Every error, including tenant_already_exists thrown above, goes through tr->onError before looping.
// NOTE(review): presumably onError rethrows non-retryable errors - confirm.
wait(safeThreadFutureToFuture(tr->onError(e)));
}
}
}
// Removes the tenant map entry for `name` using the caller-supplied transaction `tr`; nothing is
// committed here. Returns without doing anything if the tenant does not exist.
// Throws tenant_not_empty() if at least one key exists under the tenant's prefix.
ACTOR template <class Transaction>
Future<Void> deleteTenantTransaction(Transaction tr, TenantNameRef name) {
state Key tenantMapKey = name.withPrefix(tenantMapPrefix);
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
state Optional<TenantMapEntry> tenantEntry = wait(tryGetTenantTransaction(tr, name));
if (!tenantEntry.present()) {
return Void();
}
// Read at most one key under the tenant's prefix; any hit means the tenant still holds data.
RangeResult contents = wait(safeThreadFutureToFuture(tr->getRange(prefixRange(tenantEntry.get().prefix), 1)));
if (!contents.empty()) {
throw tenant_not_empty();
}
tr->clear(tenantMapKey);
return Void();
}
// Deletes tenant `name` on `db`, owning the transaction, the commit and the retry loop.
// Only on the first attempt is a missing tenant reported via tenant_not_found(); on later attempts
// deleteTenantTransaction treats a missing tenant as a no-op and the function still commits and returns.
// NOTE(review): presumably this lets a retry after an unknown commit result succeed when the earlier
// attempt actually deleted the tenant - confirm.
ACTOR template <class DB>
Future<Void> deleteTenant(Reference<DB> db, TenantName name) {
state Reference<typename DB::TransactionT> tr = db->createTransaction();
state bool firstTry = true;
loop {
try {
if (firstTry) {
Optional<TenantMapEntry> entry = wait(tryGetTenantTransaction(tr, name));
if (!entry.present()) {
throw tenant_not_found();
}
firstTry = false;
}
wait(deleteTenantTransaction(tr, name));
// Throws commit_unknown_result before the commit when BUGGIFY is true.
// NOTE(review): presumably test-only fault injection - confirm.
if (BUGGIFY) {
throw commit_unknown_result();
}
wait(safeThreadFutureToFuture(tr->commit()));
// Same injected error, this time after the commit has completed.
if (BUGGIFY) {
throw commit_unknown_result();
}
TraceEvent("DeletedTenant").detail("Tenant", name).detail("Version", tr->getCommittedVersion());
return Void();
} catch (Error& e) {
// Every error, including tenant_not_found thrown above, goes through tr->onError before looping.
// NOTE(review): presumably onError rethrows non-retryable errors - confirm.
wait(safeThreadFutureToFuture(tr->onError(e)));
}
}
}
// Lists tenants whose names fall in the range built from `begin` and `end`, using the caller-supplied
// transaction `tr`. The name range is prefixed with tenantMapPrefix and read with a single getRange call
// bounded by `limit`. READ_SYSTEM_KEYS and READ_LOCK_AWARE are set on `tr` before the read.
// Returns a map from tenant name (the key with tenantMapPrefix removed) to its decoded entry.
ACTOR template <class Transaction>
Future<std::map<TenantName, TenantMapEntry>> listTenantsTransaction(Transaction tr,
TenantNameRef begin,
TenantNameRef end,
int limit) {
state KeyRange range = KeyRangeRef(begin, end).withPrefix(tenantMapPrefix);
tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::READ_LOCK_AWARE);
RangeResult results = wait(safeThreadFutureToFuture(
tr->getRange(firstGreaterOrEqual(range.begin), firstGreaterOrEqual(range.end), limit)));
std::map<TenantName, TenantMapEntry> tenants;
for (auto kv : results) {
tenants[kv.key.removePrefix(tenantMapPrefix)] = decodeTenantEntry(kv.value);
}
return tenants;
}
// Database-level wrapper around listTenantsTransaction: creates its own transaction on `db` and retries
// the listing in a loop. Returns the map produced by the first attempt that succeeds.
ACTOR template <class DB>
Future<std::map<TenantName, TenantMapEntry>> listTenants(Reference<DB> db,
TenantName begin,
TenantName end,
int limit) {
state Reference<typename DB::TransactionT> tr = db->createTransaction();
loop {
try {
std::map<TenantName, TenantMapEntry> tenants = wait(listTenantsTransaction(tr, begin, end, limit));
return tenants;
} catch (Error& e) {
// Hand the error to the transaction's onError and loop again once that completes.
// NOTE(review): presumably onError rethrows non-retryable errors - confirm.
wait(safeThreadFutureToFuture(tr->onError(e)));
}
}
}
} // namespace ManagementAPI
#include "flow/unactorcompiler.h"

View File

@ -207,7 +207,7 @@ ACTOR Future<Void> read_http_response_headers(Reference<IConnection> conn,
// Reads an HTTP response from a network connection
// If the connection fails while being read the exception will emitted
// If the response is not parseable or complete in some way, http_bad_response will be thrown
// If the response is not parsable or complete in some way, http_bad_response will be thrown
ACTOR Future<Void> read_http_response(Reference<HTTP::Response> r, Reference<IConnection> conn, bool header_only) {
state std::string buf;
state size_t pos = 0;

View File

@ -24,6 +24,7 @@
#include "fdbclient/FDBOptions.g.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/Tenant.h"
#include "flow/ThreadHelper.actor.h"
@ -59,12 +60,12 @@ public:
GetRangeLimits limits,
bool snapshot = false,
bool reverse = false) = 0;
virtual ThreadFuture<RangeResult> getRangeAndFlatMap(const KeySelectorRef& begin,
const KeySelectorRef& end,
const StringRef& mapper,
GetRangeLimits limits,
bool snapshot = false,
bool reverse = false) = 0;
virtual ThreadFuture<MappedRangeResult> getMappedRange(const KeySelectorRef& begin,
const KeySelectorRef& end,
const StringRef& mapper,
GetRangeLimits limits,
bool snapshot = false,
bool reverse = false) = 0;
virtual ThreadFuture<Standalone<VectorRef<const char*>>> getAddressesForKey(const KeyRef& key) = 0;
virtual ThreadFuture<Standalone<StringRef>> getVersionstamp() = 0;
@ -109,6 +110,18 @@ public:
// Only if it's a MultiVersionTransaction and the underlying transaction handler is null,
// it will return false
virtual bool isValid() { return true; }
virtual Optional<TenantName> getTenant() = 0;
};
// Abstract interface for a tenant handle; implementations provide a way to create transactions.
class ITenant {
public:
	virtual ~ITenant() = default;

	// Creates a new transaction from this tenant handle.
	virtual Reference<ITransaction> createTransaction() = 0;

	// Reference-count hooks.
	// NOTE(review): presumably these are what Reference<ITenant> calls to manage lifetime - confirm.
	virtual void addref() = 0;
	virtual void delref() = 0;
};
// An interface that represents a connection to a cluster made by a client
@ -116,6 +129,7 @@ class IDatabase {
public:
virtual ~IDatabase() {}
virtual Reference<ITenant> openTenant(TenantNameRef tenantName) = 0;
virtual Reference<ITransaction> createTransaction() = 0;
virtual void setOption(FDBDatabaseOptions::Option option, Optional<StringRef> value = Optional<StringRef>()) = 0;
virtual double getMainThreadBusyness() = 0;

View File

@ -48,6 +48,21 @@ Reference<ISingleThreadTransaction> ISingleThreadTransaction::create(Type type,
} else {
result = makeReference<PaxosConfigTransaction>();
}
result->setDatabase(cx);
result->construct(cx);
return result;
}
// Tenant-aware factory: instantiates the transaction implementation selected by `type` and then
// initializes it through construct(cx, tenant). Type::RYW yields a ReadYourWritesTransaction,
// Type::SIMPLE_CONFIG a SimpleConfigTransaction, and every other value a PaxosConfigTransaction.
Reference<ISingleThreadTransaction> ISingleThreadTransaction::create(Type type,
                                                                     Database const& cx,
                                                                     TenantName const& tenant) {
	Reference<ISingleThreadTransaction> txn;
	switch (type) {
	case Type::RYW:
		txn = makeReference<ReadYourWritesTransaction>();
		break;
	case Type::SIMPLE_CONFIG:
		txn = makeReference<SimpleConfigTransaction>();
		break;
	default:
		txn = makeReference<PaxosConfigTransaction>();
		break;
	}
	txn->construct(cx, tenant);
	return txn;
}

View File

@ -45,8 +45,15 @@ public:
};
static ISingleThreadTransaction* allocateOnForeignThread(Type);
static Reference<ISingleThreadTransaction> create(Type, Database const&);
virtual void setDatabase(Database const&) = 0;
static Reference<ISingleThreadTransaction> create(Type, Database const&, TenantName const&);
virtual void construct(Database const&) = 0;
virtual void construct(Database const&, TenantName const&) {
// By default, a transaction implementation does not support tenants.
ASSERT(false);
}
virtual void setVersion(Version v) = 0;
virtual Future<Version> getReadVersion() = 0;
@ -63,12 +70,12 @@ public:
GetRangeLimits limits,
Snapshot = Snapshot::False,
Reverse = Reverse::False) = 0;
virtual Future<RangeResult> getRangeAndFlatMap(KeySelector begin,
KeySelector end,
Key mapper,
GetRangeLimits limits,
Snapshot = Snapshot::False,
Reverse = Reverse::False) = 0;
virtual Future<MappedRangeResult> getMappedRange(KeySelector begin,
KeySelector end,
Key mapper,
GetRangeLimits limits,
Snapshot = Snapshot::False,
Reverse = Reverse::False) = 0;
virtual Future<Standalone<VectorRef<const char*>>> getAddressesForKey(Key const& key) = 0;
virtual Future<Standalone<VectorRef<KeyRef>>> getRangeSplitPoints(KeyRange const& range, int64_t chunkSize) = 0;
virtual Future<int64_t> getEstimatedRangeSizeBytes(KeyRange const& keys) = 0;

View File

@ -41,9 +41,10 @@
#include "flow/UnitTest.h"
#include "fdbrpc/ReplicationPolicy.h"
#include "fdbrpc/Replication.h"
#include "flow/actorcompiler.h" // This must be the last #include.
#include "fdbclient/Schemas.h"
#include "flow/actorcompiler.h" // This must be the last #include.
bool isInteger(const std::string& s) {
if (s.empty())
return false;

Some files were not shown because too many files have changed in this diff Show More