diff --git a/bindings/c/test/apitester/TesterApiWorkload.cpp b/bindings/c/test/apitester/TesterApiWorkload.cpp index a51b7dc03a..3ae4b381fa 100644 --- a/bindings/c/test/apitester/TesterApiWorkload.cpp +++ b/bindings/c/test/apitester/TesterApiWorkload.cpp @@ -166,6 +166,7 @@ void ApiWorkload::populateDataTx(TTaskFct cont, std::optional tenantId) { execTransaction( [kvPairs](auto ctx) { for (const fdb::KeyValue& kv : *kvPairs) { + ctx->tx().addReadConflictRange(kv.key, kv.key + fdb::Key(1, '\x00')); ctx->tx().set(kv.key, kv.value); } ctx->commit(); @@ -257,6 +258,7 @@ void ApiWorkload::randomInsertOp(TTaskFct cont, std::optional tenantId) { execTransaction( [kvPairs](auto ctx) { for (const fdb::KeyValue& kv : *kvPairs) { + ctx->tx().addReadConflictRange(kv.key, kv.key + fdb::Key(1, '\x00')); ctx->tx().set(kv.key, kv.value); } ctx->commit(); @@ -279,6 +281,7 @@ void ApiWorkload::randomClearOp(TTaskFct cont, std::optional tenantId) { execTransaction( [keys](auto ctx) { for (const auto& key : *keys) { + ctx->tx().addReadConflictRange(key, key + fdb::Key(1, '\x00')); ctx->tx().clear(key); } ctx->commit(); @@ -300,6 +303,7 @@ void ApiWorkload::randomClearRangeOp(TTaskFct cont, std::optional tenantId) } execTransaction( [begin, end](auto ctx) { + ctx->tx().addReadConflictRange(begin, end); ctx->tx().clearRange(begin, end); ctx->commit(); }, diff --git a/bindings/c/test/apitester/TesterAtomicOpsCorrectnessWorkload.cpp b/bindings/c/test/apitester/TesterAtomicOpsCorrectnessWorkload.cpp index ae0d600422..dddb54d636 100644 --- a/bindings/c/test/apitester/TesterAtomicOpsCorrectnessWorkload.cpp +++ b/bindings/c/test/apitester/TesterAtomicOpsCorrectnessWorkload.cpp @@ -160,6 +160,7 @@ private: execTransaction( // 1. Set the key to val1 [key, val1](auto ctx) { + ctx->tx().addReadConflictRange(key, key + fdb::Key(1, '\x00')); ctx->tx().set(key, val1); ctx->commit(); }, @@ -296,6 +297,7 @@ private: // 1. Set the key to initial value [key, val](auto ctx) { ctx->tx().set(key, val); + ctx->tx().addReadConflictRange(key, key + fdb::Key(1, '\x00')); ctx->commit(); }, [this, key, val, cont]() { diff --git a/bindings/c/test/apitester/TesterCorrectnessWorkload.cpp b/bindings/c/test/apitester/TesterCorrectnessWorkload.cpp index 4486abdf97..e657ff4c51 100644 --- a/bindings/c/test/apitester/TesterCorrectnessWorkload.cpp +++ b/bindings/c/test/apitester/TesterCorrectnessWorkload.cpp @@ -50,6 +50,7 @@ private: execTransaction( [kvPairs](auto ctx) { for (const fdb::KeyValue& kv : *kvPairs) { + ctx->tx().addReadConflictRange(kv.key, kv.key + fdb::Key(1, '\x00')); ctx->tx().set(kv.key, kv.value); } ctx->commit(); diff --git a/bindings/c/test/apitester/TesterTransactionExecutor.cpp b/bindings/c/test/apitester/TesterTransactionExecutor.cpp index 547f6b4965..a89a4d1b3a 100644 --- a/bindings/c/test/apitester/TesterTransactionExecutor.cpp +++ b/bindings/c/test/apitester/TesterTransactionExecutor.cpp @@ -77,10 +77,11 @@ public: int retryLimit, std::string bgBasePath, std::optional tenantName, - bool transactional) + bool transactional, + bool restartOnTimeout) : executor(executor), startFct(startFct), contAfterDone(cont), scheduler(scheduler), retryLimit(retryLimit), txState(TxState::IN_PROGRESS), commitCalled(false), bgBasePath(bgBasePath), tenantName(tenantName), - transactional(transactional) { + transactional(transactional), restartOnTimeout(restartOnTimeout) { databaseCreateErrorInjected = executor->getOptions().injectDatabaseCreateErrors && Random::get().randomBool(executor->getOptions().databaseCreateErrorRatio); if (databaseCreateErrorInjected) { @@ -177,7 +178,8 @@ public: ASSERT(!onErrorFuture); - if (databaseCreateErrorInjected && canBeInjectedDatabaseCreateError(err.code())) { + if ((databaseCreateErrorInjected && canBeInjectedDatabaseCreateError(err.code())) || + (restartOnTimeout && err.code() == error_code_transaction_timed_out)) { // Failed to create a database because of failure injection // Restart by recreating the transaction in a valid database recreateAndRestartTransaction(); @@ -235,7 +237,11 @@ protected: fdb::Error err = onErrorFuture.error(); onErrorFuture = {}; if (err) { - transactionFailed(err); + if (restartOnTimeout && err.code() == error_code_transaction_timed_out) { + recreateAndRestartTransaction(); + } else { + transactionFailed(err); + } } else { restartTransaction(); } @@ -359,6 +365,9 @@ protected: // Accessed on initialization and in ON_ERROR state only (no need for mutex) bool databaseCreateErrorInjected; + // Restart the transaction automatically on timeout errors + const bool restartOnTimeout; + // The tenant that we will run this transaction in const std::optional tenantName; @@ -378,9 +387,17 @@ public: int retryLimit, std::string bgBasePath, std::optional tenantName, - bool transactional) - : TransactionContextBase(executor, startFct, cont, scheduler, retryLimit, bgBasePath, tenantName, transactional) { - } + bool transactional, + bool restartOnTimeout) + : TransactionContextBase(executor, + startFct, + cont, + scheduler, + retryLimit, + bgBasePath, + tenantName, + transactional, + restartOnTimeout) {} protected: void doContinueAfter(fdb::Future f, TTaskFct cont, bool retryOnError) override { @@ -456,9 +473,17 @@ public: int retryLimit, std::string bgBasePath, std::optional tenantName, - bool transactional) - : TransactionContextBase(executor, startFct, cont, scheduler, retryLimit, bgBasePath, tenantName, transactional) { - } + bool transactional, + bool restartOnTimeout) + : TransactionContextBase(executor, + startFct, + cont, + scheduler, + retryLimit, + bgBasePath, + tenantName, + transactional, + restartOnTimeout) {} protected: void doContinueAfter(fdb::Future f, TTaskFct cont, bool retryOnError) override { @@ -470,7 +495,7 @@ protected: lock.unlock(); try { f.then([this](fdb::Future f) { futureReadyCallback(f, this); }); - } catch (std::runtime_error& err) { + } catch (std::exception& err) { lock.lock(); callbackMap.erase(f); lock.unlock(); @@ -482,7 +507,7 @@ protected: try { AsyncTransactionContext* txCtx = (AsyncTransactionContext*)param; txCtx->onFutureReady(f); - } catch (std::runtime_error& err) { + } catch (std::exception& err) { fmt::print("Unexpected exception in callback {}\n", err.what()); abort(); } catch (...) { @@ -544,7 +569,7 @@ protected: try { AsyncTransactionContext* txCtx = (AsyncTransactionContext*)param; txCtx->onErrorReady(f); - } catch (std::runtime_error& err) { + } catch (std::exception& err) { fmt::print("Unexpected exception in callback {}\n", err.what()); abort(); } catch (...) { @@ -673,7 +698,8 @@ public: void execute(TOpStartFct startFct, TOpContFct cont, std::optional tenantName, - bool transactional) override { + bool transactional, + bool restartOnTimeout) override { try { std::shared_ptr ctx; if (options.blockOnFutures) { @@ -684,7 +710,8 @@ public: options.transactionRetryLimit, bgBasePath, tenantName, - transactional); + transactional, + restartOnTimeout); } else { ctx = std::make_shared(this, startFct, @@ -693,7 +720,8 @@ public: options.transactionRetryLimit, bgBasePath, tenantName, - transactional); + transactional, + restartOnTimeout); } startFct(ctx); } catch (...) { diff --git a/bindings/c/test/apitester/TesterTransactionExecutor.h b/bindings/c/test/apitester/TesterTransactionExecutor.h index b0e5268d14..b0303efbbd 100644 --- a/bindings/c/test/apitester/TesterTransactionExecutor.h +++ b/bindings/c/test/apitester/TesterTransactionExecutor.h @@ -116,7 +116,8 @@ public: virtual void execute(TOpStartFct start, TOpContFct cont, std::optional tenantName, - bool transactional) = 0; + bool transactional, + bool restartOnTimeout) = 0; virtual fdb::Database selectDatabase() = 0; virtual std::string getClusterFileForErrorInjection() = 0; virtual const TransactionExecutorOptions& getOptions() = 0; diff --git a/bindings/c/test/apitester/TesterWorkload.cpp b/bindings/c/test/apitester/TesterWorkload.cpp index 8e7289f437..d8790667ae 100644 --- a/bindings/c/test/apitester/TesterWorkload.cpp +++ b/bindings/c/test/apitester/TesterWorkload.cpp @@ -20,6 +20,7 @@ #include "TesterWorkload.h" #include "TesterUtil.h" +#include "fdb_c_options.g.h" #include "fmt/core.h" #include "test/apitester/TesterScheduler.h" #include @@ -82,6 +83,8 @@ WorkloadBase::WorkloadBase(const WorkloadConfig& config) : manager(nullptr), tasksScheduled(0), numErrors(0), clientId(config.clientId), numClients(config.numClients), failed(false), numTxCompleted(0), numTxStarted(0), inProgress(false) { maxErrors = config.getIntOption("maxErrors", 10); + minTxTimeoutMs = config.getIntOption("minTxTimeoutMs", 0); + maxTxTimeoutMs = config.getIntOption("maxTxTimeoutMs", 0); workloadId = fmt::format("{}{}", config.name, clientId); } @@ -129,9 +132,15 @@ void WorkloadBase::doExecute(TOpStartFct startFct, } tasksScheduled++; numTxStarted++; - manager->txExecutor->execute( - startFct, - [this, startFct, cont, failOnError](fdb::Error err) { + manager->txExecutor->execute( // + [this, transactional, cont, startFct](auto ctx) { + if (transactional && maxTxTimeoutMs > 0) { + int timeoutMs = Random::get().randomInt(minTxTimeoutMs, maxTxTimeoutMs); + ctx->tx().setOption(FDB_TR_OPTION_TIMEOUT, timeoutMs); + } + startFct(ctx); + }, + [this, cont, failOnError](fdb::Error err) { numTxCompleted++; if (err.code() == error_code_success) { cont(); @@ -148,7 +157,8 @@ void WorkloadBase::doExecute(TOpStartFct startFct, scheduledTaskDone(); }, tenant, - transactional); + transactional, + maxTxTimeoutMs > 0); } void WorkloadBase::info(const std::string& msg) { diff --git a/bindings/c/test/apitester/TesterWorkload.h b/bindings/c/test/apitester/TesterWorkload.h index ea1c6816f9..0ba93b1a2b 100644 --- a/bindings/c/test/apitester/TesterWorkload.h +++ b/bindings/c/test/apitester/TesterWorkload.h @@ -166,6 +166,12 @@ protected: // The maximum number of errors before stoppoing the workload int maxErrors; + // The timeout (in ms) automatically set for all transactions to a random value + // in the range [minTxTimeoutMs, maxTxTimeoutMs] + // If maxTxTimeoutMs <= 0, no timeout is set + int minTxTimeoutMs; + int maxTxTimeoutMs; + // Workload identifier, consisting of workload name and client ID std::string workloadId; diff --git a/bindings/c/test/apitester/fdb_c_api_tester.cpp b/bindings/c/test/apitester/fdb_c_api_tester.cpp index 1d79dd754c..d7d828a756 100644 --- a/bindings/c/test/apitester/fdb_c_api_tester.cpp +++ b/bindings/c/test/apitester/fdb_c_api_tester.cpp @@ -429,7 +429,7 @@ bool runWorkloads(TesterOptions& options) { } workloadMgr.run(); return !workloadMgr.failed(); - } catch (const std::runtime_error& err) { + } catch (const std::exception& err) { fmt::print(stderr, "ERROR: {}\n", err.what()); return false; } @@ -461,7 +461,7 @@ int main(int argc, char** argv) { fdb_check(fdb::network::stop()); network_thread.join(); - } catch (const std::runtime_error& err) { + } catch (const std::exception& err) { fmt::print(stderr, "ERROR: {}\n", err.what()); retCode = 1; } diff --git a/bindings/c/test/apitester/tests/CApiCancelTransactionWithTimeout.toml b/bindings/c/test/apitester/tests/CApiCancelTransactionWithTimeout.toml new file mode 100644 index 0000000000..5a9e4f9b8f --- /dev/null +++ b/bindings/c/test/apitester/tests/CApiCancelTransactionWithTimeout.toml @@ -0,0 +1,25 @@ +[[test]] +title = 'Cancel Transactions with Timeouts' +multiThreaded = true +buggify = true +minFdbThreads = 2 +maxFdbThreads = 8 +minDatabases = 2 +maxDatabases = 8 +minClientThreads = 2 +maxClientThreads = 8 +minClients = 2 +maxClients = 8 + + [[test.workload]] + name = 'CancelTransaction' + minKeyLength = 1 + maxKeyLength = 64 + minValueLength = 1 + maxValueLength = 1000 + maxKeysPerTransaction = 50 + initialSize = 100 + numRandomOperations = 100 + readExistingKeysRatio = 0.9 + minTxTimeoutMs = 10 + maxTxTimeoutMs = 10000 \ No newline at end of file diff --git a/bindings/c/test/apitester/tests/CApiCorrectnessWithTimeout.toml b/bindings/c/test/apitester/tests/CApiCorrectnessWithTimeout.toml new file mode 100644 index 0000000000..4da54431b1 --- /dev/null +++ b/bindings/c/test/apitester/tests/CApiCorrectnessWithTimeout.toml @@ -0,0 +1,33 @@ +[[test]] +title = 'API Correctness with Timeouts' +multiThreaded = true +buggify = true +minFdbThreads = 2 +maxFdbThreads = 8 +minDatabases = 2 +maxDatabases = 8 +minClientThreads = 2 +maxClientThreads = 8 +minClients = 2 +maxClients = 8 + + [[test.workload]] + name = 'ApiCorrectness' + minKeyLength = 1 + maxKeyLength = 64 + minValueLength = 1 + maxValueLength = 1000 + maxKeysPerTransaction = 50 + initialSize = 100 + numRandomOperations = 100 + readExistingKeysRatio = 0.9 + minTxTimeoutMs = 100 + maxTxTimeoutMs = 10000 + + [[test.workload]] + name = 'AtomicOpsCorrectness' + initialSize = 0 + numRandomOperations = 100 + minTxTimeoutMs = 100 + maxTxTimeoutMs = 10000 + diff --git a/bindings/c/test/mako/mako.cpp b/bindings/c/test/mako/mako.cpp index becf107fc7..858dd1dc8f 100644 --- a/bindings/c/test/mako/mako.cpp +++ b/bindings/c/test/mako/mako.cpp @@ -1199,6 +1199,8 @@ void usage() { printf("%-24s %s\n", " --flatbuffers", "Use flatbuffers"); printf("%-24s %s\n", " --streaming", "Streaming mode: all (default), iterator, small, medium, large, serial"); printf("%-24s %s\n", " --disable_ryw", "Disable snapshot read-your-writes"); + printf( + "%-24s %s\n", " --disable_client_bypass", "Disable client-bypass forcing mako to use multi-version client"); printf("%-24s %s\n", " --json_report=PATH", "Output stats to the specified json file (Default: mako.json)"); printf("%-24s %s\n", " --bg_file_path=PATH", diff --git a/bindings/go/src/fdb/generated.go b/bindings/go/src/fdb/generated.go index b765e09508..9cd8f09fe6 100644 --- a/bindings/go/src/fdb/generated.go +++ b/bindings/go/src/fdb/generated.go @@ -392,6 +392,11 @@ func (o DatabaseOptions) SetTransactionIncludePortInAddress() error { return o.setOpt(505, nil) } +// Set a random idempotency id for all transactions. See the transaction option description for more information. +func (o DatabaseOptions) SetTransactionAutomaticIdempotency() error { + return o.setOpt(506, nil) +} + // Allows ``get`` operations to read from sections of keyspace that have become unreadable because of versionstamp operations. This sets the ``bypass_unreadable`` option of each transaction created by this database. See the transaction option description for more information. func (o DatabaseOptions) SetTransactionBypassUnreadable() error { return o.setOpt(700, nil) @@ -551,6 +556,18 @@ func (o TransactionOptions) SetSizeLimit(param int64) error { return o.setOpt(503, int64ToBytes(param)) } +// Associate this transaction with this ID for the purpose of checking whether or not this transaction has already committed. Must be at least 16 bytes and less than 256 bytes. +// +// Parameter: Unique ID +func (o TransactionOptions) SetIdempotencyId(param string) error { + return o.setOpt(504, []byte(param)) +} + +// Automatically assign a random 16 byte idempotency id for this transaction. Prevents commits from failing with ``commit_unknown_result``. WARNING: If you are also using the multiversion client or transaction timeouts, if either cluster_version_changed or transaction_timed_out was thrown during a commit, then that commit may have already succeeded or may succeed in the future. +func (o TransactionOptions) SetAutomaticIdempotency() error { + return o.setOpt(505, nil) +} + // Snapshot read operations will see the results of writes done in the same transaction. This is the default behavior. func (o TransactionOptions) SetSnapshotRywEnable() error { return o.setOpt(600, nil) diff --git a/cmake/AddFdbTest.cmake b/cmake/AddFdbTest.cmake index 786126359b..f6c298ddfe 100644 --- a/cmake/AddFdbTest.cmake +++ b/cmake/AddFdbTest.cmake @@ -56,7 +56,7 @@ endfunction() # all these tests in serialized order and within the same directory. This is # useful for restart tests function(add_fdb_test) - set(options UNIT IGNORE) + set(options UNIT IGNORE LONG_RUNNING) set(oneValueArgs TEST_NAME TIMEOUT) set(multiValueArgs TEST_FILES) cmake_parse_arguments(ADD_FDB_TEST "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}") @@ -106,6 +106,9 @@ function(add_fdb_test) if(ADD_FDB_TEST_UNIT) message(STATUS "ADDING UNIT TEST ${assigned_id} ${test_name}") + elseif(ADD_FDB_TEST_LONG_RUNNING) + message(STATUS + "ADDING LONG RUNNING TEST ${assigned_id} ${test_name}") else() message(STATUS "ADDING SIMULATOR TEST ${assigned_id} ${test_name}") @@ -150,9 +153,15 @@ function(add_fdb_test) endif() endif() # set variables used for generating test packages - set(TEST_NAMES ${TEST_NAMES} ${test_name} PARENT_SCOPE) - set(TEST_FILES_${test_name} ${ADD_FDB_TEST_TEST_FILES} PARENT_SCOPE) - set(TEST_TYPE_${test_name} ${test_type} PARENT_SCOPE) + if(ADD_FDB_TEST_LONG_RUNNING) + set(LONG_RUNNING_TEST_NAMES ${LONG_RUNNING_TEST_NAMES} ${test_name} PARENT_SCOPE) + set(LONG_RUNNING_TEST_FILES_${test_name} ${ADD_FDB_TEST_TEST_FILES} PARENT_SCOPE) + set(LONG_RUNNING_TEST_TYPE_${test_name} ${test_type} PARENT_SCOPE) + else() + set(TEST_NAMES ${TEST_NAMES} ${test_name} PARENT_SCOPE) + set(TEST_FILES_${test_name} ${ADD_FDB_TEST_TEST_FILES} PARENT_SCOPE) + set(TEST_TYPE_${test_name} ${test_type} PARENT_SCOPE) + endif() endfunction() if(NOT WIN32) @@ -167,14 +176,21 @@ endif() # - OUT_DIR the directory where files will be staged # - CONTEXT the type of correctness package being built (e.g. 'valgrind correctness') function(stage_correctness_package) + set(options LONG_RUNNING) set(oneValueArgs OUT_DIR CONTEXT OUT_FILES) - cmake_parse_arguments(STAGE "" "${oneValueArgs}" "" "${ARGN}") + set(multiValueArgs TEST_LIST) + cmake_parse_arguments(STAGE "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}") file(MAKE_DIRECTORY ${STAGE_OUT_DIR}/bin) - string(LENGTH "${CMAKE_SOURCE_DIR}/tests/" base_length) - foreach(test IN LISTS TEST_NAMES) + foreach(test IN LISTS STAGE_TEST_LIST) if((${test} MATCHES ${TEST_PACKAGE_INCLUDE}) AND (NOT ${test} MATCHES ${TEST_PACKAGE_EXCLUDE})) - foreach(file IN LISTS TEST_FILES_${test}) + string(LENGTH "${CMAKE_SOURCE_DIR}/tests/" base_length) + if(STAGE_LONG_RUNNING) + set(TEST_FILES_PREFIX "LONG_RUNNING_TEST_FILES") + else() + set(TEST_FILES_PREFIX "TEST_FILES") + endif() + foreach(file IN LISTS ${TEST_FILES_PREFIX}_${test}) string(SUBSTRING ${file} ${base_length} -1 rel_out_file) set(out_file ${STAGE_OUT_DIR}/tests/${rel_out_file}) list(APPEND test_files ${out_file}) @@ -265,7 +281,7 @@ function(create_correctness_package) return() endif() set(out_dir "${CMAKE_BINARY_DIR}/correctness") - stage_correctness_package(OUT_DIR ${out_dir} CONTEXT "correctness" OUT_FILES package_files) + stage_correctness_package(OUT_DIR ${out_dir} CONTEXT "correctness" OUT_FILES package_files TEST_LIST "${TEST_NAMES}") set(tar_file ${CMAKE_BINARY_DIR}/packages/correctness-${FDB_VERSION}.tar.gz) add_custom_command( OUTPUT ${tar_file} @@ -294,13 +310,47 @@ function(create_correctness_package) add_dependencies(package_tests_u package_tests) endfunction() +function(create_long_running_correctness_package) + if(WIN32) + return() + endif() + set(out_dir "${CMAKE_BINARY_DIR}/long_running_correctness") + stage_correctness_package(OUT_DIR ${out_dir} CONTEXT "long running correctness" OUT_FILES package_files TEST_LIST "${LONG_RUNNING_TEST_NAMES}" LONG_RUNNING) + set(tar_file ${CMAKE_BINARY_DIR}/packages/long-running-correctness-${FDB_VERSION}.tar.gz) + add_custom_command( + OUTPUT ${tar_file} + DEPENDS ${package_files} + ${CMAKE_SOURCE_DIR}/contrib/Joshua/scripts/correctnessTest.sh + ${CMAKE_SOURCE_DIR}/contrib/Joshua/scripts/correctnessTimeout.sh + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/contrib/Joshua/scripts/correctnessTest.sh + ${out_dir}/joshua_test + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/contrib/Joshua/scripts/correctnessTimeout.sh + ${out_dir}/joshua_timeout + COMMAND ${CMAKE_COMMAND} -E tar cfz ${tar_file} ${package_files} + ${out_dir}/joshua_test + ${out_dir}/joshua_timeout + WORKING_DIRECTORY ${out_dir} + COMMENT "Package long running correctness archive" + ) + add_custom_target(package_long_running_tests ALL DEPENDS ${tar_file}) + add_dependencies(package_long_running_tests strip_only_fdbserver TestHarness) + set(unversioned_tar_file "${CMAKE_BINARY_DIR}/packages/long_running_correctness.tar.gz") + add_custom_command( + OUTPUT "${unversioned_tar_file}" + DEPENDS "${tar_file}" + COMMAND ${CMAKE_COMMAND} -E copy "${tar_file}" "${unversioned_tar_file}" + COMMENT "Copy long running correctness package to ${unversioned_tar_file}") + add_custom_target(package_long_running_tests_u DEPENDS "${unversioned_tar_file}") + add_dependencies(package_long_running_tests_u package_long_running_tests) +endfunction() + function(create_valgrind_correctness_package) if(WIN32) return() endif() if(USE_VALGRIND) set(out_dir "${CMAKE_BINARY_DIR}/valgrind_correctness") - stage_correctness_package(OUT_DIR ${out_dir} CONTEXT "valgrind correctness" OUT_FILES package_files) + stage_correctness_package(OUT_DIR ${out_dir} CONTEXT "valgrind correctness" OUT_FILES package_files TEST_LIST "${TEST_NAMES}") set(tar_file ${CMAKE_BINARY_DIR}/packages/valgrind-${FDB_VERSION}.tar.gz) add_custom_command( OUTPUT ${tar_file} diff --git a/cmake/ConfigureCompiler.cmake b/cmake/ConfigureCompiler.cmake index cb442604d5..1ded344f8d 100644 --- a/cmake/ConfigureCompiler.cmake +++ b/cmake/ConfigureCompiler.cmake @@ -26,6 +26,7 @@ env_set(TRACE_PC_GUARD_INSTRUMENTATION_LIB "" STRING "Path to a library containi env_set(PROFILE_INSTR_GENERATE OFF BOOL "If set, build FDB as an instrumentation build to generate profiles") env_set(PROFILE_INSTR_USE "" STRING "If set, build FDB with profile") env_set(FULL_DEBUG_SYMBOLS OFF BOOL "Generate full debug symbols") +env_set(ENABLE_LONG_RUNNING_TESTS OFF BOOL "Add a long running tests package") set(USE_SANITIZER OFF) if(USE_ASAN OR USE_VALGRIND OR USE_MSAN OR USE_TSAN OR USE_UBSAN) diff --git a/design/dynamic-knobs.md b/design/dynamic-knobs.md index 00fe39e725..f0088dc394 100644 --- a/design/dynamic-knobs.md +++ b/design/dynamic-knobs.md @@ -128,6 +128,35 @@ set_knob(db, 'min_trace_severity', '10', None, 'description') set_knob(db, 'min_trace_severity', '20', 'az-1', 'description') ``` +### CLI Usage + +Users may also utilize `fdbcli` to set and update knobs dynamically. Usage is as follows +``` +setknob [config_class] +getknob [config_class] +``` +Where `knob_name` is an existing knob, `knob_value` is the desired value to set the knob and `config_class` is the optional configuration class. Furthermore, `setknob` may be combined within a `begin\commit` to update multiple knobs atomically. If using this option, a description must follow `commit` otherwise a prompt will be shown asking for a description. The description must be non-empty. An example follows. +``` +begin +setknob min_trace_severity 30 +setknob tracing_udp_listener_addr 192.168.0.1 +commit "fdbcli change" +``` +Users may only combine knob configuration changes with other knob configuration changes in the same transaction. For example, the following is not permitted and will raise an error. +``` +begin +set foo bar +setknob max_metric_size 1000 +commit "change" +``` +Specifically, `set, clear, get, getrange, clearrange` cannot be combined in any transaction with a `setknob` or `getknob`. + +If using an individual `setknob` without being inside a `begin\commit` block, then `fdbcli` will prompt for a description as well. + +#### Type checking +Knobs have implicit types attached to them when defined. For example, the knob `tracing_udp_listener_addr` is set to `"127.0.0.1"` as so the type is string. If a user invokes `setknob` on this knob with an incorrect value that is not a string, the transaction will fail. + + ### Disable the Configuration Database The configuration database includes both client and server changes and is diff --git a/design/idempotency_ids.md b/design/idempotency_ids.md new file mode 100644 index 0000000000..278503713a --- /dev/null +++ b/design/idempotency_ids.md @@ -0,0 +1,106 @@ +# Goals + +The main goal is to make transactions safer and easier to reason about. New users should get a "just works" experience. One of the main selling points of FoundationDB is that it solves the hard distributed systems problems for you, so that you only need to concern yourself with your business logic. Non-idempotent transactions is probably the biggest "gotcha" that users need to be made aware of -- and they won't discover it organically. In order to achieve this "just works" experience I believe it is necessary to make automatic idempotency have low-enough overhead so that we can enable it by default. + +As an intermediate goal, I plan to introduce this feature disabled by default. The long-term plan is to make it the default. + +# API + +Introduce a new transaction option `IDEMPOTENCY_ID`, which will be validated to be at most 255 bytes. +Add +``` +FDBFuture* fdb_transaction_commit_result(FDBTransaction* tr, uint8_t const* idempotency_id, int idempotency_id_length) +``` +, which can be used to determine the result of a commit that failed with `transaction_timed_out`. + +Commits for transactions with idempotency ids would not fail with `commit_unknown_result`, but in (extremely) rare cases could fail with a new error that clients are expected to handle by restarting the process. +# Background + +- https://forums.foundationdb.org/t/automatically-providing-transaction-idempotency/1873 +- https://github.com/apple/foundationdb/issues/1321 +- https://docs.google.com/document/d/19LDQuurg4Tt8eUcig3-8g2VOG9ZpQvtWrp_691RqMo8/edit# + +# Data model + +Commit proxies would combine idempotency IDs for transactions within a batch. The purpose of this is to try to limit the number of distinct database keys that need to be written, and to lessen the number of extra mutation bytes for idempotency IDs. + +## Key format +``` +\xff\x02/idmp/${commit_version_big_endian (8 bytes)}${high_order_byte_of_batch_index (1 byte)} +``` + +- `commit_version_big_endian` the commit version stored big-endian so that the cleaner worker can find the oldest idempotency ids easily, and also so that "unknown_committed" transactions can recover their commit version. +- `high_order_byte_of_batch_index` this limits us to 256 idempotency ids per value + +## Value format +``` +${protocol_version}(${n (1 byte)}${idempotency_id (n bytes)}${low_order_byte_of_batch_index})* +``` + +The batch index for each idempotency id can be reconstructed from the high order byte and low order bytes stored in the key and value, respectively. This is necessary for an "unknown_committed" transaction to recover their full version stamp. Batch index is a `short int`, i.e. 2 bytes. + +# Cleaning up old idempotency ids + +After learning the result of an attempt to commit a transaction with an +idempotency id, the client may inform the cluster that it's no longer interested +in that id and the cluster can reclaim the space used to store the idempotency +id. The happy-path reply to a CommitTransactionRequest will say which proxy this +request should be sent to, and all idempotency ids for a database key will be +sent to the same proxy so that it can clear the key once it receives all of +them. The first proxy will also periodically clean up the oldest idempotency ids, based on a policy determined by two knobs. One knob will control the minimum lifetime of an idempotency id (i.e. don't delete anything younger than 1 day), and the other will control the target byte size of the idempotency keys (e.g. keep 100 MB of idempotency keys around). + +# Commit protocol + +The basic change will be that a commit future will not become ready until the client confirms whether or not the commit succeeded. (`transaction_timed_out` is an unfortunate exception here) + +The idempotency id will be automatically added to both the read conflict range and the write conflict range, before makeSelfConflicting is called so that we don't duplicate that work. We can reuse the `\xff/SC/` self-conflicting key space here. + +## Did I already commit? + +The first version of this scans the keys in the idmp key range to check for the idempotency ids. The plan for the next version is the following: + +Storage servers would have a new endpoint that clients can use to ask if the transaction for an idempotency id already committed. Clients would need to check every possible shard that their idempotency id may have ended up in. + +Storage servers would maintain a map from idempotency id to versionstamp in memory, and clients would need to contact all storage servers responsible for the `[\xff\x02/idmp/, \xff\x02/idmp0)` keyspace to be sure of their commit status. Assuming an idempotency id + versionstamp is 16 + 10 bytes, and that the lifetime of most idempotency ids is less than 1 second, that corresponds to at least 260 MB of memory on the storage server at 1,000,000 transactions/s, which seems acceptable. Let's double that to account for things like hash table load factor and allocating extra memory to ensure amortized constant time insertion. Still seems acceptable. We probably want to use a hashtable with open addressing to avoid frequent heap allocations. I _think_ [swisstables](https://abseil.io/about/design/swisstables) would work here. + +When a transaction learns that it did in fact commit, the commit future succeeds, and the versionstamp gets filled with the original, successful transaction's versionstamp. After the successful commit is reported, it's no longer necessary to store its idempotency ID. The client will send an RPC to the cleaner role indicating that it can remove this idempotency ID. + +If a transaction learns that it did in fact _not_ commit, the commit future will fail with an error that indicates that the transaction did not commit. Perhaps `transaction_too_old`. + +If a transaction learns that it has been in-flight so long that its idempotency id could have been expired, then it will fail with a new, non-retriable error. It is expected that this will be rare enough that crashing the application is acceptable. + +# Considerations + +- Additional storage space on the cluster. This can be controlled directly via an idempotency id target bytes knob/config. +- Potential write hot spot. + +# Multi-version client + +The multi-version client will generate its own idempotency id for a transaction and manage its lifecycle. It will duplicate the logic in NativeApi to achieve the same guarantees. As part of this change we will also ensure that the previous commit attempt is no longer in-flight before allowing the commit future to become ready. This will fix a potential "causal-write-risky" issue if a commit attempt fails with `cluster_version_changed`. + +# Experiments + +- Initial experiments show that this is about 1% overhead for the worst case workload which is transactions that only update a single key. + +``` +Single replication redwood cluster with dedicated ebs disks for tlog and storage. All tests saturated the tlog disk's IOPs. + +volume_type: gp3 +volume_size: 384 +iops: 9000 +throughput: 250 + +$ bin/mako --mode run --rows 1000000 -x u1 -p 8 -t 8 --cluster=$HOME/fdb.cluster --seconds 100 # already warm, but quiesced + +Baseline: + +19714.67 TPS + +"user space" method of writing idempotency id -> versionstamp in every transaction: + +13831.00 TPS + +"combine idempotency ids in transaction batch" method: + +19515.62 TPS +``` diff --git a/documentation/sphinx/source/command-line-interface.rst b/documentation/sphinx/source/command-line-interface.rst index 5f413d6c98..c561379100 100644 --- a/documentation/sphinx/source/command-line-interface.rst +++ b/documentation/sphinx/source/command-line-interface.rst @@ -203,6 +203,13 @@ The ``get`` command fetches the value of a given key. Its syntax is ``get ` Note that :ref:`characters can be escaped ` when specifying keys (or values) in ``fdbcli``. +getknob +------- + +The ``getknob`` command fetches the value of a given knob that has been populated by ``setknob``. Its syntax is ``getknob [CONFIGCLASS]``. It displays the value of ```` if ```` is present in the database and ``not found`` otherwise. + +Note that :ref:`characters can be escaped ` when specifying keys (or values) in ``fdbcli``. + getrange -------- @@ -395,6 +402,13 @@ The ``setclass`` command can be used to change the :ref:`process class [CONFIGCLASS]``. If not present in a ``begin\commit`` block, the CLI will prompt for a description of the change. + +Note that :ref:`characters can be escaped ` when specifying keys (or values) in ``fdbcli``. + sleep ----- diff --git a/fdbcli/fdbcli.actor.cpp b/fdbcli/fdbcli.actor.cpp index a5c2e2e75a..886f5e9620 100644 --- a/fdbcli/fdbcli.actor.cpp +++ b/fdbcli/fdbcli.actor.cpp @@ -499,11 +499,14 @@ void initHelp() { "transaction, and are automatically committed for you. By explicitly beginning a transaction, " "successive operations are all performed as part of a single transaction.\n\nTo commit the " "transaction, use the commit command. To discard the transaction, use the reset command."); - helpMap["commit"] = CommandHelp("commit", + helpMap["commit"] = CommandHelp("commit [description]", "commit the current transaction", "Any sets or clears executed after the start of the current transaction will be " "committed to the database. On success, the committed version number is displayed. " - "If commit fails, the error is displayed and the transaction must be retried."); + "If commit fails, the error is displayed and the transaction must be retried. The " + "command optionally allows for a description in case the transaction targets the " + "configuration database. If no description is provided in the command, a prompt " + "will be shown asking for a relevant description of the configuration change"); helpMap["clear"] = CommandHelp( "clear ", "clear a key from the database", @@ -552,6 +555,14 @@ void initHelp() { helpMap["set"] = CommandHelp("set ", "set a value for a given key", "If KEY is not already present in the database, it will be created." ESCAPINGKV); + + helpMap["setknob"] = CommandHelp("setknob [CONFIG_CLASS]", + "updates a knob to specified value", + "setknob will prompt for a descrption of the changes" ESCAPINGKV); + + helpMap["getknob"] = CommandHelp( + "getknob [CONFIG_CLASS]", "gets the value of the specified knob", "CONFIG_CLASS is optional." ESCAPINGK); + helpMap["option"] = CommandHelp( "option