Merge remote-tracking branch 'origin/main' into fix-includes

2022-03-16 22:29:18 -07:00 · 2022-03-16 22:29:18 -07:00 · c4bc35da09
parent 62f547ff6e 34cd80818b
commit c4bc35da09
63 changed files with 4771 additions and 255 deletions
--- a/bindings/c/CMakeLists.txt
+++ b/bindings/c/CMakeLists.txt
@ -91,11 +91,35 @@ if(NOT WIN32)

  set(UNIT_TEST_VERSION_510_SRCS test/unit/unit_tests_version_510.cpp)
  set(TRACE_PARTIAL_FILE_SUFFIX_TEST_SRCS test/unit/trace_partial_file_suffix_test.cpp)
-  set(DISCONNECTED_TIMEOUT_UNIT_TEST_SRCS 
+  set(DISCONNECTED_TIMEOUT_UNIT_TEST_SRCS
    test/unit/disconnected_timeout_tests.cpp
    test/unit/fdb_api.cpp
    test/unit/fdb_api.hpp)

+  set(API_TESTER_SRCS
+    test/apitester/fdb_c_api_tester.cpp
+    test/apitester/TesterApiWorkload.cpp
+    test/apitester/TesterApiWorkload.h
+    test/apitester/TesterApiWrapper.cpp
+    test/apitester/TesterApiWrapper.h
+    test/apitester/TesterTestSpec.cpp
+    test/apitester/TesterTestSpec.h
+    test/apitester/TesterCancelTransactionWorkload.cpp
+    test/apitester/TesterCorrectnessWorkload.cpp
+    test/apitester/TesterKeyValueStore.cpp
+    test/apitester/TesterKeyValueStore.h
+    test/apitester/TesterOptions.h
+    test/apitester/TesterScheduler.cpp
+    test/apitester/TesterScheduler.h
+    test/apitester/TesterTransactionExecutor.cpp
+    test/apitester/TesterTransactionExecutor.h
+    test/apitester/TesterUtil.cpp
+    test/apitester/TesterUtil.h
+    test/apitester/TesterWorkload.cpp
+    test/apitester/TesterWorkload.h
+    ../../flow/SimpleOpt.h
+  )
+
  if(OPEN_FOR_IDE)
    add_library(fdb_c_performance_test OBJECT test/performance_test.c test/test.h)
    add_library(fdb_c_ryw_benchmark OBJECT test/ryw_benchmark.c test/test.h)
@ -106,6 +130,7 @@ if(NOT WIN32)
    add_library(fdb_c_unit_tests_version_510 OBJECT ${UNIT_TEST_VERSION_510_SRCS})
    add_library(trace_partial_file_suffix_test OBJECT ${TRACE_PARTIAL_FILE_SUFFIX_TEST_SRCS})
    add_library(disconnected_timeout_unit_tests OBJECT ${DISCONNECTED_TIMEOUT_UNIT_TEST_SRCS})
+    add_library(fdb_c_api_tester OBJECT ${API_TESTER_SRCS})
  else()
    add_executable(fdb_c_performance_test test/performance_test.c test/test.h)
    add_executable(fdb_c_ryw_benchmark test/ryw_benchmark.c test/test.h)
@ -116,6 +141,7 @@ if(NOT WIN32)
    add_executable(fdb_c_unit_tests_version_510 ${UNIT_TEST_VERSION_510_SRCS})
    add_executable(trace_partial_file_suffix_test ${TRACE_PARTIAL_FILE_SUFFIX_TEST_SRCS})
    add_executable(disconnected_timeout_unit_tests ${DISCONNECTED_TIMEOUT_UNIT_TEST_SRCS})
+    add_executable(fdb_c_api_tester ${API_TESTER_SRCS})
    strip_debug_symbols(fdb_c_performance_test)
    strip_debug_symbols(fdb_c_ryw_benchmark)
    strip_debug_symbols(fdb_c_txn_size_test)
@ -138,6 +164,12 @@ if(NOT WIN32)
  target_link_libraries(trace_partial_file_suffix_test PRIVATE fdb_c Threads::Threads flow)
  target_link_libraries(disconnected_timeout_unit_tests PRIVATE fdb_c Threads::Threads)

+if(USE_SANITIZER)
+  target_link_libraries(fdb_c_api_tester PRIVATE fdb_c toml11_target Threads::Threads fmt::fmt boost_asan)
+else()
+  target_link_libraries(fdb_c_api_tester PRIVATE fdb_c toml11_target Threads::Threads fmt::fmt boost_target)
+endif()
+
  # do not set RPATH for mako
  set_property(TARGET mako PROPERTY SKIP_BUILD_RPATH TRUE)
  target_link_libraries(mako PRIVATE fdb_c fdbclient)
@ -163,6 +195,7 @@ if(NOT WIN32)
  add_custom_target(external_client DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libfdb_c_external.so)
  add_dependencies(fdb_c_unit_tests external_client)
  add_dependencies(disconnected_timeout_unit_tests external_client)
+  add_dependencies(fdb_c_api_tester external_client)

  add_fdbclient_test(
    NAME fdb_c_setup_tests
@ -200,6 +233,19 @@ if(NOT WIN32)
            @CLUSTER_FILE@
            ${CMAKE_CURRENT_BINARY_DIR}/libfdb_c_external.so
            )
+  add_fdbclient_test(
+    NAME fdb_c_api_tests
+    DISABLE_LOG_DUMP
+    COMMAND ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/run_c_api_tests.py
+            --cluster-file
+            @CLUSTER_FILE@
+            --tester-binary
+            $<TARGET_FILE:fdb_c_api_tester>
+            --external-client-library
+            ${CMAKE_CURRENT_BINARY_DIR}/libfdb_c_external.so
+            --test-dir
+            ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests
+            )
 endif()

 set(c_workloads_srcs
--- a/bindings/c/test/apitester/TesterApiWorkload.cpp
+++ b/bindings/c/test/apitester/TesterApiWorkload.cpp
@ -0,0 +1,129 @@
+/*
+ * TesterApiWorkload.cpp
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TesterApiWorkload.h"
+#include "TesterUtil.h"
+#include <fmt/format.h>
+
+namespace FdbApiTester {
+
+ApiWorkload::ApiWorkload(const WorkloadConfig& config) : WorkloadBase(config) {
+	minKeyLength = config.getIntOption("minKeyLength", 1);
+	maxKeyLength = config.getIntOption("maxKeyLength", 64);
+	minValueLength = config.getIntOption("minValueLength", 1);
+	maxValueLength = config.getIntOption("maxValueLength", 1000);
+	maxKeysPerTransaction = config.getIntOption("maxKeysPerTransaction", 50);
+	initialSize = config.getIntOption("initialSize", 1000);
+	readExistingKeysRatio = config.getFloatOption("readExistingKeysRatio", 0.9);
+	keyPrefix = fmt::format("{}/", workloadId);
+}
+
+void ApiWorkload::start() {
+	schedule([this]() {
+		// 1. Clear data
+		clearData([this]() {
+			// 2. Populate initial data
+			populateData([this]() {
+				// 3. Generate random workload
+				runTests();
+			});
+		});
+	});
+}
+
+std::string ApiWorkload::randomKeyName() {
+	return keyPrefix + Random::get().randomStringLowerCase(minKeyLength, maxKeyLength);
+}
+
+std::string ApiWorkload::randomValue() {
+	return Random::get().randomStringLowerCase(minValueLength, maxValueLength);
+}
+
+std::string ApiWorkload::randomNotExistingKey() {
+	while (true) {
+		std::string key = randomKeyName();
+		if (!store.exists(key)) {
+			return key;
+		}
+	}
+}
+
+std::string ApiWorkload::randomExistingKey() {
+	std::string genKey = randomKeyName();
+	std::string key = store.getKey(genKey, true, 1);
+	if (key != store.endKey()) {
+		return key;
+	}
+	key = store.getKey(genKey, true, 0);
+	if (key != store.startKey()) {
+		return key;
+	}
+	info("No existing key found, using a new random key.");
+	return genKey;
+}
+
+std::string ApiWorkload::randomKey(double existingKeyRatio) {
+	if (Random::get().randomBool(existingKeyRatio)) {
+		return randomExistingKey();
+	} else {
+		return randomNotExistingKey();
+	}
+}
+
+void ApiWorkload::populateDataTx(TTaskFct cont) {
+	int numKeys = maxKeysPerTransaction;
+	auto kvPairs = std::make_shared<std::vector<KeyValue>>();
+	for (int i = 0; i < numKeys; i++) {
+		kvPairs->push_back(KeyValue{ randomNotExistingKey(), randomValue() });
+	}
+	execTransaction(
+	    [kvPairs](auto ctx) {
+		    for (const KeyValue& kv : *kvPairs) {
+			    ctx->tx()->set(kv.key, kv.value);
+		    }
+		    ctx->commit();
+	    },
+	    [this, kvPairs, cont]() {
+		    for (const KeyValue& kv : *kvPairs) {
+			    store.set(kv.key, kv.value);
+		    }
+		    schedule(cont);
+	    });
+}
+
+void ApiWorkload::clearData(TTaskFct cont) {
+	execTransaction(
+	    [this](auto ctx) {
+		    ctx->tx()->clearRange(keyPrefix, fmt::format("{}\xff", keyPrefix));
+		    ctx->commit();
+	    },
+	    [this, cont]() { schedule(cont); });
+}
+
+void ApiWorkload::populateData(TTaskFct cont) {
+	if (store.size() < initialSize) {
+		populateDataTx([this, cont]() { populateData(cont); });
+	} else {
+		info("Data population completed");
+		schedule(cont);
+	}
+}
+
+} // namespace FdbApiTester
--- a/bindings/c/test/apitester/TesterApiWorkload.h
+++ b/bindings/c/test/apitester/TesterApiWorkload.h
@ -0,0 +1,89 @@
+/*
+ * TesterApiWorkload.h
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef APITESTER_API_WORKLOAD_H
+#define APITESTER_API_WORKLOAD_H
+
+#include "TesterWorkload.h"
+#include "TesterKeyValueStore.h"
+
+namespace FdbApiTester {
+
+/**
+ * Base class for implementing API testing workloads.
+ * Provides various helper methods and reusable configuration parameters
+ */
+class ApiWorkload : public WorkloadBase {
+public:
+	void start() override;
+
+	// Method to be overridden to run specific tests
+	virtual void runTests() = 0;
+
+protected:
+	// The minimum length of a key
+	int minKeyLength;
+
+	// The maximum length of a key
+	int maxKeyLength;
+
+	// The minimum length of a value
+	int minValueLength;
+
+	// The maximum length of a value
+	int maxValueLength;
+
+	// Maximum number of keys to be accessed by a transaction
+	int maxKeysPerTransaction;
+
+	// Initial data size (number of key-value pairs)
+	int initialSize;
+
+	// The ratio of reading existing keys
+	double readExistingKeysRatio;
+
+	// Key prefix
+	std::string keyPrefix;
+
+	// In-memory store maintaining expected database state
+	KeyValueStore store;
+
+	ApiWorkload(const WorkloadConfig& config);
+
+	// Methods for generating random keys and values
+	std::string randomKeyName();
+	std::string randomValue();
+	std::string randomNotExistingKey();
+	std::string randomExistingKey();
+	std::string randomKey(double existingKeyRatio);
+
+	// Generate initial random data for the workload
+	void populateData(TTaskFct cont);
+
+	// Clear the data of the workload
+	void clearData(TTaskFct cont);
+
+private:
+	void populateDataTx(TTaskFct cont);
+};
+
+} // namespace FdbApiTester
+
+#endif
--- a/bindings/c/test/apitester/TesterApiWrapper.cpp
+++ b/bindings/c/test/apitester/TesterApiWrapper.cpp
@ -0,0 +1,124 @@
+/*
+ * TesterApiWrapper.cpp
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "TesterApiWrapper.h"
+#include "TesterUtil.h"
+#include <cstdint>
+#include <fmt/format.h>
+
+namespace FdbApiTester {
+
+namespace {
+
+void fdb_check(fdb_error_t e) {
+	if (e) {
+		fmt::print(stderr, "Unexpected error: %s\n", fdb_get_error(e));
+		std::abort();
+	}
+}
+
+} // namespace
+
+Future::Future(FDBFuture* f) : future_(f, fdb_future_destroy) {}
+
+void Future::reset() {
+	future_.reset();
+}
+
+void Future::cancel() {
+	ASSERT(future_);
+	fdb_future_cancel(future_.get());
+}
+
+fdb_error_t Future::getError() const {
+	ASSERT(future_);
+	return fdb_future_get_error(future_.get());
+}
+
+std::optional<std::string> ValueFuture::getValue() const {
+	ASSERT(future_);
+	int out_present;
+	const std::uint8_t* val;
+	int vallen;
+	fdb_check(fdb_future_get_value(future_.get(), &out_present, &val, &vallen));
+	return out_present ? std::make_optional(std::string((const char*)val, vallen)) : std::nullopt;
+}
+
+// Given an FDBDatabase, initializes a new transaction.
+Transaction::Transaction(FDBTransaction* tx) : tx_(tx, fdb_transaction_destroy) {}
+
+ValueFuture Transaction::get(std::string_view key, fdb_bool_t snapshot) {
+	ASSERT(tx_);
+	return ValueFuture(fdb_transaction_get(tx_.get(), (const uint8_t*)key.data(), key.size(), snapshot));
+}
+
+void Transaction::set(std::string_view key, std::string_view value) {
+	ASSERT(tx_);
+	fdb_transaction_set(tx_.get(), (const uint8_t*)key.data(), key.size(), (const uint8_t*)value.data(), value.size());
+}
+
+void Transaction::clear(std::string_view key) {
+	ASSERT(tx_);
+	fdb_transaction_clear(tx_.get(), (const uint8_t*)key.data(), key.size());
+}
+
+void Transaction::clearRange(std::string_view begin, std::string_view end) {
+	ASSERT(tx_);
+	fdb_transaction_clear_range(
+	    tx_.get(), (const uint8_t*)begin.data(), begin.size(), (const uint8_t*)end.data(), end.size());
+}
+
+Future Transaction::commit() {
+	ASSERT(tx_);
+	return Future(fdb_transaction_commit(tx_.get()));
+}
+
+void Transaction::cancel() {
+	ASSERT(tx_);
+	fdb_transaction_cancel(tx_.get());
+}
+
+Future Transaction::onError(fdb_error_t err) {
+	ASSERT(tx_);
+	return Future(fdb_transaction_on_error(tx_.get(), err));
+}
+
+void Transaction::reset() {
+	ASSERT(tx_);
+	fdb_transaction_reset(tx_.get());
+}
+
+fdb_error_t Transaction::setOption(FDBTransactionOption option) {
+	ASSERT(tx_);
+	return fdb_transaction_set_option(tx_.get(), option, reinterpret_cast<const uint8_t*>(""), 0);
+}
+
+fdb_error_t FdbApi::setOption(FDBNetworkOption option, std::string_view value) {
+	return fdb_network_set_option(option, reinterpret_cast<const uint8_t*>(value.data()), value.size());
+}
+
+fdb_error_t FdbApi::setOption(FDBNetworkOption option, int64_t value) {
+	return fdb_network_set_option(option, reinterpret_cast<const uint8_t*>(&value), sizeof(value));
+}
+
+fdb_error_t FdbApi::setOption(FDBNetworkOption option) {
+	return fdb_network_set_option(option, reinterpret_cast<const uint8_t*>(""), 0);
+}
+
+} // namespace FdbApiTester
--- a/bindings/c/test/apitester/TesterApiWrapper.h
+++ b/bindings/c/test/apitester/TesterApiWrapper.h
@ -0,0 +1,92 @@
+/*
+ * TesterApiWrapper.h
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#ifndef APITESTER_API_WRAPPER_H
+#define APITESTER_API_WRAPPER_H
+
+#include <string_view>
+#include <optional>
+#include <memory>
+
+#define FDB_API_VERSION 710
+#include "bindings/c/foundationdb/fdb_c.h"
+
+#undef ERROR
+#define ERROR(name, number, description) enum { error_code_##name = number };
+
+#include "flow/error_definitions.h"
+
+namespace FdbApiTester {
+
+// Wrapper parent class to manage memory of an FDBFuture pointer. Cleans up
+// FDBFuture when this instance goes out of scope.
+class Future {
+public:
+	Future() = default;
+	Future(FDBFuture* f);
+
+	FDBFuture* fdbFuture() { return future_.get(); };
+
+	fdb_error_t getError() const;
+	explicit operator bool() const { return future_ != nullptr; };
+	void reset();
+	void cancel();
+
+protected:
+	std::shared_ptr<FDBFuture> future_;
+};
+
+class ValueFuture : public Future {
+public:
+	ValueFuture() = default;
+	ValueFuture(FDBFuture* f) : Future(f) {}
+	std::optional<std::string> getValue() const;
+};
+
+class Transaction {
+public:
+	Transaction() = default;
+	Transaction(FDBTransaction* tx);
+	ValueFuture get(std::string_view key, fdb_bool_t snapshot);
+	void set(std::string_view key, std::string_view value);
+	void clear(std::string_view key);
+	void clearRange(std::string_view begin, std::string_view end);
+	Future commit();
+	void cancel();
+	Future onError(fdb_error_t err);
+	void reset();
+	fdb_error_t setOption(FDBTransactionOption option);
+
+private:
+	std::shared_ptr<FDBTransaction> tx_;
+};
+
+class FdbApi {
+public:
+	static fdb_error_t setOption(FDBNetworkOption option, std::string_view value);
+	static fdb_error_t setOption(FDBNetworkOption option, int64_t value);
+	static fdb_error_t setOption(FDBNetworkOption option);
+};
+
+} // namespace FdbApiTester
+
+#endif
--- a/bindings/c/test/apitester/TesterCancelTransactionWorkload.cpp
+++ b/bindings/c/test/apitester/TesterCancelTransactionWorkload.cpp
@ -0,0 +1,113 @@
+/*
+ * TesterCancelTransactionWorkload.cpp
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "TesterApiWorkload.h"
+#include "TesterUtil.h"
+
+namespace FdbApiTester {
+
+class CancelTransactionWorkload : public ApiWorkload {
+public:
+	CancelTransactionWorkload(const WorkloadConfig& config) : ApiWorkload(config) {
+		numRandomOperations = config.getIntOption("numRandomOperations", 1000);
+		numOpLeft = numRandomOperations;
+	}
+
+	void runTests() override { randomOperations(); }
+
+private:
+	enum OpType { OP_CANCEL_GET, OP_CANCEL_AFTER_FIRST_GET, OP_LAST = OP_CANCEL_AFTER_FIRST_GET };
+
+	// The number of operations to be executed
+	int numRandomOperations;
+
+	// Operations counter
+	int numOpLeft;
+
+	// Start multiple concurrent gets and cancel the transaction
+	void randomCancelGetTx(TTaskFct cont) {
+		int numKeys = Random::get().randomInt(1, maxKeysPerTransaction);
+		auto keys = std::make_shared<std::vector<std::string>>();
+		for (int i = 0; i < numKeys; i++) {
+			keys->push_back(randomKey(readExistingKeysRatio));
+		}
+		execTransaction(
+		    [keys](auto ctx) {
+			    std::vector<Future> futures;
+			    for (const auto& key : *keys) {
+				    futures.push_back(ctx->tx()->get(key, false));
+			    }
+			    ctx->done();
+		    },
+		    [this, cont]() { schedule(cont); });
+	}
+
+	// Start multiple concurrent gets and cancel the transaction after the first get returns
+	void randomCancelAfterFirstResTx(TTaskFct cont) {
+		int numKeys = Random::get().randomInt(1, maxKeysPerTransaction);
+		auto keys = std::make_shared<std::vector<std::string>>();
+		for (int i = 0; i < numKeys; i++) {
+			keys->push_back(randomKey(readExistingKeysRatio));
+		}
+		execTransaction(
+		    [this, keys](auto ctx) {
+			    std::vector<ValueFuture> futures;
+			    for (const auto& key : *keys) {
+				    futures.push_back(ctx->tx()->get(key, false));
+			    }
+			    for (int i = 0; i < keys->size(); i++) {
+				    ValueFuture f = futures[i];
+				    auto expectedVal = store.get((*keys)[i]);
+				    ctx->continueAfter(f, [expectedVal, f, this, ctx]() {
+					    auto val = f.getValue();
+					    if (expectedVal != val) {
+						    error(fmt::format(
+						        "cancelAfterFirstResTx mismatch. expected: {:.80} actual: {:.80}", expectedVal, val));
+					    }
+					    ctx->done();
+				    });
+			    }
+		    },
+		    [this, cont]() { schedule(cont); });
+	}
+
+	void randomOperation(TTaskFct cont) {
+		OpType txType = (OpType)Random::get().randomInt(0, OP_LAST);
+		switch (txType) {
+		case OP_CANCEL_GET:
+			randomCancelGetTx(cont);
+			break;
+		case OP_CANCEL_AFTER_FIRST_GET:
+			randomCancelAfterFirstResTx(cont);
+			break;
+		}
+	}
+
+	void randomOperations() {
+		if (numOpLeft == 0)
+			return;
+
+		numOpLeft--;
+		randomOperation([this]() { randomOperations(); });
+	}
+};
+
+WorkloadFactory<CancelTransactionWorkload> MiscTestWorkloadFactory("CancelTransaction");
+
+} // namespace FdbApiTester
--- a/bindings/c/test/apitester/TesterCorrectnessWorkload.cpp
+++ b/bindings/c/test/apitester/TesterCorrectnessWorkload.cpp
@ -0,0 +1,227 @@
+/*
+ * TesterCorrectnessWorkload.cpp
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "TesterApiWorkload.h"
+#include "TesterUtil.h"
+#include <memory>
+#include <fmt/format.h>
+
+namespace FdbApiTester {
+
+class ApiCorrectnessWorkload : public ApiWorkload {
+public:
+	ApiCorrectnessWorkload(const WorkloadConfig& config) : ApiWorkload(config) {
+		numRandomOperations = config.getIntOption("numRandomOperations", 1000);
+		numOpLeft = numRandomOperations;
+	}
+
+	void runTests() override { randomOperations(); }
+
+private:
+	enum OpType { OP_INSERT, OP_GET, OP_CLEAR, OP_CLEAR_RANGE, OP_COMMIT_READ, OP_LAST = OP_COMMIT_READ };
+
+	// The number of operations to be executed
+	int numRandomOperations;
+
+	// Operations counter
+	int numOpLeft;
+
+	void randomInsertOp(TTaskFct cont) {
+		int numKeys = Random::get().randomInt(1, maxKeysPerTransaction);
+		auto kvPairs = std::make_shared<std::vector<KeyValue>>();
+		for (int i = 0; i < numKeys; i++) {
+			kvPairs->push_back(KeyValue{ randomNotExistingKey(), randomValue() });
+		}
+		execTransaction(
+		    [kvPairs](auto ctx) {
+			    for (const KeyValue& kv : *kvPairs) {
+				    ctx->tx()->set(kv.key, kv.value);
+			    }
+			    ctx->commit();
+		    },
+		    [this, kvPairs, cont]() {
+			    for (const KeyValue& kv : *kvPairs) {
+				    store.set(kv.key, kv.value);
+			    }
+			    schedule(cont);
+		    });
+	}
+
+	void randomCommitReadOp(TTaskFct cont) {
+		int numKeys = Random::get().randomInt(1, maxKeysPerTransaction);
+		auto kvPairs = std::make_shared<std::vector<KeyValue>>();
+		for (int i = 0; i < numKeys; i++) {
+			kvPairs->push_back(KeyValue{ randomKey(readExistingKeysRatio), randomValue() });
+		}
+		execTransaction(
+		    [kvPairs](auto ctx) {
+			    for (const KeyValue& kv : *kvPairs) {
+				    ctx->tx()->set(kv.key, kv.value);
+			    }
+			    ctx->commit();
+		    },
+		    [this, kvPairs, cont]() {
+			    for (const KeyValue& kv : *kvPairs) {
+				    store.set(kv.key, kv.value);
+			    }
+			    auto results = std::make_shared<std::vector<std::optional<std::string>>>();
+			    execTransaction(
+			        [kvPairs, results](auto ctx) {
+				        // TODO: Enable after merging with GRV caching
+				        // ctx->tx()->setOption(FDB_TR_OPTION_USE_GRV_CACHE);
+				        auto futures = std::make_shared<std::vector<Future>>();
+				        for (const auto& kv : *kvPairs) {
+					        futures->push_back(ctx->tx()->get(kv.key, false));
+				        }
+				        ctx->continueAfterAll(*futures, [ctx, futures, results]() {
+					        results->clear();
+					        for (auto& f : *futures) {
+						        results->push_back(((ValueFuture&)f).getValue());
+					        }
+					        ASSERT(results->size() == futures->size());
+					        ctx->done();
+				        });
+			        },
+			        [this, kvPairs, results, cont]() {
+				        ASSERT(results->size() == kvPairs->size());
+				        for (int i = 0; i < kvPairs->size(); i++) {
+					        auto expected = store.get((*kvPairs)[i].key);
+					        auto actual = (*results)[i];
+					        if (actual != expected) {
+						        error(
+						            fmt::format("randomCommitReadOp mismatch. key: {} expected: {:.80} actual: {:.80}",
+						                        (*kvPairs)[i].key,
+						                        expected,
+						                        actual));
+						        ASSERT(false);
+					        }
+				        }
+				        schedule(cont);
+			        });
+		    });
+	}
+
+	void randomGetOp(TTaskFct cont) {
+		int numKeys = Random::get().randomInt(1, maxKeysPerTransaction);
+		auto keys = std::make_shared<std::vector<std::string>>();
+		auto results = std::make_shared<std::vector<std::optional<std::string>>>();
+		for (int i = 0; i < numKeys; i++) {
+			keys->push_back(randomKey(readExistingKeysRatio));
+		}
+		execTransaction(
+		    [keys, results](auto ctx) {
+			    auto futures = std::make_shared<std::vector<Future>>();
+			    for (const auto& key : *keys) {
+				    futures->push_back(ctx->tx()->get(key, false));
+			    }
+			    ctx->continueAfterAll(*futures, [ctx, futures, results]() {
+				    results->clear();
+				    for (auto& f : *futures) {
+					    results->push_back(((ValueFuture&)f).getValue());
+				    }
+				    ASSERT(results->size() == futures->size());
+				    ctx->done();
+			    });
+		    },
+		    [this, keys, results, cont]() {
+			    ASSERT(results->size() == keys->size());
+			    for (int i = 0; i < keys->size(); i++) {
+				    auto expected = store.get((*keys)[i]);
+				    if ((*results)[i] != expected) {
+					    error(fmt::format("randomGetOp mismatch. key: {} expected: {:.80} actual: {:.80}",
+					                      (*keys)[i],
+					                      expected,
+					                      (*results)[i]));
+				    }
+			    }
+			    schedule(cont);
+		    });
+	}
+
+	void randomClearOp(TTaskFct cont) {
+		int numKeys = Random::get().randomInt(1, maxKeysPerTransaction);
+		auto keys = std::make_shared<std::vector<std::string>>();
+		for (int i = 0; i < numKeys; i++) {
+			keys->push_back(randomExistingKey());
+		}
+		execTransaction(
+		    [keys](auto ctx) {
+			    for (const auto& key : *keys) {
+				    ctx->tx()->clear(key);
+			    }
+			    ctx->commit();
+		    },
+		    [this, keys, cont]() {
+			    for (const auto& key : *keys) {
+				    store.clear(key);
+			    }
+			    schedule(cont);
+		    });
+	}
+
+	void randomClearRangeOp(TTaskFct cont) {
+		std::string begin = randomKeyName();
+		std::string end = randomKeyName();
+		if (begin > end) {
+			std::swap(begin, end);
+		}
+		execTransaction(
+		    [begin, end](auto ctx) {
+			    ctx->tx()->clearRange(begin, end);
+			    ctx->commit();
+		    },
+		    [this, begin, end, cont]() {
+			    store.clear(begin, end);
+			    schedule(cont);
+		    });
+	}
+
+	void randomOperation(TTaskFct cont) {
+		OpType txType = (store.size() == 0) ? OP_INSERT : (OpType)Random::get().randomInt(0, OP_LAST);
+		switch (txType) {
+		case OP_INSERT:
+			randomInsertOp(cont);
+			break;
+		case OP_GET:
+			randomGetOp(cont);
+			break;
+		case OP_CLEAR:
+			randomClearOp(cont);
+			break;
+		case OP_CLEAR_RANGE:
+			randomClearRangeOp(cont);
+			break;
+		case OP_COMMIT_READ:
+			randomCommitReadOp(cont);
+			break;
+		}
+	}
+
+	void randomOperations() {
+		if (numOpLeft == 0)
+			return;
+
+		numOpLeft--;
+		randomOperation([this]() { randomOperations(); });
+	}
+};
+
+WorkloadFactory<ApiCorrectnessWorkload> ApiCorrectnessWorkloadFactory("ApiCorrectness");
+
+} // namespace FdbApiTester
--- a/bindings/c/test/apitester/TesterKeyValueStore.cpp
+++ b/bindings/c/test/apitester/TesterKeyValueStore.cpp
@ -0,0 +1,167 @@
+/*
+ * TesterKeyValueStore.cpp
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TesterKeyValueStore.h"
+
+namespace FdbApiTester {
+
+// Get the value associated with a key
+std::optional<std::string> KeyValueStore::get(std::string_view key) const {
+	std::unique_lock<std::mutex> lock(mutex);
+	auto value = store.find(std::string(key));
+	if (value != store.end())
+		return value->second;
+	else
+		return std::optional<std::string>();
+}
+
+// Checks if the key exists
+bool KeyValueStore::exists(std::string_view key) {
+	std::unique_lock<std::mutex> lock(mutex);
+	return (store.find(std::string(key)) != store.end());
+}
+
+// Returns the key designated by a key selector
+std::string KeyValueStore::getKey(std::string_view keyName, bool orEqual, int offset) const {
+	std::unique_lock<std::mutex> lock(mutex);
+	// Begin by getting the start key referenced by the key selector
+	std::map<std::string, std::string>::const_iterator mapItr = store.lower_bound(keyName);
+
+	// Update the iterator position if necessary based on the value of orEqual
+	int count = 0;
+	if (offset <= 0) {
+		if (mapItr == store.end() || keyName != mapItr->first || !orEqual) {
+			if (mapItr == store.begin())
+				return startKey();
+
+			mapItr--;
+		}
+	} else {
+		if (mapItr == store.end())
+			return endKey();
+
+		if (keyName == mapItr->first && orEqual) {
+			mapItr++;
+		}
+
+		count++;
+	}
+
+	// Increment the map iterator until the desired offset is reached
+	for (; count < abs(offset); count++) {
+		if (offset < 0) {
+			if (mapItr == store.begin())
+				break;
+
+			mapItr--;
+		} else {
+			if (mapItr == store.end())
+				break;
+
+			mapItr++;
+		}
+	}
+
+	if (mapItr == store.end())
+		return endKey();
+	else if (count == abs(offset))
+		return mapItr->first;
+	else
+		return startKey();
+}
+
+// Gets a range of key-value pairs, returning a maximum of <limit> results
+std::vector<KeyValue> KeyValueStore::getRange(std::string_view begin,
+                                              std::string_view end,
+                                              int limit,
+                                              bool reverse) const {
+	std::unique_lock<std::mutex> lock(mutex);
+	std::vector<KeyValue> results;
+	if (!reverse) {
+		std::map<std::string, std::string>::const_iterator mapItr = store.lower_bound(begin);
+
+		for (; mapItr != store.end() && mapItr->first < end && results.size() < limit; mapItr++)
+			results.push_back(KeyValue{ mapItr->first, mapItr->second });
+	}
+
+	// Support for reverse getRange queries is supported, but not tested at this time.  This is because reverse range
+	// queries have been disallowed by the database at the API level
+	else {
+		std::map<std::string, std::string>::const_iterator mapItr = store.lower_bound(end);
+		if (mapItr == store.begin())
+			return results;
+
+		for (--mapItr; mapItr->first >= begin && results.size() < abs(limit); mapItr--) {
+			results.push_back(KeyValue{ mapItr->first, mapItr->second });
+			if (mapItr == store.begin())
+				break;
+		}
+	}
+
+	return results;
+}
+
+// Stores a key-value pair in the database
+void KeyValueStore::set(std::string_view key, std::string_view value) {
+	std::unique_lock<std::mutex> lock(mutex);
+	store[std::string(key)] = value;
+}
+
+// Removes a key from the database
+void KeyValueStore::clear(std::string_view key) {
+	std::unique_lock<std::mutex> lock(mutex);
+	auto iter = store.find(key);
+	if (iter != store.end()) {
+		store.erase(iter);
+	}
+}
+
+// Removes a range of keys from the database
+void KeyValueStore::clear(std::string_view begin, std::string_view end) {
+	std::unique_lock<std::mutex> lock(mutex);
+	store.erase(store.lower_bound(begin), store.lower_bound(end));
+}
+
+// The number of keys in the database
+uint64_t KeyValueStore::size() const {
+	std::unique_lock<std::mutex> lock(mutex);
+	return store.size();
+}
+
+// The first key in the database; returned by key selectors that choose a key off the front
+std::string KeyValueStore::startKey() const {
+	return "";
+}
+
+// The last key in the database; returned by key selectors that choose a key off the back
+std::string KeyValueStore::endKey() const {
+	return "\xff";
+}
+
+// Debugging function that prints all key-value pairs
+void KeyValueStore::printContents() const {
+	std::unique_lock<std::mutex> lock(mutex);
+	printf("Contents:\n");
+	std::map<std::string, std::string>::const_iterator mapItr;
+	for (mapItr = store.begin(); mapItr != store.end(); mapItr++)
+		printf("%s\n", mapItr->first.c_str());
+}
+
+} // namespace FdbApiTester
--- a/bindings/c/test/apitester/TesterKeyValueStore.h
+++ b/bindings/c/test/apitester/TesterKeyValueStore.h
@ -0,0 +1,83 @@
+/*
+ * TesterKeyValueStore.h
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#ifndef APITESTER_KEY_VALUE_STORE_H
+#define APITESTER_KEY_VALUE_STORE_H
+
+#include <map>
+#include <optional>
+#include <string>
+#include <string_view>
+#include <vector>
+#include <mutex>
+
+namespace FdbApiTester {
+
+struct KeyValue {
+	std::string key;
+	std::string value;
+};
+
+class KeyValueStore {
+public:
+	// Get the value associated with a key
+	std::optional<std::string> get(std::string_view key) const;
+
+	// Checks if the key exists
+	bool exists(std::string_view key);
+
+	// Returns the key designated by a key selector
+	std::string getKey(std::string_view keyName, bool orEqual, int offset) const;
+
+	// Gets a range of key-value pairs, returning a maximum of <limit> results
+	std::vector<KeyValue> getRange(std::string_view begin, std::string_view end, int limit, bool reverse) const;
+
+	// Stores a key-value pair in the database
+	void set(std::string_view key, std::string_view value);
+
+	// Removes a key from the database
+	void clear(std::string_view key);
+
+	// Removes a range of keys from the database
+	void clear(std::string_view begin, std::string_view end);
+
+	// The number of keys in the database
+	uint64_t size() const;
+
+	// The first key in the database; returned by key selectors that choose a key off the front
+	std::string startKey() const;
+
+	// The last key in the database; returned by key selectors that choose a key off the back
+	std::string endKey() const;
+
+	// Debugging function that prints all key-value pairs
+	void printContents() const;
+
+private:
+	// A map holding the key-value pairs
+	std::map<std::string, std::string, std::less<>> store;
+	mutable std::mutex mutex;
+};
+
+} // namespace FdbApiTester
+
+#endif
--- a/bindings/c/test/apitester/TesterOptions.h
+++ b/bindings/c/test/apitester/TesterOptions.h
@ -0,0 +1,49 @@
+/*
+ * TesterOptions.h
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#ifndef APITESTER_TESTER_OPTIONS_H
+#define APITESTER_TESTER_OPTIONS_H
+
+#include "TesterTestSpec.h"
+
+namespace FdbApiTester {
+
+class TesterOptions {
+public:
+	std::string clusterFile;
+	bool trace = false;
+	std::string traceDir;
+	std::string traceFormat;
+	std::string logGroup;
+	std::string externalClientLibrary;
+	std::string testFile;
+	int numFdbThreads;
+	int numClientThreads;
+	int numDatabases;
+	int numClients;
+	std::vector<std::pair<std::string, std::string>> knobs;
+	TestSpec testSpec;
+};
+
+} // namespace FdbApiTester
+
+#endif
--- a/bindings/c/test/apitester/TesterScheduler.cpp
+++ b/bindings/c/test/apitester/TesterScheduler.cpp
@ -0,0 +1,67 @@
+/*
+ * TesterScheduler.cpp
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TesterScheduler.h"
+#include "TesterUtil.h"
+
+#include <memory>
+#include <thread>
+#include <boost/asio.hpp>
+
+using namespace boost::asio;
+
+namespace FdbApiTester {
+
+const TTaskFct NO_OP_TASK = []() {};
+
+class AsioScheduler : public IScheduler {
+public:
+	AsioScheduler(int numThreads) : numThreads(numThreads) {}
+
+	void start() override {
+		work = require(io_ctx.get_executor(), execution::outstanding_work.tracked);
+		for (int i = 0; i < numThreads; i++) {
+			threads.emplace_back([this]() { io_ctx.run(); });
+		}
+	}
+
+	void schedule(TTaskFct task) override { post(io_ctx, task); }
+
+	void stop() override { work = any_io_executor(); }
+
+	void join() override {
+		for (auto& th : threads) {
+			th.join();
+		}
+	}
+
+private:
+	int numThreads;
+	std::vector<std::thread> threads;
+	io_context io_ctx;
+	any_io_executor work;
+};
+
+std::unique_ptr<IScheduler> createScheduler(int numThreads) {
+	ASSERT(numThreads > 0 && numThreads <= 1000);
+	return std::make_unique<AsioScheduler>(numThreads);
+}
+
+} // namespace FdbApiTester
--- a/bindings/c/test/apitester/TesterScheduler.h
+++ b/bindings/c/test/apitester/TesterScheduler.h
@ -0,0 +1,60 @@
+/*
+ * TesterScheduler.h
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#ifndef APITESTER_SCHEDULER_H
+#define APITESTER_SCHEDULER_H
+
+#include <functional>
+#include <memory>
+
+namespace FdbApiTester {
+
+using TTaskFct = std::function<void(void)>;
+
+extern const TTaskFct NO_OP_TASK;
+
+/**
+ * Scheduler for asynchronous execution of tasks on a pool of threads
+ */
+class IScheduler {
+public:
+	virtual ~IScheduler() {}
+
+	// Create scheduler threads and begin accepting tasks
+	virtual void start() = 0;
+
+	// Schedule a task for asynchronous execution
+	virtual void schedule(TTaskFct task) = 0;
+
+	// Gracefully stop the scheduler. Waits for already running tasks to be finish
+	virtual void stop() = 0;
+
+	// Join with all threads of the scheduler
+	virtual void join() = 0;
+};
+
+// create a scheduler using given number of threads
+std::unique_ptr<IScheduler> createScheduler(int numThreads);
+
+} // namespace FdbApiTester
+
+#endif
--- a/bindings/c/test/apitester/TesterTestSpec.cpp
+++ b/bindings/c/test/apitester/TesterTestSpec.cpp
@ -0,0 +1,169 @@
+/*
+ * TesterTestSpec.cpp
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TesterTestSpec.h"
+#include "TesterUtil.h"
+#include <toml.hpp>
+#include <fmt/format.h>
+#include <functional>
+
+namespace FdbApiTester {
+
+namespace {
+
+void processIntOption(const std::string& value, const std::string& optionName, int& res, int minVal, int maxVal) {
+	char* endptr;
+	res = strtol(value.c_str(), &endptr, 10);
+	if (*endptr != '\0') {
+		throw TesterError(fmt::format("Invalid test file. Invalid value {} for {}", value, optionName));
+	}
+	if (res < minVal || res > maxVal) {
+		throw TesterError(
+		    fmt::format("Invalid test file. Value for {} must be between {} and {}", optionName, minVal, maxVal));
+	}
+}
+
+std::unordered_map<std::string, std::function<void(const std::string& value, TestSpec* spec)>> testSpecTestKeys = {
+	{ "title",
+	  [](const std::string& value, TestSpec* spec) { //
+	      spec->title = value;
+	  } },
+	{ "apiVersion",
+	  [](const std::string& value, TestSpec* spec) { //
+	      processIntOption(value, "apiVersion", spec->apiVersion, 700, 710);
+	  } },
+	{ "blockOnFutures",
+	  [](const std::string& value, TestSpec* spec) { //
+	      spec->blockOnFutures = (value == "true");
+	  } },
+	{ "buggify",
+	  [](const std::string& value, TestSpec* spec) { //
+	      spec->buggify = (value == "true");
+	  } },
+	{ "multiThreaded",
+	  [](const std::string& value, TestSpec* spec) { //
+	      spec->multiThreaded = (value == "true");
+	  } },
+	{ "fdbCallbacksOnExternalThreads",
+	  [](const std::string& value, TestSpec* spec) { //
+	      spec->fdbCallbacksOnExternalThreads = (value == "true");
+	  } },
+	{ "databasePerTransaction",
+	  [](const std::string& value, TestSpec* spec) { //
+	      spec->databasePerTransaction = (value == "true");
+	  } },
+	{ "minFdbThreads",
+	  [](const std::string& value, TestSpec* spec) { //
+	      processIntOption(value, "minFdbThreads", spec->minFdbThreads, 1, 1000);
+	  } },
+	{ "maxFdbThreads",
+	  [](const std::string& value, TestSpec* spec) { //
+	      processIntOption(value, "maxFdbThreads", spec->maxFdbThreads, 1, 1000);
+	  } },
+	{ "minClientThreads",
+	  [](const std::string& value, TestSpec* spec) { //
+	      processIntOption(value, "minClientThreads", spec->minClientThreads, 1, 1000);
+	  } },
+	{ "maxClientThreads",
+	  [](const std::string& value, TestSpec* spec) { //
+	      processIntOption(value, "maxClientThreads", spec->maxClientThreads, 1, 1000);
+	  } },
+	{ "minDatabases",
+	  [](const std::string& value, TestSpec* spec) { //
+	      processIntOption(value, "minDatabases", spec->minDatabases, 1, 1000);
+	  } },
+	{ "maxDatabases",
+	  [](const std::string& value, TestSpec* spec) { //
+	      processIntOption(value, "maxDatabases", spec->maxDatabases, 1, 1000);
+	  } },
+	{ "minClients",
+	  [](const std::string& value, TestSpec* spec) { //
+	      processIntOption(value, "minClients", spec->minClients, 1, 1000);
+	  } },
+	{ "maxClients",
+	  [](const std::string& value, TestSpec* spec) { //
+	      processIntOption(value, "maxClients", spec->maxClients, 1, 1000);
+	  } }
+};
+
+template <typename T>
+std::string toml_to_string(const T& value) {
+	// TOML formatting converts numbers to strings exactly how they're in the file
+	// and thus, is equivalent to testspec.  However, strings are quoted, so we
+	// must remove the quotes.
+	if (value.type() == toml::value_t::string) {
+		const std::string& formatted = toml::format(value);
+		return formatted.substr(1, formatted.size() - 2);
+	} else {
+		return toml::format(value);
+	}
+}
+
+} // namespace
+
+TestSpec readTomlTestSpec(std::string fileName) {
+	TestSpec spec;
+	WorkloadSpec workloadSpec;
+
+	const toml::value& conf = toml::parse(fileName);
+
+	// Then parse each test
+	const toml::array& tests = toml::find(conf, "test").as_array();
+	if (tests.size() == 0) {
+		throw TesterError("Invalid test file. No [test] section found");
+	} else if (tests.size() > 1) {
+		throw TesterError("Invalid test file. More than one [test] section found");
+	}
+
+	const toml::value& test = tests[0];
+
+	// First handle all test-level settings
+	for (const auto& [k, v] : test.as_table()) {
+		if (k == "workload") {
+			continue;
+		}
+		if (testSpecTestKeys.find(k) != testSpecTestKeys.end()) {
+			testSpecTestKeys[k](toml_to_string(v), &spec);
+		} else {
+			throw TesterError(fmt::format(
+			    "Invalid test file. Unrecognized test parameter. Name: {}, value {}", k, toml_to_string(v)));
+		}
+	}
+
+	// And then copy the workload attributes to spec.options
+	const toml::array& workloads = toml::find(test, "workload").as_array();
+	for (const toml::value& workload : workloads) {
+		workloadSpec = WorkloadSpec();
+		auto& options = workloadSpec.options;
+		for (const auto& [attrib, v] : workload.as_table()) {
+			options[attrib] = toml_to_string(v);
+		}
+		auto itr = options.find("name");
+		if (itr == options.end()) {
+			throw TesterError("Invalid test file. Unspecified workload name.");
+		}
+		workloadSpec.name = itr->second;
+		spec.workloads.push_back(workloadSpec);
+	}
+
+	return spec;
+}
+
+} // namespace FdbApiTester
--- a/bindings/c/test/apitester/TesterTestSpec.h
+++ b/bindings/c/test/apitester/TesterTestSpec.h
@ -0,0 +1,90 @@
+/*
+ * TesterTestSpec.h
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#ifndef APITESTER_CONFIG_READER_H
+#define APITESTER_CONFIG_READER_H
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#define FDB_API_VERSION 710
+
+namespace FdbApiTester {
+
+/// Workload specification
+struct WorkloadSpec {
+	std::string name;
+	std::unordered_map<std::string, std::string> options;
+};
+
+// Test speficification loaded from a *.toml file
+struct TestSpec {
+	// Title of the test
+	std::string title;
+
+	// FDB API version, using the latest version by default
+	int apiVersion = FDB_API_VERSION;
+
+	// Use blocking waits on futures instead of scheduling callbacks
+	bool blockOnFutures = false;
+
+	// Use multi-threaded FDB client
+	bool multiThreaded = false;
+
+	// Enable injection of errors in FDB client
+	bool buggify = false;
+
+	// Execute future callbacks on the threads of the external FDB library
+	// rather than on the main thread of the local FDB client library
+	bool fdbCallbacksOnExternalThreads = false;
+
+	// Execute each transaction in a separate database instance
+	bool databasePerTransaction = false;
+
+	// Size of the FDB client thread pool (a random number in the [min,max] range)
+	int minFdbThreads = 1;
+	int maxFdbThreads = 1;
+
+	// Size of the thread pool for test workloads (a random number in the [min,max] range)
+	int minClientThreads = 1;
+	int maxClientThreads = 1;
+
+	// Size of the database instance pool (a random number in the [min,max] range)
+	// Each transaction is assigned randomly to one of the databases in the pool
+	int minDatabases = 1;
+	int maxDatabases = 1;
+
+	// Number of workload clients (a random number in the [min,max] range)
+	int minClients = 1;
+	int maxClients = 10;
+
+	// List of workloads with their options
+	std::vector<WorkloadSpec> workloads;
+};
+
+// Read the test specfication from a *.toml file
+TestSpec readTomlTestSpec(std::string fileName);
+
+} // namespace FdbApiTester
+
+#endif
--- a/bindings/c/test/apitester/TesterTransactionExecutor.cpp
+++ b/bindings/c/test/apitester/TesterTransactionExecutor.cpp
@ -0,0 +1,471 @@
+/*
+ * TesterTransactionExecutor.cpp
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TesterTransactionExecutor.h"
+#include "TesterUtil.h"
+#include "test/apitester/TesterScheduler.h"
+#include <memory>
+#include <unordered_map>
+#include <mutex>
+#include <atomic>
+#include <chrono>
+#include <thread>
+#include <fmt/format.h>
+
+namespace FdbApiTester {
+
+void TransactionActorBase::complete(fdb_error_t err) {
+	error = err;
+	context = {};
+}
+
+void ITransactionContext::continueAfterAll(std::vector<Future> futures, TTaskFct cont) {
+	auto counter = std::make_shared<std::atomic<int>>(futures.size());
+	auto errorCode = std::make_shared<std::atomic<fdb_error_t>>(error_code_success);
+	auto thisPtr = shared_from_this();
+	for (auto& f : futures) {
+		continueAfter(
+		    f,
+		    [thisPtr, f, counter, errorCode, cont]() {
+			    if (f.getError() != error_code_success) {
+				    (*errorCode) = f.getError();
+			    }
+			    if (--(*counter) == 0) {
+				    if (*errorCode == error_code_success) {
+					    // all futures successful -> continue
+					    cont();
+				    } else {
+					    // at least one future failed -> retry the transaction
+					    thisPtr->onError(*errorCode);
+				    }
+			    }
+		    },
+		    false);
+	}
+}
+
+/**
+ * Transaction context base class, containing reusable functionality
+ */
+class TransactionContextBase : public ITransactionContext {
+public:
+	TransactionContextBase(FDBTransaction* tx,
+	                       std::shared_ptr<ITransactionActor> txActor,
+	                       TTaskFct cont,
+	                       IScheduler* scheduler)
+	  : fdbTx(tx), txActor(txActor), contAfterDone(cont), scheduler(scheduler), txState(TxState::IN_PROGRESS) {}
+
+	// A state machine:
+	// IN_PROGRESS -> (ON_ERROR -> IN_PROGRESS)* [-> ON_ERROR] -> DONE
+	enum class TxState { IN_PROGRESS, ON_ERROR, DONE };
+
+	Transaction* tx() override { return &fdbTx; }
+
+	// Set a continuation to be executed when a future gets ready
+	void continueAfter(Future f, TTaskFct cont, bool retryOnError) override { doContinueAfter(f, cont, retryOnError); }
+
+	// Complete the transaction with a commit
+	void commit() override {
+		std::unique_lock<std::mutex> lock(mutex);
+		if (txState != TxState::IN_PROGRESS) {
+			return;
+		}
+		lock.unlock();
+		Future f = fdbTx.commit();
+		auto thisRef = shared_from_this();
+		doContinueAfter(
+		    f, [thisRef]() { thisRef->done(); }, true);
+	}
+
+	// Complete the transaction without a commit (for read transactions)
+	void done() override {
+		std::unique_lock<std::mutex> lock(mutex);
+		if (txState != TxState::IN_PROGRESS) {
+			return;
+		}
+		txState = TxState::DONE;
+		lock.unlock();
+		// cancel transaction so that any pending operations on it
+		// fail gracefully
+		fdbTx.cancel();
+		txActor->complete(error_code_success);
+		cleanUp();
+		contAfterDone();
+	}
+
+protected:
+	virtual void doContinueAfter(Future f, TTaskFct cont, bool retryOnError) = 0;
+
+	// Clean up transaction state after completing the transaction
+	// Note that the object may live longer, because it is referenced
+	// by not yet triggered callbacks
+	virtual void cleanUp() {
+		ASSERT(txState == TxState::DONE);
+		ASSERT(!onErrorFuture);
+		txActor = {};
+	}
+
+	// Complete the transaction with an (unretriable) error
+	void transactionFailed(fdb_error_t err) {
+		ASSERT(err != error_code_success);
+		std::unique_lock<std::mutex> lock(mutex);
+		if (txState == TxState::DONE) {
+			return;
+		}
+		txState = TxState::DONE;
+		lock.unlock();
+		txActor->complete(err);
+		cleanUp();
+		contAfterDone();
+	}
+
+	// Handle result of an a transaction onError call
+	void handleOnErrorResult() {
+		ASSERT(txState == TxState::ON_ERROR);
+		fdb_error_t err = onErrorFuture.getError();
+		onErrorFuture = {};
+		if (err) {
+			transactionFailed(err);
+		} else {
+			std::unique_lock<std::mutex> lock(mutex);
+			txState = TxState::IN_PROGRESS;
+			lock.unlock();
+			txActor->start();
+		}
+	}
+
+	// FDB transaction
+	Transaction fdbTx;
+
+	// Actor implementing the transaction worklflow
+	std::shared_ptr<ITransactionActor> txActor;
+
+	// Mutex protecting access to shared mutable state
+	std::mutex mutex;
+
+	// Continuation to be called after completion of the transaction
+	TTaskFct contAfterDone;
+
+	// Reference to the scheduler
+	IScheduler* scheduler;
+
+	// Transaction execution state
+	TxState txState;
+
+	// onError future used in ON_ERROR state
+	Future onErrorFuture;
+};
+
+/**
+ *  Transaction context using blocking waits to implement continuations on futures
+ */
+class BlockingTransactionContext : public TransactionContextBase {
+public:
+	BlockingTransactionContext(FDBTransaction* tx,
+	                           std::shared_ptr<ITransactionActor> txActor,
+	                           TTaskFct cont,
+	                           IScheduler* scheduler)
+	  : TransactionContextBase(tx, txActor, cont, scheduler) {}
+
+protected:
+	void doContinueAfter(Future f, TTaskFct cont, bool retryOnError) override {
+		auto thisRef = std::static_pointer_cast<BlockingTransactionContext>(shared_from_this());
+		scheduler->schedule(
+		    [thisRef, f, cont, retryOnError]() mutable { thisRef->blockingContinueAfter(f, cont, retryOnError); });
+	}
+
+	void blockingContinueAfter(Future f, TTaskFct cont, bool retryOnError) {
+		std::unique_lock<std::mutex> lock(mutex);
+		if (txState != TxState::IN_PROGRESS) {
+			return;
+		}
+		lock.unlock();
+		fdb_error_t err = fdb_future_block_until_ready(f.fdbFuture());
+		if (err) {
+			transactionFailed(err);
+			return;
+		}
+		err = f.getError();
+		if (err == error_code_transaction_cancelled) {
+			return;
+		}
+		if (err == error_code_success || !retryOnError) {
+			scheduler->schedule([cont]() { cont(); });
+			return;
+		}
+
+		onError(err);
+	}
+
+	virtual void onError(fdb_error_t err) override {
+		std::unique_lock<std::mutex> lock(mutex);
+		if (txState != TxState::IN_PROGRESS) {
+			// Ignore further errors, if the transaction is in the error handing mode or completed
+			return;
+		}
+		txState = TxState::ON_ERROR;
+		lock.unlock();
+
+		ASSERT(!onErrorFuture);
+		onErrorFuture = fdbTx.onError(err);
+		fdb_error_t err2 = fdb_future_block_until_ready(onErrorFuture.fdbFuture());
+		if (err2) {
+			transactionFailed(err2);
+			return;
+		}
+		auto thisRef = std::static_pointer_cast<BlockingTransactionContext>(shared_from_this());
+		scheduler->schedule([thisRef]() { thisRef->handleOnErrorResult(); });
+	}
+};
+
+/**
+ *  Transaction context using callbacks to implement continuations on futures
+ */
+class AsyncTransactionContext : public TransactionContextBase {
+public:
+	AsyncTransactionContext(FDBTransaction* tx,
+	                        std::shared_ptr<ITransactionActor> txActor,
+	                        TTaskFct cont,
+	                        IScheduler* scheduler)
+	  : TransactionContextBase(tx, txActor, cont, scheduler) {}
+
+protected:
+	void doContinueAfter(Future f, TTaskFct cont, bool retryOnError) override {
+		std::unique_lock<std::mutex> lock(mutex);
+		if (txState != TxState::IN_PROGRESS) {
+			return;
+		}
+		callbackMap[f.fdbFuture()] = CallbackInfo{ f, cont, shared_from_this(), retryOnError };
+		lock.unlock();
+		fdb_error_t err = fdb_future_set_callback(f.fdbFuture(), futureReadyCallback, this);
+		if (err) {
+			lock.lock();
+			callbackMap.erase(f.fdbFuture());
+			lock.unlock();
+			transactionFailed(err);
+		}
+	}
+
+	static void futureReadyCallback(FDBFuture* f, void* param) {
+		AsyncTransactionContext* txCtx = (AsyncTransactionContext*)param;
+		txCtx->onFutureReady(f);
+	}
+
+	void onFutureReady(FDBFuture* f) {
+		injectRandomSleep();
+		// Hold a reference to this to avoid it to be
+		// destroyed before releasing the mutex
+		auto thisRef = shared_from_this();
+		std::unique_lock<std::mutex> lock(mutex);
+		auto iter = callbackMap.find(f);
+		ASSERT(iter != callbackMap.end());
+		CallbackInfo cbInfo = iter->second;
+		callbackMap.erase(iter);
+		if (txState != TxState::IN_PROGRESS) {
+			return;
+		}
+		lock.unlock();
+		fdb_error_t err = fdb_future_get_error(f);
+		if (err == error_code_transaction_cancelled) {
+			return;
+		}
+		if (err == error_code_success || !cbInfo.retryOnError) {
+			scheduler->schedule(cbInfo.cont);
+			return;
+		}
+		onError(err);
+	}
+
+	virtual void onError(fdb_error_t err) override {
+		std::unique_lock<std::mutex> lock(mutex);
+		if (txState != TxState::IN_PROGRESS) {
+			// Ignore further errors, if the transaction is in the error handing mode or completed
+			return;
+		}
+		txState = TxState::ON_ERROR;
+		lock.unlock();
+
+		ASSERT(!onErrorFuture);
+		onErrorFuture = tx()->onError(err);
+		onErrorThisRef = std::static_pointer_cast<AsyncTransactionContext>(shared_from_this());
+		fdb_error_t err2 = fdb_future_set_callback(onErrorFuture.fdbFuture(), onErrorReadyCallback, this);
+		if (err2) {
+			onErrorFuture = {};
+			transactionFailed(err2);
+		}
+	}
+
+	static void onErrorReadyCallback(FDBFuture* f, void* param) {
+		AsyncTransactionContext* txCtx = (AsyncTransactionContext*)param;
+		txCtx->onErrorReady(f);
+	}
+
+	void onErrorReady(FDBFuture* f) {
+		injectRandomSleep();
+		auto thisRef = onErrorThisRef;
+		onErrorThisRef = {};
+		scheduler->schedule([thisRef]() { thisRef->handleOnErrorResult(); });
+	}
+
+	void cleanUp() override {
+		TransactionContextBase::cleanUp();
+
+		// Cancel all pending operations
+		// Note that the callbacks of the cancelled futures will still be called
+		std::unique_lock<std::mutex> lock(mutex);
+		std::vector<Future> futures;
+		for (auto& iter : callbackMap) {
+			futures.push_back(iter.second.future);
+		}
+		lock.unlock();
+		for (auto& f : futures) {
+			f.cancel();
+		}
+	}
+
+	// Inject a random sleep with a low probability
+	void injectRandomSleep() {
+		if (Random::get().randomBool(0.01)) {
+			std::this_thread::sleep_for(std::chrono::milliseconds(Random::get().randomInt(1, 5)));
+		}
+	}
+
+	// Object references for a future callback
+	struct CallbackInfo {
+		Future future;
+		TTaskFct cont;
+		std::shared_ptr<ITransactionContext> thisRef;
+		bool retryOnError;
+	};
+
+	// Map for keeping track of future waits and holding necessary object references
+	std::unordered_map<FDBFuture*, CallbackInfo> callbackMap;
+
+	// Holding reference to this for onError future C callback
+	std::shared_ptr<AsyncTransactionContext> onErrorThisRef;
+};
+
+/**
+ * Transaction executor base class, containing reusable functionality
+ */
+class TransactionExecutorBase : public ITransactionExecutor {
+public:
+	TransactionExecutorBase(const TransactionExecutorOptions& options) : options(options), scheduler(nullptr) {}
+
+	void init(IScheduler* scheduler, const char* clusterFile) override {
+		this->scheduler = scheduler;
+		this->clusterFile = clusterFile;
+	}
+
+protected:
+	// Execute the transaction on the given database instance
+	void executeOnDatabase(FDBDatabase* db, std::shared_ptr<ITransactionActor> txActor, TTaskFct cont) {
+		FDBTransaction* tx;
+		fdb_error_t err = fdb_database_create_transaction(db, &tx);
+		if (err != error_code_success) {
+			txActor->complete(err);
+			cont();
+		} else {
+			std::shared_ptr<ITransactionContext> ctx;
+			if (options.blockOnFutures) {
+				ctx = std::make_shared<BlockingTransactionContext>(tx, txActor, cont, scheduler);
+			} else {
+				ctx = std::make_shared<AsyncTransactionContext>(tx, txActor, cont, scheduler);
+			}
+			txActor->init(ctx);
+			txActor->start();
+		}
+	}
+
+protected:
+	TransactionExecutorOptions options;
+	std::string clusterFile;
+	IScheduler* scheduler;
+};
+
+/**
+ * Transaction executor load balancing transactions over a fixed pool of databases
+ */
+class DBPoolTransactionExecutor : public TransactionExecutorBase {
+public:
+	DBPoolTransactionExecutor(const TransactionExecutorOptions& options) : TransactionExecutorBase(options) {}
+
+	~DBPoolTransactionExecutor() override { release(); }
+
+	void init(IScheduler* scheduler, const char* clusterFile) override {
+		TransactionExecutorBase::init(scheduler, clusterFile);
+		for (int i = 0; i < options.numDatabases; i++) {
+			FDBDatabase* db;
+			fdb_error_t err = fdb_create_database(clusterFile, &db);
+			if (err != error_code_success) {
+				throw TesterError(fmt::format("Failed create database with the cluster file '{}'. Error: {}({})",
+				                              clusterFile,
+				                              err,
+				                              fdb_get_error(err)));
+			}
+			databases.push_back(db);
+		}
+	}
+
+	void execute(std::shared_ptr<ITransactionActor> txActor, TTaskFct cont) override {
+		int idx = Random::get().randomInt(0, options.numDatabases - 1);
+		executeOnDatabase(databases[idx], txActor, cont);
+	}
+
+	void release() {
+		for (FDBDatabase* db : databases) {
+			fdb_database_destroy(db);
+		}
+	}
+
+private:
+	std::vector<FDBDatabase*> databases;
+};
+
+/**
+ * Transaction executor executing each transaction on a separate database
+ */
+class DBPerTransactionExecutor : public TransactionExecutorBase {
+public:
+	DBPerTransactionExecutor(const TransactionExecutorOptions& options) : TransactionExecutorBase(options) {}
+
+	void execute(std::shared_ptr<ITransactionActor> txActor, TTaskFct cont) override {
+		FDBDatabase* db = nullptr;
+		fdb_error_t err = fdb_create_database(clusterFile.c_str(), &db);
+		if (err != error_code_success) {
+			txActor->complete(err);
+			cont();
+		}
+		executeOnDatabase(db, txActor, [cont, db]() {
+			fdb_database_destroy(db);
+			cont();
+		});
+	}
+};
+
+std::unique_ptr<ITransactionExecutor> createTransactionExecutor(const TransactionExecutorOptions& options) {
+	if (options.databasePerTransaction) {
+		return std::make_unique<DBPerTransactionExecutor>(options);
+	} else {
+		return std::make_unique<DBPoolTransactionExecutor>(options);
+	}
+}
+
+} // namespace FdbApiTester
--- a/bindings/c/test/apitester/TesterTransactionExecutor.h
+++ b/bindings/c/test/apitester/TesterTransactionExecutor.h
@ -0,0 +1,145 @@
+/*
+ * TesterTransactionExecutor.h
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#ifndef APITESTER_TRANSACTION_EXECUTOR_H
+#define APITESTER_TRANSACTION_EXECUTOR_H
+
+#include "TesterOptions.h"
+#include "TesterApiWrapper.h"
+#include "TesterScheduler.h"
+#include <string_view>
+#include <memory>
+
+namespace FdbApiTester {
+
+/**
+ * Interface to be used for implementation of a concrete transaction
+ */
+class ITransactionContext : public std::enable_shared_from_this<ITransactionContext> {
+public:
+	virtual ~ITransactionContext() {}
+
+	// Current FDB transaction
+	virtual Transaction* tx() = 0;
+
+	// Schedule a continuation to be executed when the future gets ready
+	// retryOnError controls whether transaction is retried in case of an error instead
+	// of calling the continuation
+	virtual void continueAfter(Future f, TTaskFct cont, bool retryOnError = true) = 0;
+
+	// Complete the transaction with a commit
+	virtual void commit() = 0;
+
+	// retry transaction on error
+	virtual void onError(fdb_error_t err) = 0;
+
+	// Mark the transaction as completed without committing it (for read transactions)
+	virtual void done() = 0;
+
+	// A continuation to be executed when all of the given futures get ready
+	virtual void continueAfterAll(std::vector<Future> futures, TTaskFct cont);
+};
+
+/**
+ * Interface of an actor object implementing a concrete transaction
+ */
+class ITransactionActor {
+public:
+	virtual ~ITransactionActor() {}
+
+	// Initialize with the given transaction context
+	virtual void init(std::shared_ptr<ITransactionContext> ctx) = 0;
+
+	// Start execution of the transaction, also called on retries
+	virtual void start() = 0;
+
+	// Transaction completion result (error_code_success in case of success)
+	virtual fdb_error_t getErrorCode() = 0;
+
+	// Notification about the completion of the transaction
+	virtual void complete(fdb_error_t err) = 0;
+};
+
+/**
+ * A helper base class for transaction actors
+ */
+class TransactionActorBase : public ITransactionActor {
+public:
+	void init(std::shared_ptr<ITransactionContext> ctx) override { context = ctx; }
+	fdb_error_t getErrorCode() override { return error; }
+	void complete(fdb_error_t err) override;
+
+protected:
+	std::shared_ptr<ITransactionContext> ctx() { return context; }
+
+private:
+	std::shared_ptr<ITransactionContext> context;
+	fdb_error_t error = error_code_success;
+};
+
+// Type of the lambda functions implementing a transaction
+using TTxStartFct = std::function<void(std::shared_ptr<ITransactionContext>)>;
+
+/**
+ * A wrapper class for transactions implemented by lambda functions
+ */
+class TransactionFct : public TransactionActorBase {
+public:
+	TransactionFct(TTxStartFct startFct) : startFct(startFct) {}
+	void start() override { startFct(this->ctx()); }
+
+private:
+	TTxStartFct startFct;
+};
+
+/**
+ * Configuration of transaction execution mode
+ */
+struct TransactionExecutorOptions {
+	// Use blocking waits on futures
+	bool blockOnFutures = false;
+
+	// Create each transaction in a separate database instance
+	bool databasePerTransaction = false;
+
+	// The size of the database instance pool
+	int numDatabases = 1;
+};
+
+/**
+ * Transaction executor provides an interface for executing transactions
+ * It is responsible for instantiating FDB databases and transactions and managing their lifecycle
+ * according to the provided options
+ */
+class ITransactionExecutor {
+public:
+	virtual ~ITransactionExecutor() {}
+	virtual void init(IScheduler* sched, const char* clusterFile) = 0;
+	virtual void execute(std::shared_ptr<ITransactionActor> tx, TTaskFct cont) = 0;
+};
+
+// Create a transaction executor for the given options
+std::unique_ptr<ITransactionExecutor> createTransactionExecutor(const TransactionExecutorOptions& options);
+
+} // namespace FdbApiTester
+
+#endif
--- a/bindings/c/test/apitester/TesterUtil.cpp
+++ b/bindings/c/test/apitester/TesterUtil.cpp
@ -0,0 +1,58 @@
+/*
+ * TesterUtil.cpp
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TesterUtil.h"
+#include <cstdio>
+
+namespace FdbApiTester {
+
+Random::Random() {
+	std::random_device dev;
+	random.seed(dev());
+}
+
+int Random::randomInt(int min, int max) {
+	return std::uniform_int_distribution<int>(min, max)(random);
+}
+
+Random& Random::get() {
+	static thread_local Random random;
+	return random;
+}
+
+std::string Random::randomStringLowerCase(int minLength, int maxLength) {
+	int length = randomInt(minLength, maxLength);
+	std::string str;
+	str.reserve(length);
+	for (int i = 0; i < length; i++) {
+		str += (char)randomInt('a', 'z');
+	}
+	return str;
+}
+
+bool Random::randomBool(double trueRatio) {
+	return std::uniform_real_distribution<double>(0.0, 1.0)(random) <= trueRatio;
+}
+
+void print_internal_error(const char* msg, const char* file, int line) {
+	fprintf(stderr, "Assertion %s failed @ %s %d:\n", msg, file, line);
+}
+
+} // namespace FdbApiTester
--- a/bindings/c/test/apitester/TesterUtil.h
+++ b/bindings/c/test/apitester/TesterUtil.h
@ -0,0 +1,87 @@
+/*
+ * TesterUtil.h
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#ifndef APITESTER_UTIL_H
+#define APITESTER_UTIL_H
+
+#include <random>
+#include <ostream>
+#include <optional>
+#include <fmt/format.h>
+
+namespace fmt {
+
+template <typename T>
+struct formatter<std::optional<T>> : fmt::formatter<T> {
+
+	template <typename FormatContext>
+	auto format(const std::optional<T>& opt, FormatContext& ctx) {
+		if (opt) {
+			fmt::formatter<T>::format(*opt, ctx);
+			return ctx.out();
+		}
+		return fmt::format_to(ctx.out(), "<empty>");
+	}
+};
+
+} // namespace fmt
+
+namespace FdbApiTester {
+
+class Random {
+public:
+	Random();
+
+	static Random& get();
+
+	int randomInt(int min, int max);
+
+	std::string randomStringLowerCase(int minLength, int maxLength);
+
+	bool randomBool(double trueRatio);
+
+	std::mt19937 random;
+};
+
+class TesterError : public std::runtime_error {
+public:
+	explicit TesterError(const char* message) : std::runtime_error(message) {}
+	explicit TesterError(const std::string& message) : std::runtime_error(message) {}
+	TesterError(const TesterError&) = default;
+	TesterError& operator=(const TesterError&) = default;
+	TesterError(TesterError&&) = default;
+	TesterError& operator=(TesterError&&) = default;
+};
+
+void print_internal_error(const char* msg, const char* file, int line);
+
+#define ASSERT(condition)                                                                                              \
+	do {                                                                                                               \
+		if (!(condition)) {                                                                                            \
+			print_internal_error(#condition, __FILE__, __LINE__);                                                      \
+			abort();                                                                                                   \
+		}                                                                                                              \
+	} while (false) // For use in destructors, where throwing exceptions is extremely dangerous
+
+} // namespace FdbApiTester
+
+#endif
--- a/bindings/c/test/apitester/TesterWorkload.cpp
+++ b/bindings/c/test/apitester/TesterWorkload.cpp
@ -0,0 +1,184 @@
+/*
+ * TesterWorkload.cpp
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TesterWorkload.h"
+#include "TesterUtil.h"
+#include "test/apitester/TesterScheduler.h"
+#include <cstdlib>
+#include <memory>
+#include <fmt/format.h>
+#include <vector>
+
+namespace FdbApiTester {
+
+int WorkloadConfig::getIntOption(const std::string& name, int defaultVal) const {
+	auto iter = options.find(name);
+	if (iter == options.end()) {
+		return defaultVal;
+	} else {
+		char* endptr;
+		int intVal = strtol(iter->second.c_str(), &endptr, 10);
+		if (*endptr != '\0') {
+			throw TesterError(
+			    fmt::format("Invalid workload configuration. Invalid value {} for {}", iter->second, name));
+		}
+		return intVal;
+	}
+}
+
+double WorkloadConfig::getFloatOption(const std::string& name, double defaultVal) const {
+	auto iter = options.find(name);
+	if (iter == options.end()) {
+		return defaultVal;
+	} else {
+		char* endptr;
+		double floatVal = strtod(iter->second.c_str(), &endptr);
+		if (*endptr != '\0') {
+			throw TesterError(
+			    fmt::format("Invalid workload configuration. Invalid value {} for {}", iter->second, name));
+		}
+		return floatVal;
+	}
+}
+
+WorkloadBase::WorkloadBase(const WorkloadConfig& config)
+  : manager(nullptr), tasksScheduled(0), numErrors(0), clientId(config.clientId), numClients(config.numClients),
+    failed(false) {
+	maxErrors = config.getIntOption("maxErrors", 10);
+	workloadId = fmt::format("{}{}", config.name, clientId);
+}
+
+void WorkloadBase::init(WorkloadManager* manager) {
+	this->manager = manager;
+}
+
+void WorkloadBase::schedule(TTaskFct task) {
+	if (failed) {
+		return;
+	}
+	tasksScheduled++;
+	manager->scheduler->schedule([this, task]() {
+		task();
+		scheduledTaskDone();
+	});
+}
+
+void WorkloadBase::execTransaction(std::shared_ptr<ITransactionActor> tx, TTaskFct cont, bool failOnError) {
+	if (failed) {
+		return;
+	}
+	tasksScheduled++;
+	manager->txExecutor->execute(tx, [this, tx, cont, failOnError]() {
+		fdb_error_t err = tx->getErrorCode();
+		if (tx->getErrorCode() == error_code_success) {
+			cont();
+		} else {
+			std::string msg = fmt::format("Transaction failed with error: {} ({}})", err, fdb_get_error(err));
+			if (failOnError) {
+				error(msg);
+				failed = true;
+			} else {
+				info(msg);
+				cont();
+			}
+		}
+		scheduledTaskDone();
+	});
+}
+
+void WorkloadBase::info(const std::string& msg) {
+	fmt::print(stderr, "[{}] {}\n", workloadId, msg);
+}
+
+void WorkloadBase::error(const std::string& msg) {
+	fmt::print(stderr, "[{}] ERROR: {}\n", workloadId, msg);
+	numErrors++;
+	if (numErrors > maxErrors && !failed) {
+		fmt::print(stderr, "[{}] ERROR: Stopping workload after {} errors\n", workloadId, numErrors);
+		failed = true;
+	}
+}
+
+void WorkloadBase::scheduledTaskDone() {
+	if (--tasksScheduled == 0) {
+		if (numErrors > 0) {
+			error(fmt::format("Workload failed with {} errors", numErrors.load()));
+		} else {
+			info("Workload successfully completed");
+		}
+		manager->workloadDone(this, numErrors > 0);
+	}
+}
+
+void WorkloadManager::add(std::shared_ptr<IWorkload> workload, TTaskFct cont) {
+	std::unique_lock<std::mutex> lock(mutex);
+	workloads[workload.get()] = WorkloadInfo{ workload, cont };
+}
+
+void WorkloadManager::run() {
+	std::vector<std::shared_ptr<IWorkload>> initialWorkloads;
+	for (auto iter : workloads) {
+		initialWorkloads.push_back(iter.second.ref);
+	}
+	for (auto iter : initialWorkloads) {
+		iter->init(this);
+	}
+	for (auto iter : initialWorkloads) {
+		iter->start();
+	}
+	scheduler->join();
+	if (failed()) {
+		fmt::print(stderr, "{} workloads failed\n", numWorkloadsFailed);
+	} else {
+		fprintf(stderr, "All workloads succesfully completed\n");
+	}
+}
+
+void WorkloadManager::workloadDone(IWorkload* workload, bool failed) {
+	std::unique_lock<std::mutex> lock(mutex);
+	auto iter = workloads.find(workload);
+	ASSERT(iter != workloads.end());
+	lock.unlock();
+	iter->second.cont();
+	lock.lock();
+	workloads.erase(iter);
+	if (failed) {
+		numWorkloadsFailed++;
+	}
+	bool done = workloads.empty();
+	lock.unlock();
+	if (done) {
+		scheduler->stop();
+	}
+}
+
+std::shared_ptr<IWorkload> IWorkloadFactory::create(std::string const& name, const WorkloadConfig& config) {
+	auto it = factories().find(name);
+	if (it == factories().end())
+		return {}; // or throw?
+	return it->second->create(config);
+}
+
+std::unordered_map<std::string, IWorkloadFactory*>& IWorkloadFactory::factories() {
+	static std::unordered_map<std::string, IWorkloadFactory*> theFactories;
+	return theFactories;
+}
+
+} // namespace FdbApiTester
--- a/bindings/c/test/apitester/TesterWorkload.h
+++ b/bindings/c/test/apitester/TesterWorkload.h
@ -0,0 +1,205 @@
+/*
+ * TesterWorkload.h
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <memory>
+#ifndef APITESTER_WORKLOAD_H
+#define APITESTER_WORKLOAD_H
+
+#include "TesterTransactionExecutor.h"
+#include "TesterUtil.h"
+#include <atomic>
+#include <unordered_map>
+#include <mutex>
+
+namespace FdbApiTester {
+
+class WorkloadManager;
+
+// Workoad interface
+class IWorkload {
+public:
+	virtual ~IWorkload() {}
+
+	// Intialize the workload
+	virtual void init(WorkloadManager* manager) = 0;
+
+	// Start executing the workload
+	virtual void start() = 0;
+};
+
+// Workload configuration
+struct WorkloadConfig {
+	// Workoad name
+	std::string name;
+
+	// Client ID assigned to the workload (a number from 0 to numClients-1)
+	int clientId;
+
+	// Total number of clients
+	int numClients;
+
+	// Workload options: as key-value pairs
+	std::unordered_map<std::string, std::string> options;
+
+	// Get option of a certain type by name. Throws an exception if the values is of a wrong type
+	int getIntOption(const std::string& name, int defaultVal) const;
+	double getFloatOption(const std::string& name, double defaultVal) const;
+};
+
+// A base class for test workloads
+// Tracks if workload is active, notifies the workload manager when the workload completes
+class WorkloadBase : public IWorkload {
+public:
+	WorkloadBase(const WorkloadConfig& config);
+
+	// Initialize the workload
+	void init(WorkloadManager* manager) override;
+
+protected:
+	// Schedule the a task as a part of the workload
+	void schedule(TTaskFct task);
+
+	// Execute a transaction within the workload
+	void execTransaction(std::shared_ptr<ITransactionActor> tx, TTaskFct cont, bool failOnError = true);
+
+	// Execute a transaction within the workload, a convenience method for a tranasaction defined by a lambda function
+	void execTransaction(TTxStartFct start, TTaskFct cont, bool failOnError = true) {
+		execTransaction(std::make_shared<TransactionFct>(start), cont, failOnError);
+	}
+
+	// Log an error message, increase error counter
+	void error(const std::string& msg);
+
+	// Log an info message
+	void info(const std::string& msg);
+
+private:
+	WorkloadManager* manager;
+
+	// Decrease scheduled task counter, notify the workload manager
+	// that the task is done if no more tasks schedule
+	void scheduledTaskDone();
+
+	// Keep track of tasks scheduled by the workload
+	// End workload when this number falls to 0
+	std::atomic<int> tasksScheduled;
+
+	// Number of errors logged
+	std::atomic<int> numErrors;
+
+protected:
+	// Client ID assigned to the workload (a number from 0 to numClients-1)
+	int clientId;
+
+	// Total number of clients
+	int numClients;
+
+	// The maximum number of errors before stoppoing the workload
+	int maxErrors;
+
+	// Workload identifier, consisting of workload name and client ID
+	std::string workloadId;
+
+	// Workload is failed, no further transactions or continuations will be scheduled by the workload
+	std::atomic<bool> failed;
+};
+
+// Workload manager
+// Keeps track of active workoads, stops the scheduler after all workloads complete
+class WorkloadManager {
+public:
+	WorkloadManager(ITransactionExecutor* txExecutor, IScheduler* scheduler)
+	  : txExecutor(txExecutor), scheduler(scheduler), numWorkloadsFailed(0) {}
+
+	// Add a workload
+	// A continuation is to be specified for subworkloads
+	void add(std::shared_ptr<IWorkload> workload, TTaskFct cont = NO_OP_TASK);
+
+	// Run all workloads. Blocks until all workloads complete
+	void run();
+
+	// True if at least one workload has failed
+	bool failed() {
+		std::unique_lock<std::mutex> lock(mutex);
+		return numWorkloadsFailed > 0;
+	}
+
+private:
+	friend WorkloadBase;
+
+	// Info about a running workload
+	struct WorkloadInfo {
+		// Reference to the workoad for ownership
+		std::shared_ptr<IWorkload> ref;
+		// Continuation to be executed after completing the workload
+		TTaskFct cont;
+	};
+
+	// To be called by a workload to notify that it is done
+	void workloadDone(IWorkload* workload, bool failed);
+
+	// Transaction executor to be used by the workloads
+	ITransactionExecutor* txExecutor;
+
+	// A scheduler to be used by the workloads
+	IScheduler* scheduler;
+
+	// Mutex protects access to workloads & numWorkloadsFailed
+	std::mutex mutex;
+
+	// A map of currently running workloads
+	std::unordered_map<IWorkload*, WorkloadInfo> workloads;
+
+	// Number of workloads failed
+	int numWorkloadsFailed;
+};
+
+// A workload factory
+struct IWorkloadFactory {
+	// create a workload by name
+	static std::shared_ptr<IWorkload> create(std::string const& name, const WorkloadConfig& config);
+
+	// a singleton registry of workload factories
+	static std::unordered_map<std::string, IWorkloadFactory*>& factories();
+
+	// Interface to be implemented by a workload factory
+	virtual ~IWorkloadFactory() = default;
+	virtual std::shared_ptr<IWorkload> create(const WorkloadConfig& config) = 0;
+};
+
+/**
+ * A template for a workload factory for creating workloads of a certain type
+ *
+ * Declare a global instance of the factory for a workload type as follows:
+ * WorkloadFactory<MyWorkload> MyWorkloadFactory("myWorkload");
+ */
+template <class WorkloadType>
+struct WorkloadFactory : IWorkloadFactory {
+	WorkloadFactory(const char* name) { factories()[name] = this; }
+	std::shared_ptr<IWorkload> create(const WorkloadConfig& config) override {
+		return std::make_shared<WorkloadType>(config);
+	}
+};
+
+} // namespace FdbApiTester
+
+#endif
--- a/bindings/c/test/apitester/fdb_c_api_tester.cpp
+++ b/bindings/c/test/apitester/fdb_c_api_tester.cpp
@ -0,0 +1,284 @@
+/*
+ * fdb_c_api_tester.cpp
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TesterOptions.h"
+#include "TesterWorkload.h"
+#include "TesterScheduler.h"
+#include "TesterTransactionExecutor.h"
+#include "TesterTestSpec.h"
+#include "TesterUtil.h"
+#include "flow/SimpleOpt.h"
+#include "bindings/c/foundationdb/fdb_c.h"
+
+#include <memory>
+#include <stdexcept>
+#include <thread>
+#include <fmt/format.h>
+
+namespace FdbApiTester {
+
+namespace {
+
+enum TesterOptionId {
+	OPT_CONNFILE,
+	OPT_HELP,
+	OPT_TRACE,
+	OPT_TRACE_DIR,
+	OPT_LOGGROUP,
+	OPT_TRACE_FORMAT,
+	OPT_KNOB,
+	OPT_EXTERNAL_CLIENT_LIBRARY,
+	OPT_TEST_FILE
+};
+
+CSimpleOpt::SOption TesterOptionDefs[] = //
+    { { OPT_CONNFILE, "-C", SO_REQ_SEP },
+	  { OPT_CONNFILE, "--cluster-file", SO_REQ_SEP },
+	  { OPT_TRACE, "--log", SO_NONE },
+	  { OPT_TRACE_DIR, "--log-dir", SO_REQ_SEP },
+	  { OPT_LOGGROUP, "--log-group", SO_REQ_SEP },
+	  { OPT_HELP, "-h", SO_NONE },
+	  { OPT_HELP, "--help", SO_NONE },
+	  { OPT_TRACE_FORMAT, "--trace-format", SO_REQ_SEP },
+	  { OPT_KNOB, "--knob-", SO_REQ_SEP },
+	  { OPT_EXTERNAL_CLIENT_LIBRARY, "--external-client-library", SO_REQ_SEP },
+	  { OPT_TEST_FILE, "-f", SO_REQ_SEP },
+	  { OPT_TEST_FILE, "--test-file", SO_REQ_SEP },
+	  SO_END_OF_OPTIONS };
+
+void printProgramUsage(const char* execName) {
+	printf("usage: %s [OPTIONS]\n"
+	       "\n",
+	       execName);
+	printf("  -C, --cluster-file FILE\n"
+	       "                 The path of a file containing the connection string for the\n"
+	       "                 FoundationDB cluster. The default is `fdb.cluster'\n"
+	       "  --log          Enables trace file logging for the CLI session.\n"
+	       "  --log-dir PATH Specifes the output directory for trace files. If\n"
+	       "                 unspecified, defaults to the current directory. Has\n"
+	       "                 no effect unless --log is specified.\n"
+	       "  --log-group LOG_GROUP\n"
+	       "                 Sets the LogGroup field with the specified value for all\n"
+	       "                 events in the trace output (defaults to `default').\n"
+	       "  --trace-format FORMAT\n"
+	       "                 Select the format of the log files. xml (the default) and json\n"
+	       "                 are supported. Has no effect unless --log is specified.\n"
+	       "  --knob-KNOBNAME KNOBVALUE\n"
+	       "                 Changes a knob option. KNOBNAME should be lowercase.\n"
+	       "  --external-client-library FILE\n"
+	       "                 Path to the external client library.\n"
+	       "  -f, --test-file FILE\n"
+	       "                 Test file to run.\n"
+	       "  -h, --help     Display this help and exit.\n");
+}
+
+// Extracts the key for command line arguments that are specified with a prefix (e.g. --knob-).
+// This function converts any hyphens in the extracted key to underscores.
+bool extractPrefixedArgument(std::string prefix, const std::string& arg, std::string& res) {
+	if (arg.size() <= prefix.size() || arg.find(prefix) != 0 ||
+	    (arg[prefix.size()] != '-' && arg[prefix.size()] != '_')) {
+		return false;
+	}
+
+	res = arg.substr(prefix.size() + 1);
+	std::transform(res.begin(), res.end(), res.begin(), [](int c) { return c == '-' ? '_' : c; });
+	return true;
+}
+
+bool validateTraceFormat(std::string_view format) {
+	return format == "xml" || format == "json";
+}
+
+bool processArg(TesterOptions& options, const CSimpleOpt& args) {
+	switch (args.OptionId()) {
+	case OPT_CONNFILE:
+		options.clusterFile = args.OptionArg();
+		break;
+	case OPT_TRACE:
+		options.trace = true;
+		break;
+	case OPT_TRACE_DIR:
+		options.traceDir = args.OptionArg();
+		break;
+	case OPT_LOGGROUP:
+		options.logGroup = args.OptionArg();
+		break;
+	case OPT_TRACE_FORMAT:
+		if (!validateTraceFormat(args.OptionArg())) {
+			fmt::print(stderr, "ERROR: Unrecognized trace format `{}'\n", args.OptionArg());
+			return false;
+		}
+		options.traceFormat = args.OptionArg();
+		break;
+	case OPT_KNOB: {
+		std::string knobName;
+		if (!extractPrefixedArgument("--knob", args.OptionSyntax(), knobName)) {
+			fmt::print(stderr, "ERROR: unable to parse knob option '{}'\n", args.OptionSyntax());
+			return false;
+		}
+		options.knobs.emplace_back(knobName, args.OptionArg());
+		break;
+	}
+	case OPT_EXTERNAL_CLIENT_LIBRARY:
+		options.externalClientLibrary = args.OptionArg();
+		break;
+
+	case OPT_TEST_FILE:
+		options.testFile = args.OptionArg();
+		options.testSpec = readTomlTestSpec(options.testFile);
+		break;
+	}
+	return true;
+}
+
+bool parseArgs(TesterOptions& options, int argc, char** argv) {
+	// declare our options parser, pass in the arguments from main
+	// as well as our array of valid options.
+	CSimpleOpt args(argc, argv, TesterOptionDefs);
+
+	// while there are arguments left to process
+	while (args.Next()) {
+		if (args.LastError() == SO_SUCCESS) {
+			if (args.OptionId() == OPT_HELP) {
+				printProgramUsage(argv[0]);
+				return false;
+			}
+			if (!processArg(options, args)) {
+				return false;
+			}
+		} else {
+			fmt::print(stderr, "ERROR: Invalid argument: {}\n", args.OptionText());
+			printProgramUsage(argv[0]);
+			return false;
+		}
+	}
+	return true;
+}
+
+void fdb_check(fdb_error_t e) {
+	if (e) {
+		fmt::print(stderr, "Unexpected FDB error: {}({})\n", e, fdb_get_error(e));
+		std::abort();
+	}
+}
+
+void applyNetworkOptions(TesterOptions& options) {
+	if (!options.externalClientLibrary.empty()) {
+		fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_DISABLE_LOCAL_CLIENT));
+		fdb_check(
+		    FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_EXTERNAL_CLIENT_LIBRARY, options.externalClientLibrary));
+	}
+
+	if (options.testSpec.multiThreaded) {
+		fdb_check(
+		    FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_CLIENT_THREADS_PER_VERSION, options.numFdbThreads));
+	}
+
+	if (options.testSpec.fdbCallbacksOnExternalThreads) {
+		fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_CALLBACKS_ON_EXTERNAL_THREADS));
+	}
+
+	if (options.testSpec.buggify) {
+		fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_CLIENT_BUGGIFY_ENABLE));
+	}
+
+	if (options.trace) {
+		fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_TRACE_ENABLE, options.traceDir));
+		fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_TRACE_FORMAT, options.traceFormat));
+		fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_TRACE_LOG_GROUP, options.logGroup));
+	}
+
+	for (auto knob : options.knobs) {
+		fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_KNOB,
+		                            fmt::format("{}={}", knob.first.c_str(), knob.second.c_str())));
+	}
+}
+
+void randomizeOptions(TesterOptions& options) {
+	Random& random = Random::get();
+	options.numFdbThreads = random.randomInt(options.testSpec.minFdbThreads, options.testSpec.maxFdbThreads);
+	options.numClientThreads = random.randomInt(options.testSpec.minClientThreads, options.testSpec.maxClientThreads);
+	options.numDatabases = random.randomInt(options.testSpec.minDatabases, options.testSpec.maxDatabases);
+	options.numClients = random.randomInt(options.testSpec.minClients, options.testSpec.maxClients);
+}
+
+bool runWorkloads(TesterOptions& options) {
+	TransactionExecutorOptions txExecOptions;
+	txExecOptions.blockOnFutures = options.testSpec.blockOnFutures;
+	txExecOptions.numDatabases = options.numDatabases;
+	txExecOptions.databasePerTransaction = options.testSpec.databasePerTransaction;
+
+	std::unique_ptr<IScheduler> scheduler = createScheduler(options.numClientThreads);
+	std::unique_ptr<ITransactionExecutor> txExecutor = createTransactionExecutor(txExecOptions);
+	scheduler->start();
+	txExecutor->init(scheduler.get(), options.clusterFile.c_str());
+
+	WorkloadManager workloadMgr(txExecutor.get(), scheduler.get());
+	for (const auto& workloadSpec : options.testSpec.workloads) {
+		for (int i = 0; i < options.numClients; i++) {
+			WorkloadConfig config;
+			config.name = workloadSpec.name;
+			config.options = workloadSpec.options;
+			config.clientId = i;
+			config.numClients = options.numClients;
+			std::shared_ptr<IWorkload> workload = IWorkloadFactory::create(workloadSpec.name, config);
+			if (!workload) {
+				throw TesterError(fmt::format("Unknown workload '{}'", workloadSpec.name));
+			}
+			workloadMgr.add(workload);
+		}
+	}
+
+	workloadMgr.run();
+	return !workloadMgr.failed();
+}
+
+} // namespace
+} // namespace FdbApiTester
+
+using namespace FdbApiTester;
+
+int main(int argc, char** argv) {
+	int retCode = 0;
+	try {
+		TesterOptions options;
+		if (!parseArgs(options, argc, argv)) {
+			return 1;
+		}
+		randomizeOptions(options);
+
+		fdb_check(fdb_select_api_version(options.testSpec.apiVersion));
+		applyNetworkOptions(options);
+		fdb_check(fdb_setup_network());
+
+		std::thread network_thread{ &fdb_run_network };
+
+		if (!runWorkloads(options)) {
+			retCode = 1;
+		}
+
+		fdb_check(fdb_stop_network());
+		network_thread.join();
+	} catch (const std::runtime_error& err) {
+		fmt::print(stderr, "ERROR: {}\n", err.what());
+		retCode = 1;
+	}
+	return retCode;
+}
--- a/bindings/c/test/apitester/run_c_api_tests.py
+++ b/bindings/c/test/apitester/run_c_api_tests.py
@ -0,0 +1,125 @@
+#!/usr/bin/env python3
+#
+# run_c_api_tests.py
+#
+# This source file is part of the FoundationDB open source project
+#
+# Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+import subprocess
+import argparse
+import os
+from subprocess import Popen, TimeoutExpired
+import logging
+import signal
+
+
+def get_logger():
+    return logging.getLogger('foundationdb.run_c_api_tests')
+
+
+def initialize_logger_level(logging_level):
+    logger = get_logger()
+
+    assert logging_level in ['DEBUG', 'INFO', 'WARNING', 'ERROR']
+
+    logging.basicConfig(format='%(message)s')
+    if logging_level == 'DEBUG':
+        logger.setLevel(logging.DEBUG)
+    elif logging_level == 'INFO':
+        logger.setLevel(logging.INFO)
+    elif logging_level == 'WARNING':
+        logger.setLevel(logging.WARNING)
+    elif logging_level == 'ERROR':
+        logger.setLevel(logging.ERROR)
+
+
+def run_tester(args, test_file):
+    cmd = [args.tester_binary, "--cluster-file",
+           args.cluster_file, "--test-file", test_file]
+    if args.external_client_library is not None:
+        cmd += ["--external-client-library", args.external_client_library]
+
+    get_logger().info('\nRunning tester \'%s\'...' % ' '.join(cmd))
+    proc = Popen(cmd, stdout=sys.stdout, stderr=sys.stderr)
+    timed_out = False
+    try:
+        ret_code = proc.wait(args.timeout)
+    except TimeoutExpired:
+        proc.kill()
+        timed_out = True
+    except Exception as e:
+        raise Exception('Unable to run tester (%s)' % e)
+
+    if ret_code != 0:
+        if ret_code < 0:
+            reason = signal.Signals(-ret_code).name
+        else:
+            reason = 'exit code: %d' % ret_code
+        if timed_out:
+            reason = 'timed out after %d seconds' % args.timeout
+            ret_code = 1
+        get_logger().error('\n\'%s\' did not complete succesfully (%s)' %
+                           (cmd[0], reason))
+
+    get_logger().info('')
+    return ret_code
+
+
+def run_tests(args):
+    num_failed = 0
+    test_files = [f for f in os.listdir(args.test_dir)
+                  if os.path.isfile(os.path.join(args.test_dir, f)) and f.endswith(".toml")]
+
+    for test_file in test_files:
+        get_logger().info('=========================================================')
+        get_logger().info('Running test %s' % test_file)
+        get_logger().info('=========================================================')
+        ret_code = run_tester(args, os.path.join(args.test_dir, test_file))
+        if ret_code != 0:
+            num_failed += 1
+
+    return num_failed
+
+
+def parse_args(argv):
+    parser = argparse.ArgumentParser(description='FoundationDB C API Tester')
+
+    parser.add_argument('--cluster-file', type=str, default="fdb.cluster",
+                        help='The cluster file for the cluster being connected to. (default: fdb.cluster)')
+    parser.add_argument('--tester-binary', type=str, default="fdb_c_api_tester",
+                        help='Path to the fdb_c_api_tester executable. (default: fdb_c_api_tester)')
+    parser.add_argument('--external-client-library', type=str, default=None,
+                        help='Path to the external client library. (default: None)')
+    parser.add_argument('--test-dir', type=str, default="./",
+                        help='Path to a directory with test definitions. (default: ./)')
+    parser.add_argument('--timeout', type=int, default=300,
+                        help='The timeout in seconds for running each individual test. (default 300)')
+    parser.add_argument('--logging-level', type=str, default='INFO',
+                        choices=['ERROR', 'WARNING', 'INFO', 'DEBUG'], help='Specifies the level of detail in the tester output (default=\'INFO\').')
+
+    return parser.parse_args(argv)
+
+
+def main(argv):
+    args = parse_args(argv)
+    initialize_logger_level(args.logging_level)
+    return run_tests(args)
+
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv[1:]))
--- a/bindings/c/test/apitester/tests/CApiCancelTransactionBlocking.toml
+++ b/bindings/c/test/apitester/tests/CApiCancelTransactionBlocking.toml
@ -0,0 +1,24 @@
+[[test]]
+title = 'Cancel Transaction with Blocking Waits'
+multiThreaded = true
+buggify = true
+blockOnFutures = true
+minFdbThreads = 2
+maxFdbThreads = 8
+minDatabases = 2
+maxDatabases = 8
+minClientThreads = 2
+maxClientThreads = 8
+minClients = 2
+maxClients = 8
+
+	[[test.workload]]
+    name = 'CancelTransaction'
+    minKeyLength = 1
+	maxKeyLength = 64
+	minValueLength = 1
+	maxValueLength = 1000
+	maxKeysPerTransaction = 50
+	initialSize = 100
+	numRandomOperations = 100
+	readExistingKeysRatio = 0.9
--- a/bindings/c/test/apitester/tests/CApiCancelTransactionCB.toml
+++ b/bindings/c/test/apitester/tests/CApiCancelTransactionCB.toml
@ -0,0 +1,23 @@
+[[test]]
+title = 'Cancel Transactions with Future Callbacks'
+multiThreaded = true
+buggify = true
+minFdbThreads = 2
+maxFdbThreads = 8
+minDatabases = 2
+maxDatabases = 8
+minClientThreads = 2
+maxClientThreads = 8
+minClients = 2
+maxClients = 8
+
+	[[test.workload]]
+    name = 'CancelTransaction'
+    minKeyLength = 1
+	maxKeyLength = 64
+	minValueLength = 1
+	maxValueLength = 1000
+	maxKeysPerTransaction = 50
+	initialSize = 100
+	numRandomOperations = 100
+	readExistingKeysRatio = 0.9
--- a/bindings/c/test/apitester/tests/CApiCancelTransactionDBPerTX.toml
+++ b/bindings/c/test/apitester/tests/CApiCancelTransactionDBPerTX.toml
@ -0,0 +1,24 @@
+[[test]]
+title = 'Cancel Transaction with Database per Transaction'
+multiThreaded = true
+buggify = true
+databasePerTransaction = true
+minFdbThreads = 2
+maxFdbThreads = 8
+minDatabases = 2
+maxDatabases = 8
+minClientThreads = 2
+maxClientThreads = 8
+minClients = 2
+maxClients = 8
+
+	[[test.workload]]
+    name = 'CancelTransaction'
+    minKeyLength = 1
+	maxKeyLength = 64
+	minValueLength = 1
+	maxValueLength = 1000
+	maxKeysPerTransaction = 50
+	initialSize = 100
+	numRandomOperations = 100
+	readExistingKeysRatio = 0.9
--- a/bindings/c/test/apitester/tests/CApiCorrectnessBlocking.toml
+++ b/bindings/c/test/apitester/tests/CApiCorrectnessBlocking.toml
@ -0,0 +1,25 @@
+[[test]]
+title = 'API Correctness Blocking'
+multiThreaded = true
+buggify = true
+blockOnFutures = true
+minFdbThreads = 2
+maxFdbThreads = 8
+minDatabases = 2
+maxDatabases = 8
+minClientThreads = 2
+maxClientThreads = 8
+minClients = 2
+maxClients = 8
+
+
+    [[test.workload]]
+    name = 'ApiCorrectness'
+    minKeyLength = 1
+	maxKeyLength = 64
+	minValueLength = 1
+	maxValueLength = 1000
+	maxKeysPerTransaction = 50
+	initialSize = 100
+	numRandomOperations = 100
+	readExistingKeysRatio = 0.9
--- a/bindings/c/test/apitester/tests/CApiCorrectnessCallbacksOnExtThr.toml
+++ b/bindings/c/test/apitester/tests/CApiCorrectnessCallbacksOnExtThr.toml
@ -0,0 +1,24 @@
+[[test]]
+title = 'API Correctness Callbacks On External Threads'
+multiThreaded = true
+fdbCallbacksOnExternalThreads = true
+buggify = true
+minFdbThreads = 2
+maxFdbThreads = 8
+minDatabases = 2
+maxDatabases = 8
+minClientThreads = 2
+maxClientThreads = 8
+minClients = 2
+maxClients = 8
+
+    [[test.workload]]
+    name = 'ApiCorrectness'
+    minKeyLength = 1
+	maxKeyLength = 64
+	minValueLength = 1
+	maxValueLength = 1000
+	maxKeysPerTransaction = 50
+	initialSize = 100
+	numRandomOperations = 100
+	readExistingKeysRatio = 0.9
--- a/bindings/c/test/apitester/tests/CApiCorrectnessDBPerTX.toml
+++ b/bindings/c/test/apitester/tests/CApiCorrectnessDBPerTX.toml
@ -0,0 +1,24 @@
+[[test]]
+title = 'API Correctness Database Per Transaction'
+multiThreaded = true
+buggify = true
+databasePerTransaction = true
+minFdbThreads = 2
+maxFdbThreads = 8
+minDatabases = 2
+maxDatabases = 8
+minClientThreads = 2
+maxClientThreads = 8
+minClients = 2
+maxClients = 8
+
+    [[test.workload]]
+    name = 'ApiCorrectness'
+    minKeyLength = 1
+	maxKeyLength = 64
+	minValueLength = 1
+	maxValueLength = 1000
+	maxKeysPerTransaction = 50
+	initialSize = 100
+	numRandomOperations = 100
+	readExistingKeysRatio = 0.9
--- a/bindings/c/test/apitester/tests/CApiCorrectnessMultiThr.toml
+++ b/bindings/c/test/apitester/tests/CApiCorrectnessMultiThr.toml
@ -0,0 +1,23 @@
+[[test]]
+title = 'API Correctness Multi Threaded'
+multiThreaded = true
+buggify = true
+minFdbThreads = 2
+maxFdbThreads = 8
+minDatabases = 2
+maxDatabases = 8
+minClientThreads = 2
+maxClientThreads = 8
+minClients = 2
+maxClients = 8
+
+    [[test.workload]]
+    name = 'ApiCorrectness'
+    minKeyLength = 1
+	maxKeyLength = 64
+	minValueLength = 1
+	maxValueLength = 1000
+	maxKeysPerTransaction = 50
+	initialSize = 100
+	numRandomOperations = 100
+	readExistingKeysRatio = 0.9
--- a/bindings/c/test/apitester/tests/CApiCorrectnessSingleThr.toml
+++ b/bindings/c/test/apitester/tests/CApiCorrectnessSingleThr.toml
@ -0,0 +1,16 @@
+[[test]]
+title = 'API Correctness Single Threaded'
+minClients = 1
+maxClients = 3
+multiThreaded = false
+
+    [[test.workload]]
+    name = 'ApiCorrectness'
+    minKeyLength = 1
+	maxKeyLength = 64
+	minValueLength = 1
+	maxValueLength = 1000
+	maxKeysPerTransaction = 50
+	initialSize = 100
+	numRandomOperations = 100
+	readExistingKeysRatio = 0.9
--- a/cmake/AddFdbTest.cmake
+++ b/cmake/AddFdbTest.cmake
@ -129,7 +129,7 @@ function(add_fdb_test)
      -n ${test_name}
      -b ${PROJECT_BINARY_DIR}
      -t ${test_type}
-      -O ${OLD_FDBSERVER_BINARY}  
+      -O ${OLD_FDBSERVER_BINARY}
      --config "@CTEST_CONFIGURATION_TYPE@"
      --crash
      --aggregate-traces ${TEST_AGGREGATE_TRACES}
@ -404,7 +404,7 @@ endfunction()

 # Creates a single cluster before running the specified command (usually a ctest test)
 function(add_fdbclient_test)
-  set(options DISABLED ENABLED)
+  set(options DISABLED ENABLED DISABLE_LOG_DUMP)
  set(oneValueArgs NAME PROCESS_NUMBER TEST_TIMEOUT WORKING_DIRECTORY)
  set(multiValueArgs COMMAND)
  cmake_parse_arguments(T "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}")
@ -423,23 +423,20 @@ function(add_fdbclient_test)
  if(NOT T_COMMAND)
    message(FATAL_ERROR "COMMAND is a required argument for add_fdbclient_test")
  endif()
-  message(STATUS "Adding Client test ${T_NAME}")
-  if (T_PROCESS_NUMBER)
-    add_test(NAME "${T_NAME}"
-    WORKING_DIRECTORY ${T_WORKING_DIRECTORY}
-    COMMAND ${Python_EXECUTABLE} ${CMAKE_SOURCE_DIR}/tests/TestRunner/tmp_cluster.py
-            --build-dir ${CMAKE_BINARY_DIR}
-            --process-number ${T_PROCESS_NUMBER}
-            --
-            ${T_COMMAND})
-  else()
-    add_test(NAME "${T_NAME}"
-    WORKING_DIRECTORY ${T_WORKING_DIRECTORY}
-    COMMAND ${Python_EXECUTABLE} ${CMAKE_SOURCE_DIR}/tests/TestRunner/tmp_cluster.py
-            --build-dir ${CMAKE_BINARY_DIR}
-            --
-            ${T_COMMAND})
+  set(TMP_CLUSTER_CMD ${CMAKE_SOURCE_DIR}/tests/TestRunner/tmp_cluster.py
+                      --build-dir ${CMAKE_BINARY_DIR})
+  if(T_PROCESS_NUMBER)
+    list(APPEND TMP_CLUSTER_CMD --process-number ${T_PROCESS_NUMBER})
  endif()
+  if(T_DISABLE_LOG_DUMP)
+    list(APPEND TMP_CLUSTER_CMD --disable-log-dump)
+  endif()
+  message(STATUS "Adding Client test ${T_NAME}")
+  add_test(NAME "${T_NAME}"
+    WORKING_DIRECTORY ${T_WORKING_DIRECTORY}
+    COMMAND ${Python_EXECUTABLE} ${TMP_CLUSTER_CMD}
+            --
+            ${T_COMMAND})
  if (T_TEST_TIMEOUT)
    set_tests_properties("${T_NAME}" PROPERTIES TIMEOUT ${T_TEST_TIMEOUT})
  else()
@ -449,7 +446,7 @@ function(add_fdbclient_test)
  set_tests_properties("${T_NAME}" PROPERTIES ENVIRONMENT UBSAN_OPTIONS=print_stacktrace=1:halt_on_error=1)
 endfunction()

-# Creates a cluster file for a nonexistent cluster before running the specified command 
+# Creates a cluster file for a nonexistent cluster before running the specified command
 # (usually a ctest test)
 function(add_unavailable_fdbclient_test)
  set(options DISABLED ENABLED)
--- a/cmake/Jemalloc.cmake
+++ b/cmake/Jemalloc.cmake
@ -11,43 +11,25 @@ if(NOT USE_JEMALLOC)
  return()
 endif()

-add_definitions(-DUSE_JEMALLOC)
-find_path(JEMALLOC_INCLUDE_DIR
-  NAMES
-  jemalloc/jemalloc.h
-  PATH_SUFFIXES
-  include
-  )
-find_library(JEMALLOC NAMES libjemalloc.a)
-find_library(JEMALLOC_PIC NAMES libjemalloc_pic.a)
 add_library(im_jemalloc_pic STATIC IMPORTED)
 add_library(im_jemalloc STATIC IMPORTED)
-if(JEMALLOC_INCLUDE_DIR AND JEMALLOC AND JEMALLOC_PIC)
-  set_target_properties(im_jemalloc_pic PROPERTIES IMPORTED_LOCATION "${JEMALLOC_PIC}")
-  set_target_properties(im_jemalloc PROPERTIES IMPORTED_LOCATION "${JEMALLOC}")
-  target_include_directories(jemalloc INTERFACE "${JEMALLOC_INCLUDE_DIR}")
-  # the ordering here is important: for dynamic libraries we have to use all
-  # symbols that are in the library which was compiled with PIC (for executables
-  # we could omit the pic-library)
-  target_link_libraries(jemalloc INTERFACE im_jemalloc_pic im_jemalloc)
-else()
-  include(ExternalProject)
-  set(JEMALLOC_DIR "${CMAKE_BINARY_DIR}/jemalloc")
-  ExternalProject_add(Jemalloc_project
-    URL "https://github.com/jemalloc/jemalloc/releases/download/5.2.1/jemalloc-5.2.1.tar.bz2"
-    URL_HASH SHA256=34330e5ce276099e2e8950d9335db5a875689a4c6a56751ef3b1d8c537f887f6
-    BUILD_BYPRODUCTS "${JEMALLOC_DIR}/include/jemalloc/jemalloc.h"
-    "${JEMALLOC_DIR}/lib/libjemalloc.a"
-    "${JEMALLOC_DIR}/lib/libjemalloc_pic.a"
-    CONFIGURE_COMMAND ./configure --prefix=${JEMALLOC_DIR} --enable-static --disable-cxx --enable-prof
-    BUILD_IN_SOURCE ON
-    BUILD_COMMAND make
-    INSTALL_DIR "${JEMALLOC_DIR}"
-    INSTALL_COMMAND make install)
-  add_dependencies(im_jemalloc Jemalloc_project)
-  add_dependencies(im_jemalloc_pic Jemalloc_project)
-  set_target_properties(im_jemalloc_pic PROPERTIES IMPORTED_LOCATION "${JEMALLOC_DIR}/lib/libjemalloc_pic.a")
-  set_target_properties(im_jemalloc PROPERTIES IMPORTED_LOCATION "${JEMALLOC_DIR}/lib/libjemalloc.a")
-  target_include_directories(jemalloc INTERFACE "${JEMALLOC_DIR}/include")
-  target_link_libraries(jemalloc INTERFACE im_jemalloc_pic im_jemalloc)
-endif()
+include(ExternalProject)
+set(JEMALLOC_DIR "${CMAKE_BINARY_DIR}/jemalloc")
+ExternalProject_add(Jemalloc_project
+  URL "https://github.com/jemalloc/jemalloc/releases/download/5.2.1/jemalloc-5.2.1.tar.bz2"
+  URL_HASH SHA256=34330e5ce276099e2e8950d9335db5a875689a4c6a56751ef3b1d8c537f887f6
+  BUILD_BYPRODUCTS "${JEMALLOC_DIR}/include/jemalloc/jemalloc.h"
+  "${JEMALLOC_DIR}/lib/libjemalloc.a"
+  "${JEMALLOC_DIR}/lib/libjemalloc_pic.a"
+  PATCH_COMMAND patch -p1 < ${CMAKE_SOURCE_DIR}/cmake/jemalloc.patch
+  CONFIGURE_COMMAND ./configure --prefix=${JEMALLOC_DIR} --enable-static --disable-cxx --enable-prof
+  BUILD_IN_SOURCE ON
+  BUILD_COMMAND make
+  INSTALL_DIR "${JEMALLOC_DIR}"
+  INSTALL_COMMAND make install)
+add_dependencies(im_jemalloc Jemalloc_project)
+add_dependencies(im_jemalloc_pic Jemalloc_project)
+set_target_properties(im_jemalloc_pic PROPERTIES IMPORTED_LOCATION "${JEMALLOC_DIR}/lib/libjemalloc_pic.a")
+set_target_properties(im_jemalloc PROPERTIES IMPORTED_LOCATION "${JEMALLOC_DIR}/lib/libjemalloc.a")
+target_include_directories(jemalloc INTERFACE "${JEMALLOC_DIR}/include")
+target_link_libraries(jemalloc INTERFACE im_jemalloc_pic im_jemalloc)
--- a/cmake/jemalloc.patch
+++ b/cmake/jemalloc.patch
@ -0,0 +1,38 @@
+diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h
+index 68e558ab..87bb2280 100644
+--- a/include/jemalloc/internal/sz.h
+++ b/include/jemalloc/internal/sz.h
+@@ -266,7 +266,7 @@ sz_sa2u(size_t size, size_t alignment) {
+ 	assert(alignment != 0 && ((alignment - 1) & alignment) == 0);
+ 
+ 	/* Try for a small size class. */
+-	if (size <= SC_SMALL_MAXCLASS && alignment < PAGE) {
+	if (size <= SC_SMALL_MAXCLASS && alignment <= PAGE) {
+ 		/*
+ 		 * Round size up to the nearest multiple of alignment.
+ 		 *
+diff --git a/src/arena.c b/src/arena.c
+index ba50e410..dc7646e6 100644
+--- a/src/arena.c
+++ b/src/arena.c
+@@ -1533,10 +1533,17 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
+     bool zero, tcache_t *tcache) {
+ 	void *ret;
+ 
+-	if (usize <= SC_SMALL_MAXCLASS
+-	    && (alignment < PAGE
+-	    || (alignment == PAGE && (usize & PAGE_MASK) == 0))) {
+	if (usize <= SC_SMALL_MAXCLASS) {
+ 		/* Small; alignment doesn't require special slab placement. */
+
+		/* usize should be a result of sz_sa2u() */
+		assert((usize & (alignment - 1)) == 0);
+
+		/*
+		 * Small usize can't come from an alignment larger than a page.
+		 */
+		assert(alignment <= PAGE);
+
+ 		ret = arena_malloc(tsdn, arena, usize, sz_size2index(usize),
+ 		    zero, tcache, true);
+ 	} else {
--- a/documentation/sphinx/source/index.rst
+++ b/documentation/sphinx/source/index.rst
@ -38,6 +38,8 @@ The latest changes are detailed in :ref:`release-notes`. The documentation has t

 * :doc:`administration` contains documentation on administering FoundationDB.

+* :doc:`monitored-metrics` contains documentation on monitoring and alerting for FoundationDB.
+
 * :doc:`redwood` contains documentation on Redwood Storage Engine.

 * :doc:`visibility` contains documentation related to Visibility into FoundationDB.
@ -55,6 +57,7 @@ The latest changes are detailed in :ref:`release-notes`. The documentation has t
   api-reference
   tutorials
   administration
+   monitored-metrics
   redwood
   visibility
   earlier-release-notes
--- a/documentation/sphinx/source/monitored-metrics.rst
+++ b/documentation/sphinx/source/monitored-metrics.rst
--- a/fdbclient/DatabaseContext.h
+++ b/fdbclient/DatabaseContext.h
@ -107,13 +107,13 @@ public:

 	void addReleased(int released) { smoothReleased.addDelta(released); }

-	bool expired() { return expiration <= now(); }
+	bool expired() const { return expiration <= now(); }

 	void updateChecked() { lastCheck = now(); }

-	bool canRecheck() { return lastCheck < now() - CLIENT_KNOBS->TAG_THROTTLE_RECHECK_INTERVAL; }
+	bool canRecheck() const { return lastCheck < now() - CLIENT_KNOBS->TAG_THROTTLE_RECHECK_INTERVAL; }

-	double throttleDuration() {
+	double throttleDuration() const {
 		if (expiration <= now()) {
 			return 0.0;
 		}
--- a/fdbclient/NativeAPI.actor.cpp
+++ b/fdbclient/NativeAPI.actor.cpp
@ -129,6 +129,9 @@ Future<REPLY_TYPE(Request)> loadBalance(
 FDB_BOOLEAN_PARAM(TransactionRecordLogInfo);
 FDB_DEFINE_BOOLEAN_PARAM(UseProvisionalProxies);

+// Used to determine whether or not client will load balance based on the number of GRVs released by each proxy
+FDB_DEFINE_BOOLEAN_PARAM(BalanceOnRequests);
+
 // Whether or not a request should include the tenant name
 FDB_BOOLEAN_PARAM(UseTenant);

@ -1875,9 +1878,9 @@ void DatabaseContext::setOption(FDBDatabaseOptions::Option option, Optional<Stri
 			                 clientLocality.machineId(),
 			                 clientLocality.dcId());
 			if (clientInfo->get().commitProxies.size())
-				commitProxies = makeReference<CommitProxyInfo>(clientInfo->get().commitProxies, false);
+				commitProxies = makeReference<CommitProxyInfo>(clientInfo->get().commitProxies);
 			if (clientInfo->get().grvProxies.size())
-				grvProxies = makeReference<GrvProxyInfo>(clientInfo->get().grvProxies, true);
+				grvProxies = makeReference<GrvProxyInfo>(clientInfo->get().grvProxies, BalanceOnRequests::True);
 			server_interf.clear();
 			locationCache.insert(allKeys, Reference<LocationInfo>());
 			break;
@ -1891,9 +1894,9 @@ void DatabaseContext::setOption(FDBDatabaseOptions::Option option, Optional<Stri
 			                 clientLocality.machineId(),
 			                 value.present() ? Standalone<StringRef>(value.get()) : Optional<Standalone<StringRef>>());
 			if (clientInfo->get().commitProxies.size())
-				commitProxies = makeReference<CommitProxyInfo>(clientInfo->get().commitProxies, false);
+				commitProxies = makeReference<CommitProxyInfo>(clientInfo->get().commitProxies);
 			if (clientInfo->get().grvProxies.size())
-				grvProxies = makeReference<GrvProxyInfo>(clientInfo->get().grvProxies, true);
+				grvProxies = makeReference<GrvProxyInfo>(clientInfo->get().grvProxies, BalanceOnRequests::True);
 			server_interf.clear();
 			locationCache.insert(allKeys, Reference<LocationInfo>());
 			break;
@ -2448,11 +2451,11 @@ void DatabaseContext::updateProxies() {
 	grvProxies.clear();
 	bool commitProxyProvisional = false, grvProxyProvisional = false;
 	if (clientInfo->get().commitProxies.size()) {
-		commitProxies = makeReference<CommitProxyInfo>(clientInfo->get().commitProxies, false);
+		commitProxies = makeReference<CommitProxyInfo>(clientInfo->get().commitProxies);
 		commitProxyProvisional = clientInfo->get().commitProxies[0].provisional;
 	}
 	if (clientInfo->get().grvProxies.size()) {
-		grvProxies = makeReference<GrvProxyInfo>(clientInfo->get().grvProxies, true);
+		grvProxies = makeReference<GrvProxyInfo>(clientInfo->get().grvProxies, BalanceOnRequests::True);
 		grvProxyProvisional = clientInfo->get().grvProxies[0].provisional;
 	}
 	if (clientInfo->get().commitProxies.size() && clientInfo->get().grvProxies.size()) {
--- a/fdbclient/PaxosConfigTransaction.actor.cpp
+++ b/fdbclient/PaxosConfigTransaction.actor.cpp
@ -22,6 +22,8 @@
 #include "fdbclient/PaxosConfigTransaction.h"
 #include "flow/actorcompiler.h" // must be last include

+using ConfigTransactionInfo = ModelInterface<ConfigTransactionInterface>;
+
 class CommitQuorum {
 	ActorCollection actors{ false };
 	std::vector<ConfigTransactionInterface> ctis;
@ -224,10 +226,12 @@ class PaxosConfigTransactionImpl {
 		loop {
 			try {
 				ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
-				// TODO: Load balance
+				state Reference<ConfigTransactionInfo> configNodes(
+				    new ConfigTransactionInfo(self->getGenerationQuorum.getReadReplicas()));
 				ConfigTransactionGetReply reply =
-				    wait(timeoutError(self->getGenerationQuorum.getReadReplicas()[0].get.getReply(
-				                          ConfigTransactionGetRequest{ generation, configKey }),
+				    wait(timeoutError(basicLoadBalance(configNodes,
+				                                       &ConfigTransactionInterface::get,
+				                                       ConfigTransactionGetRequest{ generation, configKey }),
 				                      CLIENT_KNOBS->GET_KNOB_TIMEOUT));
 				if (reply.value.present()) {
 					return reply.value.get().toValue();
@ -245,10 +249,12 @@ class PaxosConfigTransactionImpl {

 	ACTOR static Future<RangeResult> getConfigClasses(PaxosConfigTransactionImpl* self) {
 		ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
-		// TODO: Load balance
+		state Reference<ConfigTransactionInfo> configNodes(
+		    new ConfigTransactionInfo(self->getGenerationQuorum.getReadReplicas()));
 		ConfigTransactionGetConfigClassesReply reply =
-		    wait(retryBrokenPromise(self->getGenerationQuorum.getReadReplicas()[0].getClasses,
-		                            ConfigTransactionGetConfigClassesRequest{ generation }));
+		    wait(basicLoadBalance(configNodes,
+		                          &ConfigTransactionInterface::getClasses,
+		                          ConfigTransactionGetConfigClassesRequest{ generation }));
 		RangeResult result;
 		result.reserve(result.arena(), reply.configClasses.size());
 		for (const auto& configClass : reply.configClasses) {
@ -259,10 +265,12 @@ class PaxosConfigTransactionImpl {

 	ACTOR static Future<RangeResult> getKnobs(PaxosConfigTransactionImpl* self, Optional<Key> configClass) {
 		ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
-		// TODO: Load balance
+		state Reference<ConfigTransactionInfo> configNodes(
+		    new ConfigTransactionInfo(self->getGenerationQuorum.getReadReplicas()));
 		ConfigTransactionGetKnobsReply reply =
-		    wait(retryBrokenPromise(self->getGenerationQuorum.getReadReplicas()[0].getKnobs,
-		                            ConfigTransactionGetKnobsRequest{ generation, configClass }));
+		    wait(basicLoadBalance(configNodes,
+		                          &ConfigTransactionInterface::getKnobs,
+		                          ConfigTransactionGetKnobsRequest{ generation, configClass }));
 		RangeResult result;
 		result.reserve(result.arena(), reply.knobNames.size());
 		for (const auto& knobName : reply.knobNames) {
--- a/fdbclient/ServerKnobs.cpp
+++ b/fdbclient/ServerKnobs.cpp
@ -719,6 +719,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
 	init( COORDINATOR_LEADER_CONNECTION_TIMEOUT,                20.0 );

 	// Dynamic Knobs (implementation)
+	init( COMPACTION_INTERVAL,             isSimulated ? 5.0 : 300.0 );
 	init( UPDATE_NODE_TIMEOUT,                                   3.0 );
 	init( GET_COMMITTED_VERSION_TIMEOUT,                         3.0 );
 	init( GET_SNAPSHOT_AND_CHANGES_TIMEOUT,                      3.0 );
--- a/fdbclient/ServerKnobs.h
+++ b/fdbclient/ServerKnobs.h
@ -657,6 +657,7 @@ public:
 	double COORDINATOR_LEADER_CONNECTION_TIMEOUT;

 	// Dynamic Knobs (implementation)
+	double COMPACTION_INTERVAL;
 	double UPDATE_NODE_TIMEOUT;
 	double GET_COMMITTED_VERSION_TIMEOUT;
 	double GET_SNAPSHOT_AND_CHANGES_TIMEOUT;
--- a/fdbrpc/LoadBalance.actor.h
+++ b/fdbrpc/LoadBalance.actor.h
@ -490,7 +490,7 @@ Future<REPLY_TYPE(Request)> loadBalance(

 			RequestStream<Request> const* thisStream = &alternatives->get(i, channel);
 			if (!IFailureMonitor::failureMonitor().getState(thisStream->getEndpoint()).failed) {
-				auto& qd = model->getMeasurement(thisStream->getEndpoint().token.first());
+				auto const& qd = model->getMeasurement(thisStream->getEndpoint().token.first());
 				if (now() > qd.failedUntil) {
 					double thisMetric = qd.smoothOutstanding.smoothTotal();
 					double thisTime = qd.latency;
@ -529,7 +529,7 @@ Future<REPLY_TYPE(Request)> loadBalance(
 			for (int i = alternatives->countBest(); i < alternatives->size(); i++) {
 				RequestStream<Request> const* thisStream = &alternatives->get(i, channel);
 				if (!IFailureMonitor::failureMonitor().getState(thisStream->getEndpoint()).failed) {
-					auto& qd = model->getMeasurement(thisStream->getEndpoint().token.first());
+					auto const& qd = model->getMeasurement(thisStream->getEndpoint().token.first());
 					if (now() > qd.failedUntil) {
 						double thisMetric = qd.smoothOutstanding.smoothTotal();
 						double thisTime = qd.latency;
--- a/fdbrpc/MultiInterface.h
+++ b/fdbrpc/MultiInterface.h
@ -91,13 +91,16 @@ struct AlternativeInfo {
 	bool operator==(double const& r) const { return cumulativeProbability == r; }
 };

+FDB_DECLARE_BOOLEAN_PARAM(BalanceOnRequests);
+
 template <class T>
 class ModelInterface : public ReferenceCounted<ModelInterface<T>> {
 public:
 	// If balanceOnRequests is true, the client will load balance based on the number of GRVs released by each proxy
 	// If balanceOnRequests is false, the client will load balance based on the CPU usage of each proxy
 	// Only requests which take from the GRV budget on the proxy should set balanceOnRequests to true
-	ModelInterface(const std::vector<T>& v, bool balanceOnRequests) : balanceOnRequests(balanceOnRequests) {
+	explicit ModelInterface(const std::vector<T>& v, BalanceOnRequests balanceOnRequests = BalanceOnRequests::False)
+	  : balanceOnRequests(balanceOnRequests) {
 		for (int i = 0; i < v.size(); i++) {
 			alternatives.push_back(AlternativeInfo(v[i], 1.0 / v.size(), (i + 1.0) / v.size()));
 		}
--- a/fdbrpc/QueueModel.cpp
+++ b/fdbrpc/QueueModel.cpp
@ -50,7 +50,7 @@ void QueueModel::endRequest(uint64_t id, double latency, double penalty, double
 	}
 }

-QueueData& QueueModel::getMeasurement(uint64_t id) {
+QueueData const& QueueModel::getMeasurement(uint64_t id) {
 	return data[id]; // return smoothed penalty
 }

--- a/fdbrpc/QueueModel.h
+++ b/fdbrpc/QueueModel.h
@ -93,7 +93,7 @@ public:
 	// 	 - futureVersion: indicates whether there was "future version" error or
 	//					  not.
 	void endRequest(uint64_t id, double latency, double penalty, double delta, bool clean, bool futureVersion);
-	QueueData& getMeasurement(uint64_t id);
+	QueueData const& getMeasurement(uint64_t id);

 	// Starts a new request to storage server with `id`. If the storage
 	// server contains a penalty, add it to the queue size, and return the
--- a/fdbrpc/Smoother.h
+++ b/fdbrpc/Smoother.h
@ -18,40 +18,20 @@
 * limitations under the License.
 */

-#ifndef FLOW_SMOOTHER_H
-#define FLOW_SMOOTHER_H
 #pragma once

 #include "flow/flow.h"
 #include <cmath>

-struct Smoother {
+template <class T>
+class SmootherImpl {
 	// Times (t) are expected to be nondecreasing

-	explicit Smoother(double eFoldingTime) : eFoldingTime(eFoldingTime) { reset(0); }
-	void reset(double value) {
-		time = 0;
-		total = value;
-		estimate = value;
-	}
+	double eFoldingTime;
+	double total;
+	mutable double time, estimate;

-	void setTotal(double total, double t = now()) { addDelta(total - this->total, t); }
-	void addDelta(double delta, double t = now()) {
-		update(t);
-		total += delta;
-	}
-	// smoothTotal() is a continuous (under)estimate of the sum of all addDeltas()
-	double smoothTotal(double t = now()) {
-		update(t);
-		return estimate;
-	}
-	// smoothRate() is d/dt[smoothTotal], and is NOT continuous
-	double smoothRate(double t = now()) {
-		update(t);
-		return (total - estimate) / eFoldingTime;
-	}
-
-	void update(double t) {
+	void update(double t) const {
 		double elapsed = t - time;
 		if (elapsed) {
 			time = t;
@ -59,44 +39,41 @@ struct Smoother {
 		}
 	}

-	double eFoldingTime;
-	double time, total, estimate;
-};
+protected:
+	explicit SmootherImpl(double eFoldingTime) : eFoldingTime(eFoldingTime) { reset(0); }

-struct TimerSmoother {
-	// Times (t) are expected to be nondecreasing
-
-	explicit TimerSmoother(double eFoldingTime) : eFoldingTime(eFoldingTime) { reset(0); }
+public:
 	void reset(double value) {
 		time = 0;
 		total = value;
 		estimate = value;
 	}
-
-	void setTotal(double total, double t = timer()) { addDelta(total - this->total, t); }
-	void addDelta(double delta, double t = timer()) {
+	void setTotal(double total, double t = T::now()) { addDelta(total - this->total, t); }
+	void addDelta(double delta, double t = T::now()) {
 		update(t);
 		total += delta;
 	}
 	// smoothTotal() is a continuous (under)estimate of the sum of all addDeltas()
-	double smoothTotal(double t = timer()) {
+	double smoothTotal(double t = T::now()) const {
 		update(t);
 		return estimate;
 	}
 	// smoothRate() is d/dt[smoothTotal], and is NOT continuous
-	double smoothRate(double t = timer()) {
+	double smoothRate(double t = T::now()) const {
 		update(t);
 		return (total - estimate) / eFoldingTime;
 	}

-	void update(double t) {
-		double elapsed = t - time;
-		time = t;
-		estimate += (total - estimate) * (1 - exp(-elapsed / eFoldingTime));
-	}
-
-	double eFoldingTime;
-	double time, total, estimate;
+	double getTotal() const { return total; }
 };

-#endif
+class Smoother : public SmootherImpl<Smoother> {
+public:
+	static double now() { return ::now(); }
+	explicit Smoother(double eFoldingTime) : SmootherImpl<Smoother>(eFoldingTime) {}
+};
+class TimerSmoother : public SmootherImpl<TimerSmoother> {
+public:
+	static double now() { return timer(); }
+	explicit TimerSmoother(double eFoldingTime) : SmootherImpl<TimerSmoother>(eFoldingTime) {}
+};
--- a/fdbserver/ConfigBroadcaster.actor.cpp
+++ b/fdbserver/ConfigBroadcaster.actor.cpp
@ -94,6 +94,7 @@ class ConfigBroadcasterImpl {

 	int coordinators = 0;
 	std::unordered_set<NetworkAddress> activeConfigNodes;
+	std::unordered_set<NetworkAddress> registrationResponses;
 	bool disallowUnregistered = false;
 	Promise<Void> newConfigNodesAllowed;

@ -217,6 +218,7 @@ class ConfigBroadcasterImpl {
 		self->clients.erase(clientUID);
 		self->clientFailures.erase(clientUID);
 		self->activeConfigNodes.erase(clientAddress);
+		self->registrationResponses.erase(clientAddress);
 		// See comment where this promise is reset below.
 		if (self->newConfigNodesAllowed.isSet()) {
 			self->newConfigNodesAllowed.reset();
@ -258,6 +260,7 @@ class ConfigBroadcasterImpl {
 				self->newConfigNodesAllowed.reset();
 			}
 		}
+		self->registrationResponses.insert(address);

 		if (registered) {
 			if (!self->disallowUnregistered) {
@ -265,9 +268,18 @@ class ConfigBroadcasterImpl {
 			}
 			self->activeConfigNodes.insert(address);
 			self->disallowUnregistered = true;
-		} else if (self->activeConfigNodes.size() < self->coordinators / 2 + 1 && !self->disallowUnregistered) {
-			// Need to allow registration of previously unregistered nodes when
-			// the cluster first starts up.
+		} else if ((self->activeConfigNodes.size() < self->coordinators / 2 + 1 && !self->disallowUnregistered) ||
+		           self->coordinators - self->registrationResponses.size() <=
+		               self->coordinators / 2 + 1 - self->activeConfigNodes.size()) {
+			// Received a registration request from an unregistered node. There
+			// are two cases where we want to allow unregistered nodes to
+			// register:
+			// 	 * the cluster is just starting and no nodes are registered
+			// 	 * a minority of nodes are registered and a majority are
+			// 	   unregistered. This situation should only occur in rare
+			// 	   circumstances where the cluster controller dies with only a
+			// 	   minority of config nodes having received a
+			// 	   ConfigBroadcastReadyRequest
 			self->activeConfigNodes.insert(address);
 			if (self->activeConfigNodes.size() >= self->coordinators / 2 + 1 &&
 			    self->newConfigNodesAllowed.canBeSet()) {
@ -390,9 +402,9 @@ public:
 		this->coordinators = coordinators.configServers.size();
 		if (configDBType != ConfigDBType::DISABLED) {
 			if (configDBType == ConfigDBType::SIMPLE) {
-				consumer = IConfigConsumer::createSimple(coordinators, 0.5, Optional<double>{});
+				consumer = IConfigConsumer::createSimple(coordinators, 0.5, SERVER_KNOBS->COMPACTION_INTERVAL);
 			} else {
-				consumer = IConfigConsumer::createPaxos(coordinators, 0.5, Optional<double>{});
+				consumer = IConfigConsumer::createPaxos(coordinators, 0.5, SERVER_KNOBS->COMPACTION_INTERVAL);
 			}
 			TraceEvent(SevDebug, "ConfigBroadcasterStartingConsumer", id)
 			    .detail("Consumer", consumer->getID())
--- a/fdbserver/ConfigFollowerInterface.h
+++ b/fdbserver/ConfigFollowerInterface.h
@ -176,14 +176,16 @@ struct ConfigFollowerRollforwardRequest {

 struct ConfigFollowerGetCommittedVersionReply {
 	static constexpr FileIdentifier file_identifier = 9214735;
+	Version lastCompacted;
 	Version lastCommitted;

 	ConfigFollowerGetCommittedVersionReply() = default;
-	explicit ConfigFollowerGetCommittedVersionReply(Version lastCommitted) : lastCommitted(lastCommitted) {}
+	explicit ConfigFollowerGetCommittedVersionReply(Version lastCompacted, Version lastCommitted)
+	  : lastCompacted(lastCompacted), lastCommitted(lastCommitted) {}

 	template <class Ar>
 	void serialize(Ar& ar) {
-		serializer(ar, lastCommitted);
+		serializer(ar, lastCompacted, lastCommitted);
 	}
 };

--- a/fdbserver/ConfigNode.actor.cpp
+++ b/fdbserver/ConfigNode.actor.cpp
@ -495,7 +495,7 @@ class ConfigNodeImpl {
 	}

 	ACTOR static Future<Void> rollforward(ConfigNodeImpl* self, ConfigFollowerRollforwardRequest req) {
-		Version lastCompactedVersion = wait(getLastCompactedVersion(self));
+		state Version lastCompactedVersion = wait(getLastCompactedVersion(self));
 		if (req.lastKnownCommitted < lastCompactedVersion) {
 			req.reply.sendError(version_already_compacted());
 			return Void();
@ -529,6 +529,10 @@ class ConfigNodeImpl {
 			                                 versionedAnnotationKey(currentGeneration.committedVersion + 1)));

 			currentGeneration.committedVersion = req.rollback.get();
+			if (req.rollback.get() < lastCompactedVersion) {
+				self->kvStore->set(
+				    KeyValueRef(lastCompactedVersionKey, BinaryWriter::toValue(req.rollback.get(), IncludeVersion())));
+			}
 			// The mutation commit loop below should persist the new generation
 			// to disk, so we don't need to do it here.
 		}
@ -536,13 +540,15 @@ class ConfigNodeImpl {
 		// committed version and rollforward version.
 		ASSERT_GT(req.mutations[0].version, currentGeneration.committedVersion);
 		wait(commitMutations(self, req.mutations, req.annotations, req.target));
+
 		req.reply.send(Void());
 		return Void();
 	}

 	ACTOR static Future<Void> getCommittedVersion(ConfigNodeImpl* self, ConfigFollowerGetCommittedVersionRequest req) {
+		state Version lastCompacted = wait(getLastCompactedVersion(self));
 		ConfigGeneration generation = wait(getGeneration(self));
-		req.reply.send(ConfigFollowerGetCommittedVersionReply{ generation.committedVersion });
+		req.reply.send(ConfigFollowerGetCommittedVersionReply{ lastCompacted, generation.committedVersion });
 		return Void();
 	}

--- a/fdbserver/DataDistribution.actor.cpp
+++ b/fdbserver/DataDistribution.actor.cpp
@ -495,7 +495,7 @@ ACTOR Future<Void> monitorBatchLimitedTime(Reference<AsyncVar<ServerDBInfo> cons
 	loop {
 		wait(delay(SERVER_KNOBS->METRIC_UPDATE_RATE));

-		state Reference<GrvProxyInfo> grvProxies(new GrvProxyInfo(db->get().client.grvProxies, false));
+		state Reference<GrvProxyInfo> grvProxies(new GrvProxyInfo(db->get().client.grvProxies));

 		choose {
 			when(wait(db->onChange())) {}
--- a/fdbserver/DataDistribution.actor.h
+++ b/fdbserver/DataDistribution.actor.h
@ -170,7 +170,7 @@ public:
 	//       intersecting shards.

 	int getNumberOfShards(UID ssID) const;
-	std::vector<KeyRange> getShardsFor(Team team);
+	std::vector<KeyRange> getShardsFor(Team team) const;
 	bool hasShards(Team team) const;

 	// The first element of the pair is either the source for non-moving shards or the destination team for in-flight
@ -180,7 +180,7 @@ public:
 	void defineShard(KeyRangeRef keys);
 	void moveShard(KeyRangeRef keys, std::vector<Team> destinationTeam);
 	void finishMove(KeyRangeRef keys);
-	void check();
+	void check() const;

 private:
 	struct OrderByTeamKey {
@ -315,29 +315,23 @@ struct StorageWiggleMetrics {

 	template <class Ar>
 	void serialize(Ar& ar) {
+		double step_total, round_total;
+		if (!ar.isDeserializing) {
+			step_total = smoothed_wiggle_duration.getTotal();
+			round_total = smoothed_round_duration.getTotal();
+		}
+		serializer(ar,
+		           last_wiggle_start,
+		           last_wiggle_finish,
+		           step_total,
+		           finished_wiggle,
+		           last_round_start,
+		           last_round_finish,
+		           round_total,
+		           finished_round);
 		if (ar.isDeserializing) {
-			double step_total, round_total;
-			serializer(ar,
-			           last_wiggle_start,
-			           last_wiggle_finish,
-			           step_total,
-			           finished_wiggle,
-			           last_round_start,
-			           last_round_finish,
-			           round_total,
-			           finished_round);
 			smoothed_round_duration.reset(round_total);
 			smoothed_wiggle_duration.reset(step_total);
-		} else {
-			serializer(ar,
-			           last_wiggle_start,
-			           last_wiggle_finish,
-			           smoothed_wiggle_duration.total,
-			           finished_wiggle,
-			           last_round_start,
-			           last_round_finish,
-			           smoothed_round_duration.total,
-			           finished_round);
 		}
 	}

@ -369,27 +363,27 @@ struct StorageWiggleMetrics {
 		});
 	}

-	StatusObject toJSON() {
+	StatusObject toJSON() const {
 		StatusObject result;
 		result["last_round_start_datetime"] = timerIntToGmt(last_round_start);
 		result["last_round_finish_datetime"] = timerIntToGmt(last_round_finish);
 		result["last_round_start_timestamp"] = last_round_start;
 		result["last_round_finish_timestamp"] = last_round_finish;
-		result["smoothed_round_seconds"] = smoothed_round_duration.estimate;
+		result["smoothed_round_seconds"] = smoothed_round_duration.smoothTotal();
 		result["finished_round"] = finished_round;

 		result["last_wiggle_start_datetime"] = timerIntToGmt(last_wiggle_start);
 		result["last_wiggle_finish_datetime"] = timerIntToGmt(last_wiggle_finish);
 		result["last_wiggle_start_timestamp"] = last_wiggle_start;
 		result["last_wiggle_finish_timestamp"] = last_wiggle_finish;
-		result["smoothed_wiggle_seconds"] = smoothed_wiggle_duration.estimate;
+		result["smoothed_wiggle_seconds"] = smoothed_wiggle_duration.smoothTotal();
 		result["finished_wiggle"] = finished_wiggle;
 		return result;
 	}
 };

 struct StorageWiggler : ReferenceCounted<StorageWiggler> {
-	DDTeamCollection* teamCollection;
+	DDTeamCollection const* teamCollection;
 	StorageWiggleMetrics metrics;

 	// data structures
@ -416,8 +410,8 @@ struct StorageWiggler : ReferenceCounted<StorageWiggler> {
 	void removeServer(const UID& serverId);
 	// update metadata and adjust priority_queue
 	void updateMetadata(const UID& serverId, const StorageMetadataType& metadata);
-	bool contains(const UID& serverId) { return pq_handles.count(serverId) > 0; }
-	bool empty() { return wiggle_pq.empty(); }
+	bool contains(const UID& serverId) const { return pq_handles.count(serverId) > 0; }
+	bool empty() const { return wiggle_pq.empty(); }
 	Optional<UID> getNextServerId();

 	// -- statistic update
@ -429,8 +423,8 @@ struct StorageWiggler : ReferenceCounted<StorageWiggler> {
 	// called when start wiggling a SS
 	Future<Void> startWiggle();
 	Future<Void> finishWiggle();
-	bool shouldStartNewRound() { return metrics.last_round_finish >= metrics.last_round_start; }
-	bool shouldFinishRound() {
+	bool shouldStartNewRound() const { return metrics.last_round_finish >= metrics.last_round_start; }
+	bool shouldFinishRound() const {
 		if (wiggle_pq.empty())
 			return true;
 		return (wiggle_pq.top().first.createdTime >= metrics.last_round_start);
--- a/fdbserver/DataDistributionTracker.actor.cpp
+++ b/fdbserver/DataDistributionTracker.actor.cpp
@ -973,7 +973,7 @@ ACTOR Future<Void> dataDistributionTracker(Reference<InitialDataDistribution> in
 	}
 }

-std::vector<KeyRange> ShardsAffectedByTeamFailure::getShardsFor(Team team) {
+std::vector<KeyRange> ShardsAffectedByTeamFailure::getShardsFor(Team team) const {
 	std::vector<KeyRange> r;
 	for (auto it = team_shards.lower_bound(std::pair<Team, KeyRange>(team, KeyRangeRef()));
 	     it != team_shards.end() && it->first == team;
@ -1106,7 +1106,7 @@ void ShardsAffectedByTeamFailure::finishMove(KeyRangeRef keys) {
 	}
 }

-void ShardsAffectedByTeamFailure::check() {
+void ShardsAffectedByTeamFailure::check() const {
 	if (EXPENSIVE_VALIDATION) {
 		for (auto t = team_shards.begin(); t != team_shards.end(); ++t) {
 			auto i = shard_teams.rangeContaining(t->second.begin);
@ -1115,8 +1115,8 @@ void ShardsAffectedByTeamFailure::check() {
 			}
 		}
 		auto rs = shard_teams.ranges();
-		for (auto i = rs.begin(); i != rs.end(); ++i)
-			for (std::vector<Team>::iterator t = i->value().first.begin(); t != i->value().first.end(); ++t)
+		for (auto i = rs.begin(); i != rs.end(); ++i) {
+			for (auto t = i->value().first.begin(); t != i->value().first.end(); ++t) {
 				if (!team_shards.count(std::make_pair(*t, i->range()))) {
 					std::string teamDesc, shards;
 					for (int k = 0; k < t->servers.size(); k++)
@ -1132,5 +1132,7 @@ void ShardsAffectedByTeamFailure::check() {
 					    .detail("Shards", shards);
 					ASSERT(false);
 				}
+			}
+		}
 	}
 }
--- a/fdbserver/PaxosConfigConsumer.actor.cpp
+++ b/fdbserver/PaxosConfigConsumer.actor.cpp
@ -27,6 +27,8 @@
 #include "fdbserver/Knobs.h"
 #include "flow/actorcompiler.h" // This must be the last #include.

+using ConfigFollowerInfo = ModelInterface<ConfigFollowerInterface>;
+
 struct CommittedVersions {
 	Version secondToLastCommitted;
 	Version lastCommitted;
@ -42,6 +44,10 @@ class GetCommittedVersionQuorum {
 	std::vector<ConfigFollowerInterface> cfis;
 	std::map<Version, std::vector<ConfigFollowerInterface>> replies;
 	std::map<Version, Version> priorVersions;
+	std::map<NetworkAddress, Version> committed;
+	// Need to know the largest compacted version on any node to avoid asking
+	// for changes that have already been compacted.
+	Version largestCompactedResponse{ 0 };
 	// Last durably committed version.
 	Version lastSeenVersion;
 	size_t totalRepliesReceived{ 0 };
@ -58,6 +64,7 @@ class GetCommittedVersionQuorum {
 	ACTOR static Future<Void> updateNode(GetCommittedVersionQuorum* self,
 	                                     CommittedVersions nodeVersion,
 	                                     CommittedVersions quorumVersion,
+	                                     Version lastCompacted,
 	                                     ConfigFollowerInterface cfi) {
 		state Version target = quorumVersion.lastCommitted;
 		if (nodeVersion.lastCommitted == target) {
@ -79,37 +86,41 @@ class GetCommittedVersionQuorum {
 				rollback = std::max(nodeVersion.lastCommitted - 1, Version{ 0 });
 			}

+			if (rollback.present()) {
+				// When a new ConfigBroadcaster is created, it may not know
+				// about the last committed version on the ConfigNodes. If
+				// compaction has occurred, this can cause change requests to
+				// be sent to nodes asking for version 0 when the node has
+				// already compacted that version, causing an error. Make sure
+				// the rollback version is at least set to the last compacted
+				// version to prevent this issue.
+				rollback = std::max(rollback.get(), lastCompacted);
+			}
+
 			// Now roll node forward to match the largest committed version of
 			// the replies.
-			// TODO: Load balance over quorum. Also need to catch
-			// error_code_process_behind and retry with the next ConfigNode in
-			// the quorum.
-			state ConfigFollowerInterface quorumCfi = self->replies[target][0];
+			state Reference<ConfigFollowerInfo> quorumCfi(new ConfigFollowerInfo(self->replies[target]));
 			try {
-				state Version lastSeenVersion = rollback.present() ? rollback.get() : nodeVersion.lastCommitted;
-				ConfigFollowerGetChangesReply reply = wait(timeoutError(
-				    quorumCfi.getChanges.getReply(ConfigFollowerGetChangesRequest{ lastSeenVersion, target }),
-				    SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
+				state Version lastSeenVersion = std::max(
+				    rollback.present() ? rollback.get() : nodeVersion.lastCommitted, self->largestCompactedResponse);
+				ConfigFollowerGetChangesReply reply =
+				    wait(timeoutError(basicLoadBalance(quorumCfi,
+				                                       &ConfigFollowerInterface::getChanges,
+				                                       ConfigFollowerGetChangesRequest{ lastSeenVersion, target }),
+				                      SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
 				wait(timeoutError(cfi.rollforward.getReply(ConfigFollowerRollforwardRequest{
 				                      rollback, nodeVersion.lastCommitted, target, reply.changes, reply.annotations }),
 				                  SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
 			} catch (Error& e) {
-				if (e.code() == error_code_version_already_compacted) {
-					TEST(true); // PaxosConfigConsumer rollforward compacted ConfigNode
-					ConfigFollowerGetSnapshotAndChangesReply reply = wait(retryBrokenPromise(
-					    quorumCfi.getSnapshotAndChanges, ConfigFollowerGetSnapshotAndChangesRequest{ target }));
-					wait(retryBrokenPromise(
-					    cfi.rollforward,
-					    ConfigFollowerRollforwardRequest{
-					        rollback, nodeVersion.lastCommitted, target, reply.changes, reply.annotations }));
-				} else if (e.code() == error_code_transaction_too_old) {
+				if (e.code() == error_code_transaction_too_old) {
 					// Seeing this trace is not necessarily a problem. There
 					// are legitimate scenarios where a ConfigNode could return
-					// transaction_too_old in response to a rollforward
-					// request.
+					// one of these errors in response to a get changes or
+					// rollforward request. The retry loop should handle this
+					// case.
 					TraceEvent(SevInfo, "ConfigNodeRollforwardError").error(e);
 				} else {
-					throw e;
+					throw;
 				}
 			}
 		}
@ -123,6 +134,8 @@ class GetCommittedVersionQuorum {
 			                      SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));

 			++self->totalRepliesReceived;
+			self->largestCompactedResponse = std::max(self->largestCompactedResponse, reply.lastCompacted);
+			state Version lastCompacted = reply.lastCompacted;
 			self->largestCommitted = std::max(self->largestCommitted, reply.lastCommitted);
 			state CommittedVersions committedVersions = CommittedVersions{ self->lastSeenVersion, reply.lastCommitted };
 			if (self->priorVersions.find(committedVersions.lastCommitted) == self->priorVersions.end()) {
@ -136,14 +149,15 @@ class GetCommittedVersionQuorum {
 				if (self->quorumVersion.canBeSet()) {
 					self->quorumVersion.send(QuorumVersion{ committedVersions, true });
 				}
-				wait(self->updateNode(self, committedVersions, self->quorumVersion.getFuture().get().versions, cfi));
+				wait(self->updateNode(
+				    self, committedVersions, self->quorumVersion.getFuture().get().versions, lastCompacted, cfi));
 			} else if (self->maxAgreement >= self->cfis.size() / 2 + 1) {
 				// A quorum of ConfigNodes agree on the latest committed version,
 				// but the node we just got a reply from is not one of them. We may
 				// need to roll it forward or back.
 				QuorumVersion quorumVersion = wait(self->quorumVersion.getFuture());
 				ASSERT(committedVersions.lastCommitted != quorumVersion.versions.lastCommitted);
-				wait(self->updateNode(self, committedVersions, quorumVersion.versions, cfi));
+				wait(self->updateNode(self, committedVersions, quorumVersion.versions, lastCompacted, cfi));
 			} else if (self->maxAgreement + (self->cfis.size() - self->totalRepliesReceived) <
 			           (self->cfis.size() / 2 + 1)) {
 				// It is impossible to reach a quorum of ConfigNodes that agree
@ -158,18 +172,25 @@ class GetCommittedVersionQuorum {
 					self->quorumVersion.send(
 					    QuorumVersion{ CommittedVersions{ largestCommittedPrior, largestCommitted }, false });
 				}
-				wait(self->updateNode(self, committedVersions, self->quorumVersion.getFuture().get().versions, cfi));
+				wait(self->updateNode(
+				    self, committedVersions, self->quorumVersion.getFuture().get().versions, lastCompacted, cfi));
 			} else {
 				// Still building up responses; don't have enough data to act on
 				// yet, so wait until we do.
 				QuorumVersion quorumVersion = wait(self->quorumVersion.getFuture());
-				wait(self->updateNode(self, committedVersions, quorumVersion.versions, cfi));
+				wait(self->updateNode(self, committedVersions, quorumVersion.versions, lastCompacted, cfi));
 			}
 		} catch (Error& e) {
 			// Count a timeout as a reply.
 			++self->totalRepliesReceived;
-			if (e.code() != error_code_timed_out) {
-				throw;
+			if (e.code() == error_code_version_already_compacted) {
+				if (self->quorumVersion.canBeSet()) {
+					self->quorumVersion.sendError(e);
+				}
+			} else if (e.code() != error_code_timed_out && e.code() != error_code_broken_promise) {
+				if (self->quorumVersion.canBeSet()) {
+					self->quorumVersion.sendError(e);
+				}
 			} else if (self->totalRepliesReceived == self->cfis.size() && self->quorumVersion.canBeSet() &&
 			           !self->quorumVersion.isError()) {
 				size_t nonTimeoutReplies =
@ -178,14 +199,10 @@ class GetCommittedVersionQuorum {
 				    });
 				if (nonTimeoutReplies >= self->cfis.size() / 2 + 1) {
 					// Make sure to trigger the quorumVersion if a timeout
-					// occurred, a quorum disagree on the committed version, and
-					// there are no more incoming responses. Note that this means
-					// that it is impossible to reach a quorum, so send back the
-					// largest committed version seen. We also need to store the
-					// interface for the timed out server for future communication
-					// attempts.
-					auto& nodes = self->replies[self->largestCommitted];
-					nodes.push_back(cfi);
+					// occurred, a quorum disagree on the committed version,
+					// and there are no more incoming responses. Note that this
+					// means that it is impossible to reach a quorum, so send
+					// back the largest committed version seen.
 					self->quorumVersion.send(
 					    QuorumVersion{ CommittedVersions{ self->lastSeenVersion, self->largestCommitted }, false });
 				} else if (!self->quorumVersion.isSet()) {
@ -219,6 +236,16 @@ public:
 		ASSERT(isReady());
 		return replies.at(quorumVersion.getFuture().get().versions.lastCommitted);
 	}
+	Version getSmallestCommitted() const {
+		if (committed.size() == cfis.size()) {
+			Version smallest = MAX_VERSION;
+			for (const auto& [key, value] : committed) {
+				smallest = std::min(smallest, value);
+			}
+			return smallest;
+		}
+		return ::invalidVersion;
+	}
 	Future<Void> complete() const { return waitForAll(actors); }
 };

@ -226,6 +253,7 @@ class PaxosConfigConsumerImpl {
 	std::vector<ConfigFollowerInterface> cfis;
 	GetCommittedVersionQuorum getCommittedVersionQuorum;
 	Version lastSeenVersion{ 0 };
+	Version compactionVersion{ 0 };
 	double pollingInterval;
 	Optional<double> compactionInterval;
 	UID id;
@ -238,13 +266,15 @@ class PaxosConfigConsumerImpl {
 		return quorumVersion.versions.lastCommitted;
 	}

+	// Periodically compact knob changes on the configuration nodes. All nodes
+	// must have received a version before it can be compacted.
 	ACTOR static Future<Void> compactor(PaxosConfigConsumerImpl* self, ConfigBroadcaster* broadcaster) {
 		if (!self->compactionInterval.present()) {
 			wait(Never());
 			return Void();
 		}
 		loop {
-			state Version compactionVersion = self->lastSeenVersion;
+			state Version compactionVersion = self->compactionVersion;
 			wait(delayJittered(self->compactionInterval.get()));
 			std::vector<Future<Void>> compactionRequests;
 			compactionRequests.reserve(compactionRequests.size());
@ -263,12 +293,14 @@ class PaxosConfigConsumerImpl {
 		loop {
 			self->resetCommittedVersionQuorum(); // TODO: This seems to fix a segfault, investigate more
 			try {
-				// TODO: Load balance
 				state Version committedVersion = wait(getCommittedVersion(self));
-				ConfigFollowerGetSnapshotAndChangesReply reply = wait(
-				    timeoutError(self->getCommittedVersionQuorum.getReadReplicas()[0].getSnapshotAndChanges.getReply(
-				                     ConfigFollowerGetSnapshotAndChangesRequest{ committedVersion }),
-				                 SERVER_KNOBS->GET_SNAPSHOT_AND_CHANGES_TIMEOUT));
+				state Reference<ConfigFollowerInfo> configNodes(
+				    new ConfigFollowerInfo(self->getCommittedVersionQuorum.getReadReplicas()));
+				ConfigFollowerGetSnapshotAndChangesReply reply =
+				    wait(timeoutError(basicLoadBalance(configNodes,
+				                                       &ConfigFollowerInterface::getSnapshotAndChanges,
+				                                       ConfigFollowerGetSnapshotAndChangesRequest{ committedVersion }),
+				                      SERVER_KNOBS->GET_SNAPSHOT_AND_CHANGES_TIMEOUT));
 				TraceEvent(SevDebug, "ConfigConsumerGotSnapshotAndChanges", self->id)
 				    .detail("SnapshotVersion", reply.snapshotVersion)
 				    .detail("SnapshotSize", reply.snapshot.size())
@ -277,6 +309,8 @@ class PaxosConfigConsumerImpl {
 				    .detail("AnnotationsSize", reply.annotations.size());
 				ASSERT_GE(committedVersion, self->lastSeenVersion);
 				self->lastSeenVersion = committedVersion;
+				Version smallestCommitted = self->getCommittedVersionQuorum.getSmallestCommitted();
+				self->compactionVersion = std::max(self->compactionVersion, smallestCommitted);
 				broadcaster->applySnapshotAndChanges(std::move(reply.snapshot),
 				                                     reply.snapshotVersion,
 				                                     reply.changes,
@ -288,7 +322,8 @@ class PaxosConfigConsumerImpl {
 			} catch (Error& e) {
 				if (e.code() == error_code_failed_to_reach_quorum) {
 					wait(self->getCommittedVersionQuorum.complete());
-				} else if (e.code() != error_code_timed_out && e.code() != error_code_broken_promise) {
+				} else if (e.code() != error_code_timed_out && e.code() != error_code_broken_promise &&
+				           e.code() != error_code_version_already_compacted && e.code() != error_code_process_behind) {
 					throw;
 				}
 				wait(delayJittered(0.1));
@ -313,13 +348,14 @@ class PaxosConfigConsumerImpl {
 				// ConfigNodes changes to 1, 1, 2, the committed version
 				// returned would be 1.
 				if (committedVersion > self->lastSeenVersion) {
-					// TODO: Load balance to avoid always hitting the
-					// node at index 0 first
 					ASSERT(self->getCommittedVersionQuorum.getReadReplicas().size() >= self->cfis.size() / 2 + 1);
-					ConfigFollowerGetChangesReply reply = wait(
-					    timeoutError(self->getCommittedVersionQuorum.getReadReplicas()[0].getChanges.getReply(
+					state Reference<ConfigFollowerInfo> configNodes(
+					    new ConfigFollowerInfo(self->getCommittedVersionQuorum.getReadReplicas()));
+					ConfigFollowerGetChangesReply reply = wait(timeoutError(
+					    basicLoadBalance(configNodes,
+					                     &ConfigFollowerInterface::getChanges,
 					                     ConfigFollowerGetChangesRequest{ self->lastSeenVersion, committedVersion }),
-					                 SERVER_KNOBS->FETCH_CHANGES_TIMEOUT));
+					    SERVER_KNOBS->FETCH_CHANGES_TIMEOUT));
 					for (const auto& versionedMutation : reply.changes) {
 						TraceEvent te(SevDebug, "ConsumerFetchedMutation", self->id);
 						te.detail("Version", versionedMutation.version)
@ -333,19 +369,20 @@ class PaxosConfigConsumerImpl {
 						}
 					}
 					self->lastSeenVersion = committedVersion;
+					Version smallestCommitted = self->getCommittedVersionQuorum.getSmallestCommitted();
+					self->compactionVersion = std::max(self->compactionVersion, smallestCommitted);
 					broadcaster->applyChanges(reply.changes,
 					                          committedVersion,
 					                          reply.annotations,
 					                          self->getCommittedVersionQuorum.getReadReplicas());
-					// TODO: Catch error_code_process_behind and retry with
-					// the next ConfigNode in the quorum.
 				} else if (committedVersion == self->lastSeenVersion) {
 					broadcaster->applyChanges({}, -1, {}, self->getCommittedVersionQuorum.getReadReplicas());
 				}
 				wait(delayJittered(self->pollingInterval));
 			} catch (Error& e) {
 				if (e.code() == error_code_version_already_compacted || e.code() == error_code_timed_out ||
-				    e.code() == error_code_failed_to_reach_quorum) {
+				    e.code() == error_code_failed_to_reach_quorum || e.code() == error_code_version_already_compacted ||
+				    e.code() == error_code_process_behind) {
 					TEST(true); // PaxosConfigConsumer get version_already_compacted error
 					if (e.code() == error_code_failed_to_reach_quorum) {
 						try {
@ -365,7 +402,7 @@ class PaxosConfigConsumerImpl {
 					self->resetCommittedVersionQuorum();
 					continue;
 				} else {
-					throw e;
+					throw;
 				}
 			}
 			try {
--- a/fdbserver/Ratekeeper.actor.cpp
+++ b/fdbserver/Ratekeeper.actor.cpp
@ -511,8 +511,8 @@ void Ratekeeper::updateRate(RatekeeperLimits* limits) {
 	int64_t limitingStorageQueueStorageServer = 0;
 	int64_t worstDurabilityLag = 0;

-	std::multimap<double, StorageQueueInfo*> storageTpsLimitReverseIndex;
-	std::multimap<int64_t, StorageQueueInfo*> storageDurabilityLagReverseIndex;
+	std::multimap<double, StorageQueueInfo const*> storageTpsLimitReverseIndex;
+	std::multimap<int64_t, StorageQueueInfo const*> storageDurabilityLagReverseIndex;

 	std::map<UID, limitReason_t> ssReasons;

@ -522,7 +522,7 @@ void Ratekeeper::updateRate(RatekeeperLimits* limits) {

 	// Look at each storage server's write queue and local rate, compute and store the desired rate ratio
 	for (auto i = storageQueueInfo.begin(); i != storageQueueInfo.end(); ++i) {
-		auto& ss = i->value;
+		auto const& ss = i->value;
 		if (!ss.valid || (remoteDC.present() && ss.locality.dcId() == remoteDC))
 			continue;
 		++sscount;
@ -779,7 +779,7 @@ void Ratekeeper::updateRate(RatekeeperLimits* limits) {
 	int64_t worstStorageQueueTLog = 0;
 	int tlcount = 0;
 	for (auto& it : tlogQueueInfo) {
-		auto& tl = it.value;
+		auto const& tl = it.value;
 		if (!tl.valid)
 			continue;
 		++tlcount;
--- a/fdbserver/StorageCache.actor.cpp
+++ b/fdbserver/StorageCache.actor.cpp
@ -2223,6 +2223,25 @@ ACTOR Future<Void> storageCacheServer(StorageServerInterface ssi,
 			when(ReplyPromise<KeyValueStoreType> reply = waitNext(ssi.getKeyValueStoreType.getFuture())) {
 				ASSERT(false);
 			}
+
+			when(GetMappedKeyValuesRequest req = waitNext(ssi.getMappedKeyValues.getFuture())) { ASSERT(false); }
+			when(WaitMetricsRequest req = waitNext(ssi.waitMetrics.getFuture())) { ASSERT(false); }
+			when(SplitMetricsRequest req = waitNext(ssi.splitMetrics.getFuture())) { ASSERT(false); }
+			when(GetStorageMetricsRequest req = waitNext(ssi.getStorageMetrics.getFuture())) { ASSERT(false); }
+			when(ReadHotSubRangeRequest req = waitNext(ssi.getReadHotRanges.getFuture())) { ASSERT(false); }
+			when(SplitRangeRequest req = waitNext(ssi.getRangeSplitPoints.getFuture())) { ASSERT(false); }
+			when(GetKeyValuesStreamRequest req = waitNext(ssi.getKeyValuesStream.getFuture())) { ASSERT(false); }
+			when(ChangeFeedStreamRequest req = waitNext(ssi.changeFeedStream.getFuture())) { ASSERT(false); }
+			when(OverlappingChangeFeedsRequest req = waitNext(ssi.overlappingChangeFeeds.getFuture())) {
+				// Simulate endpoint not found so that the requester will try another endpoint
+				// This is a workaround to the fact that storage servers do not have an easy way to enforce this
+				// request goes only to other storage servers, and in simulation we manage to trigger this behavior
+				req.reply.sendError(broken_promise());
+			}
+			when(ChangeFeedPopRequest req = waitNext(ssi.changeFeedPop.getFuture())) { ASSERT(false); }
+			when(ChangeFeedVersionUpdateRequest req = waitNext(ssi.changeFeedVersionUpdate.getFuture())) {
+				ASSERT(false);
+			}
 			when(wait(actors.getResult())) {}
 		}
 	}
--- a/fdbserver/TagThrottler.actor.cpp
+++ b/fdbserver/TagThrottler.actor.cpp
@ -38,7 +38,7 @@ class RkTagThrottleCollection : NonCopyable {

 		RkTagThrottleData() : clientRate(CLIENT_KNOBS->TAG_THROTTLE_SMOOTHING_WINDOW) {}

-		double getTargetRate(Optional<double> requestRate) {
+		double getTargetRate(Optional<double> requestRate) const {
 			if (limits.tpsRate == 0.0 || !requestRate.present() || requestRate.get() == 0.0 || !rateSet) {
 				return limits.tpsRate;
 			} else {
@ -538,7 +538,7 @@ public:
 	int64_t manualThrottleCount() const { return throttledTags.manualThrottleCount(); }
 	bool isAutoThrottlingEnabled() const { return autoThrottlingEnabled; }

-	Future<Void> tryAutoThrottleTag(StorageQueueInfo& ss, int64_t storageQueue, int64_t storageDurabilityLag) {
+	Future<Void> tryAutoThrottleTag(StorageQueueInfo const& ss, int64_t storageQueue, int64_t storageDurabilityLag) {
 		// NOTE: we just keep it simple and don't differentiate write-saturation and read-saturation at the moment. In
 		// most of situation, this works. More indicators besides queue size and durability lag could be investigated in
 		// the future
@ -591,7 +591,7 @@ int64_t TagThrottler::manualThrottleCount() const {
 bool TagThrottler::isAutoThrottlingEnabled() const {
 	return impl->isAutoThrottlingEnabled();
 }
-Future<Void> TagThrottler::tryAutoThrottleTag(StorageQueueInfo& ss,
+Future<Void> TagThrottler::tryAutoThrottleTag(StorageQueueInfo const& ss,
                                              int64_t storageQueue,
                                              int64_t storageDurabilityLag) {
 	return impl->tryAutoThrottleTag(ss, storageQueue, storageDurabilityLag);
--- a/fdbserver/TagThrottler.h
+++ b/fdbserver/TagThrottler.h
@ -38,5 +38,5 @@ public:
 	uint32_t busyWriteTagCount() const;
 	int64_t manualThrottleCount() const;
 	bool isAutoThrottlingEnabled() const;
-	Future<Void> tryAutoThrottleTag(StorageQueueInfo&, int64_t storageQueue, int64_t storageDurabilityLag);
+	Future<Void> tryAutoThrottleTag(StorageQueueInfo const&, int64_t storageQueue, int64_t storageDurabilityLag);
 };
--- a/fdbserver/storageserver.actor.cpp
+++ b/fdbserver/storageserver.actor.cpp
@ -159,6 +159,7 @@ struct AddingShard : NonCopyable {

 	struct StorageServer* server;
 	Version transferredVersion;
+	Version fetchVersion;

 	// To learn more details of the phase transitions, see function fetchKeys(). The phases below are sorted in
 	// chronological order and do not go back.
@ -179,7 +180,7 @@ struct AddingShard : NonCopyable {
 	// When fetchKeys "partially completes" (splits an adding shard in two), this is used to construct the left half
 	AddingShard(AddingShard* prev, KeyRange const& keys)
 	  : keys(keys), fetchClient(prev->fetchClient), server(prev->server), transferredVersion(prev->transferredVersion),
-	    phase(prev->phase) {}
+	    fetchVersion(prev->fetchVersion), phase(prev->phase) {}
 	~AddingShard() {
 		if (!fetchComplete.isSet())
 			fetchComplete.send(Void());
@ -4548,6 +4549,7 @@ ACTOR Future<Void> fetchKeys(StorageServer* data, AddingShard* shard) {
 		// Get the history
 		state int debug_getRangeRetries = 0;
 		state int debug_nextRetryToLog = 1;
+		state Error lastError();

 		// FIXME: The client cache does not notice when servers are added to a team. To read from a local storage server
 		// we must refresh the cache manually.
@ -4555,8 +4557,27 @@ ACTOR Future<Void> fetchKeys(StorageServer* data, AddingShard* shard) {

 		loop {
 			state Transaction tr(data->cx);
-			fetchVersion = data->version.get();
-
+			// fetchVersion = data->version.get();
+			// A quick fix:
+			// By default, we use data->version as the fetchVersion.
+			// In the case where dest SS falls far behind src SS, we use GRV as the fetchVersion instead of
+			// data->version, and then the dest SS waits for catching up the fetchVersion outside the
+			// fetchKeysParallelismLock.
+			// For example, consider dest SS falls far behind src SS.
+			// At iteration 0, dest SS selects its version as fetchVersion,
+			// but cannot read src SS and result in error_code_transaction_too_old.
+			// Due to error_code_transaction_too_old, dest SS starts iteration 1.
+			// At iteration 1, dest SS selects GRV as fetchVersion and (suppose) can read the data from src SS.
+			// Then dest SS waits its version catch up with this GRV version and write the data to disk.
+			// Note that dest SS waits outside the fetchKeysParallelismLock.
+			if (lastError.code() == error_code_transaction_too_old) {
+				Version grvVersion = wait(tr.getRawReadVersion());
+				fetchVersion = std::max(grvVersion, data->version.get());
+			} else {
+				fetchVersion = std::max(shard->fetchVersion, data->version.get());
+			}
+			ASSERT(fetchVersion >= shard->fetchVersion); // at this point, shard->fetchVersion is the last fetchVersion
+			shard->fetchVersion = fetchVersion;
 			TraceEvent(SevDebug, "FetchKeysUnblocked", data->thisServerID)
 			    .detail("FKID", interval.pairID)
 			    .detail("Version", fetchVersion);
@ -4633,6 +4654,7 @@ ACTOR Future<Void> fetchKeys(StorageServer* data, AddingShard* shard) {
 				    e.code() != error_code_process_behind && e.code() != error_code_server_overloaded) {
 					throw;
 				}
+				lastError = e;
 				if (nfk == keys.begin) {
 					TraceEvent("FKBlockFail", data->thisServerID)
 					    .errorUnsuppressed(e)
@ -4705,10 +4727,11 @@ ACTOR Future<Void> fetchKeys(StorageServer* data, AddingShard* shard) {
 		// we have written)

 		state Future<Void> fetchDurable = data->durableVersion.whenAtLeast(data->storageVersion() + 1);
+		state Future<Void> dataArrive = data->version.whenAtLeast(fetchVersion);
 		wait(dispatchChangeFeeds(data, fetchKeysID, keys, fetchVersion));

 		holdingFKPL.release();
-		wait(fetchDurable);
+		wait(dataArrive && fetchDurable);

 		TraceEvent(SevDebug, "FKAfterFinalCommit", data->thisServerID)
 		    .detail("FKID", interval.pairID)
@ -4726,7 +4749,7 @@ ACTOR Future<Void> fetchKeys(StorageServer* data, AddingShard* shard) {
 		TraceEvent(SevDebug, "FKUpdateBatch", data->thisServerID).detail("FKID", interval.pairID);

 		shard->phase = AddingShard::Waiting;
-
+		ASSERT(data->version.get() >= fetchVersion);
 		// Choose a transferredVersion.  This choice and timing ensure that
 		//   * The transferredVersion can be mutated in versionedData
 		//   * The transferredVersion isn't yet committed to storage (so we can write the availability status change)
@ -4746,6 +4769,9 @@ ACTOR Future<Void> fetchKeys(StorageServer* data, AddingShard* shard) {
 		    .detail("StorageVersion", data->storageVersion());
 		validate(data);

+		// the minimal version in updates must be larger than fetchVersion
+		ASSERT(shard->updates.empty() || shard->updates[0].version > fetchVersion);
+
 		// Put the updates that were collected during the FinalCommit phase into the batch at the transferredVersion.
 		// Eager reads will be done for them by update(), and the mutations will come back through
 		// AddingShard::addMutations and be applied to versionedMap and mutationLog as normal. The lie about their
@ -4835,11 +4861,15 @@ ACTOR Future<Void> fetchKeys(StorageServer* data, AddingShard* shard) {
 };

 AddingShard::AddingShard(StorageServer* server, KeyRangeRef const& keys)
-  : keys(keys), server(server), transferredVersion(invalidVersion), phase(WaitPrevious) {
+  : keys(keys), server(server), transferredVersion(invalidVersion), fetchVersion(invalidVersion), phase(WaitPrevious) {
 	fetchClient = fetchKeys(server, this);
 }

 void AddingShard::addMutation(Version version, bool fromFetch, MutationRef const& mutation) {
+	if (version <= fetchVersion) {
+		return;
+	}
+
 	server->counters.logicalBytesMoveInOverhead += mutation.expectedSize();
 	if (mutation.type == mutation.ClearRange) {
 		ASSERT(keys.begin <= mutation.param1 && mutation.param2 <= keys.end);
@ -7619,8 +7649,7 @@ ACTOR Future<Void> replaceInterface(StorageServer* self, StorageServerInterface

 	loop {
 		state Future<Void> infoChanged = self->db->onChange();
-		state Reference<CommitProxyInfo> commitProxies(
-		    new CommitProxyInfo(self->db->get().client.commitProxies, false));
+		state Reference<CommitProxyInfo> commitProxies(new CommitProxyInfo(self->db->get().client.commitProxies));
 		choose {
 			when(GetStorageServerRejoinInfoReply _rep =
 			         wait(commitProxies->size()
--- a/flow/Arena.cpp
+++ b/flow/Arena.cpp
@ -676,6 +676,7 @@ TEST_CASE("/flow/Arena/DefaultBoostHash") {

 TEST_CASE("/flow/Arena/Size") {
 	Arena a;
+	int fastSize, slowSize;

 	// Size estimates are accurate unless dependencies are added to an Arena via another Arena
 	// handle which points to a non-root node.
@ -683,10 +684,14 @@ TEST_CASE("/flow/Arena/Size") {
 	// Note that the ASSERT argument order matters, the estimate must be calculated first as
 	// the full accurate calculation will update the estimate
 	makeString(40, a);
-	ASSERT_EQ(a.getSize(true), a.getSize());
+	fastSize = a.getSize(true);
+	slowSize = a.getSize();
+	ASSERT_EQ(fastSize, slowSize);

 	makeString(700, a);
-	ASSERT_EQ(a.getSize(true), a.getSize());
+	fastSize = a.getSize(true);
+	slowSize = a.getSize();
+	ASSERT_EQ(fastSize, slowSize);

 	// Copy a at a point where it points to a large block with room for block references
 	Arena b = a;
@ -697,35 +702,51 @@ TEST_CASE("/flow/Arena/Size") {

 	makeString(1000, a);
 	makeString(1000, a);
-	ASSERT_EQ(a.getSize(true), a.getSize());
+	fastSize = a.getSize(true);
+	slowSize = a.getSize();
+	ASSERT_EQ(fastSize, slowSize);

 	Standalone<StringRef> s = makeString(500);
 	a.dependsOn(s.arena());
-	ASSERT_EQ(a.getSize(true), a.getSize());
+	fastSize = a.getSize(true);
+	slowSize = a.getSize();
+	ASSERT_EQ(fastSize, slowSize);

 	Standalone<StringRef> s2 = makeString(500);
 	a.dependsOn(s2.arena());
-	ASSERT_EQ(a.getSize(true), a.getSize());
+	fastSize = a.getSize(true);
+	slowSize = a.getSize();
+	ASSERT_EQ(fastSize, slowSize);

 	// Add a dependency to b, which will fit in b's root and update b's size estimate
 	Standalone<StringRef> s3 = makeString(100);
 	b.dependsOn(s3.arena());
-	ASSERT_EQ(b.getSize(true), b.getSize());
+	fastSize = b.getSize(true);
+	slowSize = b.getSize();
+	ASSERT_EQ(fastSize, slowSize);

 	// But now a's size estimate is out of date because the new reference in b's root is still
 	// in a's tree
-	ASSERT_LT(a.getSize(true), a.getSize());
+	fastSize = a.getSize(true);
+	slowSize = a.getSize();
+	ASSERT_LT(fastSize, slowSize);

 	// Now that a full size calc has been done on a, the estimate is up to date.
-	ASSERT_EQ(a.getSize(true), a.getSize());
+	fastSize = a.getSize(true);
+	slowSize = a.getSize();
+	ASSERT_EQ(fastSize, slowSize);

 	// Add a dependency to c, which will NOT fit in c's root, so it will be added to a new
 	// root for c and that root will not be in a's tree so a's size and estimate remain
 	// unchanged and the same.  The size and estimate of c will also match.
 	Standalone<StringRef> s4 = makeString(100);
 	c.dependsOn(s4.arena());
-	ASSERT_EQ(c.getSize(true), c.getSize());
-	ASSERT_EQ(a.getSize(true), a.getSize());
+	fastSize = c.getSize(true);
+	slowSize = c.getSize();
+	ASSERT_EQ(fastSize, slowSize);
+	fastSize = a.getSize(true);
+	slowSize = a.getSize();
+	ASSERT_EQ(fastSize, slowSize);

 	return Void();
 }
--- a/flow/FastAlloc.cpp
+++ b/flow/FastAlloc.cpp
@ -24,6 +24,7 @@
 #include "flow/Trace.h"
 #include "flow/Error.h"
 #include "flow/Knobs.h"
+#include "flow/UnitTest.h"
 #include "flow/crc32c.h"
 #include "flow/flow.h"

@ -588,3 +589,20 @@ template class FastAllocator<2048>;
 template class FastAllocator<4096>;
 template class FastAllocator<8192>;
 template class FastAllocator<16384>;
+
+#ifdef USE_JEMALLOC
+#include <jemalloc/jemalloc.h>
+TEST_CASE("/jemalloc/4k_aligned_usable_size") {
+	for (int i = 1; i < 4; ++i) {
+		auto* ptr = aligned_alloc(4096, i * 4096);
+		try {
+			ASSERT_EQ(malloc_usable_size(ptr), i * 4096);
+		} catch (...) {
+			aligned_free(ptr);
+			throw;
+		}
+		aligned_free(ptr);
+	}
+	return Void();
+}
+#endif
--- a/flow/config.h.cmake
+++ b/flow/config.h.cmake
@ -28,4 +28,5 @@
 # endif
 # cmakedefine DTRACE_PROBES
 # cmakedefine HAS_ALIGNED_ALLOC
+# cmakedefine USE_JEMALLOC
 #endif // WIN32
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@ -138,7 +138,6 @@ if(WITH_PYTHON)
  add_fdb_test(TEST_FILES fast/CycleAndLock.toml)
  add_fdb_test(TEST_FILES fast/CycleTest.toml)
  add_fdb_test(TEST_FILES fast/ChangeFeeds.toml)
-  add_fdb_test(TEST_FILES fast/PhysicalShardMove.toml)
  add_fdb_test(TEST_FILES fast/DataLossRecovery.toml)
  add_fdb_test(TEST_FILES fast/EncryptionOps.toml)
  add_fdb_test(TEST_FILES fast/FuzzApiCorrectness.toml)
@ -181,8 +180,12 @@ if(WITH_PYTHON)
  add_fdb_test(TEST_FILES fast/WriteDuringRead.toml)
  add_fdb_test(TEST_FILES fast/WriteDuringReadClean.toml)
  add_fdb_test(TEST_FILES noSim/RandomUnitTests.toml UNIT)
-  if (SSD_ROCKSDB_EXPERIMENTAL)
-    add_fdb_test(TEST_FILES noSim/KeyValueStoreRocksDBTest.toml IGNORE) # re-enable as needed for RocksDB. Breaks correctness tests if RocksDB is disabled.
+  if (WITH_ROCKSDB_EXPERIMENTAL)
+    add_fdb_test(TEST_FILES noSim/KeyValueStoreRocksDBTest.toml)
+    add_fdb_test(TEST_FILES fast/PhysicalShardMove.toml)
+  else()
+    add_fdb_test(TEST_FILES noSim/KeyValueStoreRocksDBTest.toml IGNORE)
+    add_fdb_test(TEST_FILES fast/PhysicalShardMove.toml IGNORE)
  endif()
  add_fdb_test(TEST_FILES rare/CheckRelocation.toml)
  add_fdb_test(TEST_FILES rare/ClogUnclog.toml)
--- a/tests/TestRunner/tmp_cluster.py
+++ b/tests/TestRunner/tmp_cluster.py
@ -18,7 +18,8 @@ class TempCluster:
        assert self.build_dir.is_dir(), "{} is not a directory".format(build_dir)
        tmp_dir = self.build_dir.joinpath(
            "tmp",
-            "".join(choice(LocalCluster.valid_letters_for_secret) for i in range(16)),
+            "".join(choice(LocalCluster.valid_letters_for_secret)
+                    for i in range(16)),
        )
        tmp_dir.mkdir(parents=True)
        self.cluster = LocalCluster(
@ -75,7 +76,8 @@ if __name__ == "__main__":
        help="FDB build directory",
        required=True,
    )
-    parser.add_argument("cmd", metavar="COMMAND", nargs="+", help="The command to run")
+    parser.add_argument("cmd", metavar="COMMAND",
+                        nargs="+", help="The command to run")
    parser.add_argument(
        "--process-number",
        "-p",
@ -83,6 +85,11 @@ if __name__ == "__main__":
        type=int,
        default=1,
    )
+    parser.add_argument(
+        '--disable-log-dump',
+        help='Do not dump cluster log on error',
+        action="store_true"
+    )
    args = parser.parse_args()
    errcode = 1
    with TempCluster(args.build_dir, args.process_number) as cluster:
@ -128,7 +135,7 @@ if __name__ == "__main__":
            errcode = 1
            break

-        if errcode:
+        if errcode and not args.disable_log_dump:
            for etc_file in glob.glob(os.path.join(cluster.etc, "*")):
                print(">>>>>>>>>>>>>>>>>>>> Contents of {}:".format(etc_file))
                with open(etc_file, "r") as f:
--- a/tests/fast/CacheTest.toml
+++ b/tests/fast/CacheTest.toml
@ -12,5 +12,5 @@ testTitle = 'Cycle'
    testName = 'Cycle'
    transactionsPerSecond = 2500.0
    testDuration = 10.0
-    expectedRate = 0.80
+    expectedRate = 0.01
    keyPrefix = 'foo/'