Merge branch 'main' of github.com:apple/foundationdb into getsizetenant

Commit 0f9da9d1ad by Ankita Kejriwal, 2022-10-14 16:32:38 -07:00
255 changed files with 5554 additions and 1929 deletions

View File

@@ -166,6 +166,7 @@ void ApiWorkload::populateDataTx(TTaskFct cont, std::optional<int> tenantId) {
     execTransaction(
         [kvPairs](auto ctx) {
             for (const fdb::KeyValue& kv : *kvPairs) {
+                ctx->tx().addReadConflictRange(kv.key, kv.key + fdb::Key(1, '\x00'));
                 ctx->tx().set(kv.key, kv.value);
             }
             ctx->commit();
@@ -257,6 +258,7 @@ void ApiWorkload::randomInsertOp(TTaskFct cont, std::optional<int> tenantId) {
     execTransaction(
         [kvPairs](auto ctx) {
             for (const fdb::KeyValue& kv : *kvPairs) {
+                ctx->tx().addReadConflictRange(kv.key, kv.key + fdb::Key(1, '\x00'));
                 ctx->tx().set(kv.key, kv.value);
             }
             ctx->commit();
@@ -279,6 +281,7 @@ void ApiWorkload::randomClearOp(TTaskFct cont, std::optional<int> tenantId) {
     execTransaction(
         [keys](auto ctx) {
             for (const auto& key : *keys) {
+                ctx->tx().addReadConflictRange(key, key + fdb::Key(1, '\x00'));
                 ctx->tx().clear(key);
             }
             ctx->commit();
@@ -300,6 +303,7 @@ void ApiWorkload::randomClearRangeOp(TTaskFct cont, std::optional<int> tenantId) {
     }
     execTransaction(
         [begin, end](auto ctx) {
+            ctx->tx().addReadConflictRange(begin, end);
            ctx->tx().clearRange(begin, end);
            ctx->commit();
        },
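The hunks above all apply one idiom: appending a single zero byte to a key yields the smallest key strictly greater than it, so `[key, key + '\x00')` is the one-key range covering exactly `key`. A minimal sketch of the idea, assuming the tester's `fdb::` wrapper types used above (`selfConflictingSet` is a hypothetical helper, not part of the tester):

```cpp
// Registering a read conflict range over the written key turns an otherwise
// blind write into a self-conflicting one: if a timed-out attempt actually
// committed, a restarted attempt now conflicts and retries via onError()
// instead of silently double-applying.
void selfConflictingSet(fdb::Transaction& tx, const fdb::Key& key, const fdb::Value& value) {
	tx.addReadConflictRange(key, key + fdb::Key(1, '\x00')); // range [key, key\x00)
	tx.set(key, value);
}
```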

View File

@@ -160,6 +160,7 @@ private:
     execTransaction(
         // 1. Set the key to val1
         [key, val1](auto ctx) {
+            ctx->tx().addReadConflictRange(key, key + fdb::Key(1, '\x00'));
             ctx->tx().set(key, val1);
             ctx->commit();
         },
@@ -296,6 +297,7 @@ private:
         // 1. Set the key to initial value
         [key, val](auto ctx) {
             ctx->tx().set(key, val);
+            ctx->tx().addReadConflictRange(key, key + fdb::Key(1, '\x00'));
             ctx->commit();
         },
         [this, key, val, cont]() {

View File

@@ -50,6 +50,7 @@ private:
     execTransaction(
         [kvPairs](auto ctx) {
             for (const fdb::KeyValue& kv : *kvPairs) {
+                ctx->tx().addReadConflictRange(kv.key, kv.key + fdb::Key(1, '\x00'));
                 ctx->tx().set(kv.key, kv.value);
             }
             ctx->commit();

View File

@@ -77,10 +77,11 @@ public:
                            int retryLimit,
                            std::string bgBasePath,
                            std::optional<fdb::BytesRef> tenantName,
-                           bool transactional)
+                           bool transactional,
+                           bool restartOnTimeout)
   : executor(executor), startFct(startFct), contAfterDone(cont), scheduler(scheduler), retryLimit(retryLimit),
     txState(TxState::IN_PROGRESS), commitCalled(false), bgBasePath(bgBasePath), tenantName(tenantName),
-    transactional(transactional) {
+    transactional(transactional), restartOnTimeout(restartOnTimeout) {
     databaseCreateErrorInjected = executor->getOptions().injectDatabaseCreateErrors &&
                                   Random::get().randomBool(executor->getOptions().databaseCreateErrorRatio);
     if (databaseCreateErrorInjected) {
@@ -177,7 +178,8 @@ public:
     ASSERT(!onErrorFuture);
-    if (databaseCreateErrorInjected && canBeInjectedDatabaseCreateError(err.code())) {
+    if ((databaseCreateErrorInjected && canBeInjectedDatabaseCreateError(err.code())) ||
+        (restartOnTimeout && err.code() == error_code_transaction_timed_out)) {
         // Failed to create a database because of failure injection
         // Restart by recreating the transaction in a valid database
         recreateAndRestartTransaction();
@@ -235,7 +237,11 @@ protected:
     fdb::Error err = onErrorFuture.error();
     onErrorFuture = {};
     if (err) {
-        transactionFailed(err);
+        if (restartOnTimeout && err.code() == error_code_transaction_timed_out) {
+            recreateAndRestartTransaction();
+        } else {
+            transactionFailed(err);
+        }
     } else {
         restartTransaction();
     }
@@ -359,6 +365,9 @@ protected:
     // Accessed on initialization and in ON_ERROR state only (no need for mutex)
     bool databaseCreateErrorInjected;

+    // Restart the transaction automatically on timeout errors
+    const bool restartOnTimeout;
+
     // The tenant that we will run this transaction in
     const std::optional<fdb::BytesRef> tenantName;
@@ -378,9 +387,17 @@ public:
                             int retryLimit,
                             std::string bgBasePath,
                             std::optional<fdb::BytesRef> tenantName,
-                            bool transactional)
-  : TransactionContextBase(executor, startFct, cont, scheduler, retryLimit, bgBasePath, tenantName, transactional) {
-  }
+                            bool transactional,
+                            bool restartOnTimeout)
+  : TransactionContextBase(executor,
+                           startFct,
+                           cont,
+                           scheduler,
+                           retryLimit,
+                           bgBasePath,
+                           tenantName,
+                           transactional,
+                           restartOnTimeout) {}

 protected:
     void doContinueAfter(fdb::Future f, TTaskFct cont, bool retryOnError) override {
@@ -456,9 +473,17 @@ public:
                             int retryLimit,
                             std::string bgBasePath,
                             std::optional<fdb::BytesRef> tenantName,
-                            bool transactional)
-  : TransactionContextBase(executor, startFct, cont, scheduler, retryLimit, bgBasePath, tenantName, transactional) {
-  }
+                            bool transactional,
+                            bool restartOnTimeout)
+  : TransactionContextBase(executor,
+                           startFct,
+                           cont,
+                           scheduler,
+                           retryLimit,
+                           bgBasePath,
+                           tenantName,
+                           transactional,
+                           restartOnTimeout) {}

 protected:
     void doContinueAfter(fdb::Future f, TTaskFct cont, bool retryOnError) override {
@@ -470,7 +495,7 @@ protected:
         lock.unlock();
         try {
             f.then([this](fdb::Future f) { futureReadyCallback(f, this); });
-        } catch (std::runtime_error& err) {
+        } catch (std::exception& err) {
             lock.lock();
             callbackMap.erase(f);
             lock.unlock();
@@ -482,7 +507,7 @@ protected:
         try {
             AsyncTransactionContext* txCtx = (AsyncTransactionContext*)param;
             txCtx->onFutureReady(f);
-        } catch (std::runtime_error& err) {
+        } catch (std::exception& err) {
             fmt::print("Unexpected exception in callback {}\n", err.what());
             abort();
         } catch (...) {
@@ -544,7 +569,7 @@ protected:
         try {
             AsyncTransactionContext* txCtx = (AsyncTransactionContext*)param;
             txCtx->onErrorReady(f);
-        } catch (std::runtime_error& err) {
+        } catch (std::exception& err) {
             fmt::print("Unexpected exception in callback {}\n", err.what());
             abort();
         } catch (...) {
@@ -673,7 +698,8 @@ public:
     void execute(TOpStartFct startFct,
                  TOpContFct cont,
                  std::optional<fdb::BytesRef> tenantName,
-                 bool transactional) override {
+                 bool transactional,
+                 bool restartOnTimeout) override {
         try {
             std::shared_ptr<ITransactionContext> ctx;
             if (options.blockOnFutures) {
@@ -684,7 +710,8 @@ public:
                                                            options.transactionRetryLimit,
                                                            bgBasePath,
                                                            tenantName,
-                                                           transactional);
+                                                           transactional,
+                                                           restartOnTimeout);
             } else {
                 ctx = std::make_shared<AsyncTransactionContext>(this,
                                                                 startFct,
@@ -693,7 +720,8 @@ public:
                                                                 options.transactionRetryLimit,
                                                                 bgBasePath,
                                                                 tenantName,
-                                                                transactional);
+                                                                transactional,
+                                                                restartOnTimeout);
             }
             startFct(ctx);
         } catch (...) {

View File

@@ -116,7 +116,8 @@ public:
     virtual void execute(TOpStartFct start,
                          TOpContFct cont,
                          std::optional<fdb::BytesRef> tenantName,
-                         bool transactional) = 0;
+                         bool transactional,
+                         bool restartOnTimeout) = 0;
     virtual fdb::Database selectDatabase() = 0;
     virtual std::string getClusterFileForErrorInjection() = 0;
     virtual const TransactionExecutorOptions& getOptions() = 0;

View File

@@ -20,6 +20,7 @@
 #include "TesterWorkload.h"
 #include "TesterUtil.h"
+#include "fdb_c_options.g.h"
 #include "fmt/core.h"
 #include "test/apitester/TesterScheduler.h"
 #include <cstdlib>
@@ -82,6 +83,8 @@ WorkloadBase::WorkloadBase(const WorkloadConfig& config)
   : manager(nullptr), tasksScheduled(0), numErrors(0), clientId(config.clientId), numClients(config.numClients),
     failed(false), numTxCompleted(0), numTxStarted(0), inProgress(false) {
     maxErrors = config.getIntOption("maxErrors", 10);
+    minTxTimeoutMs = config.getIntOption("minTxTimeoutMs", 0);
+    maxTxTimeoutMs = config.getIntOption("maxTxTimeoutMs", 0);
     workloadId = fmt::format("{}{}", config.name, clientId);
 }
@@ -129,9 +132,15 @@ void WorkloadBase::doExecute(TOpStartFct startFct,
     }
     tasksScheduled++;
     numTxStarted++;
-    manager->txExecutor->execute(
-        startFct,
-        [this, startFct, cont, failOnError](fdb::Error err) {
+    manager->txExecutor->execute( //
+        [this, transactional, cont, startFct](auto ctx) {
+            if (transactional && maxTxTimeoutMs > 0) {
+                int timeoutMs = Random::get().randomInt(minTxTimeoutMs, maxTxTimeoutMs);
+                ctx->tx().setOption(FDB_TR_OPTION_TIMEOUT, timeoutMs);
+            }
+            startFct(ctx);
+        },
+        [this, cont, failOnError](fdb::Error err) {
            numTxCompleted++;
            if (err.code() == error_code_success) {
                cont();
@@ -148,7 +157,8 @@ void WorkloadBase::doExecute(TOpStartFct startFct,
            scheduledTaskDone();
        },
        tenant,
-       transactional);
+       transactional,
+       maxTxTimeoutMs > 0);
 }

 void WorkloadBase::info(const std::string& msg) {

View File

@@ -166,6 +166,12 @@ protected:
     // The maximum number of errors before stopping the workload
     int maxErrors;

+    // The timeout (in ms) automatically set for all transactions to a random value
+    // in the range [minTxTimeoutMs, maxTxTimeoutMs]
+    // If maxTxTimeoutMs <= 0, no timeout is set
+    int minTxTimeoutMs;
+    int maxTxTimeoutMs;
+
     // Workload identifier, consisting of workload name and client ID
     std::string workloadId;

View File

@@ -429,7 +429,7 @@ bool runWorkloads(TesterOptions& options) {
         }
         workloadMgr.run();
         return !workloadMgr.failed();
-    } catch (const std::runtime_error& err) {
+    } catch (const std::exception& err) {
         fmt::print(stderr, "ERROR: {}\n", err.what());
         return false;
     }
@@ -461,7 +461,7 @@ int main(int argc, char** argv) {
         fdb_check(fdb::network::stop());
         network_thread.join();
-    } catch (const std::runtime_error& err) {
+    } catch (const std::exception& err) {
         fmt::print(stderr, "ERROR: {}\n", err.what());
         retCode = 1;
     }

View File

@@ -0,0 +1,25 @@
[[test]]
title = 'Cancel Transactions with Timeouts'
multiThreaded = true
buggify = true
minFdbThreads = 2
maxFdbThreads = 8
minDatabases = 2
maxDatabases = 8
minClientThreads = 2
maxClientThreads = 8
minClients = 2
maxClients = 8

[[test.workload]]
name = 'CancelTransaction'
minKeyLength = 1
maxKeyLength = 64
minValueLength = 1
maxValueLength = 1000
maxKeysPerTransaction = 50
initialSize = 100
numRandomOperations = 100
readExistingKeysRatio = 0.9
minTxTimeoutMs = 10
maxTxTimeoutMs = 10000

View File

@@ -0,0 +1,33 @@
[[test]]
title = 'API Correctness with Timeouts'
multiThreaded = true
buggify = true
minFdbThreads = 2
maxFdbThreads = 8
minDatabases = 2
maxDatabases = 8
minClientThreads = 2
maxClientThreads = 8
minClients = 2
maxClients = 8

[[test.workload]]
name = 'ApiCorrectness'
minKeyLength = 1
maxKeyLength = 64
minValueLength = 1
maxValueLength = 1000
maxKeysPerTransaction = 50
initialSize = 100
numRandomOperations = 100
readExistingKeysRatio = 0.9
minTxTimeoutMs = 100
maxTxTimeoutMs = 10000

[[test.workload]]
name = 'AtomicOpsCorrectness'
initialSize = 0
numRandomOperations = 100
minTxTimeoutMs = 100
maxTxTimeoutMs = 10000

View File

@@ -1199,6 +1199,8 @@ void usage() {
 	printf("%-24s %s\n", " --flatbuffers", "Use flatbuffers");
 	printf("%-24s %s\n", " --streaming", "Streaming mode: all (default), iterator, small, medium, large, serial");
 	printf("%-24s %s\n", " --disable_ryw", "Disable snapshot read-your-writes");
+	printf(
+	    "%-24s %s\n", " --disable_client_bypass", "Disable client-bypass forcing mako to use multi-version client");
 	printf("%-24s %s\n", " --json_report=PATH", "Output stats to the specified json file (Default: mako.json)");
 	printf("%-24s %s\n",
 	       " --bg_file_path=PATH",

View File

@@ -392,6 +392,11 @@ func (o DatabaseOptions) SetTransactionIncludePortInAddress() error {
 	return o.setOpt(505, nil)
 }

+// Set a random idempotency id for all transactions. See the transaction option description for more information.
+func (o DatabaseOptions) SetTransactionAutomaticIdempotency() error {
+	return o.setOpt(506, nil)
+}
+
 // Allows ``get`` operations to read from sections of keyspace that have become unreadable because of versionstamp operations. This sets the ``bypass_unreadable`` option of each transaction created by this database. See the transaction option description for more information.
 func (o DatabaseOptions) SetTransactionBypassUnreadable() error {
 	return o.setOpt(700, nil)
@@ -551,6 +556,18 @@ func (o TransactionOptions) SetSizeLimit(param int64) error {
 	return o.setOpt(503, int64ToBytes(param))
 }

+// Associate this transaction with this ID for the purpose of checking whether or not this transaction has already committed. Must be at least 16 bytes and less than 256 bytes.
+//
+// Parameter: Unique ID
+func (o TransactionOptions) SetIdempotencyId(param string) error {
+	return o.setOpt(504, []byte(param))
+}
+
+// Automatically assign a random 16 byte idempotency id for this transaction. Prevents commits from failing with ``commit_unknown_result``. WARNING: If you are also using the multiversion client or transaction timeouts, if either cluster_version_changed or transaction_timed_out was thrown during a commit, then that commit may have already succeeded or may succeed in the future.
+func (o TransactionOptions) SetAutomaticIdempotency() error {
+	return o.setOpt(505, nil)
+}
+
 // Snapshot read operations will see the results of writes done in the same transaction. This is the default behavior.
 func (o TransactionOptions) SetSnapshotRywEnable() error {
 	return o.setOpt(600, nil)

View File

@@ -56,7 +56,7 @@ endfunction()
 # all these tests in serialized order and within the same directory. This is
 # useful for restart tests
 function(add_fdb_test)
-  set(options UNIT IGNORE)
+  set(options UNIT IGNORE LONG_RUNNING)
   set(oneValueArgs TEST_NAME TIMEOUT)
   set(multiValueArgs TEST_FILES)
   cmake_parse_arguments(ADD_FDB_TEST "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}")
@@ -106,6 +106,9 @@ function(add_fdb_test)
   if(ADD_FDB_TEST_UNIT)
     message(STATUS
       "ADDING UNIT TEST ${assigned_id} ${test_name}")
+  elseif(ADD_FDB_TEST_LONG_RUNNING)
+    message(STATUS
+      "ADDING LONG RUNNING TEST ${assigned_id} ${test_name}")
   else()
     message(STATUS
       "ADDING SIMULATOR TEST ${assigned_id} ${test_name}")
@@ -150,9 +153,15 @@ function(add_fdb_test)
     endif()
   endif()
   # set variables used for generating test packages
-  set(TEST_NAMES ${TEST_NAMES} ${test_name} PARENT_SCOPE)
-  set(TEST_FILES_${test_name} ${ADD_FDB_TEST_TEST_FILES} PARENT_SCOPE)
-  set(TEST_TYPE_${test_name} ${test_type} PARENT_SCOPE)
+  if(ADD_FDB_TEST_LONG_RUNNING)
+    set(LONG_RUNNING_TEST_NAMES ${LONG_RUNNING_TEST_NAMES} ${test_name} PARENT_SCOPE)
+    set(LONG_RUNNING_TEST_FILES_${test_name} ${ADD_FDB_TEST_TEST_FILES} PARENT_SCOPE)
+    set(LONG_RUNNING_TEST_TYPE_${test_name} ${test_type} PARENT_SCOPE)
+  else()
+    set(TEST_NAMES ${TEST_NAMES} ${test_name} PARENT_SCOPE)
+    set(TEST_FILES_${test_name} ${ADD_FDB_TEST_TEST_FILES} PARENT_SCOPE)
+    set(TEST_TYPE_${test_name} ${test_type} PARENT_SCOPE)
+  endif()
 endfunction()

 if(NOT WIN32)
@@ -167,14 +176,21 @@ endif()
 # - OUT_DIR the directory where files will be staged
 # - CONTEXT the type of correctness package being built (e.g. 'valgrind correctness')
 function(stage_correctness_package)
+  set(options LONG_RUNNING)
   set(oneValueArgs OUT_DIR CONTEXT OUT_FILES)
-  cmake_parse_arguments(STAGE "" "${oneValueArgs}" "" "${ARGN}")
+  set(multiValueArgs TEST_LIST)
+  cmake_parse_arguments(STAGE "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}")
   file(MAKE_DIRECTORY ${STAGE_OUT_DIR}/bin)
-  string(LENGTH "${CMAKE_SOURCE_DIR}/tests/" base_length)
-  foreach(test IN LISTS TEST_NAMES)
+  foreach(test IN LISTS STAGE_TEST_LIST)
     if((${test} MATCHES ${TEST_PACKAGE_INCLUDE}) AND
        (NOT ${test} MATCHES ${TEST_PACKAGE_EXCLUDE}))
-      foreach(file IN LISTS TEST_FILES_${test})
+      string(LENGTH "${CMAKE_SOURCE_DIR}/tests/" base_length)
+      if(STAGE_LONG_RUNNING)
+        set(TEST_FILES_PREFIX "LONG_RUNNING_TEST_FILES")
+      else()
+        set(TEST_FILES_PREFIX "TEST_FILES")
+      endif()
+      foreach(file IN LISTS ${TEST_FILES_PREFIX}_${test})
        string(SUBSTRING ${file} ${base_length} -1 rel_out_file)
        set(out_file ${STAGE_OUT_DIR}/tests/${rel_out_file})
        list(APPEND test_files ${out_file})
@@ -265,7 +281,7 @@ function(create_correctness_package)
    return()
  endif()
  set(out_dir "${CMAKE_BINARY_DIR}/correctness")
-  stage_correctness_package(OUT_DIR ${out_dir} CONTEXT "correctness" OUT_FILES package_files)
+  stage_correctness_package(OUT_DIR ${out_dir} CONTEXT "correctness" OUT_FILES package_files TEST_LIST "${TEST_NAMES}")
  set(tar_file ${CMAKE_BINARY_DIR}/packages/correctness-${FDB_VERSION}.tar.gz)
  add_custom_command(
    OUTPUT ${tar_file}
@@ -294,13 +310,47 @@ function(create_correctness_package)
  add_dependencies(package_tests_u package_tests)
 endfunction()

+function(create_long_running_correctness_package)
+  if(WIN32)
+    return()
+  endif()
+  set(out_dir "${CMAKE_BINARY_DIR}/long_running_correctness")
+  stage_correctness_package(OUT_DIR ${out_dir} CONTEXT "long running correctness" OUT_FILES package_files TEST_LIST "${LONG_RUNNING_TEST_NAMES}" LONG_RUNNING)
+  set(tar_file ${CMAKE_BINARY_DIR}/packages/long-running-correctness-${FDB_VERSION}.tar.gz)
+  add_custom_command(
+    OUTPUT ${tar_file}
+    DEPENDS ${package_files}
+            ${CMAKE_SOURCE_DIR}/contrib/Joshua/scripts/correctnessTest.sh
+            ${CMAKE_SOURCE_DIR}/contrib/Joshua/scripts/correctnessTimeout.sh
+    COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/contrib/Joshua/scripts/correctnessTest.sh
+            ${out_dir}/joshua_test
+    COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/contrib/Joshua/scripts/correctnessTimeout.sh
+            ${out_dir}/joshua_timeout
+    COMMAND ${CMAKE_COMMAND} -E tar cfz ${tar_file} ${package_files}
+            ${out_dir}/joshua_test
+            ${out_dir}/joshua_timeout
+    WORKING_DIRECTORY ${out_dir}
+    COMMENT "Package long running correctness archive"
+  )
+  add_custom_target(package_long_running_tests ALL DEPENDS ${tar_file})
+  add_dependencies(package_long_running_tests strip_only_fdbserver TestHarness)
+  set(unversioned_tar_file "${CMAKE_BINARY_DIR}/packages/long_running_correctness.tar.gz")
+  add_custom_command(
+    OUTPUT "${unversioned_tar_file}"
+    DEPENDS "${tar_file}"
+    COMMAND ${CMAKE_COMMAND} -E copy "${tar_file}" "${unversioned_tar_file}"
+    COMMENT "Copy long running correctness package to ${unversioned_tar_file}")
+  add_custom_target(package_long_running_tests_u DEPENDS "${unversioned_tar_file}")
+  add_dependencies(package_long_running_tests_u package_long_running_tests)
+endfunction()
+
 function(create_valgrind_correctness_package)
  if(WIN32)
    return()
  endif()
  if(USE_VALGRIND)
    set(out_dir "${CMAKE_BINARY_DIR}/valgrind_correctness")
-    stage_correctness_package(OUT_DIR ${out_dir} CONTEXT "valgrind correctness" OUT_FILES package_files)
+    stage_correctness_package(OUT_DIR ${out_dir} CONTEXT "valgrind correctness" OUT_FILES package_files TEST_LIST "${TEST_NAMES}")
    set(tar_file ${CMAKE_BINARY_DIR}/packages/valgrind-${FDB_VERSION}.tar.gz)
    add_custom_command(
      OUTPUT ${tar_file}

View File

@@ -26,6 +26,7 @@ env_set(TRACE_PC_GUARD_INSTRUMENTATION_LIB "" STRING "Path to a library containi
 env_set(PROFILE_INSTR_GENERATE OFF BOOL "If set, build FDB as an instrumentation build to generate profiles")
 env_set(PROFILE_INSTR_USE "" STRING "If set, build FDB with profile")
 env_set(FULL_DEBUG_SYMBOLS OFF BOOL "Generate full debug symbols")
+env_set(ENABLE_LONG_RUNNING_TESTS OFF BOOL "Add a long running tests package")

 set(USE_SANITIZER OFF)
 if(USE_ASAN OR USE_VALGRIND OR USE_MSAN OR USE_TSAN OR USE_UBSAN)

View File

@@ -128,6 +128,35 @@
 set_knob(db, 'min_trace_severity', '10', None, 'description')
 set_knob(db, 'min_trace_severity', '20', 'az-1', 'description')
 ```

+### CLI Usage
+Users may also use `fdbcli` to set and update knobs dynamically. Usage is as follows:
+```
+setknob <knob_name> <knob_value> [config_class]
+getknob <knob_name> [config_class]
+```
+Here `knob_name` is an existing knob, `knob_value` is the desired value, and `config_class` is the optional configuration class. `setknob` may also be combined within a `begin\commit` block to update multiple knobs atomically. In that case a description must follow `commit`; otherwise a prompt will ask for one. The description must be non-empty. An example follows.
+```
+begin
+setknob min_trace_severity 30
+setknob tracing_udp_listener_addr 192.168.0.1
+commit "fdbcli change"
+```
+Knob configuration changes may only be combined with other knob configuration changes in the same transaction. For example, the following is not permitted and will raise an error.
+```
+begin
+set foo bar
+setknob max_metric_size 1000
+commit "change"
+```
+Specifically, `set`, `clear`, `get`, `getrange`, and `clearrange` cannot be combined in any transaction with a `setknob` or `getknob`.
+
+An individual `setknob` outside a `begin\commit` block will likewise prompt for a description.
+
+#### Type checking
+Knobs have implicit types attached to them when defined. For example, the knob `tracing_udp_listener_addr` defaults to `"127.0.0.1"`, so its type is string. If a user invokes `setknob` on this knob with a value that is not a string, the transaction will fail.
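As a hypothetical illustration of this type check (the exact error text may differ), committing a non-numeric value for an integer knob fails:

```
begin
setknob min_trace_severity not-a-number
commit "attempt an invalid knob value"
```

The commit is rejected and the knob keeps its previous value; the `knobmanagement` fdbcli test added later in this commit exercises exactly this case.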
 ### Disable the Configuration Database

 The configuration database includes both client and server changes and is

design/idempotency_ids.md (new file, 106 lines)
View File

@@ -0,0 +1,106 @@
# Goals
The main goal is to make transactions safer and easier to reason about. New users should get a "just works" experience. One of the main selling points of FoundationDB is that it solves the hard distributed systems problems for you, so that you only need to concern yourself with your business logic. Non-idempotent transactions are probably the biggest "gotcha" that users need to be made aware of -- and they won't discover it organically. In order to achieve this "just works" experience I believe it is necessary to make automatic idempotency have low-enough overhead so that we can enable it by default.
As an intermediate goal, I plan to introduce this feature disabled by default. The long-term plan is to make it the default.
# API
Introduce a new transaction option `IDEMPOTENCY_ID`, which will be validated to be at most 255 bytes.
Add
```
FDBFuture* fdb_transaction_commit_result(FDBTransaction* tr, uint8_t const* idempotency_id, int idempotency_id_length)
```
, which can be used to determine the result of a commit that failed with `transaction_timed_out`.
Commits for transactions with idempotency ids would not fail with `commit_unknown_result`, but in (extremely) rare cases could fail with a new error that clients are expected to handle by restarting the process.
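A rough sketch of how a client might drive this proposed API (`fdb_transaction_commit_result` is the proposal above, not a shipped C API function; `waitAndGetError` is a hypothetical helper combining `fdb_future_block_until_ready` and `fdb_future_get_error`):

```cpp
#include <foundationdb/fdb_c.h>

// Hypothetical helper: block on the future, then return its error code.
fdb_error_t waitAndGetError(FDBFuture* f) {
	fdb_error_t blockErr = fdb_future_block_until_ready(f);
	return blockErr ? blockErr : fdb_future_get_error(f);
}

fdb_error_t commitWithIdempotencyId(FDBTransaction* tr, const uint8_t* id, int idLen) {
	FDBFuture* commitF = fdb_transaction_commit(tr);
	fdb_error_t err = waitAndGetError(commitF);
	fdb_future_destroy(commitF);
	if (err != 1031 /* transaction_timed_out */)
		return err; // success, or an error handled by the usual on_error loop
	// The commit may or may not have happened; ask the cluster using the id.
	FDBFuture* resultF = fdb_transaction_commit_result(tr, id, idLen);
	err = waitAndGetError(resultF);
	fdb_future_destroy(resultF);
	return err; // 0 if the earlier, timed-out attempt actually committed
}
```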
# Background
- https://forums.foundationdb.org/t/automatically-providing-transaction-idempotency/1873
- https://github.com/apple/foundationdb/issues/1321
- https://docs.google.com/document/d/19LDQuurg4Tt8eUcig3-8g2VOG9ZpQvtWrp_691RqMo8/edit#
# Data model
Commit proxies would combine idempotency IDs for transactions within a batch. The purpose of this is to try to limit the number of distinct database keys that need to be written, and to lessen the number of extra mutation bytes for idempotency IDs.
## Key format
```
\xff\x02/idmp/${commit_version_big_endian (8 bytes)}${high_order_byte_of_batch_index (1 byte)}
```
- `commit_version_big_endian` the commit version stored big-endian so that the cleaner worker can find the oldest idempotency ids easily, and also so that "unknown_committed" transactions can recover their commit version.
- `high_order_byte_of_batch_index` this limits us to 256 idempotency ids per value
## Value format
```
${protocol_version}(${n (1 byte)}${idempotency_id (n bytes)}${low_order_byte_of_batch_index})*
```
The batch index for each idempotency id can be reconstructed from the high-order and low-order bytes stored in the key and value, respectively. This is necessary for an "unknown_committed" transaction to recover its full versionstamp. The batch index is a `short int`, i.e. 2 bytes.
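A minimal sketch of this layout (helper names hypothetical; the prefix and byte positions are taken from the format strings above):

```cpp
#include <cstdint>
#include <string>

// Key: \xff\x02/idmp/ + 8-byte big-endian commit version + high-order batch-index byte.
std::string idmpKey(uint64_t commitVersion, uint16_t batchIndex) {
	std::string key("\xff\x02/idmp/", 8);
	for (int i = 7; i >= 0; --i) // big-endian, so the oldest versions sort first
		key.push_back(static_cast<char>((commitVersion >> (8 * i)) & 0xff));
	key.push_back(static_cast<char>(batchIndex >> 8)); // high-order byte only
	return key;
}

// The low-order byte is stored in the value next to each idempotency id; an
// "unknown_committed" transaction recombines the two to recover the 2-byte
// batch index of its 10-byte versionstamp (8-byte version + 2-byte batch index).
uint16_t recoverBatchIndex(uint8_t highByteFromKey, uint8_t lowByteFromValue) {
	return static_cast<uint16_t>((highByteFromKey << 8) | lowByteFromValue);
}
```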
# Cleaning up old idempotency ids
After learning the result of an attempt to commit a transaction with an
idempotency id, the client may inform the cluster that it's no longer interested
in that id and the cluster can reclaim the space used to store the idempotency
id. The happy-path reply to a CommitTransactionRequest will say which proxy this
request should be sent to, and all idempotency ids for a database key will be
sent to the same proxy so that it can clear the key once it receives all of
them. The first proxy will also periodically clean up the oldest idempotency ids, based on a policy determined by two knobs. One knob will control the minimum lifetime of an idempotency id (i.e. don't delete anything younger than 1 day), and the other will control the target byte size of the idempotency keys (e.g. keep 100 MB of idempotency keys around).
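For instance, the cleaner's decision could look like the following sketch (names hypothetical; the two dials are exactly the knobs described above):

```cpp
#include <cstdint>

struct IdmpCleanerPolicy {
	int64_t minIdAgeSeconds; // never delete ids younger than this (e.g. one day)
	int64_t targetIdmpBytes; // keep roughly this many bytes of idempotency keys (e.g. 100 MB)
};

// Delete from the oldest end of the \xff\x02/idmp/ range only while the range
// is over its byte budget AND the oldest id has outlived the minimum lifetime.
bool shouldDeleteOldest(const IdmpCleanerPolicy& policy, int64_t idmpRangeBytes, int64_t oldestIdAgeSeconds) {
	return idmpRangeBytes > policy.targetIdmpBytes && oldestIdAgeSeconds > policy.minIdAgeSeconds;
}
```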
# Commit protocol
The basic change will be that a commit future will not become ready until the client confirms whether or not the commit succeeded. (`transaction_timed_out` is an unfortunate exception here)
The idempotency id will be automatically added to both the read conflict range and the write conflict range, before makeSelfConflicting is called so that we don't duplicate that work. We can reuse the `\xff/SC/` self-conflicting key space here.
## Did I already commit?
The first version of this scans the keys in the idmp key range to check for the idempotency ids. The plan for the next version is the following:
Storage servers would have a new endpoint that clients can use to ask if the transaction for an idempotency id already committed. Clients would need to check every possible shard that their idempotency id may have ended up in.
Storage servers would maintain a map from idempotency id to versionstamp in memory, and clients would need to contact all storage servers responsible for the `[\xff\x02/idmp/, \xff\x02/idmp0)` keyspace to be sure of their commit status. Assuming an idempotency id + versionstamp is 16 + 10 bytes, and that the lifetime of most idempotency ids is less than 1 second, that corresponds to at least 260 MB of memory on the storage server at 1,000,000 transactions/s, which seems acceptable. Let's double that to account for things like hash table load factor and allocating extra memory to ensure amortized constant time insertion. Still seems acceptable. We probably want to use a hashtable with open addressing to avoid frequent heap allocations. I _think_ [swisstables](https://abseil.io/about/design/swisstables) would work here.
When a transaction learns that it did in fact commit, the commit future succeeds, and the versionstamp gets filled with the original, successful transaction's versionstamp. After the successful commit is reported, it's no longer necessary to store its idempotency ID. The client will send an RPC to the cleaner role indicating that it can remove this idempotency ID.
If a transaction learns that it did in fact _not_ commit, the commit future will fail with an error that indicates that the transaction did not commit. Perhaps `transaction_too_old`.
If a transaction learns that it has been in-flight so long that its idempotency id could have been expired, then it will fail with a new, non-retriable error. It is expected that this will be rare enough that crashing the application is acceptable.
# Considerations
- Additional storage space on the cluster. This can be controlled directly via an idempotency id target bytes knob/config.
- Potential write hot spot.
# Multi-version client
The multi-version client will generate its own idempotency id for a transaction and manage its lifecycle. It will duplicate the logic in NativeApi to achieve the same guarantees. As part of this change we will also ensure that the previous commit attempt is no longer in-flight before allowing the commit future to become ready. This will fix a potential "causal-write-risky" issue if a commit attempt fails with `cluster_version_changed`.
# Experiments
- Initial experiments show that this is about 1% overhead for the worst case workload which is transactions that only update a single key.
```
Single replication redwood cluster with dedicated ebs disks for tlog and storage. All tests saturated the tlog disk's IOPs.
volume_type: gp3
volume_size: 384
iops: 9000
throughput: 250
$ bin/mako --mode run --rows 1000000 -x u1 -p 8 -t 8 --cluster=$HOME/fdb.cluster --seconds 100 # already warm, but quiesced
Baseline:
19714.67 TPS
"user space" method of writing idempotency id -> versionstamp in every transaction:
13831.00 TPS
"combine idempotency ids in transaction batch" method:
19515.62 TPS
```

View File

@@ -203,6 +203,13 @@ The ``get`` command fetches the value of a given key. Its syntax is ``get <KEY>``
 Note that :ref:`characters can be escaped <cli-escaping>` when specifying keys (or values) in ``fdbcli``.

+getknob
+-------
+
+The ``getknob`` command fetches the value of a given knob that has been populated by ``setknob``. Its syntax is ``getknob <KNOBNAME> [CONFIGCLASS]``. It displays the value of ``<KNOBNAME>`` if present in the database, and ``not found`` otherwise.
+
+Note that :ref:`characters can be escaped <cli-escaping>` when specifying keys (or values) in ``fdbcli``.
+
 getrange
 --------
@@ -395,6 +402,13 @@ The ``setclass`` command can be used to change the :ref:`process class <guidelin
 The available process classes are ``unset``, ``storage``, ``transaction``, ``resolution``, ``grv_proxy``, ``commit_proxy``, ``master``, ``test``, ``unset``, ``stateless``, ``log``, ``router``, ``cluster_controller``, ``fast_restore``, ``data_distributor``, ``coordinator``, ``ratekeeper``, ``storage_cache``, ``backup``, and ``default``.

+setknob
+-------
+
+The ``setknob`` command can be used to set knobs dynamically. Its syntax is ``setknob <KNOBNAME> <KNOBVALUE> [CONFIGCLASS]``. If not run inside a ``begin\commit`` block, the CLI will prompt for a description of the change.
+
+Note that :ref:`characters can be escaped <cli-escaping>` when specifying keys (or values) in ``fdbcli``.
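A hypothetical session combining the two commands (the prompt text and output format follow the fdbcli changes elsewhere in this commit):

```
fdb> setknob min_trace_severity 30
Please set a description for the change. Description must be non-empty.
description: raise minimum trace severity
Committed (...)
fdb> getknob min_trace_severity
`min_trace_severity' is `30'
```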
 sleep
 -----

View File

@@ -499,11 +499,14 @@ void initHelp() {
 	    "transaction, and are automatically committed for you. By explicitly beginning a transaction, "
 	    "successive operations are all performed as part of a single transaction.\n\nTo commit the "
 	    "transaction, use the commit command. To discard the transaction, use the reset command.");
-	helpMap["commit"] = CommandHelp("commit",
+	helpMap["commit"] = CommandHelp("commit [description]",
 	                                "commit the current transaction",
 	                                "Any sets or clears executed after the start of the current transaction will be "
 	                                "committed to the database. On success, the committed version number is displayed. "
-	                                "If commit fails, the error is displayed and the transaction must be retried.");
+	                                "If commit fails, the error is displayed and the transaction must be retried. The "
+	                                "command optionally allows for a description in case the transaction targets the "
+	                                "configuration database. If no description is provided in the command, a prompt "
+	                                "will be shown asking for a relevant description of the configuration change");
 	helpMap["clear"] = CommandHelp(
 	    "clear <KEY>",
 	    "clear a key from the database",
@@ -552,6 +555,14 @@ void initHelp() {
 	helpMap["set"] = CommandHelp("set <KEY> <VALUE>",
 	                             "set a value for a given key",
 	                             "If KEY is not already present in the database, it will be created." ESCAPINGKV);
+	helpMap["setknob"] = CommandHelp("setknob <KEY> <VALUE> [CONFIG_CLASS]",
+	                                 "updates a knob to specified value",
+	                                 "setknob will prompt for a description of the changes" ESCAPINGKV);
+	helpMap["getknob"] = CommandHelp(
+	    "getknob <KEY> [CONFIG_CLASS]", "gets the value of the specified knob", "CONFIG_CLASS is optional." ESCAPINGK);
 	helpMap["option"] = CommandHelp(
 	    "option <STATE> <OPTION> <ARG>",
 	    "enables or disables an option",
@@ -1050,12 +1061,17 @@ Future<T> stopNetworkAfter(Future<T> what) {
 	}
 }

+enum TransType { Db = 0, Config, None };
+
 ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise, Reference<ClusterConnectionFile> ccf) {
 	state LineNoise& linenoise = *plinenoise;
 	state bool intrans = false;
+	state TransType transtype = TransType::None;
+	state bool isCommitDesc = false;
 	state Database localDb;
 	state Reference<IDatabase> db;
+	state Reference<IDatabase> configDb;
 	state Reference<ITenant> tenant;
 	state Optional<TenantName> tenantName;
 	state Optional<TenantMapEntry> tenantEntry;
@@ -1064,6 +1080,7 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise, Reference<ClusterCo
 	state const Reference<ITenant> managementTenant;
 	state Reference<ITransaction> tr;
+	state Reference<ITransaction> config_tr;
 	state Transaction trx;
 	state bool writeMode = false;
@@ -1085,6 +1102,8 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise, Reference<ClusterCo
 			printf("Using cluster file `%s'.\n", ccf->getLocation().c_str());
 		}
 		db = API->createDatabase(opt.clusterFile.c_str());
+		configDb = API->createDatabase(opt.clusterFile.c_str());
+		configDb->setOption(FDBDatabaseOptions::USE_CONFIG_DATABASE);
 	} catch (Error& e) {
 		fprintf(stderr, "ERROR: %s (%d)\n", e.what(), e.code());
 		printf("Unable to connect to cluster from `%s'\n", ccf->getLocation().c_str());
@@ -1442,23 +1461,46 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise, Reference<ClusterCo
 				} else {
 					activeOptions = FdbOptions(globalOptions);
 					options = &activeOptions;
-					getTransaction(db, tenant, tr, options, false);
 					intrans = true;
+					transtype = TransType::None;
+					getTransaction(db, tenant, tr, options, false);
 					printf("Transaction started\n");
 				}
 				continue;
 			}

 			if (tokencmp(tokens[0], "commit")) {
-				if (tokens.size() != 1) {
+				if (tokens.size() > 2) {
 					printUsage(tokens[0]);
 					is_error = true;
 				} else if (!intrans) {
 					fprintf(stderr, "ERROR: No active transaction\n");
 					is_error = true;
 				} else {
-					wait(commitTransaction(tr));
+					if (isCommitDesc && tokens.size() == 1) {
+						// prompt for description and add to txn
+						state Optional<std::string> raw;
+						while (!raw.present() || raw.get().empty()) {
+							fprintf(stdout,
+							        "Please set a description for the change. Description must be non-empty.\n");
+							state Optional<std::string> rawline =
+							    wait(makeInterruptable(linenoise.read("description: ")));
+							raw = rawline;
+						}
+						std::string line = raw.get();
+						config_tr->set("\xff\xff/description"_sr, line);
+					}
+					if (transtype == TransType::Db) {
+						wait(commitTransaction(tr));
+					} else {
+						if (tokens.size() > 1) {
+							config_tr->set("\xff\xff/description"_sr, tokens[1]);
+						}
+						wait(commitTransaction(config_tr));
+					}
+					isCommitDesc = false;
 					intrans = false;
+					transtype = TransType::None;
 					options = &globalOptions;
 				}
@@ -1481,10 +1523,16 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise, Reference<ClusterCo
 					fprintf(stderr, "ERROR: No active transaction\n");
 					is_error = true;
 				} else {
-					tr->reset();
-					activeOptions = FdbOptions(globalOptions);
-					options = &activeOptions;
-					options->apply(tr);
+					if (transtype == TransType::Config) {
+						config_tr->reset();
+					} else {
+						tr->reset();
+						activeOptions = FdbOptions(globalOptions);
+						options = &activeOptions;
+						options->apply(tr);
+					}
+					isCommitDesc = false;
+					transtype = TransType::None;
 					printf("Transaction reset\n");
 				}
 				continue;
@@ -1510,6 +1558,15 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise, Reference<ClusterCo
 					printUsage(tokens[0]);
 					is_error = true;
 				} else {
+					if (intrans) {
+						if (transtype == TransType::None) {
+							transtype = TransType::Db;
+						} else if (transtype == TransType::Config) {
+							fprintf(stderr, "ERROR: Cannot perform get in configuration transaction\n");
+							is_error = true;
+							continue;
+						}
+					}
 					state ThreadFuture<Optional<Value>> valueF =
 					    getTransaction(db, tenant, tr, options, intrans)->get(tokens[1]);
 					Optional<Standalone<StringRef>> v = wait(makeInterruptable(safeThreadFutureToFuture(valueF)));
@@ -1618,7 +1675,17 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise, Reference<ClusterCo
 				} else {
 					state int limit;
 					bool valid = true;
+					if (intrans) {
+						if (transtype == TransType::None) {
+							transtype = TransType::Db;
+						} else if (transtype == TransType::Config) {
+							fprintf(
+							    stderr,
+							    "ERROR: Cannot perform getrange or getrangekeys in configuration transaction\n");
+							is_error = true;
+							continue;
+						}
+					}
 					if (tokens.size() == 4) {
 						// INT_MAX is 10 digits; rather than
 						// worrying about overflow we'll just cap
@@ -1707,6 +1774,15 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise, Reference<ClusterCo
 					printUsage(tokens[0]);
 					is_error = true;
 				} else {
+					if (intrans) {
+						if (transtype == TransType::None) {
+							transtype = TransType::Db;
+						} else if (transtype == TransType::Config) {
+							fprintf(stderr, "ERROR: Cannot perform set in configuration transaction\n");
+							is_error = true;
+							continue;
+						}
+					}
 					getTransaction(db, tenant, tr, options, intrans);
 					tr->set(tokens[1], tokens[2]);
@@ -1717,6 +1793,91 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise, Reference<ClusterCo
 				continue;
 			}

+			if (tokencmp(tokens[0], "setknob")) {
+				if (tokens.size() > 4 || tokens.size() < 3) {
+					printUsage(tokens[0]);
+					is_error = true;
+				} else {
+					if (intrans) {
+						if (transtype == TransType::None) {
+							transtype = TransType::Config;
+						} else if (transtype == TransType::Db) {
+							fprintf(stderr, "ERROR: Cannot perform setknob in database transaction\n");
+							is_error = true;
+							isCommitDesc = false;
+							continue;
+						}
+					}
+					Tuple t;
+					if (tokens.size() == 4) {
+						t.append(tokens[3]);
+					} else {
+						t.appendNull();
+					}
+					t.append(tokens[1]);
+					getTransaction(configDb, tenant, config_tr, options, intrans);
+					config_tr->set(t.pack(), tokens[2]);
+					if (!intrans) {
+						// prompt for description and add to txn
+						state Optional<std::string> raw_desc;
+						while (!raw_desc.present() || raw_desc.get().empty()) {
+							fprintf(stdout,
+							        "Please set a description for the change. Description must be non-empty\n");
+							state Optional<std::string> rawline_knob =
+							    wait(makeInterruptable(linenoise.read("description: ")));
+							raw_desc = rawline_knob;
+						}
+						std::string line = raw_desc.get();
+						config_tr->set("\xff\xff/description"_sr, line);
+						wait(commitTransaction(config_tr));
+					} else {
+						isCommitDesc = true;
+					}
+				}
+				continue;
+			}
+
+			if (tokencmp(tokens[0], "getknob")) {
+				if (tokens.size() > 3 || tokens.size() < 2) {
+					printUsage(tokens[0]);
+					is_error = true;
+				} else {
+					if (intrans) {
+						if (transtype == TransType::None) {
+							transtype = TransType::Config;
+						} else if (transtype == TransType::Db) {
+							fprintf(stderr, "ERROR: Cannot perform getknob in database transaction\n");
+							is_error = true;
+							continue;
+						}
+					}
+					Tuple t;
+					if (tokens.size() == 2) {
+						t.appendNull();
+					} else {
+						t.append(tokens[2]);
+					}
+					t.append(tokens[1]);
+					state ThreadFuture<Optional<Value>> valueF_knob =
+					    getTransaction(configDb, tenant, config_tr, options, intrans)->get(t.pack());
+					Optional<Standalone<StringRef>> v =
+					    wait(makeInterruptable(safeThreadFutureToFuture(valueF_knob)));
+					std::string knob_class = printable(tokens[1]);
+					if (tokens.size() == 3) {
+						std::string config_class = (" in configuration class " + printable(tokens[2]));
+						knob_class += config_class;
+					}
+					if (v.present())
+						printf("`%s' is `%s'\n",
+						       knob_class.c_str(),
+						       Tuple::tupleToString(Tuple::unpack(v.get())).c_str());
+					else
+						printf("`%s' is not found\n", knob_class.c_str());
+				}
+				continue;
+			}
+
 			if (tokencmp(tokens[0], "clear")) {
 				if (!writeMode) {
 					fprintf(stderr, "ERROR: writemode must be enabled to set or clear keys in the database.\n");
@@ -1728,6 +1889,15 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise, Reference<ClusterCo
 					printUsage(tokens[0]);
 					is_error = true;
 				} else {
+					if (intrans) {
+						if (transtype == TransType::None) {
+							transtype = TransType::Db;
+						} else if (transtype == TransType::Config) {
+							fprintf(stderr, "ERROR: Cannot perform clear in configuration transaction\n");
+							is_error = true;
+							continue;
+						}
+					}
 					getTransaction(db, tenant, tr, options, intrans);
 					tr->clear(tokens[1]);
@@ -1749,6 +1919,15 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise, Reference<ClusterCo
 					printUsage(tokens[0]);
 					is_error = true;
 				} else {
+					if (intrans) {
+						if (transtype == TransType::None) {
+							transtype = TransType::Db;
+						} else if (transtype == TransType::Config) {
+							fprintf(stderr, "ERROR: Cannot perform clearrange in configuration transaction\n");
+							is_error = true;
+							continue;
+						}
+					}
 					getTransaction(db, tenant, tr, options, intrans);
 					tr->clear(KeyRangeRef(tokens[1], tokens[2]));
@@ -1928,7 +2107,6 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise, Reference<ClusterCo
 			}
 			TraceEvent(SevInfo, "CLICommandLog", randomID).detail("Command", line).detail("IsError", is_error);
 		} catch (Error& e) {
-
 			if (e.code() == error_code_operation_cancelled) {
 				throw;

View File

@@ -334,6 +334,57 @@ def consistencycheck(logger):
     assert output3 == consistency_check_on_output


+@enable_logging()
+def knobmanagement(logger):
+    # this test will set knobs and verify that the knobs are properly set
+    # must use begin/commit to avoid prompt for description
+
+    # Incorrect arguments
+    output = run_fdbcli_command('setknob')
+    assert output == "Usage: setknob <KEY> <VALUE> [CONFIG_CLASS]"
+    output = run_fdbcli_command('setknob', 'min_trace_severity')
+    assert output == "Usage: setknob <KEY> <VALUE> [CONFIG_CLASS]"
+    output = run_fdbcli_command('getknob')
+    assert output == "Usage: getknob <KEY> [CONFIG_CLASS]"
+    logger.debug("incorrect args passed")
+
+    # Invalid knob name
+    err = run_fdbcli_command_and_get_error('begin; setknob dummy_knob 20; commit \"fdbcli change\";')
+    logger.debug("err is: {}".format(err))
+    assert len(err) > 0
+    logger.debug("invalid knob name passed")
+
+    # Invalid type for knob
+    err = run_fdbcli_command_and_get_error('begin; setknob min_trace_severity dummy-text; commit \"fdbcli change\";')
+    logger.debug("err is: {}".format(err))
+    assert len(err) > 0
+    logger.debug("invalid knob type passed")
+
+    # Verifying we can't do a normal set, clear, get, getrange, clearrange
+    # with a setknob
+    err = run_fdbcli_command_and_get_error('writemode on; begin; set foo bar; setknob max_metric_size 1000; commit;')
+    logger.debug("err is: {}".format(err))
+    assert len(err) > 0
+
+    err = run_fdbcli_command_and_get_error('writemode on; begin; clear foo; setknob max_metric_size 1000; commit')
+    logger.debug("err is: {}".format(err))
+    assert len(err) > 0
+
+    # Various setknobs and verified by getknob
+    output = run_fdbcli_command('begin; setknob min_trace_severity 30; setknob max_metric_size 1000; \
setknob tracing_udp_listener_addr 192.168.0.1; \
setknob tracing_sample_rate 0.3; \
commit \"This is an fdbcli test for knobs\";')
+    assert "Committed" in output
+
+    output = run_fdbcli_command('getknob', 'min_trace_severity')
+    assert r"`min_trace_severity' is `30'" == output
+
+    output = run_fdbcli_command('getknob', 'max_metric_size')
+    assert r"`max_metric_size' is `1000'" == output
+
+    output = run_fdbcli_command('getknob', 'tracing_udp_listener_addr')
+    assert r"`tracing_udp_listener_addr' is `'192.168.0.1''" == output
+
+    output = run_fdbcli_command('getknob', 'tracing_sample_rate')
+    assert r"`tracing_sample_rate' is `0.300000'" == output
+
+
 @enable_logging()
 def cache_range(logger):
     # this command is currently experimental
@@ -983,6 +1034,7 @@ if __name__ == '__main__':
         versionepoch()
         integer_options()
         tls_address_suffix()
+        knobmanagement()
     else:
         assert args.process_number > 1, "Process number should be positive"
         coordinators()

View File

@ -86,6 +86,26 @@ BlobCipherMetrics::BlobCipherMetrics()
traceFuture = traceCounters("BlobCipherMetrics", UID(), FLOW_KNOBS->ENCRYPT_KEY_CACHE_LOGGING_INTERVAL, &cc); traceFuture = traceCounters("BlobCipherMetrics", UID(), FLOW_KNOBS->ENCRYPT_KEY_CACHE_LOGGING_INTERVAL, &cc);
} }
std::string toString(BlobCipherMetrics::UsageType type) {
switch (type) {
case BlobCipherMetrics::UsageType::TLOG:
return "TLog";
case BlobCipherMetrics::UsageType::KV_MEMORY:
return "KVMemory";
case BlobCipherMetrics::UsageType::KV_REDWOOD:
return "KVRedwood";
case BlobCipherMetrics::UsageType::BLOB_GRANULE:
return "BlobGranule";
case BlobCipherMetrics::UsageType::BACKUP:
return "Backup";
case BlobCipherMetrics::UsageType::TEST:
return "Test";
default:
ASSERT(false);
return "";
}
}
// BlobCipherKey class methods // BlobCipherKey class methods
BlobCipherKey::BlobCipherKey(const EncryptCipherDomainId& domainId, BlobCipherKey::BlobCipherKey(const EncryptCipherDomainId& domainId,
@ -636,34 +656,17 @@ Reference<EncryptBuf> EncryptBlobCipherAes265Ctr::encrypt(const uint8_t* plainte
} else { } else {
// Populate header authToken details // Populate header authToken details
if (header->flags.authTokenMode == EncryptAuthTokenMode::ENCRYPT_HEADER_AUTH_TOKEN_MODE_SINGLE) { ASSERT_EQ(header->flags.authTokenMode, EncryptAuthTokenMode::ENCRYPT_HEADER_AUTH_TOKEN_MODE_SINGLE);
ASSERT_GE(allocSize, (bytes + finalBytes)); ASSERT_GE(allocSize, (bytes + finalBytes));
ASSERT_GE(encryptBuf->getLogicalSize(), (bytes + finalBytes)); ASSERT_GE(encryptBuf->getLogicalSize(), (bytes + finalBytes));
computeAuthToken({ { ciphertext, bytes + finalBytes }, computeAuthToken({ { ciphertext, bytes + finalBytes },
{ reinterpret_cast<const uint8_t*>(header), sizeof(BlobCipherEncryptHeader) } }, { reinterpret_cast<const uint8_t*>(header), sizeof(BlobCipherEncryptHeader) } },
headerCipherKey->rawCipher(), headerCipherKey->rawCipher(),
AES_256_KEY_LENGTH, AES_256_KEY_LENGTH,
&header->singleAuthToken.authToken[0], &header->singleAuthToken.authToken[0],
(EncryptAuthTokenAlgo)header->flags.authTokenAlgo, (EncryptAuthTokenAlgo)header->flags.authTokenAlgo,
AUTH_TOKEN_MAX_SIZE); AUTH_TOKEN_MAX_SIZE);
} else {
ASSERT_EQ(header->flags.authTokenMode, EncryptAuthTokenMode::ENCRYPT_HEADER_AUTH_TOKEN_MODE_MULTI);
// TOOD: Use HMAC_SHA encyrption authentication scheme as AES_CMAC needs minimum 16 bytes cipher key
computeAuthToken({ { ciphertext, bytes + finalBytes } },
reinterpret_cast<const uint8_t*>(&header->cipherTextDetails.salt),
sizeof(EncryptCipherRandomSalt),
&header->multiAuthTokens.cipherTextAuthToken[0],
EncryptAuthTokenAlgo::ENCRYPT_HEADER_AUTH_TOKEN_ALGO_HMAC_SHA,
AUTH_TOKEN_MAX_SIZE);
computeAuthToken({ { reinterpret_cast<const uint8_t*>(header), sizeof(BlobCipherEncryptHeader) } },
headerCipherKey->rawCipher(),
AES_256_KEY_LENGTH,
&header->multiAuthTokens.headerAuthToken[0],
(EncryptAuthTokenAlgo)header->flags.authTokenAlgo,
AUTH_TOKEN_MAX_SIZE);
}
} }
encryptBuf->setLogicalSize(plaintextLen); encryptBuf->setLogicalSize(plaintextLen);
@ -708,44 +711,6 @@ DecryptBlobCipherAes256Ctr::DecryptBlobCipherAes256Ctr(Reference<BlobCipherKey>
} }
} }
void DecryptBlobCipherAes256Ctr::verifyHeaderAuthToken(const BlobCipherEncryptHeader& header, Arena& arena) {
if (header.flags.authTokenMode != ENCRYPT_HEADER_AUTH_TOKEN_MODE_MULTI) {
// NoneAuthToken mode; no authToken is generated; nothing to do
// SingleAuthToken mode; verification will happen as part of decryption.
return;
}
ASSERT_EQ(header.flags.authTokenMode, ENCRYPT_HEADER_AUTH_TOKEN_MODE_MULTI);
ASSERT(isEncryptHeaderAuthTokenAlgoValid((EncryptAuthTokenAlgo)header.flags.authTokenAlgo));
BlobCipherEncryptHeader headerCopy;
memcpy(reinterpret_cast<uint8_t*>(&headerCopy),
reinterpret_cast<const uint8_t*>(&header),
sizeof(BlobCipherEncryptHeader));
memset(reinterpret_cast<uint8_t*>(&headerCopy.multiAuthTokens.headerAuthToken), 0, AUTH_TOKEN_MAX_SIZE);
uint8_t computedHeaderAuthToken[AUTH_TOKEN_MAX_SIZE]{};
computeAuthToken({ { reinterpret_cast<const uint8_t*>(&headerCopy), sizeof(BlobCipherEncryptHeader) } },
headerCipherKey->rawCipher(),
AES_256_KEY_LENGTH,
&computedHeaderAuthToken[0],
(EncryptAuthTokenAlgo)header.flags.authTokenAlgo,
AUTH_TOKEN_MAX_SIZE);
int authTokenSize = getEncryptHeaderAuthTokenSize(header.flags.authTokenAlgo);
ASSERT_LE(authTokenSize, AUTH_TOKEN_MAX_SIZE);
if (memcmp(&header.multiAuthTokens.headerAuthToken[0], &computedHeaderAuthToken[0], authTokenSize) != 0) {
TraceEvent(SevWarn, "BlobCipherVerifyEncryptBlobHeaderAuthTokenMismatch")
.detail("HeaderVersion", header.flags.headerVersion)
.detail("HeaderMode", header.flags.encryptMode)
.detail("MultiAuthHeaderAuthToken",
StringRef(arena, &header.multiAuthTokens.headerAuthToken[0], AUTH_TOKEN_MAX_SIZE).toString())
.detail("ComputedHeaderAuthToken", StringRef(computedHeaderAuthToken, AUTH_TOKEN_MAX_SIZE));
throw encrypt_header_authtoken_mismatch();
}
headerAuthTokenValidationDone = true;
}
void DecryptBlobCipherAes256Ctr::verifyHeaderSingleAuthToken(const uint8_t* ciphertext,
const int ciphertextLen,
const BlobCipherEncryptHeader& header,
@ -759,7 +724,7 @@ void DecryptBlobCipherAes256Ctr::verifyHeaderSingleAuthToken(const uint8_t* ciph
memcpy(reinterpret_cast<uint8_t*>(&headerCopy),
reinterpret_cast<const uint8_t*>(&header),
sizeof(BlobCipherEncryptHeader));
memset(reinterpret_cast<uint8_t*>(&headerCopy.singleAuthToken), 0, 2 * AUTH_TOKEN_MAX_SIZE);
memset(reinterpret_cast<uint8_t*>(&headerCopy.singleAuthToken), 0, AUTH_TOKEN_MAX_SIZE);
uint8_t computed[AUTH_TOKEN_MAX_SIZE];
computeAuthToken({ { ciphertext, ciphertextLen },
{ reinterpret_cast<const uint8_t*>(&headerCopy), sizeof(BlobCipherEncryptHeader) } },
@ -782,43 +747,12 @@ void DecryptBlobCipherAes256Ctr::verifyHeaderSingleAuthToken(const uint8_t* ciph
}
}
void DecryptBlobCipherAes256Ctr::verifyHeaderMultiAuthToken(const uint8_t* ciphertext,
const int ciphertextLen,
const BlobCipherEncryptHeader& header,
Arena& arena) {
if (!headerAuthTokenValidationDone) {
verifyHeaderAuthToken(header, arena);
}
uint8_t computedCipherTextAuthToken[AUTH_TOKEN_MAX_SIZE];
// TODO: Use HMAC_SHA encryption authentication scheme as AES_CMAC needs minimum 16 bytes cipher key
computeAuthToken({ { ciphertext, ciphertextLen } },
reinterpret_cast<const uint8_t*>(&header.cipherTextDetails.salt),
sizeof(EncryptCipherRandomSalt),
&computedCipherTextAuthToken[0],
EncryptAuthTokenAlgo::ENCRYPT_HEADER_AUTH_TOKEN_ALGO_HMAC_SHA,
AUTH_TOKEN_MAX_SIZE);
if (memcmp(&header.multiAuthTokens.cipherTextAuthToken[0], &computedCipherTextAuthToken[0], AUTH_TOKEN_MAX_SIZE) !=
0) {
TraceEvent(SevWarn, "BlobCipherVerifyEncryptBlobHeaderAuthTokenMismatch")
.detail("HeaderVersion", header.flags.headerVersion)
.detail("HeaderMode", header.flags.encryptMode)
.detail("MultiAuthCipherTextAuthToken",
StringRef(arena, &header.multiAuthTokens.cipherTextAuthToken[0], AUTH_TOKEN_MAX_SIZE).toString())
.detail("ComputedCipherTextAuthToken", StringRef(computedCipherTextAuthToken, AUTH_TOKEN_MAX_SIZE));
throw encrypt_header_authtoken_mismatch();
}
}
void DecryptBlobCipherAes256Ctr::verifyAuthTokens(const uint8_t* ciphertext,
const int ciphertextLen,
const BlobCipherEncryptHeader& header,
Arena& arena) {
if (header.flags.authTokenMode == EncryptAuthTokenMode::ENCRYPT_HEADER_AUTH_TOKEN_MODE_SINGLE) {
ASSERT_EQ(header.flags.authTokenMode, EncryptAuthTokenMode::ENCRYPT_HEADER_AUTH_TOKEN_MODE_SINGLE);
verifyHeaderSingleAuthToken(ciphertext, ciphertextLen, header, arena);
} else {
ASSERT_EQ(header.flags.authTokenMode, ENCRYPT_HEADER_AUTH_TOKEN_MODE_MULTI);
verifyHeaderMultiAuthToken(ciphertext, ciphertextLen, header, arena);
}
authTokensValidationDone = true;
}
@ -1504,266 +1438,6 @@ TEST_CASE("flow/BlobCipher") {
TraceEvent("SingleAuthModeAesCmacDone");
}
// validate basic encrypt followed by decrypt operation for AUTH_TOKEN_MODE_MULTI
// HMAC_SHA authToken algorithm
{
TraceEvent("MultiAuthModeHmacShaStart").log();
EncryptBlobCipherAes265Ctr encryptor(cipherKey,
headerCipherKey,
iv,
AES_256_IV_LENGTH,
EncryptAuthTokenMode::ENCRYPT_HEADER_AUTH_TOKEN_MODE_MULTI,
EncryptAuthTokenAlgo::ENCRYPT_HEADER_AUTH_TOKEN_ALGO_HMAC_SHA,
BlobCipherMetrics::TEST);
BlobCipherEncryptHeader header;
Reference<EncryptBuf> encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena);
ASSERT_EQ(encrypted->getLogicalSize(), bufLen);
ASSERT_NE(memcmp(&orgData[0], encrypted->begin(), bufLen), 0);
ASSERT_EQ(header.flags.headerVersion, EncryptBlobCipherAes265Ctr::ENCRYPT_HEADER_VERSION);
ASSERT_EQ(header.flags.encryptMode, ENCRYPT_CIPHER_MODE_AES_256_CTR);
ASSERT_EQ(header.flags.authTokenMode, ENCRYPT_HEADER_AUTH_TOKEN_MODE_MULTI);
ASSERT_EQ(header.flags.authTokenAlgo, EncryptAuthTokenAlgo::ENCRYPT_HEADER_AUTH_TOKEN_ALGO_HMAC_SHA);
TraceEvent("BlobCipherTestEncryptDone")
.detail("HeaderVersion", header.flags.headerVersion)
.detail("HeaderEncryptMode", header.flags.encryptMode)
.detail("HeaderEncryptAuthTokenMode", header.flags.authTokenMode)
.detail("HeaderEncryptAuthTokenAlgo", header.flags.authTokenAlgo)
.detail("DomainId", header.cipherTextDetails.encryptDomainId)
.detail("BaseCipherId", header.cipherTextDetails.baseCipherId)
.detail("HeaderAuthToken",
StringRef(arena, &header.singleAuthToken.authToken[0], AUTH_TOKEN_HMAC_SHA_SIZE).toString());
Reference<BlobCipherKey> tCipherKey = cipherKeyCache->getCipherKey(header.cipherTextDetails.encryptDomainId,
header.cipherTextDetails.baseCipherId,
header.cipherTextDetails.salt);
Reference<BlobCipherKey> hCipherKey = cipherKeyCache->getCipherKey(header.cipherHeaderDetails.encryptDomainId,
header.cipherHeaderDetails.baseCipherId,
header.cipherHeaderDetails.salt);
ASSERT(tCipherKey->isEqual(cipherKey));
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv, BlobCipherMetrics::TEST);
Reference<EncryptBuf> decrypted = decryptor.decrypt(encrypted->begin(), bufLen, header, arena);
ASSERT_EQ(decrypted->getLogicalSize(), bufLen);
ASSERT_EQ(memcmp(decrypted->begin(), &orgData[0], bufLen), 0);
TraceEvent("BlobCipherTestDecryptDone").log();
// induce encryption header corruption - headerVersion corrupted
encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena);
memcpy(reinterpret_cast<uint8_t*>(&headerCopy),
reinterpret_cast<const uint8_t*>(&header),
sizeof(BlobCipherEncryptHeader));
headerCopy.flags.headerVersion += 1;
try {
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv, BlobCipherMetrics::TEST);
decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena);
ASSERT(false); // error expected
} catch (Error& e) {
if (e.code() != error_code_encrypt_header_metadata_mismatch) {
throw;
}
}
// induce encryption header corruption - encryptionMode corrupted
encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena);
memcpy(reinterpret_cast<uint8_t*>(&headerCopy),
reinterpret_cast<const uint8_t*>(&header),
sizeof(BlobCipherEncryptHeader));
headerCopy.flags.encryptMode += 1;
try {
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv, BlobCipherMetrics::TEST);
decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena);
ASSERT(false); // error expected
} catch (Error& e) {
if (e.code() != error_code_encrypt_header_metadata_mismatch) {
throw;
}
}
// induce encryption header corruption - cipherText authToken mismatch
encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena);
memcpy(reinterpret_cast<uint8_t*>(&headerCopy),
reinterpret_cast<const uint8_t*>(&header),
sizeof(BlobCipherEncryptHeader));
int hIdx = deterministicRandom()->randomInt(0, AUTH_TOKEN_HMAC_SHA_SIZE - 1);
headerCopy.multiAuthTokens.cipherTextAuthToken[hIdx] += 1;
try {
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv, BlobCipherMetrics::TEST);
decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena);
ASSERT(false); // error expected
} catch (Error& e) {
if (e.code() != error_code_encrypt_header_authtoken_mismatch) {
throw;
}
}
// induce encryption header corruption - header authToken mismatch
encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena);
memcpy(reinterpret_cast<uint8_t*>(&headerCopy),
reinterpret_cast<const uint8_t*>(&header),
sizeof(BlobCipherEncryptHeader));
hIdx = deterministicRandom()->randomInt(0, AUTH_TOKEN_HMAC_SHA_SIZE - 1);
headerCopy.multiAuthTokens.headerAuthToken[hIdx] += 1;
try {
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv, BlobCipherMetrics::TEST);
decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena);
ASSERT(false); // error expected
} catch (Error& e) {
if (e.code() != error_code_encrypt_header_authtoken_mismatch) {
throw;
}
}
try {
encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena);
uint8_t temp[bufLen];
memcpy(encrypted->begin(), &temp[0], bufLen);
int tIdx = deterministicRandom()->randomInt(0, bufLen - 1);
temp[tIdx] += 1;
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv, BlobCipherMetrics::TEST);
decrypted = decryptor.decrypt(&temp[0], bufLen, header, arena);
} catch (Error& e) {
if (e.code() != error_code_encrypt_header_authtoken_mismatch) {
throw;
}
}
TraceEvent("MultiAuthModeHmacShaDone");
}
// AES_CMAC authToken algorithm
{
TraceEvent("MultiAuthModeAesCmacStart");
EncryptBlobCipherAes265Ctr encryptor(cipherKey,
headerCipherKey,
iv,
AES_256_IV_LENGTH,
EncryptAuthTokenMode::ENCRYPT_HEADER_AUTH_TOKEN_MODE_MULTI,
EncryptAuthTokenAlgo::ENCRYPT_HEADER_AUTH_TOKEN_ALGO_AES_CMAC,
BlobCipherMetrics::TEST);
BlobCipherEncryptHeader header;
Reference<EncryptBuf> encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena);
ASSERT_EQ(encrypted->getLogicalSize(), bufLen);
ASSERT_NE(memcmp(&orgData[0], encrypted->begin(), bufLen), 0);
ASSERT_EQ(header.flags.headerVersion, EncryptBlobCipherAes265Ctr::ENCRYPT_HEADER_VERSION);
ASSERT_EQ(header.flags.encryptMode, ENCRYPT_CIPHER_MODE_AES_256_CTR);
ASSERT_EQ(header.flags.authTokenMode, ENCRYPT_HEADER_AUTH_TOKEN_MODE_MULTI);
ASSERT_EQ(header.flags.authTokenAlgo, EncryptAuthTokenAlgo::ENCRYPT_HEADER_AUTH_TOKEN_ALGO_AES_CMAC);
TraceEvent("BlobCipherTestEncryptDone")
.detail("HeaderVersion", header.flags.headerVersion)
.detail("HeaderEncryptMode", header.flags.encryptMode)
.detail("HeaderEncryptAuthTokenMode", header.flags.authTokenMode)
.detail("HeaderEncryptAuthTokenAlgo", header.flags.authTokenAlgo)
.detail("DomainId", header.cipherTextDetails.encryptDomainId)
.detail("BaseCipherId", header.cipherTextDetails.baseCipherId)
.detail("HeaderAuthToken",
StringRef(arena, &header.singleAuthToken.authToken[0], AUTH_TOKEN_AES_CMAC_SIZE).toString());
Reference<BlobCipherKey> tCipherKey = cipherKeyCache->getCipherKey(header.cipherTextDetails.encryptDomainId,
header.cipherTextDetails.baseCipherId,
header.cipherTextDetails.salt);
Reference<BlobCipherKey> hCipherKey = cipherKeyCache->getCipherKey(header.cipherHeaderDetails.encryptDomainId,
header.cipherHeaderDetails.baseCipherId,
header.cipherHeaderDetails.salt);
ASSERT(tCipherKey->isEqual(cipherKey));
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv, BlobCipherMetrics::TEST);
Reference<EncryptBuf> decrypted = decryptor.decrypt(encrypted->begin(), bufLen, header, arena);
ASSERT_EQ(decrypted->getLogicalSize(), bufLen);
ASSERT_EQ(memcmp(decrypted->begin(), &orgData[0], bufLen), 0);
TraceEvent("BlobCipherTestDecryptDone").log();
// induce encryption header corruption - headerVersion corrupted
encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena);
memcpy(reinterpret_cast<uint8_t*>(&headerCopy),
reinterpret_cast<const uint8_t*>(&header),
sizeof(BlobCipherEncryptHeader));
headerCopy.flags.headerVersion += 1;
try {
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv, BlobCipherMetrics::TEST);
decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena);
ASSERT(false); // error expected
} catch (Error& e) {
if (e.code() != error_code_encrypt_header_metadata_mismatch) {
throw;
}
}
// induce encryption header corruption - encryptionMode corrupted
encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena);
memcpy(reinterpret_cast<uint8_t*>(&headerCopy),
reinterpret_cast<const uint8_t*>(&header),
sizeof(BlobCipherEncryptHeader));
headerCopy.flags.encryptMode += 1;
try {
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv, BlobCipherMetrics::TEST);
decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena);
ASSERT(false); // error expected
} catch (Error& e) {
if (e.code() != error_code_encrypt_header_metadata_mismatch) {
throw;
}
}
// induce encryption header corruption - cipherText authToken mismatch
encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena);
memcpy(reinterpret_cast<uint8_t*>(&headerCopy),
reinterpret_cast<const uint8_t*>(&header),
sizeof(BlobCipherEncryptHeader));
int hIdx = deterministicRandom()->randomInt(0, AUTH_TOKEN_AES_CMAC_SIZE - 1);
headerCopy.multiAuthTokens.cipherTextAuthToken[hIdx] += 1;
try {
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv, BlobCipherMetrics::TEST);
decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena);
ASSERT(false); // error expected
} catch (Error& e) {
if (e.code() != error_code_encrypt_header_authtoken_mismatch) {
throw;
}
}
// induce encryption header corruption - header authToken mismatch
encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena);
memcpy(reinterpret_cast<uint8_t*>(&headerCopy),
reinterpret_cast<const uint8_t*>(&header),
sizeof(BlobCipherEncryptHeader));
hIdx = deterministicRandom()->randomInt(0, AUTH_TOKEN_AES_CMAC_SIZE - 1);
headerCopy.multiAuthTokens.headerAuthToken[hIdx] += 1;
try {
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv, BlobCipherMetrics::TEST);
decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena);
ASSERT(false); // error expected
} catch (Error& e) {
if (e.code() != error_code_encrypt_header_authtoken_mismatch) {
throw;
}
}
try {
encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena);
uint8_t temp[bufLen];
memcpy(encrypted->begin(), &temp[0], bufLen);
int tIdx = deterministicRandom()->randomInt(0, bufLen - 1);
temp[tIdx] += 1;
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv, BlobCipherMetrics::TEST);
decrypted = decryptor.decrypt(&temp[0], bufLen, header, arena);
} catch (Error& e) {
if (e.code() != error_code_encrypt_header_authtoken_mismatch) {
throw;
}
}
TraceEvent("MultiAuthModeAesCmacDone");
}
// Validate dropping encryptDomainId cached keys
const EncryptCipherDomainId candidate = deterministicRandom()->randomInt(minDomainId, maxDomainId);
cipherKeyCache->resetEncryptDomainId(candidate);

@ -33,6 +33,12 @@ public:
SingleBlobConnectionProvider(std::string url) { conn = BackupContainerFileSystem::openContainerFS(url, {}, {}); }
bool needsRefresh() const { return false; }
bool isExpired() const { return false; }
void update(Standalone<BlobMetadataDetailsRef> newBlobMetadata) { ASSERT(false); }
private:
Reference<BackupContainerFileSystem> conn;
};
@ -44,18 +50,42 @@ struct PartitionedBlobConnectionProvider : BlobConnectionProvider {
return std::pair(conn, metadata.partitions[writePartition].toString() + newFileName);
}
Reference<BackupContainerFileSystem> getForRead(std::string filePath) { return conn; }
Reference<BackupContainerFileSystem> getForRead(std::string filePath) {
CODE_PROBE(isExpired(), "partitioned blob connection using expired blob metadata for read!");
return conn;
}
PartitionedBlobConnectionProvider(const Standalone<BlobMetadataDetailsRef> metadata) : metadata(metadata) {
ASSERT(metadata.base.present());
ASSERT(metadata.partitions.size() >= 2);
conn = BackupContainerFileSystem::openContainerFS(metadata.base.get().toString(), {}, {});
for (auto& it : metadata.partitions) {
void updateMetadata(const Standalone<BlobMetadataDetailsRef>& newMetadata, bool checkPrevious) {
ASSERT(newMetadata.base.present());
ASSERT(newMetadata.partitions.size() >= 2);
for (auto& it : newMetadata.partitions) {
// these should be suffixes, not whole blob urls
ASSERT(it.toString().find("://") == std::string::npos);
}
if (checkPrevious) {
if (newMetadata.expireAt <= metadata.expireAt) {
return;
}
// FIXME: validate only the credentials changed and the location is the same
ASSERT(newMetadata.partitions.size() == metadata.partitions.size());
for (int i = 0; i < newMetadata.partitions.size(); i++) {
ASSERT(newMetadata.partitions[i] == metadata.partitions[i]);
}
}
metadata = newMetadata;
conn = BackupContainerFileSystem::openContainerFS(metadata.base.get().toString(), {}, {});
}
PartitionedBlobConnectionProvider(const Standalone<BlobMetadataDetailsRef> metadata) {
updateMetadata(metadata, false);
}
bool needsRefresh() const { return now() >= metadata.refreshAt; }
bool isExpired() const { return now() >= metadata.expireAt; }
void update(Standalone<BlobMetadataDetailsRef> newBlobMetadata) { updateMetadata(newBlobMetadata, true); }
private:
Standalone<BlobMetadataDetailsRef> metadata;
Reference<BackupContainerFileSystem> conn;
@ -72,6 +102,7 @@ struct StorageLocationBlobConnectionProvider : BlobConnectionProvider {
}
Reference<BackupContainerFileSystem> getForRead(std::string filePath) {
CODE_PROBE(isExpired(), "storage location blob connection using expired blob metadata for read!");
size_t slash = filePath.find("/");
ASSERT(slash != std::string::npos);
int partition = stoi(filePath.substr(0, slash));
@ -80,9 +111,18 @@ struct StorageLocationBlobConnectionProvider : BlobConnectionProvider {
return partitions[partition];
}
StorageLocationBlobConnectionProvider(const Standalone<BlobMetadataDetailsRef> metadata) {
ASSERT(!metadata.base.present());
ASSERT(metadata.partitions.size() >= 2);
void updateMetadata(const Standalone<BlobMetadataDetailsRef>& newMetadata, bool checkPrevious) {
ASSERT(!newMetadata.base.present());
ASSERT(newMetadata.partitions.size() >= 2);
if (checkPrevious) {
// FIXME: validate only the credentials changed and the locations are the same
ASSERT(newMetadata.partitions.size() == partitions.size());
if (newMetadata.expireAt <= metadata.expireAt) {
return;
}
}
metadata = newMetadata;
partitions.clear();
for (auto& it : metadata.partitions) {
// these should be whole blob urls
ASSERT(it.toString().find("://") != std::string::npos);
@ -90,7 +130,18 @@ struct StorageLocationBlobConnectionProvider : BlobConnectionProvider {
}
}
StorageLocationBlobConnectionProvider(const Standalone<BlobMetadataDetailsRef> metadata) {
updateMetadata(metadata, false);
}
bool needsRefresh() const { return now() >= metadata.refreshAt; }
bool isExpired() const { return now() >= metadata.expireAt; }
void update(Standalone<BlobMetadataDetailsRef> newBlobMetadata) { updateMetadata(newBlobMetadata, true); }
private:
Standalone<BlobMetadataDetailsRef> metadata;
std::vector<Reference<BackupContainerFileSystem>> partitions;
};
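// Illustrative refresh flow for the API above (a sketch, not part of this change; fetchBlobMetadata
// is a hypothetical stand-in for however the caller re-fetches metadata):
//   if (provider->needsRefresh()) {
//       Standalone<BlobMetadataDetailsRef> fresh = wait(fetchBlobMetadata(domainId)); // hypothetical
//       provider->update(fresh); // ignored unless fresh.expireAt advances past the current metadata
//   }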

@ -198,6 +198,7 @@ void ClientKnobs::initialize(Randomize randomize) {
init( DEFAULT_AUTO_LOGS, 3 );
init( DEFAULT_COMMIT_GRV_PROXIES_RATIO, 3 );
init( DEFAULT_MAX_GRV_PROXIES, 4 );
init( DELETE_NATIVE_LIB_AFTER_LOADING, true ); // if false, don't delete libfdb_c in tmp directory on client connect.
init( GLOBAL_CONFIG_REFRESH_BACKOFF, 0.5 );
init( GLOBAL_CONFIG_REFRESH_MAX_BACKOFF, 60.0 );

@ -44,19 +44,20 @@ ConfigKey ConfigKeyRef::decodeKey(KeyRef const& key) {
}
Value KnobValueRef::ToValueFunc::operator()(int v) const {
return BinaryWriter::toValue(v, Unversioned());
// return BinaryWriter::toValue(v, Unversioned());
return Tuple::makeTuple(v).pack();
}
Value KnobValueRef::ToValueFunc::operator()(int64_t v) const {
return BinaryWriter::toValue(v, Unversioned());
return Tuple::makeTuple(v).pack();
}
Value KnobValueRef::ToValueFunc::operator()(bool v) const {
return BinaryWriter::toValue(v, Unversioned());
return Tuple::makeTuple(v).pack();
}
Value KnobValueRef::ToValueFunc::operator()(ValueRef v) const {
return v;
return Tuple::makeTuple(v).pack();
}
Value KnobValueRef::ToValueFunc::operator()(double v) const {
return BinaryWriter::toValue(v, Unversioned());
return Tuple::makeTuple(v).pack();
}
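// Illustrative round-trip under the new encoding (a sketch, not part of the diff; assumes the
// standard fdbclient Tuple API):
//   Value packed = Tuple::makeTuple((int64_t)42).pack();
//   Tuple t = Tuple::unpack(packed);
//   ASSERT(t.getInt(0) == 42);
// Unlike raw Unversioned BinaryWriter bytes, Tuple-packed knob values are self-describing.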
KnobValue KnobValueRef::CreatorFunc::operator()(NoKnobFound) const {

@ -23,6 +23,7 @@
#include "fdbclient/BackupContainer.h" #include "fdbclient/BackupContainer.h"
#include "fdbclient/BlobCipher.h" #include "fdbclient/BlobCipher.h"
#include "fdbclient/DatabaseContext.h" #include "fdbclient/DatabaseContext.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/GetEncryptCipherKeys.actor.h" #include "fdbclient/GetEncryptCipherKeys.actor.h"
#include "fdbclient/JsonBuilder.h" #include "fdbclient/JsonBuilder.h"
#include "fdbclient/KeyBackedTypes.h" #include "fdbclient/KeyBackedTypes.h"
@ -649,10 +650,8 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
return Void();
}
ACTOR static Future<Void> updateEncryptionKeysCtx(EncryptedRangeFileWriter* self,
KeyRef key,
Reference<TenantEntryCache<Void>> cache) {
state std::pair<int64_t, TenantName> curTenantInfo = wait(getEncryptionDomainDetails(key, cache));
ACTOR static Future<Void> updateEncryptionKeysCtx(EncryptedRangeFileWriter* self, KeyRef key) {
state std::pair<int64_t, TenantName> curTenantInfo = wait(getEncryptionDomainDetails(key, self));
state Reference<AsyncVar<ClientDBInfo> const> dbInfo = self->cx->clientInfo;
// Get text and header cipher key
@ -694,13 +693,12 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
static bool isSystemKey(KeyRef key) { return key.size() && key[0] == systemKeys.begin[0]; }
ACTOR static Future<std::pair<int64_t, TenantName>> getEncryptionDomainDetailsImpl(
KeyRef key,
Reference<TenantEntryCache<Void>> tenantCache) {
ACTOR static Future<std::pair<int64_t, TenantName>>
getEncryptionDomainDetailsImpl(KeyRef key, Reference<TenantEntryCache<Void>> tenantCache, bool useTenantCache) {
if (isSystemKey(key)) {
return std::make_pair(SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID, FDB_SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_NAME);
}
if (key.size() < TENANT_PREFIX_SIZE) {
if (key.size() < TENANT_PREFIX_SIZE || !useTenantCache) {
return std::make_pair(FDB_DEFAULT_ENCRYPT_DOMAIN_ID, FDB_DEFAULT_ENCRYPT_DOMAIN_NAME);
}
KeyRef tenantPrefix = KeyRef(key.begin(), TENANT_PREFIX_SIZE);
@ -712,10 +710,21 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
return std::make_pair(FDB_DEFAULT_ENCRYPT_DOMAIN_ID, FDB_DEFAULT_ENCRYPT_DOMAIN_NAME);
}
static Future<std::pair<int64_t, TenantName>> getEncryptionDomainDetails(
KeyRef key,
Reference<TenantEntryCache<Void>> tenantCache) {
return getEncryptionDomainDetailsImpl(key, tenantCache);
static Future<std::pair<int64_t, TenantName>> getEncryptionDomainDetails(KeyRef key,
EncryptedRangeFileWriter* self) {
// If tenants are disabled on a cluster then don't use the TenantEntryCache as it will result in a lot of
// unnecessary cache misses. For a cluster configured in TenantMode::Optional, the backup performance may
// degrade if most of the mutations belong to an invalid tenant
TenantMode mode = self->cx->clientInfo->get().tenantMode;
bool useTenantCache = mode != TenantMode::DISABLED;
if (g_network->isSimulated() && mode == TenantMode::OPTIONAL_TENANT) {
// TODO: Currently simulation tests run with optional tenant mode but most data does not belong to any
// tenant. This results in many timeouts so disable using the tenant cache until optional tenant mode
// support with backups is more performant
useTenantCache = false;
}
CODE_PROBE(useTenantCache, "using tenant cache");
return getEncryptionDomainDetailsImpl(key, self->tenantCache, useTenantCache);
}
// Handles the first block and internal blocks. Ends current block if needed.
@ -813,7 +822,7 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
appendStringRefWithLenToBuffer(self, &endKey); appendStringRefWithLenToBuffer(self, &endKey);
appendStringRefWithLenToBuffer(self, &newValue); appendStringRefWithLenToBuffer(self, &newValue);
wait(newBlock(self, 0, endKey, writeValue)); wait(newBlock(self, 0, endKey, writeValue));
wait(updateEncryptionKeysCtx(self, self->lastKey, self->tenantCache)); wait(updateEncryptionKeysCtx(self, self->lastKey));
return Void(); return Void();
} }
@ -825,9 +834,8 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
if (self->lastKey.size() == 0 || k.size() == 0) {
return false;
}
state std::pair<int64_t, TenantName> curKeyTenantInfo = wait(getEncryptionDomainDetails(k, self->tenantCache));
state std::pair<int64_t, TenantName> prevKeyTenantInfo =
wait(getEncryptionDomainDetails(self->lastKey, self->tenantCache));
state std::pair<int64_t, TenantName> curKeyTenantInfo = wait(getEncryptionDomainDetails(k, self));
state std::pair<int64_t, TenantName> prevKeyTenantInfo = wait(getEncryptionDomainDetails(self->lastKey, self));
// crossing tenant boundaries so finish the current block using only the tenant prefix of the new key
if (curKeyTenantInfo.first != prevKeyTenantInfo.first) {
CODE_PROBE(true, "crossed tenant boundaries");
@ -840,7 +848,7 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
// Start a new block if needed, then write the key and value
ACTOR static Future<Void> writeKV_impl(EncryptedRangeFileWriter* self, Key k, Value v) {
if (!self->cipherKeys.headerCipherKey.isValid() || !self->cipherKeys.textCipherKey.isValid()) {
wait(updateEncryptionKeysCtx(self, k, self->tenantCache));
wait(updateEncryptionKeysCtx(self, k));
}
state int toWrite = sizeof(int32_t) + k.size() + sizeof(int32_t) + v.size();
wait(newBlockIfNeeded(self, toWrite));
@ -862,7 +870,7 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
// TODO (Nim): Is it possible to write empty begin and end keys?
if (k.size() > 0 &&
(!self->cipherKeys.headerCipherKey.isValid() || !self->cipherKeys.textCipherKey.isValid())) {
wait(updateEncryptionKeysCtx(self, k, self->tenantCache));
wait(updateEncryptionKeysCtx(self, k));
}
// Need to account for extra "empty" value being written in the case of crossing tenant boundaries
@ -1035,8 +1043,7 @@ private:
ACTOR static Future<Void> decodeKVPairs(StringRefReader* reader,
Standalone<VectorRef<KeyValueRef>>* results,
bool encryptedBlock,
Optional<Database> cx,
Reference<TenantEntryCache<Void>> tenantCache) {
Optional<Database> cx) {
// Read begin key, if this fails then block was invalid.
state uint32_t kLen = reader->consumeNetworkUInt32();
state const uint8_t* k = reader->consume(kLen);
@ -1091,7 +1098,7 @@ ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeRangeFileBlock(Reference<
// BACKUP_AGENT_ENCRYPTED_SNAPSHOT_FILE_VERSION
int32_t file_version = reader.consume<int32_t>();
if (file_version == BACKUP_AGENT_SNAPSHOT_FILE_VERSION) {
wait(decodeKVPairs(&reader, &results, false, cx, Reference<TenantEntryCache<Void>>()));
wait(decodeKVPairs(&reader, &results, false, cx));
} else if (file_version == BACKUP_AGENT_ENCRYPTED_SNAPSHOT_FILE_VERSION) {
CODE_PROBE(true, "decoding encrypted block");
ASSERT(cx.present());
@ -1114,8 +1121,7 @@ ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeRangeFileBlock(Reference<
StringRef decryptedData =
wait(EncryptedRangeFileWriter::decrypt(cx.get(), header, dataPayloadStart, dataLen, &results.arena()));
reader = StringRefReader(decryptedData, restore_corrupted_data());
Reference<TenantEntryCache<Void>> tenantCache = makeReference<TenantEntryCache<Void>>(cx.get());
wait(decodeKVPairs(&reader, &results, true, cx, tenantCache));
wait(decodeKVPairs(&reader, &results, true, cx));
} else {
throw restore_unsupported_file_version();
}

fdbclient/IdempotencyId.cpp (new file)
@ -0,0 +1,174 @@
/*
* IdempotencyId.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "fdbclient/IdempotencyId.h"
#include "fdbclient/SystemData.h"
#include "flow/UnitTest.h"
struct IdempotencyIdKVBuilderImpl {
Optional<Version> commitVersion;
Optional<uint8_t> batchIndexHighOrderByte;
BinaryWriter value{ IncludeVersion() };
};
IdempotencyIdKVBuilder::IdempotencyIdKVBuilder() : impl(PImpl<IdempotencyIdKVBuilderImpl>::create()) {}
void IdempotencyIdKVBuilder::setCommitVersion(Version commitVersion) {
impl->commitVersion = commitVersion;
}
void IdempotencyIdKVBuilder::add(const IdempotencyIdRef& id, uint16_t batchIndex) {
ASSERT(id.valid());
if (impl->batchIndexHighOrderByte.present()) {
ASSERT((batchIndex >> 8) == impl->batchIndexHighOrderByte.get());
} else {
impl->batchIndexHighOrderByte = batchIndex >> 8;
}
StringRef s = id.asStringRefUnsafe();
impl->value << uint8_t(s.size());
impl->value.serializeBytes(s);
impl->value << uint8_t(batchIndex); // Low order byte of batchIndex
}
Optional<KeyValue> IdempotencyIdKVBuilder::buildAndClear() {
ASSERT(impl->commitVersion.present());
if (!impl->batchIndexHighOrderByte.present()) {
return {};
}
BinaryWriter key{ Unversioned() };
key.serializeBytes(idempotencyIdKeys.begin);
key << bigEndian64(impl->commitVersion.get());
key << impl->batchIndexHighOrderByte.get();
Value v = impl->value.toValue();
impl->value = BinaryWriter(IncludeVersion());
impl->batchIndexHighOrderByte = Optional<uint8_t>();
Optional<KeyValue> result = KeyValue();
result.get().arena() = v.arena();
result.get().key = key.toValue(result.get().arena());
result.get().value = v;
return result;
}
IdempotencyIdKVBuilder::~IdempotencyIdKVBuilder() = default;
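// Resulting layout, as read off the builder above (for reference, not normative):
//   key   = idempotencyIdKeys.begin + bigEndian64(commitVersion) + batchIndexHighOrderByte
//   value = IncludeVersion() header, then per id: uint8_t idLength | id bytes | uint8_t batchIndexLowOrderByte
// so every id sharing a commit version and high-order batch byte packs into one key-value pair.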
Optional<CommitResult> kvContainsIdempotencyId(const KeyValueRef& kv, const IdempotencyIdRef& id) {
ASSERT(id.valid());
StringRef needle = id.asStringRefUnsafe();
StringRef haystack = kv.value;
#ifndef _WIN32
// The common case is that the kv does not contain the idempotency id, so early return if memmem is available
if (memmem(haystack.begin(), haystack.size(), needle.begin(), needle.size()) == nullptr) {
return {};
}
#endif
// Even if id is a substring of value, it may still not actually contain it.
BinaryReader reader(kv.value.begin(), kv.value.size(), IncludeVersion());
while (!reader.empty()) {
uint8_t length;
reader >> length;
StringRef candidate{ reinterpret_cast<const uint8_t*>(reader.readBytes(length)), length };
uint8_t lowOrderBatchIndex;
reader >> lowOrderBatchIndex;
if (candidate == needle) {
BinaryReader reader(kv.key.begin(), kv.key.size(), Unversioned());
reader.readBytes(idempotencyIdKeys.begin.size());
Version commitVersion;
reader >> commitVersion;
commitVersion = bigEndian64(commitVersion);
uint8_t highOrderBatchIndex;
reader >> highOrderBatchIndex;
return CommitResult{ commitVersion,
static_cast<uint16_t>((uint16_t(highOrderBatchIndex) << 8) |
uint16_t(lowOrderBatchIndex)) };
}
}
return {};
}
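// Illustrative lookup (a sketch, assuming `range` was read from the idempotencyIdKeys keyspace
// and `myId` is the IdempotencyIdRef being searched for):
//   for (const KeyValueRef& kv : range) {
//       Optional<CommitResult> r = kvContainsIdempotencyId(kv, myId);
//       if (r.present()) {
//           // committed at r.get().commitVersion with batch index r.get().batchIndex
//       }
//   }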
void forceLinkIdempotencyIdTests() {}
namespace {
IdempotencyIdRef generate(Arena& arena) {
int length = deterministicRandom()->coinflip() ? deterministicRandom()->randomInt(16, 256) : 16;
StringRef id = makeString(length, arena);
deterministicRandom()->randomBytes(mutateString(id), length);
return IdempotencyIdRef(id);
}
} // namespace
TEST_CASE("/fdbclient/IdempotencyId/basic") {
Arena arena;
uint16_t firstBatchIndex = deterministicRandom()->randomUInt32();
uint16_t batchIndex = firstBatchIndex;
Version commitVersion = deterministicRandom()->randomInt64(0, std::numeric_limits<Version>::max());
std::vector<IdempotencyIdRef> idVector; // Reference
std::unordered_set<IdempotencyIdRef> idSet; // Make sure hash+equals works
IdempotencyIdKVBuilder builder; // Check kv data format
builder.setCommitVersion(commitVersion);
for (int i = 0; i < 5; ++i) {
auto id = generate(arena);
idVector.emplace_back(id);
idSet.emplace(id);
builder.add(id, batchIndex++);
}
batchIndex = firstBatchIndex;
Optional<KeyValue> kvOpt = builder.buildAndClear();
ASSERT(kvOpt.present());
const auto& kv = kvOpt.get();
ASSERT(idSet.size() == idVector.size());
for (const auto& id : idVector) {
auto commitResult = kvContainsIdempotencyId(kv, id);
ASSERT(commitResult.present());
ASSERT(commitResult.get().commitVersion == commitVersion);
ASSERT(commitResult.get().batchIndex == batchIndex++);
ASSERT(idSet.find(id) != idSet.end());
idSet.erase(id);
ASSERT(idSet.find(id) == idSet.end());
}
ASSERT(idSet.size() == 0);
ASSERT(!kvContainsIdempotencyId(kv, generate(arena)).present());
return Void();
}
TEST_CASE("/fdbclient/IdempotencyId/serialization") {
ASSERT(ObjectReader::fromStringRef<IdempotencyIdRef>(ObjectWriter::toValue(IdempotencyIdRef(), Unversioned()),
Unversioned()) == IdempotencyIdRef());
for (int i = 0; i < 1000; ++i) {
Arena arena;
auto id = generate(arena);
auto serialized = ObjectWriter::toValue(id, Unversioned());
IdempotencyIdRef t;
ObjectReader reader(serialized.begin(), Unversioned());
reader.deserialize(t);
ASSERT(t == id);
}
return Void();
}

@ -2356,6 +2356,21 @@ ACTOR Future<Void> forceRecovery(Reference<IClusterConnectionRecord> clusterFile
}
}
ACTOR Future<UID> auditStorage(Reference<IClusterConnectionRecord> clusterFile, KeyRange range, AuditType type) {
state Reference<AsyncVar<Optional<ClusterInterface>>> clusterInterface(new AsyncVar<Optional<ClusterInterface>>);
state Future<Void> leaderMon = monitorLeader<ClusterInterface>(clusterFile, clusterInterface);
loop {
while (!clusterInterface->get().present()) {
wait(clusterInterface->onChange());
}
UID auditId = wait(clusterInterface->get().get().triggerAudit.getReply(TriggerAuditRequest(type, range)));
TraceEvent(SevDebug, "ManagementAPIAuditStorageEnd").detail("AuditID", auditId);
return auditId;
}
}
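// Illustrative call site (a sketch; the AuditType value here is an assumption, not fixed by this diff):
//   UID auditId = wait(auditStorage(clusterFile, allKeys, AuditType::ValidateHA));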
ACTOR Future<Void> waitForPrimaryDC(Database cx, StringRef dcId) {
state ReadYourWritesTransaction tr(cx);

@ -2546,8 +2546,9 @@ void MultiVersionApi::setupNetwork() {
externalClients[filename] = {};
auto libCopies = copyExternalLibraryPerThread(path);
for (int idx = 0; idx < libCopies.size(); ++idx) {
bool unlinkOnLoad = libCopies[idx].second && CLIENT_KNOBS->DELETE_NATIVE_LIB_AFTER_LOADING;
externalClients[filename].push_back(Reference<ClientInfo>(
new ClientInfo(new DLApi(libCopies[idx].first, libCopies[idx].second /*unlink on load*/),
new ClientInfo(new DLApi(libCopies[idx].first, unlinkOnLoad /*unlink on load*/),
path,
useFutureVersion,
idx)));

@ -32,6 +32,7 @@
#include <vector>
#include "boost/algorithm/string.hpp"
#include "flow/CodeProbe.h"
#include "fmt/format.h"
@ -49,6 +50,7 @@
#include "fdbclient/ClusterConnectionFile.h" #include "fdbclient/ClusterConnectionFile.h"
#include "fdbclient/ClusterConnectionMemoryRecord.h" #include "fdbclient/ClusterConnectionMemoryRecord.h"
#include "fdbclient/CoordinationInterface.h" #include "fdbclient/CoordinationInterface.h"
#include "fdbclient/CommitTransaction.h"
#include "fdbclient/DatabaseContext.h" #include "fdbclient/DatabaseContext.h"
#include "fdbclient/GlobalConfig.actor.h" #include "fdbclient/GlobalConfig.actor.h"
#include "fdbclient/IKnobCollection.h" #include "fdbclient/IKnobCollection.h"
@ -190,6 +192,8 @@ void DatabaseContext::addTssMapping(StorageServerInterface const& ssi, StorageSe
TSSEndpointData(tssi.id(), tssi.getMappedKeyValues.getEndpoint(), metrics));
queueModel.updateTssEndpoint(ssi.getKeyValuesStream.getEndpoint().token.first(),
TSSEndpointData(tssi.id(), tssi.getKeyValuesStream.getEndpoint(), metrics));
queueModel.updateTssEndpoint(ssi.changeFeedStream.getEndpoint().token.first(),
TSSEndpointData(tssi.id(), tssi.changeFeedStream.getEndpoint(), metrics));
// non-data requests duplicated for load
queueModel.updateTssEndpoint(ssi.watchValue.getEndpoint().token.first(),
@ -200,6 +204,12 @@ void DatabaseContext::addTssMapping(StorageServerInterface const& ssi, StorageSe
TSSEndpointData(tssi.id(), tssi.getReadHotRanges.getEndpoint(), metrics));
queueModel.updateTssEndpoint(ssi.getRangeSplitPoints.getEndpoint().token.first(),
TSSEndpointData(tssi.id(), tssi.getRangeSplitPoints.getEndpoint(), metrics));
queueModel.updateTssEndpoint(ssi.overlappingChangeFeeds.getEndpoint().token.first(),
TSSEndpointData(tssi.id(), tssi.overlappingChangeFeeds.getEndpoint(), metrics));
// duplicated to ensure feed data cleanup
queueModel.updateTssEndpoint(ssi.changeFeedPop.getEndpoint().token.first(),
TSSEndpointData(tssi.id(), tssi.changeFeedPop.getEndpoint(), metrics));
}
}
@ -6113,6 +6123,61 @@ ACTOR static Future<Void> commitDummyTransaction(Reference<TransactionState> trS
}
}
ACTOR static Future<Optional<CommitResult>> determineCommitStatus(Reference<TransactionState> trState,
Version minPossibleCommitVersion,
Version maxPossibleCommitVersion,
IdempotencyIdRef idempotencyId) {
state Transaction tr(trState->cx);
state int retries = 0;
state Span span("NAPI:determineCommitStatus"_loc, trState->spanContext);
tr.span.setParent(span.context);
loop {
try {
tr.trState->options = trState->options;
tr.trState->taskID = trState->taskID;
tr.trState->authToken = trState->authToken;
tr.setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
tr.setOption(FDBTransactionOptions::READ_LOCK_AWARE);
Version rv = wait(tr.getReadVersion());
TraceEvent("DetermineCommitStatusAttempt")
.detail("IdempotencyId", idempotencyId.asStringRefUnsafe())
.detail("Retries", retries)
.detail("ReadVersion", rv)
.detail("MinPossibleCommitVersion", minPossibleCommitVersion)
.detail("MaxPossibleCommitVersion", maxPossibleCommitVersion);
KeyRange possibleRange =
KeyRangeRef(BinaryWriter::toValue(bigEndian64(minPossibleCommitVersion), Unversioned())
.withPrefix(idempotencyIdKeys.begin),
BinaryWriter::toValue(bigEndian64(maxPossibleCommitVersion + 1), Unversioned())
.withPrefix(idempotencyIdKeys.begin));
RangeResult range = wait(tr.getRange(possibleRange, CLIENT_KNOBS->TOO_MANY));
ASSERT(!range.more);
for (const auto& kv : range) {
auto commitResult = kvContainsIdempotencyId(kv, idempotencyId);
if (commitResult.present()) {
TraceEvent("DetermineCommitStatus")
.detail("Committed", 1)
.detail("IdempotencyId", idempotencyId.asStringRefUnsafe())
.detail("Retries", retries);
return commitResult;
}
}
TraceEvent("DetermineCommitStatus")
.detail("Committed", 0)
.detail("IdempotencyId", idempotencyId.asStringRefUnsafe())
.detail("Retries", retries);
return Optional<CommitResult>();
} catch (Error& e) {
TraceEvent("DetermineCommitStatusError")
.errorUnsuppressed(e)
.detail("IdempotencyId", idempotencyId.asStringRefUnsafe())
.detail("Retries", retries);
wait(tr.onError(e));
}
++retries;
}
}
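// The range construction above, spelled out (for reference): commit versions are stored
// big-endian under idempotencyIdKeys, so the scan
//   [prefix + bigEndian64(minPossibleCommitVersion), prefix + bigEndian64(maxPossibleCommitVersion + 1))
// covers exactly the commit versions the in-doubt transaction could have landed in.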
void Transaction::cancelWatches(Error const& e) {
for (int i = 0; i < watches.size(); ++i)
if (!watches[i]->onChangeTrigger.isSet())
@ -6420,7 +6485,7 @@ ACTOR static Future<Void> tryCommit(Reference<TransactionState> trState,
if (e.code() == error_code_request_maybe_delivered || e.code() == error_code_commit_unknown_result) {
// We don't know if the commit happened, and it might even still be in flight.
if (!trState->options.causalWriteRisky) {
if (!trState->options.causalWriteRisky || req.idempotencyId.valid()) {
// Make sure it's not still in flight, either by ensuring the master we submitted to is dead, or the
// version we submitted with is dead, or by committing a conflicting transaction successfully
// if ( cx->getCommitProxies()->masterGeneration <= originalMasterGeneration )
@ -6437,6 +6502,24 @@ ACTOR static Future<Void> tryCommit(Reference<TransactionState> trState,
wait(
commitDummyTransaction(trState, singleKeyRange(selfConflictingRange.begin), tenantPrefixPrepended));
if (req.idempotencyId.valid()) {
Optional<CommitResult> commitResult = wait(determineCommitStatus(
trState,
req.transaction.read_snapshot,
req.transaction.read_snapshot + 5e6 /* Based on MAX_WRITE_TRANSACTION_LIFE_VERSIONS */,
req.idempotencyId));
if (commitResult.present()) {
Standalone<StringRef> ret = makeString(10);
placeVersionstamp(
mutateString(ret), commitResult.get().commitVersion, commitResult.get().batchIndex);
trState->versionstampPromise.send(ret);
CODE_PROBE(true, "AutomaticIdempotencyCommitted");
return Void();
} else {
CODE_PROBE(true, "AutomaticIdempotencyNotCommitted");
throw transaction_too_old();
}
}
}
// The user needs to be informed that we aren't sure whether the commit happened. Standard retry loops
@ -6519,6 +6602,18 @@ Future<Void> Transaction::commitMutations() {
tr.transaction.read_conflict_ranges.emplace_back(
tr.arena, extraConflictRanges[i].get().first, extraConflictRanges[i].get().second);
if (tr.idempotencyId.valid()) {
// We need to be able to confirm that this transaction is no longer
// in flight, and if the idempotency id is in the read and write
// conflict ranges we can use that.
BinaryWriter wr(Unversioned());
wr.serializeBytes("\xFF/SC/"_sr);
wr.serializeBytes(tr.idempotencyId.asStringRefUnsafe());
auto r = singleKeyRange(wr.toValue(), tr.arena);
tr.transaction.read_conflict_ranges.push_back(tr.arena, r);
tr.transaction.write_conflict_ranges.push_back(tr.arena, r);
}
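// Net effect (my reading of the code above): the transaction self-conflicts on the single key
// "\xFF/SC/<idempotencyId>", so a conflicting transaction committed against that key later can
// prove this commit is no longer in flight.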
if (!trState->options.causalWriteRisky &&
!intersects(tr.transaction.write_conflict_ranges, tr.transaction.read_conflict_ranges).present())
makeSelfConflicting();
@ -6829,6 +6924,23 @@ void Transaction::setOption(FDBTransactionOptions::Option option, Optional<Strin
else
trState->authToken.reset();
break;
case FDBTransactionOptions::IDEMPOTENCY_ID:
validateOptionValuePresent(value);
if (!(value.get().size() >= 16 && value.get().size() < 256)) {
Error e = invalid_option();
TraceEvent(SevWarn, "IdempotencyIdInvalidSize")
.error(e)
.detail("IdempotencyId", value.get().printable())
.detail("Recommendation", "Use an idempotency id that's at least 16 bytes and less than 256 bytes");
throw e;
}
tr.idempotencyId = IdempotencyIdRef(tr.arena, IdempotencyIdRef(value.get()));
break;
case FDBTransactionOptions::AUTOMATIC_IDEMPOTENCY:
validateOptionValueNotPresent(value);
tr.idempotencyId = IdempotencyIdRef(
tr.arena, IdempotencyIdRef(BinaryWriter::toValue(deterministicRandom()->randomUniqueID(), Unversioned())));
break;
default:
break;
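// Illustrative client usage of the two new options (a sketch; the literal id is arbitrary
// but must be at least 16 and fewer than 256 bytes):
//   tr.setOption(FDBTransactionOptions::IDEMPOTENCY_ID, "my-16-byte-id-123"_sr);
// or let the client generate one:
//   tr.setOption(FDBTransactionOptions::AUTOMATIC_IDEMPOTENCY);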
@ -9039,6 +9151,273 @@ void DatabaseContext::setDesiredChangeFeedVersion(Version v) {
}
}
// Because two storage servers can have different representations of a clear at the same version, depending on
// their shard maps at the time of the mutation, it is non-trivial to directly compare change feed streams.
// Instead we compare the presence of data at each version. This both saves on the CPU cost of validation and
// still catches the common failures, because historically most change feed corruption bugs manifest as the
// absence of entire versions, not as a subset of mutations missing within a version.
struct ChangeFeedTSSValidationData {
PromiseStream<Version> ssStreamSummary;
ReplyPromiseStream<ChangeFeedStreamReply> tssStream;
Future<Void> validatorFuture;
std::deque<std::pair<Version, Version>> rollbacks;
Version popVersion = invalidVersion;
bool done = false;
ChangeFeedTSSValidationData() {}
ChangeFeedTSSValidationData(ReplyPromiseStream<ChangeFeedStreamReply> tssStream) : tssStream(tssStream) {}
void updatePopped(Version newPopVersion) { popVersion = std::max(popVersion, newPopVersion); }
bool checkRollback(const MutationsAndVersionRef& m) {
if (m.mutations.size() == 1 && m.mutations.back().param1 == lastEpochEndPrivateKey) {
if (rollbacks.empty() || rollbacks.back().second < m.version) {
Version rollbackVersion;
BinaryReader br(m.mutations.back().param2, Unversioned());
br >> rollbackVersion;
if (!rollbacks.empty()) {
ASSERT(rollbacks.back().second <= rollbackVersion);
}
rollbacks.push_back({ rollbackVersion, m.version });
}
return true;
} else {
return false;
}
}
bool shouldAddMutation(const MutationsAndVersionRef& m) {
return !done && !m.mutations.empty() && !checkRollback(m);
}
bool isRolledBack(Version v) {
return !rollbacks.empty() && rollbacks.front().first < v && rollbacks.front().second > v;
}
void send(const ChangeFeedStreamReply& ssReply) {
if (done) {
return;
}
updatePopped(ssReply.popVersion);
for (auto& it : ssReply.mutations) {
if (shouldAddMutation(it)) {
ssStreamSummary.send(it.version);
}
}
}
void complete() {
done = true;
// destroy TSS stream to stop server actor
tssStream.reset();
}
};
void handleTSSChangeFeedMismatch(const ChangeFeedStreamRequest& request,
const TSSEndpointData& tssData,
int64_t matchesFound,
Version lastMatchingVersion,
Version ssVersion,
Version tssVersion,
Version popVersion) {
if (request.canReadPopped) {
// There is a known issue where this can return different data between an SS and TSS when a feed was popped but
// the SS restarted before the pop could be persisted, for reads that can read popped data. As such, only count
// this as a mismatch when !req.canReadPopped
return;
}
CODE_PROBE(true, "TSS mismatch in stream comparison");
if (tssData.metrics->shouldRecordDetailedMismatch()) {
TraceEvent mismatchEvent(
(g_network->isSimulated() && g_simulator->tssMode == ISimulator::TSSMode::EnabledDropMutations)
? SevWarnAlways
: SevError,
"TSSMismatchChangeFeedStream");
mismatchEvent.setMaxEventLength(FLOW_KNOBS->TSS_LARGE_TRACE_SIZE);
// request info
mismatchEvent.detail("TSSID", tssData.tssId);
mismatchEvent.detail("FeedID", request.rangeID);
mismatchEvent.detail("BeginVersion", request.begin);
mismatchEvent.detail("EndVersion", request.end);
mismatchEvent.detail("StartKey", request.range.begin);
mismatchEvent.detail("EndKey", request.range.end);
mismatchEvent.detail("CanReadPopped", request.canReadPopped);
mismatchEvent.detail("PopVersion", popVersion);
mismatchEvent.detail("DebugUID", request.debugUID);
// mismatch info
mismatchEvent.detail("MatchesFound", matchesFound);
mismatchEvent.detail("LastMatchingVersion", lastMatchingVersion);
mismatchEvent.detail("SSVersion", ssVersion);
mismatchEvent.detail("TSSVersion", tssVersion);
CODE_PROBE(FLOW_KNOBS->LOAD_BALANCE_TSS_MISMATCH_TRACE_FULL,
"Tracing Full TSS Feed Mismatch in stream comparison");
CODE_PROBE(!FLOW_KNOBS->LOAD_BALANCE_TSS_MISMATCH_TRACE_FULL,
"Tracing Partial TSS Feed Mismatch in stream comparison and storing the rest in FDB");
if (!FLOW_KNOBS->LOAD_BALANCE_TSS_MISMATCH_TRACE_FULL) {
mismatchEvent.disable();
UID mismatchUID = deterministicRandom()->randomUniqueID();
tssData.metrics->recordDetailedMismatchData(mismatchUID, mismatchEvent.getFields().toString());
// record a summarized trace event instead
TraceEvent summaryEvent(
(g_network->isSimulated() && g_simulator->tssMode == ISimulator::TSSMode::EnabledDropMutations)
? SevWarnAlways
: SevError,
"TSSMismatchChangeFeedStream");
summaryEvent.detail("TSSID", tssData.tssId)
.detail("MismatchId", mismatchUID)
.detail("FeedDebugUID", request.debugUID);
}
}
}
ACTOR Future<Void> changeFeedTSSValidator(ChangeFeedStreamRequest req,
Optional<ChangeFeedTSSValidationData>* data,
TSSEndpointData tssData) {
state bool ssDone = false;
state bool tssDone = false;
state std::deque<Version> ssSummary;
state std::deque<Version> tssSummary;
ASSERT(data->present());
state int64_t matchesFound = 0;
state Version lastMatchingVersion = req.begin - 1;
loop {
// If SS stream gets error, whole stream data gets reset, so it's ok to cancel this actor
if (!ssDone && ssSummary.empty()) {
try {
Version next = waitNext(data->get().ssStreamSummary.getFuture());
ssSummary.push_back(next);
} catch (Error& e) {
if (e.code() != error_code_end_of_stream) {
data->get().complete();
if (e.code() != error_code_operation_cancelled) {
tssData.metrics->ssError(e.code());
}
throw e;
}
ssDone = true;
if (tssDone) {
data->get().complete();
return Void();
}
}
}
if (!tssDone && tssSummary.empty()) {
try {
choose {
when(ChangeFeedStreamReply nextTss = waitNext(data->get().tssStream.getFuture())) {
data->get().updatePopped(nextTss.popVersion);
for (auto& it : nextTss.mutations) {
if (data->get().shouldAddMutation(it)) {
tssSummary.push_back(it.version);
}
}
}
// if ss has result, tss needs to return it
when(wait((ssDone || !ssSummary.empty()) ? delay(2.0 * FLOW_KNOBS->LOAD_BALANCE_TSS_TIMEOUT)
: Never())) {
++tssData.metrics->tssTimeouts;
data->get().complete();
return Void();
}
}
} catch (Error& e) {
if (e.code() == error_code_operation_cancelled) {
throw e;
}
if (e.code() == error_code_end_of_stream) {
tssDone = true;
if (ssDone) {
data->get().complete();
return Void();
}
} else {
tssData.metrics->tssError(e.code());
data->get().complete();
return Void();
}
}
}
// handle rollbacks and concurrent pops
while (!ssSummary.empty() &&
(ssSummary.front() < data->get().popVersion || data->get().isRolledBack(ssSummary.front()))) {
ssSummary.pop_front();
}
while (!tssSummary.empty() &&
(tssSummary.front() < data->get().popVersion || data->get().isRolledBack(tssSummary.front()))) {
tssSummary.pop_front();
}
while (!ssSummary.empty() && !tssSummary.empty()) {
CODE_PROBE(true, "Comparing TSS change feed data");
if (ssSummary.front() != tssSummary.front()) {
CODE_PROBE(true, "TSS change feed mismatch");
handleTSSChangeFeedMismatch(req,
tssData,
matchesFound,
lastMatchingVersion,
ssSummary.front(),
tssSummary.front(),
data->get().popVersion);
data->get().complete();
return Void();
}
matchesFound++;
lastMatchingVersion = ssSummary.front();
ssSummary.pop_front();
tssSummary.pop_front();
while (!data->get().rollbacks.empty() && data->get().rollbacks.front().second <= lastMatchingVersion) {
data->get().rollbacks.pop_front();
}
}
ASSERT(!ssDone || !tssDone); // both shouldn't be done, otherwise we shouldn't have looped
if ((ssDone && !tssSummary.empty()) || (tssDone && !ssSummary.empty())) {
CODE_PROBE(true, "TSS change feed mismatch at end of stream");
handleTSSChangeFeedMismatch(req,
tssData,
matchesFound,
lastMatchingVersion,
ssDone ? -1 : ssSummary.front(),
tssDone ? -1 : tssSummary.front(),
data->get().popVersion);
data->get().complete();
return Void();
}
}
}
void maybeDuplicateTSSChangeFeedStream(ChangeFeedStreamRequest& req,
const RequestStream<ChangeFeedStreamRequest>& stream,
QueueModel* model,
Optional<ChangeFeedTSSValidationData>* tssData) {
if (model) {
Optional<TSSEndpointData> tssPair = model->getTssData(stream.getEndpoint().token.first());
if (tssPair.present()) {
CODE_PROBE(true, "duplicating feed stream to TSS");
resetReply(req);
RequestStream<ChangeFeedStreamRequest> tssRequestStream(tssPair.get().endpoint);
*tssData = Optional<ChangeFeedTSSValidationData>(
ChangeFeedTSSValidationData(tssRequestStream.getReplyStream(req)));
// tie validator actor to the lifetime of the stream being active
tssData->get().validatorFuture = changeFeedTSSValidator(req, tssData, tssPair.get());
}
}
}
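// A distilled, self-contained sketch (illustrative only, not part of this change) of the
// comparison step inside changeFeedTSSValidator: drop versions below the pop version on
// both sides, then require the fronts to match pairwise. Plain deques stand in for the
// actor state, and rollback handling is omitted for brevity.
#include <deque>
static bool summariesMatchSketch(std::deque<Version>& ss, std::deque<Version>& tss, Version popVersion) {
	while (!ss.empty() && ss.front() < popVersion) {
		ss.pop_front();
	}
	while (!tss.empty() && tss.front() < popVersion) {
		tss.pop_front();
	}
	while (!ss.empty() && !tss.empty()) {
		if (ss.front() != tss.front()) {
			return false; // the real actor reports this via handleTSSChangeFeedMismatch
		}
		ss.pop_front();
		tss.pop_front();
	}
	return true;
}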
ChangeFeedStorageData::~ChangeFeedStorageData() {
	if (context) {
		context->changeFeedUpdaters.erase(interfToken);
@@ -9160,7 +9539,8 @@ ACTOR Future<Void> partialChangeFeedStream(StorageServerInterface interf,
                                           Version end,
                                           Reference<ChangeFeedData> feedData,
                                           Reference<ChangeFeedStorageData> storageData,
                                           UID debugUID,
                                           Optional<ChangeFeedTSSValidationData>* tssData) {
	// calling lastReturnedVersion's callbacks could cause us to be cancelled
	state Promise<Void> refresh = feedData->refresh;
@@ -9204,6 +9584,9 @@ ACTOR Future<Void> partialChangeFeedStream(StorageServerInterface interf,
				if (rep.popVersion > feedData->popVersion) {
					feedData->popVersion = rep.popVersion;
				}
				if (tssData->present()) {
					tssData->get().updatePopped(rep.popVersion);
				}
				if (lastEmpty != invalidVersion && !results.isEmpty()) {
					for (auto& it : feedData->storageData) {
@@ -9218,6 +9601,10 @@ ACTOR Future<Void> partialChangeFeedStream(StorageServerInterface interf,
			while (resultLoc < rep.mutations.size()) {
				wait(results.onEmpty());
				if (rep.mutations[resultLoc].version >= nextVersion) {
					if (tssData->present() && tssData->get().shouldAddMutation(rep.mutations[resultLoc])) {
						tssData->get().ssStreamSummary.send(rep.mutations[resultLoc].version);
					}
					results.send(rep.mutations[resultLoc]);
					if (DEBUG_CF_CLIENT_TRACE) {
@@ -9414,6 +9801,11 @@ ACTOR Future<Void> mergeChangeFeedStream(Reference<DatabaseContext> db,
	state std::vector<Future<Void>> fetchers(interfs.size());
	state std::vector<Future<Void>> onErrors(interfs.size());
	state std::vector<MutationAndVersionStream> streams(interfs.size());
	state std::vector<Optional<ChangeFeedTSSValidationData>> tssDatas;
	tssDatas.reserve(interfs.size());
	for (int i = 0; i < interfs.size(); i++) {
		tssDatas.push_back({});
	}
	CODE_PROBE(interfs.size() > 10, "Large change feed merge cursor");
	CODE_PROBE(interfs.size() > 100, "Very large change feed merge cursor");
@@ -9421,12 +9813,12 @@ ACTOR Future<Void> mergeChangeFeedStream(Reference<DatabaseContext> db,
	state UID mergeCursorUID = UID();
	state std::vector<UID> debugUIDs;
	results->streams.clear();
	for (int i = 0; i < interfs.size(); i++) {
		ChangeFeedStreamRequest req;
		req.rangeID = rangeID;
		req.begin = *begin;
		req.end = end;
		req.range = interfs[i].second;
		req.canReadPopped = canReadPopped;
		// divide total buffer size among sub-streams, but keep individual streams large enough to be efficient
		req.replyBufferSize = replyBufferSize / interfs.size();
@@ -9438,7 +9830,11 @@ ACTOR Future<Void> mergeChangeFeedStream(Reference<DatabaseContext> db,
		mergeCursorUID =
		    UID(mergeCursorUID.first() ^ req.debugUID.first(), mergeCursorUID.second() ^ req.debugUID.second());
		results->streams.push_back(interfs[i].first.changeFeedStream.getReplyStream(req));
		maybeDuplicateTSSChangeFeedStream(req,
		                                  interfs[i].first.changeFeedStream,
		                                  db->enableLocalityLoadBalance ? &db->queueModel : nullptr,
		                                  &tssDatas[i]);
	}
	results->maxSeenVersion = invalidVersion;
@@ -9475,7 +9871,8 @@ ACTOR Future<Void> mergeChangeFeedStream(Reference<DatabaseContext> db,
		                                 end,
		                                 results,
		                                 results->storageData[i],
		                                 debugUIDs[i],
		                                 &tssDatas[i]);
	}
	wait(waitForAny(onErrors) || mergeChangeFeedStreamInternal(results, interfs, streams, begin, end, mergeCursorUID));
@@ -9529,7 +9926,8 @@ ACTOR Future<Void> singleChangeFeedStreamInternal(KeyRange range,
                                                  Reference<ChangeFeedData> results,
                                                  Key rangeID,
                                                  Version* begin,
                                                  Version end,
                                                  Optional<ChangeFeedTSSValidationData>* tssData) {
	state Promise<Void> refresh = results->refresh;
	ASSERT(results->streams.size() == 1);
@@ -9564,6 +9962,9 @@ ACTOR Future<Void> singleChangeFeedStreamInternal(KeyRange range,
		if (feedReply.popVersion > results->popVersion) {
			results->popVersion = feedReply.popVersion;
		}
		if (tssData->present()) {
			tssData->get().updatePopped(feedReply.popVersion);
		}
		// don't send completely empty set of mutations to promise stream
		bool anyMutations = false;
@@ -9578,6 +9979,10 @@ ACTOR Future<Void> singleChangeFeedStreamInternal(KeyRange range,
			// stream. Anything with mutations should be strictly greater than lastReturnedVersion
			ASSERT(feedReply.mutations.front().version > results->lastReturnedVersion.get());
			if (tssData->present()) {
				tssData->get().send(feedReply);
			}
			results->mutations.send(
			    Standalone<VectorRef<MutationsAndVersionRef>>(feedReply.mutations, feedReply.arena));
@@ -9629,6 +10034,7 @@ ACTOR Future<Void> singleChangeFeedStream(Reference<DatabaseContext> db,
                                          bool canReadPopped) {
	state Database cx(db);
	state ChangeFeedStreamRequest req;
	state Optional<ChangeFeedTSSValidationData> tssData;
	req.rangeID = rangeID;
	req.begin = *begin;
	req.end = end;
@@ -9662,7 +10068,11 @@ ACTOR Future<Void> singleChangeFeedStream(Reference<DatabaseContext> db,
	}
	refresh.send(Void());
	maybeDuplicateTSSChangeFeedStream(
	    req, interf.changeFeedStream, cx->enableLocalityLoadBalance ? &cx->queueModel : nullptr, &tssData);
	wait(results->streams[0].onError() ||
	     singleChangeFeedStreamInternal(range, results, rangeID, begin, end, &tssData));
	return Void();
}
@@ -10008,6 +10418,8 @@ ACTOR Future<Void> popChangeFeedMutationsActor(Reference<DatabaseContext> db, Ke
		return Void();
	}
	auto model = cx->enableLocalityLoadBalance ? &cx->queueModel : nullptr;
	bool foundFailed = false;
	for (int i = 0; i < locations.size() && !foundFailed; i++) {
		for (int j = 0; j < locations[i].locations->size() && !foundFailed; j++) {
@@ -10016,6 +10428,15 @@ ACTOR Future<Void> popChangeFeedMutationsActor(Reference<DatabaseContext> db, Ke
			        .isFailed()) {
				foundFailed = true;
			}
			// for now, if any popping SS has a TSS pair, just always use the backup method
			if (model && model
			                 ->getTssData(locations[i]
			                                  .locations->get(j, &StorageServerInterface::changeFeedPop)
			                                  .getEndpoint()
			                                  .token.first())
			                 .present()) {
				foundFailed = true;
			}
		}
	}

View File

@@ -570,6 +570,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
	init( RATEKEEPER_FAILURE_TIME, 1.0 );
	init( CONSISTENCYSCAN_FAILURE_TIME, 1.0 );
	init( BLOB_MANAGER_FAILURE_TIME, 1.0 );
	init( BLOB_MIGRATOR_FAILURE_TIME, 1.0 );
	init( REPLACE_INTERFACE_DELAY, 60.0 );
	init( REPLACE_INTERFACE_CHECK_DELAY, 5.0 );
	init( COORDINATOR_REGISTER_INTERVAL, 5.0 );
@@ -752,6 +753,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
	init( FETCH_KEYS_PARALLELISM_FULL, 6 );
	init( FETCH_KEYS_LOWER_PRIORITY, 0 );
	init( SERVE_FETCH_CHECKPOINT_PARALLELISM, 4 );
	init( SERVE_AUDIT_STORAGE_PARALLELISM, 2 );
	init( CHANGE_FEED_DISK_READS_PARALLELISM, 1000 ); if( randomize && BUGGIFY ) CHANGE_FEED_DISK_READS_PARALLELISM = 20;
	init( BUGGIFY_BLOCK_BYTES, 10000 );
	init( STORAGE_RECOVERY_VERSION_LAG_LIMIT, 2 * MAX_READ_TRANSACTION_LIFE_VERSIONS );
@@ -987,8 +989,8 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
	// Blob Metadata
	init( BLOB_METADATA_CACHE_TTL, isSimulated ? 120 : 24 * 60 * 60 );
	if ( randomize && BUGGIFY) { BLOB_METADATA_CACHE_TTL = deterministicRandom()->randomInt(50, 100); }
	init( BLOB_METADATA_REFRESH_INTERVAL, isSimulated ? 60 : 60 * 60 );
	if ( randomize && BUGGIFY) { BLOB_METADATA_REFRESH_INTERVAL = deterministicRandom()->randomInt(5, 120); }
	// HTTP KMS Connector
	init( REST_KMS_CONNECTOR_KMS_DISCOVERY_URL_MODE, "file");
@@ -1003,6 +1005,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
	// NOTE: Care must be taken when attempting to update below configurations for an up/running FDB cluster.
	init( REST_KMS_CONNECTOR_DISCOVER_KMS_URL_FILE, "");
	init( REST_KMS_CONNECTOR_GET_ENCRYPTION_KEYS_ENDPOINT, "");
	init( REST_KMS_CONNECTOR_GET_BLOB_METADATA_ENDPOINT, "");
	// Details to fetch validation token from a localhost file
	// acceptable format: "<token_name1>#<absolute_file_path1>,<token_name2>#<absolute_file_path2>,.."
	// NOTE: 'token-name' can NOT contain '#' character

View File

@@ -342,7 +342,7 @@ void TSS_traceMismatch(TraceEvent& event,
// change feed
template <>
bool TSS_doCompare(const OverlappingChangeFeedsReply& src, const OverlappingChangeFeedsReply& tss) {
	ASSERT(false); // We duplicate for load, no need to validate replies
	return true;
}

View File

@@ -286,6 +286,41 @@ const KeyRangeRef writeConflictRangeKeysRange = KeyRangeRef("\xff\xff/transactio
const KeyRef clusterIdKey = "\xff/clusterId"_sr;
const KeyRangeRef auditRange = KeyRangeRef("\xff/audit/"_sr, "\xff/audit0"_sr);
const KeyRef auditPrefix = auditRange.begin;
const Key auditRangeKey(const AuditType type, const UID& auditId, const KeyRef& key) {
BinaryWriter wr(Unversioned());
wr.serializeBytes(auditPrefix);
wr << static_cast<uint8_t>(type);
wr.serializeBytes("/"_sr);
wr << auditId;
wr.serializeBytes("/"_sr);
wr.serializeBytes(key);
return wr.toValue();
}
const Key auditRangePrefix(const AuditType type, const UID& auditId) {
BinaryWriter wr(Unversioned());
wr.serializeBytes(auditPrefix);
wr << static_cast<uint8_t>(type);
wr.serializeBytes("/"_sr);
wr << auditId;
wr.serializeBytes("/"_sr);
return wr.toValue();
}
const Value auditStorageStateValue(const AuditStorageState& auditStorageState) {
return ObjectWriter::toValue(auditStorageState, IncludeVersion());
}
AuditStorageState decodeAuditStorageState(const ValueRef& value) {
AuditStorageState auditState;
ObjectReader reader(value.begin(), IncludeVersion());
reader.deserialize(auditState);
return auditState;
}
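// A minimal usage sketch of the helpers above (illustrative only): auditRangeKey is
// auditRangePrefix followed by the caller's key, so a single prefix scan visits all
// per-key audit state for one audit.
static void auditKeyLayoutExample() {
	UID auditId = deterministicRandom()->randomUniqueID();
	Key prefix = auditRangePrefix(AuditType::ValidateHA, auditId);
	Key full = auditRangeKey(AuditType::ValidateHA, auditId, "some_key"_sr);
	ASSERT(full.startsWith(prefix)); // prefix + key, by construction
}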
const KeyRef checkpointPrefix = "\xff/checkpoint/"_sr;
const Key checkpointKeyFor(UID checkpointID) {
@@ -1629,6 +1664,9 @@ Key storageQuotaKey(StringRef tenantName) {
	return tenantName.withPrefix(storageQuotaPrefix);
}
const KeyRangeRef idempotencyIdKeys("\xff\x02/idmp/"_sr, "\xff\x02/idmp0"_sr);
const KeyRef idempotencyIdsExpiredVersion("\xff\x02/idmpExpiredVersion"_sr);
// for tests
void testSSISerdes(StorageServerInterface const& ssi) {
	printf("ssi=\nid=%s\nlocality=%s\nisTss=%s\ntssId=%s\nacceptingRequests=%s\naddress=%s\ngetValue=%s\n\n\n",

View File

@@ -99,6 +99,48 @@ Tuple Tuple::unpack(StringRef const& str, bool exclude_incomplete) {
	return Tuple(str, exclude_incomplete);
}
std::string Tuple::tupleToString(const Tuple& tuple) {
std::string str;
if (tuple.size() > 1) {
str += "(";
}
for (int i = 0; i < tuple.size(); ++i) {
Tuple::ElementType type = tuple.getType(i);
if (type == Tuple::NULL_TYPE) {
str += "NULL";
} else if (type == Tuple::BYTES || type == Tuple::UTF8) {
if (type == Tuple::UTF8) {
str += "u";
}
str += "\'" + tuple.getString(i).printable() + "\'";
} else if (type == Tuple::INT) {
str += format("%ld", tuple.getInt(i));
} else if (type == Tuple::FLOAT) {
str += format("%f", tuple.getFloat(i));
} else if (type == Tuple::DOUBLE) {
str += format("%f", tuple.getDouble(i));
} else if (type == Tuple::BOOL) {
str += tuple.getBool(i) ? "true" : "false";
} else if (type == Tuple::VERSIONSTAMP) {
TupleVersionstamp versionstamp = tuple.getVersionstamp(i);
str += format("Transaction Version: '%ld', BatchNumber: '%hd', UserVersion : '%hd'",
versionstamp.getVersion(),
versionstamp.getBatchNumber(),
versionstamp.getUserVersion());
} else {
ASSERT(false);
}
if (i < tuple.size() - 1) {
str += ", ";
}
}
if (tuple.size() > 1) {
str += ")";
}
return str;
}
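// A hedged usage sketch of tupleToString (illustrative only; assumes the existing Tuple
// appenders for integers and strings): multi-element tuples are wrapped in parentheses
// and each element is rendered by the type dispatch above.
static std::string tupleToStringExample() {
	Tuple t;
	t.append((int64_t)42);
	t.append("hello"_sr);
	return Tuple::tupleToString(t); // renders as (42, 'hello')
}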
Tuple Tuple::unpackUserType(StringRef const& str, bool exclude_incomplete) {
	return Tuple(str, exclude_incomplete, true);
}

View File

@@ -0,0 +1,111 @@
/*
* Audit.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FDBCLIENT_AUDIT_H
#define FDBCLIENT_AUDIT_H
#pragma once
#include "fdbclient/FDBTypes.h"
#include "fdbrpc/fdbrpc.h"
enum class AuditPhase : uint8_t {
Invalid = 0,
Running = 1,
Complete = 2,
Error = 3,
Failed = 4,
};
enum class AuditType : uint8_t {
Invalid = 0,
ValidateHA = 1,
};
struct AuditStorageState {
constexpr static FileIdentifier file_identifier = 13804340;
AuditStorageState() = default;
AuditStorageState(UID id, AuditType type) : id(id), type(static_cast<uint8_t>(type)) {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, id, type, phase, error);
}
void setType(AuditType type) { this->type = static_cast<uint8_t>(type); }
AuditType getType() const { return static_cast<AuditType>(this->type); }
void setPhase(AuditPhase phase) { this->phase = static_cast<uint8_t>(phase); }
AuditPhase getPhase() const { return static_cast<AuditPhase>(this->phase); }
UID id;
uint8_t type;
uint8_t phase;
std::string error;
};
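// A minimal sketch (illustrative only) of the accessor pattern above: the raw uint8_t
// fields keep the serialized form stable while callers round-trip through the typed enums.
inline AuditStorageState exampleAuditState(UID id) {
	AuditStorageState s(id, AuditType::ValidateHA);
	s.setPhase(AuditPhase::Running);
	ASSERT(s.getType() == AuditType::ValidateHA && s.getPhase() == AuditPhase::Running);
	return s;
}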
struct AuditStorageRequest {
constexpr static FileIdentifier file_identifier = 13804341;
AuditStorageRequest() = default;
AuditStorageRequest(UID id, KeyRange range, AuditType type)
: id(id), range(range), type(static_cast<uint8_t>(type)) {}
void setType(AuditType type) { this->type = static_cast<uint8_t>(type); }
AuditType getType() const { return static_cast<AuditType>(this->type); }
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, id, range, type, targetServers, reply);
}
UID id;
KeyRange range;
uint8_t type;
std::vector<UID> targetServers;
ReplyPromise<AuditStorageState> reply;
};
// Triggers an audit of the specific type; an audit id is returned if the audit is scheduled successfully.
// If there is already a running audit, its id is returned, unless force is true.
// When force is set, the ongoing audit is cancelled and a new audit is scheduled.
struct TriggerAuditRequest {
constexpr static FileIdentifier file_identifier = 1384445;
TriggerAuditRequest() = default;
TriggerAuditRequest(AuditType type, KeyRange range)
: type(static_cast<uint8_t>(type)), range(range), force(false), async(false) {}
void setType(AuditType type) { this->type = static_cast<uint8_t>(type); }
AuditType getType() const { return static_cast<AuditType>(this->type); }
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, type, range, force, async, reply);
}
uint8_t type;
KeyRange range;
bool force;
bool async;
ReplyPromise<UID> reply;
};
#endif

View File

@@ -0,0 +1,34 @@
/*
* AuditUtils.actor.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FDBCLIENT_AUDITUTILS_ACTOR_H
#define FDBCLIENT_AUDITUTILS_ACTOR_H
#pragma once
#include "fdbclient/Audit.h"
#include "fdbclient/FDBTypes.h"
#include "fdbrpc/fdbrpc.h"
#include "flow/actorcompiler.h" // has to be last include
ACTOR Future<Void> persistAuditStorageState(Key key, AuditStorageState auditState);
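// A hedged usage sketch of the declaration above (illustrative only; assumes the audit
// key helpers declared in fdbclient/SystemData.h are also available): persist a Running
// state under the audit's per-key slot for the start of a range.
ACTOR Future<Void> persistRunningAuditExample(UID auditId, KeyRange range) {
	AuditStorageState st(auditId, AuditType::ValidateHA);
	st.setPhase(AuditPhase::Running);
	wait(persistAuditStorageState(auditRangeKey(st.getType(), st.id, range.begin), st));
	return Void();
}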
#include "flow/unactorcompiler.h"
#endif

View File

@@ -106,6 +106,8 @@ public:
	std::array<CounterSet, int(UsageType::MAX)> counterSets;
};
std::string toString(BlobCipherMetrics::UsageType type);
// Encryption operations buffer management
// Approach limits number of copies needed during encryption or decryption operations.
// For encryption EncryptBuf is allocated using client supplied Arena and provided to AES library to capture
@@ -185,7 +187,7 @@ struct hash<BlobCipherDetails> {
#pragma pack(push, 1) // exact fit - no padding
typedef struct BlobCipherEncryptHeader {
	static constexpr int headerSize = 104;
	union {
		struct {
			uint8_t size; // reading first byte is sufficient to determine header
@@ -210,29 +212,22 @@ typedef struct BlobCipherEncryptHeader {
	// reads. FIPS compliance recommendation is to leverage cryptographic digest mechanism to generate 'authentication
	// token' (crypto-secure) to protect against malicious tampering and/or bit rot/flip scenarios.
	// Encryption header supports two modes of generating 'authentication tokens':
	// 1) SingleAuthTokenMode: the scheme generates a single crypto-secure auth token to protect the {cipherText +
	// header} payload. The scheme is geared towards optimizing cost due to crypto-secure auth-token generation;
	// however, on decryption the client needs to read 'header' + 'encrypted-buffer' to validate the 'auth-token'.
	// The scheme is ideal for use cases where the payload represented by the encryptionHeader is not large and it is
	// desirable to minimize CPU/latency penalty due to crypto-secure ops, such as: CommitProxies encrypted inline
	// transactions, StorageServer encrypting pages etc.
	// SOMEDAY: Another potential scheme could be 'MultiAuthTokenMode': the scheme generates separate authTokens
	// for 'encrypted buffer' & 'encryption-header'. The scheme is ideal where the payload represented by the
	// encryptionHeader is large enough such that it is desirable to optimize the cost of upfront reading the full
	// 'encrypted buffer', compared to reading only the encryptionHeader and ensuring its sanity; for instance:
	// backup-files.
	struct {
		uint8_t authToken[AUTH_TOKEN_MAX_SIZE]{};
	} singleAuthToken;
	BlobCipherEncryptHeader() {}
@@ -628,10 +623,6 @@ private:
	                               const int ciphertextLen,
	                               const BlobCipherEncryptHeader& header,
	                               Arena& arena);
};
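// A hedged sketch of what SingleAuthTokenMode implies for readers (names and entry point
// below are illustrative, not the library's actual API): the token covers the whole
// {ciphertext + header} payload, so both must be read before validation can complete.
#include <cstring>
static bool checkSingleAuthTokenSketch(const uint8_t* recomputedToken, const BlobCipherEncryptHeader& header) {
	// Illustration only: real validation recomputes an HMAC-SHA256 digest over the full
	// payload and should compare in constant time.
	return std::memcmp(recomputedToken, header.singleAuthToken.authToken, AUTH_TOKEN_MAX_SIZE) == 0;
}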
class HmacSha256DigestGen final : NonCopyable {

View File

@@ -33,13 +33,15 @@ struct BlobConnectionProvider : NonCopyable, ReferenceCounted<BlobConnectionProv
	// something returned from createForWrite
	virtual Reference<BackupContainerFileSystem> getForRead(std::string filePath) = 0;
	virtual bool isExpired() const = 0;
	virtual bool needsRefresh() const = 0;
	virtual void update(Standalone<BlobMetadataDetailsRef> newBlobMetadata) = 0;
	virtual ~BlobConnectionProvider() {}
	static Reference<BlobConnectionProvider> newBlobConnectionProvider(std::string blobUrl);
	static Reference<BlobConnectionProvider> newBlobConnectionProvider(Standalone<BlobMetadataDetailsRef> blobMetadata);
};
#endif

View File

@@ -44,18 +44,41 @@ struct BlobMetadataDetailsRef {
	Optional<StringRef> base;
	VectorRef<StringRef> partitions;
	// cache options
	double refreshAt;
	double expireAt;
	BlobMetadataDetailsRef() {}
	BlobMetadataDetailsRef(Arena& arena, const BlobMetadataDetailsRef& from)
	  : domainId(from.domainId), domainName(arena, from.domainName), partitions(arena, from.partitions),
	    refreshAt(from.refreshAt), expireAt(from.expireAt) {
		if (from.base.present()) {
			base = StringRef(arena, from.base.get());
		}
	}
	explicit BlobMetadataDetailsRef(Arena& ar,
	                                BlobMetadataDomainId domainId,
	                                BlobMetadataDomainNameRef domainName,
	                                Optional<StringRef> base,
	                                VectorRef<StringRef> partitions,
	                                int64_t refreshAt,
	                                int64_t expireAt)
	  : domainId(domainId), domainName(ar, domainName), partitions(ar, partitions), refreshAt(refreshAt),
	    expireAt(expireAt) {
		if (base.present()) {
			this->base = StringRef(ar, base.get());
		}
	}
	explicit BlobMetadataDetailsRef(BlobMetadataDomainId domainId,
	                                BlobMetadataDomainNameRef domainName,
	                                Optional<StringRef> base,
	                                VectorRef<StringRef> partitions,
	                                double refreshAt,
	                                double expireAt)
	  : domainId(domainId), domainName(domainName), base(base), partitions(partitions), refreshAt(refreshAt),
	    expireAt(expireAt) {}
	int expectedSize() const {
		return sizeof(BlobMetadataDetailsRef) + domainName.size() + (base.present() ? base.get().size() : 0) +
@@ -64,7 +87,7 @@ struct BlobMetadataDetailsRef {
	template <class Ar>
	void serialize(Ar& ar) {
		serializer(ar, domainId, domainName, base, partitions, refreshAt, expireAt);
	}
};
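// A hedged sketch (illustrative only) of how a cache can consume the two timestamps
// above, mirroring the needsRefresh/isExpired virtuals added to BlobConnectionProvider:
// refresh proactively once refreshAt passes, and stop serving the entry at expireAt.
inline bool blobMetadataNeedsRefresh(const BlobMetadataDetailsRef& d, double currentTime) {
	return currentTime >= d.refreshAt;
}
inline bool blobMetadataIsExpired(const BlobMetadataDetailsRef& d, double currentTime) {
	return currentTime >= d.expireAt;
}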

View File

@@ -0,0 +1,58 @@
/*
* BuildIdempotencyIdMutations.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FDBCLIENT_BUILD_IDEMPOTENCY_ID_MUTATIONS_H
#define FDBCLIENT_BUILD_IDEMPOTENCY_ID_MUTATIONS_H
#include "fdbclient/CommitProxyInterface.h"
#include "fdbclient/IdempotencyId.h"
#pragma once
// Iterate through trs looking for idempotency ids for committed transactions. Call onKvReady for each constructed key
// value pair.
template <class OnKVReady>
void buildIdempotencyIdMutations(const std::vector<CommitTransactionRequest>& trs,
IdempotencyIdKVBuilder& idempotencyKVBuilder,
Version commitVersion,
const std::vector<uint8_t>& committed,
uint8_t committedValue,
bool locked,
const OnKVReady& onKvReady) {
idempotencyKVBuilder.setCommitVersion(commitVersion);
for (int h = 0; h < trs.size(); h += 256) {
int end = std::min<int>(trs.size() - h, 256);
for (int l = 0; l < end; ++l) {
uint16_t batchIndex = h + l;
if ((committed[batchIndex] == committedValue && (!locked || trs[batchIndex].isLockAware()))) {
const auto& idempotency_id = trs[batchIndex].idempotencyId;
if (idempotency_id.valid()) {
idempotencyKVBuilder.add(idempotency_id, batchIndex);
}
}
}
Optional<KeyValue> kv = idempotencyKVBuilder.buildAndClear();
if (kv.present()) {
onKvReady(kv.get());
}
}
}
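// A hedged usage sketch of the template above (illustrative only; the output vector and
// surrounding state are hypothetical): collect each completed idempotency KV pair.
inline void collectIdempotencyIdMutationsExample(const std::vector<CommitTransactionRequest>& trs,
                                                 IdempotencyIdKVBuilder& builder,
                                                 Version commitVersion,
                                                 const std::vector<uint8_t>& committed,
                                                 uint8_t committedValue,
                                                 bool locked,
                                                 std::vector<KeyValue>& out) {
	buildIdempotencyIdMutations(trs, builder, commitVersion, committed, committedValue, locked,
	                            [&out](const KeyValue& kv) { out.push_back(kv); });
}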
#endif

View File

@@ -199,6 +199,7 @@ public:
	int32_t DEFAULT_MAX_GRV_PROXIES;
	int32_t DEFAULT_AUTO_RESOLVERS;
	int32_t DEFAULT_AUTO_LOGS;
	bool DELETE_NATIVE_LIB_AFTER_LOADING;
	double GLOBAL_CONFIG_REFRESH_BACKOFF;
	double GLOBAL_CONFIG_REFRESH_MAX_BACKOFF;
View File

@@ -40,6 +40,7 @@ struct ClusterInterface {
	RequestStream<struct MoveShardRequest> moveShard;
	RequestStream<struct RepairSystemDataRequest> repairSystemData;
	RequestStream<struct SplitShardRequest> splitShard;
	RequestStream<struct TriggerAuditRequest> triggerAudit;
	bool operator==(ClusterInterface const& r) const { return id() == r.id(); }
	bool operator!=(ClusterInterface const& r) const { return id() != r.id(); }
@@ -51,7 +52,7 @@ struct ClusterInterface {
		       databaseStatus.getFuture().isReady() || ping.getFuture().isReady() ||
		       getClientWorkers.getFuture().isReady() || forceRecovery.getFuture().isReady() ||
		       moveShard.getFuture().isReady() || repairSystemData.getFuture().isReady() ||
		       splitShard.getFuture().isReady() || triggerAudit.getFuture().isReady();
	}
	void initEndpoints() {
@@ -64,6 +65,7 @@ struct ClusterInterface {
		moveShard.getEndpoint(TaskPriority::ClusterController);
		repairSystemData.getEndpoint(TaskPriority::ClusterController);
		splitShard.getEndpoint(TaskPriority::ClusterController);
		triggerAudit.getEndpoint(TaskPriority::ClusterController);
	}
	template <class Ar>
@@ -77,7 +79,8 @@ struct ClusterInterface {
		           forceRecovery,
		           moveShard,
		           repairSystemData,
		           splitShard,
		           triggerAudit);
	}
};
View File

@@ -30,6 +30,7 @@
#include "fdbclient/FDBTypes.h"
#include "fdbclient/GlobalConfig.h"
#include "fdbclient/GrvProxyInterface.h"
#include "fdbclient/IdempotencyId.h"
#include "fdbclient/StorageServerInterface.h"
#include "fdbclient/TagThrottle.actor.h"
#include "fdbclient/VersionVector.h"
@@ -186,6 +187,7 @@ struct CommitTransactionRequest : TimedRequest {
	Optional<UID> debugID;
	Optional<ClientTrCommitCostEstimation> commitCostEstimation;
	Optional<TagSet> tagSet;
	IdempotencyIdRef idempotencyId;
	TenantInfo tenantInfo;
@@ -196,8 +198,17 @@ struct CommitTransactionRequest : TimedRequest {
	template <class Ar>
	void serialize(Ar& ar) {
		serializer(ar,
		           transaction,
		           reply,
		           flags,
		           debugID,
		           commitCostEstimation,
		           tagSet,
		           spanContext,
		           tenantInfo,
		           idempotencyId,
		           arena);
	}
};
@@ -224,6 +235,7 @@ struct GetReadVersionReply : public BasicLoadBalancedReply {
	bool rkBatchThrottled = false;
	TransactionTagMap<ClientTagThrottleLimits> tagThrottleInfo;
	double proxyTagThrottledDuration{ 0.0 };
	VersionVector ssVersionVectorDelta;
	UID proxyId; // GRV proxy ID to detect old GRV proxies at client side
@@ -242,7 +254,8 @@ struct GetReadVersionReply : public BasicLoadBalancedReply {
		           rkDefaultThrottled,
		           rkBatchThrottled,
		           ssVersionVectorDelta,
		           proxyId,
		           proxyTagThrottledDuration);
	}
};
@@ -267,6 +280,10 @@ struct GetReadVersionRequest : TimedRequest {
	TransactionPriority priority;
	TransactionTagMap<uint32_t> tags;
	// Not serialized, because this field does not need to be sent to master.
	// It is used for reporting to clients the amount of time spent delayed by
	// the TagQueue
	double proxyTagThrottledDuration{ 0.0 };
	Optional<UID> debugID;
	ReplyPromise<GetReadVersionReply> reply;
@@ -303,6 +320,8 @@ struct GetReadVersionRequest : TimedRequest {
	bool operator<(GetReadVersionRequest const& rhs) const { return priority < rhs.priority; }
	bool isTagged() const { return !tags.empty(); }
	template <class Ar>
	void serialize(Ar& ar) {
		serializer(ar, transactionCount, flags, tags, debugID, reply, spanContext, maxVersion);

View File

@@ -59,11 +59,12 @@ Future<Void> onEncryptKeyProxyChange(Reference<AsyncVar<T> const> db) {
ACTOR template <class T>
Future<EKPGetLatestBaseCipherKeysReply> getUncachedLatestEncryptCipherKeys(Reference<AsyncVar<T> const> db,
                                                                           EKPGetLatestBaseCipherKeysRequest request,
                                                                           BlobCipherMetrics::UsageType usageType) {
	Optional<EncryptKeyProxyInterface> proxy = db->get().encryptKeyProxy;
	if (!proxy.present()) {
		// Wait for onEncryptKeyProxyChange.
		TraceEvent("GetLatestEncryptCipherKeys_EncryptKeyProxyNotPresent").detail("UsageType", toString(usageType));
		return Never();
	}
	request.reply.reset();
@@ -117,7 +118,7 @@ Future<std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>>> getL
	// Fetch any uncached cipher keys.
	state double startTime = now();
	loop choose {
		when(EKPGetLatestBaseCipherKeysReply reply = wait(getUncachedLatestEncryptCipherKeys(db, request, usageType))) {
			// Insert base cipher keys into cache and construct result.
			for (const EKPBaseCipherDetails& details : reply.baseCipherDetails) {
				EncryptCipherDomainId domainId = details.encryptDomainId;
@@ -167,11 +168,12 @@ Future<Reference<BlobCipherKey>> getLatestEncryptCipherKey(Reference<AsyncVar<T>
ACTOR template <class T>
Future<EKPGetBaseCipherKeysByIdsReply> getUncachedEncryptCipherKeys(Reference<AsyncVar<T> const> db,
                                                                    EKPGetBaseCipherKeysByIdsRequest request,
                                                                    BlobCipherMetrics::UsageType usageType) {
	Optional<EncryptKeyProxyInterface> proxy = db->get().encryptKeyProxy;
	if (!proxy.present()) {
		// Wait for onEncryptKeyProxyChange.
		TraceEvent("GetEncryptCipherKeys_EncryptKeyProxyNotPresent").detail("UsageType", toString(usageType));
		return Never();
	}
	request.reply.reset();
@@ -232,7 +234,7 @@ Future<std::unordered_map<BlobCipherDetails, Reference<BlobCipherKey>>> getEncry
	// Fetch any uncached cipher keys.
	state double startTime = now();
	loop choose {
		when(EKPGetBaseCipherKeysByIdsReply reply = wait(getUncachedEncryptCipherKeys(db, request, usageType))) {
			std::unordered_map<BaseCipherIndex, EKPBaseCipherDetails, boost::hash<BaseCipherIndex>> baseCipherKeys;
			for (const EKPBaseCipherDetails& baseDetails : reply.baseCipherDetails) {
				BaseCipherIndex baseIdx = std::make_pair(baseDetails.encryptDomainId, baseDetails.baseCipherId);

View File

@@ -0,0 +1,166 @@
/*
* IdempotencyId.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FDBCLIENT_IDEMPOTENCYID_H
#define FDBCLIENT_IDEMPOTENCYID_H
#pragma once
#include "fdbclient/FDBTypes.h"
#include "fdbclient/PImpl.h"
#include "flow/Arena.h"
#include "flow/IRandom.h"
#include "flow/serialize.h"
struct CommitResult {
Version commitVersion;
uint16_t batchIndex;
};
// See design/idempotency_ids.md for more information. Designed so that the common case of a random 16 byte id does not
// usually require indirection. Either invalid or an id with length >= 16 and < 256.
struct IdempotencyIdRef {
static constexpr auto file_identifier = 3858470;
// Create an invalid IdempotencyIdRef
IdempotencyIdRef() : first(0) {}
// Borrows memory from the StringRef
explicit IdempotencyIdRef(StringRef id) {
if (id.empty()) {
first = 0;
return;
}
ASSERT(id.size() >= 16);
ASSERT(id.size() < 256);
if (id.size() == 16 &&
/* If it's 16 bytes but first < 256 we still need to use an indirection to avoid ambiguity. */
reinterpret_cast<const uint64_t*>(id.begin())[0] >= 256) {
first = reinterpret_cast<const uint64_t*>(id.begin())[0];
second.id = reinterpret_cast<const uint64_t*>(id.begin())[1];
} else {
first = id.size();
second.ptr = id.begin();
}
}
IdempotencyIdRef(Arena& arena, IdempotencyIdRef t)
: IdempotencyIdRef(t.valid() && t.indirect() ? StringRef(arena, t.asStringRefUnsafe()) : t.asStringRefUnsafe()) {}
int expectedSize() const {
if (valid() && indirect()) {
return first;
}
return 0;
}
bool operator==(const IdempotencyIdRef& other) const { return asStringRefUnsafe() == other.asStringRefUnsafe(); }
IdempotencyIdRef(IdempotencyIdRef&& other) = default;
IdempotencyIdRef& operator=(IdempotencyIdRef&& other) = default;
IdempotencyIdRef(const IdempotencyIdRef& other) = default;
IdempotencyIdRef& operator=(const IdempotencyIdRef& other) = default;
template <class Archive>
void serialize(Archive& ar) {
// Only support network messages/object serializer for now
ASSERT(false);
}
bool valid() const { return first != 0; }
// Result may reference this, so *this must outlive result.
StringRef asStringRefUnsafe() const {
if (!valid()) {
return StringRef();
}
if (indirect()) {
return StringRef(second.ptr, first);
} else {
return StringRef(reinterpret_cast<const uint8_t*>(this), sizeof(*this));
}
}
private:
bool indirect() const { return first < 256; }
// first == 0 means this id is invalid. This representation is not ambiguous
// because if first < 256, then first is the length of the id, but a valid
// id is at least 16 bytes long.
uint64_t first;
union {
uint64_t id;
const uint8_t* ptr;
} second; // If first < 256, then ptr is valid. Otherwise id is valid.
};
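// A minimal sketch of the representation rule above (illustrative only): a 16-byte id
// whose first 8 bytes decode to a value >= 256 is stored inline in the two words, and
// every other valid id goes through the (length, pointer) indirection.
inline bool wouldStoreInlineSketch(StringRef id) {
	return id.size() == 16 && reinterpret_cast<const uint64_t*>(id.begin())[0] >= 256;
}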
using IdempotencyId = Standalone<IdempotencyIdRef>;
namespace std {
template <>
struct hash<IdempotencyIdRef> {
std::size_t operator()(const IdempotencyIdRef& id) const { return std::hash<StringRef>{}(id.asStringRefUnsafe()); }
};
template <>
struct hash<IdempotencyId> {
std::size_t operator()(const IdempotencyId& id) const { return std::hash<StringRef>{}(id.asStringRefUnsafe()); }
};
} // namespace std
template <>
struct dynamic_size_traits<IdempotencyIdRef> : std::true_type {
template <class Context>
static size_t size(const IdempotencyIdRef& t, Context&) {
return t.asStringRefUnsafe().size();
}
template <class Context>
static void save(uint8_t* out, const IdempotencyIdRef& t, Context&) {
StringRef s = t.asStringRefUnsafe();
std::copy(s.begin(), s.end(), out);
}
template <class Context>
static void load(const uint8_t* ptr, size_t sz, IdempotencyIdRef& id, Context& context) {
id = IdempotencyIdRef(StringRef(context.tryReadZeroCopy(ptr, sz), sz));
}
};
// The plan is to use this as a key in a potentially large hashtable, so it should be compact.
static_assert(sizeof(IdempotencyIdRef) == 16);
// Use in the commit proxy to construct a kv pair according to the format described in design/idempotency_ids.md
struct IdempotencyIdKVBuilder : NonCopyable {
IdempotencyIdKVBuilder();
void setCommitVersion(Version commitVersion);
// All calls to add must share the same high order byte of batchIndex (until the next call to buildAndClear)
void add(const IdempotencyIdRef& id, uint16_t batchIndex);
// Must call setCommitVersion before calling buildAndClear. After calling buildAndClear, this object is ready to
// start a new kv pair for the high order byte of batchIndex.
Optional<KeyValue> buildAndClear();
~IdempotencyIdKVBuilder();
private:
PImpl<struct IdempotencyIdKVBuilderImpl> impl;
};
// Check if id is present in kv, and if so return the commit version and batchIndex
Optional<CommitResult> kvContainsIdempotencyId(const KeyValueRef& kv, const IdempotencyIdRef& id);
#endif

View File

@@ -138,6 +138,9 @@ ACTOR Future<int> setDDMode(Database cx, int mode);
ACTOR Future<Void> forceRecovery(Reference<IClusterConnectionRecord> clusterFile, Standalone<StringRef> dcId);
// Start an audit of the specific type on the given range.
ACTOR Future<UID> auditStorage(Reference<IClusterConnectionRecord> clusterFile, KeyRange range, AuditType type);
ACTOR Future<Void> printHealthyZone(Database cx);
ACTOR Future<bool> clearHealthyZone(Database cx, bool printWarning = false, bool clearSSFailureZoneString = false);
ACTOR Future<bool> setHealthyZone(Database cx, StringRef zoneId, double seconds, bool printWarning = false);
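// A hedged usage sketch of auditStorage (illustrative only; the trace event name is
// hypothetical): schedule an HA validation audit over the whole keyspace.
ACTOR Future<Void> triggerHaAuditExample(Reference<IClusterConnectionRecord> clusterFile) {
	UID auditId = wait(auditStorage(clusterFile, allKeys, AuditType::ValidateHA));
	TraceEvent("ExampleAuditScheduled").detail("AuditId", auditId);
	return Void();
}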

View File

@@ -480,6 +480,7 @@ public:
	double RATEKEEPER_FAILURE_TIME;
	double CONSISTENCYSCAN_FAILURE_TIME;
	double BLOB_MANAGER_FAILURE_TIME;
	double BLOB_MIGRATOR_FAILURE_TIME;
	double REPLACE_INTERFACE_DELAY;
	double REPLACE_INTERFACE_CHECK_DELAY;
	double COORDINATOR_REGISTER_INTERVAL;
@@ -703,6 +704,7 @@ public:
	int FETCH_KEYS_PARALLELISM_FULL;
	int FETCH_KEYS_LOWER_PRIORITY;
	int SERVE_FETCH_CHECKPOINT_PARALLELISM;
	int SERVE_AUDIT_STORAGE_PARALLELISM;
	int CHANGE_FEED_DISK_READS_PARALLELISM;
	int BUGGIFY_BLOCK_BYTES;
	int64_t STORAGE_RECOVERY_VERSION_LAG_LIMIT;
@@ -973,6 +975,7 @@ public:
	bool REST_KMS_CONNECTOR_REFRESH_KMS_URLS;
	double REST_KMS_CONNECTOR_REFRESH_KMS_URLS_INTERVAL_SEC;
	std::string REST_KMS_CONNECTOR_GET_ENCRYPTION_KEYS_ENDPOINT;
	std::string REST_KMS_CONNECTOR_GET_BLOB_METADATA_ENDPOINT;
	ServerKnobs(Randomize, ClientKnobs*, IsSimulated);
	void initialize(Randomize, ClientKnobs*, IsSimulated);
View File

@@ -22,6 +22,7 @@
#define FDBCLIENT_STORAGESERVERINTERFACE_H
#pragma once
#include "fdbclient/Audit.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/StorageCheckpoint.h"
#include "fdbclient/StorageServerShard.h"
@@ -120,8 +121,8 @@ struct StorageServerInterface {
	RequestStream<struct GetCheckpointRequest> checkpoint;
	RequestStream<struct FetchCheckpointRequest> fetchCheckpoint;
	RequestStream<struct FetchCheckpointKeyValuesRequest> fetchCheckpointKeyValues;
	RequestStream<struct UpdateCommitCostRequest> updateCommitCostRequest;
	RequestStream<struct AuditStorageRequest> auditStorage;
private:
	bool acceptingRequests;
@@ -195,6 +196,8 @@ public:
			    getValue.getEndpoint().getAdjustedEndpoint(21));
			updateCommitCostRequest =
			    RequestStream<struct UpdateCommitCostRequest>(getValue.getEndpoint().getAdjustedEndpoint(22));
			auditStorage =
			    RequestStream<struct AuditStorageRequest>(getValue.getEndpoint().getAdjustedEndpoint(23));
		}
	} else {
		ASSERT(Ar::isDeserializing);
@@ -246,6 +249,7 @@ public:
		streams.push_back(fetchCheckpoint.getReceiver());
		streams.push_back(fetchCheckpointKeyValues.getReceiver());
		streams.push_back(updateCommitCostRequest.getReceiver());
		streams.push_back(auditStorage.getReceiver());
		FlowTransport::transport().addEndpoints(streams);
	}
};

View File

@@ -94,6 +94,13 @@ void decodeKeyServersValue(RangeResult result,
extern const KeyRef clusterIdKey;
extern const KeyRangeRef auditRange;
extern const KeyRef auditPrefix;
const Key auditRangeKey(const AuditType type, const UID& auditId, const KeyRef& key);
const Key auditRangePrefix(const AuditType type, const UID& auditId);
const Value auditStorageStateValue(const AuditStorageState& auditStorageState);
AuditStorageState decodeAuditStorageState(const ValueRef& value);
// "\xff/checkpoint/[[UID]] := [[CheckpointMetaData]]"
extern const KeyRef checkpointPrefix;
const Key checkpointKeyFor(UID checkpointID);
@@ -708,6 +715,9 @@ extern const KeyRangeRef storageQuotaKeys;
extern const KeyRef storageQuotaPrefix;
Key storageQuotaKey(StringRef tenantName);
extern const KeyRangeRef idempotencyIdKeys;
extern const KeyRef idempotencyIdsExpiredVersion;
#pragma clang diagnostic pop
#endif

View File

@@ -48,6 +48,7 @@ struct Tuple {
	// Note that strings can't be incomplete because they are parsed such that the end of the packed
	// byte string is considered the end of the string in lieu of a specific end.
	static Tuple unpack(StringRef const& str, bool exclude_incomplete = false);
	static std::string tupleToString(Tuple const& tuple);
	static Tuple unpackUserType(StringRef const& str, bool exclude_incomplete = false);
	Tuple& append(Tuple const& tuple);

View File

@@ -201,6 +201,9 @@ description is not currently required but encouraged.
	<Option name="transaction_include_port_in_address" code="505"
	        description="Deprecated. Addresses returned by get_addresses_for_key include the port when enabled. As of api version 630, this option is enabled by default and setting this has no effect."
	        defaultFor="23"/>
	<Option name="transaction_automatic_idempotency" code="506"
	        description="Set a random idempotency id for all transactions. See the transaction option description for more information."
	        defaultFor="505"/>
	<Option name="transaction_bypass_unreadable" code="700"
	        description="Allows ``get`` operations to read from sections of keyspace that have become unreadable because of versionstamp operations. This sets the ``bypass_unreadable`` option of each transaction created by this database. See the transaction option description for more information."
	        defaultFor="1100"/>
@@ -273,6 +276,11 @@ description is not currently required but encouraged.
	<Option name="size_limit" code="503"
	        paramType="Int" paramDescription="value in bytes"
	        description="Set the transaction size limit in bytes. The size is calculated by combining the sizes of all keys and values written or mutated, all key ranges cleared, and all read and write conflict ranges. (In other words, it includes the total size of all data included in the request to the cluster to commit the transaction.) Large transactions can cause performance problems on FoundationDB clusters, so setting this limit to a smaller value than the default can help prevent the client from accidentally degrading the cluster's performance. This value must be at least 32 and cannot be set to higher than 10,000,000, the default transaction size limit." />
<Option name="idempotency_id" code="504"
paramType="String" paramDescription="Unique ID"
description="Associate this transaction with this ID for the purpose of checking whether or not this transaction has already committed. Must be at least 16 bytes and less than 256 bytes." />
<Option name="automatic_idempotency" code="505"
description="Automatically assign a random 16 byte idempotency id for this transaction. Prevents commits from failing with ``commit_unknown_result``. WARNING: If you are also using the multiversion client or transaction timeouts, if either cluster_version_changed or transaction_timed_out was thrown during a commit, then that commit may have already succeeded or may succeed in the future." />
<Option name="snapshot_ryw_enable" code="600" <Option name="snapshot_ryw_enable" code="600"
description="Snapshot read operations will see the results of writes done in the same transaction. This is the default behavior." /> description="Snapshot read operations will see the results of writes done in the same transaction. This is the default behavior." />
<Option name="snapshot_ryw_disable" code="601" <Option name="snapshot_ryw_disable" code="601"
View File
@ -283,6 +283,15 @@ ProcessClass::Fitness ProcessClass::machineClassFitness(ClusterRole role) const
default: default:
return ProcessClass::NeverAssign; return ProcessClass::NeverAssign;
} }
case ProcessClass::BlobMigrator:
switch (_class) {
case ProcessClass::StatelessClass:
return ProcessClass::GoodFit;
case ProcessClass::MasterClass:
return ProcessClass::OkayFit;
default:
return ProcessClass::NeverAssign;
}
case ProcessClass::StorageCache: case ProcessClass::StorageCache:
switch (_class) { switch (_class) {
case ProcessClass::StorageCacheClass: case ProcessClass::StorageCacheClass:
View File
@ -8,6 +8,7 @@
#include "flow/network.h" #include "flow/network.h"
#include <boost/unordered_map.hpp> #include <boost/unordered_map.hpp>
#include <boost/unordered_set.hpp>
#include <fmt/format.h> #include <fmt/format.h>
#include <list> #include <list>
@ -123,20 +124,70 @@ TEST_CASE("/fdbrpc/authz/LRUCache") {
return Void(); return Void();
} }
struct TokenCacheImpl { struct CacheEntry {
struct CacheEntry { Arena arena;
Arena arena; VectorRef<TenantNameRef> tenants;
VectorRef<TenantNameRef> tenants; Optional<StringRef> tokenId;
double expirationTime = 0.0; double expirationTime = 0.0;
}; };
struct AuditEntry {
NetworkAddress address;
Optional<Standalone<StringRef>> tokenId;
explicit AuditEntry(NetworkAddress const& address, CacheEntry const& cacheEntry)
: address(address),
tokenId(cacheEntry.tokenId.present() ? Standalone<StringRef>(cacheEntry.tokenId.get(), cacheEntry.arena)
: Optional<Standalone<StringRef>>()) {}
};
bool operator==(AuditEntry const& lhs, AuditEntry const& rhs) {
return (lhs.address == rhs.address) && (lhs.tokenId.present() == rhs.tokenId.present()) &&
(!lhs.tokenId.present() || lhs.tokenId.get() == rhs.tokenId.get());
}
std::size_t hash_value(AuditEntry const& value) {
std::size_t seed = 0;
boost::hash_combine(seed, value.address);
if (value.tokenId.present()) {
boost::hash_combine(seed, value.tokenId.get());
}
return seed;
}
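
For reference, a standalone sketch (illustrative types, not FDB's) of the boost hashing idiom used above: a free hash_value overload found by argument-dependent lookup is what lets boost::unordered_set<AuditEntry> deduplicate entries without a std::hash specialization.

#include <boost/functional/hash.hpp>
#include <boost/unordered_set.hpp>
#include <string>

struct Entry {
    std::string address;
    std::string tokenId;
};

bool operator==(Entry const& a, Entry const& b) {
    return a.address == b.address && a.tokenId == b.tokenId;
}

std::size_t hash_value(Entry const& e) {
    std::size_t seed = 0;
    boost::hash_combine(seed, e.address);
    boost::hash_combine(seed, e.tokenId);
    return seed;
}

int main() {
    boost::unordered_set<Entry> s;
    s.insert({ "10.0.0.1:4500", "token-a" });
    s.insert({ "10.0.0.1:4500", "token-a" }); // duplicate, not inserted
    return s.size() == 1 ? 0 : 1;
}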
struct TokenCacheImpl {
LRUCache<StringRef, CacheEntry> cache; LRUCache<StringRef, CacheEntry> cache;
TokenCacheImpl() : cache(FLOW_KNOBS->TOKEN_CACHE_SIZE) {} boost::unordered_set<AuditEntry> usedTokens;
Future<Void> auditor;
TokenCacheImpl();
bool validate(TenantNameRef tenant, StringRef token); bool validate(TenantNameRef tenant, StringRef token);
bool validateAndAdd(double currentTime, StringRef token, NetworkAddress const& peer); bool validateAndAdd(double currentTime, StringRef token, NetworkAddress const& peer);
}; };
ACTOR Future<Void> tokenCacheAudit(TokenCacheImpl* self) {
state boost::unordered_set<AuditEntry> audits;
state boost::unordered_set<AuditEntry>::iterator iter;
state double lastLoggedTime = 0;
loop {
auto const timeSinceLog = g_network->timer() - lastLoggedTime;
if (timeSinceLog < FLOW_KNOBS->AUDIT_TIME_WINDOW) {
wait(delay(FLOW_KNOBS->AUDIT_TIME_WINDOW - timeSinceLog));
}
lastLoggedTime = g_network->timer();
audits.swap(self->usedTokens);
for (iter = audits.begin(); iter != audits.end(); ++iter) {
CODE_PROBE(true, "Audit Logging Running");
TraceEvent("AuditTokenUsed").detail("Client", iter->address).detail("TokenId", iter->tokenId).log();
wait(yield());
}
audits.clear();
}
}
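
The audit loop above batches token uses per time window and swaps the set out before logging, so validation never blocks on logging. The same pattern in plain, runnable C++ (a 5-second window is assumed here; FDB uses FLOW_KNOBS->AUDIT_TIME_WINDOW and the flow scheduler rather than a sleeping thread):

#include <chrono>
#include <cstdio>
#include <set>
#include <string>
#include <thread>
#include <utility>

int main() {
    std::set<std::pair<std::string, std::string>> used, draining;
    used.insert({ "10.0.0.1:4500", "token-a" });
    used.insert({ "10.0.0.1:4500", "token-a" }); // deduplicated within the window
    std::this_thread::sleep_for(std::chrono::seconds(5));
    draining.swap(used); // cheap handoff; new uses land in the fresh set
    for (const auto& [client, token] : draining)
        std::printf("AuditTokenUsed client=%s token=%s\n", client.c_str(), token.c_str());
}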
TokenCacheImpl::TokenCacheImpl() : cache(FLOW_KNOBS->TOKEN_CACHE_SIZE) {
auditor = tokenCacheAudit(this);
}
TokenCache::TokenCache() : impl(new TokenCacheImpl()) {} TokenCache::TokenCache() : impl(new TokenCacheImpl()) {}
TokenCache::~TokenCache() { TokenCache::~TokenCache() {
delete impl; delete impl;
@ -212,6 +263,9 @@ bool TokenCacheImpl::validateAndAdd(double currentTime, StringRef token, Network
for (auto tenant : t.tenants.get()) { for (auto tenant : t.tenants.get()) {
c.tenants.push_back_deep(c.arena, tenant); c.tenants.push_back_deep(c.arena, tenant);
} }
if (t.tokenId.present()) {
c.tokenId = StringRef(c.arena, t.tokenId.get());
}
cache.insert(StringRef(c.arena, token), c); cache.insert(StringRef(c.arena, token), c);
return true; return true;
} }
@ -250,6 +304,8 @@ bool TokenCacheImpl::validate(TenantNameRef name, StringRef token) {
TraceEvent(SevWarn, "TenantTokenMismatch").detail("From", peer).detail("Tenant", name.toString()); TraceEvent(SevWarn, "TenantTokenMismatch").detail("From", peer).detail("Tenant", name.toString());
return false; return false;
} }
// audit logging
usedTokens.insert(AuditEntry(peer, *cachedEntry.get()));
return true; return true;
} }
View File
@ -50,6 +50,7 @@ struct ProcessClass {
BlobWorkerClass, BlobWorkerClass,
EncryptKeyProxyClass, EncryptKeyProxyClass,
ConsistencyScanClass, ConsistencyScanClass,
BlobMigratorClass,
InvalidClass = -1 InvalidClass = -1
}; };
@ -77,6 +78,7 @@ struct ProcessClass {
static_assert(ProcessClass::BlobWorkerClass == 19); static_assert(ProcessClass::BlobWorkerClass == 19);
static_assert(ProcessClass::EncryptKeyProxyClass == 20); static_assert(ProcessClass::EncryptKeyProxyClass == 20);
static_assert(ProcessClass::ConsistencyScanClass == 21); static_assert(ProcessClass::ConsistencyScanClass == 21);
static_assert(ProcessClass::BlobMigratorClass == 22);
static_assert(ProcessClass::InvalidClass == -1); static_assert(ProcessClass::InvalidClass == -1);
enum Fitness { enum Fitness {
@ -102,6 +104,7 @@ struct ProcessClass {
ConsistencyScan, ConsistencyScan,
BlobManager, BlobManager,
BlobWorker, BlobWorker,
BlobMigrator,
StorageCache, StorageCache,
Backup, Backup,
EncryptKeyProxy, EncryptKeyProxy,
View File
@ -59,10 +59,9 @@ public:
const UID& dbgid_, const UID& dbgid_,
Arena& arena_, Arena& arena_,
const VectorRef<MutationRef>& mutations_, const VectorRef<MutationRef>& mutations_,
IKeyValueStore* txnStateStore_, IKeyValueStore* txnStateStore_)
Reference<AsyncVar<ServerDBInfo> const> db)
: spanContext(spanContext_), dbgid(dbgid_), arena(arena_), mutations(mutations_), txnStateStore(txnStateStore_), : spanContext(spanContext_), dbgid(dbgid_), arena(arena_), mutations(mutations_), txnStateStore(txnStateStore_),
confChange(dummyConfChange), dbInfo(db) {} confChange(dummyConfChange) {}
ApplyMetadataMutationsImpl(const SpanContext& spanContext_, ApplyMetadataMutationsImpl(const SpanContext& spanContext_,
Arena& arena_, Arena& arena_,
@ -84,17 +83,16 @@ public:
commit(proxyCommitData_.commit), cx(proxyCommitData_.cx), committedVersion(&proxyCommitData_.committedVersion), commit(proxyCommitData_.commit), cx(proxyCommitData_.cx), committedVersion(&proxyCommitData_.committedVersion),
storageCache(&proxyCommitData_.storageCache), tag_popped(&proxyCommitData_.tag_popped), storageCache(&proxyCommitData_.storageCache), tag_popped(&proxyCommitData_.tag_popped),
tssMapping(&proxyCommitData_.tssMapping), tenantMap(&proxyCommitData_.tenantMap), tssMapping(&proxyCommitData_.tssMapping), tenantMap(&proxyCommitData_.tenantMap),
tenantIdIndex(&proxyCommitData_.tenantIdIndex), initialCommit(initialCommit_), dbInfo(proxyCommitData_.db) {} tenantIdIndex(&proxyCommitData_.tenantIdIndex), initialCommit(initialCommit_) {}
ApplyMetadataMutationsImpl(const SpanContext& spanContext_, ApplyMetadataMutationsImpl(const SpanContext& spanContext_,
ResolverData& resolverData_, ResolverData& resolverData_,
const VectorRef<MutationRef>& mutations_, const VectorRef<MutationRef>& mutations_)
Reference<AsyncVar<ServerDBInfo> const> db)
: spanContext(spanContext_), dbgid(resolverData_.dbgid), arena(resolverData_.arena), mutations(mutations_), : spanContext(spanContext_), dbgid(resolverData_.dbgid), arena(resolverData_.arena), mutations(mutations_),
txnStateStore(resolverData_.txnStateStore), toCommit(resolverData_.toCommit), txnStateStore(resolverData_.txnStateStore), toCommit(resolverData_.toCommit),
confChange(resolverData_.confChanges), logSystem(resolverData_.logSystem), popVersion(resolverData_.popVersion), confChange(resolverData_.confChanges), logSystem(resolverData_.logSystem), popVersion(resolverData_.popVersion),
keyInfo(resolverData_.keyInfo), storageCache(resolverData_.storageCache), keyInfo(resolverData_.keyInfo), storageCache(resolverData_.storageCache),
initialCommit(resolverData_.initialCommit), forResolver(true), dbInfo(db) {} initialCommit(resolverData_.initialCommit), forResolver(true) {}
private: private:
// The following variables are incoming parameters // The following variables are incoming parameters
@ -142,8 +140,6 @@ private:
// true if called from Resolver // true if called from Resolver
bool forResolver = false; bool forResolver = false;
Reference<AsyncVar<ServerDBInfo> const> dbInfo;
private: private:
// The following variables are used internally // The following variables are used internally
@ -164,7 +160,7 @@ private:
private: private:
void writeMutation(const MutationRef& m) { void writeMutation(const MutationRef& m) {
if (forResolver || !isEncryptionOpSupported(EncryptOperationType::TLOG_ENCRYPTION, dbInfo->get().client)) { if (forResolver || !isEncryptionOpSupported(EncryptOperationType::TLOG_ENCRYPTION)) {
toCommit->writeTypedMessage(m); toCommit->writeTypedMessage(m);
} else { } else {
ASSERT(cipherKeys != nullptr); ASSERT(cipherKeys != nullptr);
@ -1347,16 +1343,14 @@ void applyMetadataMutations(SpanContext const& spanContext,
void applyMetadataMutations(SpanContext const& spanContext, void applyMetadataMutations(SpanContext const& spanContext,
ResolverData& resolverData, ResolverData& resolverData,
const VectorRef<MutationRef>& mutations, const VectorRef<MutationRef>& mutations) {
Reference<AsyncVar<ServerDBInfo> const> dbInfo) { ApplyMetadataMutationsImpl(spanContext, resolverData, mutations).apply();
ApplyMetadataMutationsImpl(spanContext, resolverData, mutations, dbInfo).apply();
} }
void applyMetadataMutations(SpanContext const& spanContext, void applyMetadataMutations(SpanContext const& spanContext,
const UID& dbgid, const UID& dbgid,
Arena& arena, Arena& arena,
const VectorRef<MutationRef>& mutations, const VectorRef<MutationRef>& mutations,
IKeyValueStore* txnStateStore, IKeyValueStore* txnStateStore) {
Reference<AsyncVar<ServerDBInfo> const> dbInfo) { ApplyMetadataMutationsImpl(spanContext, dbgid, arena, mutations, txnStateStore).apply();
ApplyMetadataMutationsImpl(spanContext, dbgid, arena, mutations, txnStateStore, dbInfo).apply();
} }
View File
@ -483,7 +483,7 @@ ACTOR Future<Void> loadBlobMetadataForTenants(
} }
auto dataEntry = self->tenantData.rangeContaining(info->second.prefix); auto dataEntry = self->tenantData.rangeContaining(info->second.prefix);
ASSERT(dataEntry.begin() == info->second.prefix); ASSERT(dataEntry.begin() == info->second.prefix);
dataEntry.cvalue()->setBStore(BlobConnectionProvider::newBlobConnectionProvider(metadata)); dataEntry.cvalue()->updateBStore(metadata);
} }
return Void(); return Void();
} }
@ -492,6 +492,14 @@ ACTOR Future<Void> loadBlobMetadataForTenants(
} }
} }
Future<Void> loadBlobMetadataForTenant(BGTenantMap* self,
BlobMetadataDomainId domainId,
BlobMetadataDomainName domainName) {
std::vector<std::pair<BlobMetadataDomainId, BlobMetadataDomainName>> toLoad;
toLoad.push_back({ domainId, domainName });
return loadBlobMetadataForTenants(self, toLoad);
}
// list of tenants that may or may not already exist // list of tenants that may or may not already exist
void BGTenantMap::addTenants(std::vector<std::pair<TenantName, TenantMapEntry>> tenants) { void BGTenantMap::addTenants(std::vector<std::pair<TenantName, TenantMapEntry>> tenants) {
std::vector<std::pair<BlobMetadataDomainId, BlobMetadataDomainName>> tenantsToLoad; std::vector<std::pair<BlobMetadataDomainId, BlobMetadataDomainName>> tenantsToLoad;
@ -526,11 +534,41 @@ Optional<TenantMapEntry> BGTenantMap::getTenantById(int64_t id) {
} }
} }
// TODO: handle case where tenant isn't loaded yet // FIXME: batch requests for refresh?
Reference<GranuleTenantData> BGTenantMap::getDataForGranule(const KeyRangeRef& keyRange) { // FIXME: don't double fetch if multiple accesses to refreshing/expired metadata
auto tenant = tenantData.rangeContaining(keyRange.begin); // FIXME: log warning if after refresh, data is still expired!
ASSERT(tenant.begin() <= keyRange.begin); ACTOR Future<Reference<GranuleTenantData>> getDataForGranuleActor(BGTenantMap* self, KeyRange keyRange) {
ASSERT(tenant.end() >= keyRange.end); state int loopCount = 0;
loop {
loopCount++;
auto tenant = self->tenantData.rangeContaining(keyRange.begin);
ASSERT(tenant.begin() <= keyRange.begin);
ASSERT(tenant.end() >= keyRange.end);
return tenant.cvalue(); if (!tenant.cvalue().isValid() || !tenant.cvalue()->bstore.isValid()) {
return tenant.cvalue();
} else if (tenant.cvalue()->bstore->isExpired()) {
CODE_PROBE(true, "re-fetching expired blob metadata");
// fetch again
Future<Void> reload = loadBlobMetadataForTenant(self, tenant.cvalue()->entry.id, tenant.cvalue()->name);
wait(reload);
if (loopCount > 1) {
TraceEvent(SevWarn, "BlobMetadataStillExpired").suppressFor(5.0).detail("LoopCount", loopCount);
wait(delay(0.001));
}
} else {
// handle refresh in background if tenant needs refresh
if (tenant.cvalue()->bstore->needsRefresh()) {
Future<Void> reload =
loadBlobMetadataForTenant(self, tenant.cvalue()->entry.id, tenant.cvalue()->name);
self->addActor.send(reload);
}
return tenant.cvalue();
}
}
}
// TODO: handle case where tenant isn't loaded yet
Future<Reference<GranuleTenantData>> BGTenantMap::getDataForGranule(const KeyRangeRef& keyRange) {
return getDataForGranuleActor(this, keyRange);
} }
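
In outline, the policy implemented above: hand back invalid or not-yet-loaded data unchanged, reload synchronously when the metadata has already expired, and only schedule a background refresh when it has merely crossed the refresh threshold. A plain-C++ sketch of that decision (types and thresholds are illustrative, not FDB's):

#include <cstdio>
#include <functional>

enum class Freshness { Fresh, NeedsRefresh, Expired };

int getCached(int cached,
              Freshness f,
              std::function<int()> reloadNow,
              std::function<void()> backgroundReload) {
    if (f == Freshness::Expired)
        return reloadNow(); // block the caller: the data is unusable
    if (f == Freshness::NeedsRefresh)
        backgroundReload(); // caller proceeds with the current data
    return cached;
}

int main() {
    auto reload = [] { std::puts("synchronous reload"); return 42; };
    auto bg = [] { std::puts("background refresh scheduled"); };
    std::printf("%d\n", getCached(7, Freshness::NeedsRefresh, reload, bg));
}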
View File
@ -562,11 +562,12 @@ ACTOR Future<BlobGranuleSplitPoints> alignKeys(Reference<BlobManagerData> bmData
state Transaction tr = Transaction(bmData->db); state Transaction tr = Transaction(bmData->db);
state int idx = 1; state int idx = 1;
state Reference<GranuleTenantData> tenantData = bmData->tenantData.getDataForGranule(granuleRange); state Reference<GranuleTenantData> tenantData;
wait(store(tenantData, bmData->tenantData.getDataForGranule(granuleRange)));
while (SERVER_KNOBS->BG_METADATA_SOURCE == "tenant" && !tenantData.isValid()) { while (SERVER_KNOBS->BG_METADATA_SOURCE == "tenant" && !tenantData.isValid()) {
// this is a bit of a hack, but if we know this range is supposed to have a tenant, and it doesn't, just wait // this is a bit of a hack, but if we know this range is supposed to have a tenant, and it doesn't, just wait
wait(delay(1.0)); wait(delay(1.0));
tenantData = bmData->tenantData.getDataForGranule(granuleRange); wait(store(tenantData, bmData->tenantData.getDataForGranule(granuleRange)));
} }
for (; idx < splits.size() - 1; idx++) { for (; idx < splits.size() - 1; idx++) {
loop { loop {
@ -4212,7 +4213,8 @@ ACTOR Future<Reference<BlobConnectionProvider>> getBStoreForGranule(Reference<Bl
return self->bstore; return self->bstore;
} }
loop { loop {
state Reference<GranuleTenantData> data = self->tenantData.getDataForGranule(granuleRange); state Reference<GranuleTenantData> data;
wait(store(data, self->tenantData.getDataForGranule(granuleRange)));
if (data.isValid()) { if (data.isValid()) {
wait(data->bstoreLoaded.getFuture()); wait(data->bstoreLoaded.getFuture());
wait(delay(0)); wait(delay(0));
View File
@ -0,0 +1,83 @@
/*
* BlobMigrator.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "fdbserver/BlobMigratorInterface.h"
#include "fdbserver/Knobs.h"
#include "flow/ActorCollection.h"
#include "flow/FastRef.h"
#include "flow/IRandom.h"
#include "flow/flow.h"
#include "fdbclient/StorageServerInterface.h"
#include "fdbclient/BlobConnectionProvider.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/KeyRangeMap.h"
#include "fdbclient/SystemData.h"
#include "fdbclient/NativeAPI.actor.h"
#include "fdbserver/ServerDBInfo.actor.h"
#include "fdbserver/WaitFailure.h"
#include "flow/actorcompiler.h" // has to be last include
// BlobMigrator manages data migration from blob storage to storage servers. It implements the minimal set of
// StorageServerInterface APIs needed for the DataDistributor to start data migration.
class BlobMigrator : public NonCopyable, public ReferenceCounted<BlobMigrator> {
public:
BlobMigrator(Reference<AsyncVar<ServerDBInfo> const> dbInfo, BlobMigratorInterface interf)
: blobMigratorInterf(interf), actors(false) {
if (!blobConn.isValid() && SERVER_KNOBS->BG_METADATA_SOURCE != "tenant") {
blobConn = BlobConnectionProvider::newBlobConnectionProvider(SERVER_KNOBS->BG_URL);
}
db = openDBOnServer(dbInfo, TaskPriority::DefaultEndpoint, LockAware::True);
}
~BlobMigrator() {}
ACTOR static Future<Void> start(Reference<BlobMigrator> self) {
self->actors.add(waitFailureServer(self->blobMigratorInterf.waitFailure.getFuture()));
loop {
choose {
when(HaltBlobMigratorRequest req = waitNext(self->blobMigratorInterf.haltBlobMigrator.getFuture())) {
req.reply.send(Void());
TraceEvent("BlobMigratorHalted", self->blobMigratorInterf.id()).detail("ReqID", req.requesterID);
break;
}
when(wait(self->actors.getResult())) {}
}
}
return Void();
}
private:
Database db;
Reference<BlobConnectionProvider> blobConn;
BlobMigratorInterface blobMigratorInterf;
ActorCollection actors;
};
// Main entry point
ACTOR Future<Void> blobMigrator(BlobMigratorInterface ssi, Reference<AsyncVar<ServerDBInfo> const> dbInfo) {
fmt::print("Start blob migrator {} \n", ssi.id().toString());
try {
Reference<BlobMigrator> self = makeReference<BlobMigrator>(dbInfo, ssi);
wait(BlobMigrator::start(self));
} catch (Error& e) {
fmt::print("unexpected blob migrator error {}\n", e.what());
}
return Void();
}
View File
@ -225,8 +225,7 @@ struct BlobWorkerData : NonCopyable, ReferenceCounted<BlobWorkerData> {
resnapshotLock(new FlowLock(SERVER_KNOBS->BLOB_WORKER_RESNAPSHOT_PARALLELISM)), resnapshotLock(new FlowLock(SERVER_KNOBS->BLOB_WORKER_RESNAPSHOT_PARALLELISM)),
deltaWritesLock(new FlowLock(SERVER_KNOBS->BLOB_WORKER_DELTA_FILE_WRITE_PARALLELISM)), deltaWritesLock(new FlowLock(SERVER_KNOBS->BLOB_WORKER_DELTA_FILE_WRITE_PARALLELISM)),
stats(id, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, initialSnapshotLock, resnapshotLock, deltaWritesLock), stats(id, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, initialSnapshotLock, resnapshotLock, deltaWritesLock),
isEncryptionEnabled( isEncryptionEnabled(isEncryptionOpSupported(EncryptOperationType::BLOB_GRANULE_ENCRYPTION)) {}
isEncryptionOpSupported(EncryptOperationType::BLOB_GRANULE_ENCRYPTION, db->clientInfo->get())) {}
bool managerEpochOk(int64_t epoch) { bool managerEpochOk(int64_t epoch) {
if (epoch < currentManagerEpoch) { if (epoch < currentManagerEpoch) {
@ -367,7 +366,7 @@ ACTOR Future<BlobGranuleCipherKeysCtx> getLatestGranuleCipherKeys(Reference<Blob
KeyRange keyRange, KeyRange keyRange,
Arena* arena) { Arena* arena) {
state BlobGranuleCipherKeysCtx cipherKeysCtx; state BlobGranuleCipherKeysCtx cipherKeysCtx;
state Reference<GranuleTenantData> tenantData = bwData->tenantData.getDataForGranule(keyRange); state Reference<GranuleTenantData> tenantData = wait(bwData->tenantData.getDataForGranule(keyRange));
ASSERT(tenantData.isValid()); ASSERT(tenantData.isValid());
@ -1195,8 +1194,7 @@ ACTOR Future<BlobFileIndex> compactFromBlob(Reference<BlobWorkerData> bwData,
deltaF = files.deltaFiles[deltaIdx]; deltaF = files.deltaFiles[deltaIdx];
if (deltaF.cipherKeysMeta.present()) { if (deltaF.cipherKeysMeta.present()) {
ASSERT(isEncryptionOpSupported(EncryptOperationType::BLOB_GRANULE_ENCRYPTION, ASSERT(isEncryptionOpSupported(EncryptOperationType::BLOB_GRANULE_ENCRYPTION));
bwData->dbInfo->get().client));
BlobGranuleCipherKeysCtx keysCtx = BlobGranuleCipherKeysCtx keysCtx =
wait(getGranuleCipherKeysFromKeysMeta(bwData, deltaF.cipherKeysMeta.get(), &filenameArena)); wait(getGranuleCipherKeysFromKeysMeta(bwData, deltaF.cipherKeysMeta.get(), &filenameArena));
@ -4097,7 +4095,8 @@ ACTOR Future<Reference<BlobConnectionProvider>> loadBStoreForTenant(Reference<Bl
KeyRange keyRange) { KeyRange keyRange) {
state int retryCount = 0; state int retryCount = 0;
loop { loop {
state Reference<GranuleTenantData> data = bwData->tenantData.getDataForGranule(keyRange); state Reference<GranuleTenantData> data;
wait(store(data, bwData->tenantData.getDataForGranule(keyRange)));
if (data.isValid()) { if (data.isValid()) {
wait(data->bstoreLoaded.getFuture()); wait(data->bstoreLoaded.getFuture());
wait(delay(0)); wait(delay(0));
View File
@ -29,6 +29,8 @@
#include "fdbclient/DatabaseContext.h" #include "fdbclient/DatabaseContext.h"
#include "fdbrpc/FailureMonitor.h" #include "fdbrpc/FailureMonitor.h"
#include "fdbclient/EncryptKeyProxyInterface.h" #include "fdbclient/EncryptKeyProxyInterface.h"
#include "fdbserver/BlobGranuleServerCommon.actor.h"
#include "fdbserver/BlobMigratorInterface.h"
#include "fdbserver/Knobs.h" #include "fdbserver/Knobs.h"
#include "flow/ActorCollection.h" #include "flow/ActorCollection.h"
#include "fdbclient/ClusterConnectionMemoryRecord.h" #include "fdbclient/ClusterConnectionMemoryRecord.h"
@ -198,6 +200,32 @@ struct BlobManagerSingleton : Singleton<BlobManagerInterface> {
} }
}; };
struct BlobMigratorSingleton : Singleton<BlobMigratorInterface> {
BlobMigratorSingleton(const Optional<BlobMigratorInterface>& interface) : Singleton(interface) {}
Role getRole() const { return Role::BLOB_MIGRATOR; }
ProcessClass::ClusterRole getClusterRole() const { return ProcessClass::BlobMigrator; }
void setInterfaceToDbInfo(ClusterControllerData* cc) const {
if (interface.present()) {
TraceEvent("CCMG_SetInf", cc->id).detail("Id", interface.get().id());
cc->db.setBlobMigrator(interface.get());
}
}
void halt(ClusterControllerData* cc, Optional<Standalone<StringRef>> pid) const {
if (interface.present()) {
TraceEvent("CCMG_Halt", cc->id).detail("Id", interface.get().id());
cc->id_worker[pid].haltBlobMigrator =
brokenPromiseToNever(interface.get().haltBlobMigrator.getReply(HaltBlobMigratorRequest(cc->id)));
}
}
void recruit(ClusterControllerData* cc) const {
cc->lastRecruitTime = now();
cc->recruitBlobMigrator.set(true);
}
};
struct EncryptKeyProxySingleton : Singleton<EncryptKeyProxyInterface> { struct EncryptKeyProxySingleton : Singleton<EncryptKeyProxyInterface> {
EncryptKeyProxySingleton(const Optional<EncryptKeyProxyInterface>& interface) : Singleton(interface) {} EncryptKeyProxySingleton(const Optional<EncryptKeyProxyInterface>& interface) : Singleton(interface) {}
@ -275,6 +303,7 @@ ACTOR Future<Void> clusterWatchDatabase(ClusterControllerData* cluster,
dbInfo.distributor = db->serverInfo->get().distributor; dbInfo.distributor = db->serverInfo->get().distributor;
dbInfo.ratekeeper = db->serverInfo->get().ratekeeper; dbInfo.ratekeeper = db->serverInfo->get().ratekeeper;
dbInfo.blobManager = db->serverInfo->get().blobManager; dbInfo.blobManager = db->serverInfo->get().blobManager;
dbInfo.blobMigrator = db->serverInfo->get().blobMigrator;
dbInfo.encryptKeyProxy = db->serverInfo->get().encryptKeyProxy; dbInfo.encryptKeyProxy = db->serverInfo->get().encryptKeyProxy;
dbInfo.consistencyScan = db->serverInfo->get().consistencyScan; dbInfo.consistencyScan = db->serverInfo->get().consistencyScan;
dbInfo.latencyBandConfig = db->serverInfo->get().latencyBandConfig; dbInfo.latencyBandConfig = db->serverInfo->get().latencyBandConfig;
@ -656,8 +685,12 @@ void checkBetterSingletons(ClusterControllerData* self) {
WorkerDetails newCSWorker = findNewProcessForSingleton(self, ProcessClass::ConsistencyScan, id_used); WorkerDetails newCSWorker = findNewProcessForSingleton(self, ProcessClass::ConsistencyScan, id_used);
WorkerDetails newBMWorker; WorkerDetails newBMWorker;
WorkerDetails newMGWorker;
if (self->db.blobGranulesEnabled.get()) { if (self->db.blobGranulesEnabled.get()) {
newBMWorker = findNewProcessForSingleton(self, ProcessClass::BlobManager, id_used); newBMWorker = findNewProcessForSingleton(self, ProcessClass::BlobManager, id_used);
if (isFullRestoreMode()) {
newMGWorker = findNewProcessForSingleton(self, ProcessClass::BlobMigrator, id_used);
}
} }
WorkerDetails newEKPWorker; WorkerDetails newEKPWorker;
@ -671,8 +704,12 @@ void checkBetterSingletons(ClusterControllerData* self) {
auto bestFitnessForCS = findBestFitnessForSingleton(self, newCSWorker, ProcessClass::ConsistencyScan); auto bestFitnessForCS = findBestFitnessForSingleton(self, newCSWorker, ProcessClass::ConsistencyScan);
ProcessClass::Fitness bestFitnessForBM; ProcessClass::Fitness bestFitnessForBM;
ProcessClass::Fitness bestFitnessForMG;
if (self->db.blobGranulesEnabled.get()) { if (self->db.blobGranulesEnabled.get()) {
bestFitnessForBM = findBestFitnessForSingleton(self, newBMWorker, ProcessClass::BlobManager); bestFitnessForBM = findBestFitnessForSingleton(self, newBMWorker, ProcessClass::BlobManager);
if (isFullRestoreMode()) {
bestFitnessForMG = findBestFitnessForSingleton(self, newMGWorker, ProcessClass::BlobMigrator);
}
} }
ProcessClass::Fitness bestFitnessForEKP; ProcessClass::Fitness bestFitnessForEKP;
@ -685,6 +722,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
auto ddSingleton = DataDistributorSingleton(db.distributor); auto ddSingleton = DataDistributorSingleton(db.distributor);
ConsistencyScanSingleton csSingleton(db.consistencyScan); ConsistencyScanSingleton csSingleton(db.consistencyScan);
BlobManagerSingleton bmSingleton(db.blobManager); BlobManagerSingleton bmSingleton(db.blobManager);
BlobMigratorSingleton mgSingleton(db.blobMigrator);
EncryptKeyProxySingleton ekpSingleton(db.encryptKeyProxy); EncryptKeyProxySingleton ekpSingleton(db.encryptKeyProxy);
// Check if the singletons are healthy. // Check if the singletons are healthy.
@ -699,9 +737,14 @@ void checkBetterSingletons(ClusterControllerData* self) {
self, newCSWorker, csSingleton, bestFitnessForCS, self->recruitingConsistencyScanID); self, newCSWorker, csSingleton, bestFitnessForCS, self->recruitingConsistencyScanID);
bool bmHealthy = true; bool bmHealthy = true;
bool mgHealthy = true;
if (self->db.blobGranulesEnabled.get()) { if (self->db.blobGranulesEnabled.get()) {
bmHealthy = isHealthySingleton<BlobManagerInterface>( bmHealthy = isHealthySingleton<BlobManagerInterface>(
self, newBMWorker, bmSingleton, bestFitnessForBM, self->recruitingBlobManagerID); self, newBMWorker, bmSingleton, bestFitnessForBM, self->recruitingBlobManagerID);
if (isFullRestoreMode()) {
mgHealthy = isHealthySingleton<BlobMigratorInterface>(
self, newMGWorker, mgSingleton, bestFitnessForMG, self->recruitingBlobMigratorID);
}
} }
bool ekpHealthy = true; bool ekpHealthy = true;
@ -711,7 +754,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
} }
// if any of the singletons are unhealthy (rerecruited or not stable), then do not // if any of the singletons are unhealthy (rerecruited or not stable), then do not
// consider any further re-recruitments // consider any further re-recruitments
if (!(rkHealthy && ddHealthy && bmHealthy && ekpHealthy && csHealthy)) { if (!(rkHealthy && ddHealthy && bmHealthy && ekpHealthy && csHealthy && mgHealthy)) {
return; return;
} }
@ -725,9 +768,14 @@ void checkBetterSingletons(ClusterControllerData* self) {
Optional<Standalone<StringRef>> newCSProcessId = newCSWorker.interf.locality.processId(); Optional<Standalone<StringRef>> newCSProcessId = newCSWorker.interf.locality.processId();
Optional<Standalone<StringRef>> currBMProcessId, newBMProcessId; Optional<Standalone<StringRef>> currBMProcessId, newBMProcessId;
Optional<Standalone<StringRef>> currMGProcessId, newMGProcessId;
if (self->db.blobGranulesEnabled.get()) { if (self->db.blobGranulesEnabled.get()) {
currBMProcessId = bmSingleton.interface.get().locality.processId(); currBMProcessId = bmSingleton.interface.get().locality.processId();
newBMProcessId = newBMWorker.interf.locality.processId(); newBMProcessId = newBMWorker.interf.locality.processId();
if (isFullRestoreMode()) {
currMGProcessId = mgSingleton.interface.get().locality.processId();
newMGProcessId = newMGWorker.interf.locality.processId();
}
} }
Optional<Standalone<StringRef>> currEKPProcessId, newEKPProcessId; Optional<Standalone<StringRef>> currEKPProcessId, newEKPProcessId;
@ -741,6 +789,10 @@ void checkBetterSingletons(ClusterControllerData* self) {
if (self->db.blobGranulesEnabled.get()) { if (self->db.blobGranulesEnabled.get()) {
currPids.emplace_back(currBMProcessId); currPids.emplace_back(currBMProcessId);
newPids.emplace_back(newBMProcessId); newPids.emplace_back(newBMProcessId);
if (isFullRestoreMode()) {
currPids.emplace_back(currMGProcessId);
newPids.emplace_back(newMGProcessId);
}
} }
if (SERVER_KNOBS->ENABLE_ENCRYPTION) { if (SERVER_KNOBS->ENABLE_ENCRYPTION) {
@ -755,6 +807,10 @@ void checkBetterSingletons(ClusterControllerData* self) {
if (!self->db.blobGranulesEnabled.get()) { if (!self->db.blobGranulesEnabled.get()) {
ASSERT(currColocMap[currBMProcessId] == 0); ASSERT(currColocMap[currBMProcessId] == 0);
ASSERT(newColocMap[newBMProcessId] == 0); ASSERT(newColocMap[newBMProcessId] == 0);
if (isFullRestoreMode()) {
ASSERT(currColocMap[currMGProcessId] == 0);
ASSERT(newColocMap[newMGProcessId] == 0);
}
} }
// if the knob is disabled, the EKP coloc counts should have no affect on the coloc counts check below // if the knob is disabled, the EKP coloc counts should have no affect on the coloc counts check below
@ -767,6 +823,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
if (newColocMap[newRKProcessId] <= currColocMap[currRKProcessId] && if (newColocMap[newRKProcessId] <= currColocMap[currRKProcessId] &&
newColocMap[newDDProcessId] <= currColocMap[currDDProcessId] && newColocMap[newDDProcessId] <= currColocMap[currDDProcessId] &&
newColocMap[newBMProcessId] <= currColocMap[currBMProcessId] && newColocMap[newBMProcessId] <= currColocMap[currBMProcessId] &&
newColocMap[newMGProcessId] <= currColocMap[currMGProcessId] &&
newColocMap[newEKPProcessId] <= currColocMap[currEKPProcessId] && newColocMap[newEKPProcessId] <= currColocMap[currEKPProcessId] &&
newColocMap[newCSProcessId] <= currColocMap[currCSProcessId]) { newColocMap[newCSProcessId] <= currColocMap[currCSProcessId]) {
// rerecruit the singleton for which we have found a better process, if any // rerecruit the singleton for which we have found a better process, if any
@ -776,6 +833,9 @@ void checkBetterSingletons(ClusterControllerData* self) {
ddSingleton.recruit(self); ddSingleton.recruit(self);
} else if (self->db.blobGranulesEnabled.get() && newColocMap[newBMProcessId] < currColocMap[currBMProcessId]) { } else if (self->db.blobGranulesEnabled.get() && newColocMap[newBMProcessId] < currColocMap[currBMProcessId]) {
bmSingleton.recruit(self); bmSingleton.recruit(self);
} else if (self->db.blobGranulesEnabled.get() && isFullRestoreMode() &&
newColocMap[newMGProcessId] < currColocMap[currMGProcessId]) {
mgSingleton.recruit(self);
} else if (SERVER_KNOBS->ENABLE_ENCRYPTION && newColocMap[newEKPProcessId] < currColocMap[currEKPProcessId]) { } else if (SERVER_KNOBS->ENABLE_ENCRYPTION && newColocMap[newEKPProcessId] < currColocMap[currEKPProcessId]) {
ekpSingleton.recruit(self); ekpSingleton.recruit(self);
} else if (newColocMap[newCSProcessId] < currColocMap[currCSProcessId]) { } else if (newColocMap[newCSProcessId] < currColocMap[currCSProcessId]) {
@ -1330,12 +1390,18 @@ ACTOR Future<Void> registerWorker(RegisterWorkerRequest req,
self, w, currSingleton, registeringSingleton, self->recruitingRatekeeperID); self, w, currSingleton, registeringSingleton, self->recruitingRatekeeperID);
} }
if (self->db.blobGranulesEnabled.get() && req.blobManagerInterf.present()) { if (self->db.blobGranulesEnabled.get() && isFullRestoreMode() && req.blobManagerInterf.present()) {
auto currSingleton = BlobManagerSingleton(self->db.serverInfo->get().blobManager); auto currSingleton = BlobManagerSingleton(self->db.serverInfo->get().blobManager);
auto registeringSingleton = BlobManagerSingleton(req.blobManagerInterf); auto registeringSingleton = BlobManagerSingleton(req.blobManagerInterf);
haltRegisteringOrCurrentSingleton<BlobManagerInterface>( haltRegisteringOrCurrentSingleton<BlobManagerInterface>(
self, w, currSingleton, registeringSingleton, self->recruitingBlobManagerID); self, w, currSingleton, registeringSingleton, self->recruitingBlobManagerID);
} }
if (req.blobMigratorInterf.present()) {
auto currSingleton = BlobMigratorSingleton(self->db.serverInfo->get().blobMigrator);
auto registeringSingleton = BlobMigratorSingleton(req.blobMigratorInterf);
haltRegisteringOrCurrentSingleton<BlobMigratorInterface>(
self, w, currSingleton, registeringSingleton, self->recruitingBlobMigratorID);
}
if (SERVER_KNOBS->ENABLE_ENCRYPTION && req.encryptKeyProxyInterf.present()) { if (SERVER_KNOBS->ENABLE_ENCRYPTION && req.encryptKeyProxyInterf.present()) {
auto currSingleton = EncryptKeyProxySingleton(self->db.serverInfo->get().encryptKeyProxy); auto currSingleton = EncryptKeyProxySingleton(self->db.serverInfo->get().encryptKeyProxy);
@ -2013,6 +2079,53 @@ ACTOR Future<Void> handleForcedRecoveries(ClusterControllerData* self, ClusterCo
} }
} }
ACTOR Future<Void> triggerAuditStorage(ClusterControllerData* self, TriggerAuditRequest req) {
TraceEvent(SevInfo, "CCTriggerAuditStorageBegin", self->id)
.detail("Range", req.range)
.detail("AuditType", req.type);
state UID auditId;
try {
while (self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS ||
!self->db.serverInfo->get().distributor.present()) {
wait(self->db.serverInfo->onChange());
}
TriggerAuditRequest fReq(req.getType(), req.range);
UID auditId_ = wait(self->db.serverInfo->get().distributor.get().triggerAudit.getReply(fReq));
auditId = auditId_;
TraceEvent(SevDebug, "CCTriggerAuditStorageEnd", self->id)
.detail("AuditID", auditId)
.detail("Range", req.range)
.detail("AuditType", req.type);
if (!req.reply.isSet()) {
req.reply.send(auditId);
}
} catch (Error& e) {
TraceEvent(SevDebug, "CCTriggerAuditStorageError", self->id)
.errorUnsuppressed(e)
.detail("AuditID", auditId)
.detail("Range", req.range)
.detail("AuditType", req.type);
if (!req.reply.isSet()) {
req.reply.sendError(audit_storage_failed());
}
}
return Void();
}
ACTOR Future<Void> handleTriggerAuditStorage(ClusterControllerData* self, ClusterControllerFullInterface interf) {
loop {
TriggerAuditRequest req = waitNext(interf.clientInterface.triggerAudit.getFuture());
TraceEvent(SevDebug, "TriggerAuditStorageReceived", self->id)
.detail("ClusterControllerDcId", self->clusterControllerDcId)
.detail("Range", req.range)
.detail("AuditType", req.type);
self->addActor.send(triggerAuditStorage(self, req));
}
}
struct SingletonRecruitThrottler { struct SingletonRecruitThrottler {
double lastRecruitStart; double lastRecruitStart;
@ -2426,6 +2539,104 @@ ACTOR Future<int64_t> getNextBMEpoch(ClusterControllerData* self) {
} }
} }
ACTOR Future<Void> startBlobMigrator(ClusterControllerData* self, double waitTime) {
// If master fails at the same time, give it a chance to clear master PID.
// Also wait to avoid too many consecutive recruits in a small time window.
wait(delay(waitTime));
TraceEvent("CCStartBlobMigrator", self->id).log();
loop {
try {
state bool noBlobMigrator = !self->db.serverInfo->get().blobMigrator.present();
while (!self->masterProcessId.present() ||
self->masterProcessId != self->db.serverInfo->get().master.locality.processId() ||
self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
wait(self->db.serverInfo->onChange() || delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY));
}
if (noBlobMigrator && self->db.serverInfo->get().blobMigrator.present()) {
// Existing instance registers while waiting, so skip.
return Void();
}
std::map<Optional<Standalone<StringRef>>, int> id_used = self->getUsedIds();
WorkerFitnessInfo blobMigratorWorker = self->getWorkerForRoleInDatacenter(self->clusterControllerDcId,
ProcessClass::BlobMigrator,
ProcessClass::NeverAssign,
self->db.config,
id_used);
InitializeBlobMigratorRequest req(deterministicRandom()->randomUniqueID());
state WorkerDetails worker = blobMigratorWorker.worker;
if (self->onMasterIsBetter(worker, ProcessClass::BlobMigrator)) {
worker = self->id_worker[self->masterProcessId.get()].details;
}
self->recruitingBlobMigratorID = req.reqId;
TraceEvent("CCRecruitBlobMigrator", self->id)
.detail("Addr", worker.interf.address())
.detail("MGID", req.reqId);
ErrorOr<BlobMigratorInterface> interf = wait(worker.interf.blobMigrator.getReplyUnlessFailedFor(
req, SERVER_KNOBS->WAIT_FOR_BLOB_MANAGER_JOIN_DELAY, 0));
if (interf.present()) {
self->recruitBlobMigrator.set(false);
self->recruitingBlobMigratorID = interf.get().id();
const auto& blobMigrator = self->db.serverInfo->get().blobMigrator;
TraceEvent("CCBlobMigratorRecruited", self->id)
.detail("Addr", worker.interf.address())
.detail("MGID", interf.get().id());
if (blobMigrator.present() && blobMigrator.get().id() != interf.get().id() &&
self->id_worker.count(blobMigrator.get().locality.processId())) {
TraceEvent("CCHaltBlobMigratorAfterRecruit", self->id)
.detail("MGID", blobMigrator.get().id())
.detail("DcID", printable(self->clusterControllerDcId));
BlobMigratorSingleton(blobMigrator).halt(self, blobMigrator.get().locality.processId());
}
if (!blobMigrator.present() || blobMigrator.get().id() != interf.get().id()) {
self->db.setBlobMigrator(interf.get());
}
checkOutstandingRequests(self);
return Void();
}
} catch (Error& e) {
TraceEvent("CCBlobMigratorRecruitError", self->id).error(e);
if (e.code() != error_code_no_more_servers) {
throw;
}
}
wait(lowPriorityDelay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY));
}
}
ACTOR Future<Void> monitorBlobMigrator(ClusterControllerData* self) {
state SingletonRecruitThrottler recruitThrottler;
while (self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
wait(self->db.serverInfo->onChange());
}
loop {
if (self->db.serverInfo->get().blobMigrator.present() && !self->recruitBlobMigrator.get()) {
state Future<Void> wfClient = waitFailureClient(self->db.serverInfo->get().blobMigrator.get().waitFailure,
SERVER_KNOBS->BLOB_MIGRATOR_FAILURE_TIME);
loop {
choose {
when(wait(wfClient)) {
TraceEvent("CCBlobMigratorDied", self->id)
.detail("MGID", self->db.serverInfo->get().blobMigrator.get().id());
self->db.clearInterf(ProcessClass::BlobMigratorClass);
break;
}
when(wait(self->recruitBlobMigrator.onChange())) {}
}
}
} else if (self->db.blobGranulesEnabled.get() && isFullRestoreMode()) {
// if there is no blob migrator present but blob granules are enabled and we are in full restore mode, recruit a blob migrator
wait(startBlobMigrator(self, recruitThrottler.newRecruitment()));
} else {
wait(self->db.blobGranulesEnabled.onChange());
}
}
}
ACTOR Future<Void> startBlobManager(ClusterControllerData* self, double waitTime) { ACTOR Future<Void> startBlobManager(ClusterControllerData* self, double waitTime) {
// If master fails at the same time, give it a chance to clear master PID. // If master fails at the same time, give it a chance to clear master PID.
// Also wait to avoid too many consecutive recruits in a small time window. // Also wait to avoid too many consecutive recruits in a small time window.
@ -2552,6 +2763,10 @@ ACTOR Future<Void> monitorBlobManager(ClusterControllerData* self) {
const auto& blobManager = self->db.serverInfo->get().blobManager; const auto& blobManager = self->db.serverInfo->get().blobManager;
BlobManagerSingleton(blobManager) BlobManagerSingleton(blobManager)
.haltBlobGranules(self, blobManager.get().locality.processId()); .haltBlobGranules(self, blobManager.get().locality.processId());
if (isFullRestoreMode()) {
const auto& blobMigrator = self->db.serverInfo->get().blobMigrator;
BlobMigratorSingleton(blobMigrator).halt(self, blobMigrator.get().locality.processId());
}
break; break;
} }
} }
@ -2782,9 +2997,11 @@ ACTOR Future<Void> clusterControllerCore(ClusterControllerFullInterface interf,
self.addActor.send(updatedChangedDatacenters(&self)); self.addActor.send(updatedChangedDatacenters(&self));
self.addActor.send(updateDatacenterVersionDifference(&self)); self.addActor.send(updateDatacenterVersionDifference(&self));
self.addActor.send(handleForcedRecoveries(&self, interf)); self.addActor.send(handleForcedRecoveries(&self, interf));
self.addActor.send(handleTriggerAuditStorage(&self, interf));
self.addActor.send(monitorDataDistributor(&self)); self.addActor.send(monitorDataDistributor(&self));
self.addActor.send(monitorRatekeeper(&self)); self.addActor.send(monitorRatekeeper(&self));
self.addActor.send(monitorBlobManager(&self)); self.addActor.send(monitorBlobManager(&self));
self.addActor.send(monitorBlobMigrator(&self));
self.addActor.send(watchBlobGranulesConfigKey(&self)); self.addActor.send(watchBlobGranulesConfigKey(&self));
self.addActor.send(monitorConsistencyScan(&self)); self.addActor.send(monitorConsistencyScan(&self));
self.addActor.send(metaclusterMetricsUpdater(&self)); self.addActor.send(metaclusterMetricsUpdater(&self));
View File
@ -1056,18 +1056,19 @@ ACTOR Future<Void> readTransactionSystemState(Reference<ClusterRecoveryData> sel
// Sets self->configuration to the configuration (FF/conf/ keys) at self->lastEpochEnd // Sets self->configuration to the configuration (FF/conf/ keys) at self->lastEpochEnd
// Recover transaction state store // Recover transaction state store
bool enableEncryptionForTxnStateStore = isEncryptionOpSupported(EncryptOperationType::TLOG_ENCRYPTION);
CODE_PROBE(enableEncryptionForTxnStateStore, "Enable encryption for txnStateStore");
if (self->txnStateStore) if (self->txnStateStore)
self->txnStateStore->close(); self->txnStateStore->close();
self->txnStateLogAdapter = openDiskQueueAdapter(oldLogSystem, myLocality, txsPoppedVersion); self->txnStateLogAdapter = openDiskQueueAdapter(oldLogSystem, myLocality, txsPoppedVersion);
self->txnStateStore = keyValueStoreLogSystem( self->txnStateStore = keyValueStoreLogSystem(self->txnStateLogAdapter,
self->txnStateLogAdapter, self->dbInfo,
self->dbInfo, self->dbgid,
self->dbgid, self->memoryLimit,
self->memoryLimit, false,
false, false,
false, true,
true, enableEncryptionForTxnStateStore);
isEncryptionOpSupported(EncryptOperationType::TLOG_ENCRYPTION, self->dbInfo->get().client));
// Version 0 occurs at the version epoch. The version epoch is the number // Version 0 occurs at the version epoch. The version epoch is the number
// of microseconds since the Unix epoch. It can be set through fdbcli. // of microseconds since the Unix epoch. It can be set through fdbcli.
@ -1688,8 +1689,7 @@ ACTOR Future<Void> clusterRecoveryCore(Reference<ClusterRecoveryData> self) {
self->dbgid, self->dbgid,
recoveryCommitRequest.arena, recoveryCommitRequest.arena,
tr.mutations.slice(mmApplied, tr.mutations.size()), tr.mutations.slice(mmApplied, tr.mutations.size()),
self->txnStateStore, self->txnStateStore);
self->dbInfo);
mmApplied = tr.mutations.size(); mmApplied = tr.mutations.size();
tr.read_snapshot = self->recoveryTransactionVersion; // lastEpochEnd would make more sense, but isn't in the initial tr.read_snapshot = self->recoveryTransactionVersion; // lastEpochEnd would make more sense, but isn't in the initial
View File
@ -24,9 +24,11 @@
#include "fdbclient/Atomic.h" #include "fdbclient/Atomic.h"
#include "fdbclient/BackupAgent.actor.h" #include "fdbclient/BackupAgent.actor.h"
#include "fdbclient/BlobCipher.h" #include "fdbclient/BlobCipher.h"
#include "fdbclient/BuildIdempotencyIdMutations.h"
#include "fdbclient/CommitTransaction.h" #include "fdbclient/CommitTransaction.h"
#include "fdbclient/DatabaseContext.h" #include "fdbclient/DatabaseContext.h"
#include "fdbclient/FDBTypes.h" #include "fdbclient/FDBTypes.h"
#include "fdbclient/IdempotencyId.h"
#include "fdbclient/Knobs.h" #include "fdbclient/Knobs.h"
#include "fdbclient/CommitProxyInterface.h" #include "fdbclient/CommitProxyInterface.h"
#include "fdbclient/NativeAPI.actor.h" #include "fdbclient/NativeAPI.actor.h"
@ -55,6 +57,7 @@
#include "fdbserver/WaitFailure.h" #include "fdbserver/WaitFailure.h"
#include "fdbserver/WorkerInterface.actor.h" #include "fdbserver/WorkerInterface.actor.h"
#include "flow/ActorCollection.h" #include "flow/ActorCollection.h"
#include "flow/CodeProbe.h"
#include "flow/EncryptUtils.h" #include "flow/EncryptUtils.h"
#include "flow/Error.h" #include "flow/Error.h"
#include "flow/IRandom.h" #include "flow/IRandom.h"
@ -663,6 +666,8 @@ struct CommitBatchContext {
// Cipher keys to be used to encrypt mutations // Cipher keys to be used to encrypt mutations
std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>> cipherKeys; std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>> cipherKeys;
IdempotencyIdKVBuilder idempotencyKVBuilder;
CommitBatchContext(ProxyCommitData*, const std::vector<CommitTransactionRequest>*, const int); CommitBatchContext(ProxyCommitData*, const std::vector<CommitTransactionRequest>*, const int);
void setupTraceBatch(); void setupTraceBatch();
@ -998,7 +1003,7 @@ ACTOR Future<Void> getResolution(CommitBatchContext* self) {
// Fetch cipher keys if needed. // Fetch cipher keys if needed.
state Future<std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>>> getCipherKeys; state Future<std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>>> getCipherKeys;
if (pProxyCommitData->isEncryptionEnabled) { if (pProxyCommitData->isEncryptionEnabled) {
static std::unordered_map<EncryptCipherDomainId, EncryptCipherDomainName> defaultDomains = { static const std::unordered_map<EncryptCipherDomainId, EncryptCipherDomainName> defaultDomains = {
{ SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID, FDB_SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_NAME }, { SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID, FDB_SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_NAME },
{ ENCRYPT_HEADER_DOMAIN_ID, FDB_ENCRYPT_HEADER_DOMAIN_NAME }, { ENCRYPT_HEADER_DOMAIN_ID, FDB_ENCRYPT_HEADER_DOMAIN_NAME },
{ FDB_DEFAULT_ENCRYPT_DOMAIN_ID, FDB_DEFAULT_ENCRYPT_DOMAIN_NAME } { FDB_DEFAULT_ENCRYPT_DOMAIN_ID, FDB_DEFAULT_ENCRYPT_DOMAIN_NAME }
@ -1091,6 +1096,7 @@ void applyMetadataEffect(CommitBatchContext* self) {
committed = committed =
committed && self->resolution[resolver].stateMutations[versionIndex][transactionIndex].committed; committed && self->resolution[resolver].stateMutations[versionIndex][transactionIndex].committed;
if (committed) { if (committed) {
// Note: since we are not going to commit these mutations here, we don't need to pass cipherKeys for encryption.
applyMetadataMutations(SpanContext(), applyMetadataMutations(SpanContext(),
*self->pProxyCommitData, *self->pProxyCommitData,
self->arena, self->arena,
@ -1594,6 +1600,22 @@ ACTOR Future<Void> postResolution(CommitBatchContext* self) {
&self->computeStart)); &self->computeStart));
} }
buildIdempotencyIdMutations(self->trs,
self->idempotencyKVBuilder,
self->commitVersion,
self->committed,
ConflictBatch::TransactionCommitted,
self->locked,
[&](const KeyValue& kv) {
MutationRef idempotencyIdSet;
idempotencyIdSet.type = MutationRef::Type::SetValue;
idempotencyIdSet.param1 = kv.key;
idempotencyIdSet.param2 = kv.value;
auto& tags = pProxyCommitData->tagsForKey(kv.key);
self->toCommit.addTags(tags);
self->toCommit.writeTypedMessage(idempotencyIdSet);
});
self->toCommit.saveTags(self->writtenTags); self->toCommit.saveTags(self->writtenTags);
pProxyCommitData->stats.mutations += self->mutationCount; pProxyCommitData->stats.mutations += self->mutationCount;
@ -2488,6 +2510,17 @@ ACTOR Future<Void> processCompleteTransactionStateRequest(TransactionStateResolv
tag_uid[decodeServerTagValue(kv.value)] = decodeServerTagKey(kv.key); tag_uid[decodeServerTagValue(kv.value)] = decodeServerTagKey(kv.key);
} }
state std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>> cipherKeys;
if (pContext->pCommitData->isEncryptionEnabled) {
static const std::unordered_map<EncryptCipherDomainId, EncryptCipherDomainName> metadataDomains = {
{ SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID, FDB_SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_NAME },
{ ENCRYPT_HEADER_DOMAIN_ID, FDB_ENCRYPT_HEADER_DOMAIN_NAME }
};
std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>> cks =
wait(getLatestEncryptCipherKeys(pContext->pCommitData->db, metadataDomains, BlobCipherMetrics::TLOG));
cipherKeys = cks;
}
loop { loop {
wait(yield()); wait(yield());
@ -2545,13 +2578,16 @@ ACTOR Future<Void> processCompleteTransactionStateRequest(TransactionStateResolv
Arena arena; Arena arena;
bool confChanges; bool confChanges;
CODE_PROBE(
pContext->pCommitData->isEncryptionEnabled,
"Commit proxy apply metadata mutations from txnStateStore on recovery, with encryption-at-rest enabled");
applyMetadataMutations(SpanContext(), applyMetadataMutations(SpanContext(),
*pContext->pCommitData, *pContext->pCommitData,
arena, arena,
Reference<ILogSystem>(), Reference<ILogSystem>(),
mutations, mutations,
/* pToCommit= */ nullptr, /* pToCommit= */ nullptr,
/* pCipherKeys= */ nullptr, pContext->pCommitData->isEncryptionEnabled ? &cipherKeys : nullptr,
confChanges, confChanges,
/* version= */ 0, /* version= */ 0,
/* popVersion= */ 0, /* popVersion= */ 0,
@ -2643,7 +2679,7 @@ ACTOR Future<Void> commitProxyServerCore(CommitProxyInterface proxy,
// Wait until we can load the "real" logsystem, since we don't support switching them currently // Wait until we can load the "real" logsystem, since we don't support switching them currently
while (!(masterLifetime.isEqual(commitData.db->get().masterLifetime) && while (!(masterLifetime.isEqual(commitData.db->get().masterLifetime) &&
commitData.db->get().recoveryState >= RecoveryState::RECOVERY_TRANSACTION && commitData.db->get().recoveryState >= RecoveryState::RECOVERY_TRANSACTION &&
(!isEncryptionOpSupported(EncryptOperationType::TLOG_ENCRYPTION, db->get().client) || (!isEncryptionOpSupported(EncryptOperationType::TLOG_ENCRYPTION) ||
commitData.db->get().encryptKeyProxy.present()))) { commitData.db->get().encryptKeyProxy.present()))) {
//TraceEvent("ProxyInit2", proxy.id()).detail("LSEpoch", db->get().logSystemConfig.epoch).detail("Need", epoch); //TraceEvent("ProxyInit2", proxy.id()).detail("LSEpoch", db->get().logSystemConfig.epoch).detail("Need", epoch);
wait(commitData.db->onChange()); wait(commitData.db->onChange());
@ -2668,15 +2704,14 @@ ACTOR Future<Void> commitProxyServerCore(CommitProxyInterface proxy,
commitData.logSystem = ILogSystem::fromServerDBInfo(proxy.id(), commitData.db->get(), false, addActor); commitData.logSystem = ILogSystem::fromServerDBInfo(proxy.id(), commitData.db->get(), false, addActor);
commitData.logAdapter = commitData.logAdapter =
new LogSystemDiskQueueAdapter(commitData.logSystem, Reference<AsyncVar<PeekTxsInfo>>(), 1, false); new LogSystemDiskQueueAdapter(commitData.logSystem, Reference<AsyncVar<PeekTxsInfo>>(), 1, false);
commitData.txnStateStore = commitData.txnStateStore = keyValueStoreLogSystem(commitData.logAdapter,
keyValueStoreLogSystem(commitData.logAdapter, commitData.db,
commitData.db, proxy.id(),
proxy.id(), 2e9,
2e9, true,
true, true,
true, true,
true, isEncryptionOpSupported(EncryptOperationType::TLOG_ENCRYPTION));
isEncryptionOpSupported(EncryptOperationType::TLOG_ENCRYPTION, db->get().client));
createWhitelistBinPathVec(whitelistBinPaths, commitData.whitelistedBinPathVec); createWhitelistBinPathVec(whitelistBinPaths, commitData.whitelistedBinPathVec);
commitData.updateLatencyBandConfig(commitData.db->get().latencyBandConfig); commitData.updateLatencyBandConfig(commitData.db->get().latencyBandConfig);
View File
@ -344,7 +344,7 @@ class TransactionEnvironment {
state Key configKey = encodeConfigKey(configClass, knobName); state Key configKey = encodeConfigKey(configClass, knobName);
state Optional<Value> value = wait(tr->get(configKey)); state Optional<Value> value = wait(tr->get(configKey));
if (expected.present()) { if (expected.present()) {
ASSERT_EQ(BinaryReader::fromStringRef<int64_t>(value.get(), Unversioned()), expected.get()); ASSERT_EQ(Tuple::unpack(value.get()).getInt(0), expected.get());
} else { } else {
ASSERT(!value.present()); ASSERT(!value.present());
} }
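
The assertion change reflects that config values are now Tuple-encoded rather than written as raw int64s. A sketch of the round trip using fdbclient's Tuple class (assumes the in-tree Tuple API and flow's ASSERT_EQ; not a standalone program):

#include "fdbclient/Tuple.h"
#include "flow/Error.h"

// Pack an int64 the way the config layer now stores it, then unpack it
// exactly as the test above does.
void tupleRoundTrip() {
    Standalone<StringRef> packed = Tuple().append((int64_t)5).pack();
    int64_t v = Tuple::unpack(packed).getInt(0);
    ASSERT_EQ(v, 5);
}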
View File
@ -106,6 +106,62 @@ class DDTxnProcessorImpl {
return IDDTxnProcessor::SourceServers{ std::vector<UID>(servers.begin(), servers.end()), completeSources }; return IDDTxnProcessor::SourceServers{ std::vector<UID>(servers.begin(), servers.end()), completeSources };
} }
ACTOR static Future<std::vector<IDDTxnProcessor::DDRangeLocations>> getSourceServerInterfacesForRange(
Database cx,
KeyRangeRef range) {
state std::vector<IDDTxnProcessor::DDRangeLocations> res;
state Transaction tr(cx);
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
loop {
res.clear();
try {
state RangeResult shards = wait(krmGetRanges(&tr,
keyServersPrefix,
range,
SERVER_KNOBS->MOVE_SHARD_KRM_ROW_LIMIT,
SERVER_KNOBS->MOVE_SHARD_KRM_BYTE_LIMIT));
ASSERT(!shards.empty());
state RangeResult UIDtoTagMap = wait(tr.getRange(serverTagKeys, CLIENT_KNOBS->TOO_MANY));
ASSERT(!UIDtoTagMap.more && UIDtoTagMap.size() < CLIENT_KNOBS->TOO_MANY);
state int i = 0;
for (i = 0; i < shards.size() - 1; ++i) {
state std::vector<UID> src;
std::vector<UID> dest;
UID srcId, destId;
decodeKeyServersValue(UIDtoTagMap, shards[i].value, src, dest, srcId, destId);
std::vector<Future<Optional<Value>>> serverListEntries;
for (int j = 0; j < src.size(); ++j) {
serverListEntries.push_back(tr.get(serverListKeyFor(src[j])));
}
std::vector<Optional<Value>> serverListValues = wait(getAll(serverListEntries));
IDDTxnProcessor::DDRangeLocations current(KeyRangeRef(shards[i].key, shards[i + 1].key));
for (int j = 0; j < serverListValues.size(); ++j) {
if (!serverListValues[j].present()) {
TraceEvent(SevWarnAlways, "GetSourceServerInterfacesMissing")
.detail("StorageServer", src[j])
.detail("Range", KeyRangeRef(shards[i].key, shards[i + 1].key));
continue;
}
StorageServerInterface ssi = decodeServerListValue(serverListValues[j].get());
current.servers[ssi.locality.describeDcId()].push_back(ssi);
}
res.push_back(current);
}
break;
} catch (Error& e) {
TraceEvent(SevWarnAlways, "GetSourceServerInterfacesError").errorUnsuppressed(e).detail("Range", range);
wait(tr.onError(e));
}
}
return res;
}
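
A hypothetical call-site sketch (flow-style pseudocode, not a real call site in this commit; the member names range and servers are assumed from the constructor and the describeDcId() keying above):

// Group the returned source interfaces by DC for a range.
std::vector<IDDTxnProcessor::DDRangeLocations> locations =
    wait(txnProcessor->getSourceServerInterfacesForRange(range));
for (const auto& loc : locations) {
    for (const auto& [dcId, interfaces] : loc.servers) {
        TraceEvent("SourceServersForRange")
            .detail("Range", loc.range)
            .detail("DC", dcId)
            .detail("Servers", interfaces.size());
    }
}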
// set the system key space // set the system key space
ACTOR static Future<Void> updateReplicaKeys(Database cx, ACTOR static Future<Void> updateReplicaKeys(Database cx,
std::vector<Optional<Key>> primaryDcId, std::vector<Optional<Key>> primaryDcId,
@ -537,6 +593,11 @@ Future<IDDTxnProcessor::SourceServers> DDTxnProcessor::getSourceServersForRange(
return DDTxnProcessorImpl::getSourceServersForRange(cx, range); return DDTxnProcessorImpl::getSourceServersForRange(cx, range);
} }
Future<std::vector<IDDTxnProcessor::DDRangeLocations>> DDTxnProcessor::getSourceServerInterfacesForRange(
const KeyRangeRef range) {
return DDTxnProcessorImpl::getSourceServerInterfacesForRange(cx, range);
}
Future<ServerWorkerInfos> DDTxnProcessor::getServerListAndProcessClasses() { Future<ServerWorkerInfos> DDTxnProcessor::getServerListAndProcessClasses() {
return DDTxnProcessorImpl::getServerListAndProcessClasses(cx); return DDTxnProcessorImpl::getServerListAndProcessClasses(cx);
} }
View File
@ -21,6 +21,7 @@
#include <set> #include <set>
#include <string> #include <string>
#include "fdbclient/Audit.h"
#include "fdbclient/DatabaseContext.h" #include "fdbclient/DatabaseContext.h"
#include "fdbclient/FDBOptions.g.h" #include "fdbclient/FDBOptions.g.h"
#include "fdbclient/FDBTypes.h" #include "fdbclient/FDBTypes.h"
@ -52,6 +53,17 @@
#include "fdbserver/DDSharedContext.h" #include "fdbserver/DDSharedContext.h"
#include "flow/actorcompiler.h" // This must be the last #include. #include "flow/actorcompiler.h" // This must be the last #include.
struct DDAudit {
DDAudit(UID id, KeyRange range, AuditType type)
: id(id), range(range), type(type), auditMap(AuditPhase::Invalid, allKeys.end), actors(true) {}
const UID id;
KeyRange range;
const AuditType type;
KeyRangeMap<AuditPhase> auditMap;
ActorCollection actors;
};
void DataMove::validateShard(const DDShardInfo& shard, KeyRangeRef range, int priority) { void DataMove::validateShard(const DDShardInfo& shard, KeyRangeRef range, int priority) {
if (!valid) { if (!valid) {
if (shard.hasDest && shard.destId != anonymousShardId) { if (shard.hasDest && shard.destId != anonymousShardId) {
@ -276,6 +288,10 @@ public:
StorageQuotaInfo storageQuotaInfo; StorageQuotaInfo storageQuotaInfo;
Promise<Void> initialized;
std::unordered_map<AuditType, std::vector<std::shared_ptr<DDAudit>>> audits;
DataDistributor(Reference<AsyncVar<ServerDBInfo> const> const& db, UID id, Reference<DDSharedContext> context)
: dbInfo(db), context(context), ddId(id), txnProcessor(nullptr),
initialDDEventHolder(makeReference<EventCacheHolder>("InitialDD")),
@ -577,6 +593,7 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributor> self,
state bool ddIsTenantAware = SERVER_KNOBS->DD_TENANT_AWARENESS_ENABLED;
loop {
trackerCancelled = false;
self->initialized = Promise<Void>();
// Stored outside of data distribution tracker to avoid slow tasks
// when tracker is cancelled
@ -594,7 +611,6 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributor> self,
state PromiseStream<GetTopKMetricsRequest> getTopKShardMetrics;
state Reference<AsyncVar<bool>> processingUnhealthy(new AsyncVar<bool>(false));
state Reference<AsyncVar<bool>> processingWiggle(new AsyncVar<bool>(false));
state Optional<Reference<TenantCache>> ddTenantCache;
if (ddIsTenantAware) {
@ -636,7 +652,7 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributor> self,
getTopKShardMetrics.getFuture(),
getShardMetricsList,
getAverageShardBytes.getFuture(),
self->initialized,
anyZeroHealthyTeams,
self->ddId,
&shards,
@ -688,7 +704,7 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributor> self,
self->configuration,
self->primaryDcId,
self->configuration.usableRegions > 1 ? self->remoteDcIds : std::vector<Optional<Key>>(),
self->initialized.getFuture(),
zeroHealthyTeams[0],
IsPrimary::True,
processingUnhealthy,
@ -709,7 +725,7 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributor> self,
self->configuration,
self->remoteDcIds,
Optional<std::vector<Optional<Key>>>(),
self->initialized.getFuture() && remoteRecovered(self->dbInfo),
zeroHealthyTeams[1],
IsPrimary::False,
processingUnhealthy,
@ -1327,6 +1343,157 @@ ACTOR Future<Void> ddGetMetrics(GetDataDistributorMetricsRequest req,
return Void();
}
ACTOR Future<Void> auditStorage(Reference<DataDistributor> self, TriggerAuditRequest req);
ACTOR Future<Void> scheduleAuditForRange(Reference<DataDistributor> self,
std::shared_ptr<DDAudit> audit,
KeyRange range);
ACTOR Future<Void> doAuditOnStorageServer(Reference<DataDistributor> self,
std::shared_ptr<DDAudit> audit,
StorageServerInterface ssi,
AuditStorageRequest req);
ACTOR Future<Void> auditStorage(Reference<DataDistributor> self, TriggerAuditRequest req) {
// TODO(heliu): Load running audit, and create one if no audit is running.
state std::shared_ptr<DDAudit> audit;
auto it = self->audits.find(req.getType());
if (it != self->audits.end() && !it->second.empty()) {
ASSERT_EQ(it->second.size(), 1);
auto& currentAudit = it->second.front();
if (currentAudit->range.contains(req.range)) {
audit = it->second.front();
} else {
req.reply.sendError(audit_storage_exceeded_request_limit());
return Void();
}
} else {
const UID auditId = deterministicRandom()->randomUniqueID();
audit = std::make_shared<DDAudit>(auditId, req.range, req.getType());
self->audits[req.getType()].push_back(audit);
audit->actors.add(scheduleAuditForRange(self, audit, req.range));
TraceEvent(SevDebug, "DDAuditStorageBegin", audit->id).detail("Range", req.range).detail("AuditType", req.type);
}
if (req.async && !req.reply.isSet()) {
req.reply.send(audit->id);
}
try {
wait(audit->actors.getResult());
TraceEvent(SevDebug, "DDAuditStorageEnd", audit->id).detail("Range", req.range).detail("AuditType", req.type);
// TODO(heliu): Set the audit result, and clear auditId.
if (!req.async && !req.reply.isSet()) {
TraceEvent(SevDebug, "DDAuditStorageReply", audit->id)
.detail("Range", req.range)
.detail("AuditType", req.type);
req.reply.send(audit->id);
}
} catch (Error& e) {
TraceEvent(SevWarnAlways, "DDAuditStorageOperationError", audit->id)
.errorUnsuppressed(e)
.detail("Range", req.range)
.detail("AuditType", req.type);
}
return Void();
}
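A condensed standalone sketch of the admission rule implemented above (one running audit per type; a request joins it only if its range is contained, otherwise it is rejected), with FDB types reduced to STL stand-ins and illustrative names:

#include <iostream>
#include <map>
#include <optional>
#include <string>

struct Audit { int begin, end; }; // illustrative stand-in for DDAudit's range

// Returns the audit the request joins, or nullopt for "exceeded request limit".
std::optional<Audit> admit(std::map<std::string, Audit>& running, const std::string& type, Audit req) {
	auto it = running.find(type);
	if (it == running.end()) {
		running[type] = req; // no audit of this type yet: start a new one
		return req;
	}
	const Audit& cur = it->second;
	if (cur.begin <= req.begin && req.end <= cur.end) {
		return cur; // contained in the running audit's range: reuse it
	}
	return std::nullopt; // mirrors audit_storage_exceeded_request_limit
}

int main() {
	std::map<std::string, Audit> running;
	admit(running, "ValidateHA", { 0, 100 });
	std::cout << (admit(running, "ValidateHA", { 10, 20 }) ? "joined" : "rejected") << "\n";
	return 0;
}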
ACTOR Future<Void> scheduleAuditForRange(Reference<DataDistributor> self,
std::shared_ptr<DDAudit> audit,
KeyRange range) {
TraceEvent(SevDebug, "DDScheduleAuditForRangeBegin", audit->id)
.detail("Range", range)
.detail("AuditType", audit->type);
// TODO(heliu): Load the audit map for `range`.
state Key begin = range.begin;
state KeyRange currentRange = range;
while (begin < range.end) {
currentRange = KeyRangeRef(begin, range.end);
// Find the first keyrange that hasn't been validated.
auto f = audit->auditMap.intersectingRanges(currentRange);
for (auto it = f.begin(); it != f.end(); ++it) {
if (it->value() != AuditPhase::Invalid && it->value() != AuditPhase::Failed) {
begin = it->range().end;
currentRange = KeyRangeRef(it->range().end, currentRange.end);
} else {
currentRange = KeyRangeRef(it->range().begin, it->range().end) & currentRange;
break;
}
}
try {
state std::vector<IDDTxnProcessor::DDRangeLocations> rangeLocations =
wait(self->txnProcessor->getSourceServerInterfacesForRange(currentRange));
state int i = 0;
for (i = 0; i < rangeLocations.size(); ++i) {
AuditStorageRequest req(audit->id, rangeLocations[i].range, audit->type);
if (audit->type == AuditType::ValidateHA && rangeLocations[i].servers.size() >= 2) {
auto it = rangeLocations[i].servers.begin();
const int idx = deterministicRandom()->randomInt(0, it->second.size());
StorageServerInterface& targetServer = it->second[idx];
++it;
for (; it != rangeLocations[i].servers.end(); ++it) {
const int idx = deterministicRandom()->randomInt(0, it->second.size());
req.targetServers.push_back(it->second[idx].id());
}
audit->actors.add(doAuditOnStorageServer(self, audit, targetServer, req));
}
begin = rangeLocations[i].range.end;
wait(delay(0.01));
}
} catch (Error& e) {
TraceEvent(SevWarnAlways, "DDScheduleAuditRangeError", audit->id)
.errorUnsuppressed(e)
.detail("Range", range);
if (e.code() == error_code_actor_cancelled) {
throw e;
}
}
}
return Void();
}
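The scan above resumes at the first sub-range still marked Invalid or Failed. A minimal sketch of the same idea over a plain ordered map of range-begin to phase (a stand-in for KeyRangeMap; keys and phases are made up):

#include <iostream>
#include <map>
#include <string>

enum class Phase { Invalid, Running, Complete, Failed };

int main() {
	// range-begin -> phase of the range starting at that key (stand-in for KeyRangeMap)
	std::map<std::string, Phase> auditMap{ { "a", Phase::Complete }, { "g", Phase::Failed }, { "m", Phase::Running } };
	std::string cursor = "a"; // start of the requested range
	for (const auto& [begin, phase] : auditMap) {
		if (phase == Phase::Invalid || phase == Phase::Failed) {
			cursor = begin; // first sub-range that still needs auditing
			break;
		}
	}
	std::cout << "resume at " << cursor << "\n";
	return 0;
}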
ACTOR Future<Void> doAuditOnStorageServer(Reference<DataDistributor> self,
std::shared_ptr<DDAudit> audit,
StorageServerInterface ssi,
AuditStorageRequest req) {
TraceEvent(SevDebug, "DDDoAuditOnStorageServerBegin", req.id)
.detail("Range", req.range)
.detail("AuditType", req.type)
.detail("StorageServer", ssi.toString())
.detail("TargetServers", describe(req.targetServers));
try {
audit->auditMap.insert(req.range, AuditPhase::Running);
ErrorOr<AuditStorageState> vResult = wait(ssi.auditStorage.getReplyUnlessFailedFor(
req, /*sustainedFailureDuration=*/2.0, /*sustainedFailureSlope=*/0));
if (vResult.isError()) {
throw vResult.getError();
}
TraceEvent e(vResult.get().error.empty() ? SevInfo : SevWarnAlways, "DDAuditStorageState", req.id);
e.detail("Range", req.range);
e.detail("StorageServer", ssi.toString());
if (!vResult.get().error.empty()) {
e.detail("ErrorMessage", vResult.get().error);
}
} catch (Error& e) {
TraceEvent(SevWarn, "DDDoAuditOnStorageServerError", req.id)
.errorUnsuppressed(e)
.detail("Range", req.range)
.detail("StorageServer", ssi.toString())
.detail("TargetServers", describe(req.targetServers));
if (e.code() != error_code_actor_cancelled) {
audit->auditMap.insert(req.range, AuditPhase::Failed);
audit->actors.add(scheduleAuditForRange(self, audit, req.range));
}
}
return Void();
}
ACTOR Future<Void> dataDistributor(DataDistributorInterface di, Reference<AsyncVar<ServerDBInfo> const> db) {
state Reference<DDSharedContext> context(new DDSharedContext(di.id()));
state Reference<DataDistributor> self(new DataDistributor(db, di.id(), context));
@ -1393,6 +1560,9 @@ ACTOR Future<Void> dataDistributor(DataDistributorInterface di, Reference<AsyncV
when(GetStorageWigglerStateRequest req = waitNext(di.storageWigglerState.getFuture())) {
req.reply.send(getStorageWigglerStates(self));
}
when(TriggerAuditRequest req = waitNext(di.triggerAudit.getFuture())) {
actors.add(auditStorage(self, req));
}
}
} catch (Error& err) {
if (normalDataDistributorErrors().count(err.code()) == 0) {


@ -18,6 +18,7 @@
* limitations under the License.
*/
#include "fdbclient/BlobMetadataUtils.h"
#include "fdbclient/EncryptKeyProxyInterface.h"
#include "fdbrpc/Locality.h"
@ -594,10 +595,21 @@ bool isCipherKeyEligibleForRefresh(const EncryptBaseCipherKey& cipherKey, int64_
// Candidate eligible for refresh iff either is true:
// 1. CipherKey cell is either expired/needs-refresh right now.
// 2. CipherKey cell 'will' be expired/needs-refresh before next refresh cycle interval (proactive refresh)
if (BUGGIFY_WITH_PROB(0.01)) {
return true;
}
int64_t nextRefreshCycleTS = currTS + FLOW_KNOBS->ENCRYPT_KEY_REFRESH_INTERVAL;
return nextRefreshCycleTS > cipherKey.expireAt || nextRefreshCycleTS > cipherKey.refreshAt;
}
bool isBlobMetadataEligibleForRefresh(const BlobMetadataDetailsRef& blobMetadata, int64_t currTS) {
if (BUGGIFY_WITH_PROB(0.01)) {
return true;
}
int64_t nextRefreshCycleTS = currTS + SERVER_KNOBS->BLOB_METADATA_REFRESH_INTERVAL;
return nextRefreshCycleTS > blobMetadata.expireAt || nextRefreshCycleTS > blobMetadata.refreshAt;
}
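A worked example of the proactive-refresh predicate with made-up numbers: given a 60s refresh interval, an entry with refreshAt = 100 becomes eligible at any currTS > 40, one full cycle before it actually goes stale. A standalone sketch without the knobs or BUGGIFY:

#include <cassert>
#include <cstdint>

// Stand-in for isBlobMetadataEligibleForRefresh, minus knobs and BUGGIFY.
bool eligible(int64_t refreshAt, int64_t expireAt, int64_t currTS, int64_t refreshInterval) {
	const int64_t nextRefreshCycleTS = currTS + refreshInterval;
	return nextRefreshCycleTS > expireAt || nextRefreshCycleTS > refreshAt;
}

int main() {
	// refreshAt = 100, expireAt = 200, interval = 60 (all hypothetical)
	assert(!eligible(100, 200, /*currTS=*/30, 60)); // 90 <= 100: not yet
	assert(eligible(100, 200, /*currTS=*/41, 60)); // 101 > 100: refresh proactively
	return 0;
}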
ACTOR Future<Void> refreshEncryptionKeysCore(Reference<EncryptKeyProxyData> ekpProxyData,
KmsConnectorInterface kmsConnectorInf) {
state UID debugId = deterministicRandom()->randomUniqueID();
@ -710,7 +722,8 @@ ACTOR Future<Void> getLatestBlobMetadata(Reference<EncryptKeyProxyData> ekpProxy
for (auto& info : dedupedDomainInfos) {
const auto itr = ekpProxyData->blobMetadataDomainIdCache.find(info.first);
if (itr != ekpProxyData->blobMetadataDomainIdCache.end() && itr->second.isValid() &&
now() <= itr->second.metadataDetails.expireAt) {
metadataDetails.arena().dependsOn(itr->second.metadataDetails.arena());
metadataDetails.push_back(metadataDetails.arena(), itr->second.metadataDetails);
@ -760,6 +773,7 @@ ACTOR Future<Void> getLatestBlobMetadata(Reference<EncryptKeyProxyData> ekpProxy
ACTOR Future<Void> refreshBlobMetadataCore(Reference<EncryptKeyProxyData> ekpProxyData,
KmsConnectorInterface kmsConnectorInf) {
state UID debugId = deterministicRandom()->randomUniqueID();
state double startTime;
state TraceEvent t("RefreshBlobMetadata_Start", ekpProxyData->myId);
t.setMaxEventLength(SERVER_KNOBS->ENCRYPT_PROXY_MAX_DBG_TRACE_LENGTH);
@ -769,13 +783,28 @@ ACTOR Future<Void> refreshBlobMetadataCore(Reference<EncryptKeyProxyData> ekpPro
try {
KmsConnBlobMetadataReq req;
req.debugId = debugId;
int64_t currTS = (int64_t)now();
for (auto itr = ekpProxyData->blobMetadataDomainIdCache.begin();
itr != ekpProxyData->blobMetadataDomainIdCache.end();) {
if (isBlobMetadataEligibleForRefresh(itr->second.metadataDetails, currTS)) {
req.domainInfos.emplace_back_deep(
req.domainInfos.arena(), itr->first, itr->second.metadataDetails.domainName);
}
// Garbage collect expired cached Blob Metadata
if (itr->second.metadataDetails.expireAt < currTS) {
itr = ekpProxyData->blobMetadataDomainIdCache.erase(itr);
} else {
itr++;
}
}
if (req.domainInfos.empty()) {
return Void();
}
startTime = now();
KmsConnBlobMetadataRep rep = wait(kmsConnectorInf.blobMetadataReq.getReply(req));
ekpProxyData->kmsBlobMetadataReqLatency.addMeasurement(now() - startTime);
for (auto& item : rep.metadataDetails) {


@ -400,8 +400,8 @@ public:
void addRequests(TransactionTag tag, int count) { tagStatistics[tag].addTransactions(static_cast<double>(count)); }
uint64_t getThrottledTagChangeId() const { return throttledTagChangeId; }
TransactionTagMap<double> getProxyRates(int numProxies) {
TransactionTagMap<double> result;
lastBusyTagCount = 0;
for (auto& [tag, stats] : tagStatistics) {
@ -414,8 +414,7 @@
}
if (targetTps.present()) {
auto const smoothedTargetTps = stats.updateAndGetTargetLimit(targetTps.get());
result[tag] = smoothedTargetTps / numProxies;
} else {
te.disable();
}
@ -497,7 +496,7 @@ uint64_t GlobalTagThrottler::getThrottledTagChangeId() const {
PrioritizedTransactionTagMap<ClientTagThrottleLimits> GlobalTagThrottler::getClientRates() {
return impl->getClientRates();
}
TransactionTagMap<double> GlobalTagThrottler::getProxyRates(int numProxies) {
return impl->getProxyRates(numProxies);
}
int64_t GlobalTagThrottler::autoThrottleCount() const {
@ -679,12 +678,9 @@ bool isNear(Optional<double> a, Optional<double> b) {
bool targetRateIsNear(GlobalTagThrottler& globalTagThrottler, TransactionTag tag, Optional<double> expected) {
Optional<double> rate;
auto targetRates = globalTagThrottler.getProxyRates(1);
auto it = targetRates.find(tag);
if (it != targetRates.end()) {
rate = it->second;
}
TraceEvent("GlobalTagThrottling_RateMonitor")
.detail("Tag", tag)


@ -24,11 +24,13 @@
#include "fdbclient/Notified.h" #include "fdbclient/Notified.h"
#include "fdbclient/TransactionLineage.h" #include "fdbclient/TransactionLineage.h"
#include "fdbclient/Tuple.h" #include "fdbclient/Tuple.h"
#include "fdbserver/LogSystem.h"
#include "fdbserver/LogSystemDiskQueueAdapter.h"
#include "fdbclient/CommitProxyInterface.h" #include "fdbclient/CommitProxyInterface.h"
#include "fdbclient/GrvProxyInterface.h" #include "fdbclient/GrvProxyInterface.h"
#include "fdbclient/VersionVector.h" #include "fdbclient/VersionVector.h"
#include "fdbserver/GrvProxyTransactionTagThrottler.h"
#include "fdbserver/GrvTransactionRateInfo.h"
#include "fdbserver/LogSystem.h"
#include "fdbserver/LogSystemDiskQueueAdapter.h"
#include "fdbserver/WaitFailure.h" #include "fdbserver/WaitFailure.h"
#include "fdbserver/WorkerInterface.actor.h" #include "fdbserver/WorkerInterface.actor.h"
#include "fdbrpc/sim_validation.h" #include "fdbrpc/sim_validation.h"
@ -158,83 +160,6 @@ struct GrvProxyStats {
} }
}; };
struct GrvProxyData {
GrvProxyInterface proxy;
UID dbgid;
@ -437,7 +362,7 @@ ACTOR Future<Void> getRate(UID myID,
GetHealthMetricsReply* detailedHealthMetricsReply,
TransactionTagMap<uint64_t>* transactionTagCounter,
PrioritizedTransactionTagMap<ClientTagThrottleLimits>* clientThrottledTags,
GrvProxyTransactionTagThrottler* tagThrottler,
GrvProxyStats* stats,
GrvProxyData* proxyData) {
state Future<Void> nextRequestTimer = Never();
@ -498,12 +423,7 @@ ACTOR Future<Void> getRate(UID myID,
*clientThrottledTags = std::move(rep.clientThrottledTags.get());
}
if (rep.proxyThrottledTags.present()) {
tagThrottler->updateRates(rep.proxyThrottledTags.get());
}
}
when(wait(leaseTimeout)) {
@ -537,20 +457,19 @@ void dropRequestFromQueue(Deque<GetReadVersionRequest>* queue, GrvProxyStats* st
}
// Put a GetReadVersion request into the queue corresponding to its priority.
ACTOR Future<Void> queueGetReadVersionRequests(Reference<AsyncVar<ServerDBInfo> const> db,
SpannedDeque<GetReadVersionRequest>* systemQueue,
SpannedDeque<GetReadVersionRequest>* defaultQueue,
SpannedDeque<GetReadVersionRequest>* batchQueue,
FutureStream<GetReadVersionRequest> readVersionRequests,
PromiseStream<Void> GRVTimer,
double* lastGRVTime,
double* GRVBatchTime,
FutureStream<double> normalGRVLatency,
GrvProxyStats* stats,
GrvTransactionRateInfo* batchRateInfo,
TransactionTagMap<uint64_t>* transactionTagCounter,
GrvProxyTransactionTagThrottler* tagThrottler) {
getCurrentLineage()->modify(&TransactionLineage::operation) =
TransactionLineage::Operation::GetConsistentReadVersion;
loop choose {
@ -617,12 +536,16 @@ ACTOR Future<Void> queueGetReadVersionRequests(
stats->txnStartIn += req.transactionCount;
stats->txnDefaultPriorityStartIn += req.transactionCount;
++stats->defaultGRVQueueSize;
if (SERVER_KNOBS->ENFORCE_TAG_THROTTLING_ON_PROXIES && req.isTagged()) {
tagThrottler->addRequest(req);
} else {
defaultQueue->push_back(req);
}
// defaultQueue->span.addParent(req.spanContext);
} else {
// Return error for batch_priority GRV requests
int64_t proxiesCount = std::max((int)db->get().client.grvProxies.size(), 1);
if (batchRateInfo->getRate() <= (1.0 / proxiesCount)) {
req.reply.sendError(batch_transaction_throttled());
stats->txnThrottled += req.transactionCount;
} else {
@ -630,7 +553,11 @@ ACTOR Future<Void> queueGetReadVersionRequests(
stats->txnStartIn += req.transactionCount;
stats->txnBatchPriorityStartIn += req.transactionCount;
++stats->batchGRVQueueSize;
if (SERVER_KNOBS->ENFORCE_TAG_THROTTLING_ON_PROXIES && req.isTagged()) {
tagThrottler->addRequest(req);
} else {
batchQueue->push_back(req);
}
// batchQueue->span.addParent(req.spanContext);
}
}
@ -791,6 +718,7 @@ ACTOR Future<Void> sendGrvReplies(Future<GetReadVersionReply> replyFuture,
grvProxyData->versionVectorSizeOnGRVReply.addMeasurement(reply.ssVersionVectorDelta.size());
}
reply.proxyId = grvProxyData->dbgid;
reply.proxyTagThrottledDuration = request.proxyTagThrottledDuration;
if (!request.tags.empty()) {
auto& priorityThrottledTags = clientThrottledTags[request.priority];
@ -895,7 +823,7 @@ ACTOR static Future<Void> transactionStarter(GrvProxyInterface proxy,
state int64_t batchTransactionCount = 0;
state GrvTransactionRateInfo normalRateInfo(10);
state GrvTransactionRateInfo batchRateInfo(0);
state GrvProxyTransactionTagThrottler tagThrottler;
state SpannedDeque<GetReadVersionRequest> systemQueue("GP:transactionStarterSystemQueue"_loc);
state SpannedDeque<GetReadVersionRequest> defaultQueue("GP:transactionStarterDefaultQueue"_loc);
@ -922,7 +850,7 @@ ACTOR static Future<Void> transactionStarter(GrvProxyInterface proxy,
detailedHealthMetricsReply,
&transactionTagCounter,
&clientThrottledTags,
&tagThrottler,
&grvProxyData->stats,
grvProxyData));
addActor.send(queueGetReadVersionRequests(db,
@ -937,7 +865,7 @@ ACTOR static Future<Void> transactionStarter(GrvProxyInterface proxy,
&grvProxyData->stats,
&batchRateInfo,
&transactionTagCounter,
&tagThrottler));
while (std::find(db->get().client.grvProxies.begin(), db->get().client.grvProxies.end(), proxy) ==
db->get().client.grvProxies.end()) {
@ -960,11 +888,12 @@ ACTOR static Future<Void> transactionStarter(GrvProxyInterface proxy,
elapsed = 1e-15;
}
tagThrottler.releaseTransactions(elapsed, defaultQueue, batchQueue);
normalRateInfo.startReleaseWindow();
batchRateInfo.startReleaseWindow();
grvProxyData->stats.transactionLimit = normalRateInfo.getLimit();
grvProxyData->stats.batchTransactionLimit = batchRateInfo.getLimit();
int transactionsStarted[2] = { 0, 0 };
int systemTransactionsStarted[2] = { 0, 0 };
@ -1071,11 +1000,11 @@ ACTOR static Future<Void> transactionStarter(GrvProxyInterface proxy,
transactionCount += transactionsStarted[0] + transactionsStarted[1];
batchTransactionCount += batchTotalStarted;
normalRateInfo.endReleaseWindow(
systemTotalStarted + normalTotalStarted, systemQueue.empty() && defaultQueue.empty(), elapsed);
batchRateInfo.endReleaseWindow(systemTotalStarted + normalTotalStarted + batchTotalStarted,
systemQueue.empty() && defaultQueue.empty() && batchQueue.empty(),
elapsed);
if (debugID.present()) {
g_traceBatch.addEvent("TransactionDebug",


@ -0,0 +1,399 @@
/*
* GrvProxyTransactionTagThrottler.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "fdbserver/GrvProxyTransactionTagThrottler.h"
#include "flow/UnitTest.h"
#include "flow/actorcompiler.h" // must be last include
uint64_t GrvProxyTransactionTagThrottler::DelayedRequest::lastSequenceNumber = 0;
void GrvProxyTransactionTagThrottler::DelayedRequest::updateProxyTagThrottledDuration() {
req.proxyTagThrottledDuration = now() - startTime;
}
void GrvProxyTransactionTagThrottler::TagQueue::setRate(double rate) {
if (rateInfo.present()) {
rateInfo.get().setRate(rate);
} else {
rateInfo = GrvTransactionRateInfo(rate);
}
}
void GrvProxyTransactionTagThrottler::updateRates(TransactionTagMap<double> const& newRates) {
for (const auto& [tag, rate] : newRates) {
auto it = queues.find(tag);
if (it == queues.end()) {
queues[tag] = TagQueue(rate);
} else {
it->second.setRate(rate);
}
}
// Clean up tags that did not appear in newRates
for (auto& [tag, queue] : queues) {
if (newRates.find(tag) == newRates.end()) {
queue.rateInfo.reset();
}
}
// TODO: Use std::erase_if in C++20
for (auto it = queues.begin(); it != queues.end();) {
const auto& [tag, queue] = *it;
if (queue.requests.empty() && !queue.rateInfo.present()) {
it = queues.erase(it);
} else {
++it;
}
}
}
void GrvProxyTransactionTagThrottler::addRequest(GetReadVersionRequest const& req) {
ASSERT(req.isTagged());
auto const& tag = req.tags.begin()->first;
if (req.tags.size() > 1) {
// The GrvProxyTransactionTagThrottler assumes that each GetReadVersionRequest
// has at most one tag. If a transaction uses multiple tags and
// SERVER_KNOBS->ENFORCE_TAG_THROTTLING_ON_PROXIES is enabled, there may be
// unexpected behaviour, because only one tag is used for throttling.
TraceEvent(SevWarnAlways, "GrvProxyTransactionTagThrottler_MultipleTags")
.detail("NumTags", req.tags.size())
.detail("UsingTag", printable(tag));
}
queues[tag].requests.emplace_back(req);
}
void GrvProxyTransactionTagThrottler::releaseTransactions(double elapsed,
SpannedDeque<GetReadVersionRequest>& outBatchPriority,
SpannedDeque<GetReadVersionRequest>& outDefaultPriority) {
// Pointer to a TagQueue with some extra metadata stored alongside
struct TagQueueHandle {
// Store pointers here to avoid frequent std::unordered_map lookups
TagQueue* queue;
// Cannot be stored directly because we need to update the count stored in transactionsReleased
uint32_t* numReleased;
// Sequence number of the first queued request
int64_t nextSeqNo;
bool operator>(TagQueueHandle const& rhs) const { return nextSeqNo > rhs.nextSeqNo; }
explicit TagQueueHandle(TagQueue& queue, uint32_t& numReleased) : queue(&queue), numReleased(&numReleased) {
ASSERT(!this->queue->requests.empty());
nextSeqNo = this->queue->requests.front().sequenceNumber;
}
};
// Priority queue of queues for each tag, ordered by the sequence number of the
// next request to process in each queue
std::priority_queue<TagQueueHandle, std::vector<TagQueueHandle>, std::greater<TagQueueHandle>> pqOfQueues;
// Track transactions released for each tag
std::vector<std::pair<TransactionTag, uint32_t>> transactionsReleased;
transactionsReleased.reserve(queues.size());
auto const transactionsReleasedInitialCapacity = transactionsReleased.capacity();
for (auto& [tag, queue] : queues) {
if (queue.rateInfo.present()) {
queue.rateInfo.get().startReleaseWindow();
}
if (!queue.requests.empty()) {
// First place the count in the transactionsReleased object,
// then pass a reference to the count to the TagQueueHandle object
// emplaced into pqOfQueues.
//
// Because we've reserved enough space in transactionsReleased
// to avoid resizing, this reference should remain valid.
// This allows each TagQueueHandle to update its number of
// numReleased counter without incurring the cost of a std::unordered_map lookup.
auto& [_, count] = transactionsReleased.emplace_back(tag, 0);
pqOfQueues.emplace(queue, count);
}
}
while (!pqOfQueues.empty()) {
auto tagQueueHandle = pqOfQueues.top();
pqOfQueues.pop();
// Used to determine when it is time to start processing another tag
auto const nextQueueSeqNo =
pqOfQueues.empty() ? std::numeric_limits<int64_t>::max() : pqOfQueues.top().nextSeqNo;
while (!tagQueueHandle.queue->requests.empty()) {
auto& delayedReq = tagQueueHandle.queue->requests.front();
auto count = delayedReq.req.tags.begin()->second;
ASSERT_EQ(tagQueueHandle.nextSeqNo, delayedReq.sequenceNumber);
if (tagQueueHandle.queue->rateInfo.present() &&
!tagQueueHandle.queue->rateInfo.get().canStart(*(tagQueueHandle.numReleased), count)) {
// Cannot release any more transaction from this tag (don't push the tag queue handle back into
// pqOfQueues)
CODE_PROBE(true, "GrvProxyTransactionTagThrottler throttling transaction");
break;
} else {
if (tagQueueHandle.nextSeqNo < nextQueueSeqNo) {
// Releasing transaction
*(tagQueueHandle.numReleased) += count;
delayedReq.updateProxyTagThrottledDuration();
if (delayedReq.req.priority == TransactionPriority::BATCH) {
outBatchPriority.push_back(delayedReq.req);
} else if (delayedReq.req.priority == TransactionPriority::DEFAULT) {
outDefaultPriority.push_back(delayedReq.req);
} else {
// Immediate priority transactions should bypass the GrvProxyTransactionTagThrottler
ASSERT(false);
}
tagQueueHandle.queue->requests.pop_front();
if (!tagQueueHandle.queue->requests.empty()) {
tagQueueHandle.nextSeqNo = tagQueueHandle.queue->requests.front().sequenceNumber;
}
} else {
CODE_PROBE(true, "GrvProxyTransactionTagThrottler switching tags to preserve FIFO");
pqOfQueues.push(tagQueueHandle);
break;
}
}
}
}
// End release windows for queues with valid rateInfo
{
TransactionTagMap<uint32_t> transactionsReleasedMap;
for (const auto& [tag, count] : transactionsReleased) {
transactionsReleasedMap[tag] = count;
}
for (auto& [tag, queue] : queues) {
if (queue.rateInfo.present()) {
queue.rateInfo.get().endReleaseWindow(transactionsReleasedMap[tag], false, elapsed);
}
}
}
// If the capacity is increased, that means the vector has been illegally resized, potentially
// corrupting memory
ASSERT_EQ(transactionsReleased.capacity(), transactionsReleasedInitialCapacity);
}
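The reserve() call above is load-bearing: each TagQueueHandle keeps a raw pointer into transactionsReleased, which stays valid only because the vector never reallocates. A minimal standalone sketch of that invariant:

#include <cassert>
#include <cstdint>
#include <string>
#include <utility>
#include <vector>

int main() {
	std::vector<std::pair<std::string, uint32_t>> released;
	released.reserve(2); // fixed capacity up front, so emplace_back never reallocates
	auto& first = released.emplace_back("tagA", 0);
	uint32_t* counter = &first.second; // analogous to TagQueueHandle::numReleased
	released.emplace_back("tagB", 0); // no reallocation: counter is still valid
	*counter += 5;
	assert(released[0].second == 5);
	assert(released.capacity() == 2); // mirrors the ASSERT_EQ above
	return 0;
}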
uint32_t GrvProxyTransactionTagThrottler::size() {
return queues.size();
}
ACTOR static Future<Void> mockClient(GrvProxyTransactionTagThrottler* throttler,
TransactionPriority priority,
TagSet tagSet,
int batchSize,
double desiredRate,
TransactionTagMap<uint32_t>* counters) {
state Future<Void> timer;
state TransactionTagMap<uint32_t> tags;
for (const auto& tag : tagSet) {
tags[tag] = batchSize;
}
loop {
timer = delayJittered(static_cast<double>(batchSize) / desiredRate);
GetReadVersionRequest req;
req.tags = tags;
req.priority = priority;
throttler->addRequest(req);
wait(success(req.reply.getFuture()) && timer);
for (auto& [tag, _] : tags) {
(*counters)[tag] += batchSize;
}
}
}
ACTOR static Future<Void> mockFifoClient(GrvProxyTransactionTagThrottler* throttler) {
state TransactionTagMap<uint32_t> tagSet1;
state TransactionTagMap<uint32_t> tagSet2;
state std::vector<GetReadVersionRequest> reqs;
state int i = 0;
// Used to track the order in which replies are received
state std::vector<int> replyIndices;
// Tag half of requests with one tag, half with another, then randomly shuffle
tagSet1["sampleTag1"_sr] = 1;
tagSet2["sampleTag2"_sr] = 1;
for (i = 0; i < 2000; ++i) {
auto& req = reqs.emplace_back();
req.priority = TransactionPriority::DEFAULT;
if (i < 1000) {
req.tags = tagSet1;
} else {
req.tags = tagSet2;
}
}
deterministicRandom()->randomShuffle(reqs);
// Send requests to throttler and assert that responses are received in FIFO order
for (const auto& req : reqs) {
throttler->addRequest(req);
}
state std::vector<Future<Void>> futures;
for (int j = 0; j < 2000; ++j) {
// Flow hack to capture replyIndices
auto* _replyIndices = &replyIndices;
futures.push_back(map(reqs[j].reply.getFuture(), [_replyIndices, j](auto const&) {
(*_replyIndices).push_back(j);
return Void();
}));
}
wait(waitForAll(futures));
for (i = 0; i < 2000; ++i) {
ASSERT_EQ(replyIndices[i], i);
}
return Void();
}
ACTOR static Future<Void> mockServer(GrvProxyTransactionTagThrottler* throttler) {
state SpannedDeque<GetReadVersionRequest> outBatchPriority("TestGrvProxyTransactionTagThrottler_Batch"_loc);
state SpannedDeque<GetReadVersionRequest> outDefaultPriority("TestGrvProxyTransactionTagThrottler_Default"_loc);
loop {
state double elapsed = (0.009 + 0.002 * deterministicRandom()->random01());
wait(delay(elapsed));
throttler->releaseTransactions(elapsed, outBatchPriority, outDefaultPriority);
while (!outBatchPriority.empty()) {
outBatchPriority.front().reply.send(GetReadVersionReply{});
outBatchPriority.pop_front();
}
while (!outDefaultPriority.empty()) {
outDefaultPriority.front().reply.send(GetReadVersionReply{});
outDefaultPriority.pop_front();
}
}
}
static TransactionTag getRandomTag() {
TransactionTag result;
auto arr = new (result.arena()) uint8_t[32];
for (int i = 0; i < 32; ++i) {
arr[i] = (uint8_t)deterministicRandom()->randomInt(0, 256);
}
result.contents() = TransactionTagRef(arr, 32);
return result;
}
static bool isNear(double desired, int64_t actual) {
return std::abs(desired - actual) * 10 < desired;
}
// Rate limit set at 10, but client attempts 20 transactions per second.
// Client should be throttled to only 10 transactions per second.
TEST_CASE("/GrvProxyTransactionTagThrottler/Simple") {
state GrvProxyTransactionTagThrottler throttler;
state TagSet tagSet;
state TransactionTagMap<uint32_t> counters;
{
TransactionTagMap<double> rates;
rates["sampleTag"_sr] = 10.0;
throttler.updateRates(rates);
}
tagSet.addTag("sampleTag"_sr);
state Future<Void> client = mockClient(&throttler, TransactionPriority::DEFAULT, tagSet, 1, 20.0, &counters);
state Future<Void> server = mockServer(&throttler);
wait(timeout(client && server, 60.0, Void()));
TraceEvent("TagQuotaTest_Simple").detail("Counter", counters["sampleTag"_sr]);
ASSERT(isNear(counters["sampleTag"_sr], 60.0 * 10.0));
return Void();
}
// Clients share the available 30 transaction/second budget
TEST_CASE("/GrvProxyTransactionTagThrottler/MultiClient") {
state GrvProxyTransactionTagThrottler throttler;
state TagSet tagSet;
state TransactionTagMap<uint32_t> counters;
{
TransactionTagMap<double> rates;
rates["sampleTag"_sr] = 30.0;
throttler.updateRates(rates);
}
tagSet.addTag("sampleTag"_sr);
state std::vector<Future<Void>> clients;
clients.reserve(10);
for (int i = 0; i < 10; ++i) {
clients.push_back(mockClient(&throttler, TransactionPriority::DEFAULT, tagSet, 1, 10.0, &counters));
}
state Future<Void> server = mockServer(&throttler);
wait(timeout(waitForAll(clients) && server, 60.0, Void()));
TraceEvent("TagQuotaTest_MultiClient").detail("Counter", counters["sampleTag"_sr]);
ASSERT(isNear(counters["sampleTag"_sr], 60.0 * 30.0));
return Void();
}
// Test processing GetReadVersionRequests that batch several transactions
TEST_CASE("/GrvProxyTransactionTagThrottler/Batch") {
state GrvProxyTransactionTagThrottler throttler;
state TagSet tagSet;
state TransactionTagMap<uint32_t> counters;
{
TransactionTagMap<double> rates;
rates["sampleTag"_sr] = 10.0;
throttler.updateRates(rates);
}
tagSet.addTag("sampleTag"_sr);
state Future<Void> client = mockClient(&throttler, TransactionPriority::DEFAULT, tagSet, 5, 20.0, &counters);
state Future<Void> server = mockServer(&throttler);
wait(timeout(client && server, 60.0, Void()));
TraceEvent("TagQuotaTest_Batch").detail("Counter", counters["sampleTag"_sr]);
ASSERT(isNear(counters["sampleTag"_sr], 60.0 * 10.0));
return Void();
}
// Tests cleanup of tags that are no longer throttled.
TEST_CASE("/GrvProxyTransactionTagThrottler/Cleanup1") {
GrvProxyTransactionTagThrottler throttler;
for (int i = 0; i < 1000; ++i) {
auto const tag = getRandomTag();
TransactionTagMap<double> rates;
rates[tag] = 10.0;
throttler.updateRates(rates);
ASSERT_EQ(throttler.size(), 1);
}
return Void();
}
// Tests cleanup of tags once queues have been emptied
TEST_CASE("/GrvProxyTransactionTagThrottler/Cleanup2") {
GrvProxyTransactionTagThrottler throttler;
{
GetReadVersionRequest req;
req.tags["sampleTag"_sr] = 1;
req.priority = TransactionPriority::DEFAULT;
throttler.addRequest(req);
}
ASSERT_EQ(throttler.size(), 1);
throttler.updateRates(TransactionTagMap<double>{});
ASSERT_EQ(throttler.size(), 1);
{
SpannedDeque<GetReadVersionRequest> outBatchPriority("TestGrvProxyTransactionTagThrottler_Batch"_loc);
SpannedDeque<GetReadVersionRequest> outDefaultPriority("TestGrvProxyTransactionTagThrottler_Default"_loc);
throttler.releaseTransactions(0.1, outBatchPriority, outDefaultPriority);
}
// Calling updateRates again cleans up the now-empty queue in the throttler
throttler.updateRates(TransactionTagMap<double>{});
ASSERT_EQ(throttler.size(), 0);
return Void();
}
// Tests that unthrottled transactions are released in FIFO order, even when they
// have different tags
TEST_CASE("/GrvProxyTransactionTagThrottler/Fifo") {
state GrvProxyTransactionTagThrottler throttler;
state Future<Void> server = mockServer(&throttler);
wait(mockFifoClient(&throttler));
return Void();
}
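The Fifo test exercises the core merging rule of releaseTransactions: every request carries a global sequence number, and the throttler always drains the queue whose head has the smallest one. A tiny standalone sketch of that k-way merge (plain STL, illustrative data):

#include <cassert>
#include <deque>
#include <functional>
#include <queue>
#include <utility>
#include <vector>

int main() {
	// Per-tag queues of global sequence numbers (illustrative data)
	std::vector<std::deque<int>> tagQueues = { { 0, 3, 4 }, { 1, 2, 5 } };
	// Min-heap of (head sequence number, queue index), like pqOfQueues above
	std::priority_queue<std::pair<int, int>, std::vector<std::pair<int, int>>, std::greater<>> pq;
	for (int i = 0; i < (int)tagQueues.size(); ++i) {
		pq.push({ tagQueues[i].front(), i });
	}
	std::vector<int> released;
	while (!pq.empty()) {
		auto [seq, i] = pq.top();
		pq.pop();
		released.push_back(seq);
		tagQueues[i].pop_front();
		if (!tagQueues[i].empty()) {
			pq.push({ tagQueues[i].front(), i });
		}
	}
	for (int i = 0; i < (int)released.size(); ++i) {
		assert(released[i] == i); // global FIFO preserved across tags
	}
	return 0;
}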


@ -0,0 +1,123 @@
/*
* GrvTransactionRateInfo.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "fdbserver/GrvTransactionRateInfo.h"
#include "fdbserver/Knobs.h"
#include "flow/UnitTest.h"
#include "flow/actorcompiler.h" // must be last include
GrvTransactionRateInfo::GrvTransactionRateInfo(double rate)
: rate(rate), smoothRate(SERVER_KNOBS->START_TRANSACTION_RATE_WINDOW),
smoothReleased(SERVER_KNOBS->START_TRANSACTION_RATE_WINDOW) {
smoothRate.setTotal(rate);
}
bool GrvTransactionRateInfo::canStart(int64_t numAlreadyStarted, int64_t count) const {
return numAlreadyStarted + count <=
std::min(limit + budget, SERVER_KNOBS->START_TRANSACTION_MAX_TRANSACTIONS_TO_START);
}
void GrvTransactionRateInfo::endReleaseWindow(int64_t numStartedAtPriority, bool queueEmptyAtPriority, double elapsed) {
// Update the budget to accumulate any extra capacity available or remove any excess that was used.
// The actual delta is the portion of the limit we didn't use multiplied by the fraction of the rate window that
// elapsed.
//
// We may have exceeded our limit due to the budget or because of higher priority transactions, in which case
// this delta will be negative. The delta can also be negative in the event that our limit was negative, which
// can happen if we had already started more transactions in our rate window than our rate would have allowed.
//
// This budget has the property that when the budget is required to start transactions (because batches are
// big), the sum limit+budget will increase linearly from 0 to the batch size over time and decrease by the
// batch size upon starting a batch. In other words, this works equivalently to a model where we linearly
// accumulate budget over time in the case that our batches are too big to take advantage of the rate window based
// limits.
//
// Note that "rate window" here indicates a period of SERVER_KNOBS->START_TRANSACTION_RATE_WINDOW seconds,
// whereas "release window" is the period between wait statements, with duration indicated by "elapsed."
budget =
std::max(0.0, budget + elapsed * (limit - numStartedAtPriority) / SERVER_KNOBS->START_TRANSACTION_RATE_WINDOW);
// If we are emptying out the queue of requests, then we don't need to carry much budget forward
// If we did keep accumulating budget, then our responsiveness to changes in workflow could be compromised
if (queueEmptyAtPriority) {
budget = std::min(budget, SERVER_KNOBS->START_TRANSACTION_MAX_EMPTY_QUEUE_BUDGET);
}
smoothReleased.addDelta(numStartedAtPriority);
}
void GrvTransactionRateInfo::disable() {
disabled = true;
// Use smoothRate.setTotal(0) instead of setting rate to 0 so txns will not be throttled immediately.
smoothRate.setTotal(0);
}
void GrvTransactionRateInfo::setRate(double rate) {
ASSERT(rate >= 0 && rate != std::numeric_limits<double>::infinity() && !std::isnan(rate));
this->rate = rate;
if (disabled) {
smoothRate.reset(rate);
disabled = false;
} else {
smoothRate.setTotal(rate);
}
}
void GrvTransactionRateInfo::startReleaseWindow() {
// Determine the number of transactions that this proxy is allowed to release
// Roughly speaking, this is done by computing the number of transactions over some historical window that we
// could have started but didn't, and making that our limit. More precisely, we track a smoothed rate limit and
// release rate, the difference of which is the rate of additional transactions that we could have released
// based on that window. Then we multiply by the window size to get a number of transactions.
//
// Limit can be negative in the event that we are releasing more transactions than we are allowed (due to the
// use of our budget or because of higher priority transactions).
double releaseRate = smoothRate.smoothTotal() - smoothReleased.smoothRate();
limit = SERVER_KNOBS->START_TRANSACTION_RATE_WINDOW * releaseRate;
}
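Putting the two window functions together with hypothetical numbers: with a 2s rate window, a smoothed allowed rate of 100 tps and a smoothed release rate of 90 tps, startReleaseWindow sets limit = 2 * (100 - 90) = 20; canStart then admits transactions while started + count stays within limit + budget; and endReleaseWindow banks the unused portion of the limit, scaled by the elapsed fraction of the window. A self-contained sketch of just that arithmetic, independent of the knobs and Smoother types:

#include <algorithm>
#include <cassert>

int main() {
	const double rateWindow = 2.0; // stand-in for SERVER_KNOBS->START_TRANSACTION_RATE_WINDOW
	// startReleaseWindow: headroom between allowed rate and observed release rate
	double limit = rateWindow * (100.0 - 90.0); // hypothetical smoothed rates, in tps
	assert(limit == 20.0);
	// canStart: admit while started + count stays within limit + budget
	double budget = 0.0;
	long long started = 15;
	assert(started + 1 <= limit + budget);
	// endReleaseWindow: bank the unused portion of the limit, scaled by elapsed time
	double elapsed = 0.5;
	budget = std::max(0.0, budget + elapsed * (limit - started) / rateWindow);
	assert(budget == 1.25); // 0.5 * (20 - 15) / 2
	return 0;
}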
static bool isNear(double desired, int64_t actual) {
return std::abs(desired - actual) * 10 < desired;
}
ACTOR static Future<Void> mockClient(GrvTransactionRateInfo* rateInfo, double desiredRate, int64_t* counter) {
loop {
state double elapsed = (0.9 + 0.2 * deterministicRandom()->random01()) / desiredRate;
wait(delay(elapsed));
rateInfo->startReleaseWindow();
int started = rateInfo->canStart(0, 1) ? 1 : 0;
*counter += started;
rateInfo->endReleaseWindow(started, false, elapsed);
}
}
// Rate limit set at 10, but client attempts 20 transactions per second.
// Client should be throttled to only 10 transactions per second.
TEST_CASE("/GrvTransactionRateInfo/Simple") {
state GrvTransactionRateInfo rateInfo;
state int64_t counter = 0;
rateInfo.setRate(10.0);
wait(timeout(mockClient(&rateInfo, 20.0, &counter), 60.0, Void()));
TraceEvent("GrvTransactionRateInfoTest").detail("Counter", counter);
ASSERT(isNear(60.0 * 10.0, counter));
return Void();
}


@ -288,6 +288,8 @@ public:
void enableSnapshot() override { disableSnapshot = false; }
int uncommittedBytes() { return queue.totalSize(); }
private:
enum OpType {
OpSet,
@ -731,13 +733,16 @@ private:
.detail("Commits", dbgCommitCount) .detail("Commits", dbgCommitCount)
.detail("TimeTaken", now() - startt); .detail("TimeTaken", now() - startt);
self->semiCommit(); // Make sure cipher keys are ready before recovery finishes. The semiCommit below also require cipher
// keys.
// Make sure cipher keys are ready before recovery finishes.
if (self->enableEncryption) { if (self->enableEncryption) {
wait(updateCipherKeys(self)); wait(updateCipherKeys(self));
} }
CODE_PROBE(self->enableEncryption && self->uncommittedBytes() > 0,
"KeyValueStoreMemory recovered partial transaction while encryption-at-rest is enabled");
self->semiCommit();
return Void(); return Void();
} catch (Error& e) { } catch (Error& e) {
bool ok = e.code() == error_code_operation_cancelled || e.code() == error_code_file_not_found || bool ok = e.code() == error_code_operation_cancelled || e.code() == error_code_file_not_found ||


@ -81,9 +81,9 @@ class SharedRocksDBState {
public:
SharedRocksDBState(UID id);
LatencySample commitLatency;
LatencySample commitQueueLatency;
LatencySample dbWriteLatency;
void setClosing() { this->closing = true; }
bool isClosing() const { return this->closing; }
@ -107,19 +107,18 @@ private:
SharedRocksDBState::SharedRocksDBState(UID id)
: id(id), closing(false), dbOptions(initialDbOptions()), cfOptions(initialCfOptions()),
readOptions(initialReadOptions()), commitLatency(LatencySample("RocksDBCommitLatency",
id,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SAMPLE_SIZE)),
commitQueueLatency(LatencySample("RocksDBCommitQueueLatency",
id,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SAMPLE_SIZE)),
dbWriteLatency(LatencySample("RocksDBWriteLatency",
id,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SAMPLE_SIZE)) {}
rocksdb::ColumnFamilyOptions SharedRocksDBState::initialCfOptions() {
rocksdb::ColumnFamilyOptions options;
@ -1161,14 +1160,9 @@ struct RocksDBKeyValueStore : IKeyValueStore {
double startTime;
bool getHistograms;
double getTimeEstimate() const override { return SERVER_KNOBS->COMMIT_TIME_ESTIMATE; }
CommitAction()
: startTime(timer_monotonic()),
getHistograms(deterministicRandom()->random01() < SERVER_KNOBS->ROCKSDB_HISTOGRAMS_SAMPLE_RATE) {}
};
void action(CommitAction& a) {
bool doPerfContextMetrics =
@ -1178,7 +1172,7 @@ struct RocksDBKeyValueStore : IKeyValueStore {
perfContextMetrics->reset();
}
double commitBeginTime = timer_monotonic();
sharedState->commitQueueLatency.addMeasurement(commitBeginTime - a.startTime);
if (a.getHistograms) {
metricPromiseStream->send(
std::make_pair(ROCKSDB_COMMIT_QUEUEWAIT_HISTOGRAM.toString(), commitBeginTime - a.startTime));
@ -1200,7 +1194,7 @@ struct RocksDBKeyValueStore : IKeyValueStore {
options.sync = false;
}
double writeBeginTime = timer_monotonic();
if (rateLimiter) {
// Controls the total write rate of compaction and flush in bytes per second.
// Request for batchToCommit bytes. If this request cannot be satisfied, the call is blocked.
@ -1209,7 +1203,7 @@ struct RocksDBKeyValueStore : IKeyValueStore {
s = db->Write(options, a.batchToCommit.get());
readIterPool->update();
double currTime = timer_monotonic();
sharedState->dbWriteLatency.addMeasurement(currTime - writeBeginTime);
if (a.getHistograms) {
metricPromiseStream->send(
std::make_pair(ROCKSDB_WRITE_HISTOGRAM.toString(), currTime - writeBeginTime));
@ -1236,7 +1230,7 @@ struct RocksDBKeyValueStore : IKeyValueStore {
}
}
currTime = timer_monotonic();
sharedState->commitLatency.addMeasurement(currTime - a.startTime);
if (a.getHistograms) {
metricPromiseStream->send(
std::make_pair(ROCKSDB_COMMIT_ACTION_HISTOGRAM.toString(), currTime - commitBeginTime));
@ -1361,9 +1355,7 @@ struct RocksDBKeyValueStore : IKeyValueStore {
ThreadReturnPromise<Optional<Value>> result; ThreadReturnPromise<Optional<Value>> result;
ReadValueAction(KeyRef key, Optional<UID> debugID) ReadValueAction(KeyRef key, Optional<UID> debugID)
: key(key), debugID(debugID), startTime(timer_monotonic()), : key(key), debugID(debugID), startTime(timer_monotonic()),
getHistograms( getHistograms(deterministicRandom()->random01() < SERVER_KNOBS->ROCKSDB_HISTOGRAMS_SAMPLE_RATE) {}
(deterministicRandom()->random01() < SERVER_KNOBS->ROCKSDB_HISTOGRAMS_SAMPLE_RATE) ? true : false) {
}
double getTimeEstimate() const override { return SERVER_KNOBS->READ_VALUE_TIME_ESTIMATE; } double getTimeEstimate() const override { return SERVER_KNOBS->READ_VALUE_TIME_ESTIMATE; }
}; };
void action(ReadValueAction& a) { void action(ReadValueAction& a) {
@ -1447,9 +1439,7 @@ struct RocksDBKeyValueStore : IKeyValueStore {
ThreadReturnPromise<Optional<Value>> result; ThreadReturnPromise<Optional<Value>> result;
ReadValuePrefixAction(Key key, int maxLength, Optional<UID> debugID) ReadValuePrefixAction(Key key, int maxLength, Optional<UID> debugID)
: key(key), maxLength(maxLength), debugID(debugID), startTime(timer_monotonic()), : key(key), maxLength(maxLength), debugID(debugID), startTime(timer_monotonic()),
getHistograms( getHistograms(deterministicRandom()->random01() < SERVER_KNOBS->ROCKSDB_HISTOGRAMS_SAMPLE_RATE) {}
(deterministicRandom()->random01() < SERVER_KNOBS->ROCKSDB_HISTOGRAMS_SAMPLE_RATE) ? true : false) {
}
double getTimeEstimate() const override { return SERVER_KNOBS->READ_VALUE_TIME_ESTIMATE; } double getTimeEstimate() const override { return SERVER_KNOBS->READ_VALUE_TIME_ESTIMATE; }
}; };
void action(ReadValuePrefixAction& a) { void action(ReadValuePrefixAction& a) {
@ -1529,9 +1519,7 @@ struct RocksDBKeyValueStore : IKeyValueStore {
ThreadReturnPromise<RangeResult> result; ThreadReturnPromise<RangeResult> result;
ReadRangeAction(KeyRange keys, int rowLimit, int byteLimit) ReadRangeAction(KeyRange keys, int rowLimit, int byteLimit)
: keys(keys), rowLimit(rowLimit), byteLimit(byteLimit), startTime(timer_monotonic()), : keys(keys), rowLimit(rowLimit), byteLimit(byteLimit), startTime(timer_monotonic()),
getHistograms( getHistograms(deterministicRandom()->random01() < SERVER_KNOBS->ROCKSDB_HISTOGRAMS_SAMPLE_RATE) {}
(deterministicRandom()->random01() < SERVER_KNOBS->ROCKSDB_HISTOGRAMS_SAMPLE_RATE) ? true : false) {
}
double getTimeEstimate() const override { return SERVER_KNOBS->READ_RANGE_TIME_ESTIMATE; } double getTimeEstimate() const override { return SERVER_KNOBS->READ_RANGE_TIME_ESTIMATE; }
}; };
void action(ReadRangeAction& a) { void action(ReadRangeAction& a) {
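
The hunks above move CommitAction and the Read*Action structs to initializer lists and switch the shared latency counters from Histogram pointers (->addMeasurement) to value-type LatencySample members (.addMeasurement) that record every operation, while the coin-flip gate now only controls the detailed histogram stream. A standalone sketch of that split, using invented stand-in types rather than the fdbserver classes:

    // Illustrative sketch only: an always-on latency sample alongside a
    // coin-flip-gated histogram send, mirroring the gating pattern above.
    // None of these names are fdbserver types.
    #include <chrono>
    #include <cstdio>
    #include <random>
    #include <vector>

    struct LatencySampleSketch {
        std::vector<double> measurements; // always recorded; logging samples elsewhere
        void addMeasurement(double seconds) { measurements.push_back(seconds); }
    };

    int main() {
        std::mt19937_64 rng(42);
        std::uniform_real_distribution<double> random01(0.0, 1.0);
        const double kHistogramSampleRate = 0.01; // stand-in for ROCKSDB_HISTOGRAMS_SAMPLE_RATE

        LatencySampleSketch commitLatency;
        for (int i = 0; i < 1000; ++i) {
            auto start = std::chrono::steady_clock::now();
            bool getHistograms = random01(rng) < kHistogramSampleRate; // decided up front, as in CommitAction()
            // ... do the commit work ...
            double elapsed = std::chrono::duration<double>(std::chrono::steady_clock::now() - start).count();
            commitLatency.addMeasurement(elapsed); // unconditional, unlike the old histogram path
            if (getHistograms) {
                std::printf("histogram sample: %f\n", elapsed);
            }
        }
    }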


@@ -1023,6 +1023,10 @@ public:
             writeBatch->Put(metadataShard->cf,
                             getShardMappingKey(lastKey, shardMappingPrefix),
                             nextShard == nullptr ? "" : nextShard->physicalShard->id);
+            TraceEvent(SevDebug, "ShardedRocksDB", this->logId)
+                .detail("Action", "PersistRangeMappingEnd")
+                .detail("NextShardKey", lastKey)
+                .detail("Value", nextShard == nullptr ? "" : nextShard->physicalShard->id);
             dirtyShards->insert(metadataShard.get());
         }


@@ -278,8 +278,9 @@ ACTOR Future<Void> resolveBatch(Reference<Resolver> self,
     // Detect conflicts
     double expire = now() + SERVER_KNOBS->SAMPLE_EXPIRATION_TIME;
     ConflictBatch conflictBatch(self->conflictSet, &reply.conflictingKeyRangeMap, &reply.arena);
+    const Version newOldestVersion = req.version - SERVER_KNOBS->MAX_WRITE_TRANSACTION_LIFE_VERSIONS;
     for (int t = 0; t < req.transactions.size(); t++) {
-        conflictBatch.addTransaction(req.transactions[t]);
+        conflictBatch.addTransaction(req.transactions[t], newOldestVersion);
         self->resolvedReadConflictRanges += req.transactions[t].read_conflict_ranges.size();
         self->resolvedWriteConflictRanges += req.transactions[t].write_conflict_ranges.size();
@@ -292,8 +293,7 @@ ACTOR Future<Void> resolveBatch(Reference<Resolver> self,
                 it.begin, SERVER_KNOBS->SAMPLE_OFFSET_PER_KEY + it.begin.size(), expire);
         }
     }
-    conflictBatch.detectConflicts(
-        req.version, req.version - SERVER_KNOBS->MAX_WRITE_TRANSACTION_LIFE_VERSIONS, commitList, &tooOldList);
+    conflictBatch.detectConflicts(req.version, newOldestVersion, commitList, &tooOldList);
     reply.debugID = req.debugID;
     reply.committed.resize(reply.arena, req.transactions.size());
@@ -351,7 +351,7 @@ ACTOR Future<Void> resolveBatch(Reference<Resolver> self,
         SpanContext spanContext =
             req.transactions[t].spanContext.present() ? req.transactions[t].spanContext.get() : SpanContext();
-        applyMetadataMutations(spanContext, *resolverData, req.transactions[t].mutations, db);
+        applyMetadataMutations(spanContext, *resolverData, req.transactions[t].mutations);
     }
     CODE_PROBE(self->forceRecovery, "Resolver detects forced recovery");
 }
@@ -574,7 +574,7 @@ ACTOR Future<Void> processCompleteTransactionStateRequest(TransactionStateResolv
         bool confChanges; // Ignore configuration changes for initial commits.
         ResolverData resolverData(
             pContext->pResolverData->dbgid, pContext->pTxnStateStore, &pContext->pResolverData->keyInfo, confChanges);
-        applyMetadataMutations(SpanContext(), resolverData, mutations, db);
+        applyMetadataMutations(SpanContext(), resolverData, mutations);
     } // loop
     auto lockedKey = pContext->pTxnStateStore->readValue(databaseLockedKey).get();
@@ -653,15 +653,14 @@ ACTOR Future<Void> resolverCore(ResolverInterface resolver,
     state TransactionStateResolveContext transactionStateResolveContext;
     if (SERVER_KNOBS->PROXY_USE_RESOLVER_PRIVATE_MUTATIONS) {
         self->logAdapter = new LogSystemDiskQueueAdapter(self->logSystem, Reference<AsyncVar<PeekTxsInfo>>(), 1, false);
-        self->txnStateStore =
-            keyValueStoreLogSystem(self->logAdapter,
-                                   db,
-                                   resolver.id(),
-                                   2e9,
-                                   true,
-                                   true,
-                                   true,
-                                   isEncryptionOpSupported(EncryptOperationType::TLOG_ENCRYPTION, db->get().client));
+        self->txnStateStore = keyValueStoreLogSystem(self->logAdapter,
+                                                     db,
+                                                     resolver.id(),
+                                                     2e9,
+                                                     true,
+                                                     true,
+                                                     true,
+                                                     isEncryptionOpSupported(EncryptOperationType::TLOG_ENCRYPTION));
         // wait for txnStateStore recovery
         wait(success(self->txnStateStore->readValue(StringRef())));
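
With this change the resolver computes the too-old cutoff once per batch and passes the same value to both addTransaction() and detectConflicts(), so classification and conflict detection agree on the cutoff. A minimal standalone sketch of the check, with Version and the knob value as stand-ins for the fdbserver definitions:

    // Minimal sketch of the "too old" cutoff used above; Version and the
    // knob value are stand-ins, not fdbserver definitions.
    #include <cassert>
    #include <cstdint>

    using Version = int64_t;

    constexpr Version MAX_WRITE_TRANSACTION_LIFE_VERSIONS = 5'000'000;

    // Mirrors addTransaction(): a transaction with read conflict ranges is
    // flagged too old when its read snapshot predates the cutoff.
    bool isTooOld(Version readSnapshot, Version batchVersion, bool hasReadConflictRanges) {
        const Version newOldestVersion = batchVersion - MAX_WRITE_TRANSACTION_LIFE_VERSIONS;
        return hasReadConflictRanges && readSnapshot < newOldestVersion;
    }

    int main() {
        assert(!isTooOld(/*readSnapshot=*/9'000'000, /*batchVersion=*/10'000'000, true));
        assert(isTooOld(/*readSnapshot=*/4'000'000, /*batchVersion=*/10'000'000, true));
        assert(!isTooOld(/*readSnapshot=*/4'000'000, /*batchVersion=*/10'000'000, false));
    }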


@@ -38,6 +38,7 @@
 #include "flow/network.h"
 #include "flow/UnitTest.h"
+#include <limits>
 #include <memory>
 #include <unordered_map>
 #include <utility>
@@ -191,6 +192,7 @@ static Standalone<BlobMetadataDetailsRef> createBlobMetadata(BlobMetadataDomainI
                                                              BlobMetadataDomainName domainName) {
     Standalone<BlobMetadataDetailsRef> metadata;
     metadata.domainId = domainId;
+    metadata.arena().dependsOn(domainName.arena());
     metadata.domainName = domainName;
     // 0 == no partition, 1 == suffix partitioned, 2 == storage location partitioned
     int type = deterministicRandom()->randomInt(0, 3);
@@ -226,6 +228,17 @@ static Standalone<BlobMetadataDetailsRef> createBlobMetadata(BlobMetadataDomainI
             ev.detail("P" + std::to_string(i), metadata.partitions.back());
         }
     }
+    // set random refresh + expire time
+    if (deterministicRandom()->coinflip()) {
+        metadata.refreshAt = now() + deterministicRandom()->random01() * SERVER_KNOBS->BLOB_METADATA_REFRESH_INTERVAL;
+        metadata.expireAt =
+            metadata.refreshAt + deterministicRandom()->random01() * SERVER_KNOBS->BLOB_METADATA_REFRESH_INTERVAL;
+    } else {
+        metadata.refreshAt = std::numeric_limits<double>::max();
+        metadata.expireAt = metadata.refreshAt;
+    }
     return metadata;
 }
@@ -244,6 +257,10 @@ ACTOR Future<Void> blobMetadataLookup(KmsConnectorInterface interf, KmsConnBlobM
         it = simBlobMetadataStore
                  .insert({ domainInfo.domainId, createBlobMetadata(domainInfo.domainId, domainInfo.domainName) })
                  .first;
+    } else if (now() >= it->second.expireAt) {
+        // update random refresh and expire time
+        it->second.refreshAt = now() + deterministicRandom()->random01() * 30;
+        it->second.expireAt = it->second.refreshAt + deterministicRandom()->random01() * 10;
     }
     rep.metadataDetails.arena().dependsOn(it->second.arena());
     rep.metadataDetails.push_back(rep.metadataDetails.arena(), it->second);
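
The simulated KMS now stamps each blob metadata entry with refreshAt/expireAt times and regenerates them when a lookup arrives past expiry. The same expire-then-regenerate cache pattern in isolation, with invented types and a wall clock standing in for the simulator's now():

    // Sketch of the expire-then-regenerate pattern above; MetadataSketch and
    // the clock are stand-ins, not the SimKmsConnector types.
    #include <chrono>
    #include <cstdint>
    #include <map>
    #include <random>

    struct MetadataSketch {
        double refreshAt = 0;
        double expireAt = 0;
    };

    double nowSeconds() {
        using namespace std::chrono;
        return duration<double>(steady_clock::now().time_since_epoch()).count();
    }

    MetadataSketch& lookup(std::map<int64_t, MetadataSketch>& store, int64_t domainId, std::mt19937_64& rng) {
        std::uniform_real_distribution<double> random01(0.0, 1.0);
        auto [it, inserted] = store.try_emplace(domainId);
        if (inserted || nowSeconds() >= it->second.expireAt) {
            // Regenerate, mirroring the "else if (now() >= expireAt)" branch above.
            it->second.refreshAt = nowSeconds() + random01(rng) * 30;
            it->second.expireAt = it->second.refreshAt + random01(rng) * 10;
        }
        return it->second;
    }

    int main() {
        std::mt19937_64 rng(1);
        std::map<int64_t, MetadataSketch> store;
        lookup(store, 7, rng); // first call creates; later calls refresh on expiry
    }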


@@ -368,6 +368,7 @@ public:
     bool allowDisablingTenants = true;
     bool allowCreatingTenants = true;
     bool injectTargetedSSRestart = false;
+    bool tenantModeRequired = false;
     bool injectSSDelay = false;
     std::string testClass; // unused -- used in TestHarness
     float testPriority; // unused -- used in TestHarness
@@ -433,6 +434,7 @@ public:
         .add("allowDefaultTenant", &allowDefaultTenant)
         .add("allowDisablingTenants", &allowDisablingTenants)
         .add("allowCreatingTenants", &allowCreatingTenants)
+        .add("tenantModeRequired", &tenantModeRequired)
         .add("randomlyRenameZoneId", &randomlyRenameZoneId)
         .add("injectTargetedSSRestart", &injectTargetedSSRestart)
         .add("injectSSDelay", &injectSSDelay);
@@ -2451,6 +2453,7 @@ ACTOR void setupAndRun(std::string dataFolder,
     state bool allowDefaultTenant = testConfig.allowDefaultTenant;
     state bool allowDisablingTenants = testConfig.allowDisablingTenants;
     state bool allowCreatingTenants = testConfig.allowCreatingTenants;
+    state bool tenantModeRequired = testConfig.tenantModeRequired;
     if (!SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA) {
         testConfig.storageEngineExcludeTypes.push_back(5);
@@ -2467,6 +2470,7 @@ ACTOR void setupAndRun(std::string dataFolder,
         // TODO: persist the chosen default tenant in the restartInfo.ini file for the second test
         allowDefaultTenant = false;
         allowCreatingTenants = false;
+        tenantModeRequired = false;
     }
     // TODO: Currently backup and restore related simulation tests are failing when run with rocksDB storage engine
@@ -2516,10 +2520,10 @@ ACTOR void setupAndRun(std::string dataFolder,
     state Optional<TenantName> defaultTenant;
     state Standalone<VectorRef<TenantNameRef>> tenantsToCreate;
     state TenantMode tenantMode = TenantMode::DISABLED;
-    if (allowDefaultTenant && deterministicRandom()->random01() < 0.5) {
+    if (tenantModeRequired || (allowDefaultTenant && deterministicRandom()->random01() < 0.5)) {
         defaultTenant = "SimulatedDefaultTenant"_sr;
         tenantsToCreate.push_back_deep(tenantsToCreate.arena(), defaultTenant.get());
-        if (deterministicRandom()->random01() < 0.9) {
+        if (tenantModeRequired || deterministicRandom()->random01() < 0.9) {
             tenantMode = TenantMode::REQUIRED;
         } else {
             tenantMode = TenantMode::OPTIONAL_TENANT;
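
tenantModeRequired short-circuits both random choices, so a test that sets it always gets a default tenant and TenantMode::REQUIRED. The selection logic above, reduced to a standalone sketch (TenantMode here is a local stand-in, not the fdbclient enum):

    // Sketch of the tenant-mode selection above, with the simulator's RNG
    // replaced by a local one; these are not fdbserver types.
    #include <cstdio>
    #include <random>

    enum class TenantMode { DISABLED, OPTIONAL_TENANT, REQUIRED };

    TenantMode chooseTenantMode(bool tenantModeRequired, bool allowDefaultTenant, std::mt19937_64& rng) {
        std::uniform_real_distribution<double> random01(0.0, 1.0);
        if (tenantModeRequired || (allowDefaultTenant && random01(rng) < 0.5)) {
            // A default tenant is created on this path.
            return (tenantModeRequired || random01(rng) < 0.9) ? TenantMode::REQUIRED
                                                               : TenantMode::OPTIONAL_TENANT;
        }
        return TenantMode::DISABLED;
    }

    int main() {
        std::mt19937_64 rng(7);
        std::printf("%d\n", static_cast<int>(chooseTenantMode(true, false, rng))); // always REQUIRED
    }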


@@ -816,14 +816,14 @@ struct TransactionInfo {
     bool reportConflictingKeys;
 };
-void ConflictBatch::addTransaction(const CommitTransactionRef& tr) {
+void ConflictBatch::addTransaction(const CommitTransactionRef& tr, Version newOldestVersion) {
     const int t = transactionCount++;
     Arena& arena = transactionInfo.arena();
     TransactionInfo* info = new (arena) TransactionInfo;
     info->reportConflictingKeys = tr.report_conflicting_keys;
-    if (tr.read_snapshot < cs->oldestVersion && tr.read_conflict_ranges.size()) {
+    if (tr.read_snapshot < newOldestVersion && tr.read_conflict_ranges.size()) {
         info->tooOld = true;
     } else {
         info->tooOld = false;
@@ -1143,7 +1143,7 @@ void skipListTest() {
     t = timer();
     ConflictBatch batch(cs);
     for (const auto& tr : trs) {
-        batch.addTransaction(tr, version);
+        batch.addTransaction(tr, version);
     }
     g_add += timer() - t;


@@ -828,6 +828,10 @@ ACTOR static Future<JsonBuilderObject> processStatusFetcher(
         roles.addRole("blob_manager", db->get().blobManager.get());
     }
+    if (configuration.present() && configuration.get().blobGranulesEnabled && db->get().blobMigrator.present()) {
+        roles.addRole("blob_migrator", db->get().blobMigrator.get());
+    }
     if (db->get().consistencyScan.present()) {
         roles.addRole("consistency_scan", db->get().consistencyScan.get());
     }


@@ -117,7 +117,7 @@ enum {
     OPT_METRICSPREFIX, OPT_LOGGROUP, OPT_LOCALITY, OPT_IO_TRUST_SECONDS, OPT_IO_TRUST_WARN_ONLY, OPT_FILESYSTEM, OPT_PROFILER_RSS_SIZE, OPT_KVFILE,
     OPT_TRACE_FORMAT, OPT_WHITELIST_BINPATH, OPT_BLOB_CREDENTIAL_FILE, OPT_CONFIG_PATH, OPT_USE_TEST_CONFIG_DB, OPT_NO_CONFIG_DB, OPT_FAULT_INJECTION, OPT_PROFILER, OPT_PRINT_SIMTIME,
     OPT_FLOW_PROCESS_NAME, OPT_FLOW_PROCESS_ENDPOINT, OPT_IP_TRUSTED_MASK, OPT_KMS_CONN_DISCOVERY_URL_FILE, OPT_KMS_CONNECTOR_TYPE, OPT_KMS_CONN_VALIDATION_TOKEN_DETAILS,
-    OPT_KMS_CONN_GET_ENCRYPTION_KEYS_ENDPOINT, OPT_NEW_CLUSTER_KEY, OPT_AUTHZ_PUBLIC_KEY_FILE, OPT_USE_FUTURE_PROTOCOL_VERSION
+    OPT_KMS_CONN_GET_ENCRYPTION_KEYS_ENDPOINT, OPT_KMS_CONN_GET_BLOB_METADATA_ENDPOINT, OPT_NEW_CLUSTER_KEY, OPT_AUTHZ_PUBLIC_KEY_FILE, OPT_USE_FUTURE_PROTOCOL_VERSION
 };
 CSimpleOpt::SOption g_rgOptions[] = {
@@ -218,6 +218,7 @@ CSimpleOpt::SOption g_rgOptions[] = {
     { OPT_KMS_CONNECTOR_TYPE, "--kms-connector-type", SO_REQ_SEP },
     { OPT_KMS_CONN_VALIDATION_TOKEN_DETAILS, "--kms-conn-validation-token-details", SO_REQ_SEP },
     { OPT_KMS_CONN_GET_ENCRYPTION_KEYS_ENDPOINT, "--kms-conn-get-encryption-keys-endpoint", SO_REQ_SEP },
+    { OPT_KMS_CONN_GET_BLOB_METADATA_ENDPOINT, "--kms-conn-get-blob-metadata-endpoint", SO_REQ_SEP },
     { OPT_USE_FUTURE_PROTOCOL_VERSION, "--use-future-protocol-version", SO_REQ_SEP },
     TLS_OPTION_FLAGS,
     SO_END_OF_OPTIONS
@@ -1707,6 +1708,10 @@ private:
             knobs.emplace_back("rest_kms_connector_get_encryption_keys_endpoint", args.OptionArg());
             break;
         }
+        case OPT_KMS_CONN_GET_BLOB_METADATA_ENDPOINT: {
+            knobs.emplace_back("rest_kms_connector_get_blob_metadata_endpoint", args.OptionArg());
+            break;
+        }
         case OPT_NEW_CLUSTER_KEY: {
             newClusterKey = args.OptionArg();
             try {


@@ -103,8 +103,7 @@ void applyMetadataMutations(SpanContext const& spanContext,
                             const UID& dbgid,
                             Arena& arena,
                             const VectorRef<MutationRef>& mutations,
-                            IKeyValueStore* txnStateStore,
-                            Reference<AsyncVar<ServerDBInfo> const> dbInfo);
+                            IKeyValueStore* txnStateStore);
 inline bool isSystemKey(KeyRef key) {
     return key.size() && key[0] == systemKeys.begin[0];
@@ -145,7 +144,6 @@ inline bool containsMetadataMutation(const VectorRef<MutationRef>& mutations) {
 // Resolver's version
 void applyMetadataMutations(SpanContext const& spanContext,
                             ResolverData& resolverData,
-                            const VectorRef<MutationRef>& mutations,
-                            Reference<AsyncVar<ServerDBInfo> const> dbInfo);
+                            const VectorRef<MutationRef>& mutations);
 #endif


@@ -1,5 +1,5 @@
 /*
- * BlobGranuleServerCommon.h
+ * BlobGranuleServerCommon.actor.h
  *
  * This source file is part of the FoundationDB open source project
  *
@@ -105,10 +105,15 @@ struct GranuleTenantData : NonCopyable, ReferenceCounted<GranuleTenantData> {
     GranuleTenantData() {}
     GranuleTenantData(TenantName name, TenantMapEntry entry) : name(name), entry(entry) {}
-    void setBStore(Reference<BlobConnectionProvider> bs) {
-        ASSERT(bstoreLoaded.canBeSet());
-        bstore = bs;
-        bstoreLoaded.send(Void());
+    void updateBStore(const BlobMetadataDetailsRef& metadata) {
+        if (bstoreLoaded.canBeSet()) {
+            // new
+            bstore = BlobConnectionProvider::newBlobConnectionProvider(metadata);
+            bstoreLoaded.send(Void());
+        } else {
+            // update existing
+            bstore->update(metadata);
+        }
     }
 };
@@ -119,7 +124,7 @@ public:
     void removeTenants(std::vector<int64_t> tenantIds);
     Optional<TenantMapEntry> getTenantById(int64_t id);
-    Reference<GranuleTenantData> getDataForGranule(const KeyRangeRef& keyRange);
+    Future<Reference<GranuleTenantData>> getDataForGranule(const KeyRangeRef& keyRange);
     KeyRangeMap<Reference<GranuleTenantData>> tenantData;
     std::unordered_map<int64_t, TenantMapEntry> tenantInfoById;
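
updateBStore() folds creation and refresh into one entry point: the first metadata constructs the provider and fires bstoreLoaded, and later metadata updates the existing provider in place. The same create-or-update shape, with a plain bool standing in for the Promise and pared-down stand-in types:

    // Create-or-update sketch for the updateBStore() change above;
    // ProviderSketch stands in for BlobConnectionProvider and a bool for the
    // bstoreLoaded Promise. Not fdbserver code.
    #include <memory>
    #include <string>

    struct MetadataSketch { std::string location; };

    struct ProviderSketch {
        std::string location;
        explicit ProviderSketch(const MetadataSketch& m) : location(m.location) {}
        void update(const MetadataSketch& m) { location = m.location; }
    };

    struct TenantDataSketch {
        std::shared_ptr<ProviderSketch> bstore;
        bool loaded = false; // stands in for bstoreLoaded.canBeSet() being false

        void updateBStore(const MetadataSketch& metadata) {
            if (!loaded) {
                bstore = std::make_shared<ProviderSketch>(metadata); // new
                loaded = true; // readers waiting on the load can proceed
            } else {
                bstore->update(metadata); // update existing
            }
        }
    };

    int main() {
        TenantDataSketch data;
        data.updateBStore({ "s3://bucket-a" }); // creates
        data.updateBStore({ "s3://bucket-b" }); // updates in place
    }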


@@ -0,0 +1,67 @@
/*
* BlobMigratorInterface.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FDBSERVER_BLOBMIGRATORINTERFACE_H
#define FDBSERVER_BLOBMIGRATORINTERFACE_H
#pragma once
#include "fdbclient/StorageServerInterface.h"
#include "fdbclient/FDBTypes.h"
#include "fdbrpc/Locality.h"
#include "fdbrpc/fdbrpc.h"
struct BlobMigratorInterface {
constexpr static FileIdentifier file_identifier = 869199;
RequestStream<struct HaltBlobMigratorRequest> haltBlobMigrator;
RequestStream<ReplyPromise<Void>> waitFailure;
LocalityData locality;
UID uniqueID;
BlobMigratorInterface() {}
BlobMigratorInterface(const struct LocalityData& l, UID id) : uniqueID(id), locality(l) {}
void initEndpoints() {}
UID id() const { return uniqueID; }
NetworkAddress address() const { return waitFailure.getEndpoint().getPrimaryAddress(); }
bool operator==(const BlobMigratorInterface& r) const { return id() == r.id(); }
bool operator!=(const BlobMigratorInterface& r) const { return !(*this == r); }
template <class Archive>
void serialize(Archive& ar) {
// StorageServerInterface::serialize(ar);
serializer(ar, waitFailure, haltBlobMigrator, locality, uniqueID);
}
};
struct HaltBlobMigratorRequest {
constexpr static FileIdentifier file_identifier = 4980139;
UID requesterID;
ReplyPromise<Void> reply;
HaltBlobMigratorRequest() {}
explicit HaltBlobMigratorRequest(UID uid) : requesterID(uid) {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, requesterID, reply);
}
};
#endif
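
Note that identity for this interface is the UID alone: operator== compares id() only, so two copies of the same registration compare equal regardless of locality or endpoints. A standalone sketch of those semantics, with UID reduced to a pair of integers:

    // Identity-by-UID sketch for the interface above; UID here is a stand-in,
    // not the flow type.
    #include <cassert>
    #include <cstdint>
    #include <utility>

    using UID = std::pair<uint64_t, uint64_t>;

    struct MigratorInterfaceSketch {
        UID uniqueID;
        UID id() const { return uniqueID; }
        bool operator==(const MigratorInterfaceSketch& r) const { return id() == r.id(); }
        bool operator!=(const MigratorInterfaceSketch& r) const { return !(*this == r); }
    };

    int main() {
        MigratorInterfaceSketch a{ { 1, 2 } }, b{ { 1, 2 } }, c{ { 3, 4 } };
        assert(a == b && a != c); // equality ignores everything but the UID
    }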


@@ -22,6 +22,8 @@
 // When actually compiled (NO_INTELLISENSE), include the generated version of this file. In intellisense use the source
 // version.
+#include "fdbclient/StorageServerInterface.h"
+#include "fdbserver/BlobMigratorInterface.h"
 #include <utility>
 #if defined(NO_INTELLISENSE) && !defined(FDBSERVER_CLUSTERCONTROLLER_ACTOR_G_H)
@@ -51,6 +53,7 @@ struct WorkerInfo : NonCopyable {
     Future<Void> haltRatekeeper;
     Future<Void> haltDistributor;
     Future<Void> haltBlobManager;
+    Future<Void> haltBlobMigrator;
     Future<Void> haltEncryptKeyProxy;
     Future<Void> haltConsistencyScan;
     Standalone<VectorRef<StringRef>> issues;
@@ -184,6 +187,14 @@ public:
         serverInfo->set(newInfo);
     }
+    void setBlobMigrator(const BlobMigratorInterface& interf) {
+        auto newInfo = serverInfo->get();
+        newInfo.id = deterministicRandom()->randomUniqueID();
+        newInfo.infoGeneration = ++dbInfoCount;
+        newInfo.blobMigrator = interf;
+        serverInfo->set(newInfo);
+    }
     void setEncryptKeyProxy(const EncryptKeyProxyInterface& interf) {
         auto newInfo = serverInfo->get();
         auto newClientInfo = clientInfo->get();
@@ -217,6 +228,8 @@ public:
             newInfo.ratekeeper = Optional<RatekeeperInterface>();
         } else if (t == ProcessClass::BlobManagerClass) {
             newInfo.blobManager = Optional<BlobManagerInterface>();
+        } else if (t == ProcessClass::BlobMigratorClass) {
+            newInfo.blobMigrator = Optional<BlobMigratorInterface>();
         } else if (t == ProcessClass::EncryptKeyProxyClass) {
             newInfo.encryptKeyProxy = Optional<EncryptKeyProxyInterface>();
             newInfo.client.encryptKeyProxy = Optional<EncryptKeyProxyInterface>();
@@ -317,6 +330,8 @@ public:
              db.serverInfo->get().ratekeeper.get().locality.processId() == processId) ||
             (db.serverInfo->get().blobManager.present() &&
              db.serverInfo->get().blobManager.get().locality.processId() == processId) ||
+            (db.serverInfo->get().blobMigrator.present() &&
+             db.serverInfo->get().blobMigrator.get().locality.processId() == processId) ||
             (db.serverInfo->get().encryptKeyProxy.present() &&
              db.serverInfo->get().encryptKeyProxy.get().locality.processId() == processId) ||
             (db.serverInfo->get().consistencyScan.present() &&
@@ -3360,6 +3375,8 @@ public:
     Optional<UID> recruitingRatekeeperID;
     AsyncVar<bool> recruitBlobManager;
     Optional<UID> recruitingBlobManagerID;
+    AsyncVar<bool> recruitBlobMigrator;
+    Optional<UID> recruitingBlobMigratorID;
     AsyncVar<bool> recruitEncryptKeyProxy;
     Optional<UID> recruitingEncryptKeyProxyID;
     AsyncVar<bool> recruitConsistencyScan;
@@ -3401,8 +3418,9 @@ public:
     ac(false), outstandingRequestChecker(Void()), outstandingRemoteRequestChecker(Void()), startTime(now()),
     goodRecruitmentTime(Never()), goodRemoteRecruitmentTime(Never()), datacenterVersionDifference(0),
     versionDifferenceUpdated(false), remoteDCMonitorStarted(false), remoteTransactionSystemDegraded(false),
-    recruitDistributor(false), recruitRatekeeper(false), recruitBlobManager(false), recruitEncryptKeyProxy(false),
-    recruitConsistencyScan(false), clusterControllerMetrics("ClusterController", id.toString()),
+    recruitDistributor(false), recruitRatekeeper(false), recruitBlobManager(false), recruitBlobMigrator(false),
+    recruitEncryptKeyProxy(false), recruitConsistencyScan(false),
+    clusterControllerMetrics("ClusterController", id.toString()),
     openDatabaseRequests("OpenDatabaseRequests", clusterControllerMetrics),
     registerWorkerRequests("RegisterWorkerRequests", clusterControllerMetrics),
     getWorkersRequests("GetWorkersRequests", clusterControllerMetrics),


@@ -45,7 +45,7 @@ struct ConflictBatch {
         TransactionCommitted,
     };
-    void addTransaction(const CommitTransactionRef& transaction);
+    void addTransaction(const CommitTransactionRef& transaction, Version newOldestVersion);
     void detectConflicts(Version now,
                          Version newOldestVersion,
                          std::vector<int>& nonConflicting,


@@ -44,11 +44,25 @@ public:
     struct SourceServers {
         std::vector<UID> srcServers, completeSources; // the same as RelocateData.src, RelocateData.completeSources;
     };
+    struct DDRangeLocations {
+        DDRangeLocations() = default;
+        DDRangeLocations(KeyRangeRef range) : range(range) {}
+        // A map of dcId : list of servers
+        std::map<std::string, std::vector<StorageServerInterface>> servers;
+        KeyRange range;
+    };
     virtual Database context() const = 0;
     virtual bool isMocked() const = 0;
     // get the source server list and complete source server list for range
     virtual Future<SourceServers> getSourceServersForRange(const KeyRangeRef range) { return SourceServers{}; };
+    virtual Future<std::vector<DDRangeLocations>> getSourceServerInterfacesForRange(const KeyRangeRef range) {
+        return std::vector<DDRangeLocations>();
+    }
     // get the storage server list and Process class, only throw transaction non-retryable exceptions
     virtual Future<ServerWorkerInfos> getServerListAndProcessClasses() = 0;
@@ -142,6 +156,9 @@ public:
     Future<SourceServers> getSourceServersForRange(const KeyRangeRef range) override;
+    Future<std::vector<IDDTxnProcessor::DDRangeLocations>> getSourceServerInterfacesForRange(
+        const KeyRangeRef range) override;
     // Call NativeAPI implementation directly
     Future<ServerWorkerInfos> getServerListAndProcessClasses() override;
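
DDRangeLocations pairs each returned range with a dcId-keyed map of storage server interfaces. One plausible consumer shape, with the FDB types pared down to standard-library stand-ins:

    // Sketch of consuming DDRangeLocations results: for each returned range,
    // walk the per-DC server lists. All types here are stand-ins.
    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    struct RangeLocationsSketch {
        std::string rangeBegin, rangeEnd;                        // stand-in for KeyRange
        std::map<std::string, std::vector<std::string>> servers; // dcId -> server ids
    };

    void report(const std::vector<RangeLocationsSketch>& locations) {
        for (const auto& loc : locations) {
            std::printf("range [%s, %s):\n", loc.rangeBegin.c_str(), loc.rangeEnd.c_str());
            for (const auto& [dcId, srvs] : loc.servers) {
                std::printf("  dc %s: %zu servers\n", dcId.c_str(), srvs.size());
            }
        }
    }

    int main() {
        report({ { "a", "m", { { "dc0", { "ss1", "ss2" } }, { "dc1", { "ss3" } } } } });
    }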


@@ -37,6 +37,7 @@ struct DataDistributorInterface {
     RequestStream<struct GetDataDistributorMetricsRequest> dataDistributorMetrics;
     RequestStream<struct DistributorSplitRangeRequest> distributorSplitRange;
     RequestStream<struct GetStorageWigglerStateRequest> storageWigglerState;
+    RequestStream<struct TriggerAuditRequest> triggerAudit;
     DataDistributorInterface() {}
     explicit DataDistributorInterface(const struct LocalityData& l, UID id) : locality(l), myId(id) {}
@@ -58,7 +59,8 @@ struct DataDistributorInterface {
                    distributorExclCheckReq,
                    dataDistributorMetrics,
                    distributorSplitRange,
-                   storageWigglerState);
+                   storageWigglerState,
+                   triggerAudit);
     }
 };


@@ -27,8 +27,11 @@
 typedef enum { TLOG_ENCRYPTION = 0, STORAGE_SERVER_ENCRYPTION = 1, BLOB_GRANULE_ENCRYPTION = 2 } EncryptOperationType;
-inline bool isEncryptionOpSupported(EncryptOperationType operation_type, const ClientDBInfo& dbInfo) {
-    if (!dbInfo.isEncryptionEnabled) {
+inline bool isEncryptionOpSupported(EncryptOperationType operation_type) {
+    // We would check against dbInfo.isEncryptionEnabled instead, but the dbInfo may not be available before the
+    // ClusterController broadcasts it to workers, and before that broadcast encryption may appear to be disabled
+    // when it should be enabled. Moving the encryption switch to the DB config could fix the issue.
+    if (!SERVER_KNOBS->ENABLE_ENCRYPTION) {
         return false;
     }


@@ -0,0 +1,80 @@
/*
* GrvProxyTransactionTagThrottler.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "fdbclient/CommitProxyInterface.h"
#include "fdbclient/TagThrottle.actor.h"
#include "fdbserver/GrvTransactionRateInfo.h"
// GrvProxyTransactionTagThrottler is used to throttle GetReadVersionRequests based on tag quotas
// before they're pushed into priority-partitioned queues.
//
// A GrvTransactionRateInfo object and a request queue are maintained for each tag.
// The GrvTransactionRateInfo object is used to determine when a request can be released.
//
// Between each set of waits, releaseTransactions is run, releasing queued transactions
// that have passed the tag throttling stage. Transactions that are not yet ready
// are requeued during releaseTransactions.
class GrvProxyTransactionTagThrottler {
class DelayedRequest {
static uint64_t lastSequenceNumber;
double startTime;
public:
GetReadVersionRequest req;
uint64_t sequenceNumber;
explicit DelayedRequest(GetReadVersionRequest const& req)
: req(req), startTime(now()), sequenceNumber(++lastSequenceNumber) {}
void updateProxyTagThrottledDuration();
};
struct TagQueue {
Optional<GrvTransactionRateInfo> rateInfo;
Deque<DelayedRequest> requests;
TagQueue() = default;
explicit TagQueue(double rate) : rateInfo(rate) {}
void setRate(double rate);
};
// Track the budgets for each tag
TransactionTagMap<TagQueue> queues;
public:
// Called with rates received from ratekeeper
void updateRates(TransactionTagMap<double> const& newRates);
// elapsed indicates the amount of time since the last epoch was run.
// If a request is ready to be executed, it is sent to the deque
// corresponding to its priority. If not, the request remains queued.
void releaseTransactions(double elapsed,
SpannedDeque<GetReadVersionRequest>& outBatchPriority,
SpannedDeque<GetReadVersionRequest>& outDefaultPriority);
void addRequest(GetReadVersionRequest const&);
public: // testing
// Returns number of tags tracked
uint32_t size();
};
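
The class comment describes the core loop: per tag, release queued requests that pass the rate check and requeue the rest for the next epoch. A reduced sketch of that release-or-requeue loop, with the rate info collapsed to a simple token budget and all types invented:

    // Release-or-requeue sketch for the per-tag queues described above; the
    // rate check is reduced to a token budget and none of these are fdbserver
    // types.
    #include <cstdio>
    #include <deque>
    #include <map>
    #include <string>
    #include <utility>

    struct RequestSketch { std::string tag; };

    struct TagQueueSketch {
        double budget = 0; // stands in for GrvTransactionRateInfo
        std::deque<RequestSketch> requests;
    };

    void releaseTransactions(std::map<std::string, TagQueueSketch>& queues,
                             std::deque<RequestSketch>& outReady) {
        for (auto& [tag, queue] : queues) {
            std::deque<RequestSketch> notReady;
            while (!queue.requests.empty()) {
                if (queue.budget >= 1.0) {
                    queue.budget -= 1.0;
                    outReady.push_back(queue.requests.front()); // passed the throttling stage
                } else {
                    notReady.push_back(queue.requests.front()); // requeued for the next epoch
                }
                queue.requests.pop_front();
            }
            queue.requests = std::move(notReady);
        }
    }

    int main() {
        std::map<std::string, TagQueueSketch> queues;
        queues["hot"].budget = 1.0;
        queues["hot"].requests = { { "hot" }, { "hot" } };
        std::deque<RequestSketch> ready;
        releaseTransactions(queues, ready);
        std::printf("released=%zu requeued=%zu\n", ready.size(), queues["hot"].requests.size());
    }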


@@ -0,0 +1,69 @@
/*
* GrvTransactionRateInfo.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "fdbrpc/Smoother.h"
// Used by GRV Proxy to enforce rate limits received from the Ratekeeper.
//
// Between waits, the GrvTransactionRateInfo executes a "release window" starting
// with a call to the startReleaseWindow method. Within this release window, transactions are
// released while canStart returns true. At the end of the release window, the
// endReleaseWindow method is called, and the budget is updated to add or
// remove capacity.
//
// Meanwhile, the desired rate is updated through the setRate method.
//
// Smoothers are used to avoid turbulent throttling behaviour.
class GrvTransactionRateInfo {
double rate = 0.0;
double limit{ 0.0 };
double budget{ 0.0 };
bool disabled{ true };
Smoother smoothRate;
Smoother smoothReleased;
public:
explicit GrvTransactionRateInfo(double rate = 0.0);
// Determines the number of transactions that this proxy is allowed to release
// in this release window.
void startReleaseWindow();
// Checks if a "count" new transactions can be released, given that
// "numAlreadyStarted" transactions have already been released in the
// current release window.
bool canStart(int64_t numAlreadyStarted, int64_t count) const;
// Updates the budget to accumulate any extra capacity available or remove any excess that was used.
// Call at the end of a release window.
void endReleaseWindow(int64_t numStartedAtPriority, bool queueEmptyAtPriority, double elapsed);
// Smoothly sets rate. If currently disabled, reenable
void setRate(double rate);
// Smoothly sets transaction rate to 0. Call disable when new rates have not been
// set for a sufficiently long period of time.
void disable();
double getRate() const { return rate; }
double getLimit() const { return limit; }
};
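
A sketch of the release-window lifecycle the comment describes: start the window, admit transactions while the limit allows, then settle the budget at the end. The arithmetic here is illustrative only, not the fdbserver implementation:

    // Release-window lifecycle sketch; the budget arithmetic is an assumption
    // for illustration, not GrvTransactionRateInfo's actual smoothing logic.
    #include <algorithm>
    #include <cstdio>
    #include <cstdint>

    struct RateInfoSketch {
        double rate = 100.0; // transactions per second
        double budget = 0.0;
        double limit = 0.0;

        void startReleaseWindow(double elapsed) { limit = rate * elapsed + budget; }
        bool canStart(int64_t numAlreadyStarted, int64_t count) const {
            return numAlreadyStarted + count <= limit;
        }
        void endReleaseWindow(int64_t numStarted, bool queueEmpty, double elapsed) {
            (void)elapsed; // unused in this simplified sketch
            // Carry unused capacity forward only while work remains queued.
            budget = queueEmpty ? 0.0 : std::max(0.0, limit - numStarted);
        }
    };

    int main() {
        RateInfoSketch info;
        info.startReleaseWindow(/*elapsed=*/0.1); // allows ~10 transactions
        int64_t started = 0;
        while (info.canStart(started, 1))
            ++started;
        info.endReleaseWindow(started, /*queueEmpty=*/false, 0.1);
        std::printf("started=%lld leftover budget=%f\n", (long long)started, info.budget);
    }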


@ -1,299 +0,0 @@
/*
* IEncryptionKeyProvider.actor.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "fdbclient/BlobCipher.h"
#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_IENCRYPTIONKEYPROVIDER_ACTOR_G_H)
#define FDBSERVER_IENCRYPTIONKEYPROVIDER_ACTOR_G_H
#include "fdbserver/IEncryptionKeyProvider.actor.g.h"
#elif !defined(FDBSERVER_IENCRYPTIONKEYPROVIDER_ACTOR_H)
#define FDBSERVER_IENCRYPTIONKEYPROVIDER_ACTOR_H
#include "fdbclient/GetEncryptCipherKeys.actor.h"
#include "fdbclient/Tenant.h"
#include "fdbserver/EncryptionOpsUtils.h"
#include "fdbserver/ServerDBInfo.h"
#include "flow/Arena.h"
#include "flow/EncryptUtils.h"
#define XXH_INLINE_ALL
#include "flow/xxhash.h"
#include "flow/actorcompiler.h" // This must be the last #include.
typedef uint64_t XOREncryptionKeyID;
// EncryptionKeyRef is somewhat multi-variant, it will contain members representing the union
// of all fields relevant to any implemented encryption scheme. They are generally of
// the form
// Page Fields - fields which come from or are stored in the Page
// Secret Fields - fields which are only known by the Key Provider
// but it is up to each encoding and provider which fields are which and which ones are used
//
// TODO(yiwu): Rename and/or refactor this struct. It doesn't sound like an encryption key should
// contain page fields like encryption header.
struct EncryptionKeyRef {
EncryptionKeyRef(){};
EncryptionKeyRef(Arena& arena, const EncryptionKeyRef& toCopy)
: cipherKeys(toCopy.cipherKeys), secret(arena, toCopy.secret), id(toCopy.id) {}
int expectedSize() const { return secret.size(); }
// Fields for AESEncryptionV1
TextAndHeaderCipherKeys cipherKeys;
Optional<BlobCipherEncryptHeader> cipherHeader;
// Fields for XOREncryption_TestOnly
StringRef secret;
Optional<XOREncryptionKeyID> id;
};
typedef Standalone<EncryptionKeyRef> EncryptionKey;
// Interface used by pager to get encryption keys reading pages from disk
// and by the BTree to get encryption keys to use for new pages
class IEncryptionKeyProvider : public ReferenceCounted<IEncryptionKeyProvider> {
public:
virtual ~IEncryptionKeyProvider() {}
// Get an EncryptionKey with Secret Fields populated based on the given Page Fields.
// It is up to the implementation which fields those are.
// The output Page Fields must match the input Page Fields.
virtual Future<EncryptionKey> getSecrets(const EncryptionKeyRef& key) = 0;
// Get encryption key that should be used for a given user Key-Value range
virtual Future<EncryptionKey> getByRange(const KeyRef& begin, const KeyRef& end) = 0;
// Setting tenant prefix to tenant name map.
virtual void setTenantPrefixIndex(Reference<TenantPrefixIndex> tenantPrefixIndex) {}
virtual bool shouldEnableEncryption() const = 0;
};
// The null key provider is useful to simplify page decoding.
// It throws an error for any key info requested.
class NullKeyProvider : public IEncryptionKeyProvider {
public:
virtual ~NullKeyProvider() {}
bool shouldEnableEncryption() const override { return true; }
Future<EncryptionKey> getSecrets(const EncryptionKeyRef& key) override { throw encryption_key_not_found(); }
Future<EncryptionKey> getByRange(const KeyRef& begin, const KeyRef& end) override {
throw encryption_key_not_found();
}
};
// Key provider for dummy XOR encryption scheme
class XOREncryptionKeyProvider_TestOnly : public IEncryptionKeyProvider {
public:
XOREncryptionKeyProvider_TestOnly(std::string filename) {
ASSERT(g_network->isSimulated());
// Choose a deterministic random filename (without path) byte for secret generation
// Remove any leading directory names
size_t lastSlash = filename.find_last_of("\\/");
if (lastSlash != filename.npos) {
filename.erase(0, lastSlash);
}
xorWith = filename.empty() ? 0x5e
: (uint8_t)filename[XXH3_64bits(filename.data(), filename.size()) % filename.size()];
}
virtual ~XOREncryptionKeyProvider_TestOnly() {}
bool shouldEnableEncryption() const override { return true; }
Future<EncryptionKey> getSecrets(const EncryptionKeyRef& key) override {
if (!key.id.present()) {
throw encryption_key_not_found();
}
EncryptionKey s = key;
uint8_t secret = ~(uint8_t)key.id.get() ^ xorWith;
s.secret = StringRef(s.arena(), &secret, 1);
return s;
}
Future<EncryptionKey> getByRange(const KeyRef& begin, const KeyRef& end) override {
EncryptionKeyRef k;
k.id = end.empty() ? 0 : *(end.end() - 1);
return getSecrets(k);
}
uint8_t xorWith;
};
// Key provider that provides cipher keys randomly from a pre-generated pool. Used for testing.
class RandomEncryptionKeyProvider : public IEncryptionKeyProvider {
public:
RandomEncryptionKeyProvider() {
for (unsigned i = 0; i < NUM_CIPHER; i++) {
BlobCipherDetails cipherDetails;
cipherDetails.encryptDomainId = i;
cipherDetails.baseCipherId = deterministicRandom()->randomUInt64();
cipherDetails.salt = deterministicRandom()->randomUInt64();
cipherKeys[i] = generateCipherKey(cipherDetails);
}
}
virtual ~RandomEncryptionKeyProvider() = default;
bool shouldEnableEncryption() const override { return true; }
Future<EncryptionKey> getSecrets(const EncryptionKeyRef& key) override {
ASSERT(key.cipherHeader.present());
EncryptionKey s = key;
s.cipherKeys.cipherTextKey = cipherKeys[key.cipherHeader.get().cipherTextDetails.encryptDomainId];
s.cipherKeys.cipherHeaderKey = cipherKeys[key.cipherHeader.get().cipherHeaderDetails.encryptDomainId];
return s;
}
Future<EncryptionKey> getByRange(const KeyRef& /*begin*/, const KeyRef& /*end*/) override {
EncryptionKey s;
s.cipherKeys.cipherTextKey = getRandomCipherKey();
s.cipherKeys.cipherHeaderKey = getRandomCipherKey();
return s;
}
private:
Reference<BlobCipherKey> generateCipherKey(const BlobCipherDetails& cipherDetails) {
static unsigned char SHA_KEY[] = "3ab9570b44b8315fdb261da6b1b6c13b";
uint8_t digest[AUTH_TOKEN_SIZE];
computeAuthToken(reinterpret_cast<const unsigned char*>(&cipherDetails.baseCipherId),
sizeof(EncryptCipherBaseKeyId),
SHA_KEY,
AES_256_KEY_LENGTH,
&digest[0],
AUTH_TOKEN_SIZE);
return makeReference<BlobCipherKey>(cipherDetails.encryptDomainId,
cipherDetails.baseCipherId,
&digest[0],
AES_256_KEY_LENGTH,
cipherDetails.salt,
std::numeric_limits<int64_t>::max() /* refreshAt */,
std::numeric_limits<int64_t>::max() /* expireAt */);
}
Reference<BlobCipherKey> getRandomCipherKey() {
return cipherKeys[deterministicRandom()->randomInt(0, NUM_CIPHER)];
}
static constexpr int NUM_CIPHER = 1000;
Reference<BlobCipherKey> cipherKeys[NUM_CIPHER];
};
// Key provider which extract tenant id from range key prefixes, and fetch tenant specific encryption keys from
// EncryptKeyProxy.
class TenantAwareEncryptionKeyProvider : public IEncryptionKeyProvider {
public:
TenantAwareEncryptionKeyProvider(Reference<AsyncVar<ServerDBInfo> const> db) : db(db) {}
virtual ~TenantAwareEncryptionKeyProvider() = default;
bool shouldEnableEncryption() const override {
return isEncryptionOpSupported(EncryptOperationType::STORAGE_SERVER_ENCRYPTION, db->get().client);
}
ACTOR static Future<EncryptionKey> getSecrets(TenantAwareEncryptionKeyProvider* self, EncryptionKeyRef key) {
if (!key.cipherHeader.present()) {
TraceEvent("TenantAwareEncryptionKeyProvider_CipherHeaderMissing");
throw encrypt_ops_error();
}
TextAndHeaderCipherKeys cipherKeys =
wait(getEncryptCipherKeys(self->db, key.cipherHeader.get(), BlobCipherMetrics::KV_REDWOOD));
EncryptionKey s = key;
s.cipherKeys = cipherKeys;
return s;
}
Future<EncryptionKey> getSecrets(const EncryptionKeyRef& key) override { return getSecrets(this, key); }
ACTOR static Future<EncryptionKey> getByRange(TenantAwareEncryptionKeyProvider* self, KeyRef begin, KeyRef end) {
EncryptCipherDomainNameRef domainName;
EncryptCipherDomainId domainId = self->getEncryptionDomainId(begin, end, &domainName);
TextAndHeaderCipherKeys cipherKeys =
wait(getLatestEncryptCipherKeysForDomain(self->db, domainId, domainName, BlobCipherMetrics::KV_REDWOOD));
EncryptionKey s;
s.cipherKeys = cipherKeys;
return s;
}
Future<EncryptionKey> getByRange(const KeyRef& begin, const KeyRef& end) override {
return getByRange(this, begin, end);
}
void setTenantPrefixIndex(Reference<TenantPrefixIndex> tenantPrefixIndex) override {
ASSERT(tenantPrefixIndex.isValid());
this->tenantPrefixIndex = tenantPrefixIndex;
}
private:
EncryptCipherDomainId getEncryptionDomainId(const KeyRef& begin,
const KeyRef& end,
EncryptCipherDomainNameRef* domainName) {
int64_t domainId = SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID;
int64_t beginTenantId = getTenantId(begin, true /*inclusive*/);
int64_t endTenantId = getTenantId(end, false /*inclusive*/);
if (beginTenantId == endTenantId && beginTenantId != SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID) {
ASSERT(tenantPrefixIndex.isValid());
Key tenantPrefix = TenantMapEntry::idToPrefix(beginTenantId);
auto view = tenantPrefixIndex->atLatest();
auto itr = view.find(tenantPrefix);
if (itr != view.end()) {
*domainName = *itr;
domainId = beginTenantId;
} else {
// No tenant with the same tenant id. We could be in optional or disabled tenant mode.
}
}
if (domainId == SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID) {
*domainName = FDB_SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_NAME;
}
return domainId;
}
int64_t getTenantId(const KeyRef& key, bool inclusive) {
// A valid tenant id is always a valid encrypt domain id.
static_assert(INVALID_ENCRYPT_DOMAIN_ID == -1);
if (key.size() && key >= systemKeys.begin) {
return SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID;
}
if (key.size() < TENANT_PREFIX_SIZE) {
// Encryption domain information not available, leverage 'default encryption domain'
return FDB_DEFAULT_ENCRYPT_DOMAIN_ID;
}
StringRef prefix = key.substr(0, TENANT_PREFIX_SIZE);
int64_t tenantId = TenantMapEntry::prefixToId(prefix, EnforceValidTenantId::False);
if (tenantId == TenantInfo::INVALID_TENANT) {
// Encryption domain information not available, leverage 'default encryption domain'
return FDB_DEFAULT_ENCRYPT_DOMAIN_ID;
}
if (!inclusive && key.size() == TENANT_PREFIX_SIZE) {
tenantId = tenantId - 1;
}
ASSERT(tenantId >= 0);
return tenantId;
}
Reference<AsyncVar<ServerDBInfo> const> db;
Reference<TenantPrefixIndex> tenantPrefixIndex;
};
#include "flow/unactorcompiler.h"
#endif


@@ -294,7 +294,7 @@ public:
     EncodingType expectedEncodingType() const override { return EncodingType::AESEncryptionV1; }
     bool enableEncryption() const override {
-        return isEncryptionOpSupported(EncryptOperationType::STORAGE_SERVER_ENCRYPTION, db->get().client);
+        return isEncryptionOpSupported(EncryptOperationType::STORAGE_SERVER_ENCRYPTION);
     }
     bool enableEncryptionDomain() const override { return SERVER_KNOBS->REDWOOD_SPLIT_ENCRYPTED_PAGES_BY_TENANT; }


@@ -186,6 +186,8 @@ struct KmsConnLookupDomainIdsReqInfoRef {
     EncryptCipherDomainNameRef domainName;
     KmsConnLookupDomainIdsReqInfoRef() : domainId(INVALID_ENCRYPT_DOMAIN_ID) {}
+    explicit KmsConnLookupDomainIdsReqInfoRef(Arena& arena, const KmsConnLookupDomainIdsReqInfoRef& from)
+      : domainId(from.domainId), domainName(StringRef(arena, from.domainName)) {}
     explicit KmsConnLookupDomainIdsReqInfoRef(Arena& arena, const EncryptCipherDomainId dId, StringRef name)
       : domainId(dId), domainName(StringRef(arena, name)) {}
     explicit KmsConnLookupDomainIdsReqInfoRef(const EncryptCipherDomainId dId, StringRef name)


@@ -294,8 +294,7 @@ struct ProxyCommitData {
         cx(openDBOnServer(db, TaskPriority::DefaultEndpoint, LockAware::True)), db(db),
         singleKeyMutationEvent("SingleKeyMutation"_sr), lastTxsPop(0), popRemoteTxs(false), lastStartCommit(0),
         lastCommitLatency(SERVER_KNOBS->REQUIRED_MIN_RECOVERY_DURATION), lastCommitTime(0), lastMasterReset(now()),
-        lastResolverReset(now()),
-        isEncryptionEnabled(isEncryptionOpSupported(EncryptOperationType::TLOG_ENCRYPTION, db->get().client)) {
+        lastResolverReset(now()), isEncryptionEnabled(isEncryptionOpSupported(EncryptOperationType::TLOG_ENCRYPTION)) {
         commitComputePerOperation.resize(SERVER_KNOBS->PROXY_COMPUTE_BUCKETS, 0.0);
     }
 };


@@ -81,7 +81,7 @@ struct GetRateInfoReply {
     // Depending on the value of SERVER_KNOBS->ENFORCE_TAG_THROTTLING_ON_PROXIES,
     // one of these fields may be populated
     Optional<PrioritizedTransactionTagMap<ClientTagThrottleLimits>> clientThrottledTags;
-    Optional<PrioritizedTransactionTagMap<double>> proxyThrottledTags;
+    Optional<TransactionTagMap<double>> proxyThrottledTags;
     template <class Ar>
     void serialize(Ar& ar) {


@@ -26,12 +26,13 @@
 #define FDBSERVER_SERVERDBINFO_H
 #pragma once
+#include "fdbclient/ConsistencyScanInterface.actor.h"
 #include "fdbserver/DataDistributorInterface.h"
 #include "fdbserver/MasterInterface.h"
 #include "fdbserver/LogSystemConfig.h"
 #include "fdbserver/RatekeeperInterface.h"
 #include "fdbserver/BlobManagerInterface.h"
-#include "fdbclient/ConsistencyScanInterface.actor.h"
+#include "fdbserver/BlobMigratorInterface.h"
 #include "fdbserver/RecoveryState.h"
 #include "fdbserver/LatencyBandConfig.h"
 #include "fdbserver/WorkerInterface.actor.h"
@@ -50,6 +51,7 @@ struct ServerDBInfo {
     MasterInterface master; // The best guess as to the most recent master, which might still be recovering
     Optional<RatekeeperInterface> ratekeeper;
     Optional<BlobManagerInterface> blobManager;
+    Optional<BlobMigratorInterface> blobMigrator;
     Optional<EncryptKeyProxyInterface> encryptKeyProxy;
     Optional<ConsistencyScanInterface> consistencyScan;
     std::vector<ResolverInterface> resolvers;
@@ -84,6 +86,7 @@ struct ServerDBInfo {
                master,
                ratekeeper,
                blobManager,
+               blobMigrator,
                encryptKeyProxy,
                consistencyScan,
                resolvers,


@@ -42,7 +42,7 @@ public:
     // For each tag and priority combination, return the throughput limit for the cluster
     // (to be shared across all GRV proxies)
-    virtual PrioritizedTransactionTagMap<double> getProxyRates(int numProxies) = 0;
+    virtual TransactionTagMap<double> getProxyRates(int numProxies) = 0;
     virtual int64_t autoThrottleCount() const = 0;
     virtual uint32_t busyReadTagCount() const = 0;
@@ -66,7 +66,7 @@ public:
     void addRequests(TransactionTag tag, int count) override;
     uint64_t getThrottledTagChangeId() const override;
     PrioritizedTransactionTagMap<ClientTagThrottleLimits> getClientRates() override;
-    PrioritizedTransactionTagMap<double> getProxyRates(int numProxies) override { throw not_implemented(); }
+    TransactionTagMap<double> getProxyRates(int numProxies) override { throw not_implemented(); }
     int64_t autoThrottleCount() const override;
     uint32_t busyReadTagCount() const override;
     uint32_t busyWriteTagCount() const override;
@@ -94,7 +94,7 @@ public:
     Future<Void> tryUpdateAutoThrottling(StorageQueueInfo const&) override;
     PrioritizedTransactionTagMap<ClientTagThrottleLimits> getClientRates() override;
-    PrioritizedTransactionTagMap<double> getProxyRates(int numProxies) override;
+    TransactionTagMap<double> getProxyRates(int numProxies) override;
     // Testing only:
 public:
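
getProxyRates(int numProxies) now hands each GRV proxy a flat per-tag rate map rather than a priority-partitioned one. One plausible equal-split reading of the numProxies parameter (the cluster quota divided evenly across proxies; the map type here is a stand-in):

    // Equal-split sketch for getProxyRates(int numProxies); the division by
    // numProxies is an assumption for illustration, and TagRateMapSketch is
    // not the FDB TransactionTagMap.
    #include <cassert>
    #include <map>
    #include <string>

    using TagRateMapSketch = std::map<std::string, double>;

    TagRateMapSketch getProxyRates(const TagRateMapSketch& clusterRates, int numProxies) {
        assert(numProxies > 0);
        TagRateMapSketch perProxy;
        for (const auto& [tag, rate] : clusterRates) {
            perProxy[tag] = rate / numProxies; // each proxy enforces an equal share
        }
        return perProxy;
    }

    int main() {
        auto rates = getProxyRates({ { "hotTag", 300.0 } }, 3);
        assert(rates["hotTag"] == 100.0);
    }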


@@ -33,6 +33,7 @@
#include "fdbserver/RatekeeperInterface.h"
#include "fdbclient/ConsistencyScanInterface.actor.h"
#include "fdbserver/BlobManagerInterface.h"
+ #include "fdbserver/BlobMigratorInterface.h"
#include "fdbserver/ResolverInterface.h"
#include "fdbclient/BlobWorkerInterface.h"
#include "fdbclient/ClientBooleanParams.h"
@@ -59,6 +60,7 @@ struct WorkerInterface {
RequestStream<struct InitializeBlobManagerRequest> blobManager;
RequestStream<struct InitializeBlobWorkerRequest> blobWorker;
RequestStream<struct InitializeConsistencyScanRequest> consistencyScan;
+ RequestStream<struct InitializeBlobMigratorRequest> blobMigrator;
RequestStream<struct InitializeResolverRequest> resolver;
RequestStream<struct InitializeStorageRequest> storage;
RequestStream<struct InitializeLogRouterRequest> logRouter;
@@ -115,6 +117,7 @@ struct WorkerInterface {
blobManager,
blobWorker,
consistencyScan,
+ blobMigrator,
resolver,
storage,
logRouter,
@@ -430,6 +433,7 @@ struct RegisterWorkerRequest {
Optional<DataDistributorInterface> distributorInterf;
Optional<RatekeeperInterface> ratekeeperInterf;
Optional<BlobManagerInterface> blobManagerInterf;
+ Optional<BlobMigratorInterface> blobMigratorInterf;
Optional<EncryptKeyProxyInterface> encryptKeyProxyInterf;
Optional<ConsistencyScanInterface> consistencyScanInterf;
Standalone<VectorRef<StringRef>> issues;
@@ -452,6 +456,7 @@ struct RegisterWorkerRequest {
Optional<DataDistributorInterface> ddInterf,
Optional<RatekeeperInterface> rkInterf,
Optional<BlobManagerInterface> bmInterf,
+ Optional<BlobMigratorInterface> mgInterf,
Optional<EncryptKeyProxyInterface> ekpInterf,
Optional<ConsistencyScanInterface> csInterf,
bool degraded,
@@ -461,9 +466,10 @@ struct RegisterWorkerRequest {
ConfigBroadcastInterface configBroadcastInterface)
: wi(wi), initialClass(initialClass), processClass(processClass), priorityInfo(priorityInfo),
generation(generation), distributorInterf(ddInterf), ratekeeperInterf(rkInterf), blobManagerInterf(bmInterf),
- encryptKeyProxyInterf(ekpInterf), consistencyScanInterf(csInterf), degraded(degraded),
- lastSeenKnobVersion(lastSeenKnobVersion), knobConfigClassSet(knobConfigClassSet), requestDbInfo(false),
- recoveredDiskFiles(recoveredDiskFiles), configBroadcastInterface(configBroadcastInterface) {}
+ blobMigratorInterf(mgInterf), encryptKeyProxyInterf(ekpInterf), consistencyScanInterf(csInterf),
+ degraded(degraded), lastSeenKnobVersion(lastSeenKnobVersion), knobConfigClassSet(knobConfigClassSet),
+ requestDbInfo(false), recoveredDiskFiles(recoveredDiskFiles),
+ configBroadcastInterface(configBroadcastInterface) {}
template <class Ar>
void serialize(Ar& ar) {
@@ -476,6 +482,7 @@ struct RegisterWorkerRequest {
distributorInterf,
ratekeeperInterf,
blobManagerInterf,
+ blobMigratorInterf,
encryptKeyProxyInterf,
consistencyScanInterf,
issues,
@@ -762,6 +769,19 @@ struct InitializeBlobManagerRequest {
}
};
+ struct InitializeBlobMigratorRequest {
+ constexpr static FileIdentifier file_identifier = 7932681;
+ UID reqId;
+ ReplyPromise<BlobMigratorInterface> reply;
+ InitializeBlobMigratorRequest() {}
+ explicit InitializeBlobMigratorRequest(UID uid) : reqId(uid) {}
+ template <class Ar>
+ void serialize(Ar& ar) {
+ serializer(ar, reqId, reply);
+ }
+ };
struct InitializeResolverRequest {
constexpr static FileIdentifier file_identifier = 7413317;
LifetimeToken masterLifetime;
@@ -1006,6 +1026,7 @@ struct Role {
static const Role RATEKEEPER;
static const Role BLOB_MANAGER;
static const Role BLOB_WORKER;
+ static const Role BLOB_MIGRATOR;
static const Role STORAGE_CACHE;
static const Role COORDINATOR;
static const Role BACKUP;
@@ -1042,6 +1063,8 @@ struct Role {
return BLOB_MANAGER;
case ProcessClass::BlobWorker:
return BLOB_WORKER;
+ case ProcessClass::BlobMigrator:
+ return BLOB_MIGRATOR;
case ProcessClass::StorageCache:
return STORAGE_CACHE;
case ProcessClass::Backup:
@@ -1173,6 +1196,7 @@ ACTOR Future<Void> dataDistributor(DataDistributorInterface ddi, Reference<Async
ACTOR Future<Void> ratekeeper(RatekeeperInterface rki, Reference<AsyncVar<ServerDBInfo> const> db);
ACTOR Future<Void> consistencyScan(ConsistencyScanInterface csInterf, Reference<AsyncVar<ServerDBInfo> const> dbInfo);
ACTOR Future<Void> blobManager(BlobManagerInterface bmi, Reference<AsyncVar<ServerDBInfo> const> db, int64_t epoch);
+ ACTOR Future<Void> blobMigrator(BlobMigratorInterface mgi, Reference<AsyncVar<ServerDBInfo> const> db);
ACTOR Future<Void> storageCacheServer(StorageServerInterface interf,
uint16_t id,
Reference<AsyncVar<ServerDBInfo> const> db);
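InitializeBlobMigratorRequest above follows the usual worker-recruitment pattern: the recruiter sends the request over the worker's new blobMigrator stream and receives the migrator's interface in reply. A rough sketch of the recruiting side (illustrative only, not part of this diff; assumes the actor compiler and the interfaces declared in this file):

// Hypothetical sketch: ask a chosen worker to become the blob migrator.
ACTOR Future<Void> recruitBlobMigrator(WorkerInterface worker) {
    InitializeBlobMigratorRequest req(deterministicRandom()->randomUniqueID());
    BlobMigratorInterface migrator = wait(worker.blobMigrator.getReply(req));
    TraceEvent("BlobMigratorRecruited").detail("WorkerID", worker.id()).detail("MigratorID", migrator.id());
    return Void();
}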

View File

@@ -64,6 +64,8 @@ struct WorkloadContext {
};
struct TestWorkload : NonCopyable, WorkloadContext, ReferenceCounted<TestWorkload> {
+ // Implementations of TestWorkload need to provide their name by defining a static member variable called NAME:
+ // static constexpr const char* NAME = "WorkloadName";
int phases;
// Subclasses are expected to also have a constructor with this signature (to work with WorkloadFactory<>):
@@ -75,6 +77,8 @@ struct TestWorkload : NonCopyable, WorkloadContext, ReferenceCounted<TestWorkloa
}
virtual ~TestWorkload(){};
virtual Future<Void> initialized() { return Void(); }
+ // WARNING: this method must not be implemented by a workload directly; it is implemented by the workload
+ // factory. Provide a static member variable called NAME instead.
virtual std::string description() const = 0;
virtual void disableFailureInjectionWorkloads(std::set<std::string>& out) const;
virtual Future<Void> setup(Database const& cx) { return Void(); }
@@ -94,11 +98,26 @@ private:
virtual void getMetrics(std::vector<PerfMetric>& m) = 0;
};
+ struct NoOptions {};
+ template <class Workload, bool isFailureInjectionWorkload = false>
+ struct TestWorkloadImpl : Workload {
+ static_assert(std::is_convertible_v<Workload&, TestWorkload&>);
+ static_assert(std::is_convertible_v<decltype(Workload::NAME), std::string>,
+ "Workload must have a static member `NAME` which is convertible to string");
+ static_assert(std::is_same_v<decltype(&TestWorkload::description), decltype(&Workload::description)>,
+ "Workload must not override TestWorkload::description");
+ TestWorkloadImpl(WorkloadContext const& wcx) : Workload(wcx) {}
+ template <bool E = isFailureInjectionWorkload>
+ TestWorkloadImpl(WorkloadContext const& wcx, std::enable_if_t<E, NoOptions> o) : Workload(wcx, o) {}
+ std::string description() const override { return Workload::NAME; }
+ };
struct CompoundWorkload;
class DeterministicRandom;
- struct NoOptions {};
struct FailureInjectionWorkload : TestWorkload {
FailureInjectionWorkload(WorkloadContext const&);
virtual ~FailureInjectionWorkload() {}
@@ -126,12 +145,11 @@ struct FailureInjectorFactory : IFailureInjectorFactory {
IFailureInjectorFactory::factories().push_back(Reference<IFailureInjectorFactory>::addRef(this));
}
Reference<FailureInjectionWorkload> create(WorkloadContext const& wcx) override {
- return makeReference<W>(wcx, NoOptions());
+ return makeReference<TestWorkloadImpl<W, true>>(wcx, NoOptions());
}
};
struct CompoundWorkload : TestWorkload {
- bool runFailureWorkloads = true;
std::vector<Reference<TestWorkload>> workloads;
std::vector<Reference<FailureInjectionWorkload>> failureInjection;
@@ -213,14 +231,20 @@ struct IWorkloadFactory : ReferenceCounted<IWorkloadFactory> {
virtual Reference<TestWorkload> create(WorkloadContext const& wcx) = 0;
};
- template <class WorkloadType>
+ FDB_DECLARE_BOOLEAN_PARAM(UntrustedMode);
+ template <class Workload>
struct WorkloadFactory : IWorkloadFactory {
- bool asClient;
- WorkloadFactory(const char* name, bool asClient = false) : asClient(asClient) {
- factories()[name] = Reference<IWorkloadFactory>::addRef(this);
+ static_assert(std::is_convertible_v<decltype(Workload::NAME), std::string>,
+ "Each workload must have a Workload::NAME member");
+ using WorkloadType = TestWorkloadImpl<Workload>;
+ bool runInUntrustedClient;
+ WorkloadFactory(UntrustedMode runInUntrustedClient = UntrustedMode::False)
+ : runInUntrustedClient(runInUntrustedClient) {
+ factories()[WorkloadType::NAME] = Reference<IWorkloadFactory>::addRef(this);
}
Reference<TestWorkload> create(WorkloadContext const& wcx) override {
- if (g_network->isSimulated() && asClient) {
+ if (g_network->isSimulated() && runInUntrustedClient) {
return makeReference<ClientWorkload>(
[](WorkloadContext const& wcx) { return makeReference<WorkloadType>(wcx); }, wcx);
}
@@ -228,7 +252,7 @@ struct WorkloadFactory : IWorkloadFactory {
}
};
- #define REGISTER_WORKLOAD(classname) WorkloadFactory<classname> classname##WorkloadFactory(#classname)
+ #define REGISTER_WORKLOAD(classname) WorkloadFactory<classname> classname##WorkloadFactory
struct DistributedTestResults {
std::vector<PerfMetric> metrics;
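Taken together, the changes above mean a workload no longer implements description() or passes a name string to its factory: it declares a static NAME member, and TestWorkloadImpl supplies description() from it. A minimal sketch of the new registration pattern (hypothetical workload, not part of this diff):

// Hypothetical example: NAME replaces both description() and the old factory string argument.
struct DummyWorkload : TestWorkload {
    static constexpr auto NAME = "Dummy";
    DummyWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {}
    Future<Void> start(Database const& cx) override { return Void(); }
    Future<bool> check(Database const& cx) override { return true; }
    void getMetrics(std::vector<PerfMetric>& m) override {}
};
// Registers under DummyWorkload::NAME; the factory wraps the class in TestWorkloadImpl<DummyWorkload>,
// which provides description() and triggers the static_asserts at compile time if NAME is missing.
REGISTER_WORKLOAD(DummyWorkload);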

View File

@@ -28,6 +28,7 @@
#include "fdbrpc/TenantInfo.h"
#include "flow/ApiVersion.h"
#include "fmt/format.h"
+ #include "fdbclient/Audit.h"
#include "fdbclient/CommitTransaction.h"
#include "fdbclient/FDBTypes.h"
#include "fdbrpc/fdbrpc.h"
@@ -1016,6 +1017,8 @@ public:
FlowLock serveFetchCheckpointParallelismLock;
+ FlowLock serveAuditStorageParallelismLock;
int64_t instanceID;
Promise<Void> otherError;
@@ -1225,6 +1228,12 @@ public:
specialCounter(cc, "ServeFetchCheckpointWaiting", [self]() {
return self->serveFetchCheckpointParallelismLock.waiters();
});
+ specialCounter(cc, "ServeValidateStorageActive", [self]() {
+ return self->serveAuditStorageParallelismLock.activePermits();
+ });
+ specialCounter(cc, "ServeValidateStorageWaiting", [self]() {
+ return self->serveAuditStorageParallelismLock.waiters();
+ });
specialCounter(
cc, "ChangeFeedDiskReadsActive", [self]() { return self->changeFeedDiskReadsLock.activePermits(); });
specialCounter(
@@ -1291,6 +1300,7 @@ public:
changeFeedDiskReadsLock(SERVER_KNOBS->CHANGE_FEED_DISK_READS_PARALLELISM),
fetchKeysBytesBudget(SERVER_KNOBS->STORAGE_FETCH_BYTES), fetchKeysBudgetUsed(false),
serveFetchCheckpointParallelismLock(SERVER_KNOBS->SERVE_FETCH_CHECKPOINT_PARALLELISM),
+ serveAuditStorageParallelismLock(SERVER_KNOBS->SERVE_AUDIT_STORAGE_PARALLELISM),
instanceID(deterministicRandom()->randomUniqueID().first()), shuttingDown(false), behind(false),
versionBehind(false), debug_inApplyUpdate(false), debug_lastValidateTime(0), lastBytesInputEBrake(0),
lastDurableVersionEBrake(0), maxQueryQueue(0), transactionTagCounter(ssi.id()),
@@ -2864,20 +2874,6 @@ ACTOR Future<std::pair<ChangeFeedStreamReply, bool>> getChangeFeedMutations(Stor
}
}
- if (DEBUG_CF_TRACE) {
- TraceEvent(SevDebug, "ChangeFeedMutationsDone", data->thisServerID)
- .detail("FeedID", req.rangeID)
- .detail("StreamUID", streamUID)
- .detail("Range", req.range)
- .detail("Begin", req.begin)
- .detail("End", req.end)
- .detail("FirstVersion", reply.mutations.empty() ? invalidVersion : reply.mutations.front().version)
- .detail("LastVersion", reply.mutations.empty() ? invalidVersion : reply.mutations.back().version)
- .detail("Count", reply.mutations.size())
- .detail("GotAll", gotAll)
- .detail("PeerAddr", req.reply.getEndpoint().getPrimaryAddress());
- }
if (DEBUG_CF_MISSING(req.rangeID, req.range, req.begin, reply.mutations.back().version) && !req.canReadPopped) {
bool foundVersion = false;
bool foundKey = false;
@@ -2929,6 +2925,21 @@
reply.popVersion = feedInfo->emptyVersion + 1;
+ if (DEBUG_CF_TRACE) {
+ TraceEvent(SevDebug, "ChangeFeedMutationsDone", data->thisServerID)
+ .detail("FeedID", req.rangeID)
+ .detail("StreamUID", streamUID)
+ .detail("Range", req.range)
+ .detail("Begin", req.begin)
+ .detail("End", req.end)
+ .detail("FirstVersion", reply.mutations.empty() ? invalidVersion : reply.mutations.front().version)
+ .detail("LastVersion", reply.mutations.empty() ? invalidVersion : reply.mutations.back().version)
+ .detail("PopVersion", reply.popVersion)
+ .detail("Count", reply.mutations.size())
+ .detail("GotAll", gotAll)
+ .detail("PeerAddr", req.reply.getEndpoint().getPrimaryAddress());
+ }
// If the SS's version advanced at all during any of the waits, the read from memory may have missed some
// mutations, so gotAll can only be true if data->version didn't change over the course of this actor
return std::make_pair(reply, gotAll);
@@ -4138,6 +4149,322 @@ Key constructMappedKey(KeyValueRef* keyValue, std::vector<Optional<Tuple>>& vec,
return mappedKeyTuple.pack();
}
+ ACTOR Future<Void> validateRangeAgainstServer(StorageServer* data,
+ KeyRange range,
+ Version version,
+ StorageServerInterface remoteServer) {
+ TraceEvent(SevInfo, "ValidateRangeAgainstServerBegin", data->thisServerID)
+ .detail("Range", range)
+ .detail("Version", version)
+ .detail("RemoteServer", remoteServer.toString());
+ state int validatedKeys = 0;
+ state std::string error;
+ loop {
+ try {
+ std::vector<Future<ErrorOr<GetKeyValuesReply>>> fs;
+ int limit = 1e4;
+ int limitBytes = CLIENT_KNOBS->REPLY_BYTE_LIMIT;
+ GetKeyValuesRequest req;
+ req.begin = firstGreaterOrEqual(range.begin);
+ req.end = firstGreaterOrEqual(range.end);
+ req.limit = limit;
+ req.limitBytes = limitBytes;
+ req.version = version;
+ req.tags = TagSet();
+ fs.push_back(remoteServer.getKeyValues.getReplyUnlessFailedFor(req, 2, 0));
+ GetKeyValuesRequest localReq;
+ localReq.begin = firstGreaterOrEqual(range.begin);
+ localReq.end = firstGreaterOrEqual(range.end);
+ localReq.limit = limit;
+ localReq.limitBytes = limitBytes;
+ localReq.version = version;
+ localReq.tags = TagSet();
+ data->actors.add(getKeyValuesQ(data, localReq));
+ fs.push_back(errorOr(localReq.reply.getFuture()));
+ std::vector<ErrorOr<GetKeyValuesReply>> reps = wait(getAll(fs));
+ for (int i = 0; i < reps.size(); ++i) {
+ if (reps[i].isError()) {
+ TraceEvent(SevWarn, "ValidateRangeGetKeyValuesError", data->thisServerID)
+ .errorUnsuppressed(reps[i].getError())
+ .detail("ReplyIndex", i)
+ .detail("Range", range);
+ throw reps[i].getError();
+ }
+ if (reps[i].get().error.present()) {
+ TraceEvent(SevWarn, "ValidateRangeGetKeyValuesError", data->thisServerID)
+ .errorUnsuppressed(reps[i].get().error.get())
+ .detail("ReplyIndex", i)
+ .detail("Range", range);
+ throw reps[i].get().error.get();
+ }
+ }
+ GetKeyValuesReply remote = reps[0].get(), local = reps[1].get();
+ Key lastKey = range.begin;
+ const int end = std::min(local.data.size(), remote.data.size());
+ int i = 0;
+ for (; i < end; ++i) {
+ KeyValueRef remoteKV = remote.data[i];
+ KeyValueRef localKV = local.data[i];
+ if (!range.contains(remoteKV.key) || !range.contains(localKV.key)) {
+ TraceEvent(SevDebug, "SSValidateRangeKeyOutOfRange", data->thisServerID)
+ .detail("Range", range)
+ .detail("RemoteServer", remoteServer.toString().c_str())
+ .detail("LocalKey", Traceable<StringRef>::toString(localKV.key).c_str())
+ .detail("RemoteKey", Traceable<StringRef>::toString(remoteKV.key).c_str());
+ throw wrong_shard_server();
+ }
+ if (remoteKV.key != localKV.key) {
+ error = format("Key Mismatch: local server (%016llx): %s, remote server(%016llx) %s",
+ data->thisServerID.first(),
+ Traceable<StringRef>::toString(localKV.key).c_str(),
+ remoteServer.uniqueID.first(),
+ Traceable<StringRef>::toString(remoteKV.key).c_str());
+ } else if (remoteKV.value != localKV.value) {
+ error = format("Value Mismatch for Key %s: local server (%016llx): %s, remote server(%016llx) %s",
+ Traceable<StringRef>::toString(localKV.key).c_str(),
+ data->thisServerID.first(),
+ Traceable<StringRef>::toString(localKV.value).c_str(),
+ remoteServer.uniqueID.first(),
+ Traceable<StringRef>::toString(remoteKV.value).c_str());
+ } else {
+ TraceEvent(SevVerbose, "ValidatedKey", data->thisServerID).detail("Key", localKV.key);
+ ++validatedKeys;
+ }
+ lastKey = localKV.key;
+ }
+ if (!error.empty()) {
+ break;
+ }
+ if (!local.more && !remote.more && local.data.size() == remote.data.size()) {
+ break;
+ } else if (i >= local.data.size() && !local.more && i < remote.data.size()) {
error = format("Missing key(s) form local server (%lld), next key: %s, remote server(%016llx) ",
data->thisServerID.first(),
Traceable<StringRef>::toString(remote.data[i].key).c_str(),
remoteServer.uniqueID.first());
break;
} else if (i >= remote.data.size() && !remote.more && i < local.data.size()) {
error = format("Missing key(s) form remote server (%lld), next local server(%016llx) key: %s",
remoteServer.uniqueID.first(),
data->thisServerID.first(),
Traceable<StringRef>::toString(local.data[i].key).c_str());
break;
+ }
+ range = KeyRangeRef(keyAfter(lastKey), range.end);
+ } catch (Error& e) {
+ TraceEvent(SevWarnAlways, "ValidateRangeAgainstServerError", data->thisServerID)
+ .errorUnsuppressed(e)
+ .detail("RemoteServer", remoteServer.toString())
+ .detail("Range", range)
+ .detail("Version", version);
+ throw e;
+ }
+ }
+ if (!error.empty()) {
+ TraceEvent(SevError, "ValidateRangeAgainstServerError", data->thisServerID)
+ .detail("Range", range)
+ .detail("Version", version)
+ .detail("ErrorMessage", error)
+ .detail("RemoteServer", remoteServer.toString());
+ }
+ TraceEvent(SevDebug, "ValidateRangeAgainstServerEnd", data->thisServerID)
+ .detail("Range", range)
+ .detail("Version", version)
+ .detail("ValidatedKeys", validatedKeys)
+ .detail("Servers", remoteServer.toString());
+ return Void();
+ }
+ ACTOR Future<Void> validateRangeShard(StorageServer* data, KeyRange range, std::vector<UID> candidates) {
+ TraceEvent(SevDebug, "ServeValidateRangeShardBegin", data->thisServerID)
+ .detail("Range", range)
+ .detail("Servers", describe(candidates));
+ state Version version;
+ state std::vector<Optional<Value>> serverListValues;
+ state Transaction tr(data->cx);
+ tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
+ tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
+ loop {
+ try {
+ std::vector<Future<Optional<Value>>> serverListEntries;
+ for (const UID& id : candidates) {
+ serverListEntries.push_back(tr.get(serverListKeyFor(id)));
+ }
+ std::vector<Optional<Value>> serverListValues_ = wait(getAll(serverListEntries));
+ serverListValues = serverListValues_;
+ Version version_ = wait(tr.getReadVersion());
+ version = version_;
+ break;
+ } catch (Error& e) {
+ wait(tr.onError(e));
+ }
+ }
+ std::unordered_map<std::string, std::vector<StorageServerInterface>> ssis;
+ std::string thisDcId;
+ for (const auto& v : serverListValues) {
+ if (!v.present()) {
+ continue;
+ }
+ const StorageServerInterface ssi = decodeServerListValue(v.get());
+ if (ssi.uniqueID == data->thisServerID) {
+ thisDcId = ssi.locality.describeDcId();
+ }
+ ssis[ssi.locality.describeDcId()].push_back(ssi);
+ }
+ if (ssis.size() < 2) {
+ TraceEvent(SevWarn, "ServeValidateRangeShardNotHAConfig", data->thisServerID)
+ .detail("Range", range)
+ .detail("Servers", describe(candidates));
+ return Void();
+ }
+ StorageServerInterface* remoteServer = nullptr;
+ for (auto& [dcId, ssiList] : ssis) {
+ if (dcId != thisDcId) {
+ if (ssiList.empty()) {
+ break;
+ }
+ const int idx = deterministicRandom()->randomInt(0, ssiList.size());
+ remoteServer = &ssiList[idx];
+ break;
+ }
+ }
+ if (remoteServer != nullptr) {
+ wait(validateRangeAgainstServer(data, range, version, *remoteServer));
+ } else {
+ TraceEvent(SevWarn, "ServeValidateRangeShardRemoteNotFound", data->thisServerID)
+ .detail("Range", range)
+ .detail("Servers", describe(candidates));
+ throw audit_storage_failed();
+ }
+ return Void();
+ }
+ ACTOR Future<Void> validateRangeAgainstServers(StorageServer* data, KeyRange range, std::vector<UID> targetServers) {
+ TraceEvent(SevDebug, "ValidateRangeAgainstServersBegin", data->thisServerID)
+ .detail("Range", range)
+ .detail("TargetServers", describe(targetServers));
+ state Version version;
+ state std::vector<Optional<Value>> serverListValues;
+ state Transaction tr(data->cx);
+ tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
+ tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
+ loop {
+ try {
+ std::vector<Future<Optional<Value>>> serverListEntries;
+ for (const UID& id : targetServers) {
+ if (id != data->thisServerID) {
+ serverListEntries.push_back(tr.get(serverListKeyFor(id)));
+ }
+ }
+ std::vector<Optional<Value>> serverListValues_ = wait(getAll(serverListEntries));
+ serverListValues = serverListValues_;
+ Version version_ = wait(tr.getReadVersion());
+ version = version_;
+ break;
+ } catch (Error& e) {
+ wait(tr.onError(e));
+ }
+ }
+ std::vector<Future<Void>> fs;
+ for (const auto& v : serverListValues) {
+ if (!v.present()) {
+ TraceEvent(SevWarn, "ValidateRangeRemoteServerNotFound", data->thisServerID).detail("Range", range);
+ throw audit_storage_failed();
+ }
+ fs.push_back(validateRangeAgainstServer(data, range, version, decodeServerListValue(v.get())));
+ }
+ wait(waitForAll(fs));
+ return Void();
+ }
+ ACTOR Future<Void> auditStorageQ(StorageServer* data, AuditStorageRequest req) {
+ wait(data->serveAuditStorageParallelismLock.take(TaskPriority::DefaultYield));
+ state FlowLock::Releaser holder(data->serveAuditStorageParallelismLock);
+ TraceEvent(SevInfo, "ServeAuditStorageBegin", data->thisServerID)
+ .detail("RequestID", req.id)
+ .detail("Range", req.range)
+ .detail("AuditType", req.type)
+ .detail("TargetServers", describe(req.targetServers));
+ state Key begin = req.range.begin;
+ state std::vector<Future<Void>> fs;
+ try {
+ if (req.targetServers.empty()) {
+ while (begin < req.range.end) {
+ state Transaction tr(data->cx);
+ tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
+ tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
+ try {
+ state RangeResult shards = wait(krmGetRanges(&tr,
+ keyServersPrefix,
+ req.range,
+ SERVER_KNOBS->MOVE_SHARD_KRM_ROW_LIMIT,
+ SERVER_KNOBS->MOVE_SHARD_KRM_BYTE_LIMIT));
+ ASSERT(!shards.empty());
+ state RangeResult UIDtoTagMap = wait(tr.getRange(serverTagKeys, CLIENT_KNOBS->TOO_MANY));
+ ASSERT(!UIDtoTagMap.more && UIDtoTagMap.size() < CLIENT_KNOBS->TOO_MANY);
+ for (int i = 0; i < shards.size() - 1; ++i) {
+ std::vector<UID> src;
+ std::vector<UID> dest;
+ UID srcId, destId;
+ decodeKeyServersValue(UIDtoTagMap, shards[i].value, src, dest, srcId, destId);
+ fs.push_back(validateRangeShard(data, KeyRangeRef(shards[i].key, shards[i + 1].key), src));
+ begin = shards[i + 1].key;
+ }
+ } catch (Error& e) {
+ wait(tr.onError(e));
+ }
+ }
+ } else {
+ fs.push_back(validateRangeAgainstServers(data, req.range, req.targetServers));
+ }
+ wait(waitForAll(fs));
+ AuditStorageState res(req.id, req.getType());
+ res.setPhase(AuditPhase::Complete);
+ req.reply.send(res);
+ } catch (Error& e) {
+ TraceEvent(SevWarn, "ServeAuditStorageError", data->thisServerID)
+ .errorUnsuppressed(e)
+ .detail("RequestID", req.id)
+ .detail("Range", req.range)
+ .detail("AuditType", req.type);
+ req.reply.sendError(audit_storage_failed());
+ }
+ return Void();
+ }
TEST_CASE("/fdbserver/storageserver/constructMappedKey") { TEST_CASE("/fdbserver/storageserver/constructMappedKey") {
Key key = Tuple::makeTuple("key-0"_sr, "key-1"_sr, "key-2"_sr).getDataAsStandalone(); Key key = Tuple::makeTuple("key-0"_sr, "key-1"_sr, "key-2"_sr).getDataAsStandalone();
Value value = Tuple::makeTuple("value-0"_sr, "value-1"_sr, "value-2"_sr).getDataAsStandalone(); Value value = Tuple::makeTuple("value-0"_sr, "value-1"_sr, "value-2"_sr).getDataAsStandalone();
@ -8780,6 +9107,16 @@ ACTOR Future<Void> updateStorage(StorageServer* data) {
if (info != data->uidChangeFeed.end()) { if (info != data->uidChangeFeed.end()) {
// Cannot yield in mutation updating loop because of race with fetchVersion // Cannot yield in mutation updating loop because of race with fetchVersion
Version alreadyFetched = std::max(info->second->fetchVersion, info->second->durableFetchVersion.get()); Version alreadyFetched = std::max(info->second->fetchVersion, info->second->durableFetchVersion.get());
if (info->second->removing) {
auto cleanupPending = data->changeFeedCleanupDurable.find(info->second->id);
if (cleanupPending != data->changeFeedCleanupDurable.end() &&
cleanupPending->second <= newOldestVersion) {
// due to a race, we just applied a cleanup mutation, but feed updates happen just after. Don't
// write any mutations for this feed.
curFeed++;
continue;
}
}
for (auto& it : info->second->mutations) { for (auto& it : info->second->mutations) {
if (it.version <= alreadyFetched) { if (it.version <= alreadyFetched) {
continue; continue;
@ -10366,6 +10703,9 @@ ACTOR Future<Void> storageServerCore(StorageServer* self, StorageServerInterface
when(FetchCheckpointKeyValuesRequest req = waitNext(ssi.fetchCheckpointKeyValues.getFuture())) { when(FetchCheckpointKeyValuesRequest req = waitNext(ssi.fetchCheckpointKeyValues.getFuture())) {
self->actors.add(fetchCheckpointKeyValuesQ(self, req)); self->actors.add(fetchCheckpointKeyValuesQ(self, req));
} }
when(AuditStorageRequest req = waitNext(ssi.auditStorage.getFuture())) {
self->actors.add(auditStorageQ(self, req));
}
when(wait(updateProcessStatsTimer)) { when(wait(updateProcessStatsTimer)) {
updateProcessStats(self); updateProcessStats(self);
updateProcessStatsTimer = delay(SERVER_KNOBS->FASTRESTORE_UPDATE_PROCESS_STATS_INTERVAL); updateProcessStatsTimer = delay(SERVER_KNOBS->FASTRESTORE_UPDATE_PROCESS_STATS_INTERVAL);
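auditStorageQ above is reached through the storage server's new auditStorage endpoint, registered in storageServerCore. A rough sketch of a caller (illustrative only; the request constructor and the AuditType value are assumptions inferred from the fields used above, not confirmed by this diff):

// Hypothetical sketch: ask one storage server to audit a range against other replicas.
ACTOR Future<Void> requestAudit(StorageServerInterface ssi, KeyRange range) {
    state AuditStorageRequest req(deterministicRandom()->randomUniqueID(), range, AuditType::ValidateHA);
    AuditStorageState result = wait(ssi.auditStorage.getReply(req));
    ASSERT(result.getPhase() == AuditPhase::Complete); // auditStorageQ replies Complete on success
    TraceEvent("AuditStorageDone").detail("AuditID", req.id).detail("Range", range);
    return Void();
}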

View File

@@ -46,6 +46,8 @@
#include "fdbserver/WorkerInterface.actor.h"
#include "flow/actorcompiler.h" // This must be the last #include.
+ FDB_DEFINE_BOOLEAN_PARAM(UntrustedMode);
WorkloadContext::WorkloadContext() {}
WorkloadContext::WorkloadContext(const WorkloadContext& r)

View File

@@ -23,6 +23,7 @@
#include <boost/lexical_cast.hpp>
#include "fdbclient/FDBTypes.h"
+ #include "fdbserver/BlobMigratorInterface.h"
#include "flow/ApiVersion.h"
#include "flow/IAsyncFile.h"
#include "fdbrpc/Locality.h"
@@ -561,6 +562,7 @@ ACTOR Future<Void> registrationClient(
Reference<AsyncVar<Optional<DataDistributorInterface>> const> ddInterf,
Reference<AsyncVar<Optional<RatekeeperInterface>> const> rkInterf,
Reference<AsyncVar<Optional<std::pair<int64_t, BlobManagerInterface>>> const> bmInterf,
+ Reference<AsyncVar<Optional<BlobMigratorInterface>> const> blobMigratorInterf,
Reference<AsyncVar<Optional<EncryptKeyProxyInterface>> const> ekpInterf,
Reference<AsyncVar<Optional<ConsistencyScanInterface>> const> csInterf,
Reference<AsyncVar<bool> const> degraded,
@@ -602,6 +604,7 @@ ACTOR Future<Void> registrationClient(
ddInterf->get(),
rkInterf->get(),
bmInterf->get().present() ? bmInterf->get().get().second : Optional<BlobManagerInterface>(),
+ blobMigratorInterf->get(),
ekpInterf->get(),
csInterf->get(),
degraded->get(),
@@ -674,6 +677,7 @@ ACTOR Future<Void> registrationClient(
when(wait(rkInterf->onChange())) { break; }
when(wait(csInterf->onChange())) { break; }
when(wait(bmInterf->onChange())) { break; }
+ when(wait(blobMigratorInterf->onChange())) { break; }
when(wait(ekpInterf->onChange())) { break; }
when(wait(degraded->onChange())) { break; }
when(wait(FlowTransport::transport().onIncompatibleChanged())) { break; }
@@ -707,6 +711,10 @@ bool addressInDbAndPrimaryDc(const NetworkAddress& address, Reference<AsyncVar<S
return true;
}
+ if (dbi.blobMigrator.present() && dbi.blobMigrator.get().address() == address) {
+ return true;
+ }
if (dbi.encryptKeyProxy.present() && dbi.encryptKeyProxy.get().address() == address) {
return true;
}
@@ -1651,6 +1659,8 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
state Reference<AsyncVar<Optional<RatekeeperInterface>>> rkInterf(new AsyncVar<Optional<RatekeeperInterface>>());
state Reference<AsyncVar<Optional<std::pair<int64_t, BlobManagerInterface>>>> bmEpochAndInterf(
new AsyncVar<Optional<std::pair<int64_t, BlobManagerInterface>>>());
+ state Reference<AsyncVar<Optional<BlobMigratorInterface>>> blobMigratorInterf(
+ new AsyncVar<Optional<BlobMigratorInterface>>());
state UID lastBMRecruitRequestId;
state Reference<AsyncVar<Optional<EncryptKeyProxyInterface>>> ekpInterf(
new AsyncVar<Optional<EncryptKeyProxyInterface>>());
@@ -1977,6 +1987,7 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
ddInterf,
rkInterf,
bmEpochAndInterf,
+ blobMigratorInterf,
ekpInterf,
csInterf,
degraded,
@@ -2023,8 +2034,11 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
localInfo.distributor.present() ? localInfo.distributor.get().id() : UID())
.detail("BlobManagerID",
localInfo.blobManager.present() ? localInfo.blobManager.get().id() : UID())
+ .detail("BlobMigratorID",
+ localInfo.blobMigrator.present() ? localInfo.blobMigrator.get().id() : UID())
.detail("EncryptKeyProxyID",
- localInfo.encryptKeyProxy.present() ? localInfo.encryptKeyProxy.get().id() : UID());
+ localInfo.encryptKeyProxy.present() ? localInfo.encryptKeyProxy.get().id() : UID())
+ .detail("IsEncryptionEnabled", localInfo.client.isEncryptionEnabled);
dbInfo->set(localInfo);
}
@@ -2242,6 +2256,31 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
TraceEvent("BlobManagerReceived", req.reqId).detail("BlobManagerId", recruited.id());
req.reply.send(recruited);
}
+ when(InitializeBlobMigratorRequest req = waitNext(interf.blobMigrator.getFuture())) {
+ LocalLineage _;
+ getCurrentLineage()->modify(&RoleLineage::role) = ProcessClass::ClusterRole::BlobMigrator;
+ BlobMigratorInterface recruited(locality, req.reqId);
+ recruited.initEndpoints();
+ if (blobMigratorInterf->get().present()) {
+ recruited = blobMigratorInterf->get().get();
+ CODE_PROBE(true, "Recruited while already a blob migrator.");
+ } else {
+ startRole(Role::BLOB_MIGRATOR, recruited.id(), interf.id());
+ DUMPTOKEN(recruited.waitFailure);
+ Future<Void> blobMigratorProcess = blobMigrator(recruited, dbInfo);
+ errorForwarders.add(forwardError(errors,
+ Role::BLOB_MIGRATOR,
+ recruited.id(),
+ setWhenDoneOrError(blobMigratorProcess,
+ blobMigratorInterf,
+ Optional<BlobMigratorInterface>())));
+ blobMigratorInterf->set(Optional<BlobMigratorInterface>(recruited));
+ }
+ TraceEvent("BlobMigrator_InitRequest", req.reqId).detail("BlobMigratorId", recruited.id());
+ req.reply.send(recruited);
+ }
when(InitializeBackupRequest req = waitNext(interf.backup.getFuture())) {
if (!backupWorkerCache.exists(req.reqId)) {
LocalLineage _;
@@ -2727,8 +2766,7 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
f.cancel();
state Error e = err;
bool ok = e.code() == error_code_please_reboot || e.code() == error_code_actor_cancelled ||
- e.code() == error_code_please_reboot_delete;
+ e.code() == error_code_please_reboot_delete || e.code() == error_code_local_config_changed;
endRole(Role::WORKER, interf.id(), "WorkerError", ok, e);
errorForwarders.clear(false);
sharedLogs.clear();
@@ -3546,6 +3584,7 @@ const Role Role::DATA_DISTRIBUTOR("DataDistributor", "DD");
const Role Role::RATEKEEPER("Ratekeeper", "RK");
const Role Role::BLOB_MANAGER("BlobManager", "BM");
const Role Role::BLOB_WORKER("BlobWorker", "BW");
+ const Role Role::BLOB_MIGRATOR("BlobMigrator", "MG");
const Role Role::STORAGE_CACHE("StorageCache", "SC");
const Role Role::COORDINATOR("Coordinator", "CD");
const Role Role::BACKUP("Backup", "BK");

Some files were not shown because too many files have changed in this diff.