Merge branch 'main' of github.com:apple/foundationdb into tenant-delete-id

This commit is contained in:
Jon Fu 2022-11-29 16:49:23 -08:00
commit b7cba23126
131 changed files with 3323 additions and 1454 deletions

View File

@ -1,5 +1,5 @@
[flake8]
ignore = E203, E266, E501, W503, F403, F401, E711, C901, W605
max-line-length = 79
ignore = E203, E266, E501, W503, F403, F401, E711, C901, E721, W605
max-line-length = 88
max-complexity = 18
select = B,C,E,F,W,T4,B9

View File

@ -110,6 +110,12 @@ set(FDB_PACKAGE_NAME "${FDB_MAJOR}.${FDB_MINOR}")
configure_file(${CMAKE_SOURCE_DIR}/versions.target.cmake ${CMAKE_CURRENT_BINARY_DIR}/versions.target)
file(WRITE ${CMAKE_BINARY_DIR}/version.txt ${FDB_VERSION})
set(FDB_CURRENT_VERSION ${PROJECT_VERSION})
set(FDB_FUTURE_VERSION "7.4.0")
set(FDB_PREV_RELEASE_VERSION "7.1.25")
set(FDB_PREV2_RELEASE_VERSION "7.0.0")
set(FDB_PREV3_RELEASE_VERSION "6.3.25")
################################################################################
# Flow
################################################################################

View File

@ -154,6 +154,8 @@ class ApiTest(Test):
snapshot_reads = [x + '_SNAPSHOT' for x in reads]
database_reads = [x + '_DATABASE' for x in reads]
database_mutations = [x + '_DATABASE' for x in mutations]
tenant_reads = [x + '_TENANT' for x in reads]
tenant_mutations = [x + '_TENANT' for x in mutations]
mutations += ['VERSIONSTAMP']
versions = ['GET_READ_VERSION', 'SET_READ_VERSION', 'GET_COMMITTED_VERSION']
snapshot_versions = ['GET_READ_VERSION_SNAPSHOT']
@ -183,6 +185,8 @@ class ApiTest(Test):
if not args.no_tenants:
op_choices += tenants
op_choices += tenant_reads
op_choices += tenant_mutations
idempotent_atomic_ops = ['BIT_AND', 'BIT_OR', 'MAX', 'MIN', 'BYTE_MIN', 'BYTE_MAX']
atomic_ops = idempotent_atomic_ops + ['ADD', 'BIT_XOR', 'APPEND_IF_FITS']

View File

@ -283,7 +283,8 @@ if(NOT WIN32)
foreach(test_file ${API_TEST_FILES})
get_filename_component(file_name "${test_file}" NAME_WE)
set(test_name "fdb_c_api_test_${file_name}")
add_test(NAME "${test_name}"
add_scripted_fdb_test(NAME "${test_name}"
TIMEOUT 300
COMMAND ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/run_c_api_tests.py
--build-dir ${CMAKE_BINARY_DIR}
--api-tester-bin $<TARGET_FILE:fdb_c_api_tester>
@ -291,99 +292,87 @@ if(NOT WIN32)
--test-file ${test_file}
--retain-client-lib-copies
)
set_tests_properties("${test_name}" PROPERTIES TIMEOUT 300)
endforeach()
add_test(NAME fdb_c_upgrade_to_future_version
add_scripted_fdb_test(NAME fdb_c_upgrade_to_future_version
COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
--build-dir ${CMAKE_BINARY_DIR}
--test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml
--upgrade-path "7.3.0" "7.4.0" "7.3.0"
--upgrade-path "${FDB_CURRENT_VERSION}" "${FDB_FUTURE_VERSION}" "${FDB_CURRENT_VERSION}"
--process-number 3
)
set_tests_properties("fdb_c_upgrade_to_future_version" PROPERTIES ENVIRONMENT "${SANITIZER_OPTIONS}")
add_test(NAME fdb_c_upgrade_to_future_version_blob_granules
add_scripted_fdb_test(NAME fdb_c_upgrade_to_future_version_blob_granules
COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
--build-dir ${CMAKE_BINARY_DIR}
--test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/ApiBlobGranulesCorrectness.toml
--upgrade-path "7.3.0" "7.4.0" "7.3.0"
--upgrade-path "${FDB_CURRENT_VERSION}" "${FDB_FUTURE_VERSION}" "${FDB_CURRENT_VERSION}"
--blob-granules-enabled
--process-number 3
)
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT USE_SANITIZER)
add_test(NAME fdb_c_client_config_tests
add_scripted_fdb_test(NAME fdb_c_client_config_tests
COMMAND $<TARGET_FILE:Python3::Interpreter> ${CMAKE_CURRENT_SOURCE_DIR}/test/fdb_c_client_config_tests.py
--build-dir ${CMAKE_BINARY_DIR}
--client-config-tester-bin $<TARGET_FILE:fdb_c_client_config_tester>
)
add_test(NAME fdb_c_upgrade_single_threaded_630api
COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
--build-dir ${CMAKE_BINARY_DIR}
--test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadSingleThr.toml
--upgrade-path "6.3.23" "7.0.0" "7.1.9" "7.3.0"
--process-number 1
)
add_test(NAME fdb_c_upgrade_single_threaded_700api
COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
--build-dir ${CMAKE_BINARY_DIR}
--test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadSingleThr.toml
--upgrade-path "7.0.0" "7.1.9" "7.3.0"
--process-number 1
)
add_test(NAME fdb_c_upgrade_multi_threaded_630api
add_scripted_fdb_test(NAME fdb_c_upgrade_from_prev3_gradual
COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
--build-dir ${CMAKE_BINARY_DIR}
--test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml
--upgrade-path "6.3.23" "7.0.0" "7.1.9" "7.3.0" "7.1.9"
--upgrade-path "${FDB_PREV3_RELEASE_VERSION}" "${FDB_PREV2_RELEASE_VERSION}" "${FDB_PREV_RELEASE_VERSION}" "${FDB_CURRENT_VERSION}" "${FDB_PREV_RELEASE_VERSION}"
--process-number 3
)
add_test(NAME fdb_c_upgrade_multi_threaded_700api
add_scripted_fdb_test(NAME fdb_c_upgrade_from_prev3_direct
COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
--build-dir ${CMAKE_BINARY_DIR}
--test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml
--upgrade-path "7.0.0" "7.1.9" "7.3.0" "7.1.9"
--upgrade-path "${FDB_PREV3_RELEASE_VERSION}" "${FDB_CURRENT_VERSION}" "${FDB_PREV_RELEASE_VERSION}"
--process-number 3
)
add_test(NAME fdb_c_upgrade_multi_threaded_710api
add_scripted_fdb_test(NAME fdb_c_upgrade_from_prev2_gradual
COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
--build-dir ${CMAKE_BINARY_DIR}
--test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml
--upgrade-path "7.1.9" "7.3.0" "7.1.9"
--upgrade-path "${FDB_PREV2_RELEASE_VERSION}" "${FDB_PREV_RELEASE_VERSION}" "${FDB_CURRENT_VERSION}" "${FDB_PREV_RELEASE_VERSION}"
--process-number 3
)
add_test(NAME fdb_c_cluster_wiggle
add_scripted_fdb_test(NAME fdb_c_upgrade_from_prev2_direct
COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
--build-dir ${CMAKE_BINARY_DIR}
--test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml
--upgrade-path "7.3.0" "wiggle"
--upgrade-path "${FDB_PREV2_RELEASE_VERSION}" "${FDB_CURRENT_VERSION}" "${FDB_PREV_RELEASE_VERSION}"
--process-number 3
)
add_scripted_fdb_test(NAME fdb_c_upgrade_from_prev
COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
--build-dir ${CMAKE_BINARY_DIR}
--test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml
--upgrade-path "${FDB_PREV_RELEASE_VERSION}" "${FDB_CURRENT_VERSION}" "${FDB_PREV_RELEASE_VERSION}"
--process-number 3
)
add_scripted_fdb_test(NAME fdb_c_wiggle_only
COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
--build-dir ${CMAKE_BINARY_DIR}
--test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml
--upgrade-path "${FDB_CURRENT_VERSION}" "wiggle"
--disable-log-dump
--process-number 3
--redundancy double
)
add_test(NAME fdb_c_wiggle_and_upgrade_latest
add_scripted_fdb_test(NAME fdb_c_wiggle_and_upgrade
COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
--build-dir ${CMAKE_BINARY_DIR}
--test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml
--upgrade-path "7.1.9" "wiggle" "7.3.0"
--disable-log-dump
--process-number 3
--redundancy double
)
add_test(NAME fdb_c_wiggle_and_upgrade_63
COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py
--build-dir ${CMAKE_BINARY_DIR}
--test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml
--upgrade-path "6.3.24" "wiggle" "7.0.0"
--upgrade-path "${FDB_PREV_RELEASE_VERSION}" "wiggle" "${FDB_CURRENT_VERSION}"
--disable-log-dump
--process-number 3
--redundancy double
@ -470,7 +459,7 @@ elseif(NOT WIN32 AND NOT APPLE AND NOT USE_SANITIZER) # Linux Only, non-sanitizer
target_link_libraries(fdb_c_shim_lib_tester PRIVATE fdb_c_shim SimpleOpt fdb_cpp Threads::Threads)
target_include_directories(fdb_c_shim_lib_tester PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}/foundationdb/ ${CMAKE_SOURCE_DIR}/flow/include)
add_test(NAME fdb_c_shim_library_tests
add_scripted_fdb_test(NAME fdb_c_shim_library_tests
COMMAND $<TARGET_FILE:Python3::Interpreter> ${CMAKE_CURRENT_SOURCE_DIR}/test/fdb_c_shim_tests.py
--build-dir ${CMAKE_BINARY_DIR}
--unit-tests-bin $<TARGET_FILE:fdb_c_shim_unit_tests>

View File

@ -1,43 +0,0 @@
[[test]]
title = 'Mixed Workload for Upgrade Tests with a Single FDB Thread'
multiThreaded = false
buggify = true
databasePerTransaction = false
minDatabases = 2
maxDatabases = 8
minClientThreads = 2
maxClientThreads = 8
minClients = 2
maxClients = 8
[[test.workload]]
name = 'ApiCorrectness'
minKeyLength = 1
maxKeyLength = 64
minValueLength = 1
maxValueLength = 1000
maxKeysPerTransaction = 50
initialSize = 100
runUntilStop = true
readExistingKeysRatio = 0.9
[[test.workload]]
name = 'CancelTransaction'
minKeyLength = 1
maxKeyLength = 64
minValueLength = 1
maxValueLength = 1000
maxKeysPerTransaction = 50
initialSize = 100
runUntilStop = true
readExistingKeysRatio = 0.9
[[test.workload]]
name = 'AtomicOpsCorrectness'
initialSize = 0
runUntilStop = true
[[test.workload]]
name = 'WatchAndWait'
initialSize = 0
runUntilStop = true

View File

@ -7,16 +7,9 @@ import sys
import os
import glob
import unittest
sys.path[:0] = [os.path.join(os.path.dirname(__file__), "..", "..", "..", "tests", "TestRunner")]
# fmt: off
from binary_download import FdbBinaryDownloader, CURRENT_VERSION
from fdb_version import CURRENT_VERSION, PREV_RELEASE_VERSION, PREV2_RELEASE_VERSION
from binary_download import FdbBinaryDownloader
from local_cluster import LocalCluster, random_secret_string
# fmt: on
PREV_RELEASE_VERSION = "7.1.5"
PREV_PREV_RELEASE_VERSION = "7.0.0"
args = None
downloader = None
@ -180,15 +173,15 @@ class ClientConfigTests(unittest.TestCase):
def test_multiple_external_clients(self):
# Multiple external clients, normal case
test = ClientConfigTest(self)
test.create_external_lib_dir([CURRENT_VERSION, PREV_RELEASE_VERSION, PREV_PREV_RELEASE_VERSION])
test.create_external_lib_dir([CURRENT_VERSION, PREV_RELEASE_VERSION, PREV2_RELEASE_VERSION])
test.disable_local_client = True
test.api_version = api_version_from_str(PREV_PREV_RELEASE_VERSION)
test.api_version = api_version_from_str(PREV2_RELEASE_VERSION)
test.exec()
def test_no_external_client_support_api_version(self):
# Multiple external clients, API version supported by none of them
test = ClientConfigTest(self)
test.create_external_lib_dir([PREV_PREV_RELEASE_VERSION, PREV_RELEASE_VERSION])
test.create_external_lib_dir([PREV2_RELEASE_VERSION, PREV_RELEASE_VERSION])
test.disable_local_client = True
test.api_version = api_version_from_str(CURRENT_VERSION)
test.expected_error = 2204 # API function missing
@ -197,7 +190,7 @@ class ClientConfigTests(unittest.TestCase):
def test_no_external_client_support_api_version_ignore(self):
# Multiple external clients; API version supported by none of them; Ignore failures
test = ClientConfigTest(self)
test.create_external_lib_dir([PREV_PREV_RELEASE_VERSION, PREV_RELEASE_VERSION])
test.create_external_lib_dir([PREV2_RELEASE_VERSION, PREV_RELEASE_VERSION])
test.disable_local_client = True
test.api_version = api_version_from_str(CURRENT_VERSION)
test.ignore_external_client_failures = True
@ -207,7 +200,7 @@ class ClientConfigTests(unittest.TestCase):
def test_one_external_client_wrong_api_version(self):
# Multiple external clients, API version unsupported by one of them
test = ClientConfigTest(self)
test.create_external_lib_dir([CURRENT_VERSION, PREV_RELEASE_VERSION, PREV_PREV_RELEASE_VERSION])
test.create_external_lib_dir([CURRENT_VERSION, PREV_RELEASE_VERSION, PREV2_RELEASE_VERSION])
test.disable_local_client = True
test.api_version = api_version_from_str(CURRENT_VERSION)
test.expected_error = 2204 # API function missing
@ -216,7 +209,7 @@ class ClientConfigTests(unittest.TestCase):
def test_one_external_client_wrong_api_version_ignore(self):
# Multiple external clients; API version unsupported by one of them; Ignore failures
test = ClientConfigTest(self)
test.create_external_lib_dir([CURRENT_VERSION, PREV_RELEASE_VERSION, PREV_PREV_RELEASE_VERSION])
test.create_external_lib_dir([CURRENT_VERSION, PREV_RELEASE_VERSION, PREV2_RELEASE_VERSION])
test.disable_local_client = True
test.api_version = api_version_from_str(CURRENT_VERSION)
test.ignore_external_client_failures = True
@ -286,6 +279,6 @@ if __name__ == "__main__":
downloader = FdbBinaryDownloader(args.build_dir)
downloader.download_old_binaries(PREV_RELEASE_VERSION)
downloader.download_old_binaries(PREV_PREV_RELEASE_VERSION)
downloader.download_old_binaries(PREV2_RELEASE_VERSION)
unittest.main(verbosity=2)

View File

@ -6,15 +6,10 @@ import shutil
import subprocess
import sys
import os
sys.path[:0] = [os.path.join(os.path.dirname(__file__), '..', '..', '..', 'tests', 'TestRunner')]
# fmt: off
from binary_download import FdbBinaryDownloader, CURRENT_VERSION
from binary_download import FdbBinaryDownloader
from local_cluster import LocalCluster, random_secret_string
# fmt: on
from fdb_version import CURRENT_VERSION, PREV_RELEASE_VERSION
LAST_RELEASE_VERSION = "7.1.5"
TESTER_STATS_INTERVAL_SEC = 5
DEFAULT_TEST_FILE = "CApiCorrectnessMultiThr.toml"
IMPLIBSO_ERROR_CODE = -6 # SIGABORT
@ -54,13 +49,12 @@ class TestEnv(LocalCluster):
self.downloader.binary_path(version, "fdbserver"),
self.downloader.binary_path(version, "fdbmonitor"),
self.downloader.binary_path(version, "fdbcli"),
1
1,
)
self.set_env_var("LD_LIBRARY_PATH", self.downloader.lib_dir(version))
client_lib = self.downloader.lib_path(version)
assert client_lib.exists(), "{} does not exist".format(client_lib)
self.client_lib_external = self.tmp_dir.joinpath(
"libfdb_c_external.so")
self.client_lib_external = self.tmp_dir.joinpath("libfdb_c_external.so")
shutil.copyfile(client_lib, self.client_lib_external)
def __enter__(self):
@ -73,22 +67,16 @@ class TestEnv(LocalCluster):
shutil.rmtree(self.tmp_dir)
def exec_client_command(self, cmd_args, env_vars=None, expected_ret_code=0):
print("Executing test command: {}".format(
" ".join([str(c) for c in cmd_args])
))
tester_proc = subprocess.Popen(
cmd_args, stdout=sys.stdout, stderr=sys.stderr, env=env_vars
)
print("Executing test command: {}".format(" ".join([str(c) for c in cmd_args])))
tester_proc = subprocess.Popen(cmd_args, stdout=sys.stdout, stderr=sys.stderr, env=env_vars)
tester_retcode = tester_proc.wait()
assert tester_retcode == expected_ret_code, "Tester completed return code {}, but {} was expected".format(
tester_retcode, expected_ret_code)
tester_retcode, expected_ret_code
)
class FdbCShimTests:
def __init__(
self,
args
):
def __init__(self, args):
self.build_dir = Path(args.build_dir).resolve()
assert self.build_dir.exists(), "{} does not exist".format(args.build_dir)
assert self.build_dir.is_dir(), "{} is not a directory".format(args.build_dir)
@ -97,15 +85,14 @@ class FdbCShimTests:
self.api_tester_bin = Path(args.api_tester_bin).resolve()
assert self.api_tester_bin.exists(), "{} does not exist".format(self.api_tester_bin)
self.shim_lib_tester_bin = Path(args.shim_lib_tester_bin).resolve()
assert self.shim_lib_tester_bin.exists(
), "{} does not exist".format(self.shim_lib_tester_bin)
assert self.shim_lib_tester_bin.exists(), "{} does not exist".format(self.shim_lib_tester_bin)
self.api_test_dir = Path(args.api_test_dir).resolve()
assert self.api_test_dir.exists(), "{} does not exist".format(self.api_test_dir)
self.downloader = FdbBinaryDownloader(args.build_dir)
# binary downloads are currently available only for x86_64
self.platform = platform.machine()
if (self.platform == "x86_64"):
self.downloader.download_old_binaries(LAST_RELEASE_VERSION)
if self.platform == "x86_64":
self.downloader.download_old_binaries(PREV_RELEASE_VERSION)
self.downloader.download_old_binaries("7.0.0")
def build_c_api_tester_args(self, test_env, test_file):
@ -127,34 +114,27 @@ class FdbCShimTests:
"--tmp-dir",
test_env.tmp_dir,
"--stats-interval",
str(TESTER_STATS_INTERVAL_SEC * 1000)
str(TESTER_STATS_INTERVAL_SEC * 1000),
]
def run_c_api_test(self, version, test_file):
print('-' * 80)
print("-" * 80)
print("C API Test - version: {}, workload: {}".format(version, test_file))
print('-' * 80)
print("-" * 80)
with TestEnv(self.build_dir, self.downloader, version) as test_env:
cmd_args = self.build_c_api_tester_args(test_env, test_file)
env_vars = os.environ.copy()
env_vars["FDB_LOCAL_CLIENT_LIBRARY_PATH"] = self.downloader.lib_path(
version)
env_vars["FDB_LOCAL_CLIENT_LIBRARY_PATH"] = self.downloader.lib_path(version)
test_env.exec_client_command(cmd_args, env_vars)
def run_c_unit_tests(self, version):
print('-' * 80)
print("-" * 80)
print("C Unit Tests - version: {}".format(version))
print('-' * 80)
print("-" * 80)
with TestEnv(self.build_dir, self.downloader, version) as test_env:
cmd_args = [
self.unit_tests_bin,
test_env.cluster_file,
"fdb",
test_env.client_lib_external
]
cmd_args = [self.unit_tests_bin, test_env.cluster_file, "fdb", test_env.client_lib_external]
env_vars = os.environ.copy()
env_vars["FDB_LOCAL_CLIENT_LIBRARY_PATH"] = self.downloader.lib_path(
version)
env_vars["FDB_LOCAL_CLIENT_LIBRARY_PATH"] = self.downloader.lib_path(version)
test_env.exec_client_command(cmd_args, env_vars)
def run_c_shim_lib_tester(
@ -167,9 +147,9 @@ class FdbCShimTests:
set_env_path=False,
set_ld_lib_path=False,
use_external_lib=True,
expected_ret_code=0
expected_ret_code=0,
):
print('-' * 80)
print("-" * 80)
if api_version is None:
api_version = api_version_from_str(version)
test_flags = []
@ -183,9 +163,8 @@ class FdbCShimTests:
test_flags.append("use_external_lib")
else:
test_flags.append("use_local_lib")
print("C Shim Tests - version: {}, API version: {}, {}".format(version,
api_version, ", ".join(test_flags)))
print('-' * 80)
print("C Shim Tests - version: {}, API version: {}, {}".format(version, api_version, ", ".join(test_flags)))
print("-" * 80)
cmd_args = [
self.shim_lib_tester_bin,
"--cluster-file",
@ -196,20 +175,16 @@ class FdbCShimTests:
if call_set_path:
cmd_args = cmd_args + [
"--local-client-library",
("dummy" if invalid_lib_path else self.downloader.lib_path(version))
("dummy" if invalid_lib_path else self.downloader.lib_path(version)),
]
if use_external_lib:
cmd_args = cmd_args + [
"--disable-local-client",
"--external-client-library",
test_env.client_lib_external
]
cmd_args = cmd_args + ["--disable-local-client", "--external-client-library", test_env.client_lib_external]
env_vars = os.environ.copy()
env_vars["LD_LIBRARY_PATH"] = (
self.downloader.lib_dir(version) if set_ld_lib_path else "")
env_vars["LD_LIBRARY_PATH"] = self.downloader.lib_dir(version) if set_ld_lib_path else ""
if set_env_path:
env_vars["FDB_LOCAL_CLIENT_LIBRARY_PATH"] = (
"dummy" if invalid_lib_path else self.downloader.lib_path(version))
"dummy" if invalid_lib_path else self.downloader.lib_path(version)
)
test_env.exec_client_command(cmd_args, env_vars, expected_ret_code)
def run_tests(self):
@ -221,50 +196,60 @@ class FdbCShimTests:
with TestEnv(self.build_dir, self.downloader, CURRENT_VERSION) as test_env:
# Test lookup of the client library over LD_LIBRARY_PATH
self.run_c_shim_lib_tester(
CURRENT_VERSION, test_env, set_ld_lib_path=True)
self.run_c_shim_lib_tester(CURRENT_VERSION, test_env, set_ld_lib_path=True)
# Test setting the client library path over an API call
self.run_c_shim_lib_tester(
CURRENT_VERSION, test_env, call_set_path=True)
self.run_c_shim_lib_tester(CURRENT_VERSION, test_env, call_set_path=True)
# Test setting the client library path over an environment variable
self.run_c_shim_lib_tester(
CURRENT_VERSION, test_env, set_env_path=True)
self.run_c_shim_lib_tester(CURRENT_VERSION, test_env, set_env_path=True)
# Test using the loaded client library as the local client
self.run_c_shim_lib_tester(
CURRENT_VERSION, test_env, call_set_path=True, use_external_lib=False)
self.run_c_shim_lib_tester(CURRENT_VERSION, test_env, call_set_path=True, use_external_lib=False)
# Test setting an invalid client library path over an API call
self.run_c_shim_lib_tester(
CURRENT_VERSION, test_env, call_set_path=True, invalid_lib_path=True, expected_ret_code=IMPLIBSO_ERROR_CODE)
CURRENT_VERSION,
test_env,
call_set_path=True,
invalid_lib_path=True,
expected_ret_code=IMPLIBSO_ERROR_CODE,
)
# Test setting an invalid client library path over an environment variable
self.run_c_shim_lib_tester(
CURRENT_VERSION, test_env, set_env_path=True, invalid_lib_path=True, expected_ret_code=IMPLIBSO_ERROR_CODE)
CURRENT_VERSION,
test_env,
set_env_path=True,
invalid_lib_path=True,
expected_ret_code=IMPLIBSO_ERROR_CODE,
)
# Test calling a function that exists in the loaded library, but not for the selected API version
self.run_c_shim_lib_tester(
CURRENT_VERSION, test_env, call_set_path=True, api_version=700)
self.run_c_shim_lib_tester(CURRENT_VERSION, test_env, call_set_path=True, api_version=700)
# binary downloads are currently available only for x86_64
if self.platform == "x86_64":
# Test the API workload with the release version
self.run_c_api_test(LAST_RELEASE_VERSION, DEFAULT_TEST_FILE)
self.run_c_api_test(PREV_RELEASE_VERSION, DEFAULT_TEST_FILE)
with TestEnv(self.build_dir, self.downloader, LAST_RELEASE_VERSION) as test_env:
with TestEnv(self.build_dir, self.downloader, PREV_RELEASE_VERSION) as test_env:
# Test using the loaded client library as the local client
self.run_c_shim_lib_tester(
LAST_RELEASE_VERSION, test_env, call_set_path=True, use_external_lib=False)
self.run_c_shim_lib_tester(PREV_RELEASE_VERSION, test_env, call_set_path=True, use_external_lib=False)
# Test the client library of the release version in combination with the dev API version
self.run_c_shim_lib_tester(
LAST_RELEASE_VERSION, test_env, call_set_path=True, api_version=api_version_from_str(CURRENT_VERSION), expected_ret_code=1)
PREV_RELEASE_VERSION,
test_env,
call_set_path=True,
api_version=api_version_from_str(CURRENT_VERSION),
expected_ret_code=1,
)
# Test calling a function that does not exist in the loaded library
self.run_c_shim_lib_tester(
"7.0.0", test_env, call_set_path=True, api_version=700, expected_ret_code=IMPLIBSO_ERROR_CODE)
"7.0.0", test_env, call_set_path=True, api_version=700, expected_ret_code=IMPLIBSO_ERROR_CODE
)
if __name__ == "__main__":
@ -285,25 +270,17 @@ if __name__ == "__main__":
required=True,
)
parser.add_argument(
'--unit-tests-bin',
type=str,
help='Path to the fdb_c_shim_unit_tests executable.',
required=True)
"--unit-tests-bin", type=str, help="Path to the fdb_c_shim_unit_tests executable.", required=True
)
parser.add_argument(
'--api-tester-bin',
type=str,
help='Path to the fdb_c_shim_api_tester executable.',
required=True)
"--api-tester-bin", type=str, help="Path to the fdb_c_shim_api_tester executable.", required=True
)
parser.add_argument(
'--shim-lib-tester-bin',
type=str,
help='Path to the fdb_c_shim_lib_tester executable.',
required=True)
"--shim-lib-tester-bin", type=str, help="Path to the fdb_c_shim_lib_tester executable.", required=True
)
parser.add_argument(
'--api-test-dir',
type=str,
help='Path to a directory with api test definitions.',
required=True)
"--api-test-dir", type=str, help="Path to a directory with api test definitions.", required=True
)
args = parser.parse_args()
test = FdbCShimTests(args)
test.run_tests()

View File

@ -42,6 +42,8 @@ import (
// usually created and committed automatically by the (Database).Transact
// method.
type Database struct {
// String reference to the cluster file.
clusterFile string
*database
}
@ -56,6 +58,16 @@ type DatabaseOptions struct {
d *database
}
// Close will close the Database and clean up all resources.
// You must ensure that you are not reusing this database.
func (d *Database) Close() {
// Remove database object from the cached databases
delete(openDatabases, d.clusterFile)
// Destroy the database
d.destroy()
}
func (opt DatabaseOptions) setOpt(code int, param []byte) error {
return setOpt(func(p *C.uint8_t, pl C.int) C.fdb_error_t {
return C.fdb_database_set_option(opt.d.ptr, C.FDBDatabaseOption(code), p, pl)
@ -63,6 +75,10 @@ func (opt DatabaseOptions) setOpt(code int, param []byte) error {
}
func (d *database) destroy() {
if d.ptr == nil {
return
}
C.fdb_database_destroy(d.ptr)
}

View File

@ -39,6 +39,7 @@ import (
// Would put this in futures.go but for the documented issue with
// exports and functions in preamble
// (https://code.google.com/p/go-wiki/wiki/cgo#Global_functions)
//
//export unlockMutex
func unlockMutex(p unsafe.Pointer) {
m := (*sync.Mutex)(p)
@ -337,7 +338,7 @@ func createDatabase(clusterFile string) (Database, error) {
db := &database{outdb}
runtime.SetFinalizer(db, (*database).destroy)
return Database{db}, nil
return Database{clusterFile, db}, nil
}
// Deprecated: Use OpenDatabase instead.

View File

@ -48,7 +48,10 @@ func ExampleOpenDefault() {
return
}
_ = db
// Close the database after usage
defer db.Close()
// Do work here
// Output:
}
@ -313,3 +316,30 @@ func ExamplePrintable() {
fmt.Println(fdb.Printable([]byte{0, 1, 2, 'a', 'b', 'c', '1', '2', '3', '!', '?', 255}))
// Output: \x00\x01\x02abc123!?\xff
}
func TestDatabaseCloseRemovesResources(t *testing.T) {
err := fdb.APIVersion(API_VERSION)
if err != nil {
t.Fatalf("Unable to set API version: %v\n", err)
}
// OpenDefault opens the database described by the platform-specific default
// cluster file
db, err := fdb.OpenDefault()
if err != nil {
t.Fatalf("Unable to set API version: %v\n", err)
}
// Close the database after usage
db.Close()
// Open the same database again; if it were still in the cache we would get the same object back, otherwise a new object with a new pointer is created
newDB, err := fdb.OpenDefault()
if err != nil {
t.Fatalf("Unable to open default database: %v\n", err)
}
if db == newDB {
t.Fatalf("Expected a different database object, got: %v and %v\n", db, newDB)
}
}

View File

@ -25,14 +25,14 @@ https://apple.github.io/foundationdb/api-python.html"""
def open(*args, **kwargs):
raise RuntimeError('You must call api_version() before using any fdb methods')
raise RuntimeError("You must call api_version() before using any fdb methods")
init = open
def transactional(*args, **kwargs):
raise RuntimeError('You must call api_version() before using fdb.transactional')
raise RuntimeError("You must call api_version() before using fdb.transactional")
def _add_symbols(module, symbols):
@ -41,29 +41,29 @@ def _add_symbols(module, symbols):
def is_api_version_selected():
return '_version' in globals()
return "_version" in globals()
def get_api_version():
if is_api_version_selected():
return globals()['_version']
return globals()["_version"]
else:
raise RuntimeError('API version is not set')
raise RuntimeError("API version is not set")
def api_version(ver):
header_version = 720
if '_version' in globals():
if globals()['_version'] != ver:
raise RuntimeError('FDB API already loaded at version %d' % _version)
if "_version" in globals():
if globals()["_version"] != ver:
raise RuntimeError("FDB API already loaded at version %d" % _version)
return
if ver < 13:
raise RuntimeError('FDB API versions before 13 are not supported')
raise RuntimeError("FDB API versions before 13 are not supported")
if ver > header_version:
raise RuntimeError('Latest known FDB API version is %d' % header_version)
raise RuntimeError("Latest known FDB API version is %d" % header_version)
import fdb.impl
@ -71,31 +71,37 @@ def api_version(ver):
if err == 2203: # api_version_not_supported, but that's not helpful to the user
max_supported_ver = fdb.impl._capi.fdb_get_max_api_version()
if header_version > max_supported_ver:
raise RuntimeError("This version of the FoundationDB Python binding is not supported by the installed "
"FoundationDB C library. The binding requires a library that supports API version "
"%d, but the installed library supports a maximum version of %d." % (header_version, max_supported_ver))
raise RuntimeError(
"This version of the FoundationDB Python binding is not supported by the installed "
"FoundationDB C library. The binding requires a library that supports API version "
"%d, but the installed library supports a maximum version of %d."
% (header_version, max_supported_ver)
)
else:
raise RuntimeError("API version %d is not supported by the installed FoundationDB C library." % ver)
raise RuntimeError(
"API version %d is not supported by the installed FoundationDB C library."
% ver
)
elif err != 0:
raise RuntimeError('FoundationDB API error')
raise RuntimeError("FoundationDB API error")
fdb.impl.init_c_api()
list = (
'FDBError',
'predicates',
'Future',
'Database',
'Tenant',
'Transaction',
'KeyValue',
'KeySelector',
'open',
'transactional',
'options',
'StreamingMode',
"FDBError",
"predicates",
"Future",
"Database",
"Tenant",
"Transaction",
"KeyValue",
"KeySelector",
"open",
"transactional",
"options",
"StreamingMode",
)
_add_symbols(fdb.impl, list)
@ -134,14 +140,20 @@ def api_version(ver):
if not hasattr(self, "__iterating"):
self.__iterating = iter(self)
return next(self.__iterating)
setattr(fdb.impl.FDBRange, "next", next)
globals()['_version'] = ver
globals()["_version"] = ver
import fdb.directory_impl
directory_symbols = ('directory', 'DirectoryLayer',)
directory_symbols = (
"directory",
"DirectoryLayer",
)
_add_symbols(fdb.directory_impl, directory_symbols)
import fdb.subspace_impl
subspace_symbols = ('Subspace',)
subspace_symbols = ("Subspace",)
_add_symbols(fdb.subspace_impl, subspace_symbols)
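As a brief orientation (not part of the diff), here is a minimal sketch of the entry point this module wires up; the key and value are placeholders. An API version must be selected before any other fdb call, after which `open`, `transactional`, and the other symbols registered above become available.

```python
import fdb

# Select the API version first; the stubs above raise RuntimeError otherwise.
fdb.api_version(720)

# Open the database described by the default cluster file.
db = fdb.open()

@fdb.transactional
def set_and_get(tr):
    tr[b"hello"] = b"world"
    return tr[b"hello"].wait()

print(set_and_get(db))
```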

View File

@ -35,8 +35,7 @@ class AllocatorTransactionState:
self.lock = threading.Lock()
class HighContentionAllocator (object):
class HighContentionAllocator(object):
def __init__(self, subspace):
self.counters = subspace[0]
self.recent = subspace[1]
@ -45,9 +44,9 @@ class HighContentionAllocator (object):
@_impl.transactional
def allocate(self, tr):
"""Returns a byte string that
1) has never and will never be returned by another call to this
method on the same subspace
2) is nearly as short as possible given the above
1) has never and will never be returned by another call to this
method on the same subspace
2) is nearly as short as possible given the above
"""
# Get transaction-local state
@ -59,16 +58,23 @@ class HighContentionAllocator (object):
tr_state = tr.__fdb_directory_layer_hca_state__
while True:
[start] = [self.counters.unpack(k)[0] for k, _ in tr.snapshot.get_range(
self.counters.range().start, self.counters.range().stop, limit=1, reverse=True)] or [0]
[start] = [
self.counters.unpack(k)[0]
for k, _ in tr.snapshot.get_range(
self.counters.range().start,
self.counters.range().stop,
limit=1,
reverse=True,
)
] or [0]
window_advanced = False
while True:
with tr_state.lock:
if window_advanced:
del tr[self.counters: self.counters[start]]
del tr[self.counters : self.counters[start]]
tr.options.set_next_write_no_write_conflict_range()
del tr[self.recent: self.recent[start]]
del tr[self.recent : self.recent[start]]
# Increment the allocation count for the current window
tr.add(self.counters[start], struct.pack("<q", 1))
@ -94,10 +100,15 @@ class HighContentionAllocator (object):
candidate = random.randrange(start, start + window)
with tr_state.lock:
latest_counter = tr.snapshot.get_range(self.counters.range().start, self.counters.range().stop, limit=1, reverse=True)
latest_counter = tr.snapshot.get_range(
self.counters.range().start,
self.counters.range().stop,
limit=1,
reverse=True,
)
candidate_value = tr[self.recent[candidate]]
tr.options.set_next_write_no_write_conflict_range()
tr[self.recent[candidate]] = b''
tr[self.recent[candidate]] = b""
latest_counter = [self.counters.unpack(k)[0] for k, _ in latest_counter]
if len(latest_counter) > 0 and latest_counter[0] > start:
@ -121,7 +132,7 @@ class HighContentionAllocator (object):
class Directory(object):
def __init__(self, directory_layer, path=(), layer=b''):
def __init__(self, directory_layer, path=(), layer=b""):
self._directory_layer = directory_layer
self._path = path
self._layer = layer
@ -129,7 +140,9 @@ class Directory(object):
@_impl.transactional
def create_or_open(self, tr, path, layer=None):
path = self._tuplify_path(path)
return self._directory_layer.create_or_open(tr, self._partition_subpath(path), layer)
return self._directory_layer.create_or_open(
tr, self._partition_subpath(path), layer
)
@_impl.transactional
def open(self, tr, path, layer=None):
@ -139,7 +152,9 @@ class Directory(object):
@_impl.transactional
def create(self, tr, path, layer=None, prefix=None):
path = self._tuplify_path(path)
return self._directory_layer.create(tr, self._partition_subpath(path), layer, prefix)
return self._directory_layer.create(
tr, self._partition_subpath(path), layer, prefix
)
@_impl.transactional
def list(self, tr, path=()):
@ -150,7 +165,9 @@ class Directory(object):
def move(self, tr, old_path, new_path):
old_path = self._tuplify_path(old_path)
new_path = self._tuplify_path(new_path)
return self._directory_layer.move(tr, self._partition_subpath(old_path), self._partition_subpath(new_path))
return self._directory_layer.move(
tr, self._partition_subpath(old_path), self._partition_subpath(new_path)
)
@_impl.transactional
def move_to(self, tr, new_absolute_path):
@ -161,25 +178,33 @@ class Directory(object):
if partition_path != directory_layer._path:
raise ValueError("Cannot move between partitions.")
return directory_layer.move(tr, self._path[partition_len:], new_absolute_path[partition_len:])
return directory_layer.move(
tr, self._path[partition_len:], new_absolute_path[partition_len:]
)
@_impl.transactional
def remove(self, tr, path=()):
path = self._tuplify_path(path)
directory_layer = self._get_layer_for_path(path)
return directory_layer.remove(tr, self._partition_subpath(path, directory_layer))
return directory_layer.remove(
tr, self._partition_subpath(path, directory_layer)
)
@_impl.transactional
def remove_if_exists(self, tr, path=()):
path = self._tuplify_path(path)
directory_layer = self._get_layer_for_path(path)
return directory_layer.remove_if_exists(tr, self._partition_subpath(path, directory_layer))
return directory_layer.remove_if_exists(
tr, self._partition_subpath(path, directory_layer)
)
@_impl.transactional
def exists(self, tr, path=()):
path = self._tuplify_path(path)
directory_layer = self._get_layer_for_path(path)
return directory_layer.exists(tr, self._partition_subpath(path, directory_layer))
return directory_layer.exists(
tr, self._partition_subpath(path, directory_layer)
)
def get_layer(self):
return self._layer
@ -194,7 +219,7 @@ class Directory(object):
def _partition_subpath(self, path, directory_layer=None):
directory_layer = directory_layer or self._directory_layer
return self._path[len(directory_layer._path):] + path
return self._path[len(directory_layer._path) :] + path
# Called by all functions that could operate on this subspace directly (move_to, remove, remove_if_exists, exists)
# Subclasses can choose to return a different directory layer to use for the operation if path is in fact ()
@ -203,8 +228,12 @@ class Directory(object):
class DirectoryLayer(Directory):
def __init__(self, node_subspace=Subspace(rawPrefix=b'\xfe'), content_subspace=Subspace(), allow_manual_prefixes=False):
def __init__(
self,
node_subspace=Subspace(rawPrefix=b"\xfe"),
content_subspace=Subspace(),
allow_manual_prefixes=False,
):
Directory.__init__(self, self)
# If specified, new automatically allocated prefixes will all fall within content_subspace
@ -215,11 +244,11 @@ class DirectoryLayer(Directory):
# The root node is the one whose contents are the node subspace
self._root_node = self._node_subspace[self._node_subspace.key()]
self._allocator = HighContentionAllocator(self._root_node[b'hca'])
self._allocator = HighContentionAllocator(self._root_node[b"hca"])
@_impl.transactional
def create_or_open(self, tr, path, layer=None):
""" Opens the directory with the given path.
"""Opens the directory with the given path.
If the directory does not exist, it is created (creating parent
directories if necessary).
@ -229,12 +258,16 @@ class DirectoryLayer(Directory):
"""
return self._create_or_open_internal(tr, path, layer)
def _create_or_open_internal(self, tr, path, layer=None, prefix=None, allow_create=True, allow_open=True):
def _create_or_open_internal(
self, tr, path, layer=None, prefix=None, allow_create=True, allow_open=True
):
self._check_version(tr, write_access=False)
if prefix is not None and not self._allow_manual_prefixes:
if len(self._path) == 0:
raise ValueError("Cannot specify a prefix unless manual prefixes are enabled.")
raise ValueError(
"Cannot specify a prefix unless manual prefixes are enabled."
)
else:
raise ValueError("Cannot specify a prefix in a partition.")
@ -248,7 +281,9 @@ class DirectoryLayer(Directory):
if existing_node.exists():
if existing_node.is_in_partition():
subpath = existing_node.get_partition_subpath()
return existing_node.get_contents(self)._directory_layer._create_or_open_internal(
return existing_node.get_contents(
self
)._directory_layer._create_or_open_internal(
tr, subpath, layer, prefix, allow_create, allow_open
)
@ -256,7 +291,9 @@ class DirectoryLayer(Directory):
raise ValueError("The directory already exists.")
if layer and existing_node.layer() != layer:
raise ValueError("The directory was created with an incompatible layer.")
raise ValueError(
"The directory was created with an incompatible layer."
)
return existing_node.get_contents(self)
@ -269,16 +306,23 @@ class DirectoryLayer(Directory):
prefix = self._content_subspace.key() + self._allocator.allocate(tr)
if len(list(tr.get_range_startswith(prefix, limit=1))) > 0:
raise Exception("The database has keys stored at the prefix chosen by the automatic prefix allocator: %r." % prefix)
raise Exception(
"The database has keys stored at the prefix chosen by the automatic prefix allocator: %r."
% prefix
)
if not self._is_prefix_free(tr.snapshot, prefix):
raise Exception("The directory layer has manually allocated prefixes that conflict with the automatic prefix allocator.")
raise Exception(
"The directory layer has manually allocated prefixes that conflict with the automatic prefix allocator."
)
elif not self._is_prefix_free(tr, prefix):
raise ValueError("The given prefix is already in use.")
if len(path) > 1:
parent_node = self._node_with_prefix(self.create_or_open(tr, path[:-1]).key())
parent_node = self._node_with_prefix(
self.create_or_open(tr, path[:-1]).key()
)
else:
parent_node = self._root_node
if not parent_node:
@ -288,15 +332,15 @@ class DirectoryLayer(Directory):
node = self._node_with_prefix(prefix)
tr[parent_node[self.SUBDIRS][path[-1]]] = prefix
if not layer:
layer = b''
layer = b""
tr[node[b'layer']] = layer
tr[node[b"layer"]] = layer
return self._contents_of_node(node, path, layer)
@_impl.transactional
def open(self, tr, path, layer=None):
""" Opens the directory with the given path.
"""Opens the directory with the given path.
An error is raised if the directory does not exist, or if a layer is
specified and a different layer was specified when the directory was
@ -321,7 +365,7 @@ class DirectoryLayer(Directory):
@_impl.transactional
def move_to(self, tr, new_absolute_path):
raise Exception('The root directory cannot be moved.')
raise Exception("The root directory cannot be moved.")
@_impl.transactional
def move(self, tr, old_path, new_path):
@ -339,8 +383,10 @@ class DirectoryLayer(Directory):
old_path = _to_unicode_path(old_path)
new_path = _to_unicode_path(new_path)
if old_path == new_path[:len(old_path)]:
raise ValueError("The destination directory cannot be a subdirectory of the source directory.")
if old_path == new_path[: len(old_path)]:
raise ValueError(
"The destination directory cannot be a subdirectory of the source directory."
)
old_node = self._find(tr, old_path).prefetch_metadata(tr)
new_node = self._find(tr, new_path).prefetch_metadata(tr)
@ -349,18 +395,30 @@ class DirectoryLayer(Directory):
raise ValueError("The source directory does not exist.")
if old_node.is_in_partition() or new_node.is_in_partition():
if not old_node.is_in_partition() or not new_node.is_in_partition() or old_node.path != new_node.path:
if (
not old_node.is_in_partition()
or not new_node.is_in_partition()
or old_node.path != new_node.path
):
raise ValueError("Cannot move between partitions.")
return new_node.get_contents(self).move(tr, old_node.get_partition_subpath(), new_node.get_partition_subpath())
return new_node.get_contents(self).move(
tr, old_node.get_partition_subpath(), new_node.get_partition_subpath()
)
if new_node.exists():
raise ValueError("The destination directory already exists. Remove it first.")
raise ValueError(
"The destination directory already exists. Remove it first."
)
parent_node = self._find(tr, new_path[:-1])
if not parent_node.exists():
raise ValueError("The parent of the destination directory does not exist. Create it first.")
tr[parent_node.subspace[self.SUBDIRS][new_path[-1]]] = self._node_subspace.unpack(old_node.subspace.key())[0]
raise ValueError(
"The parent of the destination directory does not exist. Create it first."
)
tr[
parent_node.subspace[self.SUBDIRS][new_path[-1]]
] = self._node_subspace.unpack(old_node.subspace.key())[0]
self._remove_from_parent(tr, old_path)
return self._contents_of_node(old_node.subspace, new_path, old_node.layer())
@ -400,7 +458,9 @@ class DirectoryLayer(Directory):
return False
if node.is_in_partition():
return node.get_contents(self)._directory_layer._remove_internal(tr, node.get_partition_subpath(), fail_on_nonexistent)
return node.get_contents(self)._directory_layer._remove_internal(
tr, node.get_partition_subpath(), fail_on_nonexistent
)
self._remove_recursive(tr, node.subspace)
self._remove_from_parent(tr, path)
@ -447,7 +507,7 @@ class DirectoryLayer(Directory):
VERSION = (1, 0, 0)
def _check_version(self, tr, write_access=True):
version = tr[self._root_node[b'version']]
version = tr[self._root_node[b"version"]]
if not version.present():
if write_access:
@ -455,16 +515,22 @@ class DirectoryLayer(Directory):
return
version = struct.unpack('<III', bytes(version))
version = struct.unpack("<III", bytes(version))
if version[0] > self.VERSION[0]:
raise Exception("Cannot load directory with version %d.%d.%d using directory layer %d.%d.%d" % (version + self.VERSION))
raise Exception(
"Cannot load directory with version %d.%d.%d using directory layer %d.%d.%d"
% (version + self.VERSION)
)
if version[1] > self.VERSION[1] and write_access:
raise Exception("Directory with version %d.%d.%d is read-only when opened using directory layer %d.%d.%d" % (version + self.VERSION))
raise Exception(
"Directory with version %d.%d.%d is read-only when opened using directory layer %d.%d.%d"
% (version + self.VERSION)
)
def _initialize_directory(self, tr):
tr[self._root_node[b'version']] = struct.pack('<III', *self.VERSION)
tr[self._root_node[b"version"]] = struct.pack("<III", *self.VERSION)
def _node_containing_key(self, tr, key):
# Right now this is only used for _is_prefix_free(), but if we add
@ -472,10 +538,12 @@ class DirectoryLayer(Directory):
# path based on a key.
if key.startswith(self._node_subspace.key()):
return self._root_node
for k, v in tr.get_range(self._node_subspace.range(()).start,
self._node_subspace.pack((key,)) + b'\x00',
reverse=True,
limit=1):
for k, v in tr.get_range(
self._node_subspace.range(()).start,
self._node_subspace.pack((key,)) + b"\x00",
reverse=True,
limit=1,
):
prev_prefix = self._node_subspace.unpack(k)[0]
if key.startswith(prev_prefix):
return self._node_with_prefix(prev_prefix)
@ -489,7 +557,7 @@ class DirectoryLayer(Directory):
def _contents_of_node(self, node, path, layer=None):
prefix = self._node_subspace.unpack(node.key())[0]
if layer == b'partition':
if layer == b"partition":
return DirectoryPartition(self._path + path, prefix, self)
else:
return DirectorySubspace(self._path + path, prefix, self, layer)
@ -497,8 +565,12 @@ class DirectoryLayer(Directory):
def _find(self, tr, path):
n = _Node(self._root_node, (), path)
for i, name in enumerate(path):
n = _Node(self._node_with_prefix(tr[n.subspace[self.SUBDIRS][name]]), path[:i + 1], path)
if not n.exists() or n.layer(tr) == b'partition':
n = _Node(
self._node_with_prefix(tr[n.subspace[self.SUBDIRS][name]]),
path[: i + 1],
path,
)
if not n.exists() or n.layer(tr) == b"partition":
return n
return n
@ -521,8 +593,19 @@ class DirectoryLayer(Directory):
# Returns true if the given prefix does not "intersect" any currently
# allocated prefix (including the root node). This means that it neither
# contains any other prefix nor is contained by any other prefix.
return prefix and not self._node_containing_key(tr, prefix) \
and not len(list(tr.get_range(self._node_subspace.pack((prefix,)), self._node_subspace.pack((_impl.strinc(prefix),)), limit=1)))
return (
prefix
and not self._node_containing_key(tr, prefix)
and not len(
list(
tr.get_range(
self._node_subspace.pack((prefix,)),
self._node_subspace.pack((_impl.strinc(prefix),)),
limit=1,
)
)
)
)
def _is_prefix_empty(self, tr, prefix):
return len(list(tr.get_range(prefix, _impl.strinc(prefix), limit=1))) == 0
@ -541,11 +624,15 @@ def _to_unicode_path(path):
if isinstance(name, bytes):
path[i] = six.text_type(path[i])
elif not isinstance(name, six.text_type):
raise ValueError('Invalid path: must be a unicode string or a tuple of unicode strings')
raise ValueError(
"Invalid path: must be a unicode string or a tuple of unicode strings"
)
return tuple(path)
raise ValueError('Invalid path: must be a unicode string or a tuple of unicode strings')
raise ValueError(
"Invalid path: must be a unicode string or a tuple of unicode strings"
)
directory = DirectoryLayer()
@ -561,43 +648,59 @@ class DirectorySubspace(Subspace, Directory):
Directory.__init__(self, directory_layer, path, layer)
def __repr__(self):
return 'DirectorySubspace(path=' + repr(self._path) + ', prefix=' + repr(self.rawPrefix) + ')'
return (
"DirectorySubspace(path="
+ repr(self._path)
+ ", prefix="
+ repr(self.rawPrefix)
+ ")"
)
class DirectoryPartition(DirectorySubspace):
def __init__(self, path, prefix, parent_directory_layer):
directory_layer = DirectoryLayer(Subspace(rawPrefix=prefix + b'\xfe'), Subspace(rawPrefix=prefix))
directory_layer = DirectoryLayer(
Subspace(rawPrefix=prefix + b"\xfe"), Subspace(rawPrefix=prefix)
)
directory_layer._path = path
DirectorySubspace.__init__(self, path, prefix, directory_layer, b'partition')
DirectorySubspace.__init__(self, path, prefix, directory_layer, b"partition")
self._parent_directory_layer = parent_directory_layer
def __repr__(self):
return 'DirectoryPartition(path=' + repr(self._path) + ', prefix=' + repr(self.rawPrefix) + ')'
return (
"DirectoryPartition(path="
+ repr(self._path)
+ ", prefix="
+ repr(self.rawPrefix)
+ ")"
)
def __getitem__(self, name):
raise Exception('Cannot open subspace in the root of a directory partition.')
raise Exception("Cannot open subspace in the root of a directory partition.")
def key(self):
raise Exception('Cannot get key for the root of a directory partition.')
raise Exception("Cannot get key for the root of a directory partition.")
def pack(self, t=tuple()):
raise Exception('Cannot pack keys using the root of a directory partition.')
raise Exception("Cannot pack keys using the root of a directory partition.")
def unpack(self, key):
raise Exception('Cannot unpack keys using the root of a directory partition.')
raise Exception("Cannot unpack keys using the root of a directory partition.")
def range(self, t=tuple()):
raise Exception('Cannot get range for the root of a directory partition.')
raise Exception("Cannot get range for the root of a directory partition.")
def contains(self, key):
raise Exception('Cannot check whether a key belongs to the root of a directory partition.')
raise Exception(
"Cannot check whether a key belongs to the root of a directory partition."
)
def as_foundationdb_key(self):
raise Exception('Cannot use the root of a directory partition as a key.')
raise Exception("Cannot use the root of a directory partition as a key.")
def subspace(self, tuple):
raise Exception('Cannot open subspace in the root of a directory partition.')
raise Exception("Cannot open subspace in the root of a directory partition.")
def _get_layer_for_path(self, path):
if path == ():
@ -606,8 +709,7 @@ class DirectoryPartition(DirectorySubspace):
return self._directory_layer
class _Node (object):
class _Node(object):
def __init__(self, subspace, path, target_path):
self.subspace = subspace
self.path = path
@ -625,17 +727,23 @@ class _Node (object):
def layer(self, tr=None):
if tr:
self._layer = tr[self.subspace[b'layer']]
self._layer = tr[self.subspace[b"layer"]]
elif self._layer is None:
raise Exception('Layer has not been read')
raise Exception("Layer has not been read")
return self._layer
def is_in_partition(self, tr=None, include_empty_subpath=False):
return self.exists() and self.layer(tr) == b'partition' and (include_empty_subpath or len(self.target_path) > len(self.path))
return (
self.exists()
and self.layer(tr) == b"partition"
and (include_empty_subpath or len(self.target_path) > len(self.path))
)
def get_partition_subpath(self):
return self.target_path[len(self.path):]
return self.target_path[len(self.path) :]
def get_contents(self, directory_layer, tr=None):
return directory_layer._contents_of_node(self.subspace, self.path, self.layer(tr))
return directory_layer._contents_of_node(
self.subspace, self.path, self.layer(tr)
)
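For orientation (not part of the diff), a hedged sketch of typical directory-layer usage; the directory path and keys are placeholders. `create_or_open` allocates a short prefix through the HighContentionAllocator above and returns a DirectorySubspace rooted at that prefix.

```python
import fdb
import fdb.tuple

fdb.api_version(720)
db = fdb.open()

# First creation allocates a prefix via HighContentionAllocator.allocate;
# subsequent calls open the existing directory.
users_dir = fdb.directory.create_or_open(db, ("application", "users"))

@fdb.transactional
def add_user(tr, user_id, name):
    # Keys are tuple-encoded under the directory's allocated prefix.
    tr[users_dir.pack((user_id,))] = fdb.tuple.pack((name,))

add_user(db, 42, "alice")
```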

File diff suppressed because it is too large

View File

@ -40,13 +40,15 @@ def _get_boundary_keys(db_or_tr, begin, end):
lastbegin = begin
tr.options.set_read_system_keys()
tr.options.set_lock_aware()
kvs = tr.snapshot.get_range(b'\xff' + b'/keyServers/' + begin, b'\xff' + b'/keyServers/' + end)
kvs = tr.snapshot.get_range(
b"\xff" + b"/keyServers/" + begin, b"\xff" + b"/keyServers/" + end
)
if first_time:
first_time = False
yield None # trick to get the above get_range to be asynchronously dispatched before get_boundary_keys() returns.
for kv in kvs:
yield kv.key[13:]
begin = kv.key[13:] + b'\x00'
begin = kv.key[13:] + b"\x00"
begin = end
except _impl.FDBError as e:
# if we get a transaction_too_old and *something* has happened, then we are no longer transactional
@ -71,4 +73,8 @@ def get_boundary_keys(db_or_tr, begin, end):
@_impl.transactional
def get_addresses_for_key(tr, key):
keyBytes = _impl.keyToBytes(key)
return _impl.FutureStringArray(tr.capi.fdb_transaction_get_addresses_for_key(tr.tpointer, keyBytes, len(keyBytes)))
return _impl.FutureStringArray(
tr.capi.fdb_transaction_get_addresses_for_key(
tr.tpointer, keyBytes, len(keyBytes)
)
)
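A short usage sketch (not part of the diff; assumes an already opened database): `get_boundary_keys` returns a lazily consumed generator, which is why the code above pre-dispatches the first `get_range` before yielding.

```python
import fdb

fdb.api_version(720)
db = fdb.open()

# Shard boundary keys for the whole normal keyspace; the range reads are
# issued as the generator is consumed.
for boundary in fdb.locality.get_boundary_keys(db, b"", b"\xff"):
    print(boundary)
```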

View File

@ -23,13 +23,12 @@
import fdb.tuple
class Subspace (object):
def __init__(self, prefixTuple=tuple(), rawPrefix=b''):
class Subspace(object):
def __init__(self, prefixTuple=tuple(), rawPrefix=b""):
self.rawPrefix = fdb.tuple.pack(prefixTuple, prefix=rawPrefix)
def __repr__(self):
return 'Subspace(rawPrefix=' + repr(self.rawPrefix) + ')'
return "Subspace(rawPrefix=" + repr(self.rawPrefix) + ")"
def __getitem__(self, name):
return Subspace((name,), self.rawPrefix)
@ -45,7 +44,7 @@ class Subspace (object):
def unpack(self, key):
if not self.contains(key):
raise ValueError('Cannot unpack key that is not in subspace.')
raise ValueError("Cannot unpack key that is not in subspace.")
return fdb.tuple.unpack(key, prefix_len=len(self.rawPrefix))
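A quick illustrative round trip (not part of the diff; the prefix tuple is arbitrary) showing the pack/contains/unpack contract above:

```python
import fdb

fdb.api_version(720)

temps = fdb.Subspace(("temperature_readings",))
key = temps.pack((2022, 11, 29))  # prefix + tuple-encoded remainder

assert temps.contains(key)
assert temps.unpack(key) == (2022, 11, 29)

# Keys outside the subspace raise ValueError, as implemented above.
try:
    temps.unpack(b"unrelated_key")
except ValueError:
    pass
```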

View File

@ -25,9 +25,10 @@ https://apple.github.io/foundationdb/api-python.html"""
from fdb import impl as _impl
_tenant_map_prefix = b'\xff\xff/management/tenant/map/'
_tenant_map_prefix = b"\xff\xff/management/tenant/map/"
# If the existence_check_marker is an empty list, then check whether the tenant exists.
# After the check, append an item to the existence_check_marker list so that subsequent
# calls to this function will not perform the existence check.
#
@ -37,11 +38,12 @@ def _check_tenant_existence(tr, key, existence_check_marker, force_maybe_commite
existing_tenant = tr[key].wait()
existence_check_marker.append(None)
if force_maybe_commited:
raise _impl.FDBError(1021) # maybe_committed
raise _impl.FDBError(1021) # maybe_committed
return existing_tenant != None
return None
# Attempt to create a tenant in the cluster. If existence_check_marker is an empty
# list, then this function will check if the tenant already exists and fail if it does.
# Once the existence check is completed, it will not be done again if this function
@ -51,15 +53,23 @@ def _check_tenant_existence(tr, key, existence_check_marker, force_maybe_commite
#
# If the existence_check_marker is a non-empty list, then the existence check is skipped.
@_impl.transactional
def _create_tenant_impl(tr, tenant_name, existence_check_marker, force_existence_check_maybe_committed=False):
def _create_tenant_impl(
tr, tenant_name, existence_check_marker, force_existence_check_maybe_committed=False
):
tr.options.set_special_key_space_enable_writes()
key = b'%s%s' % (_tenant_map_prefix, tenant_name)
key = b"%s%s" % (_tenant_map_prefix, tenant_name)
if (
_check_tenant_existence(
tr, key, existence_check_marker, force_existence_check_maybe_committed
)
is True
):
raise _impl.FDBError(2132) # tenant_already_exists
tr[key] = b""
if _check_tenant_existence(tr, key, existence_check_marker, force_existence_check_maybe_committed) is True:
raise _impl.FDBError(2132) # tenant_already_exists
tr[key] = b''
# Attempt to delete a tenant from the cluster. If existence_check_marker is an empty
# list, then this function will check if the tenant already exists and fail if it does
# not. Once the existence check is completed, it will not be done again if this function
@ -69,15 +79,23 @@ def _create_tenant_impl(tr, tenant_name, existence_check_marker, force_existence
#
# If the existence_check_marker is a non-empty list, then the existence check is skipped.
@_impl.transactional
def _delete_tenant_impl(tr, tenant_name, existence_check_marker, force_existence_check_maybe_committed=False):
def _delete_tenant_impl(
tr, tenant_name, existence_check_marker, force_existence_check_maybe_committed=False
):
tr.options.set_special_key_space_enable_writes()
key = b'%s%s' % (_tenant_map_prefix, tenant_name)
key = b"%s%s" % (_tenant_map_prefix, tenant_name)
if _check_tenant_existence(tr, key, existence_check_marker, force_existence_check_maybe_committed) is False:
raise _impl.FDBError(2131) # tenant_not_found
if (
_check_tenant_existence(
tr, key, existence_check_marker, force_existence_check_maybe_committed
)
is False
):
raise _impl.FDBError(2131) # tenant_not_found
del tr[key]
class FDBTenantList(object):
"""Iterates over the results of list_tenants query. Returns
KeyValue objects.
@ -96,6 +114,7 @@ class FDBTenantList(object):
tenant_name = _impl.remove_prefix(next_item.key, _tenant_map_prefix)
yield _impl.KeyValue(tenant_name, next_item.value)
# Lists the tenants created in the cluster, specified by the begin and end range.
# Also limited in number of results by the limit parameter.
# Returns an iterable object that yields KeyValue objects
@ -104,29 +123,36 @@ class FDBTenantList(object):
@_impl.transactional
def _list_tenants_impl(tr, begin, end, limit):
tr.options.set_raw_access()
begin_key = b'%s%s' % (_tenant_map_prefix, begin)
end_key = b'%s%s' % (_tenant_map_prefix, end)
begin_key = b"%s%s" % (_tenant_map_prefix, begin)
end_key = b"%s%s" % (_tenant_map_prefix, end)
rangeresult = tr.get_range(begin_key, end_key, limit)
return FDBTenantList(rangeresult)
def create_tenant(db_or_tr, tenant_name):
tenant_name = _impl.process_tenant_name(tenant_name)
# Only perform the existence check when run using a database
# Callers using a transaction are expected to check existence themselves if required
existence_check_marker = [] if not isinstance(db_or_tr, _impl.TransactionRead) else [None]
existence_check_marker = (
[] if not isinstance(db_or_tr, _impl.TransactionRead) else [None]
)
_create_tenant_impl(db_or_tr, tenant_name, existence_check_marker)
def delete_tenant(db_or_tr, tenant_name):
tenant_name = _impl.process_tenant_name(tenant_name)
# Only perform the existence check when run using a database
# Callers using a transaction are expected to check existence themselves if required
existence_check_marker = [] if not isinstance(db_or_tr, _impl.TransactionRead) else [None]
existence_check_marker = (
[] if not isinstance(db_or_tr, _impl.TransactionRead) else [None]
)
_delete_tenant_impl(db_or_tr, tenant_name, existence_check_marker)
def list_tenants(db_or_tr, begin, end, limit):
begin = _impl.process_tenant_name(begin)
end = _impl.process_tenant_name(end)
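To illustrate the marker-based existence check described in the comments above, a hedged sketch (tenant names are placeholders): database-level calls run the existence check only on the first attempt, so retries after a maybe_committed error stay idempotent, while transaction-level calls skip the check.

```python
import fdb
import fdb.tenant_management

fdb.api_version(720)
db = fdb.open()

# Database-level call: an empty existence_check_marker is created internally,
# so the first attempt checks for the tenant and any maybe_committed retry
# skips the check instead of raising tenant_already_exists.
fdb.tenant_management.create_tenant(db, b"sample_tenant")

# List tenants in a range; yields KeyValue objects as described above.
for t in fdb.tenant_management.list_tenants(db, b"", b"\xff", 10):
    print(t.key)

fdb.tenant_management.delete_tenant(db, b"sample_tenant")
```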

View File

@ -39,8 +39,8 @@ BYTES_CODE = 0x01
STRING_CODE = 0x02
NESTED_CODE = 0x05
INT_ZERO_CODE = 0x14
POS_INT_END = 0x1d
NEG_INT_START = 0x0b
POS_INT_END = 0x1D
NEG_INT_START = 0x0B
FLOAT_CODE = 0x20
DOUBLE_CODE = 0x21
FALSE_CODE = 0x26
@ -54,10 +54,10 @@ VERSIONSTAMP_CODE = 0x33
def _find_terminator(v, pos):
# Finds the start of the next terminator [\x00]![\xff] or the end of v
while True:
pos = v.find(b'\x00', pos)
pos = v.find(b"\x00", pos)
if pos < 0:
return len(v)
if pos + 1 == len(v) or v[pos + 1:pos + 2] != b'\xff':
if pos + 1 == len(v) or v[pos + 1 : pos + 2] != b"\xff":
return pos
pos += 2
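# A worked example of the terminator search above (illustration only; assumes the module
# is importable as fdb.tuple): a literal NUL inside a bytes element is escaped as
# b"\x00\xff", so the scan skips escaped NULs and stops at the first unescaped one.
import fdb
fdb.api_version(720)
from fdb.tuple import pack, _find_terminator
v = pack((b"a\x00b", 0))            # encodes to b"\x01a\x00\xffb\x00\x14"
assert _find_terminator(v, 1) == 5  # the escaped NUL at index 2 is skipped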
@ -66,9 +66,9 @@ def _find_terminator(v, pos):
# If decoding and sign bit is 0 (negative), flip all of the bits. Otherwise, just flip sign.
def _float_adjust(v, encode):
if encode and six.indexbytes(v, 0) & 0x80 != 0x00:
return b''.join(map(lambda x: six.int2byte(x ^ 0xff), six.iterbytes(v)))
return b"".join(map(lambda x: six.int2byte(x ^ 0xFF), six.iterbytes(v)))
elif not encode and six.indexbytes(v, 0) & 0x80 != 0x80:
return b''.join(map(lambda x: six.int2byte(x ^ 0xff), six.iterbytes(v)))
return b"".join(map(lambda x: six.int2byte(x ^ 0xFF), six.iterbytes(v)))
else:
return six.int2byte(six.indexbytes(v, 0) ^ 0x80) + v[1:]
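# A worked example of the adjustment above (illustration only): flipping the sign bit of
# non-negative floats and all bits of negative ones makes the lexicographic order of the
# encoded bytes match the numeric order of the values.
import fdb
fdb.api_version(720)
from fdb.tuple import pack
assert pack((-2.0,)) < pack((-1.0,)) < pack((0.0,)) < pack((1.5,))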
@ -84,7 +84,9 @@ class SingleFloat(object):
elif isinstance(value, six.integer_types):
self.value = ctypes.c_float(value).value
else:
raise ValueError("Incompatible type for single-precision float: " + repr(value))
raise ValueError(
"Incompatible type for single-precision float: " + repr(value)
)
# Comparisons
def __eq__(self, other):
@ -119,24 +121,42 @@ class Versionstamp(object):
LENGTH = 12
_TR_VERSION_LEN = 10
_MAX_USER_VERSION = (1 << 16) - 1
_UNSET_TR_VERSION = 10 * six.int2byte(0xff)
_STRUCT_FORMAT_STRING = '>' + str(_TR_VERSION_LEN) + 'sH'
_UNSET_TR_VERSION = 10 * six.int2byte(0xFF)
_STRUCT_FORMAT_STRING = ">" + str(_TR_VERSION_LEN) + "sH"
@classmethod
def validate_tr_version(cls, tr_version):
if tr_version is None:
return
if not isinstance(tr_version, bytes):
raise TypeError("Global version has illegal type " + str(type(tr_version)) + " (requires bytes)")
raise TypeError(
"Global version has illegal type "
+ str(type(tr_version))
+ " (requires bytes)"
)
elif len(tr_version) != cls._TR_VERSION_LEN:
raise ValueError("Global version has incorrect length " + str(len(tr_version)) + " (requires " + str(cls._TR_VERSION_LEN) + ")")
raise ValueError(
"Global version has incorrect length "
+ str(len(tr_version))
+ " (requires "
+ str(cls._TR_VERSION_LEN)
+ ")"
)
@classmethod
def validate_user_version(cls, user_version):
if not isinstance(user_version, six.integer_types):
raise TypeError("Local version has illegal type " + str(type(user_version)) + " (requires integer type)")
raise TypeError(
"Local version has illegal type "
+ str(type(user_version))
+ " (requires integer type)"
)
elif user_version < 0 or user_version > cls._MAX_USER_VERSION:
raise ValueError("Local version has value " + str(user_version) + " which is out of range")
raise ValueError(
"Local version has value "
+ str(user_version)
+ " which is out of range"
)
def __init__(self, tr_version=None, user_version=0):
Versionstamp.validate_tr_version(tr_version)
@ -153,30 +173,50 @@ class Versionstamp(object):
if not isinstance(v, bytes):
raise TypeError("Cannot parse versionstamp from non-byte string")
elif len(v) - start < cls.LENGTH:
raise ValueError("Versionstamp byte string is too short (only " + str(len(v) - start) + " bytes to read from")
raise ValueError(
"Versionstamp byte string is too short (only "
+ str(len(v) - start)
+ " bytes to read from"
)
else:
tr_version = v[start:start + cls._TR_VERSION_LEN]
tr_version = v[start : start + cls._TR_VERSION_LEN]
if tr_version == cls._UNSET_TR_VERSION:
tr_version = None
user_version = six.indexbytes(v, start + cls._TR_VERSION_LEN) * (1 << 8) + six.indexbytes(v, start + cls._TR_VERSION_LEN + 1)
user_version = six.indexbytes(v, start + cls._TR_VERSION_LEN) * (
1 << 8
) + six.indexbytes(v, start + cls._TR_VERSION_LEN + 1)
return Versionstamp(tr_version, user_version)
def is_complete(self):
return self.tr_version is not None
def __repr__(self):
return "fdb.tuple.Versionstamp(" + repr(self.tr_version) + ", " + repr(self.user_version) + ")"
return (
"fdb.tuple.Versionstamp("
+ repr(self.tr_version)
+ ", "
+ repr(self.user_version)
+ ")"
)
def __str__(self):
return "Versionstamp(" + repr(self.tr_version) + ", " + str(self.user_version) + ")"
return (
"Versionstamp("
+ repr(self.tr_version)
+ ", "
+ str(self.user_version)
+ ")"
)
def to_bytes(self):
tr_version = self.tr_version
if isinstance(tr_version, fdb.impl.Value):
tr_version = tr_version.value
return struct.pack(self._STRUCT_FORMAT_STRING,
tr_version if self.is_complete() else self._UNSET_TR_VERSION,
self.user_version)
return struct.pack(
self._STRUCT_FORMAT_STRING,
tr_version if self.is_complete() else self._UNSET_TR_VERSION,
self.user_version,
)
def completed(self, new_tr_version):
if self.is_complete():
@ -187,7 +227,10 @@ class Versionstamp(object):
# Comparisons
def __eq__(self, other):
if isinstance(other, Versionstamp):
return self.tr_version == other.tr_version and self.user_version == other.user_version
return (
self.tr_version == other.tr_version
and self.user_version == other.user_version
)
else:
return False
@ -224,18 +267,22 @@ def _decode(v, pos):
return None, pos + 1
elif code == BYTES_CODE:
end = _find_terminator(v, pos + 1)
return v[pos + 1:end].replace(b"\x00\xFF", b"\x00"), end + 1
return v[pos + 1 : end].replace(b"\x00\xFF", b"\x00"), end + 1
elif code == STRING_CODE:
end = _find_terminator(v, pos + 1)
return v[pos + 1:end].replace(b"\x00\xFF", b"\x00").decode("utf-8"), end + 1
return v[pos + 1 : end].replace(b"\x00\xFF", b"\x00").decode("utf-8"), end + 1
elif code >= INT_ZERO_CODE and code < POS_INT_END:
n = code - 20
end = pos + 1 + n
return struct.unpack(">Q", b'\x00' * (8 - n) + v[pos + 1:end])[0], end
return struct.unpack(">Q", b"\x00" * (8 - n) + v[pos + 1 : end])[0], end
elif code > NEG_INT_START and code < INT_ZERO_CODE:
n = 20 - code
end = pos + 1 + n
return struct.unpack(">Q", b'\x00' * (8 - n) + v[pos + 1:end])[0] - _size_limits[n], end
return (
struct.unpack(">Q", b"\x00" * (8 - n) + v[pos + 1 : end])[0]
- _size_limits[n],
end,
)
elif code == POS_INT_END: # 0x1d; Positive 9-255 byte integer
length = six.indexbytes(v, pos + 1)
val = 0
@ -244,25 +291,37 @@ def _decode(v, pos):
val += six.indexbytes(v, pos + 2 + i)
return val, pos + 2 + length
elif code == NEG_INT_START: # 0x0b; Negative 9-255 byte integer
length = six.indexbytes(v, pos + 1) ^ 0xff
length = six.indexbytes(v, pos + 1) ^ 0xFF
val = 0
for i in _range(length):
val = val << 8
val += six.indexbytes(v, pos + 2 + i)
return val - (1 << (length * 8)) + 1, pos + 2 + length
elif code == FLOAT_CODE:
return SingleFloat(struct.unpack(">f", _float_adjust(v[pos + 1:pos + 5], False))[0]), pos + 5
return (
SingleFloat(
struct.unpack(">f", _float_adjust(v[pos + 1 : pos + 5], False))[0]
),
pos + 5,
)
elif code == DOUBLE_CODE:
return struct.unpack(">d", _float_adjust(v[pos + 1:pos + 9], False))[0], pos + 9
return (
struct.unpack(">d", _float_adjust(v[pos + 1 : pos + 9], False))[0],
pos + 9,
)
elif code == UUID_CODE:
return uuid.UUID(bytes=v[pos + 1:pos + 17]), pos + 17
return uuid.UUID(bytes=v[pos + 1 : pos + 17]), pos + 17
elif code == FALSE_CODE:
if fdb.is_api_version_selected() and fdb.get_api_version() < 500:
raise ValueError("Invalid API version " + str(fdb._version) + " for boolean types")
raise ValueError(
"Invalid API version " + str(fdb._version) + " for boolean types"
)
return False, pos + 1
elif code == TRUE_CODE:
if fdb.is_api_version_selected() and fdb.get_api_version() < 500:
raise ValueError("Invalid API version " + str(fdb._version) + " for boolean types")
raise ValueError(
"Invalid API version " + str(fdb._version) + " for boolean types"
)
return True, pos + 1
elif code == VERSIONSTAMP_CODE:
return Versionstamp.from_bytes(v, pos + 1), pos + 1 + Versionstamp.LENGTH
@ -271,7 +330,7 @@ def _decode(v, pos):
end_pos = pos + 1
while end_pos < len(v):
if six.indexbytes(v, end_pos) == 0x00:
if end_pos + 1 < len(v) and six.indexbytes(v, end_pos + 1) == 0xff:
if end_pos + 1 < len(v) and six.indexbytes(v, end_pos + 1) == 0xFF:
ret.append(None)
end_pos += 2
else:
@ -299,11 +358,15 @@ def _reduce_children(child_values):
if sys.version_info < (2, 7):
def _bit_length(x):
s = bin(x) # binary representation: bin(-37) --> '-0b100101'
s = s.lstrip('-0b') # remove leading zeros and minus sign
s = bin(x) # binary representation: bin(-37) --> '-0b100101'
s = s.lstrip("-0b") # remove leading zeros and minus sign
return len(s)
else:
def _bit_length(x):
return x.bit_length()
@ -314,23 +377,33 @@ def _encode(value, nested=False):
# sorting need to work too!
if value == None: # ==, not is, because some fdb.impl.Value are equal to None
if nested:
return b''.join([six.int2byte(NULL_CODE), six.int2byte(0xff)]), -1
return b"".join([six.int2byte(NULL_CODE), six.int2byte(0xFF)]), -1
else:
return b''.join([six.int2byte(NULL_CODE)]), -1
return b"".join([six.int2byte(NULL_CODE)]), -1
elif isinstance(value, bytes): # also gets non-None fdb.impl.Value
return six.int2byte(BYTES_CODE) + value.replace(b'\x00', b'\x00\xFF') + b'\x00', -1
return (
six.int2byte(BYTES_CODE) + value.replace(b"\x00", b"\x00\xFF") + b"\x00",
-1,
)
elif isinstance(value, six.text_type):
return six.int2byte(STRING_CODE) + value.encode('utf-8').replace(b'\x00', b'\x00\xFF') + b'\x00', -1
elif isinstance(value, six.integer_types) and (not isinstance(value, bool) or (hasattr(fdb, '_version') and fdb._version < 500)):
return (
six.int2byte(STRING_CODE)
+ value.encode("utf-8").replace(b"\x00", b"\x00\xFF")
+ b"\x00",
-1,
)
elif isinstance(value, six.integer_types) and (
not isinstance(value, bool) or (hasattr(fdb, "_version") and fdb._version < 500)
):
if value == 0:
return b''.join([six.int2byte(INT_ZERO_CODE)]), -1
return b"".join([six.int2byte(INT_ZERO_CODE)]), -1
elif value > 0:
if value >= _size_limits[-1]:
length = (_bit_length(value) + 7) // 8
data = [six.int2byte(POS_INT_END), six.int2byte(length)]
for i in _range(length - 1, -1, -1):
data.append(six.int2byte((value >> (8 * i)) & 0xff))
return b''.join(data), -1
data.append(six.int2byte((value >> (8 * i)) & 0xFF))
return b"".join(data), -1
n = bisect_left(_size_limits, value)
return six.int2byte(INT_ZERO_CODE + n) + struct.pack(">Q", value)[-n:], -1
@ -338,34 +411,53 @@ def _encode(value, nested=False):
if -value >= _size_limits[-1]:
length = (_bit_length(value) + 7) // 8
value += (1 << (length * 8)) - 1
data = [six.int2byte(NEG_INT_START), six.int2byte(length ^ 0xff)]
data = [six.int2byte(NEG_INT_START), six.int2byte(length ^ 0xFF)]
for i in _range(length - 1, -1, -1):
data.append(six.int2byte((value >> (8 * i)) & 0xff))
return b''.join(data), -1
data.append(six.int2byte((value >> (8 * i)) & 0xFF))
return b"".join(data), -1
n = bisect_left(_size_limits, -value)
maxv = _size_limits[n]
return six.int2byte(INT_ZERO_CODE - n) + struct.pack(">Q", maxv + value)[-n:], -1
return (
six.int2byte(INT_ZERO_CODE - n) + struct.pack(">Q", maxv + value)[-n:],
-1,
)
elif isinstance(value, ctypes.c_float) or isinstance(value, SingleFloat):
return six.int2byte(FLOAT_CODE) + _float_adjust(struct.pack(">f", value.value), True), -1
return (
six.int2byte(FLOAT_CODE)
+ _float_adjust(struct.pack(">f", value.value), True),
-1,
)
elif isinstance(value, ctypes.c_double):
return six.int2byte(DOUBLE_CODE) + _float_adjust(struct.pack(">d", value.value), True), -1
return (
six.int2byte(DOUBLE_CODE)
+ _float_adjust(struct.pack(">d", value.value), True),
-1,
)
elif isinstance(value, float):
return six.int2byte(DOUBLE_CODE) + _float_adjust(struct.pack(">d", value), True), -1
return (
six.int2byte(DOUBLE_CODE) + _float_adjust(struct.pack(">d", value), True),
-1,
)
elif isinstance(value, uuid.UUID):
return six.int2byte(UUID_CODE) + value.bytes, -1
elif isinstance(value, bool):
if value:
return b''.join([six.int2byte(TRUE_CODE)]), -1
return b"".join([six.int2byte(TRUE_CODE)]), -1
else:
return b''.join([six.int2byte(FALSE_CODE)]), -1
return b"".join([six.int2byte(FALSE_CODE)]), -1
elif isinstance(value, Versionstamp):
version_pos = -1 if value.is_complete() else 1
return six.int2byte(VERSIONSTAMP_CODE) + value.to_bytes(), version_pos
elif isinstance(value, tuple) or isinstance(value, list):
child_bytes, version_pos = _reduce_children(map(lambda x: _encode(x, True), value))
child_bytes, version_pos = _reduce_children(
map(lambda x: _encode(x, True), value)
)
new_version_pos = -1 if version_pos < 0 else version_pos + 1
return b''.join([six.int2byte(NESTED_CODE)] + child_bytes + [six.int2byte(0x00)]), new_version_pos
return (
b"".join([six.int2byte(NESTED_CODE)] + child_bytes + [six.int2byte(0x00)]),
new_version_pos,
)
else:
raise ValueError("Unsupported data type: " + str(type(value)))
@ -387,13 +479,13 @@ def _pack_maybe_with_versionstamp(t, prefix=None):
version_pos += len(prefix) if prefix is not None else 0
bytes_list.extend(child_bytes)
if fdb.is_api_version_selected() and fdb.get_api_version() < 520:
bytes_list.append(struct.pack('<H', version_pos))
bytes_list.append(struct.pack("<H", version_pos))
else:
bytes_list.append(struct.pack('<L', version_pos))
bytes_list.append(struct.pack("<L", version_pos))
else:
bytes_list.extend(child_bytes)
return b''.join(bytes_list), version_pos
return b"".join(bytes_list), version_pos
# packs the specified tuple into a key
@ -408,7 +500,9 @@ def pack(t, prefix=None):
def pack_with_versionstamp(t, prefix=None):
res, version_pos = _pack_maybe_with_versionstamp(t, prefix)
if version_pos < 0:
raise ValueError("No incomplete versionstamp included in tuple pack with versionstamp")
raise ValueError(
"No incomplete versionstamp included in tuple pack with versionstamp"
)
return res
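# A minimal sketch of the versionstamp packing above (illustration only): the tuple must
# contain an incomplete Versionstamp; at API 520 and later the 4-byte offset of that
# versionstamp is appended, and a tuple without one raises the ValueError shown above.
import fdb
fdb.api_version(720)
from fdb.tuple import pack_with_versionstamp, Versionstamp
key = pack_with_versionstamp((b"log", Versionstamp()))  # Versionstamp() with no args is incomplete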
@ -433,6 +527,7 @@ def has_incomplete_versionstamp(t):
return has_incomplete_versionstamp(item)
else:
return False
return any(map(_elem_has_incomplete, t))
@ -450,9 +545,7 @@ def range(t):
raise Exception("fdbtuple range() expects a tuple, got a " + str(type(t)))
p = pack(t)
return slice(
p + b'\x00',
p + b'\xff')
return slice(p + b"\x00", p + b"\xff")
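# A minimal sketch of the range helper above (illustration only): the returned slice
# covers every key that extends the packed tuple and can be fed directly to a range read.
import fdb
fdb.api_version(720)
from fdb.tuple import pack, range as tuple_range
r = tuple_range((b"users",))
assert (r.start, r.stop) == (pack((b"users",)) + b"\x00", pack((b"users",)) + b"\xff")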
def _code_for(value):
@ -462,7 +555,9 @@ def _code_for(value):
return BYTES_CODE
elif isinstance(value, six.text_type):
return STRING_CODE
elif (not hasattr(fdb, '_version') or fdb._version >= 500) and isinstance(value, bool):
elif (not hasattr(fdb, "_version") or fdb._version >= 500) and isinstance(
value, bool
):
return FALSE_CODE
elif isinstance(value, six.integer_types):
return INT_ZERO_CODE
@ -514,8 +609,8 @@ def _compare_values(value1, value2):
if code1 == NULL_CODE:
return 0
elif code1 == STRING_CODE:
encoded1 = value1.encode('utf-8')
encoded2 = value2.encode('utf-8')
encoded1 = value1.encode("utf-8")
encoded2 = value2.encode("utf-8")
return -1 if encoded1 < encoded2 else 0 if encoded1 == encoded2 else 1
elif code1 == FLOAT_CODE:
f1 = value1 if isinstance(value1, SingleFloat) else SingleFloat(value1.value)

View File

@ -518,7 +518,7 @@ def test_timeouts(db):
for i in range(2):
tr.options.set_timeout(1500)
tr.set_read_version(0x7ffffffffffffff0)
x = tr[b'foo']
_ = tr[b'foo']
try:
tr.commit().wait()
tr.reset()
@ -557,7 +557,7 @@ def test_db_timeouts(db):
tr[b'foo'] = b'bar'
tr.on_error(err).wait() # should not throw
time.sleep(1)
tr[b'foo']
_ = tr[b'foo']
try:
tr.commit().wait() # should throw
raise TestError("(2) Timeout didn't fire.")
@ -574,7 +574,7 @@ def test_db_timeouts(db):
time.sleep(0.75)
tr[b'foo'] = b'bar'
tr.on_error(err).wait() # should not throw
tr[b'foo']
_ = tr[b'foo']
time.sleep(0.75)
try:
tr.commit().wait() # should throw
@ -615,7 +615,7 @@ def test_db_timeouts(db):
tr.reset()
tr[b'foo'] = b'bar'
time.sleep(0.2)
tr.on_error(err).wait() #should not throw
tr.on_error(err).wait() # should not throw
tr[b'foo'] = b'bar'
time.sleep(0.8)
try:

View File

@ -24,15 +24,18 @@ import sys
if __name__ == '__main__':
fdb.api_version(720)
@fdb.transactional
def setValue(tr, key, value):
tr[key] = value
@fdb.transactional
def setValueWithLimit(tr, key, value, limit):
tr.options.set_size_limit(limit)
tr[key] = value
def test_size_limit_option(db):
value = b'a' * 1024
@ -69,6 +72,7 @@ def test_size_limit_option(db):
# Reset the size limit for future tests
db.options.set_transaction_size_limit(10000000)
@fdb.transactional
def test_get_approximate_size(tr):
tr[b'key1'] = b'value1'
@ -90,6 +94,7 @@ def test_get_approximate_size(tr):
s5 = tr.get_approximate_size().wait()
assert(s4 < s5)
# Expect a cluster file as input. This test will write to the FDB cluster, so
# be aware of potential side effects.
if __name__ == '__main__':

View File

@ -27,24 +27,26 @@ from fdb.tuple import pack
if __name__ == '__main__':
fdb.api_version(720)
def cleanup_tenant(db, tenant_name):
try:
tenant = db.open_tenant(tenant_name)
del tenant[:]
fdb.tenant_management.delete_tenant(db, tenant_name)
except fdb.FDBError as e:
if e.code == 2131: # tenant not found
if e.code == 2131: # tenant not found
pass
else:
raise
def test_tenant_tuple_name(db):
tuplename=(b'test', b'level', b'hierarchy', 3, 1.24, 'str')
tuplename = (b'test', b'level', b'hierarchy', 3, 1.24, 'str')
cleanup_tenant(db, tuplename)
fdb.tenant_management.create_tenant(db, tuplename)
tenant=db.open_tenant(tuplename)
tenant = db.open_tenant(tuplename)
tenant[b'foo'] = b'bar'
assert tenant[b'foo'] == b'bar'
@ -100,7 +102,7 @@ def test_tenant_operations(db):
del tr1[:]
tr1.commit().wait()
except fdb.FDBError as e:
tr.on_error(e).wait()
tr1.on_error(e).wait()
assert tenant1[b'tenant_test_key'] == None
assert db[prefix1 + b'tenant_test_key'] == None
@ -113,7 +115,7 @@ def test_tenant_operations(db):
tenant1[b'tenant_test_key']
assert False
except fdb.FDBError as e:
assert e.code == 2131 # tenant not found
assert e.code == 2131 # tenant not found
del tenant2[:]
fdb.tenant_management.delete_tenant(db, b'tenant2')
@ -126,6 +128,7 @@ def test_tenant_operations(db):
assert db[b'tenant_test_key'] == None
def test_tenant_operation_retries(db):
cleanup_tenant(db, b'tenant1')
cleanup_tenant(db, b'tenant2')
@ -138,7 +141,7 @@ def test_tenant_operation_retries(db):
fdb.tenant_management.create_tenant(db, b'tenant1')
assert False
except fdb.FDBError as e:
assert e.code == 2132 # tenant already exists
assert e.code == 2132 # tenant already exists
# Using a transaction skips the existence check
tr = db.create_transaction()
@ -166,7 +169,7 @@ def test_tenant_operation_retries(db):
fdb.tenant_management.delete_tenant(db, b'tenant1')
assert False
except fdb.FDBError as e:
assert e.code == 2131 # tenant not found
assert e.code == 2131 # tenant not found
# Using a transaction skips the existence check
tr = db.create_transaction()
@ -186,11 +189,13 @@ def test_tenant_operation_retries(db):
except fdb.FDBError as e:
tr.on_error(e).wait()
def test_tenants(db):
test_tenant_tuple_name(db)
test_tenant_operations(db)
test_tenant_operation_retries(db)
# Expect a cluster file as input. This test will write to the FDB cluster, so
# be aware of potential side effects.
if __name__ == '__main__':

View File

@ -26,7 +26,6 @@ import sys
import os
import struct
import threading
import time
import random
import time
import traceback
@ -136,7 +135,7 @@ def test_fdb_transactional_generator(db):
def function_that_yields(tr):
yield 0
assert fdb.get_api_version() < 630, "Pre-6.3, a decorator may wrap a function that yields"
except ValueError as e:
except ValueError:
assert fdb.get_api_version() >= 630, "Post-6.3, a decorator should throw if wrapped function yields"
@ -144,12 +143,13 @@ def test_fdb_transactional_returns_generator(db):
try:
def function_that_yields(tr):
yield 0
@fdb.transactional
def function_that_returns(tr):
return function_that_yields(tr)
function_that_returns()
assert fdb.get_api_version() < 630, "Pre-6.3, returning a generator is allowed"
except ValueError as e:
except ValueError:
assert fdb.get_api_version() >= 630, "Post-6.3, returning a generator should throw"
@ -400,11 +400,11 @@ class Tester:
inst.push(f)
elif inst.op == six.u("GET_ESTIMATED_RANGE_SIZE"):
begin, end = inst.pop(2)
estimatedSize = obj.get_estimated_range_size_bytes(begin, end).wait()
obj.get_estimated_range_size_bytes(begin, end).wait()
inst.push(b"GOT_ESTIMATED_RANGE_SIZE")
elif inst.op == six.u("GET_RANGE_SPLIT_POINTS"):
begin, end, chunkSize = inst.pop(3)
estimatedSize = obj.get_range_split_points(begin, end, chunkSize).wait()
obj.get_range_split_points(begin, end, chunkSize).wait()
inst.push(b"GOT_RANGE_SPLIT_POINTS")
elif inst.op == six.u("GET_KEY"):
key, or_equal, offset, prefix = inst.pop(4)
@ -522,7 +522,7 @@ class Tester:
self.last_version = inst.tr.get_committed_version()
inst.push(b"GOT_COMMITTED_VERSION")
elif inst.op == six.u("GET_APPROXIMATE_SIZE"):
approximate_size = inst.tr.get_approximate_size().wait()
inst.tr.get_approximate_size().wait()
inst.push(b"GOT_APPROXIMATE_SIZE")
elif inst.op == six.u("GET_VERSIONSTAMP"):
inst.push(inst.tr.get_versionstamp())
@ -613,9 +613,9 @@ class Tester:
result += [tenant.key]
try:
metadata = json.loads(tenant.value)
id = metadata["id"]
prefix = metadata["prefix"]
except (json.decoder.JSONDecodeError, KeyError) as e:
_ = metadata["id"]
_ = metadata["prefix"]
except (json.decoder.JSONDecodeError, KeyError):
assert False, "Invalid Tenant Metadata"
inst.push(fdb.tuple.pack(tuple(result)))
elif inst.op == six.u("UNIT_TESTS"):

View File

@ -173,7 +173,7 @@ def tupleTest(N=10000):
print("Prefix not before prefixed:\n Tuple: %s\n Bytes: %s\n Other: %s\n Bytes: %s" % (t, repr(pack(t)), t2, repr(pack(t2))))
return False
print ("Tuple check %d OK" % N)
print("Tuple check %d OK" % N)
return True
# test:

View File

@ -622,3 +622,39 @@ function(add_java_test)
-Djava.library.path=${CMAKE_BINARY_DIR}/lib
${T_CLASS} "@CLUSTER_FILE@")
endfunction()
# Adds an FDB test implemented by a script that does the full setup, such as creating a
# cluster and running client binaries as necessary
function(add_scripted_fdb_test)
set(options DISABLED ENABLED)
set(oneValueArgs NAME TEST_TIMEOUT)
set(multiValueArgs COMMAND)
cmake_parse_arguments(T "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}")
if(OPEN_FOR_IDE)
return()
endif()
if(NOT T_ENABLED AND T_DISABLED)
return()
endif()
if(NOT T_NAME)
message(FATAL_ERROR "NAME is a required argument for add_scripted_fdb_test")
endif()
if(NOT T_COMMAND)
message(FATAL_ERROR "COMMAND is a required argument for add_scripted_fdb_test")
endif()
message(STATUS "Adding Scripted FDB test ${T_NAME}")
add_test(NAME "${T_NAME}"
COMMAND ${T_COMMAND})
set_tests_properties("${T_NAME}" PROPERTIES ENVIRONMENT
"${SANITIZER_OPTIONS};PYTHONPATH=${CMAKE_SOURCE_DIR}/tests/TestRunner:${CMAKE_BINARY_DIR}/tests/TestRunner")
if (T_TEST_TIMEOUT)
set_tests_properties("${T_NAME}" PROPERTIES TIMEOUT ${T_TEST_TIMEOUT})
else()
# default timeout
if(USE_SANITIZER)
set_tests_properties("${T_NAME}" PROPERTIES TIMEOUT 1200)
else()
set_tests_properties("${T_NAME}" PROPERTIES TIMEOUT 300)
endif()
endif()
endfunction()

View File

@ -1,6 +1,6 @@
# FindRocksDB
find_package(RocksDB 6.27.3)
find_package(RocksDB 7.7.3)
include(ExternalProject)
@ -49,8 +49,8 @@ if(ROCKSDB_FOUND)
${BINARY_DIR}/librocksdb.a)
else()
ExternalProject_Add(rocksdb
URL https://github.com/facebook/rocksdb/archive/refs/tags/v6.27.3.tar.gz
URL_HASH SHA256=ee29901749b9132692b26f0a6c1d693f47d1a9ed8e3771e60556afe80282bf58
URL https://github.com/facebook/rocksdb/archive/refs/tags/v7.7.3.tar.gz
URL_HASH SHA256=b8ac9784a342b2e314c821f6d701148912215666ac5e9bdbccd93cf3767cb611
CMAKE_ARGS ${RocksDB_CMAKE_ARGS}
BUILD_BYPRODUCTS <BINARY_DIR>/librocksdb.a
INSTALL_COMMAND ""

View File

@ -42,6 +42,7 @@ parser.add_argument('--no-graph', action='store_true', default=False, help='Disa
args = parser.parse_args()
def print_choices_list(context=None):
if context == 'workload' or context is None:
print('Workloads:')
@ -70,6 +71,7 @@ def print_choices_list(context=None):
name = name[0:-len('Limiter')]
print(' %s' % name)
if args.workload is None or args.ratekeeper is None:
print('ERROR: A workload (-w/--workload) and ratekeeper model (-r/--ratekeeper) must be specified.\n')
print_choices_list()
@ -79,16 +81,18 @@ if args.list:
print_choices_list()
sys.exit(0)
def validate_class_type(var, name, superclass):
cls = getattr(var, name, None)
return cls is not None and inspect.isclass(cls) and issubclass(cls, superclass)
if not args.ratekeeper in ratekeeper_model.predefined_ratekeeper:
if args.ratekeeper not in ratekeeper_model.predefined_ratekeeper:
print('Invalid ratekeeper model `%s\'' % args.ratekeeper)
print_choices_list('ratekeeper')
sys.exit(1)
if not args.workload in workload_model.predefined_workloads:
if args.workload not in workload_model.predefined_workloads:
print('Invalid workload model `%s\'' % args.workload)
print_choices_list('workload')
sys.exit(1)
@ -120,11 +124,11 @@ for priority in workload.priorities():
still_queued = sum([r.count for r in proxy.request_queue if r.priority == priority])
if len(latencies) > 0:
print('\n%s: %d requests in %d seconds (rate=%f). %d still queued.' % (priority, total_started, proxy.time, float(total_started)/proxy.time, still_queued))
print(' Median latency: %f' % latencies[len(latencies)//2])
print(' 90%% latency: %f' % latencies[int(0.9*len(latencies))])
print(' 99%% latency: %f' % latencies[int(0.99*len(latencies))])
print(' 99.9%% latency: %f' % latencies[int(0.999*len(latencies))])
print('\n%s: %d requests in %d seconds (rate=%f). %d still queued.' % (priority, total_started, proxy.time, float(total_started) / proxy.time, still_queued))
print(' Median latency: %f' % latencies[len(latencies) // 2])
print(' 90%% latency: %f' % latencies[int(0.9 * len(latencies))])
print(' 99%% latency: %f' % latencies[int(0.99 * len(latencies))])
print(' 99.9%% latency: %f' % latencies[int(0.999 * len(latencies))])
print(' Max latency: %f' % latencies[-1])
print('')

View File

@ -20,6 +20,7 @@
import matplotlib.pyplot as plt
class Plotter:
def __init__(self, results):
self.results = results
@ -28,13 +29,13 @@ class Plotter:
out_data = {}
counts = {}
for t in data.keys():
out_data.setdefault(t//time_resolution*time_resolution, 0)
counts.setdefault(t//time_resolution*time_resolution, 0)
out_data[t//time_resolution*time_resolution] += data[t]
counts[t//time_resolution*time_resolution] += 1
out_data.setdefault(t // time_resolution * time_resolution, 0)
counts.setdefault(t // time_resolution * time_resolution, 0)
out_data[t // time_resolution * time_resolution] += data[t]
counts[t // time_resolution * time_resolution] += 1
if use_avg:
out_data = { t: v/counts[t] for t,v in out_data.items() }
out_data = {t: v / counts[t] for t, v in out_data.items()}
plt.plot(list(out_data.keys()), list(out_data.values()), label=label)
@ -42,7 +43,7 @@ class Plotter:
plt.plot(list(data.keys()), list(data.values()), label=label)
def display(self, time_resolution=0.1):
plt.figure(figsize=(40,9))
plt.figure(figsize=(40, 9))
plt.subplot(3, 3, 1)
for priority in self.results.started.keys():
Plotter.add_plot(self.results.started[priority], time_resolution, priority)
@ -61,7 +62,7 @@ class Plotter:
plt.subplot(3, 3, 3)
for priority in self.results.unprocessed_queue_sizes.keys():
data = {k: max(v) for (k,v) in self.results.unprocessed_queue_sizes[priority].items()}
data = {k: max(v) for (k, v) in self.results.unprocessed_queue_sizes[priority].items()}
Plotter.add_plot(data, time_resolution, priority)
plt.xlabel('Time (s)')
@ -71,9 +72,11 @@ class Plotter:
num = 4
for priority in self.results.latencies.keys():
plt.subplot(3, 3, num)
median_latencies = {k: v[int(0.5*len(v))] if len(v) > 0 else 0 for (k,v) in self.results.latencies[priority].items()}
percentile90_latencies = {k: v[int(0.9*len(v))] if len(v) > 0 else 0 for (k,v) in self.results.latencies[priority].items()}
max_latencies = {k: max(v) if len(v) > 0 else 0 for (k,v) in self.results.latencies[priority].items()}
median_latencies = {k: v[int(0.5 * len(v))] if len(v) > 0 else 0 for (k, v) in
self.results.latencies[priority].items()}
percentile90_latencies = {k: v[int(0.9 * len(v))] if len(v) > 0 else 0 for (k, v) in
self.results.latencies[priority].items()}
max_latencies = {k: max(v) if len(v) > 0 else 0 for (k, v) in self.results.latencies[priority].items()}
Plotter.add_plot(median_latencies, time_resolution, 'median')
Plotter.add_plot(percentile90_latencies, time_resolution, '90th percentile')
@ -94,7 +97,8 @@ class Plotter:
if len(self.results.limit[priority]) > 0:
Plotter.add_plot(self.results.limit[priority], time_resolution, 'Limit', use_avg=True)
if len(self.results.limit_and_budget[priority]) > 0:
Plotter.add_plot(self.results.limit_and_budget[priority], time_resolution, 'Limit and budget', use_avg=True)
Plotter.add_plot(self.results.limit_and_budget[priority], time_resolution, 'Limit and budget',
use_avg=True)
if len(self.results.budget[priority]) > 0:
Plotter.add_plot(self.results.budget[priority], time_resolution, 'Budget', use_avg=True)
@ -104,4 +108,3 @@ class Plotter:
num += 1
plt.show()

View File

@ -20,6 +20,7 @@
import functools
@functools.total_ordering
class Priority:
def __init__(self, priority_value, label):
@ -35,6 +36,7 @@ class Priority:
def __repr__(self):
return repr(self.label)
Priority.SYSTEM = Priority(0, "System")
Priority.DEFAULT = Priority(1, "Default")
Priority.BATCH = Priority(2, "Batch")

View File

@ -25,6 +25,7 @@ import heapq
from priority import Priority
from smoother import Smoother
@functools.total_ordering
class Task:
def __init__(self, time, fxn):
@ -34,6 +35,7 @@ class Task:
def __lt__(self, other):
return self.time < other.time
class Limiter:
class UpdateRateParams:
def __init__(self, time):
@ -79,6 +81,7 @@ class Limiter:
def update_budget(self, params):
pass
class OriginalLimiter(Limiter):
def __init__(self, priority, limit_rate_model, proxy_model):
Limiter.__init__(self, priority, limit_rate_model, proxy_model)
@ -100,6 +103,7 @@ class OriginalLimiter(Limiter):
def update_budget(self, params):
self.limit -= params.num_started
class PositiveBudgetLimiter(OriginalLimiter):
def __init__(self, priority, limit_rate_model, proxy_model):
OriginalLimiter.__init__(self, priority, limit_rate_model, proxy_model)
@ -108,6 +112,7 @@ class PositiveBudgetLimiter(OriginalLimiter):
self.limit += params.elapsed * self.rate
self.limit = min(self.limit, 2.0 * self.rate)
class ClampedBudgetLimiter(PositiveBudgetLimiter):
def __init__(self, priority, limit_rate_model, proxy_model):
PositiveBudgetLimiter.__init__(self, priority, limit_rate_model, proxy_model)
@ -117,6 +122,7 @@ class ClampedBudgetLimiter(PositiveBudgetLimiter):
if self.limit > min_budget:
self.limit = max(self.limit - params.num_started, min_budget)
class TimeLimiter(PositiveBudgetLimiter):
def __init__(self, priority, limit_rate_model, proxy_model):
PositiveBudgetLimiter.__init__(self, priority, limit_rate_model, proxy_model)
@ -126,15 +132,17 @@ class TimeLimiter(PositiveBudgetLimiter):
return params.time >= self.locked_until and PositiveBudgetLimiter.can_start(self, params)
def update_budget(self, params):
#print('Start update budget: time=%f, limit=%f, locked_until=%f, num_started=%d, priority=%s, min_priority=%s, last_batch=%d' % (params.time, self.limit, self.locked_until, params.num_started, self.priority, params.min_priority, params.last_batch))
# print('Start update budget: time=%f, limit=%f, locked_until=%f, num_started=%d, priority=%s, min_priority=%s, last_batch=%d' % (params.time, self.limit, self.locked_until, params.num_started, self.priority, params.min_priority, params.last_batch))
if params.min_priority >= self.priority or params.num_started < self.limit:
self.limit -= params.num_started
else:
self.limit = min(self.limit, max(self.limit - params.num_started, -params.last_batch))
self.locked_until = min(params.time + 2.0, max(params.time, self.locked_until) + (params.num_started - self.limit)/self.rate)
self.locked_until = min(params.time + 2.0,
max(params.time, self.locked_until) + (params.num_started - self.limit) / self.rate)
# print('End update budget: time=%f, limit=%f, locked_until=%f, num_started=%d, priority=%s, min_priority=%s' % (params.time, self.limit, self.locked_until, params.num_started, self.priority, params.min_priority))
#print('End update budget: time=%f, limit=%f, locked_until=%f, num_started=%d, priority=%s, min_priority=%s' % (params.time, self.limit, self.locked_until, params.num_started, self.priority, params.min_priority))
class TimePositiveBudgetLimiter(PositiveBudgetLimiter):
def __init__(self, priority, limit_rate_model, proxy_model):
@ -149,17 +157,18 @@ class TimePositiveBudgetLimiter(PositiveBudgetLimiter):
return params.num_started + params.count <= self.limit
def update_budget(self, params):
#if params.num_started > 0:
#print('Start update budget: time=%f, limit=%f, locked_until=%f, num_started=%d, priority=%s, min_priority=%s, last_batch=%d' % (params.time, self.limit, self.locked_until, params.num_started, self.priority, params.min_priority, params.last_batch))
# if params.num_started > 0:
# print('Start update budget: time=%f, limit=%f, locked_until=%f, num_started=%d, priority=%s, min_priority=%s, last_batch=%d' % (params.time, self.limit, self.locked_until, params.num_started, self.priority, params.min_priority, params.last_batch))
if params.num_started > self.limit:
self.locked_until = min(params.time + 2.0, max(params.time, self.locked_until) + penalty/self.rate)
self.locked_until = min(params.time + 2.0, max(params.time, self.locked_until) + (params.num_started - self.limit) / self.rate)
self.limit = 0
else:
self.limit -= params.num_started
#if params.num_started > 0:
#print('End update budget: time=%f, limit=%f, locked_until=%f, num_started=%d, priority=%s, min_priority=%s' % (params.time, self.limit, self.locked_until, params.num_started, self.priority, params.min_priority))
# if params.num_started > 0:
# print('End update budget: time=%f, limit=%f, locked_until=%f, num_started=%d, priority=%s, min_priority=%s' % (params.time, self.limit, self.locked_until, params.num_started, self.priority, params.min_priority))
class SmoothingLimiter(OriginalLimiter):
def __init__(self, priority, limit_rate_model, proxy_model):
@ -177,7 +186,8 @@ class SmoothingLimiter(OriginalLimiter):
self.smooth_rate_limit.set_total(params.time, self.rate)
def update_limit(self, params):
self.limit = 2.0 * (self.smooth_rate_limit.smooth_total(params.time) - self.smooth_released.smooth_rate(params.time))
self.limit = 2.0 * (
self.smooth_rate_limit.smooth_total(params.time) - self.smooth_released.smooth_rate(params.time))
def can_start(self, params):
return params.num_started + params.count <= self.limit
@ -185,15 +195,17 @@ class SmoothingLimiter(OriginalLimiter):
def update_budget(self, params):
self.smooth_released.add_delta(params.time, params.num_started)
class SmoothingBudgetLimiter(SmoothingLimiter):
def __init__(self, priority, limit_rate_model, proxy_model):
SmoothingLimiter.__init__(self, priority, limit_rate_model, proxy_model)
#self.smooth_filled = Smoother(2)
# self.smooth_filled = Smoother(2)
self.budget = 0
def update_limit(self, params):
release_rate = (self.smooth_rate_limit.smooth_total(params.time) - self.smooth_released.smooth_rate(params.time))
#self.smooth_filled.set_total(params.time, 1 if release_rate > 0 else 0)
release_rate = (
self.smooth_rate_limit.smooth_total(params.time) - self.smooth_released.smooth_rate(params.time))
# self.smooth_filled.set_total(params.time, 1 if release_rate > 0 else 0)
self.limit = 2.0 * release_rate
self.proxy_model.results.rate[self.priority][params.time] = self.smooth_rate_limit.smooth_total(params.time)
@ -202,15 +214,15 @@ class SmoothingBudgetLimiter(SmoothingLimiter):
self.proxy_model.results.limit_and_budget[self.priority][params.time] = self.limit + self.budget
self.proxy_model.results.budget[self.priority][params.time] = self.budget
#self.budget = max(0, self.budget + params.elapsed * self.smooth_rate_limit.smooth_total(params.time))
# self.budget = max(0, self.budget + params.elapsed * self.smooth_rate_limit.smooth_total(params.time))
#if self.smooth_filled.smooth_total(params.time) >= 0.1:
#self.budget += params.elapsed * self.smooth_rate_limit.smooth_total(params.time)
# if self.smooth_filled.smooth_total(params.time) >= 0.1:
# self.budget += params.elapsed * self.smooth_rate_limit.smooth_total(params.time)
#print('Update limit: time=%f, priority=%s, limit=%f, rate=%f, released=%f, budget=%f' % (params.time, self.priority, self.limit, self.smooth_rate_limit.smooth_total(params.time), self.smooth_released.smooth_rate(params.time), self.budget))
# print('Update limit: time=%f, priority=%s, limit=%f, rate=%f, released=%f, budget=%f' % (params.time, self.priority, self.limit, self.smooth_rate_limit.smooth_total(params.time), self.smooth_released.smooth_rate(params.time), self.budget))
def can_start(self, params):
return params.num_started + params.count <= self.limit + self.budget #or params.num_started + params.count <= self.budget
return params.num_started + params.count <= self.limit + self.budget # or params.num_started + params.count <= self.budget
def update_budget(self, params):
self.budget = max(0, self.budget + (self.limit - params.num_started_at_priority) / 2 * params.elapsed)
@ -220,6 +232,7 @@ class SmoothingBudgetLimiter(SmoothingLimiter):
self.smooth_released.add_delta(params.time, params.num_started_at_priority)
class ProxyModel:
class Results:
def __init__(self, priorities, duration):
@ -228,11 +241,11 @@ class ProxyModel:
self.latencies = self.init_result(priorities, [], duration)
self.unprocessed_queue_sizes = self.init_result(priorities, [], duration)
self.rate = {p:{} for p in priorities}
self.released = {p:{} for p in priorities}
self.limit = {p:{} for p in priorities}
self.limit_and_budget = {p:{} for p in priorities}
self.budget = {p:{} for p in priorities}
self.rate = {p: {} for p in priorities}
self.released = {p: {} for p in priorities}
self.limit = {p: {} for p in priorities}
self.limit_and_budget = {p: {} for p in priorities}
self.budget = {p: {} for p in priorities}
def init_result(self, priorities, starting_value, duration):
return {p: {s: copy.copy(starting_value) for s in range(0, duration)} for p in priorities}
@ -241,9 +254,10 @@ class ProxyModel:
self.time = 0
self.log_time = 0
self.duration = duration
self.priority_limiters = { priority: Limiter(priority, ratekeeper_model, self) for priority in workload_model.priorities() }
self.priority_limiters = {priority: Limiter(priority, ratekeeper_model, self) for priority in
workload_model.priorities()}
self.workload_model = workload_model
self.request_scheduled = { p: False for p in self.workload_model.priorities()}
self.request_scheduled = {p: False for p in self.workload_model.priorities()}
self.tasks = []
self.request_queue = []
@ -256,13 +270,14 @@ class ProxyModel:
for priority in self.workload_model.priorities():
next_request = self.workload_model.next_request(self.time, priority)
assert next_request is not None
heapq.heappush(self.tasks, Task(next_request.time, lambda next_request=next_request: self.receive_request(next_request)))
heapq.heappush(self.tasks, Task(next_request.time,
lambda next_request=next_request: self.receive_request(next_request)))
self.request_scheduled[priority] = True
while True:# or len(self.request_queue) > 0:
while True: # or len(self.request_queue) > 0:
if int(self.time) > self.log_time:
self.log_time = int(self.time)
#print(self.log_time)
# print(self.log_time)
task = heapq.heappop(self.tasks)
self.time = task.time
@ -294,14 +309,15 @@ class ProxyModel:
limiter.update_limit(Limiter.UpdateLimitParams(self.time, elapsed))
current_started = 0
started = {p:0 for p in self.workload_model.priorities()}
started = {p: 0 for p in self.workload_model.priorities()}
min_priority = Priority.SYSTEM
last_batch = 0
while len(self.request_queue) > 0:
request = self.request_queue[0]
if not self.priority_limiters[request.priority].can_start(Limiter.CanStartParams(self.time, current_started, request.count)):
if not self.priority_limiters[request.priority].can_start(
Limiter.CanStartParams(self.time, current_started, request.count)):
break
min_priority = request.priority
@ -310,7 +326,8 @@ class ProxyModel:
if self.workload_model.request_completed(request) and not self.request_scheduled[request.priority]:
next_request = self.workload_model.next_request(self.time, request.priority)
assert next_request is not None
heapq.heappush(self.tasks, Task(next_request.time, lambda next_request=next_request: self.receive_request(next_request)))
heapq.heappush(self.tasks, Task(next_request.time,
lambda next_request=next_request: self.receive_request(next_request)))
self.request_scheduled[request.priority] = True
current_started += request.count
@ -318,21 +335,23 @@ class ProxyModel:
heapq.heappop(self.request_queue)
self.results.started[request.priority][int(self.time)] += request.count
self.results.latencies[request.priority][int(self.time)].append(self.time-request.time)
self.results.latencies[request.priority][int(self.time)].append(self.time - request.time)
if len(self.request_queue) == 0:
min_priority = Priority.BATCH
for priority, limiter in self.priority_limiters.items():
started_at_priority = sum([v for p,v in started.items() if p <= priority])
limiter.update_budget(Limiter.UpdateBudgetParams(self.time, current_started, started_at_priority, min_priority, last_batch, len(self.request_queue) == 0 or self.request_queue[0].priority > priority, elapsed))
started_at_priority = sum([v for p, v in started.items() if p <= priority])
limiter.update_budget(
Limiter.UpdateBudgetParams(self.time, current_started, started_at_priority, min_priority, last_batch,
len(self.request_queue) == 0 or self.request_queue[0].priority > priority,
elapsed))
for priority in self.workload_model.priorities():
self.results.unprocessed_queue_sizes[priority][int(self.time)].append(self.workload_model.workload_models[priority].outstanding)
self.results.unprocessed_queue_sizes[priority][int(self.time)].append(
self.workload_model.workload_models[priority].outstanding)
current_time = self.time
delay = 0.001
heapq.heappush(self.tasks, Task(self.time + delay, lambda: self.process_requests(current_time)))

View File

@ -20,6 +20,7 @@
import numpy
class RateModel:
def __init__(self):
pass
@ -27,6 +28,7 @@ class RateModel:
def get_rate(self, time):
pass
class FixedRateModel(RateModel):
def __init__(self, rate):
RateModel.__init__(self)
@ -35,10 +37,12 @@ class FixedRateModel(RateModel):
def get_rate(self, time):
return self.rate
class UnlimitedRateModel(FixedRateModel):
def __init__(self):
self.rate = 1e9
class IntervalRateModel(RateModel):
def __init__(self, intervals):
self.intervals = sorted(intervals)
@ -46,16 +50,17 @@ class IntervalRateModel(RateModel):
def get_rate(self, time):
if len(self.intervals) == 0 or time < self.intervals[0][0]:
return 0
target_interval = len(self.intervals)-1
target_interval = len(self.intervals) - 1
for i in range(1, len(self.intervals)):
if time < self.intervals[i][0]:
target_interval = i-1
target_interval = i - 1
break
self.intervals = self.intervals[target_interval:]
return self.intervals[0][1]
class SawtoothRateModel(RateModel):
def __init__(self, low, high, frequency):
self.low = low
@ -63,11 +68,12 @@ class SawtoothRateModel(RateModel):
self.frequency = frequency
def get_rate(self, time):
if int(2*time/self.frequency) % 2 == 0:
if int(2 * time / self.frequency) % 2 == 0:
return self.low
else:
return self.high
class DistributionRateModel(RateModel):
def __init__(self, distribution, frequency):
self.distribution = distribution

View File

@ -22,6 +22,7 @@ import numpy
import rate_model
from priority import Priority
class RatekeeperModel:
def __init__(self, limit_models):
self.limit_models = limit_models
@ -29,39 +30,40 @@ class RatekeeperModel:
def get_limit(self, time, priority):
return self.limit_models[priority].get_rate(time)
predefined_ratekeeper = {}
predefined_ratekeeper['default200_batch100'] = RatekeeperModel(
{
Priority.SYSTEM: rate_model.UnlimitedRateModel(),
Priority.DEFAULT: rate_model.FixedRateModel(200),
Priority.BATCH: rate_model.FixedRateModel(100)
})
{
Priority.SYSTEM: rate_model.UnlimitedRateModel(),
Priority.DEFAULT: rate_model.FixedRateModel(200),
Priority.BATCH: rate_model.FixedRateModel(100)
})
predefined_ratekeeper['default_sawtooth'] = RatekeeperModel(
{
Priority.SYSTEM: rate_model.UnlimitedRateModel(),
Priority.DEFAULT: rate_model.SawtoothRateModel(10, 200, 1),
Priority.BATCH: rate_model.FixedRateModel(0)
})
{
Priority.SYSTEM: rate_model.UnlimitedRateModel(),
Priority.DEFAULT: rate_model.SawtoothRateModel(10, 200, 1),
Priority.BATCH: rate_model.FixedRateModel(0)
})
predefined_ratekeeper['default_uniform_random'] = RatekeeperModel(
{
Priority.SYSTEM: rate_model.UnlimitedRateModel(),
Priority.DEFAULT: rate_model.DistributionRateModel(lambda: numpy.random.uniform(10, 200), 1),
Priority.BATCH: rate_model.FixedRateModel(0)
})
{
Priority.SYSTEM: rate_model.UnlimitedRateModel(),
Priority.DEFAULT: rate_model.DistributionRateModel(lambda: numpy.random.uniform(10, 200), 1),
Priority.BATCH: rate_model.FixedRateModel(0)
})
predefined_ratekeeper['default_trickle'] = RatekeeperModel(
{
Priority.SYSTEM: rate_model.UnlimitedRateModel(),
Priority.DEFAULT: rate_model.FixedRateModel(3),
Priority.BATCH: rate_model.FixedRateModel(0)
})
{
Priority.SYSTEM: rate_model.UnlimitedRateModel(),
Priority.DEFAULT: rate_model.FixedRateModel(3),
Priority.BATCH: rate_model.FixedRateModel(0)
})
predefined_ratekeeper['default1000'] = RatekeeperModel(
{
Priority.SYSTEM: rate_model.UnlimitedRateModel(),
Priority.DEFAULT: rate_model.FixedRateModel(1000),
Priority.BATCH: rate_model.FixedRateModel(500)
})
{
Priority.SYSTEM: rate_model.UnlimitedRateModel(),
Priority.DEFAULT: rate_model.FixedRateModel(1000),
Priority.BATCH: rate_model.FixedRateModel(500)
})

View File

@ -20,6 +20,7 @@
import math
class Smoother:
def __init__(self, folding_time):
self.folding_time = folding_time
@ -28,10 +29,10 @@ class Smoother:
def reset(self, value):
self.time = 0
self.total = value
self.estimate = value
self.estimate = value
def set_total(self, time, total):
self.add_delta(time, total-self.total)
self.add_delta(time, total - self.total)
def add_delta(self, time, delta):
self.update(time)
@ -43,11 +44,10 @@ class Smoother:
def smooth_rate(self, time):
self.update(time)
return (self.total-self.estimate) / self.folding_time
return (self.total - self.estimate) / self.folding_time
def update(self, time):
elapsed = time - self.time
if elapsed > 0:
self.time = time
self.estimate += (self.total-self.estimate) * (1-math.exp(-elapsed/self.folding_time))
self.estimate += (self.total - self.estimate) * (1 - math.exp(-elapsed / self.folding_time))

View File

@ -25,6 +25,7 @@ import math
import rate_model
from priority import Priority
@functools.total_ordering
class Request:
def __init__(self, time, count, priority):
@ -35,6 +36,7 @@ class Request:
def __lt__(self, other):
return self.priority < other.priority
class PriorityWorkloadModel:
def __init__(self, priority, rate_model, batch_model, generator, max_outstanding=1e9):
self.priority = priority
@ -59,6 +61,7 @@ class PriorityWorkloadModel:
return was_full and self.outstanding < self.max_outstanding
class WorkloadModel:
def __init__(self, workload_models):
self.workload_models = workload_models
@ -72,10 +75,17 @@ class WorkloadModel:
def request_completed(self, request):
return self.workload_models[request.priority].request_completed(request)
class Distribution:
EXPONENTIAL = lambda x: numpy.random.exponential(x)
UNIFORM = lambda x: numpy.random.uniform(0, 2.0*x)
FIXED = lambda x: x
def exponential(x):
return numpy.random.exponential(x)
def uniform(x):
return numpy.random.uniform(0, 2.0 * x)
def fixed(x):
return x
class BatchGenerator:
def __init__(self):
@ -84,6 +94,7 @@ class BatchGenerator:
def next_batch(self):
pass
class DistributionBatchGenerator(BatchGenerator):
def __init__(self, distribution, size):
BatchGenerator.__init__(self)
@ -93,6 +104,7 @@ class DistributionBatchGenerator(BatchGenerator):
def next_batch(self):
return math.ceil(self.distribution(self.size))
class RequestGenerator:
def __init__(self):
pass
@ -100,6 +112,7 @@ class RequestGenerator:
def next_request_interval(self, rate):
pass
class DistributionRequestGenerator(RequestGenerator):
def __init__(self, distribution):
RequestGenerator.__init__(self)
@ -109,93 +122,94 @@ class DistributionRequestGenerator(RequestGenerator):
if rate == 0:
return 1e9
return self.distribution(1.0/rate)
return self.distribution(1.0 / rate)
predefined_workloads = {}
predefined_workloads['slow_exponential'] = WorkloadModel(
{
Priority.DEFAULT: PriorityWorkloadModel(Priority.DEFAULT,
rate_model.FixedRateModel(100),
DistributionBatchGenerator(Distribution.FIXED, 1),
DistributionRequestGenerator(Distribution.EXPONENTIAL),
max_outstanding=100
)
})
{
Priority.DEFAULT: PriorityWorkloadModel(Priority.DEFAULT,
rate_model.FixedRateModel(100),
DistributionBatchGenerator(Distribution.fixed, 1),
DistributionRequestGenerator(Distribution.exponential),
max_outstanding=100
)
})
predefined_workloads['fixed_uniform'] = WorkloadModel(
{
Priority.SYSTEM: PriorityWorkloadModel(Priority.SYSTEM,
rate_model.FixedRateModel(0),
DistributionBatchGenerator(Distribution.FIXED, 1),
DistributionRequestGenerator(Distribution.UNIFORM),
max_outstanding=10
),
Priority.DEFAULT: PriorityWorkloadModel(Priority.DEFAULT,
rate_model.FixedRateModel(95),
DistributionBatchGenerator(Distribution.FIXED, 10),
DistributionRequestGenerator(Distribution.UNIFORM),
max_outstanding=200
),
Priority.BATCH: PriorityWorkloadModel(Priority.BATCH,
rate_model.FixedRateModel(1),
DistributionBatchGenerator(Distribution.UNIFORM, 500),
DistributionRequestGenerator(Distribution.UNIFORM),
max_outstanding=200
)
})
{
Priority.SYSTEM: PriorityWorkloadModel(Priority.SYSTEM,
rate_model.FixedRateModel(0),
DistributionBatchGenerator(Distribution.fixed, 1),
DistributionRequestGenerator(Distribution.uniform),
max_outstanding=10
),
Priority.DEFAULT: PriorityWorkloadModel(Priority.DEFAULT,
rate_model.FixedRateModel(95),
DistributionBatchGenerator(Distribution.fixed, 10),
DistributionRequestGenerator(Distribution.uniform),
max_outstanding=200
),
Priority.BATCH: PriorityWorkloadModel(Priority.BATCH,
rate_model.FixedRateModel(1),
DistributionBatchGenerator(Distribution.uniform, 500),
DistributionRequestGenerator(Distribution.uniform),
max_outstanding=200
)
})
predefined_workloads['batch_starvation'] = WorkloadModel(
{
Priority.SYSTEM: PriorityWorkloadModel(Priority.SYSTEM,
rate_model.FixedRateModel(1),
DistributionBatchGenerator(Distribution.FIXED, 1),
DistributionRequestGenerator(Distribution.UNIFORM),
max_outstanding=10
),
Priority.DEFAULT: PriorityWorkloadModel(Priority.DEFAULT,
rate_model.IntervalRateModel([(0,50), (60,150), (120,90)]),
DistributionBatchGenerator(Distribution.FIXED, 1),
DistributionRequestGenerator(Distribution.UNIFORM),
max_outstanding=200
),
Priority.BATCH: PriorityWorkloadModel(Priority.BATCH,
rate_model.FixedRateModel(100),
DistributionBatchGenerator(Distribution.FIXED, 1),
DistributionRequestGenerator(Distribution.UNIFORM),
max_outstanding=200
)
})
{
Priority.SYSTEM: PriorityWorkloadModel(Priority.SYSTEM,
rate_model.FixedRateModel(1),
DistributionBatchGenerator(Distribution.fixed, 1),
DistributionRequestGenerator(Distribution.uniform),
max_outstanding=10
),
Priority.DEFAULT: PriorityWorkloadModel(Priority.DEFAULT,
rate_model.IntervalRateModel([(0, 50), (60, 150), (120, 90)]),
DistributionBatchGenerator(Distribution.fixed, 1),
DistributionRequestGenerator(Distribution.uniform),
max_outstanding=200
),
Priority.BATCH: PriorityWorkloadModel(Priority.BATCH,
rate_model.FixedRateModel(100),
DistributionBatchGenerator(Distribution.fixed, 1),
DistributionRequestGenerator(Distribution.uniform),
max_outstanding=200
)
})
predefined_workloads['default_low_high_low'] = WorkloadModel(
{
Priority.SYSTEM: PriorityWorkloadModel(Priority.SYSTEM,
rate_model.FixedRateModel(0),
DistributionBatchGenerator(Distribution.FIXED, 1),
DistributionRequestGenerator(Distribution.UNIFORM),
max_outstanding=10
),
Priority.DEFAULT: PriorityWorkloadModel(Priority.DEFAULT,
rate_model.IntervalRateModel([(0,100), (60,300), (120,100)]),
DistributionBatchGenerator(Distribution.FIXED, 1),
DistributionRequestGenerator(Distribution.UNIFORM),
max_outstanding=200
),
Priority.BATCH: PriorityWorkloadModel(Priority.BATCH,
rate_model.FixedRateModel(0),
DistributionBatchGenerator(Distribution.FIXED, 1),
DistributionRequestGenerator(Distribution.UNIFORM),
max_outstanding=200
)
})
{
Priority.SYSTEM: PriorityWorkloadModel(Priority.SYSTEM,
rate_model.FixedRateModel(0),
DistributionBatchGenerator(Distribution.fixed, 1),
DistributionRequestGenerator(Distribution.uniform),
max_outstanding=10
),
Priority.DEFAULT: PriorityWorkloadModel(Priority.DEFAULT,
rate_model.IntervalRateModel([(0, 100), (60, 300), (120, 100)]),
DistributionBatchGenerator(Distribution.fixed, 1),
DistributionRequestGenerator(Distribution.uniform),
max_outstanding=200
),
Priority.BATCH: PriorityWorkloadModel(Priority.BATCH,
rate_model.FixedRateModel(0),
DistributionBatchGenerator(Distribution.fixed, 1),
DistributionRequestGenerator(Distribution.uniform),
max_outstanding=200
)
})
for rate in [83, 100, 180, 190, 200]:
predefined_workloads['default%d' % rate] = WorkloadModel(
{
Priority.DEFAULT: PriorityWorkloadModel(Priority.DEFAULT,
rate_model.FixedRateModel(rate),
DistributionBatchGenerator(Distribution.FIXED, 1),
DistributionRequestGenerator(Distribution.EXPONENTIAL),
max_outstanding=1000
)
})
{
Priority.DEFAULT: PriorityWorkloadModel(Priority.DEFAULT,
rate_model.FixedRateModel(rate),
DistributionBatchGenerator(Distribution.fixed, 1),
DistributionRequestGenerator(Distribution.exponential),
max_outstanding=1000
)
})

View File

@ -0,0 +1,5 @@
# LeakSanitizer suppressions file for FDB
# https://github.com/google/sanitizers/wiki/AddressSanitizerLeakSanitizer
# Not all incoming connections are cleanly shut down in client API tests
leak:ConnectionReaderActorState

View File

@ -24,10 +24,12 @@ import sys
import platform
import os
def error(message):
print(message)
sys.exit(1)
def get_version_string(library_path):
try:
lib = ctypes.cdll.LoadLibrary(library_path)
@ -58,6 +60,7 @@ def get_version_string(library_path):
return version_str
if __name__ == '__main__':
if platform.system() == 'Linux':
default_lib = 'libfdb_c.so'

View File

@ -28,7 +28,6 @@ optional packages:
sortedcontainers (for estimating key range read/write density)
"""
import argparse
from collections import defaultdict
from enum import Enum
@ -55,7 +54,6 @@ supported_protocol_versions = frozenset([PROTOCOL_VERSION_5_2, PROTOCOL_VERSION_
PROTOCOL_VERSION_6_2, PROTOCOL_VERSION_6_3, PROTOCOL_VERSION_7_0,
PROTOCOL_VERSION_7_1, PROTOCOL_VERSION_7_2])
fdb.api_version(520)
BASIC_FORMAT = "%(asctime)s - %(levelname)-8s %(message)s"
@ -188,6 +186,7 @@ class BaseInfo(object):
"""
Corresponds to FdbClientLogEvents::Event
"""
def __init__(self, bb, protocol_version):
# we already read the EventType, so go straight to start_timestamp
self.start_timestamp = bb.get_double()
@ -197,6 +196,7 @@ class BaseInfo(object):
if bb.get_bool():
self.tenant = bb.get_bytes_with_length()
class GetVersionInfo(BaseInfo):
def __init__(self, bb, protocol_version):
super().__init__(bb, protocol_version)
@ -206,6 +206,7 @@ class GetVersionInfo(BaseInfo):
if protocol_version >= PROTOCOL_VERSION_6_3:
self.read_version = bb.get_long()
class GetInfo(BaseInfo):
def __init__(self, bb, protocol_version):
super().__init__(bb, protocol_version)
@ -244,11 +245,11 @@ class CommitInfo(BaseInfo):
self.read_snapshot_version = bb.get_long()
if protocol_version >= PROTOCOL_VERSION_6_3:
self.report_conflicting_keys = bb.get_bool()
if protocol_version >= PROTOCOL_VERSION_7_1:
lock_aware = bb.get_bool()
self.lock_aware = bb.get_bool()
if bb.get_bool():
spanId = bb.get_bytes(16)
self.spanId = bb.get_bytes(16)
class ErrorGetInfo(BaseInfo):
@ -285,9 +286,9 @@ class ErrorCommitInfo(BaseInfo):
self.report_conflicting_keys = bb.get_bool()
if protocol_version >= PROTOCOL_VERSION_7_1:
lock_aware = bb.get_bool()
self.lock_aware = bb.get_bool()
if bb.get_bool():
spanId = bb.get_bytes(16)
self.spanId = bb.get_bytes(16)
class UnsupportedProtocolVersionError(Exception):
@ -314,52 +315,57 @@ class ClientTransactionInfo:
if event == 0:
# we need to read it to consume the buffer even if we don't want to store it
get_version = GetVersionInfo(bb, protocol_version)
if (not type_filter or "get_version" in type_filter):
if not type_filter or "get_version" in type_filter:
self.get_version = get_version
elif event == 1:
get = GetInfo(bb, protocol_version)
if (not type_filter or "get" in type_filter):
if not type_filter or "get" in type_filter:
# because of the crappy json serialization using __dict__ we have to set the list here otherwise
# it doesn't print
if not self.gets: self.gets = []
if not self.gets:
self.gets = []
self.gets.append(get)
elif event == 2:
get_range = GetRangeInfo(bb, protocol_version)
if (not type_filter or "get_range" in type_filter):
if not self.get_ranges: self.get_ranges = []
if not type_filter or "get_range" in type_filter:
if not self.get_ranges:
self.get_ranges = []
self.get_ranges.append(get_range)
elif event == 3:
commit = CommitInfo(bb, protocol_version, full_output=full_output)
if (not type_filter or "commit" in type_filter):
if not type_filter or "commit" in type_filter:
self.commit = commit
elif event == 4:
error_get = ErrorGetInfo(bb, protocol_version)
if (not type_filter or "error_gets" in type_filter):
if not self.error_gets: self.error_gets = []
if not type_filter or "error_gets" in type_filter:
if not self.error_gets:
self.error_gets = []
self.error_gets.append(error_get)
elif event == 5:
error_get_range = ErrorGetRangeInfo(bb, protocol_version)
if (not type_filter or "error_get_range" in type_filter):
if not self.error_get_ranges: self.error_get_ranges = []
if not type_filter or "error_get_range" in type_filter:
if not self.error_get_ranges:
self.error_get_ranges = []
self.error_get_ranges.append(error_get_range)
elif event == 6:
error_commit = ErrorCommitInfo(bb, protocol_version, full_output=full_output)
if (not type_filter or "error_commit" in type_filter):
if not self.error_commits: self.error_commits = []
if not type_filter or "error_commit" in type_filter:
if not self.error_commits:
self.error_commits = []
self.error_commits.append(error_commit)
else:
raise Exception("Unknown event type %d" % event)
def has_types(self):
return self.get_version or self.gets or self.get_ranges or self.commit or self.error_gets \
or self.error_get_ranges or self.error_commits
return self.get_version or self.gets or self.get_ranges or self.commit \
or self.error_gets or self.error_get_ranges or self.error_commits
def to_json(self):
return json.dumps(self, cls=ObjJsonEncoder, sort_keys=True)
class TransactionInfoLoader(object):
max_num_chunks_to_store = 1000 # Each chunk would be 100 KB in size
max_num_chunks_to_store = 1000 # Each chunk would be 100 KB in size
def __init__(self, db, full_output=True, type_filter=None, min_timestamp=None, max_timestamp=None):
self.db = db
@ -433,7 +439,7 @@ class TransactionInfoLoader(object):
reverse = False
for k, v in tr.snapshot.get_range(start_key, end_key, limit=1, reverse=reverse):
return fdb.tuple.unpack(v)[0]
return 0 if start else 0x8000000000000000 # we didn't find any timekeeper data so find the max range
return 0 if start else 0x8000000000000000 # we didn't find any timekeeper data so find the max range
def fetch_transaction_info(self):
if self.min_timestamp:
@ -469,12 +475,12 @@ class TransactionInfoLoader(object):
streaming_mode=fdb.impl.StreamingMode.want_all)
for k, v in transaction_info_range:
found += 1
#logger.debug(k)
# logger.debug(k)
start_key = fdb.KeySelector.first_greater_than(k)
_, tr_id, num_chunks, chunk_num = self.parse_key(k)
#logger.debug("num_chunks=%d, chunk_num=%d" % (num_chunks,chunk_num))
# logger.debug("num_chunks=%d, chunk_num=%d" % (num_chunks,chunk_num))
if num_chunks == 1:
assert chunk_num == 1
@ -482,7 +488,7 @@ class TransactionInfoLoader(object):
info = build_client_transaction_info(v)
if info.has_types():
buffer.append(info)
except UnsupportedProtocolVersionError as e:
except UnsupportedProtocolVersionError:
invalid_transaction_infos += 1
except ValueError:
invalid_transaction_infos += 1
@ -497,7 +503,8 @@ class TransactionInfoLoader(object):
self._check_and_adjust_chunk_cache_size()
else:
if tr_id not in self.tr_info_map:
logger.error("Got a middle chunk without getting beginning part. Discarding transaction id: %s\n" % tr_id)
logger.error(
"Got a middle chunk without getting beginning part. Discarding transaction id: %s\n" % tr_id)
continue
c_list = self.tr_info_map[tr_id]
if c_list[-1].num_chunks != num_chunks or c_list[-1].chunk_num != chunk_num - 1:
@ -513,7 +520,7 @@ class TransactionInfoLoader(object):
info = build_client_transaction_info(b''.join([chunk.value for chunk in c_list]))
if info.has_types():
buffer.append(info)
except UnsupportedProtocolVersionError as e:
except UnsupportedProtocolVersionError:
invalid_transaction_infos += 1
except ValueError:
invalid_transaction_infos += 1
@ -553,6 +560,7 @@ def has_dateparser():
logger.warn("Can't find dateparser so disabling human date parsing")
return False
class ReadCounter(object):
def __init__(self):
from sortedcontainers import SortedDict
@ -560,7 +568,7 @@ class ReadCounter(object):
self.reads[b''] = [0, 0]
self.read_counts = {}
self.hit_count=0
self.hit_count = 0
def process(self, transaction_info):
for get in transaction_info.gets:
@ -576,7 +584,7 @@ class ReadCounter(object):
if end_key is not None:
self.reads.setdefault(end_key, [0, 0])[1] += 1
else:
self.reads.setdefault(start_key+b'\x00', [0, 0])[1] += 1
self.reads.setdefault(start_key + b'\x00', [0, 0])[1] += 1
def get_total_reads(self):
return sum([v for v in self.read_counts.values()])
@ -673,8 +681,8 @@ class ShardFinder(object):
self.shard_cache = {}
def _get_boundary_keys(self, begin, end):
start_pos = max(0, bisect_right(self.boundary_keys, begin)-1)
end_pos = max(0, bisect_right(self.boundary_keys, end)-1)
start_pos = max(0, bisect_right(self.boundary_keys, begin) - 1)
end_pos = max(0, bisect_right(self.boundary_keys, end) - 1)
return self.boundary_keys[start_pos:end_pos]
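
The boundary-key lookup above maps a key to its shard by taking the last boundary that is not greater than the key (bisect_right minus one, clamped at zero). A minimal sketch of the same lookup, written in C++ like the other illustrative snippets added in this document; the boundary keys here are made-up:

```cpp
#include <algorithm>
#include <cassert>
#include <string>
#include <vector>

// Given sorted shard boundary keys, the shard owning `key` begins at the last
// boundary <= key (bisect_right - 1 in the Python code, upper_bound - 1 here).
// Keys below the first boundary are clamped to the first shard.
std::string shardFor(const std::vector<std::string>& boundaries, const std::string& key) {
    auto it = std::upper_bound(boundaries.begin(), boundaries.end(), key);
    if (it != boundaries.begin()) {
        --it;
    }
    return *it;
}

int main() {
    std::vector<std::string> boundaries = { "", "apple", "mango", "zebra" };
    assert(shardFor(boundaries, "banana") == "apple");
    assert(shardFor(boundaries, "mango") == "mango"); // a boundary starts its own shard
    assert(shardFor(boundaries, "aardvark") == "");   // before "apple" -> first shard
    return 0;
}
```
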
@ -691,9 +699,9 @@ class ShardFinder(object):
return len(self._get_boundary_keys(start_key, end_key)) + 1
def get_addresses_for_key(self, key):
shard = self.boundary_keys[max(0, bisect_right(self.boundary_keys, key)-1)]
shard = self.boundary_keys[max(0, bisect_right(self.boundary_keys, key) - 1)]
do_load = False
if not shard in self.shard_cache:
if shard not in self.shard_cache:
do_load = True
elif self.shard_cache[shard].is_ready():
try:
@ -708,7 +716,7 @@ class ShardFinder(object):
for f in self.outstanding:
try:
f.wait()
except fdb.FDBError as e:
except fdb.FDBError:
pass
self.outstanding = []
@ -726,10 +734,13 @@ class ShardFinder(object):
if item[addr_idx] is not None:
while True:
try:
ranges[index] = item[0:addr_idx] + ([a.decode('ascii') for a in item[addr_idx].wait()],) + item[addr_idx+1:]
ranges[index] = item[0:addr_idx] + ([a.decode('ascii') for a in item[addr_idx].wait()],) \
+ item[addr_idx + 1:]
break
except fdb.FDBError as e:
ranges[index] = item[0:addr_idx] + (self.get_addresses_for_key(item[key_idx]),) + item[addr_idx+1:]
except fdb.FDBError:
ranges[index] = item[0:addr_idx] + (self.get_addresses_for_key(item[key_idx]),) \
+ item[addr_idx + 1:]
class WriteCounter(object):
mutation_types_to_consider = frozenset([MutationType.SET_VALUE, MutationType.ADD_VALUE])
@ -795,10 +806,11 @@ class WriteCounter(object):
filter_addresses = set(filter_addresses)
results = [r for r in results if filter_addresses.issubset(set(r[3]))][0:num]
else:
results = [(key, end, count) for (count, key) in count_pairs[0:num]]
results = [(key, None, count) for (count, key) in count_pairs[0:num]]
return results
def connect(cluster_file=None):
db = fdb.open(cluster_file=cluster_file)
return db
@ -831,22 +843,34 @@ def main():
end_time_group = parser.add_mutually_exclusive_group()
end_time_group.add_argument("--max-timestamp", type=int, help="Don't return events newer than this epoch time")
end_time_group.add_argument("-e", "--end-time", type=str, help="Don't return events older than this parsed time")
parser.add_argument("--num-buckets", type=int, help="The number of buckets to partition the key-space into for operation counts", default=100)
parser.add_argument("--top-requests", type=int, help="If specified will output this many top keys for reads or writes", default=0)
parser.add_argument("--exclude-ports", action="store_true", help="Print addresses without the port number. Only works in versions older than 6.3, and is required in versions older than 6.2.")
parser.add_argument("--single-shard-ranges-only", action="store_true", help="Only print range boundaries that exist in a single shard")
parser.add_argument("-a", "--filter-address", action="append", help="Only print range boundaries that include the given address. This option can used multiple times to include more than one address in the filter, in which case all addresses must match.")
parser.add_argument("--num-buckets", type=int,
help="The number of buckets to partition the key-space into for operation counts", default=100)
parser.add_argument("--top-requests", type=int,
help="If specified will output this many top keys for reads or writes", default=0)
parser.add_argument("--exclude-ports", action="store_true",
help="Print addresses without the port number. Only works in versions older than 6.3, and is required in versions older than 6.2.")
parser.add_argument("--single-shard-ranges-only", action="store_true",
help="Only print range boundaries that exist in a single shard")
parser.add_argument("-a", "--filter-address", action="append",
help="Only print range boundaries that include the given address. This option can used multiple times to include more than one address in the filter, in which case all addresses must match.")
args = parser.parse_args()
type_filter = set()
if args.filter_get_version: type_filter.add("get_version")
if args.filter_get or args.filter_reads: type_filter.add("get")
if args.filter_get_range or args.filter_reads: type_filter.add("get_range")
if args.filter_commit: type_filter.add("commit")
if args.filter_error_get: type_filter.add("error_get")
if args.filter_error_get_range: type_filter.add("error_get_range")
if args.filter_error_commit: type_filter.add("error_commit")
if args.filter_get_version:
type_filter.add("get_version")
if args.filter_get or args.filter_reads:
type_filter.add("get")
if args.filter_get_range or args.filter_reads:
type_filter.add("get_range")
if args.filter_commit:
type_filter.add("commit")
if args.filter_error_get:
type_filter.add("error_get")
if args.filter_error_get_range:
type_filter.add("error_get_range")
if args.filter_error_commit:
type_filter.add("error_commit")
if (not type_filter or "commit" in type_filter):
write_counter = WriteCounter() if args.num_buckets else None
@ -912,7 +936,8 @@ def main():
else:
op_str = 'Key %r' % start
print(" %d. %s\n %d sampled %s (%.2f%%, %.2f%% cumulative)" % (idx+1, op_str, count, context, 100*count/total, 100*running_count/total))
print(" %d. %s\n %d sampled %s (%.2f%%, %.2f%% cumulative)" % (
idx + 1, op_str, count, context, 100 * count / total, 100 * running_count / total))
print(" shard addresses: %s\n" % ", ".join(addresses))
else:
@ -933,10 +958,10 @@ def main():
if not omit:
if omit_start is not None:
if omit_start == idx-1:
if omit_start == idx - 1:
print(" %d. Omitted\n" % (idx))
else:
print(" %d - %d. Omitted\n" % (omit_start+1, idx))
print(" %d - %d. Omitted\n" % (omit_start + 1, idx))
omit_start = None
if total_count is None:
@ -944,18 +969,19 @@ def main():
else:
count_str = '%d sampled %s (%d intersecting)' % (start_count, context, total_count)
if not shard_count:
print(" %d. [%s, %s]\n %d sampled %s\n" % (idx+1, start, end, count, context))
print(" %d. [%s, %s]\n %s\n" % (idx + 1, start, end, count_str))
else:
addresses_string = "; addresses=%s" % ', '.join(addresses) if addresses else ''
print(" %d. [%s, %s]\n %s spanning %d shard(s)%s\n" % (idx+1, start, end, count_str, shard_count, addresses_string))
print(" %d. [%s, %s]\n %s spanning %d shard(s)%s\n" % (
idx + 1, start, end, count_str, shard_count, addresses_string))
elif omit_start is None:
omit_start = idx
if omit_start is not None:
if omit_start == len(range_boundaries)-1:
if omit_start == len(range_boundaries) - 1:
print(" %d. Omitted\n" % len(range_boundaries))
else:
print(" %d - %d. Omitted\n" % (omit_start+1, len(range_boundaries)))
print(" %d - %d. Omitted\n" % (omit_start + 1, len(range_boundaries)))
shard_finder = ShardFinder(db, args.exclude_ports)
@ -963,7 +989,8 @@ def main():
if write_counter:
if args.top_requests:
top_writes = write_counter.get_top_k_writes(args.top_requests, args.filter_address, shard_finder=shard_finder)
top_writes = write_counter.get_top_k_writes(args.top_requests, args.filter_address,
shard_finder=shard_finder)
range_boundaries = write_counter.get_range_boundaries(args.num_buckets, shard_finder=shard_finder)
num_writes = write_counter.get_total_writes()
@ -1014,5 +1041,6 @@ def main():
print("Key-space boundaries with approximately equal read counts:\n")
print_range_boundaries(range_boundaries, "reads")
if __name__ == "__main__":
main()

View File

@ -105,8 +105,8 @@ class RangeCounterTest(unittest.TestCase):
assert rc_count == v, "Counts for %s mismatch. Expected %d got %d" % (k, v, rc_count)
for _ in range(0, 100):
i = random.randint(0, len(letters)-1)
j = random.randint(0, len(letters)-2)
i = random.randint(0, len(letters) - 1)
j = random.randint(0, len(letters) - 2)
if i == j:
j += 1
start_index = min(i, j)
@ -123,4 +123,4 @@ class RangeCounterTest(unittest.TestCase):
if __name__ == "__main__":
unittest.main() # run all tests
unittest.main() # run all tests

View File

@ -321,7 +321,7 @@ and pass the test with ``-f``:
Running a Workload on an actual Cluster
=======================================
Running a workload on a cluster works basically the smae way. However, one must
Running a workload on a cluster works basically the same way. However, one must
actually setup a cluster first. This cluster must run between one and many server
processes with the class test. So above 2-step process becomes a bit more complex:

View File

@ -890,8 +890,18 @@
}
}
},
"tenants":{
"num_tenants":0
"metacluster" : {
"cluster_type" : "management", // management, data, or standalone
"metacluster_name" : "metacluster1",
"metacluster_id" : 12345,
"data_cluster_name" : "data_cluster1", // data cluster only
"data_cluster_id" : 12346, // data cluster only
"num_data_clusters": 10 // management cluster only
},
"tenants" : {
"num_tenants" : 1, // on data cluster, local count; on management cluster, total metacluster count
"num_tenant_groups" : 10,
"tenant_group_capacity" : 20,
}
},
"client":{

View File

@ -36,7 +36,8 @@ ACTOR Future<bool> blobRestoreCommandActor(Database localDb, std::vector<StringR
state bool success = false;
wait(store(success, localDb->blobRestore(normalKeys)));
if (success) {
fmt::print("Started blob restore for the full cluster. Please use 'status' command to check progress.\n");
fmt::print(
"Started blob restore for the full cluster. Please use 'status details' command to check progress.\n");
} else {
fmt::print("Fail to start a new blob restore while there is a pending one.\n");
}

View File

@ -326,7 +326,7 @@ CommandFactory configureFactory(
"count=<TSS_COUNT>|perpetual_storage_wiggle=<WIGGLE_SPEED>|perpetual_storage_wiggle_locality="
"<<LOCALITY_KEY>:<LOCALITY_VALUE>|0>|storage_migration_type={disabled|gradual|aggressive}"
"|tenant_mode={disabled|optional_experimental|required_experimental}|blob_granules_enabled={0|1}"
"|encryption_at_rest_mode={disabled|aes_256_ctr}",
"|encryption_at_rest_mode={disabled|domain_aware|cluster_aware}",
"change the database configuration",
"The `new' option, if present, initializes a new database with the given configuration rather than changing "
"the configuration of an existing one. When used, both a redundancy mode and a storage engine must be "
@ -360,7 +360,8 @@ CommandFactory configureFactory(
"tenant_mode=<disabled|optional_experimental|required_experimental>: Sets the tenant mode for the cluster. If "
"optional, then transactions can be run with or without specifying tenants. If required, all data must be "
"accessed using tenants.\n\n"
"encryption_at_rest_mode=<disabled|aes_256_ctr>: Sets the cluster encryption data at-rest support for the "
"encryption_at_rest_mode=<disabled|domain_aware|cluster_aware>: Sets the cluster encryption data at-rest "
"support for the "
"database. The configuration can be updated ONLY at the time of database creation and once set can't be "
"updated for the lifetime of the database.\n\n"

View File

@ -1125,6 +1125,15 @@ void printStatus(StatusObjectReader statusObj,
outputString += "\n Number of Workers - " + format("%d", numWorkers);
auto numKeyRanges = statusObjBlobGranules["number_of_key_ranges"].get_int();
outputString += "\n Number of Key Ranges - " + format("%d", numKeyRanges);
if (statusObjCluster.has("blob_restore")) {
StatusObjectReader statusObjBlobRestore = statusObjCluster["blob_restore"];
std::string restoreStatus = statusObjBlobRestore["blob_full_restore_phase"].get_str();
if (statusObjBlobRestore.has("blob_full_restore_progress")) {
auto progress = statusObjBlobRestore["blob_full_restore_progress"].get_int();
restoreStatus += " " + format("%d%%", progress);
}
outputString += "\n Full Restore - " + restoreStatus;
}
}
}

View File

@ -294,6 +294,7 @@ void ClientKnobs::initialize(Randomize randomize) {
init( METACLUSTER_ASSIGNMENT_FIRST_CHOICE_DELAY, 1.0 ); if ( randomize && BUGGIFY ) METACLUSTER_ASSIGNMENT_FIRST_CHOICE_DELAY = deterministicRandom()->random01() * 60;
init( METACLUSTER_ASSIGNMENT_AVAILABILITY_TIMEOUT, 10.0 ); if ( randomize && BUGGIFY ) METACLUSTER_ASSIGNMENT_AVAILABILITY_TIMEOUT = 1 + deterministicRandom()->random01() * 59;
init( TENANT_ENTRY_CACHE_LIST_REFRESH_INTERVAL, 2 ); if( randomize && BUGGIFY ) TENANT_ENTRY_CACHE_LIST_REFRESH_INTERVAL = deterministicRandom()->randomInt(1, 10);
init( CLIENT_ENABLE_USING_CLUSTER_ID_KEY, false );
init( ENABLE_ENCRYPTION_CPU_TIME_LOGGING, false );
// clang-format on

View File

@ -206,10 +206,12 @@ std::map<std::string, std::string> configForToken(std::string const& mode) {
EncryptionAtRestMode mode;
if (value == "disabled") {
mode = EncryptionAtRestMode::DISABLED;
} else if (value == "aes_256_ctr") {
mode = EncryptionAtRestMode::AES_256_CTR;
} else if (value == "domain_aware") {
mode = EncryptionAtRestMode::DOMAIN_AWARE;
} else if (value == "cluster_aware") {
mode = EncryptionAtRestMode::CLUSTER_AWARE;
} else {
printf("Error: Only disabled|aes_256_ctr are valid for encryption_at_rest_mode.\n");
printf("Error: Only disabled|domain_aware|cluster_aware are valid for encryption_at_rest_mode.\n");
return out;
}
out[p + key] = format("%d", mode);
@ -465,6 +467,168 @@ bool isCompleteConfiguration(std::map<std::string, std::string> const& options)
options.count(p + "storage_engine") == 1;
}
/*
- Validates encryption and tenant mode configurations
- During cluster creation (configure new) we allow the following:
- If encryption mode is disabled/cluster_aware then any tenant mode is allowed
- If the encryption mode is domain_aware then the only allowed tenant mode is required
- During cluster configuration changes the following is allowed:
- Encryption mode cannot be changed (can only be set during creation)
- If the encryption mode is disabled/cluster_aware then any tenant mode changes are allowed
- If the encryption mode is domain_aware then tenant mode changes are not allowed (as the only supported mode is
required)
*/
bool isEncryptionAtRestModeConfigValid(Optional<DatabaseConfiguration> oldConfiguration,
std::map<std::string, std::string> newConfig,
bool creating) {
EncryptionAtRestMode encryptMode;
TenantMode tenantMode;
if (creating) {
if (newConfig.count(encryptionAtRestModeConfKey.toString()) != 0) {
encryptMode = EncryptionAtRestMode::fromValueRef(
ValueRef(newConfig.find(encryptionAtRestModeConfKey.toString())->second));
// check if the tenant mode is being set during configure new (otherwise assume tenants are disabled)
if (newConfig.count(tenantModeConfKey.toString()) != 0) {
tenantMode = TenantMode::fromValue(ValueRef(newConfig.find(tenantModeConfKey.toString())->second));
}
}
} else {
ASSERT(oldConfiguration.present());
encryptMode = oldConfiguration.get().encryptionAtRestMode;
if (newConfig.count(tenantModeConfKey.toString()) != 0) {
tenantMode = TenantMode::fromValue(ValueRef(newConfig.find(tenantModeConfKey.toString())->second));
} else {
// Tenant mode and encryption mode didn't change
return true;
}
}
TraceEvent(SevDebug, "EncryptAndTenantModes")
.detail("EncryptMode", encryptMode.toString())
.detail("TenantMode", tenantMode.toString());
if (encryptMode.mode == EncryptionAtRestMode::DOMAIN_AWARE && tenantMode != TenantMode::REQUIRED) {
// For domain aware encryption only the required tenant mode is currently supported
TraceEvent(SevWarnAlways, "InvalidEncryptAndTenantConfiguration")
.detail("EncryptMode", encryptMode.toString())
.detail("TenantMode", tenantMode.toString());
return false;
}
return true;
}
bool isTenantModeModeConfigValid(DatabaseConfiguration oldConfiguration, DatabaseConfiguration newConfiguration) {
TenantMode oldTenantMode = oldConfiguration.tenantMode;
TenantMode newTenantMode = newConfiguration.tenantMode;
TraceEvent(SevDebug, "TenantModes")
.detail("OldTenantMode", oldTenantMode.toString())
.detail("NewTenantMode", newTenantMode.toString());
if (oldTenantMode != TenantMode::REQUIRED && newTenantMode == TenantMode::REQUIRED) {
// TODO: Changing from optional/disabled to required tenant mode should be allowed if there is no non-tenant
// data present
TraceEvent(SevWarnAlways, "InvalidTenantConfiguration")
.detail("OldTenantMode", oldTenantMode.toString())
.detail("NewTenantMode", newTenantMode.toString());
return false;
}
return true;
}
TEST_CASE("/ManagementAPI/ChangeConfig/TenantMode") {
DatabaseConfiguration oldConfig;
DatabaseConfiguration newConfig;
std::vector<TenantMode> tenantModes = { TenantMode::DISABLED, TenantMode::OPTIONAL_TENANT, TenantMode::REQUIRED };
// required tenant mode can change to any other tenant mode
oldConfig.tenantMode = TenantMode::REQUIRED;
newConfig.tenantMode = deterministicRandom()->randomChoice(tenantModes);
ASSERT(isTenantModeModeConfigValid(oldConfig, newConfig));
// optional/disabled tenant mode can switch to optional/disabled tenant mode
oldConfig.tenantMode = deterministicRandom()->coinflip() ? TenantMode::DISABLED : TenantMode::OPTIONAL_TENANT;
newConfig.tenantMode = deterministicRandom()->coinflip() ? TenantMode::DISABLED : TenantMode::OPTIONAL_TENANT;
ASSERT(isTenantModeModeConfigValid(oldConfig, newConfig));
// optional/disabled tenant mode CANNOT switch to required tenant mode
oldConfig.tenantMode = deterministicRandom()->coinflip() ? TenantMode::DISABLED : TenantMode::OPTIONAL_TENANT;
newConfig.tenantMode = TenantMode::REQUIRED;
ASSERT(!isTenantModeModeConfigValid(oldConfig, newConfig));
return Void();
}
// unit test for changing encryption/tenant mode config options
TEST_CASE("/ManagementAPI/ChangeConfig/TenantAndEncryptMode") {
std::map<std::string, std::string> newConfig;
std::string encryptModeKey = encryptionAtRestModeConfKey.toString();
std::string tenantModeKey = tenantModeConfKey.toString();
std::vector<TenantMode> tenantModes = { TenantMode::DISABLED, TenantMode::OPTIONAL_TENANT, TenantMode::REQUIRED };
std::vector<EncryptionAtRestMode> encryptionModes = { EncryptionAtRestMode::DISABLED,
EncryptionAtRestMode::CLUSTER_AWARE,
EncryptionAtRestMode::DOMAIN_AWARE };
// configure new test cases
// encryption disabled checks
newConfig[encryptModeKey] = std::to_string(EncryptionAtRestMode::DISABLED);
newConfig[tenantModeKey] = std::to_string(deterministicRandom()->randomChoice(tenantModes));
ASSERT(isEncryptionAtRestModeConfigValid(Optional<DatabaseConfiguration>(), newConfig, true));
// cluster aware encryption checks
newConfig[encryptModeKey] = std::to_string(EncryptionAtRestMode::CLUSTER_AWARE);
newConfig[tenantModeKey] = std::to_string(deterministicRandom()->randomChoice(tenantModes));
ASSERT(isEncryptionAtRestModeConfigValid(Optional<DatabaseConfiguration>(), newConfig, true));
// domain aware encryption checks
newConfig[encryptModeKey] = std::to_string(EncryptionAtRestMode::DOMAIN_AWARE);
newConfig[tenantModeKey] =
std::to_string(deterministicRandom()->coinflip() ? TenantMode::DISABLED : TenantMode::OPTIONAL_TENANT);
ASSERT(!isEncryptionAtRestModeConfigValid(Optional<DatabaseConfiguration>(), newConfig, true));
newConfig[tenantModeKey] = std::to_string(TenantMode::REQUIRED);
ASSERT(isEncryptionAtRestModeConfigValid(Optional<DatabaseConfiguration>(), newConfig, true));
// no encrypt mode present
newConfig.erase(encryptModeKey);
newConfig[tenantModeKey] = std::to_string(deterministicRandom()->randomChoice(tenantModes));
ASSERT(isEncryptionAtRestModeConfigValid(Optional<DatabaseConfiguration>(), newConfig, true));
// no tenant mode present
newConfig.erase(tenantModeKey);
newConfig[encryptModeKey] = std::to_string(EncryptionAtRestMode::DOMAIN_AWARE);
ASSERT(!isEncryptionAtRestModeConfigValid(Optional<DatabaseConfiguration>(), newConfig, true));
newConfig[encryptModeKey] = std::to_string(EncryptionAtRestMode::CLUSTER_AWARE);
ASSERT(isEncryptionAtRestModeConfigValid(Optional<DatabaseConfiguration>(), newConfig, true));
// change config test cases
DatabaseConfiguration oldConfig;
// encryption disabled checks
oldConfig.encryptionAtRestMode = EncryptionAtRestMode::DISABLED;
oldConfig.tenantMode = deterministicRandom()->randomChoice(tenantModes);
newConfig[tenantModeKey] = std::to_string(deterministicRandom()->randomChoice(tenantModes));
ASSERT(isEncryptionAtRestModeConfigValid(oldConfig, newConfig, false));
// domain aware encryption checks
oldConfig.encryptionAtRestMode = EncryptionAtRestMode::DOMAIN_AWARE;
oldConfig.tenantMode = TenantMode::REQUIRED;
newConfig[tenantModeKey] =
std::to_string(deterministicRandom()->coinflip() ? TenantMode::DISABLED : TenantMode::OPTIONAL_TENANT);
ASSERT(!isEncryptionAtRestModeConfigValid(oldConfig, newConfig, false));
newConfig[tenantModeKey] = std::to_string(TenantMode::REQUIRED);
ASSERT(isEncryptionAtRestModeConfigValid(oldConfig, newConfig, false));
// cluster aware encryption checks
oldConfig.encryptionAtRestMode = EncryptionAtRestMode::CLUSTER_AWARE;
// required tenant mode can switch to any other tenant mode with cluster aware encryption
oldConfig.tenantMode = deterministicRandom()->randomChoice(tenantModes);
newConfig[tenantModeKey] = std::to_string(deterministicRandom()->randomChoice(tenantModes));
ASSERT(isEncryptionAtRestModeConfigValid(oldConfig, newConfig, false));
// no tenant mode present
newConfig.erase(tenantModeKey);
oldConfig.tenantMode = deterministicRandom()->randomChoice(tenantModes);
oldConfig.encryptionAtRestMode = deterministicRandom()->randomChoice(encryptionModes);
ASSERT(isEncryptionAtRestModeConfigValid(oldConfig, newConfig, false));
return Void();
}
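
The rules spelled out in the comment above boil down to one compatibility check between encryption mode and tenant mode: domain-aware encryption requires the required tenant mode, while disabled and cluster-aware encryption accept any tenant mode. A minimal standalone sketch of that rule, using stand-in enums rather than the real FDB types (illustrative only, not the actual implementation):

```cpp
#include <cassert>

// Stand-in enums for illustration only; the real types live in fdbclient/FDBTypes.h.
enum class EncryptMode { Disabled, DomainAware, ClusterAware };
enum class TenantMode { Disabled, Optional, Required };

// Core compatibility rule: domain-aware encryption is only valid when tenants
// are required; every other encryption mode accepts any tenant mode.
bool modesCompatible(EncryptMode e, TenantMode t) {
    if (e == EncryptMode::DomainAware) {
        return t == TenantMode::Required;
    }
    return true;
}

int main() {
    assert(modesCompatible(EncryptMode::Disabled, TenantMode::Optional));
    assert(modesCompatible(EncryptMode::ClusterAware, TenantMode::Disabled));
    assert(modesCompatible(EncryptMode::DomainAware, TenantMode::Required));
    assert(!modesCompatible(EncryptMode::DomainAware, TenantMode::Optional));
    return 0;
}
```
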
ACTOR Future<DatabaseConfiguration> getDatabaseConfiguration(Transaction* tr) {
tr->setOption(FDBTransactionOptions::READ_LOCK_AWARE);
tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
@ -962,6 +1126,14 @@ ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
if (!disableConfigDB) {
wait(verifyConfigurationDatabaseAlive(tr->getDatabase()));
}
if (BUGGIFY_WITH_PROB(0.1)) {
// Introduce a random delay in simulation to allow processes to be
// killed before previousCoordinatorKeys has been reset. This will
// help test scenarios where the previous configuration database
// state has been transferred to the new coordinators but the
// broadcaster thinks it has not been transferred.
wait(delay(deterministicRandom()->random01() * 10));
}
wait(resetPreviousCoordinatorsKey(tr->getDatabase()));
return CoordinatorsResult::SAME_NETWORK_ADDRESSES;
}

View File

@ -1548,17 +1548,19 @@ ThreadFuture<Void> MultiVersionTransaction::onError(Error const& e) {
auto f = tr.transaction ? tr.transaction->onError(e) : makeTimeout<Void>();
f = abortableFuture(f, tr.onChange);
return flatMapThreadFuture<Void, Void>(f, [this, e](ErrorOr<Void> ready) {
if (!ready.isError() || ready.getError().code() != error_code_cluster_version_changed) {
if (ready.isError()) {
return ErrorOr<ThreadFuture<Void>>(ready.getError());
}
return flatMapThreadFuture<Void, Void>(f, [this](ErrorOr<Void> ready) {
if (ready.isError() && ready.getError().code() == error_code_cluster_version_changed) {
// In case of a cluster version change, upgrade (or downgrade) the transaction
// and let it to be retried independently of the original error
updateTransaction();
return ErrorOr<ThreadFuture<Void>>(Void());
}
// In all other cases forward the result of the inner onError call
if (ready.isError()) {
return ErrorOr<ThreadFuture<Void>>(ready.getError());
} else {
return ErrorOr<ThreadFuture<Void>>(Void());
}
updateTransaction();
return ErrorOr<ThreadFuture<Void>>(onError(e));
});
}
}
@ -2968,7 +2970,7 @@ ACTOR Future<std::string> updateClusterSharedStateMapImpl(MultiVersionApi* self,
// The cluster ID will be the connection record string (either a filename or the connection string itself)
// in versions before we could read the cluster ID.
state std::string clusterId = connectionRecord.toString();
if (dbProtocolVersion.hasClusterIdSpecialKey()) {
if (CLIENT_KNOBS->CLIENT_ENABLE_USING_CLUSTER_ID_KEY && dbProtocolVersion.hasClusterIdSpecialKey()) {
state Reference<ITransaction> tr = db->createTransaction();
loop {
try {

View File

@ -7110,11 +7110,11 @@ ACTOR Future<Void> readVersionBatcher(DatabaseContext* cx,
state Reference<Histogram> batchIntervalDist =
Histogram::getHistogram("GrvBatcher"_sr,
"ClientGrvBatchInterval"_sr,
Histogram::Unit::microseconds,
Histogram::Unit::milliseconds,
0,
CLIENT_KNOBS->GRV_BATCH_TIMEOUT * 1000000 * 2);
state Reference<Histogram> grvReplyLatencyDist =
Histogram::getHistogram("GrvBatcher"_sr, "ClientGrvReplyLatency"_sr, Histogram::Unit::microseconds);
Histogram::getHistogram("GrvBatcher"_sr, "ClientGrvReplyLatency"_sr, Histogram::Unit::milliseconds);
state double lastRequestTime = now();
state TransactionTagMap<uint32_t> tags;
@ -10732,12 +10732,13 @@ ACTOR Future<Key> purgeBlobGranulesActor(Reference<DatabaseContext> db,
// must be aligned to blob range(s)
state Future<Standalone<VectorRef<KeyRangeRef>>> blobbifiedBegin =
getBlobRanges(&tr, KeyRangeRef(purgeRange.begin, purgeRange.begin), 2);
getBlobRanges(&tr, KeyRangeRef(purgeRange.begin, keyAfter(purgeRange.begin)), 1);
state Future<Standalone<VectorRef<KeyRangeRef>>> blobbifiedEnd =
getBlobRanges(&tr, KeyRangeRef(purgeRange.end, purgeRange.end), 2);
getBlobRanges(&tr, KeyRangeRef(purgeRange.end, keyAfter(purgeRange.end)), 1);
wait(success(blobbifiedBegin) && success(blobbifiedEnd));
// If there are no blob ranges on the boundary that's okay as we allow purging of multiple full ranges.
if ((!blobbifiedBegin.get().empty() && blobbifiedBegin.get().front().begin < purgeRange.begin) ||
(!blobbifiedEnd.get().empty() && blobbifiedEnd.get().back().end > purgeRange.end)) {
(!blobbifiedEnd.get().empty() && blobbifiedEnd.get().front().begin < purgeRange.end)) {
TraceEvent("UnalignedPurge")
.detail("Range", range)
.detail("Version", purgeVersion)
@ -10941,8 +10942,7 @@ ACTOR Future<bool> blobRestoreActor(Reference<DatabaseContext> cx, KeyRange rang
return false; // stop if there is in-progress restore.
}
}
Standalone<BlobRestoreStatus> status;
status.progress = 0;
BlobRestoreStatus status(BlobRestorePhase::INIT);
Value newValue = blobRestoreCommandValueFor(status);
tr->set(key, newValue);
wait(tr->commit());

View File

@ -218,8 +218,12 @@ class GetGenerationQuorum {
if (self->coordinatorsChangedFuture.isReady()) {
throw coordinators_changed();
}
wait(delayJittered(std::clamp(
0.005 * (1 << std::min(retries, 30)), 0.0, CLIENT_KNOBS->TIMEOUT_RETRY_UPPER_BOUND)));
if (deterministicRandom()->random01() < 0.95) {
// Add some random jitter to prevent clients from
// contending.
wait(delayJittered(std::clamp(
0.006 * (1 << std::min(retries, 30)), 0.0, CLIENT_KNOBS->TIMEOUT_RETRY_UPPER_BOUND)));
}
if (deterministicRandom()->random01() < 0.05) {
// Randomly inject a delay of at least the generation
// reply timeout, to try to prevent contention between
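
The retry path above clamps an exponential backoff and adds jitter so that contending clients spread out. A small self-contained sketch of that shape; the constants and the jitter range here are illustrative, not the real knob values:

```cpp
#include <algorithm>
#include <cstdio>
#include <random>

// Illustrative clamped exponential backoff with jitter; the real code uses
// FDB's deterministicRandom() and CLIENT_KNOBS->TIMEOUT_RETRY_UPPER_BOUND.
double backoffDelay(int retries, double upperBound, std::mt19937& rng) {
    // Exponential growth, with the shift capped so it never overflows.
    double base = 0.006 * (1 << std::min(retries, 30));
    base = std::clamp(base, 0.0, upperBound);
    // Jitter: scale by a random factor so clients desynchronize.
    std::uniform_real_distribution<double> jitter(0.5, 1.5);
    return base * jitter(rng);
}

int main() {
    std::mt19937 rng(42);
    for (int r = 0; r < 8; ++r) {
        std::printf("retry %d -> %.3fs\n", r, backoffDelay(r, 5.0, rng));
    }
    return 0;
}
```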

View File

@ -855,7 +855,8 @@ const KeyRef JSONSchemas::statusSchema = R"statusSchema(
"encryption_at_rest_mode": {
"$enum":[
"disabled",
"aes_256_ctr"
"domain_aware",
"cluster_aware"
]}
},
"consistency_scan_info":{
@ -963,11 +964,18 @@ const KeyRef JSONSchemas::statusSchema = R"statusSchema(
}
}
},
"tenants":{
"num_tenants":0
},
"metacluster" : {
"cluster_type" : "standalone"
"cluster_type" : "management",
"metacluster_name":"metacluster1",
"metacluster_id":12345,
"data_cluster_name" : "data_cluster1",
"data_cluster_id" : 12346,
"num_data_clusters":10
},
"tenants":{
"num_tenants":0,
"num_tenant_groups":10,
"tenant_group_capacity":20
}
},
"client":{

View File

@ -301,6 +301,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( TENANT_CACHE_LIST_REFRESH_INTERVAL, 2 ); if( randomize && BUGGIFY ) TENANT_CACHE_LIST_REFRESH_INTERVAL = deterministicRandom()->randomInt(1, 10);
init( TENANT_CACHE_STORAGE_USAGE_REFRESH_INTERVAL, 2 ); if( randomize && BUGGIFY ) TENANT_CACHE_STORAGE_USAGE_REFRESH_INTERVAL = deterministicRandom()->randomInt(1, 10);
init( TENANT_CACHE_STORAGE_QUOTA_REFRESH_INTERVAL, 10 ); if( randomize && BUGGIFY ) TENANT_CACHE_STORAGE_QUOTA_REFRESH_INTERVAL = deterministicRandom()->randomInt(1, 10);
init( TENANT_CACHE_STORAGE_USAGE_TRACE_INTERVAL, 300 );
init( CP_FETCH_TENANTS_OVER_STORAGE_QUOTA_INTERVAL, 5 ); if( randomize && BUGGIFY ) CP_FETCH_TENANTS_OVER_STORAGE_QUOTA_INTERVAL = deterministicRandom()->randomInt(1, 10);
// TeamRemover
@ -390,19 +391,22 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
// If true, do not process and store RocksDB logs
init( ROCKSDB_MUTE_LOGS, true );
// Use a smaller memtable in simulation to avoid OOMs.
int64_t memtableBytes = isSimulated ? 32 * 1024 : 512 * 1024 * 1024;
int64_t memtableBytes = isSimulated ? 1024 * 1024 : 512 * 1024 * 1024;
init( ROCKSDB_MEMTABLE_BYTES, memtableBytes );
init( ROCKSDB_LEVEL_STYLE_COMPACTION, true );
init( ROCKSDB_UNSAFE_AUTO_FSYNC, false );
init( ROCKSDB_PERIODIC_COMPACTION_SECONDS, 0 );
init( ROCKSDB_PREFIX_LEN, 0 );
// If rocksdb block cache size is 0, the default 8MB is used.
int64_t blockCacheSize = isSimulated ? 0 : 1024 * 1024 * 1024 /* 1GB */;
int64_t blockCacheSize = isSimulated ? 16 * 1024 * 1024 : 1024 * 1024 * 1024 /* 1GB */;
init( ROCKSDB_BLOCK_CACHE_SIZE, blockCacheSize );
init( ROCKSDB_METRICS_DELAY, 60.0 );
init( ROCKSDB_READ_VALUE_TIMEOUT, isSimulated ? 5.0 : 200.0 );
init( ROCKSDB_READ_VALUE_PREFIX_TIMEOUT, isSimulated ? 5.0 : 200.0 );
init( ROCKSDB_READ_RANGE_TIMEOUT, isSimulated ? 5.0 : 200.0 );
// ROCKSDB_READ_VALUE_TIMEOUT, ROCKSDB_READ_VALUE_PREFIX_TIMEOUT, ROCKSDB_READ_RANGE_TIMEOUT knobs:
// In simulation, increasing the read operation timeouts to 5 minutes, as some of the tests have
// very high load and single read thread cannot process all the load within the timeouts.
init( ROCKSDB_READ_VALUE_TIMEOUT, 5.0 ); if (isSimulated) ROCKSDB_READ_VALUE_TIMEOUT = 5 * 60;
init( ROCKSDB_READ_VALUE_PREFIX_TIMEOUT, 5.0 ); if (isSimulated) ROCKSDB_READ_VALUE_PREFIX_TIMEOUT = 5 * 60;
init( ROCKSDB_READ_RANGE_TIMEOUT, 5.0 ); if (isSimulated) ROCKSDB_READ_RANGE_TIMEOUT = 5 * 60;
init( ROCKSDB_READ_QUEUE_WAIT, 1.0 );
init( ROCKSDB_READ_QUEUE_HARD_MAX, 1000 );
init( ROCKSDB_READ_QUEUE_SOFT_MAX, 500 );
@ -436,6 +440,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( ROCKSDB_SINGLEKEY_DELETES_BYTES_LIMIT, 200000 ); // 200KB
init( ROCKSDB_ENABLE_CLEAR_RANGE_EAGER_READS, true ); if( randomize && BUGGIFY ) ROCKSDB_ENABLE_CLEAR_RANGE_EAGER_READS = deterministicRandom()->coinflip();
// ROCKSDB_STATS_LEVEL=1 indicates rocksdb::StatsLevel::kExceptHistogramOrTimers
// Refer StatsLevel: https://github.com/facebook/rocksdb/blob/main/include/rocksdb/statistics.h#L594
init( ROCKSDB_STATS_LEVEL, 1 ); if( randomize && BUGGIFY ) ROCKSDB_STATS_LEVEL = deterministicRandom()->randomInt(0, 6);
// Can commit will delay ROCKSDB_CAN_COMMIT_DELAY_ON_OVERLOAD seconds for
// ROCKSDB_CAN_COMMIT_DELAY_TIMES_ON_OVERLOAD times, if rocksdb overloaded.
@ -555,7 +560,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( BACKUP_TIMEOUT, 0.4 );
init( BACKUP_NOOP_POP_DELAY, 5.0 );
init( BACKUP_FILE_BLOCK_BYTES, 1024 * 1024 );
init( BACKUP_LOCK_BYTES, 3e9 ); if(randomize && BUGGIFY) BACKUP_LOCK_BYTES = deterministicRandom()->randomInt(1024, 4096) * 15 * 1024;
init( BACKUP_LOCK_BYTES, 3e9 ); if(randomize && BUGGIFY) BACKUP_LOCK_BYTES = deterministicRandom()->randomInt(1024, 4096) * 30 * 1024;
init( BACKUP_UPLOAD_DELAY, 10.0 ); if(randomize && BUGGIFY) BACKUP_UPLOAD_DELAY = deterministicRandom()->random01() * 60;
//Cluster Controller

View File

@ -876,6 +876,7 @@ const KeyRef triggerDDTeamInfoPrintKey("\xff/triggerDDTeamInfoPrint"_sr);
const KeyRef consistencyScanInfoKey = "\xff/consistencyScanInfo"_sr;
const KeyRef encryptionAtRestModeConfKey("\xff/conf/encryption_at_rest_mode"_sr);
const KeyRef tenantModeConfKey("\xff/conf/tenant_mode"_sr);
const KeyRangeRef excludedServersKeys("\xff/conf/excluded/"_sr, "\xff/conf/excluded0"_sr);
const KeyRef excludedServersPrefix = excludedServersKeys.begin;

View File

@ -355,21 +355,25 @@ Span& Span::operator=(Span&& o) {
g_tracer->trace(*this);
}
arena = std::move(o.arena);
context = o.context;
parentContext = o.parentContext;
begin = o.begin;
end = o.end;
location = o.location;
links = std::move(o.links);
events = std::move(o.events);
status = o.status;
kind = o.kind;
o.context = SpanContext();
o.parentContext = SpanContext();
o.kind = SpanKind::INTERNAL;
o.begin = 0.0;
o.end = 0.0;
o.status = SpanStatus::UNSET;
// All memory referenced in *Ref fields of Span is now (potentially)
// invalid, and o no longer has ownership of any memory referenced by *Ref
// fields of o. We must ensure that o no longer references any memory it no
// longer owns, and that *this no longer references any memory it no longer
// owns. Not every field references arena memory, but this std::exchange
// pattern provides a nice template for getting this right in a concise way
// should we add more fields to Span.
attributes = std::exchange(o.attributes, decltype(o.attributes)());
begin = std::exchange(o.begin, decltype(o.begin)());
context = std::exchange(o.context, decltype(o.context)());
end = std::exchange(o.end, decltype(o.end)());
events = std::exchange(o.events, decltype(o.events)());
kind = std::exchange(o.kind, decltype(o.kind)());
links = std::exchange(o.links, decltype(o.links)());
location = std::exchange(o.location, decltype(o.location)());
parentContext = std::exchange(o.parentContext, decltype(o.parentContext)());
status = std::exchange(o.status, decltype(o.status)());
return *this;
}
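
The comment above describes the std::exchange pattern for move assignment: take each field from the source and reset the source in the same expression, so neither object is left referencing memory it no longer owns. A tiny self-contained illustration of the pattern on an unrelated struct:

```cpp
#include <cassert>
#include <string>
#include <utility>
#include <vector>

// Each field is taken from the source while the source is simultaneously
// reset to a default-constructed value.
struct Record {
    std::string name;
    std::vector<int> values;
    double begin = 0.0;

    Record() = default;
    Record(std::string n, std::vector<int> v, double b)
      : name(std::move(n)), values(std::move(v)), begin(b) {}

    Record& operator=(Record&& o) noexcept {
        name = std::exchange(o.name, decltype(o.name)());
        values = std::exchange(o.values, decltype(o.values)());
        begin = std::exchange(o.begin, decltype(o.begin)());
        return *this;
    }
};

int main() {
    Record a;
    Record b("span", { 1, 2, 3 }, 42.0);
    a = std::move(b);
    assert(a.name == "span" && a.values.size() == 3 && a.begin == 42.0);
    // The moved-from object is in a well-defined, default state.
    assert(b.name.empty() && b.values.empty() && b.begin == 0.0);
    return 0;
}
```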

View File

@ -314,13 +314,19 @@ struct BlobManifest {
};
// Defines blob restore status
enum BlobRestorePhase { INIT = 0, LOAD_MANIFEST = 1, MANIFEST_DONE = 2, MIGRATE = 3, APPLY_MLOGS = 4, DONE = 5 };
struct BlobRestoreStatus {
constexpr static FileIdentifier file_identifier = 378657;
BlobRestorePhase phase;
int progress;
BlobRestoreStatus() : phase(BlobRestorePhase::INIT){};
BlobRestoreStatus(BlobRestorePhase pha) : phase(pha), progress(0){};
BlobRestoreStatus(BlobRestorePhase pha, int prog) : phase(pha), progress(prog){};
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, progress);
serializer(ar, phase, progress);
}
};
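
The phases introduced above form a linear progression from INIT to DONE, and later code only starts migration once the manifest load has finished. A hedged sketch of that progression with a stand-in enum (the names mirror the diff, but this is not the FDB code):

```cpp
#include <cstdio>

// Stand-in for the BlobRestorePhase progression shown above; illustrative only.
enum Phase { INIT = 0, LOAD_MANIFEST, MANIFEST_DONE, MIGRATE, APPLY_MLOGS, DONE };

const char* phaseName(Phase p) {
    switch (p) {
    case INIT: return "INIT";
    case LOAD_MANIFEST: return "LOAD_MANIFEST";
    case MANIFEST_DONE: return "MANIFEST_DONE";
    case MIGRATE: return "MIGRATE";
    case APPLY_MLOGS: return "APPLY_MLOGS";
    case DONE: return "DONE";
    }
    return "UNKNOWN";
}

int main() {
    // Walk the restore forward one phase at a time; migration is only allowed
    // to start once the manifest has been fully loaded (MANIFEST_DONE).
    for (Phase p = INIT; p != DONE; p = static_cast<Phase>(p + 1)) {
        bool canStartMigration = (p == MANIFEST_DONE);
        std::printf("%-14s canStartMigration=%d\n", phaseName(p), canStartMigration);
    }
    return 0;
}
```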

View File

@ -289,6 +289,7 @@ public:
double METACLUSTER_ASSIGNMENT_FIRST_CHOICE_DELAY;
double METACLUSTER_ASSIGNMENT_AVAILABILITY_TIMEOUT;
int TENANT_ENTRY_CACHE_LIST_REFRESH_INTERVAL; // How often the TenantEntryCache is refreshed
bool CLIENT_ENABLE_USING_CLUSTER_ID_KEY;
// Encryption-at-rest
bool ENABLE_ENCRYPTION_CPU_TIME_LOGGING;

View File

@ -1464,7 +1464,7 @@ struct TenantMode {
struct EncryptionAtRestMode {
// These enumerated values are stored in the database configuration, so can NEVER be changed. Only add new ones
// just before END.
enum Mode { DISABLED = 0, AES_256_CTR = 1, END = 2 };
enum Mode { DISABLED = 0, DOMAIN_AWARE = 1, CLUSTER_AWARE = 2, END = 3 };
EncryptionAtRestMode() : mode(DISABLED) {}
EncryptionAtRestMode(Mode mode) : mode(mode) {
@ -1483,14 +1483,30 @@ struct EncryptionAtRestMode {
switch (mode) {
case DISABLED:
return "disabled";
case AES_256_CTR:
return "aes_256_ctr";
case DOMAIN_AWARE:
return "domain_aware";
case CLUSTER_AWARE:
return "cluster_aware";
default:
ASSERT(false);
}
return "";
}
static EncryptionAtRestMode fromString(std::string mode) {
if (mode == "disabled") {
return EncryptionAtRestMode::DISABLED;
} else if (mode == "cluster_aware") {
return EncryptionAtRestMode::CLUSTER_AWARE;
} else if (mode == "domain_aware") {
return EncryptionAtRestMode::DOMAIN_AWARE;
} else {
TraceEvent(SevError, "UnknownEncryptMode").detail("EncryptMode", mode);
ASSERT(false);
throw internal_error();
}
}
Value toValue() const { return ValueRef(format("%d", (int)mode)); }
bool isEquals(const EncryptionAtRestMode& e) const { return this->mode == e.mode; }
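
Because these enum values are persisted in the database configuration, a decoder has to treat the stored integer as authoritative and reject anything at or beyond END. A minimal sketch of that decode step with stand-in names:

```cpp
#include <cassert>
#include <optional>

// Illustrative only: values must stay stable because they are persisted, so
// new modes are appended just before End and never renumbered.
enum class RestMode { Disabled = 0, DomainAware = 1, ClusterAware = 2, End = 3 };

std::optional<RestMode> decodeRestMode(int stored) {
    if (stored < 0 || stored >= static_cast<int>(RestMode::End)) {
        return std::nullopt; // unknown or future value
    }
    return static_cast<RestMode>(stored);
}

int main() {
    assert(decodeRestMode(1) == RestMode::DomainAware);
    assert(!decodeRestMode(3).has_value()); // End itself is not a valid stored mode
    return 0;
}
```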

View File

@ -133,6 +133,11 @@ bool isCompleteConfiguration(std::map<std::string, std::string> const& options);
ConfigureAutoResult parseConfig(StatusObject const& status);
bool isEncryptionAtRestModeConfigValid(Optional<DatabaseConfiguration> oldConfiguration,
std::map<std::string, std::string> newConfig,
bool creating);
bool isTenantModeModeConfigValid(DatabaseConfiguration oldConfiguration, DatabaseConfiguration newConfiguration);
// Management API written in template code to support both IClientAPI and NativeAPI
namespace ManagementAPI {
@ -276,6 +281,9 @@ Future<ConfigurationResult> changeConfig(Reference<DB> db, std::map<std::string,
if (!isCompleteConfiguration(m)) {
return ConfigurationResult::INCOMPLETE_CONFIGURATION;
}
if (!isEncryptionAtRestModeConfigValid(Optional<DatabaseConfiguration>(), m, creating)) {
return ConfigurationResult::INVALID_CONFIGURATION;
}
} else if (m.count(encryptionAtRestModeConfKey.toString()) != 0) {
// Encryption data at-rest mode can be set only at the time of database creation
return ConfigurationResult::ENCRYPTION_AT_REST_MODE_ALREADY_SET;
@ -322,6 +330,12 @@ Future<ConfigurationResult> changeConfig(Reference<DB> db, std::map<std::string,
if (!newConfig.isValid()) {
return ConfigurationResult::INVALID_CONFIGURATION;
}
if (!isEncryptionAtRestModeConfigValid(oldConfig, m, creating)) {
return ConfigurationResult::INVALID_CONFIGURATION;
}
if (!isTenantModeModeConfigValid(oldConfig, newConfig)) {
return ConfigurationResult::INVALID_CONFIGURATION;
}
if (newConfig.tLogPolicy->attributeKeys().count("dcid") && newConfig.regions.size() > 0) {
return ConfigurationResult::REGION_REPLICATION_MISMATCH;

View File

@ -244,6 +244,8 @@ public:
// in the TenantCache
int TENANT_CACHE_STORAGE_QUOTA_REFRESH_INTERVAL; // How often the storage quota allocated to each tenant is
// refreshed in the TenantCache
int TENANT_CACHE_STORAGE_USAGE_TRACE_INTERVAL; // The minimum interval between consecutive trace events logging the
// storage bytes used by a tenant group
int CP_FETCH_TENANTS_OVER_STORAGE_QUOTA_INTERVAL; // How often the commit proxies send requests to the data
// distributor to fetch the list of tenants over storage quota
@ -313,7 +315,7 @@ public:
// KeyValueStoreRocksDB
bool ROCKSDB_SET_READ_TIMEOUT;
bool ROCKSDB_LEVEL_COMPACTION_DYNAMIC_LEVEL_BYTES;
int ROCKSDB_SUGGEST_COMPACT_CLEAR_RANGE;
bool ROCKSDB_SUGGEST_COMPACT_CLEAR_RANGE;
int ROCKSDB_READ_RANGE_ROW_LIMIT;
int ROCKSDB_READER_THREAD_PRIORITY;
int ROCKSDB_WRITER_THREAD_PRIORITY;

View File

@ -284,6 +284,9 @@ extern const KeyRef triggerDDTeamInfoPrintKey;
// Encryption data at-rest config key
extern const KeyRef encryptionAtRestModeConfKey;
// Tenant mode config key
extern const KeyRef tenantModeConfKey;
// The differences between excluded and failed can be found in "command-line-interface.rst"
// and in the help message of the fdbcli command "exclude".

View File

@ -235,7 +235,6 @@ struct TenantNameUniqueSet {
return tenantNames.empty();
}
};
class TenantPrefixIndex : public VersionedMap<Key, TenantNameUniqueSet>, public ReferenceCounted<TenantPrefixIndex> {};
typedef VersionedMap<Key, TenantNameUniqueSet> TenantPrefixIndex;
#endif

View File

@ -243,7 +243,7 @@ ACTOR Future<Void> read_http_response(Reference<HTTP::Response> r, Reference<ICo
auto i = r->headers.find("Content-Length");
if (i != r->headers.end())
r->contentLen = atoi(i->second.c_str());
r->contentLen = strtoll(i->second.c_str(), NULL, 10);
else
r->contentLen = -1; // Content length unknown
@ -481,7 +481,7 @@ ACTOR Future<Reference<HTTP::Response>> doRequest(Reference<IConnection> conn,
}
if (FLOW_KNOBS->HTTP_VERBOSE_LEVEL > 0) {
printf("[%s] HTTP %scode=%d early=%d, time=%fs %s %s contentLen=%d [%d out, response content len %d]\n",
printf("[%s] HTTP %scode=%d early=%d, time=%fs %s %s contentLen=%d [%d out, response content len %lld]\n",
conn->getDebugID().toString().c_str(),
(err.present() ? format("*ERROR*=%s ", err.get().name()).c_str() : ""),
r->code,
@ -491,7 +491,7 @@ ACTOR Future<Reference<HTTP::Response>> doRequest(Reference<IConnection> conn,
resource.c_str(),
contentLen,
total_sent,
(int)r->contentLen);
r->contentLen);
}
if (FLOW_KNOBS->HTTP_VERBOSE_LEVEL > 2) {
printf("[%s] HTTP RESPONSE: %s %s\n%s\n",

View File

@ -102,7 +102,7 @@ public:
// If not found, start the read.
if (i == f->m_blocks.end() || (i->second.isValid() && i->second.isError())) {
// printf("starting read of %s block %d\n", f->getFilename().c_str(), blockNum);
fblock = readBlock(f.getPtr(), f->m_block_size, f->m_block_size * blockNum);
fblock = readBlock(f.getPtr(), f->m_block_size, (int64_t)f->m_block_size * blockNum);
f->m_blocks[blockNum] = fblock;
} else
fblock = i->second;
@ -121,7 +121,7 @@ public:
// Calculate the block-relative read range. It's a given that the offset / length range touches this block
// so readStart will never be greater than blocksize (though it could be past the actual end of a short
// block).
int64_t blockStart = blockNum * f->m_block_size;
int64_t blockStart = (int64_t)blockNum * f->m_block_size;
int64_t readStart = std::max<int64_t>(0, offset - blockStart);
int64_t readEnd = std::min<int64_t>(f->m_block_size, offset + length - blockStart);
int rlen = readEnd - readStart;
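
The added (int64_t) cast keeps the blockNum * m_block_size product out of 32-bit arithmetic. A short sketch of the failure mode, with hypothetical block sizes:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
    // Hypothetical values: 1 MiB blocks, block number 4096 -> byte offset 4 GiB,
    // which does not fit in 32 bits.
    int blockSize = 1 << 20;
    int blockNum = 4096;

    // Widening one operand first forces the whole product into 64-bit
    // arithmetic, which is what the (int64_t) cast in the diff achieves.
    int64_t offset = (int64_t)blockNum * blockSize;

    // For comparison, the same product evaluated in 32 bits (unsigned, to keep
    // the wraparound well-defined) loses the high bits entirely.
    uint32_t wrapped = (uint32_t)blockNum * (uint32_t)blockSize;

    std::printf("64-bit offset: %lld\n", (long long)offset); // 4294967296
    std::printf("32-bit wrap:   %u\n", (unsigned)wrapped);   // 0
    return 0;
}
```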

View File

@ -56,7 +56,7 @@ public:
FailDisk,
RebootAndDelete,
RebootProcessAndDelete,
RebootProcessAndSwitch,
RebootProcessAndSwitch, // Reboot and switch cluster file
Reboot,
RebootProcess,
None

View File

@ -63,7 +63,8 @@ ISimulator::ISimulator()
: desiredCoordinators(1), physicalDatacenters(1), processesPerMachine(0), listenersPerProcess(1), usableRegions(1),
allowLogSetKills(true), tssMode(TSSMode::Disabled), configDBType(ConfigDBType::DISABLED), isStopped(false),
lastConnectionFailure(0), connectionFailuresDisableDuration(0), speedUpSimulation(false),
backupAgents(BackupAgentType::WaitForType), drAgents(BackupAgentType::WaitForType), allSwapsDisabled(false) {}
backupAgents(BackupAgentType::WaitForType), drAgents(BackupAgentType::WaitForType), allSwapsDisabled(false),
blobGranulesEnabled(false) {}
ISimulator::~ISimulator() = default;
bool simulator_should_inject_fault(const char* context, const char* file, int line, int error_code) {

View File

@ -49,8 +49,8 @@ struct VersionedMessage {
Arena decryptArena; // Arena used for decrypt buffer.
size_t bytes; // arena's size when inserted, which can grow afterwards
VersionedMessage(LogMessageVersion v, StringRef m, const VectorRef<Tag>& t, const Arena& a)
: version(v), message(m), tags(t), arena(a), bytes(a.getSize()) {}
VersionedMessage(LogMessageVersion v, StringRef m, const VectorRef<Tag>& t, const Arena& a, size_t n)
: version(v), message(m), tags(t), arena(a), bytes(n) {}
Version getVersion() const { return version.version; }
uint32_t getSubVersion() const { return version.sub; }
@ -977,15 +977,17 @@ ACTOR Future<Void> pullAsyncData(BackupData* self) {
// Note we aggressively peek (uncommitted) messages, but only committed
// messages/mutations will be flushed to disk/blob in uploadData().
while (r->hasMessage()) {
state size_t takeBytes = 0;
if (!prev.sameArena(r->arena())) {
TraceEvent(SevDebugMemory, "BackupWorkerMemory", self->myId)
.detail("Take", r->arena().getSize())
.detail("Current", self->lock->activePermits());
wait(self->lock->take(TaskPriority::DefaultYield, r->arena().getSize()));
takeBytes = r->arena().getSize(); // more bytes can be allocated after the wait.
wait(self->lock->take(TaskPriority::DefaultYield, takeBytes));
prev = r->arena();
}
self->messages.emplace_back(r->version(), r->getMessage(), r->getTags(), r->arena());
self->messages.emplace_back(r->version(), r->getMessage(), r->getTags(), r->arena(), takeBytes);
r->nextMessage();
}
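
The takeBytes change records the arena size at the moment the permits are taken, because the arena can keep growing while the actor waits and the later release has to match what was actually taken. A minimal sketch of that bookkeeping, with a plain counter standing in for the FlowLock:

```cpp
#include <cassert>
#include <cstdint>

// Simplified stand-in for a byte-counting lock: take() and release() must be
// called with the same amount or the active count drifts.
struct ByteBudget {
    int64_t active = 0;
    void take(int64_t n) { active += n; }
    void release(int64_t n) { active -= n; }
};

int main() {
    ByteBudget lock;
    int64_t arenaSize = 1000; // size observed when we decided to take permits

    // Record the amount at take time...
    int64_t takeBytes = arenaSize;
    lock.take(takeBytes);

    // ...because the arena can grow afterwards (more messages appended while
    // the actor was waiting). Releasing the *current* size would over-release.
    arenaSize = 2500;

    lock.release(takeBytes); // release exactly what was taken
    assert(lock.active == 0);
    return 0;
}
```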

View File

@ -3547,10 +3547,16 @@ ACTOR Future<Void> recoverBlobManager(Reference<BlobManagerData> bmData) {
bool isFullRestore = wait(isFullRestoreMode(bmData->db, normalKeys));
bmData->isFullRestoreMode = isFullRestore;
if (bmData->isFullRestoreMode) {
BlobRestoreStatus initStatus(BlobRestorePhase::LOAD_MANIFEST);
wait(updateRestoreStatus(bmData->db, normalKeys, initStatus));
wait(loadManifest(bmData->db, bmData->bstore));
int64_t epoc = wait(lastBlobEpoc(bmData->db, bmData->bstore));
wait(updateEpoch(bmData, epoc + 1));
BlobRestoreStatus completedStatus(BlobRestorePhase::MANIFEST_DONE);
wait(updateRestoreStatus(bmData->db, normalKeys, completedStatus));
}
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(bmData->db);

View File

@ -545,7 +545,7 @@ ACTOR Future<bool> isFullRestoreMode(Database db, KeyRangeRef keys) {
KeyRange keyRange = decodeBlobRestoreCommandKeyFor(r.key);
if (keyRange.contains(keys)) {
Standalone<BlobRestoreStatus> status = decodeBlobRestoreStatus(r.value);
return status.progress < 100; // progress is less than 100
return status.phase < BlobRestorePhase::DONE;
}
}
if (!ranges.more) {
@ -563,3 +563,44 @@ ACTOR Future<bool> isFullRestoreMode(Database db, KeyRangeRef keys) {
}
}
}
// Update restore status
ACTOR Future<Void> updateRestoreStatus(Database db, KeyRangeRef range, BlobRestoreStatus status) {
state Transaction tr(db);
loop {
try {
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
Key key = blobRestoreCommandKeyFor(range);
Value value = blobRestoreCommandValueFor(status);
tr.set(key, value);
wait(tr.commit());
return Void();
} catch (Error& e) {
wait(tr.onError(e));
}
}
}
// Get restore status
ACTOR Future<Optional<BlobRestoreStatus>> getRestoreStatus(Database db, KeyRangeRef range) {
state Transaction tr(db);
loop {
try {
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
Key key = blobRestoreCommandKeyFor(range);
Optional<Value> value = wait(tr.get(key));
Optional<BlobRestoreStatus> result;
if (value.present()) {
Standalone<BlobRestoreStatus> status = decodeBlobRestoreStatus(value.get());
result = status;
}
return result;
} catch (Error& e) {
wait(tr.onError(e));
}
}
}

View File

@ -18,6 +18,7 @@
* limitations under the License.
*/
#include "fdbclient/BlobGranuleCommon.h"
#include "flow/ActorCollection.h"
#include "flow/FastRef.h"
#include "flow/IRandom.h"
@ -75,8 +76,8 @@ private:
// Check if blob manifest is loaded so that blob migration can start
ACTOR static Future<Void> checkIfReadyForMigration(Reference<BlobMigrator> self) {
loop {
bool isFullRestore = wait(isFullRestoreMode(self->db_, normalKeys));
if (isFullRestore) {
Optional<BlobRestoreStatus> status = wait(getRestoreStatus(self->db_, normalKeys));
if (canStartMigration(status)) {
BlobGranuleRestoreVersionVector granules = wait(listBlobGranules(self->db_, self->blobConn_));
if (!granules.empty()) {
self->blobGranules_ = granules;
@ -87,6 +88,9 @@ private:
.detail("Version", granule.version)
.detail("SizeInBytes", granule.sizeInBytes);
}
BlobRestoreStatus status(BlobRestorePhase::MIGRATE, 0);
wait(updateRestoreStatus(self->db_, normalKeys, status));
return Void();
}
}
@ -94,6 +98,15 @@ private:
}
}
// Check if we should start migration. Migration can be started after manifest is fully loaded
static bool canStartMigration(Optional<BlobRestoreStatus> status) {
if (status.present()) {
BlobRestoreStatus value = status.get();
return value.phase == BlobRestorePhase::MANIFEST_DONE; // manifest is loaded successfully
}
return false;
}
// Prepare for data migration for given key range.
ACTOR static Future<Void> prepare(Reference<BlobMigrator> self, KeyRangeRef keys) {
// Register as a storage server, so that DataDistributor could start data movement after
@ -120,8 +133,8 @@ private:
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
try {
state Value value = keyServersValue(std::vector<UID>({ serverUID }), std::vector<UID>(), UID(), UID());
wait(krmSetRange(&tr, keyServersPrefix, keys, value));
wait(krmSetRange(&tr, serverKeysPrefixFor(serverUID), keys, serverKeysTrue));
wait(krmSetRangeCoalescing(&tr, keyServersPrefix, keys, allKeys, value));
wait(krmSetRangeCoalescing(&tr, serverKeysPrefixFor(serverUID), keys, allKeys, serverKeysTrue));
wait(tr.commit());
dprint("Assign {} to server {}\n", normalKeys.toString(), serverUID.toString());
return Void();
@ -152,7 +165,7 @@ private:
}
}
if (owning) {
wait(krmSetRange(&tr, serverKeysPrefixFor(id), keys, serverKeysFalse));
wait(krmSetRangeCoalescing(&tr, serverKeysPrefixFor(id), keys, allKeys, serverKeysFalse));
dprint("Unassign {} from storage server {}\n", keys.toString(), id.toString());
TraceEvent("UnassignKeys").detail("Keys", keys.toString()).detail("From", id.toString());
}
@ -169,8 +182,12 @@ private:
ACTOR static Future<Void> logProgress(Reference<BlobMigrator> self) {
loop {
bool done = wait(checkProgress(self));
if (done)
if (done) {
BlobRestoreStatus status(BlobRestorePhase::DONE);
wait(updateRestoreStatus(self->db_, normalKeys, status));
return Void();
}
wait(delay(SERVER_KNOBS->BLOB_MIGRATOR_CHECK_INTERVAL));
}
}
@ -205,7 +222,8 @@ private:
state bool done = incompleted == 0;
dprint("Migration progress :{}%. done {}\n", progress, done);
TraceEvent("BlobMigratorProgress").detail("Progress", progress).detail("Done", done);
wait(updateProgress(self, normalKeys, progress));
BlobRestoreStatus status(BlobRestorePhase::MIGRATE, progress);
wait(updateRestoreStatus(self->db_, normalKeys, status));
return done;
} catch (Error& e) {
wait(tr.onError(e));
@ -213,32 +231,6 @@ private:
}
}
// Update restore progress
ACTOR static Future<Void> updateProgress(Reference<BlobMigrator> self, KeyRangeRef range, int progress) {
state Transaction tr(self->db_);
loop {
try {
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
state Key key = blobRestoreCommandKeyFor(range);
Optional<Value> value = wait(tr.get(key));
if (value.present()) {
Standalone<BlobRestoreStatus> status = decodeBlobRestoreStatus(value.get());
if (progress > status.progress) {
status.progress = progress;
Value updatedValue = blobRestoreCommandValueFor(status);
tr.set(key, updatedValue);
wait(tr.commit());
}
}
return Void();
} catch (Error& e) {
wait(tr.onError(e));
}
}
}
// Advance version, so that future commits will have a larger version than the restored data
ACTOR static Future<Void> advanceVersion(Reference<BlobMigrator> self) {
state Transaction tr(self->db_);

View File

@ -26,6 +26,7 @@
#include <tuple>
#include <vector>
#include "fdbclient/BlobGranuleCommon.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/SystemData.h"
#include "fdbclient/DatabaseContext.h"
@ -2565,8 +2566,8 @@ ACTOR Future<Void> watchBlobRestoreCommand(ClusterControllerData* self) {
Optional<Value> blobRestoreCommand = wait(tr->get(blobRestoreCommandKey));
if (blobRestoreCommand.present()) {
Standalone<BlobRestoreStatus> status = decodeBlobRestoreStatus(blobRestoreCommand.get());
TraceEvent("WatchBlobRestoreCommand").detail("Progress", status.progress);
if (status.progress == 0) {
TraceEvent("WatchBlobRestoreCommand").detail("Progress", status.progress).detail("Phase", status.phase);
if (status.phase == BlobRestorePhase::INIT) {
self->db.blobRestoreEnabled.set(true);
if (self->db.blobGranulesEnabled.get()) {
const auto& blobManager = self->db.serverInfo->get().blobManager;

View File

@ -435,7 +435,7 @@ namespace {
EncryptionAtRestMode getEncryptionAtRest() {
// TODO: Use db-config encryption config to determine cluster encryption status
if (SERVER_KNOBS->ENABLE_ENCRYPTION) {
return EncryptionAtRestMode(EncryptionAtRestMode::Mode::AES_256_CTR);
return EncryptionAtRestMode(EncryptionAtRestMode::Mode::DOMAIN_AWARE);
} else {
return EncryptionAtRestMode();
}

View File

@ -2910,7 +2910,7 @@ ACTOR Future<Void> commitProxyServerCore(CommitProxyInterface proxy,
ASSERT(commitData.resolvers.size() != 0);
for (int i = 0; i < commitData.resolvers.size(); ++i) {
commitData.stats.resolverDist.push_back(Histogram::getHistogram(
"CommitProxy"_sr, "ToResolver_" + commitData.resolvers[i].id().toString(), Histogram::Unit::microseconds));
"CommitProxy"_sr, "ToResolver_" + commitData.resolvers[i].id().toString(), Histogram::Unit::milliseconds));
}
// Initialize keyResolvers map

View File

@ -92,10 +92,10 @@ class ConfigBroadcasterImpl {
// Used to read a snapshot from the previous coordinators after a change
// coordinators command.
Version maxLastSeenVersion = ::invalidVersion;
Future<Optional<Value>> previousCoordinatorsFuture;
std::unique_ptr<IConfigConsumer> previousCoordinatorsConsumer;
Future<Void> previousCoordinatorsSnapshotFuture;
Version largestConfigNodeVersion{ ::invalidVersion };
UID id;
CounterCollection cc;
@ -106,6 +106,7 @@ class ConfigBroadcasterImpl {
Future<Void> logger;
int coordinators = 0;
std::unordered_set<NetworkAddress> registeredConfigNodes;
std::unordered_set<NetworkAddress> activeConfigNodes;
std::unordered_set<NetworkAddress> registrationResponses;
std::unordered_set<NetworkAddress> registrationResponsesUnregistered;
@ -268,7 +269,7 @@ class ConfigBroadcasterImpl {
// Ask the registering ConfigNode whether it has registered in the past.
state ConfigBroadcastRegisteredReply reply = wait(
brokenPromiseToNever(configBroadcastInterface.registered.getReply(ConfigBroadcastRegisteredRequest{})));
self->maxLastSeenVersion = std::max(self->maxLastSeenVersion, reply.lastSeenVersion);
self->largestConfigNodeVersion = std::max(self->largestConfigNodeVersion, reply.lastSeenVersion);
state bool registered = reply.registered;
TraceEvent("ConfigBroadcasterRegisterNodeReceivedRegistrationReply", self->id)
.detail("Address", address)
@ -302,6 +303,7 @@ class ConfigBroadcasterImpl {
int nodesTillQuorum = self->coordinators / 2 + 1 - (int)self->activeConfigNodes.size();
if (registered) {
self->registeredConfigNodes.insert(address);
self->activeConfigNodes.insert(address);
self->disallowUnregistered = true;
} else if ((self->activeConfigNodes.size() < self->coordinators / 2 + 1 && !self->disallowUnregistered) ||
@ -365,6 +367,52 @@ class ConfigBroadcasterImpl {
state bool sendSnapshot =
self->previousCoordinatorsConsumer && reply.lastSeenVersion <= self->mostRecentVersion;
// If a coordinator change is ongoing, a quorum of ConfigNodes are
// already registered and the largest version at least one of those
// ConfigNodes knows about is greater than the version of the latest
// snapshot the broadcaster has, don't send a snapshot to any
// ConfigNodes. This could end up overwriting committed data. Consider
// the following scenario, with three ConfigNodes:
//
// T=0:
// A: v5
// T=1:
// change coordinators, new coordinators are B, C, D
// T=2:
// B: v5, C: v5, D: v5
// T=3:
// B: v5, C: v10, D: v10
// (some commits happen on only C and D)
// (previousCoordinatorsKey has not been cleared yet)
// T=4:
// D dies and loses its data
// T=5:
// D starts
// B: v5 (registered=yes), C: v10 (registered=yes), D: v0 (registered=no)
// Broadcaster: has an old snapshot, only knows about v5
// self->mostRecentVersion=5
// T=6:
// B, C, D (re-)register with broadcaster
//
// At T=5, the broadcaster would send snapshots to B and D because the
// largest version they know about (5) is less than or equal to
// self->mostRecentVersion (5). But this would cause a majority of
// nodes to think v5 is the latest committed version, causing C to be
// rolled back, and losing commit data between versions 5 and 10.
//
// This is a special case where the coordinators are being changed.
// During a coordinator change, a majority of ConfigNodes being
// registered means the coordinator change already took place, and it
// is being retried due to some failure. In that case, we don't want to
// resend snapshots if a majority of the new ConfigNodes are
// registered, because they could have been accepting commits. Instead,
// let the rollback/rollforward algorithm update the out of date nodes.
if (self->previousCoordinatorsConsumer && self->largestConfigNodeVersion > self->mostRecentVersion &&
self->registeredConfigNodes.size() >= self->coordinators / 2 + 1) {
sendSnapshot = false;
}
// Unregistered nodes need to wait for either:
// 1. A quorum of registered nodes to register and send their
// snapshots, so the unregistered nodes can be rolled forward, or

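The guard added above can be summarized as a small standalone predicate. This is only an illustrative sketch: shouldSendSnapshot, its flattened parameter list, and coordinatorChangeOngoing (standing in for previousCoordinatorsConsumer being set) are not the real ConfigBroadcasterImpl interface.

#include <cstddef>
#include <cstdint>

using Version = int64_t;

bool shouldSendSnapshot(bool coordinatorChangeOngoing,
                        Version largestConfigNodeVersion,
                        Version mostRecentVersion,
                        std::size_t registeredConfigNodes,
                        int coordinators,
                        Version nodeLastSeenVersion) {
    // Default rule: during a coordinator change, a node whose last seen version
    // is not newer than the broadcaster's snapshot gets a snapshot.
    bool sendSnapshot = coordinatorChangeOngoing && nodeLastSeenVersion <= mostRecentVersion;
    // New guard: if a quorum of ConfigNodes has already registered and at least
    // one of them knows a newer version than the broadcaster's snapshot, sending
    // snapshots could roll back committed data, so suppress them and let the
    // rollback/rollforward algorithm reconcile the lagging nodes instead.
    if (coordinatorChangeOngoing && largestConfigNodeVersion > mostRecentVersion &&
        registeredConfigNodes >= static_cast<std::size_t>(coordinators / 2 + 1)) {
        sendSnapshot = false;
    }
    return sendSnapshot;
}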
View File

@ -234,10 +234,13 @@ class ConfigNodeImpl {
req.reply.sendError(process_behind()); // Reuse the process_behind error
return Void();
}
if (BUGGIFY) {
wait(delay(deterministicRandom()->random01() * 2));
}
state Standalone<VectorRef<VersionedConfigMutationRef>> versionedMutations =
wait(getMutations(self, req.lastSeenVersion + 1, committedVersion));
wait(getMutations(self, req.lastSeenVersion + 1, req.mostRecentVersion));
state Standalone<VectorRef<VersionedConfigCommitAnnotationRef>> versionedAnnotations =
wait(getAnnotations(self, req.lastSeenVersion + 1, committedVersion));
wait(getAnnotations(self, req.lastSeenVersion + 1, req.mostRecentVersion));
TraceEvent(SevInfo, "ConfigNodeSendingChanges", self->id)
.detail("ReqLastSeenVersion", req.lastSeenVersion)
.detail("ReqMostRecentVersion", req.mostRecentVersion)
@ -245,7 +248,7 @@ class ConfigNodeImpl {
.detail("NumMutations", versionedMutations.size())
.detail("NumCommits", versionedAnnotations.size());
++self->successfulChangeRequests;
req.reply.send(ConfigFollowerGetChangesReply{ committedVersion, versionedMutations, versionedAnnotations });
req.reply.send(ConfigFollowerGetChangesReply{ versionedMutations, versionedAnnotations });
return Void();
}
@ -520,6 +523,18 @@ class ConfigNodeImpl {
ObjectReader::fromStringRef<KnobValue>(kv.value, IncludeVersion());
}
wait(store(reply.snapshotVersion, getLastCompactedVersion(self)));
if (req.mostRecentVersion < reply.snapshotVersion) {
// The version in the request can be less than the last compacted
// version in certain circumstances where the coordinators are
// being changed and the consumer reads the latest committed
// version from a majority of ConfigNodes before they have received
// up to date snapshots. This should be fine, it just means the
// consumer needs to fetch the latest version and retry its
// request.
CODE_PROBE(true, "ConfigNode ahead of consumer", probe::decoration::rare);
req.reply.sendError(version_already_compacted());
return Void();
}
wait(store(reply.changes, getMutations(self, reply.snapshotVersion + 1, req.mostRecentVersion)));
wait(store(reply.annotations, getAnnotations(self, reply.snapshotVersion + 1, req.mostRecentVersion)));
TraceEvent(SevInfo, "ConfigNodeGettingSnapshot", self->id)

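The snapshot-request guard above can be read as the following decision, shown here as a standalone sketch (SnapshotRequestOutcome and classifySnapshotRequest are illustrative names; the real code replies with the version_already_compacted error):

#include <cstdint>

using Version = int64_t;

enum class SnapshotRequestOutcome { Serve, VersionAlreadyCompacted };

SnapshotRequestOutcome classifySnapshotRequest(Version requestedMostRecentVersion,
                                               Version lastCompactedVersion) {
    if (requestedMostRecentVersion < lastCompactedVersion) {
        // The consumer read a stale committed version (possible while the
        // coordinators are being changed); it should fetch the latest version
        // and retry rather than receive an unservable snapshot.
        return SnapshotRequestOutcome::VersionAlreadyCompacted;
    }
    return SnapshotRequestOutcome::Serve;
}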
View File

@ -1548,14 +1548,20 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueue* self,
if (enableShardMove && tciIndex == 1) {
ASSERT(physicalShardIDCandidate != UID().first() &&
physicalShardIDCandidate != anonymousShardId.first());
Optional<ShardsAffectedByTeamFailure::Team> remoteTeamWithPhysicalShard =
std::pair<Optional<ShardsAffectedByTeamFailure::Team>, bool> remoteTeamWithPhysicalShard =
self->physicalShardCollection->tryGetAvailableRemoteTeamWith(
physicalShardIDCandidate, metrics, debugID);
// TODO: when we know that `physicalShardIDCandidate` exists, remote team must also exist.
if (remoteTeamWithPhysicalShard.present()) {
if (!remoteTeamWithPhysicalShard.second) {
// Physical shard with `physicalShardIDCandidate` is not available. Retry selecting new
// dst physical shard.
self->retryFindDstReasonCount[DDQueue::RetryFindDstReason::NoAvailablePhysicalShard]++;
foundTeams = false;
break;
}
if (remoteTeamWithPhysicalShard.first.present()) {
// Exists a remoteTeam in the mapping that has the physicalShardIDCandidate
// use the remoteTeam with the physicalShard as the bestTeam
req = GetTeamRequest(remoteTeamWithPhysicalShard.get().servers);
req = GetTeamRequest(remoteTeamWithPhysicalShard.first.get().servers);
}
}
@ -1853,19 +1859,35 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueue* self,
state Error error = success();
state Promise<Void> dataMovementComplete;
// Move keys from source to destination by changing the serverKeyList and keyServerList system keys
state Future<Void> doMoveKeys =
self->txnProcessor->moveKeys(MoveKeysParams{ rd.dataMoveId,
rd.keys,
destIds,
healthyIds,
self->lock,
dataMovementComplete,
&self->startMoveKeysParallelismLock,
&self->finishMoveKeysParallelismLock,
self->teamCollections.size() > 1,
relocateShardInterval.pairID,
ddEnabledState,
CancelConflictingDataMoves::False });
std::unique_ptr<MoveKeysParams> params;
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA) {
params = std::make_unique<MoveKeysParams>(rd.dataMoveId,
std::vector<KeyRange>{ rd.keys },
destIds,
healthyIds,
self->lock,
dataMovementComplete,
&self->startMoveKeysParallelismLock,
&self->finishMoveKeysParallelismLock,
self->teamCollections.size() > 1,
relocateShardInterval.pairID,
ddEnabledState,
CancelConflictingDataMoves::False);
} else {
params = std::make_unique<MoveKeysParams>(rd.dataMoveId,
rd.keys,
destIds,
healthyIds,
self->lock,
dataMovementComplete,
&self->startMoveKeysParallelismLock,
&self->finishMoveKeysParallelismLock,
self->teamCollections.size() > 1,
relocateShardInterval.pairID,
ddEnabledState,
CancelConflictingDataMoves::False);
}
state Future<Void> doMoveKeys = self->txnProcessor->moveKeys(*params);
state Future<Void> pollHealth =
signalledTransferComplete ? Never()
: delay(SERVER_KNOBS->HEALTH_POLL_TIME, TaskPriority::DataDistributionLaunch);
@ -1878,19 +1900,35 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueue* self,
healthyIds.insert(healthyIds.end(), extraIds.begin(), extraIds.end());
extraIds.clear();
ASSERT(totalIds == destIds.size()); // Sanity check the destIDs before we move keys
doMoveKeys =
self->txnProcessor->moveKeys(MoveKeysParams{ rd.dataMoveId,
rd.keys,
destIds,
healthyIds,
self->lock,
Promise<Void>(),
&self->startMoveKeysParallelismLock,
&self->finishMoveKeysParallelismLock,
self->teamCollections.size() > 1,
relocateShardInterval.pairID,
ddEnabledState,
CancelConflictingDataMoves::False });
std::unique_ptr<MoveKeysParams> params;
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA) {
params = std::make_unique<MoveKeysParams>(rd.dataMoveId,
std::vector<KeyRange>{ rd.keys },
destIds,
healthyIds,
self->lock,
Promise<Void>(),
&self->startMoveKeysParallelismLock,
&self->finishMoveKeysParallelismLock,
self->teamCollections.size() > 1,
relocateShardInterval.pairID,
ddEnabledState,
CancelConflictingDataMoves::False);
} else {
params = std::make_unique<MoveKeysParams>(rd.dataMoveId,
rd.keys,
destIds,
healthyIds,
self->lock,
Promise<Void>(),
&self->startMoveKeysParallelismLock,
&self->finishMoveKeysParallelismLock,
self->teamCollections.size() > 1,
relocateShardInterval.pairID,
ddEnabledState,
CancelConflictingDataMoves::False);
}
doMoveKeys = self->txnProcessor->moveKeys(*params);
} else {
self->fetchKeysComplete.insert(rd);
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA) {

View File

@ -1756,7 +1756,7 @@ InOverSizePhysicalShard PhysicalShardCollection::isInOverSizePhysicalShard(KeyRa
}
// May return a problematic remote team
Optional<ShardsAffectedByTeamFailure::Team> PhysicalShardCollection::tryGetAvailableRemoteTeamWith(
std::pair<Optional<ShardsAffectedByTeamFailure::Team>, bool> PhysicalShardCollection::tryGetAvailableRemoteTeamWith(
uint64_t inputPhysicalShardID,
StorageMetrics const& moveInMetrics,
uint64_t debugID) {
@ -1764,10 +1764,10 @@ Optional<ShardsAffectedByTeamFailure::Team> PhysicalShardCollection::tryGetAvail
ASSERT(SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD);
ASSERT(inputPhysicalShardID != anonymousShardId.first() && inputPhysicalShardID != UID().first());
if (physicalShardInstances.count(inputPhysicalShardID) == 0) {
return Optional<ShardsAffectedByTeamFailure::Team>();
return { Optional<ShardsAffectedByTeamFailure::Team>(), true };
}
if (!checkPhysicalShardAvailable(inputPhysicalShardID, moveInMetrics)) {
return Optional<ShardsAffectedByTeamFailure::Team>();
return { Optional<ShardsAffectedByTeamFailure::Team>(), false };
}
for (auto team : physicalShardInstances[inputPhysicalShardID].teams) {
if (team.primary == false) {
@ -1777,10 +1777,12 @@ Optional<ShardsAffectedByTeamFailure::Team> PhysicalShardCollection::tryGetAvail
.detail("TeamSize", team.servers.size())
.detail("PhysicalShardsOfTeam", convertIDsToString(teamPhysicalShardIDs[team]))
.detail("DebugID", debugID);*/
return team;
return { team, true };
}
}
UNREACHABLE();
// In this case, the physical shard may not be populated in the remote region yet, e.g., we are making a
// configuration change to turn a single region cluster into HA mode.
return { Optional<ShardsAffectedByTeamFailure::Team>(), true };
}
// The update of PhysicalShardToTeams, Collection, keyRangePhysicalShardIDMap should be atomic

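The return type change above packs two facts into a pair: whether a remote team holding the physical shard was found, and whether the physical shard can accept the move at all. A simplified sketch of how a caller such as dataDistributionRelocator can interpret it (Team, RemoteTeamDecision and interpret are illustrative stand-ins):

#include <optional>
#include <utility>

struct Team {};

enum class RemoteTeamDecision { ReusePhysicalShardTeam, PickBestTeam, RetryNewPhysicalShard };

RemoteTeamDecision interpret(const std::pair<std::optional<Team>, bool>& result) {
    if (!result.second) {
        // The physical shard exists but cannot absorb the move: the caller
        // should retry with a different destination physical shard.
        return RemoteTeamDecision::RetryNewPhysicalShard;
    }
    if (result.first.has_value()) {
        // A remote team already hosts this physical shard: reuse it as the
        // best team for the relocation.
        return RemoteTeamDecision::ReusePhysicalShardTeam;
    }
    // Shard unknown or not yet populated in the remote region (e.g. a
    // single-region cluster being reconfigured into HA mode): fall back to
    // normal team selection.
    return RemoteTeamDecision::PickBestTeam;
}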
View File

@ -723,6 +723,17 @@ struct DDMockTxnProcessorImpl {
return Void();
}
static Future<Void> rawCheckFetchingState(DDMockTxnProcessor* self, const MoveKeysParams& params) {
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA) {
ASSERT(params.ranges.present());
// TODO: make startMoveShards work with multiple ranges.
ASSERT(params.ranges.get().size() == 1);
return checkFetchingState(self, params.destinationTeam, params.ranges.get().at(0));
}
ASSERT(params.keys.present());
return checkFetchingState(self, params.destinationTeam, params.keys.get());
}
ACTOR static Future<Void> moveKeys(DDMockTxnProcessor* self, MoveKeysParams params) {
state std::map<UID, StorageServerInterface> tssMapping;
// Because SFBTF::Team requires the ID is ordered
@ -732,7 +743,7 @@ struct DDMockTxnProcessorImpl {
wait(self->rawStartMovement(params, tssMapping));
ASSERT(tssMapping.empty());
wait(checkFetchingState(self, params.destinationTeam, params.keys));
wait(rawCheckFetchingState(self, params));
wait(self->rawFinishMovement(params, tssMapping));
if (!params.dataMovementComplete.isSet())
@ -915,6 +926,16 @@ Future<std::vector<ProcessData>> DDMockTxnProcessor::getWorkers() const {
ACTOR Future<Void> rawStartMovement(std::shared_ptr<MockGlobalState> mgs,
MoveKeysParams params,
std::map<UID, StorageServerInterface> tssMapping) {
state KeyRange keys;
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA) {
ASSERT(params.ranges.present());
// TODO: make startMoveShards work with multiple ranges.
ASSERT(params.ranges.get().size() == 1);
keys = params.ranges.get().at(0);
} else {
ASSERT(params.keys.present());
keys = params.keys.get();
}
// There won't be parallel rawStart or rawFinish in the mock world because the following *mock* transaction code
// will always finish without a coroutine switch.
ASSERT(params.startMoveKeysParallelismLock->activePermits() == 0);
@ -925,15 +946,15 @@ ACTOR Future<Void> rawStartMovement(std::shared_ptr<MockGlobalState> mgs,
destTeams.emplace_back(params.destinationTeam, true);
// invariant: the splitting and merge operation won't happen at the same moveKeys action. For example, if [a,c) [c,
// e) exists, the params.keys won't be [b, d).
auto intersectRanges = mgs->shardMapping->intersectingRanges(params.keys);
auto intersectRanges = mgs->shardMapping->intersectingRanges(keys);
// 1. splitting or just move a range. The new boundary need to be defined in startMovement
if (intersectRanges.begin().range().contains(params.keys)) {
mgs->shardMapping->defineShard(params.keys);
if (intersectRanges.begin().range().contains(keys)) {
mgs->shardMapping->defineShard(keys);
}
// 2. merge ops will coalesce the boundary in finishMovement;
intersectRanges = mgs->shardMapping->intersectingRanges(params.keys);
ASSERT(params.keys.begin == intersectRanges.begin().begin());
ASSERT(params.keys.end == intersectRanges.end().begin());
intersectRanges = mgs->shardMapping->intersectingRanges(keys);
ASSERT(keys.begin == intersectRanges.begin().begin());
ASSERT(keys.end == intersectRanges.end().begin());
for (auto it = intersectRanges.begin(); it != intersectRanges.end(); ++it) {
auto teamPair = mgs->shardMapping->getTeamsFor(it->begin());
@ -945,8 +966,8 @@ ACTOR Future<Void> rawStartMovement(std::shared_ptr<MockGlobalState> mgs,
deterministicRandom()->randomInt64(SERVER_KNOBS->MIN_SHARD_BYTES, SERVER_KNOBS->MAX_SHARD_BYTES);
for (auto& id : params.destinationTeam) {
auto& server = mgs->allServers.at(id);
server.setShardStatus(params.keys, MockShardStatus::INFLIGHT, mgs->restrictSize);
server.signalFetchKeys(params.keys, randomRangeSize);
server.setShardStatus(keys, MockShardStatus::INFLIGHT, mgs->restrictSize);
server.signalFetchKeys(keys, randomRangeSize);
}
return Void();
}
@ -959,6 +980,17 @@ Future<Void> DDMockTxnProcessor::rawStartMovement(const MoveKeysParams& params,
ACTOR Future<Void> rawFinishMovement(std::shared_ptr<MockGlobalState> mgs,
MoveKeysParams params,
std::map<UID, StorageServerInterface> tssMapping) {
state KeyRange keys;
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA) {
ASSERT(params.ranges.present());
// TODO: make startMoveShards work with multiple ranges.
ASSERT(params.ranges.get().size() == 1);
keys = params.ranges.get().at(0);
} else {
ASSERT(params.keys.present());
keys = params.keys.get();
}
// There won't be parallel rawStart or rawFinish in the mock world because the following *mock* transaction code
// will always finish without a coroutine switch.
ASSERT(params.finishMoveKeysParallelismLock->activePermits() == 0);
@ -966,7 +998,7 @@ ACTOR Future<Void> rawFinishMovement(std::shared_ptr<MockGlobalState> mgs,
state FlowLock::Releaser releaser(*params.finishMoveKeysParallelismLock);
// get source and dest teams
auto [destTeams, srcTeams] = mgs->shardMapping->getTeamsForFirstShard(params.keys);
auto [destTeams, srcTeams] = mgs->shardMapping->getTeamsForFirstShard(keys);
ASSERT_EQ(destTeams.size(), 1); // Will the multi-region or dynamic replica make destTeam.size() > 1?
if (destTeams.front() != ShardsAffectedByTeamFailure::Team{ params.destinationTeam, true }) {
@ -978,7 +1010,7 @@ ACTOR Future<Void> rawFinishMovement(std::shared_ptr<MockGlobalState> mgs,
}
for (auto& id : params.destinationTeam) {
mgs->allServers.at(id).setShardStatus(params.keys, MockShardStatus::COMPLETED, mgs->restrictSize);
mgs->allServers.at(id).setShardStatus(keys, MockShardStatus::COMPLETED, mgs->restrictSize);
}
// remove destination servers from source servers
@ -986,11 +1018,11 @@ ACTOR Future<Void> rawFinishMovement(std::shared_ptr<MockGlobalState> mgs,
for (auto& id : srcTeams.front().servers) {
// the only caller moveKeys will always make sure the UID are sorted
if (!std::binary_search(params.destinationTeam.begin(), params.destinationTeam.end(), id)) {
mgs->allServers.at(id).removeShard(params.keys);
mgs->allServers.at(id).removeShard(keys);
}
}
mgs->shardMapping->finishMove(params.keys);
mgs->shardMapping->defineShard(params.keys); // coalesce for merge
mgs->shardMapping->finishMove(keys);
mgs->shardMapping->defineShard(keys); // coalesce for merge
return Void();
}

View File

@ -134,9 +134,9 @@ struct GrvProxyStats {
recentRequests(0), lastBucketBegin(now()),
bucketInterval(FLOW_KNOBS->BASIC_LOAD_BALANCE_UPDATE_RATE / FLOW_KNOBS->BASIC_LOAD_BALANCE_BUCKETS),
grvConfirmEpochLiveDist(
Histogram::getHistogram("GrvProxy"_sr, "GrvConfirmEpochLive"_sr, Histogram::Unit::microseconds)),
Histogram::getHistogram("GrvProxy"_sr, "GrvConfirmEpochLive"_sr, Histogram::Unit::milliseconds)),
grvGetCommittedVersionRpcDist(
Histogram::getHistogram("GrvProxy"_sr, "GrvGetCommittedVersionRpc"_sr, Histogram::Unit::microseconds)) {
Histogram::getHistogram("GrvProxy"_sr, "GrvGetCommittedVersionRpc"_sr, Histogram::Unit::milliseconds)) {
// The rate at which the limit(budget) is allowed to grow.
specialCounter(cc, "SystemGRVQueueSize", [this]() { return this->systemGRVQueueSize; });
specialCounter(cc, "DefaultGRVQueueSize", [this]() { return this->defaultGRVQueueSize; });

View File

@ -68,12 +68,9 @@
#ifdef SSD_ROCKSDB_EXPERIMENTAL
// Enforcing rocksdb version to be 6.27.3 or greater.
static_assert(ROCKSDB_MAJOR >= 6, "Unsupported rocksdb version. Update the rocksdb to 6.27.3 version");
static_assert(ROCKSDB_MAJOR == 6 ? ROCKSDB_MINOR >= 27 : true,
"Unsupported rocksdb version. Update the rocksdb to 6.27.3 version");
static_assert((ROCKSDB_MAJOR == 6 && ROCKSDB_MINOR == 27) ? ROCKSDB_PATCH >= 3 : true,
"Unsupported rocksdb version. Update the rocksdb to 6.27.3 version");
// Enforcing rocksdb version to be 7.7.3.
static_assert((ROCKSDB_MAJOR == 7 && ROCKSDB_MINOR == 7 && ROCKSDB_PATCH == 3),
"Unsupported rocksdb version. Update the rocksdb to 7.7.3 version");
namespace {
using rocksdb::BackgroundErrorReason;
@ -901,6 +898,7 @@ ACTOR Future<Void> rocksDBMetricLogger(UID id,
};
// To control the rocksdb::StatsLevel, use ROCKSDB_STATS_LEVEL knob.
// Refer to StatsLevel: https://github.com/facebook/rocksdb/blob/main/include/rocksdb/statistics.h#L594
state std::vector<std::pair<const char*, uint32_t>> histogramStats = {
{ "CompactionTime", rocksdb::COMPACTION_TIME }, // enabled if rocksdb::StatsLevel > kExceptTimers(2)
{ "CompactionCPUTime", rocksdb::COMPACTION_CPU_TIME }, // enabled if rocksdb::StatsLevel > kExceptTimers(2)
@ -970,6 +968,7 @@ ACTOR Future<Void> rocksDBMetricLogger(UID id,
}
// None of the histogramStats are enabled unless the ROCKSDB_STATS_LEVEL > kExceptHistogramOrTimers(1)
// Refer to StatsLevel: https://github.com/facebook/rocksdb/blob/main/include/rocksdb/statistics.h#L594
if (SERVER_KNOBS->ROCKSDB_STATS_LEVEL > rocksdb::kExceptHistogramOrTimers) {
for (auto& [name, histogram] : histogramStats) {
rocksdb::HistogramData histogram_data;
@ -1031,7 +1030,10 @@ void logRocksDBError(UID id,
Optional<Severity> sev = Optional<Severity>()) {
Severity level = sev.present() ? sev.get() : (status.IsTimedOut() ? SevWarn : SevError);
TraceEvent e(level, "RocksDBError", id);
e.detail("Error", status.ToString()).detail("Method", method).detail("RocksDBSeverity", status.severity());
e.setMaxFieldLength(10000)
.detail("Error", status.ToString())
.detail("Method", method)
.detail("RocksDBSeverity", status.severity());
if (status.IsIOError()) {
e.detail("SubCode", status.subcode());
}
@ -1253,15 +1255,18 @@ struct RocksDBKeyValueStore : IKeyValueStore {
std::make_pair(ROCKSDB_COMMIT_QUEUEWAIT_HISTOGRAM.toString(), commitBeginTime - a.startTime));
}
Standalone<VectorRef<KeyRangeRef>> deletes;
DeleteVisitor dv(deletes, deletes.arena());
rocksdb::Status s = a.batchToCommit->Iterate(&dv);
if (!s.ok()) {
logRocksDBError(id, s, "CommitDeleteVisitor");
a.done.sendError(statusToError(s));
return;
if (SERVER_KNOBS->ROCKSDB_SUGGEST_COMPACT_CLEAR_RANGE) {
DeleteVisitor dv(deletes, deletes.arena());
rocksdb::Status s = a.batchToCommit->Iterate(&dv);
if (!s.ok()) {
logRocksDBError(id, s, "CommitDeleteVisitor");
a.done.sendError(statusToError(s));
return;
}
// If there are any range deletes, we should have added them to be deleted.
ASSERT(!deletes.empty() || !a.batchToCommit->HasDeleteRange());
}
// If there are any range deletes, we should have added them to be deleted.
ASSERT(!deletes.empty() || !a.batchToCommit->HasDeleteRange());
rocksdb::WriteOptions options;
options.sync = !SERVER_KNOBS->ROCKSDB_UNSAFE_AUTO_FSYNC;
if (SERVER_KNOBS->ROCKSDB_DISABLE_WAL_EXPERIMENTAL) {
@ -1275,7 +1280,7 @@ struct RocksDBKeyValueStore : IKeyValueStore {
// Request for batchToCommit bytes. If this request cannot be satisfied, the call is blocked.
rateLimiter->Request(a.batchToCommit->GetDataSize() /* bytes */, rocksdb::Env::IO_HIGH);
}
s = db->Write(options, a.batchToCommit.get());
rocksdb::Status s = db->Write(options, a.batchToCommit.get());
readIterPool->update();
double currTime = timer_monotonic();
sharedState->dbWriteLatency.addMeasurement(currTime - writeBeginTime);
@ -1402,17 +1407,11 @@ struct RocksDBKeyValueStore : IKeyValueStore {
ThreadReturnPromiseStream<std::pair<std::string, double>>* metricPromiseStream)
: id(id), db(db), cf(cf), sharedState(sharedState), readIterPool(readIterPool),
perfContextMetrics(perfContextMetrics), metricPromiseStream(metricPromiseStream), threadIndex(threadIndex) {
if (g_network->isSimulated()) {
// In simulation, increasing the read operation timeouts to 5 minutes, as some of the tests have
// very high load and single read thread cannot process all the load within the timeouts.
readValueTimeout = 5 * 60;
readValuePrefixTimeout = 5 * 60;
readRangeTimeout = 5 * 60;
} else {
readValueTimeout = SERVER_KNOBS->ROCKSDB_READ_VALUE_TIMEOUT;
readValuePrefixTimeout = SERVER_KNOBS->ROCKSDB_READ_VALUE_PREFIX_TIMEOUT;
readRangeTimeout = SERVER_KNOBS->ROCKSDB_READ_RANGE_TIMEOUT;
}
readValueTimeout = SERVER_KNOBS->ROCKSDB_READ_VALUE_TIMEOUT;
readValuePrefixTimeout = SERVER_KNOBS->ROCKSDB_READ_VALUE_PREFIX_TIMEOUT;
readRangeTimeout = SERVER_KNOBS->ROCKSDB_READ_RANGE_TIMEOUT;
if (SERVER_KNOBS->ROCKSDB_PERFCONTEXT_ENABLE) {
// Enable perf context on the same thread with the db thread
rocksdb::SetPerfLevel(rocksdb::PerfLevel::kEnableTimeExceptForMutex);
@ -1792,39 +1791,39 @@ struct RocksDBKeyValueStore : IKeyValueStore {
ACTOR Future<Void> updateHistogram(FutureStream<std::pair<std::string, double>> metricFutureStream) {
state Reference<Histogram> commitLatencyHistogram = Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_COMMIT_LATENCY_HISTOGRAM, Histogram::Unit::microseconds);
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_COMMIT_LATENCY_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> commitActionHistogram = Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_COMMIT_ACTION_HISTOGRAM, Histogram::Unit::microseconds);
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_COMMIT_ACTION_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> commitQueueWaitHistogram = Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_COMMIT_QUEUEWAIT_HISTOGRAM, Histogram::Unit::microseconds);
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_COMMIT_QUEUEWAIT_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> writeHistogram = Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_WRITE_HISTOGRAM, Histogram::Unit::microseconds);
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_WRITE_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> deleteCompactRangeHistogram = Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_DELETE_COMPACTRANGE_HISTOGRAM, Histogram::Unit::microseconds);
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_DELETE_COMPACTRANGE_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> readRangeLatencyHistogram = Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_LATENCY_HISTOGRAM, Histogram::Unit::microseconds);
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_LATENCY_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> readValueLatencyHistogram = Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_LATENCY_HISTOGRAM, Histogram::Unit::microseconds);
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_LATENCY_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> readPrefixLatencyHistogram = Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_LATENCY_HISTOGRAM, Histogram::Unit::microseconds);
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_LATENCY_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> readRangeActionHistogram = Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_ACTION_HISTOGRAM, Histogram::Unit::microseconds);
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_ACTION_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> readValueActionHistogram = Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_ACTION_HISTOGRAM, Histogram::Unit::microseconds);
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_ACTION_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> readPrefixActionHistogram = Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_ACTION_HISTOGRAM, Histogram::Unit::microseconds);
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_ACTION_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> readRangeQueueWaitHistogram = Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_QUEUEWAIT_HISTOGRAM, Histogram::Unit::microseconds);
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_QUEUEWAIT_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> readValueQueueWaitHistogram = Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_QUEUEWAIT_HISTOGRAM, Histogram::Unit::microseconds);
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_QUEUEWAIT_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> readPrefixQueueWaitHistogram = Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_QUEUEWAIT_HISTOGRAM, Histogram::Unit::microseconds);
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_QUEUEWAIT_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> readRangeNewIteratorHistogram = Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_NEWITERATOR_HISTOGRAM, Histogram::Unit::microseconds);
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_NEWITERATOR_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> readValueGetHistogram = Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_GET_HISTOGRAM, Histogram::Unit::microseconds);
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_GET_HISTOGRAM, Histogram::Unit::milliseconds);
state Reference<Histogram> readPrefixGetHistogram = Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_GET_HISTOGRAM, Histogram::Unit::microseconds);
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_GET_HISTOGRAM, Histogram::Unit::milliseconds);
loop {
choose {
when(std::pair<std::string, double> measure = waitNext(metricFutureStream)) {

View File

@ -41,12 +41,9 @@
#ifdef SSD_ROCKSDB_EXPERIMENTAL
// Enforcing rocksdb version to be 6.27.3 or greater.
static_assert(ROCKSDB_MAJOR >= 6, "Unsupported rocksdb version. Update the rocksdb to 6.27.3 version");
static_assert(ROCKSDB_MAJOR == 6 ? ROCKSDB_MINOR >= 27 : true,
"Unsupported rocksdb version. Update the rocksdb to 6.27.3 version");
static_assert((ROCKSDB_MAJOR == 6 && ROCKSDB_MINOR == 27) ? ROCKSDB_PATCH >= 3 : true,
"Unsupported rocksdb version. Update the rocksdb to 6.27.3 version");
// Enforcing rocksdb version to be 7.7.3.
static_assert((ROCKSDB_MAJOR == 7 && ROCKSDB_MINOR == 7 && ROCKSDB_PATCH == 3),
"Unsupported rocksdb version. Update the rocksdb to 7.7.3 version");
const std::string rocksDataFolderSuffix = "-data";
const std::string METADATA_SHARD_ID = "kvs-metadata";
@ -170,7 +167,10 @@ std::string getShardMappingKey(KeyRef key, StringRef prefix) {
void logRocksDBError(const rocksdb::Status& status, const std::string& method) {
auto level = status.IsTimedOut() ? SevWarn : SevError;
TraceEvent e(level, "ShardedRocksDBError");
e.detail("Error", status.ToString()).detail("Method", method).detail("ShardedRocksDBSeverity", status.severity());
e.setMaxFieldLength(10000)
.detail("Error", status.ToString())
.detail("Method", method)
.detail("ShardedRocksDBSeverity", status.severity());
if (status.IsIOError()) {
e.detail("SubCode", status.subcode());
}
@ -449,7 +449,8 @@ struct DataShard {
// PhysicalShard represents a collection of logical shards. A PhysicalShard could have one or more DataShards. A
// PhysicalShard is stored as a column family in rocksdb. Each PhysicalShard has its own iterator pool.
struct PhysicalShard {
PhysicalShard(rocksdb::DB* db, std::string id) : db(db), id(id), isInitialized(false) {}
PhysicalShard(rocksdb::DB* db, std::string id, const rocksdb::ColumnFamilyOptions& options)
: db(db), id(id), cfOptions(options), isInitialized(false) {}
PhysicalShard(rocksdb::DB* db, std::string id, rocksdb::ColumnFamilyHandle* handle)
: db(db), id(id), cf(handle), isInitialized(true) {
ASSERT(cf);
@ -460,7 +461,7 @@ struct PhysicalShard {
if (cf) {
return rocksdb::Status::OK();
}
auto status = db->CreateColumnFamily(getCFOptions(), id, &cf);
auto status = db->CreateColumnFamily(cfOptions, id, &cf);
if (!status.ok()) {
logRocksDBError(status, "AddCF");
return status;
@ -516,6 +517,7 @@ struct PhysicalShard {
rocksdb::DB* db;
std::string id;
rocksdb::ColumnFamilyOptions cfOptions;
rocksdb::ColumnFamilyHandle* cf = nullptr;
std::unordered_map<std::string, std::unique_ptr<DataShard>> dataShards;
std::shared_ptr<ReadIteratorPool> readIterPool;
@ -586,7 +588,8 @@ int readRangeInDb(PhysicalShard* shard, const KeyRangeRef range, int rowLimit, i
// Manages physical shards and maintains logical shard mapping.
class ShardManager {
public:
ShardManager(std::string path, UID logId) : path(path), logId(logId), dataShardMap(nullptr, specialKeys.end) {}
ShardManager(std::string path, UID logId, const rocksdb::Options& options)
: path(path), logId(logId), dbOptions(options), dataShardMap(nullptr, specialKeys.end) {}
ACTOR static Future<Void> shardMetricsLogger(std::shared_ptr<ShardedRocksDBState> rState,
Future<Void> openFuture,
@ -637,31 +640,31 @@ public:
return Void();
}
rocksdb::Status init(rocksdb::Options options) {
rocksdb::Status init() {
// Open instance.
TraceEvent(SevInfo, "ShardedRocksShardManagerInitBegin", this->logId).detail("DataPath", path);
std::vector<std::string> columnFamilies;
rocksdb::Status status = rocksdb::DB::ListColumnFamilies(options, path, &columnFamilies);
rocksdb::Status status = rocksdb::DB::ListColumnFamilies(dbOptions, path, &columnFamilies);
rocksdb::ColumnFamilyOptions cfOptions = getCFOptions();
std::vector<rocksdb::ColumnFamilyDescriptor> descriptors;
bool foundMetadata = false;
for (const auto& name : columnFamilies) {
if (name == METADATA_SHARD_ID) {
foundMetadata = true;
}
descriptors.push_back(rocksdb::ColumnFamilyDescriptor{ name, cfOptions });
descriptors.push_back(rocksdb::ColumnFamilyDescriptor{ name, rocksdb::ColumnFamilyOptions(dbOptions) });
}
ASSERT(foundMetadata || descriptors.size() == 0);
// Add default column family if it's a newly opened database.
if (descriptors.size() == 0) {
descriptors.push_back(rocksdb::ColumnFamilyDescriptor{ "default", cfOptions });
descriptors.push_back(
rocksdb::ColumnFamilyDescriptor{ "default", rocksdb::ColumnFamilyOptions(dbOptions) });
}
std::vector<rocksdb::ColumnFamilyHandle*> handles;
status = rocksdb::DB::Open(options, path, descriptors, &handles, &db);
status = rocksdb::DB::Open(dbOptions, path, descriptors, &handles, &db);
if (!status.ok()) {
logRocksDBError(status, "Open");
return status;
@ -766,7 +769,8 @@ public:
physicalShards[defaultShard->id] = defaultShard;
// Create metadata shard.
auto metadataShard = std::make_shared<PhysicalShard>(db, METADATA_SHARD_ID);
auto metadataShard =
std::make_shared<PhysicalShard>(db, METADATA_SHARD_ID, rocksdb::ColumnFamilyOptions(dbOptions));
metadataShard->init();
columnFamilyMap[metadataShard->cf->GetID()] = metadataShard->cf;
physicalShards[METADATA_SHARD_ID] = metadataShard;
@ -832,7 +836,8 @@ public:
}
}
auto [it, inserted] = physicalShards.emplace(id, std::make_shared<PhysicalShard>(db, id));
auto [it, inserted] = physicalShards.emplace(
id, std::make_shared<PhysicalShard>(db, id, rocksdb::ColumnFamilyOptions(dbOptions)));
std::shared_ptr<PhysicalShard>& shard = it->second;
activePhysicalShardIds.emplace(id);
@ -1146,6 +1151,7 @@ public:
private:
const std::string path;
const UID logId;
rocksdb::Options dbOptions;
rocksdb::DB* db = nullptr;
std::unordered_map<std::string, std::shared_ptr<PhysicalShard>> physicalShards;
std::unordered_set<std::string> activePhysicalShardIds;
@ -1421,40 +1427,40 @@ RocksDBMetrics::RocksDBMetrics(UID debugID, std::shared_ptr<rocksdb::Statistics>
}
for (int i = 0; i < SERVER_KNOBS->ROCKSDB_READ_PARALLELISM; i++) {
readRangeLatencyHistograms.push_back(Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_LATENCY_HISTOGRAM, Histogram::Unit::microseconds));
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_LATENCY_HISTOGRAM, Histogram::Unit::milliseconds));
readValueLatencyHistograms.push_back(Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_LATENCY_HISTOGRAM, Histogram::Unit::microseconds));
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_LATENCY_HISTOGRAM, Histogram::Unit::milliseconds));
readPrefixLatencyHistograms.push_back(Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_LATENCY_HISTOGRAM, Histogram::Unit::microseconds));
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_LATENCY_HISTOGRAM, Histogram::Unit::milliseconds));
readRangeActionHistograms.push_back(Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_ACTION_HISTOGRAM, Histogram::Unit::microseconds));
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_ACTION_HISTOGRAM, Histogram::Unit::milliseconds));
readValueActionHistograms.push_back(Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_ACTION_HISTOGRAM, Histogram::Unit::microseconds));
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_ACTION_HISTOGRAM, Histogram::Unit::milliseconds));
readPrefixActionHistograms.push_back(Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_ACTION_HISTOGRAM, Histogram::Unit::microseconds));
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_ACTION_HISTOGRAM, Histogram::Unit::milliseconds));
readRangeQueueWaitHistograms.push_back(Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_QUEUEWAIT_HISTOGRAM, Histogram::Unit::microseconds));
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_QUEUEWAIT_HISTOGRAM, Histogram::Unit::milliseconds));
readValueQueueWaitHistograms.push_back(Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_QUEUEWAIT_HISTOGRAM, Histogram::Unit::microseconds));
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_QUEUEWAIT_HISTOGRAM, Histogram::Unit::milliseconds));
readPrefixQueueWaitHistograms.push_back(Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_QUEUEWAIT_HISTOGRAM, Histogram::Unit::microseconds));
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_QUEUEWAIT_HISTOGRAM, Histogram::Unit::milliseconds));
readRangeNewIteratorHistograms.push_back(Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_NEWITERATOR_HISTOGRAM, Histogram::Unit::microseconds));
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READRANGE_NEWITERATOR_HISTOGRAM, Histogram::Unit::milliseconds));
readValueGetHistograms.push_back(Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_GET_HISTOGRAM, Histogram::Unit::microseconds));
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READVALUE_GET_HISTOGRAM, Histogram::Unit::milliseconds));
readPrefixGetHistograms.push_back(Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_GET_HISTOGRAM, Histogram::Unit::microseconds));
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_READPREFIX_GET_HISTOGRAM, Histogram::Unit::milliseconds));
}
commitLatencyHistogram = Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_COMMIT_LATENCY_HISTOGRAM, Histogram::Unit::microseconds);
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_COMMIT_LATENCY_HISTOGRAM, Histogram::Unit::milliseconds);
commitActionHistogram = Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_COMMIT_ACTION_HISTOGRAM, Histogram::Unit::microseconds);
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_COMMIT_ACTION_HISTOGRAM, Histogram::Unit::milliseconds);
commitQueueWaitHistogram = Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_COMMIT_QUEUEWAIT_HISTOGRAM, Histogram::Unit::microseconds);
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_COMMIT_QUEUEWAIT_HISTOGRAM, Histogram::Unit::milliseconds);
writeHistogram =
Histogram::getHistogram(ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_WRITE_HISTOGRAM, Histogram::Unit::microseconds);
Histogram::getHistogram(ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_WRITE_HISTOGRAM, Histogram::Unit::milliseconds);
deleteCompactRangeHistogram = Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_DELETE_COMPACTRANGE_HISTOGRAM, Histogram::Unit::microseconds);
ROCKSDBSTORAGE_HISTOGRAM_GROUP, ROCKSDB_DELETE_COMPACTRANGE_HISTOGRAM, Histogram::Unit::milliseconds);
}
void RocksDBMetrics::logStats(rocksdb::DB* db) {
@ -1689,7 +1695,7 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
Future<Void> readyToStart,
std::unordered_map<std::string, std::shared_ptr<PhysicalShard>>* physicalShards) {
state Reference<Histogram> histogram = Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, "TimeSpentRefreshIterators"_sr, Histogram::Unit::microseconds);
ROCKSDBSTORAGE_HISTOGRAM_GROUP, "TimeSpentRefreshIterators"_sr, Histogram::Unit::milliseconds);
if (SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_ITERATORS) {
try {
@ -1755,7 +1761,6 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
struct OpenAction : TypedAction<Writer, OpenAction> {
ShardManager* shardManager;
rocksdb::Options dbOptions;
ThreadReturnPromise<Void> done;
Optional<Future<Void>>& metrics;
const FlowLock* readLock;
@ -1763,19 +1768,18 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
std::shared_ptr<RocksDBErrorListener> errorListener;
OpenAction(ShardManager* shardManager,
rocksdb::Options dbOptions,
Optional<Future<Void>>& metrics,
const FlowLock* readLock,
const FlowLock* fetchLock,
std::shared_ptr<RocksDBErrorListener> errorListener)
: shardManager(shardManager), dbOptions(dbOptions), metrics(metrics), readLock(readLock),
fetchLock(fetchLock), errorListener(errorListener) {}
: shardManager(shardManager), metrics(metrics), readLock(readLock), fetchLock(fetchLock),
errorListener(errorListener) {}
double getTimeEstimate() const override { return SERVER_KNOBS->COMMIT_TIME_ESTIMATE; }
};
void action(OpenAction& a) {
auto status = a.shardManager->init(a.dbOptions);
auto status = a.shardManager->init();
if (!status.ok()) {
logRocksDBError(status, "Open");
@ -1886,21 +1890,23 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
rocksdb::DB* db,
std::vector<std::pair<uint32_t, KeyRange>>* deletes,
bool sample) {
DeleteVisitor dv(deletes);
rocksdb::Status s = batch->Iterate(&dv);
if (!s.ok()) {
logRocksDBError(s, "CommitDeleteVisitor");
return s;
}
if (SERVER_KNOBS->ROCKSDB_SUGGEST_COMPACT_CLEAR_RANGE) {
DeleteVisitor dv(deletes);
rocksdb::Status s = batch->Iterate(&dv);
if (!s.ok()) {
logRocksDBError(s, "CommitDeleteVisitor");
return s;
}
// If there are any range deletes, we should have added them to be deleted.
ASSERT(!deletes->empty() || !batch->HasDeleteRange());
// If there are any range deletes, we should have added them to be deleted.
ASSERT(!deletes->empty() || !batch->HasDeleteRange());
}
rocksdb::WriteOptions options;
options.sync = !SERVER_KNOBS->ROCKSDB_UNSAFE_AUTO_FSYNC;
double writeBeginTime = sample ? timer_monotonic() : 0;
s = db->Write(options, batch);
rocksdb::Status s = db->Write(options, batch);
if (sample) {
rocksDBMetrics->getWriteHistogram()->sampleSeconds(timer_monotonic() - writeBeginTime);
}
@ -2280,7 +2286,7 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
numReadWaiters(SERVER_KNOBS->ROCKSDB_READ_QUEUE_HARD_MAX - SERVER_KNOBS->ROCKSDB_READ_QUEUE_SOFT_MAX),
numFetchWaiters(SERVER_KNOBS->ROCKSDB_FETCH_QUEUE_HARD_MAX - SERVER_KNOBS->ROCKSDB_FETCH_QUEUE_SOFT_MAX),
errorListener(std::make_shared<RocksDBErrorListener>()), errorFuture(errorListener->getFuture()),
shardManager(path, id), dbOptions(getOptions()),
dbOptions(getOptions()), shardManager(path, id, dbOptions),
rocksDBMetrics(std::make_shared<RocksDBMetrics>(id, dbOptions.statistics)) {
// In simulation, run the reader/writer threads as Coro threads (i.e. in the network thread). The storage
// engine is still multi-threaded as background compaction threads are still present. Reads/writes to disk
@ -2347,7 +2353,7 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
// mapping data.
} else {
auto a = std::make_unique<Writer::OpenAction>(
&shardManager, dbOptions, metrics, &readSemaphore, &fetchSemaphore, errorListener);
&shardManager, metrics, &readSemaphore, &fetchSemaphore, errorListener);
openFuture = a->done.getFuture();
this->metrics = ShardManager::shardMetricsLogger(this->rState, openFuture, &shardManager) &&
rocksDBAggregatedMetricsLogger(this->rState, openFuture, rocksDBMetrics, &shardManager);
@ -2581,8 +2587,8 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
std::vector<std::pair<KeyRange, std::string>> getDataMapping() { return shardManager.getDataMapping(); }
std::shared_ptr<ShardedRocksDBState> rState;
ShardManager shardManager;
rocksdb::Options dbOptions;
ShardManager shardManager;
std::shared_ptr<RocksDBMetrics> rocksDBMetrics;
std::string path;
UID id;

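One subtle point in the constructor change above: C++ initializes non-static data members in declaration order, so dbOptions has to be declared before shardManager, which now consumes it in the member initializer list. A minimal illustration of the pattern (Options, Manager and Store are illustrative types, not the FDB classes):

#include <string>

struct Options {
    std::string statistics = "default";
};

struct Manager {
    explicit Manager(const Options& o) : statistics(o.statistics) {}
    std::string statistics;
};

struct Store {
    // Declared first, therefore constructed first; shardManager can then be
    // initialized from it safely, mirroring dbOptions/shardManager above.
    Options dbOptions;
    Manager shardManager;
    Store() : dbOptions(), shardManager(dbOptions) {}
};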
View File

@ -138,7 +138,7 @@ struct LogRouterData {
: dbgid(dbgid), logSystem(new AsyncVar<Reference<ILogSystem>>()), version(req.startVersion - 1), minPopped(0),
startVersion(req.startVersion), minKnownCommittedVersion(0), poppedVersion(0), routerTag(req.routerTag),
allowPops(false), foundEpochEnd(false), generation(req.recoveryCount),
peekLatencyDist(Histogram::getHistogram("LogRouter"_sr, "PeekTLogLatency"_sr, Histogram::Unit::microseconds)),
peekLatencyDist(Histogram::getHistogram("LogRouter"_sr, "PeekTLogLatency"_sr, Histogram::Unit::milliseconds)),
cc("LogRouter", dbgid.toString()), getMoreCount("GetMoreCount", cc),
getMoreBlockedCount("GetMoreBlockedCount", cc) {
// setup just enough of a logSet to be able to call getPushLocations

View File

@ -375,7 +375,7 @@ bool LogPushData::writeTransactionInfo(int location, uint32_t subseq) {
// parent->child.
SpanContextMessage contextMessage;
if (spanContext.isSampled()) {
CODE_PROBE(true, "Converting OTELSpanContextMessage to traced SpanContextMessage", probe::decoration::rare);
CODE_PROBE(true, "Converting OTELSpanContextMessage to traced SpanContextMessage");
contextMessage = SpanContextMessage(UID(spanContext.traceID.first(), spanContext.traceID.second()));
} else {
CODE_PROBE(true, "Converting OTELSpanContextMessage to untraced SpanContextMessage");

View File

@ -1241,7 +1241,7 @@ ACTOR static Future<Void> finishMoveKeys(Database occ,
// Set dataMoves[dataMoveId] = DataMoveMetaData.
ACTOR static Future<Void> startMoveShards(Database occ,
UID dataMoveId,
KeyRange keys,
std::vector<KeyRange> ranges,
std::vector<UID> servers,
MoveKeysLock lock,
FlowLock* startMoveKeysLock,
@ -1257,8 +1257,11 @@ ACTOR static Future<Void> startMoveShards(Database occ,
TraceEvent(SevDebug, "StartMoveShardsBegin", relocationIntervalId)
.detail("DataMoveID", dataMoveId)
.detail("TargetRange", keys);
.detail("TargetRange", describe(ranges));
// TODO: make startMoveShards work with multiple ranges.
ASSERT(ranges.size() == 1);
state KeyRangeRef keys = ranges[0];
try {
state Key begin = keys.begin;
state KeyRange currentKeys = keys;
@ -1576,7 +1579,7 @@ ACTOR static Future<Void> checkDataMoveComplete(Database occ, UID dataMoveId, Ke
// Clear dataMoves[dataMoveId].
ACTOR static Future<Void> finishMoveShards(Database occ,
UID dataMoveId,
KeyRange targetKeys,
std::vector<KeyRange> targetRanges,
std::vector<UID> destinationTeam,
MoveKeysLock lock,
FlowLock* finishMoveKeysParallelismLock,
@ -1585,7 +1588,10 @@ ACTOR static Future<Void> finishMoveShards(Database occ,
std::map<UID, StorageServerInterface> tssMapping,
const DDEnabledState* ddEnabledState) {
ASSERT(SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA);
state KeyRange keys = targetKeys;
// TODO: make startMoveShards work with multiple ranges.
ASSERT(targetRanges.size() == 1);
state KeyRange keys = targetRanges[0];
state Future<Void> warningLogger = logWarningAfter("FinishMoveShardsTooLong", 600, destinationTeam);
state int retries = 0;
state DataMoveMetaData dataMove;
@ -1636,7 +1642,7 @@ ACTOR static Future<Void> finishMoveShards(Database occ,
} else {
TraceEvent(SevWarn, "FinishMoveShardsDataMoveDeleted", relocationIntervalId)
.detail("DataMoveID", dataMoveId);
wait(checkDataMoveComplete(occ, dataMoveId, targetKeys, relocationIntervalId));
wait(checkDataMoveComplete(occ, dataMoveId, keys, relocationIntervalId));
return Void();
}
@ -2485,9 +2491,10 @@ Future<Void> rawStartMovement(Database occ,
const MoveKeysParams& params,
std::map<UID, StorageServerInterface>& tssMapping) {
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA) {
ASSERT(params.ranges.present());
return startMoveShards(std::move(occ),
params.dataMoveId,
params.keys,
params.ranges.get(),
params.destinationTeam,
params.lock,
params.startMoveKeysParallelismLock,
@ -2495,8 +2502,9 @@ Future<Void> rawStartMovement(Database occ,
params.ddEnabledState,
params.cancelConflictingDataMoves);
}
ASSERT(params.keys.present());
return startMoveKeys(std::move(occ),
params.keys,
params.keys.get(),
params.destinationTeam,
params.lock,
params.startMoveKeysParallelismLock,
@ -2505,13 +2513,37 @@ Future<Void> rawStartMovement(Database occ,
params.ddEnabledState);
}
Future<Void> rawCheckFetchingState(const Database& cx,
const MoveKeysParams& params,
const std::map<UID, StorageServerInterface>& tssMapping) {
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA) {
ASSERT(params.ranges.present());
// TODO: make startMoveShards work with multiple ranges.
ASSERT(params.ranges.get().size() == 1);
return checkFetchingState(cx,
params.healthyDestinations,
params.ranges.get().at(0),
params.dataMovementComplete,
params.relocationIntervalId,
tssMapping);
}
ASSERT(params.keys.present());
return checkFetchingState(cx,
params.healthyDestinations,
params.keys.get(),
params.dataMovementComplete,
params.relocationIntervalId,
tssMapping);
}
Future<Void> rawFinishMovement(Database occ,
const MoveKeysParams& params,
const std::map<UID, StorageServerInterface>& tssMapping) {
if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA) {
ASSERT(params.ranges.present());
return finishMoveShards(std::move(occ),
params.dataMoveId,
params.keys,
params.ranges.get(),
params.destinationTeam,
params.lock,
params.finishMoveKeysParallelismLock,
@ -2520,8 +2552,9 @@ Future<Void> rawFinishMovement(Database occ,
tssMapping,
params.ddEnabledState);
}
ASSERT(params.keys.present());
return finishMoveKeys(std::move(occ),
params.keys,
params.keys.get(),
params.destinationTeam,
params.lock,
params.finishMoveKeysParallelismLock,
@ -2539,12 +2572,7 @@ ACTOR Future<Void> moveKeys(Database occ, MoveKeysParams params) {
wait(rawStartMovement(occ, params, tssMapping));
state Future<Void> completionSignaller = checkFetchingState(occ,
params.healthyDestinations,
params.keys,
params.dataMovementComplete,
params.relocationIntervalId,
tssMapping);
state Future<Void> completionSignaller = rawCheckFetchingState(occ, params, tssMapping);
wait(rawFinishMovement(occ, params, tssMapping));

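The same ranges-versus-keys dispatch now appears in rawStartMovement, rawCheckFetchingState and rawFinishMovement: the physical-shard path (SHARD_ENCODE_LOCATION_METADATA) carries a vector of ranges, currently restricted to exactly one, while the legacy path carries a single key range. A standalone sketch of that selection (Params and selectRange are simplified stand-ins for MoveKeysParams and the inlined checks):

#include <cassert>
#include <optional>
#include <vector>

struct KeyRange {};

struct Params {
    std::optional<std::vector<KeyRange>> ranges; // physical-shard path
    std::optional<KeyRange> keys;                // legacy path
};

KeyRange selectRange(const Params& params, bool shardEncodeLocationMetadata) {
    if (shardEncodeLocationMetadata) {
        assert(params.ranges.has_value());
        // The real code still asserts a single range until startMoveShards
        // supports multiple ranges.
        assert(params.ranges->size() == 1);
        return params.ranges->at(0);
    }
    assert(params.keys.has_value());
    return *params.keys;
}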
View File

@ -98,7 +98,6 @@ TraceEvent debugTagsAndMessageEnabled(const char* context, Version version, Stri
SpanContextMessage scm;
br >> scm;
} else if (OTELSpanContextMessage::startsOTELSpanContextMessage(mutationType)) {
CODE_PROBE(true, "MutationTracking reading OTELSpanContextMessage", probe::decoration::rare);
BinaryReader br(mutationData, AssumeVersion(rdr.protocolVersion()));
OTELSpanContextMessage scm;
br >> scm;

View File

@ -1633,7 +1633,7 @@ ACTOR Future<Void> tLog(IKeyValueStore* persistentData,
TraceEvent("SharedTlog", tlogId).detail("Version", "4.6");
try {
wait(ioTimeoutError(persistentData->init(), SERVER_KNOBS->TLOG_MAX_CREATE_DURATION));
wait(ioTimeoutError(persistentData->init(), SERVER_KNOBS->TLOG_MAX_CREATE_DURATION, "TLogInit"));
wait(restorePersistentState(&self, locality));
self.sharedActors.send(cleanupPeekTrackers(&self));

View File

@ -1484,7 +1484,7 @@ ACTOR Future<Void> doQueueCommit(TLogData* self,
self->largeDiskQueueCommitBytes.set(false);
wait(ioDegradedOrTimeoutError(
c, SERVER_KNOBS->MAX_STORAGE_COMMIT_TIME, self->degraded, SERVER_KNOBS->TLOG_DEGRADED_DURATION));
c, SERVER_KNOBS->MAX_STORAGE_COMMIT_TIME, self->degraded, SERVER_KNOBS->TLOG_DEGRADED_DURATION, "TLogCommit"));
if (g_network->isSimulated() && !g_simulator->speedUpSimulation && BUGGIFY_WITH_PROB(0.0001)) {
wait(delay(6.0));
}
@ -1701,7 +1701,7 @@ ACTOR Future<Void> initPersistentState(TLogData* self, Reference<LogData> logDat
}
TraceEvent("TLogInitCommit", logData->logId).log();
wait(ioTimeoutError(self->persistentData->commit(), SERVER_KNOBS->TLOG_MAX_CREATE_DURATION));
wait(ioTimeoutError(self->persistentData->commit(), SERVER_KNOBS->TLOG_MAX_CREATE_DURATION, "TLogCommit"));
return Void();
}
@ -2801,13 +2801,13 @@ ACTOR Future<Void> tLog(IKeyValueStore* persistentData,
TraceEvent("SharedTlog", tlogId).detail("Version", "6.0");
try {
wait(ioTimeoutError(persistentData->init(), SERVER_KNOBS->TLOG_MAX_CREATE_DURATION));
wait(ioTimeoutError(persistentData->init(), SERVER_KNOBS->TLOG_MAX_CREATE_DURATION, "TLogInit"));
if (restoreFromDisk) {
wait(restorePersistentState(&self, locality, oldLog, recovered, tlogRequests));
} else {
wait(ioTimeoutError(checkEmptyQueue(&self) && checkRecovered(&self),
SERVER_KNOBS->TLOG_MAX_CREATE_DURATION));
wait(ioTimeoutError(
checkEmptyQueue(&self) && checkRecovered(&self), SERVER_KNOBS->TLOG_MAX_CREATE_DURATION, "TLogInit"));
}
// Disk errors need a chance to kill this actor.

View File

@ -3291,7 +3291,7 @@ ACTOR Future<Void> tLog(IKeyValueStore* persistentData,
TraceEvent("SharedTlog", tlogId).detail("Version", "6.2");
try {
wait(ioTimeoutError(persistentData->init(), SERVER_KNOBS->TLOG_MAX_CREATE_DURATION));
wait(ioTimeoutError(persistentData->init(), SERVER_KNOBS->TLOG_MAX_CREATE_DURATION, "TLogInit"));
if (restoreFromDisk) {
wait(restorePersistentState(&self, locality, oldLog, recovered, tlogRequests));

View File

@ -487,12 +487,12 @@ class PaxosConfigConsumerImpl {
.detail("LargestLiveVersion", self->getCommittedVersionQuorum.getLargestLive())
.detail("SmallestCommitted", smallestCommitted);
ASSERT_GE(committedVersion, self->lastSeenVersion);
self->lastSeenVersion = committedVersion;
self->lastSeenVersion = std::max(self->lastSeenVersion, committedVersion);
self->compactionVersion = std::max(self->compactionVersion, smallestCommitted);
broadcaster->applySnapshotAndChanges(std::move(reply.snapshot),
reply.snapshotVersion,
reply.changes,
committedVersion,
self->lastSeenVersion,
reply.annotations,
self->getCommittedVersionQuorum.getReadReplicas(),
self->getCommittedVersionQuorum.getLargestLive(),
@ -534,6 +534,13 @@ class PaxosConfigConsumerImpl {
if (committedVersion > self->lastSeenVersion) {
ASSERT(self->getCommittedVersionQuorum.getReadReplicas().size() >= self->cfis.size() / 2 + 1 ||
self->getCommittedVersionQuorum.isSpecialZeroQuorum());
if (BUGGIFY) {
// Inject a random delay between getting the committed
// version and reading any changes. The goal is to
// allow attrition to occasionally kill ConfigNodes in
// this in-between state.
wait(delay(deterministicRandom()->random01() * 5));
}
state std::vector<ConfigFollowerInterface> readReplicas =
self->getCommittedVersionQuorum.getReadReplicas();
std::vector<Future<Void>> fs;
@ -567,7 +574,7 @@ class PaxosConfigConsumerImpl {
Version smallestCommitted = self->getCommittedVersionQuorum.getSmallestCommitted();
self->compactionVersion = std::max(self->compactionVersion, smallestCommitted);
broadcaster->applyChanges(reply.changes,
committedVersion,
self->lastSeenVersion,
reply.annotations,
self->getCommittedVersionQuorum.getReadReplicas());
} else if (committedVersion == self->lastSeenVersion) {

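A brief note on the version handling above: assigning with std::max keeps lastSeenVersion from regressing if a quorum reports an older committed version, and the broadcaster is now driven from that monotonic value rather than the raw committedVersion. Sketch of the invariant (standalone, not the PaxosConfigConsumerImpl interface):

#include <algorithm>
#include <cstdint>

using Version = int64_t;

// Advance the consumer's high-water mark without ever moving backwards.
void advanceLastSeen(Version& lastSeenVersion, Version committedVersion) {
    lastSeenVersion = std::max(lastSeenVersion, committedVersion);
}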
View File

@ -43,9 +43,9 @@
#include "flow/actorcompiler.h" // has to be last include
#ifdef SSD_ROCKSDB_EXPERIMENTAL
// Enforcing rocksdb version to be 6.22.1 or greater.
static_assert(ROCKSDB_MAJOR == 6 && ROCKSDB_MINOR >= 22 && ROCKSDB_PATCH >= 1,
"Unsupported rocksdb version. Update the rocksdb to at least 6.22.1 version");
// Enforcing rocksdb version to be 7.7.3.
static_assert((ROCKSDB_MAJOR == 7 && ROCKSDB_MINOR == 7 && ROCKSDB_PATCH == 3),
"Unsupported rocksdb version. Update the rocksdb to 7.7.3 version");
namespace {

View File

@ -328,6 +328,13 @@ class TestConfig : public BasicTestConfig {
if (attrib == "disableEncryption") {
disableEncryption = strcmp(value.c_str(), "true") == 0;
}
if (attrib == "encryptModes") {
std::stringstream ss(value);
std::string token;
while (std::getline(ss, token, ',')) {
encryptModes.push_back(token);
}
}
if (attrib == "restartInfoLocation") {
isFirstTestInRestart = true;
}
@ -397,6 +404,9 @@ public:
bool disableRemoteKVS = false;
// 7.2 cannot be downgraded to 7.1 or below after enabling encryption-at-rest.
bool disableEncryption = false;
// By default, encryption mode is set randomly (based on the tenant mode)
// If provided, set using EncryptionAtRestMode::fromString
std::vector<std::string> encryptModes;
// Storage Engine Types: Verify match with SimulationConfig::generateNormalConfig
// 0 = "ssd"
// 1 = "memory"
@ -474,6 +484,7 @@ public:
.add("disableHostname", &disableHostname)
.add("disableRemoteKVS", &disableRemoteKVS)
.add("disableEncryption", &disableEncryption)
.add("encryptModes", &encryptModes)
.add("simpleConfig", &simpleConfig)
.add("generateFearless", &generateFearless)
.add("datacenters", &datacenters)
@ -1274,6 +1285,7 @@ ACTOR Future<Void> restartSimulatedSystem(std::vector<Future<Void>>* systemActor
g_knobs.setKnob("remote_kv_store", KnobValueRef::create(bool{ false }));
TraceEvent(SevDebug, "DisableRemoteKVS");
}
// TODO: Remove this code when encryption knobs are removed
if (testConfig->disableEncryption) {
g_knobs.setKnob("enable_encryption", KnobValueRef::create(bool{ false }));
g_knobs.setKnob("enable_tlog_encryption", KnobValueRef::create(bool{ false }));
@ -2052,6 +2064,19 @@ void setupSimulatedSystem(std::vector<Future<Void>>* systemActors,
simconfig.db.tenantMode = tenantMode;
simconfig.db.encryptionAtRestMode = EncryptionAtRestMode::DISABLED;
if (!testConfig.encryptModes.empty()) {
simconfig.db.encryptionAtRestMode =
EncryptionAtRestMode::fromString(deterministicRandom()->randomChoice(testConfig.encryptModes));
} else if (!testConfig.disableEncryption && deterministicRandom()->coinflip()) {
if (tenantMode == TenantMode::DISABLED || tenantMode == TenantMode::OPTIONAL_TENANT ||
deterministicRandom()->coinflip()) {
// optional and disabled tenant modes currently only support cluster aware encryption
simconfig.db.encryptionAtRestMode = EncryptionAtRestMode::CLUSTER_AWARE;
} else {
simconfig.db.encryptionAtRestMode = EncryptionAtRestMode::DOMAIN_AWARE;
}
}
TraceEvent("SimulatedClusterEncryptionMode").detail("Mode", simconfig.db.encryptionAtRestMode.toString());
g_simulator->blobGranulesEnabled = simconfig.db.blobGranulesEnabled;
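The hunk above picks the simulated cluster's encryption-at-rest mode: an explicit encryptModes list from the test config wins, otherwise the mode is chosen randomly subject to the tenant mode. A hedged standalone sketch of that selection follows (the mode strings and rand() are illustrative stand-ins; FDB uses EncryptionAtRestMode::fromString and deterministicRandom()):

#include <cstdlib>
#include <string>
#include <vector>

// Illustrative mode names; the real values come from EncryptionAtRestMode::fromString.
std::string pickEncryptionMode(const std::vector<std::string>& encryptModes,
                               bool disableEncryption,
                               bool tenantsRequired) {
    if (!encryptModes.empty()) {
        // An explicit encryptModes list in the test config wins; pick one of its entries at random.
        return encryptModes[std::rand() % encryptModes.size()];
    }
    if (disableEncryption || std::rand() % 2 == 0) {
        return "disabled";
    }
    if (!tenantsRequired || std::rand() % 2 == 0) {
        return "cluster_aware"; // disabled/optional tenant modes only support cluster-aware encryption
    }
    return "domain_aware";
}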
@ -2065,6 +2090,7 @@ void setupSimulatedSystem(std::vector<Future<Void>>* systemActors,
g_knobs.setKnob("remote_kv_store", KnobValueRef::create(bool{ false }));
TraceEvent(SevDebug, "DisableRemoteKVS");
}
// TODO: Remove this code once encryption knobs are removed
if (testConfig.disableEncryption) {
g_knobs.setKnob("enable_encryption", KnobValueRef::create(bool{ false }));
g_knobs.setKnob("enable_tlog_encryption", KnobValueRef::create(bool{ false }));

View File

@ -19,6 +19,8 @@
*/
#include <cinttypes>
#include "fdbclient/BlobGranuleCommon.h"
#include "fdbserver/BlobGranuleServerCommon.actor.h"
#include "fmt/format.h"
#include "fdbclient/BackupAgent.actor.h"
#include "fdbclient/BlobWorkerInterface.h"
@ -2443,6 +2445,47 @@ ACTOR static Future<JsonBuilderObject> blobWorkerStatusFetcher(
return statusObj;
}
ACTOR static Future<JsonBuilderObject> blobRestoreStatusFetcher(Database db, std::set<std::string>* incompleteReason) {
state JsonBuilderObject statusObj;
state std::vector<Future<Optional<TraceEventFields>>> futures;
try {
Optional<BlobRestoreStatus> status = wait(getRestoreStatus(db, normalKeys));
if (status.present()) {
switch (status.get().phase) {
case BlobRestorePhase::INIT:
statusObj["blob_full_restore_phase"] = "Initializing";
break;
case BlobRestorePhase::LOAD_MANIFEST:
statusObj["blob_full_restore_phase"] = "Loading manifest";
break;
case BlobRestorePhase::MANIFEST_DONE:
statusObj["blob_full_restore_phase"] = "Manifest loaded";
break;
case BlobRestorePhase::MIGRATE:
statusObj["blob_full_restore_phase"] = "Copying data";
statusObj["blob_full_restore_progress"] = status.get().progress;
break;
case BlobRestorePhase::APPLY_MLOGS:
statusObj["blob_full_restore_phase"] = "Applying mutation logs";
statusObj["blob_full_restore_progress"] = status.get().progress;
break;
case BlobRestorePhase::DONE:
statusObj["blob_full_restore_phase"] = "Completed";
break;
default:
statusObj["blob_full_restore_phase"] = "Unexpected phase";
}
}
} catch (Error& e) {
if (e.code() == error_code_actor_cancelled)
throw;
incompleteReason->insert("Unable to query blob restore status");
}
return statusObj;
}
static JsonBuilderObject tlogFetcher(int* logFaultTolerance,
const std::vector<TLogSet>& tLogs,
std::unordered_map<NetworkAddress, WorkerInterface> const& address_workers) {
@ -3409,6 +3452,8 @@ ACTOR Future<StatusReply> clusterGetStatus(
JsonBuilderObject blobGranuelsStatus =
wait(blobWorkerStatusFetcher(blobWorkers, address_workers, &status_incomplete_reasons));
statusObj["blob_granules"] = blobGranuelsStatus;
JsonBuilderObject blobRestoreStatus = wait(blobRestoreStatusFetcher(cx, &status_incomplete_reasons));
statusObj["blob_restore"] = blobRestoreStatus;
}
JsonBuilderArray incompatibleConnectionsArray;

View File

@ -375,7 +375,7 @@ struct TLogData : NonCopyable {
peekMemoryLimiter(SERVER_KNOBS->TLOG_SPILL_REFERENCE_MAX_PEEK_MEMORY_BYTES),
concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS), ignorePopDeadline(0), dataFolder(folder),
degraded(degraded),
commitLatencyDist(Histogram::getHistogram("tLog"_sr, "commit"_sr, Histogram::Unit::microseconds)) {
commitLatencyDist(Histogram::getHistogram("tLog"_sr, "commit"_sr, Histogram::Unit::milliseconds)) {
cx = openDBOnServer(dbInfo, TaskPriority::DefaultEndpoint, LockAware::True);
}
};
@ -1098,7 +1098,7 @@ ACTOR Future<Void> updatePersistentData(TLogData* self, Reference<LogData> logDa
}
// SOMEDAY: This seems to be running pretty often, should we slow it down???
// This needs a timeout since nothing prevents I/O operations from hanging indefinitely.
wait(ioTimeoutError(self->persistentData->commit(), tLogMaxCreateDuration));
wait(ioTimeoutError(self->persistentData->commit(), tLogMaxCreateDuration, "TLogCommit"));
wait(delay(0, TaskPriority::UpdateStorage));
@ -2160,7 +2160,7 @@ ACTOR Future<Void> doQueueCommit(TLogData* self,
self->largeDiskQueueCommitBytes.set(false);
wait(ioDegradedOrTimeoutError(
c, SERVER_KNOBS->MAX_STORAGE_COMMIT_TIME, self->degraded, SERVER_KNOBS->TLOG_DEGRADED_DURATION));
c, SERVER_KNOBS->MAX_STORAGE_COMMIT_TIME, self->degraded, SERVER_KNOBS->TLOG_DEGRADED_DURATION, "TLogCommit"));
if (g_network->isSimulated() && !g_simulator->speedUpSimulation && BUGGIFY_WITH_PROB(0.0001)) {
wait(delay(6.0));
}
@ -3464,7 +3464,8 @@ ACTOR Future<Void> tLogStart(TLogData* self, InitializeTLogRequest req, Locality
logData->unpoppedRecoveredTagCount = req.allTags.size();
logData->unpoppedRecoveredTags = std::set<Tag>(req.allTags.begin(), req.allTags.end());
wait(ioTimeoutError(initPersistentState(self, logData) || logData->removed,
SERVER_KNOBS->TLOG_MAX_CREATE_DURATION));
SERVER_KNOBS->TLOG_MAX_CREATE_DURATION,
"TLogInit"));
TraceEvent("TLogRecover", self->dbgid)
.detail("LogId", logData->logId)
@ -3529,7 +3530,8 @@ ACTOR Future<Void> tLogStart(TLogData* self, InitializeTLogRequest req, Locality
} else {
// Brand new tlog, initialization has already been done by caller
wait(ioTimeoutError(initPersistentState(self, logData) || logData->removed,
SERVER_KNOBS->TLOG_MAX_CREATE_DURATION));
SERVER_KNOBS->TLOG_MAX_CREATE_DURATION,
"TLogInit"));
if (logData->recoveryComplete.isSet()) {
throw worker_removed();
@ -3600,13 +3602,14 @@ ACTOR Future<Void> tLog(IKeyValueStore* persistentData,
TraceEvent("SharedTlog", tlogId);
try {
wait(ioTimeoutError(persistentData->init(), SERVER_KNOBS->TLOG_MAX_CREATE_DURATION));
wait(ioTimeoutError(persistentData->init(), SERVER_KNOBS->TLOG_MAX_CREATE_DURATION, "TLogInit"));
if (restoreFromDisk) {
wait(restorePersistentState(&self, locality, oldLog, recovered, tlogRequests));
} else {
wait(ioTimeoutError(checkEmptyQueue(&self) && initPersistentStorage(&self),
SERVER_KNOBS->TLOG_MAX_CREATE_DURATION));
SERVER_KNOBS->TLOG_MAX_CREATE_DURATION,
"TLogInit"));
}
// Disk errors need a chance to kill this actor.

View File

@ -546,7 +546,7 @@ Future<Version> TagPartitionedLogSystem::push(Version prevVersion,
it->tlogPushDistTrackers.push_back(
Histogram::getHistogram("ToTlog_" + it->logServers[i]->get().interf().uniqueID.toString(),
it->logServers[i]->get().interf().address().toString(),
Histogram::Unit::microseconds));
Histogram::Unit::milliseconds));
}
}
std::vector<Future<Void>> tLogCommitResults;

View File

@ -124,9 +124,17 @@ public:
state int refreshInterval = SERVER_KNOBS->TENANT_CACHE_STORAGE_USAGE_REFRESH_INTERVAL;
state double lastTenantListFetchTime = now();
state double lastTraceTime = 0;
loop {
state double fetchStartTime = now();
state bool toTrace = false;
if (fetchStartTime - lastTraceTime > SERVER_KNOBS->TENANT_CACHE_STORAGE_USAGE_TRACE_INTERVAL) {
toTrace = true;
lastTraceTime = fetchStartTime;
}
state std::vector<TenantGroupName> groups;
for (const auto& [group, storage] : tenantCache->tenantStorageMap) {
groups.push_back(group);
@ -159,6 +167,14 @@ public:
}
}
tenantCache->tenantStorageMap[group].usage = usage;
if (toTrace) {
// Trace the storage used by all tenant groups for visibility.
TraceEvent(SevInfo, "StorageUsageUpdated", tenantCache->id())
.detail("TenantGroup", group)
.detail("Quota", tenantCache->tenantStorageMap[group].quota)
.detail("Usage", tenantCache->tenantStorageMap[group].usage);
}
}
lastTenantListFetchTime = now();
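The toTrace/lastTraceTime bookkeeping added above rate-limits the new StorageUsageUpdated event to at most one emission per TENANT_CACHE_STORAGE_USAGE_TRACE_INTERVAL, even though usage is refreshed more frequently. A minimal standalone sketch of the pattern (illustrative names and intervals, plain C++ rather than Flow):

#include <chrono>
#include <iostream>
#include <thread>

int main() {
    using clock = std::chrono::steady_clock;
    const auto refreshInterval = std::chrono::milliseconds(200); // stand-in for the refresh knob
    const auto traceInterval = std::chrono::seconds(1);          // stand-in for the trace knob
    auto lastTraceTime = clock::time_point();                    // epoch, so the first pass traces
    for (int i = 0; i < 10; ++i) {
        const auto fetchStartTime = clock::now();
        const bool toTrace = (fetchStartTime - lastTraceTime) > traceInterval;
        if (toTrace) {
            lastTraceTime = fetchStartTime;
            std::cout << "StorageUsageUpdated pass=" << i << "\n"; // stands in for TraceEvent(...)
        }
        // ... refresh per-tenant-group quota/usage here ...
        std::this_thread::sleep_for(refreshInterval);
    }
    return 0;
}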

View File

@ -459,7 +459,13 @@ public:
// Since cursors can have async operations pending which modify their state, they can't be copied cleanly
Cursor(const Cursor& other) = delete;
~Cursor() { writeOperations.cancel(); }
~Cursor() { cancel(); }
// Cancel outstanding operations. Further use of cursor is not allowed.
void cancel() {
nextPageReader.cancel();
writeOperations.cancel();
}
// A read cursor can be initialized from a pop cursor
void initReadOnly(const Cursor& c, bool readExtents = false) {
@ -921,7 +927,15 @@ public:
public:
FIFOQueue() : pager(nullptr) {}
~FIFOQueue() { newTailPage.cancel(); }
~FIFOQueue() { cancel(); }
// Cancel outstanding operations. Further use of queue is not allowed.
void cancel() {
headReader.cancel();
tailWriter.cancel();
headWriter.cancel();
newTailPage.cancel();
}
FIFOQueue(const FIFOQueue& other) = delete;
void operator=(const FIFOQueue& rhs) = delete;
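The ~Cursor and ~FIFOQueue changes above move cleanup into an explicit cancel() member, so every outstanding future is cancelled in one place whether the object is destroyed or shut down explicitly. A toy standalone version of the pattern (std::function stands in for Flow futures; illustrative only):

#include <functional>
#include <vector>

struct CursorSketch {
    std::vector<std::function<void()>> pendingCancellers; // one canceller per outstanding operation
    // Cancel outstanding operations. Further use of the cursor is not allowed.
    void cancel() {
        for (auto& c : pendingCancellers)
            c();
        pendingCancellers.clear();
    }
    ~CursorSketch() { cancel(); } // destructor and explicit shutdown share one code path
};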
@ -3627,6 +3641,13 @@ public:
}
self->operations.clear();
debug_printf("DWALPager(%s) shutdown cancel queues\n", self->filename.c_str());
self->freeList.cancel();
self->delayedFreeList.cancel();
self->remapQueue.cancel();
self->extentFreeList.cancel();
self->extentUsedList.cancel();
debug_printf("DWALPager(%s) shutdown destroy page cache\n", self->filename.c_str());
wait(self->extentCache.clear());
wait(self->pageCache.clear());
@ -4697,21 +4718,15 @@ public:
if (domainId.present()) {
ASSERT(keyProvider && keyProvider->enableEncryptionDomain());
// Temporarily disabling the check, since if a tenant is removed, where the key provider
// would not find the domain, the data for the tenant may still be in Redwood and being read.
// TODO(yiwu): re-enable the check.
/*
if (domainId.get() != keyProvider->getDefaultEncryptionDomainId() &&
!keyProvider->keyFitsInDomain(domainId.get(), lowerBound, false)) {
fprintf(stderr,
"Page lower bound not in domain: %s %s, domain id %s, lower bound '%s'\n",
::toString(id).c_str(),
::toString(v).c_str(),
::toString(domainId).c_str(),
lowerBound.printable().c_str());
return false;
if (!keyProvider->keyFitsInDomain(domainId.get(), lowerBound, true)) {
fprintf(stderr,
"Page lower bound not in domain: %s %s, domain id %s, lower bound '%s'\n",
::toString(id).c_str(),
::toString(v).c_str(),
::toString(domainId).c_str(),
lowerBound.printable().c_str());
return false;
}
*/
}
auto& b = boundariesByPageID[id.front()][v];
@ -4759,45 +4774,27 @@ public:
::toString(b->second.domainId).c_str());
return false;
}
// Temporarily disabling the check, since if a tenant is removed, where the key provider
// would not find the domain, the data for the tenant may still be in Redwood and being read.
// TODO(yiwu): re-enable the check.
/*
ASSERT(domainId.present());
auto checkKeyFitsInDomain = [&]() -> bool {
if (!keyProvider->keyFitsInDomain(domainId.get(), cursor.get().key, b->second.height > 1)) {
fprintf(stderr,
"Encryption domain mismatch on %s, %s, domain: %s, key %s\n",
::toString(id).c_str(),
::toString(v).c_str(),
::toString(domainId).c_str(),
cursor.get().key.printable().c_str());
return false;
}
return true;
if (!keyProvider->keyFitsInDomain(domainId.get(), cursor.get().key, b->second.height > 1)) {
fprintf(stderr,
"Encryption domain mismatch on %s, %s, domain: %s, key %s\n",
::toString(id).c_str(),
::toString(v).c_str(),
::toString(domainId).c_str(),
cursor.get().key.printable().c_str());
return false;
}
return true;
};
if (domainId.get() != keyProvider->getDefaultEncryptionDomainId()) {
cursor.moveFirst();
if (cursor.valid() && !checkKeyFitsInDomain()) {
return false;
}
cursor.moveLast();
if (cursor.valid() && !checkKeyFitsInDomain()) {
return false;
}
} else {
if (deterministicRandom()->random01() < domainPrefixScanProbability) {
cursor.moveFirst();
while (cursor.valid()) {
if (!checkKeyFitsInDomain()) {
return false;
}
cursor.moveNext();
}
domainPrefixScanCount++;
}
cursor.moveFirst();
if (cursor.valid() && !checkKeyFitsInDomain()) {
return false;
}
cursor.moveLast();
if (cursor.valid() && !checkKeyFitsInDomain()) {
return false;
}
*/
}
return true;
@ -5674,8 +5671,8 @@ private:
int64_t defaultDomainId = keyProvider->getDefaultEncryptionDomainId();
int64_t currentDomainId;
size_t prefixLength;
if (count == 0 || (splitByDomain && count > 0)) {
std::tie(currentDomainId, prefixLength) = keyProvider->getEncryptionDomain(rec.key, domainId);
if (count == 0 || splitByDomain) {
std::tie(currentDomainId, prefixLength) = keyProvider->getEncryptionDomain(rec.key);
}
if (count == 0) {
domainId = currentDomainId;
@ -5886,12 +5883,18 @@ private:
if (useEncryptionDomain) {
ASSERT(pagesToBuild[0].domainId.present());
int64_t domainId = pagesToBuild[0].domainId.get();
// We need to make sure we use the domain prefix as the page lower bound, for the first page
// of a non-default domain on a level. That way we ensure that pages for a domain form a full subtree
// (i.e. have a single root) in the B-tree.
if (domainId != self->m_keyProvider->getDefaultEncryptionDomainId() &&
!self->m_keyProvider->keyFitsInDomain(domainId, pageLowerBound.key, false)) {
pageLowerBound = RedwoodRecordRef(entries[0].key.substr(0, pagesToBuild[0].domainPrefixLength));
// We make sure the page lower bound fits in the domain of the page.
// If the page domain is the default domain, we make sure the page doesn't fall within a domain
// specific subtree.
// If the page domain is non-default, in addition, we make the first page of the domain on a level
// use the domain prefix as the lower bound. Such a lower bound will ensure that pages for a domain
// form a full subtree (i.e. have a single root) in the B-tree.
if (!self->m_keyProvider->keyFitsInDomain(domainId, pageLowerBound.key, true)) {
if (domainId == self->m_keyProvider->getDefaultEncryptionDomainId()) {
pageLowerBound = RedwoodRecordRef(entries[0].key);
} else {
pageLowerBound = RedwoodRecordRef(entries[0].key.substr(0, pagesToBuild[0].domainPrefixLength));
}
}
}

View File

@ -163,7 +163,8 @@ ACTOR Future<Void> printRestoreSummary(Database db, Reference<BlobConnectionProv
ACTOR Future<BlobGranuleRestoreVersionVector> listBlobGranules(Database db, Reference<BlobConnectionProvider> blobConn);
ACTOR Future<int64_t> lastBlobEpoc(Database db, Reference<BlobConnectionProvider> blobConn);
ACTOR Future<bool> isFullRestoreMode(Database db, KeyRangeRef range);
ACTOR Future<Void> updateRestoreStatus(Database db, KeyRangeRef range, BlobRestoreStatus status);
ACTOR Future<Optional<BlobRestoreStatus>> getRestoreStatus(Database db, KeyRangeRef range);
#include "flow/unactorcompiler.h"
#endif

View File

@ -920,7 +920,7 @@ public:
}
if (fitness == ProcessClass::NeverAssign) {
logWorkerUnavailable(
SevDebug, id, "complex", "Worker's fitness is NeverAssign", worker_details, fitness, dcIds);
SevDebug, id, "simple", "Worker's fitness is NeverAssign", worker_details, fitness, dcIds);
continue;
}
if (!dcIds.empty() && dcIds.count(worker_details.interf.locality.dcId()) == 0) {
@ -1072,7 +1072,7 @@ public:
}
if (fitness == ProcessClass::NeverAssign) {
logWorkerUnavailable(
SevDebug, id, "complex", "Worker's fitness is NeverAssign", worker_details, fitness, dcIds);
SevDebug, id, "deprecated", "Worker's fitness is NeverAssign", worker_details, fitness, dcIds);
continue;
}
if (!dcIds.empty() && dcIds.count(worker_details.interf.locality.dcId()) == 0) {

View File

@ -110,8 +110,7 @@ struct ConfigFollowerGetChangesReply {
Standalone<VectorRef<VersionedConfigCommitAnnotationRef>> annotations;
ConfigFollowerGetChangesReply() = default;
explicit ConfigFollowerGetChangesReply(Version mostRecentVersion,
Standalone<VectorRef<VersionedConfigMutationRef>> const& changes,
explicit ConfigFollowerGetChangesReply(Standalone<VectorRef<VersionedConfigMutationRef>> const& changes,
Standalone<VectorRef<VersionedConfigCommitAnnotationRef>> const& annotations)
: changes(changes), annotations(annotations) {}

View File

@ -284,12 +284,12 @@ public:
const std::unordered_set<uint64_t>& excludedPhysicalShards,
uint64_t debugID);
// Step 2: get a remote team which has the input physical shard
// Return empty if no such remote team
// May return a problematic remote team, and re-selection is required for this case
Optional<ShardsAffectedByTeamFailure::Team> tryGetAvailableRemoteTeamWith(uint64_t inputPhysicalShardID,
StorageMetrics const& moveInMetrics,
uint64_t debugID);
// Step 2: get a remote team which has the input physical shard.
// Second field in the returned pair indicates whether this physical shard is available or not.
// Return empty if no such remote team.
// May return a problematic remote team, and re-selection is required for this case.
std::pair<Optional<ShardsAffectedByTeamFailure::Team>, bool>
tryGetAvailableRemoteTeamWith(uint64_t inputPhysicalShardID, StorageMetrics const& moveInMetrics, uint64_t debugID);
// Invariant:
// (1) If forceToUseNewPhysicalShard is set, use the bestTeams selected by getTeam(), and create a new physical
// shard for the teams

View File

@ -90,21 +90,11 @@ public:
virtual int64_t getDefaultEncryptionDomainId() const { throw not_implemented(); }
// Get encryption domain from a key. Return the domain id, and the size of the encryption domain prefix.
// It is assumed that all keys with the same encryption domain prefix as the given key fall in the same encryption
// domain. If possibleDomainId is given, it is a valid domain id previously returned by the key provider,
// potentially for a different key. The possibleDomainId param is used by TenantAwareEncryptionKeyProvider to speed
// up encryption domain lookup.
virtual std::tuple<int64_t, size_t> getEncryptionDomain(const KeyRef& key,
Optional<int64_t> possibleDomainId = Optional<int64_t>()) {
throw not_implemented();
}
virtual std::tuple<int64_t, size_t> getEncryptionDomain(const KeyRef& key) { throw not_implemented(); }
// Get encryption domain of a page given encoding header.
virtual int64_t getEncryptionDomainIdFromHeader(const void* encodingHeader) { throw not_implemented(); }
// Setting tenant prefix to tenant name map. Used by TenantAwareEncryptionKeyProvider.
virtual void setTenantPrefixIndex(Reference<TenantPrefixIndex> tenantPrefixIndex) {}
// Helper methods.
// Check if a key fits in an encryption domain.
@ -220,7 +210,7 @@ public:
int64_t getDefaultEncryptionDomainId() const override { return FDB_DEFAULT_ENCRYPT_DOMAIN_ID; }
std::tuple<int64_t, size_t> getEncryptionDomain(const KeyRef& key, Optional<int64_t>) override {
std::tuple<int64_t, size_t> getEncryptionDomain(const KeyRef& key) override {
int64_t domainId;
if (key.size() < PREFIX_LENGTH) {
domainId = getDefaultEncryptionDomainId();
@ -291,6 +281,8 @@ class TenantAwareEncryptionKeyProvider : public IPageEncryptionKeyProvider {
public:
using EncodingHeader = ArenaPage::AESEncryptionV1Encoder::Header;
const StringRef systemKeysPrefix = systemKeys.begin;
TenantAwareEncryptionKeyProvider(Reference<AsyncVar<ServerDBInfo> const> db) : db(db) {}
virtual ~TenantAwareEncryptionKeyProvider() = default;
@ -337,10 +329,10 @@ public:
int64_t getDefaultEncryptionDomainId() const override { return FDB_DEFAULT_ENCRYPT_DOMAIN_ID; }
std::tuple<int64_t, size_t> getEncryptionDomain(const KeyRef& key, Optional<int64_t> possibleDomainId) override {
std::tuple<int64_t, size_t> getEncryptionDomain(const KeyRef& key) override {
// System key.
if (key.startsWith(systemKeys.begin)) {
return { SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID, 2 };
if (key.startsWith(systemKeysPrefix)) {
return { SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID, systemKeysPrefix.size() };
}
// Key smaller than tenant prefix in size belongs to the default domain.
if (key.size() < TENANT_PREFIX_SIZE) {
@ -352,21 +344,7 @@ public:
if (tenantId < 0) {
return { FDB_DEFAULT_ENCRYPT_DOMAIN_ID, 0 };
}
// Optimization: Caller guarantee possibleDomainId is a valid domain id that we previously returned.
// We can return immediately without checking with tenant map.
if (possibleDomainId.present() && possibleDomainId.get() == tenantId) {
return { tenantId, TENANT_PREFIX_SIZE };
}
if (tenantPrefixIndex.isValid()) {
auto view = tenantPrefixIndex->atLatest();
auto itr = view.find(prefix);
if (itr != view.end()) {
// Tenant not found. Tenant must be disabled, or in optional mode.
return { tenantId, TENANT_PREFIX_SIZE };
}
}
// The prefix does not belong to any tenant. The key belongs to the default domain.
return { FDB_DEFAULT_ENCRYPT_DOMAIN_ID, 0 };
return { tenantId, TENANT_PREFIX_SIZE };
}
int64_t getEncryptionDomainIdFromHeader(const void* encodingHeader) override {
@ -375,13 +353,8 @@ public:
return header->cipherTextDetails.encryptDomainId;
}
void setTenantPrefixIndex(Reference<TenantPrefixIndex> tenantPrefixIndex) override {
this->tenantPrefixIndex = tenantPrefixIndex;
}
private:
Reference<AsyncVar<ServerDBInfo> const> db;
Reference<TenantPrefixIndex> tenantPrefixIndex;
};
#include "flow/unactorcompiler.h"

View File

@ -58,7 +58,12 @@ public:
struct MoveKeysParams {
UID dataMoveId;
KeyRange keys;
// Only one of `keys` and `ranges` can be set. `ranges` is created mainly for physical shard moves to move a full
// physical shard with multiple key ranges.
Optional<KeyRange> keys;
Optional<std::vector<KeyRange>> ranges;
std::vector<UID> destinationTeam, healthyDestinations;
MoveKeysLock lock;
Promise<Void> dataMovementComplete;
@ -68,6 +73,46 @@ struct MoveKeysParams {
UID relocationIntervalId;
const DDEnabledState* ddEnabledState = nullptr;
CancelConflictingDataMoves cancelConflictingDataMoves = CancelConflictingDataMoves::False;
MoveKeysParams() {}
MoveKeysParams(UID dataMoveId,
const KeyRange& keys,
const std::vector<UID>& destinationTeam,
const std::vector<UID>& healthyDestinations,
const MoveKeysLock& lock,
const Promise<Void>& dataMovementComplete,
FlowLock* startMoveKeysParallelismLock,
FlowLock* finishMoveKeysParallelismLock,
bool hasRemote,
UID relocationIntervalId,
const DDEnabledState* ddEnabledState,
CancelConflictingDataMoves cancelConflictingDataMoves)
: dataMoveId(dataMoveId), keys(keys), destinationTeam(destinationTeam), healthyDestinations(healthyDestinations),
lock(lock), dataMovementComplete(dataMovementComplete),
startMoveKeysParallelismLock(startMoveKeysParallelismLock),
finishMoveKeysParallelismLock(finishMoveKeysParallelismLock), hasRemote(hasRemote),
relocationIntervalId(relocationIntervalId), ddEnabledState(ddEnabledState),
cancelConflictingDataMoves(cancelConflictingDataMoves) {}
MoveKeysParams(UID dataMoveId,
const std::vector<KeyRange>& ranges,
const std::vector<UID>& destinationTeam,
const std::vector<UID>& healthyDestinations,
const MoveKeysLock& lock,
const Promise<Void>& dataMovementComplete,
FlowLock* startMoveKeysParallelismLock,
FlowLock* finishMoveKeysParallelismLock,
bool hasRemote,
UID relocationIntervalId,
const DDEnabledState* ddEnabledState,
CancelConflictingDataMoves cancelConflictingDataMoves)
: dataMoveId(dataMoveId), ranges(ranges), destinationTeam(destinationTeam),
healthyDestinations(healthyDestinations), lock(lock), dataMovementComplete(dataMovementComplete),
startMoveKeysParallelismLock(startMoveKeysParallelismLock),
finishMoveKeysParallelismLock(finishMoveKeysParallelismLock), hasRemote(hasRemote),
relocationIntervalId(relocationIntervalId), ddEnabledState(ddEnabledState),
cancelConflictingDataMoves(cancelConflictingDataMoves) {}
};
// read the lock value in system keyspace but do not change anything
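MoveKeysParams now carries either a single keys range or a ranges vector, and the comment states that exactly one of the two is set. A small standalone sketch of that contract using std::optional (simplified types, not FDB's):

#include <cassert>
#include <optional>
#include <string>
#include <vector>

struct KeyRangeSketch {
    std::string begin, end;
};

struct MoveKeysParamsSketch {
    std::optional<KeyRangeSketch> keys;                // legacy single-range form
    std::optional<std::vector<KeyRangeSketch>> ranges; // physical-shard, multi-range form
    bool wellFormed() const { return keys.has_value() != ranges.has_value(); }
};

int main() {
    MoveKeysParamsSketch single;
    single.keys = KeyRangeSketch{ "a", "b" };
    MoveKeysParamsSketch multi;
    multi.ranges = std::vector<KeyRangeSketch>{ { "a", "b" }, { "c", "d" } };
    assert(single.wellFormed() && multi.wellFormed());
    return 0;
}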

View File

@ -137,16 +137,16 @@ struct ProxyStats {
SERVER_KNOBS->LATENCY_SKETCH_ACCURACY),
maxComputeNS(0), minComputeNS(1e12),
commitBatchQueuingDist(
Histogram::getHistogram("CommitProxy"_sr, "CommitBatchQueuing"_sr, Histogram::Unit::microseconds)),
Histogram::getHistogram("CommitProxy"_sr, "CommitBatchQueuing"_sr, Histogram::Unit::milliseconds)),
getCommitVersionDist(
Histogram::getHistogram("CommitProxy"_sr, "GetCommitVersion"_sr, Histogram::Unit::microseconds)),
resolutionDist(Histogram::getHistogram("CommitProxy"_sr, "Resolution"_sr, Histogram::Unit::microseconds)),
Histogram::getHistogram("CommitProxy"_sr, "GetCommitVersion"_sr, Histogram::Unit::milliseconds)),
resolutionDist(Histogram::getHistogram("CommitProxy"_sr, "Resolution"_sr, Histogram::Unit::milliseconds)),
postResolutionDist(
Histogram::getHistogram("CommitProxy"_sr, "PostResolutionQueuing"_sr, Histogram::Unit::microseconds)),
Histogram::getHistogram("CommitProxy"_sr, "PostResolutionQueuing"_sr, Histogram::Unit::milliseconds)),
processingMutationDist(
Histogram::getHistogram("CommitProxy"_sr, "ProcessingMutation"_sr, Histogram::Unit::microseconds)),
tlogLoggingDist(Histogram::getHistogram("CommitProxy"_sr, "TlogLogging"_sr, Histogram::Unit::microseconds)),
replyCommitDist(Histogram::getHistogram("CommitProxy"_sr, "ReplyCommit"_sr, Histogram::Unit::microseconds)) {
Histogram::getHistogram("CommitProxy"_sr, "ProcessingMutation"_sr, Histogram::Unit::milliseconds)),
tlogLoggingDist(Histogram::getHistogram("CommitProxy"_sr, "TlogLogging"_sr, Histogram::Unit::milliseconds)),
replyCommitDist(Histogram::getHistogram("CommitProxy"_sr, "ReplyCommit"_sr, Histogram::Unit::milliseconds)) {
specialCounter(cc, "LastAssignedCommitVersion", [this]() { return this->lastCommitVersionAssigned; });
specialCounter(cc, "Version", [pVersion]() { return pVersion->get(); });
specialCounter(cc, "CommittedVersion", [pCommittedVersion]() { return pCommittedVersion->get(); });

View File

@ -1284,7 +1284,7 @@ ACTOR Future<Void> tLog(IKeyValueStore* persistentData,
typedef decltype(&tLog) TLogFn;
ACTOR template <class T>
Future<T> ioTimeoutError(Future<T> what, double time) {
Future<T> ioTimeoutError(Future<T> what, double time, const char* context = nullptr) {
// Before simulation is sped up, IO operations can take a very long time so limit timeouts
// to not end until at least time after simulation is sped up.
if (g_network->isSimulated() && !g_simulator->speedUpSimulation) {
@ -1298,7 +1298,12 @@ Future<T> ioTimeoutError(Future<T> what, double time) {
if (g_network->isSimulated() && !g_simulator->getCurrentProcess()->isReliable()) {
err = err.asInjectedFault();
}
TraceEvent(SevError, "IoTimeoutError").error(err);
TraceEvent e(SevError, "IoTimeoutError");
e.error(err);
if (context != nullptr) {
e.detail("Context", context);
}
e.log();
throw err;
}
}
@ -1308,7 +1313,8 @@ ACTOR template <class T>
Future<T> ioDegradedOrTimeoutError(Future<T> what,
double errTime,
Reference<AsyncVar<bool>> degraded,
double degradedTime) {
double degradedTime,
const char* context = nullptr) {
// Before simulation is sped up, IO operations can take a very long time so limit timeouts
// to not end until at least time after simulation is sped up.
if (g_network->isSimulated() && !g_simulator->speedUpSimulation) {
@ -1337,7 +1343,12 @@ Future<T> ioDegradedOrTimeoutError(Future<T> what,
if (g_network->isSimulated() && !g_simulator->getCurrentProcess()->isReliable()) {
err = err.asInjectedFault();
}
TraceEvent(SevError, "IoTimeoutError").error(err);
TraceEvent e(SevError, "IoTimeoutError");
e.error(err);
if (context != nullptr) {
e.detail("Context", context);
}
e.log();
throw err;
}
}
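The new optional context argument above only changes what gets logged when the timeout fires. As a rough standalone illustration of the same pattern (plain C++ with std::future standing in for Flow futures; the names are ours, not FDB's):

#include <chrono>
#include <future>
#include <iostream>
#include <stdexcept>
#include <string>

template <class T>
T getWithTimeout(std::future<T>& f, std::chrono::milliseconds limit, const char* context = nullptr) {
    if (f.wait_for(limit) == std::future_status::timeout) {
        // Mirrors TraceEvent(SevError, "IoTimeoutError") plus the optional Context detail.
        std::string msg = "IoTimeoutError";
        if (context != nullptr)
            msg += std::string(" Context=") + context;
        std::cerr << msg << "\n";
        throw std::runtime_error(msg);
    }
    return f.get();
}

Callers label the operation, e.g. getWithTimeout(commitFuture, std::chrono::seconds(10), "TLogCommit"), so a timeout in the log can be attributed to a specific I/O path rather than a generic error.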

View File

@ -159,8 +159,7 @@ bool canReplyWith(Error e) {
#define PERSIST_PREFIX "\xff\xff"
FDB_DECLARE_BOOLEAN_PARAM(UnlimitedCommitBytes);
FDB_DEFINE_BOOLEAN_PARAM(UnlimitedCommitBytes);
FDB_BOOLEAN_PARAM(UnlimitedCommitBytes);
// Immutable
static const KeyValueRef persistFormat(PERSIST_PREFIX "Format"_sr, "FoundationDB/StorageServer/1/4"_sr);
@ -786,7 +785,7 @@ public:
std::map<Version, std::vector<CheckpointMetaData>> pendingCheckpoints; // Pending checkpoint requests
std::unordered_map<UID, CheckpointMetaData> checkpoints; // Existing and deleting checkpoints
TenantMap tenantMap;
Reference<TenantPrefixIndex> tenantPrefixIndex;
TenantPrefixIndex tenantPrefixIndex;
std::map<Version, std::vector<PendingNewShard>>
pendingAddRanges; // Pending requests to add ranges to physical shards
std::map<Version, std::vector<KeyRange>>
@ -805,7 +804,7 @@ public:
FetchKeysHistograms()
: latency(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP,
FETCH_KEYS_LATENCY_HISTOGRAM,
Histogram::Unit::microseconds)),
Histogram::Unit::milliseconds)),
bytes(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP,
FETCH_KEYS_BYTES_HISTOGRAM,
Histogram::Unit::bytes)),
@ -1369,31 +1368,31 @@ public:
Reference<AsyncVar<ServerDBInfo> const> const& db,
StorageServerInterface const& ssi,
Reference<IPageEncryptionKeyProvider> encryptionKeyProvider)
: tenantPrefixIndex(makeReference<TenantPrefixIndex>()), encryptionKeyProvider(encryptionKeyProvider),
shardAware(false), tlogCursorReadsLatencyHistogram(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP,
TLOG_CURSOR_READS_LATENCY_HISTOGRAM,
Histogram::Unit::microseconds)),
: encryptionKeyProvider(encryptionKeyProvider), shardAware(false),
tlogCursorReadsLatencyHistogram(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP,
TLOG_CURSOR_READS_LATENCY_HISTOGRAM,
Histogram::Unit::milliseconds)),
ssVersionLockLatencyHistogram(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP,
SS_VERSION_LOCK_LATENCY_HISTOGRAM,
Histogram::Unit::microseconds)),
Histogram::Unit::milliseconds)),
eagerReadsLatencyHistogram(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP,
EAGER_READS_LATENCY_HISTOGRAM,
Histogram::Unit::microseconds)),
Histogram::Unit::milliseconds)),
fetchKeysPTreeUpdatesLatencyHistogram(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP,
FETCH_KEYS_PTREE_UPDATES_LATENCY_HISTOGRAM,
Histogram::Unit::microseconds)),
Histogram::Unit::milliseconds)),
tLogMsgsPTreeUpdatesLatencyHistogram(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP,
TLOG_MSGS_PTREE_UPDATES_LATENCY_HISTOGRAM,
Histogram::Unit::microseconds)),
Histogram::Unit::milliseconds)),
storageUpdatesDurableLatencyHistogram(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP,
STORAGE_UPDATES_DURABLE_LATENCY_HISTOGRAM,
Histogram::Unit::microseconds)),
Histogram::Unit::milliseconds)),
storageCommitLatencyHistogram(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP,
STORAGE_COMMIT_LATENCY_HISTOGRAM,
Histogram::Unit::microseconds)),
Histogram::Unit::milliseconds)),
ssDurableVersionUpdateLatencyHistogram(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP,
SS_DURABLE_VERSION_UPDATE_LATENCY_HISTOGRAM,
Histogram::Unit::microseconds)),
Histogram::Unit::milliseconds)),
readRangeBytesReturnedHistogram(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP,
SS_READ_RANGE_BYTES_RETURNED_HISTOGRAM,
Histogram::Unit::bytes)),
@ -5111,7 +5110,7 @@ ACTOR Future<Void> getMappedKeyValuesQ(StorageServer* data, GetMappedKeyValuesRe
throw tenant_name_required();
}
if (rangeIntersectsAnyTenant(*(data->tenantPrefixIndex), KeyRangeRef(begin, end), req.version)) {
if (rangeIntersectsAnyTenant(data->tenantPrefixIndex, KeyRangeRef(begin, end), req.version)) {
throw tenant_name_required();
}
}
@ -8616,11 +8615,11 @@ private:
bool StorageServer::insertTenant(TenantNameRef tenantName, TenantMapEntry tenantEntry, Version version) {
if (version >= tenantMap.getLatestVersion()) {
tenantMap.createNewVersion(version);
tenantPrefixIndex->createNewVersion(version);
tenantPrefixIndex.createNewVersion(version);
tenantMap.insert(tenantName, tenantEntry);
auto view = tenantPrefixIndex->at(version);
auto view = tenantPrefixIndex.at(version);
auto itr = view.find(tenantEntry.prefix);
TenantNameUniqueSet nameSet;
if (itr != view.end()) {
@ -8628,7 +8627,7 @@ bool StorageServer::insertTenant(TenantNameRef tenantName, TenantMapEntry tenant
}
nameSet.insert(tenantName);
tenantPrefixIndex->insert(tenantEntry.prefix, nameSet);
tenantPrefixIndex.insert(tenantEntry.prefix, nameSet);
TraceEvent("InsertTenant", thisServerID).detail("Tenant", tenantName).detail("Version", version);
return true;
@ -8648,20 +8647,20 @@ void StorageServer::insertTenant(TenantNameRef tenantName, ValueRef value, Versi
void StorageServer::clearTenants(TenantNameRef startTenant, TenantNameRef endTenant, Version version) {
if (version >= tenantMap.getLatestVersion()) {
tenantMap.createNewVersion(version);
tenantPrefixIndex->createNewVersion(version);
tenantPrefixIndex.createNewVersion(version);
auto view = tenantMap.at(version);
for (auto itr = view.lower_bound(startTenant); itr != view.lower_bound(endTenant); ++itr) {
auto indexView = tenantPrefixIndex->at(version);
auto indexView = tenantPrefixIndex.at(version);
// Trigger any watches on the prefix associated with the tenant.
watches.triggerRange(itr->prefix, strinc(itr->prefix));
auto indexItr = indexView.find(itr->prefix);
ASSERT(indexItr != indexView.end());
TenantNameUniqueSet nameSet = *indexItr;
if (nameSet.remove(itr.key())) {
tenantPrefixIndex->erase(itr->prefix);
tenantPrefixIndex.erase(itr->prefix);
} else {
tenantPrefixIndex->insert(itr->prefix, nameSet);
tenantPrefixIndex.insert(itr->prefix, nameSet);
}
TraceEvent("EraseTenant", thisServerID).detail("Tenant", itr.key()).detail("Version", version);
}
@ -9348,7 +9347,7 @@ ACTOR Future<Void> updateStorage(StorageServer* data) {
newOldestVersion, desiredVersion, bytesLeft, unlimitedCommitBytes);
if (data->tenantMap.getLatestVersion() < newOldestVersion) {
data->tenantMap.createNewVersion(newOldestVersion);
data->tenantPrefixIndex->createNewVersion(newOldestVersion);
data->tenantPrefixIndex.createNewVersion(newOldestVersion);
}
// We want to forget things from these data structures atomically with changing oldestVersion (and "before",
// since oldestVersion.set() may trigger waiting actors) forgetVersionsBeforeAsync visibly forgets
@ -9356,7 +9355,7 @@ ACTOR Future<Void> updateStorage(StorageServer* data) {
Future<Void> finishedForgetting =
data->mutableData().forgetVersionsBeforeAsync(newOldestVersion, TaskPriority::UpdateStorage) &&
data->tenantMap.forgetVersionsBeforeAsync(newOldestVersion, TaskPriority::UpdateStorage) &&
data->tenantPrefixIndex->forgetVersionsBeforeAsync(newOldestVersion, TaskPriority::UpdateStorage);
data->tenantPrefixIndex.forgetVersionsBeforeAsync(newOldestVersion, TaskPriority::UpdateStorage);
data->oldestVersion.set(newOldestVersion);
wait(finishedForgetting);
wait(yield(TaskPriority::UpdateStorage));
@ -9468,7 +9467,7 @@ ACTOR Future<Void> updateStorage(StorageServer* data) {
durableDelay = delay(SERVER_KNOBS->STORAGE_COMMIT_INTERVAL, TaskPriority::UpdateStorage);
}
wait(ioTimeoutError(durable, SERVER_KNOBS->MAX_STORAGE_COMMIT_TIME));
wait(ioTimeoutError(durable, SERVER_KNOBS->MAX_STORAGE_COMMIT_TIME, "StorageCommit"));
data->storageCommitLatencyHistogram->sampleSeconds(now() - beforeStorageCommit);
debug_advanceMinCommittedVersion(data->thisServerID, data->storageMinRecoverVersion);
@ -10165,7 +10164,7 @@ ACTOR Future<bool> restoreDurableState(StorageServer* data, IKeyValueStore* stor
data->tenantMap.insert(tenantName, tenantEntry);
auto view = data->tenantPrefixIndex->at(version);
auto view = data->tenantPrefixIndex.at(version);
auto itr = view.find(tenantEntry.prefix);
TenantNameUniqueSet nameSet;
if (itr != view.end()) {
@ -10173,7 +10172,7 @@ ACTOR Future<bool> restoreDurableState(StorageServer* data, IKeyValueStore* stor
}
nameSet.insert(tenantName);
data->tenantPrefixIndex->insert(tenantEntry.prefix, nameSet);
data->tenantPrefixIndex.insert(tenantEntry.prefix, nameSet);
TraceEvent("RestoringTenant", data->thisServerID)
.detail("Key", tenantMap[tenantMapLoc].key)
@ -11275,7 +11274,6 @@ ACTOR Future<Void> storageServer(IKeyValueStore* persistentData,
self.tag = seedTag;
}
self.encryptionKeyProvider->setTenantPrefixIndex(self.tenantPrefixIndex);
self.storage.makeNewStorageServerDurable(self.shardAware);
wait(self.storage.commit());
++self.counters.kvCommits;
@ -11358,13 +11356,6 @@ ACTOR Future<Void> storageServer(IKeyValueStore* persistentData,
recovered.send(Void());
return Void();
}
// Pass a reference of tenantPrefixIndex to the storage engine to support per-tenant data encryption,
// after the tenant map is recovered in restoreDurableState. In case of a storage server reboot,
// it is possible that the storage engine is still holding a pre-reboot tenantPrefixIndex and using that
// for its own recovery, before we set the tenantPrefixIndex here.
if (self.encryptionKeyProvider.isValid()) {
self.encryptionKeyProvider->setTenantPrefixIndex(self.tenantPrefixIndex);
}
TraceEvent("SSTimeRestoreDurableState", self.thisServerID).detail("TimeTaken", now() - start);
// if this is a tss storage file, use that as source of truth for this server being a tss instead of the

Some files were not shown because too many files have changed in this diff.