Merge remote-tracking branch 'origin/master' into features/actor-lineage
commit dc35af3760

@@ -70,6 +70,8 @@ trace.*.xml
*.user
.idea/
.project
.projectile
.dir-locals.el
.pydevproject
.vscode
.vs/

@@ -85,6 +87,7 @@ flow/coveragetool/obj
/compile_commands.json
/.ccls-cache
/.clangd
/.cache

# Temporary and user configuration files
*~

@@ -1,5 +1,7 @@
<img alt="FoundationDB logo" src="documentation/FDB_logo.png?raw=true" width="400">

FoundationDB is a distributed database designed to handle large volumes of structured data across clusters of commodity servers. It organizes data as an ordered key-value store and employs ACID transactions for all operations. It is especially well-suited for read/write workloads but also has excellent performance for write-intensive workloads. Users interact with the database using API language bindings.

To learn more about FoundationDB, visit [foundationdb.org](https://www.foundationdb.org/)

@@ -357,6 +357,13 @@ extern "C" DLLEXPORT FDBFuture* fdb_database_create_snapshot(FDBDatabase* db,
.extractPtr());
}

// Get network thread busyness (updated every 1s)
// A value of 0 indicates that the client is more or less idle
// A value of 1 (or more) indicates that the client is saturated
extern "C" DLLEXPORT double fdb_database_get_main_thread_busyness(FDBDatabase* d) {
return DB(d)->getMainThreadBusyness();
}
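// A minimal usage sketch (illustrative only, not part of the C binding itself): a client that
// wants to react to network-thread saturation can poll this value periodically, e.g.
//
//     double b = fdb_database_get_main_thread_busyness(db);
//     if (b >= 0.9) {
//         /* the network thread is nearly saturated; consider spreading work across more client processes */
//     }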

extern "C" DLLEXPORT void fdb_transaction_destroy(FDBTransaction* tr) {
try {
TXN(tr)->delref();

@@ -187,6 +187,8 @@ DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_database_create_snapshot(FDBDatabase
uint8_t const* snap_command,
int snap_command_length);

DLLEXPORT WARN_UNUSED_RESULT double fdb_database_get_main_thread_busyness(FDBDatabase* db);

DLLEXPORT void fdb_transaction_destroy(FDBTransaction* tr);

DLLEXPORT void fdb_transaction_cancel(FDBTransaction* tr);

@@ -35,6 +35,7 @@
#include <tuple>
#include <vector>
#include <random>
#include <chrono>

#define DOCTEST_CONFIG_IMPLEMENT
#include "doctest.h"

@@ -2126,6 +2127,24 @@ TEST_CASE("block_from_callback") {
context.event.wait();
}

// monitors network busyness for 2 sec (40 readings)
TEST_CASE("monitor_network_busyness") {
bool containsGreaterZero = false;
for (int i = 0; i < 40; i++) {
double busyness = fdb_database_get_main_thread_busyness(db);
// make sure the busyness is between 0 and 1
CHECK(busyness >= 0);
CHECK(busyness <= 1);
if (busyness > 0) {
containsGreaterZero = true;
}
std::this_thread::sleep_for(std::chrono::milliseconds(50));
}

// assert that at least one of the busyness readings was greater than 0
CHECK(containsGreaterZero);
}

int main(int argc, char** argv) {
if (argc != 3 && argc != 4) {
std::cout << "Unit tests for the FoundationDB C API.\n"

@ -1,158 +0,0 @@
|
|||
version: 0.2
|
||||
|
||||
env:
|
||||
secrets-manager:
|
||||
DOCKERHUB_AUTH: dockerhub_foundationdb:foundationdb
|
||||
phases:
|
||||
install:
|
||||
commands:
|
||||
- echo "install phase"
|
||||
- 'ACCOUNT_ID=$(echo $CODEBUILD_BUILD_ARN | cut -d : -f 5)'
|
||||
- REGISTRY=${ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com
|
||||
- aws ecr get-login-password | docker login --username AWS --password-stdin ${REGISTRY}
|
||||
- docker pull ${REGISTRY}/centos:6
|
||||
- docker tag ${REGISTRY}/centos:6 centos:6
|
||||
- docker pull ${REGISTRY}/centos:7
|
||||
- docker tag ${REGISTRY}/centos:7 centos:7
|
||||
pre_build:
|
||||
commands:
|
||||
- echo "pre_build phase"
|
||||
- COMMIT_HASH=$(echo $CODEBUILD_RESOLVED_SOURCE_VERSION | cut -c 1-7)
|
||||
- DATE_STR=$(date +"%Y%m%d%H%M%S")
|
||||
build:
|
||||
commands:
|
||||
- echo "build phase"
|
||||
- ################################################################################
|
||||
- # CENTOS 7 foundationdb/build
|
||||
- ################################################################################
|
||||
- cd ${CODEBUILD_SRC_DIR}/build/docker/centos7/build
|
||||
- docker pull ${REGISTRY}/foundationdb/build:centos7-latest || true
|
||||
- docker build --cache-from ${REGISTRY}/foundationdb/build:centos7-latest
|
||||
--tag ${REGISTRY}/foundationdb/build:centos7-${DATE_STR}-${COMMIT_HASH}
|
||||
--tag ${REGISTRY}/foundationdb/build:centos7-latest
|
||||
--tag ${REGISTRY}/foundationdb/build:latest
|
||||
--tag foundationdb/build:centos7-${DATE_STR}-${COMMIT_HASH}
|
||||
--tag foundationdb/build:centos7-latest
|
||||
--tag foundationdb/build:latest
|
||||
.
|
||||
- ################################################################################
|
||||
- # CENTOS 7 foundationdb/devel
|
||||
- ################################################################################
|
||||
- cd ${CODEBUILD_SRC_DIR}/build/docker/centos7/devel
|
||||
- docker pull ${REGISTRY}/foundationdb/devel:centos7-latest || true
|
||||
- docker build --cache-from ${REGISTRY}/foundationdb/devel:centos7-latest
|
||||
--build-arg REPOSITORY=${REGISTRY}/foundationdb/build
|
||||
--tag ${REGISTRY}/foundationdb/devel:centos7-${DATE_STR}-${COMMIT_HASH}
|
||||
--tag ${REGISTRY}/foundationdb/devel:centos7-latest
|
||||
--tag ${REGISTRY}/foundationdb/devel:latest
|
||||
--tag foundationdb/devel:centos7-${DATE_STR}-${COMMIT_HASH}
|
||||
--tag foundationdb/devel:centos7-latest
|
||||
--tag foundationdb/devel:latest
|
||||
.
|
||||
- ################################################################################
|
||||
- # CENTOS 7 foundationdb/distcc
|
||||
- ################################################################################
|
||||
- cd ${CODEBUILD_SRC_DIR}/build/docker/centos7/distcc
|
||||
- docker pull ${REGISTRY}/foundationdb/distcc:centos7-latest || true
|
||||
- docker build --cache-from ${REGISTRY}/foundationdb/distcc:centos7-latest
|
||||
--build-arg REPOSITORY=${REGISTRY}/foundationdb/build
|
||||
--tag ${REGISTRY}/foundationdb/distcc:centos7-${DATE_STR}-${COMMIT_HASH}
|
||||
--tag ${REGISTRY}/foundationdb/distcc:centos7-latest
|
||||
--tag ${REGISTRY}/foundationdb/distcc:latest
|
||||
--tag foundationdb/distcc:centos7-${DATE_STR}-${COMMIT_HASH}
|
||||
--tag foundationdb/distcc:centos7-latest
|
||||
--tag foundationdb/distcc:latest
|
||||
.
|
||||
- ################################################################################
|
||||
- # CENTOS 6 foundationdb/build
|
||||
- ################################################################################
|
||||
- cd ${CODEBUILD_SRC_DIR}/build/docker/centos6/build
|
||||
- docker pull ${REGISTRY}/foundationdb/build:centos6-latest || true
|
||||
- docker build --cache-from ${REGISTRY}/foundationdb/build:centos6-latest
|
||||
--tag ${REGISTRY}/foundationdb/build:centos6-${DATE_STR}-${COMMIT_HASH}
|
||||
--tag ${REGISTRY}/foundationdb/build:centos6-latest
|
||||
--tag foundationdb/build:centos6-${DATE_STR}-${COMMIT_HASH}
|
||||
--tag foundationdb/build:centos6-latest
|
||||
.
|
||||
- ################################################################################
|
||||
- # CENTOS 6 foundationdb/devel
|
||||
- ################################################################################
|
||||
- cd ${CODEBUILD_SRC_DIR}/build/docker/centos6/devel
|
||||
- docker pull ${REGISTRY}/foundationdb/devel:centos6-latest || true
|
||||
- docker build --cache-from ${REGISTRY}/foundationdb/devel:centos6-latest
|
||||
--build-arg REPOSITORY=${REGISTRY}/foundationdb/build
|
||||
--tag ${REGISTRY}/foundationdb/devel:centos6-${DATE_STR}-${COMMIT_HASH}
|
||||
--tag ${REGISTRY}/foundationdb/devel:centos6-latest
|
||||
--tag foundationdb/devel:centos6-${DATE_STR}-${COMMIT_HASH}
|
||||
--tag foundationdb/devel:centos6-latest
|
||||
.
|
||||
- ################################################################################
|
||||
- # CENTOS 6 foundationdb/distcc
|
||||
- ################################################################################
|
||||
- cd ${CODEBUILD_SRC_DIR}/build/docker/centos6/distcc
|
||||
- docker pull ${REGISTRY}/foundationdb/distcc:centos6-latest || true
|
||||
- docker build --cache-from ${REGISTRY}/foundationdb/distcc:centos6-latest
|
||||
--build-arg REPOSITORY=${REGISTRY}/foundationdb/build
|
||||
--tag ${REGISTRY}/foundationdb/distcc:centos6-${DATE_STR}-${COMMIT_HASH}
|
||||
--tag ${REGISTRY}/foundationdb/distcc:centos6-latest
|
||||
--tag foundationdb/distcc:centos6-${DATE_STR}-${COMMIT_HASH}
|
||||
--tag foundationdb/distcc:centos6-latest
|
||||
.
|
||||
post_build:
|
||||
commands:
|
||||
- echo "post_build phase"
|
||||
- echo ${DOCKERHUB_AUTH} | docker login --username foundationdb --password-stdin
|
||||
- ################################################################################
|
||||
- # CENTOS 7 PUSH TO ECR
|
||||
- ################################################################################
|
||||
- # PUSH TO build ECR
|
||||
- docker push ${REGISTRY}/foundationdb/build:centos7-${DATE_STR}-${COMMIT_HASH}
|
||||
- docker push ${REGISTRY}/foundationdb/build:centos7-latest
|
||||
- docker push ${REGISTRY}/foundationdb/build:latest
|
||||
- # PUSH TO devel ECR
|
||||
- docker push ${REGISTRY}/foundationdb/devel:centos7-${DATE_STR}-${COMMIT_HASH}
|
||||
- docker push ${REGISTRY}/foundationdb/devel:centos7-latest
|
||||
- docker push ${REGISTRY}/foundationdb/devel:latest
|
||||
- # PUSH TO distcc ECR
|
||||
- docker push ${REGISTRY}/foundationdb/distcc:centos7-${DATE_STR}-${COMMIT_HASH}
|
||||
- docker push ${REGISTRY}/foundationdb/distcc:centos7-latest
|
||||
- docker push ${REGISTRY}/foundationdb/distcc:latest
|
||||
- ################################################################################
|
||||
- # CENTOS 7 PUSH TO DOCKERHUB
|
||||
- ################################################################################
|
||||
- # PUSH TO build DOCKERHUB
|
||||
- docker push foundationdb/build:centos7-${DATE_STR}-${COMMIT_HASH}
|
||||
- docker push foundationdb/build:centos7-latest
|
||||
- docker push foundationdb/build:latest
|
||||
- # PUSH TO devel DOCKERHUB
|
||||
- docker push foundationdb/devel:centos7-${DATE_STR}-${COMMIT_HASH}
|
||||
- docker push foundationdb/devel:centos7-latest
|
||||
- docker push foundationdb/devel:latest
|
||||
- # PUSH TO distcc DOCKERHUB
|
||||
- docker push foundationdb/distcc:centos7-${DATE_STR}-${COMMIT_HASH}
|
||||
- docker push foundationdb/distcc:centos7-latest
|
||||
- docker push foundationdb/distcc:latest
|
||||
- ################################################################################
|
||||
- # CENTOS 6 PUSH TO ECR
|
||||
- ################################################################################
|
||||
- # PUSH TO build ECR
|
||||
- docker push ${REGISTRY}/foundationdb/build:centos6-${DATE_STR}-${COMMIT_HASH}
|
||||
- docker push ${REGISTRY}/foundationdb/build:centos6-latest
|
||||
- # PUSH TO devel ECR
|
||||
- docker push ${REGISTRY}/foundationdb/devel:centos6-${DATE_STR}-${COMMIT_HASH}
|
||||
- docker push ${REGISTRY}/foundationdb/devel:centos6-latest
|
||||
- # PUSH TO distcc ECR
|
||||
- docker push ${REGISTRY}/foundationdb/distcc:centos6-${DATE_STR}-${COMMIT_HASH}
|
||||
- docker push ${REGISTRY}/foundationdb/distcc:centos6-latest
|
||||
- ################################################################################
|
||||
- # CENTOS 6 PUSH TO DOCKERHUB
|
||||
- ################################################################################
|
||||
- # PUSH TO build DOCKERHUB
|
||||
- docker push foundationdb/build:centos6-${DATE_STR}-${COMMIT_HASH}
|
||||
- docker push foundationdb/build:centos6-latest
|
||||
- # PUSH TO devel DOCKERHUB
|
||||
- docker push foundationdb/devel:centos6-${DATE_STR}-${COMMIT_HASH}
|
||||
- docker push foundationdb/devel:centos6-latest
|
||||
- # PUSH TO distcc DOCKERHUB
|
||||
- docker push foundationdb/distcc:centos6-${DATE_STR}-${COMMIT_HASH}
|
||||
- docker push foundationdb/distcc:centos6-latest
|
|
@ -37,13 +37,13 @@ RUN sed -i -e '/enabled/d' /etc/yum.repos.d/CentOS-Base.repo && \
|
|||
lz4-devel \
|
||||
lz4-static \
|
||||
mono-devel \
|
||||
rh-python36 \
|
||||
rh-python36-python-devel \
|
||||
rh-ruby24 \
|
||||
rpm-build \
|
||||
tcl-devel \
|
||||
unzip \
|
||||
wget && \
|
||||
wget \
|
||||
rh-python36 \
|
||||
rh-python36-python-devel \
|
||||
rh-ruby24 && \
|
||||
yum clean all && \
|
||||
rm -rf /var/cache/yum
|
||||
|
||||
|
|
|
@ -5,13 +5,16 @@ FROM ${REPOSITORY}:${VERSION}
|
|||
# add vscode server
|
||||
RUN yum repolist && \
|
||||
yum -y install \
|
||||
bash-completion \
|
||||
byobu \
|
||||
cgdb \
|
||||
emacs-nox \
|
||||
jq \
|
||||
the_silver_searcher \
|
||||
tmux \
|
||||
tree \
|
||||
emacs-nox \
|
||||
vim \
|
||||
bash-completion \
|
||||
jq \
|
||||
cgdb && \
|
||||
zsh && \
|
||||
yum clean all && \
|
||||
rm -rf /var/cache/yum
|
||||
|
||||
|
@ -19,14 +22,25 @@ WORKDIR /tmp
|
|||
RUN source /opt/rh/devtoolset-8/enable && \
|
||||
source /opt/rh/rh-python36/enable && \
|
||||
pip3 install \
|
||||
lxml \
|
||||
psutil \
|
||||
python-dateutil \
|
||||
subprocess32 \
|
||||
psutil && \
|
||||
subprocess32 && \
|
||||
mkdir fdb-joshua && \
|
||||
cd fdb-joshua && \
|
||||
git clone --branch code_pipeline https://github.com/FoundationDB/fdb-joshua . && \
|
||||
pip3 install /tmp/fdb-joshua && \
|
||||
cd /tmp && \
|
||||
curl -Ls https://amazon-eks.s3.us-west-2.amazonaws.com/1.18.9/2020-11-02/bin/linux/amd64/kubectl -o kubectl && \
|
||||
echo "3dbe69e6deb35fbd6fec95b13d20ac1527544867ae56e3dae17e8c4d638b25b9 kubectl" > kubectl.txt && \
|
||||
sha256sum -c kubectl.txt && \
|
||||
mv kubectl /usr/local/bin/kubectl && \
|
||||
chmod 755 /usr/local/bin/kubectl && \
|
||||
curl https://awscli.amazonaws.com/awscli-exe-linux-x86_64-2.0.30.zip -o "awscliv2.zip" && \
|
||||
echo "7ee475f22c1b35cc9e53affbf96a9ffce91706e154a9441d0d39cbf8366b718e awscliv2.zip" > awscliv2.txt && \
|
||||
sha256sum -c awscliv2.txt && \
|
||||
unzip -qq awscliv2.zip && \
|
||||
./aws/install && \
|
||||
rm -rf /tmp/*
|
||||
|
||||
ARG OLD_FDB_BINARY_DIR=/app/deploy/global_data/oldBinaries/
|
||||
|
@ -45,17 +59,23 @@ RUN mkdir -p ${OLD_FDB_BINARY_DIR} \
|
|||
ln -s ${OLD_TLS_LIBRARY_DIR}/FDBGnuTLS.so /usr/lib/foundationdb/plugins/FDBGnuTLS.so
|
||||
|
||||
WORKDIR /root
|
||||
RUN echo -en "\n"\
|
||||
"source /opt/rh/devtoolset-8/enable\n"\
|
||||
"source /opt/rh/rh-python36/enable\n"\
|
||||
"source /opt/rh/rh-ruby24/enable\n"\
|
||||
"\n"\
|
||||
"function cmk() {\n"\
|
||||
" cmake -S ${HOME}/src/foundationdb -B build_output -D USE_CCACHE=1 -D RocksDB_ROOT=/opt/rocksdb-6.10.1 -G Ninja && ninja -C build_output -j 84\n"\
|
||||
"}\n"\
|
||||
"function ct() {\n"\
|
||||
" cd ${HOME}/build_output && ctest -j 32 --output-on-failure\n"\
|
||||
"}\n"\
|
||||
"function j() {\n"\
|
||||
" python3 -m joshua.joshua --cluster-file /etc/foundationdb/cluster-file \"\${@}\"\n"\
|
||||
"}\n" >> .bashrc
|
||||
RUN rm -f /root/anaconda-ks.cfg && \
|
||||
printf '%s\n' \
|
||||
'source /opt/rh/devtoolset-8/enable' \
|
||||
'source /opt/rh/rh-python36/enable' \
|
||||
'source /opt/rh/rh-ruby26/enable' \
|
||||
'' \
|
||||
'function cmk() {' \
|
||||
' cmake -S ${HOME}/src/foundationdb -B ${HOME}/build_output -D USE_CCACHE=1 -D RocksDB_ROOT=/opt/rocksdb-6.10.1 -G Ninja && ninja -C build_output -j 84' \
|
||||
'}' \
|
||||
'function ct() {' \
|
||||
' cd ${HOME}/build_output && ctest -j 32 --output-on-failure' \
|
||||
'}' \
|
||||
'function j() {' \
|
||||
' python3 -m joshua.joshua "${@}"' \
|
||||
'}' \
|
||||
'function jsd() {' \
|
||||
' j start --tarball $(find ${HOME}/build_output/packages -name correctness\*.tar.gz) "${@}"' \
|
||||
'}' \
|
||||
'' \
|
||||
>> .bashrc
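# The helpers appended to .bashrc above are convenience wrappers: `cmk` configures and builds
# ${HOME}/src/foundationdb with CMake/Ninja, `ct` runs ctest against the build output, `j` invokes
# joshua, and `jsd` starts a joshua run using the correctness package from the build output.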
|
|
@ -10,6 +10,7 @@ RUN rpmkeys --import mono-project.com.rpmkey.pgp && \
|
|||
epel-release \
|
||||
scl-utils \
|
||||
yum-utils && \
|
||||
yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo && \
|
||||
yum install -y \
|
||||
autoconf \
|
||||
automake \
|
||||
|
@ -19,6 +20,7 @@ RUN rpmkeys --import mono-project.com.rpmkey.pgp && \
|
|||
devtoolset-8 \
|
||||
devtoolset-8-libubsan-devel \
|
||||
devtoolset-8-valgrind-devel \
|
||||
docker-ce \
|
||||
dos2unix \
|
||||
dpkg \
|
||||
gettext-devel \
|
||||
|
@ -59,9 +61,10 @@ RUN source /opt/rh/devtoolset-8/enable && \
|
|||
tar --strip-components 1 --no-same-owner --directory git -xf git.tar.gz && \
|
||||
cd git && \
|
||||
make configure && \
|
||||
./configure \
|
||||
&& make && \
|
||||
./configure && \
|
||||
make && \
|
||||
make install && \
|
||||
cd ../ && \
|
||||
rm -rf /tmp/*
|
||||
|
||||
# build/install ninja
|
||||
|
|
|
@ -3,15 +3,21 @@ ARG VERSION=centos7-latest
|
|||
FROM ${REPOSITORY}:${VERSION}
|
||||
|
||||
# add vscode server
|
||||
RUN yum repolist && \
|
||||
RUN yum-config-manager --add-repo=https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/repo/epel-7/carlwgeorge-ripgrep-epel-7.repo && \
|
||||
yum repolist && \
|
||||
yum -y install \
|
||||
bash-completion \
|
||||
byobu \
|
||||
cgdb \
|
||||
emacs-nox \
|
||||
fish \
|
||||
jq \
|
||||
ripgrep \
|
||||
the_silver_searcher \
|
||||
tmux \
|
||||
tree \
|
||||
emacs-nox \
|
||||
vim \
|
||||
bash-completion \
|
||||
jq \
|
||||
cgdb && \
|
||||
zsh && \
|
||||
yum clean all && \
|
||||
rm -rf /var/cache/yum
|
||||
|
||||
|
@ -19,14 +25,25 @@ WORKDIR /tmp
|
|||
RUN source /opt/rh/devtoolset-8/enable && \
|
||||
source /opt/rh/rh-python36/enable && \
|
||||
pip3 install \
|
||||
lxml \
|
||||
psutil \
|
||||
python-dateutil \
|
||||
subprocess32 \
|
||||
psutil && \
|
||||
subprocess32 && \
|
||||
mkdir fdb-joshua && \
|
||||
cd fdb-joshua && \
|
||||
git clone --branch code_pipeline https://github.com/FoundationDB/fdb-joshua . && \
|
||||
pip3 install /tmp/fdb-joshua && \
|
||||
cd /tmp && \
|
||||
curl -Ls https://amazon-eks.s3.us-west-2.amazonaws.com/1.18.9/2020-11-02/bin/linux/amd64/kubectl -o kubectl && \
|
||||
echo "3dbe69e6deb35fbd6fec95b13d20ac1527544867ae56e3dae17e8c4d638b25b9 kubectl" > kubectl.txt && \
|
||||
sha256sum -c kubectl.txt && \
|
||||
mv kubectl /usr/local/bin/kubectl && \
|
||||
chmod 755 /usr/local/bin/kubectl && \
|
||||
curl https://awscli.amazonaws.com/awscli-exe-linux-x86_64-2.0.30.zip -o "awscliv2.zip" && \
|
||||
echo "7ee475f22c1b35cc9e53affbf96a9ffce91706e154a9441d0d39cbf8366b718e awscliv2.zip" > awscliv2.txt && \
|
||||
sha256sum -c awscliv2.txt && \
|
||||
unzip -qq awscliv2.zip && \
|
||||
./aws/install && \
|
||||
rm -rf /tmp/*
|
||||
|
||||
ARG OLD_FDB_BINARY_DIR=/app/deploy/global_data/oldBinaries/
|
||||
|
@ -49,18 +66,44 @@ RUN curl -Ls https://update.code.visualstudio.com/latest/server-linux-x64/stable
|
|||
mkdir -p .vscode-server/bin/latest && \
|
||||
tar --strip-components 1 --no-same-owner --directory .vscode-server/bin/latest -xf /tmp/vscode-server-linux-x64.tar.gz && \
|
||||
touch .vscode-server/bin/latest/0 && \
|
||||
rm /tmp/*
|
||||
RUN echo -en "\n"\
|
||||
"source /opt/rh/devtoolset-8/enable\n"\
|
||||
"source /opt/rh/rh-python36/enable\n"\
|
||||
"source /opt/rh/rh-ruby26/enable\n"\
|
||||
"\n"\
|
||||
"function cmk() {\n"\
|
||||
" cmake -S ${HOME}/src/foundationdb -B build_output -D USE_CCACHE=1 -D RocksDB_ROOT=/opt/rocksdb-6.10.1 -G Ninja && ninja -C build_output -j 84\n"\
|
||||
"}\n"\
|
||||
"function ct() {\n"\
|
||||
" cd ${HOME}/build_output && ctest -j 32 --output-on-failure\n"\
|
||||
"}\n"\
|
||||
"function j() {\n"\
|
||||
" python3 -m joshua.joshua --cluster-file /etc/foundationdb/cluster-file \"\${@}\"\n"\
|
||||
"}\n" >> .bashrc
|
||||
rm -rf /tmp/*
|
||||
RUN rm -f /root/anaconda-ks.cfg && \
|
||||
printf '%s\n' \
|
||||
'#!/usr/bin/env bash' \
|
||||
'set -Eeuo pipefail' \
|
||||
'' \
|
||||
'mkdir -p ~/.docker' \
|
||||
'cat > ~/.docker/config.json << EOF' \
|
||||
'{' \
|
||||
' "proxies":' \
|
||||
' {' \
|
||||
' "default":' \
|
||||
' {' \
|
||||
' "httpProxy": "${HTTP_PROXY}",' \
|
||||
' "httpsProxy": "${HTTPS_PROXY}",' \
|
||||
' "noProxy": "${NO_PROXY}"' \
|
||||
' }' \
|
||||
' }' \
|
||||
'}' \
|
||||
'EOF' \
|
||||
> docker_proxy.sh && \
|
||||
chmod 755 docker_proxy.sh && \
|
||||
printf '%s\n' \
|
||||
'source /opt/rh/devtoolset-8/enable' \
|
||||
'source /opt/rh/rh-python36/enable' \
|
||||
'source /opt/rh/rh-ruby26/enable' \
|
||||
'' \
|
||||
'function cmk() {' \
|
||||
' cmake -S ${HOME}/src/foundationdb -B ${HOME}/build_output -D USE_CCACHE=1 -D RocksDB_ROOT=/opt/rocksdb-6.10.1 -G Ninja && ninja -C build_output -j 84' \
|
||||
'}' \
|
||||
'function ct() {' \
|
||||
' cd ${HOME}/build_output && ctest -j 32 --output-on-failure' \
|
||||
'}' \
|
||||
'function j() {' \
|
||||
' python3 -m joshua.joshua "${@}"' \
|
||||
'}' \
|
||||
'function jsd() {' \
|
||||
' j start --tarball $(find ${HOME}/build_output/packages -name correctness\*.tar.gz) "${@}"' \
|
||||
'}' \
|
||||
'' \
|
||||
>> .bashrc
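# docker_proxy.sh (generated above) writes ~/.docker/config.json so that docker invocations made
# inside this container inherit the HTTP_PROXY/HTTPS_PROXY/NO_PROXY settings from the environment;
# the second printf block appends the cmk/ct/j/jsd build-and-test helpers to .bashrc.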
|
|
@ -0,0 +1,20 @@
|
|||
ARG REPOSITORY=foundationdb/build
|
||||
ARG VERSION=centos7-latest
|
||||
FROM ${REPOSITORY}:${VERSION}
|
||||
|
||||
ENV YCSB_VERSION=ycsb-foundationdb-binding-0.17.0 \
|
||||
PATH=${PATH}:/usr/bin
|
||||
|
||||
RUN cd /opt \
|
||||
&& eval curl "-Ls https://github.com/brianfrankcooper/YCSB/releases/download/0.17.0/ycsb-foundationdb-binding-0.17.0.tar.gz" \
|
||||
| tar -xzvf -
|
||||
|
||||
RUN rm -Rf /opt/${YCSB_VERSION}/lib/fdb-java-5.2.5.jar
|
||||
|
||||
# COPY the appropriate fdb-java-.jar from packages
|
||||
# COPY binary RPM for foundationdb
|
||||
# Install Binary
|
||||
|
||||
WORKDIR "/opt/${YCSB_VERSION}"
|
||||
|
||||
ENTRYPOINT ["bin/ycsb.sh"]
|
|
@ -10,7 +10,7 @@ function(compile_boost)
|
|||
set(BOOST_COMPILER_FLAGS -fvisibility=hidden -fPIC -std=c++14 -w)
|
||||
set(BOOST_CXX_COMPILER "${CMAKE_CXX_COMPILER}")
|
||||
if(APPLE)
|
||||
set(BOOST_TOOLSET "darwin")
|
||||
set(BOOST_TOOLSET "clang-darwin")
|
||||
# this is to fix a weird macOS issue -- by default
|
||||
# cmake would otherwise pass a compiler that can't
|
||||
# compile boost
|
||||
|
|
|
@ -3,7 +3,7 @@ add_library(jemalloc INTERFACE)
|
|||
set(USE_JEMALLOC ON)
|
||||
# We don't want to use jemalloc on Windows
|
||||
# Nor on FreeBSD, where jemalloc is the default system allocator
|
||||
if(USE_SANITIZER OR WIN32 OR (CMAKE_SYSTEM_NAME STREQUAL "FreeBSD"))
|
||||
if(USE_SANITIZER OR WIN32 OR (CMAKE_SYSTEM_NAME STREQUAL "FreeBSD") OR APPLE)
|
||||
set(USE_JEMALLOC OFF)
|
||||
return()
|
||||
endif()
|
||||
|
|
|
@ -481,7 +481,11 @@ An |database-blurb1| Modifications to a database are performed via transactions.
|
|||
|length-of| ``snapshot_command``
|
||||
|
||||
.. note:: This function exposes the functionality of the fdbcli command ``snapshot``. Please read the documentation before using it (see :ref:`disk-snapshot-backups`).
|
||||
|
||||
|
||||
.. function:: double fdb_database_get_main_thread_busyness(FDBDatabase* database)
|
||||
|
||||
Returns a value where 0 indicates that the client is idle and 1 (or larger) indicates that the client is saturated. By default, this value is updated every second.
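
A minimal polling sketch in C (illustrative only; assumes an already-opened ``FDBDatabase*`` named ``database``)::

    double busyness = fdb_database_get_main_thread_busyness(database);
    if (busyness >= 1.0) {
        /* the client's network thread is saturated; consider adding more client processes */
    }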
|
||||
|
||||
Transaction
|
||||
===========
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@ Release Notes
|
|||
* Fixed an issue where storage servers could shutdown with ``unknown_error``. `(PR #4437) <https://github.com/apple/foundationdb/pull/4437>`_
|
||||
* Fix backup agent stall when writing to local filesystem with slow metadata operations. `(PR #4428) <https://github.com/apple/foundationdb/pull/4428>`_
|
||||
* Backup agent no longer uses 4k block caching layer on local output files so that write operations are larger. `(PR #4428) <https://github.com/apple/foundationdb/pull/4428>`_
|
||||
* Fix accounting error that could cause commits to incorrectly fail with ``proxy_memory_limit_exceeded``. `(PR #4529) <https://github.com/apple/foundationdb/pull/4529>`_
|
||||
|
||||
6.2.32
|
||||
======
|
||||
|
|
|
@ -5,6 +5,8 @@ Release Notes
|
|||
6.3.12
|
||||
======
|
||||
* Change the default for ``--knob_tls_server_handshake_threads`` to 64; the previous default was 1000. This avoids starting 1000 threads by default, but may adversely affect recovery time for large clusters using TLS. Users with large TLS clusters should consider explicitly setting this knob in their ``foundationdb.conf`` file (see the example after this list). `(PR #4421) <https://github.com/apple/foundationdb/pull/4421>`_
|
||||
* Fix accounting error that could cause commits to incorrectly fail with ``proxy_memory_limit_exceeded``. `(PR #4526) <https://github.com/apple/foundationdb/pull/4526>`_
|
||||
* As an optimization, partial restore using target key ranges now filters backup log data prior to loading it into the database. `(PR #4554) <https://github.com/apple/foundationdb/pull/4554>`_
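
For example (illustrative; adjust the value and section to match your deployment), the handshake-thread knob can be pinned in ``foundationdb.conf``::

    [fdbserver]
      knob_tls_server_handshake_threads = 1000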
|
||||
|
||||
6.3.11
|
||||
======
|
||||
|
|
|
@ -2236,6 +2236,8 @@ Reference<IBackupContainer> openBackupContainer(const char* name, std::string de
|
|||
return c;
|
||||
}
|
||||
|
||||
// Submit the restore request to the database if "performRestore" is true. Otherwise,
|
||||
// check if the restore can be performed.
|
||||
ACTOR Future<Void> runRestore(Database db,
|
||||
std::string originalClusterFile,
|
||||
std::string tagName,
|
||||
|
@ -2328,7 +2330,7 @@ ACTOR Future<Void> runRestore(Database db,
|
|||
printf("Restored to version %" PRId64 "\n", restoredVersion);
|
||||
}
|
||||
} else {
|
||||
state Optional<RestorableFileSet> rset = wait(bc->getRestoreSet(targetVersion));
|
||||
state Optional<RestorableFileSet> rset = wait(bc->getRestoreSet(targetVersion, ranges));
|
||||
|
||||
if (!rset.present()) {
|
||||
fprintf(stderr,
|
||||
|
|
|
@ -142,8 +142,9 @@ Version getVersionFromString(std::string const& value) {
|
|||
}
|
||||
|
||||
// Transaction log data is stored by the FoundationDB core in the
|
||||
// \xff / bklog / keyspace in a funny order for performance reasons.
|
||||
// Return the ranges of keys that contain the data for the given range
|
||||
// "backupLogKeys" (i.e., \xff\x02/blog/) keyspace in a funny order for
|
||||
// performance reasons.
|
||||
// Returns the ranges of keys that contain the data for the given range
|
||||
// of versions.
|
||||
// assert CLIENT_KNOBS->LOG_RANGE_BLOCK_SIZE % blocksize == 0; otherwise the hash calculation will be incorrect
|
||||
Standalone<VectorRef<KeyRangeRef>> getLogRanges(Version beginVersion,
|
||||
|
|
|
@ -891,16 +891,21 @@ public:
|
|||
return Optional<RestorableFileSet>();
|
||||
}
|
||||
|
||||
// Get a set of files that can restore the given "keyRangesFilter" to the "targetVersion".
|
||||
// If "keyRangesFilter" is empty, the file set will cover all key ranges present in the backup.
|
||||
// It's generally a good idea to specify "keyRangesFilter" to reduce the number of files needed
|
||||
// and thus the overall restore time.
|
||||
//
|
||||
// If "logsOnly" is true, then only log files are returned and "keyRangesFilter" is ignored,
|
||||
// because the log can contain mutations of the whole key space, unlike range files, each of which
|
||||
// is limited to a smaller key range.
|
||||
ACTOR static Future<Optional<RestorableFileSet>> getRestoreSet(Reference<BackupContainerFileSystem> bc,
|
||||
Version targetVersion,
|
||||
VectorRef<KeyRangeRef> keyRangesFilter,
|
||||
bool logsOnly = false,
|
||||
Version beginVersion = invalidVersion) {
|
||||
// Does not yet support using keyRangesFilter together with logsOnly
|
||||
if (logsOnly && !keyRangesFilter.empty()) {
|
||||
TraceEvent(SevError, "BackupContainerRestoreSetUnsupportedAPI")
|
||||
.detail("KeyRangesFilter", keyRangesFilter.size());
|
||||
return Optional<RestorableFileSet>();
|
||||
for (const auto& range : keyRangesFilter) {
|
||||
TraceEvent("BackupContainerGetRestoreSet").detail("RangeFilter", printable(range));
|
||||
}
|
||||
|
||||
if (logsOnly) {
|
||||
|
|
|
@ -160,33 +160,44 @@ struct DatabaseConfiguration {
|
|||
}
|
||||
|
||||
// Returns the maximum number of discrete failures a cluster can tolerate.
|
||||
// In HA mode, `fullyReplicatedRegions` is set to false initially when data is being
|
||||
// replicated to remote, and will be true later. `forAvailability` is set to true
|
||||
// In HA mode, `fullyReplicatedRegions` is set to "1" initially when data is being
|
||||
// replicated to remote, and will be incremented later. `forAvailability` is set to true
|
||||
// if we want to account for the number of machines that can recruit new tLogs/SS after failures.
|
||||
// Killing an entire datacenter counts as killing one zone in modes that support it
|
||||
// Killing an entire datacenter counts as killing one zone in modes that support it.
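// Worked example (single region, no HA): with triple redundancy (tLogReplicationFactor = 3,
// storageTeamSize = 3, tLogWriteAntiQuorum = 0) this returns min(3 - 1 - 0, 3 - 1) = 2,
// i.e. two zones may fail before data or availability is lost.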
|
||||
int32_t maxZoneFailuresTolerated(int fullyReplicatedRegions, bool forAvailability) const {
|
||||
int worstSatellite = regions.size() ? std::numeric_limits<int>::max() : 0;
|
||||
int worstSatelliteTLogReplicationFactor = regions.size() ? std::numeric_limits<int>::max() : 0;
|
||||
int regionsWithNonNegativePriority = 0;
|
||||
for (auto& r : regions) {
|
||||
if (r.priority >= 0) {
|
||||
regionsWithNonNegativePriority++;
|
||||
}
|
||||
worstSatellite =
|
||||
std::min(worstSatellite, r.satelliteTLogReplicationFactor - r.satelliteTLogWriteAntiQuorum);
|
||||
worstSatelliteTLogReplicationFactor = std::min(
|
||||
worstSatelliteTLogReplicationFactor, r.satelliteTLogReplicationFactor - r.satelliteTLogWriteAntiQuorum);
|
||||
if (r.satelliteTLogUsableDcsFallback > 0) {
|
||||
worstSatellite = std::min(
|
||||
worstSatellite, r.satelliteTLogReplicationFactorFallback - r.satelliteTLogWriteAntiQuorumFallback);
|
||||
worstSatelliteTLogReplicationFactor =
|
||||
std::min(worstSatelliteTLogReplicationFactor,
|
||||
r.satelliteTLogReplicationFactorFallback - r.satelliteTLogWriteAntiQuorumFallback);
|
||||
}
|
||||
}
|
||||
if (usableRegions > 1 && fullyReplicatedRegions > 1 && worstSatellite > 0 &&
|
||||
(!forAvailability || regionsWithNonNegativePriority > 1)) {
|
||||
return 1 + std::min(std::max(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, worstSatellite - 1),
|
||||
storageTeamSize - 1);
|
||||
} else if (worstSatellite > 0) {
|
||||
// Primary and Satellite tLogs are synchronously replicated, hence we can lose all but 1.
|
||||
return std::min(tLogReplicationFactor + worstSatellite - 1 - tLogWriteAntiQuorum, storageTeamSize - 1);
|
||||
|
||||
if (worstSatelliteTLogReplicationFactor <= 0) {
|
||||
// HA is not enabled in this database. Return single cluster zone failures to tolerate.
|
||||
return std::min(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, storageTeamSize - 1);
|
||||
}
|
||||
return std::min(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, storageTeamSize - 1);
|
||||
|
||||
// Compute HA enabled database zone failure tolerance.
|
||||
auto isGeoReplicatedData = [this, &fullyReplicatedRegions]() {
|
||||
return usableRegions > 1 && fullyReplicatedRegions > 1;
|
||||
};
|
||||
|
||||
if (isGeoReplicatedData() && (!forAvailability || regionsWithNonNegativePriority > 1)) {
|
||||
return 1 + std::min(std::max(tLogReplicationFactor - 1 - tLogWriteAntiQuorum,
|
||||
worstSatelliteTLogReplicationFactor - 1),
|
||||
storageTeamSize - 1);
|
||||
}
|
||||
// Primary and Satellite tLogs are synchronously replicated, hence we can lose all but 1.
|
||||
return std::min(tLogReplicationFactor + worstSatelliteTLogReplicationFactor - 1 - tLogWriteAntiQuorum,
|
||||
storageTeamSize - 1);
|
||||
}
|
||||
|
||||
// CommitProxy Servers
|
||||
|
|
|
@ -3192,6 +3192,154 @@ struct RestoreRangeTaskFunc : RestoreFileTaskFuncBase {
|
|||
StringRef RestoreRangeTaskFunc::name = LiteralStringRef("restore_range_data");
|
||||
REGISTER_TASKFUNC(RestoreRangeTaskFunc);
|
||||
|
||||
// Decodes a mutation log key, which contains (hash, commitVersion, chunkNumber) and
|
||||
// returns (commitVersion, chunkNumber)
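// Layout (13 bytes, matching the ASSERT below): a 1-byte hash, an 8-byte big-endian commitVersion,
// then a 4-byte big-endian chunkNumber.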
|
||||
std::pair<Version, int32_t> decodeLogKey(const StringRef& key) {
|
||||
ASSERT(key.size() == sizeof(uint8_t) + sizeof(Version) + sizeof(int32_t));
|
||||
|
||||
uint8_t hash;
|
||||
Version version;
|
||||
int32_t part;
|
||||
BinaryReader rd(key, Unversioned());
|
||||
rd >> hash >> version >> part;
|
||||
version = bigEndian64(version);
|
||||
part = bigEndian32(part);
|
||||
|
||||
int32_t v = version / CLIENT_KNOBS->LOG_RANGE_BLOCK_SIZE;
|
||||
ASSERT(((uint8_t)hashlittle(&v, sizeof(v), 0)) == hash);
|
||||
|
||||
return std::make_pair(version, part);
|
||||
}
|
||||
|
||||
// Decodes an encoded list of mutations in the format of:
|
||||
// [includeVersion:uint64_t][val_length:uint32_t][mutation_1][mutation_2]...[mutation_k],
|
||||
// where a mutation is encoded as:
|
||||
// [type:uint32_t][keyLength:uint32_t][valueLength:uint32_t][param1][param2]
|
||||
std::vector<MutationRef> decodeLogValue(const StringRef& value) {
|
||||
StringRefReader reader(value, restore_corrupted_data());
|
||||
|
||||
Version protocolVersion = reader.consume<uint64_t>();
|
||||
if (protocolVersion <= 0x0FDB00A200090001) {
|
||||
throw incompatible_protocol_version();
|
||||
}
|
||||
|
||||
uint32_t val_length = reader.consume<uint32_t>();
|
||||
if (val_length != value.size() - sizeof(uint64_t) - sizeof(uint32_t)) {
|
||||
TraceEvent(SevError, "FileRestoreLogValueError")
|
||||
.detail("ValueLen", val_length)
|
||||
.detail("ValueSize", value.size())
|
||||
.detail("Value", printable(value));
|
||||
}
|
||||
|
||||
std::vector<MutationRef> mutations;
|
||||
while (1) {
|
||||
if (reader.eof())
|
||||
break;
|
||||
|
||||
// Deserialization of a MutationRef, which was packed by MutationListRef::push_back_deep()
|
||||
uint32_t type, p1len, p2len;
|
||||
type = reader.consume<uint32_t>();
|
||||
p1len = reader.consume<uint32_t>();
|
||||
p2len = reader.consume<uint32_t>();
|
||||
|
||||
const uint8_t* key = reader.consume(p1len);
|
||||
const uint8_t* val = reader.consume(p2len);
|
||||
|
||||
mutations.emplace_back((MutationRef::Type)type, StringRef(key, p1len), StringRef(val, p2len));
|
||||
}
|
||||
return mutations;
|
||||
}
|
||||
|
||||
// Accumulates mutation log value chunks, as both a vector of chunks and as a combined chunk,
|
||||
// in chunk order, and can check the chunk set for completion or intersection with a set
|
||||
// of ranges.
|
||||
struct AccumulatedMutations {
|
||||
AccumulatedMutations() : lastChunkNumber(-1) {}
|
||||
|
||||
// Add a KV pair for this mutation chunk set
|
||||
// It will be accumulated onto serializedMutations if the chunk number is
|
||||
// the next expected value.
|
||||
void addChunk(int chunkNumber, const KeyValueRef& kv) {
|
||||
if (chunkNumber == lastChunkNumber + 1) {
|
||||
lastChunkNumber = chunkNumber;
|
||||
serializedMutations += kv.value.toString();
|
||||
} else {
|
||||
lastChunkNumber = -2;
|
||||
serializedMutations.clear();
|
||||
}
|
||||
kvs.push_back(kv);
|
||||
}
|
||||
|
||||
// Returns true if both
|
||||
// - 1 or more chunks were added to this set
|
||||
// - The header of the first chunk contains a valid protocol version and a length
|
||||
// that matches the bytes after the header in the combined value in serializedMutations
|
||||
bool isComplete() const {
|
||||
if (lastChunkNumber >= 0) {
|
||||
StringRefReader reader(serializedMutations, restore_corrupted_data());
|
||||
|
||||
Version protocolVersion = reader.consume<uint64_t>();
|
||||
if (protocolVersion <= 0x0FDB00A200090001) {
|
||||
throw incompatible_protocol_version();
|
||||
}
|
||||
|
||||
uint32_t vLen = reader.consume<uint32_t>();
|
||||
return vLen == reader.remainder().size();
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns true if a complete chunk contains any MutationRefs which intersect with any
|
||||
// range in ranges.
|
||||
// It is undefined behavior to run this if isComplete() does not return true.
|
||||
bool matchesAnyRange(const std::vector<KeyRange>& ranges) const {
|
||||
std::vector<MutationRef> mutations = decodeLogValue(serializedMutations);
|
||||
for (auto& m : mutations) {
|
||||
for (auto& r : ranges) {
|
||||
if (m.type == MutationRef::ClearRange) {
|
||||
if (r.intersects(KeyRangeRef(m.param1, m.param2))) {
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
if (r.contains(m.param1)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<KeyValueRef> kvs;
|
||||
std::string serializedMutations;
|
||||
int lastChunkNumber;
|
||||
};
|
||||
|
||||
// Returns a vector of filtered KV refs from data which are either part of incomplete mutation groups OR complete
|
||||
// and have data relevant to one of the KV ranges in ranges
|
||||
std::vector<KeyValueRef> filterLogMutationKVPairs(VectorRef<KeyValueRef> data, const std::vector<KeyRange>& ranges) {
|
||||
std::unordered_map<Version, AccumulatedMutations> mutationBlocksByVersion;
|
||||
|
||||
for (auto& kv : data) {
|
||||
auto versionAndChunkNumber = decodeLogKey(kv.key);
|
||||
mutationBlocksByVersion[versionAndChunkNumber.first].addChunk(versionAndChunkNumber.second, kv);
|
||||
}
|
||||
|
||||
std::vector<KeyValueRef> output;
|
||||
|
||||
for (auto& vb : mutationBlocksByVersion) {
|
||||
AccumulatedMutations& m = vb.second;
|
||||
|
||||
// If the mutations are incomplete or match one of the ranges, include in results.
|
||||
if (!m.isComplete() || m.matchesAnyRange(ranges)) {
|
||||
output.insert(output.end(), m.kvs.begin(), m.kvs.end());
|
||||
}
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
struct RestoreLogDataTaskFunc : RestoreFileTaskFuncBase {
|
||||
static StringRef name;
|
||||
static constexpr uint32_t version = 1;
|
||||
|
@ -3223,6 +3371,7 @@ struct RestoreLogDataTaskFunc : RestoreFileTaskFuncBase {
|
|||
|
||||
state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(cx));
|
||||
state Reference<IBackupContainer> bc;
|
||||
state std::vector<KeyRange> ranges;
|
||||
|
||||
loop {
|
||||
try {
|
||||
|
@ -3232,6 +3381,8 @@ struct RestoreLogDataTaskFunc : RestoreFileTaskFuncBase {
|
|||
Reference<IBackupContainer> _bc = wait(restore.sourceContainer().getOrThrow(tr));
|
||||
bc = _bc;
|
||||
|
||||
wait(store(ranges, restore.getRestoreRangesOrDefault(tr)));
|
||||
|
||||
wait(checkTaskVersion(tr->getDatabase(), task, name, version));
|
||||
wait(taskBucket->keepRunning(tr, task));
|
||||
|
||||
|
@ -3243,10 +3394,14 @@ struct RestoreLogDataTaskFunc : RestoreFileTaskFuncBase {
|
|||
|
||||
state Key mutationLogPrefix = restore.mutationLogPrefix();
|
||||
state Reference<IAsyncFile> inFile = wait(bc->readFile(logFile.fileName));
|
||||
state Standalone<VectorRef<KeyValueRef>> data = wait(decodeLogFileBlock(inFile, readOffset, readLen));
|
||||
state Standalone<VectorRef<KeyValueRef>> dataOriginal = wait(decodeLogFileBlock(inFile, readOffset, readLen));
|
||||
|
||||
// Filter the KV pairs extracted from the log file block to remove any records known to not be needed for this
|
||||
// restore based on the restore range set.
|
||||
state std::vector<KeyValueRef> dataFiltered = filterLogMutationKVPairs(dataOriginal, ranges);
|
||||
|
||||
state int start = 0;
|
||||
state int end = data.size();
|
||||
state int end = dataFiltered.size();
|
||||
state int dataSizeLimit =
|
||||
BUGGIFY ? deterministicRandom()->randomInt(256 * 1024, 10e6) : CLIENT_KNOBS->RESTORE_WRITE_TX_SIZE;
|
||||
|
||||
|
@ -3262,8 +3417,8 @@ struct RestoreLogDataTaskFunc : RestoreFileTaskFuncBase {
|
|||
state int i = start;
|
||||
state int txBytes = 0;
|
||||
for (; i < end && txBytes < dataSizeLimit; ++i) {
|
||||
Key k = data[i].key.withPrefix(mutationLogPrefix);
|
||||
ValueRef v = data[i].value;
|
||||
Key k = dataFiltered[i].key.withPrefix(mutationLogPrefix);
|
||||
ValueRef v = dataFiltered[i].value;
|
||||
tr->set(k, v);
|
||||
txBytes += k.expectedSize();
|
||||
txBytes += v.expectedSize();
|
||||
|
@ -3291,7 +3446,8 @@ struct RestoreLogDataTaskFunc : RestoreFileTaskFuncBase {
|
|||
.detail("CommitVersion", tr->getCommittedVersion())
|
||||
.detail("StartIndex", start)
|
||||
.detail("EndIndex", i)
|
||||
.detail("DataSize", data.size())
|
||||
.detail("RecordCountOriginal", dataOriginal.size())
|
||||
.detail("RecordCountFiltered", dataFiltered.size())
|
||||
.detail("Bytes", txBytes)
|
||||
.detail("TaskInstance", THIS_ADDR);
|
||||
|
||||
|
@ -3845,6 +4001,8 @@ struct StartFullRestoreTaskFunc : RestoreTaskFuncBase {
|
|||
static TaskParam<Version> firstVersion() { return LiteralStringRef(__FUNCTION__); }
|
||||
} Params;
|
||||
|
||||
// Find all files needed for the restore and save them in the RestoreConfig for the task.
|
||||
// Update the total number of files and blocks and change state to starting.
|
||||
ACTOR static Future<Void> _execute(Database cx,
|
||||
Reference<TaskBucket> taskBucket,
|
||||
Reference<FutureBucket> futureBucket,
|
||||
|
@ -3854,6 +4012,7 @@ struct StartFullRestoreTaskFunc : RestoreTaskFuncBase {
|
|||
state Version restoreVersion;
|
||||
state Version beginVersion;
|
||||
state Reference<IBackupContainer> bc;
|
||||
state std::vector<KeyRange> ranges;
|
||||
|
||||
loop {
|
||||
try {
|
||||
|
@ -3861,10 +4020,12 @@ struct StartFullRestoreTaskFunc : RestoreTaskFuncBase {
|
|||
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
|
||||
wait(checkTaskVersion(tr->getDatabase(), task, name, version));
|
||||
Version _restoreVersion = wait(restore.restoreVersion().getOrThrow(tr));
|
||||
restoreVersion = _restoreVersion;
|
||||
Optional<Version> _beginVersion = wait(restore.beginVersion().get(tr));
|
||||
beginVersion = _beginVersion.present() ? _beginVersion.get() : invalidVersion;
|
||||
|
||||
wait(store(restoreVersion, restore.restoreVersion().getOrThrow(tr)));
|
||||
wait(store(ranges, restore.getRestoreRangesOrDefault(tr)));
|
||||
|
||||
wait(taskBucket->keepRunning(tr, task));
|
||||
|
||||
ERestoreState oldState = wait(restore.stateEnum().getD(tr));
|
||||
|
@ -3909,13 +4070,18 @@ struct StartFullRestoreTaskFunc : RestoreTaskFuncBase {
|
|||
wait(tr->onError(e));
|
||||
}
|
||||
}
|
||||
|
||||
Optional<bool> _incremental = wait(restore.incrementalBackupOnly().get(tr));
|
||||
state bool incremental = _incremental.present() ? _incremental.get() : false;
|
||||
if (beginVersion == invalidVersion) {
|
||||
beginVersion = 0;
|
||||
}
|
||||
state Standalone<VectorRef<KeyRangeRef>> keyRangesFilter;
|
||||
for (auto const& r : ranges) {
|
||||
keyRangesFilter.push_back_deep(keyRangesFilter.arena(), KeyRangeRef(r));
|
||||
}
|
||||
Optional<RestorableFileSet> restorable =
|
||||
wait(bc->getRestoreSet(restoreVersion, VectorRef<KeyRangeRef>(), incremental, beginVersion));
|
||||
wait(bc->getRestoreSet(restoreVersion, keyRangesFilter, incremental, beginVersion));
|
||||
if (!incremental) {
|
||||
beginVersion = restorable.get().snapshot.beginVersion;
|
||||
}
|
||||
|
@ -5034,6 +5200,24 @@ public:
|
|||
return r;
|
||||
}
|
||||
|
||||
// Submits the restore request to the database and throws "restore_invalid_version" error if
|
||||
// restore is not possible. Parameters:
|
||||
// cx: the database to be restored to
|
||||
// cxOrig: if present, is used to resolve the restore timestamp into a version.
|
||||
// tagName: restore tag
|
||||
// url: the backup container's URL that contains all backup files
|
||||
// ranges: the restored key ranges; if empty, restore all key ranges in the backup
|
||||
// waitForComplete: if set, wait until the restore is completed before returning; otherwise,
|
||||
// return when the request is submitted to the database.
|
||||
// targetVersion: the version to be restored.
|
||||
// verbose: print verbose information.
|
||||
// addPrefix: each key is added this prefix during restore.
|
||||
// removePrefix: for each key to be restored, remove this prefix first.
|
||||
// lockDB: if set lock the database with randomUid before performing restore;
|
||||
// otherwise, check database is locked with the randomUid
|
||||
// incrementalBackupOnly: only perform incremental backup
|
||||
// beginVersion: restore's begin version
|
||||
// randomUid: the UID used to lock the database
|
||||
ACTOR static Future<Version> restore(FileBackupAgent* backupAgent,
|
||||
Database cx,
|
||||
Optional<Database> cxOrig,
|
||||
|
@ -5065,7 +5249,7 @@ public:
|
|||
}
|
||||
|
||||
Optional<RestorableFileSet> restoreSet =
|
||||
wait(bc->getRestoreSet(targetVersion, VectorRef<KeyRangeRef>(), incrementalBackupOnly, beginVersion));
|
||||
wait(bc->getRestoreSet(targetVersion, ranges, incrementalBackupOnly, beginVersion));
|
||||
|
||||
if (!restoreSet.present()) {
|
||||
TraceEvent(SevWarn, "FileBackupAgentRestoreNotPossible")
|
||||
|
|
|
@ -96,6 +96,7 @@ public:
|
|||
|
||||
virtual Reference<ITransaction> createTransaction() = 0;
|
||||
virtual void setOption(FDBDatabaseOptions::Option option, Optional<StringRef> value = Optional<StringRef>()) = 0;
|
||||
virtual double getMainThreadBusyness() = 0;
|
||||
|
||||
virtual void addref() = 0;
|
||||
virtual void delref() = 0;
|
||||
|
|
|
@ -38,6 +38,7 @@ void ClientKnobs::initialize(bool randomize) {
|
|||
init( TOO_MANY, 1000000 );
|
||||
|
||||
init( SYSTEM_MONITOR_INTERVAL, 5.0 );
|
||||
init( NETWORK_BUSYNESS_MONITOR_INTERVAL, 1.0 );
|
||||
|
||||
init( FAILURE_MAX_DELAY, 5.0 );
|
||||
init( FAILURE_MIN_DELAY, 4.0 ); if( randomize && BUGGIFY ) FAILURE_MIN_DELAY = 1.0;
|
||||
|
|
|
@ -30,6 +30,7 @@ public:
|
|||
int TOO_MANY; // FIXME: this should really be split up so we can control these more specifically
|
||||
|
||||
double SYSTEM_MONITOR_INTERVAL;
|
||||
double NETWORK_BUSYNESS_MONITOR_INTERVAL; // The interval at which the network busyness metric is updated
|
||||
|
||||
double FAILURE_MAX_DELAY;
|
||||
double FAILURE_MIN_DELAY;
|
||||
|
|
|
@ -347,6 +347,15 @@ ThreadFuture<Void> DLDatabase::createSnapshot(const StringRef& uid, const String
|
|||
return toThreadFuture<Void>(api, f, [](FdbCApi::FDBFuture* f, FdbCApi* api) { return Void(); });
|
||||
}
|
||||
|
||||
// Get network thread busyness
|
||||
double DLDatabase::getMainThreadBusyness() {
|
||||
if (api->databaseGetMainThreadBusyness != nullptr) {
|
||||
return api->databaseGetMainThreadBusyness(db);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// DLApi
|
||||
template <class T>
|
||||
void loadClientFunction(T* fp, void* lib, std::string libPath, const char* functionName, bool requireFunction = true) {
|
||||
|
@ -360,6 +369,7 @@ void loadClientFunction(T* fp, void* lib, std::string libPath, const char* funct
|
|||
DLApi::DLApi(std::string fdbCPath, bool unlinkOnLoad)
|
||||
: api(new FdbCApi()), fdbCPath(fdbCPath), unlinkOnLoad(unlinkOnLoad), networkSetup(false) {}
|
||||
|
||||
// Loads client API functions (definitions are in FdbCApi struct)
|
||||
void DLApi::init() {
|
||||
if (isLibraryLoaded(fdbCPath.c_str())) {
|
||||
throw external_client_already_loaded();
|
||||
|
@ -388,6 +398,11 @@ void DLApi::init() {
|
|||
|
||||
loadClientFunction(&api->databaseCreateTransaction, lib, fdbCPath, "fdb_database_create_transaction");
|
||||
loadClientFunction(&api->databaseSetOption, lib, fdbCPath, "fdb_database_set_option");
|
||||
loadClientFunction(&api->databaseGetMainThreadBusyness,
|
||||
lib,
|
||||
fdbCPath,
|
||||
"fdb_database_get_main_thread_busyness",
|
||||
headerVersion >= 700);
|
||||
loadClientFunction(&api->databaseDestroy, lib, fdbCPath, "fdb_database_destroy");
|
||||
loadClientFunction(&api->databaseRebootWorker, lib, fdbCPath, "fdb_database_reboot_worker", headerVersion >= 700);
|
||||
loadClientFunction(&api->databaseForceRecoveryWithDataLoss,
|
||||
|
@ -917,6 +932,15 @@ ThreadFuture<Void> MultiVersionDatabase::createSnapshot(const StringRef& uid, co
|
|||
return abortableFuture(f, dbState->dbVar->get().onChange);
|
||||
}
|
||||
|
||||
// Get network thread busyness
|
||||
double MultiVersionDatabase::getMainThreadBusyness() {
|
||||
if (dbState->db) {
|
||||
return dbState->db->getMainThreadBusyness();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void MultiVersionDatabase::Connector::connect() {
|
||||
addref();
|
||||
onMainThreadVoid(
|
||||
|
|
|
@ -80,6 +80,7 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
|
|||
int uidLength,
|
||||
uint8_t const* snapshotCommand,
|
||||
int snapshotCommandLength);
|
||||
double (*databaseGetMainThreadBusyness)(FDBDatabase* database);
|
||||
|
||||
// Transaction
|
||||
fdb_error_t (*transactionSetOption)(FDBTransaction* tr,
|
||||
|
@ -262,6 +263,7 @@ public:
|
|||
|
||||
Reference<ITransaction> createTransaction() override;
|
||||
void setOption(FDBDatabaseOptions::Option option, Optional<StringRef> value = Optional<StringRef>()) override;
|
||||
double getMainThreadBusyness() override;
|
||||
|
||||
void addref() override { ThreadSafeReferenceCounted<DLDatabase>::addref(); }
|
||||
void delref() override { ThreadSafeReferenceCounted<DLDatabase>::delref(); }
|
||||
|
@ -422,6 +424,7 @@ public:
|
|||
|
||||
Reference<ITransaction> createTransaction() override;
|
||||
void setOption(FDBDatabaseOptions::Option option, Optional<StringRef> value = Optional<StringRef>()) override;
|
||||
double getMainThreadBusyness() override;
|
||||
|
||||
void addref() override { ThreadSafeReferenceCounted<MultiVersionDatabase>::addref(); }
|
||||
void delref() override { ThreadSafeReferenceCounted<MultiVersionDatabase>::delref(); }
|
||||
|
|
|
@ -1743,6 +1743,30 @@ void setNetworkOption(FDBNetworkOptions::Option option, Optional<StringRef> valu
|
|||
}
|
||||
}
|
||||
|
||||
// update the network busyness on a 1s cadence
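// Worked example: if the run loop's starvation tracker accumulated 0.25s of active time during a
// 1s window, the published busyness is min(1.0, 0.25) / 1.0 = 0.25, i.e. ~25% busy.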
|
||||
ACTOR Future<Void> monitorNetworkBusyness() {
|
||||
state double prevTime = now();
|
||||
loop {
|
||||
wait(delay(CLIENT_KNOBS->NETWORK_BUSYNESS_MONITOR_INTERVAL, TaskPriority::FlushTrace));
|
||||
double elapsed = now() - prevTime; // get elapsed time from last execution
|
||||
prevTime = now();
|
||||
struct NetworkMetrics::PriorityStats& tracker = g_network->networkInfo.metrics.starvationTrackerNetworkBusyness;
|
||||
|
||||
if (tracker.active) { // update metrics
|
||||
tracker.duration += now() - tracker.windowedTimer;
|
||||
tracker.maxDuration = std::max(tracker.maxDuration, now() - tracker.timer);
|
||||
tracker.windowedTimer = now();
|
||||
}
|
||||
|
||||
g_network->networkInfo.metrics.networkBusyness =
|
||||
std::min(elapsed, tracker.duration) / elapsed; // average duration spent doing "work"
|
||||
|
||||
tracker.duration = 0;
|
||||
tracker.maxDuration = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Setup g_network and start monitoring for network busyness
|
||||
void setupNetwork(uint64_t transportId, bool useMetrics) {
|
||||
if (g_network)
|
||||
throw network_already_setup();
|
||||
|
@ -1756,6 +1780,8 @@ void setupNetwork(uint64_t transportId, bool useMetrics) {
|
|||
g_network->addStopCallback(TLS::DestroyOpenSSLGlobalState);
|
||||
FlowTransport::createInstance(true, transportId);
|
||||
Net2FileSystem::newFileSystem();
|
||||
|
||||
uncancellable(monitorNetworkBusyness());
|
||||
}
|
||||
|
||||
void runNetwork() {
|
||||
|
|
|
@ -91,6 +91,12 @@ ThreadFuture<Void> ThreadSafeDatabase::createSnapshot(const StringRef& uid, cons
|
|||
return onMainThread([db, snapUID, cmd]() -> Future<Void> { return db->createSnapshot(snapUID, cmd); });
|
||||
}
|
||||
|
||||
// Return the main network thread busyness
|
||||
double ThreadSafeDatabase::getMainThreadBusyness() {
|
||||
ASSERT(g_network);
|
||||
return g_network->networkInfo.metrics.networkBusyness;
|
||||
}
|
||||
|
||||
ThreadSafeDatabase::ThreadSafeDatabase(std::string connFilename, int apiVersion) {
|
||||
ClusterConnectionFile* connFile =
|
||||
new ClusterConnectionFile(ClusterConnectionFile::lookupClusterFileName(connFilename).first);
|
||||
|
|
|
@ -35,6 +35,7 @@ public:
|
|||
Reference<ITransaction> createTransaction() override;
|
||||
|
||||
void setOption(FDBDatabaseOptions::Option option, Optional<StringRef> value = Optional<StringRef>()) override;
|
||||
double getMainThreadBusyness() override;
|
||||
|
||||
ThreadFuture<Void>
|
||||
onConnected(); // Returns after a majority of coordination servers are available and have reported a leader. The
|
||||
|
|
|
@ -145,8 +145,17 @@ private:
|
|||
// The maximum amount of time a write is delayed before being passed along to the underlying file
|
||||
double maxWriteDelay;
|
||||
|
||||
// Modifications which haven't been pushed to file, mapped by the location in the file that is being modified
|
||||
// Modifications which haven't been pushed to file, mapped by the location in the file that is being modified.
|
||||
// Be sure to update minSizeAfterPendingModifications when modifying pendingModifications.
|
||||
RangeMap<uint64_t, Future<Void>> pendingModifications;
|
||||
// The size of the file after the set of pendingModifications completes,
|
||||
// (the set pending at the time of reading this member). Must be updated in
|
||||
// lockstep with any inserts into the pendingModifications map. Tracking
|
||||
// this variable is necessary so that we can know the range of the file a
|
||||
// truncate is modifying, so we can insert it into the pendingModifications
|
||||
// map. Until minSizeAfterPendingModificationsIsExact is true, this is only a lower bound.
|
||||
mutable int64_t minSizeAfterPendingModifications = 0;
|
||||
mutable bool minSizeAfterPendingModificationsIsExact = false;
|
||||
|
||||
// Will be blocked whenever kill is running
|
||||
Promise<Void> killed;
|
||||
|
@ -437,6 +446,7 @@ private:
|
|||
Future<Void> writeEnded = wait(ownFuture);
|
||||
std::vector<Future<Void>> priorModifications =
|
||||
self->getModificationsAndInsert(offset, length, true, writeEnded);
|
||||
self->minSizeAfterPendingModifications = std::max(self->minSizeAfterPendingModifications, offset + length);
|
||||
|
||||
if (BUGGIFY_WITH_PROB(0.001))
|
||||
priorModifications.push_back(
|
||||
|
@ -603,9 +613,19 @@ private:
|
|||
//TraceEvent("AsyncFileNonDurable_Truncate", self->id).detail("Delay", delayDuration).detail("Filename", self->filename);
|
||||
wait(checkKilled(self, "Truncate"));
|
||||
|
||||
Future<Void> truncateEnded = wait(ownFuture);
|
||||
state Future<Void> truncateEnded = wait(ownFuture);
|
||||
|
||||
// Need to know the size of the file directly before this truncate
|
||||
// takes effect to see what range it modifies.
|
||||
if (!self->minSizeAfterPendingModificationsIsExact) {
|
||||
wait(success(self->size()));
|
||||
}
|
||||
ASSERT(self->minSizeAfterPendingModificationsIsExact);
|
||||
int64_t beginModifiedRange = std::min(size, self->minSizeAfterPendingModifications);
|
||||
self->minSizeAfterPendingModifications = size;
|
||||
|
||||
std::vector<Future<Void>> priorModifications =
|
||||
self->getModificationsAndInsert(size, -1, true, truncateEnded);
|
||||
self->getModificationsAndInsert(beginModifiedRange, /*through end of file*/ -1, true, truncateEnded);
|
||||
|
||||
if (BUGGIFY_WITH_PROB(0.001))
|
||||
priorModifications.push_back(
|
||||
|
@ -751,8 +771,9 @@ private:
|
|||
wait(checkKilled(self, "SizeEnd"));
|
||||
|
||||
// Include any modifications which extend past the end of the file
|
||||
uint64_t maxModification = self->pendingModifications.lastItem().begin();
|
||||
self->approximateSize = std::max<int64_t>(sizeFuture.get(), maxModification);
|
||||
self->approximateSize = self->minSizeAfterPendingModifications =
|
||||
std::max<int64_t>(sizeFuture.get(), self->minSizeAfterPendingModifications);
|
||||
self->minSizeAfterPendingModificationsIsExact = true;
|
||||
return self->approximateSize;
|
||||
}
|
||||
|
||||
|
|
|
@ -182,3 +182,22 @@ TEST_CASE("/fileio/rename") {
|
|||
wait(IAsyncFileSystem::filesystem()->deleteFile(renamedFile, true));
|
||||
return Void();
|
||||
}
|
||||
|
||||
// Truncating to extend size should zero the new data
|
||||
TEST_CASE("/fileio/truncateAndRead") {
|
||||
state std::string filename = "/tmp/__JUNK__";
|
||||
state Reference<IAsyncFile> f = wait(IAsyncFileSystem::filesystem()->open(
|
||||
filename, IAsyncFile::OPEN_ATOMIC_WRITE_AND_CREATE | IAsyncFile::OPEN_CREATE | IAsyncFile::OPEN_READWRITE, 0));
|
||||
state std::array<char, 4096> data;
|
||||
wait(f->sync());
|
||||
wait(f->truncate(4096));
|
||||
int length = wait(f->read(&data[0], 4096, 0));
|
||||
ASSERT(length == 4096);
|
||||
for (auto c : data) {
|
||||
ASSERT(c == '\0');
|
||||
}
|
||||
// close the file by deleting the reference
|
||||
f.clear();
|
||||
wait(IAsyncFileSystem::filesystem()->incrementalDeleteFile(filename, true));
|
||||
return Void();
|
||||
}
|
||||
|
|
|
@@ -712,6 +712,7 @@ private:
return Void();
}

// Simulated sync does not actually do anything besides wait a random amount of time
ACTOR static Future<Void> sync_impl(SimpleFile* self) {
state UID opId = deterministicRandom()->randomUniqueID();
if (randLog)

@@ -737,7 +738,6 @@ private:
.detail("FileCount", machineCache.count(self->filename));
renameFile(sourceFilename.c_str(), self->filename.c_str());

ASSERT(!machineCache.count(self->filename));
machineCache[self->filename] = machineCache[sourceFilename];
machineCache.erase(sourceFilename);
self->actualFilename = self->filename;
@@ -2436,19 +2436,19 @@ Future<Reference<class IAsyncFile>> Sim2FileSystem::open(const std::string& file
if (flags & IAsyncFile::OPEN_UNCACHED) {
auto& machineCache = g_simulator.getCurrentProcess()->machine->openFiles;
std::string actualFilename = filename;
if (machineCache.find(filename) == machineCache.end()) {
if (flags & IAsyncFile::OPEN_ATOMIC_WRITE_AND_CREATE) {
actualFilename = filename + ".part";
auto partFile = machineCache.find(actualFilename);
if (partFile != machineCache.end()) {
Future<Reference<IAsyncFile>> f = AsyncFileDetachable::open(partFile->second);
if (FLOW_KNOBS->PAGE_WRITE_CHECKSUM_HISTORY > 0)
f = map(f, [=](Reference<IAsyncFile> r) {
return Reference<IAsyncFile>(new AsyncFileWriteChecker(r));
});
return f;
}
if (flags & IAsyncFile::OPEN_ATOMIC_WRITE_AND_CREATE) {
actualFilename = filename + ".part";
auto partFile = machineCache.find(actualFilename);
if (partFile != machineCache.end()) {
Future<Reference<IAsyncFile>> f = AsyncFileDetachable::open(partFile->second);
if (FLOW_KNOBS->PAGE_WRITE_CHECKSUM_HISTORY > 0)
f = map(f, [=](Reference<IAsyncFile> r) {
return Reference<IAsyncFile>(new AsyncFileWriteChecker(r));
});
return f;
}
}
if (machineCache.find(actualFilename) == machineCache.end()) {
// Simulated disk parameters are shared by the AsyncFileNonDurable and the underlying SimpleFile.
// This way, they can both keep up with the time to start the next operation
auto diskParameters =
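The ".part" handling above is the simulator's flavor of the atomic write-and-create pattern: writes land in a side file and only become visible under the final name once committed. A minimal sketch of the same idea with the standard library (the helper name, path handling, and commit step are illustrative, not the simulator's implementation):

#include <filesystem>
#include <fstream>
#include <string>

// Write contents to "<path>.part" first, then atomically rename it into place.
// If the process dies before the rename, readers never observe a half-written file.
void atomicWriteAndCreate(const std::string& path, const std::string& contents) {
	const std::string partPath = path + ".part";
	{
		std::ofstream out(partPath, std::ios::binary | std::ios::trunc);
		out << contents;
		out.flush(); // a real implementation would also fsync here
	}
	std::filesystem::rename(partPath, path); // the "commit" step
}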
@@ -391,6 +391,7 @@ public:
std::string disablePrimary;
std::string disableRemote;
std::string originalRegions;
std::string startingDisabledConfiguration;
bool allowLogSetKills;
Optional<Standalone<StringRef>> remoteDcId;
bool hasSatelliteReplication;

@@ -339,7 +339,7 @@ public:
bool checkStable = false,
std::set<Optional<Key>> dcIds = std::set<Optional<Key>>(),
std::vector<UID> exclusionWorkerIds = {}) {
std::map<std::pair<ProcessClass::Fitness, bool>, vector<WorkerDetails>> fitness_workers;
std::map<std::tuple<ProcessClass::Fitness, int, bool, bool>, vector<WorkerDetails>> fitness_workers;
std::vector<WorkerDetails> results;
std::vector<LocalityData> unavailableLocals;
Reference<LocalitySet> logServerSet;
@@ -406,80 +406,94 @@ public:
}

// This worker is a candidate for TLog recruitment.
fitness_workers[std::make_pair(fitness, worker_details.degraded)].push_back(worker_details);
bool inCCDC = worker_details.interf.locality.dcId() == clusterControllerDcId;
fitness_workers[std::make_tuple(fitness, id_used[worker_process_id], worker_details.degraded, inCCDC)]
.push_back(worker_details);
}

results.reserve(results.size() + id_worker.size());
for (int fitness = ProcessClass::BestFit; fitness != ProcessClass::NeverAssign && !bCompleted; fitness++) {
// FIXME: it's not clear whether this is necessary.
for (int fitness = ProcessClass::BestFit; fitness != ProcessClass::NeverAssign; fitness++) {
auto fitnessEnum = (ProcessClass::Fitness)fitness;
for (int addingDegraded = 0; addingDegraded < 2; addingDegraded++) {
auto workerItr = fitness_workers.find(std::make_pair(fitnessEnum, (bool)addingDegraded));
if (workerItr != fitness_workers.end()) {
for (auto& worker : workerItr->second) {
logServerMap->add(worker.interf.locality, &worker);
}
}
fitness_workers[std::make_tuple(fitnessEnum, 0, addingDegraded, false)];
}
}
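The switch from a pair key to a std::tuple key matters because iterating a std::map visits keys in lexicographic tuple order, so candidates come out sorted by fitness first, then by how many roles the process already holds, then by degraded status, then by whether it sits in the cluster controller's DC. A small self-contained sketch of that ordering (the workers and numbers below are made up for illustration):

#include <cstdio>
#include <map>
#include <string>
#include <tuple>
#include <vector>

int main() {
	// Key: (fitness, rolesAlreadyUsed, degraded, inClusterControllerDC); lower sorts first.
	std::map<std::tuple<int, int, bool, bool>, std::vector<std::string>> workers;
	workers[{ 1, 0, false, true }].push_back("workerA");
	workers[{ 0, 2, false, false }].push_back("workerB");
	workers[{ 0, 0, true, false }].push_back("workerC");
	workers[{ 0, 0, false, false }].push_back("workerD");

	// Iteration order: workerD, workerC, workerB, workerA -- best candidates first.
	for (const auto& [key, names] : workers)
		for (const auto& name : names)
			printf("%s\n", name.c_str());
	return 0;
}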
results.reserve(results.size() + id_worker.size());
|
||||
for (auto workerIter = fitness_workers.begin(); workerIter != fitness_workers.end(); ++workerIter) {
|
||||
auto fitness = std::get<0>(workerIter->first);
|
||||
auto used = std::get<1>(workerIter->first);
|
||||
auto addingDegraded = std::get<2>(workerIter->first);
|
||||
ASSERT(fitness < ProcessClass::NeverAssign);
|
||||
if (bCompleted) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (logServerSet->size() < (addingDegraded == 0 ? desired : required)) {
|
||||
} else if (logServerSet->size() == required || logServerSet->size() <= desired) {
|
||||
if (logServerSet->validate(policy)) {
|
||||
for (auto& object : logServerMap->getObjects()) {
|
||||
results.push_back(*object);
|
||||
}
|
||||
bCompleted = true;
|
||||
break;
|
||||
for (auto& worker : workerIter->second) {
|
||||
logServerMap->add(worker.interf.locality, &worker);
|
||||
}
|
||||
|
||||
if (logServerSet->size() < (std::get<2>(workerIter->first) ? required : desired)) {
|
||||
} else if (logServerSet->size() == required || logServerSet->size() <= desired) {
|
||||
if (logServerSet->validate(policy)) {
|
||||
for (auto& object : logServerMap->getObjects()) {
|
||||
results.push_back(*object);
|
||||
}
|
||||
TraceEvent(SevWarn, "GWFTADNotAcceptable", id)
|
||||
bCompleted = true;
|
||||
break;
|
||||
}
|
||||
TraceEvent(SevWarn, "GWFTADNotAcceptable", id)
|
||||
.detail("DcIds", dcList)
|
||||
.detail("Fitness", fitness)
|
||||
.detail("Processes", logServerSet->size())
|
||||
.detail("Required", required)
|
||||
.detail("TLogPolicy", policy->info())
|
||||
.detail("DesiredLogs", desired)
|
||||
.detail("Used", used)
|
||||
.detail("AddingDegraded", addingDegraded);
|
||||
}
|
||||
// Try to select the desired size, if larger
|
||||
else {
|
||||
std::vector<LocalityEntry> bestSet;
|
||||
std::vector<LocalityData> tLocalities;
|
||||
|
||||
// Try to find the best team of servers to fulfill the policy
|
||||
if (findBestPolicySet(bestSet,
|
||||
logServerSet,
|
||||
policy,
|
||||
desired,
|
||||
SERVER_KNOBS->POLICY_RATING_TESTS,
|
||||
SERVER_KNOBS->POLICY_GENERATIONS)) {
|
||||
results.reserve(results.size() + bestSet.size());
|
||||
for (auto& entry : bestSet) {
|
||||
auto object = logServerMap->getObject(entry);
|
||||
ASSERT(object);
|
||||
results.push_back(*object);
|
||||
tLocalities.push_back(object->interf.locality);
|
||||
}
|
||||
TraceEvent("GWFTADBestResults", id)
|
||||
.detail("DcIds", dcList)
|
||||
.detail("Fitness", fitness)
|
||||
.detail("Used", used)
|
||||
.detail("Processes", logServerSet->size())
|
||||
.detail("Required", required)
|
||||
.detail("TLogPolicy", policy->info())
|
||||
.detail("DesiredLogs", desired)
|
||||
.detail("AddingDegraded", addingDegraded);
|
||||
}
|
||||
// Try to select the desired size, if larger
|
||||
else {
|
||||
std::vector<LocalityEntry> bestSet;
|
||||
std::vector<LocalityData> tLocalities;
|
||||
|
||||
// Try to find the best team of servers to fulfill the policy
|
||||
if (findBestPolicySet(bestSet,
|
||||
logServerSet,
|
||||
policy,
|
||||
desired,
|
||||
SERVER_KNOBS->POLICY_RATING_TESTS,
|
||||
SERVER_KNOBS->POLICY_GENERATIONS)) {
|
||||
results.reserve(results.size() + bestSet.size());
|
||||
for (auto& entry : bestSet) {
|
||||
auto object = logServerMap->getObject(entry);
|
||||
ASSERT(object);
|
||||
results.push_back(*object);
|
||||
tLocalities.push_back(object->interf.locality);
|
||||
}
|
||||
TraceEvent("GWFTADBestResults", id)
|
||||
.detail("DcIds", dcList)
|
||||
.detail("Fitness", fitness)
|
||||
.detail("Processes", logServerSet->size())
|
||||
.detail("BestCount", bestSet.size())
|
||||
.detail("BestZones", ::describeZones(tLocalities))
|
||||
.detail("BestDataHalls", ::describeDataHalls(tLocalities))
|
||||
.detail("TLogPolicy", policy->info())
|
||||
.detail("TotalResults", results.size())
|
||||
.detail("DesiredLogs", desired)
|
||||
.detail("AddingDegraded", addingDegraded);
|
||||
bCompleted = true;
|
||||
break;
|
||||
}
|
||||
TraceEvent(SevWarn, "GWFTADNoBest", id)
|
||||
.detail("DcIds", dcList)
|
||||
.detail("Fitness", fitness)
|
||||
.detail("Processes", logServerSet->size())
|
||||
.detail("Required", required)
|
||||
.detail("BestCount", bestSet.size())
|
||||
.detail("BestZones", ::describeZones(tLocalities))
|
||||
.detail("BestDataHalls", ::describeDataHalls(tLocalities))
|
||||
.detail("TLogPolicy", policy->info())
|
||||
.detail("TotalResults", results.size())
|
||||
.detail("DesiredLogs", desired)
|
||||
.detail("AddingDegraded", addingDegraded);
|
||||
bCompleted = true;
|
||||
break;
|
||||
}
|
||||
TraceEvent(SevWarn, "GWFTADNoBest", id)
|
||||
.detail("DcIds", dcList)
|
||||
.detail("Fitness", fitness)
|
||||
.detail("Used", used)
|
||||
.detail("Processes", logServerSet->size())
|
||||
.detail("Required", required)
|
||||
.detail("TLogPolicy", policy->info())
|
||||
.detail("DesiredLogs", desired)
|
||||
.detail("AddingDegraded", addingDegraded);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1157,12 +1171,14 @@ public:
|
|||
req.configuration,
|
||||
used,
|
||||
first_commit_proxy);
|
||||
|
||||
auto grv_proxies = getWorkersForRoleInDatacenter(dcId,
|
||||
ProcessClass::GrvProxy,
|
||||
req.configuration.getDesiredGrvProxies(),
|
||||
req.configuration,
|
||||
used,
|
||||
first_grv_proxy);
|
||||
|
||||
auto resolvers = getWorkersForRoleInDatacenter(dcId,
|
||||
ProcessClass::Resolver,
|
||||
req.configuration.getDesiredResolvers(),
|
||||
|
@ -1216,6 +1232,7 @@ public:
|
|||
}
|
||||
|
||||
if (bestDC != clusterControllerDcId) {
|
||||
TraceEvent("BestDCIsNotClusterDC");
|
||||
vector<Optional<Key>> dcPriority;
|
||||
dcPriority.push_back(bestDC);
|
||||
desiredDcIds.set(dcPriority);
|
||||
|
@ -1321,6 +1338,9 @@ public:
|
|||
}
|
||||
|
||||
// FIXME: determine when to fail the cluster controller when a primaryDC has not been set
|
||||
|
||||
// This function returns true when the cluster controller determines it is worth forcing
|
||||
// a master recovery in order to change the recruited processes in the transaction subsystem.
|
||||
bool betterMasterExists() {
|
||||
const ServerDBInfo dbi = db.serverInfo->get();
|
||||
|
||||
|
@ -1488,13 +1508,15 @@ public:
|
|||
|
||||
bool oldSatelliteFallback = false;
|
||||
|
||||
for (auto& logSet : dbi.logSystemConfig.tLogs) {
|
||||
if (region.satelliteTLogPolicy.isValid() && logSet.isLocal && logSet.locality == tagLocalitySatellite) {
|
||||
oldSatelliteFallback = logSet.tLogPolicy->info() != region.satelliteTLogPolicy->info();
|
||||
ASSERT(!oldSatelliteFallback ||
|
||||
(region.satelliteTLogPolicyFallback.isValid() &&
|
||||
logSet.tLogPolicy->info() == region.satelliteTLogPolicyFallback->info()));
|
||||
break;
|
||||
if (region.satelliteTLogPolicyFallback.isValid()) {
|
||||
for (auto& logSet : dbi.logSystemConfig.tLogs) {
|
||||
if (region.satelliteTLogPolicy.isValid() && logSet.isLocal && logSet.locality == tagLocalitySatellite) {
|
||||
oldSatelliteFallback = logSet.tLogPolicy->info() != region.satelliteTLogPolicy->info();
|
||||
ASSERT(!oldSatelliteFallback ||
|
||||
(region.satelliteTLogPolicyFallback.isValid() &&
|
||||
logSet.tLogPolicy->info() == region.satelliteTLogPolicyFallback->info()));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1629,12 +1651,26 @@ public:
|
|||
return false;
|
||||
}
|
||||
|
||||
// Because a configuration with fewer proxies or resolvers does not cause this function to fail,
|
||||
// we need an extra check to determine if the total number of processes has been reduced.
|
||||
// This is mainly helpful in avoiding situations where killing a degraded process
|
||||
// would result in a configuration with less total processes than desired.
|
||||
if (oldTLogFit.count + oldInFit.proxy.count + oldInFit.grvProxy.count + oldInFit.resolver.count >
|
||||
newTLogFit.count + newInFit.proxy.count + newInFit.grvProxy.count + newInFit.resolver.count) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check backup worker fitness
|
||||
RoleFitness oldBackupWorkersFit(backup_workers, ProcessClass::Backup);
|
||||
const int nBackup = backup_addresses.size();
|
||||
RoleFitness newBackupWorkersFit(
|
||||
getWorkersForRoleInDatacenter(clusterControllerDcId, ProcessClass::Backup, nBackup, db.config, id_used),
|
||||
ProcessClass::Backup);
|
||||
RoleFitness newBackupWorkersFit(getWorkersForRoleInDatacenter(clusterControllerDcId,
|
||||
ProcessClass::Backup,
|
||||
nBackup,
|
||||
db.config,
|
||||
id_used,
|
||||
Optional<WorkerFitnessInfo>(),
|
||||
true),
|
||||
ProcessClass::Backup);
|
||||
|
||||
if (oldTLogFit > newTLogFit || oldInFit > newInFit || oldSatelliteTLogFit > newSatelliteTLogFit ||
|
||||
oldRemoteTLogFit > newRemoteTLogFit || oldLogRoutersFit > newLogRoutersFit ||
|
||||
|
|
|
@@ -658,6 +658,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {

AsyncTrigger printDetailedTeamsInfo;
PromiseStream<GetMetricsRequest> getShardMetrics;
Promise<UID> removeFailedServer;

void resetLocalitySet() {
storageServerSet = Reference<LocalitySet>(new LocalityMap<UID>());
@ -695,7 +696,8 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
Reference<AsyncVar<bool>> zeroHealthyTeams,
|
||||
bool primary,
|
||||
Reference<AsyncVar<bool>> processingUnhealthy,
|
||||
PromiseStream<GetMetricsRequest> getShardMetrics)
|
||||
PromiseStream<GetMetricsRequest> getShardMetrics,
|
||||
Promise<UID> removeFailedServer)
|
||||
: cx(cx), distributorId(distributorId), lock(lock), output(output),
|
||||
shardsAffectedByTeamFailure(shardsAffectedByTeamFailure), doBuildTeams(true), lastBuildTeamsFailed(false),
|
||||
teamBuilder(Void()), badTeamRemover(Void()), checkInvalidLocalities(Void()), wrongStoreTypeRemover(Void()),
|
||||
|
@ -710,7 +712,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
zeroHealthyTeams(zeroHealthyTeams), zeroOptimalTeams(true), primary(primary),
|
||||
medianAvailableSpace(SERVER_KNOBS->MIN_AVAILABLE_SPACE_RATIO), lastMedianAvailableSpaceUpdate(0),
|
||||
processingUnhealthy(processingUnhealthy), lowestUtilizationTeam(0), highestUtilizationTeam(0),
|
||||
getShardMetrics(getShardMetrics) {
|
||||
getShardMetrics(getShardMetrics), removeFailedServer(removeFailedServer) {
|
||||
if (!primary || configuration.usableRegions == 1) {
|
||||
TraceEvent("DDTrackerStarting", distributorId).detail("State", "Inactive").trackLatest("DDTrackerStarting");
|
||||
}
|
||||
|
@ -718,6 +720,13 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
|
||||
~DDTeamCollection() {
|
||||
TraceEvent("DDTeamCollectionDestructed", distributorId).detail("Primary", primary);
|
||||
|
||||
// Cancel the teamBuilder to avoid creating new teams after teams are cancelled.
|
||||
teamBuilder.cancel();
|
||||
// TraceEvent("DDTeamCollectionDestructed", distributorId)
|
||||
// .detail("Primary", primary)
|
||||
// .detail("TeamBuilderDestroyed", server_info.size());
|
||||
|
||||
// Other teamCollections also hold pointer to this teamCollection;
|
||||
// TeamTracker may access the destructed DDTeamCollection if we do not reset the pointer
|
||||
for (int i = 0; i < teamCollections.size(); i++) {
|
||||
|
@ -754,12 +763,8 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
info->collection = nullptr;
|
||||
}
|
||||
// TraceEvent("DDTeamCollectionDestructed", distributorId)
|
||||
// .detail("Primary", primary)
|
||||
// .detail("ServerTrackerDestroyed", server_info.size());
|
||||
teamBuilder.cancel();
|
||||
// TraceEvent("DDTeamCollectionDestructed", distributorId)
|
||||
// .detail("Primary", primary)
|
||||
// .detail("TeamBuilderDestroyed", server_info.size());
|
||||
// .detail("Primary", primary)
|
||||
// .detail("ServerTrackerDestroyed", server_info.size());
|
||||
}
|
||||
|
||||
void addLaggingStorageServer(Key zoneId) {
|
||||
|
@@ -4145,10 +4150,14 @@ ACTOR Future<Void> storageServerTracker(
TraceEvent(SevWarn, "FailedServerRemoveKeys", self->distributorId)
.detail("Server", server->id)
.detail("Excluded", worstAddr.toString());
wait(removeKeysFromFailedServer(cx, server->id, self->lock, ddEnabledState));
if (BUGGIFY)
wait(delay(5.0));
self->shardsAffectedByTeamFailure->eraseServer(server->id);
wait(delay(0.0)); //Do not throw an error while still inside trackExcludedServers
while (!ddEnabledState->isDDEnabled()) {
wait(delay(1.0));
}
if (self->removeFailedServer.canBeSet()) {
self->removeFailedServer.send(server->id);
}
throw movekeys_conflict();
}
}

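The canBeSet() guard makes removeFailedServer a one-shot signal: the first failed server wins, and later trackers silently skip the send instead of hitting a double-set error. A minimal sketch of that pattern using plain standard-library types (this is not FDB's Promise, only the shape of the idea):

#include <cstdio>
#include <optional>
#include <string>

// A one-shot slot: the first send() sticks, later sends are ignored.
template <class T>
struct OneShot {
	std::optional<T> value;
	bool canBeSet() const { return !value.has_value(); }
	void send(T v) {
		if (canBeSet())
			value = std::move(v);
	}
};

int main() {
	OneShot<std::string> removeFailedServer;
	if (removeFailedServer.canBeSet())
		removeFailedServer.send("server-1"); // first failure is recorded
	if (removeFailedServer.canBeSet())
		removeFailedServer.send("server-2"); // ignored: already set
	printf("%s\n", removeFailedServer.value->c_str()); // prints server-1
	return 0;
}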
@ -4944,6 +4953,7 @@ ACTOR Future<Void> monitorBatchLimitedTime(Reference<AsyncVar<ServerDBInfo>> db,
|
|||
}
|
||||
}
|
||||
|
||||
// Runs the data distribution algorithm for FDB, including the DD Queue, DD tracker, and DD team collection
|
||||
ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self,
|
||||
PromiseStream<GetMetricsListRequest> getShardMetricsList,
|
||||
const DDEnabledState* ddEnabledState) {
|
||||
|
@ -4973,7 +4983,7 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self,
|
|||
// Stored outside of data distribution tracker to avoid slow tasks
|
||||
// when tracker is cancelled
|
||||
state KeyRangeMap<ShardTrackedData> shards;
|
||||
|
||||
state Promise<UID> removeFailedServer;
|
||||
try {
|
||||
loop {
|
||||
TraceEvent("DDInitTakingMoveKeysLock", self->ddId);
|
||||
|
@ -5204,7 +5214,8 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self,
|
|||
zeroHealthyTeams[0],
|
||||
true,
|
||||
processingUnhealthy,
|
||||
getShardMetrics);
|
||||
getShardMetrics,
|
||||
removeFailedServer);
|
||||
teamCollectionsPtrs.push_back(primaryTeamCollection.getPtr());
|
||||
if (configuration.usableRegions > 1) {
|
||||
remoteTeamCollection =
|
||||
|
@ -5220,7 +5231,8 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self,
|
|||
zeroHealthyTeams[1],
|
||||
false,
|
||||
processingUnhealthy,
|
||||
getShardMetrics);
|
||||
getShardMetrics,
|
||||
removeFailedServer);
|
||||
teamCollectionsPtrs.push_back(remoteTeamCollection.getPtr());
|
||||
remoteTeamCollection->teamCollections = teamCollectionsPtrs;
|
||||
actors.push_back(
|
||||
|
@@ -5252,12 +5264,21 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self,
primaryTeamCollection = Reference<DDTeamCollection>();
remoteTeamCollection = Reference<DDTeamCollection>();
wait(shards.clearAsync());
if (err.code() != error_code_movekeys_conflict)
throw err;
bool ddEnabled = wait(isDataDistributionEnabled(cx, ddEnabledState));
TraceEvent("DataDistributionMoveKeysConflict").detail("DataDistributionEnabled", ddEnabled).error(err);
if (ddEnabled)
throw err;
TraceEvent("DataDistributorTeamCollectionsDestroyed").error(err);
if (removeFailedServer.getFuture().isReady() && !removeFailedServer.getFuture().isError()) {
TraceEvent("RemoveFailedServer", removeFailedServer.getFuture().get()).error(err);
wait(removeKeysFromFailedServer(cx, removeFailedServer.getFuture().get(), lock, ddEnabledState));
wait(removeStorageServer(cx, removeFailedServer.getFuture().get(), lock, ddEnabledState));
} else {
if (err.code() != error_code_movekeys_conflict) {
throw err;
}
bool ddEnabled = wait(isDataDistributionEnabled(cx, ddEnabledState));
TraceEvent("DataDistributionMoveKeysConflict").detail("DataDistributionEnabled", ddEnabled).error(err);
if (ddEnabled) {
throw err;
}
}
}
}
}

@ -5682,7 +5703,8 @@ std::unique_ptr<DDTeamCollection> testTeamCollection(int teamSize,
|
|||
makeReference<AsyncVar<bool>>(true),
|
||||
true,
|
||||
makeReference<AsyncVar<bool>>(false),
|
||||
PromiseStream<GetMetricsRequest>()));
|
||||
PromiseStream<GetMetricsRequest>(),
|
||||
Promise<UID>()));
|
||||
|
||||
for (int id = 1; id <= processCount; ++id) {
|
||||
UID uid(id, 0);
|
||||
|
@ -5723,7 +5745,8 @@ std::unique_ptr<DDTeamCollection> testMachineTeamCollection(int teamSize,
|
|||
makeReference<AsyncVar<bool>>(true),
|
||||
true,
|
||||
makeReference<AsyncVar<bool>>(false),
|
||||
PromiseStream<GetMetricsRequest>()));
|
||||
PromiseStream<GetMetricsRequest>(),
|
||||
Promise<UID>()));
|
||||
|
||||
for (int id = 1; id <= processCount; id++) {
|
||||
UID uid(id, 0);
|
||||
|
|
|
@@ -178,7 +178,6 @@ public:
void moveShard(KeyRangeRef keys, std::vector<Team> destinationTeam);
void finishMove(KeyRangeRef keys);
void check();
void eraseServer(UID ssID);

private:
struct OrderByTeamKey {

@@ -999,10 +999,6 @@ void ShardsAffectedByTeamFailure::erase(Team team, KeyRange const& range) {
}
}

void ShardsAffectedByTeamFailure::eraseServer(UID ssID) {
storageServerShards[ssID] = 0;
}

void ShardsAffectedByTeamFailure::insert(Team team, KeyRange const& range) {
if (team_shards.insert(std::pair<Team, KeyRange>(team, range)).second) {
for (auto uid = team.servers.begin(); uid != team.servers.end(); ++uid)

@@ -492,6 +492,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
init( MAX_REBOOT_TIME, 5.0 ); if( longReboots ) MAX_REBOOT_TIME = 20.0;
init( LOG_DIRECTORY, "."); // Will be set to the command line flag.
init( SERVER_MEM_LIMIT, 8LL << 30 );
init( SYSTEM_MONITOR_FREQUENCY, 5.0 );

//Ratekeeper
bool slowRatekeeper = randomize && BUGGIFY;

@@ -416,6 +416,7 @@ public:
double MAX_REBOOT_TIME;
std::string LOG_DIRECTORY;
int64_t SERVER_MEM_LIMIT;
double SYSTEM_MONITOR_FREQUENCY;

// Ratekeeper
double SMOOTHING_AMOUNT;

@ -673,12 +673,15 @@ IPAddress makeIPAddressForSim(bool isIPv6, std::array<int, 4> parts) {
|
|||
|
||||
#include "fdbclient/MonitorLeader.h"
|
||||
|
||||
// Configures the system according to the given specifications in order to run
|
||||
// simulation, but with the additional consideration that it is meant to act
|
||||
// like a "rebooted" machine, mostly used for restarting tests.
|
||||
ACTOR Future<Void> restartSimulatedSystem(vector<Future<Void>>* systemActors,
|
||||
std::string baseFolder,
|
||||
int* pTesterCount,
|
||||
Optional<ClusterConnectionString>* pConnString,
|
||||
Standalone<StringRef>* pStartingConfiguration,
|
||||
int extraDB,
|
||||
TestConfig testConfig,
|
||||
std::string whitelistBinPaths,
|
||||
ProtocolVersion protocolVersion) {
|
||||
CSimpleIni ini;
|
||||
|
@ -698,7 +701,7 @@ ACTOR Future<Void> restartSimulatedSystem(vector<Future<Void>>* systemActors,
|
|||
}
|
||||
int desiredCoordinators = atoi(ini.GetValue("META", "desiredCoordinators"));
|
||||
int testerCount = atoi(ini.GetValue("META", "testerCount"));
|
||||
bool enableExtraDB = (extraDB == 3);
|
||||
bool enableExtraDB = (testConfig.extraDB == 3);
|
||||
ClusterConnectionString conn(ini.GetValue("META", "connectionString"));
|
||||
if (enableExtraDB) {
|
||||
g_simulator.extraDB = new ClusterConnectionString(ini.GetValue("META", "connectionString"));
|
||||
|
@ -836,8 +839,9 @@ ACTOR Future<Void> restartSimulatedSystem(vector<Future<Void>>* systemActors,
|
|||
return Void();
|
||||
}
|
||||
|
||||
// Configuration details compiled in a structure used when setting up a simulated cluster
|
||||
struct SimulationConfig {
|
||||
explicit SimulationConfig(int extraDB, int minimumReplication, int minimumRegions);
|
||||
explicit SimulationConfig(const TestConfig& testConfig);
|
||||
int extraDB;
|
||||
|
||||
DatabaseConfiguration db;
|
||||
|
@ -851,11 +855,11 @@ struct SimulationConfig {
|
|||
int coordinators;
|
||||
|
||||
private:
|
||||
void generateNormalConfig(int minimumReplication, int minimumRegions);
|
||||
void generateNormalConfig(const TestConfig& testConfig);
|
||||
};
|
||||
|
||||
SimulationConfig::SimulationConfig(int extraDB, int minimumReplication, int minimumRegions) : extraDB(extraDB) {
|
||||
generateNormalConfig(minimumReplication, minimumRegions);
|
||||
SimulationConfig::SimulationConfig(const TestConfig& testConfig) : extraDB(testConfig.extraDB) {
|
||||
generateNormalConfig(testConfig);
|
||||
}
|
||||
|
||||
void SimulationConfig::set_config(std::string config) {
|
||||
|
@ -871,18 +875,18 @@ StringRef StringRefOf(const char* s) {
|
|||
return StringRef((uint8_t*)s, strlen(s));
|
||||
}
|
||||
|
||||
void SimulationConfig::generateNormalConfig(int minimumReplication, int minimumRegions) {
|
||||
void SimulationConfig::generateNormalConfig(const TestConfig& testConfig) {
|
||||
set_config("new");
|
||||
const bool simple = false; // Set true to simplify simulation configs for easier debugging
|
||||
// generateMachineTeamTestConfig set up the number of servers per machine and the number of machines such that
|
||||
// if we do not remove the surplus server and machine teams, the simulation test will report error.
|
||||
// This is needed to make sure the number of server (and machine) teams is no larger than the desired number.
|
||||
bool generateMachineTeamTestConfig = BUGGIFY_WITH_PROB(0.1) ? true : false;
|
||||
bool generateFearless = simple ? false : (minimumRegions > 1 || deterministicRandom()->random01() < 0.5);
|
||||
datacenters = simple
|
||||
? 1
|
||||
: (generateFearless ? (minimumReplication > 0 || deterministicRandom()->random01() < 0.5 ? 4 : 6)
|
||||
: deterministicRandom()->randomInt(1, 4));
|
||||
bool generateFearless = simple ? false : (testConfig.minimumRegions > 1 || deterministicRandom()->random01() < 0.5);
|
||||
datacenters = simple ? 1
|
||||
: (generateFearless
|
||||
? (testConfig.minimumReplication > 0 || deterministicRandom()->random01() < 0.5 ? 4 : 6)
|
||||
: deterministicRandom()->randomInt(1, 4));
|
||||
if (deterministicRandom()->random01() < 0.25)
|
||||
db.desiredTLogCount = deterministicRandom()->randomInt(1, 7);
|
||||
if (deterministicRandom()->random01() < 0.25)
|
||||
|
@ -892,6 +896,10 @@ void SimulationConfig::generateNormalConfig(int minimumReplication, int minimumR
|
|||
if (deterministicRandom()->random01() < 0.25)
|
||||
db.resolverCount = deterministicRandom()->randomInt(1, 7);
|
||||
int storage_engine_type = deterministicRandom()->randomInt(0, 4);
|
||||
// Continuously re-pick the storage engine type if it's the one we want to exclude
|
||||
while (storage_engine_type == testConfig.storageEngineExcludeType) {
|
||||
storage_engine_type = deterministicRandom()->randomInt(0, 4);
|
||||
}
|
||||
switch (storage_engine_type) {
|
||||
case 0: {
|
||||
TEST(true); // Simulated cluster using ssd storage engine
|
||||
|
@ -930,7 +938,7 @@ void SimulationConfig::generateNormalConfig(int minimumReplication, int minimumR
|
|||
db.resolverCount = 1;
|
||||
}
|
||||
int replication_type = simple ? 1
|
||||
: (std::max(minimumReplication,
|
||||
: (std::max(testConfig.minimumReplication,
|
||||
datacenters > 4 ? deterministicRandom()->randomInt(1, 3)
|
||||
: std::min(deterministicRandom()->randomInt(0, 6), 3)));
|
||||
switch (replication_type) {
|
||||
|
@ -1078,7 +1086,7 @@ void SimulationConfig::generateNormalConfig(int minimumReplication, int minimumR
|
|||
|
||||
// We cannot run with a remote DC when MAX_READ_TRANSACTION_LIFE_VERSIONS is too small, because the log
|
||||
// routers will not be able to keep up.
|
||||
if (minimumRegions <= 1 &&
|
||||
if (testConfig.minimumRegions <= 1 &&
|
||||
(deterministicRandom()->random01() < 0.25 ||
|
||||
SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS < SERVER_KNOBS->VERSIONS_PER_SECOND)) {
|
||||
TEST(true); // Simulated cluster using one region
|
||||
|
@ -1124,7 +1132,7 @@ void SimulationConfig::generateNormalConfig(int minimumReplication, int minimumR
|
|||
db.remoteDesiredTLogCount = deterministicRandom()->randomInt(1, 7);
|
||||
|
||||
bool useNormalDCsAsSatellites =
|
||||
datacenters > 4 && minimumRegions < 2 && deterministicRandom()->random01() < 0.3;
|
||||
datacenters > 4 && testConfig.minimumRegions < 2 && deterministicRandom()->random01() < 0.3;
|
||||
StatusObject primarySatelliteObj;
|
||||
primarySatelliteObj["id"] = useNormalDCsAsSatellites ? "1" : "2";
|
||||
primarySatelliteObj["priority"] = 1;
|
||||
|
@ -1172,9 +1180,6 @@ void SimulationConfig::generateNormalConfig(int minimumReplication, int minimumR
|
|||
regionArr.push_back(remoteObj);
|
||||
}
|
||||
|
||||
set_config("regions=" +
|
||||
json_spirit::write_string(json_spirit::mValue(regionArr), json_spirit::Output_options::none));
|
||||
|
||||
if (needsRemote) {
|
||||
g_simulator.originalRegions = "regions=" + json_spirit::write_string(json_spirit::mValue(regionArr),
|
||||
json_spirit::Output_options::none);
|
||||
|
@ -1188,10 +1193,15 @@ void SimulationConfig::generateNormalConfig(int minimumReplication, int minimumR
|
|||
disableRemote[1].get_obj()["datacenters"].get_array()[0].get_obj()["priority"] = -1;
|
||||
g_simulator.disableRemote = "regions=" + json_spirit::write_string(json_spirit::mValue(disableRemote),
|
||||
json_spirit::Output_options::none);
|
||||
} else {
|
||||
// In order to generate a starting configuration with the remote disabled, do not apply the region
|
||||
// configuration to the DatabaseConfiguration until after creating the starting conf string.
|
||||
set_config("regions=" +
|
||||
json_spirit::write_string(json_spirit::mValue(regionArr), json_spirit::Output_options::none));
|
||||
}
|
||||
}
|
||||
|
||||
if (generateFearless && minimumReplication > 1) {
|
||||
if (generateFearless && testConfig.minimumReplication > 1) {
|
||||
// low latency tests in fearless configurations need 4 machines per datacenter (3 for triple replication, 1 that
|
||||
// is down during failures).
|
||||
machine_count = 16;
|
||||
|
@ -1216,10 +1226,11 @@ void SimulationConfig::generateNormalConfig(int minimumReplication, int minimumR
|
|||
|
||||
// because we protect a majority of coordinators from being killed, it is better to run with low numbers of
|
||||
// coordinators to prevent too many processes from being protected
|
||||
coordinators =
|
||||
(minimumRegions <= 1 && BUGGIFY) ? deterministicRandom()->randomInt(1, std::max(machine_count, 2)) : 1;
|
||||
coordinators = (testConfig.minimumRegions <= 1 && BUGGIFY)
|
||||
? deterministicRandom()->randomInt(1, std::max(machine_count, 2))
|
||||
: 1;
|
||||
|
||||
if (minimumReplication > 1 && datacenters == 3) {
|
||||
if (testConfig.minimumReplication > 1 && datacenters == 3) {
|
||||
// low latency tests in 3 data hall mode need 2 other data centers with 2 machines each to avoid waiting for
|
||||
// logs to recover.
|
||||
machine_count = std::max(machine_count, 6);
|
||||
|
@ -1233,26 +1244,24 @@ void SimulationConfig::generateNormalConfig(int minimumReplication, int minimumR
|
|||
}
|
||||
}
|
||||
|
||||
// Configures the system according to the given specifications in order to run
|
||||
// simulation under the correct conditions
|
||||
void setupSimulatedSystem(vector<Future<Void>>* systemActors,
|
||||
std::string baseFolder,
|
||||
int* pTesterCount,
|
||||
Optional<ClusterConnectionString>* pConnString,
|
||||
Standalone<StringRef>* pStartingConfiguration,
|
||||
int extraDB,
|
||||
int minimumReplication,
|
||||
int minimumRegions,
|
||||
std::string whitelistBinPaths,
|
||||
bool configureLocked,
|
||||
int logAntiQuorum,
|
||||
TestConfig testConfig,
|
||||
ProtocolVersion protocolVersion) {
|
||||
// SOMEDAY: this does not test multi-interface configurations
|
||||
SimulationConfig simconfig(extraDB, minimumReplication, minimumRegions);
|
||||
if (logAntiQuorum != -1) {
|
||||
simconfig.db.tLogWriteAntiQuorum = logAntiQuorum;
|
||||
SimulationConfig simconfig(testConfig);
|
||||
if (testConfig.logAntiQuorum != -1) {
|
||||
simconfig.db.tLogWriteAntiQuorum = testConfig.logAntiQuorum;
|
||||
}
|
||||
StatusObject startingConfigJSON = simconfig.db.toJSON(true);
|
||||
std::string startingConfigString = "new";
|
||||
if (configureLocked) {
|
||||
if (testConfig.configureLocked) {
|
||||
startingConfigString += " locked";
|
||||
}
|
||||
for (auto kv : startingConfigJSON) {
|
||||
|
@ -1270,6 +1279,12 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors,
|
|||
}
|
||||
}
|
||||
|
||||
if (g_simulator.originalRegions != "") {
|
||||
simconfig.set_config(g_simulator.originalRegions);
|
||||
g_simulator.startingDisabledConfiguration = startingConfigString + " " + g_simulator.disableRemote;
|
||||
startingConfigString += " " + g_simulator.originalRegions;
|
||||
}
|
||||
|
||||
g_simulator.storagePolicy = simconfig.db.storagePolicy;
|
||||
g_simulator.tLogPolicy = simconfig.db.tLogPolicy;
|
||||
g_simulator.tLogWriteAntiQuorum = simconfig.db.tLogWriteAntiQuorum;
|
||||
|
@ -1338,7 +1353,7 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors,
|
|||
TEST(!useIPv6); // Use IPv4
|
||||
|
||||
vector<NetworkAddress> coordinatorAddresses;
|
||||
if (minimumRegions > 1) {
|
||||
if (testConfig.minimumRegions > 1) {
|
||||
// do not put coordinators in the primary region so that we can kill that region safely
|
||||
int nonPrimaryDcs = dataCenters / 2;
|
||||
for (int dc = 1; dc < dataCenters; dc += 2) {
|
||||
|
@ -1409,14 +1424,14 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors,
|
|||
ClusterConnectionString conn(coordinatorAddresses, LiteralStringRef("TestCluster:0"));
|
||||
|
||||
// If extraDB==0, leave g_simulator.extraDB as null because the test does not use DR.
|
||||
if (extraDB == 1) {
|
||||
if (testConfig.extraDB == 1) {
|
||||
// The DR database can be either a new database or itself
|
||||
g_simulator.extraDB = new ClusterConnectionString(
|
||||
coordinatorAddresses, BUGGIFY ? LiteralStringRef("TestCluster:0") : LiteralStringRef("ExtraCluster:0"));
|
||||
} else if (extraDB == 2) {
|
||||
} else if (testConfig.extraDB == 2) {
|
||||
// The DR database is a new database
|
||||
g_simulator.extraDB = new ClusterConnectionString(coordinatorAddresses, LiteralStringRef("ExtraCluster:0"));
|
||||
} else if (extraDB == 3) {
|
||||
} else if (testConfig.extraDB == 3) {
|
||||
// The DR database is the same database
|
||||
g_simulator.extraDB = new ClusterConnectionString(coordinatorAddresses, LiteralStringRef("TestCluster:0"));
|
||||
}
|
||||
|
@ -1427,7 +1442,7 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors,
|
|||
.detail("String", conn.toString())
|
||||
.detail("ConfigString", startingConfigString);
|
||||
|
||||
bool requiresExtraDBMachines = extraDB && g_simulator.extraDB->toString() != conn.toString();
|
||||
bool requiresExtraDBMachines = testConfig.extraDB && g_simulator.extraDB->toString() != conn.toString();
|
||||
int assignedMachines = 0, nonVersatileMachines = 0;
|
||||
std::vector<ProcessClass::ClassType> processClassesSubSet = { ProcessClass::UnsetClass,
|
||||
ProcessClass::ResolutionClass,
|
||||
|
@ -1602,13 +1617,8 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors,
|
|||
.detail("StartingConfiguration", pStartingConfiguration->toString());
|
||||
}
|
||||
|
||||
void checkTestConf(const char* testFile,
|
||||
int& extraDB,
|
||||
int& minimumReplication,
|
||||
int& minimumRegions,
|
||||
int& configureLocked,
|
||||
int& logAntiQuorum,
|
||||
bool& startIncompatibleProcess) {
|
||||
// Populates the TestConfig fields according to what is found in the test file.
|
||||
void checkTestConf(const char* testFile, TestConfig* testConfig) {
|
||||
std::ifstream ifs;
|
||||
ifs.open(testFile, std::ifstream::in);
|
||||
if (!ifs.good())
|
||||
|
@ -1630,26 +1640,31 @@ void checkTestConf(const char* testFile,
|
|||
std::string value = removeWhitespace(line.substr(found + 1));
|
||||
|
||||
if (attrib == "extraDB") {
|
||||
sscanf(value.c_str(), "%d", &extraDB);
|
||||
sscanf(value.c_str(), "%d", &testConfig->extraDB);
|
||||
}
|
||||
|
||||
if (attrib == "minimumReplication") {
|
||||
sscanf(value.c_str(), "%d", &minimumReplication);
|
||||
sscanf(value.c_str(), "%d", &testConfig->minimumReplication);
|
||||
}
|
||||
|
||||
if (attrib == "minimumRegions") {
|
||||
sscanf(value.c_str(), "%d", &minimumRegions);
|
||||
sscanf(value.c_str(), "%d", &testConfig->minimumRegions);
|
||||
}
|
||||
|
||||
if (attrib == "configureLocked") {
|
||||
sscanf(value.c_str(), "%d", &configureLocked);
|
||||
sscanf(value.c_str(), "%d", &testConfig->configureLocked);
|
||||
}
|
||||
|
||||
if (attrib == "startIncompatibleProcess") {
|
||||
startIncompatibleProcess = strcmp(value.c_str(), "true") == 0;
|
||||
testConfig->startIncompatibleProcess = strcmp(value.c_str(), "true") == 0;
|
||||
}
|
||||
|
||||
if (attrib == "logAntiQuorum") {
|
||||
sscanf(value.c_str(), "%d", &logAntiQuorum);
|
||||
sscanf(value.c_str(), "%d", &testConfig->logAntiQuorum);
|
||||
}
|
||||
|
||||
if (attrib == "storageEngineExcludeType") {
|
||||
sscanf(value.c_str(), "%d", &testConfig->storageEngineExcludeType);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1665,24 +1680,13 @@ ACTOR void setupAndRun(std::string dataFolder,
|
|||
state Optional<ClusterConnectionString> connFile;
|
||||
state Standalone<StringRef> startingConfiguration;
|
||||
state int testerCount = 1;
|
||||
state int extraDB = 0;
|
||||
state int minimumReplication = 0;
|
||||
state int minimumRegions = 0;
|
||||
state int configureLocked = 0;
|
||||
state int logAntiQuorum = -1;
|
||||
state bool startIncompatibleProcess = false;
|
||||
checkTestConf(testFile,
|
||||
extraDB,
|
||||
minimumReplication,
|
||||
minimumRegions,
|
||||
configureLocked,
|
||||
logAntiQuorum,
|
||||
startIncompatibleProcess);
|
||||
g_simulator.hasDiffProtocolProcess = startIncompatibleProcess;
|
||||
state TestConfig testConfig;
|
||||
checkTestConf(testFile, &testConfig);
|
||||
g_simulator.hasDiffProtocolProcess = testConfig.startIncompatibleProcess;
|
||||
g_simulator.setDiffProtocol = false;
|
||||
|
||||
state ProtocolVersion protocolVersion = currentProtocolVersion;
|
||||
if (startIncompatibleProcess) {
|
||||
if (testConfig.startIncompatibleProcess) {
|
||||
// isolates right most 1 bit of compatibleProtocolVersionMask to make this protocolVersion incompatible
|
||||
uint64_t minAddToMakeIncompatible =
|
||||
ProtocolVersion::compatibleProtocolVersionMask & ~(ProtocolVersion::compatibleProtocolVersionMask - 1);
|
||||
|
@ -1717,7 +1721,7 @@ ACTOR void setupAndRun(std::string dataFolder,
|
|||
&testerCount,
|
||||
&connFile,
|
||||
&startingConfiguration,
|
||||
extraDB,
|
||||
testConfig,
|
||||
whitelistBinPaths,
|
||||
protocolVersion),
|
||||
100.0));
|
||||
|
@ -1732,12 +1736,8 @@ ACTOR void setupAndRun(std::string dataFolder,
|
|||
&testerCount,
|
||||
&connFile,
|
||||
&startingConfiguration,
|
||||
extraDB,
|
||||
minimumReplication,
|
||||
minimumRegions,
|
||||
whitelistBinPaths,
|
||||
configureLocked,
|
||||
logAntiQuorum,
|
||||
testConfig,
|
||||
protocolVersion);
|
||||
wait(delay(1.0)); // FIXME: WHY!!! //wait for machines to boot
|
||||
}
|
||||
|
|
|
@ -1585,7 +1585,7 @@ static JsonBuilderObject configurationFetcher(Optional<DatabaseConfiguration> co
|
|||
|
||||
ACTOR static Future<JsonBuilderObject> dataStatusFetcher(WorkerDetails ddWorker,
|
||||
DatabaseConfiguration configuration,
|
||||
int* minReplicasRemaining) {
|
||||
int* minStorageReplicasRemaining) {
|
||||
state JsonBuilderObject statusObjData;
|
||||
|
||||
try {
|
||||
|
@ -1648,9 +1648,9 @@ ACTOR static Future<JsonBuilderObject> dataStatusFetcher(WorkerDetails ddWorker,
|
|||
}
|
||||
|
||||
JsonBuilderArray teamTrackers;
|
||||
for (int i = 0; i < 2; i++) {
|
||||
TraceEventFields inFlight = dataInfo[3 + i];
|
||||
if (!inFlight.size()) {
|
||||
for (int i = 3; i < 5; i++) {
|
||||
const TraceEventFields& inFlight = dataInfo[i];
|
||||
if (inFlight.size() == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -1674,19 +1674,16 @@ ACTOR static Future<JsonBuilderObject> dataStatusFetcher(WorkerDetails ddWorker,
|
|||
stateSectionObj["healthy"] = false;
|
||||
stateSectionObj["name"] = "missing_data";
|
||||
stateSectionObj["description"] = "No replicas remain of some data";
|
||||
stateSectionObj["min_replicas_remaining"] = 0;
|
||||
replicas = 0;
|
||||
} else if (highestPriority >= SERVER_KNOBS->PRIORITY_TEAM_1_LEFT) {
|
||||
stateSectionObj["healthy"] = false;
|
||||
stateSectionObj["name"] = "healing";
|
||||
stateSectionObj["description"] = "Only one replica remains of some data";
|
||||
stateSectionObj["min_replicas_remaining"] = 1;
|
||||
replicas = 1;
|
||||
} else if (highestPriority >= SERVER_KNOBS->PRIORITY_TEAM_2_LEFT) {
|
||||
stateSectionObj["healthy"] = false;
|
||||
stateSectionObj["name"] = "healing";
|
||||
stateSectionObj["description"] = "Only two replicas remain of some data";
|
||||
stateSectionObj["min_replicas_remaining"] = 2;
|
||||
replicas = 2;
|
||||
} else if (highestPriority >= SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY) {
|
||||
stateSectionObj["healthy"] = false;
|
||||
|
@ -1720,6 +1717,10 @@ ACTOR static Future<JsonBuilderObject> dataStatusFetcher(WorkerDetails ddWorker,
|
|||
stateSectionObj["name"] = "healthy";
|
||||
}
|
||||
|
||||
// Track the number of min replicas the storage servers in this region has. The sum of the replicas from
|
||||
// both primary and remote region give the total number of data replicas this database currently has.
|
||||
stateSectionObj["min_replicas_remaining"] = replicas;
|
||||
|
||||
if (!stateSectionObj.empty()) {
|
||||
team_tracker["state"] = stateSectionObj;
|
||||
teamTrackers.push_back(team_tracker);
|
||||
|
@ -1728,10 +1729,13 @@ ACTOR static Future<JsonBuilderObject> dataStatusFetcher(WorkerDetails ddWorker,
|
|||
}
|
||||
}
|
||||
|
||||
// Update minStorageReplicasRemaining. It is mainly used for fault tolerance computation later. Note that
|
||||
// FDB treats the entire remote region as one zone, and all the zones in the remote region are in the same
|
||||
// failure domain.
|
||||
if (primary) {
|
||||
*minReplicasRemaining = std::max(*minReplicasRemaining, 0) + replicas;
|
||||
*minStorageReplicasRemaining = std::max(*minStorageReplicasRemaining, 0) + replicas;
|
||||
} else if (replicas > 0) {
|
||||
*minReplicasRemaining = std::max(*minReplicasRemaining, 0) + 1;
|
||||
*minStorageReplicasRemaining = std::max(*minStorageReplicasRemaining, 0) + 1;
|
||||
}
|
||||
}
|
||||
statusObjData["team_trackers"] = teamTrackers;
|
||||
|
@ -1850,7 +1854,7 @@ ACTOR static Future<vector<std::pair<GrvProxyInterface, EventMap>>> getGrvProxie
|
|||
return results;
|
||||
}
|
||||
|
||||
// Returns the number of zones eligble for recruiting new tLogs after failures, to maintain the current replication
// Returns the number of zones eligble for recruiting new tLogs after zone failures, to maintain the current replication
// factor.
static int getExtraTLogEligibleZones(const vector<WorkerDetails>& workers, const DatabaseConfiguration& configuration) {
std::set<StringRef> allZones;

@@ -1868,17 +1872,20 @@ static int getExtraTLogEligibleZones(const vector<WorkerDetails>& workers, const
if (configuration.regions.size() == 0) {
return allZones.size() - std::max(configuration.tLogReplicationFactor, configuration.storageTeamSize);
}

int extraTlogEligibleZones = 0;
int regionsWithNonNegativePriority = 0;
for (auto& region : configuration.regions) {
int maxRequiredReplicationFactor =
std::max(configuration.remoteTLogReplicationFactor,
std::max(configuration.tLogReplicationFactor, configuration.storageTeamSize));
for (const auto& region : configuration.regions) {
if (region.priority >= 0) {
int eligible = dcId_zone[region.dcId].size() -
std::max(configuration.remoteTLogReplicationFactor,
std::max(configuration.tLogReplicationFactor, configuration.storageTeamSize));
int eligible = dcId_zone[region.dcId].size() - maxRequiredReplicationFactor;

// FIXME: does not take into account fallback satellite policies
if (region.satelliteTLogReplicationFactor > 0 && configuration.usableRegions > 1) {
int totalSatelliteEligible = 0;
for (auto& sat : region.satellites) {
for (const auto& sat : region.satellites) {
totalSatelliteEligible += dcId_zone[sat.dcId].size();
}
eligible = std::min<int>(eligible, totalSatelliteEligible - region.satelliteTLogReplicationFactor);

@@ -1890,6 +1897,8 @@ static int getExtraTLogEligibleZones(const vector<WorkerDetails>& workers, const
}
}
if (regionsWithNonNegativePriority > 1) {
// If the database is replicated across multiple regions, we can afford to lose one entire region without
// losing data.
extraTlogEligibleZones++;
}
return extraTlogEligibleZones;

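As a worked example of the single-region early return above (the numbers are hypothetical): with 7 distinct zones, tLogReplicationFactor = 3 and storageTeamSize = 3, the function reports 7 - max(3, 3) = 4 zones that could still host a replacement TLog after failures. The later regionsWithNonNegativePriority > 1 branch then adds one extra zone of slack when multiple regions hold the data, since an entire region can be lost without losing data. A tiny sketch of the single-region arithmetic:

#include <algorithm>
#include <cstdio>

int main() {
	// Hypothetical single-region cluster.
	int zoneCount = 7;
	int tLogReplicationFactor = 3;
	int storageTeamSize = 3;

	// Mirrors the early return: zones beyond what replication already requires.
	int extraTlogEligibleZones = zoneCount - std::max(tLogReplicationFactor, storageTeamSize);
	printf("extra TLog-eligible zones: %d\n", extraTlogEligibleZones); // prints 4
	return 0;
}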
@ -2229,9 +2238,16 @@ static JsonBuilderObject tlogFetcher(int* logFaultTolerance,
|
|||
int minFaultTolerance = 1000;
|
||||
int localSetsWithNonNegativeFaultTolerance = 0;
|
||||
|
||||
for (int i = 0; i < tLogs.size(); i++) {
|
||||
for (const auto& tLogSet : tLogs) {
|
||||
if (tLogSet.tLogs.size() == 0) {
|
||||
// We can have LogSets where there are no tLogs but some LogRouters. It's the way
|
||||
// recruiting is implemented for old LogRouters in TagPartitionedLogSystem, where
|
||||
// it adds an empty LogSet for missing locality.
|
||||
continue;
|
||||
}
|
||||
|
||||
int failedLogs = 0;
|
||||
for (auto& log : tLogs[i].tLogs) {
|
||||
for (auto& log : tLogSet.tLogs) {
|
||||
JsonBuilderObject logObj;
|
||||
bool failed = !log.present() || !address_workers.count(log.interf().address());
|
||||
logObj["id"] = log.id().shortString();
|
||||
|
@ -2245,13 +2261,14 @@ static JsonBuilderObject tlogFetcher(int* logFaultTolerance,
|
|||
}
|
||||
}
|
||||
|
||||
if (tLogs[i].isLocal) {
|
||||
int currentFaultTolerance = tLogs[i].tLogReplicationFactor - 1 - tLogs[i].tLogWriteAntiQuorum - failedLogs;
|
||||
if (tLogSet.isLocal) {
|
||||
ASSERT_WE_THINK(tLogSet.tLogReplicationFactor > 0);
|
||||
int currentFaultTolerance = tLogSet.tLogReplicationFactor - 1 - tLogSet.tLogWriteAntiQuorum - failedLogs;
|
||||
if (currentFaultTolerance >= 0) {
|
||||
localSetsWithNonNegativeFaultTolerance++;
|
||||
}
|
||||
|
||||
if (tLogs[i].locality == tagLocalitySatellite) {
|
||||
if (tLogSet.locality == tagLocalitySatellite) {
|
||||
// FIXME: This hack to bump satellite fault tolerance, is to make it consistent
|
||||
// with 6.2.
|
||||
minFaultTolerance = std::min(minFaultTolerance, currentFaultTolerance + 1);
|
||||
|
@ -2260,17 +2277,17 @@ static JsonBuilderObject tlogFetcher(int* logFaultTolerance,
|
|||
}
|
||||
}
|
||||
|
||||
if (tLogs[i].isLocal && tLogs[i].locality == tagLocalitySatellite) {
|
||||
sat_log_replication_factor = tLogs[i].tLogReplicationFactor;
|
||||
sat_log_write_anti_quorum = tLogs[i].tLogWriteAntiQuorum;
|
||||
sat_log_fault_tolerance = tLogs[i].tLogReplicationFactor - 1 - tLogs[i].tLogWriteAntiQuorum - failedLogs;
|
||||
} else if (tLogs[i].isLocal) {
|
||||
log_replication_factor = tLogs[i].tLogReplicationFactor;
|
||||
log_write_anti_quorum = tLogs[i].tLogWriteAntiQuorum;
|
||||
log_fault_tolerance = tLogs[i].tLogReplicationFactor - 1 - tLogs[i].tLogWriteAntiQuorum - failedLogs;
|
||||
if (tLogSet.isLocal && tLogSet.locality == tagLocalitySatellite) {
|
||||
sat_log_replication_factor = tLogSet.tLogReplicationFactor;
|
||||
sat_log_write_anti_quorum = tLogSet.tLogWriteAntiQuorum;
|
||||
sat_log_fault_tolerance = tLogSet.tLogReplicationFactor - 1 - tLogSet.tLogWriteAntiQuorum - failedLogs;
|
||||
} else if (tLogSet.isLocal) {
|
||||
log_replication_factor = tLogSet.tLogReplicationFactor;
|
||||
log_write_anti_quorum = tLogSet.tLogWriteAntiQuorum;
|
||||
log_fault_tolerance = tLogSet.tLogReplicationFactor - 1 - tLogSet.tLogWriteAntiQuorum - failedLogs;
|
||||
} else {
|
||||
remote_log_replication_factor = tLogs[i].tLogReplicationFactor;
|
||||
remote_log_fault_tolerance = tLogs[i].tLogReplicationFactor - 1 - failedLogs;
|
||||
remote_log_replication_factor = tLogSet.tLogReplicationFactor;
|
||||
remote_log_fault_tolerance = tLogSet.tLogReplicationFactor - 1 - failedLogs;
|
||||
}
|
||||
}
|
||||
if (minFaultTolerance == 1000) {
|
||||
|
@ -2313,6 +2330,8 @@ static JsonBuilderArray tlogFetcher(int* logFaultTolerance,
|
|||
std::unordered_map<NetworkAddress, WorkerInterface> const& address_workers) {
|
||||
JsonBuilderArray tlogsArray;
|
||||
JsonBuilderObject tlogsStatus;
|
||||
|
||||
// First, fetch from the current TLog generation.
|
||||
tlogsStatus = tlogFetcher(logFaultTolerance, db->get().logSystemConfig.tLogs, address_workers);
|
||||
tlogsStatus["epoch"] = db->get().logSystemConfig.epoch;
|
||||
tlogsStatus["current"] = true;
|
||||
|
@ -2320,6 +2339,8 @@ static JsonBuilderArray tlogFetcher(int* logFaultTolerance,
|
|||
tlogsStatus["begin_version"] = db->get().logSystemConfig.recoveredAt.get();
|
||||
}
|
||||
tlogsArray.push_back(tlogsStatus);
|
||||
|
||||
// fetch all the old generations of TLogs.
|
||||
for (auto it : db->get().logSystemConfig.oldTLogs) {
|
||||
JsonBuilderObject oldTlogsStatus = tlogFetcher(logFaultTolerance, it.tLogs, address_workers);
|
||||
oldTlogsStatus["epoch"] = it.epoch;
|
||||
|
@ -2335,7 +2356,7 @@ static JsonBuilderObject faultToleranceStatusFetcher(DatabaseConfiguration confi
|
|||
ServerCoordinators coordinators,
|
||||
std::vector<WorkerDetails>& workers,
|
||||
int extraTlogEligibleZones,
|
||||
int minReplicasRemaining,
|
||||
int minStorageReplicasRemaining,
|
||||
int oldLogFaultTolerance,
|
||||
int fullyReplicatedRegions,
|
||||
bool underMaintenance) {
|
||||
|
@ -2375,8 +2396,8 @@ static JsonBuilderObject faultToleranceStatusFetcher(DatabaseConfiguration confi
|
|||
// max zone failures that we can tolerate to not lose data
|
||||
int zoneFailuresWithoutLosingData = std::min(maxZoneFailures, maxCoordinatorZoneFailures);
|
||||
|
||||
if (minReplicasRemaining >= 0) {
|
||||
zoneFailuresWithoutLosingData = std::min(zoneFailuresWithoutLosingData, minReplicasRemaining - 1);
|
||||
if (minStorageReplicasRemaining >= 0) {
|
||||
zoneFailuresWithoutLosingData = std::min(zoneFailuresWithoutLosingData, minStorageReplicasRemaining - 1);
|
||||
}
|
||||
|
||||
// oldLogFaultTolerance means max failures we can tolerate to lose logs data. -1 means we lose data or availability.
|
||||
|
@ -2625,10 +2646,9 @@ ACTOR Future<StatusReply> clusterGetStatus(
|
|||
Version datacenterVersionDifference) {
|
||||
state double tStart = timer();
|
||||
|
||||
// Check if master worker is present
|
||||
state JsonBuilderArray messages;
|
||||
state std::set<std::string> status_incomplete_reasons;
|
||||
state WorkerDetails mWorker;
|
||||
state WorkerDetails mWorker; // Master worker
|
||||
state WorkerDetails ddWorker; // DataDistributor worker
|
||||
state WorkerDetails rkWorker; // Ratekeeper worker
|
||||
|
||||
|
@ -2641,6 +2661,7 @@ ACTOR Future<StatusReply> clusterGetStatus(
|
|||
messages.push_back(
|
||||
JsonString::makeMessage("unreachable_master_worker", "Unable to locate the master worker."));
|
||||
}
|
||||
|
||||
// Get the DataDistributor worker interface
|
||||
Optional<WorkerDetails> _ddWorker;
|
||||
if (db->get().distributor.present()) {
|
||||
|
@ -2669,12 +2690,12 @@ ACTOR Future<StatusReply> clusterGetStatus(
|
|||
|
||||
// Get latest events for various event types from ALL workers
|
||||
// WorkerEvents is a map of worker's NetworkAddress to its event string
|
||||
// The pair represents worker responses and a set of worker NetworkAddress strings which did not respond
|
||||
// The pair represents worker responses and a set of worker NetworkAddress strings which did not respond.
|
||||
std::vector<Future<Optional<std::pair<WorkerEvents, std::set<std::string>>>>> futures;
|
||||
futures.push_back(latestEventOnWorkers(workers, "MachineMetrics"));
|
||||
futures.push_back(latestEventOnWorkers(workers, "ProcessMetrics"));
|
||||
futures.push_back(latestEventOnWorkers(workers, "NetworkMetrics"));
|
||||
futures.push_back(latestErrorOnWorkers(workers));
|
||||
futures.push_back(latestErrorOnWorkers(workers)); // Get all latest errors.
|
||||
futures.push_back(latestEventOnWorkers(workers, "TraceFileOpenError"));
|
||||
futures.push_back(latestEventOnWorkers(workers, "ProgramStart"));
|
||||
|
||||
|
@ -2689,13 +2710,13 @@ ACTOR Future<StatusReply> clusterGetStatus(
|
|||
|
||||
// For each (optional) pair, if the pair is present and not empty then add the unreachable workers to the set.
|
||||
for (auto pair : workerEventsVec) {
|
||||
if (pair.present() && pair.get().second.size())
|
||||
if (pair.present() && !pair.get().second.empty())
|
||||
mergeUnreachable.insert(pair.get().second.begin(), pair.get().second.end());
|
||||
}
|
||||
|
||||
// We now have a unique set of workers who were in some way unreachable. If there is anything in that set,
|
||||
// create a message for it and include the list of unreachable processes.
|
||||
if (mergeUnreachable.size()) {
|
||||
if (!mergeUnreachable.empty()) {
|
||||
JsonBuilderObject message =
|
||||
JsonBuilder::makeMessage("unreachable_processes", "The cluster has some unreachable processes.");
|
||||
JsonBuilderArray unreachableProcs;
|
||||
|
@ -2806,11 +2827,11 @@ ACTOR Future<StatusReply> clusterGetStatus(
|
|||
state Future<ErrorOr<vector<std::pair<GrvProxyInterface, EventMap>>>> grvProxyFuture =
|
||||
errorOr(getGrvProxiesAndMetrics(db, address_workers));
|
||||
|
||||
state int minReplicasRemaining = -1;
|
||||
state int minStorageReplicasRemaining = -1;
|
||||
state int fullyReplicatedRegions = -1;
|
||||
state Future<Optional<Value>> primaryDCFO = getActivePrimaryDC(cx, &fullyReplicatedRegions, &messages);
|
||||
std::vector<Future<JsonBuilderObject>> futures2;
|
||||
futures2.push_back(dataStatusFetcher(ddWorker, configuration.get(), &minReplicasRemaining));
|
||||
futures2.push_back(dataStatusFetcher(ddWorker, configuration.get(), &minStorageReplicasRemaining));
|
||||
futures2.push_back(workloadStatusFetcher(
|
||||
db, workers, mWorker, rkWorker, &qos, &data_overlay, &status_incomplete_reasons, storageServerFuture));
|
||||
futures2.push_back(layerStatusFetcher(cx, &messages, &status_incomplete_reasons));
|
||||
|
@ -2825,18 +2846,16 @@ ACTOR Future<StatusReply> clusterGetStatus(
|
|||
statusObj["logs"] = tlogFetcher(&logFaultTolerance, db, address_workers);
|
||||
}
|
||||
|
||||
if (configuration.present()) {
|
||||
int extraTlogEligibleZones = getExtraTLogEligibleZones(workers, configuration.get());
|
||||
statusObj["fault_tolerance"] =
|
||||
faultToleranceStatusFetcher(configuration.get(),
|
||||
coordinators,
|
||||
workers,
|
||||
extraTlogEligibleZones,
|
||||
minReplicasRemaining,
|
||||
logFaultTolerance,
|
||||
fullyReplicatedRegions,
|
||||
loadResult.present() && loadResult.get().healthyZone.present());
|
||||
}
|
||||
int extraTlogEligibleZones = getExtraTLogEligibleZones(workers, configuration.get());
|
||||
statusObj["fault_tolerance"] =
|
||||
faultToleranceStatusFetcher(configuration.get(),
|
||||
coordinators,
|
||||
workers,
|
||||
extraTlogEligibleZones,
|
||||
minStorageReplicasRemaining,
|
||||
logFaultTolerance,
|
||||
fullyReplicatedRegions,
|
||||
loadResult.present() && loadResult.get().healthyZone.present());
|
||||
|
||||
state JsonBuilderObject configObj =
|
||||
configurationFetcher(configuration, coordinators, &status_incomplete_reasons);
|
||||
|
|
|
@@ -99,6 +99,24 @@ struct WorkloadRequest {
}
};

// Configuration details specified in workload test files that change the simulation
// environment details
struct TestConfig {
int extraDB = 0;
int minimumReplication = 0;
int minimumRegions = 0;
int configureLocked = 0;
bool startIncompatibleProcess = false;
int logAntiQuorum = -1;
// Storage Engine Types: Verify match with SimulationConfig::generateNormalConfig
// -1 = None
// 0 = "ssd"
// 1 = "memory"
// 2 = "memory-radixtree-beta"
// 3 = "ssd-redwood-experimental"
int storageEngineExcludeType = -1;
};

struct TesterInterface {
constexpr static FileIdentifier file_identifier = 4465210;
RequestStream<WorkloadRequest> recruitments;

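These fields are populated by checkTestConf, which scans the test file for attrib=value lines and sscanf's each value into the matching member (startIncompatibleProcess is compared against the string "true"). A hypothetical test-file fragment using only the attributes parsed above might look like:

extraDB=1
minimumReplication=1
minimumRegions=2
configureLocked=0
startIncompatibleProcess=true
logAntiQuorum=0
storageEngineExcludeType=3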
@@ -464,7 +464,7 @@ Future<Void> startSystemMonitor(std::string dataFolder,
SystemMonitorMachineState(dataFolder, dcId, zoneId, machineId, g_network->getLocalAddress().ip));

systemMonitor();
return recurring(&systemMonitor, 5.0, TaskPriority::FlushTrace);
return recurring(&systemMonitor, SERVER_KNOBS->SYSTEM_MONITOR_FREQUENCY, TaskPriority::FlushTrace);
}

void testIndexedSet();

@ -1406,22 +1406,26 @@ ACTOR Future<Void> rejoinRequestHandler(Reference<MasterData> self) {
}
}

// Keeps the coordinated state (cstate) updated as the set of recruited tlogs changes through recovery.
ACTOR Future<Void> trackTlogRecovery(Reference<MasterData> self,
Reference<AsyncVar<Reference<ILogSystem>>> oldLogSystems,
Future<Void> minRecoveryDuration) {
state Future<Void> rejoinRequests = Never();
state DBRecoveryCount recoverCount = self->cstate.myDBState.recoveryCount + 1;
state DatabaseConfiguration configuration =
self->configuration; // self->configuration can be changed by the configurationMonitor, so we need a copy
loop {
state DBCoreState newState;
self->logSystem->toCoreState(newState);
newState.recoveryCount = recoverCount;
state Future<Void> changed = self->logSystem->onCoreStateChanged();
ASSERT(newState.tLogs[0].tLogWriteAntiQuorum == self->configuration.tLogWriteAntiQuorum &&
newState.tLogs[0].tLogReplicationFactor == self->configuration.tLogReplicationFactor);

ASSERT(newState.tLogs[0].tLogWriteAntiQuorum == configuration.tLogWriteAntiQuorum &&
newState.tLogs[0].tLogReplicationFactor == configuration.tLogReplicationFactor);

state bool allLogs =
newState.tLogs.size() ==
self->configuration.expectedLogSets(self->primaryDcId.size() ? self->primaryDcId[0] : Optional<Key>());
configuration.expectedLogSets(self->primaryDcId.size() ? self->primaryDcId[0] : Optional<Key>());
state bool finalUpdate = !newState.oldTLogData.size() && allLogs;
wait(self->cstate.write(newState, finalUpdate));
wait(minRecoveryDuration);

@ -1455,7 +1459,7 @@ ACTOR Future<Void> trackTlogRecovery(Reference<MasterData> self,
.trackLatest("MasterRecoveryState");
}

if (newState.oldTLogData.size() && self->configuration.repopulateRegionAntiQuorum > 0 &&
if (newState.oldTLogData.size() && configuration.repopulateRegionAntiQuorum > 0 &&
self->logSystem->remoteStorageRecovered()) {
TraceEvent(SevWarnAlways, "RecruitmentStalled_RemoteStorageRecovered", self->dbgid);
self->recruitmentStalled->set(true);
@ -363,6 +363,64 @@ TestWorkload* getWorkloadIface(WorkloadRequest work, Reference<AsyncVar<ServerDB
return compound;
}

/**
 * Only works in simulation. This method prints all simulated processes in a human readable form to stdout. It groups
 * processes by data center, data hall, zone, and machine (in this order).
 */
void printSimulatedTopology() {
if (!g_network->isSimulated()) {
return;
}
auto processes = g_simulator.getAllProcesses();
std::sort(processes.begin(), processes.end(), [](ISimulator::ProcessInfo* lhs, ISimulator::ProcessInfo* rhs) {
auto l = lhs->locality;
auto r = rhs->locality;
if (l.dcId() != r.dcId()) {
return l.dcId() < r.dcId();
}
if (l.dataHallId() != r.dataHallId()) {
return l.dataHallId() < r.dataHallId();
}
if (l.zoneId() != r.zoneId()) {
return l.zoneId() < r.zoneId();
}
if (l.machineId() != r.machineId()) {
return l.machineId() < r.machineId();
}
return lhs->address < rhs->address;
});
printf("Simulated Cluster Topology:\n");
printf("===========================\n");
Optional<Standalone<StringRef>> dcId, dataHallId, zoneId, machineId;
for (auto p : processes) {
std::string indent = "";
if (dcId != p->locality.dcId()) {
dcId = p->locality.dcId();
printf("%sdcId: %s\n", indent.c_str(), p->locality.describeDcId().c_str());
}
indent += " ";
if (dataHallId != p->locality.dataHallId()) {
dataHallId = p->locality.dataHallId();
printf("%sdataHallId: %s\n", indent.c_str(), p->locality.describeDataHall().c_str());
}
indent += " ";
if (zoneId != p->locality.zoneId()) {
zoneId = p->locality.zoneId();
printf("%szoneId: %s\n", indent.c_str(), p->locality.describeZone().c_str());
}
indent += " ";
if (machineId != p->locality.machineId()) {
machineId = p->locality.machineId();
printf("%smachineId: %s\n", indent.c_str(), p->locality.describeMachineId().c_str());
}
indent += " ";
printf("%sAddress: %s\n", indent.c_str(), p->address.toString().c_str());
indent += " ";
printf("%sClass: %s\n", indent.c_str(), p->startingClass.toString().c_str());
printf("%sName: %s\n", indent.c_str(), p->name);
}
}

ACTOR Future<Void> databaseWarmer(Database cx) {
loop {
state Transaction tr(cx);
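For reference, printSimulatedTopology() produces a nested, space-indented listing; a hypothetical run with two processes on one machine might print something like the following (all IDs, addresses, and class names below are made up for illustration):

Simulated Cluster Topology:
===========================
dcId: 0
 dataHallId: 0-1
  zoneId: 0-1-xyz
   machineId: 0-1-xyz-1
    Address: 2.0.1.0:1
     Class: storage
     Name: Server
    Address: 2.0.1.1:1
     Class: stateless
     Name: Server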
@ -977,7 +1035,9 @@ std::map<std::string, std::function<void(const std::string&)>> testSpecGlobalKey
TraceEvent("TestParserTest").detail("ClientInfoLogging", value);
} },
{ "startIncompatibleProcess",
[](const std::string& value) { TraceEvent("TestParserTest").detail("ParsedStartIncompatibleProcess", value); } }
[](const std::string& value) { TraceEvent("TestParserTest").detail("ParsedStartIncompatibleProcess", value); } },
{ "storageEngineExcludeType",
[](const std::string& value) { TraceEvent("TestParserTest").detail("ParsedStorageEngineExcludeType", ""); } }
};

std::map<std::string, std::function<void(const std::string& value, TestSpec* spec)>> testSpecTestKeys = {
@ -1291,6 +1351,24 @@ ACTOR Future<Void> monitorServerDBInfo(Reference<AsyncVar<Optional<ClusterContro
}
}

/**
 * \brief Test orchestrator: sends test specification to testers in the right order and collects the results.
 *
 * There are multiple actors in this file with similar names (runTest, runTests) and slightly different signatures.
 *
 * This is the actual orchestrator. It reads the test specifications (from tests), prepares the cluster (by running the
 * configure command given in startingConfiguration) and then runs the workload.
 *
 * \param cc The cluster controller interface
 * \param ci Same as cc.clientInterface
 * \param testers The interfaces of the testers that should run the actual workloads
 * \param tests The test specifications to run
 * \param startingConfiguration If non-empty, the orchestrator will attempt to set this configuration before starting
 * the tests.
 * \param locality client locality (it seems this is unused?)
 *
 * \returns A future which will be set after all tests finished.
 */
ACTOR Future<Void> runTests(Reference<AsyncVar<Optional<struct ClusterControllerFullInterface>>> cc,
Reference<AsyncVar<Optional<struct ClusterInterface>>> ci,
vector<TesterInterface> testers,

@ -1346,6 +1424,7 @@ ACTOR Future<Void> runTests(Reference<AsyncVar<Optional<struct ClusterController

// Change the configuration (and/or create the database) if necessary
printf("startingConfiguration:%s start\n", startingConfiguration.toString().c_str());
printSimulatedTopology();
if (useDB && startingConfiguration != StringRef()) {
try {
wait(timeoutError(changeConfiguration(cx, testers, startingConfiguration), 2000.0));
@ -1402,6 +1481,24 @@ ACTOR Future<Void> runTests(Reference<AsyncVar<Optional<struct ClusterController
return Void();
}

/**
 * \brief Proxy function that waits until enough testers are available and then calls into the orchestrator.
 *
 * There are multiple actors in this file with similar names (runTest, runTests) and slightly different signatures.
 *
 * This actor wraps the actual orchestrator (also called runTests). But before calling that actor, it waits for enough
 * testers to come up.
 *
 * \param cc The cluster controller interface
 * \param ci Same as cc.clientInterface
 * \param tests The test specifications to run
 * \param minTestersExpected The number of testers to expect. This actor will block until it can find this many testers.
 * \param startingConfiguration If non-empty, the orchestrator will attempt to set this configuration before starting
 * the tests.
 * \param locality client locality (it seems this is unused?)
 *
 * \returns A future which will be set after all tests finished.
 */
ACTOR Future<Void> runTests(Reference<AsyncVar<Optional<struct ClusterControllerFullInterface>>> cc,
Reference<AsyncVar<Optional<struct ClusterInterface>>> ci,
vector<TestSpec> tests,
@ -1443,6 +1540,32 @@ ACTOR Future<Void> runTests(Reference<AsyncVar<Optional<struct ClusterController
return Void();
}

/**
 * \brief Set up testing environment and run the given tests on a cluster.
 *
 * There are multiple actors in this file with similar names (runTest, runTests) and slightly different signatures.
 *
 * This actor is usually the first entry point into the test environment. It itself doesn't implement too much
 * functionality. Its main purpose is to generate the test specification from passed arguments and then call into the
 * correct actor which will orchestrate the actual test.
 *
 * \param connFile A cluster connection file. Not all tests require a functional cluster but all tests require
 * a cluster file.
 * \param whatToRun TEST_TYPE_FROM_FILE to read the test description from a passed toml file or
 * TEST_TYPE_CONSISTENCY_CHECK to generate a test spec for consistency checking
 * \param at TEST_HERE: this process will act as a test client and execute the given workload. TEST_ON_SERVERS: Run a
 * test client on every worker in the cluster. TEST_ON_TESTERS: Run a test client on all servers with class Test
 * \param minTestersExpected If at is not TEST_HERE, this will instruct the orchestrator to wait until it can find at
 * least minTestersExpected test-clients. This is usually passed through from a command line argument. In simulation,
 * the simulator will pass the number of testers that it started.
 * \param fileName The path to the toml-file containing the test description. Is ignored if whatToRun !=
 * TEST_TYPE_FROM_FILE
 * \param startingConfiguration Can be used to configure a cluster before running the test. If this is an empty string,
 * it will be ignored, otherwise it will be passed to changeConfiguration.
 * \param locality The client locality to be used. This is only used if at == TEST_HERE
 *
 * \returns A future which will be set after all tests finished.
 */
ACTOR Future<Void> runTests(Reference<ClusterConnectionFile> connFile,
test_type_t whatToRun,
test_location_t at,
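As a rough usage illustration of this entry point, a driver might call the connection-file overload as sketched below. This would sit inside an ACTOR; the argument order past the first three parameters follows the \param list above, and the file name and values are assumptions rather than an actual fdbserver call site:

// Hypothetical call: run the workloads described in a toml file on all
// processes of class "test", waiting for at least one tester to register.
wait(runTests(connFile,
              TEST_TYPE_FROM_FILE,
              TEST_ON_TESTERS,
              1,                           // minTestersExpected
              "tests/fast/CycleTest.toml", // fileName (made up for illustration)
              StringRef(),                 // startingConfiguration: leave the cluster as configured
              LocalityData()));            // locality: only used with TEST_HERE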
@ -52,6 +52,8 @@ struct ChangeConfigWorkload : TestWorkload {

void getMetrics(vector<PerfMetric>& m) override {}

// When simulating two clusters for DR tests, this actor sets the starting configuration
// for the extra cluster.
ACTOR Future<Void> extraDatabaseConfigure(ChangeConfigWorkload* self) {
if (g_network->isSimulated() && g_simulator.extraDB) {
auto extraFile = makeReference<ClusterConnectionFile>(*g_simulator.extraDB);

@ -59,10 +61,15 @@ struct ChangeConfigWorkload : TestWorkload {

wait(delay(5 * deterministicRandom()->random01()));
if (self->configMode.size()) {
if (g_simulator.startingDisabledConfiguration != "") {
// It is not safe to allow automatic failover to a region which is not fully replicated,
// so wait for both regions to be fully replicated before enabling failover
wait(success(changeConfig(extraDB, g_simulator.startingDisabledConfiguration, true)));
TraceEvent("WaitForReplicasExtra");
wait(waitForFullReplication(extraDB));
TraceEvent("WaitForReplicasExtraEnd");
}
wait(success(changeConfig(extraDB, self->configMode, true)));
TraceEvent("WaitForReplicasExtra");
wait(waitForFullReplication(extraDB));
TraceEvent("WaitForReplicasExtraEnd");
}
if (self->networkAddresses.size()) {
if (self->networkAddresses == "auto")

@ -75,6 +82,8 @@ struct ChangeConfigWorkload : TestWorkload {
return Void();
}

// Either changes the database configuration, or changes the coordinators based on the parameters
// of the workload.
ACTOR Future<Void> ChangeConfigClient(Database cx, ChangeConfigWorkload* self) {
wait(delay(self->minDelayBeforeChange +
deterministicRandom()->random01() * (self->maxDelayBeforeChange - self->minDelayBeforeChange)));

@ -86,10 +95,15 @@ struct ChangeConfigWorkload : TestWorkload {
}

if (self->configMode.size()) {
if (g_network->isSimulated() && g_simulator.startingDisabledConfiguration != "") {
// It is not safe to allow automatic failover to a region which is not fully replicated,
// so wait for both regions to be fully replicated before enabling failover
wait(success(changeConfig(cx, g_simulator.startingDisabledConfiguration, true)));
TraceEvent("WaitForReplicas");
wait(waitForFullReplication(cx));
TraceEvent("WaitForReplicasEnd");
}
wait(success(changeConfig(cx, self->configMode, true)));
TraceEvent("WaitForReplicas");
wait(waitForFullReplication(cx));
TraceEvent("WaitForReplicasEnd");
}
if (self->networkAddresses.size()) {
if (self->networkAddresses == "auto")
@ -27,6 +27,7 @@
#include "flow/actorcompiler.h" // This must be the last #include.

struct FastTriggeredWatchesWorkload : TestWorkload {
// Tests the time it takes for a watch to be fired after the value has changed in the storage server
int nodes, keyBytes;
double testDuration;
vector<Future<Void>> clients;

@ -75,6 +76,7 @@ struct FastTriggeredWatchesWorkload : TestWorkload {

ACTOR Future<Version> setter(Database cx, Key key, Optional<Value> value) {
state ReadYourWritesTransaction tr(cx);
// set the value of the key and return the commit version
wait(delay(deterministicRandom()->random01()));
loop {
try {

@ -105,22 +107,24 @@ struct FastTriggeredWatchesWorkload : TestWorkload {
state Optional<Value> setValue;
if (deterministicRandom()->random01() > 0.5)
setValue = StringRef(format("%010d", deterministicRandom()->randomInt(0, 1000)));
// Set the value at setKey to something random
state Future<Version> setFuture = self->setter(cx, setKey, setValue);
wait(delay(deterministicRandom()->random01()));
loop {
state ReadYourWritesTransaction tr(cx);

try {

Optional<Value> val = wait(tr.get(setKey));
if (!first) {
getDuration = now() - watchEnd;
}
lastReadVersion = tr.getReadVersion().get();
//TraceEvent("FTWGet").detail("Key", printable(setKey)).detail("Value", printable(val)).detail("Ver", tr.getReadVersion().get());
// if the value is already setValue then there is no point setting a watch so break out of the loop
if (val == setValue)
break;
ASSERT(first);
// set a watch and wait for it to be triggered (i.e. for self->setter to set the value)
state Future<Void> watchFuture = tr.watch(setKey);
wait(tr.commit());
//TraceEvent("FTWStartWatch").detail("Key", printable(setKey));

@ -134,8 +138,10 @@ struct FastTriggeredWatchesWorkload : TestWorkload {
}
Version ver = wait(setFuture);
//TraceEvent("FTWWatchDone").detail("Key", printable(setKey));
// Assert that the time from setting the key to triggering the watch is no greater than 25s
// TODO: This assertion can cause flaky behaviour since sometimes a watch can take longer to fire
ASSERT(lastReadVersion - ver >= SERVER_KNOBS->MAX_VERSIONS_IN_FLIGHT ||
lastReadVersion - ver < SERVER_KNOBS->VERSIONS_PER_SECOND * (12 + getDuration));
lastReadVersion - ver < SERVER_KNOBS->VERSIONS_PER_SECOND * (25 + getDuration));

if (now() - testStart > self->testDuration)
break;
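To make the version arithmetic behind this bound concrete: a version delta is converted to wall-clock time through SERVER_KNOBS->VERSIONS_PER_SECOND, so the assertion allows roughly 25 seconds plus the time spent in the final read. A small worked sketch, assuming a knob value of 1e6 versions per second (the numbers below are illustrative; the real values come from the knobs):

// Hypothetical numbers: with getDuration = 0.5s and VERSIONS_PER_SECOND = 1e6,
// the watch must fire within (25 + 0.5) * 1e6 = 25,500,000 versions of the
// commit that changed the key, i.e. within roughly 25.5 seconds.
double versionsPerSecond = 1e6; // assumed knob value
double getDuration = 0.5;       // seconds spent in the final read, for illustration
double maxVersionDelta = versionsPerSecond * (25 + getDuration); // 2.55e7 versions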
@ -74,6 +74,7 @@ struct LowLatencyWorkload : TestWorkload {
++self->operations;
loop {
try {
TraceEvent("StartLowLatencyTransaction");
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
if (doCommit) {

@ -84,6 +85,7 @@ struct LowLatencyWorkload : TestWorkload {
}
break;
} catch (Error& e) {
TraceEvent("LowLatencyTransactionFailed").error(e, true);
wait(tr.onError(e));
++self->retries;
}
@ -502,6 +502,8 @@ struct RemoveServersSafelyWorkload : TestWorkload {
return killProcArray;
}

// Attempts to exclude a set of processes, and once the exclusion is successful it kills them.
// If markExcludeAsFailed is true, then it is an error if we cannot complete the exclusion.
ACTOR static Future<Void> removeAndKill(RemoveServersSafelyWorkload* self,
Database cx,
std::set<AddressExclusion> toKill,

@ -556,7 +558,11 @@ struct RemoveServersSafelyWorkload : TestWorkload {
.detail("Step", "SafetyCheck")
.detail("Exclusions", describe(toKillMarkFailedArray));
choose {
when(bool _safe = wait(checkSafeExclusions(cx, toKillMarkFailedArray))) { safe = _safe; }
when(bool _safe = wait(checkSafeExclusions(cx, toKillMarkFailedArray))) {
safe = _safe && self->protectServers(std::set<AddressExclusion>(toKillMarkFailedArray.begin(),
toKillMarkFailedArray.end()))
.size() == toKillMarkFailedArray.size();
}
when(wait(delay(5.0))) {
TraceEvent("RemoveAndKill", functionId)
.detail("Step", "SafetyCheckTimedOut")
@ -184,6 +184,12 @@ Reference<IRandom> deterministicRandom();
// non-deterministic contexts.
Reference<IRandom> nondeterministicRandom();

// This returns a deterministic random number generator initialized with the same seed as the one returned by
// deterministicRandom. The main use-case for this is to generate deterministic random numbers without changing the
// determinism of the simulator. This is useful for things like generating random UIDs for debug transactions.
// WARNING: This is not thread safe and must not be called from any other thread than the network thread!
Reference<IRandom> debugRandom();

// Populates a buffer with a random sequence of bytes
void generateRandomData(uint8_t* buffer, int length);
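A small usage sketch of debugRandom(): draw debug-only values from it instead of deterministicRandom(), so the extra draw does not perturb the simulator's deterministic random sequence. The debugTransaction() consumer below is an assumption for illustration, not a statement about a specific API:

// Sketch (assumed consumer): generate a UID for transaction debugging without
// consuming a value from deterministicRandom(), so simulation determinism is preserved.
UID debugId = debugRandom()->randomUniqueID();
tr.debugTransaction(debugId); // hypothetical debug hook on a transaction named tr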
@ -135,6 +135,12 @@ thread_local INetwork* thread_network = 0;

class Net2 final : public INetwork, public INetworkConnections {

private:
void updateStarvationTracker(struct NetworkMetrics::PriorityStats& binStats,
TaskPriority priority,
TaskPriority lastPriority,
double now);

public:
Net2(const TLSConfig& tlsConfig, bool useThreadPool, bool useMetrics);
void initTLS(ETLSInitState targetState) override;

@ -1582,6 +1588,28 @@ void Net2::run() {
#endif
}

// Updates the PriorityStats found in NetworkMetrics
void Net2::updateStarvationTracker(struct NetworkMetrics::PriorityStats& binStats,
TaskPriority priority,
TaskPriority lastPriority,
double now) {

// Busy -> idle at binStats.priority
if (binStats.priority > priority && binStats.priority <= lastPriority) {
binStats.active = false;
binStats.duration += now - binStats.windowedTimer;
binStats.maxDuration = std::max(binStats.maxDuration, now - binStats.timer);
}

// Idle -> busy at binStats.priority
else if (binStats.priority <= priority && binStats.priority > lastPriority) {
binStats.active = true;
binStats.timer = now;
binStats.windowedTimer = now;
}
}

// Update both vectors of starvation trackers (one that updates every 5s and the other every 1s)
void Net2::trackAtPriority(TaskPriority priority, double now) {
if (lastPriorityStats == nullptr || priority != lastPriorityStats->priority) {
// Start tracking current priority

@ -1601,22 +1629,12 @@ void Net2::trackAtPriority(TaskPriority priority, double now) {
if (binStats.priority > lastPriority && binStats.priority > priority) {
break;
}

// Busy -> idle at binStats.priority
if (binStats.priority > priority && binStats.priority <= lastPriority) {
binStats.active = false;
binStats.duration += now - binStats.windowedTimer;
binStats.maxDuration = std::max(binStats.maxDuration, now - binStats.timer);
}

// Idle -> busy at binStats.priority
else if (binStats.priority <= priority && binStats.priority > lastPriority) {
binStats.active = true;
binStats.timer = now;
binStats.windowedTimer = now;
}
updateStarvationTracker(binStats, priority, lastPriority, now);
}

// Update starvation trackers for network busyness
updateStarvationTracker(networkInfo.metrics.starvationTrackerNetworkBusyness, priority, lastPriority, now);

lastPriorityStats = &activeStatsItr.first->second;
}
}
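The starvationTrackerNetworkBusyness tracker above is what ultimately feeds the client-visible busyness value: over each measurement window the tracked busy time is divided by the elapsed time. A minimal sketch of that reduction, assuming the PriorityStats fields shown in this diff (the helper itself is illustrative, not the literal Net2 code):

// Sketch: fold a PriorityStats tracker into a busyness figure once per window
// and reset it for the next window. Anything other than the PriorityStats
// fields shown in the diff above is assumed for illustration.
double computeWindowBusyness(NetworkMetrics::PriorityStats& stats, double now, double windowStart) {
    double busyTime = stats.duration;
    if (stats.active) {
        // The bin is still busy at the end of the window; count the open interval too.
        busyTime += now - stats.windowedTimer;
        stats.windowedTimer = now;
    }
    stats.duration = 0; // start the next window from zero
    double elapsed = now - windowStart;
    return elapsed > 0 ? busyTime / elapsed : 0.0;
}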
@ -65,11 +65,17 @@ INetwork* g_network = 0;

FILE* randLog = 0;
thread_local Reference<IRandom> seededRandom;
Reference<IRandom> seededDebugRandom;
uint64_t debug_lastLoadBalanceResultEndpointToken = 0;
bool noUnseed = false;

void setThreadLocalDeterministicRandomSeed(uint32_t seed) {
seededRandom = Reference<IRandom>(new DeterministicRandom(seed, true));
seededDebugRandom = Reference<IRandom>(new DeterministicRandom(seed));
}

Reference<IRandom> debugRandom() {
return seededDebugRandom;
}

Reference<IRandom> deterministicRandom() {
@ -27,6 +27,7 @@
#include <string>
#include <stdint.h>
#include <variant>
#include <atomic>
#include "boost/asio.hpp"
#ifndef TLS_DISABLED
#include "boost/asio/ssl.hpp"

@ -320,6 +321,7 @@ class Future;
template <class T>
class Promise;

// Metrics which represent various network properties
struct NetworkMetrics {
enum { SLOW_EVENT_BINS = 16 };
uint64_t countSlowEvents[SLOW_EVENT_BINS] = {};

@ -340,16 +342,37 @@ struct NetworkMetrics {
};

std::unordered_map<TaskPriority, struct PriorityStats> activeTrackers;
double lastRunLoopBusyness;
double lastRunLoopBusyness; // network thread busyness (measured every 5s by default)
std::atomic<double> networkBusyness; // network thread busyness which is returned to the client (measured every 1s by default)

// starvation trackers which keep track of different task priorities
std::vector<struct PriorityStats> starvationTrackers;
struct PriorityStats starvationTrackerNetworkBusyness;

static const std::vector<int> starvationBins;

NetworkMetrics() : lastRunLoopBusyness(0) {
for (int priority : starvationBins) {
NetworkMetrics()
: lastRunLoopBusyness(0), networkBusyness(0),
starvationTrackerNetworkBusyness(PriorityStats(static_cast<TaskPriority>(starvationBins.at(0)))) {
for (int priority : starvationBins) { // initialize starvation trackers with given priorities
starvationTrackers.emplace_back(static_cast<TaskPriority>(priority));
}
}

// Since networkBusyness is atomic we need to redefine the copy assignment operator
NetworkMetrics& operator=(const NetworkMetrics& rhs) {
for (int i = 0; i < SLOW_EVENT_BINS; i++) {
countSlowEvents[i] = rhs.countSlowEvents[i];
}
secSquaredSubmit = rhs.secSquaredSubmit;
secSquaredDiskStall = rhs.secSquaredDiskStall;
activeTrackers = rhs.activeTrackers;
lastRunLoopBusyness = rhs.lastRunLoopBusyness;
networkBusyness = rhs.networkBusyness.load();
starvationTrackers = rhs.starvationTrackers;
starvationTrackerNetworkBusyness = rhs.starvationTrackerNetworkBusyness;
return *this;
}
};

struct FlowLock;
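The hand-written operator= above is required because std::atomic<double> is neither copy-constructible nor copy-assignable, so adding the networkBusyness member would otherwise delete the struct's implicit copy assignment. A minimal standalone illustration of the same pattern (the Metrics struct below is hypothetical, not FDB code):

#include <atomic>

struct Metrics {
    double plainValue = 0;
    std::atomic<double> atomicValue{ 0 };

    // Without this, Metrics& operator=(const Metrics&) would be implicitly
    // deleted because std::atomic<double> has no copy assignment.
    Metrics& operator=(const Metrics& rhs) {
        plainValue = rhs.plainValue;
        atomicValue.store(rhs.atomicValue.load()); // copy the value, not the atomic object
        return *this;
    }
};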
@ -1,3 +1,4 @@
storageEngineExcludeType=-1
testTitle=Clogged
clearAfterTest=false
testName=Cycle

@ -1,3 +1,4 @@
storageEngineExcludeType=-1
testTitle=Clogged
runSetup=false
testName=Cycle
Reference in New Issue