Merge remote-tracking branch 'upstream/master' into add-lock-unlock-to-special-keys

2020-10-08 14:23:16 -07:00 · 2020-10-08 14:23:16 -07:00 · c66a775d64
parent 53d3759e73 e258dffc65
commit c66a775d64
59 changed files with 1634 additions and 668 deletions
--- a/bindings/c/fdb_c.cpp
+++ b/bindings/c/fdb_c.cpp
@ -153,7 +153,7 @@ void fdb_future_destroy( FDBFuture* f ) {

 extern "C" DLLEXPORT
 fdb_error_t fdb_future_block_until_ready( FDBFuture* f ) {
-	CATCH_AND_RETURN( TSAVB(f)->blockUntilReady(); );
+	CATCH_AND_RETURN(TSAVB(f)->blockUntilReadyCheckOnMainThread(););
 }

 fdb_bool_t fdb_future_is_error_v22( FDBFuture* f ) {
--- a/bindings/go/src/fdb/subspace/subspace.go
+++ b/bindings/go/src/fdb/subspace/subspace.go
@ -78,8 +78,9 @@ type Subspace interface {
 	// FoundationDB keys (corresponding to the prefix of this Subspace).
 	fdb.KeyConvertible

-	// All Subspaces implement fdb.ExactRange and fdb.Range, and describe all
-	// keys logically in this Subspace.
+	// All Subspaces implement fdb.ExactRange and fdb.Range, and describe all 
+	// keys strictly within the subspace that encode tuples. Specifically, 
+	// this will include all keys in [prefix + '\x00', prefix + '\xff').
 	fdb.ExactRange
 }

--- a/bindings/java/CMakeLists.txt
+++ b/bindings/java/CMakeLists.txt
@ -1,3 +1,6 @@
+set(RUN_JAVA_TESTS ON CACHE BOOL "Run Java unit tests")
+set(RUN_JUNIT_TESTS OFF CACHE BOOL "Compile and run junit tests")
+
 set(JAVA_BINDING_SRCS
  src/main/com/apple/foundationdb/async/AsyncIterable.java
  src/main/com/apple/foundationdb/async/AsyncIterator.java
@ -102,6 +105,10 @@ set(JAVA_TESTS_SRCS
  src/test/com/apple/foundationdb/test/WatchTest.java
  src/test/com/apple/foundationdb/test/WhileTrueTest.java)

+set(JAVA_JUNIT_TESTS
+  src/junit/com/apple/foundationdb/tuple/AllTests.java
+  src/junit/com/apple/foundationdb/tuple/ArrayUtilTests.java)
+
 set(GENERATED_JAVA_DIR ${CMAKE_CURRENT_BINARY_DIR}/src/main/com/apple/foundationdb)
 file(MAKE_DIRECTORY ${GENERATED_JAVA_DIR})

@ -173,12 +180,6 @@ add_jar(fdb-java ${JAVA_BINDING_SRCS} ${GENERATED_JAVA_FILES} ${CMAKE_SOURCE_DIR
  OUTPUT_DIR ${PROJECT_BINARY_DIR}/lib VERSION ${CMAKE_PROJECT_VERSION} MANIFEST ${MANIFEST_FILE})
 add_dependencies(fdb-java fdb_java_options fdb_java)

-# TODO[mpilman]: The java RPM will require some more effort (mostly on debian). However,
-# most people will use the fat-jar, so it is not clear how high this priority is.
-
-#install_jar(fdb-java DESTINATION ${FDB_SHARE_DIR}/java COMPONENT java)
-#install(TARGETS fdb_java DESTINATION ${FDB_LIB_DIR} COMPONENT java)
-
 if(NOT OPEN_FOR_IDE)
  set(FAT_JAR_BINARIES "NOTFOUND" CACHE STRING
    "Path of a directory structure with libraries to include in fat jar (a lib directory)")
@ -252,4 +253,30 @@ if(NOT OPEN_FOR_IDE)
  add_dependencies(fat-jar fdb-java)
  add_dependencies(fat-jar copy_lib)
  add_dependencies(packages fat-jar)
+
+  if(RUN_JAVA_TESTS)
+    set(enabled ENABLED)
+  else()
+    set(enabled DISABLED)
+  endif()
+  set(TEST_CP ${tests_jar} ${target_jar})
+  message(STATUS "TEST_CP ${TEST_CP}")
+  add_java_test(NAME DirectoryTest CLASS_PATH ${TEST_CP}
+    CLASS com.apple.foundationdb.test.DirectoryTest ${enabled})
+
+  if(RUN_JUNIT_TESTS)
+    file(DOWNLOAD "https://search.maven.org/remotecontent?filepath=junit/junit/4.13/junit-4.13.jar"
+      ${CMAKE_BINARY_DIR}/packages/junit-4.13.jar
+      EXPECTED_HASH SHA256=4b8532f63bdc0e0661507f947eb324a954d1dbac631ad19c8aa9a00feed1d863)
+    file(DOWNLOAD "https://repo1.maven.org/maven2/org/hamcrest/hamcrest-all/1.3/hamcrest-all-1.3.jar"
+      ${CMAKE_BINARY_DIR}/packages/hamcrest-all-1.3.jar
+      EXPECTED_HASH SHA256=4877670629ab96f34f5f90ab283125fcd9acb7e683e66319a68be6eb2cca60de)
+    add_jar(fdb-junit SOURCES ${JAVA_JUNIT_TESTS} INCLUDE_JARS fdb-java ${CMAKE_BINARY_DIR}/packages/junit-4.13.jar)
+    get_property(junit_jar_path TARGET fdb-junit PROPERTY JAR_FILE)
+    add_test(NAME junit
+      COMMAND ${Java_JAVA_EXECUTABLE}
+              -cp "${target_jar}:${junit_jar_path}:${CMAKE_BINARY_DIR}/packages/junit-4.13.jar:${CMAKE_BINARY_DIR}/packages/hamcrest-all-1.3.jar"
+              -Djava.library.path=${CMAKE_BINARY_DIR}/lib
+              org.junit.runner.JUnitCore "com.apple.foundationdb.tuple.AllTests")
+  endif()
 endif()
--- a/bindings/java/src/junit/com/apple/foundationdb/tuple/ArrayUtilTests.java
+++ b/bindings/java/src/junit/com/apple/foundationdb/tuple/ArrayUtilTests.java
@ -27,9 +27,14 @@ import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;

 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
+import java.util.Random;

+import org.junit.Assert;
+import org.junit.Before;
 import org.junit.Test;
+import org.junit.Ignore;

 /**
 * @author Ben
@ -251,7 +256,7 @@ public class ArrayUtilTests {
 	/**
 	 * Test method for {@link ByteArrayUtil#bisectLeft(java.math.BigInteger[], java.math.BigInteger)}.
 	 */
-	@Test
+	@Test @Ignore
 	public void testBisectLeft() {
 		fail("Not yet implemented");
 	}
@ -259,7 +264,7 @@ public class ArrayUtilTests {
 	/**
 	 * Test method for {@link ByteArrayUtil#compareUnsigned(byte[], byte[])}.
 	 */
-	@Test
+	@Test @Ignore
 	public void testCompare() {
 		fail("Not yet implemented");
 	}
@ -267,7 +272,7 @@ public class ArrayUtilTests {
 	/**
 	 * Test method for {@link ByteArrayUtil#findNext(byte[], byte, int)}.
 	 */
-	@Test
+	@Test @Ignore
 	public void testFindNext() {
 		fail("Not yet implemented");
 	}
@ -275,7 +280,7 @@ public class ArrayUtilTests {
 	/**
 	 * Test method for {@link ByteArrayUtil#findTerminator(byte[], byte, byte, int)}.
 	 */
-	@Test
+	@Test @Ignore
 	public void testFindTerminator() {
 		fail("Not yet implemented");
 	}
@ -283,7 +288,7 @@ public class ArrayUtilTests {
 	/**
 	 * Test method for {@link ByteArrayUtil#copyOfRange(byte[], int, int)}.
 	 */
-	@Test
+	@Test @Ignore
 	public void testCopyOfRange() {
 		fail("Not yet implemented");
 	}
@ -291,7 +296,7 @@ public class ArrayUtilTests {
 	/**
 	 * Test method for {@link ByteArrayUtil#strinc(byte[])}.
 	 */
-	@Test
+	@Test @Ignore
 	public void testStrinc() {
 		fail("Not yet implemented");
 	}
@ -299,7 +304,7 @@ public class ArrayUtilTests {
 	/**
 	 * Test method for {@link ByteArrayUtil#printable(byte[])}.
 	 */
-	@Test
+	@Test @Ignore
 	public void testPrintable() {
 		fail("Not yet implemented");
 	}
--- a/bindings/java/src/test/com/apple/foundationdb/test/DirectoryTest.java
+++ b/bindings/java/src/test/com/apple/foundationdb/test/DirectoryTest.java
@ -34,7 +34,7 @@ public class DirectoryTest {
 	public static void main(String[] args) throws Exception {
 		try {
 			FDB fdb = FDB.selectAPIVersion(700);
-			try(Database db = fdb.open()) {
+			try(Database db = args.length > 0 ? fdb.open(args[0]) : fdb.open()) {
 				runTests(db);
 			}
 		}
--- a/build/Dockerfile
+++ b/build/Dockerfile
@ -45,13 +45,13 @@ RUN cd /tmp && curl -L https://github.com/ninja-build/ninja/archive/v1.9.0.zip -
    cd .. && rm -rf ninja-1.9.0 ninja.zip

 # install openssl
-RUN cd /tmp && curl -L https://www.openssl.org/source/openssl-1.1.1d.tar.gz -o openssl.tar.gz &&\
-    echo "1e3a91bc1f9dfce01af26026f856e064eab4c8ee0a8f457b5ae30b40b8b711f2  openssl.tar.gz" > openssl-sha.txt &&\
+RUN cd /tmp && curl -L https://www.openssl.org/source/openssl-1.1.1h.tar.gz -o openssl.tar.gz &&\
+    echo "5c9ca8774bd7b03e5784f26ae9e9e6d749c9da2438545077e6b3d755a06595d9  openssl.tar.gz" > openssl-sha.txt &&\
    sha256sum -c openssl-sha.txt && tar -xzf openssl.tar.gz &&\
-    cd openssl-1.1.1d && scl enable devtoolset-8 -- ./config CFLAGS="-fPIC -O3" --prefix=/usr/local &&\
+    cd openssl-1.1.1h && scl enable devtoolset-8 -- ./config CFLAGS="-fPIC -O3" --prefix=/usr/local &&\
    scl enable devtoolset-8 -- make -j`nproc` && scl enable devtoolset-8 -- make -j1 install &&\
    ln -sv /usr/local/lib64/lib*.so.1.1 /usr/lib64/ &&\
-    cd /tmp/ && rm -rf /tmp/openssl-1.1.1d /tmp/openssl.tar.gz
+    cd /tmp/ && rm -rf /tmp/openssl-1.1.1h /tmp/openssl.tar.gz

 RUN cd /opt/ && curl -L https://github.com/facebook/rocksdb/archive/v6.10.1.tar.gz -o rocksdb.tar.gz &&\
    echo "d573d2f15cdda883714f7e0bc87b814a8d4a53a82edde558f08f940e905541ee  rocksdb.tar.gz" > rocksdb-sha.txt &&\
@ -61,8 +61,8 @@ RUN cd /opt/ && curl -L https://github.com/facebook/rocksdb/archive/v6.10.1.tar.
 ARG TIMEZONEINFO=America/Los_Angeles
 RUN rm -f /etc/localtime && ln -s /usr/share/zoneinfo/${TIMEZONEINFO} /etc/localtime

-LABEL version=0.1.15
-ENV DOCKER_IMAGEVER=0.1.15
+LABEL version=0.1.17
+ENV DOCKER_IMAGEVER=0.1.17
 ENV JAVA_HOME=/usr/lib/jvm/java-1.8.0
 ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc
 ENV CXX=/opt/rh/devtoolset-8/root/usr/bin/g++
--- a/build/Dockerfile.devel
+++ b/build/Dockerfile.devel
@ -1,4 +1,4 @@
-FROM foundationdb/foundationdb-build:0.1.15
+FROM foundationdb/foundationdb-build:0.1.17

 USER root

--- a/build/docker-compose.yaml
+++ b/build/docker-compose.yaml
@ -2,7 +2,7 @@ version: "3"

 services:
  common: &common
-    image: foundationdb/foundationdb-build:0.1.15
+    image: foundationdb/foundationdb-build:0.1.17

  build-setup: &build-setup
    <<: *common
--- a/cmake/AddFdbTest.cmake
+++ b/cmake/AddFdbTest.cmake
@ -363,3 +363,60 @@ function(package_bindingtester)
  add_custom_target(bindingtester ALL DEPENDS ${tar_file})
  add_dependencies(bindingtester copy_bindingtester_binaries)
 endfunction()
+
+function(add_fdbclient_test)
+  set(options DISABLED ENABLED)
+  set(oneValueArgs NAME)
+  set(multiValueArgs COMMAND)
+  cmake_parse_arguments(T "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}")
+  if(NOT T_ENABLED AND T_DISABLED)
+    return()
+  endif()
+  if(NOT T_NAME)
+    message(FATAL_ERROR "NAME is a required argument for add_fdbclient_test")
+  endif()
+  if(NOT T_COMMAND)
+    message(FATAL_ERROR "COMMAND is a required argument for add_fdbclient_test")
+  endif()
+  message(STATUS "Adding Client test ${T_NAME}")
+  add_test(NAME "${T_NAME}"
+    COMMAND ${CMAKE_SOURCE_DIR}/tests/TestRunner/tmp_cluster.py
+            --build-dir ${CMAKE_BINARY_DIR}
+            --
+            ${T_COMMAND})
+endfunction()
+
+function(add_java_test)
+  set(options DISABLED ENABLED)
+  set(oneValueArgs NAME CLASS)
+  set(multiValueArgs CLASS_PATH)
+  cmake_parse_arguments(T "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}")
+  if(NOT T_ENABLED AND T_DISABLED)
+    return()
+  endif()
+  if(NOT T_NAME)
+    message(FATAL_ERROR "NAME is a required argument for add_fdbclient_test")
+  endif()
+  if(NOT T_CLASS)
+    message(FATAL_ERROR "CLASS is a required argument for add_fdbclient_test")
+  endif()
+  set(cp "")
+  set(separator ":")
+  if (WIN32)
+    set(separator ";")
+  endif()
+  message(STATUS "CLASSPATH ${T_CLASS_PATH}")
+  foreach(path ${T_CLASS_PATH})
+    if(cp)
+      set(cp "${cp}${separator}${path}")
+    else()
+      set(cp "${path}")
+    endif()
+  endforeach()
+  add_fdbclient_test(
+    NAME ${T_NAME}
+    COMMAND ${Java_JAVA_EXECUTABLE}
+            -cp "${cp}"
+            -Djava.library.path=${CMAKE_BINARY_DIR}/lib
+            ${T_CLASS} "@CLUSTER_FILE@")
+endfunction()
--- a/contrib/Joshua/scripts/bindingTestScript.sh
+++ b/contrib/Joshua/scripts/bindingTestScript.sh
@ -7,7 +7,7 @@ SCRIPTID="${$}"
 SAVEONERROR="${SAVEONERROR:-1}"
 PYTHONDIR="${BINDIR}/tests/python"
 testScript="${BINDIR}/tests/bindingtester/run_binding_tester.sh"
-VERSION="1.8"
+VERSION="1.9"

 source ${SCRIPTDIR}/localClusterStart.sh

@ -28,7 +28,7 @@ then
 	echo "Log dir:        ${LOGDIR}"
 	echo "Python path:    ${PYTHONDIR}"
 	echo "Lib dir:        ${LIBDIR}"
-	echo "Cluster String: ${CLUSTERSTRING}"
+	echo "Cluster String: ${FDBCLUSTERTEXT}"
 	echo "Script Id:      ${SCRIPTID}"
 	echo "Version:        ${VERSION}"
 fi
--- a/contrib/Joshua/scripts/localClusterStart.sh
+++ b/contrib/Joshua/scripts/localClusterStart.sh
@ -6,6 +6,7 @@ LOGDIR="${WORKDIR}/log"
 ETCDIR="${WORKDIR}/etc"
 BINDIR="${BINDIR:-${SCRIPTDIR}}"
 FDBPORTSTART="${FDBPORTSTART:-4000}"
+FDBPORTTOTAL="${FDBPORTTOTAL:-1000}"
 SERVERCHECKS="${SERVERCHECKS:-10}"
 CONFIGUREWAIT="${CONFIGUREWAIT:-240}"
 FDBCONF="${ETCDIR}/fdb.cluster"
@ -18,382 +19,384 @@ status=0
 messagetime=0
 messagecount=0

-# Define a random ip address and port on localhost
-if [ -z ${IPADDRESS} ]; then
-    let index2="${RANDOM} % 256"
-    let index3="${RANDOM} % 256"
-    let index4="(${RANDOM} % 255) + 1"
-    IPADDRESS="127.${index2}.${index3}.${index4}"
+# Do nothing, if cluster string is already defined
+if [ -n "${FDBCLUSTERTEXT}" ]
+then
+  :
+# Otherwise, define the cluster text
+else
+  # Define a random ip address and port on localhost
+  if [ -z "${IPADDRESS}" ]; then
+      let index2="${RANDOM} % 256"
+      let index3="${RANDOM} % 256"
+      let index4="(${RANDOM} % 255) + 1"
+      IPADDRESS="127.${index2}.${index3}.${index4}"
+  fi
+  if [ -z "${FDBPORT}" ]; then
+      let FDBPORT="(${RANDOM} % ${FDBPORTTOTAL}) + ${FDBPORTSTART}"
+  fi
+  FDBCLUSTERTEXT="${IPADDRESS}:${FDBPORT}"
 fi
-if [ -z ${FDBPORT} ]; then
-    let FDBPORT="(${RANDOM} % 1000) + ${FDBPORTSTART}"
-fi
-CLUSTERSTRING="${IPADDRESS}:${FDBPORT}"
-

 function log
 {
-	local status=0
-	if [ "$#" -lt 1 ]
-	then
-		echo "Usage: log <message> [echo]"
-		echo
-		echo "Logs the message and timestamp to LOGFILE (${LOGFILE}) and, if the"
-		echo "second argument is either not present or is set to 1, stdout."
-		let status="${status} + 1"
-	else
-		# Log to stdout.
-		if [ "$#" -lt 2 ] || [ "${2}" -ge 1 ]
-		then
-			echo "${1}"
-		fi
+  local status=0
+  if [ "$#" -lt 1 ]
+  then
+    echo "Usage: log <message> [echo]"
+    echo
+    echo "Logs the message and timestamp to LOGFILE (${LOGFILE}) and, if the"
+    echo "second argument is either not present or is set to 1, stdout."
+    let status="${status} + 1"
+  else
+    # Log to stdout.
+    if [ "$#" -lt 2 ] || [ "${2}" -ge 1 ]
+    then
+      echo "${1}"
+    fi

-		# Log to file.
-		datestr=$(date +"%Y-%m-%d %H:%M:%S (%s)")
-		dir=$(dirname "${LOGFILE}")
-		if ! [ -d "${dir}" ] && ! mkdir -p "${dir}"
-		then
-			echo "Could not create directory to log output."
-			let status="${status} + 1"
-		elif ! [ -f "${LOGFILE}" ] && ! touch "${LOGFILE}"
-		then
-			echo "Could not create file ${LOGFILE} to log output."
-			let status="${status} + 1"
-		elif ! echo "[ ${datestr} ] ${1}" >> "${LOGFILE}"
-		then
-			echo "Could not log output to ${LOGFILE}."
-			let status="${status} + 1"
-		fi
-	fi
+    # Log to file.
+    datestr=$(date +"%Y-%m-%d %H:%M:%S (%s)")
+    dir=$(dirname "${LOGFILE}")
+    if ! [ -d "${dir}" ] && ! mkdir -p "${dir}"
+    then
+      echo "Could not create directory to log output."
+      let status="${status} + 1"
+    elif ! [ -f "${LOGFILE}" ] && ! touch "${LOGFILE}"
+    then
+      echo "Could not create file ${LOGFILE} to log output."
+      let status="${status} + 1"
+    elif ! echo "[ ${datestr} ] ${1}" >> "${LOGFILE}"
+    then
+      echo "Could not log output to ${LOGFILE}."
+      let status="${status} + 1"
+    fi
+  fi

-	return "${status}"
+  return "${status}"
 }

 # Display a message for the user.
 function displayMessage
 {
-	local status=0
+  local status=0

-	if [ "$#" -lt 1 ]
-	then
-		echo "displayMessage <message>"
-		let status="${status} + 1"
-	elif ! log "${1}" 0
-	then
-		log "Could not write message to file."
-	else
-		# Increment the message counter
-		let messagecount="${messagecount} + 1"
+  if [ "$#" -lt 1 ]
+  then
+    echo "displayMessage <message>"
+    let status="${status} + 1"
+  elif ! log "${1}" 0
+  then
+    log "Could not write message to file."
+  else
+    # Increment the message counter
+    let messagecount="${messagecount} + 1"

-		# Display successful message, if previous message
-		if [ "${messagecount}" -gt 1 ]
-		then
-			# Determine the amount of transpired time
-			let timespent="${SECONDS}-${messagetime}"
+    # Display successful message, if previous message
+    if [ "${messagecount}" -gt 1 ]
+    then
+      # Determine the amount of transpired time
+      let timespent="${SECONDS}-${messagetime}"

-			if [ "${DEBUGLEVEL}" -gt 0 ]; then
-				printf "... done in %3d seconds\n" "${timespent}"
-			fi
-		fi
+      if [ "${DEBUGLEVEL}" -gt 0 ]; then
+        printf "... done in %3d seconds\n" "${timespent}"
+      fi
+    fi

-		# Display message
-		if [ "${DEBUGLEVEL}" -gt 0 ]; then
-			printf "%-16s	  %-35s " "$(date "+%F %H-%M-%S")" "$1"
-		fi
+    # Display message
+    if [ "${DEBUGLEVEL}" -gt 0 ]; then
+      printf "%-16s    %-35s " "$(date "+%F %H-%M-%S")" "$1"
+    fi

-		# Update the variables
-		messagetime="${SECONDS}"
-	fi
+    # Update the variables
+    messagetime="${SECONDS}"
+  fi

-	return "${status}"
+  return "${status}"
 }

 # Create the directories used by the server.
 function createDirectories
 {
-	local status=0
+  local status=0

-	# Display user message
-	if ! displayMessage "Creating directories"
-	then
-		echo 'Failed to display user message'
-		let status="${status} + 1"
+  # Display user message
+  if ! displayMessage "Creating directories"
+  then
+    echo 'Failed to display user message'
+    let status="${status} + 1"

-	elif ! mkdir -p "${LOGDIR}" "${ETCDIR}"
-	then
-		log "Failed to create directories"
-		let status="${status} + 1"
+  elif ! mkdir -p "${LOGDIR}" "${ETCDIR}"
+  then
+    log "Failed to create directories"
+    let status="${status} + 1"

-	# Display user message
-	elif ! displayMessage "Setting file permissions"
-	then
-		log 'Failed to display user message'
-		let status="${status} + 1"
+  # Display user message
+  elif ! displayMessage "Setting file permissions"
+  then
+    log 'Failed to display user message'
+    let status="${status} + 1"

-	elif ! chmod 755 "${BINDIR}/fdbserver" "${BINDIR}/fdbcli"
-	then
-		log "Failed to set file permissions"
-		let status="${status} + 1"
+  elif ! chmod 755 "${BINDIR}/fdbserver" "${BINDIR}/fdbcli"
+  then
+    log "Failed to set file permissions"
+    let status="${status} + 1"

-	else
-		while read filepath
-		do
-				if [ -f "${filepath}" ] && [ ! -x "${filepath}" ]
-				then
-					# if [ "${DEBUGLEVEL}" -gt 1 ]; then
-					# 	log "   Enable executable: ${filepath}"
-					# fi
-					log "   Enable executable: ${filepath}" "${DEBUGLEVEL}"
-					if ! chmod 755 "${filepath}"
-					then
-						log "Failed to set executable for file: ${filepath}"
-						let status="${status} + 1"
-					fi
-				fi
-		done < <(find "${BINDIR}" -iname '*.py' -o -iname '*.rb' -o -iname 'fdb_flow_tester' -o -iname '_stacktester' -o -iname '*.js' -o -iname '*.sh' -o -iname '*.ksh')
-	fi
+  else
+    while read filepath
+    do
+        if [ -f "${filepath}" ] && [ ! -x "${filepath}" ]
+        then
+          # if [ "${DEBUGLEVEL}" -gt 1 ]; then
+          #   log "   Enable executable: ${filepath}"
+          # fi
+          log "   Enable executable: ${filepath}" "${DEBUGLEVEL}"
+          if ! chmod 755 "${filepath}"
+          then
+            log "Failed to set executable for file: ${filepath}"
+            let status="${status} + 1"
+          fi
+        fi
+    done < <(find "${BINDIR}" -iname '*.py' -o -iname '*.rb' -o -iname 'fdb_flow_tester' -o -iname '_stacktester' -o -iname '*.js' -o -iname '*.sh' -o -iname '*.ksh')
+  fi

-	return ${status}
+  return ${status}
 }

 # Create a cluster file for the local cluster.
 function createClusterFile
 {
-	local status=0
+  local status=0

-	if [ "${status}" -ne 0 ]; then
-		:
-	# Display user message
-	elif ! displayMessage "Creating Fdb Cluster file"
-	then
-		log 'Failed to display user message'
-		let status="${status} + 1"
-	else
-		description=$(LC_CTYPE=C tr -dc A-Za-z0-9 < /dev/urandom 2> /dev/null | head -c 8)
-		random_str=$(LC_CTYPE=C tr -dc A-Za-z0-9 < /dev/urandom 2> /dev/null | head -c 8)
-		echo "${description}:${random_str}@${CLUSTERSTRING}" > "${FDBCONF}"
-	fi
+  if [ "${status}" -ne 0 ]; then
+    :
+  # Display user message
+  elif ! displayMessage "Creating Fdb Cluster file"
+  then
+    log 'Failed to display user message'
+    let status="${status} + 1"
+  else
+    description=$(LC_CTYPE=C tr -dc A-Za-z0-9 < /dev/urandom 2> /dev/null | head -c 8)
+    random_str=$(LC_CTYPE=C tr -dc A-Za-z0-9 < /dev/urandom 2> /dev/null | head -c 8)
+    echo "${description}:${random_str}@${FDBCLUSTERTEXT}" > "${FDBCONF}"
+  fi

-	if [ "${status}" -ne 0 ]; then
-		:
-	elif ! chmod 0664 "${FDBCONF}"; then
-		log "Failed to set permissions on fdbconf: ${FDBCONF}"
-		let status="${status} + 1"
-	fi
+  if [ "${status}" -ne 0 ]; then
+    :
+  elif ! chmod 0664 "${FDBCONF}"; then
+    log "Failed to set permissions on fdbconf: ${FDBCONF}"
+    let status="${status} + 1"
+  fi

-	return ${status}
+  return ${status}
 }

 # Stop the Cluster from running.
 function stopCluster
 {
-	local status=0
+  local status=0

-	# Add an audit entry, if enabled
-	if [ "${AUDITCLUSTER}" -gt 0 ]; then
-		printf '%-15s (%6s)  Stopping cluster %-20s (%6s): %s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "${$}" "${CLUSTERSTRING}" "${FDBSERVERID}" >> "${AUDITLOG}"
-	fi
-	if [ -z "${FDBSERVERID}" ]; then
-		log 'FDB Server process is not defined'
-		let status="${status} + 1"
-	elif ! kill -0 "${FDBSERVERID}"; then
-		log "Failed to locate FDB Server process (${FDBSERVERID})"
-		let status="${status} + 1"
-	elif "${BINDIR}/fdbcli" -C "${FDBCONF}" --exec "kill; kill ${CLUSTERSTRING}; sleep 3" --timeout 120 &>> "${LOGDIR}/fdbcli-kill.log"
-	then
-		# Ensure that process is dead
-		if ! kill -0 "${FDBSERVERID}" 2> /dev/null; then
-			log "Killed cluster (${FDBSERVERID}) via cli"
-		elif ! kill -9 "${FDBSERVERID}"; then
-			log "Failed to kill FDB Server process (${FDBSERVERID}) via cli or kill command"
-			let status="${status} + 1"
-		else
-			log "Forcibly killed FDB Server process (${FDBSERVERID}) since cli failed"
-		fi
-	elif ! kill -9 "${FDBSERVERID}"; then
-		log "Failed to forcibly kill FDB Server process (${FDBSERVERID})"
-		let status="${status} + 1"
-	else
-		log "Forcibly killed FDB Server process (${FDBSERVERID})"
-	fi
-	return "${status}"
+  # Add an audit entry, if enabled
+  if [ "${AUDITCLUSTER}" -gt 0 ]; then
+    printf '%-15s (%6s)  Stopping cluster %-20s (%6s): %s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "${$}" "${FDBCLUSTERTEXT}" "${FDBSERVERID}" >> "${AUDITLOG}"
+  fi
+  if [ -z "${FDBSERVERID}" ]; then
+    log 'FDB Server process is not defined'
+    let status="${status} + 1"
+  elif ! kill -0 "${FDBSERVERID}"; then
+    log "Failed to locate FDB Server process (${FDBSERVERID})"
+    let status="${status} + 1"
+  elif "${BINDIR}/fdbcli" -C "${FDBCONF}" --exec "kill; kill ${FDBCLUSTERTEXT}; sleep 3" --timeout 120 &>> "${LOGDIR}/fdbcli-kill.log"
+  then
+    # Ensure that process is dead
+    if ! kill -0 "${FDBSERVERID}" 2> /dev/null; then
+      log "Killed cluster (${FDBSERVERID}) via cli"
+    elif ! kill -9 "${FDBSERVERID}"; then
+      log "Failed to kill FDB Server process (${FDBSERVERID}) via cli or kill command"
+      let status="${status} + 1"
+    else
+      log "Forcibly killed FDB Server process (${FDBSERVERID}) since cli failed"
+    fi
+  elif ! kill -9 "${FDBSERVERID}"; then
+    log "Failed to forcibly kill FDB Server process (${FDBSERVERID})"
+    let status="${status} + 1"
+  else
+    log "Forcibly killed FDB Server process (${FDBSERVERID})"
+  fi
+  return "${status}"
 }

 # Start the server running.
 function startFdbServer
 {
-	local status=0
+  local status=0

-	# Add an audit entry, if enabled
-	if [ "${AUDITCLUSTER}" -gt 0 ]; then
-		printf '%-15s (%6s)  Starting cluster %-20s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "${$}" "${CLUSTERSTRING}" >> "${AUDITLOG}"
-	fi
+  # Add an audit entry, if enabled
+  if [ "${AUDITCLUSTER}" -gt 0 ]; then
+    printf '%-15s (%6s)  Starting cluster %-20s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "${$}" "${FDBCLUSTERTEXT}" >> "${AUDITLOG}"
+  fi

-	if [ "${status}" -ne 0 ]; then
-		:
-	elif ! displayMessage "Starting Fdb Server"
-	then
-		log 'Failed to display user message'
-		let status="${status} + 1"
+  if ! displayMessage "Starting Fdb Server"
+  then
+    log 'Failed to display user message'
+    let status="${status} + 1"

-	else
-            "${BINDIR}/fdbserver" --knob_disable_posix_kernel_aio=1 -C "${FDBCONF}" -p "${CLUSTERSTRING}" -L "${LOGDIR}" -d "${WORKDIR}/fdb/${$}" &> "${LOGDIR}/fdbserver.log" &
-            fdbpid=$!
-            fdbrc=$?
-            if [ $fdbrc -ne 0 ]
-            then
-                log "Failed to start FDB Server"
-                let status="${status} + 1"
-            else
-                FDBSERVERID="${fdbpid}"
-            fi
-	fi
+  else
+    "${BINDIR}/fdbserver" --knob_disable_posix_kernel_aio=1 -C "${FDBCONF}" -p "${FDBCLUSTERTEXT}" -L "${LOGDIR}" -d "${WORKDIR}/fdb/${$}" &> "${LOGDIR}/fdbserver.log" &
+    if [ "${?}" -ne 0 ]
+    then
+        log "Failed to start FDB Server"
+        let status="${status} + 1"
+    else
+        FDBSERVERID="${!}"
+    fi
+  fi

-	if [ -z "${FDBSERVERID}" ]; then
-		log "FDB Server start failed because no process"
-		let status="${status} + 1"
-	elif ! kill -0 "${FDBSERVERID}" ; then
-		log "FDB Server start failed because process terminated unexpectedly"
-		let status="${status} + 1"
-	fi
+  if [ -z "${FDBSERVERID}" ]; then
+    log "FDB Server start failed because no process"
+    let status="${status} + 1"
+  elif ! kill -0 "${FDBSERVERID}" ; then
+    log "FDB Server start failed because process terminated unexpectedly"
+    let status="${status} + 1"
+  fi

-	return ${status}
+  return ${status}
 }

 function getStatus
 {
-	local status=0
+  local status=0

-	if [ "${status}" -ne 0 ]; then
-		:
-	elif ! date &>> "${LOGDIR}/fdbclient.log"
-	then
-		log 'Failed to get date'
-		let status="${status} + 1"
-	elif ! "${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'status json' --timeout 120 &>> "${LOGDIR}/fdbclient.log"
-	then
-		log 'Failed to get status from fdbcli'
-		let status="${status} + 1"
-	elif !  date &>> "${LOGDIR}/fdbclient.log"
-	then
-		log 'Failed to get date'
-		let status="${status} + 1"
-	fi
+  if [ "${status}" -ne 0 ]; then
+    :
+  elif ! date &>> "${LOGDIR}/fdbclient.log"
+  then
+    log 'Failed to get date'
+    let status="${status} + 1"
+  elif ! "${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'status json' --timeout 120 &>> "${LOGDIR}/fdbclient.log"
+  then
+    log 'Failed to get status from fdbcli'
+    let status="${status} + 1"
+  elif !  date &>> "${LOGDIR}/fdbclient.log"
+  then
+    log 'Failed to get date'
+    let status="${status} + 1"
+  fi

-	return ${status}
+  return ${status}
 }

 # Verify that the cluster is available.
 function verifyAvailable
 {
-	local status=0
+  local status=0

-	if [ -z "${FDBSERVERID}" ]; then
-		log "FDB Server process is not defined."
-		let status="${status} + 1"
-	# Verify that the server is running.
-	elif ! kill -0 "${FDBSERVERID}"
-	then
-		log "FDB server process (${FDBSERVERID}) is not running"
-		let status="${status} + 1"
-	# Display user message.
-	elif ! displayMessage "Checking cluster availability"
-	then
-		log 'Failed to display user message'
-		let status="${status} + 1"
-	# Determine if status json says the database is available.
-	else
-		avail=`"${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'status json' --timeout "${SERVERCHECKS}" 2> /dev/null | grep -E '"database_available"|"available"' | grep 'true'`
-		log "Avail value: ${avail}" "${DEBUGLEVEL}"
-		if [[ -n "${avail}" ]] ; then
-			:
-		else
-			let status="${status} + 1"
-		fi
-	fi
-	return "${status}"
+  if [ -z "${FDBSERVERID}" ]; then
+    log "FDB Server process is not defined."
+    let status="${status} + 1"
+  # Verify that the server is running.
+  elif ! kill -0 "${FDBSERVERID}"
+  then
+    log "FDB server process (${FDBSERVERID}) is not running"
+    let status="${status} + 1"
+  # Display user message.
+  elif ! displayMessage "Checking cluster availability"
+  then
+    log 'Failed to display user message'
+    let status="${status} + 1"
+  # Determine if status json says the database is available.
+  else
+    avail=`"${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'status json' --timeout "${SERVERCHECKS}" 2> /dev/null | grep -E '"database_available"|"available"' | grep 'true'`
+    log "Avail value: ${avail}" "${DEBUGLEVEL}"
+    if [[ -n "${avail}" ]] ; then
+      :
+    else
+      let status="${status} + 1"
+    fi
+  fi
+  return "${status}"
 }

 # Configure the database on the server.
 function createDatabase
 {
-	local status=0
+  local status=0

-	if [ "${status}" -ne 0 ]; then
-		:
-	# Ensure that the server is running
-	elif ! kill -0 "${FDBSERVERID}"
-	then
-		log "FDB server process: (${FDBSERVERID}) is not running"
-		let status="${status} + 1"
+  if [ "${status}" -ne 0 ]; then
+    :
+  # Ensure that the server is running
+  elif ! kill -0 "${FDBSERVERID}"
+  then
+    log "FDB server process: (${FDBSERVERID}) is not running"
+    let status="${status} + 1"

-	# Display user message
-	elif ! displayMessage "Creating database"
-	then
-		log 'Failed to display user message'
-		let status="${status} + 1"
-	elif ! echo "Client log:" &> "${LOGDIR}/fdbclient.log"
-	then
-		log 'Failed to create fdbclient.log'
-		let status="${status} + 1"
-	elif ! getStatus
-	then
-		log 'Failed to get status'
-		let status="${status} + 1"
+  # Display user message
+  elif ! displayMessage "Creating database"
+  then
+    log 'Failed to display user message'
+    let status="${status} + 1"
+  elif ! echo "Client log:" &> "${LOGDIR}/fdbclient.log"
+  then
+    log 'Failed to create fdbclient.log'
+    let status="${status} + 1"
+  elif ! getStatus
+  then
+    log 'Failed to get status'
+    let status="${status} + 1"

-	# Configure the database.
-	else
-		"${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'configure new single memory; status' --timeout "${CONFIGUREWAIT}" --log --log-dir "${LOGDIR}" &>> "${LOGDIR}/fdbclient.log"
+  # Configure the database.
+  else
+    "${BINDIR}/fdbcli" -C "${FDBCONF}" --exec 'configure new single memory; status' --timeout "${CONFIGUREWAIT}" --log --log-dir "${LOGDIR}" &>> "${LOGDIR}/fdbclient.log"

-		if ! displayMessage "Checking if config succeeded"
-		then
-			log 'Failed to display user message.'
-		fi
+    if ! displayMessage "Checking if config succeeded"
+    then
+      log 'Failed to display user message.'
+    fi

-		iteration=0
-		while [[ "${iteration}" -lt "${SERVERCHECKS}" ]] && ! verifyAvailable
-		do
-			log "Database not created (iteration ${iteration})."
-			let iteration="${iteration} + 1"
-		done
+    iteration=0
+    while [[ "${iteration}" -lt "${SERVERCHECKS}" ]] && ! verifyAvailable
+    do
+      log "Database not created (iteration ${iteration})."
+      let iteration="${iteration} + 1"
+    done

-		if ! verifyAvailable
-		then
-			log "Failed to create database via cli"
-			getStatus
-			cat "${LOGDIR}/fdbclient.log"
-			log "Ignoring -- moving on"
-			#let status="${status} + 1"
-		fi
-	fi
+    if ! verifyAvailable
+    then
+      log "Failed to create database via cli"
+      getStatus
+      cat "${LOGDIR}/fdbclient.log"
+      log "Ignoring -- moving on"
+      #let status="${status} + 1"
+    fi
+  fi

-	return ${status}
+  return ${status}
 }

 # Begin the local cluster from scratch.
 function startCluster
 {
-	local status=0
+  local status=0

-	if [ "${status}" -ne 0 ]; then
-		:
-	elif ! createDirectories
-	then
-		log "Could not create directories."
-		let status="${status} + 1"
-	elif ! createClusterFile
-	then
-		log "Could not create cluster file."
-		let status="${status} + 1"
-	elif ! startFdbServer
-	then
-		log "Could not start FDB server."
-		let status="${status} + 1"
-	elif ! createDatabase
-	then
-		log "Could not create database."
-		let status="${status} + 1"
-	fi
+  if [ "${status}" -ne 0 ]; then
+    :
+  elif ! createDirectories
+  then
+    log "Could not create directories."
+    let status="${status} + 1"
+  elif ! createClusterFile
+  then
+    log "Could not create cluster file."
+    let status="${status} + 1"
+  elif ! startFdbServer
+  then
+    log "Could not start FDB server."
+    let status="${status} + 1"
+  elif ! createDatabase
+  then
+    log "Could not create database."
+    let status="${status} + 1"
+  fi

-	return ${status}
+  return ${status}
 }
--- a/documentation/sphinx/source/api-c.rst
+++ b/documentation/sphinx/source/api-c.rst
@ -263,9 +263,9 @@ See :ref:`developer-guide-programming-with-futures` for further (language-indepe

 .. function:: fdb_error_t fdb_future_block_until_ready(FDBFuture* future)

-   Blocks the calling thread until the given Future is ready. It will return success even if the Future is set to an error -- you must call :func:`fdb_future_get_error()` to determine that. :func:`fdb_future_block_until_ready()` will return an error only in exceptional conditions (e.g. out of memory or other operating system resources).
+   Blocks the calling thread until the given Future is ready. It will return success even if the Future is set to an error -- you must call :func:`fdb_future_get_error()` to determine that. :func:`fdb_future_block_until_ready()` will return an error only in exceptional conditions (e.g. deadlock detected, out of memory or other operating system resources).

-   .. warning:: Never call this function from a callback passed to :func:`fdb_future_set_callback()`. This may block the thread on which :func:`fdb_run_network()` was invoked, resulting in a deadlock.
+   .. warning:: Never call this function from a callback passed to :func:`fdb_future_set_callback()`. This may block the thread on which :func:`fdb_run_network()` was invoked, resulting in a deadlock. In some cases the client can detect the deadlock and throw a ``blocked_from_network_thread`` error.

 .. function:: fdb_bool_t fdb_future_is_ready(FDBFuture* future)

--- a/documentation/sphinx/source/api-error-codes.rst
+++ b/documentation/sphinx/source/api-error-codes.rst
@ -114,8 +114,12 @@ FoundationDB may return the following error codes from API functions. If you nee
 +-----------------------------------------------+-----+--------------------------------------------------------------------------------+
 | transaction_read_only                         | 2023| Attempted to commit a transaction specified as read-only                       |
 +-----------------------------------------------+-----+--------------------------------------------------------------------------------+
+| invalid_cache_eviction_policy                 | 2024| Invalid cache eviction policy, only random and lru are supported               |
+-----------------------------------------------+-----+--------------------------------------------------------------------------------+
 | network_cannot_be_restarted                   | 2025| Network can only be started once                                               |
 +-----------------------------------------------+-----+--------------------------------------------------------------------------------+
+| blocked_from_network_thread                   | 2026| Detected a deadlock in a callback called from the network thread               |
+-----------------------------------------------+-----+--------------------------------------------------------------------------------+
 | incompatible_protocol_version                 | 2100| Incompatible protocol version                                                  |
 +-----------------------------------------------+-----+--------------------------------------------------------------------------------+
 | transaction_too_large                         | 2101| Transaction exceeds byte limit                                                 |
--- a/documentation/sphinx/source/downloads.rst
+++ b/documentation/sphinx/source/downloads.rst
@ -10,38 +10,38 @@ macOS

 The macOS installation package is supported on macOS 10.7+. It includes the client and (optionally) the server.

-* `FoundationDB-6.3.5.pkg <https://www.foundationdb.org/downloads/6.3.5/macOS/installers/FoundationDB-6.3.5.pkg>`_
+* `FoundationDB-6.3.8.pkg <https://www.foundationdb.org/downloads/6.3.8/macOS/installers/FoundationDB-6.3.8.pkg>`_

 Ubuntu
 ------

 The Ubuntu packages are supported on 64-bit Ubuntu 12.04+, but beware of the Linux kernel bug in Ubuntu 12.x.

-* `foundationdb-clients-6.3.5-1_amd64.deb <https://www.foundationdb.org/downloads/6.3.5/ubuntu/installers/foundationdb-clients_6.3.5-1_amd64.deb>`_
-* `foundationdb-server-6.3.5-1_amd64.deb <https://www.foundationdb.org/downloads/6.3.5/ubuntu/installers/foundationdb-server_6.3.5-1_amd64.deb>`_ (depends on the clients package)
+* `foundationdb-clients-6.3.8-1_amd64.deb <https://www.foundationdb.org/downloads/6.3.8/ubuntu/installers/foundationdb-clients_6.3.8-1_amd64.deb>`_
+* `foundationdb-server-6.3.8-1_amd64.deb <https://www.foundationdb.org/downloads/6.3.8/ubuntu/installers/foundationdb-server_6.3.8-1_amd64.deb>`_ (depends on the clients package)

 RHEL/CentOS EL6
 ---------------

 The RHEL/CentOS EL6 packages are supported on 64-bit RHEL/CentOS 6.x.

-* `foundationdb-clients-6.3.5-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.5/rhel6/installers/foundationdb-clients-6.3.5-1.el6.x86_64.rpm>`_
-* `foundationdb-server-6.3.5-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.5/rhel6/installers/foundationdb-server-6.3.5-1.el6.x86_64.rpm>`_ (depends on the clients package)
+* `foundationdb-clients-6.3.8-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.8/rhel6/installers/foundationdb-clients-6.3.8-1.el6.x86_64.rpm>`_
+* `foundationdb-server-6.3.8-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.8/rhel6/installers/foundationdb-server-6.3.8-1.el6.x86_64.rpm>`_ (depends on the clients package)

 RHEL/CentOS EL7
 ---------------

 The RHEL/CentOS EL7 packages are supported on 64-bit RHEL/CentOS 7.x.

-* `foundationdb-clients-6.3.5-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.5/rhel7/installers/foundationdb-clients-6.3.5-1.el7.x86_64.rpm>`_
-* `foundationdb-server-6.3.5-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.5/rhel7/installers/foundationdb-server-6.3.5-1.el7.x86_64.rpm>`_ (depends on the clients package)
+* `foundationdb-clients-6.3.8-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.8/rhel7/installers/foundationdb-clients-6.3.8-1.el7.x86_64.rpm>`_
+* `foundationdb-server-6.3.8-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.8/rhel7/installers/foundationdb-server-6.3.8-1.el7.x86_64.rpm>`_ (depends on the clients package)

 Windows
 -------

 The Windows installer is supported on 64-bit Windows XP and later. It includes the client and (optionally) the server.

-* `foundationdb-6.3.5-x64.msi <https://www.foundationdb.org/downloads/6.3.5/windows/installers/foundationdb-6.3.5-x64.msi>`_
+* `foundationdb-6.3.8-x64.msi <https://www.foundationdb.org/downloads/6.3.8/windows/installers/foundationdb-6.3.8-x64.msi>`_

 API Language Bindings
 =====================
@ -58,18 +58,18 @@ On macOS and Windows, the FoundationDB Python API bindings are installed as part

 If you need to use the FoundationDB Python API from other Python installations or paths, use the Python package manager ``pip`` (``pip install foundationdb``) or download the Python package:

-* `foundationdb-6.3.5.tar.gz <https://www.foundationdb.org/downloads/6.3.5/bindings/python/foundationdb-6.3.5.tar.gz>`_
+* `foundationdb-6.3.8.tar.gz <https://www.foundationdb.org/downloads/6.3.8/bindings/python/foundationdb-6.3.8.tar.gz>`_

 Ruby 1.9.3/2.0.0+
 -----------------

-* `fdb-6.3.5.gem <https://www.foundationdb.org/downloads/6.3.5/bindings/ruby/fdb-6.3.5.gem>`_
+* `fdb-6.3.8.gem <https://www.foundationdb.org/downloads/6.3.8/bindings/ruby/fdb-6.3.8.gem>`_

 Java 8+
 -------

-* `fdb-java-6.3.5.jar <https://www.foundationdb.org/downloads/6.3.5/bindings/java/fdb-java-6.3.5.jar>`_
-* `fdb-java-6.3.5-javadoc.jar <https://www.foundationdb.org/downloads/6.3.5/bindings/java/fdb-java-6.3.5-javadoc.jar>`_
+* `fdb-java-6.3.8.jar <https://www.foundationdb.org/downloads/6.3.8/bindings/java/fdb-java-6.3.8.jar>`_
+* `fdb-java-6.3.8-javadoc.jar <https://www.foundationdb.org/downloads/6.3.8/bindings/java/fdb-java-6.3.8-javadoc.jar>`_

 Go 1.11+
 --------
--- a/documentation/sphinx/source/release-notes/release-notes-620.rst
+++ b/documentation/sphinx/source/release-notes/release-notes-620.rst
@ -2,6 +2,10 @@
 Release Notes
 #############

+6.2.26
+======
+* Attempt to detect when calling :func:`fdb_future_block_until_ready` would cause a deadlock, and throw ``blocked_from_network_thread`` if it would definitely cause a deadlock.
+
 6.2.25
 ======

--- a/documentation/sphinx/source/release-notes/release-notes-630.rst
+++ b/documentation/sphinx/source/release-notes/release-notes-630.rst
@ -2,11 +2,9 @@
 Release Notes
 #############

-6.3.5
+6.3.8
 =====

-* Report missing old tlogs information when in recovery before storage servers are fully recovered. `(PR #3706) <https://github.com/apple/foundationdb/pull/3706>`_
-
 Features
 --------

@ -110,6 +108,10 @@ Other Changes
 * Updated boost to 1.72.  `(PR #2684) <https://github.com/apple/foundationdb/pull/2684>`_
 * Calling ``fdb_run_network`` multiple times in a single run of a client program now returns an error instead of causing undefined behavior. [6.3.1] `(PR #3229) <https://github.com/apple/foundationdb/pull/3229>`_
 * Blob backup URL parameter ``request_timeout`` changed to ``request_timeout_min``, with prior name still supported. `(PR #3533) <https://github.com/apple/foundationdb/pull/3533>`_
+* Support query command in backup CLI that allows users to query restorable files by key ranges. [6.3.6] `(PR #3703) <https://github.com/apple/foundationdb/pull/3703>`_
+* Report missing old tlogs information when in recovery before storage servers are fully recovered. [6.3.6] `(PR #3706) <https://github.com/apple/foundationdb/pull/3706>`_
+* Updated OpenSSL to version 1.1.1h. [6.3.7] `(PR #3809) <https://github.com/apple/foundationdb/pull/3809>`_
+* Lowered the amount of time a watch will remain registered on a storage server from 900 seconds to 30 seconds. [6.3.8] `(PR #3833) <https://github.com/apple/foundationdb/pull/3833>`_

 Fixes from previous versions
 ----------------------------
@ -126,6 +128,8 @@ Fixes only impacting 6.3.0+
 * Refreshing TLS certificates could cause crashes. [6.3.2] `(PR #3352) <https://github.com/apple/foundationdb/pull/3352>`_
 * All storage class processes attempted to connect to the same coordinator. [6.3.2] `(PR #3361) <https://github.com/apple/foundationdb/pull/3361>`_
 * Adjusted the proxy load balancing algorithm to be based on the CPU usage of the process instead of the number of requests processed. [6.3.5] `(PR #3653) <https://github.com/apple/foundationdb/pull/3653>`_
+* Only return the error code ``batch_transaction_throttled`` for API versions greater than or equal to 630. [6.3.6] `(PR #3799) <https://github.com/apple/foundationdb/pull/3799>`_
+* The fault tolerance calculation in status did not take into account region configurations. [6.3.8] `(PR #3836) <https://github.com/apple/foundationdb/pull/3836>`_

 Earlier release notes
 ---------------------
--- a/fdbbackup/backup.actor.cpp
+++ b/fdbbackup/backup.actor.cpp
@ -18,6 +18,10 @@
 * limitations under the License.
 */

+#include "fdbclient/JsonBuilder.h"
+#include "flow/Arena.h"
+#include "flow/Error.h"
+#include "flow/Trace.h"
 #define BOOST_DATE_TIME_NO_LIB
 #include <boost/interprocess/managed_shared_memory.hpp>

@ -81,7 +85,22 @@ enum enumProgramExe {
 };

 enum enumBackupType {
-	BACKUP_UNDEFINED=0, BACKUP_START, BACKUP_MODIFY, BACKUP_STATUS, BACKUP_ABORT, BACKUP_WAIT, BACKUP_DISCONTINUE, BACKUP_PAUSE, BACKUP_RESUME, BACKUP_EXPIRE, BACKUP_DELETE, BACKUP_DESCRIBE, BACKUP_LIST, BACKUP_DUMP, BACKUP_CLEANUP
+	BACKUP_UNDEFINED = 0,
+	BACKUP_START,
+	BACKUP_MODIFY,
+	BACKUP_STATUS,
+	BACKUP_ABORT,
+	BACKUP_WAIT,
+	BACKUP_DISCONTINUE,
+	BACKUP_PAUSE,
+	BACKUP_RESUME,
+	BACKUP_EXPIRE,
+	BACKUP_DELETE,
+	BACKUP_DESCRIBE,
+	BACKUP_LIST,
+	BACKUP_QUERY,
+	BACKUP_DUMP,
+	BACKUP_CLEANUP
 };

 enum enumDBType {
@ -121,6 +140,7 @@ enum {
 	OPT_TAGNAME,
 	OPT_BACKUPKEYS,
 	OPT_WAITFORDONE,
+	OPT_BACKUPKEYS_FILTER,
 	OPT_INCREMENTALONLY,

 	// Backup Modify
@ -624,6 +644,40 @@ CSimpleOpt::SOption g_rgBackupListOptions[] = {
 	SO_END_OF_OPTIONS
 };

+CSimpleOpt::SOption g_rgBackupQueryOptions[] = {
+#ifdef _WIN32
+	{ OPT_PARENTPID, "--parentpid", SO_REQ_SEP },
+#endif
+	{ OPT_RESTORE_TIMESTAMP, "--query_restore_timestamp", SO_REQ_SEP },
+	{ OPT_DESTCONTAINER, "-d", SO_REQ_SEP },
+	{ OPT_DESTCONTAINER, "--destcontainer", SO_REQ_SEP },
+	{ OPT_RESTORE_VERSION, "-qrv", SO_REQ_SEP },
+	{ OPT_RESTORE_VERSION, "--query_restore_version", SO_REQ_SEP },
+	{ OPT_BACKUPKEYS_FILTER, "-k", SO_REQ_SEP },
+	{ OPT_BACKUPKEYS_FILTER, "--keys", SO_REQ_SEP },
+	{ OPT_TRACE, "--log", SO_NONE },
+	{ OPT_TRACE_DIR, "--logdir", SO_REQ_SEP },
+	{ OPT_TRACE_FORMAT, "--trace_format", SO_REQ_SEP },
+	{ OPT_TRACE_LOG_GROUP, "--loggroup", SO_REQ_SEP },
+	{ OPT_QUIET, "-q", SO_NONE },
+	{ OPT_QUIET, "--quiet", SO_NONE },
+	{ OPT_VERSION, "-v", SO_NONE },
+	{ OPT_VERSION, "--version", SO_NONE },
+	{ OPT_CRASHONERROR, "--crash", SO_NONE },
+	{ OPT_MEMLIMIT, "-m", SO_REQ_SEP },
+	{ OPT_MEMLIMIT, "--memory", SO_REQ_SEP },
+	{ OPT_HELP, "-?", SO_NONE },
+	{ OPT_HELP, "-h", SO_NONE },
+	{ OPT_HELP, "--help", SO_NONE },
+	{ OPT_DEVHELP, "--dev-help", SO_NONE },
+	{ OPT_BLOB_CREDENTIALS, "--blob_credentials", SO_REQ_SEP },
+	{ OPT_KNOB, "--knob_", SO_REQ_SEP },
+#ifndef TLS_DISABLED
+	TLS_OPTION_FLAGS
+#endif
+	    SO_END_OF_OPTIONS
+};
+
 // g_rgRestoreOptions is used by fdbrestore and fastrestore_tool
 CSimpleOpt::SOption g_rgRestoreOptions[] = {
 #ifdef _WIN32
@ -959,13 +1013,16 @@ void printBackupContainerInfo() {

 static void printBackupUsage(bool devhelp) {
 	printf("FoundationDB " FDB_VT_PACKAGE_NAME " (v" FDB_VT_VERSION ")\n");
-	printf("Usage: %s (start | status | abort | wait | discontinue | pause | resume | expire | delete | describe | list | cleanup) [OPTIONS]\n\n", exeBackup.toString().c_str());
+	printf("Usage: %s (start | status | abort | wait | discontinue | pause | resume | expire | delete | describe | "
+	       "list | query | cleanup) [OPTIONS]\n\n",
+	       exeBackup.toString().c_str());
 	printf("  -C CONNFILE    The path of a file containing the connection string for the\n"
 		   "                 FoundationDB cluster. The default is first the value of the\n"
 		   "                 FDB_CLUSTER_FILE environment variable, then `./fdb.cluster',\n"
 		   "                 then `%s'.\n", platform::getDefaultClusterFilePath().c_str());
 	printf("  -d, --destcontainer URL\n"
-	       "                 The Backup container URL for start, modify, describe, expire, and delete operations.\n");
+	       "                 The Backup container URL for start, modify, describe, query, expire, and delete "
+	       "operations.\n");
 	printBackupContainerInfo();
 	printf("  -b, --base_url BASEURL\n"
 		   "                 Base backup URL for list operations.  This looks like a Backup URL but without a backup name.\n");
@ -979,6 +1036,12 @@ static void printBackupUsage(bool devhelp) {
 	printf("  --delete_before_days NUM_DAYS\n"
 		   "                 Another way to specify version cutoff for expire operations.  Deletes data files containing no data at or after a\n"
 		   "                 version approximately NUM_DAYS days worth of versions prior to the latest log version in the backup.\n");
+	printf("  -qrv --query_restore_version VERSION\n"
+	       "                 For query operations, set target version for restoring a backup. Set -1 for maximum\n"
+	       "                 restorable version (default) and -2 for minimum restorable version.\n");
+	printf("  --query_restore_timestamp DATETIME\n"
+	       "                 For query operations, instead of a numeric version, use this to specify a timestamp in %s\n", BackupAgentBase::timeFormat().c_str());
+	printf("                 and it will be converted to a version from that time using metadata in the cluster file.\n");
 	printf("  --restorable_after_timestamp DATETIME\n"
 		   "                 For expire operations, set minimum acceptable restorability to the version equivalent of DATETIME and later.\n");
 	printf("  --restorable_after_version VERSION\n"
@ -997,8 +1060,8 @@ static void printBackupUsage(bool devhelp) {
 	       "                 Specifies a UID to verify against the BackupUID of the running backup.  If provided, the UID is verified in the same transaction\n"
 	       "                 which sets the new backup parameters (if the UID matches).\n");
 	printf("  -e ERRORLIMIT  The maximum number of errors printed by status (default is 10).\n");
-	printf("  -k KEYS        List of key ranges to backup.\n"
-		   "                 If not specified, the entire database will be backed up.\n");
+	printf("  -k KEYS        List of key ranges to backup or to filter the backup in query operations.\n"
+	       "                 If not specified, the entire database will be backed up or no filter will be applied.\n");
 	printf("  --partitioned_log_experimental  Starts with new type of backup system using partitioned logs.\n");
 	printf("  -n, --dryrun   For backup start or restore start, performs a trial run with no actual changes made.\n");
 	printf("  --log          Enables trace file logging for the CLI session.\n"
@ -1320,6 +1383,7 @@ enumBackupType	getBackupType(std::string backupType)
 		values["delete"] = BACKUP_DELETE;
 		values["describe"] = BACKUP_DESCRIBE;
 		values["list"] = BACKUP_LIST;
+		values["query"] = BACKUP_QUERY;
 		values["dump"] = BACKUP_DUMP;
 		values["modify"] = BACKUP_MODIFY;
 	}
@ -2458,6 +2522,135 @@ ACTOR Future<Void> describeBackup(const char *name, std::string destinationConta
 	return Void();
 }

+static void reportBackupQueryError(UID operationId, JsonBuilderObject& result, std::string errorMessage) {
+	result["error"] = errorMessage;
+	printf("%s\n", result.getJson().c_str());
+	TraceEvent("BackupQueryFailure").detail("OperationId", operationId).detail("Reason", errorMessage);
+}
+
+// If restoreVersion is invalidVersion or latestVersion, use the maximum or minimum restorable version respectively for
+// selected key ranges. If restoreTimestamp is specified, any specified restoreVersion will be overriden to the version
+// resolved to that timestamp.
+ACTOR Future<Void> queryBackup(const char* name, std::string destinationContainer,
+                               Standalone<VectorRef<KeyRangeRef>> keyRangesFilter, Version restoreVersion,
+                               std::string originalClusterFile, std::string restoreTimestamp, bool verbose) {
+	state UID operationId = deterministicRandom()->randomUniqueID();
+	state JsonBuilderObject result;
+	state std::string errorMessage;
+	result["key_ranges_filter"] = printable(keyRangesFilter);
+	result["destination_container"] = destinationContainer;
+
+	TraceEvent("BackupQueryStart")
+	    .detail("OperationId", operationId)
+	    .detail("DestinationContainer", destinationContainer)
+	    .detail("KeyRangesFilter", printable(keyRangesFilter))
+	    .detail("SpecifiedRestoreVersion", restoreVersion)
+	    .detail("RestoreTimestamp", restoreTimestamp)
+	    .detail("BackupClusterFile", originalClusterFile);
+
+	// Resolve restoreTimestamp if given
+	if (!restoreTimestamp.empty()) {
+		if (originalClusterFile.empty()) {
+			reportBackupQueryError(
+			    operationId, result,
+			    format("an original cluster file must be given in order to resolve restore target timestamp '%s'",
+			           restoreTimestamp.c_str()));
+			return Void();
+		}
+
+		if (!fileExists(originalClusterFile)) {
+			reportBackupQueryError(operationId, result,
+			                       format("The specified original source database cluster file '%s' does not exist\n",
+			                              originalClusterFile.c_str()));
+			return Void();
+		}
+
+		Database origDb = Database::createDatabase(originalClusterFile, Database::API_VERSION_LATEST);
+		Version v = wait(timeKeeperVersionFromDatetime(restoreTimestamp, origDb));
+		result["restore_timestamp"] = restoreTimestamp;
+		result["restore_timestamp_resolved_version"] = v;
+		restoreVersion = v;
+	}
+
+	try {
+		state Reference<IBackupContainer> bc = openBackupContainer(name, destinationContainer);
+		if (restoreVersion == invalidVersion) {
+			BackupDescription desc = wait(bc->describeBackup());
+			if (desc.maxRestorableVersion.present()) {
+				restoreVersion = desc.maxRestorableVersion.get();
+				// Use continuous log end version for the maximum restorable version for the key ranges.
+			} else if (keyRangesFilter.size() && desc.contiguousLogEnd.present()) {
+				restoreVersion = desc.contiguousLogEnd.get();
+			} else {
+				reportBackupQueryError(
+				    operationId, result,
+				    errorMessage = format("the backup for the specified key ranges is not restorable to any version"));
+			}
+		}
+
+		if (restoreVersion < 0 && restoreVersion != latestVersion) {
+			reportBackupQueryError(operationId, result,
+			                       errorMessage =
+			                           format("the specified restorable version %ld is not valid", restoreVersion));
+			return Void();
+		}
+		Optional<RestorableFileSet> fileSet = wait(bc->getRestoreSet(restoreVersion, keyRangesFilter));
+		if (fileSet.present()) {
+			int64_t totalRangeFilesSize = 0, totalLogFilesSize = 0;
+			result["restore_version"] = fileSet.get().targetVersion;
+			JsonBuilderArray rangeFilesJson;
+			JsonBuilderArray logFilesJson;
+			for (const auto& rangeFile : fileSet.get().ranges) {
+				JsonBuilderObject object;
+				object["file_name"] = rangeFile.fileName;
+				object["file_size"] = rangeFile.fileSize;
+				object["version"] = rangeFile.version;
+				object["key_range"] = fileSet.get().keyRanges.count(rangeFile.fileName) == 0
+				                          ? "none"
+				                          : fileSet.get().keyRanges.at(rangeFile.fileName).toString();
+				rangeFilesJson.push_back(object);
+				totalRangeFilesSize += rangeFile.fileSize;
+			}
+			for (const auto& log : fileSet.get().logs) {
+				JsonBuilderObject object;
+				object["file_name"] = log.fileName;
+				object["file_size"] = log.fileSize;
+				object["begin_version"] = log.beginVersion;
+				object["end_version"] = log.endVersion;
+				logFilesJson.push_back(object);
+				totalLogFilesSize += log.fileSize;
+			}
+
+			result["total_range_files_size"] = totalRangeFilesSize;
+			result["total_log_files_size"] = totalLogFilesSize;
+
+			if (verbose) {
+				result["ranges"] = rangeFilesJson;
+				result["logs"] = logFilesJson;
+			}
+
+			TraceEvent("BackupQueryReceivedRestorableFilesSet")
+			    .detail("DestinationContainer", destinationContainer)
+			    .detail("KeyRangesFilter", printable(keyRangesFilter))
+			    .detail("ActualRestoreVersion", fileSet.get().targetVersion)
+			    .detail("NumRangeFiles", fileSet.get().ranges.size())
+			    .detail("NumLogFiles", fileSet.get().logs.size())
+			    .detail("RangeFilesBytes", totalRangeFilesSize)
+			    .detail("LogFilesBytes", totalLogFilesSize);
+		} else {
+			reportBackupQueryError(operationId, result, "no restorable files set found for specified key ranges");
+			return Void();
+		}
+
+	} catch (Error& e) {
+		reportBackupQueryError(operationId, result, e.what());
+		return Void();
+	}
+
+	printf("%s\n", result.getJson().c_str());
+	return Void();
+}
+
 ACTOR Future<Void> listBackup(std::string baseUrl) {
 	try {
 		std::vector<std::string> containers = wait(IBackupContainer::listContainers(baseUrl));
@ -2827,6 +3020,9 @@ int main(int argc, char* argv[]) {
 				case BACKUP_LIST:
 					args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupListOptions, SO_O_EXACT);
 					break;
+				case BACKUP_QUERY:
+					args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupQueryOptions, SO_O_EXACT);
+					break;
 				case BACKUP_MODIFY:
 					args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupModifyOptions, SO_O_EXACT);
 					break;
@ -2966,6 +3162,7 @@ int main(int argc, char* argv[]) {
 		std::string addPrefix;
 		std::string removePrefix;
 		Standalone<VectorRef<KeyRangeRef>> backupKeys;
+		Standalone<VectorRef<KeyRangeRef>> backupKeysFilter;
 		int maxErrors = 20;
 		Version beginVersion = invalidVersion;
 		Version restoreVersion = invalidVersion;
@ -3188,6 +3385,15 @@ int main(int argc, char* argv[]) {
 						return FDB_EXIT_ERROR;
 					}
 					break;
+				case OPT_BACKUPKEYS_FILTER:
+					try {
+						addKeyRange(args->OptionArg(), backupKeysFilter);
+					}
+					catch (Error &) {
+						printHelpTeaser(argv[0]);
+						return FDB_EXIT_ERROR;
+					}
+					break;
 				case OPT_DESTCONTAINER:
 					destinationContainer = args->OptionArg();
 					// If the url starts with '/' then prepend "file://" for backwards compatibility
@ -3727,6 +3933,12 @@ int main(int argc, char* argv[]) {
 				f = stopAfter( listBackup(baseUrl) );
 				break;

+			case BACKUP_QUERY:
+				initTraceFile();
+				f = stopAfter(queryBackup(argv[0], destinationContainer, backupKeysFilter, restoreVersion,
+				                          restoreClusterFileOrig, restoreTimestamp, !quietDisplay));
+				break;
+
 			case BACKUP_DUMP:
 				initTraceFile();
 				f = stopAfter( dumpBackupData(argv[0], destinationContainer, dumpBegin, dumpEnd) );
--- a/fdbcli/fdbcli.actor.cpp
+++ b/fdbcli/fdbcli.actor.cpp
@ -471,8 +471,8 @@ void initHelp() {
 		"All keys between BEGINKEY (inclusive) and ENDKEY (exclusive) are cleared from the database. This command will succeed even if the specified range is empty, but may fail because of conflicts." ESCAPINGK);
 	helpMap["configure"] = CommandHelp(
 	    "configure [new] "
-	    "<single|double|triple|three_data_hall|three_datacenter|ssd|memory|memory-radixtree-beta|commit_proxies=<"
-	    "COMMIT_PROXIES>|grv_proxies=<GRV_PROXIES>|logs=<LOGS>|resolvers=<RESOLVERS>>*",
+	    "<single|double|triple|three_data_hall|three_datacenter|ssd|memory|memory-radixtree-beta|proxies=<PROXIES>|"
+	    "commit_proxies=<COMMIT_PROXIES>|grv_proxies=<GRV_PROXIES>|logs=<LOGS>|resolvers=<RESOLVERS>>*",
 	    "change the database configuration",
 	    "The `new' option, if present, initializes a new database with the given configuration rather than changing "
 	    "the configuration of an existing one. When used, both a redundancy mode and a storage engine must be "
@ -480,8 +480,13 @@ void initHelp() {
 	    "of data (survive one failure).\n  triple - three copies of data (survive two failures).\n  three_data_hall - "
 	    "See the Admin Guide.\n  three_datacenter - See the Admin Guide.\n\nStorage engine:\n  ssd - B-Tree storage "
 	    "engine optimized for solid state disks.\n  memory - Durable in-memory storage engine for small "
-	    "datasets.\n\ncommit_proxies=<COMMIT_PROXIES>: Sets the desired number of commit proxies in the cluster. Must "
-	    "be at least 1, or set to -1 which restores the number of commit proxies to the default "
+	    "datasets.\n\nproxies=<PROXIES>: Sets the desired number of proxies in the cluster. The proxy role is being "
+		"deprecated and split into GRV proxy and Commit proxy, now prefer configure 'grv_proxies' and 'commit_proxies' "
+		"separately. Generally we should follow that 'commit_proxies' is three times of 'grv_proxies' and 'grv_proxies' "
+		"should be not more than 4. If 'proxies' is specified, it will be converted to 'grv_proxies' and 'commit_proxies'. "
+		"Must be at least 2 (1 GRV proxy, 1 Commit proxy), or set to -1 which restores the number of proxies to the "
+		"default value.\n\ncommit_proxies=<COMMIT_PROXIES>: Sets the desired number of commit proxies in the cluster. "
+	    "Must be at least 1, or set to -1 which restores the number of commit proxies to the default "
 	    "value.\n\ngrv_proxies=<GRV_PROXIES>: Sets the desired number of GRV proxies in the cluster. Must be at least "
 	    "1, or set to -1 which restores the number of GRV proxies to the default value.\n\nlogs=<LOGS>: Sets the "
 	    "desired number of log servers in the cluster. Must be at least 1, or set to -1 which restores the number of "
@ -1058,10 +1063,10 @@ void printStatus(StatusObjectReader statusObj, StatusClient::StatusLevel level,
 				if (statusObjConfig.has("regions")) {
 					outputString += "\n  Regions: ";
 					regions = statusObjConfig["regions"].get_array();
-					bool isPrimary = false;
-					std::vector<std::string> regionSatelliteDCs;
-					std::string regionDC;
 					for (StatusObjectReader region : regions) {
+						bool isPrimary = false;
+						std::vector<std::string> regionSatelliteDCs;
+						std::string regionDC;
 						for (StatusObjectReader dc : region["datacenters"].get_array()) {
 							if (!dc.has("satellite")) {
 								regionDC = dc["id"].get_str();
@ -1807,7 +1812,7 @@ ACTOR Future<Void> commitTransaction( Reference<ReadYourWritesTransaction> tr )
 }

 ACTOR Future<bool> configure( Database db, std::vector<StringRef> tokens, Reference<ClusterConnectionFile> ccf, LineNoise* linenoise, Future<Void> warn ) {
-	state ConfigurationResult::Type result;
+	state ConfigurationResult result;
 	state int startToken = 1;
 	state bool force = false;
 	if (tokens.size() < 2)
@ -1888,7 +1893,8 @@ ACTOR Future<bool> configure( Database db, std::vector<StringRef> tokens, Refere
 			}
 		}

-		ConfigurationResult::Type r  = wait( makeInterruptable( changeConfig( db, std::vector<StringRef>(tokens.begin()+startToken,tokens.end()), conf, force) ) );
+		ConfigurationResult r = wait(makeInterruptable(
+		    changeConfig(db, std::vector<StringRef>(tokens.begin() + startToken, tokens.end()), conf, force)));
 		result = r;
 	}

@ -2014,7 +2020,7 @@ ACTOR Future<bool> fileConfigure(Database db, std::string filePath, bool isNewDa
 			return true;
 		}
 	}
-	ConfigurationResult::Type result = wait( makeInterruptable( changeConfig(db, configString, force) ) );
+	ConfigurationResult result = wait(makeInterruptable(changeConfig(db, configString, force)));
 	// Real errors get thrown from makeInterruptable and printed by the catch block in cli(), but
 	// there are various results specific to changeConfig() that we need to report:
 	bool ret;
@ -2145,7 +2151,7 @@ ACTOR Future<bool> coordinators( Database db, std::vector<StringRef> tokens, boo
 	}
 	if(setName.size()) change = nameQuorumChange( setName.toString(), change );

-	CoordinatorsResult::Type r = wait( makeInterruptable( changeQuorum( db, change ) ) );
+	CoordinatorsResult r = wait(makeInterruptable(changeQuorum(db, change)));

 	// Real errors get thrown from makeInterruptable and printed by the catch block in cli(), but
 	// there are various results specific to changeConfig() that we need to report:
--- a/fdbclient/BackupContainer.actor.cpp
+++ b/fdbclient/BackupContainer.actor.cpp
@ -23,6 +23,7 @@
 #include "fdbclient/BackupAgent.actor.h"
 #include "fdbclient/FDBTypes.h"
 #include "fdbclient/JsonBuilder.h"
+#include "flow/Arena.h"
 #include "flow/Trace.h"
 #include "flow/UnitTest.h"
 #include "flow/Hash3.h"
@ -245,7 +246,7 @@ std::string BackupDescription::toJSON() const {
 *     file written will be after the start version of the snapshot's execution.
 *
 *   Log files are at file paths like
- *       /plogs/...log,startVersion,endVersion,UID,tagID-of-N,blocksize
+ *       /plogs/.../log,startVersion,endVersion,UID,tagID-of-N,blocksize
 *       /logs/.../log,startVersion,endVersion,UID,blockSize
 *     where ... is a multi level path which sorts lexically into version order and results in approximately 1
 *     unique folder per day containing about 5,000 files. Logs after FDB 6.3 are stored in "plogs"
@ -1403,8 +1404,15 @@ public:
 	}

 	ACTOR static Future<Optional<RestorableFileSet>> getRestoreSet_impl(Reference<BackupContainerFileSystem> bc,
-	                                                                    Version targetVersion, bool logsOnly,
-	                                                                    Version beginVersion) {
+	                                                                    Version targetVersion,
+	                                                                    VectorRef<KeyRangeRef> keyRangesFilter, bool logsOnly = false,
+	                                                                    Version beginVersion = invalidVersion) {
+		// Does not support use keyRangesFilter for logsOnly yet
+		if (logsOnly && !keyRangesFilter.empty()) {
+			TraceEvent(SevError, "BackupContainerRestoreSetUnsupportedAPI").detail("KeyRangesFilter", keyRangesFilter.size());
+			return Optional<RestorableFileSet>();
+		}
+
 		if (logsOnly) {
 			state RestorableFileSet restorableSet;
 			state std::vector<LogFile> logFiles;
@ -1416,23 +1424,55 @@ public:
 				return getRestoreSetFromLogs(logFiles, targetVersion, restorableSet);
 			}
 		}
-		// Find the most recent keyrange snapshot to end at or before targetVersion
-		state Optional<KeyspaceSnapshotFile> snapshot;
-		std::vector<KeyspaceSnapshotFile> snapshots = wait(bc->listKeyspaceSnapshots());
-		for(auto const &s : snapshots) {
-			if(s.endVersion <= targetVersion)
-				snapshot = s;
-		}

-		if(snapshot.present()) {
+		// Find the most recent keyrange snapshot through which we can restore filtered key ranges into targetVersion.
+		state std::vector<KeyspaceSnapshotFile> snapshots = wait(bc->listKeyspaceSnapshots());
+		state int i = snapshots.size() - 1;
+		for (; i >= 0; i--) {
+			// The smallest version of filtered range files >= snapshot beginVersion > targetVersion
+			if (targetVersion >= 0 && snapshots[i].beginVersion > targetVersion) {
+				continue;
+			}
+
 			state RestorableFileSet restorable;
-			restorable.snapshot = snapshot.get();
-			restorable.targetVersion = targetVersion;
+			state Version minKeyRangeVersion = MAX_VERSION;
+			state Version maxKeyRangeVersion = -1;

 			std::pair<std::vector<RangeFile>, std::map<std::string, KeyRange>> results =
-			    wait(bc->readKeyspaceSnapshot(snapshot.get()));
-			restorable.ranges = std::move(results.first);
-			restorable.keyRanges = std::move(results.second);
+			    wait(bc->readKeyspaceSnapshot(snapshots[i]));
+
+			// Old backup does not have metadata about key ranges and can not be filtered with key ranges.
+			if (keyRangesFilter.size() && results.second.empty() && !results.first.empty()) {
+				throw backup_not_filterable_with_key_ranges();
+			}
+
+			// Filter by keyRangesFilter.
+			if (keyRangesFilter.empty()) {
+				restorable.ranges = std::move(results.first);
+				restorable.keyRanges = std::move(results.second);
+				minKeyRangeVersion = snapshots[i].beginVersion;
+				maxKeyRangeVersion = snapshots[i].endVersion;
+			} else {
+				for (const auto& rangeFile : results.first) {
+					const auto& keyRange = results.second.at(rangeFile.fileName);
+					if (keyRange.intersects(keyRangesFilter)) {
+						restorable.ranges.push_back(rangeFile);
+						restorable.keyRanges[rangeFile.fileName] = keyRange;
+						minKeyRangeVersion = std::min(minKeyRangeVersion, rangeFile.version);
+						maxKeyRangeVersion = std::max(maxKeyRangeVersion, rangeFile.version);
+					}
+				}
+				// No range file matches 'keyRangesFilter'.
+				if (restorable.ranges.empty()) {
+					throw backup_not_overlapped_with_keys_filter();
+				}
+			}
+			// 'latestVersion' represents using the minimum restorable version in a snapshot.
+			restorable.targetVersion = targetVersion == latestVersion ? maxKeyRangeVersion : targetVersion;
+			// Any version < maxKeyRangeVersion is not restorable.
+			if (restorable.targetVersion < maxKeyRangeVersion) continue;
+
+			restorable.snapshot = snapshots[i];
 			// TODO: Reenable the sanity check after TooManyFiles error is resolved
 			if (false && g_network->isSimulated()) {
 				// Sanity check key ranges
@ -1446,18 +1486,21 @@ public:
 				}
 			}

-			// No logs needed if there is a complete key space snapshot at the target version.
-			if (snapshot.get().beginVersion == snapshot.get().endVersion &&
-			    snapshot.get().endVersion == targetVersion) {
+			// No logs needed if there is a complete filtered key space snapshot at the target version.
+			if (minKeyRangeVersion == maxKeyRangeVersion && maxKeyRangeVersion == restorable.targetVersion) {
 				restorable.continuousBeginVersion = restorable.continuousEndVersion = invalidVersion;
+				TraceEvent("BackupContainerGetRestorableFilesWithoutLogs")
+				    .detail("KeyRangeVersion", restorable.targetVersion)
+				    .detail("NumberOfRangeFiles", restorable.ranges.size())
+				    .detail("KeyRangesFilter", printable(keyRangesFilter));
 				return Optional<RestorableFileSet>(restorable);
 			}

 			// FIXME: check if there are tagged logs. for each tag, there is no version gap.
 			state std::vector<LogFile> logs;
 			state std::vector<LogFile> plogs;
-			wait(store(logs, bc->listLogFiles(snapshot.get().beginVersion, targetVersion, false)) &&
-			     store(plogs, bc->listLogFiles(snapshot.get().beginVersion, targetVersion, true)));
+			wait(store(logs, bc->listLogFiles(minKeyRangeVersion, restorable.targetVersion, false)) &&
+			     store(plogs, bc->listLogFiles(minKeyRangeVersion, restorable.targetVersion, true)));

 			if (plogs.size() > 0) {
 				logs.swap(plogs);
@ -1469,13 +1512,12 @@ public:

 				// Remove duplicated log files that can happen for old epochs.
 				std::vector<LogFile> filtered = filterDuplicates(logs);
-
 				restorable.logs.swap(filtered);
 				// sort by version order again for continuous analysis
 				std::sort(restorable.logs.begin(), restorable.logs.end());
-				if (isPartitionedLogsContinuous(restorable.logs, snapshot.get().beginVersion, targetVersion)) {
-					restorable.continuousBeginVersion = snapshot.get().beginVersion;
-					restorable.continuousEndVersion = targetVersion + 1; // not inclusive
+				if (isPartitionedLogsContinuous(restorable.logs, minKeyRangeVersion, restorable.targetVersion)) {
+					restorable.continuousBeginVersion = minKeyRangeVersion;
+					restorable.continuousEndVersion = restorable.targetVersion + 1; // not inclusive
 					return Optional<RestorableFileSet>(restorable);
 				}
 				return Optional<RestorableFileSet>();
@ -1483,20 +1525,19 @@ public:

 			// List logs in version order so log continuity can be analyzed
 			std::sort(logs.begin(), logs.end());
-
-			// If there are logs and the first one starts at or before the snapshot begin version then proceed
-			if(!logs.empty() && logs.front().beginVersion <= snapshot.get().beginVersion) {
+			// If there are logs and the first one starts at or before the keyrange's snapshot begin version, then
+			// it is valid restore set and proceed
+			if (!logs.empty() && logs.front().beginVersion <= minKeyRangeVersion) {
 				return getRestoreSetFromLogs(logs, targetVersion, restorable);
 			}
 		}
-
 		return Optional<RestorableFileSet>();
 	}

-	Future<Optional<RestorableFileSet>> getRestoreSet(Version targetVersion, bool logsOnly,
-	                                                  Version beginVersion) final {
-		return getRestoreSet_impl(Reference<BackupContainerFileSystem>::addRef(this), targetVersion, logsOnly,
-		                          beginVersion);
+	Future<Optional<RestorableFileSet>> getRestoreSet(Version targetVersion, VectorRef<KeyRangeRef> keyRangesFilter,
+	                                                  bool logsOnly, Version beginVersion) final {
+		return getRestoreSet_impl(Reference<BackupContainerFileSystem>::addRef(this), targetVersion, keyRangesFilter,
+		                          logsOnly, beginVersion);
 	}

 private:
--- a/fdbclient/BackupContainer.h
+++ b/fdbclient/BackupContainer.h
@ -280,10 +280,13 @@ public:

 	virtual Future<BackupFileList> dumpFileList(Version begin = 0, Version end = std::numeric_limits<Version>::max()) = 0;

-	// Get exactly the files necessary to restore to targetVersion.  Returns non-present if
-	// restore to given version is not possible.
-	virtual Future<Optional<RestorableFileSet>> getRestoreSet(Version targetVersion, bool logsOnly = false,
-	                                                          Version beginVersion = -1) = 0;
+	// Get exactly the files necessary to restore the key space filtered by the specified key ranges to targetVersion.
+	// If targetVersion is 'latestVersion', use the minimum restorable version in a snapshot.
+	// If logsOnly is set, only use log files in [beginVersion, targetVervions) in restore set.
+	// Returns non-present if restoring to the given version is not possible.
+	virtual Future<Optional<RestorableFileSet>> getRestoreSet(Version targetVersion,
+	                                                          VectorRef<KeyRangeRef> keyRangesFilter = {},
+	                                                          bool logsOnly = false, Version beginVersion = -1) = 0;

 	// Get an IBackupContainer based on a container spec string
 	static Reference<IBackupContainer> openContainer(std::string url);
--- a/fdbclient/DatabaseConfiguration.h
+++ b/fdbclient/DatabaseConfiguration.h
@ -133,15 +133,19 @@ struct DatabaseConfiguration {
 	}

 	//Killing an entire datacenter counts as killing one zone in modes that support it
-	int32_t maxZoneFailuresTolerated() const {
+	int32_t maxZoneFailuresTolerated(int fullyReplicatedRegions, bool forAvailability) const {
 		int worstSatellite = regions.size() ? std::numeric_limits<int>::max() : 0;
+		int regionsWithNonNegativePriority = 0;
 		for(auto& r : regions) {
+			if(r.priority >= 0) {
+				regionsWithNonNegativePriority++;
+			}
 			worstSatellite = std::min(worstSatellite, r.satelliteTLogReplicationFactor - r.satelliteTLogWriteAntiQuorum);
 			if(r.satelliteTLogUsableDcsFallback > 0) {
 				worstSatellite = std::min(worstSatellite, r.satelliteTLogReplicationFactorFallback - r.satelliteTLogWriteAntiQuorumFallback);
 			}
 		}
-		if(usableRegions > 1 && worstSatellite > 0) {
+		if(usableRegions > 1 && fullyReplicatedRegions > 1 && worstSatellite > 0 && (!forAvailability || regionsWithNonNegativePriority > 1)) {
 			return 1 + std::min(std::max(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, worstSatellite - 1), storageTeamSize - 1);
 		} else if(worstSatellite > 0) {
 			return std::min(tLogReplicationFactor + worstSatellite - 2 - tLogWriteAntiQuorum, storageTeamSize - 1);
--- a/fdbclient/FDBTypes.h
+++ b/fdbclient/FDBTypes.h
@ -257,6 +257,7 @@ struct Traceable<std::set<T>> : std::true_type {
 std::string printable( const StringRef& val );
 std::string printable( const std::string& val );
 std::string printable( const KeyRangeRef& range );
+std::string printable(const VectorRef<KeyRangeRef>& val);
 std::string printable( const VectorRef<StringRef>& val );
 std::string printable( const VectorRef<KeyValueRef>& val );
 std::string printable( const KeyValueRef& val );
@ -289,6 +290,14 @@ struct KeyRangeRef {
 	bool contains( const KeyRef& key ) const { return begin <= key && key < end; }
 	bool contains( const KeyRangeRef& keys ) const { return begin <= keys.begin && keys.end <= end; }
 	bool intersects( const KeyRangeRef& keys ) const { return begin < keys.end && keys.begin < end; }
+	bool intersects(const VectorRef<KeyRangeRef>& keysVec) const {
+		for (const auto& keys : keysVec) {
+			if (intersects(keys)) {
+				return true;
+			}
+		}
+		return false;
+	}
 	bool empty() const { return begin == end; }
 	bool singleKeyRange() const { return equalsKeyAfter(begin, end); }

--- a/fdbclient/FileBackupAgent.actor.cpp
+++ b/fdbclient/FileBackupAgent.actor.cpp
@ -3470,12 +3470,13 @@ namespace fileBackup {
 			if (beginVersion == invalidVersion) {
 				beginVersion = 0;
 			}
-			Optional<RestorableFileSet> restorable = wait(bc->getRestoreSet(restoreVersion, incremental, beginVersion));
-			if (!incremental) {
-				beginVersion = restorable.get().snapshot.beginVersion;
-			}
+		    Optional<RestorableFileSet> restorable =
+		        wait(bc->getRestoreSet(restoreVersion, VectorRef<KeyRangeRef>(), incremental, beginVersion));
+		    if (!incremental) {
+			    beginVersion = restorable.get().snapshot.beginVersion;
+		    }

-			if(!restorable.present())
+		    if(!restorable.present())
 				throw restore_missing_data();

 			// First version for which log data should be applied
@ -4519,7 +4520,7 @@ public:
 		}

 		Optional<RestorableFileSet> restoreSet =
-		    wait(bc->getRestoreSet(targetVersion, incrementalBackupOnly, beginVersion));
+		    wait(bc->getRestoreSet(targetVersion, VectorRef<KeyRangeRef>(), incrementalBackupOnly, beginVersion));

 		if(!restoreSet.present()) {
 			TraceEvent(SevWarn, "FileBackupAgentRestoreNotPossible")
--- a/fdbclient/Knobs.cpp
+++ b/fdbclient/Knobs.cpp
@ -104,7 +104,7 @@ void ClientKnobs::initialize(bool randomize) {
 	init( WATCH_POLLING_TIME,                      1.0 ); if( randomize && BUGGIFY ) WATCH_POLLING_TIME = 5.0;
 	init( NO_RECENT_UPDATES_DURATION,             20.0 ); if( randomize && BUGGIFY ) NO_RECENT_UPDATES_DURATION = 0.1;
 	init( FAST_WATCH_TIMEOUT,                     20.0 ); if( randomize && BUGGIFY ) FAST_WATCH_TIMEOUT = 1.0;
-	init( WATCH_TIMEOUT,                         900.0 ); if( randomize && BUGGIFY ) WATCH_TIMEOUT = 20.0;
+	init( WATCH_TIMEOUT,                          30.0 ); if( randomize && BUGGIFY ) WATCH_TIMEOUT = 20.0;

 	// Core
 	init( CORE_VERSIONSPERSECOND,		           1e6 );
--- a/fdbclient/ManagementAPI.actor.cpp
+++ b/fdbclient/ManagementAPI.actor.cpp
@ -82,11 +82,17 @@ std::map<std::string, std::string> configForToken( std::string const& mode ) {
 		std::string value = mode.substr(pos+1);

 		if (key == "proxies" && isInteger(value)) {
-			printf("\nWarning: Proxy role is being split into GRV Proxy and Commit Proxy, now prefer configuring "
-			       "\"grv_proxies\" and \"commit_proxies\" separately.\n");
+			printf("Warning: Proxy role is being split into GRV Proxy and Commit Proxy, now prefer configuring "
+			       "'grv_proxies' and 'commit_proxies' separately. Generally we should follow that 'commit_proxies'"
+			       " is three times of 'grv_proxies' count and 'grv_proxies' should be not more than 4.\n");
 			int proxiesCount = atoi(value.c_str());
+			if (proxiesCount == -1) {
+				proxiesCount = CLIENT_KNOBS->DEFAULT_AUTO_GRV_PROXIES + CLIENT_KNOBS->DEFAULT_AUTO_COMMIT_PROXIES;
+				ASSERT_WE_THINK(proxiesCount >= 2);
+			}
+
 			if (proxiesCount < 2) {
-				printf("Error: At least 2 proxies (1 GRV proxy and Commit proxy) are required.\n");
+				printf("Error: At least 2 proxies (1 GRV proxy and 1 Commit proxy) are required.\n");
 				return out;
 			}

@ -102,7 +108,8 @@ std::map<std::string, std::string> configForToken( std::string const& mode ) {
 			       grvProxyCount, commitProxyCount);

 			TraceEvent("DatabaseConfigurationProxiesSpecified")
-			    .detail("SpecifiedProxies", grvProxyCount)
+			    .detail("SpecifiedProxies", atoi(value.c_str()))
+			    .detail("EffectiveSpecifiedProxies", proxiesCount)
 			    .detail("ConvertedGrvProxies", grvProxyCount)
 			    .detail("ConvertedCommitProxies", commitProxyCount);
 		}
@ -259,7 +266,8 @@ std::map<std::string, std::string> configForToken( std::string const& mode ) {
 	return out;
 }

-ConfigurationResult::Type buildConfiguration( std::vector<StringRef> const& modeTokens, std::map<std::string, std::string>& outConf ) {
+ConfigurationResult buildConfiguration(std::vector<StringRef> const& modeTokens,
+                                       std::map<std::string, std::string>& outConf) {
 	for(auto it : modeTokens) {
 		std::string mode = it.toString();
 		auto m = configForToken( mode );
@ -295,7 +303,7 @@ ConfigurationResult::Type buildConfiguration( std::vector<StringRef> const& mode
 	return ConfigurationResult::SUCCESS;
 }

-ConfigurationResult::Type buildConfiguration( std::string const& configMode, std::map<std::string, std::string>& outConf ) {
+ConfigurationResult buildConfiguration(std::string const& configMode, std::map<std::string, std::string>& outConf) {
 	std::vector<StringRef> modes;

 	int p = 0;
@ -335,7 +343,7 @@ ACTOR Future<DatabaseConfiguration> getDatabaseConfiguration( Database cx ) {
 	}
 }

-ACTOR Future<ConfigurationResult::Type> changeConfig( Database cx, std::map<std::string, std::string> m, bool force ) {
+ACTOR Future<ConfigurationResult> changeConfig(Database cx, std::map<std::string, std::string> m, bool force) {
 	state StringRef initIdKey = LiteralStringRef( "\xff/init_id" );
 	state Transaction tr(cx);

@ -852,7 +860,7 @@ ConfigureAutoResult parseConfig( StatusObject const& status ) {
 	return result;
 }

-ACTOR Future<ConfigurationResult::Type> autoConfig( Database cx, ConfigureAutoResult conf ) {
+ACTOR Future<ConfigurationResult> autoConfig(Database cx, ConfigureAutoResult conf) {
 	state Transaction tr(cx);
 	state Key versionKey = BinaryWriter::toValue(deterministicRandom()->randomUniqueID(),Unversioned());

@ -919,7 +927,8 @@ ACTOR Future<ConfigurationResult::Type> autoConfig( Database cx, ConfigureAutoRe
 	}
 }

-Future<ConfigurationResult::Type> changeConfig( Database const& cx, std::vector<StringRef> const& modes, Optional<ConfigureAutoResult> const& conf, bool force ) {
+Future<ConfigurationResult> changeConfig(Database const& cx, std::vector<StringRef> const& modes,
+                                         Optional<ConfigureAutoResult> const& conf, bool force) {
 	if( modes.size() && modes[0] == LiteralStringRef("auto") && conf.present() ) {
 		return autoConfig(cx, conf.get());
 	}
@ -931,7 +940,7 @@ Future<ConfigurationResult::Type> changeConfig( Database const& cx, std::vector<
 	return changeConfig(cx, m, force);
 }

-Future<ConfigurationResult::Type> changeConfig( Database const& cx, std::string const& modes, bool force ) {
+Future<ConfigurationResult> changeConfig(Database const& cx, std::string const& modes, bool force) {
 	TraceEvent("ChangeConfig").detail("Mode", modes);
 	std::map<std::string,std::string> m;
 	auto r = buildConfiguration( modes, m );
@ -1000,7 +1009,7 @@ ACTOR Future<std::vector<NetworkAddress>> getCoordinators( Database cx ) {
 	}
 }

-ACTOR Future<CoordinatorsResult::Type> changeQuorum( Database cx, Reference<IQuorumChange> change ) {
+ACTOR Future<CoordinatorsResult> changeQuorum(Database cx, Reference<IQuorumChange> change) {
 	state Transaction tr(cx);
 	state int retries = 0;
 	state std::vector<NetworkAddress> desiredCoordinators;
@ -1020,7 +1029,7 @@ ACTOR Future<CoordinatorsResult::Type> changeQuorum( Database cx, Reference<IQuo
 			if ( cx->getConnectionFile() && old.clusterKeyName().toString() != cx->getConnectionFile()->getConnectionString().clusterKeyName() )
 				return CoordinatorsResult::BAD_DATABASE_STATE;  // Someone changed the "name" of the database??

-			state CoordinatorsResult::Type result = CoordinatorsResult::SUCCESS;
+			state CoordinatorsResult result = CoordinatorsResult::SUCCESS;
 			if(!desiredCoordinators.size()) {
 				std::vector<NetworkAddress> _desiredCoordinators = wait( change->getDesiredCoordinators( &tr, old.coordinators(), Reference<ClusterConnectionFile>(new ClusterConnectionFile(old)), result ) );
 				desiredCoordinators = _desiredCoordinators;
@ -1090,14 +1099,20 @@ ACTOR Future<CoordinatorsResult::Type> changeQuorum( Database cx, Reference<IQuo
 struct SpecifiedQuorumChange : IQuorumChange {
 	vector<NetworkAddress> desired;
 	explicit SpecifiedQuorumChange( vector<NetworkAddress> const& desired ) : desired(desired) {}
-	virtual Future<vector<NetworkAddress>> getDesiredCoordinators( Transaction* tr, vector<NetworkAddress> oldCoordinators, Reference<ClusterConnectionFile>, CoordinatorsResult::Type& ) {
+	virtual Future<vector<NetworkAddress>> getDesiredCoordinators(Transaction* tr,
+	                                                              vector<NetworkAddress> oldCoordinators,
+	                                                              Reference<ClusterConnectionFile>,
+	                                                              CoordinatorsResult&) {
 		return desired;
 	}
 };
 Reference<IQuorumChange> specifiedQuorumChange(vector<NetworkAddress> const& addresses) { return Reference<IQuorumChange>(new SpecifiedQuorumChange(addresses)); }

 struct NoQuorumChange : IQuorumChange {
-	virtual Future<vector<NetworkAddress>> getDesiredCoordinators( Transaction* tr, vector<NetworkAddress> oldCoordinators, Reference<ClusterConnectionFile>, CoordinatorsResult::Type& ) {
+	virtual Future<vector<NetworkAddress>> getDesiredCoordinators(Transaction* tr,
+	                                                              vector<NetworkAddress> oldCoordinators,
+	                                                              Reference<ClusterConnectionFile>,
+	                                                              CoordinatorsResult&) {
 		return oldCoordinators;
 	}
 };
@ -1107,7 +1122,10 @@ struct NameQuorumChange : IQuorumChange {
 	std::string newName;
 	Reference<IQuorumChange> otherChange;
 	explicit NameQuorumChange( std::string const& newName, Reference<IQuorumChange> const& otherChange ) : newName(newName), otherChange(otherChange) {}
-	virtual Future<vector<NetworkAddress>> getDesiredCoordinators( Transaction* tr, vector<NetworkAddress> oldCoordinators, Reference<ClusterConnectionFile> cf, CoordinatorsResult::Type& t ) {
+	virtual Future<vector<NetworkAddress>> getDesiredCoordinators(Transaction* tr,
+	                                                              vector<NetworkAddress> oldCoordinators,
+	                                                              Reference<ClusterConnectionFile> cf,
+	                                                              CoordinatorsResult& t) {
 		return otherChange->getDesiredCoordinators(tr, oldCoordinators, cf, t);
 	}
 	virtual std::string getDesiredClusterKeyName() {
@ -1122,7 +1140,10 @@ struct AutoQuorumChange : IQuorumChange {
 	int desired;
 	explicit AutoQuorumChange( int desired ) : desired(desired) {}

-	virtual Future<vector<NetworkAddress>> getDesiredCoordinators( Transaction* tr, vector<NetworkAddress> oldCoordinators, Reference<ClusterConnectionFile> ccf, CoordinatorsResult::Type& err ) {
+	virtual Future<vector<NetworkAddress>> getDesiredCoordinators(Transaction* tr,
+	                                                              vector<NetworkAddress> oldCoordinators,
+	                                                              Reference<ClusterConnectionFile> ccf,
+	                                                              CoordinatorsResult& err) {
 		return getDesired( this, tr, oldCoordinators, ccf, &err );
 	}

@ -1174,7 +1195,10 @@ struct AutoQuorumChange : IQuorumChange {
 		return true; // The status quo seems fine
 	}

-	ACTOR static Future<vector<NetworkAddress>> getDesired( AutoQuorumChange* self, Transaction* tr, vector<NetworkAddress> oldCoordinators, Reference<ClusterConnectionFile> ccf, CoordinatorsResult::Type* err ) {
+	ACTOR static Future<vector<NetworkAddress>> getDesired(AutoQuorumChange* self, Transaction* tr,
+	                                                       vector<NetworkAddress> oldCoordinators,
+	                                                       Reference<ClusterConnectionFile> ccf,
+	                                                       CoordinatorsResult* err) {
 		state int desiredCount = self->desired;

 		if(desiredCount == -1) {
--- a/fdbclient/ManagementAPI.actor.h
+++ b/fdbclient/ManagementAPI.actor.h
@ -43,41 +43,35 @@ standard API and some knowledge of the contents of the system key space.

 // ConfigurationResult enumerates normal outcomes of changeConfig() and various error
 // conditions specific to it.  changeConfig may also throw an Error to report other problems.
-class ConfigurationResult {
-public:
-	enum Type {
-		NO_OPTIONS_PROVIDED,
-		CONFLICTING_OPTIONS,
-		UNKNOWN_OPTION,
-		INCOMPLETE_CONFIGURATION,
-		INVALID_CONFIGURATION,
-		DATABASE_ALREADY_CREATED,
-		DATABASE_CREATED,
-		DATABASE_UNAVAILABLE,
-		STORAGE_IN_UNKNOWN_DCID,
-		REGION_NOT_FULLY_REPLICATED,
-		MULTIPLE_ACTIVE_REGIONS,
-		REGIONS_CHANGED,
-		NOT_ENOUGH_WORKERS,
-		REGION_REPLICATION_MISMATCH,
-		DCID_MISSING,
-		LOCKED_NOT_NEW,
-		SUCCESS,
-	};
+enum class ConfigurationResult {
+	NO_OPTIONS_PROVIDED,
+	CONFLICTING_OPTIONS,
+	UNKNOWN_OPTION,
+	INCOMPLETE_CONFIGURATION,
+	INVALID_CONFIGURATION,
+	DATABASE_ALREADY_CREATED,
+	DATABASE_CREATED,
+	DATABASE_UNAVAILABLE,
+	STORAGE_IN_UNKNOWN_DCID,
+	REGION_NOT_FULLY_REPLICATED,
+	MULTIPLE_ACTIVE_REGIONS,
+	REGIONS_CHANGED,
+	NOT_ENOUGH_WORKERS,
+	REGION_REPLICATION_MISMATCH,
+	DCID_MISSING,
+	LOCKED_NOT_NEW,
+	SUCCESS,
 };

-class CoordinatorsResult {
-public:
-	enum Type {
-		INVALID_NETWORK_ADDRESSES,
-		SAME_NETWORK_ADDRESSES,
-		NOT_COORDINATORS, //FIXME: not detected
-		DATABASE_UNREACHABLE, //FIXME: not detected
-		BAD_DATABASE_STATE,
-		COORDINATOR_UNREACHABLE,
-		NOT_ENOUGH_MACHINES,
-		SUCCESS
-	};
+enum class CoordinatorsResult {
+	INVALID_NETWORK_ADDRESSES,
+	SAME_NETWORK_ADDRESSES,
+	NOT_COORDINATORS, // FIXME: not detected
+	DATABASE_UNREACHABLE, // FIXME: not detected
+	BAD_DATABASE_STATE,
+	COORDINATOR_UNREACHABLE,
+	NOT_ENOUGH_MACHINES,
+	SUCCESS
 };

 struct ConfigureAutoResult {
@ -116,17 +110,24 @@ struct ConfigureAutoResult {
 	bool isValid() const { return processes != -1; }
 };

-ConfigurationResult::Type buildConfiguration( std::vector<StringRef> const& modeTokens, std::map<std::string, std::string>& outConf );  // Accepts a vector of configuration tokens
-ConfigurationResult::Type buildConfiguration( std::string const& modeString, std::map<std::string, std::string>& outConf );				// Accepts tokens separated by spaces in a single string
+ConfigurationResult buildConfiguration(
+    std::vector<StringRef> const& modeTokens,
+    std::map<std::string, std::string>& outConf); // Accepts a vector of configuration tokens
+ConfigurationResult buildConfiguration(
+    std::string const& modeString,
+    std::map<std::string, std::string>& outConf); // Accepts tokens separated by spaces in a single string

 bool isCompleteConfiguration( std::map<std::string, std::string> const& options );

 // All versions of changeConfig apply the given set of configuration tokens to the database, and return a ConfigurationResult (or error).
-Future<ConfigurationResult::Type> changeConfig( Database const& cx, std::string const& configMode, bool force );  // Accepts tokens separated by spaces in a single string
+Future<ConfigurationResult> changeConfig(Database const& cx, std::string const& configMode,
+                                         bool force); // Accepts tokens separated by spaces in a single string

 ConfigureAutoResult parseConfig( StatusObject const& status );
-Future<ConfigurationResult::Type> changeConfig( Database const& cx, std::vector<StringRef> const& modes, Optional<ConfigureAutoResult> const& conf, bool force );  // Accepts a vector of configuration tokens
-ACTOR Future<ConfigurationResult::Type> changeConfig(
+Future<ConfigurationResult> changeConfig(Database const& cx, std::vector<StringRef> const& modes,
+                                         Optional<ConfigureAutoResult> const& conf,
+                                         bool force); // Accepts a vector of configuration tokens
+ACTOR Future<ConfigurationResult> changeConfig(
    Database cx, std::map<std::string, std::string> m,
    bool force); // Accepts a full configuration in key/value format (from buildConfiguration)

@ -135,12 +136,15 @@ ACTOR Future<Void> waitForFullReplication(Database cx);

 struct IQuorumChange : ReferenceCounted<IQuorumChange> {
 	virtual ~IQuorumChange() {}
-	virtual Future<vector<NetworkAddress>> getDesiredCoordinators( Transaction* tr, vector<NetworkAddress> oldCoordinators, Reference<ClusterConnectionFile>, CoordinatorsResult::Type& ) = 0;
+	virtual Future<vector<NetworkAddress>> getDesiredCoordinators(Transaction* tr,
+	                                                              vector<NetworkAddress> oldCoordinators,
+	                                                              Reference<ClusterConnectionFile>,
+	                                                              CoordinatorsResult&) = 0;
 	virtual std::string getDesiredClusterKeyName() { return std::string(); }
 };

 // Change to use the given set of coordination servers
-ACTOR Future<CoordinatorsResult::Type> changeQuorum(Database cx, Reference<IQuorumChange> change);
+ACTOR Future<CoordinatorsResult> changeQuorum(Database cx, Reference<IQuorumChange> change);
 Reference<IQuorumChange> autoQuorumChange(int desired = -1);
 Reference<IQuorumChange> noQuorumChange();
 Reference<IQuorumChange> specifiedQuorumChange(vector<NetworkAddress> const&);
--- a/fdbclient/NativeAPI.actor.cpp
+++ b/fdbclient/NativeAPI.actor.cpp
@ -189,6 +189,12 @@ std::string printable( const KeyRangeRef& range ) {
 	return printable(range.begin) + " - " + printable(range.end);
 }

+std::string printable(const VectorRef<KeyRangeRef>& val) {
+	std::string s;
+	for (int i = 0; i < val.size(); i++) s = s + printable(val[i]) + " ";
+	return s;
+}
+
 int unhex( char c ) {
 	if (c >= '0' && c <= '9')
 		return c-'0';
@ -3896,12 +3902,14 @@ ACTOR Future<GetReadVersionReply> getConsistentReadVersion(SpanID parentSpan, Da
                                                           TransactionPriority priority, uint32_t flags,
                                                           TransactionTagMap<uint32_t> tags, Optional<UID> debugID) {
 	state Span span("NAPI:getConsistentReadVersion"_loc, parentSpan);
-	try {
-		++cx->transactionReadVersionBatches;
-		if( debugID.present() )
-			g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), "NativeAPI.getConsistentReadVersion.Before");
-		loop {
+
+	++cx->transactionReadVersionBatches;
+	if( debugID.present() )
+		g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), "NativeAPI.getConsistentReadVersion.Before");
+	loop {
+		try {
 			state GetReadVersionRequest req( span.context, transactionCount, priority, flags, tags, debugID );
+
 			choose {
 				when ( wait( cx->onProxiesChanged() ) ) {}
 				when ( GetReadVersionReply v = wait( basicLoadBalance( cx->getGrvProxies(flags & GetReadVersionRequest::FLAG_USE_PROVISIONAL_PROXIES), &GrvProxyInterface::getConsistentReadVersion, req, cx->taskID ) ) ) {
@ -3930,12 +3938,17 @@ ACTOR Future<GetReadVersionReply> getConsistentReadVersion(SpanID parentSpan, Da
 					return v;
 				}
 			}
+		} catch (Error& e) {
+			if (e.code() != error_code_broken_promise && e.code() != error_code_batch_transaction_throttled)
+				TraceEvent(SevError, "GetConsistentReadVersionError").error(e);
+			if(e.code() == error_code_batch_transaction_throttled && !cx->apiVersionAtLeast(630)) {
+				wait(delayJittered(5.0));
+			} else {
+				throw;
+			}
 		}
-	} catch (Error& e) {
-		if (e.code() != error_code_broken_promise && e.code() != error_code_batch_transaction_throttled)
-			TraceEvent(SevError, "GetConsistentReadVersionError").error(e);
-		throw;
 	}
+
 }

 ACTOR Future<Void> readVersionBatcher( DatabaseContext *cx, FutureStream<DatabaseContext::VersionRequest> versionStream, TransactionPriority priority, uint32_t flags ) {
--- a/fdbclient/RestoreWorkerInterface.actor.h
+++ b/fdbclient/RestoreWorkerInterface.actor.h
@ -54,6 +54,7 @@ struct RestoreSysInfo;
 struct RestoreApplierInterface;
 struct RestoreFinishRequest;
 struct RestoreSamplesRequest;
+struct RestoreUpdateRateRequest;

 // RestoreSysInfo includes information each (type of) restore roles should know.
 // At this moment, it only include appliers. We keep the name for future extension.
@ -174,6 +175,7 @@ struct RestoreApplierInterface : RestoreRoleInterface {
 	RequestStream<RestoreVersionBatchRequest> initVersionBatch;
 	RequestStream<RestoreSimpleRequest> collectRestoreRoleInterfaces;
 	RequestStream<RestoreFinishRequest> finishRestore;
+	RequestStream<RestoreUpdateRateRequest> updateRate;

 	bool operator==(RestoreWorkerInterface const& r) const { return id() == r.id(); }
 	bool operator!=(RestoreWorkerInterface const& r) const { return id() != r.id(); }
@ -193,12 +195,13 @@ struct RestoreApplierInterface : RestoreRoleInterface {
 		initVersionBatch.getEndpoint(TaskPriority::LoadBalancedEndpoint);
 		collectRestoreRoleInterfaces.getEndpoint(TaskPriority::LoadBalancedEndpoint);
 		finishRestore.getEndpoint(TaskPriority::LoadBalancedEndpoint);
+		updateRate.getEndpoint(TaskPriority::LoadBalancedEndpoint);
 	}

 	template <class Ar>
 	void serialize(Ar& ar) {
 		serializer(ar, *(RestoreRoleInterface*)this, heartbeat, sendMutationVector, applyToDB, initVersionBatch,
-		           collectRestoreRoleInterfaces, finishRestore);
+		           collectRestoreRoleInterfaces, finishRestore, updateRate);
 	}

 	std::string toString() const { return nodeID.toString(); }
@ -616,6 +619,50 @@ struct RestoreFinishRequest : TimedRequest {
 	}
 };

+struct RestoreUpdateRateReply : TimedRequest {
+	constexpr static FileIdentifier file_identifier = 13018414;
+
+	UID id;
+	double remainMB; // remaining data in MB to write to DB;
+
+	RestoreUpdateRateReply() = default;
+	explicit RestoreUpdateRateReply(UID id, double remainMB) : id(id), remainMB(remainMB) {}
+
+	std::string toString() const {
+		std::stringstream ss;
+		ss << "RestoreUpdateRateReply NodeID:" << id.toString() << " remainMB:" << remainMB;
+		return ss.str();
+	}
+
+	template <class Ar>
+	void serialize(Ar& ar) {
+		serializer(ar, id, remainMB);
+	}
+};
+
+struct RestoreUpdateRateRequest : TimedRequest {
+	constexpr static FileIdentifier file_identifier = 13018415;
+
+	int batchIndex;
+	double writeMB;
+
+	ReplyPromise<RestoreUpdateRateReply> reply;
+
+	RestoreUpdateRateRequest() = default;
+	explicit RestoreUpdateRateRequest(int batchIndex, double writeMB) : batchIndex(batchIndex), writeMB(writeMB) {}
+
+	template <class Ar>
+	void serialize(Ar& ar) {
+		serializer(ar, batchIndex, writeMB, reply);
+	}
+
+	std::string toString() const {
+		std::stringstream ss;
+		ss << "RestoreUpdateRateRequest batchIndex:" << batchIndex << " writeMB:" << writeMB;
+		return ss.str();
+	}
+};
+
 struct RestoreRequest {
 	constexpr static FileIdentifier file_identifier = 16035338;

--- a/fdbrpc/FailureMonitor.actor.cpp
+++ b/fdbrpc/FailureMonitor.actor.cpp
@ -156,7 +156,7 @@ Future<Void> SimpleFailureMonitor::onStateChanged(Endpoint const& endpoint) {
 		return endpointKnownFailed.onChange(endpoint);
 }

-FailureStatus SimpleFailureMonitor::getState(Endpoint const& endpoint) {
+FailureStatus SimpleFailureMonitor::getState(Endpoint const& endpoint) const {
 	if (failedEndpoints.count(endpoint))
 		return FailureStatus(true);
 	else {
@ -170,7 +170,7 @@ FailureStatus SimpleFailureMonitor::getState(Endpoint const& endpoint) {
 	}
 }

-FailureStatus SimpleFailureMonitor::getState(NetworkAddress const& address) {
+FailureStatus SimpleFailureMonitor::getState(NetworkAddress const& address) const {
 	auto a = addressStatus.find(address);
 	if (a == addressStatus.end())
 		return FailureStatus();
@ -178,7 +178,7 @@ FailureStatus SimpleFailureMonitor::getState(NetworkAddress const& address) {
 		return a->second;
 }

-bool SimpleFailureMonitor::onlyEndpointFailed(Endpoint const& endpoint) {
+bool SimpleFailureMonitor::onlyEndpointFailed(Endpoint const& endpoint) const {
 	if (!failedEndpoints.count(endpoint)) return false;
 	auto a = addressStatus.find(endpoint.getPrimaryAddress());
 	if (a == addressStatus.end())
@ -187,7 +187,7 @@ bool SimpleFailureMonitor::onlyEndpointFailed(Endpoint const& endpoint) {
 		return !a->second.failed;
 }

-bool SimpleFailureMonitor::permanentlyFailed(Endpoint const& endpoint) {
+bool SimpleFailureMonitor::permanentlyFailed(Endpoint const& endpoint) const {
 	return failedEndpoints.count(endpoint);
 }

--- a/fdbrpc/FailureMonitor.h
+++ b/fdbrpc/FailureMonitor.h
@ -87,10 +87,10 @@ struct FailureStatus {
 class IFailureMonitor {
 public:
 	// Returns the currently known status for the endpoint
-	virtual FailureStatus getState(Endpoint const& endpoint) = 0;
+	virtual FailureStatus getState(Endpoint const& endpoint) const = 0;

 	// Returns the currently known status for the address
-	virtual FailureStatus getState(NetworkAddress const& address) = 0;
+	virtual FailureStatus getState(NetworkAddress const& address) const = 0;

 	// Only use this function when the endpoint is known to be failed
 	virtual void endpointNotFound(Endpoint const&) = 0;
@ -102,10 +102,10 @@ public:
 	virtual Future<Void> onDisconnectOrFailure(Endpoint const& endpoint) = 0;

 	// Returns true if the endpoint is failed but the address of the endpoint is not failed.
-	virtual bool onlyEndpointFailed(Endpoint const& endpoint) = 0;
+	virtual bool onlyEndpointFailed(Endpoint const& endpoint) const = 0;

 	// Returns true if the endpoint will never become available.
-	virtual bool permanentlyFailed(Endpoint const& endpoint) = 0;
+	virtual bool permanentlyFailed(Endpoint const& endpoint) const = 0;

 	// Called by FlowTransport when a connection closes and a prior request or reply might be lost
 	virtual void notifyDisconnect(NetworkAddress const&) = 0;
@ -140,14 +140,14 @@ public:
 	SimpleFailureMonitor();
 	void setStatus(NetworkAddress const& address, FailureStatus const& status);
 	void endpointNotFound(Endpoint const&);
-	virtual void notifyDisconnect(NetworkAddress const&);
+	void notifyDisconnect(NetworkAddress const&) override;

-	virtual Future<Void> onStateChanged(Endpoint const& endpoint);
-	virtual FailureStatus getState(Endpoint const& endpoint);
-	virtual FailureStatus getState(NetworkAddress const& address);
-	virtual Future<Void> onDisconnectOrFailure(Endpoint const& endpoint);
-	virtual bool onlyEndpointFailed(Endpoint const& endpoint);
-	virtual bool permanentlyFailed(Endpoint const& endpoint);
+	Future<Void> onStateChanged(Endpoint const& endpoint) override;
+	FailureStatus getState(Endpoint const& endpoint) const override;
+	FailureStatus getState(NetworkAddress const& address) const override;
+	Future<Void> onDisconnectOrFailure(Endpoint const& endpoint) override;
+	bool onlyEndpointFailed(Endpoint const& endpoint) const override;
+	bool permanentlyFailed(Endpoint const& endpoint) const override;

 	void reset();

--- a/fdbrpc/Locality.h
+++ b/fdbrpc/Locality.h
@ -78,6 +78,11 @@ public:
 		else if (s=="transaction") _class = TransactionClass;
 		else if (s=="resolution") _class = ResolutionClass;
 		else if (s=="commit_proxy") _class = CommitProxyClass;
+		else if (s=="proxy") {
+			_class = CommitProxyClass;
+			printf("WARNING: 'proxy' machine class is deprecated and will be automatically converted "
+					"'commit_proxy' machine class. Please use 'grv_proxy' or 'commit_proxy' specifically\n");
+		}
 		else if (s=="grv_proxy") _class = GrvProxyClass;
 		else if (s=="master") _class = MasterClass;
 		else if (s=="test") _class = TesterClass;
@ -100,6 +105,11 @@ public:
 		else if (classStr=="transaction") _class = TransactionClass;
 		else if (classStr=="resolution") _class = ResolutionClass;
 		else if (classStr=="commit_proxy") _class = CommitProxyClass;
+		else if (classStr=="proxy") {
+			_class = CommitProxyClass;
+			printf("WARNING: 'proxy' machine class is deprecated and will be automatically converted "
+					"'commit_proxy' machine class. Please use 'grv_proxy' or 'commit_proxy' specifically\n");
+		}
 		else if (classStr=="grv_proxy") _class = GrvProxyClass;
 		else if (classStr=="master") _class = MasterClass;
 		else if (classStr=="test") _class = TesterClass;
--- a/fdbserver/IKeyValueStore.h
+++ b/fdbserver/IKeyValueStore.h
@ -63,6 +63,8 @@ public:

 	virtual void enableSnapshot() {}

+	virtual bool canPipelineCommits() const = 0;
+
 	/*
 	Concurrency contract
 		Causal consistency:
--- a/fdbserver/KeyValueStoreCompressTestData.actor.cpp
+++ b/fdbserver/KeyValueStoreCompressTestData.actor.cpp
@ -35,6 +35,7 @@ struct KeyValueStoreCompressTestData : IKeyValueStore {

 	KeyValueStoreCompressTestData(IKeyValueStore* store) : store(store) {}

+	virtual bool canPipelineCommits() const override {return false;}
 	virtual Future<Void> getError() override { return store->getError(); }
 	virtual Future<Void> onClosed() override { return store->onClosed(); }
 	virtual void dispose() override {
--- a/fdbserver/KeyValueStoreMemory.actor.cpp
+++ b/fdbserver/KeyValueStoreMemory.actor.cpp
@ -63,6 +63,8 @@ public:
 	// IKeyValueStore
 	virtual KeyValueStoreType getType() const override { return type; }

+	virtual bool canPipelineCommits() const override { return false; }
+
 	virtual std::tuple<size_t, size_t, size_t> getSize() const override { return data.size(); }

 	int64_t getAvailableSize() const {
--- a/fdbserver/KeyValueStoreRocksDB.actor.cpp
+++ b/fdbserver/KeyValueStoreRocksDB.actor.cpp
@ -287,6 +287,8 @@ struct RocksDBKeyValueStore : IKeyValueStore {
 		return errorPromise.getFuture();
 	}

+	bool canPipelineCommits() const override { return true; }
+
 	ACTOR static void doClose(RocksDBKeyValueStore* self, bool deleteOnClose) {
 		wait(self->readThreads->stop());
 		auto a = new Writer::CloseAction(self->path, deleteOnClose);
--- a/fdbserver/KeyValueStoreSQLite.actor.cpp
+++ b/fdbserver/KeyValueStoreSQLite.actor.cpp
@ -1453,6 +1453,7 @@ public:

 	virtual KeyValueStoreType getType() const override { return type; }
 	virtual StorageBytes getStorageBytes() const override;
+	virtual bool canPipelineCommits() const override { return false; }

 	virtual void set(KeyValueRef keyValue, const Arena* arena = nullptr) override;
 	virtual void clear(KeyRangeRef range, const Arena* arena = nullptr) override;
--- a/fdbserver/Knobs.cpp
+++ b/fdbserver/Knobs.cpp
@ -46,7 +46,6 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
 	init( RECOVERY_TLOG_SMART_QUORUM_DELAY,                     0.25 ); if( randomize && BUGGIFY ) RECOVERY_TLOG_SMART_QUORUM_DELAY = 0.0; // smaller might be better for bug amplification
 	init( TLOG_STORAGE_MIN_UPDATE_INTERVAL,                      0.5 );
 	init( BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL,               30 );
-	init( UNFLUSHED_DATA_RATIO,                                 0.05 ); if( randomize && BUGGIFY ) UNFLUSHED_DATA_RATIO = 0.0;
 	init( DESIRED_TOTAL_BYTES,                                150000 ); if( randomize && BUGGIFY ) DESIRED_TOTAL_BYTES = 10000;
 	init( DESIRED_UPDATE_BYTES,                2*DESIRED_TOTAL_BYTES );
 	init( UPDATE_DELAY,                                        0.001 );
@ -549,6 +548,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
 	init( FETCH_KEYS_LOWER_PRIORITY,                               0 );
 	init( BUGGIFY_BLOCK_BYTES,                                 10000 );
 	init( STORAGE_COMMIT_BYTES,                             10000000 ); if( randomize && BUGGIFY ) STORAGE_COMMIT_BYTES = 2000000;
+	init( STORAGE_COMMIT_PIPELINE_BYTES_PER_YIELD,            100000 );
 	init( STORAGE_DURABILITY_LAG_REJECT_THRESHOLD,              0.25 );
 	init( STORAGE_DURABILITY_LAG_MIN_RATE,                       0.1 );
 	init( STORAGE_COMMIT_INTERVAL,                               0.5 ); if( randomize && BUGGIFY ) STORAGE_COMMIT_INTERVAL = 2.0;
@ -623,7 +623,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
 	init( FASTRESTORE_NUM_LOADERS,                                 3 ); if( randomize && BUGGIFY ) { FASTRESTORE_NUM_LOADERS = deterministicRandom()->random01() * 10 + 1; }
 	init( FASTRESTORE_NUM_APPLIERS,                                3 ); if( randomize && BUGGIFY ) { FASTRESTORE_NUM_APPLIERS = deterministicRandom()->random01() * 10 + 1; }
 	init( FASTRESTORE_TXN_BATCH_MAX_BYTES,           1024.0 * 1024.0 ); if( randomize && BUGGIFY ) { FASTRESTORE_TXN_BATCH_MAX_BYTES = deterministicRandom()->random01() * 1024.0 * 1024.0 + 1.0; }
-	init( FASTRESTORE_VERSIONBATCH_MAX_BYTES, 10.0 * 1024.0 * 1024.0 ); if( randomize && BUGGIFY ) { FASTRESTORE_VERSIONBATCH_MAX_BYTES = deterministicRandom()->random01() < 0.2 ? 5 * 1024 : deterministicRandom()->random01() < 0.4 ? 100 * 1024 * 1024 : deterministicRandom()->random01() * 1000.0 * 1024.0 * 1024.0; } // too small value may increase chance of TooManyFile error
+	init( FASTRESTORE_VERSIONBATCH_MAX_BYTES, 10.0 * 1024.0 * 1024.0 ); if( randomize && BUGGIFY ) { FASTRESTORE_VERSIONBATCH_MAX_BYTES = deterministicRandom()->random01() < 0.2 ? 10 * 1024 : deterministicRandom()->random01() < 0.4 ? 100 * 1024 * 1024 : deterministicRandom()->random01() * 1000.0 * 1024.0 * 1024.0; } // too small value may increase chance of TooManyFile error
 	init( FASTRESTORE_VB_PARALLELISM,                              5 ); if( randomize && BUGGIFY ) { FASTRESTORE_VB_PARALLELISM = deterministicRandom()->random01() < 0.2 ? 2 : deterministicRandom()->random01() * 10 + 1; }
 	init( FASTRESTORE_VB_MONITOR_DELAY,                           30 ); if( randomize && BUGGIFY ) { FASTRESTORE_VB_MONITOR_DELAY = deterministicRandom()->random01() * 20 + 1; }
 	init( FASTRESTORE_VB_LAUNCH_DELAY,                           1.0 ); if( randomize && BUGGIFY ) { FASTRESTORE_VB_LAUNCH_DELAY = deterministicRandom()->random01() < 0.2 ? 0.1 : deterministicRandom()->random01() * 10.0 + 1; }
@ -646,7 +646,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
 	init( FASTRESTORE_REQBATCH_LOG,                            false ); if( randomize && BUGGIFY ) { FASTRESTORE_REQBATCH_LOG = deterministicRandom()->random01() < 0.2 ? true : false; }
 	init( FASTRESTORE_TXN_CLEAR_MAX,                             100 ); if( randomize && BUGGIFY ) { FASTRESTORE_TXN_CLEAR_MAX = deterministicRandom()->random01() * 100 + 1; }
 	init( FASTRESTORE_TXN_RETRY_MAX,                              10 ); if( randomize && BUGGIFY ) { FASTRESTORE_TXN_RETRY_MAX = deterministicRandom()->random01() * 100 + 1; }
-	init( FASTRESTORE_TXN_EXTRA_DELAY,                           0.1 ); if( randomize && BUGGIFY ) { FASTRESTORE_TXN_EXTRA_DELAY = deterministicRandom()->random01() * 1 + 0.001;}
+	init( FASTRESTORE_TXN_EXTRA_DELAY,                           0.0 ); if( randomize && BUGGIFY ) { FASTRESTORE_TXN_EXTRA_DELAY = deterministicRandom()->random01() * 1 + 0.001;}
 	init( FASTRESTORE_NOT_WRITE_DB,                            false ); // Perf test only: set it to true will cause simulation failure
 	init( FASTRESTORE_USE_RANGE_FILE,                           true ); // Perf test only: set it to false will cause simulation failure
 	init( FASTRESTORE_USE_LOG_FILE,                             true ); // Perf test only: set it to false will cause simulation failure
@ -661,7 +661,8 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
 	init( FASTRESTORE_SCHED_SEND_FUTURE_VB_REQS_BATCH,             2 ); if( randomize && BUGGIFY ) { FASTRESTORE_SCHED_SEND_FUTURE_VB_REQS_BATCH = deterministicRandom()->random01() < 0.2 ? 1 : deterministicRandom()->random01() * 15 + 1;}
 	init( FASTRESTORE_NUM_TRACE_EVENTS,                          100 ); if( randomize && BUGGIFY ) { FASTRESTORE_NUM_TRACE_EVENTS = deterministicRandom()->random01() < 0.2 ? 1 : deterministicRandom()->random01() * 500 + 1;}
 	init( FASTRESTORE_EXPENSIVE_VALIDATION,                    false ); if( randomize && BUGGIFY ) { FASTRESTORE_EXPENSIVE_VALIDATION = deterministicRandom()->random01() < 0.5 ? true : false;}
-
+	init( FASTRESTORE_WRITE_BW_MB,                                70 ); if( randomize && BUGGIFY ) { FASTRESTORE_WRITE_BW_MB = deterministicRandom()->random01() < 0.5 ? 2 : 100;}
+	init( FASTRESTORE_RATE_UPDATE_SECONDS,                       1.0 ); if( randomize && BUGGIFY ) { FASTRESTORE_RATE_UPDATE_SECONDS = deterministicRandom()->random01() < 0.5 ? 0.1 : 2;}

 	init( REDWOOD_DEFAULT_PAGE_SIZE,                            4096 );
 	init( REDWOOD_KVSTORE_CONCURRENT_READS,                       64 );
--- a/fdbserver/Knobs.h
+++ b/fdbserver/Knobs.h
@ -45,7 +45,6 @@ public:
 	double RECOVERY_TLOG_SMART_QUORUM_DELAY;		// smaller might be better for bug amplification
 	double TLOG_STORAGE_MIN_UPDATE_INTERVAL;
 	double BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL;
-	double UNFLUSHED_DATA_RATIO;
 	int DESIRED_TOTAL_BYTES;
 	int DESIRED_UPDATE_BYTES;
 	double UPDATE_DELAY;
@ -482,6 +481,7 @@ public:
 	double STORAGE_DURABILITY_LAG_MIN_RATE;
 	int STORAGE_COMMIT_BYTES;
 	double STORAGE_COMMIT_INTERVAL;
+	int STORAGE_COMMIT_PIPELINE_BYTES_PER_YIELD;
 	double UPDATE_SHARD_VERSION_INTERVAL;
 	int BYTE_SAMPLING_FACTOR;
 	int BYTE_SAMPLING_OVERHEAD;
@ -547,6 +547,7 @@ public:
 	int64_t TIME_KEEPER_MAX_ENTRIES;

 	// Fast Restore
+	// TODO: After 6.3, review FR knobs, remove unneeded ones and change default value
 	int64_t FASTRESTORE_FAILURE_TIMEOUT;
 	int64_t FASTRESTORE_HEARTBEAT_INTERVAL;
 	double FASTRESTORE_SAMPLING_PERCENT;
@ -594,6 +595,8 @@ public:
 	int FASTRESTORE_SCHED_SEND_FUTURE_VB_REQS_BATCH; // number of future VB sendLoadingParam requests to process at once
 	int FASTRESTORE_NUM_TRACE_EVENTS;
 	bool FASTRESTORE_EXPENSIVE_VALIDATION; // when set true, performance will be heavily affected
+	double FASTRESTORE_WRITE_BW_MB; // target aggregated write bandwidth from all appliers
+	double FASTRESTORE_RATE_UPDATE_SECONDS; // how long to update appliers target write rate

 	int REDWOOD_DEFAULT_PAGE_SIZE;  // Page size for new Redwood files
 	int REDWOOD_KVSTORE_CONCURRENT_READS;  // Max number of simultaneous point or range reads in progress.
--- a/fdbserver/RestoreApplier.actor.cpp
+++ b/fdbserver/RestoreApplier.actor.cpp
@ -40,6 +40,7 @@ ACTOR static Future<Void> handleSendMutationVectorRequest(RestoreSendVersionedMu
                                                          Reference<RestoreApplierData> self);
 ACTOR static Future<Void> handleApplyToDBRequest(RestoreVersionBatchRequest req, Reference<RestoreApplierData> self,
                                                 Database cx);
+void handleUpdateRateRequest(RestoreUpdateRateRequest req, Reference<RestoreApplierData> self);

 ACTOR Future<Void> restoreApplierCore(RestoreApplierInterface applierInterf, int nodeIndex, Database cx) {
 	state Reference<RestoreApplierData> self =
@ -71,6 +72,10 @@ ACTOR Future<Void> restoreApplierCore(RestoreApplierInterface applierInterf, int
 					    req, self, cx)); // TODO: Check how FDB uses TaskPriority for ACTORS. We may need to add
 					                     // priority here to avoid requests at later VB block requests at earlier VBs
 				}
+				when(RestoreUpdateRateRequest req = waitNext(applierInterf.updateRate.getFuture())) {
+					requestTypeStr = "updateRate";
+					handleUpdateRateRequest(req, self);
+				}
 				when(RestoreVersionBatchRequest req = waitNext(applierInterf.initVersionBatch.getFuture())) {
 					requestTypeStr = "initVersionBatch";
 					actors.add(handleInitVersionBatchRequest(req, self));
@ -218,6 +223,7 @@ ACTOR static Future<Void> applyClearRangeMutations(Standalone<VectorRef<KeyRange

 	loop {
 		try {
+			// TODO: Consider clearrange traffic in write traffic control
 			tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
 			tr->setOption(FDBTransactionOptions::LOCK_AWARE);
 			for (auto& range : ranges) {
@ -463,31 +469,55 @@ ACTOR static Future<Void> precomputeMutationsResult(Reference<ApplierBatchData>
 	return Void();
 }

+bool okToReleaseTxns(double targetMB, double applyingDataBytes) {
+	return applyingDataBytes < targetMB * 1024 * 1024;
+}
+
+ACTOR static Future<Void> shouldReleaseTransaction(double* targetMB, double* applyingDataBytes,
+                                                   AsyncTrigger* releaseTxns) {
+	loop {
+		if (okToReleaseTxns(*targetMB, *applyingDataBytes)) {
+			break;
+		} else {
+			wait(releaseTxns->onTrigger());
+			wait(delay(0.0)); // Avoid all waiting txns are triggered at the same time and all decide to proceed before
+			                  // applyingDataBytes has a chance to update
+		}
+	}
+	return Void();
+}
+
 // Apply mutations in batchData->stagingKeys [begin, end).
 ACTOR static Future<Void> applyStagingKeysBatch(std::map<Key, StagingKey>::iterator begin,
-                                                std::map<Key, StagingKey>::iterator end, Database cx,
-                                                FlowLock* applyStagingKeysBatchLock, UID applierID,
-                                                ApplierBatchData::Counters* cc) {
+                                                std::map<Key, StagingKey>::iterator end, Database cx, UID applierID,
+                                                ApplierBatchData::Counters* cc, double* appliedBytes,
+                                                double* applyingDataBytes, double* targetMB,
+                                                AsyncTrigger* releaseTxnTrigger) {
 	if (SERVER_KNOBS->FASTRESTORE_NOT_WRITE_DB) {
 		TraceEvent("FastRestoreApplierPhaseApplyStagingKeysBatchSkipped", applierID).detail("Begin", begin->first);
 		ASSERT(!g_network->isSimulated());
 		return Void();
 	}
-	wait(applyStagingKeysBatchLock->take(TaskPriority::RestoreApplierWriteDB)); // Q: Do we really need the lock?
-	state FlowLock::Releaser releaser(*applyStagingKeysBatchLock);
+	wait(shouldReleaseTransaction(targetMB, applyingDataBytes, releaseTxnTrigger));
+
 	state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(cx));
 	state int sets = 0;
 	state int clears = 0;
 	state Key endKey = begin->first;
+	state double txnSize = 0;
+	state double txnSizeUsed = 0; // txn size accounted in applyingDataBytes
 	TraceEvent(SevFRDebugInfo, "FastRestoreApplierPhaseApplyStagingKeysBatch", applierID).detail("Begin", begin->first);
 	loop {
 		try {
+			txnSize = 0;
+			txnSizeUsed = 0;
 			tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
 			tr->setOption(FDBTransactionOptions::LOCK_AWARE);
 			std::map<Key, StagingKey>::iterator iter = begin;
 			while (iter != end) {
 				if (iter->second.type == MutationRef::SetValue) {
 					tr->set(iter->second.key, iter->second.val);
+					txnSize += iter->second.totalSize();
 					cc->appliedMutations += 1;
 					TraceEvent(SevFRMutationInfo, "FastRestoreApplierPhaseApplyStagingKeysBatch", applierID)
 					    .detail("SetKey", iter->second.key);
@ -501,6 +531,7 @@ ACTOR static Future<Void> applyStagingKeysBatch(std::map<Key, StagingKey>::itera
 						    .detail("SubVersion", iter->second.version.sub);
 					}
 					tr->clear(singleKeyRange(iter->second.key));
+					txnSize += iter->second.totalSize();
 					cc->appliedMutations += 1;
 					TraceEvent(SevFRMutationInfo, "FastRestoreApplierPhaseApplyStagingKeysBatch", applierID)
 					    .detail("ClearKey", iter->second.key);
@ -523,12 +554,21 @@ ACTOR static Future<Void> applyStagingKeysBatch(std::map<Key, StagingKey>::itera
 			    .detail("Sets", sets)
 			    .detail("Clears", clears);
 			tr->addWriteConflictRange(KeyRangeRef(begin->first, keyAfter(endKey))); // Reduce resolver load
+			txnSizeUsed = txnSize;
+			*applyingDataBytes += txnSizeUsed; // Must account for applying bytes before wait for write traffic control
 			wait(tr->commit());
 			cc->appliedTxns += 1;
+			cc->appliedBytes += txnSize;
+			*appliedBytes += txnSize;
+			*applyingDataBytes -= txnSizeUsed;
+			if (okToReleaseTxns(*targetMB, *applyingDataBytes)) {
+				releaseTxnTrigger->trigger();
+			}
 			break;
 		} catch (Error& e) {
 			cc->appliedTxnRetries += 1;
 			wait(tr->onError(e));
+			*applyingDataBytes -= txnSizeUsed;
 		}
 	}
 	return Void();
@ -545,13 +585,14 @@ ACTOR static Future<Void> applyStagingKeys(Reference<ApplierBatchData> batchData
 	TraceEvent("FastRestoreApplerPhaseApplyStagingKeysStart", applierID)
 	    .detail("BatchIndex", batchIndex)
 	    .detail("StagingKeys", batchData->stagingKeys.size());
+	batchData->totalBytesToWrite = 0;
 	while (cur != batchData->stagingKeys.end()) {
-		txnSize += cur->second.expectedMutationSize();
+		txnSize += cur->second.totalSize(); // should be consistent with receivedBytes accounting method
 		if (txnSize > SERVER_KNOBS->FASTRESTORE_TXN_BATCH_MAX_BYTES) {
-			fBatches.push_back(applyStagingKeysBatch(begin, cur, cx, &batchData->applyStagingKeysBatchLock, applierID,
-			                                         &batchData->counters));
-			batchData->counters.appliedBytes += txnSize;
-			batchData->appliedBytes += txnSize;
+			fBatches.push_back(applyStagingKeysBatch(begin, cur, cx, applierID, &batchData->counters,
+			                                         &batchData->appliedBytes, &batchData->applyingDataBytes,
+			                                         &batchData->targetWriteRateMB, &batchData->releaseTxnTrigger));
+			batchData->totalBytesToWrite += txnSize;
 			begin = cur;
 			txnSize = 0;
 			txnBatches++;
@ -559,10 +600,10 @@ ACTOR static Future<Void> applyStagingKeys(Reference<ApplierBatchData> batchData
 		cur++;
 	}
 	if (begin != batchData->stagingKeys.end()) {
-		fBatches.push_back(applyStagingKeysBatch(begin, cur, cx, &batchData->applyStagingKeysBatchLock, applierID,
-		                                         &batchData->counters));
-		batchData->counters.appliedBytes += txnSize;
-		batchData->appliedBytes += txnSize;
+		fBatches.push_back(applyStagingKeysBatch(begin, cur, cx, applierID, &batchData->counters,
+		                                         &batchData->appliedBytes, &batchData->applyingDataBytes,
+		                                         &batchData->targetWriteRateMB, &batchData->releaseTxnTrigger));
+		batchData->totalBytesToWrite += txnSize;
 		txnBatches++;
 	}

@ -571,18 +612,19 @@ ACTOR static Future<Void> applyStagingKeys(Reference<ApplierBatchData> batchData
 	TraceEvent("FastRestoreApplerPhaseApplyStagingKeysDone", applierID)
 	    .detail("BatchIndex", batchIndex)
 	    .detail("StagingKeys", batchData->stagingKeys.size())
-	    .detail("TransactionBatches", txnBatches);
+	    .detail("TransactionBatches", txnBatches)
+	    .detail("TotalBytesToWrite", batchData->totalBytesToWrite);
 	return Void();
 }

 // Write mutations to the destination DB
 ACTOR Future<Void> writeMutationsToDB(UID applierID, int64_t batchIndex, Reference<ApplierBatchData> batchData,
                                      Database cx) {
-	TraceEvent("FastRestoreApplerPhaseApplyTxnStart", applierID).detail("BatchIndex", batchIndex);
+	TraceEvent("FastRestoreApplierPhaseApplyTxnStart", applierID).detail("BatchIndex", batchIndex);
 	wait(precomputeMutationsResult(batchData, applierID, batchIndex, cx));

 	wait(applyStagingKeys(batchData, applierID, batchIndex, cx));
-	TraceEvent("FastRestoreApplerPhaseApplyTxnDone", applierID)
+	TraceEvent("FastRestoreApplierPhaseApplyTxnDone", applierID)
 	    .detail("BatchIndex", batchIndex)
 	    .detail("AppliedBytes", batchData->appliedBytes)
 	    .detail("ReceivedBytes", batchData->receivedBytes);
@ -590,6 +632,55 @@ ACTOR Future<Void> writeMutationsToDB(UID applierID, int64_t batchIndex, Referen
 	return Void();
 }

+void handleUpdateRateRequest(RestoreUpdateRateRequest req, Reference<RestoreApplierData> self) {
+	TraceEvent ev("FastRestoreApplierUpdateRateRequest", self->id());
+	ev.suppressFor(10)
+	    .detail("BatchIndex", req.batchIndex)
+	    .detail("FinishedBatch", self->finishedBatch.get())
+	    .detail("WriteMB", req.writeMB);
+	double remainingDataMB = 0;
+	if (self->finishedBatch.get() == req.batchIndex - 1) { // current applying batch
+		Reference<ApplierBatchData> batchData = self->batch[req.batchIndex];
+		ASSERT(batchData.isValid());
+		batchData->targetWriteRateMB = req.writeMB;
+		remainingDataMB = batchData->totalBytesToWrite > 0
+		                      ? std::max(0.0, batchData->totalBytesToWrite - batchData->appliedBytes) / 1024 / 1024
+		                      : batchData->receivedBytes / 1024 / 1024;
+		ev.detail("TotalBytesToWrite", batchData->totalBytesToWrite)
+		    .detail("AppliedBytes", batchData->appliedBytes)
+		    .detail("ReceivedBytes", batchData->receivedBytes)
+		    .detail("TargetWriteRateMB", batchData->targetWriteRateMB)
+		    .detail("RemainingDataMB", remainingDataMB);
+	}
+	req.reply.send(RestoreUpdateRateReply(self->id(), remainingDataMB));
+
+	return;
+}
+
+ACTOR static Future<Void> traceRate(const char* context, Reference<ApplierBatchData> batchData, int batchIndex,
+                                    UID nodeID, NotifiedVersion* finishedVB, bool once = false) {
+	loop {
+		if ((finishedVB->get() != batchIndex - 1) || !batchData.isValid()) {
+			break;
+		}
+		TraceEvent(context, nodeID)
+		    .suppressFor(10)
+		    .detail("BatchIndex", batchIndex)
+		    .detail("FinishedBatchIndex", finishedVB->get())
+		    .detail("TotalDataToWriteMB", batchData->totalBytesToWrite / 1024 / 1024)
+		    .detail("AppliedBytesMB", batchData->appliedBytes / 1024 / 1024)
+		    .detail("TargetBytesMB", batchData->targetWriteRateMB)
+		    .detail("InflightBytesMB", batchData->applyingDataBytes)
+		    .detail("ReceivedBytes", batchData->receivedBytes);
+		if (once) {
+			break;
+		}
+		wait(delay(5.0));
+	}
+
+	return Void();
+}
+
 ACTOR static Future<Void> handleApplyToDBRequest(RestoreVersionBatchRequest req, Reference<RestoreApplierData> self,
                                                 Database cx) {
 	TraceEvent("FastRestoreApplierPhaseHandleApplyToDBStart", self->id())
@ -601,9 +692,9 @@ ACTOR static Future<Void> handleApplyToDBRequest(RestoreVersionBatchRequest req,
 	wait(self->finishedBatch.whenAtLeast(req.batchIndex - 1));

 	state bool isDuplicated = true;
-	if (self->finishedBatch.get() ==
-	    req.batchIndex - 1) { // duplicate request from earlier version batch will be ignored
-		Reference<ApplierBatchData> batchData = self->batch[req.batchIndex];
+	if (self->finishedBatch.get() == req.batchIndex - 1) {
+		// duplicate request from earlier version batch will be ignored
+		state Reference<ApplierBatchData> batchData = self->batch[req.batchIndex];
 		ASSERT(batchData.isValid());
 		TraceEvent("FastRestoreApplierPhaseHandleApplyToDBRunning", self->id())
 		    .detail("BatchIndex", req.batchIndex)
@ -618,6 +709,8 @@ ACTOR static Future<Void> handleApplyToDBRequest(RestoreVersionBatchRequest req,
 			batchData->dbApplier = Never();
 			batchData->dbApplier = writeMutationsToDB(self->id(), req.batchIndex, batchData, cx);
 			batchData->vbState = ApplierVersionBatchState::WRITE_TO_DB;
+			batchData->rateTracer = traceRate("FastRestoreApplierTransactionRateControl", batchData, req.batchIndex,
+			                                  self->id(), &self->finishedBatch);
 		}

 		ASSERT(batchData->dbApplier.present());
@ -626,9 +719,12 @@ ACTOR static Future<Void> handleApplyToDBRequest(RestoreVersionBatchRequest req,

 		wait(batchData->dbApplier.get());

-		// Multiple actor invokation can wait on req.batchIndex-1;
+		// Multiple actors can wait on req.batchIndex-1;
 		// Avoid setting finishedBatch when finishedBatch > req.batchIndex
 		if (self->finishedBatch.get() == req.batchIndex - 1) {
+			batchData->rateTracer =
+			    traceRate("FastRestoreApplierTransactionRateControlDone", batchData, req.batchIndex, self->id(),
+			              &self->finishedBatch, true /*print once*/); // Track the last rate info
 			self->finishedBatch.set(req.batchIndex);
 			// self->batch[req.batchIndex]->vbState = ApplierVersionBatchState::DONE;
 			// Free memory for the version batch
--- a/fdbserver/RestoreApplier.actor.h
+++ b/fdbserver/RestoreApplier.actor.h
@ -199,7 +199,7 @@ struct StagingKey {
 		return pendingMutations.empty() || version >= pendingMutations.rbegin()->first;
 	}

-	int expectedMutationSize() { return key.size() + val.size(); }
+	int totalSize() { return MutationRef::OVERHEAD_BYTES + key.size() + val.size(); }
 };

 // The range mutation received on applier.
@ -244,7 +244,6 @@ struct ApplierBatchData : public ReferenceCounted<ApplierBatchData> {
 	VersionedMutationsMap kvOps; // Mutations at each version
 	std::map<Key, StagingKey> stagingKeys;
 	std::set<StagingKeyRange> stagingKeyRanges;
-	FlowLock applyStagingKeysBatchLock;

 	Future<Void> pollMetrics;

@ -253,8 +252,13 @@ struct ApplierBatchData : public ReferenceCounted<ApplierBatchData> {
 	long receiveMutationReqs;

 	// Stats
-	long receivedBytes;
-	long appliedBytes;
+	double receivedBytes; // received mutation size
+	double appliedBytes; // after coalesce, how many bytes to write to DB
+	double targetWriteRateMB; // target amount of data outstanding for DB;
+	double totalBytesToWrite; // total amount of data in bytes to write
+	double applyingDataBytes; // amount of data in flight of committing
+	AsyncTrigger releaseTxnTrigger; // trigger to release more txns
+	Future<Void> rateTracer; // trace transaction rate control info

 	// Status counters
 	struct Counters {
@ -280,14 +284,18 @@ struct ApplierBatchData : public ReferenceCounted<ApplierBatchData> {
 	void delref() { return ReferenceCounted<ApplierBatchData>::delref(); }

 	explicit ApplierBatchData(UID nodeID, int batchIndex)
-	  : counters(this, nodeID, batchIndex), applyStagingKeysBatchLock(SERVER_KNOBS->FASTRESTORE_APPLYING_PARALLELISM),
-	    vbState(ApplierVersionBatchState::NOT_INIT), receiveMutationReqs(0), receivedBytes(0), appliedBytes(0) {
+	  : counters(this, nodeID, batchIndex),
+	    targetWriteRateMB(SERVER_KNOBS->FASTRESTORE_WRITE_BW_MB / SERVER_KNOBS->FASTRESTORE_NUM_APPLIERS),
+	    totalBytesToWrite(-1), applyingDataBytes(0), vbState(ApplierVersionBatchState::NOT_INIT),
+	    receiveMutationReqs(0), receivedBytes(0), appliedBytes(0) {
 		pollMetrics = traceCounters(format("FastRestoreApplierMetrics%d", batchIndex), nodeID,
 		                            SERVER_KNOBS->FASTRESTORE_ROLE_LOGGING_DELAY, &counters.cc,
 		                            nodeID.toString() + "/RestoreApplierMetrics/" + std::to_string(batchIndex));
 		TraceEvent("FastRestoreApplierMetricsCreated").detail("Node", nodeID);
 	}
-	~ApplierBatchData() = default;
+	~ApplierBatchData() {
+		rateTracer = Void(); // cancel actor
+	}

 	void addMutation(MutationRef m, LogMessageVersion ver) {
 		if (!isRangeMutation(m)) {
--- a/fdbserver/RestoreController.actor.cpp
+++ b/fdbserver/RestoreController.actor.cpp
@ -105,24 +105,23 @@ ACTOR Future<Void> sampleBackups(Reference<RestoreControllerData> self, RestoreC
 }

 ACTOR Future<Void> startRestoreController(Reference<RestoreWorkerData> controllerWorker, Database cx) {
-	state ActorCollection actors(false);
-
 	ASSERT(controllerWorker.isValid());
 	ASSERT(controllerWorker->controllerInterf.present());
 	state Reference<RestoreControllerData> self =
 	    Reference<RestoreControllerData>(new RestoreControllerData(controllerWorker->controllerInterf.get().id()));
+	state Future<Void> error = actorCollection(self->addActor.getFuture());

 	try {
 		// recruitRestoreRoles must come after controllerWorker has finished collectWorkerInterface
 		wait(recruitRestoreRoles(controllerWorker, self));

-		actors.add(updateHeartbeatTime(self));
-		actors.add(checkRolesLiveness(self));
-		actors.add(updateProcessMetrics(self));
-		actors.add(traceProcessMetrics(self, "RestoreController"));
-		actors.add(sampleBackups(self, controllerWorker->controllerInterf.get()));
+		// self->addActor.send(updateHeartbeatTime(self));
+		self->addActor.send(checkRolesLiveness(self));
+		self->addActor.send(updateProcessMetrics(self));
+		self->addActor.send(traceProcessMetrics(self, "RestoreController"));
+		self->addActor.send(sampleBackups(self, controllerWorker->controllerInterf.get()));

-		wait(startProcessRestoreRequests(self, cx));
+		wait(startProcessRestoreRequests(self, cx) || error);
 	} catch (Error& e) {
 		if (e.code() != error_code_operation_cancelled) {
 			TraceEvent(SevError, "FastRestoreControllerStart").detail("Reason", "Unexpected unhandled error").error(e);
@ -304,7 +303,6 @@ ACTOR static Future<Version> processRestoreRequest(Reference<RestoreControllerDa
 	state std::vector<RestoreFileFR> logFiles;
 	state std::vector<RestoreFileFR> allFiles;
 	state Version minRangeVersion = MAX_VERSION;
-	state Future<Void> error = actorCollection(self->addActor.getFuture());

 	self->initBackupContainer(request.url);

@ -383,7 +381,7 @@ ACTOR static Future<Version> processRestoreRequest(Reference<RestoreControllerDa
 	}

 	try {
-		wait(waitForAll(fBatches) || error);
+		wait(waitForAll(fBatches));
 	} catch (Error& e) {
 		TraceEvent(SevError, "FastRestoreControllerDispatchVersionBatchesUnexpectedError").error(e);
 	}
@ -748,7 +746,9 @@ ACTOR static Future<Version> collectBackupFiles(Reference<IBackupContainer> bc,
 		std::cout << "Restore to version: " << request.targetVersion << "\nBackupDesc: \n" << desc.toString() << "\n\n";
 	}

-	Optional<RestorableFileSet> restorable = wait(bc->getRestoreSet(request.targetVersion));
+	state VectorRef<KeyRangeRef> restoreRanges;
+	restoreRanges.add(request.range);
+	Optional<RestorableFileSet> restorable = wait(bc->getRestoreSet(request.targetVersion, restoreRanges));

 	if (!restorable.present()) {
 		TraceEvent(SevWarn, "FastRestoreControllerPhaseCollectBackupFiles")
@ -908,6 +908,49 @@ ACTOR static Future<Void> initializeVersionBatch(std::map<UID, RestoreApplierInt
 	return Void();
 }

+// Calculate the amount of data each applier should keep outstanding to DB;
+// This is the amount of data that are in in-progress transactions.
+ACTOR static Future<Void> updateApplierWriteBW(Reference<ControllerBatchData> batchData,
+                                               std::map<UID, RestoreApplierInterface> appliersInterf, int batchIndex) {
+	state std::unordered_map<UID, double> applierRemainMB;
+	state double totalRemainMB = SERVER_KNOBS->FASTRESTORE_WRITE_BW_MB;
+	state double standardAvgBW = SERVER_KNOBS->FASTRESTORE_WRITE_BW_MB / SERVER_KNOBS->FASTRESTORE_NUM_APPLIERS;
+	state int loopCount = 0;
+	state std::vector<RestoreUpdateRateReply> replies;
+	state std::vector<std::pair<UID, RestoreUpdateRateRequest>> requests;
+	for (auto& applier : appliersInterf) {
+		applierRemainMB[applier.first] = SERVER_KNOBS->FASTRESTORE_WRITE_BW_MB / SERVER_KNOBS->FASTRESTORE_NUM_APPLIERS;
+	}
+
+	loop {
+		requests.clear();
+		for (auto& applier : appliersInterf) {
+			double writeRate = totalRemainMB > 1 ? (applierRemainMB[applier.first] / totalRemainMB) *
+			                                           SERVER_KNOBS->FASTRESTORE_WRITE_BW_MB
+			                                     : standardAvgBW;
+			requests.emplace_back(applier.first, RestoreUpdateRateRequest(batchIndex, writeRate));
+		}
+		replies.clear();
+		wait(getBatchReplies(
+		    &RestoreApplierInterface::updateRate, appliersInterf, requests, &replies,
+		    TaskPriority::DefaultEndpoint)); // DefaultEndpoint has higher priority than fast restore endpoints
+		ASSERT(replies.size() == requests.size());
+		totalRemainMB = 0;
+		for (int i = 0; i < replies.size(); i++) {
+			UID& applierID = requests[i].first;
+			applierRemainMB[applierID] = replies[i].remainMB;
+			totalRemainMB += replies[i].remainMB;
+		}
+		ASSERT(totalRemainMB >= 0);
+		double delayTime = SERVER_KNOBS->FASTRESTORE_RATE_UPDATE_SECONDS;
+		if (loopCount == 0) { // First loop: Need to update writeRate quicker
+			delayTime = 0.2;
+		}
+		loopCount++;
+		wait(delay(delayTime));
+	}
+}
+
 // Ask each applier to apply its received mutations to DB
 // NOTE: Controller cannot start applying mutations at batchIndex until all appliers have applied for (batchIndex - 1)
 //       because appliers at different batchIndex may have overlapped key ranges.
@ -921,6 +964,8 @@ ACTOR static Future<Void> notifyApplierToApplyMutations(Reference<ControllerBatc

 	wait(finishedBatch->whenAtLeast(batchIndex - 1));

+	state Future<Void> updateRate;
+
 	if (finishedBatch->get() == batchIndex - 1) {
 		// Prepare the applyToDB requests
 		std::vector<std::pair<UID, RestoreVersionBatchRequest>> requests;
@ -940,6 +985,7 @@ ACTOR static Future<Void> notifyApplierToApplyMutations(Reference<ControllerBatc
 			batchData->applyToDB = Never();
 			batchData->applyToDB = getBatchReplies(&RestoreApplierInterface::applyToDB, appliersInterf, requests,
 			                                       &replies, TaskPriority::RestoreApplierWriteDB);
+			updateRate = updateApplierWriteBW(batchData, appliersInterf, batchIndex);
 		} else {
 			TraceEvent(SevError, "FastRestoreControllerPhaseApplyToDB")
 			    .detail("BatchIndex", batchIndex)
@ -1051,6 +1097,7 @@ ACTOR static Future<Void> signalRestoreCompleted(Reference<RestoreControllerData
 }

 // Update the most recent time when controller receives hearbeat from each loader and applier
+// TODO: Replace the heartbeat mechanism with FDB failure monitoring mechanism
 ACTOR static Future<Void> updateHeartbeatTime(Reference<RestoreControllerData> self) {
 	wait(self->recruitedRoles.getFuture());

@ -1086,10 +1133,18 @@ ACTOR static Future<Void> updateHeartbeatTime(Reference<RestoreControllerData> s
 		}

 		fTimeout = delay(SERVER_KNOBS->FASTRESTORE_HEARTBEAT_DELAY);
-		wait(waitForAll(fReplies) || fTimeout);
+
+		// Here we have to handle error, otherwise controller worker will fail and exit.
+		try {
+			wait(waitForAll(fReplies) || fTimeout);
+		} catch (Error& e) {
+			// This should be an ignorable error.
+			TraceEvent(g_network->isSimulated() ? SevWarnAlways : SevError, "FastRestoreUpdateHeartbeatError").error(e);
+		}
+
 		// Update the most recent heart beat time for each role
 		for (int i = 0; i < fReplies.size(); ++i) {
-			if (fReplies[i].isReady()) {
+			if (!fReplies[i].isError() && fReplies[i].isReady()) {
 				double currentTime = now();
 				auto item = self->rolesHeartBeatTime.emplace(nodes[i], currentTime);
 				item.first->second = currentTime;
--- a/fdbserver/RestoreController.actor.h
+++ b/fdbserver/RestoreController.actor.h
@ -177,7 +177,8 @@ struct RestoreControllerData : RestoreRoleData, public ReferenceCounted<RestoreC
 		versionBatches.clear();
 		batch.clear();
 		batchStatus.clear();
-		finishedBatch = NotifiedVersion();
+		finishedBatch = NotifiedVersion(0);
+		versionBatchId = NotifiedVersion(0);
 		ASSERT(runningVersionBatches.get() == 0);
 	}

--- a/fdbserver/SimulatedCluster.actor.cpp
+++ b/fdbserver/SimulatedCluster.actor.cpp
@ -715,7 +715,7 @@ void SimulationConfig::set_config(std::string config) {
 	// The only mechanism we have for turning "single" into what single means
 	// is buildConfiguration()... :/
 	std::map<std::string, std::string> hack_map;
-	ASSERT( buildConfiguration(config, hack_map) );
+	ASSERT(buildConfiguration(config, hack_map) != ConfigurationResult::NO_OPTIONS_PROVIDED);
 	for(auto kv : hack_map) db.set( kv.first, kv.second );
 }

--- a/fdbserver/Status.actor.cpp
+++ b/fdbserver/Status.actor.cpp
@ -1700,25 +1700,28 @@ static int getExtraTLogEligibleZones(const vector<WorkerDetails>& workers, const
 	if(configuration.regions.size() == 0) {
 		return allZones.size() - std::max(configuration.tLogReplicationFactor, configuration.storageTeamSize);
 	}
-	int extraTlogEligibleZones = configuration.usableRegions == 1 ? 0 : std::numeric_limits<int>::max();
+	int extraTlogEligibleZones = 0;
+	int regionsWithNonNegativePriority = 0;
 	for(auto& region : configuration.regions) {
-		int eligible = dcId_zone[region.dcId].size() - std::max(configuration.remoteTLogReplicationFactor, std::max(configuration.tLogReplicationFactor, configuration.storageTeamSize) );
-		//FIXME: does not take into account fallback satellite policies
-		if(region.satelliteTLogReplicationFactor > 0 && configuration.usableRegions > 1) {
-			int totalSatelliteEligible = 0;
-			for(auto& sat : region.satellites) {
-				totalSatelliteEligible += dcId_zone[sat.dcId].size();
+		if( region.priority >= 0 ) {
+			int eligible = dcId_zone[region.dcId].size() - std::max(configuration.remoteTLogReplicationFactor, std::max(configuration.tLogReplicationFactor, configuration.storageTeamSize) );
+			//FIXME: does not take into account fallback satellite policies
+			if(region.satelliteTLogReplicationFactor > 0 && configuration.usableRegions > 1) {
+				int totalSatelliteEligible = 0;
+				for(auto& sat : region.satellites) {
+					totalSatelliteEligible += dcId_zone[sat.dcId].size();
+				}
+				eligible = std::min<int>( eligible, totalSatelliteEligible - region.satelliteTLogReplicationFactor );
 			}
-			eligible = std::min<int>( eligible, totalSatelliteEligible - region.satelliteTLogReplicationFactor );
-		}
-		if( configuration.usableRegions == 1 ) {
-			if( region.priority >= 0 ) {
-				extraTlogEligibleZones = std::max( extraTlogEligibleZones, eligible );
+			if(eligible >= 0) {
+				regionsWithNonNegativePriority++;
 			}
-		} else {
-			extraTlogEligibleZones = std::min( extraTlogEligibleZones, eligible );
+			extraTlogEligibleZones = std::max( extraTlogEligibleZones, eligible );
 		}
 	}
+	if(regionsWithNonNegativePriority > 1) {
+		extraTlogEligibleZones++;
+	}
 	return extraTlogEligibleZones;
 }

@ -2020,7 +2023,8 @@ static JsonBuilderObject tlogFetcher(int* logFaultTolerance, const std::vector<T
 	    log_replication_factor, log_write_anti_quorum, log_fault_tolerance, remote_log_replication_factor,
 	    remote_log_fault_tolerance;

-	int maxFaultTolerance = 0;
+	int minFaultTolerance = 1000;
+	int localSetsWithNonNegativeFaultTolerance = 0;

 	for (int i = 0; i < tLogs.size(); i++) {
 		int failedLogs = 0;
@ -2037,9 +2041,15 @@ static JsonBuilderObject tlogFetcher(int* logFaultTolerance, const std::vector<T
 				failedLogs++;
 			}
 		}
-		// The log generation's fault tolerance is the maximum tlog fault tolerance of each region.
-		maxFaultTolerance =
-		    std::max(maxFaultTolerance, tLogs[i].tLogReplicationFactor - 1 - tLogs[i].tLogWriteAntiQuorum - failedLogs);
+
+		if (tLogs[i].isLocal) {
+			int currentFaultTolerance = tLogs[i].tLogReplicationFactor - 1 - tLogs[i].tLogWriteAntiQuorum - failedLogs;
+			if(currentFaultTolerance >= 0) {
+				localSetsWithNonNegativeFaultTolerance++;
+			}
+			minFaultTolerance = std::min(minFaultTolerance, currentFaultTolerance);
+		}
+
 		if (tLogs[i].isLocal && tLogs[i].locality == tagLocalitySatellite) {
 			sat_log_replication_factor = tLogs[i].tLogReplicationFactor;
 			sat_log_write_anti_quorum = tLogs[i].tLogWriteAntiQuorum;
@ -2053,11 +2063,18 @@ static JsonBuilderObject tlogFetcher(int* logFaultTolerance, const std::vector<T
 			remote_log_fault_tolerance = tLogs[i].tLogReplicationFactor - 1 - failedLogs;
 		}
 	}
-	*logFaultTolerance = std::min(*logFaultTolerance, maxFaultTolerance);
+	if(minFaultTolerance == 1000) {
+		//just in case we do not have any tlog sets
+		minFaultTolerance = 0;
+	}
+	if(localSetsWithNonNegativeFaultTolerance > 1) {
+		minFaultTolerance++;
+	}
+	*logFaultTolerance = std::min(*logFaultTolerance, minFaultTolerance);
 	statusObj["log_interfaces"] = logsObj;
 	// We may lose logs in this log generation, storage servers may never be able to catch up this log
 	// generation.
-	statusObj["possibly_losing_data"] = maxFaultTolerance < 0;
+	statusObj["possibly_losing_data"] = minFaultTolerance < 0;

 	if (sat_log_replication_factor.present())
 		statusObj["satellite_log_replication_factor"] = sat_log_replication_factor.get();
@ -2102,12 +2119,13 @@ static JsonBuilderArray tlogFetcher(int* logFaultTolerance, Reference<AsyncVar<S
 static JsonBuilderObject faultToleranceStatusFetcher(DatabaseConfiguration configuration,
                                                     ServerCoordinators coordinators,
                                                     std::vector<WorkerDetails>& workers, int extraTlogEligibleZones,
-                                                     int minReplicasRemaining, int oldLogFaultTolerance,
+                                                     int minReplicasRemaining, int oldLogFaultTolerance, 
+													 int fullyReplicatedRegions,
                                                     bool underMaintenance) {
 	JsonBuilderObject statusObj;

 	// without losing data
-	int32_t maxZoneFailures = configuration.maxZoneFailuresTolerated();
+	int32_t maxZoneFailures = configuration.maxZoneFailuresTolerated(fullyReplicatedRegions, false);
 	if(underMaintenance) {
 		maxZoneFailures--;
 	}
@ -2145,8 +2163,14 @@ static JsonBuilderObject faultToleranceStatusFetcher(DatabaseConfiguration confi
 	// oldLogFaultTolerance means max failures we can tolerate to lose logs data. -1 means we lose data or availability.
 	zoneFailuresWithoutLosingData = std::max(std::min(zoneFailuresWithoutLosingData, oldLogFaultTolerance), -1);
 	statusObj["max_zone_failures_without_losing_data"] = zoneFailuresWithoutLosingData;
+
+	int32_t maxAvaiabilityZoneFailures = configuration.maxZoneFailuresTolerated(fullyReplicatedRegions, true);
+	if(underMaintenance) {
+		maxAvaiabilityZoneFailures--;
+	}
+
 	statusObj["max_zone_failures_without_losing_availability"] =
-	    std::max(std::min(extraTlogEligibleZones, zoneFailuresWithoutLosingData), -1);
+	    std::max(std::min(maxAvaiabilityZoneFailures,std::min(extraTlogEligibleZones, zoneFailuresWithoutLosingData)), -1);
 	return statusObj;
 }

@ -2323,7 +2347,7 @@ ACTOR Future<JsonBuilderObject> lockedStatusFetcher(Reference<AsyncVar<ServerDBI
 	return statusObj;
 }

-ACTOR Future<Optional<Value>> getActivePrimaryDC(Database cx, JsonBuilderArray* messages) {
+ACTOR Future<Optional<Value>> getActivePrimaryDC(Database cx, int* fullyReplicatedRegions, JsonBuilderArray* messages) {
 	state ReadYourWritesTransaction tr(cx);

 	state Future<Void> readTimeout = delay(5); // so that we won't loop forever
@ -2334,12 +2358,17 @@ ACTOR Future<Optional<Value>> getActivePrimaryDC(Database cx, JsonBuilderArray*
 			}
 			tr.setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
 			tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
-			Optional<Value> res = wait(timeoutError(tr.get(primaryDatacenterKey), 5));
-			if (!res.present()) {
+			state Future<Standalone<RangeResultRef>> fReplicaKeys = tr.getRange(datacenterReplicasKeys, CLIENT_KNOBS->TOO_MANY);
+			state Future<Optional<Value>> fPrimaryDatacenterKey = tr.get(primaryDatacenterKey);
+			wait(timeoutError(success(fPrimaryDatacenterKey) && success(fReplicaKeys), 5));
+			
+			*fullyReplicatedRegions = fReplicaKeys.get().size();
+
+			if (!fPrimaryDatacenterKey.get().present()) {
 				messages->push_back(
 				    JsonString::makeMessage("primary_dc_missing", "Unable to determine primary datacenter."));
 			}
-			return res;
+			return fPrimaryDatacenterKey.get();
 		} catch (Error& e) {
 			if (e.code() == error_code_timed_out) {
 				messages->push_back(
@ -2533,7 +2562,8 @@ ACTOR Future<StatusReply> clusterGetStatus(
 			state Future<ErrorOr<vector<std::pair<GrvProxyInterface, EventMap>>>> grvProxyFuture = errorOr(getGrvProxiesAndMetrics(db, address_workers));

 			state int minReplicasRemaining = -1;
-			state Future<Optional<Value>> primaryDCFO = getActivePrimaryDC(cx, &messages);
+			state int fullyReplicatedRegions = -1;
+			state Future<Optional<Value>> primaryDCFO = getActivePrimaryDC(cx, &fullyReplicatedRegions, &messages);
 			std::vector<Future<JsonBuilderObject>> futures2;
 			futures2.push_back(dataStatusFetcher(ddWorker, configuration.get(), &minReplicasRemaining));
 			futures2.push_back(workloadStatusFetcher(db, workers, mWorker, rkWorker, &qos, &data_overlay, &status_incomplete_reasons, storageServerFuture));
@ -2541,6 +2571,7 @@ ACTOR Future<StatusReply> clusterGetStatus(
 			futures2.push_back(lockedStatusFetcher(db, &messages, &status_incomplete_reasons));
 			futures2.push_back(clusterSummaryStatisticsFetcher(pMetrics, storageServerFuture, tLogFuture, &status_incomplete_reasons));
 			state std::vector<JsonBuilderObject> workerStatuses = wait(getAll(futures2));
+			wait(success(primaryDCFO));

 			int logFaultTolerance = 100;
 			if (db->get().recoveryState >= RecoveryState::ACCEPTING_COMMITS) {
@ -2551,13 +2582,12 @@ ACTOR Future<StatusReply> clusterGetStatus(
 				int extraTlogEligibleZones = getExtraTLogEligibleZones(workers, configuration.get());
 				statusObj["fault_tolerance"] = faultToleranceStatusFetcher(
 				    configuration.get(), coordinators, workers, extraTlogEligibleZones, minReplicasRemaining,
-				    logFaultTolerance, loadResult.present() && loadResult.get().healthyZone.present());
+				    logFaultTolerance, fullyReplicatedRegions, loadResult.present() && loadResult.get().healthyZone.present());
 			}

 			state JsonBuilderObject configObj =
 			    configurationFetcher(configuration, coordinators, &status_incomplete_reasons);

-			wait(success(primaryDCFO));
 			if (primaryDCFO.get().present()) {
 				statusObj["active_primary_dc"] = primaryDCFO.get().get();
 			}
--- a/fdbserver/VersionedBTree.actor.cpp
+++ b/fdbserver/VersionedBTree.actor.cpp
@ -5738,6 +5738,8 @@ public:

 	KeyValueStoreType getType() const override { return KeyValueStoreType::SSD_REDWOOD_V1; }

+	bool canPipelineCommits() const override { return true; }
+
 	StorageBytes getStorageBytes() const override { return m_tree->getStorageBytes(); }

 	Future<Void> getError() { return delayed(m_error.getFuture()); };
--- a/fdbserver/fdbserver.actor.cpp
+++ b/fdbserver/fdbserver.actor.cpp
@ -1900,11 +1900,13 @@ int main(int argc, char* argv[]) {
 				g_network->run();
 			}
 		} else if (role == MultiTester) {
+			setupRunLoopProfiler();
 			f = stopAfter(runTests(opts.connectionFile, TEST_TYPE_FROM_FILE,
 			                       opts.testOnServers ? TEST_ON_SERVERS : TEST_ON_TESTERS, opts.minTesterCount,
 			                       opts.testFile, StringRef(), opts.localities));
 			g_network->run();
 		} else if (role == Test) {
+			setupRunLoopProfiler();
 			auto m = startSystemMonitor(opts.dataFolder, opts.zoneId, opts.zoneId);
 			f = stopAfter(runTests(opts.connectionFile, TEST_TYPE_FROM_FILE, TEST_HERE, 1, opts.testFile, StringRef(),
 			                       opts.localities));
--- a/fdbserver/storageserver.actor.cpp
+++ b/fdbserver/storageserver.actor.cpp
@ -151,10 +151,11 @@ struct ShardInfo : ReferenceCounted<ShardInfo>, NonCopyable {
 };

 struct StorageServerDisk {
-	explicit StorageServerDisk( struct StorageServer* data, IKeyValueStore* storage ) : data(data), storage(storage) {}
+	explicit StorageServerDisk( struct StorageServer* data, IKeyValueStore* storage ) : data(data), storage(storage), _canPipelineCommits(storage->canPipelineCommits()) {}

 	void makeNewStorageServerDurable();
-	bool makeVersionMutationsDurable( Version& prevStorageVersion, Version newStorageVersion, int64_t& bytesLeft );
+	// Asyncronously move data from mutation log into SE's commit buffer for next commit.
+	Future<bool> asyncPrepareVersionsForCommit(Version desiredOldestVersion, Future<Void> durable, Future<Void>durableMinDelay);
 	void makeVersionDurable( Version version );
 	Future<bool> restoreDurableState();

@ -177,12 +178,15 @@ struct StorageServerDisk {
 	KeyValueStoreType getKeyValueStoreType() const { return storage->getType(); }
 	StorageBytes getStorageBytes() const { return storage->getStorageBytes(); }
 	std::tuple<size_t, size_t, size_t> getSize() const { return storage->getSize(); }
+	
+	bool canPipelineCommits() const {return _canPipelineCommits;}
+	void set(KeyValueRef kv) { storage->set(kv);}
+	void clear(KeyRangeRef kr) { storage->clear(kr);}

 private:
 	struct StorageServer* data;
 	IKeyValueStore* storage;
-
-	void writeMutations(const VectorRef<MutationRef>& mutations, Version debugVersion, const char* debugContext);
+	bool _canPipelineCommits;

 	ACTOR static Future<Key> readFirstKey( IKeyValueStore* storage, KeyRangeRef range ) {
 		Standalone<RangeResultRef> r = wait( storage->readRange( range, 1 ) );
@ -1020,6 +1024,11 @@ ACTOR Future<Void> getValueQ( StorageServer* data, GetValueRequest req ) {
 	return Void();
 };

+// Pessimistic estimate the number of overhead bytes used by each
+// watch. Watch key references are stored in an AsyncMap<Key,bool>, and actors
+// must be kept alive until the watch is finished.
+static constexpr size_t WATCH_OVERHEAD_BYTES = 1000;
+
 ACTOR Future<Void> watchValue_impl( StorageServer* data, WatchValueRequest req, SpanID parent ) {
 	state Location spanLocation = "SS:WatchValueImpl"_loc;
 	state Span span(spanLocation, { parent });
@ -1070,7 +1079,7 @@ ACTOR Future<Void> watchValue_impl( StorageServer* data, WatchValueRequest req,
 				}

 				++data->numWatches;
-				data->watchBytes += ( req.key.expectedSize() + req.value.expectedSize() + 1000 );
+				data->watchBytes += (req.key.expectedSize() + req.value.expectedSize() + WATCH_OVERHEAD_BYTES);
 				try {
 					if(latest < minVersion) {
 						// If the version we read is less than minVersion, then we may fail to be notified of any changes that occur up to or including minVersion
@ -1083,10 +1092,10 @@ ACTOR Future<Void> watchValue_impl( StorageServer* data, WatchValueRequest req,
 					}
 					wait(watchFuture);
 					--data->numWatches;
-					data->watchBytes -= ( req.key.expectedSize() + req.value.expectedSize() + 1000 );
+					data->watchBytes -= (req.key.expectedSize() + req.value.expectedSize() + WATCH_OVERHEAD_BYTES);
 				} catch( Error &e ) {
 					--data->numWatches;
-					data->watchBytes -= ( req.key.expectedSize() + req.value.expectedSize() + 1000 );
+					data->watchBytes -= (req.key.expectedSize() + req.value.expectedSize() + WATCH_OVERHEAD_BYTES);
 					throw;
 				}
 			} catch( Error &e ) {
@ -3071,75 +3080,67 @@ ACTOR Future<Void> updateStorage(StorageServer* data) {
 		wait( delay(0, TaskPriority::UpdateStorage) );

 		state Promise<Void> durableInProgress;
-		data->durableInProgress = durableInProgress.getFuture();

-		state Version startOldestVersion = data->storageVersion();
-		state Version newOldestVersion = data->storageVersion();
-		state Version desiredVersion = data->desiredOldestVersion.get();
-		state int64_t bytesLeft = SERVER_KNOBS->STORAGE_COMMIT_BYTES;
+		state Version desiredOldestVersion = data->desiredOldestVersion.get();

-		// Write mutations to storage until we reach the desiredVersion or have written too much (bytesleft)
+		state Future<Void> durableMinDelay = Void();
+		state Future<Void> durable = Void();
+
+		state int64_t ssCommitQuotaBytes;
+		state Version pendingCommitVersion;
+		state int64_t bytesWritten = 0;
+		state bool finalCommit = false;
+		state bool done = false;
 		loop {
-			state bool done = data->storage.makeVersionMutationsDurable(newOldestVersion, desiredVersion, bytesLeft);
-			// We want to forget things from these data structures atomically with changing oldestVersion (and "before", since oldestVersion.set() may trigger waiting actors)
-			// forgetVersionsBeforeAsync visibly forgets immediately (without waiting) but asynchronously frees memory.
-			Future<Void> finishedForgetting = data->mutableData().forgetVersionsBeforeAsync( newOldestVersion, TaskPriority::UpdateStorage );
-			data->oldestVersion.set( newOldestVersion );
-			wait( finishedForgetting );
-			wait( yield(TaskPriority::UpdateStorage) );
+			// Keep making data from mutation log durable, until no data left whose version is <= desiredOldestVersion
+			pendingCommitVersion = data->storageVersion();
+			ssCommitQuotaBytes = SERVER_KNOBS->STORAGE_COMMIT_BYTES;
+			durableInProgress.reset();
+			data->durableInProgress = durableInProgress.getFuture();
+			durable = data->storage.commit(); // Commit data up to(inclusive) version pendingCommitVersion
+			durableMinDelay = delay(SERVER_KNOBS->STORAGE_COMMIT_INTERVAL, TaskPriority::UpdateStorage);
+			if (finalCommit) {
+				wait(durable && durableMinDelay);
+				done = true;
+			} else {
+				// Move data start from pendingCommitVersion+1 to SE's commit buffer.
+				bool _finalCommit = wait(data->storage.asyncPrepareVersionsForCommit(desiredOldestVersion, durable, durableMinDelay));
+				finalCommit = _finalCommit;
+			}
+			debug_advanceMinCommittedVersion( data->thisServerID, pendingCommitVersion );
+
+			if(pendingCommitVersion > data->rebootAfterDurableVersion) {
+				TraceEvent("RebootWhenDurableTriggered", data->thisServerID).detail("PendingCommitVersion", pendingCommitVersion).detail("RebootAfterDurableVersion", data->rebootAfterDurableVersion);
+				// To avoid brokenPromise error, which is caused by the sender of the durableInProgress (i.e., this process)
+				// never sets durableInProgress, we should set durableInProgress before send the please_reboot() error.
+				// Otherwise, in the race situation when storage server receives both reboot and
+				// brokenPromise of durableInProgress, the worker of the storage server will die.
+				// We will eventually end up with no worker for storage server role.
+				// The data distributor's buildTeam() will get stuck in building a team
+				durableInProgress.sendError(please_reboot());
+				throw please_reboot();
+			}
+
+			durableInProgress.send(Void());
+			wait( delay(0, TaskPriority::UpdateStorage) ); //Setting durableInProgess could cause the storage server to shut down, so delay to check for cancellation
+
+			// Taking and releasing the durableVersionLock ensures that no eager reads both begin before the commit was effective and
+			// are applied after we change the durable version. Also ensure that we have to lock while calling changeDurableVersion,
+			// because otherwise the latest version of mutableData might be partially loaded.
+			wait( data->durableVersionLock.take() );
+			data->popVersion( data->durableVersion.get() + 1 );
+
+			// Update durableVersion to pendingCommitVersion, which has been committed.
+			while (!changeDurableVersion( data, pendingCommitVersion )) {
+				if(g_network->check_yield(TaskPriority::UpdateStorage)) {
+					data->durableVersionLock.release();
+					wait(delay(0, TaskPriority::UpdateStorage));
+					wait( data->durableVersionLock.take() );
+				}
+			}
+			data->durableVersionLock.release();
 			if (done) break;
 		}
-
-		// Set the new durable version as part of the outstanding change set, before commit
-		if (startOldestVersion != newOldestVersion)
-			data->storage.makeVersionDurable( newOldestVersion );
-
-		debug_advanceMaxCommittedVersion( data->thisServerID, newOldestVersion );
-		state Future<Void> durable = data->storage.commit();
-		state Future<Void> durableDelay = Void();
-
-		if (bytesLeft > 0) {
-			durableDelay = delay(SERVER_KNOBS->STORAGE_COMMIT_INTERVAL, TaskPriority::UpdateStorage);
-		}
-
-		wait( durable );
-
-		debug_advanceMinCommittedVersion( data->thisServerID, newOldestVersion );
-
-		if(newOldestVersion > data->rebootAfterDurableVersion) {
-			TraceEvent("RebootWhenDurableTriggered", data->thisServerID).detail("NewOldestVersion", newOldestVersion).detail("RebootAfterDurableVersion", data->rebootAfterDurableVersion);
-			// To avoid brokenPromise error, which is caused by the sender of the durableInProgress (i.e., this process)
-			// never sets durableInProgress, we should set durableInProgress before send the please_reboot() error.
-			// Otherwise, in the race situation when storage server receives both reboot and
-			// brokenPromise of durableInProgress, the worker of the storage server will die.
-			// We will eventually end up with no worker for storage server role.
-			// The data distributor's buildTeam() will get stuck in building a team
-			durableInProgress.sendError(please_reboot());
-			throw please_reboot();
-		}
-
-		durableInProgress.send(Void());
-		wait( delay(0, TaskPriority::UpdateStorage) ); //Setting durableInProgess could cause the storage server to shut down, so delay to check for cancellation
-
-		// Taking and releasing the durableVersionLock ensures that no eager reads both begin before the commit was effective and
-		// are applied after we change the durable version. Also ensure that we have to lock while calling changeDurableVersion,
-		// because otherwise the latest version of mutableData might be partially loaded.
-		wait( data->durableVersionLock.take() );
-		data->popVersion( data->durableVersion.get() + 1 );
-
-		while (!changeDurableVersion( data, newOldestVersion )) {
-			if(g_network->check_yield(TaskPriority::UpdateStorage)) {
-				data->durableVersionLock.release();
-				wait(delay(0, TaskPriority::UpdateStorage));
-				wait( data->durableVersionLock.take() );
-			}
-		}
-
-		data->durableVersionLock.release();
-
-		//TraceEvent("StorageServerDurable", data->thisServerID).detail("Version", newOldestVersion);
-
-		wait( durableDelay );
 	}
 }

@ -3212,36 +3213,97 @@ void StorageServerDisk::writeMutation( MutationRef mutation ) {
 		ASSERT(false);
 }

-void StorageServerDisk::writeMutations(const VectorRef<MutationRef>& mutations, Version debugVersion,
-                                       const char* debugContext) {
-	for (const auto& m : mutations) {
+ACTOR Future<int64_t> asyncWriteMutationsToCommitBuffer(StorageServer* data, VectorRef<MutationRef> mutations, Version debugVersion, const char* debugContext, int64_t ssCommitQuotaBytes) {
+	state int bytesWritten = 0;
+	state int i = 0;
+	for (;i < mutations.size(); i++) {
+		const auto& m = mutations[i];
 		DEBUG_MUTATION(debugContext, debugVersion, m).detail("UID", data->thisServerID);
 		if (m.type == MutationRef::SetValue) {
-			storage->set(KeyValueRef(m.param1, m.param2));
+			data->storage.set(KeyValueRef(m.param1, m.param2));
 		} else if (m.type == MutationRef::ClearRange) {
-			storage->clear(KeyRangeRef(m.param1, m.param2));
+			data->storage.clear(KeyRangeRef(m.param1, m.param2));
+		}
+		auto mutationBytes = mvccStorageBytes(m);
+		bytesWritten += mutationBytes;
+		ssCommitQuotaBytes -= mutationBytes;
+		if (data->storage.canPipelineCommits() && bytesWritten >= SERVER_KNOBS->STORAGE_COMMIT_PIPELINE_BYTES_PER_YIELD) {
+			bytesWritten = 0;
+			wait(yield());
 		}
 	}
+	return ssCommitQuotaBytes;
 }

-bool StorageServerDisk::makeVersionMutationsDurable( Version& prevStorageVersion, Version newStorageVersion, int64_t& bytesLeft ) {
-	if (bytesLeft <= 0) return true;
+ACTOR Future<bool> asyncPrepareVersionsForCommit_impl(StorageServerDisk* self, StorageServer* data, Version desiredOldestVersion, Future<Void> durable, Future<Void>durableMinDelay) {
+	state int64_t ssCommitQuotaBytes = SERVER_KNOBS->STORAGE_COMMIT_BYTES;
+	state bool finalCommit = false;
+	state Version startOldestVersion = data->storageVersion();
+	state Version newOldestVersion = data->storageVersion();
+	state SignalableActorCollection forgetter;
+	loop {
+		// While committing previously written data, keep writting new data from later versions until
+		//    1.) commit is done, or
+		//    2.) ssCommitQuotaBytes <= 0, or
+		//    3.) no data in mutation log to write.
+		if (!data->storage.canPipelineCommits()) {
+			// Don't write version data while a commit is going on if the storage engine does not support pipelining
+			wait(durable && durableMinDelay);
+		}
+		state Future<Void> stopEarly = data->storage.canPipelineCommits() ? (durable && durableMinDelay) : Never();
+		// Apply mutations from the mutationLog
+		auto u = data->getMutationLog().upper_bound(newOldestVersion);
+		if (u != data->getMutationLog().end() && u->first <= desiredOldestVersion) {
+			VerUpdateRef const& v = u->second;
+			newOldestVersion = v.version;
+			ASSERT( newOldestVersion > data->storageVersion() && newOldestVersion <= desiredOldestVersion );
+			// TODO(alexmiller): Update to version tracking.
+			DEBUG_KEY_RANGE("asyncPrepareVersionsForCommit", newOldestVersion, KeyRangeRef());
+			int64_t _ssCommitQuotaBytes = wait(asyncWriteMutationsToCommitBuffer(data, v.mutations, newOldestVersion, "asyncPrepareVersionsForCommit", ssCommitQuotaBytes));
+			ssCommitQuotaBytes = _ssCommitQuotaBytes;

-	// Apply mutations from the mutationLog
-	auto u = data->getMutationLog().upper_bound(prevStorageVersion);
-	if (u != data->getMutationLog().end() && u->first <= newStorageVersion) {
-		VerUpdateRef const& v = u->second;
-		ASSERT( v.version > prevStorageVersion && v.version <= newStorageVersion );
-		// TODO(alexmiller): Update to version tracking.
-		DEBUG_KEY_RANGE("makeVersionMutationsDurable", v.version, KeyRangeRef());
-		writeMutations(v.mutations, v.version, "makeVersionDurable");
-		for (const auto& m : v.mutations) bytesLeft -= mvccStorageBytes(m);
-		prevStorageVersion = v.version;
-		return false;
-	} else {
-		prevStorageVersion = newStorageVersion;
-		return true;
+			// We want to forget things from these data structures atomically with changing oldestVersion (and "before", since oldestVersion.set() may trigger waiting actors)
+			// forgetVersionsBeforeAsync visibly forgets immediately (without waiting) but asynchronously frees memory.
+			forgetter.add(data->mutableData().forgetVersionsBeforeAsync( newOldestVersion, TaskPriority::UpdateStorage ));
+			data->oldestVersion.set( newOldestVersion );
+			if (ssCommitQuotaBytes <= 0) {
+				// No quota left. Wait for previous commit to finish.
+				wait(durable && durableMinDelay);
+				break;
+			}
+			if (stopEarly.isReady()) {
+				// Previous commit is done.
+				break;
+			}
+		} else {
+			// Since there is no data in mutation log, in order to make progress,
+			// advance it to desiredOldestVersion directly
+			newOldestVersion = desiredOldestVersion;
+			// We want to forget things from these data structures atomically with changing oldestVersion (and "before", since oldestVersion.set() may trigger waiting actors)
+			// forgetVersionsBeforeAsync visibly forgets immediately (without waiting) but asynchronously frees memory.
+			forgetter.add(data->mutableData().forgetVersionsBeforeAsync( newOldestVersion, TaskPriority::UpdateStorage ));
+			data->oldestVersion.set( newOldestVersion );
+
+			// No more data in mutation log can be written.
+			finalCommit = true;
+
+			// Wait the previously written data to be committed
+			wait(durable && durableMinDelay);
+
+			break;
+		}
 	}
+	if (newOldestVersion > startOldestVersion){
+		// Set the new durable version as part of the outstanding change set, before commit
+		data->storage.makeVersionDurable( newOldestVersion );
+	}
+	debug_advanceMaxCommittedVersion( data->thisServerID, newOldestVersion );
+	wait(forgetter.signal());
+	return finalCommit;
+}
+
+Future<bool> StorageServerDisk::asyncPrepareVersionsForCommit(Version desiredOldestVersion, Future<Void> durable, Future<Void>durableMinDelay) {
+	return asyncPrepareVersionsForCommit_impl(this, data, desiredOldestVersion, durable, durableMinDelay);
 }

 // Update data->storage to persist the changes from (data->storageVersion(),version]
@ -4175,4 +4237,3 @@ void versionedMapTest() {
 	printf("Memory used: %f MB\n",
 		 (after - before)/ 1e6);
 }
-
--- a/fdbserver/workloads/AtomicOps.actor.cpp
+++ b/fdbserver/workloads/AtomicOps.actor.cpp
@ -101,7 +101,7 @@ struct AtomicOpsWorkload : TestWorkload {
 		// case 10:
 		// 	TEST(true); // Testing atomic CompareAndClear Not supported yet
 		// 	opType = MutationRef::CompareAndClear
-		// 	break;
+		//  break;
 		default:
 			ASSERT(false);
 		}
--- a/fdbserver/workloads/BackupAndParallelRestoreCorrectness.actor.cpp
+++ b/fdbserver/workloads/BackupAndParallelRestoreCorrectness.actor.cpp
@ -476,6 +476,7 @@ struct BackupAndParallelRestoreCorrectnessWorkload : TestWorkload {
 				    .detail("LastBackupContainer", lastBackupContainer->getURL())
 				    .detail("RestoreAfter", self->restoreAfter)
 				    .detail("BackupTag", printable(self->backupTag));
+				// start restoring

 				auto container = IBackupContainer::openContainer(lastBackupContainer->getURL());
 				BackupDescription desc = wait(container->describeBackup());
--- a/fdbserver/workloads/ConfigureDatabase.actor.cpp
+++ b/fdbserver/workloads/ConfigureDatabase.actor.cpp
@ -243,7 +243,7 @@ struct ConfigureDatabaseWorkload : TestWorkload {
 		return StringRef(format("DestroyDB%d", dbIndex));
 	}

-	static Future<ConfigurationResult::Type> IssueConfigurationChange( Database cx, const std::string& config, bool force ) {
+	static Future<ConfigurationResult> IssueConfigurationChange(Database cx, const std::string& config, bool force) {
 		printf("Issuing configuration change: %s\n", config.c_str());
 		return changeConfig(cx, config, force);
 	}
--- a/fdbserver/workloads/RemoveServersSafely.actor.cpp
+++ b/fdbserver/workloads/RemoveServersSafely.actor.cpp
@ -549,7 +549,7 @@ struct RemoveServersSafelyWorkload : TestWorkload {
 			while (true) {
 				cycle ++;
 				nQuorum = ((g_simulator.desiredCoordinators+1)/2)*2-1;
-				CoordinatorsResult::Type result = wait( changeQuorum( cx, autoQuorumChange(nQuorum) ) );
+				CoordinatorsResult result = wait(changeQuorum(cx, autoQuorumChange(nQuorum)));
 				TraceEvent(result==CoordinatorsResult::SUCCESS || result==CoordinatorsResult::SAME_NETWORK_ADDRESSES ? SevInfo : SevWarn, "RemoveAndKillQuorumChangeResult").detail("Step", "coordinators auto").detail("Result", (int)result).detail("Attempt", cycle).detail("Quorum", nQuorum).detail("DesiredCoordinators", g_simulator.desiredCoordinators);
 				if (result==CoordinatorsResult::SUCCESS || result==CoordinatorsResult::SAME_NETWORK_ADDRESSES)
 					break;
--- a/fdbserver/workloads/SpecialKeySpaceCorrectness.actor.cpp
+++ b/fdbserver/workloads/SpecialKeySpaceCorrectness.actor.cpp
@ -104,6 +104,10 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
 		Future<Void> f;
 		{
 			ReadYourWritesTransaction ryw{ cx->clone() };
+			if(!ryw.getDatabase()->apiVersionAtLeast(630)) {
+				//This test is not valid for API versions smaller than 630
+				return;
+			}
 			f = success(ryw.get(LiteralStringRef("\xff\xff/status/json")));
 			TEST(!f.isReady());
 		}
--- a/fdbserver/workloads/TriggerRecovery.actor.cpp
+++ b/fdbserver/workloads/TriggerRecovery.actor.cpp
@ -71,7 +71,7 @@ struct TriggerRecoveryLoopWorkload : TestWorkload {
 		state StringRef configStr(format("resolvers=%d", numResolversToSet));
 		loop {
 			Optional<ConfigureAutoResult> conf;
-			ConfigurationResult::Type r = wait(changeConfig(cx, { configStr }, conf, true));
+			ConfigurationResult r = wait(changeConfig(cx, { configStr }, conf, true));
 			if (r == ConfigurationResult::SUCCESS) {
 				self->currentNumOfResolvers = numResolversToSet;
 				TraceEvent(SevInfo, "TriggerRecoveryLoop_ChangeResolverConfigSuccess").detail("NumOfResolvers", self->currentNumOfResolvers.get());
--- a/flow/Error.h
+++ b/flow/Error.h
@ -85,11 +85,12 @@ Error systemErrorCodeToError();
 inline Error actor_cancelled() { return Error( error_code_operation_cancelled ); }
 enum { error_code_actor_cancelled = error_code_operation_cancelled };

-extern Error internal_error_impl( const char* file, int line );
+extern Error internal_error_impl(const char* file, int line);
 extern Error internal_error_impl(const char* msg, const char* file, int line);
 extern Error internal_error_impl(const char * a_nm, long long a, const char * op_nm, const char * b_nm, long long b, const char * file, int line);

-#define inernal_error_msg(msg) internal_error_impl(msg, __FILE__, __LINE__)
+#define internal_error() internal_error_impl(__FILE__, __LINE__)
+#define internal_error_msg(msg) internal_error_impl(msg, __FILE__, __LINE__)

 extern bool isAssertDisabled( int line );
 //#define ASSERT( condition ) ((void)0)
--- a/flow/ThreadHelper.actor.h
+++ b/flow/ThreadHelper.actor.h
@ -204,7 +204,16 @@ public:
 		}
 	}

-	ThreadSingleAssignmentVarBase() : status(Unset), callback(nullptr), valueReferenceCount(0) {} //, referenceCount(1) {}
+	void blockUntilReadyCheckOnMainThread() {
+		if (!isReady()) {
+			if (g_network->isOnMainThread()) {
+				throw blocked_from_network_thread();
+			}
+			BlockCallback cb(*this);
+		}
+	}
+
+	ThreadSingleAssignmentVarBase() : status(Unset), callback(NULL), valueReferenceCount(0) {} //, referenceCount(1) {}
 	~ThreadSingleAssignmentVarBase() {
 		this->mutex.assertNotEntered();

@ -310,12 +319,12 @@ public:
 	}

 	virtual void cancel() {
-		// Cancels the action and decrements the reference count by 1
-		// The if statement is just an optimization. It's ok if we take the wrong path due to a race
-		if(isReadyUnsafe())
-			delref();
-		else
-			onMainThreadVoid( [this](){ this->cancelFuture.cancel(); this->delref(); }, nullptr );
+		onMainThreadVoid(
+		    [this]() {
+			    this->cancelFuture.cancel();
+			    this->delref();
+		    },
+		    nullptr);
 	}

 	void releaseMemory() {
@ -329,6 +338,7 @@ private:
 	int32_t valueReferenceCount;

 protected:
+	// The caller of any of these *Unsafe functions should be holding |mutex|
 	bool isReadyUnsafe() const { return status >= Set; }
 	bool isErrorUnsafe() const { return status == ErrorSet; }
 	bool canBeSetUnsafe() const { return status == Unset; }
@ -426,6 +436,8 @@ public:
 		sav->blockUntilReady();
 	}

+	void blockUntilReadyCheckOnMainThread() { sav->blockUntilReadyCheckOnMainThread(); }
+
 	bool isValid() const {
 		return sav != 0;
 	}
--- a/flow/error_definitions.h
+++ b/flow/error_definitions.h
@ -145,6 +145,7 @@ ERROR( environment_variable_network_option_failed, 2022, "Environment variable n
 ERROR( transaction_read_only, 2023, "Attempted to commit a transaction specified as read-only" )
 ERROR( invalid_cache_eviction_policy, 2024, "Invalid cache eviction policy, only random and lru are supported" )
 ERROR( network_cannot_be_restarted, 2025, "Network can only be started once" )
+ERROR( blocked_from_network_thread, 2026, "Detected a deadlock in a callback called from the network thread" )

 ERROR( incompatible_protocol_version, 2100, "Incompatible protocol version" )
 ERROR( transaction_too_large, 2101, "Transaction exceeds byte limit" )
@ -204,6 +205,8 @@ ERROR( backup_cannot_expire, 2316, "Cannot expire requested data from backup wit
 ERROR( backup_auth_missing, 2317, "Cannot find authentication details (such as a password or secret key) for the specified Backup Container URL")
 ERROR( backup_auth_unreadable, 2318, "Cannot read or parse one or more sources of authentication information for Backup Container URLs")
 ERROR( backup_does_not_exist, 2319, "Backup does not exist")
+ERROR( backup_not_filterable_with_key_ranges, 2320, "Backup before 6.3 cannot be filtered with key ranges")
+ERROR( backup_not_overlapped_with_keys_filter, 2321, "Backup key ranges doesn't overlap with key ranges filter")
 ERROR( restore_invalid_version, 2361, "Invalid restore version")
 ERROR( restore_corrupted_data, 2362, "Corrupted backup data")
 ERROR( restore_missing_data, 2363, "Missing backup data")
--- a/packaging/msi/FDBInstaller.wxs
+++ b/packaging/msi/FDBInstaller.wxs
@ -32,7 +32,7 @@

 <Wix xmlns='http://schemas.microsoft.com/wix/2006/wi'>
  <Product Name='$(var.Title)'
-           Id='{707FC06F-9954-4A7E-AC9C-A52C99AE776D}'
+           Id='{0AB36B0F-2187-4ECD-9E7E-983EDD966CEB}'
           UpgradeCode='{A95EA002-686E-4164-8356-C715B7F8B1C8}'
           Version='$(var.Version)'
           Manufacturer='$(var.Manufacturer)'
--- a/tests/TestRunner/local_cluster.py
+++ b/tests/TestRunner/local_cluster.py
@ -0,0 +1,121 @@
+from pathlib import Path
+from argparse import ArgumentParser
+import random
+import string
+import subprocess
+import sys
+import socket
+
+
+def get_free_port():
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        s.bind(('0.0.0.0', 0))
+        return s.getsockname()[1]
+
+
+class LocalCluster:
+    configuration_template = """
+## foundationdb.conf
+##
+## Configuration file for FoundationDB server processes
+## Full documentation is available at
+## https://apple.github.io/foundationdb/configuration.html#the-configuration-file
+
+[fdbmonitor]
+
+[general]
+restart_delay = 10
+## by default, restart_backoff = restart_delay_reset_interval = restart_delay
+# initial_restart_delay = 0
+# restart_backoff = 60
+# restart_delay_reset_interval = 60
+cluster_file = {etcdir}/fdb.cluster
+# delete_envvars =
+# kill_on_configuration_change = true
+
+## Default parameters for individual fdbserver processes
+[fdbserver]
+command = {fdbserver_bin}
+public_address = auto:$ID
+listen_address = public
+datadir = {datadir}
+logdir = {logdir}
+# logsize = 10MiB
+# maxlogssize = 100MiB
+# machine_id =
+# datacenter_id =
+# class =
+# memory = 8GiB
+# storage_memory = 1GiB
+# cache_memory = 2GiB
+# metrics_cluster =
+# metrics_prefix =
+
+## An individual fdbserver process with id 4000
+## Parameters set here override defaults from the [fdbserver] section
+[fdbserver.{server_port}]
+    """
+
+    valid_letters_for_secret = string.ascii_letters + string.digits
+
+    def __init__(self, basedir: str, fdbserver_binary: str, fdbmonitor_binary: str,
+                 fdbcli_binary: str, create_config=True, port=None, ip_address=None):
+        self.basedir = Path(basedir)
+        self.fdbserver_binary = Path(fdbserver_binary)
+        self.fdbmonitor_binary = Path(fdbmonitor_binary)
+        self.fdbcli_binary = Path(fdbcli_binary)
+        for b in (self.fdbserver_binary, self.fdbmonitor_binary, self.fdbcli_binary):
+            assert b.exists(), "{} does not exist".format(b)
+        if not self.basedir.exists():
+            self.basedir.mkdir()
+        self.etc = self.basedir.joinpath('etc')
+        self.log = self.basedir.joinpath('log')
+        self.data = self.basedir.joinpath('data')
+        self.etc.mkdir(exist_ok=True)
+        self.log.mkdir(exist_ok=True)
+        self.data.mkdir(exist_ok=True)
+        self.port = get_free_port() if port is None else port
+        self.ip_address = '127.0.0.1' if ip_address is None else ip_address
+        self.running = False
+        self.process = None
+        self.fdbmonitor_logfile = None
+        if create_config:
+            with open(self.etc.joinpath('fdb.cluster'), 'x') as f:
+                random_string = lambda len : ''.join(random.choice(LocalCluster.valid_letters_for_secret) for i in range(len))
+                f.write('{desc}:{secret}@{ip_addr}:{server_port}'.format(
+                    desc=random_string(8),
+                    secret=random_string(8),
+                    ip_addr=self.ip_address,
+                    server_port=self.port
+                ))
+                with open(self.etc.joinpath('foundationdb.conf'), 'x') as f:
+                    f.write(LocalCluster.configuration_template.format(
+                        etcdir=self.etc,
+                        fdbserver_bin=self.fdbserver_binary,
+                        datadir=self.data,
+                        logdir=self.log,
+                        server_port=self.port
+                    ))
+
+    def __enter__(self):
+        assert not self.running, "Can't start a server that is already running"
+        args = [str(self.fdbmonitor_binary),
+                '--conffile',
+                str(self.etc.joinpath('foundationdb.conf')),
+                '--lockfile',
+                str(self.etc.joinpath('fdbmonitor.lock'))]
+        self.fdbmonitor_logfile = open(self.log.joinpath('fdbmonitor.log'), 'w')
+        self.process = subprocess.Popen(args, stdout=self.fdbmonitor_logfile, stderr=self.fdbmonitor_logfile)
+        self.running = True
+        return self
+
+    def __exit__(self, xc_type, exc_value, traceback):
+        assert self.running, "Server is not running"
+        if self.process.poll() is None:
+            self.process.terminate()
+        self.running = False
+
+    def create_database(self, storage='ssd'):
+        args = [self.fdbcli_binary, '-C', self.etc.joinpath('fdb.cluster'), '--exec',
+                'configure new single {}'.format(storage)]
+        subprocess.run(args)
--- a/tests/TestRunner/tmp_cluster.py
+++ b/tests/TestRunner/tmp_cluster.py
@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+
+import shutil
+import subprocess
+import sys
+import socket
+from local_cluster import LocalCluster
+from argparse import ArgumentParser, RawDescriptionHelpFormatter
+from random import choice
+from pathlib import Path
+
+class TempCluster:
+    def __init__(self, build_dir: str):
+        self.build_dir = Path(build_dir).resolve()
+        assert self.build_dir.exists(), "{} does not exist".format(build_dir)
+        assert self.build_dir.is_dir(), "{} is not a directory".format(build_dir)
+        tmp_dir = self.build_dir.joinpath(
+            'tmp',
+            ''.join(choice(LocalCluster.valid_letters_for_secret) for i in range(16)))
+        tmp_dir.mkdir(parents=True)
+        self.cluster = LocalCluster(tmp_dir,
+                                    self.build_dir.joinpath('bin', 'fdbserver'),
+                                    self.build_dir.joinpath('bin', 'fdbmonitor'),
+                                    self.build_dir.joinpath('bin', 'fdbcli'))
+        self.log = self.cluster.log
+        self.etc = self.cluster.etc
+        self.data = self.cluster.data
+        self.tmp_dir = tmp_dir
+
+    def __enter__(self):
+        self.cluster.__enter__()
+        self.cluster.create_database()
+        return self
+
+    def __exit__(self, xc_type, exc_value, traceback):
+        self.cluster.__exit__(xc_type, exc_value, traceback)
+        shutil.rmtree(self.tmp_dir)
+
+
+if __name__ == '__main__':
+    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
+                            description="""
+    This script automatically configures a temporary local cluster on the machine
+    and then calls a command while this cluster is running. As soon as the command
+    returns, the configured cluster is killed and all generated data is deleted.
+    This is useful for testing: if a test needs access to a fresh fdb cluster, one
+    can simply pass the test command to this script.
+
+    The command to run after the cluster started. Before the command is executed,
+    the following arguments will be preprocessed:
+    - All occurrences of @CLUSTER_FILE@ will be replaced with the path to the generated cluster file.
+    - All occurrences of @DATA_DIR@ will be replaced with the path to the data directory.
+    - All occurrences of @LOG_DIR@ will be replaced with the path to the log directory.
+    - All occurrences of @ETC_DIR@ will be replaced with the path to the configuration directory.
+    """)
+    parser.add_argument('--build-dir', '-b', metavar='BUILD_DIRECTORY', help='FDB build directory', required=True)
+    parser.add_argument('cmd', metavar="COMMAND", nargs="+", help="The command to run")
+    args = parser.parse_args()
+    errcode = 1
+    with TempCluster(args.build_dir) as cluster:
+        print("log-dir: {}".format(cluster.log))
+        print("etc-dir: {}".format(cluster.etc))
+        print("data-dir: {}".format(cluster.data))
+        print("cluster-file: {}".format(cluster.etc.joinpath('fdb.cluster')))
+        cmd_args = []
+        for cmd in args.cmd:
+            if cmd == '@CLUSTER_FILE@':
+                cmd_args.append(str(cluster.etc.joinpath('fdb.cluster')))
+            elif cmd == '@DATA_DIR@':
+                cmd_args.append(str(cluster.data))
+            elif cmd == '@LOG_DIR@':
+                cmd_args.append(str(cluster.log))
+            elif cmd == '@ETC_DIR@':
+                cmd_args.append(str(cluster.etc))
+            else:
+                cmd_args.append(cmd)
+        errcode = subprocess.run(cmd_args, stdout=sys.stdout, stderr=sys.stderr).returncode
+    sys.exit(errcode)