Merge remote-tracking branch 'upstream/master' into peek-cursor-timeout-bug

This commit is contained in:
Alex Miller 2019-07-15 17:05:39 -07:00
commit 4cc60dc9b8
159 changed files with 3040 additions and 1674 deletions

View File

@@ -68,6 +68,10 @@ class ResultSet(object):
self.tester_results[name] = results
@staticmethod
def _min_tuple(t1, t2):
return t1 if fdb.tuple.compare(t1, t2) < 0 else t2
def check_for_errors(self):
if len(self.tester_results) == 1:
return (0, False)
@@ -97,7 +101,7 @@ class ResultSet(object):
# If these results aren't using sequence numbers, then we match two results based on whether they share the same key
else:
min_key = min([r.key(self.specification) for r in results.values()])
min_key = reduce(ResultSet._min_tuple, [r.key(self.specification) for r in results.values()])
results = {i: r for i, r in results.items() if Result.tuples_match(r.key(self.specification), min_key)}
# Increment the indices for those testers which produced a result in this iteration
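The change above swaps Python's builtin min() for reduce over _min_tuple; this matters on Python 3, where min() raises TypeError when the candidate keys are tuples with mixed element types. A minimal sketch of the pattern, using only the fdb.tuple.compare helper already shown above:

```python
from functools import reduce  # reduce is not a builtin on Python 3

import fdb
fdb.api_version(610)
import fdb.tuple

def _min_tuple(t1, t2):
    # fdb.tuple.compare orders tuples by their packed representation,
    # which is well-defined even across element types.
    return t1 if fdb.tuple.compare(t1, t2) < 0 else t2

keys = [(1,), (b'bytes',), (u'text',)]
min_key = reduce(_min_tuple, keys)  # works on both Python 2 and 3
# min(keys) would raise TypeError on Python 3 for these mixed-type tuples
```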

View File

@@ -85,7 +85,7 @@ void fdb_flow_test() {
openTraceFile(NetworkAddress(), 1000000, 1000000, ".");
systemMonitor();
uncancellable(recurring(&systemMonitor, 5.0, TaskFlushTrace));
uncancellable(recurring(&systemMonitor, 5.0, TaskPriority::FlushTrace));
Future<Void> t = _test();
@@ -179,7 +179,7 @@ namespace FDB {
}
void backToFutureCallback( FDBFuture* f, void* data ) {
g_network->onMainThread( Promise<Void>((SAV<Void>*)data), TaskDefaultOnMainThread ); // SOMEDAY: think about this priority
g_network->onMainThread( Promise<Void>((SAV<Void>*)data), TaskPriority::DefaultOnMainThread ); // SOMEDAY: think about this priority
}
// backToFuture<Type>( FDBFuture*, (FDBFuture* -> Type) ) -> Future<Type>

View File

@@ -1551,19 +1551,21 @@ struct UnitTestsFunc : InstructionFunc {
const uint64_t noRetryLimit = -1;
const uint64_t maxRetryDelay = 100;
const uint64_t sizeLimit = 100000;
const uint64_t maxFieldLength = 1000;
data->db->setDatabaseOption(FDBDatabaseOption::FDB_DB_OPTION_LOCATION_CACHE_SIZE, Optional<StringRef>(StringRef((const uint8_t*)&locationCacheSize, 8)));
data->db->setDatabaseOption(FDBDatabaseOption::FDB_DB_OPTION_MAX_WATCHES, Optional<StringRef>(StringRef((const uint8_t*)&maxWatches, 8)));
data->db->setDatabaseOption(FDBDatabaseOption::FDB_DB_OPTION_DATACENTER_ID, Optional<StringRef>(LiteralStringRef("dc_id")));
data->db->setDatabaseOption(FDBDatabaseOption::FDB_DB_OPTION_MACHINE_ID, Optional<StringRef>(LiteralStringRef("machine_id")));
data->db->setDatabaseOption(FDBDatabaseOption::FDB_DB_OPTION_SNAPSHOT_RYW_ENABLE);
data->db->setDatabaseOption(FDBDatabaseOption::FDB_DB_OPTION_SNAPSHOT_RYW_DISABLE);
data->db->setDatabaseOption(FDBDatabaseOption::FDB_DB_OPTION_TRANSACTION_LOGGING_MAX_FIELD_LENGTH, Optional<StringRef>(StringRef((const uint8_t*)&maxFieldLength, 8)));
data->db->setDatabaseOption(FDBDatabaseOption::FDB_DB_OPTION_TRANSACTION_TIMEOUT, Optional<StringRef>(StringRef((const uint8_t*)&timeout, 8)));
data->db->setDatabaseOption(FDBDatabaseOption::FDB_DB_OPTION_TRANSACTION_TIMEOUT, Optional<StringRef>(StringRef((const uint8_t*)&noTimeout, 8)));
data->db->setDatabaseOption(FDBDatabaseOption::FDB_DB_OPTION_TRANSACTION_MAX_RETRY_DELAY, Optional<StringRef>(StringRef((const uint8_t*)&maxRetryDelay, 8)));
data->db->setDatabaseOption(FDBDatabaseOption::FDB_DB_OPTION_TRANSACTION_SIZE_LIMIT, Optional<StringRef>(StringRef((const uint8_t*)&sizeLimit, 8)));
data->db->setDatabaseOption(FDBDatabaseOption::FDB_DB_OPTION_TRANSACTION_RETRY_LIMIT, Optional<StringRef>(StringRef((const uint8_t*)&retryLimit, 8)));
data->db->setDatabaseOption(FDBDatabaseOption::FDB_DB_OPTION_TRANSACTION_RETRY_LIMIT, Optional<StringRef>(StringRef((const uint8_t*)&noRetryLimit, 8)));
data->db->setDatabaseOption(FDBDatabaseOption::FDB_DB_OPTION_SNAPSHOT_RYW_ENABLE);
data->db->setDatabaseOption(FDBDatabaseOption::FDB_DB_OPTION_SNAPSHOT_RYW_DISABLE);
state Reference<Transaction> tr = data->db->createTransaction();
tr->setOption(FDBTransactionOption::FDB_TR_OPTION_PRIORITY_SYSTEM_IMMEDIATE);
@@ -1574,6 +1576,7 @@ struct UnitTestsFunc : InstructionFunc {
tr->setOption(FDBTransactionOption::FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE);
tr->setOption(FDBTransactionOption::FDB_TR_OPTION_READ_SYSTEM_KEYS);
tr->setOption(FDBTransactionOption::FDB_TR_OPTION_ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOption::FDB_TR_OPTION_TRANSACTION_LOGGING_MAX_FIELD_LENGTH, Optional<StringRef>(StringRef((const uint8_t*)&maxFieldLength, 8)));
tr->setOption(FDBTransactionOption::FDB_TR_OPTION_TIMEOUT, Optional<StringRef>(StringRef((const uint8_t*)&timeout, 8)));
tr->setOption(FDBTransactionOption::FDB_TR_OPTION_RETRY_LIMIT, Optional<StringRef>(StringRef((const uint8_t*)&retryLimit, 8)));
tr->setOption(FDBTransactionOption::FDB_TR_OPTION_MAX_RETRY_DELAY, Optional<StringRef>(StringRef((const uint8_t*)&maxRetryDelay, 8)));

View File

@@ -793,13 +793,14 @@ func (sm *StackMachine) processInst(idx int, inst tuple.Tuple) {
db.Options().SetMaxWatches(10001)
db.Options().SetDatacenterId("dc_id")
db.Options().SetMachineId("machine_id")
db.Options().SetSnapshotRywEnable()
db.Options().SetSnapshotRywDisable()
db.Options().SetTransactionLoggingMaxFieldLength(1000)
db.Options().SetTransactionTimeout(100000)
db.Options().SetTransactionTimeout(0)
db.Options().SetTransactionMaxRetryDelay(100)
db.Options().SetTransactionRetryLimit(10)
db.Options().SetTransactionRetryLimit(-1)
db.Options().SetSnapshotRywEnable()
db.Options().SetSnapshotRywDisable()
if !fdb.IsAPIVersionSelected() {
log.Fatal("API version should be selected")
@@ -836,6 +837,7 @@ func (sm *StackMachine) processInst(idx int, inst tuple.Tuple) {
tr.Options().SetReadYourWritesDisable()
tr.Options().SetReadSystemKeys()
tr.Options().SetAccessSystemKeys()
tr.Options().SetTransactionLoggingMaxFieldLength(1000)
tr.Options().SetTimeout(60 * 1000)
tr.Options().SetRetryLimit(50)
tr.Options().SetMaxRetryDelay(100)

View File

@@ -228,6 +228,30 @@ func (o NetworkOptions) SetEnableSlowTaskProfiling() error {
return o.setOpt(71, nil)
}
// Enable client buggify - will make requests randomly fail (intended for client testing)
func (o NetworkOptions) SetClientBuggifyEnable() error {
return o.setOpt(80, nil)
}
// Disable client buggify
func (o NetworkOptions) SetClientBuggifyDisable() error {
return o.setOpt(81, nil)
}
// Set the probability of a CLIENT_BUGGIFY section being active for the current execution.
//
// Parameter: probability expressed as a percentage between 0 and 100
func (o NetworkOptions) SetClientBuggifySectionActivatedProbability(param int64) error {
return o.setOpt(82, int64ToBytes(param))
}
// Set the probability of an active CLIENT_BUGGIFY section being fired. A section will only fire if it was activated.
//
// Parameter: probability expressed as a percentage between 0 and 100
func (o NetworkOptions) SetClientBuggifySectionFiredProbability(param int64) error {
return o.setOpt(83, int64ToBytes(param))
}
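
For illustration, a hedged sketch of exercising the same client buggify knobs from the Python binding; the setter names below are assumed to be the generated Python equivalents of network options 80-83 and are not shown in this diff:

```python
import fdb
fdb.api_version(610)

# Network options must be set before the client network starts (fdb.open starts it).
fdb.options.set_client_buggify_enable()                           # assumed setter (option 80)
fdb.options.set_client_buggify_section_activated_probability(25)  # percent (option 82)
fdb.options.set_client_buggify_section_fired_probability(25)      # percent (option 83)

db = fdb.open()  # client requests may now fail at random; for testing only
```
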
// Set the size of the client location cache. Raising this value can boost performance in very large databases where clients access data in a near-random pattern. Defaults to 100000.
//
// Parameter: Max location cache entries
@@ -256,6 +280,23 @@ func (o DatabaseOptions) SetDatacenterId(param string) error {
return o.setOpt(22, []byte(param))
}
// Snapshot read operations will see the results of writes done in the same transaction. This is the default behavior.
func (o DatabaseOptions) SetSnapshotRywEnable() error {
return o.setOpt(26, nil)
}
// Snapshot read operations will not see the results of writes done in the same transaction. This was the default behavior prior to API version 300.
func (o DatabaseOptions) SetSnapshotRywDisable() error {
return o.setOpt(27, nil)
}
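
What these two options mean in practice, as a minimal Python sketch (snapshot RYW is enabled by default at API version 300 and above):

```python
import fdb
fdb.api_version(610)
db = fdb.open()

@fdb.transactional
def snapshot_ryw_demo(tr):
    tr[b'k'] = b'v'
    # With snapshot RYW enabled (the default), a snapshot read observes the
    # transaction's own uncommitted write; with it disabled, it would not.
    assert tr.snapshot[b'k'] == b'v'

snapshot_ryw_demo(db)
```
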
// Sets the maximum escaped length of key and value fields to be logged to the trace file via the LOG_TRANSACTION option. This sets the ``transaction_logging_max_field_length`` option of each transaction created by this database. See the transaction option description for more information.
//
// Parameter: Maximum length of escaped key and value fields.
func (o DatabaseOptions) SetTransactionLoggingMaxFieldLength(param int64) error {
return o.setOpt(405, int64ToBytes(param))
}
// Set a timeout in milliseconds which, when elapsed, will cause each transaction automatically to be cancelled. This sets the ``timeout`` option of each transaction created by this database. See the transaction option description for more information. Using this option requires that the API version is 610 or higher.
//
// Parameter: value in milliseconds of timeout
@@ -277,23 +318,13 @@ func (o DatabaseOptions) SetTransactionMaxRetryDelay(param int64) error {
return o.setOpt(502, int64ToBytes(param))
}
// Set the maximum transaction size which, if exceeded, will cause the transaction to be cancelled. Default to 10,000,000 bytes.
// Set the maximum transaction size in bytes. This sets the ``size_limit`` option on each transaction created by this database. See the transaction option description for more information.
//
// Parameter: value in bytes
func (o DatabaseOptions) SetTransactionSizeLimit(param int64) error {
return o.setOpt(503, int64ToBytes(param))
}
// Snapshot read operations will see the results of writes done in the same transaction. This is the default behavior.
func (o DatabaseOptions) SetSnapshotRywEnable() error {
return o.setOpt(26, nil)
}
// Snapshot read operations will not see the results of writes done in the same transaction. This was the default behavior prior to API version 300.
func (o DatabaseOptions) SetSnapshotRywDisable() error {
return o.setOpt(27, nil)
}
// The transaction, if not self-conflicting, may be committed a second time after commit succeeds, in the event of a fault
func (o TransactionOptions) SetCausalWriteRisky() error {
return o.setOpt(10, nil)
@@ -388,6 +419,13 @@ func (o TransactionOptions) SetLogTransaction() error {
return o.setOpt(404, nil)
}
// Sets the maximum escaped length of key and value fields to be logged to the trace file via the LOG_TRANSACTION option, after which the field will be truncated. A negative value disables truncation.
//
// Parameter: Maximum length of escaped key and value fields.
func (o TransactionOptions) SetTransactionLoggingMaxFieldLength(param int64) error {
return o.setOpt(405, int64ToBytes(param))
}
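
A hedged sketch of how this option combines with transaction debug logging, written against the Python binding; set_debug_transaction_identifier and set_log_transaction are assumed to be the generated setters for transaction options 403 and 404:

```python
import fdb
fdb.api_version(610)
db = fdb.open()

tr = db.create_transaction()
tr.options.set_debug_transaction_identifier(b'example-txn')  # assumed setter (option 403)
tr.options.set_log_transaction()                             # assumed setter (option 404)
tr.options.set_transaction_logging_max_field_length(-1)      # negative disables truncation
tr[b'some-key'] = b'some-value'
tr.commit().wait()
# Keys and values in the TransactionTrace_* trace events are now logged untruncated.
```
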
// Set a timeout in milliseconds which, when elapsed, will cause the transaction automatically to be cancelled. Valid parameter values are ``[0, INT_MAX]``. If set to 0, will disable all timeouts. All pending and any future uses of the transaction will throw an exception. The transaction can be used again after it is reset. Prior to API version 610, like all other transaction options, the timeout must be reset after a call to ``onError``. If the API version is 610 or greater, the timeout is not reset after an ``onError`` call. This allows the user to specify a longer timeout on specific transactions than the default timeout specified through the ``transaction_timeout`` database option without the shorter database timeout cancelling transactions that encounter a retryable error. Note that at all API versions, it is safe and legal to set the timeout each time the transaction begins, so most code written assuming the older behavior can be upgraded to the newer behavior without requiring any modification, and the caller is not required to implement special logic in retry loops to only conditionally set this option.
//
// Parameter: value in milliseconds of timeout
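
The upgrade note above reduces to a simple pattern: it is safe at every API version to (re)set the timeout at the top of each retry attempt. A minimal Python sketch:

```python
import fdb
fdb.api_version(610)
db = fdb.open()

tr = db.create_transaction()
while True:
    try:
        tr.options.set_timeout(5000)  # milliseconds; re-armed on every attempt
        tr[b'key'] = b'value'
        tr.commit().wait()
        break
    except fdb.FDBError as e:
        tr.on_error(e).wait()  # resets the transaction; before API 610 this also reset the timeout
```
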
@@ -409,7 +447,7 @@ func (o TransactionOptions) SetMaxRetryDelay(param int64) error {
return o.setOpt(502, int64ToBytes(param))
}
// Set the maximum transaction size which, if exceeded, will cause the transaction to be cancelled. Valid parameter values are ``[32, 10,000,000]``.
// Set the transaction size limit in bytes. The size is calculated by combining the sizes of all keys and values written or mutated, all key ranges cleared, and all read and write conflict ranges. (In other words, it includes the total size of all data included in the request to the cluster to commit the transaction.) Large transactions can cause performance problems on FoundationDB clusters, so setting this limit to a smaller value than the default can help prevent the client from accidentally degrading the cluster's performance. This value must be at least 32 and cannot be set to higher than 10,000,000, the default transaction size limit.
//
// Parameter: value in bytes
func (o TransactionOptions) SetSizeLimit(param int64) error {

View File

@@ -481,13 +481,14 @@ public class AsyncStackTester {
db.options().setMaxWatches(10001);
db.options().setDatacenterId("dc_id");
db.options().setMachineId("machine_id");
db.options().setSnapshotRywEnable();
db.options().setSnapshotRywDisable();
db.options().setTransactionLoggingMaxFieldLength(1000);
db.options().setTransactionTimeout(100000);
db.options().setTransactionTimeout(0);
db.options().setTransactionMaxRetryDelay(100);
db.options().setTransactionRetryLimit(10);
db.options().setTransactionRetryLimit(-1);
db.options().setSnapshotRywEnable();
db.options().setSnapshotRywDisable();
tr.options().setPrioritySystemImmediate();
tr.options().setPriorityBatch();
@@ -496,6 +497,7 @@ public class AsyncStackTester {
tr.options().setReadYourWritesDisable();
tr.options().setReadSystemKeys();
tr.options().setAccessSystemKeys();
tr.options().setTransactionLoggingMaxFieldLength(1000);
tr.options().setTimeout(60*1000);
tr.options().setRetryLimit(50);
tr.options().setMaxRetryDelay(100);

View File

@@ -434,13 +434,14 @@ public class StackTester {
db.options().setMaxWatches(10001);
db.options().setDatacenterId("dc_id");
db.options().setMachineId("machine_id");
db.options().setSnapshotRywEnable();
db.options().setSnapshotRywDisable();
db.options().setTransactionLoggingMaxFieldLength(1000);
db.options().setTransactionTimeout(100000);
db.options().setTransactionTimeout(0);
db.options().setTransactionMaxRetryDelay(100);
db.options().setTransactionRetryLimit(10);
db.options().setTransactionRetryLimit(-1);
db.options().setSnapshotRywEnable();
db.options().setSnapshotRywDisable();
tr.options().setPrioritySystemImmediate();
tr.options().setPriorityBatch();
@@ -449,6 +450,7 @@ public class StackTester {
tr.options().setReadYourWritesDisable();
tr.options().setReadSystemKeys();
tr.options().setAccessSystemKeys();
tr.options().setTransactionLoggingMaxFieldLength(1000);
tr.options().setTimeout(60*1000);
tr.options().setRetryLimit(50);
tr.options().setMaxRetryDelay(100);

View File

@@ -1,64 +0,0 @@
#!/usr/bin/python
#
# size_limit.py
#
# This source file is part of the FoundationDB open source project
#
# Copyright 2013-2019 Apple Inc. and the FoundationDB project authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import fdb
import sys
fdb.api_version(610)
@fdb.transactional
def setValue(tr, key, value):
tr[key] = value
@fdb.transactional
def setValueWithLimit(tr, key, value, limit):
tr.options.set_size_limit(limit)
tr[key] = value
def run(clusterFile):
db = fdb.open(clusterFile)
db.options.set_transaction_timeout(2000) # 2 seconds
db.options.set_transaction_retry_limit(3)
value = 'a' * 1024
setValue(db, 't1', value)
assert(value == db['t1'])
try:
db.options.set_transaction_size_limit(1000)
setValue(db, 't2', value)
assert(False) # not reached
except fdb.impl.FDBError as e:
assert(e.code == 2101) # Transaction exceeds byte limit (2101)
# Per transaction option overrides database option
db.options.set_transaction_size_limit(1000000)
try:
setValueWithLimit(db, 't3', value, 1000)
assert(False) # not reached
except fdb.impl.FDBError as e:
assert(e.code == 2101) # Transaction exceeds byte limit (2101)
# Expect a cluster file as input. This test will write to the FDB cluster, so
# be aware of potential side effects.
if __name__ == '__main__':
clusterFile = sys.argv[1]
run(clusterFile)

View File

@@ -0,0 +1,76 @@
#!/usr/bin/python
#
# size_limit_tests.py
#
# This source file is part of the FoundationDB open source project
#
# Copyright 2013-2019 Apple Inc. and the FoundationDB project authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import fdb
import sys
if __name__ == '__main__':
fdb.api_version(610)
@fdb.transactional
def setValue(tr, key, value):
tr[key] = value
@fdb.transactional
def setValueWithLimit(tr, key, value, limit):
tr.options.set_size_limit(limit)
tr[key] = value
def test_size_limit_option(db):
db.options.set_transaction_timeout(2000) # 2 seconds
db.options.set_transaction_retry_limit(3)
value = b'a' * 1024
setValue(db, b't1', value)
assert(value == db[b't1'])
try:
db.options.set_transaction_size_limit(1000)
setValue(db, b't2', value)
assert(False) # not reached
except fdb.FDBError as e:
assert(e.code == 2101) # Transaction exceeds byte limit (2101)
# Per transaction option overrides database option
db.options.set_transaction_size_limit(1000000)
try:
setValueWithLimit(db, b't3', value, 1000)
assert(False) # not reached
except fdb.FDBError as e:
assert(e.code == 2101) # Transaction exceeds byte limit (2101)
# DB default survives on_error reset
db.options.set_transaction_size_limit(1000)
tr = db.create_transaction()
try:
tr[b't4'] = b'bar'
tr.on_error(fdb.FDBError(1007)).wait()
setValue(tr, b't4', value)
tr.commit().wait()
assert(False) # not reached
except fdb.FDBError as e:
assert(e.code == 2101) # Transaction exceeds byte limit (2101)
# Expect a cluster file as input. This test will write to the FDB cluster, so
# be aware of potential side effects.
if __name__ == '__main__':
clusterFile = sys.argv[1]
db = fdb.open(clusterFile)
test_size_limit_option(db)

View File

@@ -48,6 +48,8 @@ from cancellation_timeout_tests import test_retry_limits
from cancellation_timeout_tests import test_db_retry_limits
from cancellation_timeout_tests import test_combinations
from size_limit_tests import test_size_limit_option
random.seed(0)
if len(sys.argv) == 4:
@@ -126,9 +128,13 @@ class Instruction:
def test_db_options(db):
db.options.set_location_cache_size(100001)
db.options.set_max_watches(100001)
db.options.set_datacenter_id("dc_id")
db.options.set_machine_id("machine_id")
db.options.set_snapshot_ryw_enable()
db.options.set_snapshot_ryw_disable()
db.options.set_transaction_logging_max_field_length(1000)
db.options.set_transaction_timeout(100000)
db.options.set_transaction_timeout(0)
db.options.set_transaction_timeout(0)
@@ -136,8 +142,6 @@ def test_db_options(db):
db.options.set_transaction_size_limit(100000)
db.options.set_transaction_retry_limit(10)
db.options.set_transaction_retry_limit(-1)
db.options.set_snapshot_ryw_enable()
db.options.set_snapshot_ryw_disable()
@fdb.transactional
@@ -149,6 +153,7 @@ def test_options(tr):
tr.options.set_read_your_writes_disable()
tr.options.set_read_system_keys()
tr.options.set_access_system_keys()
tr.options.set_transaction_logging_max_field_length(1000)
tr.options.set_timeout(60 * 1000)
tr.options.set_retry_limit(50)
tr.options.set_max_retry_delay(100)
@@ -543,8 +548,6 @@ class Tester:
inst.push(b"WAITED_FOR_EMPTY")
elif inst.op == six.u("UNIT_TESTS"):
try:
db.options.set_location_cache_size(100001)
test_db_options(db)
test_options(db)
test_watches(db)
@@ -557,6 +560,8 @@ class Tester:
test_locality(db)
test_predicates()
test_size_limit_option(db)
except fdb.FDBError as e:
print("Unit tests failed: %s" % e.description)
traceback.print_exc()

View File

@@ -456,14 +456,15 @@ class Tester
@db.options.set_max_watches(10001)
@db.options.set_datacenter_id("dc_id")
@db.options.set_machine_id("machine_id")
@db.options.set_snapshot_ryw_enable()
@db.options.set_snapshot_ryw_disable()
@db.options.set_transaction_logging_max_field_length(1000)
@db.options.set_transaction_timeout(100000)
@db.options.set_transaction_timeout(0)
@db.options.set_transaction_max_retry_delay(100)
@db.options.set_transaction_size_limit(100000)
@db.options.set_transaction_retry_limit(10)
@db.options.set_transaction_retry_limit(-1)
@db.options.set_snapshot_ryw_enable()
@db.options.set_snapshot_ryw_disable()
@db.transact do |tr|
tr.options.set_priority_system_immediate
@@ -473,6 +474,7 @@ class Tester
tr.options.set_read_your_writes_disable
tr.options.set_read_system_keys
tr.options.set_access_system_keys
tr.options.set_transaction_logging_max_field_length(1000)
tr.options.set_timeout(60*1000)
tr.options.set_retry_limit(50)
tr.options.set_max_retry_delay(100)

View File

@@ -8,6 +8,12 @@ set(FDB_RELEASE OFF CACHE BOOL "This is a building of a final release")
set(USE_LD "LD" CACHE STRING "The linker to use for building: can be LD (system default, default choice), GOLD, or LLD")
set(USE_LIBCXX OFF CACHE BOOL "Use libc++")
set(USE_CCACHE OFF CACHE BOOL "Use ccache for compilation if available")
set(RELATIVE_DEBUG_PATHS OFF CACHE BOOL "Use relative file paths in debug info")
set(rel_debug_paths OFF)
if(RELATIVE_DEBUG_PATHS)
set(rel_debug_paths ON)
endif()
if(USE_GPERFTOOLS)
find_package(Gperftools REQUIRED)
@@ -103,6 +109,10 @@ else()
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=lld -Wl,--disable-new-dtags")
endif()
if(rel_debug_paths)
add_compile_options("-fdebug-prefix-map=${CMAKE_SOURCE_DIR}=." "-fdebug-prefix-map=${CMAKE_BINARY_DIR}=.")
endif()
# we always compile with debug symbols. CPack will strip them out
# and create a debuginfo rpm
add_compile_options(-ggdb -fno-omit-frame-pointer)

View File

@@ -49,6 +49,8 @@
.. |max-retry-delay-database-option| replace:: FIXME
.. |transaction-size-limit-database-option| replace:: FIXME
.. |timeout-database-option| replace:: FIXME
.. |transaction-logging-max-field-length-database-option| replace:: FIXME
.. |transaction-logging-max-field-length-transaction-option| replace:: FIXME
.. include:: api-common.rst.inc

View File

@@ -326,6 +326,10 @@
If this option has been set more times with this database than the disable option, snapshot reads will *not* see the effects of prior writes in the same transaction. Disabling this option is equivalent to calling |snapshot-ryw-disable-transaction-option| on each transaction created by this database.
.. |option-db-tr-transaction-logging-max-field-length-blurb| replace::
Sets the maximum escaped length of key and value fields to be logged to the trace file via the LOG_TRANSACTION option. This is equivalent to calling |transaction-logging-max-field-length-transaction-option| on each transaction created by this database.
.. |transaction-options-blurb| replace::
Transaction options alter the behavior of FoundationDB transactions. FoundationDB defaults to extremely safe transaction behavior, and we have worked hard to make the performance excellent with the default setting, so you should not often need to use transaction options.
@@ -399,7 +403,7 @@
.. |option-set-size-limit-blurb| replace::
Set the maximum transaction size limit in bytes. The size is calculated by combining the sizes of all keys and values written or mutated, all key ranges cleared, and all read and write conflict ranges. (In other words, it includes the total size of all data included in the request to the cluster to commit the transaction.) Large transactions can cause performance problems on FoundationDB clusters, so setting this limit to a smaller value than the default can help prevent the client from accidentally degrading the cluster's performance. This value must be at least 32 and cannot be set to higher than 10,000,000, the default transaction size limit. The value set by this limit will persist across transaction resets.
Set the transaction size limit in bytes. The size is calculated by combining the sizes of all keys and values written or mutated, all key ranges cleared, and all read and write conflict ranges. (In other words, it includes the total size of all data included in the request to the cluster to commit the transaction.) Large transactions can cause performance problems on FoundationDB clusters, so setting this limit to a smaller value than the default can help prevent the client from accidentally degrading the cluster's performance. This value must be at least 32 and cannot be set to higher than 10,000,000, the default transaction size limit.
.. |option-set-timeout-blurb1| replace::
@@ -411,7 +415,7 @@
.. |option-set-timeout-blurb3| replace::
Prior to API version 610, like all other transaction options, a timeout must be reset after a call to |on-error-func|. Note that resetting this option resets only the timeout *duration*, not the starting point from which the time is measured. If the API version is 610 or newer, then the timeout is not reset. This allows the user to specify a timeout for specific transactions that is longer than the timeout specified by |timeout-database-option|. Note that at all API versions, it is safe and legal to call this option after each call to |on-error-func|, so most code written assuming the older behavior can be upgraded without requiring any modification. This also means that there is no need to introduce logic to conditionally set this option within retry loops. One can set the default timeout for all transactions by calling |timeout-database-option|.
Prior to API version 610, like all other transaction options, a timeout must be reset after a call to |on-error-func|. Note that resetting this option resets only the timeout *duration*, not the starting point from which the time is measured. If the API version is 610 or newer, then the timeout is not reset. This allows the user to specify a timeout for specific transactions that is longer than the timeout specified by |timeout-database-option|. Note that at all API versions, it is safe and legal to call this option after each call to |on-error-func|, so most code written assuming the older behavior can be upgraded without requiring any modification. This also means that there is no need to introduce logic to conditionally set this option within retry loops. One can set the default timeout for all transactions by calling |timeout-database-option|.
.. |option-next-write-no-write-conflict-range-blurb| replace::
@@ -421,6 +425,10 @@
Care needs to be taken when using this option on a transaction that is shared between multiple threads. When setting this option, write conflict ranges will be disabled on the next write operation, regardless of what thread it is on.
.. |option-set-transaction-logging-max-field-length-blurb| replace::
Sets the maximum escaped length of key and value fields to be logged to the trace file via the LOG_TRANSACTION option, after which the field will be truncated. A negative value disables truncation. One can set the default max field length for all transactions by calling |transaction-logging-max-field-length-database-option|.
.. |future-blurb1| replace::
Many FoundationDB API functions return "future" objects. A brief overview of futures is included in the :doc:`class scheduling tutorial <class-scheduling>`. Most future objects behave just like a normal object, but block when you use them for the first time if the asynchronous function which returned the future has not yet completed its action. A future object is considered ready when either a value is available, or when an error has occurred.

View File

@@ -25,6 +25,7 @@
.. |timeout-database-option| replace:: :func:`Database.options.set_transaction_timeout`
.. |max-retry-delay-database-option| replace:: :func:`Database.options.set_transaction_max_retry_delay`
.. |transaction-size-limit-database-option| replace:: :func:`Database.options.set_transaction_size_limit`
.. |transaction-logging-max-field-length-database-option| replace:: :func:`Database.options.set_transaction_logging_max_field_length`
.. |snapshot-ryw-enable-database-option| replace:: :func:`Database.options.set_snapshot_ryw_enable`
.. |snapshot-ryw-disable-database-option| replace:: :func:`Database.options.set_snapshot_ryw_disable`
.. |future-type-string| replace:: a :ref:`future <api-python-future>`
@@ -35,6 +36,7 @@
.. |size-limit-transaction-option| replace:: :func:`Transaction.options.set_size_limit`
.. |snapshot-ryw-enable-transaction-option| replace:: :func:`Transaction.options.set_snapshot_ryw_enable`
.. |snapshot-ryw-disable-transaction-option| replace:: :func:`Transaction.options.set_snapshot_ryw_disable`
.. |transaction-logging-max-field-length-transaction-option| replace:: :func:`Transaction.options.set_transaction_logging_max_field_length`
.. |lazy-iterator-object| replace:: generator
.. |key-meth| replace:: :meth:`Subspace.key`
.. |directory-subspace| replace:: :ref:`DirectorySubspace <api-python-directory-subspace>`
@@ -384,6 +386,10 @@ Database options
|option-db-tr-size-limit-blurb|
.. method:: Database.options.set_transaction_logging_max_field_length(size_limit)
|option-db-tr-transaction-logging-max-field-length-blurb|
.. method:: Database.options.set_snapshot_ryw_enable()
|option-db-snapshot-ryw-enable-blurb|
@@ -855,6 +861,10 @@ Transaction options
|option-set-timeout-blurb3|
.. method:: Transaction.options.set_transaction_logging_max_field_length(size_limit)
|option-set-transaction-logging-max-field-length-blurb|
.. _api-python-future:
Future objects

View File

@@ -25,6 +25,7 @@
.. |transaction-size-limit-database-option| replace:: :func:`Database.options.set_transaction_size_limit`
.. |snapshot-ryw-enable-database-option| replace:: :meth:`Database.options.set_snapshot_ryw_enable`
.. |snapshot-ryw-disable-database-option| replace:: :meth:`Database.options.set_snapshot_ryw_disable`
.. |transaction-logging-max-field-length-database-option| replace:: :meth:`Database.options.set_transaction_logging_max_field_length`
.. |future-type-string| replace:: a :class:`Future`
.. |read-your-writes-disable-option| replace:: :meth:`Transaction.options.set_read_your_writes_disable`
.. |retry-limit-transaction-option| replace:: :meth:`Transaction.options.set_retry_limit`
@@ -33,6 +34,7 @@
.. |size-limit-transaction-option| replace:: :meth:`Transaction.options.set_size_limit`
.. |snapshot-ryw-enable-transaction-option| replace:: :meth:`Transaction.options.set_snapshot_ryw_enable`
.. |snapshot-ryw-disable-transaction-option| replace:: :meth:`Transaction.options.set_snapshot_ryw_disable`
.. |transaction-logging-max-field-length-transaction-option| replace:: :meth:`Transaction.options.set_transaction_logging_max_field_length`
.. |lazy-iterator-object| replace:: :class:`Enumerator`
.. |key-meth| replace:: :meth:`Subspace.key`
.. |directory-subspace| replace:: :class:`DirectorySubspace`
@@ -380,6 +382,10 @@ Database options
|option-db-tr-size-limit-blurb|
.. method:: Database.options.set_transaction_logging_max_field_length(size_limit) -> nil
|option-db-tr-transaction-logging-max-field-length-blurb|
.. method:: Database.options.set_snapshot_ryw_enable() -> nil
|option-db-snapshot-ryw-enable-blurb|
@@ -797,6 +803,10 @@ Transaction options
|option-set-timeout-blurb3|
.. method:: Transaction.options.set_transaction_logging_max_field_length(size_limit) -> nil
|option-set-transaction-logging-max-field-length-blurb|
.. _transact:
The transact method

View File

@@ -316,19 +316,19 @@ Single datacenter modes
+==============================+==+=================+=================+================+
| Best for | | 1-2 machines | 3-4 machines | 5+ machines |
+------------------------------+--+-----------------+-----------------+----------------+
| Replication | | 1 copy | 2 copy | 3 copy |
| Total Replicas | | 1 copy | 2 copies | 3 copies |
+------------------------------+--+-----------------+-----------------+----------------+
| # live machines | | | | |
| Live machines required | | | | |
| to make progress | | 1 | 2 | 3 |
+------------------------------+--+-----------------+-----------------+----------------+
| Minimum # of machines | | | | |
| Required machines | | | | |
| for fault tolerance | | impossible | 3 | 4 |
+------------------------------+--+-----------------+-----------------+----------------+
| Ideal # of | | | | |
| Ideal number of | | | | |
| coordination servers | | 1 | 3 | 5 |
+------------------------------+--+-----------------+-----------------+----------------+
| # simultaneous failures | | | | |
| after which data may be lost | | any machine | 2+ machines | 3+ machines |
| Simultaneous failures | | | | |
| after which data may be lost | | any process | 2+ machines | 3+ machines |
+------------------------------+--+-----------------+-----------------+----------------+
In the three single datacenter redundancy modes, FoundationDB replicates data across the required number of machines in the cluster, but without aiming for datacenter redundancy. Although machines may be placed in more than one datacenter, the cluster will not be tolerant of datacenter-correlated failures.

View File

@@ -51,6 +51,8 @@
.. |max-retry-delay-database-option| replace:: FIXME
.. |transaction-size-limit-database-option| replace:: FIXME
.. |timeout-database-option| replace:: FIXME
.. |transaction-logging-max-field-length-transaction-option| replace:: FIXME
.. |transaction-logging-max-field-length-database-option| replace:: FIXME
.. include:: api-common.rst.inc

View File

@@ -51,6 +51,8 @@
.. |max-retry-delay-database-option| replace:: FIXME
.. |transaction-size-limit-database-option| replace:: FIXME
.. |timeout-database-option| replace:: FIXME
.. |transaction-logging-max-field-length-transaction-option| replace:: FIXME
.. |transaction-logging-max-field-length-database-option| replace:: FIXME
.. include:: api-common.rst.inc

View File

@@ -10,38 +10,38 @@ macOS
The macOS installation package is supported on macOS 10.7+. It includes the client and (optionally) the server.
* `FoundationDB-6.1.10.pkg <https://www.foundationdb.org/downloads/6.1.10/macOS/installers/FoundationDB-6.1.10.pkg>`_
* `FoundationDB-6.1.11.pkg <https://www.foundationdb.org/downloads/6.1.11/macOS/installers/FoundationDB-6.1.11.pkg>`_
Ubuntu
------
The Ubuntu packages are supported on 64-bit Ubuntu 12.04+, but beware of the Linux kernel bug in Ubuntu 12.x.
* `foundationdb-clients-6.1.10-1_amd64.deb <https://www.foundationdb.org/downloads/6.1.10/ubuntu/installers/foundationdb-clients_6.1.10-1_amd64.deb>`_
* `foundationdb-server-6.1.10-1_amd64.deb <https://www.foundationdb.org/downloads/6.1.10/ubuntu/installers/foundationdb-server_6.1.10-1_amd64.deb>`_ (depends on the clients package)
* `foundationdb-clients-6.1.11-1_amd64.deb <https://www.foundationdb.org/downloads/6.1.11/ubuntu/installers/foundationdb-clients_6.1.11-1_amd64.deb>`_
* `foundationdb-server-6.1.11-1_amd64.deb <https://www.foundationdb.org/downloads/6.1.11/ubuntu/installers/foundationdb-server_6.1.11-1_amd64.deb>`_ (depends on the clients package)
RHEL/CentOS EL6
---------------
The RHEL/CentOS EL6 packages are supported on 64-bit RHEL/CentOS 6.x.
* `foundationdb-clients-6.1.10-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.10/rhel6/installers/foundationdb-clients-6.1.10-1.el6.x86_64.rpm>`_
* `foundationdb-server-6.1.10-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.10/rhel6/installers/foundationdb-server-6.1.10-1.el6.x86_64.rpm>`_ (depends on the clients package)
* `foundationdb-clients-6.1.11-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.11/rhel6/installers/foundationdb-clients-6.1.11-1.el6.x86_64.rpm>`_
* `foundationdb-server-6.1.11-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.11/rhel6/installers/foundationdb-server-6.1.11-1.el6.x86_64.rpm>`_ (depends on the clients package)
RHEL/CentOS EL7
---------------
The RHEL/CentOS EL7 packages are supported on 64-bit RHEL/CentOS 7.x.
* `foundationdb-clients-6.1.10-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.10/rhel7/installers/foundationdb-clients-6.1.10-1.el7.x86_64.rpm>`_
* `foundationdb-server-6.1.10-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.10/rhel7/installers/foundationdb-server-6.1.10-1.el7.x86_64.rpm>`_ (depends on the clients package)
* `foundationdb-clients-6.1.11-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.11/rhel7/installers/foundationdb-clients-6.1.11-1.el7.x86_64.rpm>`_
* `foundationdb-server-6.1.11-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.11/rhel7/installers/foundationdb-server-6.1.11-1.el7.x86_64.rpm>`_ (depends on the clients package)
Windows
-------
The Windows installer is supported on 64-bit Windows XP and later. It includes the client and (optionally) the server.
* `foundationdb-6.1.10-x64.msi <https://www.foundationdb.org/downloads/6.1.10/windows/installers/foundationdb-6.1.10-x64.msi>`_
* `foundationdb-6.1.11-x64.msi <https://www.foundationdb.org/downloads/6.1.11/windows/installers/foundationdb-6.1.11-x64.msi>`_
API Language Bindings
=====================
@@ -58,18 +58,18 @@ On macOS and Windows, the FoundationDB Python API bindings are installed as part
If you need to use the FoundationDB Python API from other Python installations or paths, download the Python package:
* `foundationdb-6.1.10.tar.gz <https://www.foundationdb.org/downloads/6.1.10/bindings/python/foundationdb-6.1.10.tar.gz>`_
* `foundationdb-6.1.11.tar.gz <https://www.foundationdb.org/downloads/6.1.11/bindings/python/foundationdb-6.1.11.tar.gz>`_
Ruby 1.9.3/2.0.0+
-----------------
* `fdb-6.1.10.gem <https://www.foundationdb.org/downloads/6.1.10/bindings/ruby/fdb-6.1.10.gem>`_
* `fdb-6.1.11.gem <https://www.foundationdb.org/downloads/6.1.11/bindings/ruby/fdb-6.1.11.gem>`_
Java 8+
-------
* `fdb-java-6.1.10.jar <https://www.foundationdb.org/downloads/6.1.10/bindings/java/fdb-java-6.1.10.jar>`_
* `fdb-java-6.1.10-javadoc.jar <https://www.foundationdb.org/downloads/6.1.10/bindings/java/fdb-java-6.1.10-javadoc.jar>`_
* `fdb-java-6.1.11.jar <https://www.foundationdb.org/downloads/6.1.11/bindings/java/fdb-java-6.1.11.jar>`_
* `fdb-java-6.1.11-javadoc.jar <https://www.foundationdb.org/downloads/6.1.11/bindings/java/fdb-java-6.1.11-javadoc.jar>`_
Go 1.11+
--------

View File

@@ -36,6 +36,7 @@
"roles":[
{
"query_queue_max":0,
"local_rate":0,
"input_bytes":{
"hz":0.0,
"counter":0,
@@ -187,7 +188,8 @@
"megabits_received":{
"hz":0.0
}
}
},
"run_loop_busy":0.2 // fraction of time the run loop was busy
}
},
"old_logs":[
@@ -229,7 +231,8 @@
"storage_server_min_free_space",
"storage_server_min_free_space_ratio",
"log_server_min_free_space",
"log_server_min_free_space_ratio"
"log_server_min_free_space_ratio",
"storage_server_durability_lag"
]
},
"description":"The database is not being saturated by the workload."
@@ -248,7 +251,8 @@
"storage_server_min_free_space",
"storage_server_min_free_space_ratio",
"log_server_min_free_space",
"log_server_min_free_space_ratio"
"log_server_min_free_space_ratio",
"storage_server_durability_lag"
]
},
"description":"The database is not being saturated by the workload."
@@ -264,7 +268,10 @@
},
"incompatible_connections":[
],
"datacenter_version_difference":0,
"datacenter_lag":{
"seconds":1.0,
"versions":1000000
},
"degraded_processes":0,
"database_available":true,
"database_locked":false,
@@ -293,6 +300,10 @@
}
]
},
"page_cache":{
"log_hit_rate":0.5,
"storage_hit_rate":0.5
},
"messages":[
{
"reasons":[
@@ -407,6 +418,21 @@
"counter":0,
"roughness":0.0
},
"started_immediate_priority":{
"hz":0.0,
"counter":0,
"roughness":0.0
},
"started_default_priority":{
"hz":0.0,
"counter":0,
"roughness":0.0
},
"started_batch_priority":{
"hz":0.0,
"counter":0,
"roughness":0.0
},
"conflicted":{
"hz":0.0,
"counter":0,

View File

@@ -128,4 +128,5 @@ min_free_space Running out of space (approaching 100MB limi
min_free_space_ratio Running out of space (approaching 5% limit).
log_server_min_free_space Log server running out of space (approaching 100MB limit).
log_server_min_free_space_ratio Log server running out of space (approaching 5% limit).
storage_server_durability_lag Storage server durable version falling behind.
=================================== ====================================================

View File

@@ -2,6 +2,14 @@
Release Notes
#############
6.1.11
======
Fixes
-----
* Machines which were added to a cluster immediately after the cluster was upgraded to 6.1 would not be given data. `(PR #1764) <https://github.com/apple/foundationdb/pull/1764>`_
6.1.10
======
@@ -174,4 +182,4 @@ Earlier release notes
* :doc:`Beta 2 (API Version 22) </old-release-notes/release-notes-022>`
* :doc:`Beta 1 (API Version 21) </old-release-notes/release-notes-021>`
* :doc:`Alpha 6 (API Version 16) </old-release-notes/release-notes-016>`
* :doc:`Alpha 5 (API Version 14) </old-release-notes/release-notes-014>`
* :doc:`Alpha 5 (API Version 14) </old-release-notes/release-notes-014>`

View File

@@ -14,16 +14,23 @@ Performance
Fixes
-----
* During an upgrade, the multi-version client now persists database default options and transaction options that aren't reset on retry (e.g. transaction timeout). In order for these options to function correctly during an upgrade, a 6.2 or later client should be used as the primary client. `(PR #1767) <https://github.com/apple/foundationdb/pull/1767>`_.
* If a cluster is upgraded during an ``onError`` call, the cluster could return a ``cluster_version_changed`` error. `(PR #1734) <https://github.com/apple/foundationdb/pull/1734>`_.
Status
------
* Added ``run_loop_busy`` to the ``processes`` section to record the fraction of time the run loop is busy. `(PR #1760) <https://github.com/apple/foundationdb/pull/1760>`_.
* Added ``cluster.page_cache`` section to status. In this section, added two new statistics ``storage_hit_rate`` and ``log_hit_rate`` that indicate the fraction of recent page reads that were served by cache. `(PR #1823) <https://github.com/apple/foundationdb/pull/1823>`_.
* Added transaction start counts by priority to ``cluster.workload.transactions``. The new counters are named ``started_immediate_priority``, ``started_default_priority``, and ``started_batch_priority``. `(PR #1836) <https://github.com/apple/foundationdb/pull/1836>`_.
* Removed ``cluster.datacenter_version_difference`` and replaced it with ``cluster.datacenter_lag``, which has subfields ``versions`` and ``seconds``. `(PR #1800) <https://github.com/apple/foundationdb/pull/1800>`_.
Bindings
--------
* Go: The Go bindings now require Go version 1.11 or later.
* Go: Fix issue with finalizers running too early that could lead to undefined behavior. `(PR #1451) <https://github.com/apple/foundationdb/pull/1451>`_.
* Added transaction option to control the field length of keys and values in debug transaction logging in order to avoid truncation. `(PR #1844) <https://github.com/apple/foundationdb/pull/1844>`_.
Other Changes
-------------

View File

@@ -3194,11 +3194,14 @@ int main(int argc, char* argv[]) {
}
TraceEvent("ProgramStart")
.setMaxEventLength(12000)
.detail("SourceVersion", getHGVersion())
.detail("Version", FDB_VT_VERSION )
.detail("PackageName", FDB_VT_PACKAGE_NAME)
.detailf("ActualTime", "%lld", DEBUG_DETERMINISM ? 0 : time(NULL))
.setMaxFieldLength(10000)
.detail("CommandLine", commandLine)
.setMaxFieldLength(0)
.detail("MemoryLimit", memLimit)
.trackLatest("ProgramStart");
@@ -3244,7 +3247,7 @@ int main(int argc, char* argv[]) {
}
try {
db = Database::createDatabase(ccf, -1, localities);
db = Database::createDatabase(ccf, -1, true, localities);
}
catch (Error& e) {
fprintf(stderr, "ERROR: %s\n", e.what());
@@ -3266,7 +3269,7 @@ int main(int argc, char* argv[]) {
}
try {
sourceDb = Database::createDatabase(sourceCcf, -1, localities);
sourceDb = Database::createDatabase(sourceCcf, -1, true, localities);
}
catch (Error& e) {
fprintf(stderr, "ERROR: %s\n", e.what());

View File

@@ -59,31 +59,44 @@ extern const char* getHGVersion();
std::vector<std::string> validOptions;
enum { OPT_CONNFILE, OPT_DATABASE, OPT_HELP, OPT_TRACE, OPT_TRACE_DIR, OPT_TIMEOUT, OPT_EXEC, OPT_NO_STATUS, OPT_STATUS_FROM_JSON, OPT_VERSION, OPT_TRACE_FORMAT };
enum {
OPT_CONNFILE,
OPT_DATABASE,
OPT_HELP,
OPT_TRACE,
OPT_TRACE_DIR,
OPT_TIMEOUT,
OPT_EXEC,
OPT_NO_STATUS,
OPT_STATUS_FROM_JSON,
OPT_VERSION,
OPT_TRACE_FORMAT,
OPT_USE_OBJECT_SERIALIZER
};
CSimpleOpt::SOption g_rgOptions[] = {
{ OPT_CONNFILE, "-C", SO_REQ_SEP },
{ OPT_CONNFILE, "--cluster_file", SO_REQ_SEP },
{ OPT_DATABASE, "-d", SO_REQ_SEP },
{ OPT_TRACE, "--log", SO_NONE },
{ OPT_TRACE_DIR, "--log-dir", SO_REQ_SEP },
{ OPT_TIMEOUT, "--timeout", SO_REQ_SEP },
{ OPT_EXEC, "--exec", SO_REQ_SEP },
{ OPT_NO_STATUS, "--no-status", SO_NONE },
{ OPT_HELP, "-?", SO_NONE },
{ OPT_HELP, "-h", SO_NONE },
{ OPT_HELP, "--help", SO_NONE },
{ OPT_STATUS_FROM_JSON, "--status-from-json", SO_REQ_SEP },
{ OPT_VERSION, "--version", SO_NONE },
{ OPT_VERSION, "-v", SO_NONE },
{ OPT_TRACE_FORMAT, "--trace_format", SO_REQ_SEP },
CSimpleOpt::SOption g_rgOptions[] = { { OPT_CONNFILE, "-C", SO_REQ_SEP },
{ OPT_CONNFILE, "--cluster_file", SO_REQ_SEP },
{ OPT_DATABASE, "-d", SO_REQ_SEP },
{ OPT_TRACE, "--log", SO_NONE },
{ OPT_TRACE_DIR, "--log-dir", SO_REQ_SEP },
{ OPT_TIMEOUT, "--timeout", SO_REQ_SEP },
{ OPT_EXEC, "--exec", SO_REQ_SEP },
{ OPT_NO_STATUS, "--no-status", SO_NONE },
{ OPT_HELP, "-?", SO_NONE },
{ OPT_HELP, "-h", SO_NONE },
{ OPT_HELP, "--help", SO_NONE },
{ OPT_STATUS_FROM_JSON, "--status-from-json", SO_REQ_SEP },
{ OPT_VERSION, "--version", SO_NONE },
{ OPT_VERSION, "-v", SO_NONE },
{ OPT_TRACE_FORMAT, "--trace_format", SO_REQ_SEP },
{ OPT_USE_OBJECT_SERIALIZER, "-S", SO_REQ_SEP },
{ OPT_USE_OBJECT_SERIALIZER, "--object-serializer", SO_REQ_SEP },
#ifndef TLS_DISABLED
TLS_OPTION_FLAGS
TLS_OPTION_FLAGS
#endif
SO_END_OF_OPTIONS
};
SO_END_OF_OPTIONS };
void printAtCol(const char* text, int col) {
const char* iter = text;
@@ -168,7 +181,7 @@ public:
private:
//Sets a transaction option. If intrans == true, then this option is also applied to the passed in transaction.
void setTransactionOption(Reference<ReadYourWritesTransaction> tr, FDBTransactionOptions::Option option, bool enabled, Optional<StringRef> arg, bool intrans) {
if(enabled && arg.present() != FDBTransactionOptions::optionInfo[option].hasParameter) {
if(enabled && arg.present() != FDBTransactionOptions::optionInfo.getMustExist(option).hasParameter) {
printf("ERROR: option %s a parameter\n", arg.present() ? "did not expect" : "expected");
throw invalid_option_value();
}
@@ -224,7 +237,7 @@ private:
//Returns true if the specified option is documented
bool isDocumented(typename T::Option option) {
FDBOptionInfo info = T::optionInfo[option];
FDBOptionInfo info = T::optionInfo.getMustExist(option);
std::string deprecatedStr = "Deprecated";
return !info.comment.empty() && info.comment.substr(0, deprecatedStr.size()) != deprecatedStr;
@@ -246,7 +259,7 @@ private:
void printHelpString() {
for(auto itr = legalOptions.begin(); itr != legalOptions.end(); ++itr) {
if(isDocumented(itr->second)) {
FDBOptionInfo info = T::optionInfo[itr->second];
FDBOptionInfo info = T::optionInfo.getMustExist(itr->second);
std::string helpStr = info.name + " - " + info.comment;
if(info.hasParameter)
helpStr += " " + info.parameterComment;
@@ -401,21 +414,25 @@ static void printProgramUsage(const char* name) {
" FDB_CLUSTER_FILE environment variable, then `./fdb.cluster',\n"
" then `%s'.\n", platform::getDefaultClusterFilePath().c_str());
printf(" --log Enables trace file logging for the CLI session.\n"
" --log-dir PATH Specifes the output directory for trace files. If\n"
" unspecified, defaults to the current directory. Has\n"
" no effect unless --log is specified.\n"
" --trace_format FORMAT\n"
" Select the format of the log files. xml (the default) and json\n"
" are supported. Has no effect unless --log is specified.\n"
" --exec CMDS Immediately executes the semicolon separated CLI commands\n"
" and then exits.\n"
" --no-status Disables the initial status check done when starting\n"
" the CLI.\n"
" --log-dir PATH Specifes the output directory for trace files. If\n"
" unspecified, defaults to the current directory. Has\n"
" no effect unless --log is specified.\n"
" --trace_format FORMAT\n"
" Select the format of the log files. xml (the default) and json\n"
" are supported. Has no effect unless --log is specified.\n"
" -S ON|OFF, --object-serializer ON|OFF\n"
" Use object serializer for sending messages. The object serializer\n"
" is currently a beta feature and it allows fdb processes to talk to\n"
" each other even if they don't have the same version\n"
" --exec CMDS Immediately executes the semicolon separated CLI commands\n"
" and then exits.\n"
" --no-status Disables the initial status check done when starting\n"
" the CLI.\n"
#ifndef TLS_DISABLED
TLS_HELP
TLS_HELP
#endif
" -v, --version Print FoundationDB CLI version information and exit.\n"
" -h, --help Display this help and exit.\n");
" -v, --version Print FoundationDB CLI version information and exit.\n"
" -h, --help Display this help and exit.\n");
}
@@ -2332,6 +2349,7 @@ struct CLIOptions {
bool trace;
std::string traceDir;
std::string traceFormat;
bool useObjectSerializer = false;
int exit_timeout;
Optional<std::string> exec;
bool initialStatusCheck;
@@ -2403,41 +2421,55 @@ struct CLIOptions {
#ifndef TLS_DISABLED
// TLS Options
case TLSOptions::OPT_TLS_PLUGIN:
args.OptionArg();
break;
case TLSOptions::OPT_TLS_CERTIFICATES:
tlsCertPath = args.OptionArg();
break;
case TLSOptions::OPT_TLS_CA_FILE:
tlsCAPath = args.OptionArg();
break;
case TLSOptions::OPT_TLS_KEY:
tlsKeyPath = args.OptionArg();
break;
case TLSOptions::OPT_TLS_PASSWORD:
tlsPassword = args.OptionArg();
break;
case TLSOptions::OPT_TLS_VERIFY_PEERS:
tlsVerifyPeers = args.OptionArg();
break;
case TLSOptions::OPT_TLS_PLUGIN:
args.OptionArg();
break;
case TLSOptions::OPT_TLS_CERTIFICATES:
tlsCertPath = args.OptionArg();
break;
case TLSOptions::OPT_TLS_CA_FILE:
tlsCAPath = args.OptionArg();
break;
case TLSOptions::OPT_TLS_KEY:
tlsKeyPath = args.OptionArg();
break;
case TLSOptions::OPT_TLS_PASSWORD:
tlsPassword = args.OptionArg();
break;
case TLSOptions::OPT_TLS_VERIFY_PEERS:
tlsVerifyPeers = args.OptionArg();
break;
#endif
case OPT_HELP:
printProgramUsage(program_name.c_str());
return 0;
case OPT_STATUS_FROM_JSON:
return printStatusFromJSON(args.OptionArg());
case OPT_TRACE_FORMAT:
if (!validateTraceFormat(args.OptionArg())) {
fprintf(stderr, "WARNING: Unrecognized trace format `%s'\n", args.OptionArg());
}
traceFormat = args.OptionArg();
break;
case OPT_VERSION:
printVersion();
return FDB_EXIT_SUCCESS;
}
return -1;
case OPT_HELP:
printProgramUsage(program_name.c_str());
return 0;
case OPT_STATUS_FROM_JSON:
return printStatusFromJSON(args.OptionArg());
case OPT_TRACE_FORMAT:
if (!validateTraceFormat(args.OptionArg())) {
fprintf(stderr, "WARNING: Unrecognized trace format `%s'\n", args.OptionArg());
}
traceFormat = args.OptionArg();
break;
case OPT_USE_OBJECT_SERIALIZER: {
std::string s = args.OptionArg();
std::transform(s.begin(), s.end(), s.begin(), ::tolower);
if (s == "on" || s == "true" || s == "1") {
useObjectSerializer = true;
} else if (s == "off" || s == "false" || s == "0") {
useObjectSerializer = false;
} else {
fprintf(stderr, "ERROR: Could not parse object serializer option: `%s'\n", s.c_str());
printProgramUsage(program_name.c_str());
flushAndExit(FDB_EXIT_ERROR);
}
break;
}
case OPT_VERSION:
printVersion();
return FDB_EXIT_SUCCESS;
}
return -1;
}
};
@@ -2484,7 +2516,7 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
TraceEvent::setNetworkThread();
try {
db = Database::createDatabase(ccf, -1);
db = Database::createDatabase(ccf, -1, false);
if (!opt.exec.present()) {
printf("Using cluster file `%s'.\n", ccf->getFilename().c_str());
}
@@ -2497,12 +2529,14 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
if (opt.trace) {
TraceEvent("CLIProgramStart")
.setMaxEventLength(12000)
.detail("SourceVersion", getHGVersion())
.detail("Version", FDB_VT_VERSION)
.detail("PackageName", FDB_VT_PACKAGE_NAME)
.detailf("ActualTime", "%lld", DEBUG_DETERMINISM ? 0 : time(NULL))
.detail("ClusterFile", ccf->getFilename().c_str())
.detail("ConnectionString", ccf->getConnectionString().toString())
.setMaxFieldLength(10000)
.detail("CommandLine", opt.commandLine)
.trackLatest("ProgramStart");
}
@@ -3490,6 +3524,11 @@ int main(int argc, char **argv) {
}
setNetworkOption(FDBNetworkOptions::ENABLE_SLOW_TASK_PROFILING);
}
// The USE_OBJECT_SERIALIZER network option expects an 8 byte little endian integer which is interpreted as zero =
// false, non-zero = true.
setNetworkOption(FDBNetworkOptions::USE_OBJECT_SERIALIZER,
opt.useObjectSerializer ? LiteralStringRef("\x01\x00\x00\x00\x00\x00\x00\x00")
: LiteralStringRef("\x00\x00\x00\x00\x00\x00\x00\x00"));
initHelp();
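The encoding spelled out in the comment above is the same one used by every StringRef((const uint8_t*)&value, 8) option call earlier in this diff: integer option parameters cross the C API as 8 little-endian bytes. An illustrative Python sketch of producing that encoding:

```python
import struct

def int64_option_param(value):
    # FDB option parameters of integer type are passed as 8-byte little-endian values.
    return struct.pack('<q', value)

assert int64_option_param(1) == b'\x01\x00\x00\x00\x00\x00\x00\x00'  # "on"
assert int64_option_param(0) == b'\x00\x00\x00\x00\x00\x00\x00\x00'  # "off"
```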

View File

@@ -419,7 +419,7 @@ ACTOR Future<Void> readCommitted(Database cx, PromiseStream<RangeResultWithVersi
//add lock
releaser.release();
wait(lock->take(TaskDefaultYield, limits.bytes + CLIENT_KNOBS->VALUE_SIZE_LIMIT + CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT));
wait(lock->take(TaskPriority::DefaultYield, limits.bytes + CLIENT_KNOBS->VALUE_SIZE_LIMIT + CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT));
releaser = FlowLock::Releaser(*lock, limits.bytes + CLIENT_KNOBS->VALUE_SIZE_LIMIT + CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT);
state Standalone<RangeResultRef> values = wait(tr.getRange(begin, end, limits));
@@ -495,7 +495,7 @@ ACTOR Future<Void> readCommitted(Database cx, PromiseStream<RCGroup> results, Fu
//add lock
wait(active);
releaser.release();
wait(lock->take(TaskDefaultYield, rangevalue.expectedSize() + rcGroup.items.expectedSize()));
wait(lock->take(TaskPriority::DefaultYield, rangevalue.expectedSize() + rcGroup.items.expectedSize()));
releaser = FlowLock::Releaser(*lock, rangevalue.expectedSize() + rcGroup.items.expectedSize());
for (auto & s : rangevalue){
@@ -613,7 +613,7 @@ ACTOR Future<int> dumpData(Database cx, PromiseStream<RCGroup> results, Referenc
req.flags = req.flags | CommitTransactionRequest::FLAG_IS_LOCK_AWARE;
totalBytes += mutationSize;
wait( commitLock->take(TaskDefaultYield, mutationSize) );
wait( commitLock->take(TaskPriority::DefaultYield, mutationSize) );
addActor.send( commitLock->releaseWhen( success(commit.getReply(req)), mutationSize ) );
if(endOfStream) {
@@ -653,7 +653,7 @@ ACTOR Future<Void> coalesceKeyVersionCache(Key uid, Version endVersion, Referenc
req.transaction.read_snapshot = committedVersion->get();
req.flags = req.flags | CommitTransactionRequest::FLAG_IS_LOCK_AWARE;
wait( commitLock->take(TaskDefaultYield, mutationSize) );
wait( commitLock->take(TaskPriority::DefaultYield, mutationSize) );
addActor.send( commitLock->releaseWhen( success(commit.getReply(req)), mutationSize ) );
}
@@ -671,7 +671,7 @@ ACTOR Future<Void> applyMutations(Database cx, Key uid, Key addPrefix, Key remov
try {
loop {
if(beginVersion >= *endVersion) {
wait( commitLock.take(TaskDefaultYield, CLIENT_KNOBS->BACKUP_LOCK_BYTES) );
wait( commitLock.take(TaskPriority::DefaultYield, CLIENT_KNOBS->BACKUP_LOCK_BYTES) );
commitLock.release(CLIENT_KNOBS->BACKUP_LOCK_BYTES);
if(beginVersion >= *endVersion) {
return Void();

View File

@@ -8,7 +8,6 @@ set(FDBCLIENT_SRCS
BackupContainer.actor.cpp
BackupContainer.h
BlobStore.actor.cpp
ClientDBInfo.h
ClientLogEvents.h
ClientWorkerInterface.h
ClusterInterface.h

View File

@@ -1,49 +0,0 @@
/*
* ClientDBInfo.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FDBCLIENT_CLIENTDBINFO_H
#define FDBCLIENT_CLIENTDBINFO_H
#pragma once
#include "fdbclient/MasterProxyInterface.h"
// ClientDBInfo is all the information needed by a database client to access the database
// It is returned (and kept up to date) by the OpenDatabaseRequest interface of ClusterInterface
struct ClientDBInfo {
constexpr static FileIdentifier file_identifier = 5355080;
UID id; // Changes each time anything else changes
vector< MasterProxyInterface > proxies;
double clientTxnInfoSampleRate;
int64_t clientTxnInfoSizeLimit;
ClientDBInfo() : clientTxnInfoSampleRate(std::numeric_limits<double>::infinity()), clientTxnInfoSizeLimit(-1) {}
bool operator == (ClientDBInfo const& r) const { return id == r.id; }
bool operator != (ClientDBInfo const& r) const { return id != r.id; }
template <class Archive>
void serialize(Archive& ar) {
if constexpr (!is_fb_function<Archive>) {
ASSERT(ar.protocolVersion().isValid());
}
serializer(ar, proxies, id, clientTxnInfoSampleRate, clientTxnInfoSizeLimit);
}
};
#endif

View File

@ -44,7 +44,7 @@ namespace FdbClientLogEvents {
EventType type{ EVENTTYPEEND };
double startTs{ 0 };
void logEvent(std::string id) const {}
void logEvent(std::string id, int maxFieldLength) const {}
};
struct EventGetVersion : public Event {
@ -60,8 +60,10 @@ namespace FdbClientLogEvents {
double latency;
void logEvent(std::string id) const {
TraceEvent("TransactionTrace_GetVersion").detail("TransactionID", id).detail("Latency", latency);
void logEvent(std::string id, int maxFieldLength) const {
TraceEvent("TransactionTrace_GetVersion")
.detail("TransactionID", id)
.detail("Latency", latency);
}
};
@ -80,8 +82,14 @@ namespace FdbClientLogEvents {
int valueSize;
Key key;
void logEvent(std::string id) const {
TraceEvent("TransactionTrace_Get").detail("TransactionID", id).detail("Latency", latency).detail("ValueSizeBytes", valueSize).detail("Key", key);
void logEvent(std::string id, int maxFieldLength) const {
TraceEvent("TransactionTrace_Get")
.setMaxEventLength(-1)
.detail("TransactionID", id)
.detail("Latency", latency)
.detail("ValueSizeBytes", valueSize)
.setMaxFieldLength(maxFieldLength)
.detail("Key", key);
}
};
@ -101,8 +109,15 @@ namespace FdbClientLogEvents {
Key startKey;
Key endKey;
void logEvent(std::string id) const {
TraceEvent("TransactionTrace_GetRange").detail("TransactionID", id).detail("Latency", latency).detail("RangeSizeBytes", rangeSize).detail("StartKey", startKey).detail("EndKey", endKey);
void logEvent(std::string id, int maxFieldLength) const {
TraceEvent("TransactionTrace_GetRange")
.setMaxEventLength(-1)
.detail("TransactionID", id)
.detail("Latency", latency)
.detail("RangeSizeBytes", rangeSize)
.setMaxFieldLength(maxFieldLength)
.detail("StartKey", startKey)
.detail("EndKey", endKey);
}
};
@ -122,20 +137,38 @@ namespace FdbClientLogEvents {
int commitBytes;
CommitTransactionRequest req; // Only the CommitTransactionRef and the Arena object within CommitTransactionRequest are serialized
void logEvent(std::string id) const {
void logEvent(std::string id, int maxFieldLength) const {
for (auto &read_range : req.transaction.read_conflict_ranges) {
TraceEvent("TransactionTrace_Commit_ReadConflictRange").detail("TransactionID", id).detail("Begin", read_range.begin).detail("End", read_range.end);
TraceEvent("TransactionTrace_Commit_ReadConflictRange")
.setMaxEventLength(-1)
.detail("TransactionID", id)
.setMaxFieldLength(maxFieldLength)
.detail("Begin", read_range.begin)
.detail("End", read_range.end);
}
for (auto &write_range : req.transaction.write_conflict_ranges) {
TraceEvent("TransactionTrace_Commit_WriteConflictRange").detail("TransactionID", id).detail("Begin", write_range.begin).detail("End", write_range.end);
TraceEvent("TransactionTrace_Commit_WriteConflictRange")
.setMaxEventLength(-1)
.detail("TransactionID", id)
.setMaxFieldLength(maxFieldLength)
.detail("Begin", write_range.begin)
.detail("End", write_range.end);
}
for (auto &mutation : req.transaction.mutations) {
TraceEvent("TransactionTrace_Commit_Mutation").detail("TransactionID", id).detail("Mutation", mutation.toString());
TraceEvent("TransactionTrace_Commit_Mutation")
.setMaxEventLength(-1)
.detail("TransactionID", id)
.setMaxFieldLength(maxFieldLength)
.detail("Mutation", mutation.toString());
}
TraceEvent("TransactionTrace_Commit").detail("TransactionID", id).detail("Latency", latency).detail("NumMutations", numMutations).detail("CommitSizeBytes", commitBytes);
TraceEvent("TransactionTrace_Commit")
.detail("TransactionID", id)
.detail("Latency", latency)
.detail("NumMutations", numMutations)
.detail("CommitSizeBytes", commitBytes);
}
};
@ -153,8 +186,13 @@ namespace FdbClientLogEvents {
int errCode;
Key key;
void logEvent(std::string id) const {
TraceEvent("TransactionTrace_GetError").detail("TransactionID", id).detail("ErrCode", errCode).detail("Key", key);
void logEvent(std::string id, int maxFieldLength) const {
TraceEvent("TransactionTrace_GetError")
.setMaxEventLength(-1)
.detail("TransactionID", id)
.detail("ErrCode", errCode)
.setMaxFieldLength(maxFieldLength)
.detail("Key", key);
}
};
@ -173,8 +211,14 @@ namespace FdbClientLogEvents {
Key startKey;
Key endKey;
void logEvent(std::string id) const {
TraceEvent("TransactionTrace_GetRangeError").detail("TransactionID", id).detail("ErrCode", errCode).detail("StartKey", startKey).detail("EndKey", endKey);
void logEvent(std::string id, int maxFieldLength) const {
TraceEvent("TransactionTrace_GetRangeError")
.setMaxEventLength(-1)
.detail("TransactionID", id)
.detail("ErrCode", errCode)
.setMaxFieldLength(maxFieldLength)
.detail("StartKey", startKey)
.detail("EndKey", endKey);
}
};
@ -192,20 +236,36 @@ namespace FdbClientLogEvents {
int errCode;
CommitTransactionRequest req; // Only the CommitTransactionRef and the Arena object within CommitTransactionRequest are serialized
void logEvent(std::string id) const {
void logEvent(std::string id, int maxFieldLength) const {
for (auto &read_range : req.transaction.read_conflict_ranges) {
TraceEvent("TransactionTrace_CommitError_ReadConflictRange").detail("TransactionID", id).detail("Begin", read_range.begin).detail("End", read_range.end);
TraceEvent("TransactionTrace_CommitError_ReadConflictRange")
.setMaxEventLength(-1)
.detail("TransactionID", id)
.setMaxFieldLength(maxFieldLength)
.detail("Begin", read_range.begin)
.detail("End", read_range.end);
}
for (auto &write_range : req.transaction.write_conflict_ranges) {
TraceEvent("TransactionTrace_CommitError_WriteConflictRange").detail("TransactionID", id).detail("Begin", write_range.begin).detail("End", write_range.end);
TraceEvent("TransactionTrace_CommitError_WriteConflictRange")
.setMaxEventLength(-1)
.detail("TransactionID", id)
.setMaxFieldLength(maxFieldLength)
.detail("Begin", write_range.begin)
.detail("End", write_range.end);
}
for (auto &mutation : req.transaction.mutations) {
TraceEvent("TransactionTrace_CommitError_Mutation").detail("TransactionID", id).detail("Mutation", mutation.toString());
TraceEvent("TransactionTrace_CommitError_Mutation")
.setMaxEventLength(-1)
.detail("TransactionID", id)
.setMaxFieldLength(maxFieldLength)
.detail("Mutation", mutation.toString());
}
TraceEvent("TransactionTrace_CommitError").detail("TransactionID", id).detail("ErrCode", errCode);
TraceEvent("TransactionTrace_CommitError")
.detail("TransactionID", id)
.detail("ErrCode", errCode);
}
};
}
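// A condensed illustration of the truncation pattern introduced above (the event
// name and arguments are hypothetical; this assumes, as the placement in the real
// events suggests, that setMaxEventLength(-1) lifts the overall event-size cap and
// that setMaxFieldLength applies only to details added after the call):
void logExample(std::string id, Key key, int maxFieldLength) {
	TraceEvent("TransactionTrace_Example")
		.setMaxEventLength(-1)             // assumed: no overall event-size cap
		.detail("TransactionID", id)       // logged in full
		.setMaxFieldLength(maxFieldLength) // assumed: truncates details added below
		.detail("Key", key);               // potentially large; truncated
}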

View File

@ -25,7 +25,7 @@
#include "fdbclient/FDBTypes.h"
#include "fdbrpc/FailureMonitor.h"
#include "fdbclient/Status.h"
#include "fdbclient/ClientDBInfo.h"
#include "fdbclient/MasterProxyInterface.h"
// Streams from WorkerInterface that are safe and useful to call from a client.
// A ClientWorkerInterface is embedded as the first element of a WorkerInterface.

View File

@ -25,7 +25,7 @@
#include "fdbclient/FDBTypes.h"
#include "fdbrpc/FailureMonitor.h"
#include "fdbclient/Status.h"
#include "fdbclient/ClientDBInfo.h"
#include "fdbclient/MasterProxyInterface.h"
#include "fdbclient/ClientWorkerInterface.h"
struct ClusterInterface {
@ -52,12 +52,12 @@ struct ClusterInterface {
}
void initEndpoints() {
openDatabase.getEndpoint( TaskClusterController );
failureMonitoring.getEndpoint( TaskFailureMonitor );
databaseStatus.getEndpoint( TaskClusterController );
ping.getEndpoint( TaskClusterController );
getClientWorkers.getEndpoint( TaskClusterController );
forceRecovery.getEndpoint( TaskClusterController );
openDatabase.getEndpoint( TaskPriority::ClusterController );
failureMonitoring.getEndpoint( TaskPriority::FailureMonitor );
databaseStatus.getEndpoint( TaskPriority::ClusterController );
ping.getEndpoint( TaskPriority::ClusterController );
getClientWorkers.getEndpoint( TaskPriority::ClusterController );
forceRecovery.getEndpoint( TaskPriority::ClusterController );
}
template <class Ar>

View File

@ -25,7 +25,6 @@
#include "fdbclient/NativeAPI.actor.h"
#include "fdbclient/KeyRangeMap.h"
#include "fdbclient/MasterProxyInterface.h"
#include "fdbclient/ClientDBInfo.h"
#include "fdbrpc/QueueModel.h"
#include "fdbrpc/MultiInterface.h"
#include "flow/TDMetric.actor.h"
@ -54,11 +53,11 @@ public:
// For internal (fdbserver) use only
static Database create( Reference<AsyncVar<Optional<ClusterInterface>>> clusterInterface, Reference<ClusterConnectionFile> connFile, LocalityData const& clientLocality );
static Database create( Reference<AsyncVar<ClientDBInfo>> clientInfo, Future<Void> clientInfoMonitor, LocalityData clientLocality, bool enableLocalityLoadBalance, int taskID=TaskDefaultEndpoint, bool lockAware=false, int apiVersion=Database::API_VERSION_LATEST );
static Database create( Reference<AsyncVar<ClientDBInfo>> clientInfo, Future<Void> clientInfoMonitor, LocalityData clientLocality, bool enableLocalityLoadBalance, TaskPriority taskID=TaskPriority::DefaultEndpoint, bool lockAware=false, int apiVersion=Database::API_VERSION_LATEST );
~DatabaseContext();
Database clone() const { return Database(new DatabaseContext( cluster, clientInfo, clientInfoMonitor, dbId, taskID, clientLocality, enableLocalityLoadBalance, lockAware, apiVersion )); }
Database clone() const { return Database(new DatabaseContext( cluster, clientInfo, clientInfoMonitor, taskID, clientLocality, enableLocalityLoadBalance, lockAware, internal, apiVersion )); }
std::pair<KeyRange,Reference<LocationInfo>> getCachedLocation( const KeyRef&, bool isBackward = false );
bool getCachedLocations( const KeyRangeRef&, vector<std::pair<KeyRange,Reference<LocationInfo>>>&, int limit, bool reverse );
@ -97,8 +96,8 @@ public:
//private:
explicit DatabaseContext( Reference<Cluster> cluster, Reference<AsyncVar<ClientDBInfo>> clientDBInfo,
Future<Void> clientInfoMonitor, Standalone<StringRef> dbId, int taskID, LocalityData const& clientLocality,
bool enableLocalityLoadBalance, bool lockAware, int apiVersion = Database::API_VERSION_LATEST );
Future<Void> clientInfoMonitor, TaskPriority taskID, LocalityData const& clientLocality,
bool enableLocalityLoadBalance, bool lockAware, bool internal = true, int apiVersion = Database::API_VERSION_LATEST );
explicit DatabaseContext( const Error &err );
@ -133,36 +132,36 @@ public:
std::map< UID, StorageServerInfo* > server_interf;
Standalone<StringRef> dbId;
UID dbId;
bool internal; // Only contexts created through the C client and fdbcli are non-internal
CounterCollection cc;
Counter transactionReadVersions;
Counter transactionLogicalReads;
Counter transactionPhysicalReads;
Counter transactionCommittedMutations;
Counter transactionCommittedMutationBytes;
Counter transactionsCommitStarted;
Counter transactionsCommitCompleted;
Counter transactionsTooOld;
Counter transactionsFutureVersions;
Counter transactionsNotCommitted;
Counter transactionsMaybeCommitted;
Counter transactionsResourceConstrained;
Counter transactionsProcessBehind;
Counter transactionWaitsForFullRecovery;
int64_t transactionReadVersions;
int64_t transactionLogicalReads;
int64_t transactionPhysicalReads;
int64_t transactionCommittedMutations;
int64_t transactionCommittedMutationBytes;
int64_t transactionsCommitStarted;
int64_t transactionsCommitCompleted;
int64_t transactionsTooOld;
int64_t transactionsFutureVersions;
int64_t transactionsNotCommitted;
int64_t transactionsMaybeCommitted;
int64_t transactionsResourceConstrained;
int64_t transactionsProcessBehind;
int64_t transactionWaitsForFullRecovery;
ContinuousSample<double> latencies, readLatencies, commitLatencies, GRVLatencies, mutationsPerCommit, bytesPerCommit;
int outstandingWatches;
int maxOutstandingWatches;
double transactionTimeout;
int transactionMaxRetries;
double transactionMaxBackoff;
int transactionMaxSize; // Max size in bytes.
int snapshotRywEnabled;
Future<Void> logger;
int taskID;
TaskPriority taskID;
Int64MetricHandle getValueSubmitted;
EventMetricHandle<GetValueComplete> getValueCompleted;
@ -180,6 +179,8 @@ public:
HealthMetrics healthMetrics;
double healthMetricsLastUpdated;
double detailedHealthMetricsLastUpdated;
UniqueOrderedOptionList<FDBTransactionOptions> transactionDefaults;
};
#endif

View File

@ -23,8 +23,11 @@
#define FDBCLIENT_FDBOPTIONS_H
#include <string>
#include <list>
#include <map>
#include "flow/Arena.h"
struct FDBOptionInfo {
std::string name;
std::string comment;
@ -32,9 +35,15 @@ struct FDBOptionInfo {
bool hasParameter;
bool hidden;
bool persistent;
FDBOptionInfo(std::string name, std::string comment, std::string parameterComment, bool hasParameter, bool hidden)
: name(name), comment(comment), parameterComment(parameterComment), hasParameter(hasParameter), hidden(hidden) { }
// If non-negative, this specifies the code of the transaction option for which this option supplies the default value.
// Options that have a defaultFor only retain the most recently set value (i.e. setting them multiple times has no cumulative effect).
int defaultFor;
FDBOptionInfo(std::string name, std::string comment, std::string parameterComment, bool hasParameter, bool hidden, bool persistent, int defaultFor)
: name(name), comment(comment), parameterComment(parameterComment), hasParameter(hasParameter), hidden(hidden), persistent(persistent),
defaultFor(defaultFor) { }
FDBOptionInfo() { }
};
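// A hedged sketch of how defaultFor is consumed (applyDefaultForOption is a
// hypothetical helper that mirrors the DatabaseContext::setOption logic later in
// this change; 'db', 'option', and 'value' are assumed context):
void applyDefaultForOption(DatabaseContext* db, FDBDatabaseOptions::Option option,
                           Optional<StringRef> value) {
	FDBOptionInfo const& info = FDBDatabaseOptions::optionInfo.getMustExist(option);
	if (info.defaultFor >= 0) {
		// The value becomes the default for the matching transaction option rather
		// than configuring the database directly.
		db->transactionDefaults.addOption((FDBTransactionOptions::Option)info.defaultFor,
		                                  value.castTo<Standalone<StringRef>>());
	}
}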
@ -45,15 +54,48 @@ private:
std::map<typename T::Option, FDBOptionInfo> optionInfo;
public:
typename std::map<typename T::Option, FDBOptionInfo>::iterator begin() { return optionInfo.begin(); }
typename std::map<typename T::Option, FDBOptionInfo>::iterator end() { return optionInfo.end(); }
typename std::map<typename T::Option, FDBOptionInfo>::iterator find(const typename T::Option& key) { return optionInfo.find(key); }
typename std::map<typename T::Option, FDBOptionInfo>::const_iterator begin() const { return optionInfo.begin(); }
typename std::map<typename T::Option, FDBOptionInfo>::const_iterator end() const { return optionInfo.end(); }
typename std::map<typename T::Option, FDBOptionInfo>::const_iterator find(const typename T::Option& key) const { return optionInfo.find(key); }
FDBOptionInfo& operator[] (const typename T::Option& key) { return optionInfo[key]; }
void insert(const typename T::Option& key, FDBOptionInfo info) {
optionInfo[key] = info;
}
FDBOptionInfo const& getMustExist(const typename T::Option& key) const {
auto itr = optionInfo.find(key);
ASSERT(itr != optionInfo.end());
return itr->second;
}
FDBOptionInfoMap() { T::init(); }
};
#define ADD_OPTION_INFO( type, var, name, comment, parameterComment, hasParameter, hidden ) type::optionInfo[var] = FDBOptionInfo(name, comment, parameterComment, hasParameter, hidden);
// An ordered list of options where each option is represented only once. Subsequent insertions will remove the option from its
// original location and add it to the end with the new value.
template<class T>
class UniqueOrderedOptionList {
public:
typedef std::list<std::pair<typename T::Option, Optional<Standalone<StringRef>>>> OptionList;
private:
OptionList options;
std::map<typename T::Option, typename OptionList::iterator> optionsIndexMap;
public:
void addOption(typename T::Option option, Optional<Standalone<StringRef>> value) {
auto itr = optionsIndexMap.find(option);
if(itr != optionsIndexMap.end()) {
options.erase(itr->second);
}
options.push_back(std::make_pair(option, value));
optionsIndexMap[option] = --options.end();
}
typename OptionList::const_iterator begin() const { return options.cbegin(); }
typename OptionList::const_iterator end() const { return options.cend(); }
};
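// Usage sketch for the class above; the values are hypothetical placeholders.
// Re-adding an option moves it to the end, so iteration reflects the most recent
// setting and earlier values are dropped.
UniqueOrderedOptionList<FDBTransactionOptions> defaults;
Optional<Standalone<StringRef>> fiveSeconds, oneSecond, limitBytes; // hypothetical encodings
void populateDefaults() {
	defaults.addOption(FDBTransactionOptions::TIMEOUT, fiveSeconds);
	defaults.addOption(FDBTransactionOptions::SIZE_LIMIT, limitBytes);
	defaults.addOption(FDBTransactionOptions::TIMEOUT, oneSecond); // moves TIMEOUT to the end
	// Iteration order is now: (SIZE_LIMIT, limitBytes), (TIMEOUT, oneSecond).
}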
#define ADD_OPTION_INFO( type, var, name, comment, parameterComment, hasParameter, hidden, persistent, defaultFor ) type::optionInfo.insert(var, FDBOptionInfo(name, comment, parameterComment, hasParameter, hidden, persistent, defaultFor));
#endif

View File

@ -43,6 +43,7 @@ enum {
tagLocalityUpgraded = -4,
tagLocalitySatellite = -5,
tagLocalityLogRouterMapped = -6,
tagLocalityTxs = -7,
tagLocalityInvalid = -99
}; //The TLog and LogRouter require these numbers to be as compact as possible

View File

@ -41,7 +41,7 @@ ACTOR Future<Void> failureMonitorClientLoop(
{
state Version version = 0;
state Future<FailureMonitoringReply> request = Never();
state Future<Void> nextRequest = delay(0, TaskFailureMonitor);
state Future<Void> nextRequest = delay(0, TaskPriority::FailureMonitor);
state Future<Void> requestTimeout = Never();
state double before = now();
state double waitfor = 0;
@ -61,7 +61,7 @@ ACTOR Future<Void> failureMonitorClientLoop(
loop {
choose {
when( FailureMonitoringReply reply = wait( request ) ) {
g_network->setCurrentTask(TaskDefaultDelay);
g_network->setCurrentTask(TaskPriority::DefaultDelay);
request = Never();
requestTimeout = Never();
if (reply.allOthersFailed) {
@ -122,10 +122,10 @@ ACTOR Future<Void> failureMonitorClientLoop(
}
before = now();
waitfor = reply.clientRequestIntervalMS * .001;
nextRequest = delayJittered( waitfor, TaskFailureMonitor );
nextRequest = delayJittered( waitfor, TaskPriority::FailureMonitor );
}
when( wait( requestTimeout ) ) {
g_network->setCurrentTask(TaskDefaultDelay);
g_network->setCurrentTask(TaskPriority::DefaultDelay);
requestTimeout = Never();
TraceEvent(SevWarn, "FailureMonitoringServerDown").detail("OldServerID",controller.id());
monitor->setStatus(controlAddr.address, FailureStatus(true));
@ -136,7 +136,7 @@ ACTOR Future<Void> failureMonitorClientLoop(
}
}
when( wait( nextRequest ) ) {
g_network->setCurrentTask(TaskDefaultDelay);
g_network->setCurrentTask(TaskPriority::DefaultDelay);
nextRequest = Never();
double elapsed = now() - before;
@ -152,9 +152,9 @@ ACTOR Future<Void> failureMonitorClientLoop(
req.addresses = g_network->getLocalAddresses();
if (trackMyStatus)
req.senderStatus = FailureStatus(false);
request = controller.failureMonitoring.getReply( req, TaskFailureMonitor );
request = controller.failureMonitoring.getReply( req, TaskPriority::FailureMonitor );
if(!controller.failureMonitoring.getEndpoint().isLocal())
requestTimeout = delay( fmState->serverFailedTimeout, TaskFailureMonitor );
requestTimeout = delay( fmState->serverFailedTimeout, TaskPriority::FailureMonitor );
}
}
}

View File

@ -93,7 +93,7 @@ namespace HTTP {
loop {
// Wait for connection to have something to read
wait(conn->onReadable());
wait( delay( 0, TaskReadSocket ) );
wait( delay( 0, TaskPriority::ReadSocket ) );
// Read into buffer
int originalSize = buf->size();
@ -353,7 +353,7 @@ namespace HTTP {
loop {
wait(conn->onWritable());
wait( delay( 0, TaskWriteSocket ) );
wait( delay( 0, TaskPriority::WriteSocket ) );
// If we already got a response, before finishing sending the request, then close the connection,
// set the Connection header to "close" as a hint to the caller that this connection can't be used

View File

@ -967,7 +967,7 @@ ACTOR Future<CoordinatorsResult::Type> changeQuorum( Database cx, Reference<IQuo
vector<Future<Optional<LeaderInfo>>> leaderServers;
ClientCoordinators coord( Reference<ClusterConnectionFile>( new ClusterConnectionFile( conn ) ) );
for( int i = 0; i < coord.clientLeaderServers.size(); i++ )
leaderServers.push_back( retryBrokenPromise( coord.clientLeaderServers[i].getLeader, GetLeaderRequest( coord.clusterKey, UID() ), TaskCoordinationReply ) );
leaderServers.push_back( retryBrokenPromise( coord.clientLeaderServers[i].getLeader, GetLeaderRequest( coord.clusterKey, UID() ), TaskPriority::CoordinationReply ) );
choose {
when( wait( waitForAll( leaderServers ) ) ) {}
@ -1047,7 +1047,7 @@ struct AutoQuorumChange : IQuorumChange {
ClientCoordinators coord(ccf);
vector<Future<Optional<LeaderInfo>>> leaderServers;
for( int i = 0; i < coord.clientLeaderServers.size(); i++ )
leaderServers.push_back( retryBrokenPromise( coord.clientLeaderServers[i].getLeader, GetLeaderRequest( coord.clusterKey, UID() ), TaskCoordinationReply ) );
leaderServers.push_back( retryBrokenPromise( coord.clientLeaderServers[i].getLeader, GetLeaderRequest( coord.clusterKey, UID() ), TaskPriority::CoordinationReply ) );
Optional<vector<Optional<LeaderInfo>>> results = wait( timeout( getAll(leaderServers), CLIENT_KNOBS->IS_ACCEPTABLE_DELAY ) );
if (!results.present()) return false; // Not all responded
for(auto& r : results.get())

View File

@ -67,15 +67,47 @@ struct MasterProxyInterface {
}
void initEndpoints() {
getConsistentReadVersion.getEndpoint(TaskProxyGetConsistentReadVersion);
getRawCommittedVersion.getEndpoint(TaskProxyGetRawCommittedVersion);
commit.getEndpoint(TaskProxyCommitDispatcher);
getStorageServerRejoinInfo.getEndpoint(TaskProxyStorageRejoin);
getConsistentReadVersion.getEndpoint(TaskPriority::ProxyGetConsistentReadVersion);
getRawCommittedVersion.getEndpoint(TaskPriority::ProxyGetRawCommittedVersion);
commit.getEndpoint(TaskPriority::ProxyCommitDispatcher);
getStorageServerRejoinInfo.getEndpoint(TaskPriority::ProxyStorageRejoin);
//getKeyServersLocations.getEndpoint(TaskPriority::ProxyGetKeyServersLocations); //do not increase the priority of these requests, because clients can bring down the cluster with too many of these messages.
}
};
struct CommitID {
// ClientDBInfo is all the information needed by a database client to access the database
// It is returned (and kept up to date) by the OpenDatabaseRequest interface of ClusterInterface
struct ClientDBInfo {
constexpr static FileIdentifier file_identifier = 5355080;
UID id; // Changes each time anything else changes
vector< MasterProxyInterface > proxies;
double clientTxnInfoSampleRate;
int64_t clientTxnInfoSizeLimit;
ClientDBInfo() : clientTxnInfoSampleRate(std::numeric_limits<double>::infinity()), clientTxnInfoSizeLimit(-1) {}
bool operator == (ClientDBInfo const& r) const { return id == r.id; }
bool operator != (ClientDBInfo const& r) const { return id != r.id; }
template <class Archive>
void serialize(Archive& ar) {
if constexpr (!is_fb_function<Archive>) {
ASSERT(ar.protocolVersion().isValid());
}
serializer(ar, proxies, id, clientTxnInfoSampleRate, clientTxnInfoSizeLimit);
}
};
struct ProxyForwardReply {
Optional<ClientDBInfo> newClientInfo;
ProxyForwardReply() {}
template <class Ar>
void serialize(Ar &ar) {
serializer(ar, newClientInfo);
}
};
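// The client-side contract for replies derived from ProxyForwardReply: a reply may
// carry a fresher ClientDBInfo, which the client installs before retrying. A hedged
// sketch (installForwardedClientInfo is a hypothetical helper; the same check appears
// inline at each request site in NativeAPI later in this change):
template <class Reply>
bool installForwardedClientInfo(DatabaseContext* cx, Reply const& reply) {
	// True means the reply carried forwarded proxy information and the caller
	// should retry its request against the newly installed proxies.
	if (reply.newClientInfo.present()) {
		cx->clientInfo->set(reply.newClientInfo.get());
		return true;
	}
	return false;
}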
struct CommitID : public ProxyForwardReply {
constexpr static FileIdentifier file_identifier = 14254927;
Version version; // returns invalidVersion if transaction conflicts
uint16_t txnBatchId;
@ -83,7 +115,7 @@ struct CommitID {
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, version, txnBatchId, metadataVersion);
serializer(ar, *(ProxyForwardReply*)this, version, txnBatchId, metadataVersion);
}
CommitID() : version(invalidVersion), txnBatchId(0) {}
@ -127,7 +159,7 @@ static inline int getBytes( CommitTransactionRequest const& r ) {
return total;
}
struct GetReadVersionReply {
struct GetReadVersionReply : public ProxyForwardReply {
constexpr static FileIdentifier file_identifier = 15709388;
Version version;
bool locked;
@ -135,7 +167,7 @@ struct GetReadVersionReply {
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, version, locked, metadataVersion);
serializer(ar, *(ProxyForwardReply*)this, version, locked, metadataVersion);
}
};
@ -169,14 +201,14 @@ struct GetReadVersionRequest : TimedRequest {
}
};
struct GetKeyServerLocationsReply {
struct GetKeyServerLocationsReply : public ProxyForwardReply {
constexpr static FileIdentifier file_identifier = 10636023;
Arena arena;
std::vector<std::pair<KeyRangeRef, vector<StorageServerInterface>>> results;
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, results, arena);
serializer(ar, *(ProxyForwardReply*)this, results, arena);
}
};

View File

@ -371,7 +371,7 @@ ClientLeaderRegInterface::ClientLeaderRegInterface( NetworkAddress remote )
}
ClientLeaderRegInterface::ClientLeaderRegInterface( INetwork* local ) {
getLeader.makeWellKnownEndpoint( WLTOKEN_CLIENTLEADERREG_GETLEADER, TaskCoordination );
getLeader.makeWellKnownEndpoint( WLTOKEN_CLIENTLEADERREG_GETLEADER, TaskPriority::Coordination );
}
// Nominee is the worker, among all workers, that a given coordinator currently considers to be the leader
@ -380,7 +380,7 @@ ClientLeaderRegInterface::ClientLeaderRegInterface( INetwork* local ) {
ACTOR Future<Void> monitorNominee( Key key, ClientLeaderRegInterface coord, AsyncTrigger* nomineeChange, Optional<LeaderInfo> *info, int generation, Reference<AsyncVar<int>> connectedCoordinatorsNum ) {
state bool hasCounted = false;
loop {
state Optional<LeaderInfo> li = wait( retryBrokenPromise( coord.getLeader, GetLeaderRequest( key, info->present() ? info->get().changeID : UID() ), TaskCoordinationReply ) );
state Optional<LeaderInfo> li = wait( retryBrokenPromise( coord.getLeader, GetLeaderRequest( key, info->present() ? info->get().changeID : UID() ), TaskPriority::CoordinationReply ) );
if (li.present() && !hasCounted && connectedCoordinatorsNum.isValid()) {
connectedCoordinatorsNum->set(connectedCoordinatorsNum->get() + 1);
hasCounted = true;

View File

@ -408,17 +408,39 @@ void DLApi::addNetworkThreadCompletionHook(void (*hook)(void*), void *hookParame
}
// MultiVersionTransaction
MultiVersionTransaction::MultiVersionTransaction(Reference<MultiVersionDatabase> db) : db(db) {
MultiVersionTransaction::MultiVersionTransaction(Reference<MultiVersionDatabase> db, UniqueOrderedOptionList<FDBTransactionOptions> defaultOptions) : db(db) {
setDefaultOptions(defaultOptions);
updateTransaction();
}
// SOMEDAY: This function is unsafe if it's possible to set Database options that affect subsequently created transactions. There are currently no such options.
void MultiVersionTransaction::setDefaultOptions(UniqueOrderedOptionList<FDBTransactionOptions> options) {
MutexHolder holder(db->dbState->optionLock);
std::copy(options.begin(), options.end(), std::back_inserter(persistentOptions));
}
void MultiVersionTransaction::updateTransaction() {
auto currentDb = db->dbState->dbVar->get();
TransactionInfo newTr;
if(currentDb.value) {
newTr.transaction = currentDb.value->createTransaction();
Optional<StringRef> timeout;
for (auto option : persistentOptions) {
if(option.first == FDBTransactionOptions::TIMEOUT) {
timeout = option.second.castTo<StringRef>();
}
else {
newTr.transaction->setOption(option.first, option.second.castTo<StringRef>());
}
}
// Setting a timeout can immediately cause a transaction to fail. The only timeout
// that matters is the one most recently set, so we ignore any timeouts set earlier
// that might inadvertently fail the transaction.
if(timeout.present()) {
newTr.transaction->setOption(FDBTransactionOptions::TIMEOUT, timeout);
}
}
newTr.onChange = currentDb.onChange;
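// Consequence of the ordering above (option values are hypothetical): if a caller
// set TIMEOUT more than once before the underlying transaction was recreated, only
// the most recent value is replayed, e.g.
//   tr->setOption(FDBTransactionOptions::TIMEOUT, fiveSecondsValue); // not replayed
//   tr->setOption(FDBTransactionOptions::TIMEOUT, oneSecondValue);   // replayed last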
@ -574,6 +596,15 @@ Version MultiVersionTransaction::getCommittedVersion() {
}
void MultiVersionTransaction::setOption(FDBTransactionOptions::Option option, Optional<StringRef> value) {
auto itr = FDBTransactionOptions::optionInfo.find(option);
if(itr == FDBTransactionOptions::optionInfo.end()) {
TraceEvent("UnknownTransactionOption").detail("Option", option);
throw invalid_option();
}
if(MultiVersionApi::apiVersionAtLeast(610) && itr->second.persistent) {
persistentOptions.emplace_back(option, value.castTo<Standalone<StringRef>>());
}
auto tr = getTransaction();
if(tr.transaction) {
tr.transaction->setOption(option, value);
@ -606,6 +637,8 @@ ThreadFuture<Void> MultiVersionTransaction::onError(Error const& e) {
}
void MultiVersionTransaction::reset() {
persistentOptions.clear();
setDefaultOptions(db->dbState->transactionDefaultOptions);
updateTransaction();
}
@ -643,27 +676,30 @@ Reference<IDatabase> MultiVersionDatabase::debugCreateFromExistingDatabase(Refer
}
Reference<ITransaction> MultiVersionDatabase::createTransaction() {
return Reference<ITransaction>(new MultiVersionTransaction(Reference<MultiVersionDatabase>::addRef(this)));
return Reference<ITransaction>(new MultiVersionTransaction(Reference<MultiVersionDatabase>::addRef(this), dbState->transactionDefaultOptions));
}
void MultiVersionDatabase::setOption(FDBDatabaseOptions::Option option, Optional<StringRef> value) {
MutexHolder holder(dbState->optionLock);
auto itr = FDBDatabaseOptions::optionInfo.find(option);
if(itr != FDBDatabaseOptions::optionInfo.end()) {
TraceEvent("SetDatabaseOption").detail("Option", itr->second.name);
}
else {
if(itr == FDBDatabaseOptions::optionInfo.end()) {
TraceEvent("UnknownDatabaseOption").detail("Option", option);
throw invalid_option();
}
if(dbState->db) {
dbState->db->setOption(option, value);
int defaultFor = itr->second.defaultFor;
if (defaultFor >= 0) {
ASSERT(FDBTransactionOptions::optionInfo.find((FDBTransactionOptions::Option)defaultFor) !=
FDBTransactionOptions::optionInfo.end());
dbState->transactionDefaultOptions.addOption((FDBTransactionOptions::Option)defaultFor, value.castTo<Standalone<StringRef>>());
}
dbState->options.push_back(std::make_pair(option, value.castTo<Standalone<StringRef>>()));
if(dbState->db) {
dbState->db->setOption(option, value);
}
}
void MultiVersionDatabase::Connector::connect() {
@ -824,6 +860,11 @@ void MultiVersionDatabase::DatabaseState::cancelConnections() {
// MultiVersionApi
bool MultiVersionApi::apiVersionAtLeast(int minVersion) {
ASSERT(MultiVersionApi::api->apiVersion != 0);
return MultiVersionApi::api->apiVersion >= minVersion || MultiVersionApi::api->apiVersion < 0;
}
// runOnFailedClients should be used cautiously. Some failed clients may not have successfully loaded all symbols.
void MultiVersionApi::runOnExternalClients(std::function<void(Reference<ClientInfo>)> func, bool runOnFailedClients) {
bool newFailure = false;

View File

@ -210,7 +210,7 @@ class MultiVersionDatabase;
class MultiVersionTransaction : public ITransaction, ThreadSafeReferenceCounted<MultiVersionTransaction> {
public:
MultiVersionTransaction(Reference<MultiVersionDatabase> db);
MultiVersionTransaction(Reference<MultiVersionDatabase> db, UniqueOrderedOptionList<FDBTransactionOptions> defaultOptions);
void cancel();
void setVersion(Version v);
@ -261,6 +261,9 @@ private:
TransactionInfo getTransaction();
void updateTransaction();
void setDefaultOptions(UniqueOrderedOptionList<FDBTransactionOptions> options);
std::vector<std::pair<FDBTransactionOptions::Option, Optional<Standalone<StringRef>>>> persistentOptions;
};
struct ClientInfo : ThreadSafeReferenceCounted<ClientInfo> {
@ -341,6 +344,7 @@ private:
std::vector<Reference<Connector>> connectionAttempts;
std::vector<std::pair<FDBDatabaseOptions::Option, Optional<Standalone<StringRef>>>> options;
UniqueOrderedOptionList<FDBTransactionOptions> transactionDefaultOptions;
Mutex optionLock;
};
@ -370,6 +374,8 @@ public:
bool callbackOnMainThread;
bool localClientDisabled;
static bool apiVersionAtLeast(int minVersion);
private:
MultiVersionApi();

View File

@ -208,24 +208,18 @@ template <> void addref( DatabaseContext* ptr ) { ptr->addref(); }
template <> void delref( DatabaseContext* ptr ) { ptr->delref(); }
ACTOR Future<Void> databaseLogger( DatabaseContext *cx ) {
state double lastLogged = 0;
loop {
wait( delay( CLIENT_KNOBS->SYSTEM_MONITOR_INTERVAL, cx->taskID ) );
TraceEvent("TransactionMetrics")
wait(delay(CLIENT_KNOBS->SYSTEM_MONITOR_INTERVAL, cx->taskID));
TraceEvent ev("TransactionMetrics", cx->dbId);
ev.detail("Elapsed", (lastLogged == 0) ? 0 : now() - lastLogged)
.detail("Cluster", cx->cluster && cx->getConnectionFile() ? cx->getConnectionFile()->getConnectionString().clusterKeyName().toString() : "")
.detail("ReadVersions", cx->transactionReadVersions)
.detail("LogicalUncachedReads", cx->transactionLogicalReads)
.detail("PhysicalReadRequests", cx->transactionPhysicalReads)
.detail("CommittedMutations", cx->transactionCommittedMutations)
.detail("CommittedMutationBytes", cx->transactionCommittedMutationBytes)
.detail("CommitStarted", cx->transactionsCommitStarted)
.detail("CommitCompleted", cx->transactionsCommitCompleted)
.detail("TooOld", cx->transactionsTooOld)
.detail("FutureVersions", cx->transactionsFutureVersions)
.detail("NotCommitted", cx->transactionsNotCommitted)
.detail("MaybeCommitted", cx->transactionsMaybeCommitted)
.detail("ResourceConstrained", cx->transactionsResourceConstrained)
.detail("ProcessBehind", cx->transactionsProcessBehind)
.detail("MeanLatency", cx->latencies.mean())
.detail("Internal", cx->internal);
cx->cc.logToTraceEvent(ev);
ev.detail("MeanLatency", cx->latencies.mean())
.detail("MedianLatency", cx->latencies.median())
.detail("Latency90", cx->latencies.percentile(0.90))
.detail("Latency98", cx->latencies.percentile(0.98))
@ -245,12 +239,15 @@ ACTOR Future<Void> databaseLogger( DatabaseContext *cx ) {
.detail("MeanBytesPerCommit", cx->bytesPerCommit.mean())
.detail("MedianBytesPerCommit", cx->bytesPerCommit.median())
.detail("MaxBytesPerCommit", cx->bytesPerCommit.max());
cx->latencies.clear();
cx->readLatencies.clear();
cx->GRVLatencies.clear();
cx->commitLatencies.clear();
cx->mutationsPerCommit.clear();
cx->bytesPerCommit.clear();
lastLogged = now();
}
}
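// A minimal sketch of the Counter/CounterCollection pattern adopted here (the names
// below are hypothetical; this assumes the fdbrpc Stats interfaces this change
// relies on, in particular the logToTraceEvent call used above):
CounterCollection exampleCC("TransactionMetrics");
Counter exampleReadVersions("ReadVersions", exampleCC); // registers with exampleCC
void logExampleMetrics() {
	++exampleReadVersions; // cheap hot-path increment
	TraceEvent ev("TransactionMetrics");
	exampleCC.logToTraceEvent(ev); // emits one detail per registered counter
}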
@ -508,23 +505,24 @@ ACTOR static Future<HealthMetrics> getHealthMetricsActor(DatabaseContext *cx, bo
Future<HealthMetrics> DatabaseContext::getHealthMetrics(bool detailed = false) {
return getHealthMetricsActor(this, detailed);
}
DatabaseContext::DatabaseContext(
Reference<Cluster> cluster, Reference<AsyncVar<ClientDBInfo>> clientInfo, Future<Void> clientInfoMonitor, Standalone<StringRef> dbId,
int taskID, LocalityData const& clientLocality, bool enableLocalityLoadBalance, bool lockAware, int apiVersion )
: cluster(cluster), clientInfo(clientInfo), clientInfoMonitor(clientInfoMonitor), dbId(dbId), taskID(taskID), clientLocality(clientLocality), enableLocalityLoadBalance(enableLocalityLoadBalance),
lockAware(lockAware), apiVersion(apiVersion), provisional(false),
transactionReadVersions(0), transactionLogicalReads(0), transactionPhysicalReads(0), transactionCommittedMutations(0), transactionCommittedMutationBytes(0),
transactionsCommitStarted(0), transactionsCommitCompleted(0), transactionsTooOld(0), transactionsFutureVersions(0), transactionsNotCommitted(0),
transactionsMaybeCommitted(0), transactionsResourceConstrained(0), transactionsProcessBehind(0), outstandingWatches(0), transactionTimeout(0.0), transactionMaxRetries(-1),
Reference<Cluster> cluster, Reference<AsyncVar<ClientDBInfo>> clientInfo, Future<Void> clientInfoMonitor,
TaskPriority taskID, LocalityData const& clientLocality, bool enableLocalityLoadBalance, bool lockAware, bool internal, int apiVersion )
: cluster(cluster), clientInfo(clientInfo), clientInfoMonitor(clientInfoMonitor), taskID(taskID), clientLocality(clientLocality), enableLocalityLoadBalance(enableLocalityLoadBalance),
lockAware(lockAware), apiVersion(apiVersion), provisional(false), cc("TransactionMetrics"),
transactionReadVersions("ReadVersions", cc), transactionLogicalReads("LogicalUncachedReads", cc), transactionPhysicalReads("PhysicalReadRequests", cc),
transactionCommittedMutations("CommittedMutations", cc), transactionCommittedMutationBytes("CommittedMutationBytes", cc), transactionsCommitStarted("CommitStarted", cc),
transactionsCommitCompleted("CommitCompleted", cc), transactionsTooOld("TooOld", cc), transactionsFutureVersions("FutureVersions", cc),
transactionsNotCommitted("NotCommitted", cc), transactionsMaybeCommitted("MaybeCommitted", cc), transactionsResourceConstrained("ResourceConstrained", cc),
transactionsProcessBehind("ProcessBehind", cc), transactionWaitsForFullRecovery("WaitsForFullRecovery", cc), outstandingWatches(0),
latencies(1000), readLatencies(1000), commitLatencies(1000), GRVLatencies(1000), mutationsPerCommit(1000), bytesPerCommit(1000), mvCacheInsertLocation(0),
healthMetricsLastUpdated(0), detailedHealthMetricsLastUpdated(0)
healthMetricsLastUpdated(0), detailedHealthMetricsLastUpdated(0), internal(internal)
{
dbId = deterministicRandom()->randomUniqueID();
metadataVersionCache.resize(CLIENT_KNOBS->METADATA_VERSION_CACHE_SIZE);
maxOutstandingWatches = CLIENT_KNOBS->DEFAULT_MAX_OUTSTANDING_WATCHES;
transactionMaxBackoff = CLIENT_KNOBS->FAILURE_MAX_DELAY;
transactionMaxSize = CLIENT_KNOBS->TRANSACTION_SIZE_LIMIT;
snapshotRywEnabled = apiVersionAtLeast(300) ? 1 : 0;
logger = databaseLogger( this );
@ -539,7 +537,14 @@ DatabaseContext::DatabaseContext(
clientStatusUpdater.actor = clientStatusUpdateActor(this);
}
DatabaseContext::DatabaseContext( const Error &err ) : deferredError(err), latencies(1000), readLatencies(1000), commitLatencies(1000), GRVLatencies(1000), mutationsPerCommit(1000), bytesPerCommit(1000) {}
DatabaseContext::DatabaseContext( const Error &err ) : deferredError(err), cc("TransactionMetrics"),
transactionReadVersions("ReadVersions", cc), transactionLogicalReads("LogicalUncachedReads", cc), transactionPhysicalReads("PhysicalReadRequests", cc),
transactionCommittedMutations("CommittedMutations", cc), transactionCommittedMutationBytes("CommittedMutationBytes", cc), transactionsCommitStarted("CommitStarted", cc),
transactionsCommitCompleted("CommitCompleted", cc), transactionsTooOld("TooOld", cc), transactionsFutureVersions("FutureVersions", cc),
transactionsNotCommitted("NotCommitted", cc), transactionsMaybeCommitted("MaybeCommitted", cc), transactionsResourceConstrained("ResourceConstrained", cc),
transactionsProcessBehind("ProcessBehind", cc), transactionWaitsForFullRecovery("WaitsForFullRecovery", cc), latencies(1000), readLatencies(1000), commitLatencies(1000),
GRVLatencies(1000), mutationsPerCommit(1000), bytesPerCommit(1000),
internal(false) {}
ACTOR static Future<Void> monitorClientInfo( Reference<AsyncVar<Optional<ClusterInterface>>> clusterInterface, Reference<ClusterConnectionFile> ccf, Reference<AsyncVar<ClientDBInfo>> outInfo, Reference<AsyncVar<int>> connectedCoordinatorsNumDelayed ) {
try {
@ -573,38 +578,44 @@ ACTOR static Future<Void> monitorClientInfo( Reference<AsyncVar<Optional<Cluster
choose {
when( ClientDBInfo ni = wait( clusterInterface->get().present() ? brokenPromiseToNever( clusterInterface->get().get().openDatabase.getReply( req ) ) : Never() ) ) {
TraceEvent("ClientInfoChange").detail("ChangeID", ni.id);
outInfo->set(ni);
if (ni.proxies.empty()) {
TraceEvent("ClientInfo_NoProxiesReturned").detail("ChangeID", ni.id);
continue;
} else if (!FlowTransport::transport().isClient()) {
continue;
}
vector<Future<Void>> onProxyFailureVec;
bool skipWaitForProxyFail = false;
for (const auto& proxy : ni.proxies) {
if (proxy.provisional) {
skipWaitForProxyFail = true;
loop {
TraceEvent("ClientInfoChange").detail("ChangeID", outInfo->get().id);
if (outInfo->get().proxies.empty()) {
TraceEvent("ClientInfo_NoProxiesReturned").detail("ChangeID", outInfo->get().id);
break;
} else if (!FlowTransport::transport().isClient()) {
break;
}
onProxyFailureVec.push_back(
IFailureMonitor::failureMonitor().onDisconnectOrFailure(
proxy.getConsistentReadVersion.getEndpoint()) ||
IFailureMonitor::failureMonitor().onDisconnectOrFailure(proxy.commit.getEndpoint()) ||
IFailureMonitor::failureMonitor().onDisconnectOrFailure(
proxy.getKeyServersLocations.getEndpoint()) ||
IFailureMonitor::failureMonitor().onDisconnectOrFailure(
proxy.getStorageServerRejoinInfo.getEndpoint()));
}
if (skipWaitForProxyFail) continue;
state vector<Future<Void>> onProxyFailureVec;
bool skipWaitForProxyFail = false;
for (const auto& proxy : outInfo->get().proxies) {
if (proxy.provisional) {
skipWaitForProxyFail = true;
break;
}
leaderMon = Void();
wait(waitForAny(onProxyFailureVec));
leaderMon = ccf ? monitorLeader(ccf, clusterInterface) : Void();
onProxyFailureVec.push_back(
IFailureMonitor::failureMonitor().onStateEqual(
proxy.getConsistentReadVersion.getEndpoint(), FailureStatus()) ||
IFailureMonitor::failureMonitor().onStateEqual(proxy.commit.getEndpoint(), FailureStatus()) ||
IFailureMonitor::failureMonitor().onStateEqual(
proxy.getKeyServersLocations.getEndpoint(), FailureStatus()) ||
IFailureMonitor::failureMonitor().onStateEqual(
proxy.getStorageServerRejoinInfo.getEndpoint(), FailureStatus()));
}
if (skipWaitForProxyFail) break;
leaderMon = Void();
state Future<Void> anyFailures = waitForAny(onProxyFailureVec);
wait(anyFailures || outInfo->onChange());
if(anyFailures.isReady()) {
leaderMon = ccf ? monitorLeader(ccf, clusterInterface) : Void();
break;
}
}
}
when( wait( clusterInterface->onChange() ) ) {
if(clusterInterface->get().present())
@ -632,11 +643,11 @@ Database DatabaseContext::create(Reference<AsyncVar<Optional<ClusterInterface>>>
Reference<AsyncVar<ClientDBInfo>> clientInfo(new AsyncVar<ClientDBInfo>());
Future<Void> clientInfoMonitor = delayedAsyncVar(connectedCoordinatorsNum, connectedCoordinatorsNumDelayed, CLIENT_KNOBS->CHECK_CONNECTED_COORDINATOR_NUM_DELAY) || monitorClientInfo(clusterInterface, connFile, clientInfo, connectedCoordinatorsNumDelayed);
return Database(new DatabaseContext(cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false));
return Database(new DatabaseContext(cluster, clientInfo, clientInfoMonitor, TaskPriority::DefaultEndpoint, clientLocality, true, false, true));
}
Database DatabaseContext::create(Reference<AsyncVar<ClientDBInfo>> clientInfo, Future<Void> clientInfoMonitor, LocalityData clientLocality, bool enableLocalityLoadBalance, int taskID, bool lockAware, int apiVersion) {
return Database( new DatabaseContext( Reference<Cluster>(nullptr), clientInfo, clientInfoMonitor, LiteralStringRef(""), taskID, clientLocality, enableLocalityLoadBalance, lockAware, apiVersion ) );
Database DatabaseContext::create(Reference<AsyncVar<ClientDBInfo>> clientInfo, Future<Void> clientInfoMonitor, LocalityData clientLocality, bool enableLocalityLoadBalance, TaskPriority taskID, bool lockAware, int apiVersion) {
return Database( new DatabaseContext( Reference<Cluster>(nullptr), clientInfo, clientInfoMonitor, taskID, clientLocality, enableLocalityLoadBalance, lockAware, true, apiVersion ) );
}
DatabaseContext::~DatabaseContext() {
@ -745,52 +756,45 @@ uint64_t extractHexOption( StringRef value ) {
}
void DatabaseContext::setOption( FDBDatabaseOptions::Option option, Optional<StringRef> value) {
switch(option) {
case FDBDatabaseOptions::LOCATION_CACHE_SIZE:
locationCacheSize = (int)extractIntOption(value, 0, std::numeric_limits<int>::max());
break;
case FDBDatabaseOptions::MACHINE_ID:
clientLocality = LocalityData( clientLocality.processId(), value.present() ? Standalone<StringRef>(value.get()) : Optional<Standalone<StringRef>>(), clientLocality.machineId(), clientLocality.dcId() );
if( clientInfo->get().proxies.size() )
masterProxies = Reference<ProxyInfo>( new ProxyInfo( clientInfo->get().proxies, clientLocality ) );
server_interf.clear();
locationCache.insert( allKeys, Reference<LocationInfo>() );
break;
case FDBDatabaseOptions::MAX_WATCHES:
maxOutstandingWatches = (int)extractIntOption(value, 0, CLIENT_KNOBS->ABSOLUTE_MAX_WATCHES);
break;
case FDBDatabaseOptions::DATACENTER_ID:
clientLocality = LocalityData(clientLocality.processId(), clientLocality.zoneId(), clientLocality.machineId(), value.present() ? Standalone<StringRef>(value.get()) : Optional<Standalone<StringRef>>());
if( clientInfo->get().proxies.size() )
masterProxies = Reference<ProxyInfo>( new ProxyInfo( clientInfo->get().proxies, clientLocality ));
server_interf.clear();
locationCache.insert( allKeys, Reference<LocationInfo>() );
break;
case FDBDatabaseOptions::TRANSACTION_TIMEOUT:
if( !apiVersionAtLeast(610) ) {
throw invalid_option();
}
transactionTimeout = extractIntOption(value, 0, std::numeric_limits<int>::max())/1000.0;
break;
case FDBDatabaseOptions::TRANSACTION_RETRY_LIMIT:
transactionMaxRetries = (int)extractIntOption(value, -1, std::numeric_limits<int>::max());
break;
case FDBDatabaseOptions::TRANSACTION_MAX_RETRY_DELAY:
validateOptionValue(value, true);
transactionMaxBackoff = extractIntOption(value, 0, std::numeric_limits<int32_t>::max()) / 1000.0;
break;
case FDBDatabaseOptions::TRANSACTION_SIZE_LIMIT:
validateOptionValue(value, true);
transactionMaxSize = extractIntOption(value, 32, CLIENT_KNOBS->TRANSACTION_SIZE_LIMIT);
break;
case FDBDatabaseOptions::SNAPSHOT_RYW_ENABLE:
validateOptionValue(value, false);
snapshotRywEnabled++;
break;
case FDBDatabaseOptions::SNAPSHOT_RYW_DISABLE:
validateOptionValue(value, false);
snapshotRywEnabled--;
break;
int defaultFor = FDBDatabaseOptions::optionInfo.getMustExist(option).defaultFor;
if (defaultFor >= 0) {
ASSERT(FDBTransactionOptions::optionInfo.find((FDBTransactionOptions::Option)defaultFor) !=
FDBTransactionOptions::optionInfo.end());
transactionDefaults.addOption((FDBTransactionOptions::Option)defaultFor, value.castTo<Standalone<StringRef>>());
}
else {
switch(option) {
case FDBDatabaseOptions::LOCATION_CACHE_SIZE:
locationCacheSize = (int)extractIntOption(value, 0, std::numeric_limits<int>::max());
break;
case FDBDatabaseOptions::MACHINE_ID:
clientLocality = LocalityData( clientLocality.processId(), value.present() ? Standalone<StringRef>(value.get()) : Optional<Standalone<StringRef>>(), clientLocality.machineId(), clientLocality.dcId() );
if( clientInfo->get().proxies.size() )
masterProxies = Reference<ProxyInfo>( new ProxyInfo( clientInfo->get().proxies, clientLocality ) );
server_interf.clear();
locationCache.insert( allKeys, Reference<LocationInfo>() );
break;
case FDBDatabaseOptions::MAX_WATCHES:
maxOutstandingWatches = (int)extractIntOption(value, 0, CLIENT_KNOBS->ABSOLUTE_MAX_WATCHES);
break;
case FDBDatabaseOptions::DATACENTER_ID:
clientLocality = LocalityData(clientLocality.processId(), clientLocality.zoneId(), clientLocality.machineId(), value.present() ? Standalone<StringRef>(value.get()) : Optional<Standalone<StringRef>>());
if( clientInfo->get().proxies.size() )
masterProxies = Reference<ProxyInfo>( new ProxyInfo( clientInfo->get().proxies, clientLocality ));
server_interf.clear();
locationCache.insert( allKeys, Reference<LocationInfo>() );
break;
case FDBDatabaseOptions::SNAPSHOT_RYW_ENABLE:
validateOptionValue(value, false);
snapshotRywEnabled++;
break;
case FDBDatabaseOptions::SNAPSHOT_RYW_DISABLE:
validateOptionValue(value, false);
snapshotRywEnabled--;
break;
default:
break;
}
}
}
@ -816,7 +820,7 @@ Reference<ClusterConnectionFile> DatabaseContext::getConnectionFile() {
return cluster->getConnectionFile();
}
Database Database::createDatabase( Reference<ClusterConnectionFile> connFile, int apiVersion, LocalityData const& clientLocality, DatabaseContext *preallocatedDb ) {
Database Database::createDatabase( Reference<ClusterConnectionFile> connFile, int apiVersion, bool internal, LocalityData const& clientLocality, DatabaseContext *preallocatedDb ) {
Reference<AsyncVar<int>> connectedCoordinatorsNum(new AsyncVar<int>(0)); // Number of connected coordinators for the client
Reference<AsyncVar<int>> connectedCoordinatorsNumDelayed(new AsyncVar<int>(0));
Reference<Cluster> cluster(new Cluster(connFile, connectedCoordinatorsNum, apiVersion));
@ -825,18 +829,23 @@ Database Database::createDatabase( Reference<ClusterConnectionFile> connFile, in
DatabaseContext *db;
if(preallocatedDb) {
db = new (preallocatedDb) DatabaseContext(cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false, apiVersion);
db = new (preallocatedDb) DatabaseContext(cluster, clientInfo, clientInfoMonitor, TaskPriority::DefaultEndpoint, clientLocality, true, false, internal, apiVersion);
}
else {
db = new DatabaseContext(cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false, apiVersion);
db = new DatabaseContext(cluster, clientInfo, clientInfoMonitor, TaskPriority::DefaultEndpoint, clientLocality, true, false, internal, apiVersion);
}
return Database(db);
}
Database Database::createDatabase( std::string connFileName, int apiVersion, LocalityData const& clientLocality ) {
Database Database::createDatabase( std::string connFileName, int apiVersion, bool internal, LocalityData const& clientLocality ) {
Reference<ClusterConnectionFile> rccf = Reference<ClusterConnectionFile>(new ClusterConnectionFile(ClusterConnectionFile::lookupClusterFileName(connFileName).first));
return Database::createDatabase(rccf, apiVersion, clientLocality);
return Database::createDatabase(rccf, apiVersion, internal, clientLocality);
}
const UniqueOrderedOptionList<FDBTransactionOptions>& Database::getTransactionDefaults() const {
ASSERT(db);
return db->transactionDefaults;
}
extern IPAddress determinePublicIPAutomatically(ClusterConnectionString const& ccs);
@ -884,7 +893,7 @@ void Cluster::init( Reference<ClusterConnectionFile> connFile, bool startClientI
initializeSystemMonitorMachineState(SystemMonitorMachineState(IPAddress(publicIP)));
systemMonitor();
uncancellable( recurring( &systemMonitor, CLIENT_KNOBS->SYSTEM_MONITOR_INTERVAL, TaskFlushTrace ) );
uncancellable( recurring( &systemMonitor, CLIENT_KNOBS->SYSTEM_MONITOR_INTERVAL, TaskPriority::FlushTrace ) );
}
failMon = failureMonitorClient( clusterInterface, false );
@ -1240,7 +1249,11 @@ ACTOR Future< pair<KeyRange,Reference<LocationInfo>> > getKeyLocation_internal(
loop {
choose {
when ( wait( cx->onMasterProxiesChanged() ) ) {}
when ( GetKeyServerLocationsReply rep = wait( loadBalance( cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::getKeyServersLocations, GetKeyServerLocationsRequest(key, Optional<KeyRef>(), 100, isBackward, key.arena()), TaskDefaultPromiseEndpoint ) ) ) {
when ( GetKeyServerLocationsReply rep = wait( loadBalance( cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::getKeyServersLocations, GetKeyServerLocationsRequest(key, Optional<KeyRef>(), 100, isBackward, key.arena()), TaskPriority::DefaultPromiseEndpoint ) ) ) {
if(rep.newClientInfo.present()) {
cx->clientInfo->set(rep.newClientInfo.get());
continue;
}
if( info.debugID.present() )
g_traceBatch.addEvent("TransactionDebug", info.debugID.get().first(), "NativeAPI.getKeyLocation.After");
ASSERT( rep.results.size() == 1 );
@ -1277,7 +1290,12 @@ ACTOR Future< vector< pair<KeyRange,Reference<LocationInfo>> > > getKeyRangeLoca
loop {
choose {
when ( wait( cx->onMasterProxiesChanged() ) ) {}
when ( GetKeyServerLocationsReply _rep = wait( loadBalance( cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::getKeyServersLocations, GetKeyServerLocationsRequest(keys.begin, keys.end, limit, reverse, keys.arena()), TaskDefaultPromiseEndpoint ) ) ) {
when ( GetKeyServerLocationsReply _rep = wait( loadBalance( cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::getKeyServersLocations, GetKeyServerLocationsRequest(keys.begin, keys.end, limit, reverse, keys.arena()), TaskPriority::DefaultPromiseEndpoint ) ) ) {
if(_rep.newClientInfo.present()) {
cx->clientInfo->set(_rep.newClientInfo.get());
continue;
}
state GetKeyServerLocationsReply rep = _rep;
if( info.debugID.present() )
g_traceBatch.addEvent("TransactionDebug", info.debugID.get().first(), "NativeAPI.getKeyLocations.After");
@ -1398,7 +1416,7 @@ ACTOR Future<Optional<Value>> getValue( Future<Version> version, Key key, Databa
}
state GetValueReply reply = wait(
loadBalance(ssi.second, &StorageServerInterface::getValue, GetValueRequest(key, ver, getValueID),
TaskDefaultPromiseEndpoint, false, cx->enableLocalityLoadBalance ? &cx->queueModel : NULL));
TaskPriority::DefaultPromiseEndpoint, false, cx->enableLocalityLoadBalance ? &cx->queueModel : NULL));
double latency = now() - startTimeD;
cx->readLatencies.addSample(latency);
if (trLogInfo) {
@ -1461,7 +1479,7 @@ ACTOR Future<Key> getKey( Database cx, KeySelector k, Future<Version> version, T
if( info.debugID.present() )
g_traceBatch.addEvent("TransactionDebug", info.debugID.get().first(), "NativeAPI.getKey.Before"); //.detail("StartKey", k.getKey()).detail("Offset",k.offset).detail("OrEqual",k.orEqual);
++cx->transactionPhysicalReads;
GetKeyReply reply = wait( loadBalance( ssi.second, &StorageServerInterface::getKey, GetKeyRequest(k, version.get()), TaskDefaultPromiseEndpoint, false, cx->enableLocalityLoadBalance ? &cx->queueModel : NULL ) );
GetKeyReply reply = wait( loadBalance( ssi.second, &StorageServerInterface::getKey, GetKeyRequest(k, version.get()), TaskPriority::DefaultPromiseEndpoint, false, cx->enableLocalityLoadBalance ? &cx->queueModel : NULL ) );
if( info.debugID.present() )
g_traceBatch.addEvent("TransactionDebug", info.debugID.get().first(), "NativeAPI.getKey.After"); //.detail("NextKey",reply.sel.key).detail("Offset", reply.sel.offset).detail("OrEqual", k.orEqual);
k = reply.sel;
@ -1490,6 +1508,11 @@ ACTOR Future<Version> waitForCommittedVersion( Database cx, Version version ) {
choose {
when ( wait( cx->onMasterProxiesChanged() ) ) {}
when ( GetReadVersionReply v = wait( loadBalance( cx->getMasterProxies(false), &MasterProxyInterface::getConsistentReadVersion, GetReadVersionRequest( 0, GetReadVersionRequest::PRIORITY_SYSTEM_IMMEDIATE ), cx->taskID ) ) ) {
if(v.newClientInfo.present()) {
cx->clientInfo->set(v.newClientInfo.get());
continue;
}
if (v.version >= version)
return v.version;
// SOMEDAY: Do the wait on the server side, possibly use less expensive source of committed version (causal consistency is not needed for this purpose)
@ -1524,7 +1547,7 @@ ACTOR Future< Void > watchValue( Future<Version> version, Key key, Optional<Valu
g_traceBatch.addAttach("WatchValueAttachID", info.debugID.get().first(), watchValueID.get().first());
g_traceBatch.addEvent("WatchValueDebug", watchValueID.get().first(), "NativeAPI.watchValue.Before"); //.detail("TaskID", g_network->getCurrentTask());
}
state Version resp = wait( loadBalance( ssi.second, &StorageServerInterface::watchValue, WatchValueRequest(key, value, ver, watchValueID), TaskDefaultPromiseEndpoint ) );
state Version resp = wait( loadBalance( ssi.second, &StorageServerInterface::watchValue, WatchValueRequest(key, value, ver, watchValueID), TaskPriority::DefaultPromiseEndpoint ) );
if( info.debugID.present() ) {
g_traceBatch.addEvent("WatchValueDebug", watchValueID.get().first(), "NativeAPI.watchValue.After"); //.detail("TaskID", g_network->getCurrentTask());
}
@ -1616,7 +1639,7 @@ ACTOR Future<Standalone<RangeResultRef>> getExactRange( Database cx, Version ver
.detail("Servers", locations[shard].second->description());*/
}
++cx->transactionPhysicalReads;
GetKeyValuesReply rep = wait( loadBalance( locations[shard].second, &StorageServerInterface::getKeyValues, req, TaskDefaultPromiseEndpoint, false, cx->enableLocalityLoadBalance ? &cx->queueModel : NULL ) );
GetKeyValuesReply rep = wait( loadBalance( locations[shard].second, &StorageServerInterface::getKeyValues, req, TaskPriority::DefaultPromiseEndpoint, false, cx->enableLocalityLoadBalance ? &cx->queueModel : NULL ) );
if( info.debugID.present() )
g_traceBatch.addEvent("TransactionDebug", info.debugID.get().first(), "NativeAPI.getExactRange.After");
output.arena().dependsOn( rep.arena );
@ -1893,7 +1916,7 @@ ACTOR Future<Standalone<RangeResultRef>> getRange( Database cx, Reference<Transa
transaction_too_old(), future_version()
});
}
GetKeyValuesReply rep = wait( loadBalance(beginServer.second, &StorageServerInterface::getKeyValues, req, TaskDefaultPromiseEndpoint, false, cx->enableLocalityLoadBalance ? &cx->queueModel : NULL ) );
GetKeyValuesReply rep = wait( loadBalance(beginServer.second, &StorageServerInterface::getKeyValues, req, TaskPriority::DefaultPromiseEndpoint, false, cx->enableLocalityLoadBalance ? &cx->queueModel : NULL ) );
if( info.debugID.present() ) {
g_traceBatch.addEvent("TransactionDebug", info.debugID.get().first(), "NativeAPI.getRange.After");//.detail("SizeOf", rep.data.size());
@ -2457,8 +2480,6 @@ double Transaction::getBackoff(int errCode) {
}
TransactionOptions::TransactionOptions(Database const& cx) {
maxBackoff = cx->transactionMaxBackoff;
sizeLimit = cx->transactionMaxSize;
reset(cx);
if (BUGGIFY) {
commitOnFirstProxy = true;
@ -2472,11 +2493,9 @@ TransactionOptions::TransactionOptions() {
}
void TransactionOptions::reset(Database const& cx) {
double oldMaxBackoff = maxBackoff;
uint32_t oldSizeLimit = sizeLimit;
memset(this, 0, sizeof(*this));
maxBackoff = cx->apiVersionAtLeast(610) ? oldMaxBackoff : cx->transactionMaxBackoff;
sizeLimit = oldSizeLimit;
maxBackoff = CLIENT_KNOBS->DEFAULT_MAX_BACKOFF;
sizeLimit = CLIENT_KNOBS->TRANSACTION_SIZE_LIMIT;
lockAware = cx->lockAware;
}
@ -2503,7 +2522,6 @@ void Transaction::reset() {
void Transaction::fullReset() {
reset();
backoff = CLIENT_KNOBS->DEFAULT_BACKOFF;
options.maxBackoff = getDatabase()->transactionMaxBackoff;
}
int Transaction::apiVersionAtLeast(int minVersion) const {
@ -2694,7 +2712,7 @@ ACTOR static Future<Void> tryCommit( Database cx, Reference<TransactionLogInfo>
const std::vector<MasterProxyInterface>& proxies = cx->clientInfo->get().proxies;
reply = proxies.size() ? throwErrorOr ( brokenPromiseToMaybeDelivered ( proxies[0].commit.tryGetReply(req) ) ) : Never();
} else {
reply = loadBalance( cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::commit, req, TaskDefaultPromiseEndpoint, true );
reply = loadBalance( cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::commit, req, TaskPriority::DefaultPromiseEndpoint, true );
}
choose {
@ -2703,6 +2721,11 @@ ACTOR static Future<Void> tryCommit( Database cx, Reference<TransactionLogInfo>
throw request_maybe_delivered();
}
when (CommitID ci = wait( reply )) {
if(ci.newClientInfo.present()) {
cx->clientInfo->set(ci.newClientInfo.get());
throw not_committed();
}
Version v = ci.version;
if (v != invalidVersion) {
if (info.debugID.present())
@ -2718,7 +2741,7 @@ ACTOR static Future<Void> tryCommit( Database cx, Reference<TransactionLogInfo>
tr->versionstampPromise.send(ret);
tr->numErrors = 0;
cx->transactionsCommitCompleted++;
++cx->transactionsCommitCompleted;
cx->transactionCommittedMutations += req.transaction.mutations.size();
cx->transactionCommittedMutationBytes += req.transaction.mutations.expectedSize();
@ -2793,7 +2816,7 @@ Future<Void> Transaction::commitMutations() {
return Void();
}
cx->transactionsCommitStarted++;
++cx->transactionsCommitStarted;
if(options.readOnly)
return transaction_read_only();
@ -2971,6 +2994,7 @@ void Transaction::setOption( FDBTransactionOptions::Option option, Optional<Stri
}
else {
trLogInfo = Reference<TransactionLogInfo>(new TransactionLogInfo(value.get().printable(), TransactionLogInfo::DONT_LOG));
trLogInfo->maxFieldLength = options.maxTransactionLoggingFieldLength;
}
break;
@ -2985,6 +3009,20 @@ void Transaction::setOption( FDBTransactionOptions::Option option, Optional<Stri
}
break;
case FDBTransactionOptions::TRANSACTION_LOGGING_MAX_FIELD_LENGTH:
validateOptionValue(value, true);
{
int maxFieldLength = extractIntOption(value, -1, std::numeric_limits<int32_t>::max());
if(maxFieldLength == 0) {
throw invalid_option_value();
}
options.maxTransactionLoggingFieldLength = maxFieldLength;
}
if(trLogInfo) {
trLogInfo->maxFieldLength = options.maxTransactionLoggingFieldLength;
}
break;
case FDBTransactionOptions::MAX_RETRY_DELAY:
validateOptionValue(value, true);
options.maxBackoff = extractIntOption(value, 0, std::numeric_limits<int32_t>::max()) / 1000.0;
@ -3034,6 +3072,10 @@ ACTOR Future<GetReadVersionReply> getConsistentReadVersion( DatabaseContext *cx,
choose {
when ( wait( cx->onMasterProxiesChanged() ) ) {}
when ( GetReadVersionReply v = wait( loadBalance( cx->getMasterProxies(flags & GetReadVersionRequest::FLAG_USE_PROVISIONAL_PROXIES), &MasterProxyInterface::getConsistentReadVersion, req, cx->taskID ) ) ) {
if(v.newClientInfo.present()) {
cx->clientInfo->set(v.newClientInfo.get());
continue;
}
if( debugID.present() )
g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), "NativeAPI.getConsistentReadVersion.After");
ASSERT( v.version > 0 );
@ -3074,7 +3116,7 @@ ACTOR Future<Void> readVersionBatcher( DatabaseContext *cx, FutureStream< std::p
if (requests.size() == CLIENT_KNOBS->MAX_BATCH_SIZE)
send_batch = true;
else if (!timeout.isValid())
timeout = delay(batchTime, TaskProxyGetConsistentReadVersion);
timeout = delay(batchTime, TaskPriority::ProxyGetConsistentReadVersion);
}
when(wait(timeout.isValid() ? timeout : Never())) {
send_batch = true;
@ -3126,7 +3168,7 @@ ACTOR Future<Version> extractReadVersion(DatabaseContext* cx, Reference<Transact
}
Future<Version> Transaction::getReadVersion(uint32_t flags) {
cx->transactionReadVersions++;
++cx->transactionReadVersions;
flags |= options.getReadVersionFlags;
auto& batcher = cx->versionBatcher[ flags ];
@ -3150,8 +3192,7 @@ Future<Standalone<StringRef>> Transaction::getVersionstamp() {
}
Future<Void> Transaction::onError( Error const& e ) {
if (e.code() == error_code_success)
{
if (e.code() == error_code_success) {
return client_invalid_operation();
}
if (e.code() == error_code_not_committed ||
@ -3162,32 +3203,32 @@ Future<Void> Transaction::onError( Error const& e ) {
e.code() == error_code_cluster_not_fully_recovered)
{
if(e.code() == error_code_not_committed)
cx->transactionsNotCommitted++;
++cx->transactionsNotCommitted;
if(e.code() == error_code_commit_unknown_result)
cx->transactionsMaybeCommitted++;
++cx->transactionsMaybeCommitted;
if (e.code() == error_code_proxy_memory_limit_exceeded)
cx->transactionsResourceConstrained++;
++cx->transactionsResourceConstrained;
if (e.code() == error_code_process_behind)
cx->transactionsProcessBehind++;
++cx->transactionsProcessBehind;
if (e.code() == error_code_cluster_not_fully_recovered) {
cx->transactionWaitsForFullRecovery++;
++cx->transactionWaitsForFullRecovery;
}
double backoff = getBackoff(e.code());
reset();
return delay( backoff, info.taskID );
return delay(backoff, info.taskID);
}
if (e.code() == error_code_transaction_too_old ||
e.code() == error_code_future_version)
{
if( e.code() == error_code_transaction_too_old )
cx->transactionsTooOld++;
++cx->transactionsTooOld;
else if( e.code() == error_code_future_version )
cx->transactionsFutureVersions++;
++cx->transactionsFutureVersions;
double maxBackoff = options.maxBackoff;
reset();
return delay( std::min(CLIENT_KNOBS->FUTURE_VERSION_RETRY_DELAY, maxBackoff), info.taskID );
return delay(std::min(CLIENT_KNOBS->FUTURE_VERSION_RETRY_DELAY, maxBackoff), info.taskID);
}
if(g_network->isSimulated() && ++numErrors % 10 == 0)
@ -3235,7 +3276,7 @@ ACTOR Future< StorageMetrics > waitStorageMetricsMultipleLocations(
WaitMetricsRequest req(locations[i].first, StorageMetrics(), StorageMetrics());
req.min.bytes = 0;
req.max.bytes = -1;
fx[i] = loadBalance( locations[i].second, &StorageServerInterface::waitMetrics, req, TaskDataDistribution );
fx[i] = loadBalance( locations[i].second, &StorageServerInterface::waitMetrics, req, TaskPriority::DataDistribution );
}
wait( waitForAll(fx) );
@ -3266,7 +3307,7 @@ ACTOR Future< StorageMetrics > waitStorageMetrics(
int shardLimit )
{
loop {
vector< pair<KeyRange, Reference<LocationInfo>> > locations = wait( getKeyRangeLocations( cx, keys, shardLimit, false, &StorageServerInterface::waitMetrics, TransactionInfo(TaskDataDistribution) ) );
vector< pair<KeyRange, Reference<LocationInfo>> > locations = wait( getKeyRangeLocations( cx, keys, shardLimit, false, &StorageServerInterface::waitMetrics, TransactionInfo(TaskPriority::DataDistribution) ) );
//SOMEDAY: Right now, if there are too many shards we delay and check again later. There may be a better solution to this.
if(locations.size() < shardLimit) {
@ -3276,7 +3317,7 @@ ACTOR Future< StorageMetrics > waitStorageMetrics(
fx = waitStorageMetricsMultipleLocations( locations, min, max, permittedError );
} else {
WaitMetricsRequest req( keys, min, max );
fx = loadBalance( locations[0].second, &StorageServerInterface::waitMetrics, req, TaskDataDistribution );
fx = loadBalance( locations[0].second, &StorageServerInterface::waitMetrics, req, TaskPriority::DataDistribution );
}
StorageMetrics x = wait(fx);
return x;
@ -3286,14 +3327,14 @@ ACTOR Future< StorageMetrics > waitStorageMetrics(
throw;
}
cx->invalidateCache(keys);
wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskDataDistribution));
wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskPriority::DataDistribution));
}
} else {
TraceEvent(SevWarn, "WaitStorageMetricsPenalty")
.detail("Keys", keys)
.detail("Limit", CLIENT_KNOBS->STORAGE_METRICS_SHARD_LIMIT)
.detail("JitteredSecondsOfPenitence", CLIENT_KNOBS->STORAGE_METRICS_TOO_MANY_SHARDS_DELAY);
wait(delayJittered(CLIENT_KNOBS->STORAGE_METRICS_TOO_MANY_SHARDS_DELAY, TaskDataDistribution));
wait(delayJittered(CLIENT_KNOBS->STORAGE_METRICS_TOO_MANY_SHARDS_DELAY, TaskPriority::DataDistribution));
// make sure that the next getKeyRangeLocations() call will actually re-fetch the range
cx->invalidateCache( keys );
}
@ -3319,13 +3360,13 @@ Future< StorageMetrics > Transaction::getStorageMetrics( KeyRange const& keys, i
ACTOR Future< Standalone<VectorRef<KeyRef>> > splitStorageMetrics( Database cx, KeyRange keys, StorageMetrics limit, StorageMetrics estimated )
{
loop {
state vector< pair<KeyRange, Reference<LocationInfo>> > locations = wait( getKeyRangeLocations( cx, keys, CLIENT_KNOBS->STORAGE_METRICS_SHARD_LIMIT, false, &StorageServerInterface::splitMetrics, TransactionInfo(TaskDataDistribution) ) );
state vector< pair<KeyRange, Reference<LocationInfo>> > locations = wait( getKeyRangeLocations( cx, keys, CLIENT_KNOBS->STORAGE_METRICS_SHARD_LIMIT, false, &StorageServerInterface::splitMetrics, TransactionInfo(TaskPriority::DataDistribution) ) );
state StorageMetrics used;
state Standalone<VectorRef<KeyRef>> results;
//SOMEDAY: Right now, if there are too many shards we delay and check again later. There may be a better solution to this.
if(locations.size() == CLIENT_KNOBS->STORAGE_METRICS_SHARD_LIMIT) {
wait(delay(CLIENT_KNOBS->STORAGE_METRICS_TOO_MANY_SHARDS_DELAY, TaskDataDistribution));
wait(delay(CLIENT_KNOBS->STORAGE_METRICS_TOO_MANY_SHARDS_DELAY, TaskPriority::DataDistribution));
cx->invalidateCache(keys);
}
else {
@ -3336,7 +3377,7 @@ ACTOR Future< Standalone<VectorRef<KeyRef>> > splitStorageMetrics( Database cx,
state int i = 0;
for(; i<locations.size(); i++) {
SplitMetricsRequest req( locations[i].first, limit, used, estimated, i == locations.size() - 1 );
SplitMetricsReply res = wait( loadBalance( locations[i].second, &StorageServerInterface::splitMetrics, req, TaskDataDistribution ) );
SplitMetricsReply res = wait( loadBalance( locations[i].second, &StorageServerInterface::splitMetrics, req, TaskPriority::DataDistribution ) );
if( res.splits.size() && res.splits[0] <= results.back() ) { // split points are out of order, possibly because of moving data, throw error to retry
ASSERT_WE_THINK(false); // FIXME: This seems impossible and doesn't seem to be covered by testing
throw all_alternatives_failed();
@ -3362,7 +3403,7 @@ ACTOR Future< Standalone<VectorRef<KeyRef>> > splitStorageMetrics( Database cx,
throw;
}
cx->invalidateCache( keys );
wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskDataDistribution));
wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskPriority::DataDistribution));
}
}
}
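The hunks above are part of a tree-wide migration from unscoped integer task constants (TaskDataDistribution, TaskDefaultPromiseEndpoint, ...) to a scoped enum class TaskPriority. A minimal sketch of the pattern, with illustrative enumerator values rather than the real ones from flow/network.h:

enum class TaskPriority : int64_t {
	DefaultOnMainThread    = 7500,  // illustrative value
	DefaultPromiseEndpoint = 8550,  // illustrative value
	DataDistribution       = 3500,  // illustrative value
};

// Scoped enumerators no longer convert implicitly to int, so every call site
// must spell the type (TaskPriority::DataDistribution), and code that packs a
// priority into an integer field needs an explicit cast:
uint32_t packed = static_cast<uint32_t>(TaskPriority::DataDistribution);

This is why the mechanical TaskFoo -> TaskPriority::Foo edits are accompanied by static_casts wherever a priority crosses an integer boundary, as in the EndpointMap changes later in this diff.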


@ -74,8 +74,8 @@ class Database {
public:
enum { API_VERSION_LATEST = -1 };
static Database createDatabase( Reference<ClusterConnectionFile> connFile, int apiVersion, LocalityData const& clientLocality=LocalityData(), DatabaseContext *preallocatedDb=nullptr );
static Database createDatabase( std::string connFileName, int apiVersion, LocalityData const& clientLocality=LocalityData() );
static Database createDatabase( Reference<ClusterConnectionFile> connFile, int apiVersion, bool internal=true, LocalityData const& clientLocality=LocalityData(), DatabaseContext *preallocatedDb=nullptr );
static Database createDatabase( std::string connFileName, int apiVersion, bool internal=true, LocalityData const& clientLocality=LocalityData() );
Database() {} // an uninitialized database can be destructed or reassigned safely; that's it
void operator= ( Database const& rhs ) { db = rhs.db; }
@ -90,6 +90,8 @@ public:
inline DatabaseContext* extractPtr() { return db.extractPtr(); }
DatabaseContext* operator->() const { return db.getPtr(); }
const UniqueOrderedOptionList<FDBTransactionOptions>& getTransactionDefaults() const;
private:
Reference<DatabaseContext> db;
};
@ -147,6 +149,7 @@ struct TransactionOptions {
double maxBackoff;
uint32_t getReadVersionFlags;
uint32_t sizeLimit;
int maxTransactionLoggingFieldLength;
bool checkWritesEnabled : 1;
bool causalWriteRisky : 1;
bool commitOnFirstProxy : 1;
@ -163,26 +166,27 @@ struct TransactionOptions {
struct TransactionInfo {
Optional<UID> debugID;
int taskID;
TaskPriority taskID;
bool useProvisionalProxies;
explicit TransactionInfo( int taskID ) : taskID(taskID), useProvisionalProxies(false) {}
explicit TransactionInfo( TaskPriority taskID ) : taskID(taskID), useProvisionalProxies(false) {}
};
struct TransactionLogInfo : public ReferenceCounted<TransactionLogInfo>, NonCopyable {
enum LoggingLocation { DONT_LOG = 0, TRACE_LOG = 1, DATABASE = 2 };
TransactionLogInfo() : logLocation(DONT_LOG) {}
TransactionLogInfo(LoggingLocation location) : logLocation(location) {}
TransactionLogInfo(std::string id, LoggingLocation location) : logLocation(location), identifier(id) {}
TransactionLogInfo() : logLocation(DONT_LOG), maxFieldLength(0) {}
TransactionLogInfo(LoggingLocation location) : logLocation(location), maxFieldLength(0) {}
TransactionLogInfo(std::string id, LoggingLocation location) : logLocation(location), identifier(id), maxFieldLength(0) {}
void setIdentifier(std::string id) { identifier = id; }
void logTo(LoggingLocation loc) { logLocation = logLocation | loc; }
template <typename T>
void addLog(const T& event) {
if(logLocation & TRACE_LOG) {
ASSERT(!identifier.empty());
event.logEvent(identifier);
event.logEvent(identifier, maxFieldLength);
}
if (flushed) {
@ -200,6 +204,7 @@ struct TransactionLogInfo : public ReferenceCounted<TransactionLogInfo>, NonCopy
bool logsAdded{ false };
bool flushed{ false };
int logLocation;
int maxFieldLength;
std::string identifier;
};
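TransactionLogInfo now threads maxFieldLength through to each event's logEvent(identifier, maxFieldLength). A hedged sketch of the truncation this enables; truncateField is a hypothetical helper, and the real per-event logic lives in ClientLogEvents.h:

// Trim an escaped field to at most maxFieldLength bytes. A non-positive limit
// means "do not truncate", matching the documented contract that a negative
// option value disables truncation.
std::string truncateField(std::string field, int maxFieldLength) {
	if (maxFieldLength > 0 && field.size() > static_cast<size_t>(maxFieldLength))
		return field.substr(0, maxFieldLength) + "...";
	return field;
}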
@ -286,7 +291,7 @@ public:
void flushTrLogsIfEnabled();
// These are to permit use as state variables in actors:
Transaction() : info( TaskDefaultEndpoint ) {}
Transaction() : info( TaskPriority::DefaultEndpoint ) {}
void operator=(Transaction&& r) BOOST_NOEXCEPT;
void reset();


@ -1124,7 +1124,8 @@ public:
};
ReadYourWritesTransaction::ReadYourWritesTransaction( Database const& cx ) : cache(&arena), writes(&arena), tr(cx), retries(0), creationTime(now()), commitStarted(false), options(tr), deferredError(cx->deferredError) {
resetTimeout();
std::copy(cx.getTransactionDefaults().begin(), cx.getTransactionDefaults().end(), std::back_inserter(persistentOptions));
applyPersistentOptions();
}
ACTOR Future<Void> timebomb(double endTime, Promise<Void> resetPromise) {
@ -1473,36 +1474,16 @@ void ReadYourWritesTransaction::writeRangeToNativeTransaction( KeyRangeRef const
}
ReadYourWritesTransactionOptions::ReadYourWritesTransactionOptions(Transaction const& tr) {
Database cx = tr.getDatabase();
timeoutInSeconds = cx->transactionTimeout;
maxRetries = cx->transactionMaxRetries;
reset(tr);
}
void ReadYourWritesTransactionOptions::reset(Transaction const& tr) {
double oldTimeout = timeoutInSeconds;
int oldMaxRetries = maxRetries;
memset(this, 0, sizeof(*this));
if( tr.apiVersionAtLeast(610) ) {
// Starting in API version 610, these options are not cleared after reset.
timeoutInSeconds = oldTimeout;
maxRetries = oldMaxRetries;
}
else {
Database cx = tr.getDatabase();
maxRetries = cx->transactionMaxRetries;
timeoutInSeconds = cx->transactionTimeout;
}
timeoutInSeconds = 0.0;
maxRetries = -1;
snapshotRywEnabled = tr.getDatabase()->snapshotRywEnabled;
}
void ReadYourWritesTransactionOptions::fullReset(Transaction const& tr) {
reset(tr);
Database cx = tr.getDatabase();
maxRetries = cx->transactionMaxRetries;
timeoutInSeconds = cx->transactionTimeout;
}
bool ReadYourWritesTransactionOptions::getAndResetWriteConflictDisabled() {
bool disabled = nextWriteDisableConflictRange;
nextWriteDisableConflictRange = false;
@ -1777,7 +1758,15 @@ Future<Standalone<StringRef>> ReadYourWritesTransaction::getVersionstamp() {
return waitOrError(tr.getVersionstamp(), resetPromise.getFuture());
}
void ReadYourWritesTransaction::setOption( FDBTransactionOptions::Option option, Optional<StringRef> value ) {
void ReadYourWritesTransaction::setOption( FDBTransactionOptions::Option option, Optional<StringRef> value ) {
setOptionImpl(option, value);
if (FDBTransactionOptions::optionInfo.getMustExist(option).persistent) {
persistentOptions.emplace_back(option, value.castTo<Standalone<StringRef>>());
}
}
void ReadYourWritesTransaction::setOptionImpl( FDBTransactionOptions::Option option, Optional<StringRef> value ) {
switch(option) {
case FDBTransactionOptions::READ_YOUR_WRITES_DISABLE:
validateOptionValue(value, false);
@ -1872,6 +1861,7 @@ void ReadYourWritesTransaction::operator=(ReadYourWritesTransaction&& r) BOOST_N
transactionDebugInfo = r.transactionDebugInfo;
cache.arena = &arena;
writes.arena = &arena;
persistentOptions = std::move(r.persistentOptions);
}
ReadYourWritesTransaction::ReadYourWritesTransaction(ReadYourWritesTransaction&& r) BOOST_NOEXCEPT :
@ -1894,12 +1884,32 @@ ReadYourWritesTransaction::ReadYourWritesTransaction(ReadYourWritesTransaction&&
readConflicts = std::move(r.readConflicts);
watchMap = std::move( r.watchMap );
r.resetPromise = Promise<Void>();
persistentOptions = std::move(r.persistentOptions);
}
Future<Void> ReadYourWritesTransaction::onError(Error const& e) {
return RYWImpl::onError( this, e );
}
void ReadYourWritesTransaction::applyPersistentOptions() {
Optional<StringRef> timeout;
for (auto option : persistentOptions) {
if(option.first == FDBTransactionOptions::TIMEOUT) {
timeout = option.second.castTo<StringRef>();
}
else {
setOptionImpl(option.first, option.second.castTo<StringRef>());
}
}
// Setting a timeout can immediately cause a transaction to fail. The only timeout
// that matters is the one most recently set, so we ignore any earlier set timeouts
// that might inadvertently fail the transaction.
if(timeout.present()) {
setOptionImpl(FDBTransactionOptions::TIMEOUT, timeout);
}
}
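The effect, sketched against a standard retry loop: options marked persistent (TIMEOUT here) are replayed by applyPersistentOptions() each time resetRyow() runs, so a retry keeps its timeout without the caller re-setting it. The values are illustrative, and as elsewhere in this change the option payload is an 8-byte little-endian integer:

// Inside an ACTOR function:
state ReadYourWritesTransaction tr(db);
int64_t timeoutMs = 5000;
tr.setOption(FDBTransactionOptions::TIMEOUT, StringRef((const uint8_t*)&timeoutMs, 8));
loop {
	try {
		// ... reads and writes ...
		wait(tr.commit());
		break;
	} catch (Error& e) {
		wait(tr.onError(e));  // resets the transaction, then replays persistent options
	}
}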
void ReadYourWritesTransaction::resetRyow() {
Promise<Void> oldReset = resetPromise;
resetPromise = Promise<Void>();
@ -1917,7 +1927,7 @@ void ReadYourWritesTransaction::resetRyow() {
if(tr.apiVersionAtLeast(16)) {
options.reset(tr);
resetTimeout();
applyPersistentOptions();
}
if ( !oldReset.isSet() )
@ -1933,9 +1943,11 @@ void ReadYourWritesTransaction::reset() {
retries = 0;
creationTime = now();
timeoutActor.cancel();
options.fullReset(tr);
persistentOptions.clear();
options.reset(tr);
transactionDebugInfo.clear();
tr.fullReset();
std::copy(tr.getDatabase().getTransactionDefaults().begin(), tr.getDatabase().getTransactionDefaults().end(), std::back_inserter(persistentOptions));
resetRyow();
}


@ -44,7 +44,6 @@ struct ReadYourWritesTransactionOptions {
ReadYourWritesTransactionOptions() {}
explicit ReadYourWritesTransactionOptions(Transaction const& tr);
void reset(Transaction const& tr);
void fullReset(Transaction const& tr);
bool getAndResetWriteConflictDisabled();
};
@ -160,6 +159,10 @@ private:
void debugLogRetries(Optional<Error> error = Optional<Error>());
void setOptionImpl( FDBTransactionOptions::Option option, Optional<StringRef> value = Optional<StringRef>() );
void applyPersistentOptions();
std::vector<std::pair<FDBTransactionOptions::Option, Optional<Standalone<StringRef>>>> persistentOptions;
ReadYourWritesTransactionOptions options;
};


@ -56,6 +56,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"roles":[
{
"query_queue_max":0,
"local_rate":0,
"input_bytes":{
"hz":0.0,
"counter":0,
@ -207,7 +208,8 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"megabits_received":{
"hz":0.0
}
}
},
"run_loop_busy":0.2
}
},
"old_logs":[
@ -249,7 +251,8 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"storage_server_min_free_space",
"storage_server_min_free_space_ratio",
"log_server_min_free_space",
"log_server_min_free_space_ratio"
"log_server_min_free_space_ratio",
"storage_server_durability_lag"
]
},
"description":"The database is not being saturated by the workload."
@ -268,7 +271,8 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"storage_server_min_free_space",
"storage_server_min_free_space_ratio",
"log_server_min_free_space",
"log_server_min_free_space_ratio"
"log_server_min_free_space_ratio",
"storage_server_durability_lag"
]
},
"description":"The database is not being saturated by the workload."
@ -285,7 +289,10 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"incompatible_connections":[
],
"datacenter_version_difference":0,
"datacenter_lag": {
"seconds" : 1.0,
"versions" : 1000000
},
"degraded_processes":0,
"database_available":true,
"database_locked":false,
@ -315,6 +322,10 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
}
]
},
"page_cache":{
"log_hit_rate":0.5,
"storage_hit_rate":0.5
},
"messages":[
{
"reasons":[
@ -430,6 +441,21 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"counter":0,
"roughness":0.0
},
"started_immediate_priority":{
"hz":0.0,
"counter":0,
"roughness":0.0
},
"started_default_priority":{
"hz":0.0,
"counter":0,
"roughness":0.0
},
"started_batch_priority":{
"hz":0.0,
"counter":0,
"roughness":0.0
},
"conflicted":{
"hz":0.0,
"counter":0,


@ -291,7 +291,7 @@ ACTOR Future<Optional<StatusObject>> clientCoordinatorsStatusFetcher(Reference<C
state vector<Future<Optional<LeaderInfo>>> leaderServers;
for (int i = 0; i < coord.clientLeaderServers.size(); i++)
leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader, GetLeaderRequest(coord.clusterKey, UID()), TaskCoordinationReply));
leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader, GetLeaderRequest(coord.clusterKey, UID()), TaskPriority::CoordinationReply));
wait( smartQuorum(leaderServers, leaderServers.size() / 2 + 1, 1.5) || delay(2.0) );


@ -80,9 +80,9 @@ struct StorageServerInterface {
bool operator == (StorageServerInterface const& s) const { return uniqueID == s.uniqueID; }
bool operator < (StorageServerInterface const& s) const { return uniqueID < s.uniqueID; }
void initEndpoints() {
getValue.getEndpoint( TaskLoadBalancedEndpoint );
getKey.getEndpoint( TaskLoadBalancedEndpoint );
getKeyValues.getEndpoint( TaskLoadBalancedEndpoint );
getValue.getEndpoint( TaskPriority::LoadBalancedEndpoint );
getKey.getEndpoint( TaskPriority::LoadBalancedEndpoint );
getKeyValues.getEndpoint( TaskPriority::LoadBalancedEndpoint );
}
};


@ -51,8 +51,19 @@ Reference<ITransaction> ThreadSafeDatabase::createTransaction() {
}
void ThreadSafeDatabase::setOption( FDBDatabaseOptions::Option option, Optional<StringRef> value) {
auto itr = FDBDatabaseOptions::optionInfo.find(option);
if(itr != FDBDatabaseOptions::optionInfo.end()) {
TraceEvent("SetDatabaseOption").detail("Option", itr->second.name);
}
else {
TraceEvent("UnknownDatabaseOption").detail("Option", option);
throw invalid_option();
}
DatabaseContext *db = this->db;
Standalone<Optional<StringRef>> passValue = value;
// ThreadSafeDatabase is not allowed to do anything with options except pass them through to the DatabaseContext.
onMainThreadVoid( [db, option, passValue](){
db->checkDeferredError();
db->setOption(option, passValue.contents());
@ -68,7 +79,7 @@ ThreadSafeDatabase::ThreadSafeDatabase(std::string connFilename, int apiVersion)
onMainThreadVoid([db, connFile, apiVersion](){
try {
Database::createDatabase(connFile, apiVersion, LocalityData(), db).extractPtr();
Database::createDatabase(connFile, apiVersion, false, LocalityData(), db).extractPtr();
}
catch(Error &e) {
new (db) DatabaseContext(e);
@ -272,8 +283,16 @@ ThreadFuture<Standalone<StringRef>> ThreadSafeTransaction::getVersionstamp() {
}
void ThreadSafeTransaction::setOption( FDBTransactionOptions::Option option, Optional<StringRef> value ) {
auto itr = FDBTransactionOptions::optionInfo.find(option);
if(itr == FDBTransactionOptions::optionInfo.end()) {
TraceEvent("UnknownTransactionOption").detail("Option", option);
throw invalid_option();
}
ReadYourWritesTransaction *tr = this->tr;
Standalone<Optional<StringRef>> passValue = value;
// ThreadSafeTransaction is not allowed to do anything with options except pass them through to RYW.
onMainThreadVoid( [tr, option, passValue](){ tr->setOption(option, passValue.contents()); }, &tr->deferredError );
}
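Both thread-safe wrappers now validate the option code on the calling thread before hopping to the main thread, so an unknown code fails fast with invalid_option instead of surfacing later as a deferred error. A sketch, assuming 99999 is not a defined option code:

try {
	tr.setOption(static_cast<FDBTransactionOptions::Option>(99999));
} catch (Error& e) {
	ASSERT(e.code() == error_code_invalid_option);  // thrown synchronously
}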


@ -31,7 +31,7 @@
#include "flow/actorcompiler.h" // This must be the last #include.
ACTOR template <class Tree>
Future<Void> deferredCleanupActor( std::vector<Tree> toFree, int taskID = 7000 ) {
Future<Void> deferredCleanupActor( std::vector<Tree> toFree, TaskPriority taskID = TaskPriority::DefaultYield ) {
state int freeCount = 0;
while (!toFree.empty()) {
Tree a = std::move( toFree.back() );


@ -511,7 +511,7 @@ public:
oldestVersion = newOldestVersion;
}
Future<Void> forgetVersionsBeforeAsync( Version newOldestVersion, int taskID = 7000 ) {
Future<Void> forgetVersionsBeforeAsync( Version newOldestVersion, TaskPriority taskID = TaskPriority::DefaultYield ) {
ASSERT( newOldestVersion <= latestVersion );
roots[newOldestVersion] = getRoot(newOldestVersion);


@ -31,7 +31,6 @@
<EnableCompile Condition="'$(Configuration)|$(Platform)'=='Release|X64'">false</EnableCompile>
</ActorCompiler>
<ClInclude Include="BlobStore.h" />
<ClInclude Include="ClientDBInfo.h" />
<ClInclude Include="ClientLogEvents.h" />
<ClInclude Include="ClientWorkerInterface.h" />
<ClInclude Include="ClusterInterface.h" />


@ -47,8 +47,8 @@ namespace vexillographer
private static string getCInfoLine(Option o, string indent, string structName)
{
return String.Format("{0}ADD_OPTION_INFO({1}, {2}, \"{2}\", \"{3}\", \"{4}\", {5}, {6})",
indent, structName, o.name.ToUpper(), o.comment, o.getParameterComment(), (o.paramDesc != null).ToString().ToLower(), o.hidden.ToString().ToLower());
return String.Format("{0}ADD_OPTION_INFO({1}, {2}, \"{2}\", \"{3}\", \"{4}\", {5}, {6}, {7}, {8})",
indent, structName, o.name.ToUpper(), o.comment, o.getParameterComment(), (o.paramDesc != null).ToString().ToLower(), o.hidden.ToString().ToLower(), o.persistent.ToString().ToLower(), o.defaultFor);
}
private static void writeCppInfo(TextWriter outFile, Scope scope, IEnumerable<Option> options)


@ -146,22 +146,31 @@ description is not currently required but encouraged.
<Option name="datacenter_id" code="22"
paramType="String" paramDescription="Hexadecimal ID"
description="Specify the datacenter ID that was passed to fdbserver processes running in the same datacenter as this client, for better location-aware load balancing." />
<Option name="transaction_timeout" code="500"
paramType="Int" paramDescription="value in milliseconds of timeout"
description="Set a timeout in milliseconds which, when elapsed, will cause each transaction automatically to be cancelled. This sets the ``timeout`` option of each transaction created by this database. See the transaction option description for more information. Using this option requires that the API version is 610 or higher." />
<Option name="transaction_retry_limit" code="501"
paramType="Int" paramDescription="number of times to retry"
description="Set a timeout in milliseconds which, when elapsed, will cause a transaction automatically to be cancelled. This sets the ``retry_limit`` option of each transaction created by this database. See the transaction option description for more information." />
<Option name="transaction_max_retry_delay" code="502"
paramType="Int" paramDescription="value in milliseconds of maximum delay"
description="Set the maximum amount of backoff delay incurred in the call to ``onError`` if the error is retryable. This sets the ``max_retry_delay`` option of each transaction created by this database. See the transaction option description for more information." />
<Option name="transaction_size_limit" code="503"
paramType="Int" paramDescription="value in bytes"
description="Set the maximum transaction size which, if exceeded, will cause the transaction to be cancelled. Default to 10,000,000 bytes." />
<!-- The snapshot RYW options act like defaults for the equivalent transaction options, but database defaults cannot have cumulative effects from multiple calls.
Thus, we don't use the defaultFor annotation on these options. -->
<Option name="snapshot_ryw_enable" code="26"
description="Snapshot read operations will see the results of writes done in the same transaction. This is the default behavior." />
<Option name="snapshot_ryw_disable" code="27"
description="Snapshot read operations will not see the results of writes done in the same transaction. This was the default behavior prior to API version 300." />
<Option name="transaction_logging_max_field_length" code="405" paramType="Int" paramDescription="Maximum length of escaped key and value fields."
description="Sets the maximum escaped length of key and value fields to be logged to the trace file via the LOG_TRANSACTION option. This sets the ``transaction_logging_max_field_length`` option of each transaction created by this database. See the transaction option description for more information."
defaultFor="405"/>
<Option name="transaction_timeout" code="500"
paramType="Int" paramDescription="value in milliseconds of timeout"
description="Set a timeout in milliseconds which, when elapsed, will cause each transaction automatically to be cancelled. This sets the ``timeout`` option of each transaction created by this database. See the transaction option description for more information. Using this option requires that the API version is 610 or higher."
defaultFor="500"/>
<Option name="transaction_retry_limit" code="501"
paramType="Int" paramDescription="number of times to retry"
description="Set a timeout in milliseconds which, when elapsed, will cause a transaction automatically to be cancelled. This sets the ``retry_limit`` option of each transaction created by this database. See the transaction option description for more information."
defaultFor="501"/>
<Option name="transaction_max_retry_delay" code="502"
paramType="Int" paramDescription="value in milliseconds of maximum delay"
description="Set the maximum amount of backoff delay incurred in the call to ``onError`` if the error is retryable. This sets the ``max_retry_delay`` option of each transaction created by this database. See the transaction option description for more information."
defaultFor="502"/>
<Option name="transaction_size_limit" code="503"
paramType="Int" paramDescription="value in bytes"
description="Set the maximum transaction size in bytes. This sets the ``size_limit`` option on each transaction created by this database. See the transaction option description for more information."
defaultFor="503"/>
</Scope>
<Scope name="TransactionOption">
@ -204,19 +213,24 @@ description is not currently required but encouraged.
description="Sets a client provided identifier for the transaction that will be used in scenarios like tracing or profiling. Client trace logging or transaction profiling must be separately enabled." />
<Option name="log_transaction" code="404"
description="Enables tracing for this transaction and logs results to the client trace logs. The DEBUG_TRANSACTION_IDENTIFIER option must be set before using this option, and client trace logging must be enabled and to get log output." />
<Option name="transaction_logging_max_field_length" code="405" paramType="Int" paramDescription="Maximum length of escaped key and value fields."
description="Sets the maximum escaped length of key and value fields to be logged to the trace file via the LOG_TRANSACTION option, after which the field will be truncated. A negative value disables truncation." />
<Option name="timeout" code="500"
paramType="Int" paramDescription="value in milliseconds of timeout"
description="Set a timeout in milliseconds which, when elapsed, will cause the transaction automatically to be cancelled. Valid parameter values are ``[0, INT_MAX]``. If set to 0, will disable all timeouts. All pending and any future uses of the transaction will throw an exception. The transaction can be used again after it is reset. Prior to API version 610, like all other transaction options, the timeout must be reset after a call to ``onError``. If the API version is 610 or greater, the timeout is not reset after an ``onError`` call. This allows the user to specify a longer timeout on specific transactions than the default timeout specified through the ``transaction_timeout`` database option without the shorter database timeout cancelling transactions that encounter a retryable error. Note that at all API versions, it is safe and legal to set the timeout each time the transaction begins, so most code written assuming the older behavior can be upgraded to the newer behavior without requiring any modification, and the caller is not required to implement special logic in retry loops to only conditionally set this option." />
description="Set a timeout in milliseconds which, when elapsed, will cause the transaction automatically to be cancelled. Valid parameter values are ``[0, INT_MAX]``. If set to 0, will disable all timeouts. All pending and any future uses of the transaction will throw an exception. The transaction can be used again after it is reset. Prior to API version 610, like all other transaction options, the timeout must be reset after a call to ``onError``. If the API version is 610 or greater, the timeout is not reset after an ``onError`` call. This allows the user to specify a longer timeout on specific transactions than the default timeout specified through the ``transaction_timeout`` database option without the shorter database timeout cancelling transactions that encounter a retryable error. Note that at all API versions, it is safe and legal to set the timeout each time the transaction begins, so most code written assuming the older behavior can be upgraded to the newer behavior without requiring any modification, and the caller is not required to implement special logic in retry loops to only conditionally set this option."
persistent="true" />
<Option name="retry_limit" code="501"
paramType="Int" paramDescription="number of times to retry"
description="Set a maximum number of retries after which additional calls to ``onError`` will throw the most recently seen error code. Valid parameter values are ``[-1, INT_MAX]``. If set to -1, will disable the retry limit. Prior to API version 610, like all other transaction options, the retry limit must be reset after a call to ``onError``. If the API version is 610 or greater, the retry limit is not reset after an ``onError`` call. Note that at all API versions, it is safe and legal to set the retry limit each time the transaction begins, so most code written assuming the older behavior can be upgraded to the newer behavior without requiring any modification, and the caller is not required to implement special logic in retry loops to only conditionally set this option." />
description="Set a maximum number of retries after which additional calls to ``onError`` will throw the most recently seen error code. Valid parameter values are ``[-1, INT_MAX]``. If set to -1, will disable the retry limit. Prior to API version 610, like all other transaction options, the retry limit must be reset after a call to ``onError``. If the API version is 610 or greater, the retry limit is not reset after an ``onError`` call. Note that at all API versions, it is safe and legal to set the retry limit each time the transaction begins, so most code written assuming the older behavior can be upgraded to the newer behavior without requiring any modification, and the caller is not required to implement special logic in retry loops to only conditionally set this option."
persistent="true"/>
<Option name="max_retry_delay" code="502"
paramType="Int" paramDescription="value in milliseconds of maximum delay"
description="Set the maximum amount of backoff delay incurred in the call to ``onError`` if the error is retryable. Defaults to 1000 ms. Valid parameter values are ``[0, INT_MAX]``. If the maximum retry delay is less than the current retry delay of the transaction, then the current retry delay will be clamped to the maximum retry delay. Prior to API version 610, like all other transaction options, the maximum retry delay must be reset after a call to ``onError``. If the API version is 610 or greater, the retry limit is not reset after an ``onError`` call. Note that at all API versions, it is safe and legal to set the maximum retry delay each time the transaction begins, so most code written assuming the older behavior can be upgraded to the newer behavior without requiring any modification, and the caller is not required to implement special logic in retry loops to only conditionally set this option."/>
description="Set the maximum amount of backoff delay incurred in the call to ``onError`` if the error is retryable. Defaults to 1000 ms. Valid parameter values are ``[0, INT_MAX]``. If the maximum retry delay is less than the current retry delay of the transaction, then the current retry delay will be clamped to the maximum retry delay. Prior to API version 610, like all other transaction options, the maximum retry delay must be reset after a call to ``onError``. If the API version is 610 or greater, the retry limit is not reset after an ``onError`` call. Note that at all API versions, it is safe and legal to set the maximum retry delay each time the transaction begins, so most code written assuming the older behavior can be upgraded to the newer behavior without requiring any modification, and the caller is not required to implement special logic in retry loops to only conditionally set this option."
persistent="true"/>
<Option name="size_limit" code="503"
paramType="Int" paramDescription="value in bytes"
description="Set the maximum transaction size which, if exceeded, will cause the transaction to be cancelled. Valid parameter values are ``[32, 10,000,000]```." />
<Option name="snapshot_ryw_enable" code="600"
description="Set the transaction size limit in bytes. The size is calculated by combining the sizes of all keys and values written or mutated, all key ranges cleared, and all read and write conflict ranges. (In other words, it includes the total size of all data included in the request to the cluster to commit the transaction.) Large transactions can cause performance problems on FoundationDB clusters, so setting this limit to a smaller value than the default can help prevent the client from accidentally degrading the cluster's performance. This value must be at least 32 and cannot be set to higher than 10,000,000, the default transaction size limit." />
<Option name="snapshot_ryw_enable" code="600"
description="Snapshot read operations will see the results of writes done in the same transaction. This is the default behavior." />
<Option name="snapshot_ryw_disable" code="601"
description="Snapshot read operations will not see the results of writes done in the same transaction. This was the default behavior prior to API version 300." />


@ -54,6 +54,8 @@ namespace vexillographer
public string paramDesc { get; set; }
public int code { get; set; }
public bool hidden { get; set; }
public bool persistent { get; set; }
public int defaultFor { get; set; }
private string _comment;
public string comment {
get {
@ -132,6 +134,9 @@ namespace vexillographer
var paramTypeStr = oDoc.AttributeOrNull("paramType");
ParamType p = paramTypeStr == null ? ParamType.None : (ParamType)Enum.Parse(typeof(ParamType), paramTypeStr);
bool hidden = oDoc.AttributeOrNull("hidden") == "true";
bool persistent = oDoc.AttributeOrNull("persistent") == "true";
String defaultForString = oDoc.AttributeOrNull("defaultFor");
int defaultFor = defaultForString == null ? -1 : int.Parse(defaultForString);
string disableOn = oDoc.AttributeOrNull("disableOn");
bool disabled = false;
if(disableOn != null)
@ -150,7 +155,9 @@ namespace vexillographer
paramType = p,
paramDesc = oDoc.AttributeOrNull("paramDescription"),
comment = oDoc.AttributeOrNull("description"),
hidden = hidden
hidden = hidden,
persistent = persistent,
defaultFor = defaultFor
});
}
}
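With the two extra format arguments, every generated option-table entry now carries persistent and defaultFor. For the transaction-side timeout option defined earlier (persistent="true", no defaultFor attribute), the emitted line would look roughly like this, description text elided:

ADD_OPTION_INFO(FDBTransactionOptions, TIMEOUT, "TIMEOUT",
                "Set a timeout in milliseconds ...",
                "value in milliseconds of timeout",
                true,   // has a parameter
                false,  // hidden
                true,   // persistent
                -1)     // defaultFor: -1 when the attribute is absent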


@ -67,8 +67,6 @@ struct EvictablePageCache : ReferenceCounted<EvictablePageCache> {
EvictablePageCache() : pageSize(0), maxPages(0), cacheEvictionType(RANDOM) {}
explicit EvictablePageCache(int pageSize, int64_t maxSize) : pageSize(pageSize), maxPages(maxSize / pageSize), cacheEvictionType(evictionPolicyStringToEnum(FLOW_KNOBS->CACHE_EVICTION_POLICY)) {
cacheHits.init(LiteralStringRef("EvictablePageCache.CacheHits"));
cacheMisses.init(LiteralStringRef("EvictablePageCache.CacheMisses"));
cacheEvictions.init(LiteralStringRef("EvictablePageCache.CacheEvictions"));
}
@ -82,7 +80,6 @@ struct EvictablePageCache : ReferenceCounted<EvictablePageCache> {
} else {
lruPages.push_back(*page); // new page is considered the most recently used (placed at LRU tail)
}
++cacheMisses;
}
void updateHit(EvictablePage* page) {
@ -91,7 +88,6 @@ struct EvictablePageCache : ReferenceCounted<EvictablePageCache> {
lruPages.erase(List::s_iterator_to(*page));
lruPages.push_back(*page);
}
++cacheHits;
}
void try_evict() {
@ -126,8 +122,6 @@ struct EvictablePageCache : ReferenceCounted<EvictablePageCache> {
List lruPages;
int pageSize;
int64_t maxPages;
Int64MetricHandle cacheHits;
Int64MetricHandle cacheMisses;
Int64MetricHandle cacheEvictions;
const CacheEvictionType cacheEvictionType;
};
@ -278,6 +272,8 @@ private:
Int64MetricHandle countFileCacheWrites;
Int64MetricHandle countFileCacheReadsBlocked;
Int64MetricHandle countFileCacheWritesBlocked;
Int64MetricHandle countFileCachePageReadsHit;
Int64MetricHandle countFileCachePageReadsMissed;
Int64MetricHandle countFileCachePageReadsMerged;
Int64MetricHandle countFileCacheReadBytes;
@ -286,28 +282,33 @@ private:
Int64MetricHandle countCacheWrites;
Int64MetricHandle countCacheReadsBlocked;
Int64MetricHandle countCacheWritesBlocked;
Int64MetricHandle countCachePageReadsHit;
Int64MetricHandle countCachePageReadsMissed;
Int64MetricHandle countCachePageReadsMerged;
Int64MetricHandle countCacheReadBytes;
AsyncFileCached( Reference<IAsyncFile> uncached, const std::string& filename, int64_t length, Reference<EvictablePageCache> pageCache )
AsyncFileCached( Reference<IAsyncFile> uncached, const std::string& filename, int64_t length, Reference<EvictablePageCache> pageCache )
: uncached(uncached), filename(filename), length(length), prevLength(length), pageCache(pageCache), currentTruncate(Void()), currentTruncateSize(0) {
if( !g_network->isSimulated() ) {
countFileCacheWrites.init( LiteralStringRef("AsyncFile.CountFileCacheWrites"), filename);
countFileCacheReads.init( LiteralStringRef("AsyncFile.CountFileCacheReads"), filename);
countFileCacheWritesBlocked.init( LiteralStringRef("AsyncFile.CountFileCacheWritesBlocked"), filename);
countFileCacheReadsBlocked.init( LiteralStringRef("AsyncFile.CountFileCacheReadsBlocked"), filename);
countFileCacheWrites.init(LiteralStringRef("AsyncFile.CountFileCacheWrites"), filename);
countFileCacheReads.init(LiteralStringRef("AsyncFile.CountFileCacheReads"), filename);
countFileCacheWritesBlocked.init(LiteralStringRef("AsyncFile.CountFileCacheWritesBlocked"), filename);
countFileCacheReadsBlocked.init(LiteralStringRef("AsyncFile.CountFileCacheReadsBlocked"), filename);
countFileCachePageReadsHit.init(LiteralStringRef("AsyncFile.CountFileCachePageReadsHit"), filename);
countFileCachePageReadsMissed.init(LiteralStringRef("AsyncFile.CountFileCachePageReadsMissed"), filename);
countFileCachePageReadsMerged.init(LiteralStringRef("AsyncFile.CountFileCachePageReadsMerged"), filename);
countFileCacheFinds.init( LiteralStringRef("AsyncFile.CountFileCacheFinds"), filename);
countFileCacheReadBytes.init( LiteralStringRef("AsyncFile.CountFileCacheReadBytes"), filename);
countFileCacheFinds.init(LiteralStringRef("AsyncFile.CountFileCacheFinds"), filename);
countFileCacheReadBytes.init(LiteralStringRef("AsyncFile.CountFileCacheReadBytes"), filename);
countCacheWrites.init( LiteralStringRef("AsyncFile.CountCacheWrites"));
countCacheReads.init( LiteralStringRef("AsyncFile.CountCacheReads"));
countCacheWritesBlocked.init( LiteralStringRef("AsyncFile.CountCacheWritesBlocked"));
countCacheReadsBlocked.init( LiteralStringRef("AsyncFile.CountCacheReadsBlocked"));
countCacheWrites.init(LiteralStringRef("AsyncFile.CountCacheWrites"));
countCacheReads.init(LiteralStringRef("AsyncFile.CountCacheReads"));
countCacheWritesBlocked.init(LiteralStringRef("AsyncFile.CountCacheWritesBlocked"));
countCacheReadsBlocked.init(LiteralStringRef("AsyncFile.CountCacheReadsBlocked"));
countCachePageReadsHit.init(LiteralStringRef("AsyncFile.CountCachePageReadsHit"));
countCachePageReadsMissed.init(LiteralStringRef("AsyncFile.CountCachePageReadsMissed"));
countCachePageReadsMerged.init(LiteralStringRef("AsyncFile.CountCachePageReadsMerged"));
countCacheFinds.init( LiteralStringRef("AsyncFile.CountCacheFinds"));
countCacheReadBytes.init( LiteralStringRef("AsyncFile.CountCacheReadBytes"));
countCacheFinds.init(LiteralStringRef("AsyncFile.CountCacheFinds"));
countCacheReadBytes.init(LiteralStringRef("AsyncFile.CountCacheReadBytes"));
}
}
@ -387,11 +388,18 @@ struct AFCPage : public EvictablePage, public FastAllocated<AFCPage> {
// If there are no active readers then if data is valid or we're replacing all of it we can write directly
if (valid || fullPage) {
if(!fullPage) {
++owner->countFileCachePageReadsHit;
++owner->countCachePageReadsHit;
}
valid = true;
memcpy( static_cast<uint8_t*>(this->data) + offset, data, length );
return yield();
}
++owner->countFileCachePageReadsMissed;
++owner->countCachePageReadsMissed;
// If data is not valid but no read is in progress, start reading
if (notReading.isReady()) {
notReading = readThrough( this );
@ -410,7 +418,14 @@ struct AFCPage : public EvictablePage, public FastAllocated<AFCPage> {
Future<Void> readZeroCopy() {
++zeroCopyRefCount;
if (valid) return yield();
if (valid) {
++owner->countFileCachePageReadsHit;
++owner->countCachePageReadsHit;
return yield();
}
++owner->countFileCachePageReadsMissed;
++owner->countCachePageReadsMissed;
if (notReading.isReady()) {
notReading = readThrough( this );
@ -428,12 +443,17 @@ struct AFCPage : public EvictablePage, public FastAllocated<AFCPage> {
Future<Void> read( void* data, int length, int offset ) {
if (valid) {
++owner->countFileCachePageReadsHit;
++owner->countCachePageReadsHit;
owner->countFileCacheReadBytes += length;
owner->countCacheReadBytes += length;
memcpy( data, static_cast<uint8_t const*>(this->data) + offset, length );
return yield();
}
++owner->countFileCachePageReadsMissed;
++owner->countCachePageReadsMissed;
if (notReading.isReady()) {
notReading = readThrough( this );
} else {


@ -266,7 +266,7 @@ private:
}
ACTOR static Future<int> read_impl( int fd, void* data, int length, int64_t offset ) {
state int taskID = g_network->getCurrentTask();
state TaskPriority taskID = g_network->getCurrentTask();
state Promise<Void> p;
//fprintf(stderr, "eio_read (fd=%d length=%d offset=%lld)\n", fd, length, offset);
state eio_req* r = eio_read(fd, data, length, offset, 0, eio_callback, &p);
@ -289,7 +289,7 @@ private:
}
ACTOR static Future<Void> write_impl( int fd, Reference<ErrorInfo> err, StringRef data, int64_t offset ) {
state int taskID = g_network->getCurrentTask();
state TaskPriority taskID = g_network->getCurrentTask();
state Promise<Void> p;
state eio_req* r = eio_write(fd, (void*)data.begin(), data.size(), offset, 0, eio_callback, &p);
try { wait( p.getFuture() ); } catch (...) { g_network->setCurrentTask( taskID ); eio_cancel(r); throw; }
@ -299,7 +299,7 @@ private:
}
ACTOR static Future<Void> truncate_impl( int fd, Reference<ErrorInfo> err, int64_t size ) {
state int taskID = g_network->getCurrentTask();
state TaskPriority taskID = g_network->getCurrentTask();
state Promise<Void> p;
state eio_req* r = eio_ftruncate(fd, size, 0, eio_callback, &p);
try { wait( p.getFuture() ); } catch (...) { g_network->setCurrentTask( taskID ); eio_cancel(r); throw; }
@ -330,7 +330,7 @@ private:
}
ACTOR static Future<Void> sync_impl( int fd, Reference<ErrorInfo> err, bool sync_metadata=false ) {
state int taskID = g_network->getCurrentTask();
state TaskPriority taskID = g_network->getCurrentTask();
state Promise<Void> p;
state eio_req* r = start_fsync( fd, p, sync_metadata );
@ -350,7 +350,7 @@ private:
}
ACTOR static Future<int64_t> size_impl( int fd ) {
state int taskID = g_network->getCurrentTask();
state TaskPriority taskID = g_network->getCurrentTask();
state Promise<Void> p;
state eio_req* r = eio_fstat( fd, 0, eio_callback, &p );
try { wait( p.getFuture() ); } catch (...) { g_network->setCurrentTask( taskID ); eio_cancel(r); throw; }
@ -363,7 +363,7 @@ private:
}
ACTOR static Future<EIO_STRUCT_STAT> stat_impl( std::string filename ) {
state int taskID = g_network->getCurrentTask();
state TaskPriority taskID = g_network->getCurrentTask();
state Promise<Void> p;
state EIO_STRUCT_STAT statdata;
state eio_req* r = eio_stat( filename.c_str(), 0, eio_callback, &p );
@ -377,7 +377,7 @@ private:
ACTOR template <class R> static Future<R> dispatch_impl( std::function<R()> func) {
state Dispatch<R> data( func );
state int taskID = g_network->getCurrentTask();
state TaskPriority taskID = g_network->getCurrentTask();
state eio_req* r = eio_custom( [](eio_req* req) {
// Runs on the eio thread pool
@ -418,7 +418,7 @@ private:
static void eio_want_poll() {
want_poll = 1;
// SOMEDAY: NULL for deferred error, no analysis of correctness (itp)
onMainThreadVoid([](){ poll_eio(); }, NULL, TaskPollEIO);
onMainThreadVoid([](){ poll_eio(); }, NULL, TaskPriority::PollEIO);
}
static int eio_callback( eio_req* req ) {


@ -472,9 +472,9 @@ private:
#endif
}
int getTask() const { return (prio>>32)+1; }
TaskPriority getTask() const { return static_cast<TaskPriority>((prio>>32)+1); }
ACTOR static void deliver( Promise<int> result, bool failed, int r, int task ) {
ACTOR static void deliver( Promise<int> result, bool failed, int r, TaskPriority task ) {
wait( delay(0, task) );
if (failed) result.sendError(io_timeout());
else if (r < 0) result.sendError(io_error());
@ -649,7 +649,7 @@ private:
loop {
wait(success(ev->read()));
wait(delay(0, TaskDiskIOComplete));
wait(delay(0, TaskPriority::DiskIOComplete));
linux_ioresult ev[FLOW_KNOBS->MAX_OUTSTANDING];
timespec tm; tm.tv_sec = 0; tm.tv_nsec = 0;


@ -23,13 +23,13 @@
std::map<std::string, Future<Void>> AsyncFileNonDurable::filesBeingDeleted;
ACTOR Future<Void> sendOnProcess( ISimulator::ProcessInfo* process, Promise<Void> promise, int taskID ) {
ACTOR Future<Void> sendOnProcess( ISimulator::ProcessInfo* process, Promise<Void> promise, TaskPriority taskID ) {
wait( g_simulator.onProcess( process, taskID ) );
promise.send(Void());
return Void();
}
ACTOR Future<Void> sendErrorOnProcess( ISimulator::ProcessInfo* process, Promise<Void> promise, Error e, int taskID ) {
ACTOR Future<Void> sendErrorOnProcess( ISimulator::ProcessInfo* process, Promise<Void> promise, Error e, TaskPriority taskID ) {
wait( g_simulator.onProcess( process, taskID ) );
promise.sendError(e);
return Void();


@ -38,8 +38,8 @@
#undef max
#undef min
Future<Void> sendOnProcess( ISimulator::ProcessInfo* const& process, Promise<Void> const& promise, int const& taskID );
Future<Void> sendErrorOnProcess( ISimulator::ProcessInfo* const& process, Promise<Void> const& promise, Error const& e, int const& taskID );
ACTOR Future<Void> sendOnProcess( ISimulator::ProcessInfo* process, Promise<Void> promise, TaskPriority taskID );
ACTOR Future<Void> sendErrorOnProcess( ISimulator::ProcessInfo* process, Promise<Void> promise, Error e, TaskPriority taskID );
ACTOR template <class T>
Future<T> sendErrorOnShutdown( Future<T> in ) {
@ -198,7 +198,7 @@ public:
//Creates a new AsyncFileNonDurable which wraps the provided IAsyncFile
ACTOR static Future<Reference<IAsyncFile>> open(std::string filename, std::string actualFilename, Future<Reference<IAsyncFile>> wrappedFile, Reference<DiskParameters> diskParameters) {
state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
state int currentTaskID = g_network->getCurrentTask();
state TaskPriority currentTaskID = g_network->getCurrentTask();
state Future<Void> shutdown = success(currentProcess->shutdownSignal.getFuture());
//TraceEvent("AsyncFileNonDurableOpenBegin").detail("Filename", filename).detail("Addr", g_simulator.getCurrentProcess()->address);
@ -391,7 +391,7 @@ private:
ACTOR Future<int> read(AsyncFileNonDurable *self, void *data, int length, int64_t offset) {
state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
state int currentTaskID = g_network->getCurrentTask();
state TaskPriority currentTaskID = g_network->getCurrentTask();
wait( g_simulator.onMachine( currentProcess ) );
try {
@ -411,7 +411,7 @@ private:
//or none of the write. It may also corrupt parts of sectors which have not been written correctly
ACTOR Future<Void> write(AsyncFileNonDurable *self, Promise<Void> writeStarted, Future<Future<Void>> ownFuture, void const* data, int length, int64_t offset) {
state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
state int currentTaskID = g_network->getCurrentTask();
state TaskPriority currentTaskID = g_network->getCurrentTask();
wait( g_simulator.onMachine( currentProcess ) );
state double delayDuration = deterministicRandom()->random01() * self->maxWriteDelay;
@ -535,7 +535,7 @@ private:
//If a kill interrupts the delay, then the truncate may or may not be performed
ACTOR Future<Void> truncate(AsyncFileNonDurable *self, Promise<Void> truncateStarted, Future<Future<Void>> ownFuture, int64_t size) {
state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
state int currentTaskID = g_network->getCurrentTask();
state TaskPriority currentTaskID = g_network->getCurrentTask();
wait( g_simulator.onMachine( currentProcess ) );
state double delayDuration = deterministicRandom()->random01() * self->maxWriteDelay;
@ -573,8 +573,8 @@ private:
}
}
if(g_network->check_yield(TaskDefaultYield)) {
wait(delay(0, TaskDefaultYield));
if(g_network->check_yield(TaskPriority::DefaultYield)) {
wait(delay(0, TaskPriority::DefaultYield));
}
//If performing a durable truncate, then pass it through to the file. Otherwise, pass it through with a 1/2 chance
@ -663,7 +663,7 @@ private:
ACTOR Future<Void> sync(AsyncFileNonDurable *self, bool durable) {
state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
state int currentTaskID = g_network->getCurrentTask();
state TaskPriority currentTaskID = g_network->getCurrentTask();
wait( g_simulator.onMachine( currentProcess ) );
try {
@ -695,7 +695,7 @@ private:
ACTOR Future<int64_t> size(AsyncFileNonDurable *self) {
state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
state int currentTaskID = g_network->getCurrentTask();
state TaskPriority currentTaskID = g_network->getCurrentTask();
wait( g_simulator.onMachine( currentProcess ) );
@ -714,7 +714,7 @@ private:
//Finishes all outstanding actors on an AsyncFileNonDurable and then deletes it
ACTOR Future<Void> deleteFile(AsyncFileNonDurable *self) {
state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
state int currentTaskID = g_network->getCurrentTask();
state TaskPriority currentTaskID = g_network->getCurrentTask();
state std::string filename = self->filename;
wait( g_simulator.onMachine( currentProcess ) );


@ -172,28 +172,28 @@ struct YieldMockNetwork : INetwork, ReferenceCounted<YieldMockNetwork> {
t.send(Void());
}
virtual Future<class Void> delay(double seconds, int taskID) {
virtual Future<class Void> delay(double seconds, TaskPriority taskID) {
return nextTick.getFuture();
}
virtual Future<class Void> yield(int taskID) {
virtual Future<class Void> yield(TaskPriority taskID) {
if (check_yield(taskID))
return delay(0,taskID);
return Void();
}
virtual bool check_yield(int taskID) {
virtual bool check_yield(TaskPriority taskID) {
if (nextYield > 0) --nextYield;
return nextYield == 0;
}
// Delegate everything else. TODO: Make a base class NetworkWrapper for delegating everything in INetwork
virtual int getCurrentTask() { return baseNetwork->getCurrentTask(); }
virtual void setCurrentTask(int taskID) { baseNetwork->setCurrentTask(taskID); }
virtual TaskPriority getCurrentTask() { return baseNetwork->getCurrentTask(); }
virtual void setCurrentTask(TaskPriority taskID) { baseNetwork->setCurrentTask(taskID); }
virtual double now() { return baseNetwork->now(); }
virtual void stop() { return baseNetwork->stop(); }
virtual bool isSimulated() const { return baseNetwork->isSimulated(); }
virtual void onMainThread(Promise<Void>&& signal, int taskID) { return baseNetwork->onMainThread(std::move(signal), taskID); }
virtual void onMainThread(Promise<Void>&& signal, TaskPriority taskID) { return baseNetwork->onMainThread(std::move(signal), taskID); }
bool isOnMainThread() const override { return baseNetwork->isOnMainThread(); }
virtual THREAD_HANDLE startThread(THREAD_FUNC_RETURN(*func) (void *), void *arg) { return baseNetwork->startThread(func,arg); }
virtual Future< Reference<class IAsyncFile> > open(std::string filename, int64_t flags, int64_t mode) { return IAsyncFileSystem::filesystem()->open(filename,flags,mode); }

View File

@ -50,9 +50,9 @@ const uint64_t TOKEN_STREAM_FLAG = 1;
class EndpointMap : NonCopyable {
public:
EndpointMap();
void insert( NetworkMessageReceiver* r, Endpoint::Token& token, uint32_t priority );
void insert( NetworkMessageReceiver* r, Endpoint::Token& token, TaskPriority priority );
NetworkMessageReceiver* get( Endpoint::Token const& token );
uint32_t getPriority( Endpoint::Token const& token );
TaskPriority getPriority( Endpoint::Token const& token );
void remove( Endpoint::Token const& token, NetworkMessageReceiver* r );
private:
@ -86,12 +86,12 @@ void EndpointMap::realloc() {
firstFree = oldSize;
}
void EndpointMap::insert( NetworkMessageReceiver* r, Endpoint::Token& token, uint32_t priority ) {
void EndpointMap::insert( NetworkMessageReceiver* r, Endpoint::Token& token, TaskPriority priority ) {
if (firstFree == uint32_t(-1)) realloc();
int index = firstFree;
firstFree = data[index].nextFree;
token = Endpoint::Token( token.first(), (token.second()&0xffffffff00000000LL) | index );
data[index].token() = Endpoint::Token( token.first(), (token.second()&0xffffffff00000000LL) | priority );
data[index].token() = Endpoint::Token( token.first(), (token.second()&0xffffffff00000000LL) | static_cast<uint32_t>(priority) );
data[index].receiver = r;
}
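insert() publishes a token whose low 32 bits carry the slot index, while the copy kept in the map carries the priority in those same bits; getPriority() below cross-checks the high halves and recovers the priority. A standalone sketch of the packing, assuming 64-bit tokens:

#include <cstdint>
#include <cassert>

// Sketch only: pack a 32-bit value into the low half of a 64-bit token, as
// EndpointMap does with the slot index (published) and the priority (stored).
uint64_t packLow32(uint64_t token, uint32_t value) {
    return (token & 0xffffffff00000000ULL) | value;
}
uint32_t unpackLow32(uint64_t token) {
    return static_cast<uint32_t>(token & 0xffffffffULL);
}

int main() {
    uint64_t token = 0x0123456789abcdefULL;
    uint64_t published = packLow32(token, /*index=*/42);
    uint64_t stored    = packLow32(token, /*priority=*/9000);
    assert(unpackLow32(published) == 42);
    assert(unpackLow32(stored) == 9000);
    // The high halves still match, which is what get()/getPriority() verify.
    assert((published & 0xffffffff00000000ULL) == (stored & 0xffffffff00000000ULL));
}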
@ -102,11 +102,11 @@ NetworkMessageReceiver* EndpointMap::get( Endpoint::Token const& token ) {
return 0;
}
uint32_t EndpointMap::getPriority( Endpoint::Token const& token ) {
TaskPriority EndpointMap::getPriority( Endpoint::Token const& token ) {
uint32_t index = token.second();
if ( index < data.size() && data[index].token().first() == token.first() && ((data[index].token().second()&0xffffffff00000000LL)|index)==token.second() )
return data[index].token().second();
return TaskUnknownEndpoint;
return static_cast<TaskPriority>(data[index].token().second());
return TaskPriority::UnknownEndpoint;
}
void EndpointMap::remove( Endpoint::Token const& token, NetworkMessageReceiver* r ) {
@ -122,7 +122,7 @@ struct EndpointNotFoundReceiver : NetworkMessageReceiver {
EndpointNotFoundReceiver(EndpointMap& endpoints) {
//endpoints[WLTOKEN_ENDPOINT_NOT_FOUND] = this;
Endpoint::Token e = WLTOKEN_ENDPOINT_NOT_FOUND;
endpoints.insert(this, e, TaskDefaultEndpoint);
endpoints.insert(this, e, TaskPriority::DefaultEndpoint);
ASSERT( e == WLTOKEN_ENDPOINT_NOT_FOUND );
}
virtual void receive( ArenaReader& reader ) {
@ -141,7 +141,7 @@ struct EndpointNotFoundReceiver : NetworkMessageReceiver {
struct PingReceiver : NetworkMessageReceiver {
PingReceiver(EndpointMap& endpoints) {
Endpoint::Token e = WLTOKEN_PING_PACKET;
endpoints.insert(this, e, TaskReadSocket);
endpoints.insert(this, e, TaskPriority::ReadSocket);
ASSERT( e == WLTOKEN_PING_PACKET );
}
virtual void receive( ArenaReader& reader ) {
@ -305,11 +305,12 @@ struct Peer : NonCopyable {
int peerReferences;
bool incompatibleProtocolVersionNewer;
int64_t bytesReceived;
double lastDataPacketSentTime;
explicit Peer( TransportData* transport, NetworkAddress const& destination )
: transport(transport), destination(destination), outgoingConnectionIdle(false), lastConnectTime(0.0), reconnectionDelay(FLOW_KNOBS->INITIAL_RECONNECTION_TIME),
compatible(true), incompatibleProtocolVersionNewer(false), peerReferences(-1), bytesReceived(0)
{
explicit Peer(TransportData* transport, NetworkAddress const& destination)
: transport(transport), destination(destination), outgoingConnectionIdle(false), lastConnectTime(0.0),
reconnectionDelay(FLOW_KNOBS->INITIAL_RECONNECTION_TIME), compatible(true),
incompatibleProtocolVersionNewer(false), peerReferences(-1), bytesReceived(0), lastDataPacketSentTime(now()) {
connect = connectionKeeper(this);
}
@ -396,19 +397,44 @@ struct Peer : NonCopyable {
}
ACTOR static Future<Void> connectionMonitor( Peer *peer ) {
state RequestStream< ReplyPromise<Void> > remotePing( Endpoint( {peer->destination}, WLTOKEN_PING_PACKET ) );
state Endpoint remotePingEndpoint({ peer->destination }, WLTOKEN_PING_PACKET);
loop {
if(peer->peerReferences == 0 && peer->reliable.empty() && peer->unsent.empty()) {
throw connection_unreferenced();
if (!FlowTransport::transport().isClient() && !peer->destination.isPublic()) {
// Don't send ping messages to clients unless necessary. Instead monitor incoming client pings.
state double lastRefreshed = now();
state int64_t lastBytesReceived = peer->bytesReceived;
loop {
wait(delay(FLOW_KNOBS->CONNECTION_MONITOR_LOOP_TIME));
if (lastBytesReceived < peer->bytesReceived) {
lastRefreshed = now();
lastBytesReceived = peer->bytesReceived;
} else if (lastRefreshed < now() - FLOW_KNOBS->CONNECTION_MONITOR_IDLE_TIMEOUT *
FLOW_KNOBS->CONNECTION_MONITOR_INCOMING_IDLE_MULTIPLIER) {
// If we have not received anything in this period, the client must have closed
// the connection by now. Break the loop and check whether it is still alive by sending a ping.
break;
}
}
}
wait( delayJittered( FLOW_KNOBS->CONNECTION_MONITOR_LOOP_TIME ) );
if (peer->reliable.empty() && peer->unsent.empty()) {
if (peer->peerReferences == 0 &&
(peer->lastDataPacketSentTime < now() - FLOW_KNOBS->CONNECTION_MONITOR_UNREFERENCED_CLOSE_DELAY)) {
// TODO: What about when peerReference == -1?
throw connection_unreferenced();
} else if (FlowTransport::transport().isClient() && peer->destination.isPublic() &&
(peer->lastConnectTime < now() - FLOW_KNOBS->CONNECTION_MONITOR_IDLE_TIMEOUT) &&
(peer->lastDataPacketSentTime < now() - FLOW_KNOBS->CONNECTION_MONITOR_IDLE_TIMEOUT)) {
// The first condition is necessary because we may get here even if we are a server.
throw connection_idle();
}
}
// SOMEDAY: Stop monitoring and close the connection after a long period of inactivity with no reliable or onDisconnect requests outstanding
wait (delayJittered(FLOW_KNOBS->CONNECTION_MONITOR_LOOP_TIME));
// TODO: Stop monitoring and close the connection with no onDisconnect requests outstanding
state ReplyPromise<Void> reply;
FlowTransport::transport().sendUnreliable( SerializeSource<ReplyPromise<Void>>(reply), remotePing.getEndpoint() );
FlowTransport::transport().sendUnreliable( SerializeSource<ReplyPromise<Void>>(reply), remotePingEndpoint );
state int64_t startingBytes = peer->bytesReceived;
state int timeouts = 0;
loop {
@ -419,7 +445,10 @@ struct Peer : NonCopyable {
throw connection_failed();
}
if(timeouts > 1) {
TraceEvent(SevWarnAlways, "ConnectionSlowPing").suppressFor(1.0).detail("WithAddr", peer->destination).detail("Timeouts", timeouts);
TraceEvent(SevWarnAlways, "ConnectionSlowPing")
.suppressFor(1.0)
.detail("WithAddr", peer->destination)
.detail("Timeouts", timeouts);
}
startingBytes = peer->bytesReceived;
timeouts++;
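The rewritten monitor combines two cheap liveness signals: a byte counter bumped by the connection reader, and an explicit ping once that counter stops moving. A simplified sketch of the first phase, with plain values standing in for flow futures and knobs (names here are illustrative):

#include <cstdint>

// Sketch only: the byte-counter idle check used above. bytesReceived is bumped
// elsewhere (by the connection reader); this logic just watches it.
struct PeerWatch {
    int64_t lastBytesSeen = 0;
    double lastRefreshed = 0;

    // Returns true when we should fall through and send a real ping.
    bool shouldPing(int64_t bytesReceived, double now, double idleTimeout) {
        if (bytesReceived > lastBytesSeen) {
            lastBytesSeen = bytesReceived;
            lastRefreshed = now;   // traffic observed; no ping needed
            return false;
        }
        return lastRefreshed < now - idleTimeout;  // quiet too long: probe
    }
};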
@ -438,10 +467,10 @@ struct Peer : NonCopyable {
ACTOR static Future<Void> connectionWriter( Peer* self, Reference<IConnection> conn ) {
state double lastWriteTime = now();
loop {
//wait( delay(0, TaskWriteSocket) );
wait( delayJittered(std::max<double>(FLOW_KNOBS->MIN_COALESCE_DELAY, FLOW_KNOBS->MAX_COALESCE_DELAY - (now() - lastWriteTime)), TaskWriteSocket) );
//wait( delay(500e-6, TaskWriteSocket) );
//wait( yield(TaskWriteSocket) );
//wait( delay(0, TaskPriority::WriteSocket) );
wait( delayJittered(std::max<double>(FLOW_KNOBS->MIN_COALESCE_DELAY, FLOW_KNOBS->MAX_COALESCE_DELAY - (now() - lastWriteTime)), TaskPriority::WriteSocket) );
//wait( delay(500e-6, TaskPriority::WriteSocket) );
//wait( yield(TaskPriority::WriteSocket) );
// Send until there is nothing left to send
loop {
@ -456,7 +485,7 @@ struct Peer : NonCopyable {
TEST(true); // We didn't write everything, so apparently the write buffer is full. Wait for it to be nonfull.
wait( conn->onWritable() );
wait( yield(TaskWriteSocket) );
wait( yield(TaskPriority::WriteSocket) );
}
// Wait until there is something to send
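The delayJittered call above is the write-coalescing heart of connectionWriter: the longer it has been since the last write, the shorter the wait before flushing. With illustrative knob values of MIN_COALESCE_DELAY = 100us and MAX_COALESCE_DELAY = 2ms, a write 1.5ms after the previous one waits about 0.5ms, while one 3ms later waits only the 100us floor. A sketch of the computation:

#include <algorithm>
#include <cassert>
#include <cmath>

// Sketch only: the coalescing-delay computation from connectionWriter, with
// made-up knob values (in seconds).
double coalesceDelay(double now, double lastWriteTime,
                     double minDelay = 100e-6, double maxDelay = 2e-3) {
    return std::max(minDelay, maxDelay - (now - lastWriteTime));
}

int main() {
    assert(std::abs(coalesceDelay(1.0015, 1.0) - 0.5e-3) < 1e-9);  // partial wait
    assert(coalesceDelay(1.003, 1.0) == 100e-6);                   // clamped to floor
}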
@ -550,14 +579,21 @@ struct Peer : NonCopyable {
self->discardUnreliablePackets();
reader = Future<Void>();
bool ok = e.code() == error_code_connection_failed || e.code() == error_code_actor_cancelled ||
e.code() == error_code_connection_unreferenced ||
e.code() == error_code_connection_unreferenced || e.code() == error_code_connection_idle ||
(g_network->isSimulated() && e.code() == error_code_checksum_failed);
if(self->compatible) {
TraceEvent(ok ? SevInfo : SevWarnAlways, "ConnectionClosed", conn ? conn->getDebugID() : UID()).error(e, true).suppressFor(1.0).detail("PeerAddr", self->destination);
TraceEvent(ok ? SevInfo : SevWarnAlways, "ConnectionClosed", conn ? conn->getDebugID() : UID())
.error(e, true)
.suppressFor(1.0)
.detail("PeerAddr", self->destination);
}
else {
TraceEvent(ok ? SevInfo : SevWarnAlways, "IncompatibleConnectionClosed", conn ? conn->getDebugID() : UID()).error(e, true).suppressFor(1.0).detail("PeerAddr", self->destination);
TraceEvent(ok ? SevInfo : SevWarnAlways, "IncompatibleConnectionClosed",
conn ? conn->getDebugID() : UID())
.error(e, true)
.suppressFor(1.0)
.detail("PeerAddr", self->destination);
}
if(self->destination.isPublic() && IFailureMonitor::failureMonitor().getState(self->destination).isAvailable()) {
@ -565,20 +601,25 @@ struct Peer : NonCopyable {
if(now() - it.second > FLOW_KNOBS->TOO_MANY_CONNECTIONS_CLOSED_RESET_DELAY) {
it.first = now();
} else if(now() - it.first > FLOW_KNOBS->TOO_MANY_CONNECTIONS_CLOSED_TIMEOUT) {
TraceEvent(SevWarnAlways, "TooManyConnectionsClosed", conn ? conn->getDebugID() : UID()).suppressFor(5.0).detail("PeerAddr", self->destination);
TraceEvent(SevWarnAlways, "TooManyConnectionsClosed", conn ? conn->getDebugID() : UID())
.suppressFor(5.0)
.detail("PeerAddr", self->destination);
self->transport->degraded->set(true);
}
it.second = now();
}
if (conn) {
if (FlowTransport::transport().isClient()) {
if (FlowTransport::transport().isClient() && e.code() != error_code_connection_idle) {
clientReconnectDelay = true;
}
conn->close();
conn = Reference<IConnection>();
}
IFailureMonitor::failureMonitor().notifyDisconnect( self->destination ); //< Clients might send more packets in response, which needs to go out on the next connection
// Clients might send more packets in response, which need to go out on the next connection
IFailureMonitor::failureMonitor().notifyDisconnect( self->destination );
if (e.code() == error_code_actor_cancelled) throw;
// Try to recover, even from serious errors, by retrying
@ -602,8 +643,8 @@ TransportData::~TransportData() {
}
ACTOR static void deliver(TransportData* self, Endpoint destination, ArenaReader reader, bool inReadSocket) {
int priority = self->endpoints.getPriority(destination.token);
if (priority < TaskReadSocket || !inReadSocket) {
TaskPriority priority = self->endpoints.getPriority(destination.token);
if (priority < TaskPriority::ReadSocket || !inReadSocket) {
wait( delay(0, priority) );
} else {
g_network->setCurrentTask( priority );
@ -637,7 +678,7 @@ ACTOR static void deliver(TransportData* self, Endpoint destination, ArenaReader
}
if( inReadSocket )
g_network->setCurrentTask( TaskReadSocket );
g_network->setCurrentTask( TaskPriority::ReadSocket );
}
static void scanPackets(TransportData* transport, uint8_t*& unprocessed_begin, const uint8_t* e, Arena& arena,
@ -797,7 +838,7 @@ ACTOR static Future<Void> connectionReader(
if (len == 0) break;
state int readBytes = conn->read(unprocessed_end, unprocessed_end + len);
if (readBytes == 0) break;
wait(yield(TaskReadSocket));
wait(yield(TaskPriority::ReadSocket));
totalReadBytes += readBytes;
unprocessed_end += readBytes;
}
@ -908,11 +949,11 @@ ACTOR static Future<Void> connectionReader(
if (readWillBlock)
break;
wait(yield(TaskReadSocket));
wait(yield(TaskPriority::ReadSocket));
}
wait( conn->onReadable() );
wait(delay(0, TaskReadSocket)); // We don't want to call conn->read directly from the reactor - we could get stuck in the reactor reading 1 packet at a time
wait(delay(0, TaskPriority::ReadSocket)); // We don't want to call conn->read directly from the reactor - we could get stuck in the reactor reading 1 packet at a time
}
}
catch (Error& e) {
@ -956,7 +997,7 @@ ACTOR static Future<Void> listen( TransportData* self, NetworkAddress listenAddr
.detail("FromAddress", conn->getPeerAddress())
.detail("ListenAddress", listenAddr.toString());
incoming.add( connectionIncoming(self, conn) );
wait(delay(0) || delay(FLOW_KNOBS->CONNECTION_ACCEPT_DELAY, TaskWriteSocket));
wait(delay(0) || delay(FLOW_KNOBS->CONNECTION_ACCEPT_DELAY, TaskPriority::WriteSocket));
}
} catch (Error& e) {
TraceEvent(SevError, "ListenError").error(e);
@ -1047,12 +1088,12 @@ Endpoint FlowTransport::loadedEndpoint( const UID& token ) {
return Endpoint(g_currentDeliveryPeerAddress, token);
}
void FlowTransport::addPeerReference( const Endpoint& endpoint, NetworkMessageReceiver* receiver ) {
if (FlowTransport::transport().isClient()) {
void FlowTransport::addPeerReference(const Endpoint& endpoint, bool isStream) {
if (!isStream || !endpoint.getPrimaryAddress().isValid())
return;
else if (FlowTransport::transport().isClient())
IFailureMonitor::failureMonitor().setStatus(endpoint.getPrimaryAddress(), FailureStatus(false));
}
if (!receiver->isStream() || !endpoint.getPrimaryAddress().isValid()) return;
Peer* peer = self->getPeer(endpoint.getPrimaryAddress());
if(peer->peerReferences == -1) {
peer->peerReferences = 1;
@ -1061,8 +1102,8 @@ void FlowTransport::addPeerReference( const Endpoint& endpoint, NetworkMessageRe
}
}
void FlowTransport::removePeerReference( const Endpoint& endpoint, NetworkMessageReceiver* receiver ) {
if (!receiver->isStream() || !endpoint.getPrimaryAddress().isValid()) return;
void FlowTransport::removePeerReference(const Endpoint& endpoint, bool isStream) {
if (!isStream || !endpoint.getPrimaryAddress().isValid()) return;
Peer* peer = self->getPeer(endpoint.getPrimaryAddress(), false);
if(peer) {
peer->peerReferences--;
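addPeerReference and removePeerReference now take a plain isStream flag, captured once when the FlowReceiver is constructed, rather than querying the receiver through a virtual call at release time. A minimal sketch of the reference-count lifecycle, under that reading of the change (peerReferences == -1 meaning "never explicitly referenced" follows the code above):

// Sketch only: the reference-count lifecycle behind add/removePeerReference.
struct PeerRefs {
    int peerReferences = -1;  // -1: no stream endpoint has referenced this peer yet

    void add()    { peerReferences = (peerReferences == -1) ? 1 : peerReferences + 1; }
    void remove() {
        if (peerReferences > 0) --peerReferences;
        // When this reaches 0 and no unsent data remains, connectionMonitor
        // may throw connection_unreferenced() to close the connection.
    }
};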
@ -1078,7 +1119,7 @@ void FlowTransport::removePeerReference( const Endpoint& endpoint, NetworkMessag
}
}
void FlowTransport::addEndpoint( Endpoint& endpoint, NetworkMessageReceiver* receiver, uint32_t taskID ) {
void FlowTransport::addEndpoint( Endpoint& endpoint, NetworkMessageReceiver* receiver, TaskPriority taskID ) {
endpoint.token = deterministicRandom()->randomUniqueID();
if (receiver->isStream()) {
endpoint.addresses = self->localAddresses;
@ -1094,7 +1135,7 @@ void FlowTransport::removeEndpoint( const Endpoint& endpoint, NetworkMessageRece
self->endpoints.remove(endpoint.token, receiver);
}
void FlowTransport::addWellKnownEndpoint( Endpoint& endpoint, NetworkMessageReceiver* receiver, uint32_t taskID ) {
void FlowTransport::addWellKnownEndpoint( Endpoint& endpoint, NetworkMessageReceiver* receiver, TaskPriority taskID ) {
endpoint.addresses = self->localAddresses;
ASSERT( ((endpoint.token.first() & TOKEN_STREAM_FLAG)!=0) == receiver->isStream() );
Endpoint::Token otoken = endpoint.token;
@ -1213,7 +1254,9 @@ static PacketID sendPacket( TransportData* self, ISerializeSource const& what, c
#endif
peer->send(pb, rp, firstUnsent);
if (destination.token != WLTOKEN_PING_PACKET) {
peer->lastDataPacketSentTime = now();
}
return (PacketID)rp;
}
}

View File

@ -132,19 +132,19 @@ public:
std::map<NetworkAddress, std::pair<uint64_t, double>>* getIncompatiblePeers();
// Returns the set of all peers that have attempted to connect but have incompatible protocol versions
void addPeerReference( const Endpoint&, NetworkMessageReceiver* );
void addPeerReference(const Endpoint&, bool isStream);
// Signal that a peer connection is being used, even if no messages are currently being sent to the peer
void removePeerReference( const Endpoint&, NetworkMessageReceiver* );
void removePeerReference(const Endpoint&, bool isStream);
// Signal that a peer connection is no longer being used
void addEndpoint( Endpoint& endpoint, NetworkMessageReceiver*, uint32_t taskID );
void addEndpoint( Endpoint& endpoint, NetworkMessageReceiver*, TaskPriority taskID );
// Sets endpoint to be a new local endpoint which delivers messages to the given receiver
void removeEndpoint( const Endpoint&, NetworkMessageReceiver* );
// The given local endpoint no longer delivers messages to the given receiver or uses resources
void addWellKnownEndpoint( Endpoint& endpoint, NetworkMessageReceiver*, uint32_t taskID );
void addWellKnownEndpoint( Endpoint& endpoint, NetworkMessageReceiver*, TaskPriority taskID );
// Sets endpoint to a new local endpoint (without changing its token) which delivers messages to the given receiver
// Implementations may have limitations on when this function is called and what endpoint.token may be!

View File

@ -179,7 +179,7 @@ Future< REPLY_TYPE(Request) > loadBalance(
Reference<MultiInterface<Multi>> alternatives,
RequestStream<Request> Interface::* channel,
Request request = Request(),
int taskID = TaskDefaultPromiseEndpoint,
TaskPriority taskID = TaskPriority::DefaultPromiseEndpoint,
bool atMostOnce = false, // if true, throws request_maybe_delivered() instead of retrying automatically
QueueModel* model = NULL)
{

View File

@ -70,6 +70,13 @@ struct IReplicationPolicy : public ReferenceCounted<IReplicationPolicy> {
return keys;
}
virtual void attributeKeys(std::set<std::string>*) const = 0;
// For flatbuffers, IReplicationPolicy is just encoded as a string using
// |serializeReplicationPolicy|. |writer| is a member of IReplicationPolicy
// so that this string outlives all calls to
// dynamic_size_traits<Reference<IReplicationPolicy>>::save
mutable BinaryWriter writer{ IncludeVersion() };
mutable bool alreadyWritten = false;
};
template <class Archive>
@ -276,12 +283,28 @@ void serializeReplicationPolicy(Ar& ar, Reference<IReplicationPolicy>& policy) {
template <>
struct dynamic_size_traits<Reference<IReplicationPolicy>> : std::true_type {
static WriteRawMemory save(const Reference<IReplicationPolicy>& value) {
BinaryWriter writer(IncludeVersion());
serializeReplicationPolicy(writer, const_cast<Reference<IReplicationPolicy>&>(value));
std::unique_ptr<uint8_t[]> memory(new uint8_t[writer.getLength()]);
memcpy(memory.get(), writer.getData(), writer.getLength());
return std::make_pair<OwnershipErasedPtr<const uint8_t>, size_t>(ownedPtr(const_cast<const uint8_t*>(memory.release())), writer.getLength());
static Block save(const Reference<IReplicationPolicy>& value) {
if (value.getPtr() == nullptr) {
static BinaryWriter writer{ IncludeVersion() };
writer = BinaryWriter{ IncludeVersion() };
serializeReplicationPolicy(writer, const_cast<Reference<IReplicationPolicy>&>(value));
return unownedPtr(const_cast<const uint8_t*>(reinterpret_cast<uint8_t*>(writer.getData())),
writer.getLength());
}
if (!value->alreadyWritten) {
serializeReplicationPolicy(value->writer, const_cast<Reference<IReplicationPolicy>&>(value));
value->alreadyWritten = true;
}
return unownedPtr(const_cast<const uint8_t*>(reinterpret_cast<uint8_t*>(value->writer.getData())),
value->writer.getLength());
}
static void serialization_done(const Reference<IReplicationPolicy>& value) {
if (value.getPtr() == nullptr) {
return;
}
value->alreadyWritten = false;
value->writer = BinaryWriter{ IncludeVersion() };
}
// Context is an arbitrary type that is plumbed by reference throughout the
@ -294,5 +317,6 @@ struct dynamic_size_traits<Reference<IReplicationPolicy>> : std::true_type {
}
};
static_assert(detail::has_serialization_done<dynamic_size_traits<Reference<IReplicationPolicy>>>::value);
#endif
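The save()/serialization_done() pair exists because flatbuffers may call save() more than once for the same value in a single pass, and the returned pointer must stay valid until the pass completes; the policy therefore caches its serialized bytes in the mutable writer and resets only when told. A stripped-down sketch of that contract, with std::string standing in for BinaryWriter:

#include <string>
#include <utility>

// Sketch only: the cache-until-done contract of dynamic_size_traits above.
struct CachedBlob {
    mutable std::string bytes;           // outlives every save() call in a pass
    mutable bool alreadyWritten = false;

    std::pair<const char*, size_t> save(const std::string& payload) const {
        if (!alreadyWritten) {
            bytes = payload;             // serialize once per pass
            alreadyWritten = true;
        }
        return { bytes.data(), bytes.size() };  // stable across repeated calls
    }
    void serializationDone() const {
        alreadyWritten = false;          // the next pass re-serializes
        bytes.clear();
    }
};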

View File

@ -47,7 +47,7 @@ bool firstInBatch(CommitTransactionRequest x) {
}
ACTOR template <class X>
Future<Void> batcher(PromiseStream<std::pair<std::vector<X>, int> > out, FutureStream<X> in, double avgMinDelay, double* avgMaxDelay, double emptyBatchTimeout, int maxCount, int desiredBytes, int maxBytes, Optional<PromiseStream<Void>> batchStartedStream, int64_t *commitBatchesMemBytesCount, int64_t commitBatchesMemBytesLimit, int taskID = TaskDefaultDelay, Counter* counter = 0)
Future<Void> batcher(PromiseStream<std::pair<std::vector<X>, int> > out, FutureStream<X> in, double avgMinDelay, double* avgMaxDelay, double emptyBatchTimeout, int maxCount, int desiredBytes, int maxBytes, Optional<PromiseStream<Void>> batchStartedStream, int64_t *commitBatchesMemBytesCount, int64_t commitBatchesMemBytesLimit, TaskPriority taskID = TaskPriority::DefaultDelay, Counter* counter = 0)
{
wait( delayJittered(*avgMaxDelay, taskID) ); // smooth out
// This is set up to deliver even zero-size batches if emptyBatchTimeout elapses, because that's what master proxy wants. The source control history

View File

@ -31,15 +31,19 @@
struct FlowReceiver : private NetworkMessageReceiver {
// Common endpoint code for NetSAV<> and NetNotifiedQueue<>
FlowReceiver() : m_isLocalEndpoint(false) {}
FlowReceiver(Endpoint const& remoteEndpoint) : endpoint(remoteEndpoint), m_isLocalEndpoint(false) {
FlowTransport::transport().addPeerReference(endpoint, this);
FlowReceiver() : m_isLocalEndpoint(false), m_stream(false) {
}
FlowReceiver(Endpoint const& remoteEndpoint, bool stream)
: endpoint(remoteEndpoint), m_isLocalEndpoint(false), m_stream(stream) {
FlowTransport::transport().addPeerReference(endpoint, m_stream);
}
~FlowReceiver() {
if (m_isLocalEndpoint) {
FlowTransport::transport().removeEndpoint(endpoint, this);
} else {
FlowTransport::transport().removePeerReference(endpoint, this);
FlowTransport::transport().removePeerReference(endpoint, m_stream);
}
}
@ -48,7 +52,7 @@ struct FlowReceiver : private NetworkMessageReceiver {
// If already a remote endpoint, returns that. Otherwise makes this
// a local endpoint and returns that.
const Endpoint& getEndpoint(int taskID) {
const Endpoint& getEndpoint(TaskPriority taskID) {
if (!endpoint.isValid()) {
m_isLocalEndpoint = true;
FlowTransport::transport().addEndpoint(endpoint, this, taskID);
@ -56,16 +60,17 @@ struct FlowReceiver : private NetworkMessageReceiver {
return endpoint;
}
void makeWellKnownEndpoint(Endpoint::Token token, int taskID) {
void makeWellKnownEndpoint(Endpoint::Token token, TaskPriority taskID) {
ASSERT(!endpoint.isValid());
m_isLocalEndpoint = true;
endpoint.token = token;
FlowTransport::transport().addWellKnownEndpoint(endpoint, this, taskID);
}
protected:
private:
Endpoint endpoint;
bool m_isLocalEndpoint;
bool m_stream;
};
template <class T>
@ -74,7 +79,9 @@ struct NetSAV : SAV<T>, FlowReceiver, FastAllocated<NetSAV<T>> {
using FastAllocated<NetSAV<T>>::operator delete;
NetSAV(int futures, int promises) : SAV<T>(futures, promises) {}
NetSAV(int futures, int promises, const Endpoint& remoteEndpoint) : SAV<T>(futures, promises), FlowReceiver(remoteEndpoint) {}
NetSAV(int futures, int promises, const Endpoint& remoteEndpoint)
: SAV<T>(futures, promises), FlowReceiver(remoteEndpoint, false) {
}
virtual void destroy() { delete this; }
virtual void receive(ArenaReader& reader) {
@ -128,7 +135,7 @@ public:
~ReplyPromise() { if (sav) sav->delPromiseRef(); }
ReplyPromise(const Endpoint& endpoint) : sav(new NetSAV<T>(0, 1, endpoint)) {}
const Endpoint& getEndpoint(int taskID = TaskDefaultPromiseEndpoint) const { return sav->getEndpoint(taskID); }
const Endpoint& getEndpoint(TaskPriority taskID = TaskPriority::DefaultPromiseEndpoint) const { return sav->getEndpoint(taskID); }
void operator=(const ReplyPromise& rhs) {
if (rhs.sav) rhs.sav->addPromiseRef();
@ -204,19 +211,19 @@ template <class Reply>
void resetReply(ReplyPromise<Reply> & p) { p.reset(); }
template <class Request>
void resetReply(Request& r, int taskID) { r.reply.reset(); r.reply.getEndpoint(taskID); }
void resetReply(Request& r, TaskPriority taskID) { r.reply.reset(); r.reply.getEndpoint(taskID); }
template <class Reply>
void resetReply(ReplyPromise<Reply> & p, int taskID) { p.reset(); p.getEndpoint(taskID); }
void resetReply(ReplyPromise<Reply> & p, TaskPriority taskID) { p.reset(); p.getEndpoint(taskID); }
template <class Request>
void setReplyPriority(Request& r, int taskID) { r.reply.getEndpoint(taskID); }
void setReplyPriority(Request& r, TaskPriority taskID) { r.reply.getEndpoint(taskID); }
template <class Reply>
void setReplyPriority(ReplyPromise<Reply> & p, int taskID) { p.getEndpoint(taskID); }
void setReplyPriority(ReplyPromise<Reply> & p, TaskPriority taskID) { p.getEndpoint(taskID); }
template <class Reply>
void setReplyPriority(const ReplyPromise<Reply> & p, int taskID) { p.getEndpoint(taskID); }
void setReplyPriority(const ReplyPromise<Reply> & p, TaskPriority taskID) { p.getEndpoint(taskID); }
@ -228,7 +235,8 @@ struct NetNotifiedQueue : NotifiedQueue<T>, FlowReceiver, FastAllocated<NetNotif
using FastAllocated<NetNotifiedQueue<T>>::operator delete;
NetNotifiedQueue(int futures, int promises) : NotifiedQueue<T>(futures, promises) {}
NetNotifiedQueue(int futures, int promises, const Endpoint& remoteEndpoint) : NotifiedQueue<T>(futures, promises), FlowReceiver(remoteEndpoint) {}
NetNotifiedQueue(int futures, int promises, const Endpoint& remoteEndpoint)
: NotifiedQueue<T>(futures, promises), FlowReceiver(remoteEndpoint, true) {}
virtual void destroy() { delete this; }
virtual void receive(ArenaReader& reader) {
@ -281,7 +289,7 @@ public:
return reportEndpointFailure(getReplyPromise(value).getFuture(), getEndpoint());
}
template <class X>
Future<REPLY_TYPE(X)> getReply(const X& value, int taskID) const {
Future<REPLY_TYPE(X)> getReply(const X& value, TaskPriority taskID) const {
setReplyPriority(value, taskID);
return getReply(value);
}
@ -290,7 +298,7 @@ public:
return getReply(ReplyPromise<X>());
}
template <class X>
Future<X> getReplyWithTaskID(int taskID) const {
Future<X> getReplyWithTaskID(TaskPriority taskID) const {
ReplyPromise<X> reply;
reply.getEndpoint(taskID);
return getReply(reply);
@ -302,7 +310,7 @@ public:
// If cancelled or returns failure, request was or will be delivered zero or one times.
// The caller must be capable of retrying if this request returns failure
template <class X>
Future<ErrorOr<REPLY_TYPE(X)>> tryGetReply(const X& value, int taskID) const {
Future<ErrorOr<REPLY_TYPE(X)>> tryGetReply(const X& value, TaskPriority taskID) const {
setReplyPriority(value, taskID);
if (queue->isRemoteEndpoint()) {
Future<Void> disc = makeDependent<T>(IFailureMonitor::failureMonitor()).onDisconnectOrFailure(getEndpoint(taskID));
@ -344,7 +352,7 @@ public:
// If it returns failure, the failure detector considers the endpoint failed permanently or for the given amount of time
// See IFailureMonitor::onFailedFor() for an explanation of the duration and slope parameters.
template <class X>
Future<ErrorOr<REPLY_TYPE(X)>> getReplyUnlessFailedFor(const X& value, double sustainedFailureDuration, double sustainedFailureSlope, int taskID) const {
Future<ErrorOr<REPLY_TYPE(X)>> getReplyUnlessFailedFor(const X& value, double sustainedFailureDuration, double sustainedFailureSlope, TaskPriority taskID) const {
// If it is local endpoint, no need for failure monitoring
return waitValueOrSignal(getReply(value, taskID),
makeDependent<T>(IFailureMonitor::failureMonitor()).onFailedFor(getEndpoint(taskID), sustainedFailureDuration, sustainedFailureSlope),
@ -388,8 +396,8 @@ public:
//queue = (NetNotifiedQueue<T>*)0xdeadbeef;
}
Endpoint getEndpoint(int taskID = TaskDefaultEndpoint) const { return queue->getEndpoint(taskID); }
void makeWellKnownEndpoint(Endpoint::Token token, int taskID) {
Endpoint getEndpoint(TaskPriority taskID = TaskPriority::DefaultEndpoint) const { return queue->getEndpoint(taskID); }
void makeWellKnownEndpoint(Endpoint::Token token, TaskPriority taskID) {
queue->makeWellKnownEndpoint(token, taskID);
}
@ -425,7 +433,10 @@ struct serializable_traits<RequestStream<T>> : std::true_type {
} else {
const auto& ep = stream.getEndpoint();
serializer(ar, ep);
UNSTOPPABLE_ASSERT(ep.getPrimaryAddress().isValid()); // No serializing PromiseStreams on a client with no public address
if constexpr (Archiver::isSerializing) { // Don't assert this when collecting vtable for flatbuffers
UNSTOPPABLE_ASSERT(ep.getPrimaryAddress()
.isValid()); // No serializing PromiseStreams on a client with no public address
}
}
}
};

View File

@ -50,7 +50,7 @@ Future<REPLY_TYPE(Req)> retryBrokenPromise( RequestStream<Req> to, Req request )
}
ACTOR template <class Req>
Future<REPLY_TYPE(Req)> retryBrokenPromise( RequestStream<Req> to, Req request, int taskID ) {
Future<REPLY_TYPE(Req)> retryBrokenPromise( RequestStream<Req> to, Req request, TaskPriority taskID ) {
// Like to.getReply(request), except that a broken_promise exception results in retrying request immediately.
// Suitable for use with well known endpoints, which are likely to return to existence after the other process restarts.
// Not normally useful for ordinary endpoints, which conventionally are permanently destroyed after replying with broken_promise.

View File

@ -380,9 +380,17 @@ private:
ACTOR static Future<Void> trackLeakedConnection( Sim2Conn* self ) {
wait( g_simulator.onProcess( self->process ) );
// SOMEDAY: Make this value variable? Dependent on buggification status?
wait( delay( 20.0 ) );
TraceEvent(SevError, "LeakedConnection", self->dbgid).error(connection_leaked()).detail("MyAddr", self->process->address).detail("PeerAddr", self->peerEndpoint).detail("PeerId", self->peerId).detail("Opened", self->opened);
if (self->process->address.isPublic()) {
wait( delay( FLOW_KNOBS->CONNECTION_MONITOR_IDLE_TIMEOUT * FLOW_KNOBS->CONNECTION_MONITOR_IDLE_TIMEOUT * 1.5 ) );
} else {
wait( delay( FLOW_KNOBS->CONNECTION_MONITOR_IDLE_TIMEOUT * 1.5 ) );
}
TraceEvent(SevError, "LeakedConnection", self->dbgid)
.error(connection_leaked())
.detail("MyAddr", self->process->address)
.detail("PeerAddr", self->peerEndpoint)
.detail("PeerId", self->peerId)
.detail("Opened", self->opened);
return Void();
}
};
@ -423,7 +431,7 @@ public:
ACTOR static Future<Reference<IAsyncFile>> open( std::string filename, int flags, int mode,
Reference<DiskParameters> diskParameters = Reference<DiskParameters>(new DiskParameters(25000, 150000000)), bool delayOnWrite = true ) {
state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
state int currentTaskID = g_network->getCurrentTask();
state TaskPriority currentTaskID = g_network->getCurrentTask();
if(++openCount >= 3000) {
TraceEvent(SevError, "TooManyFiles");
@ -742,11 +750,11 @@ public:
// Everything actually network related is delegated to the Sim2Net class; Sim2 is only concerned with simulating machines and time
virtual double now() { return time; }
virtual Future<class Void> delay( double seconds, int taskID ) {
ASSERT(taskID >= TaskMinPriority && taskID <= TaskMaxPriority);
virtual Future<class Void> delay( double seconds, TaskPriority taskID ) {
ASSERT(taskID >= TaskPriority::Min && taskID <= TaskPriority::Max);
return delay( seconds, taskID, currentProcess );
}
Future<class Void> delay( double seconds, int taskID, ProcessInfo* machine ) {
Future<class Void> delay( double seconds, TaskPriority taskID, ProcessInfo* machine ) {
ASSERT( seconds >= -0.0001 );
seconds = std::max(0.0, seconds);
Future<Void> f;
@ -761,13 +769,13 @@ public:
return f;
}
ACTOR static Future<Void> checkShutdown(Sim2 *self, int taskID) {
ACTOR static Future<Void> checkShutdown(Sim2 *self, TaskPriority taskID) {
wait(success(self->getCurrentProcess()->shutdownSignal.getFuture()));
self->setCurrentTask(taskID);
return Void();
}
virtual Future<class Void> yield( int taskID ) {
if (taskID == TaskDefaultYield) taskID = currentTaskID;
virtual Future<class Void> yield( TaskPriority taskID ) {
if (taskID == TaskPriority::DefaultYield) taskID = currentTaskID;
if (check_yield(taskID)) {
// We want to check that yielders can handle actual time elapsing (it sometimes will outside simulation), but
// don't want to prevent instantaneous shutdown of "rebooted" machines.
@ -776,7 +784,7 @@ public:
setCurrentTask(taskID);
return Void();
}
virtual bool check_yield( int taskID ) {
virtual bool check_yield( TaskPriority taskID ) {
if (yielded) return true;
if (--yield_limit <= 0) {
yield_limit = deterministicRandom()->randomInt(1, 150); // If yield returns false *too* many times in a row, there could be a stack overflow, since we can't deterministically check stack size as the real network does
@ -784,10 +792,10 @@ public:
}
return yielded = BUGGIFY_WITH_PROB(0.01);
}
virtual int getCurrentTask() {
virtual TaskPriority getCurrentTask() {
return currentTaskID;
}
virtual void setCurrentTask(int taskID ) {
virtual void setCurrentTask(TaskPriority taskID ) {
currentTaskID = taskID;
}
// Sets the taskID/priority of the current task, without yielding
@ -924,7 +932,7 @@ public:
}
if ( mustBeDurable || deterministicRandom()->random01() < 0.5 ) {
state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
state int currentTaskID = g_network->getCurrentTask();
state TaskPriority currentTaskID = g_network->getCurrentTask();
wait( g_simulator.onMachine( currentProcess ) );
try {
wait( ::delay(0.05 * deterministicRandom()->random01()) );
@ -950,7 +958,7 @@ public:
ACTOR static Future<Void> runLoop(Sim2 *self) {
state ISimulator::ProcessInfo *callingMachine = self->currentProcess;
while ( !self->isStopped ) {
wait( self->net2->yield(TaskDefaultYield) );
wait( self->net2->yield(TaskPriority::DefaultYield) );
self->mutex.enter();
if( self->tasks.size() == 0 ) {
@ -1580,23 +1588,23 @@ public:
machines.erase(machineId);
}
Sim2(bool objSerializer) : time(0.0), taskCount(0), yielded(false), yield_limit(0), currentTaskID(-1) {
Sim2(bool objSerializer) : time(0.0), taskCount(0), yielded(false), yield_limit(0), currentTaskID(TaskPriority::Zero) {
// Not letting currentProcess be NULL eliminates some annoying special cases
currentProcess = new ProcessInfo("NoMachine", LocalityData(Optional<Standalone<StringRef>>(), StringRef(), StringRef(), StringRef()), ProcessClass(), {NetworkAddress()}, this, "", "");
g_network = net2 = newNet2(false, true, objSerializer);
Net2FileSystem::newFileSystem();
check_yield(0);
check_yield(TaskPriority::Zero);
}
// Implementation
struct Task {
int taskID;
TaskPriority taskID;
double time;
uint64_t stable;
ProcessInfo* machine;
Promise<Void> action;
Task( double time, int taskID, uint64_t stable, ProcessInfo* machine, Promise<Void>&& action ) : time(time), taskID(taskID), stable(stable), machine(machine), action(std::move(action)) {}
Task( double time, int taskID, uint64_t stable, ProcessInfo* machine, Future<Void>& future ) : time(time), taskID(taskID), stable(stable), machine(machine) { future = action.getFuture(); }
Task( double time, TaskPriority taskID, uint64_t stable, ProcessInfo* machine, Promise<Void>&& action ) : time(time), taskID(taskID), stable(stable), machine(machine), action(std::move(action)) {}
Task( double time, TaskPriority taskID, uint64_t stable, ProcessInfo* machine, Future<Void>& future ) : time(time), taskID(taskID), stable(stable), machine(machine) { future = action.getFuture(); }
Task(Task&& rhs) BOOST_NOEXCEPT : time(rhs.time), taskID(rhs.taskID), stable(rhs.stable), machine(rhs.machine), action(std::move(rhs.action)) {}
void operator= ( Task const& rhs ) { taskID = rhs.taskID; time = rhs.time; stable = rhs.stable; machine = rhs.machine; action = rhs.action; }
Task( Task const& rhs ) : taskID(rhs.taskID), time(rhs.time), stable(rhs.stable), machine(rhs.machine), action(rhs.action) {}
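The stable counter is what keeps simulated scheduling deterministic: tasks with equal times are ordered by their monotonically increasing sequence number rather than by anything run-dependent. A sketch of one such ordering for a std::priority_queue (an assumption about intent, not code from this diff):

#include <cstdint>
#include <queue>
#include <vector>

// Sketch only: deterministic ordering for a simulated task queue.
// priority_queue pops the "largest" element, so the comparison is reversed to
// pop the earliest time first, with the sequence number breaking ties.
struct SimTask {
    double time;
    uint64_t stable;  // monotonically increasing at scheduling time
};

struct LaterFirst {
    bool operator()(const SimTask& a, const SimTask& b) const {
        if (a.time != b.time) return a.time > b.time;
        return a.stable > b.stable;
    }
};

int main() {
    std::priority_queue<SimTask, std::vector<SimTask>, LaterFirst> tasks;
    tasks.push({ 2.0, 0 });
    tasks.push({ 1.0, 2 });
    tasks.push({ 1.0, 1 });
    // Pops (1.0, 1), then (1.0, 2), then (2.0, 0); the same order on every run.
}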
@ -1643,23 +1651,23 @@ public:
}
}
virtual void onMainThread( Promise<Void>&& signal, int taskID ) {
virtual void onMainThread( Promise<Void>&& signal, TaskPriority taskID ) {
// This is presumably coming either from a "fake" thread pool thread (i.e., it is actually on this thread)
// or from a thread created with g_network->startThread
ASSERT(getCurrentProcess());
mutex.enter();
ASSERT(taskID >= TaskMinPriority && taskID <= TaskMaxPriority);
ASSERT(taskID >= TaskPriority::Min && taskID <= TaskPriority::Max);
tasks.push( Task( time, taskID, taskCount++, getCurrentProcess(), std::move(signal) ) );
mutex.leave();
}
bool isOnMainThread() const override {
return net2->isOnMainThread();
}
virtual Future<Void> onProcess( ISimulator::ProcessInfo *process, int taskID ) {
virtual Future<Void> onProcess( ISimulator::ProcessInfo *process, TaskPriority taskID ) {
return delay( 0, taskID, process );
}
virtual Future<Void> onMachine( ISimulator::ProcessInfo *process, int taskID ) {
virtual Future<Void> onMachine( ISimulator::ProcessInfo *process, TaskPriority taskID ) {
if( process->machine == 0 )
return Void();
return delay( 0, taskID, process->machine->machineProcess );
@ -1668,7 +1676,7 @@ public:
//time is guarded by ISimulator::mutex. It is not necessary to guard reads on the main thread because
//time should only be modified from the main thread.
double time;
int currentTaskID;
TaskPriority currentTaskID;
//taskCount is guarded by ISimulator::mutex
uint64_t taskCount;
@ -1698,9 +1706,9 @@ void startNewSimulator(bool objSerializer) {
}
ACTOR void doReboot( ISimulator::ProcessInfo *p, ISimulator::KillType kt ) {
TraceEvent("RebootingProcessAttempt").detail("ZoneId", p->locality.zoneId()).detail("KillType", kt).detail("Process", p->toString()).detail("StartingClass", p->startingClass.toString()).detail("Failed", p->failed).detail("Excluded", p->excluded).detail("Cleared", p->cleared).detail("Rebooting", p->rebooting).detail("TaskDefaultDelay", TaskDefaultDelay);
TraceEvent("RebootingProcessAttempt").detail("ZoneId", p->locality.zoneId()).detail("KillType", kt).detail("Process", p->toString()).detail("StartingClass", p->startingClass.toString()).detail("Failed", p->failed).detail("Excluded", p->excluded).detail("Cleared", p->cleared).detail("Rebooting", p->rebooting).detail("TaskPriorityDefaultDelay", TaskPriority::DefaultDelay);
wait( g_sim2.delay( 0, TaskDefaultDelay, p ) ); // Switch to the machine in question
wait( g_sim2.delay( 0, TaskPriority::DefaultDelay, p ) ); // Switch to the machine in question
try {
ASSERT( kt == ISimulator::RebootProcess || kt == ISimulator::Reboot || kt == ISimulator::RebootAndDelete || kt == ISimulator::RebootProcessAndDelete );

View File

@ -137,8 +137,8 @@ public:
ProcessInfo* getProcess( Endpoint const& endpoint ) { return getProcessByAddress(endpoint.getPrimaryAddress()); }
ProcessInfo* getCurrentProcess() { return currentProcess; }
virtual Future<Void> onProcess( ISimulator::ProcessInfo *process, int taskID = -1 ) = 0;
virtual Future<Void> onMachine( ISimulator::ProcessInfo *process, int taskID = -1 ) = 0;
virtual Future<Void> onProcess( ISimulator::ProcessInfo *process, TaskPriority taskID = TaskPriority::Zero ) = 0;
virtual Future<Void> onMachine( ISimulator::ProcessInfo *process, TaskPriority taskID = TaskPriority::Zero ) = 0;
virtual ProcessInfo* newProcess(const char* name, IPAddress ip, uint16_t port, uint16_t listenPerProcess,
LocalityData locality, ProcessClass startingClass, const char* dataFolder,

View File

@ -107,7 +107,7 @@ public:
DBInfo() : masterRegistrationCount(0), recoveryStalled(false), forceRecovery(false), unfinishedRecoveries(0), logGenerations(0),
clientInfo( new AsyncVar<ClientDBInfo>( ClientDBInfo() ) ),
serverInfo( new AsyncVar<ServerDBInfo>( ServerDBInfo() ) ),
db( DatabaseContext::create( clientInfo, Future<Void>(), LocalityData(), true, TaskDefaultEndpoint, true ) ) // SOMEDAY: Locality!
db( DatabaseContext::create( clientInfo, Future<Void>(), LocalityData(), true, TaskPriority::DefaultEndpoint, true ) ) // SOMEDAY: Locality!
{
}
@ -1171,7 +1171,7 @@ public:
serverInfo.clusterInterface = ccInterface;
serverInfo.myLocality = locality;
db.serverInfo->set( serverInfo );
cx = openDBOnServer(db.serverInfo, TaskDefaultEndpoint, true, true);
cx = openDBOnServer(db.serverInfo, TaskPriority::DefaultEndpoint, true, true);
}
~ClusterControllerData() {
@ -1425,7 +1425,7 @@ void checkBetterDDOrRK(ClusterControllerData* self) {
rkFitness = ProcessClass::ExcludeFit;
}
if (self->isProxyOrResolver(rkWorker.details.interf.locality.processId()) || rkFitness > bestFitnessForRK) {
TraceEvent("CC_HaltRK", self->id).detail("RKID", db.ratekeeper.get().id())
TraceEvent("CCHaltRK", self->id).detail("RKID", db.ratekeeper.get().id())
.detail("Excluded", rkWorker.priorityInfo.isExcluded)
.detail("Fitness", rkFitness).detail("BestFitness", bestFitnessForRK);
self->recruitRatekeeper.set(true);
@ -1439,7 +1439,7 @@ void checkBetterDDOrRK(ClusterControllerData* self) {
ddFitness = ProcessClass::ExcludeFit;
}
if (self->isProxyOrResolver(ddWorker.details.interf.locality.processId()) || ddFitness > bestFitnessForDD) {
TraceEvent("CC_HaltDD", self->id).detail("DDID", db.distributor.get().id())
TraceEvent("CCHaltDD", self->id).detail("DDID", db.distributor.get().id())
.detail("Excluded", ddWorker.priorityInfo.isExcluded)
.detail("Fitness", ddFitness).detail("BestFitness", bestFitnessForDD);
ddWorker.haltDistributor = brokenPromiseToNever(db.distributor.get().haltDataDistributor.getReply(HaltDataDistributorRequest(self->id)));
@ -1920,13 +1920,13 @@ void registerWorker( RegisterWorkerRequest req, ClusterControllerData *self ) {
self->clusterControllerDcId == req.distributorInterf.get().locality.dcId() &&
!self->recruitingDistributor) {
const DataDistributorInterface& di = req.distributorInterf.get();
TraceEvent("CC_RegisterDataDistributor", self->id).detail("DDID", di.id());
TraceEvent("CCRegisterDataDistributor", self->id).detail("DDID", di.id());
self->db.setDistributor(di);
}
if (req.ratekeeperInterf.present()) {
if((self->recruitingRatekeeperID.present() && self->recruitingRatekeeperID.get() != req.ratekeeperInterf.get().id()) ||
self->clusterControllerDcId != w.locality.dcId()) {
TraceEvent("CC_HaltRegisteringRatekeeper", self->id).detail("RKID", req.ratekeeperInterf.get().id())
TraceEvent("CCHaltRegisteringRatekeeper", self->id).detail("RKID", req.ratekeeperInterf.get().id())
.detail("DcID", printable(self->clusterControllerDcId))
.detail("ReqDcID", printable(w.locality.dcId()))
.detail("RecruitingRKID", self->recruitingRatekeeperID.present() ? self->recruitingRatekeeperID.get() : UID());
@ -1934,9 +1934,9 @@ void registerWorker( RegisterWorkerRequest req, ClusterControllerData *self ) {
} else if(!self->recruitingRatekeeperID.present()) {
const RatekeeperInterface& rki = req.ratekeeperInterf.get();
const auto& ratekeeper = self->db.serverInfo->get().ratekeeper;
TraceEvent("CC_RegisterRatekeeper", self->id).detail("RKID", rki.id());
TraceEvent("CCRegisterRatekeeper", self->id).detail("RKID", rki.id());
if (ratekeeper.present() && ratekeeper.get().id() != rki.id() && self->id_worker.count(ratekeeper.get().locality.processId())) {
TraceEvent("CC_HaltPreviousRatekeeper", self->id).detail("RKID", ratekeeper.get().id())
TraceEvent("CCHaltPreviousRatekeeper", self->id).detail("RKID", ratekeeper.get().id())
.detail("DcID", printable(self->clusterControllerDcId))
.detail("ReqDcID", printable(w.locality.dcId()))
.detail("RecruitingRKID", self->recruitingRatekeeperID.present() ? self->recruitingRatekeeperID.get() : UID());
@ -2475,7 +2475,7 @@ ACTOR Future<Void> handleForcedRecoveries( ClusterControllerData *self, ClusterC
ACTOR Future<DataDistributorInterface> startDataDistributor( ClusterControllerData *self ) {
wait(delay(0.0)); // If master fails at the same time, give it a chance to clear master PID.
TraceEvent("CC_StartDataDistributor", self->id);
TraceEvent("CCStartDataDistributor", self->id);
loop {
try {
state bool no_distributor = !self->db.serverInfo->get().distributor.present();
@ -2494,16 +2494,16 @@ ACTOR Future<DataDistributorInterface> startDataDistributor( ClusterControllerDa
}
InitializeDataDistributorRequest req(deterministicRandom()->randomUniqueID());
TraceEvent("CC_DataDistributorRecruit", self->id).detail("Addr", worker.interf.address());
TraceEvent("CCDataDistributorRecruit", self->id).detail("Addr", worker.interf.address());
ErrorOr<DataDistributorInterface> distributor = wait( worker.interf.dataDistributor.getReplyUnlessFailedFor(req, SERVER_KNOBS->WAIT_FOR_DISTRIBUTOR_JOIN_DELAY, 0) );
if (distributor.present()) {
TraceEvent("CC_DataDistributorRecruited", self->id).detail("Addr", worker.interf.address());
TraceEvent("CCDataDistributorRecruited", self->id).detail("Addr", worker.interf.address());
return distributor.get();
}
}
catch (Error& e) {
TraceEvent("CC_DataDistributorRecruitError", self->id).error(e);
TraceEvent("CCDataDistributorRecruitError", self->id).error(e);
if ( e.code() != error_code_no_more_servers ) {
throw;
}
@ -2520,7 +2520,7 @@ ACTOR Future<Void> monitorDataDistributor(ClusterControllerData *self) {
loop {
if ( self->db.serverInfo->get().distributor.present() ) {
wait( waitFailureClient( self->db.serverInfo->get().distributor.get().waitFailure, SERVER_KNOBS->DD_FAILURE_TIME ) );
TraceEvent("CC_DataDistributorDied", self->id)
TraceEvent("CCDataDistributorDied", self->id)
.detail("DistributorId", self->db.serverInfo->get().distributor.get().id());
self->db.clearInterf(ProcessClass::DataDistributorClass);
} else {
@ -2535,7 +2535,7 @@ ACTOR Future<Void> monitorDataDistributor(ClusterControllerData *self) {
ACTOR Future<Void> startRatekeeper(ClusterControllerData *self) {
wait(delay(0.0)); // If master fails at the same time, give it a chance to clear master PID.
TraceEvent("CC_StartRatekeeper", self->id);
TraceEvent("CCStartRatekeeper", self->id);
loop {
try {
state bool no_ratekeeper = !self->db.serverInfo->get().ratekeeper.present();
@ -2556,16 +2556,16 @@ ACTOR Future<Void> startRatekeeper(ClusterControllerData *self) {
}
self->recruitingRatekeeperID = req.reqId;
TraceEvent("CC_RecruitRatekeeper", self->id).detail("Addr", worker.interf.address()).detail("RKID", req.reqId);
TraceEvent("CCRecruitRatekeeper", self->id).detail("Addr", worker.interf.address()).detail("RKID", req.reqId);
ErrorOr<RatekeeperInterface> interf = wait( worker.interf.ratekeeper.getReplyUnlessFailedFor(req, SERVER_KNOBS->WAIT_FOR_RATEKEEPER_JOIN_DELAY, 0) );
if (interf.present()) {
self->recruitRatekeeper.set(false);
self->recruitingRatekeeperID = interf.get().id();
const auto& ratekeeper = self->db.serverInfo->get().ratekeeper;
TraceEvent("CC_RatekeeperRecruited", self->id).detail("Addr", worker.interf.address()).detail("RKID", interf.get().id());
TraceEvent("CCRatekeeperRecruited", self->id).detail("Addr", worker.interf.address()).detail("RKID", interf.get().id());
if (ratekeeper.present() && ratekeeper.get().id() != interf.get().id() && self->id_worker.count(ratekeeper.get().locality.processId())) {
TraceEvent("CC_HaltRatekeeperAfterRecruit", self->id).detail("RKID", ratekeeper.get().id())
TraceEvent("CCHaltRatekeeperAfterRecruit", self->id).detail("RKID", ratekeeper.get().id())
.detail("DcID", printable(self->clusterControllerDcId));
self->id_worker[ratekeeper.get().locality.processId()].haltRatekeeper = brokenPromiseToNever(ratekeeper.get().haltRatekeeper.getReply(HaltRatekeeperRequest(self->id)));
}
@ -2577,7 +2577,7 @@ ACTOR Future<Void> startRatekeeper(ClusterControllerData *self) {
}
}
catch (Error& e) {
TraceEvent("CC_RatekeeperRecruitError", self->id).error(e);
TraceEvent("CCRatekeeperRecruitError", self->id).error(e);
if ( e.code() != error_code_no_more_servers ) {
throw;
}
@ -2595,7 +2595,7 @@ ACTOR Future<Void> monitorRatekeeper(ClusterControllerData *self) {
if ( self->db.serverInfo->get().ratekeeper.present() && !self->recruitRatekeeper.get() ) {
choose {
when(wait(waitFailureClient( self->db.serverInfo->get().ratekeeper.get().waitFailure, SERVER_KNOBS->RATEKEEPER_FAILURE_TIME ))) {
TraceEvent("CC_RatekeeperDied", self->id)
TraceEvent("CCRatekeeperDied", self->id)
.detail("RKID", self->db.serverInfo->get().ratekeeper.get().id());
self->db.clearInterf(ProcessClass::RatekeeperClass);
}

View File

@ -63,13 +63,13 @@ struct ClusterControllerFullInterface {
void initEndpoints() {
clientInterface.initEndpoints();
recruitFromConfiguration.getEndpoint( TaskClusterController );
recruitRemoteFromConfiguration.getEndpoint( TaskClusterController );
recruitStorage.getEndpoint( TaskClusterController );
registerWorker.getEndpoint( TaskClusterController );
getWorkers.getEndpoint( TaskClusterController );
registerMaster.getEndpoint( TaskClusterController );
getServerDBInfo.getEndpoint( TaskClusterController );
recruitFromConfiguration.getEndpoint( TaskPriority::ClusterController );
recruitRemoteFromConfiguration.getEndpoint( TaskPriority::ClusterController );
recruitStorage.getEndpoint( TaskPriority::ClusterController );
registerWorker.getEndpoint( TaskPriority::ClusterController );
getWorkers.getEndpoint( TaskPriority::ClusterController );
registerMaster.getEndpoint( TaskPriority::ClusterController );
getServerDBInfo.getEndpoint( TaskPriority::ClusterController );
}
template <class Ar>

View File

@ -53,8 +53,8 @@ GenerationRegInterface::GenerationRegInterface( NetworkAddress remote )
GenerationRegInterface::GenerationRegInterface( INetwork* local )
{
read.makeWellKnownEndpoint( WLTOKEN_GENERATIONREG_READ, TaskCoordination );
write.makeWellKnownEndpoint( WLTOKEN_GENERATIONREG_WRITE, TaskCoordination );
read.makeWellKnownEndpoint( WLTOKEN_GENERATIONREG_READ, TaskPriority::Coordination );
write.makeWellKnownEndpoint( WLTOKEN_GENERATIONREG_WRITE, TaskPriority::Coordination );
}
LeaderElectionRegInterface::LeaderElectionRegInterface(NetworkAddress remote)
@ -68,9 +68,9 @@ LeaderElectionRegInterface::LeaderElectionRegInterface(NetworkAddress remote)
LeaderElectionRegInterface::LeaderElectionRegInterface(INetwork* local)
: ClientLeaderRegInterface(local)
{
candidacy.makeWellKnownEndpoint( WLTOKEN_LEADERELECTIONREG_CANDIDACY, TaskCoordination );
leaderHeartbeat.makeWellKnownEndpoint( WLTOKEN_LEADERELECTIONREG_LEADERHEARTBEAT, TaskCoordination );
forward.makeWellKnownEndpoint( WLTOKEN_LEADERELECTIONREG_FORWARD, TaskCoordination );
candidacy.makeWellKnownEndpoint( WLTOKEN_LEADERELECTIONREG_CANDIDACY, TaskPriority::Coordination );
leaderHeartbeat.makeWellKnownEndpoint( WLTOKEN_LEADERELECTIONREG_LEADERHEARTBEAT, TaskPriority::Coordination );
forward.makeWellKnownEndpoint( WLTOKEN_LEADERELECTIONREG_FORWARD, TaskPriority::Coordination );
}
ServerCoordinators::ServerCoordinators( Reference<ClusterConnectionFile> cf )

View File

@ -263,7 +263,7 @@ typedef WorkPool<Coroutine, ThreadUnsafeSpinLock, true> CoroPool;
ACTOR void coroSwitcher( Future<Void> what, int taskID, Coro* coro ) {
ACTOR void coroSwitcher( Future<Void> what, TaskPriority taskID, Coro* coro ) {
try {
// state double t = now();
wait(what);

View File

@ -76,14 +76,15 @@ struct CoreTLogSet {
struct OldTLogCoreData {
std::vector<CoreTLogSet> tLogs;
int32_t logRouterTags;
int32_t txsTags;
Version epochEnd;
std::set<int8_t> pseudoLocalities;
OldTLogCoreData() : epochEnd(0), logRouterTags(0) {}
OldTLogCoreData() : epochEnd(0), logRouterTags(0), txsTags(0) {}
explicit OldTLogCoreData(const OldLogData&);
bool operator == (OldTLogCoreData const& rhs) const {
return tLogs == rhs.tLogs && logRouterTags == rhs.logRouterTags && epochEnd == rhs.epochEnd && pseudoLocalities == rhs.pseudoLocalities;
return tLogs == rhs.tLogs && logRouterTags == rhs.logRouterTags && txsTags == rhs.txsTags && epochEnd == rhs.epochEnd && pseudoLocalities == rhs.pseudoLocalities;
}
template <class Archive>
@ -99,18 +100,22 @@ struct OldTLogCoreData {
if (ar.protocolVersion().hasPseudoLocalities()) {
serializer(ar, pseudoLocalities);
}
if (ar.protocolVersion().hasShardedTxsTags()) {
serializer(ar, txsTags);
}
}
};
struct DBCoreState {
std::vector<CoreTLogSet> tLogs;
int32_t logRouterTags;
int32_t txsTags;
std::vector<OldTLogCoreData> oldTLogData;
DBRecoveryCount recoveryCount; // Increases with sequential successful recoveries.
LogSystemType logSystemType;
std::set<int8_t> pseudoLocalities;
DBCoreState() : logRouterTags(0), recoveryCount(0), logSystemType(LogSystemType::empty) {}
DBCoreState() : logRouterTags(0), txsTags(0), recoveryCount(0), logSystemType(LogSystemType::empty) {}
vector<UID> getPriorCommittedLogServers() {
vector<UID> priorCommittedLogServers;
@ -130,7 +135,7 @@ struct DBCoreState {
}
bool isEqual(DBCoreState const& r) const {
return logSystemType == r.logSystemType && recoveryCount == r.recoveryCount && tLogs == r.tLogs && oldTLogData == r.oldTLogData && logRouterTags == r.logRouterTags && pseudoLocalities == r.pseudoLocalities;
return logSystemType == r.logSystemType && recoveryCount == r.recoveryCount && tLogs == r.tLogs && oldTLogData == r.oldTLogData && logRouterTags == r.logRouterTags && txsTags == r.txsTags && pseudoLocalities == r.pseudoLocalities;
}
bool operator == ( const DBCoreState& rhs ) const { return isEqual(rhs); }
@ -148,6 +153,9 @@ struct DBCoreState {
if (ar.protocolVersion().hasPseudoLocalities()) {
serializer(ar, pseudoLocalities);
}
if (ar.protocolVersion().hasShardedTxsTags()) {
serializer(ar, txsTags);
}
} else if(ar.isDeserializing) {
tLogs.push_back(CoreTLogSet());
serializer(ar, tLogs[0].tLogs, tLogs[0].tLogWriteAntiQuorum, recoveryCount, tLogs[0].tLogReplicationFactor, logSystemType);
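txsTags joins the wire format behind a protocol-version check, the same pattern already used for pseudoLocalities: old encodings simply omit the field, and both sides agree on its presence from the version. A self-contained sketch of version-gated fields (the version constant and framing here are invented):

#include <cstdint>
#include <cassert>
#include <vector>

// Sketch only: append new fields behind a version check so old encodings stay
// readable. The version constant is made up.
constexpr uint64_t HAS_SHARDED_TXS_TAGS = 100;

struct State {
    int32_t logRouterTags = 0;
    int32_t txsTags = 0;
};

std::vector<int32_t> save(const State& s, uint64_t version) {
    std::vector<int32_t> out{ s.logRouterTags };
    if (version >= HAS_SHARDED_TXS_TAGS) out.push_back(s.txsTags);
    return out;
}

State load(const std::vector<int32_t>& in, uint64_t version) {
    State s;
    size_t i = 0;
    s.logRouterTags = in[i++];
    if (version >= HAS_SHARDED_TXS_TAGS) s.txsTags = in[i++];
    return s;
}

int main() {
    State s{ 3, 5 };
    auto oldWire = save(s, /*version=*/99);               // pre-txsTags peer
    assert(load(oldWire, 99).txsTags == 0);               // default survives
    auto newWire = save(s, HAS_SHARDED_TXS_TAGS);
    assert(load(newWire, HAS_SHARDED_TXS_TAGS).txsTags == 5);
}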

View File

@ -88,7 +88,7 @@ struct TCMachineInfo : public ReferenceCounted<TCMachineInfo> {
ACTOR Future<Void> updateServerMetrics( TCServerInfo *server ) {
state StorageServerInterface ssi = server->lastKnownInterface;
state Future<ErrorOr<GetPhysicalMetricsReply>> metricsRequest = ssi.getPhysicalMetrics.tryGetReply( GetPhysicalMetricsRequest(), TaskDataDistributionLaunch );
state Future<ErrorOr<GetPhysicalMetricsReply>> metricsRequest = ssi.getPhysicalMetrics.tryGetReply( GetPhysicalMetricsRequest(), TaskPriority::DataDistributionLaunch );
state Future<Void> resetRequest = Never();
state Future<std::pair<StorageServerInterface, ProcessClass>> interfaceChanged( server->onInterfaceChanged );
state Future<Void> serverRemoved( server->onRemoved );
@ -104,7 +104,7 @@ ACTOR Future<Void> updateServerMetrics( TCServerInfo *server ) {
return Void();
}
metricsRequest = Never();
resetRequest = delay( SERVER_KNOBS->METRIC_DELAY, TaskDataDistributionLaunch );
resetRequest = delay( SERVER_KNOBS->METRIC_DELAY, TaskPriority::DataDistributionLaunch );
}
when( std::pair<StorageServerInterface,ProcessClass> _ssi = wait( interfaceChanged ) ) {
ssi = _ssi.first;
@ -120,7 +120,7 @@ ACTOR Future<Void> updateServerMetrics( TCServerInfo *server ) {
}
else {
resetRequest = Never();
metricsRequest = ssi.getPhysicalMetrics.tryGetReply( GetPhysicalMetricsRequest(), TaskDataDistributionLaunch );
metricsRequest = ssi.getPhysicalMetrics.tryGetReply( GetPhysicalMetricsRequest(), TaskPriority::DataDistributionLaunch );
}
}
}
@ -635,9 +635,9 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
shardsAffectedByTeamFailure(shardsAffectedByTeamFailure), doBuildTeams(true), teamBuilder(Void()),
badTeamRemover(Void()), redundantTeamRemover(Void()), configuration(configuration),
readyToStart(readyToStart), clearHealthyZoneFuture(Void()),
checkTeamDelay(delay(SERVER_KNOBS->CHECK_TEAM_DELAY, TaskDataDistribution)),
checkTeamDelay(delay(SERVER_KNOBS->CHECK_TEAM_DELAY, TaskPriority::DataDistribution)),
initialFailureReactionDelay(
delayed(readyToStart, SERVER_KNOBS->INITIAL_FAILURE_REACTION_DELAY, TaskDataDistribution)),
delayed(readyToStart, SERVER_KNOBS->INITIAL_FAILURE_REACTION_DELAY, TaskPriority::DataDistribution)),
healthyTeamCount(0), storageServerSet(new LocalityMap<UID>()),
initializationDoneActor(logOnCompletion(readyToStart && initialFailureReactionDelay, this)),
optimalTeamCount(0), recruitingStream(0), restartRecruiting(SERVER_KNOBS->DEBOUNCE_RECRUITING_DELAY),
@ -671,7 +671,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
ACTOR static Future<Void> logOnCompletion( Future<Void> signal, DDTeamCollection* self ) {
wait(signal);
wait(delay(SERVER_KNOBS->LOG_ON_COMPLETION_DELAY, TaskDataDistribution));
wait(delay(SERVER_KNOBS->LOG_ON_COMPLETION_DELAY, TaskPriority::DataDistribution));
if(!self->primary || self->configuration.usableRegions == 1) {
TraceEvent("DDTrackerStarting", self->distributorId)
@ -1309,7 +1309,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
// Five steps to create each machine team, which are document in the function
// Reuse ReplicationPolicy selectReplicas func to select machine team
// return number of added machine teams
int addBestMachineTeams(int targetMachineTeamsToBuild) {
int addBestMachineTeams(int targetMachineTeamsToBuild, int remainingMachineTeamBudget) {
int addedMachineTeams = 0;
int machineTeamsToBuild = 0;
@ -1327,7 +1327,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
int loopCount = 0;
// Add a team in each iteration
while (addedMachineTeams < machineTeamsToBuild) {
while (addedMachineTeams < machineTeamsToBuild || addedMachineTeams < remainingMachineTeamBudget) {
// Step 2: Get least used machines from which we choose machines as a machine team
std::vector<Reference<TCMachineInfo>> leastUsedMachines; // A less used machine has less number of teams
int minTeamCount = std::numeric_limits<int>::max();
@ -1377,6 +1377,8 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
// that have the least-utilized server
team.clear();
auto success = machineLocalityMap.selectReplicas(configuration.storagePolicy, forcedAttributes, team);
// NOTE: selectReplicas() should always return success when storageTeamSize = 1
ASSERT_WE_THINK(configuration.storageTeamSize > 1 || (configuration.storageTeamSize == 1 && success));
if (!success) {
break;
}
@ -1430,6 +1432,9 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
addMachineTeam(machines);
addedMachineTeams++;
// Update the remaining machine team budget because the budget may decrease by
// any value between 1 and storageTeamSize
remainingMachineTeamBudget = getRemainingMachineTeamBudget();
} else {
TraceEvent(SevWarn, "DataDistributionBuildTeams", distributorId)
.detail("Primary", primary)
@ -1589,6 +1594,32 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
return totalHealthyMachineCount;
}
std::pair<int64_t, int64_t> calculateMinMaxServerTeamNumOnServer() {
int64_t minTeamNumber = std::numeric_limits<int64_t>::max();
int64_t maxTeamNumber = 0;
for (auto& server : server_info) {
if (server_status.get(server.first).isUnhealthy()) {
continue;
}
minTeamNumber = std::min((int64_t) server.second->teams.size(), minTeamNumber);
maxTeamNumber = std::max((int64_t) server.second->teams.size(), maxTeamNumber);
}
return std::make_pair(minTeamNumber, maxTeamNumber);
}
std::pair<int64_t, int64_t> calculateMinMaxMachineTeamNumOnMachine() {
int64_t minTeamNumber = std::numeric_limits<int64_t>::max();
int64_t maxTeamNumber = 0;
for (auto& machine : machine_info) {
if (!isMachineHealthy(machine.second)) {
continue;
}
minTeamNumber = std::min<int64_t>((int64_t) machine.second->machineTeams.size(), minTeamNumber);
maxTeamNumber = std::max<int64_t>((int64_t) machine.second->machineTeams.size(), maxTeamNumber);
}
return std::make_pair(minTeamNumber, maxTeamNumber);
}
// Sanity check
bool isServerTeamNumberCorrect(Reference<TCMachineTeamInfo>& mt) {
int num = 0;
@ -1639,12 +1670,41 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
return healthyTeamCount;
}
// Each machine is expected to have SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER machine teams;
// remainingMachineTeamBudget is the number of machine teams needed to ensure every machine has
// SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER machine teams
int getRemainingMachineTeamBudget() {
int remainingMachineTeamBudget = 0;
for (auto& m : machine_info) {
int machineTeamCount = m.second->machineTeams.size();
remainingMachineTeamBudget += std::max(0, (int)(SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER - machineTeamCount));
}
// We over-provision the remainingMachineTeamBudget because we do not know, when a new machine team is built,
// how many units of the budget it will consume. For example, when a new machine is added,
// a new machine team may consume only 1 unit of the budget
return remainingMachineTeamBudget;
}
// Each server is expected to have SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER teams.
int getRemainingServerTeamBudget() {
// remainingTeamBudget is the number of teams needed to ensure every server has
// SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER teams
int remainingTeamBudget = 0;
for (auto& s : server_info) {
int numValidTeams = s.second->teams.size();
remainingTeamBudget += std::max(0, (int)(SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER - numValidTeams));
}
return remainingTeamBudget;
}
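Both budget helpers reduce to the same per-entity deficit sum. A self-contained sketch, with hypothetical names and numbers chosen purely for illustration:

#include <algorithm>
#include <vector>

// Stand-alone illustration of getRemainingServerTeamBudget(): with
// desired == 5 and three servers holding {5, 2, 0} teams, the budget is
// max(0,5-5) + max(0,5-2) + max(0,5-0) = 0 + 3 + 5 = 8.
int remainingServerTeamBudget(const std::vector<int>& teamsPerServer, int desired) {
	int budget = 0;
	for (int n : teamsPerServer)
		budget += std::max(0, desired - n);
	return budget;
}

As the comment above getRemainingMachineTeamBudget() notes, this deliberately over-counts: a single new team can reduce the deficit of up to storageTeamSize servers at once, which is why the team-building loops recompute the budget after every added team.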
// Create server teams based on machine teams
// Before the number of machine teams reaches the threshold, build a machine team for each server team.
// When it reaches the threshold, first try to build a server team with existing machine teams; if that fails,
// build an extra machine team and record the event in the trace
int addTeamsBestOf(int teamsToBuild, int desiredTeamNumber, int maxTeamNumber) {
ASSERT(teamsToBuild > 0);
int addTeamsBestOf(int teamsToBuild, int desiredTeamNumber, int maxTeamNumber, int remainingTeamBudget) {
ASSERT(teamsToBuild >= 0);
ASSERT_WE_THINK(machine_info.size() > 0 || server_info.size() == 0);
int addedMachineTeams = 0;
@ -1655,27 +1715,28 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
// When we change configuration, we may have machine teams with storageTeamSize in the old configuration.
int healthyMachineTeamCount = getHealthyMachineTeamCount();
int totalMachineTeamCount = machineTeams.size();
int totalHealthyMachineCount = calculateHealthyMachineCount();
int remainingMachineTeamBudget = getRemainingMachineTeamBudget();
int desiredMachineTeams = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * totalHealthyMachineCount;
int maxMachineTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * totalHealthyMachineCount;
// machineTeamsToBuild mimics how teamsToBuild is calculated in buildTeams()
int machineTeamsToBuild =
std::min(desiredMachineTeams - healthyMachineTeamCount, maxMachineTeams - totalMachineTeamCount);
int machineTeamsToBuild = std::max(
0, std::min(desiredMachineTeams - healthyMachineTeamCount, maxMachineTeams - totalMachineTeamCount));
TraceEvent("BuildMachineTeams")
.detail("TotalHealthyMachine", totalHealthyMachineCount)
.detail("HealthyMachineTeamCount", healthyMachineTeamCount)
.detail("DesiredMachineTeams", desiredMachineTeams)
.detail("MaxMachineTeams", maxMachineTeams)
.detail("MachineTeamsToBuild", machineTeamsToBuild);
.detail("MachineTeamsToBuild", machineTeamsToBuild)
.detail("RemainingMachineTeamBudget", remainingMachineTeamBudget);
// Pre-build all machine teams until we have the desired number of machine teams
if (machineTeamsToBuild > 0) {
addedMachineTeams = addBestMachineTeams(machineTeamsToBuild);
if (machineTeamsToBuild > 0 || remainingMachineTeamBudget > 0) {
addedMachineTeams = addBestMachineTeams(machineTeamsToBuild, remainingMachineTeamBudget);
}
while (addedTeams < teamsToBuild) {
while (addedTeams < teamsToBuild || addedTeams < remainingTeamBudget) {
// Step 1: Create 1 best machine team
std::vector<UID> bestServerTeam;
int bestScore = std::numeric_limits<int>::max();
@ -1752,6 +1813,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
// Step 4: Add the server team
addTeam(bestServerTeam.begin(), bestServerTeam.end(), false);
addedTeams++;
remainingTeamBudget = getRemainingServerTeamBudget();
if (++loopCount > 2 * teamsToBuild * (configuration.storageTeamSize + 1)) {
break;
@ -1760,10 +1822,14 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
healthyMachineTeamCount = getHealthyMachineTeamCount();
std::pair<uint64_t, uint64_t> minMaxTeamNumberOnServer = calculateMinMaxServerTeamNumOnServer();
std::pair<uint64_t, uint64_t> minMaxMachineTeamNumberOnMachine = calculateMinMaxMachineTeamNumOnMachine();
TraceEvent("TeamCollectionInfo", distributorId)
.detail("Primary", primary)
.detail("AddedTeamNumber", addedTeams)
.detail("AimToBuildTeamNumber", teamsToBuild)
.detail("RemainingTeamBudget", remainingTeamBudget)
.detail("CurrentTeamNumber", teams.size())
.detail("DesiredTeamNumber", desiredTeamNumber)
.detail("MaxTeamNumber", maxTeamNumber)
@ -1773,6 +1839,11 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
.detail("DesiredMachineTeams", desiredMachineTeams)
.detail("MaxMachineTeams", maxMachineTeams)
.detail("TotalHealthyMachine", totalHealthyMachineCount)
.detail("MinTeamNumberOnServer", minMaxTeamNumberOnServer.first)
.detail("MaxTeamNumberOnServer", minMaxTeamNumberOnServer.second)
.detail("MinMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.first)
.detail("MaxMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.second)
.detail("DoBuildTeams", doBuildTeams)
.trackLatest("TeamCollectionInfo");
return addedTeams;
@ -1789,10 +1860,14 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
int maxMachineTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * totalHealthyMachineCount;
int healthyMachineTeamCount = getHealthyMachineTeamCount();
std::pair<uint64_t, uint64_t> minMaxTeamNumberOnServer = calculateMinMaxServerTeamNumOnServer();
std::pair<uint64_t, uint64_t> minMaxMachineTeamNumberOnMachine = calculateMinMaxMachineTeamNumOnMachine();
TraceEvent("TeamCollectionInfo", distributorId)
.detail("Primary", primary)
.detail("AddedTeamNumber", 0)
.detail("AimToBuildTeamNumber", 0)
.detail("RemainingTeamBudget", 0)
.detail("CurrentTeamNumber", teams.size())
.detail("DesiredTeamNumber", desiredServerTeams)
.detail("MaxTeamNumber", maxServerTeams)
@ -1802,14 +1877,22 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
.detail("DesiredMachineTeams", desiredMachineTeams)
.detail("MaxMachineTeams", maxMachineTeams)
.detail("TotalHealthyMachine", totalHealthyMachineCount)
.detail("MinTeamNumberOnServer", minMaxTeamNumberOnServer.first)
.detail("MaxTeamNumberOnServer", minMaxTeamNumberOnServer.second)
.detail("MinMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.first)
.detail("MaxMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.second)
.detail("DoBuildTeams", doBuildTeams)
.trackLatest("TeamCollectionInfo");
// Debug purpose
// if (healthyMachineTeamCount > desiredMachineTeams || machineTeams.size() > maxMachineTeams) {
// // When the number of machine teams is over the limit, print out the current team info.
// traceAllInfo(true);
// }
// Advance time so that we will not have multiple TeamCollectionInfo events at the same time; otherwise the
// simulation test will randomly pick one TeamCollectionInfo trace, which could be the one recorded before teams were built
// wait(delay(0.01));
// Debug purpose
// if (healthyMachineTeamCount > desiredMachineTeams || machineTeams.size() > maxMachineTeams) {
// // When the number of machine teams is over the limit, print out the current team info.
// traceAllInfo(true);
// }
}
// Use the current set of known processes (from server_info) to compute an optimized set of storage server teams.
@ -1856,10 +1939,14 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
totalTeamCount++;
}
}
// Each server is expected to have SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER teams;
// remainingTeamBudget is the number of teams needed to ensure every server has
// SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER teams
int remainingTeamBudget = self->getRemainingServerTeamBudget();
// teamsToBuild is calculated such that we will not build too many teams in the situation
// when all (or most) teams become temporarily unhealthy and then healthy again
state int teamsToBuild = std::min(desiredTeams - teamCount, maxTeams - totalTeamCount);
state int teamsToBuild = std::max(0, std::min(desiredTeams - teamCount, maxTeams - totalTeamCount));
TraceEvent("BuildTeamsBegin", self->distributorId)
.detail("TeamsToBuild", teamsToBuild)
@ -1876,13 +1963,13 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
.detail("MachineCount", self->machine_info.size())
.detail("DesiredTeamsPerServer", SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER);
if (teamsToBuild > 0) {
if (teamsToBuild > 0 || remainingTeamBudget > 0) {
state vector<std::vector<UID>> builtTeams;
// addTeamsBestOf() will not add more teams than needed.
// If the number of teams exceeds the desired number, the extra teams are the ones added in the code path where
// a team is added as an initial team
int addedTeams = self->addTeamsBestOf(teamsToBuild, desiredTeams, maxTeams);
int addedTeams = self->addTeamsBestOf(teamsToBuild, desiredTeams, maxTeams, remainingTeamBudget);
if (addedTeams <= 0 && self->teams.size() == 0) {
TraceEvent(SevWarn, "NoTeamAfterBuildTeam")
@ -1898,10 +1985,14 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
int maxMachineTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * totalHealthyMachineCount;
int healthyMachineTeamCount = self->getHealthyMachineTeamCount();
std::pair<uint64_t, uint64_t> minMaxTeamNumberOnServer = self->calculateMinMaxServerTeamNumOnServer();
std::pair<uint64_t, uint64_t> minMaxMachineTeamNumberOnMachine = self->calculateMinMaxMachineTeamNumOnMachine();
TraceEvent("TeamCollectionInfo", self->distributorId)
.detail("Primary", self->primary)
.detail("AddedTeamNumber", 0)
.detail("AimToBuildTeamNumber", teamsToBuild)
.detail("RemainingTeamBudget", remainingTeamBudget)
.detail("CurrentTeamNumber", self->teams.size())
.detail("DesiredTeamNumber", desiredTeams)
.detail("MaxTeamNumber", maxTeams)
@ -1911,6 +2002,11 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
.detail("DesiredMachineTeams", desiredMachineTeams)
.detail("MaxMachineTeams", maxMachineTeams)
.detail("TotalHealthyMachine", totalHealthyMachineCount)
.detail("MinTeamNumberOnServer", minMaxTeamNumberOnServer.first)
.detail("MaxTeamNumberOnServer", minMaxTeamNumberOnServer.second)
.detail("MinMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.first)
.detail("MaxMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.second)
.detail("DoBuildTeams", self->doBuildTeams)
.trackLatest("TeamCollectionInfo");
}
}
@ -1919,7 +2015,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
//Building teams can cause servers to become undesired, which can make teams unhealthy.
//Let all of these changes get worked out before responding to the get team request
wait( delay(0, TaskDataDistributionLaunch) );
wait( delay(0, TaskPriority::DataDistributionLaunch) );
return Void();
}
@ -2232,7 +2328,7 @@ ACTOR Future<Void> waitUntilHealthy(DDTeamCollection* self) {
TraceEvent("WaitUntilHealthyStalled", self->distributorId).detail("Primary", self->primary).detail("ZeroHealthy", self->zeroHealthyTeams->get()).detail("ProcessingUnhealthy", self->processingUnhealthy->get());
wait(self->zeroHealthyTeams->onChange() || self->processingUnhealthy->onChange());
}
wait(delay(SERVER_KNOBS->DD_STALL_CHECK_DELAY, TaskLowPriority)); //After the team trackers wait on the initial failure reaction delay, they yield. We want to make sure every tracker has had the opportunity to send their relocations to the queue.
wait(delay(SERVER_KNOBS->DD_STALL_CHECK_DELAY, TaskPriority::Low)); //After the team trackers wait on the initial failure reaction delay, they yield. We want to make sure every tracker has had the opportunity to send their relocations to the queue.
if(!self->zeroHealthyTeams->get() && !self->processingUnhealthy->get()) {
return Void();
}
@ -2308,6 +2404,16 @@ ACTOR Future<Void> teamRemover(DDTeamCollection* self) {
team = mt->serverTeams[teamIndex];
ASSERT(team->machineTeam->machineIDs == mt->machineIDs); // Sanity check
// Check if a server will have 0 teams after the team is removed
for (auto& s : team->getServers()) {
if (s->teams.size() == 0) {
TraceEvent(SevError, "TeamRemoverTooAggressive")
.detail("Server", s->id)
.detail("Team", team->getServerIDsStr());
self->traceAllInfo(true);
}
}
// The team will be marked as a bad team
bool foundTeam = self->removeTeam(team);
ASSERT(foundTeam == true);
@ -2540,7 +2646,12 @@ ACTOR Future<Void> teamTracker(DDTeamCollection* self, Reference<TCTeamInfo> tea
//If we cannot find the team, it could be a bad team so assume unhealthy priority
if(!found) {
maxPriority = std::max<int>( maxPriority, PRIORITY_TEAM_UNHEALTHY );
// If the input team (in the function parameters) is a redundant team, found will be
// false. We want to differentiate redundant_team from unhealthy_team in
// terms of relocate priority
maxPriority =
std::max<int>(maxPriority, redundantTeam ? PRIORITY_TEAM_REDUNDANT
: PRIORITY_TEAM_UNHEALTHY);
}
} else {
TEST(true); // A removed server is still associated with a team in SABTF
@ -2638,7 +2749,7 @@ ACTOR Future<Void> trackExcludedServers( DDTeamCollection* self ) {
if (nchid != lastChangeID)
break;
wait( delay( SERVER_KNOBS->SERVER_LIST_DELAY, TaskDataDistribution ) ); // FIXME: make this tr.watch( excludedServersVersionKey ) instead
wait( delay( SERVER_KNOBS->SERVER_LIST_DELAY, TaskPriority::DataDistribution ) ); // FIXME: make this tr.watch( excludedServersVersionKey ) instead
tr = Transaction(self->cx);
} catch (Error& e) {
wait( tr.onError(e) );
@ -2734,12 +2845,18 @@ ACTOR Future<Void> waitHealthyZoneChange( DDTeamCollection* self ) {
if(val.present()) {
auto p = decodeHealthyZoneValue(val.get());
if(p.second > tr.getReadVersion().get()) {
healthyZoneTimeout = delay((p.second - tr.getReadVersion().get())/(double)SERVER_KNOBS->VERSIONS_PER_SECOND);
self->healthyZone.set(p.first);
} else {
double timeoutSeconds = (p.second - tr.getReadVersion().get())/(double)SERVER_KNOBS->VERSIONS_PER_SECOND;
healthyZoneTimeout = delay(timeoutSeconds);
if(self->healthyZone.get() != p.first) {
TraceEvent("MaintenanceZoneStart", self->distributorId).detail("ZoneID", printable(p.first)).detail("EndVersion", p.second).detail("Duration", timeoutSeconds);
self->healthyZone.set(p.first);
}
} else if(self->healthyZone.get().present()) {
TraceEvent("MaintenanceZoneEnd", self->distributorId);
self->healthyZone.set(Optional<Key>());
}
} else {
} else if(self->healthyZone.get().present()) {
TraceEvent("MaintenanceZoneEnd", self->distributorId);
self->healthyZone.set(Optional<Key>());
}
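The timeout arithmetic converts a version horizon into seconds. A hedged worked example; the knob value here is an assumption, not read from this diff:

// Assuming SERVER_KNOBS->VERSIONS_PER_SECOND == 1e6:
// a healthy-zone entry whose end version p.second sits 30e6 versions past
// the current read version yields
//   timeoutSeconds = 30e6 / 1e6 = 30.0
// so MaintenanceZoneStart traces Duration == 30.0, and the zone is
// re-evaluated when that delay fires or when the healthy-zone key changes.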
@ -2757,14 +2874,14 @@ ACTOR Future<Void> serverMetricsPolling( TCServerInfo *server) {
state double lastUpdate = now();
loop {
wait( updateServerMetrics( server ) );
wait( delayUntil( lastUpdate + SERVER_KNOBS->STORAGE_METRICS_POLLING_DELAY + SERVER_KNOBS->STORAGE_METRICS_RANDOM_DELAY * deterministicRandom()->random01(), TaskDataDistributionLaunch ) );
wait( delayUntil( lastUpdate + SERVER_KNOBS->STORAGE_METRICS_POLLING_DELAY + SERVER_KNOBS->STORAGE_METRICS_RANDOM_DELAY * deterministicRandom()->random01(), TaskPriority::DataDistributionLaunch ) );
lastUpdate = now();
}
}
//Returns the KeyValueStoreType of server if it is different from self->storeType
ACTOR Future<KeyValueStoreType> keyValueStoreTypeTracker(DDTeamCollection* self, TCServerInfo *server) {
state KeyValueStoreType type = wait(brokenPromiseToNever(server->lastKnownInterface.getKeyValueStoreType.getReplyWithTaskID<KeyValueStoreType>(TaskDataDistribution)));
state KeyValueStoreType type = wait(brokenPromiseToNever(server->lastKnownInterface.getKeyValueStoreType.getReplyWithTaskID<KeyValueStoreType>(TaskPriority::DataDistribution)));
if(type == self->configuration.storageServerStoreType && (self->includedDCs.empty() || std::find(self->includedDCs.begin(), self->includedDCs.end(), server->lastKnownInterface.locality.dcId()) != self->includedDCs.end()) )
wait(Future<Void>(Never()));
@ -2787,7 +2904,7 @@ ACTOR Future<Void> waitForAllDataRemoved( Database cx, UID serverID, Version add
}
// Wait for any change to the serverKeys for this server
wait( delay(SERVER_KNOBS->ALL_DATA_REMOVED_DELAY, TaskDataDistribution) );
wait( delay(SERVER_KNOBS->ALL_DATA_REMOVED_DELAY, TaskPriority::DataDistribution) );
tr.reset();
} catch (Error& e) {
wait( tr.onError(e) );
@ -2830,7 +2947,7 @@ ACTOR Future<Void> storageServerFailureTracker(
ASSERT(!inHealthyZone);
healthChanged = IFailureMonitor::failureMonitor().onStateEqual( interf.waitFailure.getEndpoint(), FailureStatus(false));
} else if(!inHealthyZone) {
healthChanged = waitFailureClientStrict(interf.waitFailure, SERVER_KNOBS->DATA_DISTRIBUTION_FAILURE_REACTION_TIME, TaskDataDistribution);
healthChanged = waitFailureClientStrict(interf.waitFailure, SERVER_KNOBS->DATA_DISTRIBUTION_FAILURE_REACTION_TIME, TaskPriority::DataDistribution);
}
choose {
when ( wait(healthChanged) ) {
@ -2840,6 +2957,7 @@ ACTOR Future<Void> storageServerFailureTracker(
}
if(status->isFailed && self->healthyZone.get().present() && self->clearHealthyZoneFuture.isReady()) {
self->clearHealthyZoneFuture = clearHealthyZone(self->cx);
TraceEvent("MaintenanceZoneCleared", self->distributorId);
self->healthyZone.set(Optional<Key>());
}
@ -2953,11 +3071,14 @@ ACTOR Future<Void> storageServerTracker(
if(hasWrongStoreTypeOrDC)
self->restartRecruiting.trigger();
if ( lastIsUnhealthy && !status.isUnhealthy() && !server->teams.size() ) {
if (lastIsUnhealthy && !status.isUnhealthy() &&
server->teams.size() < SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER) {
self->doBuildTeams = true;
self->restartTeamBuilder.trigger(); // This does not trigger building teams if there exist healthy teams
}
lastIsUnhealthy = status.isUnhealthy();
state bool recordTeamCollectionInfo = false;
choose {
when( wait( failureTracker ) ) {
// The server is failed AND all data has been removed from it, so permanently remove it.
@ -3061,7 +3182,8 @@ ACTOR Future<Void> storageServerTracker(
self->badTeamRemover = removeBadTeams(self);
self->addActor.send(self->badTeamRemover);
// The team number changes, so we need to update the team number info
self->traceTeamCollectionInfo();
// self->traceTeamCollectionInfo();
recordTeamCollectionInfo = true;
}
}
@ -3069,10 +3191,13 @@ ACTOR Future<Void> storageServerTracker(
// We rely on the old failureTracker being actorCancelled since the old actor now has a pointer to an invalid location
status = ServerStatus( status.isFailed, status.isUndesired, server->lastKnownInterface.locality );
// self->traceTeamCollectionInfo();
recordTeamCollectionInfo = true;
//Restart the storeTracker for the new interface
storeTracker = keyValueStoreTypeTracker(self, server);
hasWrongStoreTypeOrDC = false;
self->restartTeamBuilder.trigger();
if(restartRecruiting)
self->restartRecruiting.trigger();
}
@ -3093,6 +3218,10 @@ ACTOR Future<Void> storageServerTracker(
server->wakeUpTracker = Promise<Void>();
}
}
if (recordTeamCollectionInfo) {
self->traceTeamCollectionInfo();
}
}
} catch( Error &e ) {
if (e.code() != error_code_actor_cancelled && errorOut.canBeSet())
@ -3120,7 +3249,7 @@ ACTOR Future<Void> monitorStorageServerRecruitment(DDTeamCollection* self) {
loop {
choose {
when( wait( self->recruitingStream.onChange() ) ) {}
when( wait( self->recruitingStream.get() == 0 ? delay(SERVER_KNOBS->RECRUITMENT_IDLE_DELAY, TaskDataDistribution) : Future<Void>(Never()) ) ) { break; }
when( wait( self->recruitingStream.get() == 0 ? delay(SERVER_KNOBS->RECRUITMENT_IDLE_DELAY, TaskPriority::DataDistribution) : Future<Void>(Never()) ) ) { break; }
}
}
TraceEvent("StorageServerRecruitment", self->distributorId)
@ -3147,12 +3276,12 @@ ACTOR Future<Void> initializeStorage( DDTeamCollection* self, RecruitStorageRepl
self->recruitingIds.insert(interfaceId);
self->recruitingLocalities.insert(candidateWorker.worker.address());
state ErrorOr<InitializeStorageReply> newServer = wait( candidateWorker.worker.storage.tryGetReply( isr, TaskDataDistribution ) );
state ErrorOr<InitializeStorageReply> newServer = wait( candidateWorker.worker.storage.tryGetReply( isr, TaskPriority::DataDistribution ) );
if(newServer.isError()) {
TraceEvent(SevWarn, "DDRecruitmentError").error(newServer.getError());
if( !newServer.isError( error_code_recruitment_failed ) && !newServer.isError( error_code_request_maybe_delivered ) )
throw newServer.getError();
wait( delay(SERVER_KNOBS->STORAGE_RECRUITMENT_DELAY, TaskDataDistribution) );
wait( delay(SERVER_KNOBS->STORAGE_RECRUITMENT_DELAY, TaskPriority::DataDistribution) );
}
self->recruitingIds.erase(interfaceId);
self->recruitingLocalities.erase(candidateWorker.worker.address());
@ -3217,7 +3346,7 @@ ACTOR Future<Void> storageRecruiter( DDTeamCollection* self, Reference<AsyncVar<
if(!fCandidateWorker.isValid() || fCandidateWorker.isReady() || rsr.excludeAddresses != lastRequest.excludeAddresses || rsr.criticalRecruitment != lastRequest.criticalRecruitment) {
lastRequest = rsr;
fCandidateWorker = brokenPromiseToNever( db->get().clusterInterface.recruitStorage.getReply( rsr, TaskDataDistribution ) );
fCandidateWorker = brokenPromiseToNever( db->get().clusterInterface.recruitStorage.getReply( rsr, TaskPriority::DataDistribution ) );
}
choose {
@ -3388,7 +3517,7 @@ ACTOR Future<Void> dataDistributionTeamCollection(
ACTOR Future<Void> waitForDataDistributionEnabled( Database cx ) {
state Transaction tr(cx);
loop {
wait(delay(SERVER_KNOBS->DD_ENABLED_CHECK_DELAY, TaskDataDistribution));
wait(delay(SERVER_KNOBS->DD_ENABLED_CHECK_DELAY, TaskPriority::DataDistribution));
try {
Optional<Value> mode = wait( tr.get( dataDistributionModeKey ) );
@ -3516,7 +3645,7 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self)
state double lastLimited = 0;
self->addActor.send( monitorBatchLimitedTime(self->dbInfo, &lastLimited) );
state Database cx = openDBOnServer(self->dbInfo, TaskDataDistributionLaunch, true, true);
state Database cx = openDBOnServer(self->dbInfo, TaskPriority::DataDistributionLaunch, true, true);
cx->locationCacheSize = SERVER_KNOBS->DD_LOCATION_CACHE_SIZE;
//cx->setOption( FDBDatabaseOptions::LOCATION_CACHE_SIZE, StringRef((uint8_t*) &SERVER_KNOBS->DD_LOCATION_CACHE_SIZE, 8) );
@ -3646,7 +3775,7 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self)
}
output.send( RelocateShard( keys, unhealthy ? PRIORITY_TEAM_UNHEALTHY : PRIORITY_RECOVER_MOVE ) );
}
wait( yield(TaskDataDistribution) );
wait( yield(TaskPriority::DataDistribution) );
}
vector<TeamCollectionInterface> tcis;
@ -3718,7 +3847,7 @@ ACTOR Future<Void> dataDistributor(DataDistributorInterface di, Reference<AsyncV
state Future<Void> collection = actorCollection( self->addActor.getFuture() );
try {
TraceEvent("DataDistributor_Running", di.id());
TraceEvent("DataDistributorRunning", di.id());
self->addActor.send( waitFailureServer(di.waitFailure.getFuture()) );
state Future<Void> distributor = reportErrorsExcept( dataDistribution(self), "DataDistribution", di.id(), &normalDataDistributorErrors() );
@ -3736,10 +3865,10 @@ ACTOR Future<Void> dataDistributor(DataDistributorInterface di, Reference<AsyncV
}
catch ( Error &err ) {
if ( normalDataDistributorErrors().count(err.code()) == 0 ) {
TraceEvent("DataDistributor_Error", di.id()).error(err, true);
TraceEvent("DataDistributorError", di.id()).error(err, true);
throw err;
}
TraceEvent("DataDistributor_Died", di.id()).error(err, true);
TraceEvent("DataDistributorDied", di.id()).error(err, true);
}
return Void();
@ -3842,7 +3971,7 @@ TEST_CASE("DataDistribution/AddTeamsBestOf/UseMachineID") {
Reference<IReplicationPolicy> policy = Reference<IReplicationPolicy>(new PolicyAcross(teamSize, "zoneid", Reference<IReplicationPolicy>(new PolicyOne())));
state DDTeamCollection* collection = testMachineTeamCollection(teamSize, policy, processSize);
collection->addTeamsBestOf(30, desiredTeams, maxTeams);
collection->addTeamsBestOf(30, desiredTeams, maxTeams, 30);
ASSERT(collection->sanityCheckTeams() == true);
@ -3867,8 +3996,8 @@ TEST_CASE("DataDistribution/AddTeamsBestOf/NotUseMachineID") {
return Void();
}
collection->addBestMachineTeams(30); // Create machine teams to help debug
collection->addTeamsBestOf(30, desiredTeams, maxTeams);
collection->addBestMachineTeams(30, 30); // Create machine teams to help debug
collection->addTeamsBestOf(30, desiredTeams, maxTeams, 30);
collection->sanityCheckTeams(); // Server team may happen to be on the same machine team, although unlikely
if (collection) delete (collection);
@ -3883,7 +4012,7 @@ TEST_CASE("DataDistribution/AddAllTeams/isExhaustive") {
state int maxTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * processSize;
state DDTeamCollection* collection = testTeamCollection(3, policy, processSize);
int result = collection->addTeamsBestOf(200, desiredTeams, maxTeams);
int result = collection->addTeamsBestOf(200, desiredTeams, maxTeams, 200);
delete(collection);
@ -3903,11 +4032,11 @@ TEST_CASE("/DataDistribution/AddAllTeams/withLimit") {
state DDTeamCollection* collection = testTeamCollection(3, policy, processSize);
int result = collection->addTeamsBestOf(10, desiredTeams, maxTeams);
int result = collection->addTeamsBestOf(10, desiredTeams, maxTeams, 10);
delete(collection);
ASSERT(result == 10);
ASSERT(result >= 10);
return Void();
}
@ -3923,9 +4052,9 @@ TEST_CASE("/DataDistribution/AddTeamsBestOf/SkippingBusyServers") {
collection->addTeam(std::set<UID>({ UID(1, 0), UID(2, 0), UID(3, 0) }), true);
collection->addTeam(std::set<UID>({ UID(1, 0), UID(3, 0), UID(4, 0) }), true);
int result = collection->addTeamsBestOf(8, desiredTeams, maxTeams);
int result = collection->addTeamsBestOf(8, desiredTeams, maxTeams, 8);
ASSERT(result == 8);
ASSERT(result >= 8);
for(auto process = collection->server_info.begin(); process != collection->server_info.end(); process++) {
auto teamCount = process->second->teams.size();
@ -3953,8 +4082,8 @@ TEST_CASE("/DataDistribution/AddTeamsBestOf/NotEnoughServers") {
collection->addTeam(std::set<UID>({ UID(1, 0), UID(2, 0), UID(3, 0) }), true);
collection->addTeam(std::set<UID>({ UID(1, 0), UID(3, 0), UID(4, 0) }), true);
collection->addBestMachineTeams(10);
int result = collection->addTeamsBestOf(10, desiredTeams, maxTeams);
collection->addBestMachineTeams(10, 10);
int result = collection->addTeamsBestOf(10, desiredTeams, maxTeams, 10);
if (collection->machineTeams.size() != 10 || result != 8) {
collection->traceAllInfo(true); // Debug message


@ -512,9 +512,9 @@ struct DDQueueData {
// FIXME: is the merge case needed
if( input.priority == PRIORITY_MERGE_SHARD ) {
wait( delay( 0.5, TaskDataDistribution - 2 ) );
wait( delay( 0.5, decrementPriority(decrementPriority(TaskPriority::DataDistribution)) ) );
} else {
wait( delay( 0.0001, TaskDataDistributionLaunch ) );
wait( delay( 0.0001, TaskPriority::DataDistributionLaunch ) );
}
loop {
@ -933,7 +933,7 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
.detail("Count", stuckCount)
.detail("TeamCollectionId", tciIndex)
.detail("NumOfTeamCollections", self->teamCollections.size());
wait( delay( SERVER_KNOBS->BEST_TEAM_STUCK_DELAY, TaskDataDistributionLaunch ) );
wait( delay( SERVER_KNOBS->BEST_TEAM_STUCK_DELAY, TaskPriority::DataDistributionLaunch ) );
}
state std::vector<UID> destIds;
@ -993,7 +993,7 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
state Error error = success();
state Promise<Void> dataMovementComplete;
state Future<Void> doMoveKeys = moveKeys(self->cx, rd.keys, destIds, healthyIds, self->lock, dataMovementComplete, &self->startMoveKeysParallelismLock, &self->finishMoveKeysParallelismLock, self->teamCollections.size() > 1, relocateShardInterval.pairID );
state Future<Void> pollHealth = signalledTransferComplete ? Never() : delay( SERVER_KNOBS->HEALTH_POLL_TIME, TaskDataDistributionLaunch );
state Future<Void> pollHealth = signalledTransferComplete ? Never() : delay( SERVER_KNOBS->HEALTH_POLL_TIME, TaskPriority::DataDistributionLaunch );
try {
loop {
choose {
@ -1016,7 +1016,7 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
self->dataTransferComplete.send(rd);
}
}
pollHealth = signalledTransferComplete ? Never() : delay( SERVER_KNOBS->HEALTH_POLL_TIME, TaskDataDistributionLaunch );
pollHealth = signalledTransferComplete ? Never() : delay( SERVER_KNOBS->HEALTH_POLL_TIME, TaskPriority::DataDistributionLaunch );
}
when( wait( signalledTransferComplete ? Never() : dataMovementComplete.getFuture() ) ) {
self->fetchKeysComplete.insert( rd );
@ -1066,7 +1066,7 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
} else {
TEST(true); // move to removed server
healthyDestinations.addDataInFlightToTeam( -metrics.bytes );
wait( delay( SERVER_KNOBS->RETRY_RELOCATESHARD_DELAY, TaskDataDistributionLaunch ) );
wait( delay( SERVER_KNOBS->RETRY_RELOCATESHARD_DELAY, TaskPriority::DataDistributionLaunch ) );
}
}
} catch (Error& e) {
@ -1125,7 +1125,7 @@ ACTOR Future<Void> BgDDMountainChopper( DDQueueData* self, int teamCollectionInd
state double checkDelay = SERVER_KNOBS->BG_DD_POLLING_INTERVAL;
state int resetCount = SERVER_KNOBS->DD_REBALANCE_RESET_AMOUNT;
loop {
wait( delay(checkDelay, TaskDataDistributionLaunch) );
wait( delay(checkDelay, TaskPriority::DataDistributionLaunch) );
if (self->priority_relocations[PRIORITY_REBALANCE_OVERUTILIZED_TEAM] < SERVER_KNOBS->DD_REBALANCE_PARALLELISM) {
state Optional<Reference<IDataDistributionTeam>> randomTeam = wait( brokenPromiseToNever( self->teamCollections[teamCollectionIndex].getTeam.getReply( GetTeamRequest( true, false, true ) ) ) );
if( randomTeam.present() ) {
@ -1160,7 +1160,7 @@ ACTOR Future<Void> BgDDValleyFiller( DDQueueData* self, int teamCollectionIndex)
state double checkDelay = SERVER_KNOBS->BG_DD_POLLING_INTERVAL;
state int resetCount = SERVER_KNOBS->DD_REBALANCE_RESET_AMOUNT;
loop {
wait( delay(checkDelay, TaskDataDistributionLaunch) );
wait( delay(checkDelay, TaskPriority::DataDistributionLaunch) );
if (self->priority_relocations[PRIORITY_REBALANCE_UNDERUTILIZED_TEAM] < SERVER_KNOBS->DD_REBALANCE_PARALLELISM) {
state Optional<Reference<IDataDistributionTeam>> randomTeam = wait( brokenPromiseToNever( self->teamCollections[teamCollectionIndex].getTeam.getReply( GetTeamRequest( true, false, false ) ) ) );
if( randomTeam.present() ) {
@ -1244,7 +1244,7 @@ ACTOR Future<Void> dataDistributionQueue(
bool wasEmpty = serversToLaunchFrom.empty();
self.queueRelocation( rs, serversToLaunchFrom );
if(wasEmpty && !serversToLaunchFrom.empty())
launchQueuedWorkTimeout = delay(0, TaskDataDistributionLaunch);
launchQueuedWorkTimeout = delay(0, TaskPriority::DataDistributionLaunch);
}
when ( wait(launchQueuedWorkTimeout) ) {
self.launchQueuedWork( serversToLaunchFrom );
@ -1258,7 +1258,7 @@ ACTOR Future<Void> dataDistributionQueue(
when ( RelocateData done = waitNext( self.dataTransferComplete.getFuture() ) ) {
complete( done, self.busymap );
if(serversToLaunchFrom.empty() && !done.src.empty())
launchQueuedWorkTimeout = delay(0, TaskDataDistributionLaunch);
launchQueuedWorkTimeout = delay(0, TaskPriority::DataDistributionLaunch);
serversToLaunchFrom.insert(done.src.begin(), done.src.end());
}
when ( RelocateData done = waitNext( self.relocationComplete.getFuture() ) ) {
@ -1266,7 +1266,7 @@ ACTOR Future<Void> dataDistributionQueue(
self.finishRelocation(done.priority);
self.fetchKeysComplete.erase( done );
//self.logRelocation( done, "ShardRelocatorDone" );
actors.add( tag( delay(0, TaskDataDistributionLaunch), done.keys, rangesComplete ) );
actors.add( tag( delay(0, TaskPriority::DataDistributionLaunch), done.keys, rangesComplete ) );
if( g_network->isSimulated() && debug_isCheckRelocationDuration() && now() - done.startTime > 60 ) {
TraceEvent(SevWarnAlways, "RelocationDurationTooLong").detail("Duration", now() - done.startTime);
debug_setCheckRelocationDuration(false);


@ -140,7 +140,7 @@ ACTOR Future<Void> trackShardBytes(
Reference<AsyncVar<Optional<StorageMetrics>>> shardSize,
bool addToSizeEstimate = true)
{
wait( delay( 0, TaskDataDistribution ) );
wait( delay( 0, TaskPriority::DataDistribution ) );
/*TraceEvent("TrackShardBytesStarting")
.detail("TrackerID", trackerID)
@ -260,7 +260,7 @@ ACTOR Future<Void> changeSizes( DataDistributionTracker* self, KeyRangeRef keys,
}
wait( waitForAll( sizes ) );
wait( yield(TaskDataDistribution) );
wait( yield(TaskPriority::DataDistribution) );
int64_t newShardsStartingSize = 0;
for ( int i = 0; i < sizes.size(); i++ )
@ -281,7 +281,7 @@ struct HasBeenTrueFor : NonCopyable {
Future<Void> set() {
if( !trigger.isValid() ) {
cleared = Promise<Void>();
trigger = delayJittered( SERVER_KNOBS->DD_MERGE_COALESCE_DELAY, TaskDataDistribution - 1 ) || cleared.getFuture();
trigger = delayJittered( SERVER_KNOBS->DD_MERGE_COALESCE_DELAY, decrementPriority(TaskPriority::DataDistribution) ) || cleared.getFuture();
}
return trigger;
}
@ -361,7 +361,7 @@ ACTOR Future<Void> shardSplitter(
self->sizeChanges.add( changeSizes( self, keys, shardSize->get().get().bytes ) );
} else {
wait( delay(1.0, TaskDataDistribution) ); //In case the split point was off due to a discrepancy between storage servers
wait( delay(1.0, TaskPriority::DataDistribution) ); //In case the split point was off due to a discrepancy between storage servers
}
return Void();
}
@ -529,7 +529,7 @@ ACTOR Future<Void> shardTracker(
wait( yieldedFuture(self->maxShardSize->onChange()) );
// Since maxShardSize will become present for all shards at once, avoid slow tasks with a short delay
wait( delay( 0, TaskDataDistribution ) );
wait( delay( 0, TaskPriority::DataDistribution ) );
/*TraceEvent("ShardTracker", self->distributorId)
.detail("Begin", keys.begin)
@ -546,7 +546,7 @@ ACTOR Future<Void> shardTracker(
// We could have a lot of actors being released from the previous wait at the same time. Immediately calling
// delay(0) mitigates the resulting SlowTask
wait( delay(0, TaskDataDistribution) );
wait( delay(0, TaskPriority::DataDistribution) );
}
} catch (Error& e) {
if (e.code() != error_code_actor_cancelled)
@ -593,12 +593,12 @@ ACTOR Future<Void> trackInitialShards(DataDistributionTracker *self, Reference<I
//This line reduces the priority of shard initialization to prevent interference with failure monitoring.
//SOMEDAY: Figure out what this priority should actually be
wait( delay( 0.0, TaskDataDistribution ) );
wait( delay( 0.0, TaskPriority::DataDistribution ) );
state int s;
for(s=0; s<initData->shards.size()-1; s++) {
restartShardTrackers( self, KeyRangeRef( initData->shards[s].key, initData->shards[s+1].key ) );
wait( yield( TaskDataDistribution ) );
wait( yield( TaskPriority::DataDistribution ) );
}
Future<Void> initialSize = changeSizes( self, KeyRangeRef(allKeys.begin, allKeys.end), 0 );


@ -1937,8 +1937,8 @@ KeyValueStoreSQLite::KeyValueStoreSQLite(std::string const& filename, UID id, Ke
readCursors.resize(64); //< number of read threads
sqlite3_soft_heap_limit64( SERVER_KNOBS->SOFT_HEAP_LIMIT ); // SOMEDAY: Is this a performance issue? Should we drop the cache sizes for individual threads?
int taskId = g_network->getCurrentTask();
g_network->setCurrentTask(TaskDiskWrite);
TaskPriority taskId = g_network->getCurrentTask();
g_network->setCurrentTask(TaskPriority::DiskWrite);
writeThread->addThread( new Writer(filename, type==KeyValueStoreType::SSD_BTREE_V2, checkChecksums, checkIntegrity, writesComplete, springCleaningStats, diskBytesUsed, freeListPages, id, &readCursors) );
g_network->setCurrentTask(taskId);
auto p = new Writer::InitAction();
@ -1963,8 +1963,8 @@ StorageBytes KeyValueStoreSQLite::getStorageBytes() {
void KeyValueStoreSQLite::startReadThreads() {
int nReadThreads = readCursors.size();
int taskId = g_network->getCurrentTask();
g_network->setCurrentTask(TaskDiskRead);
TaskPriority taskId = g_network->getCurrentTask();
g_network->setCurrentTask(TaskPriority::DiskRead);
for(int i=0; i<nReadThreads; i++)
readThreads->addThread( new Reader(filename, type==KeyValueStoreType::SSD_BTREE_V2, readsComplete, logID, &readCursors[i]) );
g_network->setCurrentTask(taskId);
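Both KeyValueStoreSQLite call sites follow the same save/set/restore dance around thread creation. A hypothetical RAII guard, not present in this diff, would make the restore automatic; sketch under that assumption:

// Hypothetical helper, not in the codebase as far as this diff shows; it
// relies only on the getCurrentTask()/setCurrentTask() calls used above.
struct TaskPriorityGuard {
	TaskPriority saved;
	explicit TaskPriorityGuard(TaskPriority p) : saved(g_network->getCurrentTask()) {
		g_network->setCurrentTask(p); // run the enclosed scope at priority p
	}
	~TaskPriorityGuard() { g_network->setCurrentTask(saved); } // always restore
};

// Usage at the call sites above would collapse to:
//   { TaskPriorityGuard g(TaskPriority::DiskRead); readThreads->addThread(...); }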


@ -68,6 +68,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( MAX_QUEUE_COMMIT_BYTES, 15e6 ); if( randomize && BUGGIFY ) MAX_QUEUE_COMMIT_BYTES = 5000;
init( VERSIONS_PER_BATCH, VERSIONS_PER_SECOND/20 ); if( randomize && BUGGIFY ) VERSIONS_PER_BATCH = std::max<int64_t>(1,VERSIONS_PER_SECOND/1000);
init( CONCURRENT_LOG_ROUTER_READS, 1 );
init( LOG_ROUTER_PEEK_FROM_SATELLITES_PREFERRED, 1 ); if( randomize && BUGGIFY ) LOG_ROUTER_PEEK_FROM_SATELLITES_PREFERRED = 0;
init( DISK_QUEUE_ADAPTER_MIN_SWITCH_TIME, 1.0 );
init( DISK_QUEUE_ADAPTER_MAX_SWITCH_TIME, 5.0 );
init( TLOG_SPILL_REFERENCE_MAX_PEEK_MEMORY_BYTES, 2e9 ); if ( randomize && BUGGIFY ) TLOG_SPILL_REFERENCE_MAX_PEEK_MEMORY_BYTES = 2e6;
@ -282,6 +283,8 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( PROXY_SPIN_DELAY, 0.01 );
init( UPDATE_REMOTE_LOG_VERSION_INTERVAL, 2.0 );
init( MAX_TXS_POP_VERSION_HISTORY, 1e5 );
init( PROXY_FORWARD_DELAY, 10.0 );
init( MAX_FORWARD_MESSAGES, 1e6 ); if( randomize && BUGGIFY ) MAX_FORWARD_MESSAGES = 10;
// Master Server
// masterCommitter() in the master server will allow lower priority tasks (e.g. DataDistribution)
@ -393,6 +396,15 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( MAX_TL_SS_VERSION_DIFFERENCE_BATCH, 1e99 );
init( MAX_MACHINES_FALLING_BEHIND, 1 );
init( MAX_TPS_HISTORY_SAMPLES, 600 );
init( NEEDED_TPS_HISTORY_SAMPLES, 200 );
init( TARGET_DURABILITY_LAG_VERSIONS, 200e6 );
init( TARGET_DURABILITY_LAG_VERSIONS_BATCH, 100e6 );
init( DURABILITY_LAG_UNLIMITED_THRESHOLD, 50e6 );
init( INITIAL_DURABILITY_LAG_MULTIPLIER, 1.02 );
init( DURABILITY_LAG_REDUCTION_RATE, 0.9999 );
init( DURABILITY_LAG_INCREASE_RATE, 1.001 );
//Storage Metrics
init( STORAGE_METRICS_AVERAGE_INTERVAL, 120.0 );
init( STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS, 1000.0 / STORAGE_METRICS_AVERAGE_INTERVAL ); // milliHz!
@ -411,6 +423,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( BUGGIFY_BLOCK_BYTES, 10000 );
init( STORAGE_COMMIT_BYTES, 10000000 ); if( randomize && BUGGIFY ) STORAGE_COMMIT_BYTES = 2000000;
init( STORAGE_DURABILITY_LAG_REJECT_THRESHOLD, 0.25 );
init( STORAGE_DURABILITY_LAG_MIN_RATE, 0.1 );
init( STORAGE_COMMIT_INTERVAL, 0.5 ); if( randomize && BUGGIFY ) STORAGE_COMMIT_INTERVAL = 2.0;
init( UPDATE_SHARD_VERSION_INTERVAL, 0.25 ); if( randomize && BUGGIFY ) UPDATE_SHARD_VERSION_INTERVAL = 1.0;
init( BYTE_SAMPLING_FACTOR, 250 ); //cannot buggify because of differences in restarting tests
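The new DURABILITY_LAG_* knobs parameterize a multiplicative-decrease/increase control loop over a durability-lag-based rate limit. The ratekeeper code is not part of this excerpt, so the following is only a sketch of the assumed controller shape:

#include <cstdint>
#include <limits>

// Assumed structure: `limit` starts at INITIAL_DURABILITY_LAG_MULTIPLIER times
// some base rate, and worstLag is the worst storage-server durability lag in
// versions, sampled once per ratekeeper iteration.
void updateDurabilityLagLimit(double& limit, int64_t worstLag) {
	if (worstLag < SERVER_KNOBS->DURABILITY_LAG_UNLIMITED_THRESHOLD) {
		limit = std::numeric_limits<double>::infinity();      // stop throttling
	} else if (worstLag > SERVER_KNOBS->TARGET_DURABILITY_LAG_VERSIONS) {
		limit *= SERVER_KNOBS->DURABILITY_LAG_REDUCTION_RATE; // back off (0.9999)
	} else {
		limit *= SERVER_KNOBS->DURABILITY_LAG_INCREASE_RATE;  // recover (1.001)
	}
}

Assuming each rate is applied once per iteration, the asymmetry (0.01% down, 0.1% up) means the limit decays slowly while lag exceeds the target and recovers roughly ten times faster once lag falls back into the 50e6..200e6 band.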


@ -72,6 +72,7 @@ public:
int64_t MAX_QUEUE_COMMIT_BYTES;
int64_t VERSIONS_PER_BATCH;
int CONCURRENT_LOG_ROUTER_READS;
int LOG_ROUTER_PEEK_FROM_SATELLITES_PREFERRED; // 0==peek from primary, non-zero==peek from satellites
double DISK_QUEUE_ADAPTER_MIN_SWITCH_TIME;
double DISK_QUEUE_ADAPTER_MAX_SWITCH_TIME;
int64_t TLOG_SPILL_REFERENCE_MAX_PEEK_MEMORY_BYTES;
@ -227,6 +228,8 @@ public:
double PROXY_SPIN_DELAY;
double UPDATE_REMOTE_LOG_VERSION_INTERVAL;
int MAX_TXS_POP_VERSION_HISTORY;
double PROXY_FORWARD_DELAY;
int MAX_FORWARD_MESSAGES;
// Master Server
double COMMIT_SLEEP_TIME;
@ -329,6 +332,15 @@ public:
double MAX_TL_SS_VERSION_DIFFERENCE_BATCH;
int MAX_MACHINES_FALLING_BEHIND;
int MAX_TPS_HISTORY_SAMPLES;
int NEEDED_TPS_HISTORY_SAMPLES;
int64_t TARGET_DURABILITY_LAG_VERSIONS;
int64_t TARGET_DURABILITY_LAG_VERSIONS_BATCH;
int64_t DURABILITY_LAG_UNLIMITED_THRESHOLD;
double INITIAL_DURABILITY_LAG_MULTIPLIER;
double DURABILITY_LAG_REDUCTION_RATE;
double DURABILITY_LAG_INCREASE_RATE;
//Storage Metrics
double STORAGE_METRICS_AVERAGE_INTERVAL;
double STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS;
@ -349,6 +361,7 @@ public:
int64_t STORAGE_DURABILITY_LAG_HARD_MAX;
int64_t STORAGE_DURABILITY_LAG_SOFT_MAX;
double STORAGE_DURABILITY_LAG_REJECT_THRESHOLD;
double STORAGE_DURABILITY_LAG_MIN_RATE;
int STORAGE_COMMIT_BYTES;
double STORAGE_COMMIT_INTERVAL;
double UPDATE_SHARD_VERSION_INTERVAL;


@ -30,7 +30,7 @@ Optional<std::pair<LeaderInfo, bool>> getLeader( const vector<Optional<LeaderInf
ACTOR Future<Void> submitCandidacy( Key key, LeaderElectionRegInterface coord, LeaderInfo myInfo, UID prevChangeID, Reference<AsyncVar<vector<Optional<LeaderInfo>>>> nominees, int index ) {
loop {
auto const& nom = nominees->get()[index];
Optional<LeaderInfo> li = wait( retryBrokenPromise( coord.candidacy, CandidacyRequest( key, myInfo, nom.present() ? nom.get().changeID : UID(), prevChangeID ), TaskCoordinationReply ) );
Optional<LeaderInfo> li = wait( retryBrokenPromise( coord.candidacy, CandidacyRequest( key, myInfo, nom.present() ? nom.get().changeID : UID(), prevChangeID ), TaskPriority::CoordinationReply ) );
if (li != nominees->get()[index]) {
vector<Optional<LeaderInfo>> v = nominees->get();
@ -150,7 +150,7 @@ ACTOR Future<Void> tryBecomeLeaderInternal(ServerCoordinators coordinators, Valu
// we might be breaking the leader election process for someone with better communications but lower ID, so change IDs.
if ((!leader.present() || !leader.get().second) && std::count( nominees->get().begin(), nominees->get().end(), myInfo )) {
if (!badCandidateTimeout.isValid())
badCandidateTimeout = delay( SERVER_KNOBS->POLLING_FREQUENCY*2, TaskCoordinationReply );
badCandidateTimeout = delay( SERVER_KNOBS->POLLING_FREQUENCY*2, TaskPriority::CoordinationReply );
} else
badCandidateTimeout = Future<Void>();
@ -183,12 +183,12 @@ ACTOR Future<Void> tryBecomeLeaderInternal(ServerCoordinators coordinators, Valu
state vector<Future<Void>> true_heartbeats;
state vector<Future<Void>> false_heartbeats;
for(int i=0; i<coordinators.leaderElectionServers.size(); i++) {
Future<bool> hb = retryBrokenPromise( coordinators.leaderElectionServers[i].leaderHeartbeat, LeaderHeartbeatRequest( coordinators.clusterKey, myInfo, prevChangeID ), TaskCoordinationReply );
Future<bool> hb = retryBrokenPromise( coordinators.leaderElectionServers[i].leaderHeartbeat, LeaderHeartbeatRequest( coordinators.clusterKey, myInfo, prevChangeID ), TaskPriority::CoordinationReply );
true_heartbeats.push_back( onEqual(hb, true) );
false_heartbeats.push_back( onEqual(hb, false) );
}
state Future<Void> rate = delay( SERVER_KNOBS->HEARTBEAT_FREQUENCY, TaskCoordinationReply ) || asyncPriorityInfo->onChange(); // SOMEDAY: Move to server side?
state Future<Void> rate = delay( SERVER_KNOBS->HEARTBEAT_FREQUENCY, TaskPriority::CoordinationReply ) || asyncPriorityInfo->onChange(); // SOMEDAY: Move to server side?
choose {
when ( wait( quorum( true_heartbeats, true_heartbeats.size()/2+1 ) ) ) {


@ -51,7 +51,7 @@ struct LogRouterData {
}
// Erase messages not needed to update *from* versions >= before (thus, messages with toversion <= before)
ACTOR Future<Void> eraseMessagesBefore( TagData *self, Version before, LogRouterData *tlogData, int taskID ) {
ACTOR Future<Void> eraseMessagesBefore( TagData *self, Version before, LogRouterData *tlogData, TaskPriority taskID ) {
while(!self->version_messages.empty() && self->version_messages.front().first < before) {
Version version = self->version_messages.front().first;
int64_t messagesErased = 0;
@ -68,7 +68,7 @@ struct LogRouterData {
return Void();
}
Future<Void> eraseMessagesBefore(Version before, LogRouterData *tlogData, int taskID) {
Future<Void> eraseMessagesBefore(Version before, LogRouterData *tlogData, TaskPriority taskID) {
return eraseMessagesBefore(this, before, tlogData, taskID);
}
};
@ -197,7 +197,7 @@ ACTOR Future<Void> waitForVersion( LogRouterData *self, Version ver ) {
while(self->minPopped.get() + SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS < ver) {
if(self->minPopped.get() + SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS > self->version.get()) {
self->version.set( self->minPopped.get() + SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS );
wait(yield(TaskTLogCommit));
wait(yield(TaskPriority::TLogCommit));
} else {
wait(self->minPopped.whenAtLeast((self->minPopped.get()+1)));
}
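For intuition, a worked example of the window logic above, with an assumed (not verified here) knob value:

// Assuming SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS == 5e6:
// with minPopped == 100e6 and an incoming commit at ver == 110e6, the loop
// first advances self->version only to 105e6 (minPopped + window) and then
// blocks on minPopped.whenAtLeast() until consumers pop; this bounds how
// much unpopped data the log router buffers ahead of its slowest consumer.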
@ -220,7 +220,7 @@ ACTOR Future<Void> pullAsyncData( LogRouterData *self ) {
loop {
loop {
choose {
when(wait( r ? r->getMore(TaskTLogCommit) : Never() ) ) {
when(wait( r ? r->getMore(TaskPriority::TLogCommit) : Never() ) ) {
break;
}
when( wait( dbInfoChange ) ) { //FIXME: does this actually happen?
@ -247,7 +247,7 @@ ACTOR Future<Void> pullAsyncData( LogRouterData *self ) {
commitMessages(self, ver, messages);
self->version.set( ver );
wait(yield(TaskTLogCommit));
wait(yield(TaskPriority::TLogCommit));
//TraceEvent("LogRouterVersion").detail("Ver",ver);
}
lastVer = ver;
@ -260,7 +260,7 @@ ACTOR Future<Void> pullAsyncData( LogRouterData *self ) {
wait( waitForVersion(self, ver) );
self->version.set( ver );
wait(yield(TaskTLogCommit));
wait(yield(TaskPriority::TLogCommit));
}
break;
}
@ -371,7 +371,7 @@ ACTOR Future<Void> logRouterPop( LogRouterData* self, TLogPopRequest req ) {
} else if (req.to > tagData->popped) {
tagData->popped = req.to;
tagData->durableKnownCommittedVersion = req.durableKnownCommittedVersion;
wait(tagData->eraseMessagesBefore( req.to, self, TaskTLogPop ));
wait(tagData->eraseMessagesBefore( req.to, self, TaskPriority::TLogPop ));
}
state Version minPopped = std::numeric_limits<Version>::max();
@ -385,7 +385,7 @@ ACTOR Future<Void> logRouterPop( LogRouterData* self, TLogPopRequest req ) {
while(!self->messageBlocks.empty() && self->messageBlocks.front().first < minPopped) {
self->messageBlocks.pop_front();
wait(yield(TaskTLogPop));
wait(yield(TaskPriority::TLogPop));
}
self->poppedVersion = std::min(minKnownCommittedVersion, self->minKnownCommittedVersion);


@ -89,9 +89,9 @@ public:
return result;
}
void populateSatelliteTagLocations(int logRouterTags, int oldLogRouterTags) {
void populateSatelliteTagLocations(int logRouterTags, int oldLogRouterTags, int txsTags, int oldTxsTags) {
satelliteTagLocations.clear();
satelliteTagLocations.resize(std::max(logRouterTags,oldLogRouterTags) + 1);
satelliteTagLocations.resize(std::max({logRouterTags,oldLogRouterTags,txsTags,oldTxsTags})+1);
std::map<int,int> server_usedBest;
std::set<std::pair<int,int>> used_servers;
@ -235,7 +235,7 @@ public:
bool allLocations = false) {
if(locality == tagLocalitySatellite) {
for(auto& t : tags) {
if(t == txsTag || t.locality == tagLocalityLogRouter) {
if(t == txsTag || t.locality == tagLocalityTxs || t.locality == tagLocalityLogRouter) {
for(int loc : satelliteTagLocations[t == txsTag ? 0 : t.id + 1]) {
locations.push_back(locationOffset + loc);
}
@ -341,7 +341,7 @@ struct ILogSystem {
//returns immediately if hasMessage() returns true.
//returns when either the result of hasMessage() or version() has changed, or a cursor has internally been exhausted.
virtual Future<Void> getMore(int taskID = TaskTLogPeekReply) = 0;
virtual Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply) = 0;
//returns when the failure monitor detects that the servers associated with the cursor are failed
virtual Future<Void> onFailed() = 0;
@ -407,7 +407,7 @@ struct ILogSystem {
virtual StringRef getMessageWithTags();
virtual const std::vector<Tag>& getTags();
virtual void advanceTo(LogMessageVersion n);
virtual Future<Void> getMore(int taskID = TaskTLogPeekReply);
virtual Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply);
virtual Future<Void> onFailed();
virtual bool isActive();
virtual bool isExhausted();
@ -455,7 +455,7 @@ struct ILogSystem {
virtual StringRef getMessageWithTags();
virtual const std::vector<Tag>& getTags();
virtual void advanceTo(LogMessageVersion n);
virtual Future<Void> getMore(int taskID = TaskTLogPeekReply);
virtual Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply);
virtual Future<Void> onFailed();
virtual bool isActive();
virtual bool isExhausted();
@ -500,7 +500,7 @@ struct ILogSystem {
virtual StringRef getMessageWithTags();
virtual const std::vector<Tag>& getTags();
virtual void advanceTo(LogMessageVersion n);
virtual Future<Void> getMore(int taskID = TaskTLogPeekReply);
virtual Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply);
virtual Future<Void> onFailed();
virtual bool isActive();
virtual bool isExhausted();
@ -521,8 +521,9 @@ struct ILogSystem {
std::vector<Reference<IPeekCursor>> cursors;
std::vector<LogMessageVersion> epochEnds;
Version poppedVersion;
bool needsPopped;
MultiCursor( std::vector<Reference<IPeekCursor>> cursors, std::vector<LogMessageVersion> epochEnds );
MultiCursor( std::vector<Reference<IPeekCursor>> cursors, std::vector<LogMessageVersion> epochEnds, bool needsPopped = true );
virtual Reference<IPeekCursor> cloneNoMore();
virtual void setProtocolVersion( ProtocolVersion version );
@ -534,7 +535,7 @@ struct ILogSystem {
virtual StringRef getMessageWithTags();
virtual const std::vector<Tag>& getTags();
virtual void advanceTo(LogMessageVersion n);
virtual Future<Void> getMore(int taskID = TaskTLogPeekReply);
virtual Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply);
virtual Future<Void> onFailed();
virtual bool isActive();
virtual bool isExhausted();
@ -576,13 +577,14 @@ struct ILogSystem {
LogMessageVersion messageVersion;
Version end;
bool hasNextMessage;
bool withTags;
//FIXME: collectTags is needed to support upgrades from 5.X to 6.0. Remove this code when we no longer support that upgrade.
bool collectTags;
std::vector<Tag> tags;
void combineMessages();
BufferedCursor( std::vector<Reference<IPeekCursor>> cursors, Version begin, Version end, bool collectTags );
BufferedCursor( std::vector<Reference<IPeekCursor>> cursors, Version begin, Version end, bool withTags, bool collectTags = false );
virtual Reference<IPeekCursor> cloneNoMore();
virtual void setProtocolVersion( ProtocolVersion version );
@ -594,7 +596,7 @@ struct ILogSystem {
virtual StringRef getMessageWithTags();
virtual const std::vector<Tag>& getTags();
virtual void advanceTo(LogMessageVersion n);
virtual Future<Void> getMore(int taskID = TaskTLogPeekReply);
virtual Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply);
virtual Future<Void> onFailed();
virtual bool isActive();
virtual bool isExhausted();
@ -653,13 +655,15 @@ struct ILogSystem {
// Same contract as peek(), but can only peek from the logs elected in the same generation.
// If the preferred log server is down, a different log from the same generation will merge results locally before sending them to the log router.
virtual Reference<IPeekCursor> peekSpecial( UID dbgid, Version begin, Tag tag, int8_t peekLocality, Version localEnd ) = 0;
// Same contract as peek(), but it allows specifying a preferred peek locality for tags that do not have locality
virtual Reference<IPeekCursor> peekTxs( UID dbgid, Version begin, int8_t peekLocality, Version localEnd ) = 0;
// Same contract as peek(), but only for peeking the txsLocality. It allows specifying a preferred peek locality.
virtual Version getKnownCommittedVersion() = 0;
virtual Future<Void> onKnownCommittedVersionChange() = 0;
virtual void popTxs( Version upTo, int8_t popLocality = tagLocalityInvalid ) = 0;
virtual void pop( Version upTo, Tag tag, Version knownCommittedVersion = 0, int8_t popLocality = tagLocalityInvalid ) = 0;
// Permits, but does not require, the log subsystem to strip `tag` from any or all messages with message versions < (upTo,0)
// The popping of any given message may be arbitrarily delayed.
@ -706,6 +710,8 @@ struct ILogSystem {
virtual Tag getRandomRouterTag() = 0;
virtual Tag getRandomTxsTag() = 0;
virtual void stopRejoins() = 0;
// Returns the pseudo tag to be popped for the given process class. If the
@ -753,6 +759,10 @@ struct LogPushData : NonCopyable {
}
}
void addTxsTag() {
next_message_tags.push_back( logSystem->getRandomTxsTag() );
}
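A hypothetical call site for the new helper; LogPushData's exact message-writing API is not shown in this excerpt, so the final call below is illustrative only:

// Hypothetical usage sketch: tag the next transaction-state-store mutation
// with one randomly chosen txs tag, spreading txs traffic across all txsTags
// instead of funneling it through the single legacy txsTag.
LogPushData toCommit( logSystem );
toCommit.addTxsTag();                 // internally pushes logSystem->getRandomTxsTag()
toCommit.writeMessage( txsMutation ); // illustrative call name; see note above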
// addTag() adds a tag for the *next* message to be added
void addTag( Tag tag ) {
next_message_tags.push_back( tag );


@ -157,9 +157,10 @@ struct OldTLogConf {
std::vector<TLogSet> tLogs;
Version epochEnd;
int32_t logRouterTags;
int32_t txsTags;
std::set<int8_t> pseudoLocalities;
OldTLogConf() : epochEnd(0), logRouterTags(0) {}
OldTLogConf() : epochEnd(0), logRouterTags(0), txsTags(0) {}
explicit OldTLogConf(const OldLogData&);
std::string toString() const {
@ -167,7 +168,7 @@ struct OldTLogConf {
}
bool operator == ( const OldTLogConf& rhs ) const {
return tLogs == rhs.tLogs && epochEnd == rhs.epochEnd && logRouterTags == rhs.logRouterTags && pseudoLocalities == rhs.pseudoLocalities;
return tLogs == rhs.tLogs && epochEnd == rhs.epochEnd && logRouterTags == rhs.logRouterTags && txsTags == rhs.txsTags && pseudoLocalities == rhs.pseudoLocalities;
}
bool isEqualIds(OldTLogConf const& r) const {
@ -184,7 +185,7 @@ struct OldTLogConf {
template <class Ar>
void serialize( Ar& ar ) {
serializer(ar, tLogs, epochEnd, logRouterTags, pseudoLocalities);
serializer(ar, tLogs, epochEnd, logRouterTags, pseudoLocalities, txsTags);
}
};
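Note that txsTags is serialized after pseudoLocalities even though it is declared earlier in the struct. A plausible reading, stated here as an assumption rather than fact, is that the serializer derives field identity from position, so new fields must be appended at the end to stay compatible with data written before the field existed:

// Convention sketch (assumption, not taken from this diff):
struct Example {
	int a, b;
	int c; // field added in a later version
	template <class Ar>
	void serialize(Ar& ar) {
		serializer(ar, a, b, c); // c appended last; never inserted between a and b
	}
};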
@ -199,6 +200,7 @@ struct LogSystemConfig {
LogSystemType logSystemType;
std::vector<TLogSet> tLogs;
int32_t logRouterTags;
int32_t txsTags;
std::vector<OldTLogConf> oldTLogs;
int32_t expectedLogSets;
UID recruitmentID;
@ -206,7 +208,7 @@ struct LogSystemConfig {
Optional<Version> recoveredAt;
std::set<int8_t> pseudoLocalities;
LogSystemConfig() : logSystemType(LogSystemType::empty), logRouterTags(0), expectedLogSets(0), stopped(false) {}
LogSystemConfig() : logSystemType(LogSystemType::empty), logRouterTags(0), txsTags(0), expectedLogSets(0), stopped(false) {}
std::string toString() const {
return format("type: %d oldGenerations: %d tags: %d %s", logSystemType, oldTLogs.size(), logRouterTags, describe(tLogs).c_str());
@ -327,7 +329,7 @@ struct LogSystemConfig {
bool operator == ( const LogSystemConfig& rhs ) const { return isEqual(rhs); }
bool isEqual(LogSystemConfig const& r) const {
return logSystemType == r.logSystemType && tLogs == r.tLogs && oldTLogs == r.oldTLogs && expectedLogSets == r.expectedLogSets && logRouterTags == r.logRouterTags && recruitmentID == r.recruitmentID && stopped == r.stopped && recoveredAt == r.recoveredAt && pseudoLocalities == r.pseudoLocalities;
return logSystemType == r.logSystemType && tLogs == r.tLogs && oldTLogs == r.oldTLogs && expectedLogSets == r.expectedLogSets && logRouterTags == r.logRouterTags && txsTags == r.txsTags && recruitmentID == r.recruitmentID && stopped == r.stopped && recoveredAt == r.recoveredAt && pseudoLocalities == r.pseudoLocalities;
}
bool isEqualIds(LogSystemConfig const& r) const {
@ -358,7 +360,7 @@ struct LogSystemConfig {
template <class Ar>
void serialize( Ar& ar ) {
serializer(ar, logSystemType, tLogs, logRouterTags, oldTLogs, expectedLogSets, recruitmentID, stopped, recoveredAt, pseudoLocalities);
serializer(ar, logSystemType, tLogs, logRouterTags, oldTLogs, expectedLogSets, recruitmentID, stopped, recoveredAt, pseudoLocalities, txsTags);
}
};


@ -42,30 +42,32 @@ public:
break;
}
when( wait( self->localityChanged ) ) {
self->cursor = self->logSystem->peekSpecial( UID(), self->recoveryLoc, self->tag, self->peekLocality ? self->peekLocality->get().primaryLocality : tagLocalityInvalid, self->peekLocality ? self->peekLocality->get().knownCommittedVersion : invalidVersion );
self->cursor = self->logSystem->peekTxs( UID(), self->recoveryLoc, self->peekLocality ? self->peekLocality->get().primaryLocality : tagLocalityInvalid, self->peekLocality ? self->peekLocality->get().knownCommittedVersion : invalidVersion );
self->localityChanged = self->peekLocality->onChange();
}
when( wait( delay(self->peekTypeSwitches==0 ? SERVER_KNOBS->DISK_QUEUE_ADAPTER_MIN_SWITCH_TIME : SERVER_KNOBS->DISK_QUEUE_ADAPTER_MAX_SWITCH_TIME)) ) {
self->peekTypeSwitches++;
if(self->peekTypeSwitches%3==1) {
self->cursor = self->logSystem->peek( UID(), self->recoveryLoc, self->tag, true );
self->cursor = self->logSystem->peekTxs( UID(), self->recoveryLoc, tagLocalityInvalid, invalidVersion );
self->localityChanged = Never();
} else if(self->peekTypeSwitches%3==2) {
self->cursor = self->logSystem->peekSpecial( UID(), self->recoveryLoc, self->tag, self->peekLocality ? self->peekLocality->get().secondaryLocality : tagLocalityInvalid, self->peekLocality ? self->peekLocality->get().knownCommittedVersion : invalidVersion );
self->cursor = self->logSystem->peekTxs( UID(), self->recoveryLoc, self->peekLocality ? self->peekLocality->get().secondaryLocality : tagLocalityInvalid, self->peekLocality ? self->peekLocality->get().knownCommittedVersion : invalidVersion );
self->localityChanged = self->peekLocality->onChange();
} else {
self->cursor = self->logSystem->peekSpecial( UID(), self->recoveryLoc, self->tag, self->peekLocality ? self->peekLocality->get().primaryLocality : tagLocalityInvalid, self->peekLocality ? self->peekLocality->get().knownCommittedVersion : invalidVersion );
self->cursor = self->logSystem->peekTxs( UID(), self->recoveryLoc, self->peekLocality ? self->peekLocality->get().primaryLocality : tagLocalityInvalid, self->peekLocality ? self->peekLocality->get().knownCommittedVersion : invalidVersion );
self->localityChanged = self->peekLocality->onChange();
}
}
}
}
TraceEvent("PeekNextGetMore").detail("Queue", self->recoveryQueue.size()).detail("Bytes", bytes).detail("Loc", self->recoveryLoc).detail("End", self->logSystem->getEnd());
TraceEvent("PeekNextGetMore").detail("Queue", self->recoveryQueue.size()).detail("Bytes", bytes).detail("Loc", self->recoveryLoc)
.detail("End", self->logSystem->getEnd()).detail("HasMessage", self->cursor->hasMessage()).detail("Version", self->cursor->version().version);
if(self->recoveryQueueDataSize == 0) {
self->recoveryQueueLoc = self->recoveryLoc;
}
if(!self->cursor->hasMessage()) {
self->recoveryLoc = self->cursor->version().version;
wait(yield());
continue;
}
}
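The modulo-3 rotation above cycles the txs peek between localities so that a peek wedged in one locality cannot stall recovery indefinitely. A standalone restatement of the policy (helper name and signature are mine):

	// tagLocalityInvalid means "no locality preference"; the rotation visits
	// it first so a stuck locality-specific cursor gets replaced quickly.
	int8_t chooseTxsPeekLocality(int peekTypeSwitches, int8_t primary, int8_t secondary) {
		switch (peekTypeSwitches % 3) {
		case 1:  return tagLocalityInvalid; // peek with no locality hint
		case 2:  return secondary;          // then try the other side
		default: return primary;            // steady state: primary locality
		}
	}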
@ -168,6 +170,6 @@ Future<LogSystemDiskQueueAdapter::CommitMessage> LogSystemDiskQueueAdapter::getC
return pcm.getFuture();
}
LogSystemDiskQueueAdapter* openDiskQueueAdapter( Reference<ILogSystem> logSystem, Tag tag, Reference<AsyncVar<PeekSpecialInfo>> peekLocality ) {
return new LogSystemDiskQueueAdapter( logSystem, tag, peekLocality );
LogSystemDiskQueueAdapter* openDiskQueueAdapter( Reference<ILogSystem> logSystem, Reference<AsyncVar<PeekTxsInfo>> peekLocality ) {
return new LogSystemDiskQueueAdapter( logSystem, peekLocality );
}

View File

@ -25,16 +25,16 @@
#include "fdbclient/FDBTypes.h"
#include "fdbserver/IDiskQueue.h"
struct PeekSpecialInfo {
struct PeekTxsInfo {
int8_t primaryLocality;
int8_t secondaryLocality;
Version knownCommittedVersion;
bool operator == (const PeekSpecialInfo& r) const {
bool operator == (const PeekTxsInfo& r) const {
return primaryLocality == r.primaryLocality && secondaryLocality == r.secondaryLocality && knownCommittedVersion == r.knownCommittedVersion;
}
PeekSpecialInfo(int8_t primaryLocality, int8_t secondaryLocality, Version knownCommittedVersion) : primaryLocality(primaryLocality), secondaryLocality(secondaryLocality), knownCommittedVersion(knownCommittedVersion) {}
PeekTxsInfo(int8_t primaryLocality, int8_t secondaryLocality, Version knownCommittedVersion) : primaryLocality(primaryLocality), secondaryLocality(secondaryLocality), knownCommittedVersion(knownCommittedVersion) {}
};
class LogSystemDiskQueueAdapter : public IDiskQueue {
@ -52,10 +52,10 @@ public:
// It does, however, peek the specified tag directly at recovery time.
LogSystemDiskQueueAdapter( Reference<ILogSystem> logSystem, Tag tag, Reference<AsyncVar<PeekSpecialInfo>> peekLocality, bool recover=true ) : logSystem(logSystem), tag(tag), peekLocality(peekLocality), enableRecovery(recover), recoveryLoc(1), recoveryQueueLoc(1), poppedUpTo(0), nextCommit(1), recoveryQueueDataSize(0), peekTypeSwitches(0) {
LogSystemDiskQueueAdapter( Reference<ILogSystem> logSystem, Reference<AsyncVar<PeekTxsInfo>> peekLocality, bool recover=true ) : logSystem(logSystem), peekLocality(peekLocality), enableRecovery(recover), recoveryLoc(1), recoveryQueueLoc(1), poppedUpTo(0), nextCommit(1), recoveryQueueDataSize(0), peekTypeSwitches(0) {
if (enableRecovery) {
localityChanged = peekLocality ? peekLocality->onChange() : Never();
cursor = logSystem->peekSpecial( UID(), 1, tag, peekLocality ? peekLocality->get().primaryLocality : tagLocalityInvalid, peekLocality ? peekLocality->get().knownCommittedVersion : invalidVersion );
cursor = logSystem->peekTxs( UID(), 1, peekLocality ? peekLocality->get().primaryLocality : tagLocalityInvalid, peekLocality ? peekLocality->get().knownCommittedVersion : invalidVersion );
}
}
@ -92,11 +92,10 @@ public:
virtual int getCommitOverhead() { return 0; } //SOMEDAY: could this be more accurate?
private:
Reference<AsyncVar<PeekSpecialInfo>> peekLocality;
Reference<AsyncVar<PeekTxsInfo>> peekLocality;
Future<Void> localityChanged;
Reference<ILogSystem::IPeekCursor> cursor;
int peekTypeSwitches;
Tag tag;
// Recovery state (used while readNext() is being called repeatedly)
bool enableRecovery;
@ -114,6 +113,6 @@ private:
friend class LogSystemDiskQueueAdapterImpl;
};
LogSystemDiskQueueAdapter* openDiskQueueAdapter( Reference<ILogSystem> logSystem, Tag tag, Reference<AsyncVar<PeekSpecialInfo>> peekLocality );
LogSystemDiskQueueAdapter* openDiskQueueAdapter( Reference<ILogSystem> logSystem, Reference<AsyncVar<PeekTxsInfo>> peekLocality );
#endif

View File

@ -133,7 +133,7 @@ void ILogSystem::ServerPeekCursor::advanceTo(LogMessageVersion n) {
}
}
ACTOR Future<Void> serverPeekParallelGetMore( ILogSystem::ServerPeekCursor* self, int taskID ) {
ACTOR Future<Void> serverPeekParallelGetMore( ILogSystem::ServerPeekCursor* self, TaskPriority taskID ) {
if( !self->interf || self->messageVersion >= self->end ) {
wait( Future<Void>(Never()));
throw internal_error();
@ -199,7 +199,7 @@ ACTOR Future<Void> serverPeekParallelGetMore( ILogSystem::ServerPeekCursor* self
}
}
ACTOR Future<Void> serverPeekGetMore( ILogSystem::ServerPeekCursor* self, int taskID ) {
ACTOR Future<Void> serverPeekGetMore( ILogSystem::ServerPeekCursor* self, TaskPriority taskID ) {
if( !self->interf || self->messageVersion >= self->end ) {
wait( Future<Void>(Never()));
throw internal_error();
@ -235,7 +235,7 @@ ACTOR Future<Void> serverPeekGetMore( ILogSystem::ServerPeekCursor* self, int ta
}
}
Future<Void> ILogSystem::ServerPeekCursor::getMore(int taskID) {
Future<Void> ILogSystem::ServerPeekCursor::getMore(TaskPriority taskID) {
//TraceEvent("SPC_GetMore", randomID).detail("HasMessage", hasMessage()).detail("More", !more.isValid() || more.isReady()).detail("MessageVersion", messageVersion.toString()).detail("End", end.toString());
if( hasMessage() )
return Void();
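The int-to-TaskPriority signature changes here repeat across the whole commit: task IDs become a scoped enum, so bare integers no longer compile at call sites. A minimal sketch of the pattern, using only priority names that appear in this diff (the real enum lives in flow and is much larger; membership and ordering here are illustrative):

	// Sketch, not the real definition.
	enum class TaskPriority {
		Low,
		DefaultOnMainThread,
		TLogSpilledPeekReply,
		UpdateStorage,
		FlushTrace,
	};
	// Call sites migrate mechanically: yield(TaskUpdateStorage) becomes
	// yield(TaskPriority::UpdateStorage).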
@ -252,7 +252,7 @@ Future<Void> ILogSystem::ServerPeekCursor::getMore(int taskID) {
ACTOR Future<Void> serverPeekOnFailed( ILogSystem::ServerPeekCursor* self ) {
loop {
choose {
when( wait( self->interf->get().present() ? IFailureMonitor::failureMonitor().onDisconnectOrFailure( self->interf->get().interf().peekMessages.getEndpoint() ) : Never() ) ) { return Void(); }
when( wait( self->interf->get().present() ? IFailureMonitor::failureMonitor().onStateEqual( self->interf->get().interf().peekMessages.getEndpoint(), FailureStatus() ) : Never() ) ) { return Void(); }
when( wait( self->interf->onChange() ) ) {}
}
}
@ -445,7 +445,7 @@ void ILogSystem::MergedPeekCursor::advanceTo(LogMessageVersion n) {
}
}
ACTOR Future<Void> mergedPeekGetMore(ILogSystem::MergedPeekCursor* self, LogMessageVersion startVersion, int taskID) {
ACTOR Future<Void> mergedPeekGetMore(ILogSystem::MergedPeekCursor* self, LogMessageVersion startVersion, TaskPriority taskID) {
loop {
//TraceEvent("MPC_GetMoreA", self->randomID).detail("Start", startVersion.toString());
if(self->bestServer >= 0 && self->serverCursors[self->bestServer]->isActive()) {
@ -466,7 +466,7 @@ ACTOR Future<Void> mergedPeekGetMore(ILogSystem::MergedPeekCursor* self, LogMess
}
}
Future<Void> ILogSystem::MergedPeekCursor::getMore(int taskID) {
Future<Void> ILogSystem::MergedPeekCursor::getMore(TaskPriority taskID) {
if(!serverCursors.size())
return Never();
@ -706,7 +706,7 @@ void ILogSystem::SetPeekCursor::advanceTo(LogMessageVersion n) {
}
}
ACTOR Future<Void> setPeekGetMore(ILogSystem::SetPeekCursor* self, LogMessageVersion startVersion, int taskID) {
ACTOR Future<Void> setPeekGetMore(ILogSystem::SetPeekCursor* self, LogMessageVersion startVersion, TaskPriority taskID) {
loop {
//TraceEvent("LPC_GetMore1", self->randomID).detail("Start", startVersion.toString()).detail("Tag", self->tag);
if(self->bestServer >= 0 && self->bestSet >= 0 && self->serverCursors[self->bestSet][self->bestServer]->isActive()) {
@ -767,7 +767,7 @@ ACTOR Future<Void> setPeekGetMore(ILogSystem::SetPeekCursor* self, LogMessageVer
}
}
Future<Void> ILogSystem::SetPeekCursor::getMore(int taskID) {
Future<Void> ILogSystem::SetPeekCursor::getMore(TaskPriority taskID) {
auto startVersion = version();
calcHasMessage();
if( hasMessage() )
@ -811,7 +811,7 @@ Version ILogSystem::SetPeekCursor::popped() {
return poppedVersion;
}
ILogSystem::MultiCursor::MultiCursor( std::vector<Reference<IPeekCursor>> cursors, std::vector<LogMessageVersion> epochEnds ) : cursors(cursors), epochEnds(epochEnds), poppedVersion(0) {
ILogSystem::MultiCursor::MultiCursor( std::vector<Reference<IPeekCursor>> cursors, std::vector<LogMessageVersion> epochEnds, bool needsPopped ) : cursors(cursors), epochEnds(epochEnds), needsPopped(needsPopped), poppedVersion(0) {
for(int i = 0; i < std::min<int>(cursors.size(),SERVER_KNOBS->MULTI_CURSOR_PRE_FETCH_LIMIT); i++) {
cursors[cursors.size()-i-1]->getMore();
}
@ -855,17 +855,17 @@ const std::vector<Tag>& ILogSystem::MultiCursor::getTags() {
void ILogSystem::MultiCursor::advanceTo(LogMessageVersion n) {
while( cursors.size() > 1 && n >= epochEnds.back() ) {
poppedVersion = std::max(poppedVersion, cursors.back()->popped());
if(needsPopped) poppedVersion = std::max(poppedVersion, cursors.back()->popped());
cursors.pop_back();
epochEnds.pop_back();
}
cursors.back()->advanceTo(n);
}
Future<Void> ILogSystem::MultiCursor::getMore(int taskID) {
Future<Void> ILogSystem::MultiCursor::getMore(TaskPriority taskID) {
LogMessageVersion startVersion = cursors.back()->version();
while( cursors.size() > 1 && cursors.back()->version() >= epochEnds.back() ) {
poppedVersion = std::max(poppedVersion, cursors.back()->popped());
if(needsPopped) poppedVersion = std::max(poppedVersion, cursors.back()->popped());
cursors.pop_back();
epochEnds.pop_back();
}
@ -896,10 +896,11 @@ Version ILogSystem::MultiCursor::getMinKnownCommittedVersion() {
}
Version ILogSystem::MultiCursor::popped() {
ASSERT(needsPopped);
return std::max(poppedVersion, cursors.back()->popped());
}
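needsPopped makes the MultiCursor's popped-version bookkeeping opt-in. A hedged summary of the contract the new flag enforces:

	// Hedged contract: cursors built with needsPopped=false (the txs recovery
	// path) skip the poppedVersion accounting as children retire, and the
	// ASSERT above turns any later popped() call into a loud failure instead
	// of a silently wrong zero.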
ILogSystem::BufferedCursor::BufferedCursor( std::vector<Reference<IPeekCursor>> cursors, Version begin, Version end, bool collectTags ) : cursors(cursors), messageVersion(begin), end(end), collectTags(collectTags), hasNextMessage(false), messageIndex(0) {
ILogSystem::BufferedCursor::BufferedCursor( std::vector<Reference<IPeekCursor>> cursors, Version begin, Version end, bool withTags, bool collectTags ) : cursors(cursors), messageVersion(begin), end(end), withTags(withTags), collectTags(collectTags), hasNextMessage(false), messageIndex(0) {
messages.reserve(10000);
}
@ -962,15 +963,17 @@ void ILogSystem::BufferedCursor::nextMessage() {
}
StringRef ILogSystem::BufferedCursor::getMessage() {
ASSERT(false);
return StringRef();
ASSERT(!withTags);
return messages[messageIndex].message;
}
StringRef ILogSystem::BufferedCursor::getMessageWithTags() {
ASSERT(withTags);
return messages[messageIndex].message;
}
const std::vector<Tag>& ILogSystem::BufferedCursor::getTags() {
ASSERT(withTags);
return messages[messageIndex].tags;
}
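getMessage() changes from unconditionally asserting to being the fast path for tag-free cursors. A hedged summary of the accessor contract:

	// Hedged contract: with withTags=false, messages are buffered without
	// their tags, getMessage() is the only valid accessor, and
	// getMessageWithTags()/getTags() assert; with withTags=true the
	// relationship inverts.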
@ -978,14 +981,14 @@ void ILogSystem::BufferedCursor::advanceTo(LogMessageVersion n) {
ASSERT(false);
}
ACTOR Future<Void> bufferedGetMoreLoader( ILogSystem::BufferedCursor* self, Reference<ILogSystem::IPeekCursor> cursor, Version maxVersion, int taskID ) {
ACTOR Future<Void> bufferedGetMoreLoader( ILogSystem::BufferedCursor* self, Reference<ILogSystem::IPeekCursor> cursor, Version maxVersion, TaskPriority taskID ) {
loop {
wait(yield());
if(cursor->version().version >= maxVersion) {
return Void();
}
while(cursor->hasMessage()) {
self->messages.push_back(ILogSystem::BufferedCursor::BufferedMessage(cursor->arena(), self->collectTags ? cursor->getMessage() : cursor->getMessageWithTags(), cursor->getTags(), cursor->version()));
self->messages.push_back(ILogSystem::BufferedCursor::BufferedMessage(cursor->arena(), (!self->withTags || self->collectTags) ? cursor->getMessage() : cursor->getMessageWithTags(), !self->withTags ? std::vector<Tag>() : cursor->getTags(), cursor->version()));
cursor->nextMessage();
if(cursor->version().version >= maxVersion) {
return Void();
@ -995,7 +998,7 @@ ACTOR Future<Void> bufferedGetMoreLoader( ILogSystem::BufferedCursor* self, Refe
}
}
ACTOR Future<Void> bufferedGetMore( ILogSystem::BufferedCursor* self, int taskID ) {
ACTOR Future<Void> bufferedGetMore( ILogSystem::BufferedCursor* self, TaskPriority taskID ) {
if( self->messageVersion.version >= self->end ) {
wait( Future<Void>(Never()));
throw internal_error();
@ -1019,7 +1022,11 @@ ACTOR Future<Void> bufferedGetMore( ILogSystem::BufferedCursor* self, int taskID
}
self->messageIndex = 0;
self->hasNextMessage = self->messages.size() > 0;
self->messageVersion = LogMessageVersion(targetVersion);
Version minVersion = self->end;
for(auto& cursor : self->cursors) {
minVersion = std::min(minVersion, cursor->version().version);
}
self->messageVersion = LogMessageVersion(minVersion);
if(self->collectTags) {
self->combineMessages();
@ -1029,7 +1036,7 @@ ACTOR Future<Void> bufferedGetMore( ILogSystem::BufferedCursor* self, int taskID
return Void();
}
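This is the substantive fix in the buffered-cursor loop: instead of declaring the merged position to be the requested targetVersion, it is now derived from where the child cursors actually ended up, so the buffered cursor can never claim to have covered versions a child has not delivered. A standalone illustration, with std types standing in for the cursor objects:

	#include <algorithm>
	#include <vector>

	// The merged cursor may only advance to the minimum version its children
	// have reached; 'end' caps the result exactly as self->end does above.
	long safeAdvance(const std::vector<long>& childVersions, long end) {
		long minVersion = end;
		for (long v : childVersions)
			minVersion = std::min(minVersion, v);
		return minVersion;
	}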
Future<Void> ILogSystem::BufferedCursor::getMore(int taskID) {
Future<Void> ILogSystem::BufferedCursor::getMore(TaskPriority taskID) {
if( hasMessage() )
return Void();
return bufferedGetMore(this, taskID);

View File

@ -50,7 +50,7 @@ struct MasterInterface {
}
void initEndpoints() {
getCommitVersion.getEndpoint( TaskProxyGetConsistentReadVersion );
getCommitVersion.getEndpoint( TaskPriority::ProxyGetConsistentReadVersion );
}
};

View File

@ -95,11 +95,11 @@ ACTOR Future<Void> getRate(UID myID, Reference<AsyncVar<ServerDBInfo>> db, int64
loop choose {
when ( wait( db->onChange() ) ) {
if ( db->get().ratekeeper.present() ) {
TraceEvent("Proxy_RatekeeperChanged", myID)
TraceEvent("ProxyRatekeeperChanged", myID)
.detail("RKID", db->get().ratekeeper.get().id());
nextRequestTimer = Void(); // trigger GetRate request
} else {
TraceEvent("Proxy_RatekeeperDied", myID);
TraceEvent("ProxyRatekeeperDied", myID);
nextRequestTimer = Never();
reply = Never();
}
@ -158,7 +158,7 @@ ACTOR Future<Void> queueTransactionStartRequests(
if (now() - *lastGRVTime > *GRVBatchTime)
*lastGRVTime = now() - *GRVBatchTime;
forwardPromise(GRVTimer, delayJittered(*GRVBatchTime - (now() - *lastGRVTime), TaskProxyGRVTimer));
forwardPromise(GRVTimer, delayJittered(*GRVBatchTime - (now() - *lastGRVTime), TaskPriority::ProxyGRVTimer));
}
transactionQueue->push(std::make_pair(req, counter--));
@ -263,7 +263,7 @@ struct ProxyCommitData {
lastVersionTime(0), commitVersionRequestNumber(1), mostRecentProcessedRequestNumber(0),
getConsistentReadVersion(getConsistentReadVersion), commit(commit), lastCoalesceTime(0),
localCommitBatchesStarted(0), locked(false), commitBatchInterval(SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_INTERVAL_MIN),
firstProxy(firstProxy), cx(openDBOnServer(db, TaskDefaultEndpoint, true, true)), db(db),
firstProxy(firstProxy), cx(openDBOnServer(db, TaskPriority::DefaultEndpoint, true, true)), db(db),
singleKeyMutationEvent(LiteralStringRef("SingleKeyMutation")), commitBatchesMemBytesCount(0), lastTxsPop(0)
{}
};
@ -350,7 +350,7 @@ struct ResolutionRequestBuilder {
};
ACTOR Future<Void> commitBatcher(ProxyCommitData *commitData, PromiseStream<std::pair<std::vector<CommitTransactionRequest>, int> > out, FutureStream<CommitTransactionRequest> in, int desiredBytes, int64_t memBytesLimit) {
wait(delayJittered(commitData->commitBatchInterval, TaskProxyCommitBatcher));
wait(delayJittered(commitData->commitBatchInterval, TaskPriority::ProxyCommitBatcher));
state double lastBatch = 0;
@ -363,7 +363,7 @@ ACTOR Future<Void> commitBatcher(ProxyCommitData *commitData, PromiseStream<std:
timeout = Never();
}
else {
timeout = delayJittered(SERVER_KNOBS->MAX_COMMIT_BATCH_INTERVAL, TaskProxyCommitBatcher);
timeout = delayJittered(SERVER_KNOBS->MAX_COMMIT_BATCH_INTERVAL, TaskPriority::ProxyCommitBatcher);
}
while(!timeout.isReady() && !(batch.size() == SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_COUNT_MAX || batchBytes >= desiredBytes)) {
@ -387,10 +387,10 @@ ACTOR Future<Void> commitBatcher(ProxyCommitData *commitData, PromiseStream<std:
if(!batch.size()) {
commitData->commitBatchStartNotifications.send(Void());
if(now() - lastBatch > commitData->commitBatchInterval) {
timeout = delayJittered(SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_INTERVAL_FROM_IDLE, TaskProxyCommitBatcher);
timeout = delayJittered(SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_INTERVAL_FROM_IDLE, TaskPriority::ProxyCommitBatcher);
}
else {
timeout = delayJittered(commitData->commitBatchInterval - (now() - lastBatch), TaskProxyCommitBatcher);
timeout = delayJittered(commitData->commitBatchInterval - (now() - lastBatch), TaskPriority::ProxyCommitBatcher);
}
}
@ -398,7 +398,7 @@ ACTOR Future<Void> commitBatcher(ProxyCommitData *commitData, PromiseStream<std:
out.send({ batch, batchBytes });
lastBatch = now();
commitData->commitBatchStartNotifications.send(Void());
timeout = delayJittered(commitData->commitBatchInterval, TaskProxyCommitBatcher);
timeout = delayJittered(commitData->commitBatchInterval, TaskPriority::ProxyCommitBatcher);
batch = std::vector<CommitTransactionRequest>();
batchBytes = 0;
}
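The batcher's several delayJittered() calls encode one timer policy. A standalone restatement (function name is mine; the interval values come from SERVER_KNOBS):

	// After shipping a batch the timer restarts at the full interval. An empty
	// pass restarts it at the shorter from-idle interval if the proxy has been
	// idle longer than one interval, otherwise at whatever remains of it.
	double nextBatchTimeout(double now, double lastBatch, double interval, double fromIdle) {
		if (now - lastBatch > interval)
			return fromIdle;
		return interval - (now - lastBatch);
	}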
@ -457,7 +457,7 @@ ACTOR Future<Void> commitBatch(
ASSERT(SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS <= SERVER_KNOBS->MAX_VERSIONS_IN_FLIGHT); // since we are using just the former to limit the number of versions actually in flight!
// Active load balancing runs at a very high priority (to obtain an accurate estimate of the memory used by commit batches), so we need to downgrade here
wait(delay(0, TaskProxyCommit));
wait(delay(0, TaskPriority::ProxyCommit));
self->lastVersionTime = t1;
@ -534,7 +534,7 @@ ACTOR Future<Void> commitBatch(
vector< Future<ResolveTransactionBatchReply> > replies;
for (int r = 0; r<self->resolvers.size(); r++) {
requests.requests[r].debugID = debugID;
replies.push_back(brokenPromiseToNever(self->resolvers[r].resolve.getReply(requests.requests[r], TaskProxyResolverReply)));
replies.push_back(brokenPromiseToNever(self->resolvers[r].resolve.getReply(requests.requests[r], TaskPriority::ProxyResolverReply)));
}
state vector<vector<int>> transactionResolverMap = std::move( requests.transactionResolverMap );
@ -965,7 +965,7 @@ ACTOR Future<Void> commitBatch(
break;
}
when(GetReadVersionReply v = wait(self->getConsistentReadVersion.getReply(GetReadVersionRequest(0, GetReadVersionRequest::PRIORITY_SYSTEM_IMMEDIATE | GetReadVersionRequest::FLAG_CAUSAL_READ_RISKY)))) {
if(v.version > self->committedVersion.get()) {
if(!v.newClientInfo.present() && v.version > self->committedVersion.get()) {
self->locked = v.locked;
self->metadataVersion = v.metadataVersion;
self->committedVersion.set(v.version);
@ -986,7 +986,7 @@ ACTOR Future<Void> commitBatch(
bool firstMessage = true;
for(auto m : msg.messages) {
if(firstMessage) {
toCommit.addTag(txsTag);
toCommit.addTxsTag();
}
toCommit.addMessage(StringRef(m.begin(), m.size()), !firstMessage);
firstMessage = false;
@ -1033,7 +1033,7 @@ ACTOR Future<Void> commitBatch(
self->txsPopVersions.emplace_back(commitVersion, msg.popTo);
}
self->logSystem->pop(msg.popTo, txsTag);
self->logSystem->popTxs(msg.popTo);
/////// Phase 5: Replies (CPU bound; no particular order required, though ordered execution would be best for latency)
if ( prevVersion && commitVersion - prevVersion < SERVER_KNOBS->MAX_VERSIONS_IN_FLIGHT/2 )
@ -1135,7 +1135,7 @@ ACTOR Future<GetReadVersionReply> getLiveCommittedVersion(ProxyCommitData* commi
state vector<Future<GetReadVersionReply>> proxyVersions;
for (auto const& p : *otherProxies)
proxyVersions.push_back(brokenPromiseToNever(p.getRawCommittedVersion.getReply(GetRawCommittedVersionRequest(debugID), TaskTLogConfirmRunningReply)));
proxyVersions.push_back(brokenPromiseToNever(p.getRawCommittedVersion.getReply(GetRawCommittedVersionRequest(debugID), TaskPriority::TLogConfirmRunningReply)));
if (!(flags&GetReadVersionRequest::FLAG_CAUSAL_READ_RISKY))
{
@ -1292,7 +1292,7 @@ ACTOR static Future<Void> transactionStarter(
}
if (!transactionQueue.empty())
forwardPromise(GRVTimer, delayJittered(SERVER_KNOBS->START_TRANSACTION_BATCH_QUEUE_CHECK_INTERVAL, TaskProxyGRVTimer));
forwardPromise(GRVTimer, delayJittered(SERVER_KNOBS->START_TRANSACTION_BATCH_QUEUE_CHECK_INTERVAL, TaskPriority::ProxyGRVTimer));
/*TraceEvent("GRVBatch", proxy.id())
.detail("Elapsed", elapsed)
@ -1505,7 +1505,7 @@ ACTOR Future<Void> monitorRemoteCommitted(ProxyCommitData* self) {
while(self->txsPopVersions.size() && self->txsPopVersions.front().first <= minVersion) {
self->lastTxsPop = self->txsPopVersions.front().second;
self->logSystem->pop(self->txsPopVersions.front().second, txsTag, 0, tagLocalityRemoteLog);
self->logSystem->popTxs(self->txsPopVersions.front().second, tagLocalityRemoteLog);
self->txsPopVersions.pop_front();
}
@ -1563,7 +1563,7 @@ ACTOR Future<Void> masterProxyServerCore(
r->value().emplace_back(0,0);
commitData.logSystem = ILogSystem::fromServerDBInfo(proxy.id(), commitData.db->get(), false, addActor);
commitData.logAdapter = new LogSystemDiskQueueAdapter(commitData.logSystem, txsTag, Reference<AsyncVar<PeekSpecialInfo>>(), false);
commitData.logAdapter = new LogSystemDiskQueueAdapter(commitData.logSystem, Reference<AsyncVar<PeekTxsInfo>>(), false);
commitData.txnStateStore = keyValueStoreLogSystem(commitData.logAdapter, proxy.id(), 2e9, true, true, true);
createWhitelistBinPathVec(whitelistBinPaths, commitData.whitelistedBinPathVec);
@ -1595,7 +1595,7 @@ ACTOR Future<Void> masterProxyServerCore(
for(auto it : commitData.tag_popped) {
commitData.logSystem->pop(it.second, it.first);
}
commitData.logSystem->pop(commitData.lastTxsPop, txsTag, 0, tagLocalityRemoteLog);
commitData.logSystem->popTxs(commitData.lastTxsPop, tagLocalityRemoteLog);
}
Optional<LatencyBandConfig> newLatencyBandConfig = commitData.db->get().latencyBandConfig;
@ -1782,33 +1782,85 @@ ACTOR Future<Void> masterProxyServerCore(
ACTOR Future<Void> checkRemoved(Reference<AsyncVar<ServerDBInfo>> db, uint64_t recoveryCount, MasterProxyInterface myInterface) {
loop{
if (db->get().recoveryCount >= recoveryCount && !std::count(db->get().client.proxies.begin(), db->get().client.proxies.end(), myInterface))
throw worker_removed();
if (db->get().recoveryCount >= recoveryCount && !std::count(db->get().client.proxies.begin(), db->get().client.proxies.end(), myInterface)) {
throw worker_removed();
}
wait(db->onChange());
}
}
ACTOR template <class X> Future<Void> stripRequests( RequestStream<X> in, PromiseStream<ReplyPromise<REPLY_TYPE(X)>> out, int* count) {
loop {
X req = waitNext(in.getFuture());
out.send(req.reply);
if((*count) >= 0 && ++(*count) >= SERVER_KNOBS->MAX_FORWARD_MESSAGES) {
TraceEvent(SevWarnAlways, "TooManyProxyForwardRequests");
return Void();
}
}
}
ACTOR Future<Void> forwardProxy(ClientDBInfo info, PromiseStream<ReplyPromise<CommitID>> commitReplies, PromiseStream<ReplyPromise<GetReadVersionReply>> grvReplies, PromiseStream<ReplyPromise<GetKeyServerLocationsReply>> locationReplies) {
loop {
choose {
when(ReplyPromise<CommitID> req = waitNext(commitReplies.getFuture())) {
CommitID rep;
rep.newClientInfo = info;
req.send(rep);
}
when(ReplyPromise<GetReadVersionReply> req = waitNext(grvReplies.getFuture())) {
GetReadVersionReply rep;
rep.newClientInfo = info;
req.send(rep);
}
when(ReplyPromise<GetKeyServerLocationsReply> req = waitNext(locationReplies.getFuture())) {
GetKeyServerLocationsReply rep;
rep.newClientInfo = info;
req.send(rep);
}
}
wait(yield());
}
}
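forwardProxy() answers every stripped request with an otherwise-empty reply whose newClientInfo field carries the fresh ClientDBInfo. The client half of the handshake is not in this file; a hedged sketch of the assumed reaction:

	// Assumed client handling, for illustration only: a reply carrying
	// newClientInfo means "this proxy is gone", so adopt the new proxy list
	// and retry instead of waiting out a timeout.
	if (reply.newClientInfo.present()) {
		clientInfo->set(reply.newClientInfo.get());
		continue; // re-issue the request against the new proxies
	}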
ACTOR Future<Void> masterProxyServer(
MasterProxyInterface proxy,
InitializeMasterProxyRequest req,
Reference<AsyncVar<ServerDBInfo>> db,
std::string whitelistBinPaths)
{
state Future<Void> core;
try {
state Future<Void> core = masterProxyServerCore(proxy, req.master, db, req.recoveryCount, req.recoveryTransactionVersion, req.firstProxy, whitelistBinPaths);
loop choose{
when(wait(core)) { return Void(); }
when(wait(checkRemoved(db, req.recoveryCount, proxy))) {}
}
core = masterProxyServerCore(proxy, req.master, db, req.recoveryCount, req.recoveryTransactionVersion, req.firstProxy, whitelistBinPaths);
wait(core || checkRemoved(db, req.recoveryCount, proxy));
}
catch (Error& e) {
if (e.code() == error_code_actor_cancelled || e.code() == error_code_worker_removed || e.code() == error_code_tlog_stopped ||
e.code() == error_code_master_tlog_failed || e.code() == error_code_coordinators_changed || e.code() == error_code_coordinated_state_conflict ||
e.code() == error_code_new_coordinators_timed_out)
{
TraceEvent("MasterProxyTerminated", proxy.id()).error(e, true);
TraceEvent("MasterProxyTerminated", proxy.id()).error(e, true);
if (e.code() != error_code_worker_removed && e.code() != error_code_tlog_stopped &&
e.code() != error_code_master_tlog_failed && e.code() != error_code_coordinators_changed &&
e.code() != error_code_coordinated_state_conflict && e.code() != error_code_new_coordinators_timed_out) {
throw;
}
}
core.cancel();
state PromiseStream<ReplyPromise<CommitID>> commitReplies;
state PromiseStream<ReplyPromise<GetReadVersionReply>> grvReplies;
state PromiseStream<ReplyPromise<GetKeyServerLocationsReply>> locationReplies;
state int replyCount = 0;
state Future<Void> finishForward = delay(SERVER_KNOBS->PROXY_FORWARD_DELAY) || stripRequests(proxy.commit, commitReplies, &replyCount) || stripRequests(proxy.getConsistentReadVersion, grvReplies, &replyCount) || stripRequests(proxy.getKeyServersLocations, locationReplies, &replyCount);
proxy = MasterProxyInterface();
loop {
if(finishForward.isReady()) {
return Void();
}
throw;
if(db->get().client.proxies.size() > 0 && !db->get().client.proxies[0].provisional && db->get().recoveryCount >= req.recoveryCount
&& !std::count(db->get().client.proxies.begin(), db->get().client.proxies.end(), proxy)) {
replyCount = -1;
core = forwardProxy(db->get().client, commitReplies, grvReplies, locationReplies);
wait(finishForward);
return Void();
}
wait(db->onChange() || finishForward);
}
}
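The rewritten error path keeps a removed proxy briefly useful instead of letting queued client requests time out. A hedged summary of the sequence above:

	// 1. core fails with one of the "normal" termination errors; cancel it.
	// 2. stripRequests() keeps draining commit/GRV/location requests, up to
	//    MAX_FORWARD_MESSAGES of them, for at most PROXY_FORWARD_DELAY seconds.
	// 3. once ClientDBInfo shows a real (non-provisional) successor set that
	//    no longer contains this proxy, forwardProxy() answers the drained
	//    requests with newClientInfo.
	// 4. finishForward fires and the actor returns cleanly.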

View File

@ -130,12 +130,12 @@ ACTOR Future<vector<UID>> addReadWriteDestinations(KeyRangeRef shard, vector<Sto
state vector< Future<Optional<UID>> > srcChecks;
for(int s=0; s<srcInterfs.size(); s++) {
srcChecks.push_back( checkReadWrite( srcInterfs[s].getShardState.getReplyUnlessFailedFor( GetShardStateRequest( shard, GetShardStateRequest::NO_WAIT), SERVER_KNOBS->SERVER_READY_QUORUM_INTERVAL, 0, TaskMoveKeys ), srcInterfs[s].id(), 0 ) );
srcChecks.push_back( checkReadWrite( srcInterfs[s].getShardState.getReplyUnlessFailedFor( GetShardStateRequest( shard, GetShardStateRequest::NO_WAIT), SERVER_KNOBS->SERVER_READY_QUORUM_INTERVAL, 0, TaskPriority::MoveKeys ), srcInterfs[s].id(), 0 ) );
}
state vector< Future<Optional<UID>> > destChecks;
for(int s=0; s<destInterfs.size(); s++) {
destChecks.push_back( checkReadWrite( destInterfs[s].getShardState.getReplyUnlessFailedFor( GetShardStateRequest( shard, GetShardStateRequest::NO_WAIT), SERVER_KNOBS->SERVER_READY_QUORUM_INTERVAL, 0, TaskMoveKeys ), destInterfs[s].id(), version ) );
destChecks.push_back( checkReadWrite( destInterfs[s].getShardState.getReplyUnlessFailedFor( GetShardStateRequest( shard, GetShardStateRequest::NO_WAIT), SERVER_KNOBS->SERVER_READY_QUORUM_INTERVAL, 0, TaskPriority::MoveKeys ), destInterfs[s].id(), version ) );
}
wait( waitForAll(srcChecks) && waitForAll(destChecks) );
@ -225,7 +225,7 @@ ACTOR Future<Void> startMoveKeys( Database occ, KeyRange keys, vector<UID> serve
state TraceInterval interval("RelocateShard_StartMoveKeys");
//state TraceInterval waitInterval("");
wait( startMoveKeysLock->take( TaskDataDistributionLaunch ) );
wait( startMoveKeysLock->take( TaskPriority::DataDistributionLaunch ) );
state FlowLock::Releaser releaser( *startMoveKeysLock );
TraceEvent(SevDebug, interval.begin(), relocationIntervalId);
@ -255,7 +255,7 @@ ACTOR Future<Void> startMoveKeys( Database occ, KeyRange keys, vector<UID> serve
//Keep track of shards for all src servers so that we can preserve their values in serverKeys
state Map<UID, VectorRef<KeyRangeRef>> shardMap;
tr.info.taskID = TaskMoveKeys;
tr.info.taskID = TaskPriority::MoveKeys;
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
wait( checkMoveKeysLock(&tr, lock) );
@ -394,11 +394,11 @@ ACTOR Future<Void> startMoveKeys( Database occ, KeyRange keys, vector<UID> serve
ACTOR Future<Void> waitForShardReady( StorageServerInterface server, KeyRange keys, Version minVersion, GetShardStateRequest::waitMode mode ) {
loop {
try {
std::pair<Version,Version> rep = wait( server.getShardState.getReply( GetShardStateRequest(keys, mode), TaskMoveKeys ) );
std::pair<Version,Version> rep = wait( server.getShardState.getReply( GetShardStateRequest(keys, mode), TaskPriority::MoveKeys ) );
if (rep.first >= minVersion) {
return Void();
}
wait( delayJittered( SERVER_KNOBS->SHARD_READY_DELAY, TaskMoveKeys ) );
wait( delayJittered( SERVER_KNOBS->SHARD_READY_DELAY, TaskPriority::MoveKeys ) );
}
catch (Error& e) {
if( e.code() != error_code_timed_out ) {
@ -419,7 +419,7 @@ ACTOR Future<Void> checkFetchingState( Database cx, vector<UID> dest, KeyRange k
try {
if (BUGGIFY) wait(delay(5));
tr.info.taskID = TaskMoveKeys;
tr.info.taskID = TaskPriority::MoveKeys;
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
vector< Future< Optional<Value> > > serverListEntries;
@ -439,7 +439,7 @@ ACTOR Future<Void> checkFetchingState( Database cx, vector<UID> dest, KeyRange k
}
wait( timeoutError( waitForAll( requests ),
SERVER_KNOBS->SERVER_READY_QUORUM_TIMEOUT, TaskMoveKeys ) );
SERVER_KNOBS->SERVER_READY_QUORUM_TIMEOUT, TaskPriority::MoveKeys ) );
dataMovementComplete.send(Void());
return Void();
@ -480,11 +480,11 @@ ACTOR Future<Void> finishMoveKeys( Database occ, KeyRange keys, vector<UID> dest
//printf("finishMoveKeys( '%s'-'%s' )\n", keys.begin.toString().c_str(), keys.end.toString().c_str());
loop {
try {
tr.info.taskID = TaskMoveKeys;
tr.info.taskID = TaskPriority::MoveKeys;
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
releaser.release();
wait( finishMoveKeysParallelismLock->take( TaskDataDistributionLaunch ) );
wait( finishMoveKeysParallelismLock->take( TaskPriority::DataDistributionLaunch ) );
releaser = FlowLock::Releaser( *finishMoveKeysParallelismLock );
wait( checkMoveKeysLock(&tr, lock) );
@ -632,7 +632,7 @@ ACTOR Future<Void> finishMoveKeys( Database occ, KeyRange keys, vector<UID> dest
for(int s=0; s<storageServerInterfaces.size(); s++)
serverReady.push_back( waitForShardReady( storageServerInterfaces[s], keys, tr.getReadVersion().get(), GetShardStateRequest::READABLE) );
wait( timeout( waitForAll( serverReady ), SERVER_KNOBS->SERVER_READY_QUORUM_TIMEOUT, Void(), TaskMoveKeys ) );
wait( timeout( waitForAll( serverReady ), SERVER_KNOBS->SERVER_READY_QUORUM_TIMEOUT, Void(), TaskPriority::MoveKeys ) );
int count = dest.size() - newDestinations.size();
for(int s=0; s<serverReady.size(); s++)
count += serverReady[s].isReady() && !serverReady[s].isError();
@ -808,7 +808,7 @@ ACTOR Future<Void> removeStorageServer( Database cx, UID serverID, MoveKeysLock
if (!canRemove) {
TEST(true); // The caller had a transaction in flight that assigned keys to the server. Wait for it to reverse its mistake.
TraceEvent(SevWarn,"NoCanRemove").detail("Count", noCanRemoveCount++).detail("ServerID", serverID);
wait( delayJittered(SERVER_KNOBS->REMOVE_RETRY_DELAY, TaskDataDistributionLaunch) );
wait( delayJittered(SERVER_KNOBS->REMOVE_RETRY_DELAY, TaskPriority::DataDistributionLaunch) );
tr.reset();
TraceEvent("RemoveStorageServerRetrying").detail("CanRemove", canRemove);
} else {

View File

@ -48,7 +48,7 @@ namespace oldTLog_4_6 {
typedef int16_t OldTag;
OldTag convertTag( Tag tag ) {
if(tag == invalidTag) return invalidTagOld;
if(tag == invalidTag || tag.locality == tagLocalityTxs) return invalidTagOld;
if(tag == txsTag) return txsTagOld;
ASSERT(tag.id >= 0);
return tag.id;
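convertTag() is the 4.6-compatibility shim, and the new clause keeps the new tag space invisible to it. A hedged summary of the mapping:

	// invalidTag                 -> invalidTagOld
	// locality == tagLocalityTxs -> invalidTagOld (new: 4.6 has no txs locality)
	// txsTag (legacy singleton)  -> txsTagOld
	// any ordinary tag           -> tag.id (plain int16)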
@ -333,7 +333,7 @@ namespace oldTLog_4_6 {
}
// Erase messages not needed to update *from* versions >= before (thus, messages with to-version <= before)
ACTOR Future<Void> eraseMessagesBefore( TagData *self, Version before, int64_t* gBytesErased, Reference<LogData> tlogData, int taskID ) {
ACTOR Future<Void> eraseMessagesBefore( TagData *self, Version before, int64_t* gBytesErased, Reference<LogData> tlogData, TaskPriority taskID ) {
while(!self->version_messages.empty() && self->version_messages.front().first < before) {
Version version = self->version_messages.front().first;
std::pair<int, int> &sizes = tlogData->version_sizes[version];
@ -359,7 +359,7 @@ namespace oldTLog_4_6 {
return Void();
}
Future<Void> eraseMessagesBefore(Version before, int64_t* gBytesErased, Reference<LogData> tlogData, int taskID) {
Future<Void> eraseMessagesBefore(Version before, int64_t* gBytesErased, Reference<LogData> tlogData, TaskPriority taskID) {
return eraseMessagesBefore(this, before, gBytesErased, tlogData, taskID);
}
};
@ -526,21 +526,21 @@ namespace oldTLog_4_6 {
self->persistentData->set( KeyValueRef( persistTagMessagesKey( logData->logId, tag->key, currentVersion ), wr.toValue() ) );
Future<Void> f = yield(TaskUpdateStorage);
Future<Void> f = yield(TaskPriority::UpdateStorage);
if(!f.isReady()) {
wait(f);
msg = std::upper_bound(tag->value.version_messages.begin(), tag->value.version_messages.end(), std::make_pair(currentVersion, LengthPrefixedStringRef()), CompareFirst<std::pair<Version, LengthPrefixedStringRef>>());
}
}
wait(yield(TaskUpdateStorage));
wait(yield(TaskPriority::UpdateStorage));
}
self->persistentData->set( KeyValueRef( BinaryWriter::toValue(logData->logId,Unversioned()).withPrefix(persistCurrentVersionKeys.begin), BinaryWriter::toValue(newPersistentDataVersion, Unversioned()) ) );
logData->persistentDataVersion = newPersistentDataVersion;
wait( self->persistentData->commit() ); // SOMEDAY: This seems to be running pretty often, should we slow it down???
wait( delay(0, TaskUpdateStorage) );
wait( delay(0, TaskPriority::UpdateStorage) );
// Now that the changes we made to persistentData are durable, erase the data we moved from memory and the queue, increase bytesDurable accordingly, and update persistentDataDurableVersion.
@ -548,20 +548,20 @@ namespace oldTLog_4_6 {
logData->persistentDataDurableVersion = newPersistentDataVersion;
for(tag = logData->tag_data.begin(); tag != logData->tag_data.end(); ++tag) {
wait(tag->value.eraseMessagesBefore( newPersistentDataVersion+1, &self->bytesDurable, logData, TaskUpdateStorage ));
wait(yield(TaskUpdateStorage));
wait(tag->value.eraseMessagesBefore( newPersistentDataVersion+1, &self->bytesDurable, logData, TaskPriority::UpdateStorage ));
wait(yield(TaskPriority::UpdateStorage));
}
logData->version_sizes.erase(logData->version_sizes.begin(), logData->version_sizes.lower_bound(logData->persistentDataDurableVersion));
wait(yield(TaskUpdateStorage));
wait(yield(TaskPriority::UpdateStorage));
while(!logData->messageBlocks.empty() && logData->messageBlocks.front().first <= newPersistentDataVersion) {
int64_t bytesErased = int64_t(logData->messageBlocks.front().second.size()) * SERVER_KNOBS->TLOG_MESSAGE_BLOCK_OVERHEAD_FACTOR;
logData->bytesDurable += bytesErased;
self->bytesDurable += bytesErased;
logData->messageBlocks.pop_front();
wait(yield(TaskUpdateStorage));
wait(yield(TaskPriority::UpdateStorage));
}
if(logData->bytesDurable.getValue() > logData->bytesInput.getValue() || self->bytesDurable > self->bytesInput) {
@ -586,7 +586,7 @@ namespace oldTLog_4_6 {
}
if(!self->queueOrder.size()) {
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskUpdateStorage) );
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskPriority::UpdateStorage) );
return Void();
}
@ -621,14 +621,14 @@ namespace oldTLog_4_6 {
}
wait( logData->queueCommittedVersion.whenAtLeast( nextVersion ) );
wait( delay(0, TaskUpdateStorage) );
wait( delay(0, TaskPriority::UpdateStorage) );
//TraceEvent("TlogUpdatePersist", self->dbgid).detail("LogId", logData->logId).detail("NextVersion", nextVersion).detail("Version", logData->version.get()).detail("PersistentDataDurableVer", logData->persistentDataDurableVersion).detail("QueueCommitVer", logData->queueCommittedVersion.get()).detail("PersistDataVer", logData->persistentDataVersion);
if (nextVersion > logData->persistentDataVersion) {
self->updatePersist = updatePersistentData(self, logData, nextVersion);
wait( self->updatePersist );
} else {
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskUpdateStorage) );
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskPriority::UpdateStorage) );
}
if( logData->removed.isReady() ) {
@ -639,9 +639,9 @@ namespace oldTLog_4_6 {
if(logData->persistentDataDurableVersion == logData->version.get()) {
self->queueOrder.pop_front();
}
wait( delay(0.0, TaskUpdateStorage) );
wait( delay(0.0, TaskPriority::UpdateStorage) );
} else {
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskUpdateStorage) );
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskPriority::UpdateStorage) );
}
}
else if(logData->initialized) {
@ -650,7 +650,7 @@ namespace oldTLog_4_6 {
while( totalSize < SERVER_KNOBS->UPDATE_STORAGE_BYTE_LIMIT && sizeItr != logData->version_sizes.end()
&& (logData->bytesInput.getValue() - logData->bytesDurable.getValue() - totalSize >= SERVER_KNOBS->TLOG_SPILL_THRESHOLD || sizeItr->value.first == 0) )
{
wait( yield(TaskUpdateStorage) );
wait( yield(TaskPriority::UpdateStorage) );
++sizeItr;
nextVersion = sizeItr == logData->version_sizes.end() ? logData->version.get() : sizeItr->key;
@ -662,7 +662,7 @@ namespace oldTLog_4_6 {
totalSize += it->second.expectedSize();
}
wait(yield(TaskUpdateStorage));
wait(yield(TaskPriority::UpdateStorage));
}
prevVersion = nextVersion;
@ -673,7 +673,7 @@ namespace oldTLog_4_6 {
//TraceEvent("UpdateStorageVer", logData->logId).detail("NextVersion", nextVersion).detail("PersistentDataVersion", logData->persistentDataVersion).detail("TotalSize", totalSize);
wait( logData->queueCommittedVersion.whenAtLeast( nextVersion ) );
wait( delay(0, TaskUpdateStorage) );
wait( delay(0, TaskPriority::UpdateStorage) );
if (nextVersion > logData->persistentDataVersion) {
self->updatePersist = updatePersistentData(self, logData, nextVersion);
@ -681,21 +681,21 @@ namespace oldTLog_4_6 {
}
if( totalSize < SERVER_KNOBS->UPDATE_STORAGE_BYTE_LIMIT ) {
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskUpdateStorage) );
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskPriority::UpdateStorage) );
}
else {
//recovery wants to commit to persistent data when updatePersistentData is not active; this delay ensures that immediately after
//updatePersist returns, another one has not been started yet.
wait( delay(0.0, TaskUpdateStorage) );
wait( delay(0.0, TaskPriority::UpdateStorage) );
}
} else {
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskUpdateStorage) );
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskPriority::UpdateStorage) );
}
return Void();
}
ACTOR Future<Void> updateStorageLoop( TLogData* self ) {
wait(delay(0, TaskUpdateStorage));
wait(delay(0, TaskPriority::UpdateStorage));
loop {
wait( updateStorage(self) );
@ -823,7 +823,7 @@ namespace oldTLog_4_6 {
ti->value.popped_recently = true;
//if (to.epoch == self->epoch())
if ( req.to > logData->persistentDataDurableVersion )
wait(ti->value.eraseMessagesBefore( req.to, &self->bytesDurable, logData, TaskTLogPop ));
wait(ti->value.eraseMessagesBefore( req.to, &self->bytesDurable, logData, TaskPriority::TLogPop ));
}
req.reply.send(Void());

View File

@ -195,6 +195,7 @@ static const KeyRangeRef persistCurrentVersionKeys = KeyRangeRef( LiteralStringR
static const KeyRangeRef persistKnownCommittedVersionKeys = KeyRangeRef( LiteralStringRef( "knownCommitted/" ), LiteralStringRef( "knownCommitted0" ) );
static const KeyRangeRef persistLocalityKeys = KeyRangeRef( LiteralStringRef( "Locality/" ), LiteralStringRef( "Locality0" ) );
static const KeyRangeRef persistLogRouterTagsKeys = KeyRangeRef( LiteralStringRef( "LogRouterTags/" ), LiteralStringRef( "LogRouterTags0" ) );
static const KeyRangeRef persistTxsTagsKeys = KeyRangeRef( LiteralStringRef( "TxsTags/" ), LiteralStringRef( "TxsTags0" ) );
static const KeyRange persistTagMessagesKeys = prefixRange(LiteralStringRef("TagMsg/"));
static const KeyRange persistTagPoppedKeys = prefixRange(LiteralStringRef("TagPop/"));
@ -291,7 +292,7 @@ struct TLogData : NonCopyable {
concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS),
ignorePopRequest(false), ignorePopDeadline(), ignorePopUid(), dataFolder(folder), toBePopped()
{
cx = openDBOnServer(dbInfo, TaskDefaultEndpoint, true, true);
cx = openDBOnServer(dbInfo, TaskPriority::DefaultEndpoint, true, true);
}
};
@ -317,7 +318,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
}
// Erase messages not needed to update *from* versions >= before (thus, messages with to-version <= before)
ACTOR Future<Void> eraseMessagesBefore( TagData *self, Version before, TLogData *tlogData, Reference<LogData> logData, int taskID ) {
ACTOR Future<Void> eraseMessagesBefore( TagData *self, Version before, TLogData *tlogData, Reference<LogData> logData, TaskPriority taskID ) {
while(!self->versionMessages.empty() && self->versionMessages.front().first < before) {
Version version = self->versionMessages.front().first;
std::pair<int,int> &sizes = logData->version_sizes[version];
@ -327,7 +328,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
auto const& m = self->versionMessages.front();
++messagesErased;
if(self->tag != txsTag) {
if(self->tag.locality != tagLocalityTxs && self->tag != txsTag) {
sizes.first -= m.second.expectedSize();
} else {
sizes.second -= m.second.expectedSize();
@ -346,7 +347,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
return Void();
}
Future<Void> eraseMessagesBefore(Version before, TLogData *tlogData, Reference<LogData> logData, int taskID) {
Future<Void> eraseMessagesBefore(Version before, TLogData *tlogData, Reference<LogData> logData, TaskPriority taskID) {
return eraseMessagesBefore(this, before, tlogData, logData, taskID);
}
};
@ -378,7 +379,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
Version knownCommittedVersion, durableKnownCommittedVersion, minKnownCommittedVersion;
struct PeekTrackerData {
std::map<int, Promise<Version>> sequence_version;
std::map<int, Promise<std::pair<Version, bool>>> sequence_version;
double lastUpdate;
};
@ -401,7 +402,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
//only callable after getTagData returns a null reference
Reference<TagData> createTagData(Tag tag, Version popped, bool nothingPersistent, bool poppedRecently, bool unpoppedRecovered) {
if(tag.locality != tagLocalityLogRouter && allTags.size() && !allTags.count(tag) && popped <= recoveredAt) {
if(tag.locality != tagLocalityLogRouter && tag.locality != tagLocalityTxs && allTags.size() && !allTags.count(tag) && popped <= recoveredAt) {
popped = recoveredAt + 1;
}
Reference<TagData> newTagData = Reference<TagData>( new TagData(tag, popped, nothingPersistent, poppedRecently, unpoppedRecovered) );
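The extra condition exempts txs-locality tags, like log-router tags, from the popped floor of recoveredAt + 1. A hedged restatement as a predicate (name is mine):

	// Tags that must stay peekable across recovery itself cannot have their
	// popped version bumped past the recovery point.
	bool exemptFromRecoveryFloor(Tag tag) {
		return tag.locality == tagLocalityLogRouter || tag.locality == tagLocalityTxs;
	}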
@ -434,9 +435,10 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
Future<Void> terminated;
FlowLock execOpLock;
bool execOpCommitInProgress;
int txsTags;
explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, UID recruitmentID, std::vector<Tag> tags) : tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()),
cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), remoteTag(remoteTag), isPrimary(isPrimary), logRouterTags(logRouterTags), recruitmentID(recruitmentID),
explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, int txsTags, UID recruitmentID, std::vector<Tag> tags) : tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()),
cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), remoteTag(remoteTag), isPrimary(isPrimary), logRouterTags(logRouterTags), txsTags(txsTags), recruitmentID(recruitmentID),
logSystem(new AsyncVar<Reference<ILogSystem>>()), logRouterPoppedVersion(0), durableKnownCommittedVersion(0), minKnownCommittedVersion(0), allTags(tags.begin(), tags.end()), terminated(tLogData->terminated.getFuture()),
// These are initialized differently on init() or recovery
recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), unrecoveredBefore(1), recoveredAt(1), unpoppedRecoveredTags(0),
@ -483,6 +485,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
tLogData->persistentData->clear( singleKeyRange(logIdKey.withPrefix(persistKnownCommittedVersionKeys.begin)) );
tLogData->persistentData->clear( singleKeyRange(logIdKey.withPrefix(persistLocalityKeys.begin)) );
tLogData->persistentData->clear( singleKeyRange(logIdKey.withPrefix(persistLogRouterTagsKeys.begin)) );
tLogData->persistentData->clear( singleKeyRange(logIdKey.withPrefix(persistTxsTagsKeys.begin)) );
tLogData->persistentData->clear( singleKeyRange(logIdKey.withPrefix(persistRecoveryCountKeys.begin)) );
Key msgKey = logIdKey.withPrefix(persistTagMessagesKeys.begin);
tLogData->persistentData->clear( KeyRangeRef( msgKey, strinc(msgKey) ) );
@ -616,14 +619,14 @@ ACTOR Future<Void> updatePersistentData( TLogData* self, Reference<LogData> logD
self->persistentData->set( KeyValueRef( persistTagMessagesKey( logData->logId, tagData->tag, currentVersion ), wr.toValue() ) );
Future<Void> f = yield(TaskUpdateStorage);
Future<Void> f = yield(TaskPriority::UpdateStorage);
if(!f.isReady()) {
wait(f);
msg = std::upper_bound(tagData->versionMessages.begin(), tagData->versionMessages.end(), std::make_pair(currentVersion, LengthPrefixedStringRef()), CompareFirst<std::pair<Version, LengthPrefixedStringRef>>());
}
}
wait(yield(TaskUpdateStorage));
wait(yield(TaskPriority::UpdateStorage));
}
}
}
@ -633,7 +636,7 @@ ACTOR Future<Void> updatePersistentData( TLogData* self, Reference<LogData> logD
logData->persistentDataVersion = newPersistentDataVersion;
wait( self->persistentData->commit() ); // SOMEDAY: This seems to be running pretty often, should we slow it down???
wait( delay(0, TaskUpdateStorage) );
wait( delay(0, TaskPriority::UpdateStorage) );
// Now that the changes we made to persistentData are durable, erase the data we moved from memory and the queue, increase bytesDurable accordingly, and update persistentDataDurableVersion.
@ -643,22 +646,22 @@ ACTOR Future<Void> updatePersistentData( TLogData* self, Reference<LogData> logD
for(tagLocality = 0; tagLocality < logData->tag_data.size(); tagLocality++) {
for(tagId = 0; tagId < logData->tag_data[tagLocality].size(); tagId++) {
if(logData->tag_data[tagLocality][tagId]) {
wait(logData->tag_data[tagLocality][tagId]->eraseMessagesBefore( newPersistentDataVersion+1, self, logData, TaskUpdateStorage ));
wait(yield(TaskUpdateStorage));
wait(logData->tag_data[tagLocality][tagId]->eraseMessagesBefore( newPersistentDataVersion+1, self, logData, TaskPriority::UpdateStorage ));
wait(yield(TaskPriority::UpdateStorage));
}
}
}
logData->version_sizes.erase(logData->version_sizes.begin(), logData->version_sizes.lower_bound(logData->persistentDataDurableVersion));
wait(yield(TaskUpdateStorage));
wait(yield(TaskPriority::UpdateStorage));
while(!logData->messageBlocks.empty() && logData->messageBlocks.front().first <= newPersistentDataVersion) {
int64_t bytesErased = int64_t(logData->messageBlocks.front().second.size()) * SERVER_KNOBS->TLOG_MESSAGE_BLOCK_OVERHEAD_FACTOR;
logData->bytesDurable += bytesErased;
self->bytesDurable += bytesErased;
logData->messageBlocks.pop_front();
wait(yield(TaskUpdateStorage));
wait(yield(TaskPriority::UpdateStorage));
}
if(logData->bytesDurable.getValue() > logData->bytesInput.getValue() || self->bytesDurable > self->bytesInput) {
@ -683,7 +686,7 @@ ACTOR Future<Void> updateStorage( TLogData* self ) {
}
if(!self->queueOrder.size()) {
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskUpdateStorage) );
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskPriority::UpdateStorage) );
return Void();
}
@ -707,7 +710,7 @@ ACTOR Future<Void> updateStorage( TLogData* self ) {
}
wait( logData->queueCommittedVersion.whenAtLeast( nextVersion ) );
wait( delay(0, TaskUpdateStorage) );
wait( delay(0, TaskPriority::UpdateStorage) );
//TraceEvent("TlogUpdatePersist", self->dbgid).detail("LogId", logData->logId).detail("NextVersion", nextVersion).detail("Version", logData->version.get()).detail("PersistentDataDurableVer", logData->persistentDataDurableVersion).detail("QueueCommitVer", logData->queueCommittedVersion.get()).detail("PersistDataVer", logData->persistentDataVersion);
if (nextVersion > logData->persistentDataVersion) {
@ -716,7 +719,7 @@ ACTOR Future<Void> updateStorage( TLogData* self ) {
wait( updatePersistentData(self, logData, nextVersion) );
commitLockReleaser.release();
} else {
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskUpdateStorage) );
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskPriority::UpdateStorage) );
}
if( logData->removed.isReady() ) {
@ -727,9 +730,9 @@ ACTOR Future<Void> updateStorage( TLogData* self ) {
if(logData->persistentDataDurableVersion == logData->version.get()) {
self->queueOrder.pop_front();
}
wait( delay(0.0, TaskUpdateStorage) );
wait( delay(0.0, TaskPriority::UpdateStorage) );
} else {
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskUpdateStorage) );
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskPriority::UpdateStorage) );
}
}
else if(logData->initialized) {
@ -750,7 +753,7 @@ ACTOR Future<Void> updateStorage( TLogData* self ) {
//TraceEvent("UpdateStorageVer", logData->logId).detail("NextVersion", nextVersion).detail("PersistentDataVersion", logData->persistentDataVersion).detail("TotalSize", totalSize);
wait( logData->queueCommittedVersion.whenAtLeast( nextVersion ) );
wait( delay(0, TaskUpdateStorage) );
wait( delay(0, TaskPriority::UpdateStorage) );
if (nextVersion > logData->persistentDataVersion) {
wait( self->persistentDataCommitLock.take() );
@ -760,21 +763,21 @@ ACTOR Future<Void> updateStorage( TLogData* self ) {
}
if( totalSize < SERVER_KNOBS->UPDATE_STORAGE_BYTE_LIMIT ) {
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskUpdateStorage) );
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskPriority::UpdateStorage) );
}
else {
//recovery wants to commit to persistent data when updatePersistentData is not active; this delay ensures that immediately after
//updatePersist returns, another one has not been started yet.
wait( delay(0.0, TaskUpdateStorage) );
wait( delay(0.0, TaskPriority::UpdateStorage) );
}
} else {
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskUpdateStorage) );
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskPriority::UpdateStorage) );
}
return Void();
}
ACTOR Future<Void> updateStorageLoop( TLogData* self ) {
wait(delay(0, TaskUpdateStorage));
wait(delay(0, TaskPriority::UpdateStorage));
loop {
wait( updateStorage(self) );
@ -823,7 +826,7 @@ void commitMessages( TLogData* self, Reference<LogData> logData, Version version
block.append(block.arena(), msg.message.begin(), msg.message.size());
for(auto tag : msg.tags) {
if(logData->locality == tagLocalitySatellite) {
if(!(tag == txsTag || tag.locality == tagLocalityLogRouter)) {
if(!(tag.locality == tagLocalityTxs || tag.locality == tagLocalityLogRouter || tag == txsTag)) {
continue;
}
} else if(!(logData->locality == tagLocalitySpecial || logData->locality == tag.locality || tag.locality < 0)) {
@ -836,6 +839,9 @@ void commitMessages( TLogData* self, Reference<LogData> logData, Version version
}
tag.id = tag.id % logData->logRouterTags;
}
if(tag.locality == tagLocalityTxs) {
tag.id = tag.id % logData->txsTags;
}
Reference<LogData::TagData> tagData = logData->getTagData(tag);
if(!tagData) {
tagData = logData->createTagData(tag, 0, true, true, false);
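Incoming txs tags are folded into this tlog's configured bucket count, exactly as log-router tags are folded modulo logRouterTags just above. A standalone, runnable illustration (the txsTags value is hypothetical):

	#include <cstdio>

	int main() {
		const int txsTags = 4; // hypothetical cluster configuration
		for (int id = 0; id < 8; ++id)
			std::printf("txs tag %d -> local tag %d\n", id, id % txsTags);
		return 0;
	}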
@ -846,7 +852,7 @@ void commitMessages( TLogData* self, Reference<LogData> logData, Version version
if(tagData->versionMessages.back().second.expectedSize() > SERVER_KNOBS->MAX_MESSAGE_SIZE) {
TraceEvent(SevWarnAlways, "LargeMessage").detail("Size", tagData->versionMessages.back().second.expectedSize());
}
if (tag != txsTag) {
if (tag.locality != tagLocalityTxs && tag != txsTag) {
expectedBytes += tagData->versionMessages.back().second.expectedSize();
} else {
txsBytes += tagData->versionMessages.back().second.expectedSize();
@ -914,7 +920,7 @@ std::deque<std::pair<Version, LengthPrefixedStringRef>> & getVersionMessages( Re
};
ACTOR Future<Void> tLogPopCore( TLogData* self, Tag inputTag, Version to, Reference<LogData> logData ) {
if (self->ignorePopRequest && inputTag != txsTag) {
if (self->ignorePopRequest && inputTag.locality != tagLocalityTxs && inputTag != txsTag) {
TraceEvent("IgnoringPopRequest").detail("IgnorePopDeadline", self->ignorePopDeadline);
if (self->toBePopped.find(inputTag) == self->toBePopped.end()
@ -952,7 +958,7 @@ ACTOR Future<Void> tLogPopCore( TLogData* self, Tag inputTag, Version to, Refere
}
if (upTo > logData->persistentDataDurableVersion)
wait(tagData->eraseMessagesBefore(upTo, self, logData, TaskTLogPop));
wait(tagData->eraseMessagesBefore(upTo, self, logData, TaskPriority::TLogPop));
//TraceEvent("TLogPop", self->dbgid).detail("Tag", tag.toString()).detail("To", upTo);
}
return Void();
@ -1025,9 +1031,9 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
sequence = req.sequence.get().second;
auto& trackerData = logData->peekTracker[peekId];
if (sequence == 0) {
Promise<Version> firstRequest;
Promise<std::pair<Version, bool>> firstRequest;
trackerData.sequence_version[0] = firstRequest;
firstRequest.send(req.begin);
firstRequest.send(std::make_pair(req.begin, req.onlySpilled));
}
auto seqBegin = trackerData.sequence_version.begin();
while(trackerData.sequence_version.size() && seqBegin->first <= sequence - SERVER_KNOBS->PARALLEL_GET_MORE_REQUESTS) {
@ -1038,7 +1044,7 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
seqBegin = trackerData.sequence_version.begin();
}
if (seqBegin != trackerData.sequence_version.end() && !seqBegin->second.isSet()) {
seqBegin->second.send(req.begin);
seqBegin->second.send(std::make_pair(req.begin, req.onlySpilled));
}
if(trackerData.sequence_version.size() && sequence < seqBegin->first) {
@ -1046,8 +1052,9 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
}
trackerData.lastUpdate = now();
Version ver = wait(trackerData.sequence_version[sequence].getFuture());
req.begin = ver;
std::pair<Version, bool> prevPeekData = wait(trackerData.sequence_version[sequence].getFuture());
req.begin = prevPeekData.first;
req.onlySpilled = prevPeekData.second;
wait(yield());
} catch( Error &e ) {
if(e.code() == error_code_timed_out) {
@ -1065,7 +1072,7 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
auto& trackerData = logData->peekTracker[peekId];
auto& sequenceData = trackerData.sequence_version[sequence+1];
if (!sequenceData.isSet()) {
sequenceData.send(req.begin);
sequenceData.send(std::make_pair(req.begin, req.onlySpilled));
}
}
return Void();
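For context on the Promise<Version> to Promise<std::pair<Version, bool>> hunks above: each peek in a sequence now hands its successor both the next begin version and the onlySpilled flag, so a follow-up peek knows whether it is still draining spilled data. A minimal sketch of that handoff using std::promise in place of flow's Promise (types and field names are taken from the diff; everything else is a stand-in):

#include <cstdint>
#include <future>
#include <iostream>
#include <map>
#include <utility>

using Version = int64_t;

int main() {
	// Sketch of trackerData.sequence_version: each sequence number carries
	// (begin version, onlySpilled) to the next peek, not just the version.
	std::map<int, std::promise<std::pair<Version, bool>>> sequence_version;

	// Sequence 0 seeds itself, as in the `if (sequence == 0)` branch above.
	sequence_version[0].set_value({ 100 /*req.begin*/, false /*req.onlySpilled*/ });

	// A peek at sequence 0 adopts both fields from its predecessor.
	std::pair<Version, bool> prevPeekData = sequence_version[0].get_future().get();
	Version begin = prevPeekData.first;
	bool onlySpilled = prevPeekData.second;

	// After replying, it seeds sequence + 1 with where the next peek starts
	// (rep.end) and whether that reply came purely from spilled data.
	sequence_version[1].set_value({ begin + 50 /*rep.end*/, onlySpilled });
	std::cout << begin << " " << onlySpilled << std::endl;
	return 0;
}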
@ -1081,16 +1088,16 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
if( req.tag.locality == tagLocalityLogRouter ) {
wait( self->concurrentLogRouterReads.take() );
state FlowLock::Releaser globalReleaser(self->concurrentLogRouterReads);
wait( delay(0.0, TaskLowPriority) );
wait( delay(0.0, TaskPriority::Low) );
}
if( req.begin <= logData->persistentDataDurableVersion && req.tag != txsTag) {
if( req.begin <= logData->persistentDataDurableVersion && req.tag.locality != tagLocalityTxs && req.tag != txsTag) {
// Reading spilled data will almost always imply that the storage server is >5s behind the rest
// of the cluster. We shouldn't prioritize spending CPU on helping this server catch up
// slightly faster over keeping the rest of the cluster operating normally.
// txsTag is only ever peeked on recovery, and we would still wish to prioritize requests
// that impact recovery duration.
wait(delay(0, TaskTLogSpilledPeekReply));
wait(delay(0, TaskPriority::TLogSpilledPeekReply));
}
Version poppedVer = poppedVersion(logData, req.tag);
@ -1113,13 +1120,13 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
return Void();
}
if(sequenceData.isSet()) {
if(sequenceData.getFuture().get() != rep.end) {
if(sequenceData.getFuture().get().first != rep.end) {
TEST(true); //tlog peek second attempt ended at a different version
req.reply.sendError(timed_out());
return Void();
}
} else {
sequenceData.send(rep.end);
sequenceData.send(std::make_pair(rep.end, rep.onlySpilled));
}
rep.begin = req.begin;
}
@ -1189,13 +1196,13 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
return Void();
}
if(sequenceData.isSet()) {
if(sequenceData.getFuture().get() != reply.end) {
if(sequenceData.getFuture().get().first != reply.end) {
TEST(true); //tlog peek second attempt ended at a different version
req.reply.sendError(timed_out());
return Void();
}
} else {
sequenceData.send(reply.end);
sequenceData.send(std::make_pair(reply.end, reply.onlySpilled));
}
reply.begin = req.begin;
}
@ -1208,7 +1215,7 @@ ACTOR Future<Void> watchDegraded(TLogData* self) {
//This delay is divided into multiple delays to avoid marking the tlog as degraded because of a single SlowTask
state int loopCount = 0;
while(loopCount < SERVER_KNOBS->TLOG_DEGRADED_DELAY_COUNT) {
wait(delay(SERVER_KNOBS->TLOG_DEGRADED_DURATION/SERVER_KNOBS->TLOG_DEGRADED_DELAY_COUNT, TaskLowPriority));
wait(delay(SERVER_KNOBS->TLOG_DEGRADED_DURATION/SERVER_KNOBS->TLOG_DEGRADED_DELAY_COUNT, TaskPriority::Low));
loopCount++;
}
TraceEvent(SevWarnAlways, "TLogDegraded", self->dbgid);
@ -1338,7 +1345,7 @@ void execProcessingHelper(TLogData* self,
rd >> messageLength >> sub >> tagCount;
for (int i = 0; i < tagCount; i++) {
rd >> tmpTag;
if (tmpTag == txsTag) {
if (tmpTag.locality == tagLocalityTxs || tmpTag == txsTag) {
hasTxsTag = true;
}
execTags->push_back(execTags->arena(), tmpTag);
@ -1544,7 +1551,7 @@ ACTOR Future<Void> tLogCommit(
.detail("PersistentDataDurableVersion", logData->persistentDataDurableVersion);
waitStartT = now();
}
wait( delayJittered(.005, TaskTLogCommit) );
wait( delayJittered(.005, TaskPriority::TLogCommit) );
}
// while exec op is being committed, no new transactions will be admitted.
@ -1667,6 +1674,7 @@ ACTOR Future<Void> initPersistentState( TLogData* self, Reference<LogData> logDa
storage->set( KeyValueRef( BinaryWriter::toValue(logData->logId,Unversioned()).withPrefix(persistKnownCommittedVersionKeys.begin), BinaryWriter::toValue(logData->knownCommittedVersion, Unversioned()) ) );
storage->set( KeyValueRef( BinaryWriter::toValue(logData->logId,Unversioned()).withPrefix(persistLocalityKeys.begin), BinaryWriter::toValue(logData->locality, Unversioned()) ) );
storage->set( KeyValueRef( BinaryWriter::toValue(logData->logId,Unversioned()).withPrefix(persistLogRouterTagsKeys.begin), BinaryWriter::toValue(logData->logRouterTags, Unversioned()) ) );
storage->set( KeyValueRef( BinaryWriter::toValue(logData->logId,Unversioned()).withPrefix(persistTxsTagsKeys.begin), BinaryWriter::toValue(logData->txsTags, Unversioned()) ) );
storage->set( KeyValueRef( BinaryWriter::toValue(logData->logId,Unversioned()).withPrefix(persistRecoveryCountKeys.begin), BinaryWriter::toValue(logData->recoveryCount, Unversioned()) ) );
for(auto tag : logData->allTags) {
@ -1884,7 +1892,7 @@ ACTOR Future<Void> pullAsyncData( TLogData* self, Reference<LogData> logData, st
while (!endVersion.present() || logData->version.get() < endVersion.get()) {
loop {
choose {
when(wait( r ? r->getMore(TaskTLogCommit) : Never() ) ) {
when(wait( r ? r->getMore(TaskPriority::TLogCommit) : Never() ) ) {
break;
}
when( wait( dbInfoChange ) ) {
@ -1907,7 +1915,7 @@ ACTOR Future<Void> pullAsyncData( TLogData* self, Reference<LogData> logData, st
.detail("PersistentDataDurableVersion", logData->persistentDataDurableVersion);
waitStartT = now();
}
wait( delayJittered(.005, TaskTLogCommit) );
wait( delayJittered(.005, TaskPriority::TLogCommit) );
}
state Version ver = 0;
@ -1947,7 +1955,7 @@ ACTOR Future<Void> pullAsyncData( TLogData* self, Reference<LogData> logData, st
// Notifies the commitQueue actor to commit persistentQueue, and also unblocks tLogPeekMessages actors
logData->version.set( ver );
wait( yield(TaskTLogCommit) );
wait( yield(TaskPriority::TLogCommit) );
}
lastVer = ver;
ver = r->version().version;
@ -1984,7 +1992,7 @@ ACTOR Future<Void> pullAsyncData( TLogData* self, Reference<LogData> logData, st
// Notifies the commitQueue actor to commit persistentQueue, and also unblocks tLogPeekMessages actors
logData->version.set( ver );
wait( yield(TaskTLogCommit) );
wait( yield(TaskPriority::TLogCommit) );
}
break;
}
@ -2075,12 +2083,13 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
state Future<Standalone<VectorRef<KeyValueRef>>> fKnownCommitted = storage->readRange(persistKnownCommittedVersionKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fLocality = storage->readRange(persistLocalityKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fLogRouterTags = storage->readRange(persistLogRouterTagsKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fTxsTags = storage->readRange(persistTxsTagsKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fRecoverCounts = storage->readRange(persistRecoveryCountKeys);
// FIXME: metadata in queue?
wait( waitForAll( (vector<Future<Optional<Value>>>(), fFormat ) ) );
wait( waitForAll( (vector<Future<Standalone<VectorRef<KeyValueRef>>>>(), fVers, fKnownCommitted, fLocality, fLogRouterTags, fRecoverCounts) ) );
wait( waitForAll( (vector<Future<Standalone<VectorRef<KeyValueRef>>>>(), fVers, fKnownCommitted, fLocality, fLogRouterTags, fTxsTags, fRecoverCounts) ) );
if (fFormat.get().present() && !persistFormatReadableRange.contains( fFormat.get().get() )) {
//FIXME: remove when we no longer need to test upgrades from 4.X releases
@ -2132,6 +2141,11 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
id_logRouterTags[ BinaryReader::fromStringRef<UID>(it.key.removePrefix(persistLogRouterTagsKeys.begin), Unversioned())] = BinaryReader::fromStringRef<int>( it.value, Unversioned() );
}
state std::map<UID, int> id_txsTags;
for(auto it : fTxsTags.get()) {
id_txsTags[ BinaryReader::fromStringRef<UID>(it.key.removePrefix(persistTxsTagsKeys.begin), Unversioned())] = BinaryReader::fromStringRef<int>( it.value, Unversioned() );
}
state std::map<UID, Version> id_knownCommitted;
for(auto it : fKnownCommitted.get()) {
id_knownCommitted[ BinaryReader::fromStringRef<UID>(it.key.removePrefix(persistKnownCommittedVersionKeys.begin), Unversioned())] = BinaryReader::fromStringRef<Version>( it.value, Unversioned() );
@ -2157,7 +2171,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
DUMPTOKEN( recruited.confirmRunning );
//We do not need the remoteTag, because we will not be loading any additional data
logData = Reference<LogData>( new LogData(self, recruited, Tag(), true, id_logRouterTags[id1], UID(), std::vector<Tag>()) );
logData = Reference<LogData>( new LogData(self, recruited, Tag(), true, id_logRouterTags[id1], id_txsTags[id1], UID(), std::vector<Tag>()) );
logData->locality = id_locality[id1];
logData->stopped = true;
self->id_data[id1] = logData;
@ -2340,7 +2354,7 @@ ACTOR Future<Void> tLogStart( TLogData* self, InitializeTLogRequest req, Localit
it.second->stopCommit.trigger();
}
state Reference<LogData> logData = Reference<LogData>( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.recruitmentID, req.allTags) );
state Reference<LogData> logData = Reference<LogData>( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID, req.allTags) );
self->id_data[recruited.id()] = logData;
logData->locality = req.locality;
logData->recoveryCount = req.epoch;

View File

@ -38,7 +38,7 @@ public:
ready = NotifiedVersion(s);
started = false;
}
Future<bool> order( Seq s, int taskID = TaskDefaultYield ) {
Future<bool> order( Seq s, TaskPriority taskID = TaskPriority::DefaultYield ) {
if ( ready.get() < s )
return waitAndOrder( this, s, taskID );
else
@ -54,7 +54,7 @@ public:
return ready.whenAtLeast(v);
}
private:
ACTOR static Future<bool> waitAndOrder( Orderer<Seq>* self, Seq s, int taskID ) {
ACTOR static Future<bool> waitAndOrder( Orderer<Seq>* self, Seq s, TaskPriority taskID ) {
wait( self->ready.whenAtLeast(s) );
wait( yield( taskID ) || self->shutdown.getFuture() );
return self->dedup(s);
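These Orderer changes are part of the repo-wide migration from plain integer task IDs (TaskDefaultYield, TaskTLogCommit, ...) to a scoped TaskPriority enum class, which stops priorities from being silently mixed with arbitrary ints. A small sketch of what the stronger typing buys (the enumerators and values below are assumptions for illustration; the real ones live in flow):

#include <iostream>

// Assumed shape of flow's scoped priority enum; values illustrative only.
enum class TaskPriority {
	Low = 2000,
	DefaultYield = 7000,
	TLogCommit = 8500
};

// Callers must now name a TaskPriority explicitly...
void yieldAt(TaskPriority priority = TaskPriority::DefaultYield) {
	std::cout << "yield at " << static_cast<int>(priority) << std::endl;
}

int main() {
	yieldAt(TaskPriority::TLogCommit); // OK
	// yieldAt(8500);                  // ...while a bare int no longer compiles.
	return 0;
}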

View File

@ -291,6 +291,15 @@ ACTOR Future<bool> getTeamCollectionValid(Database cx, WorkerInterface dataDistr
int64_t desiredMachineTeamNumber = boost::lexical_cast<int64_t>(teamCollectionInfoMessage.getValue("DesiredMachineTeams"));
int64_t maxMachineTeamNumber = boost::lexical_cast<int64_t>(teamCollectionInfoMessage.getValue("MaxMachineTeams"));
int64_t minServerTeamOnServer =
boost::lexical_cast<int64_t>(teamCollectionInfoMessage.getValue("MinTeamNumberOnServer"));
int64_t maxServerTeamOnServer =
boost::lexical_cast<int64_t>(teamCollectionInfoMessage.getValue("MaxTeamNumberOnServer"));
int64_t minMachineTeamOnMachine =
boost::lexical_cast<int64_t>(teamCollectionInfoMessage.getValue("MinMachineTeamNumberOnMachine"));
int64_t maxMachineTeamOnMachine =
boost::lexical_cast<int64_t>(teamCollectionInfoMessage.getValue("MaxMachineTeamNumberOnMachine"));
// Team number is always valid when we disable teamRemover. This avoids false positives in simulation tests
if (SERVER_KNOBS->TR_FLAG_DISABLE_TEAM_REMOVER) {
TraceEvent("GetTeamCollectionValid")
@ -300,7 +309,10 @@ ACTOR Future<bool> getTeamCollectionValid(Database cx, WorkerInterface dataDistr
// The if condition should be consistent with the condition in teamRemover() that decides
// if redundant teams exist.
if (healthyMachineTeamCount > desiredMachineTeamNumber) {
if (healthyMachineTeamCount > desiredMachineTeamNumber ||
(minMachineTeamOnMachine <= 0 && SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER == 3)) {
// When DESIRED_TEAMS_PER_SERVER == 1, we have seen minMachineTeamOnMachine be 0 in one out of 30k test
// cases. Only check DESIRED_TEAMS_PER_SERVER == 3 for now, since it is the most commonly used configuration.
TraceEvent("GetTeamCollectionValid")
.detail("CurrentTeamNumber", currentTeamNumber)
.detail("DesiredTeamNumber", desiredTeamNumber)
@ -308,7 +320,13 @@ ACTOR Future<bool> getTeamCollectionValid(Database cx, WorkerInterface dataDistr
.detail("CurrentHealthyMachineTeamNumber", healthyMachineTeamCount)
.detail("DesiredMachineTeams", desiredMachineTeamNumber)
.detail("CurrentMachineTeamNumber", currentMachineTeamNumber)
.detail("MaxMachineTeams", maxMachineTeamNumber);
.detail("MaxMachineTeams", maxMachineTeamNumber)
.detail("MinTeamNumberOnServer", minServerTeamOnServer)
.detail("MaxTeamNumberOnServer", maxServerTeamOnServer)
.detail("MinMachineTeamNumberOnMachine", minMachineTeamOnMachine)
.detail("MaxMachineTeamNumberOnMachine", maxMachineTeamOnMachine)
.detail("DesiredTeamsPerServer", SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER)
.detail("MaxTeamsPerServer", SERVER_KNOBS->MAX_TEAMS_PER_SERVER);
return false;
} else {
return true;

View File

@ -41,6 +41,7 @@ enum limitReason_t {
storage_server_min_free_space_ratio, // a storage server's normal limits are being reduced by a low free space ratio
log_server_min_free_space,
log_server_min_free_space_ratio,
storage_server_durability_lag,
limitReason_t_end
};
@ -56,7 +57,8 @@ const char* limitReasonName[] = {
"storage_server_min_free_space",
"storage_server_min_free_space_ratio",
"log_server_min_free_space",
"log_server_min_free_space_ratio"
"log_server_min_free_space_ratio",
"storage_server_durability_lag"
};
static_assert(sizeof(limitReasonName) / sizeof(limitReasonName[0]) == limitReason_t_end, "limitReasonName table size");
@ -72,7 +74,8 @@ const char* limitReasonDesc[] = {
"Storage server running out of space (approaching 100MB limit).",
"Storage server running out of space (approaching 5% limit).",
"Log server running out of space (approaching 100MB limit).",
"Log server running out of space (approaching 5% limit)."
"Log server running out of space (approaching 5% limit).",
"Storage server durable version falling behind."
};
static_assert(sizeof(limitReasonDesc) / sizeof(limitReasonDesc[0]) == limitReason_t_end, "limitReasonDesc table size");
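The new storage_server_durability_lag entries above lean on a simple but effective pattern: one enum, parallel string tables, and static_asserts tying the table sizes to the end-of-enum sentinel, so a new reason cannot be added without its strings. A condensed, self-contained sketch of the pattern (entries abbreviated):

enum limitReason_t {
	unlimited,
	storage_server_durability_lag, // the reason added in this diff
	limitReason_t_end
};

const char* limitReasonName[] = {
	"unlimited",
	"storage_server_durability_lag"
};

// Dropping a string while adding an enumerator now fails at compile time,
// which is exactly what the static_asserts in this file enforce.
static_assert(sizeof(limitReasonName) / sizeof(limitReasonName[0]) == limitReason_t_end,
              "limitReasonName table size");

int main() { return 0; }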
@ -84,14 +87,14 @@ struct StorageQueueInfo {
StorageQueuingMetricsReply lastReply;
StorageQueuingMetricsReply prevReply;
Smoother smoothDurableBytes, smoothInputBytes, verySmoothDurableBytes;
Smoother smoothDurableVersion, smoothLatestVersion;
Smoother verySmoothDurableVersion, smoothLatestVersion;
Smoother smoothFreeSpace;
Smoother smoothTotalSpace;
double localRateLimit;
limitReason_t limitReason;
StorageQueueInfo(UID id, LocalityData locality) : valid(false), id(id), locality(locality), smoothDurableBytes(SERVER_KNOBS->SMOOTHING_AMOUNT),
smoothInputBytes(SERVER_KNOBS->SMOOTHING_AMOUNT), verySmoothDurableBytes(SERVER_KNOBS->SLOW_SMOOTHING_AMOUNT),
smoothDurableVersion(1.), smoothLatestVersion(1.), smoothFreeSpace(SERVER_KNOBS->SMOOTHING_AMOUNT),
verySmoothDurableVersion(SERVER_KNOBS->SLOW_SMOOTHING_AMOUNT), smoothLatestVersion(SERVER_KNOBS->SMOOTHING_AMOUNT), smoothFreeSpace(SERVER_KNOBS->SMOOTHING_AMOUNT),
smoothTotalSpace(SERVER_KNOBS->SMOOTHING_AMOUNT)
{
// FIXME: this is a tacky workaround for a potential uninitialized use in trackStorageServerQueueInfo
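The rename to verySmoothDurableVersion also swaps the folding time from 1 second to SLOW_SMOOTHING_AMOUNT, so ratekeeper reacts to sustained durability lag rather than momentary dips in the durable version. A rough sketch of the kind of exponential smoother involved; flow's Smoother carries more state, so treat this as the core idea only:

#include <cmath>
#include <cstdio>

// The estimate chases the latest total with time constant eFoldingTime.
// A longer constant (SLOW_SMOOTHING_AMOUNT above) means brief dips are ignored.
struct SmootherSketch {
	double eFoldingTime; // seconds
	double total = 0, estimate = 0, lastTime = 0;
	explicit SmootherSketch(double t) : eFoldingTime(t) {}
	void setTotal(double newTotal, double now) {
		double dt = now - lastTime;
		estimate += (total - estimate) * (1.0 - std::exp(-dt / eFoldingTime));
		total = newTotal;
		lastTime = now;
	}
	double smoothTotal() const { return estimate; }
};

int main() {
	SmootherSketch fast(1.0), slow(10.0);
	for (int s = 1; s <= 5; s++) { fast.setTotal(100, s); slow.setTotal(100, s); }
	printf("%.1f vs %.1f\n", fast.smoothTotal(), slow.smoothTotal()); // ~98 vs ~33
	return 0;
}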
@ -126,9 +129,13 @@ struct RatekeeperLimits {
int64_t logSpringBytes;
double maxVersionDifference;
int64_t durabilityLagTargetVersions;
int64_t lastDurabilityLag;
double durabilityLagLimit;
std::string context;
RatekeeperLimits(std::string context, int64_t storageTargetBytes, int64_t storageSpringBytes, int64_t logTargetBytes, int64_t logSpringBytes, double maxVersionDifference) :
RatekeeperLimits(std::string context, int64_t storageTargetBytes, int64_t storageSpringBytes, int64_t logTargetBytes, int64_t logSpringBytes, double maxVersionDifference, int64_t durabilityLagTargetVersions) :
tpsLimit(std::numeric_limits<double>::infinity()),
tpsLimitMetric(StringRef("Ratekeeper.TPSLimit" + context)),
reasonMetric(StringRef("Ratekeeper.Reason" + context)),
@ -137,6 +144,9 @@ struct RatekeeperLimits {
logTargetBytes(logTargetBytes),
logSpringBytes(logSpringBytes),
maxVersionDifference(maxVersionDifference),
durabilityLagTargetVersions(durabilityLagTargetVersions),
durabilityLagLimit(std::numeric_limits<double>::infinity()),
lastDurabilityLag(0),
context(context)
{}
};
@ -167,11 +177,13 @@ struct RatekeeperData {
RatekeeperLimits normalLimits;
RatekeeperLimits batchLimits;
Deque<double> actualTpsHistory;
RatekeeperData() : smoothReleasedTransactions(SERVER_KNOBS->SMOOTHING_AMOUNT), smoothBatchReleasedTransactions(SERVER_KNOBS->SMOOTHING_AMOUNT), smoothTotalDurableBytes(SERVER_KNOBS->SLOW_SMOOTHING_AMOUNT),
actualTpsMetric(LiteralStringRef("Ratekeeper.ActualTPS")),
lastWarning(0),
normalLimits("", SERVER_KNOBS->TARGET_BYTES_PER_STORAGE_SERVER, SERVER_KNOBS->SPRING_BYTES_STORAGE_SERVER, SERVER_KNOBS->TARGET_BYTES_PER_TLOG, SERVER_KNOBS->SPRING_BYTES_TLOG, SERVER_KNOBS->MAX_TL_SS_VERSION_DIFFERENCE),
batchLimits("Batch", SERVER_KNOBS->TARGET_BYTES_PER_STORAGE_SERVER_BATCH, SERVER_KNOBS->SPRING_BYTES_STORAGE_SERVER_BATCH, SERVER_KNOBS->TARGET_BYTES_PER_TLOG_BATCH, SERVER_KNOBS->SPRING_BYTES_TLOG_BATCH, SERVER_KNOBS->MAX_TL_SS_VERSION_DIFFERENCE_BATCH)
normalLimits("", SERVER_KNOBS->TARGET_BYTES_PER_STORAGE_SERVER, SERVER_KNOBS->SPRING_BYTES_STORAGE_SERVER, SERVER_KNOBS->TARGET_BYTES_PER_TLOG, SERVER_KNOBS->SPRING_BYTES_TLOG, SERVER_KNOBS->MAX_TL_SS_VERSION_DIFFERENCE, SERVER_KNOBS->TARGET_DURABILITY_LAG_VERSIONS),
batchLimits("Batch", SERVER_KNOBS->TARGET_BYTES_PER_STORAGE_SERVER_BATCH, SERVER_KNOBS->SPRING_BYTES_STORAGE_SERVER_BATCH, SERVER_KNOBS->TARGET_BYTES_PER_TLOG_BATCH, SERVER_KNOBS->SPRING_BYTES_TLOG_BATCH, SERVER_KNOBS->MAX_TL_SS_VERSION_DIFFERENCE_BATCH, SERVER_KNOBS->TARGET_DURABILITY_LAG_VERSIONS_BATCH)
{}
};
@ -194,7 +206,7 @@ ACTOR Future<Void> trackStorageServerQueueInfo( RatekeeperData* self, StorageSer
myQueueInfo->value.smoothInputBytes.reset(reply.get().bytesInput);
myQueueInfo->value.smoothFreeSpace.reset(reply.get().storageBytes.available);
myQueueInfo->value.smoothTotalSpace.reset(reply.get().storageBytes.total);
myQueueInfo->value.smoothDurableVersion.reset(reply.get().durableVersion);
myQueueInfo->value.verySmoothDurableVersion.reset(reply.get().durableVersion);
myQueueInfo->value.smoothLatestVersion.reset(reply.get().version);
} else {
self->smoothTotalDurableBytes.addDelta( reply.get().bytesDurable - myQueueInfo->value.prevReply.bytesDurable );
@ -203,7 +215,7 @@ ACTOR Future<Void> trackStorageServerQueueInfo( RatekeeperData* self, StorageSer
myQueueInfo->value.smoothInputBytes.setTotal( reply.get().bytesInput );
myQueueInfo->value.smoothFreeSpace.setTotal( reply.get().storageBytes.available );
myQueueInfo->value.smoothTotalSpace.setTotal( reply.get().storageBytes.total );
myQueueInfo->value.smoothDurableVersion.setTotal(reply.get().durableVersion);
myQueueInfo->value.verySmoothDurableVersion.setTotal(reply.get().durableVersion);
myQueueInfo->value.smoothLatestVersion.setTotal(reply.get().version);
}
} else {
@ -297,7 +309,7 @@ ACTOR Future<Void> trackEachStorageServer(
ACTOR Future<Void> monitorServerListChange(
Reference<AsyncVar<ServerDBInfo>> dbInfo,
PromiseStream< std::pair<UID, Optional<StorageServerInterface>> > serverChanges) {
state Database db = openDBOnServer(dbInfo, TaskRatekeeper, true, true);
state Database db = openDBOnServer(dbInfo, TaskPriority::Ratekeeper, true, true);
state std::map<UID, StorageServerInterface> oldServers;
state Transaction tr(db);
@ -341,6 +353,11 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {
self->actualTpsMetric = (int64_t)actualTps;
// SOMEDAY: Remove the max( 1.0, ... ) since the below calculations _should_ be able to recover back up from this value
actualTps = std::max( std::max( 1.0, actualTps ), self->smoothTotalDurableBytes.smoothRate() / CLIENT_KNOBS->TRANSACTION_SIZE_LIMIT );
if(self->actualTpsHistory.size() > SERVER_KNOBS->MAX_TPS_HISTORY_SAMPLES) {
self->actualTpsHistory.pop_front();
}
self->actualTpsHistory.push_back(actualTps);
limits->tpsLimit = std::numeric_limits<double>::infinity();
UID reasonID = UID();
@ -350,12 +367,14 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {
int64_t worstFreeSpaceStorageServer = std::numeric_limits<int64_t>::max();
int64_t worstStorageQueueStorageServer = 0;
int64_t worstStorageDurabilityLagStorageServer = 0;
int64_t limitingStorageQueueStorageServer = 0;
int64_t worstDurabilityLag = 0;
double worstStorageLocalLimit = 0;
double limitingStorageLocalLimit = 0;
std::multimap<double, StorageQueueInfo*> storageTpsLimitReverseIndex;
std::multimap<int64_t, StorageQueueInfo*> storageDurabilityLagReverseIndex;
std::map<UID, limitReason_t> ssReasons;
// Look at each storage server's write queue and local rate, compute and store the desired rate ratio
@ -384,8 +403,10 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {
worstStorageQueueStorageServer = std::max(worstStorageQueueStorageServer, storageQueue);
worstStorageLocalLimit = std::min(worstStorageLocalLimit, ss.localRateLimit);
int64_t storageDurabilityLag = ss.smoothLatestVersion.smoothTotal() - ss.smoothDurableVersion.smoothTotal();
worstStorageDurabilityLagStorageServer = std::max(worstStorageDurabilityLagStorageServer, storageDurabilityLag);
int64_t storageDurabilityLag = ss.smoothLatestVersion.smoothTotal() - ss.verySmoothDurableVersion.smoothTotal();
worstDurabilityLag = std::max(worstDurabilityLag, storageDurabilityLag);
storageDurabilityLagReverseIndex.insert(std::make_pair(-1*storageDurabilityLag, &ss));
auto& ssMetrics = self->healthMetrics.storageStats[ss.id];
ssMetrics.storageQueue = storageQueue;
@ -446,9 +467,6 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {
ssReasons[ss.id] = ssLimitReason;
}
self->healthMetrics.worstStorageQueue = worstStorageQueueStorageServer;
self->healthMetrics.worstStorageDurabilityLag = worstStorageDurabilityLagStorageServer;
std::set<Optional<Standalone<StringRef>>> ignoredMachines;
for (auto ss = storageTpsLimitReverseIndex.begin(); ss != storageTpsLimitReverseIndex.end() && ss->first < limits->tpsLimit; ++ss) {
if (ignoredMachines.size() < std::min(self->configuration.storageTeamSize - 1, SERVER_KNOBS->MAX_MACHINES_FALLING_BEHIND)) {
@ -468,6 +486,46 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {
break;
}
int64_t limitingDurabilityLag = 0;
std::set<Optional<Standalone<StringRef>>> ignoredDurabilityLagMachines;
for (auto ss = storageDurabilityLagReverseIndex.begin(); ss != storageDurabilityLagReverseIndex.end(); ++ss) {
if (ignoredDurabilityLagMachines.size() < std::min(self->configuration.storageTeamSize - 1, SERVER_KNOBS->MAX_MACHINES_FALLING_BEHIND)) {
ignoredDurabilityLagMachines.insert(ss->second->locality.zoneId());
continue;
}
if (ignoredDurabilityLagMachines.count(ss->second->locality.zoneId()) > 0) {
continue;
}
limitingDurabilityLag = -1*ss->first;
if(limitingDurabilityLag > limits->durabilityLagTargetVersions && self->actualTpsHistory.size() > SERVER_KNOBS->NEEDED_TPS_HISTORY_SAMPLES) {
if(limits->durabilityLagLimit == std::numeric_limits<double>::infinity()) {
double maxTps = 0;
for(int i = 0; i < self->actualTpsHistory.size(); i++) {
maxTps = std::max(maxTps, self->actualTpsHistory[i]);
}
limits->durabilityLagLimit = SERVER_KNOBS->INITIAL_DURABILITY_LAG_MULTIPLIER*maxTps;
}
if( limitingDurabilityLag > limits->lastDurabilityLag ) {
limits->durabilityLagLimit = SERVER_KNOBS->DURABILITY_LAG_REDUCTION_RATE*limits->durabilityLagLimit;
}
if(limits->durabilityLagLimit < limits->tpsLimit) {
limits->tpsLimit = limits->durabilityLagLimit;
limitReason = limitReason_t::storage_server_durability_lag;
}
} else if(limits->durabilityLagLimit != std::numeric_limits<double>::infinity() && limitingDurabilityLag > limits->durabilityLagTargetVersions - SERVER_KNOBS->DURABILITY_LAG_UNLIMITED_THRESHOLD) {
limits->durabilityLagLimit = SERVER_KNOBS->DURABILITY_LAG_INCREASE_RATE*limits->durabilityLagLimit;
} else {
limits->durabilityLagLimit = std::numeric_limits<double>::infinity();
}
limits->lastDurabilityLag = limitingDurabilityLag;
break;
}
self->healthMetrics.worstStorageQueue = worstStorageQueueStorageServer;
self->healthMetrics.worstStorageDurabilityLag = worstDurabilityLag;
double writeToReadLatencyLimit = 0;
Version worstVersionLag = 0;
Version limitingVersionLag = 0;
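The durability-lag block added above is a small feedback controller: once the worst non-ignored storage server's lag passes durabilityLagTargetVersions (and enough TPS history exists), it seeds a cap from recent peak throughput, ratchets the cap down while lag keeps growing, lets it drift back up inside the recovery band, and releases it entirely once lag clears the band. A condensed, standalone restatement of that loop, with knob values assumed purely for illustration:

#include <algorithm>
#include <cstdio>
#include <limits>

int main() {
	const double kInf = std::numeric_limits<double>::infinity();
	// Stand-ins for SERVER_KNOBS (values assumed for illustration only):
	const double target = 5e6;       // TARGET_DURABILITY_LAG_VERSIONS
	const double releaseBand = 1e6;  // DURABILITY_LAG_UNLIMITED_THRESHOLD
	const double initMult = 0.8;     // INITIAL_DURABILITY_LAG_MULTIPLIER
	const double down = 0.9999;      // DURABILITY_LAG_REDUCTION_RATE
	const double up = 1.001;         // DURABILITY_LAG_INCREASE_RATE

	double durabilityLagLimit = kInf, lastDurabilityLag = 0;
	double maxRecentTps = 10000;     // max over actualTpsHistory in the real code
	double lags[] = { 6e6, 7e6, 7e6, 4.5e6, 3.5e6 }; // simulated limiting lag per tick

	for (double limitingLag : lags) {
		double tpsLimit = kInf;
		if (limitingLag > target) {
			if (durabilityLagLimit == kInf)
				durabilityLagLimit = initMult * maxRecentTps; // seed from peak TPS
			if (limitingLag > lastDurabilityLag)
				durabilityLagLimit *= down;                   // lag still growing: tighten
			tpsLimit = std::min(tpsLimit, durabilityLagLimit);
		} else if (durabilityLagLimit != kInf && limitingLag > target - releaseBand) {
			durabilityLagLimit *= up;                         // recovering: loosen
		} else {
			durabilityLagLimit = kInf;                        // recovered: release the cap
		}
		lastDurabilityLag = limitingLag;
		printf("lag=%.1e tpsLimit=%g\n", limitingLag, tpsLimit);
	}
	return 0;
}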
@ -613,12 +671,14 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {
.detail("TotalDiskUsageBytes", totalDiskUsageBytes)
.detail("WorstStorageServerVersionLag", worstVersionLag)
.detail("LimitingStorageServerVersionLag", limitingVersionLag)
.detail("WorstDurabilityLag", worstDurabilityLag)
.detail("LimitingDurabilityLag", limitingDurabilityLag)
.trackLatest(name.c_str());
}
}
ACTOR Future<Void> configurationMonitor(Reference<AsyncVar<ServerDBInfo>> dbInfo, DatabaseConfiguration* conf) {
state Database cx = openDBOnServer(dbInfo, TaskDefaultEndpoint, true, true);
state Database cx = openDBOnServer(dbInfo, TaskPriority::DefaultEndpoint, true, true);
loop {
state ReadYourWritesTransaction tr(cx);
@ -650,7 +710,7 @@ ACTOR Future<Void> ratekeeper(RatekeeperInterface rkInterf, Reference<AsyncVar<S
state Promise<Void> err;
state Future<Void> collection = actorCollection( self.addActor.getFuture() );
TraceEvent("Ratekeeper_Starting", rkInterf.id());
TraceEvent("RatekeeperStarting", rkInterf.id());
self.addActor.send( waitFailureServer(rkInterf.waitFailure.getFuture()) );
self.addActor.send( configurationMonitor(dbInfo, &self.configuration) );
@ -732,7 +792,7 @@ ACTOR Future<Void> ratekeeper(RatekeeperInterface rkInterf, Reference<AsyncVar<S
}
}
catch (Error& err) {
TraceEvent("Ratekeeper_Died", rkInterf.id()).error(err, true);
TraceEvent("RatekeeperDied", rkInterf.id()).error(err, true);
}
return Void();
}

View File

@ -114,9 +114,9 @@ ACTOR Future<Void> resolveBatch(
}
}
if (check_yield(TaskDefaultEndpoint)) {
wait( delay( 0, TaskLowPriority ) || delay( SERVER_KNOBS->COMMIT_SLEEP_TIME ) ); // FIXME: Is this still right?
g_network->setCurrentTask(TaskDefaultEndpoint);
if (check_yield(TaskPriority::DefaultEndpoint)) {
wait( delay( 0, TaskPriority::Low ) || delay( SERVER_KNOBS->COMMIT_SLEEP_TIME ) ); // FIXME: Is this still right?
g_network->setCurrentTask(TaskPriority::DefaultEndpoint);
}
if (self->version.get() == req.prevVersion) { // Not a duplicate (check relies on no waiting between here and self->version.set() below!)

View File

@ -44,8 +44,8 @@ struct ResolverInterface {
bool operator != ( ResolverInterface const& r ) const { return id() != r.id(); }
NetworkAddress address() const { return resolve.getEndpoint().getPrimaryAddress(); }
void initEndpoints() {
metrics.getEndpoint( TaskResolutionMetrics );
split.getEndpoint( TaskResolutionMetrics );
metrics.getEndpoint( TaskPriority::ResolutionMetrics );
split.getEndpoint( TaskPriority::ResolutionMetrics );
}
template <class Ar>

View File

@ -24,7 +24,7 @@
#include "flow/actorcompiler.h" // This must be the last #include.
ACTOR Future<Void> restoreWorker(Reference<ClusterConnectionFile> ccf, LocalityData locality) {
state Database cx = Database::createDatabase(ccf->getFilename(), Database::API_VERSION_LATEST,locality);
state Database cx = Database::createDatabase(ccf->getFilename(), Database::API_VERSION_LATEST, true, locality);
state RestoreInterface interf;
interf.initEndpoints();
state Optional<RestoreInterface> leaderInterf;
