Merge branch 'master' into track-run-loop-busyness

# Conflicts:
#	documentation/sphinx/source/release-notes.rst
#	flow/Net2.actor.cpp
#	flow/network.h

commit 69d7c4f79c
@@ -68,6 +68,10 @@ class ResultSet(object):
        self.tester_results[name] = results

    @staticmethod
    def _min_tuple(t1, t2):
        return t1 if fdb.tuple.compare(t1, t2) < 0 else t2

    def check_for_errors(self):
        if len(self.tester_results) == 1:
            return (0, False)
@@ -97,7 +101,7 @@ class ResultSet(object):
            # If these results aren't using sequence numbers, then we match two results based on whether they share the same key
            else:
                min_key = min([r.key(self.specification) for r in results.values()])
                min_key = reduce(ResultSet._min_tuple, [r.key(self.specification) for r in results.values()])
                results = {i: r for i, r in results.items() if Result.tuples_match(r.key(self.specification), min_key)}

            # Increment the indices for those testers which produced a result in this iteration
@@ -75,6 +75,20 @@ if(NOT WIN32)
  target_link_libraries(mako PRIVATE fdb_c)
endif()

set(c_workloads_srcs
  test/workloads/workloads.cpp
  test/workloads/workloads.h
  test/workloads/SimpleWorkload.cpp)

if(OPEN_FOR_IDE)
  add_library(c_workloads OBJECT ${c_workloads_srcs})
else()
  add_library(c_workloads SHARED ${c_workloads_srcs})
endif()
set_target_properties(c_workloads PROPERTIES
  LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/share/foundationdb")
target_link_libraries(c_workloads PUBLIC fdb_c)

# TODO: re-enable once the old vcxproj-based build system is removed.
#generate_export_header(fdb_c EXPORT_MACRO_NAME "DLLEXPORT"
#  EXPORT_FILE_NAME ${CMAKE_CURRENT_BINARY_DIR}/foundationdb/fdb_c_export.h)
@@ -0,0 +1,104 @@
/*
 * ClientWorkload.h
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2019 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once
#ifndef CLIENT_WORKLOAD_H
#define CLIENT_WORKLOAD_H
#include <string>
#include <vector>
#include <functional>
#include <memory>

#ifndef DLLEXPORT
#if defined(_MSC_VER)
#define DLLEXPORT __declspec(dllexport)
#elif defined(__GNUG__)
#define DLLEXPORT __attribute__((visibility("default")))
#else
#error Missing symbol export
#endif
#endif

typedef struct FDB_future FDBFuture;
typedef struct FDB_database FDBDatabase;
typedef struct FDB_transaction FDBTransaction;

enum class FDBSeverity { Debug, Info, Warn, WarnAlways, Error };

class FDBLogger {
public:
  virtual void trace(FDBSeverity sev, const std::string& name,
                     const std::vector<std::pair<std::string, std::string>>& details) = 0;
};

class FDBWorkloadContext : public FDBLogger {
public:
  virtual uint64_t getProcessID() const = 0;
  virtual void setProcessID(uint64_t processID) = 0;
  virtual double now() const = 0;
  virtual uint32_t rnd() const = 0;
  virtual bool getOption(const std::string& name, bool defaultValue) = 0;
  virtual long getOption(const std::string& name, long defaultValue) = 0;
  virtual unsigned long getOption(const std::string& name, unsigned long defaultValue) = 0;
  virtual double getOption(const std::string& name, double defaultValue) = 0;
  virtual std::string getOption(const std::string& name, std::string defaultValue) = 0;
  virtual int clientId() const = 0;
  virtual int clientCount() const = 0;
  virtual int64_t sharedRandomNumber() const = 0;
};

struct FDBPromise {
  virtual void send(void*) = 0;
};

template <class T>
class GenericPromise {
  std::shared_ptr<FDBPromise> impl;

public:
  template <class Ptr>
  explicit GenericPromise(Ptr&& impl) : impl(std::forward<Ptr>(impl)) {}
  void send(T val) { impl->send(&val); }
};

struct FDBPerfMetric {
  std::string name;
  double value;
  bool averaged;
  std::string format_code = "0.3g";
};

class DLLEXPORT FDBWorkload {
public:
  virtual std::string description() const = 0;
  virtual bool init(FDBWorkloadContext* context) = 0;
  virtual void setup(FDBDatabase* db, GenericPromise<bool> done) = 0;
  virtual void start(FDBDatabase* db, GenericPromise<bool> done) = 0;
  virtual void check(FDBDatabase* db, GenericPromise<bool> done) = 0;
  virtual void getMetrics(std::vector<FDBPerfMetric>& out) const = 0;
  virtual double getCheckTimeout() { return 3000; }
};

class DLLEXPORT FDBWorkloadFactory {
public:
  virtual std::shared_ptr<FDBWorkload> create(const std::string& name) = 0;
};

#endif
@@ -580,7 +580,7 @@ int run_transaction(FDBTransaction *transaction, mako_args_t *args,
      sprintf(keystr + KEYPREFIXLEN + randstrlen, "%0.*d",
              digits(args->txnspec.ops[i][OP_RANGE]), rangei);
      if (rangei == 0) {
        strncpy(keystr2, keystr, strlen(keystr));
        strcpy(keystr2, keystr);
        keystr2[strlen(keystr)] = '\0';
      }
      rc = run_op_insert(transaction, keystr, valstr);
@@ -0,0 +1,372 @@
/*
 * workloads.h
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2019 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define FDB_API_VERSION 610
#include "foundationdb/fdb_c.h"
#undef DLLEXPORT
#include "workloads.h"

#include <unordered_map>
#include <functional>
#include <random>
#include <iostream>

namespace {

struct SimpleWorkload : FDBWorkload {
  static const std::string name;
  static const std::string KEY_PREFIX;
  std::mt19937 random;
  bool success = true;
  FDBWorkloadContext* context = nullptr;
  unsigned long numTuples, numActors, insertsPerTx, opsPerTx;
  double runFor;

  // stats
  std::vector<double> gets, txs, retries;

  template <class Actor>
  struct ActorBase {
    using Callback = std::function<void(Actor*)>;
    Callback done;
    SimpleWorkload& self;
    FDBDatabase* db;
    fdb_error_t error = 0;
    FDBFuture* currentFuture = nullptr;
    int numWaiters = 0;

    ActorBase(const Callback& done, SimpleWorkload& self, FDBDatabase* db) : done(done), self(self), db(db) {}

    Actor* super() { return static_cast<Actor*>(this); }

    const Actor* super() const { return static_cast<const Actor*>(this); }

    template <class State>
    void wait(FDBFuture* future, State state) {
      if (++numWaiters != 1) {
        std::cerr << "More than one wait in one actor" << std::endl;
        std::terminate();
      }
      super()->state = state;
      currentFuture = future;
      if (fdb_future_is_ready(future)) {
        callback(future, this);
      } else {
        auto err = fdb_future_set_callback(future, &ActorBase<Actor>::callback, this);
        if (err) {
          auto self = static_cast<Actor*>(this);
          self->callbacks[self->state].onError(err);
          fdb_future_destroy(future);
        }
      }
    }

    static void callback(FDBFuture* future, void* data) {
      auto self = reinterpret_cast<Actor*>(data);
      --self->numWaiters;
      auto err = fdb_future_get_error(future);
      if (err) {
        self->callbacks[self->state].onError(fdb_future_get_error(future));
      } else {
        self->callbacks[self->state].onSuccess(future);
      }
      fdb_future_destroy(future);
    }
  };

  struct ActorCallback {
    std::function<void(FDBFuture*)> onSuccess;
    std::function<void(fdb_error_t)> onError;
  };

  struct PopulateActor : ActorBase<PopulateActor> {
    enum class State { Commit, Retry };
    State state;
    FDBTransaction* tx = nullptr;

    unsigned long from, to, lastTx = 0;
    std::unordered_map<State, ActorCallback> callbacks;

    PopulateActor(const Callback& promise, SimpleWorkload& self, FDBDatabase* db, unsigned long from,
                  unsigned long to)
      : ActorBase(promise, self, db), from(from), to(to) {
      error = fdb_database_create_transaction(db, &tx);
      if (error) {
        done(this);
      }
      setCallbacks();
    }

    ~PopulateActor() {
      if (tx) {
        fdb_transaction_destroy(tx);
      }
    }

    void run() {
      if (error || from >= to) {
        done(this);
        return;
      }
      lastTx = 0;
      unsigned ops = 0;
      for (; from < to && ops < self.insertsPerTx; ++ops, ++from) {
        std::string value = std::to_string(from);
        std::string key = KEY_PREFIX + value;
        fdb_transaction_set(tx, reinterpret_cast<const uint8_t*>(key.c_str()), key.size(),
                            reinterpret_cast<const uint8_t*>(value.c_str()), value.size());
      }
      lastTx = ops;
      auto commit_future = fdb_transaction_commit(tx);
      wait(commit_future, State::Commit);
    }

    void setCallbacks() {
      callbacks[State::Commit] = {
        [this](FDBFuture* future) {
          fdb_transaction_reset(tx);
          self.context->trace(FDBSeverity::Debug, "TXComplete", { { "NumInserts", std::to_string(lastTx) } });
          lastTx = 0;
          run();
        },
        [this](fdb_error_t error) { wait(fdb_transaction_on_error(tx, error), State::Retry); }
      };
      callbacks[State::Retry] = { [this](FDBFuture* future) {
                                    from -= lastTx;
                                    fdb_transaction_reset(tx);
                                    run();
                                  },
                                  [this](fdb_error_t error) {
                                    self.context->trace(FDBSeverity::Error, "AssertionFailure",
                                                        { { "Reason", "tx.onError failed" },
                                                          { "Error", std::string(fdb_get_error(error)) } });
                                    self.success = false;
                                    done(this);
                                  } };
    }
  };

  struct ClientActor : ActorBase<ClientActor> {
    enum class State { Get, Commit, Retry };
    State state;
    std::unordered_map<State, ActorCallback> callbacks;
    unsigned long ops = 0;
    std::uniform_int_distribution<decltype(SimpleWorkload::numTuples)> random;
    FDBTransaction* tx = nullptr;

    unsigned numCommits = 0;
    unsigned numRetries = 0;
    unsigned numGets = 0;
    double startTime;

    ClientActor(const Callback& promise, SimpleWorkload& self, FDBDatabase* db)
      : ActorBase(promise, self, db), random(0, self.numTuples - 1), startTime(self.context->now()) {
      error = fdb_database_create_transaction(db, &tx);
      if (error) {
        done(this);
      }
      setCallbacks();
    }

    ~ClientActor() {
      if (tx) {
        fdb_transaction_destroy(tx);
      }
    }

    void run() { get(); }

    void get() {
      if (self.context->now() > startTime + self.runFor) {
        done(this);
        return;
      }
      auto key = KEY_PREFIX + std::to_string(random(self.random));
      auto f = fdb_transaction_get(tx, reinterpret_cast<const uint8_t*>(key.c_str()), key.size(), false);
      wait(f, State::Get);
    }

    void commit() {
      if (self.context->now() > startTime + self.runFor) {
        done(this);
        return;
      }
      wait(fdb_transaction_commit(tx), State::Commit);
    }

    void setCallbacks() {
      callbacks[State::Get] = { [this](FDBFuture* future) {
                                  ++numGets;
                                  if (++ops >= self.opsPerTx) {
                                    commit();
                                  } else {
                                    get();
                                  }
                                },
                                [this](fdb_error_t error) {
                                  wait(fdb_transaction_on_error(tx, error), State::Retry);
                                } };
      callbacks[State::Retry] = { [this](FDBFuture* future) {
                                    ops = 0;
                                    fdb_transaction_reset(tx);
                                    ++numRetries;
                                    get();
                                  },
                                  [this](fdb_error_t) {
                                    self.context->trace(FDBSeverity::Error, "AssertionFailure",
                                                        { { "Reason", "tx.onError failed" },
                                                          { "Error", std::string(fdb_get_error(error)) } });
                                    self.success = false;
                                    done(this);
                                  } };
      callbacks[State::Commit] = { [this](FDBFuture* future) {
                                     ++numCommits;
                                     ops = 0;
                                     fdb_transaction_reset(tx);
                                     get();
                                   },
                                   [this](fdb_error_t) {
                                     wait(fdb_transaction_on_error(tx, error), State::Retry);
                                   } };
    }
  };

  std::string description() const override { return name; }
  bool init(FDBWorkloadContext* context) override {
    this->context = context;
    context->trace(FDBSeverity::Info, "SimpleWorkloadInit", {});
    random = decltype(random)(context->rnd());
    numTuples = context->getOption("numTuples", 100000ul);
    numActors = context->getOption("numActors", 100ul);
    insertsPerTx = context->getOption("insertsPerTx", 100ul);
    opsPerTx = context->getOption("opsPerTx", 100ul);
    runFor = context->getOption("runFor", 10.0);
    auto err = fdb_select_api_version(610);
    if (err) {
      context->trace(FDBSeverity::Info, "SelectAPIVersionFailed",
                     { { "Error", std::string(fdb_get_error(err)) } });
    }
    return true;
  }
  void setup(FDBDatabase* db, GenericPromise<bool> done) override {
    if (this->context->clientId() == 0) {
      done.send(true);
      return;
    }
    struct Populator {
      std::vector<PopulateActor*> actors;
      GenericPromise<bool> promise;
      bool success = true;

      void operator()(PopulateActor* done) {
        if (done->error) {
          success = false;
        }
        for (int i = 0; i < actors.size(); ++i) {
          if (actors[i] == done) {
            actors[i] = actors.back();
            delete done;
            actors.pop_back();
          }
        }
        if (actors.empty()) {
          promise.send(success);
          delete this;
        }
      }
    };
    decltype(numTuples) from = 0;
    auto p = new Populator{ {}, std::move(done) };
    for (decltype(numActors) i = 0; i < numActors; ++i) {
      decltype(from) to = from + (numTuples / numActors);
      if (i == numActors - 1) {
        to = numTuples;
      }
      auto actor = new PopulateActor([p](PopulateActor* self) { (*p)(self); }, *this, db, from, to);
      p->actors.emplace_back(actor);
      from = to;
    }
    for (auto actor : p->actors) {
      actor->run();
    }
  }
  void start(FDBDatabase* db, GenericPromise<bool> done) override {
    if (!success) {
      done.send(false);
    }
    struct ClientRunner {
      std::vector<ClientActor*> actors;
      GenericPromise<bool> done;
      SimpleWorkload* self;

      void operator()(ClientActor* actor) {
        double now = self->context->now();
        for (int i = 0; i < actors.size(); ++i) {
          if (actors[i] == actor) {
            actors[i] = actors.back();
            actors.pop_back();
          }
        }
        double s = now - actor->startTime;
        if (s > 0.01) {
          self->gets.emplace_back(double(actor->numGets) / s);
          self->txs.emplace_back(double(actor->numCommits) / s);
          self->retries.emplace_back(double(actor->numRetries) / s);
        }
        delete actor;
        if (actors.empty()) {
          done.send(self->success);
          delete this;
        }
      }
    };
    auto runner = new ClientRunner{ {}, std::move(done), this };
    for (decltype(numActors) i = 0; i < numActors; ++i) {
      auto actor = new ClientActor([runner](ClientActor* self) { (*runner)(self); }, *this, db);
      runner->actors.push_back(actor);
    }
    for (auto actor : runner->actors) {
      actor->run();
    }
  }
  void check(FDBDatabase* db, GenericPromise<bool> done) override { done.send(success); }

  template <class Vec>
  double accumulateMetric(const Vec& v) const {
    double res = 0.0;
    for (auto val : v) {
      res += val;
    }
    return res / double(v.size());
  }

  void getMetrics(std::vector<FDBPerfMetric>& out) const override {
    out.emplace_back(FDBPerfMetric{ "Get/s", accumulateMetric(gets), true });
    out.emplace_back(FDBPerfMetric{ "Tx/s", accumulateMetric(txs), true });
    out.emplace_back(FDBPerfMetric{ "Retries/s", accumulateMetric(retries), true });
  }
};

const std::string SimpleWorkload::name = "SimpleWorkload";
const std::string SimpleWorkload::KEY_PREFIX = "csimple/";

} // namespace

FDBWorkloadFactoryT<SimpleWorkload> simpleWorkload(SimpleWorkload::name);
@@ -0,0 +1,41 @@
/*
 * workloads.h
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2019 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "workloads.h"

FDBWorkloadFactoryImpl::~FDBWorkloadFactoryImpl() {}

std::map<std::string, IFDBWorkloadFactory*>& FDBWorkloadFactoryImpl::factories() {
  static std::map<std::string, IFDBWorkloadFactory*> _factories;
  return _factories;
}

std::shared_ptr<FDBWorkload> FDBWorkloadFactoryImpl::create(const std::string &name) {
  auto res = factories().find(name);
  if (res == factories().end()) {
    return nullptr;
  }
  return res->second->create();
}

FDBWorkloadFactory* workloadFactory(FDBLogger*) {
  static FDBWorkloadFactoryImpl impl;
  return &impl;
}
@@ -0,0 +1,47 @@
/*
 * workloads.h
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2019 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once
#include "foundationdb/ClientWorkload.h"

#include <map>

struct IFDBWorkloadFactory {
  virtual std::shared_ptr<FDBWorkload> create() = 0;
};

struct FDBWorkloadFactoryImpl : FDBWorkloadFactory {
  ~FDBWorkloadFactoryImpl();
  static std::map<std::string, IFDBWorkloadFactory*>& factories();
  std::shared_ptr<FDBWorkload> create(const std::string& name) override;
};

template<class WorkloadType>
struct FDBWorkloadFactoryT : IFDBWorkloadFactory {
  explicit FDBWorkloadFactoryT(const std::string& name) {
    FDBWorkloadFactoryImpl::factories()[name] = this;
  }

  std::shared_ptr<FDBWorkload> create() override {
    return std::make_shared<WorkloadType>();
  }
};

extern "C" DLLEXPORT FDBWorkloadFactory* workloadFactory(FDBLogger*);
@@ -85,7 +85,7 @@ void fdb_flow_test() {
  openTraceFile(NetworkAddress(), 1000000, 1000000, ".");
  systemMonitor();
  uncancellable(recurring(&systemMonitor, 5.0, TaskFlushTrace));
  uncancellable(recurring(&systemMonitor, 5.0, TaskPriority::FlushTrace));

  Future<Void> t = _test();
@@ -179,7 +179,7 @@ namespace FDB {
  }

  void backToFutureCallback( FDBFuture* f, void* data ) {
    g_network->onMainThread( Promise<Void>((SAV<Void>*)data), TaskDefaultOnMainThread ); // SOMEDAY: think about this priority
    g_network->onMainThread( Promise<Void>((SAV<Void>*)data), TaskPriority::DefaultOnMainThread ); // SOMEDAY: think about this priority
  }

  // backToFuture<Type>( FDBFuture*, (FDBFuture* -> Type) ) -> Future<Type>
@@ -53,6 +53,8 @@ set(JAVA_BINDING_SRCS
  src/main/com/apple/foundationdb/TransactionContext.java
  src/main/com/apple/foundationdb/testing/AbstractWorkload.java
  src/main/com/apple/foundationdb/testing/WorkloadContext.java
  src/main/com/apple/foundationdb/testing/Promise.java
  src/main/com/apple/foundationdb/testing/PerfMetric.java
  src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java
  src/main/com/apple/foundationdb/tuple/IterableComparator.java
  src/main/com/apple/foundationdb/tuple/package-info.java
@@ -119,11 +121,11 @@ endif()
if(OPEN_FOR_IDE)
  add_library(fdb_java OBJECT fdbJNI.cpp)
  add_library(java_workloads OBJECT JavaWorkload.cpp)
else()
  add_library(fdb_java SHARED fdbJNI.cpp)
  add_library(java_workloads SHARED JavaWorkload.cpp)
endif()
message(DEBUG ${JNI_INCLUDE_DIRS})
message(DEBUG ${JNI_LIBRARIES})
target_include_directories(fdb_java PRIVATE ${JNI_INCLUDE_DIRS})
# libfdb_java.so is loaded by fdb-java.jar and doesn't need to depened on jvm shared libraries.
target_link_libraries(fdb_java PRIVATE fdb_c)
@@ -132,6 +134,10 @@ set_target_properties(fdb_java PROPERTIES
if(APPLE)
  set_target_properties(fdb_java PROPERTIES SUFFIX ".jnilib")
endif()
set_target_properties(java_workloads PROPERTIES
  LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/share/foundationdb")
target_link_libraries(java_workloads PUBLIC fdb_c ${JNI_LIBRARIES})
target_include_directories(java_workloads PUBLIC ${JNI_INCLUDE_DIRS})

set(CMAKE_JAVA_COMPILE_FLAGS "-source" "1.8" "-target" "1.8")
set(CMAKE_JNI_TARGET TRUE)
@@ -0,0 +1,614 @@
/*
 * JavaWorkload.cpp
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2019 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <foundationdb/ClientWorkload.h>
#define FDB_API_VERSION 610
#include <foundationdb/fdb_c.h>

#include <jni.h>
#include <set>
#include <iostream>
#include <boost/algorithm/string.hpp>

namespace {
// to make logging more convenient
// this should be fine as it is guarded by
// a anon namespace
auto debug = FDBSeverity::Debug;
auto info = FDBSeverity::Info;
auto error = FDBSeverity::Error;

void printTrace(JNIEnv* env, jclass, jlong logger, jint severity, jstring message, jobject details) {
  auto log = reinterpret_cast<FDBLogger*>(logger);
  jboolean isCopy;
  const char* msg = env->GetStringUTFChars(message, &isCopy);
  std::vector<std::pair<std::string, std::string>> detailsMap;
  if (details != nullptr) {
    jclass mapClass = env->FindClass("java/util/Map");
    jclass setClass = env->FindClass("java/util/Set");
    jclass iteratorClass = env->FindClass("java/util/Iterator");
    jmethodID keySetID = env->GetMethodID(mapClass, "keySet", "()Ljava/util/Set;");
    jobject keySet = env->CallObjectMethod(details, keySetID);
    jmethodID iteratorMethodID = env->GetMethodID(setClass, "iterator", "()Ljava/util/Iterator;");
    jobject iterator = env->CallObjectMethod(keySet, iteratorMethodID);
    jmethodID hasNextID = env->GetMethodID(iteratorClass, "hasNext", "()Z");
    jmethodID nextID = env->GetMethodID(iteratorClass, "next", "()Ljava/lang/Object;");
    jmethodID getID = env->GetMethodID(mapClass, "get", "(Ljava/lang/Object;)Ljava/lang/Object;");
    while (env->CallBooleanMethod(iterator, hasNextID)) {
      jobject next = env->CallObjectMethod(iterator, nextID);
      jstring key = jstring(next);
      jstring value = jstring(env->CallObjectMethod(details, getID, next));
      auto keyStr = env->GetStringUTFChars(key, nullptr);
      auto keyLen = env->GetStringUTFLength(key);
      auto valueStr = env->GetStringUTFChars(value, nullptr);
      auto valueLen = env->GetStringUTFLength(value);
      detailsMap.emplace_back(std::string(keyStr, keyLen), std::string(valueStr, valueLen));
      env->ReleaseStringUTFChars(key, keyStr);
      env->ReleaseStringUTFChars(value, valueStr);
      env->DeleteLocalRef(key);
      env->DeleteLocalRef(value);
    }
  }
  FDBSeverity sev;
  if (severity < 10) {
    sev = debug;
  } else if (severity < 20) {
    sev = FDBSeverity::Info;
  } else if (severity < 30) {
    sev = FDBSeverity::Warn;
  } else if (severity < 40) {
    sev = FDBSeverity::WarnAlways;
  }
  log->trace(sev, msg, detailsMap);
  if (isCopy) {
    env->ReleaseStringUTFChars(message, msg);
  }
}

jlong getProcessID(JNIEnv* env, jclass, jlong self) {
  FDBWorkloadContext* context = reinterpret_cast<FDBWorkloadContext*>(self);
  return jlong(context->getProcessID());
}

void setProcessID(JNIEnv* env, jclass, jlong self, jlong processID) {
  FDBWorkloadContext* context = reinterpret_cast<FDBWorkloadContext*>(self);
  context->setProcessID(processID);
}

jboolean getOptionBool(JNIEnv* env, jclass, jlong self, jstring name, jboolean defaultValue) {
  FDBWorkloadContext* context = reinterpret_cast<FDBWorkloadContext*>(self);
  jboolean isCopy = true;
  const char* utf = env->GetStringUTFChars(name, &isCopy);
  auto res = jboolean(context->getOption(utf, bool(defaultValue)));
  if (isCopy) {
    env->ReleaseStringUTFChars(name, utf);
  }
  return res;
}

jlong getOptionLong(JNIEnv* env, jclass, jlong self, jstring name, jlong defaultValue) {
  FDBWorkloadContext* context = reinterpret_cast<FDBWorkloadContext*>(self);
  jboolean isCopy = true;
  const char* utf = env->GetStringUTFChars(name, &isCopy);
  auto res = jlong(context->getOption(utf, long(defaultValue)));
  if (isCopy) {
    env->ReleaseStringUTFChars(name, utf);
  }
  return res;
}

jdouble getOptionDouble(JNIEnv* env, jclass, jlong self, jstring name, jdouble defaultValue) {
  FDBWorkloadContext* context = reinterpret_cast<FDBWorkloadContext*>(self);
  jboolean isCopy = true;
  const char* utf = env->GetStringUTFChars(name, &isCopy);
  auto res = jdouble(context->getOption(utf, double(defaultValue)));
  if (isCopy) {
    env->ReleaseStringUTFChars(name, utf);
  }
  return res;
}

jstring getOptionString(JNIEnv* env, jclass, jlong self, jstring name, jstring defaultValue) {
  FDBWorkloadContext* context = reinterpret_cast<FDBWorkloadContext*>(self);
  jboolean isCopy;
  jboolean defIsCopy;
  const char* nameStr = env->GetStringUTFChars(name, &isCopy);
  const char* defStr = env->GetStringUTFChars(defaultValue, &defIsCopy);
  auto res = context->getOption(nameStr, std::string(defStr));
  if (isCopy) {
    env->ReleaseStringUTFChars(name, nameStr);
  }
  if (defIsCopy) {
    env->ReleaseStringUTFChars(defaultValue, defStr);
  }
  return env->NewStringUTF(res.c_str());
}

jint getClientID(JNIEnv* env, jclass, jlong self) {
  FDBWorkloadContext* context = reinterpret_cast<FDBWorkloadContext*>(self);
  return jint(context->clientId());
}

jint getClientCount(JNIEnv* env, jclass, jlong self) {
  FDBWorkloadContext* context = reinterpret_cast<FDBWorkloadContext*>(self);
  return jint(context->clientCount());
}

jlong getSharedRandomNumber(JNIEnv* env, jclass, jlong self) {
  FDBWorkloadContext* context = reinterpret_cast<FDBWorkloadContext*>(self);
  return jlong(context->sharedRandomNumber());
}

struct JavaPromise {
  GenericPromise<bool> impl;
  JavaPromise(GenericPromise<bool>&& promise) : impl(std::move(promise)) {}

  void send(bool val) {
    impl.send(val);
    delete this;
  }
};

void promiseSend(JNIEnv, jclass, jlong self, jboolean value) {
  auto p = reinterpret_cast<JavaPromise*>(self);
  p->send(bool(value));
}

struct JNIError {
  JNIEnv* env;
  jthrowable throwable = nullptr;
  const char* file;
  int line;

  std::string location() const {
    if (file == nullptr) {
      return "UNKNOWN";
    } else {
      return file + std::string(":") + std::to_string(line);
    }
  }

  std::string toString() {
    if (!throwable) {
      return "JNIError";
    } else {
      jboolean isCopy = false;
      jmethodID toStringM =
          env->GetMethodID(env->FindClass("java/lang/Object"), "toString", "()Ljava/lang/String;");
      jstring s = (jstring)env->CallObjectMethod(throwable, toStringM);
      const char* utf = env->GetStringUTFChars(s, &isCopy);
      std::string res(utf);
      env->ReleaseStringUTFChars(s, utf);
      return res;
    }
  }
};

struct JVM {
  FDBLogger* log;
  JavaVM* jvm;
  JNIEnv* env;
  std::set<std::string> classPath;
  bool healthy = false;
  jclass throwableClass;
  jclass abstractWorkloadClass = nullptr;
  // this is a bit ugly - but JNINativeMethod requires
  // char* not const char *
  std::vector<char*> charArrays;

  void checkExceptionImpl(const char* file, int line) {
    if (env->ExceptionCheck()) {
      throw JNIError{ env, env->ExceptionOccurred(), file, line };
    }
  }

#define checkException() checkExceptionImpl(__FILE__, __LINE__)

  void success(int res) {
    bool didThrow = env->ExceptionCheck();
    if (res == JNI_ERR || didThrow) {
      throw JNIError{ env, didThrow ? env->ExceptionOccurred() : nullptr };
    }
  }

  JVM(FDBLogger* log) : log(log) {
    try {
      log->trace(FDBSeverity::Debug, "InitializeJVM", {});
      JavaVMInitArgs args;
      args.version = JNI_VERSION_1_6;
      args.ignoreUnrecognized = JNI_TRUE;
      args.nOptions = 0;
      success(JNI_CreateJavaVM(&jvm, reinterpret_cast<void**>(&env), &args));
      log->trace(debug, "JVMCreated", {});
      throwableClass = env->FindClass("java/lang/Throwable");
    } catch (JNIError& e) {
      healthy = false;
      env->ExceptionClear();
    }
  }

  ~JVM() {
    log->trace(debug, "JVMDestruct", {});
    if (jvm) {
      jvm->DestroyJavaVM();
    }
    for (auto& a : charArrays) {
      delete[] a;
    }
    log->trace(debug, "JVMDestructDone", {});
  }

  void setNativeMethods(jclass clazz,
                        const std::initializer_list<std::tuple<std::string_view, std::string_view, void*>>& methods) {
    charArrays.reserve(charArrays.size() + 2 * methods.size());
    std::unique_ptr<JNINativeMethod[]> nativeMethods;
    int numNativeMethods = methods.size();
    nativeMethods.reset(new JNINativeMethod[numNativeMethods]);
    int i = 0;
    for (const auto& t : methods) {
      auto& w = nativeMethods[i];
      auto nameStr = std::get<0>(t);
      auto sigStr = std::get<1>(t);
      charArrays.push_back(new char[nameStr.size() + 1]);
      char* name = charArrays.back();
      charArrays.push_back(new char[sigStr.size() + 1]);
      char* sig = charArrays.back();
      memcpy(name, nameStr.data(), nameStr.size());
      memcpy(sig, sigStr.data(), sigStr.size());
      name[nameStr.size()] = '\0';
      sig[sigStr.size()] = '\0';
      w.name = name;
      w.signature = sig;
      w.fnPtr = std::get<2>(t);
      log->trace(info, "PreparedNativeMethod",
                 { { "Name", w.name },
                   { "Signature", w.signature },
                   { "Ptr", std::to_string(reinterpret_cast<uintptr_t>(w.fnPtr)) } });
      ++i;
    }
    env->RegisterNatives(clazz, nativeMethods.get(), numNativeMethods);
    checkException();
  }

  jclass getClassImpl(const char* file, int line, const char* name) {
    auto res = env->FindClass(name);
    checkExceptionImpl(file, line);
    return res;
  }

#define getClass(name) getClassImpl(__FILE__, __LINE__, name)

  jmethodID getMethodImpl(const char* file, int line, jclass clazz, const char* name, const char* signature) {
    auto res = env->GetMethodID(clazz, name, signature);
    checkExceptionImpl(file, line);
    return res;
  }

#define getMethod(clazz, name, signature) getMethodImpl(__FILE__, __LINE__, clazz, name, signature)

  jfieldID getFieldImpl(const char* file, int line, jclass clazz, const char* name, const char* signature) {
    auto res = env->GetFieldID(clazz, name, signature);
    checkException();
    return res;
  }

#define getField(clazz, name, signature) getFieldImpl(__FILE__, __LINE__, clazz, name, signature)

  void addToClassPath(const std::string& path) {
    log->trace(info, "TryAddToClassPath", { { "Path", path } });
    if (!env) {
      throw JNIError{};
    }
    if (classPath.count(path) > 0) {
      // already added
      return;
    }
    auto p = env->NewStringUTF(path.c_str());
    checkException();
    auto fileClass = getClass("java/io/File");
    auto file = env->NewObject(fileClass, getMethod(fileClass, "<init>", "(Ljava/lang/String;)V"), p);
    checkException();
    auto uri = env->CallObjectMethod(file, env->GetMethodID(fileClass, "toURI", "()Ljava/net/URI;"));
    checkException();
    auto uriClass = getClass("java/net/URI");
    auto url = env->CallObjectMethod(uri, getMethod(uriClass, "toURL", "()Ljava/net/URL;"));
    checkException();
    auto classLoaderClass = getClass("java/lang/ClassLoader");
    auto sysLoaderMethod =
        env->GetStaticMethodID(classLoaderClass, "getSystemClassLoader", "()Ljava/lang/ClassLoader;");
    checkException();
    auto classLoader = env->CallStaticObjectMethod(classLoaderClass, sysLoaderMethod);
    checkException();
    auto urlLoaderClass = getClass("java/net/URLClassLoader");
    env->CallVoidMethod(classLoader, getMethod(urlLoaderClass, "addURL", "(Ljava/net/URL;)V"), url);
    env->DeleteLocalRef(classLoader);
    checkException();
  }

  void init() {
    if (abstractWorkloadClass != nullptr) {
      return;
    }
    abstractWorkloadClass = getClass("com/apple/foundationdb/testing/AbstractWorkload");
    setNativeMethods(abstractWorkloadClass,
                     { { "log", "(JILjava/lang/String;Ljava/util/Map;)V", reinterpret_cast<void*>(&printTrace) } });
    auto loggerField = env->GetStaticFieldID(abstractWorkloadClass, "logger", "J");
    checkException();
    env->SetStaticLongField(abstractWorkloadClass, loggerField, reinterpret_cast<jlong>(log));
    log->trace(info, "SetLogger", { { "Logger", std::to_string(reinterpret_cast<jlong>(log)) } });
    setNativeMethods(getClass("com/apple/foundationdb/testing/WorkloadContext"),
                     { { "getProcessID", "(J)J", reinterpret_cast<void*>(&getProcessID) },
                       { "setProcessID", "(JJ)V", reinterpret_cast<void*>(&setProcessID) },
                       { "getOption", "(JLjava/lang/String;Z)Z", reinterpret_cast<void*>(&getOptionBool) },
                       { "getOption", "(JLjava/lang/String;J)J", reinterpret_cast<void*>(&getOptionLong) },
                       { "getOption", "(JLjava/lang/String;D)D", reinterpret_cast<void*>(&getOptionDouble) },
                       { "getOption", "(JLjava/lang/String;Ljava/lang/String;)Ljava/lang/String;",
                         reinterpret_cast<void*>(&getOptionString) },
                       { "getClientID", "(J)I", reinterpret_cast<void*>(&getClientID) },
                       { "getClientCount", "(J)I", reinterpret_cast<void*>(&getClientCount) },
                       { "getSharedRandomNumber", "(J)J", reinterpret_cast<void*>(&getSharedRandomNumber) } });
    setNativeMethods(getClass("com/apple/foundationdb/testing/Promise"),
                     { { "send", "(JZ)V", reinterpret_cast<void*>(&promiseSend) } });
    auto fdbClass = getClass("com/apple/foundationdb/FDB");
    jmethodID selectMethod =
        env->GetStaticMethodID(fdbClass, "selectAPIVersion", "(IZ)Lcom/apple/foundationdb/FDB;");
    checkException();
    env->CallStaticObjectMethod(fdbClass, selectMethod, jint(610), jboolean(false));
    checkException();
  }

  jobject createWorkloadContext(FDBWorkloadContext* context) {
    auto clazz = getClass("com/apple/foundationdb/testing/WorkloadContext");
    auto constructor = getMethod(clazz, "<init>", "(J)V");
    auto jContext = reinterpret_cast<jlong>(context);
    jobject res = env->NewObject(clazz, constructor, jContext);
    std::cout.flush();
    checkException();
    auto field = env->GetFieldID(clazz, "impl", "J");
    checkException();
    auto impl = env->GetLongField(res, field);
    checkException();
    if (impl != jContext) {
      log->trace(error, "ContextNotCorrect",
                 { { "Expected", std::to_string(jContext) }, { "Impl", std::to_string(impl) } });
      std::terminate();
    }
    return res;
  }

  jobject createWorkload(jobject context, const std::string& workloadName) {
    auto clazz = getClass(workloadName.c_str());
    if (!env->IsAssignableFrom(clazz, abstractWorkloadClass)) {
      log->trace(error, "ClassNotAWorkload", { { "Class", workloadName } });
      return nullptr;
    }
    auto constructor = getMethod(clazz, "<init>", "(Lcom/apple/foundationdb/testing/WorkloadContext;)V");
    auto res = env->NewObject(clazz, constructor, context);
    checkException();
    env->NewGlobalRef(res);
    return res;
  }

  jobject createPromise(GenericPromise<bool>&& promise) {
    auto p = std::make_unique<JavaPromise>(std::move(promise));
    auto clazz = getClass("com/apple/foundationdb/testing/Promise");
    auto res = env->NewObject(clazz, getMethod(clazz, "<init>", "(J)V"), reinterpret_cast<jlong>(p.get()));
    checkException();
    p.release();
    return res;
  }

  void shutdownWorkload(jobject workload, const std::string& workloadName) {
    auto clazz = getClass(workloadName.c_str());
    env->CallVoidMethod(workload, getMethod(clazz, "shutdown", "()V"));
    checkException();
  }

  std::string jtoStr(jstring str) {
    jboolean isCopy;
    auto arr = env->GetStringUTFChars(str, &isCopy);
    std::string res(arr);
    if (isCopy) {
      env->ReleaseStringUTFChars(str, arr);
    }
    return res;
  }

  void getMetrics(jobject workload, const std::string& workloadName, std::vector<FDBPerfMetric>& result) {
    auto clazz = getClass(workloadName.c_str());
    auto perfMetricClass = getClass("Lcom/apple/foundationdb/testing/PerfMetric;");
    auto nameId = getField(perfMetricClass, "name", "Ljava/lang/String;");
    auto valueId = getField(perfMetricClass, "value", "D");
    auto averagedId = getField(perfMetricClass, "averaged", "Z");
    auto formatCodeId = getField(perfMetricClass, "formatCode", "Ljava/lang/String;");
    auto v = env->CallObjectMethod(workload, getMethod(clazz, "getMetrics", "()Ljava/util/List;"));
    checkException();
    auto listClass = getClass("java/util/List");
    auto iter = env->CallObjectMethod(v, getMethod(listClass, "iterator", "()Ljava/util/Iterator;"));
    checkException();
    auto iterClass = getClass("java/util/Iterator");
    auto hasNextM = getMethod(iterClass, "hasNext", "()Z");
    auto nextM = getMethod(iterClass, "next", "()Ljava/lang/Object;");
    jboolean hasNext = env->CallBooleanMethod(iter, hasNextM);
    checkException();
    while (hasNext) {
      auto perfMetric = env->CallObjectMethod(iter, nextM);
      checkException();
      auto name = jtoStr(jstring(env->GetObjectField(perfMetric, nameId)));
      checkException();
      auto value = env->GetDoubleField(perfMetric, valueId);
      checkException();
      auto averaged = env->GetBooleanField(perfMetric, averagedId);
      checkException();
      auto formatCode = jtoStr(jstring(env->GetObjectField(perfMetric, formatCodeId)));
      result.emplace_back(FDBPerfMetric{ name, value, bool(averaged), formatCode });
      hasNext = env->CallBooleanMethod(iter, hasNextM);
      checkException();
    }
    return;
  }

  jobject createDatabase(jobject workload, FDBDatabase* db) {
    auto executor =
        env->CallObjectMethod(workload, getMethod(getClass("com/apple/foundationdb/testing/AbstractWorkload"),
                                                  "getExecutor", "()Ljava/util/concurrent/Executor;"));
    auto databaseClass = getClass("com/apple/foundationdb/FDBDatabase");
    jlong databasePtr = reinterpret_cast<jlong>(db);
    jobject javaDatabase =
        env->NewObject(databaseClass, getMethod(databaseClass, "<init>", "(JLjava/util/concurrent/Executor;)V"),
                       databasePtr, executor);
    env->DeleteLocalRef(executor);
    return javaDatabase;
  }

  void callWorkload(jobject workload, FDBDatabase* db, const char* method, GenericPromise<bool>&& promise) {
    jobject jPromise = nullptr;
    try {
      auto clazz = getClass("com/apple/foundationdb/testing/AbstractWorkload");
      auto jdb = createDatabase(workload, db);
      jPromise = createPromise(std::move(promise));
      env->CallVoidMethod(
          workload,
          getMethod(clazz, method,
                    "(Lcom/apple/foundationdb/Database;Lcom/apple/foundationdb/testing/Promise;)V"),
          jdb, jPromise);
      env->DeleteLocalRef(jdb);
      env->DeleteLocalRef(jPromise);
      jPromise = nullptr;
      checkException();
    } catch (...) {
      if (jPromise) {
        env->DeleteLocalRef(jPromise);
      }
      throw;
    }
  }
};

struct JavaWorkload : FDBWorkload {
  std::shared_ptr<JVM> jvm;
  FDBLogger& log;
  FDBWorkloadContext* context = nullptr;
  std::string name;
  bool failed = false;
  jobject workload = nullptr;
  JavaWorkload(const std::shared_ptr<JVM>& jvm, FDBLogger& log, const std::string& name)
    : jvm(jvm), log(log), name(name) {
    boost::replace_all(this->name, ".", "/");
  }
  ~JavaWorkload() {
    if (workload) {
      try {
        jvm->shutdownWorkload(workload, name);
        jvm->env->DeleteGlobalRef(workload);
      } catch (JNIError& e) {
        log.trace(error, "JNIShutDownUnsucessful", { { "Error", e.toString() }, { "Location", e.location() } });
      }
    }
  }

  std::string description() const override { return name; }
  bool init(FDBWorkloadContext* context) override {
    this->context = context;
    try {
      std::string classPath = context->getOption("classPath", std::string(""));
      std::vector<std::string> paths;
      boost::split(paths, classPath, boost::is_any_of(";,"), boost::token_compress_on);
      for (const auto& path : paths) {
        jvm->addToClassPath(path);
      }
      jvm->init();
      jobject jContext = jvm->createWorkloadContext(context);
      if (jContext == nullptr) {
        failed = true;
        return failed;
      }
      workload = jvm->createWorkload(jContext, name);
    } catch (JNIError& e) {
      failed = true;
      log.trace(error, "JNIError", { { "Location", e.location() }, { "Error", e.toString() } });
    }
    return failed;
  };
  void setup(FDBDatabase* db, GenericPromise<bool> done) override {
    if (failed) {
      done.send(false);
      return;
    }
    try {
      jvm->callWorkload(workload, db, "setup", std::move(done));
    } catch (JNIError& e) {
      failed = true;
      log.trace(error, "SetupFailedWithJNIError", { { "Error", e.toString() }, { "Location", e.location() } });
    }
  }
  void start(FDBDatabase* db, GenericPromise<bool> done) override {
    if (failed) {
      done.send(false);
      return;
    }
    try {
      jvm->callWorkload(workload, db, "start", std::move(done));
    } catch (JNIError& e) {
      failed = true;
      log.trace(error, "StartFailedWithJNIError", { { "Error", e.toString() }, { "Location", e.location() } });
    }
  }
  void check(FDBDatabase* db, GenericPromise<bool> done) override {
    if (failed) {
      done.send(false);
      return;
    }
    try {
      jvm->callWorkload(workload, db, "check", std::move(done));
    } catch (JNIError& e) {
      failed = true;
      log.trace(error, "CheckFailedWithJNIError", { { "Error", e.toString() }, { "Location", e.location() } });
    }
  }
  void getMetrics(std::vector<FDBPerfMetric>& out) const override {
    jvm->getMetrics(workload, name, out);
  }
};

struct JavaWorkloadFactory : FDBWorkloadFactory {
  FDBLogger* log;
  std::weak_ptr<JVM> jvm;
  JavaWorkloadFactory(FDBLogger* log) : log(log) {}
  JavaWorkloadFactory(const JavaWorkloadFactory&) = delete;
  JavaWorkloadFactory& operator=(const JavaWorkloadFactory&) = delete;
  std::shared_ptr<FDBWorkload> create(const std::string& name) override {
    auto jvmPtr = jvm.lock();
    if (!jvmPtr) {
      jvmPtr = std::make_shared<JVM>(log);
      jvm = jvmPtr;
    }
    return std::make_shared<JavaWorkload>(jvmPtr, *log, name);
  }
};

} // namespace

extern "C" DLLEXPORT FDBWorkloadFactory* workloadFactory(FDBLogger* logger);

FDBWorkloadFactory* workloadFactory(FDBLogger* logger) {
  static JavaWorkloadFactory factory(logger);
  return &factory;
}
@@ -22,11 +22,8 @@ package com.apple.foundationdb.testing;

import com.apple.foundationdb.Database;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.List;
import java.util.ArrayList;
import java.util.concurrent.Executor;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
@@ -35,94 +32,45 @@ import java.util.concurrent.SynchronousQueue;
import java.util.Map;

public abstract class AbstractWorkload {
  private static final Class<?>[] parameters = new Class<?>[]{URL.class};
  protected WorkloadContext context;
  private ThreadPoolExecutor executorService;

  public AbstractWorkload(WorkloadContext context) {
    this.context = context;
    long contextID = context.getProcessID();
    executorService =
      new ThreadPoolExecutor(1, 2,
                             10, TimeUnit.SECONDS,
                             new SynchronousQueue<>()) {
        @Override
        protected void beforeExecute(Thread t, Runnable r) {
          setProcessID(context.getProcessID());
          super.beforeExecute(t, r);
        }
      };
      new ThreadPoolExecutor(1, 2,
                             10, TimeUnit.SECONDS,
                             new SynchronousQueue<>()) {
        @Override
        protected void beforeExecute(Thread t, Runnable r) {
          context.setProcessID(contextID);
          super.beforeExecute(t, r);
        }
      };
  }

  private Executor getExecutor() {
  protected Executor getExecutor() {
    return executorService;
  }

  public abstract void setup(Database db);
  public abstract void start(Database db);
  public abstract boolean check(Database db);
  public double getCheckTimeout() {
  protected abstract void setup(Database db, Promise promise);
  protected abstract void start(Database db, Promise promise);
  protected abstract void check(Database db, Promise promise);
  protected List<PerfMetric> getMetrics() {
    return new ArrayList<PerfMetric>();
  }
  protected double getCheckTimeout() {
    return 3000;
  }

  private void setup(Database db, long voidCallback) {
    AbstractWorkload self = this;
    getExecutor().execute(new Runnable(){
      public void run() {
        self.setup(db);
        self.sendVoid(voidCallback);
      }
    });
  }
  private void start(Database db, long voidCallback) {
    AbstractWorkload self = this;
    getExecutor().execute(new Runnable(){
      public void run() {
        self.start(db);
        self.sendVoid(voidCallback);
      }
    });
  }
  private void check(Database db, long boolCallback) {
    AbstractWorkload self = this;
    getExecutor().execute(new Runnable(){
      public void run() {
        boolean res = self.check(db);
        self.sendBool(boolCallback, res);
      }
    });
  }

  private void shutdown() {
    executorService.shutdown();
  }

  public native void log(int severity, String message, Map<String, String> details);
  private native void setProcessID(long processID);
  private native void sendVoid(long handle);
  private native void sendBool(long handle, boolean value);

  // Helper functions to add to the class path at Runtime - will be called
  // from C++
  private static void addFile(String s) throws IOException {
    File f = new File(s);
    addFile(f);
  }

  private static void addFile(File f) throws IOException {
    addURL(f.toURI().toURL());
  }

  private static void addURL(URL u) throws IOException {
    URLClassLoader sysLoader = (URLClassLoader) ClassLoader.getSystemClassLoader();
    Class<URLClassLoader> sysClass = URLClassLoader.class;

    try {
      Method method = sysClass.getDeclaredMethod("addURL", parameters);
      method.setAccessible(true);
      method.invoke(sysLoader, new Object[]{u});
    } catch (Throwable t) {
      t.printStackTrace();
      throw new IOException("Error, could not add URL to system classloader");
    }
  private static long logger;
  public static void log(int severity, String message, Map<String, String> details) {
    log(logger, severity, message, details);
  }
  private static native void log(long logger, int severity, String message, Map<String, String> details);
}
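For readers of this diff, a minimal sketch of what a user-defined workload built on the new asynchronous AbstractWorkload API above might look like. The package and class names (my.test, MyJavaWorkload) and the "opCount" option are illustrative assumptions, not part of this commit:

  package my.test;

  import com.apple.foundationdb.Database;
  import com.apple.foundationdb.testing.AbstractWorkload;
  import com.apple.foundationdb.testing.Promise;
  import com.apple.foundationdb.testing.WorkloadContext;
  import java.util.Collections;

  // Hypothetical example workload; loaded via the classPath option handled in JavaWorkload.cpp above.
  public class MyJavaWorkload extends AbstractWorkload {
    public MyJavaWorkload(WorkloadContext context) {
      super(context);
    }

    @Override
    protected void setup(Database db, Promise promise) {
      // Populate test data here; completion is signalled through the native promise.
      promise.send(true);
    }

    @Override
    protected void start(Database db, Promise promise) {
      // Options come from the simulation test spec via the native WorkloadContext.
      long count = context.getOption("opCount", 1000L);
      // Severity 10 maps to Info in printTrace() above.
      log(10, "MyWorkloadStarted", Collections.singletonMap("OpCount", Long.toString(count)));
      promise.send(true);
    }

    @Override
    protected void check(Database db, Promise promise) {
      // Report success or failure of the verification phase.
      promise.send(true);
    }
  }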
@@ -0,0 +1,69 @@
/*
 * PerfMetric.java
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2019 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.apple.foundationdb.testing;

public class PerfMetric {
  private String name;
  private double value;
  private boolean averaged;
  private String formatCode;

  public PerfMetric(String name, double value) {
    this(name, value, true, "0.3g");
  }

  public PerfMetric(String name, double value, boolean averaged) {
    this(name, value, averaged, "0.3g");
  }

  public PerfMetric(String name, double value, boolean averaged, String formatCode) {
    this.name = name;
    this.value = value;
    this.averaged = averaged;
    this.formatCode = formatCode;
  }

  public String getName() {
    return name;
  }
  public double getValue() {
    return value;
  }
  public boolean isAveraged() {
    return averaged;
  }
  public String getFormatCode() {
    return formatCode;
  }

  public void setName(String name) {
    this.name = name;
  }
  public void setValue(double value) {
    this.value = value;
  }
  public void setAveraged(boolean averaged) {
    this.averaged = averaged;
  }
  public void setFormatCode(String formatCode) {
    this.formatCode = formatCode;
  }
}
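A hedged illustration of how a workload such as the sketch above could report these metrics through AbstractWorkload.getMetrics(), which JavaWorkload.cpp reads field by field; it assumes java.util.List/ArrayList imports, and opsPerSecond/totalOps are made-up fields of the example class, not part of this commit:

  // Hypothetical fragment of the example workload class shown earlier.
  @Override
  protected List<PerfMetric> getMetrics() {
    List<PerfMetric> metrics = new ArrayList<>();
    // name, value, averaged flag and format code mirror the C++ FDBPerfMetric fields.
    metrics.add(new PerfMetric("OperationsPerSecond", opsPerSecond, true));
    metrics.add(new PerfMetric("TotalOperations", totalOps, false, "%d"));
    return metrics;
  }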
@@ -0,0 +1,44 @@
/*
 * Promise.java
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2019 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.apple.foundationdb.testing;

public class Promise {
  private long nativePromise;
  private boolean wasSet;
  private static native void send(long self, boolean value);
  private Promise(long nativePromise) {
    this.nativePromise = nativePromise;
    this.wasSet = false;
  }

  public boolean canBeSet() {
    return !wasSet;
  }

  public void send(boolean value) {
    if (wasSet) {
      throw new IllegalStateException("Promise was already set");
    }
    wasSet = true;
    send(nativePromise, value);
  }

}
@ -23,39 +23,54 @@ package com.apple.foundationdb.testing;
|
|||
import java.util.Map;
|
||||
|
||||
public class WorkloadContext {
|
||||
private Map<String, String> options;
|
||||
private int clientId, clientCount;
|
||||
long sharedRandomNumber, processID;
|
||||
long impl;
|
||||
|
||||
public WorkloadContext(Map<String, String> options, int clientId, int clientCount, long sharedRandomNumber, long processID)
|
||||
private WorkloadContext(long impl)
|
||||
{
|
||||
this.options = options;
|
||||
this.clientId = clientId;
|
||||
this.clientCount = clientCount;
|
||||
this.sharedRandomNumber = sharedRandomNumber;
|
||||
this.processID = processID;
|
||||
}
|
||||
|
||||
public String getOption(String name, String defaultValue) {
|
||||
if (options.containsKey(name)) {
|
||||
return options.get(name);
|
||||
}
|
||||
return defaultValue;
|
||||
}
|
||||
|
||||
public int getClientId() {
|
||||
return clientId;
|
||||
}
|
||||
|
||||
public int getClientCount() {
|
||||
return clientCount;
|
||||
}
|
||||
|
||||
public long getSharedRandomNumber() {
|
||||
return sharedRandomNumber;
|
||||
this.impl = impl;
|
||||
}
|
||||
|
||||
public long getProcessID() {
|
||||
return processID;
|
||||
return getProcessID(impl);
|
||||
}
|
||||
|
||||
public void setProcessID(long processID) {
|
||||
setProcessID(impl, processID);
|
||||
}
|
||||
|
||||
public int getClientID() {
|
||||
return getClientID(impl);
|
||||
}
|
||||
|
||||
public int getClientCount() {
|
||||
return getClientCount(impl);
|
||||
}
|
||||
|
||||
public long getSharedRandomNumber() {
|
||||
return getSharedRandomNumber(impl);
|
||||
}
|
||||
|
||||
public String getOption(String name, String defaultValue) {
|
||||
return getOption(impl, name, defaultValue);
|
||||
}
|
||||
public long getOption(String name, long defaultValue) {
|
||||
return getOption(impl, name, defaultValue);
|
||||
}
|
||||
public boolean getOption(String name, boolean defaultValue) {
|
||||
return getOption(impl, name, defaultValue);
|
||||
}
|
||||
public double getOption(String name, double defaultValue) {
|
||||
return getOption(impl, name, defaultValue);
|
||||
}
|
||||
|
||||
private static native long getProcessID(long self);
|
||||
private static native void setProcessID(long self, long processID);
|
||||
private static native boolean getOption(long impl, String name, boolean defaultValue);
|
||||
private static native long getOption(long impl, String name, long defaultValue);
|
||||
private static native double getOption(long impl, String name, double defaultValue);
|
||||
private static native String getOption(long impl, String name, String defaultValue);
|
||||
private static native int getClientID(long self);
|
||||
private static native int getClientCount(long self);
|
||||
private static native long getSharedRandomNumber(long self);
|
||||
|
||||
}
|
||||
|
|
|
@ -10,7 +10,7 @@ RUN yum install -y yum-utils &&\
|
|||
yum -y install devtoolset-8 java-1.8.0-openjdk-devel \
|
||||
rh-python36-python-devel devtoolset-8-valgrind-devel \
|
||||
mono-core rh-ruby24 golang python27 rpm-build debbuild \
|
||||
python-pip npm dos2unix valgrind-devel ccache &&\
|
||||
python-pip npm dos2unix valgrind-devel ccache distcc &&\
|
||||
pip install boto3==1.1.1
|
||||
|
||||
USER root
|
||||
|
|
|
@ -2,7 +2,7 @@ version: "3"
|
|||
|
||||
services:
|
||||
common: &common
|
||||
image: foundationdb/foundationdb-build:0.1.5
|
||||
image: foundationdb/foundationdb-build:0.1.6
|
||||
|
||||
build-setup: &build-setup
|
||||
<<: *common
|
||||
|
@ -60,7 +60,7 @@ services:
|
|||
|
||||
snapshot-cmake: &snapshot-cmake
|
||||
<<: *build-setup
|
||||
command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -DFDB_RELEASE=0 -DVALGRIND=0 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && make -j "$${MAKEJOBS}" packages preinstall && cpack'
|
||||
command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -DCMAKE_COLOR_MAKEFILE=0 -DFDB_RELEASE=0 -DVALGRIND=0 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && make -j "$${MAKEJOBS}" packages preinstall && cpack'
|
||||
|
||||
prb-cmake:
|
||||
<<: *snapshot-cmake
|
||||
|
@ -68,7 +68,7 @@ services:
|
|||
|
||||
snapshot-ctest: &snapshot-ctest
|
||||
<<: *build-setup
|
||||
command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -DFDB_RELEASE=1 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && make -j "$${MAKEJOBS}" && ctest -L fast -j "$${MAKEJOBS}" --output-on-failure'
|
||||
command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -DCMAKE_COLOR_MAKEFILE=0 -DFDB_RELEASE=1 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && make -j "$${MAKEJOBS}" && ctest -L fast -j "$${MAKEJOBS}" --output-on-failure'
|
||||
|
||||
prb-ctest:
|
||||
<<: *snapshot-ctest
|
||||
|
@ -76,7 +76,7 @@ services:
|
|||
|
||||
snapshot-correctness: &snapshot-correctness
|
||||
<<: *build-setup
|
||||
command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -DFDB_RELEASE=1 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && make -j "$${MAKEJOBS}" && ctest -j "$${MAKEJOBS}" --output-on-failure
|
||||
command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -DCMAKE_COLOR_MAKEFILE=0 -DFDB_RELEASE=1 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && make -j "$${MAKEJOBS}" && ctest -j "$${MAKEJOBS}" --output-on-failure
|
||||
|
||||
prb-correctness:
|
||||
<<: *snapshot-correctness
|
||||
|
|
|
@ -123,6 +123,8 @@ function(add_fdb_test)
|
|||
${ADD_FDB_TEST_TEST_FILES}
|
||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||
get_filename_component(test_dir_full ${first_file} DIRECTORY)
|
||||
get_filename_component(test_dir ${test_dir_full} NAME)
|
||||
set_tests_properties(${test_name} PROPERTIES TIMEOUT ${this_test_timeout} LABELS "${test_dir}")
|
||||
if(NOT ${test_dir_full} STREQUAL "")
|
||||
get_filename_component(test_dir ${test_dir_full} NAME)
|
||||
set_tests_properties(${test_name} PROPERTIES TIMEOUT ${this_test_timeout} LABELS "${test_dir}")
|
||||
endif()
|
||||
endfunction()
|
||||
|
|
|
@ -161,7 +161,6 @@ else()
|
|||
-Wno-deprecated
|
||||
-fvisibility=hidden
|
||||
-Wreturn-type
|
||||
-fdiagnostics-color=always
|
||||
-fPIC)
|
||||
if (GPERFTOOLS_FOUND AND GCC)
|
||||
add_compile_options(
|
||||
|
|
|
@ -180,12 +180,12 @@ function(add_flow_target)
|
|||
list(APPEND generated_files ${CMAKE_CURRENT_BINARY_DIR}/${generated})
|
||||
if(WIN32)
|
||||
add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${generated}"
|
||||
COMMAND $<TARGET_FILE:actorcompiler> "${CMAKE_CURRENT_SOURCE_DIR}/${src}" "${CMAKE_CURRENT_BINARY_DIR}/${generated}" ${actor_compiler_flags} ${actor_compiler_flags}
|
||||
COMMAND $<TARGET_FILE:actorcompiler> "${CMAKE_CURRENT_SOURCE_DIR}/${src}" "${CMAKE_CURRENT_BINARY_DIR}/${generated}" ${actor_compiler_flags}
|
||||
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${src}" actorcompiler
|
||||
COMMENT "Compile actor: ${src}")
|
||||
else()
|
||||
add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${generated}"
|
||||
COMMAND ${MONO_EXECUTABLE} ${actor_exe} "${CMAKE_CURRENT_SOURCE_DIR}/${src}" "${CMAKE_CURRENT_BINARY_DIR}/${generated}" ${actor_compiler_flags} ${actor_compiler_flags} > /dev/null
|
||||
COMMAND ${MONO_EXECUTABLE} ${actor_exe} "${CMAKE_CURRENT_SOURCE_DIR}/${src}" "${CMAKE_CURRENT_BINARY_DIR}/${generated}" ${actor_compiler_flags} > /dev/null
|
||||
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${src}" actorcompiler
|
||||
COMMENT "Compile actor: ${src}")
|
||||
endif()
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
.. default-domain:: cpp
|
||||
.. highlight:: cpp
|
||||
|
||||
###############
|
||||
Client Testing
|
||||
###############
|
||||
|
@ -49,24 +52,112 @@ that gets called by server processes running the ``tester`` role. Additionally,
|
|||
simulates a full fdb cluster with several machines and different configurations in one process. This simulator can run the same
|
||||
workloads you can run on a real cluster. It will also inject random failures like network partitions and disk failures.
|
||||
|
||||
Currently, workloads can only be implemented in Java, support for other languages might come later.
|
||||
|
||||
This tutorial explains how one can implement a workload, how one can orchestrate a workload on a cluster with multiple clients, and
|
||||
how one can run a workload within a simulator. Running in a simulator is also useful as it does not require any setup: you can simply
|
||||
run one command that will provide you with a fully functional FoundationDB cluster.
|
||||
|
||||
Preparing the fdbserver Binary
|
||||
==============================
|
||||
General Overview
|
||||
================
|
||||
|
||||
In order to run a Java workload, ``fdbserver`` needs to be able to embed a JVM. Because of that it needs to be linked against JNI.
|
||||
The official FDB binaries do not link against JNI and therefore one can't use that to run a Java workload. Instead you need to
|
||||
download the sources and build them. Make sure that ``cmake`` can find Java and pass ``-DWITH_JAVA_WORKLOAD=ON`` to cmake.
|
||||
Workloads in FoundationDB are generally compiled into the binary. However, FoundationDB also provides the ability to load workloads
|
||||
dynamically. This is done through ``dlopen`` (on Unix-like operating systems) or ``LoadLibrary`` (on Windows).
|
||||
|
||||
After FoundationDB was built, you can use ``bin/fdbserver`` to run the server. The jar file containing the client library can be
|
||||
found in ``packages/fdb-VERSION.jar``. Both of these are in the build directory.
|
||||
Parallelism and Determinism
|
||||
===========================
|
||||
|
||||
Implementing a Workload
|
||||
=======================
|
||||
A workload can run either in a simulation or on a real cluster. In simulation, ``fdbserver`` will simulate a whole cluster and will
|
||||
use a deterministic random number generator to simulate random behavior and random failures. This random number generator is initialized
|
||||
with a random seed. In case of a test failure, the user can reuse the given seed and rerun the same test in order to further observe
|
||||
and debug the behavior.
|
||||
|
||||
However, this will only work as long as the workload doesn't introduce any non-deterministic behavior. One example of non-deterministic
|
||||
behavior is running multiple threads.
|
||||
|
||||
The workload is created on the main network thread and it also runs there. Because of this, using any blocking
|
||||
function (for example ``blockUntilReady`` on a future object) will result in a deadlock. Using the callback API is therefore required
|
||||
if one wants to keep the simulator's deterministic behavior.
|
||||
|
||||
For existing applications and layers, however, not using the blocking API might not be an option. For these use cases, a user can choose
|
||||
to start new threads and use the blocking API from within these threads. This will mean that test failures will be non-deterministic and
|
||||
might be hard to reproduce.
|
||||
|
||||
To start a new thread, one has to "bind" operating system threads to their simulated processes. This can be done by setting the
|
||||
``ProcessId`` in the child threads when they get created. In Java this is done by only starting new threads through the provided
|
||||
``Executor``. In the C++ API one can use the ``FDBWorkloadContext`` to do that. For example:
|
||||
|
||||
.. code-block:: C++

    template <class Fun>
    std::thread startThread(FDBWorkloadContext* context, Fun fun) {
        auto processID = context->getProcessID();
        return std::thread([context, processID, fun]() {
            context->setProcessID(processID);
            fun();
        });
    }
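
As with any ``std::thread``, a thread created through such a helper still has to be joined or detached before the workload is destroyed.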
|
||||
|
||||
Finding the Shared Object
|
||||
=========================
|
||||
|
||||
When the test starts, ``fdbserver`` needs to find the shared object to load. The name of this shared object has to be provided.
|
||||
|
||||
For Java, we provide an implementation in ``libjava_workloads.so`` which can be built out of the sources. The tester will look
|
||||
for the key ``libraryName`` in the test file which should be the name of the library without extension and without the ``lib``
|
||||
prefix (so ``java_workloads`` if you want to write a Java workload).
|
||||
|
||||
By default, the process will look for the library in the directory ``../shared/foundationdb/`` relative to the location of the
|
||||
``fdbserver`` binary. If the library is somewhere else on the system, one can provide the absolute path to the library (only
|
||||
the folder, not the file name) in the test file with the ``libraryPath`` option.
|
||||
|
||||
Implementing a C++ Workload
|
||||
===========================
|
||||
|
||||
In order to implement a workload, one has to build a shared library that links against the fdb client library. This library has to
|
||||
expose a function (with C linkage) called ``workloadFactory`` which needs to return a pointer to an object of type ``FDBWorkloadFactory``.
This mechanism allows the author to implement as many workloads within one library as they want. To do this, the pure virtual classes
|
||||
``FDBWorkloadFactory`` and ``FDBWorkload`` have to be implemented.
|
||||
|
||||
.. function:: FDBWorkloadFactory* workloadFactory(FDBLogger*)
|
||||
|
||||
This function has to be defined within the shared library and will be called by ``fdbserver`` for looking up a specific workload.
|
||||
``FDBLogger`` will be passed and is guaranteed to survive for the lifetime of the process. This class can be used to write to the
|
||||
FoundationDB traces. Logging anything with severity ``FDBSeverity::Error`` will result in a hard test failure. This function needs
|
||||
to have C linkage, so define it in an ``extern "C"`` block.
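
A minimal sketch of such an entry point could look like this (``MyWorkloadFactory`` is a hypothetical implementation of ``FDBWorkloadFactory``, sketched in the next section, and ``DLLEXPORT`` stands for whatever symbol-visibility macro the workload header provides):

.. code-block:: C++

    FDBLogger* logger = nullptr;

    extern "C" DLLEXPORT FDBWorkloadFactory* workloadFactory(FDBLogger* log) {
        logger = log;                     // valid for the lifetime of the process
        static MyWorkloadFactory factory; // the returned pointer must stay valid
        return &factory;
    }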
|
||||
|
||||
.. function:: std::shared_ptr<FDBWorkload> FDBWorkloadFactory::create(const std::string& name)
|
||||
|
||||
This is the only method to be implemented in ``FDBWorkloadFactory``. If the test file contains a key-value pair ``workloadName``
|
||||
the value will be passed to this method (empty string otherwise). This way, a library author can implement many workloads in one
|
||||
library and use the test file to choose which one to run (or run multiple workloads either concurrently or serially).
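
Continuing the hypothetical ``MyWorkloadFactory`` from above, its ``create`` method could dispatch on that name roughly like this (``MyWorkload`` and ``AnotherWorkload`` are assumed workload classes):

.. code-block:: C++

    struct MyWorkloadFactory : FDBWorkloadFactory {
        std::shared_ptr<FDBWorkload> create(const std::string& name) override {
            if (name == "AnotherWorkload") {
                return std::make_shared<AnotherWorkload>();
            }
            // Fallback, also used when the test file sets no workloadName.
            return std::make_shared<MyWorkload>();
        }
    };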
|
||||
|
||||
.. function:: std::string FDBWorkload::description() const
|
||||
|
||||
This method has to return the name of the workload. This can be a static name and is primarily used for tracing.
|
||||
|
||||
.. function:: bool FDBWorkload::init(FDBWorkloadContext* context)
|
||||
|
||||
This method is called right after the workload object has been created. It receives the ``FDBWorkloadContext``, which the workload can use to read its options and initialize its state.
|
||||
|
||||
.. function:: void FDBWorkload::setup(FDBDatabase* db, GenericPromise<bool> done)
|
||||
|
||||
This method will be called by the tester during the setup phase. It should be used to populate the database.
|
||||
|
||||
.. function:: void FDBWorkload::start(FDBDatabase* db, GenericPromise<bool> done)
|
||||
|
||||
This method should run the actual test.
|
||||
|
||||
.. function:: void FDBWorkload::check(FDBDatabase* db, GenericPromise<bool> done)
|
||||
|
||||
When the tester completes, this method will be called. A workload should run any consistency/correctness tests
|
||||
during this phase.
|
||||
|
||||
.. function:: void FDBWorkload::getMetrics(std::vector<FDBPerfMetric>& out) const
|
||||
|
||||
If a workload collects metrics (like latencies or throughput numbers), these should be reported back here.
|
||||
The multitester (or test orchestrator) will collect all metrics from all test clients and it will aggregate them.
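
Putting these methods together, a skeleton workload might look like the following sketch (the class name, the metric, and the assumed field order of ``FDBPerfMetric`` are illustrative):

.. code-block:: C++

    struct MyWorkload : FDBWorkload {
        int transactionsRun = 0;

        std::string description() const override { return "MyWorkload"; }

        bool init(FDBWorkloadContext* context) override {
            // Read options from the context and initialize state here.
            return true;
        }

        void setup(FDBDatabase* db, GenericPromise<bool> done) override {
            // Populate the database, then signal completion.
            done.send(true);
        }

        void start(FDBDatabase* db, GenericPromise<bool> done) override {
            // Run the actual test and send(true) once it finished.
            done.send(true);
        }

        void check(FDBDatabase* db, GenericPromise<bool> done) override {
            // Verify invariants; send(false) to fail the test.
            done.send(true);
        }

        void getMetrics(std::vector<FDBPerfMetric>& out) const override {
            // Assumed field order: name, value, averaged.
            out.push_back(FDBPerfMetric{ "TransactionsRun", double(transactionsRun), false });
        }
    };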
|
||||
|
||||
Implementing a Java Workload
|
||||
============================
|
||||
|
||||
In order to implement your own workload in Java you can simply create an implementation of the abstract class ``AbstractWorkload``.
|
||||
A minimal implementation will look like this:
|
||||
|
@ -74,6 +165,7 @@ A minimal implementation will look like this:
|
|||
.. code-block:: java
|
||||
|
||||
package my.package;
|
||||
import com.apple.foundationdb.testing.Promise;
|
||||
import com.apple.foundationdb.testing.AbstractWorkload;
|
||||
import com.apple.foundationdb.testing.WorkloadContext;
|
||||
|
||||
|
@ -83,19 +175,21 @@ A minimal implementation will look like this:
|
|||
}
|
||||
|
||||
@Override
|
||||
public void setup(Database db) {
|
||||
public void setup(Database db, Promise promise) {
|
||||
log(20, "WorkloadSetup", null);
|
||||
promise.send(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void start(Database db) {
|
||||
log(20, "WorkloadStarted", null);
|
||||
promise.send(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean check(Database db) {
|
||||
log(20, "WorkloadFailureCheck", null);
|
||||
return true;
|
||||
promise.send(true);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -165,9 +259,9 @@ A test file might look like this:
|
|||
.. code-block:: none
|
||||
|
||||
testTitle=MyTest
|
||||
testName=JavaWorkload
|
||||
workloadClass=my.package.MinimalWorkload
|
||||
jvmOptions=-Djava.class.path=*PATH_TO_FDB_CLIENT_JAR*,*other options you want to pass to the JVM*
|
||||
testName=External
|
||||
libraryName=java_workloads
|
||||
workloadName=my.package.MinimalWorkload
|
||||
classPath=PATH_TO_JAR_OR_DIR_CONTAINING_WORKLOAD,OTHER_DEPENDENCIES
|
||||
|
||||
testName=Attrition
|
||||
|
@ -176,15 +270,15 @@ A test file might look like this:
|
|||
machinesToKill=3
|
||||
|
||||
testTitle=AnotherTest
|
||||
workloadClass=my.package.MinimalWorkload
|
||||
workloadClass=my.package.MinimalWorkload
|
||||
jvmOptions=-Djava.class.path=*PATH_TO_FDB_CLIENT_JAR*,*other options you want to pass to the JVM*
|
||||
testName=External
|
||||
libraryName=java_workloads
|
||||
workloadName=my.package.MinimalWorkload
|
||||
classPath=PATH_TO_JAR_OR_DIR_CONTAINING_WORKLOAD,OTHER_DEPENDENCIES
|
||||
someOpion=foo
|
||||
someOption=foo
|
||||
|
||||
workloadClass=my.package.AnotherWorkload
|
||||
workloadClass=my.package.AnotherWorkload
|
||||
jvmOptions=-Djava.class.path=*PATH_TO_FDB_CLIENT_JAR*,*other options you want to pass to the JVM*
|
||||
testName=External
|
||||
libraryName=java_workloads
|
||||
workloadName=my.package.AnotherWorkload
|
||||
classPath=PATH_TO_JAR_OR_DIR_CONTAINING_WORKLOAD,OTHER_DEPENDENCIES
|
||||
anotherOption=foo
|
||||
|
||||
|
|
|
@ -10,38 +10,38 @@ macOS
|
|||
|
||||
The macOS installation package is supported on macOS 10.7+. It includes the client and (optionally) the server.
|
||||
|
||||
* `FoundationDB-6.1.10.pkg <https://www.foundationdb.org/downloads/6.1.10/macOS/installers/FoundationDB-6.1.10.pkg>`_
|
||||
* `FoundationDB-6.1.11.pkg <https://www.foundationdb.org/downloads/6.1.11/macOS/installers/FoundationDB-6.1.11.pkg>`_
|
||||
|
||||
Ubuntu
|
||||
------
|
||||
|
||||
The Ubuntu packages are supported on 64-bit Ubuntu 12.04+, but beware of the Linux kernel bug in Ubuntu 12.x.
|
||||
|
||||
* `foundationdb-clients-6.1.10-1_amd64.deb <https://www.foundationdb.org/downloads/6.1.10/ubuntu/installers/foundationdb-clients_6.1.10-1_amd64.deb>`_
|
||||
* `foundationdb-server-6.1.10-1_amd64.deb <https://www.foundationdb.org/downloads/6.1.10/ubuntu/installers/foundationdb-server_6.1.10-1_amd64.deb>`_ (depends on the clients package)
|
||||
* `foundationdb-clients-6.1.11-1_amd64.deb <https://www.foundationdb.org/downloads/6.1.11/ubuntu/installers/foundationdb-clients_6.1.11-1_amd64.deb>`_
|
||||
* `foundationdb-server-6.1.11-1_amd64.deb <https://www.foundationdb.org/downloads/6.1.11/ubuntu/installers/foundationdb-server_6.1.11-1_amd64.deb>`_ (depends on the clients package)
|
||||
|
||||
RHEL/CentOS EL6
|
||||
---------------
|
||||
|
||||
The RHEL/CentOS EL6 packages are supported on 64-bit RHEL/CentOS 6.x.
|
||||
|
||||
* `foundationdb-clients-6.1.10-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.10/rhel6/installers/foundationdb-clients-6.1.10-1.el6.x86_64.rpm>`_
|
||||
* `foundationdb-server-6.1.10-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.10/rhel6/installers/foundationdb-server-6.1.10-1.el6.x86_64.rpm>`_ (depends on the clients package)
|
||||
* `foundationdb-clients-6.1.11-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.11/rhel6/installers/foundationdb-clients-6.1.11-1.el6.x86_64.rpm>`_
|
||||
* `foundationdb-server-6.1.11-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.11/rhel6/installers/foundationdb-server-6.1.11-1.el6.x86_64.rpm>`_ (depends on the clients package)
|
||||
|
||||
RHEL/CentOS EL7
|
||||
---------------
|
||||
|
||||
The RHEL/CentOS EL7 packages are supported on 64-bit RHEL/CentOS 7.x.
|
||||
|
||||
* `foundationdb-clients-6.1.10-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.10/rhel7/installers/foundationdb-clients-6.1.10-1.el7.x86_64.rpm>`_
|
||||
* `foundationdb-server-6.1.10-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.10/rhel7/installers/foundationdb-server-6.1.10-1.el7.x86_64.rpm>`_ (depends on the clients package)
|
||||
* `foundationdb-clients-6.1.11-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.11/rhel7/installers/foundationdb-clients-6.1.11-1.el7.x86_64.rpm>`_
|
||||
* `foundationdb-server-6.1.11-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.11/rhel7/installers/foundationdb-server-6.1.11-1.el7.x86_64.rpm>`_ (depends on the clients package)
|
||||
|
||||
Windows
|
||||
-------
|
||||
|
||||
The Windows installer is supported on 64-bit Windows XP and later. It includes the client and (optionally) the server.
|
||||
|
||||
* `foundationdb-6.1.10-x64.msi <https://www.foundationdb.org/downloads/6.1.10/windows/installers/foundationdb-6.1.10-x64.msi>`_
|
||||
* `foundationdb-6.1.11-x64.msi <https://www.foundationdb.org/downloads/6.1.11/windows/installers/foundationdb-6.1.11-x64.msi>`_
|
||||
|
||||
API Language Bindings
|
||||
=====================
|
||||
|
@ -58,18 +58,18 @@ On macOS and Windows, the FoundationDB Python API bindings are installed as part
|
|||
|
||||
If you need to use the FoundationDB Python API from other Python installations or paths, download the Python package:
|
||||
|
||||
* `foundationdb-6.1.10.tar.gz <https://www.foundationdb.org/downloads/6.1.10/bindings/python/foundationdb-6.1.10.tar.gz>`_
|
||||
* `foundationdb-6.1.11.tar.gz <https://www.foundationdb.org/downloads/6.1.11/bindings/python/foundationdb-6.1.11.tar.gz>`_
|
||||
|
||||
Ruby 1.9.3/2.0.0+
|
||||
-----------------
|
||||
|
||||
* `fdb-6.1.10.gem <https://www.foundationdb.org/downloads/6.1.10/bindings/ruby/fdb-6.1.10.gem>`_
|
||||
* `fdb-6.1.11.gem <https://www.foundationdb.org/downloads/6.1.11/bindings/ruby/fdb-6.1.11.gem>`_
|
||||
|
||||
Java 8+
|
||||
-------
|
||||
|
||||
* `fdb-java-6.1.10.jar <https://www.foundationdb.org/downloads/6.1.10/bindings/java/fdb-java-6.1.10.jar>`_
|
||||
* `fdb-java-6.1.10-javadoc.jar <https://www.foundationdb.org/downloads/6.1.10/bindings/java/fdb-java-6.1.10-javadoc.jar>`_
|
||||
* `fdb-java-6.1.11.jar <https://www.foundationdb.org/downloads/6.1.11/bindings/java/fdb-java-6.1.11.jar>`_
|
||||
* `fdb-java-6.1.11-javadoc.jar <https://www.foundationdb.org/downloads/6.1.11/bindings/java/fdb-java-6.1.11-javadoc.jar>`_
|
||||
|
||||
Go 1.11+
|
||||
--------
|
||||
|
|
|
@ -265,7 +265,10 @@
|
|||
},
|
||||
"incompatible_connections":[
|
||||
],
|
||||
"datacenter_version_difference":0,
|
||||
"datacenter_lag":{
|
||||
"seconds":1.0,
|
||||
"versions":1000000
|
||||
},
|
||||
"degraded_processes":0,
|
||||
"database_available":true,
|
||||
"database_locked":false,
|
||||
|
|
|
@ -2,6 +2,14 @@
|
|||
Release Notes
|
||||
#############
|
||||
|
||||
6.1.11
|
||||
======
|
||||
|
||||
Fixes
|
||||
-----
|
||||
|
||||
* Machines which were added to a cluster immediately after the cluster was upgraded to 6.1 would not be given data. `(PR #1764) <https://github.com/apple/foundationdb/pull/1764>`_
|
||||
|
||||
6.1.10
|
||||
======
|
||||
|
||||
|
@ -174,4 +182,4 @@ Earlier release notes
|
|||
* :doc:`Beta 2 (API Version 22) </old-release-notes/release-notes-022>`
|
||||
* :doc:`Beta 1 (API Version 21) </old-release-notes/release-notes-021>`
|
||||
* :doc:`Alpha 6 (API Version 16) </old-release-notes/release-notes-016>`
|
||||
* :doc:`Alpha 5 (API Version 14) </old-release-notes/release-notes-014>`
|
||||
* :doc:`Alpha 5 (API Version 14) </old-release-notes/release-notes-014>`
|
|
@ -14,10 +14,13 @@ Performance
|
|||
Fixes
|
||||
-----
|
||||
|
||||
* If a cluster is upgraded during an ``onError`` call, the cluster could return a ``cluster_version_changed`` error. `(PR #1734) <https://github.com/apple/foundationdb/pull/1734>`_.
|
||||
|
||||
Status
|
||||
------
|
||||
|
||||
* Added ``run_loop_busy`` to the ``processes`` section to record the fraction of time the run loop is busy. `(PR #1760) <https://github.com/apple/foundationdb/pull/1760>`_.
|
||||
* Remove ``cluster.datacenter_version_difference`` and replace it with ``cluster.datacenter_lag`` that has subfields ``versions`` and ``seconds``. `(PR #1800) <https://github.com/apple/foundationdb/pull/1800>`_.
|
||||
|
||||
Bindings
|
||||
--------
|
||||
|
|
|
@ -3244,7 +3244,7 @@ int main(int argc, char* argv[]) {
|
|||
}
|
||||
|
||||
try {
|
||||
db = Database::createDatabase(ccf, -1, localities);
|
||||
db = Database::createDatabase(ccf, -1, true, localities);
|
||||
}
|
||||
catch (Error& e) {
|
||||
fprintf(stderr, "ERROR: %s\n", e.what());
|
||||
|
@ -3266,7 +3266,7 @@ int main(int argc, char* argv[]) {
|
|||
}
|
||||
|
||||
try {
|
||||
sourceDb = Database::createDatabase(sourceCcf, -1, localities);
|
||||
sourceDb = Database::createDatabase(sourceCcf, -1, true, localities);
|
||||
}
|
||||
catch (Error& e) {
|
||||
fprintf(stderr, "ERROR: %s\n", e.what());
|
||||
|
|
|
@ -59,31 +59,44 @@ extern const char* getHGVersion();
|
|||
|
||||
std::vector<std::string> validOptions;
|
||||
|
||||
enum { OPT_CONNFILE, OPT_DATABASE, OPT_HELP, OPT_TRACE, OPT_TRACE_DIR, OPT_TIMEOUT, OPT_EXEC, OPT_NO_STATUS, OPT_STATUS_FROM_JSON, OPT_VERSION, OPT_TRACE_FORMAT };
|
||||
enum {
|
||||
OPT_CONNFILE,
|
||||
OPT_DATABASE,
|
||||
OPT_HELP,
|
||||
OPT_TRACE,
|
||||
OPT_TRACE_DIR,
|
||||
OPT_TIMEOUT,
|
||||
OPT_EXEC,
|
||||
OPT_NO_STATUS,
|
||||
OPT_STATUS_FROM_JSON,
|
||||
OPT_VERSION,
|
||||
OPT_TRACE_FORMAT,
|
||||
OPT_USE_OBJECT_SERIALIZER
|
||||
};
|
||||
|
||||
CSimpleOpt::SOption g_rgOptions[] = {
|
||||
{ OPT_CONNFILE, "-C", SO_REQ_SEP },
|
||||
{ OPT_CONNFILE, "--cluster_file", SO_REQ_SEP },
|
||||
{ OPT_DATABASE, "-d", SO_REQ_SEP },
|
||||
{ OPT_TRACE, "--log", SO_NONE },
|
||||
{ OPT_TRACE_DIR, "--log-dir", SO_REQ_SEP },
|
||||
{ OPT_TIMEOUT, "--timeout", SO_REQ_SEP },
|
||||
{ OPT_EXEC, "--exec", SO_REQ_SEP },
|
||||
{ OPT_NO_STATUS, "--no-status", SO_NONE },
|
||||
{ OPT_HELP, "-?", SO_NONE },
|
||||
{ OPT_HELP, "-h", SO_NONE },
|
||||
{ OPT_HELP, "--help", SO_NONE },
|
||||
{ OPT_STATUS_FROM_JSON, "--status-from-json", SO_REQ_SEP },
|
||||
{ OPT_VERSION, "--version", SO_NONE },
|
||||
{ OPT_VERSION, "-v", SO_NONE },
|
||||
{ OPT_TRACE_FORMAT, "--trace_format", SO_REQ_SEP },
|
||||
CSimpleOpt::SOption g_rgOptions[] = { { OPT_CONNFILE, "-C", SO_REQ_SEP },
|
||||
{ OPT_CONNFILE, "--cluster_file", SO_REQ_SEP },
|
||||
{ OPT_DATABASE, "-d", SO_REQ_SEP },
|
||||
{ OPT_TRACE, "--log", SO_NONE },
|
||||
{ OPT_TRACE_DIR, "--log-dir", SO_REQ_SEP },
|
||||
{ OPT_TIMEOUT, "--timeout", SO_REQ_SEP },
|
||||
{ OPT_EXEC, "--exec", SO_REQ_SEP },
|
||||
{ OPT_NO_STATUS, "--no-status", SO_NONE },
|
||||
{ OPT_HELP, "-?", SO_NONE },
|
||||
{ OPT_HELP, "-h", SO_NONE },
|
||||
{ OPT_HELP, "--help", SO_NONE },
|
||||
{ OPT_STATUS_FROM_JSON, "--status-from-json", SO_REQ_SEP },
|
||||
{ OPT_VERSION, "--version", SO_NONE },
|
||||
{ OPT_VERSION, "-v", SO_NONE },
|
||||
{ OPT_TRACE_FORMAT, "--trace_format", SO_REQ_SEP },
|
||||
{ OPT_USE_OBJECT_SERIALIZER, "-S", SO_REQ_SEP },
|
||||
{ OPT_USE_OBJECT_SERIALIZER, "--object-serializer", SO_REQ_SEP },
|
||||
|
||||
#ifndef TLS_DISABLED
|
||||
TLS_OPTION_FLAGS
|
||||
TLS_OPTION_FLAGS
|
||||
#endif
|
||||
|
||||
SO_END_OF_OPTIONS
|
||||
};
|
||||
SO_END_OF_OPTIONS };
|
||||
|
||||
void printAtCol(const char* text, int col) {
|
||||
const char* iter = text;
|
||||
|
@ -401,21 +414,25 @@ static void printProgramUsage(const char* name) {
|
|||
" FDB_CLUSTER_FILE environment variable, then `./fdb.cluster',\n"
|
||||
" then `%s'.\n", platform::getDefaultClusterFilePath().c_str());
|
||||
printf(" --log Enables trace file logging for the CLI session.\n"
|
||||
" --log-dir PATH Specifes the output directory for trace files. If\n"
|
||||
" unspecified, defaults to the current directory. Has\n"
|
||||
" no effect unless --log is specified.\n"
|
||||
" --trace_format FORMAT\n"
|
||||
" Select the format of the log files. xml (the default) and json\n"
|
||||
" are supported. Has no effect unless --log is specified.\n"
|
||||
" --exec CMDS Immediately executes the semicolon separated CLI commands\n"
|
||||
" and then exits.\n"
|
||||
" --no-status Disables the initial status check done when starting\n"
|
||||
" the CLI.\n"
|
||||
" --log-dir PATH Specifes the output directory for trace files. If\n"
|
||||
" unspecified, defaults to the current directory. Has\n"
|
||||
" no effect unless --log is specified.\n"
|
||||
" --trace_format FORMAT\n"
|
||||
" Select the format of the log files. xml (the default) and json\n"
|
||||
" are supported. Has no effect unless --log is specified.\n"
|
||||
" -S ON|OFF, --object-serializer ON|OFF\n"
|
||||
" Use object serializer for sending messages. The object serializer\n"
|
||||
" is currently a beta feature and it allows fdb processes to talk to\n"
|
||||
" each other even if they don't have the same version\n"
|
||||
" --exec CMDS Immediately executes the semicolon separated CLI commands\n"
|
||||
" and then exits.\n"
|
||||
" --no-status Disables the initial status check done when starting\n"
|
||||
" the CLI.\n"
|
||||
#ifndef TLS_DISABLED
|
||||
TLS_HELP
|
||||
TLS_HELP
|
||||
#endif
|
||||
" -v, --version Print FoundationDB CLI version information and exit.\n"
|
||||
" -h, --help Display this help and exit.\n");
|
||||
" -v, --version Print FoundationDB CLI version information and exit.\n"
|
||||
" -h, --help Display this help and exit.\n");
|
||||
}
|
||||
|
||||
|
||||
|
@ -2332,6 +2349,7 @@ struct CLIOptions {
|
|||
bool trace;
|
||||
std::string traceDir;
|
||||
std::string traceFormat;
|
||||
bool useObjectSerializer = false;
|
||||
int exit_timeout;
|
||||
Optional<std::string> exec;
|
||||
bool initialStatusCheck;
|
||||
|
@ -2403,41 +2421,55 @@ struct CLIOptions {
|
|||
|
||||
#ifndef TLS_DISABLED
|
||||
// TLS Options
|
||||
case TLSOptions::OPT_TLS_PLUGIN:
|
||||
args.OptionArg();
|
||||
break;
|
||||
case TLSOptions::OPT_TLS_CERTIFICATES:
|
||||
tlsCertPath = args.OptionArg();
|
||||
break;
|
||||
case TLSOptions::OPT_TLS_CA_FILE:
|
||||
tlsCAPath = args.OptionArg();
|
||||
break;
|
||||
case TLSOptions::OPT_TLS_KEY:
|
||||
tlsKeyPath = args.OptionArg();
|
||||
break;
|
||||
case TLSOptions::OPT_TLS_PASSWORD:
|
||||
tlsPassword = args.OptionArg();
|
||||
break;
|
||||
case TLSOptions::OPT_TLS_VERIFY_PEERS:
|
||||
tlsVerifyPeers = args.OptionArg();
|
||||
break;
|
||||
case TLSOptions::OPT_TLS_PLUGIN:
|
||||
args.OptionArg();
|
||||
break;
|
||||
case TLSOptions::OPT_TLS_CERTIFICATES:
|
||||
tlsCertPath = args.OptionArg();
|
||||
break;
|
||||
case TLSOptions::OPT_TLS_CA_FILE:
|
||||
tlsCAPath = args.OptionArg();
|
||||
break;
|
||||
case TLSOptions::OPT_TLS_KEY:
|
||||
tlsKeyPath = args.OptionArg();
|
||||
break;
|
||||
case TLSOptions::OPT_TLS_PASSWORD:
|
||||
tlsPassword = args.OptionArg();
|
||||
break;
|
||||
case TLSOptions::OPT_TLS_VERIFY_PEERS:
|
||||
tlsVerifyPeers = args.OptionArg();
|
||||
break;
|
||||
#endif
|
||||
case OPT_HELP:
|
||||
printProgramUsage(program_name.c_str());
|
||||
return 0;
|
||||
case OPT_STATUS_FROM_JSON:
|
||||
return printStatusFromJSON(args.OptionArg());
|
||||
case OPT_TRACE_FORMAT:
|
||||
if (!validateTraceFormat(args.OptionArg())) {
|
||||
fprintf(stderr, "WARNING: Unrecognized trace format `%s'\n", args.OptionArg());
|
||||
}
|
||||
traceFormat = args.OptionArg();
|
||||
break;
|
||||
case OPT_VERSION:
|
||||
printVersion();
|
||||
return FDB_EXIT_SUCCESS;
|
||||
}
|
||||
return -1;
|
||||
case OPT_HELP:
|
||||
printProgramUsage(program_name.c_str());
|
||||
return 0;
|
||||
case OPT_STATUS_FROM_JSON:
|
||||
return printStatusFromJSON(args.OptionArg());
|
||||
case OPT_TRACE_FORMAT:
|
||||
if (!validateTraceFormat(args.OptionArg())) {
|
||||
fprintf(stderr, "WARNING: Unrecognized trace format `%s'\n", args.OptionArg());
|
||||
}
|
||||
traceFormat = args.OptionArg();
|
||||
break;
|
||||
case OPT_USE_OBJECT_SERIALIZER: {
|
||||
std::string s = args.OptionArg();
|
||||
std::transform(s.begin(), s.end(), s.begin(), ::tolower);
|
||||
if (s == "on" || s == "true" || s == "1") {
|
||||
useObjectSerializer = true;
|
||||
} else if (s == "off" || s == "false" || s == "0") {
|
||||
useObjectSerializer = false;
|
||||
} else {
|
||||
fprintf(stderr, "ERROR: Could not parse object serializer option: `%s'\n", s.c_str());
|
||||
printProgramUsage(program_name.c_str());
|
||||
flushAndExit(FDB_EXIT_ERROR);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OPT_VERSION:
|
||||
printVersion();
|
||||
return FDB_EXIT_SUCCESS;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -2484,7 +2516,7 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
|
|||
TraceEvent::setNetworkThread();
|
||||
|
||||
try {
|
||||
db = Database::createDatabase(ccf, -1);
|
||||
db = Database::createDatabase(ccf, -1, false);
|
||||
if (!opt.exec.present()) {
|
||||
printf("Using cluster file `%s'.\n", ccf->getFilename().c_str());
|
||||
}
|
||||
|
@ -3490,6 +3522,11 @@ int main(int argc, char **argv) {
|
|||
}
|
||||
setNetworkOption(FDBNetworkOptions::ENABLE_SLOW_TASK_PROFILING);
|
||||
}
|
||||
// The USE_OBJECT_SERIALIZER network option expects an 8 byte little endian integer which is interpreted as zero =
|
||||
// false, non-zero = true.
|
||||
setNetworkOption(FDBNetworkOptions::USE_OBJECT_SERIALIZER,
|
||||
opt.useObjectSerializer ? LiteralStringRef("\x01\x00\x00\x00\x00\x00\x00\x00")
|
||||
: LiteralStringRef("\x00\x00\x00\x00\x00\x00\x00\x00"));
|
||||
|
||||
initHelp();
|
||||
|
||||
|
|
|
@ -419,7 +419,7 @@ ACTOR Future<Void> readCommitted(Database cx, PromiseStream<RangeResultWithVersi
|
|||
|
||||
//add lock
|
||||
releaser.release();
|
||||
wait(lock->take(TaskDefaultYield, limits.bytes + CLIENT_KNOBS->VALUE_SIZE_LIMIT + CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT));
|
||||
wait(lock->take(TaskPriority::DefaultYield, limits.bytes + CLIENT_KNOBS->VALUE_SIZE_LIMIT + CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT));
|
||||
releaser = FlowLock::Releaser(*lock, limits.bytes + CLIENT_KNOBS->VALUE_SIZE_LIMIT + CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT);
|
||||
|
||||
state Standalone<RangeResultRef> values = wait(tr.getRange(begin, end, limits));
|
||||
|
@ -495,7 +495,7 @@ ACTOR Future<Void> readCommitted(Database cx, PromiseStream<RCGroup> results, Fu
|
|||
//add lock
|
||||
wait(active);
|
||||
releaser.release();
|
||||
wait(lock->take(TaskDefaultYield, rangevalue.expectedSize() + rcGroup.items.expectedSize()));
|
||||
wait(lock->take(TaskPriority::DefaultYield, rangevalue.expectedSize() + rcGroup.items.expectedSize()));
|
||||
releaser = FlowLock::Releaser(*lock, rangevalue.expectedSize() + rcGroup.items.expectedSize());
|
||||
|
||||
for (auto & s : rangevalue){
|
||||
|
@ -613,7 +613,7 @@ ACTOR Future<int> dumpData(Database cx, PromiseStream<RCGroup> results, Referenc
|
|||
req.flags = req.flags | CommitTransactionRequest::FLAG_IS_LOCK_AWARE;
|
||||
|
||||
totalBytes += mutationSize;
|
||||
wait( commitLock->take(TaskDefaultYield, mutationSize) );
|
||||
wait( commitLock->take(TaskPriority::DefaultYield, mutationSize) );
|
||||
addActor.send( commitLock->releaseWhen( success(commit.getReply(req)), mutationSize ) );
|
||||
|
||||
if(endOfStream) {
|
||||
|
@ -653,7 +653,7 @@ ACTOR Future<Void> coalesceKeyVersionCache(Key uid, Version endVersion, Referenc
|
|||
req.transaction.read_snapshot = committedVersion->get();
|
||||
req.flags = req.flags | CommitTransactionRequest::FLAG_IS_LOCK_AWARE;
|
||||
|
||||
wait( commitLock->take(TaskDefaultYield, mutationSize) );
|
||||
wait( commitLock->take(TaskPriority::DefaultYield, mutationSize) );
|
||||
addActor.send( commitLock->releaseWhen( success(commit.getReply(req)), mutationSize ) );
|
||||
}
|
||||
|
||||
|
@ -671,7 +671,7 @@ ACTOR Future<Void> applyMutations(Database cx, Key uid, Key addPrefix, Key remov
|
|||
try {
|
||||
loop {
|
||||
if(beginVersion >= *endVersion) {
|
||||
wait( commitLock.take(TaskDefaultYield, CLIENT_KNOBS->BACKUP_LOCK_BYTES) );
|
||||
wait( commitLock.take(TaskPriority::DefaultYield, CLIENT_KNOBS->BACKUP_LOCK_BYTES) );
|
||||
commitLock.release(CLIENT_KNOBS->BACKUP_LOCK_BYTES);
|
||||
if(beginVersion >= *endVersion) {
|
||||
return Void();
|
||||
|
|
|
@ -52,12 +52,12 @@ struct ClusterInterface {
|
|||
}
|
||||
|
||||
void initEndpoints() {
|
||||
openDatabase.getEndpoint( TaskClusterController );
|
||||
failureMonitoring.getEndpoint( TaskFailureMonitor );
|
||||
databaseStatus.getEndpoint( TaskClusterController );
|
||||
ping.getEndpoint( TaskClusterController );
|
||||
getClientWorkers.getEndpoint( TaskClusterController );
|
||||
forceRecovery.getEndpoint( TaskClusterController );
|
||||
openDatabase.getEndpoint( TaskPriority::ClusterController );
|
||||
failureMonitoring.getEndpoint( TaskPriority::FailureMonitor );
|
||||
databaseStatus.getEndpoint( TaskPriority::ClusterController );
|
||||
ping.getEndpoint( TaskPriority::ClusterController );
|
||||
getClientWorkers.getEndpoint( TaskPriority::ClusterController );
|
||||
forceRecovery.getEndpoint( TaskPriority::ClusterController );
|
||||
}
|
||||
|
||||
template <class Ar>
|
||||
|
|
|
@ -54,11 +54,11 @@ public:
|
|||
|
||||
// For internal (fdbserver) use only
|
||||
static Database create( Reference<AsyncVar<Optional<ClusterInterface>>> clusterInterface, Reference<ClusterConnectionFile> connFile, LocalityData const& clientLocality );
|
||||
static Database create( Reference<AsyncVar<ClientDBInfo>> clientInfo, Future<Void> clientInfoMonitor, LocalityData clientLocality, bool enableLocalityLoadBalance, int taskID=TaskDefaultEndpoint, bool lockAware=false, int apiVersion=Database::API_VERSION_LATEST );
|
||||
static Database create( Reference<AsyncVar<ClientDBInfo>> clientInfo, Future<Void> clientInfoMonitor, LocalityData clientLocality, bool enableLocalityLoadBalance, TaskPriority taskID=TaskPriority::DefaultEndpoint, bool lockAware=false, int apiVersion=Database::API_VERSION_LATEST );
|
||||
|
||||
~DatabaseContext();
|
||||
|
||||
Database clone() const { return Database(new DatabaseContext( cluster, clientInfo, clientInfoMonitor, dbId, taskID, clientLocality, enableLocalityLoadBalance, lockAware, apiVersion )); }
|
||||
Database clone() const { return Database(new DatabaseContext( cluster, clientInfo, clientInfoMonitor, taskID, clientLocality, enableLocalityLoadBalance, lockAware, internal, apiVersion )); }
|
||||
|
||||
std::pair<KeyRange,Reference<LocationInfo>> getCachedLocation( const KeyRef&, bool isBackward = false );
|
||||
bool getCachedLocations( const KeyRangeRef&, vector<std::pair<KeyRange,Reference<LocationInfo>>>&, int limit, bool reverse );
|
||||
|
@ -97,8 +97,8 @@ public:
|
|||
|
||||
//private:
|
||||
explicit DatabaseContext( Reference<Cluster> cluster, Reference<AsyncVar<ClientDBInfo>> clientDBInfo,
|
||||
Future<Void> clientInfoMonitor, Standalone<StringRef> dbId, int taskID, LocalityData const& clientLocality,
|
||||
bool enableLocalityLoadBalance, bool lockAware, int apiVersion = Database::API_VERSION_LATEST );
|
||||
Future<Void> clientInfoMonitor, TaskPriority taskID, LocalityData const& clientLocality,
|
||||
bool enableLocalityLoadBalance, bool lockAware, bool internal = true, int apiVersion = Database::API_VERSION_LATEST );
|
||||
|
||||
explicit DatabaseContext( const Error &err );
|
||||
|
||||
|
@ -133,22 +133,26 @@ public:
|
|||
|
||||
std::map< UID, StorageServerInfo* > server_interf;
|
||||
|
||||
Standalone<StringRef> dbId;
|
||||
UID dbId;
|
||||
bool internal; // Only contexts created through the C client and fdbcli are non-internal
|
||||
|
||||
CounterCollection cc;
|
||||
|
||||
Counter transactionReadVersions;
|
||||
Counter transactionLogicalReads;
|
||||
Counter transactionPhysicalReads;
|
||||
Counter transactionCommittedMutations;
|
||||
Counter transactionCommittedMutationBytes;
|
||||
Counter transactionsCommitStarted;
|
||||
Counter transactionsCommitCompleted;
|
||||
Counter transactionsTooOld;
|
||||
Counter transactionsFutureVersions;
|
||||
Counter transactionsNotCommitted;
|
||||
Counter transactionsMaybeCommitted;
|
||||
Counter transactionsResourceConstrained;
|
||||
Counter transactionsProcessBehind;
|
||||
Counter transactionWaitsForFullRecovery;
|
||||
|
||||
int64_t transactionReadVersions;
|
||||
int64_t transactionLogicalReads;
|
||||
int64_t transactionPhysicalReads;
|
||||
int64_t transactionCommittedMutations;
|
||||
int64_t transactionCommittedMutationBytes;
|
||||
int64_t transactionsCommitStarted;
|
||||
int64_t transactionsCommitCompleted;
|
||||
int64_t transactionsTooOld;
|
||||
int64_t transactionsFutureVersions;
|
||||
int64_t transactionsNotCommitted;
|
||||
int64_t transactionsMaybeCommitted;
|
||||
int64_t transactionsResourceConstrained;
|
||||
int64_t transactionsProcessBehind;
|
||||
int64_t transactionWaitsForFullRecovery;
|
||||
ContinuousSample<double> latencies, readLatencies, commitLatencies, GRVLatencies, mutationsPerCommit, bytesPerCommit;
|
||||
|
||||
int outstandingWatches;
|
||||
|
@ -162,7 +166,7 @@ public:
|
|||
|
||||
Future<Void> logger;
|
||||
|
||||
int taskID;
|
||||
TaskPriority taskID;
|
||||
|
||||
Int64MetricHandle getValueSubmitted;
|
||||
EventMetricHandle<GetValueComplete> getValueCompleted;
|
||||
|
|
|
@ -601,8 +601,9 @@ struct TLogVersion {
|
|||
// V1 = 1, // 4.6 is dispatched to via 6.0
|
||||
V2 = 2, // 6.0
|
||||
V3 = 3, // 6.1
|
||||
V4 = 4, // 6.2
|
||||
MIN_SUPPORTED = V2,
|
||||
MAX_SUPPORTED = V3,
|
||||
MAX_SUPPORTED = V4,
|
||||
MIN_RECRUITABLE = V2,
|
||||
DEFAULT = V3,
|
||||
} version;
|
||||
|
@ -624,6 +625,7 @@ struct TLogVersion {
|
|||
static ErrorOr<TLogVersion> FromStringRef( StringRef s ) {
|
||||
if (s == LiteralStringRef("2")) return V2;
|
||||
if (s == LiteralStringRef("3")) return V3;
|
||||
if (s == LiteralStringRef("4")) return V4;
|
||||
return default_error_or();
|
||||
}
|
||||
};
|
||||
|
|
|
@ -41,7 +41,7 @@ ACTOR Future<Void> failureMonitorClientLoop(
|
|||
{
|
||||
state Version version = 0;
|
||||
state Future<FailureMonitoringReply> request = Never();
|
||||
state Future<Void> nextRequest = delay(0, TaskFailureMonitor);
|
||||
state Future<Void> nextRequest = delay(0, TaskPriority::FailureMonitor);
|
||||
state Future<Void> requestTimeout = Never();
|
||||
state double before = now();
|
||||
state double waitfor = 0;
|
||||
|
@ -61,7 +61,7 @@ ACTOR Future<Void> failureMonitorClientLoop(
|
|||
loop {
|
||||
choose {
|
||||
when( FailureMonitoringReply reply = wait( request ) ) {
|
||||
g_network->setCurrentTask(TaskDefaultDelay);
|
||||
g_network->setCurrentTask(TaskPriority::DefaultDelay);
|
||||
request = Never();
|
||||
requestTimeout = Never();
|
||||
if (reply.allOthersFailed) {
|
||||
|
@ -122,10 +122,10 @@ ACTOR Future<Void> failureMonitorClientLoop(
|
|||
}
|
||||
before = now();
|
||||
waitfor = reply.clientRequestIntervalMS * .001;
|
||||
nextRequest = delayJittered( waitfor, TaskFailureMonitor );
|
||||
nextRequest = delayJittered( waitfor, TaskPriority::FailureMonitor );
|
||||
}
|
||||
when( wait( requestTimeout ) ) {
|
||||
g_network->setCurrentTask(TaskDefaultDelay);
|
||||
g_network->setCurrentTask(TaskPriority::DefaultDelay);
|
||||
requestTimeout = Never();
|
||||
TraceEvent(SevWarn, "FailureMonitoringServerDown").detail("OldServerID",controller.id());
|
||||
monitor->setStatus(controlAddr.address, FailureStatus(true));
|
||||
|
@ -136,7 +136,7 @@ ACTOR Future<Void> failureMonitorClientLoop(
|
|||
}
|
||||
}
|
||||
when( wait( nextRequest ) ) {
|
||||
g_network->setCurrentTask(TaskDefaultDelay);
|
||||
g_network->setCurrentTask(TaskPriority::DefaultDelay);
|
||||
nextRequest = Never();
|
||||
|
||||
double elapsed = now() - before;
|
||||
|
@ -152,9 +152,9 @@ ACTOR Future<Void> failureMonitorClientLoop(
|
|||
req.addresses = g_network->getLocalAddresses();
|
||||
if (trackMyStatus)
|
||||
req.senderStatus = FailureStatus(false);
|
||||
request = controller.failureMonitoring.getReply( req, TaskFailureMonitor );
|
||||
request = controller.failureMonitoring.getReply( req, TaskPriority::FailureMonitor );
|
||||
if(!controller.failureMonitoring.getEndpoint().isLocal())
|
||||
requestTimeout = delay( fmState->serverFailedTimeout, TaskFailureMonitor );
|
||||
requestTimeout = delay( fmState->serverFailedTimeout, TaskPriority::FailureMonitor );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -93,7 +93,7 @@ namespace HTTP {
|
|||
loop {
|
||||
// Wait for connection to have something to read
|
||||
wait(conn->onReadable());
|
||||
wait( delay( 0, TaskReadSocket ) );
|
||||
wait( delay( 0, TaskPriority::ReadSocket ) );
|
||||
|
||||
// Read into buffer
|
||||
int originalSize = buf->size();
|
||||
|
@ -353,7 +353,7 @@ namespace HTTP {
|
|||
|
||||
loop {
|
||||
wait(conn->onWritable());
|
||||
wait( delay( 0, TaskWriteSocket ) );
|
||||
wait( delay( 0, TaskPriority::WriteSocket ) );
|
||||
|
||||
// If we already got a response, before finishing sending the request, then close the connection,
|
||||
// set the Connection header to "close" as a hint to the caller that this connection can't be used
|
||||
|
|
|
@ -967,7 +967,7 @@ ACTOR Future<CoordinatorsResult::Type> changeQuorum( Database cx, Reference<IQuo
|
|||
vector<Future<Optional<LeaderInfo>>> leaderServers;
|
||||
ClientCoordinators coord( Reference<ClusterConnectionFile>( new ClusterConnectionFile( conn ) ) );
|
||||
for( int i = 0; i < coord.clientLeaderServers.size(); i++ )
|
||||
leaderServers.push_back( retryBrokenPromise( coord.clientLeaderServers[i].getLeader, GetLeaderRequest( coord.clusterKey, UID() ), TaskCoordinationReply ) );
|
||||
leaderServers.push_back( retryBrokenPromise( coord.clientLeaderServers[i].getLeader, GetLeaderRequest( coord.clusterKey, UID() ), TaskPriority::CoordinationReply ) );
|
||||
|
||||
choose {
|
||||
when( wait( waitForAll( leaderServers ) ) ) {}
|
||||
|
@ -1047,7 +1047,7 @@ struct AutoQuorumChange : IQuorumChange {
|
|||
ClientCoordinators coord(ccf);
|
||||
vector<Future<Optional<LeaderInfo>>> leaderServers;
|
||||
for( int i = 0; i < coord.clientLeaderServers.size(); i++ )
|
||||
leaderServers.push_back( retryBrokenPromise( coord.clientLeaderServers[i].getLeader, GetLeaderRequest( coord.clusterKey, UID() ), TaskCoordinationReply ) );
|
||||
leaderServers.push_back( retryBrokenPromise( coord.clientLeaderServers[i].getLeader, GetLeaderRequest( coord.clusterKey, UID() ), TaskPriority::CoordinationReply ) );
|
||||
Optional<vector<Optional<LeaderInfo>>> results = wait( timeout( getAll(leaderServers), CLIENT_KNOBS->IS_ACCEPTABLE_DELAY ) );
|
||||
if (!results.present()) return false; // Not all responded
|
||||
for(auto& r : results.get())
|
||||
|
|
|
@ -67,10 +67,10 @@ struct MasterProxyInterface {
|
|||
}
|
||||
|
||||
void initEndpoints() {
|
||||
getConsistentReadVersion.getEndpoint(TaskProxyGetConsistentReadVersion);
|
||||
getRawCommittedVersion.getEndpoint(TaskProxyGetRawCommittedVersion);
|
||||
commit.getEndpoint(TaskProxyCommitDispatcher);
|
||||
getStorageServerRejoinInfo.getEndpoint(TaskProxyStorageRejoin);
|
||||
getConsistentReadVersion.getEndpoint(TaskPriority::ProxyGetConsistentReadVersion);
|
||||
getRawCommittedVersion.getEndpoint(TaskPriority::ProxyGetRawCommittedVersion);
|
||||
commit.getEndpoint(TaskPriority::ProxyCommitDispatcher);
|
||||
getStorageServerRejoinInfo.getEndpoint(TaskPriority::ProxyStorageRejoin);
|
||||
//getKeyServersLocations.getEndpoint(TaskProxyGetKeyServersLocations); //do not increase the priority of these requests, because clients cans bring down the cluster with too many of these messages.
|
||||
}
|
||||
};
|
||||
|
|
|
@ -371,7 +371,7 @@ ClientLeaderRegInterface::ClientLeaderRegInterface( NetworkAddress remote )
|
|||
}
|
||||
|
||||
ClientLeaderRegInterface::ClientLeaderRegInterface( INetwork* local ) {
|
||||
getLeader.makeWellKnownEndpoint( WLTOKEN_CLIENTLEADERREG_GETLEADER, TaskCoordination );
|
||||
getLeader.makeWellKnownEndpoint( WLTOKEN_CLIENTLEADERREG_GETLEADER, TaskPriority::Coordination );
|
||||
}
|
||||
|
||||
// Nominee is the worker among all workers that are considered as leader by a coordinator
|
||||
|
@ -380,7 +380,7 @@ ClientLeaderRegInterface::ClientLeaderRegInterface( INetwork* local ) {
|
|||
ACTOR Future<Void> monitorNominee( Key key, ClientLeaderRegInterface coord, AsyncTrigger* nomineeChange, Optional<LeaderInfo> *info, int generation, Reference<AsyncVar<int>> connectedCoordinatorsNum ) {
|
||||
state bool hasCounted = false;
|
||||
loop {
|
||||
state Optional<LeaderInfo> li = wait( retryBrokenPromise( coord.getLeader, GetLeaderRequest( key, info->present() ? info->get().changeID : UID() ), TaskCoordinationReply ) );
|
||||
state Optional<LeaderInfo> li = wait( retryBrokenPromise( coord.getLeader, GetLeaderRequest( key, info->present() ? info->get().changeID : UID() ), TaskPriority::CoordinationReply ) );
|
||||
if (li.present() && !hasCounted && connectedCoordinatorsNum.isValid()) {
|
||||
connectedCoordinatorsNum->set(connectedCoordinatorsNum->get() + 1);
|
||||
hasCounted = true;
|
||||
|
|
|
@ -588,7 +588,20 @@ ThreadFuture<Void> MultiVersionTransaction::onError(Error const& e) {
|
|||
else {
|
||||
auto tr = getTransaction();
|
||||
auto f = tr.transaction ? tr.transaction->onError(e) : ThreadFuture<Void>(Never());
|
||||
return abortableFuture(f, tr.onChange);
|
||||
f = abortableFuture(f, tr.onChange);
|
||||
|
||||
return flatMapThreadFuture<Void, Void>(f, [this, e](ErrorOr<Void> ready) {
|
||||
if(!ready.isError() || ready.getError().code() != error_code_cluster_version_changed) {
|
||||
if(ready.isError()) {
|
||||
return ErrorOr<ThreadFuture<Void>>(ready.getError());
|
||||
}
|
||||
|
||||
return ErrorOr<ThreadFuture<Void>>(Void());
|
||||
}
|
||||
|
||||
updateTransaction();
|
||||
return ErrorOr<ThreadFuture<Void>>(onError(e));
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -208,24 +208,18 @@ template <> void addref( DatabaseContext* ptr ) { ptr->addref(); }
|
|||
template <> void delref( DatabaseContext* ptr ) { ptr->delref(); }
|
||||
|
||||
ACTOR Future<Void> databaseLogger( DatabaseContext *cx ) {
|
||||
state double lastLogged = 0;
|
||||
loop {
|
||||
wait( delay( CLIENT_KNOBS->SYSTEM_MONITOR_INTERVAL, cx->taskID ) );
|
||||
TraceEvent("TransactionMetrics")
|
||||
wait(delay(CLIENT_KNOBS->SYSTEM_MONITOR_INTERVAL, cx->taskID));
|
||||
TraceEvent ev("TransactionMetrics", cx->dbId);
|
||||
|
||||
ev.detail("Elapsed", (lastLogged == 0) ? 0 : now() - lastLogged)
|
||||
.detail("Cluster", cx->cluster && cx->getConnectionFile() ? cx->getConnectionFile()->getConnectionString().clusterKeyName().toString() : "")
|
||||
.detail("ReadVersions", cx->transactionReadVersions)
|
||||
.detail("LogicalUncachedReads", cx->transactionLogicalReads)
|
||||
.detail("PhysicalReadRequests", cx->transactionPhysicalReads)
|
||||
.detail("CommittedMutations", cx->transactionCommittedMutations)
|
||||
.detail("CommittedMutationBytes", cx->transactionCommittedMutationBytes)
|
||||
.detail("CommitStarted", cx->transactionsCommitStarted)
|
||||
.detail("CommitCompleted", cx->transactionsCommitCompleted)
|
||||
.detail("TooOld", cx->transactionsTooOld)
|
||||
.detail("FutureVersions", cx->transactionsFutureVersions)
|
||||
.detail("NotCommitted", cx->transactionsNotCommitted)
|
||||
.detail("MaybeCommitted", cx->transactionsMaybeCommitted)
|
||||
.detail("ResourceConstrained", cx->transactionsResourceConstrained)
|
||||
.detail("ProcessBehind", cx->transactionsProcessBehind)
|
||||
.detail("MeanLatency", cx->latencies.mean())
|
||||
.detail("Internal", cx->internal);
|
||||
|
||||
cx->cc.logToTraceEvent(ev);
|
||||
|
||||
ev.detail("MeanLatency", cx->latencies.mean())
|
||||
.detail("MedianLatency", cx->latencies.median())
|
.detail("Latency90", cx->latencies.percentile(0.90))
.detail("Latency98", cx->latencies.percentile(0.98))

@@ -245,12 +239,15 @@ ACTOR Future<Void> databaseLogger( DatabaseContext *cx ) {
.detail("MeanBytesPerCommit", cx->bytesPerCommit.mean())
.detail("MedianBytesPerCommit", cx->bytesPerCommit.median())
.detail("MaxBytesPerCommit", cx->bytesPerCommit.max());

cx->latencies.clear();
cx->readLatencies.clear();
cx->GRVLatencies.clear();
cx->commitLatencies.clear();
cx->mutationsPerCommit.clear();
cx->bytesPerCommit.clear();

lastLogged = now();
}
}

@@ -508,18 +505,21 @@ ACTOR static Future<HealthMetrics> getHealthMetricsActor(DatabaseContext *cx, bo
Future<HealthMetrics> DatabaseContext::getHealthMetrics(bool detailed = false) {
return getHealthMetricsActor(this, detailed);
}

DatabaseContext::DatabaseContext(
Reference<Cluster> cluster, Reference<AsyncVar<ClientDBInfo>> clientInfo, Future<Void> clientInfoMonitor, Standalone<StringRef> dbId,
int taskID, LocalityData const& clientLocality, bool enableLocalityLoadBalance, bool lockAware, int apiVersion )
: cluster(cluster), clientInfo(clientInfo), clientInfoMonitor(clientInfoMonitor), dbId(dbId), taskID(taskID), clientLocality(clientLocality), enableLocalityLoadBalance(enableLocalityLoadBalance),
lockAware(lockAware), apiVersion(apiVersion), provisional(false),
transactionReadVersions(0), transactionLogicalReads(0), transactionPhysicalReads(0), transactionCommittedMutations(0), transactionCommittedMutationBytes(0),
transactionsCommitStarted(0), transactionsCommitCompleted(0), transactionsTooOld(0), transactionsFutureVersions(0), transactionsNotCommitted(0),
transactionsMaybeCommitted(0), transactionsResourceConstrained(0), transactionsProcessBehind(0), outstandingWatches(0), transactionTimeout(0.0), transactionMaxRetries(-1),
Reference<Cluster> cluster, Reference<AsyncVar<ClientDBInfo>> clientInfo, Future<Void> clientInfoMonitor,
TaskPriority taskID, LocalityData const& clientLocality, bool enableLocalityLoadBalance, bool lockAware, bool internal, int apiVersion )
: cluster(cluster), clientInfo(clientInfo), clientInfoMonitor(clientInfoMonitor), taskID(taskID), clientLocality(clientLocality), enableLocalityLoadBalance(enableLocalityLoadBalance),
lockAware(lockAware), apiVersion(apiVersion), provisional(false), cc("TransactionMetrics"),
transactionReadVersions("ReadVersions", cc), transactionLogicalReads("LogicalUncachedReads", cc), transactionPhysicalReads("PhysicalReadRequests", cc),
transactionCommittedMutations("CommittedMutations", cc), transactionCommittedMutationBytes("CommittedMutationBytes", cc), transactionsCommitStarted("CommitStarted", cc),
transactionsCommitCompleted("CommitCompleted", cc), transactionsTooOld("TooOld", cc), transactionsFutureVersions("FutureVersions", cc),
transactionsNotCommitted("NotCommitted", cc), transactionsMaybeCommitted("MaybeCommitted", cc), transactionsResourceConstrained("ResourceConstrained", cc),
transactionsProcessBehind("ProcessBehind", cc), transactionWaitsForFullRecovery("WaitsForFullRecovery", cc), outstandingWatches(0), transactionTimeout(0.0), transactionMaxRetries(-1),
latencies(1000), readLatencies(1000), commitLatencies(1000), GRVLatencies(1000), mutationsPerCommit(1000), bytesPerCommit(1000), mvCacheInsertLocation(0),
healthMetricsLastUpdated(0), detailedHealthMetricsLastUpdated(0)
healthMetricsLastUpdated(0), detailedHealthMetricsLastUpdated(0), internal(internal)
{
dbId = deterministicRandom()->randomUniqueID();

metadataVersionCache.resize(CLIENT_KNOBS->METADATA_VERSION_CACHE_SIZE);
maxOutstandingWatches = CLIENT_KNOBS->DEFAULT_MAX_OUTSTANDING_WATCHES;

@@ -539,7 +539,14 @@ DatabaseContext::DatabaseContext(
clientStatusUpdater.actor = clientStatusUpdateActor(this);
}

DatabaseContext::DatabaseContext( const Error &err ) : deferredError(err), latencies(1000), readLatencies(1000), commitLatencies(1000), GRVLatencies(1000), mutationsPerCommit(1000), bytesPerCommit(1000) {}
DatabaseContext::DatabaseContext( const Error &err ) : deferredError(err), cc("TransactionMetrics"),
transactionReadVersions("ReadVersions", cc), transactionLogicalReads("LogicalUncachedReads", cc), transactionPhysicalReads("PhysicalReadRequests", cc),
transactionCommittedMutations("CommittedMutations", cc), transactionCommittedMutationBytes("CommittedMutationBytes", cc), transactionsCommitStarted("CommitStarted", cc),
transactionsCommitCompleted("CommitCompleted", cc), transactionsTooOld("TooOld", cc), transactionsFutureVersions("FutureVersions", cc),
transactionsNotCommitted("NotCommitted", cc), transactionsMaybeCommitted("MaybeCommitted", cc), transactionsResourceConstrained("ResourceConstrained", cc),
transactionsProcessBehind("ProcessBehind", cc), transactionWaitsForFullRecovery("WaitsForFullRecovery", cc), latencies(1000), readLatencies(1000), commitLatencies(1000),
GRVLatencies(1000), mutationsPerCommit(1000), bytesPerCommit(1000),
internal(false) {}

ACTOR static Future<Void> monitorClientInfo( Reference<AsyncVar<Optional<ClusterInterface>>> clusterInterface, Reference<ClusterConnectionFile> ccf, Reference<AsyncVar<ClientDBInfo>> outInfo, Reference<AsyncVar<int>> connectedCoordinatorsNumDelayed ) {
try {
@@ -592,13 +599,13 @@ ACTOR static Future<Void> monitorClientInfo( Reference<AsyncVar<Optional<Cluster
}

onProxyFailureVec.push_back(
IFailureMonitor::failureMonitor().onDisconnectOrFailure(
proxy.getConsistentReadVersion.getEndpoint()) ||
IFailureMonitor::failureMonitor().onDisconnectOrFailure(proxy.commit.getEndpoint()) ||
IFailureMonitor::failureMonitor().onDisconnectOrFailure(
proxy.getKeyServersLocations.getEndpoint()) ||
IFailureMonitor::failureMonitor().onDisconnectOrFailure(
proxy.getStorageServerRejoinInfo.getEndpoint()));
IFailureMonitor::failureMonitor().onStateEqual(
proxy.getConsistentReadVersion.getEndpoint(), FailureStatus()) ||
IFailureMonitor::failureMonitor().onStateEqual(proxy.commit.getEndpoint(), FailureStatus()) ||
IFailureMonitor::failureMonitor().onStateEqual(
proxy.getKeyServersLocations.getEndpoint(), FailureStatus()) ||
IFailureMonitor::failureMonitor().onStateEqual(
proxy.getStorageServerRejoinInfo.getEndpoint(), FailureStatus()));
}
if (skipWaitForProxyFail) continue;

@@ -632,11 +639,11 @@ Database DatabaseContext::create(Reference<AsyncVar<Optional<ClusterInterface>>>
Reference<AsyncVar<ClientDBInfo>> clientInfo(new AsyncVar<ClientDBInfo>());
Future<Void> clientInfoMonitor = delayedAsyncVar(connectedCoordinatorsNum, connectedCoordinatorsNumDelayed, CLIENT_KNOBS->CHECK_CONNECTED_COORDINATOR_NUM_DELAY) || monitorClientInfo(clusterInterface, connFile, clientInfo, connectedCoordinatorsNumDelayed);

return Database(new DatabaseContext(cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false));
return Database(new DatabaseContext(cluster, clientInfo, clientInfoMonitor, TaskPriority::DefaultEndpoint, clientLocality, true, false, true));
}

Database DatabaseContext::create(Reference<AsyncVar<ClientDBInfo>> clientInfo, Future<Void> clientInfoMonitor, LocalityData clientLocality, bool enableLocalityLoadBalance, int taskID, bool lockAware, int apiVersion) {
return Database( new DatabaseContext( Reference<Cluster>(nullptr), clientInfo, clientInfoMonitor, LiteralStringRef(""), taskID, clientLocality, enableLocalityLoadBalance, lockAware, apiVersion ) );
Database DatabaseContext::create(Reference<AsyncVar<ClientDBInfo>> clientInfo, Future<Void> clientInfoMonitor, LocalityData clientLocality, bool enableLocalityLoadBalance, TaskPriority taskID, bool lockAware, int apiVersion) {
return Database( new DatabaseContext( Reference<Cluster>(nullptr), clientInfo, clientInfoMonitor, taskID, clientLocality, enableLocalityLoadBalance, lockAware, true, apiVersion ) );
}

DatabaseContext::~DatabaseContext() {
@@ -816,7 +823,7 @@ Reference<ClusterConnectionFile> DatabaseContext::getConnectionFile() {
return cluster->getConnectionFile();
}

Database Database::createDatabase( Reference<ClusterConnectionFile> connFile, int apiVersion, LocalityData const& clientLocality, DatabaseContext *preallocatedDb ) {
Database Database::createDatabase( Reference<ClusterConnectionFile> connFile, int apiVersion, bool internal, LocalityData const& clientLocality, DatabaseContext *preallocatedDb ) {
Reference<AsyncVar<int>> connectedCoordinatorsNum(new AsyncVar<int>(0)); // Number of connected coordinators for the client
Reference<AsyncVar<int>> connectedCoordinatorsNumDelayed(new AsyncVar<int>(0));
Reference<Cluster> cluster(new Cluster(connFile, connectedCoordinatorsNum, apiVersion));

@@ -825,18 +832,18 @@ Database Database::createDatabase( Reference<ClusterConnectionFile> connFile, in
DatabaseContext *db;
if(preallocatedDb) {
db = new (preallocatedDb) DatabaseContext(cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false, apiVersion);
db = new (preallocatedDb) DatabaseContext(cluster, clientInfo, clientInfoMonitor, TaskPriority::DefaultEndpoint, clientLocality, true, false, internal, apiVersion);
}
else {
db = new DatabaseContext(cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false, apiVersion);
db = new DatabaseContext(cluster, clientInfo, clientInfoMonitor, TaskPriority::DefaultEndpoint, clientLocality, true, false, internal, apiVersion);
}

return Database(db);
}

Database Database::createDatabase( std::string connFileName, int apiVersion, LocalityData const& clientLocality ) {
Database Database::createDatabase( std::string connFileName, int apiVersion, bool internal, LocalityData const& clientLocality ) {
Reference<ClusterConnectionFile> rccf = Reference<ClusterConnectionFile>(new ClusterConnectionFile(ClusterConnectionFile::lookupClusterFileName(connFileName).first));
return Database::createDatabase(rccf, apiVersion, clientLocality);
return Database::createDatabase(rccf, apiVersion, internal, clientLocality);
}

extern IPAddress determinePublicIPAutomatically(ClusterConnectionString const& ccs);
@@ -884,7 +891,7 @@ void Cluster::init( Reference<ClusterConnectionFile> connFile, bool startClientI
initializeSystemMonitorMachineState(SystemMonitorMachineState(IPAddress(publicIP)));

systemMonitor();
uncancellable( recurring( &systemMonitor, CLIENT_KNOBS->SYSTEM_MONITOR_INTERVAL, TaskFlushTrace ) );
uncancellable( recurring( &systemMonitor, CLIENT_KNOBS->SYSTEM_MONITOR_INTERVAL, TaskPriority::FlushTrace ) );
}

failMon = failureMonitorClient( clusterInterface, false );

@@ -1058,7 +1065,7 @@ void setupNetwork(uint64_t transportId, bool useMetrics) {
networkOptions.logClientInfo = true;

g_network = newNet2(false, useMetrics || networkOptions.traceDirectory.present(), networkOptions.useObjectSerializer);
FlowTransport::createInstance(transportId);
FlowTransport::createInstance(true, transportId);
Net2FileSystem::newFileSystem();

initTLSOptions();
@@ -1240,7 +1247,7 @@ ACTOR Future< pair<KeyRange,Reference<LocationInfo>> > getKeyLocation_internal(
loop {
choose {
when ( wait( cx->onMasterProxiesChanged() ) ) {}
when ( GetKeyServerLocationsReply rep = wait( loadBalance( cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::getKeyServersLocations, GetKeyServerLocationsRequest(key, Optional<KeyRef>(), 100, isBackward, key.arena()), TaskDefaultPromiseEndpoint ) ) ) {
when ( GetKeyServerLocationsReply rep = wait( loadBalance( cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::getKeyServersLocations, GetKeyServerLocationsRequest(key, Optional<KeyRef>(), 100, isBackward, key.arena()), TaskPriority::DefaultPromiseEndpoint ) ) ) {
if( info.debugID.present() )
g_traceBatch.addEvent("TransactionDebug", info.debugID.get().first(), "NativeAPI.getKeyLocation.After");
ASSERT( rep.results.size() == 1 );

@@ -1277,7 +1284,7 @@ ACTOR Future< vector< pair<KeyRange,Reference<LocationInfo>> > > getKeyRangeLoca
loop {
choose {
when ( wait( cx->onMasterProxiesChanged() ) ) {}
when ( GetKeyServerLocationsReply _rep = wait( loadBalance( cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::getKeyServersLocations, GetKeyServerLocationsRequest(keys.begin, keys.end, limit, reverse, keys.arena()), TaskDefaultPromiseEndpoint ) ) ) {
when ( GetKeyServerLocationsReply _rep = wait( loadBalance( cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::getKeyServersLocations, GetKeyServerLocationsRequest(keys.begin, keys.end, limit, reverse, keys.arena()), TaskPriority::DefaultPromiseEndpoint ) ) ) {
state GetKeyServerLocationsReply rep = _rep;
if( info.debugID.present() )
g_traceBatch.addEvent("TransactionDebug", info.debugID.get().first(), "NativeAPI.getKeyLocations.After");

@@ -1398,7 +1405,7 @@ ACTOR Future<Optional<Value>> getValue( Future<Version> version, Key key, Databa
}
state GetValueReply reply = wait(
loadBalance(ssi.second, &StorageServerInterface::getValue, GetValueRequest(key, ver, getValueID),
TaskDefaultPromiseEndpoint, false, cx->enableLocalityLoadBalance ? &cx->queueModel : NULL));
TaskPriority::DefaultPromiseEndpoint, false, cx->enableLocalityLoadBalance ? &cx->queueModel : NULL));
double latency = now() - startTimeD;
cx->readLatencies.addSample(latency);
if (trLogInfo) {
@@ -1461,7 +1468,7 @@ ACTOR Future<Key> getKey( Database cx, KeySelector k, Future<Version> version, T
if( info.debugID.present() )
g_traceBatch.addEvent("TransactionDebug", info.debugID.get().first(), "NativeAPI.getKey.Before"); //.detail("StartKey", k.getKey()).detail("Offset",k.offset).detail("OrEqual",k.orEqual);
++cx->transactionPhysicalReads;
GetKeyReply reply = wait( loadBalance( ssi.second, &StorageServerInterface::getKey, GetKeyRequest(k, version.get()), TaskDefaultPromiseEndpoint, false, cx->enableLocalityLoadBalance ? &cx->queueModel : NULL ) );
GetKeyReply reply = wait( loadBalance( ssi.second, &StorageServerInterface::getKey, GetKeyRequest(k, version.get()), TaskPriority::DefaultPromiseEndpoint, false, cx->enableLocalityLoadBalance ? &cx->queueModel : NULL ) );
if( info.debugID.present() )
g_traceBatch.addEvent("TransactionDebug", info.debugID.get().first(), "NativeAPI.getKey.After"); //.detail("NextKey",reply.sel.key).detail("Offset", reply.sel.offset).detail("OrEqual", k.orEqual);
k = reply.sel;

@@ -1524,7 +1531,7 @@ ACTOR Future< Void > watchValue( Future<Version> version, Key key, Optional<Valu
g_traceBatch.addAttach("WatchValueAttachID", info.debugID.get().first(), watchValueID.get().first());
g_traceBatch.addEvent("WatchValueDebug", watchValueID.get().first(), "NativeAPI.watchValue.Before"); //.detail("TaskID", g_network->getCurrentTask());
}
state Version resp = wait( loadBalance( ssi.second, &StorageServerInterface::watchValue, WatchValueRequest(key, value, ver, watchValueID), TaskDefaultPromiseEndpoint ) );
state Version resp = wait( loadBalance( ssi.second, &StorageServerInterface::watchValue, WatchValueRequest(key, value, ver, watchValueID), TaskPriority::DefaultPromiseEndpoint ) );
if( info.debugID.present() ) {
g_traceBatch.addEvent("WatchValueDebug", watchValueID.get().first(), "NativeAPI.watchValue.After"); //.detail("TaskID", g_network->getCurrentTask());
}

@@ -1616,7 +1623,7 @@ ACTOR Future<Standalone<RangeResultRef>> getExactRange( Database cx, Version ver
.detail("Servers", locations[shard].second->description());*/
}
++cx->transactionPhysicalReads;
GetKeyValuesReply rep = wait( loadBalance( locations[shard].second, &StorageServerInterface::getKeyValues, req, TaskDefaultPromiseEndpoint, false, cx->enableLocalityLoadBalance ? &cx->queueModel : NULL ) );
GetKeyValuesReply rep = wait( loadBalance( locations[shard].second, &StorageServerInterface::getKeyValues, req, TaskPriority::DefaultPromiseEndpoint, false, cx->enableLocalityLoadBalance ? &cx->queueModel : NULL ) );
if( info.debugID.present() )
g_traceBatch.addEvent("TransactionDebug", info.debugID.get().first(), "NativeAPI.getExactRange.After");
output.arena().dependsOn( rep.arena );
@@ -1893,7 +1900,7 @@ ACTOR Future<Standalone<RangeResultRef>> getRange( Database cx, Reference<Transa
transaction_too_old(), future_version()
});
}
GetKeyValuesReply rep = wait( loadBalance(beginServer.second, &StorageServerInterface::getKeyValues, req, TaskDefaultPromiseEndpoint, false, cx->enableLocalityLoadBalance ? &cx->queueModel : NULL ) );
GetKeyValuesReply rep = wait( loadBalance(beginServer.second, &StorageServerInterface::getKeyValues, req, TaskPriority::DefaultPromiseEndpoint, false, cx->enableLocalityLoadBalance ? &cx->queueModel : NULL ) );

if( info.debugID.present() ) {
g_traceBatch.addEvent("TransactionDebug", info.debugID.get().first(), "NativeAPI.getRange.After");//.detail("SizeOf", rep.data.size());

@@ -2694,7 +2701,7 @@ ACTOR static Future<Void> tryCommit( Database cx, Reference<TransactionLogInfo>
const std::vector<MasterProxyInterface>& proxies = cx->clientInfo->get().proxies;
reply = proxies.size() ? throwErrorOr ( brokenPromiseToMaybeDelivered ( proxies[0].commit.tryGetReply(req) ) ) : Never();
} else {
reply = loadBalance( cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::commit, req, TaskDefaultPromiseEndpoint, true );
reply = loadBalance( cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::commit, req, TaskPriority::DefaultPromiseEndpoint, true );
}

choose {

@@ -2718,7 +2725,7 @@ ACTOR static Future<Void> tryCommit( Database cx, Reference<TransactionLogInfo>
tr->versionstampPromise.send(ret);

tr->numErrors = 0;
cx->transactionsCommitCompleted++;
++cx->transactionsCommitCompleted;
cx->transactionCommittedMutations += req.transaction.mutations.size();
cx->transactionCommittedMutationBytes += req.transaction.mutations.expectedSize();
@@ -2793,7 +2800,7 @@ Future<Void> Transaction::commitMutations() {
return Void();
}

cx->transactionsCommitStarted++;
++cx->transactionsCommitStarted;

if(options.readOnly)
return transaction_read_only();

@@ -3074,7 +3081,7 @@ ACTOR Future<Void> readVersionBatcher( DatabaseContext *cx, FutureStream< std::p
if (requests.size() == CLIENT_KNOBS->MAX_BATCH_SIZE)
send_batch = true;
else if (!timeout.isValid())
timeout = delay(batchTime, TaskProxyGetConsistentReadVersion);
timeout = delay(batchTime, TaskPriority::ProxyGetConsistentReadVersion);
}
when(wait(timeout.isValid() ? timeout : Never())) {
send_batch = true;

@@ -3126,7 +3133,7 @@ ACTOR Future<Version> extractReadVersion(DatabaseContext* cx, Reference<Transact
}

Future<Version> Transaction::getReadVersion(uint32_t flags) {
cx->transactionReadVersions++;
++cx->transactionReadVersions;
flags |= options.getReadVersionFlags;

auto& batcher = cx->versionBatcher[ flags ];
@@ -3162,15 +3169,15 @@ Future<Void> Transaction::onError( Error const& e ) {
e.code() == error_code_cluster_not_fully_recovered)
{
if(e.code() == error_code_not_committed)
cx->transactionsNotCommitted++;
++cx->transactionsNotCommitted;
if(e.code() == error_code_commit_unknown_result)
cx->transactionsMaybeCommitted++;
++cx->transactionsMaybeCommitted;
if (e.code() == error_code_proxy_memory_limit_exceeded)
cx->transactionsResourceConstrained++;
++cx->transactionsResourceConstrained;
if (e.code() == error_code_process_behind)
cx->transactionsProcessBehind++;
++cx->transactionsProcessBehind;
if (e.code() == error_code_cluster_not_fully_recovered) {
cx->transactionWaitsForFullRecovery++;
++cx->transactionWaitsForFullRecovery;
}

double backoff = getBackoff(e.code());

@@ -3181,9 +3188,9 @@ Future<Void> Transaction::onError( Error const& e ) {
e.code() == error_code_future_version)
{
if( e.code() == error_code_transaction_too_old )
cx->transactionsTooOld++;
++cx->transactionsTooOld;
else if( e.code() == error_code_future_version )
cx->transactionsFutureVersions++;
++cx->transactionsFutureVersions;

double maxBackoff = options.maxBackoff;
reset();
@@ -3235,7 +3242,7 @@ ACTOR Future< StorageMetrics > waitStorageMetricsMultipleLocations(
WaitMetricsRequest req(locations[i].first, StorageMetrics(), StorageMetrics());
req.min.bytes = 0;
req.max.bytes = -1;
fx[i] = loadBalance( locations[i].second, &StorageServerInterface::waitMetrics, req, TaskDataDistribution );
fx[i] = loadBalance( locations[i].second, &StorageServerInterface::waitMetrics, req, TaskPriority::DataDistribution );
}
wait( waitForAll(fx) );

@@ -3266,7 +3273,7 @@ ACTOR Future< StorageMetrics > waitStorageMetrics(
int shardLimit )
{
loop {
vector< pair<KeyRange, Reference<LocationInfo>> > locations = wait( getKeyRangeLocations( cx, keys, shardLimit, false, &StorageServerInterface::waitMetrics, TransactionInfo(TaskDataDistribution) ) );
vector< pair<KeyRange, Reference<LocationInfo>> > locations = wait( getKeyRangeLocations( cx, keys, shardLimit, false, &StorageServerInterface::waitMetrics, TransactionInfo(TaskPriority::DataDistribution) ) );

//SOMEDAY: Right now, if there are too many shards we delay and check again later. There may be a better solution to this.
if(locations.size() < shardLimit) {

@@ -3276,7 +3283,7 @@ ACTOR Future< StorageMetrics > waitStorageMetrics(
fx = waitStorageMetricsMultipleLocations( locations, min, max, permittedError );
} else {
WaitMetricsRequest req( keys, min, max );
fx = loadBalance( locations[0].second, &StorageServerInterface::waitMetrics, req, TaskDataDistribution );
fx = loadBalance( locations[0].second, &StorageServerInterface::waitMetrics, req, TaskPriority::DataDistribution );
}
StorageMetrics x = wait(fx);
return x;

@@ -3286,14 +3293,14 @@ ACTOR Future< StorageMetrics > waitStorageMetrics(
throw;
}
cx->invalidateCache(keys);
wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskDataDistribution));
wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskPriority::DataDistribution));
}
} else {
TraceEvent(SevWarn, "WaitStorageMetricsPenalty")
.detail("Keys", keys)
.detail("Limit", CLIENT_KNOBS->STORAGE_METRICS_SHARD_LIMIT)
.detail("JitteredSecondsOfPenitence", CLIENT_KNOBS->STORAGE_METRICS_TOO_MANY_SHARDS_DELAY);
wait(delayJittered(CLIENT_KNOBS->STORAGE_METRICS_TOO_MANY_SHARDS_DELAY, TaskDataDistribution));
wait(delayJittered(CLIENT_KNOBS->STORAGE_METRICS_TOO_MANY_SHARDS_DELAY, TaskPriority::DataDistribution));
// make sure that the next getKeyRangeLocations() call will actually re-fetch the range
cx->invalidateCache( keys );
}
@@ -3319,13 +3326,13 @@ Future< StorageMetrics > Transaction::getStorageMetrics( KeyRange const& keys, i
ACTOR Future< Standalone<VectorRef<KeyRef>> > splitStorageMetrics( Database cx, KeyRange keys, StorageMetrics limit, StorageMetrics estimated )
{
loop {
state vector< pair<KeyRange, Reference<LocationInfo>> > locations = wait( getKeyRangeLocations( cx, keys, CLIENT_KNOBS->STORAGE_METRICS_SHARD_LIMIT, false, &StorageServerInterface::splitMetrics, TransactionInfo(TaskDataDistribution) ) );
state vector< pair<KeyRange, Reference<LocationInfo>> > locations = wait( getKeyRangeLocations( cx, keys, CLIENT_KNOBS->STORAGE_METRICS_SHARD_LIMIT, false, &StorageServerInterface::splitMetrics, TransactionInfo(TaskPriority::DataDistribution) ) );
state StorageMetrics used;
state Standalone<VectorRef<KeyRef>> results;

//SOMEDAY: Right now, if there are too many shards we delay and check again later. There may be a better solution to this.
if(locations.size() == CLIENT_KNOBS->STORAGE_METRICS_SHARD_LIMIT) {
wait(delay(CLIENT_KNOBS->STORAGE_METRICS_TOO_MANY_SHARDS_DELAY, TaskDataDistribution));
wait(delay(CLIENT_KNOBS->STORAGE_METRICS_TOO_MANY_SHARDS_DELAY, TaskPriority::DataDistribution));
cx->invalidateCache(keys);
}
else {

@@ -3336,7 +3343,7 @@ ACTOR Future< Standalone<VectorRef<KeyRef>> > splitStorageMetrics( Database cx,
state int i = 0;
for(; i<locations.size(); i++) {
SplitMetricsRequest req( locations[i].first, limit, used, estimated, i == locations.size() - 1 );
SplitMetricsReply res = wait( loadBalance( locations[i].second, &StorageServerInterface::splitMetrics, req, TaskDataDistribution ) );
SplitMetricsReply res = wait( loadBalance( locations[i].second, &StorageServerInterface::splitMetrics, req, TaskPriority::DataDistribution ) );
if( res.splits.size() && res.splits[0] <= results.back() ) { // split points are out of order, possibly because of moving data, throw error to retry
ASSERT_WE_THINK(false); // FIXME: This seems impossible and doesn't seem to be covered by testing
throw all_alternatives_failed();

@@ -3362,7 +3369,7 @@ ACTOR Future< Standalone<VectorRef<KeyRef>> > splitStorageMetrics( Database cx,
throw;
}
cx->invalidateCache( keys );
wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskDataDistribution));
wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskPriority::DataDistribution));
}
}
}
@@ -74,8 +74,8 @@ class Database {
public:
enum { API_VERSION_LATEST = -1 };

static Database createDatabase( Reference<ClusterConnectionFile> connFile, int apiVersion, LocalityData const& clientLocality=LocalityData(), DatabaseContext *preallocatedDb=nullptr );
static Database createDatabase( std::string connFileName, int apiVersion, LocalityData const& clientLocality=LocalityData() );
static Database createDatabase( Reference<ClusterConnectionFile> connFile, int apiVersion, bool internal=true, LocalityData const& clientLocality=LocalityData(), DatabaseContext *preallocatedDb=nullptr );
static Database createDatabase( std::string connFileName, int apiVersion, bool internal=true, LocalityData const& clientLocality=LocalityData() );

Database() {} // an uninitialized database can be destructed or reassigned safely; that's it
void operator= ( Database const& rhs ) { db = rhs.db; }

@@ -163,10 +163,10 @@ struct TransactionOptions {

struct TransactionInfo {
Optional<UID> debugID;
int taskID;
TaskPriority taskID;
bool useProvisionalProxies;

explicit TransactionInfo( int taskID ) : taskID(taskID), useProvisionalProxies(false) {}
explicit TransactionInfo( TaskPriority taskID ) : taskID(taskID), useProvisionalProxies(false) {}
};

struct TransactionLogInfo : public ReferenceCounted<TransactionLogInfo>, NonCopyable {

@@ -286,7 +286,7 @@ public:
void flushTrLogsIfEnabled();

// These are to permit use as state variables in actors:
Transaction() : info( TaskDefaultEndpoint ) {}
Transaction() : info( TaskPriority::DefaultEndpoint ) {}
void operator=(Transaction&& r) BOOST_NOEXCEPT;

void reset();
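The change running through this header and NativeAPI.actor.cpp above is the replacement of plain int task IDs with TaskPriority values such as TaskPriority::DefaultEndpoint and TaskPriority::DataDistribution. As a minimal illustrative sketch of the pattern (not the project's actual definition; the enumerator names kept, but the numeric values below are made up), a C++ scoped enum gives exactly this behavior:

#include <iostream>

// Hypothetical stand-in for flow's TaskPriority; the real enumerators and
// values live elsewhere in the tree and differ from these.
enum class TaskPriority : int {
    DefaultYield = 7000,
    DefaultEndpoint = 7500,
    DataDistribution = 3500,
};

// After the migration, APIs take the scoped enum instead of int, so a bare
// integer literal at a call site no longer compiles.
void scheduleAt(TaskPriority prio) {
    std::cout << "scheduled at priority " << static_cast<int>(prio) << "\n";
}

int main() {
    scheduleAt(TaskPriority::DataDistribution); // OK: explicit and readable
    // scheduleAt(7000);                        // error: no implicit int -> TaskPriority conversion
    int raw = static_cast<int>(TaskPriority::DefaultEndpoint); // explicit cast where an int is still required
    scheduleAt(static_cast<TaskPriority>(raw));
    return 0;
}

Because the conversion must be spelled out, defaults like the old "taskID = 7000" become named values such as TaskPriority::DefaultYield in the hunks below, and the remaining int interop points use static_cast, as in the AsyncFileKAIO change further down.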
@@ -250,8 +250,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"storage_server_min_free_space",
"storage_server_min_free_space_ratio",
"log_server_min_free_space",
"log_server_min_free_space_ratio",
"storage_server_read_load"
"log_server_min_free_space_ratio"
]
},
"description":"The database is not being saturated by the workload."

@@ -270,8 +269,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"storage_server_min_free_space",
"storage_server_min_free_space_ratio",
"log_server_min_free_space",
"log_server_min_free_space_ratio",
"storage_server_read_load"
"log_server_min_free_space_ratio"
]
},
"description":"The database is not being saturated by the workload."

@@ -288,7 +286,10 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"incompatible_connections":[

],
"datacenter_version_difference":0,
"datacenter_lag": {
"seconds" : 1.0,
"versions" : 1000000
},
"degraded_processes":0,
"database_available":true,
"database_locked":false,
@@ -291,7 +291,7 @@ ACTOR Future<Optional<StatusObject>> clientCoordinatorsStatusFetcher(Reference<C

state vector<Future<Optional<LeaderInfo>>> leaderServers;
for (int i = 0; i < coord.clientLeaderServers.size(); i++)
leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader, GetLeaderRequest(coord.clusterKey, UID()), TaskCoordinationReply));
leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader, GetLeaderRequest(coord.clusterKey, UID()), TaskPriority::CoordinationReply));

wait( smartQuorum(leaderServers, leaderServers.size() / 2 + 1, 1.5) || delay(2.0) );

@@ -80,9 +80,9 @@ struct StorageServerInterface {
bool operator == (StorageServerInterface const& s) const { return uniqueID == s.uniqueID; }
bool operator < (StorageServerInterface const& s) const { return uniqueID < s.uniqueID; }
void initEndpoints() {
getValue.getEndpoint( TaskLoadBalancedEndpoint );
getKey.getEndpoint( TaskLoadBalancedEndpoint );
getKeyValues.getEndpoint( TaskLoadBalancedEndpoint );
getValue.getEndpoint( TaskPriority::LoadBalancedEndpoint );
getKey.getEndpoint( TaskPriority::LoadBalancedEndpoint );
getKeyValues.getEndpoint( TaskPriority::LoadBalancedEndpoint );
}
};
@@ -68,7 +68,7 @@ ThreadSafeDatabase::ThreadSafeDatabase(std::string connFilename, int apiVersion)

onMainThreadVoid([db, connFile, apiVersion](){
try {
Database::createDatabase(connFile, apiVersion, LocalityData(), db).extractPtr();
Database::createDatabase(connFile, apiVersion, false, LocalityData(), db).extractPtr();
}
catch(Error &e) {
new (db) DatabaseContext(e);

@@ -31,7 +31,7 @@
#include "flow/actorcompiler.h" // This must be the last #include.

ACTOR template <class Tree>
Future<Void> deferredCleanupActor( std::vector<Tree> toFree, int taskID = 7000 ) {
Future<Void> deferredCleanupActor( std::vector<Tree> toFree, TaskPriority taskID = TaskPriority::DefaultYield ) {
state int freeCount = 0;
while (!toFree.empty()) {
Tree a = std::move( toFree.back() );

@@ -511,7 +511,7 @@ public:
oldestVersion = newOldestVersion;
}

Future<Void> forgetVersionsBeforeAsync( Version newOldestVersion, int taskID = 7000 ) {
Future<Void> forgetVersionsBeforeAsync( Version newOldestVersion, TaskPriority taskID = TaskPriority::DefaultYield ) {
ASSERT( newOldestVersion <= latestVersion );
roots[newOldestVersion] = getRoot(newOldestVersion);
@@ -266,7 +266,7 @@ private:
}

ACTOR static Future<int> read_impl( int fd, void* data, int length, int64_t offset ) {
state int taskID = g_network->getCurrentTask();
state TaskPriority taskID = g_network->getCurrentTask();
state Promise<Void> p;
//fprintf(stderr, "eio_read (fd=%d length=%d offset=%lld)\n", fd, length, offset);
state eio_req* r = eio_read(fd, data, length, offset, 0, eio_callback, &p);

@@ -289,7 +289,7 @@ private:
}

ACTOR static Future<Void> write_impl( int fd, Reference<ErrorInfo> err, StringRef data, int64_t offset ) {
state int taskID = g_network->getCurrentTask();
state TaskPriority taskID = g_network->getCurrentTask();
state Promise<Void> p;
state eio_req* r = eio_write(fd, (void*)data.begin(), data.size(), offset, 0, eio_callback, &p);
try { wait( p.getFuture() ); } catch (...) { g_network->setCurrentTask( taskID ); eio_cancel(r); throw; }

@@ -299,7 +299,7 @@ private:
}

ACTOR static Future<Void> truncate_impl( int fd, Reference<ErrorInfo> err, int64_t size ) {
state int taskID = g_network->getCurrentTask();
state TaskPriority taskID = g_network->getCurrentTask();
state Promise<Void> p;
state eio_req* r = eio_ftruncate(fd, size, 0, eio_callback, &p);
try { wait( p.getFuture() ); } catch (...) { g_network->setCurrentTask( taskID ); eio_cancel(r); throw; }

@@ -330,7 +330,7 @@ private:
}

ACTOR static Future<Void> sync_impl( int fd, Reference<ErrorInfo> err, bool sync_metadata=false ) {
state int taskID = g_network->getCurrentTask();
state TaskPriority taskID = g_network->getCurrentTask();
state Promise<Void> p;
state eio_req* r = start_fsync( fd, p, sync_metadata );

@@ -350,7 +350,7 @@ private:
}

ACTOR static Future<int64_t> size_impl( int fd ) {
state int taskID = g_network->getCurrentTask();
state TaskPriority taskID = g_network->getCurrentTask();
state Promise<Void> p;
state eio_req* r = eio_fstat( fd, 0, eio_callback, &p );
try { wait( p.getFuture() ); } catch (...) { g_network->setCurrentTask( taskID ); eio_cancel(r); throw; }

@@ -363,7 +363,7 @@ private:
}

ACTOR static Future<EIO_STRUCT_STAT> stat_impl( std::string filename ) {
state int taskID = g_network->getCurrentTask();
state TaskPriority taskID = g_network->getCurrentTask();
state Promise<Void> p;
state EIO_STRUCT_STAT statdata;
state eio_req* r = eio_stat( filename.c_str(), 0, eio_callback, &p );

@@ -377,7 +377,7 @@ private:

ACTOR template <class R> static Future<R> dispatch_impl( std::function<R()> func) {
state Dispatch<R> data( func );
state int taskID = g_network->getCurrentTask();
state TaskPriority taskID = g_network->getCurrentTask();

state eio_req* r = eio_custom( [](eio_req* req) {
// Runs on the eio thread pool

@@ -418,7 +418,7 @@ private:
static void eio_want_poll() {
want_poll = 1;
// SOMEDAY: NULL for deferred error, no analysis of correctness (itp)
onMainThreadVoid([](){ poll_eio(); }, NULL, TaskPollEIO);
onMainThreadVoid([](){ poll_eio(); }, NULL, TaskPriority::PollEIO);
}

static int eio_callback( eio_req* req ) {
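Each of the *_impl changes above captures g_network->getCurrentTask() into a state variable before the blocking eio request and calls setCurrentTask() with that saved value when the wait is cancelled. A rough self-contained sketch of that save-and-restore idea (hypothetical names and made-up values, not flow's actual API):

#include <iostream>

// Made-up stand-ins for flow's TaskPriority and current-task tracking.
enum class TaskPriority : int { DefaultYield = 7000, DiskIOComplete = 9000 };
thread_local TaskPriority g_currentTask = TaskPriority::DefaultYield;

// Save the caller's priority, run an operation that may change it, and put the
// saved value back whether the operation returns or throws (mirroring the
// catch blocks in the hunks above).
template <class Fn>
void withRestoredPriority(Fn&& op) {
    const TaskPriority saved = g_currentTask;
    try {
        op();
        g_currentTask = saved;
    } catch (...) {
        g_currentTask = saved;
        throw;
    }
}

int main() {
    withRestoredPriority([] { g_currentTask = TaskPriority::DiskIOComplete; });
    std::cout << "restored: " << (g_currentTask == TaskPriority::DefaultYield) << "\n";
    return 0;
}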
@@ -472,9 +472,9 @@ private:
#endif
}

int getTask() const { return (prio>>32)+1; }
TaskPriority getTask() const { return static_cast<TaskPriority>((prio>>32)+1); }

ACTOR static void deliver( Promise<int> result, bool failed, int r, int task ) {
ACTOR static void deliver( Promise<int> result, bool failed, int r, TaskPriority task ) {
wait( delay(0, task) );
if (failed) result.sendError(io_timeout());
else if (r < 0) result.sendError(io_error());

@@ -649,7 +649,7 @@ private:
loop {
wait(success(ev->read()));

wait(delay(0, TaskDiskIOComplete));
wait(delay(0, TaskPriority::DiskIOComplete));

linux_ioresult ev[FLOW_KNOBS->MAX_OUTSTANDING];
timespec tm; tm.tv_sec = 0; tm.tv_nsec = 0;

@@ -23,13 +23,13 @@

std::map<std::string, Future<Void>> AsyncFileNonDurable::filesBeingDeleted;

ACTOR Future<Void> sendOnProcess( ISimulator::ProcessInfo* process, Promise<Void> promise, int taskID ) {
ACTOR Future<Void> sendOnProcess( ISimulator::ProcessInfo* process, Promise<Void> promise, TaskPriority taskID ) {
wait( g_simulator.onProcess( process, taskID ) );
promise.send(Void());
return Void();
}

ACTOR Future<Void> sendErrorOnProcess( ISimulator::ProcessInfo* process, Promise<Void> promise, Error e, int taskID ) {
ACTOR Future<Void> sendErrorOnProcess( ISimulator::ProcessInfo* process, Promise<Void> promise, Error e, TaskPriority taskID ) {
wait( g_simulator.onProcess( process, taskID ) );
promise.sendError(e);
return Void();
@@ -38,8 +38,8 @@
#undef max
#undef min

Future<Void> sendOnProcess( ISimulator::ProcessInfo* const& process, Promise<Void> const& promise, int const& taskID );
Future<Void> sendErrorOnProcess( ISimulator::ProcessInfo* const& process, Promise<Void> const& promise, Error const& e, int const& taskID );
ACTOR Future<Void> sendOnProcess( ISimulator::ProcessInfo* process, Promise<Void> promise, TaskPriority taskID );
ACTOR Future<Void> sendErrorOnProcess( ISimulator::ProcessInfo* process, Promise<Void> promise, Error e, TaskPriority taskID );

ACTOR template <class T>
Future<T> sendErrorOnShutdown( Future<T> in ) {

@@ -198,7 +198,7 @@ public:
//Creates a new AsyncFileNonDurable which wraps the provided IAsyncFile
ACTOR static Future<Reference<IAsyncFile>> open(std::string filename, std::string actualFilename, Future<Reference<IAsyncFile>> wrappedFile, Reference<DiskParameters> diskParameters) {
state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
state int currentTaskID = g_network->getCurrentTask();
state TaskPriority currentTaskID = g_network->getCurrentTask();
state Future<Void> shutdown = success(currentProcess->shutdownSignal.getFuture());

//TraceEvent("AsyncFileNonDurableOpenBegin").detail("Filename", filename).detail("Addr", g_simulator.getCurrentProcess()->address);

@@ -391,7 +391,7 @@ private:

ACTOR Future<int> read(AsyncFileNonDurable *self, void *data, int length, int64_t offset) {
state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
state int currentTaskID = g_network->getCurrentTask();
state TaskPriority currentTaskID = g_network->getCurrentTask();
wait( g_simulator.onMachine( currentProcess ) );

try {

@@ -411,7 +411,7 @@ private:
//or none of the write. It may also corrupt parts of sectors which have not been written correctly
ACTOR Future<Void> write(AsyncFileNonDurable *self, Promise<Void> writeStarted, Future<Future<Void>> ownFuture, void const* data, int length, int64_t offset) {
state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
state int currentTaskID = g_network->getCurrentTask();
state TaskPriority currentTaskID = g_network->getCurrentTask();
wait( g_simulator.onMachine( currentProcess ) );

state double delayDuration = deterministicRandom()->random01() * self->maxWriteDelay;

@@ -535,7 +535,7 @@ private:
//If a kill interrupts the delay, then the truncate may or may not be performed
ACTOR Future<Void> truncate(AsyncFileNonDurable *self, Promise<Void> truncateStarted, Future<Future<Void>> ownFuture, int64_t size) {
state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
state int currentTaskID = g_network->getCurrentTask();
state TaskPriority currentTaskID = g_network->getCurrentTask();
wait( g_simulator.onMachine( currentProcess ) );

state double delayDuration = deterministicRandom()->random01() * self->maxWriteDelay;

@@ -573,8 +573,8 @@ private:
}
}

if(g_network->check_yield(TaskDefaultYield)) {
wait(delay(0, TaskDefaultYield));
if(g_network->check_yield(TaskPriority::DefaultYield)) {
wait(delay(0, TaskPriority::DefaultYield));
}

//If performing a durable truncate, then pass it through to the file. Otherwise, pass it through with a 1/2 chance

@@ -663,7 +663,7 @@ private:

ACTOR Future<Void> sync(AsyncFileNonDurable *self, bool durable) {
state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
state int currentTaskID = g_network->getCurrentTask();
state TaskPriority currentTaskID = g_network->getCurrentTask();
wait( g_simulator.onMachine( currentProcess ) );

try {

@@ -695,7 +695,7 @@ private:

ACTOR Future<int64_t> size(AsyncFileNonDurable *self) {
state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
state int currentTaskID = g_network->getCurrentTask();
state TaskPriority currentTaskID = g_network->getCurrentTask();

wait( g_simulator.onMachine( currentProcess ) );

@@ -714,7 +714,7 @@ private:
//Finishes all outstanding actors on an AsyncFileNonDurable and then deletes it
ACTOR Future<Void> deleteFile(AsyncFileNonDurable *self) {
state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
state int currentTaskID = g_network->getCurrentTask();
state TaskPriority currentTaskID = g_network->getCurrentTask();
state std::string filename = self->filename;

wait( g_simulator.onMachine( currentProcess ) );
@@ -172,28 +172,29 @@ struct YieldMockNetwork : INetwork, ReferenceCounted<YieldMockNetwork> {
t.send(Void());
}

virtual Future<class Void> delay(double seconds, int taskID) {
virtual Future<class Void> delay(double seconds, TaskPriority taskID) {
return nextTick.getFuture();
}

virtual Future<class Void> yield(int taskID) {
virtual Future<class Void> yield(TaskPriority taskID) {
if (check_yield(taskID))
return delay(0,taskID);
return Void();
}

virtual bool check_yield(int taskID) {
virtual bool check_yield(TaskPriority taskID) {
if (nextYield > 0) --nextYield;
return nextYield == 0;
}

// Delegate everything else. TODO: Make a base class NetworkWrapper for delegating everything in INetwork
virtual int getCurrentTask() { return baseNetwork->getCurrentTask(); }
virtual void setCurrentTask(int taskID) { baseNetwork->setCurrentTask(taskID); }
virtual TaskPriority getCurrentTask() { return baseNetwork->getCurrentTask(); }
virtual void setCurrentTask(TaskPriority taskID) { baseNetwork->setCurrentTask(taskID); }
virtual double now() { return baseNetwork->now(); }
virtual void stop() { return baseNetwork->stop(); }
virtual bool isSimulated() const { return baseNetwork->isSimulated(); }
virtual void onMainThread(Promise<Void>&& signal, int taskID) { return baseNetwork->onMainThread(std::move(signal), taskID); }
virtual void onMainThread(Promise<Void>&& signal, TaskPriority taskID) { return baseNetwork->onMainThread(std::move(signal), taskID); }
bool isOnMainThread() const override { return baseNetwork->isOnMainThread(); }
virtual THREAD_HANDLE startThread(THREAD_FUNC_RETURN(*func) (void *), void *arg) { return baseNetwork->startThread(func,arg); }
virtual Future< Reference<class IAsyncFile> > open(std::string filename, int64_t flags, int64_t mode) { return IAsyncFileSystem::filesystem()->open(filename,flags,mode); }
virtual Future< Void > deleteFile(std::string filename, bool mustBeDurable) { return IAsyncFileSystem::filesystem()->deleteFile(filename,mustBeDurable); }
@ -18,22 +18,25 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "flow/flow.h"
|
||||
#include "fdbrpc/FlowTransport.h"
|
||||
#include "fdbrpc/genericactors.actor.h"
|
||||
#include "fdbrpc/fdbrpc.h"
|
||||
#include "flow/Net2Packet.h"
|
||||
#include "flow/ActorCollection.h"
|
||||
#include "flow/TDMetric.actor.h"
|
||||
#include "flow/ObjectSerializer.h"
|
||||
#include "fdbrpc/FailureMonitor.h"
|
||||
#include "fdbrpc/crc32c.h"
|
||||
#include "fdbrpc/simulator.h"
|
||||
#include <unordered_map>
|
||||
|
||||
#include <unordered_map>
|
||||
#if VALGRIND
|
||||
#include <memcheck.h>
|
||||
#endif
|
||||
|
||||
#include "fdbrpc/crc32c.h"
|
||||
#include "fdbrpc/fdbrpc.h"
|
||||
#include "fdbrpc/FailureMonitor.h"
|
||||
#include "fdbrpc/genericactors.actor.h"
|
||||
#include "fdbrpc/simulator.h"
|
||||
#include "flow/ActorCollection.h"
|
||||
#include "flow/Error.h"
|
||||
#include "flow/flow.h"
|
||||
#include "flow/Net2Packet.h"
|
||||
#include "flow/TDMetric.actor.h"
|
||||
#include "flow/ObjectSerializer.h"
|
||||
#include "flow/ProtocolVersion.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
static NetworkAddressList g_currentDeliveryPeerAddress = NetworkAddressList();
|
||||
|
@ -47,9 +50,9 @@ const uint64_t TOKEN_STREAM_FLAG = 1;
|
|||
class EndpointMap : NonCopyable {
|
||||
public:
|
||||
EndpointMap();
|
||||
void insert( NetworkMessageReceiver* r, Endpoint::Token& token, uint32_t priority );
|
||||
void insert( NetworkMessageReceiver* r, Endpoint::Token& token, TaskPriority priority );
|
||||
NetworkMessageReceiver* get( Endpoint::Token const& token );
|
||||
uint32_t getPriority( Endpoint::Token const& token );
|
||||
TaskPriority getPriority( Endpoint::Token const& token );
|
||||
void remove( Endpoint::Token const& token, NetworkMessageReceiver* r );
|
||||
|
||||
private:
|
||||
|
@ -83,12 +86,12 @@ void EndpointMap::realloc() {
|
|||
firstFree = oldSize;
|
||||
}
|
||||
|
||||
void EndpointMap::insert( NetworkMessageReceiver* r, Endpoint::Token& token, uint32_t priority ) {
|
||||
void EndpointMap::insert( NetworkMessageReceiver* r, Endpoint::Token& token, TaskPriority priority ) {
|
||||
if (firstFree == uint32_t(-1)) realloc();
|
||||
int index = firstFree;
|
||||
firstFree = data[index].nextFree;
|
||||
token = Endpoint::Token( token.first(), (token.second()&0xffffffff00000000LL) | index );
|
||||
data[index].token() = Endpoint::Token( token.first(), (token.second()&0xffffffff00000000LL) | priority );
|
||||
data[index].token() = Endpoint::Token( token.first(), (token.second()&0xffffffff00000000LL) | static_cast<uint32_t>(priority) );
|
||||
data[index].receiver = r;
|
||||
}
|
||||
|
||||
|
@ -99,11 +102,11 @@ NetworkMessageReceiver* EndpointMap::get( Endpoint::Token const& token ) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
uint32_t EndpointMap::getPriority( Endpoint::Token const& token ) {
|
||||
TaskPriority EndpointMap::getPriority( Endpoint::Token const& token ) {
|
||||
uint32_t index = token.second();
|
||||
if ( index < data.size() && data[index].token().first() == token.first() && ((data[index].token().second()&0xffffffff00000000LL)|index)==token.second() )
|
||||
return data[index].token().second();
|
||||
return TaskUnknownEndpoint;
|
||||
return static_cast<TaskPriority>(data[index].token().second());
|
||||
return TaskPriority::UnknownEndpoint;
|
||||
}
|
||||
|
||||
void EndpointMap::remove( Endpoint::Token const& token, NetworkMessageReceiver* r ) {
|
||||
|
@ -119,7 +122,7 @@ struct EndpointNotFoundReceiver : NetworkMessageReceiver {
|
|||
EndpointNotFoundReceiver(EndpointMap& endpoints) {
|
||||
//endpoints[WLTOKEN_ENDPOINT_NOT_FOUND] = this;
|
||||
Endpoint::Token e = WLTOKEN_ENDPOINT_NOT_FOUND;
|
||||
endpoints.insert(this, e, TaskDefaultEndpoint);
|
||||
endpoints.insert(this, e, TaskPriority::DefaultEndpoint);
|
||||
ASSERT( e == WLTOKEN_ENDPOINT_NOT_FOUND );
|
||||
}
|
||||
virtual void receive( ArenaReader& reader ) {
|
||||
|
@ -138,7 +141,7 @@ struct EndpointNotFoundReceiver : NetworkMessageReceiver {
|
|||
struct PingReceiver : NetworkMessageReceiver {
|
||||
PingReceiver(EndpointMap& endpoints) {
|
||||
Endpoint::Token e = WLTOKEN_PING_PACKET;
|
||||
endpoints.insert(this, e, TaskReadSocket);
|
||||
endpoints.insert(this, e, TaskPriority::ReadSocket);
|
||||
ASSERT( e == WLTOKEN_PING_PACKET );
|
||||
}
|
||||
virtual void receive( ArenaReader& reader ) {
|
||||
|
@ -302,11 +305,12 @@ struct Peer : NonCopyable {
|
|||
int peerReferences;
|
||||
bool incompatibleProtocolVersionNewer;
|
||||
int64_t bytesReceived;
|
||||
double lastDataPacketSentTime;
|
||||
|
||||
explicit Peer( TransportData* transport, NetworkAddress const& destination )
|
||||
: transport(transport), destination(destination), outgoingConnectionIdle(false), lastConnectTime(0.0), reconnectionDelay(FLOW_KNOBS->INITIAL_RECONNECTION_TIME),
|
||||
compatible(true), incompatibleProtocolVersionNewer(false), peerReferences(-1), bytesReceived(0)
|
||||
{
|
||||
explicit Peer(TransportData* transport, NetworkAddress const& destination)
|
||||
: transport(transport), destination(destination), outgoingConnectionIdle(false), lastConnectTime(0.0),
|
||||
reconnectionDelay(FLOW_KNOBS->INITIAL_RECONNECTION_TIME), compatible(true),
|
||||
incompatibleProtocolVersionNewer(false), peerReferences(-1), bytesReceived(0), lastDataPacketSentTime(now()) {
|
||||
connect = connectionKeeper(this);
|
||||
}
|
||||
|
||||
|
@ -339,7 +343,7 @@ struct Peer : NonCopyable {
|
|||
pkt.connectionId = transport->transportId;
|
||||
|
||||
PacketBuffer* pb_first = new PacketBuffer;
|
||||
PacketWriter wr( pb_first, NULL, Unversioned() );
|
||||
PacketWriter wr( pb_first, nullptr, Unversioned() );
|
||||
pkt.serialize(wr);
|
||||
unsent.prependWriteBuffer(pb_first, wr.finish());
|
||||
}
|
||||
|
@ -351,7 +355,7 @@ struct Peer : NonCopyable {
|
|||
// If there are reliable packets, compact reliable packets into a new unsent range
|
||||
if(!reliable.empty()) {
|
||||
PacketBuffer* pb = unsent.getWriteBuffer();
|
||||
pb = reliable.compact(pb, NULL);
|
||||
pb = reliable.compact(pb, nullptr);
|
||||
unsent.setWriteBuffer(pb);
|
||||
}
|
||||
}
|
||||
|
@ -393,19 +397,44 @@ struct Peer : NonCopyable {
|
|||
}
|
||||
|
||||
ACTOR static Future<Void> connectionMonitor( Peer *peer ) {
|
||||
state RequestStream< ReplyPromise<Void> > remotePing( Endpoint( {peer->destination}, WLTOKEN_PING_PACKET ) );
|
||||
|
||||
state Endpoint remotePingEndpoint({ peer->destination }, WLTOKEN_PING_PACKET);
|
||||
loop {
|
||||
if(peer->peerReferences == 0 && peer->reliable.empty() && peer->unsent.empty()) {
|
||||
throw connection_unreferenced();
|
||||
if (!FlowTransport::transport().isClient() && !peer->destination.isPublic()) {
|
||||
// Don't send ping messages to clients unless necessary. Instead monitor incoming client pings.
|
||||
state double lastRefreshed = now();
|
||||
state int64_t lastBytesReceived = peer->bytesReceived;
|
||||
loop {
|
||||
wait(delay(FLOW_KNOBS->CONNECTION_MONITOR_LOOP_TIME));
|
||||
if (lastBytesReceived < peer->bytesReceived) {
|
||||
lastRefreshed = now();
|
||||
lastBytesReceived = peer->bytesReceived;
|
||||
} else if (lastRefreshed < now() - FLOW_KNOBS->CONNECTION_MONITOR_IDLE_TIMEOUT *
|
||||
FLOW_KNOBS->CONNECTION_MONITOR_INCOMING_IDLE_MULTIPLIER) {
|
||||
// If we have not received anything in this period, client must have closed
|
||||
// connection by now. Break loop to check if it is still alive by sending a ping.
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
wait( delayJittered( FLOW_KNOBS->CONNECTION_MONITOR_LOOP_TIME ) );
|
||||
if (peer->reliable.empty() && peer->unsent.empty()) {
|
||||
if (peer->peerReferences == 0 &&
|
||||
(peer->lastDataPacketSentTime < now() - FLOW_KNOBS->CONNECTION_MONITOR_UNREFERENCED_CLOSE_DELAY)) {
|
||||
// TODO: What about when peerReference == -1?
|
||||
throw connection_unreferenced();
|
||||
} else if (FlowTransport::transport().isClient() && peer->destination.isPublic() &&
|
||||
(peer->lastConnectTime < now() - FLOW_KNOBS->CONNECTION_MONITOR_IDLE_TIMEOUT) &&
|
||||
(peer->lastDataPacketSentTime < now() - FLOW_KNOBS->CONNECTION_MONITOR_IDLE_TIMEOUT)) {
|
||||
// First condition is necessary because we may get here if we are server.
|
||||
throw connection_idle();
|
||||
}
|
||||
}
|
||||
|
||||
// SOMEDAY: Stop monitoring and close the connection after a long period of inactivity with no reliable or onDisconnect requests outstanding
|
||||
wait (delayJittered(FLOW_KNOBS->CONNECTION_MONITOR_LOOP_TIME));
|
||||
|
||||
// TODO: Stop monitoring and close the connection with no onDisconnect requests outstanding
|
||||
state ReplyPromise<Void> reply;
|
||||
FlowTransport::transport().sendUnreliable( SerializeSource<ReplyPromise<Void>>(reply), remotePing.getEndpoint() );
|
||||
FlowTransport::transport().sendUnreliable( SerializeSource<ReplyPromise<Void>>(reply), remotePingEndpoint );
|
||||
state int64_t startingBytes = peer->bytesReceived;
|
||||
state int timeouts = 0;
|
||||
loop {
|
||||
|
@ -416,7 +445,10 @@ struct Peer : NonCopyable {
|
|||
throw connection_failed();
|
||||
}
|
||||
if(timeouts > 1) {
|
||||
TraceEvent(SevWarnAlways, "ConnectionSlowPing").suppressFor(1.0).detail("WithAddr", peer->destination).detail("Timeouts", timeouts);
|
||||
TraceEvent(SevWarnAlways, "ConnectionSlowPing")
|
||||
.suppressFor(1.0)
|
||||
.detail("WithAddr", peer->destination)
|
||||
.detail("Timeouts", timeouts);
|
||||
}
|
||||
startingBytes = peer->bytesReceived;
|
||||
timeouts++;
|
||||
|
@ -435,16 +467,16 @@ struct Peer : NonCopyable {
|
|||
ACTOR static Future<Void> connectionWriter( Peer* self, Reference<IConnection> conn ) {
|
||||
state double lastWriteTime = now();
|
||||
loop {
|
||||
//wait( delay(0, TaskWriteSocket) );
|
||||
wait( delayJittered(std::max<double>(FLOW_KNOBS->MIN_COALESCE_DELAY, FLOW_KNOBS->MAX_COALESCE_DELAY - (now() - lastWriteTime)), TaskWriteSocket) );
|
||||
//wait( delay(500e-6, TaskWriteSocket) );
|
||||
//wait( yield(TaskWriteSocket) );
|
||||
//wait( delay(0, TaskPriority::WriteSocket) );
|
||||
wait( delayJittered(std::max<double>(FLOW_KNOBS->MIN_COALESCE_DELAY, FLOW_KNOBS->MAX_COALESCE_DELAY - (now() - lastWriteTime)), TaskPriority::WriteSocket) );
|
||||
//wait( delay(500e-6, TaskPriority::WriteSocket) );
|
||||
//wait( yield(TaskPriority::WriteSocket) );
|
||||
|
||||
// Send until there is nothing left to send
|
||||
loop {
|
||||
lastWriteTime = now();
|
||||
|
||||
int sent = conn->write( self->unsent.getUnsent() );
|
||||
int sent = conn->write(self->unsent.getUnsent(), /* limit= */ FLOW_KNOBS->MAX_PACKET_SEND_BYTES);
|
||||
if (sent) {
|
||||
self->transport->bytesSent += sent;
|
||||
self->unsent.sent(sent);
|
||||
|
@ -453,7 +485,7 @@ struct Peer : NonCopyable {
|
|||
|
||||
TEST(true); // We didn't write everything, so apparently the write buffer is full. Wait for it to be nonfull.
|
||||
wait( conn->onWritable() );
|
||||
wait( yield(TaskWriteSocket) );
|
||||
wait( yield(TaskPriority::WriteSocket) );
|
||||
}
|
||||
|
||||
// Wait until there is something to send
|
||||
|
@ -547,14 +579,21 @@ struct Peer : NonCopyable {
|
|||
self->discardUnreliablePackets();
|
||||
reader = Future<Void>();
|
||||
bool ok = e.code() == error_code_connection_failed || e.code() == error_code_actor_cancelled ||
|
||||
e.code() == error_code_connection_unreferenced ||
|
||||
e.code() == error_code_connection_unreferenced || e.code() == error_code_connection_idle ||
|
||||
(g_network->isSimulated() && e.code() == error_code_checksum_failed);
|
||||
|
||||
if(self->compatible) {
|
||||
TraceEvent(ok ? SevInfo : SevWarnAlways, "ConnectionClosed", conn ? conn->getDebugID() : UID()).error(e, true).suppressFor(1.0).detail("PeerAddr", self->destination);
|
||||
TraceEvent(ok ? SevInfo : SevWarnAlways, "ConnectionClosed", conn ? conn->getDebugID() : UID())
|
||||
.error(e, true)
|
||||
.suppressFor(1.0)
|
||||
.detail("PeerAddr", self->destination);
|
||||
}
|
||||
else {
|
||||
TraceEvent(ok ? SevInfo : SevWarnAlways, "IncompatibleConnectionClosed", conn ? conn->getDebugID() : UID()).error(e, true).suppressFor(1.0).detail("PeerAddr", self->destination);
|
||||
TraceEvent(ok ? SevInfo : SevWarnAlways, "IncompatibleConnectionClosed",
|
||||
conn ? conn->getDebugID() : UID())
|
||||
.error(e, true)
|
||||
.suppressFor(1.0)
|
||||
.detail("PeerAddr", self->destination);
|
||||
}
|
||||
|
||||
if(self->destination.isPublic() && IFailureMonitor::failureMonitor().getState(self->destination).isAvailable()) {
|
||||
|
@ -562,20 +601,25 @@ struct Peer : NonCopyable {
|
|||
if(now() - it.second > FLOW_KNOBS->TOO_MANY_CONNECTIONS_CLOSED_RESET_DELAY) {
|
||||
it.first = now();
|
||||
} else if(now() - it.first > FLOW_KNOBS->TOO_MANY_CONNECTIONS_CLOSED_TIMEOUT) {
|
||||
TraceEvent(SevWarnAlways, "TooManyConnectionsClosed", conn ? conn->getDebugID() : UID()).suppressFor(5.0).detail("PeerAddr", self->destination);
|
||||
TraceEvent(SevWarnAlways, "TooManyConnectionsClosed", conn ? conn->getDebugID() : UID())
|
||||
.suppressFor(5.0)
|
||||
.detail("PeerAddr", self->destination);
|
||||
self->transport->degraded->set(true);
|
||||
}
|
||||
it.second = now();
|
||||
}
|
||||
|
||||
if (conn) {
|
||||
if (FlowTransport::transport().isClient()) {
|
||||
if (FlowTransport::transport().isClient() && e.code() != error_code_connection_idle) {
|
||||
clientReconnectDelay = true;
|
||||
}
|
||||
conn->close();
|
||||
conn = Reference<IConnection>();
|
||||
}
|
||||
IFailureMonitor::failureMonitor().notifyDisconnect( self->destination ); //< Clients might send more packets in response, which needs to go out on the next connection
|
||||
|
||||
// Clients might send more packets in response, which needs to go out on the next connection
|
||||
IFailureMonitor::failureMonitor().notifyDisconnect( self->destination );
|
||||
|
||||
if (e.code() == error_code_actor_cancelled) throw;
|
||||
// Try to recover, even from serious errors, by retrying
|
||||
|
||||
|
@ -599,8 +643,8 @@ TransportData::~TransportData() {
|
|||
}
|
||||
|
||||
ACTOR static void deliver(TransportData* self, Endpoint destination, ArenaReader reader, bool inReadSocket) {
|
||||
int priority = self->endpoints.getPriority(destination.token);
|
||||
if (priority < TaskReadSocket || !inReadSocket) {
|
||||
TaskPriority priority = self->endpoints.getPriority(destination.token);
|
||||
if (priority < TaskPriority::ReadSocket || !inReadSocket) {
|
||||
wait( delay(0, priority) );
|
||||
} else {
|
||||
g_network->setCurrentTask( priority );
|
||||
|
@@ -634,21 +678,17 @@ ACTOR static void deliver(TransportData* self, Endpoint destination, ArenaReader
}

if( inReadSocket )
-g_network->setCurrentTask( TaskReadSocket );
+g_network->setCurrentTask( TaskPriority::ReadSocket );
}

-static void scanPackets(TransportData* transport, uint8_t*& unprocessed_begin, uint8_t* e, Arena& arena,
+static void scanPackets(TransportData* transport, uint8_t*& unprocessed_begin, const uint8_t* e, Arena& arena,
NetworkAddress const& peerAddress, ProtocolVersion peerProtocolVersion) {
// Find each complete packet in the given byte range and queue a ready task to deliver it.
// Remove the complete packets from the range by increasing unprocessed_begin.
// There won't be more than 64K of data plus one packet, so this shouldn't take a long time.
uint8_t* p = unprocessed_begin;

-bool checksumEnabled = true;
-if (peerAddress.isTLS()) {
-checksumEnabled = false;
-}
-
+const bool checksumEnabled = !peerAddress.isTLS();
loop {
uint32_t packetLen, packetChecksum;


@@ -734,6 +774,23 @@ static void scanPackets(TransportData* transport, uint8_t*& unprocessed_begin, u
}
}

+// Given unprocessed buffer [begin, end), check if next packet size is known and return
+// enough size for the next packet, whose format is: {size, optional_checksum, data} +
+// next_packet_size.
+static int getNewBufferSize(const uint8_t* begin, const uint8_t* end, const NetworkAddress& peerAddress) {
+	const int len = end - begin;
+	if (len < sizeof(uint32_t)) {
+		return FLOW_KNOBS->MIN_PACKET_BUFFER_BYTES;
+	}
+	const uint32_t packetLen = *(uint32_t*)begin;
+	if (packetLen > FLOW_KNOBS->PACKET_LIMIT) {
+		TraceEvent(SevError, "Net2_PacketLimitExceeded").detail("FromPeer", peerAddress.toString()).detail("Length", (int)packetLen);
+		throw platform_error();
+	}
+	return std::max<uint32_t>(FLOW_KNOBS->MIN_PACKET_BUFFER_BYTES,
+	                          packetLen + sizeof(uint32_t) * (peerAddress.isTLS() ? 2 : 3));
+}
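// A standalone sketch (not part of this patch) of the sizing rule getNewBufferSize() encodes above:
// once the 4-byte length prefix is available, grow the buffer to hold exactly the announced packet
// (length word, checksum word when not on TLS, payload) plus the next packet's length word, rather
// than the old "at least 64KiB, at least double" policy. The minimum below stands in for
// FLOW_KNOBS->MIN_PACKET_BUFFER_BYTES and is an assumed value; the real knob may differ.
#include <algorithm>
#include <cstdint>
#include <cstring>

constexpr uint32_t kMinPacketBufferBytes = 16384;

inline uint32_t newBufferSizeSketch(const uint8_t* begin, const uint8_t* end, bool isTLS) {
	if (end - begin < static_cast<long>(sizeof(uint32_t)))
		return kMinPacketBufferBytes;                  // length prefix incomplete: keep the minimum
	uint32_t packetLen = 0;
	std::memcpy(&packetLen, begin, sizeof(packetLen)); // unaligned-safe read of the length word
	// (the real function also rejects packetLen > PACKET_LIMIT before sizing the buffer)
	return std::max<uint32_t>(kMinPacketBufferBytes,
	                          packetLen + sizeof(uint32_t) * (isTLS ? 2 : 3));
}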

ACTOR static Future<Void> connectionReader(
TransportData* transport,
Reference<IConnection> conn,

@@ -741,12 +798,12 @@ ACTOR static Future<Void> connectionReader(
Promise<Peer*> onConnected)
{
// This actor exists whenever there is an open or opening connection, whether incoming or outgoing
-// For incoming connections conn is set and peer is initially NULL; for outgoing connections it is the reverse
+// For incoming connections conn is set and peer is initially nullptr; for outgoing connections it is the reverse

state Arena arena;
-state uint8_t* unprocessed_begin = NULL;
-state uint8_t* unprocessed_end = NULL;
-state uint8_t* buffer_end = NULL;
+state uint8_t* unprocessed_begin = nullptr;
+state uint8_t* unprocessed_end = nullptr;
+state uint8_t* buffer_end = nullptr;
state bool expectConnectPacket = true;
state bool compatible = false;
state bool incompatiblePeerCounted = false;

@@ -761,12 +818,12 @@ ACTOR static Future<Void> connectionReader(
try {
loop {
loop {
-int readAllBytes = buffer_end - unprocessed_end;
-if (readAllBytes < 4096) {
+state int readAllBytes = buffer_end - unprocessed_end;
+if (readAllBytes < FLOW_KNOBS->MIN_PACKET_BUFFER_FREE_BYTES) {
Arena newArena;
-int unproc_len = unprocessed_end - unprocessed_begin;
-int len = std::max( 65536, unproc_len*2 );
-uint8_t* newBuffer = new (newArena) uint8_t[ len ];
+const int unproc_len = unprocessed_end - unprocessed_begin;
+const int len = getNewBufferSize(unprocessed_begin, unprocessed_end, peerAddress);
+uint8_t* const newBuffer = new (newArena) uint8_t[ len ];
memcpy( newBuffer, unprocessed_begin, unproc_len );
arena = newArena;
unprocessed_begin = newBuffer;

@@ -775,13 +832,21 @@ ACTOR static Future<Void> connectionReader(
readAllBytes = buffer_end - unprocessed_end;
}

-int readBytes = conn->read( unprocessed_end, buffer_end );
-if(peer) {
-peer->bytesReceived += readBytes;
+state int totalReadBytes = 0;
+while (true) {
+const int len = std::min<int>(buffer_end - unprocessed_end, FLOW_KNOBS->MAX_PACKET_SEND_BYTES);
+if (len == 0) break;
+state int readBytes = conn->read(unprocessed_end, unprocessed_end + len);
+if (readBytes == 0) break;
+wait(yield(TaskPriority::ReadSocket));
+totalReadBytes += readBytes;
+unprocessed_end += readBytes;
+}
-if (!readBytes) break;
-state bool readWillBlock = readBytes != readAllBytes;
-unprocessed_end += readBytes;
+if (peer) {
+peer->bytesReceived += totalReadBytes;
+}
+if (totalReadBytes == 0) break;
+state bool readWillBlock = totalReadBytes != readAllBytes;
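// A standalone sketch (not part of this patch) of the reading change above: instead of a single
// read() per wakeup, the reader drains the socket in bounded chunks and yields between chunks so
// one busy connection cannot monopolize the run loop. The chunk limit stands in for
// FLOW_KNOBS->MAX_PACKET_SEND_BYTES, and the callbacks stand in for IConnection::read and yield().
#include <algorithm>
#include <cstdint>
#include <functional>

constexpr int kMaxChunkBytes = 65536;

inline int drainSocketSketch(uint8_t*& cursor, uint8_t* bufferEnd,
                             const std::function<int(uint8_t*, int)>& readSome,
                             const std::function<void()>& yieldNow) {
	int totalReadBytes = 0;
	while (true) {
		const int len = std::min<int>(static_cast<int>(bufferEnd - cursor), kMaxChunkBytes);
		if (len == 0) break;               // buffer full; the caller grows it and retries
		const int readBytes = readSome(cursor, len);
		if (readBytes == 0) break;         // nothing more to read right now
		yieldNow();                        // cooperative yield, like wait(yield(ReadSocket))
		totalReadBytes += readBytes;
		cursor += readBytes;
	}
	return totalReadBytes;                 // 0 means the connection made no progress this pass
}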

if (expectConnectPacket && unprocessed_end-unprocessed_begin>=CONNECT_PACKET_V0_SIZE) {
// At the beginning of a connection, we expect to receive a packet containing the protocol version and the listening port of the remote process

@@ -884,11 +949,11 @@ ACTOR static Future<Void> connectionReader(
if (readWillBlock)
break;

-wait(yield(TaskReadSocket));
+wait(yield(TaskPriority::ReadSocket));
}

wait( conn->onReadable() );
-wait(delay(0, TaskReadSocket)); // We don't want to call conn->read directly from the reactor - we could get stuck in the reactor reading 1 packet at a time
+wait(delay(0, TaskPriority::ReadSocket)); // We don't want to call conn->read directly from the reactor - we could get stuck in the reactor reading 1 packet at a time
}
}
catch (Error& e) {

@@ -932,7 +997,7 @@ ACTOR static Future<Void> listen( TransportData* self, NetworkAddress listenAddr
.detail("FromAddress", conn->getPeerAddress())
.detail("ListenAddress", listenAddr.toString());
incoming.add( connectionIncoming(self, conn) );
-wait(delay(0) || delay(FLOW_KNOBS->CONNECTION_ACCEPT_DELAY, TaskWriteSocket));
+wait(delay(0) || delay(FLOW_KNOBS->CONNECTION_ACCEPT_DELAY, TaskPriority::WriteSocket));
}
} catch (Error& e) {
TraceEvent(SevError, "ListenError").error(e);

@@ -946,7 +1011,7 @@ Peer* TransportData::getPeer( NetworkAddress const& address, bool openConnection
return peer->second;
}
if(!openConnection) {
-return NULL;
+return nullptr;
}
Peer* newPeer = new Peer(this, address);
peers[address] = newPeer;

@@ -1023,12 +1088,12 @@ Endpoint FlowTransport::loadedEndpoint( const UID& token ) {
return Endpoint(g_currentDeliveryPeerAddress, token);
}

-void FlowTransport::addPeerReference( const Endpoint& endpoint, NetworkMessageReceiver* receiver ) {
-if (FlowTransport::transport().isClient()) {
+void FlowTransport::addPeerReference(const Endpoint& endpoint, bool isStream) {
+if (!isStream || !endpoint.getPrimaryAddress().isValid())
+return;
+else if (FlowTransport::transport().isClient())
IFailureMonitor::failureMonitor().setStatus(endpoint.getPrimaryAddress(), FailureStatus(false));
}

-if (!receiver->isStream() || !endpoint.getPrimaryAddress().isValid()) return;
Peer* peer = self->getPeer(endpoint.getPrimaryAddress());
if(peer->peerReferences == -1) {
peer->peerReferences = 1;

@@ -1037,8 +1102,8 @@ void FlowTransport::addPeerReference( const Endpoint& endpoint, NetworkMessageRe
}
}

-void FlowTransport::removePeerReference( const Endpoint& endpoint, NetworkMessageReceiver* receiver ) {
-if (!receiver->isStream() || !endpoint.getPrimaryAddress().isValid()) return;
+void FlowTransport::removePeerReference(const Endpoint& endpoint, bool isStream) {
+if (!isStream || !endpoint.getPrimaryAddress().isValid()) return;
Peer* peer = self->getPeer(endpoint.getPrimaryAddress(), false);
if(peer) {
peer->peerReferences--;

@@ -1054,7 +1119,7 @@ void FlowTransport::removePeerReference( const Endpoint& endpoint, NetworkMessag
}
}

-void FlowTransport::addEndpoint( Endpoint& endpoint, NetworkMessageReceiver* receiver, uint32_t taskID ) {
+void FlowTransport::addEndpoint( Endpoint& endpoint, NetworkMessageReceiver* receiver, TaskPriority taskID ) {
endpoint.token = deterministicRandom()->randomUniqueID();
if (receiver->isStream()) {
endpoint.addresses = self->localAddresses;

@@ -1070,7 +1135,7 @@ void FlowTransport::removeEndpoint( const Endpoint& endpoint, NetworkMessageRece
self->endpoints.remove(endpoint.token, receiver);
}

-void FlowTransport::addWellKnownEndpoint( Endpoint& endpoint, NetworkMessageReceiver* receiver, uint32_t taskID ) {
+void FlowTransport::addWellKnownEndpoint( Endpoint& endpoint, NetworkMessageReceiver* receiver, TaskPriority taskID ) {
endpoint.addresses = self->localAddresses;
ASSERT( ((endpoint.token.first() & TOKEN_STREAM_FLAG)!=0) == receiver->isStream() );
Endpoint::Token otoken = endpoint.token;

@@ -1100,13 +1165,9 @@ static PacketID sendPacket( TransportData* self, ISerializeSource const& what, c
ASSERT(copy.size() > 0);
deliver(self, destination, ArenaReader(copy.arena(), copy, AssumeVersion(currentProtocolVersion)), false);

-return (PacketID)NULL;
+return (PacketID)nullptr;
} else {
-bool checksumEnabled = true;
-if (destination.getPrimaryAddress().isTLS()) {
-checksumEnabled = false;
-}
-
+const bool checksumEnabled = !destination.getPrimaryAddress().isTLS();
++self->countPacketsGenerated;

Peer* peer = self->getPeer(destination.getPrimaryAddress(), openConnection);

@@ -1114,7 +1175,7 @@ static PacketID sendPacket( TransportData* self, ISerializeSource const& what, c
// If there isn't an open connection, a public address, or the peer isn't compatible, we can't send
if (!peer || (peer->outgoingConnectionIdle && !destination.getPrimaryAddress().isPublic()) || (peer->incompatibleProtocolVersionNewer && destination.token != WLTOKEN_PING_PACKET)) {
TEST(true); // Can't send to private address without a compatible open connection
-return (PacketID)NULL;
+return (PacketID)nullptr;
}

bool firstUnsent = peer->unsent.empty();

@@ -1193,7 +1254,9 @@ static PacketID sendPacket( TransportData* self, ISerializeSource const& what, c
#endif

peer->send(pb, rp, firstUnsent);
+
+if (destination.token != WLTOKEN_PING_PACKET) {
+peer->lastDataPacketSentTime = now();
+}
return (PacketID)rp;
}
}
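// A standalone sketch (not part of this patch) of why sendPacket now stamps lastDataPacketSentTime
// only for non-ping packets: an idle-connection monitor can then tell "only keepalive pings are
// flowing" apart from "real traffic is flowing" and close the former. The shape and the timeout
// parameter below are illustrative assumptions, not the real Peer or connection-monitor code.
struct PeerIdleSketch {
	double lastDataPacketSentTime = 0.0;

	void onSend(double now, bool isPingPacket) {
		if (!isPingPacket) lastDataPacketSentTime = now;   // pings keep the socket warm but don't count
	}
	bool looksIdle(double now, double idleTimeout) const {
		return now - lastDataPacketSentTime > idleTimeout; // candidate for a connection_idle close
	}
};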


@@ -23,6 +23,7 @@
#pragma once

+#include <algorithm>
#include "flow/genericactors.actor.h"
#include "flow/network.h"
#include "flow/FileIdentifier.h"


@@ -109,7 +110,7 @@ public:
FlowTransport(uint64_t transportId);
~FlowTransport();

-static void createInstance(bool isClient, uint64_t transportId = 0);
+static void createInstance(bool isClient, uint64_t transportId);
// Creates a new FlowTransport and makes FlowTransport::transport() return it. This uses g_network->global() variables,
// so it will be private to a simulation.


@@ -131,19 +132,19 @@ public:
std::map<NetworkAddress, std::pair<uint64_t, double>>* getIncompatiblePeers();
// Returns the same of all peers that have attempted to connect, but have incompatible protocol versions

-void addPeerReference( const Endpoint&, NetworkMessageReceiver* );
+void addPeerReference(const Endpoint&, bool isStream);
// Signal that a peer connection is being used, even if no messages are currently being sent to the peer

-void removePeerReference( const Endpoint&, NetworkMessageReceiver* );
+void removePeerReference(const Endpoint&, bool isStream);
// Signal that a peer connection is no longer being used

-void addEndpoint( Endpoint& endpoint, NetworkMessageReceiver*, uint32_t taskID );
+void addEndpoint( Endpoint& endpoint, NetworkMessageReceiver*, TaskPriority taskID );
// Sets endpoint to be a new local endpoint which delivers messages to the given receiver

void removeEndpoint( const Endpoint&, NetworkMessageReceiver* );
// The given local endpoint no longer delivers messages to the given receiver or uses resources

-void addWellKnownEndpoint( Endpoint& endpoint, NetworkMessageReceiver*, uint32_t taskID );
+void addWellKnownEndpoint( Endpoint& endpoint, NetworkMessageReceiver*, TaskPriority taskID );
// Sets endpoint to a new local endpoint (without changing its token) which delivers messages to the given receiver
// Implementations may have limitations on when this function is called and what endpoint.token may be!



@@ -179,7 +179,7 @@ Future< REPLY_TYPE(Request) > loadBalance(
Reference<MultiInterface<Multi>> alternatives,
RequestStream<Request> Interface::* channel,
Request request = Request(),
-int taskID = TaskDefaultPromiseEndpoint,
+TaskPriority taskID = TaskPriority::DefaultPromiseEndpoint,
bool atMostOnce = false, // if true, throws request_maybe_delivered() instead of retrying automatically
QueueModel* model = NULL)
{


@@ -70,6 +70,13 @@ struct IReplicationPolicy : public ReferenceCounted<IReplicationPolicy> {
return keys;
}
virtual void attributeKeys(std::set<std::string>*) const = 0;
+
+// For flatbuffers, IReplicationPolicy is just encoded as a string using
+// |serializeReplicationPolicy|. |writer| is a member of IReplicationPolicy
+// so that this string outlives all calls to
+// dynamic_size_traits<Reference<IReplicationPolicy>>::save
+mutable BinaryWriter writer{ IncludeVersion() };
+mutable bool alreadyWritten = false;
};

template <class Archive>

@@ -276,12 +283,28 @@ void serializeReplicationPolicy(Ar& ar, Reference<IReplicationPolicy>& policy) {

template <>
struct dynamic_size_traits<Reference<IReplicationPolicy>> : std::true_type {
-static WriteRawMemory save(const Reference<IReplicationPolicy>& value) {
-BinaryWriter writer(IncludeVersion());
-serializeReplicationPolicy(writer, const_cast<Reference<IReplicationPolicy>&>(value));
-std::unique_ptr<uint8_t[]> memory(new uint8_t[writer.getLength()]);
-memcpy(memory.get(), writer.getData(), writer.getLength());
-return std::make_pair<OwnershipErasedPtr<const uint8_t>, size_t>(ownedPtr(const_cast<const uint8_t*>(memory.release())), writer.getLength());
+static Block save(const Reference<IReplicationPolicy>& value) {
+if (value.getPtr() == nullptr) {
+static BinaryWriter writer{ IncludeVersion() };
+writer = BinaryWriter{ IncludeVersion() };
+serializeReplicationPolicy(writer, const_cast<Reference<IReplicationPolicy>&>(value));
+return unownedPtr(const_cast<const uint8_t*>(reinterpret_cast<uint8_t*>(writer.getData())),
+                  writer.getLength());
+}
+if (!value->alreadyWritten) {
+serializeReplicationPolicy(value->writer, const_cast<Reference<IReplicationPolicy>&>(value));
+value->alreadyWritten = true;
+}
+return unownedPtr(const_cast<const uint8_t*>(reinterpret_cast<uint8_t*>(value->writer.getData())),
+                  value->writer.getLength());
+}
+
+static void serialization_done(const Reference<IReplicationPolicy>& value) {
+if (value.getPtr() == nullptr) {
+return;
+}
+value->alreadyWritten = false;
+value->writer = BinaryWriter{ IncludeVersion() };
+}
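// A standalone sketch (not part of this patch) of the contract save()/serialization_done() rely on
// above: save() may be called several times while one flatbuffers message is being built, so the
// bytes it hands out must stay alive and unchanged until serialization_done() resets the cached
// state. The types below are simplified stand-ins, not the real dynamic_size_traits machinery.
#include <cstdint>
#include <string>
#include <utility>

struct CachedPolicySketch {
	std::string description;              // stands in for the serialized replication policy
	mutable std::string cachedBytes;      // stands in for the BinaryWriter owned by the policy
	mutable bool alreadyWritten = false;
};

inline std::pair<const uint8_t*, size_t> saveSketch(const CachedPolicySketch& value) {
	if (!value.alreadyWritten) {
		value.cachedBytes = value.description;   // serialize once, reuse on repeated calls
		value.alreadyWritten = true;
	}
	return { reinterpret_cast<const uint8_t*>(value.cachedBytes.data()), value.cachedBytes.size() };
}

inline void serializationDoneSketch(const CachedPolicySketch& value) {
	value.alreadyWritten = false;         // the next message serializes fresh bytes again
	value.cachedBytes.clear();
}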

// Context is an arbitrary type that is plumbed by reference throughout the

@@ -294,5 +317,6 @@ struct dynamic_size_traits<Reference<IReplicationPolicy>> : std::true_type {
}
};

+static_assert(detail::has_serialization_done<dynamic_size_traits<Reference<IReplicationPolicy>>>::value);

#endif


@@ -47,7 +47,7 @@ bool firstInBatch(CommitTransactionRequest x) {
}

ACTOR template <class X>
-Future<Void> batcher(PromiseStream<std::pair<std::vector<X>, int> > out, FutureStream<X> in, double avgMinDelay, double* avgMaxDelay, double emptyBatchTimeout, int maxCount, int desiredBytes, int maxBytes, Optional<PromiseStream<Void>> batchStartedStream, int64_t *commitBatchesMemBytesCount, int64_t commitBatchesMemBytesLimit, int taskID = TaskDefaultDelay, Counter* counter = 0)
+Future<Void> batcher(PromiseStream<std::pair<std::vector<X>, int> > out, FutureStream<X> in, double avgMinDelay, double* avgMaxDelay, double emptyBatchTimeout, int maxCount, int desiredBytes, int maxBytes, Optional<PromiseStream<Void>> batchStartedStream, int64_t *commitBatchesMemBytesCount, int64_t commitBatchesMemBytesLimit, TaskPriority taskID = TaskPriority::DefaultDelay, Counter* counter = 0)
{
wait( delayJittered(*avgMaxDelay, taskID) ); // smooth out
// This is set up to deliver even zero-size batches if emptyBatchTimeout elapses, because that's what master proxy wants. The source control history


@@ -31,15 +31,19 @@
struct FlowReceiver : private NetworkMessageReceiver {
// Common endpoint code for NetSAV<> and NetNotifiedQueue<>

-FlowReceiver() : m_isLocalEndpoint(false) {}
-FlowReceiver(Endpoint const& remoteEndpoint) : endpoint(remoteEndpoint), m_isLocalEndpoint(false) {
-FlowTransport::transport().addPeerReference(endpoint, this);
+FlowReceiver() : m_isLocalEndpoint(false), m_stream(false) {
}

+FlowReceiver(Endpoint const& remoteEndpoint, bool stream)
+    : endpoint(remoteEndpoint), m_isLocalEndpoint(false), m_stream(stream) {
+FlowTransport::transport().addPeerReference(endpoint, m_stream);
+}

~FlowReceiver() {
if (m_isLocalEndpoint) {
FlowTransport::transport().removeEndpoint(endpoint, this);
} else {
-FlowTransport::transport().removePeerReference(endpoint, this);
+FlowTransport::transport().removePeerReference(endpoint, m_stream);
}
}


@@ -48,7 +52,7 @@ struct FlowReceiver : private NetworkMessageReceiver {

// If already a remote endpoint, returns that. Otherwise makes this
// a local endpoint and returns that.
-const Endpoint& getEndpoint(int taskID) {
+const Endpoint& getEndpoint(TaskPriority taskID) {
if (!endpoint.isValid()) {
m_isLocalEndpoint = true;
FlowTransport::transport().addEndpoint(endpoint, this, taskID);

@@ -56,16 +60,17 @@ struct FlowReceiver : private NetworkMessageReceiver {
return endpoint;
}

-void makeWellKnownEndpoint(Endpoint::Token token, int taskID) {
+void makeWellKnownEndpoint(Endpoint::Token token, TaskPriority taskID) {
ASSERT(!endpoint.isValid());
m_isLocalEndpoint = true;
endpoint.token = token;
FlowTransport::transport().addWellKnownEndpoint(endpoint, this, taskID);
}

-protected:
+private:
Endpoint endpoint;
bool m_isLocalEndpoint;
+bool m_stream;
};
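// A standalone sketch (not part of this patch) of what the new m_stream flag records: peer
// reference counting applies only to stream endpoints (request streams), never to one-shot reply
// promises, and the flag lets the destructor drop the reference without consulting the receiver
// again. Class and member names below are simplified stand-ins for NetSAV/NetNotifiedQueue.
struct ReceiverSketch {
	bool isLocalEndpoint = false;
	bool isStream = false;

	explicit ReceiverSketch(bool stream) : isStream(stream) {
		// the real constructor calls addPeerReference(endpoint, isStream) here
	}
	~ReceiverSketch() {
		if (!isLocalEndpoint) {
			// the real destructor calls removePeerReference(endpoint, isStream); that call is a
			// no-op when isStream is false, so only request streams ever held a reference
		}
	}
};
// ReceiverSketch replyPromiseLike(false);   // NetSAV: never holds a peer reference
// ReceiverSketch requestStreamLike(true);   // NetNotifiedQueue: reference held for its lifetime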

template <class T>

@@ -74,7 +79,9 @@ struct NetSAV : SAV<T>, FlowReceiver, FastAllocated<NetSAV<T>> {
using FastAllocated<NetSAV<T>>::operator delete;

NetSAV(int futures, int promises) : SAV<T>(futures, promises) {}
-NetSAV(int futures, int promises, const Endpoint& remoteEndpoint) : SAV<T>(futures, promises), FlowReceiver(remoteEndpoint) {}
+NetSAV(int futures, int promises, const Endpoint& remoteEndpoint)
+    : SAV<T>(futures, promises), FlowReceiver(remoteEndpoint, false) {
+}

virtual void destroy() { delete this; }
virtual void receive(ArenaReader& reader) {

@@ -128,7 +135,7 @@ public:
~ReplyPromise() { if (sav) sav->delPromiseRef(); }

ReplyPromise(const Endpoint& endpoint) : sav(new NetSAV<T>(0, 1, endpoint)) {}
-const Endpoint& getEndpoint(int taskID = TaskDefaultPromiseEndpoint) const { return sav->getEndpoint(taskID); }
+const Endpoint& getEndpoint(TaskPriority taskID = TaskPriority::DefaultPromiseEndpoint) const { return sav->getEndpoint(taskID); }

void operator=(const ReplyPromise& rhs) {
if (rhs.sav) rhs.sav->addPromiseRef();

@@ -204,19 +211,19 @@ template <class Reply>
void resetReply(ReplyPromise<Reply> & p) { p.reset(); }

template <class Request>
-void resetReply(Request& r, int taskID) { r.reply.reset(); r.reply.getEndpoint(taskID); }
+void resetReply(Request& r, TaskPriority taskID) { r.reply.reset(); r.reply.getEndpoint(taskID); }

template <class Reply>
-void resetReply(ReplyPromise<Reply> & p, int taskID) { p.reset(); p.getEndpoint(taskID); }
+void resetReply(ReplyPromise<Reply> & p, TaskPriority taskID) { p.reset(); p.getEndpoint(taskID); }

template <class Request>
-void setReplyPriority(Request& r, int taskID) { r.reply.getEndpoint(taskID); }
+void setReplyPriority(Request& r, TaskPriority taskID) { r.reply.getEndpoint(taskID); }

template <class Reply>
-void setReplyPriority(ReplyPromise<Reply> & p, int taskID) { p.getEndpoint(taskID); }
+void setReplyPriority(ReplyPromise<Reply> & p, TaskPriority taskID) { p.getEndpoint(taskID); }

template <class Reply>
-void setReplyPriority(const ReplyPromise<Reply> & p, int taskID) { p.getEndpoint(taskID); }
+void setReplyPriority(const ReplyPromise<Reply> & p, TaskPriority taskID) { p.getEndpoint(taskID); }




@@ -228,7 +235,8 @@ struct NetNotifiedQueue : NotifiedQueue<T>, FlowReceiver, FastAllocated<NetNotif
using FastAllocated<NetNotifiedQueue<T>>::operator delete;

NetNotifiedQueue(int futures, int promises) : NotifiedQueue<T>(futures, promises) {}
-NetNotifiedQueue(int futures, int promises, const Endpoint& remoteEndpoint) : NotifiedQueue<T>(futures, promises), FlowReceiver(remoteEndpoint) {}
+NetNotifiedQueue(int futures, int promises, const Endpoint& remoteEndpoint)
+    : NotifiedQueue<T>(futures, promises), FlowReceiver(remoteEndpoint, true) {}

virtual void destroy() { delete this; }
virtual void receive(ArenaReader& reader) {

@@ -281,7 +289,7 @@ public:
return reportEndpointFailure(getReplyPromise(value).getFuture(), getEndpoint());
}
template <class X>
-Future<REPLY_TYPE(X)> getReply(const X& value, int taskID) const {
+Future<REPLY_TYPE(X)> getReply(const X& value, TaskPriority taskID) const {
setReplyPriority(value, taskID);
return getReply(value);
}

@@ -290,7 +298,7 @@ public:
return getReply(ReplyPromise<X>());
}
template <class X>
-Future<X> getReplyWithTaskID(int taskID) const {
+Future<X> getReplyWithTaskID(TaskPriority taskID) const {
ReplyPromise<X> reply;
reply.getEndpoint(taskID);
return getReply(reply);

@@ -302,7 +310,7 @@ public:
// If cancelled or returns failure, request was or will be delivered zero or one times.
// The caller must be capable of retrying if this request returns failure
template <class X>
-Future<ErrorOr<REPLY_TYPE(X)>> tryGetReply(const X& value, int taskID) const {
+Future<ErrorOr<REPLY_TYPE(X)>> tryGetReply(const X& value, TaskPriority taskID) const {
setReplyPriority(value, taskID);
if (queue->isRemoteEndpoint()) {
Future<Void> disc = makeDependent<T>(IFailureMonitor::failureMonitor()).onDisconnectOrFailure(getEndpoint(taskID));

@@ -344,7 +352,7 @@ public:
// If it returns failure, the failure detector considers the endpoint failed permanently or for the given amount of time
// See IFailureMonitor::onFailedFor() for an explanation of the duration and slope parameters.
template <class X>
-Future<ErrorOr<REPLY_TYPE(X)>> getReplyUnlessFailedFor(const X& value, double sustainedFailureDuration, double sustainedFailureSlope, int taskID) const {
+Future<ErrorOr<REPLY_TYPE(X)>> getReplyUnlessFailedFor(const X& value, double sustainedFailureDuration, double sustainedFailureSlope, TaskPriority taskID) const {
// If it is local endpoint, no need for failure monitoring
return waitValueOrSignal(getReply(value, taskID),
makeDependent<T>(IFailureMonitor::failureMonitor()).onFailedFor(getEndpoint(taskID), sustainedFailureDuration, sustainedFailureSlope),

@@ -388,8 +396,8 @@ public:
//queue = (NetNotifiedQueue<T>*)0xdeadbeef;
}

-Endpoint getEndpoint(int taskID = TaskDefaultEndpoint) const { return queue->getEndpoint(taskID); }
-void makeWellKnownEndpoint(Endpoint::Token token, int taskID) {
+Endpoint getEndpoint(TaskPriority taskID = TaskPriority::DefaultEndpoint) const { return queue->getEndpoint(taskID); }
+void makeWellKnownEndpoint(Endpoint::Token token, TaskPriority taskID) {
queue->makeWellKnownEndpoint(token, taskID);
}


@@ -425,7 +433,10 @@ struct serializable_traits<RequestStream<T>> : std::true_type {
} else {
const auto& ep = stream.getEndpoint();
serializer(ar, ep);
-UNSTOPPABLE_ASSERT(ep.getPrimaryAddress().isValid()); // No serializing PromiseStreams on a client with no public address
+if constexpr (Archiver::isSerializing) { // Don't assert this when collecting vtable for flatbuffers
+UNSTOPPABLE_ASSERT(ep.getPrimaryAddress()
+                       .isValid()); // No serializing PromiseStreams on a client with no public address
+}
}
}
};


@@ -50,7 +50,7 @@ Future<REPLY_TYPE(Req)> retryBrokenPromise( RequestStream<Req> to, Req request )
}

ACTOR template <class Req>
-Future<REPLY_TYPE(Req)> retryBrokenPromise( RequestStream<Req> to, Req request, int taskID ) {
+Future<REPLY_TYPE(Req)> retryBrokenPromise( RequestStream<Req> to, Req request, TaskPriority taskID ) {
// Like to.getReply(request), except that a broken_promise exception results in retrying request immediately.
// Suitable for use with well known endpoints, which are likely to return to existence after the other process restarts.
// Not normally useful for ordinary endpoints, which conventionally are permanently destroyed after replying with broken_promise.


@@ -381,8 +381,17 @@ private:
ACTOR static Future<Void> trackLeakedConnection( Sim2Conn* self ) {
wait( g_simulator.onProcess( self->process ) );
-// SOMEDAY: Make this value variable? Dependent on buggification status?
-wait( delay( 20.0 ) );
-TraceEvent(SevError, "LeakedConnection", self->dbgid).error(connection_leaked()).detail("MyAddr", self->process->address).detail("PeerAddr", self->peerEndpoint).detail("PeerId", self->peerId).detail("Opened", self->opened);
+if (self->process->address.isPublic()) {
+wait( delay( FLOW_KNOBS->CONNECTION_MONITOR_IDLE_TIMEOUT * FLOW_KNOBS->CONNECTION_MONITOR_IDLE_TIMEOUT * 1.5 ) );
+} else {
+wait( delay( FLOW_KNOBS->CONNECTION_MONITOR_IDLE_TIMEOUT * 1.5 ) );
+}
+TraceEvent(SevError, "LeakedConnection", self->dbgid)
+    .error(connection_leaked())
+    .detail("MyAddr", self->process->address)
+    .detail("PeerAddr", self->peerEndpoint)
+    .detail("PeerId", self->peerId)
+    .detail("Opened", self->opened);
return Void();
}
};
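// A standalone sketch (not part of this patch) of the new leak-report delay above: now that idle
// connections are closed on purpose, the simulator should only call a connection "leaked" after
// comfortably outliving the idle-close window (and a much longer window for public addresses,
// which also wait out the remote side). The knob value below is an assumed stand-in.
constexpr double kConnectionMonitorIdleTimeout = 180.0;

inline double leakReportDelaySketch(bool isPublicAddress) {
	return isPublicAddress
	           ? kConnectionMonitorIdleTimeout * kConnectionMonitorIdleTimeout * 1.5
	           : kConnectionMonitorIdleTimeout * 1.5;
}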

@@ -423,7 +432,7 @@ public:
ACTOR static Future<Reference<IAsyncFile>> open( std::string filename, int flags, int mode,
Reference<DiskParameters> diskParameters = Reference<DiskParameters>(new DiskParameters(25000, 150000000)), bool delayOnWrite = true ) {
state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
-state int currentTaskID = g_network->getCurrentTask();
+state TaskPriority currentTaskID = g_network->getCurrentTask();

if(++openCount >= 3000) {
TraceEvent(SevError, "TooManyFiles");

@@ -742,11 +751,11 @@ public:
// Everything actually network related is delegated to the Sim2Net class; Sim2 is only concerned with simulating machines and time
virtual double now() { return time; }

-virtual Future<class Void> delay( double seconds, int taskID ) {
-ASSERT(taskID >= TaskMinPriority && taskID <= TaskMaxPriority);
+virtual Future<class Void> delay( double seconds, TaskPriority taskID ) {
+ASSERT(taskID >= TaskPriority::Min && taskID <= TaskPriority::Max);
return delay( seconds, taskID, currentProcess );
}
-Future<class Void> delay( double seconds, int taskID, ProcessInfo* machine ) {
+Future<class Void> delay( double seconds, TaskPriority taskID, ProcessInfo* machine ) {
ASSERT( seconds >= -0.0001 );
seconds = std::max(0.0, seconds);
Future<Void> f;

@@ -761,13 +770,13 @@ public:

return f;
}
-ACTOR static Future<Void> checkShutdown(Sim2 *self, int taskID) {
+ACTOR static Future<Void> checkShutdown(Sim2 *self, TaskPriority taskID) {
wait(success(self->getCurrentProcess()->shutdownSignal.getFuture()));
self->setCurrentTask(taskID);
return Void();
}
-virtual Future<class Void> yield( int taskID ) {
-if (taskID == TaskDefaultYield) taskID = currentTaskID;
+virtual Future<class Void> yield( TaskPriority taskID ) {
+if (taskID == TaskPriority::DefaultYield) taskID = currentTaskID;
if (check_yield(taskID)) {
// We want to check that yielders can handle actual time elapsing (it sometimes will outside simulation), but
// don't want to prevent instantaneous shutdown of "rebooted" machines.

@@ -776,7 +785,7 @@ public:
setCurrentTask(taskID);
return Void();
}
-virtual bool check_yield( int taskID ) {
+virtual bool check_yield( TaskPriority taskID ) {
if (yielded) return true;
if (--yield_limit <= 0) {
yield_limit = deterministicRandom()->randomInt(1, 150); // If yield returns false *too* many times in a row, there could be a stack overflow, since we can't deterministically check stack size as the real network does

@@ -784,10 +793,10 @@ public:
}
return yielded = BUGGIFY_WITH_PROB(0.01);
}
-virtual int getCurrentTask() {
+virtual TaskPriority getCurrentTask() {
return currentTaskID;
}
-virtual void setCurrentTask(int taskID ) {
+virtual void setCurrentTask(TaskPriority taskID ) {
currentTaskID = taskID;
}
// Sets the taskID/priority of the current task, without yielding

@@ -924,7 +933,7 @@ public:
}
if ( mustBeDurable || deterministicRandom()->random01() < 0.5 ) {
state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
-state int currentTaskID = g_network->getCurrentTask();
+state TaskPriority currentTaskID = g_network->getCurrentTask();
wait( g_simulator.onMachine( currentProcess ) );
try {
wait( ::delay(0.05 * deterministicRandom()->random01()) );

@@ -950,7 +959,7 @@ public:
ACTOR static Future<Void> runLoop(Sim2 *self) {
state ISimulator::ProcessInfo *callingMachine = self->currentProcess;
while ( !self->isStopped ) {
-wait( self->net2->yield(TaskDefaultYield) );
+wait( self->net2->yield(TaskPriority::DefaultYield) );

self->mutex.enter();
if( self->tasks.size() == 0 ) {

@@ -1580,23 +1589,23 @@ public:
machines.erase(machineId);
}

-Sim2(bool objSerializer) : time(0.0), taskCount(0), yielded(false), yield_limit(0), currentTaskID(-1) {
+Sim2(bool objSerializer) : time(0.0), taskCount(0), yielded(false), yield_limit(0), currentTaskID(TaskPriority::Zero) {
// Not letting currentProcess be NULL eliminates some annoying special cases
currentProcess = new ProcessInfo("NoMachine", LocalityData(Optional<Standalone<StringRef>>(), StringRef(), StringRef(), StringRef()), ProcessClass(), {NetworkAddress()}, this, "", "");
g_network = net2 = newNet2(false, true, objSerializer);
Net2FileSystem::newFileSystem();
-check_yield(0);
+check_yield(TaskPriority::Zero);
}

// Implementation
struct Task {
-int taskID;
+TaskPriority taskID;
double time;
uint64_t stable;
ProcessInfo* machine;
Promise<Void> action;
-Task( double time, int taskID, uint64_t stable, ProcessInfo* machine, Promise<Void>&& action ) : time(time), taskID(taskID), stable(stable), machine(machine), action(std::move(action)) {}
-Task( double time, int taskID, uint64_t stable, ProcessInfo* machine, Future<Void>& future ) : time(time), taskID(taskID), stable(stable), machine(machine) { future = action.getFuture(); }
+Task( double time, TaskPriority taskID, uint64_t stable, ProcessInfo* machine, Promise<Void>&& action ) : time(time), taskID(taskID), stable(stable), machine(machine), action(std::move(action)) {}
+Task( double time, TaskPriority taskID, uint64_t stable, ProcessInfo* machine, Future<Void>& future ) : time(time), taskID(taskID), stable(stable), machine(machine) { future = action.getFuture(); }
Task(Task&& rhs) BOOST_NOEXCEPT : time(rhs.time), taskID(rhs.taskID), stable(rhs.stable), machine(rhs.machine), action(std::move(rhs.action)) {}
void operator= ( Task const& rhs ) { taskID = rhs.taskID; time = rhs.time; stable = rhs.stable; machine = rhs.machine; action = rhs.action; }
Task( Task const& rhs ) : taskID(rhs.taskID), time(rhs.time), stable(rhs.stable), machine(rhs.machine), action(rhs.action) {}

@@ -1643,20 +1652,23 @@ public:
}
}

-virtual void onMainThread( Promise<Void>&& signal, int taskID ) {
+virtual void onMainThread( Promise<Void>&& signal, TaskPriority taskID ) {
// This is presumably coming from either a "fake" thread pool thread, i.e. it is actually on this thread
// or a thread created with g_network->startThread
ASSERT(getCurrentProcess());

mutex.enter();
-ASSERT(taskID >= TaskMinPriority && taskID <= TaskMaxPriority);
+ASSERT(taskID >= TaskPriority::Min && taskID <= TaskPriority::Max);
tasks.push( Task( time, taskID, taskCount++, getCurrentProcess(), std::move(signal) ) );
mutex.leave();
}
-virtual Future<Void> onProcess( ISimulator::ProcessInfo *process, int taskID ) {
+bool isOnMainThread() const override {
+return net2->isOnMainThread();
+}
+virtual Future<Void> onProcess( ISimulator::ProcessInfo *process, TaskPriority taskID ) {
return delay( 0, taskID, process );
}
-virtual Future<Void> onMachine( ISimulator::ProcessInfo *process, int taskID ) {
+virtual Future<Void> onMachine( ISimulator::ProcessInfo *process, TaskPriority taskID ) {
if( process->machine == 0 )
return Void();
return delay( 0, taskID, process->machine->machineProcess );

@@ -1665,7 +1677,7 @@ public:
//time is guarded by ISimulator::mutex. It is not necessary to guard reads on the main thread because
//time should only be modified from the main thread.
double time;
-int currentTaskID;
+TaskPriority currentTaskID;

//taskCount is guarded by ISimulator::mutex
uint64_t taskCount;

@@ -1695,9 +1707,9 @@ void startNewSimulator(bool objSerializer) {
}

ACTOR void doReboot( ISimulator::ProcessInfo *p, ISimulator::KillType kt ) {
-TraceEvent("RebootingProcessAttempt").detail("ZoneId", p->locality.zoneId()).detail("KillType", kt).detail("Process", p->toString()).detail("StartingClass", p->startingClass.toString()).detail("Failed", p->failed).detail("Excluded", p->excluded).detail("Cleared", p->cleared).detail("Rebooting", p->rebooting).detail("TaskDefaultDelay", TaskDefaultDelay);
+TraceEvent("RebootingProcessAttempt").detail("ZoneId", p->locality.zoneId()).detail("KillType", kt).detail("Process", p->toString()).detail("StartingClass", p->startingClass.toString()).detail("Failed", p->failed).detail("Excluded", p->excluded).detail("Cleared", p->cleared).detail("Rebooting", p->rebooting).detail("TaskPriorityDefaultDelay", TaskPriority::DefaultDelay);

-wait( g_sim2.delay( 0, TaskDefaultDelay, p ) ); // Switch to the machine in question
+wait( g_sim2.delay( 0, TaskPriority::DefaultDelay, p ) ); // Switch to the machine in question

try {
ASSERT( kt == ISimulator::RebootProcess || kt == ISimulator::Reboot || kt == ISimulator::RebootAndDelete || kt == ISimulator::RebootProcessAndDelete );


@@ -137,8 +137,8 @@ public:

ProcessInfo* getProcess( Endpoint const& endpoint ) { return getProcessByAddress(endpoint.getPrimaryAddress()); }
ProcessInfo* getCurrentProcess() { return currentProcess; }
-virtual Future<Void> onProcess( ISimulator::ProcessInfo *process, int taskID = -1 ) = 0;
-virtual Future<Void> onMachine( ISimulator::ProcessInfo *process, int taskID = -1 ) = 0;
+virtual Future<Void> onProcess( ISimulator::ProcessInfo *process, TaskPriority taskID = TaskPriority::Zero ) = 0;
+virtual Future<Void> onMachine( ISimulator::ProcessInfo *process, TaskPriority taskID = TaskPriority::Zero ) = 0;

virtual ProcessInfo* newProcess(const char* name, IPAddress ip, uint16_t port, uint16_t listenPerProcess,
LocalityData locality, ProcessClass startingClass, const char* dataFolder,


@@ -119,6 +119,7 @@ set(FDBSERVER_SRCS
workloads/DiskDurability.actor.cpp
workloads/DiskDurabilityTest.actor.cpp
workloads/DummyWorkload.actor.cpp
+workloads/ExternalWorkload.actor.cpp
workloads/FastTriggeredWatches.actor.cpp
workloads/FileSystem.actor.cpp
workloads/Fuzz.cpp

@@ -192,26 +193,15 @@ if(NOT WIN32)
target_compile_options(fdb_sqlite BEFORE PRIVATE -w) # disable warnings for third party
endif()

-set(java_workload_docstring "Build the Java workloads (makes fdbserver link against JNI)")
-set(WITH_JAVA_WORKLOAD OFF CACHE BOOL "${java_workload_docstring}")
-if(WITH_JAVA_WORKLOAD)
-list(APPEND FDBSERVER_SRCS workloads/JavaWorkload.actor.cpp)
-endif()
-
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/workloads)

add_flow_target(EXECUTABLE NAME fdbserver SRCS ${FDBSERVER_SRCS})
target_include_directories(fdbserver PRIVATE
+${CMAKE_SOURCE_DIR}/bindings/c
+${CMAKE_BINARY_DIR}/bindings/c
${CMAKE_CURRENT_BINARY_DIR}/workloads
${CMAKE_CURRENT_SOURCE_DIR}/workloads)
target_link_libraries(fdbserver PRIVATE fdbclient fdb_sqlite)
-if(WITH_JAVA_WORKLOAD)
-if(NOT JNI_FOUND)
-message(SEND_ERROR "Trying to build Java workload but couldn't find JNI")
-endif()
-target_include_directories(fdbserver PRIVATE "${JNI_INCLUDE_DIRS}")
-target_link_libraries(fdbserver PRIVATE "${JNI_LIBRARIES}")
-endif()
if (GPERFTOOLS_FOUND)
add_compile_definitions(USE_GPERFTOOLS)
target_link_libraries(fdbserver PRIVATE gperftools)


@@ -107,7 +107,7 @@ public:
DBInfo() : masterRegistrationCount(0), recoveryStalled(false), forceRecovery(false), unfinishedRecoveries(0), logGenerations(0),
clientInfo( new AsyncVar<ClientDBInfo>( ClientDBInfo() ) ),
serverInfo( new AsyncVar<ServerDBInfo>( ServerDBInfo() ) ),
-db( DatabaseContext::create( clientInfo, Future<Void>(), LocalityData(), true, TaskDefaultEndpoint, true ) ) // SOMEDAY: Locality!
+db( DatabaseContext::create( clientInfo, Future<Void>(), LocalityData(), true, TaskPriority::DefaultEndpoint, true ) ) // SOMEDAY: Locality!
{
}


@@ -1171,7 +1171,7 @@ public:
serverInfo.clusterInterface = ccInterface;
serverInfo.myLocality = locality;
db.serverInfo->set( serverInfo );
-cx = openDBOnServer(db.serverInfo, TaskDefaultEndpoint, true, true);
+cx = openDBOnServer(db.serverInfo, TaskPriority::DefaultEndpoint, true, true);
}

~ClusterControllerData() {

@@ -1425,7 +1425,7 @@ void checkBetterDDOrRK(ClusterControllerData* self) {
rkFitness = ProcessClass::ExcludeFit;
}
if (self->isProxyOrResolver(rkWorker.details.interf.locality.processId()) || rkFitness > bestFitnessForRK) {
-TraceEvent("CC_HaltRK", self->id).detail("RKID", db.ratekeeper.get().id())
+TraceEvent("CCHaltRK", self->id).detail("RKID", db.ratekeeper.get().id())
.detail("Excluded", rkWorker.priorityInfo.isExcluded)
.detail("Fitness", rkFitness).detail("BestFitness", bestFitnessForRK);
self->recruitRatekeeper.set(true);

@@ -1439,7 +1439,7 @@ void checkBetterDDOrRK(ClusterControllerData* self) {
ddFitness = ProcessClass::ExcludeFit;
}
if (self->isProxyOrResolver(ddWorker.details.interf.locality.processId()) || ddFitness > bestFitnessForDD) {
-TraceEvent("CC_HaltDD", self->id).detail("DDID", db.distributor.get().id())
+TraceEvent("CCHaltDD", self->id).detail("DDID", db.distributor.get().id())
.detail("Excluded", ddWorker.priorityInfo.isExcluded)
.detail("Fitness", ddFitness).detail("BestFitness", bestFitnessForDD);
ddWorker.haltDistributor = brokenPromiseToNever(db.distributor.get().haltDataDistributor.getReply(HaltDataDistributorRequest(self->id)));

@@ -1920,13 +1920,13 @@ void registerWorker( RegisterWorkerRequest req, ClusterControllerData *self ) {
self->clusterControllerDcId == req.distributorInterf.get().locality.dcId() &&
!self->recruitingDistributor) {
const DataDistributorInterface& di = req.distributorInterf.get();
-TraceEvent("CC_RegisterDataDistributor", self->id).detail("DDID", di.id());
+TraceEvent("CCRegisterDataDistributor", self->id).detail("DDID", di.id());
self->db.setDistributor(di);
}
if (req.ratekeeperInterf.present()) {
if((self->recruitingRatekeeperID.present() && self->recruitingRatekeeperID.get() != req.ratekeeperInterf.get().id()) ||
self->clusterControllerDcId != w.locality.dcId()) {
-TraceEvent("CC_HaltRegisteringRatekeeper", self->id).detail("RKID", req.ratekeeperInterf.get().id())
+TraceEvent("CCHaltRegisteringRatekeeper", self->id).detail("RKID", req.ratekeeperInterf.get().id())
.detail("DcID", printable(self->clusterControllerDcId))
.detail("ReqDcID", printable(w.locality.dcId()))
.detail("RecruitingRKID", self->recruitingRatekeeperID.present() ? self->recruitingRatekeeperID.get() : UID());

@@ -1934,9 +1934,9 @@ void registerWorker( RegisterWorkerRequest req, ClusterControllerData *self ) {
} else if(!self->recruitingRatekeeperID.present()) {
const RatekeeperInterface& rki = req.ratekeeperInterf.get();
const auto& ratekeeper = self->db.serverInfo->get().ratekeeper;
-TraceEvent("CC_RegisterRatekeeper", self->id).detail("RKID", rki.id());
+TraceEvent("CCRegisterRatekeeper", self->id).detail("RKID", rki.id());
if (ratekeeper.present() && ratekeeper.get().id() != rki.id() && self->id_worker.count(ratekeeper.get().locality.processId())) {
-TraceEvent("CC_HaltPreviousRatekeeper", self->id).detail("RKID", ratekeeper.get().id())
+TraceEvent("CCHaltPreviousRatekeeper", self->id).detail("RKID", ratekeeper.get().id())
.detail("DcID", printable(self->clusterControllerDcId))
.detail("ReqDcID", printable(w.locality.dcId()))
.detail("RecruitingRKID", self->recruitingRatekeeperID.present() ? self->recruitingRatekeeperID.get() : UID());

@@ -2475,7 +2475,7 @@ ACTOR Future<Void> handleForcedRecoveries( ClusterControllerData *self, ClusterC
ACTOR Future<DataDistributorInterface> startDataDistributor( ClusterControllerData *self ) {
wait(delay(0.0)); // If master fails at the same time, give it a chance to clear master PID.

-TraceEvent("CC_StartDataDistributor", self->id);
+TraceEvent("CCStartDataDistributor", self->id);
loop {
try {
state bool no_distributor = !self->db.serverInfo->get().distributor.present();

@@ -2494,16 +2494,16 @@ ACTOR Future<DataDistributorInterface> startDataDistributor( ClusterControllerDa
}

InitializeDataDistributorRequest req(deterministicRandom()->randomUniqueID());
-TraceEvent("CC_DataDistributorRecruit", self->id).detail("Addr", worker.interf.address());
+TraceEvent("CCDataDistributorRecruit", self->id).detail("Addr", worker.interf.address());

ErrorOr<DataDistributorInterface> distributor = wait( worker.interf.dataDistributor.getReplyUnlessFailedFor(req, SERVER_KNOBS->WAIT_FOR_DISTRIBUTOR_JOIN_DELAY, 0) );
if (distributor.present()) {
-TraceEvent("CC_DataDistributorRecruited", self->id).detail("Addr", worker.interf.address());
+TraceEvent("CCDataDistributorRecruited", self->id).detail("Addr", worker.interf.address());
return distributor.get();
}
}
catch (Error& e) {
-TraceEvent("CC_DataDistributorRecruitError", self->id).error(e);
+TraceEvent("CCDataDistributorRecruitError", self->id).error(e);
if ( e.code() != error_code_no_more_servers ) {
throw;
}

@@ -2520,7 +2520,7 @@ ACTOR Future<Void> monitorDataDistributor(ClusterControllerData *self) {
loop {
if ( self->db.serverInfo->get().distributor.present() ) {
wait( waitFailureClient( self->db.serverInfo->get().distributor.get().waitFailure, SERVER_KNOBS->DD_FAILURE_TIME ) );
-TraceEvent("CC_DataDistributorDied", self->id)
+TraceEvent("CCDataDistributorDied", self->id)
.detail("DistributorId", self->db.serverInfo->get().distributor.get().id());
self->db.clearInterf(ProcessClass::DataDistributorClass);
} else {

@@ -2535,7 +2535,7 @@ ACTOR Future<Void> monitorDataDistributor(ClusterControllerData *self) {
ACTOR Future<Void> startRatekeeper(ClusterControllerData *self) {
wait(delay(0.0)); // If master fails at the same time, give it a chance to clear master PID.

-TraceEvent("CC_StartRatekeeper", self->id);
+TraceEvent("CCStartRatekeeper", self->id);
loop {
try {
state bool no_ratekeeper = !self->db.serverInfo->get().ratekeeper.present();

@@ -2556,16 +2556,16 @@ ACTOR Future<Void> startRatekeeper(ClusterControllerData *self) {
}

self->recruitingRatekeeperID = req.reqId;
-TraceEvent("CC_RecruitRatekeeper", self->id).detail("Addr", worker.interf.address()).detail("RKID", req.reqId);
+TraceEvent("CCRecruitRatekeeper", self->id).detail("Addr", worker.interf.address()).detail("RKID", req.reqId);

ErrorOr<RatekeeperInterface> interf = wait( worker.interf.ratekeeper.getReplyUnlessFailedFor(req, SERVER_KNOBS->WAIT_FOR_RATEKEEPER_JOIN_DELAY, 0) );
if (interf.present()) {
self->recruitRatekeeper.set(false);
self->recruitingRatekeeperID = interf.get().id();
const auto& ratekeeper = self->db.serverInfo->get().ratekeeper;
-TraceEvent("CC_RatekeeperRecruited", self->id).detail("Addr", worker.interf.address()).detail("RKID", interf.get().id());
+TraceEvent("CCRatekeeperRecruited", self->id).detail("Addr", worker.interf.address()).detail("RKID", interf.get().id());
if (ratekeeper.present() && ratekeeper.get().id() != interf.get().id() && self->id_worker.count(ratekeeper.get().locality.processId())) {
-TraceEvent("CC_HaltRatekeeperAfterRecruit", self->id).detail("RKID", ratekeeper.get().id())
+TraceEvent("CCHaltRatekeeperAfterRecruit", self->id).detail("RKID", ratekeeper.get().id())
.detail("DcID", printable(self->clusterControllerDcId));
self->id_worker[ratekeeper.get().locality.processId()].haltRatekeeper = brokenPromiseToNever(ratekeeper.get().haltRatekeeper.getReply(HaltRatekeeperRequest(self->id)));
}

@@ -2577,7 +2577,7 @@ ACTOR Future<Void> startRatekeeper(ClusterControllerData *self) {
}
}
catch (Error& e) {
-TraceEvent("CC_RatekeeperRecruitError", self->id).error(e);
+TraceEvent("CCRatekeeperRecruitError", self->id).error(e);
if ( e.code() != error_code_no_more_servers ) {
throw;
}

@@ -2595,7 +2595,7 @@ ACTOR Future<Void> monitorRatekeeper(ClusterControllerData *self) {
if ( self->db.serverInfo->get().ratekeeper.present() && !self->recruitRatekeeper.get() ) {
choose {
when(wait(waitFailureClient( self->db.serverInfo->get().ratekeeper.get().waitFailure, SERVER_KNOBS->RATEKEEPER_FAILURE_TIME ))) {
-TraceEvent("CC_RatekeeperDied", self->id)
+TraceEvent("CCRatekeeperDied", self->id)
.detail("RKID", self->db.serverInfo->get().ratekeeper.get().id());
self->db.clearInterf(ProcessClass::RatekeeperClass);
}


@@ -63,13 +63,13 @@ struct ClusterControllerFullInterface {

void initEndpoints() {
clientInterface.initEndpoints();
-recruitFromConfiguration.getEndpoint( TaskClusterController );
-recruitRemoteFromConfiguration.getEndpoint( TaskClusterController );
-recruitStorage.getEndpoint( TaskClusterController );
-registerWorker.getEndpoint( TaskClusterController );
-getWorkers.getEndpoint( TaskClusterController );
-registerMaster.getEndpoint( TaskClusterController );
-getServerDBInfo.getEndpoint( TaskClusterController );
+recruitFromConfiguration.getEndpoint( TaskPriority::ClusterController );
+recruitRemoteFromConfiguration.getEndpoint( TaskPriority::ClusterController );
+recruitStorage.getEndpoint( TaskPriority::ClusterController );
+registerWorker.getEndpoint( TaskPriority::ClusterController );
+getWorkers.getEndpoint( TaskPriority::ClusterController );
+registerMaster.getEndpoint( TaskPriority::ClusterController );
+getServerDBInfo.getEndpoint( TaskPriority::ClusterController );
}

template <class Ar>


@@ -20,8 +20,9 @@

#include "fdbserver/CoordinationInterface.h"
#include "fdbserver/IKeyValueStore.h"
-#include "flow/ActorCollection.h"
#include "fdbserver/Knobs.h"
+#include "fdbserver/WorkerInterface.actor.h"
+#include "flow/ActorCollection.h"
#include "flow/UnitTest.h"
#include "flow/IndexedSet.h"
#include "flow/actorcompiler.h" // This must be the last #include.

@@ -52,8 +53,8 @@ GenerationRegInterface::GenerationRegInterface( NetworkAddress remote )

GenerationRegInterface::GenerationRegInterface( INetwork* local )
{
-read.makeWellKnownEndpoint( WLTOKEN_GENERATIONREG_READ, TaskCoordination );
-write.makeWellKnownEndpoint( WLTOKEN_GENERATIONREG_WRITE, TaskCoordination );
+read.makeWellKnownEndpoint( WLTOKEN_GENERATIONREG_READ, TaskPriority::Coordination );
+write.makeWellKnownEndpoint( WLTOKEN_GENERATIONREG_WRITE, TaskPriority::Coordination );
}

LeaderElectionRegInterface::LeaderElectionRegInterface(NetworkAddress remote)

@@ -67,9 +68,9 @@ LeaderElectionRegInterface::LeaderElectionRegInterface(NetworkAddress remote)
LeaderElectionRegInterface::LeaderElectionRegInterface(INetwork* local)
: ClientLeaderRegInterface(local)
{
-candidacy.makeWellKnownEndpoint( WLTOKEN_LEADERELECTIONREG_CANDIDACY, TaskCoordination );
-leaderHeartbeat.makeWellKnownEndpoint( WLTOKEN_LEADERELECTIONREG_LEADERHEARTBEAT, TaskCoordination );
-forward.makeWellKnownEndpoint( WLTOKEN_LEADERELECTIONREG_FORWARD, TaskCoordination );
+candidacy.makeWellKnownEndpoint( WLTOKEN_LEADERELECTIONREG_CANDIDACY, TaskPriority::Coordination );
+leaderHeartbeat.makeWellKnownEndpoint( WLTOKEN_LEADERELECTIONREG_LEADERHEARTBEAT, TaskPriority::Coordination );
+forward.makeWellKnownEndpoint( WLTOKEN_LEADERELECTIONREG_FORWARD, TaskPriority::Coordination );
}

ServerCoordinators::ServerCoordinators( Reference<ClusterConnectionFile> cf )

@@ -360,11 +361,11 @@ struct LeaderRegisterCollection {
return Void();
}

-LeaderElectionRegInterface& getInterface(KeyRef key) {
+LeaderElectionRegInterface& getInterface(KeyRef key, UID id) {
auto i = registerInterfaces.find( key );
if (i == registerInterfaces.end()) {
Key k = key;
-Future<Void> a = wrap(this, k, leaderRegister(registerInterfaces[k], k) );
+Future<Void> a = wrap(this, k, leaderRegister(registerInterfaces[k], k), id);
if (a.isError()) throw a.getError();
ASSERT( !a.isReady() );
actors.add( a );

@@ -374,11 +375,15 @@ struct LeaderRegisterCollection {
return i->value;
}

-ACTOR static Future<Void> wrap( LeaderRegisterCollection* self, Key key, Future<Void> actor ) {
+ACTOR static Future<Void> wrap( LeaderRegisterCollection* self, Key key, Future<Void> actor, UID id ) {
state Error e;
try {
+// FIXME: Get worker ID here
+startRole(Role::COORDINATOR, id, UID());
wait(actor);
+endRole(Role::COORDINATOR, id, "Coordinator changed");
} catch (Error& err) {
+endRole(Role::COORDINATOR, id, err.what(), err.code() == error_code_actor_cancelled, err);
if (err.code() == error_code_actor_cancelled)
throw;
e = err;
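// A standalone sketch (not part of this patch) of the bookkeeping wrap() now performs around the
// leaderRegister actor above: announce the role before running the body and record how it ended.
// The plumbing below is a simplified stand-in for startRole()/endRole(), not the real interface,
// and unlike the real code it rethrows every failure rather than only actor cancellation.
#include <exception>
#include <functional>
#include <iostream>
#include <string>

inline void runAsCoordinatorSketch(const std::string& id, const std::function<void()>& body) {
	std::cout << "StartRole Coordinator " << id << "\n";
	try {
		body();
		std::cout << "EndRole Coordinator " << id << ": Coordinator changed\n";
	} catch (const std::exception& err) {
		std::cout << "EndRole Coordinator " << id << ": " << err.what() << "\n";
		throw;
	}
}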

@@ -392,7 +397,7 @@ struct LeaderRegisterCollection {

// leaderServer multiplexes multiple leaderRegisters onto a single LeaderElectionRegInterface,
// creating and destroying them on demand.
-ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf, OnDemandStore *pStore) {
+ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf, OnDemandStore *pStore, UID id) {
state LeaderRegisterCollection regs( pStore );
state ActorCollection forwarders(false);


@@ -404,21 +409,21 @@ ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf, OnDemandStore
if( forward.present() )
req.reply.send( forward.get() );
else
-regs.getInterface(req.key).getLeader.send( req );
+regs.getInterface(req.key, id).getLeader.send( req );
}
when ( CandidacyRequest req = waitNext( interf.candidacy.getFuture() ) ) {
Optional<LeaderInfo> forward = regs.getForward(req.key);
if( forward.present() )
req.reply.send( forward.get() );
else
-regs.getInterface(req.key).candidacy.send(req);
+regs.getInterface(req.key, id).candidacy.send(req);
}
when ( LeaderHeartbeatRequest req = waitNext( interf.leaderHeartbeat.getFuture() ) ) {
Optional<LeaderInfo> forward = regs.getForward(req.key);
if( forward.present() )
req.reply.send( false );
else
-regs.getInterface(req.key).leaderHeartbeat.send(req);
+regs.getInterface(req.key, id).leaderHeartbeat.send(req);
}
when ( ForwardRequest req = waitNext( interf.forward.getFuture() ) ) {
Optional<LeaderInfo> forward = regs.getForward(req.key);

@@ -426,7 +431,7 @@ ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf, OnDemandStore
req.reply.send( Void() );
else {
forwarders.add( LeaderRegisterCollection::setForward( &regs, req.key, ClusterConnectionString(req.conn.toString()) ) );
-regs.getInterface(req.key).forward.send(req);
+regs.getInterface(req.key, id).forward.send(req);
}
}
when( wait( forwarders.getResult() ) ) { ASSERT(false); throw internal_error(); }

@@ -442,7 +447,7 @@ ACTOR Future<Void> coordinationServer(std::string dataFolder) {
TraceEvent("CoordinationServer", myID).detail("MyInterfaceAddr", myInterface.read.getEndpoint().getPrimaryAddress()).detail("Folder", dataFolder);

try {
-wait( localGenerationReg(myInterface, &store) || leaderServer(myLeaderInterface, &store) || store.getError() );
+wait( localGenerationReg(myInterface, &store) || leaderServer(myLeaderInterface, &store, myID) || store.getError() );
throw internal_error();
} catch (Error& e) {
TraceEvent("CoordinationServerError", myID).error(e, true);


@@ -263,7 +263,7 @@ typedef WorkPool<Coroutine, ThreadUnsafeSpinLock, true> CoroPool;



-ACTOR void coroSwitcher( Future<Void> what, int taskID, Coro* coro ) {
+ACTOR void coroSwitcher( Future<Void> what, TaskPriority taskID, Coro* coro ) {
try {
// state double t = now();
wait(what);


@@ -88,7 +88,7 @@ struct TCMachineInfo : public ReferenceCounted<TCMachineInfo> {

ACTOR Future<Void> updateServerMetrics( TCServerInfo *server ) {
state StorageServerInterface ssi = server->lastKnownInterface;
-state Future<ErrorOr<GetPhysicalMetricsReply>> metricsRequest = ssi.getPhysicalMetrics.tryGetReply( GetPhysicalMetricsRequest(), TaskDataDistributionLaunch );
+state Future<ErrorOr<GetPhysicalMetricsReply>> metricsRequest = ssi.getPhysicalMetrics.tryGetReply( GetPhysicalMetricsRequest(), TaskPriority::DataDistributionLaunch );
state Future<Void> resetRequest = Never();
state Future<std::pair<StorageServerInterface, ProcessClass>> interfaceChanged( server->onInterfaceChanged );
state Future<Void> serverRemoved( server->onRemoved );

@@ -104,7 +104,7 @@ ACTOR Future<Void> updateServerMetrics( TCServerInfo *server ) {
return Void();
}
metricsRequest = Never();
-resetRequest = delay( SERVER_KNOBS->METRIC_DELAY, TaskDataDistributionLaunch );
+resetRequest = delay( SERVER_KNOBS->METRIC_DELAY, TaskPriority::DataDistributionLaunch );
}
when( std::pair<StorageServerInterface,ProcessClass> _ssi = wait( interfaceChanged ) ) {
ssi = _ssi.first;

@@ -120,7 +120,7 @@ ACTOR Future<Void> updateServerMetrics( TCServerInfo *server ) {
}
else {
resetRequest = Never();
-metricsRequest = ssi.getPhysicalMetrics.tryGetReply( GetPhysicalMetricsRequest(), TaskDataDistributionLaunch );
+metricsRequest = ssi.getPhysicalMetrics.tryGetReply( GetPhysicalMetricsRequest(), TaskPriority::DataDistributionLaunch );
}
}
}

@@ -635,9 +635,9 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
shardsAffectedByTeamFailure(shardsAffectedByTeamFailure), doBuildTeams(true), teamBuilder(Void()),
badTeamRemover(Void()), redundantTeamRemover(Void()), configuration(configuration),
readyToStart(readyToStart), clearHealthyZoneFuture(Void()),
-checkTeamDelay(delay(SERVER_KNOBS->CHECK_TEAM_DELAY, TaskDataDistribution)),
+checkTeamDelay(delay(SERVER_KNOBS->CHECK_TEAM_DELAY, TaskPriority::DataDistribution)),
initialFailureReactionDelay(
-delayed(readyToStart, SERVER_KNOBS->INITIAL_FAILURE_REACTION_DELAY, TaskDataDistribution)),
+delayed(readyToStart, SERVER_KNOBS->INITIAL_FAILURE_REACTION_DELAY, TaskPriority::DataDistribution)),
healthyTeamCount(0), storageServerSet(new LocalityMap<UID>()),
initializationDoneActor(logOnCompletion(readyToStart && initialFailureReactionDelay, this)),
optimalTeamCount(0), recruitingStream(0), restartRecruiting(SERVER_KNOBS->DEBOUNCE_RECRUITING_DELAY),

@@ -671,7 +671,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {

ACTOR static Future<Void> logOnCompletion( Future<Void> signal, DDTeamCollection* self ) {
wait(signal);
-wait(delay(SERVER_KNOBS->LOG_ON_COMPLETION_DELAY, TaskDataDistribution));
+wait(delay(SERVER_KNOBS->LOG_ON_COMPLETION_DELAY, TaskPriority::DataDistribution));

if(!self->primary || self->configuration.usableRegions == 1) {
TraceEvent("DDTrackerStarting", self->distributorId)

@@ -1309,7 +1309,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
@@ -1309,7 +1309,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
// Five steps to create each machine team, which are documented in the function
// Reuse the ReplicationPolicy selectReplicas func to select a machine team
// Returns the number of added machine teams
int addBestMachineTeams(int targetMachineTeamsToBuild) {
int addBestMachineTeams(int targetMachineTeamsToBuild, int remainingMachineTeamBudget) {
int addedMachineTeams = 0;
int machineTeamsToBuild = 0;
@ -1327,7 +1327,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
|
||||
int loopCount = 0;
|
||||
// Add a team in each iteration
|
||||
while (addedMachineTeams < machineTeamsToBuild) {
|
||||
while (addedMachineTeams < machineTeamsToBuild || addedMachineTeams < remainingMachineTeamBudget) {
|
||||
// Step 2: Get least used machines from which we choose machines as a machine team
|
||||
std::vector<Reference<TCMachineInfo>> leastUsedMachines; // A less-used machine has a smaller number of teams
|
||||
int minTeamCount = std::numeric_limits<int>::max();
|
||||
|
@ -1377,6 +1377,8 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
// that have the least-utilized server
|
||||
team.clear();
|
||||
auto success = machineLocalityMap.selectReplicas(configuration.storagePolicy, forcedAttributes, team);
|
||||
// NOTE: selectReplicas() should always return success when storageTeamSize = 1
|
||||
ASSERT_WE_THINK(configuration.storageTeamSize > 1 || (configuration.storageTeamSize == 1 && success));
|
||||
if (!success) {
|
||||
break;
|
||||
}
|
||||
|
@ -1430,6 +1432,9 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
|
||||
addMachineTeam(machines);
|
||||
addedMachineTeams++;
|
||||
// Update the remaining machine team budget because the budget may decrease by
|
||||
// any value between 1 and storageTeamSize
|
||||
remainingMachineTeamBudget = getRemainingMachineTeamBudget();
|
||||
} else {
|
||||
TraceEvent(SevWarn, "DataDistributionBuildTeams", distributorId)
|
||||
.detail("Primary", primary)
|
||||
|
@ -1589,6 +1594,32 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
return totalHealthyMachineCount;
|
||||
}
|
||||
|
||||
std::pair<int64_t, int64_t> calculateMinMaxServerTeamNumOnServer() {
|
||||
int64_t minTeamNumber = std::numeric_limits<int64_t>::max();
|
||||
int64_t maxTeamNumber = 0;
|
||||
for (auto& server : server_info) {
|
||||
if (server_status.get(server.first).isUnhealthy()) {
|
||||
continue;
|
||||
}
|
||||
minTeamNumber = std::min((int64_t) server.second->teams.size(), minTeamNumber);
|
||||
maxTeamNumber = std::max((int64_t) server.second->teams.size(), maxTeamNumber);
|
||||
}
|
||||
return std::make_pair(minTeamNumber, maxTeamNumber);
|
||||
}
|
||||
|
||||
std::pair<int64_t, int64_t> calculateMinMaxMachineTeamNumOnMachine() {
|
||||
int64_t minTeamNumber = std::numeric_limits<int64_t>::max();
|
||||
int64_t maxTeamNumber = 0;
|
||||
for (auto& machine : machine_info) {
|
||||
if (!isMachineHealthy(machine.second)) {
|
||||
continue;
|
||||
}
|
||||
minTeamNumber = std::min<int64_t>((int64_t) machine.second->machineTeams.size(), minTeamNumber);
|
||||
maxTeamNumber = std::max<int64_t>((int64_t) machine.second->machineTeams.size(), maxTeamNumber);
|
||||
}
|
||||
return std::make_pair(minTeamNumber, maxTeamNumber);
|
||||
}
|
||||
|
||||
// Sanity check
|
||||
bool isServerTeamNumberCorrect(Reference<TCMachineTeamInfo>& mt) {
|
||||
int num = 0;
|
||||
|
@@ -1639,12 +1670,41 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
return healthyTeamCount;
}

// Each machine is expected to have SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER machine teams;
// remainingMachineTeamBudget is the number of machine teams needed to ensure every machine has
// SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER machine teams
int getRemainingMachineTeamBudget() {
int remainingMachineTeamBudget = 0;
for (auto& m : machine_info) {
int machineTeamCount = m.second->machineTeams.size();
remainingMachineTeamBudget += std::max(0, (int)(SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER - machineTeamCount));
}

// We over-provision the remainingMachineTeamBudget because we do not know, when a new machine team is built,
// how many times it can be counted into the budget. For example, when a new machine is added,
// a new machine team consumes only 1 unit of this budget.
return remainingMachineTeamBudget;
}

// Each server is expected to have SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER server teams
int getRemainingServerTeamBudget() {
// remainingTeamBudget is the number of teams needed to ensure every server has
// SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER teams
int remainingTeamBudget = 0;
for (auto& s : server_info) {
int numValidTeams = s.second->teams.size();
remainingTeamBudget += std::max(0, (int)(SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER - numValidTeams));
}

return remainingTeamBudget;
}
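
// Illustrative worked example (not from the diff): how the two budget functions above behave when
// SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER = 5. Suppose three healthy machines currently belong to
// 5, 2 and 0 machine teams. The budget is the summed per-machine shortfall:
//   max(0, 5-5) + max(0, 5-2) + max(0, 5-0) = 0 + 3 + 5 = 8
// A single new machine team can shrink the shortfall of up to storageTeamSize machines at once, so
// the budget deliberately over-provisions, as the comment in getRemainingMachineTeamBudget() notes;
// the server-team budget in getRemainingServerTeamBudget() works the same way per server.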
// Create server teams based on machine teams.
// Before the number of machine teams reaches the threshold, build a machine team for each server team.
// When it reaches the threshold, first try to build a server team with existing machine teams; if that
// fails, build an extra machine team and record the event in a trace event.
int addTeamsBestOf(int teamsToBuild, int desiredTeamNumber, int maxTeamNumber) {
ASSERT(teamsToBuild > 0);
int addTeamsBestOf(int teamsToBuild, int desiredTeamNumber, int maxTeamNumber, int remainingTeamBudget) {
ASSERT(teamsToBuild >= 0);
ASSERT_WE_THINK(machine_info.size() > 0 || server_info.size() == 0);

int addedMachineTeams = 0;
@ -1655,27 +1715,28 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
// When we change configuration, we may have machine teams with storageTeamSize in the old configuration.
|
||||
int healthyMachineTeamCount = getHealthyMachineTeamCount();
|
||||
int totalMachineTeamCount = machineTeams.size();
|
||||
|
||||
int totalHealthyMachineCount = calculateHealthyMachineCount();
|
||||
int remainingMachineTeamBudget = getRemainingMachineTeamBudget();
|
||||
|
||||
int desiredMachineTeams = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * totalHealthyMachineCount;
|
||||
int maxMachineTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * totalHealthyMachineCount;
|
||||
// machineTeamsToBuild mimics how the teamsToBuild is calculated in buildTeams()
|
||||
int machineTeamsToBuild =
|
||||
std::min(desiredMachineTeams - healthyMachineTeamCount, maxMachineTeams - totalMachineTeamCount);
|
||||
int machineTeamsToBuild = std::max(
|
||||
0, std::min(desiredMachineTeams - healthyMachineTeamCount, maxMachineTeams - totalMachineTeamCount));
|
||||
|
||||
TraceEvent("BuildMachineTeams")
|
||||
.detail("TotalHealthyMachine", totalHealthyMachineCount)
|
||||
.detail("HealthyMachineTeamCount", healthyMachineTeamCount)
|
||||
.detail("DesiredMachineTeams", desiredMachineTeams)
|
||||
.detail("MaxMachineTeams", maxMachineTeams)
|
||||
.detail("MachineTeamsToBuild", machineTeamsToBuild);
|
||||
.detail("MachineTeamsToBuild", machineTeamsToBuild)
|
||||
.detail("RemainingMachineTeamBudget", remainingMachineTeamBudget);
|
||||
// Pre-build all machine teams until we have the desired number of machine teams
|
||||
if (machineTeamsToBuild > 0) {
|
||||
addedMachineTeams = addBestMachineTeams(machineTeamsToBuild);
|
||||
if (machineTeamsToBuild > 0 || remainingMachineTeamBudget > 0) {
|
||||
addedMachineTeams = addBestMachineTeams(machineTeamsToBuild, remainingMachineTeamBudget);
|
||||
}
|
||||
|
||||
while (addedTeams < teamsToBuild) {
|
||||
while (addedTeams < teamsToBuild || addedTeams < remainingTeamBudget) {
|
||||
// Step 1: Create 1 best machine team
|
||||
std::vector<UID> bestServerTeam;
|
||||
int bestScore = std::numeric_limits<int>::max();
|
||||
|
@ -1752,6 +1813,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
// Step 4: Add the server team
|
||||
addTeam(bestServerTeam.begin(), bestServerTeam.end(), false);
|
||||
addedTeams++;
|
||||
remainingTeamBudget = getRemainingServerTeamBudget();
|
||||
|
||||
if (++loopCount > 2 * teamsToBuild * (configuration.storageTeamSize + 1)) {
|
||||
break;
|
||||
|
@ -1760,10 +1822,14 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
|
||||
healthyMachineTeamCount = getHealthyMachineTeamCount();
|
||||
|
||||
std::pair<uint64_t, uint64_t> minMaxTeamNumberOnServer = calculateMinMaxServerTeamNumOnServer();
|
||||
std::pair<uint64_t, uint64_t> minMaxMachineTeamNumberOnMachine = calculateMinMaxMachineTeamNumOnMachine();
|
||||
|
||||
TraceEvent("TeamCollectionInfo", distributorId)
|
||||
.detail("Primary", primary)
|
||||
.detail("AddedTeamNumber", addedTeams)
|
||||
.detail("AimToBuildTeamNumber", teamsToBuild)
|
||||
.detail("RemainingTeamBudget", remainingTeamBudget)
|
||||
.detail("CurrentTeamNumber", teams.size())
|
||||
.detail("DesiredTeamNumber", desiredTeamNumber)
|
||||
.detail("MaxTeamNumber", maxTeamNumber)
|
||||
|
@ -1773,6 +1839,11 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
.detail("DesiredMachineTeams", desiredMachineTeams)
|
||||
.detail("MaxMachineTeams", maxMachineTeams)
|
||||
.detail("TotalHealthyMachine", totalHealthyMachineCount)
|
||||
.detail("MinTeamNumberOnServer", minMaxTeamNumberOnServer.first)
|
||||
.detail("MaxTeamNumberOnServer", minMaxTeamNumberOnServer.second)
|
||||
.detail("MinMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.first)
|
||||
.detail("MaxMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.second)
|
||||
.detail("DoBuildTeams", doBuildTeams)
|
||||
.trackLatest("TeamCollectionInfo");
|
||||
|
||||
return addedTeams;
|
||||
|
@ -1789,10 +1860,14 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
int maxMachineTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * totalHealthyMachineCount;
|
||||
int healthyMachineTeamCount = getHealthyMachineTeamCount();
|
||||
|
||||
std::pair<uint64_t, uint64_t> minMaxTeamNumberOnServer = calculateMinMaxServerTeamNumOnServer();
|
||||
std::pair<uint64_t, uint64_t> minMaxMachineTeamNumberOnMachine = calculateMinMaxMachineTeamNumOnMachine();
|
||||
|
||||
TraceEvent("TeamCollectionInfo", distributorId)
|
||||
.detail("Primary", primary)
|
||||
.detail("AddedTeamNumber", 0)
|
||||
.detail("AimToBuildTeamNumber", 0)
|
||||
.detail("RemainingTeamBudget", 0)
|
||||
.detail("CurrentTeamNumber", teams.size())
|
||||
.detail("DesiredTeamNumber", desiredServerTeams)
|
||||
.detail("MaxTeamNumber", maxServerTeams)
|
||||
|
@ -1802,14 +1877,22 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
.detail("DesiredMachineTeams", desiredMachineTeams)
|
||||
.detail("MaxMachineTeams", maxMachineTeams)
|
||||
.detail("TotalHealthyMachine", totalHealthyMachineCount)
|
||||
.detail("MinTeamNumberOnServer", minMaxTeamNumberOnServer.first)
|
||||
.detail("MaxTeamNumberOnServer", minMaxTeamNumberOnServer.second)
|
||||
.detail("MinMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.first)
|
||||
.detail("MaxMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.second)
|
||||
.detail("DoBuildTeams", doBuildTeams)
|
||||
.trackLatest("TeamCollectionInfo");
|
||||
|
||||
// Debug purpose
|
||||
// if (healthyMachineTeamCount > desiredMachineTeams || machineTeams.size() > maxMachineTeams) {
|
||||
// // When the number of machine teams is over the limit, print out the current team info.
|
||||
// traceAllInfo(true);
|
||||
// }
|
||||
// Advance time so that we will not have multiple TeamCollectionInfo at the same time, otherwise
|
||||
// simulation test will randomly pick one TeamCollectionInfo trace, which could be the one before build teams
|
||||
// wait(delay(0.01));
|
||||
|
||||
// Debug purpose
|
||||
// if (healthyMachineTeamCount > desiredMachineTeams || machineTeams.size() > maxMachineTeams) {
|
||||
// // When the number of machine teams is over the limit, print out the current team info.
|
||||
// traceAllInfo(true);
|
||||
// }
|
||||
}
|
||||
|
||||
// Use the current set of known processes (from server_info) to compute an optimized set of storage server teams.
|
||||
|
@ -1856,10 +1939,14 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
totalTeamCount++;
|
||||
}
|
||||
}
|
||||
// Each server is expected to have SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER teams;
// remainingTeamBudget is the number of teams needed to ensure every server has
// SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER teams
|
||||
int remainingTeamBudget = self->getRemainingServerTeamBudget();
|
||||
|
||||
// teamsToBuild is calculated such that we will not build too many teams in the situation
|
||||
// when all (or most of) teams become unhealthy temporarily and then healthy again
|
||||
state int teamsToBuild = std::min(desiredTeams - teamCount, maxTeams - totalTeamCount);
|
||||
state int teamsToBuild = std::max(0, std::min(desiredTeams - teamCount, maxTeams - totalTeamCount));
|
||||
|
||||
TraceEvent("BuildTeamsBegin", self->distributorId)
|
||||
.detail("TeamsToBuild", teamsToBuild)
|
||||
|
@ -1876,13 +1963,13 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
.detail("MachineCount", self->machine_info.size())
|
||||
.detail("DesiredTeamsPerServer", SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER);
|
||||
|
||||
if (teamsToBuild > 0) {
|
||||
if (teamsToBuild > 0 || remainingTeamBudget > 0) {
|
||||
state vector<std::vector<UID>> builtTeams;
|
||||
|
||||
// addTeamsBestOf() will not add more teams than needed.
|
||||
// If the team number is more than the desired, the extra teams are added in the code path when
|
||||
// a team is added as an initial team
|
||||
int addedTeams = self->addTeamsBestOf(teamsToBuild, desiredTeams, maxTeams);
|
||||
int addedTeams = self->addTeamsBestOf(teamsToBuild, desiredTeams, maxTeams, remainingTeamBudget);
|
||||
|
||||
if (addedTeams <= 0 && self->teams.size() == 0) {
|
||||
TraceEvent(SevWarn, "NoTeamAfterBuildTeam")
|
||||
|
@ -1898,10 +1985,14 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
int maxMachineTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * totalHealthyMachineCount;
|
||||
int healthyMachineTeamCount = self->getHealthyMachineTeamCount();
|
||||
|
||||
std::pair<uint64_t, uint64_t> minMaxTeamNumberOnServer = self->calculateMinMaxServerTeamNumOnServer();
|
||||
std::pair<uint64_t, uint64_t> minMaxMachineTeamNumberOnMachine = self->calculateMinMaxMachineTeamNumOnMachine();
|
||||
|
||||
TraceEvent("TeamCollectionInfo", self->distributorId)
|
||||
.detail("Primary", self->primary)
|
||||
.detail("AddedTeamNumber", 0)
|
||||
.detail("AimToBuildTeamNumber", teamsToBuild)
|
||||
.detail("RemainingTeamBudget", remainingTeamBudget)
|
||||
.detail("CurrentTeamNumber", self->teams.size())
|
||||
.detail("DesiredTeamNumber", desiredTeams)
|
||||
.detail("MaxTeamNumber", maxTeams)
|
||||
|
@ -1911,6 +2002,11 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
.detail("DesiredMachineTeams", desiredMachineTeams)
|
||||
.detail("MaxMachineTeams", maxMachineTeams)
|
||||
.detail("TotalHealthyMachine", totalHealthyMachineCount)
|
||||
.detail("MinTeamNumberOnServer", minMaxTeamNumberOnServer.first)
|
||||
.detail("MaxTeamNumberOnServer", minMaxTeamNumberOnServer.second)
|
||||
.detail("MinMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.first)
|
||||
.detail("MaxMachineTeamNumberOnMachine", minMaxMachineTeamNumberOnMachine.second)
|
||||
.detail("DoBuildTeams", self->doBuildTeams)
|
||||
.trackLatest("TeamCollectionInfo");
|
||||
}
|
||||
}
|
||||
|
@ -1919,7 +2015,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
|
||||
//Building teams can cause servers to become undesired, which can make teams unhealthy.
|
||||
//Let all of these changes get worked out before responding to the get team request
|
||||
wait( delay(0, TaskDataDistributionLaunch) );
|
||||
wait( delay(0, TaskPriority::DataDistributionLaunch) );
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
@ -2232,7 +2328,7 @@ ACTOR Future<Void> waitUntilHealthy(DDTeamCollection* self) {
|
|||
TraceEvent("WaitUntilHealthyStalled", self->distributorId).detail("Primary", self->primary).detail("ZeroHealthy", self->zeroHealthyTeams->get()).detail("ProcessingUnhealthy", self->processingUnhealthy->get());
|
||||
wait(self->zeroHealthyTeams->onChange() || self->processingUnhealthy->onChange());
|
||||
}
|
||||
wait(delay(SERVER_KNOBS->DD_STALL_CHECK_DELAY, TaskLowPriority)); //After the team trackers wait on the initial failure reaction delay, they yield. We want to make sure every tracker has had the opportunity to send their relocations to the queue.
|
||||
wait(delay(SERVER_KNOBS->DD_STALL_CHECK_DELAY, TaskPriority::Low)); //After the team trackers wait on the initial failure reaction delay, they yield. We want to make sure every tracker has had the opportunity to send their relocations to the queue.
|
||||
if(!self->zeroHealthyTeams->get() && !self->processingUnhealthy->get()) {
|
||||
return Void();
|
||||
}
|
||||
|
@ -2308,6 +2404,16 @@ ACTOR Future<Void> teamRemover(DDTeamCollection* self) {
|
|||
team = mt->serverTeams[teamIndex];
|
||||
ASSERT(team->machineTeam->machineIDs == mt->machineIDs); // Sanity check
|
||||
|
||||
// Check if a server will have 0 team after the team is removed
|
||||
for (auto& s : team->getServers()) {
|
||||
if (s->teams.size() == 0) {
|
||||
TraceEvent(SevError, "TeamRemoverTooAggressive")
|
||||
.detail("Server", s->id)
|
||||
.detail("Team", team->getServerIDsStr());
|
||||
self->traceAllInfo(true);
|
||||
}
|
||||
}
|
||||
|
||||
// The team will be marked as a bad team
|
||||
bool foundTeam = self->removeTeam(team);
|
||||
ASSERT(foundTeam == true);
|
||||
|
@ -2540,7 +2646,12 @@ ACTOR Future<Void> teamTracker(DDTeamCollection* self, Reference<TCTeamInfo> tea
|
|||
|
||||
//If we cannot find the team, it could be a bad team so assume unhealthy priority
|
||||
if(!found) {
|
||||
maxPriority = std::max<int>( maxPriority, PRIORITY_TEAM_UNHEALTHY );
|
||||
// If the input team (in the function parameters) is a redundant team, found will be false.
// We want to differentiate the redundant_team from the unhealthy_team in terms of relocate priority.
|
||||
maxPriority =
|
||||
std::max<int>(maxPriority, redundantTeam ? PRIORITY_TEAM_REDUNDANT
|
||||
: PRIORITY_TEAM_UNHEALTHY);
|
||||
}
|
||||
} else {
|
||||
TEST(true); // A removed server is still associated with a team in SABTF
|
||||
|
@ -2638,7 +2749,7 @@ ACTOR Future<Void> trackExcludedServers( DDTeamCollection* self ) {
|
|||
if (nchid != lastChangeID)
|
||||
break;
|
||||
|
||||
wait( delay( SERVER_KNOBS->SERVER_LIST_DELAY, TaskDataDistribution ) ); // FIXME: make this tr.watch( excludedServersVersionKey ) instead
|
||||
wait( delay( SERVER_KNOBS->SERVER_LIST_DELAY, TaskPriority::DataDistribution ) ); // FIXME: make this tr.watch( excludedServersVersionKey ) instead
|
||||
tr = Transaction(self->cx);
|
||||
} catch (Error& e) {
|
||||
wait( tr.onError(e) );
|
||||
|
@@ -2734,12 +2845,18 @@ ACTOR Future<Void> waitHealthyZoneChange( DDTeamCollection* self ) {
if(val.present()) {
auto p = decodeHealthyZoneValue(val.get());
if(p.second > tr.getReadVersion().get()) {
healthyZoneTimeout = delay((p.second - tr.getReadVersion().get())/(double)SERVER_KNOBS->VERSIONS_PER_SECOND);
self->healthyZone.set(p.first);
} else {
double timeoutSeconds = (p.second - tr.getReadVersion().get())/(double)SERVER_KNOBS->VERSIONS_PER_SECOND;
healthyZoneTimeout = delay(timeoutSeconds);
if(self->healthyZone.get() != p.first) {
TraceEvent("MaintenanceZoneStart", self->distributorId).detail("ZoneID", printable(p.first)).detail("EndVersion", p.second).detail("Duration", timeoutSeconds);
self->healthyZone.set(p.first);
}
} else if(self->healthyZone.get().present()) {
TraceEvent("MaintenanceZoneEnd", self->distributorId);
self->healthyZone.set(Optional<Key>());
}
} else {
} else if(self->healthyZone.get().present()) {
TraceEvent("MaintenanceZoneEnd", self->distributorId);
self->healthyZone.set(Optional<Key>());
}
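
// Illustrative worked example (not from the diff): the timeout computed in the new branch above.
// If the healthy-zone entry ends at version p.second = readVersion + 120e6 and
// SERVER_KNOBS->VERSIONS_PER_SECOND is 1e6 (its usual setting), then
//   timeoutSeconds = (p.second - readVersion) / VERSIONS_PER_SECOND = 120e6 / 1e6 = 120
// so MaintenanceZoneStart reports a 120 second duration and the maintenance zone expires when that
// delay fires, unless it is ended earlier (MaintenanceZoneEnd).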
@ -2757,14 +2874,14 @@ ACTOR Future<Void> serverMetricsPolling( TCServerInfo *server) {
|
|||
state double lastUpdate = now();
|
||||
loop {
|
||||
wait( updateServerMetrics( server ) );
|
||||
wait( delayUntil( lastUpdate + SERVER_KNOBS->STORAGE_METRICS_POLLING_DELAY + SERVER_KNOBS->STORAGE_METRICS_RANDOM_DELAY * deterministicRandom()->random01(), TaskDataDistributionLaunch ) );
|
||||
wait( delayUntil( lastUpdate + SERVER_KNOBS->STORAGE_METRICS_POLLING_DELAY + SERVER_KNOBS->STORAGE_METRICS_RANDOM_DELAY * deterministicRandom()->random01(), TaskPriority::DataDistributionLaunch ) );
|
||||
lastUpdate = now();
|
||||
}
|
||||
}
|
||||
|
||||
//Returns the KeyValueStoreType of server if it is different from self->storeType
|
||||
ACTOR Future<KeyValueStoreType> keyValueStoreTypeTracker(DDTeamCollection* self, TCServerInfo *server) {
|
||||
state KeyValueStoreType type = wait(brokenPromiseToNever(server->lastKnownInterface.getKeyValueStoreType.getReplyWithTaskID<KeyValueStoreType>(TaskDataDistribution)));
|
||||
state KeyValueStoreType type = wait(brokenPromiseToNever(server->lastKnownInterface.getKeyValueStoreType.getReplyWithTaskID<KeyValueStoreType>(TaskPriority::DataDistribution)));
|
||||
if(type == self->configuration.storageServerStoreType && (self->includedDCs.empty() || std::find(self->includedDCs.begin(), self->includedDCs.end(), server->lastKnownInterface.locality.dcId()) != self->includedDCs.end()) )
|
||||
wait(Future<Void>(Never()));
|
||||
|
||||
|
@ -2787,7 +2904,7 @@ ACTOR Future<Void> waitForAllDataRemoved( Database cx, UID serverID, Version add
|
|||
}
|
||||
|
||||
// Wait for any change to the serverKeys for this server
|
||||
wait( delay(SERVER_KNOBS->ALL_DATA_REMOVED_DELAY, TaskDataDistribution) );
|
||||
wait( delay(SERVER_KNOBS->ALL_DATA_REMOVED_DELAY, TaskPriority::DataDistribution) );
|
||||
tr.reset();
|
||||
} catch (Error& e) {
|
||||
wait( tr.onError(e) );
|
||||
|
@ -2830,7 +2947,7 @@ ACTOR Future<Void> storageServerFailureTracker(
|
|||
ASSERT(!inHealthyZone);
|
||||
healthChanged = IFailureMonitor::failureMonitor().onStateEqual( interf.waitFailure.getEndpoint(), FailureStatus(false));
|
||||
} else if(!inHealthyZone) {
|
||||
healthChanged = waitFailureClientStrict(interf.waitFailure, SERVER_KNOBS->DATA_DISTRIBUTION_FAILURE_REACTION_TIME, TaskDataDistribution);
|
||||
healthChanged = waitFailureClientStrict(interf.waitFailure, SERVER_KNOBS->DATA_DISTRIBUTION_FAILURE_REACTION_TIME, TaskPriority::DataDistribution);
|
||||
}
|
||||
choose {
|
||||
when ( wait(healthChanged) ) {
|
||||
|
@ -2840,6 +2957,7 @@ ACTOR Future<Void> storageServerFailureTracker(
|
|||
}
|
||||
if(status->isFailed && self->healthyZone.get().present() && self->clearHealthyZoneFuture.isReady()) {
|
||||
self->clearHealthyZoneFuture = clearHealthyZone(self->cx);
|
||||
TraceEvent("MaintenanceZoneCleared", self->distributorId);
|
||||
self->healthyZone.set(Optional<Key>());
|
||||
}
|
||||
|
||||
|
@ -2953,11 +3071,14 @@ ACTOR Future<Void> storageServerTracker(
|
|||
if(hasWrongStoreTypeOrDC)
|
||||
self->restartRecruiting.trigger();
|
||||
|
||||
if ( lastIsUnhealthy && !status.isUnhealthy() && !server->teams.size() ) {
|
||||
if (lastIsUnhealthy && !status.isUnhealthy() &&
|
||||
server->teams.size() < SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER) {
|
||||
self->doBuildTeams = true;
|
||||
self->restartTeamBuilder.trigger(); // This does not trigger building teams if there exist healthy teams
|
||||
}
|
||||
lastIsUnhealthy = status.isUnhealthy();
|
||||
|
||||
state bool recordTeamCollectionInfo = false;
|
||||
choose {
|
||||
when( wait( failureTracker ) ) {
|
||||
// The server is failed AND all data has been removed from it, so permanently remove it.
|
||||
|
@ -3061,7 +3182,8 @@ ACTOR Future<Void> storageServerTracker(
|
|||
self->badTeamRemover = removeBadTeams(self);
|
||||
self->addActor.send(self->badTeamRemover);
|
||||
// The team number changes, so we need to update the team number info
|
||||
self->traceTeamCollectionInfo();
|
||||
// self->traceTeamCollectionInfo();
|
||||
recordTeamCollectionInfo = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3069,10 +3191,13 @@ ACTOR Future<Void> storageServerTracker(
|
|||
// We rely on the old failureTracker being actorCancelled since the old actor now has a pointer to an invalid location
|
||||
status = ServerStatus( status.isFailed, status.isUndesired, server->lastKnownInterface.locality );
|
||||
|
||||
// self->traceTeamCollectionInfo();
|
||||
recordTeamCollectionInfo = true;
|
||||
//Restart the storeTracker for the new interface
|
||||
storeTracker = keyValueStoreTypeTracker(self, server);
|
||||
hasWrongStoreTypeOrDC = false;
|
||||
self->restartTeamBuilder.trigger();
|
||||
|
||||
if(restartRecruiting)
|
||||
self->restartRecruiting.trigger();
|
||||
}
|
||||
|
@ -3093,6 +3218,10 @@ ACTOR Future<Void> storageServerTracker(
|
|||
server->wakeUpTracker = Promise<Void>();
|
||||
}
|
||||
}
|
||||
|
||||
if (recordTeamCollectionInfo) {
|
||||
self->traceTeamCollectionInfo();
|
||||
}
|
||||
}
|
||||
} catch( Error &e ) {
|
||||
if (e.code() != error_code_actor_cancelled && errorOut.canBeSet())
|
||||
|
@ -3120,7 +3249,7 @@ ACTOR Future<Void> monitorStorageServerRecruitment(DDTeamCollection* self) {
|
|||
loop {
|
||||
choose {
|
||||
when( wait( self->recruitingStream.onChange() ) ) {}
|
||||
when( wait( self->recruitingStream.get() == 0 ? delay(SERVER_KNOBS->RECRUITMENT_IDLE_DELAY, TaskDataDistribution) : Future<Void>(Never()) ) ) { break; }
|
||||
when( wait( self->recruitingStream.get() == 0 ? delay(SERVER_KNOBS->RECRUITMENT_IDLE_DELAY, TaskPriority::DataDistribution) : Future<Void>(Never()) ) ) { break; }
|
||||
}
|
||||
}
|
||||
TraceEvent("StorageServerRecruitment", self->distributorId)
|
||||
|
@ -3147,12 +3276,12 @@ ACTOR Future<Void> initializeStorage( DDTeamCollection* self, RecruitStorageRepl
|
|||
|
||||
self->recruitingIds.insert(interfaceId);
|
||||
self->recruitingLocalities.insert(candidateWorker.worker.address());
|
||||
state ErrorOr<InitializeStorageReply> newServer = wait( candidateWorker.worker.storage.tryGetReply( isr, TaskDataDistribution ) );
|
||||
state ErrorOr<InitializeStorageReply> newServer = wait( candidateWorker.worker.storage.tryGetReply( isr, TaskPriority::DataDistribution ) );
|
||||
if(newServer.isError()) {
|
||||
TraceEvent(SevWarn, "DDRecruitmentError").error(newServer.getError());
|
||||
if( !newServer.isError( error_code_recruitment_failed ) && !newServer.isError( error_code_request_maybe_delivered ) )
|
||||
throw newServer.getError();
|
||||
wait( delay(SERVER_KNOBS->STORAGE_RECRUITMENT_DELAY, TaskDataDistribution) );
|
||||
wait( delay(SERVER_KNOBS->STORAGE_RECRUITMENT_DELAY, TaskPriority::DataDistribution) );
|
||||
}
|
||||
self->recruitingIds.erase(interfaceId);
|
||||
self->recruitingLocalities.erase(candidateWorker.worker.address());
|
||||
|
@ -3217,7 +3346,7 @@ ACTOR Future<Void> storageRecruiter( DDTeamCollection* self, Reference<AsyncVar<
|
|||
|
||||
if(!fCandidateWorker.isValid() || fCandidateWorker.isReady() || rsr.excludeAddresses != lastRequest.excludeAddresses || rsr.criticalRecruitment != lastRequest.criticalRecruitment) {
|
||||
lastRequest = rsr;
|
||||
fCandidateWorker = brokenPromiseToNever( db->get().clusterInterface.recruitStorage.getReply( rsr, TaskDataDistribution ) );
|
||||
fCandidateWorker = brokenPromiseToNever( db->get().clusterInterface.recruitStorage.getReply( rsr, TaskPriority::DataDistribution ) );
|
||||
}
|
||||
|
||||
choose {
|
||||
|
@ -3388,7 +3517,7 @@ ACTOR Future<Void> dataDistributionTeamCollection(
|
|||
ACTOR Future<Void> waitForDataDistributionEnabled( Database cx ) {
|
||||
state Transaction tr(cx);
|
||||
loop {
|
||||
wait(delay(SERVER_KNOBS->DD_ENABLED_CHECK_DELAY, TaskDataDistribution));
|
||||
wait(delay(SERVER_KNOBS->DD_ENABLED_CHECK_DELAY, TaskPriority::DataDistribution));
|
||||
|
||||
try {
|
||||
Optional<Value> mode = wait( tr.get( dataDistributionModeKey ) );
|
||||
|
@ -3516,7 +3645,7 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self)
|
|||
state double lastLimited = 0;
|
||||
self->addActor.send( monitorBatchLimitedTime(self->dbInfo, &lastLimited) );
|
||||
|
||||
state Database cx = openDBOnServer(self->dbInfo, TaskDataDistributionLaunch, true, true);
|
||||
state Database cx = openDBOnServer(self->dbInfo, TaskPriority::DataDistributionLaunch, true, true);
|
||||
cx->locationCacheSize = SERVER_KNOBS->DD_LOCATION_CACHE_SIZE;
|
||||
|
||||
//cx->setOption( FDBDatabaseOptions::LOCATION_CACHE_SIZE, StringRef((uint8_t*) &SERVER_KNOBS->DD_LOCATION_CACHE_SIZE, 8) );
|
||||
|
@ -3646,7 +3775,7 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self)
|
|||
}
|
||||
output.send( RelocateShard( keys, unhealthy ? PRIORITY_TEAM_UNHEALTHY : PRIORITY_RECOVER_MOVE ) );
|
||||
}
|
||||
wait( yield(TaskDataDistribution) );
|
||||
wait( yield(TaskPriority::DataDistribution) );
|
||||
}
|
||||
|
||||
vector<TeamCollectionInterface> tcis;
|
||||
|
@ -3718,7 +3847,7 @@ ACTOR Future<Void> dataDistributor(DataDistributorInterface di, Reference<AsyncV
|
|||
state Future<Void> collection = actorCollection( self->addActor.getFuture() );
|
||||
|
||||
try {
|
||||
TraceEvent("DataDistributor_Running", di.id());
|
||||
TraceEvent("DataDistributorRunning", di.id());
|
||||
self->addActor.send( waitFailureServer(di.waitFailure.getFuture()) );
|
||||
state Future<Void> distributor = reportErrorsExcept( dataDistribution(self), "DataDistribution", di.id(), &normalDataDistributorErrors() );
|
||||
|
||||
|
@ -3736,10 +3865,10 @@ ACTOR Future<Void> dataDistributor(DataDistributorInterface di, Reference<AsyncV
|
|||
}
|
||||
catch ( Error &err ) {
|
||||
if ( normalDataDistributorErrors().count(err.code()) == 0 ) {
|
||||
TraceEvent("DataDistributor_Error", di.id()).error(err, true);
|
||||
TraceEvent("DataDistributorError", di.id()).error(err, true);
|
||||
throw err;
|
||||
}
|
||||
TraceEvent("DataDistributor_Died", di.id()).error(err, true);
|
||||
TraceEvent("DataDistributorDied", di.id()).error(err, true);
|
||||
}
|
||||
|
||||
return Void();
|
||||
|
@ -3842,7 +3971,7 @@ TEST_CASE("DataDistribution/AddTeamsBestOf/UseMachineID") {
|
|||
Reference<IReplicationPolicy> policy = Reference<IReplicationPolicy>(new PolicyAcross(teamSize, "zoneid", Reference<IReplicationPolicy>(new PolicyOne())));
|
||||
state DDTeamCollection* collection = testMachineTeamCollection(teamSize, policy, processSize);
|
||||
|
||||
collection->addTeamsBestOf(30, desiredTeams, maxTeams);
|
||||
collection->addTeamsBestOf(30, desiredTeams, maxTeams, 30);
|
||||
|
||||
ASSERT(collection->sanityCheckTeams() == true);
|
||||
|
||||
|
@ -3867,8 +3996,8 @@ TEST_CASE("DataDistribution/AddTeamsBestOf/NotUseMachineID") {
|
|||
return Void();
|
||||
}
|
||||
|
||||
collection->addBestMachineTeams(30); // Create machine teams to help debug
|
||||
collection->addTeamsBestOf(30, desiredTeams, maxTeams);
|
||||
collection->addBestMachineTeams(30, 30); // Create machine teams to help debug
|
||||
collection->addTeamsBestOf(30, desiredTeams, maxTeams, 30);
|
||||
collection->sanityCheckTeams(); // Server team may happen to be on the same machine team, although unlikely
|
||||
|
||||
if (collection) delete (collection);
|
||||
|
@ -3883,7 +4012,7 @@ TEST_CASE("DataDistribution/AddAllTeams/isExhaustive") {
|
|||
state int maxTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * processSize;
|
||||
state DDTeamCollection* collection = testTeamCollection(3, policy, processSize);
|
||||
|
||||
int result = collection->addTeamsBestOf(200, desiredTeams, maxTeams);
|
||||
int result = collection->addTeamsBestOf(200, desiredTeams, maxTeams, 200);
|
||||
|
||||
delete(collection);
|
||||
|
||||
|
@ -3903,11 +4032,11 @@ TEST_CASE("/DataDistribution/AddAllTeams/withLimit") {
|
|||
|
||||
state DDTeamCollection* collection = testTeamCollection(3, policy, processSize);
|
||||
|
||||
int result = collection->addTeamsBestOf(10, desiredTeams, maxTeams);
|
||||
int result = collection->addTeamsBestOf(10, desiredTeams, maxTeams, 10);
|
||||
|
||||
delete(collection);
|
||||
|
||||
ASSERT(result == 10);
|
||||
ASSERT(result >= 10);
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
@ -3923,9 +4052,9 @@ TEST_CASE("/DataDistribution/AddTeamsBestOf/SkippingBusyServers") {
|
|||
collection->addTeam(std::set<UID>({ UID(1, 0), UID(2, 0), UID(3, 0) }), true);
|
||||
collection->addTeam(std::set<UID>({ UID(1, 0), UID(3, 0), UID(4, 0) }), true);
|
||||
|
||||
int result = collection->addTeamsBestOf(8, desiredTeams, maxTeams);
|
||||
int result = collection->addTeamsBestOf(8, desiredTeams, maxTeams, 8);
|
||||
|
||||
ASSERT(result == 8);
|
||||
ASSERT(result >= 8);
|
||||
|
||||
for(auto process = collection->server_info.begin(); process != collection->server_info.end(); process++) {
|
||||
auto teamCount = process->second->teams.size();
|
||||
|
@ -3953,8 +4082,8 @@ TEST_CASE("/DataDistribution/AddTeamsBestOf/NotEnoughServers") {
|
|||
collection->addTeam(std::set<UID>({ UID(1, 0), UID(2, 0), UID(3, 0) }), true);
|
||||
collection->addTeam(std::set<UID>({ UID(1, 0), UID(3, 0), UID(4, 0) }), true);
|
||||
|
||||
collection->addBestMachineTeams(10);
|
||||
int result = collection->addTeamsBestOf(10, desiredTeams, maxTeams);
|
||||
collection->addBestMachineTeams(10, 10);
|
||||
int result = collection->addTeamsBestOf(10, desiredTeams, maxTeams, 10);
|
||||
|
||||
if (collection->machineTeams.size() != 10 || result != 8) {
|
||||
collection->traceAllInfo(true); // Debug message
|
||||
|
|
|
@@ -512,9 +512,9 @@ struct DDQueueData {

// FIXME: is the merge case needed
if( input.priority == PRIORITY_MERGE_SHARD ) {
wait( delay( 0.5, TaskDataDistribution - 2 ) );
wait( delay( 0.5, decrementPriority(decrementPriority(TaskPriority::DataDistribution )) ) );
} else {
wait( delay( 0.0001, TaskDataDistributionLaunch ) );
wait( delay( 0.0001, TaskPriority::DataDistributionLaunch ) );
}

loop {
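
// Illustrative sketch (an assumption about flow/network.h, not shown in this diff): with TaskPriority
// now an enum class, the old integer arithmetic `TaskDataDistribution - 2` no longer compiles, which
// is why the change above calls decrementPriority() twice instead. A minimal helper with that
// behaviour could look like this (the real one may step through the declared priority levels):
inline TaskPriority decrementPriority(TaskPriority p) {
	// Drop one priority level; assumes adjacent levels differ by one in the underlying value.
	return static_cast<TaskPriority>(static_cast<int>(p) - 1);
}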
@ -933,7 +933,7 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
|
|||
.detail("Count", stuckCount)
|
||||
.detail("TeamCollectionId", tciIndex)
|
||||
.detail("NumOfTeamCollections", self->teamCollections.size());
|
||||
wait( delay( SERVER_KNOBS->BEST_TEAM_STUCK_DELAY, TaskDataDistributionLaunch ) );
|
||||
wait( delay( SERVER_KNOBS->BEST_TEAM_STUCK_DELAY, TaskPriority::DataDistributionLaunch ) );
|
||||
}
|
||||
|
||||
state std::vector<UID> destIds;
|
||||
|
@ -993,7 +993,7 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
|
|||
state Error error = success();
|
||||
state Promise<Void> dataMovementComplete;
|
||||
state Future<Void> doMoveKeys = moveKeys(self->cx, rd.keys, destIds, healthyIds, self->lock, dataMovementComplete, &self->startMoveKeysParallelismLock, &self->finishMoveKeysParallelismLock, self->teamCollections.size() > 1, relocateShardInterval.pairID );
|
||||
state Future<Void> pollHealth = signalledTransferComplete ? Never() : delay( SERVER_KNOBS->HEALTH_POLL_TIME, TaskDataDistributionLaunch );
|
||||
state Future<Void> pollHealth = signalledTransferComplete ? Never() : delay( SERVER_KNOBS->HEALTH_POLL_TIME, TaskPriority::DataDistributionLaunch );
|
||||
try {
|
||||
loop {
|
||||
choose {
|
||||
|
@ -1016,7 +1016,7 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
|
|||
self->dataTransferComplete.send(rd);
|
||||
}
|
||||
}
|
||||
pollHealth = signalledTransferComplete ? Never() : delay( SERVER_KNOBS->HEALTH_POLL_TIME, TaskDataDistributionLaunch );
|
||||
pollHealth = signalledTransferComplete ? Never() : delay( SERVER_KNOBS->HEALTH_POLL_TIME, TaskPriority::DataDistributionLaunch );
|
||||
}
|
||||
when( wait( signalledTransferComplete ? Never() : dataMovementComplete.getFuture() ) ) {
|
||||
self->fetchKeysComplete.insert( rd );
|
||||
|
@ -1066,7 +1066,7 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
|
|||
} else {
|
||||
TEST(true); // move to removed server
|
||||
healthyDestinations.addDataInFlightToTeam( -metrics.bytes );
|
||||
wait( delay( SERVER_KNOBS->RETRY_RELOCATESHARD_DELAY, TaskDataDistributionLaunch ) );
|
||||
wait( delay( SERVER_KNOBS->RETRY_RELOCATESHARD_DELAY, TaskPriority::DataDistributionLaunch ) );
|
||||
}
|
||||
}
|
||||
} catch (Error& e) {
|
||||
|
@ -1125,7 +1125,7 @@ ACTOR Future<Void> BgDDMountainChopper( DDQueueData* self, int teamCollectionInd
|
|||
state double checkDelay = SERVER_KNOBS->BG_DD_POLLING_INTERVAL;
|
||||
state int resetCount = SERVER_KNOBS->DD_REBALANCE_RESET_AMOUNT;
|
||||
loop {
|
||||
wait( delay(checkDelay, TaskDataDistributionLaunch) );
|
||||
wait( delay(checkDelay, TaskPriority::DataDistributionLaunch) );
|
||||
if (self->priority_relocations[PRIORITY_REBALANCE_OVERUTILIZED_TEAM] < SERVER_KNOBS->DD_REBALANCE_PARALLELISM) {
|
||||
state Optional<Reference<IDataDistributionTeam>> randomTeam = wait( brokenPromiseToNever( self->teamCollections[teamCollectionIndex].getTeam.getReply( GetTeamRequest( true, false, true ) ) ) );
|
||||
if( randomTeam.present() ) {
|
||||
|
@ -1160,7 +1160,7 @@ ACTOR Future<Void> BgDDValleyFiller( DDQueueData* self, int teamCollectionIndex)
|
|||
state double checkDelay = SERVER_KNOBS->BG_DD_POLLING_INTERVAL;
|
||||
state int resetCount = SERVER_KNOBS->DD_REBALANCE_RESET_AMOUNT;
|
||||
loop {
|
||||
wait( delay(checkDelay, TaskDataDistributionLaunch) );
|
||||
wait( delay(checkDelay, TaskPriority::DataDistributionLaunch) );
|
||||
if (self->priority_relocations[PRIORITY_REBALANCE_UNDERUTILIZED_TEAM] < SERVER_KNOBS->DD_REBALANCE_PARALLELISM) {
|
||||
state Optional<Reference<IDataDistributionTeam>> randomTeam = wait( brokenPromiseToNever( self->teamCollections[teamCollectionIndex].getTeam.getReply( GetTeamRequest( true, false, false ) ) ) );
|
||||
if( randomTeam.present() ) {
|
||||
|
@ -1244,7 +1244,7 @@ ACTOR Future<Void> dataDistributionQueue(
|
|||
bool wasEmpty = serversToLaunchFrom.empty();
|
||||
self.queueRelocation( rs, serversToLaunchFrom );
|
||||
if(wasEmpty && !serversToLaunchFrom.empty())
|
||||
launchQueuedWorkTimeout = delay(0, TaskDataDistributionLaunch);
|
||||
launchQueuedWorkTimeout = delay(0, TaskPriority::DataDistributionLaunch);
|
||||
}
|
||||
when ( wait(launchQueuedWorkTimeout) ) {
|
||||
self.launchQueuedWork( serversToLaunchFrom );
|
||||
|
@ -1258,7 +1258,7 @@ ACTOR Future<Void> dataDistributionQueue(
|
|||
when ( RelocateData done = waitNext( self.dataTransferComplete.getFuture() ) ) {
|
||||
complete( done, self.busymap );
|
||||
if(serversToLaunchFrom.empty() && !done.src.empty())
|
||||
launchQueuedWorkTimeout = delay(0, TaskDataDistributionLaunch);
|
||||
launchQueuedWorkTimeout = delay(0, TaskPriority::DataDistributionLaunch);
|
||||
serversToLaunchFrom.insert(done.src.begin(), done.src.end());
|
||||
}
|
||||
when ( RelocateData done = waitNext( self.relocationComplete.getFuture() ) ) {
|
||||
|
@ -1266,7 +1266,7 @@ ACTOR Future<Void> dataDistributionQueue(
|
|||
self.finishRelocation(done.priority);
|
||||
self.fetchKeysComplete.erase( done );
|
||||
//self.logRelocation( done, "ShardRelocatorDone" );
|
||||
actors.add( tag( delay(0, TaskDataDistributionLaunch), done.keys, rangesComplete ) );
|
||||
actors.add( tag( delay(0, TaskPriority::DataDistributionLaunch), done.keys, rangesComplete ) );
|
||||
if( g_network->isSimulated() && debug_isCheckRelocationDuration() && now() - done.startTime > 60 ) {
|
||||
TraceEvent(SevWarnAlways, "RelocationDurationTooLong").detail("Duration", now() - done.startTime);
|
||||
debug_setCheckRelocationDuration(false);
|
||||
|
|
|
@ -140,7 +140,7 @@ ACTOR Future<Void> trackShardBytes(
|
|||
Reference<AsyncVar<Optional<StorageMetrics>>> shardSize,
|
||||
bool addToSizeEstimate = true)
|
||||
{
|
||||
wait( delay( 0, TaskDataDistribution ) );
|
||||
wait( delay( 0, TaskPriority::DataDistribution ) );
|
||||
|
||||
/*TraceEvent("TrackShardBytesStarting")
|
||||
.detail("TrackerID", trackerID)
|
||||
|
@ -260,7 +260,7 @@ ACTOR Future<Void> changeSizes( DataDistributionTracker* self, KeyRangeRef keys,
|
|||
}
|
||||
|
||||
wait( waitForAll( sizes ) );
|
||||
wait( yield(TaskDataDistribution) );
|
||||
wait( yield(TaskPriority::DataDistribution) );
|
||||
|
||||
int64_t newShardsStartingSize = 0;
|
||||
for ( int i = 0; i < sizes.size(); i++ )
|
||||
|
@ -281,7 +281,7 @@ struct HasBeenTrueFor : NonCopyable {
|
|||
Future<Void> set() {
|
||||
if( !trigger.isValid() ) {
|
||||
cleared = Promise<Void>();
|
||||
trigger = delayJittered( SERVER_KNOBS->DD_MERGE_COALESCE_DELAY, TaskDataDistribution - 1 ) || cleared.getFuture();
|
||||
trigger = delayJittered( SERVER_KNOBS->DD_MERGE_COALESCE_DELAY, decrementPriority(TaskPriority::DataDistribution) ) || cleared.getFuture();
|
||||
}
|
||||
return trigger;
|
||||
}
|
||||
|
@ -361,7 +361,7 @@ ACTOR Future<Void> shardSplitter(
|
|||
|
||||
self->sizeChanges.add( changeSizes( self, keys, shardSize->get().get().bytes ) );
|
||||
} else {
|
||||
wait( delay(1.0, TaskDataDistribution) ); //In case the reason the split point was off was due to a discrepancy between storage servers
|
||||
wait( delay(1.0, TaskPriority::DataDistribution) ); //In case the reason the split point was off was due to a discrepancy between storage servers
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
@ -529,7 +529,7 @@ ACTOR Future<Void> shardTracker(
|
|||
wait( yieldedFuture(self->maxShardSize->onChange()) );
|
||||
|
||||
// Since maxShardSize will become present for all shards at once, avoid slow tasks with a short delay
|
||||
wait( delay( 0, TaskDataDistribution ) );
|
||||
wait( delay( 0, TaskPriority::DataDistribution ) );
|
||||
|
||||
/*TraceEvent("ShardTracker", self->distributorId)
|
||||
.detail("Begin", keys.begin)
|
||||
|
@ -546,7 +546,7 @@ ACTOR Future<Void> shardTracker(
|
|||
|
||||
// We could have a lot of actors being released from the previous wait at the same time. Immediately calling
|
||||
// delay(0) mitigates the resulting SlowTask
|
||||
wait( delay(0, TaskDataDistribution) );
|
||||
wait( delay(0, TaskPriority::DataDistribution) );
|
||||
}
|
||||
} catch (Error& e) {
|
||||
if (e.code() != error_code_actor_cancelled)
|
||||
|
@ -593,12 +593,12 @@ ACTOR Future<Void> trackInitialShards(DataDistributionTracker *self, Reference<I
|
|||
|
||||
//This line reduces the priority of shard initialization to prevent interference with failure monitoring.
|
||||
//SOMEDAY: Figure out what this priority should actually be
|
||||
wait( delay( 0.0, TaskDataDistribution ) );
|
||||
wait( delay( 0.0, TaskPriority::DataDistribution ) );
|
||||
|
||||
state int s;
|
||||
for(s=0; s<initData->shards.size()-1; s++) {
|
||||
restartShardTrackers( self, KeyRangeRef( initData->shards[s].key, initData->shards[s+1].key ) );
|
||||
wait( yield( TaskDataDistribution ) );
|
||||
wait( yield( TaskPriority::DataDistribution ) );
|
||||
}
|
||||
|
||||
Future<Void> initialSize = changeSizes( self, KeyRangeRef(allKeys.begin, allKeys.end), 0 );
|
||||
|
|
|
@ -0,0 +1,412 @@
|
|||
/*
|
||||
* MutablePrefixTree.h
|
||||
*
|
||||
* This source file is part of the FoundationDB open source project
|
||||
*
|
||||
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "flow/flow.h"
|
||||
#include "flow/Arena.h"
|
||||
#include "fdbclient/FDBTypes.h"
|
||||
#include "fdbserver/Knobs.h"
|
||||
#include "fdbserver/PrefixTree.h"
|
||||
#include <string.h>
|
||||
|
||||
// Delta Tree is a memory mappable binary tree of T objects such that each node's item is
// stored as a Delta which can reproduce the node's T item given the node's greatest
// lesser ancestor and the node's least greater ancestor.
//
// The Delta type is intended to make use of ordered prefix compression and borrow all
// available prefix bytes from the ancestor T which shares the most prefix bytes with
// the item T being encoded.
//
// T requirements
//
//   Must be compatible with Standalone<T> and must implement the following additional methods:
//
//     // Writes to d a delta which can create *this from base
//     // commonPrefix can be passed in if known
//     void writeDelta(dT &d, const T &base, int commonPrefix = -1) const;
//
//     // Compare *this to t, returns < 0 for less than, 0 for equal, > 0 for greater than
//     int compare(const T &rhs) const;
//
//     // Get the common prefix bytes between *this and base
//     // skip is a hint of how many prefix bytes are already known to be the same
//     int getCommonPrefixLen(const T &base, int skip) const;
//
//     // Returns the size of the delta object needed to make *this from base
//     // TODO: Explain contract required for deltaSize to be used to predict final
//     // balanced tree size incrementally while adding sorted items to a build set
//     int deltaSize(const T &base) const;
//
// DeltaT requirements
//
//     // Returns the size of this dT instance
//     int size();
//
//     // Returns the T created by applying the delta to prev or next
//     T apply(const T &base, Arena &localStorage) const;
//
//     // Stores a boolean which DeltaTree will later use to determine the base node for a node's delta
//     void setPrefixSource(bool val);
//
//     // Retrieves the previously stored boolean
//     bool getPrefixSource() const;
//
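
// Illustrative sketch (not part of this file): a minimal T / Delta pair satisfying the contract above.
// It stores the whole value in the delta, so there is no real prefix sharing, and it omits the
// Standalone<T>/Arena plumbing a real key type needs; it exists only to show the required methods.
// Note that build() below uses writeDelta()'s return value as the number of bytes written, so the
// sketch returns an int rather than void.
struct IntKey {
	int value;

	struct Delta {
		int value;
		bool prefixSource;

		int size() const { return sizeof(Delta); }
		IntKey apply(const IntKey &base, Arena &localStorage) const { return IntKey{ value }; }
		void setPrefixSource(bool val) { prefixSource = val; }
		bool getPrefixSource() const { return prefixSource; }
	};

	// Write a delta that can recreate *this from base; returns the number of bytes written.
	int writeDelta(Delta &d, const IntKey &base, int commonPrefix = -1) const {
		d.value = value;
		return sizeof(Delta);
	}
	int compare(const IntKey &rhs) const { return (value > rhs.value) - (value < rhs.value); }
	int getCommonPrefixLen(const IntKey &base, int skip) const { return 0; }
	int deltaSize(const IntKey &base) const { return sizeof(Delta); }
};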
#pragma pack(push,1)
|
||||
template <typename T, typename DeltaT = typename T::Delta, typename OffsetT = uint16_t>
|
||||
struct DeltaTree {
|
||||
|
||||
static int MaximumTreeSize() {
|
||||
return std::numeric_limits<OffsetT>::max();
|
||||
};
|
||||
|
||||
struct Node {
|
||||
OffsetT leftChildOffset;
|
||||
OffsetT rightChildOffset;
|
||||
|
||||
inline DeltaT & delta() {
|
||||
return *(DeltaT *)(this + 1);
|
||||
};
|
||||
|
||||
inline const DeltaT & delta() const {
|
||||
return *(const DeltaT *)(this + 1);
|
||||
};
|
||||
|
||||
Node * rightChild() const {
|
||||
//printf("Node(%p): leftOffset=%d rightOffset=%d deltaSize=%d\n", this, (int)leftChildOffset, (int)rightChildOffset, (int)delta().size());
|
||||
return rightChildOffset == 0 ? nullptr : (Node *)((uint8_t *)&delta() + rightChildOffset);
|
||||
}
|
||||
|
||||
Node * leftChild() const {
|
||||
//printf("Node(%p): leftOffset=%d rightOffset=%d deltaSize=%d\n", this, (int)leftChildOffset, (int)rightChildOffset, (int)delta().size());
|
||||
return leftChildOffset == 0 ? nullptr : (Node *)((uint8_t *)&delta() + leftChildOffset);
|
||||
}
|
||||
|
||||
int size() const {
|
||||
return sizeof(Node) + delta().size();
|
||||
}
|
||||
};
|
||||
|
||||
struct {
|
||||
OffsetT nodeBytes; // Total size of all Nodes including the root
|
||||
uint8_t initialDepth; // Levels in the tree as of the last rebuild
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
inline Node & root() {
|
||||
return *(Node *)(this + 1);
|
||||
}
|
||||
|
||||
inline const Node & root() const {
|
||||
return *(const Node *)(this + 1);
|
||||
}
|
||||
|
||||
int size() const {
|
||||
return sizeof(DeltaTree) + nodeBytes;
|
||||
}
|
||||
|
||||
public:
|
||||
// Get count of total overhead bytes (everything but the user-formatted Delta) for a tree given size n
|
||||
static inline int GetTreeOverhead(int n = 0) {
|
||||
return sizeof(DeltaTree) + (n * sizeof(Node));
|
||||
}
|
||||
|
||||
struct DecodedNode {
|
||||
DecodedNode(Node *raw, const T *prev, const T *next, Arena &arena)
|
||||
: raw(raw), parent(nullptr), left(nullptr), right(nullptr), prev(prev), next(next),
|
||||
item(raw->delta().apply(raw->delta().getPrefixSource() ? *prev : *next, arena))
|
||||
{
|
||||
//printf("DecodedNode1 raw=%p delta=%s\n", raw, raw->delta().toString().c_str());
|
||||
}
|
||||
|
||||
DecodedNode(Node *raw, DecodedNode *parent, bool left, Arena &arena)
|
||||
: parent(parent), raw(raw), left(nullptr), right(nullptr),
|
||||
prev(left ? parent->prev : &parent->item),
|
||||
next(left ? &parent->item : parent->next),
|
||||
item(raw->delta().apply(raw->delta().getPrefixSource() ? *prev : *next, arena))
|
||||
{
|
||||
//printf("DecodedNode2 raw=%p delta=%s\n", raw, raw->delta().toString().c_str());
|
||||
}
|
||||
|
||||
Node *raw;
|
||||
DecodedNode *parent;
|
||||
DecodedNode *left;
|
||||
DecodedNode *right;
|
||||
const T *prev; // greatest ancestor to the left
|
||||
const T *next; // least ancestor to the right
|
||||
T item;
|
||||
|
||||
DecodedNode *getRight(Arena &arena) {
|
||||
		if(right == nullptr) {
			Node *n = raw->rightChild();
			if(n != nullptr) {
				right = new (arena) DecodedNode(n, this, false, arena);
			}
		}
		return right;
	}

	DecodedNode *getLeft(Arena &arena) {
		if(left == nullptr) {
			Node *n = raw->leftChild();
			if(n != nullptr) {
				left = new (arena) DecodedNode(n, this, true, arena);
			}
		}
		return left;
	}
};

struct Cursor;

// A Reader is used to read a Tree by getting cursors into it.
// Any node decoded by any cursor is placed in cache for use
// by other cursors.
struct Reader : FastAllocated<Reader> {
	Reader(const void *treePtr = nullptr, const T *lowerBound = nullptr, const T *upperBound = nullptr)
	  : tree((DeltaTree *)treePtr), lower(lowerBound), upper(upperBound) {

		// TODO: Remove these copies into arena and require users of Reader to keep prev and next alive during its lifetime
		lower = new(arena) T(arena, *lower);
		upper = new(arena) T(arena, *upper);

		root = (tree->nodeBytes == 0) ? nullptr : new (arena) DecodedNode(&tree->root(), lower, upper, arena);
	}

	const T *lowerBound() const {
		return lower;
	}

	const T *upperBound() const {
		return upper;
	}

	Arena arena;
	DeltaTree *tree;
	DecodedNode *root;
	const T *lower;
	const T *upper;

	Cursor getCursor() {
		return Cursor(this);
	}
};

// Cursor provides a way to seek into a PrefixTree and iterate over its contents
// All Cursors from a Reader share the same decoded node 'cache' (tree of DecodedNodes)
struct Cursor {
	Cursor() : reader(nullptr), node(nullptr) {
	}

	Cursor(Reader *r) : reader(r), node(reader->root) {
	}

	Reader *reader;
	DecodedNode *node;

	bool valid() const {
		return node != nullptr;
	}

	const T & get() const {
		return node->item;
	}

	const T & getOrUpperBound() const {
		return valid() ? node->item : *reader->upperBound();
	}

	// Moves the cursor to the node with the greatest key less than or equal to s. If successful,
	// returns true, otherwise returns false and the cursor will be at the node with the next key
	// greater than s.
	bool seekLessThanOrEqual(const T &s) {
		node = nullptr;
		DecodedNode *n = reader->root;

		while(n != nullptr) {
			int cmp = s.compare(n->item);

			if(cmp == 0) {
				node = n;
				return true;
			}

			if(cmp < 0) {
				n = n->getLeft(reader->arena);
			}
			else {
				// n < s so store it in node as a potential result
				node = n;
				n = n->getRight(reader->arena);
			}
		}

		return node != nullptr;
	}

	bool moveFirst() {
		DecodedNode *n = reader->root;
		node = n;
		while(n != nullptr) {
			n = n->getLeft(reader->arena);
			if(n != nullptr)
				node = n;
		}
		return node != nullptr;
	}

	bool moveLast() {
		DecodedNode *n = reader->root;
		node = n;
		while(n != nullptr) {
			n = n->getRight(reader->arena);
			if(n != nullptr)
				node = n;
		}
		return node != nullptr;
	}

	bool moveNext() {
		// Try to go right
		DecodedNode *n = node->getRight(reader->arena);
		if(n != nullptr) {
			// Go left as far as possible
			while(n != nullptr) {
				node = n;
				n = n->getLeft(reader->arena);
			}
			return true;
		}

		// Follow parent links until a greater parent is found
		while(node->parent != nullptr) {
			bool greaterParent = node->parent->left == node;
			node = node->parent;
			if(greaterParent) {
				return true;
			}
		}

		node = nullptr;
		return false;
	}

	bool movePrev() {
		// Try to go left
		DecodedNode *n = node->getLeft(reader->arena);
		if(n != nullptr) {
			// Go right as far as possible
			while(n != nullptr) {
				node = n;
				n = n->getRight(reader->arena);
			}
			return true;
		}

		// Follow parent links until a lesser parent is found
		while(node->parent != nullptr) {
			bool lesserParent = node->parent->right == node;
			node = node->parent;
			if(lesserParent) {
				return true;
			}
		}

		node = nullptr;
		return false;
	}
};
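A minimal usage sketch of the Reader/Cursor API above (illustration only, not part of this commit; it assumes Reader and Cursor are nested in a DeltaTree<T> template as the surrounding code suggests, and that the bound objects outlive the Reader):

// Sketch: scan every item of a built tree in ascending key order.
template <typename T>
void scanDeltaTree(const void *treeBytes, const T &lower, const T &upper) {
	typename DeltaTree<T>::Reader reader(treeBytes, &lower, &upper);
	auto c = reader.getCursor();
	for (bool ok = c.moveFirst(); ok; ok = c.moveNext()) {
		const T &item = c.get(); // items come back in key order
		(void)item;
	}
	// Point query: c.seekLessThanOrEqual(probe) positions at the greatest key <= probe, if any.
}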

// Returns number of bytes written
int build(const T *begin, const T *end, const T *prev, const T *next) {
	//printf("tree size: %d node size: %d\n", sizeof(DeltaTree), sizeof(Node));
	int count = end - begin;
	initialDepth = (uint8_t)log2(count) + 1;

	// The boundary leading to the new page acts as the last time we branched right
	if(begin != end) {
		nodeBytes = build(root(), begin, end, prev, next);
	}
	else {
		nodeBytes = 0;
	}
	return size();
}

private:
static OffsetT build(Node &root, const T *begin, const T *end, const T *prev, const T *next) {
	//printf("build: %s to %s\n", begin->toString().c_str(), (end - 1)->toString().c_str());
	//printf("build: root at %p sizeof(Node) %d delta at %p \n", &root, sizeof(Node), &root.delta());
	ASSERT(end != begin);
	int count = end - begin;

	// Find key to be stored in root
	int mid = perfectSubtreeSplitPointCached(count);
	const T &item = begin[mid];

	// Get the common prefix length between next and prev
	// Since mid is between them, we can skip that length to determine the common prefix length
	// between mid and prev and between mid and next.
	int nextPrevCommon = prev->getCommonPrefixLen(*next, 0);
	int commonWithPrev = item.getCommonPrefixLen(*prev, nextPrevCommon);
	int commonWithNext = item.getCommonPrefixLen(*next, nextPrevCommon);

	bool prefixSourcePrev;
	int commonPrefix;
	const T *base;
	if(commonWithPrev >= commonWithNext) {
		prefixSourcePrev = true;
		commonPrefix = commonWithPrev;
		base = prev;
	}
	else {
		prefixSourcePrev = false;
		commonPrefix = commonWithNext;
		base = next;
	}

	int deltaSize = item.writeDelta(root.delta(), *base, commonPrefix);
	root.delta().setPrefixSource(prefixSourcePrev);
	//printf("Serialized %s to %p\n", item.toString().c_str(), &root.delta());

	// Continue writing after the serialized Delta.
	uint8_t *wptr = (uint8_t *)&root.delta() + deltaSize;

	// Serialize left child
	if(count > 1) {
		wptr += build(*(Node *)wptr, begin, begin + mid, prev, &item);
		root.leftChildOffset = deltaSize;
	}
	else {
		root.leftChildOffset = 0;
	}

	// Serialize right child
	if(count > 2) {
		root.rightChildOffset = wptr - (uint8_t *)&root.delta();
		wptr += build(*(Node *)wptr, begin + mid + 1, end, &item, next);
	}
	else {
		root.rightChildOffset = 0;
	}

	return wptr - (uint8_t *)&root;
}
};
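A small self-contained sketch (illustration only, using plain std::string keys rather than the real key type) of the prefix-source decision build() makes above: each node's key is delta-encoded against whichever neighboring boundary, prev or next, shares the longer common prefix.

#include <algorithm>
#include <string>
#include <utility>

// Length of the common prefix of a and b, assuming the first 'skip' bytes already match.
static int commonPrefixLen(const std::string &a, const std::string &b, int skip) {
	int n = (int)std::min(a.size(), b.size());
	int i = skip;
	while (i < n && a[i] == b[i]) ++i;
	return i;
}

// Returns {prefixSourcePrev, commonPrefix}, mirroring the commonWithPrev >= commonWithNext branch above.
static std::pair<bool, int> choosePrefixSource(const std::string &item, const std::string &prev, const std::string &next) {
	int nextPrevCommon = commonPrefixLen(prev, next, 0);
	int withPrev = commonPrefixLen(item, prev, nextPrevCommon);
	int withNext = commonPrefixLen(item, next, nextPrevCommon);
	if (withPrev >= withNext) return { true, withPrev };
	return { false, withNext };
}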
@@ -31,8 +31,7 @@

#define debug_printf_always(...) { fprintf(stdout, "%s %f ", g_network->getLocalAddress().toString().c_str(), now()), fprintf(stdout, __VA_ARGS__); fflush(stdout); }

template <class... T>
void debug_printf_noop(T&&...) {}
#define debug_printf_noop(...)

#if REDWOOD_DEBUG
#define debug_printf debug_printf_always

@@ -42,11 +41,18 @@ void debug_printf_noop(T&&...) {}

#define BEACON fprintf(stderr, "%s: %s line %d \n", __FUNCTION__, __FILE__, __LINE__)

#ifndef VALGRIND
#define VALGRIND_MAKE_MEM_UNDEFINED(x, y)
#define VALGRIND_MAKE_MEM_DEFINED(x, y)
#endif

typedef uint32_t LogicalPageID; // uint64_t?
static const int invalidLogicalPageID = LogicalPageID(-1);

class IPage {
public:
IPage() : userData(nullptr) {}

virtual uint8_t const* begin() const = 0;
virtual uint8_t* mutate() = 0;

@@ -57,10 +63,17 @@ public:
return StringRef(begin(), size());
}

virtual ~IPage() {}
virtual ~IPage() {
if(userData != nullptr && userDataDestructor != nullptr) {
userDataDestructor(userData);
}
}

virtual void addref() const = 0;
virtual void delref() const = 0;

mutable void *userData;
mutable void (*userDataDestructor)(void *);
};

class IPagerSnapshot {

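The new IPage destructor above gives page users a hook for attaching an owned object to a cached page and having it cleaned up with the page; a minimal sketch of the intended pattern (illustration only, DecodedCache is a hypothetical type):

struct DecodedCache { /* decoded representation of the page */ };

void attachCache(IPage *page) {
	page->userData = new DecodedCache();
	page->userDataDestructor = [](void *p) { delete (DecodedCache *)p; }; // runs in ~IPage()
}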
@@ -44,6 +44,8 @@ public:

virtual void addref() = 0;
virtual void delref() = 0;

virtual std::string toString() const = 0;
};

class IVersionedStore : public IClosable {

@@ -23,12 +23,12 @@

#include "flow/UnitTest.h"
#include "flow/actorcompiler.h"
#include "fdbrpc/crc32c.h"

struct SumType {
bool operator==(const SumType &rhs) const { return part1 == rhs.part1 && part2 == rhs.part2; }
uint32_t part1;
uint32_t part2;
std::string toString() { return format("0x%08x%08x", part1, part2); }
bool operator==(const SumType &rhs) const { return crc == rhs.crc; }
uint32_t crc;
std::string toString() { return format("0x%08x", crc); }
};

bool checksum(IAsyncFile *file, uint8_t *page, int pageSize, LogicalPageID logical, PhysicalPageID physical, bool write) {

@@ -41,15 +41,17 @@ bool checksum(IAsyncFile *file, uint8_t *page, int pageSize, LogicalPageID logic
pageSize -= IndirectShadowPage::PAGE_OVERHEAD_BYTES;
SumType sum;
SumType *pSumInPage = (SumType *)(page + pageSize);

// Write sum directly to page or to sum variable based on mode
SumType *sumOut = write ? pSumInPage : &sum;
sumOut->part1 = physical;
sumOut->part2 = logical;
hashlittle2(page, pageSize, &sumOut->part1, &sumOut->part2);
sumOut->crc = crc32c_append(logical, page, pageSize);
VALGRIND_MAKE_MEM_DEFINED(sumOut, sizeof(SumType));

debug_printf("checksum %s%s logical %d physical %d size %d checksums page %s calculated %s data at %p %s\n",
write ? "write" : "read", (!write && sum != *pSumInPage) ? " MISMATCH" : "", logical, physical, pageSize, write ? "NA" : pSumInPage->toString().c_str(), sumOut->toString().c_str(), page, "" /*StringRef((uint8_t *)page, pageSize).toHexString().c_str()*/);
write ? "write" : "read",
(!write && sum != *pSumInPage) ? " MISMATCH" : "",
logical, physical, pageSize,
write ? "NA" : pSumInPage->toString().c_str(),
sumOut->toString().c_str(), page, "");

// Verify if not in write mode
if(!write && sum != *pSumInPage) {

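This hunk replaces the two-word hashlittle2 checksum with a single CRC32C over the page payload, seeded with the logical page ID. A minimal sketch of the read-side check (illustration only; it reuses crc32c_append exactly as the diff does, but is not the verbatim FDB code):

bool pageChecksumOk(uint8_t *page, int payloadSize, LogicalPageID logical) {
	SumType *stored = (SumType *)(page + payloadSize);          // checksum is stored after the payload
	SumType calculated;
	calculated.crc = crc32c_append(logical, page, payloadSize); // seed with the logical page ID
	return calculated == *stored;
}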
@@ -75,10 +77,6 @@ inline void checksumWrite(IAsyncFile *file, uint8_t *page, int pageSize, Logical

IndirectShadowPage::IndirectShadowPage() : fastAllocated(true) {
data = (uint8_t*)FastAllocator<4096>::allocate();
#if VALGRIND
// Prevent valgrind errors caused by writing random unneeded bytes to disk.
memset(data, 0, size());
#endif
}

IndirectShadowPage::~IndirectShadowPage() {

@@ -276,7 +274,7 @@ ACTOR Future<Void> recover(IndirectShadowPager *pager) {

ACTOR Future<Void> housekeeper(IndirectShadowPager *pager) {
wait(pager->recovery);

wait(Never());
loop {
state LogicalPageID pageID = 0;
for(; pageID < pager->pageTable.size(); ++pageID) {

@@ -400,7 +400,7 @@ private:

bool ok = count < 1e6;
if( !ok ) {
TraceEvent(/*ok ? SevInfo : */SevWarnAlways, "KVSMemCommit_queue", id)
TraceEvent(/*ok ? SevInfo : */SevWarnAlways, "KVSMemCommitQueue", id)
.detail("Bytes", total)
.detail("Log", log)
.detail("Ops", count)

@@ -715,7 +715,7 @@ KeyValueStoreMemory::KeyValueStoreMemory( IDiskQueue* log, UID id, int64_t memor

IKeyValueStore* keyValueStoreMemory( std::string const& basename, UID logID, int64_t memoryLimit, std::string ext ) {
TraceEvent("KVSMemOpening", logID).detail("Basename", basename).detail("MemoryLimit", memoryLimit);
IDiskQueue *log = openDiskQueue( basename, ext, logID, DiskQueueVersion::V0 );
IDiskQueue *log = openDiskQueue( basename, ext, logID, DiskQueueVersion::V1 );
return new KeyValueStoreMemory( log, logID, memoryLimit, false, false, false );
}

@@ -1937,8 +1937,8 @@ KeyValueStoreSQLite::KeyValueStoreSQLite(std::string const& filename, UID id, Ke
readCursors.resize(64); //< number of read threads

sqlite3_soft_heap_limit64( SERVER_KNOBS->SOFT_HEAP_LIMIT ); // SOMEDAY: Is this a performance issue? Should we drop the cache sizes for individual threads?
int taskId = g_network->getCurrentTask();
g_network->setCurrentTask(TaskDiskWrite);
TaskPriority taskId = g_network->getCurrentTask();
g_network->setCurrentTask(TaskPriority::DiskWrite);
writeThread->addThread( new Writer(filename, type==KeyValueStoreType::SSD_BTREE_V2, checkChecksums, checkIntegrity, writesComplete, springCleaningStats, diskBytesUsed, freeListPages, id, &readCursors) );
g_network->setCurrentTask(taskId);
auto p = new Writer::InitAction();

@@ -1963,8 +1963,8 @@ StorageBytes KeyValueStoreSQLite::getStorageBytes() {

void KeyValueStoreSQLite::startReadThreads() {
int nReadThreads = readCursors.size();
int taskId = g_network->getCurrentTask();
g_network->setCurrentTask(TaskDiskRead);
TaskPriority taskId = g_network->getCurrentTask();
g_network->setCurrentTask(TaskPriority::DiskRead);
for(int i=0; i<nReadThreads; i++)
readThreads->addThread( new Reader(filename, type==KeyValueStoreType::SSD_BTREE_V2, readsComplete, logID, &readCursors[i]) );
g_network->setCurrentTask(taskId);

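The taskID rewrites in this file and in the ones that follow all stem from the same change: the flat integer task constants (TaskDiskWrite, TaskCoordinationReply, ...) become members of a scoped TaskPriority enum, so priorities no longer convert implicitly to or from int. A condensed sketch of the save/restore pattern as it appears in the hunks above (mirrors the diff; nothing new is introduced):

TaskPriority saved = g_network->getCurrentTask();   // was: int saved = g_network->getCurrentTask();
g_network->setCurrentTask(TaskPriority::DiskWrite); // was: g_network->setCurrentTask(TaskDiskWrite);
// ... start the prioritized work ...
g_network->setCurrentTask(saved);                   // restore the previous priority; no silent int conversions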
@@ -68,6 +68,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( MAX_QUEUE_COMMIT_BYTES, 15e6 ); if( randomize && BUGGIFY ) MAX_QUEUE_COMMIT_BYTES = 5000;
init( VERSIONS_PER_BATCH, VERSIONS_PER_SECOND/20 ); if( randomize && BUGGIFY ) VERSIONS_PER_BATCH = std::max<int64_t>(1,VERSIONS_PER_SECOND/1000);
init( CONCURRENT_LOG_ROUTER_READS, 1 );
init( LOG_ROUTER_PEEK_FROM_SATELLITES_PREFERRED, 1 ); if( randomize && BUGGIFY ) LOG_ROUTER_PEEK_FROM_SATELLITES_PREFERRED = 0;
init( DISK_QUEUE_ADAPTER_MIN_SWITCH_TIME, 1.0 );
init( DISK_QUEUE_ADAPTER_MAX_SWITCH_TIME, 5.0 );
init( TLOG_SPILL_REFERENCE_MAX_PEEK_MEMORY_BYTES, 2e9 ); if ( randomize && BUGGIFY ) TLOG_SPILL_REFERENCE_MAX_PEEK_MEMORY_BYTES = 2e6;

@@ -371,8 +372,8 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( TARGET_BYTES_PER_STORAGE_SERVER_BATCH, 500e6 ); if( smallStorageTarget ) TARGET_BYTES_PER_STORAGE_SERVER_BATCH = 1500e3;
init( SPRING_BYTES_STORAGE_SERVER_BATCH, 50e6 ); if( smallStorageTarget ) SPRING_BYTES_STORAGE_SERVER_BATCH = 150e3;
init( STORAGE_HARD_LIMIT_BYTES, 1500e6 ); if( smallStorageTarget ) STORAGE_HARD_LIMIT_BYTES = 4500e3;
init( STORAGE_DURABILITY_LAG_SOFT_MAX, 20e6 ); if( smallStorageTarget ) STORAGE_DURABILITY_LAG_SOFT_MAX = 10e6;
init( STORAGE_DURABILITY_LAG_HARD_MAX, 200e6 ); if( smallStorageTarget ) STORAGE_DURABILITY_LAG_HARD_MAX = 100e6;
init( STORAGE_DURABILITY_LAG_HARD_MAX, 2000e6 ); if( smallStorageTarget ) STORAGE_DURABILITY_LAG_HARD_MAX = 100e6;
init( STORAGE_DURABILITY_LAG_SOFT_MAX, 200e6 ); if( smallStorageTarget ) STORAGE_DURABILITY_LAG_SOFT_MAX = 10e6;

bool smallTlogTarget = randomize && BUGGIFY;
init( TARGET_BYTES_PER_TLOG, 2400e6 ); if( smallTlogTarget ) TARGET_BYTES_PER_TLOG = 2000e3;

@@ -410,6 +411,8 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( FETCH_KEYS_PARALLELISM_BYTES, 4e6 ); if( randomize && BUGGIFY ) FETCH_KEYS_PARALLELISM_BYTES = 3e6;
init( BUGGIFY_BLOCK_BYTES, 10000 );
init( STORAGE_COMMIT_BYTES, 10000000 ); if( randomize && BUGGIFY ) STORAGE_COMMIT_BYTES = 2000000;
init( STORAGE_DURABILITY_LAG_REJECT_THRESHOLD, 0.25 );
init( STORAGE_DURABILITY_LAG_MIN_RATE, 0.1 );
init( STORAGE_COMMIT_INTERVAL, 0.5 ); if( randomize && BUGGIFY ) STORAGE_COMMIT_INTERVAL = 2.0;
init( UPDATE_SHARD_VERSION_INTERVAL, 0.25 ); if( randomize && BUGGIFY ) UPDATE_SHARD_VERSION_INTERVAL = 1.0;
init( BYTE_SAMPLING_FACTOR, 250 ); //cannot buggify because of differences in restarting tests

@@ -419,7 +422,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( LONG_BYTE_SAMPLE_RECOVERY_DELAY, 60.0 );
init( BYTE_SAMPLE_LOAD_PARALLELISM, 8 ); if( randomize && BUGGIFY ) BYTE_SAMPLE_LOAD_PARALLELISM = 1;
init( BYTE_SAMPLE_LOAD_DELAY, 0.0 ); if( randomize && BUGGIFY ) BYTE_SAMPLE_LOAD_DELAY = 0.1;
init( BYTE_SAMPLE_START_DELAY, 1.0 ); if( randomize && BUGGIFY ) BYTE_SAMPLE_LOAD_DELAY = 0.0;
init( BYTE_SAMPLE_START_DELAY, 1.0 ); if( randomize && BUGGIFY ) BYTE_SAMPLE_START_DELAY = 0.0;
init( UPDATE_STORAGE_PROCESS_STATS_INTERVAL, 5.0 );

//Wait Failure

@@ -72,6 +72,7 @@ public:
int64_t MAX_QUEUE_COMMIT_BYTES;
int64_t VERSIONS_PER_BATCH;
int CONCURRENT_LOG_ROUTER_READS;
int LOG_ROUTER_PEEK_FROM_SATELLITES_PREFERRED; // 0==peek from primary, non-zero==peek from satellites
double DISK_QUEUE_ADAPTER_MIN_SWITCH_TIME;
double DISK_QUEUE_ADAPTER_MAX_SWITCH_TIME;
int64_t TLOG_SPILL_REFERENCE_MAX_PEEK_MEMORY_BYTES;

@@ -346,8 +347,10 @@ public:
int FETCH_KEYS_PARALLELISM_BYTES;
int BUGGIFY_BLOCK_BYTES;
int64_t STORAGE_HARD_LIMIT_BYTES;
int64_t STORAGE_DURABILITY_LAG_SOFT_MAX;
int64_t STORAGE_DURABILITY_LAG_HARD_MAX;
int64_t STORAGE_DURABILITY_LAG_SOFT_MAX;
double STORAGE_DURABILITY_LAG_REJECT_THRESHOLD;
double STORAGE_DURABILITY_LAG_MIN_RATE;
int STORAGE_COMMIT_BYTES;
double STORAGE_COMMIT_INTERVAL;
double UPDATE_SHARD_VERSION_INTERVAL;

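The knob lines above all follow one shape: init( NAME, default ) optionally followed by a BUGGIFY override. A rough sketch of what that pattern amounts to (conceptual only; initKnob and registerKnob here are assumptions, not the real Knobs implementation):

// Conceptual sketch only.
void registerKnob(const char *name, void *knob); // assumed hook for runtime overrides

#define init(knob, value) initKnob(knob, value, #knob)

template <class T, class V>
void initKnob(T &knob, V value, const char *name) {
	knob = value;              // compiled-in default
	registerKnob(name, &knob); // allow the value to be overridden later
}

// In simulation, `if (randomize && BUGGIFY) KNOB = x;` then perturbs the default so tests
// exercise tiny buffers, short timeouts, and limits like the durability-lag knobs above.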
@@ -30,7 +30,7 @@ Optional<std::pair<LeaderInfo, bool>> getLeader( const vector<Optional<LeaderInf
ACTOR Future<Void> submitCandidacy( Key key, LeaderElectionRegInterface coord, LeaderInfo myInfo, UID prevChangeID, Reference<AsyncVar<vector<Optional<LeaderInfo>>>> nominees, int index ) {
loop {
auto const& nom = nominees->get()[index];
Optional<LeaderInfo> li = wait( retryBrokenPromise( coord.candidacy, CandidacyRequest( key, myInfo, nom.present() ? nom.get().changeID : UID(), prevChangeID ), TaskCoordinationReply ) );
Optional<LeaderInfo> li = wait( retryBrokenPromise( coord.candidacy, CandidacyRequest( key, myInfo, nom.present() ? nom.get().changeID : UID(), prevChangeID ), TaskPriority::CoordinationReply ) );

if (li != nominees->get()[index]) {
vector<Optional<LeaderInfo>> v = nominees->get();

@@ -150,7 +150,7 @@ ACTOR Future<Void> tryBecomeLeaderInternal(ServerCoordinators coordinators, Valu
// we might be breaking the leader election process for someone with better communications but lower ID, so change IDs.
if ((!leader.present() || !leader.get().second) && std::count( nominees->get().begin(), nominees->get().end(), myInfo )) {
if (!badCandidateTimeout.isValid())
badCandidateTimeout = delay( SERVER_KNOBS->POLLING_FREQUENCY*2, TaskCoordinationReply );
badCandidateTimeout = delay( SERVER_KNOBS->POLLING_FREQUENCY*2, TaskPriority::CoordinationReply );
} else
badCandidateTimeout = Future<Void>();

@@ -183,12 +183,12 @@ ACTOR Future<Void> tryBecomeLeaderInternal(ServerCoordinators coordinators, Valu
state vector<Future<Void>> true_heartbeats;
state vector<Future<Void>> false_heartbeats;
for(int i=0; i<coordinators.leaderElectionServers.size(); i++) {
Future<bool> hb = retryBrokenPromise( coordinators.leaderElectionServers[i].leaderHeartbeat, LeaderHeartbeatRequest( coordinators.clusterKey, myInfo, prevChangeID ), TaskCoordinationReply );
Future<bool> hb = retryBrokenPromise( coordinators.leaderElectionServers[i].leaderHeartbeat, LeaderHeartbeatRequest( coordinators.clusterKey, myInfo, prevChangeID ), TaskPriority::CoordinationReply );
true_heartbeats.push_back( onEqual(hb, true) );
false_heartbeats.push_back( onEqual(hb, false) );
}

state Future<Void> rate = delay( SERVER_KNOBS->HEARTBEAT_FREQUENCY, TaskCoordinationReply ) || asyncPriorityInfo->onChange(); // SOMEDAY: Move to server side?
state Future<Void> rate = delay( SERVER_KNOBS->HEARTBEAT_FREQUENCY, TaskPriority::CoordinationReply ) || asyncPriorityInfo->onChange(); // SOMEDAY: Move to server side?

choose {
when ( wait( quorum( true_heartbeats, true_heartbeats.size()/2+1 ) ) ) {

@@ -51,7 +51,7 @@ struct LogRouterData {
}

// Erase messages not needed to update *from* versions >= before (thus, messages with toversion <= before)
ACTOR Future<Void> eraseMessagesBefore( TagData *self, Version before, LogRouterData *tlogData, int taskID ) {
ACTOR Future<Void> eraseMessagesBefore( TagData *self, Version before, LogRouterData *tlogData, TaskPriority taskID ) {
while(!self->version_messages.empty() && self->version_messages.front().first < before) {
Version version = self->version_messages.front().first;
int64_t messagesErased = 0;

@@ -68,7 +68,7 @@ struct LogRouterData {
return Void();
}

Future<Void> eraseMessagesBefore(Version before, LogRouterData *tlogData, int taskID) {
Future<Void> eraseMessagesBefore(Version before, LogRouterData *tlogData, TaskPriority taskID) {
return eraseMessagesBefore(this, before, tlogData, taskID);
}
};

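The eraseMessagesBefore change above also shows the flow idiom this file relies on: a static-style ACTOR helper that takes the object pointer explicitly, plus a thin non-actor member wrapper that forwards to it. A sketch of the shape (illustration only; MyData and its helpers are hypothetical):

ACTOR Future<Void> eraseBefore(MyData *self, Version before, TaskPriority taskID) {
	while (self->hasMessagesBefore(before)) { // hypothetical helper
		self->eraseOne();                     // hypothetical helper
		wait(yield(taskID));                  // stay cooperative at the caller's priority
	}
	return Void();
}

Future<Void> eraseBefore(Version before, TaskPriority taskID) {
	return eraseBefore(this, before, taskID); // plain member forwards to the actor
}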
@@ -197,7 +197,7 @@ ACTOR Future<Void> waitForVersion( LogRouterData *self, Version ver ) {
while(self->minPopped.get() + SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS < ver) {
if(self->minPopped.get() + SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS > self->version.get()) {
self->version.set( self->minPopped.get() + SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS );
wait(yield(TaskTLogCommit));
wait(yield(TaskPriority::TLogCommit));
} else {
wait(self->minPopped.whenAtLeast((self->minPopped.get()+1)));
}

@@ -220,7 +220,7 @@ ACTOR Future<Void> pullAsyncData( LogRouterData *self ) {
loop {
loop {
choose {
when(wait( r ? r->getMore(TaskTLogCommit) : Never() ) ) {
when(wait( r ? r->getMore(TaskPriority::TLogCommit) : Never() ) ) {
break;
}
when( wait( dbInfoChange ) ) { //FIXME: does this actually happen?

@@ -247,7 +247,7 @@ ACTOR Future<Void> pullAsyncData( LogRouterData *self ) {

commitMessages(self, ver, messages);
self->version.set( ver );
wait(yield(TaskTLogCommit));
wait(yield(TaskPriority::TLogCommit));
//TraceEvent("LogRouterVersion").detail("Ver",ver);
}
lastVer = ver;

@@ -260,7 +260,7 @@ ACTOR Future<Void> pullAsyncData( LogRouterData *self ) {
wait( waitForVersion(self, ver) );

self->version.set( ver );
wait(yield(TaskTLogCommit));
wait(yield(TaskPriority::TLogCommit));
}
break;
}

@@ -371,7 +371,7 @@ ACTOR Future<Void> logRouterPop( LogRouterData* self, TLogPopRequest req ) {
} else if (req.to > tagData->popped) {
tagData->popped = req.to;
tagData->durableKnownCommittedVersion = req.durableKnownCommittedVersion;
wait(tagData->eraseMessagesBefore( req.to, self, TaskTLogPop ));
wait(tagData->eraseMessagesBefore( req.to, self, TaskPriority::TLogPop ));
}

state Version minPopped = std::numeric_limits<Version>::max();

@@ -385,7 +385,7 @@ ACTOR Future<Void> logRouterPop( LogRouterData* self, TLogPopRequest req ) {

while(!self->messageBlocks.empty() && self->messageBlocks.front().first < minPopped) {
self->messageBlocks.pop_front();
wait(yield(TaskTLogPop));
wait(yield(TaskPriority::TLogPop));
}

self->poppedVersion = std::min(minKnownCommittedVersion, self->minKnownCommittedVersion);

@@ -341,7 +341,7 @@ struct ILogSystem {

//returns immediately if hasMessage() returns true.
//returns when either the result of hasMessage() or version() has changed, or a cursor has internally been exhausted.
virtual Future<Void> getMore(int taskID = TaskTLogPeekReply) = 0;
virtual Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply) = 0;

//returns when the failure monitor detects that the servers associated with the cursor are failed
virtual Future<Void> onFailed() = 0;

@@ -407,7 +407,7 @@ struct ILogSystem {
virtual StringRef getMessageWithTags();
virtual const std::vector<Tag>& getTags();
virtual void advanceTo(LogMessageVersion n);
virtual Future<Void> getMore(int taskID = TaskTLogPeekReply);
virtual Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply);
virtual Future<Void> onFailed();
virtual bool isActive();
virtual bool isExhausted();

@@ -455,7 +455,7 @@ struct ILogSystem {
virtual StringRef getMessageWithTags();
virtual const std::vector<Tag>& getTags();
virtual void advanceTo(LogMessageVersion n);
virtual Future<Void> getMore(int taskID = TaskTLogPeekReply);
virtual Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply);
virtual Future<Void> onFailed();
virtual bool isActive();
virtual bool isExhausted();

@@ -500,7 +500,7 @@ struct ILogSystem {
virtual StringRef getMessageWithTags();
virtual const std::vector<Tag>& getTags();
virtual void advanceTo(LogMessageVersion n);
virtual Future<Void> getMore(int taskID = TaskTLogPeekReply);
virtual Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply);
virtual Future<Void> onFailed();
virtual bool isActive();
virtual bool isExhausted();

@@ -534,7 +534,7 @@ struct ILogSystem {
virtual StringRef getMessageWithTags();
virtual const std::vector<Tag>& getTags();
virtual void advanceTo(LogMessageVersion n);
virtual Future<Void> getMore(int taskID = TaskTLogPeekReply);
virtual Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply);
virtual Future<Void> onFailed();
virtual bool isActive();
virtual bool isExhausted();

@@ -594,7 +594,7 @@ struct ILogSystem {
virtual StringRef getMessageWithTags();
virtual const std::vector<Tag>& getTags();
virtual void advanceTo(LogMessageVersion n);
virtual Future<Void> getMore(int taskID = TaskTLogPeekReply);
virtual Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply);
virtual Future<Void> onFailed();
virtual bool isActive();
virtual bool isExhausted();

@@ -133,7 +133,7 @@ void ILogSystem::ServerPeekCursor::advanceTo(LogMessageVersion n) {
}
}

ACTOR Future<Void> serverPeekParallelGetMore( ILogSystem::ServerPeekCursor* self, int taskID ) {
ACTOR Future<Void> serverPeekParallelGetMore( ILogSystem::ServerPeekCursor* self, TaskPriority taskID ) {
if( !self->interf || self->messageVersion >= self->end ) {
wait( Future<Void>(Never()));
throw internal_error();

@@ -198,7 +198,7 @@ ACTOR Future<Void> serverPeekParallelGetMore( ILogSystem::ServerPeekCursor* self
}
}

ACTOR Future<Void> serverPeekGetMore( ILogSystem::ServerPeekCursor* self, int taskID ) {
ACTOR Future<Void> serverPeekGetMore( ILogSystem::ServerPeekCursor* self, TaskPriority taskID ) {
if( !self->interf || self->messageVersion >= self->end ) {
wait( Future<Void>(Never()));
throw internal_error();

@@ -234,7 +234,7 @@ ACTOR Future<Void> serverPeekGetMore( ILogSystem::ServerPeekCursor* self, int ta
}
}

Future<Void> ILogSystem::ServerPeekCursor::getMore(int taskID) {
Future<Void> ILogSystem::ServerPeekCursor::getMore(TaskPriority taskID) {
//TraceEvent("SPC_GetMore", randomID).detail("HasMessage", hasMessage()).detail("More", !more.isValid() || more.isReady()).detail("MessageVersion", messageVersion.toString()).detail("End", end.toString());
if( hasMessage() )
return Void();

@@ -444,7 +444,7 @@ void ILogSystem::MergedPeekCursor::advanceTo(LogMessageVersion n) {
}
}

ACTOR Future<Void> mergedPeekGetMore(ILogSystem::MergedPeekCursor* self, LogMessageVersion startVersion, int taskID) {
ACTOR Future<Void> mergedPeekGetMore(ILogSystem::MergedPeekCursor* self, LogMessageVersion startVersion, TaskPriority taskID) {
loop {
//TraceEvent("MPC_GetMoreA", self->randomID).detail("Start", startVersion.toString());
if(self->bestServer >= 0 && self->serverCursors[self->bestServer]->isActive()) {

@@ -465,7 +465,7 @@ ACTOR Future<Void> mergedPeekGetMore(ILogSystem::MergedPeekCursor* self, LogMess
}
}

Future<Void> ILogSystem::MergedPeekCursor::getMore(int taskID) {
Future<Void> ILogSystem::MergedPeekCursor::getMore(TaskPriority taskID) {
if(!serverCursors.size())
return Never();

@@ -705,7 +705,7 @@ void ILogSystem::SetPeekCursor::advanceTo(LogMessageVersion n) {
}
}

ACTOR Future<Void> setPeekGetMore(ILogSystem::SetPeekCursor* self, LogMessageVersion startVersion, int taskID) {
ACTOR Future<Void> setPeekGetMore(ILogSystem::SetPeekCursor* self, LogMessageVersion startVersion, TaskPriority taskID) {
loop {
//TraceEvent("LPC_GetMore1", self->randomID).detail("Start", startVersion.toString()).detail("Tag", self->tag);
if(self->bestServer >= 0 && self->bestSet >= 0 && self->serverCursors[self->bestSet][self->bestServer]->isActive()) {

@@ -766,7 +766,7 @@ ACTOR Future<Void> setPeekGetMore(ILogSystem::SetPeekCursor* self, LogMessageVer
}
}

Future<Void> ILogSystem::SetPeekCursor::getMore(int taskID) {
Future<Void> ILogSystem::SetPeekCursor::getMore(TaskPriority taskID) {
auto startVersion = version();
calcHasMessage();
if( hasMessage() )

@@ -861,7 +861,7 @@ void ILogSystem::MultiCursor::advanceTo(LogMessageVersion n) {
cursors.back()->advanceTo(n);
}

Future<Void> ILogSystem::MultiCursor::getMore(int taskID) {
Future<Void> ILogSystem::MultiCursor::getMore(TaskPriority taskID) {
LogMessageVersion startVersion = cursors.back()->version();
while( cursors.size() > 1 && cursors.back()->version() >= epochEnds.back() ) {
poppedVersion = std::max(poppedVersion, cursors.back()->popped());

@@ -977,7 +977,7 @@ void ILogSystem::BufferedCursor::advanceTo(LogMessageVersion n) {
ASSERT(false);
}

ACTOR Future<Void> bufferedGetMoreLoader( ILogSystem::BufferedCursor* self, Reference<ILogSystem::IPeekCursor> cursor, Version maxVersion, int taskID ) {
ACTOR Future<Void> bufferedGetMoreLoader( ILogSystem::BufferedCursor* self, Reference<ILogSystem::IPeekCursor> cursor, Version maxVersion, TaskPriority taskID ) {
loop {
wait(yield());
if(cursor->version().version >= maxVersion) {

@@ -994,7 +994,7 @@ ACTOR Future<Void> bufferedGetMoreLoader( ILogSystem::BufferedCursor* self, Refe
}
}

ACTOR Future<Void> bufferedGetMore( ILogSystem::BufferedCursor* self, int taskID ) {
ACTOR Future<Void> bufferedGetMore( ILogSystem::BufferedCursor* self, TaskPriority taskID ) {
if( self->messageVersion.version >= self->end ) {
wait( Future<Void>(Never()));
throw internal_error();

@@ -1028,7 +1028,7 @@ ACTOR Future<Void> bufferedGetMore( ILogSystem::BufferedCursor* self, int taskID
return Void();
}

Future<Void> ILogSystem::BufferedCursor::getMore(int taskID) {
Future<Void> ILogSystem::BufferedCursor::getMore(TaskPriority taskID) {
if( hasMessage() )
return Void();
return bufferedGetMore(this, taskID);

@@ -50,7 +50,7 @@ struct MasterInterface {
}

void initEndpoints() {
getCommitVersion.getEndpoint( TaskProxyGetConsistentReadVersion );
getCommitVersion.getEndpoint( TaskPriority::ProxyGetConsistentReadVersion );
}
};

@@ -95,11 +95,11 @@ ACTOR Future<Void> getRate(UID myID, Reference<AsyncVar<ServerDBInfo>> db, int64
loop choose {
when ( wait( db->onChange() ) ) {
if ( db->get().ratekeeper.present() ) {
TraceEvent("Proxy_RatekeeperChanged", myID)
TraceEvent("ProxyRatekeeperChanged", myID)
.detail("RKID", db->get().ratekeeper.get().id());
nextRequestTimer = Void(); // trigger GetRate request
} else {
TraceEvent("Proxy_RatekeeperDied", myID);
TraceEvent("ProxyRatekeeperDied", myID);
nextRequestTimer = Never();
reply = Never();
}

@@ -158,7 +158,7 @@ ACTOR Future<Void> queueTransactionStartRequests(
if (now() - *lastGRVTime > *GRVBatchTime)
*lastGRVTime = now() - *GRVBatchTime;

forwardPromise(GRVTimer, delayJittered(*GRVBatchTime - (now() - *lastGRVTime), TaskProxyGRVTimer));
forwardPromise(GRVTimer, delayJittered(*GRVBatchTime - (now() - *lastGRVTime), TaskPriority::ProxyGRVTimer));
}

transactionQueue->push(std::make_pair(req, counter--));

@@ -263,7 +263,7 @@ struct ProxyCommitData {
lastVersionTime(0), commitVersionRequestNumber(1), mostRecentProcessedRequestNumber(0),
getConsistentReadVersion(getConsistentReadVersion), commit(commit), lastCoalesceTime(0),
localCommitBatchesStarted(0), locked(false), commitBatchInterval(SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_INTERVAL_MIN),
firstProxy(firstProxy), cx(openDBOnServer(db, TaskDefaultEndpoint, true, true)), db(db),
firstProxy(firstProxy), cx(openDBOnServer(db, TaskPriority::DefaultEndpoint, true, true)), db(db),
singleKeyMutationEvent(LiteralStringRef("SingleKeyMutation")), commitBatchesMemBytesCount(0), lastTxsPop(0)
{}
};

@@ -350,7 +350,7 @@ struct ResolutionRequestBuilder {
};

ACTOR Future<Void> commitBatcher(ProxyCommitData *commitData, PromiseStream<std::pair<std::vector<CommitTransactionRequest>, int> > out, FutureStream<CommitTransactionRequest> in, int desiredBytes, int64_t memBytesLimit) {
wait(delayJittered(commitData->commitBatchInterval, TaskProxyCommitBatcher));
wait(delayJittered(commitData->commitBatchInterval, TaskPriority::ProxyCommitBatcher));

state double lastBatch = 0;

@@ -363,7 +363,7 @@ ACTOR Future<Void> commitBatcher(ProxyCommitData *commitData, PromiseStream<std:
timeout = Never();
}
else {
timeout = delayJittered(SERVER_KNOBS->MAX_COMMIT_BATCH_INTERVAL, TaskProxyCommitBatcher);
timeout = delayJittered(SERVER_KNOBS->MAX_COMMIT_BATCH_INTERVAL, TaskPriority::ProxyCommitBatcher);
}

while(!timeout.isReady() && !(batch.size() == SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_COUNT_MAX || batchBytes >= desiredBytes)) {

@@ -387,10 +387,10 @@ ACTOR Future<Void> commitBatcher(ProxyCommitData *commitData, PromiseStream<std:
if(!batch.size()) {
commitData->commitBatchStartNotifications.send(Void());
if(now() - lastBatch > commitData->commitBatchInterval) {
timeout = delayJittered(SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_INTERVAL_FROM_IDLE, TaskProxyCommitBatcher);
timeout = delayJittered(SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_INTERVAL_FROM_IDLE, TaskPriority::ProxyCommitBatcher);
}
else {
timeout = delayJittered(commitData->commitBatchInterval - (now() - lastBatch), TaskProxyCommitBatcher);
timeout = delayJittered(commitData->commitBatchInterval - (now() - lastBatch), TaskPriority::ProxyCommitBatcher);
}
}

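The delayJittered calls above implement the batching deadline: an idle proxy restarts the clock with the short FROM_IDLE interval, while a busy one only waits out whatever remains of the normal commit interval. A condensed sketch of that decision (illustration only, simplified from the hunk above):

// Deadline choice when a new batch is being started.
if (batch.empty()) {
	if (now() - lastBatch > commitBatchInterval) {
		// Idle longer than a full interval: restart with the short idle deadline.
		timeout = delayJittered(SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_INTERVAL_FROM_IDLE, TaskPriority::ProxyCommitBatcher);
	} else {
		// Still inside the current interval: wait only for the remainder.
		timeout = delayJittered(commitBatchInterval - (now() - lastBatch), TaskPriority::ProxyCommitBatcher);
	}
}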
@@ -457,7 +457,7 @@ ACTOR Future<Void> commitBatch(
ASSERT(SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS <= SERVER_KNOBS->MAX_VERSIONS_IN_FLIGHT); // since we are using just the former to limit the number of versions actually in flight!

// Active load balancing runs at a very high priority (to obtain accurate estimate of memory used by commit batches) so we need to downgrade here
wait(delay(0, TaskProxyCommit));
wait(delay(0, TaskPriority::ProxyCommit));

self->lastVersionTime = t1;

@@ -534,7 +534,7 @@ ACTOR Future<Void> commitBatch(
vector< Future<ResolveTransactionBatchReply> > replies;
for (int r = 0; r<self->resolvers.size(); r++) {
requests.requests[r].debugID = debugID;
replies.push_back(brokenPromiseToNever(self->resolvers[r].resolve.getReply(requests.requests[r], TaskProxyResolverReply)));
replies.push_back(brokenPromiseToNever(self->resolvers[r].resolve.getReply(requests.requests[r], TaskPriority::ProxyResolverReply)));
}

state vector<vector<int>> transactionResolverMap = std::move( requests.transactionResolverMap );

@@ -1135,7 +1135,7 @@ ACTOR Future<GetReadVersionReply> getLiveCommittedVersion(ProxyCommitData* commi

state vector<Future<GetReadVersionReply>> proxyVersions;
for (auto const& p : *otherProxies)
proxyVersions.push_back(brokenPromiseToNever(p.getRawCommittedVersion.getReply(GetRawCommittedVersionRequest(debugID), TaskTLogConfirmRunningReply)));
proxyVersions.push_back(brokenPromiseToNever(p.getRawCommittedVersion.getReply(GetRawCommittedVersionRequest(debugID), TaskPriority::TLogConfirmRunningReply)));

if (!(flags&GetReadVersionRequest::FLAG_CAUSAL_READ_RISKY))
{

@@ -1292,7 +1292,7 @@ ACTOR static Future<Void> transactionStarter(
}

if (!transactionQueue.empty())
forwardPromise(GRVTimer, delayJittered(SERVER_KNOBS->START_TRANSACTION_BATCH_QUEUE_CHECK_INTERVAL, TaskProxyGRVTimer));
forwardPromise(GRVTimer, delayJittered(SERVER_KNOBS->START_TRANSACTION_BATCH_QUEUE_CHECK_INTERVAL, TaskPriority::ProxyGRVTimer));

/*TraceEvent("GRVBatch", proxy.id())
.detail("Elapsed", elapsed)

@@ -130,12 +130,12 @@ ACTOR Future<vector<UID>> addReadWriteDestinations(KeyRangeRef shard, vector<Sto

state vector< Future<Optional<UID>> > srcChecks;
for(int s=0; s<srcInterfs.size(); s++) {
srcChecks.push_back( checkReadWrite( srcInterfs[s].getShardState.getReplyUnlessFailedFor( GetShardStateRequest( shard, GetShardStateRequest::NO_WAIT), SERVER_KNOBS->SERVER_READY_QUORUM_INTERVAL, 0, TaskMoveKeys ), srcInterfs[s].id(), 0 ) );
srcChecks.push_back( checkReadWrite( srcInterfs[s].getShardState.getReplyUnlessFailedFor( GetShardStateRequest( shard, GetShardStateRequest::NO_WAIT), SERVER_KNOBS->SERVER_READY_QUORUM_INTERVAL, 0, TaskPriority::MoveKeys ), srcInterfs[s].id(), 0 ) );
}

state vector< Future<Optional<UID>> > destChecks;
for(int s=0; s<destInterfs.size(); s++) {
destChecks.push_back( checkReadWrite( destInterfs[s].getShardState.getReplyUnlessFailedFor( GetShardStateRequest( shard, GetShardStateRequest::NO_WAIT), SERVER_KNOBS->SERVER_READY_QUORUM_INTERVAL, 0, TaskMoveKeys ), destInterfs[s].id(), version ) );
destChecks.push_back( checkReadWrite( destInterfs[s].getShardState.getReplyUnlessFailedFor( GetShardStateRequest( shard, GetShardStateRequest::NO_WAIT), SERVER_KNOBS->SERVER_READY_QUORUM_INTERVAL, 0, TaskPriority::MoveKeys ), destInterfs[s].id(), version ) );
}

wait( waitForAll(srcChecks) && waitForAll(destChecks) );

@@ -225,7 +225,7 @@ ACTOR Future<Void> startMoveKeys( Database occ, KeyRange keys, vector<UID> serve
state TraceInterval interval("RelocateShard_StartMoveKeys");
//state TraceInterval waitInterval("");

wait( startMoveKeysLock->take( TaskDataDistributionLaunch ) );
wait( startMoveKeysLock->take( TaskPriority::DataDistributionLaunch ) );
state FlowLock::Releaser releaser( *startMoveKeysLock );

TraceEvent(SevDebug, interval.begin(), relocationIntervalId);

@@ -255,7 +255,7 @@ ACTOR Future<Void> startMoveKeys( Database occ, KeyRange keys, vector<UID> serve
//Keep track of shards for all src servers so that we can preserve their values in serverKeys
state Map<UID, VectorRef<KeyRangeRef>> shardMap;

tr.info.taskID = TaskMoveKeys;
tr.info.taskID = TaskPriority::MoveKeys;
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);

wait( checkMoveKeysLock(&tr, lock) );

@@ -394,11 +394,11 @@ ACTOR Future<Void> startMoveKeys( Database occ, KeyRange keys, vector<UID> serve
ACTOR Future<Void> waitForShardReady( StorageServerInterface server, KeyRange keys, Version minVersion, GetShardStateRequest::waitMode mode ) {
loop {
try {
std::pair<Version,Version> rep = wait( server.getShardState.getReply( GetShardStateRequest(keys, mode), TaskMoveKeys ) );
std::pair<Version,Version> rep = wait( server.getShardState.getReply( GetShardStateRequest(keys, mode), TaskPriority::MoveKeys ) );
if (rep.first >= minVersion) {
return Void();
}
wait( delayJittered( SERVER_KNOBS->SHARD_READY_DELAY, TaskMoveKeys ) );
wait( delayJittered( SERVER_KNOBS->SHARD_READY_DELAY, TaskPriority::MoveKeys ) );
}
catch (Error& e) {
if( e.code() != error_code_timed_out ) {

@@ -419,7 +419,7 @@ ACTOR Future<Void> checkFetchingState( Database cx, vector<UID> dest, KeyRange k
try {
if (BUGGIFY) wait(delay(5));

tr.info.taskID = TaskMoveKeys;
tr.info.taskID = TaskPriority::MoveKeys;
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);

vector< Future< Optional<Value> > > serverListEntries;

@@ -439,7 +439,7 @@ ACTOR Future<Void> checkFetchingState( Database cx, vector<UID> dest, KeyRange k
}

wait( timeoutError( waitForAll( requests ),
SERVER_KNOBS->SERVER_READY_QUORUM_TIMEOUT, TaskMoveKeys ) );
SERVER_KNOBS->SERVER_READY_QUORUM_TIMEOUT, TaskPriority::MoveKeys ) );

dataMovementComplete.send(Void());
return Void();

@@ -480,11 +480,11 @@ ACTOR Future<Void> finishMoveKeys( Database occ, KeyRange keys, vector<UID> dest
//printf("finishMoveKeys( '%s'-'%s' )\n", keys.begin.toString().c_str(), keys.end.toString().c_str());
loop {
try {
tr.info.taskID = TaskMoveKeys;
tr.info.taskID = TaskPriority::MoveKeys;
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);

releaser.release();
wait( finishMoveKeysParallelismLock->take( TaskDataDistributionLaunch ) );
wait( finishMoveKeysParallelismLock->take( TaskPriority::DataDistributionLaunch ) );
releaser = FlowLock::Releaser( *finishMoveKeysParallelismLock );

wait( checkMoveKeysLock(&tr, lock) );

@@ -632,7 +632,7 @@ ACTOR Future<Void> finishMoveKeys( Database occ, KeyRange keys, vector<UID> dest

for(int s=0; s<storageServerInterfaces.size(); s++)
serverReady.push_back( waitForShardReady( storageServerInterfaces[s], keys, tr.getReadVersion().get(), GetShardStateRequest::READABLE) );
wait( timeout( waitForAll( serverReady ), SERVER_KNOBS->SERVER_READY_QUORUM_TIMEOUT, Void(), TaskMoveKeys ) );
wait( timeout( waitForAll( serverReady ), SERVER_KNOBS->SERVER_READY_QUORUM_TIMEOUT, Void(), TaskPriority::MoveKeys ) );
int count = dest.size() - newDestinations.size();
for(int s=0; s<serverReady.size(); s++)
count += serverReady[s].isReady() && !serverReady[s].isError();

@@ -808,7 +808,7 @@ ACTOR Future<Void> removeStorageServer( Database cx, UID serverID, MoveKeysLock
if (!canRemove) {
TEST(true); // The caller had a transaction in flight that assigned keys to the server. Wait for it to reverse its mistake.
TraceEvent(SevWarn,"NoCanRemove").detail("Count", noCanRemoveCount++).detail("ServerID", serverID);
wait( delayJittered(SERVER_KNOBS->REMOVE_RETRY_DELAY, TaskDataDistributionLaunch) );
wait( delayJittered(SERVER_KNOBS->REMOVE_RETRY_DELAY, TaskPriority::DataDistributionLaunch) );
tr.reset();
TraceEvent("RemoveStorageServerRetrying").detail("CanRemove", canRemove);
} else {

@@ -333,7 +333,7 @@ namespace oldTLog_4_6 {
}

// Erase messages not needed to update *from* versions >= before (thus, messages with toversion <= before)
ACTOR Future<Void> eraseMessagesBefore( TagData *self, Version before, int64_t* gBytesErased, Reference<LogData> tlogData, int taskID ) {
ACTOR Future<Void> eraseMessagesBefore( TagData *self, Version before, int64_t* gBytesErased, Reference<LogData> tlogData, TaskPriority taskID ) {
while(!self->version_messages.empty() && self->version_messages.front().first < before) {
Version version = self->version_messages.front().first;
std::pair<int, int> &sizes = tlogData->version_sizes[version];

@@ -359,7 +359,7 @@ namespace oldTLog_4_6 {
return Void();
}

Future<Void> eraseMessagesBefore(Version before, int64_t* gBytesErased, Reference<LogData> tlogData, int taskID) {
Future<Void> eraseMessagesBefore(Version before, int64_t* gBytesErased, Reference<LogData> tlogData, TaskPriority taskID) {
return eraseMessagesBefore(this, before, gBytesErased, tlogData, taskID);
}
};

@@ -526,21 +526,21 @@ namespace oldTLog_4_6 {

self->persistentData->set( KeyValueRef( persistTagMessagesKey( logData->logId, tag->key, currentVersion ), wr.toValue() ) );

Future<Void> f = yield(TaskUpdateStorage);
Future<Void> f = yield(TaskPriority::UpdateStorage);
if(!f.isReady()) {
wait(f);
msg = std::upper_bound(tag->value.version_messages.begin(), tag->value.version_messages.end(), std::make_pair(currentVersion, LengthPrefixedStringRef()), CompareFirst<std::pair<Version, LengthPrefixedStringRef>>());
}
}

wait(yield(TaskUpdateStorage));
wait(yield(TaskPriority::UpdateStorage));
}

self->persistentData->set( KeyValueRef( BinaryWriter::toValue(logData->logId,Unversioned()).withPrefix(persistCurrentVersionKeys.begin), BinaryWriter::toValue(newPersistentDataVersion, Unversioned()) ) );
logData->persistentDataVersion = newPersistentDataVersion;

wait( self->persistentData->commit() ); // SOMEDAY: This seems to be running pretty often, should we slow it down???
wait( delay(0, TaskUpdateStorage) );
wait( delay(0, TaskPriority::UpdateStorage) );

// Now that the changes we made to persistentData are durable, erase the data we moved from memory and the queue, increase bytesDurable accordingly, and update persistentDataDurableVersion.

@@ -548,20 +548,20 @@ namespace oldTLog_4_6 {
logData->persistentDataDurableVersion = newPersistentDataVersion;

for(tag = logData->tag_data.begin(); tag != logData->tag_data.end(); ++tag) {
wait(tag->value.eraseMessagesBefore( newPersistentDataVersion+1, &self->bytesDurable, logData, TaskUpdateStorage ));
wait(yield(TaskUpdateStorage));
wait(tag->value.eraseMessagesBefore( newPersistentDataVersion+1, &self->bytesDurable, logData, TaskPriority::UpdateStorage ));
wait(yield(TaskPriority::UpdateStorage));
}

logData->version_sizes.erase(logData->version_sizes.begin(), logData->version_sizes.lower_bound(logData->persistentDataDurableVersion));

wait(yield(TaskUpdateStorage));
wait(yield(TaskPriority::UpdateStorage));

while(!logData->messageBlocks.empty() && logData->messageBlocks.front().first <= newPersistentDataVersion) {
int64_t bytesErased = int64_t(logData->messageBlocks.front().second.size()) * SERVER_KNOBS->TLOG_MESSAGE_BLOCK_OVERHEAD_FACTOR;
logData->bytesDurable += bytesErased;
self->bytesDurable += bytesErased;
logData->messageBlocks.pop_front();
wait(yield(TaskUpdateStorage));
wait(yield(TaskPriority::UpdateStorage));
}

if(logData->bytesDurable.getValue() > logData->bytesInput.getValue() || self->bytesDurable > self->bytesInput) {

@@ -586,7 +586,7 @@ namespace oldTLog_4_6 {
}

if(!self->queueOrder.size()) {
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskUpdateStorage) );
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskPriority::UpdateStorage) );
return Void();
}

@@ -621,14 +621,14 @@ namespace oldTLog_4_6 {
}

wait( logData->queueCommittedVersion.whenAtLeast( nextVersion ) );
wait( delay(0, TaskUpdateStorage) );
wait( delay(0, TaskPriority::UpdateStorage) );

//TraceEvent("TlogUpdatePersist", self->dbgid).detail("LogId", logData->logId).detail("NextVersion", nextVersion).detail("Version", logData->version.get()).detail("PersistentDataDurableVer", logData->persistentDataDurableVersion).detail("QueueCommitVer", logData->queueCommittedVersion.get()).detail("PersistDataVer", logData->persistentDataVersion);
if (nextVersion > logData->persistentDataVersion) {
self->updatePersist = updatePersistentData(self, logData, nextVersion);
wait( self->updatePersist );
} else {
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskUpdateStorage) );
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskPriority::UpdateStorage) );
}

if( logData->removed.isReady() ) {

@@ -639,9 +639,9 @@ namespace oldTLog_4_6 {
if(logData->persistentDataDurableVersion == logData->version.get()) {
self->queueOrder.pop_front();
}
wait( delay(0.0, TaskUpdateStorage) );
wait( delay(0.0, TaskPriority::UpdateStorage) );
} else {
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskUpdateStorage) );
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskPriority::UpdateStorage) );
}
}
else if(logData->initialized) {

@@ -650,7 +650,7 @@ namespace oldTLog_4_6 {
while( totalSize < SERVER_KNOBS->UPDATE_STORAGE_BYTE_LIMIT && sizeItr != logData->version_sizes.end()
&& (logData->bytesInput.getValue() - logData->bytesDurable.getValue() - totalSize >= SERVER_KNOBS->TLOG_SPILL_THRESHOLD || sizeItr->value.first == 0) )
{
wait( yield(TaskUpdateStorage) );
wait( yield(TaskPriority::UpdateStorage) );

++sizeItr;
nextVersion = sizeItr == logData->version_sizes.end() ? logData->version.get() : sizeItr->key;

@@ -662,7 +662,7 @@ namespace oldTLog_4_6 {
totalSize += it->second.expectedSize();
}

wait(yield(TaskUpdateStorage));
wait(yield(TaskPriority::UpdateStorage));
}

prevVersion = nextVersion;

@@ -673,7 +673,7 @@ namespace oldTLog_4_6 {
//TraceEvent("UpdateStorageVer", logData->logId).detail("NextVersion", nextVersion).detail("PersistentDataVersion", logData->persistentDataVersion).detail("TotalSize", totalSize);

wait( logData->queueCommittedVersion.whenAtLeast( nextVersion ) );
wait( delay(0, TaskUpdateStorage) );
wait( delay(0, TaskPriority::UpdateStorage) );

if (nextVersion > logData->persistentDataVersion) {
self->updatePersist = updatePersistentData(self, logData, nextVersion);

@@ -681,21 +681,21 @@ namespace oldTLog_4_6 {
}

if( totalSize < SERVER_KNOBS->UPDATE_STORAGE_BYTE_LIMIT ) {
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskUpdateStorage) );
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskPriority::UpdateStorage) );
}
else {
//recovery wants to commit to persistant data when updatePersistentData is not active, this delay ensures that immediately after
//updatePersist returns another one has not been started yet.
wait( delay(0.0, TaskUpdateStorage) );
wait( delay(0.0, TaskPriority::UpdateStorage) );
}
} else {
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskUpdateStorage) );
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskPriority::UpdateStorage) );
}
return Void();
}

ACTOR Future<Void> updateStorageLoop( TLogData* self ) {
wait(delay(0, TaskUpdateStorage));
wait(delay(0, TaskPriority::UpdateStorage));

loop {
wait( updateStorage(self) );

@@ -823,7 +823,7 @@ namespace oldTLog_4_6 {
ti->value.popped_recently = true;
//if (to.epoch == self->epoch())
if ( req.to > logData->persistentDataDurableVersion )
wait(ti->value.eraseMessagesBefore( req.to, &self->bytesDurable, logData, TaskTLogPop ));
wait(ti->value.eraseMessagesBefore( req.to, &self->bytesDurable, logData, TaskPriority::TLogPop ));
}

req.reply.send(Void());

@@ -297,7 +297,7 @@ struct TLogData : NonCopyable {
concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS),
ignorePopRequest(false), ignorePopDeadline(), ignorePopUid(), dataFolder(folder), toBePopped()
{
cx = openDBOnServer(dbInfo, TaskDefaultEndpoint, true, true);
cx = openDBOnServer(dbInfo, TaskPriority::DefaultEndpoint, true, true);
}
};

@@ -323,7 +323,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
}

// Erase messages not needed to update *from* versions >= before (thus, messages with toversion <= before)
ACTOR Future<Void> eraseMessagesBefore( TagData *self, Version before, TLogData *tlogData, Reference<LogData> logData, int taskID ) {
ACTOR Future<Void> eraseMessagesBefore( TagData *self, Version before, TLogData *tlogData, Reference<LogData> logData, TaskPriority taskID ) {
while(!self->versionMessages.empty() && self->versionMessages.front().first < before) {
Version version = self->versionMessages.front().first;
std::pair<int,int> &sizes = logData->version_sizes[version];

@@ -352,7 +352,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
return Void();
}

Future<Void> eraseMessagesBefore(Version before, TLogData *tlogData, Reference<LogData> logData, int taskID) {
Future<Void> eraseMessagesBefore(Version before, TLogData *tlogData, Reference<LogData> logData, TaskPriority taskID) {
return eraseMessagesBefore(this, before, tlogData, logData, taskID);
}
};

@@ -607,14 +607,14 @@ ACTOR Future<Void> updatePersistentData( TLogData* self, Reference<LogData> logD

self->persistentData->set( KeyValueRef( persistTagMessagesKey( logData->logId, tagData->tag, currentVersion ), wr.toValue() ) );

Future<Void> f = yield(TaskUpdateStorage);
Future<Void> f = yield(TaskPriority::UpdateStorage);
if(!f.isReady()) {
wait(f);
msg = std::upper_bound(tagData->versionMessages.begin(), tagData->versionMessages.end(), std::make_pair(currentVersion, LengthPrefixedStringRef()), CompareFirst<std::pair<Version, LengthPrefixedStringRef>>());
}
}

wait(yield(TaskUpdateStorage));
wait(yield(TaskPriority::UpdateStorage));
}
}
}

@@ -624,7 +624,7 @@ ACTOR Future<Void> updatePersistentData( TLogData* self, Reference<LogData> logD
logData->persistentDataVersion = newPersistentDataVersion;

wait( self->persistentData->commit() ); // SOMEDAY: This seems to be running pretty often, should we slow it down???
wait( delay(0, TaskUpdateStorage) );
wait( delay(0, TaskPriority::UpdateStorage) );

// Now that the changes we made to persistentData are durable, erase the data we moved from memory and the queue, increase bytesDurable accordingly, and update persistentDataDurableVersion.

@@ -634,22 +634,22 @@ ACTOR Future<Void> updatePersistentData( TLogData* self, Reference<LogData> logD
for(tagLocality = 0; tagLocality < logData->tag_data.size(); tagLocality++) {
for(tagId = 0; tagId < logData->tag_data[tagLocality].size(); tagId++) {
if(logData->tag_data[tagLocality][tagId]) {
wait(logData->tag_data[tagLocality][tagId]->eraseMessagesBefore( newPersistentDataVersion+1, self, logData, TaskUpdateStorage ));
wait(yield(TaskUpdateStorage));
wait(logData->tag_data[tagLocality][tagId]->eraseMessagesBefore( newPersistentDataVersion+1, self, logData, TaskPriority::UpdateStorage ));
wait(yield(TaskPriority::UpdateStorage));
}
}
}

logData->version_sizes.erase(logData->version_sizes.begin(), logData->version_sizes.lower_bound(logData->persistentDataDurableVersion));

wait(yield(TaskUpdateStorage));
wait(yield(TaskPriority::UpdateStorage));

while(!logData->messageBlocks.empty() && logData->messageBlocks.front().first <= newPersistentDataVersion) {
int64_t bytesErased = int64_t(logData->messageBlocks.front().second.size()) * SERVER_KNOBS->TLOG_MESSAGE_BLOCK_OVERHEAD_FACTOR;
logData->bytesDurable += bytesErased;
self->bytesDurable += bytesErased;
logData->messageBlocks.pop_front();
wait(yield(TaskUpdateStorage));
wait(yield(TaskPriority::UpdateStorage));
}

if(logData->bytesDurable.getValue() > logData->bytesInput.getValue() || self->bytesDurable > self->bytesInput) {

@@ -674,7 +674,7 @@ ACTOR Future<Void> updateStorage( TLogData* self ) {
}

if(!self->queueOrder.size()) {
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskUpdateStorage) );
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskPriority::UpdateStorage) );
return Void();
}

@@ -698,7 +698,7 @@ ACTOR Future<Void> updateStorage( TLogData* self ) {
}

wait( logData->queueCommittedVersion.whenAtLeast( nextVersion ) );
wait( delay(0, TaskUpdateStorage) );
wait( delay(0, TaskPriority::UpdateStorage) );

//TraceEvent("TlogUpdatePersist", self->dbgid).detail("LogId", logData->logId).detail("NextVersion", nextVersion).detail("Version", logData->version.get()).detail("PersistentDataDurableVer", logData->persistentDataDurableVersion).detail("QueueCommitVer", logData->queueCommittedVersion.get()).detail("PersistDataVer", logData->persistentDataVersion);
if (nextVersion > logData->persistentDataVersion) {

@@ -707,7 +707,7 @@ ACTOR Future<Void> updateStorage( TLogData* self ) {
wait( updatePersistentData(self, logData, nextVersion) );
commitLockReleaser.release();
} else {
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskUpdateStorage) );
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskPriority::UpdateStorage) );
}

if( logData->removed.isReady() ) {

@@ -718,9 +718,9 @@ ACTOR Future<Void> updateStorage( TLogData* self ) {
if(logData->persistentDataDurableVersion == logData->version.get()) {
self->queueOrder.pop_front();
}
wait( delay(0.0, TaskUpdateStorage) );
wait( delay(0.0, TaskPriority::UpdateStorage) );
} else {
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskUpdateStorage) );
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskPriority::UpdateStorage) );
}
}
else if(logData->initialized) {

@@ -741,7 +741,7 @@ ACTOR Future<Void> updateStorage( TLogData* self ) {
//TraceEvent("UpdateStorageVer", logData->logId).detail("NextVersion", nextVersion).detail("PersistentDataVersion", logData->persistentDataVersion).detail("TotalSize", totalSize);

wait( logData->queueCommittedVersion.whenAtLeast( nextVersion ) );
wait( delay(0, TaskUpdateStorage) );
wait( delay(0, TaskPriority::UpdateStorage) );

if (nextVersion > logData->persistentDataVersion) {
wait( self->persistentDataCommitLock.take() );

@@ -751,21 +751,21 @@ ACTOR Future<Void> updateStorage( TLogData* self ) {
}

if( totalSize < SERVER_KNOBS->UPDATE_STORAGE_BYTE_LIMIT ) {
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskUpdateStorage) );
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskPriority::UpdateStorage) );
}
else {
//recovery wants to commit to persistant data when updatePersistentData is not active, this delay ensures that immediately after
|
||||
//updatePersist returns another one has not been started yet.
|
||||
wait( delay(0.0, TaskUpdateStorage) );
|
||||
wait( delay(0.0, TaskPriority::UpdateStorage) );
|
||||
}
|
||||
} else {
|
||||
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskUpdateStorage) );
|
||||
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskPriority::UpdateStorage) );
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> updateStorageLoop( TLogData* self ) {
|
||||
wait(delay(0, TaskUpdateStorage));
|
||||
wait(delay(0, TaskPriority::UpdateStorage));
|
||||
|
||||
loop {
|
||||
wait( updateStorage(self) );
|
||||
|
@ -943,7 +943,7 @@ ACTOR Future<Void> tLogPopCore( TLogData* self, Tag inputTag, Version to, Refere
|
|||
}
|
||||
|
||||
if (upTo > logData->persistentDataDurableVersion)
|
||||
wait(tagData->eraseMessagesBefore(upTo, self, logData, TaskTLogPop));
|
||||
wait(tagData->eraseMessagesBefore(upTo, self, logData, TaskPriority::TLogPop));
|
||||
//TraceEvent("TLogPop", self->dbgid).detail("Tag", tag.toString()).detail("To", upTo);
|
||||
}
|
||||
return Void();
|
||||
|
@ -1059,7 +1059,7 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
|
|||
if( req.tag.locality == tagLocalityLogRouter ) {
|
||||
wait( self->concurrentLogRouterReads.take() );
|
||||
state FlowLock::Releaser globalReleaser(self->concurrentLogRouterReads);
|
||||
wait( delay(0.0, TaskLowPriority) );
|
||||
wait( delay(0.0, TaskPriority::Low) );
|
||||
}
|
||||
|
||||
if( req.begin <= logData->persistentDataDurableVersion && req.tag != txsTag) {
|
||||
|
@ -1068,7 +1068,7 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
|
|||
// slightly faster over keeping the rest of the cluster operating normally.
|
||||
// txsTag is only ever peeked on recovery, and we would still wish to prioritize requests
|
||||
// that impact recovery duration.
|
||||
wait(delay(0, TaskTLogSpilledPeekReply));
|
||||
wait(delay(0, TaskPriority::TLogSpilledPeekReply));
|
||||
}
|
||||
|
||||
Version poppedVer = poppedVersion(logData, req.tag);
|
||||
|
@ -1182,7 +1182,7 @@ ACTOR Future<Void> watchDegraded(TLogData* self) {
|
|||
//This delay is divided into multiple delays to avoid marking the tlog as degraded because of a single SlowTask
|
||||
state int loopCount = 0;
|
||||
while(loopCount < SERVER_KNOBS->TLOG_DEGRADED_DELAY_COUNT) {
|
||||
wait(delay(SERVER_KNOBS->TLOG_DEGRADED_DURATION/SERVER_KNOBS->TLOG_DEGRADED_DELAY_COUNT, TaskLowPriority));
|
||||
wait(delay(SERVER_KNOBS->TLOG_DEGRADED_DURATION/SERVER_KNOBS->TLOG_DEGRADED_DELAY_COUNT, TaskPriority::Low));
|
||||
loopCount++;
|
||||
}
|
||||
TraceEvent(SevWarnAlways, "TLogDegraded", self->dbgid);
|
||||
|
@ -1518,7 +1518,7 @@ ACTOR Future<Void> tLogCommit(
|
|||
.detail("PersistentDataDurableVersion", logData->persistentDataDurableVersion);
|
||||
waitStartT = now();
|
||||
}
|
||||
wait( delayJittered(.005, TaskTLogCommit) );
|
||||
wait( delayJittered(.005, TaskPriority::TLogCommit) );
|
||||
}
|
||||
|
||||
// while exec op is being committed, no new transactions will be admitted.
|
||||
|
@ -1858,7 +1858,7 @@ ACTOR Future<Void> pullAsyncData( TLogData* self, Reference<LogData> logData, st
|
|||
while (!endVersion.present() || logData->version.get() < endVersion.get()) {
|
||||
loop {
|
||||
choose {
|
||||
when(wait( r ? r->getMore(TaskTLogCommit) : Never() ) ) {
|
||||
when(wait( r ? r->getMore(TaskPriority::TLogCommit) : Never() ) ) {
|
||||
break;
|
||||
}
|
||||
when( wait( dbInfoChange ) ) {
|
||||
|
@ -1881,7 +1881,7 @@ ACTOR Future<Void> pullAsyncData( TLogData* self, Reference<LogData> logData, st
|
|||
.detail("PersistentDataDurableVersion", logData->persistentDataDurableVersion);
|
||||
waitStartT = now();
|
||||
}
|
||||
wait( delayJittered(.005, TaskTLogCommit) );
|
||||
wait( delayJittered(.005, TaskPriority::TLogCommit) );
|
||||
}
|
||||
|
||||
state Version ver = 0;
|
||||
|
@ -1921,7 +1921,7 @@ ACTOR Future<Void> pullAsyncData( TLogData* self, Reference<LogData> logData, st
|
|||
|
||||
// Notifies the commitQueue actor to commit persistentQueue, and also unblocks tLogPeekMessages actors
|
||||
logData->version.set( ver );
|
||||
wait( yield(TaskTLogCommit) );
|
||||
wait( yield(TaskPriority::TLogCommit) );
|
||||
}
|
||||
lastVer = ver;
|
||||
ver = r->version().version;
|
||||
|
@ -1958,7 +1958,7 @@ ACTOR Future<Void> pullAsyncData( TLogData* self, Reference<LogData> logData, st
|
|||
|
||||
// Notifies the commitQueue actor to commit persistentQueue, and also unblocks tLogPeekMessages actors
|
||||
logData->version.set( ver );
|
||||
wait( yield(TaskTLogCommit) );
|
||||
wait( yield(TaskPriority::TLogCommit) );
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -38,7 +38,7 @@ public:
|
|||
ready = NotifiedVersion(s);
|
||||
started = false;
|
||||
}
|
||||
Future<bool> order( Seq s, int taskID = TaskDefaultYield ) {
|
||||
Future<bool> order( Seq s, TaskPriority taskID = TaskPriority::DefaultYield ) {
|
||||
if ( ready.get() < s )
|
||||
return waitAndOrder( this, s, taskID );
|
||||
else
|
||||
|
@ -54,7 +54,7 @@ public:
|
|||
return ready.whenAtLeast(v);
|
||||
}
|
||||
private:
|
||||
ACTOR static Future<bool> waitAndOrder( Orderer<Seq>* self, Seq s, int taskID ) {
|
||||
ACTOR static Future<bool> waitAndOrder( Orderer<Seq>* self, Seq s, TaskPriority taskID ) {
|
||||
wait( self->ready.whenAtLeast(s) );
|
||||
wait( yield( taskID ) || self->shutdown.getFuture() );
|
||||
return self->dedup(s);
|
||||
|
|
|
@@ -291,6 +291,15 @@ ACTOR Future<bool> getTeamCollectionValid(Database cx, WorkerInterface dataDistr
 int64_t desiredMachineTeamNumber = boost::lexical_cast<int64_t>(teamCollectionInfoMessage.getValue("DesiredMachineTeams"));
 int64_t maxMachineTeamNumber = boost::lexical_cast<int64_t>(teamCollectionInfoMessage.getValue("MaxMachineTeams"));

+int64_t minServerTeamOnServer =
+    boost::lexical_cast<int64_t>(teamCollectionInfoMessage.getValue("MinTeamNumberOnServer"));
+int64_t maxServerTeamOnServer =
+    boost::lexical_cast<int64_t>(teamCollectionInfoMessage.getValue("MaxTeamNumberOnServer"));
+int64_t minMachineTeamOnMachine =
+    boost::lexical_cast<int64_t>(teamCollectionInfoMessage.getValue("MinMachineTeamNumberOnMachine"));
+int64_t maxMachineTeamOnMachine =
+    boost::lexical_cast<int64_t>(teamCollectionInfoMessage.getValue("MaxMachineTeamNumberOnMachine"));
+
 // Team number is always valid when we disable teamRemover. This avoids false positive in simulation test
 if (SERVER_KNOBS->TR_FLAG_DISABLE_TEAM_REMOVER) {
 TraceEvent("GetTeamCollectionValid")

@@ -300,7 +309,10 @@ ACTOR Future<bool> getTeamCollectionValid(Database cx, WorkerInterface dataDistr

 // The if condition should be consistent with the condition in teamRemover() that decides
 // if redundant teams exist.
-if (healthyMachineTeamCount > desiredMachineTeamNumber) {
+if (healthyMachineTeamCount > desiredMachineTeamNumber ||
+    (minMachineTeamOnMachine <= 0 && SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER == 3)) {
+// When DESIRED_TEAMS_PER_SERVER == 1, we see minMachineTeamOnMachine can be 0 in one out of 30k test
+// cases. Only check DESIRED_TEAMS_PER_SERVER == 3 for now since it is mostly used configuration.
 TraceEvent("GetTeamCollectionValid")
 .detail("CurrentTeamNumber", currentTeamNumber)
 .detail("DesiredTeamNumber", desiredTeamNumber)

@@ -308,7 +320,13 @@ ACTOR Future<bool> getTeamCollectionValid(Database cx, WorkerInterface dataDistr
 .detail("CurrentHealthyMachineTeamNumber", healthyMachineTeamCount)
 .detail("DesiredMachineTeams", desiredMachineTeamNumber)
 .detail("CurrentMachineTeamNumber", currentMachineTeamNumber)
-.detail("MaxMachineTeams", maxMachineTeamNumber);
+.detail("MaxMachineTeams", maxMachineTeamNumber)
+.detail("MinTeamNumberOnServer", minServerTeamOnServer)
+.detail("MaxTeamNumberOnServer", maxServerTeamOnServer)
+.detail("MinMachineTeamNumberOnMachine", minMachineTeamOnMachine)
+.detail("MaxMachineTeamNumberOnMachine", maxMachineTeamOnMachine)
+.detail("DesiredTeamsPerServer", SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER)
+.detail("MaxTeamsPerServer", SERVER_KNOBS->MAX_TEAMS_PER_SERVER);
 return false;
 } else {
 return true;
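The hunk above widens the validity check: besides having more healthy machine teams than desired, a machine that belongs to zero machine teams (minMachineTeamOnMachine <= 0) now also marks the collection invalid, but only under the common DESIRED_TEAMS_PER_SERVER == 3 configuration. A hedged sketch of that predicate in isolation; the struct and sample numbers below are made up for illustration.

#include <cstdint>
#include <iostream>

struct TeamStats {
	int64_t healthyMachineTeamCount;
	int64_t desiredMachineTeamNumber;
	int64_t minMachineTeamOnMachine;
	int desiredTeamsPerServer; // stands in for SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER
};

// True when the team collection looks wrong, mirroring the condition added above:
// too many healthy machine teams, or a machine with no machine team at all under
// the commonly used 3-teams-per-server configuration.
bool teamCollectionInvalid(const TeamStats& s) {
	return s.healthyMachineTeamCount > s.desiredMachineTeamNumber ||
	       (s.minMachineTeamOnMachine <= 0 && s.desiredTeamsPerServer == 3);
}

int main() {
	TeamStats ok{10, 12, 2, 3};
	TeamStats bad{10, 12, 0, 3};
	std::cout << teamCollectionInvalid(ok) << " " << teamCollectionInvalid(bad) << "\n"; // 0 1
	return 0;
}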
@@ -41,7 +41,6 @@ enum limitReason_t {
 storage_server_min_free_space_ratio, // a storage server's normal limits are being reduced by a low free space ratio
 log_server_min_free_space,
 log_server_min_free_space_ratio,
-storage_server_read_load,
 limitReason_t_end
 };

@@ -57,8 +56,7 @@ const char* limitReasonName[] = {
 "storage_server_min_free_space",
 "storage_server_min_free_space_ratio",
 "log_server_min_free_space",
-"log_server_min_free_space_ratio",
-"storage_server_read_load"
+"log_server_min_free_space_ratio"
 };
 static_assert(sizeof(limitReasonName) / sizeof(limitReasonName[0]) == limitReason_t_end, "limitReasonDesc table size");

@@ -74,8 +72,7 @@ const char* limitReasonDesc[] = {
 "Storage server running out of space (approaching 100MB limit).",
 "Storage server running out of space (approaching 5% limit).",
 "Log server running out of space (approaching 100MB limit).",
-"Log server running out of space (approaching 5% limit).",
-"Storage server is overwhelmed by read workload",
+"Log server running out of space (approaching 5% limit)."
 };

 static_assert(sizeof(limitReasonDesc) / sizeof(limitReasonDesc[0]) == limitReason_t_end, "limitReasonDesc table size");
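The Ratekeeper hunks above remove storage_server_read_load from the enum and from both string tables; the static_assert lines are what keep the three in lockstep. A small self-contained illustration of the pattern, using a trimmed hypothetical subset of the enum rather than the real one.

#include <cstddef>

enum limitReason_t {
	unlimited,
	log_server_min_free_space,
	log_server_min_free_space_ratio,
	limitReason_t_end
};

const char* limitReasonName[] = {
	"unlimited",
	"log_server_min_free_space",
	"log_server_min_free_space_ratio"
};

// If an enum value is added or removed without updating the table,
// the build fails here instead of indexing past the end of the array.
static_assert(sizeof(limitReasonName) / sizeof(limitReasonName[0]) == limitReason_t_end,
              "limitReasonName table size");

int main() { return 0; }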
@ -300,7 +297,7 @@ ACTOR Future<Void> trackEachStorageServer(
|
|||
ACTOR Future<Void> monitorServerListChange(
|
||||
Reference<AsyncVar<ServerDBInfo>> dbInfo,
|
||||
PromiseStream< std::pair<UID, Optional<StorageServerInterface>> > serverChanges) {
|
||||
state Database db = openDBOnServer(dbInfo, TaskRatekeeper, true, true);
|
||||
state Database db = openDBOnServer(dbInfo, TaskPriority::Ratekeeper, true, true);
|
||||
state std::map<UID, StorageServerInterface> oldServers;
|
||||
state Transaction tr(db);
|
||||
|
||||
|
@@ -396,8 +393,7 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {
 ssMetrics.cpuUsage = ss.lastReply.cpuUsage;
 ssMetrics.diskUsage = ss.lastReply.diskUsage;

-int64_t b = storageQueue - targetBytes;
-double targetRateRatio = std::min(( b + springBytes ) / (double)springBytes, 2.0);
+double targetRateRatio = std::min(( storageQueue - targetBytes + springBytes ) / (double)springBytes, 2.0);

 double inputRate = ss.smoothInputBytes.smoothRate();
 //inputRate = std::max( inputRate, actualTps / SERVER_KNOBS->MAX_TRANSACTIONS_PER_BYTE );
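The hunk above only folds the temporary b into a single expression; the value is unchanged: how far the storage queue is past its target, measured in units of the "spring" band and capped at 2.0. A standalone sketch of the arithmetic; the byte counts are example numbers, not FoundationDB knob values.

#include <algorithm>
#include <iostream>

// targetRateRatio as computed in the hunk above:
//   (storageQueue - targetBytes + springBytes) / springBytes, capped at 2.0.
// A queue exactly at target gives 1.0; one spring-width past target gives 2.0.
double targetRateRatio(double storageQueue, double targetBytes, double springBytes) {
	return std::min((storageQueue - targetBytes + springBytes) / springBytes, 2.0);
}

int main() {
	const double targetBytes = 1e9, springBytes = 1e8; // illustrative values only
	std::cout << targetRateRatio(1e9, targetBytes, springBytes) << "\n";    // 1.0
	std::cout << targetRateRatio(1.05e9, targetBytes, springBytes) << "\n"; // 1.5
	std::cout << targetRateRatio(2e9, targetBytes, springBytes) << "\n";    // capped at 2.0
	return 0;
}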
@ -433,16 +429,9 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {
|
|||
double lim = actualTps * x;
|
||||
if (lim < limitTps) {
|
||||
limitTps = lim;
|
||||
if (ssLimitReason == limitReason_t::unlimited || ssLimitReason == limitReason_t::storage_server_write_bandwidth_mvcc)
|
||||
if (ssLimitReason == limitReason_t::unlimited || ssLimitReason == limitReason_t::storage_server_write_bandwidth_mvcc) {
|
||||
ssLimitReason = limitReason_t::storage_server_write_queue_size;
|
||||
}
|
||||
}
|
||||
|
||||
if (ss.localRateLimit < 0.99) {
|
||||
auto lim = double(self->actualTpsMetric) * ss.localRateLimit;
|
||||
if (lim < limitTps) {
|
||||
limitTps = lim;
|
||||
ssLimitReason = limitReason_t::storage_server_read_load;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -629,7 +618,7 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {
|
|||
}
|
||||
|
||||
ACTOR Future<Void> configurationMonitor(Reference<AsyncVar<ServerDBInfo>> dbInfo, DatabaseConfiguration* conf) {
|
||||
state Database cx = openDBOnServer(dbInfo, TaskDefaultEndpoint, true, true);
|
||||
state Database cx = openDBOnServer(dbInfo, TaskPriority::DefaultEndpoint, true, true);
|
||||
loop {
|
||||
state ReadYourWritesTransaction tr(cx);
|
||||
|
||||
|
@ -661,7 +650,7 @@ ACTOR Future<Void> ratekeeper(RatekeeperInterface rkInterf, Reference<AsyncVar<S
|
|||
state Promise<Void> err;
|
||||
state Future<Void> collection = actorCollection( self.addActor.getFuture() );
|
||||
|
||||
TraceEvent("Ratekeeper_Starting", rkInterf.id());
|
||||
TraceEvent("RatekeeperStarting", rkInterf.id());
|
||||
self.addActor.send( waitFailureServer(rkInterf.waitFailure.getFuture()) );
|
||||
self.addActor.send( configurationMonitor(dbInfo, &self.configuration) );
|
||||
|
||||
|
@ -743,7 +732,7 @@ ACTOR Future<Void> ratekeeper(RatekeeperInterface rkInterf, Reference<AsyncVar<S
|
|||
}
|
||||
}
|
||||
catch (Error& err) {
|
||||
TraceEvent("Ratekeeper_Died", rkInterf.id()).error(err, true);
|
||||
TraceEvent("RatekeeperDied", rkInterf.id()).error(err, true);
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
|
|
@ -114,9 +114,9 @@ ACTOR Future<Void> resolveBatch(
|
|||
}
|
||||
}
|
||||
|
||||
if (check_yield(TaskDefaultEndpoint)) {
|
||||
wait( delay( 0, TaskLowPriority ) || delay( SERVER_KNOBS->COMMIT_SLEEP_TIME ) ); // FIXME: Is this still right?
|
||||
g_network->setCurrentTask(TaskDefaultEndpoint);
|
||||
if (check_yield(TaskPriority::DefaultEndpoint)) {
|
||||
wait( delay( 0, TaskPriority::Low ) || delay( SERVER_KNOBS->COMMIT_SLEEP_TIME ) ); // FIXME: Is this still right?
|
||||
g_network->setCurrentTask(TaskPriority::DefaultEndpoint);
|
||||
}
|
||||
|
||||
if (self->version.get() == req.prevVersion) { // Not a duplicate (check relies on no waiting between here and self->version.set() below!)
|
||||
|
|
|
@ -44,8 +44,8 @@ struct ResolverInterface {
|
|||
bool operator != ( ResolverInterface const& r ) const { return id() != r.id(); }
|
||||
NetworkAddress address() const { return resolve.getEndpoint().getPrimaryAddress(); }
|
||||
void initEndpoints() {
|
||||
metrics.getEndpoint( TaskResolutionMetrics );
|
||||
split.getEndpoint( TaskResolutionMetrics );
|
||||
metrics.getEndpoint( TaskPriority::ResolutionMetrics );
|
||||
split.getEndpoint( TaskPriority::ResolutionMetrics );
|
||||
}
|
||||
|
||||
template <class Ar>
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
ACTOR Future<Void> restoreWorker(Reference<ClusterConnectionFile> ccf, LocalityData locality) {
|
||||
state Database cx = Database::createDatabase(ccf->getFilename(), Database::API_VERSION_LATEST,locality);
|
||||
state Database cx = Database::createDatabase(ccf->getFilename(), Database::API_VERSION_LATEST, true, locality);
|
||||
state RestoreInterface interf;
|
||||
interf.initEndpoints();
|
||||
state Optional<RestoreInterface> leaderInterf;
|
||||
|
|
|
@ -37,7 +37,7 @@ struct RestoreInterface {
|
|||
NetworkAddress address() const { return test.getEndpoint().getPrimaryAddress(); }
|
||||
|
||||
void initEndpoints() {
|
||||
test.getEndpoint( TaskClusterController );
|
||||
test.getEndpoint( TaskPriority::ClusterController );
|
||||
}
|
||||
|
||||
template <class Ar>
|
||||
|
|
|
@ -215,7 +215,7 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<ClusterConnec
|
|||
g_simulator.newProcess("Server", ip, port, listenPerProcess, localities, processClass, dataFolder->c_str(),
|
||||
coordFolder->c_str());
|
||||
wait(g_simulator.onProcess(process,
|
||||
TaskDefaultYield)); // Now switch execution to the process on which we will run
|
||||
TaskPriority::DefaultYield)); // Now switch execution to the process on which we will run
|
||||
state Future<ISimulator::KillType> onShutdown = process->onShutdown();
|
||||
|
||||
try {
|
||||
|
@ -850,23 +850,15 @@ void SimulationConfig::generateNormalConfig(int minimumReplication, int minimumR
|
|||
}
|
||||
|
||||
if (deterministicRandom()->random01() < 0.5) {
|
||||
if (deterministicRandom()->random01() < 0.5) {
|
||||
set_config("log_spill:=1"); // VALUE
|
||||
}
|
||||
int logVersion = deterministicRandom()->randomInt( 0, 3 );
|
||||
switch (logVersion) {
|
||||
case 0:
|
||||
break;
|
||||
case 1:
|
||||
set_config("log_version:=2"); // 6.0
|
||||
break;
|
||||
case 2:
|
||||
set_config("log_version:=3"); // 6.1
|
||||
break;
|
||||
}
|
||||
int logSpill = deterministicRandom()->randomInt( TLogSpillType::VALUE, TLogSpillType::END );
|
||||
set_config(format("log_spill:=%d", logSpill));
|
||||
int logVersion = deterministicRandom()->randomInt( TLogVersion::MIN_RECRUITABLE, TLogVersion::MAX_SUPPORTED+1 );
|
||||
set_config(format("log_version:=%d", logVersion));
|
||||
} else {
|
||||
set_config("log_version:=3"); // 6.1
|
||||
set_config("log_spill:=2"); // REFERENCE
|
||||
if (deterministicRandom()->random01() < 0.7)
|
||||
set_config(format("log_version:=%d", TLogVersion::MAX_SUPPORTED));
|
||||
if (deterministicRandom()->random01() < 0.5)
|
||||
set_config(format("log_spill:=%d", TLogSpillType::DEFAULT));
|
||||
}
|
||||
|
||||
if(generateFearless || (datacenters == 2 && deterministicRandom()->random01() < 0.5)) {
|
||||
|
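The simulation-config hunk above (from SimulationConfig::generateNormalConfig) stops hand-rolling a switch over log versions and instead draws uniformly from the valid ranges, TLogSpillType::VALUE..END and TLogVersion::MIN_RECRUITABLE..MAX_SUPPORTED. A sketch of that style of randomized configuration, using std::mt19937 in place of FoundationDB's deterministicRandom(); the enum ranges below are illustrative assumptions.

#include <cstdio>
#include <random>

// Illustrative stand-ins for the enum ranges referenced in the diff.
enum class TLogVersion : int { MIN_RECRUITABLE = 2, V3 = 3, V4 = 4, MAX_SUPPORTED = 4 };
enum class TLogSpillType : int { VALUE = 1, REFERENCE = 2, END = 3 };

int main() {
	std::mt19937 rng(42); // the real code uses deterministicRandom() so simulation stays reproducible

	// randomInt(a, b) in the diff is inclusive of a and exclusive of b.
	std::uniform_int_distribution<int> spillDist(static_cast<int>(TLogSpillType::VALUE),
	                                             static_cast<int>(TLogSpillType::END) - 1);
	std::uniform_int_distribution<int> versionDist(static_cast<int>(TLogVersion::MIN_RECRUITABLE),
	                                               static_cast<int>(TLogVersion::MAX_SUPPORTED));

	std::printf("log_spill:=%d\n", spillDist(rng));
	std::printf("log_version:=%d\n", versionDist(rng));
	return 0;
}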
@ -1399,7 +1391,7 @@ ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool reboot
|
|||
Standalone<StringRef>(deterministicRandom()->randomUniqueID().toString()),
|
||||
Optional<Standalone<StringRef>>()),
|
||||
ProcessClass(ProcessClass::TesterClass, ProcessClass::CommandLineSource), "", ""),
|
||||
TaskDefaultYield));
|
||||
TaskPriority::DefaultYield));
|
||||
Sim2FileSystem::newFileSystem();
|
||||
FlowTransport::createInstance(true, 1);
|
||||
if (tlsOptions->enabled()) {
|
||||
|
@ -1435,8 +1427,8 @@ ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool reboot
|
|||
}
|
||||
|
||||
TraceEvent("SimulatedSystemDestruct");
|
||||
destructed = true;
|
||||
systemActors.clear();
|
||||
|
||||
g_simulator.stop();
|
||||
destructed = true;
|
||||
wait(Never());
|
||||
ASSERT(false);
|
||||
}
|
||||
|
|
|
@ -1822,7 +1822,7 @@ ACTOR Future<JsonBuilderObject> layerStatusFetcher(Database cx, JsonBuilderArray
|
|||
ACTOR Future<JsonBuilderObject> lockedStatusFetcher(Reference<AsyncVar<struct ServerDBInfo>> db, JsonBuilderArray *messages, std::set<std::string> *incomplete_reasons) {
|
||||
state JsonBuilderObject statusObj;
|
||||
|
||||
state Database cx = openDBOnServer(db, TaskDefaultEndpoint, true, false); // Open a new database connection that isn't lock-aware
|
||||
state Database cx = openDBOnServer(db, TaskPriority::DefaultEndpoint, true, false); // Open a new database connection that isn't lock-aware
|
||||
state Transaction tr(cx);
|
||||
state int timeoutSeconds = 5;
|
||||
state Future<Void> getTimeout = delay(timeoutSeconds);
|
||||
|
@@ -2117,7 +2117,11 @@ ACTOR Future<StatusReply> clusterGetStatus(
 incompatibleConnectionsArray.push_back(it.toString());
 }
 statusObj["incompatible_connections"] = incompatibleConnectionsArray;
-statusObj["datacenter_version_difference"] = datacenterVersionDifference;
+
+StatusObject datacenterLag;
+datacenterLag["versions"] = datacenterVersionDifference;
+datacenterLag["seconds"] = datacenterVersionDifference / (double)SERVER_KNOBS->VERSIONS_PER_SECOND;
+statusObj["datacenter_lag"] = datacenterLag;

 int totalDegraded = 0;
 for(auto& it : workers) {
||||
|
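The status hunk above replaces the bare datacenter_version_difference field with a datacenter_lag object that also reports the lag in seconds. A sketch of the conversion, assuming the usual pacing of roughly one million versions per second; the real divisor comes from SERVER_KNOBS->VERSIONS_PER_SECOND.

#include <cstdint>
#include <iostream>

// Hypothetical stand-in for SERVER_KNOBS->VERSIONS_PER_SECOND.
constexpr double VERSIONS_PER_SECOND = 1e6;

double datacenterLagSeconds(int64_t versionDifference) {
	return versionDifference / VERSIONS_PER_SECOND;
}

int main() {
	int64_t diff = 5000000; // 5M versions behind
	std::cout << "versions=" << diff
	          << " seconds=" << datacenterLagSeconds(diff) << "\n"; // ~5 seconds
	return 0;
}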
|
|
@ -56,11 +56,11 @@ struct TLogInterface {
|
|||
bool operator == ( TLogInterface const& r ) const { return id() == r.id(); }
|
||||
NetworkAddress address() const { return peekMessages.getEndpoint().getPrimaryAddress(); }
|
||||
void initEndpoints() {
|
||||
getQueuingMetrics.getEndpoint( TaskTLogQueuingMetrics );
|
||||
popMessages.getEndpoint( TaskTLogPop );
|
||||
peekMessages.getEndpoint( TaskTLogPeek );
|
||||
confirmRunning.getEndpoint( TaskTLogConfirmRunning );
|
||||
commit.getEndpoint( TaskTLogCommit );
|
||||
getQueuingMetrics.getEndpoint( TaskPriority::TLogQueuingMetrics );
|
||||
popMessages.getEndpoint( TaskPriority::TLogPop );
|
||||
peekMessages.getEndpoint( TaskPriority::TLogPeek );
|
||||
confirmRunning.getEndpoint( TaskPriority::TLogConfirmRunning );
|
||||
commit.getEndpoint( TaskPriority::TLogCommit );
|
||||
}
|
||||
|
||||
template <class Ar>
|
||||
|
|
|
@ -349,7 +349,7 @@ struct TLogData : NonCopyable {
|
|||
concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS),
|
||||
ignorePopRequest(false), ignorePopDeadline(), ignorePopUid(), dataFolder(folder), toBePopped()
|
||||
{
|
||||
cx = openDBOnServer(dbInfo, TaskDefaultEndpoint, true, true);
|
||||
cx = openDBOnServer(dbInfo, TaskPriority::DefaultEndpoint, true, true);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -379,7 +379,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
|
|||
}
|
||||
|
||||
// Erase messages not needed to update *from* versions >= before (thus, messages with toversion <= before)
|
||||
ACTOR Future<Void> eraseMessagesBefore( TagData *self, Version before, TLogData *tlogData, Reference<LogData> logData, int taskID ) {
|
||||
ACTOR Future<Void> eraseMessagesBefore( TagData *self, Version before, TLogData *tlogData, Reference<LogData> logData, TaskPriority taskID ) {
|
||||
while(!self->versionMessages.empty() && self->versionMessages.front().first < before) {
|
||||
Version version = self->versionMessages.front().first;
|
||||
std::pair<int,int> &sizes = logData->version_sizes[version];
|
||||
|
@ -408,7 +408,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
|
|||
return Void();
|
||||
}
|
||||
|
||||
Future<Void> eraseMessagesBefore(Version before, TLogData *tlogData, Reference<LogData> logData, int taskID) {
|
||||
Future<Void> eraseMessagesBefore(Version before, TLogData *tlogData, Reference<LogData> logData, TaskPriority taskID) {
|
||||
return eraseMessagesBefore(this, before, tlogData, logData, taskID);
|
||||
}
|
||||
};
|
||||
|
@ -766,7 +766,7 @@ ACTOR Future<Void> updatePersistentData( TLogData* self, Reference<LogData> logD
|
|||
for(tagId = 0; tagId < logData->tag_data[tagLocality].size(); tagId++) {
|
||||
state Reference<LogData::TagData> tagData = logData->tag_data[tagLocality][tagId];
|
||||
if(tagData) {
|
||||
wait(tagData->eraseMessagesBefore( tagData->popped, self, logData, TaskUpdateStorage ));
|
||||
wait(tagData->eraseMessagesBefore( tagData->popped, self, logData, TaskPriority::UpdateStorage ));
|
||||
state Version currentVersion = 0;
|
||||
// Clear recently popped versions from persistentData if necessary
|
||||
updatePersistentPopped( self, logData, tagData );
|
||||
|
@ -819,7 +819,7 @@ ACTOR Future<Void> updatePersistentData( TLogData* self, Reference<LogData> logD
|
|||
wr << uint32_t(0);
|
||||
}
|
||||
|
||||
Future<Void> f = yield(TaskUpdateStorage);
|
||||
Future<Void> f = yield(TaskPriority::UpdateStorage);
|
||||
if(!f.isReady()) {
|
||||
wait(f);
|
||||
msg = std::upper_bound(tagData->versionMessages.begin(), tagData->versionMessages.end(), std::make_pair(currentVersion, LengthPrefixedStringRef()), CompareFirst<std::pair<Version, LengthPrefixedStringRef>>());
|
||||
|
@ -832,7 +832,7 @@ ACTOR Future<Void> updatePersistentData( TLogData* self, Reference<LogData> logD
|
|||
tagData->poppedLocation = std::min(tagData->poppedLocation, firstLocation);
|
||||
}
|
||||
|
||||
wait(yield(TaskUpdateStorage));
|
||||
wait(yield(TaskPriority::UpdateStorage));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -847,7 +847,7 @@ ACTOR Future<Void> updatePersistentData( TLogData* self, Reference<LogData> logD
|
|||
logData->persistentDataVersion = newPersistentDataVersion;
|
||||
|
||||
wait( self->persistentData->commit() ); // SOMEDAY: This seems to be running pretty often, should we slow it down???
|
||||
wait( delay(0, TaskUpdateStorage) );
|
||||
wait( delay(0, TaskPriority::UpdateStorage) );
|
||||
|
||||
// Now that the changes we made to persistentData are durable, erase the data we moved from memory and the queue, increase bytesDurable accordingly, and update persistentDataDurableVersion.
|
||||
|
||||
|
@ -857,22 +857,22 @@ ACTOR Future<Void> updatePersistentData( TLogData* self, Reference<LogData> logD
|
|||
for(tagLocality = 0; tagLocality < logData->tag_data.size(); tagLocality++) {
|
||||
for(tagId = 0; tagId < logData->tag_data[tagLocality].size(); tagId++) {
|
||||
if(logData->tag_data[tagLocality][tagId]) {
|
||||
wait(logData->tag_data[tagLocality][tagId]->eraseMessagesBefore( newPersistentDataVersion+1, self, logData, TaskUpdateStorage ));
|
||||
wait(yield(TaskUpdateStorage));
|
||||
wait(logData->tag_data[tagLocality][tagId]->eraseMessagesBefore( newPersistentDataVersion+1, self, logData, TaskPriority::UpdateStorage ));
|
||||
wait(yield(TaskPriority::UpdateStorage));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
logData->version_sizes.erase(logData->version_sizes.begin(), logData->version_sizes.lower_bound(logData->persistentDataDurableVersion));
|
||||
|
||||
wait(yield(TaskUpdateStorage));
|
||||
wait(yield(TaskPriority::UpdateStorage));
|
||||
|
||||
while(!logData->messageBlocks.empty() && logData->messageBlocks.front().first <= newPersistentDataVersion) {
|
||||
int64_t bytesErased = int64_t(logData->messageBlocks.front().second.size()) * SERVER_KNOBS->TLOG_MESSAGE_BLOCK_OVERHEAD_FACTOR;
|
||||
logData->bytesDurable += bytesErased;
|
||||
self->bytesDurable += bytesErased;
|
||||
logData->messageBlocks.pop_front();
|
||||
wait(yield(TaskUpdateStorage));
|
||||
wait(yield(TaskPriority::UpdateStorage));
|
||||
}
|
||||
|
||||
if(logData->bytesDurable.getValue() > logData->bytesInput.getValue() || self->bytesDurable > self->bytesInput) {
|
||||
|
@ -915,7 +915,7 @@ ACTOR Future<Void> updateStorage( TLogData* self ) {
|
|||
}
|
||||
|
||||
if(!self->spillOrder.size()) {
|
||||
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskUpdateStorage) );
|
||||
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskPriority::UpdateStorage) );
|
||||
return Void();
|
||||
}
|
||||
|
||||
|
@ -940,7 +940,7 @@ ACTOR Future<Void> updateStorage( TLogData* self ) {
|
|||
}
|
||||
|
||||
wait( logData->queueCommittedVersion.whenAtLeast( nextVersion ) );
|
||||
wait( delay(0, TaskUpdateStorage) );
|
||||
wait( delay(0, TaskPriority::UpdateStorage) );
|
||||
|
||||
//TraceEvent("TlogUpdatePersist", self->dbgid).detail("LogId", logData->logId).detail("NextVersion", nextVersion).detail("Version", logData->version.get()).detail("PersistentDataDurableVer", logData->persistentDataDurableVersion).detail("QueueCommitVer", logData->queueCommittedVersion.get()).detail("PersistDataVer", logData->persistentDataVersion);
|
||||
if (nextVersion > logData->persistentDataVersion) {
|
||||
|
@ -953,7 +953,7 @@ ACTOR Future<Void> updateStorage( TLogData* self ) {
|
|||
}
|
||||
commitLockReleaser.release();
|
||||
} else {
|
||||
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskUpdateStorage) );
|
||||
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskPriority::UpdateStorage) );
|
||||
}
|
||||
|
||||
if( logData->removed.isReady() ) {
|
||||
|
@ -964,9 +964,9 @@ ACTOR Future<Void> updateStorage( TLogData* self ) {
|
|||
if(logData->persistentDataDurableVersion == logData->version.get()) {
|
||||
self->spillOrder.pop_front();
|
||||
}
|
||||
wait( delay(0.0, TaskUpdateStorage) );
|
||||
wait( delay(0.0, TaskPriority::UpdateStorage) );
|
||||
} else {
|
||||
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskUpdateStorage) );
|
||||
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskPriority::UpdateStorage) );
|
||||
}
|
||||
}
|
||||
else if(logData->initialized) {
|
||||
|
@ -988,7 +988,7 @@ ACTOR Future<Void> updateStorage( TLogData* self ) {
|
|||
//TraceEvent("UpdateStorageVer", logData->logId).detail("NextVersion", nextVersion).detail("PersistentDataVersion", logData->persistentDataVersion).detail("TotalSize", totalSize);
|
||||
|
||||
wait( logData->queueCommittedVersion.whenAtLeast( nextVersion ) );
|
||||
wait( delay(0, TaskUpdateStorage) );
|
||||
wait( delay(0, TaskPriority::UpdateStorage) );
|
||||
|
||||
if (nextVersion > logData->persistentDataVersion) {
|
||||
wait( self->persistentDataCommitLock.take() );
|
||||
|
@ -1001,21 +1001,21 @@ ACTOR Future<Void> updateStorage( TLogData* self ) {
|
|||
}
|
||||
|
||||
if( totalSize < SERVER_KNOBS->UPDATE_STORAGE_BYTE_LIMIT ) {
|
||||
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskUpdateStorage) );
|
||||
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskPriority::UpdateStorage) );
|
||||
}
|
||||
else {
|
||||
//recovery wants to commit to persistant data when updatePersistentData is not active, this delay ensures that immediately after
|
||||
//updatePersist returns another one has not been started yet.
|
||||
wait( delay(0.0, TaskUpdateStorage) );
|
||||
wait( delay(0.0, TaskPriority::UpdateStorage) );
|
||||
}
|
||||
} else {
|
||||
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskUpdateStorage) );
|
||||
wait( delay(BUGGIFY ? SERVER_KNOBS->BUGGIFY_TLOG_STORAGE_MIN_UPDATE_INTERVAL : SERVER_KNOBS->TLOG_STORAGE_MIN_UPDATE_INTERVAL, TaskPriority::UpdateStorage) );
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> updateStorageLoop( TLogData* self ) {
|
||||
wait(delay(0, TaskUpdateStorage));
|
||||
wait(delay(0, TaskPriority::UpdateStorage));
|
||||
|
||||
loop {
|
||||
wait( updateStorage(self) );
|
||||
|
@ -1194,7 +1194,7 @@ ACTOR Future<Void> tLogPopCore( TLogData* self, Tag inputTag, Version to, Refere
|
|||
}
|
||||
|
||||
if (upTo > logData->persistentDataDurableVersion)
|
||||
wait(tagData->eraseMessagesBefore(upTo, self, logData, TaskTLogPop));
|
||||
wait(tagData->eraseMessagesBefore(upTo, self, logData, TaskPriority::TLogPop));
|
||||
//TraceEvent("TLogPop", self->dbgid).detail("Tag", tag.toString()).detail("To", upTo);
|
||||
}
|
||||
return Void();
|
||||
|
@ -1346,7 +1346,7 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
|
|||
if( req.tag.locality == tagLocalityLogRouter ) {
|
||||
wait( self->concurrentLogRouterReads.take() );
|
||||
state FlowLock::Releaser globalReleaser(self->concurrentLogRouterReads);
|
||||
wait( delay(0.0, TaskLowPriority) );
|
||||
wait( delay(0.0, TaskPriority::Low) );
|
||||
}
|
||||
|
||||
if( req.begin <= logData->persistentDataDurableVersion && req.tag != txsTag) {
|
||||
|
@ -1355,7 +1355,7 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
|
|||
// slightly faster over keeping the rest of the cluster operating normally.
|
||||
// txsTag is only ever peeked on recovery, and we would still wish to prioritize requests
|
||||
// that impact recovery duration.
|
||||
wait(delay(0, TaskTLogSpilledPeekReply));
|
||||
wait(delay(0, TaskPriority::TLogSpilledPeekReply));
|
||||
}
|
||||
|
||||
Version poppedVer = poppedVersion(logData, req.tag);
|
||||
|
@ -1464,7 +1464,7 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
|
|||
if (earlyEnd) break;
|
||||
}
|
||||
earlyEnd = earlyEnd || (kvrefs.size() >= SERVER_KNOBS->TLOG_SPILL_REFERENCE_MAX_BATCHES_PER_PEEK+1);
|
||||
wait( self->peekMemoryLimiter.take(TaskTLogSpilledPeekReply, commitBytes) );
|
||||
wait( self->peekMemoryLimiter.take(TaskPriority::TLogSpilledPeekReply, commitBytes) );
|
||||
state FlowLock::Releaser memoryReservation(self->peekMemoryLimiter, commitBytes);
|
||||
state std::vector<Future<Standalone<StringRef>>> messageReads;
|
||||
messageReads.reserve( commitLocations.size() );
|
||||
|
@ -1556,7 +1556,7 @@ ACTOR Future<Void> watchDegraded(TLogData* self) {
|
|||
//This delay is divided into multiple delays to avoid marking the tlog as degraded because of a single SlowTask
|
||||
state int loopCount = 0;
|
||||
while(loopCount < SERVER_KNOBS->TLOG_DEGRADED_DELAY_COUNT) {
|
||||
wait(delay(SERVER_KNOBS->TLOG_DEGRADED_DURATION/SERVER_KNOBS->TLOG_DEGRADED_DELAY_COUNT, TaskLowPriority));
|
||||
wait(delay(SERVER_KNOBS->TLOG_DEGRADED_DURATION/SERVER_KNOBS->TLOG_DEGRADED_DELAY_COUNT, TaskPriority::Low));
|
||||
loopCount++;
|
||||
}
|
||||
TraceEvent(SevWarnAlways, "TLogDegraded", self->dbgid);
|
||||
|
@ -1892,7 +1892,7 @@ ACTOR Future<Void> tLogCommit(
|
|||
.detail("PersistentDataDurableVersion", logData->persistentDataDurableVersion);
|
||||
waitStartT = now();
|
||||
}
|
||||
wait( delayJittered(.005, TaskTLogCommit) );
|
||||
wait( delayJittered(.005, TaskPriority::TLogCommit) );
|
||||
}
|
||||
|
||||
// while exec op is being committed, no new transactions will be admitted.
|
||||
|
@ -2236,10 +2236,14 @@ ACTOR Future<Void> pullAsyncData( TLogData* self, Reference<LogData> logData, st
|
|||
state Version tagAt = beginVersion;
|
||||
state Version lastVer = 0;
|
||||
|
||||
if (endVersion.present()) {
|
||||
TraceEvent("TLogRestoreReplicationFactor", self->dbgid).detail("LogId", logData->logId).detail("Locality", logData->locality).detail("RecoverFrom", beginVersion).detail("RecoverTo", endVersion.get());
|
||||
}
|
||||
|
||||
while (!endVersion.present() || logData->version.get() < endVersion.get()) {
|
||||
loop {
|
||||
choose {
|
||||
when(wait( r ? r->getMore(TaskTLogCommit) : Never() ) ) {
|
||||
when(wait( r ? r->getMore(TaskPriority::TLogCommit) : Never() ) ) {
|
||||
break;
|
||||
}
|
||||
when( wait( dbInfoChange ) ) {
|
||||
|
@ -2262,7 +2266,7 @@ ACTOR Future<Void> pullAsyncData( TLogData* self, Reference<LogData> logData, st
|
|||
.detail("PersistentDataDurableVersion", logData->persistentDataDurableVersion);
|
||||
waitStartT = now();
|
||||
}
|
||||
wait( delayJittered(.005, TaskTLogCommit) );
|
||||
wait( delayJittered(.005, TaskPriority::TLogCommit) );
|
||||
}
|
||||
|
||||
state Version ver = 0;
|
||||
|
@ -2302,7 +2306,7 @@ ACTOR Future<Void> pullAsyncData( TLogData* self, Reference<LogData> logData, st
|
|||
|
||||
// Notifies the commitQueue actor to commit persistentQueue, and also unblocks tLogPeekMessages actors
|
||||
logData->version.set( ver );
|
||||
wait( yield(TaskTLogCommit) );
|
||||
wait( yield(TaskPriority::TLogCommit) );
|
||||
}
|
||||
lastVer = ver;
|
||||
ver = r->version().version;
|
||||
|
@ -2339,7 +2343,7 @@ ACTOR Future<Void> pullAsyncData( TLogData* self, Reference<LogData> logData, st
|
|||
|
||||
// Notifies the commitQueue actor to commit persistentQueue, and also unblocks tLogPeekMessages actors
|
||||
logData->version.set( ver );
|
||||
wait( yield(TaskTLogCommit) );
|
||||
wait( yield(TaskPriority::TLogCommit) );
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -431,7 +431,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
|
|||
vector<Future<Void>> tLogCommitResults;
|
||||
for(int loc=0; loc< it->logServers.size(); loc++) {
|
||||
Standalone<StringRef> msg = data.getMessages(location);
|
||||
allReplies.push_back( it->logServers[loc]->get().interf().commit.getReply( TLogCommitRequest( msg.arena(), prevVersion, version, knownCommittedVersion, minKnownCommittedVersion, msg, data.getHasExecOp(), debugID ), TaskTLogCommitReply ) );
|
||||
allReplies.push_back( it->logServers[loc]->get().interf().commit.getReply( TLogCommitRequest( msg.arena(), prevVersion, version, knownCommittedVersion, minKnownCommittedVersion, msg, data.getHasExecOp(), debugID ), TaskPriority::TLogCommitReply ) );
|
||||
Future<Void> commitSuccess = success(allReplies.back());
|
||||
addActor.get().send(commitSuccess);
|
||||
tLogCommitResults.push_back(commitSuccess);
|
||||
|
@ -803,27 +803,52 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
|
|||
if( found ) {
|
||||
if(stopped) {
|
||||
std::vector<Reference<LogSet>> localSets;
|
||||
int bestSet = 0;
|
||||
int bestPrimarySet = 0;
|
||||
int bestSatelliteSet = -1;
|
||||
for(auto& log : tLogs) {
|
||||
if(log->isLocal && log->logServers.size()) {
|
||||
TraceEvent("TLogPeekLogRouterLocalSet", dbgid).detail("Tag", tag.toString()).detail("Begin", begin).detail("LogServers", log->logServerString());
|
||||
localSets.push_back(log);
|
||||
if(log->locality != tagLocalitySatellite) {
|
||||
bestSet = localSets.size() - 1;
|
||||
if(log->locality == tagLocalitySatellite) {
|
||||
bestSatelliteSet = localSets.size() - 1;
|
||||
} else {
|
||||
bestPrimarySet = localSets.size() - 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
int bestSet = bestPrimarySet;
|
||||
if (SERVER_KNOBS->LOG_ROUTER_PEEK_FROM_SATELLITES_PREFERRED &&
|
||||
bestSatelliteSet != -1 &&
|
||||
tLogs[bestSatelliteSet]->tLogVersion >= TLogVersion::V4 ) {
|
||||
bestSet = bestSatelliteSet;
|
||||
}
|
||||
|
||||
TraceEvent("TLogPeekLogRouterSets", dbgid).detail("Tag", tag.toString()).detail("Begin", begin);
|
||||
//FIXME: do this merge on one of the logs in the other data center to avoid sending multiple copies across the WAN
|
||||
return Reference<ILogSystem::SetPeekCursor>( new ILogSystem::SetPeekCursor( localSets, bestSet, localSets[bestSet]->bestLocationFor( tag ), tag, begin, getPeekEnd(), true ) );
|
||||
} else {
|
||||
for( auto& log : tLogs ) {
|
||||
if(log->logServers.size() && log->isLocal && log->locality != tagLocalitySatellite) {
|
||||
TraceEvent("TLogPeekLogRouterBestOnly", dbgid).detail("Tag", tag.toString()).detail("Begin", begin).detail("LogId", log->logServers[log->bestLocationFor( tag )]->get().id());
|
||||
return Reference<ILogSystem::ServerPeekCursor>( new ILogSystem::ServerPeekCursor( log->logServers[log->bestLocationFor( tag )], tag, begin, getPeekEnd(), false, true ) );
|
||||
int bestPrimarySet = -1;
|
||||
int bestSatelliteSet = -1;
|
||||
for( int i = 0; i < tLogs.size(); i++ ) {
|
||||
const auto& log = tLogs[i];
|
||||
if(log->logServers.size() && log->isLocal) {
|
||||
if (log->locality == tagLocalitySatellite) {
|
||||
bestSatelliteSet = i;
|
||||
break;
|
||||
} else {
|
||||
if (bestPrimarySet == -1) bestPrimarySet = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
int bestSet = bestPrimarySet;
|
||||
if (SERVER_KNOBS->LOG_ROUTER_PEEK_FROM_SATELLITES_PREFERRED &&
|
||||
bestSatelliteSet != -1 &&
|
||||
tLogs[bestSatelliteSet]->tLogVersion >= TLogVersion::V4 ) {
|
||||
bestSet = bestSatelliteSet;
|
||||
}
|
||||
const auto& log = tLogs[bestSet];
|
||||
TraceEvent("TLogPeekLogRouterBestOnly", dbgid).detail("Tag", tag.toString()).detail("Begin", begin).detail("LogId", log->logServers[log->bestLocationFor( tag )]->get().id());
|
||||
return Reference<ILogSystem::ServerPeekCursor>( new ILogSystem::ServerPeekCursor( log->logServers[log->bestLocationFor( tag )], tag, begin, getPeekEnd(), false, true ) );
|
||||
}
|
||||
}
|
||||
bool firstOld = true;
|
||||
|
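The peekLogRouter hunks above track the best primary set and the best satellite set separately and, when the LOG_ROUTER_PEEK_FROM_SATELLITES_PREFERRED knob is set and the satellite logs are at TLogVersion V4 or newer, prefer the satellite set. A condensed sketch of just the selection logic; the types here are simplified stand-ins, not the real LogSet.

#include <cstdio>
#include <vector>

struct LogSet {
	bool isLocal;
	bool hasServers;
	bool isSatellite;
	int tLogVersion; // 4 plays the role of TLogVersion::V4 in the diff
};

// Pick the index of the set to peek from: the first local primary set by default,
// or the satellite set when preferred by knob and new enough (mirrors the hunks above).
int chooseBestSet(const std::vector<LogSet>& sets, bool preferSatellites) {
	int bestPrimary = -1, bestSatellite = -1;
	for (int i = 0; i < (int)sets.size(); i++) {
		if (!sets[i].isLocal || !sets[i].hasServers) continue;
		if (sets[i].isSatellite) {
			if (bestSatellite == -1) bestSatellite = i;
		} else if (bestPrimary == -1) {
			bestPrimary = i;
		}
	}
	if (preferSatellites && bestSatellite != -1 && sets[bestSatellite].tLogVersion >= 4)
		return bestSatellite;
	return bestPrimary;
}

int main() {
	std::vector<LogSet> sets = { {true, true, false, 4}, {true, true, true, 4} };
	std::printf("chosen set: %d\n", chooseBestSet(sets, true));  // 1 (satellite)
	std::printf("chosen set: %d\n", chooseBestSet(sets, false)); // 0 (primary)
	return 0;
}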
@ -836,17 +861,26 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
|
|||
}
|
||||
}
|
||||
if( found ) {
|
||||
int bestSet = 0;
|
||||
int bestPrimarySet = 0;
|
||||
int bestSatelliteSet = -1;
|
||||
std::vector<Reference<LogSet>> localSets;
|
||||
for(auto& log : old.tLogs) {
|
||||
if(log->isLocal && log->logServers.size()) {
|
||||
TraceEvent("TLogPeekLogRouterOldLocalSet", dbgid).detail("Tag", tag.toString()).detail("Begin", begin).detail("LogServers", log->logServerString());
|
||||
localSets.push_back(log);
|
||||
if(log->locality != tagLocalitySatellite) {
|
||||
bestSet = localSets.size()-1;
|
||||
if(log->locality == tagLocalitySatellite) {
|
||||
bestSatelliteSet = localSets.size() - 1;
|
||||
} else {
|
||||
bestPrimarySet = localSets.size() - 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
int bestSet = bestPrimarySet;
|
||||
if (SERVER_KNOBS->LOG_ROUTER_PEEK_FROM_SATELLITES_PREFERRED &&
|
||||
bestSatelliteSet != -1 &&
|
||||
old.tLogs[bestSatelliteSet]->tLogVersion >= TLogVersion::V4 ) {
|
||||
bestSet = bestSatelliteSet;
|
||||
}
|
||||
|
||||
TraceEvent("TLogPeekLogRouterOldSets", dbgid).detail("Tag", tag.toString()).detail("Begin", begin).detail("OldEpoch", old.epochEnd).detail("RecoveredAt", recoveredAt.present() ? recoveredAt.get() : -1).detail("FirstOld", firstOld);
|
||||
//FIXME: do this merge on one of the logs in the other data center to avoid sending multiple copies across the WAN
|
||||
|
@ -961,7 +995,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
|
|||
if( t->get().present() ) {
|
||||
alive.push_back( brokenPromiseToNever(
|
||||
t->get().interf().confirmRunning.getReply( TLogConfirmRunningRequest(debugID),
|
||||
TaskTLogConfirmRunningReply ) ) );
|
||||
TaskPriority::TLogConfirmRunningReply ) ) );
|
||||
numPresent++;
|
||||
} else {
|
||||
alive.push_back( Never() );
|
||||
|
@@ -1477,7 +1511,16 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
 logSystem->rejoins = rejoins;
 logSystem->lockResults = lockResults;
 logSystem->recoverAt = minEnd;
-logSystem->knownCommittedVersion = knownCommittedVersion;
+if (knownCommittedVersion > minEnd) {
+// FIXME: Remove the Sev40 once disk snapshot v2 feature is enabled, in all other
+// code paths we should never be here.
+TraceEvent(SevError, "KCVIsInvalid")
+    .detail("KnownCommittedVersion", knownCommittedVersion)
+    .detail("MinEnd", minEnd);
+logSystem->knownCommittedVersion = minEnd;
+} else {
+logSystem->knownCommittedVersion = knownCommittedVersion;
+}
 logSystem->remoteLogsWrittenToCoreState = true;
 logSystem->stopped = true;
 logSystem->pseudoLocalities = prevState.pseudoLocalities;
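The hunk above guards against a known committed version that is ahead of the recovery end version: instead of copying it blindly, it logs a SevError event and clamps to minEnd. A minimal sketch of the clamp; the logging call is a stand-in, not FoundationDB's TraceEvent API.

#include <cstdint>
#include <cstdio>

using Version = int64_t;

// Clamp knownCommittedVersion to minEnd, reporting when the invariant
// knownCommittedVersion <= minEnd is violated (mirrors the hunk above).
Version clampKnownCommittedVersion(Version knownCommittedVersion, Version minEnd) {
	if (knownCommittedVersion > minEnd) {
		std::fprintf(stderr, "KCVIsInvalid: kcv=%lld minEnd=%lld\n",
		             (long long)knownCommittedVersion, (long long)minEnd);
		return minEnd;
	}
	return knownCommittedVersion;
}

int main() {
	std::printf("%lld\n", (long long)clampKnownCommittedVersion(90, 100));  // 90
	std::printf("%lld\n", (long long)clampKnownCommittedVersion(120, 100)); // clamped to 100
	return 0;
}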
@@ -1949,12 +1992,25 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
 req.logRouterTags = logSystem->logRouterTags;
 }

-for(int i = -1; i < oldLogSystem->logRouterTags; i++) {
-Tag tag = i == -1 ? txsTag : Tag(tagLocalityLogRouter, i);
-locations.clear();
-logSystem->tLogs[1]->getPushLocations( vector<Tag>(1, tag), locations, 0 );
-for(int loc : locations)
-sreqs[ loc ].recoverTags.push_back( tag );
-}
+locations.clear();
+logSystem->tLogs[1]->getPushLocations( {txsTag}, locations, 0 );
+for(int loc : locations)
+sreqs[ loc ].recoverTags.push_back( txsTag );
+
+if (logSystem->logRouterTags) {
+for(int i = 0; i < oldLogSystem->logRouterTags; i++) {
+Tag tag = Tag(tagLocalityLogRouter, i);
+// Sattelite logs will index a mutation with tagLocalityLogRouter with an id greater than
+// the number of log routers as having an id mod the number of log routers. We thus need
+// to make sure that if we're going from more log routers in the previous generation to
+// less log routers in the newer one, that we map the log router tags onto satellites that
+// are the preferred location for id%logRouterTags.
+Tag pushLocation = Tag(tagLocalityLogRouter, i%logSystem->logRouterTags);
+locations.clear();
+logSystem->tLogs[1]->getPushLocations( {pushLocation}, locations, 0 );
+for(int loc : locations)
+sreqs[ loc ].recoverTags.push_back( tag );
+}
+}

 for( int i = 0; i < recr.satelliteTLogs.size(); i++ )
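The comment in the hunk above explains why recovery tags are pushed to the location preferred for id % logRouterTags: when the new generation has fewer log routers than the old one, higher old ids must still land on the satellite that will index them modulo the new count. A toy sketch of that mapping; Tag here is a simplified stand-in for the real type.

#include <cstdio>

struct Tag {
	int locality; // plays the role of tagLocalityLogRouter in the diff
	int id;
};

// Map an old-generation log router tag onto the push location used by the
// new generation with newLogRouterTags routers (mirrors i % logRouterTags above).
Tag pushLocationFor(int oldId, int tagLocalityLogRouter, int newLogRouterTags) {
	return Tag{ tagLocalityLogRouter, oldId % newLogRouterTags };
}

int main() {
	const int tagLocalityLogRouter = -2; // illustrative value only
	// Old generation had 4 log routers, new generation has 2.
	for (int i = 0; i < 4; i++) {
		Tag t = pushLocationFor(i, tagLocalityLogRouter, 2);
		std::printf("old router %d -> push location id %d\n", i, t.id);
	}
	return 0;
}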
@@ -713,7 +713,7 @@ static int asyncSleep(sqlite3_vfs *pVfs, int microseconds){
 waitFor( delay(FLOW_KNOBS->MAX_BUGGIFIED_DELAY) );
 return 0;
 }
-waitFor( g_network->delay( microseconds*1e-6, TaskDefaultDelay ) || simCancel );
+waitFor( g_network->delay( microseconds*1e-6, TaskPriority::DefaultDelay ) || simCancel );
 return microseconds;
 } catch( Error &e ) {
 TraceEvent(SevError, "AsyncSleepError").error(e,true);
File diff suppressed because it is too large.
@@ -37,7 +37,7 @@ ACTOR Future<Void> waitFailureServer(FutureStream<ReplyPromise<Void>> waitFailur
 }
 }

-ACTOR Future<Void> waitFailureClient(RequestStream<ReplyPromise<Void>> waitFailure, double reactionTime, double reactionSlope, int taskID){
+ACTOR Future<Void> waitFailureClient(RequestStream<ReplyPromise<Void>> waitFailure, double reactionTime, double reactionSlope, TaskPriority taskID){
 loop {
 try {
 state double start = now();

@@ -55,7 +55,7 @@ ACTOR Future<Void> waitFailureClient(RequestStream<ReplyPromise<Void>> waitFailu
 }
 }

-ACTOR Future<Void> waitFailureClientStrict(RequestStream<ReplyPromise<Void>> waitFailure, double failureReactionTime, int taskID){
+ACTOR Future<Void> waitFailureClientStrict(RequestStream<ReplyPromise<Void>> waitFailure, double failureReactionTime, TaskPriority taskID){
 loop {
 wait(waitFailureClient(waitFailure, 0, 0, taskID));
 wait(delay(failureReactionTime, taskID) || IFailureMonitor::failureMonitor().onStateEqual( waitFailure.getEndpoint(), FailureStatus(false)));

@@ -65,7 +65,7 @@ ACTOR Future<Void> waitFailureClientStrict(RequestStream<ReplyPromise<Void>> wai
 }
 }

-ACTOR Future<Void> waitFailureTracker(RequestStream<ReplyPromise<Void>> waitFailure, Reference<AsyncVar<bool>> failed, double reactionTime, double reactionSlope, int taskID){
+ACTOR Future<Void> waitFailureTracker(RequestStream<ReplyPromise<Void>> waitFailure, Reference<AsyncVar<bool>> failed, double reactionTime, double reactionSlope, TaskPriority taskID){
 loop {
 try {
 failed->set( IFailureMonitor::failureMonitor().getState(waitFailure.getEndpoint()).isFailed() );
@@ -26,13 +26,13 @@ Future<Void> waitFailureServer(const FutureStream<ReplyPromise<Void>>& waitFailu

 // talks to a wait failure server, returns Void on failure
 Future<Void> waitFailureClient(const RequestStream<ReplyPromise<Void>>& waitFailure,
-double const& failureReactionTime=0, double const& failureReactionSlope=0, int const& taskID=TaskDefaultEndpoint);
+double const& failureReactionTime=0, double const& failureReactionSlope=0, TaskPriority const& taskID=TaskPriority::DefaultEndpoint);

 // talks to a wait failure server, returns Void on failure, reaction time is always waited
-Future<Void> waitFailureClientStrict(const RequestStream<ReplyPromise<Void>>& waitFailure, double const& failureReactionTime=0, int const& taskID=TaskDefaultEndpoint);
+Future<Void> waitFailureClientStrict(const RequestStream<ReplyPromise<Void>>& waitFailure, double const& failureReactionTime=0, TaskPriority const& taskID=TaskPriority::DefaultEndpoint);

 // talks to a wait failure server, updates failed to be true or false based on failure status.
 Future<Void> waitFailureTracker(const RequestStream<ReplyPromise<Void>>& waitFailure, Reference<AsyncVar<bool>> const& failed,
-double const& failureReactionTime=0, double const& failureReactionSlope=0, int const& taskID=TaskDefaultEndpoint);
+double const& failureReactionTime=0, double const& failureReactionSlope=0, TaskPriority const& taskID=TaskPriority::DefaultEndpoint);

 #endif
 #endif
@@ -369,6 +369,7 @@ struct Role {
 static const Role LOG_ROUTER;
 static const Role DATA_DISTRIBUTOR;
 static const Role RATEKEEPER;
+static const Role COORDINATOR;

 std::string roleName;
 std::string abbreviation;

@@ -392,7 +393,7 @@ void endRole(const Role &role, UID id, std::string reason, bool ok = true, Error

 struct ServerDBInfo;

-class Database openDBOnServer( Reference<AsyncVar<ServerDBInfo>> const& db, int taskID = TaskDefaultEndpoint, bool enableLocalityLoadBalance = true, bool lockAware = false );
+class Database openDBOnServer( Reference<AsyncVar<ServerDBInfo>> const& db, TaskPriority taskID = TaskPriority::DefaultEndpoint, bool enableLocalityLoadBalance = true, bool lockAware = false );
 ACTOR Future<Void> extractClusterInterface(Reference<AsyncVar<Optional<struct ClusterControllerFullInterface>>> a,
 Reference<AsyncVar<Optional<struct ClusterInterface>>> b);

@@ -415,7 +416,8 @@ ACTOR Future<Void> storageServer(IKeyValueStore* persistentData, StorageServerIn
 Reference<AsyncVar<ServerDBInfo>> db, std::string folder);
 ACTOR Future<Void> storageServer(IKeyValueStore* persistentData, StorageServerInterface ssi,
 Reference<AsyncVar<ServerDBInfo>> db, std::string folder,
-Promise<Void> recovered); // changes pssi->id() to be the recovered ID
+Promise<Void> recovered,
+Reference<ClusterConnectionFile> connFile ); // changes pssi->id() to be the recovered ID
 ACTOR Future<Void> masterServer(MasterInterface mi, Reference<AsyncVar<ServerDBInfo>> db,
 ServerCoordinators serverCoordinators, LifetimeToken lifetime, bool forceRecovery);
 ACTOR Future<Void> masterProxyServer(MasterProxyInterface proxy, InitializeMasterProxyRequest req,
Some files were not shown because too many files have changed in this diff.