From a2a2064a50c719612840f4f2c652151094386805 Mon Sep 17 00:00:00 2001
From: "A.J. Beamon" <ajbeamon@apple.com>
Date: Fri, 3 Aug 2018 14:26:04 -0700
Subject: [PATCH 01/34] Don't log a SevWarnAlways event for
 external_client_already_loaded errors.

---
 fdbclient/MultiVersionTransaction.actor.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fdbclient/MultiVersionTransaction.actor.cpp b/fdbclient/MultiVersionTransaction.actor.cpp
index 10bc77ae90..b26d96dcdf 100644
--- a/fdbclient/MultiVersionTransaction.actor.cpp
+++ b/fdbclient/MultiVersionTransaction.actor.cpp
@@ -910,12 +910,13 @@ void MultiVersionApi::runOnExternalClients(std::function<void(Reference<ClientIn
 			}
 		}
 		catch(Error &e) {
-			TraceEvent(SevWarnAlways, "ExternalClientFailure").error(e).detail("LibPath", c->second->libPath);
 			if(e.code() == error_code_external_client_already_loaded) {
+				TraceEvent(SevInfo, "ExternalClientAlreadyLoaded").error(e).detail("LibPath", c->second->libPath);
 				c = externalClients.erase(c);
 				continue;
 			}
 			else {
+				TraceEvent(SevWarnAlways, "ExternalClientFailure").error(e).detail("LibPath", c->second->libPath);
 				c->second->failed = true;
 				newFailure = true;
 			}

From 71f89f372fe67291b52d71ef6c93f1120197b671 Mon Sep 17 00:00:00 2001
From: Evan Tschannen <ejt@apple.com>
Date: Fri, 3 Aug 2018 15:53:38 -0700
Subject: [PATCH 02/34] changed a trace event name to avoid scope type mismatch
 on the tag field

---
 fdbserver/TLogServer.actor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp
index 28fae6159d..b38a754265 100644
--- a/fdbserver/TLogServer.actor.cpp
+++ b/fdbserver/TLogServer.actor.cpp
@@ -1775,7 +1775,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
 			for(auto &kv : data) {
 				Tag tag = decodeTagPoppedKey(rawId, kv.key);
 				Version popped = decodeTagPoppedValue(kv.value);
-				TraceEvent("TLogRestorePop", logData->logId).detail("Tag", tag.toString()).detail("To", popped);
+				TraceEvent("TLogRestorePopped", logData->logId).detail("Tag", tag.toString()).detail("To", popped);
 				auto tagData = logData->getTagData(tag);
 				ASSERT( !tagData );
 				logData->createTagData(tag, popped, false, false, false);

From be1a4d74c7b63d75d8d7b6f2e3511dd2ba33bcb1 Mon Sep 17 00:00:00 2001
From: Evan Tschannen <ejt@apple.com>
Date: Sat, 4 Aug 2018 10:31:30 -0700
Subject: [PATCH 03/34] tlogs serve reads to log routers at a low priority, to
 prevent them from using all their resources catching up a remote dc that has
 been down for a long time; increase the amount of memory ratekeeper budgets
 for tlogs so that there is a gap after the spill threshold to prevent
 temporarily overshooting the budget

---
 fdbserver/ClusterController.actor.cpp | 2 +-
 fdbserver/Knobs.cpp                   | 3 ++-
 fdbserver/Knobs.h                     | 1 +
 fdbserver/TLogServer.actor.cpp        | 9 ++++++++-
 flow/network.h                        | 1 -
 5 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp
index 13e9fd12ac..1efd1dfe07 100644
--- a/fdbserver/ClusterController.actor.cpp
+++ b/fdbserver/ClusterController.actor.cpp
@@ -2069,7 +2069,7 @@ ACTOR Future<Void> updateDatacenterVersionDifference( ClusterControllerData *sel
 
 		state Optional<TLogInterface> primaryLog;
 		state Optional<TLogInterface> remoteLog;
-		if(self->db.serverInfo->get().recoveryState == RecoveryState::FULLY_RECOVERED) {
+		if(self->db.serverInfo->get().recoveryState >= RecoveryState::ALL_LOGS_RECRUITED) {
 			for(auto& logSet : self->db.serverInfo->get().logSystemConfig.tLogs) {
 				if(logSet.isLocal && logSet.locality != tagLocalitySatellite) {
 					for(auto& tLog : logSet.tLogs) {
diff --git a/fdbserver/Knobs.cpp b/fdbserver/Knobs.cpp
index d9900f0d60..abd3e1c72d 100644
--- a/fdbserver/Knobs.cpp
+++ b/fdbserver/Knobs.cpp
@@ -63,6 +63,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
 	init( PARALLEL_GET_MORE_REQUESTS,                             32 ); if( randomize && BUGGIFY ) PARALLEL_GET_MORE_REQUESTS = 2;
 	init( MAX_QUEUE_COMMIT_BYTES,                               15e6 ); if( randomize && BUGGIFY ) MAX_QUEUE_COMMIT_BYTES = 5000;
 	init( VERSIONS_PER_BATCH,                 VERSIONS_PER_SECOND/20 ); if( randomize && BUGGIFY ) VERSIONS_PER_BATCH = std::max<int64_t>(1,VERSIONS_PER_SECOND/1000);
+	init( CONCURRENT_LOG_ROUTER_READS,                             1 );
 
 	// Data distribution queue
 	init( HEALTH_POLL_TIME,                                      1.0 );
@@ -322,7 +323,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
 	init( STORAGE_HARD_LIMIT_BYTES,                           1500e6 ); if( smallStorageTarget ) STORAGE_HARD_LIMIT_BYTES = 4500e3;
 
 	bool smallTlogTarget = randomize && BUGGIFY;
-	init( TARGET_BYTES_PER_TLOG,                              2000e6 ); if( smallTlogTarget ) TARGET_BYTES_PER_TLOG = 2000e3;
+	init( TARGET_BYTES_PER_TLOG,                              2400e6 ); if( smallTlogTarget ) TARGET_BYTES_PER_TLOG = 2000e3;
 	init( SPRING_BYTES_TLOG,								   400e6 ); if( smallTlogTarget ) SPRING_BYTES_TLOG = 200e3;
 	init( TLOG_SPILL_THRESHOLD,                               1500e6 ); if( smallTlogTarget ) TLOG_SPILL_THRESHOLD = 1500e3; if( randomize && BUGGIFY ) TLOG_SPILL_THRESHOLD = 0;
 	init( TLOG_HARD_LIMIT_BYTES,                              3000e6 ); if( smallTlogTarget ) TLOG_HARD_LIMIT_BYTES = 3000e3;
diff --git a/fdbserver/Knobs.h b/fdbserver/Knobs.h
index 87231f854a..c3be2c5303 100644
--- a/fdbserver/Knobs.h
+++ b/fdbserver/Knobs.h
@@ -67,6 +67,7 @@ public:
 	int PARALLEL_GET_MORE_REQUESTS;
 	int64_t MAX_QUEUE_COMMIT_BYTES;
 	int64_t VERSIONS_PER_BATCH;
+	int CONCURRENT_LOG_ROUTER_READS;
 
 	// Data distribution queue
 	double HEALTH_POLL_TIME;
diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp
index b38a754265..0188808c2e 100644
--- a/fdbserver/TLogServer.actor.cpp
+++ b/fdbserver/TLogServer.actor.cpp
@@ -270,13 +270,14 @@ struct TLogData : NonCopyable {
 
 	PromiseStream<Future<Void>> sharedActors;
 	bool terminated;
+	FlowLock concurrentLogRouterReads;
 
 	TLogData(UID dbgid, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> const& dbInfo)
 			: dbgid(dbgid), instanceID(g_random->randomUniqueID().first()),
 			  persistentData(persistentData), rawPersistentQueue(persistentQueue), persistentQueue(new TLogQueue(persistentQueue, dbgid)),
 			  dbInfo(dbInfo), queueCommitBegin(0), queueCommitEnd(0), prevVersion(0),
 			  diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false),
-			  bytesInput(0), bytesDurable(0), updatePersist(Void()), terminated(false)
+			  bytesInput(0), bytesDurable(0), updatePersist(Void()), terminated(false), concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS)
 		{
 		}
 };
@@ -991,6 +992,12 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
 		Void _ = wait( delay(SERVER_KNOBS->TLOG_PEEK_DELAY, g_network->getCurrentTask()) );
 	}
 
+	if( req.tag.locality == tagLocalityLogRouter ) {
+		Void _ = wait( self->concurrentLogRouterReads.take() );
+		state FlowLock::Releaser globalReleaser(self->concurrentLogRouterReads);
+		Void _ = wait( delay(0.0, TaskLowPriority) );
+	}
+
 	Version poppedVer = poppedVersion(logData, req.tag);
 	if(poppedVer > req.begin) {
 		TLogPeekReply rep;
diff --git a/flow/network.h b/flow/network.h
index c93bcff320..2c40228e81 100644
--- a/flow/network.h
+++ b/flow/network.h
@@ -68,7 +68,6 @@ enum {
 	TaskDataDistribution = 3500,
 	TaskDiskWrite = 3010,
 	TaskUpdateStorage = 3000,
-	TaskBatchCopy = 2900,
 	TaskLowPriority = 2000,
 
 	TaskMinPriority = 1000

From fec285146c442d4564ea6e42ce5ca8912f7c6a22 Mon Sep 17 00:00:00 2001
From: Evan Tschannen <ejt@apple.com>
Date: Sat, 4 Aug 2018 12:36:48 -0700
Subject: [PATCH 04/34] significant cpu optimization in update storage

---
 fdbserver/TLogServer.actor.cpp | 74 +++++++++-------------------------
 1 file changed, 19 insertions(+), 55 deletions(-)

diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp
index 0188808c2e..dd6a060e69 100644
--- a/fdbserver/TLogServer.actor.cpp
+++ b/fdbserver/TLogServer.actor.cpp
@@ -288,19 +288,17 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
 		bool nothingPersistent;				// true means tag is *known* to have no messages in persistentData.  false means nothing.
 		bool poppedRecently;					// `popped` has changed since last updatePersistentData
 		Version popped;				// see popped version tracking contract below
-		bool updateVersionSizes;
 		bool unpoppedRecovered;
 		Tag tag;
 
-		TagData( Tag tag, Version popped, bool nothingPersistent, bool poppedRecently, bool unpoppedRecovered ) : tag(tag), nothingPersistent(nothingPersistent), popped(popped), poppedRecently(poppedRecently), unpoppedRecovered(unpoppedRecovered), updateVersionSizes(tag != txsTag) {}
+		TagData( Tag tag, Version popped, bool nothingPersistent, bool poppedRecently, bool unpoppedRecovered ) : tag(tag), nothingPersistent(nothingPersistent), popped(popped), poppedRecently(poppedRecently), unpoppedRecovered(unpoppedRecovered) {}
 
-		TagData(TagData&& r) noexcept(true) : versionMessages(std::move(r.versionMessages)), nothingPersistent(r.nothingPersistent), poppedRecently(r.poppedRecently), popped(r.popped), updateVersionSizes(r.updateVersionSizes), tag(r.tag), unpoppedRecovered(r.unpoppedRecovered) {}
+		TagData(TagData&& r) noexcept(true) : versionMessages(std::move(r.versionMessages)), nothingPersistent(r.nothingPersistent), poppedRecently(r.poppedRecently), popped(r.popped), tag(r.tag), unpoppedRecovered(r.unpoppedRecovered) {}
 		void operator= (TagData&& r) noexcept(true) {
 			versionMessages = std::move(r.versionMessages);
 			nothingPersistent = r.nothingPersistent;
 			poppedRecently = r.poppedRecently;
 			popped = r.popped;
-			updateVersionSizes = r.updateVersionSizes;
 			tag = r.tag;
 			unpoppedRecovered = r.unpoppedRecovered;
 		}
@@ -309,15 +307,17 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
 		ACTOR Future<Void> eraseMessagesBefore( TagData *self, Version before, int64_t* gBytesErased, Reference<LogData> tlogData, int taskID ) {
 			while(!self->versionMessages.empty() && self->versionMessages.front().first < before) {
 				Version version = self->versionMessages.front().first;
-				std::pair<int, int> &sizes = tlogData->version_sizes[version];
+				std::pair<int,int> &sizes = tlogData->version_sizes[version];
 				int64_t messagesErased = 0;
 
 				while(!self->versionMessages.empty() && self->versionMessages.front().first == version) {
 					auto const& m = self->versionMessages.front();
 					++messagesErased;
 
-					if(self->updateVersionSizes) {
+					if(self->tag != txsTag) {
 						sizes.first -= m.second.expectedSize();
+					} else {
+						sizes.second -= m.second.expectedSize();
 					}
 
 					self->versionMessages.pop_front();
@@ -648,7 +648,6 @@ ACTOR Future<Void> updateStorage( TLogData* self ) {
 	}
 
 	state Reference<LogData> logData = self->id_data[self->queueOrder.front()];
-	state Version prevVersion = 0;
 	state Version nextVersion = 0;
 	state int totalSize = 0;
 
@@ -660,33 +659,12 @@ ACTOR Future<Void> updateStorage( TLogData* self ) {
 		if (self->bytesInput - self->bytesDurable >= SERVER_KNOBS->TLOG_SPILL_THRESHOLD) {
 			while(logData->persistentDataDurableVersion != logData->version.get()) {
 				totalSize = 0;
-				std::vector<std::pair<std::deque<std::pair<Version, LengthPrefixedStringRef>>::iterator, std::deque<std::pair<Version, LengthPrefixedStringRef>>::iterator>> iters;
-
-				for(tagLocality = 0; tagLocality < logData->tag_data.size(); tagLocality++) {
-					for(tagId = 0; tagId < logData->tag_data[tagLocality].size(); tagId++) {
-						tagData = logData->tag_data[tagLocality][tagId];
-						if(tagData) {
-							iters.push_back(std::make_pair(tagData->versionMessages.begin(), tagData->versionMessages.end()));
-						}
-					}
-				}
-
-				nextVersion = 0;
-				while( totalSize < SERVER_KNOBS->UPDATE_STORAGE_BYTE_LIMIT || nextVersion <= logData->persistentDataVersion ) {
-					nextVersion = logData->version.get();
-					for( auto &it : iters )
-						if(it.first != it.second)
-							nextVersion = std::min( nextVersion, it.first->first + 1 );
-
-					if(nextVersion == logData->version.get())
-						break;
-
-					for( auto &it : iters ) {
-						while (it.first != it.second && it.first->first < nextVersion) {
-							totalSize += it.first->second.expectedSize();
-							++it.first;
-						}
-					}
+				Map<Version, std::pair<int,int>>::iterator sizeItr = logData->version_sizes.begin();
+				while( totalSize < SERVER_KNOBS->UPDATE_STORAGE_BYTE_LIMIT && sizeItr != logData->version_sizes.end() )
+				{
+					totalSize += sizeItr->value.first + sizeItr->value.second;
+					++sizeItr;
+					nextVersion = sizeItr == logData->version_sizes.end() ? logData->version.get() : sizeItr->key;
 				}
 
 				Void _ = wait( logData->queueCommittedVersion.whenAtLeast( nextVersion ) );
@@ -715,30 +693,13 @@ ACTOR Future<Void> updateStorage( TLogData* self ) {
 	}
 	else if(logData->initialized) {
 		ASSERT(self->queueOrder.size() == 1);
-		state Map<Version, std::pair<int, int>>::iterator sizeItr = logData->version_sizes.begin();
+		Map<Version, std::pair<int,int>>::iterator sizeItr = logData->version_sizes.begin();
 		while( totalSize < SERVER_KNOBS->UPDATE_STORAGE_BYTE_LIMIT && sizeItr != logData->version_sizes.end()
 				&& (logData->bytesInput.getValue() - logData->bytesDurable.getValue() - totalSize >= SERVER_KNOBS->TLOG_SPILL_THRESHOLD || sizeItr->value.first == 0) )
 		{
-			Void _ = wait( yield(TaskUpdateStorage) );
-
+			totalSize += sizeItr->value.first + sizeItr->value.second;
 			++sizeItr;
 			nextVersion = sizeItr == logData->version_sizes.end() ? logData->version.get() : sizeItr->key;
-
-			for(tagLocality = 0; tagLocality < logData->tag_data.size(); tagLocality++) {
-				for(tagId = 0; tagId < logData->tag_data[tagLocality].size(); tagId++) {
-					tagData = logData->tag_data[tagLocality][tagId];
-					if(tagData) {
-						auto it = std::lower_bound(tagData->versionMessages.begin(), tagData->versionMessages.end(), std::make_pair(prevVersion, LengthPrefixedStringRef()), CompareFirst<std::pair<Version, LengthPrefixedStringRef>>());
-						for(; it != tagData->versionMessages.end() && it->first < nextVersion; ++it) {
-							totalSize += it->second.expectedSize();
-						}
-
-						Void _ = wait(yield(TaskUpdateStorage));
-					}
-				}
-			}
-
-			prevVersion = nextVersion;
 		}
 
 		nextVersion = std::max<Version>(nextVersion, logData->persistentDataVersion);
@@ -780,7 +741,8 @@ void commitMessages( Reference<LogData> self, Version version, const std::vector
 	// way to do the memory allocation right as we receive the messages in the network layer.
 
 	int64_t addedBytes = 0;
-	int64_t expectedBytes = 0;
+	int expectedBytes = 0;
+	int txsBytes = 0;
 
 	if(!taggedMessages.size()) {
 		return;
@@ -840,6 +802,8 @@ void commitMessages( Reference<LogData> self, Version version, const std::vector
 				}
 				if (tag != txsTag) {
 					expectedBytes += tagData->versionMessages.back().second.expectedSize();
+				} else {
+					txsBytes += tagData->versionMessages.back().second.expectedSize();
 				}
 
 				// The factor of VERSION_MESSAGES_OVERHEAD is intended to be an overestimate of the actual memory used to store this data in a std::deque.
@@ -855,7 +819,7 @@ void commitMessages( Reference<LogData> self, Version version, const std::vector
 	self->messageBlocks.push_back( std::make_pair(version, block) );
 	addedBytes += int64_t(block.size()) * SERVER_KNOBS->TLOG_MESSAGE_BLOCK_OVERHEAD_FACTOR;
 
-	self->version_sizes[version] = make_pair(expectedBytes, expectedBytes);
+	self->version_sizes[version] = std::make_pair(expectedBytes, txsBytes);
 	self->bytesInput += addedBytes;
 	bytesInput += addedBytes;
 

From 9d0a07a400973c8456b436072a956278afebebb8 Mon Sep 17 00:00:00 2001
From: Evan Tschannen <ejt@apple.com>
Date: Sat, 4 Aug 2018 12:50:56 -0700
Subject: [PATCH 05/34] fix: trackLatest for master recovery state was wrong,
 causing status to report incorrect recovery states

---
 fdbserver/masterserver.actor.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fdbserver/masterserver.actor.cpp b/fdbserver/masterserver.actor.cpp
index a499e90d15..10b71358b9 100644
--- a/fdbserver/masterserver.actor.cpp
+++ b/fdbserver/masterserver.actor.cpp
@@ -1067,19 +1067,19 @@ ACTOR Future<Void> trackTlogRecovery( Reference<MasterData> self, Reference<Asyn
 			TraceEvent("MasterRecoveryState", self->dbgid)
 			.detail("StatusCode", RecoveryStatus::fully_recovered)
 			.detail("Status", RecoveryStatus::names[RecoveryStatus::fully_recovered])
-			.trackLatest(format("%s/MasterRecoveryState", printable(self->dbName).c_str() ).c_str());
+			.trackLatest("MasterRecoveryState");
 		} else if( !newState.oldTLogData.size() && self->recoveryState < RecoveryState::STORAGE_RECOVERED ) {
 			self->recoveryState = RecoveryState::STORAGE_RECOVERED;
 			TraceEvent("MasterRecoveryState", self->dbgid)
 			.detail("StatusCode", RecoveryStatus::storage_recovered)
 			.detail("Status", RecoveryStatus::names[RecoveryStatus::storage_recovered])
-			.trackLatest(format("%s/MasterRecoveryState", printable(self->dbName).c_str() ).c_str());
+			.trackLatest("MasterRecoveryState");
 		} else if( allLogs && self->recoveryState < RecoveryState::ALL_LOGS_RECRUITED ) {
 			self->recoveryState = RecoveryState::ALL_LOGS_RECRUITED;
 			TraceEvent("MasterRecoveryState", self->dbgid)
 			.detail("StatusCode", RecoveryStatus::all_logs_recruited)
 			.detail("Status", RecoveryStatus::names[RecoveryStatus::all_logs_recruited])
-			.trackLatest(format("%s/MasterRecoveryState", printable(self->dbName).c_str() ).c_str());
+			.trackLatest("MasterRecoveryState");
 		}
 
 		if(newState.oldTLogData.size() && self->configuration.repopulateRegionAntiQuorum > 0 && self->logSystem->remoteStorageRecovered()) {

From c757c68bfac0b37cfb26db9412d4a208d30308c8 Mon Sep 17 00:00:00 2001
From: Evan Tschannen <ejt@apple.com>
Date: Sat, 4 Aug 2018 23:53:37 -0700
Subject: [PATCH 06/34] fix: nextVersion needs to be set to logData->version if
 version_sizes is empty

---
 fdbserver/TLogServer.actor.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp
index dd6a060e69..38bacad70d 100644
--- a/fdbserver/TLogServer.actor.cpp
+++ b/fdbserver/TLogServer.actor.cpp
@@ -648,7 +648,7 @@ ACTOR Future<Void> updateStorage( TLogData* self ) {
 	}
 
 	state Reference<LogData> logData = self->id_data[self->queueOrder.front()];
-	state Version nextVersion = 0;
+	state Version nextVersion = logData->version.get();
 	state int totalSize = 0;
 
 	state int tagLocality = 0;
@@ -660,6 +660,7 @@ ACTOR Future<Void> updateStorage( TLogData* self ) {
 			while(logData->persistentDataDurableVersion != logData->version.get()) {
 				totalSize = 0;
 				Map<Version, std::pair<int,int>>::iterator sizeItr = logData->version_sizes.begin();
+				nextVersion = logData->version.get();
 				while( totalSize < SERVER_KNOBS->UPDATE_STORAGE_BYTE_LIMIT && sizeItr != logData->version_sizes.end() )
 				{
 					totalSize += sizeItr->value.first + sizeItr->value.second;
@@ -702,8 +703,6 @@ ACTOR Future<Void> updateStorage( TLogData* self ) {
 			nextVersion = sizeItr == logData->version_sizes.end() ? logData->version.get() : sizeItr->key;
 		}
 
-		nextVersion = std::max<Version>(nextVersion, logData->persistentDataVersion);
-
 		//TraceEvent("UpdateStorageVer", logData->logId).detail("NextVersion", nextVersion).detail("PersistentDataVersion", logData->persistentDataVersion).detail("TotalSize", totalSize);
 
 		Void _ = wait( logData->queueCommittedVersion.whenAtLeast( nextVersion ) );

From 5f7253928f9042226acb8716948d87410f7f9375 Mon Sep 17 00:00:00 2001
From: Evan Tschannen <ejt@apple.com>
Date: Sun, 5 Aug 2018 21:47:05 -0700
Subject: [PATCH 07/34] updated release notes for 6.0.5

---
 documentation/sphinx/source/release-notes.rst | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst
index fa274f378e..a1c240dad5 100644
--- a/documentation/sphinx/source/release-notes.rst
+++ b/documentation/sphinx/source/release-notes.rst
@@ -2,7 +2,7 @@
 Release Notes
 #############
 
-6.0.4
+6.0.5
 =====
 
 Features
@@ -23,6 +23,7 @@ Performance
 * Clients optimistically assume the first leader reply from a coordinator is correct. `(PR #425) <https://github.com/apple/foundationdb/pull/425>`_
 * Network connections are now closed after no interface needs the connection. [6.0.1] `(Issue #375) <https://github.com/apple/foundationdb/issues/375>`_
 * Significantly improved the CPU efficiency of copy mutations to transaction logs during recovery. [6.0.2] `(PR #595) <https://github.com/apple/foundationdb/pull/595>`_
+* A cluster configured with usable_regions=2 did not limit the rate at which it could copy data from the primary DC to the remote DC. This caused poor performance when recovering from a DC outage. [6.0.5] `(PR #673) <https://github.com/apple/foundationdb/pull/673>`_
 
 Fixes
 -----
@@ -36,6 +37,8 @@ Fixes
 * A client could fail to connect to a cluster when the cluster was upgraded to a version compatible with the client. This affected upgrades that were using the multi-version client to maintain compatibility with both versions of the cluster. [6.0.4] `(PR #637) <https://github.com/apple/foundationdb/pull/637>`_
 * A large number of concurrent read attempts could bring the database down after a cluster reboot. [6.0.4] `(PR #650) <https://github.com/apple/foundationdb/pull/650>`_
 * Automatic suppression of trace events which occur too frequently was happening before trace events were suppressed by other mechanisms. [6.0.4] `(PR #656) <https://github.com/apple/foundationdb/pull/656>`_
+* After a recovery, the rate at which transaction logs made mutations durable to disk was around 5 times slower than normal. [6.0.5] `(PR #666) <https://github.com/apple/foundationdb/pull/666>`_
+* Clusters configured to use TLS could get stuck spending all of their CPU opening new connections. [6.0.5] `(PR #666) <https://github.com/apple/foundationdb/pull/666>`_
 
 Status
 ------

From 6f328d41ac10da69e5b09903ce3833e3737a784e Mon Sep 17 00:00:00 2001
From: Evan Tschannen <ejt@apple.com>
Date: Mon, 6 Aug 2018 12:12:55 -0700
Subject: [PATCH 08/34] suppressed spammy trace events

---
 fdbrpc/TLSConnection.actor.cpp   | 10 +++++-----
 fdbserver/StorageMetrics.actor.h |  2 +-
 flow/Platform.cpp                |  8 ++++----
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/fdbrpc/TLSConnection.actor.cpp b/fdbrpc/TLSConnection.actor.cpp
index 0e12274158..0bc3eae9c9 100644
--- a/fdbrpc/TLSConnection.actor.cpp
+++ b/fdbrpc/TLSConnection.actor.cpp
@@ -172,7 +172,7 @@ TLSNetworkConnections::TLSNetworkConnections( Reference<TLSOptions> options ) :
 Future<Reference<IConnection>> TLSNetworkConnections::connect( NetworkAddress toAddr, std::string host) {
 	if ( toAddr.isTLS() ) {
 		NetworkAddress clearAddr( toAddr.ip, toAddr.port, toAddr.isPublic(), false );
-		TraceEvent("TLSConnectionConnecting").detail("ToAddr", toAddr);
+		TraceEvent("TLSConnectionConnecting").suppressFor(1.0).detail("ToAddr", toAddr);
 		if (host.empty() || host == toIPString(toAddr.ip))
 			return wrap(options->get_policy(TLSOptions::POLICY_VERIFY_PEERS), true, network->connect(clearAddr), std::string(""));
 		else
@@ -199,10 +199,10 @@ Reference<IListener> TLSNetworkConnections::listen( NetworkAddress localAddr ) {
 
 void TLSOptions::set_cert_file( std::string const& cert_file ) {
 	try {
-		TraceEvent("TLSConnectionSettingCertFile").detail("CertFilePath", cert_file);
+		TraceEvent("TLSConnectionSettingCertFile").suppressFor(1.0).detail("CertFilePath", cert_file);
 		set_cert_data( readFileBytes( cert_file, CERT_FILE_MAX_SIZE ) );
 	} catch ( Error& ) {
-		TraceEvent(SevError, "TLSOptionsSetCertFileError").detail("Filename", cert_file);
+		TraceEvent(SevError, "TLSOptionsSetCertFileError").suppressFor(1.0).detail("Filename", cert_file);
 		throw;
 	}
 }
@@ -345,12 +345,12 @@ Reference<ITLSPolicy> TLSOptions::get_policy(PolicyType type) {
 
 void TLSOptions::init_plugin() {
 
-	TraceEvent("TLSConnectionLoadingPlugin").detail("Plugin", tlsPluginName);
+	TraceEvent("TLSConnectionLoadingPlugin").suppressFor(1.0).detail("Plugin", tlsPluginName);
 
 	plugin = loadPlugin<ITLSPlugin>( tlsPluginName );
 
 	if ( !plugin ) {
-		TraceEvent(SevError, "TLSConnectionPluginInitError").detail("Plugin", tlsPluginName).GetLastError();
+		TraceEvent(SevError, "TLSConnectionPluginInitError").suppressFor(1.0).detail("Plugin", tlsPluginName).GetLastError();
 		throw tls_error();
 	}
 
diff --git a/fdbserver/StorageMetrics.actor.h b/fdbserver/StorageMetrics.actor.h
index 575543e2ca..6401e75e22 100644
--- a/fdbserver/StorageMetrics.actor.h
+++ b/fdbserver/StorageMetrics.actor.h
@@ -72,7 +72,7 @@ struct StorageMetricSample {
 		}
 
 		// If we didn't return above, we didn't find anything.
-		TraceEvent(SevWarnAlways, "CannotSplitLastSampleKey").detail("Range", printable(range)).detail("Offset", offset);
+		TraceEvent(SevWarn, "CannotSplitLastSampleKey").detail("Range", printable(range)).detail("Offset", offset);
 		return front ? range.end : range.begin;
 	}
 };
diff --git a/flow/Platform.cpp b/flow/Platform.cpp
index f3d70646eb..b99ed58d6d 100644
--- a/flow/Platform.cpp
+++ b/flow/Platform.cpp
@@ -2458,12 +2458,12 @@ void* loadLibrary(const char* lib_path) {
 #if defined(__unixish__)
 	dlobj = dlopen( lib_path, RTLD_LAZY | RTLD_LOCAL );
 	if(dlobj == NULL) {
-		TraceEvent(SevWarn, "LoadLibraryFailed").detail("Library", lib_path).detail("Error", dlerror());
+		TraceEvent(SevWarn, "LoadLibraryFailed").suppressFor(1.0).detail("Library", lib_path).detail("Error", dlerror());
 	}
 #else
 	dlobj = LoadLibrary( lib_path );
 	if(dlobj == NULL) {
-		TraceEvent(SevWarn, "LoadLibraryFailed").detail("Library", lib_path).GetLastError();
+		TraceEvent(SevWarn, "LoadLibraryFailed").suppressFor(1.0).detail("Library", lib_path).GetLastError();
 	}
 #endif
 
@@ -2476,12 +2476,12 @@ void* loadFunction(void* lib, const char* func_name) {
 #if defined(__unixish__)
 	dlfcn = dlsym( lib, func_name );
 	if(dlfcn == NULL) {
-		TraceEvent(SevWarn, "LoadFunctionFailed").detail("Function", func_name).detail("Error", dlerror());
+		TraceEvent(SevWarn, "LoadFunctionFailed").suppressFor(1.0).detail("Function", func_name).detail("Error", dlerror());
 	}
 #else
 	dlfcn = GetProcAddress( (HINSTANCE)lib, func_name );
 	if(dlfcn == NULL) {
-		TraceEvent(SevWarn, "LoadFunctionFailed").detail("Function", func_name).GetLastError();
+		TraceEvent(SevWarn, "LoadFunctionFailed").suppressFor(1.0).detail("Function", func_name).GetLastError();
 	}
 #endif
 

From c3c5dd28057392f19527fb2bfe5bc28b8eeb338c Mon Sep 17 00:00:00 2001
From: Evan Tschannen <ejt@apple.com>
Date: Mon, 6 Aug 2018 17:55:24 -0700
Subject: [PATCH 09/34] do not suppress loadlibrary and loadfunction trace
 events

---
 flow/Platform.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/flow/Platform.cpp b/flow/Platform.cpp
index b99ed58d6d..f3d70646eb 100644
--- a/flow/Platform.cpp
+++ b/flow/Platform.cpp
@@ -2458,12 +2458,12 @@ void* loadLibrary(const char* lib_path) {
 #if defined(__unixish__)
 	dlobj = dlopen( lib_path, RTLD_LAZY | RTLD_LOCAL );
 	if(dlobj == NULL) {
-		TraceEvent(SevWarn, "LoadLibraryFailed").suppressFor(1.0).detail("Library", lib_path).detail("Error", dlerror());
+		TraceEvent(SevWarn, "LoadLibraryFailed").detail("Library", lib_path).detail("Error", dlerror());
 	}
 #else
 	dlobj = LoadLibrary( lib_path );
 	if(dlobj == NULL) {
-		TraceEvent(SevWarn, "LoadLibraryFailed").suppressFor(1.0).detail("Library", lib_path).GetLastError();
+		TraceEvent(SevWarn, "LoadLibraryFailed").detail("Library", lib_path).GetLastError();
 	}
 #endif
 
@@ -2476,12 +2476,12 @@ void* loadFunction(void* lib, const char* func_name) {
 #if defined(__unixish__)
 	dlfcn = dlsym( lib, func_name );
 	if(dlfcn == NULL) {
-		TraceEvent(SevWarn, "LoadFunctionFailed").suppressFor(1.0).detail("Function", func_name).detail("Error", dlerror());
+		TraceEvent(SevWarn, "LoadFunctionFailed").detail("Function", func_name).detail("Error", dlerror());
 	}
 #else
 	dlfcn = GetProcAddress( (HINSTANCE)lib, func_name );
 	if(dlfcn == NULL) {
-		TraceEvent(SevWarn, "LoadFunctionFailed").suppressFor(1.0).detail("Function", func_name).GetLastError();
+		TraceEvent(SevWarn, "LoadFunctionFailed").detail("Function", func_name).GetLastError();
 	}
 #endif
 

From fb46385a3910c044a242362fb0f77b1252fa6b95 Mon Sep 17 00:00:00 2001
From: Steve Atherton <stevea@apple.com>
Date: Thu, 26 Jul 2018 17:21:23 -0700
Subject: [PATCH 10/34] Merge pull request #628 from
 alexmiller-apple/reloadcertificates

Reload certificates if changed.

This is a cherry-pick of #628 back to release-6.0
---
 fdbrpc/AsyncFileEIO.actor.h     |  32 +++---
 fdbrpc/AsyncFileWinASIO.actor.h |   8 ++
 fdbrpc/IAsyncFile.h             |   4 +
 fdbrpc/Net2FileSystem.cpp       |   4 +
 fdbrpc/Net2FileSystem.h         |   7 +-
 fdbrpc/TLSConnection.actor.cpp  | 168 ++++++++++++++++++++++++--------
 fdbrpc/TLSConnection.h          |  49 ++++++----
 fdbrpc/sim2.actor.cpp           |   9 ++
 fdbrpc/simulator.h              |   6 +-
 flow/FastRef.h                  |   1 +
 flow/Knobs.cpp                  |   1 +
 flow/Knobs.h                    |   2 +
 12 files changed, 220 insertions(+), 71 deletions(-)

diff --git a/fdbrpc/AsyncFileEIO.actor.h b/fdbrpc/AsyncFileEIO.actor.h
index ef98b0b841..0e18a93323 100644
--- a/fdbrpc/AsyncFileEIO.actor.h
+++ b/fdbrpc/AsyncFileEIO.actor.h
@@ -101,6 +101,11 @@ public:
 			return Void();
 	}
 
+	ACTOR static Future<std::time_t> lastWriteTime( std::string filename ) {
+		EIO_STRUCT_STAT statdata = wait(stat_impl(filename));
+		return statdata.st_mtime;
+	}
+
 	virtual void addref() { ReferenceCounted<AsyncFileEIO>::addref(); }
 	virtual void delref() { ReferenceCounted<AsyncFileEIO>::delref(); }
 
@@ -139,15 +144,7 @@ public:
 	virtual Future<int64_t> size() {
 		++countFileLogicalReads;
 		++countLogicalReads;
-				
-		struct stat buf;
-		if (fstat( fd, &buf )) {
-			TraceEvent("AsyncFileEIOFStatError").detail("Fd",fd).GetLastError();
-			return io_error();
-		}
-		return buf.st_size;
-		
-		//return size_impl(fd);
+		return size_impl(fd);
 	}
 	virtual std::string getFilename() {
 		return filename;
@@ -356,15 +353,28 @@ private:
 		state Promise<Void> p;
 		state eio_req* r = eio_fstat( fd, 0, eio_callback, &p );
 		try { Void _ = wait( p.getFuture() ); } catch (...) { g_network->setCurrentTask( taskID ); eio_cancel(r); throw; }
-		if (r->result) error("StatError", fd, r);
+		if (r->result) error("FStatError", fd, r);
 		EIO_STRUCT_STAT *statdata = (EIO_STRUCT_STAT *)r->ptr2;
-		if (!statdata) error("StatBufferError", fd, r);
+		if (!statdata) error("FStatBufferError", fd, r);
 		state int64_t size = statdata->st_size;
 		free(statdata);
 		Void _ = wait( delay(0, taskID) );
 		return size;
 	}
 
+	ACTOR static Future<EIO_STRUCT_STAT> stat_impl( std::string filename ) {
+		state int taskID = g_network->getCurrentTask();
+		state Promise<Void> p;
+		state EIO_STRUCT_STAT statdata;
+		state eio_req* r = eio_stat( filename.c_str(), 0, eio_callback, &p );
+		try { Void _ = wait( p.getFuture() ); } catch (...) { g_network->setCurrentTask( taskID ); eio_cancel(r); throw; }
+		if (r->result) error("StatError", 0, r);
+		if (!r->ptr2) error("StatBufferError", 0, r);
+		statdata = *EIO_STAT_BUF(r);
+		Void _ = wait( delay (0, taskID) );
+		return statdata;
+	}
+
 	ACTOR template <class R> static Future<R> dispatch_impl( std::function<R()> func) {
 		state Dispatch<R> data( func );
 		state int taskID = g_network->getCurrentTask();
diff --git a/fdbrpc/AsyncFileWinASIO.actor.h b/fdbrpc/AsyncFileWinASIO.actor.h
index 79cfb95fe7..08dd940eff 100644
--- a/fdbrpc/AsyncFileWinASIO.actor.h
+++ b/fdbrpc/AsyncFileWinASIO.actor.h
@@ -76,6 +76,14 @@ public:
 		// SOMEDAY: What is necessary to implement mustBeDurable on Windows?  Does DeleteFile take care of it?  DeleteFileTransacted?
 		return Void();
 	}
+	static Future<std::time_t> lastWriteTime( std::string filename ) {
+		// TODO(alexmiller): I have no idea about windows
+		struct _stat buf;
+		if (_stat( filename.c_str(), &buf ) != 0) {
+			throw io_error();
+		}
+		return buf->st_mtime;
+	}
 
 	virtual void addref() { ReferenceCounted<AsyncFileWinASIO>::addref(); }
 	virtual void delref() { ReferenceCounted<AsyncFileWinASIO>::delref(); }
diff --git a/fdbrpc/IAsyncFile.h b/fdbrpc/IAsyncFile.h
index f333964541..764b8e6426 100644
--- a/fdbrpc/IAsyncFile.h
+++ b/fdbrpc/IAsyncFile.h
@@ -22,6 +22,7 @@
 #define FLOW_IASYNCFILE_H
 #pragma once
 
+#include <ctime>
 #include "flow/flow.h"
 
 // All outstanding operations must be cancelled before the destructor of IAsyncFile is called.
@@ -96,6 +97,9 @@ public:
 	// If mustBeDurable, returns only when the file is guaranteed to be deleted even after a power failure.
 	virtual Future<Void> incrementalDeleteFile( std::string filename, bool mustBeDurable );
 
+	// Returns the time of the last modification of the file.
+	virtual Future<std::time_t> lastWriteTime( std::string filename ) = 0;
+
 	static IAsyncFileSystem* filesystem() { return filesystem(g_network); }
 	static runCycleFuncPtr runCycleFunc() { return reinterpret_cast<runCycleFuncPtr>(reinterpret_cast<flowGlobalType>(g_network->global(INetwork::enRunCycleFunc))); }
 
diff --git a/fdbrpc/Net2FileSystem.cpp b/fdbrpc/Net2FileSystem.cpp
index 504681b6e0..4c7e29d76b 100644
--- a/fdbrpc/Net2FileSystem.cpp
+++ b/fdbrpc/Net2FileSystem.cpp
@@ -74,6 +74,10 @@ Future< Void > Net2FileSystem::deleteFile( std::string filename, bool mustBeDura
 	return Net2AsyncFile::deleteFile(filename, mustBeDurable);
 }
 
+Future< std::time_t > Net2FileSystem::lastWriteTime( std::string filename ) {
+	return Net2AsyncFile::lastWriteTime( filename );
+}
+
 void Net2FileSystem::newFileSystem(double ioTimeout, std::string fileSystemPath)
 {
 	g_network->setGlobal(INetwork::enFileSystem, (flowGlobalType) new Net2FileSystem(ioTimeout, fileSystemPath));
diff --git a/fdbrpc/Net2FileSystem.h b/fdbrpc/Net2FileSystem.h
index 2af15f95ba..2e917970bd 100644
--- a/fdbrpc/Net2FileSystem.h
+++ b/fdbrpc/Net2FileSystem.h
@@ -26,11 +26,14 @@
 
 class Net2FileSystem : public IAsyncFileSystem {
 public:
-	virtual Future< Reference<class IAsyncFile> > open( std::string filename, int64_t flags, int64_t mode );
 	// Opens a file for asynchronous I/O
+	virtual Future< Reference<class IAsyncFile> > open( std::string filename, int64_t flags, int64_t mode );
 
-	virtual Future< Void > deleteFile( std::string filename, bool mustBeDurable );
 	// Deletes the given file.  If mustBeDurable, returns only when the file is guaranteed to be deleted even after a power failure.
+	virtual Future< Void > deleteFile( std::string filename, bool mustBeDurable );
+
+	// Returns the time of the last modification of the file.
+	virtual Future< std::time_t > lastWriteTime( std::string filename );
 
 	//void init();
 
diff --git a/fdbrpc/TLSConnection.actor.cpp b/fdbrpc/TLSConnection.actor.cpp
index 0e12274158..c4bf00b5df 100644
--- a/fdbrpc/TLSConnection.actor.cpp
+++ b/fdbrpc/TLSConnection.actor.cpp
@@ -20,12 +20,14 @@
 
 #include "flow/actorcompiler.h"
 #include "flow/network.h"
+#include "flow/Knobs.h"
 
 #include "TLSConnection.h"
 
 #include "ITLSPlugin.h"
 #include "LoadPlugin.h"
 #include "Platform.h"
+#include "IAsyncFile.h"
 #include <memory>
 
 // Name of specialized TLS Plugin
@@ -161,7 +163,7 @@ ACTOR Future<Reference<IConnection>> wrap( Reference<ITLSPolicy> policy, bool is
 }
 
 Future<Reference<IConnection>> TLSListener::accept() {
-	return wrap( policy, false, listener->accept(), "");
+	return wrap( options->get_policy(TLSOptions::POLICY_VERIFY_PEERS), false, listener->accept(), "");
 }
 
 TLSNetworkConnections::TLSNetworkConnections( Reference<TLSOptions> options ) : options(options) {
@@ -173,6 +175,10 @@ Future<Reference<IConnection>> TLSNetworkConnections::connect( NetworkAddress to
 	if ( toAddr.isTLS() ) {
 		NetworkAddress clearAddr( toAddr.ip, toAddr.port, toAddr.isPublic(), false );
 		TraceEvent("TLSConnectionConnecting").detail("ToAddr", toAddr);
+		// For FDB<->FDB connections, we don't have hostnames and can't verify IP
+		// addresses against certificates, so we have our own peer verifying logic
+		// to use. For FDB<->external system connections, we can use the standard
+		// hostname-based certificate verification logic.
 		if (host.empty() || host == toIPString(toAddr.ip))
 			return wrap(options->get_policy(TLSOptions::POLICY_VERIFY_PEERS), true, network->connect(clearAddr), std::string(""));
 		else
@@ -189,7 +195,7 @@ Reference<IListener> TLSNetworkConnections::listen( NetworkAddress localAddr ) {
 	if ( localAddr.isTLS() ) {
 		NetworkAddress clearAddr( localAddr.ip, localAddr.port, localAddr.isPublic(), false );
 		TraceEvent("TLSConnectionListening").detail("OnAddr", localAddr);
-		return Reference<IListener>(new TLSListener( options->get_policy(TLSOptions::POLICY_VERIFY_PEERS), network->listen( clearAddr ) ));
+		return Reference<IListener>(new TLSListener( options, network->listen( clearAddr ) ));
 	}
 	return network->listen( localAddr );
 }
@@ -200,6 +206,7 @@ Reference<IListener> TLSNetworkConnections::listen( NetworkAddress localAddr ) {
 void TLSOptions::set_cert_file( std::string const& cert_file ) {
 	try {
 		TraceEvent("TLSConnectionSettingCertFile").detail("CertFilePath", cert_file);
+		policyInfo.cert_path = cert_file;
 		set_cert_data( readFileBytes( cert_file, CERT_FILE_MAX_SIZE ) );
 	} catch ( Error& ) {
 		TraceEvent(SevError, "TLSOptionsSetCertFileError").detail("Filename", cert_file);
@@ -210,6 +217,7 @@ void TLSOptions::set_cert_file( std::string const& cert_file ) {
 void TLSOptions::set_ca_file(std::string const& ca_file) {
 	try {
 		TraceEvent("TLSConnectionSettingCAFile").detail("CAPath", ca_file);
+		policyInfo.ca_path = ca_file;
 		set_ca_data(readFileBytes(ca_file, CERT_FILE_MAX_SIZE));
 	}
 	catch (Error&) {
@@ -219,26 +227,26 @@ void TLSOptions::set_ca_file(std::string const& ca_file) {
 }
 
 void TLSOptions::set_ca_data(std::string const& ca_data) {
-	if (!policyVerifyPeersSet || !policyVerifyPeersNotSet)
+	if (!policyVerifyPeersSet.get() || !policyVerifyPeersNotSet.get())
 		init_plugin();
 
 	TraceEvent("TLSConnectionSettingCAData").detail("CADataSize", ca_data.size());
-	if (!policyVerifyPeersSet->set_ca_data((const uint8_t*)&ca_data[0], ca_data.size()))
+	if (!policyVerifyPeersSet.get()->set_ca_data((const uint8_t*)&ca_data[0], ca_data.size()))
 		throw tls_error();
-	if (!policyVerifyPeersNotSet->set_ca_data((const uint8_t*)&ca_data[0], ca_data.size()))
+	if (!policyVerifyPeersNotSet.get()->set_ca_data((const uint8_t*)&ca_data[0], ca_data.size()))
 		throw tls_error();
 
 	ca_set = true;
 }
 
 void TLSOptions::set_cert_data( std::string const& cert_data ) {
-	if (!policyVerifyPeersSet || !policyVerifyPeersNotSet)
+	if (!policyVerifyPeersSet.get() || !policyVerifyPeersNotSet.get())
 		init_plugin();
 
 	TraceEvent("TLSConnectionSettingCertData").detail("CertDataSize", cert_data.size());
-	if ( !policyVerifyPeersSet->set_cert_data( (const uint8_t*)&cert_data[0], cert_data.size() ) )
+	if ( !policyVerifyPeersSet.get()->set_cert_data( (const uint8_t*)&cert_data[0], cert_data.size() ) )
 		throw tls_error();
-	if (!policyVerifyPeersNotSet->set_cert_data((const uint8_t*)&cert_data[0], cert_data.size()))
+	if (!policyVerifyPeersNotSet.get()->set_cert_data((const uint8_t*)&cert_data[0], cert_data.size()))
 		throw tls_error();
 
 	certs_set = true;
@@ -246,12 +254,13 @@ void TLSOptions::set_cert_data( std::string const& cert_data ) {
 
 void TLSOptions::set_key_password(std::string const& password) {
 	TraceEvent("TLSConnectionSettingPassword");
-	keyPassword = password;
+	policyInfo.keyPassword = password;
 }
 
 void TLSOptions::set_key_file( std::string const& key_file ) {
 	try {
 		TraceEvent("TLSConnectionSettingKeyFile").detail("KeyFilePath", key_file);
+		policyInfo.key_path = key_file;
 		set_key_data( readFileBytes( key_file, CERT_FILE_MAX_SIZE ) );
 	} catch ( Error& ) {
 		TraceEvent(SevError, "TLSOptionsSetKeyFileError").detail("Filename", key_file);
@@ -260,20 +269,20 @@ void TLSOptions::set_key_file( std::string const& key_file ) {
 }
 
 void TLSOptions::set_key_data( std::string const& key_data ) {
-	if (!policyVerifyPeersSet || !policyVerifyPeersNotSet)
+	if (!policyVerifyPeersSet.get() || !policyVerifyPeersNotSet.get())
 		init_plugin();
-	const char *passphrase = keyPassword.empty() ? NULL : keyPassword.c_str();
+	const char *passphrase = policyInfo.keyPassword.empty() ? NULL : policyInfo.keyPassword.c_str();
 	TraceEvent("TLSConnectionSettingKeyData").detail("KeyDataSize", key_data.size());
-	if ( !policyVerifyPeersSet->set_key_data( (const uint8_t*)&key_data[0], key_data.size(), passphrase) )
+	if ( !policyVerifyPeersSet.get()->set_key_data( (const uint8_t*)&key_data[0], key_data.size(), passphrase) )
 		throw tls_error();
-	if (!policyVerifyPeersNotSet->set_key_data((const uint8_t*)&key_data[0], key_data.size(), passphrase))
+	if (!policyVerifyPeersNotSet.get()->set_key_data((const uint8_t*)&key_data[0], key_data.size(), passphrase))
 		throw tls_error();
 
 	key_set = true;
 }
 
 void TLSOptions::set_verify_peers( std::vector<std::string> const& verify_peers ) {
-	if (!policyVerifyPeersSet)
+	if (!policyVerifyPeersSet.get())
 		init_plugin();
 	{
 		TraceEvent e("TLSConnectionSettingVerifyPeers");
@@ -287,9 +296,10 @@ void TLSOptions::set_verify_peers( std::vector<std::string> const& verify_peers
 		verify_peers_len[i] = verify_peers[i].size();
 	}
 
-	if (!policyVerifyPeersSet->set_verify_peers(verify_peers.size(), verify_peers_arr.get(), verify_peers_len.get()))
+	if (!policyVerifyPeersSet.get()->set_verify_peers(verify_peers.size(), verify_peers_arr.get(), verify_peers_len.get()))
 		throw tls_error();
 
+	policyInfo.verify_peers = verify_peers;
 	verify_peers_set = true;
 }
 
@@ -299,43 +309,125 @@ void TLSOptions::register_network() {
 	new TLSNetworkConnections( Reference<TLSOptions>::addRef( this ) );
 }
 
+ACTOR static Future<ErrorOr<Standalone<StringRef>>> readEntireFile( std::string filename ) {
+	state Reference<IAsyncFile> file = wait(IAsyncFileSystem::filesystem()->open(filename, IAsyncFile::OPEN_READONLY, 0));
+	state int64_t filesize = wait(file->size());
+	state Standalone<StringRef> buf = makeString(filesize);
+	int rc = wait(file->read(mutateString(buf), filesize, 0));
+	if (rc != filesize) {
+		// File modified during read, probably.  The mtime should change, and thus we'll be called again.
+		return tls_error();
+	}
+	return buf;
+}
+
+ACTOR static Future<Void> watchFileForChanges( std::string filename, AsyncVar<Standalone<StringRef>> *contents_var ) {
+	state std::time_t lastModTime = wait(IAsyncFileSystem::filesystem()->lastWriteTime(filename));
+	loop {
+		Void _ = wait(delay(FLOW_KNOBS->TLS_CERT_REFRESH_DELAY_SECONDS));
+		std::time_t modtime = wait(IAsyncFileSystem::filesystem()->lastWriteTime(filename));
+		if (lastModTime != modtime) {
+			lastModTime = modtime;
+			ErrorOr<Standalone<StringRef>> contents = wait(readEntireFile(filename));
+			if (contents.present()) {
+				contents_var->set(contents.get());
+			}
+		}
+	}
+}
+
+ACTOR static Future<Void> reloadConfigurationOnChange( TLSOptions::PolicyInfo *pci, Reference<ITLSPlugin> plugin, AsyncVar<Reference<ITLSPolicy>> *realVerifyPeersPolicy, AsyncVar<Reference<ITLSPolicy>> *realNoVerifyPeersPolicy ) {
+	if (FLOW_KNOBS->TLS_CERT_REFRESH_DELAY_SECONDS <= 0) {
+		return Void();
+	}
+	loop {
+		// Early in bootup, the filesystem might not be initialized yet.  Wait until it is.
+		if (IAsyncFileSystem::filesystem() != nullptr) {
+			break;
+		}
+		Void _ = wait(delay(1.0));
+	}
+	state AsyncVar<Standalone<StringRef>> ca_var;
+	state AsyncVar<Standalone<StringRef>> key_var;
+	state AsyncVar<Standalone<StringRef>> cert_var;
+	state std::vector<Future<Void>> lifetimes;
+	if (!pci->ca_path.empty()) lifetimes.push_back(watchFileForChanges(pci->ca_path, &ca_var));
+	if (!pci->key_path.empty()) lifetimes.push_back(watchFileForChanges(pci->key_path, &key_var));
+	if (!pci->cert_path.empty()) lifetimes.push_back(watchFileForChanges(pci->cert_path, &cert_var));
+	loop {
+		state Future<Void> ca_changed = ca_var.onChange();
+		state Future<Void> key_changed = key_var.onChange();
+		state Future<Void> cert_changed = cert_var.onChange();
+		Void _ = wait( ca_changed || key_changed || cert_changed );
+		if (ca_changed.isReady()) pci->ca_contents = ca_var.get();
+		if (key_changed.isReady()) pci->key_contents = key_var.get();
+		if (cert_changed.isReady()) pci->cert_contents = cert_var.get();
+		try {
+			Reference<ITLSPolicy> verifypeers = Reference<ITLSPolicy>(plugin->create_policy());
+			verifypeers->set_ca_data(pci->ca_contents.begin(), pci->ca_contents.size());
+			verifypeers->set_key_data(pci->key_contents.begin(), pci->key_contents.size(), pci->keyPassword.c_str());
+			verifypeers->set_cert_data(pci->cert_contents.begin(), pci->cert_contents.size());
+			{
+				std::unique_ptr<const uint8_t *[]> verify_peers_arr(new const uint8_t*[pci->verify_peers.size()]);
+				std::unique_ptr<int[]> verify_peers_len(new int[pci->verify_peers.size()]);
+				for (int i = 0; i < pci->verify_peers.size(); i++) {
+					verify_peers_arr[i] = (const uint8_t *)&pci->verify_peers[i][0];
+					verify_peers_len[i] = pci->verify_peers[i].size();
+				}
+				verifypeers->set_verify_peers(pci->verify_peers.size(), verify_peers_arr.get(), verify_peers_len.get());
+			}
+			Reference<ITLSPolicy> noverifypeers = Reference<ITLSPolicy>(plugin->create_policy());
+			noverifypeers->set_ca_data(pci->ca_contents.begin(), pci->ca_contents.size());
+			noverifypeers->set_key_data(pci->key_contents.begin(), pci->key_contents.size(), pci->keyPassword.c_str());
+			noverifypeers->set_cert_data(pci->cert_contents.begin(), pci->cert_contents.size());
+
+			realVerifyPeersPolicy->set(verifypeers);
+			realNoVerifyPeersPolicy->set(noverifypeers);
+		} catch (Error& e) {
+			// Some files didn't match up, they should in the future, and we'll retry then.
+			TraceEvent(SevWarn, "TLSCertificateRefresh").error(e);
+		}
+	}
+}
+
 const char *defaultCertFileName = "fdb.pem";
 
 Reference<ITLSPolicy> TLSOptions::get_policy(PolicyType type) {
 	if ( !certs_set ) {
-		std::string certFile;
-		if ( !platform::getEnvironmentVar( "FDB_TLS_CERTIFICATE_FILE", certFile ) )
-			certFile = fileExists(defaultCertFileName) ? defaultCertFileName : joinPath(platform::getDefaultConfigPath(), defaultCertFileName);
-		set_cert_file( certFile );
+		if ( !platform::getEnvironmentVar( "FDB_TLS_CERTIFICATE_FILE", policyInfo.cert_path ) )
+			policyInfo.cert_path = fileExists(defaultCertFileName) ? defaultCertFileName : joinPath(platform::getDefaultConfigPath(), defaultCertFileName);
+		set_cert_file( policyInfo.cert_path );
 	}
 	if ( !key_set ) {
-		std::string keyFile;
-		if ( keyPassword.empty() )
-			platform::getEnvironmentVar( "FDB_TLS_PASSWORD", keyPassword );
-		if ( !platform::getEnvironmentVar( "FDB_TLS_KEY_FILE", keyFile ) )
-			keyFile = fileExists(defaultCertFileName) ? defaultCertFileName : joinPath(platform::getDefaultConfigPath(), defaultCertFileName);
-		set_key_file( keyFile );
+		if ( policyInfo.keyPassword.empty() )
+			platform::getEnvironmentVar( "FDB_TLS_PASSWORD", policyInfo.keyPassword );
+		if ( !platform::getEnvironmentVar( "FDB_TLS_KEY_FILE", policyInfo.key_path ) )
+			policyInfo.key_path = fileExists(defaultCertFileName) ? defaultCertFileName : joinPath(platform::getDefaultConfigPath(), defaultCertFileName);
+		set_key_file( policyInfo.key_path );
 	}
 	if( !verify_peers_set ) {
-		std::string verifyPeerString;
-		if (platform::getEnvironmentVar("FDB_TLS_VERIFY_PEERS", verifyPeerString))
-			set_verify_peers({ verifyPeerString });
+		std::string verify_peers;
+		if (platform::getEnvironmentVar("FDB_TLS_VERIFY_PEERS", verify_peers))
+			set_verify_peers({ verify_peers });
 		else
 			set_verify_peers({ std::string("Check.Valid=1")});
 	}
 	if (!ca_set) {
-		std::string caFile;
-		if (platform::getEnvironmentVar("FDB_TLS_CA_FILE", caFile))
-			set_ca_file(caFile);
+		if (platform::getEnvironmentVar("FDB_TLS_CA_FILE", policyInfo.ca_path))
+			set_ca_file(policyInfo.ca_path);
+	}
+
+	if (!configurationReloader.present()) {
+		configurationReloader = reloadConfigurationOnChange(&policyInfo, plugin, &policyVerifyPeersSet, &policyVerifyPeersNotSet);
 	}
 
 	Reference<ITLSPolicy> policy;
 	switch (type) {
 	case POLICY_VERIFY_PEERS:
-		policy = policyVerifyPeersSet;
+		policy = policyVerifyPeersSet.get();
 		break;
 	case POLICY_NO_VERIFY_PEERS:
-		policy = policyVerifyPeersNotSet;
+		policy = policyVerifyPeersNotSet.get();
 		break;
 	default:
 		ASSERT_ABORT(0);
@@ -354,15 +446,15 @@ void TLSOptions::init_plugin() {
 		throw tls_error();
 	}
 
-	policyVerifyPeersSet = Reference<ITLSPolicy>( plugin->create_policy() );
-	if ( !policyVerifyPeersSet) {
+	policyVerifyPeersSet = AsyncVar<Reference<ITLSPolicy>>(Reference<ITLSPolicy>(plugin->create_policy()));
+	if ( !policyVerifyPeersSet.get()) {
 		// Hopefully create_policy logged something with the log func
 		TraceEvent(SevError, "TLSConnectionCreatePolicyVerifyPeersSetError");
 		throw tls_error();
 	}
 
-	policyVerifyPeersNotSet = Reference<ITLSPolicy>(plugin->create_policy());
-	if (!policyVerifyPeersNotSet) {
+	policyVerifyPeersNotSet = AsyncVar<Reference<ITLSPolicy>>(Reference<ITLSPolicy>(plugin->create_policy()));
+	if (!policyVerifyPeersNotSet.get()) {
 		// Hopefully create_policy logged something with the log func
 		TraceEvent(SevError, "TLSConnectionCreatePolicyVerifyPeersNotSetError");
 		throw tls_error();
@@ -370,5 +462,5 @@ void TLSOptions::init_plugin() {
 }
 
 bool TLSOptions::enabled() {
-	return !!policyVerifyPeersSet && !!policyVerifyPeersNotSet;
+	return policyVerifyPeersSet.get().isValid() && policyVerifyPeersNotSet.get().isValid();
 }
diff --git a/fdbrpc/TLSConnection.h b/fdbrpc/TLSConnection.h
index 08ac201d68..c0db28f4fe 100644
--- a/fdbrpc/TLSConnection.h
+++ b/fdbrpc/TLSConnection.h
@@ -65,20 +65,6 @@ struct TLSConnection : IConnection, ReferenceCounted<TLSConnection> {
 	virtual UID getDebugID() { return uid; }
 };
 
-struct TLSListener : IListener, ReferenceCounted<TLSListener> {
-	Reference<IListener> listener;
-	Reference<ITLSPolicy> policy;
-
-	TLSListener( Reference<ITLSPolicy> policy, Reference<IListener> listener ) : policy(policy), listener(listener) {}
-
-	virtual void addref() { ReferenceCounted<TLSListener>::addref(); }
-	virtual void delref() { ReferenceCounted<TLSListener>::delref(); }
-
-	virtual Future<Reference<IConnection>> accept();
-
-	virtual NetworkAddress getListenAddress() { return listener->getListenAddress(); }
-};
-
 struct TLSOptions : ReferenceCounted<TLSOptions> {
 	enum { OPT_TLS = 100000, OPT_TLS_PLUGIN, OPT_TLS_CERTIFICATES, OPT_TLS_KEY, OPT_TLS_VERIFY_PEERS, OPT_TLS_CA_FILE, OPT_TLS_PASSWORD };
 	enum PolicyType { POLICY_VERIFY_PEERS = 1, POLICY_NO_VERIFY_PEERS };
@@ -103,14 +89,41 @@ struct TLSOptions : ReferenceCounted<TLSOptions> {
 	Reference<ITLSPolicy> get_policy(PolicyType type);
 	bool enabled();
 
+	struct PolicyInfo {
+		std::string ca_path;
+		Standalone<StringRef> ca_contents;
+		std::string key_path;
+		std::string keyPassword;
+		Standalone<StringRef> key_contents;
+		std::string cert_path;
+		Standalone<StringRef> cert_contents;
+		std::vector<std::string> verify_peers;
+	};
+
 private:
-	void init_plugin( );
+	void init_plugin();
 
 	Reference<ITLSPlugin> plugin;
-	Reference<ITLSPolicy> policyVerifyPeersSet;
-	Reference<ITLSPolicy> policyVerifyPeersNotSet;
+	PolicyInfo policyInfo;
+	AsyncVar<Reference<ITLSPolicy>> policyVerifyPeersSet;
+	AsyncVar<Reference<ITLSPolicy>> policyVerifyPeersNotSet;
+	Optional<Future<Void>> configurationReloader;
+
 	bool certs_set, key_set, verify_peers_set, ca_set;
-	std::string keyPassword;
+};
+
+struct TLSListener : IListener, ReferenceCounted<TLSListener> {
+	Reference<IListener> listener;
+	Reference<TLSOptions> options;
+
+	TLSListener( Reference<TLSOptions> options, Reference<IListener> listener ) : options(options), listener(listener) {}
+
+	virtual void addref() { ReferenceCounted<TLSListener>::addref(); }
+	virtual void delref() { ReferenceCounted<TLSListener>::delref(); }
+
+	virtual Future<Reference<IConnection>> accept();
+
+	virtual NetworkAddress getListenAddress() { return listener->getListenAddress(); }
 };
 
 struct TLSNetworkConnections : INetworkConnections {
diff --git a/fdbrpc/sim2.actor.cpp b/fdbrpc/sim2.actor.cpp
index 73be87a5f8..368bdd539f 100644
--- a/fdbrpc/sim2.actor.cpp
+++ b/fdbrpc/sim2.actor.cpp
@@ -1756,6 +1756,15 @@ Future< Void > Sim2FileSystem::deleteFile( std::string filename, bool mustBeDura
 	return Sim2::deleteFileImpl(&g_sim2, filename, mustBeDurable);
 }
 
+Future< std::time_t > Sim2FileSystem::lastWriteTime( std::string filename ) {
+	// TODO: update this map upon file writes.
+	static std::map<std::string, double> fileWrites;
+	if (BUGGIFY && g_random->random01() < 0.01) {
+		fileWrites[filename] = now();
+	}
+	return fileWrites[filename];
+}
+
 void Sim2FileSystem::newFileSystem()
 {
 	g_network->setGlobal(INetwork::enFileSystem, (flowGlobalType) new Sim2FileSystem());
diff --git a/fdbrpc/simulator.h b/fdbrpc/simulator.h
index b91c96b753..8335da18bb 100644
--- a/fdbrpc/simulator.h
+++ b/fdbrpc/simulator.h
@@ -342,11 +342,13 @@ extern Future<Void> waitUntilDiskReady(Reference<DiskParameters> parameters, int
 
 class Sim2FileSystem : public IAsyncFileSystem {
 public:
-	virtual Future< Reference<class IAsyncFile> > open( std::string filename, int64_t flags, int64_t mode );
 	// Opens a file for asynchronous I/O
+	virtual Future< Reference<class IAsyncFile> > open( std::string filename, int64_t flags, int64_t mode );
 
-	virtual Future< Void > deleteFile( std::string filename, bool mustBeDurable );
 	// Deletes the given file.  If mustBeDurable, returns only when the file is guaranteed to be deleted even after a power failure.
+	virtual Future< Void > deleteFile( std::string filename, bool mustBeDurable );
+
+	virtual Future< std::time_t > lastWriteTime( std::string filename );
 
 	Sim2FileSystem() {}
 
diff --git a/flow/FastRef.h b/flow/FastRef.h
index cd5d319888..106b3b91a9 100644
--- a/flow/FastRef.h
+++ b/flow/FastRef.h
@@ -153,6 +153,7 @@ public:
 		return Reference<T>::addRef((T*)ptr);
 	}
 
+	bool isValid() const { return ptr != NULL; }
 	explicit operator bool() const { return ptr != NULL; }
 
 private:
diff --git a/flow/Knobs.cpp b/flow/Knobs.cpp
index e4aeef1f34..f37d175f73 100644
--- a/flow/Knobs.cpp
+++ b/flow/Knobs.cpp
@@ -59,6 +59,7 @@ FlowKnobs::FlowKnobs(bool randomize, bool isSimulated) {
 	init( MAX_RECONNECTION_TIME,                               0.5 );
 	init( RECONNECTION_TIME_GROWTH_RATE,                       1.2 );
 	init( RECONNECTION_RESET_TIME,                             5.0 );
+	init( TLS_CERT_REFRESH_DELAY_SECONDS,                 12*60*60 );
 
 	//AsyncFileCached
 	init( PAGE_CACHE_4K,                                2000LL<<20 );
diff --git a/flow/Knobs.h b/flow/Knobs.h
index 807975300e..90cdbeab7b 100644
--- a/flow/Knobs.h
+++ b/flow/Knobs.h
@@ -80,6 +80,8 @@ public:
 	double RECONNECTION_TIME_GROWTH_RATE;
 	double RECONNECTION_RESET_TIME;
 
+	int TLS_CERT_REFRESH_DELAY_SECONDS;
+
 	//AsyncFileCached
 	int64_t PAGE_CACHE_4K;
 	int64_t PAGE_CACHE_64K;

From c38d5247fd4a6330b35b19d3572bc7b9812c5f20 Mon Sep 17 00:00:00 2001
From: Alex Miller <alexmiller@apple.com>
Date: Tue, 31 Jul 2018 16:09:11 -0700
Subject: [PATCH 11/34] Initialize client TLSOptions before first use.

Because initializing them after first use is called SEGFAULT.

This affects both fdbcli and fdbbackup when passed TLS options.  I didn't track
down what introduced this, I just fixed it.
---
 fdbclient/NativeAPI.actor.cpp | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp
index 6638ed5b2b..a73225bfb1 100644
--- a/fdbclient/NativeAPI.actor.cpp
+++ b/fdbclient/NativeAPI.actor.cpp
@@ -64,6 +64,12 @@ using std::make_pair;
 NetworkOptions networkOptions;
 Reference<TLSOptions> tlsOptions;
 
+static void initTLSOptions() {
+	if (!tlsOptions) {
+		tlsOptions = Reference<TLSOptions>(new TLSOptions());
+	}
+}
+
 static const Key CLIENT_LATENCY_INFO_PREFIX = LiteralStringRef("client_latency/");
 static const Key CLIENT_LATENCY_INFO_CTR_PREFIX = LiteralStringRef("client_latency_counter/");
 
@@ -783,36 +789,45 @@ void setNetworkOption(FDBNetworkOptions::Option option, Optional<StringRef> valu
 		}
 		case FDBNetworkOptions::TLS_PLUGIN:
 			validateOptionValue(value, true);
+			initTLSOptions();
 			break;
 		case FDBNetworkOptions::TLS_CERT_PATH:
 			validateOptionValue(value, true);
+			initTLSOptions();
 			tlsOptions->set_cert_file( value.get().toString() );
 			break;
 		case FDBNetworkOptions::TLS_CERT_BYTES:
+			initTLSOptions();
 			tlsOptions->set_cert_data( value.get().toString() );
 			break;
 		case FDBNetworkOptions::TLS_CA_PATH:
 			validateOptionValue(value, true);
+			initTLSOptions();
 			tlsOptions->set_ca_file( value.get().toString() );
 			break;
 		case FDBNetworkOptions::TLS_CA_BYTES:
 			validateOptionValue(value, true);
+			initTLSOptions();
 			tlsOptions->set_ca_data(value.get().toString());
 			break;
 		case FDBNetworkOptions::TLS_PASSWORD:
 			validateOptionValue(value, true);
+			initTLSOptions();
 			tlsOptions->set_key_password(value.get().toString());
 			break;
 		case FDBNetworkOptions::TLS_KEY_PATH:
 			validateOptionValue(value, true);
+			initTLSOptions();
 			tlsOptions->set_key_file( value.get().toString() );
 			break;
 		case FDBNetworkOptions::TLS_KEY_BYTES:
 			validateOptionValue(value, true);
+			initTLSOptions();
 			tlsOptions->set_key_data( value.get().toString() );
 			break;
 		case FDBNetworkOptions::TLS_VERIFY_PEERS:
 			validateOptionValue(value, true);
+			initTLSOptions();
 			try {
 				tlsOptions->set_verify_peers({ value.get().toString() });
 			} catch( Error& e ) {
@@ -871,7 +886,7 @@ void setupNetwork(uint64_t transportId, bool useMetrics) {
 	FlowTransport::createInstance(transportId);
 	Net2FileSystem::newFileSystem();
 
-	tlsOptions = Reference<TLSOptions>( new TLSOptions );
+	initTLSOptions();
 
 #ifndef TLS_DISABLED
 	tlsOptions->register_network();

From 22f2a1feddf6f4054b2e67eb54cc98d7cab1ffff Mon Sep 17 00:00:00 2001
From: Evan Tschannen <36455792+etschannen@users.noreply.github.com>
Date: Mon, 6 Aug 2018 17:47:59 -0700
Subject: [PATCH 12/34] Merge pull request #676 from etschannen/master

fix: we should not free statdata ourselves, it will be deleted by libeio itself
---
 fdbrpc/AsyncFileEIO.actor.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fdbrpc/AsyncFileEIO.actor.h b/fdbrpc/AsyncFileEIO.actor.h
index 0e18a93323..2de8d95cc8 100644
--- a/fdbrpc/AsyncFileEIO.actor.h
+++ b/fdbrpc/AsyncFileEIO.actor.h
@@ -357,7 +357,6 @@ private:
 		EIO_STRUCT_STAT *statdata = (EIO_STRUCT_STAT *)r->ptr2;
 		if (!statdata) error("FStatBufferError", fd, r);
 		state int64_t size = statdata->st_size;
-		free(statdata);
 		Void _ = wait( delay(0, taskID) );
 		return size;
 	}

From a716c2b193ce6c2528758ae4aa84be6db371cd67 Mon Sep 17 00:00:00 2001
From: Evan Tschannen <ejt@apple.com>
Date: Mon, 6 Aug 2018 18:31:17 -0700
Subject: [PATCH 13/34] fixed release notes

---
 documentation/sphinx/source/release-notes.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst
index a1c240dad5..1839d2ad23 100644
--- a/documentation/sphinx/source/release-notes.rst
+++ b/documentation/sphinx/source/release-notes.rst
@@ -23,7 +23,7 @@ Performance
 * Clients optimistically assume the first leader reply from a coordinator is correct. `(PR #425) <https://github.com/apple/foundationdb/pull/425>`_
 * Network connections are now closed after no interface needs the connection. [6.0.1] `(Issue #375) <https://github.com/apple/foundationdb/issues/375>`_
 * Significantly improved the CPU efficiency of copy mutations to transaction logs during recovery. [6.0.2] `(PR #595) <https://github.com/apple/foundationdb/pull/595>`_
-* A cluster configured with usable_regions=2 did not limit the rate at which it could copying data from the primary DC to the remote DC. This caused poor performance when recovering from a DC outage. [6.0.5] `(PR #673) <https://github.com/apple/foundationdb/pull/673>`_
+* A cluster configured with usable_regions=2 did not limit the rate at which it could copy data from the primary DC to the remote DC. This caused poor performance when recovering from a DC outage. [6.0.5] `(PR #673) <https://github.com/apple/foundationdb/pull/673>`_
 
 Fixes
 -----
@@ -37,7 +37,7 @@ Fixes
 * A client could fail to connect to a cluster when the cluster was upgraded to a version compatible with the client. This affected upgrades that were using the multi-version client to maintain compatibility with both versions of the cluster. [6.0.4] `(PR #637) <https://github.com/apple/foundationdb/pull/637>`_
 * A large number of concurrent read attempts could bring the database down after a cluster reboot. [6.0.4] `(PR #650) <https://github.com/apple/foundationdb/pull/650>`_
 * Automatic suppression of trace events which occur too frequently was happening before trace events were suppressed by other mechanisms. [6.0.4] `(PR #656) <https://github.com/apple/foundationdb/pull/656>`_
-* After a recovery, the rate at which transactions logs made mutations durable to disk was around 5 times slower than normal. [6.0.5] `(PR #666) <https://github.com/apple/foundationdb/pull/666>`_
+* After a recovery, the rate at which transaction logs made mutations durable to disk was around 5 times slower than normal. [6.0.5] `(PR #666) <https://github.com/apple/foundationdb/pull/666>`_
 * Clusters configured to use TLS could get stuck spending all of their CPU opening new connections. [6.0.5] `(PR #666) <https://github.com/apple/foundationdb/pull/666>`_
 
 Status

From ff0e14d5a774dad379ff3b16e27b196d4085112f Mon Sep 17 00:00:00 2001
From: Alex Miller <alexmiller@apple.com>
Date: Mon, 30 Jul 2018 17:13:37 -0700
Subject: [PATCH 14/34] Fix a compilation error on windows.

---
 fdbrpc/AsyncFileWinASIO.actor.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fdbrpc/AsyncFileWinASIO.actor.h b/fdbrpc/AsyncFileWinASIO.actor.h
index 08dd940eff..781d5b9bac 100644
--- a/fdbrpc/AsyncFileWinASIO.actor.h
+++ b/fdbrpc/AsyncFileWinASIO.actor.h
@@ -82,7 +82,7 @@ public:
 		if (_stat( filename.c_str(), &buf ) != 0) {
 			throw io_error();
 		}
-		return buf->st_mtime;
+		return buf.st_mtime;
 	}
 
 	virtual void addref() { ReferenceCounted<AsyncFileWinASIO>::addref(); }

From e0cf525951327077cfcc153dd285d7436fb29f1d Mon Sep 17 00:00:00 2001
From: "A.J. Beamon" <ajbeamon@apple.com>
Date: Tue, 7 Aug 2018 11:02:09 -0700
Subject: [PATCH 15/34] Fix: use new data lag fields when making storage server
 message indicating high lag.

---
 fdbserver/Status.actor.cpp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp
index a804928536..aab66fcc3e 100644
--- a/fdbserver/Status.actor.cpp
+++ b/fdbserver/Status.actor.cpp
@@ -574,11 +574,13 @@ ACTOR static Future<StatusObject> processStatusFetcher(
 	}
 
 	state std::vector<std::pair<StorageServerInterface, TraceEventFields>>::iterator ss;
-	state std::map<NetworkAddress, int64_t> ssLag;
+	state std::map<NetworkAddress, double> ssLag;
 	for(ss = storageServers.begin(); ss != storageServers.end(); ++ss) {
 		StatusObject const& roleStatus = roles.addRole( "storage", ss->first, ss->second, maxTLogVersion );
-		if(roleStatus.count("data_version_lag") > 0) {
-			ssLag[ss->first.address()] = roleStatus.at("data_version_lag").get_int64();
+		JSONDoc doc(roleStatus);
+		double lagSeconds;
+		if(doc.tryGet("data_lag.seconds", lagSeconds)) {
+			ssLag[ss->first.address()] = lagSeconds;
 		}
 		Void _ = wait(yield());
 	}
@@ -754,8 +756,8 @@ ACTOR static Future<StatusObject> processStatusFetcher(
 				messages.push_back(tracefileOpenErrorMap[strAddress]);
 			}
 
-			if(ssLag[address] > 60 * SERVER_KNOBS->VERSIONS_PER_SECOND) {
-				messages.push_back(makeMessage("storage_server_lagging", format("Storage server lagging by %ld seconds.", ssLag[address] / SERVER_KNOBS->VERSIONS_PER_SECOND).c_str()));
+			if(ssLag[address] >= 60) {
+				messages.push_back(makeMessage("storage_server_lagging", format("Storage server lagging by %.2f seconds.", ssLag[address]).c_str()));
 			}
 
 			// Store the message array into the status object that represents the worker process

From 7d831ef9c3dcd794d4fe87ad42728d0b7d967554 Mon Sep 17 00:00:00 2001
From: "A.J. Beamon" <ajbeamon@apple.com>
Date: Tue, 7 Aug 2018 15:41:51 -0700
Subject: [PATCH 16/34] Revert change that prints lag with 2 decimal places of
 precision.

---
 fdbserver/Status.actor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp
index aab66fcc3e..ef9f5b0403 100644
--- a/fdbserver/Status.actor.cpp
+++ b/fdbserver/Status.actor.cpp
@@ -757,7 +757,7 @@ ACTOR static Future<StatusObject> processStatusFetcher(
 			}
 
 			if(ssLag[address] >= 60) {
-				messages.push_back(makeMessage("storage_server_lagging", format("Storage server lagging by %.2f seconds.", ssLag[address]).c_str()));
+				messages.push_back(makeMessage("storage_server_lagging", format("Storage server lagging by %ld seconds.", (int64_t)ssLag[address]).c_str()));
 			}
 
 			// Store the message array into the status object that represents the worker process

From 3bb8dad431c7bb233e2d409ec1f11af211fedaf8 Mon Sep 17 00:00:00 2001
From: Evan Tschannen <ejt@apple.com>
Date: Tue, 7 Aug 2018 17:00:43 -0700
Subject: [PATCH 17/34] TooManyNotifications is only SevWarnAlways if it happens
 more than once a day. Status continuously adds to notifications currently, so
 we expect this to trigger every 4-5 days.

---
 fdbserver/Coordination.actor.cpp | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/fdbserver/Coordination.actor.cpp b/fdbserver/Coordination.actor.cpp
index 3e1703c3c8..81e01985f3 100644
--- a/fdbserver/Coordination.actor.cpp
+++ b/fdbserver/Coordination.actor.cpp
@@ -208,6 +208,7 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
 	state Future<Void> nextInterval = delay( 0 );
 	state double candidateDelay = SERVER_KNOBS->CANDIDATE_MIN_DELAY;
 	state int leaderIntervalCount = 0;
+	state double lastNotifiedCleared = now();
 
 	loop choose {
 		when ( GetLeaderRequest req = waitNext( interf.getLeader.getFuture() ) ) {
@@ -216,10 +217,11 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
 			} else {
 				notify.push_back( req.reply );
 				if(notify.size() > SERVER_KNOBS->MAX_NOTIFICATIONS) {
-					TraceEvent(SevWarnAlways, "TooManyNotifications").detail("Amount", notify.size());
+					TraceEvent(now() - lastNotifiedCleared < 100000 ? SevWarnAlways : SevWarn, "TooManyNotifications").detail("Amount", notify.size());
 					for(int i=0; i<notify.size(); i++)
 						notify[i].send( currentNominee.get() );
 					notify.clear();
+					lastNotifiedCleared = now();
 				}
 			}
 		}
@@ -232,10 +234,11 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
 			} else {
 				notify.push_back( req.reply );
 				if(notify.size() > SERVER_KNOBS->MAX_NOTIFICATIONS) {
-					TraceEvent(SevWarnAlways, "TooManyNotifications").detail("Amount", notify.size());
+					TraceEvent(now() - lastNotifiedCleared < 100000 ? SevWarnAlways : SevWarn, "TooManyNotifications").detail("Amount", notify.size());
 					for(int i=0; i<notify.size(); i++)
 						notify[i].send( currentNominee.get() );
 					notify.clear();
+					lastNotifiedCleared = now();
 				}
 			}
 		}
@@ -252,6 +255,7 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
 			for(int i=0; i<notify.size(); i++)
 				notify[i].send( newInfo );
 			notify.clear();
+			lastNotifiedCleared = now();
 			req.reply.send( Void() );
 			return Void();
 		}
@@ -280,6 +284,7 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
 					for(int i=0; i<notify.size(); i++)
 						notify[i].send( nextNominee );
 					notify.clear();
+					lastNotifiedCleared = now();
 					currentNominee = nextNominee;
 				} else if (currentNominee.present() && nextNominee.present() && currentNominee.get().equalInternalId(nextNominee.get())) {
 					// leader becomes better

From da3d7e86da5b10fd587ac1bfcdce8371d55dfeda Mon Sep 17 00:00:00 2001
From: "A.J. Beamon" <ajbeamon@apple.com>
Date: Wed, 8 Aug 2018 09:11:05 -0700
Subject: [PATCH 18/34] Updated release notes to specifically call out the
 movement of data_version_lag to data_lag.versions and data_lag.seconds.

---
 documentation/sphinx/source/release-notes.rst | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst
index 1839d2ad23..ea38b3f70e 100644
--- a/documentation/sphinx/source/release-notes.rst
+++ b/documentation/sphinx/source/release-notes.rst
@@ -43,8 +43,9 @@ Fixes
 Status
 ------
 
-* The replication factor in status JSON is stored under "redundancy_mode" instead of "redundancy":"factor". `(PR #492) <https://github.com/apple/foundationdb/pull/492>`_
-* Additional metrics for storage server lag as well as the number of watches and mutation count have been added and are exposed through status. `(PR #521) <https://github.com/apple/foundationdb/pull/521>`_
+* The replication factor in status JSON is stored under ``redundancy_mode`` instead of ``redundancy.factor``. `(PR #492) <https://github.com/apple/foundationdb/pull/492>`_
+* The metric ``data_version_lag`` has been replaced by ``data_lag.versions`` and ``data_lag.seconds``. `(PR #521) <https://github.com/apple/foundationdb/pull/521>`_
+* Additional metrics for the number of watches and mutation count have been added and are exposed through status. `(PR #521) <https://github.com/apple/foundationdb/pull/521>`_
 
 
 Bindings

From 7f7755165c7c64dfc7a96fbfe6e4a141810e650e Mon Sep 17 00:00:00 2001
From: Evan Tschannen <ejt@apple.com>
Date: Wed, 8 Aug 2018 17:29:32 -0700
Subject: [PATCH 19/34] slowly send notifications to clients to clear the list
 of dead clients

---
 fdbserver/Coordination.actor.cpp | 19 +++++++++++--------
 fdbserver/Knobs.cpp              |  2 ++
 fdbserver/Knobs.h                |  2 ++
 3 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/fdbserver/Coordination.actor.cpp b/fdbserver/Coordination.actor.cpp
index 81e01985f3..da2a4d01df 100644
--- a/fdbserver/Coordination.actor.cpp
+++ b/fdbserver/Coordination.actor.cpp
@@ -204,11 +204,11 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
 	state std::set<LeaderInfo> availableCandidates;
 	state std::set<LeaderInfo> availableLeaders;
 	state Optional<LeaderInfo> currentNominee;
-	state vector<ReplyPromise<Optional<LeaderInfo>>> notify;
+	state Deque<ReplyPromise<Optional<LeaderInfo>>> notify;
 	state Future<Void> nextInterval = delay( 0 );
 	state double candidateDelay = SERVER_KNOBS->CANDIDATE_MIN_DELAY;
 	state int leaderIntervalCount = 0;
-	state double lastNotifiedCleared = now();
+	state Future<Void> notifyCheck = delay(SERVER_KNOBS->NOTIFICATION_FULL_CLEAR_TIME / SERVER_KNOBS->MIN_NOTIFICATIONS);
 
 	loop choose {
 		when ( GetLeaderRequest req = waitNext( interf.getLeader.getFuture() ) ) {
@@ -217,11 +217,10 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
 			} else {
 				notify.push_back( req.reply );
 				if(notify.size() > SERVER_KNOBS->MAX_NOTIFICATIONS) {
-					TraceEvent(now() - lastNotifiedCleared < 100000 ? SevWarnAlways : SevWarn, "TooManyNotifications").detail("Amount", notify.size());
+					TraceEvent(SevWarnAlways, "TooManyNotifications").detail("Amount", notify.size());
 					for(int i=0; i<notify.size(); i++)
 						notify[i].send( currentNominee.get() );
 					notify.clear();
-					lastNotifiedCleared = now();
 				}
 			}
 		}
@@ -234,11 +233,10 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
 			} else {
 				notify.push_back( req.reply );
 				if(notify.size() > SERVER_KNOBS->MAX_NOTIFICATIONS) {
-					TraceEvent(now() - lastNotifiedCleared < 100000 ? SevWarnAlways : SevWarn, "TooManyNotifications").detail("Amount", notify.size());
+					TraceEvent(SevWarnAlways, "TooManyNotifications").detail("Amount", notify.size());
 					for(int i=0; i<notify.size(); i++)
 						notify[i].send( currentNominee.get() );
 					notify.clear();
-					lastNotifiedCleared = now();
 				}
 			}
 		}
@@ -255,7 +253,6 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
 			for(int i=0; i<notify.size(); i++)
 				notify[i].send( newInfo );
 			notify.clear();
-			lastNotifiedCleared = now();
 			req.reply.send( Void() );
 			return Void();
 		}
@@ -284,7 +281,6 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
 					for(int i=0; i<notify.size(); i++)
 						notify[i].send( nextNominee );
 					notify.clear();
-					lastNotifiedCleared = now();
 					currentNominee = nextNominee;
 				} else if (currentNominee.present() && nextNominee.present() && currentNominee.get().equalInternalId(nextNominee.get())) {
 					// leader becomes better
@@ -306,6 +302,13 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
 				availableCandidates.clear();
 			}
 		}
+		when( Void _ = wait(notifyCheck) ) {
+			notifyCheck = delay( SERVER_KNOBS->NOTIFICATION_FULL_CLEAR_TIME / std::max<double>(SERVER_KNOBS->MIN_NOTIFICATIONS, notify.size()) );
+			if(!notify.empty() && currentNominee.present()) {
+				notify.front().send( currentNominee.get() );
+				notify.pop_front();
+			}
+		}
 	}
 }
 
diff --git a/fdbserver/Knobs.cpp b/fdbserver/Knobs.cpp
index abd3e1c72d..df12a79753 100644
--- a/fdbserver/Knobs.cpp
+++ b/fdbserver/Knobs.cpp
@@ -212,6 +212,8 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
 	// Leader election
 	bool longLeaderElection = randomize && BUGGIFY;
 	init( MAX_NOTIFICATIONS,                                  100000 );
+	init( MIN_NOTIFICATIONS,                                     100 );
+	init( NOTIFICATION_FULL_CLEAR_TIME,                      10000.0 );
 	init( CANDIDATE_MIN_DELAY,                                  0.05 );
 	init( CANDIDATE_MAX_DELAY,                                   1.0 );
 	init( CANDIDATE_GROWTH_RATE,                                 1.2 );
diff --git a/fdbserver/Knobs.h b/fdbserver/Knobs.h
index c3be2c5303..2e476ec60c 100644
--- a/fdbserver/Knobs.h
+++ b/fdbserver/Knobs.h
@@ -158,6 +158,8 @@ public:
 
 	// Leader election
 	int MAX_NOTIFICATIONS;
+	int MIN_NOTIFICATIONS;
+	double NOTIFICATION_FULL_CLEAR_TIME;
 	double CANDIDATE_MIN_DELAY;
 	double CANDIDATE_MAX_DELAY;
 	double CANDIDATE_GROWTH_RATE;

From 66bcd6772da3248c9eb7e25a51e9884dbd837aab Mon Sep 17 00:00:00 2001
From: "A.J. Beamon" <ajbeamon@apple.com>
Date: Thu, 9 Aug 2018 09:08:28 -0700
Subject: [PATCH 20/34] Add missing release note.

---
 documentation/sphinx/source/release-notes.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst
index c380cd67ce..712e290448 100644
--- a/documentation/sphinx/source/release-notes.rst
+++ b/documentation/sphinx/source/release-notes.rst
@@ -27,6 +27,7 @@ Fixes
 -----
 
 * A client could fail to connect to a cluster when the cluster was upgraded to a version compatible with the client. This affected upgrades that were using the multi-version client to maintain compatibility with both versions of the cluster. `(PR #637) <https://github.com/apple/foundationdb/pull/637>`_
+* Incorrect accounting of incompatible connections led to occasional assertion failures. `(PR #637) <https://github.com/apple/foundationdb/pull/637>`_
 
 5.2.6
 =====

From 06daadaf25f9129cf8b74a42c56c8d21767c3aac Mon Sep 17 00:00:00 2001
From: "A.J. Beamon" <ajbeamon@apple.com>
Date: Thu, 9 Aug 2018 09:09:00 -0700
Subject: [PATCH 21/34] Add missing release note.

---
 .../sphinx/source/old-release-notes/release-notes-520.rst        | 1 +
 1 file changed, 1 insertion(+)

diff --git a/documentation/sphinx/source/old-release-notes/release-notes-520.rst b/documentation/sphinx/source/old-release-notes/release-notes-520.rst
index c380cd67ce..712e290448 100644
--- a/documentation/sphinx/source/old-release-notes/release-notes-520.rst
+++ b/documentation/sphinx/source/old-release-notes/release-notes-520.rst
@@ -27,6 +27,7 @@ Fixes
 -----
 
 * A client could fail to connect to a cluster when the cluster was upgraded to a version compatible with the client. This affected upgrades that were using the multi-version client to maintain compatibility with both versions of the cluster. `(PR #637) <https://github.com/apple/foundationdb/pull/637>`_
+* Incorrect accounting of incompatible connections led to occasional assertion failures. `(PR #637) <https://github.com/apple/foundationdb/pull/637>`_
 
 5.2.6
 =====

From 6f02ea843a1646dabcf546a58694434c0740d5db Mon Sep 17 00:00:00 2001
From: Evan Tschannen <ejt@apple.com>
Date: Thu, 9 Aug 2018 12:37:46 -0700
Subject: [PATCH 22/34] prevented a slow task when too many shards were sent to
 the data distribution queue after switching to a fearless deployment

---
 fdbserver/DataDistribution.actor.cpp        | 36 ++++++++++++---------
 fdbserver/DataDistribution.h                |  6 ++--
 fdbserver/DataDistributionQueue.actor.cpp   | 20 ++++++------
 fdbserver/DataDistributionTracker.actor.cpp | 23 +++++++++----
 4 files changed, 49 insertions(+), 36 deletions(-)

diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp
index 375807b8d8..bd9320c15e 100644
--- a/fdbserver/DataDistribution.actor.cpp
+++ b/fdbserver/DataDistribution.actor.cpp
@@ -1379,23 +1379,25 @@ ACTOR Future<Void> teamTracker( DDTeamCollection *self, Reference<IDataDistribut
 
 					for(int i=0; i<shards.size(); i++) {
 						int maxPriority = team->getPriority();
-						auto teams = self->shardsAffectedByTeamFailure->getTeamsFor( shards[i] );
-						for( int t=0; t<teams.size(); t++) {
-							if( teams[t].servers.size() && self->server_info.count( teams[t].servers[0] ) ) {
-								auto& info = self->server_info[teams[t].servers[0]];
+						if(maxPriority < PRIORITY_TEAM_0_LEFT) {
+							auto teams = self->shardsAffectedByTeamFailure->getTeamsFor( shards[i] );
+							for( int t=0; t<teams.size(); t++) {
+								if( teams[t].servers.size() && self->server_info.count( teams[t].servers[0] ) ) {
+									auto& info = self->server_info[teams[t].servers[0]];
 
-								bool found = false;
-								for( int i = 0; i < info->teams.size(); i++ ) {
-									if( info->teams[i]->serverIDs == teams[t].servers ) {
-										maxPriority = std::max( maxPriority, info->teams[i]->getPriority() );
-										found = true;
-										break;
+									bool found = false;
+									for( int i = 0; i < info->teams.size(); i++ ) {
+										if( info->teams[i]->serverIDs == teams[t].servers ) {
+											maxPriority = std::max( maxPriority, info->teams[i]->getPriority() );
+											found = true;
+											break;
+										}
 									}
-								}
 
-								TEST(!found); // A removed team is still associated with a shard in SABTF
-							} else {
-								TEST(teams[t].servers.size()); // A removed server is still associated with a team in SABTF
+									TEST(!found); // A removed team is still associated with a shard in SABTF
+								} else {
+									TEST(teams[t].servers.size()); // A removed server is still associated with a team in SABTF
+								}
 							}
 						}
 
@@ -2193,6 +2195,7 @@ ACTOR Future<Void> dataDistribution(
 			ASSERT(configuration.storageTeamSize > 0);
 
 			state PromiseStream<RelocateShard> output;
+			state PromiseStream<RelocateShard> input;
 			state PromiseStream<Promise<int64_t>> getAverageShardBytes;
 			state PromiseStream<GetMetricsRequest> getShardMetrics;
 			state Reference<AsyncVar<bool>> processingUnhealthy( new AsyncVar<bool>(false) );
@@ -2218,6 +2221,7 @@ ACTOR Future<Void> dataDistribution(
 			}
 
 			Reference<ShardsAffectedByTeamFailure> shardsAffectedByTeamFailure( new ShardsAffectedByTeamFailure );
+			actors.push_back(yieldPromiseStream(output.getFuture(), input));
 
 			for(int s=0; s<initData->shards.size() - 1; s++) {
 				KeyRangeRef keys = KeyRangeRef(initData->shards[s].key, initData->shards[s+1].key);
@@ -2236,8 +2240,8 @@ ACTOR Future<Void> dataDistribution(
 			}
 
 			actors.push_back( pollMoveKeysLock(cx, lock) );
-			actors.push_back( reportErrorsExcept( dataDistributionTracker( initData, cx, output, getShardMetrics, getAverageShardBytes.getFuture(), readyToStart, anyZeroHealthyTeams, mi.id() ), "DDTracker", mi.id(), &normalDDQueueErrors() ) );
-			actors.push_back( reportErrorsExcept( dataDistributionQueue( cx, output, getShardMetrics, processingUnhealthy, tcis, shardsAffectedByTeamFailure, lock, getAverageShardBytes, mi, storageTeamSize, lastLimited, recoveryCommitVersion ), "DDQueue", mi.id(), &normalDDQueueErrors() ) );
+			actors.push_back( reportErrorsExcept( dataDistributionTracker( initData, cx, output, shardsAffectedByTeamFailure, getShardMetrics, getAverageShardBytes.getFuture(), readyToStart, anyZeroHealthyTeams, mi.id() ), "DDTracker", mi.id(), &normalDDQueueErrors() ) );
+			actors.push_back( reportErrorsExcept( dataDistributionQueue( cx, output, input.getFuture(), getShardMetrics, processingUnhealthy, tcis, shardsAffectedByTeamFailure, lock, getAverageShardBytes, mi, storageTeamSize, lastLimited, recoveryCommitVersion ), "DDQueue", mi.id(), &normalDDQueueErrors() ) );
 			actors.push_back( reportErrorsExcept( dataDistributionTeamCollection( initData, tcis[0], cx, db, shardsAffectedByTeamFailure, lock, output, mi.id(), configuration, primaryDcId, configuration.usableRegions > 1 ? remoteDcIds : std::vector<Optional<Key>>(), serverChanges, readyToStart.getFuture(), zeroHealthyTeams[0], true, processingUnhealthy ), "DDTeamCollectionPrimary", mi.id(), &normalDDQueueErrors() ) );
 			if (configuration.usableRegions > 1) {
 				actors.push_back( reportErrorsExcept( dataDistributionTeamCollection( initData, tcis[1], cx, db, shardsAffectedByTeamFailure, lock, output, mi.id(), configuration, remoteDcIds, Optional<std::vector<Optional<Key>>>(), Optional<PromiseStream< std::pair<UID, Optional<StorageServerInterface>> >>(), readyToStart.getFuture() && remoteRecovered, zeroHealthyTeams[1], false, processingUnhealthy ), "DDTeamCollectionSecondary", mi.id(), &normalDDQueueErrors() ) );
diff --git a/fdbserver/DataDistribution.h b/fdbserver/DataDistribution.h
index 76446cf818..8515eafc33 100644
--- a/fdbserver/DataDistribution.h
+++ b/fdbserver/DataDistribution.h
@@ -210,6 +210,7 @@ Future<Void> dataDistributionTracker(
 	Reference<InitialDataDistribution> const& initData,
 	Database const& cx,
 	PromiseStream<RelocateShard> const& output,
+	Reference<ShardsAffectedByTeamFailure> const& shardsAffectedByTeamFailure,
 	PromiseStream<GetMetricsRequest> const& getShardMetrics,
 	FutureStream<Promise<int64_t>> const& getAverageShardBytes,
 	Promise<Void> const& readyToStart,
@@ -218,7 +219,8 @@ Future<Void> dataDistributionTracker(
 
 Future<Void> dataDistributionQueue(
 	Database const& cx,
-	PromiseStream<RelocateShard> const& input,
+	PromiseStream<RelocateShard> const& output,
+	FutureStream<RelocateShard> const& input,
 	PromiseStream<GetMetricsRequest> const& getShardMetrics,
 	Reference<AsyncVar<bool>> const& processingUnhealthy,
 	vector<TeamCollectionInterface> const& teamCollection,
@@ -245,4 +247,4 @@ struct ShardSizeBounds {
 ShardSizeBounds getShardSizeBounds(KeyRangeRef shard, int64_t maxShardSize);
 
 //Determines the maximum shard size based on the size of the database
-int64_t getMaxShardSize( double dbSizeEstimate );
\ No newline at end of file
+int64_t getMaxShardSize( double dbSizeEstimate );
diff --git a/fdbserver/DataDistributionQueue.actor.cpp b/fdbserver/DataDistributionQueue.actor.cpp
index ba63a78100..3fe0a0b8ff 100644
--- a/fdbserver/DataDistributionQueue.actor.cpp
+++ b/fdbserver/DataDistributionQueue.actor.cpp
@@ -362,7 +362,8 @@ struct DDQueueData {
 	PromiseStream<RelocateData> relocationComplete;
 	PromiseStream<RelocateData> fetchSourceServersComplete;
 
-	PromiseStream<RelocateShard> input;
+	PromiseStream<RelocateShard> output;
+	FutureStream<RelocateShard> input;
 	PromiseStream<GetMetricsRequest> getShardMetrics;
 
 	double* lastLimited;
@@ -393,10 +394,10 @@ struct DDQueueData {
 
 	DDQueueData( MasterInterface mi, MoveKeysLock lock, Database cx, std::vector<TeamCollectionInterface> teamCollections,
 		Reference<ShardsAffectedByTeamFailure> sABTF, PromiseStream<Promise<int64_t>> getAverageShardBytes,
-		int teamSize, PromiseStream<RelocateShard> input, PromiseStream<GetMetricsRequest> getShardMetrics, double* lastLimited, Version recoveryVersion ) :
+		int teamSize, PromiseStream<RelocateShard> output, FutureStream<RelocateShard> input, PromiseStream<GetMetricsRequest> getShardMetrics, double* lastLimited, Version recoveryVersion ) :
 			activeRelocations( 0 ), queuedRelocations( 0 ), bytesWritten ( 0 ), teamCollections( teamCollections ),
 			shardsAffectedByTeamFailure( sABTF ), getAverageShardBytes( getAverageShardBytes ), mi( mi ), lock( lock ),
-			cx( cx ), teamSize( teamSize ), input( input ), getShardMetrics( getShardMetrics ), startMoveKeysParallelismLock( SERVER_KNOBS->DD_MOVE_KEYS_PARALLELISM ),
+			cx( cx ), teamSize( teamSize ), output( output ), input( input ), getShardMetrics( getShardMetrics ), startMoveKeysParallelismLock( SERVER_KNOBS->DD_MOVE_KEYS_PARALLELISM ),
 			finishMoveKeysParallelismLock( SERVER_KNOBS->DD_MOVE_KEYS_PARALLELISM ), lastLimited(lastLimited), recoveryVersion(recoveryVersion),
 			suppressIntervals(0), lastInterval(0), unhealthyRelocations(0), rawProcessingUnhealthy( new AsyncVar<bool>(false) ) {}
 
@@ -569,10 +570,6 @@ struct DDQueueData {
 
 	//This function cannot handle relocation requests which split a shard into three pieces
 	void queueRelocation( RelocateData rd, std::set<UID> &serversToLaunchFrom ) {
-		// Update sabtf for changes from DDTracker
-		if( rd.changesBoundaries() )
-			shardsAffectedByTeamFailure->defineShard( rd.keys );
-
 		//TraceEvent("QueueRelocationBegin").detail("Begin", printable(rd.keys.begin)).detail("End", printable(rd.keys.end));
 
 		// remove all items from both queues that are fully contained in the new relocation (i.e. will be overwritten)
@@ -1086,7 +1083,7 @@ ACTOR Future<bool> rebalanceTeams( DDQueueData* self, int priority, Reference<ID
 				.detail("SourceTeam", sourceTeam->getDesc())
 				.detail("DestTeam", destTeam->getDesc());
 
-			self->input.send( RelocateShard( moveShard, priority ) );
+			self->output.send( RelocateShard( moveShard, priority ) );
 			return true;
 		}
 	}
@@ -1166,7 +1163,8 @@ ACTOR Future<Void> BgDDValleyFiller( DDQueueData* self, int teamCollectionIndex)
 
 ACTOR Future<Void> dataDistributionQueue(
 	Database cx,
-	PromiseStream<RelocateShard> input,
+	PromiseStream<RelocateShard> output,
+	FutureStream<RelocateShard> input,
 	PromiseStream<GetMetricsRequest> getShardMetrics,
 	Reference<AsyncVar<bool>> processingUnhealthy,
 	std::vector<TeamCollectionInterface> teamCollections,
@@ -1178,7 +1176,7 @@ ACTOR Future<Void> dataDistributionQueue(
 	double* lastLimited,
 	Version recoveryVersion)
 {
-	state DDQueueData self( mi, lock, cx, teamCollections, shardsAffectedByTeamFailure, getAverageShardBytes, teamSize, input, getShardMetrics, lastLimited, recoveryVersion );
+	state DDQueueData self( mi, lock, cx, teamCollections, shardsAffectedByTeamFailure, getAverageShardBytes, teamSize, output, input, getShardMetrics, lastLimited, recoveryVersion );
 	state std::set<UID> serversToLaunchFrom;
 	state KeyRange keysToLaunchFrom;
 	state RelocateData launchData;
@@ -1213,7 +1211,7 @@ ACTOR Future<Void> dataDistributionQueue(
 			ASSERT( launchData.startTime == -1 && keysToLaunchFrom.empty() );
 
 			choose {
-				when ( RelocateShard rs = waitNext( self.input.getFuture() ) ) {
+				when ( RelocateShard rs = waitNext( self.input ) ) {
 					bool wasEmpty = serversToLaunchFrom.empty();
 					self.queueRelocation( rs, serversToLaunchFrom );
 					if(wasEmpty && !serversToLaunchFrom.empty())
diff --git a/fdbserver/DataDistributionTracker.actor.cpp b/fdbserver/DataDistributionTracker.actor.cpp
index e4f0939255..5a596d4028 100644
--- a/fdbserver/DataDistributionTracker.actor.cpp
+++ b/fdbserver/DataDistributionTracker.actor.cpp
@@ -74,14 +74,15 @@ struct DataDistributionTracker {
 
 	// CapacityTracker
 	PromiseStream<RelocateShard> output;
+	Reference<ShardsAffectedByTeamFailure> shardsAffectedByTeamFailure;
 
 	Promise<Void> readyToStart;
 	Reference<AsyncVar<bool>> anyZeroHealthyTeams;
 
-	DataDistributionTracker(Database cx, UID masterId, Promise<Void> const& readyToStart, PromiseStream<RelocateShard> const& output, Reference<AsyncVar<bool>> anyZeroHealthyTeams)
+	DataDistributionTracker(Database cx, UID masterId, Promise<Void> const& readyToStart, PromiseStream<RelocateShard> const& output, Reference<ShardsAffectedByTeamFailure> shardsAffectedByTeamFailure, Reference<AsyncVar<bool>> anyZeroHealthyTeams)
 		: cx(cx), masterId( masterId ), dbSizeEstimate( new AsyncVar<int64_t>() ),
 			maxShardSize( new AsyncVar<Optional<int64_t>>() ),
-			sizeChanges(false), readyToStart(readyToStart), output( output ), anyZeroHealthyTeams(anyZeroHealthyTeams) {}
+			sizeChanges(false), readyToStart(readyToStart), output( output ), shardsAffectedByTeamFailure(shardsAffectedByTeamFailure), anyZeroHealthyTeams(anyZeroHealthyTeams) {}
 
 	~DataDistributionTracker()
 	{
@@ -357,10 +358,16 @@ ACTOR Future<Void> shardSplitter(
 		for( int i = numShards-1; i > skipRange; i-- )
 			restartShardTrackers( self, KeyRangeRef(splitKeys[i], splitKeys[i+1]) );
 
-		for( int i = 0; i < skipRange; i++ )
-			self->output.send( RelocateShard( KeyRangeRef(splitKeys[i], splitKeys[i+1]), PRIORITY_SPLIT_SHARD) );
-		for( int i = numShards-1; i > skipRange; i-- )
-			self->output.send( RelocateShard(  KeyRangeRef(splitKeys[i], splitKeys[i+1]), PRIORITY_SPLIT_SHARD) );
+		for( int i = 0; i < skipRange; i++ ) {
+			KeyRangeRef r(splitKeys[i], splitKeys[i+1]);
+			self->shardsAffectedByTeamFailure->defineShard( r );
+			self->output.send( RelocateShard( r, PRIORITY_SPLIT_SHARD) );
+		}
+		for( int i = numShards-1; i > skipRange; i-- ) {
+			KeyRangeRef r(splitKeys[i], splitKeys[i+1]);
+			self->shardsAffectedByTeamFailure->defineShard( r );
+			self->output.send( RelocateShard( r, PRIORITY_SPLIT_SHARD) );
+		}
 
 		self->sizeChanges.add( changeSizes( self, keys, shardSize->get().get().bytes ) );
 	} else {
@@ -461,6 +468,7 @@ Future<Void> shardMerger(
 		.detail("TrackerID", trackerId);
 
 	restartShardTrackers( self, mergeRange, endingStats );
+	self->shardsAffectedByTeamFailure->defineShard( mergeRange );
 	self->output.send( RelocateShard( mergeRange, PRIORITY_MERGE_SHARD ) );
 
 	// We are about to be cancelled by the call to restartShardTrackers
@@ -661,13 +669,14 @@ ACTOR Future<Void> dataDistributionTracker(
 	Reference<InitialDataDistribution> initData,
 	Database cx,
 	PromiseStream<RelocateShard> output,
+	Reference<ShardsAffectedByTeamFailure> shardsAffectedByTeamFailure,
 	PromiseStream<GetMetricsRequest> getShardMetrics,
 	FutureStream<Promise<int64_t>> getAverageShardBytes,
 	Promise<Void> readyToStart,
 	Reference<AsyncVar<bool>> anyZeroHealthyTeams,
 	UID masterId)
 {
-	state DataDistributionTracker self(cx, masterId, readyToStart, output, anyZeroHealthyTeams);
+	state DataDistributionTracker self(cx, masterId, readyToStart, output, shardsAffectedByTeamFailure, anyZeroHealthyTeams);
 	state Future<Void> loggingTrigger = Void();
 	try {
 		Void _ = wait( trackInitialShards( &self, initData ) );

From 7c5d414f7b09024c6c5774ba2b74ec56db1047e7 Mon Sep 17 00:00:00 2001
From: Evan Tschannen <ejt@apple.com>
Date: Thu, 9 Aug 2018 12:38:35 -0700
Subject: [PATCH 23/34] fix: during destruction logData could attempt to
 dereference tLogData after it has been deleted

---
 fdbserver/TLogServer.actor.cpp | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp
index 38bacad70d..78c18d1aea 100644
--- a/fdbserver/TLogServer.actor.cpp
+++ b/fdbserver/TLogServer.actor.cpp
@@ -269,7 +269,7 @@ struct TLogData : NonCopyable {
 	Future<Void> updatePersist; //SOMEDAY: integrate the recovery and update storage so that only one of them is committing to persistant data.
 
 	PromiseStream<Future<Void>> sharedActors;
-	bool terminated;
+	Promise<Void> terminated;
 	FlowLock concurrentLogRouterReads;
 
 	TLogData(UID dbgid, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> const& dbInfo)
@@ -277,7 +277,7 @@ struct TLogData : NonCopyable {
 			  persistentData(persistentData), rawPersistentQueue(persistentQueue), persistentQueue(new TLogQueue(persistentQueue, dbgid)),
 			  dbInfo(dbInfo), queueCommitBegin(0), queueCommitEnd(0), prevVersion(0),
 			  diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false),
-			  bytesInput(0), bytesDurable(0), updatePersist(Void()), terminated(false), concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS)
+			  bytesInput(0), bytesDurable(0), updatePersist(Void()), concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS)
 		{
 		}
 };
@@ -411,10 +411,11 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
 	int8_t locality;
 	UID recruitmentID;
 	std::set<Tag> allTags;
+	Future<Void> terminated;
 
 	explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, UID recruitmentID, std::vector<Tag> tags) : tLogData(tLogData), knownCommittedVersion(1), logId(interf.id()),
 			cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), remoteTag(remoteTag), isPrimary(isPrimary), logRouterTags(logRouterTags), recruitmentID(recruitmentID),
-			logSystem(new AsyncVar<Reference<ILogSystem>>()), logRouterPoppedVersion(0), durableKnownCommittedVersion(0), minKnownCommittedVersion(0), allTags(tags.begin(), tags.end()),
+			logSystem(new AsyncVar<Reference<ILogSystem>>()), logRouterPoppedVersion(0), durableKnownCommittedVersion(0), minKnownCommittedVersion(0), allTags(tags.begin(), tags.end()), terminated(tLogData->terminated.getFuture()),
 			// These are initialized differently on init() or recovery
 			recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), unrecoveredBefore(1), recoveredAt(1), unpoppedRecoveredTags(0),
 			logRouterPopToVersion(0), locality(tagLocalityInvalid)
@@ -440,13 +441,14 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
 	}
 
 	~LogData() {
-		tLogData->bytesDurable += bytesInput.getValue() - bytesDurable.getValue();
-		TraceEvent("TLogBytesWhenRemoved", logId).detail("SharedBytesInput", tLogData->bytesInput).detail("SharedBytesDurable", tLogData->bytesDurable).detail("LocalBytesInput", bytesInput.getValue()).detail("LocalBytesDurable", bytesDurable.getValue());
-
-		ASSERT_ABORT(tLogData->bytesDurable <= tLogData->bytesInput);
 		endRole(logId, "TLog", "Error", true);
 
-		if(!tLogData->terminated) {
+		if(!terminated.isReady()) {
+			tLogData->bytesDurable += bytesInput.getValue() - bytesDurable.getValue();
+			TraceEvent("TLogBytesWhenRemoved", logId).detail("SharedBytesInput", tLogData->bytesInput).detail("SharedBytesDurable", tLogData->bytesDurable).detail("LocalBytesInput", bytesInput.getValue()).detail("LocalBytesDurable", bytesDurable.getValue());
+
+			ASSERT_ABORT(tLogData->bytesDurable <= tLogData->bytesInput);
+
 			Key logIdKey = BinaryWriter::toValue(logId,Unversioned());
 			tLogData->persistentData->clear( singleKeyRange(logIdKey.withPrefix(persistCurrentVersionKeys.begin)) );
 			tLogData->persistentData->clear( singleKeyRange(logIdKey.withPrefix(persistKnownCommittedVersionKeys.begin)) );
@@ -1488,7 +1490,7 @@ ACTOR Future<Void> pullAsyncData( TLogData* self, Reference<LogData> logData, st
 
 					commitMessages(logData, ver, messages, self->bytesInput);
 
-					if(self->terminated) {
+					if(self->terminated.isSet()) {
 						return Void();
 					}
 
@@ -1525,7 +1527,7 @@ ACTOR Future<Void> pullAsyncData( TLogData* self, Reference<LogData> logData, st
 							logData->knownCommittedVersion = std::max(logData->knownCommittedVersion, r->popped());
 						}
 
-						if(self->terminated) {
+						if(self->terminated.isSet()) {
 							return Void();
 						}
 
@@ -2048,7 +2050,7 @@ ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQ
 			}
 		}
 	} catch (Error& e) {
-		self.terminated = true;
+		self.terminated.send(Void());
 		TraceEvent("TLogError", tlogId).error(e, true);
 		endRole(tlogId, "SharedTLog", "Error", true);
 		if(recovered.canBeSet()) recovered.send(Void());

From 9c918a28f62609a2913bf386436b639ef5469f32 Mon Sep 17 00:00:00 2001
From: Evan Tschannen <ejt@apple.com>
Date: Thu, 9 Aug 2018 13:16:09 -0700
Subject: [PATCH 24/34] fix: status was reporting no replicas remaining when
 the remote datacenter was initially configured with usable_regions=2

---
 documentation/StatusSchema.json          |  28 +++-
 fdbserver/DataDistribution.actor.cpp     |  23 ++-
 fdbserver/QuietDatabase.actor.cpp        |   2 +-
 fdbserver/Status.actor.cpp               | 175 ++++++++++++++---------
 tests/fast/SidebandWithStatus.txt        |   2 +-
 tests/rare/LargeApiCorrectnessStatus.txt |   2 +-
 tests/slow/DDBalanceAndRemoveStatus.txt  |   2 +-
 7 files changed, 154 insertions(+), 80 deletions(-)

diff --git a/documentation/StatusSchema.json b/documentation/StatusSchema.json
index bf75c63f29..a5ff9af057 100644
--- a/documentation/StatusSchema.json
+++ b/documentation/StatusSchema.json
@@ -420,11 +420,35 @@
          "total_disk_used_bytes":0,
          "total_kv_size_bytes":0,
          "partitions_count":2,
-         "moving_data":{  
+         "moving_data":{
             "total_written_bytes":0,
             "in_flight_bytes":0,
-            "in_queue_bytes":0
+            "in_queue_bytes":0,
+            "highest_priority":0
          },
+         "team_trackers":[
+            {
+                "primary":true,
+                "in_flight_bytes":0,
+                "unhealthy_servers":0,
+                "state":{
+                    "healthy":true,
+                    "min_replicas_remaining":0,
+                    "name":{
+                       "$enum":[
+                          "initializing",
+                          "missing_data",
+                          "healing",
+                          "healthy_repartitioning",
+                          "healthy_removing_server",
+                          "healthy_rebalancing",
+                          "healthy"
+                       ]
+                    },
+                    "description":""
+                }
+            }
+         ],
          "least_operating_space_bytes_storage_server":0,
          "max_machine_failures_without_losing_data":0
       },
diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp
index bd9320c15e..b4cdee0379 100644
--- a/fdbserver/DataDistribution.actor.cpp
+++ b/fdbserver/DataDistribution.actor.cpp
@@ -520,6 +520,7 @@ struct DDTeamCollection {
 	vector<UID> allServers;
 	ServerStatusMap server_status;
 	int64_t unhealthyServers;
+	std::map<int,int> priority_teams;
 	std::map<UID, Reference<TCServerInfo>> server_info;
 	vector<Reference<TCTeamInfo>> teams;
 	Reference<ShardsAffectedByTeamFailure> shardsAffectedByTeamFailure;
@@ -1277,6 +1278,7 @@ ACTOR Future<Void> teamTracker( DDTeamCollection *self, Reference<IDataDistribut
 
 	Void _ = wait( yield() );
 	TraceEvent("TeamTrackerStarting", self->masterId).detail("Reason", "Initial wait complete (sc)").detail("Team", team->getDesc());
+	self->priority_teams[team->getPriority()]++;
 
 	try {
 		loop {
@@ -1371,6 +1373,12 @@ ACTOR Future<Void> teamTracker( DDTeamCollection *self, Reference<IDataDistribut
 					team->setPriority( PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER );
 				else
 					team->setPriority( PRIORITY_TEAM_HEALTHY );
+
+				if(lastPriority != team->getPriority()) {
+					self->priority_teams[lastPriority]--;
+					self->priority_teams[team->getPriority()]++;
+				}
+
 				TraceEvent("TeamPriorityChange", self->masterId).detail("Priority", team->getPriority());
 
 				lastZeroHealthy = self->zeroHealthyTeams->get(); //set this again in case it changed from this teams health changing
@@ -1431,6 +1439,7 @@ ACTOR Future<Void> teamTracker( DDTeamCollection *self, Reference<IDataDistribut
 			Void _ = wait( yield() );
 		}
 	} catch(Error& e) {
+		self->priority_teams[team->getPriority()]--;
 		if( team->isHealthy() ) {
 			self->healthyTeamCount--;
 			ASSERT( self->healthyTeamCount >= 0 );
@@ -1997,8 +2006,14 @@ ACTOR Future<Void> dataDistributionTeamCollection(
 				}
 			}
 			when( Void _ = wait( loggingTrigger ) ) {
-				TraceEvent("TotalDataInFlight", masterId).detail("TotalBytes", self.getDebugTotalDataInFlight()).detail("UnhealthyServers", self.unhealthyServers).trackLatest(
-					(cx->dbName.toString() + "/TotalDataInFlight").c_str());
+				int highestPriority = 0;
+				for(auto it : self.priority_teams) {
+					if(it.second > 0) {
+						highestPriority = std::max(highestPriority, it.first);
+					}
+				}
+				TraceEvent("TotalDataInFlight", masterId).detail("Primary", self.primary).detail("TotalBytes", self.getDebugTotalDataInFlight()).detail("UnhealthyServers", self.unhealthyServers)
+					.detail("HighestPriority", highestPriority).trackLatest( self.primary ? "TotalDataInFlight" : "TotalDataInFlightRemote" );
 				loggingTrigger = delay( SERVER_KNOBS->DATA_DISTRIBUTION_LOGGING_INTERVAL );
 				self.countHealthyTeams();
 			}
@@ -2184,8 +2199,8 @@ ACTOR Future<Void> dataDistribution(
 					.detail( "HighestPriority", 0 )
 					.trackLatest( format("%s/MovingData", printable(cx->dbName).c_str() ).c_str() );
 
-				TraceEvent("TotalDataInFlight", mi.id()).detail("TotalBytes", 0)
-					.trackLatest((cx->dbName.toString() + "/TotalDataInFlight").c_str());
+				TraceEvent("TotalDataInFlight", mi.id()).detail("Primary", true).detail("TotalBytes", 0).detail("UnhealthyServers", 0).detail("HighestPriority", 0).trackLatest("TotalDataInFlight");
+				TraceEvent("TotalDataInFlight", mi.id()).detail("Primary", false).detail("TotalBytes", 0).detail("UnhealthyServers", 0).detail("HighestPriority", configuration.usableRegions > 1 ? 0 : -1).trackLatest("TotalDataInFlightRemote");
 
 				Void _ = wait( waitForDataDistributionEnabled(cx) );
 				TraceEvent("DataDistributionEnabled");
diff --git a/fdbserver/QuietDatabase.actor.cpp b/fdbserver/QuietDatabase.actor.cpp
index 1d8055403d..052826d557 100644
--- a/fdbserver/QuietDatabase.actor.cpp
+++ b/fdbserver/QuietDatabase.actor.cpp
@@ -69,7 +69,7 @@ ACTOR Future<int64_t> getDataInFlight( Database cx, WorkerInterface masterWorker
 	try {
 		TraceEvent("DataInFlight").detail("Database", printable(cx->dbName)).detail("Stage", "ContactingMaster");
 		TraceEventFields md = wait( timeoutError(masterWorker.eventLogRequest.getReply(
-			EventLogRequest( StringRef( cx->dbName.toString() + "/TotalDataInFlight" ) ) ), 1.0 ) );
+			EventLogRequest( LiteralStringRef("TotalDataInFlight") ) ), 1.0 ) );
 		int64_t dataInFlight;
 		sscanf(md.getValue("TotalBytes").c_str(), "%lld", &dataInFlight);
 		return dataInFlight;
diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp
index ef9f5b0403..ab6f5b0ad4 100644
--- a/fdbserver/Status.actor.cpp
+++ b/fdbserver/Status.actor.cpp
@@ -1070,7 +1070,6 @@ static StatusObject configurationFetcher(Optional<DatabaseConfiguration> conf, S
 }
 
 ACTOR static Future<StatusObject> dataStatusFetcher(std::pair<WorkerInterface, ProcessClass> mWorker, std::string dbName, int *minReplicasRemaining) {
-	state StatusObject stateSectionObj;
 	state StatusObject statusObjData;
 
 	try {
@@ -1079,96 +1078,135 @@ ACTOR static Future<StatusObject> dataStatusFetcher(std::pair<WorkerInterface, P
 		// TODO:  Should this be serial?
 		futures.push_back(timeoutError(mWorker.first.eventLogRequest.getReply(EventLogRequest(StringRef(dbName + "/DDTrackerStarting"))), 1.0));
 		futures.push_back(timeoutError(mWorker.first.eventLogRequest.getReply(EventLogRequest(StringRef(dbName + "/DDTrackerStats"))), 1.0));
+		futures.push_back(timeoutError(mWorker.first.eventLogRequest.getReply(EventLogRequest(StringRef(dbName + "/MovingData"))), 1.0));
+		futures.push_back(timeoutError(mWorker.first.eventLogRequest.getReply(EventLogRequest(LiteralStringRef("TotalDataInFlight"))), 1.0));
+		futures.push_back(timeoutError(mWorker.first.eventLogRequest.getReply(EventLogRequest(LiteralStringRef("TotalDataInFlightRemote"))), 1.0));
 
 		std::vector<TraceEventFields> dataInfo = wait(getAll(futures));
 
 		TraceEventFields startingStats = dataInfo[0];
-		state TraceEventFields dataStats = dataInfo[1];
+		TraceEventFields dataStats = dataInfo[1];
 
 		if (startingStats.size() && startingStats.getValue("State") != "Active") {
+			StatusObject stateSectionObj;
 			stateSectionObj["name"] = "initializing";
 			stateSectionObj["description"] = "(Re)initializing automatic data distribution";
+			statusObjData["state"] = stateSectionObj;
+			return statusObjData;
 		}
-		else {
-			state TraceEventFields md = wait(timeoutError(mWorker.first.eventLogRequest.getReply(EventLogRequest(StringRef(dbName + "/MovingData"))), 1.0));
 
-			// If we have a MovingData message, parse it.
-			if (md.size())
-			{
-				int64_t partitionsInQueue = parseInt64(md.getValue("InQueue"));
-				int64_t partitionsInFlight = parseInt64(md.getValue("InFlight"));
-				int64_t averagePartitionSize = parseInt64(md.getValue("AverageShardSize"));
-				int64_t totalBytesWritten = parseInt64(md.getValue("BytesWritten"));
-				int highestPriority = parseInt(md.getValue("HighestPriority"));
+		TraceEventFields md = dataInfo[2];
 
-				if( averagePartitionSize >= 0 ) {
-					StatusObject moving_data;
-					moving_data["in_queue_bytes"] = partitionsInQueue * averagePartitionSize;
-					moving_data["in_flight_bytes"] = partitionsInFlight * averagePartitionSize;
-					moving_data["total_written_bytes"] = totalBytesWritten;
+		// If we have a MovingData message, parse it.
+		if (md.size())
+		{
+			int64_t partitionsInQueue = parseInt64(md.getValue("InQueue"));
+			int64_t partitionsInFlight = parseInt64(md.getValue("InFlight"));
+			int64_t averagePartitionSize = parseInt64(md.getValue("AverageShardSize"));
+			int64_t totalBytesWritten = parseInt64(md.getValue("BytesWritten"));
+			int highestPriority = parseInt(md.getValue("HighestPriority"));
 
-					// TODO: moving_data["rate_bytes"] = makeCounter(hz, c, r);
-					statusObjData["moving_data"] = moving_data;
+			if( averagePartitionSize >= 0 ) {
+				StatusObject moving_data;
+				moving_data["in_queue_bytes"] = partitionsInQueue * averagePartitionSize;
+				moving_data["in_flight_bytes"] = partitionsInFlight * averagePartitionSize;
+				moving_data["total_written_bytes"] = totalBytesWritten;
+				moving_data["highest_priority"] = highestPriority;
 
-					statusObjData["average_partition_size_bytes"] = averagePartitionSize;
-				}
+				// TODO: moving_data["rate_bytes"] = makeCounter(hz, c, r);
+				statusObjData["moving_data"] = moving_data;
 
-				if (highestPriority >= PRIORITY_TEAM_0_LEFT) {
-					stateSectionObj["healthy"] = false;
-					stateSectionObj["name"] = "missing_data";
-					stateSectionObj["description"] = "No replicas remain of some data";
-					stateSectionObj["min_replicas_remaining"] = 0;
+				statusObjData["average_partition_size_bytes"] = averagePartitionSize;
+			}
+		}
+
+		if (dataStats.size())
+		{
+			int64_t totalDBBytes = parseInt64(dataStats.getValue("TotalSizeBytes"));
+			statusObjData["total_kv_size_bytes"] = totalDBBytes;
+			int shards = parseInt(dataStats.getValue("Shards"));
+			statusObjData["partitions_count"] = shards;
+		}
+
+		StatusArray teamTrackers;
+		for(int i = 0; i < 2; i++) {
+			TraceEventFields inFlight = dataInfo[3 + i];
+			if (!inFlight.size()) {
+				continue;
+			}
+
+			bool primary = parseInt(inFlight.getValue("Primary"));
+			int64_t totalDataInFlight = parseInt64(inFlight.getValue("TotalBytes"));
+			int unhealthyServers = parseInt(inFlight.getValue("UnhealthyServers"));
+			int highestPriority = parseInt(inFlight.getValue("HighestPriority"));
+
+			StatusObject team_tracker;
+			team_tracker["primary"] = primary;
+			team_tracker["in_flight_bytes"] = totalDataInFlight;
+			team_tracker["unhealthy_servers"] = unhealthyServers;
+
+			StatusObject stateSectionObj;
+			if (highestPriority >= PRIORITY_TEAM_0_LEFT) {
+				stateSectionObj["healthy"] = false;
+				stateSectionObj["name"] = "missing_data";
+				stateSectionObj["description"] = "No replicas remain of some data";
+				stateSectionObj["min_replicas_remaining"] = 0;
+				if(primary) {
 					*minReplicasRemaining = 0;
 				}
-				else if (highestPriority >= PRIORITY_TEAM_1_LEFT) {
-					stateSectionObj["healthy"] = false;
-					stateSectionObj["name"] = "healing";
-					stateSectionObj["description"] = "Only one replica remains of some data";
-					stateSectionObj["min_replicas_remaining"] = 1;
+			}
+			else if (highestPriority >= PRIORITY_TEAM_1_LEFT) {
+				stateSectionObj["healthy"] = false;
+				stateSectionObj["name"] = "healing";
+				stateSectionObj["description"] = "Only one replica remains of some data";
+				stateSectionObj["min_replicas_remaining"] = 1;
+				if(primary) {
 					*minReplicasRemaining = 1;
 				}
-				else if (highestPriority >= PRIORITY_TEAM_2_LEFT) {
-					stateSectionObj["healthy"] = false;
-					stateSectionObj["name"] = "healing";
-					stateSectionObj["description"] = "Only two replicas remain of some data";
-					stateSectionObj["min_replicas_remaining"] = 2;
+			}
+			else if (highestPriority >= PRIORITY_TEAM_2_LEFT) {
+				stateSectionObj["healthy"] = false;
+				stateSectionObj["name"] = "healing";
+				stateSectionObj["description"] = "Only two replicas remain of some data";
+				stateSectionObj["min_replicas_remaining"] = 2;
+				if(primary) {
 					*minReplicasRemaining = 2;
 				}
-				else if (highestPriority >= PRIORITY_TEAM_UNHEALTHY) {
-					stateSectionObj["healthy"] = false;
-					stateSectionObj["name"] = "healing";
-					stateSectionObj["description"] = "Restoring replication factor";
-				}
-				else if (highestPriority >= PRIORITY_MERGE_SHARD) {
-					stateSectionObj["healthy"] = true;
-					stateSectionObj["name"] = "healthy_repartitioning";
-					stateSectionObj["description"] = "Repartitioning.";
-				}
-				else if (highestPriority >= PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER) {
-					stateSectionObj["healthy"] = true;
-					stateSectionObj["name"] = "healthy_removing_server";
-					stateSectionObj["description"] = "Removing storage server";
-				}
-				else if (highestPriority >= PRIORITY_REBALANCE_SHARD) {
-					stateSectionObj["healthy"] = true;
-					stateSectionObj["name"] = "healthy_rebalancing";
-					stateSectionObj["description"] = "Rebalancing";
-				}
-				else if (highestPriority >= 0) {
-					stateSectionObj["healthy"] = true;
-					stateSectionObj["name"] = "healthy";
-				}
+			}
+			else if (highestPriority >= PRIORITY_TEAM_UNHEALTHY) {
+				stateSectionObj["healthy"] = false;
+				stateSectionObj["name"] = "healing";
+				stateSectionObj["description"] = "Restoring replication factor";
+			}
+			else if (highestPriority >= PRIORITY_MERGE_SHARD) {
+				stateSectionObj["healthy"] = true;
+				stateSectionObj["name"] = "healthy_repartitioning";
+				stateSectionObj["description"] = "Repartitioning.";
+			}
+			else if (highestPriority >= PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER) {
+				stateSectionObj["healthy"] = true;
+				stateSectionObj["name"] = "healthy_removing_server";
+				stateSectionObj["description"] = "Removing storage server";
+			}
+			else if (highestPriority >= PRIORITY_REBALANCE_SHARD) {
+				stateSectionObj["healthy"] = true;
+				stateSectionObj["name"] = "healthy_rebalancing";
+				stateSectionObj["description"] = "Rebalancing";
+			}
+			else if (highestPriority >= 0) {
+				stateSectionObj["healthy"] = true;
+				stateSectionObj["name"] = "healthy";
 			}
 
-			if (dataStats.size())
-			{
-				int64_t totalDBBytes = parseInt64(dataStats.getValue("TotalSizeBytes"));
-				statusObjData["total_kv_size_bytes"] = totalDBBytes;
-				int shards = parseInt(dataStats.getValue("Shards"));
-				statusObjData["partitions_count"] = shards;
+			if(!stateSectionObj.empty()) {
+				team_tracker["state"] = stateSectionObj;
+				teamTrackers.push_back(team_tracker);
+				if(primary) {
+					statusObjData["state"] = stateSectionObj;
+				}
 			}
-
 		}
+		statusObjData["team_trackers"] = teamTrackers;
 	}
 	catch (Error &e) {
 		if (e.code() == error_code_actor_cancelled)
@@ -1177,9 +1215,6 @@ ACTOR static Future<StatusObject> dataStatusFetcher(std::pair<WorkerInterface, P
 		// from the "cluster" perspective - from the client perspective it is not but that is indicated elsewhere.
 	}
 
-	if (!stateSectionObj.empty())
-		statusObjData["state"] = stateSectionObj;
-
 	return statusObjData;
 }
 
diff --git a/tests/fast/SidebandWithStatus.txt b/tests/fast/SidebandWithStatus.txt
index 54097822b8..38dcdcaa36 100644
--- a/tests/fast/SidebandWithStatus.txt
+++ b/tests/fast/SidebandWithStatus.txt
@@ -5,7 +5,7 @@ testTitle=CloggedCausalConsistencyTest
 
     testName=Status
     testDuration=30.0
-    schema={"cluster":{"qos":{"limiting_version_lag_storage_server":0,"released_transactions_per_second":0,"transactions_per_second_limit":0,"limiting_queue_bytes_storage_server":0,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","description":"The database is not being saturated by the workload.","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]}},"worst_version_lag_storage_server":0,"worst_queue_bytes_log_server":460,"worst_queue_bytes_storage_server":0},"database_locked":false,"full_replication":true,"recovery_state":{"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","accepting_commits","all_logs_recruited","storage_recovered","fully_recovered"]},"missing_logs":"7f8d623d0cb9966e","required_resolvers":1,"required_logs":3,"description":"Recovery complete."},"connection_string":"a:a@127.0.0.1:4000","machines":{"$map":{"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"tcp_segments_retransmitted":{"hz":0.0}},"locality":{"$map":"value"},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","cpu":{"logical_core_utilization":0.4}}},"layers":{"_valid":true,"_error":"some error 
description"},"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"generation":2,"protocol_version":"fdb00a400050001","datacenter_version_difference":0,"configuration":{"logs":2,"log_replicas":2,"storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"excluded_servers":[{"address":"10.0.4.1"}],"remote_logs":5,"log_anti_quorum":0,"storage_replicas":1,"coordinators_count":1,"regions":[{"satellite_redundancy_mode":"one_satellite_single","satellite_anti_quorum":0,"satellite_usable_dcs":1,"datacenters":[{"priority":1,"satellite":1,"id":"mr"}],"satellite_log_policy":"(zoneid^3x1)","satellite_log_replicas":1,"satellite_logs":2}],"usable_regions":1,"redundancy_mode":"single","auto_logs":3,"proxies":5,"resolvers":1,"log_replication_policy":"(zoneid^3x1)","remote_redundancy_mode":"remote_single","repopulate_anti_quorum":1,"remote_log_replicas":3,"log_routers":10,"storage_replication_policy":"(zoneid^3x1)","auto_proxies":3,"auto_resolvers":1},"data":{"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true,"description":"","name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"min_replicas_remaining":0},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"max_machine_failures_without_losing_data":0,"moving_data":{"in_queue_bytes":0,"total_written_bytes":0,"in_flight_bytes":0},"least_operating_space_bytes_storage_server":0,"partitions_count":2},"old_logs":[{"satellite_log_fault_tolerance":2,"logs":[{"healthy":true,"id":"7f8d623d0cb9966e","address":"1.2.3.4:1234"}],"satellite_log_write_anti_quorum":0,"remote_log_fault_tolerance":2,"log_fault_tolerance":2,"log_write_anti_quorum":0,"satellite_log_replication_factor":3,"remote_log_replication_factor":3,"log_replication_factor":
3}],"processes":{"$map":{"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"data_lag":{"seconds":5.0,"versions":12341234},"input_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"kvstore_used_bytes":12341234,"stored_bytes":12341234,"kvstore_free_bytes":12341234,"durable_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"id":"eb84471d68c12d1d26f692a50000003f","data_version":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"queue_disk_available_bytes":12341234,"kvstore_available_bytes":12341234,"queue_disk_total_bytes":12341234,"queue_disk_used_bytes":12341234,"queue_disk_free_bytes":12341234,"kvstore_total_bytes":12341234,"finished_queries":{"hz":0.0,"counter":0,"roughness":0.0}}],"locality":{"$map":"value"},"messages":[{"description":"abc","type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error messages)"]},"raw_log_message":"<stuff/>","time":12345.12312}],"address":"1.2.3.4:1234","command_line":"-r 
simulation","disk":{"free_bytes":3451233456234,"reads":{"hz":0.0,"counter":0,"sectors":0},"busy":0.0,"writes":{"hz":0.0,"counter":0,"sectors":0},"total_bytes":123412341234},"version":"3.0.0","excluded":false,"memory":{"available_bytes":0,"unused_allocated_memory":0,"limit_bytes":0,"used_bytes":0},"machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","uptime_seconds":1234.2345,"cpu":{"usage_cores":0.0},"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"connections_closed":{"hz":0.0},"connection_errors":{"hz":0.0},"current_connections":0,"connections_established":{"hz":0.0}}}},"workload":{"operations":{"writes":{"hz":0.0,"counter":0,"roughness":0.0},"reads":{"hz":0.0,"counter":0,"roughness":0.0}},"keys":{"read":{"hz":0.0,"counter":0,"roughness":0.0}},"bytes":{"read":{"hz":0.0,"counter":0,"roughness":0.0},"written":{"hz":0.0,"counter":0,"roughness":0.0}},"transactions":{"started":{"hz":0.0,"counter":0,"roughness":0.0},"conflicted":{"hz":0.0,"counter":0,"roughness":0.0},"committed":{"hz":0.0,"counter":0,"roughness":0.0}}},"incompatible_connections":[],"clients":{"count":1,"supported_versions":[{"count":1,"protocol_version":"fdb00a400050001","client_version":"3.0.0","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d","connected_clients":[{"log_group":"default","address":"127.0.0.1:9898"}]}]},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","full_replication_timeout","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection 
string. Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"database_available":true,"cluster_controller_timestamp":1415650089,"latency_probe":{"immediate_priority_transaction_start_seconds":0.0,"transaction_start_seconds":0.0,"batch_priority_transaction_start_seconds":0.0,"read_seconds":7,"commit_seconds":0.02}},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"database_status":{"available":true,"healthy":true}}}
+    schema={"cluster":{"qos":{"limiting_version_lag_storage_server":0,"released_transactions_per_second":0,"transactions_per_second_limit":0,"limiting_queue_bytes_storage_server":0,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","description":"The database is not being saturated by the workload.","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]}},"worst_version_lag_storage_server":0,"worst_queue_bytes_log_server":460,"worst_queue_bytes_storage_server":0},"database_locked":false,"full_replication":true,"recovery_state":{"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","accepting_commits","all_logs_recruited","storage_recovered","fully_recovered"]},"missing_logs":"7f8d623d0cb9966e","required_resolvers":1,"required_logs":3,"description":"Recovery complete."},"connection_string":"a:a@127.0.0.1:4000","machines":{"$map":{"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"tcp_segments_retransmitted":{"hz":0.0}},"locality":{"$map":"value"},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","cpu":{"logical_core_utilization":0.4}}},"layers":{"_valid":true,"_error":"some error 
description"},"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"generation":2,"protocol_version":"fdb00a400050001","datacenter_version_difference":0,"configuration":{"logs":2,"log_replicas":2,"storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"excluded_servers":[{"address":"10.0.4.1"}],"remote_logs":5,"log_anti_quorum":0,"storage_replicas":1,"coordinators_count":1,"regions":[{"satellite_redundancy_mode":"one_satellite_single","satellite_anti_quorum":0,"satellite_usable_dcs":1,"datacenters":[{"priority":1,"satellite":1,"id":"mr"}],"satellite_log_policy":"(zoneid^3x1)","satellite_log_replicas":1,"satellite_logs":2}],"usable_regions":1,"redundancy_mode":"single","auto_logs":3,"proxies":5,"resolvers":1,"log_replication_policy":"(zoneid^3x1)","remote_redundancy_mode":"remote_single","repopulate_anti_quorum":1,"remote_log_replicas":3,"log_routers":10,"storage_replication_policy":"(zoneid^3x1)","auto_proxies":3,"auto_resolvers":1},"data":{"team_trackers":[{"state":{"healthy":true,"description":"","name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"min_replicas_remaining":0},"unhealthy_servers":0,"primary":true,"in_flight_bytes":0}],"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true,"description":"","name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"min_replicas_remaining":0},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"max_machine_failures_without_losing_data":0,"moving_data":{"in_queue_bytes":0,"total_written_bytes":0,"highest_priority":0,"in_flight_bytes":0},"least_operating_space_bytes_storage_server":0,"partitions_count":2},"old_logs":[{"satellite_lo
g_fault_tolerance":2,"logs":[{"healthy":true,"id":"7f8d623d0cb9966e","address":"1.2.3.4:1234"}],"satellite_log_write_anti_quorum":0,"remote_log_fault_tolerance":2,"log_fault_tolerance":2,"log_write_anti_quorum":0,"satellite_log_replication_factor":3,"remote_log_replication_factor":3,"log_replication_factor":3}],"processes":{"$map":{"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"data_lag":{"seconds":5.0,"versions":12341234},"input_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"kvstore_used_bytes":12341234,"stored_bytes":12341234,"kvstore_free_bytes":12341234,"durable_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"id":"eb84471d68c12d1d26f692a50000003f","data_version":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"queue_disk_available_bytes":12341234,"kvstore_available_bytes":12341234,"queue_disk_total_bytes":12341234,"queue_disk_used_bytes":12341234,"queue_disk_free_bytes":12341234,"kvstore_total_bytes":12341234,"finished_queries":{"hz":0.0,"counter":0,"roughness":0.0}}],"locality":{"$map":"value"},"messages":[{"description":"abc","type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error messages)"]},"raw_log_message":"<stuff/>","time":12345.12312}],"address":"1.2.3.4:1234","command_line":"-r 
simulation","disk":{"free_bytes":3451233456234,"reads":{"hz":0.0,"counter":0,"sectors":0},"busy":0.0,"writes":{"hz":0.0,"counter":0,"sectors":0},"total_bytes":123412341234},"version":"3.0.0","excluded":false,"memory":{"available_bytes":0,"unused_allocated_memory":0,"limit_bytes":0,"used_bytes":0},"machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","uptime_seconds":1234.2345,"cpu":{"usage_cores":0.0},"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"connections_closed":{"hz":0.0},"connection_errors":{"hz":0.0},"current_connections":0,"connections_established":{"hz":0.0}}}},"workload":{"operations":{"writes":{"hz":0.0,"counter":0,"roughness":0.0},"reads":{"hz":0.0,"counter":0,"roughness":0.0}},"keys":{"read":{"hz":0.0,"counter":0,"roughness":0.0}},"bytes":{"read":{"hz":0.0,"counter":0,"roughness":0.0},"written":{"hz":0.0,"counter":0,"roughness":0.0}},"transactions":{"started":{"hz":0.0,"counter":0,"roughness":0.0},"conflicted":{"hz":0.0,"counter":0,"roughness":0.0},"committed":{"hz":0.0,"counter":0,"roughness":0.0}}},"incompatible_connections":[],"clients":{"count":1,"supported_versions":[{"count":1,"protocol_version":"fdb00a400050001","client_version":"3.0.0","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d","connected_clients":[{"log_group":"default","address":"127.0.0.1:9898"}]}]},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","full_replication_timeout","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection 
string. Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"database_available":true,"cluster_controller_timestamp":1415650089,"latency_probe":{"immediate_priority_transaction_start_seconds":0.0,"transaction_start_seconds":0.0,"batch_priority_transaction_start_seconds":0.0,"read_seconds":7,"commit_seconds":0.02}},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"database_status":{"available":true,"healthy":true}}}
 
     testName=RandomClogging
     testDuration=30.0
diff --git a/tests/rare/LargeApiCorrectnessStatus.txt b/tests/rare/LargeApiCorrectnessStatus.txt
index 331a780cd0..0cf6149f39 100644
--- a/tests/rare/LargeApiCorrectnessStatus.txt
+++ b/tests/rare/LargeApiCorrectnessStatus.txt
@@ -24,4 +24,4 @@ testTitle=ApiCorrectnessTest
 
     testName=Status
     testDuration=30.0
-	schema={"cluster":{"qos":{"limiting_version_lag_storage_server":0,"released_transactions_per_second":0,"transactions_per_second_limit":0,"limiting_queue_bytes_storage_server":0,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","description":"The database is not being saturated by the workload.","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]}},"worst_version_lag_storage_server":0,"worst_queue_bytes_log_server":460,"worst_queue_bytes_storage_server":0},"database_locked":false,"full_replication":true,"recovery_state":{"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","accepting_commits","all_logs_recruited","storage_recovered","fully_recovered"]},"missing_logs":"7f8d623d0cb9966e","required_resolvers":1,"required_logs":3,"description":"Recovery complete."},"connection_string":"a:a@127.0.0.1:4000","machines":{"$map":{"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"tcp_segments_retransmitted":{"hz":0.0}},"locality":{"$map":"value"},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","cpu":{"logical_core_utilization":0.4}}},"layers":{"_valid":true,"_error":"some error 
description"},"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"generation":2,"protocol_version":"fdb00a400050001","datacenter_version_difference":0,"configuration":{"logs":2,"log_replicas":2,"storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"excluded_servers":[{"address":"10.0.4.1"}],"remote_logs":5,"log_anti_quorum":0,"storage_replicas":1,"coordinators_count":1,"regions":[{"satellite_redundancy_mode":"one_satellite_single","satellite_anti_quorum":0,"satellite_usable_dcs":1,"datacenters":[{"priority":1,"satellite":1,"id":"mr"}],"satellite_log_policy":"(zoneid^3x1)","satellite_log_replicas":1,"satellite_logs":2}],"usable_regions":1,"redundancy_mode":"single","auto_logs":3,"proxies":5,"resolvers":1,"log_replication_policy":"(zoneid^3x1)","remote_redundancy_mode":"remote_single","repopulate_anti_quorum":1,"remote_log_replicas":3,"log_routers":10,"storage_replication_policy":"(zoneid^3x1)","auto_proxies":3,"auto_resolvers":1},"data":{"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true,"description":"","name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"min_replicas_remaining":0},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"max_machine_failures_without_losing_data":0,"moving_data":{"in_queue_bytes":0,"total_written_bytes":0,"in_flight_bytes":0},"least_operating_space_bytes_storage_server":0,"partitions_count":2},"old_logs":[{"satellite_log_fault_tolerance":2,"logs":[{"healthy":true,"id":"7f8d623d0cb9966e","address":"1.2.3.4:1234"}],"satellite_log_write_anti_quorum":0,"remote_log_fault_tolerance":2,"log_fault_tolerance":2,"log_write_anti_quorum":0,"satellite_log_replication_factor":3,"remote_log_replication_factor":3,"log_replication_factor":
3}],"processes":{"$map":{"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"data_lag":{"seconds":5.0,"versions":12341234},"input_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"kvstore_used_bytes":12341234,"stored_bytes":12341234,"kvstore_free_bytes":12341234,"durable_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"id":"eb84471d68c12d1d26f692a50000003f","data_version":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"queue_disk_available_bytes":12341234,"kvstore_available_bytes":12341234,"queue_disk_total_bytes":12341234,"queue_disk_used_bytes":12341234,"queue_disk_free_bytes":12341234,"kvstore_total_bytes":12341234,"finished_queries":{"hz":0.0,"counter":0,"roughness":0.0}}],"locality":{"$map":"value"},"messages":[{"description":"abc","type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error messages)"]},"raw_log_message":"<stuff/>","time":12345.12312}],"address":"1.2.3.4:1234","command_line":"-r 
simulation","disk":{"free_bytes":3451233456234,"reads":{"hz":0.0,"counter":0,"sectors":0},"busy":0.0,"writes":{"hz":0.0,"counter":0,"sectors":0},"total_bytes":123412341234},"version":"3.0.0","excluded":false,"memory":{"available_bytes":0,"unused_allocated_memory":0,"limit_bytes":0,"used_bytes":0},"machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","uptime_seconds":1234.2345,"cpu":{"usage_cores":0.0},"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"connections_closed":{"hz":0.0},"connection_errors":{"hz":0.0},"current_connections":0,"connections_established":{"hz":0.0}}}},"workload":{"operations":{"writes":{"hz":0.0,"counter":0,"roughness":0.0},"reads":{"hz":0.0,"counter":0,"roughness":0.0}},"keys":{"read":{"hz":0.0,"counter":0,"roughness":0.0}},"bytes":{"read":{"hz":0.0,"counter":0,"roughness":0.0},"written":{"hz":0.0,"counter":0,"roughness":0.0}},"transactions":{"started":{"hz":0.0,"counter":0,"roughness":0.0},"conflicted":{"hz":0.0,"counter":0,"roughness":0.0},"committed":{"hz":0.0,"counter":0,"roughness":0.0}}},"incompatible_connections":[],"clients":{"count":1,"supported_versions":[{"count":1,"protocol_version":"fdb00a400050001","client_version":"3.0.0","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d","connected_clients":[{"log_group":"default","address":"127.0.0.1:9898"}]}]},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","full_replication_timeout","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection 
string. Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"database_available":true,"cluster_controller_timestamp":1415650089,"latency_probe":{"immediate_priority_transaction_start_seconds":0.0,"transaction_start_seconds":0.0,"batch_priority_transaction_start_seconds":0.0,"read_seconds":7,"commit_seconds":0.02}},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"database_status":{"available":true,"healthy":true}}}
+	schema={"cluster":{"qos":{"limiting_version_lag_storage_server":0,"released_transactions_per_second":0,"transactions_per_second_limit":0,"limiting_queue_bytes_storage_server":0,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","description":"The database is not being saturated by the workload.","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]}},"worst_version_lag_storage_server":0,"worst_queue_bytes_log_server":460,"worst_queue_bytes_storage_server":0},"database_locked":false,"full_replication":true,"recovery_state":{"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","accepting_commits","all_logs_recruited","storage_recovered","fully_recovered"]},"missing_logs":"7f8d623d0cb9966e","required_resolvers":1,"required_logs":3,"description":"Recovery complete."},"connection_string":"a:a@127.0.0.1:4000","machines":{"$map":{"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"tcp_segments_retransmitted":{"hz":0.0}},"locality":{"$map":"value"},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","cpu":{"logical_core_utilization":0.4}}},"layers":{"_valid":true,"_error":"some error 
description"},"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"generation":2,"protocol_version":"fdb00a400050001","datacenter_version_difference":0,"configuration":{"logs":2,"log_replicas":2,"storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"excluded_servers":[{"address":"10.0.4.1"}],"remote_logs":5,"log_anti_quorum":0,"storage_replicas":1,"coordinators_count":1,"regions":[{"satellite_redundancy_mode":"one_satellite_single","satellite_anti_quorum":0,"satellite_usable_dcs":1,"datacenters":[{"priority":1,"satellite":1,"id":"mr"}],"satellite_log_policy":"(zoneid^3x1)","satellite_log_replicas":1,"satellite_logs":2}],"usable_regions":1,"redundancy_mode":"single","auto_logs":3,"proxies":5,"resolvers":1,"log_replication_policy":"(zoneid^3x1)","remote_redundancy_mode":"remote_single","repopulate_anti_quorum":1,"remote_log_replicas":3,"log_routers":10,"storage_replication_policy":"(zoneid^3x1)","auto_proxies":3,"auto_resolvers":1},"data":{"team_trackers":[{"state":{"healthy":true,"description":"","name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"min_replicas_remaining":0},"unhealthy_servers":0,"primary":true,"in_flight_bytes":0}],"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true,"description":"","name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"min_replicas_remaining":0},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"max_machine_failures_without_losing_data":0,"moving_data":{"in_queue_bytes":0,"total_written_bytes":0,"highest_priority":0,"in_flight_bytes":0},"least_operating_space_bytes_storage_server":0,"partitions_count":2},"old_logs":[{"satellite_lo
g_fault_tolerance":2,"logs":[{"healthy":true,"id":"7f8d623d0cb9966e","address":"1.2.3.4:1234"}],"satellite_log_write_anti_quorum":0,"remote_log_fault_tolerance":2,"log_fault_tolerance":2,"log_write_anti_quorum":0,"satellite_log_replication_factor":3,"remote_log_replication_factor":3,"log_replication_factor":3}],"processes":{"$map":{"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"data_lag":{"seconds":5.0,"versions":12341234},"input_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"kvstore_used_bytes":12341234,"stored_bytes":12341234,"kvstore_free_bytes":12341234,"durable_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"id":"eb84471d68c12d1d26f692a50000003f","data_version":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"queue_disk_available_bytes":12341234,"kvstore_available_bytes":12341234,"queue_disk_total_bytes":12341234,"queue_disk_used_bytes":12341234,"queue_disk_free_bytes":12341234,"kvstore_total_bytes":12341234,"finished_queries":{"hz":0.0,"counter":0,"roughness":0.0}}],"locality":{"$map":"value"},"messages":[{"description":"abc","type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error messages)"]},"raw_log_message":"<stuff/>","time":12345.12312}],"address":"1.2.3.4:1234","command_line":"-r 
simulation","disk":{"free_bytes":3451233456234,"reads":{"hz":0.0,"counter":0,"sectors":0},"busy":0.0,"writes":{"hz":0.0,"counter":0,"sectors":0},"total_bytes":123412341234},"version":"3.0.0","excluded":false,"memory":{"available_bytes":0,"unused_allocated_memory":0,"limit_bytes":0,"used_bytes":0},"machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","uptime_seconds":1234.2345,"cpu":{"usage_cores":0.0},"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"connections_closed":{"hz":0.0},"connection_errors":{"hz":0.0},"current_connections":0,"connections_established":{"hz":0.0}}}},"workload":{"operations":{"writes":{"hz":0.0,"counter":0,"roughness":0.0},"reads":{"hz":0.0,"counter":0,"roughness":0.0}},"keys":{"read":{"hz":0.0,"counter":0,"roughness":0.0}},"bytes":{"read":{"hz":0.0,"counter":0,"roughness":0.0},"written":{"hz":0.0,"counter":0,"roughness":0.0}},"transactions":{"started":{"hz":0.0,"counter":0,"roughness":0.0},"conflicted":{"hz":0.0,"counter":0,"roughness":0.0},"committed":{"hz":0.0,"counter":0,"roughness":0.0}}},"incompatible_connections":[],"clients":{"count":1,"supported_versions":[{"count":1,"protocol_version":"fdb00a400050001","client_version":"3.0.0","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d","connected_clients":[{"log_group":"default","address":"127.0.0.1:9898"}]}]},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","full_replication_timeout","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection 
string. Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"database_available":true,"cluster_controller_timestamp":1415650089,"latency_probe":{"immediate_priority_transaction_start_seconds":0.0,"transaction_start_seconds":0.0,"batch_priority_transaction_start_seconds":0.0,"read_seconds":7,"commit_seconds":0.02}},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"database_status":{"available":true,"healthy":true}}}
diff --git a/tests/slow/DDBalanceAndRemoveStatus.txt b/tests/slow/DDBalanceAndRemoveStatus.txt
index 42780c1bb4..eebafa8623 100644
--- a/tests/slow/DDBalanceAndRemoveStatus.txt
+++ b/tests/slow/DDBalanceAndRemoveStatus.txt
@@ -43,4 +43,4 @@ testTitle=DDBalance_test
 
     testName=Status
     testDuration=30.0
-    schema={"cluster":{"qos":{"limiting_version_lag_storage_server":0,"released_transactions_per_second":0,"transactions_per_second_limit":0,"limiting_queue_bytes_storage_server":0,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","description":"The database is not being saturated by the workload.","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]}},"worst_version_lag_storage_server":0,"worst_queue_bytes_log_server":460,"worst_queue_bytes_storage_server":0},"database_locked":false,"full_replication":true,"recovery_state":{"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","accepting_commits","all_logs_recruited","storage_recovered","fully_recovered"]},"missing_logs":"7f8d623d0cb9966e","required_resolvers":1,"required_logs":3,"description":"Recovery complete."},"connection_string":"a:a@127.0.0.1:4000","machines":{"$map":{"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"tcp_segments_retransmitted":{"hz":0.0}},"locality":{"$map":"value"},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","cpu":{"logical_core_utilization":0.4}}},"layers":{"_valid":true,"_error":"some error 
description"},"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"generation":2,"protocol_version":"fdb00a400050001","datacenter_version_difference":0,"configuration":{"logs":2,"log_replicas":2,"storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"excluded_servers":[{"address":"10.0.4.1"}],"remote_logs":5,"log_anti_quorum":0,"storage_replicas":1,"coordinators_count":1,"regions":[{"satellite_redundancy_mode":"one_satellite_single","satellite_anti_quorum":0,"satellite_usable_dcs":1,"datacenters":[{"priority":1,"satellite":1,"id":"mr"}],"satellite_log_policy":"(zoneid^3x1)","satellite_log_replicas":1,"satellite_logs":2}],"usable_regions":1,"redundancy_mode":"single","auto_logs":3,"proxies":5,"resolvers":1,"log_replication_policy":"(zoneid^3x1)","remote_redundancy_mode":"remote_single","repopulate_anti_quorum":1,"remote_log_replicas":3,"log_routers":10,"storage_replication_policy":"(zoneid^3x1)","auto_proxies":3,"auto_resolvers":1},"data":{"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true,"description":"","name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"min_replicas_remaining":0},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"max_machine_failures_without_losing_data":0,"moving_data":{"in_queue_bytes":0,"total_written_bytes":0,"in_flight_bytes":0},"least_operating_space_bytes_storage_server":0,"partitions_count":2},"old_logs":[{"satellite_log_fault_tolerance":2,"logs":[{"healthy":true,"id":"7f8d623d0cb9966e","address":"1.2.3.4:1234"}],"satellite_log_write_anti_quorum":0,"remote_log_fault_tolerance":2,"log_fault_tolerance":2,"log_write_anti_quorum":0,"satellite_log_replication_factor":3,"remote_log_replication_factor":3,"log_replication_factor":
3}],"processes":{"$map":{"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"data_lag":{"seconds":5.0,"versions":12341234},"input_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"kvstore_used_bytes":12341234,"stored_bytes":12341234,"kvstore_free_bytes":12341234,"durable_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"id":"eb84471d68c12d1d26f692a50000003f","data_version":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"queue_disk_available_bytes":12341234,"kvstore_available_bytes":12341234,"queue_disk_total_bytes":12341234,"queue_disk_used_bytes":12341234,"queue_disk_free_bytes":12341234,"kvstore_total_bytes":12341234,"finished_queries":{"hz":0.0,"counter":0,"roughness":0.0}}],"locality":{"$map":"value"},"messages":[{"description":"abc","type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error messages)"]},"raw_log_message":"<stuff/>","time":12345.12312}],"address":"1.2.3.4:1234","command_line":"-r 
simulation","disk":{"free_bytes":3451233456234,"reads":{"hz":0.0,"counter":0,"sectors":0},"busy":0.0,"writes":{"hz":0.0,"counter":0,"sectors":0},"total_bytes":123412341234},"version":"3.0.0","excluded":false,"memory":{"available_bytes":0,"unused_allocated_memory":0,"limit_bytes":0,"used_bytes":0},"machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","uptime_seconds":1234.2345,"cpu":{"usage_cores":0.0},"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"connections_closed":{"hz":0.0},"connection_errors":{"hz":0.0},"current_connections":0,"connections_established":{"hz":0.0}}}},"workload":{"operations":{"writes":{"hz":0.0,"counter":0,"roughness":0.0},"reads":{"hz":0.0,"counter":0,"roughness":0.0}},"keys":{"read":{"hz":0.0,"counter":0,"roughness":0.0}},"bytes":{"read":{"hz":0.0,"counter":0,"roughness":0.0},"written":{"hz":0.0,"counter":0,"roughness":0.0}},"transactions":{"started":{"hz":0.0,"counter":0,"roughness":0.0},"conflicted":{"hz":0.0,"counter":0,"roughness":0.0},"committed":{"hz":0.0,"counter":0,"roughness":0.0}}},"incompatible_connections":[],"clients":{"count":1,"supported_versions":[{"count":1,"protocol_version":"fdb00a400050001","client_version":"3.0.0","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d","connected_clients":[{"log_group":"default","address":"127.0.0.1:9898"}]}]},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","full_replication_timeout","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection 
string. Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"database_available":true,"cluster_controller_timestamp":1415650089,"latency_probe":{"immediate_priority_transaction_start_seconds":0.0,"transaction_start_seconds":0.0,"batch_priority_transaction_start_seconds":0.0,"read_seconds":7,"commit_seconds":0.02}},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"database_status":{"available":true,"healthy":true}}}
+    schema={"cluster":{"qos":{"limiting_version_lag_storage_server":0,"released_transactions_per_second":0,"transactions_per_second_limit":0,"limiting_queue_bytes_storage_server":0,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","description":"The database is not being saturated by the workload.","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]}},"worst_version_lag_storage_server":0,"worst_queue_bytes_log_server":460,"worst_queue_bytes_storage_server":0},"database_locked":false,"full_replication":true,"recovery_state":{"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","accepting_commits","all_logs_recruited","storage_recovered","fully_recovered"]},"missing_logs":"7f8d623d0cb9966e","required_resolvers":1,"required_logs":3,"description":"Recovery complete."},"connection_string":"a:a@127.0.0.1:4000","machines":{"$map":{"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"tcp_segments_retransmitted":{"hz":0.0}},"locality":{"$map":"value"},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","cpu":{"logical_core_utilization":0.4}}},"layers":{"_valid":true,"_error":"some error 
description"},"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"generation":2,"protocol_version":"fdb00a400050001","datacenter_version_difference":0,"configuration":{"logs":2,"log_replicas":2,"storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"excluded_servers":[{"address":"10.0.4.1"}],"remote_logs":5,"log_anti_quorum":0,"storage_replicas":1,"coordinators_count":1,"regions":[{"satellite_redundancy_mode":"one_satellite_single","satellite_anti_quorum":0,"satellite_usable_dcs":1,"datacenters":[{"priority":1,"satellite":1,"id":"mr"}],"satellite_log_policy":"(zoneid^3x1)","satellite_log_replicas":1,"satellite_logs":2}],"usable_regions":1,"redundancy_mode":"single","auto_logs":3,"proxies":5,"resolvers":1,"log_replication_policy":"(zoneid^3x1)","remote_redundancy_mode":"remote_single","repopulate_anti_quorum":1,"remote_log_replicas":3,"log_routers":10,"storage_replication_policy":"(zoneid^3x1)","auto_proxies":3,"auto_resolvers":1},"data":{"team_trackers":[{"state":{"healthy":true,"description":"","name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"min_replicas_remaining":0},"unhealthy_servers":0,"primary":true,"in_flight_bytes":0}],"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true,"description":"","name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"min_replicas_remaining":0},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"max_machine_failures_without_losing_data":0,"moving_data":{"in_queue_bytes":0,"total_written_bytes":0,"highest_priority":0,"in_flight_bytes":0},"least_operating_space_bytes_storage_server":0,"partitions_count":2},"old_logs":[{"satellite_lo
g_fault_tolerance":2,"logs":[{"healthy":true,"id":"7f8d623d0cb9966e","address":"1.2.3.4:1234"}],"satellite_log_write_anti_quorum":0,"remote_log_fault_tolerance":2,"log_fault_tolerance":2,"log_write_anti_quorum":0,"satellite_log_replication_factor":3,"remote_log_replication_factor":3,"log_replication_factor":3}],"processes":{"$map":{"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"data_lag":{"seconds":5.0,"versions":12341234},"input_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"kvstore_used_bytes":12341234,"stored_bytes":12341234,"kvstore_free_bytes":12341234,"durable_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"id":"eb84471d68c12d1d26f692a50000003f","data_version":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"queue_disk_available_bytes":12341234,"kvstore_available_bytes":12341234,"queue_disk_total_bytes":12341234,"queue_disk_used_bytes":12341234,"queue_disk_free_bytes":12341234,"kvstore_total_bytes":12341234,"finished_queries":{"hz":0.0,"counter":0,"roughness":0.0}}],"locality":{"$map":"value"},"messages":[{"description":"abc","type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error messages)"]},"raw_log_message":"<stuff/>","time":12345.12312}],"address":"1.2.3.4:1234","command_line":"-r 
simulation","disk":{"free_bytes":3451233456234,"reads":{"hz":0.0,"counter":0,"sectors":0},"busy":0.0,"writes":{"hz":0.0,"counter":0,"sectors":0},"total_bytes":123412341234},"version":"3.0.0","excluded":false,"memory":{"available_bytes":0,"unused_allocated_memory":0,"limit_bytes":0,"used_bytes":0},"machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","uptime_seconds":1234.2345,"cpu":{"usage_cores":0.0},"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"connections_closed":{"hz":0.0},"connection_errors":{"hz":0.0},"current_connections":0,"connections_established":{"hz":0.0}}}},"workload":{"operations":{"writes":{"hz":0.0,"counter":0,"roughness":0.0},"reads":{"hz":0.0,"counter":0,"roughness":0.0}},"keys":{"read":{"hz":0.0,"counter":0,"roughness":0.0}},"bytes":{"read":{"hz":0.0,"counter":0,"roughness":0.0},"written":{"hz":0.0,"counter":0,"roughness":0.0}},"transactions":{"started":{"hz":0.0,"counter":0,"roughness":0.0},"conflicted":{"hz":0.0,"counter":0,"roughness":0.0},"committed":{"hz":0.0,"counter":0,"roughness":0.0}}},"incompatible_connections":[],"clients":{"count":1,"supported_versions":[{"count":1,"protocol_version":"fdb00a400050001","client_version":"3.0.0","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d","connected_clients":[{"log_group":"default","address":"127.0.0.1:9898"}]}]},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","full_replication_timeout","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection 
string. Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"database_available":true,"cluster_controller_timestamp":1415650089,"latency_probe":{"immediate_priority_transaction_start_seconds":0.0,"transaction_start_seconds":0.0,"batch_priority_transaction_start_seconds":0.0,"read_seconds":7,"commit_seconds":0.02}},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"database_status":{"available":true,"healthy":true}}}

From 9e94e36b366248bcabf80e950f3d4156c333f4b0 Mon Sep 17 00:00:00 2001
From: Evan Tschannen <ejt@apple.com>
Date: Thu, 9 Aug 2018 13:43:23 -0700
Subject: [PATCH 25/34] updated release notes for 6.0.5

---
 documentation/sphinx/source/release-notes.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst
index ea38b3f70e..6f3c28805d 100644
--- a/documentation/sphinx/source/release-notes.rst
+++ b/documentation/sphinx/source/release-notes.rst
@@ -13,6 +13,7 @@ Features
 * The TLS plugin is now statically linked into the client and server binaries and no longer requires a separate library. `(Issue #436) <https://github.com/apple/foundationdb/issues/436>`_
 * TLS peer verification now supports verifiying on Subject Alternative Name. `(Issue #514) <https://github.com/apple/foundationdb/issues/514>`_
 * TLS peer verification now supports suffix matching by field. `(Issue #515) <https://github.com/apple/foundationdb/issues/515>`_
+* TLS certificates are automatically reloaded after being updated. [6.0.5] `(Issue #505) <https://github.com/apple/foundationdb/issues/505>`_
 
 Performance
 -----------
@@ -39,6 +40,9 @@ Fixes
 * Automatic suppression of trace events which occur too frequently was happening before trace events were suppressed by other mechanisms. [6.0.4] `(PR #656) <https://github.com/apple/foundationdb/pull/656>`_
 * After a recovery, the rate at which transaction logs made mutations durable to disk was around 5 times slower than normal. [6.0.5] `(PR #666) <https://github.com/apple/foundationdb/pull/666>`_
 * Clusters configured to use TLS could get stuck spending all of their CPU opening new connections. [6.0.5] `(PR #666) <https://github.com/apple/foundationdb/pull/666>`_
+* Configuring usable_regions=2 on a cluster with a large amount of data caused commits to pause for a few seconds. [6.0.5] `(PR #687) <https://github.com/apple/foundationdb/pull/687>`_
+* On clusters configured with usable_regions=2, status reported no replicas remaining when the primary DC was still healthy. [6.0.5] `(PR #687) <https://github.com/apple/foundationdb/pull/687>`_
+* Clients could crash when passing in TLS options. [6.0.5] `(PR #649) <https://github.com/apple/foundationdb/pull/649>`_
 
 Status
 ------

From 896bde4a48ee82ed858763869d8fffed53f682f3 Mon Sep 17 00:00:00 2001
From: Alex Miller <alexmiller@apple.com>
Date: Thu, 9 Aug 2018 14:49:29 -0700
Subject: [PATCH 26/34] Fix fdbserver segfaulting if a mismatched certificate
 and key are set.

This turned out to be a simple typo of two similar variables.  fdbserver
will still die as a result of this error, but it will die gracefully and
print out a slightly helpful error message.
---
 FDBLibTLS/FDBLibTLSSession.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/FDBLibTLS/FDBLibTLSSession.cpp b/FDBLibTLS/FDBLibTLSSession.cpp
index 07a48dbc6c..d754e60854 100644
--- a/FDBLibTLS/FDBLibTLSSession.cpp
+++ b/FDBLibTLS/FDBLibTLSSession.cpp
@@ -85,12 +85,12 @@ FDBLibTLSSession::FDBLibTLSSession(Reference<FDBLibTLSPolicy> policy, bool is_cl
 			throw std::runtime_error("FDBLibTLSServerError");
 		}
 		if (tls_configure(tls_sctx, policy->tls_cfg) == -1) {
-			TraceEvent(SevError, "FDBLibTLSConfigureError", uid).detail("LibTLSErrorMessage", tls_error(tls_ctx));
+			TraceEvent(SevError, "FDBLibTLSConfigureError", uid).detail("LibTLSErrorMessage", tls_error(tls_sctx));
 			tls_free(tls_sctx);
 			throw std::runtime_error("FDBLibTLSConfigureError");
 		}
 		if (tls_accept_cbs(tls_sctx, &tls_ctx, tls_read_func, tls_write_func, this) == -1) {
-			TraceEvent(SevError, "FDBLibTLSAcceptError", uid).detail("LibTLSErrorMessage", tls_error(tls_ctx));
+			TraceEvent(SevError, "FDBLibTLSAcceptError", uid).detail("LibTLSErrorMessage", tls_error(tls_sctx));
 			tls_free(tls_sctx);
 			throw std::runtime_error("FDBLibTLSAcceptError");
 		}

From 4c7001571be87b18387ec021e933d45c95f40a11 Mon Sep 17 00:00:00 2001
From: Evan Tschannen <ejt@apple.com>
Date: Thu, 9 Aug 2018 18:43:02 -0700
Subject: [PATCH 27/34] increased the default list request rate to speed up
 backup expire

---
 fdbclient/Knobs.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fdbclient/Knobs.cpp b/fdbclient/Knobs.cpp
index a658e85f51..b208302dae 100644
--- a/fdbclient/Knobs.cpp
+++ b/fdbclient/Knobs.cpp
@@ -168,7 +168,7 @@ ClientKnobs::ClientKnobs(bool randomize) {
 	init( BLOBSTORE_MAX_SEND_BYTES_PER_SECOND,      1e9 );
 	init( BLOBSTORE_MAX_RECV_BYTES_PER_SECOND,      1e9 );
 
-	init( BLOBSTORE_LIST_REQUESTS_PER_SECOND,        25 );
+	init( BLOBSTORE_LIST_REQUESTS_PER_SECOND,       200 );
 	init( BLOBSTORE_WRITE_REQUESTS_PER_SECOND,       50 );
 	init( BLOBSTORE_READ_REQUESTS_PER_SECOND,       100 );
 	init( BLOBSTORE_DELETE_REQUESTS_PER_SECOND,     200 );

From 0438a3f91dff73b0b14dad81350f3b038048d464 Mon Sep 17 00:00:00 2001
From: Evan Tschannen <ejt@apple.com>
Date: Thu, 9 Aug 2018 22:59:14 -0700
Subject: [PATCH 28/34] added the ability to change the database configuration
 from a JSON document stored in a file. This provides a cleaner way to change
 the region configuration.

---
 fdbcli/fdbcli.actor.cpp | 70 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/fdbcli/fdbcli.actor.cpp b/fdbcli/fdbcli.actor.cpp
index f3e05c13e7..c30940f49d 100644
--- a/fdbcli/fdbcli.actor.cpp
+++ b/fdbcli/fdbcli.actor.cpp
@@ -443,8 +443,12 @@ void initHelp() {
 		"All keys between BEGINKEY (inclusive) and ENDKEY (exclusive) are cleared from the database. This command will succeed even if the specified range is empty, but may fail because of conflicts." ESCAPINGK);
 	helpMap["configure"] = CommandHelp(
 		"configure [new] <single|double|triple|three_data_hall|three_datacenter|ssd|memory|proxies=<PROXIES>|logs=<LOGS>|resolvers=<RESOLVERS>>*",
-		"change database configuration",
+		"change the database configuration",
 		"The `new' option, if present, initializes a new database with the given configuration rather than changing the configuration of an existing one. When used, both a redundancy mode and a storage engine must be specified.\n\nRedundancy mode:\n  single - one copy of the data.  Not fault tolerant.\n  double - two copies of data (survive one failure).\n  triple - three copies of data (survive two failures).\n  three_data_hall - See the Admin Guide.\n  three_datacenter - See the Admin Guide.\n\nStorage engine:\n  ssd - B-Tree storage engine optimized for solid state disks.\n  memory - Durable in-memory storage engine for small datasets.\n\nproxies=<PROXIES>: Sets the desired number of proxies in the cluster. Must be at least 1, or set to -1 which restores the number of proxies to the default value.\n\nlogs=<LOGS>: Sets the desired number of log servers in the cluster. Must be at least 1, or set to -1 which restores the number of logs to the default value.\n\nresolvers=<RESOLVERS>: Sets the desired number of resolvers in the cluster. Must be at least 1, or set to -1 which restores the number of resolvers to the default value.\n\nSee the FoundationDB Administration Guide for more information.");
+	helpMap["configurefile"] = CommandHelp(
+		"configurefile <FILENAME>",
+		"change the database configuration from a file",
+		"Load a JSON document from the provided file, and change the database configuration to match the contents of the JSON document. The format should be the same as the \"configuration\" entry in status JSON without \"excluded_servers\" or \"coordinators_count\".");
 	helpMap["coordinators"] = CommandHelp(
 		"coordinators auto|<ADDRESS>+ [description=new_cluster_description]",
 		"change cluster coordinators or description",
@@ -1595,6 +1599,59 @@ ACTOR Future<bool> configure( Database db, std::vector<StringRef> tokens, Refere
 	return ret;
 }
 
+ACTOR Future<bool> configurefile(Database db, std::string filePath) {
+	std::string contents(readFileBytes(filePath, 100000));
+	json_spirit::mValue schema;
+	json_spirit::read_string( contents, schema );
+	StatusObject configJSON = schema.get_obj();
+	std::string configString;
+	for(auto kv : configJSON) {
+		if(!configString.empty()) {
+			configString += " ";
+		}
+		if( kv.second.type() == json_spirit::int_type ) {
+			configString += kv.first + ":=" + format("%d", kv.second.get_int());
+		} else if( kv.second.type() == json_spirit::str_type ) {
+			configString += kv.second.get_str();
+		} else if( kv.second.type() == json_spirit::array_type ) {
+			configString += kv.first + "=" + json_spirit::write_string(json_spirit::mValue(kv.second.get_array()), json_spirit::Output_options::none);
+		} else {
+			printUsage(LiteralStringRef("configurefile"));
+			return true;
+		}
+	}
+	ConfigurationResult::Type result = wait( makeInterruptable( changeConfig(db, configString) ) );
+	// Real errors get thrown from makeInterruptable and printed by the catch block in cli(), but
+	// there are various results specific to changeConfig() that we need to report:
+	bool ret;
+	switch(result) {
+	case ConfigurationResult::NO_OPTIONS_PROVIDED:
+	case ConfigurationResult::CONFLICTING_OPTIONS:
+	case ConfigurationResult::UNKNOWN_OPTION:
+	case ConfigurationResult::INCOMPLETE_CONFIGURATION:
+		printUsage(LiteralStringRef("configurefile"));
+		ret = true;
+		break;
+
+	case ConfigurationResult::DATABASE_ALREADY_CREATED:
+		printf("ERROR: Database already exists! To change configuration, don't say `new'\n");
+		ret=true;
+		break;
+	case ConfigurationResult::DATABASE_CREATED:
+		printf("Database created\n");
+		ret=false;
+		break;
+	case ConfigurationResult::SUCCESS:
+		printf("Configuration changed\n");
+		ret=false;
+		break;
+	default:
+		ASSERT(false);
+		ret=true;
+	};
+	return ret;
+}
+
 // FIXME: Factor address parsing from coordinators, include, exclude
 
 ACTOR Future<bool> coordinators( Database db, std::vector<StringRef> tokens, bool isClusterTLS ) {
@@ -2457,6 +2514,17 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
 					continue;
 				}
 
+				if (tokencmp(tokens[0], "configurefile")) {
+					if (tokens.size() != 2) {
+						printUsage(tokens[0]);
+						is_error = true;
+					} else {
+						bool err = wait( configurefile( db, tokens[1].toString() ) );
+						if (err) is_error = true;
+					}
+					continue;
+				}
+
 				if (tokencmp(tokens[0], "coordinators")) {
 					auto cs = ClusterConnectionFile( ccf->getFilename() ).getConnectionString();
 					if (tokens.size() < 2) {

From ae8371b490878fab99006c1003bf2fce46b299d0 Mon Sep 17 00:00:00 2001
From: Evan Tschannen <ejt@apple.com>
Date: Thu, 9 Aug 2018 23:04:49 -0700
Subject: [PATCH 29/34] updated release notes

---
 documentation/sphinx/source/release-notes.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst
index 6f3c28805d..affb458b3e 100644
--- a/documentation/sphinx/source/release-notes.rst
+++ b/documentation/sphinx/source/release-notes.rst
@@ -14,6 +14,7 @@ Features
 * TLS peer verification now supports verifiying on Subject Alternative Name. `(Issue #514) <https://github.com/apple/foundationdb/issues/514>`_
 * TLS peer verification now supports suffix matching by field. `(Issue #515) <https://github.com/apple/foundationdb/issues/515>`_
 * TLS certificates are automatically reloaded after being updated. [6.0.5] `(Issue #505) <https://github.com/apple/foundationdb/issues/505>`_
+* fdbcli now supports changing the database configuration based on a JSON document stored in a file. [6.0.5] `(PR #692) <https://github.com/apple/foundationdb/pull/692>`_
 
 Performance
 -----------

From b05220fcf2ced5abc92321a8bb39b3580c6c84a9 Mon Sep 17 00:00:00 2001
From: Evan Tschannen <ejt@apple.com>
Date: Thu, 9 Aug 2018 23:56:06 -0700
Subject: [PATCH 30/34] renamed configurefile to fileconfigure

---
 fdbcli/fdbcli.actor.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/fdbcli/fdbcli.actor.cpp b/fdbcli/fdbcli.actor.cpp
index c30940f49d..f53b7ff3db 100644
--- a/fdbcli/fdbcli.actor.cpp
+++ b/fdbcli/fdbcli.actor.cpp
@@ -445,8 +445,8 @@ void initHelp() {
 		"configure [new] <single|double|triple|three_data_hall|three_datacenter|ssd|memory|proxies=<PROXIES>|logs=<LOGS>|resolvers=<RESOLVERS>>*",
 		"change the database configuration",
 		"The `new' option, if present, initializes a new database with the given configuration rather than changing the configuration of an existing one. When used, both a redundancy mode and a storage engine must be specified.\n\nRedundancy mode:\n  single - one copy of the data.  Not fault tolerant.\n  double - two copies of data (survive one failure).\n  triple - three copies of data (survive two failures).\n  three_data_hall - See the Admin Guide.\n  three_datacenter - See the Admin Guide.\n\nStorage engine:\n  ssd - B-Tree storage engine optimized for solid state disks.\n  memory - Durable in-memory storage engine for small datasets.\n\nproxies=<PROXIES>: Sets the desired number of proxies in the cluster. Must be at least 1, or set to -1 which restores the number of proxies to the default value.\n\nlogs=<LOGS>: Sets the desired number of log servers in the cluster. Must be at least 1, or set to -1 which restores the number of logs to the default value.\n\nresolvers=<RESOLVERS>: Sets the desired number of resolvers in the cluster. Must be at least 1, or set to -1 which restores the number of resolvers to the default value.\n\nSee the FoundationDB Administration Guide for more information.");
-	helpMap["configurefile"] = CommandHelp(
-		"configurefile <FILENAME>",
+	helpMap["fileconfigure"] = CommandHelp(
+		"fileconfigure <FILENAME>",
 		"change the database configuration from a file",
 		"Load a JSON document from the provided file, and change the database configuration to match the contents of the JSON document. The format should be the same as the \"configuration\" entry in status JSON without \"excluded_servers\" or \"coordinators_count\".");
 	helpMap["coordinators"] = CommandHelp(
@@ -1599,7 +1599,7 @@ ACTOR Future<bool> configure( Database db, std::vector<StringRef> tokens, Refere
 	return ret;
 }
 
-ACTOR Future<bool> configurefile(Database db, std::string filePath) {
+ACTOR Future<bool> fileconfigure(Database db, std::string filePath) {
 	std::string contents(readFileBytes(filePath, 100000));
 	json_spirit::mValue schema;
 	json_spirit::read_string( contents, schema );
@@ -1616,7 +1616,7 @@ ACTOR Future<bool> configurefile(Database db, std::string filePath) {
 		} else if( kv.second.type() == json_spirit::array_type ) {
 			configString += kv.first + "=" + json_spirit::write_string(json_spirit::mValue(kv.second.get_array()), json_spirit::Output_options::none);
 		} else {
-			printUsage(LiteralStringRef("configurefile"));
+			printUsage(LiteralStringRef("fileconfigure"));
 			return true;
 		}
 	}
@@ -1629,7 +1629,7 @@ ACTOR Future<bool> configurefile(Database db, std::string filePath) {
 	case ConfigurationResult::CONFLICTING_OPTIONS:
 	case ConfigurationResult::UNKNOWN_OPTION:
 	case ConfigurationResult::INCOMPLETE_CONFIGURATION:
-		printUsage(LiteralStringRef("configurefile"));
+		printUsage(LiteralStringRef("fileconfigure"));
 		ret = true;
 		break;
 
@@ -2514,12 +2514,12 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
 					continue;
 				}
 
-				if (tokencmp(tokens[0], "configurefile")) {
+				if (tokencmp(tokens[0], "fileconfigure")) {
 					if (tokens.size() != 2) {
 						printUsage(tokens[0]);
 						is_error = true;
 					} else {
-						bool err = wait( configurefile( db, tokens[1].toString() ) );
+						bool err = wait( fileconfigure( db, tokens[1].toString() ) );
 						if (err) is_error = true;
 					}
 					continue;

From 72b025ecc8b1603ca6b347474fdc89df2b503ed5 Mon Sep 17 00:00:00 2001
From: Evan Tschannen <ejt@apple.com>
Date: Fri, 10 Aug 2018 10:16:01 -0700
Subject: [PATCH 31/34] backed out fileconfigure

---
 documentation/sphinx/source/release-notes.rst |  1 -
 fdbcli/fdbcli.actor.cpp                       | 70 +------------------
 2 files changed, 1 insertion(+), 70 deletions(-)

diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst
index affb458b3e..6f3c28805d 100644
--- a/documentation/sphinx/source/release-notes.rst
+++ b/documentation/sphinx/source/release-notes.rst
@@ -14,7 +14,6 @@ Features
 * TLS peer verification now supports verifiying on Subject Alternative Name. `(Issue #514) <https://github.com/apple/foundationdb/issues/514>`_
 * TLS peer verification now supports suffix matching by field. `(Issue #515) <https://github.com/apple/foundationdb/issues/515>`_
 * TLS certificates are automatically reloaded after being updated. [6.0.5] `(Issue #505) <https://github.com/apple/foundationdb/issues/505>`_
-* fdbcli now supports changing the database configuration based on a JSON document stored in a file. [6.0.5] `(PR #692) <https://github.com/apple/foundationdb/pull/692>`_
 
 Performance
 -----------
diff --git a/fdbcli/fdbcli.actor.cpp b/fdbcli/fdbcli.actor.cpp
index f53b7ff3db..f3e05c13e7 100644
--- a/fdbcli/fdbcli.actor.cpp
+++ b/fdbcli/fdbcli.actor.cpp
@@ -443,12 +443,8 @@ void initHelp() {
 		"All keys between BEGINKEY (inclusive) and ENDKEY (exclusive) are cleared from the database. This command will succeed even if the specified range is empty, but may fail because of conflicts." ESCAPINGK);
 	helpMap["configure"] = CommandHelp(
 		"configure [new] <single|double|triple|three_data_hall|three_datacenter|ssd|memory|proxies=<PROXIES>|logs=<LOGS>|resolvers=<RESOLVERS>>*",
-		"change the database configuration",
+		"change database configuration",
 		"The `new' option, if present, initializes a new database with the given configuration rather than changing the configuration of an existing one. When used, both a redundancy mode and a storage engine must be specified.\n\nRedundancy mode:\n  single - one copy of the data.  Not fault tolerant.\n  double - two copies of data (survive one failure).\n  triple - three copies of data (survive two failures).\n  three_data_hall - See the Admin Guide.\n  three_datacenter - See the Admin Guide.\n\nStorage engine:\n  ssd - B-Tree storage engine optimized for solid state disks.\n  memory - Durable in-memory storage engine for small datasets.\n\nproxies=<PROXIES>: Sets the desired number of proxies in the cluster. Must be at least 1, or set to -1 which restores the number of proxies to the default value.\n\nlogs=<LOGS>: Sets the desired number of log servers in the cluster. Must be at least 1, or set to -1 which restores the number of logs to the default value.\n\nresolvers=<RESOLVERS>: Sets the desired number of resolvers in the cluster. Must be at least 1, or set to -1 which restores the number of resolvers to the default value.\n\nSee the FoundationDB Administration Guide for more information.");
-	helpMap["fileconfigure"] = CommandHelp(
-		"fileconfigure <FILENAME>",
-		"change the database configuration from a file",
-		"Load a JSON document from the provided file, and change the database configuration to match the contents of the JSON document. The format should be the same as the \"configuration\" entry in status JSON without \"excluded_servers\" or \"coordinators_count\".");
 	helpMap["coordinators"] = CommandHelp(
 		"coordinators auto|<ADDRESS>+ [description=new_cluster_description]",
 		"change cluster coordinators or description",
@@ -1599,59 +1595,6 @@ ACTOR Future<bool> configure( Database db, std::vector<StringRef> tokens, Refere
 	return ret;
 }
 
-ACTOR Future<bool> fileconfigure(Database db, std::string filePath) {
-	std::string contents(readFileBytes(filePath, 100000));
-	json_spirit::mValue schema;
-	json_spirit::read_string( contents, schema );
-	StatusObject configJSON = schema.get_obj();
-	std::string configString;
-	for(auto kv : configJSON) {
-		if(!configString.empty()) {
-			configString += " ";
-		}
-		if( kv.second.type() == json_spirit::int_type ) {
-			configString += kv.first + ":=" + format("%d", kv.second.get_int());
-		} else if( kv.second.type() == json_spirit::str_type ) {
-			configString += kv.second.get_str();
-		} else if( kv.second.type() == json_spirit::array_type ) {
-			configString += kv.first + "=" + json_spirit::write_string(json_spirit::mValue(kv.second.get_array()), json_spirit::Output_options::none);
-		} else {
-			printUsage(LiteralStringRef("fileconfigure"));
-			return true;
-		}
-	}
-	ConfigurationResult::Type result = wait( makeInterruptable( changeConfig(db, configString) ) );
-	// Real errors get thrown from makeInterruptable and printed by the catch block in cli(), but
-	// there are various results specific to changeConfig() that we need to report:
-	bool ret;
-	switch(result) {
-	case ConfigurationResult::NO_OPTIONS_PROVIDED:
-	case ConfigurationResult::CONFLICTING_OPTIONS:
-	case ConfigurationResult::UNKNOWN_OPTION:
-	case ConfigurationResult::INCOMPLETE_CONFIGURATION:
-		printUsage(LiteralStringRef("fileconfigure"));
-		ret = true;
-		break;
-
-	case ConfigurationResult::DATABASE_ALREADY_CREATED:
-		printf("ERROR: Database already exists! To change configuration, don't say `new'\n");
-		ret=true;
-		break;
-	case ConfigurationResult::DATABASE_CREATED:
-		printf("Database created\n");
-		ret=false;
-		break;
-	case ConfigurationResult::SUCCESS:
-		printf("Configuration changed\n");
-		ret=false;
-		break;
-	default:
-		ASSERT(false);
-		ret=true;
-	};
-	return ret;
-}
-
 // FIXME: Factor address parsing from coordinators, include, exclude
 
 ACTOR Future<bool> coordinators( Database db, std::vector<StringRef> tokens, bool isClusterTLS ) {
@@ -2514,17 +2457,6 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
 					continue;
 				}
 
-				if (tokencmp(tokens[0], "fileconfigure")) {
-					if (tokens.size() != 2) {
-						printUsage(tokens[0]);
-						is_error = true;
-					} else {
-						bool err = wait( fileconfigure( db, tokens[1].toString() ) );
-						if (err) is_error = true;
-					}
-					continue;
-				}
-
 				if (tokencmp(tokens[0], "coordinators")) {
 					auto cs = ClusterConnectionFile( ccf->getFilename() ).getConnectionString();
 					if (tokens.size() < 2) {

From dee3ff761bcdb0e1cdaa72b63804bdd0ff2f66f5 Mon Sep 17 00:00:00 2001
From: Evan Tschannen <ejt@apple.com>
Date: Fri, 10 Aug 2018 11:41:42 -0700
Subject: [PATCH 32/34] updated release notes

---
 documentation/sphinx/source/release-notes.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst
index 6f3c28805d..84316e67c3 100644
--- a/documentation/sphinx/source/release-notes.rst
+++ b/documentation/sphinx/source/release-notes.rst
@@ -43,6 +43,7 @@ Fixes
 * Configuring usable_regions=2 on a cluster with a large amount of data caused commits to pause for a few seconds. [6.0.5] `(PR #687) <https://github.com/apple/foundationdb/pull/687>`_
 * On clusters configured with usable_regions=2, status reported no replicas remaining when the primary DC was still healthy. [6.0.5] `(PR #687) <https://github.com/apple/foundationdb/pull/687>`_
 * Clients could crash when passing in TLS options. [6.0.5] `(PR #649) <https://github.com/apple/foundationdb/pull/649>`_
+* A mismatched TLS certificate and key set could cause the server to crash. [6.0.5] `(PR #689) <https://github.com/apple/foundationdb/pull/689>`_
 
 Status
 ------
@@ -64,6 +65,7 @@ Other Changes
 
 * Does not support upgrades from any version older than 5.0.
 * Normalized the capitalization of trace event names and attributes. `(PR #455) <https://github.com/apple/foundationdb/pull/455>`_
+* Increased the memory requirements of the transaction log by 400MB. [6.0.5] `(PR #673) <https://github.com/apple/foundationdb/pull/673>`_
 
 Earlier release notes
 ---------------------

From 6bc8dd3a2b19d20633ccb79461b4af68ad60219b Mon Sep 17 00:00:00 2001
From: "A.J. Beamon" <ajbeamon@apple.com>
Date: Fri, 10 Aug 2018 12:12:17 -0700
Subject: [PATCH 33/34] Prepare for 6.0.5 release

---
 documentation/sphinx/source/downloads.rst | 24 +++++++++++------------
 packaging/msi/FDBInstaller.wxs            |  2 +-
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/documentation/sphinx/source/downloads.rst b/documentation/sphinx/source/downloads.rst
index 3a80f82b8a..bc4c260ebe 100644
--- a/documentation/sphinx/source/downloads.rst
+++ b/documentation/sphinx/source/downloads.rst
@@ -10,38 +10,38 @@ macOS
 
 The macOS installation package is supported on macOS 10.7+. It includes the client and (optionally) the server.
 
-* `FoundationDB-6.0.4.pkg <https://www.foundationdb.org/downloads/6.0.4/macOS/installers/FoundationDB-6.0.4.pkg>`_
+* `FoundationDB-6.0.5.pkg <https://www.foundationdb.org/downloads/6.0.5/macOS/installers/FoundationDB-6.0.5.pkg>`_
 
 Ubuntu
 ------
 
 The Ubuntu packages are supported on 64-bit Ubuntu 12.04+, but beware of the Linux kernel bug in Ubuntu 12.x.
 
-* `foundationdb-clients-6.0.4-1_amd64.deb <https://www.foundationdb.org/downloads/6.0.4/ubuntu/installers/foundationdb-clients_6.0.4-1_amd64.deb>`_
-* `foundationdb-server-6.0.4-1_amd64.deb <https://www.foundationdb.org/downloads/6.0.4/ubuntu/installers/foundationdb-server_6.0.4-1_amd64.deb>`_ (depends on the clients package)
+* `foundationdb-clients_6.0.5-1_amd64.deb <https://www.foundationdb.org/downloads/6.0.5/ubuntu/installers/foundationdb-clients_6.0.5-1_amd64.deb>`_
+* `foundationdb-server_6.0.5-1_amd64.deb <https://www.foundationdb.org/downloads/6.0.5/ubuntu/installers/foundationdb-server_6.0.5-1_amd64.deb>`_ (depends on the clients package)
 
 RHEL/CentOS EL6
 ---------------
 
 The RHEL/CentOS EL6 packages are supported on 64-bit RHEL/CentOS 6.x.
 
-* `foundationdb-clients-6.0.4-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.0.4/rhel6/installers/foundationdb-clients-6.0.4-1.el6.x86_64.rpm>`_
-* `foundationdb-server-6.0.4-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.0.4/rhel6/installers/foundationdb-server-6.0.4-1.el6.x86_64.rpm>`_ (depends on the clients package)
+* `foundationdb-clients-6.0.5-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.0.5/rhel6/installers/foundationdb-clients-6.0.5-1.el6.x86_64.rpm>`_
+* `foundationdb-server-6.0.5-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.0.5/rhel6/installers/foundationdb-server-6.0.5-1.el6.x86_64.rpm>`_ (depends on the clients package)
 
 RHEL/CentOS EL7
 ---------------
 
 The RHEL/CentOS EL7 packages are supported on 64-bit RHEL/CentOS 7.x.
 
-* `foundationdb-clients-6.0.4-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.0.4/rhel7/installers/foundationdb-clients-6.0.4-1.el7.x86_64.rpm>`_
-* `foundationdb-server-6.0.4-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.0.4/rhel7/installers/foundationdb-server-6.0.4-1.el7.x86_64.rpm>`_ (depends on the clients package)
+* `foundationdb-clients-6.0.5-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.0.5/rhel7/installers/foundationdb-clients-6.0.5-1.el7.x86_64.rpm>`_
+* `foundationdb-server-6.0.5-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.0.5/rhel7/installers/foundationdb-server-6.0.5-1.el7.x86_64.rpm>`_ (depends on the clients package)
 
 Windows
 -------
 
 The Windows installer is supported on 64-bit Windows XP and later. It includes the client and (optionally) the server.
 
-* `foundationdb-6.0.4-x64.msi <https://www.foundationdb.org/downloads/6.0.4/windows/installers/foundationdb-6.0.4-x64.msi>`_
+* `foundationdb-6.0.5-x64.msi <https://www.foundationdb.org/downloads/6.0.5/windows/installers/foundationdb-6.0.5-x64.msi>`_
 
 API Language Bindings
 =====================
@@ -58,18 +58,18 @@ On macOS and Windows, the FoundationDB Python API bindings are installed as part
 
 If you need to use the FoundationDB Python API from other Python installations or paths, download the Python package:
 
-* `foundationdb-6.0.4.tar.gz <https://www.foundationdb.org/downloads/6.0.4/bindings/python/foundationdb-6.0.4.tar.gz>`_
+* `foundationdb-6.0.5.tar.gz <https://www.foundationdb.org/downloads/6.0.5/bindings/python/foundationdb-6.0.5.tar.gz>`_
 
 Ruby 1.9.3/2.0.0+
 -----------------
 
-* `fdb-6.0.4.gem <https://www.foundationdb.org/downloads/6.0.4/bindings/ruby/fdb-6.0.4.gem>`_
+* `fdb-6.0.5.gem <https://www.foundationdb.org/downloads/6.0.5/bindings/ruby/fdb-6.0.5.gem>`_
 
 Java 8+
 -------
 
-* `fdb-java-6.0.4.jar <https://www.foundationdb.org/downloads/6.0.4/bindings/java/fdb-java-6.0.4.jar>`_
-* `fdb-java-6.0.4-javadoc.jar <https://www.foundationdb.org/downloads/6.0.4/bindings/java/fdb-java-6.0.4-javadoc.jar>`_
+* `fdb-java-6.0.5.jar <https://www.foundationdb.org/downloads/6.0.5/bindings/java/fdb-java-6.0.5.jar>`_
+* `fdb-java-6.0.5-javadoc.jar <https://www.foundationdb.org/downloads/6.0.5/bindings/java/fdb-java-6.0.5-javadoc.jar>`_
 
 Go 1.1+
 -------
diff --git a/packaging/msi/FDBInstaller.wxs b/packaging/msi/FDBInstaller.wxs
index a7cbb7e82f..927081601a 100644
--- a/packaging/msi/FDBInstaller.wxs
+++ b/packaging/msi/FDBInstaller.wxs
@@ -32,7 +32,7 @@
 
 <Wix xmlns='http://schemas.microsoft.com/wix/2006/wi'>
   <Product Name='$(var.Title)'
-           Id='{B6805A9A-CACA-4C08-9BE2-1EFAB91C9117}'
+           Id='{632477E3-EE9A-4A10-A794-DFEE2FD43E5D}'
            UpgradeCode='{A95EA002-686E-4164-8356-C715B7F8B1C8}'
            Version='$(var.Version)'
            Manufacturer='$(var.Manufacturer)'

From 61480bdf4e7099b1920aa3055c1692a885f9a4c2 Mon Sep 17 00:00:00 2001
From: "A.J. Beamon" <ajbeamon@apple.com>
Date: Fri, 10 Aug 2018 14:28:56 -0700
Subject: [PATCH 34/34] Post-release updates for 6.0.5 release

---
 packaging/msi/FDBInstaller.wxs | 2 +-
 versions.target                | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/packaging/msi/FDBInstaller.wxs b/packaging/msi/FDBInstaller.wxs
index 927081601a..5aa1a93597 100644
--- a/packaging/msi/FDBInstaller.wxs
+++ b/packaging/msi/FDBInstaller.wxs
@@ -32,7 +32,7 @@
 
 <Wix xmlns='http://schemas.microsoft.com/wix/2006/wi'>
   <Product Name='$(var.Title)'
-           Id='{632477E3-EE9A-4A10-A794-DFEE2FD43E5D}'
+           Id='{28BA5AF3-B475-4C57-85AF-80A0C049EFE1}'
            UpgradeCode='{A95EA002-686E-4164-8356-C715B7F8B1C8}'
            Version='$(var.Version)'
            Manufacturer='$(var.Manufacturer)'
diff --git a/versions.target b/versions.target
index 268ecb4539..c5dc0dfb11 100644
--- a/versions.target
+++ b/versions.target
@@ -1,7 +1,7 @@
 <?xml version="1.0"?>
 <Project xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
   <PropertyGroup>
-    <Version>6.0.5</Version>
+    <Version>6.0.6</Version>
     <PackageName>6.0</PackageName>
   </PropertyGroup>
 </Project>