From 94385447bc4f7b99da8ec30a711f9ad5fd2b8f74 Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Tue, 26 Feb 2019 16:20:05 -0800 Subject: [PATCH 01/47] Status: Get if client configured TLS To understand if all clients have configured TLS, we check the tlsoption when a client tries to open database. This is similar to how we track the versions of multi-version clients. --- documentation/sphinx/source/mr-status.rst | 1 + fdbclient/ClusterInterface.h | 3 ++- fdbclient/NativeAPI.actor.cpp | 1 + fdbclient/Schemas.cpp | 3 ++- fdbrpc/FlowTransport.actor.cpp | 5 ++++- fdbrpc/TLSConnection.h | 6 ++++++ fdbserver/ClusterController.actor.cpp | 9 +++++++-- fdbserver/Status.actor.cpp | 12 ++++++++++-- fdbserver/Status.h | 2 +- flow/serialize.h | 2 +- 10 files changed, 35 insertions(+), 9 deletions(-) diff --git a/documentation/sphinx/source/mr-status.rst b/documentation/sphinx/source/mr-status.rst index 772db55f2e..251f23d556 100644 --- a/documentation/sphinx/source/mr-status.rst +++ b/documentation/sphinx/source/mr-status.rst @@ -81,6 +81,7 @@ The following format informally describes the JSON containing the status data. 
T { "address": "127.0.0.1:1234", "log_group": "default" + "tls_configured": true } ], "count": 1, diff --git a/fdbclient/ClusterInterface.h b/fdbclient/ClusterInterface.h index cbc61bd908..a133f269a4 100644 --- a/fdbclient/ClusterInterface.h +++ b/fdbclient/ClusterInterface.h @@ -119,13 +119,14 @@ struct OpenDatabaseRequest { Arena arena; StringRef issues, traceLogGroup; VectorRef supportedVersions; + bool client_tls_configured; UID knownClientInfoID; ReplyPromise< struct ClientDBInfo > reply; template void serialize(Ar& ar) { ASSERT( ar.protocolVersion() >= 0x0FDB00A400040001LL ); - serializer(ar, issues, supportedVersions, traceLogGroup, knownClientInfoID, reply, arena); + serializer(ar, issues, supportedVersions, client_tls_configured, traceLogGroup, knownClientInfoID, reply, arena); } }; diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index b15bdf27dd..49ba9181f7 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -499,6 +499,7 @@ ACTOR static Future monitorClientInfo( Referenceget().id; req.supportedVersions = VectorRef(req.arena, networkOptions.supportedVersions); + req.client_tls_configured = tlsOptions.isValid() && tlsOptions->isConfigured(); // Monitor if client TLS is configured req.traceLogGroup = StringRef(req.arena, networkOptions.traceLogGroup); ClusterConnectionString fileConnectionString; diff --git a/fdbclient/Schemas.cpp b/fdbclient/Schemas.cpp index d8f2883630..b387f7b7df 100644 --- a/fdbclient/Schemas.cpp +++ b/fdbclient/Schemas.cpp @@ -278,7 +278,8 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( "connected_clients":[ { "address":"127.0.0.1:9898", - "log_group":"default" + "log_group":"default", + "tls_configured":true } ], "count" : 1, diff --git a/fdbrpc/FlowTransport.actor.cpp b/fdbrpc/FlowTransport.actor.cpp index 3e015d2995..912a147945 100644 --- a/fdbrpc/FlowTransport.actor.cpp +++ b/fdbrpc/FlowTransport.actor.cpp @@ -299,6 +299,7 @@ struct Peer : 
NonCopyable { break; } } + if ( !destination.isPublic() || outgoingConnectionIdle || destination > compatibleAddr ) { // Keep the new connection TraceEvent("IncomingConnection", conn->getDebugID()) @@ -648,6 +649,7 @@ ACTOR static Future connectionReader( uint64_t connectionId = 0; int32_t connectPacketSize = p->minimumSize(); + bool client_tls_capable = false; if ( unprocessed_end-unprocessed_begin >= connectPacketSize ) { if(p->protocolVersion >= 0x0FDB00A444020001) { connectionId = p->connectionId; @@ -687,7 +689,8 @@ ACTOR static Future connectionReader( TraceEvent("ConnectionEstablished", conn->getDebugID()) .suppressFor(1.0) .detail("Peer", conn->getPeerAddress()) - .detail("ConnectionId", connectionId); + .detail("ConnectionId", connectionId) + .detail("ConnectPacketSize", connectPacketSize); } if(connectionId > 1) { diff --git a/fdbrpc/TLSConnection.h b/fdbrpc/TLSConnection.h index f8395b66a0..aab2c5228c 100644 --- a/fdbrpc/TLSConnection.h +++ b/fdbrpc/TLSConnection.h @@ -89,6 +89,12 @@ struct TLSOptions : ReferenceCounted { Reference get_policy(PolicyType type); bool enabled(); + bool isConfigured() const { + return policyInfo.ca_path.size() || policyInfo.ca_contents.size() || policyInfo.cert_path.size() || + policyInfo.cert_contents.size() || policyInfo.key_path.size() || policyInfo.key_contents.size(); + } + + struct PolicyInfo { std::string ca_path; Standalone ca_contents; diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 53962c0340..e7635d03bf 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -92,6 +92,7 @@ public: ProcessIssuesMap clientsWithIssues, workersWithIssues; std::map incompatibleConnections; ClientVersionMap clientVersionMap; + std::map clientTLSConfigMap; // Does the client has TLS configured std::map traceLogGroupMap; AsyncTrigger forceMasterFailure; int64_t masterRegistrationCount; @@ -1221,6 +1222,7 @@ ACTOR Future clusterOpenDatabase( UID 
knownClientInfoID, std::string issues, Standalone> supportedVersions, + bool client_tls_configured, Standalone traceLogGroup, ReplyPromise reply) { @@ -1232,6 +1234,8 @@ ACTOR Future clusterOpenDatabase( db->clientVersionMap[reply.getEndpoint().getPrimaryAddress()] = supportedVersions; } + db->clientTLSConfigMap[reply.getEndpoint().getPrimaryAddress()] = client_tls_configured; + db->traceLogGroupMap[reply.getEndpoint().getPrimaryAddress()] = traceLogGroup.toString(); while (db->clientInfo->get().id == knownClientInfoID) { @@ -1243,6 +1247,7 @@ ACTOR Future clusterOpenDatabase( removeIssue( db->clientsWithIssues, reply.getEndpoint().getPrimaryAddress(), issues, issueID ); db->clientVersionMap.erase(reply.getEndpoint().getPrimaryAddress()); + db->clientTLSConfigMap.erase(reply.getEndpoint().getPrimaryAddress()); db->traceLogGroupMap.erase(reply.getEndpoint().getPrimaryAddress()); reply.send( db->clientInfo->get() ); @@ -1907,7 +1912,7 @@ ACTOR Future statusServer(FutureStream< StatusRequest> requests, } } - state ErrorOr result = wait(errorOr(clusterGetStatus(self->db.serverInfo, self->cx, workers, self->db.workersWithIssues, self->db.clientsWithIssues, self->db.clientVersionMap, self->db.traceLogGroupMap, coordinators, incompatibleConnections, self->datacenterVersionDifference))); + state ErrorOr result = wait(errorOr(clusterGetStatus(self->db.serverInfo, self->cx, workers, self->db.workersWithIssues, self->db.clientsWithIssues, self->db.clientVersionMap, self->db.clientTLSConfigMap, self->db.traceLogGroupMap, coordinators, incompatibleConnections, self->datacenterVersionDifference))); if (result.isError() && result.getError().code() == error_code_actor_cancelled) throw result.getError(); @@ -2402,7 +2407,7 @@ ACTOR Future clusterControllerCore( ClusterControllerFullInterface interf, return Void(); } when( OpenDatabaseRequest req = waitNext( interf.clientInterface.openDatabase.getFuture() ) ) { - self.addActor.send( clusterOpenDatabase( &self.db, 
req.knownClientInfoID, req.issues.toString(), req.supportedVersions, req.traceLogGroup, req.reply ) ); + self.addActor.send( clusterOpenDatabase( &self.db, req.knownClientInfoID, req.issues.toString(), req.supportedVersions, req.client_tls_configured, req.traceLogGroup, req.reply ) ); } when( RecruitFromConfigurationRequest req = waitNext( interf.recruitFromConfiguration.getFuture() ) ) { self.addActor.send( clusterRecruitFromConfiguration( &self, req ) ); diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index 87071beec6..44635c16ff 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -839,7 +839,9 @@ ACTOR static Future processStatusFetcher( return processMap; } -static JsonBuilderObject clientStatusFetcher(ClientVersionMap clientVersionMap, std::map traceLogGroupMap) { +static JsonBuilderObject clientStatusFetcher(ClientVersionMap clientVersionMap, + std::map traceLogGroupMap, + std::map clientTLSConfigMap) { JsonBuilderObject clientStatus; clientStatus["count"] = (int64_t)clientVersionMap.size(); @@ -864,6 +866,11 @@ static JsonBuilderObject clientStatusFetcher(ClientVersionMap clientVersionMap, JsonBuilderObject cli; cli["address"] = client.toString(); cli["log_group"] = traceLogGroupMap[client]; + bool client_tls_configured = false; + if (clientTLSConfigMap.find(client) != clientTLSConfigMap.end()) { + client_tls_configured = clientTLSConfigMap[client]; + } + cli["tls_configured"] = client_tls_configured; clients.push_back(cli); } @@ -1809,6 +1816,7 @@ ACTOR Future clusterGetStatus( ProcessIssuesMap workerIssues, ProcessIssuesMap clientIssues, ClientVersionMap clientVersionMap, + std::map clientTLSConfigMap, std::map traceLogGroupMap, ServerCoordinators coordinators, std::vector incompatibleConnections, @@ -2029,7 +2037,7 @@ ACTOR Future clusterGetStatus( JsonBuilderObject processStatus = wait(processStatusFetcher(db, workers, pMetrics, mMetrics, latestError, traceFileOpenErrors, programStarts, processIssues, 
storageServers, tLogs, proxies, cx, configuration, &status_incomplete_reasons)); statusObj["processes"] = processStatus; - statusObj["clients"] = clientStatusFetcher(clientVersionMap, traceLogGroupMap); + statusObj["clients"] = clientStatusFetcher(clientVersionMap, traceLogGroupMap, clientTLSConfigMap); JsonBuilderArray incompatibleConnectionsArray; for(auto it : incompatibleConnections) { diff --git a/fdbserver/Status.h b/fdbserver/Status.h index 9697d3d77d..a923b2b9cc 100644 --- a/fdbserver/Status.h +++ b/fdbserver/Status.h @@ -31,7 +31,7 @@ typedef std::map< NetworkAddress, std::pair > ProcessIssuesMap; typedef std::map< NetworkAddress, Standalone> > ClientVersionMap; Future clusterGetStatus( Reference> const& db, Database const& cx, vector> const& workers, - ProcessIssuesMap const& workerIssues, ProcessIssuesMap const& clientIssues, ClientVersionMap const& clientVersionMap, std::map const& traceLogGroupMap, + ProcessIssuesMap const& workerIssues, ProcessIssuesMap const& clientIssues, ClientVersionMap const& clientVersionMap, std::map const& clientTLSConfigMap, std::map const& traceLogGroupMap, ServerCoordinators const& coordinators, std::vector const& incompatibleConnections, Version const& datacenterVersionDifference ); #endif diff --git a/flow/serialize.h b/flow/serialize.h index f5754da8b1..7189f124ad 100644 --- a/flow/serialize.h +++ b/flow/serialize.h @@ -589,7 +589,7 @@ struct SendBuffer { struct PacketBuffer : SendBuffer, FastAllocated { int reference_count; - enum { DATA_SIZE = 4096 - 28 }; + enum { DATA_SIZE = 4096 - 28 }; //28 is the size of the PacketBuffer fields uint8_t data[ DATA_SIZE ]; PacketBuffer() : reference_count(1) { From c0535c49bbee6deb309d52319fe4ff971483be5c Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Tue, 26 Feb 2019 16:20:05 -0800 Subject: [PATCH 02/47] Status: TLS client status Use ClientStatusInfo structure for each network address (client), instead of passing each status info as a parameter. 
--- fdbserver/ClusterController.actor.cpp | 13 +++++-------- fdbserver/Status.actor.cpp | 14 ++++++-------- fdbserver/Status.h | 7 ++++++- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index e7635d03bf..5481779093 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -92,8 +92,7 @@ public: ProcessIssuesMap clientsWithIssues, workersWithIssues; std::map incompatibleConnections; ClientVersionMap clientVersionMap; - std::map clientTLSConfigMap; // Does the client has TLS configured - std::map traceLogGroupMap; + std::map clientStatusInfoMap; AsyncTrigger forceMasterFailure; int64_t masterRegistrationCount; bool recoveryStalled; @@ -1234,9 +1233,7 @@ ACTOR Future clusterOpenDatabase( db->clientVersionMap[reply.getEndpoint().getPrimaryAddress()] = supportedVersions; } - db->clientTLSConfigMap[reply.getEndpoint().getPrimaryAddress()] = client_tls_configured; - - db->traceLogGroupMap[reply.getEndpoint().getPrimaryAddress()] = traceLogGroup.toString(); + db->clientStatusInfoMap[reply.getEndpoint().getPrimaryAddress()] = {traceLogGroup.toString(), client_tls_configured}; while (db->clientInfo->get().id == knownClientInfoID) { choose { @@ -1247,8 +1244,7 @@ ACTOR Future clusterOpenDatabase( removeIssue( db->clientsWithIssues, reply.getEndpoint().getPrimaryAddress(), issues, issueID ); db->clientVersionMap.erase(reply.getEndpoint().getPrimaryAddress()); - db->clientTLSConfigMap.erase(reply.getEndpoint().getPrimaryAddress()); - db->traceLogGroupMap.erase(reply.getEndpoint().getPrimaryAddress()); + db->clientStatusInfoMap.erase(reply.getEndpoint().getPrimaryAddress()); reply.send( db->clientInfo->get() ); return Void(); @@ -1912,7 +1908,8 @@ ACTOR Future statusServer(FutureStream< StatusRequest> requests, } } - state ErrorOr result = wait(errorOr(clusterGetStatus(self->db.serverInfo, self->cx, workers, self->db.workersWithIssues, 
self->db.clientsWithIssues, self->db.clientVersionMap, self->db.clientTLSConfigMap, self->db.traceLogGroupMap, coordinators, incompatibleConnections, self->datacenterVersionDifference))); + state ErrorOr result = wait(errorOr(clusterGetStatus(self->db.serverInfo, self->cx, workers, self->db.workersWithIssues, self->db.clientsWithIssues, self->db.clientVersionMap, self->db.clientStatusInfoMap, coordinators, incompatibleConnections, self->datacenterVersionDifference))); + if (result.isError() && result.getError().code() == error_code_actor_cancelled) throw result.getError(); diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index 44635c16ff..cb2d5bbb62 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -840,8 +840,7 @@ ACTOR static Future processStatusFetcher( } static JsonBuilderObject clientStatusFetcher(ClientVersionMap clientVersionMap, - std::map traceLogGroupMap, - std::map clientTLSConfigMap) { + std::map clientStatusInfoMap) { JsonBuilderObject clientStatus; clientStatus["count"] = (int64_t)clientVersionMap.size(); @@ -865,10 +864,10 @@ static JsonBuilderObject clientStatusFetcher(ClientVersionMap clientVersionMap, for(auto client : cv.second) { JsonBuilderObject cli; cli["address"] = client.toString(); - cli["log_group"] = traceLogGroupMap[client]; + cli["log_group"] = clientStatusInfoMap[client].traceLogGroup; bool client_tls_configured = false; - if (clientTLSConfigMap.find(client) != clientTLSConfigMap.end()) { - client_tls_configured = clientTLSConfigMap[client]; + if (clientStatusInfoMap.find(client) != clientStatusInfoMap.end()) { + client_tls_configured = clientStatusInfoMap[client].clientTLSConfigured; } cli["tls_configured"] = client_tls_configured; clients.push_back(cli); @@ -1816,8 +1815,7 @@ ACTOR Future clusterGetStatus( ProcessIssuesMap workerIssues, ProcessIssuesMap clientIssues, ClientVersionMap clientVersionMap, - std::map clientTLSConfigMap, - std::map traceLogGroupMap, + std::map 
clientStatusInfoMap, ServerCoordinators coordinators, std::vector incompatibleConnections, Version datacenterVersionDifference ) @@ -2037,7 +2035,7 @@ ACTOR Future clusterGetStatus( JsonBuilderObject processStatus = wait(processStatusFetcher(db, workers, pMetrics, mMetrics, latestError, traceFileOpenErrors, programStarts, processIssues, storageServers, tLogs, proxies, cx, configuration, &status_incomplete_reasons)); statusObj["processes"] = processStatus; - statusObj["clients"] = clientStatusFetcher(clientVersionMap, traceLogGroupMap, clientTLSConfigMap); + statusObj["clients"] = clientStatusFetcher(clientVersionMap, clientStatusInfoMap); JsonBuilderArray incompatibleConnectionsArray; for(auto it : incompatibleConnections) { diff --git a/fdbserver/Status.h b/fdbserver/Status.h index a923b2b9cc..d076e3885c 100644 --- a/fdbserver/Status.h +++ b/fdbserver/Status.h @@ -30,8 +30,13 @@ typedef std::map< NetworkAddress, std::pair > ProcessIssuesMap; typedef std::map< NetworkAddress, Standalone> > ClientVersionMap; +struct ClientStatusInfo { + std::string traceLogGroup; + bool clientTLSConfigured; // Does client configure its TLS options +}; + Future clusterGetStatus( Reference> const& db, Database const& cx, vector> const& workers, - ProcessIssuesMap const& workerIssues, ProcessIssuesMap const& clientIssues, ClientVersionMap const& clientVersionMap, std::map const& clientTLSConfigMap, std::map const& traceLogGroupMap, + ProcessIssuesMap const& workerIssues, ProcessIssuesMap const& clientIssues, ClientVersionMap const& clientVersionMap, std::map const& clientStatusInfoMap, ServerCoordinators const& coordinators, std::vector const& incompatibleConnections, Version const& datacenterVersionDifference ); #endif From b7a52e81e233c10dae30bc501a3c12805f57a38e Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Tue, 5 Mar 2019 21:00:19 -0800 Subject: [PATCH 03/47] Status: Count connected coordinators per client A client will always try to connect all coordinators. 
This commit let Status track the number of connected coordinators for each client. This allows us to do canary in coordinators. For example, when we switch from non-TLS to TLS, we can switch 1 coordinator from non-TLS to TLS. This can help check if a client has the ability to connect through TLS. We can make the non-TLS to TLS switch for each coordinators one by one. This avoid the risk of losing connection in the switch. --- fdbclient/ClusterInterface.h | 4 ++-- fdbclient/ManagementAPI.actor.cpp | 3 ++- fdbclient/MonitorLeader.actor.cpp | 21 ++++++++++++++----- fdbclient/MonitorLeader.h | 8 ++++---- fdbclient/NativeAPI.actor.cpp | 29 ++++++++++++++++----------- fdbclient/NativeAPI.actor.h | 6 +++--- fdbclient/ReadYourWrites.actor.cpp | 3 ++- fdbclient/Schemas.cpp | 2 +- fdbclient/StatusClient.actor.cpp | 3 ++- fdbserver/ClusterController.actor.cpp | 8 +++++--- fdbserver/Status.actor.cpp | 6 +++--- fdbserver/Status.h | 2 +- fdbserver/tester.actor.cpp | 3 ++- fdbserver/worker.actor.cpp | 3 ++- 14 files changed, 62 insertions(+), 39 deletions(-) diff --git a/fdbclient/ClusterInterface.h b/fdbclient/ClusterInterface.h index a133f269a4..6b45fa2226 100644 --- a/fdbclient/ClusterInterface.h +++ b/fdbclient/ClusterInterface.h @@ -119,14 +119,14 @@ struct OpenDatabaseRequest { Arena arena; StringRef issues, traceLogGroup; VectorRef supportedVersions; - bool client_tls_configured; + int connectedCoordinatorsNum; // Number of coordinators connected by the client UID knownClientInfoID; ReplyPromise< struct ClientDBInfo > reply; template void serialize(Ar& ar) { ASSERT( ar.protocolVersion() >= 0x0FDB00A400040001LL ); - serializer(ar, issues, supportedVersions, client_tls_configured, traceLogGroup, knownClientInfoID, reply, arena); + serializer(ar, issues, supportedVersions, connectedCoordinatorsNum, traceLogGroup, knownClientInfoID, reply, arena); } }; diff --git a/fdbclient/ManagementAPI.actor.cpp b/fdbclient/ManagementAPI.actor.cpp index df17d6cc93..14e15c54d0 100644 --- 
a/fdbclient/ManagementAPI.actor.cpp +++ b/fdbclient/ManagementAPI.actor.cpp @@ -1542,7 +1542,8 @@ ACTOR Future checkDatabaseLock( Reference tr, U ACTOR Future forceRecovery( Reference clusterFile, Key dcId ) { state Reference>> clusterInterface(new AsyncVar>); - state Future leaderMon = monitorLeader(clusterFile, clusterInterface); + state Reference> unused(new AsyncVar); + state Future leaderMon = monitorLeader(clusterFile, clusterInterface, unused); loop { choose { diff --git a/fdbclient/MonitorLeader.actor.cpp b/fdbclient/MonitorLeader.actor.cpp index 64482da09d..32dac8c5fb 100644 --- a/fdbclient/MonitorLeader.actor.cpp +++ b/fdbclient/MonitorLeader.actor.cpp @@ -308,9 +308,15 @@ ClientLeaderRegInterface::ClientLeaderRegInterface( INetwork* local ) { getLeader.makeWellKnownEndpoint( WLTOKEN_CLIENTLEADERREG_GETLEADER, TaskCoordination ); } -ACTOR Future monitorNominee( Key key, ClientLeaderRegInterface coord, AsyncTrigger* nomineeChange, Optional *info, int generation ) { +// Nominee is the worker among all workers that are considered as leader by a coordinator +// This function contacts a coordinator coord to ask if the worker is considered as a leader (i.e., if the worker +// is a nominee) +ACTOR Future monitorNominee( Key key, ClientLeaderRegInterface coord, AsyncTrigger* nomineeChange, Optional *info, int generation, Reference> connectedCoordinatorsNum ) { loop { state Optional li = wait( retryBrokenPromise( coord.getLeader, GetLeaderRequest( key, info->present() ? info->get().changeID : UID() ), TaskCoordinationReply ) ); + if (li.present()) { + connectedCoordinatorsNum->set(connectedCoordinatorsNum->get() + 1); + } wait( Future(Void()) ); // Make sure we weren't cancelled TraceEvent("GetLeaderReply").suppressFor(1.0).detail("Coordinator", coord.getLeader.getEndpoint().getPrimaryAddress()).detail("Nominee", li.present() ? 
li.get().changeID : UID()).detail("Generation", generation); @@ -385,7 +391,8 @@ struct MonitorLeaderInfo { explicit MonitorLeaderInfo( Reference intermediateConnFile ) : intermediateConnFile(intermediateConnFile), hasConnected(false), generation(0) {} }; -ACTOR Future monitorLeaderOneGeneration( Reference connFile, Reference> outSerializedLeaderInfo, MonitorLeaderInfo info ) { +// Leader is the process that will be elected by coordinators as the cluster controller +ACTOR Future monitorLeaderOneGeneration( Reference connFile, Reference> outSerializedLeaderInfo, MonitorLeaderInfo info, Reference> connectedCoordinatorsNum) { state ClientCoordinators coordinators( info.intermediateConnFile ); state AsyncTrigger nomineeChange; state std::vector> nominees; @@ -394,8 +401,9 @@ ACTOR Future monitorLeaderOneGeneration( Reference> actors; + // Ask all coordinators if the worker is considered as a leader (leader nominee) by the coordinator. for(int i=0; i monitorLeaderOneGeneration( Reference monitorLeaderInternal( Reference connFile, Reference> outSerializedLeaderInfo ) { +ACTOR Future monitorLeaderInternal( Reference connFile, Reference> outSerializedLeaderInfo, Reference> connectedCoordinatorsNum ) { state MonitorLeaderInfo info(connFile); loop { - MonitorLeaderInfo _info = wait( monitorLeaderOneGeneration( connFile, outSerializedLeaderInfo, info) ); + // set the AsyncVar to 0 + connectedCoordinatorsNum->set(0); + MonitorLeaderInfo _info = wait( monitorLeaderOneGeneration( connFile, outSerializedLeaderInfo, info, connectedCoordinatorsNum) ); info = _info; info.generation++; + } } diff --git a/fdbclient/MonitorLeader.h b/fdbclient/MonitorLeader.h index c62133d136..87aa37c503 100644 --- a/fdbclient/MonitorLeader.h +++ b/fdbclient/MonitorLeader.h @@ -30,19 +30,19 @@ class ClientCoordinators; template -Future monitorLeader( Reference const& connFile, Reference>> const& outKnownLeader ); +Future monitorLeader( Reference const& connFile, Reference>> const& outKnownLeader, 
Reference> connectedCoordinatorsNum ); // Monitors the given coordination group's leader election process and provides a best current guess // of the current leader. If a leader is elected for long enough and communication with a quorum of // coordinators is possible, eventually outKnownLeader will be that leader's interface. #pragma region Implementation -Future monitorLeaderInternal( Reference const& connFile, Reference> const& outSerializedLeaderInfo ); +Future monitorLeaderInternal( Reference const& connFile, Reference> const& outSerializedLeaderInfo, Reference> const& connectedCoordinatorsNum ); template -Future monitorLeader( Reference const& connFile, Reference>> const& outKnownLeader ) { +Future monitorLeader( Reference const& connFile, Reference>> const& outKnownLeader, Reference> connectedCoordinatorsNum ) { Reference> serializedInfo( new AsyncVar ); - Future m = monitorLeaderInternal( connFile, serializedInfo ); + Future m = monitorLeaderInternal( connFile, serializedInfo, connectedCoordinatorsNum ); return m || asyncDeserialize( serializedInfo, outKnownLeader ); } diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 49ba9181f7..9f83c9d84d 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -492,14 +492,14 @@ DatabaseContext::DatabaseContext( DatabaseContext::DatabaseContext( const Error &err ) : deferredError(err), latencies(1000), readLatencies(1000), commitLatencies(1000), GRVLatencies(1000), mutationsPerCommit(1000), bytesPerCommit(1000) {} -ACTOR static Future monitorClientInfo( Reference>> clusterInterface, Reference ccf, Reference> outInfo ) { +ACTOR static Future monitorClientInfo( Reference>> clusterInterface, Reference ccf, Reference> outInfo, Reference> connectedCoordinatorsNum ) { try { state Optional incorrectTime; loop { OpenDatabaseRequest req; req.knownClientInfoID = outInfo->get().id; req.supportedVersions = VectorRef(req.arena, networkOptions.supportedVersions); - 
req.client_tls_configured = tlsOptions.isValid() && tlsOptions->isConfigured(); // Monitor if client TLS is configured + req.connectedCoordinatorsNum = connectedCoordinatorsNum->get(); req.traceLogGroup = StringRef(req.arena, networkOptions.traceLogGroup); ClusterConnectionString fileConnectionString; @@ -530,6 +530,7 @@ ACTOR static Future monitorClientInfo( Referenceget().present()) TraceEvent("ClientInfo_CCInterfaceChange").detail("CCID", clusterInterface->get().get().id()); } + when( wait( connectedCoordinatorsNum->onChange() ) ) {} } } } catch( Error& e ) { @@ -542,10 +543,13 @@ ACTOR static Future monitorClientInfo( Reference>> clusterInterface, Reference connFile, LocalityData const& clientLocality) { - Reference cluster(new Cluster(connFile, clusterInterface)); + Reference> connectedCoordinatorsNum(new AsyncVar()); + Reference cluster(new Cluster(connFile, clusterInterface, connectedCoordinatorsNum)); Reference> clientInfo(new AsyncVar()); - Future clientInfoMonitor = monitorClientInfo(clusterInterface, connFile, clientInfo); + Future clientInfoMonitor = monitorClientInfo(clusterInterface, connFile, clientInfo, connectedCoordinatorsNum); return Database(new DatabaseContext(cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false)); } @@ -710,9 +714,10 @@ Reference DatabaseContext::getConnectionFile() { } Database Database::createDatabase( Reference connFile, int apiVersion, LocalityData const& clientLocality ) { - Reference cluster(new Cluster(connFile, apiVersion)); + Reference> connectedCoordinatorsNum(new AsyncVar(0)); // Number of connected coordinators for the client + Reference cluster(new Cluster(connFile, connectedCoordinatorsNum, apiVersion)); Reference> clientInfo(new AsyncVar()); - Future clientInfoMonitor = monitorClientInfo(cluster->getClusterInterface(), connFile, clientInfo); + Future clientInfoMonitor = monitorClientInfo(cluster->getClusterInterface(), connFile, clientInfo, 
connectedCoordinatorsNum); return Database( new DatabaseContext( cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false, apiVersion ) ); } @@ -724,19 +729,19 @@ Database Database::createDatabase( std::string connFileName, int apiVersion, Loc extern uint32_t determinePublicIPAutomatically( ClusterConnectionString const& ccs ); -Cluster::Cluster( Reference connFile, int apiVersion ) +Cluster::Cluster( Reference connFile, Reference> connectedCoordinatorsNum, int apiVersion ) : clusterInterface(new AsyncVar>()) { - init(connFile, true, apiVersion); + init(connFile, true, connectedCoordinatorsNum, apiVersion); } -Cluster::Cluster( Reference connFile, Reference>> clusterInterface) +Cluster::Cluster( Reference connFile, Reference>> clusterInterface, Reference> connectedCoordinatorsNum) : clusterInterface(clusterInterface) { - init(connFile, true); + init(connFile, true, connectedCoordinatorsNum); } -void Cluster::init( Reference connFile, bool startClientInfoMonitor, int apiVersion ) { +void Cluster::init( Reference connFile, bool startClientInfoMonitor, Reference> connectedCoordinatorsNum, int apiVersion ) { connectionFile = connFile; connected = clusterInterface->onChange(); @@ -770,7 +775,7 @@ void Cluster::init( Reference connFile, bool startClientI uncancellable( recurring( &systemMonitor, CLIENT_KNOBS->SYSTEM_MONITOR_INTERVAL, TaskFlushTrace ) ); } - leaderMon = monitorLeader( connFile, clusterInterface ); + leaderMon = monitorLeader( connFile, clusterInterface, connectedCoordinatorsNum ); failMon = failureMonitorClient( clusterInterface, false ); } } diff --git a/fdbclient/NativeAPI.actor.h b/fdbclient/NativeAPI.actor.h index f5ba76e385..5ccbd6c9bb 100644 --- a/fdbclient/NativeAPI.actor.h +++ b/fdbclient/NativeAPI.actor.h @@ -115,8 +115,8 @@ void stopNetwork(); */ class Cluster : public ReferenceCounted, NonCopyable { public: - Cluster(Reference connFile, int apiVersion=Database::API_VERSION_LATEST); - 
Cluster(Reference connFile, Reference>> clusterInterface); + Cluster(Reference connFile, Reference> connectedCoordinatorsNum, int apiVersion=Database::API_VERSION_LATEST); + Cluster(Reference connFile, Reference>> clusterInterface, Reference> connectedCoordinatorsNum); ~Cluster(); @@ -126,7 +126,7 @@ public: Future onConnected(); private: - void init(Reference connFile, bool startClientInfoMonitor, int apiVersion=Database::API_VERSION_LATEST); + void init(Reference connFile, bool startClientInfoMonitor, Reference> connectedCoornidatorsNum, int apiVersion=Database::API_VERSION_LATEST); Reference>> clusterInterface; Reference connectionFile; diff --git a/fdbclient/ReadYourWrites.actor.cpp b/fdbclient/ReadYourWrites.actor.cpp index b7240329db..7a77a1cd8a 100644 --- a/fdbclient/ReadYourWrites.actor.cpp +++ b/fdbclient/ReadYourWrites.actor.cpp @@ -1159,7 +1159,8 @@ ACTOR Future> getJSON(Reference clusterFi ACTOR Future> getWorkerInterfaces (Reference clusterFile){ state Reference>> clusterInterface(new AsyncVar>); - state Future leaderMon = monitorLeader(clusterFile, clusterInterface); + Reference> unused_connectedCoordinatorsNum(new AsyncVar()); + state Future leaderMon = monitorLeader(clusterFile, clusterInterface, unused_connectedCoordinatorsNum); loop{ choose { diff --git a/fdbclient/Schemas.cpp b/fdbclient/Schemas.cpp index b387f7b7df..2b2cc448ec 100644 --- a/fdbclient/Schemas.cpp +++ b/fdbclient/Schemas.cpp @@ -279,7 +279,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( { "address":"127.0.0.1:9898", "log_group":"default", - "tls_configured":true + "connected_coordinators":2 } ], "count" : 1, diff --git a/fdbclient/StatusClient.actor.cpp b/fdbclient/StatusClient.actor.cpp index 9cd7358c07..84f51fb453 100644 --- a/fdbclient/StatusClient.actor.cpp +++ b/fdbclient/StatusClient.actor.cpp @@ -463,11 +463,12 @@ ACTOR Future statusFetcherImpl( Reference f state bool quorum_reachable = false; state int coordinatorsFaultTolerance = 0; state 
Reference>> clusterInterface(new AsyncVar>); + state Reference> connectedCoordinatorsNum(new AsyncVar(0)); try { state int64_t clientTime = time(0); - state Future leaderMon = monitorLeader(f, clusterInterface); + state Future leaderMon = monitorLeader(f, clusterInterface, connectedCoordinatorsNum); StatusObject _statusObjClient = wait(clientStatusFetcher(f, &clientMessages, &quorum_reachable, &coordinatorsFaultTolerance)); statusObjClient = _statusObjClient; diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 5481779093..c56494d8ed 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -93,6 +93,7 @@ public: std::map incompatibleConnections; ClientVersionMap clientVersionMap; std::map clientStatusInfoMap; + int connectedCoordinatorsNum; // Number of connected coordinators AsyncTrigger forceMasterFailure; int64_t masterRegistrationCount; bool recoveryStalled; @@ -1221,7 +1222,7 @@ ACTOR Future clusterOpenDatabase( UID knownClientInfoID, std::string issues, Standalone> supportedVersions, - bool client_tls_configured, + int connectedCoordinatorsNum, Standalone traceLogGroup, ReplyPromise reply) { @@ -1233,7 +1234,8 @@ ACTOR Future clusterOpenDatabase( db->clientVersionMap[reply.getEndpoint().getPrimaryAddress()] = supportedVersions; } - db->clientStatusInfoMap[reply.getEndpoint().getPrimaryAddress()] = {traceLogGroup.toString(), client_tls_configured}; + + db->clientStatusInfoMap[reply.getEndpoint().getPrimaryAddress()] = {traceLogGroup.toString(), connectedCoordinatorsNum}; while (db->clientInfo->get().id == knownClientInfoID) { choose { @@ -2404,7 +2406,7 @@ ACTOR Future clusterControllerCore( ClusterControllerFullInterface interf, return Void(); } when( OpenDatabaseRequest req = waitNext( interf.clientInterface.openDatabase.getFuture() ) ) { - self.addActor.send( clusterOpenDatabase( &self.db, req.knownClientInfoID, req.issues.toString(), req.supportedVersions, 
req.client_tls_configured, req.traceLogGroup, req.reply ) ); + self.addActor.send( clusterOpenDatabase( &self.db, req.knownClientInfoID, req.issues.toString(), req.supportedVersions, req.connectedCoordinatorsNum, req.traceLogGroup, req.reply ) ); } when( RecruitFromConfigurationRequest req = waitNext( interf.recruitFromConfiguration.getFuture() ) ) { self.addActor.send( clusterRecruitFromConfiguration( &self, req ) ); diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index cb2d5bbb62..17b068d2a2 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -865,11 +865,11 @@ static JsonBuilderObject clientStatusFetcher(ClientVersionMap clientVersionMap, JsonBuilderObject cli; cli["address"] = client.toString(); cli["log_group"] = clientStatusInfoMap[client].traceLogGroup; - bool client_tls_configured = false; + int connectedCoordinatorsNum = 0; if (clientStatusInfoMap.find(client) != clientStatusInfoMap.end()) { - client_tls_configured = clientStatusInfoMap[client].clientTLSConfigured; + connectedCoordinatorsNum = clientStatusInfoMap[client].connectedCoordinatorsNum; } - cli["tls_configured"] = client_tls_configured; + cli["connected_coordinators"] = (int)connectedCoordinatorsNum; clients.push_back(cli); } diff --git a/fdbserver/Status.h b/fdbserver/Status.h index d076e3885c..1bc8acc916 100644 --- a/fdbserver/Status.h +++ b/fdbserver/Status.h @@ -32,7 +32,7 @@ typedef std::map< NetworkAddress, Standalone> > Clie struct ClientStatusInfo { std::string traceLogGroup; - bool clientTLSConfigured; // Does client configure its TLS options + int connectedCoordinatorsNum; }; Future clusterGetStatus( Reference> const& db, Database const& cx, vector> const& workers, diff --git a/fdbserver/tester.actor.cpp b/fdbserver/tester.actor.cpp index 21af2fc02b..33861dfbb6 100644 --- a/fdbserver/tester.actor.cpp +++ b/fdbserver/tester.actor.cpp @@ -1121,8 +1121,9 @@ ACTOR Future runTests( Reference connFile, test_typ state vector testSpecs; Reference>> 
cc( new AsyncVar> ); Reference>> ci( new AsyncVar> ); + Reference> connectedCoordinatorsNum( new AsyncVar(0) ); vector> actors; - actors.push_back( reportErrors(monitorLeader( connFile, cc ), "MonitorLeader") ); + actors.push_back( reportErrors(monitorLeader( connFile, cc, connectedCoordinatorsNum ), "MonitorLeader") ); actors.push_back( reportErrors(extractClusterInterface( cc,ci ),"ExtractClusterInterface") ); actors.push_back( reportErrors(failureMonitorClient( ci, false ),"FailureMonitorClient") ); diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index c4b0dd0def..778eba1503 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -1215,10 +1215,11 @@ ACTOR Future fdbd( Reference>> cc(new AsyncVar>); Reference>> ci(new AsyncVar>); Reference> asyncPriorityInfo(new AsyncVar(getCCPriorityInfo(fitnessFilePath, processClass))); + Reference> unused(new AsyncVar); Promise recoveredDiskFiles; v.push_back(reportErrors(monitorAndWriteCCPriorityInfo(fitnessFilePath, asyncPriorityInfo), "MonitorAndWriteCCPriorityInfo")); - v.push_back( reportErrors( processClass == ProcessClass::TesterClass ? monitorLeader( connFile, cc ) : clusterController( connFile, cc , asyncPriorityInfo, recoveredDiskFiles.getFuture(), localities ), "ClusterController") ); + v.push_back( reportErrors( processClass == ProcessClass::TesterClass ? 
monitorLeader( connFile, cc, unused ) : clusterController( connFile, cc , asyncPriorityInfo, recoveredDiskFiles.getFuture(), localities ), "ClusterController") ); v.push_back( reportErrors(extractClusterInterface( cc, ci ), "ExtractClusterInterface") ); v.push_back( reportErrors(failureMonitorClient( ci, true ), "FailureMonitorClient") ); v.push_back( reportErrorsExcept(workerServer(connFile, cc, localities, asyncPriorityInfo, processClass, dataFolder, memoryLimit, metricsConnFile, metricsPrefix, recoveredDiskFiles), "WorkerServer", UID(), &normalWorkerErrors()) ); From 820548223ae4c20c5cfe96b7904b855bc0e69fd3 Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Tue, 5 Mar 2019 21:29:06 -0800 Subject: [PATCH 04/47] Status: connected_coordinators misc minor changes Change the rst document file; Change the coding style to be consistent with the nearby code; Ensure we always initialize the connectedCoordinatorsNum to 0 even when the variable is not used. --- documentation/sphinx/source/mr-status.rst | 2 +- fdbclient/NativeAPI.actor.cpp | 2 +- fdbclient/ReadYourWrites.actor.cpp | 4 ++-- fdbrpc/FlowTransport.actor.cpp | 4 +--- fdbrpc/TLSConnection.h | 6 ------ fdbserver/ClusterController.actor.cpp | 1 - fdbserver/Status.actor.cpp | 7 ++----- fdbserver/worker.actor.cpp | 2 +- 8 files changed, 8 insertions(+), 20 deletions(-) diff --git a/documentation/sphinx/source/mr-status.rst b/documentation/sphinx/source/mr-status.rst index 251f23d556..65be3cbf8a 100644 --- a/documentation/sphinx/source/mr-status.rst +++ b/documentation/sphinx/source/mr-status.rst @@ -81,7 +81,7 @@ The following format informally describes the JSON containing the status data. 
T { "address": "127.0.0.1:1234", "log_group": "default" - "tls_configured": true + "connected_coordinators": 2 } ], "count": 1, diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 9f83c9d84d..c9154ca9fa 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -546,7 +546,7 @@ ACTOR static Future monitorClientInfo( Reference>> clusterInterface, Reference connFile, LocalityData const& clientLocality) { - Reference> connectedCoordinatorsNum(new AsyncVar()); + Reference> connectedCoordinatorsNum(new AsyncVar(0)); Reference cluster(new Cluster(connFile, clusterInterface, connectedCoordinatorsNum)); Reference> clientInfo(new AsyncVar()); Future clientInfoMonitor = monitorClientInfo(clusterInterface, connFile, clientInfo, connectedCoordinatorsNum); diff --git a/fdbclient/ReadYourWrites.actor.cpp b/fdbclient/ReadYourWrites.actor.cpp index 7a77a1cd8a..8b4c308141 100644 --- a/fdbclient/ReadYourWrites.actor.cpp +++ b/fdbclient/ReadYourWrites.actor.cpp @@ -1159,8 +1159,8 @@ ACTOR Future> getJSON(Reference clusterFi ACTOR Future> getWorkerInterfaces (Reference clusterFile){ state Reference>> clusterInterface(new AsyncVar>); - Reference> unused_connectedCoordinatorsNum(new AsyncVar()); - state Future leaderMon = monitorLeader(clusterFile, clusterInterface, unused_connectedCoordinatorsNum); + Reference> unused(new AsyncVar(0)); + state Future leaderMon = monitorLeader(clusterFile, clusterInterface, unused); loop{ choose { diff --git a/fdbrpc/FlowTransport.actor.cpp b/fdbrpc/FlowTransport.actor.cpp index 912a147945..2f00e486f8 100644 --- a/fdbrpc/FlowTransport.actor.cpp +++ b/fdbrpc/FlowTransport.actor.cpp @@ -649,7 +649,6 @@ ACTOR static Future connectionReader( uint64_t connectionId = 0; int32_t connectPacketSize = p->minimumSize(); - bool client_tls_capable = false; if ( unprocessed_end-unprocessed_begin >= connectPacketSize ) { if(p->protocolVersion >= 0x0FDB00A444020001) { connectionId = p->connectionId; @@ -689,8 +688,7 
@@ ACTOR static Future connectionReader( TraceEvent("ConnectionEstablished", conn->getDebugID()) .suppressFor(1.0) .detail("Peer", conn->getPeerAddress()) - .detail("ConnectionId", connectionId) - .detail("ConnectPacketSize", connectPacketSize); + .detail("ConnectionId", connectionId); } if(connectionId > 1) { diff --git a/fdbrpc/TLSConnection.h b/fdbrpc/TLSConnection.h index aab2c5228c..f8395b66a0 100644 --- a/fdbrpc/TLSConnection.h +++ b/fdbrpc/TLSConnection.h @@ -89,12 +89,6 @@ struct TLSOptions : ReferenceCounted { Reference get_policy(PolicyType type); bool enabled(); - bool isConfigured() const { - return policyInfo.ca_path.size() || policyInfo.ca_contents.size() || policyInfo.cert_path.size() || - policyInfo.cert_contents.size() || policyInfo.key_path.size() || policyInfo.key_contents.size(); - } - - struct PolicyInfo { std::string ca_path; Standalone ca_contents; diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index c56494d8ed..63d8b828f1 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -93,7 +93,6 @@ public: std::map incompatibleConnections; ClientVersionMap clientVersionMap; std::map clientStatusInfoMap; - int connectedCoordinatorsNum; // Number of connected coordinators AsyncTrigger forceMasterFailure; int64_t masterRegistrationCount; bool recoveryStalled; diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index 17b068d2a2..189a879cbc 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -864,12 +864,9 @@ static JsonBuilderObject clientStatusFetcher(ClientVersionMap clientVersionMap, for(auto client : cv.second) { JsonBuilderObject cli; cli["address"] = client.toString(); + ASSERT(clientStatusInfoMap.find(client) != clientStatusInfoMap.end()); cli["log_group"] = clientStatusInfoMap[client].traceLogGroup; - int connectedCoordinatorsNum = 0; - if (clientStatusInfoMap.find(client) != clientStatusInfoMap.end()) { - 
connectedCoordinatorsNum = clientStatusInfoMap[client].connectedCoordinatorsNum; - } - cli["connected_coordinators"] = (int)connectedCoordinatorsNum; + cli["connected_coordinators"] = (int) clientStatusInfoMap[client].connectedCoordinatorsNum; clients.push_back(cli); } diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 778eba1503..a61e10a83c 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -1215,7 +1215,7 @@ ACTOR Future fdbd( Reference>> cc(new AsyncVar>); Reference>> ci(new AsyncVar>); Reference> asyncPriorityInfo(new AsyncVar(getCCPriorityInfo(fitnessFilePath, processClass))); - Reference> unused(new AsyncVar); + Reference> unused(new AsyncVar(0)); Promise recoveredDiskFiles; v.push_back(reportErrors(monitorAndWriteCCPriorityInfo(fitnessFilePath, asyncPriorityInfo), "MonitorAndWriteCCPriorityInfo")); From ca8bbad657648f3f54e1246e33716cda1b221ce2 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Wed, 6 Mar 2019 14:14:06 -0800 Subject: [PATCH 05/47] Added --json option to fdbbackup describe. Also added expired percentage indicator to snapshot details. 
--- fdbbackup/backup.actor.cpp | 13 ++++-- fdbclient/BackupContainer.actor.cpp | 61 ++++++++++++++++++++++++++++- fdbclient/BackupContainer.h | 16 ++++++++ 3 files changed, 84 insertions(+), 6 deletions(-) diff --git a/fdbbackup/backup.actor.cpp b/fdbbackup/backup.actor.cpp index d4fc79cc09..0fe25cc51b 100644 --- a/fdbbackup/backup.actor.cpp +++ b/fdbbackup/backup.actor.cpp @@ -94,7 +94,7 @@ enum { OPT_EXPIRE_BEFORE_VERSION, OPT_EXPIRE_BEFORE_DATETIME, OPT_EXPIRE_DELETE_BEFORE_DAYS, OPT_EXPIRE_RESTORABLE_AFTER_VERSION, OPT_EXPIRE_RESTORABLE_AFTER_DATETIME, OPT_EXPIRE_MIN_RESTORABLE_DAYS, OPT_BASEURL, OPT_BLOB_CREDENTIALS, OPT_DESCRIBE_DEEP, OPT_DESCRIBE_TIMESTAMPS, - OPT_DUMP_BEGIN, OPT_DUMP_END, + OPT_DUMP_BEGIN, OPT_DUMP_END, OPT_JSON, // Backup and Restore constants OPT_TAGNAME, OPT_BACKUPKEYS, OPT_WAITFORDONE, @@ -433,6 +433,7 @@ CSimpleOpt::SOption g_rgBackupDescribeOptions[] = { { OPT_KNOB, "--knob_", SO_REQ_SEP }, { OPT_DESCRIBE_DEEP, "--deep", SO_NONE }, { OPT_DESCRIBE_TIMESTAMPS, "--version_timestamps", SO_NONE }, + { OPT_JSON, "--json", SO_NONE}, #ifndef TLS_DISABLED TLS_OPTION_FLAGS #endif @@ -2117,13 +2118,13 @@ ACTOR Future deleteBackupContainer(const char *name, std::string destinati return Void(); } -ACTOR Future describeBackup(const char *name, std::string destinationContainer, bool deep, Optional cx) { +ACTOR Future describeBackup(const char *name, std::string destinationContainer, bool deep, Optional cx, bool json) { try { Reference c = openBackupContainer(name, destinationContainer); state BackupDescription desc = wait(c->describeBackup(deep)); if(cx.present()) wait(desc.resolveVersionTimes(cx.get())); - printf("%s\n", desc.toString().c_str()); + printf("%s\n", (json ? 
desc.toJSON() : desc.toString()).c_str()); } catch (Error& e) { if(e.code() == error_code_actor_cancelled) @@ -2547,6 +2548,7 @@ int main(int argc, char* argv[]) { Version dumpEnd = std::numeric_limits::max(); std::string restoreClusterFileDest; std::string restoreClusterFileOrig; + bool jsonOutput = false; if( argc == 1 ) { printUsage(programExe, false); @@ -2844,6 +2846,9 @@ int main(int argc, char* argv[]) { case OPT_DUMP_END: dumpEnd = parseVersion(args->OptionArg()); break; + case OPT_JSON: + jsonOutput = true; + break; } } @@ -3200,7 +3205,7 @@ int main(int argc, char* argv[]) { return FDB_EXIT_ERROR; // Only pass database optionDatabase Describe will lookup version timestamps if a cluster file was given, but quietly skip them if not. - f = stopAfter( describeBackup(argv[0], destinationContainer, describeDeep, describeTimestamps ? Optional(db) : Optional()) ); + f = stopAfter( describeBackup(argv[0], destinationContainer, describeDeep, describeTimestamps ? Optional(db) : Optional(), jsonOutput) ); break; case BACKUP_LIST: diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index c89df6e244..2ec4d6fa12 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -33,6 +33,7 @@ #include #include #include "flow/actorcompiler.h" // has to be last include +#include "JsonBuilder.h" namespace IBackupFile_impl { @@ -142,8 +143,8 @@ std::string BackupDescription::toString() const { }; for(const KeyspaceSnapshotFile &m : snapshots) { - info.append(format("Snapshot: startVersion=%s endVersion=%s totalBytes=%lld restorable=%s\n", - formatVersion(m.beginVersion).c_str(), formatVersion(m.endVersion).c_str(), m.totalSize, m.restorable.orDefault(false) ? "true" : "false")); + info.append(format("Snapshot: startVersion=%s endVersion=%s totalBytes=%lld restorable=%s expiredPct=%.2f\n", + formatVersion(m.beginVersion).c_str(), formatVersion(m.endVersion).c_str(), m.totalSize, m.restorable.orDefault(false) ? 
"true" : "false", m.expiredPct(expiredEndVersion))); } info.append(format("SnapshotBytes: %lld\n", snapshotBytes)); @@ -169,6 +170,62 @@ std::string BackupDescription::toString() const { return info; } +std::string BackupDescription::toJSON() const { + JsonBuilderObject doc; + + doc.setKey("URL", url.c_str()); + doc.setKey("Restorable", maxRestorableVersion.present()); + + auto formatVersion = [&](Version v) { + JsonBuilderObject doc; + doc.setKey("Version", v); + if(!versionTimeMap.empty()) { + auto i = versionTimeMap.find(v); + if(i != versionTimeMap.end()) + doc.setKey("Timestamp", formatTime(i->second)); + } + else if(maxLogEnd.present()) { + double days = double(v - maxLogEnd.get()) / (CLIENT_KNOBS->CORE_VERSIONSPERSECOND * 24 * 60 * 60); + doc.setKey("RelativeDays", days); + } + return doc; + }; + + JsonBuilderArray snapshotsArray; + for(const KeyspaceSnapshotFile &m : snapshots) { + JsonBuilderObject snapshotDoc; + snapshotDoc.setKey("StartVersion", formatVersion(m.beginVersion)); + snapshotDoc.setKey("EndVersion", formatVersion(m.endVersion)); + snapshotDoc.setKey("Restorable", m.restorable.orDefault(false)); + snapshotDoc.setKey("TotalBytes", m.totalSize); + snapshotDoc.setKey("PercentageExpired", m.expiredPct(expiredEndVersion)); + snapshotsArray.push_back(snapshotDoc); + } + doc.setKey("Snapshots", snapshotsArray); + + doc.setKey("TotalSnapshotBytes", snapshotBytes); + + if(expiredEndVersion.present()) + doc.setKey("ExpiredEndVersion", formatVersion(expiredEndVersion.get())); + if(unreliableEndVersion.present()) + doc.setKey("UnreliableEndVersion", formatVersion(unreliableEndVersion.get())); + if(minLogBegin.present()) + doc.setKey("MinLogBeginVersion", formatVersion(minLogBegin.get())); + if(contiguousLogEnd.present()) + doc.setKey("ContiguousLogEndVersion", formatVersion(contiguousLogEnd.get())); + if(maxLogEnd.present()) + doc.setKey("MaxLogEndVersion", formatVersion(maxLogEnd.get())); + if(minRestorableVersion.present()) + 
doc.setKey("MinRestorableVersion", formatVersion(minRestorableVersion.get())); + if(maxRestorableVersion.present()) + doc.setKey("MaxRestorableVersion", formatVersion(maxRestorableVersion.get())); + + if(!extendedDetail.empty()) + doc.setKey("ExtendedDetail", extendedDetail); + + return doc.getJson(); +} + /* BackupContainerFileSystem implements a backup container which stores files in a nested folder structure. * Inheritors must only defined methods for writing, reading, deleting, sizing, and listing files. * diff --git a/fdbclient/BackupContainer.h b/fdbclient/BackupContainer.h index bb2e397f5a..552223cc25 100644 --- a/fdbclient/BackupContainer.h +++ b/fdbclient/BackupContainer.h @@ -89,6 +89,21 @@ struct KeyspaceSnapshotFile { std::string fileName; int64_t totalSize; Optional restorable; // Whether or not the snapshot can be used in a restore, if known + bool isSingleVersion() const { + return beginVersion == endVersion; + } + double expiredPct(Optional expiredEnd) const { + double pctExpired = 0; + if(expiredEnd.present() && expiredEnd.get() > beginVersion) { + if(isSingleVersion()) { + pctExpired = 1; + } + else { + pctExpired = double(std::min(endVersion, expiredEnd.get()) - beginVersion) / (endVersion - beginVersion); + } + } + return pctExpired * 100; + } // Order by beginVersion, break ties with endVersion bool operator< (const KeyspaceSnapshotFile &rhs) const { @@ -132,6 +147,7 @@ struct BackupDescription { std::map versionTimeMap; std::string toString() const; + std::string toJSON() const; }; struct RestorableFileSet { From 5a1ba1bb2473c9951b4c393b7e568dac14b2db2b Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Wed, 6 Mar 2019 16:59:25 -0800 Subject: [PATCH 06/47] ReleaseNote: Add new info in status Add in release note: Show the number of connected coordinators per client in JSON status --- documentation/sphinx/source/release-notes.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/documentation/sphinx/source/release-notes.rst 
b/documentation/sphinx/source/release-notes.rst index fa83fc2875..4074b1481a 100644 --- a/documentation/sphinx/source/release-notes.rst +++ b/documentation/sphinx/source/release-notes.rst @@ -9,6 +9,7 @@ Features -------- Improved replication mechanism, a new hierarchical replication technique that further significantly reduces the frequency of data loss events even when multiple machines (e.g., fault-tolerant zones in the current code) permanently fail at the same time. `(PR #964) `. +* Show the number of connected coordinators per client in JSON status `(PR #1222) `_ * Get read version, read, and commit requests are counted and aggregated by server-side latency in configurable latency bands and output in JSON status. `(PR #1084) `_ * Added configuration option to choose log spilling implementation `(PR #1160) `_ From 1399aee532bed936ecc7504939358e2471860f19 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Wed, 6 Mar 2019 21:32:46 -0800 Subject: [PATCH 07/47] Added --json option to fdbbackup status. 
--- fdbbackup/backup.actor.cpp | 7 +- fdbclient/BackupAgent.actor.h | 1 + fdbclient/FileBackupAgent.actor.cpp | 164 ++++++++++++++++++++++++++++ 3 files changed, 169 insertions(+), 3 deletions(-) diff --git a/fdbbackup/backup.actor.cpp b/fdbbackup/backup.actor.cpp index 0fe25cc51b..5ecf6f2e55 100644 --- a/fdbbackup/backup.actor.cpp +++ b/fdbbackup/backup.actor.cpp @@ -214,6 +214,7 @@ CSimpleOpt::SOption g_rgBackupStatusOptions[] = { { OPT_HELP, "-h", SO_NONE }, { OPT_HELP, "--help", SO_NONE }, { OPT_DEVHELP, "--dev-help", SO_NONE }, + { OPT_JSON, "--json", SO_NONE}, #ifndef TLS_DISABLED TLS_OPTION_FLAGS #endif @@ -1703,12 +1704,12 @@ ACTOR Future statusDBBackup(Database src, Database dest, std::string tagNa return Void(); } -ACTOR Future statusBackup(Database db, std::string tagName, bool showErrors) { +ACTOR Future statusBackup(Database db, std::string tagName, bool showErrors, bool json) { try { state FileBackupAgent backupAgent; - std::string statusText = wait(backupAgent.getStatus(db, showErrors, tagName)); + std::string statusText = wait(json ? 
backupAgent.getStatusJSON(db, tagName) : backupAgent.getStatus(db, showErrors, tagName)); printf("%s\n", statusText.c_str()); } catch (Error& e) { @@ -3150,7 +3151,7 @@ int main(int argc, char* argv[]) { case BACKUP_STATUS: if(!initCluster()) return FDB_EXIT_ERROR; - f = stopAfter( statusBackup(db, tagName, true) ); + f = stopAfter( statusBackup(db, tagName, true, jsonOutput) ); break; case BACKUP_ABORT: diff --git a/fdbclient/BackupAgent.actor.h b/fdbclient/BackupAgent.actor.h index 0d2f492276..4684cc1a59 100644 --- a/fdbclient/BackupAgent.actor.h +++ b/fdbclient/BackupAgent.actor.h @@ -283,6 +283,7 @@ public: } Future getStatus(Database cx, bool showErrors, std::string tagName); + Future getStatusJSON(Database cx, std::string tagName); Future getLastRestorable(Reference tr, Key tagName); void setLastRestorable(Reference tr, Key tagName, Version version); diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index 05b96017ae..ba8c66fb78 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -34,6 +34,7 @@ #include #include #include +#include "JsonBuilder.h" #include "flow/actorcompiler.h" // This must be the last #include. 
@@ -3813,6 +3814,165 @@ public: return Void(); } + struct TimestampedVersion { + Optional version; + Optional epochs; + + bool present() const { + return version.present(); + } + + JsonBuilderObject toJSON() const { + JsonBuilderObject doc; + if(version.present()) { + doc.setKey("Version", version.get()); + if(epochs.present()) { + doc.setKey("Epochs", epochs.get()); + doc.setKey("Timestamp", timeStampToString(epochs)); + } + } + return doc; + } + }; + + // Helper actor for generating status + // If f is present, lookup epochs using timekeeper and tr, return TimestampedVersion + ACTOR static Future getTimestampedVersion(Reference tr, Future> f) { + state TimestampedVersion tv; + wait(store(tv.version, f)); + if(tv.version.present()) { + wait(store(tv.epochs, timeKeeperEpochsFromVersion(tv.version.get(), tr))); + } + return tv; + } + + ACTOR static Future getStatusJSON(FileBackupAgent* backupAgent, Database cx, std::string tagName) { + state Reference tr(new ReadYourWritesTransaction(cx)); + state JsonBuilderObject doc; + + loop { + try { + tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); + tr->setOption(FDBTransactionOptions::LOCK_AWARE); + + state KeyBackedTag tag = makeBackupTag(tagName); + state Optional uidAndAbortedFlag; + state Optional paused; + state Version recentReadVersion; + + wait( store(paused, tr->get(backupAgent->taskBucket->getPauseKey())) && store(uidAndAbortedFlag, tag.get(tr)) && store(recentReadVersion, tr->getReadVersion()) ); + + doc.setKey("AllBackupsPaused", paused.present()); + doc.setKey("Tag", tag.tagName); + + if(uidAndAbortedFlag.present()) { + state BackupConfig config(uidAndAbortedFlag.get().first); + + state EBackupState backupState = wait(config.stateEnum().getD(tr, false, EBackupState::STATE_NEVERRAN)); + JsonBuilderObject statusDoc; + statusDoc.setKey("Enum", (int)backupState); + statusDoc.setKey("Description", BackupAgentBase::getStateText(backupState)); + doc.setKey("Status", statusDoc); + + state Future done = Void(); 
+ + if(backupState != BackupAgentBase::STATE_NEVERRAN) { + state Reference bc; + state TimestampedVersion latestRestorable; + + wait( store(latestRestorable, getTimestampedVersion(tr, config.getLatestRestorableVersion(tr))) + && store(bc, config.backupContainer().getOrThrow(tr)) + ); + + doc.setKey("Restorable", latestRestorable.present()); + + if(latestRestorable.present() && backupState != BackupAgentBase::STATE_COMPLETED) { + JsonBuilderObject o = latestRestorable.toJSON(); + o.setKey("LagSeconds", (recentReadVersion - latestRestorable.version.get()) / CLIENT_KNOBS->CORE_VERSIONSPERSECOND); + doc.setKey("LatestRestorablePoint", o); + } + doc.setKey("DestinationURL", bc->getURL()); + + if(backupState == BackupAgentBase::STATE_COMPLETED) { + doc.setKey("Completed", latestRestorable.toJSON()); + } + } + + if(backupState == BackupAgentBase::STATE_DIFFERENTIAL || backupState == BackupAgentBase::STATE_BACKUP) { + state int64_t snapshotInterval; + state int64_t logBytesWritten; + state int64_t rangeBytesWritten; + state bool stopWhenDone; + state TimestampedVersion snapshotBegin; + state TimestampedVersion snapshotTargetEnd; + state TimestampedVersion latestLogEnd; + state TimestampedVersion latestSnapshotEnd; + + wait( store(snapshotInterval, config.snapshotIntervalSeconds().getOrThrow(tr)) + && store(logBytesWritten, config.logBytesWritten().getD(tr)) + && store(rangeBytesWritten, config.rangeBytesWritten().getD(tr)) + && store(stopWhenDone, config.stopWhenDone().getOrThrow(tr)) + && store(snapshotBegin, getTimestampedVersion(tr, config.snapshotBeginVersion().get(tr))) + && store(snapshotTargetEnd, getTimestampedVersion(tr, config.snapshotTargetEndVersion().get(tr))) + && store(latestLogEnd, getTimestampedVersion(tr, config.latestLogEndVersion().get(tr))) + && store(latestSnapshotEnd, getTimestampedVersion(tr, config.latestSnapshotEndVersion().get(tr))) + ); + + doc.setKey("StopAfterSnapshot", stopWhenDone); + doc.setKey("SnapshotIntervalSeconds", snapshotInterval); 
+ doc.setKey("LogBytesWritten", logBytesWritten); + doc.setKey("RangeBytesWritten", rangeBytesWritten); + + if(latestLogEnd.present()) { + doc.setKey("LatestLogEnd", latestLogEnd.toJSON()); + } + + if(latestSnapshotEnd.present()) { + doc.setKey("LatestSnapshotEnd", latestSnapshotEnd.toJSON()); + } + + JsonBuilderObject snapshot; + + if(snapshotBegin.present()) { + snapshot.setKey("Begin", snapshotBegin.toJSON()); + + if(snapshotTargetEnd.present()) { + snapshot.setKey("EndTarget", snapshotTargetEnd.toJSON()); + + Version interval = snapshotTargetEnd.version.get() - snapshotBegin.version.get(); + snapshot.setKey("IntervalSeconds", interval / CLIENT_KNOBS->CORE_VERSIONSPERSECOND); + + Version elapsed = recentReadVersion - snapshotBegin.version.get(); + double progress = (interval > 0) ? (100.0 * elapsed / interval) : 100; + snapshot.setKey("ExpectedProgress", progress); + } + } + + doc.setKey("CurrentSnapshot", snapshot); + } + + KeyBackedMap>::PairsType errors = wait(config.lastErrorPerType().getRange(tr, 0, std::numeric_limits::max(), CLIENT_KNOBS->TOO_MANY)); + JsonBuilderArray errorList; + for(auto &e : errors) { + std::string msg = e.second.first; + Version ver = e.second.second; + + JsonBuilderObject errDoc; + errDoc.setKey("Message", msg.c_str()); + errDoc.setKey("RelativeSeconds", (ver - recentReadVersion) / CLIENT_KNOBS->CORE_VERSIONSPERSECOND); + } + doc.setKey("Errors", errorList); + } + break; + } + catch (Error &e) { + wait(tr->onError(e)); + } + } + + return doc.getJson(); + } + ACTOR static Future getStatus(FileBackupAgent* backupAgent, Database cx, bool showErrors, std::string tagName) { state Reference tr(new ReadYourWritesTransaction(cx)); state std::string statusText; @@ -4179,6 +4339,10 @@ Future FileBackupAgent::getStatus(Database cx, bool showErrors, std return FileBackupAgentImpl::getStatus(this, cx, showErrors, tagName); } +Future FileBackupAgent::getStatusJSON(Database cx, std::string tagName) { + return 
FileBackupAgentImpl::getStatusJSON(this, cx, tagName); +} + Future FileBackupAgent::getLastRestorable(Reference tr, Key tagName) { return FileBackupAgentImpl::getLastRestorable(this, tr, tagName); } From 06c11a316db71fc38d054ed06f8a590dd61658f4 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Wed, 6 Mar 2019 22:34:25 -0800 Subject: [PATCH 08/47] Normalized timestamp to text format across backup and restore tooling. Added epochs field to JSON objects describing versions and timestamps in backup status and describe output, renamed some fields for clarity. --- fdbclient/BackupAgent.actor.h | 11 +++++++++ fdbclient/BackupContainer.actor.cpp | 38 ++++++++++++----------------- fdbclient/FileBackupAgent.actor.cpp | 13 +++------- 3 files changed, 31 insertions(+), 31 deletions(-) diff --git a/fdbclient/BackupAgent.actor.h b/fdbclient/BackupAgent.actor.h index 4684cc1a59..ef05df3e17 100644 --- a/fdbclient/BackupAgent.actor.h +++ b/fdbclient/BackupAgent.actor.h @@ -38,6 +38,16 @@ class BackupAgentBase : NonCopyable { public: + // Time formatter for anything backup or restore related + static std::string formatTime(int64_t epochs) { + time_t curTime = (time_t)epochs; + char buffer[128]; + struct tm timeinfo; + getLocalTime(&curTime, &timeinfo); + strftime(buffer, 128, "%Y/%m/%d %H:%M:%S", &timeinfo); + return buffer; + } + // Type of program being executed enum enumActionResult { RESULT_SUCCESSFUL = 0, RESULT_ERRORED = 1, RESULT_DUPLICATE = 2, RESULT_UNNEEDED = 3 @@ -179,6 +189,7 @@ public: return defaultTagName; } + // This is only used for automatic backup name generation static Standalone getCurrentTime() { double t = now(); time_t curTime = t; diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index 2ec4d6fa12..3154ed73ee 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -19,6 +19,8 @@ */ #include "fdbclient/BackupContainer.h" +#include "fdbclient/BackupAgent.actor.h" +#include 
"fdbclient/JsonBuilder.h" #include "flow/Trace.h" #include "flow/UnitTest.h" #include "flow/Hash3.h" @@ -33,7 +35,6 @@ #include #include #include "flow/actorcompiler.h" // has to be last include -#include "JsonBuilder.h" namespace IBackupFile_impl { @@ -69,15 +70,6 @@ void BackupFileList::toStream(FILE *fout) const { } } -std::string formatTime(int64_t t) { - time_t curTime = (time_t)t; - char buffer[128]; - struct tm timeinfo; - getLocalTime(&curTime, &timeinfo); - strftime(buffer, 128, "%Y-%m-%d %H:%M:%S", &timeinfo); - return buffer; -} - Future fetchTimes(Reference tr, std::map *pVersionTimeMap) { std::vector> futures; @@ -128,7 +120,7 @@ std::string BackupDescription::toString() const { if(!versionTimeMap.empty()) { auto i = versionTimeMap.find(v); if(i != versionTimeMap.end()) - s = format("%lld (%s)", v, formatTime(i->second).c_str()); + s = format("%lld (%s)", v, BackupAgentBase::formatTime(i->second).c_str()); else s = format("%lld (unknown)", v); } @@ -181,8 +173,10 @@ std::string BackupDescription::toJSON() const { doc.setKey("Version", v); if(!versionTimeMap.empty()) { auto i = versionTimeMap.find(v); - if(i != versionTimeMap.end()) - doc.setKey("Timestamp", formatTime(i->second)); + if(i != versionTimeMap.end()) { + doc.setKey("Timestamp", BackupAgentBase::formatTime(i->second)); + doc.setKey("Epochs", i->second); + } } else if(maxLogEnd.present()) { double days = double(v - maxLogEnd.get()) / (CLIENT_KNOBS->CORE_VERSIONSPERSECOND * 24 * 60 * 60); @@ -194,8 +188,8 @@ std::string BackupDescription::toJSON() const { JsonBuilderArray snapshotsArray; for(const KeyspaceSnapshotFile &m : snapshots) { JsonBuilderObject snapshotDoc; - snapshotDoc.setKey("StartVersion", formatVersion(m.beginVersion)); - snapshotDoc.setKey("EndVersion", formatVersion(m.endVersion)); + snapshotDoc.setKey("Start", formatVersion(m.beginVersion)); + snapshotDoc.setKey("End", formatVersion(m.endVersion)); snapshotDoc.setKey("Restorable", m.restorable.orDefault(false)); 
snapshotDoc.setKey("TotalBytes", m.totalSize); snapshotDoc.setKey("PercentageExpired", m.expiredPct(expiredEndVersion)); @@ -206,19 +200,19 @@ std::string BackupDescription::toJSON() const { doc.setKey("TotalSnapshotBytes", snapshotBytes); if(expiredEndVersion.present()) - doc.setKey("ExpiredEndVersion", formatVersion(expiredEndVersion.get())); + doc.setKey("ExpiredEnd", formatVersion(expiredEndVersion.get())); if(unreliableEndVersion.present()) - doc.setKey("UnreliableEndVersion", formatVersion(unreliableEndVersion.get())); + doc.setKey("UnreliableEnd", formatVersion(unreliableEndVersion.get())); if(minLogBegin.present()) - doc.setKey("MinLogBeginVersion", formatVersion(minLogBegin.get())); + doc.setKey("MinLogBegin", formatVersion(minLogBegin.get())); if(contiguousLogEnd.present()) - doc.setKey("ContiguousLogEndVersion", formatVersion(contiguousLogEnd.get())); + doc.setKey("ContiguousLogEnd", formatVersion(contiguousLogEnd.get())); if(maxLogEnd.present()) - doc.setKey("MaxLogEndVersion", formatVersion(maxLogEnd.get())); + doc.setKey("MaxLogEnd", formatVersion(maxLogEnd.get())); if(minRestorableVersion.present()) - doc.setKey("MinRestorableVersion", formatVersion(minRestorableVersion.get())); + doc.setKey("MinRestorablePoint", formatVersion(minRestorableVersion.get())); if(maxRestorableVersion.present()) - doc.setKey("MaxRestorableVersion", formatVersion(maxRestorableVersion.get())); + doc.setKey("MaxRestorablePoint", formatVersion(maxRestorableVersion.get())); if(!extendedDetail.empty()) doc.setKey("ExtendedDetail", extendedDetail); diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index ba8c66fb78..1cb3efe7e8 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -24,6 +24,7 @@ #include "fdbclient/ManagementAPI.actor.h" #include "fdbclient/Status.h" #include "fdbclient/KeyBackedTypes.h" +#include "fdbclient/JsonBuilder.h" #include #include @@ -34,7 +35,6 @@ #include #include #include 
-#include "JsonBuilder.h" #include "flow/actorcompiler.h" // This must be the last #include. @@ -47,15 +47,10 @@ static std::string versionToString(Optional version) { return "N/A"; } -static std::string timeStampToString(Optional ts) { - if (!ts.present()) +static std::string timeStampToString(Optional epochs) { + if (!epochs.present()) return "N/A"; - time_t curTs = ts.get(); - char buffer[128]; - struct tm* timeinfo; - timeinfo = localtime(&curTs); - strftime(buffer, 128, "%D %T", timeinfo); - return std::string(buffer); + return BackupAgentBase::formatTime(epochs.get()); } static Future> getTimestampFromVersion(Optional ver, Reference tr) { From 8bdb21a04e7aed8c2652fcbab3e75cebc7beee0f Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Wed, 6 Mar 2019 22:44:37 -0800 Subject: [PATCH 09/47] Added release note for new json output options on backup. --- documentation/sphinx/source/release-notes.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst index e6cda1a0b3..43bce080ad 100644 --- a/documentation/sphinx/source/release-notes.rst +++ b/documentation/sphinx/source/release-notes.rst @@ -16,6 +16,7 @@ Improved replication mechanism, a new hierarchical replication technique that fu * Batch priority transactions are now limited separately by ratekeeper and will be throttled at lower levels of cluster saturation. This makes it possible to run a more intense background load at saturation without significantly affecting normal priority transactions. It is still recommended not to run excessive loads at batch priority. `(PR #1198) `_ * Restore now requires the destnation cluster to be specified explicitly to avoid confusion. `(PR #1240) `_ * Restore target version can now be specified by timestamp if the original cluster is available. `(PR #1240) `_ +* Backup status and describe commands now have a --json output option. 
`(PR #1248) `_ Performance ----------- From 7f405130bb1a60ec8325bfe004e6bcaf1d7433fa Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Thu, 7 Mar 2019 16:06:19 -0800 Subject: [PATCH 10/47] Change a typo in Cluster init function --- fdbclient/NativeAPI.actor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbclient/NativeAPI.actor.h b/fdbclient/NativeAPI.actor.h index ef819437ed..057bd6910d 100644 --- a/fdbclient/NativeAPI.actor.h +++ b/fdbclient/NativeAPI.actor.h @@ -126,7 +126,7 @@ public: Future onConnected(); private: - void init(Reference connFile, bool startClientInfoMonitor, Reference> connectedCoornidatorsNum, int apiVersion=Database::API_VERSION_LATEST); + void init(Reference connFile, bool startClientInfoMonitor, Reference> connectedCoordinatorsNum, int apiVersion=Database::API_VERSION_LATEST); Reference>> clusterInterface; Reference connectionFile; From 023bbb566f3631585415004ea7af9412d76c99ed Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Sun, 10 Mar 2019 16:00:01 -0700 Subject: [PATCH 11/47] Renamed backup state enums for clarity, added backup state names. Changed Epochs to EpochSeconds in backup JSON along with some other renaming/moving of fields, and added information about snapshot dispatch. Changed timestamp format for input/output in all backup/restore contexts to be a fully qualified time with timezone offset. Added information about the last snapshot dispatch to backup config and status (not yet populated). 
--- fdbbackup/backup.actor.cpp | 12 ++-- fdbclient/BackupAgent.actor.h | 79 ++++++++++++++++++++++--- fdbclient/BackupContainer.actor.cpp | 17 ++---- fdbclient/DatabaseBackupAgent.actor.cpp | 16 ++--- fdbclient/FileBackupAgent.actor.cpp | 48 +++++++++------ 5 files changed, 117 insertions(+), 55 deletions(-) diff --git a/fdbbackup/backup.actor.cpp b/fdbbackup/backup.actor.cpp index 5ecf6f2e55..f6842be829 100644 --- a/fdbbackup/backup.actor.cpp +++ b/fdbbackup/backup.actor.cpp @@ -840,7 +840,7 @@ static void printBackupUsage(bool devhelp) { " File containing blob credentials in JSON format. Can be specified multiple times for multiple files. See below for more details.\n"); printf(" --expire_before_timestamp DATETIME\n" " Datetime cutoff for expire operations. Requires a cluster file and will use version/timestamp metadata\n" - " in the database to obtain a cutoff version very close to the timestamp given in YYYY-MM-DD.HH:MI:SS format (UTC).\n"); + " in the database to obtain a cutoff version very close to the timestamp given in %s.\n", BackupAgentBase::timeFormat().c_str()); printf(" --expire_before_version VERSION\n" " Version cutoff for expire operations. Deletes data files containing no data at or after VERSION.\n"); printf(" --delete_before_days NUM_DAYS\n" @@ -913,7 +913,7 @@ static void printRestoreUsage(bool devhelp ) { printf(TLS_HELP); #endif printf(" -v DBVERSION The version at which the database will be restored.\n"); - printf(" --timestamp Instead of a numeric version, use this to specify a timestamp in YYYY-MM-DD.HH:MI:SS format (UTC)\n"); + printf(" --timestamp Instead of a numeric version, use this to specify a timestamp in %s\n", BackupAgentBase::timeFormat().c_str()); printf(" and it will be converted to a version from that time using metadata in orig_cluster_file.\n"); printf(" --orig_cluster_file CONNFILE\n"); printf(" The cluster file for the original database from which the backup was created. 
The original database\n"); @@ -1252,8 +1252,8 @@ ACTOR Future getLayerStatus(Reference tr tagRoot.create("current_status") = statusText; tagRoot.create("last_restorable_version") = tagLastRestorableVersions[j].get(); tagRoot.create("last_restorable_seconds_behind") = last_restorable_seconds_behind; - tagRoot.create("running_backup") = (status == BackupAgentBase::STATE_DIFFERENTIAL || status == BackupAgentBase::STATE_BACKUP); - tagRoot.create("running_backup_is_restorable") = (status == BackupAgentBase::STATE_DIFFERENTIAL); + tagRoot.create("running_backup") = (status == BackupAgentBase::STATE_RUNNING_DIFFERENTIAL || status == BackupAgentBase::STATE_RUNNING); + tagRoot.create("running_backup_is_restorable") = (status == BackupAgentBase::STATE_RUNNING_DIFFERENTIAL); tagRoot.create("range_bytes_written") = tagRangeBytes[j].get(); tagRoot.create("mutation_log_bytes_written") = tagLogBytes[j].get(); tagRoot.create("mutation_stream_id") = backupTagUids[j].toString(); @@ -1296,8 +1296,8 @@ ACTOR Future getLayerStatus(Reference tr BackupAgentBase::enumState status = (BackupAgentBase::enumState)backupStatus[i].get(); JSONDoc tagRoot = tagsRoot.create(tagName); - tagRoot.create("running_backup") = (status == BackupAgentBase::STATE_DIFFERENTIAL || status == BackupAgentBase::STATE_BACKUP); - tagRoot.create("running_backup_is_restorable") = (status == BackupAgentBase::STATE_DIFFERENTIAL); + tagRoot.create("running_backup") = (status == BackupAgentBase::STATE_RUNNING_DIFFERENTIAL || status == BackupAgentBase::STATE_RUNNING); + tagRoot.create("running_backup_is_restorable") = (status == BackupAgentBase::STATE_RUNNING_DIFFERENTIAL); tagRoot.create("range_bytes_written") = tagRangeBytesDR[i].get(); tagRoot.create("mutation_log_bytes_written") = tagLogBytesDR[i].get(); tagRoot.create("mutation_stream_id") = drTagUids[i].toString(); diff --git a/fdbclient/BackupAgent.actor.h b/fdbclient/BackupAgent.actor.h index ef05df3e17..2e4b89edfd 100644 --- a/fdbclient/BackupAgent.actor.h +++ 
b/fdbclient/BackupAgent.actor.h @@ -44,17 +44,29 @@ public: char buffer[128]; struct tm timeinfo; getLocalTime(&curTime, &timeinfo); - strftime(buffer, 128, "%Y/%m/%d %H:%M:%S", &timeinfo); + strftime(buffer, 128, "%Y/%m/%d.%H:%M:%S%z", &timeinfo); return buffer; } + static std::string timeFormat() { + return "YYYY/MM/DD.HH:MI:SS[+/-]HHMM"; + } + + static int64_t parseTime(std::string timestamp) { + struct tm out; + if (strptime(timestamp.c_str(), "%Y/%m/%d.%H:%M:%S%z", &out) == nullptr) { + return -1; + } + return (int64_t) mktime(&out); + } + // Type of program being executed enum enumActionResult { RESULT_SUCCESSFUL = 0, RESULT_ERRORED = 1, RESULT_DUPLICATE = 2, RESULT_UNNEEDED = 3 }; enum enumState { - STATE_ERRORED = 0, STATE_SUBMITTED = 1, STATE_BACKUP = 2, STATE_DIFFERENTIAL = 3, STATE_COMPLETED = 4, STATE_NEVERRAN = 5, STATE_ABORTED = 6, STATE_PARTIALLY_ABORTED = 7 + STATE_ERRORED = 0, STATE_SUBMITTED = 1, STATE_RUNNING = 2, STATE_RUNNING_DIFFERENTIAL = 3, STATE_COMPLETED = 4, STATE_NEVERRAN = 5, STATE_ABORTED = 6, STATE_PARTIALLY_ABORTED = 7 }; static const Key keyFolderId; @@ -100,11 +112,11 @@ public: } else if (!stateText.compare("has been started")) { - enState = STATE_BACKUP; + enState = STATE_RUNNING; } else if (!stateText.compare("is differential")) { - enState = STATE_DIFFERENTIAL; + enState = STATE_RUNNING_DIFFERENTIAL; } else if (!stateText.compare("has been completed")) { @@ -122,7 +134,7 @@ public: return enState; } - // Convert the status text to an enumerated value + // Convert the status enum to a text description static const char* getStateText(enumState enState) { const char* stateText; @@ -138,10 +150,10 @@ public: case STATE_SUBMITTED: stateText = "has been submitted"; break; - case STATE_BACKUP: + case STATE_RUNNING: stateText = "has been started"; break; - case STATE_DIFFERENTIAL: + case STATE_RUNNING_DIFFERENTIAL: stateText = "is differential"; break; case STATE_COMPLETED: @@ -161,6 +173,45 @@ public: return stateText; } + // Convert 
the status enum to a name + static const char* getStateName(enumState enState) + { + const char* s; + + switch (enState) + { + case STATE_ERRORED: + s = "Errored"; + break; + case STATE_NEVERRAN: + s = "NeverRan"; + break; + case STATE_SUBMITTED: + s = "Submitted"; + break; + case STATE_RUNNING: + s = "Running"; + break; + case STATE_RUNNING_DIFFERENTIAL: + s = "RunningDifferentially"; + break; + case STATE_COMPLETED: + s = "Completed"; + break; + case STATE_ABORTED: + s = "Aborted"; + break; + case STATE_PARTIALLY_ABORTED: + s = "Aborting"; + break; + default: + s = ""; + break; + } + + return s; + } + // Determine if the specified state is runnable static bool isRunnable(enumState enState) { @@ -169,8 +220,8 @@ public: switch (enState) { case STATE_SUBMITTED: - case STATE_BACKUP: - case STATE_DIFFERENTIAL: + case STATE_RUNNING: + case STATE_RUNNING_DIFFERENTIAL: case STATE_PARTIALLY_ABORTED: isRunnable = true; break; @@ -691,6 +742,14 @@ public: return configSpace.pack(LiteralStringRef(__FUNCTION__)); } + KeyBackedBinaryValue snapshotDispatchLastShardsBehind() { + return configSpace.pack(LiteralStringRef(__FUNCTION__)); + } + + KeyBackedProperty snapshotDispatchLastVersion() { + return configSpace.pack(LiteralStringRef(__FUNCTION__)); + } + Future initNewSnapshot(Reference tr, int64_t intervalSeconds = -1) { BackupConfig © = *this; // Capture this by value instead of this ptr @@ -714,6 +773,8 @@ public: copy.snapshotBeginVersion().set(tr, beginVersion.get()); copy.snapshotTargetEndVersion().set(tr, endVersion); copy.snapshotRangeFileCount().set(tr, 0); + copy.snapshotDispatchLastVersion().clear(tr); + copy.snapshotDispatchLastShardsBehind().clear(tr); return Void(); }); diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index 3154ed73ee..cbe2fcb32f 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -175,7 +175,7 @@ std::string BackupDescription::toJSON() const { auto i = 
versionTimeMap.find(v); if(i != versionTimeMap.end()) { doc.setKey("Timestamp", BackupAgentBase::formatTime(i->second)); - doc.setKey("Epochs", i->second); + doc.setKey("EpochSeconds", i->second); } } else if(maxLogEnd.present()) { @@ -1628,20 +1628,11 @@ ACTOR Future timeKeeperVersionFromDatetime(std::string datetime, Databa state KeyBackedMap versionMap(timeKeeperPrefixRange.begin); state Reference tr = Reference(new ReadYourWritesTransaction(db)); - int year, month, day, hour, minute, second; - if (sscanf(datetime.c_str(), "%d-%d-%d.%d:%d:%d", &year, &month, &day, &hour, &minute, &second) != 6) { - fprintf(stderr, "ERROR: Incorrect date/time format.\n"); + state int64_t time = BackupAgentBase::parseTime(datetime); + if(time < 0) { + fprintf(stderr, "ERROR: Incorrect date/time or format. Format is %s.\n", BackupAgentBase::timeFormat().c_str()); throw backup_error(); } - struct tm expDateTime = {0}; - expDateTime.tm_year = year - 1900; - expDateTime.tm_mon = month - 1; - expDateTime.tm_mday = day; - expDateTime.tm_hour = hour; - expDateTime.tm_min = minute; - expDateTime.tm_sec = second; - expDateTime.tm_isdst = -1; - state int64_t time = (int64_t) mktime(&expDateTime); loop { try { diff --git a/fdbclient/DatabaseBackupAgent.actor.cpp b/fdbclient/DatabaseBackupAgent.actor.cpp index e9fb6b9f2b..5522dca3f2 100644 --- a/fdbclient/DatabaseBackupAgent.actor.cpp +++ b/fdbclient/DatabaseBackupAgent.actor.cpp @@ -1376,7 +1376,7 @@ namespace dbBackup { try { tr.setOption(FDBTransactionOptions::LOCK_AWARE); tr.addReadConflictRange(singleKeyRange(sourceStates.pack(DatabaseBackupAgent::keyStateStatus))); - tr.set(sourceStates.pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_DIFFERENTIAL))); + tr.set(sourceStates.pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_RUNNING_DIFFERENTIAL))); Key versionKey = 
task->params[DatabaseBackupAgent::keyConfigLogUid].withPrefix(task->params[BackupAgentBase::destUid]).withPrefix(backupLatestVersionsPrefix); Optional prevBeginVersion = wait(tr.get(versionKey)); @@ -1418,7 +1418,7 @@ namespace dbBackup { wait(success(FinishedFullBackupTaskFunc::addTask(tr, taskBucket, task, TaskCompletionKey::noSignal()))); } else { // Start the writing of logs, if differential - tr->set(states.pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_DIFFERENTIAL))); + tr->set(states.pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_RUNNING_DIFFERENTIAL))); allPartsDone = futureBucket->future(tr); @@ -1544,7 +1544,7 @@ namespace dbBackup { srcTr2->set( Subspace(databaseBackupPrefixRange.begin).get(BackupAgentBase::keySourceTagName).pack(task->params[BackupAgentBase::keyTagName]), logUidValue ); srcTr2->set( sourceStates.pack(DatabaseBackupAgent::keyFolderId), task->params[DatabaseBackupAgent::keyFolderId] ); - srcTr2->set( sourceStates.pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_BACKUP))); + srcTr2->set( sourceStates.pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_RUNNING))); state Key destPath = destUidValue.withPrefix(backupLogKeys.begin); // Start logging the mutations for the specified ranges of the tag @@ -1572,7 +1572,7 @@ namespace dbBackup { tr->set(logUidValue.withPrefix(applyMutationsBeginRange.begin), BinaryWriter::toValue(beginVersion, Unversioned())); tr->set(logUidValue.withPrefix(applyMutationsEndRange.begin), BinaryWriter::toValue(beginVersion, Unversioned())); - tr->set(states.pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_BACKUP))); + tr->set(states.pack(DatabaseBackupAgent::keyStateStatus), 
StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_RUNNING))); state Reference kvBackupRangeComplete = futureBucket->future(tr); state Reference kvBackupComplete = futureBucket->future(tr); @@ -1776,7 +1776,7 @@ public: } // Break, if in differential mode (restorable) and stopWhenDone is not enabled - if ((!stopWhenDone) && (BackupAgentBase::STATE_DIFFERENTIAL == status)) { + if ((!stopWhenDone) && (BackupAgentBase::STATE_RUNNING_DIFFERENTIAL == status)) { return status; } @@ -1939,7 +1939,7 @@ public: state int status = wait(backupAgent->getStateValue(dest, destlogUid)); TraceEvent("DBA_SwitchoverStart").detail("Status", status); - if (status != BackupAgentBase::STATE_DIFFERENTIAL && status != BackupAgentBase::STATE_COMPLETED) { + if (status != BackupAgentBase::STATE_RUNNING_DIFFERENTIAL && status != BackupAgentBase::STATE_COMPLETED) { throw backup_duplicate(); } @@ -2296,10 +2296,10 @@ public: case BackupAgentBase::STATE_SUBMITTED: statusText += "The DR on tag `" + tagNameDisplay + "' is NOT a complete copy of the primary database (just started).\n"; break; - case BackupAgentBase::STATE_BACKUP: + case BackupAgentBase::STATE_RUNNING: statusText += "The DR on tag `" + tagNameDisplay + "' is NOT a complete copy of the primary database.\n"; break; - case BackupAgentBase::STATE_DIFFERENTIAL: + case BackupAgentBase::STATE_RUNNING_DIFFERENTIAL: statusText += "The DR on tag `" + tagNameDisplay + "' is a complete copy of the primary database.\n"; break; case BackupAgentBase::STATE_COMPLETED: diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index 1cb3efe7e8..905f777ce2 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -2059,8 +2059,8 @@ namespace fileBackup { } // If the backup is restorable but the state is not differential then set state to differential - if(restorableVersion.present() && backupState != BackupAgentBase::STATE_DIFFERENTIAL) - config.stateEnum().set(tr, 
BackupAgentBase::STATE_DIFFERENTIAL); + if(restorableVersion.present() && backupState != BackupAgentBase::STATE_RUNNING_DIFFERENTIAL) + config.stateEnum().set(tr, BackupAgentBase::STATE_RUNNING_DIFFERENTIAL); // If stopWhenDone is set and there is a restorable version, set the done future and do not create further tasks. if(stopWhenDone && restorableVersion.present()) { @@ -2295,8 +2295,8 @@ namespace fileBackup { } // If the backup is restorable and the state isn't differential the set state to differential - if(restorableVersion.present() && backupState != BackupAgentBase::STATE_DIFFERENTIAL) - config.stateEnum().set(tr, BackupAgentBase::STATE_DIFFERENTIAL); + if(restorableVersion.present() && backupState != BackupAgentBase::STATE_RUNNING_DIFFERENTIAL) + config.stateEnum().set(tr, BackupAgentBase::STATE_RUNNING_DIFFERENTIAL); // Unless we are to stop, start the next snapshot using the default interval Reference snapshotDoneFuture = task->getDoneFuture(futureBucket); @@ -2376,7 +2376,7 @@ namespace fileBackup { config.startMutationLogs(tr, backupRange, destUidValue); } - config.stateEnum().set(tr, EBackupState::STATE_BACKUP); + config.stateEnum().set(tr, EBackupState::STATE_RUNNING); state Reference backupFinished = futureBucket->future(tr); @@ -3474,7 +3474,7 @@ public: // Break, if one of the following is true // - no longer runnable // - in differential mode (restorable) and stopWhenDone is not enabled - if( !FileBackupAgent::isRunnable(status) || ((!stopWhenDone) && (BackupAgentBase::STATE_DIFFERENTIAL == status) )) { + if( !FileBackupAgent::isRunnable(status) || ((!stopWhenDone) && (BackupAgentBase::STATE_RUNNING_DIFFERENTIAL == status) )) { if(pContainer != nullptr) { Reference c = wait(config.backupContainer().getOrThrow(tr, false, backup_invalid_info())); @@ -3822,7 +3822,7 @@ public: if(version.present()) { doc.setKey("Version", version.get()); if(epochs.present()) { - doc.setKey("Epochs", epochs.get()); + doc.setKey("EpochSeconds", epochs.get()); 
doc.setKey("Timestamp", timeStampToString(epochs)); } } @@ -3865,8 +3865,10 @@ public: state EBackupState backupState = wait(config.stateEnum().getD(tr, false, EBackupState::STATE_NEVERRAN)); JsonBuilderObject statusDoc; - statusDoc.setKey("Enum", (int)backupState); + statusDoc.setKey("Name", BackupAgentBase::getStateName(backupState)); statusDoc.setKey("Description", BackupAgentBase::getStateText(backupState)); + statusDoc.setKey("Completed", backupState == BackupAgentBase::STATE_COMPLETED); + statusDoc.setKey("Running", BackupAgentBase::isRunnable(backupState)); doc.setKey("Status", statusDoc); state Future done = Void(); @@ -3881,19 +3883,17 @@ public: doc.setKey("Restorable", latestRestorable.present()); - if(latestRestorable.present() && backupState != BackupAgentBase::STATE_COMPLETED) { + if(latestRestorable.present()) { JsonBuilderObject o = latestRestorable.toJSON(); - o.setKey("LagSeconds", (recentReadVersion - latestRestorable.version.get()) / CLIENT_KNOBS->CORE_VERSIONSPERSECOND); + if(backupState != BackupAgentBase::STATE_COMPLETED) { + o.setKey("LagSeconds", (recentReadVersion - latestRestorable.version.get()) / CLIENT_KNOBS->CORE_VERSIONSPERSECOND); + } doc.setKey("LatestRestorablePoint", o); } doc.setKey("DestinationURL", bc->getURL()); - - if(backupState == BackupAgentBase::STATE_COMPLETED) { - doc.setKey("Completed", latestRestorable.toJSON()); - } } - if(backupState == BackupAgentBase::STATE_DIFFERENTIAL || backupState == BackupAgentBase::STATE_BACKUP) { + if(backupState == BackupAgentBase::STATE_RUNNING_DIFFERENTIAL || backupState == BackupAgentBase::STATE_RUNNING) { state int64_t snapshotInterval; state int64_t logBytesWritten; state int64_t rangeBytesWritten; @@ -3902,6 +3902,8 @@ public: state TimestampedVersion snapshotTargetEnd; state TimestampedVersion latestLogEnd; state TimestampedVersion latestSnapshotEnd; + state TimestampedVersion snapshotLastDispatch; + state Optional snapshotLastDispatchShardsBehind; wait( store(snapshotInterval, 
config.snapshotIntervalSeconds().getOrThrow(tr)) && store(logBytesWritten, config.logBytesWritten().getD(tr)) @@ -3911,6 +3913,8 @@ public: && store(snapshotTargetEnd, getTimestampedVersion(tr, config.snapshotTargetEndVersion().get(tr))) && store(latestLogEnd, getTimestampedVersion(tr, config.latestLogEndVersion().get(tr))) && store(latestSnapshotEnd, getTimestampedVersion(tr, config.latestSnapshotEndVersion().get(tr))) + && store(snapshotLastDispatch, getTimestampedVersion(tr, config.snapshotDispatchLastVersion().get(tr))) + && store(snapshotLastDispatchShardsBehind, config.snapshotDispatchLastShardsBehind().get(tr)) ); doc.setKey("StopAfterSnapshot", stopWhenDone); @@ -3941,6 +3945,12 @@ public: double progress = (interval > 0) ? (100.0 * elapsed / interval) : 100; snapshot.setKey("ExpectedProgress", progress); } + + JsonBuilderObject dispatchDoc = snapshotLastDispatch.toJSON(); + if(snapshotLastDispatchShardsBehind.present()) { + dispatchDoc.setKey("ShardsBehind", snapshotLastDispatchShardsBehind.get()); + } + snapshot.setKey("LastDispatch", dispatchDoc); } doc.setKey("CurrentSnapshot", snapshot); @@ -4010,11 +4020,11 @@ public: case BackupAgentBase::STATE_SUBMITTED: statusText += "The backup on tag `" + tagName + "' is in progress (just started) to " + bc->getURL() + ".\n"; break; - case BackupAgentBase::STATE_BACKUP: + case BackupAgentBase::STATE_RUNNING: statusText += "The backup on tag `" + tagName + "' is in progress to " + bc->getURL() + ".\n"; snapshotProgress = true; break; - case BackupAgentBase::STATE_DIFFERENTIAL: + case BackupAgentBase::STATE_RUNNING_DIFFERENTIAL: statusText += "The backup on tag `" + tagName + "' is restorable but continuing to " + bc->getURL() + ".\n"; snapshotProgress = true; break; @@ -4057,7 +4067,7 @@ public: ); statusText += format("Snapshot interval is %lld seconds. 
", snapshotInterval); - if(backupState == BackupAgentBase::STATE_DIFFERENTIAL) + if(backupState == BackupAgentBase::STATE_RUNNING_DIFFERENTIAL) statusText += format("Current snapshot progress target is %3.2f%% (>100%% means the snapshot is supposed to be done)\n", 100.0 * (recentReadVersion - snapshotBeginVersion) / (snapshotTargetEndVersion - snapshotBeginVersion)) ; else statusText += "The initial snapshot is still running.\n"; @@ -4202,7 +4212,7 @@ public: backupConfig = BackupConfig(uidFlag.first); state EBackupState status = wait(backupConfig.stateEnum().getOrThrow(ryw_tr)); - if (status != BackupAgentBase::STATE_DIFFERENTIAL ) { + if (status != BackupAgentBase::STATE_RUNNING_DIFFERENTIAL ) { throw backup_duplicate(); } From f2953db7d817a708b95269b971bc600d4a350ac7 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Mon, 11 Mar 2019 01:25:51 -0700 Subject: [PATCH 12/47] Added updating of backup snapshot shards behind in snapshot dispatcher so status can determine if a snapshot is lagging the configured speed. 
--- fdbclient/BackupAgent.actor.h | 2 +- fdbclient/FileBackupAgent.actor.cpp | 34 ++++++++++++++++++++++++++--- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/fdbclient/BackupAgent.actor.h b/fdbclient/BackupAgent.actor.h index 2e4b89edfd..929efe9458 100644 --- a/fdbclient/BackupAgent.actor.h +++ b/fdbclient/BackupAgent.actor.h @@ -742,7 +742,7 @@ public: return configSpace.pack(LiteralStringRef(__FUNCTION__)); } - KeyBackedBinaryValue snapshotDispatchLastShardsBehind() { + KeyBackedProperty snapshotDispatchLastShardsBehind() { return configSpace.pack(LiteralStringRef(__FUNCTION__)); } diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index 905f777ce2..b6f2f89d9d 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -1273,6 +1273,10 @@ namespace fileBackup { static const uint32_t version; static struct { + // Set by Execute, used by Finish + static TaskParam shardsBehind() { + return LiteralStringRef(__FUNCTION__); + } // Set by Execute, used by Finish static TaskParam snapshotFinished() { return LiteralStringRef(__FUNCTION__); @@ -1369,8 +1373,11 @@ namespace fileBackup { && store(recentReadVersion, tr->getReadVersion()) && taskBucket->keepRunning(tr, task)); - // If the snapshot batch future key does not exist, create it, set it, and commit - // Also initialize the target snapshot end version if it is not yet set. 
+ // If the snapshot batch future key does not exist, this is the first execution of this dispatch task so + // - create and set the snapshot batch future key + // - initialize the batch size to 0 + // - initialize the target snapshot end version if it is not yet set + // - commit if(!snapshotBatchFutureKey.present()) { snapshotBatchFuture = futureBucket->future(tr); config.snapshotBatchFuture().set(tr, snapshotBatchFuture->pack()); @@ -1540,14 +1547,28 @@ namespace fileBackup { // Calculate number of shards that should be done before the next interval end // timeElapsed is between 0 and 1 and represents what portion of the shards we should have completed by now double timeElapsed; + Version snapshotScheduledVersionInterval = snapshotTargetEndVersion - snapshotBeginVersion; if(snapshotTargetEndVersion > snapshotBeginVersion) - timeElapsed = std::min(1.0, (double)(nextDispatchVersion - snapshotBeginVersion) / (snapshotTargetEndVersion - snapshotBeginVersion)); + timeElapsed = std::min(1.0, (double)(nextDispatchVersion - snapshotBeginVersion) / (snapshotScheduledVersionInterval)); else timeElapsed = 1.0; state int countExpectedShardsDone = countAllShards * timeElapsed; state int countShardsToDispatch = std::max(0, countExpectedShardsDone - countShardsDone); + // Calculate the number of shards that would have been dispatched by a normal (on-schedule) BackupSnapshotDispatchTask given + // the dispatch window and the start and expected-end versions of the current snapshot. + int64_t dispatchWindow = nextDispatchVersion - recentReadVersion; + int countShardsExpectedPerNormalWindow = (double(dispatchWindow) / snapshotScheduledVersionInterval) * countAllShards; + // countShardsThisDispatch is how many total shards are to be dispatched by this dispatch cycle. 
+ // Since this dispatch cycle can span many incrementally progressing separate executions of the BackupSnapshotDispatchTask + // instance, this is calculated as the number of shards dispatched so far in the dispatch batch plus the number of shards + // the current execution is going to attempt to do. + int countShardsThisDispatch = countShardsToDispatch + snapshotBatchSize.get(); + // The number of shards 'behind' the snapshot is the count of how many additional shards beyond normal are being dispatched, if any. + int countShardsBehind = std::max(0, countShardsToDispatch + snapshotBatchSize.get() - countShardsExpectedPerNormalWindow); + Params.shardsBehind().set(task, countShardsBehind); + TraceEvent("FileBackupSnapshotDispatchStats") .detail("BackupUID", config.getUid()) .detail("AllShards", countAllShards) @@ -1555,6 +1576,7 @@ .detail("ShardsNotDone", countShardsNotDone) .detail("ExpectedShardsDone", countExpectedShardsDone) .detail("ShardsToDispatch", countShardsToDispatch) + .detail("ShardsBehind", countShardsBehind) .detail("SnapshotBeginVersion", snapshotBeginVersion) .detail("SnapshotTargetEndVersion", snapshotTargetEndVersion) .detail("NextDispatchVersion", nextDispatchVersion) @@ -1627,6 +1649,8 @@ ASSERT(snapshotBatchSize.get() == oldBatchSize); config.snapshotBatchSize().set(tr, newBatchSize); snapshotBatchSize = newBatchSize; + config.snapshotDispatchLastShardsBehind().set(tr, Params.shardsBehind().get(task)); + config.snapshotDispatchLastVersion().set(tr, tr->getReadVersion().get()); } state std::vector> addTaskFutures; @@ -1729,6 +1753,10 @@ config.snapshotBatchDispatchDoneKey().clear(tr); config.snapshotBatchSize().clear(tr); + // Update shardsBehind here again in case the execute phase did not actually have to create any shard tasks + config.snapshotDispatchLastShardsBehind().set(tr, Params.shardsBehind().getOrDefault(task, 0)); + config.snapshotDispatchLastVersion().set(tr,
tr->getReadVersion().get()); + state Reference snapshotFinishedFuture = task->getDoneFuture(futureBucket); // If the snapshot is finished, the next task is to write a snapshot manifest, otherwise it's another snapshot dispatch task. From f0024c876ec36bb74ad6f7926b27af4015b513bc Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Mon, 11 Mar 2019 01:42:19 -0700 Subject: [PATCH 13/47] Changed backup paused JSON field name to be more precise and match the human readable status version. --- fdbclient/FileBackupAgent.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index b6f2f89d9d..20e007afe7 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -3885,7 +3885,7 @@ public: wait( store(paused, tr->get(backupAgent->taskBucket->getPauseKey())) && store(uidAndAbortedFlag, tag.get(tr)) && store(recentReadVersion, tr->getReadVersion()) ); - doc.setKey("AllBackupsPaused", paused.present()); + doc.setKey("BackupAgentsPaused", paused.present()); doc.setKey("Tag", tag.tagName); if(uidAndAbortedFlag.present()) { From adc5553cd93cacc65232a7827c2f4769969b4860 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Mon, 11 Mar 2019 02:05:01 -0700 Subject: [PATCH 14/47] Updated backup documentation regarding timestamp format. --- documentation/sphinx/source/backups.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/documentation/sphinx/source/backups.rst b/documentation/sphinx/source/backups.rst index f4d95d23e9..826f79a011 100644 --- a/documentation/sphinx/source/backups.rst +++ b/documentation/sphinx/source/backups.rst @@ -307,7 +307,7 @@ The ``expire`` subcommand will remove data from a backup prior to some point in The expiration CUTOFF must be specified by one of the two following arguments: ``--expire_before_timestamp `` - Specifies the expiration cutoff to DATETIME. 
Requires a cluster file and will use version/timestamp metadata in the database to convert DATETIME to a database commit version. DATETIME must be in the form "YYYY-MM-DD.HH:MI:SS" in UTC. + Specifies the expiration cutoff to DATETIME. Requires a cluster file and will use version/timestamp metadata in the database to convert DATETIME to a database commit version. DATETIME must be in the form "YYYY/MM/DD.HH:MI:SS+hhmm", for example "2018/12/31.23:59:59-0800". ``--expire_before_version `` Specifies the cutoff by a database commit version. @@ -315,7 +315,7 @@ The expiration CUTOFF must be specified by one of the two following arguments: Optionally, the user can specify a minimum RESTORABILITY guarantee with one of the following options. ``--restorable_after_timestamp `` - Specifies that the backup must be restorable to DATETIME and later. Requires a cluster file and will use version/timestamp metadata in the database to convert DATETIME to a database commit version. DATETIME must be in the form "YYYY-MM-DD.HH:MI:SS" in UTC. + Specifies that the backup must be restorable to DATETIME and later. Requires a cluster file and will use version/timestamp metadata in the database to convert DATETIME to a database commit version. DATETIME must be in the form "YYYY/MM/DD.HH:MI:SS+hhmm", for example "2018/12/31.23:59:59-0800". ``--restorable_after_version `` Specifies that the backup must be restorable as of VERSION and later. @@ -419,8 +419,8 @@ The ``start`` command will start a new restore on the specified (or default) tag ``-v `` Instead of the latest version the backup can be restored to, restore to VERSION. -``--timestamp `` - Instead of the latest version the backup can be restored to, restore to a version from approximately the given timestamp. Requires orig_cluster_file to be specified. +``--timestamp `` + Instead of the latest version the backup can be restored to, restore to a version from approximately the given timestamp. Requires orig_cluster_file to be specified. 
DATETIME must be in the form "YYYY/MM/DD.HH:MI:SS+hhmm", for example "2018/12/31.23:59:59-0800". ``--orig_cluster_file `` The cluster file for the original database from which the backup was created. The original database is only needed to convert a --timestamp argument to a database version. From ad4d2f192b24d87e24312e32d77cc25fe8fb8b95 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Mon, 11 Mar 2019 02:10:06 -0700 Subject: [PATCH 15/47] Added release note for backup/restore datetime format changes as it breaks compatibility with existing tooling. --- documentation/sphinx/source/release-notes.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst index 43bce080ad..c64525441f 100644 --- a/documentation/sphinx/source/release-notes.rst +++ b/documentation/sphinx/source/release-notes.rst @@ -25,6 +25,7 @@ Fixes ----- * Python: Creating a ``SingleFloat`` for the tuple layer didn't work with integers. `(PR #1216) `_ +* Standardized datetime string format across all backup and restore command options and outputs. `(PR #1248) `_ Status ------ From 9d1f06e2b61d324f17b920b8e100c254d01b11a8 Mon Sep 17 00:00:00 2001 From: Meng Xu <42559636+xumengpanda@users.noreply.github.com> Date: Mon, 11 Mar 2019 16:14:37 -0700 Subject: [PATCH 16/47] Update documentation/sphinx/source/mr-status.rst --- documentation/sphinx/source/mr-status.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/sphinx/source/mr-status.rst b/documentation/sphinx/source/mr-status.rst index 65be3cbf8a..07841b6f87 100644 --- a/documentation/sphinx/source/mr-status.rst +++ b/documentation/sphinx/source/mr-status.rst @@ -80,7 +80,7 @@ The following format informally describes the JSON containing the status data. 
T "connected_clients": [ { "address": "127.0.0.1:1234", - "log_group": "default" + "log_group": "default", "connected_coordinators": 2 } ], From 78ff3d92c1450322cc56627743f657158fcd393c Mon Sep 17 00:00:00 2001 From: Alec Grieser Date: Sun, 17 Feb 2019 10:09:42 -0800 Subject: [PATCH 17/47] memoize the packed Tuple representation --- bindings/java/CMakeLists.txt | 1 + .../tuple/IterableComparator.java | 2 +- .../com/apple/foundationdb/tuple/Tuple.java | 156 ++++-- .../apple/foundationdb/tuple/TupleUtil.java | 458 ++++++++++-------- .../test/TuplePerformanceTest.java | 12 +- 5 files changed, 373 insertions(+), 256 deletions(-) diff --git a/bindings/java/CMakeLists.txt b/bindings/java/CMakeLists.txt index 8a67e8f08a..93e7e7ea8e 100644 --- a/bindings/java/CMakeLists.txt +++ b/bindings/java/CMakeLists.txt @@ -89,6 +89,7 @@ set(JAVA_TESTS_SRCS src/test/com/apple/foundationdb/test/TesterArgs.java src/test/com/apple/foundationdb/test/TestResult.java src/test/com/apple/foundationdb/test/TupleTest.java + src/test/com/apple/foundationdb/test/TuplePerformanceTest.java src/test/com/apple/foundationdb/test/VersionstampSmokeTest.java src/test/com/apple/foundationdb/test/WatchTest.java src/test/com/apple/foundationdb/test/WhileTrueTest.java) diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/IterableComparator.java b/bindings/java/src/main/com/apple/foundationdb/tuple/IterableComparator.java index 71aa23e9b1..1587b3fd6e 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/IterableComparator.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/IterableComparator.java @@ -34,7 +34,7 @@ import java.util.Iterator; * tuple1.compareTo(tuple2) * == new IterableComparator().compare(tuple1, tuple2) * == new IterableComparator().compare(tuple1.getItems(), tuple2.getItems()), - * == ByteArrayUtil.compareUnsigned(tuple1.pack(), tuple2.pack())} + * == ByteArrayUtil.compareUnsigned(tuple1.packInternal(), tuple2.packInternal())} * * *

diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java b/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java index 557432d4e3..7b14632452 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java @@ -68,10 +68,11 @@ import com.apple.foundationdb.Range; * This class is not thread safe. */ public class Tuple implements Comparable, Iterable { - private static IterableComparator comparator = new IterableComparator(); + private static final IterableComparator comparator = new IterableComparator(); private List elements; private int memoizedHash = 0; + private byte[] packed = null; private Tuple(List elements, Object newItem) { this(elements); @@ -82,6 +83,12 @@ public class Tuple implements Comparable, Iterable { this.elements = new ArrayList<>(elements); } + private enum VersionstampExpectations { + UNKNOWN, + HAS_INCOMPLETE, + HAS_NO_INCOMPLETE + } + /** * Creates a copy of this {@code Tuple} with an appended last element. The parameter * is untyped but only {@link String}, {@code byte[]}, {@link Number}s, {@link UUID}s, @@ -261,7 +268,7 @@ public class Tuple implements Comparable, Iterable { * @return a newly created {@code Tuple} */ public Tuple addAll(List o) { - List merged = new ArrayList(o.size() + this.elements.size()); + List merged = new ArrayList<>(o.size() + this.elements.size()); merged.addAll(this.elements); merged.addAll(o); return new Tuple(merged); @@ -275,7 +282,7 @@ public class Tuple implements Comparable, Iterable { * @return a newly created {@code Tuple} */ public Tuple addAll(Tuple other) { - List merged = new ArrayList(this.size() + other.size()); + List merged = new ArrayList<>(this.size() + other.size()); merged.addAll(this.elements); merged.addAll(other.peekItems()); return new Tuple(merged); @@ -285,10 +292,10 @@ public class Tuple implements Comparable, Iterable { * Get an encoded representation of this {@code Tuple}. 
Each element is encoded to * {@code byte}s and concatenated. * - * @return a serialized representation of this {@code Tuple}. + * @return a packed representation of this {@code Tuple}. */ public byte[] pack() { - return pack(null); + return packInternal(null, true); } /** @@ -296,11 +303,36 @@ public class Tuple implements Comparable, Iterable { * {@code byte}s and concatenated, and then the prefix supplied is prepended to * the array. * - * @param prefix additional byte-array prefix to prepend to serialized bytes. - * @return a serialized representation of this {@code Tuple} prepended by the {@code prefix}. + * @param prefix additional byte-array prefix to prepend to packed bytes. + * @return a packed representation of this {@code Tuple} prepended by the {@code prefix}. */ public byte[] pack(byte[] prefix) { - return TupleUtil.pack(elements, prefix); + return packInternal(prefix, true); + } + + byte[] packInternal(byte[] prefix, boolean copy) { + boolean hasPrefix = prefix != null && prefix.length > 1; + if(packed == null) { + byte[] result = TupleUtil.pack(elements, prefix); + if(hasPrefix) { + packed = Arrays.copyOfRange(result, prefix.length, result.length); + return result; + } + else { + packed = result; + } + } + if(hasPrefix) { + return ByteArrayUtil.join(prefix, packed); + } + else { + if(copy) { + return Arrays.copyOf(packed, packed.length); + } + else { + return packed; + } + } } /** @@ -309,7 +341,7 @@ public class Tuple implements Comparable, Iterable { * This works the same as the {@link #packWithVersionstamp(byte[]) one-paramter version of this method}, * but it does not add any prefix to the array. * - * @return a serialized representation of this {@code Tuple} for use with versionstamp ops. + * @return a packed representation of this {@code Tuple} for use with versionstamp ops. 
* @throws IllegalArgumentException if there is not exactly one incomplete {@link Versionstamp} included in this {@code Tuple} */ public byte[] packWithVersionstamp() { @@ -322,28 +354,71 @@ public class Tuple implements Comparable, Iterable { * There must be exactly one incomplete {@link Versionstamp} instance within this * {@code Tuple} or this will throw an {@link IllegalArgumentException}. * Each element is encoded to {@code byte}s and concatenated, the prefix - * is then prepended to the array, and then the index of the serialized incomplete + * is then prepended to the array, and then the index of the packed incomplete * {@link Versionstamp} is appended as a little-endian integer. This can then be passed * as the key to * {@link com.apple.foundationdb.Transaction#mutate(com.apple.foundationdb.MutationType, byte[], byte[]) Transaction.mutate()} * with the {@code SET_VERSIONSTAMPED_KEY} {@link com.apple.foundationdb.MutationType}, and the transaction's * version will then be filled in at commit time. * - * @param prefix additional byte-array prefix to prepend to serialized bytes. - * @return a serialized representation of this {@code Tuple} for use with versionstamp ops. + * @param prefix additional byte-array prefix to prepend to packed bytes. + * @return a packed representation of this {@code Tuple} for use with versionstamp ops. 
* @throws IllegalArgumentException if there is not exactly one incomplete {@link Versionstamp} included in this {@code Tuple} */ public byte[] packWithVersionstamp(byte[] prefix) { return TupleUtil.packWithVersionstamp(elements, prefix); } + byte[] packWithVersionstampInternal(byte[] prefix, boolean copy) { + boolean hasPrefix = prefix != null && prefix.length > 0; + if(packed == null) { + byte[] result = TupleUtil.packWithVersionstamp(elements, prefix); + if(hasPrefix) { + byte[] withoutPrefix = Arrays.copyOfRange(result, prefix.length, result.length); + TupleUtil.adjustVersionPosition(packed, -1 * prefix.length); + packed = withoutPrefix; + return result; + } + else { + packed = result; + } + } + if(hasPrefix) { + byte[] withPrefix = ByteArrayUtil.join(prefix, packed); + TupleUtil.adjustVersionPosition(withPrefix, prefix.length); + return withPrefix; + } + else { + if(copy) { + return Arrays.copyOf(packed, packed.length); + } + else { + return packed; + } + } + } + + byte[] packMaybeVersionstamp(byte[] prefix) { + if(packed == null) { + if(hasIncompleteVersionstamp()) { + return packWithVersionstampInternal(prefix, false); + } + else { + return packInternal(prefix, false); + } + } + else { + return packed; + } + } + /** * Gets the unserialized contents of this {@code Tuple}. * * @return the elements that make up this {@code Tuple}. 
*/ public List getItems() { - return new ArrayList(elements); + return new ArrayList<>(elements); } /** @@ -385,7 +460,7 @@ public class Tuple implements Comparable, Iterable { * @see #fromItems(Iterable) */ public Tuple() { - this.elements = new LinkedList(); + this.elements = new LinkedList<>(); } /** @@ -413,6 +488,7 @@ public class Tuple implements Comparable, Iterable { public static Tuple fromBytes(byte[] bytes, int offset, int length) { Tuple t = new Tuple(); t.elements = TupleUtil.unpack(bytes, offset, length); + t.packed = Arrays.copyOfRange(bytes, offset, offset + length); return t; } @@ -623,13 +699,14 @@ public class Tuple implements Comparable, Iterable { Object o = this.elements.get(index); if(o == null) { return null; - } else if(o instanceof Tuple) { + } + else if(o instanceof Tuple) { return ((Tuple)o).getItems(); - } else if(o instanceof List) { - List ret = new LinkedList(); - ret.addAll((List)o); - return ret; - } else { + } + else if(o instanceof List) { + return new ArrayList<>((List) o); + } + else { throw new ClassCastException("Cannot convert item of type " + o.getClass() + " to list"); } } @@ -678,11 +755,10 @@ public class Tuple implements Comparable, Iterable { * @throws IllegalStateException if this {@code Tuple} is empty */ public Tuple popFront() { - if(elements.size() == 0) + if(elements.isEmpty()) throw new IllegalStateException("Tuple contains no elements"); - - List items = new ArrayList(elements.size() - 1); + List items = new ArrayList<>(elements.size() - 1); for(int i = 1; i < this.elements.size(); i++) { items.add(this.elements.get(i)); } @@ -697,11 +773,10 @@ public class Tuple implements Comparable, Iterable { * @throws IllegalStateException if this {@code Tuple} is empty */ public Tuple popBack() { - if(elements.size() == 0) + if(elements.isEmpty()) throw new IllegalStateException("Tuple contains no elements"); - - List items = new ArrayList(elements.size() - 1); + List items = new ArrayList<>(elements.size() - 1); for(int 
i = 0; i < this.elements.size() - 1; i++) { items.add(this.elements.get(i)); } @@ -718,12 +793,18 @@ public class Tuple implements Comparable, Iterable { * Tuple t = Tuple.from("a", "b"); * Range r = t.range(); * {@code r} includes all tuples ("a", "b", ...) + *
+ * This function will throw an error if this {@code Tuple} contains an incomplete + * {@link Versionstamp}. * * @return the range of keys containing all {@code Tuple}s that have this {@code Tuple} * as a prefix */ public Range range() { - byte[] p = pack(); + if(hasIncompleteVersionstamp()) { + throw new IllegalStateException("Tuple with incomplete versionstamp used for range"); + } + byte[] p = packInternal(null, false); //System.out.println("Packed tuple is: " + ByteArrayUtil.printable(p)); return new Range(ByteArrayUtil.join(p, new byte[] {0x0}), ByteArrayUtil.join(p, new byte[] {(byte)0xff})); @@ -742,6 +823,16 @@ public class Tuple implements Comparable, Iterable { return TupleUtil.hasIncompleteVersionstamp(stream()); } + /** + * Get the number of bytes in the packed representation of this {@code Tuple}. + * + * @return + */ + public int getPackedSize() { + byte[] p = packMaybeVersionstamp(null); + return p.length; + } + /** * Compare the byte-array representation of this {@code Tuple} against another. 
This method * will sort {@code Tuple}s in the same order that they would be sorted as keys in @@ -772,14 +863,7 @@ public class Tuple implements Comparable, Iterable { @Override public int hashCode() { if(memoizedHash == 0) { - byte[] packed; - if(hasIncompleteVersionstamp()) { - packed = packWithVersionstamp(null); - } - else { - packed = pack(); - } - memoizedHash = Arrays.hashCode(packed); + memoizedHash = Arrays.hashCode(packMaybeVersionstamp(null)); } return memoizedHash; } @@ -1011,7 +1095,7 @@ public class Tuple implements Comparable, Iterable { } private static Tuple createTuple(int items) { - List elements = new ArrayList(items); + List elements = new ArrayList<>(items); for(int i = 0; i < items; i++) { elements.add(new byte[]{99}); } diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java b/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java index cf1d337f2e..f25828f47d 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java @@ -28,7 +28,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; -import java.util.LinkedList; import java.util.List; import java.util.UUID; import java.util.stream.Stream; @@ -73,22 +72,45 @@ class TupleUtil { } static class DecodeResult { - final int end; - final Object o; + final List values; + int end; - DecodeResult(int pos, Object o) { - this.end = pos; - this.o = o; + DecodeResult() { + values = new ArrayList<>(); + end = 0; + } + + void add(Object value, int end) { + values.add(value); + this.end = end; } } static class EncodeResult { - final int totalLength; - final int versionPos; + final List encodedValues; + int totalLength; + int versionPos; - EncodeResult(int totalLength, int versionPos) { - this.totalLength = totalLength; + EncodeResult(int capacity) { + this.encodedValues = new ArrayList<>(capacity); + totalLength = 0; + 
versionPos = -1; + } + + EncodeResult add(byte[] encoded, int versionPos) { + if(versionPos >= 0 && this.versionPos >= 0) { + throw new IllegalArgumentException("Multiple incomplete Versionstamps included in Tuple"); + } + encodedValues.add(encoded); + totalLength += encoded.length; this.versionPos = versionPos; + return this; + } + + EncodeResult add(byte[] encoded) { + encodedValues.add(encoded); + totalLength += encoded.length; + return this; } } @@ -129,10 +151,44 @@ class TupleUtil { return bytes; } - public static byte[] join(List items) { + static byte[] join(List items) { return ByteArrayUtil.join(null, items); } + private static void adjustVersionPosition300(byte[] packed, int delta) { + int offsetOffset = packed.length - Short.BYTES; + ByteBuffer buffer = ByteBuffer.wrap(packed, offsetOffset, Short.BYTES).order(ByteOrder.LITTLE_ENDIAN); + int versionPosition = buffer.getShort() + delta; + if(versionPosition > 0xffff) { + throw new IllegalArgumentException("Tuple has incomplete version at position " + versionPosition + " which is greater than the maximum " + 0xffff); + } + if(versionPosition < 0) { + throw new IllegalArgumentException("Tuple has an incomplete version at a negative position"); + } + buffer.position(offsetOffset); + buffer.putShort((short)versionPosition); + } + + private static void adjustVersionPosition520(byte[] packed, int delta) { + int offsetOffset = packed.length - Integer.BYTES; + ByteBuffer buffer = ByteBuffer.wrap(packed, offsetOffset, Integer.BYTES).order(ByteOrder.LITTLE_ENDIAN); + int versionPosition = buffer.getInt() + delta; + if(versionPosition < 0) { + throw new IllegalArgumentException("Tuple has an incomplete version at a negative position"); + } + buffer.position(offsetOffset); + buffer.putInt(versionPosition); + } + + static void adjustVersionPosition(byte[] packed, int delta) { + if(FDB.instance().getAPIVersion() < 520) { + adjustVersionPosition300(packed, delta); + } + else { + adjustVersionPosition520(packed, delta); 
+ } + } + static int getCodeFor(Object o) { if(o == null) return nil; @@ -159,71 +215,60 @@ class TupleUtil { throw new IllegalArgumentException("Unsupported data type: " + o.getClass().getName()); } - static EncodeResult encode(Object t, boolean nested, List encoded) { + static void encode(EncodeResult result, Object t, boolean nested) { if(t == null) { if(nested) { - encoded.add(NULL_ESCAPED_ARR); - return new EncodeResult(NULL_ESCAPED_ARR.length, -1); + result.add(NULL_ESCAPED_ARR); } else { - encoded.add(NULL_ARR); - return new EncodeResult(NULL_ARR.length, -1); + result.add(NULL_ARR); } } - if(t instanceof byte[]) - return encode((byte[]) t, encoded); - if(t instanceof String) - return encode((String)t, encoded); - if(t instanceof BigInteger) - return encode((BigInteger)t, encoded); - if(t instanceof Float) - return encode((Float)t, encoded); - if(t instanceof Double) - return encode((Double)t, encoded); - if(t instanceof Boolean) - return encode((Boolean)t, encoded); - if(t instanceof UUID) - return encode((UUID)t, encoded); - if(t instanceof Number) - return encode(((Number)t).longValue(), encoded); - if(t instanceof Versionstamp) - return encode((Versionstamp)t, encoded); - if(t instanceof List) - return encode((List)t, encoded); - if(t instanceof Tuple) - return encode(((Tuple)t).getItems(), encoded); - throw new IllegalArgumentException("Unsupported data type: " + t.getClass().getName()); + else if(t instanceof byte[]) + encode(result, (byte[]) t); + else if(t instanceof String) + encode(result, (String)t); + else if(t instanceof BigInteger) + encode(result, (BigInteger)t); + else if(t instanceof Float) + encode(result, (Float)t); + else if(t instanceof Double) + encode(result, (Double)t); + else if(t instanceof Boolean) + encode(result, (Boolean)t); + else if(t instanceof UUID) + encode(result, (UUID)t); + else if(t instanceof Number) + encode(result, ((Number)t).longValue()); + else if(t instanceof Versionstamp) + encode(result, (Versionstamp)t); + else 
if(t instanceof List) + encode(result, (List)t); + else if(t instanceof Tuple) + encode(result, ((Tuple)t).getItems()); + else + throw new IllegalArgumentException("Unsupported data type: " + t.getClass().getName()); } - static EncodeResult encode(Object t, List encoded) { - return encode(t, false, encoded); + static void encode(EncodeResult result, Object t) { + encode(result, t, false); } - static EncodeResult encode(byte[] bytes, List encoded) { - encoded.add(BYTES_ARR); + static void encode(EncodeResult result, byte[] bytes) { byte[] escaped = ByteArrayUtil.replace(bytes, NULL_ARR, NULL_ESCAPED_ARR); - encoded.add(escaped); - encoded.add(new byte[] {nil}); - - //System.out.println("Joining bytes..."); - return new EncodeResult(2 + escaped.length,-1); + result.add(BYTES_ARR).add(escaped).add(NULL_ARR); } - static EncodeResult encode(String s, List encoded) { - encoded.add(STRING_ARR); + static void encode(EncodeResult result, String s) { byte[] escaped = ByteArrayUtil.replace(s.getBytes(UTF8), NULL_ARR, NULL_ESCAPED_ARR); - encoded.add(escaped); - encoded.add(NULL_ARR); - - //System.out.println("Joining string..."); - return new EncodeResult(2 + escaped.length, -1); + result.add(STRING_ARR).add(escaped).add(NULL_ARR); } - static EncodeResult encode(BigInteger i, List encoded) { + static void encode(EncodeResult result, BigInteger i) { //System.out.println("Encoding integral " + i); if(i.equals(BigInteger.ZERO)) { - encoded.add(new byte[]{INT_ZERO_CODE}); - return new EncodeResult(1,-1); + result.add(new byte[]{INT_ZERO_CODE}); + return; } byte[] bytes = i.toByteArray(); if(i.compareTo(BigInteger.ZERO) > 0) { @@ -232,177 +277,171 @@ class TupleUtil { if(length > 0xff) { throw new IllegalArgumentException("BigInteger magnitude is too large (more than 255 bytes)"); } - byte[] result = new byte[length + 2]; - result[0] = POS_INT_END; - result[1] = (byte)(length); - System.arraycopy(bytes, bytes.length - length, result, 2, length); - encoded.add(result); - return new 
EncodeResult(result.length, -1); + byte[] intBytes = new byte[length + 2]; + intBytes[0] = POS_INT_END; + intBytes[1] = (byte)(length); + System.arraycopy(bytes, bytes.length - length, intBytes, 2, length); + result.add(intBytes); } - int n = ByteArrayUtil.bisectLeft(size_limits, i); - assert n <= size_limits.length; - //byte[] bytes = ByteBuffer.allocate(8).order(ByteOrder.BIG_ENDIAN).putLong(i).array(); - //System.out.println(" -- integral has 'n' of " + n + " and output bytes of " + bytes.length); - byte[] result = new byte[n+1]; - result[0] = (byte)(INT_ZERO_CODE + n); - System.arraycopy(bytes, bytes.length - n, result, 1, n); - encoded.add(result); - return new EncodeResult(result.length, -1); - } - if(i.negate().compareTo(size_limits[size_limits.length-1]) > 0) { - int length = byteLength(i.negate().toByteArray()); - if(length > 0xff) { - throw new IllegalArgumentException("BigInteger magnitude is too large (more than 255 bytes)"); + else { + int n = ByteArrayUtil.bisectLeft(size_limits, i); + assert n <= size_limits.length; + //byte[] bytes = ByteBuffer.allocate(8).order(ByteOrder.BIG_ENDIAN).putLong(i).array(); + //System.out.println(" -- integral has 'n' of " + n + " and output bytes of " + bytes.length); + byte[] intBytes = new byte[n + 1]; + intBytes[0] = (byte) (INT_ZERO_CODE + n); + System.arraycopy(bytes, bytes.length - n, intBytes, 1, n); + result.add(intBytes); } - BigInteger offset = BigInteger.ONE.shiftLeft(length*8).subtract(BigInteger.ONE); - byte[] adjusted = i.add(offset).toByteArray(); - byte[] result = new byte[length + 2]; - result[0] = NEG_INT_START; - result[1] = (byte)(length ^ 0xff); - if(adjusted.length >= length) { - System.arraycopy(adjusted, adjusted.length - length, result, 2, length); - } else { - Arrays.fill(result, 2, result.length - adjusted.length, (byte)0x00); - System.arraycopy(adjusted, 0, result, result.length - adjusted.length, adjusted.length); + } + else { + if(i.negate().compareTo(size_limits[size_limits.length - 1]) > 
0) { + int length = byteLength(i.negate().toByteArray()); + if (length > 0xff) { + throw new IllegalArgumentException("BigInteger magnitude is too large (more than 255 bytes)"); + } + BigInteger offset = BigInteger.ONE.shiftLeft(length * 8).subtract(BigInteger.ONE); + byte[] adjusted = i.add(offset).toByteArray(); + byte[] intBytes = new byte[length + 2]; + intBytes[0] = NEG_INT_START; + intBytes[1] = (byte) (length ^ 0xff); + if (adjusted.length >= length) { + System.arraycopy(adjusted, adjusted.length - length, intBytes, 2, length); + } else { + Arrays.fill(intBytes, 2, intBytes.length - adjusted.length, (byte) 0x00); + System.arraycopy(adjusted, 0, intBytes, intBytes.length - adjusted.length, adjusted.length); + } + result.add(intBytes); + } + else { + int n = ByteArrayUtil.bisectLeft(size_limits, i.negate()); + + assert n >= 0 && n < size_limits.length; // can we do this? it seems to be required for the following statement + + long maxv = size_limits[n].add(i).longValue(); + byte[] adjustedBytes = ByteBuffer.allocate(8).order(ByteOrder.BIG_ENDIAN).putLong(maxv).array(); + byte[] intBytes = new byte[n + 1]; + intBytes[0] = (byte) (20 - n); + System.arraycopy(adjustedBytes, adjustedBytes.length - n, intBytes, 1, n); + result.add(intBytes); } - encoded.add(result); - return new EncodeResult(result.length, -1); } - int n = ByteArrayUtil.bisectLeft(size_limits, i.negate()); - - assert n >= 0 && n < size_limits.length; // can we do this? 
it seems to be required for the following statement - - long maxv = size_limits[n].add(i).longValue(); - byte[] adjustedBytes = ByteBuffer.allocate(8).order(ByteOrder.BIG_ENDIAN).putLong(maxv).array(); - byte[] result = new byte[n+1]; - result[0] = (byte)(20 - n); - System.arraycopy(adjustedBytes, adjustedBytes.length - n, result, 1, n); - encoded.add(result); - return new EncodeResult(result.length, -1); } - static EncodeResult encode(Integer i, List encoded) { - return encode(i.longValue(), encoded); + static void encode(EncodeResult result, Integer i) { + encode(result, i.longValue()); } - static EncodeResult encode(long i, List encoded) { - return encode(BigInteger.valueOf(i), encoded); + static void encode(EncodeResult result, long i) { + encode(result, BigInteger.valueOf(i)); } - static EncodeResult encode(Float f, List encoded) { - byte[] result = ByteBuffer.allocate(5).order(ByteOrder.BIG_ENDIAN).put(FLOAT_CODE).putFloat(f).array(); - floatingPointCoding(result, 1, true); - encoded.add(result); - return new EncodeResult(result.length, -1); + static void encode(EncodeResult result, Float f) { + byte[] floatBytes = ByteBuffer.allocate(5).order(ByteOrder.BIG_ENDIAN).put(FLOAT_CODE).putFloat(f).array(); + floatingPointCoding(floatBytes, 1, true); + result.add(floatBytes); } - static EncodeResult encode(Double d, List encoded) { - byte[] result = ByteBuffer.allocate(9).order(ByteOrder.BIG_ENDIAN).put(DOUBLE_CODE).putDouble(d).array(); - floatingPointCoding(result, 1, true); - encoded.add(result); - return new EncodeResult(result.length, -1); + static void encode(EncodeResult result, Double d) { + byte[] doubleBytes = ByteBuffer.allocate(9).order(ByteOrder.BIG_ENDIAN).put(DOUBLE_CODE).putDouble(d).array(); + floatingPointCoding(doubleBytes, 1, true); + result.add(doubleBytes); } - static EncodeResult encode(Boolean b, List encoded) { - if (b) { - encoded.add(TRUE_ARR); - } else { - encoded.add(FALSE_ARR); + static void encode(EncodeResult result, Boolean b) { + 
if(b) { + result.add(TRUE_ARR); + } + else { + result.add(FALSE_ARR); } - return new EncodeResult(1, -1); } - static EncodeResult encode(UUID uuid, List encoded) { - byte[] result = ByteBuffer.allocate(17).put(UUID_CODE).order(ByteOrder.BIG_ENDIAN) + static void encode(EncodeResult result, UUID uuid) { + byte[] uuidBytes = ByteBuffer.allocate(17).put(UUID_CODE).order(ByteOrder.BIG_ENDIAN) .putLong(uuid.getMostSignificantBits()).putLong(uuid.getLeastSignificantBits()) .array(); - encoded.add(result); - return new EncodeResult(result.length, -1); + result.add(uuidBytes); } - static EncodeResult encode(Versionstamp v, List encoded) { - encoded.add(VERSIONSTAMP_ARR); - encoded.add(v.getBytes()); - return new EncodeResult(1 + Versionstamp.LENGTH, (v.isComplete() ? -1 : 1)); - } - - static EncodeResult encode(List value, List encoded) { - int lenSoFar = 0; - int versionPos = -1; - encoded.add(NESTED_ARR); - for(Object t : value) { - EncodeResult childResult = encode(t, true, encoded); - if(childResult.versionPos > 0) { - if(versionPos > 0) { - throw new IllegalArgumentException("Multiple incomplete Versionstamps included in Tuple"); - } - versionPos = lenSoFar + childResult.versionPos; - } - lenSoFar += childResult.totalLength; + static void encode(EncodeResult result, Versionstamp v) { + result.add(VERSIONSTAMP_ARR); + if(v.isComplete()) { + result.add(v.getBytes()); + } + else { + result.add(v.getBytes(), result.totalLength); } - encoded.add(NULL_ARR); - return new EncodeResult(lenSoFar + 2, (versionPos < 0 ? 
-1 : versionPos + 1)); } - static DecodeResult decode(byte[] rep, int pos, int last) { + static void encode(EncodeResult result, List value) { + result.add(NESTED_ARR); + for(Object t : value) { + encode(result, t, true); + } + result.add(NULL_ARR); + } + + static void decode(DecodeResult result, byte[] rep, int pos, int last) { //System.out.println("Decoding '" + ArrayUtils.printable(rep) + "' at " + pos); // SOMEDAY: codes over 127 will be a problem with the signed Java byte mess int code = rep[pos]; int start = pos + 1; if(code == nil) { - return new DecodeResult(start, null); + result.add(null, start); } - if(code == BYTES_CODE) { + else if(code == BYTES_CODE) { int end = ByteArrayUtil.findTerminator(rep, (byte)0x0, (byte)0xff, start, last); //System.out.println("End of byte string: " + end); byte[] range = ByteArrayUtil.replace(rep, start, end - start, NULL_ESCAPED_ARR, new byte[] { nil }); //System.out.println(" -> byte string contents: '" + ArrayUtils.printable(range) + "'"); - return new DecodeResult(end + 1, range); + result.add(range, end + 1); } - if(code == STRING_CODE) { + else if(code == STRING_CODE) { int end = ByteArrayUtil.findTerminator(rep, (byte)0x0, (byte)0xff, start, last); //System.out.println("End of UTF8 string: " + end); byte[] stringBytes = ByteArrayUtil.replace(rep, start, end - start, NULL_ESCAPED_ARR, new byte[] { nil }); String str = new String(stringBytes, UTF8); //System.out.println(" -> UTF8 string contents: '" + str + "'"); - return new DecodeResult(end + 1, str); + result.add(str, end + 1); } - if(code == FLOAT_CODE) { + else if(code == FLOAT_CODE) { byte[] resBytes = Arrays.copyOfRange(rep, start, start+4); floatingPointCoding(resBytes, 0, false); float res = ByteBuffer.wrap(resBytes).order(ByteOrder.BIG_ENDIAN).getFloat(); - return new DecodeResult(start + 4, res); + result.add(res, start + Float.BYTES); } - if(code == DOUBLE_CODE) { + else if(code == DOUBLE_CODE) { byte[] resBytes = Arrays.copyOfRange(rep, start, start+8); 
floatingPointCoding(resBytes, 0, false); double res = ByteBuffer.wrap(resBytes).order(ByteOrder.BIG_ENDIAN).getDouble(); - return new DecodeResult(start + 8, res); + result.add(res, start + Double.BYTES); } - if(code == FALSE_CODE) { - return new DecodeResult(start, false); + else if(code == FALSE_CODE) { + result.add(false, start); } - if(code == TRUE_CODE) { - return new DecodeResult(start, true); + else if(code == TRUE_CODE) { + result.add(true, start); } - if(code == UUID_CODE) { + else if(code == UUID_CODE) { ByteBuffer bb = ByteBuffer.wrap(rep, start, 16).order(ByteOrder.BIG_ENDIAN); long msb = bb.getLong(); long lsb = bb.getLong(); - return new DecodeResult(start + 16, new UUID(msb, lsb)); + result.add(new UUID(msb, lsb), start + 16); } - if(code == POS_INT_END) { + else if(code == POS_INT_END) { int n = rep[start] & 0xff; - return new DecodeResult(start + n + 1, new BigInteger(ByteArrayUtil.join(new byte[]{0x00}, Arrays.copyOfRange(rep, start+1, start+n+1)))); + BigInteger res = new BigInteger(ByteArrayUtil.join(new byte[]{0x00}, Arrays.copyOfRange(rep, start+1, start+n+1))); + result.add(res, start + n + 1); } - if(code == NEG_INT_START) { + else if(code == NEG_INT_START) { int n = (rep[start] ^ 0xff) & 0xff; BigInteger origValue = new BigInteger(ByteArrayUtil.join(new byte[]{0x00}, Arrays.copyOfRange(rep, start+1, start+n+1))); BigInteger offset = BigInteger.ONE.shiftLeft(n*8).subtract(BigInteger.ONE); - return new DecodeResult(start + n + 1, origValue.subtract(offset)); + result.add(origValue.subtract(offset), start + n + 1); } - if(code > NEG_INT_START && code < POS_INT_END) { + else if(code > NEG_INT_START && code < POS_INT_END) { // decode a long byte[] longBytes = new byte[9]; boolean upper = code >= INT_ZERO_CODE; @@ -426,36 +465,37 @@ class TupleUtil { val.compareTo(BigInteger.valueOf(Long.MAX_VALUE))>0) { // This can occur if the thing can be represented with 8 bytes but not // the right sign information. 
- return new DecodeResult(end, val); + result.add(val, end); + } else { + result.add(val.longValue(), end); } - return new DecodeResult(end, val.longValue()); } - if(code == VERSIONSTAMP_CODE) { - return new DecodeResult( - start + Versionstamp.LENGTH, - Versionstamp.fromBytes(Arrays.copyOfRange(rep, start, start + Versionstamp.LENGTH))); + else if(code == VERSIONSTAMP_CODE) { + Versionstamp val = Versionstamp.fromBytes(Arrays.copyOfRange(rep, start, start + Versionstamp.LENGTH)); + result.add(val, start + Versionstamp.LENGTH); } - if(code == NESTED_CODE) { - List items = new LinkedList(); + else if(code == NESTED_CODE) { + DecodeResult subResult = new DecodeResult(); int endPos = start; while(endPos < rep.length) { if(rep[endPos] == nil) { if(endPos + 1 < rep.length && rep[endPos+1] == (byte)0xff) { - items.add(null); + subResult.add(null, endPos + 2); endPos += 2; } else { endPos += 1; break; } } else { - DecodeResult subResult = decode(rep, endPos, last); - items.add(subResult.o); + decode(subResult, rep, endPos, last); endPos = subResult.end; } } - return new DecodeResult(endPos, items); + result.add(subResult.values, endPos); + } + else { + throw new IllegalArgumentException("Unknown tuple data type " + code + " at index " + pos); } - throw new IllegalArgumentException("Unknown tuple data type " + code + " at index " + pos); } static int compareSignedBigEndian(byte[] arr1, byte[] arr2) { @@ -539,62 +579,51 @@ class TupleUtil { } static List unpack(byte[] bytes, int start, int length) { - List items = new LinkedList<>(); + DecodeResult decodeResult = new DecodeResult(); int pos = start; int end = start + length; while(pos < end) { - DecodeResult decoded = decode(bytes, pos, end); - items.add(decoded.o); - pos = decoded.end; + decode(decodeResult, bytes, pos, end); + pos = decodeResult.end; } - return items; + return decodeResult.values; } - static EncodeResult encodeAll(List items, byte[] prefix, List encoded) { + static void encodeAll(EncodeResult result, List 
items, byte[] prefix) { if(prefix != null) { - encoded.add(prefix); + result.add(prefix); } - int lenSoFar = (prefix == null) ? 0 : prefix.length; - int versionPos = -1; for(Object t : items) { - EncodeResult result = encode(t, encoded); - if(result.versionPos > 0) { - if(versionPos > 0) { - throw new IllegalArgumentException("Multiple incomplete Versionstamps included in Tuple"); - } - versionPos = result.versionPos + lenSoFar; - } - lenSoFar += result.totalLength; + encode(result, t); } //System.out.println("Joining whole tuple..."); - return new EncodeResult(lenSoFar, versionPos); } static byte[] pack(List items, byte[] prefix) { - List encoded = new ArrayList<>(2 * items.size() + (prefix == null ? 0 : 1)); - EncodeResult result = encodeAll(items, prefix, encoded); - if(result.versionPos > 0) { - throw new IllegalArgumentException("Incomplete Versionstamp included in vanilla tuple pack"); + EncodeResult result = new EncodeResult(2 * items.size() + (prefix == null ? 0 : 1)); + encodeAll(result, items, prefix); + if(result.versionPos >= 0) { + throw new IllegalArgumentException("Incomplete Versionstamp included in vanilla tuple packInternal"); } else { - return ByteArrayUtil.join(null, encoded); + return ByteArrayUtil.join(null, result.encodedValues); } } static byte[] packWithVersionstamp(List items, byte[] prefix) { - List encoded = new ArrayList<>(2 * items.size() + (prefix == null ? 1 : 2)); - EncodeResult result = encodeAll(items, prefix, encoded); + EncodeResult result = new EncodeResult(2 * items.size() + (prefix == null ? 
1 : 2)); + encodeAll(result, items, prefix); if(result.versionPos < 0) { - throw new IllegalArgumentException("No incomplete Versionstamp included in tuple pack with versionstamp"); + throw new IllegalArgumentException("No incomplete Versionstamp included in tuple packInternal with versionstamp"); } else { if(result.versionPos > 0xffff) { throw new IllegalArgumentException("Tuple has incomplete version at position " + result.versionPos + " which is greater than the maximum " + 0xffff); } if (FDB.instance().getAPIVersion() < 520) { - encoded.add(ByteBuffer.allocate(2).order(ByteOrder.LITTLE_ENDIAN).putShort((short)result.versionPos).array()); + result.add(ByteBuffer.allocate(Short.BYTES).order(ByteOrder.LITTLE_ENDIAN).putShort((short)result.versionPos).array()); } else { - encoded.add(ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN).putInt(result.versionPos).array()); + result.add(ByteBuffer.allocate(Integer.BYTES).order(ByteOrder.LITTLE_ENDIAN).putInt(result.versionPos).array()); } - return ByteArrayUtil.join(null, encoded); + return ByteArrayUtil.join(null, result.encodedValues); } } @@ -617,7 +646,10 @@ class TupleUtil { public static void main(String[] args) { try { byte[] bytes = pack(Collections.singletonList(4), null); - assert 4 == (Integer)(decode(bytes, 0, bytes.length).o); + DecodeResult result = new DecodeResult(); + decode(result, bytes, 0, bytes.length); + int val = (int)result.values.get(0); + assert 4 == val; } catch (Exception e) { e.printStackTrace(); System.out.println("Error " + e.getMessage()); @@ -625,7 +657,9 @@ class TupleUtil { try { byte[] bytes = pack(Collections.singletonList("\u021Aest \u0218tring"), null); - String string = (String)(decode(bytes, 0, bytes.length).o); + DecodeResult result = new DecodeResult(); + decode(result, bytes, 0, bytes.length); + String string = (String)result.values.get(0); System.out.println("contents -> " + string); assert "\u021Aest \u0218tring".equals(string); } catch (Exception e) { @@ -635,7 +669,7 @@ 
class TupleUtil { /*Object[] a = new Object[] { "\u0000a", -2, "b\u0001", 12345, ""}; List o = Arrays.asList(a); - byte[] packed = pack( o, null ); + byte[] packed = packInternal( o, null ); System.out.println("packed length: " + packed.length); o = unpack( packed, 0, packed.length ); System.out.println("unpacked elements: " + o); diff --git a/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java b/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java index df9ccf6d45..dada5131d8 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java @@ -25,17 +25,15 @@ public class TuplePerformanceTest { public Tuple createTuple(int length) { List values = new ArrayList<>(length); - for(int i = 0; i < length; i++) { + for (int i = 0; i < length; i++) { double choice = r.nextDouble(); - if(choice < 0.1) { + if (choice < 0.1) { values.add(null); - } - else if(choice < 0.2) { + } else if (choice < 0.2) { byte[] bytes = new byte[r.nextInt(20)]; r.nextBytes(bytes); values.add(bytes); - } - else if(choice < 0.3) { + } else if (choice < 0.3) { char[] chars = new char[r.nextInt(20)]; for (int j = 0; j < chars.length; j++) { chars[j] = (char)('a' + r.nextInt(26)); @@ -171,7 +169,7 @@ public class TuplePerformanceTest { } public static void main(String[] args) { - TuplePerformanceTest tester = new TuplePerformanceTest(new Random(), 100_000, 10_000); + TuplePerformanceTest tester = new TuplePerformanceTest(new Random(), 100_000, 10_000_000); tester.run(); } } From e6ce0ebd2717c1223c0f2aac9e37581e14a14516 Mon Sep 17 00:00:00 2001 From: Alec Grieser Date: Sun, 24 Feb 2019 20:49:10 -0800 Subject: [PATCH 18/47] improve tuple performance tester for more types and add serialization check in TupleTest --- .../test/TuplePerformanceTest.java | 76 ++++++++++++-- .../apple/foundationdb/test/TupleTest.java | 98 ++++++++++++++++++- 2 files 
changed, 162 insertions(+), 12 deletions(-) diff --git a/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java b/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java index dada5131d8..cf79ff41a9 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java @@ -13,30 +13,40 @@ import com.apple.foundationdb.tuple.Versionstamp; public class TuplePerformanceTest { + private enum GeneratedTypes { + ALL, + LONG, + FLOATING_POINT + } + private final Random r; private final int ignoreIterations; private final int iterations; + private final GeneratedTypes generatedTypes; - public TuplePerformanceTest(Random r, int ignoreIterations, int iterations) { + public TuplePerformanceTest(Random r, int ignoreIterations, int iterations, GeneratedTypes generatedTypes) { this.r = r; this.ignoreIterations = ignoreIterations; this.iterations = iterations; + this.generatedTypes = generatedTypes; } - public Tuple createTuple(int length) { + public Tuple createMultiTypeTuple(int length) { List values = new ArrayList<>(length); - for (int i = 0; i < length; i++) { + for(int i = 0; i < length; i++) { double choice = r.nextDouble(); - if (choice < 0.1) { + if(choice < 0.1) { values.add(null); - } else if (choice < 0.2) { + } + else if(choice < 0.2) { byte[] bytes = new byte[r.nextInt(20)]; r.nextBytes(bytes); values.add(bytes); - } else if (choice < 0.3) { + } + else if(choice < 0.3) { char[] chars = new char[r.nextInt(20)]; for (int j = 0; j < chars.length; j++) { - chars[j] = (char)('a' + r.nextInt(26)); + chars[j] = (char) ('a' + r.nextInt(26)); } values.add(new String(chars)); } @@ -67,7 +77,55 @@ public class TuplePerformanceTest { values.add(nested); } } - return Tuple.from(values); + return Tuple.fromItems(values); + } + + public Tuple createLongsTuple(int length) { + List values = new ArrayList<>(length); + for(int i = 0; i < 
length; i++) { + int byteLength = r.nextInt(Long.BYTES + 1); + long val = 0L; + for(int x = 0; x < byteLength; x++) { + int nextBytes = r.nextInt(256); + val = (val << 8) + nextBytes; + } + values.add(val); + } + return Tuple.fromItems(values); + } + + public Tuple createFloatingPointTuple(int length) { + List values = new ArrayList<>(length); + for(int i = 0; i < length; i++) { + double choice = r.nextDouble(); + if(choice < 0.40) { + values.add(r.nextFloat()); + } + else if(choice < 0.80) { + values.add(r.nextDouble()); + } + // These last two are more likely to produce NaN values + else if(choice < 0.90) { + values.add(Float.intBitsToFloat(r.nextInt())); + } + else { + values.add(Double.longBitsToDouble(r.nextLong())); + } + } + return Tuple.fromItems(values); + } + + public Tuple createTuple(int length) { + switch (generatedTypes) { + case ALL: + return createMultiTypeTuple(length); + case LONG: + return createLongsTuple(length); + case FLOATING_POINT: + return createFloatingPointTuple(length); + default: + throw new IllegalStateException("unknown generated types " + generatedTypes); + } } public void run() { @@ -169,7 +227,7 @@ public class TuplePerformanceTest { } public static void main(String[] args) { - TuplePerformanceTest tester = new TuplePerformanceTest(new Random(), 100_000, 10_000_000); + TuplePerformanceTest tester = new TuplePerformanceTest(new Random(), 100_000, 10_000_000, GeneratedTypes.ALL); tester.run(); } } diff --git a/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java b/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java index ad9297e02d..528c11f93a 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java @@ -20,24 +20,116 @@ package com.apple.foundationdb.test; -import com.apple.foundationdb.Database; -import com.apple.foundationdb.FDB; import com.apple.foundationdb.TransactionContext; +import 
com.apple.foundationdb.tuple.ByteArrayUtil; import com.apple.foundationdb.tuple.Tuple; +import java.math.BigInteger; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; + public class TupleTest { + private static final byte FF = (byte)0xff; + public static void main(String[] args) throws InterruptedException { final int reps = 1000; try { - FDB fdb = FDB.selectAPIVersion(610); + // FDB fdb = FDB.selectAPIVersion(610); + serializedForms(); + /* try(Database db = fdb.open()) { runTests(reps, db); } + */ } catch(Throwable t) { t.printStackTrace(); } } + private static class TupleSerialization { + private final Tuple tuple; + private final byte[] serialization; + + TupleSerialization(Tuple tuple, byte[] serialization) { + this.tuple = tuple; + this.serialization = serialization; + } + + static void addAll(List list, Object... args) { + for(int i = 0; i < args.length; i += 2) { + TupleSerialization serialization = new TupleSerialization((Tuple)args[i], (byte[])args[i + 1]); + list.add(serialization); + } + } + } + + private static void serializedForms() { + List serializations = new ArrayList<>(); + TupleSerialization.addAll(serializations, + Tuple.from(0L), new byte[]{0x14}, + Tuple.from(BigInteger.ZERO), new byte[]{0x14}, + Tuple.from(1L), new byte[]{0x15, 0x01}, + Tuple.from(BigInteger.ONE), new byte[]{0x15, 0x01}, + Tuple.from(-1L), new byte[]{0x13, FF - 1}, + Tuple.from(BigInteger.ONE.negate()), new byte[]{0x13, FF - 1}, + Tuple.from(255L), new byte[]{0x15, FF}, + Tuple.from(BigInteger.valueOf(255)), new byte[]{0x15, FF}, + Tuple.from(-255L), new byte[]{0x13, 0x00}, + Tuple.from(BigInteger.valueOf(-255)), new byte[]{0x13, 0x00}, + Tuple.from(256L), new byte[]{0x16, 0x01, 0x00}, + Tuple.from(BigInteger.valueOf(256)), new byte[]{0x16, 0x01, 0x00}, + Tuple.from(-256L), new byte[]{0x12, FF - 1, FF}, + Tuple.from(BigInteger.valueOf(-256)), new byte[]{0x12, FF - 1, FF}, + Tuple.from(65536), new byte[]{0x17, 0x01, 
0x00, 0x00}, + Tuple.from(-65536), new byte[]{0x11, FF - 1, FF, FF}, + Tuple.from(Long.MAX_VALUE), new byte[]{0x1C, 0x7f, FF, FF, FF, FF, FF, FF, FF}, + Tuple.from(BigInteger.valueOf(Long.MAX_VALUE)), new byte[]{0x1C, 0x7f, FF, FF, FF, FF, FF, FF, FF}, + Tuple.from(BigInteger.valueOf(Long.MAX_VALUE).add(BigInteger.ONE)), new byte[]{0x1C, (byte)0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + Tuple.from(BigInteger.ONE.shiftLeft(64).subtract(BigInteger.ONE)), new byte[]{0x1C, FF, FF, FF, FF, FF, FF, FF, FF}, + Tuple.from(BigInteger.ONE.shiftLeft(64)), new byte[]{0x1D, 0x09, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + Tuple.from(-((1L << 32) - 1)), new byte[]{0x10, 0x00, 0x00, 0x00, 0x00}, + Tuple.from(BigInteger.ONE.shiftLeft(32).subtract(BigInteger.ONE).negate()), new byte[]{0x10, 0x00, 0x00, 0x00, 0x00}, + Tuple.from(Long.MIN_VALUE + 2), new byte[]{0x0C, (byte)0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, + Tuple.from(Long.MIN_VALUE + 1), new byte[]{0x0C, (byte)0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + Tuple.from(BigInteger.valueOf(Long.MIN_VALUE).add(BigInteger.ONE)), new byte[]{0x0C, (byte)0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + Tuple.from(Long.MIN_VALUE), new byte[]{0x0C, 0x7f, FF, FF, FF, FF, FF, FF, FF}, + Tuple.from(BigInteger.valueOf(Long.MIN_VALUE)), new byte[]{0x0C, 0x7f, FF, FF, FF, FF, FF, FF, FF}, + Tuple.from(BigInteger.valueOf(Long.MIN_VALUE).subtract(BigInteger.ONE)), new byte[]{0x0C, 0x7f, FF, FF, FF, FF, FF, FF, FF - 1}, + Tuple.from(BigInteger.ONE.shiftLeft(64).subtract(BigInteger.ONE).negate()), new byte[]{0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + Tuple.from(3.14f), new byte[]{0x20, (byte)0xc0, 0x48, (byte)0xf5, (byte)0xc3}, + Tuple.from(-3.14f), new byte[]{0x20, (byte)0x3f, (byte)0xb7, (byte)0x0a, (byte)0x3c}, + Tuple.from(3.14), new byte[]{0x21, (byte)0xc0, (byte)0x09, (byte)0x1e, (byte)0xb8, (byte)0x51, (byte)0xeb, (byte)0x85, (byte)0x1f}, + Tuple.from(-3.14), new byte[]{0x21, (byte)0x3f, 
(byte)0xf6, (byte)0xe1, (byte)0x47, (byte)0xae, (byte)0x14, (byte)0x7a, (byte)0xe0}, + Tuple.from(0.0f), new byte[]{0x20, (byte)0x80, 0x00, 0x00, 0x00}, + Tuple.from(-0.0f), new byte[]{0x20, 0x7f, FF, FF, FF}, + Tuple.from(0.0), new byte[]{0x21, (byte)0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + Tuple.from(-0.0), new byte[]{0x21, 0x7f, FF, FF, FF, FF, FF, FF, FF}, + Tuple.from(Float.POSITIVE_INFINITY), new byte[]{0x20, FF, (byte)0x80, 0x00, 0x00}, + Tuple.from(Float.NEGATIVE_INFINITY), new byte[]{0x20, 0x00, 0x7f, FF, FF}, + Tuple.from(Double.POSITIVE_INFINITY), new byte[]{0x21, FF, (byte)0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + Tuple.from(Double.NEGATIVE_INFINITY), new byte[]{0x21, 0x00, 0x0f, FF, FF, FF, FF, FF, FF}, + Tuple.from(Float.intBitsToFloat(Integer.MAX_VALUE)), new byte[]{0x20, FF, FF, FF, FF}, + Tuple.from(Double.longBitsToDouble(Long.MAX_VALUE)), new byte[]{0x21, FF, FF, FF, FF, FF, FF, FF, FF}, + Tuple.from(Float.intBitsToFloat(~0)), new byte[]{0x20, 0x00, 0x00, 0x00, 0x00}, + Tuple.from(Double.longBitsToDouble(~0L)), new byte[]{0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} + ); + + for(TupleSerialization serialization : serializations) { + System.out.println("Packing " + serialization.tuple + " (expecting: " + ByteArrayUtil.printable(serialization.serialization) + ")"); + if(!Arrays.equals(serialization.tuple.pack(), serialization.serialization)) { + throw new RuntimeException("Tuple " + serialization.tuple + " has serialization " + ByteArrayUtil.printable(serialization.tuple.pack()) + + " which does not match expected serialization " + ByteArrayUtil.printable(serialization.serialization)); + } + if(!Objects.equals(serialization.tuple, Tuple.fromBytes(serialization.serialization))) { + throw new RuntimeException("Tuple " + serialization.tuple + " does not match deserialization " + Tuple.fromBytes(serialization.serialization) + + " which comes from serialization " + ByteArrayUtil.printable(serialization.serialization)); + } + } + 
System.out.println("All tuples had matching serializations"); + } + private static void runTests(final int reps, TransactionContext db) { System.out.println("Running tests..."); long start = System.currentTimeMillis(); From e9771364d797133444623ec4ebf7ce0bce5d517e Mon Sep 17 00:00:00 2001 From: Alec Grieser Date: Sun, 24 Feb 2019 20:52:28 -0800 Subject: [PATCH 19/47] various Java tuple performance tweaks These include: * Memoizing packed representations within Tuples * Using longs instead of BigIntegers if possible * As much as possible sticking to manipulating primitive types when using floats/doubles --- .../foundationdb/tuple/ByteArrayUtil.java | 3 +- .../com/apple/foundationdb/tuple/Tuple.java | 14 +- .../apple/foundationdb/tuple/TupleUtil.java | 446 ++++++++++-------- 3 files changed, 252 insertions(+), 211 deletions(-) diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java b/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java index 247ae78fb0..eeea3e1799 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java @@ -229,8 +229,7 @@ public class ByteArrayUtil { int n = Arrays.binarySearch(arr, i); if(n >= 0) return n; - int ip = (n + 1) * -1; - return ip; + return (n + 1) * -1; } /** diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java b/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java index 7b14632452..b3761d8c5d 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java @@ -824,9 +824,12 @@ public class Tuple implements Comparable, Iterable { } /** - * Get the number of bytes in the packed representation of this {@code Tuple}. + * Get the number of bytes in the packed representation of this {@code Tuple}. 
Note that at the + * moment, this number is calculated by packing the {@code Tuple} and looking at its size. This method + * will memoize the result, however, so asking the same {@code Tuple} for its size multiple times + * is a fast operation. * - * @return + * @return the number of bytes in the packed representation of this {@code Tuple} */ public int getPackedSize() { byte[] p = packMaybeVersionstamp(null); @@ -847,7 +850,12 @@ public class Tuple implements Comparable, Iterable { */ @Override public int compareTo(Tuple t) { - return comparator.compare(elements, t.elements); + if(packed != null && t.packed != null) { + return ByteArrayUtil.compareUnsigned(packed, t.packed); + } + else { + return comparator.compare(elements, t.elements); + } } /** diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java b/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java index f25828f47d..5b220d2c90 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java @@ -36,8 +36,10 @@ import com.apple.foundationdb.FDB; class TupleUtil { private static final byte nil = 0x00; - private static final BigInteger[] size_limits; + private static final BigInteger[] BIG_INT_SIZE_LIMITS; private static final Charset UTF8; + private static final BigInteger LONG_MIN_VALUE = BigInteger.valueOf(Long.MIN_VALUE); + private static final BigInteger LONG_MAX_VALUE = BigInteger.valueOf(Long.MAX_VALUE); private static final IterableComparator iterableComparator; private static final byte BYTES_CODE = 0x01; @@ -55,27 +57,28 @@ class TupleUtil { private static final byte[] NULL_ARR = new byte[] {nil}; private static final byte[] NULL_ESCAPED_ARR = new byte[] {nil, (byte)0xFF}; - private static final byte[] BYTES_ARR = new byte[]{0x01}; - private static final byte[] STRING_ARR = new byte[]{0x02}; - private static final byte[] NESTED_ARR = new byte[]{0x05}; - private static final 
byte[] FALSE_ARR = new byte[]{0x26}; - private static final byte[] TRUE_ARR = new byte[]{0x27}; - private static final byte[] VERSIONSTAMP_ARR = new byte[]{0x33}; + private static final byte[] BYTES_ARR = new byte[]{BYTES_CODE}; + private static final byte[] STRING_ARR = new byte[]{STRING_CODE}; + private static final byte[] NESTED_ARR = new byte[]{NESTED_CODE}; + private static final byte[] INT_ZERO_ARR = new byte[]{INT_ZERO_CODE}; + private static final byte[] FALSE_ARR = new byte[]{FALSE_CODE}; + private static final byte[] TRUE_ARR = new byte[]{TRUE_CODE}; + private static final byte[] VERSIONSTAMP_ARR = new byte[]{VERSIONSTAMP_CODE}; static { - size_limits = new BigInteger[9]; - for(int i = 0; i < 9; i++) { - size_limits[i] = (BigInteger.ONE).shiftLeft(i * 8).subtract(BigInteger.ONE); + BIG_INT_SIZE_LIMITS = new BigInteger[9]; + for(int i = 0; i < BIG_INT_SIZE_LIMITS.length; i++) { + BIG_INT_SIZE_LIMITS[i] = (BigInteger.ONE).shiftLeft(i * 8).subtract(BigInteger.ONE); } UTF8 = Charset.forName("UTF-8"); iterableComparator = new IterableComparator(); } - static class DecodeResult { + static class DecodeState { final List values; int end; - DecodeResult() { + DecodeState() { values = new ArrayList<>(); end = 0; } @@ -86,18 +89,18 @@ class TupleUtil { } } - static class EncodeResult { + static class EncodeState { final List encodedValues; int totalLength; int versionPos; - EncodeResult(int capacity) { + EncodeState(int capacity) { this.encodedValues = new ArrayList<>(capacity); totalLength = 0; versionPos = -1; } - EncodeResult add(byte[] encoded, int versionPos) { + EncodeState add(byte[] encoded, int versionPos) { if(versionPos >= 0 && this.versionPos >= 0) { throw new IllegalArgumentException("Multiple incomplete Versionstamps included in Tuple"); } @@ -107,7 +110,7 @@ class TupleUtil { return this; } - EncodeResult add(byte[] encoded) { + EncodeState add(byte[] encoded) { encodedValues.add(encoded); totalLength += encoded.length; return this; @@ -122,37 +125,37 
@@ class TupleUtil { return 0; } - /** - * Takes the Big-Endian byte representation of a floating point number and adjusts - * it so that it sorts correctly. For encoding, if the sign bit is 1 (the number - * is negative), then we need to flip all of the bits; otherwise, just flip the - * sign bit. For decoding, if the sign bit is 0 (the number is negative), then - * we also need to flip all of the bits; otherwise, just flip the sign bit. - * This will mutate in place the given array. - * - * @param bytes Big-Endian IEEE encoding of a floating point number - * @param start the (zero-indexed) first byte in the array to mutate - * @param encode true if we encoding the float and false if we are decoding - * @return the encoded {@code byte[]} - */ - static byte[] floatingPointCoding(byte[] bytes, int start, boolean encode) { - if(encode && (bytes[start] & (byte)0x80) != (byte)0x00) { - for(int i = start; i < bytes.length; i++) { - bytes[i] = (byte) (bytes[i] ^ 0xff); - } - } else if(!encode && (bytes[start] & (byte)0x80) != (byte)0x80) { - for(int i = start; i < bytes.length; i++) { - bytes[i] = (byte) (bytes[i] ^ 0xff); - } - } else { - bytes[start] = (byte) (0x80 ^ bytes[start]); - } + // These four functions are for adjusting the encoding of floating point numbers so + // that when their byte representation is written out in big-endian order, unsigned + // lexicographic byte comparison orders the values in the same way as the semantic + // ordering of the values. This means flipping all bits for negative values and flipping + // only the most-significant bit (i.e., the sign bit as all values in Java are signed) + // in the case that the number is positive. For these purposes, 0.0 is positive and -0.0 + // is negative. - return bytes; + static int encodeFloatBits(float f) { + int intBits = Float.floatToRawIntBits(f); + return (intBits < 0) ? 
(~intBits) : (intBits ^ Integer.MIN_VALUE); } - static byte[] join(List items) { - return ByteArrayUtil.join(null, items); + static long encodeDoubleBits(double d) { + long longBits = Double.doubleToRawLongBits(d); + return (longBits < 0L) ? (~longBits) : (longBits ^ Long.MIN_VALUE); + } + + static float decodeFloatBits(int i) { + int origBits = (i >= 0) ? (~i) : (i ^ Integer.MIN_VALUE); + return Float.intBitsToFloat(origBits); + } + + static double decodeDoubleBits(long l) { + long origBits = (l >= 0) ? (~l) : (l ^ Long.MIN_VALUE); + return Double.longBitsToDouble(origBits); + } + + // Get the number of bytes in the representation of a long. + static int byteCount(long i) { + return (Long.SIZE + 7 - Long.numberOfLeadingZeros(i >= 0 ? i : -i)) / 8; } private static void adjustVersionPosition300(byte[] packed, int delta) { @@ -215,64 +218,64 @@ class TupleUtil { throw new IllegalArgumentException("Unsupported data type: " + o.getClass().getName()); } - static void encode(EncodeResult result, Object t, boolean nested) { + static void encode(EncodeState state, Object t, boolean nested) { if(t == null) { if(nested) { - result.add(NULL_ESCAPED_ARR); + state.add(NULL_ESCAPED_ARR); } else { - result.add(NULL_ARR); + state.add(NULL_ARR); } } else if(t instanceof byte[]) - encode(result, (byte[]) t); + encode(state, (byte[]) t); else if(t instanceof String) - encode(result, (String)t); - else if(t instanceof BigInteger) - encode(result, (BigInteger)t); + encode(state, (String)t); else if(t instanceof Float) - encode(result, (Float)t); + encode(state, (Float)t); else if(t instanceof Double) - encode(result, (Double)t); + encode(state, (Double)t); else if(t instanceof Boolean) - encode(result, (Boolean)t); + encode(state, (Boolean)t); else if(t instanceof UUID) - encode(result, (UUID)t); + encode(state, (UUID)t); + else if(t instanceof BigInteger) + encode(state, (BigInteger)t); else if(t instanceof Number) - encode(result, ((Number)t).longValue()); + encode(state, 
((Number)t).longValue()); else if(t instanceof Versionstamp) - encode(result, (Versionstamp)t); + encode(state, (Versionstamp)t); else if(t instanceof List) - encode(result, (List)t); + encode(state, (List)t); else if(t instanceof Tuple) - encode(result, ((Tuple)t).getItems()); + encode(state, ((Tuple)t).getItems()); else throw new IllegalArgumentException("Unsupported data type: " + t.getClass().getName()); } - static void encode(EncodeResult result, Object t) { - encode(result, t, false); + static void encode(EncodeState state, Object t) { + encode(state, t, false); } - static void encode(EncodeResult result, byte[] bytes) { + static void encode(EncodeState state, byte[] bytes) { byte[] escaped = ByteArrayUtil.replace(bytes, NULL_ARR, NULL_ESCAPED_ARR); - result.add(BYTES_ARR).add(escaped).add(NULL_ARR); + state.add(BYTES_ARR).add(escaped).add(NULL_ARR); } - static void encode(EncodeResult result, String s) { + static void encode(EncodeState state, String s) { byte[] escaped = ByteArrayUtil.replace(s.getBytes(UTF8), NULL_ARR, NULL_ESCAPED_ARR); - result.add(STRING_ARR).add(escaped).add(NULL_ARR); + state.add(STRING_ARR).add(escaped).add(NULL_ARR); } - static void encode(EncodeResult result, BigInteger i) { + static void encode(EncodeState state, BigInteger i) { //System.out.println("Encoding integral " + i); if(i.equals(BigInteger.ZERO)) { - result.add(new byte[]{INT_ZERO_CODE}); + state.add(INT_ZERO_ARR); return; } byte[] bytes = i.toByteArray(); if(i.compareTo(BigInteger.ZERO) > 0) { - if(i.compareTo(size_limits[size_limits.length-1]) > 0) { + if(i.compareTo(BIG_INT_SIZE_LIMITS[BIG_INT_SIZE_LIMITS.length-1]) > 0) { int length = byteLength(bytes); if(length > 0xff) { throw new IllegalArgumentException("BigInteger magnitude is too large (more than 255 bytes)"); @@ -281,21 +284,20 @@ class TupleUtil { intBytes[0] = POS_INT_END; intBytes[1] = (byte)(length); System.arraycopy(bytes, bytes.length - length, intBytes, 2, length); - result.add(intBytes); + 
state.add(intBytes); } else { - int n = ByteArrayUtil.bisectLeft(size_limits, i); - assert n <= size_limits.length; - //byte[] bytes = ByteBuffer.allocate(8).order(ByteOrder.BIG_ENDIAN).putLong(i).array(); + int n = ByteArrayUtil.bisectLeft(BIG_INT_SIZE_LIMITS, i); + assert n <= BIG_INT_SIZE_LIMITS.length; //System.out.println(" -- integral has 'n' of " + n + " and output bytes of " + bytes.length); byte[] intBytes = new byte[n + 1]; intBytes[0] = (byte) (INT_ZERO_CODE + n); System.arraycopy(bytes, bytes.length - n, intBytes, 1, n); - result.add(intBytes); + state.add(intBytes); } } else { - if(i.negate().compareTo(size_limits[size_limits.length - 1]) > 0) { + if(i.negate().compareTo(BIG_INT_SIZE_LIMITS[BIG_INT_SIZE_LIMITS.length - 1]) > 0) { int length = byteLength(i.negate().toByteArray()); if (length > 0xff) { throw new IllegalArgumentException("BigInteger magnitude is too large (more than 255 bytes)"); @@ -311,92 +313,109 @@ class TupleUtil { Arrays.fill(intBytes, 2, intBytes.length - adjusted.length, (byte) 0x00); System.arraycopy(adjusted, 0, intBytes, intBytes.length - adjusted.length, adjusted.length); } - result.add(intBytes); + state.add(intBytes); } else { - int n = ByteArrayUtil.bisectLeft(size_limits, i.negate()); + int n = ByteArrayUtil.bisectLeft(BIG_INT_SIZE_LIMITS, i.negate()); - assert n >= 0 && n < size_limits.length; // can we do this? it seems to be required for the following statement + assert n >= 0 && n < BIG_INT_SIZE_LIMITS.length; // can we do this? 
it seems to be required for the following statement - long maxv = size_limits[n].add(i).longValue(); + long maxv = BIG_INT_SIZE_LIMITS[n].add(i).longValue(); byte[] adjustedBytes = ByteBuffer.allocate(8).order(ByteOrder.BIG_ENDIAN).putLong(maxv).array(); byte[] intBytes = new byte[n + 1]; - intBytes[0] = (byte) (20 - n); + intBytes[0] = (byte) (INT_ZERO_CODE - n); System.arraycopy(adjustedBytes, adjustedBytes.length - n, intBytes, 1, n); - result.add(intBytes); + state.add(intBytes); } } } - static void encode(EncodeResult result, Integer i) { - encode(result, i.longValue()); + static void encode(EncodeState state, long i) { + if(i == 0L) { + state.add(INT_ZERO_ARR); + return; + } + int n = byteCount(i); + byte[] intBytes = new byte[n + 1]; + // First byte encodes number of bytes (as difference from INT_ZERO_CODE) + intBytes[0] = (byte)(INT_ZERO_CODE + (i >= 0 ? n : -n)); + // For positive integers, copy the bytes in big-endian order excluding leading 0x00 bytes. + // For negative integers, copy the bytes of the one's complement representation excluding + // the leading 0xff bytes. As Java stores negative values in two's complement, we subtract 1 + // from negative values. + long val = Long.reverseBytes((i >= 0) ? 
i : (i - 1)) >> (Long.SIZE - 8 * n); + for(int x = 1; x < intBytes.length; x++) { + intBytes[x] = (byte)(val & 0xff); + val >>= 8; + } + state.add(intBytes); } - static void encode(EncodeResult result, long i) { - encode(result, BigInteger.valueOf(i)); + static void encode(EncodeState state, Float f) { + byte[] floatBytes = ByteBuffer.allocate(1 + Float.BYTES).order(ByteOrder.BIG_ENDIAN) + .put(FLOAT_CODE) + .putInt(encodeFloatBits(f)) + .array(); + state.add(floatBytes); } - static void encode(EncodeResult result, Float f) { - byte[] floatBytes = ByteBuffer.allocate(5).order(ByteOrder.BIG_ENDIAN).put(FLOAT_CODE).putFloat(f).array(); - floatingPointCoding(floatBytes, 1, true); - result.add(floatBytes); + static void encode(EncodeState state, Double d) { + byte[] doubleBytes = ByteBuffer.allocate(1 + Double.BYTES).order(ByteOrder.BIG_ENDIAN) + .put(DOUBLE_CODE) + .putLong(encodeDoubleBits(d)) + .array(); + state.add(doubleBytes); } - static void encode(EncodeResult result, Double d) { - byte[] doubleBytes = ByteBuffer.allocate(9).order(ByteOrder.BIG_ENDIAN).put(DOUBLE_CODE).putDouble(d).array(); - floatingPointCoding(doubleBytes, 1, true); - result.add(doubleBytes); - } - - static void encode(EncodeResult result, Boolean b) { + static void encode(EncodeState state, Boolean b) { if(b) { - result.add(TRUE_ARR); + state.add(TRUE_ARR); } else { - result.add(FALSE_ARR); + state.add(FALSE_ARR); } } - static void encode(EncodeResult result, UUID uuid) { + static void encode(EncodeState state, UUID uuid) { byte[] uuidBytes = ByteBuffer.allocate(17).put(UUID_CODE).order(ByteOrder.BIG_ENDIAN) .putLong(uuid.getMostSignificantBits()).putLong(uuid.getLeastSignificantBits()) .array(); - result.add(uuidBytes); + state.add(uuidBytes); } - static void encode(EncodeResult result, Versionstamp v) { - result.add(VERSIONSTAMP_ARR); + static void encode(EncodeState state, Versionstamp v) { + state.add(VERSIONSTAMP_ARR); if(v.isComplete()) { - result.add(v.getBytes()); + 
state.add(v.getBytes()); } else { - result.add(v.getBytes(), result.totalLength); + state.add(v.getBytes(), state.totalLength); } } - static void encode(EncodeResult result, List value) { - result.add(NESTED_ARR); + static void encode(EncodeState state, List value) { + state.add(NESTED_ARR); for(Object t : value) { - encode(result, t, true); + encode(state, t, true); } - result.add(NULL_ARR); + state.add(NULL_ARR); } - static void decode(DecodeResult result, byte[] rep, int pos, int last) { + static void decode(DecodeState state, byte[] rep, int pos, int last) { //System.out.println("Decoding '" + ArrayUtils.printable(rep) + "' at " + pos); // SOMEDAY: codes over 127 will be a problem with the signed Java byte mess int code = rep[pos]; int start = pos + 1; if(code == nil) { - result.add(null, start); + state.add(null, start); } else if(code == BYTES_CODE) { int end = ByteArrayUtil.findTerminator(rep, (byte)0x0, (byte)0xff, start, last); //System.out.println("End of byte string: " + end); byte[] range = ByteArrayUtil.replace(rep, start, end - start, NULL_ESCAPED_ARR, new byte[] { nil }); //System.out.println(" -> byte string contents: '" + ArrayUtils.printable(range) + "'"); - result.add(range, end + 1); + state.add(range, end + 1); } else if(code == STRING_CODE) { int end = ByteArrayUtil.findTerminator(rep, (byte)0x0, (byte)0xff, start, last); @@ -404,78 +423,91 @@ class TupleUtil { byte[] stringBytes = ByteArrayUtil.replace(rep, start, end - start, NULL_ESCAPED_ARR, new byte[] { nil }); String str = new String(stringBytes, UTF8); //System.out.println(" -> UTF8 string contents: '" + str + "'"); - result.add(str, end + 1); + state.add(str, end + 1); } else if(code == FLOAT_CODE) { - byte[] resBytes = Arrays.copyOfRange(rep, start, start+4); - floatingPointCoding(resBytes, 0, false); - float res = ByteBuffer.wrap(resBytes).order(ByteOrder.BIG_ENDIAN).getFloat(); - result.add(res, start + Float.BYTES); + int rawFloatBits = ByteBuffer.wrap(rep, start, 
Float.BYTES).getInt(); + float res = decodeFloatBits(rawFloatBits); + state.add(res, start + Float.BYTES); } else if(code == DOUBLE_CODE) { - byte[] resBytes = Arrays.copyOfRange(rep, start, start+8); - floatingPointCoding(resBytes, 0, false); - double res = ByteBuffer.wrap(resBytes).order(ByteOrder.BIG_ENDIAN).getDouble(); - result.add(res, start + Double.BYTES); + long rawDoubleBits = ByteBuffer.wrap(rep, start, Double.BYTES).getLong(); + double res = decodeDoubleBits(rawDoubleBits); + state.add(res, start + Double.BYTES); } else if(code == FALSE_CODE) { - result.add(false, start); + state.add(false, start); } else if(code == TRUE_CODE) { - result.add(true, start); + state.add(true, start); } else if(code == UUID_CODE) { ByteBuffer bb = ByteBuffer.wrap(rep, start, 16).order(ByteOrder.BIG_ENDIAN); long msb = bb.getLong(); long lsb = bb.getLong(); - result.add(new UUID(msb, lsb), start + 16); + state.add(new UUID(msb, lsb), start + 16); } else if(code == POS_INT_END) { int n = rep[start] & 0xff; BigInteger res = new BigInteger(ByteArrayUtil.join(new byte[]{0x00}, Arrays.copyOfRange(rep, start+1, start+n+1))); - result.add(res, start + n + 1); + state.add(res, start + n + 1); } else if(code == NEG_INT_START) { int n = (rep[start] ^ 0xff) & 0xff; BigInteger origValue = new BigInteger(ByteArrayUtil.join(new byte[]{0x00}, Arrays.copyOfRange(rep, start+1, start+n+1))); BigInteger offset = BigInteger.ONE.shiftLeft(n*8).subtract(BigInteger.ONE); - result.add(origValue.subtract(offset), start + n + 1); + state.add(origValue.subtract(offset), start + n + 1); } else if(code > NEG_INT_START && code < POS_INT_END) { // decode a long - byte[] longBytes = new byte[9]; - boolean upper = code >= INT_ZERO_CODE; - int n = upper ? code - 20 : 20 - code; + boolean positive = code >= INT_ZERO_CODE; + int n = positive ? 
code - INT_ZERO_CODE : INT_ZERO_CODE - code; int end = start + n; if(rep.length < end) { throw new RuntimeException("Invalid tuple (possible truncation)"); } - System.arraycopy(rep, start, longBytes, longBytes.length-n, n); - if (!upper) - for(int i=longBytes.length-n; i 0)) { + long res = 0L; + for(int i = start; i < end; i++) { + res = (res << 8) + (rep[i] & 0xff); + } + state.add(res, end); + } + else if(!positive && (n < Long.BYTES || rep[start] < 0)) { + long res = ~0L; + for(int i = start; i < end; i++) { + res = (res << 8) + (rep[i] & 0xff); + } + state.add(res + 1, end); + } + else { + byte[] longBytes = new byte[9]; + System.arraycopy(rep, start, longBytes, longBytes.length-n, n); + if (!positive) + for(int i=longBytes.length-n; i0) { - // This can occur if the thing can be represented with 8 bytes but not - // the right sign information. - result.add(val, end); - } else { - result.add(val.longValue(), end); + // Convert to long if in range -- otherwise, leave as BigInteger. + if (val.compareTo(LONG_MIN_VALUE) >= 0 && val.compareTo(LONG_MAX_VALUE) <= 0) { + state.add(val.longValue(), end); + } else { + // This can occur if the thing can be represented with 8 bytes but not + // the right sign information. 
+ state.add(val, end); + } } } else if(code == VERSIONSTAMP_CODE) { Versionstamp val = Versionstamp.fromBytes(Arrays.copyOfRange(rep, start, start + Versionstamp.LENGTH)); - result.add(val, start + Versionstamp.LENGTH); + state.add(val, start + Versionstamp.LENGTH); } else if(code == NESTED_CODE) { - DecodeResult subResult = new DecodeResult(); + DecodeState subResult = new DecodeState(); int endPos = start; while(endPos < rep.length) { if(rep[endPos] == nil) { @@ -491,25 +523,13 @@ class TupleUtil { endPos = subResult.end; } } - result.add(subResult.values, endPos); + state.add(subResult.values, endPos); } else { throw new IllegalArgumentException("Unknown tuple data type " + code + " at index " + pos); } } - static int compareSignedBigEndian(byte[] arr1, byte[] arr2) { - if(arr1[0] < 0 && arr2[0] < 0) { - return -1 * ByteArrayUtil.compareUnsigned(arr1, arr2); - } else if(arr1[0] < 0) { - return -1; - } else if(arr2[0] < 0) { - return 1; - } else { - return ByteArrayUtil.compareUnsigned(arr1, arr2); - } - } - static int compareItems(Object item1, Object item2) { int code1 = TupleUtil.getCodeFor(item1); int code2 = TupleUtil.getCodeFor(item2); @@ -529,33 +549,39 @@ class TupleUtil { return ByteArrayUtil.compareUnsigned(((String)item1).getBytes(UTF8), ((String)item2).getBytes(UTF8)); } if(code1 == INT_ZERO_CODE) { - BigInteger bi1; - if(item1 instanceof BigInteger) { - bi1 = (BigInteger)item1; - } else { - bi1 = BigInteger.valueOf(((Number)item1).longValue()); + if(item1 instanceof Long && item2 instanceof Long) { + // This should be the common case, so it's probably worth including as a way out. 
+ return Long.compare((Long)item1, (Long)item2); } - BigInteger bi2; - if(item2 instanceof BigInteger) { - bi2 = (BigInteger)item2; - } else { - bi2 = BigInteger.valueOf(((Number)item2).longValue()); + else { + BigInteger bi1; + if (item1 instanceof BigInteger) { + bi1 = (BigInteger) item1; + } else { + bi1 = BigInteger.valueOf(((Number) item1).longValue()); + } + BigInteger bi2; + if (item2 instanceof BigInteger) { + bi2 = (BigInteger) item2; + } else { + bi2 = BigInteger.valueOf(((Number) item2).longValue()); + } + return bi1.compareTo(bi2); } - return bi1.compareTo(bi2); - } - if(code1 == DOUBLE_CODE) { - // This is done over vanilla double comparison basically to handle NaN - // sorting correctly. - byte[] dBytes1 = ByteBuffer.allocate(8).putDouble((Double)item1).array(); - byte[] dBytes2 = ByteBuffer.allocate(8).putDouble((Double)item2).array(); - return compareSignedBigEndian(dBytes1, dBytes2); } if(code1 == FLOAT_CODE) { // This is done for the same reason that double comparison is done // that way. - byte[] fBytes1 = ByteBuffer.allocate(4).putFloat((Float)item1).array(); - byte[] fBytes2 = ByteBuffer.allocate(4).putFloat((Float)item2).array(); - return compareSignedBigEndian(fBytes1, fBytes2); + int fbits1 = encodeFloatBits((Float)item1); + int fbits2 = encodeFloatBits((Float)item2); + return Integer.compareUnsigned(fbits1, fbits2); + } + if(code1 == DOUBLE_CODE) { + // This is done over vanilla double comparison basically to handle NaN + // sorting correctly. 
+ long dbits1 = encodeDoubleBits((Double)item1); + long dbits2 = encodeDoubleBits((Double)item2); + return Long.compareUnsigned(dbits1, dbits2); } if(code1 == FALSE_CODE) { return Boolean.compare((Boolean)item1, (Boolean)item2); @@ -579,51 +605,53 @@ class TupleUtil { } static List unpack(byte[] bytes, int start, int length) { - DecodeResult decodeResult = new DecodeResult(); + DecodeState decodeState = new DecodeState(); int pos = start; int end = start + length; while(pos < end) { - decode(decodeResult, bytes, pos, end); - pos = decodeResult.end; + decode(decodeState, bytes, pos, end); + pos = decodeState.end; } - return decodeResult.values; + return decodeState.values; } - static void encodeAll(EncodeResult result, List items, byte[] prefix) { + static void encodeAll(EncodeState state, List items, byte[] prefix) { if(prefix != null) { - result.add(prefix); + state.add(prefix); } for(Object t : items) { - encode(result, t); + encode(state, t); } //System.out.println("Joining whole tuple..."); } static byte[] pack(List items, byte[] prefix) { - EncodeResult result = new EncodeResult(2 * items.size() + (prefix == null ? 0 : 1)); - encodeAll(result, items, prefix); - if(result.versionPos >= 0) { + EncodeState state = new EncodeState(2 * items.size() + (prefix == null ? 0 : 1)); + encodeAll(state, items, prefix); + if(state.versionPos >= 0) { throw new IllegalArgumentException("Incomplete Versionstamp included in vanilla tuple packInternal"); - } else { - return ByteArrayUtil.join(null, result.encodedValues); + } + else { + return ByteArrayUtil.join(null, state.encodedValues); } } static byte[] packWithVersionstamp(List items, byte[] prefix) { - EncodeResult result = new EncodeResult(2 * items.size() + (prefix == null ? 1 : 2)); - encodeAll(result, items, prefix); - if(result.versionPos < 0) { + EncodeState state = new EncodeState(2 * items.size() + (prefix == null ? 
1 : 2)); + encodeAll(state, items, prefix); + if(state.versionPos < 0) { throw new IllegalArgumentException("No incomplete Versionstamp included in tuple packInternal with versionstamp"); - } else { - if(result.versionPos > 0xffff) { - throw new IllegalArgumentException("Tuple has incomplete version at position " + result.versionPos + " which is greater than the maximum " + 0xffff); + } + else { + if(state.versionPos > 0xffff) { + throw new IllegalArgumentException("Tuple has incomplete version at position " + state.versionPos + " which is greater than the maximum " + 0xffff); } if (FDB.instance().getAPIVersion() < 520) { - result.add(ByteBuffer.allocate(Short.BYTES).order(ByteOrder.LITTLE_ENDIAN).putShort((short)result.versionPos).array()); + state.add(ByteBuffer.allocate(Short.BYTES).order(ByteOrder.LITTLE_ENDIAN).putShort((short)state.versionPos).array()); } else { - result.add(ByteBuffer.allocate(Integer.BYTES).order(ByteOrder.LITTLE_ENDIAN).putInt(result.versionPos).array()); + state.add(ByteBuffer.allocate(Integer.BYTES).order(ByteOrder.LITTLE_ENDIAN).putInt(state.versionPos).array()); } - return ByteArrayUtil.join(null, result.encodedValues); + return ByteArrayUtil.join(null, state.encodedValues); } } @@ -631,13 +659,17 @@ class TupleUtil { return items.anyMatch(item -> { if(item == null) { return false; - } else if(item instanceof Versionstamp) { + } + else if(item instanceof Versionstamp) { return !((Versionstamp) item).isComplete(); - } else if(item instanceof Tuple) { + } + else if(item instanceof Tuple) { return hasIncompleteVersionstamp(((Tuple) item).stream()); - } else if(item instanceof Collection) { + } + else if(item instanceof Collection) { return hasIncompleteVersionstamp(((Collection) item).stream()); - } else { + } + else { return false; } }); @@ -646,23 +678,25 @@ class TupleUtil { public static void main(String[] args) { try { byte[] bytes = pack(Collections.singletonList(4), null); - DecodeResult result = new DecodeResult(); + DecodeState 
result = new DecodeState(); decode(result, bytes, 0, bytes.length); int val = (int)result.values.get(0); assert 4 == val; - } catch (Exception e) { + } + catch(Exception e) { e.printStackTrace(); System.out.println("Error " + e.getMessage()); } try { byte[] bytes = pack(Collections.singletonList("\u021Aest \u0218tring"), null); - DecodeResult result = new DecodeResult(); + DecodeState result = new DecodeState(); decode(result, bytes, 0, bytes.length); String string = (String)result.values.get(0); System.out.println("contents -> " + string); assert "\u021Aest \u0218tring".equals(string); - } catch (Exception e) { + } + catch(Exception e) { e.printStackTrace(); System.out.println("Error " + e.getMessage()); } From a74dfa548782da90c87f11b30b6cd087d843efd1 Mon Sep 17 00:00:00 2001 From: Alec Grieser Date: Sun, 24 Feb 2019 23:49:31 -0800 Subject: [PATCH 20/47] compare strings by unicode codepoint without copying --- bindings/java/CMakeLists.txt | 1 + .../apple/foundationdb/tuple/StringUtil.java | 75 ++++++++++++++++ .../apple/foundationdb/tuple/TupleUtil.java | 8 +- .../test/TuplePerformanceTest.java | 55 +++++++----- .../apple/foundationdb/test/TupleTest.java | 85 ++++++++++++++++++- 5 files changed, 201 insertions(+), 23 deletions(-) create mode 100644 bindings/java/src/main/com/apple/foundationdb/tuple/StringUtil.java diff --git a/bindings/java/CMakeLists.txt b/bindings/java/CMakeLists.txt index 93e7e7ea8e..f8c1c25a65 100644 --- a/bindings/java/CMakeLists.txt +++ b/bindings/java/CMakeLists.txt @@ -56,6 +56,7 @@ set(JAVA_BINDING_SRCS src/main/com/apple/foundationdb/tuple/package-info.java src/main/com/apple/foundationdb/tuple/Tuple.java src/main/com/apple/foundationdb/tuple/TupleUtil.java + src/main/com/apple/foundationdb/tuple/StringUtil.java src/main/com/apple/foundationdb/tuple/Versionstamp.java) set(JAVA_TESTS_SRCS diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/StringUtil.java b/bindings/java/src/main/com/apple/foundationdb/tuple/StringUtil.java 
new file mode 100644 index 0000000000..660d04a6e1 --- /dev/null +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/StringUtil.java @@ -0,0 +1,75 @@ +/* + * StringUtil.java + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.apple.foundationdb.tuple; + +final class StringUtil { + private static final char SURROGATE_COUNT = Character.MAX_LOW_SURROGATE - Character.MIN_HIGH_SURROGATE + 1; + private static final char ABOVE_SURROGATES = Character.MAX_VALUE - Character.MAX_LOW_SURROGATE; + + static char adjustForSurrogates(char c, String s, int pos) { + if(c > Character.MAX_LOW_SURROGATE) { + return (char)(c - SURROGATE_COUNT); + } + else { + // Validate the UTF-16 string as this can do weird things on invalid strings + if((Character.isHighSurrogate(c) && (pos + 1 >= s.length() || !Character.isLowSurrogate(s.charAt(pos + 1)))) || + (Character.isLowSurrogate(c) && (pos == 0 || !Character.isHighSurrogate(s.charAt(pos - 1))))) { + throw new IllegalArgumentException("malformed UTF-16 string does not follow high surrogate with low surrogate"); + } + return (char)(c + ABOVE_SURROGATES); + + } + } + + // Compare two strings based on their UTF-8 code point values. Note that Java stores strings + // using UTF-16. However, {@link Tuple}s are encoded using UTF-8. 
Using unsigned byte comparison, + // UTF-8 strings will sort based on their Unicode codepoints. However, UTF-16 strings almost, + // but not quite, sort that way. This can be addressed by fixing up surrogates. There are 0x800 surrogate + // values and about 0x2000 code points above the maximum surrogate value. For anything that is a surrogate, + // shift it up by 0x2000, and anything that is above the maximum surrogate value, shift it down by 0x800. + // This makes all surrogates sort after all non-surrogates. + // + // See: https://ssl.icu-project.org/docs/papers/utf16_code_point_order.html + static int compareUtf8(String s1, String s2) { + // Ignore common prefix at the beginning which will compare equal regardless of encoding + int pos = 0; + while(pos < s1.length() && pos < s2.length() && s1.charAt(pos) == s2.charAt(pos)) { + pos++; + } + if(pos >= s1.length() || pos >= s2.length()) { + // One string is the prefix of another, so return based on length. + return Integer.compare(s1.length(), s2.length()); + } + // Compare first different character + char c1 = s1.charAt(pos); + char c2 = s2.charAt(pos); + // Apply "fix up" for surrogates + if(c1 >= Character.MIN_HIGH_SURROGATE) { + c1 = adjustForSurrogates(c1, s1, pos); + } + if(c2 >= Character.MIN_HIGH_SURROGATE) { + c2 = adjustForSurrogates(c2, s2, pos); + } + return Character.compare(c1, c2); + } + + private StringUtil() {} +} diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java b/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java index 5b220d2c90..34d0f78653 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java @@ -546,7 +546,13 @@ class TupleUtil { return ByteArrayUtil.compareUnsigned((byte[])item1, (byte[])item2); } if(code1 == STRING_CODE) { - return ByteArrayUtil.compareUnsigned(((String)item1).getBytes(UTF8), ((String)item2).getBytes(UTF8)); + try { + return 
StringUtil.compareUtf8((String)item1, (String)item2); + } + catch(IllegalArgumentException e) { + // Encountered malformed unicode when comparing. Use byte comparison. + return ByteArrayUtil.compareUnsigned(((String)item1).getBytes(UTF8), ((String)item2).getBytes(UTF8)); + } } if(code1 == INT_ZERO_CODE) { if(item1 instanceof Long && item2 instanceof Long) { diff --git a/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java b/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java index cf79ff41a9..3de9b76785 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java @@ -142,6 +142,7 @@ public class TuplePerformanceTest { long packNanos = 0L; long unpackNanos = 0L; long equalsNanos = 0L; + long equalsArrayNanos = 0L; long hashNanos = 0L; long secondHashNanos = 0L; long subspacePackNanos = 0L; @@ -164,12 +165,22 @@ public class TuplePerformanceTest { endNanos = System.nanoTime(); unpackNanos += endNanos - startNanos; + // Copy items over as if both are packed, their byte arrays are compared + Tuple tCopy = Tuple.fromList(t.getItems()); + Tuple t2Copy = Tuple.fromList(t2.getItems()); + startNanos = System.nanoTime(); + if (!tCopy.equals(t2Copy)) { + throw new RuntimeException("deserialized did not match serialized: " + t + " -- " + t2); + } + endNanos = System.nanoTime(); + equalsNanos += endNanos - startNanos; + startNanos = System.nanoTime(); if(!t.equals(t2)) { throw new RuntimeException("deserialized did not match serialized: " + t + " -- " + t2); } endNanos = System.nanoTime(); - equalsNanos += endNanos - startNanos; + equalsArrayNanos += endNanos - startNanos; startNanos = System.nanoTime(); byte[] subspacePacked = subspace.pack(t); @@ -182,7 +193,7 @@ public class TuplePerformanceTest { startNanos = System.nanoTime(); Tuple t3 = subspace.unpack(subspacePacked); endNanos = System.nanoTime(); - 
if(!t.equals(t3)) { + if (!Tuple.fromList(t.getItems()).equals(Tuple.fromList(t3.getItems())) || !t.equals(t3)) { throw new RuntimeException("does not unpack equally from subspace"); } if(!Arrays.equals(t.pack(), t3.pack())) { @@ -205,25 +216,27 @@ public class TuplePerformanceTest { } System.out.println("Test ended."); - System.out.printf(" Total elements: %d%n", totalLength); - System.out.printf(" Total bytes: %d kB%n", totalBytes / 1000); - System.out.printf(" Bytes per tuple: %f B%n", totalBytes * 1.0 / iterations); - System.out.printf(" Pack time: %f s%n", packNanos * 1e-9); - System.out.printf(" Pack time per tuple: %f \u03BCs%n", packNanos * 1e-3 / iterations); - System.out.printf(" Pack time per kB: %f \u03BCs%n", packNanos * 1.0 / totalBytes); - System.out.printf(" Serialization rate: %f objects / \u03BCs%n", totalLength * 1000.0 / packNanos); - System.out.printf(" Unpack time: %f s%n", unpackNanos * 1e-9); - System.out.printf(" Unpack time per tuple: %f \u03BCs%n", unpackNanos * 1e-3 / iterations); - System.out.printf(" Equals time: %f s%n", equalsNanos * 1e-9); - System.out.printf(" Equals time per tuple: %f \u03BCs%n", equalsNanos * 1e-3 / iterations); - System.out.printf(" Subspace pack time: %f s%n", subspacePackNanos * 1e-9); - System.out.printf(" Subspace pack time per tuple: %f \u03BCs%n", subspacePackNanos * 1e-3 / iterations); - System.out.printf(" Subspace unpack time: %f s%n", subspaceUnpackNanos * 1e-9); - System.out.printf(" Subspace unpack time per tuple: %f \u03BCs%n", subspaceUnpackNanos * 1e-3 / iterations); - System.out.printf(" Hash time: %f s%n", hashNanos * 1e-9); - System.out.printf(" Hash time per tuple: %f \u03BCs%n", hashNanos * 1e-3 / iterations); - System.out.printf(" Second hash time: %f s%n", secondHashNanos * 1e-9); - System.out.printf(" Second hash time per tuple: %f \u03BCs%n", secondHashNanos * 1e-3 / iterations); + System.out.printf(" Total elements: %d%n", totalLength); + System.out.printf(" Total bytes: %d kB%n", 
totalBytes / 1000); + System.out.printf(" Bytes per tuple: %f B%n", totalBytes * 1.0 / iterations); + System.out.printf(" Pack time: %f s%n", packNanos * 1e-9); + System.out.printf(" Pack time per tuple: %f \u03BCs%n", packNanos * 1e-3 / iterations); + System.out.printf(" Pack time per kB: %f \u03BCs%n", packNanos * 1.0 / totalBytes); + System.out.printf(" Serialization rate: %f objects / \u03BCs%n", totalLength * 1000.0 / packNanos); + System.out.printf(" Unpack time: %f s%n", unpackNanos * 1e-9); + System.out.printf(" Unpack time per tuple: %f \u03BCs%n", unpackNanos * 1e-3 / iterations); + System.out.printf(" Equals time: %f s%n", equalsNanos * 1e-9); + System.out.printf(" Equals time per tuple: %f \u03BCs%n", equalsNanos * 1e-3 / iterations); + System.out.printf(" Equals time (using packed): %f s%n", equalsArrayNanos * 1e-9); + System.out.printf(" Equals time (using packed) per tuple: %f \u03BCs%n", equalsArrayNanos * 1e-3 / iterations); + System.out.printf(" Subspace pack time: %f s%n", subspacePackNanos * 1e-9); + System.out.printf(" Subspace pack time per tuple: %f \u03BCs%n", subspacePackNanos * 1e-3 / iterations); + System.out.printf(" Subspace unpack time: %f s%n", subspaceUnpackNanos * 1e-9); + System.out.printf(" Subspace unpack time per tuple: %f \u03BCs%n", subspaceUnpackNanos * 1e-3 / iterations); + System.out.printf(" Hash time: %f s%n", hashNanos * 1e-9); + System.out.printf(" Hash time per tuple: %f \u03BCs%n", hashNanos * 1e-3 / iterations); + System.out.printf(" Second hash time: %f s%n", secondHashNanos * 1e-9); + System.out.printf(" Second hash time per tuple: %f \u03BCs%n", secondHashNanos * 1e-3 / iterations); } public static void main(String[] args) { diff --git a/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java b/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java index 528c11f93a..305c1a90f0 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java +++ 
b/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java @@ -29,6 +29,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Objects; +import java.util.UUID; public class TupleTest { private static final byte FF = (byte)0xff; @@ -38,6 +39,7 @@ public class TupleTest { try { // FDB fdb = FDB.selectAPIVersion(610); serializedForms(); + comparisons(); /* try(Database db = fdb.open()) { runTests(reps, db); @@ -113,7 +115,16 @@ public class TupleTest { Tuple.from(Float.intBitsToFloat(Integer.MAX_VALUE)), new byte[]{0x20, FF, FF, FF, FF}, Tuple.from(Double.longBitsToDouble(Long.MAX_VALUE)), new byte[]{0x21, FF, FF, FF, FF, FF, FF, FF, FF}, Tuple.from(Float.intBitsToFloat(~0)), new byte[]{0x20, 0x00, 0x00, 0x00, 0x00}, - Tuple.from(Double.longBitsToDouble(~0L)), new byte[]{0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} + Tuple.from(Double.longBitsToDouble(~0L)), new byte[]{0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + Tuple.from(""), new byte[]{0x02, 0x00}, + Tuple.from("hello"), new byte[]{0x02, 'h', 'e', 'l', 'l', 'o', 0x00}, + Tuple.from("\u4e2d\u6587"), new byte[]{0x02, (byte)0xe4, (byte)0xb8, (byte)0xad, (byte)0xe6, (byte)0x96, (byte)0x87, 0x00}, + Tuple.from("\u03bc\u03ac\u03b8\u03b7\u03bc\u03b1"), new byte[]{0x02, (byte)0xce, (byte)0xbc, (byte)0xce, (byte)0xac, (byte)0xce, (byte)0xb8, (byte)0xce, (byte)0xb7, (byte)0xce, (byte)0xbc, (byte)0xce, (byte)0xb1, 0x00}, + Tuple.from(new String(new int[]{0x1f525}, 0, 1)), new byte[]{0x02, (byte)0xf0, (byte)0x9f, (byte)0x94, (byte)0xa5, 0x00}, + Tuple.from("\ud83d\udd25"), new byte[]{0x02, (byte)0xf0, (byte)0x9f, (byte)0x94, (byte)0xa5, 0x00}, + Tuple.from("\ud83e\udd6f"), new byte[]{0x02, (byte)0xf0, (byte)0x9f, (byte)0xa5, (byte)0xaf, 0x00}, + Tuple.from("\udd25\ud83e\udd6f"), new byte[]{0x02, 0x3f, (byte)0xf0, (byte)0x9f, (byte)0xa5, (byte)0xaf, 0x00}, // malformed string - low surrogate without high surrogate + Tuple.from("a\udd25\ud83e\udd6f"), new 
byte[]{0x02, 'a', 0x3f, (byte)0xf0, (byte)0x9f, (byte)0xa5, (byte)0xaf, 0x00} // malformed string - low surrogate without high surrogate ); for(TupleSerialization serialization : serializations) { @@ -130,6 +141,78 @@ public class TupleTest { System.out.println("All tuples had matching serializations"); } + private static void comparisons() { + List tuples = Arrays.asList( + Tuple.from(0L), + Tuple.from(BigInteger.ZERO), + Tuple.from(1L), + Tuple.from(BigInteger.ONE), + Tuple.from(-1L), + Tuple.from(BigInteger.ONE.negate()), + Tuple.from(Long.MAX_VALUE), + Tuple.from(Long.MIN_VALUE), + Tuple.from(BigInteger.valueOf(Long.MIN_VALUE).subtract(BigInteger.ONE)), + Tuple.from(BigInteger.valueOf(Long.MIN_VALUE).shiftLeft(1)), + Tuple.from(-0.0f), + Tuple.from(0.0f), + Tuple.from(-0.0), + Tuple.from(0.0), + Tuple.from(Float.NEGATIVE_INFINITY), + Tuple.from(Double.NEGATIVE_INFINITY), + Tuple.from(Float.NaN), + Tuple.from(Double.NaN), + Tuple.from(Float.intBitsToFloat(Float.floatToIntBits(Float.NaN) + 1)), + Tuple.from(Double.longBitsToDouble(Double.doubleToLongBits(Double.NaN) + 1)), + Tuple.from(Float.intBitsToFloat(Float.floatToIntBits(Float.NaN) + 2)), + Tuple.from(Double.longBitsToDouble(Double.doubleToLongBits(Double.NaN) + 2)), + Tuple.from(Float.intBitsToFloat(Float.floatToIntBits(Float.NaN) ^ Integer.MIN_VALUE)), + Tuple.from(Double.longBitsToDouble(Double.doubleToLongBits(Double.NaN) ^ Long.MIN_VALUE)), + Tuple.from(Float.intBitsToFloat(Float.floatToIntBits(Float.NaN) ^ Integer.MIN_VALUE + 1)), + Tuple.from(Double.longBitsToDouble(Double.doubleToLongBits(Double.NaN) ^ Long.MIN_VALUE + 1)), + Tuple.from(Float.POSITIVE_INFINITY), + Tuple.from(Double.POSITIVE_INFINITY), + Tuple.from((Object)new byte[0]), + Tuple.from((Object)new byte[]{0x00}), + Tuple.from((Object)new byte[]{0x00, FF}), + Tuple.from((Object)new byte[]{0x7f}), + Tuple.from((Object)new byte[]{(byte)0x80}), + Tuple.from("a"), + Tuple.from("\u03bc\u03ac\u03b8\u03b7\u03bc\u03b1"), + 
Tuple.from("\u03bc\u03b1\u0301\u03b8\u03b7\u03bc\u03b1"), + Tuple.from("\u4e2d\u6587"), + Tuple.from("\u4e2d\u570B"), + Tuple.from("\ud83d\udd25"), + Tuple.from("\ud83e\udd6f"), + Tuple.from("a\ud83d\udd25"), + Tuple.from("\ufb49"), + Tuple.from("\ud83d\udd25\ufb49"), + Tuple.from("\ud8ed\ud8ed"), // malformed string -- two high surrogates + Tuple.from("\ud8ed\ud8eda"), // malformed string -- two high surrogates + Tuple.from("\udd25\udd25"), // malformed string -- two low surrogates + Tuple.from("a\udd25\ud8ed"), // malformed string -- two low surrogates + Tuple.from("\udd25\ud83e\udd6f"), // malformed string -- low surrogate followed by high then low surrogate + Tuple.from("\udd6f\ud83e\udd6f"), // malformed string -- low surrogate followed by high then low surrogate + Tuple.from(new UUID(-1, 0)), + Tuple.from(new UUID(-1, -1)), + Tuple.from(new UUID(1, -1)), + Tuple.from(new UUID(1, 1)) + ); + + for(Tuple t1 : tuples) { + for(Tuple t2 : tuples) { + System.out.println("Comparing " + t1 + " and " + t2); + // Copy the items over to new tuples to avoid having them use the memoized packed representations + Tuple t1copy = Tuple.fromList(t1.getItems()); + Tuple t2copy = Tuple.fromList(t2.getItems()); + int semanticComparison = t1copy.compareTo(t2copy); + int byteComparison = ByteArrayUtil.compareUnsigned(t1.pack(), t2.pack()); + if(Integer.signum(semanticComparison) != Integer.signum(byteComparison)) { + throw new RuntimeException("Tuple t1 and t2 comparison mismatched: semantic = " + semanticComparison + " while byte order = " + byteComparison); + } + } + } + } + private static void runTests(final int reps, TransactionContext db) { System.out.println("Running tests..."); long start = System.currentTimeMillis(); From 663d750e1de2ceb1a2d8fd78ab5c511eeec37fd9 Mon Sep 17 00:00:00 2001 From: Alec Grieser Date: Mon, 25 Feb 2019 21:59:16 -0800 Subject: [PATCH 21/47] pack Tuples with a single byte array allocation of the right size --- .../foundationdb/tuple/ByteArrayUtil.java 
| 143 +++---- .../apple/foundationdb/tuple/StringUtil.java | 43 +++ .../com/apple/foundationdb/tuple/Tuple.java | 56 +-- .../apple/foundationdb/tuple/TupleUtil.java | 357 +++++++++++------- .../foundationdb/test/AsyncStackTester.java | 6 +- .../apple/foundationdb/test/StackTester.java | 8 +- .../test/TuplePerformanceTest.java | 64 +++- .../apple/foundationdb/test/TupleTest.java | 107 +++++- 8 files changed, 547 insertions(+), 237 deletions(-) diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java b/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java index eeea3e1799..d848c296ff 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java @@ -20,7 +20,6 @@ package com.apple.foundationdb.tuple; -import java.math.BigInteger; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.Arrays; @@ -154,7 +153,10 @@ public class ByteArrayUtil { * @return a newly created array where {@code pattern} replaced with {@code replacement} */ public static byte[] replace(byte[] src, byte[] pattern, byte[] replacement) { - return join(replacement, split(src, pattern)); + if(src == null) { + return null; + } + return replace(src, 0, src.length, pattern, replacement); } /** @@ -171,7 +173,69 @@ public class ByteArrayUtil { */ public static byte[] replace(byte[] src, int offset, int length, byte[] pattern, byte[] replacement) { - return join(replacement, split(src, offset, length, pattern)); + if(pattern == null || pattern.length == 0) { + return Arrays.copyOfRange(src, offset, offset + length); + } + ByteBuffer dest; + if(replacement == null || replacement.length != pattern.length) { + // Array might change size. This is the "tricky" case. 
+ byte patternFirst = pattern[0]; + int patternOccurrences = 0; + int currentPosition = offset; + while(currentPosition < offset + length) { + if(src[currentPosition] == patternFirst && regionEquals(src, currentPosition, pattern)) { + patternOccurrences++; + currentPosition += pattern.length; + } + else { + currentPosition++; + } + } + if(patternOccurrences == 0) { + // Pattern doesn't occur. Just return a copy of the needed region. + return Arrays.copyOfRange(src, offset, offset + length); + } + int replacementLength = (replacement == null) ? 0 : replacement.length; + int newLength = length + patternOccurrences * (replacementLength - pattern.length); + if(newLength == 0) { + return new byte[0]; + } + else { + dest = ByteBuffer.allocate(newLength); + } + } + else { + // No matter what, the array will stay the same size as replacement.length = pattern.length + dest = ByteBuffer.allocate(length); + } + replace(src, offset, length, pattern, replacement, dest); + return dest.array(); + } + + static void replace(byte[] src, int offset, int length, byte[] pattern, byte[] replacement, ByteBuffer dest) { + if(pattern == null || pattern.length == 0) { + dest.put(src, offset, length); + return; + } + byte patternFirst = pattern[0]; + int lastPosition = offset; + int currentPosition = offset; + + while(currentPosition < offset + length) { + if(src[currentPosition] == patternFirst && regionEquals(src, currentPosition, pattern)) { + dest.put(src, lastPosition, currentPosition - lastPosition); + if(replacement != null) { + dest.put(replacement); + } + currentPosition += pattern.length; + lastPosition = currentPosition; + } + else { + currentPosition++; + } + } + + dest.put(src, lastPosition, currentPosition - lastPosition); } /** @@ -203,7 +267,7 @@ public class ByteArrayUtil { * @return a list of byte arrays from {@code src} now not containing {@code delimiter} */ public static List split(byte[] src, int offset, int length, byte[] delimiter) { - List parts = new LinkedList(); + 
List parts = new LinkedList<>(); int idx = offset; int lastSplitEnd = offset; while(idx <= (offset+length) - delimiter.length) { @@ -225,13 +289,6 @@ public class ByteArrayUtil { return parts; } - static int bisectLeft(BigInteger[] arr, BigInteger i) { - int n = Arrays.binarySearch(arr, i); - if(n >= 0) - return n; - return (n + 1) * -1; - } - /** * Compare byte arrays for equality and ordering purposes. Elements in the array * are interpreted and compared as unsigned bytes. Neither parameter @@ -276,61 +333,6 @@ public class ByteArrayUtil { return true; } - /** - * Scan through an array of bytes to find the first occurrence of a specific value. - * - * @param src array to scan. Must not be {@code null}. - * @param what the value for which to search. - * @param start the index at which to start the search. If this is at or after - * the end of {@code src}, the result will always be {@code -1}. - * @param end the index one past the last entry at which to search - * - * @return return the location of the first instance of {@code value}, or - * {@code -1} if not found. 
- */ - static int findNext(byte[] src, byte what, int start, int end) { - for(int i = start; i < end; i++) { - if(src[i] == what) - return i; - } - return -1; - } - - /** - * Gets the index of the first element after the next occurrence of the byte sequence [nm] - * @param v the bytes to scan through - * @param n first character to find - * @param m second character to find - * @param start the index at which to start the scan - * - * @return the index after the next occurrence of [nm] - */ - static int findTerminator(byte[] v, byte n, byte m, int start) { - return findTerminator(v, n, m, start, v.length); - } - - /** - * Gets the index of the first element after the next occurrence of the byte sequence [nm] - * @param v the bytes to scan through - * @param n first character to find - * @param m second character to find - * @param start the index at which to start the scan - * @param end the index at which to stop the search (exclusive) - * - * @return the index after the next occurrence of [nm] - */ - static int findTerminator(byte[] v, byte n, byte m, int start, int end) { - int pos = start; - while(true) { - pos = findNext(v, n, pos, end); - if(pos < 0) - return end; - if(pos + 1 == end || v[pos+1] != m) - return pos; - pos += 2; - } - } - /** * Computes the first key that would sort outside the range prefixed by {@code key}. 
* {@code key} must be non-null, and contain at least some character this is not @@ -417,5 +419,14 @@ public class ByteArrayUtil { return s.toString(); } + static int nullCount(byte[] val) { + int nulls = 0; + for(int i = 0; i < val.length; i++) { + if(val[i] == 0x00) + nulls += 1; + } + return nulls; + } + private ByteArrayUtil() {} } diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/StringUtil.java b/bindings/java/src/main/com/apple/foundationdb/tuple/StringUtil.java index 660d04a6e1..cd1d18d627 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/StringUtil.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/StringUtil.java @@ -71,5 +71,48 @@ final class StringUtil { return Character.compare(c1, c2); } + static int packedSize(String s) { + final int strLength = s.length(); + int size = 0; + int pos = 0; + + while(pos < strLength) { + char c = s.charAt(pos); + if(c == '\0') { + // Null is encoded as \x00\xff + size += 2; + } + else if(c <= 0x7f) { + // ASCII code point. Only 1 byte. + size += 1; + } + else if(c <= 0x07ff) { + // 2 byte code point + size += 2; + } + else if(Character.isHighSurrogate(c)) { + if(pos + 1 < s.length() && Character.isLowSurrogate(s.charAt(pos + 1))) { + // High surrogate followed by low surrogate means the code point + // is between U+10000 and U+10FFFF, so it requires 4 bytes. 
+ size += 4; + pos += 1; + } + else { + throw new IllegalArgumentException("malformed UTF-16 has high surrogate not followed by low surrogate"); + } + } + else if(Character.isLowSurrogate(c)) { + throw new IllegalArgumentException("malformed UTF-16 has low surrogate without prior high surrogate"); + } + else { + // 3 byte code point + size += 3; + } + pos += 1; + } + + return size; + } + private StringUtil() {} } diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java b/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java index b3761d8c5d..5fa9726c14 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java @@ -73,6 +73,7 @@ public class Tuple implements Comparable, Iterable { private List elements; private int memoizedHash = 0; private byte[] packed = null; + private int memoizedPackedSize = -1; private Tuple(List elements, Object newItem) { this(elements); @@ -83,12 +84,6 @@ public class Tuple implements Comparable, Iterable { this.elements = new ArrayList<>(elements); } - private enum VersionstampExpectations { - UNKNOWN, - HAS_INCOMPLETE, - HAS_NO_INCOMPLETE - } - /** * Creates a copy of this {@code Tuple} with an appended last element. 
The parameter * is untyped but only {@link String}, {@code byte[]}, {@link Number}s, {@link UUID}s, @@ -313,13 +308,15 @@ public class Tuple implements Comparable, Iterable { byte[] packInternal(byte[] prefix, boolean copy) { boolean hasPrefix = prefix != null && prefix.length > 1; if(packed == null) { - byte[] result = TupleUtil.pack(elements, prefix); + byte[] result = TupleUtil.pack(elements, prefix, getPackedSize()); if(hasPrefix) { packed = Arrays.copyOfRange(result, prefix.length, result.length); + memoizedPackedSize = packed.length; return result; } else { packed = result; + memoizedPackedSize = packed.length; } } if(hasPrefix) { @@ -366,21 +363,23 @@ public class Tuple implements Comparable, Iterable { * @throws IllegalArgumentException if there is not exactly one incomplete {@link Versionstamp} included in this {@code Tuple} */ public byte[] packWithVersionstamp(byte[] prefix) { - return TupleUtil.packWithVersionstamp(elements, prefix); + return TupleUtil.packWithVersionstamp(elements, prefix, getPackedSize()); } byte[] packWithVersionstampInternal(byte[] prefix, boolean copy) { boolean hasPrefix = prefix != null && prefix.length > 0; if(packed == null) { - byte[] result = TupleUtil.packWithVersionstamp(elements, prefix); + byte[] result = TupleUtil.packWithVersionstamp(elements, prefix, getPackedSize()); if(hasPrefix) { byte[] withoutPrefix = Arrays.copyOfRange(result, prefix.length, result.length); TupleUtil.adjustVersionPosition(packed, -1 * prefix.length); packed = withoutPrefix; + memoizedPackedSize = packed.length; return result; } else { packed = result; + memoizedPackedSize = packed.length; } } if(hasPrefix) { @@ -398,13 +397,13 @@ public class Tuple implements Comparable, Iterable { } } - byte[] packMaybeVersionstamp(byte[] prefix) { + byte[] packMaybeVersionstamp() { if(packed == null) { if(hasIncompleteVersionstamp()) { - return packWithVersionstampInternal(prefix, false); + return packWithVersionstampInternal(null, false); } else { - return 
packInternal(prefix, false); + return packInternal(null, false); } } else { @@ -489,6 +488,7 @@ public class Tuple implements Comparable, Iterable { Tuple t = new Tuple(); t.elements = TupleUtil.unpack(bytes, offset, length); t.packed = Arrays.copyOfRange(bytes, offset, offset + length); + t.memoizedPackedSize = length; return t; } @@ -727,11 +727,14 @@ public class Tuple implements Comparable, Iterable { Object o = this.elements.get(index); if(o == null) { return null; - } else if(o instanceof Tuple) { + } + else if(o instanceof Tuple) { return (Tuple)o; - } else if(o instanceof List) { - return Tuple.fromItems((List)o); - } else { + } + else if(o instanceof List) { + return Tuple.fromItems((List)o); + } + else { throw new ClassCastException("Cannot convert item of type " + o.getClass() + " to tuple"); } } @@ -824,16 +827,23 @@ public class Tuple implements Comparable, Iterable { } /** - * Get the number of bytes in the packed representation of this {@code Tuple}. Note that at the - * moment, this number is calculated by packing the {@code Tuple} and looking at its size. This method - * will memoize the result, however, so asking the same {@code Tuple} for its size multiple times - * is a fast operation. + * Get the number of bytes in the packed representation of this {@code Tuple}. This is done by summing + * the serialized sizes of all of the elements of this {@code Tuple} and does not pack everything + * into a single {@code Tuple}. The return value of this function is stored within this {@code Tuple} + * after this function has been called so that subsequent calls on the same object are fast. This method + * does not validate that there is no more than one incomplete {@link Versionstamp} in this {@code Tuple}. 
* * @return the number of bytes in the packed representation of this {@code Tuple} */ public int getPackedSize() { - byte[] p = packMaybeVersionstamp(null); - return p.length; + if(memoizedPackedSize < 0) { + memoizedPackedSize = getPackedSize(false); + } + return memoizedPackedSize; + } + + int getPackedSize(boolean nested) { + return TupleUtil.getPackedSize(elements, nested); } /** @@ -871,7 +881,7 @@ public class Tuple implements Comparable, Iterable { @Override public int hashCode() { if(memoizedHash == 0) { - memoizedHash = Arrays.hashCode(packMaybeVersionstamp(null)); + memoizedHash = Arrays.hashCode(packMaybeVersionstamp()); } return memoizedHash; } diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java b/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java index 34d0f78653..fc1fbc7262 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java @@ -36,11 +36,10 @@ import com.apple.foundationdb.FDB; class TupleUtil { private static final byte nil = 0x00; - private static final BigInteger[] BIG_INT_SIZE_LIMITS; - private static final Charset UTF8; + private static final Charset UTF8 = Charset.forName("UTF-8"); private static final BigInteger LONG_MIN_VALUE = BigInteger.valueOf(Long.MIN_VALUE); private static final BigInteger LONG_MAX_VALUE = BigInteger.valueOf(Long.MAX_VALUE); - private static final IterableComparator iterableComparator; + private static final IterableComparator iterableComparator = new IterableComparator(); private static final byte BYTES_CODE = 0x01; private static final byte STRING_CODE = 0x02; @@ -57,26 +56,11 @@ class TupleUtil { private static final byte[] NULL_ARR = new byte[] {nil}; private static final byte[] NULL_ESCAPED_ARR = new byte[] {nil, (byte)0xFF}; - private static final byte[] BYTES_ARR = new byte[]{BYTES_CODE}; - private static final byte[] STRING_ARR = new byte[]{STRING_CODE}; - private static 
final byte[] NESTED_ARR = new byte[]{NESTED_CODE}; - private static final byte[] INT_ZERO_ARR = new byte[]{INT_ZERO_CODE}; - private static final byte[] FALSE_ARR = new byte[]{FALSE_CODE}; - private static final byte[] TRUE_ARR = new byte[]{TRUE_CODE}; - private static final byte[] VERSIONSTAMP_ARR = new byte[]{VERSIONSTAMP_CODE}; - - static { - BIG_INT_SIZE_LIMITS = new BigInteger[9]; - for(int i = 0; i < BIG_INT_SIZE_LIMITS.length; i++) { - BIG_INT_SIZE_LIMITS[i] = (BigInteger.ONE).shiftLeft(i * 8).subtract(BigInteger.ONE); - } - UTF8 = Charset.forName("UTF-8"); - iterableComparator = new IterableComparator(); - } static class DecodeState { final List values; int end; + int nullCount; // Basically a hack to allow findTerminator to return the terminator and null count DecodeState() { values = new ArrayList<>(); @@ -87,15 +71,36 @@ class TupleUtil { values.add(value); this.end = end; } + + int findNullTerminator(byte[] bytes, int from, int to) { + nullCount = 0; + int x = from; + while(x < to) { + if(bytes[x] == 0x00) { + if(x + 1 >= to || bytes[x + 1] != (byte)0xFF) { + return x; + } + else { + nullCount++; + x += 2; + } + } + else { + x += 1; + } + } + throw new IllegalArgumentException("no terminator found for bytes starting at " + from); + } } static class EncodeState { - final List encodedValues; + final ByteBuffer encodedBytes; int totalLength; int versionPos; - EncodeState(int capacity) { - this.encodedValues = new ArrayList<>(capacity); + EncodeState(ByteBuffer dest) { + encodedBytes = dest; + encodedBytes.order(ByteOrder.BIG_ENDIAN); totalLength = 0; versionPos = -1; } @@ -104,25 +109,52 @@ class TupleUtil { if(versionPos >= 0 && this.versionPos >= 0) { throw new IllegalArgumentException("Multiple incomplete Versionstamps included in Tuple"); } - encodedValues.add(encoded); + encodedBytes.put(encoded); totalLength += encoded.length; this.versionPos = versionPos; return this; } EncodeState add(byte[] encoded) { - encodedValues.add(encoded); + 
encodedBytes.put(encoded); totalLength += encoded.length; return this; } - } - static int byteLength(byte[] bytes) { - for(int i = 0; i < bytes.length; i++) { - if(bytes[i] == 0x00) continue; - return bytes.length - i; + EncodeState add(byte[] encoded, int offset, int length) { + encodedBytes.put(encoded, offset, length); + totalLength += length; + return this; + } + + EncodeState addNullEscaped(byte[] encoded) { + int nullCount = ByteArrayUtil.nullCount(encoded); + if(nullCount == 0) { + encodedBytes.put(encoded); + } + else { + ByteArrayUtil.replace(encoded, 0, encoded.length, NULL_ARR, NULL_ESCAPED_ARR, encodedBytes); + } + return this; + } + + EncodeState add(byte b) { + encodedBytes.put(b); + totalLength++; + return this; + } + + EncodeState add(int i) { + encodedBytes.putInt(i); + totalLength += Integer.BYTES; + return this; + } + + EncodeState add(long l) { + encodedBytes.putLong(l); + totalLength += Long.BYTES; + return this; } - return 0; } // These four functions are for adjusting the encoding of floating point numbers so @@ -153,11 +185,16 @@ class TupleUtil { return Double.longBitsToDouble(origBits); } - // Get the number of bytes in the representation of a long. - static int byteCount(long i) { + // Get the minimal number of bytes in the representation of a long. + static int minimalByteCount(long i) { return (Long.SIZE + 7 - Long.numberOfLeadingZeros(i >= 0 ? i : -i)) / 8; } + static int minimalByteCount(BigInteger i) { + int bitLength = (i.compareTo(BigInteger.ZERO) >= 0) ? 
i.bitLength() : i.negate().bitLength(); + return (bitLength + 7) / 8; + } + private static void adjustVersionPosition300(byte[] packed, int delta) { int offsetOffset = packed.length - Short.BYTES; ByteBuffer buffer = ByteBuffer.wrap(packed, offsetOffset, Short.BYTES).order(ByteOrder.LITTLE_ENDIAN); @@ -224,7 +261,7 @@ class TupleUtil { state.add(NULL_ESCAPED_ARR); } else { - state.add(NULL_ARR); + state.add(nil); } } else if(t instanceof byte[]) @@ -258,133 +295,104 @@ class TupleUtil { } static void encode(EncodeState state, byte[] bytes) { - byte[] escaped = ByteArrayUtil.replace(bytes, NULL_ARR, NULL_ESCAPED_ARR); - state.add(BYTES_ARR).add(escaped).add(NULL_ARR); + state.add(BYTES_CODE).addNullEscaped(bytes).add(nil); } static void encode(EncodeState state, String s) { - byte[] escaped = ByteArrayUtil.replace(s.getBytes(UTF8), NULL_ARR, NULL_ESCAPED_ARR); - state.add(STRING_ARR).add(escaped).add(NULL_ARR); + byte[] bytes = s.getBytes(UTF8); + state.add(STRING_CODE).addNullEscaped(bytes).add(nil); } static void encode(EncodeState state, BigInteger i) { //System.out.println("Encoding integral " + i); if(i.equals(BigInteger.ZERO)) { - state.add(INT_ZERO_ARR); + state.add(INT_ZERO_CODE); return; } - byte[] bytes = i.toByteArray(); + int n = minimalByteCount(i); + if(n > 0xff) { + throw new IllegalArgumentException("BigInteger magnitude is too large (more than 255 bytes)"); + } if(i.compareTo(BigInteger.ZERO) > 0) { - if(i.compareTo(BIG_INT_SIZE_LIMITS[BIG_INT_SIZE_LIMITS.length-1]) > 0) { - int length = byteLength(bytes); - if(length > 0xff) { - throw new IllegalArgumentException("BigInteger magnitude is too large (more than 255 bytes)"); - } - byte[] intBytes = new byte[length + 2]; - intBytes[0] = POS_INT_END; - intBytes[1] = (byte)(length); - System.arraycopy(bytes, bytes.length - length, intBytes, 2, length); - state.add(intBytes); + byte[] bytes = i.toByteArray(); + if(n > Long.BYTES) { + state.add(POS_INT_END); + state.add((byte)n); + state.add(bytes, 
bytes.length - n, n); } else { - int n = ByteArrayUtil.bisectLeft(BIG_INT_SIZE_LIMITS, i); - assert n <= BIG_INT_SIZE_LIMITS.length; //System.out.println(" -- integral has 'n' of " + n + " and output bytes of " + bytes.length); - byte[] intBytes = new byte[n + 1]; - intBytes[0] = (byte) (INT_ZERO_CODE + n); - System.arraycopy(bytes, bytes.length - n, intBytes, 1, n); - state.add(intBytes); + state.add((byte)(INT_ZERO_CODE + n)); + state.add(bytes, bytes.length - n, n); } } else { - if(i.negate().compareTo(BIG_INT_SIZE_LIMITS[BIG_INT_SIZE_LIMITS.length - 1]) > 0) { - int length = byteLength(i.negate().toByteArray()); - if (length > 0xff) { - throw new IllegalArgumentException("BigInteger magnitude is too large (more than 255 bytes)"); + byte[] bytes = i.subtract(BigInteger.ONE).toByteArray(); + if(n > Long.BYTES) { + state.add(NEG_INT_START); + state.add((byte)(n ^ 0xff)); + if(bytes.length >= n) { + state.add(bytes, bytes.length - n, n); } - BigInteger offset = BigInteger.ONE.shiftLeft(length * 8).subtract(BigInteger.ONE); - byte[] adjusted = i.add(offset).toByteArray(); - byte[] intBytes = new byte[length + 2]; - intBytes[0] = NEG_INT_START; - intBytes[1] = (byte) (length ^ 0xff); - if (adjusted.length >= length) { - System.arraycopy(adjusted, adjusted.length - length, intBytes, 2, length); - } else { - Arrays.fill(intBytes, 2, intBytes.length - adjusted.length, (byte) 0x00); - System.arraycopy(adjusted, 0, intBytes, intBytes.length - adjusted.length, adjusted.length); + else { + for(int x = 0; x < n - bytes.length; x++) { + state.add((byte)0x00); + } + state.add(bytes, 0, bytes.length); } - state.add(intBytes); } else { - int n = ByteArrayUtil.bisectLeft(BIG_INT_SIZE_LIMITS, i.negate()); - - assert n >= 0 && n < BIG_INT_SIZE_LIMITS.length; // can we do this? 
it seems to be required for the following statement - - long maxv = BIG_INT_SIZE_LIMITS[n].add(i).longValue(); - byte[] adjustedBytes = ByteBuffer.allocate(8).order(ByteOrder.BIG_ENDIAN).putLong(maxv).array(); - byte[] intBytes = new byte[n + 1]; - intBytes[0] = (byte) (INT_ZERO_CODE - n); - System.arraycopy(adjustedBytes, adjustedBytes.length - n, intBytes, 1, n); - state.add(intBytes); + state.add((byte)(INT_ZERO_CODE - n)); + if(bytes.length >= n) { + state.add(bytes, bytes.length - n, n); + } + else { + for(int x = 0; x < n - bytes.length; x++) { + state.add((byte)0x00); + } + state.add(bytes, 0, bytes.length); + } } } } static void encode(EncodeState state, long i) { if(i == 0L) { - state.add(INT_ZERO_ARR); + state.add(INT_ZERO_CODE); return; } - int n = byteCount(i); - byte[] intBytes = new byte[n + 1]; + int n = minimalByteCount(i); // First byte encodes number of bytes (as difference from INT_ZERO_CODE) - intBytes[0] = (byte)(INT_ZERO_CODE + (i >= 0 ? n : -n)); + state.add((byte)(INT_ZERO_CODE + (i >= 0 ? n : -n))); // For positive integers, copy the bytes in big-endian order excluding leading 0x00 bytes. // For negative integers, copy the bytes of the one's complement representation excluding // the leading 0xff bytes. As Java stores negative values in two's complement, we subtract 1 // from negative values. long val = Long.reverseBytes((i >= 0) ? 
i : (i - 1)) >> (Long.SIZE - 8 * n); - for(int x = 1; x < intBytes.length; x++) { - intBytes[x] = (byte)(val & 0xff); + for(int x = 0; x < n; x++) { + state.add((byte)(val & 0xff)); val >>= 8; } - state.add(intBytes); } static void encode(EncodeState state, Float f) { - byte[] floatBytes = ByteBuffer.allocate(1 + Float.BYTES).order(ByteOrder.BIG_ENDIAN) - .put(FLOAT_CODE) - .putInt(encodeFloatBits(f)) - .array(); - state.add(floatBytes); + state.add(FLOAT_CODE).add(encodeFloatBits(f)); } static void encode(EncodeState state, Double d) { - byte[] doubleBytes = ByteBuffer.allocate(1 + Double.BYTES).order(ByteOrder.BIG_ENDIAN) - .put(DOUBLE_CODE) - .putLong(encodeDoubleBits(d)) - .array(); - state.add(doubleBytes); + state.add(DOUBLE_CODE).add(encodeDoubleBits(d)); } static void encode(EncodeState state, Boolean b) { - if(b) { - state.add(TRUE_ARR); - } - else { - state.add(FALSE_ARR); - } + state.add(b ? TRUE_CODE : FALSE_CODE); } static void encode(EncodeState state, UUID uuid) { - byte[] uuidBytes = ByteBuffer.allocate(17).put(UUID_CODE).order(ByteOrder.BIG_ENDIAN) - .putLong(uuid.getMostSignificantBits()).putLong(uuid.getLeastSignificantBits()) - .array(); - state.add(uuidBytes); + state.add(UUID_CODE).add(uuid.getMostSignificantBits()).add(uuid.getLeastSignificantBits()); } static void encode(EncodeState state, Versionstamp v) { - state.add(VERSIONSTAMP_ARR); + state.add(VERSIONSTAMP_CODE); if(v.isComplete()) { state.add(v.getBytes()); } @@ -394,11 +402,11 @@ class TupleUtil { } static void encode(EncodeState state, List value) { - state.add(NESTED_ARR); + state.add(NESTED_CODE); for(Object t : value) { encode(state, t, true); } - state.add(NULL_ARR); + state.add(nil); } static void decode(DecodeState state, byte[] rep, int pos, int last) { @@ -411,17 +419,32 @@ class TupleUtil { state.add(null, start); } else if(code == BYTES_CODE) { - int end = ByteArrayUtil.findTerminator(rep, (byte)0x0, (byte)0xff, start, last); + int end = state.findNullTerminator(rep, 
start, last); //System.out.println("End of byte string: " + end); - byte[] range = ByteArrayUtil.replace(rep, start, end - start, NULL_ESCAPED_ARR, new byte[] { nil }); + byte[] range; + if(state.nullCount == 0) { + range = Arrays.copyOfRange(rep, start, end); + } + else { + ByteBuffer dest = ByteBuffer.allocate(end - start - state.nullCount); + ByteArrayUtil.replace(rep, start, end - start, NULL_ESCAPED_ARR, NULL_ARR, dest); + range = dest.array(); + } //System.out.println(" -> byte string contents: '" + ArrayUtils.printable(range) + "'"); state.add(range, end + 1); } else if(code == STRING_CODE) { - int end = ByteArrayUtil.findTerminator(rep, (byte)0x0, (byte)0xff, start, last); + int end = state.findNullTerminator(rep, start, last); //System.out.println("End of UTF8 string: " + end); - byte[] stringBytes = ByteArrayUtil.replace(rep, start, end - start, NULL_ESCAPED_ARR, new byte[] { nil }); - String str = new String(stringBytes, UTF8); + String str; + if(state.nullCount == 0) { + str = new String(rep, start, end - start, UTF8); + } + else { + ByteBuffer dest = ByteBuffer.allocate(end - start - state.nullCount); + ByteArrayUtil.replace(rep, start, end - start, NULL_ESCAPED_ARR, NULL_ARR, dest); + str = new String(dest.array(), UTF8); + } //System.out.println(" -> UTF8 string contents: '" + str + "'"); state.add(str, end + 1); } @@ -442,19 +465,23 @@ class TupleUtil { state.add(true, start); } else if(code == UUID_CODE) { - ByteBuffer bb = ByteBuffer.wrap(rep, start, 16).order(ByteOrder.BIG_ENDIAN); + ByteBuffer bb = ByteBuffer.wrap(rep, start, 2 * Long.BYTES).order(ByteOrder.BIG_ENDIAN); long msb = bb.getLong(); long lsb = bb.getLong(); state.add(new UUID(msb, lsb), start + 16); } else if(code == POS_INT_END) { int n = rep[start] & 0xff; - BigInteger res = new BigInteger(ByteArrayUtil.join(new byte[]{0x00}, Arrays.copyOfRange(rep, start+1, start+n+1))); + byte[] intBytes = new byte[n + 1]; + System.arraycopy(rep, start + 1, intBytes, 1, n); + BigInteger res = new 
BigInteger(intBytes); state.add(res, start + n + 1); } else if(code == NEG_INT_START) { int n = (rep[start] ^ 0xff) & 0xff; - BigInteger origValue = new BigInteger(ByteArrayUtil.join(new byte[]{0x00}, Arrays.copyOfRange(rep, start+1, start+n+1))); + byte[] intBytes = new byte[n + 1]; + System.arraycopy(rep, start + 1, intBytes, 1, n); + BigInteger origValue = new BigInteger(intBytes); BigInteger offset = BigInteger.ONE.shiftLeft(n*8).subtract(BigInteger.ONE); state.add(origValue.subtract(offset), start + n + 1); } @@ -464,7 +491,7 @@ class TupleUtil { int n = positive ? code - INT_ZERO_CODE : INT_ZERO_CODE - code; int end = start + n; - if(rep.length < end) { + if(rep.length < last) { throw new RuntimeException("Invalid tuple (possible truncation)"); } @@ -509,9 +536,9 @@ class TupleUtil { else if(code == NESTED_CODE) { DecodeState subResult = new DecodeState(); int endPos = start; - while(endPos < rep.length) { + while(endPos < last) { if(rep[endPos] == nil) { - if(endPos + 1 < rep.length && rep[endPos+1] == (byte)0xff) { + if(endPos + 1 < last && rep[endPos+1] == (byte)0xff) { subResult.add(null, endPos + 2); endPos += 2; } else { @@ -631,19 +658,27 @@ class TupleUtil { //System.out.println("Joining whole tuple..."); } - static byte[] pack(List items, byte[] prefix) { - EncodeState state = new EncodeState(2 * items.size() + (prefix == null ? 0 : 1)); + static byte[] pack(List items, byte[] prefix, int expectedSize) { + ByteBuffer dest = ByteBuffer.allocate(expectedSize + (prefix != null ? 
prefix.length : 0)); + EncodeState state = new EncodeState(dest); + if(prefix != null) { + state.add(prefix); + } encodeAll(state, items, prefix); if(state.versionPos >= 0) { throw new IllegalArgumentException("Incomplete Versionstamp included in vanilla tuple packInternal"); } else { - return ByteArrayUtil.join(null, state.encodedValues); + return dest.array(); } } - static byte[] packWithVersionstamp(List items, byte[] prefix) { - EncodeState state = new EncodeState(2 * items.size() + (prefix == null ? 1 : 2)); + static byte[] packWithVersionstamp(List items, byte[] prefix, int expectedSize) { + ByteBuffer dest = ByteBuffer.allocate(expectedSize + (prefix != null ? prefix.length : 0)); + EncodeState state = new EncodeState(dest); + if(prefix != null) { + state.add(prefix); + } encodeAll(state, items, prefix); if(state.versionPos < 0) { throw new IllegalArgumentException("No incomplete Versionstamp included in tuple packInternal with versionstamp"); @@ -652,15 +687,73 @@ class TupleUtil { if(state.versionPos > 0xffff) { throw new IllegalArgumentException("Tuple has incomplete version at position " + state.versionPos + " which is greater than the maximum " + 0xffff); } + dest.order(ByteOrder.LITTLE_ENDIAN); if (FDB.instance().getAPIVersion() < 520) { - state.add(ByteBuffer.allocate(Short.BYTES).order(ByteOrder.LITTLE_ENDIAN).putShort((short)state.versionPos).array()); + dest.putShort((short)state.versionPos); } else { - state.add(ByteBuffer.allocate(Integer.BYTES).order(ByteOrder.LITTLE_ENDIAN).putInt(state.versionPos).array()); + dest.putInt(state.versionPos); } - return ByteArrayUtil.join(null, state.encodedValues); + return dest.array(); } } + static int getPackedSize(List items, boolean nested) { + int packedSize = 0; + for(Object item : items) { + if(item == null) + packedSize += nested ? 
2 : 1; + else if(item instanceof byte[]) { + byte[] bytes = (byte[])item; + packedSize += 2 + bytes.length + ByteArrayUtil.nullCount((byte[])item); + } + else if(item instanceof String) { + try { + int strPackedSize = StringUtil.packedSize((String)item); + packedSize += 2 + strPackedSize; + } + catch (IllegalArgumentException e) { + // The unicode was malformed. Grab the array and count the bytes + byte[] strBytes = ((String)item).getBytes(UTF8); + packedSize += 2 + strBytes.length + ByteArrayUtil.nullCount(strBytes); + } + } + else if(item instanceof Float) + packedSize += 1 + Float.BYTES; + else if(item instanceof Double) + packedSize += 1 + Double.BYTES; + else if(item instanceof Boolean) + packedSize += 1; + else if(item instanceof UUID) + packedSize += 1 + 2 * Long.BYTES; + else if(item instanceof BigInteger) { + BigInteger bigInt = (BigInteger)item; + int byteCount = minimalByteCount(bigInt); + // If byteCount <= 8, then the encoding uses 1 byte for both the size + // and type code. If byteCount > 8, then there is 1 byte for the type code + // and 1 byte for the length. In both cases, the value is followed by + // the byte count. + packedSize += byteCount + ((byteCount <= 8) ? 1 : 2); + } + else if(item instanceof Number) + packedSize += 1 + minimalByteCount(((Number)item).longValue()); + else if(item instanceof Versionstamp) { + packedSize += 1 + Versionstamp.LENGTH; + Versionstamp versionstamp = (Versionstamp)item; + if(!versionstamp.isComplete()) { + int suffixSize = FDB.instance().getAPIVersion() < 520 ? 
Short.BYTES : Integer.BYTES; + packedSize += suffixSize; + } + } + else if(item instanceof List) + packedSize += 2 + getPackedSize((List)item, true); + else if(item instanceof Tuple) + packedSize += 2 + ((Tuple)item).getPackedSize(true); + else + throw new IllegalArgumentException("unknown type " + item.getClass() + " for tuple packing"); + } + return packedSize; + } + static boolean hasIncompleteVersionstamp(Stream items) { return items.anyMatch(item -> { if(item == null) { @@ -683,10 +776,10 @@ class TupleUtil { public static void main(String[] args) { try { - byte[] bytes = pack(Collections.singletonList(4), null); + byte[] bytes = pack(Collections.singletonList(4), null, 2); DecodeState result = new DecodeState(); decode(result, bytes, 0, bytes.length); - int val = (int)result.values.get(0); + int val = ((Number)result.values.get(0)).intValue(); assert 4 == val; } catch(Exception e) { @@ -695,7 +788,7 @@ class TupleUtil { } try { - byte[] bytes = pack(Collections.singletonList("\u021Aest \u0218tring"), null); + byte[] bytes = pack(Collections.singletonList("\u021Aest \u0218tring"), null, 15); DecodeState result = new DecodeState(); decode(result, bytes, 0, bytes.length); String string = (String)result.values.get(0); diff --git a/bindings/java/src/test/com/apple/foundationdb/test/AsyncStackTester.java b/bindings/java/src/test/com/apple/foundationdb/test/AsyncStackTester.java index 617586fe9d..f9d7d12c3a 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/AsyncStackTester.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/AsyncStackTester.java @@ -412,7 +412,11 @@ public class AsyncStackTester { return inst.popParams(listSize).thenAcceptAsync(rawElements -> { List tuples = new ArrayList<>(listSize); for(Object o : rawElements) { - tuples.add(Tuple.fromBytes((byte[])o)); + // Unpacking a tuple keeps around the serialized representation and uses + // it for comparison if it's available. 
To test semantic comparison, recreate + // the tuple from the item list. + Tuple t = Tuple.fromBytes((byte[])o); + tuples.add(Tuple.fromList(t.getItems())); } Collections.sort(tuples); for(Tuple t : tuples) { diff --git a/bindings/java/src/test/com/apple/foundationdb/test/StackTester.java b/bindings/java/src/test/com/apple/foundationdb/test/StackTester.java index 96281dec72..06f9b435d5 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/StackTester.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/StackTester.java @@ -368,9 +368,13 @@ public class StackTester { else if (op == StackOperation.TUPLE_SORT) { int listSize = StackUtils.getInt(inst.popParam().join()); List rawElements = inst.popParams(listSize).join(); - List tuples = new ArrayList(listSize); + List tuples = new ArrayList<>(listSize); for(Object o : rawElements) { - tuples.add(Tuple.fromBytes((byte[])o)); + // Unpacking a tuple keeps around the serialized representation and uses + // it for comparison if it's available. To test semantic comparison, recreate + // the tuple from the item list. 
+ Tuple t = Tuple.fromBytes((byte[])o); + tuples.add(Tuple.fromList(t.getItems())); } Collections.sort(tuples); for(Tuple t : tuples) { diff --git a/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java b/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java index 3de9b76785..54448e3ac9 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java @@ -16,7 +16,8 @@ public class TuplePerformanceTest { private enum GeneratedTypes { ALL, LONG, - FLOATING_POINT + FLOATING_POINT, + STRING_LIKE } private final Random r; @@ -77,7 +78,7 @@ public class TuplePerformanceTest { values.add(nested); } } - return Tuple.fromItems(values); + return Tuple.fromList(values); } public Tuple createLongsTuple(int length) { @@ -91,7 +92,7 @@ public class TuplePerformanceTest { } values.add(val); } - return Tuple.fromItems(values); + return Tuple.fromList(values); } public Tuple createFloatingPointTuple(int length) { @@ -112,7 +113,41 @@ public class TuplePerformanceTest { values.add(Double.longBitsToDouble(r.nextLong())); } } - return Tuple.fromItems(values); + return Tuple.fromList(values); + } + + public Tuple createStringLikeTuple(int length) { + List values = new ArrayList<>(length); + for(int i = 0; i < length; i++) { + double choice = r.nextDouble(); + if(choice < 0.4) { + byte[] arr = new byte[r.nextInt(20)]; + r.nextBytes(arr); + values.add(arr); + } + else if(choice < 0.8) { + // Random ASCII codepoints + int[] codepoints = new int[r.nextInt(20)]; + for(int x = 0; x < codepoints.length; x++) { + codepoints[x] = r.nextInt(0x7F); + } + values.add(new String(codepoints, 0, codepoints.length)); + } + else if(choice < 0.9) { + // All zeroes + byte[] zeroes = new byte[r.nextInt(20)]; + values.add(zeroes); + } + else { + // Random Unicode codepoints + int[] codepoints = new int[r.nextInt(20)]; + for(int x = 0; x < 
codepoints.length; x++) { + codepoints[x] = r.nextInt(0x10FFFF); + } + values.add(new String(codepoints, 0, codepoints.length)); + } + } + return Tuple.fromList(values); } public Tuple createTuple(int length) { @@ -123,6 +158,8 @@ public class TuplePerformanceTest { return createLongsTuple(length); case FLOATING_POINT: return createFloatingPointTuple(length); + case STRING_LIKE: + return createStringLikeTuple(length); default: throw new IllegalStateException("unknown generated types " + generatedTypes); } @@ -143,6 +180,7 @@ public class TuplePerformanceTest { long unpackNanos = 0L; long equalsNanos = 0L; long equalsArrayNanos = 0L; + long sizeNanos = 0L; long hashNanos = 0L; long secondHashNanos = 0L; long subspacePackNanos = 0L; @@ -150,6 +188,9 @@ public class TuplePerformanceTest { long totalLength = 0L; long totalBytes = 0L; for(int i = 0; i < iterations; i++) { + if(i % 100_000 == 0) { + System.out.println(" iteration " + i); + } int length = r.nextInt(20); Tuple t = createTuple(length); @@ -157,8 +198,8 @@ public class TuplePerformanceTest { byte[] serialized = t.pack(); long endNanos = System.nanoTime(); packNanos += endNanos - startNanos; - totalLength += length; - totalBytes += serialized.length; + totalLength += t.size(); + totalBytes += t.getPackedSize(); startNanos = System.nanoTime(); Tuple t2 = Tuple.fromBytes(serialized); @@ -182,6 +223,15 @@ public class TuplePerformanceTest { endNanos = System.nanoTime(); equalsArrayNanos += endNanos - startNanos; + tCopy = Tuple.fromList(t.getItems()); + startNanos = System.nanoTime(); + int size = tCopy.getPackedSize(); + endNanos = System.nanoTime(); + if (size != t.pack().length) { + throw new RuntimeException("packed size did not match actual packed length: " + t + " -- " + " " + tCopy.getPackedSize() + " instead of " + t.getPackedSize()); + } + sizeNanos += endNanos - startNanos; + startNanos = System.nanoTime(); byte[] subspacePacked = subspace.pack(t); endNanos = System.nanoTime(); @@ -229,6 +279,8 @@ 
public class TuplePerformanceTest { System.out.printf(" Equals time per tuple: %f \u03BCs%n", equalsNanos * 1e-3 / iterations); System.out.printf(" Equals time (using packed): %f s%n", equalsArrayNanos * 1e-9); System.out.printf(" Equals time (using packed) per tuple: %f \u03BCs%n", equalsArrayNanos * 1e-3 / iterations); + System.out.printf(" Size time: %f s%n", sizeNanos * 1e-9); + System.out.printf(" Size time per tuple: %f \u03BCs%n", sizeNanos * 1e-3 / iterations); System.out.printf(" Subspace pack time: %f s%n", subspacePackNanos * 1e-9); System.out.printf(" Subspace pack time per tuple: %f \u03BCs%n", subspacePackNanos * 1e-3 / iterations); System.out.printf(" Subspace unpack time: %f s%n", subspaceUnpackNanos * 1e-9); diff --git a/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java b/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java index 305c1a90f0..2f0fd1c2c4 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java @@ -20,10 +20,6 @@ package com.apple.foundationdb.test; -import com.apple.foundationdb.TransactionContext; -import com.apple.foundationdb.tuple.ByteArrayUtil; -import com.apple.foundationdb.tuple.Tuple; - import java.math.BigInteger; import java.util.ArrayList; import java.util.Arrays; @@ -31,6 +27,11 @@ import java.util.List; import java.util.Objects; import java.util.UUID; +import com.apple.foundationdb.TransactionContext; +import com.apple.foundationdb.tuple.ByteArrayUtil; +import com.apple.foundationdb.tuple.Tuple; +import com.apple.foundationdb.tuple.Versionstamp; + public class TupleTest { private static final byte FF = (byte)0xff; @@ -40,6 +41,7 @@ public class TupleTest { // FDB fdb = FDB.selectAPIVersion(610); serializedForms(); comparisons(); + replaceTests(); /* try(Database db = fdb.open()) { runTests(reps, db); @@ -70,6 +72,7 @@ public class TupleTest { private static void serializedForms() { List 
serializations = new ArrayList<>(); TupleSerialization.addAll(serializations, + Tuple.from(), new byte[0], Tuple.from(0L), new byte[]{0x14}, Tuple.from(BigInteger.ZERO), new byte[]{0x14}, Tuple.from(1L), new byte[]{0x15, 0x01}, @@ -116,6 +119,9 @@ public class TupleTest { Tuple.from(Double.longBitsToDouble(Long.MAX_VALUE)), new byte[]{0x21, FF, FF, FF, FF, FF, FF, FF, FF}, Tuple.from(Float.intBitsToFloat(~0)), new byte[]{0x20, 0x00, 0x00, 0x00, 0x00}, Tuple.from(Double.longBitsToDouble(~0L)), new byte[]{0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + Tuple.from((Object)new byte[0]), new byte[]{0x01, 0x00}, + Tuple.from((Object)new byte[]{0x01, 0x02, 0x03}), new byte[]{0x01, 0x01, 0x02, 0x03, 0x00}, + Tuple.from((Object)new byte[]{0x00, 0x00, 0x00, 0x04}), new byte[]{0x01, 0x00, FF, 0x00, FF, 0x00, FF, 0x04, 0x00}, Tuple.from(""), new byte[]{0x02, 0x00}, Tuple.from("hello"), new byte[]{0x02, 'h', 'e', 'l', 'l', 'o', 0x00}, Tuple.from("\u4e2d\u6587"), new byte[]{0x02, (byte)0xe4, (byte)0xb8, (byte)0xad, (byte)0xe6, (byte)0x96, (byte)0x87, 0x00}, @@ -123,17 +129,42 @@ public class TupleTest { Tuple.from(new String(new int[]{0x1f525}, 0, 1)), new byte[]{0x02, (byte)0xf0, (byte)0x9f, (byte)0x94, (byte)0xa5, 0x00}, Tuple.from("\ud83d\udd25"), new byte[]{0x02, (byte)0xf0, (byte)0x9f, (byte)0x94, (byte)0xa5, 0x00}, Tuple.from("\ud83e\udd6f"), new byte[]{0x02, (byte)0xf0, (byte)0x9f, (byte)0xa5, (byte)0xaf, 0x00}, + Tuple.from("\ud83d"), new byte[]{0x02, 0x3f, 0x00}, Tuple.from("\udd25\ud83e\udd6f"), new byte[]{0x02, 0x3f, (byte)0xf0, (byte)0x9f, (byte)0xa5, (byte)0xaf, 0x00}, // malformed string - low surrogate without high surrogate - Tuple.from("a\udd25\ud83e\udd6f"), new byte[]{0x02, 'a', 0x3f, (byte)0xf0, (byte)0x9f, (byte)0xa5, (byte)0xaf, 0x00} // malformed string - low surrogate without high surrogate + Tuple.from("a\udd25\ud83e\udd6f"), new byte[]{0x02, 'a', 0x3f, (byte)0xf0, (byte)0x9f, (byte)0xa5, (byte)0xaf, 0x00}, // malformed string - low surrogate 
without high surrogate + Tuple.from(Tuple.from((Object)null)), new byte[]{0x05, 0x00, FF, 0x00}, + Tuple.from(Tuple.from(null, "hello")), new byte[]{0x05, 0x00, FF, 0x02, 'h', 'e', 'l', 'l', 'o', 0x00, 0x00}, + Tuple.from(Arrays.asList(null, "hello")), new byte[]{0x05, 0x00, FF, 0x02, 'h', 'e', 'l', 'l', 'o', 0x00, 0x00}, + Tuple.from(Tuple.from(null, "hell\0")), new byte[]{0x05, 0x00, FF, 0x02, 'h', 'e', 'l', 'l', 0x00, FF, 0x00, 0x00}, + Tuple.from(Arrays.asList(null, "hell\0")), new byte[]{0x05, 0x00, FF, 0x02, 'h', 'e', 'l', 'l', 0x00, FF, 0x00, 0x00}, + Tuple.from(Tuple.from((Object)null), "hello"), new byte[]{0x05, 0x00, FF, 0x00, 0x02, 'h', 'e', 'l', 'l', 'o', 0x00}, + Tuple.from(Tuple.from((Object)null), "hello", new byte[]{0x01, 0x00}, new byte[0]), new byte[]{0x05, 0x00, FF, 0x00, 0x02, 'h', 'e', 'l', 'l', 'o', 0x00, 0x01, 0x01, 0x00, FF, 0x00, 0x01, 0x00}, + Tuple.from(new UUID(0xba5eba11, 0x5ca1ab1e)), new byte[]{0x30, FF, FF, FF, FF, (byte)0xba, 0x5e, (byte)0xba, 0x11, 0x00, 0x00, 0x00, 0x00, 0x5c, (byte)0xa1, (byte)0xab, 0x1e}, + Tuple.from(false), new byte[]{0x26}, + Tuple.from(true), new byte[]{0x27}, + Tuple.from((short)0x3019), new byte[]{0x16, 0x30, 0x19}, + Tuple.from((byte)0x03), new byte[]{0x15, 0x03}, + Tuple.from(Versionstamp.complete(new byte[]{(byte)0xaa, (byte)0xbb, (byte)0xcc, (byte)0xdd, (byte)0xee, FF, 0x00, 0x01, 0x02, 0x03})), new byte[]{0x33, (byte)0xaa, (byte)0xbb, (byte)0xcc, (byte)0xdd, (byte)0xee, FF, 0x00, 0x01, 0x02, 0x03, 0x00, 0x00}, + Tuple.from(Versionstamp.complete(new byte[]{0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a}, 657)), new byte[]{0x33, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x02, (byte)0x91} ); + Tuple bigTuple = new Tuple(); + List serializedForms = new ArrayList<>(); + for(TupleSerialization serialization : serializations) { + bigTuple = bigTuple.addAll(serialization.tuple); + serializedForms.add(serialization.serialization); + } + serializations.add(new 
TupleSerialization(bigTuple, ByteArrayUtil.join(null, serializedForms))); for(TupleSerialization serialization : serializations) { System.out.println("Packing " + serialization.tuple + " (expecting: " + ByteArrayUtil.printable(serialization.serialization) + ")"); + if(serialization.tuple.getPackedSize() != serialization.serialization.length) { + throw new RuntimeException("Tuple " + serialization.tuple + " packed size " + serialization.tuple.getPackedSize() + " does not match expected packed size " + serialization.serialization.length); + } if(!Arrays.equals(serialization.tuple.pack(), serialization.serialization)) { throw new RuntimeException("Tuple " + serialization.tuple + " has serialization " + ByteArrayUtil.printable(serialization.tuple.pack()) + " which does not match expected serialization " + ByteArrayUtil.printable(serialization.serialization)); } - if(!Objects.equals(serialization.tuple, Tuple.fromBytes(serialization.serialization))) { + if(!Objects.equals(serialization.tuple, Tuple.fromItems(Tuple.fromBytes(serialization.serialization).getItems()))) { throw new RuntimeException("Tuple " + serialization.tuple + " does not match deserialization " + Tuple.fromBytes(serialization.serialization) + " which comes from serialization " + ByteArrayUtil.printable(serialization.serialization)); } @@ -176,6 +207,16 @@ public class TupleTest { Tuple.from((Object)new byte[]{0x00, FF}), Tuple.from((Object)new byte[]{0x7f}), Tuple.from((Object)new byte[]{(byte)0x80}), + Tuple.from(null, new byte[0]), + Tuple.from(null, new byte[]{0x00}), + Tuple.from(null, new byte[]{0x00, FF}), + Tuple.from(null, new byte[]{0x7f}), + Tuple.from(null, new byte[]{(byte)0x80}), + Tuple.from(Tuple.from(null, new byte[0])), + Tuple.from(Tuple.from(null, new byte[]{0x00})), + Tuple.from(Tuple.from(null, new byte[]{0x00, FF})), + Tuple.from(Tuple.from(null, new byte[]{0x7f})), + Tuple.from(Tuple.from(null, new byte[]{(byte)0x80})), Tuple.from("a"), 
Tuple.from("\u03bc\u03ac\u03b8\u03b7\u03bc\u03b1"), Tuple.from("\u03bc\u03b1\u0301\u03b8\u03b7\u03bc\u03b1"), @@ -195,7 +236,18 @@ public class TupleTest { Tuple.from(new UUID(-1, 0)), Tuple.from(new UUID(-1, -1)), Tuple.from(new UUID(1, -1)), - Tuple.from(new UUID(1, 1)) + Tuple.from(new UUID(1, 1)), + Tuple.from(false), + Tuple.from(true), + Tuple.from(Arrays.asList(0, 1, 2)), + Tuple.from(Arrays.asList(0, 1), "hello"), + Tuple.from(Arrays.asList(0, 1), "help"), + Tuple.from(Versionstamp.complete(new byte[]{0x0a, (byte)0xbb, (byte)0xcc, (byte)0xdd, (byte)0xee, FF, 0x00, 0x01, 0x02, 0x03})), + Tuple.from(Versionstamp.complete(new byte[]{(byte)0xaa, (byte)0xbb, (byte)0xcc, (byte)0xdd, (byte)0xee, FF, 0x00, 0x01, 0x02, 0x03})), + Tuple.from(Versionstamp.complete(new byte[]{(byte)0xaa, (byte)0xbb, (byte)0xcc, (byte)0xdd, (byte)0xee, FF, 0x00, 0x01, 0x02, 0x03}, 1)), + Tuple.from(Versionstamp.complete(new byte[]{(byte)0xaa, (byte)0xbb, (byte)0xcc, (byte)0xdd, (byte)0xee, FF, 0x00, 0x01, 0x02, 0x03}, 0xa101)), + Tuple.from(Versionstamp.complete(new byte[]{(byte)0xaa, (byte)0xbb, (byte)0xcc, (byte)0xdd, (byte)0xee, FF, 0x00, 0x01, 0x02, 0x03}, 65535)) + ); for(Tuple t1 : tuples) { @@ -209,6 +261,47 @@ public class TupleTest { if(Integer.signum(semanticComparison) != Integer.signum(byteComparison)) { throw new RuntimeException("Tuple t1 and t2 comparison mismatched: semantic = " + semanticComparison + " while byte order = " + byteComparison); } + int implicitByteComparison = t1.compareTo(t2); + if(Integer.signum(semanticComparison) != Integer.signum(implicitByteComparison)) { + throw new RuntimeException("Tuple t1 and t2 comparison mismatched: semantic = " + semanticComparison + " while implicit byte order = " + implicitByteComparison); + } + } + } + } + + // These should be in ArrayUtilTest, but those can't be run at the moment, so here they go. 
+ private static void replaceTests() { + List arrays = Arrays.asList( + new byte[]{0x01, 0x02, 0x01, 0x02}, new byte[]{0x01, 0x02}, new byte[]{0x03, 0x04}, new byte[]{0x03, 0x04, 0x03, 0x04}, + new byte[]{0x01, 0x02, 0x01, 0x02}, new byte[]{0x01, 0x02}, new byte[]{0x03}, new byte[]{0x03, 0x03}, + new byte[]{0x01, 0x02, 0x01, 0x02}, new byte[]{0x01, 0x02}, new byte[]{0x03, 0x04, 0x05}, new byte[]{0x03, 0x04, 0x05, 0x03, 0x04, 0x05}, + new byte[]{0x00, 0x01, 0x02, 0x00, 0x01, 0x02, 0x00}, new byte[]{0x01, 0x02}, new byte[]{0x03, 0x04, 0x05}, new byte[]{0x00, 0x03, 0x04, 0x05, 0x00, 0x03, 0x04, 0x05, 0x00}, + new byte[]{0x01, 0x01, 0x01, 0x01}, new byte[]{0x01, 0x02}, new byte[]{0x03, 0x04}, new byte[]{0x01, 0x01, 0x01, 0x01}, + new byte[]{0x01, 0x01, 0x01, 0x01}, new byte[]{0x01, 0x02}, new byte[]{0x03}, new byte[]{0x01, 0x01, 0x01, 0x01}, + new byte[]{0x01, 0x01, 0x01, 0x01}, new byte[]{0x01, 0x02}, new byte[]{0x03, 0x04, 0x05}, new byte[]{0x01, 0x01, 0x01, 0x01}, + new byte[]{0x01, 0x01, 0x01, 0x01, 0x01}, new byte[]{0x01, 0x01}, new byte[]{0x03, 0x04, 0x05}, new byte[]{0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x01}, + new byte[]{0x01, 0x01, 0x01, 0x01, 0x01}, new byte[]{0x01, 0x01}, new byte[]{0x03, 0x04}, new byte[]{0x03, 0x04, 0x03, 0x04, 0x01}, + new byte[]{0x01, 0x01, 0x01, 0x01, 0x01}, new byte[]{0x01, 0x01}, new byte[]{0x03}, new byte[]{0x03, 0x03, 0x01}, + new byte[]{0x01, 0x02, 0x01, 0x02}, new byte[]{0x01, 0x02}, null, new byte[0], + new byte[]{0x01, 0x02, 0x01, 0x02}, new byte[]{0x01, 0x02}, new byte[0], new byte[0], + new byte[]{0x01, 0x02, 0x01, 0x02}, null, new byte[]{0x04}, new byte[]{0x01, 0x02, 0x01, 0x02}, + new byte[]{0x01, 0x02, 0x01, 0x02}, new byte[0], new byte[]{0x04}, new byte[]{0x01, 0x02, 0x01, 0x02}, + null, new byte[]{0x01, 0x02}, new byte[]{0x04}, null + ); + for(int i = 0; i < arrays.size(); i += 4) { + byte[] src = arrays.get(i); + byte[] pattern = arrays.get(i + 1); + byte[] replacement = arrays.get(i + 2); + byte[] expectedResults = 
arrays.get(i + 3); + byte[] results = ByteArrayUtil.replace(src, pattern, replacement); + if(!Arrays.equals(results, expectedResults)) { + throw new RuntimeException("results " + ByteArrayUtil.printable(results) + " did not match expected results " + + ByteArrayUtil.printable(expectedResults) + " when replacing " + ByteArrayUtil.printable(pattern) + + " with " + ByteArrayUtil.printable(replacement) + " in " + ByteArrayUtil.printable(src)); + } + if(src != null && src == results) { + throw new RuntimeException("src and results array are pointer-equal when replacing " + ByteArrayUtil.printable(pattern) + + " with " + ByteArrayUtil.printable(replacement) + " in " + ByteArrayUtil.printable(src)); } } } From 39fd30330f95454ee46486a9fe7dd54d5ade26ac Mon Sep 17 00:00:00 2001 From: Alec Grieser Date: Wed, 27 Feb 2019 20:25:30 -0800 Subject: [PATCH 22/47] memoize incomplete versionstamp information in Tuples ; add more tests --- .../apple/foundationdb/subspace/Subspace.java | 7 +- .../tuple/IterableComparator.java | 2 +- .../com/apple/foundationdb/tuple/Tuple.java | 291 ++++---- .../apple/foundationdb/tuple/TupleUtil.java | 117 ++-- .../foundationdb/tuple/Versionstamp.java | 4 +- .../apple/foundationdb/test/TupleTest.java | 620 +++++++++++++++++- 6 files changed, 862 insertions(+), 179 deletions(-) diff --git a/bindings/java/src/main/com/apple/foundationdb/subspace/Subspace.java b/bindings/java/src/main/com/apple/foundationdb/subspace/Subspace.java index 59c3f94329..4b811f5149 100644 --- a/bindings/java/src/main/com/apple/foundationdb/subspace/Subspace.java +++ b/bindings/java/src/main/com/apple/foundationdb/subspace/Subspace.java @@ -46,8 +46,8 @@ import com.apple.foundationdb.tuple.Versionstamp; *

*/ public class Subspace { - static final Tuple EMPTY_TUPLE = Tuple.from(); - static final byte[] EMPTY_BYTES = new byte[0]; + private static final Tuple EMPTY_TUPLE = Tuple.from(); + private static final byte[] EMPTY_BYTES = new byte[0]; private final byte[] rawPrefix; @@ -248,8 +248,7 @@ public class Subspace { * @return the {@link Range} of keyspace corresponding to {@code tuple} */ public Range range(Tuple tuple) { - Range p = tuple.range(); - return new Range(join(rawPrefix, p.begin), join(rawPrefix, p.end)); + return tuple.range(rawPrefix); } /** diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/IterableComparator.java b/bindings/java/src/main/com/apple/foundationdb/tuple/IterableComparator.java index 1587b3fd6e..71aa23e9b1 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/IterableComparator.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/IterableComparator.java @@ -34,7 +34,7 @@ import java.util.Iterator; * tuple1.compareTo(tuple2) * == new IterableComparator().compare(tuple1, tuple2) * == new IterableComparator().compare(tuple1.getItems(), tuple2.getItems()), - * == ByteArrayUtil.compareUnsigned(tuple1.packInternal(), tuple2.packInternal())} + * == ByteArrayUtil.compareUnsigned(tuple1.pack(), tuple2.pack())} * * *

diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java b/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java index 5fa9726c14..ea47870037 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java @@ -21,11 +21,11 @@ package com.apple.foundationdb.tuple; import java.math.BigInteger; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Iterator; -import java.util.LinkedList; import java.util.List; import java.util.UUID; import java.util.stream.Collectors; @@ -69,19 +69,39 @@ import com.apple.foundationdb.Range; */ public class Tuple implements Comparable, Iterable { private static final IterableComparator comparator = new IterableComparator(); + private static final byte[] EMPTY_BYTES = new byte[0]; - private List elements; - private int memoizedHash = 0; + List elements; private byte[] packed = null; + private int memoizedHash = 0; private int memoizedPackedSize = -1; + private final boolean incompleteVersionstamp; - private Tuple(List elements, Object newItem) { - this(elements); + private Tuple(Tuple original, Object newItem, boolean itemHasIncompleteVersionstamp) { + this.elements = new ArrayList<>(original.elements.size() + 1); + this.elements.addAll(original.elements); this.elements.add(newItem); + incompleteVersionstamp = original.incompleteVersionstamp || itemHasIncompleteVersionstamp; } - private Tuple(List elements) { - this.elements = new ArrayList<>(elements); + private Tuple(List elements) { + this.elements = elements; + incompleteVersionstamp = TupleUtil.hasIncompleteVersionstamp(elements.stream()); + } + + /** + * Construct a new empty {@code Tuple}. After creation, items can be added + * with calls to the variations of {@code add()}. + * + * @see #from(Object...) 
+ * @see #fromBytes(byte[]) + * @see #fromItems(Iterable) + */ + public Tuple() { + elements = Collections.emptyList(); + packed = EMPTY_BYTES; + memoizedPackedSize = 0; + incompleteVersionstamp = false; } /** @@ -107,7 +127,10 @@ public class Tuple implements Comparable, Iterable { !(o instanceof Versionstamp)) { throw new IllegalArgumentException("Parameter type (" + o.getClass().getName() + ") not recognized"); } - return new Tuple(this.elements, o); + return new Tuple(this, o, + (o instanceof Versionstamp && !((Versionstamp)o).isComplete()) || + (o instanceof List && TupleUtil.hasIncompleteVersionstamp(((List)o).stream())) || + (o instanceof Tuple && ((Tuple) o).hasIncompleteVersionstamp())); } /** @@ -118,7 +141,7 @@ public class Tuple implements Comparable, Iterable { * @return a newly created {@code Tuple} */ public Tuple add(String s) { - return new Tuple(this.elements, s); + return new Tuple(this, s, false); } /** @@ -129,7 +152,7 @@ public class Tuple implements Comparable, Iterable { * @return a newly created {@code Tuple} */ public Tuple add(long l) { - return new Tuple(this.elements, l); + return new Tuple(this, l, false); } /** @@ -140,7 +163,7 @@ public class Tuple implements Comparable, Iterable { * @return a newly created {@code Tuple} */ public Tuple add(byte[] b) { - return new Tuple(this.elements, b); + return new Tuple(this, b, false); } /** @@ -151,7 +174,7 @@ public class Tuple implements Comparable, Iterable { * @return a newly created {@code Tuple} */ public Tuple add(boolean b) { - return new Tuple(this.elements, b); + return new Tuple(this, b, false); } /** @@ -162,7 +185,7 @@ public class Tuple implements Comparable, Iterable { * @return a newly created {@code Tuple} */ public Tuple add(UUID uuid) { - return new Tuple(this.elements, uuid); + return new Tuple(this, uuid, false); } /** @@ -178,7 +201,7 @@ public class Tuple implements Comparable, Iterable { if(bi == null) { throw new NullPointerException("Number types in Tuple cannot be 
null"); } - return new Tuple(this.elements, bi); + return new Tuple(this, bi, false); } /** @@ -189,7 +212,7 @@ public class Tuple implements Comparable, Iterable { * @return a newly created {@code Tuple} */ public Tuple add(float f) { - return new Tuple(this.elements, f); + return new Tuple(this, f, false); } /** @@ -200,7 +223,7 @@ public class Tuple implements Comparable, Iterable { * @return a newly created {@code Tuple} */ public Tuple add(double d) { - return new Tuple(this.elements, d); + return new Tuple(this, d, false); } /** @@ -212,11 +235,11 @@ public class Tuple implements Comparable, Iterable { * @return a newly created {@code Tuple} */ public Tuple add(Versionstamp v) { - return new Tuple(this.elements, v); + return new Tuple(this, v, !v.isComplete()); } /** - * Creates a copy of this {@code Tuple} with an {@link List} appended as the last element. + * Creates a copy of this {@code Tuple} with a {@link List} appended as the last element. * This does not add the elements individually (for that, use {@link Tuple#addAll(List) Tuple.addAll}). * This adds the list as a single element nested within the outer {@code Tuple}. 
* @@ -224,8 +247,8 @@ public class Tuple implements Comparable, Iterable { * * @return a newly created {@code Tuple} */ - public Tuple add(List l) { - return new Tuple(this.elements, l); + public Tuple add(List l) { + return new Tuple(this, l, TupleUtil.hasIncompleteVersionstamp(l.stream())); } /** @@ -238,7 +261,7 @@ public class Tuple implements Comparable, Iterable { * @return a newly created {@code Tuple} */ public Tuple add(Tuple t) { - return new Tuple(this.elements, t); + return new Tuple(this, t, t.hasIncompleteVersionstamp()); } /** @@ -251,7 +274,7 @@ public class Tuple implements Comparable, Iterable { * @return a newly created {@code Tuple} */ public Tuple add(byte[] b, int offset, int length) { - return new Tuple(this.elements, Arrays.copyOfRange(b, offset, offset + length)); + return new Tuple(this, Arrays.copyOfRange(b, offset, offset + length), false); } /** @@ -262,7 +285,7 @@ public class Tuple implements Comparable, Iterable { * * @return a newly created {@code Tuple} */ - public Tuple addAll(List o) { + public Tuple addAll(List o) { List merged = new ArrayList<>(o.size() + this.elements.size()); merged.addAll(this.elements); merged.addAll(o); @@ -279,8 +302,15 @@ public class Tuple implements Comparable, Iterable { public Tuple addAll(Tuple other) { List merged = new ArrayList<>(this.size() + other.size()); merged.addAll(this.elements); - merged.addAll(other.peekItems()); - return new Tuple(merged); + merged.addAll(other.elements); + Tuple t = new Tuple(merged); + if(!t.hasIncompleteVersionstamp() && packed != null && other.packed != null) { + t.packed = ByteArrayUtil.join(packed, other.packed); + } + if(memoizedPackedSize >= 0 && other.memoizedPackedSize >= 0) { + t.memoizedPackedSize = memoizedPackedSize + other.memoizedPackedSize; + } + return t; } /** @@ -306,29 +336,44 @@ public class Tuple implements Comparable, Iterable { } byte[] packInternal(byte[] prefix, boolean copy) { - boolean hasPrefix = prefix != null && prefix.length > 1; - 
if(packed == null) { - byte[] result = TupleUtil.pack(elements, prefix, getPackedSize()); - if(hasPrefix) { - packed = Arrays.copyOfRange(result, prefix.length, result.length); - memoizedPackedSize = packed.length; - return result; - } - else { - packed = result; - memoizedPackedSize = packed.length; - } + if(hasIncompleteVersionstamp()) { + throw new IllegalArgumentException("Incomplete Versionstamp included in vanilla tuple pack"); } + if(packed == null) { + packed = TupleUtil.pack(elements, getPackedSize()); + } + boolean hasPrefix = prefix != null && prefix.length > 0; if(hasPrefix) { return ByteArrayUtil.join(prefix, packed); } + else if(copy) { + return Arrays.copyOf(packed, packed.length); + } else { - if(copy) { - return Arrays.copyOf(packed, packed.length); - } - else { - return packed; - } + return packed; + } + } + + /** + * Pack an encoded representation of this {@code Tuple} onto the end of the given {@link ByteBuffer}. + * It is up to the caller to ensure that there is enough space allocated within the buffer + * to avoid {@link java.nio.BufferOverflowException}s. The client may call {@link #getPackedSize()} + * to determine how large this {@code Tuple} will be once packed in order to allocate sufficient memory. + *
+ *
+ * This method will throw an error if there are any incomplete {@link Versionstamp}s in this {@code Tuple}. + * + * @param dest the destination {@link ByteBuffer} for the encoded {@code Tuple} + */ + public void packInto(ByteBuffer dest) { + if(hasIncompleteVersionstamp()) { + throw new IllegalArgumentException("Incomplete Versionstamp included in vanilla tuple pack"); + } + if(packed == null) { + TupleUtil.pack(dest, elements); + } + else { + dest.put(packed); } } @@ -363,37 +408,27 @@ public class Tuple implements Comparable, Iterable { * @throws IllegalArgumentException if there is not exactly one incomplete {@link Versionstamp} included in this {@code Tuple} */ public byte[] packWithVersionstamp(byte[] prefix) { - return TupleUtil.packWithVersionstamp(elements, prefix, getPackedSize()); + return packWithVersionstampInternal(prefix, true); } byte[] packWithVersionstampInternal(byte[] prefix, boolean copy) { - boolean hasPrefix = prefix != null && prefix.length > 0; - if(packed == null) { - byte[] result = TupleUtil.packWithVersionstamp(elements, prefix, getPackedSize()); - if(hasPrefix) { - byte[] withoutPrefix = Arrays.copyOfRange(result, prefix.length, result.length); - TupleUtil.adjustVersionPosition(packed, -1 * prefix.length); - packed = withoutPrefix; - memoizedPackedSize = packed.length; - return result; - } - else { - packed = result; - memoizedPackedSize = packed.length; - } + if(!hasIncompleteVersionstamp()) { + throw new IllegalArgumentException("No incomplete Versionstamp included in tuple pack with versionstamp"); } + if(packed == null) { + packed = TupleUtil.packWithVersionstamp(elements, getPackedSize()); + } + boolean hasPrefix = prefix != null && prefix.length > 0; if(hasPrefix) { byte[] withPrefix = ByteArrayUtil.join(prefix, packed); TupleUtil.adjustVersionPosition(withPrefix, prefix.length); return withPrefix; } + else if(copy) { + return Arrays.copyOf(packed, packed.length); + } else { - if(copy) { - return Arrays.copyOf(packed, 
packed.length); - } - else { - return packed; - } + return packed; } } @@ -429,16 +464,6 @@ public class Tuple implements Comparable, Iterable { return elements.stream(); } - /** - * Returns the internal elements that make up this tuple. For internal use only, as - * modifications to the result will mean that this Tuple is modified. - * - * @return the elements of this Tuple, without copying - */ - private List peekItems() { - return this.elements; - } - /** * Gets an {@code Iterator} over the {@code Objects} in this {@code Tuple}. This {@code Iterator} is * unmodifiable and will throw an exception if {@link Iterator#remove() remove()} is called. @@ -450,18 +475,6 @@ public class Tuple implements Comparable, Iterable { return Collections.unmodifiableList(this.elements).iterator(); } - /** - * Construct a new empty {@code Tuple}. After creation, items can be added - * with calls the the variations of {@code add()}. - * - * @see #from(Object...) - * @see #fromBytes(byte[]) - * @see #fromItems(Iterable) - */ - public Tuple() { - this.elements = new LinkedList<>(); - } - /** * Construct a new {@code Tuple} with elements decoded from a supplied {@code byte} array. * The passed byte array must not be {@code null}. 
@@ -485,9 +498,15 @@ public class Tuple implements Comparable, Iterable { * @return a new {@code Tuple} constructed by deserializing the specified slice of the provided {@code byte} array */ public static Tuple fromBytes(byte[] bytes, int offset, int length) { - Tuple t = new Tuple(); - t.elements = TupleUtil.unpack(bytes, offset, length); - t.packed = Arrays.copyOfRange(bytes, offset, offset + length); + if(offset < 0 || offset > bytes.length) { + throw new IllegalArgumentException("Invalid offset for Tuple deserialization"); + } + if(length < 0 || offset + length > bytes.length) { + throw new IllegalArgumentException("Invalid length for Tuple deserialization"); + } + byte[] packed = Arrays.copyOfRange(bytes, offset, offset + length); + Tuple t = new Tuple(TupleUtil.unpack(packed)); + t.packed = packed; t.memoizedPackedSize = length; return t; } @@ -732,7 +751,7 @@ public class Tuple implements Comparable, Iterable { return (Tuple)o; } else if(o instanceof List) { - return Tuple.fromItems((List)o); + return Tuple.fromList((List)o); } else { throw new ClassCastException("Cannot convert item of type " + o.getClass() + " to tuple"); @@ -761,11 +780,7 @@ public class Tuple implements Comparable, Iterable { if(elements.isEmpty()) throw new IllegalStateException("Tuple contains no elements"); - List items = new ArrayList<>(elements.size() - 1); - for(int i = 1; i < this.elements.size(); i++) { - items.add(this.elements.get(i)); - } - return new Tuple(items); + return new Tuple(elements.subList(1, elements.size())); } /** @@ -779,11 +794,7 @@ public class Tuple implements Comparable, Iterable { if(elements.isEmpty()) throw new IllegalStateException("Tuple contains no elements"); - List items = new ArrayList<>(elements.size() - 1); - for(int i = 0; i < this.elements.size() - 1; i++) { - items.add(this.elements.get(i)); - } - return new Tuple(items); + return new Tuple(elements.subList(0, elements.size() - 1)); } /** @@ -800,17 +811,39 @@ public class Tuple implements 
Comparable, Iterable { * This function will throw an error if this {@code Tuple} contains an incomplete * {@link Versionstamp}. * - * @return the range of keys containing all {@code Tuple}s that have this {@code Tuple} - * as a prefix + * @return the range of keys containing all possible keys that have this {@code Tuple} + * as a strict prefix */ public Range range() { + return range(null); + } + + /** + * Returns a range representing all keys that encode {@code Tuple}s strictly starting + * with the given prefix followed by this {@code Tuple}. + *
+ *
+ * For example: + *
+	 *   Tuple t = Tuple.from("a", "b");
+	 *   Range r = t.range(Tuple.from("c").pack());
+ * {@code r} contains all tuples ("c", "a", "b", ...) + *
+ * This function will throw an error if this {@code Tuple} contains an incomplete + * {@link Versionstamp}. + * + * @param prefix a byte prefix to precede all elements in the range + * + * @return the range of keys containing all possible keys that have {@code prefix} + * followed by this {@code Tuple} as a strict prefix + */ + public Range range(byte[] prefix) { if(hasIncompleteVersionstamp()) { throw new IllegalStateException("Tuple with incomplete versionstamp used for range"); } - byte[] p = packInternal(null, false); - //System.out.println("Packed tuple is: " + ByteArrayUtil.printable(p)); + byte[] p = packInternal(prefix, false); return new Range(ByteArrayUtil.join(p, new byte[] {0x0}), - ByteArrayUtil.join(p, new byte[] {(byte)0xff})); + ByteArrayUtil.join(p, new byte[] {(byte)0xff})); } /** @@ -823,7 +856,7 @@ public class Tuple implements Comparable, Iterable { * {@code Tuple} */ public boolean hasIncompleteVersionstamp() { - return TupleUtil.hasIncompleteVersionstamp(stream()); + return incompleteVersionstamp; } /** @@ -843,7 +876,21 @@ public class Tuple implements Comparable, Iterable { } int getPackedSize(boolean nested) { - return TupleUtil.getPackedSize(elements, nested); + if(memoizedPackedSize >= 0) { + if(!nested) { + return memoizedPackedSize; + } + int nullCount = 0; + for(Object elem : elements) { + if(elem == null) { + nullCount++; + } + } + return memoizedPackedSize + nullCount; + } + else { + return TupleUtil.getPackedSize(elements, nested); + } } /** @@ -860,7 +907,9 @@ public class Tuple implements Comparable, Iterable { */ @Override public int compareTo(Tuple t) { - if(packed != null && t.packed != null) { + // If either tuple has an incomplete versionstamp, then there is a possibility that the byte order + // is not the semantic comparison order. 
+ if(packed != null && t.packed != null && !hasIncompleteVersionstamp() && !t.hasIncompleteVersionstamp()) { return ByteArrayUtil.compareUnsigned(packed, t.packed); } else { @@ -959,12 +1008,15 @@ public class Tuple implements Comparable, Iterable { * * @return a new {@code Tuple} with the given items as its elements */ - public static Tuple fromItems(Iterable items) { - Tuple t = new Tuple(); - for(Object o : items) { - t = t.addObject(o); + public static Tuple fromItems(Iterable items) { + if(items instanceof List) { + return Tuple.fromList((List)items); } - return t; + List elements = new ArrayList<>(); + for(Object o : items) { + elements.add(o); + } + return new Tuple(elements); } /** @@ -977,8 +1029,9 @@ public class Tuple implements Comparable, Iterable { * * @return a new {@code Tuple} with the given items as its elements */ - public static Tuple fromList(List items) { - return new Tuple(items); + public static Tuple fromList(List items) { + List elements = new ArrayList<>(items); + return new Tuple(elements); } /** @@ -992,10 +1045,8 @@ public class Tuple implements Comparable, Iterable { * * @return a new {@code Tuple} with the given items as its elements */ - public static Tuple fromStream(Stream items) { - Tuple t = new Tuple(); - t.elements = items.collect(Collectors.toList()); - return t; + public static Tuple fromStream(Stream items) { + return new Tuple(items.collect(Collectors.toList())); } /** @@ -1009,7 +1060,7 @@ public class Tuple implements Comparable, Iterable { * @return a new {@code Tuple} with the given items as its elements */ public static Tuple from(Object... 
items) { - return fromList(Arrays.asList(items)); + return new Tuple(Arrays.asList(items)); } static void main(String[] args) { diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java b/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java index fc1fbc7262..63a1944b5d 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java @@ -21,6 +21,7 @@ package com.apple.foundationdb.tuple; import java.math.BigInteger; +import java.nio.BufferOverflowException; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.charset.Charset; @@ -89,7 +90,7 @@ class TupleUtil { x += 1; } } - throw new IllegalArgumentException("no terminator found for bytes starting at " + from); + throw new IllegalArgumentException("No terminator found for bytes starting at " + from); } } @@ -135,6 +136,7 @@ class TupleUtil { else { ByteArrayUtil.replace(encoded, 0, encoded.length, NULL_ARR, NULL_ESCAPED_ARR, encodedBytes); } + totalLength += encoded.length + nullCount; return this; } @@ -157,6 +159,10 @@ class TupleUtil { } } + private static boolean useOldVersionOffsetFormat() { + return FDB.instance().getAPIVersion() < 520; + } + // These four functions are for adjusting the encoding of floating point numbers so // that when their byte representation is written out in big-endian order, unsigned // lexicographic byte comparison orders the values in the same way as the semantic @@ -165,32 +171,32 @@ class TupleUtil { // in the case that the number is positive. For these purposes, 0.0 is positive and -0.0 // is negative. - static int encodeFloatBits(float f) { + private static int encodeFloatBits(float f) { int intBits = Float.floatToRawIntBits(f); return (intBits < 0) ? 
(~intBits) : (intBits ^ Integer.MIN_VALUE); } - static long encodeDoubleBits(double d) { + private static long encodeDoubleBits(double d) { long longBits = Double.doubleToRawLongBits(d); return (longBits < 0L) ? (~longBits) : (longBits ^ Long.MIN_VALUE); } - static float decodeFloatBits(int i) { + private static float decodeFloatBits(int i) { int origBits = (i >= 0) ? (~i) : (i ^ Integer.MIN_VALUE); return Float.intBitsToFloat(origBits); } - static double decodeDoubleBits(long l) { + private static double decodeDoubleBits(long l) { long origBits = (l >= 0) ? (~l) : (l ^ Long.MIN_VALUE); return Double.longBitsToDouble(origBits); } // Get the minimal number of bytes in the representation of a long. - static int minimalByteCount(long i) { + private static int minimalByteCount(long i) { return (Long.SIZE + 7 - Long.numberOfLeadingZeros(i >= 0 ? i : -i)) / 8; } - static int minimalByteCount(BigInteger i) { + private static int minimalByteCount(BigInteger i) { int bitLength = (i.compareTo(BigInteger.ZERO) >= 0) ? i.bitLength() : i.negate().bitLength(); return (bitLength + 7) / 8; } @@ -221,7 +227,7 @@ class TupleUtil { } static void adjustVersionPosition(byte[] packed, int delta) { - if(FDB.instance().getAPIVersion() < 520) { + if(useOldVersionOffsetFormat()) { adjustVersionPosition300(packed, delta); } else { @@ -285,7 +291,7 @@ class TupleUtil { else if(t instanceof List) encode(state, (List)t); else if(t instanceof Tuple) - encode(state, ((Tuple)t).getItems()); + encode(state, (Tuple)t); else throw new IllegalArgumentException("Unsupported data type: " + t.getClass().getName()); } @@ -409,6 +415,10 @@ class TupleUtil { state.add(nil); } + static void encode(EncodeState state, Tuple value) { + encode(state, value.elements); + } + static void decode(DecodeState state, byte[] rep, int pos, int last) { //System.out.println("Decoding '" + ArrayUtils.printable(rep) + "' at " + pos); @@ -491,8 +501,8 @@ class TupleUtil { int n = positive ? 
code - INT_ZERO_CODE : INT_ZERO_CODE - code; int end = start + n; - if(rep.length < last) { - throw new RuntimeException("Invalid tuple (possible truncation)"); + if(last < end) { + throw new IllegalArgumentException("Invalid tuple (possible truncation)"); } if(positive && (n < Long.BYTES || rep[start] > 0)) { @@ -530,12 +540,16 @@ class TupleUtil { } } else if(code == VERSIONSTAMP_CODE) { + if(start + Versionstamp.LENGTH > last) { + throw new IllegalArgumentException("Invalid tuple (possible truncation)"); + } Versionstamp val = Versionstamp.fromBytes(Arrays.copyOfRange(rep, start, start + Versionstamp.LENGTH)); state.add(val, start + Versionstamp.LENGTH); } else if(code == NESTED_CODE) { DecodeState subResult = new DecodeState(); int endPos = start; + boolean foundEnd = false; while(endPos < last) { if(rep[endPos] == nil) { if(endPos + 1 < last && rep[endPos+1] == (byte)0xff) { @@ -543,6 +557,7 @@ class TupleUtil { endPos += 2; } else { endPos += 1; + foundEnd = true; break; } } else { @@ -550,6 +565,9 @@ class TupleUtil { endPos = subResult.end; } } + if(!foundEnd) { + throw new IllegalArgumentException("No terminator found for nested tuple starting at " + start); + } state.add(subResult.values, endPos); } else { @@ -558,6 +576,10 @@ class TupleUtil { } static int compareItems(Object item1, Object item2) { + if(item1 == item2) { + // If we have pointer equality, just return 0 immediately. + return 0; + } int code1 = TupleUtil.getCodeFor(item1); int code2 = TupleUtil.getCodeFor(item2); @@ -603,14 +625,14 @@ class TupleUtil { } } if(code1 == FLOAT_CODE) { - // This is done for the same reason that double comparison is done - // that way. + // This is done over vanilla float comparison basically to handle NaNs + // sorting correctly. 
int fbits1 = encodeFloatBits((Float)item1); int fbits2 = encodeFloatBits((Float)item2); return Integer.compareUnsigned(fbits1, fbits2); } if(code1 == DOUBLE_CODE) { - // This is done over vanilla double comparison basically to handle NaN + // This is done over vanilla double comparison basically to handle NaNs // sorting correctly. long dbits1 = encodeDoubleBits((Double)item1); long dbits2 = encodeDoubleBits((Double)item2); @@ -637,58 +659,57 @@ class TupleUtil { throw new IllegalArgumentException("Unknown tuple data type: " + item1.getClass()); } - static List unpack(byte[] bytes, int start, int length) { - DecodeState decodeState = new DecodeState(); - int pos = start; - int end = start + length; - while(pos < end) { - decode(decodeState, bytes, pos, end); - pos = decodeState.end; + static List unpack(byte[] bytes) { + try { + DecodeState decodeState = new DecodeState(); + int pos = 0; + int end = bytes.length; + while (pos < end) { + decode(decodeState, bytes, pos, end); + pos = decodeState.end; + } + return decodeState.values; + } + catch(IndexOutOfBoundsException | BufferOverflowException e) { + throw new IllegalArgumentException("Invalid tuple (possible truncation)", e); } - return decodeState.values; } - static void encodeAll(EncodeState state, List items, byte[] prefix) { - if(prefix != null) { - state.add(prefix); - } + static void encodeAll(EncodeState state, List items) { for(Object t : items) { encode(state, t); } - //System.out.println("Joining whole tuple..."); } - static byte[] pack(List items, byte[] prefix, int expectedSize) { - ByteBuffer dest = ByteBuffer.allocate(expectedSize + (prefix != null ? 
prefix.length : 0)); + static void pack(ByteBuffer dest, List items) { + ByteOrder origOrder = dest.order(); EncodeState state = new EncodeState(dest); - if(prefix != null) { - state.add(prefix); - } - encodeAll(state, items, prefix); + encodeAll(state, items); + dest.order(origOrder); if(state.versionPos >= 0) { - throw new IllegalArgumentException("Incomplete Versionstamp included in vanilla tuple packInternal"); - } - else { - return dest.array(); + throw new IllegalArgumentException("Incomplete Versionstamp included in vanilla tuple pack"); } } - static byte[] packWithVersionstamp(List items, byte[] prefix, int expectedSize) { - ByteBuffer dest = ByteBuffer.allocate(expectedSize + (prefix != null ? prefix.length : 0)); + static byte[] pack(List items, int expectedSize) { + ByteBuffer dest = ByteBuffer.allocate(expectedSize); + pack(dest, items); + return dest.array(); + } + + static byte[] packWithVersionstamp(List items, int expectedSize) { + ByteBuffer dest = ByteBuffer.allocate(expectedSize); EncodeState state = new EncodeState(dest); - if(prefix != null) { - state.add(prefix); - } - encodeAll(state, items, prefix); + encodeAll(state, items); if(state.versionPos < 0) { throw new IllegalArgumentException("No incomplete Versionstamp included in tuple packInternal with versionstamp"); } else { - if(state.versionPos > 0xffff) { + if(useOldVersionOffsetFormat() && state.versionPos > 0xffff) { throw new IllegalArgumentException("Tuple has incomplete version at position " + state.versionPos + " which is greater than the maximum " + 0xffff); } dest.order(ByteOrder.LITTLE_ENDIAN); - if (FDB.instance().getAPIVersion() < 520) { + if (useOldVersionOffsetFormat()) { dest.putShort((short)state.versionPos); } else { dest.putInt(state.versionPos); @@ -740,7 +761,7 @@ class TupleUtil { packedSize += 1 + Versionstamp.LENGTH; Versionstamp versionstamp = (Versionstamp)item; if(!versionstamp.isComplete()) { - int suffixSize = FDB.instance().getAPIVersion() < 520 ? 
Short.BYTES : Integer.BYTES; + int suffixSize = useOldVersionOffsetFormat() ? Short.BYTES : Integer.BYTES; packedSize += suffixSize; } } @@ -776,7 +797,7 @@ class TupleUtil { public static void main(String[] args) { try { - byte[] bytes = pack(Collections.singletonList(4), null, 2); + byte[] bytes = pack(Collections.singletonList(4), 2); DecodeState result = new DecodeState(); decode(result, bytes, 0, bytes.length); int val = ((Number)result.values.get(0)).intValue(); @@ -788,7 +809,7 @@ class TupleUtil { } try { - byte[] bytes = pack(Collections.singletonList("\u021Aest \u0218tring"), null, 15); + byte[] bytes = pack(Collections.singletonList("\u021Aest \u0218tring"), 15); DecodeState result = new DecodeState(); decode(result, bytes, 0, bytes.length); String string = (String)result.values.get(0); diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/Versionstamp.java b/bindings/java/src/main/com/apple/foundationdb/tuple/Versionstamp.java index 85c6de37ae..07c3218eac 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/Versionstamp.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/Versionstamp.java @@ -94,8 +94,8 @@ public class Versionstamp implements Comparable { private static final byte[] UNSET_TRANSACTION_VERSION = {(byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff}; - private boolean complete; - private byte[] versionBytes; + private final boolean complete; + private final byte[] versionBytes; /** * From a byte array, unpack the user version starting at the given position. 
diff --git a/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java b/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java index 2f0fd1c2c4..ac2b033748 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java @@ -21,13 +21,21 @@ package com.apple.foundationdb.test; import java.math.BigInteger; +import java.nio.BufferOverflowException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.Objects; import java.util.UUID; +import java.util.stream.Stream; +import com.apple.foundationdb.Database; +import com.apple.foundationdb.FDB; import com.apple.foundationdb.TransactionContext; +import com.apple.foundationdb.subspace.Subspace; import com.apple.foundationdb.tuple.ByteArrayUtil; import com.apple.foundationdb.tuple.Tuple; import com.apple.foundationdb.tuple.Versionstamp; @@ -38,15 +46,19 @@ public class TupleTest { public static void main(String[] args) throws InterruptedException { final int reps = 1000; try { - // FDB fdb = FDB.selectAPIVersion(610); - serializedForms(); + FDB fdb = FDB.selectAPIVersion(610); + addMethods(); comparisons(); + emptyTuple(); + incompleteVersionstamps(); + intoBuffer(); + offsetsAndLengths(); + malformedBytes(); replaceTests(); - /* + serializedForms(); try(Database db = fdb.open()) { runTests(reps, db); } - */ } catch(Throwable t) { t.printStackTrace(); } @@ -269,6 +281,606 @@ public class TupleTest { } } + private static void emptyTuple() { + Tuple t = new Tuple(); + if(!t.isEmpty()) { + throw new RuntimeException("empty tuple is not empty"); + } + if(t.getPackedSize() != 0) { + throw new RuntimeException("empty tuple packed size is not 0"); + } + if(t.pack().length != 0) { + throw new RuntimeException("empty tuple is not packed to the empty byte string"); + } + } + + private static void 
addMethods() { + List baseTuples = Arrays.asList( + new Tuple(), + Tuple.from(), + Tuple.from((Object)null), + Tuple.from("prefix"), + Tuple.from("prefix", null), + Tuple.from(new UUID(100, 1000)), + Tuple.from(Versionstamp.incomplete(1)), + Tuple.from(Tuple.from(Versionstamp.incomplete(2))), + Tuple.from(Collections.singletonList(Versionstamp.incomplete(3))) + ); + List toAdd = Arrays.asList( + null, + 1066L, + BigInteger.valueOf(1066), + -3.14f, + 2.71828, + new byte[]{0x01, 0x02, 0x03}, + new byte[]{0x01, 0x00, 0x02, 0x00, 0x03}, + "hello there", + "hell\0 there", + "\ud83d\udd25", + "\ufb14", + false, + true, + Float.NaN, + Float.intBitsToFloat(Integer.MAX_VALUE), + Double.NaN, + Double.longBitsToDouble(Long.MAX_VALUE), + Versionstamp.complete(new byte[]{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09}, 100), + Versionstamp.incomplete(4), + new UUID(-1, 1), + Tuple.from((Object)null), + Tuple.from("suffix", "tuple"), + Tuple.from("s\0ffix", "tuple"), + Arrays.asList("suffix", "tuple"), + Arrays.asList("suffix", null, "tuple"), + Tuple.from("suffix", null, "tuple"), + Tuple.from("suffix", Versionstamp.incomplete(4), "tuple"), + Arrays.asList("suffix", Arrays.asList("inner", Versionstamp.incomplete(5), "tuple"), "tuple") + ); + + for(Tuple baseTuple : baseTuples) { + for(Object newItem : toAdd) { + int baseSize = baseTuple.size(); + Tuple freshTuple = Tuple.fromStream(Stream.concat(baseTuple.stream(), Stream.of(newItem))); + if(freshTuple.size() != baseSize + 1) { + throw new RuntimeException("freshTuple size was not one larger than base size"); + } + Tuple withObjectAdded = baseTuple.addObject(newItem); + if(withObjectAdded.size() != baseSize + 1) { + throw new RuntimeException("withObjectAdded size was not one larger than the base size"); + } + // Use the appropriate "add" overload. 
+ Tuple withValueAdded; + if(newItem == null) { + withValueAdded = baseTuple.addObject(null); + } + else if(newItem instanceof byte[]) { + withValueAdded = baseTuple.add((byte[])newItem); + } + else if(newItem instanceof String) { + withValueAdded = baseTuple.add((String)newItem); + } + else if(newItem instanceof Long) { + withValueAdded = baseTuple.add((Long)newItem); + } + else if(newItem instanceof BigInteger) { + withValueAdded = baseTuple.add((BigInteger)newItem); + } + else if(newItem instanceof Float) { + withValueAdded = baseTuple.add((Float)newItem); + } + else if(newItem instanceof Double) { + withValueAdded = baseTuple.add((Double)newItem); + } + else if(newItem instanceof Boolean) { + withValueAdded = baseTuple.add((Boolean)newItem); + } + else if(newItem instanceof UUID) { + withValueAdded = baseTuple.add((UUID)newItem); + } + else if(newItem instanceof Versionstamp) { + withValueAdded = baseTuple.add((Versionstamp)newItem); + } + else if(newItem instanceof List) { + withValueAdded = baseTuple.add((List)newItem); + } + else if(newItem instanceof Tuple) { + withValueAdded = baseTuple.add((Tuple)newItem); + } + else { + throw new RuntimeException("unknown type for tuple serialization " + newItem.getClass()); + } + // Use Tuple.addAll, which has optimizations if both tuples have been packed already + // Getting their hash codes memoizes the packed representation. 
+ Tuple newItemTuple = Tuple.from(newItem); + baseTuple.hashCode(); + newItemTuple.hashCode(); + Tuple withTupleAddedAll = baseTuple.addAll(newItemTuple); + Tuple withListAddedAll = baseTuple.addAll(Collections.singletonList(newItem)); + List allTuples = Arrays.asList(freshTuple, withObjectAdded, withValueAdded, withTupleAddedAll, withListAddedAll); + + int basePlusNewSize = baseTuple.getPackedSize() + Tuple.from(newItem).getPackedSize(); + int freshTuplePackedSize = freshTuple.getPackedSize(); + int withObjectAddedPackedSize = withObjectAdded.getPackedSize(); + int withValueAddedPackedSize = withValueAdded.getPackedSize(); + int withTupleAddedAllPackedSize = withTupleAddedAll.getPackedSize(); + int withListAddAllPackedSize = withListAddedAll.getPackedSize(); + if(basePlusNewSize != freshTuplePackedSize || basePlusNewSize != withObjectAddedPackedSize || + basePlusNewSize != withValueAddedPackedSize || basePlusNewSize != withTupleAddedAllPackedSize || + basePlusNewSize != withListAddAllPackedSize) { + throw new RuntimeException("packed sizes not equivalent"); + } + byte[] concatPacked; + byte[] prefixPacked; + byte[] freshPacked; + byte[] objectAddedPacked; + byte[] valueAddedPacked; + byte[] tupleAddedAllPacked; + byte[] listAddedAllPacked; + if(!baseTuple.hasIncompleteVersionstamp() && !Tuple.from(newItem).hasIncompleteVersionstamp()) { + concatPacked = ByteArrayUtil.join(baseTuple.pack(), Tuple.from(newItem).pack()); + prefixPacked = Tuple.from(newItem).pack(baseTuple.pack()); + freshPacked = freshTuple.pack(); + objectAddedPacked = withObjectAdded.pack(); + valueAddedPacked = withValueAdded.pack(); + tupleAddedAllPacked = withTupleAddedAll.pack(); + listAddedAllPacked = withListAddedAll.pack(); + + for(Tuple t : allTuples) { + try { + t.packWithVersionstamp(); + throw new RuntimeException("able to pack tuple without incomplete versionstamp using packWithVersionstamp"); + } + catch(IllegalArgumentException e) { + // eat + } + } + } + else 
if(!baseTuple.hasIncompleteVersionstamp() && Tuple.from(newItem).hasIncompleteVersionstamp()) { + concatPacked = newItemTuple.packWithVersionstamp(baseTuple.pack()); + try { + prefixPacked = Tuple.from(newItem).packWithVersionstamp(baseTuple.pack()); + } + catch(NullPointerException e) { + prefixPacked = Tuple.from(newItem).packWithVersionstamp(baseTuple.pack()); + } + freshPacked = freshTuple.packWithVersionstamp(); + objectAddedPacked = withObjectAdded.packWithVersionstamp(); + valueAddedPacked = withValueAdded.packWithVersionstamp(); + tupleAddedAllPacked = withTupleAddedAll.packWithVersionstamp(); + listAddedAllPacked = withListAddedAll.packWithVersionstamp(); + + for(Tuple t : allTuples) { + try { + t.pack(); + throw new RuntimeException("able to pack tuple with incomplete versionstamp"); + } + catch(IllegalArgumentException e) { + // eat + } + } + } + else if(baseTuple.hasIncompleteVersionstamp() && !Tuple.from(newItem).hasIncompleteVersionstamp()) { + concatPacked = baseTuple.addAll(Tuple.from(newItem)).packWithVersionstamp(); + prefixPacked = baseTuple.addObject(newItem).packWithVersionstamp(); + freshPacked = freshTuple.packWithVersionstamp(); + objectAddedPacked = withObjectAdded.packWithVersionstamp(); + valueAddedPacked = withValueAdded.packWithVersionstamp(); + tupleAddedAllPacked = withTupleAddedAll.packWithVersionstamp(); + listAddedAllPacked = withListAddedAll.packWithVersionstamp(); + + for(Tuple t : allTuples) { + try { + t.pack(); + throw new RuntimeException("able to pack tuple with incomplete versionstamp"); + } + catch(IllegalArgumentException e) { + // eat + } + } + } + else { + for(Tuple t : allTuples) { + try { + t.pack(); + throw new RuntimeException("able to pack tuple with two versionstamps using pack"); + } + catch(IllegalArgumentException e) { + // eat + } + try { + t.packWithVersionstamp(); + throw new RuntimeException("able to pack tuple with two versionstamps using packWithVersionstamp"); + } + catch(IllegalArgumentException e) { + 
// eat + } + try { + t.hashCode(); + throw new RuntimeException("able to get hash code of tuple with two versionstamps"); + } + catch(IllegalArgumentException e) { + // eat + } + } + concatPacked = null; + prefixPacked = null; + freshPacked = null; + objectAddedPacked = null; + valueAddedPacked = null; + tupleAddedAllPacked = null; + listAddedAllPacked = null; + } + if(!Arrays.equals(concatPacked, freshPacked) || + !Arrays.equals(freshPacked, prefixPacked) || + !Arrays.equals(freshPacked, objectAddedPacked) || + !Arrays.equals(freshPacked, valueAddedPacked) || + !Arrays.equals(freshPacked, tupleAddedAllPacked) || + !Arrays.equals(freshPacked, listAddedAllPacked)) { + throw new RuntimeException("packed values are not concatenation of original packings"); + } + if(freshPacked != null && freshPacked.length != basePlusNewSize) { + throw new RuntimeException("packed length did not match expectation"); + } + if(freshPacked != null) { + if(freshTuple.hashCode() != Arrays.hashCode(freshPacked)) { + throw new IllegalArgumentException("hash code does not match fresh packed"); + } + for(Tuple t : allTuples) { + if(t.hashCode() != freshTuple.hashCode()) { + throw new IllegalArgumentException("hash code mismatch"); + } + if(Tuple.fromItems(t.getItems()).hashCode() != freshTuple.hashCode()) { + throw new IllegalArgumentException("hash code mismatch after re-compute"); + } + } + } + } + } + } + + private static void incompleteVersionstamps() { + if(FDB.instance().getAPIVersion() < 520) { + throw new IllegalStateException("cannot run test with API version " + FDB.instance().getAPIVersion()); + } + // This is a tricky case where there are two tuples with identical representations but different semantics. + byte[] arr = new byte[0x0100fe]; + Arrays.fill(arr, (byte)0x7f); // The actual value doesn't matter, but it can't be zero. 
+ Tuple t1 = Tuple.from(arr, Versionstamp.complete(new byte[]{FF, FF, FF, FF, FF, FF, FF, FF, FF, FF}), new byte[]{0x01, 0x01}); + Tuple t2 = Tuple.from(arr, Versionstamp.incomplete()); + if(t1.equals(t2)) { + throw new RuntimeException("tuples " + t1 + " and " + t2 + " compared equal"); + } + byte[] bytes1 = t1.pack(); + byte[] bytes2 = t2.packWithVersionstamp(); + if(!Arrays.equals(bytes1, bytes2)) { + throw new RuntimeException("tuples " + t1 + " and " + t2 + " did not have matching representations"); + } + if(t1.equals(t2)) { + throw new RuntimeException("tuples " + t1 + " and " + t2 + " compared equal with memoized packed representations"); + } + + // Make sure position information adjustment works. + Tuple t3 = Tuple.from(Versionstamp.incomplete(1)); + if(t3.getPackedSize() != 1 + Versionstamp.LENGTH + Integer.BYTES) { + throw new RuntimeException("incomplete versionstamp has incorrect packed size " + t3.getPackedSize()); + } + byte[] bytes3 = t3.packWithVersionstamp(); + if(ByteBuffer.wrap(bytes3, bytes3.length - Integer.BYTES, Integer.BYTES).order(ByteOrder.LITTLE_ENDIAN).getInt() != 1) { + throw new RuntimeException("incomplete versionstamp has incorrect position"); + } + if(!Tuple.fromBytes(bytes3, 0, bytes3.length - Integer.BYTES).equals(Tuple.from(Versionstamp.incomplete(1)))) { + throw new RuntimeException("unpacked bytes did not match"); + } + Subspace subspace = new Subspace(Tuple.from("prefix")); + byte[] bytes4 = subspace.packWithVersionstamp(t3); + if(ByteBuffer.wrap(bytes4, bytes4.length - Integer.BYTES, Integer.BYTES).order(ByteOrder.LITTLE_ENDIAN).getInt() != 1 + subspace.getKey().length) { + throw new RuntimeException("incomplete versionstamp has incorrect position with prefix"); + } + if(!Tuple.fromBytes(bytes4, 0, bytes4.length - Integer.BYTES).equals(Tuple.from("prefix", Versionstamp.incomplete(1)))) { + throw new RuntimeException("unpacked bytes with subspace did not match"); + } + try { + // At this point, the representation is cached, so 
an easy bug would be to have it return the already serialized value + t3.pack(); + throw new RuntimeException("was able to pack versionstamp with incomplete versionstamp"); + } catch(IllegalArgumentException e) { + // eat + } + + // Tuples with two incomplete versionstamps somewhere. + List twoIncompleteList = Arrays.asList( + Tuple.from(Versionstamp.incomplete(1), Versionstamp.incomplete(2)), + Tuple.from(Tuple.from(Versionstamp.incomplete(3)), Tuple.from(Versionstamp.incomplete(4))), + new Tuple().add(Versionstamp.incomplete()).add(Versionstamp.incomplete()), + new Tuple().add(Versionstamp.incomplete()).add(3L).add(Versionstamp.incomplete()), + Tuple.from(Tuple.from(Versionstamp.incomplete()), "dummy_string").add(Tuple.from(Versionstamp.incomplete())), + Tuple.from(Arrays.asList(Versionstamp.incomplete(), "dummy_string")).add(Tuple.from(Versionstamp.incomplete())), + Tuple.from(Tuple.from(Versionstamp.incomplete()), "dummy_string").add(Collections.singletonList(Versionstamp.incomplete())) + ); + for(Tuple t : twoIncompleteList) { + if(!t.hasIncompleteVersionstamp()) { + throw new RuntimeException("tuple doesn't think it has incomplete versionstamp"); + } + if(t.getPackedSize() < 2 * (1 + Versionstamp.LENGTH + Integer.BYTES)) { + throw new RuntimeException("tuple packed size " + t.getPackedSize() + " is smaller than expected"); + } + try { + t.pack(); + throw new RuntimeException("no error thrown when packing any incomplete versionstamps"); + } + catch(IllegalArgumentException e) { + // eat + } + try { + t.packWithVersionstamp(); + throw new RuntimeException("no error thrown when packing with versionstamp with two incompletes"); + } + catch(IllegalArgumentException e) { + // eat + } + } + } + + // Assumes API version < 520 + private static void incompleteVersionstamps300() { + if(FDB.instance().getAPIVersion() >= 520) { + throw new IllegalStateException("cannot run test with API version " + FDB.instance().getAPIVersion()); + } + Tuple t1 = 
Tuple.from(Versionstamp.complete(new byte[]{FF, FF, FF, FF, FF, FF, FF, FF, FF, FF}), new byte[]{}); + Tuple t2 = Tuple.from(Versionstamp.incomplete()); + if(t1.equals(t2)) { + throw new RuntimeException("tuples " + t1 + " and " + t2 + " compared equal"); + } + byte[] bytes1 = t1.pack(); + byte[] bytes2 = t2.packWithVersionstamp(); + if(!Arrays.equals(bytes1, bytes2)) { + throw new RuntimeException("tuples " + t1 + " and " + t2 + " did not have matching representations"); + } + if(t1.equals(t2)) { + throw new RuntimeException("tuples " + t1 + " and " + t2 + " compared equal with memoized packed representations"); + } + + // Make sure position information adjustment works. + Tuple t3 = Tuple.from(Versionstamp.incomplete(1)); + if(t3.getPackedSize() != 1 + Versionstamp.LENGTH + Short.BYTES) { + throw new RuntimeException("incomplete versionstamp has incorrect packed size " + t3.getPackedSize()); + } + byte[] bytes3 = t3.packWithVersionstamp(); + if(ByteBuffer.wrap(bytes3, bytes3.length - Short.BYTES, Short.BYTES).order(ByteOrder.LITTLE_ENDIAN).getShort() != 1) { + throw new RuntimeException("incomplete versionstamp has incorrect position"); + } + if(!Tuple.fromBytes(bytes3, 0, bytes3.length - Short.BYTES).equals(Tuple.from(Versionstamp.incomplete(1)))) { + throw new RuntimeException("unpacked bytes did not match"); + } + Subspace subspace = new Subspace(Tuple.from("prefix")); + byte[] bytes4 = subspace.packWithVersionstamp(t3); + if(ByteBuffer.wrap(bytes4, bytes4.length - Short.BYTES, Short.BYTES).order(ByteOrder.LITTLE_ENDIAN).getShort() != 1 + subspace.getKey().length) { + throw new RuntimeException("incomplete versionstamp has incorrect position with prefix"); + } + if(!Tuple.fromBytes(bytes4, 0, bytes4.length - Short.BYTES).equals(Tuple.from("prefix", Versionstamp.incomplete(1)))) { + throw new RuntimeException("unpacked bytes with subspace did not match"); + } + + // Make sure an offset > 0xFFFF throws an error. 
+ Tuple t4 = Tuple.from(Versionstamp.incomplete(2)); + byte[] bytes5 = t4.packWithVersionstamp(); // Get bytes memoized. + if(ByteBuffer.wrap(bytes5, bytes5.length - Short.BYTES, Short.BYTES).order(ByteOrder.LITTLE_ENDIAN).getShort() != 1) { + throw new RuntimeException("incomplete versionstamp has incorrect position with prefix"); + } + byte[] bytes6 = t4.packWithVersionstamp(new byte[0xfffe]); // Offset is 0xffff + if(!Arrays.equals(Arrays.copyOfRange(bytes5, 0, 1 + Versionstamp.LENGTH), Arrays.copyOfRange(bytes6, 0xfffe, 0xffff + Versionstamp.LENGTH))) { + throw new RuntimeException("area before versionstamp offset did not match"); + } + if((ByteBuffer.wrap(bytes6, bytes6.length - Short.BYTES, Short.BYTES).order(ByteOrder.LITTLE_ENDIAN).getShort() & 0xffff) != 0xffff) { + throw new RuntimeException("incomplete versionstamp has incorrect position with prefix"); + } + try { + t4.packWithVersionstamp(new byte[0xffff]); // Offset is 0x10000 + throw new RuntimeException("able to pack versionstamp with offset that is too large"); + } + catch(IllegalArgumentException e) { + // eat + } + // Same as before, but packed representation is not memoized. 
+ try { + Tuple.from(Versionstamp.incomplete(3)).packWithVersionstamp(new byte[0xffff]); // Offset is 0x10000 + throw new RuntimeException("able to pack versionstamp with offset that is too large"); + } + catch(IllegalArgumentException e) { + // eat + } + } + + private static void malformedBytes() { + List malformedSequences = Arrays.asList( + new byte[]{0x01, (byte)0xde, (byte)0xad, (byte)0xc0, (byte)0xde}, // no termination character for byte array + new byte[]{0x01, (byte)0xde, (byte)0xad, 0x00, FF, (byte)0xc0, (byte)0xde}, // no termination character but null in middle + new byte[]{0x02, 'h', 'e', 'l', 'l', 'o'}, // no termination character for string + new byte[]{0x02, 'h', 'e', 'l', 0x00, FF, 'l', 'o'}, // no termination character but null in the middle + // Invalid UTF-8 decodes malformed as U+FFFD rather than throwing an error + // new byte[]{0x02, 'u', 't', 'f', 0x08, (byte)0x80, 0x00}, // invalid utf-8 code point start character + // new byte[]{0x02, 'u', 't', 'f', 0x08, (byte)0xc0, 0x01, 0x00}, // invalid utf-8 code point second character + new byte[]{0x05, 0x02, 'h', 'e', 'l', 'l', 'o', 0x00}, // no termination character for nested tuple + new byte[]{0x05, 0x02, 'h', 'e', 'l', 'l', 'o', 0x00, 0x00, FF, 0x02, 't', 'h', 'e', 'r', 'e', 0x00}, // no termination character for nested tuple but null in the middle + new byte[]{0x16, 0x01}, // integer truncation + new byte[]{0x12, 0x01}, // integer truncation + new byte[]{0x1d, 0x09, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}, // integer truncation + new byte[]{0x0b, 0x09 ^ FF, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}, // integer truncation + new byte[]{0x20, 0x01, 0x02, 0x03}, // float truncation + new byte[]{0x21, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07}, // double truncation + new byte[]{0x30, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e}, // UUID truncation + new byte[]{0x33, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b}, // versionstamp 
truncation + new byte[]{FF} // unknown start code + ); + for(byte[] sequence : malformedSequences) { + try { + Tuple t = Tuple.fromBytes(sequence); + throw new RuntimeException("Able to unpack " + ByteArrayUtil.printable(sequence) + " into " + t); + } + catch(IllegalArgumentException e) { + System.out.println("Error for " + ByteArrayUtil.printable(sequence) + ": " + e.getMessage()); + } + } + + // Perfectly good byte sequences, but using the offset and length to remove terminal bytes + List wellFormedSequences = Arrays.asList( + Tuple.from((Object)new byte[]{0x01, 0x02}).pack(), + Tuple.from("hello").pack(), + Tuple.from("hell\0").pack(), + Tuple.from(1066L).pack(), + Tuple.from(-1066L).pack(), + Tuple.from(BigInteger.ONE.shiftLeft(Long.SIZE + 1)).pack(), + Tuple.from(BigInteger.ONE.shiftLeft(Long.SIZE + 1).negate()).pack(), + Tuple.from(-3.14f).pack(), + Tuple.from(2.71828).pack(), + Tuple.from(new UUID(1066L, 1415L)).pack(), + Tuple.from(Versionstamp.fromBytes(new byte[]{0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c})).pack() + ); + for(byte[] sequence : wellFormedSequences) { + try { + Tuple t = Tuple.fromBytes(sequence, 0, sequence.length - 1); + throw new RuntimeException("Able to unpack " + ByteArrayUtil.printable(sequence) + " into " + t + " without last character"); + } + catch(IllegalArgumentException e) { + System.out.println("Error for " + ByteArrayUtil.printable(sequence) + ": " + e.getMessage()); + } + } + } + + private static void offsetsAndLengths() { + List tuples = Arrays.asList( + new Tuple(), + Tuple.from((Object)null), + Tuple.from(null, new byte[]{0x10, 0x66}), + Tuple.from("dummy_string"), + Tuple.from(1066L) + ); + Tuple allTuples = tuples.stream().reduce(new Tuple(), Tuple::addAll); + byte[] allTupleBytes = allTuples.pack(); + + // Unpack each tuple individually using their lengths + int offset = 0; + for(Tuple t : tuples) { + int length = t.getPackedSize(); + Tuple unpacked = Tuple.fromBytes(allTupleBytes, offset, 
length); + if(!unpacked.equals(t)) { + throw new RuntimeException("unpacked tuple " + unpacked + " does not match serialized tuple " + t); + } + offset += length; + } + + // Unpack successive pairs of tuples. + offset = 0; + for(int i = 0; i < tuples.size() - 1; i++) { + Tuple combinedTuple = tuples.get(i).addAll(tuples.get(i + 1)); + Tuple unpacked = Tuple.fromBytes(allTupleBytes, offset, combinedTuple.getPackedSize()); + if(!unpacked.equals(combinedTuple)) { + throw new RuntimeException("unpacked tuple " + unpacked + " does not match combined tuple " + combinedTuple); + } + offset += tuples.get(i).getPackedSize(); + } + + // Allow an offset to equal the length of the array, but essentially only a zero-length is allowed there. + Tuple emptyAtEndTuple = Tuple.fromBytes(allTupleBytes, allTupleBytes.length, 0); + if(!emptyAtEndTuple.isEmpty()) { + throw new RuntimeException("tuple with no bytes is not empty"); + } + + try { + Tuple.fromBytes(allTupleBytes, -1, 4); + throw new RuntimeException("able to give negative offset to fromBytes"); + } + catch(IllegalArgumentException e) { + // eat + } + try { + Tuple.fromBytes(allTupleBytes, allTupleBytes.length + 1, 4); + throw new RuntimeException("able to give offset larger than array to fromBytes"); + } + catch(IllegalArgumentException e) { + // eat + } + try { + Tuple.fromBytes(allTupleBytes, 0, -1); + throw new RuntimeException("able to give negative length to fromBytes"); + } + catch(IllegalArgumentException e) { + // eat + } + try { + Tuple.fromBytes(allTupleBytes, 0, allTupleBytes.length + 1); + throw new RuntimeException("able to give length larger than array to fromBytes"); + } + catch(IllegalArgumentException e) { + // eat + } + try { + Tuple.fromBytes(allTupleBytes, allTupleBytes.length / 2, allTupleBytes.length / 2 + 2); + throw new RuntimeException("able to exceed array length in fromBytes"); + } + catch(IllegalArgumentException e) { + // eat + } + } + + private static void intoBuffer() { + Tuple t = 
Tuple.from("hello", 3.14f, "world"); + ByteBuffer buffer = ByteBuffer.allocate("hello".length() + 2 + Float.BYTES + 1 + "world".length() + 2); + t.packInto(buffer); + if(!Arrays.equals(t.pack(), buffer.array())) { + throw new RuntimeException("buffer and tuple do not match"); + } + + buffer = ByteBuffer.allocate(t.getPackedSize() + 2); + buffer.order(ByteOrder.LITTLE_ENDIAN); + t.packInto(buffer); + if(!Arrays.equals(ByteArrayUtil.join(t.pack(), new byte[]{0x00, 0x00}), buffer.array())) { + throw new RuntimeException("buffer and tuple do not match"); + } + if(!buffer.order().equals(ByteOrder.LITTLE_ENDIAN)) { + throw new RuntimeException("byte order changed"); + } + + buffer = ByteBuffer.allocate(t.getPackedSize() + 2); + buffer.put((byte)0x01).put((byte)0x02); + t.packInto(buffer); + if(!Arrays.equals(t.pack(new byte[]{0x01, 0x02}), buffer.array())) { + throw new RuntimeException("buffer and tuple do not match"); + } + + buffer = ByteBuffer.allocate(t.getPackedSize() - 1); + try { + t.packInto(buffer); + throw new RuntimeException("able to pack into buffer that was too small"); + } + catch(BufferOverflowException e) { + // eat + } + + Tuple tCopy = Tuple.fromItems(t.getItems()); // remove memoized stuff + buffer = ByteBuffer.allocate(t.getPackedSize() - 1); + try { + tCopy.packInto(buffer); + throw new RuntimeException("able to pack into buffer that was too small"); + } + catch(BufferOverflowException e) { + // eat + } + + Tuple tWithIncomplete = Tuple.from(Versionstamp.incomplete(3)); + buffer = ByteBuffer.allocate(tWithIncomplete.getPackedSize()); + try { + tWithIncomplete.packInto(buffer); + throw new RuntimeException("able to pack incomplete versionstamp into buffer"); + } + catch(IllegalArgumentException e) { + // eat + } + if(buffer.arrayOffset() != 0) { + throw new RuntimeException("offset changed after unsuccessful pack with incomplete versionstamp"); + } + } + // These should be in ArrayUtilTest, but those can't be run at the moment, so here they go. 
private static void replaceTests() { List arrays = Arrays.asList( From a1c32ce057f714761e3a3614db2b07497acb8fb9 Mon Sep 17 00:00:00 2001 From: Alec Grieser Date: Thu, 28 Feb 2019 09:35:04 -0800 Subject: [PATCH 23/47] update release notes with Tuple improvements --- documentation/sphinx/source/release-notes.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst index 606e63d229..a6e03e7ee2 100644 --- a/documentation/sphinx/source/release-notes.rst +++ b/documentation/sphinx/source/release-notes.rst @@ -40,10 +40,15 @@ Bindings * Java: Deprecated ``FDB.createCluster`` and ``Cluster``. The preferred way to get a ``Database`` is by using ``FDB.open``, which should work in both new and old API versions. `(PR #942) `_ * Java: Removed ``Cluster(long cPtr, Executor executor)`` constructor. This is API breaking for any code that has subclassed the ``Cluster`` class and is not protected by API versioning. `(PR #942) `_ * Java: Several methods relevant to read-only transactions have been moved into the ``ReadTransaction`` interface. +* Java: Tuples now cache previous hash codes and equality checking no longer requires packing the underlying Tuples. `(PR #1166) `_ +* Java: Tuple performance has been improved to use fewer allocations when packing and unpacking. `(Issue #1206) `_ +* Java: Unpacking a Tuple with a byte array or string that is missing the end-of-string character now throws an error. `(Issue #671) `_ +* Java: Unpacking a Tuple constrained to a subset of the underlying array now throws an error when it encounters a truncated integer. `(Issue #672) `_ * Ruby: Removed ``FDB.init``, ``FDB.create_cluster``, and ``FDB.Cluster``. ``FDB.open`` no longer accepts a ``database_name`` parameter. `(PR #942) `_ * Golang: Deprecated ``fdb.StartNetwork``, ``fdb.Open``, ``fdb.MustOpen``, and ``fdb.CreateCluster`` and added ``fdb.OpenDatabase`` and ``fdb.MustOpenDatabase``. 
The preferred way to start the network and get a ``Database`` is by using ``FDB.OpenDatabase`` or ``FDB.OpenDefault``. `(PR #942) `_ * Flow: Deprecated ``API::createCluster`` and ``Cluster`` and added ``API::createDatabase``. The preferred way to get a ``Database`` is by using ``API::createDatabase``. `(PR #942) `_ * Golang: Added ``fdb.Printable`` to print a human-readable string for a given byte array. Add ``Key.String()``, which converts the ``Key`` to a ``string`` using the ``Printable`` function. `(PR #1010) `_ +* Golang: Tuples now support ``Versionstamp`` operations. `(PR #1187) `_ * Python: Python signal handling didn't work when waiting on a future. In particular, pressing Ctrl-C would not successfully interrupt the program. `(PR #1138) `_ Other Changes From 40aa2ba6f0cddec4a5be3d8a545e3d8651405008 Mon Sep 17 00:00:00 2001 From: Alec Grieser Date: Thu, 28 Feb 2019 16:30:09 -0800 Subject: [PATCH 24/47] CMakeLists alphabetization and Javadoc improvements --- bindings/java/CMakeLists.txt | 4 +-- .../com/apple/foundationdb/tuple/Tuple.java | 34 ++++++++++++++----- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/bindings/java/CMakeLists.txt b/bindings/java/CMakeLists.txt index f8c1c25a65..77a0d5aea0 100644 --- a/bindings/java/CMakeLists.txt +++ b/bindings/java/CMakeLists.txt @@ -54,9 +54,9 @@ set(JAVA_BINDING_SRCS src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java src/main/com/apple/foundationdb/tuple/IterableComparator.java src/main/com/apple/foundationdb/tuple/package-info.java + src/main/com/apple/foundationdb/tuple/StringUtil.java src/main/com/apple/foundationdb/tuple/Tuple.java src/main/com/apple/foundationdb/tuple/TupleUtil.java - src/main/com/apple/foundationdb/tuple/StringUtil.java src/main/com/apple/foundationdb/tuple/Versionstamp.java) set(JAVA_TESTS_SRCS @@ -89,8 +89,8 @@ set(JAVA_TESTS_SRCS src/test/com/apple/foundationdb/test/StackUtils.java src/test/com/apple/foundationdb/test/TesterArgs.java 
src/test/com/apple/foundationdb/test/TestResult.java - src/test/com/apple/foundationdb/test/TupleTest.java src/test/com/apple/foundationdb/test/TuplePerformanceTest.java + src/test/com/apple/foundationdb/test/TupleTest.java src/test/com/apple/foundationdb/test/VersionstampSmokeTest.java src/test/com/apple/foundationdb/test/WatchTest.java src/test/com/apple/foundationdb/test/WhileTrueTest.java) diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java b/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java index ea47870037..e5556faaa6 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java @@ -315,9 +315,11 @@ public class Tuple implements Comparable, Iterable { /** * Get an encoded representation of this {@code Tuple}. Each element is encoded to - * {@code byte}s and concatenated. + * {@code byte}s and concatenated. Note that once a {@code Tuple} has been packed, its + * serialized representation is stored internally so that future calls to this function + * are faster than the initial call. * - * @return a packed representation of this {@code Tuple}. + * @return a packed representation of this {@code Tuple} */ public byte[] pack() { return packInternal(null, true); @@ -326,10 +328,12 @@ public class Tuple implements Comparable, Iterable { /** * Get an encoded representation of this {@code Tuple}. Each element is encoded to * {@code byte}s and concatenated, and then the prefix supplied is prepended to - * the array. + * the array. Note that once a {@code Tuple} has been packed, its serialized representation + * is stored internally so that future calls to this function are faster than the + * initial call. * - * @param prefix additional byte-array prefix to prepend to packed bytes. - * @return a packed representation of this {@code Tuple} prepended by the {@code prefix}. 
+ * @param prefix additional byte-array prefix to prepend to the packed bytes + * @return a packed representation of this {@code Tuple} prepended by the {@code prefix} */ public byte[] pack(byte[] prefix) { return packInternal(prefix, true); @@ -359,6 +363,9 @@ public class Tuple implements Comparable, Iterable { * It is up to the caller to ensure that there is enough space allocated within the buffer * to avoid {@link java.nio.BufferOverflowException}s. The client may call {@link #getPackedSize()} * to determine how large this {@code Tuple} will be once packed in order to allocate sufficient memory. + * Note that unlike {@link #pack()}, the serialized representation of this {@code Tuple} is not stored, so + * calling this function multiple times with the same {@code Tuple} requires serializing the {@code Tuple} + * multiple times. *
*
* This method will throw an error if there are any incomplete {@link Versionstamp}s in this {@code Tuple}. @@ -402,6 +409,10 @@ public class Tuple implements Comparable, Iterable { * {@link com.apple.foundationdb.Transaction#mutate(com.apple.foundationdb.MutationType, byte[], byte[]) Transaction.mutate()} * with the {@code SET_VERSIONSTAMPED_KEY} {@link com.apple.foundationdb.MutationType}, and the transaction's * version will then be filled in at commit time. + *
+ *
+ * Note that once a {@code Tuple} has been packed, its serialized representation is stored internally so that + * future calls to this function are faster than the initial call. * * @param prefix additional byte-array prefix to prepend to packed bytes. * @return a packed representation of this {@code Tuple} for use with versionstamp ops. @@ -477,11 +488,14 @@ public class Tuple implements Comparable, Iterable { /** * Construct a new {@code Tuple} with elements decoded from a supplied {@code byte} array. - * The passed byte array must not be {@code null}. + * The passed byte array must not be {@code null}. This will throw an exception if the passed byte + * array does not represent a valid {@code Tuple}. For example, this will throw an error if it + * encounters an unknown type code or if there is a packed element that appears to be truncated. * * @param bytes encoded {@code Tuple} source * * @return a new {@code Tuple} constructed by deserializing the provided {@code byte} array + * @throws IllegalArgumentException if {@code bytes} does not represent a valid {@code Tuple} */ public static Tuple fromBytes(byte[] bytes) { return fromBytes(bytes, 0, bytes.length); @@ -489,13 +503,17 @@ public class Tuple implements Comparable, Iterable { /** * Construct a new {@code Tuple} with elements decoded from a supplied {@code byte} array. - * The passed byte array must not be {@code null}. + * The passed byte array must not be {@code null}. This will throw an exception if the specified slice of + * the passed byte array does not represent a valid {@code Tuple}. For example, this will throw an error + * if it encounters an unknown type code or if there is a packed element that appears to be truncated. 
* * @param bytes encoded {@code Tuple} source * @param offset starting offset of byte array of encoded data * @param length length of encoded data within the source * * @return a new {@code Tuple} constructed by deserializing the specified slice of the provided {@code byte} array + * @throws IllegalArgumentException if {@code offset} or {@code length} are negative or would exceed the size of + * the array or if {@code bytes} does not represent a valid {@code Tuple} */ public static Tuple fromBytes(byte[] bytes, int offset, int length) { if(offset < 0 || offset > bytes.length) { @@ -864,7 +882,7 @@ public class Tuple implements Comparable, Iterable { * the serialized sizes of all of the elements of this {@code Tuple} and does not pack everything * into a single {@code Tuple}. The return value of this function is stored within this {@code Tuple} * after this function has been called so that subsequent calls on the same object are fast. This method - * does not validate that there is no more than one incomplete {@link Versionstamp} in this {@code Tuple}. + * does not validate that there is not more than one incomplete {@link Versionstamp} in this {@code Tuple}. 
* * @return the number of bytes in the packed representation of this {@code Tuple} */ From 75e475563a65815758f4c81ce8cc593b661bc2da Mon Sep 17 00:00:00 2001 From: Alec Grieser Date: Fri, 1 Mar 2019 16:31:51 -0800 Subject: [PATCH 25/47] clarify comments and be more strict about using UUID_BYTES constant --- .../main/com/apple/foundationdb/tuple/TupleUtil.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java b/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java index 63a1944b5d..e0e43e48df 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java @@ -40,6 +40,7 @@ class TupleUtil { private static final Charset UTF8 = Charset.forName("UTF-8"); private static final BigInteger LONG_MIN_VALUE = BigInteger.valueOf(Long.MIN_VALUE); private static final BigInteger LONG_MAX_VALUE = BigInteger.valueOf(Long.MAX_VALUE); + private static final int UUID_BYTES = 2 * Long.BYTES; private static final IterableComparator iterableComparator = new IterableComparator(); private static final byte BYTES_CODE = 0x01; @@ -475,10 +476,10 @@ class TupleUtil { state.add(true, start); } else if(code == UUID_CODE) { - ByteBuffer bb = ByteBuffer.wrap(rep, start, 2 * Long.BYTES).order(ByteOrder.BIG_ENDIAN); + ByteBuffer bb = ByteBuffer.wrap(rep, start, UUID_BYTES).order(ByteOrder.BIG_ENDIAN); long msb = bb.getLong(); long lsb = bb.getLong(); - state.add(new UUID(msb, lsb), start + 16); + state.add(new UUID(msb, lsb), start + UUID_BYTES); } else if(code == POS_INT_END) { int n = rep[start] & 0xff; @@ -533,8 +534,8 @@ class TupleUtil { if (val.compareTo(LONG_MIN_VALUE) >= 0 && val.compareTo(LONG_MAX_VALUE) <= 0) { state.add(val.longValue(), end); } else { - // This can occur if the thing can be represented with 8 bytes but not - // the right sign information. 
+ // This can occur if the thing can be represented with 8 bytes but requires using + // the most-significant bit as a normal bit instead of the sign bit. state.add(val, end); } } @@ -745,7 +746,7 @@ class TupleUtil { else if(item instanceof Boolean) packedSize += 1; else if(item instanceof UUID) - packedSize += 1 + 2 * Long.BYTES; + packedSize += 1 + UUID_BYTES; else if(item instanceof BigInteger) { BigInteger bigInt = (BigInteger)item; int byteCount = minimalByteCount(bigInt); From f66ddb13c2f748e07d3136a06cdda0f471b6da05 Mon Sep 17 00:00:00 2001 From: Alec Grieser Date: Fri, 1 Mar 2019 16:54:15 -0800 Subject: [PATCH 26/47] rewrite replace without a buffer to use replace with a buffer to first get length --- .../foundationdb/tuple/ByteArrayUtil.java | 64 +++++++++++-------- .../apple/foundationdb/test/TupleTest.java | 36 +++++++++++ 2 files changed, 73 insertions(+), 27 deletions(-) diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java b/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java index d848c296ff..83a49051e1 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java @@ -173,35 +173,31 @@ public class ByteArrayUtil { */ public static byte[] replace(byte[] src, int offset, int length, byte[] pattern, byte[] replacement) { + if(offset < 0 || offset > src.length) { + throw new IllegalArgumentException("Invalid offset for array pattern replacement"); + } + if(length < 0 || offset + length > src.length) { + throw new IllegalArgumentException("Invalid length for array pattern replacement"); + } if(pattern == null || pattern.length == 0) { return Arrays.copyOfRange(src, offset, offset + length); } ByteBuffer dest; if(replacement == null || replacement.length != pattern.length) { // Array might change size. This is the "tricky" case. 
- byte patternFirst = pattern[0]; - int patternOccurrences = 0; - int currentPosition = offset; - while(currentPosition < offset + length) { - if(src[currentPosition] == patternFirst && regionEquals(src, currentPosition, pattern)) { - patternOccurrences++; - currentPosition += pattern.length; + int newLength = replace(src, offset, length, pattern, replacement, null); + if(newLength != length) { + if(newLength < 0) { + System.out.println("oops"); + newLength = replace(src, offset, length, pattern, replacement, null); } - else { - currentPosition++; - } - } - if(patternOccurrences == 0) { - // Pattern doesn't occur. Just return a copy of the needed region. - return Arrays.copyOfRange(src, offset, offset + length); - } - int replacementLength = (replacement == null) ? 0 : replacement.length; - int newLength = length + patternOccurrences * (replacementLength - pattern.length); - if(newLength == 0) { - return new byte[0]; + dest = ByteBuffer.allocate(newLength); } else { - dest = ByteBuffer.allocate(newLength); + // If the array size didn't change, as the pattern and replacement lengths + // differ, it must be the case that there weren't any occurrences of pattern in src + // between offset and offset + length, so we can just return a copy. + return Arrays.copyOfRange(src, offset, offset + length); } } else { @@ -212,21 +208,30 @@ public class ByteArrayUtil { return dest.array(); } - static void replace(byte[] src, int offset, int length, byte[] pattern, byte[] replacement, ByteBuffer dest) { + // Replace any occurrences of pattern in src between offset and offset + length with replacement. + // The new array is serialized into dest and the new length is returned. 
+ static int replace(byte[] src, int offset, int length, byte[] pattern, byte[] replacement, ByteBuffer dest) { if(pattern == null || pattern.length == 0) { - dest.put(src, offset, length); - return; + if(dest != null) { + dest.put(src, offset, length); + } + return length; } byte patternFirst = pattern[0]; int lastPosition = offset; int currentPosition = offset; + int newLength = 0; + int replacementLength = replacement == null ? 0 : replacement.length; while(currentPosition < offset + length) { if(src[currentPosition] == patternFirst && regionEquals(src, currentPosition, pattern)) { - dest.put(src, lastPosition, currentPosition - lastPosition); - if(replacement != null) { - dest.put(replacement); + if(dest != null) { + dest.put(src, lastPosition, currentPosition - lastPosition); + if(replacement != null) { + dest.put(replacement); + } } + newLength += currentPosition - lastPosition + replacementLength; currentPosition += pattern.length; lastPosition = currentPosition; } @@ -235,7 +240,12 @@ public class ByteArrayUtil { } } - dest.put(src, lastPosition, currentPosition - lastPosition); + newLength += currentPosition - lastPosition; + if(dest != null) { + dest.put(src, lastPosition, currentPosition - lastPosition); + } + + return newLength; } /** diff --git a/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java b/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java index ac2b033748..f6152664ec 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java @@ -916,6 +916,42 @@ public class TupleTest { " with " + ByteArrayUtil.printable(replacement) + " in " + ByteArrayUtil.printable(src)); } } + + try { + ByteArrayUtil.replace(null, 0, 1, new byte[]{0x00}, new byte[]{0x00, FF}); + throw new RuntimeException("able to replace null bytes"); + } + catch(NullPointerException e) { + // eat + } + try { + ByteArrayUtil.replace(new byte[]{0x00, 0x01}, -1, 2, new 
byte[]{0x00}, new byte[]{0x00, FF}); + throw new RuntimeException("able to use negative offset"); + } + catch(IllegalArgumentException e) { + // eat + } + try { + ByteArrayUtil.replace(new byte[]{0x00, 0x01}, 3, 2, new byte[]{0x00}, new byte[]{0x00, FF}); + throw new RuntimeException("able to use offset after end of array"); + } + catch(IllegalArgumentException e) { + // eat + } + try { + ByteArrayUtil.replace(new byte[]{0x00, 0x01}, 1, -1, new byte[]{0x00}, new byte[]{0x00, FF}); + throw new RuntimeException("able to use negative length"); + } + catch(IllegalArgumentException e) { + // eat + } + try { + ByteArrayUtil.replace(new byte[]{0x00, 0x01}, 1, 2, new byte[]{0x00}, new byte[]{0x00, FF}); + throw new RuntimeException("able to give length that exceeds end of the array"); + } + catch(IllegalArgumentException e) { + // eat + } } private static void runTests(final int reps, TransactionContext db) { From 734029820269a09af1228a6d8572df443aab4a8b Mon Sep 17 00:00:00 2001 From: Alec Grieser Date: Fri, 1 Mar 2019 17:05:48 -0800 Subject: [PATCH 27/47] remove debugging printing that was accidentally added --- .../src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java b/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java index 83a49051e1..fe39fa332e 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java @@ -187,10 +187,6 @@ public class ByteArrayUtil { // Array might change size. This is the "tricky" case. 
int newLength = replace(src, offset, length, pattern, replacement, null); if(newLength != length) { - if(newLength < 0) { - System.out.println("oops"); - newLength = replace(src, offset, length, pattern, replacement, null); - } dest = ByteBuffer.allocate(newLength); } else { From 46f4b028071c2a3aa1755a572e3aca5201106f16 Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Mon, 11 Mar 2019 17:10:06 -0700 Subject: [PATCH 28/47] TLS Status: Resolve review comments Use connectedCoordinatorsNumDelayed to reduce the load on cluster controller; Set connectedCoordinatorsNum to null by default for monitorLeader() --- fdbclient/ManagementAPI.actor.cpp | 3 +-- fdbclient/MonitorLeader.actor.cpp | 6 ++++-- fdbclient/MonitorLeader.h | 2 +- fdbclient/NativeAPI.actor.cpp | 14 ++++++++------ fdbclient/ReadYourWrites.actor.cpp | 3 +-- fdbclient/StatusClient.actor.cpp | 3 +-- fdbserver/ClusterController.actor.cpp | 2 +- fdbserver/Status.h | 3 +++ fdbserver/tester.actor.cpp | 3 +-- fdbserver/worker.actor.cpp | 3 +-- 10 files changed, 22 insertions(+), 20 deletions(-) diff --git a/fdbclient/ManagementAPI.actor.cpp b/fdbclient/ManagementAPI.actor.cpp index 73f3316c66..fa54b5b391 100644 --- a/fdbclient/ManagementAPI.actor.cpp +++ b/fdbclient/ManagementAPI.actor.cpp @@ -1542,8 +1542,7 @@ ACTOR Future checkDatabaseLock( Reference tr, U ACTOR Future forceRecovery( Reference clusterFile, Key dcId ) { state Reference>> clusterInterface(new AsyncVar>); - state Reference> unused(new AsyncVar); - state Future leaderMon = monitorLeader(clusterFile, clusterInterface, unused); + state Future leaderMon = monitorLeader(clusterFile, clusterInterface); loop { choose { diff --git a/fdbclient/MonitorLeader.actor.cpp b/fdbclient/MonitorLeader.actor.cpp index cff9f6157c..835b85a215 100644 --- a/fdbclient/MonitorLeader.actor.cpp +++ b/fdbclient/MonitorLeader.actor.cpp @@ -335,9 +335,11 @@ ClientLeaderRegInterface::ClientLeaderRegInterface( INetwork* local ) { // is a nominee) ACTOR Future monitorNominee( Key key, 
ClientLeaderRegInterface coord, AsyncTrigger* nomineeChange, Optional *info, int generation, Reference> connectedCoordinatorsNum ) { loop { + state bool hasExisted = false; state Optional li = wait( retryBrokenPromise( coord.getLeader, GetLeaderRequest( key, info->present() ? info->get().changeID : UID() ), TaskCoordinationReply ) ); - if (li.present()) { + if (li.present() && !hasExisted && connectedCoordinatorsNum.isValid()) { connectedCoordinatorsNum->set(connectedCoordinatorsNum->get() + 1); + hasExisted = true; } wait( Future(Void()) ); // Make sure we weren't cancelled @@ -460,7 +462,7 @@ ACTOR Future monitorLeaderInternal( Reference connF state MonitorLeaderInfo info(connFile); loop { // set the AsyncVar to 0 - connectedCoordinatorsNum->set(0); + if (connectedCoordinatorsNum.isValid()) connectedCoordinatorsNum->set(0); MonitorLeaderInfo _info = wait( monitorLeaderOneGeneration( connFile, outSerializedLeaderInfo, info, connectedCoordinatorsNum) ); info = _info; info.generation++; diff --git a/fdbclient/MonitorLeader.h b/fdbclient/MonitorLeader.h index 87aa37c503..a1ed1c88ad 100644 --- a/fdbclient/MonitorLeader.h +++ b/fdbclient/MonitorLeader.h @@ -30,7 +30,7 @@ class ClientCoordinators; template -Future monitorLeader( Reference const& connFile, Reference>> const& outKnownLeader, Reference> connectedCoordinatorsNum ); +Future monitorLeader( Reference const& connFile, Reference>> const& outKnownLeader, Reference> connectedCoordinatorsNum = Reference>() ); // Monitors the given coordination group's leader election process and provides a best current guess // of the current leader. If a leader is elected for long enough and communication with a quorum of // coordinators is possible, eventually outKnownLeader will be that leader's interface. 
diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index c797c32f2e..e7d7a3e243 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -533,14 +533,14 @@ DatabaseContext::DatabaseContext( DatabaseContext::DatabaseContext( const Error &err ) : deferredError(err), latencies(1000), readLatencies(1000), commitLatencies(1000), GRVLatencies(1000), mutationsPerCommit(1000), bytesPerCommit(1000) {} -ACTOR static Future monitorClientInfo( Reference>> clusterInterface, Reference ccf, Reference> outInfo, Reference> connectedCoordinatorsNum ) { +ACTOR static Future monitorClientInfo( Reference>> clusterInterface, Reference ccf, Reference> outInfo, Reference> connectedCoordinatorsNumDelayed ) { try { state Optional incorrectTime; loop { OpenDatabaseRequest req; req.knownClientInfoID = outInfo->get().id; req.supportedVersions = VectorRef(req.arena, networkOptions.supportedVersions); - req.connectedCoordinatorsNum = connectedCoordinatorsNum->get(); + req.connectedCoordinatorsNum = connectedCoordinatorsNumDelayed->get(); req.traceLogGroup = StringRef(req.arena, networkOptions.traceLogGroup); ClusterConnectionString fileConnectionString; @@ -571,7 +571,7 @@ ACTOR static Future monitorClientInfo( Referenceget().present()) TraceEvent("ClientInfo_CCInterfaceChange").detail("CCID", clusterInterface->get().get().id()); } - when( wait( connectedCoordinatorsNum->onChange() ) ) {} + when( wait( connectedCoordinatorsNumDelayed->onChange() ) ) {} } } } catch( Error& e ) { @@ -588,9 +588,10 @@ ACTOR static Future monitorClientInfo( Reference>> clusterInterface, Reference connFile, LocalityData const& clientLocality) { Reference> connectedCoordinatorsNum(new AsyncVar(0)); + Reference> connectedCoordinatorsNumDelayed(new AsyncVar(0)); Reference cluster(new Cluster(connFile, clusterInterface, connectedCoordinatorsNum)); Reference> clientInfo(new AsyncVar()); - Future clientInfoMonitor = monitorClientInfo(clusterInterface, connFile, clientInfo, 
connectedCoordinatorsNum); + Future clientInfoMonitor = monitorClientInfo(clusterInterface, connFile, clientInfo, connectedCoordinatorsNumDelayed) || delayedAsyncVar(connectedCoordinatorsNum, connectedCoordinatorsNumDelayed, 1.0); return Database(new DatabaseContext(cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false)); } @@ -756,9 +757,10 @@ Reference DatabaseContext::getConnectionFile() { Database Database::createDatabase( Reference connFile, int apiVersion, LocalityData const& clientLocality ) { Reference> connectedCoordinatorsNum(new AsyncVar(0)); // Number of connected coordinators for the client - Reference cluster(new Cluster(connFile, connectedCoordinatorsNum, apiVersion)); + Reference> connectedCoordinatorsNumDelayed(new AsyncVar(0)); + Reference cluster(new Cluster(connFile, connectedCoordinatorsNumDelayed, apiVersion)); Reference> clientInfo(new AsyncVar()); - Future clientInfoMonitor = monitorClientInfo(cluster->getClusterInterface(), connFile, clientInfo, connectedCoordinatorsNum); + Future clientInfoMonitor = monitorClientInfo(cluster->getClusterInterface(), connFile, clientInfo, connectedCoordinatorsNumDelayed) || delayedAsyncVar(connectedCoordinatorsNum, connectedCoordinatorsNumDelayed, 1.0); return Database( new DatabaseContext( cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false, apiVersion ) ); } diff --git a/fdbclient/ReadYourWrites.actor.cpp b/fdbclient/ReadYourWrites.actor.cpp index 24d088e5af..cc6bce8a86 100644 --- a/fdbclient/ReadYourWrites.actor.cpp +++ b/fdbclient/ReadYourWrites.actor.cpp @@ -1159,8 +1159,7 @@ ACTOR Future> getJSON(Reference clusterFi ACTOR Future> getWorkerInterfaces (Reference clusterFile){ state Reference>> clusterInterface(new AsyncVar>); - Reference> unused(new AsyncVar(0)); - state Future leaderMon = monitorLeader(clusterFile, clusterInterface, unused); + state Future leaderMon = 
monitorLeader(clusterFile, clusterInterface); loop{ choose { diff --git a/fdbclient/StatusClient.actor.cpp b/fdbclient/StatusClient.actor.cpp index 84f51fb453..9cd7358c07 100644 --- a/fdbclient/StatusClient.actor.cpp +++ b/fdbclient/StatusClient.actor.cpp @@ -463,12 +463,11 @@ ACTOR Future statusFetcherImpl( Reference f state bool quorum_reachable = false; state int coordinatorsFaultTolerance = 0; state Reference>> clusterInterface(new AsyncVar>); - state Reference> connectedCoordinatorsNum(new AsyncVar(0)); try { state int64_t clientTime = time(0); - state Future leaderMon = monitorLeader(f, clusterInterface, connectedCoordinatorsNum); + state Future leaderMon = monitorLeader(f, clusterInterface); StatusObject _statusObjClient = wait(clientStatusFetcher(f, &clientMessages, &quorum_reachable, &coordinatorsFaultTolerance)); statusObjClient = _statusObjClient; diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 6fe15ae63e..04853215ff 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -1234,7 +1234,7 @@ ACTOR Future clusterOpenDatabase( } - db->clientStatusInfoMap[reply.getEndpoint().getPrimaryAddress()] = {traceLogGroup.toString(), connectedCoordinatorsNum}; + db->clientStatusInfoMap[reply.getEndpoint().getPrimaryAddress()] = ClientStatusInfo(traceLogGroup.toString(), connectedCoordinatorsNum); while (db->clientInfo->get().id == knownClientInfoID) { choose { diff --git a/fdbserver/Status.h b/fdbserver/Status.h index 1bc8acc916..200d3567c0 100644 --- a/fdbserver/Status.h +++ b/fdbserver/Status.h @@ -33,6 +33,9 @@ typedef std::map< NetworkAddress, Standalone> > Clie struct ClientStatusInfo { std::string traceLogGroup; int connectedCoordinatorsNum; + + ClientStatusInfo() : connectedCoordinatorsNum(0) {} + ClientStatusInfo(std::string const& traceLogGroup, int const connectedCoordinatorsNum) : traceLogGroup(traceLogGroup), connectedCoordinatorsNum(connectedCoordinatorsNum) {} }; 
Future clusterGetStatus( Reference> const& db, Database const& cx, vector> const& workers, diff --git a/fdbserver/tester.actor.cpp b/fdbserver/tester.actor.cpp index edf2e9bbbc..135053f0e6 100644 --- a/fdbserver/tester.actor.cpp +++ b/fdbserver/tester.actor.cpp @@ -1127,9 +1127,8 @@ ACTOR Future runTests( Reference connFile, test_typ state vector testSpecs; Reference>> cc( new AsyncVar> ); Reference>> ci( new AsyncVar> ); - Reference> connectedCoordinatorsNum( new AsyncVar(0) ); vector> actors; - actors.push_back( reportErrors(monitorLeader( connFile, cc, connectedCoordinatorsNum ), "MonitorLeader") ); + actors.push_back( reportErrors(monitorLeader( connFile, cc ), "MonitorLeader") ); actors.push_back( reportErrors(extractClusterInterface( cc,ci ),"ExtractClusterInterface") ); actors.push_back( reportErrors(failureMonitorClient( ci, false ),"FailureMonitorClient") ); diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index a61e10a83c..c4b0dd0def 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -1215,11 +1215,10 @@ ACTOR Future fdbd( Reference>> cc(new AsyncVar>); Reference>> ci(new AsyncVar>); Reference> asyncPriorityInfo(new AsyncVar(getCCPriorityInfo(fitnessFilePath, processClass))); - Reference> unused(new AsyncVar(0)); Promise recoveredDiskFiles; v.push_back(reportErrors(monitorAndWriteCCPriorityInfo(fitnessFilePath, asyncPriorityInfo), "MonitorAndWriteCCPriorityInfo")); - v.push_back( reportErrors( processClass == ProcessClass::TesterClass ? monitorLeader( connFile, cc, unused ) : clusterController( connFile, cc , asyncPriorityInfo, recoveredDiskFiles.getFuture(), localities ), "ClusterController") ); + v.push_back( reportErrors( processClass == ProcessClass::TesterClass ? 
monitorLeader( connFile, cc ) : clusterController( connFile, cc , asyncPriorityInfo, recoveredDiskFiles.getFuture(), localities ), "ClusterController") ); v.push_back( reportErrors(extractClusterInterface( cc, ci ), "ExtractClusterInterface") ); v.push_back( reportErrors(failureMonitorClient( ci, true ), "FailureMonitorClient") ); v.push_back( reportErrorsExcept(workerServer(connFile, cc, localities, asyncPriorityInfo, processClass, dataFolder, memoryLimit, metricsConnFile, metricsPrefix, recoveredDiskFiles), "WorkerServer", UID(), &normalWorkerErrors()) ); From d9e9e0c5211dd05990e6147cf0fd6dfcc0fed352 Mon Sep 17 00:00:00 2001 From: Alec Grieser Date: Mon, 11 Mar 2019 18:26:08 -0700 Subject: [PATCH 29/47] use bitwise or instead of addition when reconstituting long --- .../java/src/main/com/apple/foundationdb/tuple/TupleUtil.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java b/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java index e0e43e48df..6ddfae83f9 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java @@ -509,14 +509,14 @@ class TupleUtil { if(positive && (n < Long.BYTES || rep[start] > 0)) { long res = 0L; for(int i = start; i < end; i++) { - res = (res << 8) + (rep[i] & 0xff); + res = (res << 8) | (rep[i] & 0xff); } state.add(res, end); } else if(!positive && (n < Long.BYTES || rep[start] < 0)) { long res = ~0L; for(int i = start; i < end; i++) { - res = (res << 8) + (rep[i] & 0xff); + res = (res << 8) | (rep[i] & 0xff); } state.add(res + 1, end); } From c1745b90611bf54a8d04ca765006781e6289e8cf Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Mon, 11 Mar 2019 19:43:59 -0700 Subject: [PATCH 30/47] Bug fix, backup snapshot dispatch shardsBehind calculation would divide by zero if the snapshot scheduled interval is 0. 
--- fdbclient/FileBackupAgent.actor.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index 20e007afe7..a354dbf38f 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -1559,7 +1559,17 @@ namespace fileBackup { // Calculate the number of shards that would have been dispatched by a normal (on-schedule) BackupSnapshotDispatchTask given // the dispatch window and the start and expected-end versions of the current snapshot. int64_t dispatchWindow = nextDispatchVersion - recentReadVersion; - int countShardsExpectedPerNormalWindow = (double(dispatchWindow) / snapshotScheduledVersionInterval) * countAllShards; + + // If the scheduled snapshot interval is 0 (such as for initial, as-fast-as-possible snapshot) then all shards are considered late + int countShardsExpectedPerNormalWindow; + if(snapshotScheduledVersionInterval == 0) { + countShardsExpectedPerNormalWindow = 0; + } + else { + // A dispatchWindow of 0 means the target end version is <= now which also results in all shards being considered late + countShardsExpectedPerNormalWindow = (double(dispatchWindow) / snapshotScheduledVersionInterval) * countAllShards; + } + // countShardsThisDispatch is how many total shards are to be dispatched by this dispatch cycle. // Since this dispatch cycle can span many incrementally progressing separate executions of the BackupSnapshotDispatchTask // instance, this is calculated as the number of shards dispatched so far in the dispatch batch plus the number of shards From e9b8bf601e563d385575027465c48bb428cbaea8 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Tue, 12 Mar 2019 03:34:38 -0700 Subject: [PATCH 31/47] Added backup status JSON output to backup workload to get sim coverage. 
--- fdbserver/workloads/BackupCorrectness.actor.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fdbserver/workloads/BackupCorrectness.actor.cpp b/fdbserver/workloads/BackupCorrectness.actor.cpp index 3923b15e9f..6315165730 100644 --- a/fdbserver/workloads/BackupCorrectness.actor.cpp +++ b/fdbserver/workloads/BackupCorrectness.actor.cpp @@ -192,6 +192,8 @@ struct BackupAndRestoreCorrectnessWorkload : TestWorkload { loop { std::string status = wait(agent.getStatus(cx, true, tag)); puts(status.c_str()); + std::string statusJSON = wait(agent.getStatusJSON(cx, tag)); + puts(statusJSON.c_str()); wait(delay(2.0)); } } From 22f5624494ac5287af928b0b65c0d8fac0c8da39 Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Tue, 12 Mar 2019 15:08:08 -0700 Subject: [PATCH 32/47] TLS Status: Reduce cluster controller load When the coordinator changes, we use delayedAsyncVar() to reduce the frequency at which the cluster controller sends the updated connectedCoordinatorsNumDelayed to clients. This helps reduce the cluster controller's workload --- fdbclient/MonitorLeader.actor.cpp | 6 +++--- fdbclient/NativeAPI.actor.cpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fdbclient/MonitorLeader.actor.cpp b/fdbclient/MonitorLeader.actor.cpp index 835b85a215..6bdd6a990a 100644 --- a/fdbclient/MonitorLeader.actor.cpp +++ b/fdbclient/MonitorLeader.actor.cpp @@ -334,12 +334,12 @@ ClientLeaderRegInterface::ClientLeaderRegInterface( INetwork* local ) { // This function contacts a coordinator coord to ask if the worker is considered as a leader (i.e., if the worker // is a nominee) ACTOR Future monitorNominee( Key key, ClientLeaderRegInterface coord, AsyncTrigger* nomineeChange, Optional *info, int generation, Reference> connectedCoordinatorsNum ) { + state bool hasCounted = false; loop { - state bool hasExisted = false; state Optional li = wait( retryBrokenPromise( coord.getLeader, GetLeaderRequest( key, info->present() ? 
info->get().changeID : UID() ), TaskCoordinationReply ) ); - if (li.present() && !hasExisted && connectedCoordinatorsNum.isValid()) { + if (li.present() && !hasCounted && connectedCoordinatorsNum.isValid()) { connectedCoordinatorsNum->set(connectedCoordinatorsNum->get() + 1); - hasExisted = true; + hasCounted = true; } wait( Future(Void()) ); // Make sure we weren't cancelled diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index e7d7a3e243..e998a211dd 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -591,7 +591,7 @@ Database DatabaseContext::create(Reference>> Reference> connectedCoordinatorsNumDelayed(new AsyncVar(0)); Reference cluster(new Cluster(connFile, clusterInterface, connectedCoordinatorsNum)); Reference> clientInfo(new AsyncVar()); - Future clientInfoMonitor = monitorClientInfo(clusterInterface, connFile, clientInfo, connectedCoordinatorsNumDelayed) || delayedAsyncVar(connectedCoordinatorsNum, connectedCoordinatorsNumDelayed, 1.0); + Future clientInfoMonitor = delayedAsyncVar(connectedCoordinatorsNum, connectedCoordinatorsNumDelayed, 1.0) || monitorClientInfo(clusterInterface, connFile, clientInfo, connectedCoordinatorsNumDelayed); return Database(new DatabaseContext(cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false)); } @@ -758,9 +758,9 @@ Reference DatabaseContext::getConnectionFile() { Database Database::createDatabase( Reference connFile, int apiVersion, LocalityData const& clientLocality ) { Reference> connectedCoordinatorsNum(new AsyncVar(0)); // Number of connected coordinators for the client Reference> connectedCoordinatorsNumDelayed(new AsyncVar(0)); - Reference cluster(new Cluster(connFile, connectedCoordinatorsNumDelayed, apiVersion)); + Reference cluster(new Cluster(connFile, connectedCoordinatorsNum, apiVersion)); Reference> clientInfo(new AsyncVar()); - Future clientInfoMonitor = 
monitorClientInfo(cluster->getClusterInterface(), connFile, clientInfo, connectedCoordinatorsNumDelayed) || delayedAsyncVar(connectedCoordinatorsNum, connectedCoordinatorsNumDelayed, 1.0); + Future clientInfoMonitor = delayedAsyncVar(connectedCoordinatorsNum, connectedCoordinatorsNumDelayed, 1.0) || monitorClientInfo(cluster->getClusterInterface(), connFile, clientInfo, connectedCoordinatorsNumDelayed); return Database( new DatabaseContext( cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false, apiVersion ) ); } From ff8bac8d208f94d1fb389ff3a4a022bdf47cf319 Mon Sep 17 00:00:00 2001 From: Vishesh Yadav Date: Tue, 12 Mar 2019 17:58:55 -0700 Subject: [PATCH 33/47] doc: Some documentation for IPv6 --- documentation/sphinx/source/administration.rst | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/documentation/sphinx/source/administration.rst b/documentation/sphinx/source/administration.rst index 8ddc88ac6f..1e0111b584 100644 --- a/documentation/sphinx/source/administration.rst +++ b/documentation/sphinx/source/administration.rst @@ -141,6 +141,21 @@ Any client connected to FoundationDB can access information about its cluster fi * To get the path to the cluster file, read the key ``\xFF\xFF/cluster_file_path``. * To get the contents of the cluster file, read the key ``\xFF\xFF/connection_string``. +.. _ipv6-support: + +IPv6 Support +============ + +FoundationDB (since v6.1) can accept network connections from clients connecting over IPv6. An IPv6 address/port pair is represented as ``[IP]:PORT``, e.g. "[::1]:4800", "[abcd::dead:beef]:4500". +1) The cluster file can contain a mix of IPv4 and IPv6 addresses. For example:: + + description:ID@127.0.0.1:4500,[::1]:4500,... +2) Starting ``fdbserver`` with IPv6:: + + $ /path/to/fdbserver -C fdb.cluster -p \[::1\]:4500 + .. 
_adding-machines-to-a-cluster: Adding machines to a cluster From a2108047aa3eb2d9718afe6ba74b9a6228c5f072 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Wed, 13 Mar 2019 13:14:39 -0700 Subject: [PATCH 34/47] removed LocalitySetRef and IRepPolicyRef typedefs, because for clarity the Ref suffix is reserved for arena allocated objects instead of reference counted objects. --- fdbclient/DatabaseConfiguration.cpp | 28 ++--- fdbclient/DatabaseConfiguration.h | 12 +-- fdbclient/ManagementAPI.actor.cpp | 44 ++++---- fdbrpc/Replication.h | 32 +++--- fdbrpc/ReplicationPolicy.cpp | 36 +++---- fdbrpc/ReplicationPolicy.h | 64 ++++++------ fdbrpc/ReplicationTypes.h | 3 - fdbrpc/ReplicationUtils.cpp | 110 ++++++++++---------- fdbrpc/ReplicationUtils.h | 20 ++-- fdbrpc/simulator.h | 10 +- fdbserver/ClusterController.actor.cpp | 8 +- fdbserver/DBCoreState.h | 2 +- fdbserver/DataDistribution.actor.cpp | 16 +-- fdbserver/LogSystem.h | 10 +- fdbserver/LogSystemConfig.h | 2 +- fdbserver/LogSystemPeekCursor.actor.cpp | 2 +- fdbserver/TagPartitionedLogSystem.actor.cpp | 8 +- fdbserver/WorkerInterface.actor.h | 2 +- 18 files changed, 203 insertions(+), 206 deletions(-) diff --git a/fdbclient/DatabaseConfiguration.cpp b/fdbclient/DatabaseConfiguration.cpp index 0af3402b73..1bc518e0e4 100644 --- a/fdbclient/DatabaseConfiguration.cpp +++ b/fdbclient/DatabaseConfiguration.cpp @@ -38,7 +38,7 @@ void DatabaseConfiguration::resetInternal() { autoDesiredTLogCount = CLIENT_KNOBS->DEFAULT_AUTO_LOGS; usableRegions = 1; regions.clear(); - tLogPolicy = storagePolicy = remoteTLogPolicy = IRepPolicyRef(); + tLogPolicy = storagePolicy = remoteTLogPolicy = Reference(); remoteDesiredTLogCount = -1; remoteTLogReplicationFactor = repopulateRegionAntiQuorum = 0; } @@ -48,7 +48,7 @@ void parse( int* i, ValueRef const& v ) { *i = atoi(v.toString().c_str()); } -void parseReplicationPolicy(IRepPolicyRef* policy, ValueRef const& v) { +void parseReplicationPolicy(Reference* policy, ValueRef const& v) { 
BinaryReader reader(v, IncludeVersion()); serializeReplicationPolicy(reader, *policy); } @@ -91,35 +91,35 @@ void parse( std::vector* regions, ValueRef const& v ) { info.satelliteTLogReplicationFactor = 1; info.satelliteTLogUsableDcs = 1; info.satelliteTLogWriteAntiQuorum = 0; - info.satelliteTLogPolicy = IRepPolicyRef(new PolicyOne()); + info.satelliteTLogPolicy = Reference(new PolicyOne()); } else if(satelliteReplication == "one_satellite_double") { info.satelliteTLogReplicationFactor = 2; info.satelliteTLogUsableDcs = 1; info.satelliteTLogWriteAntiQuorum = 0; - info.satelliteTLogPolicy = IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))); + info.satelliteTLogPolicy = Reference(new PolicyAcross(2, "zoneid", Reference(new PolicyOne()))); } else if(satelliteReplication == "one_satellite_triple") { info.satelliteTLogReplicationFactor = 3; info.satelliteTLogUsableDcs = 1; info.satelliteTLogWriteAntiQuorum = 0; - info.satelliteTLogPolicy = IRepPolicyRef(new PolicyAcross(3, "zoneid", IRepPolicyRef(new PolicyOne()))); + info.satelliteTLogPolicy = Reference(new PolicyAcross(3, "zoneid", Reference(new PolicyOne()))); } else if(satelliteReplication == "two_satellite_safe") { info.satelliteTLogReplicationFactor = 4; info.satelliteTLogUsableDcs = 2; info.satelliteTLogWriteAntiQuorum = 0; - info.satelliteTLogPolicy = IRepPolicyRef(new PolicyAcross(2, "dcid", IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))))); + info.satelliteTLogPolicy = Reference(new PolicyAcross(2, "dcid", Reference(new PolicyAcross(2, "zoneid", Reference(new PolicyOne()))))); info.satelliteTLogReplicationFactorFallback = 2; info.satelliteTLogUsableDcsFallback = 1; info.satelliteTLogWriteAntiQuorumFallback = 0; - info.satelliteTLogPolicyFallback = IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))); + info.satelliteTLogPolicyFallback = Reference(new PolicyAcross(2, "zoneid", Reference(new PolicyOne()))); } else 
if(satelliteReplication == "two_satellite_fast") { info.satelliteTLogReplicationFactor = 4; info.satelliteTLogUsableDcs = 2; info.satelliteTLogWriteAntiQuorum = 2; - info.satelliteTLogPolicy = IRepPolicyRef(new PolicyAcross(2, "dcid", IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))))); + info.satelliteTLogPolicy = Reference(new PolicyAcross(2, "dcid", Reference(new PolicyAcross(2, "zoneid", Reference(new PolicyOne()))))); info.satelliteTLogReplicationFactorFallback = 2; info.satelliteTLogUsableDcsFallback = 1; info.satelliteTLogWriteAntiQuorumFallback = 0; - info.satelliteTLogPolicyFallback = IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))); + info.satelliteTLogPolicyFallback = Reference(new PolicyAcross(2, "zoneid", Reference(new PolicyOne()))); } else { throw invalid_option(); } @@ -141,20 +141,20 @@ void parse( std::vector* regions, ValueRef const& v ) { void DatabaseConfiguration::setDefaultReplicationPolicy() { if(!storagePolicy) { - storagePolicy = IRepPolicyRef(new PolicyAcross(storageTeamSize, "zoneid", IRepPolicyRef(new PolicyOne()))); + storagePolicy = Reference(new PolicyAcross(storageTeamSize, "zoneid", Reference(new PolicyOne()))); } if(!tLogPolicy) { - tLogPolicy = IRepPolicyRef(new PolicyAcross(tLogReplicationFactor, "zoneid", IRepPolicyRef(new PolicyOne()))); + tLogPolicy = Reference(new PolicyAcross(tLogReplicationFactor, "zoneid", Reference(new PolicyOne()))); } if(remoteTLogReplicationFactor > 0 && !remoteTLogPolicy) { - remoteTLogPolicy = IRepPolicyRef(new PolicyAcross(remoteTLogReplicationFactor, "zoneid", IRepPolicyRef(new PolicyOne()))); + remoteTLogPolicy = Reference(new PolicyAcross(remoteTLogReplicationFactor, "zoneid", Reference(new PolicyOne()))); } for(auto& r : regions) { if(r.satelliteTLogReplicationFactor > 0 && !r.satelliteTLogPolicy) { - r.satelliteTLogPolicy = IRepPolicyRef(new PolicyAcross(r.satelliteTLogReplicationFactor, "zoneid", IRepPolicyRef(new PolicyOne()))); + 
r.satelliteTLogPolicy = Reference(new PolicyAcross(r.satelliteTLogReplicationFactor, "zoneid", Reference(new PolicyOne()))); } if(r.satelliteTLogReplicationFactorFallback > 0 && !r.satelliteTLogPolicyFallback) { - r.satelliteTLogPolicyFallback = IRepPolicyRef(new PolicyAcross(r.satelliteTLogReplicationFactorFallback, "zoneid", IRepPolicyRef(new PolicyOne()))); + r.satelliteTLogPolicyFallback = Reference(new PolicyAcross(r.satelliteTLogReplicationFactorFallback, "zoneid", Reference(new PolicyOne()))); } } } diff --git a/fdbclient/DatabaseConfiguration.h b/fdbclient/DatabaseConfiguration.h index 5df38f1fb2..18bf0b0352 100644 --- a/fdbclient/DatabaseConfiguration.h +++ b/fdbclient/DatabaseConfiguration.h @@ -49,13 +49,13 @@ struct RegionInfo { Key dcId; int32_t priority; - IRepPolicyRef satelliteTLogPolicy; + Reference satelliteTLogPolicy; int32_t satelliteDesiredTLogCount; int32_t satelliteTLogReplicationFactor; int32_t satelliteTLogWriteAntiQuorum; int32_t satelliteTLogUsableDcs; - IRepPolicyRef satelliteTLogPolicyFallback; + Reference satelliteTLogPolicyFallback; int32_t satelliteTLogReplicationFactorFallback; int32_t satelliteTLogWriteAntiQuorumFallback; int32_t satelliteTLogUsableDcsFallback; @@ -157,7 +157,7 @@ struct DatabaseConfiguration { int32_t autoResolverCount; // TLogs - IRepPolicyRef tLogPolicy; + Reference tLogPolicy; int32_t desiredTLogCount; int32_t autoDesiredTLogCount; int32_t tLogWriteAntiQuorum; @@ -167,7 +167,7 @@ struct DatabaseConfiguration { TLogSpillType tLogSpillType; // Storage Servers - IRepPolicyRef storagePolicy; + Reference storagePolicy; int32_t storageTeamSize; KeyValueStoreType storageServerStoreType; @@ -175,7 +175,7 @@ struct DatabaseConfiguration { int32_t desiredLogRouterCount; int32_t remoteDesiredTLogCount; int32_t remoteTLogReplicationFactor; - IRepPolicyRef remoteTLogPolicy; + Reference remoteTLogPolicy; //Data centers int32_t usableRegions; @@ -195,7 +195,7 @@ struct DatabaseConfiguration { if(desired == -1) return 
autoDesiredTLogCount; return desired; } int32_t getRemoteTLogReplicationFactor() const { if(remoteTLogReplicationFactor == 0) return tLogReplicationFactor; return remoteTLogReplicationFactor; } - IRepPolicyRef getRemoteTLogPolicy() const { if(remoteTLogReplicationFactor == 0) return tLogPolicy; return remoteTLogPolicy; } + Reference getRemoteTLogPolicy() const { if(remoteTLogReplicationFactor == 0) return tLogPolicy; return remoteTLogPolicy; } bool operator == ( DatabaseConfiguration const& rhs ) const { const_cast(this)->makeConfigurationImmutable(); diff --git a/fdbclient/ManagementAPI.actor.cpp b/fdbclient/ManagementAPI.actor.cpp index fa54b5b391..04cbbeb45e 100644 --- a/fdbclient/ManagementAPI.actor.cpp +++ b/fdbclient/ManagementAPI.actor.cpp @@ -99,42 +99,42 @@ std::map configForToken( std::string const& mode ) { } std::string redundancy, log_replicas; - IRepPolicyRef storagePolicy; - IRepPolicyRef tLogPolicy; + Reference storagePolicy; + Reference tLogPolicy; bool redundancySpecified = true; if (mode == "single") { redundancy="1"; log_replicas="1"; - storagePolicy = tLogPolicy = IRepPolicyRef(new PolicyOne()); + storagePolicy = tLogPolicy = Reference(new PolicyOne()); } else if(mode == "double" || mode == "fast_recovery_double") { redundancy="2"; log_replicas="2"; - storagePolicy = tLogPolicy = IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))); + storagePolicy = tLogPolicy = Reference(new PolicyAcross(2, "zoneid", Reference(new PolicyOne()))); } else if(mode == "triple" || mode == "fast_recovery_triple") { redundancy="3"; log_replicas="3"; - storagePolicy = tLogPolicy = IRepPolicyRef(new PolicyAcross(3, "zoneid", IRepPolicyRef(new PolicyOne()))); + storagePolicy = tLogPolicy = Reference(new PolicyAcross(3, "zoneid", Reference(new PolicyOne()))); } else if(mode == "three_datacenter" || mode == "multi_dc") { redundancy="6"; log_replicas="4"; - storagePolicy = IRepPolicyRef(new PolicyAcross(3, "dcid", - IRepPolicyRef(new 
PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))) + storagePolicy = Reference(new PolicyAcross(3, "dcid", + Reference(new PolicyAcross(2, "zoneid", Reference(new PolicyOne()))) )); - tLogPolicy = IRepPolicyRef(new PolicyAcross(2, "dcid", - IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))) + tLogPolicy = Reference(new PolicyAcross(2, "dcid", + Reference(new PolicyAcross(2, "zoneid", Reference(new PolicyOne()))) )); } else if(mode == "three_datacenter_fallback") { redundancy="4"; log_replicas="4"; - storagePolicy = tLogPolicy = IRepPolicyRef(new PolicyAcross(2, "dcid", IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))))); + storagePolicy = tLogPolicy = Reference(new PolicyAcross(2, "dcid", Reference(new PolicyAcross(2, "zoneid", Reference(new PolicyOne()))))); } else if(mode == "three_data_hall") { redundancy="3"; log_replicas="4"; - storagePolicy = IRepPolicyRef(new PolicyAcross(3, "data_hall", IRepPolicyRef(new PolicyOne()))); - tLogPolicy = IRepPolicyRef(new PolicyAcross(2, "data_hall", - IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))) + storagePolicy = Reference(new PolicyAcross(3, "data_hall", Reference(new PolicyOne()))); + tLogPolicy = Reference(new PolicyAcross(2, "data_hall", + Reference(new PolicyAcross(2, "zoneid", Reference(new PolicyOne()))) )); } else redundancySpecified = false; @@ -154,29 +154,29 @@ std::map configForToken( std::string const& mode ) { } std::string remote_redundancy, remote_log_replicas; - IRepPolicyRef remoteTLogPolicy; + Reference remoteTLogPolicy; bool remoteRedundancySpecified = true; if (mode == "remote_default") { remote_redundancy="0"; remote_log_replicas="0"; - remoteTLogPolicy = IRepPolicyRef(); + remoteTLogPolicy = Reference(); } else if (mode == "remote_single") { remote_redundancy="1"; remote_log_replicas="1"; - remoteTLogPolicy = IRepPolicyRef(new PolicyOne()); + remoteTLogPolicy = Reference(new PolicyOne()); } else if(mode == 
"remote_double") { remote_redundancy="2"; remote_log_replicas="2"; - remoteTLogPolicy = IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))); + remoteTLogPolicy = Reference(new PolicyAcross(2, "zoneid", Reference(new PolicyOne()))); } else if(mode == "remote_triple") { remote_redundancy="3"; remote_log_replicas="3"; - remoteTLogPolicy = IRepPolicyRef(new PolicyAcross(3, "zoneid", IRepPolicyRef(new PolicyOne()))); + remoteTLogPolicy = Reference(new PolicyAcross(3, "zoneid", Reference(new PolicyOne()))); } else if(mode == "remote_three_data_hall") { //FIXME: not tested in simulation remote_redundancy="3"; remote_log_replicas="4"; - remoteTLogPolicy = IRepPolicyRef(new PolicyAcross(2, "data_hall", - IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))) + remoteTLogPolicy = Reference(new PolicyAcross(2, "data_hall", + Reference(new PolicyAcross(2, "zoneid", Reference(new PolicyOne()))) )); } else remoteRedundancySpecified = false; @@ -212,7 +212,7 @@ ConfigurationResult::Type buildConfiguration( std::vector const& mode auto p = configKeysPrefix.toString(); if(!outConf.count(p + "storage_replication_policy") && outConf.count(p + "storage_replicas")) { int storageCount = stoi(outConf[p + "storage_replicas"]); - IRepPolicyRef storagePolicy = IRepPolicyRef(new PolicyAcross(storageCount, "zoneid", IRepPolicyRef(new PolicyOne()))); + Reference storagePolicy = Reference(new PolicyAcross(storageCount, "zoneid", Reference(new PolicyOne()))); BinaryWriter policyWriter(IncludeVersion()); serializeReplicationPolicy(policyWriter, storagePolicy); outConf[p+"storage_replication_policy"] = policyWriter.toStringRef().toString(); @@ -220,7 +220,7 @@ ConfigurationResult::Type buildConfiguration( std::vector const& mode if(!outConf.count(p + "log_replication_policy") && outConf.count(p + "log_replicas")) { int logCount = stoi(outConf[p + "log_replicas"]); - IRepPolicyRef logPolicy = IRepPolicyRef(new PolicyAcross(logCount, "zoneid", 
IRepPolicyRef(new PolicyOne()))); + Reference logPolicy = Reference(new PolicyAcross(logCount, "zoneid", Reference(new PolicyOne()))); BinaryWriter policyWriter(IncludeVersion()); serializeReplicationPolicy(policyWriter, logPolicy); outConf[p+"log_replication_policy"] = policyWriter.toStringRef().toString(); diff --git a/fdbrpc/Replication.h b/fdbrpc/Replication.h index 828ca1fd42..e8e32b79fa 100644 --- a/fdbrpc/Replication.h +++ b/fdbrpc/Replication.h @@ -36,23 +36,23 @@ public: virtual void delref() { ReferenceCounted::delref(); } bool selectReplicas( - IRepPolicyRef const& policy, + Reference const& policy, std::vector const& alsoServers, std::vector & results) { - LocalitySetRef fromServers = LocalitySetRef::addRef(this); + Reference fromServers = Reference::addRef(this); return policy->selectReplicas(fromServers, alsoServers, results); } bool selectReplicas( - IRepPolicyRef const& policy, + Reference const& policy, std::vector & results) { return selectReplicas(policy, std::vector(), results); } bool validate( - IRepPolicyRef const& policy) const + Reference const& policy) const { - LocalitySetRef const solutionSet = LocalitySetRef::addRef((LocalitySet*) this); + Reference const solutionSet = Reference::addRef((LocalitySet*) this); return policy->validate(solutionSet); } @@ -159,7 +159,7 @@ public: } static void staticDisplayEntries( - LocalitySetRef const& fromServers, + Reference const& fromServers, std::vector const& entryArray, const char* name = "zone") { @@ -174,8 +174,8 @@ public: // the specified value for the given key // The returned LocalitySet contains the LocalityRecords that have the same value as // the indexValue under the same indexKey (e.g., zoneid) - LocalitySetRef restrict(AttribKey indexKey, AttribValue indexValue ) { - LocalitySetRef localitySet; + Reference restrict(AttribKey indexKey, AttribValue indexValue ) { + Reference localitySet; LocalityCacheRecord searchRecord(AttribRecord(indexKey, indexValue), localitySet); auto itKeyValue = 
std::lower_bound(_cacheArray.begin(), _cacheArray.end(), searchRecord, LocalityCacheRecord::compareKeyValue); @@ -185,7 +185,7 @@ public: localitySet = itKeyValue->_resultset; } else { - localitySet = LocalitySetRef(new LocalitySet(*_localitygroup)); + localitySet = Reference(new LocalitySet(*_localitygroup)); _cachemisses ++; // If the key is not within the current key set, skip it because no items within // the current entry array has the key @@ -213,8 +213,8 @@ public: } // This function is used to create an subset containing the specified entries - LocalitySetRef restrict(std::vector const& entryArray) { - LocalitySetRef localitySet(new LocalitySet(*_localitygroup)); + Reference restrict(std::vector const& entryArray) { + Reference localitySet(new LocalitySet(*_localitygroup)); for (auto& entry : entryArray) { localitySet->add(getRecordViaEntry(entry), *this); } @@ -453,8 +453,8 @@ protected: // This class stores the cache record for each entry within the locality set struct LocalityCacheRecord { AttribRecord _attribute; - LocalitySetRef _resultset; - LocalityCacheRecord(AttribRecord const& attribute, LocalitySetRef resultset):_attribute(attribute),_resultset(resultset){} + Reference _resultset; + LocalityCacheRecord(AttribRecord const& attribute, Reference resultset):_attribute(attribute),_resultset(resultset){} LocalityCacheRecord(LocalityCacheRecord const& source):_attribute(source._attribute),_resultset(source._resultset){} virtual ~LocalityCacheRecord(){} LocalityCacheRecord& operator=(LocalityCacheRecord const& source) { @@ -584,7 +584,7 @@ struct LocalityMap : public LocalityGroup { virtual ~LocalityMap() {} bool selectReplicas( - IRepPolicyRef const& policy, + Reference const& policy, std::vector const& alsoServers, std::vector& entryResults, std::vector & results) @@ -601,7 +601,7 @@ struct LocalityMap : public LocalityGroup { } bool selectReplicas( - IRepPolicyRef const& policy, + Reference const& policy, std::vector const& alsoServers, std::vector & 
results) { @@ -610,7 +610,7 @@ struct LocalityMap : public LocalityGroup { } bool selectReplicas( - IRepPolicyRef const& policy, + Reference const& policy, std::vector & results) { return selectReplicas(policy, std::vector(), results); } diff --git a/fdbrpc/ReplicationPolicy.cpp b/fdbrpc/ReplicationPolicy.cpp index 070b8dd767..59b8f511d1 100644 --- a/fdbrpc/ReplicationPolicy.cpp +++ b/fdbrpc/ReplicationPolicy.cpp @@ -24,14 +24,14 @@ bool IReplicationPolicy::selectReplicas( - LocalitySetRef & fromServers, + Reference & fromServers, std::vector & results ) { return selectReplicas(fromServers, std::vector(), results); } bool IReplicationPolicy::validate( - LocalitySetRef const& solutionSet ) const + Reference const& solutionSet ) const { return validate(solutionSet->getEntries(), solutionSet); } @@ -40,7 +40,7 @@ bool IReplicationPolicy::validateFull( bool solved, std::vector const& solutionSet, std::vector const& alsoServers, - LocalitySetRef const& fromServers ) + Reference const& fromServers ) { bool valid = true; std::vector totalSolution(solutionSet); @@ -105,7 +105,7 @@ bool IReplicationPolicy::validateFull( } bool PolicyOne::selectReplicas( - LocalitySetRef & fromServers, + Reference & fromServers, std::vector const& alsoServers, std::vector & results ) { @@ -131,12 +131,12 @@ bool PolicyOne::selectReplicas( bool PolicyOne::validate( std::vector const& solutionSet, - LocalitySetRef const& fromServers ) const + Reference const& fromServers ) const { return ((solutionSet.size() > 0) && (fromServers->size() > 0)); } -PolicyAcross::PolicyAcross(int count, std::string const& attribKey, IRepPolicyRef const policy): +PolicyAcross::PolicyAcross(int count, std::string const& attribKey, Reference const policy): _count(count),_attribKey(attribKey),_policy(policy) { return; @@ -150,7 +150,7 @@ PolicyAcross::~PolicyAcross() // Debug purpose only // Trace all record entries to help debug // fromServers is the servers locality to be printed out. 
-void IReplicationPolicy::traceLocalityRecords(LocalitySetRef const& fromServers) { +void IReplicationPolicy::traceLocalityRecords(Reference const& fromServers) { std::vector> const& recordArray = fromServers->getRecordArray(); TraceEvent("LocalityRecordArray").detail("Size", recordArray.size()); for (auto& record : recordArray) { @@ -158,7 +158,7 @@ void IReplicationPolicy::traceLocalityRecords(LocalitySetRef const& fromServers) } } -void IReplicationPolicy::traceOneLocalityRecord(Reference record, LocalitySetRef const& fromServers) { +void IReplicationPolicy::traceOneLocalityRecord(Reference record, Reference const& fromServers) { int localityEntryIndex = record->_entryIndex._id; Reference const& dataMap = record->_dataMap; std::vector const& keyValueArray = dataMap->_keyvaluearray; @@ -185,7 +185,7 @@ void IReplicationPolicy::traceOneLocalityRecord(Reference record // return true if the team satisfies the policy; false otherwise bool PolicyAcross::validate( std::vector const& solutionSet, - LocalitySetRef const& fromServers ) const + Reference const& fromServers ) const { bool valid = true; int count = 0; @@ -262,7 +262,7 @@ bool PolicyAcross::validate( // that should be excluded from being selected as replicas. 
// FIXME: Simplify this function, such as removing unnecessary printf bool PolicyAcross::selectReplicas( - LocalitySetRef & fromServers, + Reference & fromServers, std::vector const& alsoServers, std::vector & results ) { @@ -437,7 +437,7 @@ bool PolicyAcross::selectReplicas( bool PolicyAnd::validate( std::vector const& solutionSet, - LocalitySetRef const& fromServers ) const + Reference const& fromServers ) const { bool valid = true; for (auto& policy : _policies) { @@ -450,7 +450,7 @@ bool PolicyAnd::validate( } bool PolicyAnd::selectReplicas( - LocalitySetRef & fromServers, + Reference & fromServers, std::vector const& alsoServers, std::vector & results ) { @@ -486,26 +486,26 @@ bool PolicyAnd::selectReplicas( return passed; } -void testPolicySerialization(IRepPolicyRef& policy) { +void testPolicySerialization(Reference& policy) { std::string policyInfo = policy->info(); BinaryWriter writer(IncludeVersion()); serializeReplicationPolicy(writer, policy); BinaryReader reader(writer.getData(), writer.getLength(), IncludeVersion()); - IRepPolicyRef copy; + Reference copy; serializeReplicationPolicy(reader, copy); ASSERT(policy->info() == copy->info()); } void testReplicationPolicy(int nTests) { - IRepPolicyRef policy = IRepPolicyRef(new PolicyAcross(1, "data_hall", IRepPolicyRef(new PolicyOne()))); + Reference policy = Reference(new PolicyAcross(1, "data_hall", Reference(new PolicyOne()))); testPolicySerialization(policy); - policy = IRepPolicyRef(new PolicyAnd({ - IRepPolicyRef(new PolicyAcross(2, "data_center", IRepPolicyRef(new PolicyAcross(3, "rack", IRepPolicyRef(new PolicyOne()))))), - IRepPolicyRef(new PolicyAcross(2, "data_center", IRepPolicyRef(new PolicyAcross(2, "data_hall", IRepPolicyRef(new PolicyOne()))))) + policy = Reference(new PolicyAnd({ + Reference(new PolicyAcross(2, "data_center", Reference(new PolicyAcross(3, "rack", Reference(new PolicyOne()))))), + Reference(new PolicyAcross(2, "data_center", Reference(new PolicyAcross(2, "data_hall", 
Reference(new PolicyOne()))))) })); testPolicySerialization(policy); diff --git a/fdbrpc/ReplicationPolicy.h b/fdbrpc/ReplicationPolicy.h index 74bc0baa80..74ccdbb312 100644 --- a/fdbrpc/ReplicationPolicy.h +++ b/fdbrpc/ReplicationPolicy.h @@ -26,7 +26,7 @@ #include "fdbrpc/ReplicationTypes.h" template -void serializeReplicationPolicy(Ar& ar, IRepPolicyRef& policy); +void serializeReplicationPolicy(Ar& ar, Reference& policy); extern void testReplicationPolicy(int nTests); @@ -40,36 +40,36 @@ struct IReplicationPolicy : public ReferenceCounted { virtual int maxResults() const = 0; virtual int depth() const = 0; virtual bool selectReplicas( - LocalitySetRef & fromServers, + Reference & fromServers, std::vector const& alsoServers, std::vector & results ) = 0; - virtual void traceLocalityRecords(LocalitySetRef const& fromServers); - virtual void traceOneLocalityRecord(Reference record, LocalitySetRef const& fromServers); + virtual void traceLocalityRecords(Reference const& fromServers); + virtual void traceOneLocalityRecord(Reference record, Reference const& fromServers); virtual bool validate( std::vector const& solutionSet, - LocalitySetRef const& fromServers ) const = 0; + Reference const& fromServers ) const = 0; bool operator == ( const IReplicationPolicy& r ) const { return info() == r.info(); } bool operator != ( const IReplicationPolicy& r ) const { return info() != r.info(); } template void serialize(Ar& ar) { - IRepPolicyRef refThis(this); + Reference refThis(this); serializeReplicationPolicy(ar, refThis); refThis->delref_no_destroy(); } // Utility functions bool selectReplicas( - LocalitySetRef & fromServers, + Reference & fromServers, std::vector & results ); bool validate( - LocalitySetRef const& solutionSet ) const; + Reference const& solutionSet ) const; bool validateFull( bool solved, std::vector const& solutionSet, std::vector const& alsoServers, - LocalitySetRef const& fromServers ); + Reference const& fromServers ); // Returns a set of the attributes 
that this policy uses in selection and validation. std::set attributeKeys() const @@ -78,7 +78,7 @@ struct IReplicationPolicy : public ReferenceCounted { }; template -inline void load( Archive& ar, IRepPolicyRef& value ) { +inline void load( Archive& ar, Reference& value ) { bool present = (value.getPtr()); ar >> present; if (present) { @@ -90,11 +90,11 @@ inline void load( Archive& ar, IRepPolicyRef& value ) { } template -inline void save( Archive& ar, const IRepPolicyRef& value ) { +inline void save( Archive& ar, const Reference& value ) { bool present = (value.getPtr()); ar << present; if (present) { - serializeReplicationPolicy(ar, (IRepPolicyRef&) value); + serializeReplicationPolicy(ar, (Reference&) value); } } @@ -107,9 +107,9 @@ struct PolicyOne : IReplicationPolicy, public ReferenceCounted { virtual int depth() const { return 1; } virtual bool validate( std::vector const& solutionSet, - LocalitySetRef const& fromServers ) const; + Reference const& fromServers ) const; virtual bool selectReplicas( - LocalitySetRef & fromServers, + Reference & fromServers, std::vector const& alsoServers, std::vector & results ); template @@ -119,7 +119,7 @@ struct PolicyOne : IReplicationPolicy, public ReferenceCounted { }; struct PolicyAcross : IReplicationPolicy, public ReferenceCounted { - PolicyAcross(int count, std::string const& attribKey, IRepPolicyRef const policy); + PolicyAcross(int count, std::string const& attribKey, Reference const policy); virtual ~PolicyAcross(); virtual std::string name() const { return "Across"; } virtual std::string info() const @@ -128,9 +128,9 @@ struct PolicyAcross : IReplicationPolicy, public ReferenceCounted virtual int depth() const { return 1 + _policy->depth(); } virtual bool validate( std::vector const& solutionSet, - LocalitySetRef const& fromServers ) const; + Reference const& fromServers ) const; virtual bool selectReplicas( - LocalitySetRef & fromServers, + Reference & fromServers, std::vector const& alsoServers, std::vector & 
results ); @@ -149,18 +149,18 @@ struct PolicyAcross : IReplicationPolicy, public ReferenceCounted protected: int _count; std::string _attribKey; - IRepPolicyRef _policy; + Reference _policy; // Cache temporary members std::vector _usedValues; std::vector _newResults; - LocalitySetRef _selected; + Reference _selected; VectorRef> _addedResults; Arena _arena; }; struct PolicyAnd : IReplicationPolicy, public ReferenceCounted { - PolicyAnd(std::vector policies): _policies(policies), _sortedPolicies(policies) + PolicyAnd(std::vector> policies): _policies(policies), _sortedPolicies(policies) { // Sort the policy array std::sort(_sortedPolicies.begin(), _sortedPolicies.end(), PolicyAnd::comparePolicy); @@ -194,14 +194,14 @@ struct PolicyAnd : IReplicationPolicy, public ReferenceCounted { } virtual bool validate( std::vector const& solutionSet, - LocalitySetRef const& fromServers ) const; + Reference const& fromServers ) const; virtual bool selectReplicas( - LocalitySetRef & fromServers, + Reference & fromServers, std::vector const& alsoServers, std::vector & results ); - static bool comparePolicy(const IRepPolicyRef& rhs, const IRepPolicyRef& lhs) + static bool comparePolicy(const Reference& rhs, const Reference& lhs) { return (lhs->maxResults() < rhs->maxResults()) || (!(rhs->maxResults() < lhs->maxResults()) && (lhs->depth() < rhs->depth())); } template @@ -219,18 +219,18 @@ struct PolicyAnd : IReplicationPolicy, public ReferenceCounted { } virtual void attributeKeys(std::set *set) const override - { for (const IRepPolicyRef& r : _policies) { r->attributeKeys(set); } } + { for (const Reference& r : _policies) { r->attributeKeys(set); } } protected: - std::vector _policies; - std::vector _sortedPolicies; + std::vector> _policies; + std::vector> _sortedPolicies; }; extern int testReplication(); template -void serializeReplicationPolicy(Ar& ar, IRepPolicyRef& policy) { +void serializeReplicationPolicy(Ar& ar, Reference& policy) { if(Ar::isDeserializing) { StringRef name; 
serializer(ar, name); @@ -238,20 +238,20 @@ void serializeReplicationPolicy(Ar& ar, IRepPolicyRef& policy) { if(name == LiteralStringRef("One")) { PolicyOne* pointer = new PolicyOne(); pointer->serialize(ar); - policy = IRepPolicyRef(pointer); + policy = Reference(pointer); } else if(name == LiteralStringRef("Across")) { - PolicyAcross* pointer = new PolicyAcross(0, "", IRepPolicyRef()); + PolicyAcross* pointer = new PolicyAcross(0, "", Reference()); pointer->serialize(ar); - policy = IRepPolicyRef(pointer); + policy = Reference(pointer); } else if(name == LiteralStringRef("And")) { PolicyAnd* pointer = new PolicyAnd({}); pointer->serialize(ar); - policy = IRepPolicyRef(pointer); + policy = Reference(pointer); } else if(name == LiteralStringRef("None")) { - policy = IRepPolicyRef(); + policy = Reference(); } else { TraceEvent(SevError, "SerializingInvalidPolicyType") diff --git a/fdbrpc/ReplicationTypes.h b/fdbrpc/ReplicationTypes.h index ef5463f54b..9a9f517d15 100644 --- a/fdbrpc/ReplicationTypes.h +++ b/fdbrpc/ReplicationTypes.h @@ -34,9 +34,6 @@ struct LocalityRecord; struct StringToIntMap; struct IReplicationPolicy; -typedef Reference LocalitySetRef; -typedef Reference IRepPolicyRef; - extern int g_replicationdebug; struct AttribKey { diff --git a/fdbrpc/ReplicationUtils.cpp b/fdbrpc/ReplicationUtils.cpp index ae92fd7950..d2c7e734a0 100644 --- a/fdbrpc/ReplicationUtils.cpp +++ b/fdbrpc/ReplicationUtils.cpp @@ -27,8 +27,8 @@ double ratePolicy( - LocalitySetRef & localitySet, - IRepPolicyRef const& policy, + Reference & localitySet, + Reference const& policy, unsigned int nTestTotal) { double rating = -1.0; @@ -85,14 +85,14 @@ double ratePolicy( bool findBestPolicySet( std::vector& bestResults, - LocalitySetRef & localitySet, - IRepPolicyRef const& policy, + Reference & localitySet, + Reference const& policy, unsigned int nMinItems, unsigned int nSelectTests, unsigned int nPolicyTests) { bool bSucceeded = true; - LocalitySetRef bestLocalitySet, testLocalitySet; + 
Reference bestLocalitySet, testLocalitySet; std::vector results; double testRate, bestRate = -1.0; @@ -162,15 +162,15 @@ bool findBestPolicySet( bool findBestUniquePolicySet( std::vector& bestResults, - LocalitySetRef & localitySet, - IRepPolicyRef const& policy, + Reference & localitySet, + Reference const& policy, StringRef localityUniquenessKey, unsigned int nMinItems, unsigned int nSelectTests, unsigned int nPolicyTests) { bool bSucceeded = true; - LocalitySetRef bestLocalitySet, testLocalitySet; + Reference bestLocalitySet, testLocalitySet; std::vector results; double testRate, bestRate = -1.0; @@ -262,7 +262,7 @@ bool findBestUniquePolicySet( bool validateAllCombinations( std::vector & offendingCombo, LocalityGroup const& localitySet, - IRepPolicyRef const& policy, + Reference const& policy, std::vector const& newItems, unsigned int nCombinationSize, bool bCheckIfValid) @@ -286,12 +286,12 @@ bool validateAllCombinations( } else { - bool bIsValidGroup; + bool bIsValidGroup; LocalityGroup localityGroup; std::string bitmask(nCombinationSize, 1); // K leading 1's bitmask.resize(newItems.size(), 0); // N-K trailing 0's - + do { localityGroup.deep_copy(localitySet); @@ -337,7 +337,7 @@ bool validateAllCombinations( bool validateAllCombinations( LocalityGroup const& localitySet, - IRepPolicyRef const& policy, + Reference const& policy, std::vector const& newItems, unsigned int nCombinationSize, bool bCheckIfValid) @@ -358,10 +358,10 @@ repTestType convertToTestType(int iValue) { return sValue; } -LocalitySetRef createTestLocalityMap(std::vector& indexes, int dcTotal, +Reference createTestLocalityMap(std::vector& indexes, int dcTotal, int szTotal, int rackTotal, int slotTotal, int independentItems, int independentTotal) { - LocalitySetRef buildServer(new LocalityMap()); + Reference buildServer(new LocalityMap()); LocalityMap* serverMap = (LocalityMap*) buildServer.getPtr(); int serverValue, dcLoop, szLoop, rackLoop, slotLoop; std::string dcText, szText, rackText, 
slotText, independentName, independentText; @@ -442,8 +442,8 @@ LocalitySetRef createTestLocalityMap(std::vector& indexes, int dcTo } bool testPolicy( - LocalitySetRef servers, - IRepPolicyRef const& policy, + Reference servers, + Reference const& policy, std::vector const& including, bool validate) { @@ -506,109 +506,109 @@ bool testPolicy( } bool testPolicy( - LocalitySetRef servers, - IRepPolicyRef const& policy, + Reference servers, + Reference const& policy, bool validate) { return testPolicy(servers, policy, emptyEntryArray, validate); } -std::vector const& getStaticPolicies() +std::vector> const& getStaticPolicies() { - static std::vector staticPolicies; + static std::vector> staticPolicies; if (staticPolicies.empty()) { staticPolicies = { - IRepPolicyRef( new PolicyOne() ), + Reference( new PolicyOne() ), // 1 'dc^2 x 1' - IRepPolicyRef( new PolicyAcross(2, "dc", IRepPolicyRef( new PolicyOne() ) ) ), + Reference( new PolicyAcross(2, "dc", Reference( new PolicyOne() ) ) ), // 2 'dc^3 x 1' - IRepPolicyRef( new PolicyAcross(3, "dc", IRepPolicyRef( new PolicyOne() ) ) ), + Reference( new PolicyAcross(3, "dc", Reference( new PolicyOne() ) ) ), // 3 'sz^3 x 1' - IRepPolicyRef( new PolicyAcross(3, "sz", IRepPolicyRef( new PolicyOne() ) ) ), + Reference( new PolicyAcross(3, "sz", Reference( new PolicyOne() ) ) ), // 4 'dc^1 x az^3 x 1' - IRepPolicyRef( new PolicyAcross(1, "dc", IRepPolicyRef( new PolicyAcross(3, "az", IRepPolicyRef( new PolicyOne() ))) ) ), + Reference( new PolicyAcross(1, "dc", Reference( new PolicyAcross(3, "az", Reference( new PolicyOne() ))) ) ), // 5 '(sz^3 x rack^2 x 1) + (dc^2 x az^3 x 1)' - IRepPolicyRef( new PolicyAnd( { IRepPolicyRef(new PolicyAcross(3, "sz", IRepPolicyRef(new PolicyAcross(2, "rack", IRepPolicyRef(new PolicyOne() ))))), IRepPolicyRef(new PolicyAcross(2, "dc", IRepPolicyRef(new PolicyAcross(3, "az", IRepPolicyRef(new PolicyOne()) ))) )} ) ), + Reference( new PolicyAnd( { Reference(new PolicyAcross(3, "sz", Reference(new 
PolicyAcross(2, "rack", Reference(new PolicyOne() ))))), Reference(new PolicyAcross(2, "dc", Reference(new PolicyAcross(3, "az", Reference(new PolicyOne()) ))) )} ) ), // 6 '(sz^1 x 1)' - IRepPolicyRef( new PolicyAcross(1, "sz", IRepPolicyRef(new PolicyOne())) ), + Reference( new PolicyAcross(1, "sz", Reference(new PolicyOne())) ), // 7 '(sz^1 x 1) + (sz^1 x 1)' - IRepPolicyRef( new PolicyAnd( { IRepPolicyRef(new PolicyAcross(1, "sz", IRepPolicyRef(new PolicyOne()))), IRepPolicyRef(new PolicyAcross(1, "sz", IRepPolicyRef(new PolicyOne()))) } ) ), + Reference( new PolicyAnd( { Reference(new PolicyAcross(1, "sz", Reference(new PolicyOne()))), Reference(new PolicyAcross(1, "sz", Reference(new PolicyOne()))) } ) ), // 8 '(sz^2 x 1) + (sz^2 x 1)' - IRepPolicyRef( new PolicyAnd( { IRepPolicyRef(new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))), IRepPolicyRef(new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))) } ) ), + Reference( new PolicyAnd( { Reference(new PolicyAcross(2, "sz", Reference(new PolicyOne()))), Reference(new PolicyAcross(2, "sz", Reference(new PolicyOne()))) } ) ), // 9 '(dc^1 x sz^2 x 1)' - IRepPolicyRef( new PolicyAcross(1, "dc", IRepPolicyRef( new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))))), + Reference( new PolicyAcross(1, "dc", Reference( new PolicyAcross(2, "sz", Reference(new PolicyOne()))))), //10 '(dc^2 x sz^2 x 1)' - IRepPolicyRef( new PolicyAcross(2, "dc", IRepPolicyRef( new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))))), + Reference( new PolicyAcross(2, "dc", Reference( new PolicyAcross(2, "sz", Reference(new PolicyOne()))))), //11 '(dc^1 x sz^2 x 1) + (dc^2 x sz^2 x 1)' - IRepPolicyRef( new PolicyAnd( { IRepPolicyRef(new PolicyAcross(1, "dc", IRepPolicyRef( new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))))), IRepPolicyRef(new PolicyAcross(2, "dc", IRepPolicyRef( new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))))) } ) ), + Reference( new PolicyAnd( { Reference(new PolicyAcross(1, "dc", 
Reference( new PolicyAcross(2, "sz", Reference(new PolicyOne()))))), Reference(new PolicyAcross(2, "dc", Reference( new PolicyAcross(2, "sz", Reference(new PolicyOne()))))) } ) ), //12 '(dc^2 x sz^2 x 1) + (dc^1 x sz^2 x 1)' - IRepPolicyRef( new PolicyAnd( { IRepPolicyRef(new PolicyAcross(2, "dc", IRepPolicyRef( new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))))), IRepPolicyRef(new PolicyAcross(1, "dc", IRepPolicyRef( new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))))) } ) ), + Reference( new PolicyAnd( { Reference(new PolicyAcross(2, "dc", Reference( new PolicyAcross(2, "sz", Reference(new PolicyOne()))))), Reference(new PolicyAcross(1, "dc", Reference( new PolicyAcross(2, "sz", Reference(new PolicyOne()))))) } ) ), //13 '(sz^2 x 1) + (dc^1 x sz^2 x 1)' - IRepPolicyRef( new PolicyAnd( { IRepPolicyRef(new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))), IRepPolicyRef(new PolicyAcross(1, "dc", IRepPolicyRef( new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))))) } ) ), + Reference( new PolicyAnd( { Reference(new PolicyAcross(2, "sz", Reference(new PolicyOne()))), Reference(new PolicyAcross(1, "dc", Reference( new PolicyAcross(2, "sz", Reference(new PolicyOne()))))) } ) ), //14 '(sz^2 x 1) + (dc^2 x sz^2 x 1)' - IRepPolicyRef( new PolicyAnd( { IRepPolicyRef(new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))), IRepPolicyRef(new PolicyAcross(2, "dc", IRepPolicyRef( new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))))) } ) ), + Reference( new PolicyAnd( { Reference(new PolicyAcross(2, "sz", Reference(new PolicyOne()))), Reference(new PolicyAcross(2, "dc", Reference( new PolicyAcross(2, "sz", Reference(new PolicyOne()))))) } ) ), //15 '(sz^3 x 1) + (dc^2 x sz^2 x 1)' - IRepPolicyRef( new PolicyAnd( { IRepPolicyRef(new PolicyAcross(3, "sz", IRepPolicyRef(new PolicyOne()))), IRepPolicyRef(new PolicyAcross(2, "dc", IRepPolicyRef( new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))))) } ) ), + Reference( new PolicyAnd( { 
Reference(new PolicyAcross(3, "sz", Reference(new PolicyOne()))), Reference(new PolicyAcross(2, "dc", Reference( new PolicyAcross(2, "sz", Reference(new PolicyOne()))))) } ) ), //16 '(sz^1 x 1) + (sz^2 x 1)' - IRepPolicyRef( new PolicyAnd( { IRepPolicyRef(new PolicyAcross(1, "sz", IRepPolicyRef(new PolicyOne()))), IRepPolicyRef(new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))) } ) ), + Reference( new PolicyAnd( { Reference(new PolicyAcross(1, "sz", Reference(new PolicyOne()))), Reference(new PolicyAcross(2, "sz", Reference(new PolicyOne()))) } ) ), //17 '(sz^2 x 1) + (sz^3 x 1)' - IRepPolicyRef( new PolicyAnd( { IRepPolicyRef(new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))), IRepPolicyRef(new PolicyAcross(3, "sz", IRepPolicyRef(new PolicyOne()))) } ) ), + Reference( new PolicyAnd( { Reference(new PolicyAcross(2, "sz", Reference(new PolicyOne()))), Reference(new PolicyAcross(3, "sz", Reference(new PolicyOne()))) } ) ), //18 '(sz^1 x 1) + (sz^2 x 1) + (sz^3 x 1)' - IRepPolicyRef( new PolicyAnd( { IRepPolicyRef(new PolicyAcross(1, "sz", IRepPolicyRef(new PolicyOne()))), IRepPolicyRef(new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))), IRepPolicyRef(new PolicyAcross(3, "sz", IRepPolicyRef(new PolicyOne()))) } ) ), + Reference( new PolicyAnd( { Reference(new PolicyAcross(1, "sz", Reference(new PolicyOne()))), Reference(new PolicyAcross(2, "sz", Reference(new PolicyOne()))), Reference(new PolicyAcross(3, "sz", Reference(new PolicyOne()))) } ) ), //19 '(sz^1 x 1) + (machine^1 x 1)' - IRepPolicyRef( new PolicyAnd( { IRepPolicyRef(new PolicyAcross(1, "sz", IRepPolicyRef(new PolicyOne()))), IRepPolicyRef(new PolicyAcross(1, "zoneid", IRepPolicyRef(new PolicyOne()))) } ) ), + Reference( new PolicyAnd( { Reference(new PolicyAcross(1, "sz", Reference(new PolicyOne()))), Reference(new PolicyAcross(1, "zoneid", Reference(new PolicyOne()))) } ) ), // '(dc^1 x 1) + (sz^1 x 1) + (machine^1 x 1)' - // IRepPolicyRef( new PolicyAnd( { IRepPolicyRef(new 
PolicyAcross(1, "dc", IRepPolicyRef(new PolicyOne()))), IRepPolicyRef(new PolicyAcross(1, "sz", IRepPolicyRef(new PolicyOne()))), IRepPolicyRef(new PolicyAcross(1, "zoneid", IRepPolicyRef(new PolicyOne()))) } ) ), + // Reference( new PolicyAnd( { Reference(new PolicyAcross(1, "dc", Reference(new PolicyOne()))), Reference(new PolicyAcross(1, "sz", Reference(new PolicyOne()))), Reference(new PolicyAcross(1, "zoneid", Reference(new PolicyOne()))) } ) ), // '(dc^1 x sz^3 x 1)' - IRepPolicyRef( new PolicyAcross(1, "dc", IRepPolicyRef( new PolicyAcross(3, "sz", IRepPolicyRef(new PolicyOne())))) ), + Reference( new PolicyAcross(1, "dc", Reference( new PolicyAcross(3, "sz", Reference(new PolicyOne())))) ), // '(dc^2 x sz^3 x 1)' - IRepPolicyRef( new PolicyAcross(2, "dc", IRepPolicyRef( new PolicyAcross(3, "sz", IRepPolicyRef(new PolicyOne())))) ), + Reference( new PolicyAcross(2, "dc", Reference( new PolicyAcross(3, "sz", Reference(new PolicyOne())))) ), // '(dc^2 x az^3 x 1)' - IRepPolicyRef( new PolicyAcross(2, "dc", IRepPolicyRef( new PolicyAcross(3, "az", IRepPolicyRef(new PolicyOne())))) ), + Reference( new PolicyAcross(2, "dc", Reference( new PolicyAcross(3, "az", Reference(new PolicyOne())))) ), // '(sz^1 x 1) + (dc^2 x az^3 x 1)' - IRepPolicyRef( new PolicyAnd({IRepPolicyRef(new PolicyAcross(1, "sz", IRepPolicyRef(new PolicyOne()))), IRepPolicyRef(new PolicyAcross(2, "dc", IRepPolicyRef( new PolicyAcross(3, "az", IRepPolicyRef(new PolicyOne())))))}) ), + Reference( new PolicyAnd({Reference(new PolicyAcross(1, "sz", Reference(new PolicyOne()))), Reference(new PolicyAcross(2, "dc", Reference( new PolicyAcross(3, "az", Reference(new PolicyOne())))))}) ), // 'dc^1 x (az^2 x 1) + (sz^2 x 1)' - // IRepPolicyRef( new PolicyAcross(1, "dc", IRepPolicyRef(new PolicyAnd({IRepPolicyRef(new PolicyAcross(2, "az", IRepPolicyRef(new PolicyOne()))), IRepPolicyRef(new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne())))}))) ), + // Reference( new PolicyAcross(1, "dc", 
Reference(new PolicyAnd({Reference(new PolicyAcross(2, "az", Reference(new PolicyOne()))), Reference(new PolicyAcross(2, "sz", Reference(new PolicyOne())))}))) ), // Require backtracking - IRepPolicyRef( new PolicyAcross(8, "zoneid", IRepPolicyRef(new PolicyAcross(1, "az", IRepPolicyRef(new PolicyOne()))) ) ), - IRepPolicyRef( new PolicyAcross(8, "zoneid", IRepPolicyRef(new PolicyAcross(1, "sz", IRepPolicyRef(new PolicyOne()))) ) ) + Reference( new PolicyAcross(8, "zoneid", Reference(new PolicyAcross(1, "az", Reference(new PolicyOne()))) ) ), + Reference( new PolicyAcross(8, "zoneid", Reference(new PolicyAcross(1, "sz", Reference(new PolicyOne()))) ) ) }; } return staticPolicies; } -IRepPolicyRef const randomAcrossPolicy(LocalitySet const& serverSet) +Reference const randomAcrossPolicy(LocalitySet const& serverSet) { int usedKeyTotal, keysUsed, keyIndex, valueTotal, maxValueTotal, maxKeyTotal, skips, lastKeyIndex; std::vector keyArray(serverSet.getGroupKeyMap()->_lookuparray); @@ -616,7 +616,7 @@ IRepPolicyRef const randomAcrossPolicy(LocalitySet const& serverSet) AttribKey indexKey; Optional keyValue; std::string keyText; - IRepPolicyRef policy(new PolicyOne()); + Reference policy(new PolicyOne()); // Determine the number of keys to used within the policy usedKeyTotal = g_random->randomInt(1, keyArray.size()+1); @@ -669,7 +669,7 @@ IRepPolicyRef const randomAcrossPolicy(LocalitySet const& serverSet) } valueTotal = g_random->randomInt(1, valueSet.size()+2); if ((valueTotal > maxValueTotal) && (g_random->random01() > .25)) valueTotal = maxValueTotal; - policy = IRepPolicyRef( new PolicyAcross(valueTotal, keyText, policy) ); + policy = Reference( new PolicyAcross(valueTotal, keyText, policy) ); if (g_replicationdebug > 1) { printf(" item%3d: (%3d =>%3d) %-10s =>%4d\n", keysUsed+1, keyIndex, indexKey._id, keyText.c_str(), valueTotal); } @@ -725,8 +725,8 @@ int testReplication() int policyMin = policyMinEnv ? 
atoi(policyMinEnv) : 2; int policyIndex, testCounter, alsoSize, debugBackup, maxAlsoSize; std::vector serverIndexes; - LocalitySetRef testServers; - std::vector policies; + Reference testServers; + std::vector> policies; std::vector alsoServers, bestSet; int totalErrors = 0; @@ -819,12 +819,12 @@ void filterLocalityDataForPolicy(const std::set& keys, LocalityData } } -void filterLocalityDataForPolicy(IRepPolicyRef policy, LocalityData* ld) { +void filterLocalityDataForPolicy(Reference policy, LocalityData* ld) { if (!policy) return; filterLocalityDataForPolicy(policy->attributeKeys(), ld); } -void filterLocalityDataForPolicy(IRepPolicyRef policy, std::vector* vld) { +void filterLocalityDataForPolicy(Reference policy, std::vector* vld) { if (!policy) return; std::set keys = policy->attributeKeys(); for (LocalityData& ld : *vld) { diff --git a/fdbrpc/ReplicationUtils.h b/fdbrpc/ReplicationUtils.h index f359e7489f..f9f1987e78 100644 --- a/fdbrpc/ReplicationUtils.h +++ b/fdbrpc/ReplicationUtils.h @@ -34,22 +34,22 @@ extern repTestType convertToTestType(int iValue); extern int testReplication(); extern double ratePolicy( - LocalitySetRef & localitySet, - IRepPolicyRef const& policy, + Reference & localitySet, + Reference const& policy, unsigned int nSelectTests); extern bool findBestPolicySet( std::vector& bestResults, - LocalitySetRef & localitySet, - IRepPolicyRef const& policy, + Reference & localitySet, + Reference const& policy, unsigned int nMinItems, unsigned int nSelectTests, unsigned int nPolicyTests); extern bool findBestUniquePolicySet( std::vector& bestResults, - LocalitySetRef & localitySet, - IRepPolicyRef const& policy, + Reference & localitySet, + Reference const& policy, StringRef localityUniquenessKey, unsigned int nMinItems, unsigned int nSelectTests, @@ -60,20 +60,20 @@ extern bool findBestUniquePolicySet( extern bool validateAllCombinations( std::vector & offendingCombo, LocalityGroup const& localitySet, - IRepPolicyRef const& policy, + Reference 
const& policy, std::vector const& newItems, unsigned int nCombinationSize, bool bCheckIfValid = true); extern bool validateAllCombinations( LocalityGroup const& localitySet, - IRepPolicyRef const& policy, + Reference const& policy, std::vector const& newItems, unsigned int nCombinationSize, bool bCheckIfValid = true); /// Remove all pieces of locality information from the LocalityData that will not be used when validating the policy. -void filterLocalityDataForPolicy(IRepPolicyRef policy, LocalityData* ld); -void filterLocalityDataForPolicy(IRepPolicyRef policy, std::vector* vld); +void filterLocalityDataForPolicy(Reference policy, LocalityData* ld); +void filterLocalityDataForPolicy(Reference policy, std::vector* vld); #endif diff --git a/fdbrpc/simulator.h b/fdbrpc/simulator.h index 2987c80655..7cb645e70f 100644 --- a/fdbrpc/simulator.h +++ b/fdbrpc/simulator.h @@ -280,11 +280,11 @@ public: std::set protectedAddresses; std::map currentlyRebootingProcesses; class ClusterConnectionString* extraDB; - IRepPolicyRef storagePolicy; - IRepPolicyRef tLogPolicy; + Reference storagePolicy; + Reference tLogPolicy; int32_t tLogWriteAntiQuorum; Optional> primaryDcId; - IRepPolicyRef remoteTLogPolicy; + Reference remoteTLogPolicy; int32_t usableRegions; std::string disablePrimary; std::string disableRemote; @@ -292,8 +292,8 @@ public: bool allowLogSetKills; Optional> remoteDcId; bool hasSatelliteReplication; - IRepPolicyRef satelliteTLogPolicy; - IRepPolicyRef satelliteTLogPolicyFallback; + Reference satelliteTLogPolicy; + Reference satelliteTLogPolicyFallback; int32_t satelliteTLogWriteAntiQuorum; int32_t satelliteTLogWriteAntiQuorumFallback; std::vector>> primarySatelliteDcIds; diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index ca805e0db1..40556f0c40 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -234,10 +234,10 @@ public: throw no_more_servers(); } - std::vector 
getWorkersForSeedServers( DatabaseConfiguration const& conf, IRepPolicyRef const& policy, Optional>> const& dcId = Optional>>() ) { + std::vector getWorkersForSeedServers( DatabaseConfiguration const& conf, Reference const& policy, Optional>> const& dcId = Optional>>() ) { std::map> fitness_workers; std::vector results; - LocalitySetRef logServerSet = Reference(new LocalityMap()); + Reference logServerSet = Reference(new LocalityMap()); LocalityMap* logServerMap = (LocalityMap*) logServerSet.getPtr(); bool bCompleted = false; @@ -275,11 +275,11 @@ public: return results; } - std::vector getWorkersForTlogs( DatabaseConfiguration const& conf, int32_t required, int32_t desired, IRepPolicyRef const& policy, std::map< Optional>, int>& id_used, bool checkStable = false, std::set> dcIds = std::set>() ) { + std::vector getWorkersForTlogs( DatabaseConfiguration const& conf, int32_t required, int32_t desired, Reference const& policy, std::map< Optional>, int>& id_used, bool checkStable = false, std::set> dcIds = std::set>() ) { std::map, vector> fitness_workers; std::vector results; std::vector unavailableLocals; - LocalitySetRef logServerSet; + Reference logServerSet; LocalityMap* logServerMap; bool bCompleted = false; diff --git a/fdbserver/DBCoreState.h b/fdbserver/DBCoreState.h index 1b6d3e3bc1..ebaaff8c73 100644 --- a/fdbserver/DBCoreState.h +++ b/fdbserver/DBCoreState.h @@ -41,7 +41,7 @@ struct CoreTLogSet { int32_t tLogWriteAntiQuorum; // The write anti quorum previously used to write to tLogs, which might be different from the anti quorum suggested by the current configuration going forward! 
int32_t tLogReplicationFactor; // The replication factor previously used to write to tLogs, which might be different from the current configuration std::vector< LocalityData > tLogLocalities; // Stores the localities of the log servers - IRepPolicyRef tLogPolicy; + Reference tLogPolicy; bool isLocal; int8_t locality; Version startVersion; diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index f57e5c6b13..ead66847d1 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -3698,7 +3698,7 @@ ACTOR Future dataDistributor(DataDistributorInterface di, Reference policy, int processCount) { Database database = DatabaseContext::create( Reference>(new AsyncVar()), Never(), @@ -3740,7 +3740,7 @@ DDTeamCollection* testTeamCollection(int teamSize, IRepPolicyRef policy, int pro return collection; } -DDTeamCollection* testMachineTeamCollection(int teamSize, IRepPolicyRef policy, int processCount) { +DDTeamCollection* testMachineTeamCollection(int teamSize, Reference policy, int processCount) { Database database = DatabaseContext::create(Reference>(new AsyncVar()), Never(), LocalityData(), false); @@ -3792,7 +3792,7 @@ TEST_CASE("DataDistribution/AddTeamsBestOf/UseMachineID") { int desiredTeams = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * processSize; int maxTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * processSize; - IRepPolicyRef policy = IRepPolicyRef(new PolicyAcross(teamSize, "zoneid", IRepPolicyRef(new PolicyOne()))); + Reference policy = Reference(new PolicyAcross(teamSize, "zoneid", Reference(new PolicyOne()))); state DDTeamCollection* collection = testMachineTeamCollection(teamSize, policy, processSize); int result = collection->addTeamsBestOf(30, desiredTeams, maxTeams); @@ -3812,7 +3812,7 @@ TEST_CASE("DataDistribution/AddTeamsBestOf/NotUseMachineID") { int desiredTeams = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * processSize; int maxTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * processSize; - 
IRepPolicyRef policy = IRepPolicyRef(new PolicyAcross(teamSize, "zoneid", IRepPolicyRef(new PolicyOne()))); + Reference policy = Reference(new PolicyAcross(teamSize, "zoneid", Reference(new PolicyOne()))); state DDTeamCollection* collection = testMachineTeamCollection(teamSize, policy, processSize); if (collection == NULL) { @@ -3830,7 +3830,7 @@ TEST_CASE("DataDistribution/AddTeamsBestOf/NotUseMachineID") { } TEST_CASE("DataDistribution/AddAllTeams/isExhaustive") { - IRepPolicyRef policy = IRepPolicyRef(new PolicyAcross(3, "zoneid", IRepPolicyRef(new PolicyOne()))); + Reference policy = Reference(new PolicyAcross(3, "zoneid", Reference(new PolicyOne()))); state int processSize = 10; state int desiredTeams = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * processSize; state int maxTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * processSize; @@ -3849,7 +3849,7 @@ TEST_CASE("DataDistribution/AddAllTeams/isExhaustive") { } TEST_CASE("/DataDistribution/AddAllTeams/withLimit") { - IRepPolicyRef policy = IRepPolicyRef(new PolicyAcross(3, "zoneid", IRepPolicyRef(new PolicyOne()))); + Reference policy = Reference(new PolicyAcross(3, "zoneid", Reference(new PolicyOne()))); state int processSize = 10; state int desiredTeams = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * processSize; state int maxTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * processSize; @@ -3867,7 +3867,7 @@ TEST_CASE("/DataDistribution/AddAllTeams/withLimit") { TEST_CASE("/DataDistribution/AddTeamsBestOf/SkippingBusyServers") { wait(Future(Void())); - IRepPolicyRef policy = IRepPolicyRef(new PolicyAcross(3, "zoneid", IRepPolicyRef(new PolicyOne()))); + Reference policy = Reference(new PolicyAcross(3, "zoneid", Reference(new PolicyOne()))); state int processSize = 10; state int desiredTeams = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * processSize; state int maxTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * processSize; @@ -3897,7 +3897,7 @@ TEST_CASE("/DataDistribution/AddTeamsBestOf/SkippingBusyServers") { 
TEST_CASE("/DataDistribution/AddTeamsBestOf/NotEnoughServers") { wait(Future(Void())); - IRepPolicyRef policy = IRepPolicyRef(new PolicyAcross(3, "zoneid", IRepPolicyRef(new PolicyOne()))); + Reference policy = Reference(new PolicyAcross(3, "zoneid", Reference(new PolicyOne()))); state int processSize = 5; state int desiredTeams = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * processSize; state int maxTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * processSize; diff --git a/fdbserver/LogSystem.h b/fdbserver/LogSystem.h index 8b8dc7e8dc..3ed53b0475 100644 --- a/fdbserver/LogSystem.h +++ b/fdbserver/LogSystem.h @@ -40,8 +40,8 @@ public: int32_t tLogReplicationFactor; std::vector< LocalityData > tLogLocalities; // Stores the localities of the log servers TLogVersion tLogVersion; - IRepPolicyRef tLogPolicy; - LocalitySetRef logServerSet; + Reference tLogPolicy; + Reference logServerSet; std::vector logIndexArray; std::vector logEntryArray; bool isLocal; @@ -84,7 +84,7 @@ public: used_servers.insert(std::make_pair(0,i)); } - LocalitySetRef serverSet = Reference(new LocalityMap>()); + Reference serverSet = Reference(new LocalityMap>()); LocalityMap>* serverMap = (LocalityMap>*) serverSet.getPtr(); std::vector> resultPairs; for(int loc = 0; loc < satelliteTagLocations.size(); loc++) { @@ -189,7 +189,7 @@ public: void updateLocalitySet( vector const& localities ) { LocalityMap* logServerMap; - logServerSet = LocalitySetRef(new LocalityMap()); + logServerSet = Reference(new LocalityMap()); logServerMap = (LocalityMap*) logServerSet.getPtr(); logEntryArray.clear(); @@ -412,7 +412,7 @@ struct ILogSystem { int tLogReplicationFactor; MergedPeekCursor( vector< Reference > const& serverCursors, Version begin ); - MergedPeekCursor( std::vector>>> const& logServers, int bestServer, int readQuorum, Tag tag, Version begin, Version end, bool parallelGetMore, std::vector const& tLogLocalities, IRepPolicyRef const tLogPolicy, int tLogReplicationFactor ); + MergedPeekCursor( std::vector>>> 
const& logServers, int bestServer, int readQuorum, Tag tag, Version begin, Version end, bool parallelGetMore, std::vector const& tLogLocalities, Reference const tLogPolicy, int tLogReplicationFactor ); MergedPeekCursor( vector< Reference > const& serverCursors, LogMessageVersion const& messageVersion, int bestServer, int readQuorum, Optional nextVersion, Reference logSet, int tLogReplicationFactor ); virtual Reference cloneNoMore(); diff --git a/fdbserver/LogSystemConfig.h b/fdbserver/LogSystemConfig.h index 3c24dc84b5..6890726579 100644 --- a/fdbserver/LogSystemConfig.h +++ b/fdbserver/LogSystemConfig.h @@ -61,7 +61,7 @@ struct TLogSet { int32_t tLogWriteAntiQuorum, tLogReplicationFactor; std::vector< LocalityData > tLogLocalities; // Stores the localities of the log servers TLogVersion tLogVersion; - IRepPolicyRef tLogPolicy; + Reference tLogPolicy; bool isLocal; int8_t locality; Version startVersion; diff --git a/fdbserver/LogSystemPeekCursor.actor.cpp b/fdbserver/LogSystemPeekCursor.actor.cpp index 1351fb207f..feb4a2e8a0 100644 --- a/fdbserver/LogSystemPeekCursor.actor.cpp +++ b/fdbserver/LogSystemPeekCursor.actor.cpp @@ -273,7 +273,7 @@ ILogSystem::MergedPeekCursor::MergedPeekCursor( vector< Reference>>> const& logServers, int bestServer, int readQuorum, Tag tag, Version begin, Version end, - bool parallelGetMore, std::vector< LocalityData > const& tLogLocalities, IRepPolicyRef const tLogPolicy, int tLogReplicationFactor ) + bool parallelGetMore, std::vector< LocalityData > const& tLogLocalities, Reference const tLogPolicy, int tLogReplicationFactor ) : bestServer(bestServer), readQuorum(readQuorum), tag(tag), currentCursor(0), hasNextMessage(false), messageVersion(begin), randomID(g_random->randomUniqueID()), tLogReplicationFactor(tLogReplicationFactor) { if(tLogPolicy) { logSet = Reference( new LogSet() ); diff --git a/fdbserver/TagPartitionedLogSystem.actor.cpp b/fdbserver/TagPartitionedLogSystem.actor.cpp index 262d5a3449..3ecaa26e16 100644 --- 
a/fdbserver/TagPartitionedLogSystem.actor.cpp +++ b/fdbserver/TagPartitionedLogSystem.actor.cpp @@ -531,12 +531,12 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted= lastBegin) { TraceEvent("TLogPeekRemoteBestOnly", dbgid).detail("Tag", tag.toString()).detail("Begin", begin).detail("BestSet", bestSet).detail("BestSetStart", lastBegin).detail("LogRouterIds", tLogs[bestSet]->logRouterString()); - return Reference( new ILogSystem::MergedPeekCursor( tLogs[bestSet]->logRouters, -1, (int)tLogs[bestSet]->logRouters.size(), tag, begin, getPeekEnd(), false, std::vector(), IRepPolicyRef(), 0 ) ); + return Reference( new ILogSystem::MergedPeekCursor( tLogs[bestSet]->logRouters, -1, (int)tLogs[bestSet]->logRouters.size(), tag, begin, getPeekEnd(), false, std::vector(), Reference(), 0 ) ); } else { std::vector< Reference > cursors; std::vector< LogMessageVersion > epochEnds; TraceEvent("TLogPeekRemoteAddingBest", dbgid).detail("Tag", tag.toString()).detail("Begin", begin).detail("BestSet", bestSet).detail("BestSetStart", lastBegin).detail("LogRouterIds", tLogs[bestSet]->logRouterString()); - cursors.push_back( Reference( new ILogSystem::MergedPeekCursor( tLogs[bestSet]->logRouters, -1, (int)tLogs[bestSet]->logRouters.size(), tag, lastBegin, getPeekEnd(), false, std::vector(), IRepPolicyRef(), 0 ) ) ); + cursors.push_back( Reference( new ILogSystem::MergedPeekCursor( tLogs[bestSet]->logRouters, -1, (int)tLogs[bestSet]->logRouters.size(), tag, lastBegin, getPeekEnd(), false, std::vector(), Reference(), 0 ) ) ); int i = 0; while(begin < lastBegin) { if(i == oldLogData.size()) { @@ -565,7 +565,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedlogRouterString()) .detail("LastBegin", lastBegin).detail("ThisBegin", thisBegin).detail("BestStartVer", oldLogData[i].tLogs[bestOldSet]->startVersion); cursors.push_back( Reference( new ILogSystem::MergedPeekCursor( oldLogData[i].tLogs[bestOldSet]->logRouters, -1, 
(int)oldLogData[i].tLogs[bestOldSet]->logRouters.size(), tag, - thisBegin, lastBegin, false, std::vector(), IRepPolicyRef(), 0 ) ) ); + thisBegin, lastBegin, false, std::vector(), Reference(), 0 ) ) ); epochEnds.push_back(LogMessageVersion(lastBegin)); lastBegin = thisBegin; } @@ -1566,7 +1566,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted recruitOldLogRouters( TagPartitionedLogSystem* self, vector workers, LogEpoch recoveryCount, int8_t locality, Version startVersion, - std::vector tLogLocalities, IRepPolicyRef tLogPolicy, bool forRemote ) { + std::vector tLogLocalities, Reference tLogPolicy, bool forRemote ) { state vector>> logRouterInitializationReplies; state vector> allReplies; int nextRouter = 0; diff --git a/fdbserver/WorkerInterface.actor.h b/fdbserver/WorkerInterface.actor.h index eef28a8cfe..12481e0596 100644 --- a/fdbserver/WorkerInterface.actor.h +++ b/fdbserver/WorkerInterface.actor.h @@ -120,7 +120,7 @@ struct InitializeLogRouterRequest { Tag routerTag; Version startVersion; std::vector tLogLocalities; - IRepPolicyRef tLogPolicy; + Reference tLogPolicy; int8_t locality; ReplyPromise reply; From 7f480253486939e3921457e3d8d64764f0566a0b Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Wed, 13 Mar 2019 14:47:17 -0700 Subject: [PATCH 35/47] optimize confirm epoch alive --- fdbserver/TagPartitionedLogSystem.actor.cpp | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/fdbserver/TagPartitionedLogSystem.actor.cpp b/fdbserver/TagPartitionedLogSystem.actor.cpp index 3ecaa26e16..87da4bcfc1 100644 --- a/fdbserver/TagPartitionedLogSystem.actor.cpp +++ b/fdbserver/TagPartitionedLogSystem.actor.cpp @@ -959,24 +959,17 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedtLogReplicationFactor, numPresent - logSet->tLogWriteAntiQuorum) ) ); - state Reference locked(new LocalityGroup()); - state std::vector responded(alive.size()); - for (int i = 0; i < alive.size(); i++) { - responded[i] = false; - } 
+ state std::vector locked; + state std::vector responded(alive.size(), false); loop { for (int i = 0; i < alive.size(); i++) { if (!responded[i] && alive[i].isReady() && !alive[i].isError()) { - locked->add(logSet->tLogLocalities[i]); + locked.push_back(logSet->logEntryArray[i]); responded[i] = true; } } - bool quorum_obtained = locked->validate(logSet->tLogPolicy); - // We intentionally skip considering antiquorums, as the CPU cost of doing so is prohibitive. - if (logSet->tLogReplicationFactor == 1 && locked->size() > 0) { - ASSERT(quorum_obtained); - } - if (quorum_obtained) { + + if (logSet->satisfiesPolicy(locked)) { return Void(); } From e8cb85ed8e190ead1c8af1e10f5bd6ee6dacd012 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Wed, 13 Mar 2019 14:47:35 -0700 Subject: [PATCH 36/47] optimize validateAllCombinations --- fdbrpc/ReplicationUtils.cpp | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/fdbrpc/ReplicationUtils.cpp b/fdbrpc/ReplicationUtils.cpp index d2c7e734a0..f4fd1770f7 100644 --- a/fdbrpc/ReplicationUtils.cpp +++ b/fdbrpc/ReplicationUtils.cpp @@ -287,24 +287,38 @@ bool validateAllCombinations( else { bool bIsValidGroup; - LocalityGroup localityGroup; - std::string bitmask(nCombinationSize, 1); // K leading 1's + Reference localSet = Reference( new LocalityGroup() ); + LocalityGroup* localGroup = (LocalityGroup*) localSet.getPtr(); + localGroup->deep_copy(localitySet); + std::vector originalEntries = localGroup->getEntries(); + + for (int i = 0; i < newItems.size(); ++i) { + localGroup->add(newItems[i]); + } + + std::string bitmask(nCombinationSize, 1); // K leading 1's bitmask.resize(newItems.size(), 0); // N-K trailing 0's + std::vector localityGroupEntries; + std::vector resultEntries; do { - localityGroup.deep_copy(localitySet); - + localityGroupEntries = originalEntries; // [0..N-1] integers - for (int i = 0; i < newItems.size(); ++i) { + for (int i = 0; i < bitmask.size(); ++i) { if 
(bitmask[i]) { - localityGroup.add(newItems[i]); + localityGroupEntries.push_back(localGroup->getEntry(originalEntries.size() + i)); } } - // Check if the group combination passes validation - bIsValidGroup = localityGroup.validate(policy); + resultEntries.clear(); + + // Run the policy, assert if unable to satisfy + bool result = localSet->selectReplicas(policy, localityGroupEntries, resultEntries); + ASSERT(result); + + bIsValidGroup = resultEntries.size() == 0; if (((bCheckIfValid) && (!bIsValidGroup) ) || @@ -319,7 +333,7 @@ bool validateAllCombinations( } if (g_replicationdebug > 2) { printf("Invalid group\n"); - localityGroup.DisplayEntries(); + localGroup->DisplayEntries(); } if (g_replicationdebug > 3) { printf("Full set\n"); From e7d1f9e5f14d8725ff8dbdee9a705d92e1e389d5 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Wed, 13 Mar 2019 15:59:03 -0700 Subject: [PATCH 37/47] fixed review comments --- fdbrpc/ReplicationUtils.cpp | 8 ++++---- fdbserver/TagPartitionedLogSystem.actor.cpp | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fdbrpc/ReplicationUtils.cpp b/fdbrpc/ReplicationUtils.cpp index f4fd1770f7..6c6099c107 100644 --- a/fdbrpc/ReplicationUtils.cpp +++ b/fdbrpc/ReplicationUtils.cpp @@ -291,7 +291,8 @@ bool validateAllCombinations( LocalityGroup* localGroup = (LocalityGroup*) localSet.getPtr(); localGroup->deep_copy(localitySet); - std::vector originalEntries = localGroup->getEntries(); + std::vector localityGroupEntries = localGroup->getEntries(); + int originalSize = localityGroupEntries.size(); for (int i = 0; i < newItems.size(); ++i) { localGroup->add(newItems[i]); @@ -300,15 +301,14 @@ bool validateAllCombinations( std::string bitmask(nCombinationSize, 1); // K leading 1's bitmask.resize(newItems.size(), 0); // N-K trailing 0's - std::vector localityGroupEntries; std::vector resultEntries; do { - localityGroupEntries = originalEntries; + localityGroupEntries.resize(originalSize); // [0..N-1] integers for (int i = 0; i 
< bitmask.size(); ++i) { if (bitmask[i]) { - localityGroupEntries.push_back(localGroup->getEntry(originalEntries.size() + i)); + localityGroupEntries.push_back(localGroup->getEntry(originalSize + i)); } } diff --git a/fdbserver/TagPartitionedLogSystem.actor.cpp b/fdbserver/TagPartitionedLogSystem.actor.cpp index 87da4bcfc1..0b1c4787bd 100644 --- a/fdbserver/TagPartitionedLogSystem.actor.cpp +++ b/fdbserver/TagPartitionedLogSystem.actor.cpp @@ -959,17 +959,17 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedtLogReplicationFactor, numPresent - logSet->tLogWriteAntiQuorum) ) ); - state std::vector locked; + state std::vector aliveEntries; state std::vector responded(alive.size(), false); loop { for (int i = 0; i < alive.size(); i++) { if (!responded[i] && alive[i].isReady() && !alive[i].isError()) { - locked.push_back(logSet->logEntryArray[i]); + aliveEntries.push_back(logSet->logEntryArray[i]); responded[i] = true; } } - if (logSet->satisfiesPolicy(locked)) { + if (logSet->satisfiesPolicy(aliveEntries)) { return Void(); } From e30e2af1f3169187843049956082f5cdd508d395 Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Wed, 13 Mar 2019 16:54:56 -0700 Subject: [PATCH 38/47] ClientKnobs: Add CHECK_CONNECTED_COORDINATOR_NUM_DELAY --- fdbclient/Knobs.cpp | 3 +++ fdbclient/Knobs.h | 3 +++ fdbclient/NativeAPI.actor.cpp | 5 +++-- fdbserver/Knobs.h | 1 + 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/fdbclient/Knobs.cpp b/fdbclient/Knobs.cpp index c166d3aedc..5168333bb0 100644 --- a/fdbclient/Knobs.cpp +++ b/fdbclient/Knobs.cpp @@ -192,4 +192,7 @@ ClientKnobs::ClientKnobs(bool randomize) { init( CONSISTENCY_CHECK_RATE_LIMIT_MAX, 50e6 ); init( CONSISTENCY_CHECK_ONE_ROUND_TARGET_COMPLETION_TIME, 7 * 24 * 60 * 60 ); // 7 days init( CONSISTENCY_CHECK_RATE_WINDOW, 1.0 ); + + // TLS related + init( CHECK_CONNECTED_COORDINATOR_NUM_DELAY, 1.0 ); if( randomize && BUGGIFY ) CHECK_CONNECTED_COORDINATOR_NUM_DELAY = g_random->random01() * 60.0; // In seconds } diff 
--git a/fdbclient/Knobs.h b/fdbclient/Knobs.h index 7a11ae1616..e7999cc588 100644 --- a/fdbclient/Knobs.h +++ b/fdbclient/Knobs.h @@ -183,6 +183,9 @@ public: int CONSISTENCY_CHECK_ONE_ROUND_TARGET_COMPLETION_TIME; int CONSISTENCY_CHECK_RATE_WINDOW; + // TLS related + int CHECK_CONNECTED_COORDINATOR_NUM_DELAY; + ClientKnobs(bool randomize = false); }; diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 7e37cfa643..f1ad5a5c93 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -35,6 +35,7 @@ #include "fdbclient/MutationList.h" #include "fdbclient/CoordinationInterface.h" #include "fdbclient/MonitorLeader.h" +#include "fdbclient/Knobs.h" #if defined(CMAKE_BUILD) || !defined(WIN32) #include "versions.h" #endif @@ -592,7 +593,7 @@ Database DatabaseContext::create(Reference>> Reference> connectedCoordinatorsNumDelayed(new AsyncVar(0)); Reference cluster(new Cluster(connFile, clusterInterface, connectedCoordinatorsNum)); Reference> clientInfo(new AsyncVar()); - Future clientInfoMonitor = delayedAsyncVar(connectedCoordinatorsNum, connectedCoordinatorsNumDelayed, 1.0) || monitorClientInfo(clusterInterface, connFile, clientInfo, connectedCoordinatorsNumDelayed); + Future clientInfoMonitor = delayedAsyncVar(connectedCoordinatorsNum, connectedCoordinatorsNumDelayed, CLIENT_KNOBS->CHECK_CONNECTED_COORDINATOR_NUM_DELAY) || monitorClientInfo(clusterInterface, connFile, clientInfo, connectedCoordinatorsNumDelayed); return Database(new DatabaseContext(cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false)); } @@ -761,7 +762,7 @@ Database Database::createDatabase( Reference connFile, in Reference> connectedCoordinatorsNumDelayed(new AsyncVar(0)); Reference cluster(new Cluster(connFile, connectedCoordinatorsNum, apiVersion)); Reference> clientInfo(new AsyncVar()); - Future clientInfoMonitor = delayedAsyncVar(connectedCoordinatorsNum, connectedCoordinatorsNumDelayed, 
1.0) || monitorClientInfo(cluster->getClusterInterface(), connFile, clientInfo, connectedCoordinatorsNumDelayed); + Future clientInfoMonitor = delayedAsyncVar(connectedCoordinatorsNum, connectedCoordinatorsNumDelayed, CLIENT_KNOBS->CHECK_CONNECTED_COORDINATOR_NUM_DELAY) || monitorClientInfo(cluster->getClusterInterface(), connFile, clientInfo, connectedCoordinatorsNumDelayed); return Database( new DatabaseContext( cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false, apiVersion ) ); } diff --git a/fdbserver/Knobs.h b/fdbserver/Knobs.h index f3698b3561..4fdc8a0152 100644 --- a/fdbserver/Knobs.h +++ b/fdbserver/Knobs.h @@ -376,6 +376,7 @@ public: int64_t TIME_KEEPER_DELAY; int64_t TIME_KEEPER_MAX_ENTRIES; + ServerKnobs(bool randomize = false, ClientKnobs* clientKnobs = NULL); }; From 529068c3e20ad844c4d2b98f44498bebe6c796e4 Mon Sep 17 00:00:00 2001 From: Vishesh Yadav Date: Wed, 13 Mar 2019 15:34:52 -0700 Subject: [PATCH 39/47] doc: Live TLS migration --- documentation/sphinx/source/tls.rst | 48 +++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/documentation/sphinx/source/tls.rst b/documentation/sphinx/source/tls.rst index 1884622498..f5b5c94852 100644 --- a/documentation/sphinx/source/tls.rst +++ b/documentation/sphinx/source/tls.rst @@ -29,10 +29,52 @@ This will configure the new cluster to communicate with TLS. .. note:: Depending on your operating system, version and configuration, there may be a firewall in place that prevents external access to certain ports. If necessary, please consult the appropriate documentation for your OS and ensure that all machines in your cluster can reach the ports configured in your :ref:`configuration file `. -.. _converting-existing-cluster: +.. 
_converting-existing-cluster-after-6.1: -Converting an existing cluster to use TLS -========================================= +Converting an existing cluster to use TLS (since v6.1) +====================================================== + +Since version 6.1, FoundationDB clusters can be converted to TLS without downtime. FoundationDB server can listen to TLS and unencrypted traffic simultaneously on two separate ports. As a result, FDB clusters can live migrate to TLS: + +1) Restart each FoundationDB server individually, but with an additional listen address for TLS traffic:: + + /path/to/fdbserver -C fdb.cluster -p 127.0.0.1:4500 -p 127.0.0.1:4600:tls + + Since, the server still listens to unencrypted traffic and the cluster file still contains the old address, rest of the processes will be able to talk to this new process. + +2) Once all processes are listening to both TLS and unencrypted traffic, switch one or more coordinator to use TLS. Therefore, if the old coordinator list was ``127.0.0.1:4500,127.0.0.1:4501,127.0.0.1:4502``, the new one would be something like ``127.0.0.1:4600:tls,127.0.0.1:4501,127.0.0.1:4502``. Switching few coordinators to TLS at a time allows a smoother migration and a window to find out clients who do not yet have TLS configured. The number of coordinators each client can connect to can be seen via ``fdbstatus`` (look for ``connected_coordinators`` field in ``clients``):: + + "clients" : { + "count" : 2, + "supported_versions" : [ + { + "client_version" : "6.1.0", + "connected_clients" : [ + { + "address" : "127.0.0.1:42916", + "connected_coordinators": 3, + "log_group" : "default" + }, + { + "address" : "127.0.0.1:42918", + "connected_coordinators": 2, + "log_group" : "default" + } + ] + }, ... + ] + } + +3) If there exist a client (e.g., the client 127.0.0.1:42918 in the above example) that cannot connect to all coordinators after a coordinator is switched to TLS, it mean the client does not set up its TLS correctly. 
System operator should notify the client to correct the client's TLS configuration. Otherwise, when all coordinators are switched to TLS ports, the client will loose connection. + +4) Repeat (2) and (3) until all the addresses in coordinator list are TLS. + +5) Restart each FoundationDB server, but only with one public address that listens to TLS traffic only. + +.. _converting-existing-cluster-before-6.1: + +Converting an existing cluster to use TLS (< v6.1) +================================================== Enabling TLS on an existing (non-TLS) cluster cannot be accomplished without downtime because all processes must have TLS enabled to communicate. At startup, each server process enables TLS if the addresses in its cluster file are TLS-enabled. As a result, server processes must be stopped and restarted to convert them to use TLS. To convert the cluster to TLS in the most conservative way: From c6edcc7f06b1c2027c49e06b4b57b04a94da8d5c Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Thu, 14 Mar 2019 02:10:14 -0700 Subject: [PATCH 40/47] Added schema version string to backup JSON status docs. Bug fix in backup status JSON, the document was being created outside the transaction retry loop so retries would combine partial element sets across all tries into the result. 
--- fdbclient/BackupContainer.actor.cpp | 1 + fdbclient/FileBackupAgent.actor.cpp | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index 03f661e3d2..b62b6ed42b 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -165,6 +165,7 @@ std::string BackupDescription::toString() const { std::string BackupDescription::toJSON() const { JsonBuilderObject doc; + doc.setKey("SchemaVersion", "1.0.0"); doc.setKey("URL", url.c_str()); doc.setKey("Restorable", maxRestorableVersion.present()); diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index 31e434dd70..9f607f0806 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -3908,10 +3908,12 @@ public: ACTOR static Future getStatusJSON(FileBackupAgent* backupAgent, Database cx, std::string tagName) { state Reference tr(new ReadYourWritesTransaction(cx)); - state JsonBuilderObject doc; loop { try { + state JsonBuilderObject doc; + doc.setKey("SchemaVersion", "1.0.0"); + tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr->setOption(FDBTransactionOptions::LOCK_AWARE); @@ -4033,14 +4035,13 @@ public: } doc.setKey("Errors", errorList); } - break; + + return doc.getJson(); } catch (Error &e) { wait(tr->onError(e)); } } - - return doc.getJson(); } ACTOR static Future getStatus(FileBackupAgent* backupAgent, Database cx, bool showErrors, std::string tagName) { From cab284027d6fac5e3f8acbff57749738c571a866 Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Thu, 14 Mar 2019 10:26:22 -0700 Subject: [PATCH 41/47] Be more careful about accessing ThreadSafeDatabase's db member. Preallocate the db on the calling thread so that other callers can access the pointer immediately. 
--- fdbclient/DatabaseContext.h | 10 +++++++-- fdbclient/NativeAPI.actor.cpp | 8 ++++++++ fdbclient/ThreadSafeTransaction.actor.cpp | 25 +++++++++++++++-------- 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/fdbclient/DatabaseContext.h b/fdbclient/DatabaseContext.h index ecb27a20d2..2a5c5ef46a 100644 --- a/fdbclient/DatabaseContext.h +++ b/fdbclient/DatabaseContext.h @@ -46,8 +46,14 @@ private: typedef MultiInterface> LocationInfo; typedef MultiInterface ProxyInfo; -class DatabaseContext : public ReferenceCounted, NonCopyable { +class DatabaseContext : public ReferenceCounted, public FastAllocated, NonCopyable { public: + static DatabaseContext* allocateOnForeignThread() { + return (DatabaseContext*)DatabaseContext::operator new(sizeof(DatabaseContext)); + } + + static void initialize( Reference connFile, int apiVersion, DatabaseContext *db ); + // For internal (fdbserver) use only static Database create( Reference>> clusterInterface, Reference connFile, LocalityData const& clientLocality ); static Database create( Reference> clientInfo, Future clientInfoMonitor, LocalityData clientLocality, bool enableLocalityLoadBalance, int taskID=TaskDefaultEndpoint, bool lockAware=false, int apiVersion=Database::API_VERSION_LATEST ); @@ -97,7 +103,7 @@ public: bool enableLocalityLoadBalance, bool lockAware, int apiVersion = Database::API_VERSION_LATEST ); explicit DatabaseContext( const Error &err ); - + // Key DB-specific information AsyncTrigger masterProxiesChangeTrigger; Future monitorMasterProxiesInfoChange; diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 115b1ff805..c115277c09 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -583,6 +583,14 @@ ACTOR static Future monitorClientInfo( Reference connFile, int apiVersion, DatabaseContext *db ) { + Reference cluster(new Cluster(connFile, apiVersion)); + Reference> clientInfo(new AsyncVar()); + Future clientInfoMonitor = 
monitorClientInfo(cluster->getClusterInterface(), connFile, clientInfo); + + new (db) DatabaseContext( cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, LocalityData(), true, false, apiVersion ); +} + Database DatabaseContext::create(Reference>> clusterInterface, Reference connFile, LocalityData const& clientLocality) { Reference cluster(new Cluster(connFile, clusterInterface)); Reference> clientInfo(new AsyncVar()); diff --git a/fdbclient/ThreadSafeTransaction.actor.cpp b/fdbclient/ThreadSafeTransaction.actor.cpp index 41c7ba8879..02437fcf73 100644 --- a/fdbclient/ThreadSafeTransaction.actor.cpp +++ b/fdbclient/ThreadSafeTransaction.actor.cpp @@ -30,7 +30,8 @@ // Therefore, it is unsafe to call (explicitly or implicitly) this->addRef in any of these functions. ThreadFuture ThreadSafeDatabase::onConnected() { - return onMainThread( [this]() -> Future { + DatabaseContext *db = this->db; + return onMainThread( [db]() -> Future { db->checkDeferredError(); return db->onConnected(); } ); @@ -50,24 +51,30 @@ Reference ThreadSafeDatabase::createTransaction() { } void ThreadSafeDatabase::setOption( FDBDatabaseOptions::Option option, Optional value) { + DatabaseContext *db = this->db; Standalone> passValue = value; - onMainThreadVoid( [this, option, passValue](){ db->setOption(option, passValue.contents()); }, &db->deferredError ); + onMainThreadVoid( [db, option, passValue](){ + db->checkDeferredError(); + db->setOption(option, passValue.contents()); + }, &db->deferredError ); } ThreadSafeDatabase::ThreadSafeDatabase(std::string connFilename, int apiVersion) { - db = NULL; // All accesses to db happen on the main thread, so this pointer will be set by the time anybody uses it - Reference connFile = Reference(new ClusterConnectionFile(ClusterConnectionFile::lookupClusterFileName(connFilename).first)); - onMainThreadVoid([this, connFile, apiVersion](){ + + // Allocate memory for the Database from this thread (so the pointer is known for 
subsequent method calls) + // but run its constructor on the main thread + DatabaseContext *db = this->db = DatabaseContext::allocateOnForeignThread(); + + onMainThreadVoid([db, connFile, apiVersion](){ try { - Database db = Database::createDatabase(connFile, apiVersion); - this->db = db.extractPtr(); + DatabaseContext::initialize(connFile, apiVersion, db); } catch(Error &e) { - this->db = new DatabaseContext(e); + new (db) DatabaseContext(e); } catch(...) { - this->db = new DatabaseContext(unknown_error()); + new (db) DatabaseContext(unknown_error()); } }, NULL); } From bf170493c689a920cba6671c0924320e82f380cb Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Thu, 14 Mar 2019 11:40:51 -0700 Subject: [PATCH 42/47] RYOW onError no longer resets a transaction when it gets a non-retryable error and the transaction is not already in an error state (as of API version 610). --- documentation/sphinx/source/release-notes.rst | 3 ++- fdbclient/ReadYourWrites.actor.cpp | 7 ++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst index 596119f8c8..d2f94145a4 100644 --- a/documentation/sphinx/source/release-notes.rst +++ b/documentation/sphinx/source/release-notes.rst @@ -23,6 +23,7 @@ Features * Added a new atomic op `CompareAndClear`. `(PR #1105) `_ * Added support for IPv6. `(PR #1176) https://github.com/apple/foundationdb/pull/1178`_ * FDB can now simultaneously listen to TLS and unencrypted ports to facilitate smoother migration to TLS. `(PR #1157) https://github.com/apple/foundationdb/pull/1157`_ +* Added `DISABLE_POSIX_KERNEL_AIO` knob to fallback to libeio instead of kernel async I/O (KAIO) for systems that do not support KAIO or O_DIRECT flag. `(PR #1283) https://github.com/apple/foundationdb/pull/1283`_ Performance ----------- @@ -33,7 +34,7 @@ Fixes ----- * Python: Creating a ``SingleFloat`` for the tuple layer didn't work with integers. 
`(PR #1216) `_ -* Added `DISABLE_POSIX_KERNEL_AIO` knob to fallback to libeio instead of kernel async I/O (KAIO) for systems that do not support KAIO or O_DIRECT flag. `(PR #1283) https://github.com/apple/foundationdb/pull/1283`_ +* In some cases, calling ``OnError`` with a non-retryable error would partially reset a transaction. As of API version 610, the transaction will no longer be reset in these cases and will instead put the transaction into an error state. `(PR #) <>`_ Status ------ diff --git a/fdbclient/ReadYourWrites.actor.cpp b/fdbclient/ReadYourWrites.actor.cpp index bc4e200c62..fabe4f8430 100644 --- a/fdbclient/ReadYourWrites.actor.cpp +++ b/fdbclient/ReadYourWrites.actor.cpp @@ -1097,7 +1097,12 @@ public: return Void(); } catch( Error &e ) { if ( !ryw->resetPromise.isSet() ) { - ryw->resetRyow(); + if(ryw->tr.apiVersionAtLeast(610)) { + ryw->resetPromise.sendError(transaction_cancelled()); + } + else { + ryw->resetRyow(); + } } if( e.code() == error_code_broken_promise ) throw transaction_cancelled(); From 59f8e07b316fc7ef5a0922f3ae85551ba5f15cd0 Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Thu, 14 Mar 2019 11:42:32 -0700 Subject: [PATCH 43/47] Update release notes with pull request link. --- documentation/sphinx/source/release-notes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst index d2f94145a4..2fd01f30c2 100644 --- a/documentation/sphinx/source/release-notes.rst +++ b/documentation/sphinx/source/release-notes.rst @@ -34,7 +34,7 @@ Fixes ----- * Python: Creating a ``SingleFloat`` for the tuple layer didn't work with integers. `(PR #1216) `_ -* In some cases, calling ``OnError`` with a non-retryable error would partially reset a transaction. As of API version 610, the transaction will no longer be reset in these cases and will instead put the transaction into an error state. 
`(PR #) <>`_ +* In some cases, calling ``OnError`` with a non-retryable error would partially reset a transaction. As of API version 610, the transaction will no longer be reset in these cases and will instead put the transaction into an error state. `(PR #1298) `_ Status ------ From e12d242d189c74b7a362031e9c77b06be675efd9 Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Thu, 14 Mar 2019 13:42:03 -0700 Subject: [PATCH 44/47] Address review comments. --- fdbclient/DatabaseContext.h | 4 +--- fdbclient/NativeAPI.actor.cpp | 20 ++++++++++---------- fdbclient/NativeAPI.actor.h | 2 +- fdbclient/ThreadSafeTransaction.actor.cpp | 2 +- 4 files changed, 13 insertions(+), 15 deletions(-) diff --git a/fdbclient/DatabaseContext.h b/fdbclient/DatabaseContext.h index 2a5c5ef46a..788cdabbd8 100644 --- a/fdbclient/DatabaseContext.h +++ b/fdbclient/DatabaseContext.h @@ -52,8 +52,6 @@ public: return (DatabaseContext*)DatabaseContext::operator new(sizeof(DatabaseContext)); } - static void initialize( Reference connFile, int apiVersion, DatabaseContext *db ); - // For internal (fdbserver) use only static Database create( Reference>> clusterInterface, Reference connFile, LocalityData const& clientLocality ); static Database create( Reference> clientInfo, Future clientInfoMonitor, LocalityData clientLocality, bool enableLocalityLoadBalance, int taskID=TaskDefaultEndpoint, bool lockAware=false, int apiVersion=Database::API_VERSION_LATEST ); @@ -103,7 +101,7 @@ public: bool enableLocalityLoadBalance, bool lockAware, int apiVersion = Database::API_VERSION_LATEST ); explicit DatabaseContext( const Error &err ); - + // Key DB-specific information AsyncTrigger masterProxiesChangeTrigger; Future monitorMasterProxiesInfoChange; diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index c115277c09..e186f59e00 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -583,14 +583,6 @@ ACTOR static Future monitorClientInfo( Reference connFile, int 
apiVersion, DatabaseContext *db ) { - Reference cluster(new Cluster(connFile, apiVersion)); - Reference> clientInfo(new AsyncVar()); - Future clientInfoMonitor = monitorClientInfo(cluster->getClusterInterface(), connFile, clientInfo); - - new (db) DatabaseContext( cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, LocalityData(), true, false, apiVersion ); -} - Database DatabaseContext::create(Reference>> clusterInterface, Reference connFile, LocalityData const& clientLocality) { Reference cluster(new Cluster(connFile, clusterInterface)); Reference> clientInfo(new AsyncVar()); @@ -758,12 +750,20 @@ Reference DatabaseContext::getConnectionFile() { return cluster->getConnectionFile(); } -Database Database::createDatabase( Reference connFile, int apiVersion, LocalityData const& clientLocality ) { +Database Database::createDatabase( Reference connFile, int apiVersion, LocalityData const& clientLocality, DatabaseContext *preallocatedDb ) { Reference cluster(new Cluster(connFile, apiVersion)); Reference> clientInfo(new AsyncVar()); Future clientInfoMonitor = monitorClientInfo(cluster->getClusterInterface(), connFile, clientInfo); - return Database( new DatabaseContext( cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false, apiVersion ) ); + DatabaseContext *db; + if(preallocatedDb) { + db = new (preallocatedDb) DatabaseContext(cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false, apiVersion); + } + else { + db = new DatabaseContext(cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false, apiVersion); + } + + return Database(db); } Database Database::createDatabase( std::string connFileName, int apiVersion, LocalityData const& clientLocality ) { diff --git a/fdbclient/NativeAPI.actor.h b/fdbclient/NativeAPI.actor.h index 50b42730ab..2a1813b91c 100644 --- 
a/fdbclient/NativeAPI.actor.h +++ b/fdbclient/NativeAPI.actor.h @@ -68,7 +68,7 @@ class Database { public: enum { API_VERSION_LATEST = -1 }; - static Database createDatabase( Reference connFile, int apiVersion, LocalityData const& clientLocality=LocalityData() ); + static Database createDatabase( Reference connFile, int apiVersion, LocalityData const& clientLocality=LocalityData(), DatabaseContext *preallocatedDb=nullptr ); static Database createDatabase( std::string connFileName, int apiVersion, LocalityData const& clientLocality=LocalityData() ); Database() {} // an uninitialized database can be destructed or reassigned safely; that's it diff --git a/fdbclient/ThreadSafeTransaction.actor.cpp b/fdbclient/ThreadSafeTransaction.actor.cpp index 02437fcf73..90db1b932d 100644 --- a/fdbclient/ThreadSafeTransaction.actor.cpp +++ b/fdbclient/ThreadSafeTransaction.actor.cpp @@ -68,7 +68,7 @@ ThreadSafeDatabase::ThreadSafeDatabase(std::string connFilename, int apiVersion) onMainThreadVoid([db, connFile, apiVersion](){ try { - DatabaseContext::initialize(connFile, apiVersion, db); + Database::createDatabase(connFile, apiVersion, LocalityData(), db).extractPtr(); } catch(Error &e) { new (db) DatabaseContext(e); From 98f8fa62e980b1e288a850e3e993d2569e952120 Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Thu, 14 Mar 2019 15:05:36 -0700 Subject: [PATCH 45/47] Update generated.go --- bindings/go/src/fdb/generated.go | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/bindings/go/src/fdb/generated.go b/bindings/go/src/fdb/generated.go index aea915cb22..a2507b1674 100644 --- a/bindings/go/src/fdb/generated.go +++ b/bindings/go/src/fdb/generated.go @@ -92,9 +92,9 @@ func (o NetworkOptions) SetTraceLogGroup(param string) error { return o.setOpt(33, []byte(param)) } -// Selects trace output format for this client. xml (the default) and json are supported. +// Select the format of the log files. xml (the default) and json are supported. 
// -// Parameter: trace format +// Parameter: Format of trace files func (o NetworkOptions) SetTraceFormat(param string) error { return o.setOpt(34, []byte(param)) } @@ -351,13 +351,25 @@ func (o TransactionOptions) SetDebugRetryLogging(param string) error { return o.setOpt(401, []byte(param)) } -// Enables tracing for this transaction and logs results to the client trace logs. Client trace logging must be enabled to get log output. +// Deprecated // // Parameter: String identifier to be used in the logs when tracing this transaction. The identifier must not exceed 100 characters. func (o TransactionOptions) SetTransactionLoggingEnable(param string) error { return o.setOpt(402, []byte(param)) } +// Sets a client provided identifier for the transaction that will be used in scenarios like tracing or profiling. Client trace logging or transaction profiling must be separately enabled. +// +// Parameter: String identifier to be used when tracing or profiling this transaction. The identifier must not exceed 100 characters. +func (o TransactionOptions) SetDebugTransactionIdentifier(param string) error { + return o.setOpt(403, []byte(param)) +} + +// Enables tracing for this transaction and logs results to the client trace logs. The DEBUG_TRANSACTION_IDENTIFIER option must be set before using this option, and client trace logging must be enabled and to get log output. +func (o TransactionOptions) SetLogTransaction() error { + return o.setOpt(404, nil) +} + // Set a timeout in milliseconds which, when elapsed, will cause the transaction automatically to be cancelled. Valid parameter values are ``[0, INT_MAX]``. If set to 0, will disable all timeouts. All pending and any future uses of the transaction will throw an exception. The transaction can be used again after it is reset. Like all transaction options, a timeout must be reset after a call to onError. This behavior allows the user to make the timeout dynamic. 
// // Parameter: value in milliseconds of timeout @@ -512,12 +524,12 @@ func (t Transaction) Min(key KeyConvertible, param []byte) { t.atomicOp(key.FDBKey(), param, 13) } -// Transforms ``key`` using a versionstamp for the transaction. Sets the transformed key in the database to ``param``. The key is transformed by removing the final four bytes from the key and reading those as a little-Endian 32-bit integer to get a position ``pos``. The 10 bytes of the key from ``pos`` to ``pos + 10`` are replaced with the versionstamp of the transaction used. The first byte of the key is position 0. A versionstamp is a 10 byte, unique, monotonically (but not sequentially) increasing value for each committed transaction. The first 8 bytes are the committed version of the database (serialized in big-Endian order). The last 2 bytes are monotonic in the serialization order for transactions. WARNING: At this time, versionstamps are compatible with the Tuple layer only in the Java and Python bindings. Also, note that prior to API version 520, the offset was computed from only the final two bytes rather than the final four bytes. +// Transforms ``key`` using a versionstamp for the transaction. Sets the transformed key in the database to ``param``. The key is transformed by removing the final four bytes from the key and reading those as a little-Endian 32-bit integer to get a position ``pos``. The 10 bytes of the key from ``pos`` to ``pos + 10`` are replaced with the versionstamp of the transaction used. The first byte of the key is position 0. A versionstamp is a 10 byte, unique, monotonically (but not sequentially) increasing value for each committed transaction. The first 8 bytes are the committed version of the database (serialized in big-Endian order). The last 2 bytes are monotonic in the serialization order for transactions. WARNING: At this time, versionstamps are compatible with the Tuple layer only in the Java, Python, and Go bindings. 
Also, note that prior to API version 520, the offset was computed from only the final two bytes rather than the final four bytes. func (t Transaction) SetVersionstampedKey(key KeyConvertible, param []byte) { t.atomicOp(key.FDBKey(), param, 14) } -// Transforms ``param`` using a versionstamp for the transaction. Sets the ``key`` given to the transformed ``param``. The parameter is transformed by removing the final four bytes from ``param`` and reading those as a little-Endian 32-bit integer to get a position ``pos``. The 10 bytes of the parameter from ``pos`` to ``pos + 10`` are replaced with the versionstamp of the transaction used. The first byte of the parameter is position 0. A versionstamp is a 10 byte, unique, monotonically (but not sequentially) increasing value for each committed transaction. The first 8 bytes are the committed version of the database (serialized in big-Endian order). The last 2 bytes are monotonic in the serialization order for transactions. WARNING: At this time, versionstamps are compatible with the Tuple layer only in the Java and Python bindings. Also, note that prior to API version 520, the versionstamp was always placed at the beginning of the parameter rather than computing an offset. +// Transforms ``param`` using a versionstamp for the transaction. Sets the ``key`` given to the transformed ``param``. The parameter is transformed by removing the final four bytes from ``param`` and reading those as a little-Endian 32-bit integer to get a position ``pos``. The 10 bytes of the parameter from ``pos`` to ``pos + 10`` are replaced with the versionstamp of the transaction used. The first byte of the parameter is position 0. A versionstamp is a 10 byte, unique, monotonically (but not sequentially) increasing value for each committed transaction. The first 8 bytes are the committed version of the database (serialized in big-Endian order). The last 2 bytes are monotonic in the serialization order for transactions. 
WARNING: At this time, versionstamps are compatible with the Tuple layer only in the Java, Python, and Go bindings. Also, note that prior to API version 520, the versionstamp was always placed at the beginning of the parameter rather than computing an offset. func (t Transaction) SetVersionstampedValue(key KeyConvertible, param []byte) { t.atomicOp(key.FDBKey(), param, 15) } @@ -532,6 +544,11 @@ func (t Transaction) ByteMax(key KeyConvertible, param []byte) { t.atomicOp(key.FDBKey(), param, 17) } +// Performs an atomic ``compare and clear`` operation. If the existing value in the database is equal to the given value, then given key is cleared. +func (t Transaction) CompareAndClear(key KeyConvertible, param []byte) { + t.atomicOp(key.FDBKey(), param, 20) +} + type conflictRangeType int const ( From 9ed41a49321b7eab6a086fb3ff4a6aff105256fe Mon Sep 17 00:00:00 2001 From: Moussa Ehsan Date: Fri, 15 Mar 2019 10:24:33 -0700 Subject: [PATCH 46/47] Update python version to 3.7 in the docs This change updates the documentation to reflect that we support python 3.7. --- documentation/sphinx/source/api-python.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/sphinx/source/api-python.rst b/documentation/sphinx/source/api-python.rst index 11f04d652a..394016c379 100644 --- a/documentation/sphinx/source/api-python.rst +++ b/documentation/sphinx/source/api-python.rst @@ -53,7 +53,7 @@ Python API Installation ============ -The FoundationDB Python API is compatible with Python 2.7 - 3.6. You will need to have a Python version within this range on your system before the FoundationDB Python API can be installed. +The FoundationDB Python API is compatible with Python 2.7 - 3.7. You will need to have a Python version within this range on your system before the FoundationDB Python API can be installed. Also please note that Python 3.7 no longer bundles a full copy of libffi, which is used for building the _ctypes module on non-OSX UNIX platforms. 
Hence, if you are using Python 3.7, you should make sure libffi is already installed on your system. On macOS, the FoundationDB Python API is installed as part of the FoundationDB installation (see :ref:`installing-client-binaries`). On Ubuntu or RHEL/CentOS, you will need to install the FoundationDB Python API manually. From 40293d6fae9c143bba827ddbb5ae0ebf0082c6fa Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Fri, 15 Mar 2019 11:05:37 -0700 Subject: [PATCH 47/47] Update documentation/sphinx/source/api-python.rst Co-Authored-By: amouehsan --- documentation/sphinx/source/api-python.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/sphinx/source/api-python.rst b/documentation/sphinx/source/api-python.rst index 394016c379..2c08b2bf84 100644 --- a/documentation/sphinx/source/api-python.rst +++ b/documentation/sphinx/source/api-python.rst @@ -53,7 +53,7 @@ Python API Installation ============ -The FoundationDB Python API is compatible with Python 2.7 - 3.7. You will need to have a Python version within this range on your system before the FoundationDB Python API can be installed. Also please note that Python 3.7 no longer bundles a full copy of libffi, which is used for building the _ctypes module on non-OSX UNIX platforms. Hence, if you are using Python 3.7, you should make sure libffi is already installed on your system. +The FoundationDB Python API is compatible with Python 2.7 - 3.7. You will need to have a Python version within this range on your system before the FoundationDB Python API can be installed. Also please note that Python 3.7 no longer bundles a full copy of libffi, which is used for building the _ctypes module on non-macOS UNIX platforms. Hence, if you are using Python 3.7, you should make sure libffi is already installed on your system. On macOS, the FoundationDB Python API is installed as part of the FoundationDB installation (see :ref:`installing-client-binaries`). 
On Ubuntu or RHEL/CentOS, you will need to install the FoundationDB Python API manually.