From 94385447bc4f7b99da8ec30a711f9ad5fd2b8f74 Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Tue, 26 Feb 2019 16:20:05 -0800 Subject: [PATCH 01/47] Status: Get if client configured TLS To understand if all clients have configured TLS, we check the tlsoption when a client tries to open database. This is similar to how we track the versions of multi-version clients. --- documentation/sphinx/source/mr-status.rst | 1 + fdbclient/ClusterInterface.h | 3 ++- fdbclient/NativeAPI.actor.cpp | 1 + fdbclient/Schemas.cpp | 3 ++- fdbrpc/FlowTransport.actor.cpp | 5 ++++- fdbrpc/TLSConnection.h | 6 ++++++ fdbserver/ClusterController.actor.cpp | 9 +++++++-- fdbserver/Status.actor.cpp | 12 ++++++++++-- fdbserver/Status.h | 2 +- flow/serialize.h | 2 +- 10 files changed, 35 insertions(+), 9 deletions(-) diff --git a/documentation/sphinx/source/mr-status.rst b/documentation/sphinx/source/mr-status.rst index 772db55f2e..251f23d556 100644 --- a/documentation/sphinx/source/mr-status.rst +++ b/documentation/sphinx/source/mr-status.rst @@ -81,6 +81,7 @@ The following format informally describes the JSON containing the status data. 
T { "address": "127.0.0.1:1234", "log_group": "default" + "tls_configured": true } ], "count": 1, diff --git a/fdbclient/ClusterInterface.h b/fdbclient/ClusterInterface.h index cbc61bd908..a133f269a4 100644 --- a/fdbclient/ClusterInterface.h +++ b/fdbclient/ClusterInterface.h @@ -119,13 +119,14 @@ struct OpenDatabaseRequest { Arena arena; StringRef issues, traceLogGroup; VectorRef supportedVersions; + bool client_tls_configured; UID knownClientInfoID; ReplyPromise< struct ClientDBInfo > reply; template void serialize(Ar& ar) { ASSERT( ar.protocolVersion() >= 0x0FDB00A400040001LL ); - serializer(ar, issues, supportedVersions, traceLogGroup, knownClientInfoID, reply, arena); + serializer(ar, issues, supportedVersions, client_tls_configured, traceLogGroup, knownClientInfoID, reply, arena); } }; diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index b15bdf27dd..49ba9181f7 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -499,6 +499,7 @@ ACTOR static Future monitorClientInfo( Referenceget().id; req.supportedVersions = VectorRef(req.arena, networkOptions.supportedVersions); + req.client_tls_configured = tlsOptions.isValid() && tlsOptions->isConfigured(); // Monitor if client TLS is configured req.traceLogGroup = StringRef(req.arena, networkOptions.traceLogGroup); ClusterConnectionString fileConnectionString; diff --git a/fdbclient/Schemas.cpp b/fdbclient/Schemas.cpp index d8f2883630..b387f7b7df 100644 --- a/fdbclient/Schemas.cpp +++ b/fdbclient/Schemas.cpp @@ -278,7 +278,8 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( "connected_clients":[ { "address":"127.0.0.1:9898", - "log_group":"default" + "log_group":"default", + "tls_configured":true } ], "count" : 1, diff --git a/fdbrpc/FlowTransport.actor.cpp b/fdbrpc/FlowTransport.actor.cpp index 3e015d2995..912a147945 100644 --- a/fdbrpc/FlowTransport.actor.cpp +++ b/fdbrpc/FlowTransport.actor.cpp @@ -299,6 +299,7 @@ struct Peer : 
NonCopyable { break; } } + if ( !destination.isPublic() || outgoingConnectionIdle || destination > compatibleAddr ) { // Keep the new connection TraceEvent("IncomingConnection", conn->getDebugID()) @@ -648,6 +649,7 @@ ACTOR static Future connectionReader( uint64_t connectionId = 0; int32_t connectPacketSize = p->minimumSize(); + bool client_tls_capable = false; if ( unprocessed_end-unprocessed_begin >= connectPacketSize ) { if(p->protocolVersion >= 0x0FDB00A444020001) { connectionId = p->connectionId; @@ -687,7 +689,8 @@ ACTOR static Future connectionReader( TraceEvent("ConnectionEstablished", conn->getDebugID()) .suppressFor(1.0) .detail("Peer", conn->getPeerAddress()) - .detail("ConnectionId", connectionId); + .detail("ConnectionId", connectionId) + .detail("ConnectPacketSize", connectPacketSize); } if(connectionId > 1) { diff --git a/fdbrpc/TLSConnection.h b/fdbrpc/TLSConnection.h index f8395b66a0..aab2c5228c 100644 --- a/fdbrpc/TLSConnection.h +++ b/fdbrpc/TLSConnection.h @@ -89,6 +89,12 @@ struct TLSOptions : ReferenceCounted { Reference get_policy(PolicyType type); bool enabled(); + bool isConfigured() const { + return policyInfo.ca_path.size() || policyInfo.ca_contents.size() || policyInfo.cert_path.size() || + policyInfo.cert_contents.size() || policyInfo.key_path.size() || policyInfo.key_contents.size(); + } + + struct PolicyInfo { std::string ca_path; Standalone ca_contents; diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 53962c0340..e7635d03bf 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -92,6 +92,7 @@ public: ProcessIssuesMap clientsWithIssues, workersWithIssues; std::map incompatibleConnections; ClientVersionMap clientVersionMap; + std::map clientTLSConfigMap; // Does the client has TLS configured std::map traceLogGroupMap; AsyncTrigger forceMasterFailure; int64_t masterRegistrationCount; @@ -1221,6 +1222,7 @@ ACTOR Future clusterOpenDatabase( UID 
knownClientInfoID, std::string issues, Standalone> supportedVersions, + bool client_tls_configured, Standalone traceLogGroup, ReplyPromise reply) { @@ -1232,6 +1234,8 @@ ACTOR Future clusterOpenDatabase( db->clientVersionMap[reply.getEndpoint().getPrimaryAddress()] = supportedVersions; } + db->clientTLSConfigMap[reply.getEndpoint().getPrimaryAddress()] = client_tls_configured; + db->traceLogGroupMap[reply.getEndpoint().getPrimaryAddress()] = traceLogGroup.toString(); while (db->clientInfo->get().id == knownClientInfoID) { @@ -1243,6 +1247,7 @@ ACTOR Future clusterOpenDatabase( removeIssue( db->clientsWithIssues, reply.getEndpoint().getPrimaryAddress(), issues, issueID ); db->clientVersionMap.erase(reply.getEndpoint().getPrimaryAddress()); + db->clientTLSConfigMap.erase(reply.getEndpoint().getPrimaryAddress()); db->traceLogGroupMap.erase(reply.getEndpoint().getPrimaryAddress()); reply.send( db->clientInfo->get() ); @@ -1907,7 +1912,7 @@ ACTOR Future statusServer(FutureStream< StatusRequest> requests, } } - state ErrorOr result = wait(errorOr(clusterGetStatus(self->db.serverInfo, self->cx, workers, self->db.workersWithIssues, self->db.clientsWithIssues, self->db.clientVersionMap, self->db.traceLogGroupMap, coordinators, incompatibleConnections, self->datacenterVersionDifference))); + state ErrorOr result = wait(errorOr(clusterGetStatus(self->db.serverInfo, self->cx, workers, self->db.workersWithIssues, self->db.clientsWithIssues, self->db.clientVersionMap, self->db.clientTLSConfigMap, self->db.traceLogGroupMap, coordinators, incompatibleConnections, self->datacenterVersionDifference))); if (result.isError() && result.getError().code() == error_code_actor_cancelled) throw result.getError(); @@ -2402,7 +2407,7 @@ ACTOR Future clusterControllerCore( ClusterControllerFullInterface interf, return Void(); } when( OpenDatabaseRequest req = waitNext( interf.clientInterface.openDatabase.getFuture() ) ) { - self.addActor.send( clusterOpenDatabase( &self.db, 
req.knownClientInfoID, req.issues.toString(), req.supportedVersions, req.traceLogGroup, req.reply ) ); + self.addActor.send( clusterOpenDatabase( &self.db, req.knownClientInfoID, req.issues.toString(), req.supportedVersions, req.client_tls_configured, req.traceLogGroup, req.reply ) ); } when( RecruitFromConfigurationRequest req = waitNext( interf.recruitFromConfiguration.getFuture() ) ) { self.addActor.send( clusterRecruitFromConfiguration( &self, req ) ); diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index 87071beec6..44635c16ff 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -839,7 +839,9 @@ ACTOR static Future processStatusFetcher( return processMap; } -static JsonBuilderObject clientStatusFetcher(ClientVersionMap clientVersionMap, std::map traceLogGroupMap) { +static JsonBuilderObject clientStatusFetcher(ClientVersionMap clientVersionMap, + std::map traceLogGroupMap, + std::map clientTLSConfigMap) { JsonBuilderObject clientStatus; clientStatus["count"] = (int64_t)clientVersionMap.size(); @@ -864,6 +866,11 @@ static JsonBuilderObject clientStatusFetcher(ClientVersionMap clientVersionMap, JsonBuilderObject cli; cli["address"] = client.toString(); cli["log_group"] = traceLogGroupMap[client]; + bool client_tls_configured = false; + if (clientTLSConfigMap.find(client) != clientTLSConfigMap.end()) { + client_tls_configured = clientTLSConfigMap[client]; + } + cli["tls_configured"] = client_tls_configured; clients.push_back(cli); } @@ -1809,6 +1816,7 @@ ACTOR Future clusterGetStatus( ProcessIssuesMap workerIssues, ProcessIssuesMap clientIssues, ClientVersionMap clientVersionMap, + std::map clientTLSConfigMap, std::map traceLogGroupMap, ServerCoordinators coordinators, std::vector incompatibleConnections, @@ -2029,7 +2037,7 @@ ACTOR Future clusterGetStatus( JsonBuilderObject processStatus = wait(processStatusFetcher(db, workers, pMetrics, mMetrics, latestError, traceFileOpenErrors, programStarts, processIssues, 
storageServers, tLogs, proxies, cx, configuration, &status_incomplete_reasons)); statusObj["processes"] = processStatus; - statusObj["clients"] = clientStatusFetcher(clientVersionMap, traceLogGroupMap); + statusObj["clients"] = clientStatusFetcher(clientVersionMap, traceLogGroupMap, clientTLSConfigMap); JsonBuilderArray incompatibleConnectionsArray; for(auto it : incompatibleConnections) { diff --git a/fdbserver/Status.h b/fdbserver/Status.h index 9697d3d77d..a923b2b9cc 100644 --- a/fdbserver/Status.h +++ b/fdbserver/Status.h @@ -31,7 +31,7 @@ typedef std::map< NetworkAddress, std::pair > ProcessIssuesMap; typedef std::map< NetworkAddress, Standalone> > ClientVersionMap; Future clusterGetStatus( Reference> const& db, Database const& cx, vector> const& workers, - ProcessIssuesMap const& workerIssues, ProcessIssuesMap const& clientIssues, ClientVersionMap const& clientVersionMap, std::map const& traceLogGroupMap, + ProcessIssuesMap const& workerIssues, ProcessIssuesMap const& clientIssues, ClientVersionMap const& clientVersionMap, std::map const& clientTLSConfigMap, std::map const& traceLogGroupMap, ServerCoordinators const& coordinators, std::vector const& incompatibleConnections, Version const& datacenterVersionDifference ); #endif diff --git a/flow/serialize.h b/flow/serialize.h index f5754da8b1..7189f124ad 100644 --- a/flow/serialize.h +++ b/flow/serialize.h @@ -589,7 +589,7 @@ struct SendBuffer { struct PacketBuffer : SendBuffer, FastAllocated { int reference_count; - enum { DATA_SIZE = 4096 - 28 }; + enum { DATA_SIZE = 4096 - 28 }; //28 is the size of the PacketBuffer fields uint8_t data[ DATA_SIZE ]; PacketBuffer() : reference_count(1) { From c0535c49bbee6deb309d52319fe4ff971483be5c Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Tue, 26 Feb 2019 16:20:05 -0800 Subject: [PATCH 02/47] Status: TLS client status Use ClientStatusInfo structure for each network address (client), instead of passing each status info as a parameter. 
--- fdbserver/ClusterController.actor.cpp | 13 +++++-------- fdbserver/Status.actor.cpp | 14 ++++++-------- fdbserver/Status.h | 7 ++++++- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index e7635d03bf..5481779093 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -92,8 +92,7 @@ public: ProcessIssuesMap clientsWithIssues, workersWithIssues; std::map incompatibleConnections; ClientVersionMap clientVersionMap; - std::map clientTLSConfigMap; // Does the client has TLS configured - std::map traceLogGroupMap; + std::map clientStatusInfoMap; AsyncTrigger forceMasterFailure; int64_t masterRegistrationCount; bool recoveryStalled; @@ -1234,9 +1233,7 @@ ACTOR Future clusterOpenDatabase( db->clientVersionMap[reply.getEndpoint().getPrimaryAddress()] = supportedVersions; } - db->clientTLSConfigMap[reply.getEndpoint().getPrimaryAddress()] = client_tls_configured; - - db->traceLogGroupMap[reply.getEndpoint().getPrimaryAddress()] = traceLogGroup.toString(); + db->clientStatusInfoMap[reply.getEndpoint().getPrimaryAddress()] = {traceLogGroup.toString(), client_tls_configured}; while (db->clientInfo->get().id == knownClientInfoID) { choose { @@ -1247,8 +1244,7 @@ ACTOR Future clusterOpenDatabase( removeIssue( db->clientsWithIssues, reply.getEndpoint().getPrimaryAddress(), issues, issueID ); db->clientVersionMap.erase(reply.getEndpoint().getPrimaryAddress()); - db->clientTLSConfigMap.erase(reply.getEndpoint().getPrimaryAddress()); - db->traceLogGroupMap.erase(reply.getEndpoint().getPrimaryAddress()); + db->clientStatusInfoMap.erase(reply.getEndpoint().getPrimaryAddress()); reply.send( db->clientInfo->get() ); return Void(); @@ -1912,7 +1908,8 @@ ACTOR Future statusServer(FutureStream< StatusRequest> requests, } } - state ErrorOr result = wait(errorOr(clusterGetStatus(self->db.serverInfo, self->cx, workers, self->db.workersWithIssues, 
self->db.clientsWithIssues, self->db.clientVersionMap, self->db.clientTLSConfigMap, self->db.traceLogGroupMap, coordinators, incompatibleConnections, self->datacenterVersionDifference))); + state ErrorOr result = wait(errorOr(clusterGetStatus(self->db.serverInfo, self->cx, workers, self->db.workersWithIssues, self->db.clientsWithIssues, self->db.clientVersionMap, self->db.clientStatusInfoMap, coordinators, incompatibleConnections, self->datacenterVersionDifference))); + if (result.isError() && result.getError().code() == error_code_actor_cancelled) throw result.getError(); diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index 44635c16ff..cb2d5bbb62 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -840,8 +840,7 @@ ACTOR static Future processStatusFetcher( } static JsonBuilderObject clientStatusFetcher(ClientVersionMap clientVersionMap, - std::map traceLogGroupMap, - std::map clientTLSConfigMap) { + std::map clientStatusInfoMap) { JsonBuilderObject clientStatus; clientStatus["count"] = (int64_t)clientVersionMap.size(); @@ -865,10 +864,10 @@ static JsonBuilderObject clientStatusFetcher(ClientVersionMap clientVersionMap, for(auto client : cv.second) { JsonBuilderObject cli; cli["address"] = client.toString(); - cli["log_group"] = traceLogGroupMap[client]; + cli["log_group"] = clientStatusInfoMap[client].traceLogGroup; bool client_tls_configured = false; - if (clientTLSConfigMap.find(client) != clientTLSConfigMap.end()) { - client_tls_configured = clientTLSConfigMap[client]; + if (clientStatusInfoMap.find(client) != clientStatusInfoMap.end()) { + client_tls_configured = clientStatusInfoMap[client].clientTLSConfigured; } cli["tls_configured"] = client_tls_configured; clients.push_back(cli); @@ -1816,8 +1815,7 @@ ACTOR Future clusterGetStatus( ProcessIssuesMap workerIssues, ProcessIssuesMap clientIssues, ClientVersionMap clientVersionMap, - std::map clientTLSConfigMap, - std::map traceLogGroupMap, + std::map 
clientStatusInfoMap, ServerCoordinators coordinators, std::vector incompatibleConnections, Version datacenterVersionDifference ) @@ -2037,7 +2035,7 @@ ACTOR Future clusterGetStatus( JsonBuilderObject processStatus = wait(processStatusFetcher(db, workers, pMetrics, mMetrics, latestError, traceFileOpenErrors, programStarts, processIssues, storageServers, tLogs, proxies, cx, configuration, &status_incomplete_reasons)); statusObj["processes"] = processStatus; - statusObj["clients"] = clientStatusFetcher(clientVersionMap, traceLogGroupMap, clientTLSConfigMap); + statusObj["clients"] = clientStatusFetcher(clientVersionMap, clientStatusInfoMap); JsonBuilderArray incompatibleConnectionsArray; for(auto it : incompatibleConnections) { diff --git a/fdbserver/Status.h b/fdbserver/Status.h index a923b2b9cc..d076e3885c 100644 --- a/fdbserver/Status.h +++ b/fdbserver/Status.h @@ -30,8 +30,13 @@ typedef std::map< NetworkAddress, std::pair > ProcessIssuesMap; typedef std::map< NetworkAddress, Standalone> > ClientVersionMap; +struct ClientStatusInfo { + std::string traceLogGroup; + bool clientTLSConfigured; // Does client configure its TLS options +}; + Future clusterGetStatus( Reference> const& db, Database const& cx, vector> const& workers, - ProcessIssuesMap const& workerIssues, ProcessIssuesMap const& clientIssues, ClientVersionMap const& clientVersionMap, std::map const& clientTLSConfigMap, std::map const& traceLogGroupMap, + ProcessIssuesMap const& workerIssues, ProcessIssuesMap const& clientIssues, ClientVersionMap const& clientVersionMap, std::map const& clientStatusInfoMap, ServerCoordinators const& coordinators, std::vector const& incompatibleConnections, Version const& datacenterVersionDifference ); #endif From b7a52e81e233c10dae30bc501a3c12805f57a38e Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Tue, 5 Mar 2019 21:00:19 -0800 Subject: [PATCH 03/47] Status: Count connected coordinators per client A client will always try to connect all coordinators. 
This commit let Status track the number of connected coordinators for each client. This allows us to do canary in coordinators. For example, when we switch from non-TLS to TLS, we can switch 1 coordinator from non-TLS to TLS. This can help check if a client has the ability to connect through TLS. We can make the non-TLS to TLS switch for each coordinators one by one. This avoid the risk of losing connection in the switch. --- fdbclient/ClusterInterface.h | 4 ++-- fdbclient/ManagementAPI.actor.cpp | 3 ++- fdbclient/MonitorLeader.actor.cpp | 21 ++++++++++++++----- fdbclient/MonitorLeader.h | 8 ++++---- fdbclient/NativeAPI.actor.cpp | 29 ++++++++++++++++----------- fdbclient/NativeAPI.actor.h | 6 +++--- fdbclient/ReadYourWrites.actor.cpp | 3 ++- fdbclient/Schemas.cpp | 2 +- fdbclient/StatusClient.actor.cpp | 3 ++- fdbserver/ClusterController.actor.cpp | 8 +++++--- fdbserver/Status.actor.cpp | 6 +++--- fdbserver/Status.h | 2 +- fdbserver/tester.actor.cpp | 3 ++- fdbserver/worker.actor.cpp | 3 ++- 14 files changed, 62 insertions(+), 39 deletions(-) diff --git a/fdbclient/ClusterInterface.h b/fdbclient/ClusterInterface.h index a133f269a4..6b45fa2226 100644 --- a/fdbclient/ClusterInterface.h +++ b/fdbclient/ClusterInterface.h @@ -119,14 +119,14 @@ struct OpenDatabaseRequest { Arena arena; StringRef issues, traceLogGroup; VectorRef supportedVersions; - bool client_tls_configured; + int connectedCoordinatorsNum; // Number of coordinators connected by the client UID knownClientInfoID; ReplyPromise< struct ClientDBInfo > reply; template void serialize(Ar& ar) { ASSERT( ar.protocolVersion() >= 0x0FDB00A400040001LL ); - serializer(ar, issues, supportedVersions, client_tls_configured, traceLogGroup, knownClientInfoID, reply, arena); + serializer(ar, issues, supportedVersions, connectedCoordinatorsNum, traceLogGroup, knownClientInfoID, reply, arena); } }; diff --git a/fdbclient/ManagementAPI.actor.cpp b/fdbclient/ManagementAPI.actor.cpp index df17d6cc93..14e15c54d0 100644 --- 
a/fdbclient/ManagementAPI.actor.cpp +++ b/fdbclient/ManagementAPI.actor.cpp @@ -1542,7 +1542,8 @@ ACTOR Future checkDatabaseLock( Reference tr, U ACTOR Future forceRecovery( Reference clusterFile, Key dcId ) { state Reference>> clusterInterface(new AsyncVar>); - state Future leaderMon = monitorLeader(clusterFile, clusterInterface); + state Reference> unused(new AsyncVar); + state Future leaderMon = monitorLeader(clusterFile, clusterInterface, unused); loop { choose { diff --git a/fdbclient/MonitorLeader.actor.cpp b/fdbclient/MonitorLeader.actor.cpp index 64482da09d..32dac8c5fb 100644 --- a/fdbclient/MonitorLeader.actor.cpp +++ b/fdbclient/MonitorLeader.actor.cpp @@ -308,9 +308,15 @@ ClientLeaderRegInterface::ClientLeaderRegInterface( INetwork* local ) { getLeader.makeWellKnownEndpoint( WLTOKEN_CLIENTLEADERREG_GETLEADER, TaskCoordination ); } -ACTOR Future monitorNominee( Key key, ClientLeaderRegInterface coord, AsyncTrigger* nomineeChange, Optional *info, int generation ) { +// Nominee is the worker among all workers that are considered as leader by a coordinator +// This function contacts a coordinator coord to ask if the worker is considered as a leader (i.e., if the worker +// is a nominee) +ACTOR Future monitorNominee( Key key, ClientLeaderRegInterface coord, AsyncTrigger* nomineeChange, Optional *info, int generation, Reference> connectedCoordinatorsNum ) { loop { state Optional li = wait( retryBrokenPromise( coord.getLeader, GetLeaderRequest( key, info->present() ? info->get().changeID : UID() ), TaskCoordinationReply ) ); + if (li.present()) { + connectedCoordinatorsNum->set(connectedCoordinatorsNum->get() + 1); + } wait( Future(Void()) ); // Make sure we weren't cancelled TraceEvent("GetLeaderReply").suppressFor(1.0).detail("Coordinator", coord.getLeader.getEndpoint().getPrimaryAddress()).detail("Nominee", li.present() ? 
li.get().changeID : UID()).detail("Generation", generation); @@ -385,7 +391,8 @@ struct MonitorLeaderInfo { explicit MonitorLeaderInfo( Reference intermediateConnFile ) : intermediateConnFile(intermediateConnFile), hasConnected(false), generation(0) {} }; -ACTOR Future monitorLeaderOneGeneration( Reference connFile, Reference> outSerializedLeaderInfo, MonitorLeaderInfo info ) { +// Leader is the process that will be elected by coordinators as the cluster controller +ACTOR Future monitorLeaderOneGeneration( Reference connFile, Reference> outSerializedLeaderInfo, MonitorLeaderInfo info, Reference> connectedCoordinatorsNum) { state ClientCoordinators coordinators( info.intermediateConnFile ); state AsyncTrigger nomineeChange; state std::vector> nominees; @@ -394,8 +401,9 @@ ACTOR Future monitorLeaderOneGeneration( Reference> actors; + // Ask all coordinators if the worker is considered as a leader (leader nominee) by the coordinator. for(int i=0; i monitorLeaderOneGeneration( Reference monitorLeaderInternal( Reference connFile, Reference> outSerializedLeaderInfo ) { +ACTOR Future monitorLeaderInternal( Reference connFile, Reference> outSerializedLeaderInfo, Reference> connectedCoordinatorsNum ) { state MonitorLeaderInfo info(connFile); loop { - MonitorLeaderInfo _info = wait( monitorLeaderOneGeneration( connFile, outSerializedLeaderInfo, info) ); + // set the AsyncVar to 0 + connectedCoordinatorsNum->set(0); + MonitorLeaderInfo _info = wait( monitorLeaderOneGeneration( connFile, outSerializedLeaderInfo, info, connectedCoordinatorsNum) ); info = _info; info.generation++; + } } diff --git a/fdbclient/MonitorLeader.h b/fdbclient/MonitorLeader.h index c62133d136..87aa37c503 100644 --- a/fdbclient/MonitorLeader.h +++ b/fdbclient/MonitorLeader.h @@ -30,19 +30,19 @@ class ClientCoordinators; template -Future monitorLeader( Reference const& connFile, Reference>> const& outKnownLeader ); +Future monitorLeader( Reference const& connFile, Reference>> const& outKnownLeader, 
Reference> connectedCoordinatorsNum ); // Monitors the given coordination group's leader election process and provides a best current guess // of the current leader. If a leader is elected for long enough and communication with a quorum of // coordinators is possible, eventually outKnownLeader will be that leader's interface. #pragma region Implementation -Future monitorLeaderInternal( Reference const& connFile, Reference> const& outSerializedLeaderInfo ); +Future monitorLeaderInternal( Reference const& connFile, Reference> const& outSerializedLeaderInfo, Reference> const& connectedCoordinatorsNum ); template -Future monitorLeader( Reference const& connFile, Reference>> const& outKnownLeader ) { +Future monitorLeader( Reference const& connFile, Reference>> const& outKnownLeader, Reference> connectedCoordinatorsNum ) { Reference> serializedInfo( new AsyncVar ); - Future m = monitorLeaderInternal( connFile, serializedInfo ); + Future m = monitorLeaderInternal( connFile, serializedInfo, connectedCoordinatorsNum ); return m || asyncDeserialize( serializedInfo, outKnownLeader ); } diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 49ba9181f7..9f83c9d84d 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -492,14 +492,14 @@ DatabaseContext::DatabaseContext( DatabaseContext::DatabaseContext( const Error &err ) : deferredError(err), latencies(1000), readLatencies(1000), commitLatencies(1000), GRVLatencies(1000), mutationsPerCommit(1000), bytesPerCommit(1000) {} -ACTOR static Future monitorClientInfo( Reference>> clusterInterface, Reference ccf, Reference> outInfo ) { +ACTOR static Future monitorClientInfo( Reference>> clusterInterface, Reference ccf, Reference> outInfo, Reference> connectedCoordinatorsNum ) { try { state Optional incorrectTime; loop { OpenDatabaseRequest req; req.knownClientInfoID = outInfo->get().id; req.supportedVersions = VectorRef(req.arena, networkOptions.supportedVersions); - 
req.client_tls_configured = tlsOptions.isValid() && tlsOptions->isConfigured(); // Monitor if client TLS is configured + req.connectedCoordinatorsNum = connectedCoordinatorsNum->get(); req.traceLogGroup = StringRef(req.arena, networkOptions.traceLogGroup); ClusterConnectionString fileConnectionString; @@ -530,6 +530,7 @@ ACTOR static Future monitorClientInfo( Referenceget().present()) TraceEvent("ClientInfo_CCInterfaceChange").detail("CCID", clusterInterface->get().get().id()); } + when( wait( connectedCoordinatorsNum->onChange() ) ) {} } } } catch( Error& e ) { @@ -542,10 +543,13 @@ ACTOR static Future monitorClientInfo( Reference>> clusterInterface, Reference connFile, LocalityData const& clientLocality) { - Reference cluster(new Cluster(connFile, clusterInterface)); + Reference> connectedCoordinatorsNum(new AsyncVar()); + Reference cluster(new Cluster(connFile, clusterInterface, connectedCoordinatorsNum)); Reference> clientInfo(new AsyncVar()); - Future clientInfoMonitor = monitorClientInfo(clusterInterface, connFile, clientInfo); + Future clientInfoMonitor = monitorClientInfo(clusterInterface, connFile, clientInfo, connectedCoordinatorsNum); return Database(new DatabaseContext(cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false)); } @@ -710,9 +714,10 @@ Reference DatabaseContext::getConnectionFile() { } Database Database::createDatabase( Reference connFile, int apiVersion, LocalityData const& clientLocality ) { - Reference cluster(new Cluster(connFile, apiVersion)); + Reference> connectedCoordinatorsNum(new AsyncVar(0)); // Number of connected coordinators for the client + Reference cluster(new Cluster(connFile, connectedCoordinatorsNum, apiVersion)); Reference> clientInfo(new AsyncVar()); - Future clientInfoMonitor = monitorClientInfo(cluster->getClusterInterface(), connFile, clientInfo); + Future clientInfoMonitor = monitorClientInfo(cluster->getClusterInterface(), connFile, clientInfo, 
connectedCoordinatorsNum); return Database( new DatabaseContext( cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false, apiVersion ) ); } @@ -724,19 +729,19 @@ Database Database::createDatabase( std::string connFileName, int apiVersion, Loc extern uint32_t determinePublicIPAutomatically( ClusterConnectionString const& ccs ); -Cluster::Cluster( Reference connFile, int apiVersion ) +Cluster::Cluster( Reference connFile, Reference> connectedCoordinatorsNum, int apiVersion ) : clusterInterface(new AsyncVar>()) { - init(connFile, true, apiVersion); + init(connFile, true, connectedCoordinatorsNum, apiVersion); } -Cluster::Cluster( Reference connFile, Reference>> clusterInterface) +Cluster::Cluster( Reference connFile, Reference>> clusterInterface, Reference> connectedCoordinatorsNum) : clusterInterface(clusterInterface) { - init(connFile, true); + init(connFile, true, connectedCoordinatorsNum); } -void Cluster::init( Reference connFile, bool startClientInfoMonitor, int apiVersion ) { +void Cluster::init( Reference connFile, bool startClientInfoMonitor, Reference> connectedCoordinatorsNum, int apiVersion ) { connectionFile = connFile; connected = clusterInterface->onChange(); @@ -770,7 +775,7 @@ void Cluster::init( Reference connFile, bool startClientI uncancellable( recurring( &systemMonitor, CLIENT_KNOBS->SYSTEM_MONITOR_INTERVAL, TaskFlushTrace ) ); } - leaderMon = monitorLeader( connFile, clusterInterface ); + leaderMon = monitorLeader( connFile, clusterInterface, connectedCoordinatorsNum ); failMon = failureMonitorClient( clusterInterface, false ); } } diff --git a/fdbclient/NativeAPI.actor.h b/fdbclient/NativeAPI.actor.h index f5ba76e385..5ccbd6c9bb 100644 --- a/fdbclient/NativeAPI.actor.h +++ b/fdbclient/NativeAPI.actor.h @@ -115,8 +115,8 @@ void stopNetwork(); */ class Cluster : public ReferenceCounted, NonCopyable { public: - Cluster(Reference connFile, int apiVersion=Database::API_VERSION_LATEST); - 
Cluster(Reference connFile, Reference>> clusterInterface); + Cluster(Reference connFile, Reference> connectedCoordinatorsNum, int apiVersion=Database::API_VERSION_LATEST); + Cluster(Reference connFile, Reference>> clusterInterface, Reference> connectedCoordinatorsNum); ~Cluster(); @@ -126,7 +126,7 @@ public: Future onConnected(); private: - void init(Reference connFile, bool startClientInfoMonitor, int apiVersion=Database::API_VERSION_LATEST); + void init(Reference connFile, bool startClientInfoMonitor, Reference> connectedCoornidatorsNum, int apiVersion=Database::API_VERSION_LATEST); Reference>> clusterInterface; Reference connectionFile; diff --git a/fdbclient/ReadYourWrites.actor.cpp b/fdbclient/ReadYourWrites.actor.cpp index b7240329db..7a77a1cd8a 100644 --- a/fdbclient/ReadYourWrites.actor.cpp +++ b/fdbclient/ReadYourWrites.actor.cpp @@ -1159,7 +1159,8 @@ ACTOR Future> getJSON(Reference clusterFi ACTOR Future> getWorkerInterfaces (Reference clusterFile){ state Reference>> clusterInterface(new AsyncVar>); - state Future leaderMon = monitorLeader(clusterFile, clusterInterface); + Reference> unused_connectedCoordinatorsNum(new AsyncVar()); + state Future leaderMon = monitorLeader(clusterFile, clusterInterface, unused_connectedCoordinatorsNum); loop{ choose { diff --git a/fdbclient/Schemas.cpp b/fdbclient/Schemas.cpp index b387f7b7df..2b2cc448ec 100644 --- a/fdbclient/Schemas.cpp +++ b/fdbclient/Schemas.cpp @@ -279,7 +279,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( { "address":"127.0.0.1:9898", "log_group":"default", - "tls_configured":true + "connected_coordinators":2 } ], "count" : 1, diff --git a/fdbclient/StatusClient.actor.cpp b/fdbclient/StatusClient.actor.cpp index 9cd7358c07..84f51fb453 100644 --- a/fdbclient/StatusClient.actor.cpp +++ b/fdbclient/StatusClient.actor.cpp @@ -463,11 +463,12 @@ ACTOR Future statusFetcherImpl( Reference f state bool quorum_reachable = false; state int coordinatorsFaultTolerance = 0; state 
Reference>> clusterInterface(new AsyncVar>); + state Reference> connectedCoordinatorsNum(new AsyncVar(0)); try { state int64_t clientTime = time(0); - state Future leaderMon = monitorLeader(f, clusterInterface); + state Future leaderMon = monitorLeader(f, clusterInterface, connectedCoordinatorsNum); StatusObject _statusObjClient = wait(clientStatusFetcher(f, &clientMessages, &quorum_reachable, &coordinatorsFaultTolerance)); statusObjClient = _statusObjClient; diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 5481779093..c56494d8ed 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -93,6 +93,7 @@ public: std::map incompatibleConnections; ClientVersionMap clientVersionMap; std::map clientStatusInfoMap; + int connectedCoordinatorsNum; // Number of connected coordinators AsyncTrigger forceMasterFailure; int64_t masterRegistrationCount; bool recoveryStalled; @@ -1221,7 +1222,7 @@ ACTOR Future clusterOpenDatabase( UID knownClientInfoID, std::string issues, Standalone> supportedVersions, - bool client_tls_configured, + int connectedCoordinatorsNum, Standalone traceLogGroup, ReplyPromise reply) { @@ -1233,7 +1234,8 @@ ACTOR Future clusterOpenDatabase( db->clientVersionMap[reply.getEndpoint().getPrimaryAddress()] = supportedVersions; } - db->clientStatusInfoMap[reply.getEndpoint().getPrimaryAddress()] = {traceLogGroup.toString(), client_tls_configured}; + + db->clientStatusInfoMap[reply.getEndpoint().getPrimaryAddress()] = {traceLogGroup.toString(), connectedCoordinatorsNum}; while (db->clientInfo->get().id == knownClientInfoID) { choose { @@ -2404,7 +2406,7 @@ ACTOR Future clusterControllerCore( ClusterControllerFullInterface interf, return Void(); } when( OpenDatabaseRequest req = waitNext( interf.clientInterface.openDatabase.getFuture() ) ) { - self.addActor.send( clusterOpenDatabase( &self.db, req.knownClientInfoID, req.issues.toString(), req.supportedVersions, 
req.client_tls_configured, req.traceLogGroup, req.reply ) ); + self.addActor.send( clusterOpenDatabase( &self.db, req.knownClientInfoID, req.issues.toString(), req.supportedVersions, req.connectedCoordinatorsNum, req.traceLogGroup, req.reply ) ); } when( RecruitFromConfigurationRequest req = waitNext( interf.recruitFromConfiguration.getFuture() ) ) { self.addActor.send( clusterRecruitFromConfiguration( &self, req ) ); diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index cb2d5bbb62..17b068d2a2 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -865,11 +865,11 @@ static JsonBuilderObject clientStatusFetcher(ClientVersionMap clientVersionMap, JsonBuilderObject cli; cli["address"] = client.toString(); cli["log_group"] = clientStatusInfoMap[client].traceLogGroup; - bool client_tls_configured = false; + int connectedCoordinatorsNum = 0; if (clientStatusInfoMap.find(client) != clientStatusInfoMap.end()) { - client_tls_configured = clientStatusInfoMap[client].clientTLSConfigured; + connectedCoordinatorsNum = clientStatusInfoMap[client].connectedCoordinatorsNum; } - cli["tls_configured"] = client_tls_configured; + cli["connected_coordinators"] = (int)connectedCoordinatorsNum; clients.push_back(cli); } diff --git a/fdbserver/Status.h b/fdbserver/Status.h index d076e3885c..1bc8acc916 100644 --- a/fdbserver/Status.h +++ b/fdbserver/Status.h @@ -32,7 +32,7 @@ typedef std::map< NetworkAddress, Standalone> > Clie struct ClientStatusInfo { std::string traceLogGroup; - bool clientTLSConfigured; // Does client configure its TLS options + int connectedCoordinatorsNum; }; Future clusterGetStatus( Reference> const& db, Database const& cx, vector> const& workers, diff --git a/fdbserver/tester.actor.cpp b/fdbserver/tester.actor.cpp index 21af2fc02b..33861dfbb6 100644 --- a/fdbserver/tester.actor.cpp +++ b/fdbserver/tester.actor.cpp @@ -1121,8 +1121,9 @@ ACTOR Future runTests( Reference connFile, test_typ state vector testSpecs; Reference>> 
cc( new AsyncVar> ); Reference>> ci( new AsyncVar> ); + Reference> connectedCoordinatorsNum( new AsyncVar(0) ); vector> actors; - actors.push_back( reportErrors(monitorLeader( connFile, cc ), "MonitorLeader") ); + actors.push_back( reportErrors(monitorLeader( connFile, cc, connectedCoordinatorsNum ), "MonitorLeader") ); actors.push_back( reportErrors(extractClusterInterface( cc,ci ),"ExtractClusterInterface") ); actors.push_back( reportErrors(failureMonitorClient( ci, false ),"FailureMonitorClient") ); diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index c4b0dd0def..778eba1503 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -1215,10 +1215,11 @@ ACTOR Future fdbd( Reference>> cc(new AsyncVar>); Reference>> ci(new AsyncVar>); Reference> asyncPriorityInfo(new AsyncVar(getCCPriorityInfo(fitnessFilePath, processClass))); + Reference> unused(new AsyncVar); Promise recoveredDiskFiles; v.push_back(reportErrors(monitorAndWriteCCPriorityInfo(fitnessFilePath, asyncPriorityInfo), "MonitorAndWriteCCPriorityInfo")); - v.push_back( reportErrors( processClass == ProcessClass::TesterClass ? monitorLeader( connFile, cc ) : clusterController( connFile, cc , asyncPriorityInfo, recoveredDiskFiles.getFuture(), localities ), "ClusterController") ); + v.push_back( reportErrors( processClass == ProcessClass::TesterClass ? 
monitorLeader( connFile, cc, unused ) : clusterController( connFile, cc , asyncPriorityInfo, recoveredDiskFiles.getFuture(), localities ), "ClusterController") ); v.push_back( reportErrors(extractClusterInterface( cc, ci ), "ExtractClusterInterface") ); v.push_back( reportErrors(failureMonitorClient( ci, true ), "FailureMonitorClient") ); v.push_back( reportErrorsExcept(workerServer(connFile, cc, localities, asyncPriorityInfo, processClass, dataFolder, memoryLimit, metricsConnFile, metricsPrefix, recoveredDiskFiles), "WorkerServer", UID(), &normalWorkerErrors()) ); From 820548223ae4c20c5cfe96b7904b855bc0e69fd3 Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Tue, 5 Mar 2019 21:29:06 -0800 Subject: [PATCH 04/47] Status: connected_coordinators misc minor changes Change the rst document file; Change the coding style to be consistent with the nearby code; Ensure we always initialize the connectedCoordinatorsNum to 0 even when the variable is not used. --- documentation/sphinx/source/mr-status.rst | 2 +- fdbclient/NativeAPI.actor.cpp | 2 +- fdbclient/ReadYourWrites.actor.cpp | 4 ++-- fdbrpc/FlowTransport.actor.cpp | 4 +--- fdbrpc/TLSConnection.h | 6 ------ fdbserver/ClusterController.actor.cpp | 1 - fdbserver/Status.actor.cpp | 7 ++----- fdbserver/worker.actor.cpp | 2 +- 8 files changed, 8 insertions(+), 20 deletions(-) diff --git a/documentation/sphinx/source/mr-status.rst b/documentation/sphinx/source/mr-status.rst index 251f23d556..65be3cbf8a 100644 --- a/documentation/sphinx/source/mr-status.rst +++ b/documentation/sphinx/source/mr-status.rst @@ -81,7 +81,7 @@ The following format informally describes the JSON containing the status data. 
T { "address": "127.0.0.1:1234", "log_group": "default" - "tls_configured": true + "connected_coordinators": 2 } ], "count": 1, diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 9f83c9d84d..c9154ca9fa 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -546,7 +546,7 @@ ACTOR static Future monitorClientInfo( Reference>> clusterInterface, Reference connFile, LocalityData const& clientLocality) { - Reference> connectedCoordinatorsNum(new AsyncVar()); + Reference> connectedCoordinatorsNum(new AsyncVar(0)); Reference cluster(new Cluster(connFile, clusterInterface, connectedCoordinatorsNum)); Reference> clientInfo(new AsyncVar()); Future clientInfoMonitor = monitorClientInfo(clusterInterface, connFile, clientInfo, connectedCoordinatorsNum); diff --git a/fdbclient/ReadYourWrites.actor.cpp b/fdbclient/ReadYourWrites.actor.cpp index 7a77a1cd8a..8b4c308141 100644 --- a/fdbclient/ReadYourWrites.actor.cpp +++ b/fdbclient/ReadYourWrites.actor.cpp @@ -1159,8 +1159,8 @@ ACTOR Future> getJSON(Reference clusterFi ACTOR Future> getWorkerInterfaces (Reference clusterFile){ state Reference>> clusterInterface(new AsyncVar>); - Reference> unused_connectedCoordinatorsNum(new AsyncVar()); - state Future leaderMon = monitorLeader(clusterFile, clusterInterface, unused_connectedCoordinatorsNum); + Reference> unused(new AsyncVar(0)); + state Future leaderMon = monitorLeader(clusterFile, clusterInterface, unused); loop{ choose { diff --git a/fdbrpc/FlowTransport.actor.cpp b/fdbrpc/FlowTransport.actor.cpp index 912a147945..2f00e486f8 100644 --- a/fdbrpc/FlowTransport.actor.cpp +++ b/fdbrpc/FlowTransport.actor.cpp @@ -649,7 +649,6 @@ ACTOR static Future connectionReader( uint64_t connectionId = 0; int32_t connectPacketSize = p->minimumSize(); - bool client_tls_capable = false; if ( unprocessed_end-unprocessed_begin >= connectPacketSize ) { if(p->protocolVersion >= 0x0FDB00A444020001) { connectionId = p->connectionId; @@ -689,8 +688,7 
@@ ACTOR static Future connectionReader( TraceEvent("ConnectionEstablished", conn->getDebugID()) .suppressFor(1.0) .detail("Peer", conn->getPeerAddress()) - .detail("ConnectionId", connectionId) - .detail("ConnectPacketSize", connectPacketSize); + .detail("ConnectionId", connectionId); } if(connectionId > 1) { diff --git a/fdbrpc/TLSConnection.h b/fdbrpc/TLSConnection.h index aab2c5228c..f8395b66a0 100644 --- a/fdbrpc/TLSConnection.h +++ b/fdbrpc/TLSConnection.h @@ -89,12 +89,6 @@ struct TLSOptions : ReferenceCounted { Reference get_policy(PolicyType type); bool enabled(); - bool isConfigured() const { - return policyInfo.ca_path.size() || policyInfo.ca_contents.size() || policyInfo.cert_path.size() || - policyInfo.cert_contents.size() || policyInfo.key_path.size() || policyInfo.key_contents.size(); - } - - struct PolicyInfo { std::string ca_path; Standalone ca_contents; diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index c56494d8ed..63d8b828f1 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -93,7 +93,6 @@ public: std::map incompatibleConnections; ClientVersionMap clientVersionMap; std::map clientStatusInfoMap; - int connectedCoordinatorsNum; // Number of connected coordinators AsyncTrigger forceMasterFailure; int64_t masterRegistrationCount; bool recoveryStalled; diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index 17b068d2a2..189a879cbc 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -864,12 +864,9 @@ static JsonBuilderObject clientStatusFetcher(ClientVersionMap clientVersionMap, for(auto client : cv.second) { JsonBuilderObject cli; cli["address"] = client.toString(); + ASSERT(clientStatusInfoMap.find(client) != clientStatusInfoMap.end()); cli["log_group"] = clientStatusInfoMap[client].traceLogGroup; - int connectedCoordinatorsNum = 0; - if (clientStatusInfoMap.find(client) != clientStatusInfoMap.end()) { - 
connectedCoordinatorsNum = clientStatusInfoMap[client].connectedCoordinatorsNum; - } - cli["connected_coordinators"] = (int)connectedCoordinatorsNum; + cli["connected_coordinators"] = (int) clientStatusInfoMap[client].connectedCoordinatorsNum; clients.push_back(cli); } diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 778eba1503..a61e10a83c 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -1215,7 +1215,7 @@ ACTOR Future fdbd( Reference>> cc(new AsyncVar>); Reference>> ci(new AsyncVar>); Reference> asyncPriorityInfo(new AsyncVar(getCCPriorityInfo(fitnessFilePath, processClass))); - Reference> unused(new AsyncVar); + Reference> unused(new AsyncVar(0)); Promise recoveredDiskFiles; v.push_back(reportErrors(monitorAndWriteCCPriorityInfo(fitnessFilePath, asyncPriorityInfo), "MonitorAndWriteCCPriorityInfo")); From ca8bbad657648f3f54e1246e33716cda1b221ce2 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Wed, 6 Mar 2019 14:14:06 -0800 Subject: [PATCH 05/47] Added --json option to fdbbackup describe. Also added expired percentage indicator to snapshot details. 
--- fdbbackup/backup.actor.cpp | 13 ++++-- fdbclient/BackupContainer.actor.cpp | 61 ++++++++++++++++++++++++++++- fdbclient/BackupContainer.h | 16 ++++++++ 3 files changed, 84 insertions(+), 6 deletions(-) diff --git a/fdbbackup/backup.actor.cpp b/fdbbackup/backup.actor.cpp index d4fc79cc09..0fe25cc51b 100644 --- a/fdbbackup/backup.actor.cpp +++ b/fdbbackup/backup.actor.cpp @@ -94,7 +94,7 @@ enum { OPT_EXPIRE_BEFORE_VERSION, OPT_EXPIRE_BEFORE_DATETIME, OPT_EXPIRE_DELETE_BEFORE_DAYS, OPT_EXPIRE_RESTORABLE_AFTER_VERSION, OPT_EXPIRE_RESTORABLE_AFTER_DATETIME, OPT_EXPIRE_MIN_RESTORABLE_DAYS, OPT_BASEURL, OPT_BLOB_CREDENTIALS, OPT_DESCRIBE_DEEP, OPT_DESCRIBE_TIMESTAMPS, - OPT_DUMP_BEGIN, OPT_DUMP_END, + OPT_DUMP_BEGIN, OPT_DUMP_END, OPT_JSON, // Backup and Restore constants OPT_TAGNAME, OPT_BACKUPKEYS, OPT_WAITFORDONE, @@ -433,6 +433,7 @@ CSimpleOpt::SOption g_rgBackupDescribeOptions[] = { { OPT_KNOB, "--knob_", SO_REQ_SEP }, { OPT_DESCRIBE_DEEP, "--deep", SO_NONE }, { OPT_DESCRIBE_TIMESTAMPS, "--version_timestamps", SO_NONE }, + { OPT_JSON, "--json", SO_NONE}, #ifndef TLS_DISABLED TLS_OPTION_FLAGS #endif @@ -2117,13 +2118,13 @@ ACTOR Future deleteBackupContainer(const char *name, std::string destinati return Void(); } -ACTOR Future describeBackup(const char *name, std::string destinationContainer, bool deep, Optional cx) { +ACTOR Future describeBackup(const char *name, std::string destinationContainer, bool deep, Optional cx, bool json) { try { Reference c = openBackupContainer(name, destinationContainer); state BackupDescription desc = wait(c->describeBackup(deep)); if(cx.present()) wait(desc.resolveVersionTimes(cx.get())); - printf("%s\n", desc.toString().c_str()); + printf("%s\n", (json ? 
desc.toJSON() : desc.toString()).c_str()); } catch (Error& e) { if(e.code() == error_code_actor_cancelled) @@ -2547,6 +2548,7 @@ int main(int argc, char* argv[]) { Version dumpEnd = std::numeric_limits::max(); std::string restoreClusterFileDest; std::string restoreClusterFileOrig; + bool jsonOutput = false; if( argc == 1 ) { printUsage(programExe, false); @@ -2844,6 +2846,9 @@ int main(int argc, char* argv[]) { case OPT_DUMP_END: dumpEnd = parseVersion(args->OptionArg()); break; + case OPT_JSON: + jsonOutput = true; + break; } } @@ -3200,7 +3205,7 @@ int main(int argc, char* argv[]) { return FDB_EXIT_ERROR; // Only pass database optionDatabase Describe will lookup version timestamps if a cluster file was given, but quietly skip them if not. - f = stopAfter( describeBackup(argv[0], destinationContainer, describeDeep, describeTimestamps ? Optional(db) : Optional()) ); + f = stopAfter( describeBackup(argv[0], destinationContainer, describeDeep, describeTimestamps ? Optional(db) : Optional(), jsonOutput) ); break; case BACKUP_LIST: diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index c89df6e244..2ec4d6fa12 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -33,6 +33,7 @@ #include #include #include "flow/actorcompiler.h" // has to be last include +#include "JsonBuilder.h" namespace IBackupFile_impl { @@ -142,8 +143,8 @@ std::string BackupDescription::toString() const { }; for(const KeyspaceSnapshotFile &m : snapshots) { - info.append(format("Snapshot: startVersion=%s endVersion=%s totalBytes=%lld restorable=%s\n", - formatVersion(m.beginVersion).c_str(), formatVersion(m.endVersion).c_str(), m.totalSize, m.restorable.orDefault(false) ? "true" : "false")); + info.append(format("Snapshot: startVersion=%s endVersion=%s totalBytes=%lld restorable=%s expiredPct=%.2f\n", + formatVersion(m.beginVersion).c_str(), formatVersion(m.endVersion).c_str(), m.totalSize, m.restorable.orDefault(false) ? 
"true" : "false", m.expiredPct(expiredEndVersion))); } info.append(format("SnapshotBytes: %lld\n", snapshotBytes)); @@ -169,6 +170,62 @@ std::string BackupDescription::toString() const { return info; } +std::string BackupDescription::toJSON() const { + JsonBuilderObject doc; + + doc.setKey("URL", url.c_str()); + doc.setKey("Restorable", maxRestorableVersion.present()); + + auto formatVersion = [&](Version v) { + JsonBuilderObject doc; + doc.setKey("Version", v); + if(!versionTimeMap.empty()) { + auto i = versionTimeMap.find(v); + if(i != versionTimeMap.end()) + doc.setKey("Timestamp", formatTime(i->second)); + } + else if(maxLogEnd.present()) { + double days = double(v - maxLogEnd.get()) / (CLIENT_KNOBS->CORE_VERSIONSPERSECOND * 24 * 60 * 60); + doc.setKey("RelativeDays", days); + } + return doc; + }; + + JsonBuilderArray snapshotsArray; + for(const KeyspaceSnapshotFile &m : snapshots) { + JsonBuilderObject snapshotDoc; + snapshotDoc.setKey("StartVersion", formatVersion(m.beginVersion)); + snapshotDoc.setKey("EndVersion", formatVersion(m.endVersion)); + snapshotDoc.setKey("Restorable", m.restorable.orDefault(false)); + snapshotDoc.setKey("TotalBytes", m.totalSize); + snapshotDoc.setKey("PercentageExpired", m.expiredPct(expiredEndVersion)); + snapshotsArray.push_back(snapshotDoc); + } + doc.setKey("Snapshots", snapshotsArray); + + doc.setKey("TotalSnapshotBytes", snapshotBytes); + + if(expiredEndVersion.present()) + doc.setKey("ExpiredEndVersion", formatVersion(expiredEndVersion.get())); + if(unreliableEndVersion.present()) + doc.setKey("UnreliableEndVersion", formatVersion(unreliableEndVersion.get())); + if(minLogBegin.present()) + doc.setKey("MinLogBeginVersion", formatVersion(minLogBegin.get())); + if(contiguousLogEnd.present()) + doc.setKey("ContiguousLogEndVersion", formatVersion(contiguousLogEnd.get())); + if(maxLogEnd.present()) + doc.setKey("MaxLogEndVersion", formatVersion(maxLogEnd.get())); + if(minRestorableVersion.present()) + 
doc.setKey("MinRestorableVersion", formatVersion(minRestorableVersion.get())); + if(maxRestorableVersion.present()) + doc.setKey("MaxRestorableVersion", formatVersion(maxRestorableVersion.get())); + + if(!extendedDetail.empty()) + doc.setKey("ExtendedDetail", extendedDetail); + + return doc.getJson(); +} + /* BackupContainerFileSystem implements a backup container which stores files in a nested folder structure. * Inheritors must only defined methods for writing, reading, deleting, sizing, and listing files. * diff --git a/fdbclient/BackupContainer.h b/fdbclient/BackupContainer.h index bb2e397f5a..552223cc25 100644 --- a/fdbclient/BackupContainer.h +++ b/fdbclient/BackupContainer.h @@ -89,6 +89,21 @@ struct KeyspaceSnapshotFile { std::string fileName; int64_t totalSize; Optional restorable; // Whether or not the snapshot can be used in a restore, if known + bool isSingleVersion() const { + return beginVersion == endVersion; + } + double expiredPct(Optional expiredEnd) const { + double pctExpired = 0; + if(expiredEnd.present() && expiredEnd.get() > beginVersion) { + if(isSingleVersion()) { + pctExpired = 1; + } + else { + pctExpired = double(std::min(endVersion, expiredEnd.get()) - beginVersion) / (endVersion - beginVersion); + } + } + return pctExpired * 100; + } // Order by beginVersion, break ties with endVersion bool operator< (const KeyspaceSnapshotFile &rhs) const { @@ -132,6 +147,7 @@ struct BackupDescription { std::map versionTimeMap; std::string toString() const; + std::string toJSON() const; }; struct RestorableFileSet { From 5a1ba1bb2473c9951b4c393b7e568dac14b2db2b Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Wed, 6 Mar 2019 16:59:25 -0800 Subject: [PATCH 06/47] ReleaseNote: Add new info in status Add in release note: Show the number of connected coordinators per client in JSON status --- documentation/sphinx/source/release-notes.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/documentation/sphinx/source/release-notes.rst 
b/documentation/sphinx/source/release-notes.rst index fa83fc2875..4074b1481a 100644 --- a/documentation/sphinx/source/release-notes.rst +++ b/documentation/sphinx/source/release-notes.rst @@ -9,6 +9,7 @@ Features -------- Improved replication mechanism, a new hierarchical replication technique that further significantly reduces the frequency of data loss events even when multiple machines (e.g., fault-tolerant zones in the current code) permanently fail at the same time. `(PR #964) `. +* Show the number of connected coordinators per client in JSON status `(PR #1222) `_ * Get read version, read, and commit requests are counted and aggregated by server-side latency in configurable latency bands and output in JSON status. `(PR #1084) `_ * Added configuration option to choose log spilling implementation `(PR #1160) `_ From 1399aee532bed936ecc7504939358e2471860f19 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Wed, 6 Mar 2019 21:32:46 -0800 Subject: [PATCH 07/47] Added --json option to fdbbackup status. 
--- fdbbackup/backup.actor.cpp | 7 +- fdbclient/BackupAgent.actor.h | 1 + fdbclient/FileBackupAgent.actor.cpp | 164 ++++++++++++++++++++++++++++ 3 files changed, 169 insertions(+), 3 deletions(-) diff --git a/fdbbackup/backup.actor.cpp b/fdbbackup/backup.actor.cpp index 0fe25cc51b..5ecf6f2e55 100644 --- a/fdbbackup/backup.actor.cpp +++ b/fdbbackup/backup.actor.cpp @@ -214,6 +214,7 @@ CSimpleOpt::SOption g_rgBackupStatusOptions[] = { { OPT_HELP, "-h", SO_NONE }, { OPT_HELP, "--help", SO_NONE }, { OPT_DEVHELP, "--dev-help", SO_NONE }, + { OPT_JSON, "--json", SO_NONE}, #ifndef TLS_DISABLED TLS_OPTION_FLAGS #endif @@ -1703,12 +1704,12 @@ ACTOR Future statusDBBackup(Database src, Database dest, std::string tagNa return Void(); } -ACTOR Future statusBackup(Database db, std::string tagName, bool showErrors) { +ACTOR Future statusBackup(Database db, std::string tagName, bool showErrors, bool json) { try { state FileBackupAgent backupAgent; - std::string statusText = wait(backupAgent.getStatus(db, showErrors, tagName)); + std::string statusText = wait(json ? 
backupAgent.getStatusJSON(db, tagName) : backupAgent.getStatus(db, showErrors, tagName)); printf("%s\n", statusText.c_str()); } catch (Error& e) { @@ -3150,7 +3151,7 @@ int main(int argc, char* argv[]) { case BACKUP_STATUS: if(!initCluster()) return FDB_EXIT_ERROR; - f = stopAfter( statusBackup(db, tagName, true) ); + f = stopAfter( statusBackup(db, tagName, true, jsonOutput) ); break; case BACKUP_ABORT: diff --git a/fdbclient/BackupAgent.actor.h b/fdbclient/BackupAgent.actor.h index 0d2f492276..4684cc1a59 100644 --- a/fdbclient/BackupAgent.actor.h +++ b/fdbclient/BackupAgent.actor.h @@ -283,6 +283,7 @@ public: } Future getStatus(Database cx, bool showErrors, std::string tagName); + Future getStatusJSON(Database cx, std::string tagName); Future getLastRestorable(Reference tr, Key tagName); void setLastRestorable(Reference tr, Key tagName, Version version); diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index 05b96017ae..ba8c66fb78 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -34,6 +34,7 @@ #include #include #include +#include "JsonBuilder.h" #include "flow/actorcompiler.h" // This must be the last #include. 
@@ -3813,6 +3814,165 @@ public: return Void(); } + struct TimestampedVersion { + Optional version; + Optional epochs; + + bool present() const { + return version.present(); + } + + JsonBuilderObject toJSON() const { + JsonBuilderObject doc; + if(version.present()) { + doc.setKey("Version", version.get()); + if(epochs.present()) { + doc.setKey("Epochs", epochs.get()); + doc.setKey("Timestamp", timeStampToString(epochs)); + } + } + return doc; + } + }; + + // Helper actor for generating status + // If f is present, lookup epochs using timekeeper and tr, return TimestampedVersion + ACTOR static Future getTimestampedVersion(Reference tr, Future> f) { + state TimestampedVersion tv; + wait(store(tv.version, f)); + if(tv.version.present()) { + wait(store(tv.epochs, timeKeeperEpochsFromVersion(tv.version.get(), tr))); + } + return tv; + } + + ACTOR static Future getStatusJSON(FileBackupAgent* backupAgent, Database cx, std::string tagName) { + state Reference tr(new ReadYourWritesTransaction(cx)); + state JsonBuilderObject doc; + + loop { + try { + tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); + tr->setOption(FDBTransactionOptions::LOCK_AWARE); + + state KeyBackedTag tag = makeBackupTag(tagName); + state Optional uidAndAbortedFlag; + state Optional paused; + state Version recentReadVersion; + + wait( store(paused, tr->get(backupAgent->taskBucket->getPauseKey())) && store(uidAndAbortedFlag, tag.get(tr)) && store(recentReadVersion, tr->getReadVersion()) ); + + doc.setKey("AllBackupsPaused", paused.present()); + doc.setKey("Tag", tag.tagName); + + if(uidAndAbortedFlag.present()) { + state BackupConfig config(uidAndAbortedFlag.get().first); + + state EBackupState backupState = wait(config.stateEnum().getD(tr, false, EBackupState::STATE_NEVERRAN)); + JsonBuilderObject statusDoc; + statusDoc.setKey("Enum", (int)backupState); + statusDoc.setKey("Description", BackupAgentBase::getStateText(backupState)); + doc.setKey("Status", statusDoc); + + state Future done = Void(); 
+ + if(backupState != BackupAgentBase::STATE_NEVERRAN) { + state Reference bc; + state TimestampedVersion latestRestorable; + + wait( store(latestRestorable, getTimestampedVersion(tr, config.getLatestRestorableVersion(tr))) + && store(bc, config.backupContainer().getOrThrow(tr)) + ); + + doc.setKey("Restorable", latestRestorable.present()); + + if(latestRestorable.present() && backupState != BackupAgentBase::STATE_COMPLETED) { + JsonBuilderObject o = latestRestorable.toJSON(); + o.setKey("LagSeconds", (recentReadVersion - latestRestorable.version.get()) / CLIENT_KNOBS->CORE_VERSIONSPERSECOND); + doc.setKey("LatestRestorablePoint", o); + } + doc.setKey("DestinationURL", bc->getURL()); + + if(backupState == BackupAgentBase::STATE_COMPLETED) { + doc.setKey("Completed", latestRestorable.toJSON()); + } + } + + if(backupState == BackupAgentBase::STATE_DIFFERENTIAL || backupState == BackupAgentBase::STATE_BACKUP) { + state int64_t snapshotInterval; + state int64_t logBytesWritten; + state int64_t rangeBytesWritten; + state bool stopWhenDone; + state TimestampedVersion snapshotBegin; + state TimestampedVersion snapshotTargetEnd; + state TimestampedVersion latestLogEnd; + state TimestampedVersion latestSnapshotEnd; + + wait( store(snapshotInterval, config.snapshotIntervalSeconds().getOrThrow(tr)) + && store(logBytesWritten, config.logBytesWritten().getD(tr)) + && store(rangeBytesWritten, config.rangeBytesWritten().getD(tr)) + && store(stopWhenDone, config.stopWhenDone().getOrThrow(tr)) + && store(snapshotBegin, getTimestampedVersion(tr, config.snapshotBeginVersion().get(tr))) + && store(snapshotTargetEnd, getTimestampedVersion(tr, config.snapshotTargetEndVersion().get(tr))) + && store(latestLogEnd, getTimestampedVersion(tr, config.latestLogEndVersion().get(tr))) + && store(latestSnapshotEnd, getTimestampedVersion(tr, config.latestSnapshotEndVersion().get(tr))) + ); + + doc.setKey("StopAfterSnapshot", stopWhenDone); + doc.setKey("SnapshotIntervalSeconds", snapshotInterval); 
+ doc.setKey("LogBytesWritten", logBytesWritten); + doc.setKey("RangeBytesWritten", rangeBytesWritten); + + if(latestLogEnd.present()) { + doc.setKey("LatestLogEnd", latestLogEnd.toJSON()); + } + + if(latestSnapshotEnd.present()) { + doc.setKey("LatestSnapshotEnd", latestSnapshotEnd.toJSON()); + } + + JsonBuilderObject snapshot; + + if(snapshotBegin.present()) { + snapshot.setKey("Begin", snapshotBegin.toJSON()); + + if(snapshotTargetEnd.present()) { + snapshot.setKey("EndTarget", snapshotTargetEnd.toJSON()); + + Version interval = snapshotTargetEnd.version.get() - snapshotBegin.version.get(); + snapshot.setKey("IntervalSeconds", interval / CLIENT_KNOBS->CORE_VERSIONSPERSECOND); + + Version elapsed = recentReadVersion - snapshotBegin.version.get(); + double progress = (interval > 0) ? (100.0 * elapsed / interval) : 100; + snapshot.setKey("ExpectedProgress", progress); + } + } + + doc.setKey("CurrentSnapshot", snapshot); + } + + KeyBackedMap>::PairsType errors = wait(config.lastErrorPerType().getRange(tr, 0, std::numeric_limits::max(), CLIENT_KNOBS->TOO_MANY)); + JsonBuilderArray errorList; + for(auto &e : errors) { + std::string msg = e.second.first; + Version ver = e.second.second; + + JsonBuilderObject errDoc; + errDoc.setKey("Message", msg.c_str()); + errDoc.setKey("RelativeSeconds", (ver - recentReadVersion) / CLIENT_KNOBS->CORE_VERSIONSPERSECOND); + } + doc.setKey("Errors", errorList); + } + break; + } + catch (Error &e) { + wait(tr->onError(e)); + } + } + + return doc.getJson(); + } + ACTOR static Future getStatus(FileBackupAgent* backupAgent, Database cx, bool showErrors, std::string tagName) { state Reference tr(new ReadYourWritesTransaction(cx)); state std::string statusText; @@ -4179,6 +4339,10 @@ Future FileBackupAgent::getStatus(Database cx, bool showErrors, std return FileBackupAgentImpl::getStatus(this, cx, showErrors, tagName); } +Future FileBackupAgent::getStatusJSON(Database cx, std::string tagName) { + return 
FileBackupAgentImpl::getStatusJSON(this, cx, tagName); +} + Future FileBackupAgent::getLastRestorable(Reference tr, Key tagName) { return FileBackupAgentImpl::getLastRestorable(this, tr, tagName); } From 06c11a316db71fc38d054ed06f8a590dd61658f4 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Wed, 6 Mar 2019 22:34:25 -0800 Subject: [PATCH 08/47] Normalized timestamp to text format across backup and restore tooling. Added epochs field to JSON objects describing versions and timestamps in backup status and describe output, renamed some fields for clarity. --- fdbclient/BackupAgent.actor.h | 11 +++++++++ fdbclient/BackupContainer.actor.cpp | 38 ++++++++++++----------------- fdbclient/FileBackupAgent.actor.cpp | 13 +++------- 3 files changed, 31 insertions(+), 31 deletions(-) diff --git a/fdbclient/BackupAgent.actor.h b/fdbclient/BackupAgent.actor.h index 4684cc1a59..ef05df3e17 100644 --- a/fdbclient/BackupAgent.actor.h +++ b/fdbclient/BackupAgent.actor.h @@ -38,6 +38,16 @@ class BackupAgentBase : NonCopyable { public: + // Time formatter for anything backup or restore related + static std::string formatTime(int64_t epochs) { + time_t curTime = (time_t)epochs; + char buffer[128]; + struct tm timeinfo; + getLocalTime(&curTime, &timeinfo); + strftime(buffer, 128, "%Y/%m/%d %H:%M:%S", &timeinfo); + return buffer; + } + // Type of program being executed enum enumActionResult { RESULT_SUCCESSFUL = 0, RESULT_ERRORED = 1, RESULT_DUPLICATE = 2, RESULT_UNNEEDED = 3 @@ -179,6 +189,7 @@ public: return defaultTagName; } + // This is only used for automatic backup name generation static Standalone getCurrentTime() { double t = now(); time_t curTime = t; diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index 2ec4d6fa12..3154ed73ee 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -19,6 +19,8 @@ */ #include "fdbclient/BackupContainer.h" +#include "fdbclient/BackupAgent.actor.h" +#include 
"fdbclient/JsonBuilder.h" #include "flow/Trace.h" #include "flow/UnitTest.h" #include "flow/Hash3.h" @@ -33,7 +35,6 @@ #include #include #include "flow/actorcompiler.h" // has to be last include -#include "JsonBuilder.h" namespace IBackupFile_impl { @@ -69,15 +70,6 @@ void BackupFileList::toStream(FILE *fout) const { } } -std::string formatTime(int64_t t) { - time_t curTime = (time_t)t; - char buffer[128]; - struct tm timeinfo; - getLocalTime(&curTime, &timeinfo); - strftime(buffer, 128, "%Y-%m-%d %H:%M:%S", &timeinfo); - return buffer; -} - Future fetchTimes(Reference tr, std::map *pVersionTimeMap) { std::vector> futures; @@ -128,7 +120,7 @@ std::string BackupDescription::toString() const { if(!versionTimeMap.empty()) { auto i = versionTimeMap.find(v); if(i != versionTimeMap.end()) - s = format("%lld (%s)", v, formatTime(i->second).c_str()); + s = format("%lld (%s)", v, BackupAgentBase::formatTime(i->second).c_str()); else s = format("%lld (unknown)", v); } @@ -181,8 +173,10 @@ std::string BackupDescription::toJSON() const { doc.setKey("Version", v); if(!versionTimeMap.empty()) { auto i = versionTimeMap.find(v); - if(i != versionTimeMap.end()) - doc.setKey("Timestamp", formatTime(i->second)); + if(i != versionTimeMap.end()) { + doc.setKey("Timestamp", BackupAgentBase::formatTime(i->second)); + doc.setKey("Epochs", i->second); + } } else if(maxLogEnd.present()) { double days = double(v - maxLogEnd.get()) / (CLIENT_KNOBS->CORE_VERSIONSPERSECOND * 24 * 60 * 60); @@ -194,8 +188,8 @@ std::string BackupDescription::toJSON() const { JsonBuilderArray snapshotsArray; for(const KeyspaceSnapshotFile &m : snapshots) { JsonBuilderObject snapshotDoc; - snapshotDoc.setKey("StartVersion", formatVersion(m.beginVersion)); - snapshotDoc.setKey("EndVersion", formatVersion(m.endVersion)); + snapshotDoc.setKey("Start", formatVersion(m.beginVersion)); + snapshotDoc.setKey("End", formatVersion(m.endVersion)); snapshotDoc.setKey("Restorable", m.restorable.orDefault(false)); 
snapshotDoc.setKey("TotalBytes", m.totalSize); snapshotDoc.setKey("PercentageExpired", m.expiredPct(expiredEndVersion)); @@ -206,19 +200,19 @@ std::string BackupDescription::toJSON() const { doc.setKey("TotalSnapshotBytes", snapshotBytes); if(expiredEndVersion.present()) - doc.setKey("ExpiredEndVersion", formatVersion(expiredEndVersion.get())); + doc.setKey("ExpiredEnd", formatVersion(expiredEndVersion.get())); if(unreliableEndVersion.present()) - doc.setKey("UnreliableEndVersion", formatVersion(unreliableEndVersion.get())); + doc.setKey("UnreliableEnd", formatVersion(unreliableEndVersion.get())); if(minLogBegin.present()) - doc.setKey("MinLogBeginVersion", formatVersion(minLogBegin.get())); + doc.setKey("MinLogBegin", formatVersion(minLogBegin.get())); if(contiguousLogEnd.present()) - doc.setKey("ContiguousLogEndVersion", formatVersion(contiguousLogEnd.get())); + doc.setKey("ContiguousLogEnd", formatVersion(contiguousLogEnd.get())); if(maxLogEnd.present()) - doc.setKey("MaxLogEndVersion", formatVersion(maxLogEnd.get())); + doc.setKey("MaxLogEnd", formatVersion(maxLogEnd.get())); if(minRestorableVersion.present()) - doc.setKey("MinRestorableVersion", formatVersion(minRestorableVersion.get())); + doc.setKey("MinRestorablePoint", formatVersion(minRestorableVersion.get())); if(maxRestorableVersion.present()) - doc.setKey("MaxRestorableVersion", formatVersion(maxRestorableVersion.get())); + doc.setKey("MaxRestorablePoint", formatVersion(maxRestorableVersion.get())); if(!extendedDetail.empty()) doc.setKey("ExtendedDetail", extendedDetail); diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index ba8c66fb78..1cb3efe7e8 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -24,6 +24,7 @@ #include "fdbclient/ManagementAPI.actor.h" #include "fdbclient/Status.h" #include "fdbclient/KeyBackedTypes.h" +#include "fdbclient/JsonBuilder.h" #include #include @@ -34,7 +35,6 @@ #include #include #include 
-#include "JsonBuilder.h" #include "flow/actorcompiler.h" // This must be the last #include. @@ -47,15 +47,10 @@ static std::string versionToString(Optional version) { return "N/A"; } -static std::string timeStampToString(Optional ts) { - if (!ts.present()) +static std::string timeStampToString(Optional epochs) { + if (!epochs.present()) return "N/A"; - time_t curTs = ts.get(); - char buffer[128]; - struct tm* timeinfo; - timeinfo = localtime(&curTs); - strftime(buffer, 128, "%D %T", timeinfo); - return std::string(buffer); + return BackupAgentBase::formatTime(epochs.get()); } static Future> getTimestampFromVersion(Optional ver, Reference tr) { From 8bdb21a04e7aed8c2652fcbab3e75cebc7beee0f Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Wed, 6 Mar 2019 22:44:37 -0800 Subject: [PATCH 09/47] Added release note for new json output options on backup. --- documentation/sphinx/source/release-notes.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst index e6cda1a0b3..43bce080ad 100644 --- a/documentation/sphinx/source/release-notes.rst +++ b/documentation/sphinx/source/release-notes.rst @@ -16,6 +16,7 @@ Improved replication mechanism, a new hierarchical replication technique that fu * Batch priority transactions are now limited separately by ratekeeper and will be throttled at lower levels of cluster saturation. This makes it possible to run a more intense background load at saturation without significantly affecting normal priority transactions. It is still recommended not to run excessive loads at batch priority. `(PR #1198) `_ * Restore now requires the destnation cluster to be specified explicitly to avoid confusion. `(PR #1240) `_ * Restore target version can now be specified by timestamp if the original cluster is available. `(PR #1240) `_ +* Backup status and describe commands now have a --json output option. 
`(PR #1248) `_ Performance ----------- From 7f405130bb1a60ec8325bfe004e6bcaf1d7433fa Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Thu, 7 Mar 2019 16:06:19 -0800 Subject: [PATCH 10/47] Change a typo in Cluster init function --- fdbclient/NativeAPI.actor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbclient/NativeAPI.actor.h b/fdbclient/NativeAPI.actor.h index ef819437ed..057bd6910d 100644 --- a/fdbclient/NativeAPI.actor.h +++ b/fdbclient/NativeAPI.actor.h @@ -126,7 +126,7 @@ public: Future onConnected(); private: - void init(Reference connFile, bool startClientInfoMonitor, Reference> connectedCoornidatorsNum, int apiVersion=Database::API_VERSION_LATEST); + void init(Reference connFile, bool startClientInfoMonitor, Reference> connectedCoordinatorsNum, int apiVersion=Database::API_VERSION_LATEST); Reference>> clusterInterface; Reference connectionFile; From 023bbb566f3631585415004ea7af9412d76c99ed Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Sun, 10 Mar 2019 16:00:01 -0700 Subject: [PATCH 11/47] Renamed backup state enums for clarity, added backup state names. Changed Epochs to EpochSeconds in backup JSON along with some other renaming/moving of fields, and added information about snapshot dispatch. Changed timestamp format for input/output in all backup/restore contexts to be a fully qualified time with timezone offset. Added information about the last snapshot dispatch to backup config and status (not yet populated). 
--- fdbbackup/backup.actor.cpp | 12 ++-- fdbclient/BackupAgent.actor.h | 79 ++++++++++++++++++++++--- fdbclient/BackupContainer.actor.cpp | 17 ++---- fdbclient/DatabaseBackupAgent.actor.cpp | 16 ++--- fdbclient/FileBackupAgent.actor.cpp | 48 +++++++++------ 5 files changed, 117 insertions(+), 55 deletions(-) diff --git a/fdbbackup/backup.actor.cpp b/fdbbackup/backup.actor.cpp index 5ecf6f2e55..f6842be829 100644 --- a/fdbbackup/backup.actor.cpp +++ b/fdbbackup/backup.actor.cpp @@ -840,7 +840,7 @@ static void printBackupUsage(bool devhelp) { " File containing blob credentials in JSON format. Can be specified multiple times for multiple files. See below for more details.\n"); printf(" --expire_before_timestamp DATETIME\n" " Datetime cutoff for expire operations. Requires a cluster file and will use version/timestamp metadata\n" - " in the database to obtain a cutoff version very close to the timestamp given in YYYY-MM-DD.HH:MI:SS format (UTC).\n"); + " in the database to obtain a cutoff version very close to the timestamp given in %s.\n", BackupAgentBase::timeFormat().c_str()); printf(" --expire_before_version VERSION\n" " Version cutoff for expire operations. Deletes data files containing no data at or after VERSION.\n"); printf(" --delete_before_days NUM_DAYS\n" @@ -913,7 +913,7 @@ static void printRestoreUsage(bool devhelp ) { printf(TLS_HELP); #endif printf(" -v DBVERSION The version at which the database will be restored.\n"); - printf(" --timestamp Instead of a numeric version, use this to specify a timestamp in YYYY-MM-DD.HH:MI:SS format (UTC)\n"); + printf(" --timestamp Instead of a numeric version, use this to specify a timestamp in %s\n", BackupAgentBase::timeFormat().c_str()); printf(" and it will be converted to a version from that time using metadata in orig_cluster_file.\n"); printf(" --orig_cluster_file CONNFILE\n"); printf(" The cluster file for the original database from which the backup was created. 
The original database\n"); @@ -1252,8 +1252,8 @@ ACTOR Future getLayerStatus(Reference tr tagRoot.create("current_status") = statusText; tagRoot.create("last_restorable_version") = tagLastRestorableVersions[j].get(); tagRoot.create("last_restorable_seconds_behind") = last_restorable_seconds_behind; - tagRoot.create("running_backup") = (status == BackupAgentBase::STATE_DIFFERENTIAL || status == BackupAgentBase::STATE_BACKUP); - tagRoot.create("running_backup_is_restorable") = (status == BackupAgentBase::STATE_DIFFERENTIAL); + tagRoot.create("running_backup") = (status == BackupAgentBase::STATE_RUNNING_DIFFERENTIAL || status == BackupAgentBase::STATE_RUNNING); + tagRoot.create("running_backup_is_restorable") = (status == BackupAgentBase::STATE_RUNNING_DIFFERENTIAL); tagRoot.create("range_bytes_written") = tagRangeBytes[j].get(); tagRoot.create("mutation_log_bytes_written") = tagLogBytes[j].get(); tagRoot.create("mutation_stream_id") = backupTagUids[j].toString(); @@ -1296,8 +1296,8 @@ ACTOR Future getLayerStatus(Reference tr BackupAgentBase::enumState status = (BackupAgentBase::enumState)backupStatus[i].get(); JSONDoc tagRoot = tagsRoot.create(tagName); - tagRoot.create("running_backup") = (status == BackupAgentBase::STATE_DIFFERENTIAL || status == BackupAgentBase::STATE_BACKUP); - tagRoot.create("running_backup_is_restorable") = (status == BackupAgentBase::STATE_DIFFERENTIAL); + tagRoot.create("running_backup") = (status == BackupAgentBase::STATE_RUNNING_DIFFERENTIAL || status == BackupAgentBase::STATE_RUNNING); + tagRoot.create("running_backup_is_restorable") = (status == BackupAgentBase::STATE_RUNNING_DIFFERENTIAL); tagRoot.create("range_bytes_written") = tagRangeBytesDR[i].get(); tagRoot.create("mutation_log_bytes_written") = tagLogBytesDR[i].get(); tagRoot.create("mutation_stream_id") = drTagUids[i].toString(); diff --git a/fdbclient/BackupAgent.actor.h b/fdbclient/BackupAgent.actor.h index ef05df3e17..2e4b89edfd 100644 --- a/fdbclient/BackupAgent.actor.h +++ 
b/fdbclient/BackupAgent.actor.h @@ -44,17 +44,29 @@ public: char buffer[128]; struct tm timeinfo; getLocalTime(&curTime, &timeinfo); - strftime(buffer, 128, "%Y/%m/%d %H:%M:%S", &timeinfo); + strftime(buffer, 128, "%Y/%m/%d.%H:%M:%S%z", &timeinfo); return buffer; } + static std::string timeFormat() { + return "YYYY/MM/DD.HH:MI:SS[+/-]HHMM"; + } + + static int64_t parseTime(std::string timestamp) { + struct tm out; + if (strptime(timestamp.c_str(), "%Y/%m/%d.%H:%M:%S%z", &out) == nullptr) { + return -1; + } + return (int64_t) mktime(&out); + } + // Type of program being executed enum enumActionResult { RESULT_SUCCESSFUL = 0, RESULT_ERRORED = 1, RESULT_DUPLICATE = 2, RESULT_UNNEEDED = 3 }; enum enumState { - STATE_ERRORED = 0, STATE_SUBMITTED = 1, STATE_BACKUP = 2, STATE_DIFFERENTIAL = 3, STATE_COMPLETED = 4, STATE_NEVERRAN = 5, STATE_ABORTED = 6, STATE_PARTIALLY_ABORTED = 7 + STATE_ERRORED = 0, STATE_SUBMITTED = 1, STATE_RUNNING = 2, STATE_RUNNING_DIFFERENTIAL = 3, STATE_COMPLETED = 4, STATE_NEVERRAN = 5, STATE_ABORTED = 6, STATE_PARTIALLY_ABORTED = 7 }; static const Key keyFolderId; @@ -100,11 +112,11 @@ public: } else if (!stateText.compare("has been started")) { - enState = STATE_BACKUP; + enState = STATE_RUNNING; } else if (!stateText.compare("is differential")) { - enState = STATE_DIFFERENTIAL; + enState = STATE_RUNNING_DIFFERENTIAL; } else if (!stateText.compare("has been completed")) { @@ -122,7 +134,7 @@ public: return enState; } - // Convert the status text to an enumerated value + // Convert the status enum to a text description static const char* getStateText(enumState enState) { const char* stateText; @@ -138,10 +150,10 @@ public: case STATE_SUBMITTED: stateText = "has been submitted"; break; - case STATE_BACKUP: + case STATE_RUNNING: stateText = "has been started"; break; - case STATE_DIFFERENTIAL: + case STATE_RUNNING_DIFFERENTIAL: stateText = "is differential"; break; case STATE_COMPLETED: @@ -161,6 +173,45 @@ public: return stateText; } + // Convert 
the status enum to a name + static const char* getStateName(enumState enState) + { + const char* s; + + switch (enState) + { + case STATE_ERRORED: + s = "Errored"; + break; + case STATE_NEVERRAN: + s = "NeverRan"; + break; + case STATE_SUBMITTED: + s = "Submitted"; + break; + case STATE_RUNNING: + s = "Running"; + break; + case STATE_RUNNING_DIFFERENTIAL: + s = "RunningDifferentially"; + break; + case STATE_COMPLETED: + s = "Completed"; + break; + case STATE_ABORTED: + s = "Aborted"; + break; + case STATE_PARTIALLY_ABORTED: + s = "Aborting"; + break; + default: + s = ""; + break; + } + + return s; + } + // Determine if the specified state is runnable static bool isRunnable(enumState enState) { @@ -169,8 +220,8 @@ public: switch (enState) { case STATE_SUBMITTED: - case STATE_BACKUP: - case STATE_DIFFERENTIAL: + case STATE_RUNNING: + case STATE_RUNNING_DIFFERENTIAL: case STATE_PARTIALLY_ABORTED: isRunnable = true; break; @@ -691,6 +742,14 @@ public: return configSpace.pack(LiteralStringRef(__FUNCTION__)); } + KeyBackedBinaryValue snapshotDispatchLastShardsBehind() { + return configSpace.pack(LiteralStringRef(__FUNCTION__)); + } + + KeyBackedProperty snapshotDispatchLastVersion() { + return configSpace.pack(LiteralStringRef(__FUNCTION__)); + } + Future initNewSnapshot(Reference tr, int64_t intervalSeconds = -1) { BackupConfig © = *this; // Capture this by value instead of this ptr @@ -714,6 +773,8 @@ public: copy.snapshotBeginVersion().set(tr, beginVersion.get()); copy.snapshotTargetEndVersion().set(tr, endVersion); copy.snapshotRangeFileCount().set(tr, 0); + copy.snapshotDispatchLastVersion().clear(tr); + copy.snapshotDispatchLastShardsBehind().clear(tr); return Void(); }); diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index 3154ed73ee..cbe2fcb32f 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -175,7 +175,7 @@ std::string BackupDescription::toJSON() const { auto i = 
versionTimeMap.find(v); if(i != versionTimeMap.end()) { doc.setKey("Timestamp", BackupAgentBase::formatTime(i->second)); - doc.setKey("Epochs", i->second); + doc.setKey("EpochSeconds", i->second); } } else if(maxLogEnd.present()) { @@ -1628,20 +1628,11 @@ ACTOR Future timeKeeperVersionFromDatetime(std::string datetime, Databa state KeyBackedMap versionMap(timeKeeperPrefixRange.begin); state Reference tr = Reference(new ReadYourWritesTransaction(db)); - int year, month, day, hour, minute, second; - if (sscanf(datetime.c_str(), "%d-%d-%d.%d:%d:%d", &year, &month, &day, &hour, &minute, &second) != 6) { - fprintf(stderr, "ERROR: Incorrect date/time format.\n"); + state int64_t time = BackupAgentBase::parseTime(datetime); + if(time < 0) { + fprintf(stderr, "ERROR: Incorrect date/time or format. Format is %s.\n", BackupAgentBase::timeFormat().c_str()); throw backup_error(); } - struct tm expDateTime = {0}; - expDateTime.tm_year = year - 1900; - expDateTime.tm_mon = month - 1; - expDateTime.tm_mday = day; - expDateTime.tm_hour = hour; - expDateTime.tm_min = minute; - expDateTime.tm_sec = second; - expDateTime.tm_isdst = -1; - state int64_t time = (int64_t) mktime(&expDateTime); loop { try { diff --git a/fdbclient/DatabaseBackupAgent.actor.cpp b/fdbclient/DatabaseBackupAgent.actor.cpp index e9fb6b9f2b..5522dca3f2 100644 --- a/fdbclient/DatabaseBackupAgent.actor.cpp +++ b/fdbclient/DatabaseBackupAgent.actor.cpp @@ -1376,7 +1376,7 @@ namespace dbBackup { try { tr.setOption(FDBTransactionOptions::LOCK_AWARE); tr.addReadConflictRange(singleKeyRange(sourceStates.pack(DatabaseBackupAgent::keyStateStatus))); - tr.set(sourceStates.pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_DIFFERENTIAL))); + tr.set(sourceStates.pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_RUNNING_DIFFERENTIAL))); Key versionKey = 
task->params[DatabaseBackupAgent::keyConfigLogUid].withPrefix(task->params[BackupAgentBase::destUid]).withPrefix(backupLatestVersionsPrefix); Optional prevBeginVersion = wait(tr.get(versionKey)); @@ -1418,7 +1418,7 @@ namespace dbBackup { wait(success(FinishedFullBackupTaskFunc::addTask(tr, taskBucket, task, TaskCompletionKey::noSignal()))); } else { // Start the writing of logs, if differential - tr->set(states.pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_DIFFERENTIAL))); + tr->set(states.pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_RUNNING_DIFFERENTIAL))); allPartsDone = futureBucket->future(tr); @@ -1544,7 +1544,7 @@ namespace dbBackup { srcTr2->set( Subspace(databaseBackupPrefixRange.begin).get(BackupAgentBase::keySourceTagName).pack(task->params[BackupAgentBase::keyTagName]), logUidValue ); srcTr2->set( sourceStates.pack(DatabaseBackupAgent::keyFolderId), task->params[DatabaseBackupAgent::keyFolderId] ); - srcTr2->set( sourceStates.pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_BACKUP))); + srcTr2->set( sourceStates.pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_RUNNING))); state Key destPath = destUidValue.withPrefix(backupLogKeys.begin); // Start logging the mutations for the specified ranges of the tag @@ -1572,7 +1572,7 @@ namespace dbBackup { tr->set(logUidValue.withPrefix(applyMutationsBeginRange.begin), BinaryWriter::toValue(beginVersion, Unversioned())); tr->set(logUidValue.withPrefix(applyMutationsEndRange.begin), BinaryWriter::toValue(beginVersion, Unversioned())); - tr->set(states.pack(DatabaseBackupAgent::keyStateStatus), StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_BACKUP))); + tr->set(states.pack(DatabaseBackupAgent::keyStateStatus), 
StringRef(BackupAgentBase::getStateText(BackupAgentBase::STATE_RUNNING))); state Reference kvBackupRangeComplete = futureBucket->future(tr); state Reference kvBackupComplete = futureBucket->future(tr); @@ -1776,7 +1776,7 @@ public: } // Break, if in differential mode (restorable) and stopWhenDone is not enabled - if ((!stopWhenDone) && (BackupAgentBase::STATE_DIFFERENTIAL == status)) { + if ((!stopWhenDone) && (BackupAgentBase::STATE_RUNNING_DIFFERENTIAL == status)) { return status; } @@ -1939,7 +1939,7 @@ public: state int status = wait(backupAgent->getStateValue(dest, destlogUid)); TraceEvent("DBA_SwitchoverStart").detail("Status", status); - if (status != BackupAgentBase::STATE_DIFFERENTIAL && status != BackupAgentBase::STATE_COMPLETED) { + if (status != BackupAgentBase::STATE_RUNNING_DIFFERENTIAL && status != BackupAgentBase::STATE_COMPLETED) { throw backup_duplicate(); } @@ -2296,10 +2296,10 @@ public: case BackupAgentBase::STATE_SUBMITTED: statusText += "The DR on tag `" + tagNameDisplay + "' is NOT a complete copy of the primary database (just started).\n"; break; - case BackupAgentBase::STATE_BACKUP: + case BackupAgentBase::STATE_RUNNING: statusText += "The DR on tag `" + tagNameDisplay + "' is NOT a complete copy of the primary database.\n"; break; - case BackupAgentBase::STATE_DIFFERENTIAL: + case BackupAgentBase::STATE_RUNNING_DIFFERENTIAL: statusText += "The DR on tag `" + tagNameDisplay + "' is a complete copy of the primary database.\n"; break; case BackupAgentBase::STATE_COMPLETED: diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index 1cb3efe7e8..905f777ce2 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -2059,8 +2059,8 @@ namespace fileBackup { } // If the backup is restorable but the state is not differential then set state to differential - if(restorableVersion.present() && backupState != BackupAgentBase::STATE_DIFFERENTIAL) - config.stateEnum().set(tr, 
BackupAgentBase::STATE_DIFFERENTIAL); + if(restorableVersion.present() && backupState != BackupAgentBase::STATE_RUNNING_DIFFERENTIAL) + config.stateEnum().set(tr, BackupAgentBase::STATE_RUNNING_DIFFERENTIAL); // If stopWhenDone is set and there is a restorable version, set the done future and do not create further tasks. if(stopWhenDone && restorableVersion.present()) { @@ -2295,8 +2295,8 @@ namespace fileBackup { } // If the backup is restorable and the state isn't differential the set state to differential - if(restorableVersion.present() && backupState != BackupAgentBase::STATE_DIFFERENTIAL) - config.stateEnum().set(tr, BackupAgentBase::STATE_DIFFERENTIAL); + if(restorableVersion.present() && backupState != BackupAgentBase::STATE_RUNNING_DIFFERENTIAL) + config.stateEnum().set(tr, BackupAgentBase::STATE_RUNNING_DIFFERENTIAL); // Unless we are to stop, start the next snapshot using the default interval Reference snapshotDoneFuture = task->getDoneFuture(futureBucket); @@ -2376,7 +2376,7 @@ namespace fileBackup { config.startMutationLogs(tr, backupRange, destUidValue); } - config.stateEnum().set(tr, EBackupState::STATE_BACKUP); + config.stateEnum().set(tr, EBackupState::STATE_RUNNING); state Reference backupFinished = futureBucket->future(tr); @@ -3474,7 +3474,7 @@ public: // Break, if one of the following is true // - no longer runnable // - in differential mode (restorable) and stopWhenDone is not enabled - if( !FileBackupAgent::isRunnable(status) || ((!stopWhenDone) && (BackupAgentBase::STATE_DIFFERENTIAL == status) )) { + if( !FileBackupAgent::isRunnable(status) || ((!stopWhenDone) && (BackupAgentBase::STATE_RUNNING_DIFFERENTIAL == status) )) { if(pContainer != nullptr) { Reference c = wait(config.backupContainer().getOrThrow(tr, false, backup_invalid_info())); @@ -3822,7 +3822,7 @@ public: if(version.present()) { doc.setKey("Version", version.get()); if(epochs.present()) { - doc.setKey("Epochs", epochs.get()); + doc.setKey("EpochSeconds", epochs.get()); 
doc.setKey("Timestamp", timeStampToString(epochs)); } } @@ -3865,8 +3865,10 @@ public: state EBackupState backupState = wait(config.stateEnum().getD(tr, false, EBackupState::STATE_NEVERRAN)); JsonBuilderObject statusDoc; - statusDoc.setKey("Enum", (int)backupState); + statusDoc.setKey("Name", BackupAgentBase::getStateName(backupState)); statusDoc.setKey("Description", BackupAgentBase::getStateText(backupState)); + statusDoc.setKey("Completed", backupState == BackupAgentBase::STATE_COMPLETED); + statusDoc.setKey("Running", BackupAgentBase::isRunnable(backupState)); doc.setKey("Status", statusDoc); state Future done = Void(); @@ -3881,19 +3883,17 @@ public: doc.setKey("Restorable", latestRestorable.present()); - if(latestRestorable.present() && backupState != BackupAgentBase::STATE_COMPLETED) { + if(latestRestorable.present()) { JsonBuilderObject o = latestRestorable.toJSON(); - o.setKey("LagSeconds", (recentReadVersion - latestRestorable.version.get()) / CLIENT_KNOBS->CORE_VERSIONSPERSECOND); + if(backupState != BackupAgentBase::STATE_COMPLETED) { + o.setKey("LagSeconds", (recentReadVersion - latestRestorable.version.get()) / CLIENT_KNOBS->CORE_VERSIONSPERSECOND); + } doc.setKey("LatestRestorablePoint", o); } doc.setKey("DestinationURL", bc->getURL()); - - if(backupState == BackupAgentBase::STATE_COMPLETED) { - doc.setKey("Completed", latestRestorable.toJSON()); - } } - if(backupState == BackupAgentBase::STATE_DIFFERENTIAL || backupState == BackupAgentBase::STATE_BACKUP) { + if(backupState == BackupAgentBase::STATE_RUNNING_DIFFERENTIAL || backupState == BackupAgentBase::STATE_RUNNING) { state int64_t snapshotInterval; state int64_t logBytesWritten; state int64_t rangeBytesWritten; @@ -3902,6 +3902,8 @@ public: state TimestampedVersion snapshotTargetEnd; state TimestampedVersion latestLogEnd; state TimestampedVersion latestSnapshotEnd; + state TimestampedVersion snapshotLastDispatch; + state Optional snapshotLastDispatchShardsBehind; wait( store(snapshotInterval, 
config.snapshotIntervalSeconds().getOrThrow(tr)) && store(logBytesWritten, config.logBytesWritten().getD(tr)) @@ -3911,6 +3913,8 @@ public: && store(snapshotTargetEnd, getTimestampedVersion(tr, config.snapshotTargetEndVersion().get(tr))) && store(latestLogEnd, getTimestampedVersion(tr, config.latestLogEndVersion().get(tr))) && store(latestSnapshotEnd, getTimestampedVersion(tr, config.latestSnapshotEndVersion().get(tr))) + && store(snapshotLastDispatch, getTimestampedVersion(tr, config.snapshotDispatchLastVersion().get(tr))) + && store(snapshotLastDispatchShardsBehind, config.snapshotDispatchLastShardsBehind().get(tr)) ); doc.setKey("StopAfterSnapshot", stopWhenDone); @@ -3941,6 +3945,12 @@ public: double progress = (interval > 0) ? (100.0 * elapsed / interval) : 100; snapshot.setKey("ExpectedProgress", progress); } + + JsonBuilderObject dispatchDoc = snapshotLastDispatch.toJSON(); + if(snapshotLastDispatchShardsBehind.present()) { + dispatchDoc.setKey("ShardsBehind", snapshotLastDispatchShardsBehind.get()); + } + snapshot.setKey("LastDispatch", dispatchDoc); } doc.setKey("CurrentSnapshot", snapshot); @@ -4010,11 +4020,11 @@ public: case BackupAgentBase::STATE_SUBMITTED: statusText += "The backup on tag `" + tagName + "' is in progress (just started) to " + bc->getURL() + ".\n"; break; - case BackupAgentBase::STATE_BACKUP: + case BackupAgentBase::STATE_RUNNING: statusText += "The backup on tag `" + tagName + "' is in progress to " + bc->getURL() + ".\n"; snapshotProgress = true; break; - case BackupAgentBase::STATE_DIFFERENTIAL: + case BackupAgentBase::STATE_RUNNING_DIFFERENTIAL: statusText += "The backup on tag `" + tagName + "' is restorable but continuing to " + bc->getURL() + ".\n"; snapshotProgress = true; break; @@ -4057,7 +4067,7 @@ public: ); statusText += format("Snapshot interval is %lld seconds. 
", snapshotInterval); - if(backupState == BackupAgentBase::STATE_DIFFERENTIAL) + if(backupState == BackupAgentBase::STATE_RUNNING_DIFFERENTIAL) statusText += format("Current snapshot progress target is %3.2f%% (>100%% means the snapshot is supposed to be done)\n", 100.0 * (recentReadVersion - snapshotBeginVersion) / (snapshotTargetEndVersion - snapshotBeginVersion)) ; else statusText += "The initial snapshot is still running.\n"; @@ -4202,7 +4212,7 @@ public: backupConfig = BackupConfig(uidFlag.first); state EBackupState status = wait(backupConfig.stateEnum().getOrThrow(ryw_tr)); - if (status != BackupAgentBase::STATE_DIFFERENTIAL ) { + if (status != BackupAgentBase::STATE_RUNNING_DIFFERENTIAL ) { throw backup_duplicate(); } From f2953db7d817a708b95269b971bc600d4a350ac7 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Mon, 11 Mar 2019 01:25:51 -0700 Subject: [PATCH 12/47] Added updating of backup snapshot shards behind in snapshot dispatcher so status can determine if a snapshot is lagging the configured speed. 
--- fdbclient/BackupAgent.actor.h | 2 +- fdbclient/FileBackupAgent.actor.cpp | 34 ++++++++++++++++++++++++++--- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/fdbclient/BackupAgent.actor.h b/fdbclient/BackupAgent.actor.h index 2e4b89edfd..929efe9458 100644 --- a/fdbclient/BackupAgent.actor.h +++ b/fdbclient/BackupAgent.actor.h @@ -742,7 +742,7 @@ public: return configSpace.pack(LiteralStringRef(__FUNCTION__)); } - KeyBackedBinaryValue snapshotDispatchLastShardsBehind() { + KeyBackedProperty snapshotDispatchLastShardsBehind() { return configSpace.pack(LiteralStringRef(__FUNCTION__)); } diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index 905f777ce2..b6f2f89d9d 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -1273,6 +1273,10 @@ namespace fileBackup { static const uint32_t version; static struct { + // Set by Execute, used by Finish + static TaskParam shardsBehind() { + return LiteralStringRef(__FUNCTION__); + } // Set by Execute, used by Finish static TaskParam snapshotFinished() { return LiteralStringRef(__FUNCTION__); @@ -1369,8 +1373,11 @@ namespace fileBackup { && store(recentReadVersion, tr->getReadVersion()) && taskBucket->keepRunning(tr, task)); - // If the snapshot batch future key does not exist, create it, set it, and commit - // Also initialize the target snapshot end version if it is not yet set. 
+ // If the snapshot batch future key does not exist, this is the first execution of this dispatch task so + // - create and set the snapshot batch future key + // - initialize the batch size to 0 + // - initialize the target snapshot end version if it is not yet set + // - commit if(!snapshotBatchFutureKey.present()) { snapshotBatchFuture = futureBucket->future(tr); config.snapshotBatchFuture().set(tr, snapshotBatchFuture->pack()); @@ -1540,14 +1547,28 @@ namespace fileBackup { // Calculate number of shards that should be done before the next interval end // timeElapsed is between 0 and 1 and represents what portion of the shards we should have completed by now double timeElapsed; + Version snapshotScheduledVersionInterval = snapshotTargetEndVersion - snapshotBeginVersion; if(snapshotTargetEndVersion > snapshotBeginVersion) - timeElapsed = std::min(1.0, (double)(nextDispatchVersion - snapshotBeginVersion) / (snapshotTargetEndVersion - snapshotBeginVersion)); + timeElapsed = std::min(1.0, (double)(nextDispatchVersion - snapshotBeginVersion) / (snapshotScheduledVersionInterval)); else timeElapsed = 1.0; state int countExpectedShardsDone = countAllShards * timeElapsed; state int countShardsToDispatch = std::max(0, countExpectedShardsDone - countShardsDone); + // Calculate the number of shards that would have been dispatched by a normal (on-schedule) BackupSnapshotDispatchTask given + // the dispatch window and the start and expected-end versions of the current snapshot. + int64_t dispatchWindow = nextDispatchVersion - recentReadVersion; + int countShardsExpectedPerNormalWindow = (double(dispatchWindow) / snapshotScheduledVersionInterval) * countAllShards; + // countShardsThisDispatch is how many total shards are to be dispatched by this dispatch cycle. 
+ // Since this dispatch cycle can span many incrementally progressing separate executions of the BackupSnapshotDispatchTask + // instance, this is calculated as the number of shards dispatched so far in the dispatch batch plus the number of shards + // the current execution is going to attempt to do. + int countShardsThisDispatch = countShardsToDispatch + snapshotBatchSize.get(); + // The number of shards 'behind' the snapshot is the count of how many additional shards beyond normal are being dispatched, if any. + int countShardsBehind = std::max(0, countShardsToDispatch + snapshotBatchSize.get() - countShardsExpectedPerNormalWindow); + Params.shardsBehind().set(task, countShardsBehind); + TraceEvent("FileBackupSnapshotDispatchStats") .detail("BackupUID", config.getUid()) .detail("AllShards", countAllShards) @@ -1555,6 +1576,7 @@ .detail("ShardsNotDone", countShardsNotDone) .detail("ExpectedShardsDone", countExpectedShardsDone) .detail("ShardsToDispatch", countShardsToDispatch) + .detail("ShardsBehind", countShardsBehind) .detail("SnapshotBeginVersion", snapshotBeginVersion) .detail("SnapshotTargetEndVersion", snapshotTargetEndVersion) .detail("NextDispatchVersion", nextDispatchVersion) @@ -1627,6 +1649,8 @@ ASSERT(snapshotBatchSize.get() == oldBatchSize); config.snapshotBatchSize().set(tr, newBatchSize); snapshotBatchSize = newBatchSize; + config.snapshotDispatchLastShardsBehind().set(tr, Params.shardsBehind().get(task)); + config.snapshotDispatchLastVersion().set(tr, tr->getReadVersion().get()); } state std::vector> addTaskFutures; @@ -1729,6 +1753,10 @@ config.snapshotBatchDispatchDoneKey().clear(tr); config.snapshotBatchSize().clear(tr); + // Update shardsBehind here again in case the execute phase did not actually have to create any shard tasks + config.snapshotDispatchLastShardsBehind().set(tr, Params.shardsBehind().getOrDefault(task, 0)); + config.snapshotDispatchLastVersion().set(tr,
tr->getReadVersion().get()); + state Reference snapshotFinishedFuture = task->getDoneFuture(futureBucket); // If the snapshot is finished, the next task is to write a snapshot manifest, otherwise it's another snapshot dispatch task. From f0024c876ec36bb74ad6f7926b27af4015b513bc Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Mon, 11 Mar 2019 01:42:19 -0700 Subject: [PATCH 13/47] Changed backup paused JSON field name to be more precise and match the human readable status version. --- fdbclient/FileBackupAgent.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index b6f2f89d9d..20e007afe7 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -3885,7 +3885,7 @@ public: wait( store(paused, tr->get(backupAgent->taskBucket->getPauseKey())) && store(uidAndAbortedFlag, tag.get(tr)) && store(recentReadVersion, tr->getReadVersion()) ); - doc.setKey("AllBackupsPaused", paused.present()); + doc.setKey("BackupAgentsPaused", paused.present()); doc.setKey("Tag", tag.tagName); if(uidAndAbortedFlag.present()) { From adc5553cd93cacc65232a7827c2f4769969b4860 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Mon, 11 Mar 2019 02:05:01 -0700 Subject: [PATCH 14/47] Updated backup documentation regarding timestamp format. --- documentation/sphinx/source/backups.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/documentation/sphinx/source/backups.rst b/documentation/sphinx/source/backups.rst index f4d95d23e9..826f79a011 100644 --- a/documentation/sphinx/source/backups.rst +++ b/documentation/sphinx/source/backups.rst @@ -307,7 +307,7 @@ The ``expire`` subcommand will remove data from a backup prior to some point in The expiration CUTOFF must be specified by one of the two following arguments: ``--expire_before_timestamp `` - Specifies the expiration cutoff to DATETIME. 
Requires a cluster file and will use version/timestamp metadata in the database to convert DATETIME to a database commit version. DATETIME must be in the form "YYYY-MM-DD.HH:MI:SS" in UTC. + Specifies the expiration cutoff to DATETIME. Requires a cluster file and will use version/timestamp metadata in the database to convert DATETIME to a database commit version. DATETIME must be in the form "YYYY/MM/DD.HH:MI:SS+hhmm", for example "2018/12/31.23:59:59-0800". ``--expire_before_version `` Specifies the cutoff by a database commit version. @@ -315,7 +315,7 @@ The expiration CUTOFF must be specified by one of the two following arguments: Optionally, the user can specify a minimum RESTORABILITY guarantee with one of the following options. ``--restorable_after_timestamp `` - Specifies that the backup must be restorable to DATETIME and later. Requires a cluster file and will use version/timestamp metadata in the database to convert DATETIME to a database commit version. DATETIME must be in the form "YYYY-MM-DD.HH:MI:SS" in UTC. + Specifies that the backup must be restorable to DATETIME and later. Requires a cluster file and will use version/timestamp metadata in the database to convert DATETIME to a database commit version. DATETIME must be in the form "YYYY/MM/DD.HH:MI:SS+hhmm", for example "2018/12/31.23:59:59-0800". ``--restorable_after_version `` Specifies that the backup must be restorable as of VERSION and later. @@ -419,8 +419,8 @@ The ``start`` command will start a new restore on the specified (or default) tag ``-v `` Instead of the latest version the backup can be restored to, restore to VERSION. -``--timestamp `` - Instead of the latest version the backup can be restored to, restore to a version from approximately the given timestamp. Requires orig_cluster_file to be specified. +``--timestamp `` + Instead of the latest version the backup can be restored to, restore to a version from approximately the given timestamp. Requires orig_cluster_file to be specified. 
DATETIME must be in the form "YYYY/MM/DD.HH:MI:SS+hhmm", for example "2018/12/31.23:59:59-0800". ``--orig_cluster_file `` The cluster file for the original database from which the backup was created. The original database is only needed to convert a --timestamp argument to a database version. From ad4d2f192b24d87e24312e32d77cc25fe8fb8b95 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Mon, 11 Mar 2019 02:10:06 -0700 Subject: [PATCH 15/47] Added release note for backup/restore datetime format changes as it breaks compatibility with existing tooling. --- documentation/sphinx/source/release-notes.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst index 43bce080ad..c64525441f 100644 --- a/documentation/sphinx/source/release-notes.rst +++ b/documentation/sphinx/source/release-notes.rst @@ -25,6 +25,7 @@ Fixes ----- * Python: Creating a ``SingleFloat`` for the tuple layer didn't work with integers. `(PR #1216) `_ +* Standardized datetime string format across all backup and restore command options and outputs. `(PR #1248) `_ Status ------ From 9d1f06e2b61d324f17b920b8e100c254d01b11a8 Mon Sep 17 00:00:00 2001 From: Meng Xu <42559636+xumengpanda@users.noreply.github.com> Date: Mon, 11 Mar 2019 16:14:37 -0700 Subject: [PATCH 16/47] Update documentation/sphinx/source/mr-status.rst --- documentation/sphinx/source/mr-status.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/sphinx/source/mr-status.rst b/documentation/sphinx/source/mr-status.rst index 65be3cbf8a..07841b6f87 100644 --- a/documentation/sphinx/source/mr-status.rst +++ b/documentation/sphinx/source/mr-status.rst @@ -80,7 +80,7 @@ The following format informally describes the JSON containing the status data. 
T "connected_clients": [ { "address": "127.0.0.1:1234", - "log_group": "default" + "log_group": "default", "connected_coordinators": 2 } ], From 78ff3d92c1450322cc56627743f657158fcd393c Mon Sep 17 00:00:00 2001 From: Alec Grieser Date: Sun, 17 Feb 2019 10:09:42 -0800 Subject: [PATCH 17/47] memoize the packed Tuple representation --- bindings/java/CMakeLists.txt | 1 + .../tuple/IterableComparator.java | 2 +- .../com/apple/foundationdb/tuple/Tuple.java | 156 ++++-- .../apple/foundationdb/tuple/TupleUtil.java | 458 ++++++++++-------- .../test/TuplePerformanceTest.java | 12 +- 5 files changed, 373 insertions(+), 256 deletions(-) diff --git a/bindings/java/CMakeLists.txt b/bindings/java/CMakeLists.txt index 8a67e8f08a..93e7e7ea8e 100644 --- a/bindings/java/CMakeLists.txt +++ b/bindings/java/CMakeLists.txt @@ -89,6 +89,7 @@ set(JAVA_TESTS_SRCS src/test/com/apple/foundationdb/test/TesterArgs.java src/test/com/apple/foundationdb/test/TestResult.java src/test/com/apple/foundationdb/test/TupleTest.java + src/test/com/apple/foundationdb/test/TuplePerformanceTest.java src/test/com/apple/foundationdb/test/VersionstampSmokeTest.java src/test/com/apple/foundationdb/test/WatchTest.java src/test/com/apple/foundationdb/test/WhileTrueTest.java) diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/IterableComparator.java b/bindings/java/src/main/com/apple/foundationdb/tuple/IterableComparator.java index 71aa23e9b1..1587b3fd6e 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/IterableComparator.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/IterableComparator.java @@ -34,7 +34,7 @@ import java.util.Iterator; * tuple1.compareTo(tuple2) * == new IterableComparator().compare(tuple1, tuple2) * == new IterableComparator().compare(tuple1.getItems(), tuple2.getItems()), - * == ByteArrayUtil.compareUnsigned(tuple1.pack(), tuple2.pack())} + * == ByteArrayUtil.compareUnsigned(tuple1.packInternal(), tuple2.packInternal())} * * *

diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java b/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java index 557432d4e3..7b14632452 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java @@ -68,10 +68,11 @@ import com.apple.foundationdb.Range; * This class is not thread safe. */ public class Tuple implements Comparable, Iterable { - private static IterableComparator comparator = new IterableComparator(); + private static final IterableComparator comparator = new IterableComparator(); private List elements; private int memoizedHash = 0; + private byte[] packed = null; private Tuple(List elements, Object newItem) { this(elements); @@ -82,6 +83,12 @@ public class Tuple implements Comparable, Iterable { this.elements = new ArrayList<>(elements); } + private enum VersionstampExpectations { + UNKNOWN, + HAS_INCOMPLETE, + HAS_NO_INCOMPLETE + } + /** * Creates a copy of this {@code Tuple} with an appended last element. The parameter * is untyped but only {@link String}, {@code byte[]}, {@link Number}s, {@link UUID}s, @@ -261,7 +268,7 @@ public class Tuple implements Comparable, Iterable { * @return a newly created {@code Tuple} */ public Tuple addAll(List o) { - List merged = new ArrayList(o.size() + this.elements.size()); + List merged = new ArrayList<>(o.size() + this.elements.size()); merged.addAll(this.elements); merged.addAll(o); return new Tuple(merged); @@ -275,7 +282,7 @@ public class Tuple implements Comparable, Iterable { * @return a newly created {@code Tuple} */ public Tuple addAll(Tuple other) { - List merged = new ArrayList(this.size() + other.size()); + List merged = new ArrayList<>(this.size() + other.size()); merged.addAll(this.elements); merged.addAll(other.peekItems()); return new Tuple(merged); @@ -285,10 +292,10 @@ public class Tuple implements Comparable, Iterable { * Get an encoded representation of this {@code Tuple}. 
Each element is encoded to * {@code byte}s and concatenated. * - * @return a serialized representation of this {@code Tuple}. + * @return a packed representation of this {@code Tuple}. */ public byte[] pack() { - return pack(null); + return packInternal(null, true); } /** @@ -296,11 +303,36 @@ public class Tuple implements Comparable, Iterable { * {@code byte}s and concatenated, and then the prefix supplied is prepended to * the array. * - * @param prefix additional byte-array prefix to prepend to serialized bytes. - * @return a serialized representation of this {@code Tuple} prepended by the {@code prefix}. + * @param prefix additional byte-array prefix to prepend to packed bytes. + * @return a packed representation of this {@code Tuple} prepended by the {@code prefix}. */ public byte[] pack(byte[] prefix) { - return TupleUtil.pack(elements, prefix); + return packInternal(prefix, true); + } + + byte[] packInternal(byte[] prefix, boolean copy) { + boolean hasPrefix = prefix != null && prefix.length > 1; + if(packed == null) { + byte[] result = TupleUtil.pack(elements, prefix); + if(hasPrefix) { + packed = Arrays.copyOfRange(result, prefix.length, result.length); + return result; + } + else { + packed = result; + } + } + if(hasPrefix) { + return ByteArrayUtil.join(prefix, packed); + } + else { + if(copy) { + return Arrays.copyOf(packed, packed.length); + } + else { + return packed; + } + } } /** @@ -309,7 +341,7 @@ public class Tuple implements Comparable, Iterable { * This works the same as the {@link #packWithVersionstamp(byte[]) one-paramter version of this method}, * but it does not add any prefix to the array. * - * @return a serialized representation of this {@code Tuple} for use with versionstamp ops. + * @return a packed representation of this {@code Tuple} for use with versionstamp ops. 
* @throws IllegalArgumentException if there is not exactly one incomplete {@link Versionstamp} included in this {@code Tuple} */ public byte[] packWithVersionstamp() { @@ -322,28 +354,71 @@ public class Tuple implements Comparable, Iterable { * There must be exactly one incomplete {@link Versionstamp} instance within this * {@code Tuple} or this will throw an {@link IllegalArgumentException}. * Each element is encoded to {@code byte}s and concatenated, the prefix - * is then prepended to the array, and then the index of the serialized incomplete + * is then prepended to the array, and then the index of the packed incomplete * {@link Versionstamp} is appended as a little-endian integer. This can then be passed * as the key to * {@link com.apple.foundationdb.Transaction#mutate(com.apple.foundationdb.MutationType, byte[], byte[]) Transaction.mutate()} * with the {@code SET_VERSIONSTAMPED_KEY} {@link com.apple.foundationdb.MutationType}, and the transaction's * version will then be filled in at commit time. * - * @param prefix additional byte-array prefix to prepend to serialized bytes. - * @return a serialized representation of this {@code Tuple} for use with versionstamp ops. + * @param prefix additional byte-array prefix to prepend to packed bytes. + * @return a packed representation of this {@code Tuple} for use with versionstamp ops. 
* @throws IllegalArgumentException if there is not exactly one incomplete {@link Versionstamp} included in this {@code Tuple} */ public byte[] packWithVersionstamp(byte[] prefix) { return TupleUtil.packWithVersionstamp(elements, prefix); } + byte[] packWithVersionstampInternal(byte[] prefix, boolean copy) { + boolean hasPrefix = prefix != null && prefix.length > 0; + if(packed == null) { + byte[] result = TupleUtil.packWithVersionstamp(elements, prefix); + if(hasPrefix) { + byte[] withoutPrefix = Arrays.copyOfRange(result, prefix.length, result.length); + TupleUtil.adjustVersionPosition(packed, -1 * prefix.length); + packed = withoutPrefix; + return result; + } + else { + packed = result; + } + } + if(hasPrefix) { + byte[] withPrefix = ByteArrayUtil.join(prefix, packed); + TupleUtil.adjustVersionPosition(withPrefix, prefix.length); + return withPrefix; + } + else { + if(copy) { + return Arrays.copyOf(packed, packed.length); + } + else { + return packed; + } + } + } + + byte[] packMaybeVersionstamp(byte[] prefix) { + if(packed == null) { + if(hasIncompleteVersionstamp()) { + return packWithVersionstampInternal(prefix, false); + } + else { + return packInternal(prefix, false); + } + } + else { + return packed; + } + } + /** * Gets the unserialized contents of this {@code Tuple}. * * @return the elements that make up this {@code Tuple}. 
*/ public List getItems() { - return new ArrayList(elements); + return new ArrayList<>(elements); } /** @@ -385,7 +460,7 @@ public class Tuple implements Comparable, Iterable { * @see #fromItems(Iterable) */ public Tuple() { - this.elements = new LinkedList(); + this.elements = new LinkedList<>(); } /** @@ -413,6 +488,7 @@ public class Tuple implements Comparable, Iterable { public static Tuple fromBytes(byte[] bytes, int offset, int length) { Tuple t = new Tuple(); t.elements = TupleUtil.unpack(bytes, offset, length); + t.packed = Arrays.copyOfRange(bytes, offset, offset + length); return t; } @@ -623,13 +699,14 @@ public class Tuple implements Comparable, Iterable { Object o = this.elements.get(index); if(o == null) { return null; - } else if(o instanceof Tuple) { + } + else if(o instanceof Tuple) { return ((Tuple)o).getItems(); - } else if(o instanceof List) { - List ret = new LinkedList(); - ret.addAll((List)o); - return ret; - } else { + } + else if(o instanceof List) { + return new ArrayList<>((List) o); + } + else { throw new ClassCastException("Cannot convert item of type " + o.getClass() + " to list"); } } @@ -678,11 +755,10 @@ public class Tuple implements Comparable, Iterable { * @throws IllegalStateException if this {@code Tuple} is empty */ public Tuple popFront() { - if(elements.size() == 0) + if(elements.isEmpty()) throw new IllegalStateException("Tuple contains no elements"); - - List items = new ArrayList(elements.size() - 1); + List items = new ArrayList<>(elements.size() - 1); for(int i = 1; i < this.elements.size(); i++) { items.add(this.elements.get(i)); } @@ -697,11 +773,10 @@ public class Tuple implements Comparable, Iterable { * @throws IllegalStateException if this {@code Tuple} is empty */ public Tuple popBack() { - if(elements.size() == 0) + if(elements.isEmpty()) throw new IllegalStateException("Tuple contains no elements"); - - List items = new ArrayList(elements.size() - 1); + List items = new ArrayList<>(elements.size() - 1); for(int 
i = 0; i < this.elements.size() - 1; i++) { items.add(this.elements.get(i)); } @@ -718,12 +793,18 @@ public class Tuple implements Comparable, Iterable { * Tuple t = Tuple.from("a", "b"); * Range r = t.range(); * {@code r} includes all tuples ("a", "b", ...) + *
+ * This function will throw an error if this {@code Tuple} contains an incomplete + * {@link Versionstamp}. * * @return the range of keys containing all {@code Tuple}s that have this {@code Tuple} * as a prefix */ public Range range() { - byte[] p = pack(); + if(hasIncompleteVersionstamp()) { + throw new IllegalStateException("Tuple with incomplete versionstamp used for range"); + } + byte[] p = packInternal(null, false); //System.out.println("Packed tuple is: " + ByteArrayUtil.printable(p)); return new Range(ByteArrayUtil.join(p, new byte[] {0x0}), ByteArrayUtil.join(p, new byte[] {(byte)0xff})); @@ -742,6 +823,16 @@ public class Tuple implements Comparable, Iterable { return TupleUtil.hasIncompleteVersionstamp(stream()); } + /** + * Get the number of bytes in the packed representation of this {@code Tuple}. + * + * @return + */ + public int getPackedSize() { + byte[] p = packMaybeVersionstamp(null); + return p.length; + } + /** * Compare the byte-array representation of this {@code Tuple} against another. 
This method * will sort {@code Tuple}s in the same order that they would be sorted as keys in @@ -772,14 +863,7 @@ public class Tuple implements Comparable, Iterable { @Override public int hashCode() { if(memoizedHash == 0) { - byte[] packed; - if(hasIncompleteVersionstamp()) { - packed = packWithVersionstamp(null); - } - else { - packed = pack(); - } - memoizedHash = Arrays.hashCode(packed); + memoizedHash = Arrays.hashCode(packMaybeVersionstamp(null)); } return memoizedHash; } @@ -1011,7 +1095,7 @@ public class Tuple implements Comparable, Iterable { } private static Tuple createTuple(int items) { - List elements = new ArrayList(items); + List elements = new ArrayList<>(items); for(int i = 0; i < items; i++) { elements.add(new byte[]{99}); } diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java b/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java index cf1d337f2e..f25828f47d 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java @@ -28,7 +28,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; -import java.util.LinkedList; import java.util.List; import java.util.UUID; import java.util.stream.Stream; @@ -73,22 +72,45 @@ class TupleUtil { } static class DecodeResult { - final int end; - final Object o; + final List values; + int end; - DecodeResult(int pos, Object o) { - this.end = pos; - this.o = o; + DecodeResult() { + values = new ArrayList<>(); + end = 0; + } + + void add(Object value, int end) { + values.add(value); + this.end = end; } } static class EncodeResult { - final int totalLength; - final int versionPos; + final List encodedValues; + int totalLength; + int versionPos; - EncodeResult(int totalLength, int versionPos) { - this.totalLength = totalLength; + EncodeResult(int capacity) { + this.encodedValues = new ArrayList<>(capacity); + totalLength = 0; + 
versionPos = -1; + } + + EncodeResult add(byte[] encoded, int versionPos) { + if(versionPos >= 0 && this.versionPos >= 0) { + throw new IllegalArgumentException("Multiple incomplete Versionstamps included in Tuple"); + } + encodedValues.add(encoded); + totalLength += encoded.length; this.versionPos = versionPos; + return this; + } + + EncodeResult add(byte[] encoded) { + encodedValues.add(encoded); + totalLength += encoded.length; + return this; } } @@ -129,10 +151,44 @@ class TupleUtil { return bytes; } - public static byte[] join(List items) { + static byte[] join(List items) { return ByteArrayUtil.join(null, items); } + private static void adjustVersionPosition300(byte[] packed, int delta) { + int offsetOffset = packed.length - Short.BYTES; + ByteBuffer buffer = ByteBuffer.wrap(packed, offsetOffset, Short.BYTES).order(ByteOrder.LITTLE_ENDIAN); + int versionPosition = buffer.getShort() + delta; + if(versionPosition > 0xffff) { + throw new IllegalArgumentException("Tuple has incomplete version at position " + versionPosition + " which is greater than the maximum " + 0xffff); + } + if(versionPosition < 0) { + throw new IllegalArgumentException("Tuple has an incomplete version at a negative position"); + } + buffer.position(offsetOffset); + buffer.putShort((short)versionPosition); + } + + private static void adjustVersionPosition520(byte[] packed, int delta) { + int offsetOffset = packed.length - Integer.BYTES; + ByteBuffer buffer = ByteBuffer.wrap(packed, offsetOffset, Integer.BYTES).order(ByteOrder.LITTLE_ENDIAN); + int versionPosition = buffer.getInt() + delta; + if(versionPosition < 0) { + throw new IllegalArgumentException("Tuple has an incomplete version at a negative position"); + } + buffer.position(offsetOffset); + buffer.putInt(versionPosition); + } + + static void adjustVersionPosition(byte[] packed, int delta) { + if(FDB.instance().getAPIVersion() < 520) { + adjustVersionPosition300(packed, delta); + } + else { + adjustVersionPosition520(packed, delta); 
+ } + } + static int getCodeFor(Object o) { if(o == null) return nil; @@ -159,71 +215,60 @@ class TupleUtil { throw new IllegalArgumentException("Unsupported data type: " + o.getClass().getName()); } - static EncodeResult encode(Object t, boolean nested, List encoded) { + static void encode(EncodeResult result, Object t, boolean nested) { if(t == null) { if(nested) { - encoded.add(NULL_ESCAPED_ARR); - return new EncodeResult(NULL_ESCAPED_ARR.length, -1); + result.add(NULL_ESCAPED_ARR); } else { - encoded.add(NULL_ARR); - return new EncodeResult(NULL_ARR.length, -1); + result.add(NULL_ARR); } } - if(t instanceof byte[]) - return encode((byte[]) t, encoded); - if(t instanceof String) - return encode((String)t, encoded); - if(t instanceof BigInteger) - return encode((BigInteger)t, encoded); - if(t instanceof Float) - return encode((Float)t, encoded); - if(t instanceof Double) - return encode((Double)t, encoded); - if(t instanceof Boolean) - return encode((Boolean)t, encoded); - if(t instanceof UUID) - return encode((UUID)t, encoded); - if(t instanceof Number) - return encode(((Number)t).longValue(), encoded); - if(t instanceof Versionstamp) - return encode((Versionstamp)t, encoded); - if(t instanceof List) - return encode((List)t, encoded); - if(t instanceof Tuple) - return encode(((Tuple)t).getItems(), encoded); - throw new IllegalArgumentException("Unsupported data type: " + t.getClass().getName()); + else if(t instanceof byte[]) + encode(result, (byte[]) t); + else if(t instanceof String) + encode(result, (String)t); + else if(t instanceof BigInteger) + encode(result, (BigInteger)t); + else if(t instanceof Float) + encode(result, (Float)t); + else if(t instanceof Double) + encode(result, (Double)t); + else if(t instanceof Boolean) + encode(result, (Boolean)t); + else if(t instanceof UUID) + encode(result, (UUID)t); + else if(t instanceof Number) + encode(result, ((Number)t).longValue()); + else if(t instanceof Versionstamp) + encode(result, (Versionstamp)t); + else 
if(t instanceof List) + encode(result, (List)t); + else if(t instanceof Tuple) + encode(result, ((Tuple)t).getItems()); + else + throw new IllegalArgumentException("Unsupported data type: " + t.getClass().getName()); } - static EncodeResult encode(Object t, List encoded) { - return encode(t, false, encoded); + static void encode(EncodeResult result, Object t) { + encode(result, t, false); } - static EncodeResult encode(byte[] bytes, List encoded) { - encoded.add(BYTES_ARR); + static void encode(EncodeResult result, byte[] bytes) { byte[] escaped = ByteArrayUtil.replace(bytes, NULL_ARR, NULL_ESCAPED_ARR); - encoded.add(escaped); - encoded.add(new byte[] {nil}); - - //System.out.println("Joining bytes..."); - return new EncodeResult(2 + escaped.length,-1); + result.add(BYTES_ARR).add(escaped).add(NULL_ARR); } - static EncodeResult encode(String s, List encoded) { - encoded.add(STRING_ARR); + static void encode(EncodeResult result, String s) { byte[] escaped = ByteArrayUtil.replace(s.getBytes(UTF8), NULL_ARR, NULL_ESCAPED_ARR); - encoded.add(escaped); - encoded.add(NULL_ARR); - - //System.out.println("Joining string..."); - return new EncodeResult(2 + escaped.length, -1); + result.add(STRING_ARR).add(escaped).add(NULL_ARR); } - static EncodeResult encode(BigInteger i, List encoded) { + static void encode(EncodeResult result, BigInteger i) { //System.out.println("Encoding integral " + i); if(i.equals(BigInteger.ZERO)) { - encoded.add(new byte[]{INT_ZERO_CODE}); - return new EncodeResult(1,-1); + result.add(new byte[]{INT_ZERO_CODE}); + return; } byte[] bytes = i.toByteArray(); if(i.compareTo(BigInteger.ZERO) > 0) { @@ -232,177 +277,171 @@ class TupleUtil { if(length > 0xff) { throw new IllegalArgumentException("BigInteger magnitude is too large (more than 255 bytes)"); } - byte[] result = new byte[length + 2]; - result[0] = POS_INT_END; - result[1] = (byte)(length); - System.arraycopy(bytes, bytes.length - length, result, 2, length); - encoded.add(result); - return new 
EncodeResult(result.length, -1); + byte[] intBytes = new byte[length + 2]; + intBytes[0] = POS_INT_END; + intBytes[1] = (byte)(length); + System.arraycopy(bytes, bytes.length - length, intBytes, 2, length); + result.add(intBytes); } - int n = ByteArrayUtil.bisectLeft(size_limits, i); - assert n <= size_limits.length; - //byte[] bytes = ByteBuffer.allocate(8).order(ByteOrder.BIG_ENDIAN).putLong(i).array(); - //System.out.println(" -- integral has 'n' of " + n + " and output bytes of " + bytes.length); - byte[] result = new byte[n+1]; - result[0] = (byte)(INT_ZERO_CODE + n); - System.arraycopy(bytes, bytes.length - n, result, 1, n); - encoded.add(result); - return new EncodeResult(result.length, -1); - } - if(i.negate().compareTo(size_limits[size_limits.length-1]) > 0) { - int length = byteLength(i.negate().toByteArray()); - if(length > 0xff) { - throw new IllegalArgumentException("BigInteger magnitude is too large (more than 255 bytes)"); + else { + int n = ByteArrayUtil.bisectLeft(size_limits, i); + assert n <= size_limits.length; + //byte[] bytes = ByteBuffer.allocate(8).order(ByteOrder.BIG_ENDIAN).putLong(i).array(); + //System.out.println(" -- integral has 'n' of " + n + " and output bytes of " + bytes.length); + byte[] intBytes = new byte[n + 1]; + intBytes[0] = (byte) (INT_ZERO_CODE + n); + System.arraycopy(bytes, bytes.length - n, intBytes, 1, n); + result.add(intBytes); } - BigInteger offset = BigInteger.ONE.shiftLeft(length*8).subtract(BigInteger.ONE); - byte[] adjusted = i.add(offset).toByteArray(); - byte[] result = new byte[length + 2]; - result[0] = NEG_INT_START; - result[1] = (byte)(length ^ 0xff); - if(adjusted.length >= length) { - System.arraycopy(adjusted, adjusted.length - length, result, 2, length); - } else { - Arrays.fill(result, 2, result.length - adjusted.length, (byte)0x00); - System.arraycopy(adjusted, 0, result, result.length - adjusted.length, adjusted.length); + } + else { + if(i.negate().compareTo(size_limits[size_limits.length - 1]) > 
0) { + int length = byteLength(i.negate().toByteArray()); + if (length > 0xff) { + throw new IllegalArgumentException("BigInteger magnitude is too large (more than 255 bytes)"); + } + BigInteger offset = BigInteger.ONE.shiftLeft(length * 8).subtract(BigInteger.ONE); + byte[] adjusted = i.add(offset).toByteArray(); + byte[] intBytes = new byte[length + 2]; + intBytes[0] = NEG_INT_START; + intBytes[1] = (byte) (length ^ 0xff); + if (adjusted.length >= length) { + System.arraycopy(adjusted, adjusted.length - length, intBytes, 2, length); + } else { + Arrays.fill(intBytes, 2, intBytes.length - adjusted.length, (byte) 0x00); + System.arraycopy(adjusted, 0, intBytes, intBytes.length - adjusted.length, adjusted.length); + } + result.add(intBytes); + } + else { + int n = ByteArrayUtil.bisectLeft(size_limits, i.negate()); + + assert n >= 0 && n < size_limits.length; // can we do this? it seems to be required for the following statement + + long maxv = size_limits[n].add(i).longValue(); + byte[] adjustedBytes = ByteBuffer.allocate(8).order(ByteOrder.BIG_ENDIAN).putLong(maxv).array(); + byte[] intBytes = new byte[n + 1]; + intBytes[0] = (byte) (20 - n); + System.arraycopy(adjustedBytes, adjustedBytes.length - n, intBytes, 1, n); + result.add(intBytes); } - encoded.add(result); - return new EncodeResult(result.length, -1); } - int n = ByteArrayUtil.bisectLeft(size_limits, i.negate()); - - assert n >= 0 && n < size_limits.length; // can we do this? 
it seems to be required for the following statement - - long maxv = size_limits[n].add(i).longValue(); - byte[] adjustedBytes = ByteBuffer.allocate(8).order(ByteOrder.BIG_ENDIAN).putLong(maxv).array(); - byte[] result = new byte[n+1]; - result[0] = (byte)(20 - n); - System.arraycopy(adjustedBytes, adjustedBytes.length - n, result, 1, n); - encoded.add(result); - return new EncodeResult(result.length, -1); } - static EncodeResult encode(Integer i, List encoded) { - return encode(i.longValue(), encoded); + static void encode(EncodeResult result, Integer i) { + encode(result, i.longValue()); } - static EncodeResult encode(long i, List encoded) { - return encode(BigInteger.valueOf(i), encoded); + static void encode(EncodeResult result, long i) { + encode(result, BigInteger.valueOf(i)); } - static EncodeResult encode(Float f, List encoded) { - byte[] result = ByteBuffer.allocate(5).order(ByteOrder.BIG_ENDIAN).put(FLOAT_CODE).putFloat(f).array(); - floatingPointCoding(result, 1, true); - encoded.add(result); - return new EncodeResult(result.length, -1); + static void encode(EncodeResult result, Float f) { + byte[] floatBytes = ByteBuffer.allocate(5).order(ByteOrder.BIG_ENDIAN).put(FLOAT_CODE).putFloat(f).array(); + floatingPointCoding(floatBytes, 1, true); + result.add(floatBytes); } - static EncodeResult encode(Double d, List encoded) { - byte[] result = ByteBuffer.allocate(9).order(ByteOrder.BIG_ENDIAN).put(DOUBLE_CODE).putDouble(d).array(); - floatingPointCoding(result, 1, true); - encoded.add(result); - return new EncodeResult(result.length, -1); + static void encode(EncodeResult result, Double d) { + byte[] doubleBytes = ByteBuffer.allocate(9).order(ByteOrder.BIG_ENDIAN).put(DOUBLE_CODE).putDouble(d).array(); + floatingPointCoding(doubleBytes, 1, true); + result.add(doubleBytes); } - static EncodeResult encode(Boolean b, List encoded) { - if (b) { - encoded.add(TRUE_ARR); - } else { - encoded.add(FALSE_ARR); + static void encode(EncodeResult result, Boolean b) { + 
if(b) { + result.add(TRUE_ARR); + } + else { + result.add(FALSE_ARR); } - return new EncodeResult(1, -1); } - static EncodeResult encode(UUID uuid, List encoded) { - byte[] result = ByteBuffer.allocate(17).put(UUID_CODE).order(ByteOrder.BIG_ENDIAN) + static void encode(EncodeResult result, UUID uuid) { + byte[] uuidBytes = ByteBuffer.allocate(17).put(UUID_CODE).order(ByteOrder.BIG_ENDIAN) .putLong(uuid.getMostSignificantBits()).putLong(uuid.getLeastSignificantBits()) .array(); - encoded.add(result); - return new EncodeResult(result.length, -1); + result.add(uuidBytes); } - static EncodeResult encode(Versionstamp v, List encoded) { - encoded.add(VERSIONSTAMP_ARR); - encoded.add(v.getBytes()); - return new EncodeResult(1 + Versionstamp.LENGTH, (v.isComplete() ? -1 : 1)); - } - - static EncodeResult encode(List value, List encoded) { - int lenSoFar = 0; - int versionPos = -1; - encoded.add(NESTED_ARR); - for(Object t : value) { - EncodeResult childResult = encode(t, true, encoded); - if(childResult.versionPos > 0) { - if(versionPos > 0) { - throw new IllegalArgumentException("Multiple incomplete Versionstamps included in Tuple"); - } - versionPos = lenSoFar + childResult.versionPos; - } - lenSoFar += childResult.totalLength; + static void encode(EncodeResult result, Versionstamp v) { + result.add(VERSIONSTAMP_ARR); + if(v.isComplete()) { + result.add(v.getBytes()); + } + else { + result.add(v.getBytes(), result.totalLength); } - encoded.add(NULL_ARR); - return new EncodeResult(lenSoFar + 2, (versionPos < 0 ? 
-1 : versionPos + 1)); } - static DecodeResult decode(byte[] rep, int pos, int last) { + static void encode(EncodeResult result, List value) { + result.add(NESTED_ARR); + for(Object t : value) { + encode(result, t, true); + } + result.add(NULL_ARR); + } + + static void decode(DecodeResult result, byte[] rep, int pos, int last) { //System.out.println("Decoding '" + ArrayUtils.printable(rep) + "' at " + pos); // SOMEDAY: codes over 127 will be a problem with the signed Java byte mess int code = rep[pos]; int start = pos + 1; if(code == nil) { - return new DecodeResult(start, null); + result.add(null, start); } - if(code == BYTES_CODE) { + else if(code == BYTES_CODE) { int end = ByteArrayUtil.findTerminator(rep, (byte)0x0, (byte)0xff, start, last); //System.out.println("End of byte string: " + end); byte[] range = ByteArrayUtil.replace(rep, start, end - start, NULL_ESCAPED_ARR, new byte[] { nil }); //System.out.println(" -> byte string contents: '" + ArrayUtils.printable(range) + "'"); - return new DecodeResult(end + 1, range); + result.add(range, end + 1); } - if(code == STRING_CODE) { + else if(code == STRING_CODE) { int end = ByteArrayUtil.findTerminator(rep, (byte)0x0, (byte)0xff, start, last); //System.out.println("End of UTF8 string: " + end); byte[] stringBytes = ByteArrayUtil.replace(rep, start, end - start, NULL_ESCAPED_ARR, new byte[] { nil }); String str = new String(stringBytes, UTF8); //System.out.println(" -> UTF8 string contents: '" + str + "'"); - return new DecodeResult(end + 1, str); + result.add(str, end + 1); } - if(code == FLOAT_CODE) { + else if(code == FLOAT_CODE) { byte[] resBytes = Arrays.copyOfRange(rep, start, start+4); floatingPointCoding(resBytes, 0, false); float res = ByteBuffer.wrap(resBytes).order(ByteOrder.BIG_ENDIAN).getFloat(); - return new DecodeResult(start + 4, res); + result.add(res, start + Float.BYTES); } - if(code == DOUBLE_CODE) { + else if(code == DOUBLE_CODE) { byte[] resBytes = Arrays.copyOfRange(rep, start, start+8); 
floatingPointCoding(resBytes, 0, false); double res = ByteBuffer.wrap(resBytes).order(ByteOrder.BIG_ENDIAN).getDouble(); - return new DecodeResult(start + 8, res); + result.add(res, start + Double.BYTES); } - if(code == FALSE_CODE) { - return new DecodeResult(start, false); + else if(code == FALSE_CODE) { + result.add(false, start); } - if(code == TRUE_CODE) { - return new DecodeResult(start, true); + else if(code == TRUE_CODE) { + result.add(true, start); } - if(code == UUID_CODE) { + else if(code == UUID_CODE) { ByteBuffer bb = ByteBuffer.wrap(rep, start, 16).order(ByteOrder.BIG_ENDIAN); long msb = bb.getLong(); long lsb = bb.getLong(); - return new DecodeResult(start + 16, new UUID(msb, lsb)); + result.add(new UUID(msb, lsb), start + 16); } - if(code == POS_INT_END) { + else if(code == POS_INT_END) { int n = rep[start] & 0xff; - return new DecodeResult(start + n + 1, new BigInteger(ByteArrayUtil.join(new byte[]{0x00}, Arrays.copyOfRange(rep, start+1, start+n+1)))); + BigInteger res = new BigInteger(ByteArrayUtil.join(new byte[]{0x00}, Arrays.copyOfRange(rep, start+1, start+n+1))); + result.add(res, start + n + 1); } - if(code == NEG_INT_START) { + else if(code == NEG_INT_START) { int n = (rep[start] ^ 0xff) & 0xff; BigInteger origValue = new BigInteger(ByteArrayUtil.join(new byte[]{0x00}, Arrays.copyOfRange(rep, start+1, start+n+1))); BigInteger offset = BigInteger.ONE.shiftLeft(n*8).subtract(BigInteger.ONE); - return new DecodeResult(start + n + 1, origValue.subtract(offset)); + result.add(origValue.subtract(offset), start + n + 1); } - if(code > NEG_INT_START && code < POS_INT_END) { + else if(code > NEG_INT_START && code < POS_INT_END) { // decode a long byte[] longBytes = new byte[9]; boolean upper = code >= INT_ZERO_CODE; @@ -426,36 +465,37 @@ class TupleUtil { val.compareTo(BigInteger.valueOf(Long.MAX_VALUE))>0) { // This can occur if the thing can be represented with 8 bytes but not // the right sign information. 
- return new DecodeResult(end, val); + result.add(val, end); + } else { + result.add(val.longValue(), end); } - return new DecodeResult(end, val.longValue()); } - if(code == VERSIONSTAMP_CODE) { - return new DecodeResult( - start + Versionstamp.LENGTH, - Versionstamp.fromBytes(Arrays.copyOfRange(rep, start, start + Versionstamp.LENGTH))); + else if(code == VERSIONSTAMP_CODE) { + Versionstamp val = Versionstamp.fromBytes(Arrays.copyOfRange(rep, start, start + Versionstamp.LENGTH)); + result.add(val, start + Versionstamp.LENGTH); } - if(code == NESTED_CODE) { - List items = new LinkedList(); + else if(code == NESTED_CODE) { + DecodeResult subResult = new DecodeResult(); int endPos = start; while(endPos < rep.length) { if(rep[endPos] == nil) { if(endPos + 1 < rep.length && rep[endPos+1] == (byte)0xff) { - items.add(null); + subResult.add(null, endPos + 2); endPos += 2; } else { endPos += 1; break; } } else { - DecodeResult subResult = decode(rep, endPos, last); - items.add(subResult.o); + decode(subResult, rep, endPos, last); endPos = subResult.end; } } - return new DecodeResult(endPos, items); + result.add(subResult.values, endPos); + } + else { + throw new IllegalArgumentException("Unknown tuple data type " + code + " at index " + pos); } - throw new IllegalArgumentException("Unknown tuple data type " + code + " at index " + pos); } static int compareSignedBigEndian(byte[] arr1, byte[] arr2) { @@ -539,62 +579,51 @@ class TupleUtil { } static List unpack(byte[] bytes, int start, int length) { - List items = new LinkedList<>(); + DecodeResult decodeResult = new DecodeResult(); int pos = start; int end = start + length; while(pos < end) { - DecodeResult decoded = decode(bytes, pos, end); - items.add(decoded.o); - pos = decoded.end; + decode(decodeResult, bytes, pos, end); + pos = decodeResult.end; } - return items; + return decodeResult.values; } - static EncodeResult encodeAll(List items, byte[] prefix, List encoded) { + static void encodeAll(EncodeResult result, List 
items, byte[] prefix) { if(prefix != null) { - encoded.add(prefix); + result.add(prefix); } - int lenSoFar = (prefix == null) ? 0 : prefix.length; - int versionPos = -1; for(Object t : items) { - EncodeResult result = encode(t, encoded); - if(result.versionPos > 0) { - if(versionPos > 0) { - throw new IllegalArgumentException("Multiple incomplete Versionstamps included in Tuple"); - } - versionPos = result.versionPos + lenSoFar; - } - lenSoFar += result.totalLength; + encode(result, t); } //System.out.println("Joining whole tuple..."); - return new EncodeResult(lenSoFar, versionPos); } static byte[] pack(List items, byte[] prefix) { - List encoded = new ArrayList<>(2 * items.size() + (prefix == null ? 0 : 1)); - EncodeResult result = encodeAll(items, prefix, encoded); - if(result.versionPos > 0) { - throw new IllegalArgumentException("Incomplete Versionstamp included in vanilla tuple pack"); + EncodeResult result = new EncodeResult(2 * items.size() + (prefix == null ? 0 : 1)); + encodeAll(result, items, prefix); + if(result.versionPos >= 0) { + throw new IllegalArgumentException("Incomplete Versionstamp included in vanilla tuple packInternal"); } else { - return ByteArrayUtil.join(null, encoded); + return ByteArrayUtil.join(null, result.encodedValues); } } static byte[] packWithVersionstamp(List items, byte[] prefix) { - List encoded = new ArrayList<>(2 * items.size() + (prefix == null ? 1 : 2)); - EncodeResult result = encodeAll(items, prefix, encoded); + EncodeResult result = new EncodeResult(2 * items.size() + (prefix == null ? 
1 : 2)); + encodeAll(result, items, prefix); if(result.versionPos < 0) { - throw new IllegalArgumentException("No incomplete Versionstamp included in tuple pack with versionstamp"); + throw new IllegalArgumentException("No incomplete Versionstamp included in tuple packInternal with versionstamp"); } else { if(result.versionPos > 0xffff) { throw new IllegalArgumentException("Tuple has incomplete version at position " + result.versionPos + " which is greater than the maximum " + 0xffff); } if (FDB.instance().getAPIVersion() < 520) { - encoded.add(ByteBuffer.allocate(2).order(ByteOrder.LITTLE_ENDIAN).putShort((short)result.versionPos).array()); + result.add(ByteBuffer.allocate(Short.BYTES).order(ByteOrder.LITTLE_ENDIAN).putShort((short)result.versionPos).array()); } else { - encoded.add(ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN).putInt(result.versionPos).array()); + result.add(ByteBuffer.allocate(Integer.BYTES).order(ByteOrder.LITTLE_ENDIAN).putInt(result.versionPos).array()); } - return ByteArrayUtil.join(null, encoded); + return ByteArrayUtil.join(null, result.encodedValues); } } @@ -617,7 +646,10 @@ class TupleUtil { public static void main(String[] args) { try { byte[] bytes = pack(Collections.singletonList(4), null); - assert 4 == (Integer)(decode(bytes, 0, bytes.length).o); + DecodeResult result = new DecodeResult(); + decode(result, bytes, 0, bytes.length); + int val = (int)result.values.get(0); + assert 4 == val; } catch (Exception e) { e.printStackTrace(); System.out.println("Error " + e.getMessage()); @@ -625,7 +657,9 @@ class TupleUtil { try { byte[] bytes = pack(Collections.singletonList("\u021Aest \u0218tring"), null); - String string = (String)(decode(bytes, 0, bytes.length).o); + DecodeResult result = new DecodeResult(); + decode(result, bytes, 0, bytes.length); + String string = (String)result.values.get(0); System.out.println("contents -> " + string); assert "\u021Aest \u0218tring".equals(string); } catch (Exception e) { @@ -635,7 +669,7 @@ 
class TupleUtil { /*Object[] a = new Object[] { "\u0000a", -2, "b\u0001", 12345, ""}; List o = Arrays.asList(a); - byte[] packed = pack( o, null ); + byte[] packed = packInternal( o, null ); System.out.println("packed length: " + packed.length); o = unpack( packed, 0, packed.length ); System.out.println("unpacked elements: " + o); diff --git a/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java b/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java index df9ccf6d45..dada5131d8 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java @@ -25,17 +25,15 @@ public class TuplePerformanceTest { public Tuple createTuple(int length) { List values = new ArrayList<>(length); - for(int i = 0; i < length; i++) { + for (int i = 0; i < length; i++) { double choice = r.nextDouble(); - if(choice < 0.1) { + if (choice < 0.1) { values.add(null); - } - else if(choice < 0.2) { + } else if (choice < 0.2) { byte[] bytes = new byte[r.nextInt(20)]; r.nextBytes(bytes); values.add(bytes); - } - else if(choice < 0.3) { + } else if (choice < 0.3) { char[] chars = new char[r.nextInt(20)]; for (int j = 0; j < chars.length; j++) { chars[j] = (char)('a' + r.nextInt(26)); @@ -171,7 +169,7 @@ public class TuplePerformanceTest { } public static void main(String[] args) { - TuplePerformanceTest tester = new TuplePerformanceTest(new Random(), 100_000, 10_000); + TuplePerformanceTest tester = new TuplePerformanceTest(new Random(), 100_000, 10_000_000); tester.run(); } } From e6ce0ebd2717c1223c0f2aac9e37581e14a14516 Mon Sep 17 00:00:00 2001 From: Alec Grieser Date: Sun, 24 Feb 2019 20:49:10 -0800 Subject: [PATCH 18/47] improve tuple performance tester for more types and add serialization check in TupleTest --- .../test/TuplePerformanceTest.java | 76 ++++++++++++-- .../apple/foundationdb/test/TupleTest.java | 98 ++++++++++++++++++- 2 files 
changed, 162 insertions(+), 12 deletions(-) diff --git a/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java b/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java index dada5131d8..cf79ff41a9 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java @@ -13,30 +13,40 @@ import com.apple.foundationdb.tuple.Versionstamp; public class TuplePerformanceTest { + private enum GeneratedTypes { + ALL, + LONG, + FLOATING_POINT + } + private final Random r; private final int ignoreIterations; private final int iterations; + private final GeneratedTypes generatedTypes; - public TuplePerformanceTest(Random r, int ignoreIterations, int iterations) { + public TuplePerformanceTest(Random r, int ignoreIterations, int iterations, GeneratedTypes generatedTypes) { this.r = r; this.ignoreIterations = ignoreIterations; this.iterations = iterations; + this.generatedTypes = generatedTypes; } - public Tuple createTuple(int length) { + public Tuple createMultiTypeTuple(int length) { List values = new ArrayList<>(length); - for (int i = 0; i < length; i++) { + for(int i = 0; i < length; i++) { double choice = r.nextDouble(); - if (choice < 0.1) { + if(choice < 0.1) { values.add(null); - } else if (choice < 0.2) { + } + else if(choice < 0.2) { byte[] bytes = new byte[r.nextInt(20)]; r.nextBytes(bytes); values.add(bytes); - } else if (choice < 0.3) { + } + else if(choice < 0.3) { char[] chars = new char[r.nextInt(20)]; for (int j = 0; j < chars.length; j++) { - chars[j] = (char)('a' + r.nextInt(26)); + chars[j] = (char) ('a' + r.nextInt(26)); } values.add(new String(chars)); } @@ -67,7 +77,55 @@ public class TuplePerformanceTest { values.add(nested); } } - return Tuple.from(values); + return Tuple.fromItems(values); + } + + public Tuple createLongsTuple(int length) { + List values = new ArrayList<>(length); + for(int i = 0; i < 
length; i++) { + int byteLength = r.nextInt(Long.BYTES + 1); + long val = 0L; + for(int x = 0; x < byteLength; x++) { + int nextBytes = r.nextInt(256); + val = (val << 8) + nextBytes; + } + values.add(val); + } + return Tuple.fromItems(values); + } + + public Tuple createFloatingPointTuple(int length) { + List values = new ArrayList<>(length); + for(int i = 0; i < length; i++) { + double choice = r.nextDouble(); + if(choice < 0.40) { + values.add(r.nextFloat()); + } + else if(choice < 0.80) { + values.add(r.nextDouble()); + } + // These last two are more likely to produce NaN values + else if(choice < 0.90) { + values.add(Float.intBitsToFloat(r.nextInt())); + } + else { + values.add(Double.longBitsToDouble(r.nextLong())); + } + } + return Tuple.fromItems(values); + } + + public Tuple createTuple(int length) { + switch (generatedTypes) { + case ALL: + return createMultiTypeTuple(length); + case LONG: + return createLongsTuple(length); + case FLOATING_POINT: + return createFloatingPointTuple(length); + default: + throw new IllegalStateException("unknown generated types " + generatedTypes); + } } public void run() { @@ -169,7 +227,7 @@ public class TuplePerformanceTest { } public static void main(String[] args) { - TuplePerformanceTest tester = new TuplePerformanceTest(new Random(), 100_000, 10_000_000); + TuplePerformanceTest tester = new TuplePerformanceTest(new Random(), 100_000, 10_000_000, GeneratedTypes.ALL); tester.run(); } } diff --git a/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java b/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java index ad9297e02d..528c11f93a 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java @@ -20,24 +20,116 @@ package com.apple.foundationdb.test; -import com.apple.foundationdb.Database; -import com.apple.foundationdb.FDB; import com.apple.foundationdb.TransactionContext; +import 
com.apple.foundationdb.tuple.ByteArrayUtil; import com.apple.foundationdb.tuple.Tuple; +import java.math.BigInteger; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; + public class TupleTest { + private static final byte FF = (byte)0xff; + public static void main(String[] args) throws InterruptedException { final int reps = 1000; try { - FDB fdb = FDB.selectAPIVersion(610); + // FDB fdb = FDB.selectAPIVersion(610); + serializedForms(); + /* try(Database db = fdb.open()) { runTests(reps, db); } + */ } catch(Throwable t) { t.printStackTrace(); } } + private static class TupleSerialization { + private final Tuple tuple; + private final byte[] serialization; + + TupleSerialization(Tuple tuple, byte[] serialization) { + this.tuple = tuple; + this.serialization = serialization; + } + + static void addAll(List list, Object... args) { + for(int i = 0; i < args.length; i += 2) { + TupleSerialization serialization = new TupleSerialization((Tuple)args[i], (byte[])args[i + 1]); + list.add(serialization); + } + } + } + + private static void serializedForms() { + List serializations = new ArrayList<>(); + TupleSerialization.addAll(serializations, + Tuple.from(0L), new byte[]{0x14}, + Tuple.from(BigInteger.ZERO), new byte[]{0x14}, + Tuple.from(1L), new byte[]{0x15, 0x01}, + Tuple.from(BigInteger.ONE), new byte[]{0x15, 0x01}, + Tuple.from(-1L), new byte[]{0x13, FF - 1}, + Tuple.from(BigInteger.ONE.negate()), new byte[]{0x13, FF - 1}, + Tuple.from(255L), new byte[]{0x15, FF}, + Tuple.from(BigInteger.valueOf(255)), new byte[]{0x15, FF}, + Tuple.from(-255L), new byte[]{0x13, 0x00}, + Tuple.from(BigInteger.valueOf(-255)), new byte[]{0x13, 0x00}, + Tuple.from(256L), new byte[]{0x16, 0x01, 0x00}, + Tuple.from(BigInteger.valueOf(256)), new byte[]{0x16, 0x01, 0x00}, + Tuple.from(-256L), new byte[]{0x12, FF - 1, FF}, + Tuple.from(BigInteger.valueOf(-256)), new byte[]{0x12, FF - 1, FF}, + Tuple.from(65536), new byte[]{0x17, 0x01, 
0x00, 0x00}, + Tuple.from(-65536), new byte[]{0x11, FF - 1, FF, FF}, + Tuple.from(Long.MAX_VALUE), new byte[]{0x1C, 0x7f, FF, FF, FF, FF, FF, FF, FF}, + Tuple.from(BigInteger.valueOf(Long.MAX_VALUE)), new byte[]{0x1C, 0x7f, FF, FF, FF, FF, FF, FF, FF}, + Tuple.from(BigInteger.valueOf(Long.MAX_VALUE).add(BigInteger.ONE)), new byte[]{0x1C, (byte)0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + Tuple.from(BigInteger.ONE.shiftLeft(64).subtract(BigInteger.ONE)), new byte[]{0x1C, FF, FF, FF, FF, FF, FF, FF, FF}, + Tuple.from(BigInteger.ONE.shiftLeft(64)), new byte[]{0x1D, 0x09, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + Tuple.from(-((1L << 32) - 1)), new byte[]{0x10, 0x00, 0x00, 0x00, 0x00}, + Tuple.from(BigInteger.ONE.shiftLeft(32).subtract(BigInteger.ONE).negate()), new byte[]{0x10, 0x00, 0x00, 0x00, 0x00}, + Tuple.from(Long.MIN_VALUE + 2), new byte[]{0x0C, (byte)0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, + Tuple.from(Long.MIN_VALUE + 1), new byte[]{0x0C, (byte)0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + Tuple.from(BigInteger.valueOf(Long.MIN_VALUE).add(BigInteger.ONE)), new byte[]{0x0C, (byte)0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + Tuple.from(Long.MIN_VALUE), new byte[]{0x0C, 0x7f, FF, FF, FF, FF, FF, FF, FF}, + Tuple.from(BigInteger.valueOf(Long.MIN_VALUE)), new byte[]{0x0C, 0x7f, FF, FF, FF, FF, FF, FF, FF}, + Tuple.from(BigInteger.valueOf(Long.MIN_VALUE).subtract(BigInteger.ONE)), new byte[]{0x0C, 0x7f, FF, FF, FF, FF, FF, FF, FF - 1}, + Tuple.from(BigInteger.ONE.shiftLeft(64).subtract(BigInteger.ONE).negate()), new byte[]{0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + Tuple.from(3.14f), new byte[]{0x20, (byte)0xc0, 0x48, (byte)0xf5, (byte)0xc3}, + Tuple.from(-3.14f), new byte[]{0x20, (byte)0x3f, (byte)0xb7, (byte)0x0a, (byte)0x3c}, + Tuple.from(3.14), new byte[]{0x21, (byte)0xc0, (byte)0x09, (byte)0x1e, (byte)0xb8, (byte)0x51, (byte)0xeb, (byte)0x85, (byte)0x1f}, + Tuple.from(-3.14), new byte[]{0x21, (byte)0x3f, 
(byte)0xf6, (byte)0xe1, (byte)0x47, (byte)0xae, (byte)0x14, (byte)0x7a, (byte)0xe0}, + Tuple.from(0.0f), new byte[]{0x20, (byte)0x80, 0x00, 0x00, 0x00}, + Tuple.from(-0.0f), new byte[]{0x20, 0x7f, FF, FF, FF}, + Tuple.from(0.0), new byte[]{0x21, (byte)0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + Tuple.from(-0.0), new byte[]{0x21, 0x7f, FF, FF, FF, FF, FF, FF, FF}, + Tuple.from(Float.POSITIVE_INFINITY), new byte[]{0x20, FF, (byte)0x80, 0x00, 0x00}, + Tuple.from(Float.NEGATIVE_INFINITY), new byte[]{0x20, 0x00, 0x7f, FF, FF}, + Tuple.from(Double.POSITIVE_INFINITY), new byte[]{0x21, FF, (byte)0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + Tuple.from(Double.NEGATIVE_INFINITY), new byte[]{0x21, 0x00, 0x0f, FF, FF, FF, FF, FF, FF}, + Tuple.from(Float.intBitsToFloat(Integer.MAX_VALUE)), new byte[]{0x20, FF, FF, FF, FF}, + Tuple.from(Double.longBitsToDouble(Long.MAX_VALUE)), new byte[]{0x21, FF, FF, FF, FF, FF, FF, FF, FF}, + Tuple.from(Float.intBitsToFloat(~0)), new byte[]{0x20, 0x00, 0x00, 0x00, 0x00}, + Tuple.from(Double.longBitsToDouble(~0L)), new byte[]{0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} + ); + + for(TupleSerialization serialization : serializations) { + System.out.println("Packing " + serialization.tuple + " (expecting: " + ByteArrayUtil.printable(serialization.serialization) + ")"); + if(!Arrays.equals(serialization.tuple.pack(), serialization.serialization)) { + throw new RuntimeException("Tuple " + serialization.tuple + " has serialization " + ByteArrayUtil.printable(serialization.tuple.pack()) + + " which does not match expected serialization " + ByteArrayUtil.printable(serialization.serialization)); + } + if(!Objects.equals(serialization.tuple, Tuple.fromBytes(serialization.serialization))) { + throw new RuntimeException("Tuple " + serialization.tuple + " does not match deserialization " + Tuple.fromBytes(serialization.serialization) + + " which comes from serialization " + ByteArrayUtil.printable(serialization.serialization)); + } + } + 
System.out.println("All tuples had matching serializations"); + } + private static void runTests(final int reps, TransactionContext db) { System.out.println("Running tests..."); long start = System.currentTimeMillis(); From e9771364d797133444623ec4ebf7ce0bce5d517e Mon Sep 17 00:00:00 2001 From: Alec Grieser Date: Sun, 24 Feb 2019 20:52:28 -0800 Subject: [PATCH 19/47] various Java tuple performance tweaks These include: * Memoizing packed representations within Tuples * Using longs instead of BigIntegers if possible * As much as possible sticking to manipulating primitive types when using floats/doubles --- .../foundationdb/tuple/ByteArrayUtil.java | 3 +- .../com/apple/foundationdb/tuple/Tuple.java | 14 +- .../apple/foundationdb/tuple/TupleUtil.java | 446 ++++++++++-------- 3 files changed, 252 insertions(+), 211 deletions(-) diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java b/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java index 247ae78fb0..eeea3e1799 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java @@ -229,8 +229,7 @@ public class ByteArrayUtil { int n = Arrays.binarySearch(arr, i); if(n >= 0) return n; - int ip = (n + 1) * -1; - return ip; + return (n + 1) * -1; } /** diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java b/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java index 7b14632452..b3761d8c5d 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java @@ -824,9 +824,12 @@ public class Tuple implements Comparable, Iterable { } /** - * Get the number of bytes in the packed representation of this {@code Tuple}. + * Get the number of bytes in the packed representation of this {@code Tuple}. 
Note that at the + * moment, this number is calculated by packing the {@code Tuple} and looking at its size. This method + * will memoize the result, however, so asking the same {@code Tuple} for its size multiple times + * is a fast operation. * - * @return + * @return the number of bytes in the packed representation of this {@code Tuple} */ public int getPackedSize() { byte[] p = packMaybeVersionstamp(null); @@ -847,7 +850,12 @@ public class Tuple implements Comparable, Iterable { */ @Override public int compareTo(Tuple t) { - return comparator.compare(elements, t.elements); + if(packed != null && t.packed != null) { + return ByteArrayUtil.compareUnsigned(packed, t.packed); + } + else { + return comparator.compare(elements, t.elements); + } } /** diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java b/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java index f25828f47d..5b220d2c90 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java @@ -36,8 +36,10 @@ import com.apple.foundationdb.FDB; class TupleUtil { private static final byte nil = 0x00; - private static final BigInteger[] size_limits; + private static final BigInteger[] BIG_INT_SIZE_LIMITS; private static final Charset UTF8; + private static final BigInteger LONG_MIN_VALUE = BigInteger.valueOf(Long.MIN_VALUE); + private static final BigInteger LONG_MAX_VALUE = BigInteger.valueOf(Long.MAX_VALUE); private static final IterableComparator iterableComparator; private static final byte BYTES_CODE = 0x01; @@ -55,27 +57,28 @@ class TupleUtil { private static final byte[] NULL_ARR = new byte[] {nil}; private static final byte[] NULL_ESCAPED_ARR = new byte[] {nil, (byte)0xFF}; - private static final byte[] BYTES_ARR = new byte[]{0x01}; - private static final byte[] STRING_ARR = new byte[]{0x02}; - private static final byte[] NESTED_ARR = new byte[]{0x05}; - private static final 
byte[] FALSE_ARR = new byte[]{0x26}; - private static final byte[] TRUE_ARR = new byte[]{0x27}; - private static final byte[] VERSIONSTAMP_ARR = new byte[]{0x33}; + private static final byte[] BYTES_ARR = new byte[]{BYTES_CODE}; + private static final byte[] STRING_ARR = new byte[]{STRING_CODE}; + private static final byte[] NESTED_ARR = new byte[]{NESTED_CODE}; + private static final byte[] INT_ZERO_ARR = new byte[]{INT_ZERO_CODE}; + private static final byte[] FALSE_ARR = new byte[]{FALSE_CODE}; + private static final byte[] TRUE_ARR = new byte[]{TRUE_CODE}; + private static final byte[] VERSIONSTAMP_ARR = new byte[]{VERSIONSTAMP_CODE}; static { - size_limits = new BigInteger[9]; - for(int i = 0; i < 9; i++) { - size_limits[i] = (BigInteger.ONE).shiftLeft(i * 8).subtract(BigInteger.ONE); + BIG_INT_SIZE_LIMITS = new BigInteger[9]; + for(int i = 0; i < BIG_INT_SIZE_LIMITS.length; i++) { + BIG_INT_SIZE_LIMITS[i] = (BigInteger.ONE).shiftLeft(i * 8).subtract(BigInteger.ONE); } UTF8 = Charset.forName("UTF-8"); iterableComparator = new IterableComparator(); } - static class DecodeResult { + static class DecodeState { final List values; int end; - DecodeResult() { + DecodeState() { values = new ArrayList<>(); end = 0; } @@ -86,18 +89,18 @@ class TupleUtil { } } - static class EncodeResult { + static class EncodeState { final List encodedValues; int totalLength; int versionPos; - EncodeResult(int capacity) { + EncodeState(int capacity) { this.encodedValues = new ArrayList<>(capacity); totalLength = 0; versionPos = -1; } - EncodeResult add(byte[] encoded, int versionPos) { + EncodeState add(byte[] encoded, int versionPos) { if(versionPos >= 0 && this.versionPos >= 0) { throw new IllegalArgumentException("Multiple incomplete Versionstamps included in Tuple"); } @@ -107,7 +110,7 @@ class TupleUtil { return this; } - EncodeResult add(byte[] encoded) { + EncodeState add(byte[] encoded) { encodedValues.add(encoded); totalLength += encoded.length; return this; @@ -122,37 +125,37 
@@ class TupleUtil { return 0; } - /** - * Takes the Big-Endian byte representation of a floating point number and adjusts - * it so that it sorts correctly. For encoding, if the sign bit is 1 (the number - * is negative), then we need to flip all of the bits; otherwise, just flip the - * sign bit. For decoding, if the sign bit is 0 (the number is negative), then - * we also need to flip all of the bits; otherwise, just flip the sign bit. - * This will mutate in place the given array. - * - * @param bytes Big-Endian IEEE encoding of a floating point number - * @param start the (zero-indexed) first byte in the array to mutate - * @param encode true if we encoding the float and false if we are decoding - * @return the encoded {@code byte[]} - */ - static byte[] floatingPointCoding(byte[] bytes, int start, boolean encode) { - if(encode && (bytes[start] & (byte)0x80) != (byte)0x00) { - for(int i = start; i < bytes.length; i++) { - bytes[i] = (byte) (bytes[i] ^ 0xff); - } - } else if(!encode && (bytes[start] & (byte)0x80) != (byte)0x80) { - for(int i = start; i < bytes.length; i++) { - bytes[i] = (byte) (bytes[i] ^ 0xff); - } - } else { - bytes[start] = (byte) (0x80 ^ bytes[start]); - } + // These four functions are for adjusting the encoding of floating point numbers so + // that when their byte representation is written out in big-endian order, unsigned + // lexicographic byte comparison orders the values in the same way as the semantic + // ordering of the values. This means flipping all bits for negative values and flipping + // only the most-significant bit (i.e., the sign bit as all values in Java are signed) + // in the case that the number is positive. For these purposes, 0.0 is positive and -0.0 + // is negative. - return bytes; + static int encodeFloatBits(float f) { + int intBits = Float.floatToRawIntBits(f); + return (intBits < 0) ? 
(~intBits) : (intBits ^ Integer.MIN_VALUE); } - static byte[] join(List items) { - return ByteArrayUtil.join(null, items); + static long encodeDoubleBits(double d) { + long longBits = Double.doubleToRawLongBits(d); + return (longBits < 0L) ? (~longBits) : (longBits ^ Long.MIN_VALUE); + } + + static float decodeFloatBits(int i) { + int origBits = (i >= 0) ? (~i) : (i ^ Integer.MIN_VALUE); + return Float.intBitsToFloat(origBits); + } + + static double decodeDoubleBits(long l) { + long origBits = (l >= 0) ? (~l) : (l ^ Long.MIN_VALUE); + return Double.longBitsToDouble(origBits); + } + + // Get the number of bytes in the representation of a long. + static int byteCount(long i) { + return (Long.SIZE + 7 - Long.numberOfLeadingZeros(i >= 0 ? i : -i)) / 8; } private static void adjustVersionPosition300(byte[] packed, int delta) { @@ -215,64 +218,64 @@ class TupleUtil { throw new IllegalArgumentException("Unsupported data type: " + o.getClass().getName()); } - static void encode(EncodeResult result, Object t, boolean nested) { + static void encode(EncodeState state, Object t, boolean nested) { if(t == null) { if(nested) { - result.add(NULL_ESCAPED_ARR); + state.add(NULL_ESCAPED_ARR); } else { - result.add(NULL_ARR); + state.add(NULL_ARR); } } else if(t instanceof byte[]) - encode(result, (byte[]) t); + encode(state, (byte[]) t); else if(t instanceof String) - encode(result, (String)t); - else if(t instanceof BigInteger) - encode(result, (BigInteger)t); + encode(state, (String)t); else if(t instanceof Float) - encode(result, (Float)t); + encode(state, (Float)t); else if(t instanceof Double) - encode(result, (Double)t); + encode(state, (Double)t); else if(t instanceof Boolean) - encode(result, (Boolean)t); + encode(state, (Boolean)t); else if(t instanceof UUID) - encode(result, (UUID)t); + encode(state, (UUID)t); + else if(t instanceof BigInteger) + encode(state, (BigInteger)t); else if(t instanceof Number) - encode(result, ((Number)t).longValue()); + encode(state, 
((Number)t).longValue()); else if(t instanceof Versionstamp) - encode(result, (Versionstamp)t); + encode(state, (Versionstamp)t); else if(t instanceof List) - encode(result, (List)t); + encode(state, (List)t); else if(t instanceof Tuple) - encode(result, ((Tuple)t).getItems()); + encode(state, ((Tuple)t).getItems()); else throw new IllegalArgumentException("Unsupported data type: " + t.getClass().getName()); } - static void encode(EncodeResult result, Object t) { - encode(result, t, false); + static void encode(EncodeState state, Object t) { + encode(state, t, false); } - static void encode(EncodeResult result, byte[] bytes) { + static void encode(EncodeState state, byte[] bytes) { byte[] escaped = ByteArrayUtil.replace(bytes, NULL_ARR, NULL_ESCAPED_ARR); - result.add(BYTES_ARR).add(escaped).add(NULL_ARR); + state.add(BYTES_ARR).add(escaped).add(NULL_ARR); } - static void encode(EncodeResult result, String s) { + static void encode(EncodeState state, String s) { byte[] escaped = ByteArrayUtil.replace(s.getBytes(UTF8), NULL_ARR, NULL_ESCAPED_ARR); - result.add(STRING_ARR).add(escaped).add(NULL_ARR); + state.add(STRING_ARR).add(escaped).add(NULL_ARR); } - static void encode(EncodeResult result, BigInteger i) { + static void encode(EncodeState state, BigInteger i) { //System.out.println("Encoding integral " + i); if(i.equals(BigInteger.ZERO)) { - result.add(new byte[]{INT_ZERO_CODE}); + state.add(INT_ZERO_ARR); return; } byte[] bytes = i.toByteArray(); if(i.compareTo(BigInteger.ZERO) > 0) { - if(i.compareTo(size_limits[size_limits.length-1]) > 0) { + if(i.compareTo(BIG_INT_SIZE_LIMITS[BIG_INT_SIZE_LIMITS.length-1]) > 0) { int length = byteLength(bytes); if(length > 0xff) { throw new IllegalArgumentException("BigInteger magnitude is too large (more than 255 bytes)"); @@ -281,21 +284,20 @@ class TupleUtil { intBytes[0] = POS_INT_END; intBytes[1] = (byte)(length); System.arraycopy(bytes, bytes.length - length, intBytes, 2, length); - result.add(intBytes); + 
state.add(intBytes); } else { - int n = ByteArrayUtil.bisectLeft(size_limits, i); - assert n <= size_limits.length; - //byte[] bytes = ByteBuffer.allocate(8).order(ByteOrder.BIG_ENDIAN).putLong(i).array(); + int n = ByteArrayUtil.bisectLeft(BIG_INT_SIZE_LIMITS, i); + assert n <= BIG_INT_SIZE_LIMITS.length; //System.out.println(" -- integral has 'n' of " + n + " and output bytes of " + bytes.length); byte[] intBytes = new byte[n + 1]; intBytes[0] = (byte) (INT_ZERO_CODE + n); System.arraycopy(bytes, bytes.length - n, intBytes, 1, n); - result.add(intBytes); + state.add(intBytes); } } else { - if(i.negate().compareTo(size_limits[size_limits.length - 1]) > 0) { + if(i.negate().compareTo(BIG_INT_SIZE_LIMITS[BIG_INT_SIZE_LIMITS.length - 1]) > 0) { int length = byteLength(i.negate().toByteArray()); if (length > 0xff) { throw new IllegalArgumentException("BigInteger magnitude is too large (more than 255 bytes)"); @@ -311,92 +313,109 @@ class TupleUtil { Arrays.fill(intBytes, 2, intBytes.length - adjusted.length, (byte) 0x00); System.arraycopy(adjusted, 0, intBytes, intBytes.length - adjusted.length, adjusted.length); } - result.add(intBytes); + state.add(intBytes); } else { - int n = ByteArrayUtil.bisectLeft(size_limits, i.negate()); + int n = ByteArrayUtil.bisectLeft(BIG_INT_SIZE_LIMITS, i.negate()); - assert n >= 0 && n < size_limits.length; // can we do this? it seems to be required for the following statement + assert n >= 0 && n < BIG_INT_SIZE_LIMITS.length; // can we do this? 
it seems to be required for the following statement - long maxv = size_limits[n].add(i).longValue(); + long maxv = BIG_INT_SIZE_LIMITS[n].add(i).longValue(); byte[] adjustedBytes = ByteBuffer.allocate(8).order(ByteOrder.BIG_ENDIAN).putLong(maxv).array(); byte[] intBytes = new byte[n + 1]; - intBytes[0] = (byte) (20 - n); + intBytes[0] = (byte) (INT_ZERO_CODE - n); System.arraycopy(adjustedBytes, adjustedBytes.length - n, intBytes, 1, n); - result.add(intBytes); + state.add(intBytes); } } } - static void encode(EncodeResult result, Integer i) { - encode(result, i.longValue()); + static void encode(EncodeState state, long i) { + if(i == 0L) { + state.add(INT_ZERO_ARR); + return; + } + int n = byteCount(i); + byte[] intBytes = new byte[n + 1]; + // First byte encodes number of bytes (as difference from INT_ZERO_CODE) + intBytes[0] = (byte)(INT_ZERO_CODE + (i >= 0 ? n : -n)); + // For positive integers, copy the bytes in big-endian order excluding leading 0x00 bytes. + // For negative integers, copy the bytes of the one's complement representation excluding + // the leading 0xff bytes. As Java stores negative values in two's complement, we subtract 1 + // from negative values. + long val = Long.reverseBytes((i >= 0) ? 
i : (i - 1)) >> (Long.SIZE - 8 * n); + for(int x = 1; x < intBytes.length; x++) { + intBytes[x] = (byte)(val & 0xff); + val >>= 8; + } + state.add(intBytes); } - static void encode(EncodeResult result, long i) { - encode(result, BigInteger.valueOf(i)); + static void encode(EncodeState state, Float f) { + byte[] floatBytes = ByteBuffer.allocate(1 + Float.BYTES).order(ByteOrder.BIG_ENDIAN) + .put(FLOAT_CODE) + .putInt(encodeFloatBits(f)) + .array(); + state.add(floatBytes); } - static void encode(EncodeResult result, Float f) { - byte[] floatBytes = ByteBuffer.allocate(5).order(ByteOrder.BIG_ENDIAN).put(FLOAT_CODE).putFloat(f).array(); - floatingPointCoding(floatBytes, 1, true); - result.add(floatBytes); + static void encode(EncodeState state, Double d) { + byte[] doubleBytes = ByteBuffer.allocate(1 + Double.BYTES).order(ByteOrder.BIG_ENDIAN) + .put(DOUBLE_CODE) + .putLong(encodeDoubleBits(d)) + .array(); + state.add(doubleBytes); } - static void encode(EncodeResult result, Double d) { - byte[] doubleBytes = ByteBuffer.allocate(9).order(ByteOrder.BIG_ENDIAN).put(DOUBLE_CODE).putDouble(d).array(); - floatingPointCoding(doubleBytes, 1, true); - result.add(doubleBytes); - } - - static void encode(EncodeResult result, Boolean b) { + static void encode(EncodeState state, Boolean b) { if(b) { - result.add(TRUE_ARR); + state.add(TRUE_ARR); } else { - result.add(FALSE_ARR); + state.add(FALSE_ARR); } } - static void encode(EncodeResult result, UUID uuid) { + static void encode(EncodeState state, UUID uuid) { byte[] uuidBytes = ByteBuffer.allocate(17).put(UUID_CODE).order(ByteOrder.BIG_ENDIAN) .putLong(uuid.getMostSignificantBits()).putLong(uuid.getLeastSignificantBits()) .array(); - result.add(uuidBytes); + state.add(uuidBytes); } - static void encode(EncodeResult result, Versionstamp v) { - result.add(VERSIONSTAMP_ARR); + static void encode(EncodeState state, Versionstamp v) { + state.add(VERSIONSTAMP_ARR); if(v.isComplete()) { - result.add(v.getBytes()); + 
state.add(v.getBytes()); } else { - result.add(v.getBytes(), result.totalLength); + state.add(v.getBytes(), state.totalLength); } } - static void encode(EncodeResult result, List value) { - result.add(NESTED_ARR); + static void encode(EncodeState state, List value) { + state.add(NESTED_ARR); for(Object t : value) { - encode(result, t, true); + encode(state, t, true); } - result.add(NULL_ARR); + state.add(NULL_ARR); } - static void decode(DecodeResult result, byte[] rep, int pos, int last) { + static void decode(DecodeState state, byte[] rep, int pos, int last) { //System.out.println("Decoding '" + ArrayUtils.printable(rep) + "' at " + pos); // SOMEDAY: codes over 127 will be a problem with the signed Java byte mess int code = rep[pos]; int start = pos + 1; if(code == nil) { - result.add(null, start); + state.add(null, start); } else if(code == BYTES_CODE) { int end = ByteArrayUtil.findTerminator(rep, (byte)0x0, (byte)0xff, start, last); //System.out.println("End of byte string: " + end); byte[] range = ByteArrayUtil.replace(rep, start, end - start, NULL_ESCAPED_ARR, new byte[] { nil }); //System.out.println(" -> byte string contents: '" + ArrayUtils.printable(range) + "'"); - result.add(range, end + 1); + state.add(range, end + 1); } else if(code == STRING_CODE) { int end = ByteArrayUtil.findTerminator(rep, (byte)0x0, (byte)0xff, start, last); @@ -404,78 +423,91 @@ class TupleUtil { byte[] stringBytes = ByteArrayUtil.replace(rep, start, end - start, NULL_ESCAPED_ARR, new byte[] { nil }); String str = new String(stringBytes, UTF8); //System.out.println(" -> UTF8 string contents: '" + str + "'"); - result.add(str, end + 1); + state.add(str, end + 1); } else if(code == FLOAT_CODE) { - byte[] resBytes = Arrays.copyOfRange(rep, start, start+4); - floatingPointCoding(resBytes, 0, false); - float res = ByteBuffer.wrap(resBytes).order(ByteOrder.BIG_ENDIAN).getFloat(); - result.add(res, start + Float.BYTES); + int rawFloatBits = ByteBuffer.wrap(rep, start, 
Float.BYTES).getInt(); + float res = decodeFloatBits(rawFloatBits); + state.add(res, start + Float.BYTES); } else if(code == DOUBLE_CODE) { - byte[] resBytes = Arrays.copyOfRange(rep, start, start+8); - floatingPointCoding(resBytes, 0, false); - double res = ByteBuffer.wrap(resBytes).order(ByteOrder.BIG_ENDIAN).getDouble(); - result.add(res, start + Double.BYTES); + long rawDoubleBits = ByteBuffer.wrap(rep, start, Double.BYTES).getLong(); + double res = decodeDoubleBits(rawDoubleBits); + state.add(res, start + Double.BYTES); } else if(code == FALSE_CODE) { - result.add(false, start); + state.add(false, start); } else if(code == TRUE_CODE) { - result.add(true, start); + state.add(true, start); } else if(code == UUID_CODE) { ByteBuffer bb = ByteBuffer.wrap(rep, start, 16).order(ByteOrder.BIG_ENDIAN); long msb = bb.getLong(); long lsb = bb.getLong(); - result.add(new UUID(msb, lsb), start + 16); + state.add(new UUID(msb, lsb), start + 16); } else if(code == POS_INT_END) { int n = rep[start] & 0xff; BigInteger res = new BigInteger(ByteArrayUtil.join(new byte[]{0x00}, Arrays.copyOfRange(rep, start+1, start+n+1))); - result.add(res, start + n + 1); + state.add(res, start + n + 1); } else if(code == NEG_INT_START) { int n = (rep[start] ^ 0xff) & 0xff; BigInteger origValue = new BigInteger(ByteArrayUtil.join(new byte[]{0x00}, Arrays.copyOfRange(rep, start+1, start+n+1))); BigInteger offset = BigInteger.ONE.shiftLeft(n*8).subtract(BigInteger.ONE); - result.add(origValue.subtract(offset), start + n + 1); + state.add(origValue.subtract(offset), start + n + 1); } else if(code > NEG_INT_START && code < POS_INT_END) { // decode a long - byte[] longBytes = new byte[9]; - boolean upper = code >= INT_ZERO_CODE; - int n = upper ? code - 20 : 20 - code; + boolean positive = code >= INT_ZERO_CODE; + int n = positive ? 
code - INT_ZERO_CODE : INT_ZERO_CODE - code; int end = start + n; if(rep.length < end) { throw new RuntimeException("Invalid tuple (possible truncation)"); } - System.arraycopy(rep, start, longBytes, longBytes.length-n, n); - if (!upper) - for(int i=longBytes.length-n; i 0)) { + long res = 0L; + for(int i = start; i < end; i++) { + res = (res << 8) + (rep[i] & 0xff); + } + state.add(res, end); + } + else if(!positive && (n < Long.BYTES || rep[start] < 0)) { + long res = ~0L; + for(int i = start; i < end; i++) { + res = (res << 8) + (rep[i] & 0xff); + } + state.add(res + 1, end); + } + else { + byte[] longBytes = new byte[9]; + System.arraycopy(rep, start, longBytes, longBytes.length-n, n); + if (!positive) + for(int i=longBytes.length-n; i0) { - // This can occur if the thing can be represented with 8 bytes but not - // the right sign information. - result.add(val, end); - } else { - result.add(val.longValue(), end); + // Convert to long if in range -- otherwise, leave as BigInteger. + if (val.compareTo(LONG_MIN_VALUE) >= 0 && val.compareTo(LONG_MAX_VALUE) <= 0) { + state.add(val.longValue(), end); + } else { + // This can occur if the thing can be represented with 8 bytes but not + // the right sign information. 
+ state.add(val, end); + } } } else if(code == VERSIONSTAMP_CODE) { Versionstamp val = Versionstamp.fromBytes(Arrays.copyOfRange(rep, start, start + Versionstamp.LENGTH)); - result.add(val, start + Versionstamp.LENGTH); + state.add(val, start + Versionstamp.LENGTH); } else if(code == NESTED_CODE) { - DecodeResult subResult = new DecodeResult(); + DecodeState subResult = new DecodeState(); int endPos = start; while(endPos < rep.length) { if(rep[endPos] == nil) { @@ -491,25 +523,13 @@ class TupleUtil { endPos = subResult.end; } } - result.add(subResult.values, endPos); + state.add(subResult.values, endPos); } else { throw new IllegalArgumentException("Unknown tuple data type " + code + " at index " + pos); } } - static int compareSignedBigEndian(byte[] arr1, byte[] arr2) { - if(arr1[0] < 0 && arr2[0] < 0) { - return -1 * ByteArrayUtil.compareUnsigned(arr1, arr2); - } else if(arr1[0] < 0) { - return -1; - } else if(arr2[0] < 0) { - return 1; - } else { - return ByteArrayUtil.compareUnsigned(arr1, arr2); - } - } - static int compareItems(Object item1, Object item2) { int code1 = TupleUtil.getCodeFor(item1); int code2 = TupleUtil.getCodeFor(item2); @@ -529,33 +549,39 @@ class TupleUtil { return ByteArrayUtil.compareUnsigned(((String)item1).getBytes(UTF8), ((String)item2).getBytes(UTF8)); } if(code1 == INT_ZERO_CODE) { - BigInteger bi1; - if(item1 instanceof BigInteger) { - bi1 = (BigInteger)item1; - } else { - bi1 = BigInteger.valueOf(((Number)item1).longValue()); + if(item1 instanceof Long && item2 instanceof Long) { + // This should be the common case, so it's probably worth including as a way out. 
+ return Long.compare((Long)item1, (Long)item2); } - BigInteger bi2; - if(item2 instanceof BigInteger) { - bi2 = (BigInteger)item2; - } else { - bi2 = BigInteger.valueOf(((Number)item2).longValue()); + else { + BigInteger bi1; + if (item1 instanceof BigInteger) { + bi1 = (BigInteger) item1; + } else { + bi1 = BigInteger.valueOf(((Number) item1).longValue()); + } + BigInteger bi2; + if (item2 instanceof BigInteger) { + bi2 = (BigInteger) item2; + } else { + bi2 = BigInteger.valueOf(((Number) item2).longValue()); + } + return bi1.compareTo(bi2); } - return bi1.compareTo(bi2); - } - if(code1 == DOUBLE_CODE) { - // This is done over vanilla double comparison basically to handle NaN - // sorting correctly. - byte[] dBytes1 = ByteBuffer.allocate(8).putDouble((Double)item1).array(); - byte[] dBytes2 = ByteBuffer.allocate(8).putDouble((Double)item2).array(); - return compareSignedBigEndian(dBytes1, dBytes2); } if(code1 == FLOAT_CODE) { // This is done for the same reason that double comparison is done // that way. - byte[] fBytes1 = ByteBuffer.allocate(4).putFloat((Float)item1).array(); - byte[] fBytes2 = ByteBuffer.allocate(4).putFloat((Float)item2).array(); - return compareSignedBigEndian(fBytes1, fBytes2); + int fbits1 = encodeFloatBits((Float)item1); + int fbits2 = encodeFloatBits((Float)item2); + return Integer.compareUnsigned(fbits1, fbits2); + } + if(code1 == DOUBLE_CODE) { + // This is done over vanilla double comparison basically to handle NaN + // sorting correctly. 
+ long dbits1 = encodeDoubleBits((Double)item1); + long dbits2 = encodeDoubleBits((Double)item2); + return Long.compareUnsigned(dbits1, dbits2); } if(code1 == FALSE_CODE) { return Boolean.compare((Boolean)item1, (Boolean)item2); @@ -579,51 +605,53 @@ class TupleUtil { } static List unpack(byte[] bytes, int start, int length) { - DecodeResult decodeResult = new DecodeResult(); + DecodeState decodeState = new DecodeState(); int pos = start; int end = start + length; while(pos < end) { - decode(decodeResult, bytes, pos, end); - pos = decodeResult.end; + decode(decodeState, bytes, pos, end); + pos = decodeState.end; } - return decodeResult.values; + return decodeState.values; } - static void encodeAll(EncodeResult result, List items, byte[] prefix) { + static void encodeAll(EncodeState state, List items, byte[] prefix) { if(prefix != null) { - result.add(prefix); + state.add(prefix); } for(Object t : items) { - encode(result, t); + encode(state, t); } //System.out.println("Joining whole tuple..."); } static byte[] pack(List items, byte[] prefix) { - EncodeResult result = new EncodeResult(2 * items.size() + (prefix == null ? 0 : 1)); - encodeAll(result, items, prefix); - if(result.versionPos >= 0) { + EncodeState state = new EncodeState(2 * items.size() + (prefix == null ? 0 : 1)); + encodeAll(state, items, prefix); + if(state.versionPos >= 0) { throw new IllegalArgumentException("Incomplete Versionstamp included in vanilla tuple packInternal"); - } else { - return ByteArrayUtil.join(null, result.encodedValues); + } + else { + return ByteArrayUtil.join(null, state.encodedValues); } } static byte[] packWithVersionstamp(List items, byte[] prefix) { - EncodeResult result = new EncodeResult(2 * items.size() + (prefix == null ? 1 : 2)); - encodeAll(result, items, prefix); - if(result.versionPos < 0) { + EncodeState state = new EncodeState(2 * items.size() + (prefix == null ? 
1 : 2)); + encodeAll(state, items, prefix); + if(state.versionPos < 0) { throw new IllegalArgumentException("No incomplete Versionstamp included in tuple packInternal with versionstamp"); - } else { - if(result.versionPos > 0xffff) { - throw new IllegalArgumentException("Tuple has incomplete version at position " + result.versionPos + " which is greater than the maximum " + 0xffff); + } + else { + if(state.versionPos > 0xffff) { + throw new IllegalArgumentException("Tuple has incomplete version at position " + state.versionPos + " which is greater than the maximum " + 0xffff); } if (FDB.instance().getAPIVersion() < 520) { - result.add(ByteBuffer.allocate(Short.BYTES).order(ByteOrder.LITTLE_ENDIAN).putShort((short)result.versionPos).array()); + state.add(ByteBuffer.allocate(Short.BYTES).order(ByteOrder.LITTLE_ENDIAN).putShort((short)state.versionPos).array()); } else { - result.add(ByteBuffer.allocate(Integer.BYTES).order(ByteOrder.LITTLE_ENDIAN).putInt(result.versionPos).array()); + state.add(ByteBuffer.allocate(Integer.BYTES).order(ByteOrder.LITTLE_ENDIAN).putInt(state.versionPos).array()); } - return ByteArrayUtil.join(null, result.encodedValues); + return ByteArrayUtil.join(null, state.encodedValues); } } @@ -631,13 +659,17 @@ class TupleUtil { return items.anyMatch(item -> { if(item == null) { return false; - } else if(item instanceof Versionstamp) { + } + else if(item instanceof Versionstamp) { return !((Versionstamp) item).isComplete(); - } else if(item instanceof Tuple) { + } + else if(item instanceof Tuple) { return hasIncompleteVersionstamp(((Tuple) item).stream()); - } else if(item instanceof Collection) { + } + else if(item instanceof Collection) { return hasIncompleteVersionstamp(((Collection) item).stream()); - } else { + } + else { return false; } }); @@ -646,23 +678,25 @@ class TupleUtil { public static void main(String[] args) { try { byte[] bytes = pack(Collections.singletonList(4), null); - DecodeResult result = new DecodeResult(); + DecodeState 
result = new DecodeState(); decode(result, bytes, 0, bytes.length); int val = (int)result.values.get(0); assert 4 == val; - } catch (Exception e) { + } + catch(Exception e) { e.printStackTrace(); System.out.println("Error " + e.getMessage()); } try { byte[] bytes = pack(Collections.singletonList("\u021Aest \u0218tring"), null); - DecodeResult result = new DecodeResult(); + DecodeState result = new DecodeState(); decode(result, bytes, 0, bytes.length); String string = (String)result.values.get(0); System.out.println("contents -> " + string); assert "\u021Aest \u0218tring".equals(string); - } catch (Exception e) { + } + catch(Exception e) { e.printStackTrace(); System.out.println("Error " + e.getMessage()); } From a74dfa548782da90c87f11b30b6cd087d843efd1 Mon Sep 17 00:00:00 2001 From: Alec Grieser Date: Sun, 24 Feb 2019 23:49:31 -0800 Subject: [PATCH 20/47] compare strings by unicode codepoint without copying --- bindings/java/CMakeLists.txt | 1 + .../apple/foundationdb/tuple/StringUtil.java | 75 ++++++++++++++++ .../apple/foundationdb/tuple/TupleUtil.java | 8 +- .../test/TuplePerformanceTest.java | 55 +++++++----- .../apple/foundationdb/test/TupleTest.java | 85 ++++++++++++++++++- 5 files changed, 201 insertions(+), 23 deletions(-) create mode 100644 bindings/java/src/main/com/apple/foundationdb/tuple/StringUtil.java diff --git a/bindings/java/CMakeLists.txt b/bindings/java/CMakeLists.txt index 93e7e7ea8e..f8c1c25a65 100644 --- a/bindings/java/CMakeLists.txt +++ b/bindings/java/CMakeLists.txt @@ -56,6 +56,7 @@ set(JAVA_BINDING_SRCS src/main/com/apple/foundationdb/tuple/package-info.java src/main/com/apple/foundationdb/tuple/Tuple.java src/main/com/apple/foundationdb/tuple/TupleUtil.java + src/main/com/apple/foundationdb/tuple/StringUtil.java src/main/com/apple/foundationdb/tuple/Versionstamp.java) set(JAVA_TESTS_SRCS diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/StringUtil.java b/bindings/java/src/main/com/apple/foundationdb/tuple/StringUtil.java 
new file mode 100644 index 0000000000..660d04a6e1 --- /dev/null +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/StringUtil.java @@ -0,0 +1,75 @@ +/* + * StringUtil.java + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.apple.foundationdb.tuple; + +final class StringUtil { + private static final char SURROGATE_COUNT = Character.MAX_LOW_SURROGATE - Character.MIN_HIGH_SURROGATE + 1; + private static final char ABOVE_SURROGATES = Character.MAX_VALUE - Character.MAX_LOW_SURROGATE; + + static char adjustForSurrogates(char c, String s, int pos) { + if(c > Character.MAX_LOW_SURROGATE) { + return (char)(c - SURROGATE_COUNT); + } + else { + // Validate the UTF-16 string as this can do weird things on invalid strings + if((Character.isHighSurrogate(c) && (pos + 1 >= s.length() || !Character.isLowSurrogate(s.charAt(pos + 1)))) || + (Character.isLowSurrogate(c) && (pos == 0 || !Character.isHighSurrogate(s.charAt(pos - 1))))) { + throw new IllegalArgumentException("malformed UTF-16 string does not follow high surrogate with low surrogate"); + } + return (char)(c + ABOVE_SURROGATES); + + } + } + + // Compare two strings based on their UTF-8 code point values. Note that Java stores strings + // using UTF-16. However, {@link Tuple}s are encoded using UTF-8. 
Using unsigned byte comparison, + // UTF-8 strings will sort based on their Unicode codepoints. However, UTF-16 strings almost, + // but not quite, sort that way. This can be addressed by fixing up surrogates. There are 0x800 surrogate + // values and about 0x2000 code points above the maximum surrogate value. For anything that is a surrogate, + // shift it up by 0x2000, and anything that is above the maximum surrogate value, shift it down by 0x800. + // This makes all surrogates sort after all non-surrogates. + // + // See: https://ssl.icu-project.org/docs/papers/utf16_code_point_order.html + static int compareUtf8(String s1, String s2) { + // Ignore common prefix at the beginning which will compare equal regardless of encoding + int pos = 0; + while(pos < s1.length() && pos < s2.length() && s1.charAt(pos) == s2.charAt(pos)) { + pos++; + } + if(pos >= s1.length() || pos >= s2.length()) { + // One string is the prefix of another, so return based on length. + return Integer.compare(s1.length(), s2.length()); + } + // Compare first different character + char c1 = s1.charAt(pos); + char c2 = s2.charAt(pos); + // Apply "fix up" for surrogates + if(c1 >= Character.MIN_HIGH_SURROGATE) { + c1 = adjustForSurrogates(c1, s1, pos); + } + if(c2 >= Character.MIN_HIGH_SURROGATE) { + c2 = adjustForSurrogates(c2, s2, pos); + } + return Character.compare(c1, c2); + } + + private StringUtil() {} +} diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java b/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java index 5b220d2c90..34d0f78653 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java @@ -546,7 +546,13 @@ class TupleUtil { return ByteArrayUtil.compareUnsigned((byte[])item1, (byte[])item2); } if(code1 == STRING_CODE) { - return ByteArrayUtil.compareUnsigned(((String)item1).getBytes(UTF8), ((String)item2).getBytes(UTF8)); + try { + return 
StringUtil.compareUtf8((String)item1, (String)item2); + } + catch(IllegalArgumentException e) { + // Encountered malformed unicode when comparing. Use byte comparison. + return ByteArrayUtil.compareUnsigned(((String)item1).getBytes(UTF8), ((String)item2).getBytes(UTF8)); + } } if(code1 == INT_ZERO_CODE) { if(item1 instanceof Long && item2 instanceof Long) { diff --git a/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java b/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java index cf79ff41a9..3de9b76785 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java @@ -142,6 +142,7 @@ public class TuplePerformanceTest { long packNanos = 0L; long unpackNanos = 0L; long equalsNanos = 0L; + long equalsArrayNanos = 0L; long hashNanos = 0L; long secondHashNanos = 0L; long subspacePackNanos = 0L; @@ -164,12 +165,22 @@ public class TuplePerformanceTest { endNanos = System.nanoTime(); unpackNanos += endNanos - startNanos; + // Copy items over as if both are packed, their byte arrays are compared + Tuple tCopy = Tuple.fromList(t.getItems()); + Tuple t2Copy = Tuple.fromList(t2.getItems()); + startNanos = System.nanoTime(); + if (!tCopy.equals(t2Copy)) { + throw new RuntimeException("deserialized did not match serialized: " + t + " -- " + t2); + } + endNanos = System.nanoTime(); + equalsNanos += endNanos - startNanos; + startNanos = System.nanoTime(); if(!t.equals(t2)) { throw new RuntimeException("deserialized did not match serialized: " + t + " -- " + t2); } endNanos = System.nanoTime(); - equalsNanos += endNanos - startNanos; + equalsArrayNanos += endNanos - startNanos; startNanos = System.nanoTime(); byte[] subspacePacked = subspace.pack(t); @@ -182,7 +193,7 @@ public class TuplePerformanceTest { startNanos = System.nanoTime(); Tuple t3 = subspace.unpack(subspacePacked); endNanos = System.nanoTime(); - 
if(!t.equals(t3)) { + if (!Tuple.fromList(t.getItems()).equals(Tuple.fromList(t3.getItems())) || !t.equals(t3)) { throw new RuntimeException("does not unpack equally from subspace"); } if(!Arrays.equals(t.pack(), t3.pack())) { @@ -205,25 +216,27 @@ public class TuplePerformanceTest { } System.out.println("Test ended."); - System.out.printf(" Total elements: %d%n", totalLength); - System.out.printf(" Total bytes: %d kB%n", totalBytes / 1000); - System.out.printf(" Bytes per tuple: %f B%n", totalBytes * 1.0 / iterations); - System.out.printf(" Pack time: %f s%n", packNanos * 1e-9); - System.out.printf(" Pack time per tuple: %f \u03BCs%n", packNanos * 1e-3 / iterations); - System.out.printf(" Pack time per kB: %f \u03BCs%n", packNanos * 1.0 / totalBytes); - System.out.printf(" Serialization rate: %f objects / \u03BCs%n", totalLength * 1000.0 / packNanos); - System.out.printf(" Unpack time: %f s%n", unpackNanos * 1e-9); - System.out.printf(" Unpack time per tuple: %f \u03BCs%n", unpackNanos * 1e-3 / iterations); - System.out.printf(" Equals time: %f s%n", equalsNanos * 1e-9); - System.out.printf(" Equals time per tuple: %f \u03BCs%n", equalsNanos * 1e-3 / iterations); - System.out.printf(" Subspace pack time: %f s%n", subspacePackNanos * 1e-9); - System.out.printf(" Subspace pack time per tuple: %f \u03BCs%n", subspacePackNanos * 1e-3 / iterations); - System.out.printf(" Subspace unpack time: %f s%n", subspaceUnpackNanos * 1e-9); - System.out.printf(" Subspace unpack time per tuple: %f \u03BCs%n", subspaceUnpackNanos * 1e-3 / iterations); - System.out.printf(" Hash time: %f s%n", hashNanos * 1e-9); - System.out.printf(" Hash time per tuple: %f \u03BCs%n", hashNanos * 1e-3 / iterations); - System.out.printf(" Second hash time: %f s%n", secondHashNanos * 1e-9); - System.out.printf(" Second hash time per tuple: %f \u03BCs%n", secondHashNanos * 1e-3 / iterations); + System.out.printf(" Total elements: %d%n", totalLength); + System.out.printf(" Total bytes: %d kB%n", 
totalBytes / 1000); + System.out.printf(" Bytes per tuple: %f B%n", totalBytes * 1.0 / iterations); + System.out.printf(" Pack time: %f s%n", packNanos * 1e-9); + System.out.printf(" Pack time per tuple: %f \u03BCs%n", packNanos * 1e-3 / iterations); + System.out.printf(" Pack time per kB: %f \u03BCs%n", packNanos * 1.0 / totalBytes); + System.out.printf(" Serialization rate: %f objects / \u03BCs%n", totalLength * 1000.0 / packNanos); + System.out.printf(" Unpack time: %f s%n", unpackNanos * 1e-9); + System.out.printf(" Unpack time per tuple: %f \u03BCs%n", unpackNanos * 1e-3 / iterations); + System.out.printf(" Equals time: %f s%n", equalsNanos * 1e-9); + System.out.printf(" Equals time per tuple: %f \u03BCs%n", equalsNanos * 1e-3 / iterations); + System.out.printf(" Equals time (using packed): %f s%n", equalsArrayNanos * 1e-9); + System.out.printf(" Equals time (using packed) per tuple: %f \u03BCs%n", equalsArrayNanos * 1e-3 / iterations); + System.out.printf(" Subspace pack time: %f s%n", subspacePackNanos * 1e-9); + System.out.printf(" Subspace pack time per tuple: %f \u03BCs%n", subspacePackNanos * 1e-3 / iterations); + System.out.printf(" Subspace unpack time: %f s%n", subspaceUnpackNanos * 1e-9); + System.out.printf(" Subspace unpack time per tuple: %f \u03BCs%n", subspaceUnpackNanos * 1e-3 / iterations); + System.out.printf(" Hash time: %f s%n", hashNanos * 1e-9); + System.out.printf(" Hash time per tuple: %f \u03BCs%n", hashNanos * 1e-3 / iterations); + System.out.printf(" Second hash time: %f s%n", secondHashNanos * 1e-9); + System.out.printf(" Second hash time per tuple: %f \u03BCs%n", secondHashNanos * 1e-3 / iterations); } public static void main(String[] args) { diff --git a/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java b/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java index 528c11f93a..305c1a90f0 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java +++ 
b/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java @@ -29,6 +29,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Objects; +import java.util.UUID; public class TupleTest { private static final byte FF = (byte)0xff; @@ -38,6 +39,7 @@ public class TupleTest { try { // FDB fdb = FDB.selectAPIVersion(610); serializedForms(); + comparisons(); /* try(Database db = fdb.open()) { runTests(reps, db); @@ -113,7 +115,16 @@ public class TupleTest { Tuple.from(Float.intBitsToFloat(Integer.MAX_VALUE)), new byte[]{0x20, FF, FF, FF, FF}, Tuple.from(Double.longBitsToDouble(Long.MAX_VALUE)), new byte[]{0x21, FF, FF, FF, FF, FF, FF, FF, FF}, Tuple.from(Float.intBitsToFloat(~0)), new byte[]{0x20, 0x00, 0x00, 0x00, 0x00}, - Tuple.from(Double.longBitsToDouble(~0L)), new byte[]{0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} + Tuple.from(Double.longBitsToDouble(~0L)), new byte[]{0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + Tuple.from(""), new byte[]{0x02, 0x00}, + Tuple.from("hello"), new byte[]{0x02, 'h', 'e', 'l', 'l', 'o', 0x00}, + Tuple.from("\u4e2d\u6587"), new byte[]{0x02, (byte)0xe4, (byte)0xb8, (byte)0xad, (byte)0xe6, (byte)0x96, (byte)0x87, 0x00}, + Tuple.from("\u03bc\u03ac\u03b8\u03b7\u03bc\u03b1"), new byte[]{0x02, (byte)0xce, (byte)0xbc, (byte)0xce, (byte)0xac, (byte)0xce, (byte)0xb8, (byte)0xce, (byte)0xb7, (byte)0xce, (byte)0xbc, (byte)0xce, (byte)0xb1, 0x00}, + Tuple.from(new String(new int[]{0x1f525}, 0, 1)), new byte[]{0x02, (byte)0xf0, (byte)0x9f, (byte)0x94, (byte)0xa5, 0x00}, + Tuple.from("\ud83d\udd25"), new byte[]{0x02, (byte)0xf0, (byte)0x9f, (byte)0x94, (byte)0xa5, 0x00}, + Tuple.from("\ud83e\udd6f"), new byte[]{0x02, (byte)0xf0, (byte)0x9f, (byte)0xa5, (byte)0xaf, 0x00}, + Tuple.from("\udd25\ud83e\udd6f"), new byte[]{0x02, 0x3f, (byte)0xf0, (byte)0x9f, (byte)0xa5, (byte)0xaf, 0x00}, // malformed string - low surrogate without high surrogate + Tuple.from("a\udd25\ud83e\udd6f"), new 
byte[]{0x02, 'a', 0x3f, (byte)0xf0, (byte)0x9f, (byte)0xa5, (byte)0xaf, 0x00} // malformed string - low surrogate without high surrogate ); for(TupleSerialization serialization : serializations) { @@ -130,6 +141,78 @@ public class TupleTest { System.out.println("All tuples had matching serializations"); } + private static void comparisons() { + List tuples = Arrays.asList( + Tuple.from(0L), + Tuple.from(BigInteger.ZERO), + Tuple.from(1L), + Tuple.from(BigInteger.ONE), + Tuple.from(-1L), + Tuple.from(BigInteger.ONE.negate()), + Tuple.from(Long.MAX_VALUE), + Tuple.from(Long.MIN_VALUE), + Tuple.from(BigInteger.valueOf(Long.MIN_VALUE).subtract(BigInteger.ONE)), + Tuple.from(BigInteger.valueOf(Long.MIN_VALUE).shiftLeft(1)), + Tuple.from(-0.0f), + Tuple.from(0.0f), + Tuple.from(-0.0), + Tuple.from(0.0), + Tuple.from(Float.NEGATIVE_INFINITY), + Tuple.from(Double.NEGATIVE_INFINITY), + Tuple.from(Float.NaN), + Tuple.from(Double.NaN), + Tuple.from(Float.intBitsToFloat(Float.floatToIntBits(Float.NaN) + 1)), + Tuple.from(Double.longBitsToDouble(Double.doubleToLongBits(Double.NaN) + 1)), + Tuple.from(Float.intBitsToFloat(Float.floatToIntBits(Float.NaN) + 2)), + Tuple.from(Double.longBitsToDouble(Double.doubleToLongBits(Double.NaN) + 2)), + Tuple.from(Float.intBitsToFloat(Float.floatToIntBits(Float.NaN) ^ Integer.MIN_VALUE)), + Tuple.from(Double.longBitsToDouble(Double.doubleToLongBits(Double.NaN) ^ Long.MIN_VALUE)), + Tuple.from(Float.intBitsToFloat(Float.floatToIntBits(Float.NaN) ^ Integer.MIN_VALUE + 1)), + Tuple.from(Double.longBitsToDouble(Double.doubleToLongBits(Double.NaN) ^ Long.MIN_VALUE + 1)), + Tuple.from(Float.POSITIVE_INFINITY), + Tuple.from(Double.POSITIVE_INFINITY), + Tuple.from((Object)new byte[0]), + Tuple.from((Object)new byte[]{0x00}), + Tuple.from((Object)new byte[]{0x00, FF}), + Tuple.from((Object)new byte[]{0x7f}), + Tuple.from((Object)new byte[]{(byte)0x80}), + Tuple.from("a"), + Tuple.from("\u03bc\u03ac\u03b8\u03b7\u03bc\u03b1"), + 
Tuple.from("\u03bc\u03b1\u0301\u03b8\u03b7\u03bc\u03b1"), + Tuple.from("\u4e2d\u6587"), + Tuple.from("\u4e2d\u570B"), + Tuple.from("\ud83d\udd25"), + Tuple.from("\ud83e\udd6f"), + Tuple.from("a\ud83d\udd25"), + Tuple.from("\ufb49"), + Tuple.from("\ud83d\udd25\ufb49"), + Tuple.from("\ud8ed\ud8ed"), // malformed string -- two high surrogates + Tuple.from("\ud8ed\ud8eda"), // malformed string -- two high surrogates + Tuple.from("\udd25\udd25"), // malformed string -- two low surrogates + Tuple.from("a\udd25\ud8ed"), // malformed string -- two low surrogates + Tuple.from("\udd25\ud83e\udd6f"), // malformed string -- low surrogate followed by high then low surrogate + Tuple.from("\udd6f\ud83e\udd6f"), // malformed string -- low surrogate followed by high then low surrogate + Tuple.from(new UUID(-1, 0)), + Tuple.from(new UUID(-1, -1)), + Tuple.from(new UUID(1, -1)), + Tuple.from(new UUID(1, 1)) + ); + + for(Tuple t1 : tuples) { + for(Tuple t2 : tuples) { + System.out.println("Comparing " + t1 + " and " + t2); + // Copy the items over to new tuples to avoid having them use the memoized packed representations + Tuple t1copy = Tuple.fromList(t1.getItems()); + Tuple t2copy = Tuple.fromList(t2.getItems()); + int semanticComparison = t1copy.compareTo(t2copy); + int byteComparison = ByteArrayUtil.compareUnsigned(t1.pack(), t2.pack()); + if(Integer.signum(semanticComparison) != Integer.signum(byteComparison)) { + throw new RuntimeException("Tuple t1 and t2 comparison mismatched: semantic = " + semanticComparison + " while byte order = " + byteComparison); + } + } + } + } + private static void runTests(final int reps, TransactionContext db) { System.out.println("Running tests..."); long start = System.currentTimeMillis(); From 663d750e1de2ceb1a2d8fd78ab5c511eeec37fd9 Mon Sep 17 00:00:00 2001 From: Alec Grieser Date: Mon, 25 Feb 2019 21:59:16 -0800 Subject: [PATCH 21/47] pack Tuples with a single byte array allocation of the right size --- .../foundationdb/tuple/ByteArrayUtil.java 
| 143 +++---- .../apple/foundationdb/tuple/StringUtil.java | 43 +++ .../com/apple/foundationdb/tuple/Tuple.java | 56 +-- .../apple/foundationdb/tuple/TupleUtil.java | 357 +++++++++++------- .../foundationdb/test/AsyncStackTester.java | 6 +- .../apple/foundationdb/test/StackTester.java | 8 +- .../test/TuplePerformanceTest.java | 64 +++- .../apple/foundationdb/test/TupleTest.java | 107 +++++- 8 files changed, 547 insertions(+), 237 deletions(-) diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java b/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java index eeea3e1799..d848c296ff 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java @@ -20,7 +20,6 @@ package com.apple.foundationdb.tuple; -import java.math.BigInteger; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.Arrays; @@ -154,7 +153,10 @@ public class ByteArrayUtil { * @return a newly created array where {@code pattern} replaced with {@code replacement} */ public static byte[] replace(byte[] src, byte[] pattern, byte[] replacement) { - return join(replacement, split(src, pattern)); + if(src == null) { + return null; + } + return replace(src, 0, src.length, pattern, replacement); } /** @@ -171,7 +173,69 @@ public class ByteArrayUtil { */ public static byte[] replace(byte[] src, int offset, int length, byte[] pattern, byte[] replacement) { - return join(replacement, split(src, offset, length, pattern)); + if(pattern == null || pattern.length == 0) { + return Arrays.copyOfRange(src, offset, offset + length); + } + ByteBuffer dest; + if(replacement == null || replacement.length != pattern.length) { + // Array might change size. This is the "tricky" case. 
+ byte patternFirst = pattern[0]; + int patternOccurrences = 0; + int currentPosition = offset; + while(currentPosition < offset + length) { + if(src[currentPosition] == patternFirst && regionEquals(src, currentPosition, pattern)) { + patternOccurrences++; + currentPosition += pattern.length; + } + else { + currentPosition++; + } + } + if(patternOccurrences == 0) { + // Pattern doesn't occur. Just return a copy of the needed region. + return Arrays.copyOfRange(src, offset, offset + length); + } + int replacementLength = (replacement == null) ? 0 : replacement.length; + int newLength = length + patternOccurrences * (replacementLength - pattern.length); + if(newLength == 0) { + return new byte[0]; + } + else { + dest = ByteBuffer.allocate(newLength); + } + } + else { + // No matter what, the array will stay the same size as replacement.length = pattern.length + dest = ByteBuffer.allocate(length); + } + replace(src, offset, length, pattern, replacement, dest); + return dest.array(); + } + + static void replace(byte[] src, int offset, int length, byte[] pattern, byte[] replacement, ByteBuffer dest) { + if(pattern == null || pattern.length == 0) { + dest.put(src, offset, length); + return; + } + byte patternFirst = pattern[0]; + int lastPosition = offset; + int currentPosition = offset; + + while(currentPosition < offset + length) { + if(src[currentPosition] == patternFirst && regionEquals(src, currentPosition, pattern)) { + dest.put(src, lastPosition, currentPosition - lastPosition); + if(replacement != null) { + dest.put(replacement); + } + currentPosition += pattern.length; + lastPosition = currentPosition; + } + else { + currentPosition++; + } + } + + dest.put(src, lastPosition, currentPosition - lastPosition); } /** @@ -203,7 +267,7 @@ public class ByteArrayUtil { * @return a list of byte arrays from {@code src} now not containing {@code delimiter} */ public static List split(byte[] src, int offset, int length, byte[] delimiter) { - List parts = new LinkedList(); + 
List parts = new LinkedList<>(); int idx = offset; int lastSplitEnd = offset; while(idx <= (offset+length) - delimiter.length) { @@ -225,13 +289,6 @@ public class ByteArrayUtil { return parts; } - static int bisectLeft(BigInteger[] arr, BigInteger i) { - int n = Arrays.binarySearch(arr, i); - if(n >= 0) - return n; - return (n + 1) * -1; - } - /** * Compare byte arrays for equality and ordering purposes. Elements in the array * are interpreted and compared as unsigned bytes. Neither parameter @@ -276,61 +333,6 @@ public class ByteArrayUtil { return true; } - /** - * Scan through an array of bytes to find the first occurrence of a specific value. - * - * @param src array to scan. Must not be {@code null}. - * @param what the value for which to search. - * @param start the index at which to start the search. If this is at or after - * the end of {@code src}, the result will always be {@code -1}. - * @param end the index one past the last entry at which to search - * - * @return return the location of the first instance of {@code value}, or - * {@code -1} if not found. 
- */ - static int findNext(byte[] src, byte what, int start, int end) { - for(int i = start; i < end; i++) { - if(src[i] == what) - return i; - } - return -1; - } - - /** - * Gets the index of the first element after the next occurrence of the byte sequence [nm] - * @param v the bytes to scan through - * @param n first character to find - * @param m second character to find - * @param start the index at which to start the scan - * - * @return the index after the next occurrence of [nm] - */ - static int findTerminator(byte[] v, byte n, byte m, int start) { - return findTerminator(v, n, m, start, v.length); - } - - /** - * Gets the index of the first element after the next occurrence of the byte sequence [nm] - * @param v the bytes to scan through - * @param n first character to find - * @param m second character to find - * @param start the index at which to start the scan - * @param end the index at which to stop the search (exclusive) - * - * @return the index after the next occurrence of [nm] - */ - static int findTerminator(byte[] v, byte n, byte m, int start, int end) { - int pos = start; - while(true) { - pos = findNext(v, n, pos, end); - if(pos < 0) - return end; - if(pos + 1 == end || v[pos+1] != m) - return pos; - pos += 2; - } - } - /** * Computes the first key that would sort outside the range prefixed by {@code key}. 
* {@code key} must be non-null, and contain at least some character this is not @@ -417,5 +419,14 @@ public class ByteArrayUtil { return s.toString(); } + static int nullCount(byte[] val) { + int nulls = 0; + for(int i = 0; i < val.length; i++) { + if(val[i] == 0x00) + nulls += 1; + } + return nulls; + } + private ByteArrayUtil() {} } diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/StringUtil.java b/bindings/java/src/main/com/apple/foundationdb/tuple/StringUtil.java index 660d04a6e1..cd1d18d627 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/StringUtil.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/StringUtil.java @@ -71,5 +71,48 @@ final class StringUtil { return Character.compare(c1, c2); } + static int packedSize(String s) { + final int strLength = s.length(); + int size = 0; + int pos = 0; + + while(pos < strLength) { + char c = s.charAt(pos); + if(c == '\0') { + // Null is encoded as \x00\xff + size += 2; + } + else if(c <= 0x7f) { + // ASCII code point. Only 1 byte. + size += 1; + } + else if(c <= 0x07ff) { + // 2 byte code point + size += 2; + } + else if(Character.isHighSurrogate(c)) { + if(pos + 1 < s.length() && Character.isLowSurrogate(s.charAt(pos + 1))) { + // High surrogate followed by low surrogate means the code point + // is between U+10000 and U+10FFFF, so it requires 4 bytes. 
+ size += 4; + pos += 1; + } + else { + throw new IllegalArgumentException("malformed UTF-16 has high surrogate not followed by low surrogate"); + } + } + else if(Character.isLowSurrogate(c)) { + throw new IllegalArgumentException("malformed UTF-16 has low surrogate without prior high surrogate"); + } + else { + // 3 byte code point + size += 3; + } + pos += 1; + } + + return size; + } + private StringUtil() {} } diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java b/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java index b3761d8c5d..5fa9726c14 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java @@ -73,6 +73,7 @@ public class Tuple implements Comparable, Iterable { private List elements; private int memoizedHash = 0; private byte[] packed = null; + private int memoizedPackedSize = -1; private Tuple(List elements, Object newItem) { this(elements); @@ -83,12 +84,6 @@ public class Tuple implements Comparable, Iterable { this.elements = new ArrayList<>(elements); } - private enum VersionstampExpectations { - UNKNOWN, - HAS_INCOMPLETE, - HAS_NO_INCOMPLETE - } - /** * Creates a copy of this {@code Tuple} with an appended last element. 
The parameter * is untyped but only {@link String}, {@code byte[]}, {@link Number}s, {@link UUID}s, @@ -313,13 +308,15 @@ public class Tuple implements Comparable, Iterable { byte[] packInternal(byte[] prefix, boolean copy) { boolean hasPrefix = prefix != null && prefix.length > 1; if(packed == null) { - byte[] result = TupleUtil.pack(elements, prefix); + byte[] result = TupleUtil.pack(elements, prefix, getPackedSize()); if(hasPrefix) { packed = Arrays.copyOfRange(result, prefix.length, result.length); + memoizedPackedSize = packed.length; return result; } else { packed = result; + memoizedPackedSize = packed.length; } } if(hasPrefix) { @@ -366,21 +363,23 @@ public class Tuple implements Comparable, Iterable { * @throws IllegalArgumentException if there is not exactly one incomplete {@link Versionstamp} included in this {@code Tuple} */ public byte[] packWithVersionstamp(byte[] prefix) { - return TupleUtil.packWithVersionstamp(elements, prefix); + return TupleUtil.packWithVersionstamp(elements, prefix, getPackedSize()); } byte[] packWithVersionstampInternal(byte[] prefix, boolean copy) { boolean hasPrefix = prefix != null && prefix.length > 0; if(packed == null) { - byte[] result = TupleUtil.packWithVersionstamp(elements, prefix); + byte[] result = TupleUtil.packWithVersionstamp(elements, prefix, getPackedSize()); if(hasPrefix) { byte[] withoutPrefix = Arrays.copyOfRange(result, prefix.length, result.length); TupleUtil.adjustVersionPosition(packed, -1 * prefix.length); packed = withoutPrefix; + memoizedPackedSize = packed.length; return result; } else { packed = result; + memoizedPackedSize = packed.length; } } if(hasPrefix) { @@ -398,13 +397,13 @@ public class Tuple implements Comparable, Iterable { } } - byte[] packMaybeVersionstamp(byte[] prefix) { + byte[] packMaybeVersionstamp() { if(packed == null) { if(hasIncompleteVersionstamp()) { - return packWithVersionstampInternal(prefix, false); + return packWithVersionstampInternal(null, false); } else { - return 
packInternal(prefix, false); + return packInternal(null, false); } } else { @@ -489,6 +488,7 @@ public class Tuple implements Comparable, Iterable { Tuple t = new Tuple(); t.elements = TupleUtil.unpack(bytes, offset, length); t.packed = Arrays.copyOfRange(bytes, offset, offset + length); + t.memoizedPackedSize = length; return t; } @@ -727,11 +727,14 @@ public class Tuple implements Comparable, Iterable { Object o = this.elements.get(index); if(o == null) { return null; - } else if(o instanceof Tuple) { + } + else if(o instanceof Tuple) { return (Tuple)o; - } else if(o instanceof List) { - return Tuple.fromItems((List)o); - } else { + } + else if(o instanceof List) { + return Tuple.fromItems((List)o); + } + else { throw new ClassCastException("Cannot convert item of type " + o.getClass() + " to tuple"); } } @@ -824,16 +827,23 @@ public class Tuple implements Comparable, Iterable { } /** - * Get the number of bytes in the packed representation of this {@code Tuple}. Note that at the - * moment, this number is calculated by packing the {@code Tuple} and looking at its size. This method - * will memoize the result, however, so asking the same {@code Tuple} for its size multiple times - * is a fast operation. + * Get the number of bytes in the packed representation of this {@code Tuple}. This is done by summing + * the serialized sizes of all of the elements of this {@code Tuple} and does not pack everything + * into a single {@code Tuple}. The return value of this function is stored within this {@code Tuple} + * after this function has been called so that subsequent calls on the same object are fast. This method + * does not validate that there is no more than one incomplete {@link Versionstamp} in this {@code Tuple}. 
* * @return the number of bytes in the packed representation of this {@code Tuple} */ public int getPackedSize() { - byte[] p = packMaybeVersionstamp(null); - return p.length; + if(memoizedPackedSize < 0) { + memoizedPackedSize = getPackedSize(false); + } + return memoizedPackedSize; + } + + int getPackedSize(boolean nested) { + return TupleUtil.getPackedSize(elements, nested); } /** @@ -871,7 +881,7 @@ public class Tuple implements Comparable, Iterable { @Override public int hashCode() { if(memoizedHash == 0) { - memoizedHash = Arrays.hashCode(packMaybeVersionstamp(null)); + memoizedHash = Arrays.hashCode(packMaybeVersionstamp()); } return memoizedHash; } diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java b/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java index 34d0f78653..fc1fbc7262 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java @@ -36,11 +36,10 @@ import com.apple.foundationdb.FDB; class TupleUtil { private static final byte nil = 0x00; - private static final BigInteger[] BIG_INT_SIZE_LIMITS; - private static final Charset UTF8; + private static final Charset UTF8 = Charset.forName("UTF-8"); private static final BigInteger LONG_MIN_VALUE = BigInteger.valueOf(Long.MIN_VALUE); private static final BigInteger LONG_MAX_VALUE = BigInteger.valueOf(Long.MAX_VALUE); - private static final IterableComparator iterableComparator; + private static final IterableComparator iterableComparator = new IterableComparator(); private static final byte BYTES_CODE = 0x01; private static final byte STRING_CODE = 0x02; @@ -57,26 +56,11 @@ class TupleUtil { private static final byte[] NULL_ARR = new byte[] {nil}; private static final byte[] NULL_ESCAPED_ARR = new byte[] {nil, (byte)0xFF}; - private static final byte[] BYTES_ARR = new byte[]{BYTES_CODE}; - private static final byte[] STRING_ARR = new byte[]{STRING_CODE}; - private static 
final byte[] NESTED_ARR = new byte[]{NESTED_CODE}; - private static final byte[] INT_ZERO_ARR = new byte[]{INT_ZERO_CODE}; - private static final byte[] FALSE_ARR = new byte[]{FALSE_CODE}; - private static final byte[] TRUE_ARR = new byte[]{TRUE_CODE}; - private static final byte[] VERSIONSTAMP_ARR = new byte[]{VERSIONSTAMP_CODE}; - - static { - BIG_INT_SIZE_LIMITS = new BigInteger[9]; - for(int i = 0; i < BIG_INT_SIZE_LIMITS.length; i++) { - BIG_INT_SIZE_LIMITS[i] = (BigInteger.ONE).shiftLeft(i * 8).subtract(BigInteger.ONE); - } - UTF8 = Charset.forName("UTF-8"); - iterableComparator = new IterableComparator(); - } static class DecodeState { final List values; int end; + int nullCount; // Basically a hack to allow findTerminator to return the terminator and null count DecodeState() { values = new ArrayList<>(); @@ -87,15 +71,36 @@ class TupleUtil { values.add(value); this.end = end; } + + int findNullTerminator(byte[] bytes, int from, int to) { + nullCount = 0; + int x = from; + while(x < to) { + if(bytes[x] == 0x00) { + if(x + 1 >= to || bytes[x + 1] != (byte)0xFF) { + return x; + } + else { + nullCount++; + x += 2; + } + } + else { + x += 1; + } + } + throw new IllegalArgumentException("no terminator found for bytes starting at " + from); + } } static class EncodeState { - final List encodedValues; + final ByteBuffer encodedBytes; int totalLength; int versionPos; - EncodeState(int capacity) { - this.encodedValues = new ArrayList<>(capacity); + EncodeState(ByteBuffer dest) { + encodedBytes = dest; + encodedBytes.order(ByteOrder.BIG_ENDIAN); totalLength = 0; versionPos = -1; } @@ -104,25 +109,52 @@ class TupleUtil { if(versionPos >= 0 && this.versionPos >= 0) { throw new IllegalArgumentException("Multiple incomplete Versionstamps included in Tuple"); } - encodedValues.add(encoded); + encodedBytes.put(encoded); totalLength += encoded.length; this.versionPos = versionPos; return this; } EncodeState add(byte[] encoded) { - encodedValues.add(encoded); + 
encodedBytes.put(encoded); totalLength += encoded.length; return this; } - } - static int byteLength(byte[] bytes) { - for(int i = 0; i < bytes.length; i++) { - if(bytes[i] == 0x00) continue; - return bytes.length - i; + EncodeState add(byte[] encoded, int offset, int length) { + encodedBytes.put(encoded, offset, length); + totalLength += length; + return this; + } + + EncodeState addNullEscaped(byte[] encoded) { + int nullCount = ByteArrayUtil.nullCount(encoded); + if(nullCount == 0) { + encodedBytes.put(encoded); + } + else { + ByteArrayUtil.replace(encoded, 0, encoded.length, NULL_ARR, NULL_ESCAPED_ARR, encodedBytes); + } + return this; + } + + EncodeState add(byte b) { + encodedBytes.put(b); + totalLength++; + return this; + } + + EncodeState add(int i) { + encodedBytes.putInt(i); + totalLength += Integer.BYTES; + return this; + } + + EncodeState add(long l) { + encodedBytes.putLong(l); + totalLength += Long.BYTES; + return this; } - return 0; } // These four functions are for adjusting the encoding of floating point numbers so @@ -153,11 +185,16 @@ class TupleUtil { return Double.longBitsToDouble(origBits); } - // Get the number of bytes in the representation of a long. - static int byteCount(long i) { + // Get the minimal number of bytes in the representation of a long. + static int minimalByteCount(long i) { return (Long.SIZE + 7 - Long.numberOfLeadingZeros(i >= 0 ? i : -i)) / 8; } + static int minimalByteCount(BigInteger i) { + int bitLength = (i.compareTo(BigInteger.ZERO) >= 0) ? 
i.bitLength() : i.negate().bitLength(); + return (bitLength + 7) / 8; + } + private static void adjustVersionPosition300(byte[] packed, int delta) { int offsetOffset = packed.length - Short.BYTES; ByteBuffer buffer = ByteBuffer.wrap(packed, offsetOffset, Short.BYTES).order(ByteOrder.LITTLE_ENDIAN); @@ -224,7 +261,7 @@ class TupleUtil { state.add(NULL_ESCAPED_ARR); } else { - state.add(NULL_ARR); + state.add(nil); } } else if(t instanceof byte[]) @@ -258,133 +295,104 @@ class TupleUtil { } static void encode(EncodeState state, byte[] bytes) { - byte[] escaped = ByteArrayUtil.replace(bytes, NULL_ARR, NULL_ESCAPED_ARR); - state.add(BYTES_ARR).add(escaped).add(NULL_ARR); + state.add(BYTES_CODE).addNullEscaped(bytes).add(nil); } static void encode(EncodeState state, String s) { - byte[] escaped = ByteArrayUtil.replace(s.getBytes(UTF8), NULL_ARR, NULL_ESCAPED_ARR); - state.add(STRING_ARR).add(escaped).add(NULL_ARR); + byte[] bytes = s.getBytes(UTF8); + state.add(STRING_CODE).addNullEscaped(bytes).add(nil); } static void encode(EncodeState state, BigInteger i) { //System.out.println("Encoding integral " + i); if(i.equals(BigInteger.ZERO)) { - state.add(INT_ZERO_ARR); + state.add(INT_ZERO_CODE); return; } - byte[] bytes = i.toByteArray(); + int n = minimalByteCount(i); + if(n > 0xff) { + throw new IllegalArgumentException("BigInteger magnitude is too large (more than 255 bytes)"); + } if(i.compareTo(BigInteger.ZERO) > 0) { - if(i.compareTo(BIG_INT_SIZE_LIMITS[BIG_INT_SIZE_LIMITS.length-1]) > 0) { - int length = byteLength(bytes); - if(length > 0xff) { - throw new IllegalArgumentException("BigInteger magnitude is too large (more than 255 bytes)"); - } - byte[] intBytes = new byte[length + 2]; - intBytes[0] = POS_INT_END; - intBytes[1] = (byte)(length); - System.arraycopy(bytes, bytes.length - length, intBytes, 2, length); - state.add(intBytes); + byte[] bytes = i.toByteArray(); + if(n > Long.BYTES) { + state.add(POS_INT_END); + state.add((byte)n); + state.add(bytes, 
bytes.length - n, n); } else { - int n = ByteArrayUtil.bisectLeft(BIG_INT_SIZE_LIMITS, i); - assert n <= BIG_INT_SIZE_LIMITS.length; //System.out.println(" -- integral has 'n' of " + n + " and output bytes of " + bytes.length); - byte[] intBytes = new byte[n + 1]; - intBytes[0] = (byte) (INT_ZERO_CODE + n); - System.arraycopy(bytes, bytes.length - n, intBytes, 1, n); - state.add(intBytes); + state.add((byte)(INT_ZERO_CODE + n)); + state.add(bytes, bytes.length - n, n); } } else { - if(i.negate().compareTo(BIG_INT_SIZE_LIMITS[BIG_INT_SIZE_LIMITS.length - 1]) > 0) { - int length = byteLength(i.negate().toByteArray()); - if (length > 0xff) { - throw new IllegalArgumentException("BigInteger magnitude is too large (more than 255 bytes)"); + byte[] bytes = i.subtract(BigInteger.ONE).toByteArray(); + if(n > Long.BYTES) { + state.add(NEG_INT_START); + state.add((byte)(n ^ 0xff)); + if(bytes.length >= n) { + state.add(bytes, bytes.length - n, n); } - BigInteger offset = BigInteger.ONE.shiftLeft(length * 8).subtract(BigInteger.ONE); - byte[] adjusted = i.add(offset).toByteArray(); - byte[] intBytes = new byte[length + 2]; - intBytes[0] = NEG_INT_START; - intBytes[1] = (byte) (length ^ 0xff); - if (adjusted.length >= length) { - System.arraycopy(adjusted, adjusted.length - length, intBytes, 2, length); - } else { - Arrays.fill(intBytes, 2, intBytes.length - adjusted.length, (byte) 0x00); - System.arraycopy(adjusted, 0, intBytes, intBytes.length - adjusted.length, adjusted.length); + else { + for(int x = 0; x < n - bytes.length; x++) { + state.add((byte)0x00); + } + state.add(bytes, 0, bytes.length); } - state.add(intBytes); } else { - int n = ByteArrayUtil.bisectLeft(BIG_INT_SIZE_LIMITS, i.negate()); - - assert n >= 0 && n < BIG_INT_SIZE_LIMITS.length; // can we do this? 
it seems to be required for the following statement - - long maxv = BIG_INT_SIZE_LIMITS[n].add(i).longValue(); - byte[] adjustedBytes = ByteBuffer.allocate(8).order(ByteOrder.BIG_ENDIAN).putLong(maxv).array(); - byte[] intBytes = new byte[n + 1]; - intBytes[0] = (byte) (INT_ZERO_CODE - n); - System.arraycopy(adjustedBytes, adjustedBytes.length - n, intBytes, 1, n); - state.add(intBytes); + state.add((byte)(INT_ZERO_CODE - n)); + if(bytes.length >= n) { + state.add(bytes, bytes.length - n, n); + } + else { + for(int x = 0; x < n - bytes.length; x++) { + state.add((byte)0x00); + } + state.add(bytes, 0, bytes.length); + } } } } static void encode(EncodeState state, long i) { if(i == 0L) { - state.add(INT_ZERO_ARR); + state.add(INT_ZERO_CODE); return; } - int n = byteCount(i); - byte[] intBytes = new byte[n + 1]; + int n = minimalByteCount(i); // First byte encodes number of bytes (as difference from INT_ZERO_CODE) - intBytes[0] = (byte)(INT_ZERO_CODE + (i >= 0 ? n : -n)); + state.add((byte)(INT_ZERO_CODE + (i >= 0 ? n : -n))); // For positive integers, copy the bytes in big-endian order excluding leading 0x00 bytes. // For negative integers, copy the bytes of the one's complement representation excluding // the leading 0xff bytes. As Java stores negative values in two's complement, we subtract 1 // from negative values. long val = Long.reverseBytes((i >= 0) ? 
i : (i - 1)) >> (Long.SIZE - 8 * n); - for(int x = 1; x < intBytes.length; x++) { - intBytes[x] = (byte)(val & 0xff); + for(int x = 0; x < n; x++) { + state.add((byte)(val & 0xff)); val >>= 8; } - state.add(intBytes); } static void encode(EncodeState state, Float f) { - byte[] floatBytes = ByteBuffer.allocate(1 + Float.BYTES).order(ByteOrder.BIG_ENDIAN) - .put(FLOAT_CODE) - .putInt(encodeFloatBits(f)) - .array(); - state.add(floatBytes); + state.add(FLOAT_CODE).add(encodeFloatBits(f)); } static void encode(EncodeState state, Double d) { - byte[] doubleBytes = ByteBuffer.allocate(1 + Double.BYTES).order(ByteOrder.BIG_ENDIAN) - .put(DOUBLE_CODE) - .putLong(encodeDoubleBits(d)) - .array(); - state.add(doubleBytes); + state.add(DOUBLE_CODE).add(encodeDoubleBits(d)); } static void encode(EncodeState state, Boolean b) { - if(b) { - state.add(TRUE_ARR); - } - else { - state.add(FALSE_ARR); - } + state.add(b ? TRUE_CODE : FALSE_CODE); } static void encode(EncodeState state, UUID uuid) { - byte[] uuidBytes = ByteBuffer.allocate(17).put(UUID_CODE).order(ByteOrder.BIG_ENDIAN) - .putLong(uuid.getMostSignificantBits()).putLong(uuid.getLeastSignificantBits()) - .array(); - state.add(uuidBytes); + state.add(UUID_CODE).add(uuid.getMostSignificantBits()).add(uuid.getLeastSignificantBits()); } static void encode(EncodeState state, Versionstamp v) { - state.add(VERSIONSTAMP_ARR); + state.add(VERSIONSTAMP_CODE); if(v.isComplete()) { state.add(v.getBytes()); } @@ -394,11 +402,11 @@ class TupleUtil { } static void encode(EncodeState state, List value) { - state.add(NESTED_ARR); + state.add(NESTED_CODE); for(Object t : value) { encode(state, t, true); } - state.add(NULL_ARR); + state.add(nil); } static void decode(DecodeState state, byte[] rep, int pos, int last) { @@ -411,17 +419,32 @@ class TupleUtil { state.add(null, start); } else if(code == BYTES_CODE) { - int end = ByteArrayUtil.findTerminator(rep, (byte)0x0, (byte)0xff, start, last); + int end = state.findNullTerminator(rep, 
start, last); //System.out.println("End of byte string: " + end); - byte[] range = ByteArrayUtil.replace(rep, start, end - start, NULL_ESCAPED_ARR, new byte[] { nil }); + byte[] range; + if(state.nullCount == 0) { + range = Arrays.copyOfRange(rep, start, end); + } + else { + ByteBuffer dest = ByteBuffer.allocate(end - start - state.nullCount); + ByteArrayUtil.replace(rep, start, end - start, NULL_ESCAPED_ARR, NULL_ARR, dest); + range = dest.array(); + } //System.out.println(" -> byte string contents: '" + ArrayUtils.printable(range) + "'"); state.add(range, end + 1); } else if(code == STRING_CODE) { - int end = ByteArrayUtil.findTerminator(rep, (byte)0x0, (byte)0xff, start, last); + int end = state.findNullTerminator(rep, start, last); //System.out.println("End of UTF8 string: " + end); - byte[] stringBytes = ByteArrayUtil.replace(rep, start, end - start, NULL_ESCAPED_ARR, new byte[] { nil }); - String str = new String(stringBytes, UTF8); + String str; + if(state.nullCount == 0) { + str = new String(rep, start, end - start, UTF8); + } + else { + ByteBuffer dest = ByteBuffer.allocate(end - start - state.nullCount); + ByteArrayUtil.replace(rep, start, end - start, NULL_ESCAPED_ARR, NULL_ARR, dest); + str = new String(dest.array(), UTF8); + } //System.out.println(" -> UTF8 string contents: '" + str + "'"); state.add(str, end + 1); } @@ -442,19 +465,23 @@ class TupleUtil { state.add(true, start); } else if(code == UUID_CODE) { - ByteBuffer bb = ByteBuffer.wrap(rep, start, 16).order(ByteOrder.BIG_ENDIAN); + ByteBuffer bb = ByteBuffer.wrap(rep, start, 2 * Long.BYTES).order(ByteOrder.BIG_ENDIAN); long msb = bb.getLong(); long lsb = bb.getLong(); state.add(new UUID(msb, lsb), start + 16); } else if(code == POS_INT_END) { int n = rep[start] & 0xff; - BigInteger res = new BigInteger(ByteArrayUtil.join(new byte[]{0x00}, Arrays.copyOfRange(rep, start+1, start+n+1))); + byte[] intBytes = new byte[n + 1]; + System.arraycopy(rep, start + 1, intBytes, 1, n); + BigInteger res = new 
BigInteger(intBytes); state.add(res, start + n + 1); } else if(code == NEG_INT_START) { int n = (rep[start] ^ 0xff) & 0xff; - BigInteger origValue = new BigInteger(ByteArrayUtil.join(new byte[]{0x00}, Arrays.copyOfRange(rep, start+1, start+n+1))); + byte[] intBytes = new byte[n + 1]; + System.arraycopy(rep, start + 1, intBytes, 1, n); + BigInteger origValue = new BigInteger(intBytes); BigInteger offset = BigInteger.ONE.shiftLeft(n*8).subtract(BigInteger.ONE); state.add(origValue.subtract(offset), start + n + 1); } @@ -464,7 +491,7 @@ class TupleUtil { int n = positive ? code - INT_ZERO_CODE : INT_ZERO_CODE - code; int end = start + n; - if(rep.length < end) { + if(rep.length < last) { throw new RuntimeException("Invalid tuple (possible truncation)"); } @@ -509,9 +536,9 @@ class TupleUtil { else if(code == NESTED_CODE) { DecodeState subResult = new DecodeState(); int endPos = start; - while(endPos < rep.length) { + while(endPos < last) { if(rep[endPos] == nil) { - if(endPos + 1 < rep.length && rep[endPos+1] == (byte)0xff) { + if(endPos + 1 < last && rep[endPos+1] == (byte)0xff) { subResult.add(null, endPos + 2); endPos += 2; } else { @@ -631,19 +658,27 @@ class TupleUtil { //System.out.println("Joining whole tuple..."); } - static byte[] pack(List items, byte[] prefix) { - EncodeState state = new EncodeState(2 * items.size() + (prefix == null ? 0 : 1)); + static byte[] pack(List items, byte[] prefix, int expectedSize) { + ByteBuffer dest = ByteBuffer.allocate(expectedSize + (prefix != null ? 
prefix.length : 0)); + EncodeState state = new EncodeState(dest); + if(prefix != null) { + state.add(prefix); + } encodeAll(state, items, prefix); if(state.versionPos >= 0) { throw new IllegalArgumentException("Incomplete Versionstamp included in vanilla tuple packInternal"); } else { - return ByteArrayUtil.join(null, state.encodedValues); + return dest.array(); } } - static byte[] packWithVersionstamp(List items, byte[] prefix) { - EncodeState state = new EncodeState(2 * items.size() + (prefix == null ? 1 : 2)); + static byte[] packWithVersionstamp(List items, byte[] prefix, int expectedSize) { + ByteBuffer dest = ByteBuffer.allocate(expectedSize + (prefix != null ? prefix.length : 0)); + EncodeState state = new EncodeState(dest); + if(prefix != null) { + state.add(prefix); + } encodeAll(state, items, prefix); if(state.versionPos < 0) { throw new IllegalArgumentException("No incomplete Versionstamp included in tuple packInternal with versionstamp"); @@ -652,15 +687,73 @@ class TupleUtil { if(state.versionPos > 0xffff) { throw new IllegalArgumentException("Tuple has incomplete version at position " + state.versionPos + " which is greater than the maximum " + 0xffff); } + dest.order(ByteOrder.LITTLE_ENDIAN); if (FDB.instance().getAPIVersion() < 520) { - state.add(ByteBuffer.allocate(Short.BYTES).order(ByteOrder.LITTLE_ENDIAN).putShort((short)state.versionPos).array()); + dest.putShort((short)state.versionPos); } else { - state.add(ByteBuffer.allocate(Integer.BYTES).order(ByteOrder.LITTLE_ENDIAN).putInt(state.versionPos).array()); + dest.putInt(state.versionPos); } - return ByteArrayUtil.join(null, state.encodedValues); + return dest.array(); } } + static int getPackedSize(List items, boolean nested) { + int packedSize = 0; + for(Object item : items) { + if(item == null) + packedSize += nested ? 
2 : 1; + else if(item instanceof byte[]) { + byte[] bytes = (byte[])item; + packedSize += 2 + bytes.length + ByteArrayUtil.nullCount((byte[])item); + } + else if(item instanceof String) { + try { + int strPackedSize = StringUtil.packedSize((String)item); + packedSize += 2 + strPackedSize; + } + catch (IllegalArgumentException e) { + // The unicode was malformed. Grab the array and count the bytes + byte[] strBytes = ((String)item).getBytes(UTF8); + packedSize += 2 + strBytes.length + ByteArrayUtil.nullCount(strBytes); + } + } + else if(item instanceof Float) + packedSize += 1 + Float.BYTES; + else if(item instanceof Double) + packedSize += 1 + Double.BYTES; + else if(item instanceof Boolean) + packedSize += 1; + else if(item instanceof UUID) + packedSize += 1 + 2 * Long.BYTES; + else if(item instanceof BigInteger) { + BigInteger bigInt = (BigInteger)item; + int byteCount = minimalByteCount(bigInt); + // If byteCount <= 8, then the encoding uses 1 byte for both the size + // and type code. If byteCount > 8, then there is 1 byte for the type code + // and 1 byte for the length. In both cases, the value is followed by + // the byte count. + packedSize += byteCount + ((byteCount <= 8) ? 1 : 2); + } + else if(item instanceof Number) + packedSize += 1 + minimalByteCount(((Number)item).longValue()); + else if(item instanceof Versionstamp) { + packedSize += 1 + Versionstamp.LENGTH; + Versionstamp versionstamp = (Versionstamp)item; + if(!versionstamp.isComplete()) { + int suffixSize = FDB.instance().getAPIVersion() < 520 ? 
Short.BYTES : Integer.BYTES; + packedSize += suffixSize; + } + } + else if(item instanceof List) + packedSize += 2 + getPackedSize((List)item, true); + else if(item instanceof Tuple) + packedSize += 2 + ((Tuple)item).getPackedSize(true); + else + throw new IllegalArgumentException("unknown type " + item.getClass() + " for tuple packing"); + } + return packedSize; + } + static boolean hasIncompleteVersionstamp(Stream items) { return items.anyMatch(item -> { if(item == null) { @@ -683,10 +776,10 @@ class TupleUtil { public static void main(String[] args) { try { - byte[] bytes = pack(Collections.singletonList(4), null); + byte[] bytes = pack(Collections.singletonList(4), null, 2); DecodeState result = new DecodeState(); decode(result, bytes, 0, bytes.length); - int val = (int)result.values.get(0); + int val = ((Number)result.values.get(0)).intValue(); assert 4 == val; } catch(Exception e) { @@ -695,7 +788,7 @@ class TupleUtil { } try { - byte[] bytes = pack(Collections.singletonList("\u021Aest \u0218tring"), null); + byte[] bytes = pack(Collections.singletonList("\u021Aest \u0218tring"), null, 15); DecodeState result = new DecodeState(); decode(result, bytes, 0, bytes.length); String string = (String)result.values.get(0); diff --git a/bindings/java/src/test/com/apple/foundationdb/test/AsyncStackTester.java b/bindings/java/src/test/com/apple/foundationdb/test/AsyncStackTester.java index 617586fe9d..f9d7d12c3a 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/AsyncStackTester.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/AsyncStackTester.java @@ -412,7 +412,11 @@ public class AsyncStackTester { return inst.popParams(listSize).thenAcceptAsync(rawElements -> { List tuples = new ArrayList<>(listSize); for(Object o : rawElements) { - tuples.add(Tuple.fromBytes((byte[])o)); + // Unpacking a tuple keeps around the serialized representation and uses + // it for comparison if it's available. 
To test semantic comparison, recreate + // the tuple from the item list. + Tuple t = Tuple.fromBytes((byte[])o); + tuples.add(Tuple.fromList(t.getItems())); } Collections.sort(tuples); for(Tuple t : tuples) { diff --git a/bindings/java/src/test/com/apple/foundationdb/test/StackTester.java b/bindings/java/src/test/com/apple/foundationdb/test/StackTester.java index 96281dec72..06f9b435d5 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/StackTester.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/StackTester.java @@ -368,9 +368,13 @@ public class StackTester { else if (op == StackOperation.TUPLE_SORT) { int listSize = StackUtils.getInt(inst.popParam().join()); List rawElements = inst.popParams(listSize).join(); - List tuples = new ArrayList(listSize); + List tuples = new ArrayList<>(listSize); for(Object o : rawElements) { - tuples.add(Tuple.fromBytes((byte[])o)); + // Unpacking a tuple keeps around the serialized representation and uses + // it for comparison if it's available. To test semantic comparison, recreate + // the tuple from the item list. 
+ Tuple t = Tuple.fromBytes((byte[])o); + tuples.add(Tuple.fromList(t.getItems())); } Collections.sort(tuples); for(Tuple t : tuples) { diff --git a/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java b/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java index 3de9b76785..54448e3ac9 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/TuplePerformanceTest.java @@ -16,7 +16,8 @@ public class TuplePerformanceTest { private enum GeneratedTypes { ALL, LONG, - FLOATING_POINT + FLOATING_POINT, + STRING_LIKE } private final Random r; @@ -77,7 +78,7 @@ public class TuplePerformanceTest { values.add(nested); } } - return Tuple.fromItems(values); + return Tuple.fromList(values); } public Tuple createLongsTuple(int length) { @@ -91,7 +92,7 @@ public class TuplePerformanceTest { } values.add(val); } - return Tuple.fromItems(values); + return Tuple.fromList(values); } public Tuple createFloatingPointTuple(int length) { @@ -112,7 +113,41 @@ public class TuplePerformanceTest { values.add(Double.longBitsToDouble(r.nextLong())); } } - return Tuple.fromItems(values); + return Tuple.fromList(values); + } + + public Tuple createStringLikeTuple(int length) { + List values = new ArrayList<>(length); + for(int i = 0; i < length; i++) { + double choice = r.nextDouble(); + if(choice < 0.4) { + byte[] arr = new byte[r.nextInt(20)]; + r.nextBytes(arr); + values.add(arr); + } + else if(choice < 0.8) { + // Random ASCII codepoints + int[] codepoints = new int[r.nextInt(20)]; + for(int x = 0; x < codepoints.length; x++) { + codepoints[x] = r.nextInt(0x7F); + } + values.add(new String(codepoints, 0, codepoints.length)); + } + else if(choice < 0.9) { + // All zeroes + byte[] zeroes = new byte[r.nextInt(20)]; + values.add(zeroes); + } + else { + // Random Unicode codepoints + int[] codepoints = new int[r.nextInt(20)]; + for(int x = 0; x < 
codepoints.length; x++) { + codepoints[x] = r.nextInt(0x10FFFF); + } + values.add(new String(codepoints, 0, codepoints.length)); + } + } + return Tuple.fromList(values); } public Tuple createTuple(int length) { @@ -123,6 +158,8 @@ public class TuplePerformanceTest { return createLongsTuple(length); case FLOATING_POINT: return createFloatingPointTuple(length); + case STRING_LIKE: + return createStringLikeTuple(length); default: throw new IllegalStateException("unknown generated types " + generatedTypes); } @@ -143,6 +180,7 @@ public class TuplePerformanceTest { long unpackNanos = 0L; long equalsNanos = 0L; long equalsArrayNanos = 0L; + long sizeNanos = 0L; long hashNanos = 0L; long secondHashNanos = 0L; long subspacePackNanos = 0L; @@ -150,6 +188,9 @@ public class TuplePerformanceTest { long totalLength = 0L; long totalBytes = 0L; for(int i = 0; i < iterations; i++) { + if(i % 100_000 == 0) { + System.out.println(" iteration " + i); + } int length = r.nextInt(20); Tuple t = createTuple(length); @@ -157,8 +198,8 @@ public class TuplePerformanceTest { byte[] serialized = t.pack(); long endNanos = System.nanoTime(); packNanos += endNanos - startNanos; - totalLength += length; - totalBytes += serialized.length; + totalLength += t.size(); + totalBytes += t.getPackedSize(); startNanos = System.nanoTime(); Tuple t2 = Tuple.fromBytes(serialized); @@ -182,6 +223,15 @@ public class TuplePerformanceTest { endNanos = System.nanoTime(); equalsArrayNanos += endNanos - startNanos; + tCopy = Tuple.fromList(t.getItems()); + startNanos = System.nanoTime(); + int size = tCopy.getPackedSize(); + endNanos = System.nanoTime(); + if (size != t.pack().length) { + throw new RuntimeException("packed size did not match actual packed length: " + t + " -- " + " " + tCopy.getPackedSize() + " instead of " + t.getPackedSize()); + } + sizeNanos += endNanos - startNanos; + startNanos = System.nanoTime(); byte[] subspacePacked = subspace.pack(t); endNanos = System.nanoTime(); @@ -229,6 +279,8 @@ 
public class TuplePerformanceTest { System.out.printf(" Equals time per tuple: %f \u03BCs%n", equalsNanos * 1e-3 / iterations); System.out.printf(" Equals time (using packed): %f s%n", equalsArrayNanos * 1e-9); System.out.printf(" Equals time (using packed) per tuple: %f \u03BCs%n", equalsArrayNanos * 1e-3 / iterations); + System.out.printf(" Size time: %f s%n", sizeNanos * 1e-9); + System.out.printf(" Size time per tuple: %f \u03BCs%n", sizeNanos * 1e-3 / iterations); System.out.printf(" Subspace pack time: %f s%n", subspacePackNanos * 1e-9); System.out.printf(" Subspace pack time per tuple: %f \u03BCs%n", subspacePackNanos * 1e-3 / iterations); System.out.printf(" Subspace unpack time: %f s%n", subspaceUnpackNanos * 1e-9); diff --git a/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java b/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java index 305c1a90f0..2f0fd1c2c4 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java @@ -20,10 +20,6 @@ package com.apple.foundationdb.test; -import com.apple.foundationdb.TransactionContext; -import com.apple.foundationdb.tuple.ByteArrayUtil; -import com.apple.foundationdb.tuple.Tuple; - import java.math.BigInteger; import java.util.ArrayList; import java.util.Arrays; @@ -31,6 +27,11 @@ import java.util.List; import java.util.Objects; import java.util.UUID; +import com.apple.foundationdb.TransactionContext; +import com.apple.foundationdb.tuple.ByteArrayUtil; +import com.apple.foundationdb.tuple.Tuple; +import com.apple.foundationdb.tuple.Versionstamp; + public class TupleTest { private static final byte FF = (byte)0xff; @@ -40,6 +41,7 @@ public class TupleTest { // FDB fdb = FDB.selectAPIVersion(610); serializedForms(); comparisons(); + replaceTests(); /* try(Database db = fdb.open()) { runTests(reps, db); @@ -70,6 +72,7 @@ public class TupleTest { private static void serializedForms() { List 
serializations = new ArrayList<>(); TupleSerialization.addAll(serializations, + Tuple.from(), new byte[0], Tuple.from(0L), new byte[]{0x14}, Tuple.from(BigInteger.ZERO), new byte[]{0x14}, Tuple.from(1L), new byte[]{0x15, 0x01}, @@ -116,6 +119,9 @@ public class TupleTest { Tuple.from(Double.longBitsToDouble(Long.MAX_VALUE)), new byte[]{0x21, FF, FF, FF, FF, FF, FF, FF, FF}, Tuple.from(Float.intBitsToFloat(~0)), new byte[]{0x20, 0x00, 0x00, 0x00, 0x00}, Tuple.from(Double.longBitsToDouble(~0L)), new byte[]{0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + Tuple.from((Object)new byte[0]), new byte[]{0x01, 0x00}, + Tuple.from((Object)new byte[]{0x01, 0x02, 0x03}), new byte[]{0x01, 0x01, 0x02, 0x03, 0x00}, + Tuple.from((Object)new byte[]{0x00, 0x00, 0x00, 0x04}), new byte[]{0x01, 0x00, FF, 0x00, FF, 0x00, FF, 0x04, 0x00}, Tuple.from(""), new byte[]{0x02, 0x00}, Tuple.from("hello"), new byte[]{0x02, 'h', 'e', 'l', 'l', 'o', 0x00}, Tuple.from("\u4e2d\u6587"), new byte[]{0x02, (byte)0xe4, (byte)0xb8, (byte)0xad, (byte)0xe6, (byte)0x96, (byte)0x87, 0x00}, @@ -123,17 +129,42 @@ public class TupleTest { Tuple.from(new String(new int[]{0x1f525}, 0, 1)), new byte[]{0x02, (byte)0xf0, (byte)0x9f, (byte)0x94, (byte)0xa5, 0x00}, Tuple.from("\ud83d\udd25"), new byte[]{0x02, (byte)0xf0, (byte)0x9f, (byte)0x94, (byte)0xa5, 0x00}, Tuple.from("\ud83e\udd6f"), new byte[]{0x02, (byte)0xf0, (byte)0x9f, (byte)0xa5, (byte)0xaf, 0x00}, + Tuple.from("\ud83d"), new byte[]{0x02, 0x3f, 0x00}, Tuple.from("\udd25\ud83e\udd6f"), new byte[]{0x02, 0x3f, (byte)0xf0, (byte)0x9f, (byte)0xa5, (byte)0xaf, 0x00}, // malformed string - low surrogate without high surrogate - Tuple.from("a\udd25\ud83e\udd6f"), new byte[]{0x02, 'a', 0x3f, (byte)0xf0, (byte)0x9f, (byte)0xa5, (byte)0xaf, 0x00} // malformed string - low surrogate without high surrogate + Tuple.from("a\udd25\ud83e\udd6f"), new byte[]{0x02, 'a', 0x3f, (byte)0xf0, (byte)0x9f, (byte)0xa5, (byte)0xaf, 0x00}, // malformed string - low surrogate 
without high surrogate + Tuple.from(Tuple.from((Object)null)), new byte[]{0x05, 0x00, FF, 0x00}, + Tuple.from(Tuple.from(null, "hello")), new byte[]{0x05, 0x00, FF, 0x02, 'h', 'e', 'l', 'l', 'o', 0x00, 0x00}, + Tuple.from(Arrays.asList(null, "hello")), new byte[]{0x05, 0x00, FF, 0x02, 'h', 'e', 'l', 'l', 'o', 0x00, 0x00}, + Tuple.from(Tuple.from(null, "hell\0")), new byte[]{0x05, 0x00, FF, 0x02, 'h', 'e', 'l', 'l', 0x00, FF, 0x00, 0x00}, + Tuple.from(Arrays.asList(null, "hell\0")), new byte[]{0x05, 0x00, FF, 0x02, 'h', 'e', 'l', 'l', 0x00, FF, 0x00, 0x00}, + Tuple.from(Tuple.from((Object)null), "hello"), new byte[]{0x05, 0x00, FF, 0x00, 0x02, 'h', 'e', 'l', 'l', 'o', 0x00}, + Tuple.from(Tuple.from((Object)null), "hello", new byte[]{0x01, 0x00}, new byte[0]), new byte[]{0x05, 0x00, FF, 0x00, 0x02, 'h', 'e', 'l', 'l', 'o', 0x00, 0x01, 0x01, 0x00, FF, 0x00, 0x01, 0x00}, + Tuple.from(new UUID(0xba5eba11, 0x5ca1ab1e)), new byte[]{0x30, FF, FF, FF, FF, (byte)0xba, 0x5e, (byte)0xba, 0x11, 0x00, 0x00, 0x00, 0x00, 0x5c, (byte)0xa1, (byte)0xab, 0x1e}, + Tuple.from(false), new byte[]{0x26}, + Tuple.from(true), new byte[]{0x27}, + Tuple.from((short)0x3019), new byte[]{0x16, 0x30, 0x19}, + Tuple.from((byte)0x03), new byte[]{0x15, 0x03}, + Tuple.from(Versionstamp.complete(new byte[]{(byte)0xaa, (byte)0xbb, (byte)0xcc, (byte)0xdd, (byte)0xee, FF, 0x00, 0x01, 0x02, 0x03})), new byte[]{0x33, (byte)0xaa, (byte)0xbb, (byte)0xcc, (byte)0xdd, (byte)0xee, FF, 0x00, 0x01, 0x02, 0x03, 0x00, 0x00}, + Tuple.from(Versionstamp.complete(new byte[]{0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a}, 657)), new byte[]{0x33, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x02, (byte)0x91} ); + Tuple bigTuple = new Tuple(); + List serializedForms = new ArrayList<>(); + for(TupleSerialization serialization : serializations) { + bigTuple = bigTuple.addAll(serialization.tuple); + serializedForms.add(serialization.serialization); + } + serializations.add(new 
TupleSerialization(bigTuple, ByteArrayUtil.join(null, serializedForms))); for(TupleSerialization serialization : serializations) { System.out.println("Packing " + serialization.tuple + " (expecting: " + ByteArrayUtil.printable(serialization.serialization) + ")"); + if(serialization.tuple.getPackedSize() != serialization.serialization.length) { + throw new RuntimeException("Tuple " + serialization.tuple + " packed size " + serialization.tuple.getPackedSize() + " does not match expected packed size " + serialization.serialization.length); + } if(!Arrays.equals(serialization.tuple.pack(), serialization.serialization)) { throw new RuntimeException("Tuple " + serialization.tuple + " has serialization " + ByteArrayUtil.printable(serialization.tuple.pack()) + " which does not match expected serialization " + ByteArrayUtil.printable(serialization.serialization)); } - if(!Objects.equals(serialization.tuple, Tuple.fromBytes(serialization.serialization))) { + if(!Objects.equals(serialization.tuple, Tuple.fromItems(Tuple.fromBytes(serialization.serialization).getItems()))) { throw new RuntimeException("Tuple " + serialization.tuple + " does not match deserialization " + Tuple.fromBytes(serialization.serialization) + " which comes from serialization " + ByteArrayUtil.printable(serialization.serialization)); } @@ -176,6 +207,16 @@ public class TupleTest { Tuple.from((Object)new byte[]{0x00, FF}), Tuple.from((Object)new byte[]{0x7f}), Tuple.from((Object)new byte[]{(byte)0x80}), + Tuple.from(null, new byte[0]), + Tuple.from(null, new byte[]{0x00}), + Tuple.from(null, new byte[]{0x00, FF}), + Tuple.from(null, new byte[]{0x7f}), + Tuple.from(null, new byte[]{(byte)0x80}), + Tuple.from(Tuple.from(null, new byte[0])), + Tuple.from(Tuple.from(null, new byte[]{0x00})), + Tuple.from(Tuple.from(null, new byte[]{0x00, FF})), + Tuple.from(Tuple.from(null, new byte[]{0x7f})), + Tuple.from(Tuple.from(null, new byte[]{(byte)0x80})), Tuple.from("a"), 
Tuple.from("\u03bc\u03ac\u03b8\u03b7\u03bc\u03b1"), Tuple.from("\u03bc\u03b1\u0301\u03b8\u03b7\u03bc\u03b1"), @@ -195,7 +236,18 @@ public class TupleTest { Tuple.from(new UUID(-1, 0)), Tuple.from(new UUID(-1, -1)), Tuple.from(new UUID(1, -1)), - Tuple.from(new UUID(1, 1)) + Tuple.from(new UUID(1, 1)), + Tuple.from(false), + Tuple.from(true), + Tuple.from(Arrays.asList(0, 1, 2)), + Tuple.from(Arrays.asList(0, 1), "hello"), + Tuple.from(Arrays.asList(0, 1), "help"), + Tuple.from(Versionstamp.complete(new byte[]{0x0a, (byte)0xbb, (byte)0xcc, (byte)0xdd, (byte)0xee, FF, 0x00, 0x01, 0x02, 0x03})), + Tuple.from(Versionstamp.complete(new byte[]{(byte)0xaa, (byte)0xbb, (byte)0xcc, (byte)0xdd, (byte)0xee, FF, 0x00, 0x01, 0x02, 0x03})), + Tuple.from(Versionstamp.complete(new byte[]{(byte)0xaa, (byte)0xbb, (byte)0xcc, (byte)0xdd, (byte)0xee, FF, 0x00, 0x01, 0x02, 0x03}, 1)), + Tuple.from(Versionstamp.complete(new byte[]{(byte)0xaa, (byte)0xbb, (byte)0xcc, (byte)0xdd, (byte)0xee, FF, 0x00, 0x01, 0x02, 0x03}, 0xa101)), + Tuple.from(Versionstamp.complete(new byte[]{(byte)0xaa, (byte)0xbb, (byte)0xcc, (byte)0xdd, (byte)0xee, FF, 0x00, 0x01, 0x02, 0x03}, 65535)) + ); for(Tuple t1 : tuples) { @@ -209,6 +261,47 @@ public class TupleTest { if(Integer.signum(semanticComparison) != Integer.signum(byteComparison)) { throw new RuntimeException("Tuple t1 and t2 comparison mismatched: semantic = " + semanticComparison + " while byte order = " + byteComparison); } + int implicitByteComparison = t1.compareTo(t2); + if(Integer.signum(semanticComparison) != Integer.signum(implicitByteComparison)) { + throw new RuntimeException("Tuple t1 and t2 comparison mismatched: semantic = " + semanticComparison + " while implicit byte order = " + implicitByteComparison); + } + } + } + } + + // These should be in ArrayUtilTest, but those can't be run at the moment, so here they go. 
+ private static void replaceTests() { + List arrays = Arrays.asList( + new byte[]{0x01, 0x02, 0x01, 0x02}, new byte[]{0x01, 0x02}, new byte[]{0x03, 0x04}, new byte[]{0x03, 0x04, 0x03, 0x04}, + new byte[]{0x01, 0x02, 0x01, 0x02}, new byte[]{0x01, 0x02}, new byte[]{0x03}, new byte[]{0x03, 0x03}, + new byte[]{0x01, 0x02, 0x01, 0x02}, new byte[]{0x01, 0x02}, new byte[]{0x03, 0x04, 0x05}, new byte[]{0x03, 0x04, 0x05, 0x03, 0x04, 0x05}, + new byte[]{0x00, 0x01, 0x02, 0x00, 0x01, 0x02, 0x00}, new byte[]{0x01, 0x02}, new byte[]{0x03, 0x04, 0x05}, new byte[]{0x00, 0x03, 0x04, 0x05, 0x00, 0x03, 0x04, 0x05, 0x00}, + new byte[]{0x01, 0x01, 0x01, 0x01}, new byte[]{0x01, 0x02}, new byte[]{0x03, 0x04}, new byte[]{0x01, 0x01, 0x01, 0x01}, + new byte[]{0x01, 0x01, 0x01, 0x01}, new byte[]{0x01, 0x02}, new byte[]{0x03}, new byte[]{0x01, 0x01, 0x01, 0x01}, + new byte[]{0x01, 0x01, 0x01, 0x01}, new byte[]{0x01, 0x02}, new byte[]{0x03, 0x04, 0x05}, new byte[]{0x01, 0x01, 0x01, 0x01}, + new byte[]{0x01, 0x01, 0x01, 0x01, 0x01}, new byte[]{0x01, 0x01}, new byte[]{0x03, 0x04, 0x05}, new byte[]{0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x01}, + new byte[]{0x01, 0x01, 0x01, 0x01, 0x01}, new byte[]{0x01, 0x01}, new byte[]{0x03, 0x04}, new byte[]{0x03, 0x04, 0x03, 0x04, 0x01}, + new byte[]{0x01, 0x01, 0x01, 0x01, 0x01}, new byte[]{0x01, 0x01}, new byte[]{0x03}, new byte[]{0x03, 0x03, 0x01}, + new byte[]{0x01, 0x02, 0x01, 0x02}, new byte[]{0x01, 0x02}, null, new byte[0], + new byte[]{0x01, 0x02, 0x01, 0x02}, new byte[]{0x01, 0x02}, new byte[0], new byte[0], + new byte[]{0x01, 0x02, 0x01, 0x02}, null, new byte[]{0x04}, new byte[]{0x01, 0x02, 0x01, 0x02}, + new byte[]{0x01, 0x02, 0x01, 0x02}, new byte[0], new byte[]{0x04}, new byte[]{0x01, 0x02, 0x01, 0x02}, + null, new byte[]{0x01, 0x02}, new byte[]{0x04}, null + ); + for(int i = 0; i < arrays.size(); i += 4) { + byte[] src = arrays.get(i); + byte[] pattern = arrays.get(i + 1); + byte[] replacement = arrays.get(i + 2); + byte[] expectedResults = 
arrays.get(i + 3); + byte[] results = ByteArrayUtil.replace(src, pattern, replacement); + if(!Arrays.equals(results, expectedResults)) { + throw new RuntimeException("results " + ByteArrayUtil.printable(results) + " did not match expected results " + + ByteArrayUtil.printable(expectedResults) + " when replacing " + ByteArrayUtil.printable(pattern) + + " with " + ByteArrayUtil.printable(replacement) + " in " + ByteArrayUtil.printable(src)); + } + if(src != null && src == results) { + throw new RuntimeException("src and results array are pointer-equal when replacing " + ByteArrayUtil.printable(pattern) + + " with " + ByteArrayUtil.printable(replacement) + " in " + ByteArrayUtil.printable(src)); } } } From 39fd30330f95454ee46486a9fe7dd54d5ade26ac Mon Sep 17 00:00:00 2001 From: Alec Grieser Date: Wed, 27 Feb 2019 20:25:30 -0800 Subject: [PATCH 22/47] memoize incomplete versionstamp information in Tuples ; add more tests --- .../apple/foundationdb/subspace/Subspace.java | 7 +- .../tuple/IterableComparator.java | 2 +- .../com/apple/foundationdb/tuple/Tuple.java | 291 ++++---- .../apple/foundationdb/tuple/TupleUtil.java | 117 ++-- .../foundationdb/tuple/Versionstamp.java | 4 +- .../apple/foundationdb/test/TupleTest.java | 620 +++++++++++++++++- 6 files changed, 862 insertions(+), 179 deletions(-) diff --git a/bindings/java/src/main/com/apple/foundationdb/subspace/Subspace.java b/bindings/java/src/main/com/apple/foundationdb/subspace/Subspace.java index 59c3f94329..4b811f5149 100644 --- a/bindings/java/src/main/com/apple/foundationdb/subspace/Subspace.java +++ b/bindings/java/src/main/com/apple/foundationdb/subspace/Subspace.java @@ -46,8 +46,8 @@ import com.apple.foundationdb.tuple.Versionstamp; *

*/ public class Subspace { - static final Tuple EMPTY_TUPLE = Tuple.from(); - static final byte[] EMPTY_BYTES = new byte[0]; + private static final Tuple EMPTY_TUPLE = Tuple.from(); + private static final byte[] EMPTY_BYTES = new byte[0]; private final byte[] rawPrefix; @@ -248,8 +248,7 @@ public class Subspace { * @return the {@link Range} of keyspace corresponding to {@code tuple} */ public Range range(Tuple tuple) { - Range p = tuple.range(); - return new Range(join(rawPrefix, p.begin), join(rawPrefix, p.end)); + return tuple.range(rawPrefix); } /** diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/IterableComparator.java b/bindings/java/src/main/com/apple/foundationdb/tuple/IterableComparator.java index 1587b3fd6e..71aa23e9b1 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/IterableComparator.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/IterableComparator.java @@ -34,7 +34,7 @@ import java.util.Iterator; * tuple1.compareTo(tuple2) * == new IterableComparator().compare(tuple1, tuple2) * == new IterableComparator().compare(tuple1.getItems(), tuple2.getItems()), - * == ByteArrayUtil.compareUnsigned(tuple1.packInternal(), tuple2.packInternal())} + * == ByteArrayUtil.compareUnsigned(tuple1.pack(), tuple2.pack())} * * *

diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java b/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java index 5fa9726c14..ea47870037 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java @@ -21,11 +21,11 @@ package com.apple.foundationdb.tuple; import java.math.BigInteger; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Iterator; -import java.util.LinkedList; import java.util.List; import java.util.UUID; import java.util.stream.Collectors; @@ -69,19 +69,39 @@ import com.apple.foundationdb.Range; */ public class Tuple implements Comparable, Iterable { private static final IterableComparator comparator = new IterableComparator(); + private static final byte[] EMPTY_BYTES = new byte[0]; - private List elements; - private int memoizedHash = 0; + List elements; private byte[] packed = null; + private int memoizedHash = 0; private int memoizedPackedSize = -1; + private final boolean incompleteVersionstamp; - private Tuple(List elements, Object newItem) { - this(elements); + private Tuple(Tuple original, Object newItem, boolean itemHasIncompleteVersionstamp) { + this.elements = new ArrayList<>(original.elements.size() + 1); + this.elements.addAll(original.elements); this.elements.add(newItem); + incompleteVersionstamp = original.incompleteVersionstamp || itemHasIncompleteVersionstamp; } - private Tuple(List elements) { - this.elements = new ArrayList<>(elements); + private Tuple(List elements) { + this.elements = elements; + incompleteVersionstamp = TupleUtil.hasIncompleteVersionstamp(elements.stream()); + } + + /** + * Construct a new empty {@code Tuple}. After creation, items can be added + * with calls to the variations of {@code add()}. + * + * @see #from(Object...) 
+ * @see #fromBytes(byte[]) + * @see #fromItems(Iterable) + */ + public Tuple() { + elements = Collections.emptyList(); + packed = EMPTY_BYTES; + memoizedPackedSize = 0; + incompleteVersionstamp = false; } /** @@ -107,7 +127,10 @@ public class Tuple implements Comparable, Iterable { !(o instanceof Versionstamp)) { throw new IllegalArgumentException("Parameter type (" + o.getClass().getName() + ") not recognized"); } - return new Tuple(this.elements, o); + return new Tuple(this, o, + (o instanceof Versionstamp && !((Versionstamp)o).isComplete()) || + (o instanceof List && TupleUtil.hasIncompleteVersionstamp(((List)o).stream())) || + (o instanceof Tuple && ((Tuple) o).hasIncompleteVersionstamp())); } /** @@ -118,7 +141,7 @@ public class Tuple implements Comparable, Iterable { * @return a newly created {@code Tuple} */ public Tuple add(String s) { - return new Tuple(this.elements, s); + return new Tuple(this, s, false); } /** @@ -129,7 +152,7 @@ public class Tuple implements Comparable, Iterable { * @return a newly created {@code Tuple} */ public Tuple add(long l) { - return new Tuple(this.elements, l); + return new Tuple(this, l, false); } /** @@ -140,7 +163,7 @@ public class Tuple implements Comparable, Iterable { * @return a newly created {@code Tuple} */ public Tuple add(byte[] b) { - return new Tuple(this.elements, b); + return new Tuple(this, b, false); } /** @@ -151,7 +174,7 @@ public class Tuple implements Comparable, Iterable { * @return a newly created {@code Tuple} */ public Tuple add(boolean b) { - return new Tuple(this.elements, b); + return new Tuple(this, b, false); } /** @@ -162,7 +185,7 @@ public class Tuple implements Comparable, Iterable { * @return a newly created {@code Tuple} */ public Tuple add(UUID uuid) { - return new Tuple(this.elements, uuid); + return new Tuple(this, uuid, false); } /** @@ -178,7 +201,7 @@ public class Tuple implements Comparable, Iterable { if(bi == null) { throw new NullPointerException("Number types in Tuple cannot be 
null"); } - return new Tuple(this.elements, bi); + return new Tuple(this, bi, false); } /** @@ -189,7 +212,7 @@ public class Tuple implements Comparable, Iterable { * @return a newly created {@code Tuple} */ public Tuple add(float f) { - return new Tuple(this.elements, f); + return new Tuple(this, f, false); } /** @@ -200,7 +223,7 @@ public class Tuple implements Comparable, Iterable { * @return a newly created {@code Tuple} */ public Tuple add(double d) { - return new Tuple(this.elements, d); + return new Tuple(this, d, false); } /** @@ -212,11 +235,11 @@ public class Tuple implements Comparable, Iterable { * @return a newly created {@code Tuple} */ public Tuple add(Versionstamp v) { - return new Tuple(this.elements, v); + return new Tuple(this, v, !v.isComplete()); } /** - * Creates a copy of this {@code Tuple} with an {@link List} appended as the last element. + * Creates a copy of this {@code Tuple} with a {@link List} appended as the last element. * This does not add the elements individually (for that, use {@link Tuple#addAll(List) Tuple.addAll}). * This adds the list as a single element nested within the outer {@code Tuple}. 
* @@ -224,8 +247,8 @@ public class Tuple implements Comparable, Iterable { * * @return a newly created {@code Tuple} */ - public Tuple add(List l) { - return new Tuple(this.elements, l); + public Tuple add(List l) { + return new Tuple(this, l, TupleUtil.hasIncompleteVersionstamp(l.stream())); } /** @@ -238,7 +261,7 @@ public class Tuple implements Comparable, Iterable { * @return a newly created {@code Tuple} */ public Tuple add(Tuple t) { - return new Tuple(this.elements, t); + return new Tuple(this, t, t.hasIncompleteVersionstamp()); } /** @@ -251,7 +274,7 @@ public class Tuple implements Comparable, Iterable { * @return a newly created {@code Tuple} */ public Tuple add(byte[] b, int offset, int length) { - return new Tuple(this.elements, Arrays.copyOfRange(b, offset, offset + length)); + return new Tuple(this, Arrays.copyOfRange(b, offset, offset + length), false); } /** @@ -262,7 +285,7 @@ public class Tuple implements Comparable, Iterable { * * @return a newly created {@code Tuple} */ - public Tuple addAll(List o) { + public Tuple addAll(List o) { List merged = new ArrayList<>(o.size() + this.elements.size()); merged.addAll(this.elements); merged.addAll(o); @@ -279,8 +302,15 @@ public class Tuple implements Comparable, Iterable { public Tuple addAll(Tuple other) { List merged = new ArrayList<>(this.size() + other.size()); merged.addAll(this.elements); - merged.addAll(other.peekItems()); - return new Tuple(merged); + merged.addAll(other.elements); + Tuple t = new Tuple(merged); + if(!t.hasIncompleteVersionstamp() && packed != null && other.packed != null) { + t.packed = ByteArrayUtil.join(packed, other.packed); + } + if(memoizedPackedSize >= 0 && other.memoizedPackedSize >= 0) { + t.memoizedPackedSize = memoizedPackedSize + other.memoizedPackedSize; + } + return t; } /** @@ -306,29 +336,44 @@ public class Tuple implements Comparable, Iterable { } byte[] packInternal(byte[] prefix, boolean copy) { - boolean hasPrefix = prefix != null && prefix.length > 1; - 
if(packed == null) { - byte[] result = TupleUtil.pack(elements, prefix, getPackedSize()); - if(hasPrefix) { - packed = Arrays.copyOfRange(result, prefix.length, result.length); - memoizedPackedSize = packed.length; - return result; - } - else { - packed = result; - memoizedPackedSize = packed.length; - } + if(hasIncompleteVersionstamp()) { + throw new IllegalArgumentException("Incomplete Versionstamp included in vanilla tuple pack"); } + if(packed == null) { + packed = TupleUtil.pack(elements, getPackedSize()); + } + boolean hasPrefix = prefix != null && prefix.length > 0; if(hasPrefix) { return ByteArrayUtil.join(prefix, packed); } + else if(copy) { + return Arrays.copyOf(packed, packed.length); + } else { - if(copy) { - return Arrays.copyOf(packed, packed.length); - } - else { - return packed; - } + return packed; + } + } + + /** + * Pack an encoded representation of this {@code Tuple} onto the end of the given {@link ByteBuffer}. + * It is up to the caller to ensure that there is enough space allocated within the buffer + * to avoid {@link java.nio.BufferOverflowException}s. The client may call {@link #getPackedSize()} + * to determine how large this {@code Tuple} will be once packed in order to allocate sufficient memory. + *
+ *
+ * This method will throw an error if there are any incomplete {@link Versionstamp}s in this {@code Tuple}. + * + * @param dest the destination {@link ByteBuffer} for the encoded {@code Tuple} + */ + public void packInto(ByteBuffer dest) { + if(hasIncompleteVersionstamp()) { + throw new IllegalArgumentException("Incomplete Versionstamp included in vanilla tuple pack"); + } + if(packed == null) { + TupleUtil.pack(dest, elements); + } + else { + dest.put(packed); } } @@ -363,37 +408,27 @@ public class Tuple implements Comparable, Iterable { * @throws IllegalArgumentException if there is not exactly one incomplete {@link Versionstamp} included in this {@code Tuple} */ public byte[] packWithVersionstamp(byte[] prefix) { - return TupleUtil.packWithVersionstamp(elements, prefix, getPackedSize()); + return packWithVersionstampInternal(prefix, true); } byte[] packWithVersionstampInternal(byte[] prefix, boolean copy) { - boolean hasPrefix = prefix != null && prefix.length > 0; - if(packed == null) { - byte[] result = TupleUtil.packWithVersionstamp(elements, prefix, getPackedSize()); - if(hasPrefix) { - byte[] withoutPrefix = Arrays.copyOfRange(result, prefix.length, result.length); - TupleUtil.adjustVersionPosition(packed, -1 * prefix.length); - packed = withoutPrefix; - memoizedPackedSize = packed.length; - return result; - } - else { - packed = result; - memoizedPackedSize = packed.length; - } + if(!hasIncompleteVersionstamp()) { + throw new IllegalArgumentException("No incomplete Versionstamp included in tuple pack with versionstamp"); } + if(packed == null) { + packed = TupleUtil.packWithVersionstamp(elements, getPackedSize()); + } + boolean hasPrefix = prefix != null && prefix.length > 0; if(hasPrefix) { byte[] withPrefix = ByteArrayUtil.join(prefix, packed); TupleUtil.adjustVersionPosition(withPrefix, prefix.length); return withPrefix; } + else if(copy) { + return Arrays.copyOf(packed, packed.length); + } else { - if(copy) { - return Arrays.copyOf(packed, 
packed.length); - } - else { - return packed; - } + return packed; } } @@ -429,16 +464,6 @@ public class Tuple implements Comparable, Iterable { return elements.stream(); } - /** - * Returns the internal elements that make up this tuple. For internal use only, as - * modifications to the result will mean that this Tuple is modified. - * - * @return the elements of this Tuple, without copying - */ - private List peekItems() { - return this.elements; - } - /** * Gets an {@code Iterator} over the {@code Objects} in this {@code Tuple}. This {@code Iterator} is * unmodifiable and will throw an exception if {@link Iterator#remove() remove()} is called. @@ -450,18 +475,6 @@ public class Tuple implements Comparable, Iterable { return Collections.unmodifiableList(this.elements).iterator(); } - /** - * Construct a new empty {@code Tuple}. After creation, items can be added - * with calls the the variations of {@code add()}. - * - * @see #from(Object...) - * @see #fromBytes(byte[]) - * @see #fromItems(Iterable) - */ - public Tuple() { - this.elements = new LinkedList<>(); - } - /** * Construct a new {@code Tuple} with elements decoded from a supplied {@code byte} array. * The passed byte array must not be {@code null}. 
@@ -485,9 +498,15 @@ public class Tuple implements Comparable, Iterable { * @return a new {@code Tuple} constructed by deserializing the specified slice of the provided {@code byte} array */ public static Tuple fromBytes(byte[] bytes, int offset, int length) { - Tuple t = new Tuple(); - t.elements = TupleUtil.unpack(bytes, offset, length); - t.packed = Arrays.copyOfRange(bytes, offset, offset + length); + if(offset < 0 || offset > bytes.length) { + throw new IllegalArgumentException("Invalid offset for Tuple deserialization"); + } + if(length < 0 || offset + length > bytes.length) { + throw new IllegalArgumentException("Invalid length for Tuple deserialization"); + } + byte[] packed = Arrays.copyOfRange(bytes, offset, offset + length); + Tuple t = new Tuple(TupleUtil.unpack(packed)); + t.packed = packed; t.memoizedPackedSize = length; return t; } @@ -732,7 +751,7 @@ public class Tuple implements Comparable, Iterable { return (Tuple)o; } else if(o instanceof List) { - return Tuple.fromItems((List)o); + return Tuple.fromList((List)o); } else { throw new ClassCastException("Cannot convert item of type " + o.getClass() + " to tuple"); @@ -761,11 +780,7 @@ public class Tuple implements Comparable, Iterable { if(elements.isEmpty()) throw new IllegalStateException("Tuple contains no elements"); - List items = new ArrayList<>(elements.size() - 1); - for(int i = 1; i < this.elements.size(); i++) { - items.add(this.elements.get(i)); - } - return new Tuple(items); + return new Tuple(elements.subList(1, elements.size())); } /** @@ -779,11 +794,7 @@ public class Tuple implements Comparable, Iterable { if(elements.isEmpty()) throw new IllegalStateException("Tuple contains no elements"); - List items = new ArrayList<>(elements.size() - 1); - for(int i = 0; i < this.elements.size() - 1; i++) { - items.add(this.elements.get(i)); - } - return new Tuple(items); + return new Tuple(elements.subList(0, elements.size() - 1)); } /** @@ -800,17 +811,39 @@ public class Tuple implements 
Comparable, Iterable { * This function will throw an error if this {@code Tuple} contains an incomplete * {@link Versionstamp}. * - * @return the range of keys containing all {@code Tuple}s that have this {@code Tuple} - * as a prefix + * @return the range of keys containing all possible keys that have this {@code Tuple} + * as a strict prefix */ public Range range() { + return range(null); + } + + /** + * Returns a range representing all keys that encode {@code Tuple}s strictly starting + * with the given prefix followed by this {@code Tuple}. + *
+ *
+ * For example: + *
+	 *   Tuple t = Tuple.from("a", "b");
+	 *   Range r = t.range(Tuple.from("c").pack());
+ * {@code r} contains all tuples ("c", "a", "b", ...) + *
+ * This function will throw an error if this {@code Tuple} contains an incomplete + * {@link Versionstamp}. + * + * @param prefix a byte prefix to precede all elements in the range + * + * @return the range of keys containing all possible keys that have {@code prefix} + * followed by this {@code Tuple} as a strict prefix + */ + public Range range(byte[] prefix) { if(hasIncompleteVersionstamp()) { throw new IllegalStateException("Tuple with incomplete versionstamp used for range"); } - byte[] p = packInternal(null, false); - //System.out.println("Packed tuple is: " + ByteArrayUtil.printable(p)); + byte[] p = packInternal(prefix, false); return new Range(ByteArrayUtil.join(p, new byte[] {0x0}), - ByteArrayUtil.join(p, new byte[] {(byte)0xff})); + ByteArrayUtil.join(p, new byte[] {(byte)0xff})); } /** @@ -823,7 +856,7 @@ public class Tuple implements Comparable, Iterable { * {@code Tuple} */ public boolean hasIncompleteVersionstamp() { - return TupleUtil.hasIncompleteVersionstamp(stream()); + return incompleteVersionstamp; } /** @@ -843,7 +876,21 @@ public class Tuple implements Comparable, Iterable { } int getPackedSize(boolean nested) { - return TupleUtil.getPackedSize(elements, nested); + if(memoizedPackedSize >= 0) { + if(!nested) { + return memoizedPackedSize; + } + int nullCount = 0; + for(Object elem : elements) { + if(elem == null) { + nullCount++; + } + } + return memoizedPackedSize + nullCount; + } + else { + return TupleUtil.getPackedSize(elements, nested); + } } /** @@ -860,7 +907,9 @@ public class Tuple implements Comparable, Iterable { */ @Override public int compareTo(Tuple t) { - if(packed != null && t.packed != null) { + // If either tuple has an incomplete versionstamp, then there is a possibility that the byte order + // is not the semantic comparison order. 
+ if(packed != null && t.packed != null && !hasIncompleteVersionstamp() && !t.hasIncompleteVersionstamp()) { return ByteArrayUtil.compareUnsigned(packed, t.packed); } else { @@ -959,12 +1008,15 @@ public class Tuple implements Comparable, Iterable { * * @return a new {@code Tuple} with the given items as its elements */ - public static Tuple fromItems(Iterable items) { - Tuple t = new Tuple(); - for(Object o : items) { - t = t.addObject(o); + public static Tuple fromItems(Iterable items) { + if(items instanceof List) { + return Tuple.fromList((List)items); } - return t; + List elements = new ArrayList<>(); + for(Object o : items) { + elements.add(o); + } + return new Tuple(elements); } /** @@ -977,8 +1029,9 @@ public class Tuple implements Comparable, Iterable { * * @return a new {@code Tuple} with the given items as its elements */ - public static Tuple fromList(List items) { - return new Tuple(items); + public static Tuple fromList(List items) { + List elements = new ArrayList<>(items); + return new Tuple(elements); } /** @@ -992,10 +1045,8 @@ public class Tuple implements Comparable, Iterable { * * @return a new {@code Tuple} with the given items as its elements */ - public static Tuple fromStream(Stream items) { - Tuple t = new Tuple(); - t.elements = items.collect(Collectors.toList()); - return t; + public static Tuple fromStream(Stream items) { + return new Tuple(items.collect(Collectors.toList())); } /** @@ -1009,7 +1060,7 @@ public class Tuple implements Comparable, Iterable { * @return a new {@code Tuple} with the given items as its elements */ public static Tuple from(Object... 
items) { - return fromList(Arrays.asList(items)); + return new Tuple(Arrays.asList(items)); } static void main(String[] args) { diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java b/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java index fc1fbc7262..63a1944b5d 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java @@ -21,6 +21,7 @@ package com.apple.foundationdb.tuple; import java.math.BigInteger; +import java.nio.BufferOverflowException; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.charset.Charset; @@ -89,7 +90,7 @@ class TupleUtil { x += 1; } } - throw new IllegalArgumentException("no terminator found for bytes starting at " + from); + throw new IllegalArgumentException("No terminator found for bytes starting at " + from); } } @@ -135,6 +136,7 @@ class TupleUtil { else { ByteArrayUtil.replace(encoded, 0, encoded.length, NULL_ARR, NULL_ESCAPED_ARR, encodedBytes); } + totalLength += encoded.length + nullCount; return this; } @@ -157,6 +159,10 @@ class TupleUtil { } } + private static boolean useOldVersionOffsetFormat() { + return FDB.instance().getAPIVersion() < 520; + } + // These four functions are for adjusting the encoding of floating point numbers so // that when their byte representation is written out in big-endian order, unsigned // lexicographic byte comparison orders the values in the same way as the semantic @@ -165,32 +171,32 @@ class TupleUtil { // in the case that the number is positive. For these purposes, 0.0 is positive and -0.0 // is negative. - static int encodeFloatBits(float f) { + private static int encodeFloatBits(float f) { int intBits = Float.floatToRawIntBits(f); return (intBits < 0) ? 
(~intBits) : (intBits ^ Integer.MIN_VALUE); } - static long encodeDoubleBits(double d) { + private static long encodeDoubleBits(double d) { long longBits = Double.doubleToRawLongBits(d); return (longBits < 0L) ? (~longBits) : (longBits ^ Long.MIN_VALUE); } - static float decodeFloatBits(int i) { + private static float decodeFloatBits(int i) { int origBits = (i >= 0) ? (~i) : (i ^ Integer.MIN_VALUE); return Float.intBitsToFloat(origBits); } - static double decodeDoubleBits(long l) { + private static double decodeDoubleBits(long l) { long origBits = (l >= 0) ? (~l) : (l ^ Long.MIN_VALUE); return Double.longBitsToDouble(origBits); } // Get the minimal number of bytes in the representation of a long. - static int minimalByteCount(long i) { + private static int minimalByteCount(long i) { return (Long.SIZE + 7 - Long.numberOfLeadingZeros(i >= 0 ? i : -i)) / 8; } - static int minimalByteCount(BigInteger i) { + private static int minimalByteCount(BigInteger i) { int bitLength = (i.compareTo(BigInteger.ZERO) >= 0) ? i.bitLength() : i.negate().bitLength(); return (bitLength + 7) / 8; } @@ -221,7 +227,7 @@ class TupleUtil { } static void adjustVersionPosition(byte[] packed, int delta) { - if(FDB.instance().getAPIVersion() < 520) { + if(useOldVersionOffsetFormat()) { adjustVersionPosition300(packed, delta); } else { @@ -285,7 +291,7 @@ class TupleUtil { else if(t instanceof List) encode(state, (List)t); else if(t instanceof Tuple) - encode(state, ((Tuple)t).getItems()); + encode(state, (Tuple)t); else throw new IllegalArgumentException("Unsupported data type: " + t.getClass().getName()); } @@ -409,6 +415,10 @@ class TupleUtil { state.add(nil); } + static void encode(EncodeState state, Tuple value) { + encode(state, value.elements); + } + static void decode(DecodeState state, byte[] rep, int pos, int last) { //System.out.println("Decoding '" + ArrayUtils.printable(rep) + "' at " + pos); @@ -491,8 +501,8 @@ class TupleUtil { int n = positive ? 
code - INT_ZERO_CODE : INT_ZERO_CODE - code; int end = start + n; - if(rep.length < last) { - throw new RuntimeException("Invalid tuple (possible truncation)"); + if(last < end) { + throw new IllegalArgumentException("Invalid tuple (possible truncation)"); } if(positive && (n < Long.BYTES || rep[start] > 0)) { @@ -530,12 +540,16 @@ class TupleUtil { } } else if(code == VERSIONSTAMP_CODE) { + if(start + Versionstamp.LENGTH > last) { + throw new IllegalArgumentException("Invalid tuple (possible truncation)"); + } Versionstamp val = Versionstamp.fromBytes(Arrays.copyOfRange(rep, start, start + Versionstamp.LENGTH)); state.add(val, start + Versionstamp.LENGTH); } else if(code == NESTED_CODE) { DecodeState subResult = new DecodeState(); int endPos = start; + boolean foundEnd = false; while(endPos < last) { if(rep[endPos] == nil) { if(endPos + 1 < last && rep[endPos+1] == (byte)0xff) { @@ -543,6 +557,7 @@ class TupleUtil { endPos += 2; } else { endPos += 1; + foundEnd = true; break; } } else { @@ -550,6 +565,9 @@ class TupleUtil { endPos = subResult.end; } } + if(!foundEnd) { + throw new IllegalArgumentException("No terminator found for nested tuple starting at " + start); + } state.add(subResult.values, endPos); } else { @@ -558,6 +576,10 @@ class TupleUtil { } static int compareItems(Object item1, Object item2) { + if(item1 == item2) { + // If we have pointer equality, just return 0 immediately. + return 0; + } int code1 = TupleUtil.getCodeFor(item1); int code2 = TupleUtil.getCodeFor(item2); @@ -603,14 +625,14 @@ class TupleUtil { } } if(code1 == FLOAT_CODE) { - // This is done for the same reason that double comparison is done - // that way. + // This is done over vanilla float comparison basically to handle NaNs + // sorting correctly. 
int fbits1 = encodeFloatBits((Float)item1); int fbits2 = encodeFloatBits((Float)item2); return Integer.compareUnsigned(fbits1, fbits2); } if(code1 == DOUBLE_CODE) { - // This is done over vanilla double comparison basically to handle NaN + // This is done over vanilla double comparison basically to handle NaNs // sorting correctly. long dbits1 = encodeDoubleBits((Double)item1); long dbits2 = encodeDoubleBits((Double)item2); @@ -637,58 +659,57 @@ class TupleUtil { throw new IllegalArgumentException("Unknown tuple data type: " + item1.getClass()); } - static List unpack(byte[] bytes, int start, int length) { - DecodeState decodeState = new DecodeState(); - int pos = start; - int end = start + length; - while(pos < end) { - decode(decodeState, bytes, pos, end); - pos = decodeState.end; + static List unpack(byte[] bytes) { + try { + DecodeState decodeState = new DecodeState(); + int pos = 0; + int end = bytes.length; + while (pos < end) { + decode(decodeState, bytes, pos, end); + pos = decodeState.end; + } + return decodeState.values; + } + catch(IndexOutOfBoundsException | BufferOverflowException e) { + throw new IllegalArgumentException("Invalid tuple (possible truncation)", e); } - return decodeState.values; } - static void encodeAll(EncodeState state, List items, byte[] prefix) { - if(prefix != null) { - state.add(prefix); - } + static void encodeAll(EncodeState state, List items) { for(Object t : items) { encode(state, t); } - //System.out.println("Joining whole tuple..."); } - static byte[] pack(List items, byte[] prefix, int expectedSize) { - ByteBuffer dest = ByteBuffer.allocate(expectedSize + (prefix != null ? 
prefix.length : 0)); + static void pack(ByteBuffer dest, List items) { + ByteOrder origOrder = dest.order(); EncodeState state = new EncodeState(dest); - if(prefix != null) { - state.add(prefix); - } - encodeAll(state, items, prefix); + encodeAll(state, items); + dest.order(origOrder); if(state.versionPos >= 0) { - throw new IllegalArgumentException("Incomplete Versionstamp included in vanilla tuple packInternal"); - } - else { - return dest.array(); + throw new IllegalArgumentException("Incomplete Versionstamp included in vanilla tuple pack"); } } - static byte[] packWithVersionstamp(List items, byte[] prefix, int expectedSize) { - ByteBuffer dest = ByteBuffer.allocate(expectedSize + (prefix != null ? prefix.length : 0)); + static byte[] pack(List items, int expectedSize) { + ByteBuffer dest = ByteBuffer.allocate(expectedSize); + pack(dest, items); + return dest.array(); + } + + static byte[] packWithVersionstamp(List items, int expectedSize) { + ByteBuffer dest = ByteBuffer.allocate(expectedSize); EncodeState state = new EncodeState(dest); - if(prefix != null) { - state.add(prefix); - } - encodeAll(state, items, prefix); + encodeAll(state, items); if(state.versionPos < 0) { throw new IllegalArgumentException("No incomplete Versionstamp included in tuple packInternal with versionstamp"); } else { - if(state.versionPos > 0xffff) { + if(useOldVersionOffsetFormat() && state.versionPos > 0xffff) { throw new IllegalArgumentException("Tuple has incomplete version at position " + state.versionPos + " which is greater than the maximum " + 0xffff); } dest.order(ByteOrder.LITTLE_ENDIAN); - if (FDB.instance().getAPIVersion() < 520) { + if (useOldVersionOffsetFormat()) { dest.putShort((short)state.versionPos); } else { dest.putInt(state.versionPos); @@ -740,7 +761,7 @@ class TupleUtil { packedSize += 1 + Versionstamp.LENGTH; Versionstamp versionstamp = (Versionstamp)item; if(!versionstamp.isComplete()) { - int suffixSize = FDB.instance().getAPIVersion() < 520 ? 
Short.BYTES : Integer.BYTES; + int suffixSize = useOldVersionOffsetFormat() ? Short.BYTES : Integer.BYTES; packedSize += suffixSize; } } @@ -776,7 +797,7 @@ class TupleUtil { public static void main(String[] args) { try { - byte[] bytes = pack(Collections.singletonList(4), null, 2); + byte[] bytes = pack(Collections.singletonList(4), 2); DecodeState result = new DecodeState(); decode(result, bytes, 0, bytes.length); int val = ((Number)result.values.get(0)).intValue(); @@ -788,7 +809,7 @@ class TupleUtil { } try { - byte[] bytes = pack(Collections.singletonList("\u021Aest \u0218tring"), null, 15); + byte[] bytes = pack(Collections.singletonList("\u021Aest \u0218tring"), 15); DecodeState result = new DecodeState(); decode(result, bytes, 0, bytes.length); String string = (String)result.values.get(0); diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/Versionstamp.java b/bindings/java/src/main/com/apple/foundationdb/tuple/Versionstamp.java index 85c6de37ae..07c3218eac 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/Versionstamp.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/Versionstamp.java @@ -94,8 +94,8 @@ public class Versionstamp implements Comparable { private static final byte[] UNSET_TRANSACTION_VERSION = {(byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff}; - private boolean complete; - private byte[] versionBytes; + private final boolean complete; + private final byte[] versionBytes; /** * From a byte array, unpack the user version starting at the given position. 
diff --git a/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java b/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java index 2f0fd1c2c4..ac2b033748 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java @@ -21,13 +21,21 @@ package com.apple.foundationdb.test; import java.math.BigInteger; +import java.nio.BufferOverflowException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.Objects; import java.util.UUID; +import java.util.stream.Stream; +import com.apple.foundationdb.Database; +import com.apple.foundationdb.FDB; import com.apple.foundationdb.TransactionContext; +import com.apple.foundationdb.subspace.Subspace; import com.apple.foundationdb.tuple.ByteArrayUtil; import com.apple.foundationdb.tuple.Tuple; import com.apple.foundationdb.tuple.Versionstamp; @@ -38,15 +46,19 @@ public class TupleTest { public static void main(String[] args) throws InterruptedException { final int reps = 1000; try { - // FDB fdb = FDB.selectAPIVersion(610); - serializedForms(); + FDB fdb = FDB.selectAPIVersion(610); + addMethods(); comparisons(); + emptyTuple(); + incompleteVersionstamps(); + intoBuffer(); + offsetsAndLengths(); + malformedBytes(); replaceTests(); - /* + serializedForms(); try(Database db = fdb.open()) { runTests(reps, db); } - */ } catch(Throwable t) { t.printStackTrace(); } @@ -269,6 +281,606 @@ public class TupleTest { } } + private static void emptyTuple() { + Tuple t = new Tuple(); + if(!t.isEmpty()) { + throw new RuntimeException("empty tuple is not empty"); + } + if(t.getPackedSize() != 0) { + throw new RuntimeException("empty tuple packed size is not 0"); + } + if(t.pack().length != 0) { + throw new RuntimeException("empty tuple is not packed to the empty byte string"); + } + } + + private static void 
addMethods() { + List baseTuples = Arrays.asList( + new Tuple(), + Tuple.from(), + Tuple.from((Object)null), + Tuple.from("prefix"), + Tuple.from("prefix", null), + Tuple.from(new UUID(100, 1000)), + Tuple.from(Versionstamp.incomplete(1)), + Tuple.from(Tuple.from(Versionstamp.incomplete(2))), + Tuple.from(Collections.singletonList(Versionstamp.incomplete(3))) + ); + List toAdd = Arrays.asList( + null, + 1066L, + BigInteger.valueOf(1066), + -3.14f, + 2.71828, + new byte[]{0x01, 0x02, 0x03}, + new byte[]{0x01, 0x00, 0x02, 0x00, 0x03}, + "hello there", + "hell\0 there", + "\ud83d\udd25", + "\ufb14", + false, + true, + Float.NaN, + Float.intBitsToFloat(Integer.MAX_VALUE), + Double.NaN, + Double.longBitsToDouble(Long.MAX_VALUE), + Versionstamp.complete(new byte[]{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09}, 100), + Versionstamp.incomplete(4), + new UUID(-1, 1), + Tuple.from((Object)null), + Tuple.from("suffix", "tuple"), + Tuple.from("s\0ffix", "tuple"), + Arrays.asList("suffix", "tuple"), + Arrays.asList("suffix", null, "tuple"), + Tuple.from("suffix", null, "tuple"), + Tuple.from("suffix", Versionstamp.incomplete(4), "tuple"), + Arrays.asList("suffix", Arrays.asList("inner", Versionstamp.incomplete(5), "tuple"), "tuple") + ); + + for(Tuple baseTuple : baseTuples) { + for(Object newItem : toAdd) { + int baseSize = baseTuple.size(); + Tuple freshTuple = Tuple.fromStream(Stream.concat(baseTuple.stream(), Stream.of(newItem))); + if(freshTuple.size() != baseSize + 1) { + throw new RuntimeException("freshTuple size was not one larger than base size"); + } + Tuple withObjectAdded = baseTuple.addObject(newItem); + if(withObjectAdded.size() != baseSize + 1) { + throw new RuntimeException("withObjectAdded size was not one larger than the base size"); + } + // Use the appropriate "add" overload. 
+ Tuple withValueAdded; + if(newItem == null) { + withValueAdded = baseTuple.addObject(null); + } + else if(newItem instanceof byte[]) { + withValueAdded = baseTuple.add((byte[])newItem); + } + else if(newItem instanceof String) { + withValueAdded = baseTuple.add((String)newItem); + } + else if(newItem instanceof Long) { + withValueAdded = baseTuple.add((Long)newItem); + } + else if(newItem instanceof BigInteger) { + withValueAdded = baseTuple.add((BigInteger)newItem); + } + else if(newItem instanceof Float) { + withValueAdded = baseTuple.add((Float)newItem); + } + else if(newItem instanceof Double) { + withValueAdded = baseTuple.add((Double)newItem); + } + else if(newItem instanceof Boolean) { + withValueAdded = baseTuple.add((Boolean)newItem); + } + else if(newItem instanceof UUID) { + withValueAdded = baseTuple.add((UUID)newItem); + } + else if(newItem instanceof Versionstamp) { + withValueAdded = baseTuple.add((Versionstamp)newItem); + } + else if(newItem instanceof List) { + withValueAdded = baseTuple.add((List)newItem); + } + else if(newItem instanceof Tuple) { + withValueAdded = baseTuple.add((Tuple)newItem); + } + else { + throw new RuntimeException("unknown type for tuple serialization " + newItem.getClass()); + } + // Use Tuple.addAll, which has optimizations if both tuples have been packed already + // Getting their hash codes memoizes the packed representation. 
+ Tuple newItemTuple = Tuple.from(newItem); + baseTuple.hashCode(); + newItemTuple.hashCode(); + Tuple withTupleAddedAll = baseTuple.addAll(newItemTuple); + Tuple withListAddedAll = baseTuple.addAll(Collections.singletonList(newItem)); + List allTuples = Arrays.asList(freshTuple, withObjectAdded, withValueAdded, withTupleAddedAll, withListAddedAll); + + int basePlusNewSize = baseTuple.getPackedSize() + Tuple.from(newItem).getPackedSize(); + int freshTuplePackedSize = freshTuple.getPackedSize(); + int withObjectAddedPackedSize = withObjectAdded.getPackedSize(); + int withValueAddedPackedSize = withValueAdded.getPackedSize(); + int withTupleAddedAllPackedSize = withTupleAddedAll.getPackedSize(); + int withListAddAllPackedSize = withListAddedAll.getPackedSize(); + if(basePlusNewSize != freshTuplePackedSize || basePlusNewSize != withObjectAddedPackedSize || + basePlusNewSize != withValueAddedPackedSize || basePlusNewSize != withTupleAddedAllPackedSize || + basePlusNewSize != withListAddAllPackedSize) { + throw new RuntimeException("packed sizes not equivalent"); + } + byte[] concatPacked; + byte[] prefixPacked; + byte[] freshPacked; + byte[] objectAddedPacked; + byte[] valueAddedPacked; + byte[] tupleAddedAllPacked; + byte[] listAddedAllPacked; + if(!baseTuple.hasIncompleteVersionstamp() && !Tuple.from(newItem).hasIncompleteVersionstamp()) { + concatPacked = ByteArrayUtil.join(baseTuple.pack(), Tuple.from(newItem).pack()); + prefixPacked = Tuple.from(newItem).pack(baseTuple.pack()); + freshPacked = freshTuple.pack(); + objectAddedPacked = withObjectAdded.pack(); + valueAddedPacked = withValueAdded.pack(); + tupleAddedAllPacked = withTupleAddedAll.pack(); + listAddedAllPacked = withListAddedAll.pack(); + + for(Tuple t : allTuples) { + try { + t.packWithVersionstamp(); + throw new RuntimeException("able to pack tuple without incomplete versionstamp using packWithVersionstamp"); + } + catch(IllegalArgumentException e) { + // eat + } + } + } + else 
if(!baseTuple.hasIncompleteVersionstamp() && Tuple.from(newItem).hasIncompleteVersionstamp()) { + concatPacked = newItemTuple.packWithVersionstamp(baseTuple.pack()); + try { + prefixPacked = Tuple.from(newItem).packWithVersionstamp(baseTuple.pack()); + } + catch(NullPointerException e) { + prefixPacked = Tuple.from(newItem).packWithVersionstamp(baseTuple.pack()); + } + freshPacked = freshTuple.packWithVersionstamp(); + objectAddedPacked = withObjectAdded.packWithVersionstamp(); + valueAddedPacked = withValueAdded.packWithVersionstamp(); + tupleAddedAllPacked = withTupleAddedAll.packWithVersionstamp(); + listAddedAllPacked = withListAddedAll.packWithVersionstamp(); + + for(Tuple t : allTuples) { + try { + t.pack(); + throw new RuntimeException("able to pack tuple with incomplete versionstamp"); + } + catch(IllegalArgumentException e) { + // eat + } + } + } + else if(baseTuple.hasIncompleteVersionstamp() && !Tuple.from(newItem).hasIncompleteVersionstamp()) { + concatPacked = baseTuple.addAll(Tuple.from(newItem)).packWithVersionstamp(); + prefixPacked = baseTuple.addObject(newItem).packWithVersionstamp(); + freshPacked = freshTuple.packWithVersionstamp(); + objectAddedPacked = withObjectAdded.packWithVersionstamp(); + valueAddedPacked = withValueAdded.packWithVersionstamp(); + tupleAddedAllPacked = withTupleAddedAll.packWithVersionstamp(); + listAddedAllPacked = withListAddedAll.packWithVersionstamp(); + + for(Tuple t : allTuples) { + try { + t.pack(); + throw new RuntimeException("able to pack tuple with incomplete versionstamp"); + } + catch(IllegalArgumentException e) { + // eat + } + } + } + else { + for(Tuple t : allTuples) { + try { + t.pack(); + throw new RuntimeException("able to pack tuple with two versionstamps using pack"); + } + catch(IllegalArgumentException e) { + // eat + } + try { + t.packWithVersionstamp(); + throw new RuntimeException("able to pack tuple with two versionstamps using packWithVersionstamp"); + } + catch(IllegalArgumentException e) { + 
// eat + } + try { + t.hashCode(); + throw new RuntimeException("able to get hash code of tuple with two versionstamps"); + } + catch(IllegalArgumentException e) { + // eat + } + } + concatPacked = null; + prefixPacked = null; + freshPacked = null; + objectAddedPacked = null; + valueAddedPacked = null; + tupleAddedAllPacked = null; + listAddedAllPacked = null; + } + if(!Arrays.equals(concatPacked, freshPacked) || + !Arrays.equals(freshPacked, prefixPacked) || + !Arrays.equals(freshPacked, objectAddedPacked) || + !Arrays.equals(freshPacked, valueAddedPacked) || + !Arrays.equals(freshPacked, tupleAddedAllPacked) || + !Arrays.equals(freshPacked, listAddedAllPacked)) { + throw new RuntimeException("packed values are not concatenation of original packings"); + } + if(freshPacked != null && freshPacked.length != basePlusNewSize) { + throw new RuntimeException("packed length did not match expectation"); + } + if(freshPacked != null) { + if(freshTuple.hashCode() != Arrays.hashCode(freshPacked)) { + throw new IllegalArgumentException("hash code does not match fresh packed"); + } + for(Tuple t : allTuples) { + if(t.hashCode() != freshTuple.hashCode()) { + throw new IllegalArgumentException("hash code mismatch"); + } + if(Tuple.fromItems(t.getItems()).hashCode() != freshTuple.hashCode()) { + throw new IllegalArgumentException("hash code mismatch after re-compute"); + } + } + } + } + } + } + + private static void incompleteVersionstamps() { + if(FDB.instance().getAPIVersion() < 520) { + throw new IllegalStateException("cannot run test with API version " + FDB.instance().getAPIVersion()); + } + // This is a tricky case where there are two tuples with identical representations but different semantics. + byte[] arr = new byte[0x0100fe]; + Arrays.fill(arr, (byte)0x7f); // The actual value doesn't matter, but it can't be zero. 
+ Tuple t1 = Tuple.from(arr, Versionstamp.complete(new byte[]{FF, FF, FF, FF, FF, FF, FF, FF, FF, FF}), new byte[]{0x01, 0x01}); + Tuple t2 = Tuple.from(arr, Versionstamp.incomplete()); + if(t1.equals(t2)) { + throw new RuntimeException("tuples " + t1 + " and " + t2 + " compared equal"); + } + byte[] bytes1 = t1.pack(); + byte[] bytes2 = t2.packWithVersionstamp(); + if(!Arrays.equals(bytes1, bytes2)) { + throw new RuntimeException("tuples " + t1 + " and " + t2 + " did not have matching representations"); + } + if(t1.equals(t2)) { + throw new RuntimeException("tuples " + t1 + " and " + t2 + " compared equal with memoized packed representations"); + } + + // Make sure position information adjustment works. + Tuple t3 = Tuple.from(Versionstamp.incomplete(1)); + if(t3.getPackedSize() != 1 + Versionstamp.LENGTH + Integer.BYTES) { + throw new RuntimeException("incomplete versionstamp has incorrect packed size " + t3.getPackedSize()); + } + byte[] bytes3 = t3.packWithVersionstamp(); + if(ByteBuffer.wrap(bytes3, bytes3.length - Integer.BYTES, Integer.BYTES).order(ByteOrder.LITTLE_ENDIAN).getInt() != 1) { + throw new RuntimeException("incomplete versionstamp has incorrect position"); + } + if(!Tuple.fromBytes(bytes3, 0, bytes3.length - Integer.BYTES).equals(Tuple.from(Versionstamp.incomplete(1)))) { + throw new RuntimeException("unpacked bytes did not match"); + } + Subspace subspace = new Subspace(Tuple.from("prefix")); + byte[] bytes4 = subspace.packWithVersionstamp(t3); + if(ByteBuffer.wrap(bytes4, bytes4.length - Integer.BYTES, Integer.BYTES).order(ByteOrder.LITTLE_ENDIAN).getInt() != 1 + subspace.getKey().length) { + throw new RuntimeException("incomplete versionstamp has incorrect position with prefix"); + } + if(!Tuple.fromBytes(bytes4, 0, bytes4.length - Integer.BYTES).equals(Tuple.from("prefix", Versionstamp.incomplete(1)))) { + throw new RuntimeException("unpacked bytes with subspace did not match"); + } + try { + // At this point, the representation is cached, so 
an easy bug would be to have it return the already serialized value + t3.pack(); + throw new RuntimeException("was able to pack versionstamp with incomplete versionstamp"); + } catch(IllegalArgumentException e) { + // eat + } + + // Tuples with two incomplete versionstamps somewhere. + List twoIncompleteList = Arrays.asList( + Tuple.from(Versionstamp.incomplete(1), Versionstamp.incomplete(2)), + Tuple.from(Tuple.from(Versionstamp.incomplete(3)), Tuple.from(Versionstamp.incomplete(4))), + new Tuple().add(Versionstamp.incomplete()).add(Versionstamp.incomplete()), + new Tuple().add(Versionstamp.incomplete()).add(3L).add(Versionstamp.incomplete()), + Tuple.from(Tuple.from(Versionstamp.incomplete()), "dummy_string").add(Tuple.from(Versionstamp.incomplete())), + Tuple.from(Arrays.asList(Versionstamp.incomplete(), "dummy_string")).add(Tuple.from(Versionstamp.incomplete())), + Tuple.from(Tuple.from(Versionstamp.incomplete()), "dummy_string").add(Collections.singletonList(Versionstamp.incomplete())) + ); + for(Tuple t : twoIncompleteList) { + if(!t.hasIncompleteVersionstamp()) { + throw new RuntimeException("tuple doesn't think it has incomplete versionstamp"); + } + if(t.getPackedSize() < 2 * (1 + Versionstamp.LENGTH + Integer.BYTES)) { + throw new RuntimeException("tuple packed size " + t.getPackedSize() + " is smaller than expected"); + } + try { + t.pack(); + throw new RuntimeException("no error thrown when packing any incomplete versionstamps"); + } + catch(IllegalArgumentException e) { + // eat + } + try { + t.packWithVersionstamp(); + throw new RuntimeException("no error thrown when packing with versionstamp with two incompletes"); + } + catch(IllegalArgumentException e) { + // eat + } + } + } + + // Assumes API version < 520 + private static void incompleteVersionstamps300() { + if(FDB.instance().getAPIVersion() >= 520) { + throw new IllegalStateException("cannot run test with API version " + FDB.instance().getAPIVersion()); + } + Tuple t1 = 
Tuple.from(Versionstamp.complete(new byte[]{FF, FF, FF, FF, FF, FF, FF, FF, FF, FF}), new byte[]{}); + Tuple t2 = Tuple.from(Versionstamp.incomplete()); + if(t1.equals(t2)) { + throw new RuntimeException("tuples " + t1 + " and " + t2 + " compared equal"); + } + byte[] bytes1 = t1.pack(); + byte[] bytes2 = t2.packWithVersionstamp(); + if(!Arrays.equals(bytes1, bytes2)) { + throw new RuntimeException("tuples " + t1 + " and " + t2 + " did not have matching representations"); + } + if(t1.equals(t2)) { + throw new RuntimeException("tuples " + t1 + " and " + t2 + " compared equal with memoized packed representations"); + } + + // Make sure position information adjustment works. + Tuple t3 = Tuple.from(Versionstamp.incomplete(1)); + if(t3.getPackedSize() != 1 + Versionstamp.LENGTH + Short.BYTES) { + throw new RuntimeException("incomplete versionstamp has incorrect packed size " + t3.getPackedSize()); + } + byte[] bytes3 = t3.packWithVersionstamp(); + if(ByteBuffer.wrap(bytes3, bytes3.length - Short.BYTES, Short.BYTES).order(ByteOrder.LITTLE_ENDIAN).getShort() != 1) { + throw new RuntimeException("incomplete versionstamp has incorrect position"); + } + if(!Tuple.fromBytes(bytes3, 0, bytes3.length - Short.BYTES).equals(Tuple.from(Versionstamp.incomplete(1)))) { + throw new RuntimeException("unpacked bytes did not match"); + } + Subspace subspace = new Subspace(Tuple.from("prefix")); + byte[] bytes4 = subspace.packWithVersionstamp(t3); + if(ByteBuffer.wrap(bytes4, bytes4.length - Short.BYTES, Short.BYTES).order(ByteOrder.LITTLE_ENDIAN).getShort() != 1 + subspace.getKey().length) { + throw new RuntimeException("incomplete versionstamp has incorrect position with prefix"); + } + if(!Tuple.fromBytes(bytes4, 0, bytes4.length - Short.BYTES).equals(Tuple.from("prefix", Versionstamp.incomplete(1)))) { + throw new RuntimeException("unpacked bytes with subspace did not match"); + } + + // Make sure an offset > 0xFFFF throws an error. 
+ Tuple t4 = Tuple.from(Versionstamp.incomplete(2)); + byte[] bytes5 = t4.packWithVersionstamp(); // Get bytes memoized. + if(ByteBuffer.wrap(bytes5, bytes5.length - Short.BYTES, Short.BYTES).order(ByteOrder.LITTLE_ENDIAN).getShort() != 1) { + throw new RuntimeException("incomplete versionstamp has incorrect position with prefix"); + } + byte[] bytes6 = t4.packWithVersionstamp(new byte[0xfffe]); // Offset is 0xffff + if(!Arrays.equals(Arrays.copyOfRange(bytes5, 0, 1 + Versionstamp.LENGTH), Arrays.copyOfRange(bytes6, 0xfffe, 0xffff + Versionstamp.LENGTH))) { + throw new RuntimeException("area before versionstamp offset did not match"); + } + if((ByteBuffer.wrap(bytes6, bytes6.length - Short.BYTES, Short.BYTES).order(ByteOrder.LITTLE_ENDIAN).getShort() & 0xffff) != 0xffff) { + throw new RuntimeException("incomplete versionstamp has incorrect position with prefix"); + } + try { + t4.packWithVersionstamp(new byte[0xffff]); // Offset is 0x10000 + throw new RuntimeException("able to pack versionstamp with offset that is too large"); + } + catch(IllegalArgumentException e) { + // eat + } + // Same as before, but packed representation is not memoized. 
+ try { + Tuple.from(Versionstamp.incomplete(3)).packWithVersionstamp(new byte[0xffff]); // Offset is 0x10000 + throw new RuntimeException("able to pack versionstamp with offset that is too large"); + } + catch(IllegalArgumentException e) { + // eat + } + } + + private static void malformedBytes() { + List malformedSequences = Arrays.asList( + new byte[]{0x01, (byte)0xde, (byte)0xad, (byte)0xc0, (byte)0xde}, // no termination character for byte array + new byte[]{0x01, (byte)0xde, (byte)0xad, 0x00, FF, (byte)0xc0, (byte)0xde}, // no termination character but null in middle + new byte[]{0x02, 'h', 'e', 'l', 'l', 'o'}, // no termination character for string + new byte[]{0x02, 'h', 'e', 'l', 0x00, FF, 'l', 'o'}, // no termination character but null in the middle + // Invalid UTF-8 decodes malformed as U+FFFD rather than throwing an error + // new byte[]{0x02, 'u', 't', 'f', 0x08, (byte)0x80, 0x00}, // invalid utf-8 code point start character + // new byte[]{0x02, 'u', 't', 'f', 0x08, (byte)0xc0, 0x01, 0x00}, // invalid utf-8 code point second character + new byte[]{0x05, 0x02, 'h', 'e', 'l', 'l', 'o', 0x00}, // no termination character for nested tuple + new byte[]{0x05, 0x02, 'h', 'e', 'l', 'l', 'o', 0x00, 0x00, FF, 0x02, 't', 'h', 'e', 'r', 'e', 0x00}, // no termination character for nested tuple but null in the middle + new byte[]{0x16, 0x01}, // integer truncation + new byte[]{0x12, 0x01}, // integer truncation + new byte[]{0x1d, 0x09, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}, // integer truncation + new byte[]{0x0b, 0x09 ^ FF, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}, // integer truncation + new byte[]{0x20, 0x01, 0x02, 0x03}, // float truncation + new byte[]{0x21, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07}, // double truncation + new byte[]{0x30, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e}, // UUID truncation + new byte[]{0x33, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b}, // versionstamp 
truncation + new byte[]{FF} // unknown start code + ); + for(byte[] sequence : malformedSequences) { + try { + Tuple t = Tuple.fromBytes(sequence); + throw new RuntimeException("Able to unpack " + ByteArrayUtil.printable(sequence) + " into " + t); + } + catch(IllegalArgumentException e) { + System.out.println("Error for " + ByteArrayUtil.printable(sequence) + ": " + e.getMessage()); + } + } + + // Perfectly good byte sequences, but using the offset and length to remove terminal bytes + List wellFormedSequences = Arrays.asList( + Tuple.from((Object)new byte[]{0x01, 0x02}).pack(), + Tuple.from("hello").pack(), + Tuple.from("hell\0").pack(), + Tuple.from(1066L).pack(), + Tuple.from(-1066L).pack(), + Tuple.from(BigInteger.ONE.shiftLeft(Long.SIZE + 1)).pack(), + Tuple.from(BigInteger.ONE.shiftLeft(Long.SIZE + 1).negate()).pack(), + Tuple.from(-3.14f).pack(), + Tuple.from(2.71828).pack(), + Tuple.from(new UUID(1066L, 1415L)).pack(), + Tuple.from(Versionstamp.fromBytes(new byte[]{0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c})).pack() + ); + for(byte[] sequence : wellFormedSequences) { + try { + Tuple t = Tuple.fromBytes(sequence, 0, sequence.length - 1); + throw new RuntimeException("Able to unpack " + ByteArrayUtil.printable(sequence) + " into " + t + " without last character"); + } + catch(IllegalArgumentException e) { + System.out.println("Error for " + ByteArrayUtil.printable(sequence) + ": " + e.getMessage()); + } + } + } + + private static void offsetsAndLengths() { + List tuples = Arrays.asList( + new Tuple(), + Tuple.from((Object)null), + Tuple.from(null, new byte[]{0x10, 0x66}), + Tuple.from("dummy_string"), + Tuple.from(1066L) + ); + Tuple allTuples = tuples.stream().reduce(new Tuple(), Tuple::addAll); + byte[] allTupleBytes = allTuples.pack(); + + // Unpack each tuple individually using their lengths + int offset = 0; + for(Tuple t : tuples) { + int length = t.getPackedSize(); + Tuple unpacked = Tuple.fromBytes(allTupleBytes, offset, 
length); + if(!unpacked.equals(t)) { + throw new RuntimeException("unpacked tuple " + unpacked + " does not match serialized tuple " + t); + } + offset += length; + } + + // Unpack successive pairs of tuples. + offset = 0; + for(int i = 0; i < tuples.size() - 1; i++) { + Tuple combinedTuple = tuples.get(i).addAll(tuples.get(i + 1)); + Tuple unpacked = Tuple.fromBytes(allTupleBytes, offset, combinedTuple.getPackedSize()); + if(!unpacked.equals(combinedTuple)) { + throw new RuntimeException("unpacked tuple " + unpacked + " does not match combined tuple " + combinedTuple); + } + offset += tuples.get(i).getPackedSize(); + } + + // Allow an offset to equal the length of the array, but essentially only a zero-length is allowed there. + Tuple emptyAtEndTuple = Tuple.fromBytes(allTupleBytes, allTupleBytes.length, 0); + if(!emptyAtEndTuple.isEmpty()) { + throw new RuntimeException("tuple with no bytes is not empty"); + } + + try { + Tuple.fromBytes(allTupleBytes, -1, 4); + throw new RuntimeException("able to give negative offset to fromBytes"); + } + catch(IllegalArgumentException e) { + // eat + } + try { + Tuple.fromBytes(allTupleBytes, allTupleBytes.length + 1, 4); + throw new RuntimeException("able to give offset larger than array to fromBytes"); + } + catch(IllegalArgumentException e) { + // eat + } + try { + Tuple.fromBytes(allTupleBytes, 0, -1); + throw new RuntimeException("able to give negative length to fromBytes"); + } + catch(IllegalArgumentException e) { + // eat + } + try { + Tuple.fromBytes(allTupleBytes, 0, allTupleBytes.length + 1); + throw new RuntimeException("able to give length larger than array to fromBytes"); + } + catch(IllegalArgumentException e) { + // eat + } + try { + Tuple.fromBytes(allTupleBytes, allTupleBytes.length / 2, allTupleBytes.length / 2 + 2); + throw new RuntimeException("able to exceed array length in fromBytes"); + } + catch(IllegalArgumentException e) { + // eat + } + } + + private static void intoBuffer() { + Tuple t = 
Tuple.from("hello", 3.14f, "world"); + ByteBuffer buffer = ByteBuffer.allocate("hello".length() + 2 + Float.BYTES + 1 + "world".length() + 2); + t.packInto(buffer); + if(!Arrays.equals(t.pack(), buffer.array())) { + throw new RuntimeException("buffer and tuple do not match"); + } + + buffer = ByteBuffer.allocate(t.getPackedSize() + 2); + buffer.order(ByteOrder.LITTLE_ENDIAN); + t.packInto(buffer); + if(!Arrays.equals(ByteArrayUtil.join(t.pack(), new byte[]{0x00, 0x00}), buffer.array())) { + throw new RuntimeException("buffer and tuple do not match"); + } + if(!buffer.order().equals(ByteOrder.LITTLE_ENDIAN)) { + throw new RuntimeException("byte order changed"); + } + + buffer = ByteBuffer.allocate(t.getPackedSize() + 2); + buffer.put((byte)0x01).put((byte)0x02); + t.packInto(buffer); + if(!Arrays.equals(t.pack(new byte[]{0x01, 0x02}), buffer.array())) { + throw new RuntimeException("buffer and tuple do not match"); + } + + buffer = ByteBuffer.allocate(t.getPackedSize() - 1); + try { + t.packInto(buffer); + throw new RuntimeException("able to pack into buffer that was too small"); + } + catch(BufferOverflowException e) { + // eat + } + + Tuple tCopy = Tuple.fromItems(t.getItems()); // remove memoized stuff + buffer = ByteBuffer.allocate(t.getPackedSize() - 1); + try { + tCopy.packInto(buffer); + throw new RuntimeException("able to pack into buffer that was too small"); + } + catch(BufferOverflowException e) { + // eat + } + + Tuple tWithIncomplete = Tuple.from(Versionstamp.incomplete(3)); + buffer = ByteBuffer.allocate(tWithIncomplete.getPackedSize()); + try { + tWithIncomplete.packInto(buffer); + throw new RuntimeException("able to pack incomplete versionstamp into buffer"); + } + catch(IllegalArgumentException e) { + // eat + } + if(buffer.arrayOffset() != 0) { + throw new RuntimeException("offset changed after unsuccessful pack with incomplete versionstamp"); + } + } + // These should be in ArrayUtilTest, but those can't be run at the moment, so here they go. 
private static void replaceTests() { List arrays = Arrays.asList( From a1c32ce057f714761e3a3614db2b07497acb8fb9 Mon Sep 17 00:00:00 2001 From: Alec Grieser Date: Thu, 28 Feb 2019 09:35:04 -0800 Subject: [PATCH 23/47] update release notes with Tuple improvements --- documentation/sphinx/source/release-notes.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst index 606e63d229..a6e03e7ee2 100644 --- a/documentation/sphinx/source/release-notes.rst +++ b/documentation/sphinx/source/release-notes.rst @@ -40,10 +40,15 @@ Bindings * Java: Deprecated ``FDB.createCluster`` and ``Cluster``. The preferred way to get a ``Database`` is by using ``FDB.open``, which should work in both new and old API versions. `(PR #942) `_ * Java: Removed ``Cluster(long cPtr, Executor executor)`` constructor. This is API breaking for any code that has subclassed the ``Cluster`` class and is not protected by API versioning. `(PR #942) `_ * Java: Several methods relevant to read-only transactions have been moved into the ``ReadTransaction`` interface. +* Java: Tuples now cache previous hash codes and equality checking no longer requires packing the underlying Tuples. `(PR #1166) `_ +* Java: Tuple performance has been improved to use fewer allocations when packing and unpacking. `(Issue #1206) `_ +* Java: Unpacking a Tuple with a byte array or string that is missing the end-of-string character now throws an error. `(Issue #671) `_ +* Java: Unpacking a Tuple constrained to a subset of the underlying array now throws an error when it encounters a truncated integer. `(Issue #672) `_ * Ruby: Removed ``FDB.init``, ``FDB.create_cluster``, and ``FDB.Cluster``. ``FDB.open`` no longer accepts a ``database_name`` parameter. `(PR #942) `_ * Golang: Deprecated ``fdb.StartNetwork``, ``fdb.Open``, ``fdb.MustOpen``, and ``fdb.CreateCluster`` and added ``fdb.OpenDatabase`` and ``fdb.MustOpenDatabase``. 
The preferred way to start the network and get a ``Database`` is by using ``FDB.OpenDatabase`` or ``FDB.OpenDefault``. `(PR #942) `_ * Flow: Deprecated ``API::createCluster`` and ``Cluster`` and added ``API::createDatabase``. The preferred way to get a ``Database`` is by using ``API::createDatabase``. `(PR #942) `_ * Golang: Added ``fdb.Printable`` to print a human-readable string for a given byte array. Add ``Key.String()``, which converts the ``Key`` to a ``string`` using the ``Printable`` function. `(PR #1010) `_ +* Golang: Tuples now support ``Versionstamp`` operations. `(PR #1187) `_ * Python: Python signal handling didn't work when waiting on a future. In particular, pressing Ctrl-C would not successfully interrupt the program. `(PR #1138) `_ Other Changes From 40aa2ba6f0cddec4a5be3d8a545e3d8651405008 Mon Sep 17 00:00:00 2001 From: Alec Grieser Date: Thu, 28 Feb 2019 16:30:09 -0800 Subject: [PATCH 24/47] CMakeLists alphabetization and Javadoc improvements --- bindings/java/CMakeLists.txt | 4 +-- .../com/apple/foundationdb/tuple/Tuple.java | 34 ++++++++++++++----- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/bindings/java/CMakeLists.txt b/bindings/java/CMakeLists.txt index f8c1c25a65..77a0d5aea0 100644 --- a/bindings/java/CMakeLists.txt +++ b/bindings/java/CMakeLists.txt @@ -54,9 +54,9 @@ set(JAVA_BINDING_SRCS src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java src/main/com/apple/foundationdb/tuple/IterableComparator.java src/main/com/apple/foundationdb/tuple/package-info.java + src/main/com/apple/foundationdb/tuple/StringUtil.java src/main/com/apple/foundationdb/tuple/Tuple.java src/main/com/apple/foundationdb/tuple/TupleUtil.java - src/main/com/apple/foundationdb/tuple/StringUtil.java src/main/com/apple/foundationdb/tuple/Versionstamp.java) set(JAVA_TESTS_SRCS @@ -89,8 +89,8 @@ set(JAVA_TESTS_SRCS src/test/com/apple/foundationdb/test/StackUtils.java src/test/com/apple/foundationdb/test/TesterArgs.java 
src/test/com/apple/foundationdb/test/TestResult.java - src/test/com/apple/foundationdb/test/TupleTest.java src/test/com/apple/foundationdb/test/TuplePerformanceTest.java + src/test/com/apple/foundationdb/test/TupleTest.java src/test/com/apple/foundationdb/test/VersionstampSmokeTest.java src/test/com/apple/foundationdb/test/WatchTest.java src/test/com/apple/foundationdb/test/WhileTrueTest.java) diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java b/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java index ea47870037..e5556faaa6 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/Tuple.java @@ -315,9 +315,11 @@ public class Tuple implements Comparable, Iterable { /** * Get an encoded representation of this {@code Tuple}. Each element is encoded to - * {@code byte}s and concatenated. + * {@code byte}s and concatenated. Note that once a {@code Tuple} has been packed, its + * serialized representation is stored internally so that future calls to this function + * are faster than the initial call. * - * @return a packed representation of this {@code Tuple}. + * @return a packed representation of this {@code Tuple} */ public byte[] pack() { return packInternal(null, true); @@ -326,10 +328,12 @@ public class Tuple implements Comparable, Iterable { /** * Get an encoded representation of this {@code Tuple}. Each element is encoded to * {@code byte}s and concatenated, and then the prefix supplied is prepended to - * the array. + * the array. Note that once a {@code Tuple} has been packed, its serialized representation + * is stored internally so that future calls to this function are faster than the + * initial call. * - * @param prefix additional byte-array prefix to prepend to packed bytes. - * @return a packed representation of this {@code Tuple} prepended by the {@code prefix}. 
+ * @param prefix additional byte-array prefix to prepend to the packed bytes + * @return a packed representation of this {@code Tuple} prepended by the {@code prefix} */ public byte[] pack(byte[] prefix) { return packInternal(prefix, true); @@ -359,6 +363,9 @@ public class Tuple implements Comparable, Iterable { * It is up to the caller to ensure that there is enough space allocated within the buffer * to avoid {@link java.nio.BufferOverflowException}s. The client may call {@link #getPackedSize()} * to determine how large this {@code Tuple} will be once packed in order to allocate sufficient memory. + * Note that unlike {@link #pack()}, the serialized representation of this {@code Tuple} is not stored, so + * calling this function multiple times with the same {@code Tuple} requires serializing the {@code Tuple} + * multiple times. *
*
* This method will throw an error if there are any incomplete {@link Versionstamp}s in this {@code Tuple}. @@ -402,6 +409,10 @@ public class Tuple implements Comparable, Iterable { * {@link com.apple.foundationdb.Transaction#mutate(com.apple.foundationdb.MutationType, byte[], byte[]) Transaction.mutate()} * with the {@code SET_VERSIONSTAMPED_KEY} {@link com.apple.foundationdb.MutationType}, and the transaction's * version will then be filled in at commit time. + *
+ *
+ * Note that once a {@code Tuple} has been packed, its serialized representation is stored internally so that + * future calls to this function are faster than the initial call. * * @param prefix additional byte-array prefix to prepend to packed bytes. * @return a packed representation of this {@code Tuple} for use with versionstamp ops. @@ -477,11 +488,14 @@ public class Tuple implements Comparable, Iterable { /** * Construct a new {@code Tuple} with elements decoded from a supplied {@code byte} array. - * The passed byte array must not be {@code null}. + * The passed byte array must not be {@code null}. This will throw an exception if the passed byte + * array does not represent a valid {@code Tuple}. For example, this will throw an error if it + * encounters an unknown type code or if there is a packed element that appears to be truncated. * * @param bytes encoded {@code Tuple} source * * @return a new {@code Tuple} constructed by deserializing the provided {@code byte} array + * @throws IllegalArgumentException if {@code bytes} does not represent a valid {@code Tuple} */ public static Tuple fromBytes(byte[] bytes) { return fromBytes(bytes, 0, bytes.length); @@ -489,13 +503,17 @@ public class Tuple implements Comparable, Iterable { /** * Construct a new {@code Tuple} with elements decoded from a supplied {@code byte} array. - * The passed byte array must not be {@code null}. + * The passed byte array must not be {@code null}. This will throw an exception if the specified slice of + * the passed byte array does not represent a valid {@code Tuple}. For example, this will throw an error + * if it encounters an unknown type code or if there is a packed element that appears to be truncated. 
* * @param bytes encoded {@code Tuple} source * @param offset starting offset of byte array of encoded data * @param length length of encoded data within the source * * @return a new {@code Tuple} constructed by deserializing the specified slice of the provided {@code byte} array + * @throws IllegalArgumentException if {@code offset} or {@code length} are negative or would exceed the size of + * the array or if {@code bytes} does not represent a valid {@code Tuple} */ public static Tuple fromBytes(byte[] bytes, int offset, int length) { if(offset < 0 || offset > bytes.length) { @@ -864,7 +882,7 @@ public class Tuple implements Comparable, Iterable { * the serialized sizes of all of the elements of this {@code Tuple} and does not pack everything * into a single {@code Tuple}. The return value of this function is stored within this {@code Tuple} * after this function has been called so that subsequent calls on the same object are fast. This method - * does not validate that there is no more than one incomplete {@link Versionstamp} in this {@code Tuple}. + * does not validate that there is not more than one incomplete {@link Versionstamp} in this {@code Tuple}. 
* * @return the number of bytes in the packed representation of this {@code Tuple} */ From 75e475563a65815758f4c81ce8cc593b661bc2da Mon Sep 17 00:00:00 2001 From: Alec Grieser Date: Fri, 1 Mar 2019 16:31:51 -0800 Subject: [PATCH 25/47] clarify comments and be more strict about using UUID_BYTES constant --- .../main/com/apple/foundationdb/tuple/TupleUtil.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java b/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java index 63a1944b5d..e0e43e48df 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java @@ -40,6 +40,7 @@ class TupleUtil { private static final Charset UTF8 = Charset.forName("UTF-8"); private static final BigInteger LONG_MIN_VALUE = BigInteger.valueOf(Long.MIN_VALUE); private static final BigInteger LONG_MAX_VALUE = BigInteger.valueOf(Long.MAX_VALUE); + private static final int UUID_BYTES = 2 * Long.BYTES; private static final IterableComparator iterableComparator = new IterableComparator(); private static final byte BYTES_CODE = 0x01; @@ -475,10 +476,10 @@ class TupleUtil { state.add(true, start); } else if(code == UUID_CODE) { - ByteBuffer bb = ByteBuffer.wrap(rep, start, 2 * Long.BYTES).order(ByteOrder.BIG_ENDIAN); + ByteBuffer bb = ByteBuffer.wrap(rep, start, UUID_BYTES).order(ByteOrder.BIG_ENDIAN); long msb = bb.getLong(); long lsb = bb.getLong(); - state.add(new UUID(msb, lsb), start + 16); + state.add(new UUID(msb, lsb), start + UUID_BYTES); } else if(code == POS_INT_END) { int n = rep[start] & 0xff; @@ -533,8 +534,8 @@ class TupleUtil { if (val.compareTo(LONG_MIN_VALUE) >= 0 && val.compareTo(LONG_MAX_VALUE) <= 0) { state.add(val.longValue(), end); } else { - // This can occur if the thing can be represented with 8 bytes but not - // the right sign information. 
+ // This can occur if the thing can be represented with 8 bytes but requires using + // the most-significant bit as a normal bit instead of the sign bit. state.add(val, end); } } @@ -745,7 +746,7 @@ class TupleUtil { else if(item instanceof Boolean) packedSize += 1; else if(item instanceof UUID) - packedSize += 1 + 2 * Long.BYTES; + packedSize += 1 + UUID_BYTES; else if(item instanceof BigInteger) { BigInteger bigInt = (BigInteger)item; int byteCount = minimalByteCount(bigInt); From f66ddb13c2f748e07d3136a06cdda0f471b6da05 Mon Sep 17 00:00:00 2001 From: Alec Grieser Date: Fri, 1 Mar 2019 16:54:15 -0800 Subject: [PATCH 26/47] rewrite replace without a buffer to use replace with a buffer to first get length --- .../foundationdb/tuple/ByteArrayUtil.java | 64 +++++++++++-------- .../apple/foundationdb/test/TupleTest.java | 36 +++++++++++ 2 files changed, 73 insertions(+), 27 deletions(-) diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java b/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java index d848c296ff..83a49051e1 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java @@ -173,35 +173,31 @@ public class ByteArrayUtil { */ public static byte[] replace(byte[] src, int offset, int length, byte[] pattern, byte[] replacement) { + if(offset < 0 || offset > src.length) { + throw new IllegalArgumentException("Invalid offset for array pattern replacement"); + } + if(length < 0 || offset + length > src.length) { + throw new IllegalArgumentException("Invalid length for array pattern replacement"); + } if(pattern == null || pattern.length == 0) { return Arrays.copyOfRange(src, offset, offset + length); } ByteBuffer dest; if(replacement == null || replacement.length != pattern.length) { // Array might change size. This is the "tricky" case. 
- byte patternFirst = pattern[0]; - int patternOccurrences = 0; - int currentPosition = offset; - while(currentPosition < offset + length) { - if(src[currentPosition] == patternFirst && regionEquals(src, currentPosition, pattern)) { - patternOccurrences++; - currentPosition += pattern.length; + int newLength = replace(src, offset, length, pattern, replacement, null); + if(newLength != length) { + if(newLength < 0) { + System.out.println("oops"); + newLength = replace(src, offset, length, pattern, replacement, null); } - else { - currentPosition++; - } - } - if(patternOccurrences == 0) { - // Pattern doesn't occur. Just return a copy of the needed region. - return Arrays.copyOfRange(src, offset, offset + length); - } - int replacementLength = (replacement == null) ? 0 : replacement.length; - int newLength = length + patternOccurrences * (replacementLength - pattern.length); - if(newLength == 0) { - return new byte[0]; + dest = ByteBuffer.allocate(newLength); } else { - dest = ByteBuffer.allocate(newLength); + // If the array size didn't change, as the pattern and replacement lengths + // differ, it must be the case that there weren't any occurrences of pattern in src + // between offset and offset + length, so we can just return a copy. + return Arrays.copyOfRange(src, offset, offset + length); } } else { @@ -212,21 +208,30 @@ public class ByteArrayUtil { return dest.array(); } - static void replace(byte[] src, int offset, int length, byte[] pattern, byte[] replacement, ByteBuffer dest) { + // Replace any occurrences of pattern in src between offset and offset + length with replacement. + // The new array is serialized into dest and the new length is returned. 
+ static int replace(byte[] src, int offset, int length, byte[] pattern, byte[] replacement, ByteBuffer dest) { if(pattern == null || pattern.length == 0) { - dest.put(src, offset, length); - return; + if(dest != null) { + dest.put(src, offset, length); + } + return length; } byte patternFirst = pattern[0]; int lastPosition = offset; int currentPosition = offset; + int newLength = 0; + int replacementLength = replacement == null ? 0 : replacement.length; while(currentPosition < offset + length) { if(src[currentPosition] == patternFirst && regionEquals(src, currentPosition, pattern)) { - dest.put(src, lastPosition, currentPosition - lastPosition); - if(replacement != null) { - dest.put(replacement); + if(dest != null) { + dest.put(src, lastPosition, currentPosition - lastPosition); + if(replacement != null) { + dest.put(replacement); + } } + newLength += currentPosition - lastPosition + replacementLength; currentPosition += pattern.length; lastPosition = currentPosition; } @@ -235,7 +240,12 @@ public class ByteArrayUtil { } } - dest.put(src, lastPosition, currentPosition - lastPosition); + newLength += currentPosition - lastPosition; + if(dest != null) { + dest.put(src, lastPosition, currentPosition - lastPosition); + } + + return newLength; } /** diff --git a/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java b/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java index ac2b033748..f6152664ec 100644 --- a/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java +++ b/bindings/java/src/test/com/apple/foundationdb/test/TupleTest.java @@ -916,6 +916,42 @@ public class TupleTest { " with " + ByteArrayUtil.printable(replacement) + " in " + ByteArrayUtil.printable(src)); } } + + try { + ByteArrayUtil.replace(null, 0, 1, new byte[]{0x00}, new byte[]{0x00, FF}); + throw new RuntimeException("able to replace null bytes"); + } + catch(NullPointerException e) { + // eat + } + try { + ByteArrayUtil.replace(new byte[]{0x00, 0x01}, -1, 2, new 
byte[]{0x00}, new byte[]{0x00, FF}); + throw new RuntimeException("able to use negative offset"); + } + catch(IllegalArgumentException e) { + // eat + } + try { + ByteArrayUtil.replace(new byte[]{0x00, 0x01}, 3, 2, new byte[]{0x00}, new byte[]{0x00, FF}); + throw new RuntimeException("able to use offset after end of array"); + } + catch(IllegalArgumentException e) { + // eat + } + try { + ByteArrayUtil.replace(new byte[]{0x00, 0x01}, 1, -1, new byte[]{0x00}, new byte[]{0x00, FF}); + throw new RuntimeException("able to use negative length"); + } + catch(IllegalArgumentException e) { + // eat + } + try { + ByteArrayUtil.replace(new byte[]{0x00, 0x01}, 1, 2, new byte[]{0x00}, new byte[]{0x00, FF}); + throw new RuntimeException("able to give length that exceeds end of the array"); + } + catch(IllegalArgumentException e) { + // eat + } } private static void runTests(final int reps, TransactionContext db) { From 734029820269a09af1228a6d8572df443aab4a8b Mon Sep 17 00:00:00 2001 From: Alec Grieser Date: Fri, 1 Mar 2019 17:05:48 -0800 Subject: [PATCH 27/47] remove debugging printing that was accidentally added --- .../src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java b/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java index 83a49051e1..fe39fa332e 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java @@ -187,10 +187,6 @@ public class ByteArrayUtil { // Array might change size. This is the "tricky" case. 
int newLength = replace(src, offset, length, pattern, replacement, null); if(newLength != length) { - if(newLength < 0) { - System.out.println("oops"); - newLength = replace(src, offset, length, pattern, replacement, null); - } dest = ByteBuffer.allocate(newLength); } else { From 46f4b028071c2a3aa1755a572e3aca5201106f16 Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Mon, 11 Mar 2019 17:10:06 -0700 Subject: [PATCH 28/47] TLS Status: Resolve review comments Use connectedCoordinatorsNumDelayed to reduce the load on cluster controller; Set connectedCoordinatorsNum to null by default for monitorLeader() --- fdbclient/ManagementAPI.actor.cpp | 3 +-- fdbclient/MonitorLeader.actor.cpp | 6 ++++-- fdbclient/MonitorLeader.h | 2 +- fdbclient/NativeAPI.actor.cpp | 14 ++++++++------ fdbclient/ReadYourWrites.actor.cpp | 3 +-- fdbclient/StatusClient.actor.cpp | 3 +-- fdbserver/ClusterController.actor.cpp | 2 +- fdbserver/Status.h | 3 +++ fdbserver/tester.actor.cpp | 3 +-- fdbserver/worker.actor.cpp | 3 +-- 10 files changed, 22 insertions(+), 20 deletions(-) diff --git a/fdbclient/ManagementAPI.actor.cpp b/fdbclient/ManagementAPI.actor.cpp index 73f3316c66..fa54b5b391 100644 --- a/fdbclient/ManagementAPI.actor.cpp +++ b/fdbclient/ManagementAPI.actor.cpp @@ -1542,8 +1542,7 @@ ACTOR Future checkDatabaseLock( Reference tr, U ACTOR Future forceRecovery( Reference clusterFile, Key dcId ) { state Reference>> clusterInterface(new AsyncVar>); - state Reference> unused(new AsyncVar); - state Future leaderMon = monitorLeader(clusterFile, clusterInterface, unused); + state Future leaderMon = monitorLeader(clusterFile, clusterInterface); loop { choose { diff --git a/fdbclient/MonitorLeader.actor.cpp b/fdbclient/MonitorLeader.actor.cpp index cff9f6157c..835b85a215 100644 --- a/fdbclient/MonitorLeader.actor.cpp +++ b/fdbclient/MonitorLeader.actor.cpp @@ -335,9 +335,11 @@ ClientLeaderRegInterface::ClientLeaderRegInterface( INetwork* local ) { // is a nominee) ACTOR Future monitorNominee( Key key, 
ClientLeaderRegInterface coord, AsyncTrigger* nomineeChange, Optional *info, int generation, Reference> connectedCoordinatorsNum ) { loop { + state bool hasExisted = false; state Optional li = wait( retryBrokenPromise( coord.getLeader, GetLeaderRequest( key, info->present() ? info->get().changeID : UID() ), TaskCoordinationReply ) ); - if (li.present()) { + if (li.present() && !hasExisted && connectedCoordinatorsNum.isValid()) { connectedCoordinatorsNum->set(connectedCoordinatorsNum->get() + 1); + hasExisted = true; } wait( Future(Void()) ); // Make sure we weren't cancelled @@ -460,7 +462,7 @@ ACTOR Future monitorLeaderInternal( Reference connF state MonitorLeaderInfo info(connFile); loop { // set the AsyncVar to 0 - connectedCoordinatorsNum->set(0); + if (connectedCoordinatorsNum.isValid()) connectedCoordinatorsNum->set(0); MonitorLeaderInfo _info = wait( monitorLeaderOneGeneration( connFile, outSerializedLeaderInfo, info, connectedCoordinatorsNum) ); info = _info; info.generation++; diff --git a/fdbclient/MonitorLeader.h b/fdbclient/MonitorLeader.h index 87aa37c503..a1ed1c88ad 100644 --- a/fdbclient/MonitorLeader.h +++ b/fdbclient/MonitorLeader.h @@ -30,7 +30,7 @@ class ClientCoordinators; template -Future monitorLeader( Reference const& connFile, Reference>> const& outKnownLeader, Reference> connectedCoordinatorsNum ); +Future monitorLeader( Reference const& connFile, Reference>> const& outKnownLeader, Reference> connectedCoordinatorsNum = Reference>() ); // Monitors the given coordination group's leader election process and provides a best current guess // of the current leader. If a leader is elected for long enough and communication with a quorum of // coordinators is possible, eventually outKnownLeader will be that leader's interface. 
diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index c797c32f2e..e7d7a3e243 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -533,14 +533,14 @@ DatabaseContext::DatabaseContext( DatabaseContext::DatabaseContext( const Error &err ) : deferredError(err), latencies(1000), readLatencies(1000), commitLatencies(1000), GRVLatencies(1000), mutationsPerCommit(1000), bytesPerCommit(1000) {} -ACTOR static Future monitorClientInfo( Reference>> clusterInterface, Reference ccf, Reference> outInfo, Reference> connectedCoordinatorsNum ) { +ACTOR static Future monitorClientInfo( Reference>> clusterInterface, Reference ccf, Reference> outInfo, Reference> connectedCoordinatorsNumDelayed ) { try { state Optional incorrectTime; loop { OpenDatabaseRequest req; req.knownClientInfoID = outInfo->get().id; req.supportedVersions = VectorRef(req.arena, networkOptions.supportedVersions); - req.connectedCoordinatorsNum = connectedCoordinatorsNum->get(); + req.connectedCoordinatorsNum = connectedCoordinatorsNumDelayed->get(); req.traceLogGroup = StringRef(req.arena, networkOptions.traceLogGroup); ClusterConnectionString fileConnectionString; @@ -571,7 +571,7 @@ ACTOR static Future monitorClientInfo( Referenceget().present()) TraceEvent("ClientInfo_CCInterfaceChange").detail("CCID", clusterInterface->get().get().id()); } - when( wait( connectedCoordinatorsNum->onChange() ) ) {} + when( wait( connectedCoordinatorsNumDelayed->onChange() ) ) {} } } } catch( Error& e ) { @@ -588,9 +588,10 @@ ACTOR static Future monitorClientInfo( Reference>> clusterInterface, Reference connFile, LocalityData const& clientLocality) { Reference> connectedCoordinatorsNum(new AsyncVar(0)); + Reference> connectedCoordinatorsNumDelayed(new AsyncVar(0)); Reference cluster(new Cluster(connFile, clusterInterface, connectedCoordinatorsNum)); Reference> clientInfo(new AsyncVar()); - Future clientInfoMonitor = monitorClientInfo(clusterInterface, connFile, clientInfo, 
connectedCoordinatorsNum); + Future clientInfoMonitor = monitorClientInfo(clusterInterface, connFile, clientInfo, connectedCoordinatorsNumDelayed) || delayedAsyncVar(connectedCoordinatorsNum, connectedCoordinatorsNumDelayed, 1.0); return Database(new DatabaseContext(cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false)); } @@ -756,9 +757,10 @@ Reference DatabaseContext::getConnectionFile() { Database Database::createDatabase( Reference connFile, int apiVersion, LocalityData const& clientLocality ) { Reference> connectedCoordinatorsNum(new AsyncVar(0)); // Number of connected coordinators for the client - Reference cluster(new Cluster(connFile, connectedCoordinatorsNum, apiVersion)); + Reference> connectedCoordinatorsNumDelayed(new AsyncVar(0)); + Reference cluster(new Cluster(connFile, connectedCoordinatorsNumDelayed, apiVersion)); Reference> clientInfo(new AsyncVar()); - Future clientInfoMonitor = monitorClientInfo(cluster->getClusterInterface(), connFile, clientInfo, connectedCoordinatorsNum); + Future clientInfoMonitor = monitorClientInfo(cluster->getClusterInterface(), connFile, clientInfo, connectedCoordinatorsNumDelayed) || delayedAsyncVar(connectedCoordinatorsNum, connectedCoordinatorsNumDelayed, 1.0); return Database( new DatabaseContext( cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false, apiVersion ) ); } diff --git a/fdbclient/ReadYourWrites.actor.cpp b/fdbclient/ReadYourWrites.actor.cpp index 24d088e5af..cc6bce8a86 100644 --- a/fdbclient/ReadYourWrites.actor.cpp +++ b/fdbclient/ReadYourWrites.actor.cpp @@ -1159,8 +1159,7 @@ ACTOR Future> getJSON(Reference clusterFi ACTOR Future> getWorkerInterfaces (Reference clusterFile){ state Reference>> clusterInterface(new AsyncVar>); - Reference> unused(new AsyncVar(0)); - state Future leaderMon = monitorLeader(clusterFile, clusterInterface, unused); + state Future leaderMon = 
monitorLeader(clusterFile, clusterInterface); loop{ choose { diff --git a/fdbclient/StatusClient.actor.cpp b/fdbclient/StatusClient.actor.cpp index 84f51fb453..9cd7358c07 100644 --- a/fdbclient/StatusClient.actor.cpp +++ b/fdbclient/StatusClient.actor.cpp @@ -463,12 +463,11 @@ ACTOR Future statusFetcherImpl( Reference f state bool quorum_reachable = false; state int coordinatorsFaultTolerance = 0; state Reference>> clusterInterface(new AsyncVar>); - state Reference> connectedCoordinatorsNum(new AsyncVar(0)); try { state int64_t clientTime = time(0); - state Future leaderMon = monitorLeader(f, clusterInterface, connectedCoordinatorsNum); + state Future leaderMon = monitorLeader(f, clusterInterface); StatusObject _statusObjClient = wait(clientStatusFetcher(f, &clientMessages, &quorum_reachable, &coordinatorsFaultTolerance)); statusObjClient = _statusObjClient; diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 6fe15ae63e..04853215ff 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -1234,7 +1234,7 @@ ACTOR Future clusterOpenDatabase( } - db->clientStatusInfoMap[reply.getEndpoint().getPrimaryAddress()] = {traceLogGroup.toString(), connectedCoordinatorsNum}; + db->clientStatusInfoMap[reply.getEndpoint().getPrimaryAddress()] = ClientStatusInfo(traceLogGroup.toString(), connectedCoordinatorsNum); while (db->clientInfo->get().id == knownClientInfoID) { choose { diff --git a/fdbserver/Status.h b/fdbserver/Status.h index 1bc8acc916..200d3567c0 100644 --- a/fdbserver/Status.h +++ b/fdbserver/Status.h @@ -33,6 +33,9 @@ typedef std::map< NetworkAddress, Standalone> > Clie struct ClientStatusInfo { std::string traceLogGroup; int connectedCoordinatorsNum; + + ClientStatusInfo() : connectedCoordinatorsNum(0) {} + ClientStatusInfo(std::string const& traceLogGroup, int const connectedCoordinatorsNum) : traceLogGroup(traceLogGroup), connectedCoordinatorsNum(connectedCoordinatorsNum) {} }; 
Future clusterGetStatus( Reference> const& db, Database const& cx, vector> const& workers, diff --git a/fdbserver/tester.actor.cpp b/fdbserver/tester.actor.cpp index edf2e9bbbc..135053f0e6 100644 --- a/fdbserver/tester.actor.cpp +++ b/fdbserver/tester.actor.cpp @@ -1127,9 +1127,8 @@ ACTOR Future runTests( Reference connFile, test_typ state vector testSpecs; Reference>> cc( new AsyncVar> ); Reference>> ci( new AsyncVar> ); - Reference> connectedCoordinatorsNum( new AsyncVar(0) ); vector> actors; - actors.push_back( reportErrors(monitorLeader( connFile, cc, connectedCoordinatorsNum ), "MonitorLeader") ); + actors.push_back( reportErrors(monitorLeader( connFile, cc ), "MonitorLeader") ); actors.push_back( reportErrors(extractClusterInterface( cc,ci ),"ExtractClusterInterface") ); actors.push_back( reportErrors(failureMonitorClient( ci, false ),"FailureMonitorClient") ); diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index a61e10a83c..c4b0dd0def 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -1215,11 +1215,10 @@ ACTOR Future fdbd( Reference>> cc(new AsyncVar>); Reference>> ci(new AsyncVar>); Reference> asyncPriorityInfo(new AsyncVar(getCCPriorityInfo(fitnessFilePath, processClass))); - Reference> unused(new AsyncVar(0)); Promise recoveredDiskFiles; v.push_back(reportErrors(monitorAndWriteCCPriorityInfo(fitnessFilePath, asyncPriorityInfo), "MonitorAndWriteCCPriorityInfo")); - v.push_back( reportErrors( processClass == ProcessClass::TesterClass ? monitorLeader( connFile, cc, unused ) : clusterController( connFile, cc , asyncPriorityInfo, recoveredDiskFiles.getFuture(), localities ), "ClusterController") ); + v.push_back( reportErrors( processClass == ProcessClass::TesterClass ? 
monitorLeader( connFile, cc ) : clusterController( connFile, cc , asyncPriorityInfo, recoveredDiskFiles.getFuture(), localities ), "ClusterController") ); v.push_back( reportErrors(extractClusterInterface( cc, ci ), "ExtractClusterInterface") ); v.push_back( reportErrors(failureMonitorClient( ci, true ), "FailureMonitorClient") ); v.push_back( reportErrorsExcept(workerServer(connFile, cc, localities, asyncPriorityInfo, processClass, dataFolder, memoryLimit, metricsConnFile, metricsPrefix, recoveredDiskFiles), "WorkerServer", UID(), &normalWorkerErrors()) ); From d9e9e0c5211dd05990e6147cf0fd6dfcc0fed352 Mon Sep 17 00:00:00 2001 From: Alec Grieser Date: Mon, 11 Mar 2019 18:26:08 -0700 Subject: [PATCH 29/47] use bitwise or instead of addition when reconstituting long --- .../java/src/main/com/apple/foundationdb/tuple/TupleUtil.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java b/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java index e0e43e48df..6ddfae83f9 100644 --- a/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java +++ b/bindings/java/src/main/com/apple/foundationdb/tuple/TupleUtil.java @@ -509,14 +509,14 @@ class TupleUtil { if(positive && (n < Long.BYTES || rep[start] > 0)) { long res = 0L; for(int i = start; i < end; i++) { - res = (res << 8) + (rep[i] & 0xff); + res = (res << 8) | (rep[i] & 0xff); } state.add(res, end); } else if(!positive && (n < Long.BYTES || rep[start] < 0)) { long res = ~0L; for(int i = start; i < end; i++) { - res = (res << 8) + (rep[i] & 0xff); + res = (res << 8) | (rep[i] & 0xff); } state.add(res + 1, end); } From c1745b90611bf54a8d04ca765006781e6289e8cf Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Mon, 11 Mar 2019 19:43:59 -0700 Subject: [PATCH 30/47] Bug fix, backup snapshot dispatch shardsBehind calculation would divide by zero if the snapshot scheduled interval is 0. 
--- fdbclient/FileBackupAgent.actor.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index 20e007afe7..a354dbf38f 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -1559,7 +1559,17 @@ namespace fileBackup { // Calculate the number of shards that would have been dispatched by a normal (on-schedule) BackupSnapshotDispatchTask given // the dispatch window and the start and expected-end versions of the current snapshot. int64_t dispatchWindow = nextDispatchVersion - recentReadVersion; - int countShardsExpectedPerNormalWindow = (double(dispatchWindow) / snapshotScheduledVersionInterval) * countAllShards; + + // If the scheduled snapshot interval is 0 (such as for initial, as-fast-as-possible snapshot) then all shards are considered late + int countShardsExpectedPerNormalWindow; + if(snapshotScheduledVersionInterval == 0) { + countShardsExpectedPerNormalWindow = 0; + } + else { + // A dispatchWindow of 0 means the target end version is <= now which also results in all shards being considered late + countShardsExpectedPerNormalWindow = (double(dispatchWindow) / snapshotScheduledVersionInterval) * countAllShards; + } + // countShardsThisDispatch is how many total shards are to be dispatched by this dispatch cycle. // Since this dispatch cycle can span many incrementally progressing separate executions of the BackupSnapshotDispatchTask // instance, this is calculated as the number of shards dispatched so far in the dispatch batch plus the number of shards From e9b8bf601e563d385575027465c48bb428cbaea8 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Tue, 12 Mar 2019 03:34:38 -0700 Subject: [PATCH 31/47] Added backup status JSON output to backup workload to get sim coverage. 
--- fdbserver/workloads/BackupCorrectness.actor.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fdbserver/workloads/BackupCorrectness.actor.cpp b/fdbserver/workloads/BackupCorrectness.actor.cpp index 3923b15e9f..6315165730 100644 --- a/fdbserver/workloads/BackupCorrectness.actor.cpp +++ b/fdbserver/workloads/BackupCorrectness.actor.cpp @@ -192,6 +192,8 @@ struct BackupAndRestoreCorrectnessWorkload : TestWorkload { loop { std::string status = wait(agent.getStatus(cx, true, tag)); puts(status.c_str()); + std::string statusJSON = wait(agent.getStatusJSON(cx, tag)); + puts(statusJSON.c_str()); wait(delay(2.0)); } } From 22f5624494ac5287af928b0b65c0d8fac0c8da39 Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Tue, 12 Mar 2019 15:08:08 -0700 Subject: [PATCH 32/47] TLS Status: Reduce cluster controller load When the coordinator changes, we use delayedAsyncVar() to reduce the frequency at which the cluster controller sends the updated connectedCoordinatorsNumDelayed to clients. This helps reduce the cluster controller's workload --- fdbclient/MonitorLeader.actor.cpp | 6 +++--- fdbclient/NativeAPI.actor.cpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fdbclient/MonitorLeader.actor.cpp b/fdbclient/MonitorLeader.actor.cpp index 835b85a215..6bdd6a990a 100644 --- a/fdbclient/MonitorLeader.actor.cpp +++ b/fdbclient/MonitorLeader.actor.cpp @@ -334,12 +334,12 @@ ClientLeaderRegInterface::ClientLeaderRegInterface( INetwork* local ) { // This function contacts a coordinator coord to ask if the worker is considered as a leader (i.e., if the worker // is a nominee) ACTOR Future monitorNominee( Key key, ClientLeaderRegInterface coord, AsyncTrigger* nomineeChange, Optional *info, int generation, Reference> connectedCoordinatorsNum ) { + state bool hasCounted = false; loop { - state bool hasExisted = false; state Optional li = wait( retryBrokenPromise( coord.getLeader, GetLeaderRequest( key, info->present() ? 
info->get().changeID : UID() ), TaskCoordinationReply ) ); - if (li.present() && !hasExisted && connectedCoordinatorsNum.isValid()) { + if (li.present() && !hasCounted && connectedCoordinatorsNum.isValid()) { connectedCoordinatorsNum->set(connectedCoordinatorsNum->get() + 1); - hasExisted = true; + hasCounted = true; } wait( Future(Void()) ); // Make sure we weren't cancelled diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index e7d7a3e243..e998a211dd 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -591,7 +591,7 @@ Database DatabaseContext::create(Reference>> Reference> connectedCoordinatorsNumDelayed(new AsyncVar(0)); Reference cluster(new Cluster(connFile, clusterInterface, connectedCoordinatorsNum)); Reference> clientInfo(new AsyncVar()); - Future clientInfoMonitor = monitorClientInfo(clusterInterface, connFile, clientInfo, connectedCoordinatorsNumDelayed) || delayedAsyncVar(connectedCoordinatorsNum, connectedCoordinatorsNumDelayed, 1.0); + Future clientInfoMonitor = delayedAsyncVar(connectedCoordinatorsNum, connectedCoordinatorsNumDelayed, 1.0) || monitorClientInfo(clusterInterface, connFile, clientInfo, connectedCoordinatorsNumDelayed); return Database(new DatabaseContext(cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false)); } @@ -758,9 +758,9 @@ Reference DatabaseContext::getConnectionFile() { Database Database::createDatabase( Reference connFile, int apiVersion, LocalityData const& clientLocality ) { Reference> connectedCoordinatorsNum(new AsyncVar(0)); // Number of connected coordinators for the client Reference> connectedCoordinatorsNumDelayed(new AsyncVar(0)); - Reference cluster(new Cluster(connFile, connectedCoordinatorsNumDelayed, apiVersion)); + Reference cluster(new Cluster(connFile, connectedCoordinatorsNum, apiVersion)); Reference> clientInfo(new AsyncVar()); - Future clientInfoMonitor = 
monitorClientInfo(cluster->getClusterInterface(), connFile, clientInfo, connectedCoordinatorsNumDelayed) || delayedAsyncVar(connectedCoordinatorsNum, connectedCoordinatorsNumDelayed, 1.0); + Future clientInfoMonitor = delayedAsyncVar(connectedCoordinatorsNum, connectedCoordinatorsNumDelayed, 1.0) || monitorClientInfo(cluster->getClusterInterface(), connFile, clientInfo, connectedCoordinatorsNumDelayed); return Database( new DatabaseContext( cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false, apiVersion ) ); } From ff8bac8d208f94d1fb389ff3a4a022bdf47cf319 Mon Sep 17 00:00:00 2001 From: Vishesh Yadav Date: Tue, 12 Mar 2019 17:58:55 -0700 Subject: [PATCH 33/47] doc: Some documentation for IPv6 --- documentation/sphinx/source/administration.rst | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/documentation/sphinx/source/administration.rst b/documentation/sphinx/source/administration.rst index 8ddc88ac6f..1e0111b584 100644 --- a/documentation/sphinx/source/administration.rst +++ b/documentation/sphinx/source/administration.rst @@ -141,6 +141,21 @@ Any client connected to FoundationDB can access information about its cluster fi * To get the path to the cluster file, read the key ``\xFF\xFF/cluster_file_path``. * To get the contents of the cluster file, read the key ``\xFF\xFF/connection_string``. +.. _ipv6-support: + +IPv6 Support +============ + +FoundationDB (since v6.1) can accept network connections from clients connecting over IPv6. An IPv6 address/port pair is represented as ``[IP]:PORT``, e.g. "[::1]:4800", "[abcd::dead:beef]:4500". +1) The cluster file can contain a mix of IPv4 and IPv6 addresses. For example:: + + description:ID@127.0.0.1:4500,[::1]:4500,... +2) Starting ``fdbserver`` with IPv6:: + + $ /path/to/fdbserver -C fdb.cluster -p \[::1\]:4500 + .. 
_adding-machines-to-a-cluster: Adding machines to a cluster From a2108047aa3eb2d9718afe6ba74b9a6228c5f072 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Wed, 13 Mar 2019 13:14:39 -0700 Subject: [PATCH 34/47] removed LocalitySetRef and IRepPolicyRef typedefs, because for clarity the Ref suffix is reserved for arena allocated objects instead of reference counted objects. --- fdbclient/DatabaseConfiguration.cpp | 28 ++--- fdbclient/DatabaseConfiguration.h | 12 +-- fdbclient/ManagementAPI.actor.cpp | 44 ++++---- fdbrpc/Replication.h | 32 +++--- fdbrpc/ReplicationPolicy.cpp | 36 +++---- fdbrpc/ReplicationPolicy.h | 64 ++++++------ fdbrpc/ReplicationTypes.h | 3 - fdbrpc/ReplicationUtils.cpp | 110 ++++++++++---------- fdbrpc/ReplicationUtils.h | 20 ++-- fdbrpc/simulator.h | 10 +- fdbserver/ClusterController.actor.cpp | 8 +- fdbserver/DBCoreState.h | 2 +- fdbserver/DataDistribution.actor.cpp | 16 +-- fdbserver/LogSystem.h | 10 +- fdbserver/LogSystemConfig.h | 2 +- fdbserver/LogSystemPeekCursor.actor.cpp | 2 +- fdbserver/TagPartitionedLogSystem.actor.cpp | 8 +- fdbserver/WorkerInterface.actor.h | 2 +- 18 files changed, 203 insertions(+), 206 deletions(-) diff --git a/fdbclient/DatabaseConfiguration.cpp b/fdbclient/DatabaseConfiguration.cpp index 0af3402b73..1bc518e0e4 100644 --- a/fdbclient/DatabaseConfiguration.cpp +++ b/fdbclient/DatabaseConfiguration.cpp @@ -38,7 +38,7 @@ void DatabaseConfiguration::resetInternal() { autoDesiredTLogCount = CLIENT_KNOBS->DEFAULT_AUTO_LOGS; usableRegions = 1; regions.clear(); - tLogPolicy = storagePolicy = remoteTLogPolicy = IRepPolicyRef(); + tLogPolicy = storagePolicy = remoteTLogPolicy = Reference(); remoteDesiredTLogCount = -1; remoteTLogReplicationFactor = repopulateRegionAntiQuorum = 0; } @@ -48,7 +48,7 @@ void parse( int* i, ValueRef const& v ) { *i = atoi(v.toString().c_str()); } -void parseReplicationPolicy(IRepPolicyRef* policy, ValueRef const& v) { +void parseReplicationPolicy(Reference* policy, ValueRef const& v) { 
BinaryReader reader(v, IncludeVersion()); serializeReplicationPolicy(reader, *policy); } @@ -91,35 +91,35 @@ void parse( std::vector* regions, ValueRef const& v ) { info.satelliteTLogReplicationFactor = 1; info.satelliteTLogUsableDcs = 1; info.satelliteTLogWriteAntiQuorum = 0; - info.satelliteTLogPolicy = IRepPolicyRef(new PolicyOne()); + info.satelliteTLogPolicy = Reference(new PolicyOne()); } else if(satelliteReplication == "one_satellite_double") { info.satelliteTLogReplicationFactor = 2; info.satelliteTLogUsableDcs = 1; info.satelliteTLogWriteAntiQuorum = 0; - info.satelliteTLogPolicy = IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))); + info.satelliteTLogPolicy = Reference(new PolicyAcross(2, "zoneid", Reference(new PolicyOne()))); } else if(satelliteReplication == "one_satellite_triple") { info.satelliteTLogReplicationFactor = 3; info.satelliteTLogUsableDcs = 1; info.satelliteTLogWriteAntiQuorum = 0; - info.satelliteTLogPolicy = IRepPolicyRef(new PolicyAcross(3, "zoneid", IRepPolicyRef(new PolicyOne()))); + info.satelliteTLogPolicy = Reference(new PolicyAcross(3, "zoneid", Reference(new PolicyOne()))); } else if(satelliteReplication == "two_satellite_safe") { info.satelliteTLogReplicationFactor = 4; info.satelliteTLogUsableDcs = 2; info.satelliteTLogWriteAntiQuorum = 0; - info.satelliteTLogPolicy = IRepPolicyRef(new PolicyAcross(2, "dcid", IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))))); + info.satelliteTLogPolicy = Reference(new PolicyAcross(2, "dcid", Reference(new PolicyAcross(2, "zoneid", Reference(new PolicyOne()))))); info.satelliteTLogReplicationFactorFallback = 2; info.satelliteTLogUsableDcsFallback = 1; info.satelliteTLogWriteAntiQuorumFallback = 0; - info.satelliteTLogPolicyFallback = IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))); + info.satelliteTLogPolicyFallback = Reference(new PolicyAcross(2, "zoneid", Reference(new PolicyOne()))); } else 
if(satelliteReplication == "two_satellite_fast") { info.satelliteTLogReplicationFactor = 4; info.satelliteTLogUsableDcs = 2; info.satelliteTLogWriteAntiQuorum = 2; - info.satelliteTLogPolicy = IRepPolicyRef(new PolicyAcross(2, "dcid", IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))))); + info.satelliteTLogPolicy = Reference(new PolicyAcross(2, "dcid", Reference(new PolicyAcross(2, "zoneid", Reference(new PolicyOne()))))); info.satelliteTLogReplicationFactorFallback = 2; info.satelliteTLogUsableDcsFallback = 1; info.satelliteTLogWriteAntiQuorumFallback = 0; - info.satelliteTLogPolicyFallback = IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))); + info.satelliteTLogPolicyFallback = Reference(new PolicyAcross(2, "zoneid", Reference(new PolicyOne()))); } else { throw invalid_option(); } @@ -141,20 +141,20 @@ void parse( std::vector* regions, ValueRef const& v ) { void DatabaseConfiguration::setDefaultReplicationPolicy() { if(!storagePolicy) { - storagePolicy = IRepPolicyRef(new PolicyAcross(storageTeamSize, "zoneid", IRepPolicyRef(new PolicyOne()))); + storagePolicy = Reference(new PolicyAcross(storageTeamSize, "zoneid", Reference(new PolicyOne()))); } if(!tLogPolicy) { - tLogPolicy = IRepPolicyRef(new PolicyAcross(tLogReplicationFactor, "zoneid", IRepPolicyRef(new PolicyOne()))); + tLogPolicy = Reference(new PolicyAcross(tLogReplicationFactor, "zoneid", Reference(new PolicyOne()))); } if(remoteTLogReplicationFactor > 0 && !remoteTLogPolicy) { - remoteTLogPolicy = IRepPolicyRef(new PolicyAcross(remoteTLogReplicationFactor, "zoneid", IRepPolicyRef(new PolicyOne()))); + remoteTLogPolicy = Reference(new PolicyAcross(remoteTLogReplicationFactor, "zoneid", Reference(new PolicyOne()))); } for(auto& r : regions) { if(r.satelliteTLogReplicationFactor > 0 && !r.satelliteTLogPolicy) { - r.satelliteTLogPolicy = IRepPolicyRef(new PolicyAcross(r.satelliteTLogReplicationFactor, "zoneid", IRepPolicyRef(new PolicyOne()))); + 
r.satelliteTLogPolicy = Reference(new PolicyAcross(r.satelliteTLogReplicationFactor, "zoneid", Reference(new PolicyOne()))); } if(r.satelliteTLogReplicationFactorFallback > 0 && !r.satelliteTLogPolicyFallback) { - r.satelliteTLogPolicyFallback = IRepPolicyRef(new PolicyAcross(r.satelliteTLogReplicationFactorFallback, "zoneid", IRepPolicyRef(new PolicyOne()))); + r.satelliteTLogPolicyFallback = Reference(new PolicyAcross(r.satelliteTLogReplicationFactorFallback, "zoneid", Reference(new PolicyOne()))); } } } diff --git a/fdbclient/DatabaseConfiguration.h b/fdbclient/DatabaseConfiguration.h index 5df38f1fb2..18bf0b0352 100644 --- a/fdbclient/DatabaseConfiguration.h +++ b/fdbclient/DatabaseConfiguration.h @@ -49,13 +49,13 @@ struct RegionInfo { Key dcId; int32_t priority; - IRepPolicyRef satelliteTLogPolicy; + Reference satelliteTLogPolicy; int32_t satelliteDesiredTLogCount; int32_t satelliteTLogReplicationFactor; int32_t satelliteTLogWriteAntiQuorum; int32_t satelliteTLogUsableDcs; - IRepPolicyRef satelliteTLogPolicyFallback; + Reference satelliteTLogPolicyFallback; int32_t satelliteTLogReplicationFactorFallback; int32_t satelliteTLogWriteAntiQuorumFallback; int32_t satelliteTLogUsableDcsFallback; @@ -157,7 +157,7 @@ struct DatabaseConfiguration { int32_t autoResolverCount; // TLogs - IRepPolicyRef tLogPolicy; + Reference tLogPolicy; int32_t desiredTLogCount; int32_t autoDesiredTLogCount; int32_t tLogWriteAntiQuorum; @@ -167,7 +167,7 @@ struct DatabaseConfiguration { TLogSpillType tLogSpillType; // Storage Servers - IRepPolicyRef storagePolicy; + Reference storagePolicy; int32_t storageTeamSize; KeyValueStoreType storageServerStoreType; @@ -175,7 +175,7 @@ struct DatabaseConfiguration { int32_t desiredLogRouterCount; int32_t remoteDesiredTLogCount; int32_t remoteTLogReplicationFactor; - IRepPolicyRef remoteTLogPolicy; + Reference remoteTLogPolicy; //Data centers int32_t usableRegions; @@ -195,7 +195,7 @@ struct DatabaseConfiguration { if(desired == -1) return 
autoDesiredTLogCount; return desired; } int32_t getRemoteTLogReplicationFactor() const { if(remoteTLogReplicationFactor == 0) return tLogReplicationFactor; return remoteTLogReplicationFactor; } - IRepPolicyRef getRemoteTLogPolicy() const { if(remoteTLogReplicationFactor == 0) return tLogPolicy; return remoteTLogPolicy; } + Reference getRemoteTLogPolicy() const { if(remoteTLogReplicationFactor == 0) return tLogPolicy; return remoteTLogPolicy; } bool operator == ( DatabaseConfiguration const& rhs ) const { const_cast(this)->makeConfigurationImmutable(); diff --git a/fdbclient/ManagementAPI.actor.cpp b/fdbclient/ManagementAPI.actor.cpp index fa54b5b391..04cbbeb45e 100644 --- a/fdbclient/ManagementAPI.actor.cpp +++ b/fdbclient/ManagementAPI.actor.cpp @@ -99,42 +99,42 @@ std::map configForToken( std::string const& mode ) { } std::string redundancy, log_replicas; - IRepPolicyRef storagePolicy; - IRepPolicyRef tLogPolicy; + Reference storagePolicy; + Reference tLogPolicy; bool redundancySpecified = true; if (mode == "single") { redundancy="1"; log_replicas="1"; - storagePolicy = tLogPolicy = IRepPolicyRef(new PolicyOne()); + storagePolicy = tLogPolicy = Reference(new PolicyOne()); } else if(mode == "double" || mode == "fast_recovery_double") { redundancy="2"; log_replicas="2"; - storagePolicy = tLogPolicy = IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))); + storagePolicy = tLogPolicy = Reference(new PolicyAcross(2, "zoneid", Reference(new PolicyOne()))); } else if(mode == "triple" || mode == "fast_recovery_triple") { redundancy="3"; log_replicas="3"; - storagePolicy = tLogPolicy = IRepPolicyRef(new PolicyAcross(3, "zoneid", IRepPolicyRef(new PolicyOne()))); + storagePolicy = tLogPolicy = Reference(new PolicyAcross(3, "zoneid", Reference(new PolicyOne()))); } else if(mode == "three_datacenter" || mode == "multi_dc") { redundancy="6"; log_replicas="4"; - storagePolicy = IRepPolicyRef(new PolicyAcross(3, "dcid", - IRepPolicyRef(new 
PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))) + storagePolicy = Reference(new PolicyAcross(3, "dcid", + Reference(new PolicyAcross(2, "zoneid", Reference(new PolicyOne()))) )); - tLogPolicy = IRepPolicyRef(new PolicyAcross(2, "dcid", - IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))) + tLogPolicy = Reference(new PolicyAcross(2, "dcid", + Reference(new PolicyAcross(2, "zoneid", Reference(new PolicyOne()))) )); } else if(mode == "three_datacenter_fallback") { redundancy="4"; log_replicas="4"; - storagePolicy = tLogPolicy = IRepPolicyRef(new PolicyAcross(2, "dcid", IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))))); + storagePolicy = tLogPolicy = Reference(new PolicyAcross(2, "dcid", Reference(new PolicyAcross(2, "zoneid", Reference(new PolicyOne()))))); } else if(mode == "three_data_hall") { redundancy="3"; log_replicas="4"; - storagePolicy = IRepPolicyRef(new PolicyAcross(3, "data_hall", IRepPolicyRef(new PolicyOne()))); - tLogPolicy = IRepPolicyRef(new PolicyAcross(2, "data_hall", - IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))) + storagePolicy = Reference(new PolicyAcross(3, "data_hall", Reference(new PolicyOne()))); + tLogPolicy = Reference(new PolicyAcross(2, "data_hall", + Reference(new PolicyAcross(2, "zoneid", Reference(new PolicyOne()))) )); } else redundancySpecified = false; @@ -154,29 +154,29 @@ std::map configForToken( std::string const& mode ) { } std::string remote_redundancy, remote_log_replicas; - IRepPolicyRef remoteTLogPolicy; + Reference remoteTLogPolicy; bool remoteRedundancySpecified = true; if (mode == "remote_default") { remote_redundancy="0"; remote_log_replicas="0"; - remoteTLogPolicy = IRepPolicyRef(); + remoteTLogPolicy = Reference(); } else if (mode == "remote_single") { remote_redundancy="1"; remote_log_replicas="1"; - remoteTLogPolicy = IRepPolicyRef(new PolicyOne()); + remoteTLogPolicy = Reference(new PolicyOne()); } else if(mode == 
"remote_double") { remote_redundancy="2"; remote_log_replicas="2"; - remoteTLogPolicy = IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))); + remoteTLogPolicy = Reference(new PolicyAcross(2, "zoneid", Reference(new PolicyOne()))); } else if(mode == "remote_triple") { remote_redundancy="3"; remote_log_replicas="3"; - remoteTLogPolicy = IRepPolicyRef(new PolicyAcross(3, "zoneid", IRepPolicyRef(new PolicyOne()))); + remoteTLogPolicy = Reference(new PolicyAcross(3, "zoneid", Reference(new PolicyOne()))); } else if(mode == "remote_three_data_hall") { //FIXME: not tested in simulation remote_redundancy="3"; remote_log_replicas="4"; - remoteTLogPolicy = IRepPolicyRef(new PolicyAcross(2, "data_hall", - IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne()))) + remoteTLogPolicy = Reference(new PolicyAcross(2, "data_hall", + Reference(new PolicyAcross(2, "zoneid", Reference(new PolicyOne()))) )); } else remoteRedundancySpecified = false; @@ -212,7 +212,7 @@ ConfigurationResult::Type buildConfiguration( std::vector const& mode auto p = configKeysPrefix.toString(); if(!outConf.count(p + "storage_replication_policy") && outConf.count(p + "storage_replicas")) { int storageCount = stoi(outConf[p + "storage_replicas"]); - IRepPolicyRef storagePolicy = IRepPolicyRef(new PolicyAcross(storageCount, "zoneid", IRepPolicyRef(new PolicyOne()))); + Reference storagePolicy = Reference(new PolicyAcross(storageCount, "zoneid", Reference(new PolicyOne()))); BinaryWriter policyWriter(IncludeVersion()); serializeReplicationPolicy(policyWriter, storagePolicy); outConf[p+"storage_replication_policy"] = policyWriter.toStringRef().toString(); @@ -220,7 +220,7 @@ ConfigurationResult::Type buildConfiguration( std::vector const& mode if(!outConf.count(p + "log_replication_policy") && outConf.count(p + "log_replicas")) { int logCount = stoi(outConf[p + "log_replicas"]); - IRepPolicyRef logPolicy = IRepPolicyRef(new PolicyAcross(logCount, "zoneid", 
IRepPolicyRef(new PolicyOne()))); + Reference logPolicy = Reference(new PolicyAcross(logCount, "zoneid", Reference(new PolicyOne()))); BinaryWriter policyWriter(IncludeVersion()); serializeReplicationPolicy(policyWriter, logPolicy); outConf[p+"log_replication_policy"] = policyWriter.toStringRef().toString(); diff --git a/fdbrpc/Replication.h b/fdbrpc/Replication.h index 828ca1fd42..e8e32b79fa 100644 --- a/fdbrpc/Replication.h +++ b/fdbrpc/Replication.h @@ -36,23 +36,23 @@ public: virtual void delref() { ReferenceCounted::delref(); } bool selectReplicas( - IRepPolicyRef const& policy, + Reference const& policy, std::vector const& alsoServers, std::vector & results) { - LocalitySetRef fromServers = LocalitySetRef::addRef(this); + Reference fromServers = Reference::addRef(this); return policy->selectReplicas(fromServers, alsoServers, results); } bool selectReplicas( - IRepPolicyRef const& policy, + Reference const& policy, std::vector & results) { return selectReplicas(policy, std::vector(), results); } bool validate( - IRepPolicyRef const& policy) const + Reference const& policy) const { - LocalitySetRef const solutionSet = LocalitySetRef::addRef((LocalitySet*) this); + Reference const solutionSet = Reference::addRef((LocalitySet*) this); return policy->validate(solutionSet); } @@ -159,7 +159,7 @@ public: } static void staticDisplayEntries( - LocalitySetRef const& fromServers, + Reference const& fromServers, std::vector const& entryArray, const char* name = "zone") { @@ -174,8 +174,8 @@ public: // the specified value for the given key // The returned LocalitySet contains the LocalityRecords that have the same value as // the indexValue under the same indexKey (e.g., zoneid) - LocalitySetRef restrict(AttribKey indexKey, AttribValue indexValue ) { - LocalitySetRef localitySet; + Reference restrict(AttribKey indexKey, AttribValue indexValue ) { + Reference localitySet; LocalityCacheRecord searchRecord(AttribRecord(indexKey, indexValue), localitySet); auto itKeyValue = 
std::lower_bound(_cacheArray.begin(), _cacheArray.end(), searchRecord, LocalityCacheRecord::compareKeyValue); @@ -185,7 +185,7 @@ public: localitySet = itKeyValue->_resultset; } else { - localitySet = LocalitySetRef(new LocalitySet(*_localitygroup)); + localitySet = Reference(new LocalitySet(*_localitygroup)); _cachemisses ++; // If the key is not within the current key set, skip it because no items within // the current entry array has the key @@ -213,8 +213,8 @@ public: } // This function is used to create an subset containing the specified entries - LocalitySetRef restrict(std::vector const& entryArray) { - LocalitySetRef localitySet(new LocalitySet(*_localitygroup)); + Reference restrict(std::vector const& entryArray) { + Reference localitySet(new LocalitySet(*_localitygroup)); for (auto& entry : entryArray) { localitySet->add(getRecordViaEntry(entry), *this); } @@ -453,8 +453,8 @@ protected: // This class stores the cache record for each entry within the locality set struct LocalityCacheRecord { AttribRecord _attribute; - LocalitySetRef _resultset; - LocalityCacheRecord(AttribRecord const& attribute, LocalitySetRef resultset):_attribute(attribute),_resultset(resultset){} + Reference _resultset; + LocalityCacheRecord(AttribRecord const& attribute, Reference resultset):_attribute(attribute),_resultset(resultset){} LocalityCacheRecord(LocalityCacheRecord const& source):_attribute(source._attribute),_resultset(source._resultset){} virtual ~LocalityCacheRecord(){} LocalityCacheRecord& operator=(LocalityCacheRecord const& source) { @@ -584,7 +584,7 @@ struct LocalityMap : public LocalityGroup { virtual ~LocalityMap() {} bool selectReplicas( - IRepPolicyRef const& policy, + Reference const& policy, std::vector const& alsoServers, std::vector& entryResults, std::vector & results) @@ -601,7 +601,7 @@ struct LocalityMap : public LocalityGroup { } bool selectReplicas( - IRepPolicyRef const& policy, + Reference const& policy, std::vector const& alsoServers, std::vector & 
results) { @@ -610,7 +610,7 @@ struct LocalityMap : public LocalityGroup { } bool selectReplicas( - IRepPolicyRef const& policy, + Reference const& policy, std::vector & results) { return selectReplicas(policy, std::vector(), results); } diff --git a/fdbrpc/ReplicationPolicy.cpp b/fdbrpc/ReplicationPolicy.cpp index 070b8dd767..59b8f511d1 100644 --- a/fdbrpc/ReplicationPolicy.cpp +++ b/fdbrpc/ReplicationPolicy.cpp @@ -24,14 +24,14 @@ bool IReplicationPolicy::selectReplicas( - LocalitySetRef & fromServers, + Reference & fromServers, std::vector & results ) { return selectReplicas(fromServers, std::vector(), results); } bool IReplicationPolicy::validate( - LocalitySetRef const& solutionSet ) const + Reference const& solutionSet ) const { return validate(solutionSet->getEntries(), solutionSet); } @@ -40,7 +40,7 @@ bool IReplicationPolicy::validateFull( bool solved, std::vector const& solutionSet, std::vector const& alsoServers, - LocalitySetRef const& fromServers ) + Reference const& fromServers ) { bool valid = true; std::vector totalSolution(solutionSet); @@ -105,7 +105,7 @@ bool IReplicationPolicy::validateFull( } bool PolicyOne::selectReplicas( - LocalitySetRef & fromServers, + Reference & fromServers, std::vector const& alsoServers, std::vector & results ) { @@ -131,12 +131,12 @@ bool PolicyOne::selectReplicas( bool PolicyOne::validate( std::vector const& solutionSet, - LocalitySetRef const& fromServers ) const + Reference const& fromServers ) const { return ((solutionSet.size() > 0) && (fromServers->size() > 0)); } -PolicyAcross::PolicyAcross(int count, std::string const& attribKey, IRepPolicyRef const policy): +PolicyAcross::PolicyAcross(int count, std::string const& attribKey, Reference const policy): _count(count),_attribKey(attribKey),_policy(policy) { return; @@ -150,7 +150,7 @@ PolicyAcross::~PolicyAcross() // Debug purpose only // Trace all record entries to help debug // fromServers is the servers locality to be printed out. 
-void IReplicationPolicy::traceLocalityRecords(LocalitySetRef const& fromServers) { +void IReplicationPolicy::traceLocalityRecords(Reference const& fromServers) { std::vector> const& recordArray = fromServers->getRecordArray(); TraceEvent("LocalityRecordArray").detail("Size", recordArray.size()); for (auto& record : recordArray) { @@ -158,7 +158,7 @@ void IReplicationPolicy::traceLocalityRecords(LocalitySetRef const& fromServers) } } -void IReplicationPolicy::traceOneLocalityRecord(Reference record, LocalitySetRef const& fromServers) { +void IReplicationPolicy::traceOneLocalityRecord(Reference record, Reference const& fromServers) { int localityEntryIndex = record->_entryIndex._id; Reference const& dataMap = record->_dataMap; std::vector const& keyValueArray = dataMap->_keyvaluearray; @@ -185,7 +185,7 @@ void IReplicationPolicy::traceOneLocalityRecord(Reference record // return true if the team satisfies the policy; false otherwise bool PolicyAcross::validate( std::vector const& solutionSet, - LocalitySetRef const& fromServers ) const + Reference const& fromServers ) const { bool valid = true; int count = 0; @@ -262,7 +262,7 @@ bool PolicyAcross::validate( // that should be excluded from being selected as replicas. 
// FIXME: Simplify this function, such as removing unnecessary printf bool PolicyAcross::selectReplicas( - LocalitySetRef & fromServers, + Reference & fromServers, std::vector const& alsoServers, std::vector & results ) { @@ -437,7 +437,7 @@ bool PolicyAcross::selectReplicas( bool PolicyAnd::validate( std::vector const& solutionSet, - LocalitySetRef const& fromServers ) const + Reference const& fromServers ) const { bool valid = true; for (auto& policy : _policies) { @@ -450,7 +450,7 @@ bool PolicyAnd::validate( } bool PolicyAnd::selectReplicas( - LocalitySetRef & fromServers, + Reference & fromServers, std::vector const& alsoServers, std::vector & results ) { @@ -486,26 +486,26 @@ bool PolicyAnd::selectReplicas( return passed; } -void testPolicySerialization(IRepPolicyRef& policy) { +void testPolicySerialization(Reference& policy) { std::string policyInfo = policy->info(); BinaryWriter writer(IncludeVersion()); serializeReplicationPolicy(writer, policy); BinaryReader reader(writer.getData(), writer.getLength(), IncludeVersion()); - IRepPolicyRef copy; + Reference copy; serializeReplicationPolicy(reader, copy); ASSERT(policy->info() == copy->info()); } void testReplicationPolicy(int nTests) { - IRepPolicyRef policy = IRepPolicyRef(new PolicyAcross(1, "data_hall", IRepPolicyRef(new PolicyOne()))); + Reference policy = Reference(new PolicyAcross(1, "data_hall", Reference(new PolicyOne()))); testPolicySerialization(policy); - policy = IRepPolicyRef(new PolicyAnd({ - IRepPolicyRef(new PolicyAcross(2, "data_center", IRepPolicyRef(new PolicyAcross(3, "rack", IRepPolicyRef(new PolicyOne()))))), - IRepPolicyRef(new PolicyAcross(2, "data_center", IRepPolicyRef(new PolicyAcross(2, "data_hall", IRepPolicyRef(new PolicyOne()))))) + policy = Reference(new PolicyAnd({ + Reference(new PolicyAcross(2, "data_center", Reference(new PolicyAcross(3, "rack", Reference(new PolicyOne()))))), + Reference(new PolicyAcross(2, "data_center", Reference(new PolicyAcross(2, "data_hall", 
Reference(new PolicyOne()))))) })); testPolicySerialization(policy); diff --git a/fdbrpc/ReplicationPolicy.h b/fdbrpc/ReplicationPolicy.h index 74bc0baa80..74ccdbb312 100644 --- a/fdbrpc/ReplicationPolicy.h +++ b/fdbrpc/ReplicationPolicy.h @@ -26,7 +26,7 @@ #include "fdbrpc/ReplicationTypes.h" template -void serializeReplicationPolicy(Ar& ar, IRepPolicyRef& policy); +void serializeReplicationPolicy(Ar& ar, Reference& policy); extern void testReplicationPolicy(int nTests); @@ -40,36 +40,36 @@ struct IReplicationPolicy : public ReferenceCounted { virtual int maxResults() const = 0; virtual int depth() const = 0; virtual bool selectReplicas( - LocalitySetRef & fromServers, + Reference & fromServers, std::vector const& alsoServers, std::vector & results ) = 0; - virtual void traceLocalityRecords(LocalitySetRef const& fromServers); - virtual void traceOneLocalityRecord(Reference record, LocalitySetRef const& fromServers); + virtual void traceLocalityRecords(Reference const& fromServers); + virtual void traceOneLocalityRecord(Reference record, Reference const& fromServers); virtual bool validate( std::vector const& solutionSet, - LocalitySetRef const& fromServers ) const = 0; + Reference const& fromServers ) const = 0; bool operator == ( const IReplicationPolicy& r ) const { return info() == r.info(); } bool operator != ( const IReplicationPolicy& r ) const { return info() != r.info(); } template void serialize(Ar& ar) { - IRepPolicyRef refThis(this); + Reference refThis(this); serializeReplicationPolicy(ar, refThis); refThis->delref_no_destroy(); } // Utility functions bool selectReplicas( - LocalitySetRef & fromServers, + Reference & fromServers, std::vector & results ); bool validate( - LocalitySetRef const& solutionSet ) const; + Reference const& solutionSet ) const; bool validateFull( bool solved, std::vector const& solutionSet, std::vector const& alsoServers, - LocalitySetRef const& fromServers ); + Reference const& fromServers ); // Returns a set of the attributes 
that this policy uses in selection and validation. std::set attributeKeys() const @@ -78,7 +78,7 @@ struct IReplicationPolicy : public ReferenceCounted { }; template -inline void load( Archive& ar, IRepPolicyRef& value ) { +inline void load( Archive& ar, Reference& value ) { bool present = (value.getPtr()); ar >> present; if (present) { @@ -90,11 +90,11 @@ inline void load( Archive& ar, IRepPolicyRef& value ) { } template -inline void save( Archive& ar, const IRepPolicyRef& value ) { +inline void save( Archive& ar, const Reference& value ) { bool present = (value.getPtr()); ar << present; if (present) { - serializeReplicationPolicy(ar, (IRepPolicyRef&) value); + serializeReplicationPolicy(ar, (Reference&) value); } } @@ -107,9 +107,9 @@ struct PolicyOne : IReplicationPolicy, public ReferenceCounted { virtual int depth() const { return 1; } virtual bool validate( std::vector const& solutionSet, - LocalitySetRef const& fromServers ) const; + Reference const& fromServers ) const; virtual bool selectReplicas( - LocalitySetRef & fromServers, + Reference & fromServers, std::vector const& alsoServers, std::vector & results ); template @@ -119,7 +119,7 @@ struct PolicyOne : IReplicationPolicy, public ReferenceCounted { }; struct PolicyAcross : IReplicationPolicy, public ReferenceCounted { - PolicyAcross(int count, std::string const& attribKey, IRepPolicyRef const policy); + PolicyAcross(int count, std::string const& attribKey, Reference const policy); virtual ~PolicyAcross(); virtual std::string name() const { return "Across"; } virtual std::string info() const @@ -128,9 +128,9 @@ struct PolicyAcross : IReplicationPolicy, public ReferenceCounted virtual int depth() const { return 1 + _policy->depth(); } virtual bool validate( std::vector const& solutionSet, - LocalitySetRef const& fromServers ) const; + Reference const& fromServers ) const; virtual bool selectReplicas( - LocalitySetRef & fromServers, + Reference & fromServers, std::vector const& alsoServers, std::vector & 
results ); @@ -149,18 +149,18 @@ struct PolicyAcross : IReplicationPolicy, public ReferenceCounted protected: int _count; std::string _attribKey; - IRepPolicyRef _policy; + Reference _policy; // Cache temporary members std::vector _usedValues; std::vector _newResults; - LocalitySetRef _selected; + Reference _selected; VectorRef> _addedResults; Arena _arena; }; struct PolicyAnd : IReplicationPolicy, public ReferenceCounted { - PolicyAnd(std::vector policies): _policies(policies), _sortedPolicies(policies) + PolicyAnd(std::vector> policies): _policies(policies), _sortedPolicies(policies) { // Sort the policy array std::sort(_sortedPolicies.begin(), _sortedPolicies.end(), PolicyAnd::comparePolicy); @@ -194,14 +194,14 @@ struct PolicyAnd : IReplicationPolicy, public ReferenceCounted { } virtual bool validate( std::vector const& solutionSet, - LocalitySetRef const& fromServers ) const; + Reference const& fromServers ) const; virtual bool selectReplicas( - LocalitySetRef & fromServers, + Reference & fromServers, std::vector const& alsoServers, std::vector & results ); - static bool comparePolicy(const IRepPolicyRef& rhs, const IRepPolicyRef& lhs) + static bool comparePolicy(const Reference& rhs, const Reference& lhs) { return (lhs->maxResults() < rhs->maxResults()) || (!(rhs->maxResults() < lhs->maxResults()) && (lhs->depth() < rhs->depth())); } template @@ -219,18 +219,18 @@ struct PolicyAnd : IReplicationPolicy, public ReferenceCounted { } virtual void attributeKeys(std::set *set) const override - { for (const IRepPolicyRef& r : _policies) { r->attributeKeys(set); } } + { for (const Reference& r : _policies) { r->attributeKeys(set); } } protected: - std::vector _policies; - std::vector _sortedPolicies; + std::vector> _policies; + std::vector> _sortedPolicies; }; extern int testReplication(); template -void serializeReplicationPolicy(Ar& ar, IRepPolicyRef& policy) { +void serializeReplicationPolicy(Ar& ar, Reference& policy) { if(Ar::isDeserializing) { StringRef name; 
serializer(ar, name); @@ -238,20 +238,20 @@ void serializeReplicationPolicy(Ar& ar, IRepPolicyRef& policy) { if(name == LiteralStringRef("One")) { PolicyOne* pointer = new PolicyOne(); pointer->serialize(ar); - policy = IRepPolicyRef(pointer); + policy = Reference(pointer); } else if(name == LiteralStringRef("Across")) { - PolicyAcross* pointer = new PolicyAcross(0, "", IRepPolicyRef()); + PolicyAcross* pointer = new PolicyAcross(0, "", Reference()); pointer->serialize(ar); - policy = IRepPolicyRef(pointer); + policy = Reference(pointer); } else if(name == LiteralStringRef("And")) { PolicyAnd* pointer = new PolicyAnd({}); pointer->serialize(ar); - policy = IRepPolicyRef(pointer); + policy = Reference(pointer); } else if(name == LiteralStringRef("None")) { - policy = IRepPolicyRef(); + policy = Reference(); } else { TraceEvent(SevError, "SerializingInvalidPolicyType") diff --git a/fdbrpc/ReplicationTypes.h b/fdbrpc/ReplicationTypes.h index ef5463f54b..9a9f517d15 100644 --- a/fdbrpc/ReplicationTypes.h +++ b/fdbrpc/ReplicationTypes.h @@ -34,9 +34,6 @@ struct LocalityRecord; struct StringToIntMap; struct IReplicationPolicy; -typedef Reference LocalitySetRef; -typedef Reference IRepPolicyRef; - extern int g_replicationdebug; struct AttribKey { diff --git a/fdbrpc/ReplicationUtils.cpp b/fdbrpc/ReplicationUtils.cpp index ae92fd7950..d2c7e734a0 100644 --- a/fdbrpc/ReplicationUtils.cpp +++ b/fdbrpc/ReplicationUtils.cpp @@ -27,8 +27,8 @@ double ratePolicy( - LocalitySetRef & localitySet, - IRepPolicyRef const& policy, + Reference & localitySet, + Reference const& policy, unsigned int nTestTotal) { double rating = -1.0; @@ -85,14 +85,14 @@ double ratePolicy( bool findBestPolicySet( std::vector& bestResults, - LocalitySetRef & localitySet, - IRepPolicyRef const& policy, + Reference & localitySet, + Reference const& policy, unsigned int nMinItems, unsigned int nSelectTests, unsigned int nPolicyTests) { bool bSucceeded = true; - LocalitySetRef bestLocalitySet, testLocalitySet; + 
Reference bestLocalitySet, testLocalitySet; std::vector results; double testRate, bestRate = -1.0; @@ -162,15 +162,15 @@ bool findBestPolicySet( bool findBestUniquePolicySet( std::vector& bestResults, - LocalitySetRef & localitySet, - IRepPolicyRef const& policy, + Reference & localitySet, + Reference const& policy, StringRef localityUniquenessKey, unsigned int nMinItems, unsigned int nSelectTests, unsigned int nPolicyTests) { bool bSucceeded = true; - LocalitySetRef bestLocalitySet, testLocalitySet; + Reference bestLocalitySet, testLocalitySet; std::vector results; double testRate, bestRate = -1.0; @@ -262,7 +262,7 @@ bool findBestUniquePolicySet( bool validateAllCombinations( std::vector & offendingCombo, LocalityGroup const& localitySet, - IRepPolicyRef const& policy, + Reference const& policy, std::vector const& newItems, unsigned int nCombinationSize, bool bCheckIfValid) @@ -286,12 +286,12 @@ bool validateAllCombinations( } else { - bool bIsValidGroup; + bool bIsValidGroup; LocalityGroup localityGroup; std::string bitmask(nCombinationSize, 1); // K leading 1's bitmask.resize(newItems.size(), 0); // N-K trailing 0's - + do { localityGroup.deep_copy(localitySet); @@ -337,7 +337,7 @@ bool validateAllCombinations( bool validateAllCombinations( LocalityGroup const& localitySet, - IRepPolicyRef const& policy, + Reference const& policy, std::vector const& newItems, unsigned int nCombinationSize, bool bCheckIfValid) @@ -358,10 +358,10 @@ repTestType convertToTestType(int iValue) { return sValue; } -LocalitySetRef createTestLocalityMap(std::vector& indexes, int dcTotal, +Reference createTestLocalityMap(std::vector& indexes, int dcTotal, int szTotal, int rackTotal, int slotTotal, int independentItems, int independentTotal) { - LocalitySetRef buildServer(new LocalityMap()); + Reference buildServer(new LocalityMap()); LocalityMap* serverMap = (LocalityMap*) buildServer.getPtr(); int serverValue, dcLoop, szLoop, rackLoop, slotLoop; std::string dcText, szText, rackText, 
slotText, independentName, independentText; @@ -442,8 +442,8 @@ LocalitySetRef createTestLocalityMap(std::vector& indexes, int dcTo } bool testPolicy( - LocalitySetRef servers, - IRepPolicyRef const& policy, + Reference servers, + Reference const& policy, std::vector const& including, bool validate) { @@ -506,109 +506,109 @@ bool testPolicy( } bool testPolicy( - LocalitySetRef servers, - IRepPolicyRef const& policy, + Reference servers, + Reference const& policy, bool validate) { return testPolicy(servers, policy, emptyEntryArray, validate); } -std::vector const& getStaticPolicies() +std::vector> const& getStaticPolicies() { - static std::vector staticPolicies; + static std::vector> staticPolicies; if (staticPolicies.empty()) { staticPolicies = { - IRepPolicyRef( new PolicyOne() ), + Reference( new PolicyOne() ), // 1 'dc^2 x 1' - IRepPolicyRef( new PolicyAcross(2, "dc", IRepPolicyRef( new PolicyOne() ) ) ), + Reference( new PolicyAcross(2, "dc", Reference( new PolicyOne() ) ) ), // 2 'dc^3 x 1' - IRepPolicyRef( new PolicyAcross(3, "dc", IRepPolicyRef( new PolicyOne() ) ) ), + Reference( new PolicyAcross(3, "dc", Reference( new PolicyOne() ) ) ), // 3 'sz^3 x 1' - IRepPolicyRef( new PolicyAcross(3, "sz", IRepPolicyRef( new PolicyOne() ) ) ), + Reference( new PolicyAcross(3, "sz", Reference( new PolicyOne() ) ) ), // 4 'dc^1 x az^3 x 1' - IRepPolicyRef( new PolicyAcross(1, "dc", IRepPolicyRef( new PolicyAcross(3, "az", IRepPolicyRef( new PolicyOne() ))) ) ), + Reference( new PolicyAcross(1, "dc", Reference( new PolicyAcross(3, "az", Reference( new PolicyOne() ))) ) ), // 5 '(sz^3 x rack^2 x 1) + (dc^2 x az^3 x 1)' - IRepPolicyRef( new PolicyAnd( { IRepPolicyRef(new PolicyAcross(3, "sz", IRepPolicyRef(new PolicyAcross(2, "rack", IRepPolicyRef(new PolicyOne() ))))), IRepPolicyRef(new PolicyAcross(2, "dc", IRepPolicyRef(new PolicyAcross(3, "az", IRepPolicyRef(new PolicyOne()) ))) )} ) ), + Reference( new PolicyAnd( { Reference(new PolicyAcross(3, "sz", Reference(new 
PolicyAcross(2, "rack", Reference(new PolicyOne() ))))), Reference(new PolicyAcross(2, "dc", Reference(new PolicyAcross(3, "az", Reference(new PolicyOne()) ))) )} ) ), // 6 '(sz^1 x 1)' - IRepPolicyRef( new PolicyAcross(1, "sz", IRepPolicyRef(new PolicyOne())) ), + Reference( new PolicyAcross(1, "sz", Reference(new PolicyOne())) ), // 7 '(sz^1 x 1) + (sz^1 x 1)' - IRepPolicyRef( new PolicyAnd( { IRepPolicyRef(new PolicyAcross(1, "sz", IRepPolicyRef(new PolicyOne()))), IRepPolicyRef(new PolicyAcross(1, "sz", IRepPolicyRef(new PolicyOne()))) } ) ), + Reference( new PolicyAnd( { Reference(new PolicyAcross(1, "sz", Reference(new PolicyOne()))), Reference(new PolicyAcross(1, "sz", Reference(new PolicyOne()))) } ) ), // 8 '(sz^2 x 1) + (sz^2 x 1)' - IRepPolicyRef( new PolicyAnd( { IRepPolicyRef(new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))), IRepPolicyRef(new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))) } ) ), + Reference( new PolicyAnd( { Reference(new PolicyAcross(2, "sz", Reference(new PolicyOne()))), Reference(new PolicyAcross(2, "sz", Reference(new PolicyOne()))) } ) ), // 9 '(dc^1 x sz^2 x 1)' - IRepPolicyRef( new PolicyAcross(1, "dc", IRepPolicyRef( new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))))), + Reference( new PolicyAcross(1, "dc", Reference( new PolicyAcross(2, "sz", Reference(new PolicyOne()))))), //10 '(dc^2 x sz^2 x 1)' - IRepPolicyRef( new PolicyAcross(2, "dc", IRepPolicyRef( new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))))), + Reference( new PolicyAcross(2, "dc", Reference( new PolicyAcross(2, "sz", Reference(new PolicyOne()))))), //11 '(dc^1 x sz^2 x 1) + (dc^2 x sz^2 x 1)' - IRepPolicyRef( new PolicyAnd( { IRepPolicyRef(new PolicyAcross(1, "dc", IRepPolicyRef( new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))))), IRepPolicyRef(new PolicyAcross(2, "dc", IRepPolicyRef( new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))))) } ) ), + Reference( new PolicyAnd( { Reference(new PolicyAcross(1, "dc", 
Reference( new PolicyAcross(2, "sz", Reference(new PolicyOne()))))), Reference(new PolicyAcross(2, "dc", Reference( new PolicyAcross(2, "sz", Reference(new PolicyOne()))))) } ) ), //12 '(dc^2 x sz^2 x 1) + (dc^1 x sz^2 x 1)' - IRepPolicyRef( new PolicyAnd( { IRepPolicyRef(new PolicyAcross(2, "dc", IRepPolicyRef( new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))))), IRepPolicyRef(new PolicyAcross(1, "dc", IRepPolicyRef( new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))))) } ) ), + Reference( new PolicyAnd( { Reference(new PolicyAcross(2, "dc", Reference( new PolicyAcross(2, "sz", Reference(new PolicyOne()))))), Reference(new PolicyAcross(1, "dc", Reference( new PolicyAcross(2, "sz", Reference(new PolicyOne()))))) } ) ), //13 '(sz^2 x 1) + (dc^1 x sz^2 x 1)' - IRepPolicyRef( new PolicyAnd( { IRepPolicyRef(new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))), IRepPolicyRef(new PolicyAcross(1, "dc", IRepPolicyRef( new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))))) } ) ), + Reference( new PolicyAnd( { Reference(new PolicyAcross(2, "sz", Reference(new PolicyOne()))), Reference(new PolicyAcross(1, "dc", Reference( new PolicyAcross(2, "sz", Reference(new PolicyOne()))))) } ) ), //14 '(sz^2 x 1) + (dc^2 x sz^2 x 1)' - IRepPolicyRef( new PolicyAnd( { IRepPolicyRef(new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))), IRepPolicyRef(new PolicyAcross(2, "dc", IRepPolicyRef( new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))))) } ) ), + Reference( new PolicyAnd( { Reference(new PolicyAcross(2, "sz", Reference(new PolicyOne()))), Reference(new PolicyAcross(2, "dc", Reference( new PolicyAcross(2, "sz", Reference(new PolicyOne()))))) } ) ), //15 '(sz^3 x 1) + (dc^2 x sz^2 x 1)' - IRepPolicyRef( new PolicyAnd( { IRepPolicyRef(new PolicyAcross(3, "sz", IRepPolicyRef(new PolicyOne()))), IRepPolicyRef(new PolicyAcross(2, "dc", IRepPolicyRef( new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))))) } ) ), + Reference( new PolicyAnd( { 
Reference(new PolicyAcross(3, "sz", Reference(new PolicyOne()))), Reference(new PolicyAcross(2, "dc", Reference( new PolicyAcross(2, "sz", Reference(new PolicyOne()))))) } ) ), //16 '(sz^1 x 1) + (sz^2 x 1)' - IRepPolicyRef( new PolicyAnd( { IRepPolicyRef(new PolicyAcross(1, "sz", IRepPolicyRef(new PolicyOne()))), IRepPolicyRef(new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))) } ) ), + Reference( new PolicyAnd( { Reference(new PolicyAcross(1, "sz", Reference(new PolicyOne()))), Reference(new PolicyAcross(2, "sz", Reference(new PolicyOne()))) } ) ), //17 '(sz^2 x 1) + (sz^3 x 1)' - IRepPolicyRef( new PolicyAnd( { IRepPolicyRef(new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))), IRepPolicyRef(new PolicyAcross(3, "sz", IRepPolicyRef(new PolicyOne()))) } ) ), + Reference( new PolicyAnd( { Reference(new PolicyAcross(2, "sz", Reference(new PolicyOne()))), Reference(new PolicyAcross(3, "sz", Reference(new PolicyOne()))) } ) ), //18 '(sz^1 x 1) + (sz^2 x 1) + (sz^3 x 1)' - IRepPolicyRef( new PolicyAnd( { IRepPolicyRef(new PolicyAcross(1, "sz", IRepPolicyRef(new PolicyOne()))), IRepPolicyRef(new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne()))), IRepPolicyRef(new PolicyAcross(3, "sz", IRepPolicyRef(new PolicyOne()))) } ) ), + Reference( new PolicyAnd( { Reference(new PolicyAcross(1, "sz", Reference(new PolicyOne()))), Reference(new PolicyAcross(2, "sz", Reference(new PolicyOne()))), Reference(new PolicyAcross(3, "sz", Reference(new PolicyOne()))) } ) ), //19 '(sz^1 x 1) + (machine^1 x 1)' - IRepPolicyRef( new PolicyAnd( { IRepPolicyRef(new PolicyAcross(1, "sz", IRepPolicyRef(new PolicyOne()))), IRepPolicyRef(new PolicyAcross(1, "zoneid", IRepPolicyRef(new PolicyOne()))) } ) ), + Reference( new PolicyAnd( { Reference(new PolicyAcross(1, "sz", Reference(new PolicyOne()))), Reference(new PolicyAcross(1, "zoneid", Reference(new PolicyOne()))) } ) ), // '(dc^1 x 1) + (sz^1 x 1) + (machine^1 x 1)' - // IRepPolicyRef( new PolicyAnd( { IRepPolicyRef(new 
PolicyAcross(1, "dc", IRepPolicyRef(new PolicyOne()))), IRepPolicyRef(new PolicyAcross(1, "sz", IRepPolicyRef(new PolicyOne()))), IRepPolicyRef(new PolicyAcross(1, "zoneid", IRepPolicyRef(new PolicyOne()))) } ) ), + // Reference( new PolicyAnd( { Reference(new PolicyAcross(1, "dc", Reference(new PolicyOne()))), Reference(new PolicyAcross(1, "sz", Reference(new PolicyOne()))), Reference(new PolicyAcross(1, "zoneid", Reference(new PolicyOne()))) } ) ), // '(dc^1 x sz^3 x 1)' - IRepPolicyRef( new PolicyAcross(1, "dc", IRepPolicyRef( new PolicyAcross(3, "sz", IRepPolicyRef(new PolicyOne())))) ), + Reference( new PolicyAcross(1, "dc", Reference( new PolicyAcross(3, "sz", Reference(new PolicyOne())))) ), // '(dc^2 x sz^3 x 1)' - IRepPolicyRef( new PolicyAcross(2, "dc", IRepPolicyRef( new PolicyAcross(3, "sz", IRepPolicyRef(new PolicyOne())))) ), + Reference( new PolicyAcross(2, "dc", Reference( new PolicyAcross(3, "sz", Reference(new PolicyOne())))) ), // '(dc^2 x az^3 x 1)' - IRepPolicyRef( new PolicyAcross(2, "dc", IRepPolicyRef( new PolicyAcross(3, "az", IRepPolicyRef(new PolicyOne())))) ), + Reference( new PolicyAcross(2, "dc", Reference( new PolicyAcross(3, "az", Reference(new PolicyOne())))) ), // '(sz^1 x 1) + (dc^2 x az^3 x 1)' - IRepPolicyRef( new PolicyAnd({IRepPolicyRef(new PolicyAcross(1, "sz", IRepPolicyRef(new PolicyOne()))), IRepPolicyRef(new PolicyAcross(2, "dc", IRepPolicyRef( new PolicyAcross(3, "az", IRepPolicyRef(new PolicyOne())))))}) ), + Reference( new PolicyAnd({Reference(new PolicyAcross(1, "sz", Reference(new PolicyOne()))), Reference(new PolicyAcross(2, "dc", Reference( new PolicyAcross(3, "az", Reference(new PolicyOne())))))}) ), // 'dc^1 x (az^2 x 1) + (sz^2 x 1)' - // IRepPolicyRef( new PolicyAcross(1, "dc", IRepPolicyRef(new PolicyAnd({IRepPolicyRef(new PolicyAcross(2, "az", IRepPolicyRef(new PolicyOne()))), IRepPolicyRef(new PolicyAcross(2, "sz", IRepPolicyRef(new PolicyOne())))}))) ), + // Reference( new PolicyAcross(1, "dc", 
Reference(new PolicyAnd({Reference(new PolicyAcross(2, "az", Reference(new PolicyOne()))), Reference(new PolicyAcross(2, "sz", Reference(new PolicyOne())))}))) ), // Require backtracking - IRepPolicyRef( new PolicyAcross(8, "zoneid", IRepPolicyRef(new PolicyAcross(1, "az", IRepPolicyRef(new PolicyOne()))) ) ), - IRepPolicyRef( new PolicyAcross(8, "zoneid", IRepPolicyRef(new PolicyAcross(1, "sz", IRepPolicyRef(new PolicyOne()))) ) ) + Reference( new PolicyAcross(8, "zoneid", Reference(new PolicyAcross(1, "az", Reference(new PolicyOne()))) ) ), + Reference( new PolicyAcross(8, "zoneid", Reference(new PolicyAcross(1, "sz", Reference(new PolicyOne()))) ) ) }; } return staticPolicies; } -IRepPolicyRef const randomAcrossPolicy(LocalitySet const& serverSet) +Reference const randomAcrossPolicy(LocalitySet const& serverSet) { int usedKeyTotal, keysUsed, keyIndex, valueTotal, maxValueTotal, maxKeyTotal, skips, lastKeyIndex; std::vector keyArray(serverSet.getGroupKeyMap()->_lookuparray); @@ -616,7 +616,7 @@ IRepPolicyRef const randomAcrossPolicy(LocalitySet const& serverSet) AttribKey indexKey; Optional keyValue; std::string keyText; - IRepPolicyRef policy(new PolicyOne()); + Reference policy(new PolicyOne()); // Determine the number of keys to used within the policy usedKeyTotal = g_random->randomInt(1, keyArray.size()+1); @@ -669,7 +669,7 @@ IRepPolicyRef const randomAcrossPolicy(LocalitySet const& serverSet) } valueTotal = g_random->randomInt(1, valueSet.size()+2); if ((valueTotal > maxValueTotal) && (g_random->random01() > .25)) valueTotal = maxValueTotal; - policy = IRepPolicyRef( new PolicyAcross(valueTotal, keyText, policy) ); + policy = Reference( new PolicyAcross(valueTotal, keyText, policy) ); if (g_replicationdebug > 1) { printf(" item%3d: (%3d =>%3d) %-10s =>%4d\n", keysUsed+1, keyIndex, indexKey._id, keyText.c_str(), valueTotal); } @@ -725,8 +725,8 @@ int testReplication() int policyMin = policyMinEnv ? 
atoi(policyMinEnv) : 2; int policyIndex, testCounter, alsoSize, debugBackup, maxAlsoSize; std::vector serverIndexes; - LocalitySetRef testServers; - std::vector policies; + Reference testServers; + std::vector> policies; std::vector alsoServers, bestSet; int totalErrors = 0; @@ -819,12 +819,12 @@ void filterLocalityDataForPolicy(const std::set& keys, LocalityData } } -void filterLocalityDataForPolicy(IRepPolicyRef policy, LocalityData* ld) { +void filterLocalityDataForPolicy(Reference policy, LocalityData* ld) { if (!policy) return; filterLocalityDataForPolicy(policy->attributeKeys(), ld); } -void filterLocalityDataForPolicy(IRepPolicyRef policy, std::vector* vld) { +void filterLocalityDataForPolicy(Reference policy, std::vector* vld) { if (!policy) return; std::set keys = policy->attributeKeys(); for (LocalityData& ld : *vld) { diff --git a/fdbrpc/ReplicationUtils.h b/fdbrpc/ReplicationUtils.h index f359e7489f..f9f1987e78 100644 --- a/fdbrpc/ReplicationUtils.h +++ b/fdbrpc/ReplicationUtils.h @@ -34,22 +34,22 @@ extern repTestType convertToTestType(int iValue); extern int testReplication(); extern double ratePolicy( - LocalitySetRef & localitySet, - IRepPolicyRef const& policy, + Reference & localitySet, + Reference const& policy, unsigned int nSelectTests); extern bool findBestPolicySet( std::vector& bestResults, - LocalitySetRef & localitySet, - IRepPolicyRef const& policy, + Reference & localitySet, + Reference const& policy, unsigned int nMinItems, unsigned int nSelectTests, unsigned int nPolicyTests); extern bool findBestUniquePolicySet( std::vector& bestResults, - LocalitySetRef & localitySet, - IRepPolicyRef const& policy, + Reference & localitySet, + Reference const& policy, StringRef localityUniquenessKey, unsigned int nMinItems, unsigned int nSelectTests, @@ -60,20 +60,20 @@ extern bool findBestUniquePolicySet( extern bool validateAllCombinations( std::vector & offendingCombo, LocalityGroup const& localitySet, - IRepPolicyRef const& policy, + Reference 
const& policy, std::vector const& newItems, unsigned int nCombinationSize, bool bCheckIfValid = true); extern bool validateAllCombinations( LocalityGroup const& localitySet, - IRepPolicyRef const& policy, + Reference const& policy, std::vector const& newItems, unsigned int nCombinationSize, bool bCheckIfValid = true); /// Remove all pieces of locality information from the LocalityData that will not be used when validating the policy. -void filterLocalityDataForPolicy(IRepPolicyRef policy, LocalityData* ld); -void filterLocalityDataForPolicy(IRepPolicyRef policy, std::vector* vld); +void filterLocalityDataForPolicy(Reference policy, LocalityData* ld); +void filterLocalityDataForPolicy(Reference policy, std::vector* vld); #endif diff --git a/fdbrpc/simulator.h b/fdbrpc/simulator.h index 2987c80655..7cb645e70f 100644 --- a/fdbrpc/simulator.h +++ b/fdbrpc/simulator.h @@ -280,11 +280,11 @@ public: std::set protectedAddresses; std::map currentlyRebootingProcesses; class ClusterConnectionString* extraDB; - IRepPolicyRef storagePolicy; - IRepPolicyRef tLogPolicy; + Reference storagePolicy; + Reference tLogPolicy; int32_t tLogWriteAntiQuorum; Optional> primaryDcId; - IRepPolicyRef remoteTLogPolicy; + Reference remoteTLogPolicy; int32_t usableRegions; std::string disablePrimary; std::string disableRemote; @@ -292,8 +292,8 @@ public: bool allowLogSetKills; Optional> remoteDcId; bool hasSatelliteReplication; - IRepPolicyRef satelliteTLogPolicy; - IRepPolicyRef satelliteTLogPolicyFallback; + Reference satelliteTLogPolicy; + Reference satelliteTLogPolicyFallback; int32_t satelliteTLogWriteAntiQuorum; int32_t satelliteTLogWriteAntiQuorumFallback; std::vector>> primarySatelliteDcIds; diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index ca805e0db1..40556f0c40 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -234,10 +234,10 @@ public: throw no_more_servers(); } - std::vector 
getWorkersForSeedServers( DatabaseConfiguration const& conf, IRepPolicyRef const& policy, Optional>> const& dcId = Optional>>() ) { + std::vector getWorkersForSeedServers( DatabaseConfiguration const& conf, Reference const& policy, Optional>> const& dcId = Optional>>() ) { std::map> fitness_workers; std::vector results; - LocalitySetRef logServerSet = Reference(new LocalityMap()); + Reference logServerSet = Reference(new LocalityMap()); LocalityMap* logServerMap = (LocalityMap*) logServerSet.getPtr(); bool bCompleted = false; @@ -275,11 +275,11 @@ public: return results; } - std::vector getWorkersForTlogs( DatabaseConfiguration const& conf, int32_t required, int32_t desired, IRepPolicyRef const& policy, std::map< Optional>, int>& id_used, bool checkStable = false, std::set> dcIds = std::set>() ) { + std::vector getWorkersForTlogs( DatabaseConfiguration const& conf, int32_t required, int32_t desired, Reference const& policy, std::map< Optional>, int>& id_used, bool checkStable = false, std::set> dcIds = std::set>() ) { std::map, vector> fitness_workers; std::vector results; std::vector unavailableLocals; - LocalitySetRef logServerSet; + Reference logServerSet; LocalityMap* logServerMap; bool bCompleted = false; diff --git a/fdbserver/DBCoreState.h b/fdbserver/DBCoreState.h index 1b6d3e3bc1..ebaaff8c73 100644 --- a/fdbserver/DBCoreState.h +++ b/fdbserver/DBCoreState.h @@ -41,7 +41,7 @@ struct CoreTLogSet { int32_t tLogWriteAntiQuorum; // The write anti quorum previously used to write to tLogs, which might be different from the anti quorum suggested by the current configuration going forward! 
int32_t tLogReplicationFactor; // The replication factor previously used to write to tLogs, which might be different from the current configuration std::vector< LocalityData > tLogLocalities; // Stores the localities of the log servers - IRepPolicyRef tLogPolicy; + Reference tLogPolicy; bool isLocal; int8_t locality; Version startVersion; diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index f57e5c6b13..ead66847d1 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -3698,7 +3698,7 @@ ACTOR Future dataDistributor(DataDistributorInterface di, Reference policy, int processCount) { Database database = DatabaseContext::create( Reference>(new AsyncVar()), Never(), @@ -3740,7 +3740,7 @@ DDTeamCollection* testTeamCollection(int teamSize, IRepPolicyRef policy, int pro return collection; } -DDTeamCollection* testMachineTeamCollection(int teamSize, IRepPolicyRef policy, int processCount) { +DDTeamCollection* testMachineTeamCollection(int teamSize, Reference policy, int processCount) { Database database = DatabaseContext::create(Reference>(new AsyncVar()), Never(), LocalityData(), false); @@ -3792,7 +3792,7 @@ TEST_CASE("DataDistribution/AddTeamsBestOf/UseMachineID") { int desiredTeams = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * processSize; int maxTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * processSize; - IRepPolicyRef policy = IRepPolicyRef(new PolicyAcross(teamSize, "zoneid", IRepPolicyRef(new PolicyOne()))); + Reference policy = Reference(new PolicyAcross(teamSize, "zoneid", Reference(new PolicyOne()))); state DDTeamCollection* collection = testMachineTeamCollection(teamSize, policy, processSize); int result = collection->addTeamsBestOf(30, desiredTeams, maxTeams); @@ -3812,7 +3812,7 @@ TEST_CASE("DataDistribution/AddTeamsBestOf/NotUseMachineID") { int desiredTeams = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * processSize; int maxTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * processSize; - 
IRepPolicyRef policy = IRepPolicyRef(new PolicyAcross(teamSize, "zoneid", IRepPolicyRef(new PolicyOne()))); + Reference policy = Reference(new PolicyAcross(teamSize, "zoneid", Reference(new PolicyOne()))); state DDTeamCollection* collection = testMachineTeamCollection(teamSize, policy, processSize); if (collection == NULL) { @@ -3830,7 +3830,7 @@ TEST_CASE("DataDistribution/AddTeamsBestOf/NotUseMachineID") { } TEST_CASE("DataDistribution/AddAllTeams/isExhaustive") { - IRepPolicyRef policy = IRepPolicyRef(new PolicyAcross(3, "zoneid", IRepPolicyRef(new PolicyOne()))); + Reference policy = Reference(new PolicyAcross(3, "zoneid", Reference(new PolicyOne()))); state int processSize = 10; state int desiredTeams = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * processSize; state int maxTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * processSize; @@ -3849,7 +3849,7 @@ TEST_CASE("DataDistribution/AddAllTeams/isExhaustive") { } TEST_CASE("/DataDistribution/AddAllTeams/withLimit") { - IRepPolicyRef policy = IRepPolicyRef(new PolicyAcross(3, "zoneid", IRepPolicyRef(new PolicyOne()))); + Reference policy = Reference(new PolicyAcross(3, "zoneid", Reference(new PolicyOne()))); state int processSize = 10; state int desiredTeams = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * processSize; state int maxTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * processSize; @@ -3867,7 +3867,7 @@ TEST_CASE("/DataDistribution/AddAllTeams/withLimit") { TEST_CASE("/DataDistribution/AddTeamsBestOf/SkippingBusyServers") { wait(Future(Void())); - IRepPolicyRef policy = IRepPolicyRef(new PolicyAcross(3, "zoneid", IRepPolicyRef(new PolicyOne()))); + Reference policy = Reference(new PolicyAcross(3, "zoneid", Reference(new PolicyOne()))); state int processSize = 10; state int desiredTeams = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * processSize; state int maxTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * processSize; @@ -3897,7 +3897,7 @@ TEST_CASE("/DataDistribution/AddTeamsBestOf/SkippingBusyServers") { 
TEST_CASE("/DataDistribution/AddTeamsBestOf/NotEnoughServers") { wait(Future(Void())); - IRepPolicyRef policy = IRepPolicyRef(new PolicyAcross(3, "zoneid", IRepPolicyRef(new PolicyOne()))); + Reference policy = Reference(new PolicyAcross(3, "zoneid", Reference(new PolicyOne()))); state int processSize = 5; state int desiredTeams = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * processSize; state int maxTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * processSize; diff --git a/fdbserver/LogSystem.h b/fdbserver/LogSystem.h index 8b8dc7e8dc..3ed53b0475 100644 --- a/fdbserver/LogSystem.h +++ b/fdbserver/LogSystem.h @@ -40,8 +40,8 @@ public: int32_t tLogReplicationFactor; std::vector< LocalityData > tLogLocalities; // Stores the localities of the log servers TLogVersion tLogVersion; - IRepPolicyRef tLogPolicy; - LocalitySetRef logServerSet; + Reference tLogPolicy; + Reference logServerSet; std::vector logIndexArray; std::vector logEntryArray; bool isLocal; @@ -84,7 +84,7 @@ public: used_servers.insert(std::make_pair(0,i)); } - LocalitySetRef serverSet = Reference(new LocalityMap>()); + Reference serverSet = Reference(new LocalityMap>()); LocalityMap>* serverMap = (LocalityMap>*) serverSet.getPtr(); std::vector> resultPairs; for(int loc = 0; loc < satelliteTagLocations.size(); loc++) { @@ -189,7 +189,7 @@ public: void updateLocalitySet( vector const& localities ) { LocalityMap* logServerMap; - logServerSet = LocalitySetRef(new LocalityMap()); + logServerSet = Reference(new LocalityMap()); logServerMap = (LocalityMap*) logServerSet.getPtr(); logEntryArray.clear(); @@ -412,7 +412,7 @@ struct ILogSystem { int tLogReplicationFactor; MergedPeekCursor( vector< Reference > const& serverCursors, Version begin ); - MergedPeekCursor( std::vector>>> const& logServers, int bestServer, int readQuorum, Tag tag, Version begin, Version end, bool parallelGetMore, std::vector const& tLogLocalities, IRepPolicyRef const tLogPolicy, int tLogReplicationFactor ); + MergedPeekCursor( std::vector>>> 
const& logServers, int bestServer, int readQuorum, Tag tag, Version begin, Version end, bool parallelGetMore, std::vector const& tLogLocalities, Reference const tLogPolicy, int tLogReplicationFactor ); MergedPeekCursor( vector< Reference > const& serverCursors, LogMessageVersion const& messageVersion, int bestServer, int readQuorum, Optional nextVersion, Reference logSet, int tLogReplicationFactor ); virtual Reference cloneNoMore(); diff --git a/fdbserver/LogSystemConfig.h b/fdbserver/LogSystemConfig.h index 3c24dc84b5..6890726579 100644 --- a/fdbserver/LogSystemConfig.h +++ b/fdbserver/LogSystemConfig.h @@ -61,7 +61,7 @@ struct TLogSet { int32_t tLogWriteAntiQuorum, tLogReplicationFactor; std::vector< LocalityData > tLogLocalities; // Stores the localities of the log servers TLogVersion tLogVersion; - IRepPolicyRef tLogPolicy; + Reference tLogPolicy; bool isLocal; int8_t locality; Version startVersion; diff --git a/fdbserver/LogSystemPeekCursor.actor.cpp b/fdbserver/LogSystemPeekCursor.actor.cpp index 1351fb207f..feb4a2e8a0 100644 --- a/fdbserver/LogSystemPeekCursor.actor.cpp +++ b/fdbserver/LogSystemPeekCursor.actor.cpp @@ -273,7 +273,7 @@ ILogSystem::MergedPeekCursor::MergedPeekCursor( vector< Reference>>> const& logServers, int bestServer, int readQuorum, Tag tag, Version begin, Version end, - bool parallelGetMore, std::vector< LocalityData > const& tLogLocalities, IRepPolicyRef const tLogPolicy, int tLogReplicationFactor ) + bool parallelGetMore, std::vector< LocalityData > const& tLogLocalities, Reference const tLogPolicy, int tLogReplicationFactor ) : bestServer(bestServer), readQuorum(readQuorum), tag(tag), currentCursor(0), hasNextMessage(false), messageVersion(begin), randomID(g_random->randomUniqueID()), tLogReplicationFactor(tLogReplicationFactor) { if(tLogPolicy) { logSet = Reference( new LogSet() ); diff --git a/fdbserver/TagPartitionedLogSystem.actor.cpp b/fdbserver/TagPartitionedLogSystem.actor.cpp index 262d5a3449..3ecaa26e16 100644 --- 
a/fdbserver/TagPartitionedLogSystem.actor.cpp +++ b/fdbserver/TagPartitionedLogSystem.actor.cpp @@ -531,12 +531,12 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted= lastBegin) { TraceEvent("TLogPeekRemoteBestOnly", dbgid).detail("Tag", tag.toString()).detail("Begin", begin).detail("BestSet", bestSet).detail("BestSetStart", lastBegin).detail("LogRouterIds", tLogs[bestSet]->logRouterString()); - return Reference( new ILogSystem::MergedPeekCursor( tLogs[bestSet]->logRouters, -1, (int)tLogs[bestSet]->logRouters.size(), tag, begin, getPeekEnd(), false, std::vector(), IRepPolicyRef(), 0 ) ); + return Reference( new ILogSystem::MergedPeekCursor( tLogs[bestSet]->logRouters, -1, (int)tLogs[bestSet]->logRouters.size(), tag, begin, getPeekEnd(), false, std::vector(), Reference(), 0 ) ); } else { std::vector< Reference > cursors; std::vector< LogMessageVersion > epochEnds; TraceEvent("TLogPeekRemoteAddingBest", dbgid).detail("Tag", tag.toString()).detail("Begin", begin).detail("BestSet", bestSet).detail("BestSetStart", lastBegin).detail("LogRouterIds", tLogs[bestSet]->logRouterString()); - cursors.push_back( Reference( new ILogSystem::MergedPeekCursor( tLogs[bestSet]->logRouters, -1, (int)tLogs[bestSet]->logRouters.size(), tag, lastBegin, getPeekEnd(), false, std::vector(), IRepPolicyRef(), 0 ) ) ); + cursors.push_back( Reference( new ILogSystem::MergedPeekCursor( tLogs[bestSet]->logRouters, -1, (int)tLogs[bestSet]->logRouters.size(), tag, lastBegin, getPeekEnd(), false, std::vector(), Reference(), 0 ) ) ); int i = 0; while(begin < lastBegin) { if(i == oldLogData.size()) { @@ -565,7 +565,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedlogRouterString()) .detail("LastBegin", lastBegin).detail("ThisBegin", thisBegin).detail("BestStartVer", oldLogData[i].tLogs[bestOldSet]->startVersion); cursors.push_back( Reference( new ILogSystem::MergedPeekCursor( oldLogData[i].tLogs[bestOldSet]->logRouters, -1, 
(int)oldLogData[i].tLogs[bestOldSet]->logRouters.size(), tag, - thisBegin, lastBegin, false, std::vector(), IRepPolicyRef(), 0 ) ) ); + thisBegin, lastBegin, false, std::vector(), Reference(), 0 ) ) ); epochEnds.push_back(LogMessageVersion(lastBegin)); lastBegin = thisBegin; } @@ -1566,7 +1566,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted recruitOldLogRouters( TagPartitionedLogSystem* self, vector workers, LogEpoch recoveryCount, int8_t locality, Version startVersion, - std::vector tLogLocalities, IRepPolicyRef tLogPolicy, bool forRemote ) { + std::vector tLogLocalities, Reference tLogPolicy, bool forRemote ) { state vector>> logRouterInitializationReplies; state vector> allReplies; int nextRouter = 0; diff --git a/fdbserver/WorkerInterface.actor.h b/fdbserver/WorkerInterface.actor.h index eef28a8cfe..12481e0596 100644 --- a/fdbserver/WorkerInterface.actor.h +++ b/fdbserver/WorkerInterface.actor.h @@ -120,7 +120,7 @@ struct InitializeLogRouterRequest { Tag routerTag; Version startVersion; std::vector tLogLocalities; - IRepPolicyRef tLogPolicy; + Reference tLogPolicy; int8_t locality; ReplyPromise reply; From 7f480253486939e3921457e3d8d64764f0566a0b Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Wed, 13 Mar 2019 14:47:17 -0700 Subject: [PATCH 35/47] optimize confirm epoch alive --- fdbserver/TagPartitionedLogSystem.actor.cpp | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/fdbserver/TagPartitionedLogSystem.actor.cpp b/fdbserver/TagPartitionedLogSystem.actor.cpp index 3ecaa26e16..87da4bcfc1 100644 --- a/fdbserver/TagPartitionedLogSystem.actor.cpp +++ b/fdbserver/TagPartitionedLogSystem.actor.cpp @@ -959,24 +959,17 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedtLogReplicationFactor, numPresent - logSet->tLogWriteAntiQuorum) ) ); - state Reference locked(new LocalityGroup()); - state std::vector responded(alive.size()); - for (int i = 0; i < alive.size(); i++) { - responded[i] = false; - } 
+ state std::vector locked; + state std::vector responded(alive.size(), false); loop { for (int i = 0; i < alive.size(); i++) { if (!responded[i] && alive[i].isReady() && !alive[i].isError()) { - locked->add(logSet->tLogLocalities[i]); + locked.push_back(logSet->logEntryArray[i]); responded[i] = true; } } - bool quorum_obtained = locked->validate(logSet->tLogPolicy); - // We intentionally skip considering antiquorums, as the CPU cost of doing so is prohibitive. - if (logSet->tLogReplicationFactor == 1 && locked->size() > 0) { - ASSERT(quorum_obtained); - } - if (quorum_obtained) { + + if (logSet->satisfiesPolicy(locked)) { return Void(); } From e8cb85ed8e190ead1c8af1e10f5bd6ee6dacd012 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Wed, 13 Mar 2019 14:47:35 -0700 Subject: [PATCH 36/47] optimize validateAllCombinations --- fdbrpc/ReplicationUtils.cpp | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/fdbrpc/ReplicationUtils.cpp b/fdbrpc/ReplicationUtils.cpp index d2c7e734a0..f4fd1770f7 100644 --- a/fdbrpc/ReplicationUtils.cpp +++ b/fdbrpc/ReplicationUtils.cpp @@ -287,24 +287,38 @@ bool validateAllCombinations( else { bool bIsValidGroup; - LocalityGroup localityGroup; - std::string bitmask(nCombinationSize, 1); // K leading 1's + Reference localSet = Reference( new LocalityGroup() ); + LocalityGroup* localGroup = (LocalityGroup*) localSet.getPtr(); + localGroup->deep_copy(localitySet); + std::vector originalEntries = localGroup->getEntries(); + + for (int i = 0; i < newItems.size(); ++i) { + localGroup->add(newItems[i]); + } + + std::string bitmask(nCombinationSize, 1); // K leading 1's bitmask.resize(newItems.size(), 0); // N-K trailing 0's + std::vector localityGroupEntries; + std::vector resultEntries; do { - localityGroup.deep_copy(localitySet); - + localityGroupEntries = originalEntries; // [0..N-1] integers - for (int i = 0; i < newItems.size(); ++i) { + for (int i = 0; i < bitmask.size(); ++i) { if 
(bitmask[i]) { - localityGroup.add(newItems[i]); + localityGroupEntries.push_back(localGroup->getEntry(originalEntries.size() + i)); } } - // Check if the group combination passes validation - bIsValidGroup = localityGroup.validate(policy); + resultEntries.clear(); + + // Run the policy, assert if unable to satisfy + bool result = localSet->selectReplicas(policy, localityGroupEntries, resultEntries); + ASSERT(result); + + bIsValidGroup = resultEntries.size() == 0; if (((bCheckIfValid) && (!bIsValidGroup) ) || @@ -319,7 +333,7 @@ bool validateAllCombinations( } if (g_replicationdebug > 2) { printf("Invalid group\n"); - localityGroup.DisplayEntries(); + localGroup->DisplayEntries(); } if (g_replicationdebug > 3) { printf("Full set\n"); From e7d1f9e5f14d8725ff8dbdee9a705d92e1e389d5 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Wed, 13 Mar 2019 15:59:03 -0700 Subject: [PATCH 37/47] fixed review comments --- fdbrpc/ReplicationUtils.cpp | 8 ++++---- fdbserver/TagPartitionedLogSystem.actor.cpp | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fdbrpc/ReplicationUtils.cpp b/fdbrpc/ReplicationUtils.cpp index f4fd1770f7..6c6099c107 100644 --- a/fdbrpc/ReplicationUtils.cpp +++ b/fdbrpc/ReplicationUtils.cpp @@ -291,7 +291,8 @@ bool validateAllCombinations( LocalityGroup* localGroup = (LocalityGroup*) localSet.getPtr(); localGroup->deep_copy(localitySet); - std::vector originalEntries = localGroup->getEntries(); + std::vector localityGroupEntries = localGroup->getEntries(); + int originalSize = localityGroupEntries.size(); for (int i = 0; i < newItems.size(); ++i) { localGroup->add(newItems[i]); @@ -300,15 +301,14 @@ bool validateAllCombinations( std::string bitmask(nCombinationSize, 1); // K leading 1's bitmask.resize(newItems.size(), 0); // N-K trailing 0's - std::vector localityGroupEntries; std::vector resultEntries; do { - localityGroupEntries = originalEntries; + localityGroupEntries.resize(originalSize); // [0..N-1] integers for (int i = 0; i 
< bitmask.size(); ++i) { if (bitmask[i]) { - localityGroupEntries.push_back(localGroup->getEntry(originalEntries.size() + i)); + localityGroupEntries.push_back(localGroup->getEntry(originalSize + i)); } } diff --git a/fdbserver/TagPartitionedLogSystem.actor.cpp b/fdbserver/TagPartitionedLogSystem.actor.cpp index 87da4bcfc1..0b1c4787bd 100644 --- a/fdbserver/TagPartitionedLogSystem.actor.cpp +++ b/fdbserver/TagPartitionedLogSystem.actor.cpp @@ -959,17 +959,17 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedtLogReplicationFactor, numPresent - logSet->tLogWriteAntiQuorum) ) ); - state std::vector locked; + state std::vector aliveEntries; state std::vector responded(alive.size(), false); loop { for (int i = 0; i < alive.size(); i++) { if (!responded[i] && alive[i].isReady() && !alive[i].isError()) { - locked.push_back(logSet->logEntryArray[i]); + aliveEntries.push_back(logSet->logEntryArray[i]); responded[i] = true; } } - if (logSet->satisfiesPolicy(locked)) { + if (logSet->satisfiesPolicy(aliveEntries)) { return Void(); } From e30e2af1f3169187843049956082f5cdd508d395 Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Wed, 13 Mar 2019 16:54:56 -0700 Subject: [PATCH 38/47] ClientKnobs: Add CHECK_CONNECTED_COORDINATOR_NUM_DELAY --- fdbclient/Knobs.cpp | 3 +++ fdbclient/Knobs.h | 3 +++ fdbclient/NativeAPI.actor.cpp | 5 +++-- fdbserver/Knobs.h | 1 + 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/fdbclient/Knobs.cpp b/fdbclient/Knobs.cpp index c166d3aedc..5168333bb0 100644 --- a/fdbclient/Knobs.cpp +++ b/fdbclient/Knobs.cpp @@ -192,4 +192,7 @@ ClientKnobs::ClientKnobs(bool randomize) { init( CONSISTENCY_CHECK_RATE_LIMIT_MAX, 50e6 ); init( CONSISTENCY_CHECK_ONE_ROUND_TARGET_COMPLETION_TIME, 7 * 24 * 60 * 60 ); // 7 days init( CONSISTENCY_CHECK_RATE_WINDOW, 1.0 ); + + // TLS related + init( CHECK_CONNECTED_COORDINATOR_NUM_DELAY, 1.0 ); if( randomize && BUGGIFY ) CHECK_CONNECTED_COORDINATOR_NUM_DELAY = g_random->random01() * 60.0; // In seconds } diff 
--git a/fdbclient/Knobs.h b/fdbclient/Knobs.h index 7a11ae1616..e7999cc588 100644 --- a/fdbclient/Knobs.h +++ b/fdbclient/Knobs.h @@ -183,6 +183,9 @@ public: int CONSISTENCY_CHECK_ONE_ROUND_TARGET_COMPLETION_TIME; int CONSISTENCY_CHECK_RATE_WINDOW; + // TLS related + int CHECK_CONNECTED_COORDINATOR_NUM_DELAY; + ClientKnobs(bool randomize = false); }; diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 7e37cfa643..f1ad5a5c93 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -35,6 +35,7 @@ #include "fdbclient/MutationList.h" #include "fdbclient/CoordinationInterface.h" #include "fdbclient/MonitorLeader.h" +#include "fdbclient/Knobs.h" #if defined(CMAKE_BUILD) || !defined(WIN32) #include "versions.h" #endif @@ -592,7 +593,7 @@ Database DatabaseContext::create(Reference>> Reference> connectedCoordinatorsNumDelayed(new AsyncVar(0)); Reference cluster(new Cluster(connFile, clusterInterface, connectedCoordinatorsNum)); Reference> clientInfo(new AsyncVar()); - Future clientInfoMonitor = delayedAsyncVar(connectedCoordinatorsNum, connectedCoordinatorsNumDelayed, 1.0) || monitorClientInfo(clusterInterface, connFile, clientInfo, connectedCoordinatorsNumDelayed); + Future clientInfoMonitor = delayedAsyncVar(connectedCoordinatorsNum, connectedCoordinatorsNumDelayed, CLIENT_KNOBS->CHECK_CONNECTED_COORDINATOR_NUM_DELAY) || monitorClientInfo(clusterInterface, connFile, clientInfo, connectedCoordinatorsNumDelayed); return Database(new DatabaseContext(cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false)); } @@ -761,7 +762,7 @@ Database Database::createDatabase( Reference connFile, in Reference> connectedCoordinatorsNumDelayed(new AsyncVar(0)); Reference cluster(new Cluster(connFile, connectedCoordinatorsNum, apiVersion)); Reference> clientInfo(new AsyncVar()); - Future clientInfoMonitor = delayedAsyncVar(connectedCoordinatorsNum, connectedCoordinatorsNumDelayed, 
1.0) || monitorClientInfo(cluster->getClusterInterface(), connFile, clientInfo, connectedCoordinatorsNumDelayed); + Future clientInfoMonitor = delayedAsyncVar(connectedCoordinatorsNum, connectedCoordinatorsNumDelayed, CLIENT_KNOBS->CHECK_CONNECTED_COORDINATOR_NUM_DELAY) || monitorClientInfo(cluster->getClusterInterface(), connFile, clientInfo, connectedCoordinatorsNumDelayed); return Database( new DatabaseContext( cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false, apiVersion ) ); } diff --git a/fdbserver/Knobs.h b/fdbserver/Knobs.h index f3698b3561..4fdc8a0152 100644 --- a/fdbserver/Knobs.h +++ b/fdbserver/Knobs.h @@ -376,6 +376,7 @@ public: int64_t TIME_KEEPER_DELAY; int64_t TIME_KEEPER_MAX_ENTRIES; + ServerKnobs(bool randomize = false, ClientKnobs* clientKnobs = NULL); }; From 529068c3e20ad844c4d2b98f44498bebe6c796e4 Mon Sep 17 00:00:00 2001 From: Vishesh Yadav Date: Wed, 13 Mar 2019 15:34:52 -0700 Subject: [PATCH 39/47] doc: Live TLS migration --- documentation/sphinx/source/tls.rst | 48 +++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/documentation/sphinx/source/tls.rst b/documentation/sphinx/source/tls.rst index 1884622498..f5b5c94852 100644 --- a/documentation/sphinx/source/tls.rst +++ b/documentation/sphinx/source/tls.rst @@ -29,10 +29,52 @@ This will configure the new cluster to communicate with TLS. .. note:: Depending on your operating system, version and configuration, there may be a firewall in place that prevents external access to certain ports. If necessary, please consult the appropriate documentation for your OS and ensure that all machines in your cluster can reach the ports configured in your :ref:`configuration file `. -.. _converting-existing-cluster: +.. 
_converting-existing-cluster-after-6.1: -Converting an existing cluster to use TLS -========================================= +Converting an existing cluster to use TLS (since v6.1) +====================================================== + +Since version 6.1, FoundationDB clusters can be converted to TLS without downtime. FoundationDB server can listen to TLS and unencrypted traffic simultaneously on two separate ports. As a result, FDB clusters can live migrate to TLS: + +1) Restart each FoundationDB server individually, but with an additional listen address for TLS traffic:: + + /path/to/fdbserver -C fdb.cluster -p 127.0.0.1:4500 -p 127.0.0.1:4600:tls + + Since, the server still listens to unencrypted traffic and the cluster file still contains the old address, rest of the processes will be able to talk to this new process. + +2) Once all processes are listening to both TLS and unencrypted traffic, switch one or more coordinator to use TLS. Therefore, if the old coordinator list was ``127.0.0.1:4500,127.0.0.1:4501,127.0.0.1:4502``, the new one would be something like ``127.0.0.1:4600:tls,127.0.0.1:4501,127.0.0.1:4502``. Switching few coordinators to TLS at a time allows a smoother migration and a window to find out clients who do not yet have TLS configured. The number of coordinators each client can connect to can be seen via ``fdbstatus`` (look for ``connected_coordinators`` field in ``clients``):: + + "clients" : { + "count" : 2, + "supported_versions" : [ + { + "client_version" : "6.1.0", + "connected_clients" : [ + { + "address" : "127.0.0.1:42916", + "connected_coordinators": 3, + "log_group" : "default" + }, + { + "address" : "127.0.0.1:42918", + "connected_coordinators": 2, + "log_group" : "default" + } + ] + }, ... + ] + } + +3) If there exist a client (e.g., the client 127.0.0.1:42918 in the above example) that cannot connect to all coordinators after a coordinator is switched to TLS, it mean the client does not set up its TLS correctly. 
System operator should notify the client to correct the client's TLS configuration. Otherwise, when all coordinators are switched to TLS ports, the client will loose connection. + +4) Repeat (2) and (3) until all the addresses in coordinator list are TLS. + +5) Restart each FoundationDB server, but only with one public address that listens to TLS traffic only. + +.. _converting-existing-cluster-before-6.1: + +Converting an existing cluster to use TLS (< v6.1) +================================================== Enabling TLS on an existing (non-TLS) cluster cannot be accomplished without downtime because all processes must have TLS enabled to communicate. At startup, each server process enables TLS if the addresses in its cluster file are TLS-enabled. As a result, server processes must be stopped and restarted to convert them to use TLS. To convert the cluster to TLS in the most conservative way: From c6edcc7f06b1c2027c49e06b4b57b04a94da8d5c Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Thu, 14 Mar 2019 02:10:14 -0700 Subject: [PATCH 40/47] Added schema version string to backup JSON status docs. Bug fix in backup status JSON, the document was being created outside the transaction retry loop so retries would combine partial element sets across all tries into the result. 
--- fdbclient/BackupContainer.actor.cpp | 1 + fdbclient/FileBackupAgent.actor.cpp | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index 03f661e3d2..b62b6ed42b 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -165,6 +165,7 @@ std::string BackupDescription::toString() const { std::string BackupDescription::toJSON() const { JsonBuilderObject doc; + doc.setKey("SchemaVersion", "1.0.0"); doc.setKey("URL", url.c_str()); doc.setKey("Restorable", maxRestorableVersion.present()); diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index 31e434dd70..9f607f0806 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -3908,10 +3908,12 @@ public: ACTOR static Future getStatusJSON(FileBackupAgent* backupAgent, Database cx, std::string tagName) { state Reference tr(new ReadYourWritesTransaction(cx)); - state JsonBuilderObject doc; loop { try { + state JsonBuilderObject doc; + doc.setKey("SchemaVersion", "1.0.0"); + tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr->setOption(FDBTransactionOptions::LOCK_AWARE); @@ -4033,14 +4035,13 @@ public: } doc.setKey("Errors", errorList); } - break; + + return doc.getJson(); } catch (Error &e) { wait(tr->onError(e)); } } - - return doc.getJson(); } ACTOR static Future getStatus(FileBackupAgent* backupAgent, Database cx, bool showErrors, std::string tagName) { From cab284027d6fac5e3f8acbff57749738c571a866 Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Thu, 14 Mar 2019 10:26:22 -0700 Subject: [PATCH 41/47] Be more careful about accessing ThreadSafeDatabase's db member. Preallocate the db on the calling thread so that other callers can access the pointer immediately. 
--- fdbclient/DatabaseContext.h | 10 +++++++-- fdbclient/NativeAPI.actor.cpp | 8 ++++++++ fdbclient/ThreadSafeTransaction.actor.cpp | 25 +++++++++++++++-------- 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/fdbclient/DatabaseContext.h b/fdbclient/DatabaseContext.h index ecb27a20d2..2a5c5ef46a 100644 --- a/fdbclient/DatabaseContext.h +++ b/fdbclient/DatabaseContext.h @@ -46,8 +46,14 @@ private: typedef MultiInterface> LocationInfo; typedef MultiInterface ProxyInfo; -class DatabaseContext : public ReferenceCounted, NonCopyable { +class DatabaseContext : public ReferenceCounted, public FastAllocated, NonCopyable { public: + static DatabaseContext* allocateOnForeignThread() { + return (DatabaseContext*)DatabaseContext::operator new(sizeof(DatabaseContext)); + } + + static void initialize( Reference connFile, int apiVersion, DatabaseContext *db ); + // For internal (fdbserver) use only static Database create( Reference>> clusterInterface, Reference connFile, LocalityData const& clientLocality ); static Database create( Reference> clientInfo, Future clientInfoMonitor, LocalityData clientLocality, bool enableLocalityLoadBalance, int taskID=TaskDefaultEndpoint, bool lockAware=false, int apiVersion=Database::API_VERSION_LATEST ); @@ -97,7 +103,7 @@ public: bool enableLocalityLoadBalance, bool lockAware, int apiVersion = Database::API_VERSION_LATEST ); explicit DatabaseContext( const Error &err ); - + // Key DB-specific information AsyncTrigger masterProxiesChangeTrigger; Future monitorMasterProxiesInfoChange; diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 115b1ff805..c115277c09 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -583,6 +583,14 @@ ACTOR static Future monitorClientInfo( Reference connFile, int apiVersion, DatabaseContext *db ) { + Reference cluster(new Cluster(connFile, apiVersion)); + Reference> clientInfo(new AsyncVar()); + Future clientInfoMonitor = 
monitorClientInfo(cluster->getClusterInterface(), connFile, clientInfo); + + new (db) DatabaseContext( cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, LocalityData(), true, false, apiVersion ); +} + Database DatabaseContext::create(Reference>> clusterInterface, Reference connFile, LocalityData const& clientLocality) { Reference cluster(new Cluster(connFile, clusterInterface)); Reference> clientInfo(new AsyncVar()); diff --git a/fdbclient/ThreadSafeTransaction.actor.cpp b/fdbclient/ThreadSafeTransaction.actor.cpp index 41c7ba8879..02437fcf73 100644 --- a/fdbclient/ThreadSafeTransaction.actor.cpp +++ b/fdbclient/ThreadSafeTransaction.actor.cpp @@ -30,7 +30,8 @@ // Therefore, it is unsafe to call (explicitly or implicitly) this->addRef in any of these functions. ThreadFuture ThreadSafeDatabase::onConnected() { - return onMainThread( [this]() -> Future { + DatabaseContext *db = this->db; + return onMainThread( [db]() -> Future { db->checkDeferredError(); return db->onConnected(); } ); @@ -50,24 +51,30 @@ Reference ThreadSafeDatabase::createTransaction() { } void ThreadSafeDatabase::setOption( FDBDatabaseOptions::Option option, Optional value) { + DatabaseContext *db = this->db; Standalone> passValue = value; - onMainThreadVoid( [this, option, passValue](){ db->setOption(option, passValue.contents()); }, &db->deferredError ); + onMainThreadVoid( [db, option, passValue](){ + db->checkDeferredError(); + db->setOption(option, passValue.contents()); + }, &db->deferredError ); } ThreadSafeDatabase::ThreadSafeDatabase(std::string connFilename, int apiVersion) { - db = NULL; // All accesses to db happen on the main thread, so this pointer will be set by the time anybody uses it - Reference connFile = Reference(new ClusterConnectionFile(ClusterConnectionFile::lookupClusterFileName(connFilename).first)); - onMainThreadVoid([this, connFile, apiVersion](){ + + // Allocate memory for the Database from this thread (so the pointer is known for 
subsequent method calls) + // but run its constructor on the main thread + DatabaseContext *db = this->db = DatabaseContext::allocateOnForeignThread(); + + onMainThreadVoid([db, connFile, apiVersion](){ try { - Database db = Database::createDatabase(connFile, apiVersion); - this->db = db.extractPtr(); + DatabaseContext::initialize(connFile, apiVersion, db); } catch(Error &e) { - this->db = new DatabaseContext(e); + new (db) DatabaseContext(e); } catch(...) { - this->db = new DatabaseContext(unknown_error()); + new (db) DatabaseContext(unknown_error()); } }, NULL); } From bf170493c689a920cba6671c0924320e82f380cb Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Thu, 14 Mar 2019 11:40:51 -0700 Subject: [PATCH 42/47] RYOW onError no longer resets a transaction when it gets a non-retryable error and the transaction is not already in an error state (as of API version 610). --- documentation/sphinx/source/release-notes.rst | 3 ++- fdbclient/ReadYourWrites.actor.cpp | 7 ++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst index 596119f8c8..d2f94145a4 100644 --- a/documentation/sphinx/source/release-notes.rst +++ b/documentation/sphinx/source/release-notes.rst @@ -23,6 +23,7 @@ Features * Added a new atomic op `CompareAndClear`. `(PR #1105) `_ * Added support for IPv6. `(PR #1176) https://github.com/apple/foundationdb/pull/1178`_ * FDB can now simultaneously listen to TLS and unencrypted ports to facilitate smoother migration to TLS. `(PR #1157) https://github.com/apple/foundationdb/pull/1157`_ +* Added `DISABLE_POSIX_KERNEL_AIO` knob to fallback to libeio instead of kernel async I/O (KAIO) for systems that do not support KAIO or O_DIRECT flag. `(PR #1283) https://github.com/apple/foundationdb/pull/1283`_ Performance ----------- @@ -33,7 +34,7 @@ Fixes ----- * Python: Creating a ``SingleFloat`` for the tuple layer didn't work with integers. 
`(PR #1216) `_ -* Added `DISABLE_POSIX_KERNEL_AIO` knob to fallback to libeio instead of kernel async I/O (KAIO) for systems that do not support KAIO or O_DIRECT flag. `(PR #1283) https://github.com/apple/foundationdb/pull/1283`_ +* In some cases, calling ``OnError`` with a non-retryable error would partially reset a transaction. As of API version 610, the transaction will no longer be reset in these cases and will instead put the transaction into an error state. `(PR #) <>`_ Status ------ diff --git a/fdbclient/ReadYourWrites.actor.cpp b/fdbclient/ReadYourWrites.actor.cpp index bc4e200c62..fabe4f8430 100644 --- a/fdbclient/ReadYourWrites.actor.cpp +++ b/fdbclient/ReadYourWrites.actor.cpp @@ -1097,7 +1097,12 @@ public: return Void(); } catch( Error &e ) { if ( !ryw->resetPromise.isSet() ) { - ryw->resetRyow(); + if(ryw->tr.apiVersionAtLeast(610)) { + ryw->resetPromise.sendError(transaction_cancelled()); + } + else { + ryw->resetRyow(); + } } if( e.code() == error_code_broken_promise ) throw transaction_cancelled(); From 59f8e07b316fc7ef5a0922f3ae85551ba5f15cd0 Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Thu, 14 Mar 2019 11:42:32 -0700 Subject: [PATCH 43/47] Update release notes with pull request link. --- documentation/sphinx/source/release-notes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst index d2f94145a4..2fd01f30c2 100644 --- a/documentation/sphinx/source/release-notes.rst +++ b/documentation/sphinx/source/release-notes.rst @@ -34,7 +34,7 @@ Fixes ----- * Python: Creating a ``SingleFloat`` for the tuple layer didn't work with integers. `(PR #1216) `_ -* In some cases, calling ``OnError`` with a non-retryable error would partially reset a transaction. As of API version 610, the transaction will no longer be reset in these cases and will instead put the transaction into an error state. 
`(PR #) <>`_ +* In some cases, calling ``OnError`` with a non-retryable error would partially reset a transaction. As of API version 610, the transaction will no longer be reset in these cases and will instead put the transaction into an error state. `(PR #1298) `_ Status ------ From e12d242d189c74b7a362031e9c77b06be675efd9 Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Thu, 14 Mar 2019 13:42:03 -0700 Subject: [PATCH 44/47] Address review comments. --- fdbclient/DatabaseContext.h | 4 +--- fdbclient/NativeAPI.actor.cpp | 20 ++++++++++---------- fdbclient/NativeAPI.actor.h | 2 +- fdbclient/ThreadSafeTransaction.actor.cpp | 2 +- 4 files changed, 13 insertions(+), 15 deletions(-) diff --git a/fdbclient/DatabaseContext.h b/fdbclient/DatabaseContext.h index 2a5c5ef46a..788cdabbd8 100644 --- a/fdbclient/DatabaseContext.h +++ b/fdbclient/DatabaseContext.h @@ -52,8 +52,6 @@ public: return (DatabaseContext*)DatabaseContext::operator new(sizeof(DatabaseContext)); } - static void initialize( Reference connFile, int apiVersion, DatabaseContext *db ); - // For internal (fdbserver) use only static Database create( Reference>> clusterInterface, Reference connFile, LocalityData const& clientLocality ); static Database create( Reference> clientInfo, Future clientInfoMonitor, LocalityData clientLocality, bool enableLocalityLoadBalance, int taskID=TaskDefaultEndpoint, bool lockAware=false, int apiVersion=Database::API_VERSION_LATEST ); @@ -103,7 +101,7 @@ public: bool enableLocalityLoadBalance, bool lockAware, int apiVersion = Database::API_VERSION_LATEST ); explicit DatabaseContext( const Error &err ); - + // Key DB-specific information AsyncTrigger masterProxiesChangeTrigger; Future monitorMasterProxiesInfoChange; diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index c115277c09..e186f59e00 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -583,14 +583,6 @@ ACTOR static Future monitorClientInfo( Reference connFile, int 
apiVersion, DatabaseContext *db ) { - Reference cluster(new Cluster(connFile, apiVersion)); - Reference> clientInfo(new AsyncVar()); - Future clientInfoMonitor = monitorClientInfo(cluster->getClusterInterface(), connFile, clientInfo); - - new (db) DatabaseContext( cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, LocalityData(), true, false, apiVersion ); -} - Database DatabaseContext::create(Reference>> clusterInterface, Reference connFile, LocalityData const& clientLocality) { Reference cluster(new Cluster(connFile, clusterInterface)); Reference> clientInfo(new AsyncVar()); @@ -758,12 +750,20 @@ Reference DatabaseContext::getConnectionFile() { return cluster->getConnectionFile(); } -Database Database::createDatabase( Reference connFile, int apiVersion, LocalityData const& clientLocality ) { +Database Database::createDatabase( Reference connFile, int apiVersion, LocalityData const& clientLocality, DatabaseContext *preallocatedDb ) { Reference cluster(new Cluster(connFile, apiVersion)); Reference> clientInfo(new AsyncVar()); Future clientInfoMonitor = monitorClientInfo(cluster->getClusterInterface(), connFile, clientInfo); - return Database( new DatabaseContext( cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false, apiVersion ) ); + DatabaseContext *db; + if(preallocatedDb) { + db = new (preallocatedDb) DatabaseContext(cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false, apiVersion); + } + else { + db = new DatabaseContext(cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false, apiVersion); + } + + return Database(db); } Database Database::createDatabase( std::string connFileName, int apiVersion, LocalityData const& clientLocality ) { diff --git a/fdbclient/NativeAPI.actor.h b/fdbclient/NativeAPI.actor.h index 50b42730ab..2a1813b91c 100644 --- 
a/fdbclient/NativeAPI.actor.h +++ b/fdbclient/NativeAPI.actor.h @@ -68,7 +68,7 @@ class Database { public: enum { API_VERSION_LATEST = -1 }; - static Database createDatabase( Reference connFile, int apiVersion, LocalityData const& clientLocality=LocalityData() ); + static Database createDatabase( Reference connFile, int apiVersion, LocalityData const& clientLocality=LocalityData(), DatabaseContext *preallocatedDb=nullptr ); static Database createDatabase( std::string connFileName, int apiVersion, LocalityData const& clientLocality=LocalityData() ); Database() {} // an uninitialized database can be destructed or reassigned safely; that's it diff --git a/fdbclient/ThreadSafeTransaction.actor.cpp b/fdbclient/ThreadSafeTransaction.actor.cpp index 02437fcf73..90db1b932d 100644 --- a/fdbclient/ThreadSafeTransaction.actor.cpp +++ b/fdbclient/ThreadSafeTransaction.actor.cpp @@ -68,7 +68,7 @@ ThreadSafeDatabase::ThreadSafeDatabase(std::string connFilename, int apiVersion) onMainThreadVoid([db, connFile, apiVersion](){ try { - DatabaseContext::initialize(connFile, apiVersion, db); + Database::createDatabase(connFile, apiVersion, LocalityData(), db).extractPtr(); } catch(Error &e) { new (db) DatabaseContext(e); From 98f8fa62e980b1e288a850e3e993d2569e952120 Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Thu, 14 Mar 2019 15:05:36 -0700 Subject: [PATCH 45/47] Update generated.go --- bindings/go/src/fdb/generated.go | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/bindings/go/src/fdb/generated.go b/bindings/go/src/fdb/generated.go index aea915cb22..a2507b1674 100644 --- a/bindings/go/src/fdb/generated.go +++ b/bindings/go/src/fdb/generated.go @@ -92,9 +92,9 @@ func (o NetworkOptions) SetTraceLogGroup(param string) error { return o.setOpt(33, []byte(param)) } -// Selects trace output format for this client. xml (the default) and json are supported. +// Select the format of the log files. xml (the default) and json are supported. 
// -// Parameter: trace format +// Parameter: Format of trace files func (o NetworkOptions) SetTraceFormat(param string) error { return o.setOpt(34, []byte(param)) } @@ -351,13 +351,25 @@ func (o TransactionOptions) SetDebugRetryLogging(param string) error { return o.setOpt(401, []byte(param)) } -// Enables tracing for this transaction and logs results to the client trace logs. Client trace logging must be enabled to get log output. +// Deprecated // // Parameter: String identifier to be used in the logs when tracing this transaction. The identifier must not exceed 100 characters. func (o TransactionOptions) SetTransactionLoggingEnable(param string) error { return o.setOpt(402, []byte(param)) } +// Sets a client provided identifier for the transaction that will be used in scenarios like tracing or profiling. Client trace logging or transaction profiling must be separately enabled. +// +// Parameter: String identifier to be used when tracing or profiling this transaction. The identifier must not exceed 100 characters. +func (o TransactionOptions) SetDebugTransactionIdentifier(param string) error { + return o.setOpt(403, []byte(param)) +} + +// Enables tracing for this transaction and logs results to the client trace logs. The DEBUG_TRANSACTION_IDENTIFIER option must be set before using this option, and client trace logging must be enabled and to get log output. +func (o TransactionOptions) SetLogTransaction() error { + return o.setOpt(404, nil) +} + // Set a timeout in milliseconds which, when elapsed, will cause the transaction automatically to be cancelled. Valid parameter values are ``[0, INT_MAX]``. If set to 0, will disable all timeouts. All pending and any future uses of the transaction will throw an exception. The transaction can be used again after it is reset. Like all transaction options, a timeout must be reset after a call to onError. This behavior allows the user to make the timeout dynamic. 
// // Parameter: value in milliseconds of timeout @@ -512,12 +524,12 @@ func (t Transaction) Min(key KeyConvertible, param []byte) { t.atomicOp(key.FDBKey(), param, 13) } -// Transforms ``key`` using a versionstamp for the transaction. Sets the transformed key in the database to ``param``. The key is transformed by removing the final four bytes from the key and reading those as a little-Endian 32-bit integer to get a position ``pos``. The 10 bytes of the key from ``pos`` to ``pos + 10`` are replaced with the versionstamp of the transaction used. The first byte of the key is position 0. A versionstamp is a 10 byte, unique, monotonically (but not sequentially) increasing value for each committed transaction. The first 8 bytes are the committed version of the database (serialized in big-Endian order). The last 2 bytes are monotonic in the serialization order for transactions. WARNING: At this time, versionstamps are compatible with the Tuple layer only in the Java and Python bindings. Also, note that prior to API version 520, the offset was computed from only the final two bytes rather than the final four bytes. +// Transforms ``key`` using a versionstamp for the transaction. Sets the transformed key in the database to ``param``. The key is transformed by removing the final four bytes from the key and reading those as a little-Endian 32-bit integer to get a position ``pos``. The 10 bytes of the key from ``pos`` to ``pos + 10`` are replaced with the versionstamp of the transaction used. The first byte of the key is position 0. A versionstamp is a 10 byte, unique, monotonically (but not sequentially) increasing value for each committed transaction. The first 8 bytes are the committed version of the database (serialized in big-Endian order). The last 2 bytes are monotonic in the serialization order for transactions. WARNING: At this time, versionstamps are compatible with the Tuple layer only in the Java, Python, and Go bindings. 
Also, note that prior to API version 520, the offset was computed from only the final two bytes rather than the final four bytes. func (t Transaction) SetVersionstampedKey(key KeyConvertible, param []byte) { t.atomicOp(key.FDBKey(), param, 14) } -// Transforms ``param`` using a versionstamp for the transaction. Sets the ``key`` given to the transformed ``param``. The parameter is transformed by removing the final four bytes from ``param`` and reading those as a little-Endian 32-bit integer to get a position ``pos``. The 10 bytes of the parameter from ``pos`` to ``pos + 10`` are replaced with the versionstamp of the transaction used. The first byte of the parameter is position 0. A versionstamp is a 10 byte, unique, monotonically (but not sequentially) increasing value for each committed transaction. The first 8 bytes are the committed version of the database (serialized in big-Endian order). The last 2 bytes are monotonic in the serialization order for transactions. WARNING: At this time, versionstamps are compatible with the Tuple layer only in the Java and Python bindings. Also, note that prior to API version 520, the versionstamp was always placed at the beginning of the parameter rather than computing an offset. +// Transforms ``param`` using a versionstamp for the transaction. Sets the ``key`` given to the transformed ``param``. The parameter is transformed by removing the final four bytes from ``param`` and reading those as a little-Endian 32-bit integer to get a position ``pos``. The 10 bytes of the parameter from ``pos`` to ``pos + 10`` are replaced with the versionstamp of the transaction used. The first byte of the parameter is position 0. A versionstamp is a 10 byte, unique, monotonically (but not sequentially) increasing value for each committed transaction. The first 8 bytes are the committed version of the database (serialized in big-Endian order). The last 2 bytes are monotonic in the serialization order for transactions. 
WARNING: At this time, versionstamps are compatible with the Tuple layer only in the Java, Python, and Go bindings. Also, note that prior to API version 520, the versionstamp was always placed at the beginning of the parameter rather than computing an offset. func (t Transaction) SetVersionstampedValue(key KeyConvertible, param []byte) { t.atomicOp(key.FDBKey(), param, 15) } @@ -532,6 +544,11 @@ func (t Transaction) ByteMax(key KeyConvertible, param []byte) { t.atomicOp(key.FDBKey(), param, 17) } +// Performs an atomic ``compare and clear`` operation. If the existing value in the database is equal to the given value, then given key is cleared. +func (t Transaction) CompareAndClear(key KeyConvertible, param []byte) { + t.atomicOp(key.FDBKey(), param, 20) +} + type conflictRangeType int const ( From 9ed41a49321b7eab6a086fb3ff4a6aff105256fe Mon Sep 17 00:00:00 2001 From: Moussa Ehsan Date: Fri, 15 Mar 2019 10:24:33 -0700 Subject: [PATCH 46/47] Update python version to 3.7 in the docs This change updates the documentation to reflect that we support python 3.7. --- documentation/sphinx/source/api-python.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/sphinx/source/api-python.rst b/documentation/sphinx/source/api-python.rst index 11f04d652a..394016c379 100644 --- a/documentation/sphinx/source/api-python.rst +++ b/documentation/sphinx/source/api-python.rst @@ -53,7 +53,7 @@ Python API Installation ============ -The FoundationDB Python API is compatible with Python 2.7 - 3.6. You will need to have a Python version within this range on your system before the FoundationDB Python API can be installed. +The FoundationDB Python API is compatible with Python 2.7 - 3.7. You will need to have a Python version within this range on your system before the FoundationDB Python API can be installed. Also please note that Python 3.7 no longer bundles a full copy of libffi, which is used for building the _ctypes module on non-OSX UNIX platforms. 
Hence, if you are using Python 3.7, you should make sure libffi is already installed on your system. On macOS, the FoundationDB Python API is installed as part of the FoundationDB installation (see :ref:`installing-client-binaries`). On Ubuntu or RHEL/CentOS, you will need to install the FoundationDB Python API manually. From 40293d6fae9c143bba827ddbb5ae0ebf0082c6fa Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Fri, 15 Mar 2019 11:05:37 -0700 Subject: [PATCH 47/47] Update documentation/sphinx/source/api-python.rst Co-Authored-By: amouehsan --- documentation/sphinx/source/api-python.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/sphinx/source/api-python.rst b/documentation/sphinx/source/api-python.rst index 394016c379..2c08b2bf84 100644 --- a/documentation/sphinx/source/api-python.rst +++ b/documentation/sphinx/source/api-python.rst @@ -53,7 +53,7 @@ Python API Installation ============ -The FoundationDB Python API is compatible with Python 2.7 - 3.7. You will need to have a Python version within this range on your system before the FoundationDB Python API can be installed. Also please note that Python 3.7 no longer bundles a full copy of libffi, which is used for building the _ctypes module on non-OSX UNIX platforms. Hence, if you are using Python 3.7, you should make sure libffi is already installed on your system. +The FoundationDB Python API is compatible with Python 2.7 - 3.7. You will need to have a Python version within this range on your system before the FoundationDB Python API can be installed. Also please note that Python 3.7 no longer bundles a full copy of libffi, which is used for building the _ctypes module on non-macOS UNIX platforms. Hence, if you are using Python 3.7, you should make sure libffi is already installed on your system. On macOS, the FoundationDB Python API is installed as part of the FoundationDB installation (see :ref:`installing-client-binaries`). 
On Ubuntu or RHEL/CentOS, you will need to install the FoundationDB Python API manually.