diff --git a/documentation/StatusSchema.json b/documentation/StatusSchema.json index f6638e59d3..a3733a5736 100644 --- a/documentation/StatusSchema.json +++ b/documentation/StatusSchema.json @@ -150,7 +150,12 @@ ], "log_replication_factor":3, "log_write_anti_quorum":0, - "log_fault_tolerance":2 + "log_fault_tolerance":2, + "remote_log_replication_factor":3, + "remote_log_fault_tolerance":2, + "satellite_log_replication_factor":3, + "satellite_log_write_anti_quorum":0, + "satellite_log_fault_tolerance":2 } ], "fault_tolerance":{ @@ -188,6 +193,7 @@ "incompatible_connections":[ ], + "datacenter_version_difference":0, "database_available":true, "database_locked":false, "generation":2, @@ -231,6 +237,7 @@ "$enum":[ "unreachable_master_worker", "unreadable_configuration", + "full_replication_timeout", "client_issues", "unreachable_processes", "immediate_priority_transaction_start_probe_timeout", @@ -332,25 +339,32 @@ }, "cluster_controller_timestamp":1415650089, "protocol_version":"fdb00a400050001", - "configuration":{ + "full_replication":true, + "configuration":{ + "log_anti_quorum":0, + "log_replicas":2, + "log_replication_policy":"(zoneid^3x1)", + "redundancy_mode":"single", + "regions":[{ + "datacenters":[{ + "id":"mr", + "priority":1, + "satellite":1 + }], + "satellite_redundancy_mode":"one_satellite_single", + "satellite_log_replicas":1, + "satellite_usable_dcs":1, + "satellite_anti_quorum":0, + "satellite_log_policy":"(zoneid^3x1)", + "satellite_logs":2 + }], + "remote_redundancy_mode":"remote_single", + "remote_log_replicas":3, + "remote_logs":5, + "storage_quorum":1, + "storage_replicas":1, "resolvers":1, - "redundancy":{ - "factor":{ - "$enum":[ - "single", - "double", - "triple", - "custom", - "two_datacenter", - "three_datacenter", - "three_data_hall", - "fast_recovery_double", - "fast_recovery_triple" - ] - } - }, - "storage_policy":"(zoneid^3x1)", - "tlog_policy":"(zoneid^2x1)", + "storage_replication_policy":"(zoneid^3x1)", "logs":2, "storage_engine":{ "$enum":[ @@ -367,6 +381,9 @@ "address":"10.0.4.1" } ], + "auto_proxies":3, + "auto_resolvers":1, + "auto_logs":3, "proxies":5 }, "data":{ diff --git a/fdbclient/DatabaseConfiguration.cpp b/fdbclient/DatabaseConfiguration.cpp index db0851ac5e..d724c52f9a 100644 --- a/fdbclient/DatabaseConfiguration.cpp +++ b/fdbclient/DatabaseConfiguration.cpp @@ -58,10 +58,27 @@ void parse( std::vector* regions, ValueRef const& v ) { regions->clear(); for (StatusObjectReader dc : regionArray) { RegionInfo info; - std::string idStr; - dc.get("id", idStr); - info.dcId = idStr; - dc.get("priority", info.priority); + json_spirit::mArray datacenters; + dc.get("datacenters", datacenters); + bool nonSatelliteDatacenters = 0; + for (StatusObjectReader s : datacenters) { + std::string idStr; + if (s.has("satellite") && s.last().get_int() == 1) { + SatelliteInfo satInfo; + s.get("id", idStr); + satInfo.dcId = idStr; + s.get("priority", satInfo.priority); + info.satellites.push_back(satInfo); + } else { + if (nonSatelliteDatacenters > 0) throw invalid_option(); + nonSatelliteDatacenters++; + s.get("id", idStr); + info.dcId = idStr; + s.get("priority", info.priority); + } + } + std::sort(info.satellites.begin(), info.satellites.end(), SatelliteInfo::sort_by_priority() ); + if (nonSatelliteDatacenters != 1) throw invalid_option(); dc.tryGet("satellite_logs", info.satelliteDesiredTLogCount); std::string satelliteReplication; if(dc.tryGet("satellite_redundancy_mode", satelliteReplication)) { @@ -97,18 +114,6 @@ void parse( std::vector* regions, ValueRef const& v ) { dc.tryGet("satellite_log_replicas", info.satelliteTLogReplicationFactor); dc.tryGet("satellite_usable_dcs", info.satelliteTLogUsableDcs); dc.tryGet("satellite_anti_quorum", info.satelliteTLogWriteAntiQuorum); - json_spirit::mArray satellites; - if( dc.tryGet("satellites", satellites) ) { - for (StatusObjectReader s : satellites) { - SatelliteInfo satInfo; - std::string sidStr; - s.get("id", sidStr); - satInfo.dcId = sidStr; - s.get("priority", satInfo.priority); - info.satellites.push_back(satInfo); - } - std::sort(info.satellites.begin(), info.satellites.end(), SatelliteInfo::sort_by_priority() ); - } regions->push_back(info); } std::sort(regions->begin(), regions->end(), RegionInfo::sort_by_priority() ); @@ -155,24 +160,22 @@ bool DatabaseConfiguration::isValid() const { getDesiredRemoteLogs() >= 1 && remoteTLogReplicationFactor >= 0 && regions.size() <= 2 && - ( remoteTLogReplicationFactor == 0 || ( remoteTLogPolicy && regions.size() == 2 && durableStorageQuorum == storageTeamSize ) ) ) ) { + ( remoteTLogReplicationFactor == 0 || ( remoteTLogPolicy && regions.size() == 2 && durableStorageQuorum == storageTeamSize ) ) && + ( regions.size() == 0 || regions[0].priority >= 0 ) ) ) { return false; } std::set dcIds; - std::set priorities; dcIds.insert(Key()); for(auto& r : regions) { if( !(!dcIds.count(r.dcId) && - !priorities.count(r.priority) && r.satelliteTLogReplicationFactor >= 0 && r.satelliteTLogWriteAntiQuorum >= 0 && - r.satelliteTLogUsableDcs >= 0 && + r.satelliteTLogUsableDcs >= 1 && ( r.satelliteTLogReplicationFactor == 0 || ( r.satelliteTLogPolicy && r.satellites.size() ) ) ) ) { return false; } dcIds.insert(r.dcId); - priorities.insert(r.priority); for(auto& s : r.satellites) { if(dcIds.count(s.dcId)) { return false; @@ -246,44 +249,47 @@ StatusObject DatabaseConfiguration::toJSON(bool noPolicies) const { if(regions.size()) { StatusArray regionArr; for(auto& r : regions) { + StatusObject regionObj; + StatusArray dcArr; StatusObject dcObj; dcObj["id"] = r.dcId.toString(); dcObj["priority"] = r.priority; + dcArr.push_back(dcObj); if(r.satelliteTLogReplicationFactor == 1 && r.satelliteTLogUsableDcs == 1 && r.satelliteTLogWriteAntiQuorum == 0) { - dcObj["satellite_redundancy_mode"] = "one_satellite_single"; + regionObj["satellite_redundancy_mode"] = "one_satellite_single"; } else if(r.satelliteTLogReplicationFactor == 2 && r.satelliteTLogUsableDcs == 1 && r.satelliteTLogWriteAntiQuorum == 0) { - dcObj["satellite_redundancy_mode"] = "one_satellite_double"; + regionObj["satellite_redundancy_mode"] = "one_satellite_double"; } else if(r.satelliteTLogReplicationFactor == 3 && r.satelliteTLogUsableDcs == 1 && r.satelliteTLogWriteAntiQuorum == 0) { - dcObj["satellite_redundancy_mode"] = "one_satellite_triple"; + regionObj["satellite_redundancy_mode"] = "one_satellite_triple"; } else if(r.satelliteTLogReplicationFactor == 4 && r.satelliteTLogUsableDcs == 2 && r.satelliteTLogWriteAntiQuorum == 0) { - dcObj["satellite_redundancy_mode"] = "two_satellite_safe"; + regionObj["satellite_redundancy_mode"] = "two_satellite_safe"; } else if(r.satelliteTLogReplicationFactor == 4 && r.satelliteTLogUsableDcs == 2 && r.satelliteTLogWriteAntiQuorum == 2) { - dcObj["satellite_redundancy_mode"] = "two_satellite_fast"; + regionObj["satellite_redundancy_mode"] = "two_satellite_fast"; } else if(r.satelliteTLogReplicationFactor != 0) { - dcObj["satellite_log_replicas"] = r.satelliteTLogReplicationFactor; - dcObj["satellite_usable_dcs"] = r.satelliteTLogUsableDcs; - dcObj["satellite_anti_quorum"] = r.satelliteTLogWriteAntiQuorum; - if(r.satelliteTLogPolicy) dcObj["satellite_log_policy"] = r.satelliteTLogPolicy->info(); + regionObj["satellite_log_replicas"] = r.satelliteTLogReplicationFactor; + regionObj["satellite_usable_dcs"] = r.satelliteTLogUsableDcs; + regionObj["satellite_anti_quorum"] = r.satelliteTLogWriteAntiQuorum; + if(r.satelliteTLogPolicy) regionObj["satellite_log_policy"] = r.satelliteTLogPolicy->info(); } if( r.satelliteDesiredTLogCount != -1 ) { - dcObj["satellite_logs"] = r.satelliteDesiredTLogCount; + regionObj["satellite_logs"] = r.satelliteDesiredTLogCount; } if(r.satellites.size()) { - StatusArray satellitesArr; for(auto& s : r.satellites) { StatusObject satObj; satObj["id"] = s.dcId.toString(); satObj["priority"] = s.priority; + satObj["satellite"] = 1; - satellitesArr.push_back(satObj); + dcArr.push_back(satObj); } - dcObj["satellites"] = satellitesArr; } - regionArr.push_back(dcObj); + regionObj["datacenters"] = dcArr; + regionArr.push_back(regionObj); } result["regions"] = regionArr; } diff --git a/fdbclient/DatabaseConfiguration.h b/fdbclient/DatabaseConfiguration.h index 03c30195ad..749e82dfd8 100644 --- a/fdbclient/DatabaseConfiguration.h +++ b/fdbclient/DatabaseConfiguration.h @@ -57,7 +57,7 @@ struct RegionInfo { std::vector satellites; - RegionInfo() : priority(0), satelliteDesiredTLogCount(-1), satelliteTLogReplicationFactor(0), satelliteTLogWriteAntiQuorum(0), satelliteTLogUsableDcs(0) {} + RegionInfo() : priority(0), satelliteDesiredTLogCount(-1), satelliteTLogReplicationFactor(0), satelliteTLogWriteAntiQuorum(0), satelliteTLogUsableDcs(1) {} struct sort_by_priority { bool operator ()(RegionInfo const&a, RegionInfo const& b) const { return a.priority > b.priority; } diff --git a/fdbclient/FDBTypes.h b/fdbclient/FDBTypes.h index f70862cdc4..afefef3782 100644 --- a/fdbclient/FDBTypes.h +++ b/fdbclient/FDBTypes.h @@ -33,7 +33,7 @@ typedef StringRef KeyRef; typedef StringRef ValueRef; typedef int64_t Generation; -enum { tagLocalitySpecial = -1, tagLocalityLogRouter = -2, tagLocalityRemoteLog = -3, tagLocalityUpgraded = -4, tagLocalityInvalid = -99 }; //The TLog and LogRouter require these number to be as compact as possible +enum { tagLocalitySpecial = -1, tagLocalityLogRouter = -2, tagLocalityRemoteLog = -3, tagLocalityUpgraded = -4, tagLocalitySatellite = -5, tagLocalityInvalid = -99 }; //The TLog and LogRouter require these number to be as compact as possible #pragma pack(push, 1) struct Tag { diff --git a/fdbclient/SystemData.cpp b/fdbclient/SystemData.cpp index 14b6af1729..d942528d24 100644 --- a/fdbclient/SystemData.cpp +++ b/fdbclient/SystemData.cpp @@ -258,6 +258,8 @@ int decodeDatacenterReplicasValue( ValueRef const& value ) { return s; } +const KeyRef primaryDatacenterKey = LiteralStringRef("\xff/primaryDatacenter"); + // serverListKeys.contains(k) iff k.startsWith( serverListKeys.begin ) because '/'+1 == '0' const KeyRangeRef serverListKeys( LiteralStringRef("\xff/serverList/"), diff --git a/fdbclient/SystemData.h b/fdbclient/SystemData.h index 3440f49091..f86940f363 100644 --- a/fdbclient/SystemData.h +++ b/fdbclient/SystemData.h @@ -86,6 +86,8 @@ const Value datacenterReplicasValue( int const& ); Optional decodeDatacenterReplicasKey( KeyRef const& ); int decodeDatacenterReplicasValue( ValueRef const& ); +extern const KeyRef primaryDatacenterKey; + // "\xff/serverList/[[serverID]]" := "[[StorageServerInterface]]" // Storage servers are listed here when they are recruited - always before assigning them keys // Storage servers removed from here are never replaced. The same fdbserver, if re-recruited, will always diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 2b4e06fa2b..0d36950eeb 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -89,11 +89,12 @@ public: std::map traceLogGroupMap; Promise forceMasterFailure; int64_t masterRegistrationCount; + bool recoveryStalled; DatabaseConfiguration config; // Asynchronously updated via master registration DatabaseConfiguration fullyRecoveredConfig; Database db; - DBInfo() : masterRegistrationCount(0), + DBInfo() : masterRegistrationCount(0), recoveryStalled(false), clientInfo( new AsyncVar( ClientDBInfo() ) ), serverInfo( new AsyncVar( ServerDBInfo( LiteralStringRef("DB") ) ) ), db( DatabaseContext::create( clientInfo, Future(), LocalityData(), true, TaskDefaultEndpoint, true ) ) // SOMEDAY: Locality! @@ -512,15 +513,31 @@ public: std::vector> satelliteLogs; if(region.satelliteTLogReplicationFactor > 0) { - std::set> satelliteDCs; - for(auto& s : region.satellites) { - satelliteDCs.insert(s.dcId); - } - //FIXME: recruitment does not respect usable_dcs, a.k.a if usable_dcs is 1 we should recruit all tlogs in one data center - satelliteLogs = getWorkersForTlogs( req.configuration, region.satelliteTLogReplicationFactor, req.configuration.getDesiredSatelliteLogs(dcId), region.satelliteTLogPolicy, id_used, false, satelliteDCs ); + int startDC = 0; + loop { + if(startDC > 0 && startDC >= region.satellites.size() + 1 - region.satelliteTLogUsableDcs) { + throw no_more_servers(); + } - for(int i = 0; i < satelliteLogs.size(); i++) { - result.satelliteTLogs.push_back(satelliteLogs[i].first); + try { + std::set> satelliteDCs; + for(int s = startDC; s < std::min(startDC + region.satelliteTLogUsableDcs, region.satellites.size()); s++) { + satelliteDCs.insert(region.satellites[s].dcId); + } + + satelliteLogs = getWorkersForTlogs( req.configuration, region.satelliteTLogReplicationFactor, req.configuration.getDesiredSatelliteLogs(dcId), region.satelliteTLogPolicy, id_used, false, satelliteDCs ); + + for(int i = 0; i < satelliteLogs.size(); i++) { + result.satelliteTLogs.push_back(satelliteLogs[i].first); + } + break; + } catch (Error &e) { + if(e.code() != error_code_no_more_servers) { + throw; + } + } + + startDC++; } } @@ -556,35 +573,39 @@ public: RecruitFromConfigurationReply findWorkersForConfiguration( RecruitFromConfigurationRequest const& req ) { if(req.configuration.regions.size() > 1) { + std::vector regions = req.configuration.regions; + if(regions[0].priority == regions[1].priority && clusterControllerDcId.present() && regions[1].dcId == clusterControllerDcId.get()) { + std::swap(regions[0], regions[1]); + } bool setPrimaryDesired = false; try { - auto reply = findWorkersForConfiguration(req, req.configuration.regions[0].dcId); + auto reply = findWorkersForConfiguration(req, regions[0].dcId); setPrimaryDesired = true; vector> dcPriority; - dcPriority.push_back(req.configuration.regions[0].dcId); - dcPriority.push_back(req.configuration.regions[1].dcId); + dcPriority.push_back(regions[0].dcId); + dcPriority.push_back(regions[1].dcId); desiredDcIds.set(dcPriority); if(reply.isError()) { throw reply.getError(); - } else if(clusterControllerDcId.present() && req.configuration.regions[0].dcId == clusterControllerDcId.get()) { + } else if(clusterControllerDcId.present() && regions[0].dcId == clusterControllerDcId.get()) { return reply.get(); } throw no_more_servers(); } catch( Error& e ) { - if (e.code() != error_code_no_more_servers) { + if (e.code() != error_code_no_more_servers || regions[1].priority < 0 || now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) { throw; } TraceEvent(SevWarn, "AttemptingRecruitmentInRemoteDC", id).error(e); - auto reply = findWorkersForConfiguration(req, req.configuration.regions[1].dcId); + auto reply = findWorkersForConfiguration(req, regions[1].dcId); if(!setPrimaryDesired) { vector> dcPriority; - dcPriority.push_back(req.configuration.regions[1].dcId); - dcPriority.push_back(req.configuration.regions[0].dcId); + dcPriority.push_back(regions[1].dcId); + dcPriority.push_back(regions[0].dcId); desiredDcIds.set(dcPriority); } if(reply.isError()) { throw reply.getError(); - } else if(clusterControllerDcId.present() && req.configuration.regions[1].dcId == clusterControllerDcId.get()) { + } else if(clusterControllerDcId.present() && regions[1].dcId == clusterControllerDcId.get()) { return reply.get(); } throw; @@ -690,36 +711,51 @@ public: } } - void checkPrimaryDC() { - if(db.config.regions.size() > 1 && clusterControllerDcId.present() && db.config.regions[0].dcId != clusterControllerDcId.get()) { - try { - std::map< Optional>, int> id_used; - getWorkerForRoleInDatacenter(db.config.regions[0].dcId, ProcessClass::ClusterController, ProcessClass::ExcludeFit, db.config, id_used, true); - getWorkerForRoleInDatacenter(db.config.regions[0].dcId, ProcessClass::Master, ProcessClass::ExcludeFit, db.config, id_used, true); + void checkRegions(const std::vector& regions) { + if(desiredDcIds.get().present() && desiredDcIds.get().get().size() == 2 && desiredDcIds.get().get()[0].get() == regions[0].dcId && desiredDcIds.get().get()[1].get() == regions[1].dcId) { + return; + } + + try { + std::map< Optional>, int> id_used; + getWorkerForRoleInDatacenter(regions[0].dcId, ProcessClass::ClusterController, ProcessClass::ExcludeFit, db.config, id_used, true); + getWorkerForRoleInDatacenter(regions[0].dcId, ProcessClass::Master, ProcessClass::ExcludeFit, db.config, id_used, true); - std::set> primaryDC; - primaryDC.insert(db.config.regions[0].dcId); - getWorkersForTlogs(db.config, db.config.tLogReplicationFactor, db.config.desiredTLogCount, db.config.tLogPolicy, id_used, true, primaryDC); + std::set> primaryDC; + primaryDC.insert(regions[0].dcId); + getWorkersForTlogs(db.config, db.config.tLogReplicationFactor, db.config.desiredTLogCount, db.config.tLogPolicy, id_used, true, primaryDC); - if(db.config.regions[0].satelliteTLogReplicationFactor > 0) { - std::set> satelliteDCs; - for(auto &s : db.config.regions[0].satellites) { - satelliteDCs.insert(s.dcId); - } - getWorkersForTlogs(db.config, db.config.regions[0].satelliteTLogReplicationFactor, db.config.getDesiredSatelliteLogs(db.config.regions[0].dcId), db.config.regions[0].satelliteTLogPolicy, id_used, true, satelliteDCs); + if(regions[0].satelliteTLogReplicationFactor > 0) { + std::set> satelliteDCs; + for(auto &s : regions[0].satellites) { + satelliteDCs.insert(s.dcId); } + getWorkersForTlogs(db.config, regions[0].satelliteTLogReplicationFactor, db.config.getDesiredSatelliteLogs(regions[0].dcId), regions[0].satelliteTLogPolicy, id_used, true, satelliteDCs); + } - getWorkerForRoleInDatacenter( db.config.regions[0].dcId, ProcessClass::Resolver, ProcessClass::ExcludeFit, db.config, id_used, true ); - getWorkerForRoleInDatacenter( db.config.regions[0].dcId, ProcessClass::Proxy, ProcessClass::ExcludeFit, db.config, id_used, true ); + getWorkerForRoleInDatacenter( regions[0].dcId, ProcessClass::Resolver, ProcessClass::ExcludeFit, db.config, id_used, true ); + getWorkerForRoleInDatacenter( regions[0].dcId, ProcessClass::Proxy, ProcessClass::ExcludeFit, db.config, id_used, true ); - vector> dcPriority; - dcPriority.push_back(db.config.regions[0].dcId); - dcPriority.push_back(db.config.regions[1].dcId); - desiredDcIds.set(dcPriority); - } catch( Error &e ) { - if(e.code() != error_code_no_more_servers) { - throw; + vector> dcPriority; + dcPriority.push_back(regions[0].dcId); + dcPriority.push_back(regions[1].dcId); + desiredDcIds.set(dcPriority); + } catch( Error &e ) { + if(e.code() != error_code_no_more_servers) { + throw; + } + } + } + + void checkRecoveryStalled() { + if(db.serverInfo->get().recoveryState < RecoveryState::RECOVERY_TRANSACTION && db.recoveryStalled ) { + if(db.config.regions.size() > 1 && clusterControllerDcId.present()) { + auto regions = db.config.regions; + if(clusterControllerDcId.get() == regions[0].dcId) { + std::swap(regions[0], regions[1]); } + ASSERT(clusterControllerDcId.get() == regions[1].dcId); + checkRegions(regions); } } } @@ -732,7 +768,10 @@ public: return false; } - checkPrimaryDC(); + if(db.config.regions.size() > 1 && clusterControllerDcId.present() && db.config.regions[0].priority > db.config.regions[1].priority && + db.config.regions[0].dcId != clusterControllerDcId.get() && versionDifferenceUpdated && datacenterVersionDifference < SERVER_KNOBS->MAX_VERSION_DIFFERENCE) { + checkRegions(db.config.regions); + } // Get master process auto masterWorker = id_worker.find(dbi.master.locality.processId()); @@ -755,10 +794,11 @@ public: if ( tlogWorker->second.priorityInfo.isExcluded ) return true; - if(logSet.isLocal && logSet.hasBestPolicy > HasBestPolicyNone) { - tlogs.push_back(std::make_pair(tlogWorker->second.interf, tlogWorker->second.processClass)); - } else if(logSet.isLocal) { + if(logSet.isLocal && logSet.locality == tagLocalitySatellite) { satellite_tlogs.push_back(std::make_pair(tlogWorker->second.interf, tlogWorker->second.processClass)); + } + else if(logSet.isLocal) { + tlogs.push_back(std::make_pair(tlogWorker->second.interf, tlogWorker->second.processClass)); } else { remote_tlogs.push_back(std::make_pair(tlogWorker->second.interf, tlogWorker->second.processClass)); } @@ -917,9 +957,11 @@ public: DBInfo db; Database cx; double startTime; + Version datacenterVersionDifference; + bool versionDifferenceUpdated; explicit ClusterControllerData( ClusterControllerFullInterface ccInterface ) - : id(ccInterface.id()), ac(false), betterMasterExistsChecker(Void()), gotProcessClasses(false), gotFullyRecoveredConfig(false), startTime(now()) + : id(ccInterface.id()), ac(false), betterMasterExistsChecker(Void()), gotProcessClasses(false), gotFullyRecoveredConfig(false), startTime(now()), datacenterVersionDifference(0), versionDifferenceUpdated(false) { auto serverInfo = db.serverInfo->get(); serverInfo.id = g_random->randomUniqueID(); @@ -987,6 +1029,7 @@ ACTOR Future clusterWatchDatabase( ClusterControllerData* cluster, Cluster iMaster = newMaster.get(); db->masterRegistrationCount = 0; + db->recoveryStalled = false; db->forceMasterFailure = Promise(); auto dbInfo = ServerDBInfo( LiteralStringRef("DB") ); @@ -1182,6 +1225,8 @@ ACTOR Future doCheckOutstandingMasterRequests( ClusterControllerData* self } void checkOutstandingMasterRequests( ClusterControllerData* self ) { + self->checkRecoveryStalled(); + if( !self->betterMasterExistsChecker.isReady() ) return; @@ -1465,6 +1510,7 @@ void clusterRegisterMaster( ClusterControllerData* self, RegisterMasterRequest c } db->masterRegistrationCount = req.registrationCount; + db->recoveryStalled = req.recoveryStalled; if ( req.configuration.present() ) { db->config = req.configuration.get(); @@ -1705,7 +1751,7 @@ ACTOR Future statusServer(FutureStream< StatusRequest> requests, } } - ErrorOr result = wait(errorOr(clusterGetStatus(self->db.serverInfo, self->cx, workers, self->db.workersWithIssues, self->db.clientsWithIssues, self->db.clientVersionMap, self->db.traceLogGroupMap, coordinators, incompatibleConnections))); + ErrorOr result = wait(errorOr(clusterGetStatus(self->db.serverInfo, self->cx, workers, self->db.workersWithIssues, self->db.clientsWithIssues, self->db.clientVersionMap, self->db.traceLogGroupMap, coordinators, incompatibleConnections, self->datacenterVersionDifference))); if (result.isError() && result.getError().code() == error_code_actor_cancelled) throw result.getError(); @@ -1893,6 +1939,65 @@ ACTOR Future updatedChangedDatacenters(ClusterControllerData *self) { } } +ACTOR Future updateDatacenterVersionDifference( ClusterControllerData *self ) { + loop { + self->versionDifferenceUpdated = false; + if(self->db.serverInfo->get().recoveryState >= RecoveryState::FULLY_RECOVERED && self->db.config.remoteTLogReplicationFactor == 0) { + self->versionDifferenceUpdated = true; + self->datacenterVersionDifference = 0; + Void _ = wait(self->db.serverInfo->onChange()); + continue; + } + + state Optional primaryLog; + state Optional remoteLog; + if(self->db.serverInfo->get().recoveryState == RecoveryState::REMOTE_RECOVERED) { + for(auto& logSet : self->db.serverInfo->get().logSystemConfig.tLogs) { + if(logSet.isLocal && logSet.locality != tagLocalitySatellite) { + for(auto& tLog : logSet.tLogs) { + if(tLog.present()) { + primaryLog = tLog.interf(); + break; + } + } + } + if(!logSet.isLocal) { + for(auto& tLog : logSet.tLogs) { + if(tLog.present()) { + remoteLog = tLog.interf(); + break; + } + } + } + } + } + + if(!primaryLog.present() || !remoteLog.present()) { + Void _ = wait(self->db.serverInfo->onChange()); + continue; + } + + state Future onChange = self->db.serverInfo->onChange(); + loop { + state Future primaryMetrics = primaryLog.get().getQueuingMetrics.getReply( TLogQueuingMetricsRequest() ); + state Future remoteMetrics = remoteLog.get().getQueuingMetrics.getReply( TLogQueuingMetricsRequest() ); + + Void _ = wait( ( success(primaryMetrics) && success(remoteMetrics) ) || onChange ); + if(onChange.isReady()) { + break; + } + + self->versionDifferenceUpdated = true; + self->datacenterVersionDifference = primaryMetrics.get().v - remoteMetrics.get().v; + + Void _ = wait( delay(SERVER_KNOBS->VERSION_LAG_METRIC_INTERVAL) || onChange ); + if(onChange.isReady()) { + break; + } + } + } +} + ACTOR Future clusterControllerCore( ClusterControllerFullInterface interf, Future leaderFail, ServerCoordinators coordinators ) { state ClusterControllerData self( interf ); state Future coordinationPingDelay = delay( SERVER_KNOBS->WORKER_COORDINATION_PING_DELAY ); @@ -1910,6 +2015,7 @@ ACTOR Future clusterControllerCore( ClusterControllerFullInterface interf, addActor.send( monitorClientTxnInfoConfigs(&self.db) ); addActor.send( updatedChangingDatacenters(&self) ); addActor.send( updatedChangedDatacenters(&self) ); + addActor.send( updateDatacenterVersionDifference(&self) ); //printf("%s: I am the cluster controller\n", g_network->getLocalAddress().toString().c_str()); loop choose { diff --git a/fdbserver/ClusterRecruitmentInterface.h b/fdbserver/ClusterRecruitmentInterface.h index d7b6200ef2..e07de7e444 100644 --- a/fdbserver/ClusterRecruitmentInterface.h +++ b/fdbserver/ClusterRecruitmentInterface.h @@ -202,6 +202,7 @@ struct RegisterMasterRequest { Optional configuration; vector priorCommittedLogServers; RecoveryState recoveryState; + bool recoveryStalled; ReplyPromise reply; @@ -210,7 +211,7 @@ struct RegisterMasterRequest { template void serialize( Ar& ar ) { ASSERT( ar.protocolVersion() >= 0x0FDB00A200040001LL ); - ar & dbName & id & mi & logSystemConfig & proxies & resolvers & recoveryCount & registrationCount & configuration & priorCommittedLogServers & recoveryState & reply; + ar & dbName & id & mi & logSystemConfig & proxies & resolvers & recoveryCount & registrationCount & configuration & priorCommittedLogServers & recoveryState & recoveryStalled & reply; } }; diff --git a/fdbserver/DBCoreState.h b/fdbserver/DBCoreState.h index 1ba0e240f1..8df078da35 100644 --- a/fdbserver/DBCoreState.h +++ b/fdbserver/DBCoreState.h @@ -41,20 +41,19 @@ struct CoreTLogSet { std::vector< LocalityData > tLogLocalities; // Stores the localities of the log servers IRepPolicyRef tLogPolicy; bool isLocal; - int32_t hasBestPolicy; int8_t locality; Version startVersion; - CoreTLogSet() : tLogWriteAntiQuorum(0), tLogReplicationFactor(0), isLocal(true), hasBestPolicy(HasBestPolicyId), locality(tagLocalityUpgraded), startVersion(invalidVersion) {} + CoreTLogSet() : tLogWriteAntiQuorum(0), tLogReplicationFactor(0), isLocal(true), locality(tagLocalityUpgraded), startVersion(invalidVersion) {} bool operator == (CoreTLogSet const& rhs) const { - return tLogs == rhs.tLogs && tLogWriteAntiQuorum == rhs.tLogWriteAntiQuorum && tLogReplicationFactor == rhs.tLogReplicationFactor && isLocal == rhs.isLocal && hasBestPolicy == rhs.hasBestPolicy && + return tLogs == rhs.tLogs && tLogWriteAntiQuorum == rhs.tLogWriteAntiQuorum && tLogReplicationFactor == rhs.tLogReplicationFactor && isLocal == rhs.isLocal && locality == rhs.locality && startVersion == rhs.startVersion && ((!tLogPolicy && !rhs.tLogPolicy) || (tLogPolicy && rhs.tLogPolicy && (tLogPolicy->info() == rhs.tLogPolicy->info()))); } template void serialize(Archive& ar) { - ar & tLogs & tLogWriteAntiQuorum & tLogReplicationFactor & tLogPolicy & tLogLocalities & isLocal & hasBestPolicy & locality & startVersion; + ar & tLogs & tLogWriteAntiQuorum & tLogReplicationFactor & tLogPolicy & tLogLocalities & isLocal & locality & startVersion; } }; @@ -110,7 +109,7 @@ struct DBCoreState { template void serialize(Archive& ar) { //FIXME: remove when we no longer need to test upgrades from 4.X releases - if(ar.protocolVersion() < 0x0FDB00A460010001LL) { + if(g_network->isSimulated() && ar.protocolVersion() < 0x0FDB00A460010001LL) { TraceEvent("ElapsedTime").detail("SimTime", now()).detail("RealTime", 0).detail("RandomUnseed", 0); flushAndExit(0); } diff --git a/fdbserver/Knobs.cpp b/fdbserver/Knobs.cpp index 281cf6047b..d1052c3d64 100644 --- a/fdbserver/Knobs.cpp +++ b/fdbserver/Knobs.cpp @@ -55,8 +55,8 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) { init( MAX_QUEUE_COMMIT_BYTES, 15e6 ); if( randomize && BUGGIFY ) MAX_QUEUE_COMMIT_BYTES = 5000; // Versions - init( MAX_VERSIONS_IN_FLIGHT, 100000000 ); - init( VERSIONS_PER_SECOND, 1000000 ); + init( VERSIONS_PER_SECOND, 1e6 ); + init( MAX_VERSIONS_IN_FLIGHT, 100 * VERSIONS_PER_SECOND ); init( MAX_READ_TRANSACTION_LIFE_VERSIONS, 5 * VERSIONS_PER_SECOND ); if (randomize && BUGGIFY) MAX_READ_TRANSACTION_LIFE_VERSIONS=std::max(1, 0.1 * VERSIONS_PER_SECOND); else if( randomize && BUGGIFY ) MAX_READ_TRANSACTION_LIFE_VERSIONS = 10 * VERSIONS_PER_SECOND; init( MAX_WRITE_TRANSACTION_LIFE_VERSIONS, 5 * VERSIONS_PER_SECOND ); if (randomize && BUGGIFY) MAX_WRITE_TRANSACTION_LIFE_VERSIONS=std::max(1, 1 * VERSIONS_PER_SECOND); init( MAX_COMMIT_BATCH_INTERVAL, 0.5 ); if( randomize && BUGGIFY ) MAX_COMMIT_BATCH_INTERVAL = 2.0; // Each master proxy generates a CommitTransactionBatchRequest at least this often, so that versions always advance smoothly @@ -256,6 +256,8 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) { init( MIN_BALANCE_DIFFERENCE, 10000 ); init( SECONDS_BEFORE_NO_FAILURE_DELAY, 8 * 3600 ); init( MAX_TXS_SEND_MEMORY, 1e7 ); if( randomize && BUGGIFY ) MAX_TXS_SEND_MEMORY = 1e5; + init( MAX_RECOVERY_VERSIONS, 200 * VERSIONS_PER_SECOND ); if( randomize && BUGGIFY ) MAX_RECOVERY_VERSIONS = VERSIONS_PER_SECOND; + init( MAX_RECOVERY_TIME, 20.0 ); if( randomize && BUGGIFY ) MAX_RECOVERY_TIME = 1.0; // Resolver init( SAMPLE_OFFSET_PER_KEY, 100 ); @@ -276,6 +278,9 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) { init( ATTEMPT_RECRUITMENT_DELAY, 0.035 ); init( WORKER_FAILURE_TIME, 1.0 ); if( randomize && BUGGIFY ) WORKER_FAILURE_TIME = 10.0; init( CHECK_BETTER_MASTER_INTERVAL, 1.0 ); if( randomize && BUGGIFY ) CHECK_BETTER_MASTER_INTERVAL = 0.001; + init( VERSION_LAG_METRIC_INTERVAL, 0.5 ); if( randomize && BUGGIFY ) VERSION_LAG_METRIC_INTERVAL = 10.0; + init( MAX_VERSION_DIFFERENCE, 20 * VERSIONS_PER_SECOND ); + init( INCOMPATIBLE_PEERS_LOGGING_INTERVAL, 600 ); if( randomize && BUGGIFY ) INCOMPATIBLE_PEERS_LOGGING_INTERVAL = 60.0; init( EXPECTED_MASTER_FITNESS, ProcessClass::GoodFit ); init( EXPECTED_TLOG_FITNESS, ProcessClass::GoodFit ); diff --git a/fdbserver/Knobs.h b/fdbserver/Knobs.h index 0c478e6eee..92f222bb2a 100644 --- a/fdbserver/Knobs.h +++ b/fdbserver/Knobs.h @@ -60,11 +60,10 @@ public: int64_t MAX_QUEUE_COMMIT_BYTES; // Versions + int VERSIONS_PER_SECOND; int MAX_VERSIONS_IN_FLIGHT; int MAX_READ_TRANSACTION_LIFE_VERSIONS; int MAX_WRITE_TRANSACTION_LIFE_VERSIONS; - - int VERSIONS_PER_SECOND; double MAX_COMMIT_BATCH_INTERVAL; // Each master proxy generates a CommitTransactionBatchRequest at least this often, so that versions always advance smoothly // Data distribution queue @@ -199,6 +198,8 @@ public: int64_t MIN_BALANCE_DIFFERENCE; double SECONDS_BEFORE_NO_FAILURE_DELAY; int64_t MAX_TXS_SEND_MEMORY; + int64_t MAX_RECOVERY_VERSIONS; + double MAX_RECOVERY_TIME; // Resolver int64_t SAMPLE_OFFSET_PER_KEY; @@ -219,6 +220,8 @@ public: double WORKER_FAILURE_TIME; double CHECK_BETTER_MASTER_INTERVAL; double INCOMPATIBLE_PEERS_LOGGING_INTERVAL; + double VERSION_LAG_METRIC_INTERVAL; + int64_t MAX_VERSION_DIFFERENCE; // Knobs used to select the best policy (via monte carlo) int POLICY_RATING_TESTS; // number of tests per policy (in order to compare) diff --git a/fdbserver/LogRouter.actor.cpp b/fdbserver/LogRouter.actor.cpp index 8b5f88e246..6f33c9f9f6 100644 --- a/fdbserver/LogRouter.actor.cpp +++ b/fdbserver/LogRouter.actor.cpp @@ -105,7 +105,6 @@ struct LogRouterData { //setup just enough of a logSet to be able to call getPushLocations logSet.logServers.resize(req.tLogLocalities.size()); logSet.tLogPolicy = req.tLogPolicy; - logSet.hasBestPolicy = req.hasBestPolicy; logSet.locality = req.locality; logSet.updateLocalitySet(req.tLogLocalities); @@ -413,7 +412,7 @@ ACTOR Future logRouter( Reference> db) { try { - TraceEvent("LogRouterStart", interf.id()).detail("Start", req.startVersion).detail("Tag", req.routerTag.toString()).detail("Localities", req.tLogLocalities.size()).detail("HasBestPolicy", req.hasBestPolicy).detail("Locality", req.locality); + TraceEvent("LogRouterStart", interf.id()).detail("Start", req.startVersion).detail("Tag", req.routerTag.toString()).detail("Localities", req.tLogLocalities.size()).detail("Locality", req.locality); state Future core = logRouterCore(interf, req, db); loop choose{ when(Void _ = wait(core)) { return Void(); } diff --git a/fdbserver/LogSystem.h b/fdbserver/LogSystem.h index 3011a43d2d..c11d042659 100644 --- a/fdbserver/LogSystem.h +++ b/fdbserver/LogSystem.h @@ -44,12 +44,11 @@ public: std::vector logIndexArray; std::map logEntryMap; bool isLocal; - int32_t hasBestPolicy; int8_t locality; Version startVersion; std::vector> replies; - LogSet() : tLogWriteAntiQuorum(0), tLogReplicationFactor(0), isLocal(true), hasBestPolicy(HasBestPolicyId), locality(tagLocalityInvalid), startVersion(invalidVersion) {} + LogSet() : tLogWriteAntiQuorum(0), tLogReplicationFactor(0), isLocal(true), locality(tagLocalityInvalid), startVersion(invalidVersion) {} std::string logRouterString() { std::string result; @@ -74,17 +73,8 @@ public: } int bestLocationFor( Tag tag ) { - if(hasBestPolicy == HasBestPolicyNone) { - return -1; - } else if(hasBestPolicy == HasBestPolicyId) { - //This policy supports upgrades from 5.X - if(tag == txsTag) return txsTagOld % logServers.size(); - return tag.id % logServers.size(); - } else { - //Unsupported policy - ASSERT(false); - throw internal_error(); - } + if(tag == txsTag) return txsTagOld % logServers.size(); + return tag.id % logServers.size(); } void updateLocalitySet() { @@ -127,11 +117,9 @@ public: alsoServers.clear(); resultEntries.clear(); - if(hasBestPolicy) { - for(auto& t : tags) { - if(t.locality == locality || t.locality == tagLocalitySpecial || locality == tagLocalitySpecial || (isLocal && t.locality == tagLocalityLogRouter)) { - newLocations.push_back(bestLocationFor(t)); - } + for(auto& t : tags) { + if(locality == tagLocalitySpecial || t.locality == locality || t.locality < 0) { + newLocations.push_back(bestLocationFor(t)); } } @@ -490,7 +478,8 @@ struct ILogSystem { // Call only on an ILogSystem obtained from recoverAndEndEpoch() // Returns the first unreadable version number of the recovered epoch (i.e. message version numbers < (get_end(), 0) will be readable) - virtual Future> newEpoch( struct RecruitFromConfigurationReply const& recr, Future const& fRemoteWorkers, DatabaseConfiguration const& config, LogEpoch recoveryCount, int8_t primaryLocality, int8_t remoteLocality, std::vector const& allTags ) = 0; + virtual Future> newEpoch( struct RecruitFromConfigurationReply const& recr, Future const& fRemoteWorkers, DatabaseConfiguration const& config, + LogEpoch recoveryCount, int8_t primaryLocality, int8_t remoteLocality, std::vector const& allTags, Reference> const& recruitmentStalled ) = 0; // Call only on an ILogSystem obtained from recoverAndEndEpoch() // Returns an ILogSystem representing a new epoch immediately following this one. The new epoch is only provisional until the caller updates the coordinated DBCoreState diff --git a/fdbserver/LogSystemConfig.h b/fdbserver/LogSystemConfig.h index 82ff90d64f..0caea9d362 100644 --- a/fdbserver/LogSystemConfig.h +++ b/fdbserver/LogSystemConfig.h @@ -55,8 +55,6 @@ protected: Optional iface; }; -enum { HasBestPolicyNone = 0, HasBestPolicyId = 1 }; - struct TLogSet { std::vector> tLogs; std::vector> logRouters; @@ -64,18 +62,17 @@ struct TLogSet { std::vector< LocalityData > tLogLocalities; // Stores the localities of the log servers IRepPolicyRef tLogPolicy; bool isLocal; - int32_t hasBestPolicy; int8_t locality; Version startVersion; - TLogSet() : tLogWriteAntiQuorum(0), tLogReplicationFactor(0), isLocal(true), hasBestPolicy(HasBestPolicyId), locality(tagLocalityInvalid), startVersion(invalidVersion) {} + TLogSet() : tLogWriteAntiQuorum(0), tLogReplicationFactor(0), isLocal(true), locality(tagLocalityInvalid), startVersion(invalidVersion) {} std::string toString() const { - return format("anti: %d replication: %d local: %d best: %d routers: %d tLogs: %s locality: %d", tLogWriteAntiQuorum, tLogReplicationFactor, isLocal, hasBestPolicy, logRouters.size(), describe(tLogs).c_str(), locality); + return format("anti: %d replication: %d local: %d routers: %d tLogs: %s locality: %d", tLogWriteAntiQuorum, tLogReplicationFactor, isLocal, logRouters.size(), describe(tLogs).c_str(), locality); } bool operator == ( const TLogSet& rhs ) const { - if (tLogWriteAntiQuorum != rhs.tLogWriteAntiQuorum || tLogReplicationFactor != rhs.tLogReplicationFactor || isLocal != rhs.isLocal || hasBestPolicy != rhs.hasBestPolicy || + if (tLogWriteAntiQuorum != rhs.tLogWriteAntiQuorum || tLogReplicationFactor != rhs.tLogReplicationFactor || isLocal != rhs.isLocal || startVersion != rhs.startVersion || tLogs.size() != rhs.tLogs.size() || locality != rhs.locality || logRouters.size() != rhs.logRouters.size()) { return false; } @@ -96,7 +93,7 @@ struct TLogSet { } bool isEqualIds(TLogSet const& r) const { - if (tLogWriteAntiQuorum != r.tLogWriteAntiQuorum || tLogReplicationFactor != r.tLogReplicationFactor || isLocal != r.isLocal || hasBestPolicy != r.hasBestPolicy || startVersion != r.startVersion || tLogs.size() != r.tLogs.size() || locality != r.locality) { + if (tLogWriteAntiQuorum != r.tLogWriteAntiQuorum || tLogReplicationFactor != r.tLogReplicationFactor || isLocal != r.isLocal || startVersion != r.startVersion || tLogs.size() != r.tLogs.size() || locality != r.locality) { return false; } if ((tLogPolicy && !r.tLogPolicy) || (!tLogPolicy && r.tLogPolicy) || (tLogPolicy && (tLogPolicy->info() != r.tLogPolicy->info()))) { @@ -112,7 +109,7 @@ struct TLogSet { template void serialize( Ar& ar ) { - ar & tLogs & logRouters & tLogWriteAntiQuorum & tLogReplicationFactor & tLogPolicy & tLogLocalities & isLocal & hasBestPolicy & locality & startVersion; + ar & tLogs & logRouters & tLogWriteAntiQuorum & tLogReplicationFactor & tLogPolicy & tLogLocalities & isLocal & locality & startVersion; } }; diff --git a/fdbserver/SimulatedCluster.actor.cpp b/fdbserver/SimulatedCluster.actor.cpp index 95102554c1..f311cc43de 100644 --- a/fdbserver/SimulatedCluster.actor.cpp +++ b/fdbserver/SimulatedCluster.actor.cpp @@ -762,28 +762,32 @@ void SimulationConfig::generateNormalConfig(int minimumReplication) { if(generateFearless || (datacenters == 2 && g_random->random01() < 0.5)) { StatusObject primaryObj; - primaryObj["id"] = "0"; - primaryObj["priority"] = 0; + StatusObject primaryDcObj; + primaryDcObj["id"] = "0"; + primaryDcObj["priority"] = 2; + StatusArray primaryDcArr; + primaryDcArr.push_back(primaryDcObj); StatusObject remoteObj; - remoteObj["id"] = "1"; - remoteObj["priority"] = 1; + StatusObject remoteDcObj; + remoteDcObj["id"] = "1"; + remoteDcObj["priority"] = 1; + StatusArray remoteDcArr; + remoteDcArr.push_back(remoteDcObj); bool needsRemote = generateFearless; if(generateFearless) { StatusObject primarySatelliteObj; primarySatelliteObj["id"] = "2"; primarySatelliteObj["priority"] = 1; - StatusArray primarySatellitesArr; - primarySatellitesArr.push_back(primarySatelliteObj); - primaryObj["satellites"] = primarySatellitesArr; + primarySatelliteObj["satellite"] = 1; + primaryDcArr.push_back(primarySatelliteObj); StatusObject remoteSatelliteObj; remoteSatelliteObj["id"] = "3"; remoteSatelliteObj["priority"] = 1; - StatusArray remoteSatellitesArr; - remoteSatellitesArr.push_back(remoteSatelliteObj); - remoteObj["satellites"] = remoteSatellitesArr; + remoteSatelliteObj["satellite"] = 1; + remoteDcArr.push_back(remoteSatelliteObj); int satellite_replication_type = g_random->randomInt(0,5); switch (satellite_replication_type) { @@ -823,7 +827,7 @@ void SimulationConfig::generateNormalConfig(int minimumReplication) { primaryObj["satellite_logs"] = logs; remoteObj["satellite_logs"] = logs; } - + int remote_replication_type = g_random->randomInt(0,5); switch (remote_replication_type) { case 0: { @@ -858,6 +862,9 @@ void SimulationConfig::generateNormalConfig(int minimumReplication) { if (g_random->random01() < 0.25) db.remoteDesiredTLogCount = g_random->randomInt(1,7); } + primaryObj["datacenters"] = primaryDcArr; + remoteObj["datacenters"] = remoteDcArr; + StatusArray regionArr; regionArr.push_back(primaryObj); if(needsRemote || g_random->random01() < 0.5) { @@ -866,8 +873,8 @@ void SimulationConfig::generateNormalConfig(int minimumReplication) { set_config("regions=" + json_spirit::write_string(json_spirit::mValue(regionArr), json_spirit::Output_options::none)); } - - if(generateFearless && minimumReplication > 1) { + + if(generateFearless && minimumReplication > 1) { //low latency tests in fearless configurations need 4 machines per datacenter (3 for triple replication, 1 that is down during failures). machine_count = 16; } else if(generateFearless) { diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index 6c5188b9f5..1b2387789a 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -1068,8 +1068,9 @@ ACTOR static Future latencyProbeFetcher(Database cx, StatusArray * return statusObj; } -ACTOR static Future> loadConfiguration(Database cx, StatusArray *messages, std::set *status_incomplete_reasons){ +ACTOR static Future,Optional>> loadConfiguration(Database cx, StatusArray *messages, std::set *status_incomplete_reasons){ state Optional result; + state Optional fullReplication; state Transaction tr(cx); state Future getConfTimeout = delay(5.0); @@ -1090,7 +1091,34 @@ ACTOR static Future> loadConfiguration(Database result = configuration; } when(Void _ = wait(getConfTimeout)) { - messages->push_back(makeMessage("unreadable_configuration", "Unable to read database configuration.")); + if(!result.present()) { + messages->push_back(makeMessage("unreadable_configuration", "Unable to read database configuration.")); + } else { + messages->push_back(makeMessage("full_replication_timeout", "Unable to read datacenter replicas.")); + } + break; + } + } + + ASSERT(result.present()); + state std::vector>> replicasFutures; + for(auto& region : result.get().regions) { + replicasFutures.push_back(tr.get(datacenterReplicasKeyFor(region.dcId))); + } + + choose { + when( Void _ = wait( waitForAll(replicasFutures) ) ) { + int unreplicated = 0; + for(int i = 0; i < result.get().regions.size(); i++) { + if( !replicasFutures[i].get().present() || decodeDatacenterReplicasValue(replicasFutures[i].get().get()) < result.get().storageTeamSize ) { + unreplicated++; + } + } + + fullReplication = (!unreplicated || (result.get().remoteTLogReplicationFactor == 0 && unreplicated < result.get().regions.size())); + } + when(Void _ = wait(getConfTimeout)) { + messages->push_back(makeMessage("full_replication_timeout", "Unable to read datacenter replicas.")); } } break; @@ -1099,7 +1127,7 @@ ACTOR static Future> loadConfiguration(Database Void _ = wait(tr.onError(e)); } } - return result; + return std::make_pair(result, fullReplication); } static StatusObject configurationFetcher(Optional conf, ServerCoordinators coordinators, std::set *incomplete_reasons) { @@ -1283,17 +1311,35 @@ ACTOR static Future>> getTLogsAndMe return results; } -static std::set getTLogEligibleMachines(vector> workers, DatabaseConfiguration configuration) { - std::set tlogEligibleMachines; +static int getExtraTLogEligibleMachines(vector> workers, DatabaseConfiguration configuration) { + std::set allMachines; + std::map> dcId_machine; for(auto worker : workers) { if(worker.second.machineClassFitness(ProcessClass::TLog) < ProcessClass::NeverAssign && !configuration.isExcludedServer(worker.first.address())) { - tlogEligibleMachines.insert(worker.first.locality.zoneId().get()); + allMachines.insert(worker.first.locality.zoneId().get()); + if(worker.first.locality.dcId().present()) { + dcId_machine[worker.first.locality.dcId().get()].insert(worker.first.locality.zoneId().get()); + } } } - return tlogEligibleMachines; + if(configuration.regions.size() == 0) { + return allMachines.size() - std::max( configuration.remoteTLogReplicationFactor, std::max(configuration.tLogReplicationFactor, configuration.storageTeamSize) ); + } + int extraTlogEligibleMachines = std::numeric_limits::max(); + for(auto& region : configuration.regions) { + extraTlogEligibleMachines = std::min( extraTlogEligibleMachines, dcId_machine[region.dcId].size() - std::max( configuration.remoteTLogReplicationFactor, std::max(configuration.tLogReplicationFactor, configuration.storageTeamSize) ) ); + if(region.satelliteTLogReplicationFactor > 0) { + int totalSatelliteEligible = 0; + for(auto& sat : region.satellites) { + totalSatelliteEligible += dcId_machine[sat.dcId].size(); + } + extraTlogEligibleMachines = std::min( extraTlogEligibleMachines, totalSatelliteEligible - region.satelliteTLogReplicationFactor ); + } + } + return extraTlogEligibleMachines; } ACTOR static Future workloadStatusFetcher(Reference> db, vector> workers, std::pair mWorker, @@ -1465,12 +1511,20 @@ static StatusArray oldTlogFetcher(int* oldLogFaultTolerance, Reference>& workers, int numTLogEligibleMachines, int minReplicasRemaining) { */ -static StatusObject faultToleranceStatusFetcher(DatabaseConfiguration configuration, ServerCoordinators coordinators, std::vector>& workers, int numTLogEligibleMachines, int minReplicasRemaining) { +static StatusObject faultToleranceStatusFetcher(DatabaseConfiguration configuration, ServerCoordinators coordinators, std::vector>& workers, int extraTlogEligibleMachines, int minReplicasRemaining) { StatusObject statusObj; // without losing data @@ -1528,7 +1582,7 @@ static StatusObject faultToleranceStatusFetcher(DatabaseConfiguration configurat statusObj["max_machine_failures_without_losing_data"] = std::max(machineFailuresWithoutLosingData, 0); // without losing availablity - statusObj["max_machine_failures_without_losing_availability"] = std::max(std::min(numTLogEligibleMachines - configuration.minMachinesRequiredPerDatacenter(), machineFailuresWithoutLosingData), 0); + statusObj["max_machine_failures_without_losing_availability"] = std::max(std::min(extraTlogEligibleMachines, machineFailuresWithoutLosingData), 0); return statusObj; } @@ -1696,7 +1750,8 @@ ACTOR Future clusterGetStatus( ClientVersionMap clientVersionMap, std::map traceLogGroupMap, ServerCoordinators coordinators, - std::vector incompatibleConnections ) + std::vector incompatibleConnections, + Version datacenterVersionDifference ) { // since we no longer offer multi-database support, all databases must be named DB state std::string dbName = "DB"; @@ -1776,11 +1831,17 @@ ACTOR Future clusterGetStatus( statusObj["protocol_version"] = format("%llx", currentProtocolVersion); - state Optional configuration = Optional(); + state Optional configuration; + state Optional fullReplication; if(!(recoveryStateStatus.count("name") && recoveryStateStatus["name"] == RecoveryStatus::names[RecoveryStatus::configuration_missing])) { - Optional _configuration = wait(loadConfiguration(cx, &messages, &status_incomplete_reasons)); - configuration = _configuration; + std::pair,Optional> loadResults = wait(loadConfiguration(cx, &messages, &status_incomplete_reasons)); + configuration = loadResults.first; + fullReplication = loadResults.second; + } + + if(fullReplication.present()) { + statusObj["full_replication"] = fullReplication.get(); } statusObj["machines"] = machineStatusFetcher(mMetrics, workers, configuration, &status_incomplete_reasons); @@ -1817,8 +1878,8 @@ ACTOR Future clusterGetStatus( } if(configuration.present()) { - std::set tlogEligibleMachines = getTLogEligibleMachines(workers, configuration.get()); - statusObj["fault_tolerance"] = faultToleranceStatusFetcher(configuration.get(), coordinators, workers, tlogEligibleMachines.size(), minReplicasRemaining); + int extraTlogEligibleMachines = getExtraTLogEligibleMachines(workers, configuration.get()); + statusObj["fault_tolerance"] = faultToleranceStatusFetcher(configuration.get(), coordinators, workers, extraTlogEligibleMachines, minReplicasRemaining); } StatusObject configObj = configurationFetcher(configuration, coordinators, &status_incomplete_reasons); @@ -1880,6 +1941,7 @@ ACTOR Future clusterGetStatus( incompatibleConnectionsArray.push_back(it.toString()); } statusObj["incompatible_connections"] = incompatibleConnectionsArray; + statusObj["datacenter_version_difference"] = datacenterVersionDifference; if (!recoveryStateStatus.empty()) statusObj["recovery_state"] = recoveryStateStatus; diff --git a/fdbserver/Status.h b/fdbserver/Status.h index 99e8ed38b5..8d1e8a3bd3 100644 --- a/fdbserver/Status.h +++ b/fdbserver/Status.h @@ -32,6 +32,7 @@ typedef std::map< NetworkAddress, Standalone> > Clie std::string extractAttribute( std::string const& expanded, std::string const& attributeToExtract ); Future clusterGetStatus( Reference> const& db, Database const& cx, vector> const& workers, - ProcessIssuesMap const& workerIssues, ProcessIssuesMap const& clientIssues, ClientVersionMap const& clientVersionMap, std::map const& traceLogGroupMap, ServerCoordinators const& coordinators, std::vector const& incompatibleConnections ); + ProcessIssuesMap const& workerIssues, ProcessIssuesMap const& clientIssues, ClientVersionMap const& clientVersionMap, std::map const& traceLogGroupMap, + ServerCoordinators const& coordinators, std::vector const& incompatibleConnections, Version const& datacenterVersionDifference ); #endif diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index 3018f9f7ba..3d62b0cfdb 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -1621,8 +1621,10 @@ ACTOR Future restorePersistentState( TLogData* self, LocalityData locality if (fFormat.get().present() && !persistFormatReadableRange.contains( fFormat.get().get() )) { //FIXME: remove when we no longer need to test upgrades from 4.X releases - TraceEvent("ElapsedTime").detail("SimTime", now()).detail("RealTime", 0).detail("RandomUnseed", 0); - flushAndExit(0); + if(g_network->isSimulated()) { + TraceEvent("ElapsedTime").detail("SimTime", now()).detail("RealTime", 0).detail("RandomUnseed", 0); + flushAndExit(0); + } TraceEvent(SevError, "UnsupportedDBFormat", self->dbgid).detail("Format", printable(fFormat.get().get())).detail("Expected", persistFormat.value.toString()); throw worker_recovery_failed(); @@ -1917,7 +1919,7 @@ ACTOR Future tLogStart( TLogData* self, InitializeTLogRequest req, Localit self->newLogData.trigger(); if(req.isPrimary && !logData->stopped && logData->unrecoveredBefore <= req.recoverAt) { - if(req.recoverFrom.logRouterTags > 0 && req.locality != tagLocalityInvalid) { + if(req.recoverFrom.logRouterTags > 0 && req.locality != tagLocalitySatellite) { logData->logRouterPopToVersion = req.recoverAt; std::vector tags; tags.push_back(logData->remoteTag); diff --git a/fdbserver/TagPartitionedLogSystem.actor.cpp b/fdbserver/TagPartitionedLogSystem.actor.cpp index ce63ba5768..c6c39748c6 100644 --- a/fdbserver/TagPartitionedLogSystem.actor.cpp +++ b/fdbserver/TagPartitionedLogSystem.actor.cpp @@ -150,7 +150,6 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedtLogPolicy = tLogSet.tLogPolicy; logSet->tLogLocalities = tLogSet.tLogLocalities; logSet->isLocal = tLogSet.isLocal; - logSet->hasBestPolicy = tLogSet.hasBestPolicy; logSet->locality = tLogSet.locality; logSet->startVersion = tLogSet.startVersion; logSet->updateLocalitySet(); @@ -176,7 +175,6 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedtLogPolicy = tLogData.tLogPolicy; logSet->tLogLocalities = tLogData.tLogLocalities; logSet->isLocal = tLogData.isLocal; - logSet->hasBestPolicy = tLogData.hasBestPolicy; logSet->locality = tLogData.locality; logSet->startVersion = tLogData.startVersion; //logSet.UpdateLocalitySet(); we do not update the locality set, since we never push to old logs @@ -211,7 +209,6 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedtLogPolicy = tLogSet.tLogPolicy; logSet->tLogLocalities = tLogSet.tLogLocalities; logSet->isLocal = tLogSet.isLocal; - logSet->hasBestPolicy = tLogSet.hasBestPolicy; logSet->locality = tLogSet.locality; logSet->startVersion = tLogSet.startVersion; //logSet->updateLocalitySet(); we do not update the locality set, since we never push to old logs @@ -237,7 +234,6 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedtLogPolicy = tLogSet.tLogPolicy; logSet->tLogLocalities = tLogSet.tLogLocalities; logSet->isLocal = tLogSet.isLocal; - logSet->hasBestPolicy = tLogSet.hasBestPolicy; logSet->locality = tLogSet.locality; logSet->startVersion = tLogSet.startVersion; //logSet->updateLocalitySet(); we do not update the locality set, since we never push to old logs @@ -272,7 +268,6 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedtLogReplicationFactor; coreSet.tLogPolicy = t->tLogPolicy; coreSet.isLocal = t->isLocal; - coreSet.hasBestPolicy = t->hasBestPolicy; coreSet.locality = t->locality; coreSet.startVersion = t->startVersion; newState.tLogs.push_back(coreSet); @@ -294,7 +289,6 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedtLogReplicationFactor; coreSet.tLogPolicy = t->tLogPolicy; coreSet.isLocal = t->isLocal; - coreSet.hasBestPolicy = t->hasBestPolicy; coreSet.locality = t->locality; coreSet.startVersion = t->startVersion; newState.oldTLogData[i].tLogs.push_back(coreSet); @@ -414,17 +408,15 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted peekAll( UID dbgid, Version begin, Version end, Tag tag, bool parallelGetMore, bool throwIfDead ) { - int bestSet = -1; - int nextBestSet = -1; + int bestSet = 0; std::vector> localSets; Version lastBegin = 0; for(auto& log : tLogs) { - if(log->isLocal && log->logServers.size() && (log->locality == tag.locality || tag.locality == tagLocalitySpecial || log->locality == tagLocalitySpecial || log->locality == tagLocalityUpgraded || tag.locality == tagLocalityLogRouter)) { + if(log->isLocal && log->logServers.size() && (log->locality == tagLocalitySpecial || log->locality == tagLocalityUpgraded || log->locality == tag.locality || tag.locality < 0)) { lastBegin = std::max(lastBegin, log->startVersion); localSets.push_back(log); - if(log->hasBestPolicy) { + if(log->locality != tagLocalitySatellite) { bestSet = localSets.size()-1; - nextBestSet = bestSet; } } } @@ -434,17 +426,15 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted= lastBegin) { - TraceEvent("TLogPeekAllCurrentOnly", dbgid).detail("Tag", tag.toString()).detail("Begin", begin).detail("End", end).detail("BestLogs", bestSet >= 0 ? localSets[bestSet]->logServerString() : "no best set"); - return Reference( new ILogSystem::SetPeekCursor( localSets, bestSet == -1 ? nextBestSet : bestSet, - bestSet >= 0 ? localSets[bestSet]->bestLocationFor( tag ) : -1, tag, begin, end, parallelGetMore ) ); + TraceEvent("TLogPeekAllCurrentOnly", dbgid).detail("Tag", tag.toString()).detail("Begin", begin).detail("End", end).detail("BestLogs", localSets[bestSet]->logServerString()); + return Reference( new ILogSystem::SetPeekCursor( localSets, bestSet, localSets[bestSet]->bestLocationFor( tag ), tag, begin, end, parallelGetMore ) ); } else { std::vector< Reference > cursors; std::vector< LogMessageVersion > epochEnds; if(lastBegin < end) { - TraceEvent("TLogPeekAllAddingCurrent", dbgid).detail("Tag", tag.toString()).detail("Begin", begin).detail("End", end).detail("BestLogs", bestSet >= 0 ? localSets[bestSet]->logServerString() : "no best set"); - cursors.push_back( Reference( new ILogSystem::SetPeekCursor( localSets, bestSet == -1 ? nextBestSet : bestSet, - bestSet >= 0 ? localSets[bestSet]->bestLocationFor( tag ) : -1, tag, lastBegin, end, parallelGetMore)) ); + TraceEvent("TLogPeekAllAddingCurrent", dbgid).detail("Tag", tag.toString()).detail("Begin", begin).detail("End", end).detail("BestLogs", localSets[bestSet]->logServerString()); + cursors.push_back( Reference( new ILogSystem::SetPeekCursor( localSets, bestSet, localSets[bestSet]->bestLocationFor( tag ), tag, lastBegin, end, parallelGetMore)) ); } int i = 0; while(begin < lastBegin) { @@ -460,17 +450,15 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted> localOldSets; Version thisBegin = begin; for(auto& log : oldLogData[i].tLogs) { - if(log->isLocal && log->logServers.size() && (log->locality == tag.locality || tag.locality == tagLocalitySpecial || log->locality == tagLocalitySpecial || log->locality == tagLocalityUpgraded || tag.locality == tagLocalityLogRouter)) { + if(log->isLocal && log->logServers.size() && (log->locality == tagLocalitySpecial || log->locality == tagLocalityUpgraded || log->locality == tag.locality || tag.locality < 0)) { thisBegin = std::max(thisBegin, log->startVersion); localOldSets.push_back(log); - if(log->hasBestPolicy) { + if(log->locality != tagLocalitySatellite) { bestOldSet = localOldSets.size()-1; - nextBestOldSet = bestOldSet; } } } @@ -486,10 +474,8 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted= 0 ? localOldSets[bestOldSet]->logServerString() : "no best set").detail("LastBegin", lastBegin).detail("ThisBegin", thisBegin); - cursors.push_back( Reference( new ILogSystem::SetPeekCursor( localOldSets, bestOldSet == -1 ? nextBestOldSet : bestOldSet, - bestOldSet >= 0 ? localOldSets[bestOldSet]->bestLocationFor( tag ) : -1, tag, thisBegin, std::min(lastBegin, end), parallelGetMore)) ); + TraceEvent("TLogPeekAllAddingOld", dbgid).detail("Tag", tag.toString()).detail("Begin", begin).detail("End", end).detail("BestLogs", localOldSets[bestOldSet]->logServerString()).detail("LastBegin", lastBegin).detail("ThisBegin", thisBegin); + cursors.push_back( Reference( new ILogSystem::SetPeekCursor( localOldSets, bestOldSet, localOldSets[bestOldSet]->bestLocationFor( tag ), tag, thisBegin, std::min(lastBegin, end), parallelGetMore)) ); epochEnds.push_back(LogMessageVersion(std::min(lastBegin, end))); } lastBegin = thisBegin; @@ -596,11 +582,13 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted peekLocal( UID dbgid, Tag tag, Version begin, Version end ) { + ASSERT(tag.locality >= 0 || tag.locality == tagLocalityUpgraded); + int bestSet = -1; bool foundSpecial = false; for(int t = 0; t < tLogs.size(); t++) { - if(tLogs[t]->logServers.size() && tLogs[t]->hasBestPolicy && (tLogs[t]->locality == tag.locality || tag.locality == tagLocalitySpecial || tLogs[t]->locality == tagLocalitySpecial || tLogs[t]->locality == tagLocalityUpgraded || (tLogs[t]->isLocal && tag.locality == tagLocalityLogRouter))) { - if( tLogs[t]->locality == tagLocalitySpecial ) { + if(tLogs[t]->logServers.size() && (tLogs[t]->locality == tagLocalitySpecial || tLogs[t]->locality == tagLocalityUpgraded || tLogs[t]->locality == tag.locality)) { + if( tLogs[t]->locality == tagLocalitySpecial || tLogs[t]->locality == tagLocalityUpgraded ) { foundSpecial = true; } bestSet = t; @@ -633,8 +621,8 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedlogServers.size() && oldLogData[i].tLogs[t]->hasBestPolicy && (oldLogData[i].tLogs[t]->locality == tag.locality || tag.locality == tagLocalitySpecial || oldLogData[i].tLogs[t]->locality == tagLocalitySpecial || oldLogData[i].tLogs[t]->locality == tagLocalityUpgraded || (oldLogData[i].tLogs[t]->isLocal && tag.locality == tagLocalityLogRouter))) { - if( oldLogData[i].tLogs[t]->locality == tagLocalitySpecial ) { + if(oldLogData[i].tLogs[t]->logServers.size() && (oldLogData[i].tLogs[t]->locality == tagLocalitySpecial || oldLogData[i].tLogs[t]->locality == tagLocalityUpgraded || oldLogData[i].tLogs[t]->locality == tag.locality)) { + if( oldLogData[i].tLogs[t]->locality == tagLocalitySpecial || oldLogData[i].tLogs[t]->locality == tagLocalityUpgraded ) { foundSpecial = true; } bestOldSet = t; @@ -709,27 +697,24 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted> localSets; + int bestSet = 0; for(auto& log : tLogs) { if(log->isLocal && log->logServers.size()) { TraceEvent("TLogPeekLogRouterLocalSet", dbgid).detail("Tag", tag.toString()).detail("Begin", begin).detail("LogServers", log->logServerString()); localSets.push_back(log); - if(log->hasBestPolicy) { - bestSet = localSets.size()-1; - nextBestSet = bestSet; + if(log->locality != tagLocalitySatellite) { + bestSet = localSets.size() - 1; } } } TraceEvent("TLogPeekLogRouterSets", dbgid).detail("Tag", tag.toString()).detail("Begin", begin); //FIXME: do this merge on one of the logs in the other data center to avoid sending multiple copies across the WAN - return Reference( new ILogSystem::SetPeekCursor( localSets, bestSet == -1 ? nextBestSet : bestSet, - bestSet >= 0 ? localSets[bestSet]->bestLocationFor( tag ) : -1, tag, begin, getPeekEnd(), false ) ); + return Reference( new ILogSystem::SetPeekCursor( localSets, bestSet, localSets[bestSet]->bestLocationFor( tag ), tag, begin, getPeekEnd(), false ) ); } else { for( auto& log : tLogs ) { - if( log->logServers.size() && log->isLocal && log->hasBestPolicy ) { + if(log->logServers.size() && log->isLocal && log->locality != tagLocalitySatellite) { TraceEvent("TLogPeekLogRouterBestOnly", dbgid).detail("Tag", tag.toString()).detail("Begin", begin).detail("LogId", log->logServers[log->bestLocationFor( tag )]->get().id()); return Reference( new ILogSystem::ServerPeekCursor( log->logServers[log->bestLocationFor( tag )], tag, begin, getPeekEnd(), false, false ) ); } @@ -751,24 +736,21 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted> localSets; for(auto& log : old.tLogs) { if(log->isLocal && log->logServers.size()) { TraceEvent("TLogPeekLogRouterOldLocalSet", dbgid).detail("Tag", tag.toString()).detail("Begin", begin).detail("LogServers", log->logServerString()); localSets.push_back(log); - if(log->hasBestPolicy) { + if(log->locality != tagLocalitySatellite) { bestSet = localSets.size()-1; - nextBestSet = bestSet; } } } TraceEvent("TLogPeekLogRouterOldSets", dbgid).detail("Tag", tag.toString()).detail("Begin", begin).detail("OldEpoch", old.epochEnd).detail("PreviousEpochEndVersion", previousEpochEndVersion.present() ? previousEpochEndVersion.get() : -1).detail("FirstOld", firstOld); //FIXME: do this merge on one of the logs in the other data center to avoid sending multiple copies across the WAN - return Reference( new ILogSystem::SetPeekCursor( localSets, bestSet == -1 ? nextBestSet : bestSet, - bestSet >= 0 ? localSets[bestSet]->bestLocationFor( tag ) : -1, tag, begin, firstOld && previousEpochEndVersion.present() ? previousEpochEndVersion.get() + 1 : old.epochEnd, false ) ); + return Reference( new ILogSystem::SetPeekCursor( localSets, bestSet, localSets[bestSet]->bestLocationFor( tag ), tag, begin, firstOld && previousEpochEndVersion.present() ? previousEpochEndVersion.get() + 1 : old.epochEnd, false ) ); } firstOld = false; } @@ -812,12 +794,14 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedlogServers) { - Version prev = outstandingPops[std::make_pair(log->get().id(),tag)].first; - if (prev < upTo) - outstandingPops[std::make_pair(log->get().id(),tag)] = std::make_pair(upTo, knownCommittedVersion); - if (prev == 0) - actors.add( popFromLog( this, log, tag, 1.0 ) ); //< FIXME: knob + if(t->locality == tagLocalitySpecial || t->locality == tag.locality || tag.locality < 0) { + for(auto& log : t->logServers) { + Version prev = outstandingPops[std::make_pair(log->get().id(),tag)].first; + if (prev < upTo) + outstandingPops[std::make_pair(log->get().id(),tag)] = std::make_pair(upTo, knownCommittedVersion); + if (prev == 0) + actors.add( popFromLog( this, log, tag, 1.0 ) ); //< FIXME: knob + } } } } @@ -924,10 +908,10 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted> newEpoch( RecruitFromConfigurationReply const& recr, Future const& fRemoteWorkers, DatabaseConfiguration const& config, LogEpoch recoveryCount, int8_t primaryLocality, int8_t remoteLocality, std::vector const& allTags ) { + virtual Future> newEpoch( RecruitFromConfigurationReply const& recr, Future const& fRemoteWorkers, DatabaseConfiguration const& config, LogEpoch recoveryCount, int8_t primaryLocality, int8_t remoteLocality, std::vector const& allTags, Reference> const& recruitmentStalled ) { // Call only after end_epoch() has successfully completed. Returns a new epoch immediately following this one. The new epoch // is only provisional until the caller updates the coordinated DBCoreState - return newEpoch( Reference::addRef(this), recr, fRemoteWorkers, config, recoveryCount, primaryLocality, remoteLocality, allTags ); + return newEpoch( Reference::addRef(this), recr, fRemoteWorkers, config, recoveryCount, primaryLocality, remoteLocality, allTags, recruitmentStalled ); } virtual LogSystemConfig getLogSystemConfig() { @@ -948,7 +932,6 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedtLogPolicy; log.tLogLocalities = logSet->tLogLocalities; log.isLocal = logSet->isLocal; - log.hasBestPolicy = logSet->hasBestPolicy; log.locality = logSet->locality; log.startVersion = logSet->startVersion; @@ -975,7 +958,6 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedtLogPolicy; log.tLogLocalities = logSet->tLogLocalities; log.isLocal = logSet->isLocal; - log.hasBestPolicy = logSet->hasBestPolicy; log.locality = logSet->locality; log.startVersion = logSet->startVersion; @@ -1211,7 +1193,6 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedtLogPolicy = coreSet.tLogPolicy; logSet->tLogLocalities = coreSet.tLogLocalities; logSet->isLocal = coreSet.isLocal; - logSet->hasBestPolicy = coreSet.hasBestPolicy; logSet->locality = coreSet.locality; logSet->startVersion = coreSet.startVersion; logFailed.push_back(failed); @@ -1235,7 +1216,6 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedtLogPolicy = log.tLogPolicy; logSet->tLogLocalities = log.tLogLocalities; logSet->isLocal = log.isLocal; - logSet->hasBestPolicy = log.hasBestPolicy; logSet->locality = log.locality; logSet->startVersion = log.startVersion; } @@ -1327,7 +1307,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted recruitOldLogRouters( TagPartitionedLogSystem* self, vector workers, LogEpoch recoveryCount, int8_t locality, Version startVersion, - std::vector tLogLocalities, IRepPolicyRef tLogPolicy, int32_t hasBestPolicy, bool forRemote ) { + std::vector tLogLocalities, IRepPolicyRef tLogPolicy, bool forRemote ) { state vector>> logRouterInitializationReplies; state vector> allReplies; int nextRouter = 0; @@ -1377,7 +1357,6 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedTLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY ) ), master_recovery_failed() ); logRouterInitializationReplies.back().push_back( reply ); @@ -1427,7 +1406,6 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedTLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY ) ), master_recovery_failed() ); logRouterInitializationReplies.back().push_back( reply ); @@ -1504,7 +1482,6 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedtLogReplicationFactor = configuration.remoteTLogReplicationFactor; logSet->tLogPolicy = configuration.remoteTLogPolicy; logSet->isLocal = false; - logSet->hasBestPolicy = HasBestPolicyId; logSet->locality = remoteLocality; logSet->startVersion = oldLogSystem->knownCommittedVersion + 1; @@ -1532,7 +1509,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted oldRouterRecruitment = Void(); if(logSet->startVersion < oldLogSystem->knownCommittedVersion + 1) { - oldRouterRecruitment = TagPartitionedLogSystem::recruitOldLogRouters(self, remoteWorkers.logRouters, recoveryCount, remoteLocality, logSet->startVersion, localities, logSet->tLogPolicy, logSet->hasBestPolicy, true); + oldRouterRecruitment = TagPartitionedLogSystem::recruitOldLogRouters(self, remoteWorkers.logRouters, recoveryCount, remoteLocality, logSet->startVersion, localities, logSet->tLogPolicy, true); } state vector> logRouterInitializationReplies; @@ -1543,11 +1520,17 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedtLogs[0]->startVersion, logSet->startVersion); req.tLogLocalities = localities; req.tLogPolicy = logSet->tLogPolicy; - req.hasBestPolicy = logSet->hasBestPolicy; req.locality = remoteLocality; logRouterInitializationReplies.push_back( transformErrors( throwErrorOr( remoteWorkers.logRouters[i%remoteWorkers.logRouters.size()].logRouter.getReplyUnlessFailedFor( req, SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY ) ), master_recovery_failed() ) ); } + std::vector localTags; + for(auto& tag : allTags) { + if(remoteLocality == tagLocalitySpecial || remoteLocality == tag.locality || tag.locality < 0) { + localTags.push_back(tag); + } + } + state vector> remoteTLogInitializationReplies; vector< InitializeTLogRequest > remoteTLogReqs( remoteWorkers.remoteTLogs.size() ); for( int i = 0; i < remoteWorkers.remoteTLogs.size(); i++ ) { @@ -1561,7 +1544,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedstartVersion; req.logRouterTags = 0; } @@ -1595,7 +1578,8 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted> newEpoch( Reference oldLogSystem, RecruitFromConfigurationReply recr, Future fRemoteWorkers, DatabaseConfiguration configuration, LogEpoch recoveryCount, int8_t primaryLocality, int8_t remoteLocality, std::vector allTags ) { + ACTOR static Future> newEpoch( Reference oldLogSystem, RecruitFromConfigurationReply recr, Future fRemoteWorkers, DatabaseConfiguration configuration, LogEpoch recoveryCount, + int8_t primaryLocality, int8_t remoteLocality, std::vector allTags, Reference> recruitmentStalled ) { state double startTime = now(); state Reference logSystem( new TagPartitionedLogSystem(oldLogSystem->getDebugID(), oldLogSystem->locality) ); logSystem->logSystemType = 2; @@ -1609,7 +1593,6 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedtLogs[0]->tLogReplicationFactor = configuration.tLogReplicationFactor; logSystem->tLogs[0]->tLogPolicy = configuration.tLogPolicy; logSystem->tLogs[0]->isLocal = true; - logSystem->tLogs[0]->hasBestPolicy = HasBestPolicyId; logSystem->tLogs[0]->locality = primaryLocality; state RegionInfo region = configuration.getRegion(recr.dcId); @@ -1620,8 +1603,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedtLogs[1]->tLogReplicationFactor = region.satelliteTLogReplicationFactor; logSystem->tLogs[1]->tLogPolicy = region.satelliteTLogPolicy; logSystem->tLogs[1]->isLocal = true; - logSystem->tLogs[1]->hasBestPolicy = HasBestPolicyNone; - logSystem->tLogs[1]->locality = tagLocalityInvalid; + logSystem->tLogs[1]->locality = tagLocalitySatellite; logSystem->tLogs[1]->startVersion = oldLogSystem->knownCommittedVersion + 1; logSystem->expectedLogSets++; } @@ -1651,6 +1633,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedlockResults[lockNum].isCurrent && oldLogSystem->lockResults[lockNum].logSet->isLocal) { break; } + state Future stalledAfter = setAfter(recruitmentStalled, SERVER_KNOBS->MAX_RECOVERY_TIME, true); loop { auto versions = TagPartitionedLogSystem::getDurableVersion(logSystem->dbgid, oldLogSystem->lockResults[lockNum]); if(versions.present()) { @@ -1659,6 +1642,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedlockResults[lockNum]) ); } + stalledAfter.cancel(); break; } lockNum++; @@ -1673,14 +1657,30 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted oldRouterRecruitment = Never(); TraceEvent("NewEpochStartVersion", oldLogSystem->getDebugID()).detail("StartVersion", logSystem->tLogs[0]->startVersion).detail("EpochEnd", oldLogSystem->knownCommittedVersion + 1).detail("Locality", primaryLocality).detail("OldLogRouterTags", oldLogSystem->logRouterTags); if(oldLogSystem->logRouterTags > 0 || logSystem->tLogs[0]->startVersion < oldLogSystem->knownCommittedVersion + 1) { - oldRouterRecruitment = TagPartitionedLogSystem::recruitOldLogRouters(oldLogSystem.getPtr(), recr.oldLogRouters, recoveryCount, primaryLocality, logSystem->tLogs[0]->startVersion, localities, logSystem->tLogs[0]->tLogPolicy, logSystem->tLogs[0]->hasBestPolicy, false); + oldRouterRecruitment = TagPartitionedLogSystem::recruitOldLogRouters(oldLogSystem.getPtr(), recr.oldLogRouters, recoveryCount, primaryLocality, logSystem->tLogs[0]->startVersion, localities, logSystem->tLogs[0]->tLogPolicy, false); + if(oldLogSystem->knownCommittedVersion - logSystem->tLogs[0]->startVersion > SERVER_KNOBS->MAX_RECOVERY_VERSIONS) { + //make sure we can recover in the other DC. + for(auto& lockResult : oldLogSystem->lockResults) { + if(lockResult.logSet->locality == remoteLocality) { + if( TagPartitionedLogSystem::getDurableVersion(logSystem->dbgid, lockResult).present() ) { + recruitmentStalled->set(true); + } + } + } + } } else { oldLogSystem->logSystemConfigChanged.trigger(); } + std::vector localTags; + for(auto& tag : allTags) { + if(primaryLocality == tagLocalitySpecial || primaryLocality == tag.locality || tag.locality < 0) { + localTags.push_back(tag); + } + } + state vector> initializationReplies; vector< InitializeTLogRequest > reqs( recr.tLogs.size() ); - for( int i = 0; i < recr.tLogs.size(); i++ ) { InitializeTLogRequest &req = reqs[i]; req.recruitmentID = logSystem->recruitmentID; @@ -1692,7 +1692,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedtLogs[0]->startVersion; req.logRouterTags = logSystem->logRouterTags; } @@ -1706,7 +1706,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedtLogs[0]->bestLocationFor( tag ) ].recoverTags.push_back( tag ); } std::vector locations; - for( Tag tag : allTags ) { + for( Tag tag : localTags ) { locations.clear(); logSystem->tLogs[0]->getPushLocations( vector(1, tag), locations, 0 ); for(int loc : locations) @@ -1719,9 +1719,11 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted> recoveryComplete; if(region.satelliteTLogReplicationFactor > 0) { + std::vector satelliteTags; + satelliteTags.push_back(txsTag); + state vector> satelliteInitializationReplies; vector< InitializeTLogRequest > sreqs( recr.satelliteTLogs.size() ); - for( int i = 0; i < recr.satelliteTLogs.size(); i++ ) { InitializeTLogRequest &req = sreqs[i]; req.recruitmentID = logSystem->recruitmentID; @@ -1730,10 +1732,10 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedepochEndVersion.get(); req.knownCommittedVersion = oldLogSystem->knownCommittedVersion; req.epoch = recoveryCount; - req.locality = tagLocalityInvalid; + req.locality = tagLocalitySatellite; req.remoteTag = Tag(); req.isPrimary = true; - req.allTags = allTags; + req.allTags = satelliteTags; req.startVersion = oldLogSystem->knownCommittedVersion + 1; req.logRouterTags = logSystem->logRouterTags; } diff --git a/fdbserver/WorkerInterface.h b/fdbserver/WorkerInterface.h index 00609f1b67..7913bd52c2 100644 --- a/fdbserver/WorkerInterface.h +++ b/fdbserver/WorkerInterface.h @@ -98,13 +98,12 @@ struct InitializeLogRouterRequest { Version startVersion; std::vector tLogLocalities; IRepPolicyRef tLogPolicy; - int32_t hasBestPolicy; int8_t locality; ReplyPromise reply; template void serialize(Ar& ar) { - ar & recoveryCount & routerTag & startVersion & tLogLocalities & tLogPolicy & hasBestPolicy & locality & reply; + ar & recoveryCount & routerTag & startVersion & tLogLocalities & tLogPolicy & locality & reply; } }; diff --git a/fdbserver/masterserver.actor.cpp b/fdbserver/masterserver.actor.cpp index 7e7636ae82..b18b95b93a 100644 --- a/fdbserver/masterserver.actor.cpp +++ b/fdbserver/masterserver.actor.cpp @@ -219,6 +219,7 @@ struct MasterData : NonCopyable, ReferenceCounted { std::set resolverNeedingChanges; PromiseStream> addActor; + Reference> recruitmentStalled; MasterData( Reference> const& dbInfo, @@ -246,7 +247,8 @@ struct MasterData : NonCopyable, ReferenceCounted { txnStateStore(0), memoryLimit(2e9), addActor(addActor), - hasConfiguration(false) + hasConfiguration(false), + recruitmentStalled( Reference>( new AsyncVar() ) ) { } ~MasterData() { if(txnStateStore) txnStateStore->close(); } @@ -311,10 +313,10 @@ ACTOR Future newTLogServers( Reference self, RecruitFromConfig Future fRemoteWorkers = brokenPromiseToNever( self->clusterController.recruitRemoteFromConfiguration.getReply( RecruitRemoteFromConfigurationRequest( self->configuration, remoteDcId, recr.tLogs.size() ) ) ); - Reference newLogSystem = wait( oldLogSystem->newEpoch( recr, fRemoteWorkers, self->configuration, self->cstate.myDBState.recoveryCount + 1, self->dcId_locality[recr.dcId], self->dcId_locality[remoteDcId], self->allTags ) ); + Reference newLogSystem = wait( oldLogSystem->newEpoch( recr, fRemoteWorkers, self->configuration, self->cstate.myDBState.recoveryCount + 1, self->dcId_locality[recr.dcId], self->dcId_locality[remoteDcId], self->allTags, self->recruitmentStalled ) ); self->logSystem = newLogSystem; } else { - Reference newLogSystem = wait( oldLogSystem->newEpoch( recr, Never(), self->configuration, self->cstate.myDBState.recoveryCount + 1, tagLocalitySpecial, tagLocalitySpecial, self->allTags ) ); + Reference newLogSystem = wait( oldLogSystem->newEpoch( recr, Never(), self->configuration, self->cstate.myDBState.recoveryCount + 1, tagLocalitySpecial, tagLocalitySpecial, self->allTags, self->recruitmentStalled ) ); self->logSystem = newLogSystem; } return Void(); @@ -395,31 +397,31 @@ ACTOR Future updateLogsValue( Reference self, Database cx ) { try { Optional> value = wait( tr.get(logsKey) ); ASSERT(value.present()); - - std::vector> logConf; auto logs = decodeLogsValue(value.get()); - for(auto& log : self->logSystem->getLogSystemConfig().tLogs) { - for(auto& tl : log.tLogs) { - logConf.push_back(tl); - } + + std::set logIds; + for(auto& log : logs.first) { + logIds.insert(log.first); } - bool match = (logs.first.size() == logConf.size()); - if(match) { - for(int i = 0; i < logs.first.size(); i++) { - if(logs.first[i].first != logConf[i].id()) { - match = false; + bool found = false; + for(auto& logSet : self->logSystem->getLogSystemConfig().tLogs) { + for(auto& log : logSet.tLogs) { + if(logIds.count(log.id())) { + found = true; break; } } + if(found) { + break; + } } - if(!match) { + if(!found) { TEST(true); //old master attempted to change logsKey return Void(); } - //FIXME: include remote logs in the log key tr.set(logsKey, self->logSystem->getLogsValue()); Void _ = wait( tr.commit() ); return Void(); @@ -442,6 +444,7 @@ Future sendMasterRegistration( MasterData* self, LogSystemConfig const& lo masterReq.registrationCount = ++self->registrationCount; masterReq.priorCommittedLogServers = priorCommittedLogServers; masterReq.recoveryState = self->recoveryState; + masterReq.recoveryStalled = self->recruitmentStalled->get(); return brokenPromiseToNever( self->clusterController.registerMaster.getReply( masterReq ) ); } @@ -704,7 +707,7 @@ ACTOR Future sendInitialCommitToResolvers( Reference self ) { ACTOR Future triggerUpdates( Reference self, Reference oldLogSystem ) { loop { - Void _ = wait( oldLogSystem->onLogSystemConfigChange() || self->cstate.fullyRecovered.getFuture() ); + Void _ = wait( oldLogSystem->onLogSystemConfigChange() || self->cstate.fullyRecovered.getFuture() || self->recruitmentStalled->onChange() ); if(self->cstate.fullyRecovered.isSet()) return Void(); @@ -1192,6 +1195,7 @@ ACTOR Future masterCore( Reference self ) { tr.set(recoveryCommitRequest.arena, backupVersionKey, backupVersionValue); tr.set(recoveryCommitRequest.arena, coordinatorsKey, self->coordinators.ccf->getConnectionString().toString()); tr.set(recoveryCommitRequest.arena, logsKey, self->logSystem->getLogsValue()); + tr.set(recoveryCommitRequest.arena, primaryDatacenterKey, self->myInterface.locality.dcId().present() ? self->myInterface.locality.dcId().get() : StringRef()); applyMetadataMutations(self->dbgid, recoveryCommitRequest.arena, tr.mutations.slice(mmApplied, tr.mutations.size()), self->txnStateStore, NULL, NULL); mmApplied = tr.mutations.size(); diff --git a/fdbserver/workloads/StatusWorkload.actor.cpp b/fdbserver/workloads/StatusWorkload.actor.cpp index bc10b093e2..83e80471cf 100644 --- a/fdbserver/workloads/StatusWorkload.actor.cpp +++ b/fdbserver/workloads/StatusWorkload.actor.cpp @@ -57,7 +57,7 @@ struct StatusWorkload : TestWorkload { return Void(); } virtual Future start(Database const& cx) { - //if (clientId != 0) + if (clientId != 0) return Void(); Reference cluster = cx->cluster; if (!cluster) { diff --git a/flow/genericactors.actor.h b/flow/genericactors.actor.h index a0459339c6..9c8dbb87bf 100644 --- a/flow/genericactors.actor.h +++ b/flow/genericactors.actor.h @@ -739,6 +739,13 @@ Future delayedAsyncVar( Reference> in, Reference> } } +ACTOR template +Future setAfter( Reference> var, double time, T val ) { + Void _ = wait( delay( time ) ); + var->set( val ); + return Void(); +} + Future allTrue( const std::vector>& all ); Future anyTrue( std::vector>> const& input, Reference> const& output ); Future cancelOnly( std::vector> const& futures ); diff --git a/tests/fast/SidebandWithStatus.txt b/tests/fast/SidebandWithStatus.txt index 00c626f671..ae53e55311 100644 --- a/tests/fast/SidebandWithStatus.txt +++ b/tests/fast/SidebandWithStatus.txt @@ -5,7 +5,7 @@ testTitle=CloggedCausalConsistencyTest testName=Status testDuration=30.0 - schema={"cluster":{"layers":{"_valid":true,"_error":"some error description"},"processes":{"$map":{"version":"3.0.0","machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","locality":{"$map":"value"},"class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"input_bytes":{"hz":0,"counter":0,"roughness":0},"stored_bytes":12341234,"kvstore_used_bytes":12341234,"kvstore_available_bytes":12341234,"kvstore_free_bytes":12341234,"kvstore_total_bytes":12341234,"durable_bytes":{"hz":0,"counter":0,"roughness":0},"queue_disk_used_bytes":12341234,"queue_disk_available_bytes":12341234,"queue_disk_free_bytes":12341234,"queue_disk_total_bytes":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"data_version":12341234,"data_version_lag":12341234,"id":"eb84471d68c12d1d26f692a50000003f","finished_queries":{"hz":0,"counter":0,"roughness":0}}],"command_line":"-r simulation","memory":{"available_bytes":0,"limit_bytes":0,"used_bytes":0},"messages":[{"time":12345.12312,"type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error messages)"]},"raw_log_message":"","description":"abc"}],"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","excluded":false,"address":"1.2.3.4:1234","disk":{"free_bytes":3451233456234,"reads":{"hz":0,"counter":0,"sectors":0},"busy":0,"writes":{"hz":0,"counter":0,"sectors":0},"total_bytes":123412341234},"uptime_seconds":1234.2345,"cpu":{"usage_cores":0},"network":{"current_connections":0,"connections_established":{"hz":0},"connections_closed":{"hz":0},"connection_errors":{"hz":0},"megabits_sent":{"hz":0},"megabits_received":{"hz":0}}}},"old_logs":[{"logs":[{"id":"7f8d623d0cb9966e","healthy":true,"address":"1.2.3.4:1234"}],"log_replication_factor":3,"log_write_anti_quorum":0,"log_fault_tolerance":2}],"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"qos":{"worst_queue_bytes_log_server":460,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]},"description":"The database is not being saturated by the workload."},"transactions_per_second_limit":0,"released_transactions_per_second":0,"limiting_queue_bytes_storage_server":0,"worst_queue_bytes_storage_server":0,"limiting_version_lag_storage_server":0,"worst_version_lag_storage_server":0},"incompatible_connections":[],"database_available":true,"database_locked":false,"generation":2,"latency_probe":{"read_seconds":7,"immediate_priority_transaction_start_seconds":0,"batch_priority_transaction_start_seconds":0,"transaction_start_seconds":0,"commit_seconds":0.02},"clients":{"count":1,"supported_versions":[{"client_version":"3.0.0","connected_clients":[{"address":"127.0.0.1:9898","log_group":"default"}],"count":1,"protocol_version":"fdb00a400050001","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d"}]},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection string. Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"recovery_state":{"required_resolvers":1,"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","fully_recovered"]},"required_logs":3,"missing_logs":"7f8d623d0cb9966e","description":"Recovery complete."},"workload":{"operations":{"writes":{"hz":0,"counter":0,"roughness":0},"reads":{"hz":0,"counter":0,"roughness":0}},"bytes":{"written":{"hz":0,"counter":0,"roughness":0},"read":{"hz":0,"counter":0,"roughness":0}},"keys":{"read":{"hz":0,"counter":0,"roughness":0}},"transactions":{"started":{"hz":0,"counter":0,"roughness":0},"conflicted":{"hz":0,"counter":0,"roughness":0},"committed":{"hz":0,"counter":0,"roughness":0}}},"cluster_controller_timestamp":1415650089,"protocol_version":"fdb00a400050001","configuration":{"resolvers":1,"redundancy":{"factor":{"$enum":["single","double","triple","custom","two_datacenter","three_datacenter","three_data_hall","fast_recovery_double","fast_recovery_triple"]}},"storage_policy":"(zoneid^3x1)","tlog_policy":"(zoneid^2x1)","logs":2,"storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"coordinators_count":1,"excluded_servers":[{"address":"10.0.4.1"}],"proxies":5},"data":{"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true,"min_replicas_remaining":0,"name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"description":""},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"partitions_count":2,"moving_data":{"total_written_bytes":0,"in_flight_bytes":0,"in_queue_bytes":0},"least_operating_space_bytes_storage_server":0,"max_machine_failures_without_losing_data":0},"machines":{"$map":{"network":{"megabits_sent":{"hz":0},"megabits_received":{"hz":0},"tcp_segments_retransmitted":{"hz":0}},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","locality":{"$map":"value"},"cpu":{"logical_core_utilization":0.4}}}},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"database_status":{"available":true,"healthy":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true}}} + schema={"cluster":{"layers":{"_valid":true,"_error":"some error description"},"datacenter_version_difference":0,"processes":{"$map":{"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"data_version_lag":12341234,"input_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"kvstore_used_bytes":12341234,"stored_bytes":12341234,"kvstore_free_bytes":12341234,"durable_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"id":"eb84471d68c12d1d26f692a50000003f","data_version":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"queue_disk_available_bytes":12341234,"kvstore_available_bytes":12341234,"queue_disk_total_bytes":12341234,"queue_disk_used_bytes":12341234,"queue_disk_free_bytes":12341234,"kvstore_total_bytes":12341234,"finished_queries":{"hz":0.0,"counter":0,"roughness":0.0}}],"locality":{"$map":"value"},"messages":[{"description":"abc","type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error messages)"]},"raw_log_message":"","time":12345.12312}],"address":"1.2.3.4:1234","command_line":"-r simulation","disk":{"free_bytes":3451233456234,"reads":{"hz":0.0,"counter":0,"sectors":0},"busy":0.0,"writes":{"hz":0.0,"counter":0,"sectors":0},"total_bytes":123412341234},"version":"3.0.0","excluded":false,"memory":{"available_bytes":0,"used_bytes":0,"limit_bytes":0},"machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","uptime_seconds":1234.2345,"cpu":{"usage_cores":0.0},"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"connections_closed":{"hz":0.0},"connection_errors":{"hz":0.0},"current_connections":0,"connections_established":{"hz":0.0}}}},"clients":{"count":1,"supported_versions":[{"count":1,"protocol_version":"fdb00a400050001","client_version":"3.0.0","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d","connected_clients":[{"log_group":"default","address":"127.0.0.1:9898"}]}]},"qos":{"limiting_version_lag_storage_server":0,"released_transactions_per_second":0,"transactions_per_second_limit":0,"limiting_queue_bytes_storage_server":0,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","description":"The database is not being saturated by the workload.","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]}},"worst_version_lag_storage_server":0,"worst_queue_bytes_log_server":460,"worst_queue_bytes_storage_server":0},"incompatible_connections":[],"full_replication":true,"database_locked":false,"generation":2,"data":{"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true,"description":"","name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"min_replicas_remaining":0},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"max_machine_failures_without_losing_data":0,"moving_data":{"in_queue_bytes":0,"total_written_bytes":0,"in_flight_bytes":0},"least_operating_space_bytes_storage_server":0,"partitions_count":2},"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","full_replication_timeout","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection string. Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"database_available":true,"recovery_state":{"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","fully_recovered"]},"missing_logs":"7f8d623d0cb9966e","required_resolvers":1,"required_logs":3,"description":"Recovery complete."},"workload":{"operations":{"writes":{"hz":0.0,"counter":0,"roughness":0.0},"reads":{"hz":0.0,"counter":0,"roughness":0.0}},"keys":{"read":{"hz":0.0,"counter":0,"roughness":0.0}},"bytes":{"read":{"hz":0.0,"counter":0,"roughness":0.0},"written":{"hz":0.0,"counter":0,"roughness":0.0}},"transactions":{"started":{"hz":0.0,"counter":0,"roughness":0.0},"conflicted":{"hz":0.0,"counter":0,"roughness":0.0},"committed":{"hz":0.0,"counter":0,"roughness":0.0}}},"cluster_controller_timestamp":1415650089,"protocol_version":"fdb00a400050001","configuration":{"resolvers":1,"regions":[{"satellite_redundancy_mode":"one_satellite_single","satellite_anti_quorum":0,"satellite_usable_dcs":1,"datacenters":[{"priority":1,"satellite":1,"id":"mr"}],"satellite_log_policy":"(zoneid^3x1)","satellite_log_replicas":1,"satellite_logs":2}],"remote_logs":5,"auto_logs":3,"logs":2,"log_anti_quorum":0,"storage_replicas":1,"log_replicas":2,"remote_redundancy_mode":"remote_single","storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"coordinators_count":1,"log_replication_policy":"(zoneid^3x1)","storage_replication_policy":"(zoneid^3x1)","remote_log_replicas":3,"excluded_servers":[{"address":"10.0.4.1"}],"auto_proxies":3,"proxies":5,"auto_resolvers":1,"redundancy_mode":"single","storage_quorum":1},"latency_probe":{"immediate_priority_transaction_start_seconds":0.0,"transaction_start_seconds":0.0,"batch_priority_transaction_start_seconds":0.0,"read_seconds":7,"commit_seconds":0.02},"machines":{"$map":{"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"tcp_segments_retransmitted":{"hz":0.0}},"locality":{"$map":"value"},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","cpu":{"logical_core_utilization":0.4}}},"old_logs":[{"satellite_log_fault_tolerance":2,"logs":[{"healthy":true,"id":"7f8d623d0cb9966e","address":"1.2.3.4:1234"}],"satellite_log_write_anti_quorum":0,"remote_log_fault_tolerance":2,"log_fault_tolerance":2,"log_write_anti_quorum":0,"satellite_log_replication_factor":3,"remote_log_replication_factor":3,"log_replication_factor":3}]},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"database_status":{"available":true,"healthy":true}}} testName=RandomClogging testDuration=30.0 diff --git a/tests/rare/LargeApiCorrectnessStatus.txt b/tests/rare/LargeApiCorrectnessStatus.txt index dabaab09b1..43077bea6a 100644 --- a/tests/rare/LargeApiCorrectnessStatus.txt +++ b/tests/rare/LargeApiCorrectnessStatus.txt @@ -24,4 +24,4 @@ testTitle=ApiCorrectnessTest testName=Status testDuration=30.0 - schema={"cluster":{"layers":{"_valid":true,"_error":"some error description"},"processes":{"$map":{"version":"3.0.0","machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","locality":{"$map":"value"},"class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"input_bytes":{"hz":0,"counter":0,"roughness":0},"stored_bytes":12341234,"kvstore_used_bytes":12341234,"kvstore_available_bytes":12341234,"kvstore_free_bytes":12341234,"kvstore_total_bytes":12341234,"durable_bytes":{"hz":0,"counter":0,"roughness":0},"queue_disk_used_bytes":12341234,"queue_disk_available_bytes":12341234,"queue_disk_free_bytes":12341234,"queue_disk_total_bytes":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"data_version":12341234,"data_version_lag":12341234,"id":"eb84471d68c12d1d26f692a50000003f","finished_queries":{"hz":0,"counter":0,"roughness":0}}],"command_line":"-r simulation","memory":{"available_bytes":0,"limit_bytes":0,"used_bytes":0},"messages":[{"time":12345.12312,"type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error messages)"]},"raw_log_message":"","description":"abc"}],"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","excluded":false,"address":"1.2.3.4:1234","disk":{"free_bytes":3451233456234,"reads":{"hz":0,"counter":0,"sectors":0},"busy":0,"writes":{"hz":0,"counter":0,"sectors":0},"total_bytes":123412341234},"uptime_seconds":1234.2345,"cpu":{"usage_cores":0},"network":{"current_connections":0,"connections_established":{"hz":0},"connections_closed":{"hz":0},"connection_errors":{"hz":0},"megabits_sent":{"hz":0},"megabits_received":{"hz":0}}}},"old_logs":[{"logs":[{"id":"7f8d623d0cb9966e","healthy":true,"address":"1.2.3.4:1234"}],"log_replication_factor":3,"log_write_anti_quorum":0,"log_fault_tolerance":2}],"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"qos":{"worst_queue_bytes_log_server":460,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]},"description":"The database is not being saturated by the workload."},"transactions_per_second_limit":0,"released_transactions_per_second":0,"limiting_queue_bytes_storage_server":0,"worst_queue_bytes_storage_server":0,"limiting_version_lag_storage_server":0,"worst_version_lag_storage_server":0},"incompatible_connections":[],"database_available":true,"database_locked":false,"generation":2,"latency_probe":{"read_seconds":7,"immediate_priority_transaction_start_seconds":0,"batch_priority_transaction_start_seconds":0,"transaction_start_seconds":0,"commit_seconds":0.02},"clients":{"count":1,"supported_versions":[{"client_version":"3.0.0","connected_clients":[{"address":"127.0.0.1:9898","log_group":"default"}],"count":1,"protocol_version":"fdb00a400050001","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d"}]},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection string. Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"recovery_state":{"required_resolvers":1,"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","fully_recovered"]},"required_logs":3,"missing_logs":"7f8d623d0cb9966e","description":"Recovery complete."},"workload":{"operations":{"writes":{"hz":0,"counter":0,"roughness":0},"reads":{"hz":0,"counter":0,"roughness":0}},"bytes":{"written":{"hz":0,"counter":0,"roughness":0},"read":{"hz":0,"counter":0,"roughness":0}},"keys":{"read":{"hz":0,"counter":0,"roughness":0}},"transactions":{"started":{"hz":0,"counter":0,"roughness":0},"conflicted":{"hz":0,"counter":0,"roughness":0},"committed":{"hz":0,"counter":0,"roughness":0}}},"cluster_controller_timestamp":1415650089,"protocol_version":"fdb00a400050001","configuration":{"resolvers":1,"redundancy":{"factor":{"$enum":["single","double","triple","custom","two_datacenter","three_datacenter","three_data_hall","fast_recovery_double","fast_recovery_triple"]}},"storage_policy":"(zoneid^3x1)","tlog_policy":"(zoneid^2x1)","logs":2,"storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"coordinators_count":1,"excluded_servers":[{"address":"10.0.4.1"}],"proxies":5},"data":{"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true,"min_replicas_remaining":0,"name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"description":""},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"partitions_count":2,"moving_data":{"total_written_bytes":0,"in_flight_bytes":0,"in_queue_bytes":0},"least_operating_space_bytes_storage_server":0,"max_machine_failures_without_losing_data":0},"machines":{"$map":{"network":{"megabits_sent":{"hz":0},"megabits_received":{"hz":0},"tcp_segments_retransmitted":{"hz":0}},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","locality":{"$map":"value"},"cpu":{"logical_core_utilization":0.4}}}},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"database_status":{"available":true,"healthy":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true}}} + schema={"cluster":{"layers":{"_valid":true,"_error":"some error description"},"datacenter_version_difference":0,"processes":{"$map":{"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"data_version_lag":12341234,"input_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"kvstore_used_bytes":12341234,"stored_bytes":12341234,"kvstore_free_bytes":12341234,"durable_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"id":"eb84471d68c12d1d26f692a50000003f","data_version":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"queue_disk_available_bytes":12341234,"kvstore_available_bytes":12341234,"queue_disk_total_bytes":12341234,"queue_disk_used_bytes":12341234,"queue_disk_free_bytes":12341234,"kvstore_total_bytes":12341234,"finished_queries":{"hz":0.0,"counter":0,"roughness":0.0}}],"locality":{"$map":"value"},"messages":[{"description":"abc","type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error messages)"]},"raw_log_message":"","time":12345.12312}],"address":"1.2.3.4:1234","command_line":"-r simulation","disk":{"free_bytes":3451233456234,"reads":{"hz":0.0,"counter":0,"sectors":0},"busy":0.0,"writes":{"hz":0.0,"counter":0,"sectors":0},"total_bytes":123412341234},"version":"3.0.0","excluded":false,"memory":{"available_bytes":0,"used_bytes":0,"limit_bytes":0},"machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","uptime_seconds":1234.2345,"cpu":{"usage_cores":0.0},"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"connections_closed":{"hz":0.0},"connection_errors":{"hz":0.0},"current_connections":0,"connections_established":{"hz":0.0}}}},"clients":{"count":1,"supported_versions":[{"count":1,"protocol_version":"fdb00a400050001","client_version":"3.0.0","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d","connected_clients":[{"log_group":"default","address":"127.0.0.1:9898"}]}]},"qos":{"limiting_version_lag_storage_server":0,"released_transactions_per_second":0,"transactions_per_second_limit":0,"limiting_queue_bytes_storage_server":0,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","description":"The database is not being saturated by the workload.","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]}},"worst_version_lag_storage_server":0,"worst_queue_bytes_log_server":460,"worst_queue_bytes_storage_server":0},"incompatible_connections":[],"full_replication":true,"database_locked":false,"generation":2,"data":{"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true,"description":"","name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"min_replicas_remaining":0},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"max_machine_failures_without_losing_data":0,"moving_data":{"in_queue_bytes":0,"total_written_bytes":0,"in_flight_bytes":0},"least_operating_space_bytes_storage_server":0,"partitions_count":2},"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","full_replication_timeout","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection string. Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"database_available":true,"recovery_state":{"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","fully_recovered"]},"missing_logs":"7f8d623d0cb9966e","required_resolvers":1,"required_logs":3,"description":"Recovery complete."},"workload":{"operations":{"writes":{"hz":0.0,"counter":0,"roughness":0.0},"reads":{"hz":0.0,"counter":0,"roughness":0.0}},"keys":{"read":{"hz":0.0,"counter":0,"roughness":0.0}},"bytes":{"read":{"hz":0.0,"counter":0,"roughness":0.0},"written":{"hz":0.0,"counter":0,"roughness":0.0}},"transactions":{"started":{"hz":0.0,"counter":0,"roughness":0.0},"conflicted":{"hz":0.0,"counter":0,"roughness":0.0},"committed":{"hz":0.0,"counter":0,"roughness":0.0}}},"cluster_controller_timestamp":1415650089,"protocol_version":"fdb00a400050001","configuration":{"resolvers":1,"regions":[{"satellite_redundancy_mode":"one_satellite_single","satellite_anti_quorum":0,"satellite_usable_dcs":1,"datacenters":[{"priority":1,"satellite":1,"id":"mr"}],"satellite_log_policy":"(zoneid^3x1)","satellite_log_replicas":1,"satellite_logs":2}],"remote_logs":5,"auto_logs":3,"logs":2,"log_anti_quorum":0,"storage_replicas":1,"log_replicas":2,"remote_redundancy_mode":"remote_single","storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"coordinators_count":1,"log_replication_policy":"(zoneid^3x1)","storage_replication_policy":"(zoneid^3x1)","remote_log_replicas":3,"excluded_servers":[{"address":"10.0.4.1"}],"auto_proxies":3,"proxies":5,"auto_resolvers":1,"redundancy_mode":"single","storage_quorum":1},"latency_probe":{"immediate_priority_transaction_start_seconds":0.0,"transaction_start_seconds":0.0,"batch_priority_transaction_start_seconds":0.0,"read_seconds":7,"commit_seconds":0.02},"machines":{"$map":{"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"tcp_segments_retransmitted":{"hz":0.0}},"locality":{"$map":"value"},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","cpu":{"logical_core_utilization":0.4}}},"old_logs":[{"satellite_log_fault_tolerance":2,"logs":[{"healthy":true,"id":"7f8d623d0cb9966e","address":"1.2.3.4:1234"}],"satellite_log_write_anti_quorum":0,"remote_log_fault_tolerance":2,"log_fault_tolerance":2,"log_write_anti_quorum":0,"satellite_log_replication_factor":3,"remote_log_replication_factor":3,"log_replication_factor":3}]},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"database_status":{"available":true,"healthy":true}}} diff --git a/tests/slow/DDBalanceAndRemoveStatus.txt b/tests/slow/DDBalanceAndRemoveStatus.txt index fc27f71849..f4ce50c783 100644 --- a/tests/slow/DDBalanceAndRemoveStatus.txt +++ b/tests/slow/DDBalanceAndRemoveStatus.txt @@ -43,4 +43,4 @@ testTitle=DDBalance_test testName=Status testDuration=30.0 - schema={"cluster":{"layers":{"_valid":true,"_error":"some error description"},"processes":{"$map":{"version":"3.0.0","machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","locality":{"$map":"value"},"class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"input_bytes":{"hz":0,"counter":0,"roughness":0},"stored_bytes":12341234,"kvstore_used_bytes":12341234,"kvstore_available_bytes":12341234,"kvstore_free_bytes":12341234,"kvstore_total_bytes":12341234,"durable_bytes":{"hz":0,"counter":0,"roughness":0},"queue_disk_used_bytes":12341234,"queue_disk_available_bytes":12341234,"queue_disk_free_bytes":12341234,"queue_disk_total_bytes":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"data_version":12341234,"data_version_lag":12341234,"id":"eb84471d68c12d1d26f692a50000003f","finished_queries":{"hz":0,"counter":0,"roughness":0}}],"command_line":"-r simulation","memory":{"available_bytes":0,"limit_bytes":0,"used_bytes":0},"messages":[{"time":12345.12312,"type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error messages)"]},"raw_log_message":"","description":"abc"}],"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","excluded":false,"address":"1.2.3.4:1234","disk":{"free_bytes":3451233456234,"reads":{"hz":0,"counter":0,"sectors":0},"busy":0,"writes":{"hz":0,"counter":0,"sectors":0},"total_bytes":123412341234},"uptime_seconds":1234.2345,"cpu":{"usage_cores":0},"network":{"current_connections":0,"connections_established":{"hz":0},"connections_closed":{"hz":0},"connection_errors":{"hz":0},"megabits_sent":{"hz":0},"megabits_received":{"hz":0}}}},"old_logs":[{"logs":[{"id":"7f8d623d0cb9966e","healthy":true,"address":"1.2.3.4:1234"}],"log_replication_factor":3,"log_write_anti_quorum":0,"log_fault_tolerance":2}],"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"qos":{"worst_queue_bytes_log_server":460,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]},"description":"The database is not being saturated by the workload."},"transactions_per_second_limit":0,"released_transactions_per_second":0,"limiting_queue_bytes_storage_server":0,"worst_queue_bytes_storage_server":0,"limiting_version_lag_storage_server":0,"worst_version_lag_storage_server":0},"incompatible_connections":[],"database_available":true,"database_locked":false,"generation":2,"latency_probe":{"read_seconds":7,"immediate_priority_transaction_start_seconds":0,"batch_priority_transaction_start_seconds":0,"transaction_start_seconds":0,"commit_seconds":0.02},"clients":{"count":1,"supported_versions":[{"client_version":"3.0.0","connected_clients":[{"address":"127.0.0.1:9898","log_group":"default"}],"count":1,"protocol_version":"fdb00a400050001","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d"}]},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection string. Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"recovery_state":{"required_resolvers":1,"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","fully_recovered"]},"required_logs":3,"missing_logs":"7f8d623d0cb9966e","description":"Recovery complete."},"workload":{"operations":{"writes":{"hz":0,"counter":0,"roughness":0},"reads":{"hz":0,"counter":0,"roughness":0}},"bytes":{"written":{"hz":0,"counter":0,"roughness":0},"read":{"hz":0,"counter":0,"roughness":0}},"keys":{"read":{"hz":0,"counter":0,"roughness":0}},"transactions":{"started":{"hz":0,"counter":0,"roughness":0},"conflicted":{"hz":0,"counter":0,"roughness":0},"committed":{"hz":0,"counter":0,"roughness":0}}},"cluster_controller_timestamp":1415650089,"protocol_version":"fdb00a400050001","configuration":{"resolvers":1,"redundancy":{"factor":{"$enum":["single","double","triple","custom","two_datacenter","three_datacenter","three_data_hall","fast_recovery_double","fast_recovery_triple"]}},"storage_policy":"(zoneid^3x1)","tlog_policy":"(zoneid^2x1)","logs":2,"storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"coordinators_count":1,"excluded_servers":[{"address":"10.0.4.1"}],"proxies":5},"data":{"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true,"min_replicas_remaining":0,"name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"description":""},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"partitions_count":2,"moving_data":{"total_written_bytes":0,"in_flight_bytes":0,"in_queue_bytes":0},"least_operating_space_bytes_storage_server":0,"max_machine_failures_without_losing_data":0},"machines":{"$map":{"network":{"megabits_sent":{"hz":0},"megabits_received":{"hz":0},"tcp_segments_retransmitted":{"hz":0}},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","locality":{"$map":"value"},"cpu":{"logical_core_utilization":0.4}}}},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"database_status":{"available":true,"healthy":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true}}} + {"cluster":{"layers":{"_valid":true,"_error":"some error description"},"datacenter_version_difference":0,"processes":{"$map":{"fault_domain":"0ccb4e0fdbdb5583010f6b77d9d10ece","class_source":{"$enum":["command_line","configure_auto","set_class"]},"class_type":{"$enum":["unset","storage","transaction","resolution","proxy","master","test"]},"roles":[{"query_queue_max":0,"data_version_lag":12341234,"input_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"kvstore_used_bytes":12341234,"stored_bytes":12341234,"kvstore_free_bytes":12341234,"durable_bytes":{"hz":0.0,"counter":0,"roughness":0.0},"id":"eb84471d68c12d1d26f692a50000003f","data_version":12341234,"role":{"$enum":["master","proxy","log","storage","resolver","cluster_controller"]},"queue_disk_available_bytes":12341234,"kvstore_available_bytes":12341234,"queue_disk_total_bytes":12341234,"queue_disk_used_bytes":12341234,"queue_disk_free_bytes":12341234,"kvstore_total_bytes":12341234,"finished_queries":{"hz":0.0,"counter":0,"roughness":0.0}}],"locality":{"$map":"value"},"messages":[{"description":"abc","type":"x","name":{"$enum":["file_open_error","incorrect_cluster_file_contents","process_error","io_error","io_timeout","platform_error","storage_server_lagging","(other FDB error messages)"]},"raw_log_message":"","time":12345.12312}],"address":"1.2.3.4:1234","command_line":"-r simulation","disk":{"free_bytes":3451233456234,"reads":{"hz":0.0,"counter":0,"sectors":0},"busy":0.0,"writes":{"hz":0.0,"counter":0,"sectors":0},"total_bytes":123412341234},"version":"3.0.0","excluded":false,"memory":{"available_bytes":0,"used_bytes":0,"limit_bytes":0},"machine_id":"0ccb4e0feddb5583010f6b77d9d10ece","uptime_seconds":1234.2345,"cpu":{"usage_cores":0.0},"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"connections_closed":{"hz":0.0},"connection_errors":{"hz":0.0},"current_connections":0,"connections_established":{"hz":0.0}}}},"clients":{"count":1,"supported_versions":[{"count":1,"protocol_version":"fdb00a400050001","client_version":"3.0.0","source_version":"9430e1127b4991cbc5ab2b17f41cfffa5de07e9d","connected_clients":[{"log_group":"default","address":"127.0.0.1:9898"}]}]},"qos":{"limiting_version_lag_storage_server":0,"released_transactions_per_second":0,"transactions_per_second_limit":0,"limiting_queue_bytes_storage_server":0,"performance_limited_by":{"reason_server_id":"7f8d623d0cb9966e","description":"The database is not being saturated by the workload.","reason_id":0,"name":{"$enum":["workload","storage_server_write_queue_size","storage_server_write_bandwidth_mvcc","storage_server_readable_behind","log_server_mvcc_write_bandwidth","log_server_write_queue","storage_server_min_free_space","storage_server_min_free_space_ratio","log_server_min_free_space","log_server_min_free_space_ratio"]}},"worst_version_lag_storage_server":0,"worst_queue_bytes_log_server":460,"worst_queue_bytes_storage_server":0},"incompatible_connections":[],"full_replication":true,"database_locked":false,"generation":2,"data":{"least_operating_space_bytes_log_server":0,"average_partition_size_bytes":0,"state":{"healthy":true,"description":"","name":{"$enum":["initializing","missing_data","healing","healthy_repartitioning","healthy_removing_server","healthy_rebalancing","healthy"]},"min_replicas_remaining":0},"least_operating_space_ratio_storage_server":0.1,"max_machine_failures_without_losing_availability":0,"total_disk_used_bytes":0,"total_kv_size_bytes":0,"max_machine_failures_without_losing_data":0,"moving_data":{"in_queue_bytes":0,"total_written_bytes":0,"in_flight_bytes":0},"least_operating_space_bytes_storage_server":0,"partitions_count":2},"fault_tolerance":{"max_machine_failures_without_losing_availability":0,"max_machine_failures_without_losing_data":0},"messages":[{"reasons":[{"description":"Blah."}],"unreachable_processes":[{"address":"1.2.3.4:1234"}],"name":{"$enum":["unreachable_master_worker","unreadable_configuration","full_replication_timeout","client_issues","unreachable_processes","immediate_priority_transaction_start_probe_timeout","batch_priority_transaction_start_probe_timeout","transaction_start_probe_timeout","read_probe_timeout","commit_probe_timeout","storage_servers_error","status_incomplete","layer_status_incomplete","database_availability_timeout"]},"issues":[{"name":{"$enum":["incorrect_cluster_file_contents"]},"description":"Cluster file contents do not match current cluster connection string. Verify cluster file is writable and has not been overwritten externally."}],"description":"abc"}],"database_available":true,"recovery_state":{"required_proxies":1,"name":{"$enum":["reading_coordinated_state","locking_coordinated_state","locking_old_transaction_servers","reading_transaction_system_state","configuration_missing","configuration_never_created","configuration_invalid","recruiting_transaction_servers","initializing_transaction_servers","recovery_transaction","writing_coordinated_state","fully_recovered"]},"missing_logs":"7f8d623d0cb9966e","required_resolvers":1,"required_logs":3,"description":"Recovery complete."},"workload":{"operations":{"writes":{"hz":0.0,"counter":0,"roughness":0.0},"reads":{"hz":0.0,"counter":0,"roughness":0.0}},"keys":{"read":{"hz":0.0,"counter":0,"roughness":0.0}},"bytes":{"read":{"hz":0.0,"counter":0,"roughness":0.0},"written":{"hz":0.0,"counter":0,"roughness":0.0}},"transactions":{"started":{"hz":0.0,"counter":0,"roughness":0.0},"conflicted":{"hz":0.0,"counter":0,"roughness":0.0},"committed":{"hz":0.0,"counter":0,"roughness":0.0}}},"cluster_controller_timestamp":1415650089,"protocol_version":"fdb00a400050001","configuration":{"resolvers":1,"regions":[{"satellite_redundancy_mode":"one_satellite_single","satellite_anti_quorum":0,"satellite_usable_dcs":1,"datacenters":[{"priority":1,"satellite":1,"id":"mr"}],"satellite_log_policy":"(zoneid^3x1)","satellite_log_replicas":1,"satellite_logs":2}],"remote_logs":5,"auto_logs":3,"logs":2,"log_anti_quorum":0,"storage_replicas":1,"log_replicas":2,"remote_redundancy_mode":"remote_single","storage_engine":{"$enum":["ssd","ssd-1","ssd-2","memory","custom"]},"coordinators_count":1,"log_replication_policy":"(zoneid^3x1)","storage_replication_policy":"(zoneid^3x1)","remote_log_replicas":3,"excluded_servers":[{"address":"10.0.4.1"}],"auto_proxies":3,"proxies":5,"auto_resolvers":1,"redundancy_mode":"single","storage_quorum":1},"latency_probe":{"immediate_priority_transaction_start_seconds":0.0,"transaction_start_seconds":0.0,"batch_priority_transaction_start_seconds":0.0,"read_seconds":7,"commit_seconds":0.02},"machines":{"$map":{"network":{"megabits_sent":{"hz":0.0},"megabits_received":{"hz":0.0},"tcp_segments_retransmitted":{"hz":0.0}},"locality":{"$map":"value"},"memory":{"free_bytes":0,"committed_bytes":0,"total_bytes":0},"contributing_workers":4,"datacenter_id":"6344abf1813eb05b","excluded":false,"address":"1.2.3.4","machine_id":"6344abf1813eb05b","cpu":{"logical_core_utilization":0.4}}},"old_logs":[{"satellite_log_fault_tolerance":2,"logs":[{"healthy":true,"id":"7f8d623d0cb9966e","address":"1.2.3.4:1234"}],"satellite_log_write_anti_quorum":0,"remote_log_fault_tolerance":2,"log_fault_tolerance":2,"log_write_anti_quorum":0,"satellite_log_replication_factor":3,"remote_log_replication_factor":3,"log_replication_factor":3}]},"client":{"coordinators":{"coordinators":[{"reachable":true,"address":"127.0.0.1:4701"}],"quorum_reachable":true},"cluster_file":{"path":"/etc/foundationdb/fdb.cluster","up_to_date":true},"messages":[{"name":{"$enum":["inconsistent_cluster_file","unreachable_cluster_controller","no_cluster_controller","status_incomplete_client","status_incomplete_coordinators","status_incomplete_error","status_incomplete_timeout","status_incomplete_cluster","quorum_not_reachable"]},"description":"The cluster file is not up to date."}],"timestamp":1415650089,"database_status":{"available":true,"healthy":true}}}