From 6357ad1750201c874277918acde9d8b83e85b63f Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Wed, 28 Sep 2022 16:18:44 -0700 Subject: [PATCH] pass info through cc data to populate in status --- fdbclient/Metacluster.cpp | 13 +++++ fdbclient/MetaclusterManagement.actor.cpp | 12 ++-- fdbclient/include/fdbclient/FDBTypes.h | 13 ----- fdbclient/include/fdbclient/Metacluster.h | 11 ++++ fdbserver/ClusterController.actor.cpp | 55 +++++++++++++++---- fdbserver/ClusterRecovery.actor.cpp | 13 +++-- fdbserver/Status.actor.cpp | 25 ++++++++- .../fdbserver/ClusterController.actor.h | 3 + fdbserver/include/fdbserver/Status.h | 5 +- 9 files changed, 112 insertions(+), 38 deletions(-) diff --git a/fdbclient/Metacluster.cpp b/fdbclient/Metacluster.cpp index 6463033db8..993b70fa70 100644 --- a/fdbclient/Metacluster.cpp +++ b/fdbclient/Metacluster.cpp @@ -24,6 +24,19 @@ FDB_DEFINE_BOOLEAN_PARAM(AddNewTenants); FDB_DEFINE_BOOLEAN_PARAM(RemoveMissingTenants); +std::string clusterTypeToString(const ClusterType& clusterType) { + switch (clusterType) { + case ClusterType::STANDALONE: + return "standalone"; + case ClusterType::METACLUSTER_MANAGEMENT: + return "metacluster_management"; + case ClusterType::METACLUSTER_DATA: + return "metacluster_data"; + default: + return "unknown"; + } +} + std::string DataClusterEntry::clusterStateToString(DataClusterState clusterState) { switch (clusterState) { case DataClusterState::READY: diff --git a/fdbclient/MetaclusterManagement.actor.cpp b/fdbclient/MetaclusterManagement.actor.cpp index 3daa503e12..397021df57 100644 --- a/fdbclient/MetaclusterManagement.actor.cpp +++ b/fdbclient/MetaclusterManagement.actor.cpp @@ -27,15 +27,15 @@ namespace MetaclusterAPI { -ACTOR std::pair metaclusterCapacity(std::map clusters) { - ClusterUsage totalCapacity; - ClusterUsage totalAllocated; +std::pair metaclusterCapacity(std::map clusters) { + ClusterUsage tenantGroupCapacity; + ClusterUsage tenantGroupsAllocated; for (auto cluster : clusters) { - totalCapacity.numTenantGroups += + tenantGroupCapacity.numTenantGroups += std::max(cluster.second.entry.capacity.numTenantGroups, cluster.second.entry.allocated.numTenantGroups); - totalAllocated.numTenantGroups += cluster.second.entry.allocated.numTenantGroups; + tenantGroupsAllocated.numTenantGroups += cluster.second.entry.allocated.numTenantGroups; } - return { totalCapacity, totalAllocated }; + return { tenantGroupCapacity, tenantGroupsAllocated }; } ACTOR Future> openDatabase(ClusterConnectionString connectionString) { diff --git a/fdbclient/include/fdbclient/FDBTypes.h b/fdbclient/include/fdbclient/FDBTypes.h index c2cad11c88..89ddacad2d 100644 --- a/fdbclient/include/fdbclient/FDBTypes.h +++ b/fdbclient/include/fdbclient/FDBTypes.h @@ -1463,19 +1463,6 @@ typedef Standalone ClusterName; enum class ClusterType { STANDALONE, METACLUSTER_MANAGEMENT, METACLUSTER_DATA }; -// std::string getClusterType(const ClusterType& clusterType) { -// switch (clusterType) { -// case ClusterType::STANDALONE: -// return "standalone"; -// case ClusterType::METACLUSTER_MANAGEMENT: -// return "metacluster_management"; -// case ClusterType::METACLUSTER_DATA: -// return "metacluster_data"; -// default: -// return "unknown"; -// } -// } - struct GRVCacheSpace { Version cachedReadVersion; double lastGrvTime; diff --git a/fdbclient/include/fdbclient/Metacluster.h b/fdbclient/include/fdbclient/Metacluster.h index 7f07286ae4..39b876b7ae 100644 --- a/fdbclient/include/fdbclient/Metacluster.h +++ b/fdbclient/include/fdbclient/Metacluster.h @@ -53,6 +53,8 @@ struct Traceable : std::true_type { } }; +std::string clusterTypeToString(const ClusterType& clusterType); + // Represents the various states that a data cluster could be in. // // READY - the data cluster is active @@ -98,6 +100,15 @@ struct DataClusterEntry { } }; +struct MetaclusterMetrics { + int numTenants = 0; + int numDataClusters = 0; + int tenantGroupCapacity = 0; + int tenantGroupsAllocated = 0; + + MetaclusterMetrics() = default; +}; + struct MetaclusterRegistrationEntry { constexpr static FileIdentifier file_identifier = 13448589; diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 19a799352a..4296751899 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -44,6 +44,7 @@ #include "fdbserver/ClusterRecovery.actor.h" #include "fdbserver/DataDistributorInterface.h" #include "fdbserver/DBCoreState.h" +#include "fdbclient/Metacluster.h" #include "fdbclient/MetaclusterManagement.actor.h" #include "fdbserver/MoveKeys.actor.h" #include "fdbserver/LeaderElection.h" @@ -1499,7 +1500,9 @@ ACTOR Future statusServer(FutureStream requests, coordinators, incompatibleConnections, self->datacenterVersionDifference, - configBroadcaster))); + configBroadcaster, + self->db.metaclusterRegistration, + self->db.metaclusterMetrics))); if (result.isError() && result.getError().code() == error_code_actor_cancelled) throw result.getError(); @@ -2686,17 +2689,45 @@ ACTOR Future workerHealthMonitor(ClusterControllerData* self) { ACTOR Future metaclusterMetricsUpdater(ClusterControllerData* self) { state Reference tr = makeReference(self->cx); loop { - try { - std::map clusters = - wait(MetaclusterAPI::listClustersTransaction(tr, ""_sr, "\xff"_sr, CLIENT_KNOBS->MAX_DATA_CLUSTERS)); - - auto capacityNumbers = MetaclusterAPI::metaclusterCapacity(clusters); - TraceEvent("MetaclusterCapacity") - .detail("DataClusters", clusters.size()) - .detail("TotalCapacity", capacityNumbers.first.numTenantGroups) - .detail("AllocatedCapacity", capacityNumbers.second.numTenantGroups); - } catch (Error& e) { - wait(tr->onError(e)); + if (self->db.clusterType == ClusterType::METACLUSTER_MANAGEMENT) { + try { + tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS); + state std::map clusters = + // wait(MetaclusterAPI::listClustersTransaction(tr, ""_sr, "\xff"_sr, + // CLIENT_KNOBS->MAX_DATA_CLUSTERS)); + wait(MetaclusterAPI::listClusters( + self->cx.getReference(), ""_sr, "\xff"_sr, CLIENT_KNOBS->MAX_DATA_CLUSTERS)); + state int64_t tenantCount = + wait(MetaclusterAPI::ManagementClusterMetadata::tenantMetadata().tenantCount.getD( + tr, Snapshot::False, 0)); + state std::pair capacityNumbers = + MetaclusterAPI::metaclusterCapacity(clusters); + // TraceEvent("MetaclusterCapacityDebugTr") + // .detail("DataClusters", clusters.size()) + // .detail("TenantGroupCapacity", capacityNumbers.first.numTenantGroups) + // .detail("TenantGroupsAllocated", capacityNumbers.second.numTenantGroups); + // state std::map clusters2 = + // wait(MetaclusterAPI::listClusters(self->cx.getReference(), ""_sr, "\xff"_sr, + // CLIENT_KNOBS->MAX_DATA_CLUSTERS)); state std::pair capacityNumbers2 = + // MetaclusterAPI::metaclusterCapacity(clusters2); TraceEvent("MetaclusterCapacityDebugDb") + // .detail("DataClusters", clusters2.size()) + // .detail("TenantGroupCapacity", capacityNumbers2.first.numTenantGroups) + // .detail("TenantGroupsAllocated", capacityNumbers2.second.numTenantGroups); + MetaclusterMetrics metrics; + metrics.numTenants = tenantCount; + metrics.numDataClusters = clusters.size(); + metrics.tenantGroupCapacity = capacityNumbers.first.numTenantGroups; + metrics.tenantGroupsAllocated = capacityNumbers.second.numTenantGroups; + self->db.metaclusterMetrics = metrics; + TraceEvent("MetaclusterCapacity") + .detail("DataClusters", self->db.metaclusterMetrics.numDataClusters) + .detail("TenantGroupCapacity", self->db.metaclusterMetrics.tenantGroupCapacity) + .detail("TenantGroupsAllocated", self->db.metaclusterMetrics.tenantGroupsAllocated); + } catch (Error& e) { + TraceEvent("MetaclusterUpdaterError").error(e); + wait(tr->onError(e)); + continue; + } } // Background updater updates every minute wait(delay(60.0)); diff --git a/fdbserver/ClusterRecovery.actor.cpp b/fdbserver/ClusterRecovery.actor.cpp index b643b11332..612ad79416 100644 --- a/fdbserver/ClusterRecovery.actor.cpp +++ b/fdbserver/ClusterRecovery.actor.cpp @@ -1169,6 +1169,7 @@ ACTOR Future readTransactionSystemState(Reference sel Optional clusterName; Optional clusterId; if (metaclusterRegistration.present()) { + self->controllerData->db.metaclusterRegistration = metaclusterRegistration.get(); self->controllerData->db.metaclusterName = metaclusterRegistration.get().metaclusterName; self->controllerData->db.clusterType = metaclusterRegistration.get().clusterType; metaclusterName = metaclusterRegistration.get().metaclusterName; @@ -1178,15 +1179,17 @@ ACTOR Future readTransactionSystemState(Reference sel clusterId = metaclusterRegistration.get().id; } } else { + self->controllerData->db.metaclusterRegistration = Optional(); + self->controllerData->db.metaclusterName = Optional(); self->controllerData->db.clusterType = ClusterType::STANDALONE; } TraceEvent("MetaclusterMetadata") - .detail("ClusterType", self->controllerData->db.clusterType) - .detail("MetaclusterName", metaclusterName.present() ? metaclusterName.get() : ClusterName()) - .detail("MetaclusterId", metaclusterId.present() ? metaclusterId.get() : UID()) - .detail("ClusterName", clusterName.present() ? clusterName.get() : ClusterName()) - .detail("ClusterId", clusterId.present() ? clusterId.get() : UID()); + .detail("ClusterType", clusterTypeToString(self->controllerData->db.clusterType)) + .detail("MetaclusterName", metaclusterName) + .detail("MetaclusterId", metaclusterId) + .detail("DataClusterName", clusterName) + .detail("DataClusterId", clusterId); uniquify(self->allTags); diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index c449249dff..fef71c5357 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -19,6 +19,7 @@ */ #include +#include "fdbclient/Metacluster.h" #include "fmt/format.h" #include "fdbclient/BlobWorkerInterface.h" #include "fdbclient/KeyBackedTypes.h" @@ -2919,7 +2920,9 @@ ACTOR Future clusterGetStatus( ServerCoordinators coordinators, std::vector incompatibleConnections, Version datacenterVersionDifference, - ConfigBroadcaster const* configBroadcaster) { + ConfigBroadcaster const* configBroadcaster, + Optional metaclusterRegistration, + MetaclusterMetrics metaclusterMetrics) { state double tStart = timer(); state JsonBuilderArray messages; @@ -3061,6 +3064,7 @@ ACTOR Future clusterGetStatus( state JsonBuilderObject qos; state JsonBuilderObject dataOverlay; state JsonBuilderObject tenants; + state JsonBuilderObject metacluster; state JsonBuilderObject storageWiggler; state std::unordered_set wiggleServers; @@ -3243,6 +3247,25 @@ ACTOR Future clusterGetStatus( if (!qos.empty()) statusObj["qos"] = qos; + // Metacluster metadata + if (metaclusterRegistration.present()) { + metacluster["cluster_type"] = clusterTypeToString(metaclusterRegistration.get().clusterType); + metacluster["metacluster_name"] = metaclusterRegistration.get().metaclusterName; + metacluster["metacluster_id"] = metaclusterRegistration.get().metaclusterId.toString(); + if (metaclusterRegistration.get().clusterType == ClusterType::METACLUSTER_DATA) { + metacluster["data_cluster_name"] = metaclusterRegistration.get().name; + metacluster["data_cluster_id"] = metaclusterRegistration.get().id.toString(); + } else { // clusterType == ClusterType::METACLUSTER_MANAGEMENT + metacluster["num_data_clusters"] = metaclusterMetrics.numDataClusters; + tenants["num_tenants"] = metaclusterMetrics.numTenants; + tenants["tenant_group_capacity"] = metaclusterMetrics.tenantGroupCapacity; + tenants["tenant_groups_allocated"] = metaclusterMetrics.tenantGroupsAllocated; + } + } else { + metacluster["cluster_type"] = clusterTypeToString(ClusterType::STANDALONE); + } + statusObj["metacluster"] = metacluster; + if (!tenants.empty()) statusObj["tenants"] = tenants; diff --git a/fdbserver/include/fdbserver/ClusterController.actor.h b/fdbserver/include/fdbserver/ClusterController.actor.h index e67e9b0ce8..c43377c296 100644 --- a/fdbserver/include/fdbserver/ClusterController.actor.h +++ b/fdbserver/include/fdbserver/ClusterController.actor.h @@ -31,6 +31,7 @@ #define FDBSERVER_CLUSTERCONTROLLER_ACTOR_H #include "fdbclient/DatabaseContext.h" +#include "fdbclient/Metacluster.h" #include "fdbrpc/Replication.h" #include "fdbrpc/ReplicationUtils.h" #include "fdbserver/Knobs.h" @@ -142,6 +143,8 @@ public: AsyncVar blobGranulesEnabled; ClusterType clusterType = ClusterType::STANDALONE; Optional metaclusterName; + Optional metaclusterRegistration; + MetaclusterMetrics metaclusterMetrics; DBInfo() : clientInfo(new AsyncVar()), serverInfo(new AsyncVar()), diff --git a/fdbserver/include/fdbserver/Status.h b/fdbserver/include/fdbserver/Status.h index 63e78f49f0..ac8f5ebc40 100644 --- a/fdbserver/include/fdbserver/Status.h +++ b/fdbserver/include/fdbserver/Status.h @@ -27,6 +27,7 @@ #include "fdbserver/WorkerInterface.actor.h" #include "fdbserver/MasterInterface.h" #include "fdbclient/ClusterInterface.h" +#include "fdbclient/Metacluster.h" struct ProcessIssues { NetworkAddress address; @@ -44,7 +45,9 @@ Future clusterGetStatus( ServerCoordinators const& coordinators, std::vector const& incompatibleConnections, Version const& datacenterVersionDifference, - ConfigBroadcaster const* const& conifgBroadcaster); + ConfigBroadcaster const* const& conifgBroadcaster, + Optional const& metaclusterRegistration, + MetaclusterMetrics const& metaclusterMetrics); struct WorkerEvents : std::map {}; Future>>> latestEventOnWorkers(