Pass metacluster info through the cluster controller (cc) data to populate it in status

This commit is contained in:
Jon Fu 2022-09-28 16:18:44 -07:00
parent 0fa462fca9
commit 6357ad1750
9 changed files with 112 additions and 38 deletions

View File

@ -24,6 +24,19 @@
FDB_DEFINE_BOOLEAN_PARAM(AddNewTenants);
FDB_DEFINE_BOOLEAN_PARAM(RemoveMissingTenants);
// Maps a ClusterType to its lowercase, underscore-separated name.
// Any value other than the three known enumerators yields "unknown".
std::string clusterTypeToString(const ClusterType& clusterType) {
	if (clusterType == ClusterType::STANDALONE) {
		return "standalone";
	}
	if (clusterType == ClusterType::METACLUSTER_MANAGEMENT) {
		return "metacluster_management";
	}
	if (clusterType == ClusterType::METACLUSTER_DATA) {
		return "metacluster_data";
	}
	// Fallback for values outside the enumerators handled above.
	return "unknown";
}
std::string DataClusterEntry::clusterStateToString(DataClusterState clusterState) {
switch (clusterState) {
case DataClusterState::READY:

View File

@ -27,15 +27,15 @@
namespace MetaclusterAPI {
// Sums tenant-group usage across the given data clusters and returns it as a pair:
//   first  - per cluster, the larger of its tenant-group capacity and its allocated tenant groups
//   second - per cluster, its allocated tenant groups
// NOTE(review): this span comes from a diff rendering, so the pre-change lines (the ACTOR signature
// and the totalCapacity/totalAllocated accumulators) appear interleaved with their replacements
// (the plain signature and tenantGroupCapacity/tenantGroupsAllocated).
ACTOR std::pair<ClusterUsage, ClusterUsage> metaclusterCapacity(std::map<ClusterName, DataClusterMetadata> clusters) {
ClusterUsage totalCapacity;
ClusterUsage totalAllocated;
std::pair<ClusterUsage, ClusterUsage> metaclusterCapacity(std::map<ClusterName, DataClusterMetadata> clusters) {
ClusterUsage tenantGroupCapacity;
ClusterUsage tenantGroupsAllocated;
for (auto cluster : clusters) {
totalCapacity.numTenantGroups +=
tenantGroupCapacity.numTenantGroups +=
// Using max() means a cluster whose allocation exceeds its capacity contributes its allocation.
std::max(cluster.second.entry.capacity.numTenantGroups, cluster.second.entry.allocated.numTenantGroups);
totalAllocated.numTenantGroups += cluster.second.entry.allocated.numTenantGroups;
tenantGroupsAllocated.numTenantGroups += cluster.second.entry.allocated.numTenantGroups;
}
return { totalCapacity, totalAllocated };
return { tenantGroupCapacity, tenantGroupsAllocated };
}
ACTOR Future<Reference<IDatabase>> openDatabase(ClusterConnectionString connectionString) {

View File

@ -1463,19 +1463,6 @@ typedef Standalone<ClusterNameRef> ClusterName;
// Role of a cluster with respect to a metacluster.
enum class ClusterType {
	// Used by the cluster controller when no metacluster registration is present.
	STANDALONE,
	// The cluster type for which the cluster controller collects metacluster metrics.
	METACLUSTER_MANAGEMENT,
	// The cluster type for which status reports a data cluster name and id.
	METACLUSTER_DATA
};
// std::string getClusterType(const ClusterType& clusterType) {
// switch (clusterType) {
// case ClusterType::STANDALONE:
// return "standalone";
// case ClusterType::METACLUSTER_MANAGEMENT:
// return "metacluster_management";
// case ClusterType::METACLUSTER_DATA:
// return "metacluster_data";
// default:
// return "unknown";
// }
// }
struct GRVCacheSpace {
Version cachedReadVersion;
double lastGrvTime;

View File

@ -53,6 +53,8 @@ struct Traceable<ClusterUsage> : std::true_type {
}
};
// Converts a ClusterType to its string name ("standalone", "metacluster_management" or
// "metacluster_data"); any other value maps to "unknown".
std::string clusterTypeToString(const ClusterType& clusterType);
// Represents the various states that a data cluster could be in.
//
// READY - the data cluster is active
@ -98,6 +100,15 @@ struct DataClusterEntry {
}
};
// Counters describing a metacluster; every field starts at zero.
// The cluster controller's metrics updater fills them in and status reads them back.
struct MetaclusterMetrics {
	MetaclusterMetrics() = default;

	// Tenant count read from the management cluster metadata.
	int numTenants{ 0 };
	// Number of data clusters returned by the cluster listing.
	int numDataClusters{ 0 };
	// Summed tenant-group capacity (first element of metaclusterCapacity()).
	int tenantGroupCapacity{ 0 };
	// Summed allocated tenant groups (second element of metaclusterCapacity()).
	int tenantGroupsAllocated{ 0 };
};
struct MetaclusterRegistrationEntry {
constexpr static FileIdentifier file_identifier = 13448589;

View File

@ -44,6 +44,7 @@
#include "fdbserver/ClusterRecovery.actor.h"
#include "fdbserver/DataDistributorInterface.h"
#include "fdbserver/DBCoreState.h"
#include "fdbclient/Metacluster.h"
#include "fdbclient/MetaclusterManagement.actor.h"
#include "fdbserver/MoveKeys.actor.h"
#include "fdbserver/LeaderElection.h"
@ -1499,7 +1500,9 @@ ACTOR Future<Void> statusServer(FutureStream<StatusRequest> requests,
coordinators,
incompatibleConnections,
self->datacenterVersionDifference,
configBroadcaster)));
configBroadcaster,
self->db.metaclusterRegistration,
self->db.metaclusterMetrics)));
if (result.isError() && result.getError().code() == error_code_actor_cancelled)
throw result.getError();
@ -2686,17 +2689,45 @@ ACTOR Future<Void> workerHealthMonitor(ClusterControllerData* self) {
ACTOR Future<Void> metaclusterMetricsUpdater(ClusterControllerData* self) {
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(self->cx);
loop {
try {
std::map<ClusterName, DataClusterMetadata> clusters =
wait(MetaclusterAPI::listClustersTransaction(tr, ""_sr, "\xff"_sr, CLIENT_KNOBS->MAX_DATA_CLUSTERS));
auto capacityNumbers = MetaclusterAPI::metaclusterCapacity(clusters);
TraceEvent("MetaclusterCapacity")
.detail("DataClusters", clusters.size())
.detail("TotalCapacity", capacityNumbers.first.numTenantGroups)
.detail("AllocatedCapacity", capacityNumbers.second.numTenantGroups);
} catch (Error& e) {
wait(tr->onError(e));
if (self->db.clusterType == ClusterType::METACLUSTER_MANAGEMENT) {
try {
tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
state std::map<ClusterName, DataClusterMetadata> clusters =
// wait(MetaclusterAPI::listClustersTransaction(tr, ""_sr, "\xff"_sr,
// CLIENT_KNOBS->MAX_DATA_CLUSTERS));
wait(MetaclusterAPI::listClusters(
self->cx.getReference(), ""_sr, "\xff"_sr, CLIENT_KNOBS->MAX_DATA_CLUSTERS));
state int64_t tenantCount =
wait(MetaclusterAPI::ManagementClusterMetadata::tenantMetadata().tenantCount.getD(
tr, Snapshot::False, 0));
state std::pair<ClusterUsage, ClusterUsage> capacityNumbers =
MetaclusterAPI::metaclusterCapacity(clusters);
// TraceEvent("MetaclusterCapacityDebugTr")
// .detail("DataClusters", clusters.size())
// .detail("TenantGroupCapacity", capacityNumbers.first.numTenantGroups)
// .detail("TenantGroupsAllocated", capacityNumbers.second.numTenantGroups);
// state std::map<ClusterName, DataClusterMetadata> clusters2 =
// wait(MetaclusterAPI::listClusters(self->cx.getReference(), ""_sr, "\xff"_sr,
// CLIENT_KNOBS->MAX_DATA_CLUSTERS)); state std::pair<ClusterUsage, ClusterUsage> capacityNumbers2 =
// MetaclusterAPI::metaclusterCapacity(clusters2); TraceEvent("MetaclusterCapacityDebugDb")
// .detail("DataClusters", clusters2.size())
// .detail("TenantGroupCapacity", capacityNumbers2.first.numTenantGroups)
// .detail("TenantGroupsAllocated", capacityNumbers2.second.numTenantGroups);
MetaclusterMetrics metrics;
metrics.numTenants = tenantCount;
metrics.numDataClusters = clusters.size();
metrics.tenantGroupCapacity = capacityNumbers.first.numTenantGroups;
metrics.tenantGroupsAllocated = capacityNumbers.second.numTenantGroups;
self->db.metaclusterMetrics = metrics;
TraceEvent("MetaclusterCapacity")
.detail("DataClusters", self->db.metaclusterMetrics.numDataClusters)
.detail("TenantGroupCapacity", self->db.metaclusterMetrics.tenantGroupCapacity)
.detail("TenantGroupsAllocated", self->db.metaclusterMetrics.tenantGroupsAllocated);
} catch (Error& e) {
TraceEvent("MetaclusterUpdaterError").error(e);
wait(tr->onError(e));
continue;
}
}
// Background updater updates every minute
wait(delay(60.0));

View File

@ -1169,6 +1169,7 @@ ACTOR Future<Void> readTransactionSystemState(Reference<ClusterRecoveryData> sel
Optional<ClusterName> clusterName;
Optional<UID> clusterId;
if (metaclusterRegistration.present()) {
self->controllerData->db.metaclusterRegistration = metaclusterRegistration.get();
self->controllerData->db.metaclusterName = metaclusterRegistration.get().metaclusterName;
self->controllerData->db.clusterType = metaclusterRegistration.get().clusterType;
metaclusterName = metaclusterRegistration.get().metaclusterName;
@ -1178,15 +1179,17 @@ ACTOR Future<Void> readTransactionSystemState(Reference<ClusterRecoveryData> sel
clusterId = metaclusterRegistration.get().id;
}
} else {
self->controllerData->db.metaclusterRegistration = Optional<MetaclusterRegistrationEntry>();
self->controllerData->db.metaclusterName = Optional<ClusterName>();
self->controllerData->db.clusterType = ClusterType::STANDALONE;
}
TraceEvent("MetaclusterMetadata")
.detail("ClusterType", self->controllerData->db.clusterType)
.detail("MetaclusterName", metaclusterName.present() ? metaclusterName.get() : ClusterName())
.detail("MetaclusterId", metaclusterId.present() ? metaclusterId.get() : UID())
.detail("ClusterName", clusterName.present() ? clusterName.get() : ClusterName())
.detail("ClusterId", clusterId.present() ? clusterId.get() : UID());
.detail("ClusterType", clusterTypeToString(self->controllerData->db.clusterType))
.detail("MetaclusterName", metaclusterName)
.detail("MetaclusterId", metaclusterId)
.detail("DataClusterName", clusterName)
.detail("DataClusterId", clusterId);
uniquify(self->allTags);

View File

@ -19,6 +19,7 @@
*/
#include <cinttypes>
#include "fdbclient/Metacluster.h"
#include "fmt/format.h"
#include "fdbclient/BlobWorkerInterface.h"
#include "fdbclient/KeyBackedTypes.h"
@ -2919,7 +2920,9 @@ ACTOR Future<StatusReply> clusterGetStatus(
ServerCoordinators coordinators,
std::vector<NetworkAddress> incompatibleConnections,
Version datacenterVersionDifference,
ConfigBroadcaster const* configBroadcaster) {
ConfigBroadcaster const* configBroadcaster,
Optional<MetaclusterRegistrationEntry> metaclusterRegistration,
MetaclusterMetrics metaclusterMetrics) {
state double tStart = timer();
state JsonBuilderArray messages;
@ -3061,6 +3064,7 @@ ACTOR Future<StatusReply> clusterGetStatus(
state JsonBuilderObject qos;
state JsonBuilderObject dataOverlay;
state JsonBuilderObject tenants;
state JsonBuilderObject metacluster;
state JsonBuilderObject storageWiggler;
state std::unordered_set<UID> wiggleServers;
@ -3243,6 +3247,25 @@ ACTOR Future<StatusReply> clusterGetStatus(
if (!qos.empty())
statusObj["qos"] = qos;
// Metacluster metadata
if (metaclusterRegistration.present()) {
metacluster["cluster_type"] = clusterTypeToString(metaclusterRegistration.get().clusterType);
metacluster["metacluster_name"] = metaclusterRegistration.get().metaclusterName;
metacluster["metacluster_id"] = metaclusterRegistration.get().metaclusterId.toString();
if (metaclusterRegistration.get().clusterType == ClusterType::METACLUSTER_DATA) {
metacluster["data_cluster_name"] = metaclusterRegistration.get().name;
metacluster["data_cluster_id"] = metaclusterRegistration.get().id.toString();
} else { // clusterType == ClusterType::METACLUSTER_MANAGEMENT
metacluster["num_data_clusters"] = metaclusterMetrics.numDataClusters;
tenants["num_tenants"] = metaclusterMetrics.numTenants;
tenants["tenant_group_capacity"] = metaclusterMetrics.tenantGroupCapacity;
tenants["tenant_groups_allocated"] = metaclusterMetrics.tenantGroupsAllocated;
}
} else {
metacluster["cluster_type"] = clusterTypeToString(ClusterType::STANDALONE);
}
statusObj["metacluster"] = metacluster;
if (!tenants.empty())
statusObj["tenants"] = tenants;

View File

@ -31,6 +31,7 @@
#define FDBSERVER_CLUSTERCONTROLLER_ACTOR_H
#include "fdbclient/DatabaseContext.h"
#include "fdbclient/Metacluster.h"
#include "fdbrpc/Replication.h"
#include "fdbrpc/ReplicationUtils.h"
#include "fdbserver/Knobs.h"
@ -142,6 +143,8 @@ public:
AsyncVar<bool> blobGranulesEnabled;
ClusterType clusterType = ClusterType::STANDALONE;
Optional<ClusterName> metaclusterName;
Optional<MetaclusterRegistrationEntry> metaclusterRegistration;
MetaclusterMetrics metaclusterMetrics;
DBInfo()
: clientInfo(new AsyncVar<ClientDBInfo>()), serverInfo(new AsyncVar<ServerDBInfo>()),

View File

@ -27,6 +27,7 @@
#include "fdbserver/WorkerInterface.actor.h"
#include "fdbserver/MasterInterface.h"
#include "fdbclient/ClusterInterface.h"
#include "fdbclient/Metacluster.h"
struct ProcessIssues {
NetworkAddress address;
@ -44,7 +45,9 @@ Future<StatusReply> clusterGetStatus(
ServerCoordinators const& coordinators,
std::vector<NetworkAddress> const& incompatibleConnections,
Version const& datacenterVersionDifference,
ConfigBroadcaster const* const& conifgBroadcaster);
ConfigBroadcaster const* const& conifgBroadcaster,
Optional<MetaclusterRegistrationEntry> const& metaclusterRegistration,
MetaclusterMetrics const& metaclusterMetrics);
struct WorkerEvents : std::map<NetworkAddress, TraceEventFields> {};
Future<Optional<std::pair<WorkerEvents, std::set<std::string>>>> latestEventOnWorkers(