Created StorageStats struct to combine health metrics for storage servers
This commit is contained in:
parent
1bb08b6e14
commit
a20f5482bc
|
@ -649,15 +649,24 @@ struct ClusterControllerPriorityInfo {
|
|||
};
|
||||
|
||||
struct HealthMetrics {
|
||||
// Health metrics for a single storage server, grouped so that they can be
// stored under one key and serialized together.
//
// Every field defaults to zero, so a default-constructed StorageStats (for
// example a local variable, or an entry whose fields are filled in one at a
// time) never exposes indeterminate values.
struct StorageStats {
    int64_t storageQueue = 0; // storage queue size for this server
    int64_t storageNDV = 0;   // storage "NDV" value for this server
    double diskUsage = 0.0;   // disk usage last reported for this server
    double cpuUsage = 0.0;    // CPU usage last reported for this server

    // Serializes or deserializes all four fields through the archive `ar`.
    // The field order is part of the serialized layout; do not reorder.
    template <class Ar>
    void serialize(Ar& ar) {
        serializer(ar, storageQueue, storageNDV, diskUsage, cpuUsage);
    }
};
|
||||
|
||||
int64_t worstStorageQueue;
|
||||
int64_t worstStorageNDV;
|
||||
int64_t worstTLogQueue;
|
||||
double tpsLimit;
|
||||
std::map<UID, int64_t> storageQueue;
|
||||
std::map<UID, int64_t> storageNDV;
|
||||
std::map<UID, StorageStats> storageStats;
|
||||
std::map<UID, int64_t> tLogQueue;
|
||||
std::map<UID, double> cpuUsage;
|
||||
std::map<UID, double> diskUsage;
|
||||
|
||||
HealthMetrics()
|
||||
: worstStorageQueue(0)
|
||||
|
@ -674,23 +683,17 @@ struct HealthMetrics {
|
|||
tpsLimit = hm.tpsLimit;
|
||||
|
||||
if (!detailedOutput) {
|
||||
storageQueue = std::map<UID, int64_t>();
|
||||
storageNDV = std::map<UID, int64_t>();
|
||||
tLogQueue = std::map<UID, int64_t>();
|
||||
cpuUsage = std::map<UID, double>();
|
||||
diskUsage = std::map<UID, double>();
|
||||
storageStats.clear();
|
||||
tLogQueue.clear();
|
||||
} else if (detailedInput) {
|
||||
storageQueue = hm.storageQueue;
|
||||
storageNDV = hm.storageNDV;
|
||||
storageStats = hm.storageStats;
|
||||
tLogQueue = hm.tLogQueue;
|
||||
cpuUsage = hm.cpuUsage;
|
||||
diskUsage = hm.diskUsage;
|
||||
}
|
||||
}
|
||||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(ar, worstStorageQueue, worstStorageNDV, worstTLogQueue, tpsLimit, storageQueue, storageNDV, tLogQueue, cpuUsage, diskUsage);
|
||||
serializer(ar, worstStorageQueue, worstStorageNDV, worstTLogQueue, tpsLimit, storageStats, tLogQueue);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -296,11 +296,11 @@ void updateRate( Ratekeeper* self ) {
|
|||
}
|
||||
|
||||
int64_t storageQueue = ss.lastReply.bytesInput - ss.smoothDurableBytes.smoothTotal();
|
||||
self->healthMetrics.storageQueue[ss.id] = storageQueue;
|
||||
self->healthMetrics.storageStats[ss.id].storageQueue = storageQueue;
|
||||
worstStorageQueueStorageServer = std::max(worstStorageQueueStorageServer, storageQueue);
|
||||
|
||||
int64_t storageNDV = ss.smoothDesiredOldestVersion.smoothTotal() - ss.smoothDurableVersion.smoothTotal();
|
||||
self->healthMetrics.storageNDV[ss.id] = storageNDV;
|
||||
self->healthMetrics.storageStats[ss.id].storageNDV = storageNDV;
|
||||
worstStorageNDVStorageServer = std::max(worstStorageNDVStorageServer, storageNDV);
|
||||
|
||||
int64_t b = storageQueue - targetBytes;
|
||||
|
@ -584,11 +584,10 @@ ACTOR Future<Void> rateKeeper(
|
|||
reply.detailed = req.detailed;
|
||||
if (req.detailed) {
|
||||
for (const auto &s : self.storageQueueInfo) {
|
||||
reply.healthMetrics.cpuUsage[s.key] = s.value.lastReply.cpuUsage;
|
||||
reply.healthMetrics.diskUsage[s.key] = s.value.lastReply.diskUsage;
|
||||
self.healthMetrics.storageStats[s.key].cpuUsage = s.value.lastReply.cpuUsage;
|
||||
self.healthMetrics.storageStats[s.key].diskUsage = s.value.lastReply.diskUsage;
|
||||
}
|
||||
reply.healthMetrics.storageQueue = self.healthMetrics.storageQueue;
|
||||
reply.healthMetrics.storageNDV = self.healthMetrics.storageNDV;
|
||||
reply.healthMetrics.storageStats = self.healthMetrics.storageStats;
|
||||
reply.healthMetrics.tLogQueue = self.healthMetrics.tLogQueue;
|
||||
}
|
||||
|
||||
|
|
|
@ -118,15 +118,19 @@ struct ThrottlingWorkload : KVWorkload {
|
|||
.detail("TpsLimit", healthMetrics.tpsLimit);
|
||||
|
||||
TraceEvent traceStorageQueue("StorageQueue");
|
||||
for (const auto& ss : healthMetrics.storageQueue) {
|
||||
self->detailedWorstStorageQueue = std::max(self->detailedWorstStorageQueue, ss.second);
|
||||
traceStorageQueue.detail(format("Storage/%s", ss.first.toString().c_str()), ss.second);
|
||||
}
|
||||
|
||||
TraceEvent traceStorageNDV("StorageNDV");
|
||||
for (const auto& ss : healthMetrics.storageNDV) {
|
||||
self->detailedWorstStorageNDV = std::max(self->detailedWorstStorageNDV, ss.second);
|
||||
traceStorageNDV.detail(format("Storage/%s", ss.first.toString().c_str()), ss.second);
|
||||
TraceEvent traceCpuUsage("CpuUsage");
|
||||
TraceEvent traceDiskUsage("DiskUsage");
|
||||
for (const auto& ss : healthMetrics.storageStats) {
|
||||
auto storageStats = ss.second;
|
||||
self->detailedWorstStorageQueue = std::max(self->detailedWorstStorageQueue, storageStats.storageQueue);
|
||||
traceStorageQueue.detail(format("Storage/%s", ss.first.toString().c_str()), storageStats.storageQueue);
|
||||
self->detailedWorstStorageNDV = std::max(self->detailedWorstStorageNDV, storageStats.storageNDV);
|
||||
traceStorageNDV.detail(format("Storage/%s", ss.first.toString().c_str()), storageStats.storageNDV);
|
||||
self->detailedWorstCpuUsage = std::max(self->detailedWorstCpuUsage, storageStats.cpuUsage);
|
||||
traceCpuUsage.detail(format("Storage/%s", ss.first.toString().c_str()), storageStats.cpuUsage);
|
||||
self->detailedWorstDiskUsage = std::max(self->detailedWorstDiskUsage, storageStats.diskUsage);
|
||||
traceDiskUsage.detail(format("Storage/%s", ss.first.toString().c_str()), storageStats.diskUsage);
|
||||
}
|
||||
|
||||
TraceEvent traceTLogQueue("TLogQueue");
|
||||
|
@ -134,18 +138,6 @@ struct ThrottlingWorkload : KVWorkload {
|
|||
self->detailedWorstTLogQueue = std::max(self->detailedWorstTLogQueue, ss.second);
|
||||
traceTLogQueue.detail(format("TLog/%s", ss.first.toString().c_str()), ss.second);
|
||||
}
|
||||
|
||||
TraceEvent traceCpuUsage("CpuUsage");
|
||||
for (const auto& ss : healthMetrics.cpuUsage) {
|
||||
self->detailedWorstCpuUsage = std::max(self->detailedWorstCpuUsage, ss.second);
|
||||
traceCpuUsage.detail(format("Storage/%s", ss.first.toString().c_str()), ss.second);
|
||||
}
|
||||
|
||||
TraceEvent traceDiskUsage("DiskUsage");
|
||||
for (const auto& ss : healthMetrics.diskUsage) {
|
||||
self->detailedWorstDiskUsage = std::max(self->detailedWorstDiskUsage, ss.second);
|
||||
traceDiskUsage.detail(format("Storage/%s", ss.first.toString().c_str()), ss.second);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -180,9 +172,7 @@ struct ThrottlingWorkload : KVWorkload {
|
|||
if (!self->sendDetailedHealthMetrics) {
|
||||
// Clear detailed health metrics that are already populated
|
||||
wait(delay(2 * CLIENT_KNOBS->UPDATE_DETAILED_HEALTH_METRICS_INTERVAL));
|
||||
cx->healthMetrics.cpuUsage.clear();
|
||||
cx->healthMetrics.storageQueue.clear();
|
||||
cx->healthMetrics.storageNDV.clear();
|
||||
cx->healthMetrics.storageStats.clear();
|
||||
cx->healthMetrics.tLogQueue.clear();
|
||||
}
|
||||
return Void();
|
||||
|
|
|
@ -6,5 +6,5 @@ actorsPerClient=10
|
|||
readsPerTransaction=10
|
||||
writesPerTransaction=10
|
||||
throttlingMultiplier=0.5
|
||||
sendDetailedHealthMetrics=false
|
||||
sendDetailedHealthMetrics=true
|
||||
maxBurst=10000
|
||||
|
|
Loading…
Reference in New Issue