Created StorageStats struct to combine health metrics for storage servers

This commit is contained in:
Trevor Clinkenbeard 2019-02-20 11:57:41 -08:00
parent 1bb08b6e14
commit a20f5482bc
4 changed files with 36 additions and 44 deletions

View File

@ -649,15 +649,24 @@ struct ClusterControllerPriorityInfo {
};
struct HealthMetrics {
// Per-storage-server health metrics, grouped so that a single map keyed by
// server UID can carry all of them instead of one parallel map per metric.
//
// Every field has a default member initializer so that a default-constructed
// StorageStats never exposes indeterminate values, regardless of whether it
// was value-initialized (as std::map::operator[] does) or default-initialized.
struct StorageStats {
	// Ratekeeper sets this to bytesInput minus the smoothed durable bytes.
	int64_t storageQueue = 0;
	// Ratekeeper sets this to the smoothed desired-oldest version minus the
	// smoothed durable version.
	int64_t storageNDV = 0;
	// Copied from the storage server's last reply; units are not visible
	// here -- TODO confirm against the reply definition.
	double diskUsage = 0.0;
	// Copied from the storage server's last reply; units are not visible
	// here -- TODO confirm against the reply definition.
	double cpuUsage = 0.0;

	// Passes all fields to serializer() in a fixed order; the order must
	// stay unchanged to remain compatible with existing peers.
	template <class Ar>
	void serialize(Ar& ar) {
		serializer(ar, storageQueue, storageNDV, diskUsage, cpuUsage);
	}
};
int64_t worstStorageQueue;
int64_t worstStorageNDV;
int64_t worstTLogQueue;
double tpsLimit;
std::map<UID, int64_t> storageQueue;
std::map<UID, int64_t> storageNDV;
std::map<UID, StorageStats> storageStats;
std::map<UID, int64_t> tLogQueue;
std::map<UID, double> cpuUsage;
std::map<UID, double> diskUsage;
HealthMetrics()
: worstStorageQueue(0)
@ -674,23 +683,17 @@ struct HealthMetrics {
tpsLimit = hm.tpsLimit;
if (!detailedOutput) {
storageQueue = std::map<UID, int64_t>();
storageNDV = std::map<UID, int64_t>();
tLogQueue = std::map<UID, int64_t>();
cpuUsage = std::map<UID, double>();
diskUsage = std::map<UID, double>();
storageStats.clear();
tLogQueue.clear();
} else if (detailedInput) {
storageQueue = hm.storageQueue;
storageNDV = hm.storageNDV;
storageStats = hm.storageStats;
tLogQueue = hm.tLogQueue;
cpuUsage = hm.cpuUsage;
diskUsage = hm.diskUsage;
}
}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, worstStorageQueue, worstStorageNDV, worstTLogQueue, tpsLimit, storageQueue, storageNDV, tLogQueue, cpuUsage, diskUsage);
serializer(ar, worstStorageQueue, worstStorageNDV, worstTLogQueue, tpsLimit, storageStats, tLogQueue);
}
};

View File

@ -296,11 +296,11 @@ void updateRate( Ratekeeper* self ) {
}
int64_t storageQueue = ss.lastReply.bytesInput - ss.smoothDurableBytes.smoothTotal();
self->healthMetrics.storageQueue[ss.id] = storageQueue;
self->healthMetrics.storageStats[ss.id].storageQueue = storageQueue;
worstStorageQueueStorageServer = std::max(worstStorageQueueStorageServer, storageQueue);
int64_t storageNDV = ss.smoothDesiredOldestVersion.smoothTotal() - ss.smoothDurableVersion.smoothTotal();
self->healthMetrics.storageNDV[ss.id] = storageNDV;
self->healthMetrics.storageStats[ss.id].storageNDV = storageNDV;
worstStorageNDVStorageServer = std::max(worstStorageNDVStorageServer, storageNDV);
int64_t b = storageQueue - targetBytes;
@ -584,11 +584,10 @@ ACTOR Future<Void> rateKeeper(
reply.detailed = req.detailed;
if (req.detailed) {
for (const auto &s : self.storageQueueInfo) {
reply.healthMetrics.cpuUsage[s.key] = s.value.lastReply.cpuUsage;
reply.healthMetrics.diskUsage[s.key] = s.value.lastReply.diskUsage;
self.healthMetrics.storageStats[s.key].cpuUsage = s.value.lastReply.cpuUsage;
self.healthMetrics.storageStats[s.key].diskUsage = s.value.lastReply.diskUsage;
}
reply.healthMetrics.storageQueue = self.healthMetrics.storageQueue;
reply.healthMetrics.storageNDV = self.healthMetrics.storageNDV;
reply.healthMetrics.storageStats = self.healthMetrics.storageStats;
reply.healthMetrics.tLogQueue = self.healthMetrics.tLogQueue;
}

View File

@ -118,15 +118,19 @@ struct ThrottlingWorkload : KVWorkload {
.detail("TpsLimit", healthMetrics.tpsLimit);
TraceEvent traceStorageQueue("StorageQueue");
for (const auto& ss : healthMetrics.storageQueue) {
self->detailedWorstStorageQueue = std::max(self->detailedWorstStorageQueue, ss.second);
traceStorageQueue.detail(format("Storage/%s", ss.first.toString().c_str()), ss.second);
}
TraceEvent traceStorageNDV("StorageNDV");
for (const auto& ss : healthMetrics.storageNDV) {
self->detailedWorstStorageNDV = std::max(self->detailedWorstStorageNDV, ss.second);
traceStorageNDV.detail(format("Storage/%s", ss.first.toString().c_str()), ss.second);
TraceEvent traceCpuUsage("CpuUsage");
TraceEvent traceDiskUsage("DiskUsage");
for (const auto& ss : healthMetrics.storageStats) {
auto storageStats = ss.second;
self->detailedWorstStorageQueue = std::max(self->detailedWorstStorageQueue, storageStats.storageQueue);
traceStorageQueue.detail(format("Storage/%s", ss.first.toString().c_str()), storageStats.storageQueue);
self->detailedWorstStorageNDV = std::max(self->detailedWorstStorageNDV, storageStats.storageNDV);
traceStorageNDV.detail(format("Storage/%s", ss.first.toString().c_str()), storageStats.storageNDV);
self->detailedWorstCpuUsage = std::max(self->detailedWorstCpuUsage, storageStats.cpuUsage);
traceCpuUsage.detail(format("Storage/%s", ss.first.toString().c_str()), storageStats.cpuUsage);
self->detailedWorstDiskUsage = std::max(self->detailedWorstDiskUsage, storageStats.diskUsage);
traceDiskUsage.detail(format("Storage/%s", ss.first.toString().c_str()), storageStats.diskUsage);
}
TraceEvent traceTLogQueue("TLogQueue");
@ -134,18 +138,6 @@ struct ThrottlingWorkload : KVWorkload {
self->detailedWorstTLogQueue = std::max(self->detailedWorstTLogQueue, ss.second);
traceTLogQueue.detail(format("TLog/%s", ss.first.toString().c_str()), ss.second);
}
TraceEvent traceCpuUsage("CpuUsage");
for (const auto& ss : healthMetrics.cpuUsage) {
self->detailedWorstCpuUsage = std::max(self->detailedWorstCpuUsage, ss.second);
traceCpuUsage.detail(format("Storage/%s", ss.first.toString().c_str()), ss.second);
}
TraceEvent traceDiskUsage("DiskUsage");
for (const auto& ss : healthMetrics.diskUsage) {
self->detailedWorstDiskUsage = std::max(self->detailedWorstDiskUsage, ss.second);
traceDiskUsage.detail(format("Storage/%s", ss.first.toString().c_str()), ss.second);
}
}
}
@ -180,9 +172,7 @@ struct ThrottlingWorkload : KVWorkload {
if (!self->sendDetailedHealthMetrics) {
// Clear detailed health metrics that are already populated
wait(delay(2 * CLIENT_KNOBS->UPDATE_DETAILED_HEALTH_METRICS_INTERVAL));
cx->healthMetrics.cpuUsage.clear();
cx->healthMetrics.storageQueue.clear();
cx->healthMetrics.storageNDV.clear();
cx->healthMetrics.storageStats.clear();
cx->healthMetrics.tLogQueue.clear();
}
return Void();

View File

@ -6,5 +6,5 @@ actorsPerClient=10
readsPerTransaction=10
writesPerTransaction=10
throttlingMultiplier=0.5
sendDetailedHealthMetrics=false
sendDetailedHealthMetrics=true
maxBurst=10000