Log latency metrics for batch GRV requests

This commit is contained in:
Dan Lambright 2021-03-22 10:39:06 -04:00
parent 2f61bf3c42
commit 48a475366c
5 changed files with 55 additions and 6 deletions

View File

@ -134,6 +134,20 @@
"p99.9":0.0 "p99.9":0.0
} }
}, },
"grv_batch_latency_statistics":{
"default":{
"count":0,
"min":0.0,
"max":0.0,
"median":0.0,
"mean":0.0,
"p25":0.0,
"p90":0.0,
"p95":0.0,
"p99":0.0,
"p99.9":0.0
}
},
"read_latency_statistics":{ "read_latency_statistics":{
"count":0, "count":0,
"min":0.0, "min":0.0,

View File

@ -157,6 +157,20 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"p99.9":0.0 "p99.9":0.0
} }
}, },
"grv_batch_latency_statistics":{
"default":{
"count":0,
"min":0.0,
"max":0.0,
"median":0.0,
"mean":0.0,
"p25":0.0,
"p90":0.0,
"p95":0.0,
"p99":0.0,
"p99.9":0.0
}
},
"read_latency_statistics":{ "read_latency_statistics":{
"count":0, "count":0,
"min":0.0, "min":0.0,

View File

@ -47,6 +47,7 @@ struct GrvProxyStats {
LatencyBands grvLatencyBands; LatencyBands grvLatencyBands;
LatencySample grvLatencySample; LatencySample grvLatencySample;
LatencySample grvBatchLatencySample;
Future<Void> logger; Future<Void> logger;
@ -101,6 +102,8 @@ struct GrvProxyStats {
id, id,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SAMPLE_SIZE), SERVER_KNOBS->LATENCY_SAMPLE_SIZE),
grvBatchLatencySample("BatchLatencyMetrics", id, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SAMPLE_SIZE),
grvLatencyBands("GRVLatencyMetrics", id, SERVER_KNOBS->STORAGE_LOGGING_DELAY) { grvLatencyBands("GRVLatencyMetrics", id, SERVER_KNOBS->STORAGE_LOGGING_DELAY) {
// The rate at which the limit(budget) is allowed to grow. // The rate at which the limit(budget) is allowed to grow.
specialCounter(cc, "SystemAndDefaultTxnRateAllowed", [this]() { return this->transactionRateAllowed; }); specialCounter(cc, "SystemAndDefaultTxnRateAllowed", [this]() { return this->transactionRateAllowed; });
@ -528,6 +531,9 @@ ACTOR Future<Void> sendGrvReplies(Future<GetReadVersionReply> replyFuture,
if (request.priority >= TransactionPriority::DEFAULT) { if (request.priority >= TransactionPriority::DEFAULT) {
stats->grvLatencyBands.addMeasurement(duration); stats->grvLatencyBands.addMeasurement(duration);
} }
else {
stats->grvBatchLatencySample.addMeasurement(duration);
}
if (request.flags & GetReadVersionRequest::FLAG_USE_MIN_KNOWN_COMMITTED_VERSION) { if (request.flags & GetReadVersionRequest::FLAG_USE_MIN_KNOWN_COMMITTED_VERSION) {
// Only backup worker may infrequently use this flag. // Only backup worker may infrequently use this flag.

View File

@ -499,6 +499,11 @@ struct RolesInfo {
maxTLogVersion - version - SERVER_KNOBS->STORAGE_LOGGING_DELAY * SERVER_KNOBS->VERSIONS_PER_SECOND); maxTLogVersion - version - SERVER_KNOBS->STORAGE_LOGGING_DELAY * SERVER_KNOBS->VERSIONS_PER_SECOND);
} }
TraceEventFields const& batchLatencyMetrics = metrics.at("BatchLatencyMetrics");
if(batchLatencyMetrics.size()) {
obj["grv_batch_latency_statistics"] = addLatencyStatistics(batchLatencyMetrics);
}
TraceEventFields const& readLatencyMetrics = metrics.at("ReadLatencyMetrics"); TraceEventFields const& readLatencyMetrics = metrics.at("ReadLatencyMetrics");
if (readLatencyMetrics.size()) { if (readLatencyMetrics.size()) {
obj["read_latency_statistics"] = addLatencyStatistics(readLatencyMetrics); obj["read_latency_statistics"] = addLatencyStatistics(readLatencyMetrics);
@ -641,6 +646,10 @@ struct RolesInfo {
if (grvLatencyBands.size()) { if (grvLatencyBands.size()) {
obj["grv_latency_bands"] = addLatencyBandInfo(grvLatencyBands); obj["grv_latency_bands"] = addLatencyBandInfo(grvLatencyBands);
} }
TraceEventFields const& grvBatchMetrics = metrics.at("BatchLatencyMetrics");
if(grvBatchMetrics.size()) {
obj["grv_batch_latency_statistics"] = addLatencyStatistics(grvBatchMetrics);
}
} catch (Error& e) { } catch (Error& e) {
if (e.code() != error_code_attribute_not_found) { if (e.code() != error_code_attribute_not_found) {
throw e; throw e;
@ -1812,7 +1821,7 @@ ACTOR static Future<vector<std::pair<StorageServerInterface, EventMap>>> getStor
getServerMetrics(servers, getServerMetrics(servers,
address_workers, address_workers,
std::vector<std::string>{ std::vector<std::string>{
"StorageMetrics", "ReadLatencyMetrics", "ReadLatencyBands", "BusiestReadTag" })) && "StorageMetrics", "ReadLatencyMetrics", "ReadLatencyBands", "BusiestReadTag", "BatchLatencyMetrics" })) &&
store(busiestWriteTags, getServerBusiestWriteTags(servers, address_workers, rkWorker))); store(busiestWriteTags, getServerBusiestWriteTags(servers, address_workers, rkWorker)));
ASSERT(busiestWriteTags.size() == results.size()); ASSERT(busiestWriteTags.size() == results.size());
@ -1850,7 +1859,7 @@ ACTOR static Future<vector<std::pair<GrvProxyInterface, EventMap>>> getGrvProxie
vector<std::pair<GrvProxyInterface, EventMap>> results = vector<std::pair<GrvProxyInterface, EventMap>> results =
wait(getServerMetrics(db->get().client.grvProxies, wait(getServerMetrics(db->get().client.grvProxies,
address_workers, address_workers,
std::vector<std::string>{ "GRVLatencyMetrics", "GRVLatencyBands" })); std::vector<std::string>{ "GRVLatencyMetrics", "GRVLatencyBands", "BatchLatencyMetrics" }));
return results; return results;
} }

View File

@ -678,6 +678,7 @@ public:
LatencySample readLatencySample; LatencySample readLatencySample;
LatencyBands readLatencyBands; LatencyBands readLatencyBands;
LatencySample batchLatencySample;
Counters(StorageServer* self) Counters(StorageServer* self)
: cc("StorageServer", self->thisServerID.toString()), getKeyQueries("GetKeyQueries", cc), : cc("StorageServer", self->thisServerID.toString()), getKeyQueries("GetKeyQueries", cc),
@ -692,10 +693,15 @@ public:
updateBatches("UpdateBatches", cc), updateVersions("UpdateVersions", cc), loops("Loops", cc), updateBatches("UpdateBatches", cc), updateVersions("UpdateVersions", cc), loops("Loops", cc),
fetchWaitingMS("FetchWaitingMS", cc), fetchWaitingCount("FetchWaitingCount", cc), fetchWaitingMS("FetchWaitingMS", cc), fetchWaitingCount("FetchWaitingCount", cc),
fetchExecutingMS("FetchExecutingMS", cc), fetchExecutingCount("FetchExecutingCount", cc), fetchExecutingMS("FetchExecutingMS", cc), fetchExecutingCount("FetchExecutingCount", cc),
readsRejected("ReadsRejected", cc), readLatencySample("ReadLatencyMetrics", readsRejected("ReadsRejected", cc),
self->thisServerID, batchLatencySample("BatchLatencyMetrics",
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, self->thisServerID,
SERVER_KNOBS->LATENCY_SAMPLE_SIZE), SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SAMPLE_SIZE),
readLatencySample("ReadLatencyMetrics",
self->thisServerID,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SAMPLE_SIZE),
readLatencyBands("ReadLatencyBands", self->thisServerID, SERVER_KNOBS->STORAGE_LOGGING_DELAY) { readLatencyBands("ReadLatencyBands", self->thisServerID, SERVER_KNOBS->STORAGE_LOGGING_DELAY) {
specialCounter(cc, "LastTLogVersion", [self]() { return self->lastTLogVersion; }); specialCounter(cc, "LastTLogVersion", [self]() { return self->lastTLogVersion; });
specialCounter(cc, "Version", [self]() { return self->version.get(); }); specialCounter(cc, "Version", [self]() { return self->version.get(); });