Merge pull request #9480 from apple/sfc-gh-dadkins/tlog-queue-metrics
tlog: Measure time spent waiting for previous versions separately from actual commit time
This commit is contained in:
commit
7963e6ef69
|
@ -361,6 +361,20 @@ struct TLogData : NonCopyable {
|
|||
// Distribution of end-to-end server latency of tlog commit requests.
|
||||
Reference<Histogram> commitLatencyDist;
|
||||
|
||||
// Distribution of queue wait times, per request.
|
||||
// This is the time spent waiting for previous versions.
|
||||
//
|
||||
// Note: we only wait for previous versions to enter the
|
||||
// in-memory DiskQueue commit queue, not until the records are
|
||||
// flushed and durable.
|
||||
Reference<Histogram> queueWaitLatencyDist;
|
||||
|
||||
// Distribution of just the disk commit times, per request.
|
||||
//
|
||||
// Time starts as soon as this request is done waiting for previous versions,
|
||||
// and ends when the data is flushed and durable.
|
||||
Reference<Histogram> timeUntilDurableDist;
|
||||
|
||||
TLogData(UID dbgid,
|
||||
UID workerID,
|
||||
IKeyValueStore* persistentData,
|
||||
|
@ -376,7 +390,9 @@ struct TLogData : NonCopyable {
|
|||
peekMemoryLimiter(SERVER_KNOBS->TLOG_SPILL_REFERENCE_MAX_PEEK_MEMORY_BYTES),
|
||||
concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS), ignorePopDeadline(0), dataFolder(folder),
|
||||
degraded(degraded),
|
||||
commitLatencyDist(Histogram::getHistogram("tLog"_sr, "commit"_sr, Histogram::Unit::milliseconds)) {
|
||||
commitLatencyDist(Histogram::getHistogram("tLog"_sr, "commit"_sr, Histogram::Unit::milliseconds)),
|
||||
queueWaitLatencyDist(Histogram::getHistogram("tLog"_sr, "QueueWait"_sr, Histogram::Unit::milliseconds)),
|
||||
timeUntilDurableDist(Histogram::getHistogram("tLog"_sr, "TimeUntilDurable"_sr, Histogram::Unit::milliseconds)) {
|
||||
cx = openDBOnServer(dbInfo, TaskPriority::DefaultEndpoint, LockAware::True);
|
||||
}
|
||||
};
|
||||
|
@ -2308,6 +2324,10 @@ ACTOR Future<Void> tLogCommit(TLogData* self,
|
|||
|
||||
wait(logData->version.whenAtLeast(req.prevVersion));
|
||||
|
||||
// Time until now has been spent waiting in the queue to do actual work.
|
||||
state double queueWaitEndTime = g_network->timer();
|
||||
self->queueWaitLatencyDist->sampleSeconds(queueWaitEndTime - req.requestTime());
|
||||
|
||||
// Calling check_yield instead of yield to avoid a destruction ordering problem in simulation
|
||||
if (g_network->check_yield(g_network->getCurrentTask())) {
|
||||
wait(delay(0, g_network->getCurrentTask()));
|
||||
|
@ -2371,24 +2391,29 @@ ACTOR Future<Void> tLogCommit(TLogData* self,
|
|||
wait(
|
||||
timeoutWarning(logData->queueCommittedVersion.whenAtLeast(req.version) || stopped, 0.1, warningCollectorInput));
|
||||
|
||||
// This is the point at which the transaction is durable (unless it timed out, or the tlog stopped).
|
||||
const double durableTime = g_network->timer();
|
||||
|
||||
if (stopped.isReady()) {
|
||||
ASSERT(logData->stopped());
|
||||
req.reply.sendError(tlog_stopped());
|
||||
return Void();
|
||||
}
|
||||
|
||||
// Measure server-side RPC latency from the time a request was
|
||||
// received to time the response was sent.
|
||||
const double endTime = g_network->timer();
|
||||
|
||||
if (isNotDuplicate) {
|
||||
self->commitLatencyDist->sampleSeconds(endTime - req.requestTime());
|
||||
}
|
||||
|
||||
if (req.debugID.present())
|
||||
g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.After");
|
||||
|
||||
req.reply.send(logData->durableKnownCommittedVersion);
|
||||
|
||||
// Measure server-side RPC latency from the time a request was
|
||||
// received until time the response was sent.
|
||||
const double endTime = g_network->timer();
|
||||
|
||||
if (isNotDuplicate) {
|
||||
self->timeUntilDurableDist->sampleSeconds(durableTime - queueWaitEndTime);
|
||||
self->commitLatencyDist->sampleSeconds(endTime - req.requestTime());
|
||||
}
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue