From 5d6333fab9fef6ce355e951b059a84434ae0b8c3 Mon Sep 17 00:00:00 2001 From: Dan Lambright Date: Fri, 3 May 2024 17:04:51 -0400 Subject: [PATCH] Add logging to LogRouter's waitForVersion function (#11359) * Add logging to waitForVersion * Respond to review comments. --------- Co-authored-by: Dan Lambright --- fdbserver/LogRouter.actor.cpp | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/fdbserver/LogRouter.actor.cpp b/fdbserver/LogRouter.actor.cpp index 916e61aab8..85e9fcbc36 100644 --- a/fdbserver/LogRouter.actor.cpp +++ b/fdbserver/LogRouter.actor.cpp @@ -256,6 +256,7 @@ ACTOR Future waitForVersion(LogRouterData* self, Version ver) { // The only time the log router should allow a gap in versions larger than MAX_READ_TRANSACTION_LIFE_VERSIONS is // when processing epoch end. Since one set of log routers is created per generation of transaction logs, the gap // caused by epoch end will be within MAX_VERSIONS_IN_FLIGHT of the log routers start version. + state double startTime = now(); if (self->version.get() < self->startVersion) { // Log router needs to wait for remote tLogs to process data, whose version is less than self->startVersion, @@ -297,6 +298,26 @@ ACTOR Future waitForVersion(LogRouterData* self, Version ver) { return Void(); } +ACTOR Future waitForVersionAndLog(LogRouterData* self, Version ver) { + state Future f = waitForVersion(self, ver); + state double emitInterval = 60.0; + loop { + choose { + when(wait(f)) { + return Void(); + } + when(wait(delay(emitInterval))) { + TraceEvent("LogRouterWaitForVersionLongDelay", self->dbgid) + .detail("WaitForVersion", ver) + .detail("StartVersion", self->startVersion) + .detail("Version", self->version.get()) + .detail("MinPopped", self->minPopped.get()) + .detail("FoundEpochEnd", self->foundEpochEnd); + } + } + } +} + ACTOR Future> getPeekCursorData(LogRouterData* self, Reference r, Version startVersion) { @@ -379,7 +400,7 @@ ACTOR Future pullAsyncData(LogRouterData* self) { if (!foundMessage || r->version().version != ver) { ASSERT(r->version().version > lastVer); if (ver) { - wait(waitForVersion(self, ver)); + wait(waitForVersionAndLog(self, ver)); commitMessages(self, ver, messages); self->version.set(ver); @@ -394,7 +415,7 @@ ACTOR Future pullAsyncData(LogRouterData* self) { if (!foundMessage) { ver--; // ver is the next possible version we will get data for if (ver > self->version.get() && ver >= r->popped()) { - wait(waitForVersion(self, ver)); + wait(waitForVersionAndLog(self, ver)); self->version.set(ver); wait(yield(TaskPriority::TLogCommit));