From 81c7bddaf8389b71824dd04c6369210f3042366c Mon Sep 17 00:00:00 2001
From: Evan Tschannen
Date: Sun, 6 May 2018 18:15:12 -0700
Subject: [PATCH] fix: must check for log router errors while waiting on satellite replies because the recruitmentID will not be updated if it threw an error

---
 fdbserver/TLogServer.actor.cpp              | 2 +-
 fdbserver/TagPartitionedLogSystem.actor.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp
index 9a0986ae55..52e8298b27 100644
--- a/fdbserver/TLogServer.actor.cpp
+++ b/fdbserver/TLogServer.actor.cpp
@@ -1835,7 +1835,7 @@ ACTOR Future updateLogSystem(TLogData* self, Reference logData, L
 	} else {
 		logData->logSystem->get()->pop(logData->logRouterPoppedVersion, logData->remoteTag, logData->durableKnownCommittedVersion, logData->locality);
 	}
-	TraceEvent("TLogUpdate", self->dbgid).detail("logId", logData->logId).detail("recoverFrom", recoverFrom.toString()).detail("dbInfo", self->dbInfo->get().logSystemConfig.toString()).detail("found", found).detail("logSystem", (bool) logSystem->get() ).detail("recoveryState", self->dbInfo->get().recoveryState);
+	TraceEvent("TLogUpdate", self->dbgid).detail("logId", logData->logId).detail("recruitmentID", logData->recruitmentID).detail("dbRecruitmentID", self->dbInfo->get().logSystemConfig.recruitmentID).detail("recoverFrom", recoverFrom.toString()).detail("dbInfo", self->dbInfo->get().logSystemConfig.toString()).detail("found", found).detail("logSystem", (bool) logSystem->get() ).detail("recoveryState", self->dbInfo->get().recoveryState);
 	for(auto it : self->dbInfo->get().logSystemConfig.oldTLogs) {
 		TraceEvent("TLogUpdateOld", self->dbgid).detail("logId", logData->logId).detail("dbInfo", it.toString());
 	}
diff --git a/fdbserver/TagPartitionedLogSystem.actor.cpp b/fdbserver/TagPartitionedLogSystem.actor.cpp
index 9f725be676..b46e6ee3a0 100644
--- a/fdbserver/TagPartitionedLogSystem.actor.cpp
+++ b/fdbserver/TagPartitionedLogSystem.actor.cpp
@@ -1747,7 +1747,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedTLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY ) ), master_recovery_failed() ) );
-	Void _ = wait( waitForAll( satelliteInitializationReplies ) );
+	Void _ = wait( waitForAll( satelliteInitializationReplies ) || oldRouterRecruitment );
 	for( int i = 0; i < satelliteInitializationReplies.size(); i++ ) {
 		logSystem->tLogs[1]->logServers[i] = Reference>>( new AsyncVar>( OptionalInterface(satelliteInitializationReplies[i].get()) ) );