diff --git a/fdbserver/BackupWorker.actor.cpp b/fdbserver/BackupWorker.actor.cpp index c2587eb905..177aa41e65 100644 --- a/fdbserver/BackupWorker.actor.cpp +++ b/fdbserver/BackupWorker.actor.cpp @@ -72,6 +72,7 @@ struct BackupData { const Optional endVersion; // old epoch's end version (inclusive), or empty for current epoch const LogEpoch recruitedEpoch; const LogEpoch backupEpoch; + LogEpoch oldestBackupEpoch = 0; Version minKnownCommittedVersion; Version savedVersion; AsyncVar> logSystem; @@ -169,13 +170,12 @@ struct BackupData { } void pop() { - const LogEpoch oldest = logSystem.get()->getOldestBackupEpoch(); - if (backupEpoch > oldest) { + if (backupEpoch > oldestBackupEpoch) { // Defer pop if old epoch hasn't finished popping yet. TraceEvent("BackupWorkerPopDeferred", myId) .suppressFor(1.0) .detail("BackupEpoch", backupEpoch) - .detail("OldestEpoch", oldest) + .detail("OldestEpoch", oldestBackupEpoch) .detail("Version", savedVersion); return; } @@ -552,6 +552,14 @@ ACTOR Future saveMutationsToFile(BackupData* self, Version popVersion, int MutationRef m; if (!message.isBackupMessage(&m)) continue; + if (debugMutation("addMutation", message.version.version, m)) { + TraceEvent("BackupWorkerDebug", self->myId) + .detail("Version", message.version.toString()) + .detail("Mutation", m.toString()) + .detail("KCV", self->minKnownCommittedVersion) + .detail("SavedVersion", self->savedVersion); + } + std::vector> adds; if (m.type != MutationRef::Type::ClearRange) { for (int index : keyRangeMap[m.param1]) { @@ -804,15 +812,14 @@ ACTOR Future backupWorker(BackupInterface interf, InitializeBackupRequest dbInfoChange = db->onChange(); Reference ls = ILogSystem::fromServerDBInfo(self.myId, db->get(), true); bool hasPseudoLocality = ls.isValid() && ls->hasPseudoLocality(tagLocalityBackup); - LogEpoch oldestBackupEpoch = 0; if (hasPseudoLocality) { self.logSystem.set(ls); self.pop(); - oldestBackupEpoch = ls->getOldestBackupEpoch(); + self.oldestBackupEpoch = std::max(self.oldestBackupEpoch, ls->getOldestBackupEpoch()); } TraceEvent("BackupWorkerLogSystem", self.myId) .detail("HasBackupLocality", hasPseudoLocality) - .detail("OldestBackupEpoch", oldestBackupEpoch) + .detail("OldestBackupEpoch", self.oldestBackupEpoch) .detail("Tag", self.tag.toString()); } when(wait(done)) { diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 28648fbd10..fc00bccbf0 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -2052,8 +2052,17 @@ ACTOR Future clusterRecruitRemoteFromConfiguration( ClusterControllerData* void clusterRegisterMaster( ClusterControllerData* self, RegisterMasterRequest const& req ) { req.reply.send( Void() ); - TraceEvent("MasterRegistrationReceived", self->id).detail("MasterId", req.id).detail("Master", req.mi.toString()).detail("Tlogs", describe(req.logSystemConfig.tLogs)).detail("Resolvers", req.resolvers.size()) - .detail("RecoveryState", (int)req.recoveryState).detail("RegistrationCount", req.registrationCount).detail("Proxies", req.proxies.size()).detail("RecoveryCount", req.recoveryCount).detail("Stalled", req.recoveryStalled); + TraceEvent("MasterRegistrationReceived", self->id) + .detail("MasterId", req.id) + .detail("Master", req.mi.toString()) + .detail("Tlogs", describe(req.logSystemConfig.tLogs)) + .detail("Resolvers", req.resolvers.size()) + .detail("RecoveryState", (int)req.recoveryState) + .detail("RegistrationCount", req.registrationCount) + .detail("Proxies", req.proxies.size()) + .detail("RecoveryCount", req.recoveryCount) + .detail("Stalled", req.recoveryStalled) + .detail("OldestBackupEpoch", req.logSystemConfig.oldestBackupEpoch); //make sure the request comes from an active database auto db = &self->db; diff --git a/fdbserver/TagPartitionedLogSystem.actor.cpp b/fdbserver/TagPartitionedLogSystem.actor.cpp index 146430cbee..9e94c2e5a1 100644 --- a/fdbserver/TagPartitionedLogSystem.actor.cpp +++ b/fdbserver/TagPartitionedLogSystem.actor.cpp @@ -202,7 +202,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted>> addActor = Optional>>()) : dbgid(dbgid), logSystemType(LogSystemType::empty), expectedLogSets(0), logRouterTags(0), txsTags(0), - repopulateRegionAntiQuorum(0), epoch(e), oldestBackupEpoch(e), recoveryCompleteWrittenToCoreState(false), + repopulateRegionAntiQuorum(0), epoch(e), oldestBackupEpoch(0), recoveryCompleteWrittenToCoreState(false), locality(locality), remoteLogsWrittenToCoreState(false), hasRemoteServers(false), stopped(false), addActor(addActor), popActors(false) {} @@ -309,6 +309,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedlogSystemType = lsConf.logSystemType; + logSystem->oldestBackupEpoch = lsConf.oldestBackupEpoch; return logSystem; } @@ -1394,6 +1395,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCountedbackupWorkers.push_back(worker); } + TraceEvent("SetOldestBackupEpoch", dbgid).detail("Epoch", oldestBackupEpoch); backupWorkerChanged.trigger(); }