diff --git a/fdbserver/CMakeLists.txt b/fdbserver/CMakeLists.txt index b2e5ff445f..1b5f88a002 100644 --- a/fdbserver/CMakeLists.txt +++ b/fdbserver/CMakeLists.txt @@ -117,6 +117,7 @@ set(FDBSERVER_SRCS StorageMetrics.h storageserver.actor.cpp TagPartitionedLogSystem.actor.cpp + TagPartitionedLogSystem.actor.h template_fdb.h tester.actor.cpp TesterInterface.actor.h diff --git a/fdbserver/TagPartitionedLogSystem.actor.cpp b/fdbserver/TagPartitionedLogSystem.actor.cpp index f8d75e1c72..897f73d351 100644 --- a/fdbserver/TagPartitionedLogSystem.actor.cpp +++ b/fdbserver/TagPartitionedLogSystem.actor.cpp @@ -18,18 +18,19 @@ * limitations under the License. */ -#include "flow/ActorCollection.h" -#include "fdbserver/LogSystem.h" -#include "fdbserver/ServerDBInfo.h" -#include "fdbserver/DBCoreState.h" -#include "fdbserver/WaitFailure.h" #include "fdbclient/SystemData.h" -#include "fdbrpc/simulator.h" #include "fdbrpc/Replication.h" #include "fdbrpc/ReplicationUtils.h" +#include "fdbrpc/simulator.h" +#include "fdbserver/DBCoreState.h" #include "fdbserver/Knobs.h" -#include "fdbserver/RecoveryState.h" #include "fdbserver/LogProtocolMessage.h" +#include "fdbserver/LogSystem.h" +#include "fdbserver/RecoveryState.h" +#include "fdbserver/ServerDBInfo.h" +#include "fdbserver/WaitFailure.h" +#include "flow/ActorCollection.h" + #include "flow/actorcompiler.h" // This must be the last #include. ACTOR Future minVersionWhenReady(Future f, std::vector> replies) { @@ -206,345 +207,62 @@ struct TagPartitionedLogSystem final : ILogSystem, ReferenceCounted(); } + void stopRejoins() final; - void addref() final { ReferenceCounted::addref(); } + void addref() final; - void delref() final { ReferenceCounted::delref(); } + void delref() final; - std::string describe() const final { - std::string result; - for (int i = 0; i < tLogs.size(); i++) { - result += format("%d: ", i); - for (int j = 0; j < tLogs[i]->logServers.size(); j++) { - result += tLogs[i]->logServers[j]->get().id().toString() + - ((j == tLogs[i]->logServers.size() - 1) ? " " : ", "); - } - } - return result; - } + std::string describe() const final; - UID getDebugID() const final { return dbgid; } + UID getDebugID() const final; - void addPseudoLocality(int8_t locality) { - ASSERT(locality < 0); - pseudoLocalities.insert(locality); - for (uint16_t i = 0; i < logRouterTags; i++) { - pseudoLocalityPopVersion[Tag(locality, i)] = 0; - } - } + void addPseudoLocality(int8_t locality); - Tag getPseudoPopTag(Tag tag, ProcessClass::ClassType type) const final { - switch (type) { - case ProcessClass::LogRouterClass: - if (tag.locality == tagLocalityLogRouter) { - ASSERT(pseudoLocalities.count(tagLocalityLogRouterMapped) > 0); - tag.locality = tagLocalityLogRouterMapped; - } - break; + Tag getPseudoPopTag(Tag tag, ProcessClass::ClassType type) const final; - case ProcessClass::BackupClass: - if (tag.locality == tagLocalityLogRouter) { - ASSERT(pseudoLocalities.count(tagLocalityBackup) > 0); - tag.locality = tagLocalityBackup; - } - break; - - default: // This should be an error at caller site. 
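// [Reviewer sketch, not part of the patch] How the two pseudo-locality
// remappings above feed into popPseudoLocalityTag below; the tag id and
// versions are made-up examples:
//
//   Tag raw(tagLocalityLogRouter, 2);
//   Tag popTag = logSystem->getPseudoPopTag(raw, ProcessClass::BackupClass);
//   // popTag == Tag(tagLocalityBackup, 2)
//   Version safe = logSystem->popPseudoLocalityTag(popTag, /*upTo=*/150);
//   // If Tag(tagLocalityLogRouterMapped, 2) has only popped to 100, then
//   // safe == 100: only the minimum across all pseudo-localities for the
//   // same tag id may actually be popped from the tlogs.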
- break; - } - return tag; - } - - bool hasPseudoLocality(int8_t locality) const final { return pseudoLocalities.count(locality) > 0; } + bool hasPseudoLocality(int8_t locality) const final; // Return the min version of all pseudoLocalities, i.e., logRouter and backupTag - Version popPseudoLocalityTag(Tag tag, Version upTo) final { - ASSERT(isPseudoLocality(tag.locality) && hasPseudoLocality(tag.locality)); - - Version& localityVersion = pseudoLocalityPopVersion[tag]; - localityVersion = std::max(localityVersion, upTo); - Version minVersion = localityVersion; - // Why do we need to use the minimum popped version among all tags? Reason: for example, - // 2 pseudo tags pop 100 or 150, respectively. It's only safe to pop min(100, 150), - // because [101,150) is needed by another pseudo tag. - for (const int8_t locality : pseudoLocalities) { - minVersion = std::min(minVersion, pseudoLocalityPopVersion[Tag(locality, tag.id)]); - } - // TraceEvent("TLogPopPseudoTag", dbgid).detail("Tag", tag.toString()).detail("Version", upTo).detail("PopVersion", minVersion); - return minVersion; - } + Version popPseudoLocalityTag(Tag tag, Version upTo) final; static Future recoverAndEndEpoch(Reference>> const& outLogSystem, UID const& dbgid, DBCoreState const& oldState, FutureStream const& rejoins, LocalityData const& locality, - bool* forceRecovery) { - return epochEnd(outLogSystem, dbgid, oldState, rejoins, locality, forceRecovery); - } + bool* forceRecovery); static Reference fromLogSystemConfig(UID const& dbgid, LocalityData const& locality, LogSystemConfig const& lsConf, bool excludeRemote, bool useRecoveredAt, - Optional>> addActor) { - ASSERT(lsConf.logSystemType == LogSystemType::tagPartitioned || - (lsConf.logSystemType == LogSystemType::empty && !lsConf.tLogs.size())); - // ASSERT(lsConf.epoch == epoch); //< FIXME - auto logSystem = makeReference(dbgid, locality, lsConf.epoch, addActor); - - logSystem->tLogs.reserve(lsConf.tLogs.size()); - logSystem->expectedLogSets = lsConf.expectedLogSets; - logSystem->logRouterTags = lsConf.logRouterTags; - logSystem->txsTags = lsConf.txsTags; - logSystem->recruitmentID = lsConf.recruitmentID; - logSystem->stopped = lsConf.stopped; - if (useRecoveredAt) { - logSystem->recoveredAt = lsConf.recoveredAt; - } - logSystem->pseudoLocalities = lsConf.pseudoLocalities; - for (const TLogSet& tLogSet : lsConf.tLogs) { - if (!excludeRemote || tLogSet.isLocal) { - logSystem->tLogs.push_back(makeReference(tLogSet)); - } - } - - for (const auto& oldTlogConf : lsConf.oldTLogs) { - logSystem->oldLogData.emplace_back(oldTlogConf); - //TraceEvent("BWFromLSConf") - // .detail("Epoch", logSystem->oldLogData.back().epoch) - // .detail("Version", logSystem->oldLogData.back().epochEnd); - } - - logSystem->logSystemType = lsConf.logSystemType; - logSystem->oldestBackupEpoch = lsConf.oldestBackupEpoch; - return logSystem; - } + Optional>> addActor); static Reference fromOldLogSystemConfig(UID const& dbgid, LocalityData const& locality, - LogSystemConfig const& lsConf) { - ASSERT(lsConf.logSystemType == LogSystemType::tagPartitioned || - (lsConf.logSystemType == LogSystemType::empty && !lsConf.tLogs.size())); - // ASSERT(lsConf.epoch == epoch); //< FIXME - const LogEpoch e = lsConf.oldTLogs.size() > 0 ? 
lsConf.oldTLogs[0].epoch : 0; - auto logSystem = makeReference(dbgid, locality, e); - - if (lsConf.oldTLogs.size()) { - for (const TLogSet& tLogSet : lsConf.oldTLogs[0].tLogs) { - logSystem->tLogs.push_back(makeReference(tLogSet)); - } - logSystem->logRouterTags = lsConf.oldTLogs[0].logRouterTags; - logSystem->txsTags = lsConf.oldTLogs[0].txsTags; - // logSystem->epochEnd = lsConf.oldTLogs[0].epochEnd; - - for (int i = 1; i < lsConf.oldTLogs.size(); i++) { - logSystem->oldLogData.emplace_back(lsConf.oldTLogs[i]); - } - } - logSystem->logSystemType = lsConf.logSystemType; - logSystem->stopped = true; - logSystem->pseudoLocalities = lsConf.pseudoLocalities; - - return logSystem; - } + LogSystemConfig const& lsConf); // Convert TagPartitionedLogSystem to DBCoreState and override input newState as return value - void toCoreState(DBCoreState& newState) final { - if (recoveryComplete.isValid() && recoveryComplete.isError()) - throw recoveryComplete.getError(); + void toCoreState(DBCoreState& newState) final; - if (remoteRecoveryComplete.isValid() && remoteRecoveryComplete.isError()) - throw remoteRecoveryComplete.getError(); + bool remoteStorageRecovered() final; - newState.tLogs.clear(); - newState.logRouterTags = logRouterTags; - newState.txsTags = txsTags; - newState.pseudoLocalities = pseudoLocalities; - for (const auto& t : tLogs) { - if (t->logServers.size()) { - newState.tLogs.emplace_back(*t); - newState.tLogs.back().tLogLocalities.clear(); - for (const auto& log : t->logServers) { - newState.tLogs.back().tLogLocalities.push_back(log->get().interf().filteredLocality); - } - } - } + Future onCoreStateChanged() final; - newState.oldTLogData.clear(); - if (!recoveryComplete.isValid() || !recoveryComplete.isReady() || - (repopulateRegionAntiQuorum == 0 && - (!remoteRecoveryComplete.isValid() || !remoteRecoveryComplete.isReady())) || - epoch != oldestBackupEpoch) { - for (const auto& oldData : oldLogData) { - newState.oldTLogData.emplace_back(oldData); - TraceEvent("BWToCore") - .detail("Epoch", newState.oldTLogData.back().epoch) - .detail("TotalTags", newState.oldTLogData.back().logRouterTags) - .detail("BeginVersion", newState.oldTLogData.back().epochBegin) - .detail("EndVersion", newState.oldTLogData.back().epochEnd); - } - } + void coreStateWritten(DBCoreState const& newState) final; - newState.logSystemType = logSystemType; - } + Future onError() final; - bool remoteStorageRecovered() final { return remoteRecoveryComplete.isValid() && remoteRecoveryComplete.isReady(); } + ACTOR static Future onError_internal(TagPartitionedLogSystem* self); - Future onCoreStateChanged() final { - std::vector> changes; - changes.push_back(Never()); - if (recoveryComplete.isValid() && !recoveryComplete.isReady()) { - changes.push_back(recoveryComplete); - } - if (remoteRecovery.isValid() && !remoteRecovery.isReady()) { - changes.push_back(remoteRecovery); - } - if (remoteRecoveryComplete.isValid() && !remoteRecoveryComplete.isReady()) { - changes.push_back(remoteRecoveryComplete); - } - changes.push_back(backupWorkerChanged.onTrigger()); // changes to oldestBackupEpoch - return waitForAny(changes); - } - - void coreStateWritten(DBCoreState const& newState) final { - if (!newState.oldTLogData.size()) { - recoveryCompleteWrittenToCoreState.set(true); - } - for (auto& t : newState.tLogs) { - if (!t.isLocal) { - TraceEvent("RemoteLogsWritten", dbgid).log(); - remoteLogsWrittenToCoreState = true; - break; - } - } - } - - Future onError() final { return onError_internal(this); } - - ACTOR static Future 
onError_internal(TagPartitionedLogSystem* self) { - // Never returns normally, but throws an error if the subsystem stops working - loop { - std::vector> failed; - std::vector> backupFailed(1, Never()); - std::vector> changes; - - for (auto& it : self->tLogs) { - for (auto& t : it->logServers) { - if (t->get().present()) { - failed.push_back(waitFailureClient(t->get().interf().waitFailure, - SERVER_KNOBS->TLOG_TIMEOUT, - -SERVER_KNOBS->TLOG_TIMEOUT / - SERVER_KNOBS->SECONDS_BEFORE_NO_FAILURE_DELAY, - /*trace=*/true)); - } else { - changes.push_back(t->onChange()); - } - } - for (auto& t : it->logRouters) { - if (t->get().present()) { - failed.push_back(waitFailureClient(t->get().interf().waitFailure, - SERVER_KNOBS->TLOG_TIMEOUT, - -SERVER_KNOBS->TLOG_TIMEOUT / - SERVER_KNOBS->SECONDS_BEFORE_NO_FAILURE_DELAY, - /*trace=*/true)); - } else { - changes.push_back(t->onChange()); - } - } - for (const auto& worker : it->backupWorkers) { - if (worker->get().present()) { - backupFailed.push_back(waitFailureClient(worker->get().interf().waitFailure, - SERVER_KNOBS->BACKUP_TIMEOUT, - -SERVER_KNOBS->BACKUP_TIMEOUT / - SERVER_KNOBS->SECONDS_BEFORE_NO_FAILURE_DELAY, - /*trace=*/true)); - } else { - changes.push_back(worker->onChange()); - } - } - } - - if (!self->recoveryCompleteWrittenToCoreState.get()) { - for (auto& old : self->oldLogData) { - for (auto& it : old.tLogs) { - for (auto& t : it->logRouters) { - if (t->get().present()) { - failed.push_back(waitFailureClient(t->get().interf().waitFailure, - SERVER_KNOBS->TLOG_TIMEOUT, - -SERVER_KNOBS->TLOG_TIMEOUT / - SERVER_KNOBS->SECONDS_BEFORE_NO_FAILURE_DELAY, - /*trace=*/true)); - } else { - changes.push_back(t->onChange()); - } - } - } - // Monitor changes of backup workers for old epochs. - for (const auto& worker : old.tLogs[0]->backupWorkers) { - if (worker->get().present()) { - backupFailed.push_back(waitFailureClient(worker->get().interf().waitFailure, - SERVER_KNOBS->BACKUP_TIMEOUT, - -SERVER_KNOBS->BACKUP_TIMEOUT / - SERVER_KNOBS->SECONDS_BEFORE_NO_FAILURE_DELAY, - /*trace=*/true)); - } else { - changes.push_back(worker->onChange()); - } - } - } - } - - if (self->hasRemoteServers && (!self->remoteRecovery.isReady() || self->remoteRecovery.isError())) { - changes.push_back(self->remoteRecovery); - } - - changes.push_back(self->recoveryCompleteWrittenToCoreState.onChange()); - changes.push_back(self->backupWorkerChanged.onTrigger()); - - ASSERT(failed.size() >= 1); - wait(quorum(changes, 1) || tagError(quorum(failed, 1), master_tlog_failed()) || - tagError(quorum(backupFailed, 1), master_backup_worker_failed())); - } - } - - ACTOR static Future pushResetChecker(Reference self, NetworkAddress addr) { - self->slowReplies = 0; - self->fastReplies = 0; - wait(delay(SERVER_KNOBS->PUSH_STATS_INTERVAL)); - TraceEvent("SlowPushStats") - .detail("PeerAddress", addr) - .detail("SlowReplies", self->slowReplies) - .detail("FastReplies", self->fastReplies); - if (self->slowReplies >= SERVER_KNOBS->PUSH_STATS_SLOW_AMOUNT && - self->slowReplies / double(self->slowReplies + self->fastReplies) >= SERVER_KNOBS->PUSH_STATS_SLOW_RATIO) { - FlowTransport::transport().resetConnection(addr); - self->lastReset = now(); - } - return Void(); - } + ACTOR static Future pushResetChecker(Reference self, NetworkAddress addr); ACTOR static Future recordPushMetrics(Reference self, Reference dist, NetworkAddress addr, - Future in) { - state double startTime = now(); - TLogCommitReply t = wait(in); - if (now() - self->lastReset > SERVER_KNOBS->PUSH_RESET_INTERVAL) { - if (now() 
- startTime > SERVER_KNOBS->PUSH_MAX_LATENCY) { - if (self->resetCheck.isReady()) { - self->resetCheck = pushResetChecker(self, addr); - } - self->slowReplies++; - } else { - self->fastReplies++; - } - } - dist->sampleSeconds(now() - startTime); - return t; - } + Future in); Future push(Version prevVersion, Version version, @@ -552,1082 +270,73 @@ struct TagPartitionedLogSystem final : ILogSystem, ReferenceCounted debugID) final { - // FIXME: Randomize request order as in LegacyLogSystem? - vector> quorumResults; - vector> allReplies; - int location = 0; - Span span("TPLS:push"_loc, spanContext); - for (auto& it : tLogs) { - if (it->isLocal && it->logServers.size()) { - if (it->connectionResetTrackers.size() == 0) { - for (int i = 0; i < it->logServers.size(); i++) { - it->connectionResetTrackers.push_back(makeReference()); - } - } - if (it->tlogPushDistTrackers.empty()) { - for (int i = 0; i < it->logServers.size(); i++) { - it->tlogPushDistTrackers.push_back( - Histogram::getHistogram("ToTlog_" + it->logServers[i]->get().interf().uniqueID.toString(), - it->logServers[i]->get().interf().address().toString(), - Histogram::Unit::microseconds)); - } - } - vector> tLogCommitResults; - for (int loc = 0; loc < it->logServers.size(); loc++) { - Standalone msg = data.getMessages(location); - data.recordEmptyMessage(location, msg); - allReplies.push_back(recordPushMetrics( - it->connectionResetTrackers[loc], - it->tlogPushDistTrackers[loc], - it->logServers[loc]->get().interf().address(), - it->logServers[loc]->get().interf().commit.getReply(TLogCommitRequest(spanContext, - msg.arena(), - prevVersion, - version, - knownCommittedVersion, - minKnownCommittedVersion, - msg, - debugID), - TaskPriority::ProxyTLogCommitReply))); - Future commitSuccess = success(allReplies.back()); - addActor.get().send(commitSuccess); - tLogCommitResults.push_back(commitSuccess); - location++; - } - quorumResults.push_back(quorum(tLogCommitResults, tLogCommitResults.size() - it->tLogWriteAntiQuorum)); - } - } + Optional debugID) final; - return minVersionWhenReady(waitForAll(quorumResults), allReplies); - } + Reference peekAll(UID dbgid, Version begin, Version end, Tag tag, bool parallelGetMore); - Reference peekAll(UID dbgid, Version begin, Version end, Tag tag, bool parallelGetMore) { - int bestSet = 0; - std::vector> localSets; - Version lastBegin = 0; - bool foundSpecial = false; - for (auto& log : tLogs) { - if (log->locality == tagLocalitySpecial || log->locality == tagLocalityUpgraded) { - foundSpecial = true; - } - if (log->isLocal && log->logServers.size() && - (log->locality == tagLocalitySpecial || log->locality == tagLocalityUpgraded || - log->locality == tag.locality || tag == txsTag || tag.locality == tagLocalityTxs || - tag.locality == tagLocalityLogRouter || - ((tag.locality == tagLocalityUpgraded || tag == cacheTag) && log->locality != tagLocalitySatellite))) { - lastBegin = std::max(lastBegin, log->startVersion); - localSets.push_back(log); - if (log->locality != tagLocalitySatellite) { - bestSet = localSets.size() - 1; - } - } - } + Reference peekRemote(UID dbgid, Version begin, Optional end, Tag tag, bool parallelGetMore); - if (!localSets.size()) { - lastBegin = end; - } - - if (begin >= lastBegin && localSets.size()) { - TraceEvent("TLogPeekAllCurrentOnly", dbgid) - .detail("Tag", tag.toString()) - .detail("Begin", begin) - .detail("End", end) - .detail("BestLogs", localSets[bestSet]->logServerString()); - return makeReference( - localSets, bestSet, localSets[bestSet]->bestLocationFor(tag), tag, 
begin, end, parallelGetMore); - } else { - std::vector> cursors; - std::vector epochEnds; - - if (lastBegin < end && localSets.size()) { - TraceEvent("TLogPeekAllAddingCurrent", dbgid) - .detail("Tag", tag.toString()) - .detail("Begin", begin) - .detail("End", end) - .detail("BestLogs", localSets[bestSet]->logServerString()); - cursors.push_back(makeReference(localSets, - bestSet, - localSets[bestSet]->bestLocationFor(tag), - tag, - lastBegin, - end, - parallelGetMore)); - } - for (int i = 0; begin < lastBegin; i++) { - if (i == oldLogData.size()) { - if (tag == txsTag || tag.locality == tagLocalityTxs || tag == cacheTag) { - break; - } - TraceEvent("TLogPeekAllDead", dbgid) - .detail("Tag", tag.toString()) - .detail("Begin", begin) - .detail("End", end) - .detail("LastBegin", lastBegin) - .detail("OldLogDataSize", oldLogData.size()); - return makeReference( - Reference>>(), - tag, - begin, - getPeekEnd(), - false, - false); - } - - int bestOldSet = 0; - std::vector> localOldSets; - Version thisBegin = begin; - bool thisSpecial = false; - for (auto& log : oldLogData[i].tLogs) { - if (log->locality == tagLocalitySpecial || log->locality == tagLocalityUpgraded) { - thisSpecial = true; - } - if (log->isLocal && log->logServers.size() && - (log->locality == tagLocalitySpecial || log->locality == tagLocalityUpgraded || - log->locality == tag.locality || tag == txsTag || tag.locality == tagLocalityTxs || - tag.locality == tagLocalityLogRouter || - ((tag.locality == tagLocalityUpgraded || tag == cacheTag) && - log->locality != tagLocalitySatellite))) { - thisBegin = std::max(thisBegin, log->startVersion); - localOldSets.push_back(log); - if (log->locality != tagLocalitySatellite) { - bestOldSet = localOldSets.size() - 1; - } - } - } - - if (!localOldSets.size()) { - TraceEvent("TLogPeekAllNoLocalSets", dbgid) - .detail("Tag", tag.toString()) - .detail("Begin", begin) - .detail("End", end) - .detail("LastBegin", lastBegin); - if (!cursors.size() && !foundSpecial) { - continue; - } - return makeReference( - Reference>>(), - tag, - begin, - getPeekEnd(), - false, - false); - } - if (thisSpecial) { - foundSpecial = true; - } - - if (thisBegin < lastBegin) { - if (thisBegin < end) { - TraceEvent("TLogPeekAllAddingOld", dbgid) - .detail("Tag", tag.toString()) - .detail("Begin", begin) - .detail("End", end) - .detail("BestLogs", localOldSets[bestOldSet]->logServerString()) - .detail("LastBegin", lastBegin) - .detail("ThisBegin", thisBegin); - cursors.push_back( - makeReference(localOldSets, - bestOldSet, - localOldSets[bestOldSet]->bestLocationFor(tag), - tag, - thisBegin, - std::min(lastBegin, end), - parallelGetMore)); - epochEnds.push_back(LogMessageVersion(std::min(lastBegin, end))); - } - lastBegin = thisBegin; - } - } - - return makeReference(cursors, epochEnds); - } - } - - Reference peekRemote(UID dbgid, Version begin, Optional end, Tag tag, bool parallelGetMore) { - int bestSet = -1; - Version lastBegin = recoveredAt.present() ? recoveredAt.get() + 1 : 0; - for (int t = 0; t < tLogs.size(); t++) { - if (tLogs[t]->isLocal) { - lastBegin = std::max(lastBegin, tLogs[t]->startVersion); - } - - if (tLogs[t]->logRouters.size()) { - ASSERT(bestSet == -1); - bestSet = t; - } - } - if (bestSet == -1) { - TraceEvent("TLogPeekRemoteNoBestSet", dbgid) - .detail("Tag", tag.toString()) - .detail("Begin", begin) - .detail("End", end.present() ? 
end.get() : getPeekEnd()); - return makeReference(Reference>>(), - tag, - begin, - getPeekEnd(), - false, - parallelGetMore); - } - if (begin >= lastBegin) { - TraceEvent("TLogPeekRemoteBestOnly", dbgid) - .detail("Tag", tag.toString()) - .detail("Begin", begin) - .detail("End", end.present() ? end.get() : getPeekEnd()) - .detail("BestSet", bestSet) - .detail("BestSetStart", lastBegin) - .detail("LogRouterIds", tLogs[bestSet]->logRouterString()); - return makeReference( - tLogs[bestSet]->logRouters, tag, begin, end.present() ? end.get() + 1 : getPeekEnd(), parallelGetMore); - } else { - std::vector> cursors; - std::vector epochEnds; - TraceEvent("TLogPeekRemoteAddingBest", dbgid) - .detail("Tag", tag.toString()) - .detail("Begin", begin) - .detail("End", end.present() ? end.get() : getPeekEnd()) - .detail("BestSet", bestSet) - .detail("BestSetStart", lastBegin) - .detail("LogRouterIds", tLogs[bestSet]->logRouterString()); - cursors.push_back(makeReference(tLogs[bestSet]->logRouters, - tag, - lastBegin, - end.present() ? end.get() + 1 : getPeekEnd(), - parallelGetMore)); - int i = 0; - while (begin < lastBegin) { - if (i == oldLogData.size()) { - TraceEvent("TLogPeekRemoteDead", dbgid) - .detail("Tag", tag.toString()) - .detail("Begin", begin) - .detail("End", end.present() ? end.get() : getPeekEnd()) - .detail("LastBegin", lastBegin) - .detail("OldLogDataSize", oldLogData.size()); - return makeReference( - Reference>>(), - tag, - begin, - getPeekEnd(), - false, - parallelGetMore); - } - - int bestOldSet = -1; - Version thisBegin = begin; - for (int t = 0; t < oldLogData[i].tLogs.size(); t++) { - if (oldLogData[i].tLogs[t]->isLocal) { - thisBegin = std::max(thisBegin, oldLogData[i].tLogs[t]->startVersion); - } - - if (oldLogData[i].tLogs[t]->logRouters.size()) { - ASSERT(bestOldSet == -1); - bestOldSet = t; - } - } - if (bestOldSet == -1) { - TraceEvent("TLogPeekRemoteNoOldBestSet", dbgid) - .detail("Tag", tag.toString()) - .detail("Begin", begin) - .detail("End", end.present() ? end.get() : getPeekEnd()); - return makeReference( - Reference>>(), - tag, - begin, - getPeekEnd(), - false, - parallelGetMore); - } - - if (thisBegin < lastBegin) { - TraceEvent("TLogPeekRemoteAddingOldBest", dbgid) - .detail("Tag", tag.toString()) - .detail("Begin", begin) - .detail("End", end.present() ? 
end.get() : getPeekEnd()) - .detail("BestOldSet", bestOldSet) - .detail("LogRouterIds", oldLogData[i].tLogs[bestOldSet]->logRouterString()) - .detail("LastBegin", lastBegin) - .detail("ThisBegin", thisBegin) - .detail("BestStartVer", oldLogData[i].tLogs[bestOldSet]->startVersion); - cursors.push_back(makeReference( - oldLogData[i].tLogs[bestOldSet]->logRouters, tag, thisBegin, lastBegin, parallelGetMore)); - epochEnds.emplace_back(lastBegin); - lastBegin = thisBegin; - } - i++; - } - - return makeReference(cursors, epochEnds); - } - } - - Reference peek(UID dbgid, Version begin, Optional end, Tag tag, bool parallelGetMore) final { - if (!tLogs.size()) { - TraceEvent("TLogPeekNoLogSets", dbgid).detail("Tag", tag.toString()).detail("Begin", begin); - return makeReference( - Reference>>(), tag, begin, getPeekEnd(), false, false); - } - - if (tag.locality == tagLocalityRemoteLog) { - return peekRemote(dbgid, begin, end, tag, parallelGetMore); - } else { - return peekAll(dbgid, begin, getPeekEnd(), tag, parallelGetMore); - } - } + Reference peek(UID dbgid, Version begin, Optional end, Tag tag, bool parallelGetMore) final; Reference peek(UID dbgid, Version begin, Optional end, std::vector tags, - bool parallelGetMore) final { - if (tags.empty()) { - TraceEvent("TLogPeekNoTags", dbgid).detail("Begin", begin); - return makeReference( - Reference>>(), invalidTag, begin, getPeekEnd(), false, false); - } - - if (tags.size() == 1) { - return peek(dbgid, begin, end, tags[0], parallelGetMore); - } - - std::vector> cursors; - cursors.reserve(tags.size()); - for (auto tag : tags) { - cursors.push_back(peek(dbgid, begin, end, tag, parallelGetMore)); - } - return makeReference(cursors, - begin, - end.present() ? end.get() + 1 : getPeekEnd(), - true, - tLogs[0]->locality == tagLocalityUpgraded, - false); - } + bool parallelGetMore) final; Reference peekLocal(UID dbgid, Tag tag, Version begin, Version end, bool useMergePeekCursors, - int8_t peekLocality = tagLocalityInvalid) { - if (tag.locality >= 0 || tag.locality == tagLocalityUpgraded || tag.locality == tagLocalitySpecial) { - peekLocality = tag.locality; - } - ASSERT(peekLocality >= 0 || peekLocality == tagLocalityUpgraded || tag.locality == tagLocalitySpecial); - - int bestSet = -1; - bool foundSpecial = false; - int logCount = 0; - for (int t = 0; t < tLogs.size(); t++) { - if (tLogs[t]->logServers.size() && tLogs[t]->locality != tagLocalitySatellite) { - logCount++; - } - if (tLogs[t]->logServers.size() && - (tLogs[t]->locality == tagLocalitySpecial || tLogs[t]->locality == tagLocalityUpgraded || - tLogs[t]->locality == peekLocality || peekLocality == tagLocalityUpgraded || - peekLocality == tagLocalitySpecial)) { - if (tLogs[t]->locality == tagLocalitySpecial || tLogs[t]->locality == tagLocalityUpgraded) { - foundSpecial = true; - } - bestSet = t; - break; - } - } - if (bestSet == -1) { - TraceEvent("TLogPeekLocalNoBestSet", dbgid) - .detail("Tag", tag.toString()) - .detail("Begin", begin) - .detail("End", end) - .detail("LogCount", logCount); - if (useMergePeekCursors || logCount > 1) { - throw worker_removed(); - } else { - return makeReference( - Reference>>(), tag, begin, getPeekEnd(), false, false); - } - } - - if (begin >= tLogs[bestSet]->startVersion) { - TraceEvent("TLogPeekLocalBestOnly", dbgid) - .detail("Tag", tag.toString()) - .detail("Begin", begin) - .detail("End", end) - .detail("BestSet", bestSet) - .detail("BestSetStart", tLogs[bestSet]->startVersion) - .detail("LogId", 
tLogs[bestSet]->logServers[tLogs[bestSet]->bestLocationFor(tag)]->get().id()); - if (useMergePeekCursors) { - return makeReference(tLogs[bestSet]->logServers, - tLogs[bestSet]->bestLocationFor(tag), - tLogs[bestSet]->logServers.size() + 1 - - tLogs[bestSet]->tLogReplicationFactor, - tag, - begin, - end, - true, - tLogs[bestSet]->tLogLocalities, - tLogs[bestSet]->tLogPolicy, - tLogs[bestSet]->tLogReplicationFactor); - } else { - return makeReference( - tLogs[bestSet]->logServers[tLogs[bestSet]->bestLocationFor(tag)], tag, begin, end, false, false); - } - } else { - std::vector> cursors; - std::vector epochEnds; - - if (tLogs[bestSet]->startVersion < end) { - TraceEvent("TLogPeekLocalAddingBest", dbgid) - .detail("Tag", tag.toString()) - .detail("Begin", begin) - .detail("End", end) - .detail("BestSet", bestSet) - .detail("BestSetStart", tLogs[bestSet]->startVersion) - .detail("LogId", tLogs[bestSet]->logServers[tLogs[bestSet]->bestLocationFor(tag)]->get().id()); - if (useMergePeekCursors) { - cursors.push_back(makeReference( - tLogs[bestSet]->logServers, - tLogs[bestSet]->bestLocationFor(tag), - tLogs[bestSet]->logServers.size() + 1 - tLogs[bestSet]->tLogReplicationFactor, - tag, - tLogs[bestSet]->startVersion, - end, - true, - tLogs[bestSet]->tLogLocalities, - tLogs[bestSet]->tLogPolicy, - tLogs[bestSet]->tLogReplicationFactor)); - } else { - cursors.push_back(makeReference( - tLogs[bestSet]->logServers[tLogs[bestSet]->bestLocationFor(tag)], - tag, - tLogs[bestSet]->startVersion, - end, - false, - false)); - } - } - Version lastBegin = tLogs[bestSet]->startVersion; - for (int i = 0; begin < lastBegin; i++) { - if (i == oldLogData.size()) { - if ((tag == txsTag || tag.locality == tagLocalityTxs) && cursors.size()) { - break; - } - TraceEvent("TLogPeekLocalDead", dbgid) - .detail("Tag", tag.toString()) - .detail("Begin", begin) - .detail("End", end) - .detail("LastBegin", lastBegin) - .detail("OldLogDataSize", oldLogData.size()); - throw worker_removed(); - } - - int bestOldSet = -1; - logCount = 0; - bool nextFoundSpecial = false; - for (int t = 0; t < oldLogData[i].tLogs.size(); t++) { - if (oldLogData[i].tLogs[t]->logServers.size() && - oldLogData[i].tLogs[t]->locality != tagLocalitySatellite) { - logCount++; - } - if (oldLogData[i].tLogs[t]->logServers.size() && - (oldLogData[i].tLogs[t]->locality == tagLocalitySpecial || - oldLogData[i].tLogs[t]->locality == tagLocalityUpgraded || - oldLogData[i].tLogs[t]->locality == peekLocality || peekLocality == tagLocalityUpgraded || - peekLocality == tagLocalitySpecial)) { - if (oldLogData[i].tLogs[t]->locality == tagLocalitySpecial || - oldLogData[i].tLogs[t]->locality == tagLocalityUpgraded) { - nextFoundSpecial = true; - } - if (foundSpecial && !oldLogData[i].tLogs[t]->isLocal) { - TraceEvent("TLogPeekLocalRemoteBeforeSpecial", dbgid) - .detail("Tag", tag.toString()) - .detail("Begin", begin) - .detail("End", end) - .detail("LastBegin", lastBegin) - .detail("OldLogDataSize", oldLogData.size()) - .detail("Idx", i); - throw worker_removed(); - } - bestOldSet = t; - break; - } - } - - if (bestOldSet == -1) { - TraceEvent("TLogPeekLocalNoBestSet", dbgid) - .detail("Tag", tag.toString()) - .detail("Begin", begin) - .detail("End", end) - .detail("LastBegin", lastBegin) - .detail("OldLogDataSize", oldLogData.size()) - .detail("Idx", i) - .detail("LogRouterTags", oldLogData[i].logRouterTags) - .detail("LogCount", logCount) - .detail("FoundSpecial", foundSpecial); - if (oldLogData[i].logRouterTags == 0 || logCount > 1 || foundSpecial) { - throw 
worker_removed(); - } - continue; - } - - foundSpecial = nextFoundSpecial; - - Version thisBegin = std::max(oldLogData[i].tLogs[bestOldSet]->startVersion, begin); - if (thisBegin < lastBegin) { - if (thisBegin < end) { - TraceEvent("TLogPeekLocalAddingOldBest", dbgid) - .detail("Tag", tag.toString()) - .detail("Begin", begin) - .detail("End", end) - .detail("BestOldSet", bestOldSet) - .detail("LogServers", oldLogData[i].tLogs[bestOldSet]->logServerString()) - .detail("ThisBegin", thisBegin) - .detail("LastBegin", lastBegin); - // detail("LogId", - // oldLogData[i].tLogs[bestOldSet]->logServers[tLogs[bestOldSet]->bestLocationFor( tag - // )]->get().id()); - cursors.push_back(makeReference( - oldLogData[i].tLogs[bestOldSet]->logServers, - oldLogData[i].tLogs[bestOldSet]->bestLocationFor(tag), - oldLogData[i].tLogs[bestOldSet]->logServers.size() + 1 - - oldLogData[i].tLogs[bestOldSet]->tLogReplicationFactor, - tag, - thisBegin, - std::min(lastBegin, end), - useMergePeekCursors, - oldLogData[i].tLogs[bestOldSet]->tLogLocalities, - oldLogData[i].tLogs[bestOldSet]->tLogPolicy, - oldLogData[i].tLogs[bestOldSet]->tLogReplicationFactor)); - epochEnds.emplace_back(std::min(lastBegin, end)); - } - lastBegin = thisBegin; - } - } - - return makeReference(cursors, epochEnds); - } - } + int8_t peekLocality = tagLocalityInvalid); Reference peekTxs(UID dbgid, Version begin, int8_t peekLocality, Version localEnd, - bool canDiscardPopped) final { - Version end = getEnd(); - if (!tLogs.size()) { - TraceEvent("TLogPeekTxsNoLogs", dbgid).log(); - return makeReference( - Reference>>(), txsTag, begin, end, false, false); - } - TraceEvent("TLogPeekTxs", dbgid) - .detail("Begin", begin) - .detail("End", end) - .detail("LocalEnd", localEnd) - .detail("PeekLocality", peekLocality) - .detail("CanDiscardPopped", canDiscardPopped); - - int maxTxsTags = txsTags; - bool needsOldTxs = tLogs[0]->tLogVersion < TLogVersion::V4; - for (auto& it : oldLogData) { - maxTxsTags = std::max(maxTxsTags, it.txsTags); - needsOldTxs = needsOldTxs || it.tLogs[0]->tLogVersion < TLogVersion::V4; - } - - if (peekLocality < 0 || localEnd == invalidVersion || localEnd <= begin) { - std::vector> cursors; - cursors.reserve(maxTxsTags); - for (int i = 0; i < maxTxsTags; i++) { - cursors.push_back(peekAll(dbgid, begin, end, Tag(tagLocalityTxs, i), true)); - } - // SOMEDAY: remove once upgrades from 6.2 are no longer supported - if (needsOldTxs) { - cursors.push_back(peekAll(dbgid, begin, end, txsTag, true)); - } - - return makeReference(cursors, begin, end, false, false, canDiscardPopped); - } - - try { - if (localEnd >= end) { - std::vector> cursors; - cursors.reserve(maxTxsTags); - for (int i = 0; i < maxTxsTags; i++) { - cursors.push_back(peekLocal(dbgid, Tag(tagLocalityTxs, i), begin, end, true, peekLocality)); - } - // SOMEDAY: remove once upgrades from 6.2 are no longer supported - if (needsOldTxs) { - cursors.push_back(peekLocal(dbgid, txsTag, begin, end, true, peekLocality)); - } - - return makeReference(cursors, begin, end, false, false, canDiscardPopped); - } - - std::vector> cursors; - std::vector epochEnds; - - cursors.resize(2); - - std::vector> localCursors; - std::vector> allCursors; - for (int i = 0; i < maxTxsTags; i++) { - localCursors.push_back(peekLocal(dbgid, Tag(tagLocalityTxs, i), begin, localEnd, true, peekLocality)); - allCursors.push_back(peekAll(dbgid, localEnd, end, Tag(tagLocalityTxs, i), true)); - } - // SOMEDAY: remove once upgrades from 6.2 are no longer supported - if (needsOldTxs) { - 
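// [Reviewer note, not in the patch] Pre-V4 tlogs stored transaction state
// under the single legacy txsTag rather than the sharded Tag(tagLocalityTxs, i)
// tags, so each cursor list gets one extra cursor for the legacy tag whenever
// the current generation or any generation in oldLogData still runs an older
// TLogVersion (the needsOldTxs computation above).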
localCursors.push_back(peekLocal(dbgid, txsTag, begin, localEnd, true, peekLocality)); - allCursors.push_back(peekAll(dbgid, localEnd, end, txsTag, true)); - } - - cursors[1] = makeReference( - localCursors, begin, localEnd, false, false, canDiscardPopped); - cursors[0] = makeReference(allCursors, localEnd, end, false, false, false); - epochEnds.emplace_back(localEnd); - - return makeReference(cursors, epochEnds); - } catch (Error& e) { - if (e.code() == error_code_worker_removed) { - std::vector> cursors; - cursors.reserve(maxTxsTags); - for (int i = 0; i < maxTxsTags; i++) { - cursors.push_back(peekAll(dbgid, begin, end, Tag(tagLocalityTxs, i), true)); - } - // SOMEDAY: remove once upgrades from 6.2 are no longer supported - if (needsOldTxs) { - cursors.push_back(peekAll(dbgid, begin, end, txsTag, true)); - } - - return makeReference(cursors, begin, end, false, false, canDiscardPopped); - } - throw; - } - } + bool canDiscardPopped) final; Reference peekSingle(UID dbgid, Version begin, Tag tag, - std::vector> history) final { - while (history.size() && begin >= history.back().first) { - history.pop_back(); - } - - if (history.size() == 0) { - TraceEvent("TLogPeekSingleNoHistory", dbgid).detail("Tag", tag.toString()).detail("Begin", begin); - return peekLocal(dbgid, tag, begin, getPeekEnd(), false); - } else { - std::vector> cursors; - std::vector epochEnds; - - TraceEvent("TLogPeekSingleAddingLocal", dbgid) - .detail("Tag", tag.toString()) - .detail("Begin", history[0].first); - cursors.push_back(peekLocal(dbgid, tag, history[0].first, getPeekEnd(), false)); - - for (int i = 0; i < history.size(); i++) { - TraceEvent("TLogPeekSingleAddingOld", dbgid) - .detail("Tag", tag.toString()) - .detail("HistoryTag", history[i].second.toString()) - .detail("Begin", i + 1 == history.size() ? begin : std::max(history[i + 1].first, begin)) - .detail("End", history[i].first); - cursors.push_back(peekLocal(dbgid, - history[i].second, - i + 1 == history.size() ? begin : std::max(history[i + 1].first, begin), - history[i].first, - false)); - epochEnds.emplace_back(history[i].first); - } - - return makeReference(cursors, epochEnds); - } - } + std::vector> history) final; // LogRouter or BackupWorker use this function to obtain a cursor for peeking tlogs of a generation (i.e., epoch). // Specifically, the epoch is determined by looking up "dbgid" in tlog sets of generations. // The returned cursor can peek data at the "tag" from the given "begin" version to that epoch's end version or // the recovery version for the latest old epoch. For the current epoch, the cursor has no end version. 
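// [Reviewer sketch, not part of the patch] Typical caller shape, with
// illustrative names; a log router or backup worker peeking its current
// epoch gets a cursor with no end version and keeps pulling as the epoch
// advances:
//
//   Reference<ILogSystem::IPeekCursor> r =
//       logSystem->peekLogRouter(myWorkerId, startVersion, routerTag);
//   loop {
//       wait(r->getMore());
//       while (r->hasMessage()) { /* forward the message */ r->nextMessage(); }
//   }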
- Reference peekLogRouter(UID dbgid, Version begin, Tag tag) final { - bool found = false; - for (const auto& log : tLogs) { - found = log->hasLogRouter(dbgid) || log->hasBackupWorker(dbgid); - if (found) { - break; - } - } - if (found) { - if (stopped) { - std::vector> localSets; - int bestPrimarySet = 0; - int bestSatelliteSet = -1; - for (auto& log : tLogs) { - if (log->isLocal && log->logServers.size()) { - TraceEvent("TLogPeekLogRouterLocalSet", dbgid) - .detail("Tag", tag.toString()) - .detail("Begin", begin) - .detail("LogServers", log->logServerString()); - localSets.push_back(log); - if (log->locality == tagLocalitySatellite) { - bestSatelliteSet = localSets.size() - 1; - } else { - bestPrimarySet = localSets.size() - 1; - } - } - } - int bestSet = bestPrimarySet; - if (SERVER_KNOBS->LOG_ROUTER_PEEK_FROM_SATELLITES_PREFERRED && bestSatelliteSet != -1 && - tLogs[bestSatelliteSet]->tLogVersion >= TLogVersion::V4) { - bestSet = bestSatelliteSet; - } + Reference peekLogRouter(UID dbgid, Version begin, Tag tag) final; - TraceEvent("TLogPeekLogRouterSets", dbgid).detail("Tag", tag.toString()).detail("Begin", begin); - // FIXME: do this merge on one of the logs in the other data center to avoid sending multiple copies - // across the WAN - return makeReference( - localSets, bestSet, localSets[bestSet]->bestLocationFor(tag), tag, begin, getPeekEnd(), true); - } else { - int bestPrimarySet = -1; - int bestSatelliteSet = -1; - for (int i = 0; i < tLogs.size(); i++) { - const auto& log = tLogs[i]; - if (log->logServers.size() && log->isLocal) { - if (log->locality == tagLocalitySatellite) { - bestSatelliteSet = i; - break; - } else { - if (bestPrimarySet == -1) - bestPrimarySet = i; - } - } - } - int bestSet = bestPrimarySet; - if (SERVER_KNOBS->LOG_ROUTER_PEEK_FROM_SATELLITES_PREFERRED && bestSatelliteSet != -1 && - tLogs[bestSatelliteSet]->tLogVersion >= TLogVersion::V4) { - bestSet = bestSatelliteSet; - } - const auto& log = tLogs[bestSet]; - TraceEvent("TLogPeekLogRouterBestOnly", dbgid) - .detail("Tag", tag.toString()) - .detail("Begin", begin) - .detail("LogId", log->logServers[log->bestLocationFor(tag)]->get().id()); - return makeReference( - log->logServers[log->bestLocationFor(tag)], tag, begin, getPeekEnd(), false, true); - } - } - bool firstOld = true; - for (const auto& old : oldLogData) { - found = false; - for (const auto& log : old.tLogs) { - found = log->hasLogRouter(dbgid) || log->hasBackupWorker(dbgid); - if (found) { - break; - } - } - if (found) { - int bestPrimarySet = 0; - int bestSatelliteSet = -1; - std::vector> localSets; - for (auto& log : old.tLogs) { - if (log->isLocal && log->logServers.size()) { - TraceEvent("TLogPeekLogRouterOldLocalSet", dbgid) - .detail("Tag", tag.toString()) - .detail("Begin", begin) - .detail("LogServers", log->logServerString()); - localSets.push_back(log); - if (log->locality == tagLocalitySatellite) { - bestSatelliteSet = localSets.size() - 1; - } else { - bestPrimarySet = localSets.size() - 1; - } - } - } - int bestSet = bestPrimarySet; - if (SERVER_KNOBS->LOG_ROUTER_PEEK_FROM_SATELLITES_PREFERRED && bestSatelliteSet != -1 && - old.tLogs[bestSatelliteSet]->tLogVersion >= TLogVersion::V4) { - bestSet = bestSatelliteSet; - } + Version getKnownCommittedVersion() final; - TraceEvent("TLogPeekLogRouterOldSets", dbgid) - .detail("Tag", tag.toString()) - .detail("Begin", begin) - .detail("OldEpoch", old.epochEnd) - .detail("RecoveredAt", recoveredAt.present() ? 
recoveredAt.get() : -1) - .detail("FirstOld", firstOld); - // FIXME: do this merge on one of the logs in the other data center to avoid sending multiple copies - // across the WAN - return makeReference( - localSets, - bestSet, - localSets[bestSet]->bestLocationFor(tag), - tag, - begin, - firstOld && recoveredAt.present() ? recoveredAt.get() + 1 : old.epochEnd, - true); - } - firstOld = false; - } - return makeReference( - Reference>>(), tag, begin, getPeekEnd(), false, false); - } + Future onKnownCommittedVersionChange() final; - Version getKnownCommittedVersion() final { - Version result = invalidVersion; - for (auto& it : lockResults) { - auto versions = TagPartitionedLogSystem::getDurableVersion(dbgid, it); - if (versions.present()) { - result = std::max(result, versions.get().first); - } - } - return result; - } + void popLogRouter(Version upTo, Tag tag, Version durableKnownCommittedVersion, int8_t popLocality); - Future onKnownCommittedVersionChange() final { - std::vector> result; - for (auto& it : lockResults) { - result.push_back(TagPartitionedLogSystem::getDurableVersionChanged(it)); - } - if (!result.size()) { - return Never(); - } - return waitForAny(result); - } - - void popLogRouter(Version upTo, - Tag tag, - Version durableKnownCommittedVersion, - int8_t popLocality) { // FIXME: do not need to pop all generations of old logs - if (!upTo) - return; - for (auto& t : tLogs) { - if (t->locality == popLocality) { - for (auto& log : t->logRouters) { - Version prev = outstandingPops[std::make_pair(log->get().id(), tag)].first; - if (prev < upTo) - outstandingPops[std::make_pair(log->get().id(), tag)] = - std::make_pair(upTo, durableKnownCommittedVersion); - if (prev == 0) { - popActors.add(popFromLog( - this, log, tag, 0.0)); // Fast pop time because log routers can only hold 5 seconds of data. 
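// [Reviewer note, not in the patch] The 0.0 delay makes the popFromLog
// actor issue the pop immediately; ordinary tlog pops in pop() below use a
// 1.0s batching delay (flagged "FIXME: knob" in the original).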
- } - } - } - } - - for (auto& old : oldLogData) { - for (auto& t : old.tLogs) { - if (t->locality == popLocality) { - for (auto& log : t->logRouters) { - Version prev = outstandingPops[std::make_pair(log->get().id(), tag)].first; - if (prev < upTo) - outstandingPops[std::make_pair(log->get().id(), tag)] = - std::make_pair(upTo, durableKnownCommittedVersion); - if (prev == 0) - popActors.add(popFromLog(this, log, tag, 0.0)); - } - } - } - } - } - - void popTxs(Version upTo, int8_t popLocality) final { - if (getTLogVersion() < TLogVersion::V4) { - pop(upTo, txsTag, 0, popLocality); - } else { - for (int i = 0; i < txsTags; i++) { - pop(upTo, Tag(tagLocalityTxs, i), 0, popLocality); - } - } - } + void popTxs(Version upTo, int8_t popLocality) final; // pop 'tag.locality' type data up to the 'upTo' version - void pop(Version upTo, Tag tag, Version durableKnownCommittedVersion, int8_t popLocality) final { - if (upTo <= 0) - return; - if (tag.locality == tagLocalityRemoteLog) { - popLogRouter(upTo, tag, durableKnownCommittedVersion, popLocality); - return; - } - for (auto& t : tLogs) { - if (t->locality == tagLocalitySpecial || t->locality == tag.locality || - tag.locality == tagLocalityUpgraded || - (tag.locality < 0 && ((popLocality == tagLocalityInvalid) == t->isLocal))) { - for (auto& log : t->logServers) { - Version prev = outstandingPops[std::make_pair(log->get().id(), tag)].first; - if (prev < upTo) { - // update pop version for popFromLog actor - outstandingPops[std::make_pair(log->get().id(), tag)] = - std::make_pair(upTo, durableKnownCommittedVersion); - } - if (prev == 0) { - // pop tag from log upto version defined in outstandingPops[].first - popActors.add(popFromLog(this, log, tag, 1.0)); //< FIXME: knob - } - } - } - } - } + void pop(Version upTo, Tag tag, Version durableKnownCommittedVersion, int8_t popLocality) final; // pop tag from log up to the version defined in self->outstandingPops[].first ACTOR static Future popFromLog(TagPartitionedLogSystem* self, Reference>> log, Tag tag, - double time) { - state Version last = 0; - loop { - wait(delay(time, TaskPriority::TLogPop)); + double time); - // to: first is upto version, second is durableKnownComittedVersion - state std::pair to = self->outstandingPops[std::make_pair(log->get().id(), tag)]; + ACTOR static Future getPoppedFromTLog(Reference>> log, Tag tag); - if (to.first <= last) { - self->outstandingPops.erase(std::make_pair(log->get().id(), tag)); - return Void(); - } + ACTOR static Future getPoppedTxs(TagPartitionedLogSystem* self); - try { - if (!log->get().present()) - return Void(); - wait(log->get().interf().popMessages.getReply(TLogPopRequest(to.first, to.second, tag), - TaskPriority::TLogPop)); + Future getTxsPoppedVersion() final; - last = to.first; - } catch (Error& e) { - if (e.code() == error_code_actor_cancelled) - throw; - TraceEvent((e.code() == error_code_broken_promise) ? SevInfo : SevError, "LogPopError", self->dbgid) - .error(e) - .detail("Log", log->get().id()); - return Void(); // Leaving outstandingPops filled in means no further pop requests to this tlog from this - // logSystem - } - } - } - - ACTOR static Future getPoppedFromTLog(Reference>> log, Tag tag) { - loop { - choose { - when(TLogPeekReply rep = - wait(log->get().present() ? 
brokenPromiseToNever(log->get().interf().peekMessages.getReply( - TLogPeekRequest(-1, tag, false, false))) - : Never())) { - ASSERT(rep.popped.present()); - return rep.popped.get(); - } - when(wait(log->onChange())) {} - } - } - } - - ACTOR static Future getPoppedTxs(TagPartitionedLogSystem* self) { - state std::vector>> poppedFutures; - state std::vector> poppedReady; - if (self->tLogs.size()) { - poppedFutures.push_back(std::vector>()); - for (auto& it : self->tLogs) { - for (auto& log : it->logServers) { - poppedFutures.back().push_back(getPoppedFromTLog( - log, self->tLogs[0]->tLogVersion < TLogVersion::V4 ? txsTag : Tag(tagLocalityTxs, 0))); - } - } - poppedReady.push_back(waitForAny(poppedFutures.back())); - } - - for (auto& old : self->oldLogData) { - if (old.tLogs.size()) { - poppedFutures.push_back(std::vector>()); - for (auto& it : old.tLogs) { - for (auto& log : it->logServers) { - poppedFutures.back().push_back(getPoppedFromTLog( - log, old.tLogs[0]->tLogVersion < TLogVersion::V4 ? txsTag : Tag(tagLocalityTxs, 0))); - } - } - poppedReady.push_back(waitForAny(poppedFutures.back())); - } - } - - state UID dbgid = self->dbgid; - state Future maxGetPoppedDuration = delay(SERVER_KNOBS->TXS_POPPED_MAX_DELAY); - wait(waitForAll(poppedReady) || maxGetPoppedDuration); - - if (maxGetPoppedDuration.isReady()) { - TraceEvent(SevWarnAlways, "PoppedTxsNotReady", dbgid).log(); - } - - Version maxPopped = 1; - for (auto& it : poppedFutures) { - for (auto& v : it) { - if (v.isReady()) { - maxPopped = std::max(maxPopped, v.get()); - } - } - } - return maxPopped; - } - - Future getTxsPoppedVersion() final { return getPoppedTxs(this); } - - ACTOR static Future confirmEpochLive_internal(Reference logSet, Optional debugID) { - state vector> alive; - int numPresent = 0; - for (auto& t : logSet->logServers) { - if (t->get().present()) { - alive.push_back(brokenPromiseToNever(t->get().interf().confirmRunning.getReply( - TLogConfirmRunningRequest(debugID), TaskPriority::TLogConfirmRunningReply))); - numPresent++; - } else { - alive.push_back(Never()); - } - } - - wait(quorum(alive, std::min(logSet->tLogReplicationFactor, numPresent - logSet->tLogWriteAntiQuorum))); - - state std::vector aliveEntries; - state std::vector responded(alive.size(), false); - loop { - for (int i = 0; i < alive.size(); i++) { - if (!responded[i] && alive[i].isReady() && !alive[i].isError()) { - aliveEntries.push_back(logSet->logEntryArray[i]); - responded[i] = true; - } - } - - if (logSet->satisfiesPolicy(aliveEntries)) { - return Void(); - } - - // The current set of responders that we have weren't enough to form a quorum, so we must - // wait for more responses and try again. - std::vector> changes; - for (int i = 0; i < alive.size(); i++) { - if (!alive[i].isReady()) { - changes.push_back(ready(alive[i])); - } else if (alive[i].isReady() && alive[i].isError() && - alive[i].getError().code() == error_code_tlog_stopped) { - // All commits must go to all TLogs. If any TLog is stopped, then our epoch has ended. 
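// [Reviewer note, not in the patch] Returning Never() here rather than
// throwing appears deliberate: liveness of a stopped epoch can never be
// confirmed, so confirmEpochLive is left unresolved and recovery is driven
// by other machinery instead of an error propagating from this actor.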
- return Never(); - } - } - ASSERT(changes.size() != 0); - wait(waitForAny(changes)); - } - } + ACTOR static Future confirmEpochLive_internal(Reference logSet, Optional debugID); // Returns success after confirming that pushes in the current epoch are still possible - Future confirmEpochLive(Optional debugID) final { - vector> quorumResults; - for (auto& it : tLogs) { - if (it->isLocal && it->logServers.size()) { - quorumResults.push_back(confirmEpochLive_internal(it, debugID)); - } - } + Future confirmEpochLive(Optional debugID) final; - return waitForAll(quorumResults); - } - - Future endEpoch() final { - std::vector> lockResults; - for (auto& logSet : tLogs) { - for (auto& log : logSet->logServers) { - lockResults.push_back(success(lockTLog(dbgid, log))); - } - } - return waitForAll(lockResults); - } + Future endEpoch() final; // Call only after end_epoch() has successfully completed. Returns a new epoch immediately following this one. // The new epoch is only provisional until the caller updates the coordinated DBCoreState. @@ -1638,657 +347,63 @@ struct TagPartitionedLogSystem final : ILogSystem, ReferenceCounted const& allTags, - Reference> const& recruitmentStalled) final { - return newEpoch(Reference::addRef(this), - recr, - fRemoteWorkers, - config, - recoveryCount, - primaryLocality, - remoteLocality, - allTags, - recruitmentStalled); - } + Reference> const& recruitmentStalled) final; - LogSystemConfig getLogSystemConfig() const final { - LogSystemConfig logSystemConfig(epoch); - logSystemConfig.logSystemType = logSystemType; - logSystemConfig.expectedLogSets = expectedLogSets; - logSystemConfig.logRouterTags = logRouterTags; - logSystemConfig.txsTags = txsTags; - logSystemConfig.recruitmentID = recruitmentID; - logSystemConfig.stopped = stopped; - logSystemConfig.recoveredAt = recoveredAt; - logSystemConfig.pseudoLocalities = pseudoLocalities; - logSystemConfig.oldestBackupEpoch = oldestBackupEpoch; - for (const Reference& logSet : tLogs) { - if (logSet->isLocal || remoteLogsWrittenToCoreState) { - logSystemConfig.tLogs.emplace_back(*logSet); - } - } + LogSystemConfig getLogSystemConfig() const final; - if (!recoveryCompleteWrittenToCoreState.get()) { - for (const auto& oldData : oldLogData) { - logSystemConfig.oldTLogs.emplace_back(oldData); - } - } - return logSystemConfig; - } + Standalone getLogsValue() const final; - Standalone getLogsValue() const final { - vector> logs; - vector> oldLogs; - for (auto& t : tLogs) { - if (t->isLocal || remoteLogsWrittenToCoreState) { - for (int i = 0; i < t->logServers.size(); i++) { - logs.emplace_back(t->logServers[i]->get().id(), - t->logServers[i]->get().present() ? t->logServers[i]->get().interf().address() - : NetworkAddress()); - } - } - } - if (!recoveryCompleteWrittenToCoreState.get()) { - for (int i = 0; i < oldLogData.size(); i++) { - for (auto& t : oldLogData[i].tLogs) { - for (int j = 0; j < t->logServers.size(); j++) { - oldLogs.emplace_back(t->logServers[j]->get().id(), - t->logServers[j]->get().present() - ? 
t->logServers[j]->get().interf().address() - : NetworkAddress()); - } - } - } - } - return logsValue(logs, oldLogs); - } + Future onLogSystemConfigChange() final; - Future onLogSystemConfigChange() final { - std::vector> changes; - changes.push_back(logSystemConfigChanged.onTrigger()); - for (auto& t : tLogs) { - for (int i = 0; i < t->logServers.size(); i++) { - changes.push_back(t->logServers[i]->onChange()); - } - } - for (int i = 0; i < oldLogData.size(); i++) { - for (auto& t : oldLogData[i].tLogs) { - for (int j = 0; j < t->logServers.size(); j++) { - changes.push_back(t->logServers[j]->onChange()); - } - } - } + Version getEnd() const final; - if (hasRemoteServers && !remoteRecovery.isReady()) { - changes.push_back(remoteRecovery); - } + Version getPeekEnd() const; - return waitForAny(changes); - } + void getPushLocations(VectorRef tags, std::vector& locations, bool allLocations) const final; - Version getEnd() const final { - ASSERT(recoverAt.present()); - return recoverAt.get() + 1; - } + bool hasRemoteLogs() const final; - Version getPeekEnd() const { - if (recoverAt.present()) - return getEnd(); - else - return std::numeric_limits::max(); - } + Tag getRandomRouterTag() const final; - void getPushLocations(VectorRef tags, std::vector& locations, bool allLocations) const final { - int locationOffset = 0; - for (auto& log : tLogs) { - if (log->isLocal && log->logServers.size()) { - log->getPushLocations(tags, locations, locationOffset, allLocations); - locationOffset += log->logServers.size(); - } - } - } + Tag getRandomTxsTag() const final; - bool hasRemoteLogs() const final { return logRouterTags > 0 || pseudoLocalities.size() > 0; } + TLogVersion getTLogVersion() const final; - Tag getRandomRouterTag() const final { - return Tag(tagLocalityLogRouter, deterministicRandom()->randomInt(0, logRouterTags)); - } + int getLogRouterTags() const final; - Tag getRandomTxsTag() const final { return Tag(tagLocalityTxs, deterministicRandom()->randomInt(0, txsTags)); } + Version getBackupStartVersion() const final; - TLogVersion getTLogVersion() const final { return tLogs[0]->tLogVersion; } + std::map getOldEpochTagsVersionsInfo() const final; - int getLogRouterTags() const final { return logRouterTags; } + inline Reference getEpochLogSet(LogEpoch epoch) const; - Version getBackupStartVersion() const final { - ASSERT(tLogs.size() > 0); - return backupStartVersion; - } + void setBackupWorkers(const std::vector& replies) final; - std::map getOldEpochTagsVersionsInfo() const final { - std::map epochInfos; - for (const auto& old : oldLogData) { - epochInfos.insert( - { old.epoch, ILogSystem::EpochTagsVersionsInfo(old.logRouterTags, old.epochBegin, old.epochEnd) }); - TraceEvent("OldEpochTagsVersions", dbgid) - .detail("Epoch", old.epoch) - .detail("Tags", old.logRouterTags) - .detail("BeginVersion", old.epochBegin) - .detail("EndVersion", old.epochEnd); - } - return epochInfos; - } + bool removeBackupWorker(const BackupWorkerDoneRequest& req) final; - inline Reference getEpochLogSet(LogEpoch epoch) const { - for (const auto& old : oldLogData) { - if (epoch == old.epoch) - return old.tLogs[0]; - } - return Reference(nullptr); - } + LogEpoch getOldestBackupEpoch() const final; - void setBackupWorkers(const std::vector& replies) final { - ASSERT(tLogs.size() > 0); - - Reference logset = tLogs[0]; // Master recruits this epoch's worker first. 
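// [Reviewer note, not in the patch] Replies are then grouped by
// reply.backupEpoch: a reply for an older epoch re-resolves 'logset' via
// getEpochLogSet(), and oldestBackupEpoch is lowered to the minimum epoch
// that still has a backup worker attached.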
- LogEpoch logsetEpoch = this->epoch; - oldestBackupEpoch = this->epoch; - for (const auto& reply : replies) { - if (removedBackupWorkers.count(reply.interf.id()) > 0) { - removedBackupWorkers.erase(reply.interf.id()); - continue; - } - auto worker = makeReference>>( - OptionalInterface(reply.interf)); - if (reply.backupEpoch != logsetEpoch) { - // find the logset from oldLogData - logsetEpoch = reply.backupEpoch; - oldestBackupEpoch = std::min(oldestBackupEpoch, logsetEpoch); - logset = getEpochLogSet(logsetEpoch); - ASSERT(logset.isValid()); - } - logset->backupWorkers.push_back(worker); - TraceEvent("AddBackupWorker", dbgid) - .detail("Epoch", logsetEpoch) - .detail("BackupWorkerID", reply.interf.id()); - } - TraceEvent("SetOldestBackupEpoch", dbgid).detail("Epoch", oldestBackupEpoch); - backupWorkerChanged.trigger(); - } - - bool removeBackupWorker(const BackupWorkerDoneRequest& req) final { - bool removed = false; - Reference logset = getEpochLogSet(req.backupEpoch); - if (logset.isValid()) { - for (auto it = logset->backupWorkers.begin(); it != logset->backupWorkers.end(); it++) { - if (it->getPtr()->get().interf().id() == req.workerUID) { - logset->backupWorkers.erase(it); - removed = true; - break; - } - } - } - - if (removed) { - oldestBackupEpoch = epoch; - for (const auto& old : oldLogData) { - if (old.epoch < oldestBackupEpoch && old.tLogs[0]->backupWorkers.size() > 0) { - oldestBackupEpoch = old.epoch; - } - } - backupWorkerChanged.trigger(); - } else { - removedBackupWorkers.insert(req.workerUID); - } - - TraceEvent("RemoveBackupWorker", dbgid) - .detail("Removed", removed) - .detail("BackupEpoch", req.backupEpoch) - .detail("WorkerID", req.workerUID) - .detail("OldestBackupEpoch", oldestBackupEpoch); - return removed; - } - - LogEpoch getOldestBackupEpoch() const final { return oldestBackupEpoch; } - - void setOldestBackupEpoch(LogEpoch epoch) final { - oldestBackupEpoch = epoch; - backupWorkerChanged.trigger(); - } + void setOldestBackupEpoch(LogEpoch epoch) final; ACTOR static Future monitorLog(Reference>> logServer, - Reference> failed) { - state Future waitFailure; - loop { - if (logServer->get().present()) - waitFailure = waitFailureTracker(logServer->get().interf().waitFailure, failed); - else - failed->set(true); - wait(logServer->onChange()); - } - } + Reference> failed); Optional> static getDurableVersion( UID dbgid, LogLockInfo lockInfo, std::vector>> failed = std::vector>>(), - Optional lastEnd = Optional()) { - Reference logSet = lockInfo.logSet; - // To ensure consistent recovery, the number of servers NOT in the write quorum plus the number of servers NOT - // in the read quorum have to be strictly less than the replication factor. Otherwise there could be a replica - // set consistent entirely of servers that are out of date due to not being in the write quorum or unavailable - // due to not being in the read quorum. So with N = # of tlogs, W = antiquorum, R = required count, F = - // replication factor, W + (N - R) < F, and optimally (N-W)+(N-R)=F-1. Thus R=N+1-F+W. 
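// [Reviewer note, worked example, not in the patch] With N = 5 tlogs,
// replication factor F = 3 and write antiquorum W = 1:
//   requiredCount R = N + 1 - F + W = 5 + 1 - 3 + 1 = 4
// so recovery needs lock replies from at least 4 of the 5 tlogs, and the
// safety condition W + (N - R) = 1 + 1 = 2 < F = 3 holds.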
- int requiredCount = - (int)logSet->logServers.size() + 1 - logSet->tLogReplicationFactor + logSet->tLogWriteAntiQuorum; - ASSERT(requiredCount > 0 && requiredCount <= logSet->logServers.size()); - ASSERT(logSet->tLogReplicationFactor >= 1 && logSet->tLogReplicationFactor <= logSet->logServers.size()); - ASSERT(logSet->tLogWriteAntiQuorum >= 0 && logSet->tLogWriteAntiQuorum < logSet->logServers.size()); - - std::vector availableItems, badCombo; - std::vector results; - std::string sServerState; - LocalityGroup unResponsiveSet; - - for (int t = 0; t < logSet->logServers.size(); t++) { - if (lockInfo.replies[t].isReady() && !lockInfo.replies[t].isError() && - (!failed.size() || !failed[t]->get())) { - results.push_back(lockInfo.replies[t].get()); - availableItems.push_back(logSet->tLogLocalities[t]); - sServerState += 'a'; - } else { - unResponsiveSet.add(logSet->tLogLocalities[t]); - sServerState += 'f'; - } - } - - // Check if the list of results is not larger than the anti quorum - bool bTooManyFailures = (results.size() <= logSet->tLogWriteAntiQuorum); - - // Check if failed logs complete the policy - bTooManyFailures = bTooManyFailures || ((unResponsiveSet.size() >= logSet->tLogReplicationFactor) && - (unResponsiveSet.validate(logSet->tLogPolicy))); - - // Check all combinations of the AntiQuorum within the failed - if (!bTooManyFailures && (logSet->tLogWriteAntiQuorum) && - (!validateAllCombinations( - badCombo, unResponsiveSet, logSet->tLogPolicy, availableItems, logSet->tLogWriteAntiQuorum, false))) { - TraceEvent("EpochEndBadCombo", dbgid) - .detail("Required", requiredCount) - .detail("Present", results.size()) - .detail("ServerState", sServerState); - bTooManyFailures = true; - } - - ASSERT(logSet->logServers.size() == lockInfo.replies.size()); - if (!bTooManyFailures) { - std::sort(results.begin(), results.end(), sort_by_end()); - int absent = logSet->logServers.size() - results.size(); - int safe_range_begin = logSet->tLogWriteAntiQuorum; - int new_safe_range_begin = std::min(logSet->tLogWriteAntiQuorum, (int)(results.size() - 1)); - int safe_range_end = logSet->tLogReplicationFactor - absent; - - if (!lastEnd.present() || ((safe_range_end > 0) && (safe_range_end - 1 < results.size()) && - results[safe_range_end - 1].end < lastEnd.get())) { - Version knownCommittedVersion = 0; - for (int i = 0; i < results.size(); i++) { - knownCommittedVersion = std::max(knownCommittedVersion, results[i].knownCommittedVersion); - } - - if (knownCommittedVersion > results[new_safe_range_begin].end) { - knownCommittedVersion = results[new_safe_range_begin].end; - } - - TraceEvent("GetDurableResult", dbgid) - .detail("Required", requiredCount) - .detail("Present", results.size()) - .detail("ServerState", sServerState) - .detail("RecoveryVersion", - ((safe_range_end > 0) && (safe_range_end - 1 < results.size())) - ? 
results[safe_range_end - 1].end - : -1) - .detail("EndVersion", results[new_safe_range_begin].end) - .detail("SafeBegin", safe_range_begin) - .detail("SafeEnd", safe_range_end) - .detail("NewSafeBegin", new_safe_range_begin) - .detail("KnownCommittedVersion", knownCommittedVersion) - .detail("EpochEnd", lockInfo.epochEnd); - - return std::make_pair(knownCommittedVersion, results[new_safe_range_begin].end); - } - } - TraceEvent("GetDurableResultWaiting", dbgid) - .detail("Required", requiredCount) - .detail("Present", results.size()) - .detail("ServerState", sServerState); - return Optional>(); - } + Optional lastEnd = Optional()); ACTOR static Future getDurableVersionChanged( LogLockInfo lockInfo, - std::vector>> failed = std::vector>>()) { - // Wait for anything relevant to change - std::vector> changes; - for (int j = 0; j < lockInfo.logSet->logServers.size(); j++) { - if (!lockInfo.replies[j].isReady()) - changes.push_back(ready(lockInfo.replies[j])); - else { - changes.push_back(lockInfo.logSet->logServers[j]->onChange()); - if (failed.size()) { - changes.push_back(failed[j]->onChange()); - } - } - } - ASSERT(changes.size()); - wait(waitForAny(changes)); - return Void(); - } + std::vector>> failed = std::vector>>()); ACTOR static Future epochEnd(Reference>> outLogSystem, UID dbgid, DBCoreState prevState, FutureStream rejoinRequests, LocalityData locality, - bool* forceRecovery) { - // Stops a co-quorum of tlogs so that no further versions can be committed until the DBCoreState coordination - // state is changed Creates a new logSystem representing the (now frozen) epoch No other important side effects. - // The writeQuorum in the master info is from the previous configuration - - if (!prevState.tLogs.size()) { - // This is a brand new database - auto logSystem = makeReference(dbgid, locality, 0); - logSystem->logSystemType = prevState.logSystemType; - logSystem->recoverAt = 0; - logSystem->knownCommittedVersion = 0; - logSystem->stopped = true; - outLogSystem->set(logSystem); - wait(Future(Never())); - throw internal_error(); - } - - if (*forceRecovery) { - DBCoreState modifiedState = prevState; - - int8_t primaryLocality = -1; - for (auto& coreSet : modifiedState.tLogs) { - if (coreSet.isLocal && coreSet.locality >= 0 && coreSet.tLogLocalities[0].dcId() != locality.dcId()) { - primaryLocality = coreSet.locality; - break; - } - } - - bool foundRemote = false; - int8_t remoteLocality = -1; - int modifiedLogSets = 0; - int removedLogSets = 0; - if (primaryLocality >= 0) { - auto copiedLogs = modifiedState.tLogs; - for (auto& coreSet : copiedLogs) { - if (coreSet.locality != primaryLocality && coreSet.locality >= 0) { - foundRemote = true; - remoteLocality = coreSet.locality; - modifiedState.tLogs.clear(); - modifiedState.tLogs.push_back(coreSet); - modifiedState.tLogs[0].isLocal = true; - modifiedState.logRouterTags = 0; - modifiedLogSets++; - break; - } - } - - while (!foundRemote && modifiedState.oldTLogData.size()) { - for (auto& coreSet : modifiedState.oldTLogData[0].tLogs) { - if (coreSet.locality != primaryLocality && coreSet.locality >= tagLocalitySpecial) { - foundRemote = true; - remoteLocality = coreSet.locality; - modifiedState.tLogs.clear(); - modifiedState.tLogs.push_back(coreSet); - modifiedState.tLogs[0].isLocal = true; - modifiedState.logRouterTags = 0; - modifiedState.txsTags = modifiedState.oldTLogData[0].txsTags; - modifiedLogSets++; - break; - } - } - modifiedState.oldTLogData.erase(modifiedState.oldTLogData.begin()); - removedLogSets++; - } - - if (foundRemote) { - 
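// For each remaining old generation, the loop below keeps only the log set matching the
// surviving remote locality (or the special locality) and fixes up its epoch boundaries;
// generations with no matching set are erased outright, since they cannot contribute to the
// forced recovery.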
for (int i = 0; i < modifiedState.oldTLogData.size(); i++) { - bool found = false; - auto copiedLogs = modifiedState.oldTLogData[i].tLogs; - for (auto& coreSet : copiedLogs) { - if (coreSet.locality == remoteLocality || coreSet.locality == tagLocalitySpecial) { - found = true; - if (!coreSet.isLocal || copiedLogs.size() > 1) { - modifiedState.oldTLogData[i].tLogs.clear(); - modifiedState.oldTLogData[i].tLogs.push_back(coreSet); - modifiedState.oldTLogData[i].tLogs[0].isLocal = true; - modifiedState.oldTLogData[i].logRouterTags = 0; - modifiedState.oldTLogData[i].epochBegin = - modifiedState.oldTLogData[i].tLogs[0].startVersion; - modifiedState.oldTLogData[i].epochEnd = - (i == 0 ? modifiedState.tLogs[0].startVersion - : modifiedState.oldTLogData[i - 1].tLogs[0].startVersion); - modifiedLogSets++; - } - break; - } - } - if (!found) { - modifiedState.oldTLogData.erase(modifiedState.oldTLogData.begin() + i); - removedLogSets++; - i--; - } - } - prevState = modifiedState; - } else { - *forceRecovery = false; - } - } else { - *forceRecovery = false; - } - TraceEvent(SevWarnAlways, "ForcedRecovery", dbgid) - .detail("PrimaryLocality", primaryLocality) - .detail("RemoteLocality", remoteLocality) - .detail("FoundRemote", foundRemote) - .detail("Modified", modifiedLogSets) - .detail("Removed", removedLogSets); - for (int i = 0; i < prevState.tLogs.size(); i++) { - TraceEvent("ForcedRecoveryTLogs", dbgid) - .detail("I", i) - .detail("Log", ::describe(prevState.tLogs[i].tLogs)) - .detail("Loc", prevState.tLogs[i].locality) - .detail("Txs", prevState.txsTags); - } - for (int i = 0; i < prevState.oldTLogData.size(); i++) { - for (int j = 0; j < prevState.oldTLogData[i].tLogs.size(); j++) { - TraceEvent("ForcedRecoveryTLogs", dbgid) - .detail("I", i) - .detail("J", j) - .detail("Log", ::describe(prevState.oldTLogData[i].tLogs[j].tLogs)) - .detail("Loc", prevState.oldTLogData[i].tLogs[j].locality) - .detail("Txs", prevState.oldTLogData[i].txsTags); - } - } - } - - TEST(true); // Master recovery from pre-existing database - - // trackRejoins listens for rejoin requests from the tLogs that we are recovering from, to learn their - // TLogInterfaces - state std::vector lockResults; - state - std::vector>>, Reference>> - allLogServers; - state std::vector> logServers; - state std::vector oldLogData; - state std::vector>>> logFailed; - state std::vector> failureTrackers; - - for (const CoreTLogSet& coreSet : prevState.tLogs) { - logServers.push_back(makeReference(coreSet)); - std::vector>> failed; - - for (const auto& logVar : logServers.back()->logServers) { - allLogServers.emplace_back(logVar, coreSet.tLogPolicy); - failed.push_back(makeReference>()); - failureTrackers.push_back(monitorLog(logVar, failed.back())); - } - logFailed.push_back(failed); - } - - for (const auto& oldTlogData : prevState.oldTLogData) { - oldLogData.emplace_back(oldTlogData); - - for (const auto& logSet : oldLogData.back().tLogs) { - for (const auto& logVar : logSet->logServers) { - allLogServers.emplace_back(logVar, logSet->tLogPolicy); - } - } - } - state Future rejoins = trackRejoins(dbgid, allLogServers, rejoinRequests); - - lockResults.resize(logServers.size()); - std::set lockedLocalities; - bool foundSpecial = false; - for (int i = 0; i < logServers.size(); i++) { - if (logServers[i]->locality == tagLocalitySpecial || logServers[i]->locality == tagLocalityUpgraded) { - foundSpecial = true; - } - lockedLocalities.insert(logServers[i]->locality); - lockResults[i].isCurrent = true; - lockResults[i].logSet = logServers[i]; - for 
(int t = 0; t < logServers[i]->logServers.size(); t++) { - lockResults[i].replies.push_back(lockTLog(dbgid, logServers[i]->logServers[t])); - } - } - - for (auto& old : oldLogData) { - if (foundSpecial) { - break; - } - for (auto& log : old.tLogs) { - if (log->locality == tagLocalitySpecial || log->locality == tagLocalityUpgraded) { - foundSpecial = true; - break; - } - if (!lockedLocalities.count(log->locality)) { - TraceEvent("EpochEndLockExtra").detail("Locality", log->locality); - TEST(true); // locking old generations for version information - lockedLocalities.insert(log->locality); - LogLockInfo lockResult; - lockResult.epochEnd = old.epochEnd; - lockResult.logSet = log; - for (int t = 0; t < log->logServers.size(); t++) { - lockResult.replies.push_back(lockTLog(dbgid, log->logServers[t])); - } - lockResults.push_back(lockResult); - } - } - } - - if (*forceRecovery) { - state std::vector allLockResults; - ASSERT(lockResults.size() == 1); - allLockResults.push_back(lockResults[0]); - for (auto& old : oldLogData) { - ASSERT(old.tLogs.size() == 1); - LogLockInfo lockResult; - lockResult.epochEnd = old.epochEnd; - lockResult.logSet = old.tLogs[0]; - for (int t = 0; t < old.tLogs[0]->logServers.size(); t++) { - lockResult.replies.push_back(lockTLog(dbgid, old.tLogs[0]->logServers[t])); - } - allLockResults.push_back(lockResult); - } - - state int lockNum = 0; - state Version maxRecoveryVersion = 0; - state int maxRecoveryIndex = 0; - while (lockNum < allLockResults.size()) { - auto versions = TagPartitionedLogSystem::getDurableVersion(dbgid, allLockResults[lockNum]); - if (versions.present()) { - if (versions.get().second > maxRecoveryVersion) { - TraceEvent("HigherRecoveryVersion", dbgid) - .detail("Idx", lockNum) - .detail("Ver", versions.get().second); - maxRecoveryVersion = versions.get().second; - maxRecoveryIndex = lockNum; - } - lockNum++; - } else { - wait(TagPartitionedLogSystem::getDurableVersionChanged(allLockResults[lockNum])); - } - } - if (maxRecoveryIndex > 0) { - logServers = oldLogData[maxRecoveryIndex - 1].tLogs; - prevState.txsTags = oldLogData[maxRecoveryIndex - 1].txsTags; - lockResults[0] = allLockResults[maxRecoveryIndex]; - lockResults[0].isCurrent = true; - - std::vector>> failed; - for (auto& log : logServers[0]->logServers) { - failed.push_back(makeReference>()); - failureTrackers.push_back(monitorLog(log, failed.back())); - } - ASSERT(logFailed.size() == 1); - logFailed[0] = failed; - oldLogData.erase(oldLogData.begin(), oldLogData.begin() + maxRecoveryIndex); - } - } - - state Optional lastEnd; - state Version knownCommittedVersion = 0; - loop { - Version minEnd = std::numeric_limits::max(); - Version maxEnd = 0; - std::vector> changes; - for (int log = 0; log < logServers.size(); log++) { - if (!logServers[log]->isLocal) { - continue; - } - auto versions = - TagPartitionedLogSystem::getDurableVersion(dbgid, lockResults[log], logFailed[log], lastEnd); - if (versions.present()) { - knownCommittedVersion = std::max(knownCommittedVersion, versions.get().first); - maxEnd = std::max(maxEnd, versions.get().second); - minEnd = std::min(minEnd, versions.get().second); - } - changes.push_back(TagPartitionedLogSystem::getDurableVersionChanged(lockResults[log], logFailed[log])); - } - - if (maxEnd > 0 && (!lastEnd.present() || maxEnd < lastEnd.get())) { - TEST(lastEnd.present()); // Restarting recovery at an earlier point - - auto logSystem = makeReference(dbgid, locality, prevState.recoveryCount); - - lastEnd = minEnd; - logSystem->tLogs = logServers; - 
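// lastEnd remembers the recovery point chosen on the previous iteration; a new log system is
// built only when a strictly earlier end version appears (maxEnd < lastEnd), and the
// conservative recovery version is minEnd, the smallest end any local log set can serve.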
logSystem->logRouterTags = prevState.logRouterTags; - logSystem->txsTags = prevState.txsTags; - logSystem->oldLogData = oldLogData; - logSystem->logSystemType = prevState.logSystemType; - logSystem->rejoins = rejoins; - logSystem->lockResults = lockResults; - if (knownCommittedVersion > minEnd) { - knownCommittedVersion = minEnd; - } - logSystem->recoverAt = minEnd; - logSystem->knownCommittedVersion = knownCommittedVersion; - TraceEvent(SevDebug, "FinalRecoveryVersionInfo") - .detail("KCV", knownCommittedVersion) - .detail("MinEnd", minEnd); - logSystem->remoteLogsWrittenToCoreState = true; - logSystem->stopped = true; - logSystem->pseudoLocalities = prevState.pseudoLocalities; - - outLogSystem->set(logSystem); - } - - wait(waitForAny(changes)); - } - } + bool* forceRecovery); ACTOR static Future recruitOldLogRouters(TagPartitionedLogSystem* self, vector workers, @@ -2297,192 +412,11 @@ struct TagPartitionedLogSystem final : ILogSystem, ReferenceCounted tLogLocalities, Reference tLogPolicy, - bool forRemote) { - state vector>> logRouterInitializationReplies; - state vector> allReplies; - int nextRouter = 0; - state Version lastStart = std::numeric_limits::max(); + bool forRemote); - if (!forRemote) { - Version maxStart = getMaxLocalStartVersion(self->tLogs); + static Version getMaxLocalStartVersion(const std::vector>& tLogs); - lastStart = std::max(startVersion, maxStart); - if (self->logRouterTags == 0) { - ASSERT_WE_THINK(false); - self->logSystemConfigChanged.trigger(); - return Void(); - } - - bool found = false; - for (auto& tLogs : self->tLogs) { - if (tLogs->locality == locality) { - found = true; - } - - tLogs->logRouters.clear(); - } - - if (!found) { - TraceEvent("RecruitingOldLogRoutersAddingLocality") - .detail("Locality", locality) - .detail("LastStart", lastStart); - auto newLogSet = makeReference(); - newLogSet->locality = locality; - newLogSet->startVersion = lastStart; - newLogSet->isLocal = false; - self->tLogs.push_back(newLogSet); - } - - for (auto& tLogs : self->tLogs) { - // Recruit log routers for old generations of the primary locality - if (tLogs->locality == locality) { - logRouterInitializationReplies.emplace_back(); - for (int i = 0; i < self->logRouterTags; i++) { - InitializeLogRouterRequest req; - req.recoveryCount = recoveryCount; - req.routerTag = Tag(tagLocalityLogRouter, i); - req.startVersion = lastStart; - req.tLogLocalities = tLogLocalities; - req.tLogPolicy = tLogPolicy; - req.locality = locality; - auto reply = transformErrors( - throwErrorOr(workers[nextRouter].logRouter.getReplyUnlessFailedFor( - req, SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)), - master_recovery_failed()); - logRouterInitializationReplies.back().push_back(reply); - allReplies.push_back(reply); - nextRouter = (nextRouter + 1) % workers.size(); - } - } - } - } - - for (auto& old : self->oldLogData) { - Version maxStart = getMaxLocalStartVersion(old.tLogs); - - if (old.logRouterTags == 0 || maxStart >= lastStart) { - break; - } - lastStart = std::max(startVersion, maxStart); - bool found = false; - for (auto& tLogs : old.tLogs) { - if (tLogs->locality == locality) { - found = true; - } - tLogs->logRouters.clear(); - } - - if (!found) { - TraceEvent("RecruitingOldLogRoutersAddingLocality") - .detail("Locality", locality) - .detail("LastStart", lastStart); - auto newLogSet = makeReference(); - newLogSet->locality = locality; - newLogSet->startVersion = lastStart; - old.tLogs.push_back(newLogSet); - } - - for (auto& tLogs : old.tLogs) { - // Recruit 
log routers for old generations of the primary locality - if (tLogs->locality == locality) { - logRouterInitializationReplies.emplace_back(); - for (int i = 0; i < old.logRouterTags; i++) { - InitializeLogRouterRequest req; - req.recoveryCount = recoveryCount; - req.routerTag = Tag(tagLocalityLogRouter, i); - req.startVersion = lastStart; - req.tLogLocalities = tLogLocalities; - req.tLogPolicy = tLogPolicy; - req.locality = locality; - auto reply = transformErrors( - throwErrorOr(workers[nextRouter].logRouter.getReplyUnlessFailedFor( - req, SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)), - master_recovery_failed()); - logRouterInitializationReplies.back().push_back(reply); - allReplies.push_back(reply); - nextRouter = (nextRouter + 1) % workers.size(); - } - } - } - } - - wait(waitForAll(allReplies)); - - int nextReplies = 0; - lastStart = std::numeric_limits::max(); - vector> failed; - - if (!forRemote) { - Version maxStart = getMaxLocalStartVersion(self->tLogs); - - lastStart = std::max(startVersion, maxStart); - for (auto& tLogs : self->tLogs) { - if (tLogs->locality == locality) { - for (int i = 0; i < logRouterInitializationReplies[nextReplies].size(); i++) { - tLogs->logRouters.push_back(makeReference>>( - OptionalInterface(logRouterInitializationReplies[nextReplies][i].get()))); - failed.push_back(waitFailureClient( - logRouterInitializationReplies[nextReplies][i].get().waitFailure, - SERVER_KNOBS->TLOG_TIMEOUT, - -SERVER_KNOBS->TLOG_TIMEOUT / SERVER_KNOBS->SECONDS_BEFORE_NO_FAILURE_DELAY, - /*trace=*/true)); - } - nextReplies++; - } - } - } - - for (auto& old : self->oldLogData) { - Version maxStart = getMaxLocalStartVersion(old.tLogs); - if (old.logRouterTags == 0 || maxStart >= lastStart) { - break; - } - lastStart = std::max(startVersion, maxStart); - for (auto& tLogs : old.tLogs) { - if (tLogs->locality == locality) { - for (int i = 0; i < logRouterInitializationReplies[nextReplies].size(); i++) { - tLogs->logRouters.push_back(makeReference>>( - OptionalInterface(logRouterInitializationReplies[nextReplies][i].get()))); - if (!forRemote) { - failed.push_back(waitFailureClient( - logRouterInitializationReplies[nextReplies][i].get().waitFailure, - SERVER_KNOBS->TLOG_TIMEOUT, - -SERVER_KNOBS->TLOG_TIMEOUT / SERVER_KNOBS->SECONDS_BEFORE_NO_FAILURE_DELAY, - /*trace=*/true)); - } - } - nextReplies++; - } - } - } - - if (!forRemote) { - self->logSystemConfigChanged.trigger(); - wait(failed.size() ? 
tagError(quorum(failed, 1), master_tlog_failed()) : Future(Never())); - throw internal_error(); - } - return Void(); - } - - static Version getMaxLocalStartVersion(const std::vector>& tLogs) { - Version maxStart = 0; - for (const auto& logSet : tLogs) { - if (logSet->isLocal) { - maxStart = std::max(maxStart, logSet->startVersion); - } - } - return maxStart; - } - - static std::vector getLocalTags(int8_t locality, const std::vector& allTags) { - std::vector localTags; - for (const auto& tag : allTags) { - if (locality == tagLocalitySpecial || locality == tag.locality || tag.locality < 0) { - localTags.push_back(tag); - } - } - return localTags; - } + static std::vector getLocalTags(int8_t locality, const std::vector& allTags); ACTOR static Future newRemoteEpoch(TagPartitionedLogSystem* self, Reference oldLogSystem, @@ -2490,185 +424,7 @@ struct TagPartitionedLogSystem final : ILogSystem, ReferenceCounted allTags) { - TraceEvent("RemoteLogRecruitment_WaitingForWorkers").log(); - state RecruitRemoteFromConfigurationReply remoteWorkers = wait(fRemoteWorkers); - - state Reference logSet(new LogSet()); - logSet->tLogReplicationFactor = configuration.getRemoteTLogReplicationFactor(); - logSet->tLogVersion = configuration.tLogVersion; - logSet->tLogPolicy = configuration.getRemoteTLogPolicy(); - logSet->isLocal = false; - logSet->locality = remoteLocality; - - logSet->startVersion = oldLogSystem->knownCommittedVersion + 1; - state int lockNum = 0; - while (lockNum < oldLogSystem->lockResults.size()) { - if (oldLogSystem->lockResults[lockNum].logSet->locality == remoteLocality) { - loop { - auto versions = - TagPartitionedLogSystem::getDurableVersion(self->dbgid, oldLogSystem->lockResults[lockNum]); - if (versions.present()) { - logSet->startVersion = - std::min(std::min(versions.get().first + 1, oldLogSystem->lockResults[lockNum].epochEnd), - logSet->startVersion); - break; - } - wait(TagPartitionedLogSystem::getDurableVersionChanged(oldLogSystem->lockResults[lockNum])); - } - break; - } - lockNum++; - } - - vector localities; - localities.resize(remoteWorkers.remoteTLogs.size()); - for (int i = 0; i < remoteWorkers.remoteTLogs.size(); i++) { - localities[i] = remoteWorkers.remoteTLogs[i].locality; - } - - state Future oldRouterRecruitment = Void(); - if (logSet->startVersion < oldLogSystem->knownCommittedVersion + 1) { - ASSERT(oldLogSystem->logRouterTags > 0); - oldRouterRecruitment = TagPartitionedLogSystem::recruitOldLogRouters(self, - remoteWorkers.logRouters, - recoveryCount, - remoteLocality, - logSet->startVersion, - localities, - logSet->tLogPolicy, - true); - } - - state vector> logRouterInitializationReplies; - const Version startVersion = oldLogSystem->logRouterTags == 0 - ? 
oldLogSystem->recoverAt.get() + 1 - : std::max(self->tLogs[0]->startVersion, logSet->startVersion); - for (int i = 0; i < self->logRouterTags; i++) { - InitializeLogRouterRequest req; - req.recoveryCount = recoveryCount; - req.routerTag = Tag(tagLocalityLogRouter, i); - req.startVersion = startVersion; - req.tLogLocalities = localities; - req.tLogPolicy = logSet->tLogPolicy; - req.locality = remoteLocality; - logRouterInitializationReplies.push_back(transformErrors( - throwErrorOr( - remoteWorkers.logRouters[i % remoteWorkers.logRouters.size()].logRouter.getReplyUnlessFailedFor( - req, SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)), - master_recovery_failed())); - } - - std::vector localTags = getLocalTags(remoteLocality, allTags); - LogSystemConfig oldLogSystemConfig = oldLogSystem->getLogSystemConfig(); - - logSet->tLogLocalities.resize(remoteWorkers.remoteTLogs.size()); - logSet->logServers.resize( - remoteWorkers.remoteTLogs - .size()); // Dummy interfaces, so that logSystem->getPushLocations() below uses the correct size - logSet->updateLocalitySet(localities); - - state vector> remoteTLogInitializationReplies; - vector remoteTLogReqs(remoteWorkers.remoteTLogs.size()); - - bool nonShardedTxs = self->getTLogVersion() < TLogVersion::V4; - if (oldLogSystem->logRouterTags == 0) { - std::vector locations; - for (Tag tag : localTags) { - locations.clear(); - logSet->getPushLocations(VectorRef(&tag, 1), locations, 0); - for (int loc : locations) - remoteTLogReqs[loc].recoverTags.push_back(tag); - } - - if (oldLogSystem->tLogs.size()) { - int maxTxsTags = oldLogSystem->txsTags; - bool needsOldTxs = oldLogSystem->tLogs[0]->tLogVersion < TLogVersion::V4; - for (auto& it : oldLogSystem->oldLogData) { - maxTxsTags = std::max(maxTxsTags, it.txsTags); - needsOldTxs = needsOldTxs || it.tLogs[0]->tLogVersion < TLogVersion::V4; - } - for (int i = needsOldTxs ? -1 : 0; i < maxTxsTags; i++) { - Tag tag = i == -1 ? txsTag : Tag(tagLocalityTxs, i); - Tag pushTag = (i == -1 || nonShardedTxs) ? 
txsTag : Tag(tagLocalityTxs, i % self->txsTags); - locations.clear(); - logSet->getPushLocations(VectorRef(&pushTag, 1), locations, 0); - for (int loc : locations) - remoteTLogReqs[loc].recoverTags.push_back(tag); - } - } - } - - if (oldLogSystem->tLogs.size()) { - if (nonShardedTxs) { - localTags.push_back(txsTag); - } else { - for (int i = 0; i < self->txsTags; i++) { - localTags.push_back(Tag(tagLocalityTxs, i)); - } - } - } - - for (int i = 0; i < remoteWorkers.remoteTLogs.size(); i++) { - InitializeTLogRequest& req = remoteTLogReqs[i]; - req.recruitmentID = self->recruitmentID; - req.logVersion = configuration.tLogVersion; - req.storeType = configuration.tLogDataStoreType; - req.spillType = configuration.tLogSpillType; - req.recoverFrom = oldLogSystemConfig; - req.recoverAt = oldLogSystem->recoverAt.get(); - req.knownCommittedVersion = oldLogSystem->knownCommittedVersion; - req.epoch = recoveryCount; - req.remoteTag = Tag(tagLocalityRemoteLog, i); - req.locality = remoteLocality; - req.isPrimary = false; - req.allTags = localTags; - req.startVersion = logSet->startVersion; - req.logRouterTags = 0; - req.txsTags = self->txsTags; - } - - remoteTLogInitializationReplies.reserve(remoteWorkers.remoteTLogs.size()); - for (int i = 0; i < remoteWorkers.remoteTLogs.size(); i++) - remoteTLogInitializationReplies.push_back(transformErrors( - throwErrorOr(remoteWorkers.remoteTLogs[i].tLog.getReplyUnlessFailedFor( - remoteTLogReqs[i], SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)), - master_recovery_failed())); - - TraceEvent("RemoteLogRecruitment_InitializingRemoteLogs") - .detail("StartVersion", logSet->startVersion) - .detail("LocalStart", self->tLogs[0]->startVersion) - .detail("LogRouterTags", self->logRouterTags); - wait(waitForAll(remoteTLogInitializationReplies) && waitForAll(logRouterInitializationReplies) && - oldRouterRecruitment); - - for (int i = 0; i < logRouterInitializationReplies.size(); i++) { - logSet->logRouters.push_back(makeReference>>( - OptionalInterface(logRouterInitializationReplies[i].get()))); - } - - for (int i = 0; i < remoteTLogInitializationReplies.size(); i++) { - logSet->logServers[i] = makeReference>>( - OptionalInterface(remoteTLogInitializationReplies[i].get())); - logSet->tLogLocalities[i] = remoteWorkers.remoteTLogs[i].locality; - } - filterLocalityDataForPolicy(logSet->tLogPolicy, &logSet->tLogLocalities); - - std::vector> recoveryComplete; - recoveryComplete.reserve(logSet->logServers.size()); - for (int i = 0; i < logSet->logServers.size(); i++) - recoveryComplete.push_back(transformErrors( - throwErrorOr(logSet->logServers[i]->get().interf().recoveryFinished.getReplyUnlessFailedFor( - TLogRecoveryFinishedRequest(), - SERVER_KNOBS->TLOG_TIMEOUT, - SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)), - master_recovery_failed())); - - self->remoteRecoveryComplete = waitForAll(recoveryComplete); - self->tLogs.push_back(logSet); - TraceEvent("RemoteLogRecruitment_CompletingRecovery").log(); - return Void(); - } + std::vector allTags); ACTOR static Future> newEpoch(Reference oldLogSystem, RecruitFromConfigurationReply recr, @@ -2678,624 +434,189 @@ struct TagPartitionedLogSystem final : ILogSystem, ReferenceCounted allTags, - Reference> recruitmentStalled) { - state double startTime = now(); - state Reference logSystem( - new TagPartitionedLogSystem(oldLogSystem->getDebugID(), oldLogSystem->locality, recoveryCount)); - logSystem->logSystemType = LogSystemType::tagPartitioned; - logSystem->expectedLogSets = 1; - 
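// The new epoch starts with one expected (local) log set; the log-router and satellite
// branches below each increment expectedLogSets as they add their own sets.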
logSystem->recoveredAt = oldLogSystem->recoverAt; - logSystem->repopulateRegionAntiQuorum = configuration.repopulateRegionAntiQuorum; - logSystem->recruitmentID = deterministicRandom()->randomUniqueID(); - logSystem->txsTags = configuration.tLogVersion >= TLogVersion::V4 ? recr.tLogs.size() : 0; - oldLogSystem->recruitmentID = logSystem->recruitmentID; - - if (configuration.usableRegions > 1) { - logSystem->logRouterTags = - recr.tLogs.size() * - std::max(1, configuration.desiredLogRouterCount / std::max(1, recr.tLogs.size())); - logSystem->expectedLogSets++; - logSystem->addPseudoLocality(tagLocalityLogRouterMapped); - TraceEvent e("AddPseudoLocality", logSystem->getDebugID()); - e.detail("Locality1", "LogRouterMapped"); - if (configuration.backupWorkerEnabled) { - logSystem->addPseudoLocality(tagLocalityBackup); - e.detail("Locality2", "Backup"); - } - } else if (configuration.backupWorkerEnabled) { - // Single region uses log router tag for backup workers. - logSystem->logRouterTags = - recr.tLogs.size() * - std::max(1, configuration.desiredLogRouterCount / std::max(1, recr.tLogs.size())); - logSystem->addPseudoLocality(tagLocalityBackup); - TraceEvent("AddPseudoLocality", logSystem->getDebugID()).detail("Locality", "Backup"); - } - - logSystem->tLogs.push_back(makeReference()); - logSystem->tLogs[0]->tLogVersion = configuration.tLogVersion; - logSystem->tLogs[0]->tLogWriteAntiQuorum = configuration.tLogWriteAntiQuorum; - logSystem->tLogs[0]->tLogReplicationFactor = configuration.tLogReplicationFactor; - logSystem->tLogs[0]->tLogPolicy = configuration.tLogPolicy; - logSystem->tLogs[0]->isLocal = true; - logSystem->tLogs[0]->locality = primaryLocality; - - state RegionInfo region = configuration.getRegion(recr.dcId); - - state int maxTxsTags = oldLogSystem->txsTags; - state bool needsOldTxs = oldLogSystem->tLogs.size() && oldLogSystem->getTLogVersion() < TLogVersion::V4; - for (auto& it : oldLogSystem->oldLogData) { - maxTxsTags = std::max(maxTxsTags, it.txsTags); - needsOldTxs = needsOldTxs || it.tLogs[0]->tLogVersion < TLogVersion::V4; - } - - if (region.satelliteTLogReplicationFactor > 0 && configuration.usableRegions > 1) { - logSystem->tLogs.push_back(makeReference()); - if (recr.satelliteFallback) { - logSystem->tLogs[1]->tLogWriteAntiQuorum = region.satelliteTLogWriteAntiQuorumFallback; - logSystem->tLogs[1]->tLogReplicationFactor = region.satelliteTLogReplicationFactorFallback; - logSystem->tLogs[1]->tLogPolicy = region.satelliteTLogPolicyFallback; - } else { - logSystem->tLogs[1]->tLogWriteAntiQuorum = region.satelliteTLogWriteAntiQuorum; - logSystem->tLogs[1]->tLogReplicationFactor = region.satelliteTLogReplicationFactor; - logSystem->tLogs[1]->tLogPolicy = region.satelliteTLogPolicy; - } - logSystem->tLogs[1]->isLocal = true; - logSystem->tLogs[1]->locality = tagLocalitySatellite; - logSystem->tLogs[1]->tLogVersion = configuration.tLogVersion; - logSystem->tLogs[1]->startVersion = oldLogSystem->knownCommittedVersion + 1; - - logSystem->tLogs[1]->tLogLocalities.resize(recr.satelliteTLogs.size()); - for (int i = 0; i < recr.satelliteTLogs.size(); i++) { - logSystem->tLogs[1]->tLogLocalities[i] = recr.satelliteTLogs[i].locality; - } - filterLocalityDataForPolicy(logSystem->tLogs[1]->tLogPolicy, &logSystem->tLogs[1]->tLogLocalities); - - logSystem->tLogs[1]->logServers.resize( - recr.satelliteTLogs - .size()); // Dummy interfaces, so that logSystem->getPushLocations() below uses the correct size - logSystem->tLogs[1]->updateLocalitySet(logSystem->tLogs[1]->tLogLocalities); - 
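// populateSatelliteTagLocations (called next) maps every old and new log-router tag id, plus
// the txs tags, onto preferred satellite locations, so that push locations for those tags can
// be computed while the satellite log servers are still dummy interfaces.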
logSystem->tLogs[1]->populateSatelliteTagLocations( - logSystem->logRouterTags, oldLogSystem->logRouterTags, logSystem->txsTags, maxTxsTags); - logSystem->expectedLogSets++; - } - - if (oldLogSystem->tLogs.size()) { - logSystem->oldLogData.emplace_back(); - logSystem->oldLogData[0].tLogs = oldLogSystem->tLogs; - logSystem->oldLogData[0].epochBegin = oldLogSystem->tLogs[0]->startVersion; - logSystem->oldLogData[0].epochEnd = oldLogSystem->knownCommittedVersion + 1; - logSystem->oldLogData[0].logRouterTags = oldLogSystem->logRouterTags; - logSystem->oldLogData[0].txsTags = oldLogSystem->txsTags; - logSystem->oldLogData[0].pseudoLocalities = oldLogSystem->pseudoLocalities; - logSystem->oldLogData[0].epoch = oldLogSystem->epoch; - } - logSystem->oldLogData.insert( - logSystem->oldLogData.end(), oldLogSystem->oldLogData.begin(), oldLogSystem->oldLogData.end()); - - logSystem->tLogs[0]->startVersion = oldLogSystem->knownCommittedVersion + 1; - logSystem->backupStartVersion = oldLogSystem->knownCommittedVersion + 1; - state int lockNum = 0; - while (lockNum < oldLogSystem->lockResults.size()) { - if (oldLogSystem->lockResults[lockNum].logSet->locality == primaryLocality) { - if (oldLogSystem->lockResults[lockNum].isCurrent && - oldLogSystem->lockResults[lockNum].logSet->isLocal) { - break; - } - state Future stalledAfter = setAfter(recruitmentStalled, SERVER_KNOBS->MAX_RECOVERY_TIME, true); - loop { - auto versions = TagPartitionedLogSystem::getDurableVersion(logSystem->dbgid, - oldLogSystem->lockResults[lockNum]); - if (versions.present()) { - logSystem->tLogs[0]->startVersion = - std::min(std::min(versions.get().first + 1, oldLogSystem->lockResults[lockNum].epochEnd), - logSystem->tLogs[0]->startVersion); - break; - } - wait(TagPartitionedLogSystem::getDurableVersionChanged(oldLogSystem->lockResults[lockNum])); - } - stalledAfter.cancel(); - break; - } - lockNum++; - } - - vector localities; - localities.resize(recr.tLogs.size()); - for (int i = 0; i < recr.tLogs.size(); i++) { - localities[i] = recr.tLogs[i].locality; - } - - state Future oldRouterRecruitment = Never(); - TraceEvent("NewEpochStartVersion", oldLogSystem->getDebugID()) - .detail("StartVersion", logSystem->tLogs[0]->startVersion) - .detail("EpochEnd", oldLogSystem->knownCommittedVersion + 1) - .detail("Locality", primaryLocality) - .detail("OldLogRouterTags", oldLogSystem->logRouterTags); - if (oldLogSystem->logRouterTags > 0 || - logSystem->tLogs[0]->startVersion < oldLogSystem->knownCommittedVersion + 1) { - oldRouterRecruitment = TagPartitionedLogSystem::recruitOldLogRouters(oldLogSystem.getPtr(), - recr.oldLogRouters, - recoveryCount, - primaryLocality, - logSystem->tLogs[0]->startVersion, - localities, - logSystem->tLogs[0]->tLogPolicy, - false); - if (oldLogSystem->knownCommittedVersion - logSystem->tLogs[0]->startVersion > - SERVER_KNOBS->MAX_RECOVERY_VERSIONS) { - // make sure we can recover in the other DC. 
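// Concretely (hypothetical numbers): if knownCommittedVersion = 2,000,000, the chosen
// startVersion = 1,500,000, and MAX_RECOVERY_VERSIONS = 200,000, this primary would have to
// replay 500,000 versions; if the remote DC already has a durable version, recruitmentStalled
// is set so the recovery can be attempted there instead.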
- for (auto& lockResult : oldLogSystem->lockResults) { - if (lockResult.logSet->locality == remoteLocality) { - if (TagPartitionedLogSystem::getDurableVersion(logSystem->dbgid, lockResult).present()) { - recruitmentStalled->set(true); - } - } - } - } - } else { - oldLogSystem->logSystemConfigChanged.trigger(); - } - - std::vector localTags = getLocalTags(primaryLocality, allTags); - state LogSystemConfig oldLogSystemConfig = oldLogSystem->getLogSystemConfig(); - - state vector> initializationReplies; - vector reqs(recr.tLogs.size()); - - logSystem->tLogs[0]->tLogLocalities.resize(recr.tLogs.size()); - logSystem->tLogs[0]->logServers.resize( - recr.tLogs.size()); // Dummy interfaces, so that logSystem->getPushLocations() below uses the correct size - logSystem->tLogs[0]->updateLocalitySet(localities); - - std::vector locations; - for (Tag tag : localTags) { - locations.clear(); - logSystem->tLogs[0]->getPushLocations(VectorRef(&tag, 1), locations, 0); - for (int loc : locations) - reqs[loc].recoverTags.push_back(tag); - } - for (int i = 0; i < oldLogSystem->logRouterTags; i++) { - Tag tag = Tag(tagLocalityLogRouter, i); - reqs[logSystem->tLogs[0]->bestLocationFor(tag)].recoverTags.push_back(tag); - } - bool nonShardedTxs = logSystem->getTLogVersion() < TLogVersion::V4; - if (oldLogSystem->tLogs.size()) { - for (int i = needsOldTxs ? -1 : 0; i < maxTxsTags; i++) { - Tag tag = i == -1 ? txsTag : Tag(tagLocalityTxs, i); - Tag pushTag = (i == -1 || nonShardedTxs) ? txsTag : Tag(tagLocalityTxs, i % logSystem->txsTags); - locations.clear(); - logSystem->tLogs[0]->getPushLocations(VectorRef(&pushTag, 1), locations, 0); - for (int loc : locations) - reqs[loc].recoverTags.push_back(tag); - } - if (nonShardedTxs) { - localTags.push_back(txsTag); - } else { - for (int i = 0; i < logSystem->txsTags; i++) { - localTags.push_back(Tag(tagLocalityTxs, i)); - } - } - } - - for (int i = 0; i < recr.tLogs.size(); i++) { - InitializeTLogRequest& req = reqs[i]; - req.recruitmentID = logSystem->recruitmentID; - req.logVersion = configuration.tLogVersion; - req.storeType = configuration.tLogDataStoreType; - req.spillType = configuration.tLogSpillType; - req.recoverFrom = oldLogSystemConfig; - req.recoverAt = oldLogSystem->recoverAt.get(); - req.knownCommittedVersion = oldLogSystem->knownCommittedVersion; - req.epoch = recoveryCount; - req.locality = primaryLocality; - req.remoteTag = Tag(tagLocalityRemoteLog, i); - req.isPrimary = true; - req.allTags = localTags; - req.startVersion = logSystem->tLogs[0]->startVersion; - req.logRouterTags = logSystem->logRouterTags; - req.txsTags = logSystem->txsTags; - } - - initializationReplies.reserve(recr.tLogs.size()); - for (int i = 0; i < recr.tLogs.size(); i++) - initializationReplies.push_back(transformErrors( - throwErrorOr(recr.tLogs[i].tLog.getReplyUnlessFailedFor( - reqs[i], SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)), - master_recovery_failed())); - - state std::vector> recoveryComplete; - - if (region.satelliteTLogReplicationFactor > 0 && configuration.usableRegions > 1) { - state vector> satelliteInitializationReplies; - vector sreqs(recr.satelliteTLogs.size()); - std::vector satelliteTags; - - if (logSystem->logRouterTags) { - for (int i = 0; i < oldLogSystem->logRouterTags; i++) { - Tag tag = Tag(tagLocalityLogRouter, i); - // Satellite logs will index a mutation with tagLocalityLogRouter with an id greater than - // the number of log routers as having an id mod the number of log routers. 
We thus need - // to make sure that if we're going from more log routers in the previous generation to - // less log routers in the newer one, that we map the log router tags onto satellites that - // are the preferred location for id%logRouterTags. - Tag pushLocation = Tag(tagLocalityLogRouter, i % logSystem->logRouterTags); - locations.clear(); - logSystem->tLogs[1]->getPushLocations(VectorRef(&pushLocation, 1), locations, 0); - for (int loc : locations) - sreqs[loc].recoverTags.push_back(tag); - } - } - if (oldLogSystem->tLogs.size()) { - for (int i = needsOldTxs ? -1 : 0; i < maxTxsTags; i++) { - Tag tag = i == -1 ? txsTag : Tag(tagLocalityTxs, i); - Tag pushTag = (i == -1 || nonShardedTxs) ? txsTag : Tag(tagLocalityTxs, i % logSystem->txsTags); - locations.clear(); - logSystem->tLogs[1]->getPushLocations(VectorRef(&pushTag, 1), locations, 0); - for (int loc : locations) - sreqs[loc].recoverTags.push_back(tag); - } - if (nonShardedTxs) { - satelliteTags.push_back(txsTag); - } else { - for (int i = 0; i < logSystem->txsTags; i++) { - satelliteTags.push_back(Tag(tagLocalityTxs, i)); - } - } - } - - for (int i = 0; i < recr.satelliteTLogs.size(); i++) { - InitializeTLogRequest& req = sreqs[i]; - req.recruitmentID = logSystem->recruitmentID; - req.logVersion = configuration.tLogVersion; - req.storeType = configuration.tLogDataStoreType; - req.spillType = configuration.tLogSpillType; - req.recoverFrom = oldLogSystemConfig; - req.recoverAt = oldLogSystem->recoverAt.get(); - req.knownCommittedVersion = oldLogSystem->knownCommittedVersion; - req.epoch = recoveryCount; - req.locality = tagLocalitySatellite; - req.remoteTag = Tag(); - req.isPrimary = true; - req.allTags = satelliteTags; - req.startVersion = oldLogSystem->knownCommittedVersion + 1; - req.logRouterTags = logSystem->logRouterTags; - req.txsTags = logSystem->txsTags; - } - - satelliteInitializationReplies.reserve(recr.satelliteTLogs.size()); - for (int i = 0; i < recr.satelliteTLogs.size(); i++) - satelliteInitializationReplies.push_back(transformErrors( - throwErrorOr(recr.satelliteTLogs[i].tLog.getReplyUnlessFailedFor( - sreqs[i], SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)), - master_recovery_failed())); - - wait(waitForAll(satelliteInitializationReplies) || oldRouterRecruitment); - - for (int i = 0; i < satelliteInitializationReplies.size(); i++) { - logSystem->tLogs[1]->logServers[i] = makeReference>>( - OptionalInterface(satelliteInitializationReplies[i].get())); - } - - for (int i = 0; i < logSystem->tLogs[1]->logServers.size(); i++) - recoveryComplete.push_back(transformErrors( - throwErrorOr( - logSystem->tLogs[1]->logServers[i]->get().interf().recoveryFinished.getReplyUnlessFailedFor( - TLogRecoveryFinishedRequest(), - SERVER_KNOBS->TLOG_TIMEOUT, - SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)), - master_recovery_failed())); - } - - wait(waitForAll(initializationReplies) || oldRouterRecruitment); - - for (int i = 0; i < initializationReplies.size(); i++) { - logSystem->tLogs[0]->logServers[i] = makeReference>>( - OptionalInterface(initializationReplies[i].get())); - logSystem->tLogs[0]->tLogLocalities[i] = recr.tLogs[i].locality; - } - filterLocalityDataForPolicy(logSystem->tLogs[0]->tLogPolicy, &logSystem->tLogs[0]->tLogLocalities); - - // Don't force failure of recovery if it took us a long time to recover. 
This avoids multiple long running - // recoveries causing tests to timeout - if (BUGGIFY && now() - startTime < 300 && g_network->isSimulated() && g_simulator.speedUpSimulation) - throw master_recovery_failed(); - - for (int i = 0; i < logSystem->tLogs[0]->logServers.size(); i++) - recoveryComplete.push_back(transformErrors( - throwErrorOr( - logSystem->tLogs[0]->logServers[i]->get().interf().recoveryFinished.getReplyUnlessFailedFor( - TLogRecoveryFinishedRequest(), - SERVER_KNOBS->TLOG_TIMEOUT, - SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)), - master_recovery_failed())); - logSystem->recoveryComplete = waitForAll(recoveryComplete); - - if (configuration.usableRegions > 1) { - logSystem->hasRemoteServers = true; - logSystem->remoteRecovery = TagPartitionedLogSystem::newRemoteEpoch(logSystem.getPtr(), - oldLogSystem, - fRemoteWorkers, - configuration, - recoveryCount, - remoteLocality, - allTags); - if (oldLogSystem->tLogs.size() > 0 && oldLogSystem->tLogs[0]->locality == tagLocalitySpecial) { - // The wait is required so that we know both primary logs and remote logs have copied the data between - // the known committed version and the recovery version. - // FIXME: we can remove this wait once we are able to have log routers which can ship data to the remote - // logs without using log router tags. - wait(logSystem->remoteRecovery); - } - } else { - logSystem->hasRemoteServers = false; - logSystem->remoteRecovery = logSystem->recoveryComplete; - logSystem->remoteRecoveryComplete = logSystem->recoveryComplete; - } - - return logSystem; - } + Reference> recruitmentStalled); ACTOR static Future trackRejoins( UID dbgid, std::vector>>, Reference>> logServers, - FutureStream rejoinRequests) { - state std::map> lastReply; - state std::set logsWaiting; - state double startTime = now(); - state Future warnTimeout = delay(SERVER_KNOBS->TLOG_SLOW_REJOIN_WARN_TIMEOUT_SECS); + FutureStream rejoinRequests); - for (const auto& log : logServers) { - logsWaiting.insert(log.first->get().id()); - } - - try { - loop choose { - when(TLogRejoinRequest req = waitNext(rejoinRequests)) { - int pos = -1; - for (int i = 0; i < logServers.size(); i++) { - if (logServers[i].first->get().id() == req.myInterface.id()) { - pos = i; - logsWaiting.erase(logServers[i].first->get().id()); - break; - } - } - if (pos != -1) { - TraceEvent("TLogJoinedMe", dbgid) - .detail("TLog", req.myInterface.id()) - .detail("Address", req.myInterface.commit.getEndpoint().getPrimaryAddress().toString()); - if (!logServers[pos].first->get().present() || - req.myInterface.commit.getEndpoint() != - logServers[pos].first->get().interf().commit.getEndpoint()) { - TLogInterface interf = req.myInterface; - filterLocalityDataForPolicyDcAndProcess(logServers[pos].second, &interf.filteredLocality); - logServers[pos].first->setUnconditional(OptionalInterface(interf)); - } - lastReply[req.myInterface.id()].send(TLogRejoinReply{ false }); - lastReply[req.myInterface.id()] = req.reply; - } else { - TraceEvent("TLogJoinedMeUnknown", dbgid) - .detail("TLog", req.myInterface.id()) - .detail("Address", req.myInterface.commit.getEndpoint().getPrimaryAddress().toString()); - req.reply.send(true); - } - } - when(wait(warnTimeout)) { - for (const auto& logId : logsWaiting) { - TraceEvent(SevWarnAlways, "TLogRejoinSlow", dbgid) - .detail("Elapsed", startTime - now()) - .detail("LogId", logId); - } - warnTimeout = Never(); - } - } - } catch (...) 
{ - for (auto it = lastReply.begin(); it != lastReply.end(); ++it) - it->second.send(TLogRejoinReply{ true }); - throw; - } - } - - ACTOR static Future lockTLog(UID myID, Reference>> tlog) { - TraceEvent("TLogLockStarted", myID).detail("TLog", tlog->get().id()); - loop { - choose { - when(TLogLockResult data = - wait(tlog->get().present() - ? brokenPromiseToNever(tlog->get().interf().lock.getReply()) - : Never())) { - TraceEvent("TLogLocked", myID).detail("TLog", tlog->get().id()).detail("End", data.end); - return data; - } - when(wait(tlog->onChange())) {} - } - } - } + ACTOR static Future lockTLog(UID myID, Reference>> tlog); // FIXME: disabled during merge, update and use in epochEnd() /* - static void lockMinimalTLogSet(const UID& dbgid, const DBCoreState& prevState, - const std::vector>>>& logServers, - const std::vector>>& logFailed, - vector>* tLogReply ) { - // Invariant: tLogReply[i] must correspond to the tlog stored as logServers[i]. - ASSERT(tLogReply->size() == prevState.tLogLocalities.size()); - ASSERT(logFailed.size() == tLogReply->size()); + static void lockMinimalTLogSet(const UID& dbgid, const DBCoreState& prevState, + const std::vector>>>& + logServers, const std::vector>>& logFailed, vector>* tLogReply ) + { + // Invariant: tLogReply[i] must correspond to the tlog stored as logServers[i]. + ASSERT(tLogReply->size() == prevState.tLogLocalities.size()); + ASSERT(logFailed.size() == tLogReply->size()); - // For any given index, only one of the following will be true. - auto locking_completed = [&logFailed, tLogReply](int index) { - const auto& entry = tLogReply->at(index); - return !logFailed[index]->get() && entry.isValid() && entry.isReady() && !entry.isError(); - }; - auto locking_failed = [&logFailed, tLogReply](int index) { - const auto& entry = tLogReply->at(index); - return logFailed[index]->get() || (entry.isValid() && entry.isReady() && entry.isError()); - }; - auto locking_pending = [&logFailed, tLogReply](int index) { - const auto& entry = tLogReply->at(index); - return !logFailed[index]->get() && (entry.isValid() && !entry.isReady()); - }; - auto locking_skipped = [&logFailed, tLogReply](int index) { - const auto& entry = tLogReply->at(index); - return !logFailed[index]->get() && !entry.isValid(); - }; + // For any given index, only one of the following will be true. 
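// Summarized as a truth table over (failed, valid, ready, error) for each lock reply:
//   completed: !failed && valid && ready && !error
//   failed:     failed || (valid && ready && error)
//   pending:   !failed && valid && !ready
//   skipped:   !failed && !valid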
+ auto locking_completed = [&logFailed, tLogReply](int index) { + const auto& entry = tLogReply->at(index); + return !logFailed[index]->get() && entry.isValid() && entry.isReady() && !entry.isError(); + }; + auto locking_failed = [&logFailed, tLogReply](int index) { + const auto& entry = tLogReply->at(index); + return logFailed[index]->get() || (entry.isValid() && entry.isReady() && entry.isError()); + }; + auto locking_pending = [&logFailed, tLogReply](int index) { + const auto& entry = tLogReply->at(index); + return !logFailed[index]->get() && (entry.isValid() && !entry.isReady()); + }; + auto locking_skipped = [&logFailed, tLogReply](int index) { + const auto& entry = tLogReply->at(index); + return !logFailed[index]->get() && !entry.isValid(); + }; - auto can_obtain_quorum = [&prevState](std::function filter) { - LocalityGroup filter_true; - std::vector filter_false, unused; - for (int i = 0; i < prevState.tLogLocalities.size() ; i++) { - if (filter(i)) { - filter_true.add(prevState.tLogLocalities[i]); - } else { - filter_false.push_back(prevState.tLogLocalities[i]); + auto can_obtain_quorum = [&prevState](std::function filter) { + LocalityGroup filter_true; + std::vector filter_false, unused; + for (int i = 0; i < prevState.tLogLocalities.size() ; i++) { + if (filter(i)) { + filter_true.add(prevState.tLogLocalities[i]); + } else { + filter_false.push_back(prevState.tLogLocalities[i]); + } + } + bool valid = filter_true.validate(prevState.tLogPolicy); + if (!valid && prevState.tLogWriteAntiQuorum > 0 ) { + valid = !validateAllCombinations(unused, filter_true, prevState.tLogPolicy, filter_false, + prevState.tLogWriteAntiQuorum, false); + } + return valid; + }; + + // Step 1: Verify that if all the failed TLogs come back, they can't form a quorum. + if (can_obtain_quorum(locking_failed)) { + TraceEvent(SevInfo, "MasterRecoveryTLogLockingImpossible", dbgid).log(); + return; + } + + // Step 2: It's possible for us to succeed, but we need to lock additional logs. + // + // First, we need an accurate picture of what TLogs we're capable of locking. We can't tell the + // difference between a temporarily failed TLog and a permanently failed TLog. Thus, we assume + // all failures are permanent, and manually re-issue lock requests if they rejoin. + for (int i = 0; i < logFailed.size(); i++) { + const auto& r = tLogReply->at(i); + TEST(locking_failed(i) && (r.isValid() && !r.isReady())); // A TLog failed with a pending request. + // The reboot_a_tlog BUGGIFY below should cause the above case to be hit. + if (locking_failed(i)) { + tLogReply->at(i) = Future(); } } - bool valid = filter_true.validate(prevState.tLogPolicy); - if (!valid && prevState.tLogWriteAntiQuorum > 0 ) { - valid = !validateAllCombinations(unused, filter_true, prevState.tLogPolicy, filter_false, - prevState.tLogWriteAntiQuorum, false); + + // We're trying to paritition the set of old tlogs into two sets, L and R, such that: + // (1). R does not validate the policy + // (2). |R| is as large as possible + // (3). L contains all the already-locked TLogs + // and then we only issue lock requests to TLogs in L. This is safe, as R does not have quorum, + // so no commits may occur. It does not matter if L forms a quorum or not. + // + // We form these sets by starting with L as all machines and R as the empty set, and moving a + // random machine from L to R until (1) or (2) no longer holds as true. 
Code-wise, L is
+        // [0..end-can_omit), and R is [end-can_omit..end), and we move a random machine via randomizing
+        // the order of the tlogs. Choosing a random machine was verified to generate a good-enough
+        // result to be interesting in tests sufficiently frequently that we don't need to try to
+        // calculate the exact optimal solution.
+        std::vector<std::pair<LocalityData, int>> tlogs;
+        for (int i = 0; i < prevState.tLogLocalities.size(); i++) {
+            tlogs.emplace_back(prevState.tLogLocalities[i], i);
+        }
+        deterministicRandom()->randomShuffle(tlogs);
+        // Rearrange the array so that the left side holds logs closer to being locked and the
+        // right side holds logs that can't be locked. This makes us prefer locking already-locked
+        // TLogs, which is how we respect the decisions made in the previous execution.
+        auto idx_to_order = [&locking_completed, &locking_failed, &locking_pending, &locking_skipped](int index) {
+            bool complete = locking_completed(index);
+            bool pending = locking_pending(index);
+            bool skipped = locking_skipped(index);
+            bool failed = locking_failed(index);
+
+            ASSERT(complete + pending + skipped + failed == 1);
+
+            if (complete) return 0;
+            if (pending) return 1;
+            if (skipped) return 2;
+            if (failed) return 3;
+
+            ASSERT(false); // Programmer error.
+            return -1;
+        };
+        std::sort(tlogs.begin(), tlogs.end(),
+                  // TODO: Change the long pair type to `auto` once the toolchain supports C++17.
+                  [&idx_to_order](const std::pair<LocalityData, int>& lhs, const std::pair<LocalityData, int>& rhs) {
+                      return idx_to_order(lhs.second) < idx_to_order(rhs.second);
+                  });
+
+        // Indexes that aren't in the vector are the ones we're considering omitting. Remove
+        // indexes until the removed set forms a quorum.
+        int can_omit = 0;
+        std::vector<int> to_lock_indexes;
+        for (auto it = tlogs.cbegin(); it != tlogs.cend() - 1; it++) {
+            to_lock_indexes.push_back(it->second);
+        }
+        auto filter = [&to_lock_indexes](int index) {
+            return std::find(to_lock_indexes.cbegin(), to_lock_indexes.cend(), index) == to_lock_indexes.cend();
+        };
+        while (true) {
+            if (can_obtain_quorum(filter)) {
+                break;
+            } else {
+                can_omit++;
+                ASSERT(can_omit < tlogs.size());
+                to_lock_indexes.pop_back();
+            }
+        }
+        if (prevState.tLogReplicationFactor - prevState.tLogWriteAntiQuorum == 1) {
+            ASSERT(can_omit == 0);
+        }
+        // Our previous check of making sure there aren't too many failed logs should have prevented this.
+        ASSERT(!locking_failed(tlogs[tlogs.size() - can_omit - 1].second));
+
+        // If we've managed to leave more tlogs unlocked than (RF-AQ), it means we've hit the case
+        // where the policy engine has allowed us to have multiple logs in the same failure domain
+        // with independent sets of data. This case validates that no code is relying on the old
+        // quorum=(RF-AQ) logic and that it now goes through the policy engine instead.
+        TEST(can_omit >= prevState.tLogReplicationFactor -
+             prevState.tLogWriteAntiQuorum); // Locking a subset of the TLogs while ending an epoch.
+        const bool reboot_a_tlog =
+            g_network->now() - g_simulator.lastConnectionFailure > g_simulator.connectionFailuresDisableDuration &&
+            BUGGIFY && deterministicRandom()->random01() < 0.25;
+        TraceEvent(SevInfo, "MasterRecoveryTLogLocking", dbgid)
+            .detail("Locks", tlogs.size() - can_omit)
+            .detail("Skipped", can_omit)
+            .detail("Replication", prevState.tLogReplicationFactor)
+            .detail("Antiquorum", prevState.tLogWriteAntiQuorum)
+            .detail("RebootBuggify", reboot_a_tlog);
+        for (int i = 0; i < tlogs.size() - can_omit; i++) {
+            const int index = tlogs[i].second;
+            Future<TLogLockResult>& entry = tLogReply->at(index);
+            if (!entry.isValid()) {
+                entry = lockTLog(dbgid, logServers[index]);
+            }
+        }
+        if (reboot_a_tlog) {
+            g_simulator.lastConnectionFailure = g_network->now();
+            for (int i = 0; i < tlogs.size() - can_omit; i++) {
+                const int index = tlogs[i].second;
+                if (logServers[index]->get().present()) {
+                    g_simulator.rebootProcess(
+                        g_simulator.getProcessByAddress(logServers[index]->get().interf().address()),
+                        ISimulator::RebootProcess);
+                    break;
+                }
+            }
+        }
+        // Intentionally leave `tlogs.size() - can_omit` .. `tlogs.size()` as !isValid() Futures.
-        return valid;
-    };
-
-    // Step 1: Verify that if all the failed TLogs come back, they can't form a quorum.
-    if (can_obtain_quorum(locking_failed)) {
-        TraceEvent(SevInfo, "MasterRecoveryTLogLockingImpossible", dbgid).log();
-        return;
-    }
-
-    // Step 2: It's possible for us to succeed, but we need to lock additional logs.
-    //
-    // First, we need an accurate picture of what TLogs we're capable of locking. We can't tell the
-    // difference between a temporarily failed TLog and a permanently failed TLog. Thus, we assume
-    // all failures are permanent, and manually re-issue lock requests if they rejoin.
-    for (int i = 0; i < logFailed.size(); i++) {
-        const auto& r = tLogReply->at(i);
-        TEST(locking_failed(i) && (r.isValid() && !r.isReady())); // A TLog failed with a pending request.
-        // The reboot_a_tlog BUGGIFY below should cause the above case to be hit.
-        if (locking_failed(i)) {
-            tLogReply->at(i) = Future<TLogLockResult>();
-        }
-    }
-
-    // We're trying to partition the set of old tlogs into two sets, L and R, such that:
-    // (1). R does not validate the policy
-    // (2). |R| is as large as possible
-    // (3). L contains all the already-locked TLogs
-    // and then we only issue lock requests to TLogs in L. This is safe, as R does not have quorum,
-    // so no commits may occur. It does not matter if L forms a quorum or not.
-    //
-    // We form these sets by starting with L as all machines and R as the empty set, and moving a
-    // random machine from L to R until (1) or (2) no longer holds as true. Code-wise, L is
-    // [0..end-can_omit), and R is [end-can_omit..end), and we move a random machine via randomizing
-    // the order of the tlogs. Choosing a random machine was verified to generate a good-enough
-    // result to be interesting in tests sufficiently frequently that we don't need to try to
-    // calculate the exact optimal solution.
-    std::vector<std::pair<LocalityData, int>> tlogs;
-    for (int i = 0; i < prevState.tLogLocalities.size(); i++) {
-        tlogs.emplace_back(prevState.tLogLocalities[i], i);
-    }
-    deterministicRandom()->randomShuffle(tlogs);
-    // Rearrange the array so that the left side holds logs closer to being locked and the
-    // right side holds logs that can't be locked. This makes us prefer locking already-locked
-    // TLogs, which is how we respect the decisions made in the previous execution.
-    auto idx_to_order = [&locking_completed, &locking_failed, &locking_pending, &locking_skipped](int index) {
-        bool complete = locking_completed(index);
-        bool pending = locking_pending(index);
-        bool skipped = locking_skipped(index);
-        bool failed = locking_failed(index);
-
-        ASSERT(complete + pending + skipped + failed == 1);
-
-        if (complete) return 0;
-        if (pending) return 1;
-        if (skipped) return 2;
-        if (failed) return 3;
-
-        ASSERT(false); // Programmer error.
-        return -1;
-    };
-    std::sort(tlogs.begin(), tlogs.end(),
-              // TODO: Change the long pair type to `auto` once the toolchain supports C++17.
-              [&idx_to_order](const std::pair<LocalityData, int>& lhs, const std::pair<LocalityData, int>& rhs) {
-                  return idx_to_order(lhs.second) < idx_to_order(rhs.second);
-              });
-
-    // Indexes that aren't in the vector are the ones we're considering omitting. Remove
-    // indexes until the removed set forms a quorum.
-    int can_omit = 0;
-    std::vector<int> to_lock_indexes;
-    for (auto it = tlogs.cbegin(); it != tlogs.cend() - 1; it++) {
-        to_lock_indexes.push_back(it->second);
-    }
-    auto filter = [&to_lock_indexes](int index) {
-        return std::find(to_lock_indexes.cbegin(), to_lock_indexes.cend(), index) == to_lock_indexes.cend();
-    };
-    while (true) {
-        if (can_obtain_quorum(filter)) {
-            break;
-        } else {
-            can_omit++;
-            ASSERT(can_omit < tlogs.size());
-            to_lock_indexes.pop_back();
-        }
-    }
-    if (prevState.tLogReplicationFactor - prevState.tLogWriteAntiQuorum == 1) {
-        ASSERT(can_omit == 0);
-    }
-    // Our previous check of making sure there aren't too many failed logs should have prevented this.
-    ASSERT(!locking_failed(tlogs[tlogs.size() - can_omit - 1].second));
-
-    // If we've managed to leave more tlogs unlocked than (RF-AQ), it means we've hit the case
-    // where the policy engine has allowed us to have multiple logs in the same failure domain
-    // with independent sets of data. This case validates that no code is relying on the old
-    // quorum=(RF-AQ) logic and that it now goes through the policy engine instead.
-    TEST(can_omit >= prevState.tLogReplicationFactor -
-         prevState.tLogWriteAntiQuorum); // Locking a subset of the TLogs while ending an epoch.
-    const bool reboot_a_tlog =
-        g_network->now() - g_simulator.lastConnectionFailure > g_simulator.connectionFailuresDisableDuration &&
-        BUGGIFY && deterministicRandom()->random01() < 0.25;
-    TraceEvent(SevInfo, "MasterRecoveryTLogLocking", dbgid)
-        .detail("Locks", tlogs.size() - can_omit)
-        .detail("Skipped", can_omit)
-        .detail("Replication", prevState.tLogReplicationFactor)
-        .detail("Antiquorum", prevState.tLogWriteAntiQuorum)
-        .detail("RebootBuggify", reboot_a_tlog);
-    for (int i = 0; i < tlogs.size() - can_omit; i++) {
-        const int index = tlogs[i].second;
-        Future<TLogLockResult>& entry = tLogReply->at(index);
-        if (!entry.isValid()) {
-            entry = lockTLog(dbgid, logServers[index]);
-        }
-    }
-    if (reboot_a_tlog) {
-        g_simulator.lastConnectionFailure = g_network->now();
-        for (int i = 0; i < tlogs.size() - can_omit; i++) {
-            const int index = tlogs[i].second;
-            if (logServers[index]->get().present()) {
-                g_simulator.rebootProcess(
-                    g_simulator.getProcessByAddress(logServers[index]->get().interf().address()),
-                    ISimulator::RebootProcess);
-                break;
-            }
-        }
-    }
-    // Intentionally leave `tlogs.size() - can_omit` .. `tlogs.size()` as !isValid() Futures.
-    }*/
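+        // For intuition: R starts as just the rightmost shuffled log and grows one log at a time;
+        // the loop above breaks as soon as the candidate suffix could satisfy the quorum check, so
+        // can_omit ends up being the largest number of trailing logs that provably cannot form a
+        // quorum on their own. Only the prefix L is actually sent lock requests.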
+    }*/
 
 	template <class T>
-	static vector<T> getReadyNonError(vector<Future<T>> const& futures) {
-		// Return the values of those futures which have (non-error) values ready
-		std::vector<T> result;
-		for (auto& f : futures)
-			if (f.isReady() && !f.isError())
-				result.push_back(f.get());
-		return result;
-	}
-
-	struct sort_by_end {
-		bool operator()(TLogLockResult const& a, TLogLockResult const& b) const { return a.end < b.end; }
-	};
+	static vector<T> getReadyNonError(vector<Future<T>> const& futures);
 };
 
 Future<Void> ILogSystem::recoverAndEndEpoch(Reference<AsyncVar<Reference<ILogSystem>>> const& outLogSystem,
@@ -3339,3 +660,2921 @@ Reference<ILogSystem> ILogSystem::fromServerDBInfo(UID const& dbgid,
                                                    Optional<PromiseStream<Future<Void>>> addActor) {
 	return fromLogSystemConfig(dbgid, dbInfo.myLocality, dbInfo.logSystemConfig, false, useRecoveredAt, addActor);
 }
+
+void TagPartitionedLogSystem::stopRejoins() {
+	rejoins = Future<Void>();
+}
+
+void TagPartitionedLogSystem::addref() {
+	ReferenceCounted<TagPartitionedLogSystem>::addref();
+}
+
+void TagPartitionedLogSystem::delref() {
+	ReferenceCounted<TagPartitionedLogSystem>::delref();
+}
+
+std::string TagPartitionedLogSystem::describe() const {
+	std::string result;
+	for (int i = 0; i < tLogs.size(); i++) {
+		result += format("%d: ", i);
+		for (int j = 0; j < tLogs[i]->logServers.size(); j++) {
+			result += tLogs[i]->logServers[j]->get().id().toString() +
+			          ((j == tLogs[i]->logServers.size() - 1) ? " " : ", ");
+		}
+	}
+	return result;
+}
+
+UID TagPartitionedLogSystem::getDebugID() const {
+	return dbgid;
+}
+
+void TagPartitionedLogSystem::addPseudoLocality(int8_t locality) {
+	ASSERT(locality < 0);
+	pseudoLocalities.insert(locality);
+	for (uint16_t i = 0; i < logRouterTags; i++) {
+		pseudoLocalityPopVersion[Tag(locality, i)] = 0;
+	}
+}
+
+Tag TagPartitionedLogSystem::getPseudoPopTag(Tag tag, ProcessClass::ClassType type) const {
+	switch (type) {
+	case ProcessClass::LogRouterClass:
+		if (tag.locality == tagLocalityLogRouter) {
+			ASSERT(pseudoLocalities.count(tagLocalityLogRouterMapped) > 0);
+			tag.locality = tagLocalityLogRouterMapped;
+		}
+		break;
+
+	case ProcessClass::BackupClass:
+		if (tag.locality == tagLocalityLogRouter) {
+			ASSERT(pseudoLocalities.count(tagLocalityBackup) > 0);
+			tag.locality = tagLocalityBackup;
+		}
+		break;
+
+	default: // This should be an error at the caller site.
+		break;
+	}
+	return tag;
+}
+
+bool TagPartitionedLogSystem::hasPseudoLocality(int8_t locality) const {
+	return pseudoLocalities.count(locality) > 0;
+}
+
+Version TagPartitionedLogSystem::popPseudoLocalityTag(Tag tag, Version upTo) {
+	ASSERT(isPseudoLocality(tag.locality) && hasPseudoLocality(tag.locality));
+
+	Version& localityVersion = pseudoLocalityPopVersion[tag];
+	localityVersion = std::max(localityVersion, upTo);
+	Version minVersion = localityVersion;
+	// Why do we need to use the minimum popped version among all tags? Reason: for example,
+	// two pseudo tags pop to 100 and 150, respectively. It's only safe to pop min(100, 150),
+	// because [101, 150) is still needed by the other pseudo tag.
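+	// Concretely (illustrative values): with pseudoLocalities = {tagLocalityBackup,
+	// tagLocalityLogRouterMapped} and tag.id = 0, if the backup tag has popped to 150 while the
+	// mapped log-router tag has only popped to 100, this function returns 100; once the other
+	// tag later pops to 120, the next call returns min(150, 120) = 120.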
+ for (const int8_t locality : pseudoLocalities) { + minVersion = std::min(minVersion, pseudoLocalityPopVersion[Tag(locality, tag.id)]); + } + // TraceEvent("TLogPopPseudoTag", dbgid).detail("Tag", tag.toString()).detail("Version", upTo).detail("PopVersion", minVersion); + return minVersion; +} + +Future TagPartitionedLogSystem::recoverAndEndEpoch(Reference>> const& outLogSystem, + UID const& dbgid, + DBCoreState const& oldState, + FutureStream const& rejoins, + LocalityData const& locality, + bool* forceRecovery) { + return epochEnd(outLogSystem, dbgid, oldState, rejoins, locality, forceRecovery); +} + +Reference TagPartitionedLogSystem::fromLogSystemConfig(UID const& dbgid, + LocalityData const& locality, + LogSystemConfig const& lsConf, + bool excludeRemote, + bool useRecoveredAt, + Optional>> addActor) { + ASSERT(lsConf.logSystemType == LogSystemType::tagPartitioned || + (lsConf.logSystemType == LogSystemType::empty && !lsConf.tLogs.size())); + // ASSERT(lsConf.epoch == epoch); //< FIXME + auto logSystem = makeReference(dbgid, locality, lsConf.epoch, addActor); + + logSystem->tLogs.reserve(lsConf.tLogs.size()); + logSystem->expectedLogSets = lsConf.expectedLogSets; + logSystem->logRouterTags = lsConf.logRouterTags; + logSystem->txsTags = lsConf.txsTags; + logSystem->recruitmentID = lsConf.recruitmentID; + logSystem->stopped = lsConf.stopped; + if (useRecoveredAt) { + logSystem->recoveredAt = lsConf.recoveredAt; + } + logSystem->pseudoLocalities = lsConf.pseudoLocalities; + for (const TLogSet& tLogSet : lsConf.tLogs) { + if (!excludeRemote || tLogSet.isLocal) { + logSystem->tLogs.push_back(makeReference(tLogSet)); + } + } + + for (const auto& oldTlogConf : lsConf.oldTLogs) { + logSystem->oldLogData.emplace_back(oldTlogConf); + //TraceEvent("BWFromLSConf") + // .detail("Epoch", logSystem->oldLogData.back().epoch) + // .detail("Version", logSystem->oldLogData.back().epochEnd); + } + + logSystem->logSystemType = lsConf.logSystemType; + logSystem->oldestBackupEpoch = lsConf.oldestBackupEpoch; + return logSystem; +} + +Reference TagPartitionedLogSystem::fromOldLogSystemConfig(UID const& dbgid, + LocalityData const& locality, + LogSystemConfig const& lsConf) { + ASSERT(lsConf.logSystemType == LogSystemType::tagPartitioned || + (lsConf.logSystemType == LogSystemType::empty && !lsConf.tLogs.size())); + // ASSERT(lsConf.epoch == epoch); //< FIXME + const LogEpoch e = lsConf.oldTLogs.size() > 0 ? 
lsConf.oldTLogs[0].epoch : 0; + auto logSystem = makeReference(dbgid, locality, e); + + if (lsConf.oldTLogs.size()) { + for (const TLogSet& tLogSet : lsConf.oldTLogs[0].tLogs) { + logSystem->tLogs.push_back(makeReference(tLogSet)); + } + logSystem->logRouterTags = lsConf.oldTLogs[0].logRouterTags; + logSystem->txsTags = lsConf.oldTLogs[0].txsTags; + // logSystem->epochEnd = lsConf.oldTLogs[0].epochEnd; + + for (int i = 1; i < lsConf.oldTLogs.size(); i++) { + logSystem->oldLogData.emplace_back(lsConf.oldTLogs[i]); + } + } + logSystem->logSystemType = lsConf.logSystemType; + logSystem->stopped = true; + logSystem->pseudoLocalities = lsConf.pseudoLocalities; + + return logSystem; +} + +void TagPartitionedLogSystem::toCoreState(DBCoreState& newState) { + if (recoveryComplete.isValid() && recoveryComplete.isError()) + throw recoveryComplete.getError(); + + if (remoteRecoveryComplete.isValid() && remoteRecoveryComplete.isError()) + throw remoteRecoveryComplete.getError(); + + newState.tLogs.clear(); + newState.logRouterTags = logRouterTags; + newState.txsTags = txsTags; + newState.pseudoLocalities = pseudoLocalities; + for (const auto& t : tLogs) { + if (t->logServers.size()) { + newState.tLogs.emplace_back(*t); + newState.tLogs.back().tLogLocalities.clear(); + for (const auto& log : t->logServers) { + newState.tLogs.back().tLogLocalities.push_back(log->get().interf().filteredLocality); + } + } + } + + newState.oldTLogData.clear(); + if (!recoveryComplete.isValid() || !recoveryComplete.isReady() || + (repopulateRegionAntiQuorum == 0 && (!remoteRecoveryComplete.isValid() || !remoteRecoveryComplete.isReady())) || + epoch != oldestBackupEpoch) { + for (const auto& oldData : oldLogData) { + newState.oldTLogData.emplace_back(oldData); + TraceEvent("BWToCore") + .detail("Epoch", newState.oldTLogData.back().epoch) + .detail("TotalTags", newState.oldTLogData.back().logRouterTags) + .detail("BeginVersion", newState.oldTLogData.back().epochBegin) + .detail("EndVersion", newState.oldTLogData.back().epochEnd); + } + } + + newState.logSystemType = logSystemType; +} + +bool TagPartitionedLogSystem::remoteStorageRecovered() { + return remoteRecoveryComplete.isValid() && remoteRecoveryComplete.isReady(); +} + +Future TagPartitionedLogSystem::onCoreStateChanged() { + std::vector> changes; + changes.push_back(Never()); + if (recoveryComplete.isValid() && !recoveryComplete.isReady()) { + changes.push_back(recoveryComplete); + } + if (remoteRecovery.isValid() && !remoteRecovery.isReady()) { + changes.push_back(remoteRecovery); + } + if (remoteRecoveryComplete.isValid() && !remoteRecoveryComplete.isReady()) { + changes.push_back(remoteRecoveryComplete); + } + changes.push_back(backupWorkerChanged.onTrigger()); // changes to oldestBackupEpoch + return waitForAny(changes); +} + +void TagPartitionedLogSystem::coreStateWritten(DBCoreState const& newState) { + if (!newState.oldTLogData.size()) { + recoveryCompleteWrittenToCoreState.set(true); + } + for (auto& t : newState.tLogs) { + if (!t.isLocal) { + TraceEvent("RemoteLogsWritten", dbgid).log(); + remoteLogsWrittenToCoreState = true; + break; + } + } +} + +Future TagPartitionedLogSystem::onError() { + return onError_internal(this); +} + +ACTOR Future TagPartitionedLogSystem::onError_internal(TagPartitionedLogSystem* self) { + // Never returns normally, but throws an error if the subsystem stops working + loop { + std::vector> failed; + std::vector> backupFailed(1, Never()); + std::vector> changes; + + for (auto& it : self->tLogs) { + for (auto& t : it->logServers) { + 
if (t->get().present()) { + failed.push_back( + waitFailureClient(t->get().interf().waitFailure, + SERVER_KNOBS->TLOG_TIMEOUT, + -SERVER_KNOBS->TLOG_TIMEOUT / SERVER_KNOBS->SECONDS_BEFORE_NO_FAILURE_DELAY, + /*trace=*/true)); + } else { + changes.push_back(t->onChange()); + } + } + for (auto& t : it->logRouters) { + if (t->get().present()) { + failed.push_back( + waitFailureClient(t->get().interf().waitFailure, + SERVER_KNOBS->TLOG_TIMEOUT, + -SERVER_KNOBS->TLOG_TIMEOUT / SERVER_KNOBS->SECONDS_BEFORE_NO_FAILURE_DELAY, + /*trace=*/true)); + } else { + changes.push_back(t->onChange()); + } + } + for (const auto& worker : it->backupWorkers) { + if (worker->get().present()) { + backupFailed.push_back( + waitFailureClient(worker->get().interf().waitFailure, + SERVER_KNOBS->BACKUP_TIMEOUT, + -SERVER_KNOBS->BACKUP_TIMEOUT / SERVER_KNOBS->SECONDS_BEFORE_NO_FAILURE_DELAY, + /*trace=*/true)); + } else { + changes.push_back(worker->onChange()); + } + } + } + + if (!self->recoveryCompleteWrittenToCoreState.get()) { + for (auto& old : self->oldLogData) { + for (auto& it : old.tLogs) { + for (auto& t : it->logRouters) { + if (t->get().present()) { + failed.push_back(waitFailureClient(t->get().interf().waitFailure, + SERVER_KNOBS->TLOG_TIMEOUT, + -SERVER_KNOBS->TLOG_TIMEOUT / + SERVER_KNOBS->SECONDS_BEFORE_NO_FAILURE_DELAY, + /*trace=*/true)); + } else { + changes.push_back(t->onChange()); + } + } + } + // Monitor changes of backup workers for old epochs. + for (const auto& worker : old.tLogs[0]->backupWorkers) { + if (worker->get().present()) { + backupFailed.push_back(waitFailureClient(worker->get().interf().waitFailure, + SERVER_KNOBS->BACKUP_TIMEOUT, + -SERVER_KNOBS->BACKUP_TIMEOUT / + SERVER_KNOBS->SECONDS_BEFORE_NO_FAILURE_DELAY, + /*trace=*/true)); + } else { + changes.push_back(worker->onChange()); + } + } + } + } + + if (self->hasRemoteServers && (!self->remoteRecovery.isReady() || self->remoteRecovery.isError())) { + changes.push_back(self->remoteRecovery); + } + + changes.push_back(self->recoveryCompleteWrittenToCoreState.onChange()); + changes.push_back(self->backupWorkerChanged.onTrigger()); + + ASSERT(failed.size() >= 1); + wait(quorum(changes, 1) || tagError(quorum(failed, 1), master_tlog_failed()) || + tagError(quorum(backupFailed, 1), master_backup_worker_failed())); + } +} + +ACTOR Future TagPartitionedLogSystem::pushResetChecker(Reference self, NetworkAddress addr) { + self->slowReplies = 0; + self->fastReplies = 0; + wait(delay(SERVER_KNOBS->PUSH_STATS_INTERVAL)); + TraceEvent("SlowPushStats") + .detail("PeerAddress", addr) + .detail("SlowReplies", self->slowReplies) + .detail("FastReplies", self->fastReplies); + if (self->slowReplies >= SERVER_KNOBS->PUSH_STATS_SLOW_AMOUNT && + self->slowReplies / double(self->slowReplies + self->fastReplies) >= SERVER_KNOBS->PUSH_STATS_SLOW_RATIO) { + FlowTransport::transport().resetConnection(addr); + self->lastReset = now(); + } + return Void(); +} + +ACTOR Future TagPartitionedLogSystem::recordPushMetrics(Reference self, + Reference dist, + NetworkAddress addr, + Future in) { + state double startTime = now(); + TLogCommitReply t = wait(in); + if (now() - self->lastReset > SERVER_KNOBS->PUSH_RESET_INTERVAL) { + if (now() - startTime > SERVER_KNOBS->PUSH_MAX_LATENCY) { + if (self->resetCheck.isReady()) { + self->resetCheck = TagPartitionedLogSystem::pushResetChecker(self, addr); + } + self->slowReplies++; + } else { + self->fastReplies++; + } + } + dist->sampleSeconds(now() - startTime); + return t; +} + +Future 
TagPartitionedLogSystem::push(Version prevVersion, + Version version, + Version knownCommittedVersion, + Version minKnownCommittedVersion, + LogPushData& data, + SpanID const& spanContext, + Optional debugID) { + // FIXME: Randomize request order as in LegacyLogSystem? + vector> quorumResults; + vector> allReplies; + int location = 0; + Span span("TPLS:push"_loc, spanContext); + for (auto& it : tLogs) { + if (it->isLocal && it->logServers.size()) { + if (it->connectionResetTrackers.size() == 0) { + for (int i = 0; i < it->logServers.size(); i++) { + it->connectionResetTrackers.push_back(makeReference()); + } + } + if (it->tlogPushDistTrackers.empty()) { + for (int i = 0; i < it->logServers.size(); i++) { + it->tlogPushDistTrackers.push_back( + Histogram::getHistogram("ToTlog_" + it->logServers[i]->get().interf().uniqueID.toString(), + it->logServers[i]->get().interf().address().toString(), + Histogram::Unit::microseconds)); + } + } + vector> tLogCommitResults; + for (int loc = 0; loc < it->logServers.size(); loc++) { + Standalone msg = data.getMessages(location); + data.recordEmptyMessage(location, msg); + allReplies.push_back(recordPushMetrics( + it->connectionResetTrackers[loc], + it->tlogPushDistTrackers[loc], + it->logServers[loc]->get().interf().address(), + it->logServers[loc]->get().interf().commit.getReply(TLogCommitRequest(spanContext, + msg.arena(), + prevVersion, + version, + knownCommittedVersion, + minKnownCommittedVersion, + msg, + debugID), + TaskPriority::ProxyTLogCommitReply))); + Future commitSuccess = success(allReplies.back()); + addActor.get().send(commitSuccess); + tLogCommitResults.push_back(commitSuccess); + location++; + } + quorumResults.push_back(quorum(tLogCommitResults, tLogCommitResults.size() - it->tLogWriteAntiQuorum)); + } + } + + return minVersionWhenReady(waitForAll(quorumResults), allReplies); +} + +Reference TagPartitionedLogSystem::peekAll(UID dbgid, + Version begin, + Version end, + Tag tag, + bool parallelGetMore) { + int bestSet = 0; + std::vector> localSets; + Version lastBegin = 0; + bool foundSpecial = false; + for (auto& log : tLogs) { + if (log->locality == tagLocalitySpecial || log->locality == tagLocalityUpgraded) { + foundSpecial = true; + } + if (log->isLocal && log->logServers.size() && + (log->locality == tagLocalitySpecial || log->locality == tagLocalityUpgraded || + log->locality == tag.locality || tag == txsTag || tag.locality == tagLocalityTxs || + tag.locality == tagLocalityLogRouter || + ((tag.locality == tagLocalityUpgraded || tag == cacheTag) && log->locality != tagLocalitySatellite))) { + lastBegin = std::max(lastBegin, log->startVersion); + localSets.push_back(log); + if (log->locality != tagLocalitySatellite) { + bestSet = localSets.size() - 1; + } + } + } + + if (!localSets.size()) { + lastBegin = end; + } + + if (begin >= lastBegin && localSets.size()) { + TraceEvent("TLogPeekAllCurrentOnly", dbgid) + .detail("Tag", tag.toString()) + .detail("Begin", begin) + .detail("End", end) + .detail("BestLogs", localSets[bestSet]->logServerString()); + return makeReference( + localSets, bestSet, localSets[bestSet]->bestLocationFor(tag), tag, begin, end, parallelGetMore); + } else { + std::vector> cursors; + std::vector epochEnds; + + if (lastBegin < end && localSets.size()) { + TraceEvent("TLogPeekAllAddingCurrent", dbgid) + .detail("Tag", tag.toString()) + .detail("Begin", begin) + .detail("End", end) + .detail("BestLogs", localSets[bestSet]->logServerString()); + cursors.push_back(makeReference( + localSets, bestSet, 
localSets[bestSet]->bestLocationFor(tag), tag, lastBegin, end, parallelGetMore)); + } + for (int i = 0; begin < lastBegin; i++) { + if (i == oldLogData.size()) { + if (tag == txsTag || tag.locality == tagLocalityTxs || tag == cacheTag) { + break; + } + TraceEvent("TLogPeekAllDead", dbgid) + .detail("Tag", tag.toString()) + .detail("Begin", begin) + .detail("End", end) + .detail("LastBegin", lastBegin) + .detail("OldLogDataSize", oldLogData.size()); + return makeReference( + Reference>>(), tag, begin, getPeekEnd(), false, false); + } + + int bestOldSet = 0; + std::vector> localOldSets; + Version thisBegin = begin; + bool thisSpecial = false; + for (auto& log : oldLogData[i].tLogs) { + if (log->locality == tagLocalitySpecial || log->locality == tagLocalityUpgraded) { + thisSpecial = true; + } + if (log->isLocal && log->logServers.size() && + (log->locality == tagLocalitySpecial || log->locality == tagLocalityUpgraded || + log->locality == tag.locality || tag == txsTag || tag.locality == tagLocalityTxs || + tag.locality == tagLocalityLogRouter || + ((tag.locality == tagLocalityUpgraded || tag == cacheTag) && + log->locality != tagLocalitySatellite))) { + thisBegin = std::max(thisBegin, log->startVersion); + localOldSets.push_back(log); + if (log->locality != tagLocalitySatellite) { + bestOldSet = localOldSets.size() - 1; + } + } + } + + if (!localOldSets.size()) { + TraceEvent("TLogPeekAllNoLocalSets", dbgid) + .detail("Tag", tag.toString()) + .detail("Begin", begin) + .detail("End", end) + .detail("LastBegin", lastBegin); + if (!cursors.size() && !foundSpecial) { + continue; + } + return makeReference( + Reference>>(), tag, begin, getPeekEnd(), false, false); + } + if (thisSpecial) { + foundSpecial = true; + } + + if (thisBegin < lastBegin) { + if (thisBegin < end) { + TraceEvent("TLogPeekAllAddingOld", dbgid) + .detail("Tag", tag.toString()) + .detail("Begin", begin) + .detail("End", end) + .detail("BestLogs", localOldSets[bestOldSet]->logServerString()) + .detail("LastBegin", lastBegin) + .detail("ThisBegin", thisBegin); + cursors.push_back( + makeReference(localOldSets, + bestOldSet, + localOldSets[bestOldSet]->bestLocationFor(tag), + tag, + thisBegin, + std::min(lastBegin, end), + parallelGetMore)); + epochEnds.push_back(LogMessageVersion(std::min(lastBegin, end))); + } + lastBegin = thisBegin; + } + } + + return makeReference(cursors, epochEnds); + } +} + +Reference TagPartitionedLogSystem::peekRemote(UID dbgid, + Version begin, + Optional end, + Tag tag, + bool parallelGetMore) { + int bestSet = -1; + Version lastBegin = recoveredAt.present() ? recoveredAt.get() + 1 : 0; + for (int t = 0; t < tLogs.size(); t++) { + if (tLogs[t]->isLocal) { + lastBegin = std::max(lastBegin, tLogs[t]->startVersion); + } + + if (tLogs[t]->logRouters.size()) { + ASSERT(bestSet == -1); + bestSet = t; + } + } + if (bestSet == -1) { + TraceEvent("TLogPeekRemoteNoBestSet", dbgid) + .detail("Tag", tag.toString()) + .detail("Begin", begin) + .detail("End", end.present() ? end.get() : getPeekEnd()); + return makeReference( + Reference>>(), tag, begin, getPeekEnd(), false, parallelGetMore); + } + if (begin >= lastBegin) { + TraceEvent("TLogPeekRemoteBestOnly", dbgid) + .detail("Tag", tag.toString()) + .detail("Begin", begin) + .detail("End", end.present() ? end.get() : getPeekEnd()) + .detail("BestSet", bestSet) + .detail("BestSetStart", lastBegin) + .detail("LogRouterIds", tLogs[bestSet]->logRouterString()); + return makeReference( + tLogs[bestSet]->logRouters, tag, begin, end.present() ? 
end.get() + 1 : getPeekEnd(), parallelGetMore); + } else { + std::vector> cursors; + std::vector epochEnds; + TraceEvent("TLogPeekRemoteAddingBest", dbgid) + .detail("Tag", tag.toString()) + .detail("Begin", begin) + .detail("End", end.present() ? end.get() : getPeekEnd()) + .detail("BestSet", bestSet) + .detail("BestSetStart", lastBegin) + .detail("LogRouterIds", tLogs[bestSet]->logRouterString()); + cursors.push_back(makeReference( + tLogs[bestSet]->logRouters, tag, lastBegin, end.present() ? end.get() + 1 : getPeekEnd(), parallelGetMore)); + int i = 0; + while (begin < lastBegin) { + if (i == oldLogData.size()) { + TraceEvent("TLogPeekRemoteDead", dbgid) + .detail("Tag", tag.toString()) + .detail("Begin", begin) + .detail("End", end.present() ? end.get() : getPeekEnd()) + .detail("LastBegin", lastBegin) + .detail("OldLogDataSize", oldLogData.size()); + return makeReference( + Reference>>(), + tag, + begin, + getPeekEnd(), + false, + parallelGetMore); + } + + int bestOldSet = -1; + Version thisBegin = begin; + for (int t = 0; t < oldLogData[i].tLogs.size(); t++) { + if (oldLogData[i].tLogs[t]->isLocal) { + thisBegin = std::max(thisBegin, oldLogData[i].tLogs[t]->startVersion); + } + + if (oldLogData[i].tLogs[t]->logRouters.size()) { + ASSERT(bestOldSet == -1); + bestOldSet = t; + } + } + if (bestOldSet == -1) { + TraceEvent("TLogPeekRemoteNoOldBestSet", dbgid) + .detail("Tag", tag.toString()) + .detail("Begin", begin) + .detail("End", end.present() ? end.get() : getPeekEnd()); + return makeReference( + Reference>>(), + tag, + begin, + getPeekEnd(), + false, + parallelGetMore); + } + + if (thisBegin < lastBegin) { + TraceEvent("TLogPeekRemoteAddingOldBest", dbgid) + .detail("Tag", tag.toString()) + .detail("Begin", begin) + .detail("End", end.present() ? end.get() : getPeekEnd()) + .detail("BestOldSet", bestOldSet) + .detail("LogRouterIds", oldLogData[i].tLogs[bestOldSet]->logRouterString()) + .detail("LastBegin", lastBegin) + .detail("ThisBegin", thisBegin) + .detail("BestStartVer", oldLogData[i].tLogs[bestOldSet]->startVersion); + cursors.push_back(makeReference( + oldLogData[i].tLogs[bestOldSet]->logRouters, tag, thisBegin, lastBegin, parallelGetMore)); + epochEnds.emplace_back(lastBegin); + lastBegin = thisBegin; + } + i++; + } + + return makeReference(cursors, epochEnds); + } +} + +Reference TagPartitionedLogSystem::peek(UID dbgid, + Version begin, + Optional end, + Tag tag, + bool parallelGetMore) { + if (!tLogs.size()) { + TraceEvent("TLogPeekNoLogSets", dbgid).detail("Tag", tag.toString()).detail("Begin", begin); + return makeReference( + Reference>>(), tag, begin, getPeekEnd(), false, false); + } + + if (tag.locality == tagLocalityRemoteLog) { + return peekRemote(dbgid, begin, end, tag, parallelGetMore); + } else { + return peekAll(dbgid, begin, getPeekEnd(), tag, parallelGetMore); + } +} + +Reference TagPartitionedLogSystem::peek(UID dbgid, + Version begin, + Optional end, + std::vector tags, + bool parallelGetMore) { + if (tags.empty()) { + TraceEvent("TLogPeekNoTags", dbgid).detail("Begin", begin); + return makeReference( + Reference>>(), invalidTag, begin, getPeekEnd(), false, false); + } + + if (tags.size() == 1) { + return peek(dbgid, begin, end, tags[0], parallelGetMore); + } + + std::vector> cursors; + cursors.reserve(tags.size()); + for (auto tag : tags) { + cursors.push_back(peek(dbgid, begin, end, tag, parallelGetMore)); + } + return makeReference(cursors, + begin, + end.present() ? 
end.get() + 1 : getPeekEnd(), + true, + tLogs[0]->locality == tagLocalityUpgraded, + false); +} + +Reference TagPartitionedLogSystem::peekLocal(UID dbgid, + Tag tag, + Version begin, + Version end, + bool useMergePeekCursors, + int8_t peekLocality) { + if (tag.locality >= 0 || tag.locality == tagLocalityUpgraded || tag.locality == tagLocalitySpecial) { + peekLocality = tag.locality; + } + ASSERT(peekLocality >= 0 || peekLocality == tagLocalityUpgraded || tag.locality == tagLocalitySpecial); + + int bestSet = -1; + bool foundSpecial = false; + int logCount = 0; + for (int t = 0; t < tLogs.size(); t++) { + if (tLogs[t]->logServers.size() && tLogs[t]->locality != tagLocalitySatellite) { + logCount++; + } + if (tLogs[t]->logServers.size() && + (tLogs[t]->locality == tagLocalitySpecial || tLogs[t]->locality == tagLocalityUpgraded || + tLogs[t]->locality == peekLocality || peekLocality == tagLocalityUpgraded || + peekLocality == tagLocalitySpecial)) { + if (tLogs[t]->locality == tagLocalitySpecial || tLogs[t]->locality == tagLocalityUpgraded) { + foundSpecial = true; + } + bestSet = t; + break; + } + } + if (bestSet == -1) { + TraceEvent("TLogPeekLocalNoBestSet", dbgid) + .detail("Tag", tag.toString()) + .detail("Begin", begin) + .detail("End", end) + .detail("LogCount", logCount); + if (useMergePeekCursors || logCount > 1) { + throw worker_removed(); + } else { + return makeReference( + Reference>>(), tag, begin, getPeekEnd(), false, false); + } + } + + if (begin >= tLogs[bestSet]->startVersion) { + TraceEvent("TLogPeekLocalBestOnly", dbgid) + .detail("Tag", tag.toString()) + .detail("Begin", begin) + .detail("End", end) + .detail("BestSet", bestSet) + .detail("BestSetStart", tLogs[bestSet]->startVersion) + .detail("LogId", tLogs[bestSet]->logServers[tLogs[bestSet]->bestLocationFor(tag)]->get().id()); + if (useMergePeekCursors) { + return makeReference(tLogs[bestSet]->logServers, + tLogs[bestSet]->bestLocationFor(tag), + tLogs[bestSet]->logServers.size() + 1 - + tLogs[bestSet]->tLogReplicationFactor, + tag, + begin, + end, + true, + tLogs[bestSet]->tLogLocalities, + tLogs[bestSet]->tLogPolicy, + tLogs[bestSet]->tLogReplicationFactor); + } else { + return makeReference( + tLogs[bestSet]->logServers[tLogs[bestSet]->bestLocationFor(tag)], tag, begin, end, false, false); + } + } else { + std::vector> cursors; + std::vector epochEnds; + + if (tLogs[bestSet]->startVersion < end) { + TraceEvent("TLogPeekLocalAddingBest", dbgid) + .detail("Tag", tag.toString()) + .detail("Begin", begin) + .detail("End", end) + .detail("BestSet", bestSet) + .detail("BestSetStart", tLogs[bestSet]->startVersion) + .detail("LogId", tLogs[bestSet]->logServers[tLogs[bestSet]->bestLocationFor(tag)]->get().id()); + if (useMergePeekCursors) { + cursors.push_back(makeReference(tLogs[bestSet]->logServers, + tLogs[bestSet]->bestLocationFor(tag), + tLogs[bestSet]->logServers.size() + 1 - + tLogs[bestSet]->tLogReplicationFactor, + tag, + tLogs[bestSet]->startVersion, + end, + true, + tLogs[bestSet]->tLogLocalities, + tLogs[bestSet]->tLogPolicy, + tLogs[bestSet]->tLogReplicationFactor)); + } else { + cursors.push_back(makeReference( + tLogs[bestSet]->logServers[tLogs[bestSet]->bestLocationFor(tag)], + tag, + tLogs[bestSet]->startVersion, + end, + false, + false)); + } + } + Version lastBegin = tLogs[bestSet]->startVersion; + for (int i = 0; begin < lastBegin; i++) { + if (i == oldLogData.size()) { + if ((tag == txsTag || tag.locality == tagLocalityTxs) && cursors.size()) { + break; + } + TraceEvent("TLogPeekLocalDead", dbgid) + 
.detail("Tag", tag.toString()) + .detail("Begin", begin) + .detail("End", end) + .detail("LastBegin", lastBegin) + .detail("OldLogDataSize", oldLogData.size()); + throw worker_removed(); + } + + int bestOldSet = -1; + logCount = 0; + bool nextFoundSpecial = false; + for (int t = 0; t < oldLogData[i].tLogs.size(); t++) { + if (oldLogData[i].tLogs[t]->logServers.size() && + oldLogData[i].tLogs[t]->locality != tagLocalitySatellite) { + logCount++; + } + if (oldLogData[i].tLogs[t]->logServers.size() && + (oldLogData[i].tLogs[t]->locality == tagLocalitySpecial || + oldLogData[i].tLogs[t]->locality == tagLocalityUpgraded || + oldLogData[i].tLogs[t]->locality == peekLocality || peekLocality == tagLocalityUpgraded || + peekLocality == tagLocalitySpecial)) { + if (oldLogData[i].tLogs[t]->locality == tagLocalitySpecial || + oldLogData[i].tLogs[t]->locality == tagLocalityUpgraded) { + nextFoundSpecial = true; + } + if (foundSpecial && !oldLogData[i].tLogs[t]->isLocal) { + TraceEvent("TLogPeekLocalRemoteBeforeSpecial", dbgid) + .detail("Tag", tag.toString()) + .detail("Begin", begin) + .detail("End", end) + .detail("LastBegin", lastBegin) + .detail("OldLogDataSize", oldLogData.size()) + .detail("Idx", i); + throw worker_removed(); + } + bestOldSet = t; + break; + } + } + + if (bestOldSet == -1) { + TraceEvent("TLogPeekLocalNoBestSet", dbgid) + .detail("Tag", tag.toString()) + .detail("Begin", begin) + .detail("End", end) + .detail("LastBegin", lastBegin) + .detail("OldLogDataSize", oldLogData.size()) + .detail("Idx", i) + .detail("LogRouterTags", oldLogData[i].logRouterTags) + .detail("LogCount", logCount) + .detail("FoundSpecial", foundSpecial); + if (oldLogData[i].logRouterTags == 0 || logCount > 1 || foundSpecial) { + throw worker_removed(); + } + continue; + } + + foundSpecial = nextFoundSpecial; + + Version thisBegin = std::max(oldLogData[i].tLogs[bestOldSet]->startVersion, begin); + if (thisBegin < lastBegin) { + if (thisBegin < end) { + TraceEvent("TLogPeekLocalAddingOldBest", dbgid) + .detail("Tag", tag.toString()) + .detail("Begin", begin) + .detail("End", end) + .detail("BestOldSet", bestOldSet) + .detail("LogServers", oldLogData[i].tLogs[bestOldSet]->logServerString()) + .detail("ThisBegin", thisBegin) + .detail("LastBegin", lastBegin); + // detail("LogId", + // oldLogData[i].tLogs[bestOldSet]->logServers[tLogs[bestOldSet]->bestLocationFor( tag + // )]->get().id()); + cursors.push_back(makeReference( + oldLogData[i].tLogs[bestOldSet]->logServers, + oldLogData[i].tLogs[bestOldSet]->bestLocationFor(tag), + oldLogData[i].tLogs[bestOldSet]->logServers.size() + 1 - + oldLogData[i].tLogs[bestOldSet]->tLogReplicationFactor, + tag, + thisBegin, + std::min(lastBegin, end), + useMergePeekCursors, + oldLogData[i].tLogs[bestOldSet]->tLogLocalities, + oldLogData[i].tLogs[bestOldSet]->tLogPolicy, + oldLogData[i].tLogs[bestOldSet]->tLogReplicationFactor)); + epochEnds.emplace_back(std::min(lastBegin, end)); + } + lastBegin = thisBegin; + } + } + + return makeReference(cursors, epochEnds); + } +} + +Reference TagPartitionedLogSystem::peekTxs(UID dbgid, + Version begin, + int8_t peekLocality, + Version localEnd, + bool canDiscardPopped) { + Version end = getEnd(); + if (!tLogs.size()) { + TraceEvent("TLogPeekTxsNoLogs", dbgid).log(); + return makeReference( + Reference>>(), txsTag, begin, end, false, false); + } + TraceEvent("TLogPeekTxs", dbgid) + .detail("Begin", begin) + .detail("End", end) + .detail("LocalEnd", localEnd) + .detail("PeekLocality", peekLocality) + .detail("CanDiscardPopped", 
canDiscardPopped); + + int maxTxsTags = txsTags; + bool needsOldTxs = tLogs[0]->tLogVersion < TLogVersion::V4; + for (auto& it : oldLogData) { + maxTxsTags = std::max(maxTxsTags, it.txsTags); + needsOldTxs = needsOldTxs || it.tLogs[0]->tLogVersion < TLogVersion::V4; + } + + if (peekLocality < 0 || localEnd == invalidVersion || localEnd <= begin) { + std::vector> cursors; + cursors.reserve(maxTxsTags); + for (int i = 0; i < maxTxsTags; i++) { + cursors.push_back(peekAll(dbgid, begin, end, Tag(tagLocalityTxs, i), true)); + } + // SOMEDAY: remove once upgrades from 6.2 are no longer supported + if (needsOldTxs) { + cursors.push_back(peekAll(dbgid, begin, end, txsTag, true)); + } + + return makeReference(cursors, begin, end, false, false, canDiscardPopped); + } + + try { + if (localEnd >= end) { + std::vector> cursors; + cursors.reserve(maxTxsTags); + for (int i = 0; i < maxTxsTags; i++) { + cursors.push_back(peekLocal(dbgid, Tag(tagLocalityTxs, i), begin, end, true, peekLocality)); + } + // SOMEDAY: remove once upgrades from 6.2 are no longer supported + if (needsOldTxs) { + cursors.push_back(peekLocal(dbgid, txsTag, begin, end, true, peekLocality)); + } + + return makeReference(cursors, begin, end, false, false, canDiscardPopped); + } + + std::vector> cursors; + std::vector epochEnds; + + cursors.resize(2); + + std::vector> localCursors; + std::vector> allCursors; + for (int i = 0; i < maxTxsTags; i++) { + localCursors.push_back(peekLocal(dbgid, Tag(tagLocalityTxs, i), begin, localEnd, true, peekLocality)); + allCursors.push_back(peekAll(dbgid, localEnd, end, Tag(tagLocalityTxs, i), true)); + } + // SOMEDAY: remove once upgrades from 6.2 are no longer supported + if (needsOldTxs) { + localCursors.push_back(peekLocal(dbgid, txsTag, begin, localEnd, true, peekLocality)); + allCursors.push_back(peekAll(dbgid, localEnd, end, txsTag, true)); + } + + cursors[1] = + makeReference(localCursors, begin, localEnd, false, false, canDiscardPopped); + cursors[0] = makeReference(allCursors, localEnd, end, false, false, false); + epochEnds.emplace_back(localEnd); + + return makeReference(cursors, epochEnds); + } catch (Error& e) { + if (e.code() == error_code_worker_removed) { + std::vector> cursors; + cursors.reserve(maxTxsTags); + for (int i = 0; i < maxTxsTags; i++) { + cursors.push_back(peekAll(dbgid, begin, end, Tag(tagLocalityTxs, i), true)); + } + // SOMEDAY: remove once upgrades from 6.2 are no longer supported + if (needsOldTxs) { + cursors.push_back(peekAll(dbgid, begin, end, txsTag, true)); + } + + return makeReference(cursors, begin, end, false, false, canDiscardPopped); + } + throw; + } +} + +Reference TagPartitionedLogSystem::peekSingle(UID dbgid, + Version begin, + Tag tag, + std::vector> history) { + while (history.size() && begin >= history.back().first) { + history.pop_back(); + } + + if (history.size() == 0) { + TraceEvent("TLogPeekSingleNoHistory", dbgid).detail("Tag", tag.toString()).detail("Begin", begin); + return peekLocal(dbgid, tag, begin, getPeekEnd(), false); + } else { + std::vector> cursors; + std::vector epochEnds; + + TraceEvent("TLogPeekSingleAddingLocal", dbgid).detail("Tag", tag.toString()).detail("Begin", history[0].first); + cursors.push_back(peekLocal(dbgid, tag, history[0].first, getPeekEnd(), false)); + + for (int i = 0; i < history.size(); i++) { + TraceEvent("TLogPeekSingleAddingOld", dbgid) + .detail("Tag", tag.toString()) + .detail("HistoryTag", history[i].second.toString()) + .detail("Begin", i + 1 == history.size() ? 
begin : std::max(history[i + 1].first, begin)) + .detail("End", history[i].first); + cursors.push_back(peekLocal(dbgid, + history[i].second, + i + 1 == history.size() ? begin : std::max(history[i + 1].first, begin), + history[i].first, + false)); + epochEnds.emplace_back(history[i].first); + } + + return makeReference(cursors, epochEnds); + } +} + +Reference TagPartitionedLogSystem::peekLogRouter(UID dbgid, Version begin, Tag tag) { + bool found = false; + for (const auto& log : tLogs) { + found = log->hasLogRouter(dbgid) || log->hasBackupWorker(dbgid); + if (found) { + break; + } + } + if (found) { + if (stopped) { + std::vector> localSets; + int bestPrimarySet = 0; + int bestSatelliteSet = -1; + for (auto& log : tLogs) { + if (log->isLocal && log->logServers.size()) { + TraceEvent("TLogPeekLogRouterLocalSet", dbgid) + .detail("Tag", tag.toString()) + .detail("Begin", begin) + .detail("LogServers", log->logServerString()); + localSets.push_back(log); + if (log->locality == tagLocalitySatellite) { + bestSatelliteSet = localSets.size() - 1; + } else { + bestPrimarySet = localSets.size() - 1; + } + } + } + int bestSet = bestPrimarySet; + if (SERVER_KNOBS->LOG_ROUTER_PEEK_FROM_SATELLITES_PREFERRED && bestSatelliteSet != -1 && + tLogs[bestSatelliteSet]->tLogVersion >= TLogVersion::V4) { + bestSet = bestSatelliteSet; + } + + TraceEvent("TLogPeekLogRouterSets", dbgid).detail("Tag", tag.toString()).detail("Begin", begin); + // FIXME: do this merge on one of the logs in the other data center to avoid sending multiple copies + // across the WAN + return makeReference( + localSets, bestSet, localSets[bestSet]->bestLocationFor(tag), tag, begin, getPeekEnd(), true); + } else { + int bestPrimarySet = -1; + int bestSatelliteSet = -1; + for (int i = 0; i < tLogs.size(); i++) { + const auto& log = tLogs[i]; + if (log->logServers.size() && log->isLocal) { + if (log->locality == tagLocalitySatellite) { + bestSatelliteSet = i; + break; + } else { + if (bestPrimarySet == -1) + bestPrimarySet = i; + } + } + } + int bestSet = bestPrimarySet; + if (SERVER_KNOBS->LOG_ROUTER_PEEK_FROM_SATELLITES_PREFERRED && bestSatelliteSet != -1 && + tLogs[bestSatelliteSet]->tLogVersion >= TLogVersion::V4) { + bestSet = bestSatelliteSet; + } + const auto& log = tLogs[bestSet]; + TraceEvent("TLogPeekLogRouterBestOnly", dbgid) + .detail("Tag", tag.toString()) + .detail("Begin", begin) + .detail("LogId", log->logServers[log->bestLocationFor(tag)]->get().id()); + return makeReference( + log->logServers[log->bestLocationFor(tag)], tag, begin, getPeekEnd(), false, true); + } + } + bool firstOld = true; + for (const auto& old : oldLogData) { + found = false; + for (const auto& log : old.tLogs) { + found = log->hasLogRouter(dbgid) || log->hasBackupWorker(dbgid); + if (found) { + break; + } + } + if (found) { + int bestPrimarySet = 0; + int bestSatelliteSet = -1; + std::vector> localSets; + for (auto& log : old.tLogs) { + if (log->isLocal && log->logServers.size()) { + TraceEvent("TLogPeekLogRouterOldLocalSet", dbgid) + .detail("Tag", tag.toString()) + .detail("Begin", begin) + .detail("LogServers", log->logServerString()); + localSets.push_back(log); + if (log->locality == tagLocalitySatellite) { + bestSatelliteSet = localSets.size() - 1; + } else { + bestPrimarySet = localSets.size() - 1; + } + } + } + int bestSet = bestPrimarySet; + if (SERVER_KNOBS->LOG_ROUTER_PEEK_FROM_SATELLITES_PREFERRED && bestSatelliteSet != -1 && + old.tLogs[bestSatelliteSet]->tLogVersion >= TLogVersion::V4) { + bestSet = bestSatelliteSet; + } + + 
TraceEvent("TLogPeekLogRouterOldSets", dbgid) + .detail("Tag", tag.toString()) + .detail("Begin", begin) + .detail("OldEpoch", old.epochEnd) + .detail("RecoveredAt", recoveredAt.present() ? recoveredAt.get() : -1) + .detail("FirstOld", firstOld); + // FIXME: do this merge on one of the logs in the other data center to avoid sending multiple copies + // across the WAN + return makeReference(localSets, + bestSet, + localSets[bestSet]->bestLocationFor(tag), + tag, + begin, + firstOld && recoveredAt.present() ? recoveredAt.get() + 1 + : old.epochEnd, + true); + } + firstOld = false; + } + return makeReference( + Reference>>(), tag, begin, getPeekEnd(), false, false); +} + +Version TagPartitionedLogSystem::getKnownCommittedVersion() { + Version result = invalidVersion; + for (auto& it : lockResults) { + auto versions = TagPartitionedLogSystem::getDurableVersion(dbgid, it); + if (versions.present()) { + result = std::max(result, versions.get().first); + } + } + return result; +} + +Future TagPartitionedLogSystem::onKnownCommittedVersionChange() { + std::vector> result; + for (auto& it : lockResults) { + result.push_back(TagPartitionedLogSystem::getDurableVersionChanged(it)); + } + if (!result.size()) { + return Never(); + } + return waitForAny(result); +} + +void TagPartitionedLogSystem::popLogRouter( + Version upTo, + Tag tag, + Version durableKnownCommittedVersion, + int8_t popLocality) { // FIXME: do not need to pop all generations of old logs + if (!upTo) + return; + for (auto& t : tLogs) { + if (t->locality == popLocality) { + for (auto& log : t->logRouters) { + Version prev = outstandingPops[std::make_pair(log->get().id(), tag)].first; + if (prev < upTo) + outstandingPops[std::make_pair(log->get().id(), tag)] = + std::make_pair(upTo, durableKnownCommittedVersion); + if (prev == 0) { + popActors.add(popFromLog( + this, log, tag, 0.0)); // Fast pop time because log routers can only hold 5 seconds of data. 
+				}
+			}
+		}
+	}
+
+	for (auto& old : oldLogData) {
+		for (auto& t : old.tLogs) {
+			if (t->locality == popLocality) {
+				for (auto& log : t->logRouters) {
+					Version prev = outstandingPops[std::make_pair(log->get().id(), tag)].first;
+					if (prev < upTo)
+						outstandingPops[std::make_pair(log->get().id(), tag)] =
+						    std::make_pair(upTo, durableKnownCommittedVersion);
+					if (prev == 0)
+						popActors.add(popFromLog(this, log, tag, 0.0));
+				}
+			}
+		}
+	}
+}
+
+void TagPartitionedLogSystem::popTxs(Version upTo, int8_t popLocality) {
+	if (getTLogVersion() < TLogVersion::V4) {
+		pop(upTo, txsTag, 0, popLocality);
+	} else {
+		for (int i = 0; i < txsTags; i++) {
+			pop(upTo, Tag(tagLocalityTxs, i), 0, popLocality);
+		}
+	}
+}
+
+void TagPartitionedLogSystem::pop(Version upTo, Tag tag, Version durableKnownCommittedVersion, int8_t popLocality) {
+	if (upTo <= 0)
+		return;
+	if (tag.locality == tagLocalityRemoteLog) {
+		popLogRouter(upTo, tag, durableKnownCommittedVersion, popLocality);
+		return;
+	}
+	for (auto& t : tLogs) {
+		if (t->locality == tagLocalitySpecial || t->locality == tag.locality || tag.locality == tagLocalityUpgraded ||
+		    (tag.locality < 0 && ((popLocality == tagLocalityInvalid) == t->isLocal))) {
+			for (auto& log : t->logServers) {
+				Version prev = outstandingPops[std::make_pair(log->get().id(), tag)].first;
+				if (prev < upTo) {
+					// update the pop version for the popFromLog actor
+					outstandingPops[std::make_pair(log->get().id(), tag)] =
+					    std::make_pair(upTo, durableKnownCommittedVersion);
+				}
+				if (prev == 0) {
+					// pop the tag from the log up to the version defined in outstandingPops[].first
+					popActors.add(popFromLog(this, log, tag, 1.0)); //< FIXME: knob
+				}
+			}
+		}
+	}
+}
+
+ACTOR Future<Void> TagPartitionedLogSystem::popFromLog(TagPartitionedLogSystem* self,
+                                                       Reference<AsyncVar<OptionalInterface<TLogInterface>>> log,
+                                                       Tag tag,
+                                                       double time) {
+	state Version last = 0;
+	loop {
+		wait(delay(time, TaskPriority::TLogPop));
+
+		// to: first is the up-to version, second is the durableKnownCommittedVersion
+		state std::pair<Version, Version> to = self->outstandingPops[std::make_pair(log->get().id(), tag)];
+
+		if (to.first <= last) {
+			self->outstandingPops.erase(std::make_pair(log->get().id(), tag));
+			return Void();
+		}
+
+		try {
+			if (!log->get().present())
+				return Void();
+			wait(log->get().interf().popMessages.getReply(TLogPopRequest(to.first, to.second, tag),
+			                                              TaskPriority::TLogPop));
+
+			last = to.first;
+		} catch (Error& e) {
+			if (e.code() == error_code_actor_cancelled)
+				throw;
+			TraceEvent((e.code() == error_code_broken_promise) ? SevInfo : SevError, "LogPopError", self->dbgid)
+			    .error(e)
+			    .detail("Log", log->get().id());
+			return Void(); // Leaving outstandingPops filled in means no further pop requests to this tlog from
+			               // this logSystem
+		}
+	}
+}
+
+ACTOR Future<Version> TagPartitionedLogSystem::getPoppedFromTLog(
+    Reference<AsyncVar<OptionalInterface<TLogInterface>>> log,
+    Tag tag) {
+
+	loop {
+		choose {
+			when(TLogPeekReply rep =
+			         wait(log->get().present() ? brokenPromiseToNever(log->get().interf().peekMessages.getReply(
+			                                         TLogPeekRequest(-1, tag, false, false)))
+			                                   : Never())) {
+				ASSERT(rep.popped.present());
+				return rep.popped.get();
+			}
+			when(wait(log->onChange())) {}
+		}
+	}
+}
+
+ACTOR Future<Version> TagPartitionedLogSystem::getPoppedTxs(TagPartitionedLogSystem* self) {
+	state std::vector<std::vector<Future<Version>>> poppedFutures;
+	state std::vector<Future<Void>> poppedReady;
+	if (self->tLogs.size()) {
+		poppedFutures.push_back(std::vector<Future<Version>>());
+		for (auto& it : self->tLogs) {
+			for (auto& log : it->logServers) {
+				poppedFutures.back().push_back(TagPartitionedLogSystem::getPoppedFromTLog(
+				    log, self->tLogs[0]->tLogVersion < TLogVersion::V4 ?
txsTag : Tag(tagLocalityTxs, 0))); + } + } + poppedReady.push_back(waitForAny(poppedFutures.back())); + } + + for (auto& old : self->oldLogData) { + if (old.tLogs.size()) { + poppedFutures.push_back(std::vector>()); + for (auto& it : old.tLogs) { + for (auto& log : it->logServers) { + poppedFutures.back().push_back(TagPartitionedLogSystem::getPoppedFromTLog( + log, old.tLogs[0]->tLogVersion < TLogVersion::V4 ? txsTag : Tag(tagLocalityTxs, 0))); + } + } + poppedReady.push_back(waitForAny(poppedFutures.back())); + } + } + + state UID dbgid = self->dbgid; + state Future maxGetPoppedDuration = delay(SERVER_KNOBS->TXS_POPPED_MAX_DELAY); + wait(waitForAll(poppedReady) || maxGetPoppedDuration); + + if (maxGetPoppedDuration.isReady()) { + TraceEvent(SevWarnAlways, "PoppedTxsNotReady", dbgid).log(); + } + + Version maxPopped = 1; + for (auto& it : poppedFutures) { + for (auto& v : it) { + if (v.isReady()) { + maxPopped = std::max(maxPopped, v.get()); + } + } + } + return maxPopped; +} + +Future TagPartitionedLogSystem::getTxsPoppedVersion() { + return getPoppedTxs(this); +} + +ACTOR Future TagPartitionedLogSystem::confirmEpochLive_internal(Reference logSet, Optional debugID) { + state vector> alive; + int numPresent = 0; + for (auto& t : logSet->logServers) { + if (t->get().present()) { + alive.push_back(brokenPromiseToNever(t->get().interf().confirmRunning.getReply( + TLogConfirmRunningRequest(debugID), TaskPriority::TLogConfirmRunningReply))); + numPresent++; + } else { + alive.push_back(Never()); + } + } + + wait(quorum(alive, std::min(logSet->tLogReplicationFactor, numPresent - logSet->tLogWriteAntiQuorum))); + + state std::vector aliveEntries; + state std::vector responded(alive.size(), false); + loop { + for (int i = 0; i < alive.size(); i++) { + if (!responded[i] && alive[i].isReady() && !alive[i].isError()) { + aliveEntries.push_back(logSet->logEntryArray[i]); + responded[i] = true; + } + } + + if (logSet->satisfiesPolicy(aliveEntries)) { + return Void(); + } + + // The current set of responders that we have weren't enough to form a quorum, so we must + // wait for more responses and try again. + std::vector> changes; + for (int i = 0; i < alive.size(); i++) { + if (!alive[i].isReady()) { + changes.push_back(ready(alive[i])); + } else if (alive[i].isReady() && alive[i].isError() && + alive[i].getError().code() == error_code_tlog_stopped) { + // All commits must go to all TLogs. If any TLog is stopped, then our epoch has ended. 
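+				// Returning Never() parks this confirmation permanently: once a TLog reports
+				// tlog_stopped, a newer epoch is recovering and this epoch's liveness can never
+				// again be confirmed.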
+ return Never(); + } + } + ASSERT(changes.size() != 0); + wait(waitForAny(changes)); + } +} + +Future TagPartitionedLogSystem::confirmEpochLive(Optional debugID) { + vector> quorumResults; + for (auto& it : tLogs) { + if (it->isLocal && it->logServers.size()) { + quorumResults.push_back(confirmEpochLive_internal(it, debugID)); + } + } + + return waitForAll(quorumResults); +} + +Future TagPartitionedLogSystem::endEpoch() { + std::vector> lockResults; + for (auto& logSet : tLogs) { + for (auto& log : logSet->logServers) { + lockResults.push_back(success(lockTLog(dbgid, log))); + } + } + return waitForAll(lockResults); +} + +Future> TagPartitionedLogSystem::newEpoch( + RecruitFromConfigurationReply const& recr, + Future const& fRemoteWorkers, + DatabaseConfiguration const& config, + LogEpoch recoveryCount, + int8_t primaryLocality, + int8_t remoteLocality, + std::vector const& allTags, + Reference> const& recruitmentStalled) { + return newEpoch(Reference::addRef(this), + recr, + fRemoteWorkers, + config, + recoveryCount, + primaryLocality, + remoteLocality, + allTags, + recruitmentStalled); +} + +LogSystemConfig TagPartitionedLogSystem::getLogSystemConfig() const { + LogSystemConfig logSystemConfig(epoch); + logSystemConfig.logSystemType = logSystemType; + logSystemConfig.expectedLogSets = expectedLogSets; + logSystemConfig.logRouterTags = logRouterTags; + logSystemConfig.txsTags = txsTags; + logSystemConfig.recruitmentID = recruitmentID; + logSystemConfig.stopped = stopped; + logSystemConfig.recoveredAt = recoveredAt; + logSystemConfig.pseudoLocalities = pseudoLocalities; + logSystemConfig.oldestBackupEpoch = oldestBackupEpoch; + for (const Reference& logSet : tLogs) { + if (logSet->isLocal || remoteLogsWrittenToCoreState) { + logSystemConfig.tLogs.emplace_back(*logSet); + } + } + + if (!recoveryCompleteWrittenToCoreState.get()) { + for (const auto& oldData : oldLogData) { + logSystemConfig.oldTLogs.emplace_back(oldData); + } + } + return logSystemConfig; +} + +Standalone TagPartitionedLogSystem::getLogsValue() const { + vector> logs; + vector> oldLogs; + for (auto& t : tLogs) { + if (t->isLocal || remoteLogsWrittenToCoreState) { + for (int i = 0; i < t->logServers.size(); i++) { + logs.emplace_back(t->logServers[i]->get().id(), + t->logServers[i]->get().present() ? t->logServers[i]->get().interf().address() + : NetworkAddress()); + } + } + } + if (!recoveryCompleteWrittenToCoreState.get()) { + for (int i = 0; i < oldLogData.size(); i++) { + for (auto& t : oldLogData[i].tLogs) { + for (int j = 0; j < t->logServers.size(); j++) { + oldLogs.emplace_back(t->logServers[j]->get().id(), + t->logServers[j]->get().present() ? 
t->logServers[j]->get().interf().address() + : NetworkAddress()); + } + } + } + } + return logsValue(logs, oldLogs); +} + +Future TagPartitionedLogSystem::onLogSystemConfigChange() { + std::vector> changes; + changes.push_back(logSystemConfigChanged.onTrigger()); + for (auto& t : tLogs) { + for (int i = 0; i < t->logServers.size(); i++) { + changes.push_back(t->logServers[i]->onChange()); + } + } + for (int i = 0; i < oldLogData.size(); i++) { + for (auto& t : oldLogData[i].tLogs) { + for (int j = 0; j < t->logServers.size(); j++) { + changes.push_back(t->logServers[j]->onChange()); + } + } + } + + if (hasRemoteServers && !remoteRecovery.isReady()) { + changes.push_back(remoteRecovery); + } + + return waitForAny(changes); +} + +Version TagPartitionedLogSystem::getEnd() const { + ASSERT(recoverAt.present()); + return recoverAt.get() + 1; +} + +Version TagPartitionedLogSystem::getPeekEnd() const { + if (recoverAt.present()) + return getEnd(); + else + return std::numeric_limits::max(); +} + +void TagPartitionedLogSystem::getPushLocations(VectorRef tags, + std::vector& locations, + bool allLocations) const { + int locationOffset = 0; + for (auto& log : tLogs) { + if (log->isLocal && log->logServers.size()) { + log->getPushLocations(tags, locations, locationOffset, allLocations); + locationOffset += log->logServers.size(); + } + } +} + +bool TagPartitionedLogSystem::hasRemoteLogs() const { + return logRouterTags > 0 || pseudoLocalities.size() > 0; +} + +Tag TagPartitionedLogSystem::getRandomRouterTag() const { + return Tag(tagLocalityLogRouter, deterministicRandom()->randomInt(0, logRouterTags)); +} + +Tag TagPartitionedLogSystem::getRandomTxsTag() const { + return Tag(tagLocalityTxs, deterministicRandom()->randomInt(0, txsTags)); +} + +TLogVersion TagPartitionedLogSystem::getTLogVersion() const { + return tLogs[0]->tLogVersion; +} + +int TagPartitionedLogSystem::getLogRouterTags() const { + return logRouterTags; +} + +Version TagPartitionedLogSystem::getBackupStartVersion() const { + ASSERT(tLogs.size() > 0); + return backupStartVersion; +} + +std::map TagPartitionedLogSystem::getOldEpochTagsVersionsInfo() const { + std::map epochInfos; + for (const auto& old : oldLogData) { + epochInfos.insert( + { old.epoch, ILogSystem::EpochTagsVersionsInfo(old.logRouterTags, old.epochBegin, old.epochEnd) }); + TraceEvent("OldEpochTagsVersions", dbgid) + .detail("Epoch", old.epoch) + .detail("Tags", old.logRouterTags) + .detail("BeginVersion", old.epochBegin) + .detail("EndVersion", old.epochEnd); + } + return epochInfos; +} + +inline Reference TagPartitionedLogSystem::getEpochLogSet(LogEpoch epoch) const { + for (const auto& old : oldLogData) { + if (epoch == old.epoch) + return old.tLogs[0]; + } + return Reference(nullptr); +} + +void TagPartitionedLogSystem::setBackupWorkers(const std::vector& replies) { + ASSERT(tLogs.size() > 0); + + Reference logset = tLogs[0]; // Master recruits this epoch's worker first. 
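+	// Replies are expected to arrive grouped by epoch, with the current epoch's workers first
+	// (see the comment above); the loop below switches `logset` whenever the backupEpoch changes
+	// and tracks the oldest epoch that still has backup workers attached.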
+	LogEpoch logsetEpoch = this->epoch;
+	oldestBackupEpoch = this->epoch;
+	for (const auto& reply : replies) {
+		if (removedBackupWorkers.count(reply.interf.id()) > 0) {
+			removedBackupWorkers.erase(reply.interf.id());
+			continue;
+		}
+		auto worker = makeReference<AsyncVar<OptionalInterface<BackupInterface>>>(
+		    OptionalInterface<BackupInterface>(reply.interf));
+		if (reply.backupEpoch != logsetEpoch) {
+			// find the logset from oldLogData
+			logsetEpoch = reply.backupEpoch;
+			oldestBackupEpoch = std::min(oldestBackupEpoch, logsetEpoch);
+			logset = getEpochLogSet(logsetEpoch);
+			ASSERT(logset.isValid());
+		}
+		logset->backupWorkers.push_back(worker);
+		TraceEvent("AddBackupWorker", dbgid).detail("Epoch", logsetEpoch).detail("BackupWorkerID", reply.interf.id());
+	}
+	TraceEvent("SetOldestBackupEpoch", dbgid).detail("Epoch", oldestBackupEpoch);
+	backupWorkerChanged.trigger();
+}
+
+bool TagPartitionedLogSystem::removeBackupWorker(const BackupWorkerDoneRequest& req) {
+	bool removed = false;
+	Reference<LogSet> logset = getEpochLogSet(req.backupEpoch);
+	if (logset.isValid()) {
+		for (auto it = logset->backupWorkers.begin(); it != logset->backupWorkers.end(); it++) {
+			if (it->getPtr()->get().interf().id() == req.workerUID) {
+				logset->backupWorkers.erase(it);
+				removed = true;
+				break;
+			}
+		}
+	}
+
+	if (removed) {
+		oldestBackupEpoch = epoch;
+		for (const auto& old : oldLogData) {
+			if (old.epoch < oldestBackupEpoch && old.tLogs[0]->backupWorkers.size() > 0) {
+				oldestBackupEpoch = old.epoch;
+			}
+		}
+		backupWorkerChanged.trigger();
+	} else {
+		removedBackupWorkers.insert(req.workerUID);
+	}
+
+	TraceEvent("RemoveBackupWorker", dbgid)
+	    .detail("Removed", removed)
+	    .detail("BackupEpoch", req.backupEpoch)
+	    .detail("WorkerID", req.workerUID)
+	    .detail("OldestBackupEpoch", oldestBackupEpoch);
+	return removed;
+}
+
+LogEpoch TagPartitionedLogSystem::getOldestBackupEpoch() const {
+	return oldestBackupEpoch;
+}
+
+void TagPartitionedLogSystem::setOldestBackupEpoch(LogEpoch epoch) {
+	oldestBackupEpoch = epoch;
+	backupWorkerChanged.trigger();
+}
+
+ACTOR Future<Void> TagPartitionedLogSystem::monitorLog(Reference<AsyncVar<OptionalInterface<TLogInterface>>> logServer,
+                                                       Reference<AsyncVar<bool>> failed) {
+	state Future<Void> waitFailure;
+	loop {
+		if (logServer->get().present())
+			waitFailure = waitFailureTracker(logServer->get().interf().waitFailure, failed);
+		else
+			failed->set(true);
+		wait(logServer->onChange());
+	}
+}
+
+Optional<std::pair<Version, Version>> TagPartitionedLogSystem::getDurableVersion(
+    UID dbgid,
+    LogLockInfo lockInfo,
+    std::vector<Reference<AsyncVar<bool>>> failed,
+    Optional<Version> lastEnd) {
+
+	Reference<LogSet> logSet = lockInfo.logSet;
+	// To ensure consistent recovery, the number of servers NOT in the write quorum plus the number of servers NOT
+	// in the read quorum have to be strictly less than the replication factor. Otherwise there could be a replica
+	// set consisting entirely of servers that are out of date due to not being in the write quorum or unavailable
+	// due to not being in the read quorum. So with N = # of tlogs, W = antiquorum, R = required count, F =
+	// replication factor, W + (N - R) < F, and optimally W + (N - R) = F - 1. Thus R = N + 1 - F + W.
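+	// Worked example with illustrative numbers: N = 5 tlogs, F = 3, W = 1 gives
+	// R = 5 + 1 - 3 + 1 = 4; then W + (N - R) = 1 + 1 = 2 < 3 = F, so the logs missing from the
+	// write quorum plus those missing from the read quorum can never cover a full replica set.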
+ int requiredCount = + (int)logSet->logServers.size() + 1 - logSet->tLogReplicationFactor + logSet->tLogWriteAntiQuorum; + ASSERT(requiredCount > 0 && requiredCount <= logSet->logServers.size()); + ASSERT(logSet->tLogReplicationFactor >= 1 && logSet->tLogReplicationFactor <= logSet->logServers.size()); + ASSERT(logSet->tLogWriteAntiQuorum >= 0 && logSet->tLogWriteAntiQuorum < logSet->logServers.size()); + + std::vector availableItems, badCombo; + std::vector results; + std::string sServerState; + LocalityGroup unResponsiveSet; + + for (int t = 0; t < logSet->logServers.size(); t++) { + if (lockInfo.replies[t].isReady() && !lockInfo.replies[t].isError() && (!failed.size() || !failed[t]->get())) { + results.push_back(lockInfo.replies[t].get()); + availableItems.push_back(logSet->tLogLocalities[t]); + sServerState += 'a'; + } else { + unResponsiveSet.add(logSet->tLogLocalities[t]); + sServerState += 'f'; + } + } + + // Check if the list of results is not larger than the anti quorum + bool bTooManyFailures = (results.size() <= logSet->tLogWriteAntiQuorum); + + // Check if failed logs complete the policy + bTooManyFailures = bTooManyFailures || ((unResponsiveSet.size() >= logSet->tLogReplicationFactor) && + (unResponsiveSet.validate(logSet->tLogPolicy))); + + // Check all combinations of the AntiQuorum within the failed + if (!bTooManyFailures && (logSet->tLogWriteAntiQuorum) && + (!validateAllCombinations( + badCombo, unResponsiveSet, logSet->tLogPolicy, availableItems, logSet->tLogWriteAntiQuorum, false))) { + TraceEvent("EpochEndBadCombo", dbgid) + .detail("Required", requiredCount) + .detail("Present", results.size()) + .detail("ServerState", sServerState); + bTooManyFailures = true; + } + + ASSERT(logSet->logServers.size() == lockInfo.replies.size()); + if (!bTooManyFailures) { + std::sort(results.begin(), results.end(), [](const TLogLockResult& a, const TLogLockResult& b) -> bool { + return a.end < b.end; + }); + int absent = logSet->logServers.size() - results.size(); + int safe_range_begin = logSet->tLogWriteAntiQuorum; + int new_safe_range_begin = std::min(logSet->tLogWriteAntiQuorum, (int)(results.size() - 1)); + int safe_range_end = logSet->tLogReplicationFactor - absent; + + if (!lastEnd.present() || ((safe_range_end > 0) && (safe_range_end - 1 < results.size()) && + results[safe_range_end - 1].end < lastEnd.get())) { + Version knownCommittedVersion = 0; + for (int i = 0; i < results.size(); i++) { + knownCommittedVersion = std::max(knownCommittedVersion, results[i].knownCommittedVersion); + } + + if (knownCommittedVersion > results[new_safe_range_begin].end) { + knownCommittedVersion = results[new_safe_range_begin].end; + } + + TraceEvent("GetDurableResult", dbgid) + .detail("Required", requiredCount) + .detail("Present", results.size()) + .detail("ServerState", sServerState) + .detail("RecoveryVersion", + ((safe_range_end > 0) && (safe_range_end - 1 < results.size())) + ? 
results[safe_range_end - 1].end
+                        : -1)
+            .detail("EndVersion", results[new_safe_range_begin].end)
+            .detail("SafeBegin", safe_range_begin)
+            .detail("SafeEnd", safe_range_end)
+            .detail("NewSafeBegin", new_safe_range_begin)
+            .detail("KnownCommittedVersion", knownCommittedVersion)
+            .detail("EpochEnd", lockInfo.epochEnd);
+
+            return std::make_pair(knownCommittedVersion, results[new_safe_range_begin].end);
+        }
+    }
+    TraceEvent("GetDurableResultWaiting", dbgid)
+        .detail("Required", requiredCount)
+        .detail("Present", results.size())
+        .detail("ServerState", sServerState);
+    return Optional<std::pair<Version, Version>>();
+}
+
+ACTOR Future<Void> TagPartitionedLogSystem::getDurableVersionChanged(
+    LogLockInfo lockInfo,
+    std::vector<Reference<AsyncVar<bool>>> failed) {
+    // Wait for anything relevant to change
+    std::vector<Future<Void>> changes;
+    for (int j = 0; j < lockInfo.logSet->logServers.size(); j++) {
+        if (!lockInfo.replies[j].isReady())
+            changes.push_back(ready(lockInfo.replies[j]));
+        else {
+            changes.push_back(lockInfo.logSet->logServers[j]->onChange());
+            if (failed.size()) {
+                changes.push_back(failed[j]->onChange());
+            }
+        }
+    }
+    ASSERT(changes.size());
+    wait(waitForAny(changes));
+    return Void();
+}
+
+ACTOR Future<Void> TagPartitionedLogSystem::epochEnd(Reference<AsyncVar<Reference<ILogSystem>>> outLogSystem,
+                                                     UID dbgid,
+                                                     DBCoreState prevState,
+                                                     FutureStream<TLogRejoinRequest> rejoinRequests,
+                                                     LocalityData locality,
+                                                     bool* forceRecovery) {
+    // Stops a co-quorum of tlogs so that no further versions can be committed until the DBCoreState coordination
+    // state is changed. Creates a new logSystem representing the (now frozen) epoch. No other important side
+    // effects. The writeQuorum in the master info is from the previous configuration.
+
+    if (!prevState.tLogs.size()) {
+        // This is a brand new database
+        auto logSystem = makeReference<TagPartitionedLogSystem>(dbgid, locality, 0);
+        logSystem->logSystemType = prevState.logSystemType;
+        logSystem->recoverAt = 0;
+        logSystem->knownCommittedVersion = 0;
+        logSystem->stopped = true;
+        outLogSystem->set(logSystem);
+        wait(Future<Void>(Never()));
+        throw internal_error();
+    }
+
+    if (*forceRecovery) {
+        DBCoreState modifiedState = prevState;
+
+        int8_t primaryLocality = -1;
+        for (auto& coreSet : modifiedState.tLogs) {
+            if (coreSet.isLocal && coreSet.locality >= 0 && coreSet.tLogLocalities[0].dcId() != locality.dcId()) {
+                primaryLocality = coreSet.locality;
+                break;
+            }
+        }
+
+        bool foundRemote = false;
+        int8_t remoteLocality = -1;
+        int modifiedLogSets = 0;
+        int removedLogSets = 0;
+        if (primaryLocality >= 0) {
+            auto copiedLogs = modifiedState.tLogs;
+            for (auto& coreSet : copiedLogs) {
+                if (coreSet.locality != primaryLocality && coreSet.locality >= 0) {
+                    foundRemote = true;
+                    remoteLocality = coreSet.locality;
+                    modifiedState.tLogs.clear();
+                    modifiedState.tLogs.push_back(coreSet);
+                    modifiedState.tLogs[0].isLocal = true;
+                    modifiedState.logRouterTags = 0;
+                    modifiedLogSets++;
+                    break;
+                }
+            }
+
+            while (!foundRemote && modifiedState.oldTLogData.size()) {
+                for (auto& coreSet : modifiedState.oldTLogData[0].tLogs) {
+                    if (coreSet.locality != primaryLocality && coreSet.locality >= tagLocalitySpecial) {
+                        foundRemote = true;
+                        remoteLocality = coreSet.locality;
+                        modifiedState.tLogs.clear();
+                        modifiedState.tLogs.push_back(coreSet);
+                        modifiedState.tLogs[0].isLocal = true;
+                        modifiedState.logRouterTags = 0;
+                        modifiedState.txsTags = modifiedState.oldTLogData[0].txsTags;
+                        modifiedLogSets++;
+                        break;
+                    }
+                }
+                modifiedState.oldTLogData.erase(modifiedState.oldTLogData.begin());
+                removedLogSets++;
+            }
+
+            if (foundRemote) {
+                for (int i = 0; i <
modifiedState.oldTLogData.size(); i++) { + bool found = false; + auto copiedLogs = modifiedState.oldTLogData[i].tLogs; + for (auto& coreSet : copiedLogs) { + if (coreSet.locality == remoteLocality || coreSet.locality == tagLocalitySpecial) { + found = true; + if (!coreSet.isLocal || copiedLogs.size() > 1) { + modifiedState.oldTLogData[i].tLogs.clear(); + modifiedState.oldTLogData[i].tLogs.push_back(coreSet); + modifiedState.oldTLogData[i].tLogs[0].isLocal = true; + modifiedState.oldTLogData[i].logRouterTags = 0; + modifiedState.oldTLogData[i].epochBegin = + modifiedState.oldTLogData[i].tLogs[0].startVersion; + modifiedState.oldTLogData[i].epochEnd = + (i == 0 ? modifiedState.tLogs[0].startVersion + : modifiedState.oldTLogData[i - 1].tLogs[0].startVersion); + modifiedLogSets++; + } + break; + } + } + if (!found) { + modifiedState.oldTLogData.erase(modifiedState.oldTLogData.begin() + i); + removedLogSets++; + i--; + } + } + prevState = modifiedState; + } else { + *forceRecovery = false; + } + } else { + *forceRecovery = false; + } + TraceEvent(SevWarnAlways, "ForcedRecovery", dbgid) + .detail("PrimaryLocality", primaryLocality) + .detail("RemoteLocality", remoteLocality) + .detail("FoundRemote", foundRemote) + .detail("Modified", modifiedLogSets) + .detail("Removed", removedLogSets); + for (int i = 0; i < prevState.tLogs.size(); i++) { + TraceEvent("ForcedRecoveryTLogs", dbgid) + .detail("I", i) + .detail("Log", ::describe(prevState.tLogs[i].tLogs)) + .detail("Loc", prevState.tLogs[i].locality) + .detail("Txs", prevState.txsTags); + } + for (int i = 0; i < prevState.oldTLogData.size(); i++) { + for (int j = 0; j < prevState.oldTLogData[i].tLogs.size(); j++) { + TraceEvent("ForcedRecoveryTLogs", dbgid) + .detail("I", i) + .detail("J", j) + .detail("Log", ::describe(prevState.oldTLogData[i].tLogs[j].tLogs)) + .detail("Loc", prevState.oldTLogData[i].tLogs[j].locality) + .detail("Txs", prevState.oldTLogData[i].txsTags); + } + } + } + + TEST(true); // Master recovery from pre-existing database + + // trackRejoins listens for rejoin requests from the tLogs that we are recovering from, to learn their + // TLogInterfaces + state std::vector lockResults; + state std::vector>>, Reference>> + allLogServers; + state std::vector> logServers; + state std::vector oldLogData; + state std::vector>>> logFailed; + state std::vector> failureTrackers; + + for (const CoreTLogSet& coreSet : prevState.tLogs) { + logServers.push_back(makeReference(coreSet)); + std::vector>> failed; + + for (const auto& logVar : logServers.back()->logServers) { + allLogServers.emplace_back(logVar, coreSet.tLogPolicy); + failed.push_back(makeReference>()); + failureTrackers.push_back(TagPartitionedLogSystem::monitorLog(logVar, failed.back())); + } + logFailed.push_back(failed); + } + + for (const auto& oldTlogData : prevState.oldTLogData) { + oldLogData.emplace_back(oldTlogData); + + for (const auto& logSet : oldLogData.back().tLogs) { + for (const auto& logVar : logSet->logServers) { + allLogServers.emplace_back(logVar, logSet->tLogPolicy); + } + } + } + state Future rejoins = TagPartitionedLogSystem::trackRejoins(dbgid, allLogServers, rejoinRequests); + + lockResults.resize(logServers.size()); + std::set lockedLocalities; + bool foundSpecial = false; + for (int i = 0; i < logServers.size(); i++) { + if (logServers[i]->locality == tagLocalitySpecial || logServers[i]->locality == tagLocalityUpgraded) { + foundSpecial = true; + } + lockedLocalities.insert(logServers[i]->locality); + lockResults[i].isCurrent = true; + 
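+        // Each LogLockInfo pairs a log set with one outstanding lock reply per tlog in that set;
+        // getDurableVersion() later reads each reply's end and knownCommittedVersion to decide where
+        // this epoch can safely end.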
lockResults[i].logSet = logServers[i]; + for (int t = 0; t < logServers[i]->logServers.size(); t++) { + lockResults[i].replies.push_back(TagPartitionedLogSystem::lockTLog(dbgid, logServers[i]->logServers[t])); + } + } + + for (auto& old : oldLogData) { + if (foundSpecial) { + break; + } + for (auto& log : old.tLogs) { + if (log->locality == tagLocalitySpecial || log->locality == tagLocalityUpgraded) { + foundSpecial = true; + break; + } + if (!lockedLocalities.count(log->locality)) { + TraceEvent("EpochEndLockExtra").detail("Locality", log->locality); + TEST(true); // locking old generations for version information + lockedLocalities.insert(log->locality); + LogLockInfo lockResult; + lockResult.epochEnd = old.epochEnd; + lockResult.logSet = log; + for (int t = 0; t < log->logServers.size(); t++) { + lockResult.replies.push_back(TagPartitionedLogSystem::lockTLog(dbgid, log->logServers[t])); + } + lockResults.push_back(lockResult); + } + } + } + + if (*forceRecovery) { + state std::vector allLockResults; + ASSERT(lockResults.size() == 1); + allLockResults.push_back(lockResults[0]); + for (auto& old : oldLogData) { + ASSERT(old.tLogs.size() == 1); + LogLockInfo lockResult; + lockResult.epochEnd = old.epochEnd; + lockResult.logSet = old.tLogs[0]; + for (int t = 0; t < old.tLogs[0]->logServers.size(); t++) { + lockResult.replies.push_back(TagPartitionedLogSystem::lockTLog(dbgid, old.tLogs[0]->logServers[t])); + } + allLockResults.push_back(lockResult); + } + + state int lockNum = 0; + state Version maxRecoveryVersion = 0; + state int maxRecoveryIndex = 0; + while (lockNum < allLockResults.size()) { + auto versions = TagPartitionedLogSystem::getDurableVersion(dbgid, allLockResults[lockNum]); + if (versions.present()) { + if (versions.get().second > maxRecoveryVersion) { + TraceEvent("HigherRecoveryVersion", dbgid) + .detail("Idx", lockNum) + .detail("Ver", versions.get().second); + maxRecoveryVersion = versions.get().second; + maxRecoveryIndex = lockNum; + } + lockNum++; + } else { + wait(TagPartitionedLogSystem::getDurableVersionChanged(allLockResults[lockNum])); + } + } + if (maxRecoveryIndex > 0) { + logServers = oldLogData[maxRecoveryIndex - 1].tLogs; + prevState.txsTags = oldLogData[maxRecoveryIndex - 1].txsTags; + lockResults[0] = allLockResults[maxRecoveryIndex]; + lockResults[0].isCurrent = true; + + std::vector>> failed; + for (auto& log : logServers[0]->logServers) { + failed.push_back(makeReference>()); + failureTrackers.push_back(TagPartitionedLogSystem::monitorLog(log, failed.back())); + } + ASSERT(logFailed.size() == 1); + logFailed[0] = failed; + oldLogData.erase(oldLogData.begin(), oldLogData.begin() + maxRecoveryIndex); + } + } + + state Optional lastEnd; + state Version knownCommittedVersion = 0; + loop { + Version minEnd = std::numeric_limits::max(); + Version maxEnd = 0; + std::vector> changes; + for (int log = 0; log < logServers.size(); log++) { + if (!logServers[log]->isLocal) { + continue; + } + auto versions = + TagPartitionedLogSystem::getDurableVersion(dbgid, lockResults[log], logFailed[log], lastEnd); + if (versions.present()) { + knownCommittedVersion = std::max(knownCommittedVersion, versions.get().first); + maxEnd = std::max(maxEnd, versions.get().second); + minEnd = std::min(minEnd, versions.get().second); + } + changes.push_back(TagPartitionedLogSystem::getDurableVersionChanged(lockResults[log], logFailed[log])); + } + + if (maxEnd > 0 && (!lastEnd.present() || maxEnd < lastEnd.get())) { + TEST(lastEnd.present()); // Restarting recovery at an earlier point + + 
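+            // A tlog that was unresponsive in an earlier iteration can reappear with an older end
+            // version and invalidate the recovery version chosen before; when that happens, publish a
+            // fresh provisional log system that recovers at the new, smaller minEnd instead.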
auto logSystem = makeReference(dbgid, locality, prevState.recoveryCount); + + lastEnd = minEnd; + logSystem->tLogs = logServers; + logSystem->logRouterTags = prevState.logRouterTags; + logSystem->txsTags = prevState.txsTags; + logSystem->oldLogData = oldLogData; + logSystem->logSystemType = prevState.logSystemType; + logSystem->rejoins = rejoins; + logSystem->lockResults = lockResults; + if (knownCommittedVersion > minEnd) { + knownCommittedVersion = minEnd; + } + logSystem->recoverAt = minEnd; + logSystem->knownCommittedVersion = knownCommittedVersion; + TraceEvent(SevDebug, "FinalRecoveryVersionInfo") + .detail("KCV", knownCommittedVersion) + .detail("MinEnd", minEnd); + logSystem->remoteLogsWrittenToCoreState = true; + logSystem->stopped = true; + logSystem->pseudoLocalities = prevState.pseudoLocalities; + + outLogSystem->set(logSystem); + } + + wait(waitForAny(changes)); + } +} + +ACTOR Future TagPartitionedLogSystem::recruitOldLogRouters(TagPartitionedLogSystem* self, + vector workers, + LogEpoch recoveryCount, + int8_t locality, + Version startVersion, + std::vector tLogLocalities, + Reference tLogPolicy, + bool forRemote) { + state vector>> logRouterInitializationReplies; + state vector> allReplies; + int nextRouter = 0; + state Version lastStart = std::numeric_limits::max(); + + if (!forRemote) { + Version maxStart = TagPartitionedLogSystem::getMaxLocalStartVersion(self->tLogs); + + lastStart = std::max(startVersion, maxStart); + if (self->logRouterTags == 0) { + ASSERT_WE_THINK(false); + self->logSystemConfigChanged.trigger(); + return Void(); + } + + bool found = false; + for (auto& tLogs : self->tLogs) { + if (tLogs->locality == locality) { + found = true; + } + + tLogs->logRouters.clear(); + } + + if (!found) { + TraceEvent("RecruitingOldLogRoutersAddingLocality") + .detail("Locality", locality) + .detail("LastStart", lastStart); + auto newLogSet = makeReference(); + newLogSet->locality = locality; + newLogSet->startVersion = lastStart; + newLogSet->isLocal = false; + self->tLogs.push_back(newLogSet); + } + + for (auto& tLogs : self->tLogs) { + // Recruit log routers for old generations of the primary locality + if (tLogs->locality == locality) { + logRouterInitializationReplies.emplace_back(); + for (int i = 0; i < self->logRouterTags; i++) { + InitializeLogRouterRequest req; + req.recoveryCount = recoveryCount; + req.routerTag = Tag(tagLocalityLogRouter, i); + req.startVersion = lastStart; + req.tLogLocalities = tLogLocalities; + req.tLogPolicy = tLogPolicy; + req.locality = locality; + auto reply = transformErrors( + throwErrorOr(workers[nextRouter].logRouter.getReplyUnlessFailedFor( + req, SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)), + master_recovery_failed()); + logRouterInitializationReplies.back().push_back(reply); + allReplies.push_back(reply); + nextRouter = (nextRouter + 1) % workers.size(); + } + } + } + } + + for (auto& old : self->oldLogData) { + Version maxStart = TagPartitionedLogSystem::getMaxLocalStartVersion(old.tLogs); + + if (old.logRouterTags == 0 || maxStart >= lastStart) { + break; + } + lastStart = std::max(startVersion, maxStart); + bool found = false; + for (auto& tLogs : old.tLogs) { + if (tLogs->locality == locality) { + found = true; + } + tLogs->logRouters.clear(); + } + + if (!found) { + TraceEvent("RecruitingOldLogRoutersAddingLocality") + .detail("Locality", locality) + .detail("LastStart", lastStart); + auto newLogSet = makeReference(); + newLogSet->locality = locality; + newLogSet->startVersion = 
lastStart; + old.tLogs.push_back(newLogSet); + } + + for (auto& tLogs : old.tLogs) { + // Recruit log routers for old generations of the primary locality + if (tLogs->locality == locality) { + logRouterInitializationReplies.emplace_back(); + for (int i = 0; i < old.logRouterTags; i++) { + InitializeLogRouterRequest req; + req.recoveryCount = recoveryCount; + req.routerTag = Tag(tagLocalityLogRouter, i); + req.startVersion = lastStart; + req.tLogLocalities = tLogLocalities; + req.tLogPolicy = tLogPolicy; + req.locality = locality; + auto reply = transformErrors( + throwErrorOr(workers[nextRouter].logRouter.getReplyUnlessFailedFor( + req, SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)), + master_recovery_failed()); + logRouterInitializationReplies.back().push_back(reply); + allReplies.push_back(reply); + nextRouter = (nextRouter + 1) % workers.size(); + } + } + } + } + + wait(waitForAll(allReplies)); + + int nextReplies = 0; + lastStart = std::numeric_limits::max(); + vector> failed; + + if (!forRemote) { + Version maxStart = TagPartitionedLogSystem::getMaxLocalStartVersion(self->tLogs); + + lastStart = std::max(startVersion, maxStart); + for (auto& tLogs : self->tLogs) { + if (tLogs->locality == locality) { + for (int i = 0; i < logRouterInitializationReplies[nextReplies].size(); i++) { + tLogs->logRouters.push_back(makeReference>>( + OptionalInterface(logRouterInitializationReplies[nextReplies][i].get()))); + failed.push_back( + waitFailureClient(logRouterInitializationReplies[nextReplies][i].get().waitFailure, + SERVER_KNOBS->TLOG_TIMEOUT, + -SERVER_KNOBS->TLOG_TIMEOUT / SERVER_KNOBS->SECONDS_BEFORE_NO_FAILURE_DELAY, + /*trace=*/true)); + } + nextReplies++; + } + } + } + + for (auto& old : self->oldLogData) { + Version maxStart = TagPartitionedLogSystem::getMaxLocalStartVersion(old.tLogs); + if (old.logRouterTags == 0 || maxStart >= lastStart) { + break; + } + lastStart = std::max(startVersion, maxStart); + for (auto& tLogs : old.tLogs) { + if (tLogs->locality == locality) { + for (int i = 0; i < logRouterInitializationReplies[nextReplies].size(); i++) { + tLogs->logRouters.push_back(makeReference>>( + OptionalInterface(logRouterInitializationReplies[nextReplies][i].get()))); + if (!forRemote) { + failed.push_back(waitFailureClient( + logRouterInitializationReplies[nextReplies][i].get().waitFailure, + SERVER_KNOBS->TLOG_TIMEOUT, + -SERVER_KNOBS->TLOG_TIMEOUT / SERVER_KNOBS->SECONDS_BEFORE_NO_FAILURE_DELAY, + /*trace=*/true)); + } + } + nextReplies++; + } + } + } + + if (!forRemote) { + self->logSystemConfigChanged.trigger(); + wait(failed.size() ? 
tagError(quorum(failed, 1), master_tlog_failed()) : Future(Never())); + throw internal_error(); + } + return Void(); +} + +Version TagPartitionedLogSystem::getMaxLocalStartVersion(const std::vector>& tLogs) { + Version maxStart = 0; + for (const auto& logSet : tLogs) { + if (logSet->isLocal) { + maxStart = std::max(maxStart, logSet->startVersion); + } + } + return maxStart; +} + +std::vector TagPartitionedLogSystem::getLocalTags(int8_t locality, const std::vector& allTags) { + std::vector localTags; + for (const auto& tag : allTags) { + if (locality == tagLocalitySpecial || locality == tag.locality || tag.locality < 0) { + localTags.push_back(tag); + } + } + return localTags; +} + +ACTOR Future TagPartitionedLogSystem::newRemoteEpoch(TagPartitionedLogSystem* self, + Reference oldLogSystem, + Future fRemoteWorkers, + DatabaseConfiguration configuration, + LogEpoch recoveryCount, + int8_t remoteLocality, + std::vector allTags) { + TraceEvent("RemoteLogRecruitment_WaitingForWorkers").log(); + state RecruitRemoteFromConfigurationReply remoteWorkers = wait(fRemoteWorkers); + + state Reference logSet(new LogSet()); + logSet->tLogReplicationFactor = configuration.getRemoteTLogReplicationFactor(); + logSet->tLogVersion = configuration.tLogVersion; + logSet->tLogPolicy = configuration.getRemoteTLogPolicy(); + logSet->isLocal = false; + logSet->locality = remoteLocality; + + logSet->startVersion = oldLogSystem->knownCommittedVersion + 1; + state int lockNum = 0; + while (lockNum < oldLogSystem->lockResults.size()) { + if (oldLogSystem->lockResults[lockNum].logSet->locality == remoteLocality) { + loop { + auto versions = + TagPartitionedLogSystem::getDurableVersion(self->dbgid, oldLogSystem->lockResults[lockNum]); + if (versions.present()) { + logSet->startVersion = + std::min(std::min(versions.get().first + 1, oldLogSystem->lockResults[lockNum].epochEnd), + logSet->startVersion); + break; + } + wait(TagPartitionedLogSystem::getDurableVersionChanged(oldLogSystem->lockResults[lockNum])); + } + break; + } + lockNum++; + } + + vector localities; + localities.resize(remoteWorkers.remoteTLogs.size()); + for (int i = 0; i < remoteWorkers.remoteTLogs.size(); i++) { + localities[i] = remoteWorkers.remoteTLogs[i].locality; + } + + state Future oldRouterRecruitment = Void(); + if (logSet->startVersion < oldLogSystem->knownCommittedVersion + 1) { + ASSERT(oldLogSystem->logRouterTags > 0); + oldRouterRecruitment = TagPartitionedLogSystem::recruitOldLogRouters(self, + remoteWorkers.logRouters, + recoveryCount, + remoteLocality, + logSet->startVersion, + localities, + logSet->tLogPolicy, + true); + } + + state vector> logRouterInitializationReplies; + const Version startVersion = oldLogSystem->logRouterTags == 0 + ? 
oldLogSystem->recoverAt.get() + 1 + : std::max(self->tLogs[0]->startVersion, logSet->startVersion); + for (int i = 0; i < self->logRouterTags; i++) { + InitializeLogRouterRequest req; + req.recoveryCount = recoveryCount; + req.routerTag = Tag(tagLocalityLogRouter, i); + req.startVersion = startVersion; + req.tLogLocalities = localities; + req.tLogPolicy = logSet->tLogPolicy; + req.locality = remoteLocality; + logRouterInitializationReplies.push_back(transformErrors( + throwErrorOr( + remoteWorkers.logRouters[i % remoteWorkers.logRouters.size()].logRouter.getReplyUnlessFailedFor( + req, SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)), + master_recovery_failed())); + } + + std::vector localTags = TagPartitionedLogSystem::getLocalTags(remoteLocality, allTags); + LogSystemConfig oldLogSystemConfig = oldLogSystem->getLogSystemConfig(); + + logSet->tLogLocalities.resize(remoteWorkers.remoteTLogs.size()); + logSet->logServers.resize( + remoteWorkers.remoteTLogs + .size()); // Dummy interfaces, so that logSystem->getPushLocations() below uses the correct size + logSet->updateLocalitySet(localities); + + state vector> remoteTLogInitializationReplies; + vector remoteTLogReqs(remoteWorkers.remoteTLogs.size()); + + bool nonShardedTxs = self->getTLogVersion() < TLogVersion::V4; + if (oldLogSystem->logRouterTags == 0) { + std::vector locations; + for (Tag tag : localTags) { + locations.clear(); + logSet->getPushLocations(VectorRef(&tag, 1), locations, 0); + for (int loc : locations) + remoteTLogReqs[loc].recoverTags.push_back(tag); + } + + if (oldLogSystem->tLogs.size()) { + int maxTxsTags = oldLogSystem->txsTags; + bool needsOldTxs = oldLogSystem->tLogs[0]->tLogVersion < TLogVersion::V4; + for (auto& it : oldLogSystem->oldLogData) { + maxTxsTags = std::max(maxTxsTags, it.txsTags); + needsOldTxs = needsOldTxs || it.tLogs[0]->tLogVersion < TLogVersion::V4; + } + for (int i = needsOldTxs ? -1 : 0; i < maxTxsTags; i++) { + Tag tag = i == -1 ? txsTag : Tag(tagLocalityTxs, i); + Tag pushTag = (i == -1 || nonShardedTxs) ? 
txsTag : Tag(tagLocalityTxs, i % self->txsTags); + locations.clear(); + logSet->getPushLocations(VectorRef(&pushTag, 1), locations, 0); + for (int loc : locations) + remoteTLogReqs[loc].recoverTags.push_back(tag); + } + } + } + + if (oldLogSystem->tLogs.size()) { + if (nonShardedTxs) { + localTags.push_back(txsTag); + } else { + for (int i = 0; i < self->txsTags; i++) { + localTags.push_back(Tag(tagLocalityTxs, i)); + } + } + } + + for (int i = 0; i < remoteWorkers.remoteTLogs.size(); i++) { + InitializeTLogRequest& req = remoteTLogReqs[i]; + req.recruitmentID = self->recruitmentID; + req.logVersion = configuration.tLogVersion; + req.storeType = configuration.tLogDataStoreType; + req.spillType = configuration.tLogSpillType; + req.recoverFrom = oldLogSystemConfig; + req.recoverAt = oldLogSystem->recoverAt.get(); + req.knownCommittedVersion = oldLogSystem->knownCommittedVersion; + req.epoch = recoveryCount; + req.remoteTag = Tag(tagLocalityRemoteLog, i); + req.locality = remoteLocality; + req.isPrimary = false; + req.allTags = localTags; + req.startVersion = logSet->startVersion; + req.logRouterTags = 0; + req.txsTags = self->txsTags; + } + + remoteTLogInitializationReplies.reserve(remoteWorkers.remoteTLogs.size()); + for (int i = 0; i < remoteWorkers.remoteTLogs.size(); i++) + remoteTLogInitializationReplies.push_back(transformErrors( + throwErrorOr(remoteWorkers.remoteTLogs[i].tLog.getReplyUnlessFailedFor( + remoteTLogReqs[i], SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)), + master_recovery_failed())); + + TraceEvent("RemoteLogRecruitment_InitializingRemoteLogs") + .detail("StartVersion", logSet->startVersion) + .detail("LocalStart", self->tLogs[0]->startVersion) + .detail("LogRouterTags", self->logRouterTags); + wait(waitForAll(remoteTLogInitializationReplies) && waitForAll(logRouterInitializationReplies) && + oldRouterRecruitment); + + for (int i = 0; i < logRouterInitializationReplies.size(); i++) { + logSet->logRouters.push_back(makeReference>>( + OptionalInterface(logRouterInitializationReplies[i].get()))); + } + + for (int i = 0; i < remoteTLogInitializationReplies.size(); i++) { + logSet->logServers[i] = makeReference>>( + OptionalInterface(remoteTLogInitializationReplies[i].get())); + logSet->tLogLocalities[i] = remoteWorkers.remoteTLogs[i].locality; + } + filterLocalityDataForPolicy(logSet->tLogPolicy, &logSet->tLogLocalities); + + std::vector> recoveryComplete; + recoveryComplete.reserve(logSet->logServers.size()); + for (int i = 0; i < logSet->logServers.size(); i++) + recoveryComplete.push_back( + transformErrors(throwErrorOr(logSet->logServers[i]->get().interf().recoveryFinished.getReplyUnlessFailedFor( + TLogRecoveryFinishedRequest(), + SERVER_KNOBS->TLOG_TIMEOUT, + SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)), + master_recovery_failed())); + + self->remoteRecoveryComplete = waitForAll(recoveryComplete); + self->tLogs.push_back(logSet); + TraceEvent("RemoteLogRecruitment_CompletingRecovery").log(); + return Void(); +} + +ACTOR Future> TagPartitionedLogSystem::newEpoch( + Reference oldLogSystem, + RecruitFromConfigurationReply recr, + Future fRemoteWorkers, + DatabaseConfiguration configuration, + LogEpoch recoveryCount, + int8_t primaryLocality, + int8_t remoteLocality, + std::vector allTags, + Reference> recruitmentStalled) { + state double startTime = now(); + state Reference logSystem( + new TagPartitionedLogSystem(oldLogSystem->getDebugID(), oldLogSystem->locality, recoveryCount)); + logSystem->logSystemType = 
LogSystemType::tagPartitioned; + logSystem->expectedLogSets = 1; + logSystem->recoveredAt = oldLogSystem->recoverAt; + logSystem->repopulateRegionAntiQuorum = configuration.repopulateRegionAntiQuorum; + logSystem->recruitmentID = deterministicRandom()->randomUniqueID(); + logSystem->txsTags = configuration.tLogVersion >= TLogVersion::V4 ? recr.tLogs.size() : 0; + oldLogSystem->recruitmentID = logSystem->recruitmentID; + + if (configuration.usableRegions > 1) { + logSystem->logRouterTags = + recr.tLogs.size() * + std::max(1, configuration.desiredLogRouterCount / std::max(1, recr.tLogs.size())); + logSystem->expectedLogSets++; + logSystem->addPseudoLocality(tagLocalityLogRouterMapped); + TraceEvent e("AddPseudoLocality", logSystem->getDebugID()); + e.detail("Locality1", "LogRouterMapped"); + if (configuration.backupWorkerEnabled) { + logSystem->addPseudoLocality(tagLocalityBackup); + e.detail("Locality2", "Backup"); + } + } else if (configuration.backupWorkerEnabled) { + // Single region uses log router tag for backup workers. + logSystem->logRouterTags = + recr.tLogs.size() * + std::max(1, configuration.desiredLogRouterCount / std::max(1, recr.tLogs.size())); + logSystem->addPseudoLocality(tagLocalityBackup); + TraceEvent("AddPseudoLocality", logSystem->getDebugID()).detail("Locality", "Backup"); + } + + logSystem->tLogs.push_back(makeReference()); + logSystem->tLogs[0]->tLogVersion = configuration.tLogVersion; + logSystem->tLogs[0]->tLogWriteAntiQuorum = configuration.tLogWriteAntiQuorum; + logSystem->tLogs[0]->tLogReplicationFactor = configuration.tLogReplicationFactor; + logSystem->tLogs[0]->tLogPolicy = configuration.tLogPolicy; + logSystem->tLogs[0]->isLocal = true; + logSystem->tLogs[0]->locality = primaryLocality; + + state RegionInfo region = configuration.getRegion(recr.dcId); + + state int maxTxsTags = oldLogSystem->txsTags; + state bool needsOldTxs = oldLogSystem->tLogs.size() && oldLogSystem->getTLogVersion() < TLogVersion::V4; + for (auto& it : oldLogSystem->oldLogData) { + maxTxsTags = std::max(maxTxsTags, it.txsTags); + needsOldTxs = needsOldTxs || it.tLogs[0]->tLogVersion < TLogVersion::V4; + } + + if (region.satelliteTLogReplicationFactor > 0 && configuration.usableRegions > 1) { + logSystem->tLogs.push_back(makeReference()); + if (recr.satelliteFallback) { + logSystem->tLogs[1]->tLogWriteAntiQuorum = region.satelliteTLogWriteAntiQuorumFallback; + logSystem->tLogs[1]->tLogReplicationFactor = region.satelliteTLogReplicationFactorFallback; + logSystem->tLogs[1]->tLogPolicy = region.satelliteTLogPolicyFallback; + } else { + logSystem->tLogs[1]->tLogWriteAntiQuorum = region.satelliteTLogWriteAntiQuorum; + logSystem->tLogs[1]->tLogReplicationFactor = region.satelliteTLogReplicationFactor; + logSystem->tLogs[1]->tLogPolicy = region.satelliteTLogPolicy; + } + logSystem->tLogs[1]->isLocal = true; + logSystem->tLogs[1]->locality = tagLocalitySatellite; + logSystem->tLogs[1]->tLogVersion = configuration.tLogVersion; + logSystem->tLogs[1]->startVersion = oldLogSystem->knownCommittedVersion + 1; + + logSystem->tLogs[1]->tLogLocalities.resize(recr.satelliteTLogs.size()); + for (int i = 0; i < recr.satelliteTLogs.size(); i++) { + logSystem->tLogs[1]->tLogLocalities[i] = recr.satelliteTLogs[i].locality; + } + filterLocalityDataForPolicy(logSystem->tLogs[1]->tLogPolicy, &logSystem->tLogs[1]->tLogLocalities); + + logSystem->tLogs[1]->logServers.resize( + recr.satelliteTLogs + .size()); // Dummy interfaces, so that logSystem->getPushLocations() below uses the correct size + 
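+        // The entries stay default-constructed (empty) for now: getPushLocations() only needs the
+        // count and the locality set, and the real satellite interfaces are filled in below once the
+        // InitializeTLogRequests have been answered.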
logSystem->tLogs[1]->updateLocalitySet(logSystem->tLogs[1]->tLogLocalities); + logSystem->tLogs[1]->populateSatelliteTagLocations( + logSystem->logRouterTags, oldLogSystem->logRouterTags, logSystem->txsTags, maxTxsTags); + logSystem->expectedLogSets++; + } + + if (oldLogSystem->tLogs.size()) { + logSystem->oldLogData.emplace_back(); + logSystem->oldLogData[0].tLogs = oldLogSystem->tLogs; + logSystem->oldLogData[0].epochBegin = oldLogSystem->tLogs[0]->startVersion; + logSystem->oldLogData[0].epochEnd = oldLogSystem->knownCommittedVersion + 1; + logSystem->oldLogData[0].logRouterTags = oldLogSystem->logRouterTags; + logSystem->oldLogData[0].txsTags = oldLogSystem->txsTags; + logSystem->oldLogData[0].pseudoLocalities = oldLogSystem->pseudoLocalities; + logSystem->oldLogData[0].epoch = oldLogSystem->epoch; + } + logSystem->oldLogData.insert( + logSystem->oldLogData.end(), oldLogSystem->oldLogData.begin(), oldLogSystem->oldLogData.end()); + + logSystem->tLogs[0]->startVersion = oldLogSystem->knownCommittedVersion + 1; + logSystem->backupStartVersion = oldLogSystem->knownCommittedVersion + 1; + state int lockNum = 0; + while (lockNum < oldLogSystem->lockResults.size()) { + if (oldLogSystem->lockResults[lockNum].logSet->locality == primaryLocality) { + if (oldLogSystem->lockResults[lockNum].isCurrent && oldLogSystem->lockResults[lockNum].logSet->isLocal) { + break; + } + state Future stalledAfter = setAfter(recruitmentStalled, SERVER_KNOBS->MAX_RECOVERY_TIME, true); + loop { + auto versions = + TagPartitionedLogSystem::getDurableVersion(logSystem->dbgid, oldLogSystem->lockResults[lockNum]); + if (versions.present()) { + logSystem->tLogs[0]->startVersion = + std::min(std::min(versions.get().first + 1, oldLogSystem->lockResults[lockNum].epochEnd), + logSystem->tLogs[0]->startVersion); + break; + } + wait(TagPartitionedLogSystem::getDurableVersionChanged(oldLogSystem->lockResults[lockNum])); + } + stalledAfter.cancel(); + break; + } + lockNum++; + } + + vector localities; + localities.resize(recr.tLogs.size()); + for (int i = 0; i < recr.tLogs.size(); i++) { + localities[i] = recr.tLogs[i].locality; + } + + state Future oldRouterRecruitment = Never(); + TraceEvent("NewEpochStartVersion", oldLogSystem->getDebugID()) + .detail("StartVersion", logSystem->tLogs[0]->startVersion) + .detail("EpochEnd", oldLogSystem->knownCommittedVersion + 1) + .detail("Locality", primaryLocality) + .detail("OldLogRouterTags", oldLogSystem->logRouterTags); + if (oldLogSystem->logRouterTags > 0 || + logSystem->tLogs[0]->startVersion < oldLogSystem->knownCommittedVersion + 1) { + oldRouterRecruitment = TagPartitionedLogSystem::recruitOldLogRouters(oldLogSystem.getPtr(), + recr.oldLogRouters, + recoveryCount, + primaryLocality, + logSystem->tLogs[0]->startVersion, + localities, + logSystem->tLogs[0]->tLogPolicy, + false); + if (oldLogSystem->knownCommittedVersion - logSystem->tLogs[0]->startVersion > + SERVER_KNOBS->MAX_RECOVERY_VERSIONS) { + // make sure we can recover in the other DC. 
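+            // If the primary has fallen more than MAX_RECOVERY_VERSIONS behind, flag recruitment as
+            // stalled whenever the remote locality already has a durable version available, so that
+            // recovery is not forced to wait here when it could instead proceed in the other region.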
+ for (auto& lockResult : oldLogSystem->lockResults) { + if (lockResult.logSet->locality == remoteLocality) { + if (TagPartitionedLogSystem::getDurableVersion(logSystem->dbgid, lockResult).present()) { + recruitmentStalled->set(true); + } + } + } + } + } else { + oldLogSystem->logSystemConfigChanged.trigger(); + } + + std::vector localTags = TagPartitionedLogSystem::getLocalTags(primaryLocality, allTags); + state LogSystemConfig oldLogSystemConfig = oldLogSystem->getLogSystemConfig(); + + state vector> initializationReplies; + vector reqs(recr.tLogs.size()); + + logSystem->tLogs[0]->tLogLocalities.resize(recr.tLogs.size()); + logSystem->tLogs[0]->logServers.resize( + recr.tLogs.size()); // Dummy interfaces, so that logSystem->getPushLocations() below uses the correct size + logSystem->tLogs[0]->updateLocalitySet(localities); + + std::vector locations; + for (Tag tag : localTags) { + locations.clear(); + logSystem->tLogs[0]->getPushLocations(VectorRef(&tag, 1), locations, 0); + for (int loc : locations) + reqs[loc].recoverTags.push_back(tag); + } + for (int i = 0; i < oldLogSystem->logRouterTags; i++) { + Tag tag = Tag(tagLocalityLogRouter, i); + reqs[logSystem->tLogs[0]->bestLocationFor(tag)].recoverTags.push_back(tag); + } + bool nonShardedTxs = logSystem->getTLogVersion() < TLogVersion::V4; + if (oldLogSystem->tLogs.size()) { + for (int i = needsOldTxs ? -1 : 0; i < maxTxsTags; i++) { + Tag tag = i == -1 ? txsTag : Tag(tagLocalityTxs, i); + Tag pushTag = (i == -1 || nonShardedTxs) ? txsTag : Tag(tagLocalityTxs, i % logSystem->txsTags); + locations.clear(); + logSystem->tLogs[0]->getPushLocations(VectorRef(&pushTag, 1), locations, 0); + for (int loc : locations) + reqs[loc].recoverTags.push_back(tag); + } + if (nonShardedTxs) { + localTags.push_back(txsTag); + } else { + for (int i = 0; i < logSystem->txsTags; i++) { + localTags.push_back(Tag(tagLocalityTxs, i)); + } + } + } + + for (int i = 0; i < recr.tLogs.size(); i++) { + InitializeTLogRequest& req = reqs[i]; + req.recruitmentID = logSystem->recruitmentID; + req.logVersion = configuration.tLogVersion; + req.storeType = configuration.tLogDataStoreType; + req.spillType = configuration.tLogSpillType; + req.recoverFrom = oldLogSystemConfig; + req.recoverAt = oldLogSystem->recoverAt.get(); + req.knownCommittedVersion = oldLogSystem->knownCommittedVersion; + req.epoch = recoveryCount; + req.locality = primaryLocality; + req.remoteTag = Tag(tagLocalityRemoteLog, i); + req.isPrimary = true; + req.allTags = localTags; + req.startVersion = logSystem->tLogs[0]->startVersion; + req.logRouterTags = logSystem->logRouterTags; + req.txsTags = logSystem->txsTags; + } + + initializationReplies.reserve(recr.tLogs.size()); + for (int i = 0; i < recr.tLogs.size(); i++) + initializationReplies.push_back(transformErrors( + throwErrorOr(recr.tLogs[i].tLog.getReplyUnlessFailedFor( + reqs[i], SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)), + master_recovery_failed())); + + state std::vector> recoveryComplete; + + if (region.satelliteTLogReplicationFactor > 0 && configuration.usableRegions > 1) { + state vector> satelliteInitializationReplies; + vector sreqs(recr.satelliteTLogs.size()); + std::vector satelliteTags; + + if (logSystem->logRouterTags) { + for (int i = 0; i < oldLogSystem->logRouterTags; i++) { + Tag tag = Tag(tagLocalityLogRouter, i); + // Satellite logs will index a mutation with tagLocalityLogRouter with an id greater than + // the number of log routers as having an id mod the number of log routers. 
We thus need + // to make sure that if we're going from more log routers in the previous generation to + // less log routers in the newer one, that we map the log router tags onto satellites that + // are the preferred location for id%logRouterTags. + Tag pushLocation = Tag(tagLocalityLogRouter, i % logSystem->logRouterTags); + locations.clear(); + logSystem->tLogs[1]->getPushLocations(VectorRef(&pushLocation, 1), locations, 0); + for (int loc : locations) + sreqs[loc].recoverTags.push_back(tag); + } + } + if (oldLogSystem->tLogs.size()) { + for (int i = needsOldTxs ? -1 : 0; i < maxTxsTags; i++) { + Tag tag = i == -1 ? txsTag : Tag(tagLocalityTxs, i); + Tag pushTag = (i == -1 || nonShardedTxs) ? txsTag : Tag(tagLocalityTxs, i % logSystem->txsTags); + locations.clear(); + logSystem->tLogs[1]->getPushLocations(VectorRef(&pushTag, 1), locations, 0); + for (int loc : locations) + sreqs[loc].recoverTags.push_back(tag); + } + if (nonShardedTxs) { + satelliteTags.push_back(txsTag); + } else { + for (int i = 0; i < logSystem->txsTags; i++) { + satelliteTags.push_back(Tag(tagLocalityTxs, i)); + } + } + } + + for (int i = 0; i < recr.satelliteTLogs.size(); i++) { + InitializeTLogRequest& req = sreqs[i]; + req.recruitmentID = logSystem->recruitmentID; + req.logVersion = configuration.tLogVersion; + req.storeType = configuration.tLogDataStoreType; + req.spillType = configuration.tLogSpillType; + req.recoverFrom = oldLogSystemConfig; + req.recoverAt = oldLogSystem->recoverAt.get(); + req.knownCommittedVersion = oldLogSystem->knownCommittedVersion; + req.epoch = recoveryCount; + req.locality = tagLocalitySatellite; + req.remoteTag = Tag(); + req.isPrimary = true; + req.allTags = satelliteTags; + req.startVersion = oldLogSystem->knownCommittedVersion + 1; + req.logRouterTags = logSystem->logRouterTags; + req.txsTags = logSystem->txsTags; + } + + satelliteInitializationReplies.reserve(recr.satelliteTLogs.size()); + for (int i = 0; i < recr.satelliteTLogs.size(); i++) + satelliteInitializationReplies.push_back(transformErrors( + throwErrorOr(recr.satelliteTLogs[i].tLog.getReplyUnlessFailedFor( + sreqs[i], SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)), + master_recovery_failed())); + + wait(waitForAll(satelliteInitializationReplies) || oldRouterRecruitment); + + for (int i = 0; i < satelliteInitializationReplies.size(); i++) { + logSystem->tLogs[1]->logServers[i] = makeReference>>( + OptionalInterface(satelliteInitializationReplies[i].get())); + } + + for (int i = 0; i < logSystem->tLogs[1]->logServers.size(); i++) + recoveryComplete.push_back(transformErrors( + throwErrorOr( + logSystem->tLogs[1]->logServers[i]->get().interf().recoveryFinished.getReplyUnlessFailedFor( + TLogRecoveryFinishedRequest(), + SERVER_KNOBS->TLOG_TIMEOUT, + SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)), + master_recovery_failed())); + } + + wait(waitForAll(initializationReplies) || oldRouterRecruitment); + + for (int i = 0; i < initializationReplies.size(); i++) { + logSystem->tLogs[0]->logServers[i] = makeReference>>( + OptionalInterface(initializationReplies[i].get())); + logSystem->tLogs[0]->tLogLocalities[i] = recr.tLogs[i].locality; + } + filterLocalityDataForPolicy(logSystem->tLogs[0]->tLogPolicy, &logSystem->tLogs[0]->tLogLocalities); + + // Don't force failure of recovery if it took us a long time to recover. 
This avoids multiple long running
+    // recoveries causing tests to time out
+    if (BUGGIFY && now() - startTime < 300 && g_network->isSimulated() && g_simulator.speedUpSimulation)
+        throw master_recovery_failed();
+
+    for (int i = 0; i < logSystem->tLogs[0]->logServers.size(); i++)
+        recoveryComplete.push_back(transformErrors(
+            throwErrorOr(logSystem->tLogs[0]->logServers[i]->get().interf().recoveryFinished.getReplyUnlessFailedFor(
+                TLogRecoveryFinishedRequest(),
+                SERVER_KNOBS->TLOG_TIMEOUT,
+                SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)),
+            master_recovery_failed()));
+    logSystem->recoveryComplete = waitForAll(recoveryComplete);
+
+    if (configuration.usableRegions > 1) {
+        logSystem->hasRemoteServers = true;
+        logSystem->remoteRecovery = TagPartitionedLogSystem::newRemoteEpoch(
+            logSystem.getPtr(), oldLogSystem, fRemoteWorkers, configuration, recoveryCount, remoteLocality, allTags);
+        if (oldLogSystem->tLogs.size() > 0 && oldLogSystem->tLogs[0]->locality == tagLocalitySpecial) {
+            // The wait is required so that we know both primary logs and remote logs have copied the data between
+            // the known committed version and the recovery version.
+            // FIXME: we can remove this wait once we are able to have log routers which can ship data to the remote
+            // logs without using log router tags.
+            wait(logSystem->remoteRecovery);
+        }
+    } else {
+        logSystem->hasRemoteServers = false;
+        logSystem->remoteRecovery = logSystem->recoveryComplete;
+        logSystem->remoteRecoveryComplete = logSystem->recoveryComplete;
+    }
+
+    return logSystem;
+}
+
+ACTOR Future<Void> TagPartitionedLogSystem::trackRejoins(
+    UID dbgid,
+    std::vector<std::pair<Reference<AsyncVar<OptionalInterface<TLogInterface>>>, Reference<IReplicationPolicy>>>
+        logServers,
+    FutureStream<TLogRejoinRequest> rejoinRequests) {
+    state std::map<UID, ReplyPromise<TLogRejoinReply>> lastReply;
+    state std::set<UID> logsWaiting;
+    state double startTime = now();
+    state Future<Void> warnTimeout = delay(SERVER_KNOBS->TLOG_SLOW_REJOIN_WARN_TIMEOUT_SECS);
+
+    for (const auto& log : logServers) {
+        logsWaiting.insert(log.first->get().id());
+    }
+
+    try {
+        loop choose {
+            when(TLogRejoinRequest req = waitNext(rejoinRequests)) {
+                int pos = -1;
+                for (int i = 0; i < logServers.size(); i++) {
+                    if (logServers[i].first->get().id() == req.myInterface.id()) {
+                        pos = i;
+                        logsWaiting.erase(logServers[i].first->get().id());
+                        break;
+                    }
+                }
+                if (pos != -1) {
+                    TraceEvent("TLogJoinedMe", dbgid)
+                        .detail("TLog", req.myInterface.id())
+                        .detail("Address", req.myInterface.commit.getEndpoint().getPrimaryAddress().toString());
+                    if (!logServers[pos].first->get().present() ||
+                        req.myInterface.commit.getEndpoint() !=
+                            logServers[pos].first->get().interf().commit.getEndpoint()) {
+                        TLogInterface interf = req.myInterface;
+                        filterLocalityDataForPolicyDcAndProcess(logServers[pos].second, &interf.filteredLocality);
+                        logServers[pos].first->setUnconditional(OptionalInterface<TLogInterface>(interf));
+                    }
+                    lastReply[req.myInterface.id()].send(TLogRejoinReply{ false });
+                    lastReply[req.myInterface.id()] = req.reply;
+                } else {
+                    TraceEvent("TLogJoinedMeUnknown", dbgid)
+                        .detail("TLog", req.myInterface.id())
+                        .detail("Address", req.myInterface.commit.getEndpoint().getPrimaryAddress().toString());
+                    req.reply.send(true);
+                }
+            }
+            when(wait(warnTimeout)) {
+                for (const auto& logId : logsWaiting) {
+                    TraceEvent(SevWarnAlways, "TLogRejoinSlow", dbgid)
+                        .detail("Elapsed", now() - startTime)
+                        .detail("LogId", logId);
+                }
+                warnTimeout = Never();
+            }
+        }
+    } catch (...) {
+        for (auto it = lastReply.begin(); it != lastReply.end(); ++it)
+            it->second.send(TLogRejoinReply{ true });
+        throw;
+    }
+}
+
+ACTOR Future<TLogLockResult> TagPartitionedLogSystem::lockTLog(
+    UID myID,
+    Reference<AsyncVar<OptionalInterface<TLogInterface>>> tlog) {
+
+    TraceEvent("TLogLockStarted", myID).detail("TLog", tlog->get().id());
+    loop {
+        choose {
+            when(TLogLockResult data =
+                     wait(tlog->get().present()
+                              ? brokenPromiseToNever(tlog->get().interf().lock.getReply<TLogLockResult>())
+                              : Never())) {
+                TraceEvent("TLogLocked", myID).detail("TLog", tlog->get().id()).detail("End", data.end);
+                return data;
+            }
+            when(wait(tlog->onChange())) {}
+        }
+    }
+}
+
+template <class T>
+vector<T> TagPartitionedLogSystem::getReadyNonError(vector<Future<T>> const& futures) {
+    // Return the values of those futures which have (non-error) values ready
+    std::vector<T> result;
+    for (auto& f : futures)
+        if (f.isReady() && !f.isError())
+            result.push_back(f.get());
+    return result;
+}
diff --git a/fdbserver/TagPartitionedLogSystem.actor.h b/fdbserver/TagPartitionedLogSystem.actor.h
new file mode 100644
index 0000000000..0115393197
--- /dev/null
+++ b/fdbserver/TagPartitionedLogSystem.actor.h
@@ -0,0 +1,554 @@
+/*
+ * TagPartitionedLogSystem.actor.h
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_TAGPARTITIONEDLOGSYSTEM_ACTOR_G_H)
+#define FDBSERVER_TAGPARTITIONEDLOGSYSTEM_ACTOR_G_H
+#include "fdbserver/TagPartitionedLogSystem.actor.g.h"
+#elif !defined(FDBSERVER_TAGPARTITIONEDLOGSYSTEM_ACTOR_H)
+#define FDBSERVER_TAGPARTITIONEDLOGSYSTEM_ACTOR_H
+
+#pragma once
+
+#include "fdbclient/SystemData.h"
+#include "fdbrpc/Replication.h"
+#include "fdbrpc/ReplicationUtils.h"
+#include "fdbrpc/simulator.h"
+#include "fdbserver/DBCoreState.h"
+#include "fdbserver/Knobs.h"
+#include "fdbserver/LogProtocolMessage.h"
+#include "fdbserver/LogSystem.h"
+#include "fdbserver/RecoveryState.h"
+#include "fdbserver/ServerDBInfo.h"
+#include "fdbserver/WaitFailure.h"
+#include "flow/ActorCollection.h"
+
+#include "flow/actorcompiler.h" // This must be the last #include.
+
+// TagPartitionedLogSystem info in old epoch
+struct OldLogData {
+    std::vector<Reference<LogSet>> tLogs;
+    int32_t logRouterTags;
+    int32_t txsTags; // The number of txsTags, which may change across generations.
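+    // [epochBegin, epochEnd) is the version range this generation covered; epochEnd equals the start
+    // version of the generation that replaced it.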
+ Version epochBegin, epochEnd; + std::set pseudoLocalities; + LogEpoch epoch; + + OldLogData() : logRouterTags(0), txsTags(0), epochBegin(0), epochEnd(0), epoch(0) {} + + // Constructor for T of OldTLogConf and OldTLogCoreData + template + explicit OldLogData(const T& conf) + : logRouterTags(conf.logRouterTags), txsTags(conf.txsTags), epochBegin(conf.epochBegin), epochEnd(conf.epochEnd), + pseudoLocalities(conf.pseudoLocalities), epoch(conf.epoch) { + tLogs.resize(conf.tLogs.size()); + for (int j = 0; j < conf.tLogs.size(); j++) { + auto logSet = makeReference(conf.tLogs[j]); + tLogs[j] = logSet; + } + } +}; + +struct LogLockInfo { + Version epochEnd; + bool isCurrent; + Reference logSet; + std::vector> replies; + + LogLockInfo() : epochEnd(std::numeric_limits::max()), isCurrent(false) {} +}; + +struct TagPartitionedLogSystem final : ILogSystem, ReferenceCounted { + const UID dbgid; + LogSystemType logSystemType; + std::vector> tLogs; // LogSets in different locations: primary, satellite, or remote + int expectedLogSets; + int logRouterTags; + int txsTags; + UID recruitmentID; + int repopulateRegionAntiQuorum; + bool stopped; + std::set pseudoLocalities; // Represent special localities that will be mapped to tagLocalityLogRouter + const LogEpoch epoch; + LogEpoch oldestBackupEpoch; + + // new members + std::map pseudoLocalityPopVersion; + Future rejoins; + Future recoveryComplete; + Future remoteRecovery; + Future remoteRecoveryComplete; + std::vector lockResults; + AsyncVar recoveryCompleteWrittenToCoreState; + bool remoteLogsWrittenToCoreState; + bool hasRemoteServers; + AsyncTrigger backupWorkerChanged; + std::set removedBackupWorkers; // Workers that are removed before setting them. + + Optional recoverAt; + Optional recoveredAt; + Version knownCommittedVersion; + Version backupStartVersion = invalidVersion; // max(tLogs[0].startVersion, previous epochEnd). + LocalityData locality; + // For each currently running popFromLog actor, outstandingPops is + // (logID, tag)->(max popped version, durableKnownCommittedVersion). + // Why do we need durableKnownCommittedVersion? knownCommittedVersion gives the lower bound of what data + // will need to be copied into the next generation to restore the replication factor. + // Guess: It probably serves as a minimum version of what data should be on a TLog in the next generation and + // sending a pop for anything less than durableKnownCommittedVersion for the TLog will be absurd. + std::map, std::pair> outstandingPops; + + Optional>> addActor; + ActorCollection popActors; + std::vector oldLogData; // each element has the log info. in one old epoch. 
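+    // Ordered newest first: oldLogData[0] is the epoch that immediately preceded the current one, so
+    // walking the vector moves backwards through recovery history (newEpoch() records the just-ended
+    // generation at index 0).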
+ AsyncTrigger logSystemConfigChanged; + + TagPartitionedLogSystem(UID dbgid, + LocalityData locality, + LogEpoch e, + Optional>> addActor = Optional>>()) + : dbgid(dbgid), logSystemType(LogSystemType::empty), expectedLogSets(0), logRouterTags(0), txsTags(0), + repopulateRegionAntiQuorum(0), stopped(false), epoch(e), oldestBackupEpoch(0), + recoveryCompleteWrittenToCoreState(false), remoteLogsWrittenToCoreState(false), hasRemoteServers(false), + locality(locality), addActor(addActor), popActors(false) {} + + void stopRejoins() final; + + void addref() final; + + void delref() final; + + std::string describe() const final; + + UID getDebugID() const final; + + void addPseudoLocality(int8_t locality); + + Tag getPseudoPopTag(Tag tag, ProcessClass::ClassType type) const final; + + bool hasPseudoLocality(int8_t locality) const final; + + // Return the min version of all pseudoLocalities, i.e., logRouter and backupTag + Version popPseudoLocalityTag(Tag tag, Version upTo) final; + + static Future recoverAndEndEpoch(Reference>> const& outLogSystem, + UID const& dbgid, + DBCoreState const& oldState, + FutureStream const& rejoins, + LocalityData const& locality, + bool* forceRecovery); + + static Reference fromLogSystemConfig(UID const& dbgid, + LocalityData const& locality, + LogSystemConfig const& lsConf, + bool excludeRemote, + bool useRecoveredAt, + Optional>> addActor); + + static Reference fromOldLogSystemConfig(UID const& dbgid, + LocalityData const& locality, + LogSystemConfig const& lsConf); + + // Convert TagPartitionedLogSystem to DBCoreState and override input newState as return value + void toCoreState(DBCoreState& newState) final; + + bool remoteStorageRecovered() final; + + Future onCoreStateChanged() final; + + void coreStateWritten(DBCoreState const& newState) final; + + Future onError() final; + + ACTOR static Future onError_internal(TagPartitionedLogSystem* self); + + ACTOR static Future pushResetChecker(Reference self, NetworkAddress addr); + + ACTOR static Future recordPushMetrics(Reference self, + Reference dist, + NetworkAddress addr, + Future in); + + Future push(Version prevVersion, + Version version, + Version knownCommittedVersion, + Version minKnownCommittedVersion, + LogPushData& data, + SpanID const& spanContext, + Optional debugID) final; + + Reference peekAll(UID dbgid, Version begin, Version end, Tag tag, bool parallelGetMore); + + Reference peekRemote(UID dbgid, Version begin, Optional end, Tag tag, bool parallelGetMore); + + Reference peek(UID dbgid, Version begin, Optional end, Tag tag, bool parallelGetMore) final; + + Reference peek(UID dbgid, + Version begin, + Optional end, + std::vector tags, + bool parallelGetMore) final; + + Reference peekLocal(UID dbgid, + Tag tag, + Version begin, + Version end, + bool useMergePeekCursors, + int8_t peekLocality = tagLocalityInvalid); + + Reference peekTxs(UID dbgid, + Version begin, + int8_t peekLocality, + Version localEnd, + bool canDiscardPopped) final; + + Reference peekSingle(UID dbgid, + Version begin, + Tag tag, + std::vector> history) final; + + // LogRouter or BackupWorker use this function to obtain a cursor for peeking tlogs of a generation (i.e., epoch). + // Specifically, the epoch is determined by looking up "dbgid" in tlog sets of generations. + // The returned cursor can peek data at the "tag" from the given "begin" version to that epoch's end version or + // the recovery version for the latest old epoch. For the current epoch, the cursor has no end version. 
+ Reference peekLogRouter(UID dbgid, Version begin, Tag tag) final; + + Version getKnownCommittedVersion() final; + + Future onKnownCommittedVersionChange() final; + + void popLogRouter(Version upTo, Tag tag, Version durableKnownCommittedVersion, int8_t popLocality); + + void popTxs(Version upTo, int8_t popLocality) final; + + // pop 'tag.locality' type data up to the 'upTo' version + void pop(Version upTo, Tag tag, Version durableKnownCommittedVersion, int8_t popLocality) final; + + // pop tag from log up to the version defined in self->outstandingPops[].first + ACTOR static Future popFromLog(TagPartitionedLogSystem* self, + Reference>> log, + Tag tag, + double time); + + ACTOR static Future getPoppedFromTLog(Reference>> log, Tag tag); + + ACTOR static Future getPoppedTxs(TagPartitionedLogSystem* self); + + Future getTxsPoppedVersion() final; + + ACTOR static Future confirmEpochLive_internal(Reference logSet, Optional debugID); + + // Returns success after confirming that pushes in the current epoch are still possible + Future confirmEpochLive(Optional debugID) final; + + Future endEpoch() final; + + // Call only after end_epoch() has successfully completed. Returns a new epoch immediately following this one. + // The new epoch is only provisional until the caller updates the coordinated DBCoreState. + Future> newEpoch(RecruitFromConfigurationReply const& recr, + Future const& fRemoteWorkers, + DatabaseConfiguration const& config, + LogEpoch recoveryCount, + int8_t primaryLocality, + int8_t remoteLocality, + std::vector const& allTags, + Reference> const& recruitmentStalled) final; + + LogSystemConfig getLogSystemConfig() const final; + + Standalone getLogsValue() const final; + + Future onLogSystemConfigChange() final; + + Version getEnd() const final; + + Version getPeekEnd() const; + + void getPushLocations(VectorRef tags, std::vector& locations, bool allLocations) const final; + + bool hasRemoteLogs() const final; + + Tag getRandomRouterTag() const final; + + Tag getRandomTxsTag() const final; + + TLogVersion getTLogVersion() const final; + + int getLogRouterTags() const final; + + Version getBackupStartVersion() const final; + + std::map getOldEpochTagsVersionsInfo() const final; + + inline Reference getEpochLogSet(LogEpoch epoch) const; + + void setBackupWorkers(const std::vector& replies) final; + + bool removeBackupWorker(const BackupWorkerDoneRequest& req) final; + + LogEpoch getOldestBackupEpoch() const final; + + void setOldestBackupEpoch(LogEpoch epoch) final; + + ACTOR static Future monitorLog(Reference>> logServer, + Reference> failed); + + Optional> static getDurableVersion( + UID dbgid, + LogLockInfo lockInfo, + std::vector>> failed = std::vector>>(), + Optional lastEnd = Optional()); + + ACTOR static Future getDurableVersionChanged( + LogLockInfo lockInfo, + std::vector>> failed = std::vector>>()); + + ACTOR static Future epochEnd(Reference>> outLogSystem, + UID dbgid, + DBCoreState prevState, + FutureStream rejoinRequests, + LocalityData locality, + bool* forceRecovery); + + ACTOR static Future recruitOldLogRouters(TagPartitionedLogSystem* self, + vector workers, + LogEpoch recoveryCount, + int8_t locality, + Version startVersion, + std::vector tLogLocalities, + Reference tLogPolicy, + bool forRemote); + + static Version getMaxLocalStartVersion(const std::vector>& tLogs); + + static std::vector getLocalTags(int8_t locality, const std::vector& allTags); + + ACTOR static Future newRemoteEpoch(TagPartitionedLogSystem* self, + Reference oldLogSystem, + Future fRemoteWorkers, 
+ DatabaseConfiguration configuration, + LogEpoch recoveryCount, + int8_t remoteLocality, + std::vector allTags); + + ACTOR static Future> newEpoch(Reference oldLogSystem, + RecruitFromConfigurationReply recr, + Future fRemoteWorkers, + DatabaseConfiguration configuration, + LogEpoch recoveryCount, + int8_t primaryLocality, + int8_t remoteLocality, + std::vector allTags, + Reference> recruitmentStalled); + + ACTOR static Future trackRejoins( + UID dbgid, + std::vector>>, Reference>> + logServers, + FutureStream rejoinRequests); + + ACTOR static Future lockTLog(UID myID, Reference>> tlog); + + // FIXME: disabled during merge, update and use in epochEnd() + /* + static void lockMinimalTLogSet(const UID& dbgid, const DBCoreState& prevState, + const std::vector>>>& + logServers, const std::vector>>& logFailed, vector>* tLogReply ) + { + // Invariant: tLogReply[i] must correspond to the tlog stored as logServers[i]. + ASSERT(tLogReply->size() == prevState.tLogLocalities.size()); + ASSERT(logFailed.size() == tLogReply->size()); + + // For any given index, only one of the following will be true. + auto locking_completed = [&logFailed, tLogReply](int index) { + const auto& entry = tLogReply->at(index); + return !logFailed[index]->get() && entry.isValid() && entry.isReady() && !entry.isError(); + }; + auto locking_failed = [&logFailed, tLogReply](int index) { + const auto& entry = tLogReply->at(index); + return logFailed[index]->get() || (entry.isValid() && entry.isReady() && entry.isError()); + }; + auto locking_pending = [&logFailed, tLogReply](int index) { + const auto& entry = tLogReply->at(index); + return !logFailed[index]->get() && (entry.isValid() && !entry.isReady()); + }; + auto locking_skipped = [&logFailed, tLogReply](int index) { + const auto& entry = tLogReply->at(index); + return !logFailed[index]->get() && !entry.isValid(); + }; + + auto can_obtain_quorum = [&prevState](std::function filter) { + LocalityGroup filter_true; + std::vector filter_false, unused; + for (int i = 0; i < prevState.tLogLocalities.size() ; i++) { + if (filter(i)) { + filter_true.add(prevState.tLogLocalities[i]); + } else { + filter_false.push_back(prevState.tLogLocalities[i]); + } + } + bool valid = filter_true.validate(prevState.tLogPolicy); + if (!valid && prevState.tLogWriteAntiQuorum > 0 ) { + valid = !validateAllCombinations(unused, filter_true, prevState.tLogPolicy, filter_false, + prevState.tLogWriteAntiQuorum, false); + } + return valid; + }; + + // Step 1: Verify that if all the failed TLogs come back, they can't form a quorum. + if (can_obtain_quorum(locking_failed)) { + TraceEvent(SevInfo, "MasterRecoveryTLogLockingImpossible", dbgid).log(); + return; + } + + // Step 2: It's possible for us to succeed, but we need to lock additional logs. + // + // First, we need an accurate picture of what TLogs we're capable of locking. We can't tell the + // difference between a temporarily failed TLog and a permanently failed TLog. Thus, we assume + // all failures are permanent, and manually re-issue lock requests if they rejoin. + for (int i = 0; i < logFailed.size(); i++) { + const auto& r = tLogReply->at(i); + TEST(locking_failed(i) && (r.isValid() && !r.isReady())); // A TLog failed with a pending request. + // The reboot_a_tlog BUGGIFY below should cause the above case to be hit. + if (locking_failed(i)) { + tLogReply->at(i) = Future(); + } + } + + // We're trying to paritition the set of old tlogs into two sets, L and R, such that: + // (1). R does not validate the policy + // (2). 
+
+	// FIXME: disabled during merge, update and use in epochEnd()
+	/*
+	static void lockMinimalTLogSet(const UID& dbgid, const DBCoreState& prevState,
+	                               const std::vector<Reference<AsyncVar<OptionalInterface<TLogInterface>>>>& logServers,
+	                               const std::vector<Reference<AsyncVar<bool>>>& logFailed,
+	                               vector<Future<TLogLockResult>>* tLogReply) {
+	    // Invariant: tLogReply[i] must correspond to the tlog stored as logServers[i].
+	    ASSERT(tLogReply->size() == prevState.tLogLocalities.size());
+	    ASSERT(logFailed.size() == tLogReply->size());
+
+	    // For any given index, only one of the following will be true.
+	    auto locking_completed = [&logFailed, tLogReply](int index) {
+	        const auto& entry = tLogReply->at(index);
+	        return !logFailed[index]->get() && entry.isValid() && entry.isReady() && !entry.isError();
+	    };
+	    auto locking_failed = [&logFailed, tLogReply](int index) {
+	        const auto& entry = tLogReply->at(index);
+	        return logFailed[index]->get() || (entry.isValid() && entry.isReady() && entry.isError());
+	    };
+	    auto locking_pending = [&logFailed, tLogReply](int index) {
+	        const auto& entry = tLogReply->at(index);
+	        return !logFailed[index]->get() && (entry.isValid() && !entry.isReady());
+	    };
+	    auto locking_skipped = [&logFailed, tLogReply](int index) {
+	        const auto& entry = tLogReply->at(index);
+	        return !logFailed[index]->get() && !entry.isValid();
+	    };
+
+	    auto can_obtain_quorum = [&prevState](std::function<bool(int)> filter) {
+	        LocalityGroup filter_true;
+	        std::vector<LocalityData> filter_false, unused;
+	        for (int i = 0; i < prevState.tLogLocalities.size(); i++) {
+	            if (filter(i)) {
+	                filter_true.add(prevState.tLogLocalities[i]);
+	            } else {
+	                filter_false.push_back(prevState.tLogLocalities[i]);
+	            }
+	        }
+	        bool valid = filter_true.validate(prevState.tLogPolicy);
+	        if (!valid && prevState.tLogWriteAntiQuorum > 0) {
+	            valid = !validateAllCombinations(unused, filter_true, prevState.tLogPolicy, filter_false,
+	                                             prevState.tLogWriteAntiQuorum, false);
+	        }
+	        return valid;
+	    };
+
+	    // Step 1: Verify that if all the failed TLogs come back, they can't form a quorum.
+	    if (can_obtain_quorum(locking_failed)) {
+	        TraceEvent(SevInfo, "MasterRecoveryTLogLockingImpossible", dbgid).log();
+	        return;
+	    }
+
+	    // Step 2: It's possible for us to succeed, but we need to lock additional logs.
+	    //
+	    // First, we need an accurate picture of what TLogs we're capable of locking. We can't tell the
+	    // difference between a temporarily failed TLog and a permanently failed TLog. Thus, we assume
+	    // all failures are permanent, and manually re-issue lock requests if they rejoin.
+	    for (int i = 0; i < logFailed.size(); i++) {
+	        const auto& r = tLogReply->at(i);
+	        TEST(locking_failed(i) && (r.isValid() && !r.isReady())); // A TLog failed with a pending request.
+	        // The reboot_a_tlog BUGGIFY below should cause the above case to be hit.
+	        if (locking_failed(i)) {
+	            tLogReply->at(i) = Future<TLogLockResult>();
+	        }
+	    }
+
+	    // We're trying to partition the set of old tlogs into two sets, L and R, such that:
+	    // (1). R does not validate the policy
+	    // (2). |R| is as large as possible
+	    // (3). L contains all the already-locked TLogs
+	    // and then we only issue lock requests to TLogs in L. This is safe, as R does not have quorum,
+	    // so no commits may occur. It does not matter if L forms a quorum or not.
+	    //
+	    // We form these sets by starting with L as all machines and R as the empty set, and moving a
+	    // random machine from L to R until (1) or (2) no longer holds as true. Code-wise, L is
+	    // [0..end-can_omit), and R is [end-can_omit..end), and we move a random machine via randomizing
+	    // the order of the tlogs. Choosing a random machine was verified to generate a good-enough
+	    // result to be interesting in tests sufficiently frequently that we don't need to try to
+	    // calculate the exact optimal solution.
+	    std::vector<std::pair<LocalityData, int>> tlogs;
+	    for (int i = 0; i < prevState.tLogLocalities.size(); i++) {
+	        tlogs.emplace_back(prevState.tLogLocalities[i], i);
+	    }
+	    deterministicRandom()->randomShuffle(tlogs);
+	    // Rearrange the array such that the left side holds logs closer to being locked and the right
+	    // side holds logs that can't be locked. This makes us prefer locking already-locked TLogs,
+	    // which is how we respect the decisions made in the previous execution.
+	    auto idx_to_order = [&locking_completed, &locking_failed, &locking_pending, &locking_skipped](int index) {
+	        bool complete = locking_completed(index);
+	        bool pending = locking_pending(index);
+	        bool skipped = locking_skipped(index);
+	        bool failed = locking_failed(index);
+
+	        ASSERT(complete + pending + skipped + failed == 1);
+
+	        if (complete) return 0;
+	        if (pending) return 1;
+	        if (skipped) return 2;
+	        if (failed) return 3;
+
+	        ASSERT(false); // Programmer error.
+	        return -1;
+	    };
+	    std::sort(tlogs.begin(), tlogs.end(),
+	              // TODO: Change long type to `auto` once toolchain supports C++17.
+	              [&idx_to_order](const std::pair<LocalityData, int>& lhs, const std::pair<LocalityData, int>& rhs) {
+	                  return idx_to_order(lhs.second) < idx_to_order(rhs.second);
+	              });
+
+	    // Indexes that aren't in the vector are the ones we're considering omitting. Remove indexes until
+	    // the removed set forms a quorum.
+	    int can_omit = 0;
+	    std::vector<int> to_lock_indexes;
+	    for (auto it = tlogs.cbegin(); it != tlogs.cend() - 1; it++) {
+	        to_lock_indexes.push_back(it->second);
+	    }
+	    auto filter = [&to_lock_indexes](int index) {
+	        return std::find(to_lock_indexes.cbegin(), to_lock_indexes.cend(), index) == to_lock_indexes.cend();
+	    };
+	    while (true) {
+	        if (can_obtain_quorum(filter)) {
+	            break;
+	        } else {
+	            can_omit++;
+	            ASSERT(can_omit < tlogs.size());
+	            to_lock_indexes.pop_back();
+	        }
+	    }
+
+	    if (prevState.tLogReplicationFactor - prevState.tLogWriteAntiQuorum == 1) {
+	        ASSERT(can_omit == 0);
+	    }
+	    // Our previous check of making sure there aren't too many failed logs should have prevented this.
+	    ASSERT(!locking_failed(tlogs[tlogs.size() - can_omit - 1].second));
+
+	    // If we've managed to leave more tlogs unlocked than (RF-AQ), it means we've hit the case
+	    // where the policy engine has allowed us to have multiple logs in the same failure domain
+	    // with independent sets of data. This case validates that no code is relying on the old
+	    // quorum=(RF-AQ) logic, and now goes through the policy engine instead.
+	    TEST(can_omit >= prevState.tLogReplicationFactor - prevState.tLogWriteAntiQuorum); // Locking a subset of
+	                                                                                      // the TLogs while ending an epoch.
+
+	    const bool reboot_a_tlog = g_network->now() - g_simulator.lastConnectionFailure >
+	                                   g_simulator.connectionFailuresDisableDuration &&
+	                               BUGGIFY && deterministicRandom()->random01() < 0.25;
+	    TraceEvent(SevInfo, "MasterRecoveryTLogLocking", dbgid)
+	        .detail("Locks", tlogs.size() - can_omit)
+	        .detail("Skipped", can_omit)
+	        .detail("Replication", prevState.tLogReplicationFactor)
+	        .detail("Antiquorum", prevState.tLogWriteAntiQuorum)
+	        .detail("RebootBuggify", reboot_a_tlog);
+	    for (int i = 0; i < tlogs.size() - can_omit; i++) {
+	        const int index = tlogs[i].second;
+	        Future<TLogLockResult>& entry = tLogReply->at(index);
+	        if (!entry.isValid()) {
+	            entry = lockTLog(dbgid, logServers[index]);
+	        }
+	    }
+	    if (reboot_a_tlog) {
+	        g_simulator.lastConnectionFailure = g_network->now();
+	        for (int i = 0; i < tlogs.size() - can_omit; i++) {
+	            const int index = tlogs[i].second;
+	            if (logServers[index]->get().present()) {
+	                g_simulator.rebootProcess(
+	                    g_simulator.getProcessByAddress(logServers[index]->get().interf().address()),
+	                    ISimulator::RebootProcess);
+	                break;
+	            }
+	        }
+	    }
+	    // Intentionally leave `tlogs.size() - can_omit` .. `tlogs.size()` as !isValid() Futures.
+	}
+	*/
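The disabled `lockMinimalTLogSet()` above partitions the old TLogs into a set L to lock and a set R to omit, keeping R below a write quorum so the omitted logs can never commit. A toy illustration of that idea follows; it substitutes a simple majority for the real policy-engine check (`can_obtain_quorum`) and skips the already-locked/failed ordering, and all names in it are hypothetical:

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <random>
#include <vector>

int main() {
	const std::size_t n = 7;
	const std::size_t quorum = n / 2 + 1; // toy write quorum: a simple majority

	// Machine indices, shuffled so the choice of which machines to omit is
	// random, as the comment in lockMinimalTLogSet describes.
	std::vector<int> machines(n);
	for (std::size_t i = 0; i < n; i++)
		machines[i] = int(i);
	std::mt19937 rng(42);
	std::shuffle(machines.begin(), machines.end(), rng);

	// canFormQuorum stands in for the policy engine: with a majority quorum,
	// any set of at least `quorum` machines could commit on its own.
	auto canFormQuorum = [&](std::size_t size) { return size >= quorum; };

	// Grow R (the omitted set) one machine at a time, stopping while R is
	// still strictly below a quorum, so the omitted machines can never commit.
	std::size_t canOmit = 0;
	while (!canFormQuorum(canOmit + 1))
		canOmit++;

	// L = machines[0 .. n-canOmit): the set we actually send lock requests to.
	std::cout << "locking " << (n - canOmit) << " machines, omitting " << canOmit << ":\n";
	for (std::size_t i = 0; i < n - canOmit; i++)
		std::cout << "  lock machine " << machines[i] << "\n";
}
```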
+
+	template <class T>
+	static vector<T> getReadyNonError(vector<Future<T>> const& futures);
+};
+
+template <class T>
+vector<T> TagPartitionedLogSystem::getReadyNonError(vector<Future<T>> const& futures) {
+	// Return the values of those futures which have (non-error) values ready
+	std::vector<T> result;
+	for (auto& f : futures)
+		if (f.isReady() && !f.isError())
+			result.push_back(f.get());
+	return result;
+}
+
+#include "flow/unactorcompiler.h"
+#endif // FDBSERVER_TAGPARTITIONEDLOGSYSTEM_ACTOR_H
\ No newline at end of file
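A usage sketch for `getReadyNonError()`: it filters a batch of futures down to the values that are ready and not errors, e.g. when collecting whatever lock replies have arrived so far. This standalone analogue replaces flow's `Future<T>` with a minimal hypothetical `FakeFuture` stub, just enough to show the `isReady()`/`isError()` filtering semantics:

```cpp
#include <iostream>
#include <optional>
#include <string>
#include <vector>

// Minimal stand-in for flow's Future<T>, mirroring only the
// isReady()/isError()/get() checks that getReadyNonError() performs.
template <class T>
struct FakeFuture {
	std::optional<T> value; // set => ready with a value
	bool error = false;     // set => ready with an error

	bool isReady() const { return error || value.has_value(); }
	bool isError() const { return error; }
	const T& get() const { return *value; }
};

template <class T>
std::vector<T> getReadyNonError(const std::vector<FakeFuture<T>>& futures) {
	// Same filtering as the member template above: keep only ready, non-error values.
	std::vector<T> result;
	for (const auto& f : futures)
		if (f.isReady() && !f.isError())
			result.push_back(f.get());
	return result;
}

int main() {
	std::vector<FakeFuture<std::string>> futures(4);
	futures[0].value = "reply from tlog A"; // ready: kept
	futures[1].error = true;                // ready, but an error: skipped
	// futures[2] left pending: skipped
	futures[3].value = "reply from tlog B"; // ready: kept

	for (const auto& v : getReadyNonError(futures))
		std::cout << v << "\n"; // prints the two ready values
}
```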